Merge pull request #192 from vanstee/hcl2-with-interpolation

Upgrade to hcl2 to support variables and functions

Tags: pull/270/head, v0.4.0
commit 205165bec5
@@ -0,0 +1,2 @@
README.html
coverage.out
@@ -0,0 +1,70 @@
language: go
sudo: false
go:
  - 1.8
  - 1.7.5
  - 1.7.4
  - 1.7.3
  - 1.7.2
  - 1.7.1
  - 1.7
  - tip
  - 1.6.4
  - 1.6.3
  - 1.6.2
  - 1.6.1
  - 1.6
  - 1.5.4
  - 1.5.3
  - 1.5.2
  - 1.5.1
  - 1.5
  - 1.4.3
  - 1.4.2
  - 1.4.1
  - 1.4
  - 1.3.3
  - 1.3.2
  - 1.3.1
  - 1.3
  - 1.2.2
  - 1.2.1
  - 1.2
  - 1.1.2
  - 1.1.1
  - 1.1
before_install:
  - go get github.com/mattn/goveralls
script:
  - $HOME/gopath/bin/goveralls -service=travis-ci
notifications:
  email:
    on_success: never
matrix:
  fast_finish: true
  allow_failures:
    - go: tip
    - go: 1.6.4
    - go: 1.6.3
    - go: 1.6.2
    - go: 1.6.1
    - go: 1.6
    - go: 1.5.4
    - go: 1.5.3
    - go: 1.5.2
    - go: 1.5.1
    - go: 1.5
    - go: 1.4.3
    - go: 1.4.2
    - go: 1.4.1
    - go: 1.4
    - go: 1.3.3
    - go: 1.3.2
    - go: 1.3.1
    - go: 1.3
    - go: 1.2.2
    - go: 1.2.1
    - go: 1.2
    - go: 1.1.2
    - go: 1.1.1
    - go: 1.1
@@ -0,0 +1,36 @@
Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.


Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
    have the right to submit it under the open source license
    indicated in the file; or

(b) The contribution is based upon previous work that, to the best
    of my knowledge, is covered under an appropriate open source
    license and I have the right under that license to submit that
    work with modifications, whether created in whole or in part
    by me, under the same open source license (unless I am
    permitted to submit under a different license), as indicated
    in the file; or

(c) The contribution was provided directly to me by some other
    person who certified (a), (b) or (c) and I have not modified
    it.

(d) I understand and agree that this project and the contribution
    are public and that a record of the contribution (including all
    personal information I submit with it, including my sign-off) is
    maintained indefinitely and may be redistributed consistent with
    this project or the open source license(s) involved.
@@ -0,0 +1,201 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
@@ -0,0 +1 @@
Alex Bucataru <alex@alrux.com> (@AlexBucataru)
@@ -0,0 +1,5 @@
Alrux Go EXTensions (AGExt) - package levenshtein
Copyright 2016 ALRUX Inc.

This product includes software developed at ALRUX Inc.
(http://www.alrux.com/).
@@ -0,0 +1,290 @@
// Copyright 2016 ALRUX Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/*
Package levenshtein implements distance and similarity metrics for strings, based on the Levenshtein measure.

The Levenshtein `Distance` between two strings is the minimum total cost of edits that would convert the first string into the second. The allowed edit operations are insertions, deletions, and substitutions, all at character (one UTF-8 code point) level. Each operation has a default cost of 1, but each can be assigned its own cost equal to or greater than 0.

A `Distance` of 0 means the two strings are identical, and the higher the value the more different the strings. Since in practice we are interested in finding if the two strings are "close enough", it often does not make sense to continue the calculation once the result is mathematically guaranteed to exceed a desired threshold. Providing this value to the `Distance` function allows it to take a shortcut and return a lower bound instead of an exact cost when the threshold is exceeded.

The `Similarity` function calculates the distance, then converts it into a normalized metric within the range 0..1, with 1 meaning the strings are identical, and 0 that they have nothing in common. A minimum similarity threshold can be provided to speed up the calculation of the metric for strings that are far too dissimilar for the purpose at hand. All values under this threshold are rounded down to 0.

The `Match` function provides a similarity metric, with the same range and meaning as `Similarity`, but with a bonus for string pairs that share a common prefix and have a similarity above a "bonus threshold". It uses the same method as proposed by Winkler for the Jaro distance, and the reasoning behind it is that these string pairs are very likely spelling variations or errors, and they are more closely linked than the edit distance alone would suggest.

The underlying `Calculate` function is also exported, to allow the building of other derivative metrics, if needed.
*/
package levenshtein
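
// Editorial sketch, not part of the vendored file: typical calls to the three
// entry points described above, with nil selecting the default Params
// (all edit costs 1, no maximum cost, no minimum score). The numbers are
// worked by hand from those defaults.
//
//	levenshtein.Distance("kitten", "sitting", nil)   // 3 (2 substitutions + 1 insertion)
//	levenshtein.Similarity("kitten", "sitting", nil) // 1 - 3/7 ≈ 0.571
//	levenshtein.Match("levenshtein", "levenstein", nil)
//	// distance 1, base similarity ≈ 0.909, prefix bonus ≈ 0.036, result ≈ 0.945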

// Calculate determines the Levenshtein distance between two strings, using
// the given costs for each edit operation. It returns the distance along with
// the lengths of the longest common prefix and suffix.
//
// If maxCost is non-zero, the calculation stops as soon as the distance is determined
// to be greater than maxCost. Therefore, any return value higher than maxCost is a
// lower bound for the actual distance.
func Calculate(str1, str2 []rune, maxCost, insCost, subCost, delCost int) (dist, prefixLen, suffixLen int) {
    l1, l2 := len(str1), len(str2)
    // trim common prefix, if any, as it doesn't affect the distance
    for ; prefixLen < l1 && prefixLen < l2; prefixLen++ {
        if str1[prefixLen] != str2[prefixLen] {
            break
        }
    }
    str1, str2 = str1[prefixLen:], str2[prefixLen:]
    l1 -= prefixLen
    l2 -= prefixLen
    // trim common suffix, if any, as it doesn't affect the distance
    for 0 < l1 && 0 < l2 {
        if str1[l1-1] != str2[l2-1] {
            str1, str2 = str1[:l1], str2[:l2]
            break
        }
        l1--
        l2--
        suffixLen++
    }
    // if the first string is empty, the distance is the length of the second string times the cost of insertion
    if l1 == 0 {
        dist = l2 * insCost
        return
    }
    // if the second string is empty, the distance is the length of the first string times the cost of deletion
    if l2 == 0 {
        dist = l1 * delCost
        return
    }

    // variables used in inner "for" loops
    var y, dy, c, l int

    // if maxCost is greater than or equal to the maximum possible distance, it's equivalent to 'unlimited'
    if maxCost > 0 {
        if subCost < delCost+insCost {
            if maxCost >= l1*subCost+(l2-l1)*insCost {
                maxCost = 0
            }
        } else {
            if maxCost >= l1*delCost+l2*insCost {
                maxCost = 0
            }
        }
    }

    if maxCost > 0 {
        // prefer the longer string first, to minimize time;
        // a swap also transposes the meanings of insertion and deletion.
        if l1 < l2 {
            str1, str2, l1, l2, insCost, delCost = str2, str1, l2, l1, delCost, insCost
        }

        // the length differential times cost of deletion is a lower bound for the cost;
        // if it is higher than the maxCost, there is no point going into the main calculation.
        if dist = (l1 - l2) * delCost; dist > maxCost {
            return
        }

        d := make([]int, l1+1)

        // offset and length of d in the current row
        doff, dlen := 0, 1
        for y, dy = 1, delCost; y <= l1 && dy <= maxCost; dlen++ {
            d[y] = dy
            y++
            dy = y * delCost
        }
        // fmt.Printf("%q -> %q: init doff=%d dlen=%d d[%d:%d]=%v\n", str1, str2, doff, dlen, doff, doff+dlen, d[doff:doff+dlen])

        for x := 0; x < l2; x++ {
            dy, d[doff] = d[doff], d[doff]+insCost
            for d[doff] > maxCost && dlen > 0 {
                if str1[doff] != str2[x] {
                    dy += subCost
                }
                doff++
                dlen--
                if c = d[doff] + insCost; c < dy {
                    dy = c
                }
                dy, d[doff] = d[doff], dy
            }
            for y, l = doff, doff+dlen-1; y < l; dy, d[y] = d[y], dy {
                if str1[y] != str2[x] {
                    dy += subCost
                }
                if c = d[y] + delCost; c < dy {
                    dy = c
                }
                y++
                if c = d[y] + insCost; c < dy {
                    dy = c
                }
            }
            if y < l1 {
                if str1[y] != str2[x] {
                    dy += subCost
                }
                if c = d[y] + delCost; c < dy {
                    dy = c
                }
                for ; dy <= maxCost && y < l1; dy, d[y] = dy+delCost, dy {
                    y++
                    dlen++
                }
            }
            // fmt.Printf("%q -> %q: x=%d doff=%d dlen=%d d[%d:%d]=%v\n", str1, str2, x, doff, dlen, doff, doff+dlen, d[doff:doff+dlen])
            if dlen == 0 {
                dist = maxCost + 1
                return
            }
        }
        if doff+dlen-1 < l1 {
            dist = maxCost + 1
            return
        }
        dist = d[l1]
    } else {
        // ToDo: This is O(l1*l2) time and O(min(l1,l2)) space; investigate if it is
        // worth to implement diagonal approach - O(l1*(1+dist)) time, up to O(l1*l2) space
        // http://www.csse.monash.edu.au/~lloyd/tildeStrings/Alignment/92.IPL.html

        // prefer the shorter string first, to minimize space; time is O(l1*l2) anyway;
        // a swap also transposes the meanings of insertion and deletion.
        if l1 > l2 {
            str1, str2, l1, l2, insCost, delCost = str2, str1, l2, l1, delCost, insCost
        }
        d := make([]int, l1+1)

        for y = 1; y <= l1; y++ {
            d[y] = y * delCost
        }
        for x := 0; x < l2; x++ {
            dy, d[0] = d[0], d[0]+insCost
            for y = 0; y < l1; dy, d[y] = d[y], dy {
                if str1[y] != str2[x] {
                    dy += subCost
                }
                if c = d[y] + delCost; c < dy {
                    dy = c
                }
                y++
                if c = d[y] + insCost; c < dy {
                    dy = c
                }
            }
        }
        dist = d[l1]
    }

    return
}
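
// Editorial sketch, not part of the vendored file: the maxCost shortcut noted
// in the doc comment above, with all costs assumed to be 1.
//
//	d, _, _ := Calculate([]rune("abcdef"), []rune("uvwxyz"), 2, 1, 1, 1)
//	// The true distance is 6; d is only guaranteed to be some value greater
//	// than 2, so callers must treat any result above maxCost as a lower bound.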

// Distance returns the Levenshtein distance between str1 and str2, using the
// default or provided cost values. Pass nil for the third argument to use the
// default cost of 1 for all three operations, with no maximum.
func Distance(str1, str2 string, p *Params) int {
    if p == nil {
        p = defaultParams
    }
    dist, _, _ := Calculate([]rune(str1), []rune(str2), p.maxCost, p.insCost, p.subCost, p.delCost)
    return dist
}

// Similarity returns a score in the range of 0..1 for how similar the two strings are.
// A score of 1 means the strings are identical, and 0 means they have nothing in common.
//
// A nil third argument uses the default cost of 1 for all three operations.
//
// If a non-zero MinScore value is provided in the parameters, scores lower than it
// will be returned as 0.
func Similarity(str1, str2 string, p *Params) float64 {
    return Match(str1, str2, p.Clone().BonusThreshold(1.1)) // guaranteed no bonus
}

// Match returns a similarity score adjusted by the same method as proposed by Winkler for
// the Jaro distance - giving a bonus to string pairs that share a common prefix, only if their
// similarity score is already over a threshold.
//
// The score is in the range of 0..1, with 1 meaning the strings are identical,
// and 0 meaning they have nothing in common.
//
// A nil third argument uses the default cost of 1 for all three operations, maximum length of
// common prefix to consider for bonus of 4, scaling factor of 0.1, and bonus threshold of 0.7.
//
// If a non-zero MinScore value is provided in the parameters, scores lower than it
// will be returned as 0.
func Match(str1, str2 string, p *Params) float64 {
    s1, s2 := []rune(str1), []rune(str2)
    l1, l2 := len(s1), len(s2)
    // two empty strings are identical; shortcut also avoids divByZero issues later on.
    if l1 == 0 && l2 == 0 {
        return 1
    }

    if p == nil {
        p = defaultParams
    }

    // a min over 1 can never be satisfied, so the score is 0.
    if p.minScore > 1 {
        return 0
    }

    insCost, delCost, maxDist, max := p.insCost, p.delCost, 0, 0
    if l1 > l2 {
        l1, l2, insCost, delCost = l2, l1, delCost, insCost
    }

    if p.subCost < delCost+insCost {
        maxDist = l1*p.subCost + (l2-l1)*insCost
    } else {
        maxDist = l1*delCost + l2*insCost
    }

    // a zero min is always satisfied, so no need to set a max cost.
    if p.minScore > 0 {
        // if p.minScore is lower than p.bonusThreshold, we can use a simplified formula
        // for the max cost, because a sim score below min cannot receive a bonus.
        if p.minScore < p.bonusThreshold {
            // round down the max - a cost equal to a rounded up max would already be under min.
            max = int((1 - p.minScore) * float64(maxDist))
        } else {
            // p.minScore <= sim + p.bonusPrefix*p.bonusScale*(1-sim)
            // p.minScore <= (1-dist/maxDist) + p.bonusPrefix*p.bonusScale*(1-(1-dist/maxDist))
            // p.minScore <= 1 - dist/maxDist + p.bonusPrefix*p.bonusScale*dist/maxDist
            // 1 - p.minScore >= dist/maxDist - p.bonusPrefix*p.bonusScale*dist/maxDist
            // (1-p.minScore)*maxDist/(1-p.bonusPrefix*p.bonusScale) >= dist
            max = int((1 - p.minScore) * float64(maxDist) / (1 - float64(p.bonusPrefix)*p.bonusScale))
        }
    }

    dist, pl, _ := Calculate(s1, s2, max, p.insCost, p.subCost, p.delCost)
    if max > 0 && dist > max {
        return 0
    }
    sim := 1 - float64(dist)/float64(maxDist)

    if sim >= p.bonusThreshold && sim < 1 && p.bonusPrefix > 0 && p.bonusScale > 0 {
        if pl > p.bonusPrefix {
            pl = p.bonusPrefix
        }
        sim += float64(pl) * p.bonusScale * (1 - sim)
    }

    if sim < p.minScore {
        return 0
    }

    return sim
}
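
// Editorial sketch, not part of the vendored file: the bonus arithmetic above,
// worked with the defaults (bonusPrefix 4, bonusScale 0.1, bonusThreshold 0.7)
// for "levenshtein" vs "levenstein":
//
//	sim := 1 - 1.0/11          // dist 1, maxDist 11: ≈ 0.909, over the threshold
//	sim += 4 * 0.1 * (1 - sim) // shared prefix "levens" is 6 runes, capped at 4
//	// sim ≈ 0.945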
@@ -0,0 +1,152 @@
// Copyright 2016 ALRUX Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package levenshtein

// Params represents a set of parameter values for the various formulas involved
// in the calculation of the Levenshtein string metrics.
type Params struct {
    insCost        int
    subCost        int
    delCost        int
    maxCost        int
    minScore       float64
    bonusPrefix    int
    bonusScale     float64
    bonusThreshold float64
}

var (
    defaultParams = NewParams()
)

// NewParams creates a new set of parameters and initializes it with the default values.
func NewParams() *Params {
    return &Params{
        insCost:        1,
        subCost:        1,
        delCost:        1,
        maxCost:        0,
        minScore:       0,
        bonusPrefix:    4,
        bonusScale:     .1,
        bonusThreshold: .7,
    }
}

// Clone returns a pointer to a copy of the receiver parameter set, or of a new
// default parameter set if the receiver is nil.
func (p *Params) Clone() *Params {
    if p == nil {
        return NewParams()
    }
    return &Params{
        insCost:        p.insCost,
        subCost:        p.subCost,
        delCost:        p.delCost,
        maxCost:        p.maxCost,
        minScore:       p.minScore,
        bonusPrefix:    p.bonusPrefix,
        bonusScale:     p.bonusScale,
        bonusThreshold: p.bonusThreshold,
    }
}

// InsCost overrides the default value of 1 for the cost of insertion.
// The new value must be zero or positive.
func (p *Params) InsCost(v int) *Params {
    if v >= 0 {
        p.insCost = v
    }
    return p
}

// SubCost overrides the default value of 1 for the cost of substitution.
// The new value must be zero or positive.
func (p *Params) SubCost(v int) *Params {
    if v >= 0 {
        p.subCost = v
    }
    return p
}

// DelCost overrides the default value of 1 for the cost of deletion.
// The new value must be zero or positive.
func (p *Params) DelCost(v int) *Params {
    if v >= 0 {
        p.delCost = v
    }
    return p
}

// MaxCost overrides the default value of 0 (meaning unlimited) for the maximum cost.
// The calculation of Distance() stops when the result is guaranteed to exceed
// this maximum, returning a lower-bound rather than exact value.
// The new value must be zero or positive.
func (p *Params) MaxCost(v int) *Params {
    if v >= 0 {
        p.maxCost = v
    }
    return p
}

// MinScore overrides the default value of 0 for the minimum similarity score.
// Scores below this threshold are returned as 0 by Similarity() and Match().
// The new value must be zero or positive. Note that a minimum greater than 1
// can never be satisfied, resulting in a score of 0 for any pair of strings.
func (p *Params) MinScore(v float64) *Params {
    if v >= 0 {
        p.minScore = v
    }
    return p
}

// BonusPrefix overrides the default value for the maximum length of
// common prefix to be considered for bonus by Match().
// The new value must be zero or positive.
func (p *Params) BonusPrefix(v int) *Params {
    if v >= 0 {
        p.bonusPrefix = v
    }
    return p
}

// BonusScale overrides the default value for the scaling factor used by Match()
// in calculating the bonus.
// The new value must be zero or positive. To guarantee that the similarity score
// remains in the interval 0..1, this scaling factor is not allowed to exceed
// 1 / BonusPrefix.
func (p *Params) BonusScale(v float64) *Params {
    if v >= 0 {
        p.bonusScale = v
    }

    // the bonus cannot exceed (1-sim), or the score may become greater than 1.
    if float64(p.bonusPrefix)*p.bonusScale > 1 {
        p.bonusScale = 1 / float64(p.bonusPrefix)
    }

    return p
}
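
// Editorial note, not part of the vendored file: the cap above follows from
// requiring the bonused score to stay within 0..1 for any sim in 0..1:
//
//	sim + bonusPrefix*bonusScale*(1-sim) <= 1
//	<=> bonusPrefix*bonusScale*(1-sim) <= 1-sim
//	<=> bonusPrefix*bonusScale <= 1, i.e. bonusScale <= 1/bonusPrefix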

// BonusThreshold overrides the default value for the minimum similarity score
// for which Match() can assign a bonus.
// The new value must be zero or positive. Note that a threshold greater than 1
// effectively makes Match() become the equivalent of Similarity().
func (p *Params) BonusThreshold(v float64) *Params {
    if v >= 0 {
        p.bonusThreshold = v
    }
    return p
}
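
// Editorial sketch, not part of the vendored file: the setters above return the
// receiver, so a Params value is normally built by chaining; the costs and
// thresholds here are illustrative only.
//
//	p := NewParams().SubCost(2).MinScore(0.8).BonusPrefix(6)
//	score := Match("color", "colour", p) // 0 unless the adjusted score reaches 0.8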
@@ -0,0 +1,95 @@
Copyright (c) 2017 Martin Atkins

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

---------

Unicode table generation programs are under a separate copyright and license:

Copyright (c) 2014 Couchbase, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific language governing permissions
and limitations under the License.

---------

Grapheme break data is provided as part of the Unicode character database,
copyright 2016 Unicode, Inc, which is provided with the following license:

Unicode Data Files include all data files under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.

Unicode Data Files do not include PDF online code charts under the
directory http://www.unicode.org/Public/.

Software includes any source code published in the Unicode Standard
or under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.

NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.

COPYRIGHT AND PERMISSION NOTICE

Copyright © 1991-2017 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.

Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.

THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
@@ -0,0 +1,30 @@
package textseg

import (
    "bufio"
    "bytes"
)

// AllTokens is a utility that uses a bufio.SplitFunc to produce a slice of
// all of the recognized tokens in the given buffer.
func AllTokens(buf []byte, splitFunc bufio.SplitFunc) ([][]byte, error) {
    scanner := bufio.NewScanner(bytes.NewReader(buf))
    scanner.Split(splitFunc)
    var ret [][]byte
    for scanner.Scan() {
        ret = append(ret, scanner.Bytes())
    }
    return ret, scanner.Err()
}

// TokenCount is a utility that uses a bufio.SplitFunc to count the number of
// recognized tokens in the given buffer.
func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) {
    scanner := bufio.NewScanner(bytes.NewReader(buf))
    scanner.Split(splitFunc)
    var ret int
    for scanner.Scan() {
        ret++
    }
    return ret, scanner.Err()
}
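
// Editorial sketch, not part of the vendored file: counting grapheme clusters
// with the package's own scanner (ScanGraphemeClusters, defined later in this
// diff); "é" is a single precomposed code point here.
//
//	n, err := textseg.TokenCount([]byte("héllo"), textseg.ScanGraphemeClusters)
//	// n == 5 when err == nil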
@@ -0,0 +1,7 @@
package textseg

//go:generate go run make_tables.go -output tables.go
//go:generate go run make_test_tables.go -output tables_test.go
//go:generate ruby unicode2ragel.rb --url=http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty.txt -m GraphemeCluster -p "Prepend,CR,LF,Control,Extend,Regional_Indicator,SpacingMark,L,V,T,LV,LVT,E_Base,E_Modifier,ZWJ,Glue_After_Zwj,E_Base_GAZ" -o grapheme_clusters_table.rl
//go:generate ragel -Z grapheme_clusters.rl
//go:generate gofmt -w grapheme_clusters.go
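
// Editorial note, not part of the vendored file: the directives above run in
// order, so regenerating requires Ruby and Ragel on PATH; the usual entry
// point would be:
//
//	go generate ./...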
[File diff suppressed because it is too large]
@@ -0,0 +1,132 @@
package textseg

import (
    "errors"
    "unicode/utf8"
)

// Generated from grapheme_clusters.rl. DO NOT EDIT
%%{
    # (except you are actually in grapheme_clusters.rl here, so edit away!)

    machine graphclust;
    write data;
}%%

var Error = errors.New("invalid UTF8 text")

// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on grapheme cluster boundaries.
func ScanGraphemeClusters(data []byte, atEOF bool) (int, []byte, error) {
    if len(data) == 0 {
        return 0, nil, nil
    }

    // Ragel state
    cs := 0         // Current State
    p := 0          // "Pointer" into data
    pe := len(data) // End-of-data "pointer"
    ts := 0
    te := 0
    act := 0
    eof := pe

    // Make Go compiler happy
    _ = ts
    _ = te
    _ = act
    _ = eof

    startPos := 0
    endPos := 0

    %%{
        include GraphemeCluster "grapheme_clusters_table.rl";

        action start {
            startPos = p
        }

        action end {
            endPos = p
        }

        action emit {
            return endPos+1, data[startPos:endPos+1], nil
        }

        ZWJGlue = ZWJ (Glue_After_Zwj | E_Base_GAZ Extend* E_Modifier?)?;
        AnyExtender = Extend | ZWJGlue | SpacingMark;
        Extension = AnyExtender*;
        ReplacementChar = (0xEF 0xBF 0xBD);

        CRLFSeq = CR LF;
        ControlSeq = Control | ReplacementChar;
        HangulSeq = (
            L+ (((LV? V+ | LVT) T*)?|LV?) |
            LV V* T* |
            V+ T* |
            LVT T* |
            T+
        ) Extension;
        EmojiSeq = (E_Base | E_Base_GAZ) Extend* E_Modifier? Extension;
        ZWJSeq = ZWJGlue Extension;
        EmojiFlagSeq = Regional_Indicator Regional_Indicator? Extension;

        UTF8Cont = 0x80 .. 0xBF;
        AnyUTF8 = (
            0x00..0x7F |
            0xC0..0xDF . UTF8Cont |
            0xE0..0xEF . UTF8Cont . UTF8Cont |
            0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
        );

        # OtherSeq is any character that isn't at the start of one of the extended sequences above, followed by extension
        OtherSeq = (AnyUTF8 - (CR|LF|Control|ReplacementChar|L|LV|V|LVT|T|E_Base|E_Base_GAZ|ZWJ|Regional_Indicator|Prepend)) Extension;

        # PrependSeq is prepend followed by any of the other patterns above, except control characters which explicitly break
        PrependSeq = Prepend+ (HangulSeq|EmojiSeq|ZWJSeq|EmojiFlagSeq|OtherSeq)?;

        CRLFTok = CRLFSeq >start @end;
        ControlTok = ControlSeq >start @end;
        HangulTok = HangulSeq >start @end;
        EmojiTok = EmojiSeq >start @end;
        ZWJTok = ZWJSeq >start @end;
        EmojiFlagTok = EmojiFlagSeq >start @end;
        OtherTok = OtherSeq >start @end;
        PrependTok = PrependSeq >start @end;

        main := |*
            CRLFTok => emit;
            ControlTok => emit;
            HangulTok => emit;
            EmojiTok => emit;
            ZWJTok => emit;
            EmojiFlagTok => emit;
            PrependTok => emit;
            OtherTok => emit;

            # any single valid UTF-8 character would also be valid per spec,
            # but we'll handle that separately after the loop so we can deal
            # with requesting more bytes if we're not at EOF.
        *|;

        write init;
        write exec;
    }%%

    // If we fall out here then we were unable to complete a sequence.
    // If we weren't able to complete a sequence then either we've
    // reached the end of a partial buffer (so there's more data to come)
    // or we have an isolated symbol that would normally be part of a
    // grapheme cluster but has appeared in isolation here.

    if !atEOF {
        // Request more
        return 0, nil, nil
    }

    // Just take the first UTF-8 sequence and return that.
    _, seqLen := utf8.DecodeRune(data)
    return seqLen, data[:seqLen], nil
}
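
// Editorial sketch, not part of the vendored file: ScanGraphemeClusters
// follows the bufio.SplitFunc contract used above, where (0, nil, nil) asks
// the Scanner for more bytes and any other return emits one whole cluster:
//
//	sc := bufio.NewScanner(strings.NewReader("héllo"))
//	sc.Split(textseg.ScanGraphemeClusters)
//	for sc.Scan() {
//		fmt.Printf("%q\n", sc.Text()) // one grapheme cluster per iteration
//	}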
vendor/github.com/apparentlymart/go-textseg/textseg/grapheme_clusters_table.rl (1583 lines, generated, vendored)
[File diff suppressed because it is too large]
[File diff suppressed because it is too large]
@@ -0,0 +1,335 @@
#!/usr/bin/env ruby
#
# This script has been updated to accept more command-line arguments:
#
#    -u, --url        URL to process
#    -m, --machine    Machine name
#    -p, --properties Properties to add to the machine
#    -o, --output     Write output to file
#
# Updated by: Marty Schoch <marty.schoch@gmail.com>
#
# This script uses the unicode spec to generate a Ragel state machine
# that recognizes unicode alphanumeric characters. It generates 5
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
# Currently supported encodings are UTF-8 [default] and UCS-4.
#
# Usage: unicode2ragel.rb [options]
#     -e, --encoding [ucs4 | utf8]     Data encoding
#     -h, --help                       Show this message
#
# This script was originally written as part of the Ferret search
# engine library.
#
# Author: Rakan El-Khalil <rakan@well.com>

require 'optparse'
require 'open-uri'

ENCODINGS = [ :utf8, :ucs4 ]
ALPHTYPES = { :utf8 => "byte", :ucs4 => "rune" }
DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
DEFAULT_MACHINE_NAME= "WChar"

###
# Display vars & default option

TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8
@chart_url = DEFAULT_CHART_URL
machine_name = DEFAULT_MACHINE_NAME
properties = []
@output = $stdout

###
# Option parsing

cli_opts = OptionParser.new do |opts|
  opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
    @encoding = o.downcase.to_sym
  end
  opts.on("-h", "--help", "Show this message") do
    puts opts
    exit
  end
  opts.on("-u", "--url URL", "URL to process") do |o|
    @chart_url = o
  end
  opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o|
    machine_name = o
  end
  opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o|
    properties = o
  end
  opts.on("-o", "--output FILE", "output file") do |o|
    @output = File.new(o, "w+")
  end
end

cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
  puts "Invalid encoding: #{@encoding}"
  puts cli_opts
  exit
end

##
# Downloads the document at url and yields every alpha line's hex
# range and description.

def each_alpha( url, property )
  open( url ) do |file|
    file.each_line do |line|
      next if line =~ /^#/;
      next if line !~ /; #{property} #/;

      range, description = line.split(/;/)
      range.strip!
      description.gsub!(/.*#/, '').strip!

      if range =~ /\.\./
           start, stop = range.split '..'
      else start = stop = range
      end

      yield start.hex .. stop.hex, description
    end
  end
end
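
# Editorial note, not part of the vendored file: the UCD lines matched above
# look like this (format assumed from DerivedCoreProperties.txt):
#
#   0041..005A    ; Alphabetic # L&  [26] LATIN CAPITAL LETTER A..LATIN CAPITAL LETTER Z
#
# so each_alpha would yield the range (0x41..0x5A) together with the text
# after the '#', whose "[26]" count is what is_valid? checks below.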

###
# Formats to hex at minimum width

def to_hex( n )
  r = "%0X" % n
  r = "0#{r}" unless (r.length % 2).zero?
  r
end

###
# UCS4 is just a straight hex conversion of the unicode codepoint.

def to_ucs4( range )
  rangestr  =   "0x" + to_hex(range.begin)
  rangestr << "..0x" + to_hex(range.end) if range.begin != range.end
  [ rangestr ]
end

##
# 0x00     - 0x7f     -> 0zzzzzzz[7]
# 0x80     - 0x7ff    -> 110yyyyy[5] 10zzzzzz[6]
# 0x800    - 0xffff   -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]

UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]

def to_utf8_enc( n )
  r = 0
  if n <= 0x7f
    r = n
  elsif n <= 0x7ff
    y = 0xc0 | (n >> 6)
    z = 0x80 | (n & 0x3f)
    r = y << 8 | z
  elsif n <= 0xffff
    x = 0xe0 | (n >> 12)
    y = 0x80 | (n >> 6) & 0x3f
    z = 0x80 | n & 0x3f
    r = x << 16 | y << 8 | z
  elsif n <= 0x10ffff
    w = 0xf0 | (n >> 18)
    x = 0x80 | (n >> 12) & 0x3f
    y = 0x80 | (n >> 6) & 0x3f
    z = 0x80 | n & 0x3f
    r = w << 24 | x << 16 | y << 8 | z
  end

  to_hex(r)
end
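
# Editorial check, not part of the vendored file: two hand-verified values
# for the encoder above.
#
#   to_utf8_enc(0x00E9)  #=> "C3A9"     (é, two-byte sequence)
#   to_utf8_enc(0x1F600) #=> "F09F9880" (U+1F600, four-byte sequence)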

# Reverses to_utf8_enc: recovers the code point from the hex-encoded bytes.
def from_utf8_enc( n )
  n = n.hex
  r = 0
  if n <= 0x7f
    r = n
  elsif n <= 0xdfff
    y = (n >> 8) & 0x1f
    z = n & 0x3f
    r = y << 6 | z
  elsif n <= 0xefffff
    x = (n >> 16) & 0x0f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    r = x << 12 | y << 6 | z
  elsif n <= 0xf7ffffff
    w = (n >> 24) & 0x07
    x = (n >> 16) & 0x3f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    r = w << 18 | x << 12 | y << 6 | z
  end
  r
end

###
# Given a range, splits it up into ranges that can be continuously
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
# This is not strictly needed since the current [5.1] unicode standard
# doesn't have ranges that straddle utf8 boundaries. This is included
# for completeness as there is no telling if that will ever change.

def utf8_ranges( range )
  ranges = []
  UTF8_BOUNDARIES.each do |max|
    if range.begin <= max
      if range.end <= max
        ranges << range
        return ranges
      end

      ranges << (range.begin .. max)
      range = (max + 1) .. range.end
    end
  end
  ranges
end

def build_range( start, stop )
  size = start.size/2
  left = size - 1
  return [""] if size < 1

  a = start[0..1]
  b = stop[0..1]

  ###
  # Shared prefix

  if a == b
    return build_range(start[2..-1], stop[2..-1]).map do |elt|
      "0x#{a} " + elt
    end
  end

  ###
  # Unshared prefix, end of run

  return ["0x#{a}..0x#{b} "] if left.zero?

  ###
  # Unshared prefix, not end of run
  # Range can be 0x123456..0x56789A
  # Which is equivalent to:
  #   0x123456 .. 0x12FFFF
  #   0x130000 .. 0x55FFFF
  #   0x560000 .. 0x56789A

  ret = []
  ret << build_range(start, a + "FF" * left)

  ###
  # Only generate middle range if need be.

  if a.hex+1 != b.hex
    max = to_hex(b.hex - 1)
    max = "FF" if b == "FF"
    ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
  end

  ###
  # Don't generate last range if it is covered by first range

  ret << build_range(b + "00" * left, stop) unless b == "FF"
  ret.flatten!
end

def to_utf8( range )
  utf8_ranges( range ).map do |r|
    begin_enc = to_utf8_enc(r.begin)
    end_enc = to_utf8_enc(r.end)
    build_range begin_enc, end_enc
  end.flatten!
end

##
# Perform a 3-way comparison of the number of codepoints advertised by
# the unicode spec for the given range, the originally parsed range,
# and the resulting utf8 encoded range.

def count_codepoints( code )
  code.split(' ').inject(1) do |acc, elt|
    if elt =~ /0x(.+)\.\.0x(.+)/
      if @encoding == :utf8
        acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1)
      else
        acc * ($2.hex - $1.hex + 1)
      end
    else
      acc
    end
  end
end

def is_valid?( range, desc, codes )
  spec_count = 1
  spec_count = $1.to_i if desc =~ /\[(\d+)\]/
  range_count = range.end - range.begin + 1

  sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) }
  sum == spec_count and sum == range_count
end
##
# Generate the state machine to stdout

def generate_machine( name, property )
  pipe = " "
  @output.puts "    #{name} = "
  each_alpha( @chart_url, property ) do |range, desc|

    codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)

    #raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
    #   is_valid? range, desc, codes

    range_width = codes.map { |a| a.size }.max
    range_width = RANGE_WIDTH if range_width < RANGE_WIDTH

    desc_width  = TOTAL_WIDTH - RANGE_WIDTH - 11
    desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH

    if desc.size > desc_width
      desc = desc[0..desc_width - 4] + "..."
    end

    codes.each_with_index do |r, idx|
      desc = "" unless idx.zero?
      code = "%-#{range_width}s" % r
      @output.puts "      #{pipe} #{code} ##{desc}"
      pipe = "|"
    end
  end
  @output.puts "      ;"
  @output.puts ""
end

@output.puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{@chart_url}
#
# It defines #{properties}.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.

%%{
    machine #{machine_name};

EOF

properties.each { |x| generate_machine( x, x ) }

@output.puts <<EOF
}%%
EOF
@@ -0,0 +1,19 @@
package textseg

import "unicode/utf8"

// ScanUTF8Sequences is a split function for bufio.Scanner that splits
// on UTF8 sequence boundaries.
//
// This is included largely for completeness, since this behavior is already
// built in to Go when ranging over a string.
func ScanUTF8Sequences(data []byte, atEOF bool) (int, []byte, error) {
    if len(data) == 0 {
        return 0, nil, nil
    }
    r, seqLen := utf8.DecodeRune(data)
    if r == utf8.RuneError && !atEOF {
        return 0, nil, nil
    }
    return seqLen, data[:seqLen], nil
}
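
// Editorial sketch, not part of the vendored file: one token per rune,
// mirroring what a range loop over the string would produce ("é" is a single
// precomposed code point here):
//
//	toks, _ := textseg.AllTokens([]byte("héllo"), textseg.ScanUTF8Sequences)
//	// len(toks) == 5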
@@ -0,0 +1,95 @@
Copyright (c) 2017 Martin Atkins

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

---------

Unicode table generation programs are under a separate copyright and license:

Copyright (c) 2014 Couchbase, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific language governing permissions
and limitations under the License.

---------

Grapheme break data is provided as part of the Unicode character database,
copyright 2016 Unicode, Inc, which is provided with the following license:

Unicode Data Files include all data files under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.

Unicode Data Files do not include PDF online code charts under the
directory http://www.unicode.org/Public/.

Software includes any source code published in the Unicode Standard
or under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.

NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.

COPYRIGHT AND PERMISSION NOTICE

Copyright © 1991-2017 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.

Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.

THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.

Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
@ -0,0 +1,30 @@
package textseg

import (
    "bufio"
    "bytes"
)

// AllTokens is a utility that uses a bufio.SplitFunc to produce a slice of
// all of the recognized tokens in the given buffer.
func AllTokens(buf []byte, splitFunc bufio.SplitFunc) ([][]byte, error) {
    scanner := bufio.NewScanner(bytes.NewReader(buf))
    scanner.Split(splitFunc)
    var ret [][]byte
    for scanner.Scan() {
        ret = append(ret, scanner.Bytes())
    }
    return ret, scanner.Err()
}

// TokenCount is a utility that uses a bufio.SplitFunc to count the number of
// recognized tokens in the given buffer.
func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) {
    scanner := bufio.NewScanner(bytes.NewReader(buf))
    scanner.Split(splitFunc)
    var ret int
    for scanner.Scan() {
        ret++
    }
    return ret, scanner.Err()
}
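Both helpers accept any bufio.SplitFunc, so they compose directly with the grapheme-cluster scanner added later in this changeset. A minimal usage sketch (assuming only the vendored v12 import path visible in the file paths below):

package main

import (
    "fmt"

    textseg "github.com/apparentlymart/go-textseg/v12/textseg"
)

func main() {
    // 'e' plus a combining acute accent is two runes but one grapheme cluster.
    buf := []byte("de\u0301ja\u0300")
    tokens, err := textseg.AllTokens(buf, textseg.ScanGraphemeClusters)
    if err != nil {
        panic(err)
    }
    fmt.Println(len(tokens)) // 4 clusters: "d", "é", "j", "à"
}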
@ -0,0 +1,8 @@
package textseg

//go:generate go run make_tables.go -output tables.go
//go:generate go run make_test_tables.go -output tables_test.go
//go:generate ruby unicode2ragel.rb --url=https://www.unicode.org/Public/12.0.0/ucd/auxiliary/GraphemeBreakProperty.txt -m GraphemeCluster -p "Prepend,CR,LF,Control,Extend,Regional_Indicator,SpacingMark,L,V,T,LV,LVT,ZWJ" -o grapheme_clusters_table.rl
//go:generate ruby unicode2ragel.rb --url=https://www.unicode.org/Public/emoji/12.0/emoji-data.txt -m Emoji -p "Extended_Pictographic" -o emoji_table.rl
//go:generate ragel -Z grapheme_clusters.rl
//go:generate gofmt -w grapheme_clusters.go
4078 vendor/github.com/apparentlymart/go-textseg/v12/textseg/grapheme_clusters.go (generated, vendored)
File diff suppressed because it is too large
@ -0,0 +1,133 @@
package textseg

import (
    "errors"
    "unicode/utf8"
)

// Generated from grapheme_clusters.rl. DO NOT EDIT
%%{
    # (except you are actually in grapheme_clusters.rl here, so edit away!)

    machine graphclust;
    write data;
}%%

var Error = errors.New("invalid UTF8 text")

// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on grapheme cluster boundaries.
func ScanGraphemeClusters(data []byte, atEOF bool) (int, []byte, error) {
    if len(data) == 0 {
        return 0, nil, nil
    }

    // Ragel state
    cs := 0         // Current State
    p := 0          // "Pointer" into data
    pe := len(data) // End-of-data "pointer"
    ts := 0
    te := 0
    act := 0
    eof := pe

    // Make Go compiler happy
    _ = ts
    _ = te
    _ = act
    _ = eof

    startPos := 0
    endPos := 0

    %%{
        include GraphemeCluster "grapheme_clusters_table.rl";
        include Emoji "emoji_table.rl";

        action start {
            startPos = p
        }

        action end {
            endPos = p
        }

        action emit {
            return endPos+1, data[startPos:endPos+1], nil
        }

        ZWJGlue = ZWJ (Extended_Pictographic Extend*)?;
        AnyExtender = Extend | ZWJGlue | SpacingMark;
        Extension = AnyExtender*;
        ReplacementChar = (0xEF 0xBF 0xBD);

        CRLFSeq = CR LF;
        ControlSeq = Control | ReplacementChar;
        HangulSeq = (
            L+ (((LV? V+ | LVT) T*)?|LV?) |
            LV V* T* |
            V+ T* |
            LVT T* |
            T+
        ) Extension;
        EmojiSeq = Extended_Pictographic Extend* Extension;
        ZWJSeq = ZWJ (ZWJ | Extend | SpacingMark)*;
        EmojiFlagSeq = Regional_Indicator Regional_Indicator? Extension;

        UTF8Cont = 0x80 .. 0xBF;
        AnyUTF8 = (
            0x00..0x7F |
            0xC0..0xDF . UTF8Cont |
            0xE0..0xEF . UTF8Cont . UTF8Cont |
            0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
        );

        # OtherSeq is any character that isn't at the start of one of the extended sequences above, followed by extension
        OtherSeq = (AnyUTF8 - (CR|LF|Control|ReplacementChar|L|LV|V|LVT|T|Extended_Pictographic|ZWJ|Regional_Indicator|Prepend)) (Extend | ZWJ | SpacingMark)*;

        # PrependSeq is prepend followed by any of the other patterns above, except control characters which explicitly break
        PrependSeq = Prepend+ (HangulSeq|EmojiSeq|ZWJSeq|EmojiFlagSeq|OtherSeq)?;

        CRLFTok = CRLFSeq >start @end;
        ControlTok = ControlSeq >start @end;
        HangulTok = HangulSeq >start @end;
        EmojiTok = EmojiSeq >start @end;
        ZWJTok = ZWJSeq >start @end;
        EmojiFlagTok = EmojiFlagSeq >start @end;
        OtherTok = OtherSeq >start @end;
        PrependTok = PrependSeq >start @end;

        main := |*
            CRLFTok => emit;
            ControlTok => emit;
            HangulTok => emit;
            EmojiTok => emit;
            ZWJTok => emit;
            EmojiFlagTok => emit;
            PrependTok => emit;
            OtherTok => emit;

            # any single valid UTF-8 character would also be valid per spec,
            # but we'll handle that separately after the loop so we can deal
            # with requesting more bytes if we're not at EOF.
        *|;

        write init;
        write exec;
    }%%

    // If we fall out here then we were unable to complete a sequence.
    // If we weren't able to complete a sequence then either we've
    // reached the end of a partial buffer (so there's more data to come)
    // or we have an isolated symbol that would normally be part of a
    // grapheme cluster but has appeared in isolation here.

    if !atEOF {
        // Request more
        return 0, nil, nil
    }

    // Just take the first UTF-8 sequence and return that.
    _, seqLen := utf8.DecodeRune(data)
    return seqLen, data[:seqLen], nil
}
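To make the token classes above concrete, here is a short sketch of the scanner in action (same vendored import-path assumption as before): CRLFSeq keeps "\r\n" together as one token, and EmojiFlagSeq pairs two regional-indicator runes into a single flag cluster.

package main

import (
    "bufio"
    "fmt"
    "strings"

    textseg "github.com/apparentlymart/go-textseg/v12/textseg"
)

func main() {
    scanner := bufio.NewScanner(strings.NewReader("a\r\n🇳🇿"))
    scanner.Split(textseg.ScanGraphemeClusters)
    for scanner.Scan() {
        fmt.Printf("%q\n", scanner.Text())
    }
    // Expected tokens: "a", "\r\n", "🇳🇿"
}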
1589 vendor/github.com/apparentlymart/go-textseg/v12/textseg/grapheme_clusters_table.rl (generated, vendored)
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,335 @@
#!/usr/bin/env ruby
#
# This script has been updated to accept more command-line arguments:
#
#    -u, --url         URL to process
#    -m, --machine     Machine name
#    -p, --properties  Properties to add to the machine
#    -o, --output      Write output to file
#
# Updated by: Marty Schoch <marty.schoch@gmail.com>
#
# This script uses the unicode spec to generate a Ragel state machine
# that recognizes unicode alphanumeric characters. It generates 5
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
# Currently supported encodings are UTF-8 [default] and UCS-4.
#
# Usage: unicode2ragel.rb [options]
#    -e, --encoding [ucs4 | utf8]  Data encoding
#    -h, --help                    Show this message
#
# This script was originally written as part of the Ferret search
# engine library.
#
# Author: Rakan El-Khalil <rakan@well.com>

require 'optparse'
require 'open-uri'

ENCODINGS = [ :utf8, :ucs4 ]
ALPHTYPES = { :utf8 => "byte", :ucs4 => "rune" }
DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
DEFAULT_MACHINE_NAME = "WChar"

###
# Display vars & default option

TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8
@chart_url = DEFAULT_CHART_URL
machine_name = DEFAULT_MACHINE_NAME
properties = []
@output = $stdout

###
# Option parsing

cli_opts = OptionParser.new do |opts|
  opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
    @encoding = o.downcase.to_sym
  end
  opts.on("-h", "--help", "Show this message") do
    puts opts
    exit
  end
  opts.on("-u", "--url URL", "URL to process") do |o|
    @chart_url = o
  end
  opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o|
    machine_name = o
  end
  opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o|
    properties = o
  end
  opts.on("-o", "--output FILE", "output file") do |o|
    @output = File.new(o, "w+")
  end
end

cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
  puts "Invalid encoding: #{@encoding}"
  puts cli_opts
  exit
end

##
# Downloads the document at url and yields every alpha line's hex
# range and description.

def each_alpha( url, property )
  open( url ) do |file|
    file.each_line do |line|
      next if line =~ /^#/;
      next if line !~ /; #{property} *#/;

      range, description = line.split(/;/)
      range.strip!
      description.gsub!(/.*#/, '').strip!

      if range =~ /\.\./
           start, stop = range.split '..'
      else start = stop = range
      end

      yield start.hex .. stop.hex, description
    end
  end
end

###
# Formats to hex at minimum width

def to_hex( n )
  r = "%0X" % n
  r = "0#{r}" unless (r.length % 2).zero?
  r
end

###
# UCS4 is just a straight hex conversion of the unicode codepoint.

def to_ucs4( range )
  rangestr = "0x" + to_hex(range.begin)
  rangestr << "..0x" + to_hex(range.end) if range.begin != range.end
  [ rangestr ]
end

##
# 0x00     - 0x7f     -> 0zzzzzzz[7]
# 0x80     - 0x7ff    -> 110yyyyy[5] 10zzzzzz[6]
# 0x800    - 0xffff   -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]

UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]

def to_utf8_enc( n )
  r = 0
  if n <= 0x7f
    r = n
  elsif n <= 0x7ff
    y = 0xc0 | (n >> 6)
    z = 0x80 | (n & 0x3f)
    r = y << 8 | z
  elsif n <= 0xffff
    x = 0xe0 | (n >> 12)
    y = 0x80 | (n >> 6) & 0x3f
    z = 0x80 | n & 0x3f
    r = x << 16 | y << 8 | z
  elsif n <= 0x10ffff
    w = 0xf0 | (n >> 18)
    x = 0x80 | (n >> 12) & 0x3f
    y = 0x80 | (n >> 6) & 0x3f
    z = 0x80 | n & 0x3f
    r = w << 24 | x << 16 | y << 8 | z
  end

  to_hex(r)
end

def from_utf8_enc( n )
  n = n.hex
  r = 0
  if n <= 0x7f
    r = n
  elsif n <= 0xdfff
    y = (n >> 8) & 0x1f
    z = n & 0x3f
    r = y << 6 | z
  elsif n <= 0xefffff
    x = (n >> 16) & 0x0f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    r = x << 12 | y << 6 | z
  elsif n <= 0xf7ffffff
    w = (n >> 24) & 0x07
    x = (n >> 16) & 0x3f
    y = (n >> 8) & 0x3f
    z = n & 0x3f
    r = w << 18 | x << 12 | y << 6 | z
  end
  r
end

###
# Given a range, splits it up into ranges that can be continuously
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
# This is not strictly needed since the current [5.1] unicode standard
# doesn't have ranges that straddle utf8 boundaries. This is included
# for completeness as there is no telling if that will ever change.

def utf8_ranges( range )
  ranges = []
  UTF8_BOUNDARIES.each do |max|
    if range.begin <= max
      if range.end <= max
        ranges << range
        return ranges
      end

      ranges << (range.begin .. max)
      range = (max + 1) .. range.end
    end
  end
  ranges
end

def build_range( start, stop )
  size = start.size/2
  left = size - 1
  return [""] if size < 1

  a = start[0..1]
  b = stop[0..1]

  ###
  # Shared prefix

  if a == b
    return build_range(start[2..-1], stop[2..-1]).map do |elt|
      "0x#{a} " + elt
    end
  end

  ###
  # Unshared prefix, end of run

  return ["0x#{a}..0x#{b} "] if left.zero?

  ###
  # Unshared prefix, not end of run
  # Range can be 0x123456..0x56789A
  # Which is equivalent to:
  #   0x123456 .. 0x12FFFF
  #   0x130000 .. 0x55FFFF
  #   0x560000 .. 0x56789A

  ret = []
  ret << build_range(start, a + "FF" * left)

  ###
  # Only generate middle range if need be.

  if a.hex+1 != b.hex
    max = to_hex(b.hex - 1)
    max = "FF" if b == "FF"
    ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
  end

  ###
  # Don't generate last range if it is covered by first range

  ret << build_range(b + "00" * left, stop) unless b == "FF"
  ret.flatten!
end

def to_utf8( range )
  utf8_ranges( range ).map do |r|
    begin_enc = to_utf8_enc(r.begin)
    end_enc = to_utf8_enc(r.end)
    build_range begin_enc, end_enc
  end.flatten!
end

##
# Perform a 3-way comparison of the number of codepoints advertised by
# the unicode spec for the given range, the originally parsed range,
# and the resulting utf8 encoded range.

def count_codepoints( code )
  code.split(' ').inject(1) do |acc, elt|
    if elt =~ /0x(.+)\.\.0x(.+)/
      if @encoding == :utf8
        acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1)
      else
        acc * ($2.hex - $1.hex + 1)
      end
    else
      acc
    end
  end
end

def is_valid?( range, desc, codes )
  spec_count = 1
  spec_count = $1.to_i if desc =~ /\[(\d+)\]/
  range_count = range.end - range.begin + 1

  sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) }
  sum == spec_count and sum == range_count
end

##
# Generate the state machine to stdout

def generate_machine( name, property )
  pipe = " "
  @output.puts "    #{name} = "
  each_alpha( @chart_url, property ) do |range, desc|

    codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)

    #raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
    #   is_valid? range, desc, codes

    range_width = codes.map { |a| a.size }.max
    range_width = RANGE_WIDTH if range_width < RANGE_WIDTH

    desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11
    desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH

    if desc.size > desc_width
      desc = desc[0..desc_width - 4] + "..."
    end

    codes.each_with_index do |r, idx|
      desc = "" unless idx.zero?
      code = "%-#{range_width}s" % r
      @output.puts "      #{pipe} #{code} ##{desc}"
      pipe = "|"
    end
  end
  @output.puts "      ;"
  @output.puts ""
end

@output.puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{@chart_url}
#
# It defines #{properties}.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.

%%{
    machine #{machine_name};

EOF

properties.each { |x| generate_machine( x, x ) }

@output.puts <<EOF
}%%
EOF
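The UTF8_BOUNDARIES table in the script above records the last codepoint representable in 1, 2, 3, and 4 UTF-8 bytes; codepoint ranges are split at exactly those points so that each piece maps onto byte ranges of a single encoded length. Those boundaries can be checked from Go with nothing but the standard library:

package main

import (
    "fmt"
    "unicode/utf8"
)

func main() {
    // The same boundaries as the script's UTF8_BOUNDARIES constant.
    for _, r := range []rune{0x7f, 0x7ff, 0xffff, 0x10ffff} {
        buf := make([]byte, utf8.UTFMax)
        n := utf8.EncodeRune(buf, r)
        fmt.Printf("U+%06X encodes as %d byte(s): % X\n", r, n, buf[:n])
    }
}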
@ -0,0 +1,19 @@
package textseg

import "unicode/utf8"

// ScanUTF8Sequences is a split function for bufio.Scanner that splits
// on UTF8 sequence boundaries.
//
// This is included largely for completeness, since this behavior is already
// built in to Go when ranging over a string.
func ScanUTF8Sequences(data []byte, atEOF bool) (int, []byte, error) {
    if len(data) == 0 {
        return 0, nil, nil
    }
    r, seqLen := utf8.DecodeRune(data)
    if r == utf8.RuneError && !atEOF {
        return 0, nil, nil
    }
    return seqLen, data[:seqLen], nil
}
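For contrast with ScanGraphemeClusters: splitting per UTF-8 sequence treats a combining mark as its own token. A minimal sketch (same vendored import-path assumption as above):

package main

import (
    "bufio"
    "fmt"
    "strings"

    textseg "github.com/apparentlymart/go-textseg/v12/textseg"
)

func main() {
    scanner := bufio.NewScanner(strings.NewReader("e\u0301")) // e + combining acute
    scanner.Split(textseg.ScanUTF8Sequences)
    n := 0
    for scanner.Scan() {
        n++
    }
    fmt.Println(n) // 2 sequences; ScanGraphemeClusters would yield 1 cluster
}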
@ -0,0 +1,27 @@
Copyright (c) 2017 The Go Authors. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

   * Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
   * Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@ -0,0 +1,616 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// Package cmp determines equality of values.
//
// This package is intended to be a more powerful and safer alternative to
// reflect.DeepEqual for comparing whether two values are semantically equal.
//
// The primary features of cmp are:
//
// • When the default behavior of equality does not suit the needs of the test,
// custom equality functions can override the equality operation.
// For example, an equality function may report floats as equal so long as they
// are within some tolerance of each other.
//
// • Types that have an Equal method may use that method to determine equality.
// This allows package authors to determine the equality operation for the types
// that they define.
//
// • If no custom equality functions are used and no Equal method is defined,
// equality is determined by recursively comparing the primitive kinds on both
// values, much like reflect.DeepEqual. Unlike reflect.DeepEqual, unexported
// fields are not compared by default; they result in panics unless suppressed
// by using an Ignore option (see cmpopts.IgnoreUnexported) or explicitly compared
// using the AllowUnexported option.
package cmp

import (
    "fmt"
    "reflect"
    "strings"

    "github.com/google/go-cmp/cmp/internal/diff"
    "github.com/google/go-cmp/cmp/internal/flags"
    "github.com/google/go-cmp/cmp/internal/function"
    "github.com/google/go-cmp/cmp/internal/value"
)
// Equal reports whether x and y are equal by recursively applying the
// following rules in the given order to x and y and all of their sub-values:
//
// • Let S be the set of all Ignore, Transformer, and Comparer options that
// remain after applying all path filters, value filters, and type filters.
// If at least one Ignore exists in S, then the comparison is ignored.
// If the number of Transformer and Comparer options in S is greater than one,
// then Equal panics because it is ambiguous which option to use.
// If S contains a single Transformer, then use that to transform the current
// values and recursively call Equal on the output values.
// If S contains a single Comparer, then use that to compare the current values.
// Otherwise, evaluation proceeds to the next rule.
//
// • If the values have an Equal method of the form "(T) Equal(T) bool" or
// "(T) Equal(I) bool" where T is assignable to I, then use the result of
// x.Equal(y) even if x or y is nil. Otherwise, no such method exists and
// evaluation proceeds to the next rule.
//
// • Lastly, try to compare x and y based on their basic kinds.
// Simple kinds like booleans, integers, floats, complex numbers, strings, and
// channels are compared using the equivalent of the == operator in Go.
// Functions are only equal if they are both nil, otherwise they are unequal.
//
// Structs are equal if recursively calling Equal on all fields reports equal.
// If a struct contains unexported fields, Equal panics unless an Ignore option
// (e.g., cmpopts.IgnoreUnexported) ignores that field or the AllowUnexported
// option explicitly permits comparing the unexported field.
//
// Slices are equal if they are both nil or both non-nil, where recursively
// calling Equal on all non-ignored slice or array elements reports equal.
// Empty non-nil slices and nil slices are not equal; to equate empty slices,
// consider using cmpopts.EquateEmpty.
//
// Maps are equal if they are both nil or both non-nil, where recursively
// calling Equal on all non-ignored map entries reports equal.
// Map keys are equal according to the == operator.
// To use custom comparisons for map keys, consider using cmpopts.SortMaps.
// Empty non-nil maps and nil maps are not equal; to equate empty maps,
// consider using cmpopts.EquateEmpty.
//
// Pointers and interfaces are equal if they are both nil or both non-nil,
// where they have the same underlying concrete type and recursively
// calling Equal on the underlying values reports equal.
func Equal(x, y interface{}, opts ...Option) bool {
    vx := reflect.ValueOf(x)
    vy := reflect.ValueOf(y)

    // If the inputs are different types, auto-wrap them in an empty interface
    // so that they have the same parent type.
    var t reflect.Type
    if !vx.IsValid() || !vy.IsValid() || vx.Type() != vy.Type() {
        t = reflect.TypeOf((*interface{})(nil)).Elem()
        if vx.IsValid() {
            vvx := reflect.New(t).Elem()
            vvx.Set(vx)
            vx = vvx
        }
        if vy.IsValid() {
            vvy := reflect.New(t).Elem()
            vvy.Set(vy)
            vy = vvy
        }
    } else {
        t = vx.Type()
    }

    s := newState(opts)
    s.compareAny(&pathStep{t, vx, vy})
    return s.result.Equal()
}
// Diff returns a human-readable report of the differences between two values.
// It returns an empty string if and only if Equal returns true for the same
// input values and options.
//
// The output is displayed as a literal in pseudo-Go syntax.
// At the start of each line, a "-" prefix indicates an element removed from x,
// a "+" prefix indicates an element added to y, and the lack of a prefix
// indicates an element common to both x and y. If possible, the output
// uses fmt.Stringer.String or error.Error methods to produce more humanly
// readable outputs. In such cases, the string is prefixed with either an
// 's' or 'e' character, respectively, to indicate that the method was called.
//
// Do not depend on this output being stable. If you need the ability to
// programmatically interpret the difference, consider using a custom Reporter.
func Diff(x, y interface{}, opts ...Option) string {
    r := new(defaultReporter)
    eq := Equal(x, y, Options(opts), Reporter(r))
    d := r.String()
    if (d == "") != eq {
        panic("inconsistent difference and equality results")
    }
    return d
}

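// A common usage sketch for Diff in a test (illustrative only; assumes a
// *testing.T named t and hypothetical want/got values):
//
//    if diff := cmp.Diff(want, got); diff != "" {
//        t.Errorf("unexpected result (-want +got):\n%s", diff)
//    }
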
type state struct {
    // These fields represent the "comparison state".
    // Calling statelessCompare must not result in observable changes to these.
    result    diff.Result // The current result of comparison
    curPath   Path        // The current path in the value tree
    reporters []reporter  // Optional reporters

    // recChecker checks for infinite cycles applying the same set of
    // transformers upon the output of itself.
    recChecker recChecker

    // dynChecker triggers pseudo-random checks for option correctness.
    // It is safe for statelessCompare to mutate this value.
    dynChecker dynChecker

    // These fields, once set by processOption, will not change.
    exporters map[reflect.Type]bool // Set of structs with unexported field visibility
    opts      Options               // List of all fundamental and filter options
}

func newState(opts []Option) *state {
    // Always ensure a validator option exists to validate the inputs.
    s := &state{opts: Options{validator{}}}
    s.processOption(Options(opts))
    return s
}

func (s *state) processOption(opt Option) {
    switch opt := opt.(type) {
    case nil:
    case Options:
        for _, o := range opt {
            s.processOption(o)
        }
    case coreOption:
        type filtered interface {
            isFiltered() bool
        }
        if fopt, ok := opt.(filtered); ok && !fopt.isFiltered() {
            panic(fmt.Sprintf("cannot use an unfiltered option: %v", opt))
        }
        s.opts = append(s.opts, opt)
    case visibleStructs:
        if s.exporters == nil {
            s.exporters = make(map[reflect.Type]bool)
        }
        for t := range opt {
            s.exporters[t] = true
        }
    case reporter:
        s.reporters = append(s.reporters, opt)
    default:
        panic(fmt.Sprintf("unknown option %T", opt))
    }
}

// statelessCompare compares two values and returns the result.
// This function is stateless in that it does not alter the current result,
// or output to any registered reporters.
func (s *state) statelessCompare(step PathStep) diff.Result {
    // We do not save and restore the curPath because all of the compareX
    // methods should properly push and pop from the path.
    // It is an implementation bug if the contents of curPath differs from
    // when calling this function to when returning from it.

    oldResult, oldReporters := s.result, s.reporters
    s.result = diff.Result{} // Reset result
    s.reporters = nil        // Remove reporters to avoid spurious printouts
    s.compareAny(step)
    res := s.result
    s.result, s.reporters = oldResult, oldReporters
    return res
}

func (s *state) compareAny(step PathStep) {
    // Update the path stack.
    s.curPath.push(step)
    defer s.curPath.pop()
    for _, r := range s.reporters {
        r.PushStep(step)
        defer r.PopStep()
    }
    s.recChecker.Check(s.curPath)

    // Obtain the current type and values.
    t := step.Type()
    vx, vy := step.Values()

    // Rule 1: Check whether an option applies on this node in the value tree.
    if s.tryOptions(t, vx, vy) {
        return
    }

    // Rule 2: Check whether the type has a valid Equal method.
    if s.tryMethod(t, vx, vy) {
        return
    }

    // Rule 3: Compare based on the underlying kind.
    switch t.Kind() {
    case reflect.Bool:
        s.report(vx.Bool() == vy.Bool(), 0)
    case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
        s.report(vx.Int() == vy.Int(), 0)
    case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
        s.report(vx.Uint() == vy.Uint(), 0)
    case reflect.Float32, reflect.Float64:
        s.report(vx.Float() == vy.Float(), 0)
    case reflect.Complex64, reflect.Complex128:
        s.report(vx.Complex() == vy.Complex(), 0)
    case reflect.String:
        s.report(vx.String() == vy.String(), 0)
    case reflect.Chan, reflect.UnsafePointer:
        s.report(vx.Pointer() == vy.Pointer(), 0)
    case reflect.Func:
        s.report(vx.IsNil() && vy.IsNil(), 0)
    case reflect.Struct:
        s.compareStruct(t, vx, vy)
    case reflect.Slice, reflect.Array:
        s.compareSlice(t, vx, vy)
    case reflect.Map:
        s.compareMap(t, vx, vy)
    case reflect.Ptr:
        s.comparePtr(t, vx, vy)
    case reflect.Interface:
        s.compareInterface(t, vx, vy)
    default:
        panic(fmt.Sprintf("%v kind not handled", t.Kind()))
    }
}

func (s *state) tryOptions(t reflect.Type, vx, vy reflect.Value) bool {
    // Evaluate all filters and apply the remaining options.
    if opt := s.opts.filter(s, t, vx, vy); opt != nil {
        opt.apply(s, vx, vy)
        return true
    }
    return false
}

func (s *state) tryMethod(t reflect.Type, vx, vy reflect.Value) bool {
    // Check if this type even has an Equal method.
    m, ok := t.MethodByName("Equal")
    if !ok || !function.IsType(m.Type, function.EqualAssignable) {
        return false
    }

    eq := s.callTTBFunc(m.Func, vx, vy)
    s.report(eq, reportByMethod)
    return true
}

func (s *state) callTRFunc(f, v reflect.Value, step Transform) reflect.Value {
    v = sanitizeValue(v, f.Type().In(0))
    if !s.dynChecker.Next() {
        return f.Call([]reflect.Value{v})[0]
    }

    // Run the function twice and ensure that we get the same results back.
    // We run in goroutines so that the race detector (if enabled) can detect
    // unsafe mutations to the input.
    c := make(chan reflect.Value)
    go detectRaces(c, f, v)
    got := <-c
    want := f.Call([]reflect.Value{v})[0]
    if step.vx, step.vy = got, want; !s.statelessCompare(step).Equal() {
        // To avoid false-positives with non-reflexive equality operations,
        // we sanity check whether a value is equal to itself.
        if step.vx, step.vy = want, want; !s.statelessCompare(step).Equal() {
            return want
        }
        panic(fmt.Sprintf("non-deterministic function detected: %s", function.NameOf(f)))
    }
    return want
}

func (s *state) callTTBFunc(f, x, y reflect.Value) bool {
    x = sanitizeValue(x, f.Type().In(0))
    y = sanitizeValue(y, f.Type().In(1))
    if !s.dynChecker.Next() {
        return f.Call([]reflect.Value{x, y})[0].Bool()
    }

    // Swapping the input arguments is sufficient to check that
    // f is symmetric and deterministic.
    // We run in goroutines so that the race detector (if enabled) can detect
    // unsafe mutations to the input.
    c := make(chan reflect.Value)
    go detectRaces(c, f, y, x)
    got := <-c
    want := f.Call([]reflect.Value{x, y})[0].Bool()
    if !got.IsValid() || got.Bool() != want {
        panic(fmt.Sprintf("non-deterministic or non-symmetric function detected: %s", function.NameOf(f)))
    }
    return want
}

func detectRaces(c chan<- reflect.Value, f reflect.Value, vs ...reflect.Value) {
    var ret reflect.Value
    defer func() {
        recover() // Ignore panics, let the other call to f panic instead
        c <- ret
    }()
    ret = f.Call(vs)[0]
}

// sanitizeValue converts nil interfaces of type T to those of type R,
// assuming that T is assignable to R.
// Otherwise, it returns the input value as is.
func sanitizeValue(v reflect.Value, t reflect.Type) reflect.Value {
    // TODO(dsnet): Workaround for reflect bug (https://golang.org/issue/22143).
    if !flags.AtLeastGo110 {
        if v.Kind() == reflect.Interface && v.IsNil() && v.Type() != t {
            return reflect.New(t).Elem()
        }
    }
    return v
}

func (s *state) compareStruct(t reflect.Type, vx, vy reflect.Value) {
    var vax, vay reflect.Value // Addressable versions of vx and vy

    step := StructField{&structField{}}
    for i := 0; i < t.NumField(); i++ {
        step.typ = t.Field(i).Type
        step.vx = vx.Field(i)
        step.vy = vy.Field(i)
        step.name = t.Field(i).Name
        step.idx = i
        step.unexported = !isExported(step.name)
        if step.unexported {
            if step.name == "_" {
                continue
            }
            // Defer checking of unexported fields until later to give an
            // Ignore a chance to ignore the field.
            if !vax.IsValid() || !vay.IsValid() {
                // For retrieveUnexportedField to work, the parent struct must
                // be addressable. Create a new copy of the values if
                // necessary to make them addressable.
                vax = makeAddressable(vx)
                vay = makeAddressable(vy)
            }
            step.mayForce = s.exporters[t]
            step.pvx = vax
            step.pvy = vay
            step.field = t.Field(i)
        }
        s.compareAny(step)
    }
}

func (s *state) compareSlice(t reflect.Type, vx, vy reflect.Value) {
    isSlice := t.Kind() == reflect.Slice
    if isSlice && (vx.IsNil() || vy.IsNil()) {
        s.report(vx.IsNil() && vy.IsNil(), 0)
        return
    }

    // TODO: Support cyclic data structures.

    step := SliceIndex{&sliceIndex{pathStep: pathStep{typ: t.Elem()}}}
    withIndexes := func(ix, iy int) SliceIndex {
        if ix >= 0 {
            step.vx, step.xkey = vx.Index(ix), ix
        } else {
            step.vx, step.xkey = reflect.Value{}, -1
        }
        if iy >= 0 {
            step.vy, step.ykey = vy.Index(iy), iy
        } else {
            step.vy, step.ykey = reflect.Value{}, -1
        }
        return step
    }

    // Ignore options are able to ignore missing elements in a slice.
    // However, detecting these reliably requires an optimal differencing
    // algorithm, which diff.Difference is not.
    //
    // Instead, we first iterate through both slices to detect which elements
    // would be ignored if standing alone. The indexes of non-discarded elements
    // are stored in separate slices, which diffing is then performed on.
    var indexesX, indexesY []int
    var ignoredX, ignoredY []bool
    for ix := 0; ix < vx.Len(); ix++ {
        ignored := s.statelessCompare(withIndexes(ix, -1)).NumDiff == 0
        if !ignored {
            indexesX = append(indexesX, ix)
        }
        ignoredX = append(ignoredX, ignored)
    }
    for iy := 0; iy < vy.Len(); iy++ {
        ignored := s.statelessCompare(withIndexes(-1, iy)).NumDiff == 0
        if !ignored {
            indexesY = append(indexesY, iy)
        }
        ignoredY = append(ignoredY, ignored)
    }

    // Compute an edit-script for slices vx and vy (excluding ignored elements).
    edits := diff.Difference(len(indexesX), len(indexesY), func(ix, iy int) diff.Result {
        return s.statelessCompare(withIndexes(indexesX[ix], indexesY[iy]))
    })

    // Replay the ignore-scripts and the edit-script.
    var ix, iy int
    for ix < vx.Len() || iy < vy.Len() {
        var e diff.EditType
        switch {
        case ix < len(ignoredX) && ignoredX[ix]:
            e = diff.UniqueX
        case iy < len(ignoredY) && ignoredY[iy]:
            e = diff.UniqueY
        default:
            e, edits = edits[0], edits[1:]
        }
        switch e {
        case diff.UniqueX:
            s.compareAny(withIndexes(ix, -1))
            ix++
        case diff.UniqueY:
            s.compareAny(withIndexes(-1, iy))
            iy++
        default:
            s.compareAny(withIndexes(ix, iy))
            ix++
            iy++
        }
    }
}

func (s *state) compareMap(t reflect.Type, vx, vy reflect.Value) {
    if vx.IsNil() || vy.IsNil() {
        s.report(vx.IsNil() && vy.IsNil(), 0)
        return
    }

    // TODO: Support cyclic data structures.

    // We combine and sort the two map keys so that we can perform the
    // comparisons in a deterministic order.
    step := MapIndex{&mapIndex{pathStep: pathStep{typ: t.Elem()}}}
    for _, k := range value.SortKeys(append(vx.MapKeys(), vy.MapKeys()...)) {
        step.vx = vx.MapIndex(k)
        step.vy = vy.MapIndex(k)
        step.key = k
        if !step.vx.IsValid() && !step.vy.IsValid() {
            // It is possible for both vx and vy to be invalid if the
            // key contained a NaN value in it.
            //
            // Even with the ability to retrieve NaN keys in Go 1.12,
            // there still isn't a sensible way to compare the values since
            // a NaN key may map to multiple unordered values.
            // The most reasonable way to compare NaNs would be to compare the
            // set of values. However, this is impossible to do efficiently
            // since set equality is provably an O(n^2) operation given only
            // an Equal function. If we had a Less function or Hash function,
            // this could be done in O(n*log(n)) or O(n), respectively.
            //
            // Rather than adding complex logic to deal with NaNs, make it
            // the user's responsibility to compare such obscure maps.
            const help = "consider providing a Comparer to compare the map"
            panic(fmt.Sprintf("%#v has map key with NaNs\n%s", s.curPath, help))
        }
        s.compareAny(step)
    }
}

func (s *state) comparePtr(t reflect.Type, vx, vy reflect.Value) {
    if vx.IsNil() || vy.IsNil() {
        s.report(vx.IsNil() && vy.IsNil(), 0)
        return
    }

    // TODO: Support cyclic data structures.

    vx, vy = vx.Elem(), vy.Elem()
    s.compareAny(Indirect{&indirect{pathStep{t.Elem(), vx, vy}}})
}

func (s *state) compareInterface(t reflect.Type, vx, vy reflect.Value) {
    if vx.IsNil() || vy.IsNil() {
        s.report(vx.IsNil() && vy.IsNil(), 0)
        return
    }
    vx, vy = vx.Elem(), vy.Elem()
    if vx.Type() != vy.Type() {
        s.report(false, 0)
        return
    }
    s.compareAny(TypeAssertion{&typeAssertion{pathStep{vx.Type(), vx, vy}}})
}

func (s *state) report(eq bool, rf resultFlags) {
    if rf&reportByIgnore == 0 {
        if eq {
            s.result.NumSame++
            rf |= reportEqual
        } else {
            s.result.NumDiff++
            rf |= reportUnequal
        }
    }
    for _, r := range s.reporters {
        r.Report(Result{flags: rf})
    }
}

// recChecker tracks the state needed to periodically perform checks that
// user provided transformers are not stuck in an infinitely recursive cycle.
type recChecker struct{ next int }

// Check scans the Path for any recursive transformers and panics when any
// recursive transformers are detected. Note that the presence of a
// recursive Transformer does not necessarily imply an infinite cycle.
// As such, this check only activates after some minimal number of path steps.
func (rc *recChecker) Check(p Path) {
    const minLen = 1 << 16
    if rc.next == 0 {
        rc.next = minLen
    }
    if len(p) < rc.next {
        return
    }
    rc.next <<= 1

    // Check whether the same transformer has appeared at least twice.
    var ss []string
    m := map[Option]int{}
    for _, ps := range p {
        if t, ok := ps.(Transform); ok {
            t := t.Option()
            if m[t] == 1 { // Transformer was used exactly once before
                tf := t.(*transformer).fnc.Type()
                ss = append(ss, fmt.Sprintf("%v: %v => %v", t, tf.In(0), tf.Out(0)))
            }
            m[t]++
        }
    }
    if len(ss) > 0 {
        const warning = "recursive set of Transformers detected"
        const help = "consider using cmpopts.AcyclicTransformer"
        set := strings.Join(ss, "\n\t")
        panic(fmt.Sprintf("%s:\n\t%s\n%s", warning, set, help))
    }
}

// dynChecker tracks the state needed to periodically perform checks that
// user provided functions are symmetric and deterministic.
// The zero value is safe for immediate use.
type dynChecker struct{ curr, next int }

// Next increments the state and reports whether a check should be performed.
//
// Checks occur every Nth function call, where N is a triangular number:
//    0 1 3 6 10 15 21 28 36 45 55 66 78 91 105 120 136 153 171 190 ...
// See https://en.wikipedia.org/wiki/Triangular_number
//
// This sequence ensures that the cost of checks drops significantly as
// the number of function calls grows larger.
func (dc *dynChecker) Next() bool {
    ok := dc.curr == dc.next
    if ok {
        dc.curr = 0
        dc.next++
    }
    dc.curr++
    return ok
}

// makeAddressable returns a value that is always addressable.
// It returns the input verbatim if it is already addressable,
// otherwise it creates a new value and returns an addressable copy.
func makeAddressable(v reflect.Value) reflect.Value {
    if v.CanAddr() {
        return v
    }
    vc := reflect.New(v.Type()).Elem()
    vc.Set(v)
    return vc
}
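The package documentation above cites float tolerance as the motivating case for a custom equality function. A minimal, self-contained sketch of that option in use (hypothetical values; only the public go-cmp API):

package main

import (
    "fmt"
    "math"

    "github.com/google/go-cmp/cmp"
)

func main() {
    // Treat float64 values as equal when they differ by less than 1e-9.
    approx := cmp.Comparer(func(a, b float64) bool {
        return math.Abs(a-b) < 1e-9
    })
    fmt.Println(cmp.Equal(0.1+0.2, 0.3))         // false: plain == on float64
    fmt.Println(cmp.Equal(0.1+0.2, 0.3, approx)) // true: within tolerance
}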
@ -0,0 +1,15 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// +build purego

package cmp

import "reflect"

const supportAllowUnexported = false

func retrieveUnexportedField(reflect.Value, reflect.StructField) reflect.Value {
    panic("retrieveUnexportedField is not implemented")
}
@ -0,0 +1,23 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// +build !purego

package cmp

import (
    "reflect"
    "unsafe"
)

const supportAllowUnexported = true

// retrieveUnexportedField uses unsafe to forcibly retrieve any field from
// a struct such that the value has read-write permissions.
//
// The parent struct, v, must be addressable, while f must be a StructField
// describing the field to retrieve.
func retrieveUnexportedField(v reflect.Value, f reflect.StructField) reflect.Value {
    return reflect.NewAt(f.Type, unsafe.Pointer(v.UnsafeAddr()+f.Offset)).Elem()
}
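The one-liner above is the standard reflect.NewAt idiom for reading and writing a field that the reflect API would otherwise mark read-only. A standalone sketch of the same technique on a toy struct (illustrative only, not part of the vendored file):

package main

import (
    "fmt"
    "reflect"
    "unsafe"
)

type payload struct{ hidden int } // hidden is unexported

func main() {
    v := reflect.ValueOf(&payload{hidden: 42}).Elem() // addressable parent
    f := v.Type().Field(0)
    // Re-derive the field at its real address, without the read-only flag.
    fv := reflect.NewAt(f.Type, unsafe.Pointer(v.UnsafeAddr()+f.Offset)).Elem()
    fv.SetInt(7) // settable, unlike v.Field(0)
    fmt.Println(v.Interface()) // {7}
}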
@ -0,0 +1,17 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// +build !cmp_debug

package diff

var debug debugger

type debugger struct{}

func (debugger) Begin(_, _ int, f EqualFunc, _, _ *EditScript) EqualFunc {
    return f
}
func (debugger) Update() {}
func (debugger) Finish() {}
@ -0,0 +1,122 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// +build cmp_debug

package diff

import (
    "fmt"
    "strings"
    "sync"
    "time"
)

// The algorithm can be seen running in real-time by enabling debugging:
//    go test -tags=cmp_debug -v
//
// Example output:
//    === RUN   TestDifference/#34
//    ┌───────────────────────────────┐
//    │ \ · · · · · · · · · · · · · · │
//    │ · # · · · · · · · · · · · · · │
//    │ · \ · · · · · · · · · · · · · │
//    │ · · \ · · · · · · · · · · · · │
//    │ · · · X # · · · · · · · · · · │
//    │ · · · # \ · · · · · · · · · · │
//    │ · · · · · # # · · · · · · · · │
//    │ · · · · · # \ · · · · · · · · │
//    │ · · · · · · · \ · · · · · · · │
//    │ · · · · · · · · \ · · · · · · │
//    │ · · · · · · · · · \ · · · · · │
//    │ · · · · · · · · · · \ · · # · │
//    │ · · · · · · · · · · · \ # # · │
//    │ · · · · · · · · · · · # # # · │
//    │ · · · · · · · · · · # # # # · │
//    │ · · · · · · · · · # # # # # · │
//    │ · · · · · · · · · · · · · · \ │
//    └───────────────────────────────┘
//    [.Y..M.XY......YXYXY.|]
//
// The grid represents the edit-graph where the horizontal axis represents
// list X and the vertical axis represents list Y. The start of the two lists
// is the top-left, while the ends are the bottom-right. The '·' represents
// an unexplored node in the graph. The '\' indicates that the two symbols
// from list X and Y are equal. The 'X' indicates that two symbols are similar
// (but not exactly equal) to each other. The '#' indicates that the two symbols
// are different (and not similar). The algorithm traverses this graph trying to
// make the paths starting in the top-left and the bottom-right connect.
//
// The series of '.', 'X', 'Y', and 'M' characters at the bottom represents
// the currently established path from the forward and reverse searches,
// separated by a '|' character.

const (
    updateDelay  = 100 * time.Millisecond
    finishDelay  = 500 * time.Millisecond
    ansiTerminal = true // ANSI escape codes used to move terminal cursor
)

var debug debugger

type debugger struct {
    sync.Mutex
    p1, p2           EditScript
    fwdPath, revPath *EditScript
    grid             []byte
    lines            int
}

func (dbg *debugger) Begin(nx, ny int, f EqualFunc, p1, p2 *EditScript) EqualFunc {
    dbg.Lock()
    dbg.fwdPath, dbg.revPath = p1, p2
    top := "┌─" + strings.Repeat("──", nx) + "┐\n"
    row := "│ " + strings.Repeat("· ", nx) + "│\n"
    btm := "└─" + strings.Repeat("──", nx) + "┘\n"
    dbg.grid = []byte(top + strings.Repeat(row, ny) + btm)
    dbg.lines = strings.Count(dbg.String(), "\n")
    fmt.Print(dbg)

    // Wrap the EqualFunc so that we can intercept each result.
    return func(ix, iy int) (r Result) {
        cell := dbg.grid[len(top)+iy*len(row):][len("│ ")+len("· ")*ix:][:len("·")]
        for i := range cell {
            cell[i] = 0 // Zero out the multiple bytes of UTF-8 middle-dot
        }
        switch r = f(ix, iy); {
        case r.Equal():
            cell[0] = '\\'
        case r.Similar():
            cell[0] = 'X'
        default:
            cell[0] = '#'
        }
        return
    }
}

func (dbg *debugger) Update() {
    dbg.print(updateDelay)
}

func (dbg *debugger) Finish() {
    dbg.print(finishDelay)
    dbg.Unlock()
}

func (dbg *debugger) String() string {
    dbg.p1, dbg.p2 = *dbg.fwdPath, dbg.p2[:0]
    for i := len(*dbg.revPath) - 1; i >= 0; i-- {
        dbg.p2 = append(dbg.p2, (*dbg.revPath)[i])
    }
    return fmt.Sprintf("%s[%v|%v]\n\n", dbg.grid, dbg.p1, dbg.p2)
}

func (dbg *debugger) print(d time.Duration) {
    if ansiTerminal {
        fmt.Printf("\x1b[%dA", dbg.lines) // Reset terminal cursor
    }
    fmt.Print(dbg)
    time.Sleep(d)
}
@ -0,0 +1,372 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// Package diff implements an algorithm for producing edit-scripts.
// The edit-script is a sequence of operations needed to transform one list
// of symbols into another (or vice-versa). The edits allowed are insertions,
// deletions, and modifications. The summation of all edits is called the
// Levenshtein distance as this problem is well-known in computer science.
//
// This package prioritizes performance over accuracy. That is, the run time
// is more important than obtaining a minimal Levenshtein distance.
package diff

// EditType represents a single operation within an edit-script.
type EditType uint8

const (
    // Identity indicates that a symbol pair is identical in both list X and Y.
    Identity EditType = iota
    // UniqueX indicates that a symbol only exists in X and not Y.
    UniqueX
    // UniqueY indicates that a symbol only exists in Y and not X.
    UniqueY
    // Modified indicates that a symbol pair is a modification of each other.
    Modified
)

// EditScript represents the series of differences between two lists.
type EditScript []EditType

// String returns a human-readable string representing the edit-script where
// Identity, UniqueX, UniqueY, and Modified are represented by the
// '.', 'X', 'Y', and 'M' characters, respectively.
func (es EditScript) String() string {
    b := make([]byte, len(es))
    for i, e := range es {
        switch e {
        case Identity:
            b[i] = '.'
        case UniqueX:
            b[i] = 'X'
        case UniqueY:
            b[i] = 'Y'
        case Modified:
            b[i] = 'M'
        default:
            panic("invalid edit-type")
        }
    }
    return string(b)
}

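// As a concrete reading of the encoding above (an illustrative sketch, not
// part of the vendored source): EditScript{Identity, UniqueX, UniqueY, Modified}
// renders as ".XYM", and per the accessors defined below has Dist() == 3,
// LenX() == 3, and LenY() == 3.
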
// stats returns a histogram of the number of each type of edit operation.
|
||||
func (es EditScript) stats() (s struct{ NI, NX, NY, NM int }) {
|
||||
for _, e := range es {
|
||||
switch e {
|
||||
case Identity:
|
||||
s.NI++
|
||||
case UniqueX:
|
||||
s.NX++
|
||||
case UniqueY:
|
||||
s.NY++
|
||||
case Modified:
|
||||
s.NM++
|
||||
default:
|
||||
panic("invalid edit-type")
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Dist is the Levenshtein distance and is guaranteed to be 0 if and only if
|
||||
// lists X and Y are equal.
|
||||
func (es EditScript) Dist() int { return len(es) - es.stats().NI }
|
||||
|
||||
// LenX is the length of the X list.
|
||||
func (es EditScript) LenX() int { return len(es) - es.stats().NY }
|
||||
|
||||
// LenY is the length of the Y list.
|
||||
func (es EditScript) LenY() int { return len(es) - es.stats().NX }
|
||||
|
||||
// EqualFunc reports whether the symbols at indexes ix and iy are equal.
|
||||
// When called by Difference, the index is guaranteed to be within nx and ny.
|
||||
type EqualFunc func(ix int, iy int) Result
|
||||
|
||||
// Result is the result of comparison.
|
||||
// NumSame is the number of sub-elements that are equal.
|
||||
// NumDiff is the number of sub-elements that are not equal.
|
||||
type Result struct{ NumSame, NumDiff int }
|
||||
|
||||
// BoolResult returns a Result that is either Equal or not Equal.
|
||||
func BoolResult(b bool) Result {
|
||||
if b {
|
||||
return Result{NumSame: 1} // Equal, Similar
|
||||
} else {
|
||||
return Result{NumDiff: 2} // Not Equal, not Similar
|
||||
}
|
||||
}
|
||||
|
||||
// Equal indicates whether the symbols are equal. Two symbols are equal
|
||||
// if and only if NumDiff == 0. If Equal, then they are also Similar.
|
||||
func (r Result) Equal() bool { return r.NumDiff == 0 }
|
||||
|
||||
// Similar indicates whether two symbols are similar and may be represented
|
||||
// by using the Modified type. As a special case, we consider binary comparisons
|
||||
// (i.e., those that return Result{1, 0} or Result{0, 1}) to be similar.
|
||||
//
|
||||
// The exact ratio of NumSame to NumDiff to determine similarity may change.
|
||||
func (r Result) Similar() bool {
|
||||
// Use NumSame+1 to offset NumSame so that binary comparisons are similar.
|
||||
return r.NumSame+1 >= r.NumDiff
|
||||
}
|
||||
|
||||
// Difference reports whether two lists of lengths nx and ny are equal
|
||||
// given the definition of equality provided as f.
|
||||
//
|
||||
// This function returns an edit-script, which is a sequence of operations
|
||||
// needed to convert one list into the other. The following invariants for
|
||||
// the edit-script are maintained:
|
||||
// • eq == (es.Dist()==0)
|
||||
// • nx == es.LenX()
|
||||
// • ny == es.LenY()
|
||||
//
|
||||
// This algorithm is not guaranteed to be an optimal solution (i.e., one that
|
||||
// produces an edit-script with a minimal Levenshtein distance). This algorithm
|
||||
// favors performance over optimality. The exact output is not guaranteed to
|
||||
// be stable and may change over time.
|
||||
func Difference(nx, ny int, f EqualFunc) (es EditScript) {
|
||||
// This algorithm is based on traversing what is known as an "edit-graph".
|
||||
// See Figure 1 from "An O(ND) Difference Algorithm and Its Variations"
|
||||
// by Eugene W. Myers. Since D can be as large as N itself, this is
|
||||
// effectively O(N^2). Unlike the algorithm from that paper, we are not
|
||||
// interested in the optimal path, but at least some "decent" path.
|
||||
//
|
||||
// For example, let X and Y be lists of symbols:
// X = [A B C A B B A]
// Y = [C B A B A C]
//
// The edit-graph can be drawn as the following:
// A B C A B B A
// ┌─────────────┐
// C │_|_|\|_|_|_|_│ 0
// B │_|\|_|_|\|\|_│ 1
// A │\|_|_|\|_|_|\│ 2
// B │_|\|_|_|\|\|_│ 3
// A │\|_|_|\|_|_|\│ 4
// C │ | |\| | | | │ 5
// └─────────────┘ 6
// 0 1 2 3 4 5 6 7
//
// List X is written along the horizontal axis, while list Y is written
// along the vertical axis. At any point on this grid, if the symbol in
// list X matches the corresponding symbol in list Y, then a '\' is drawn.
// The goal of any minimal edit-script algorithm is to find a path from the
// top-left corner to the bottom-right corner, while traveling through the
// fewest horizontal or vertical edges.
// A horizontal edge is equivalent to inserting a symbol from list X.
// A vertical edge is equivalent to inserting a symbol from list Y.
// A diagonal edge is equivalent to a matching symbol between both X and Y.
|
||||
|
||||
// Invariants:
// • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx
// • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny
//
// In general:
// • fwdFrontier.X < revFrontier.X
// • fwdFrontier.Y < revFrontier.Y
// unless it is time for the algorithm to terminate.
|
||||
fwdPath := path{+1, point{0, 0}, make(EditScript, 0, (nx+ny)/2)}
|
||||
revPath := path{-1, point{nx, ny}, make(EditScript, 0)}
|
||||
fwdFrontier := fwdPath.point // Forward search frontier
|
||||
revFrontier := revPath.point // Reverse search frontier
|
||||
|
||||
// Search budget bounds the cost of searching for better paths.
|
||||
// The longest sequence of non-matching symbols that can be tolerated is
|
||||
// approximately the square-root of the search budget.
|
||||
searchBudget := 4 * (nx + ny) // O(n)
|
||||
|
||||
// The algorithm below is a greedy, meet-in-the-middle algorithm for
// computing sub-optimal edit-scripts between two lists.
//
// The algorithm is approximately as follows:
// • Searching for differences switches back-and-forth between
// a search that starts at the beginning (the top-left corner), and
// a search that starts at the end (the bottom-right corner). The goal of
// the search is to connect with the search from the opposite corner.
// • As we search, we build a path in a greedy manner, where the first
// match seen is added to the path (this is sub-optimal, but provides a
// decent result in practice). When matches are found, we try the next pair
// of symbols in the lists and follow all matches as far as possible.
// • When searching for matches, we search along a diagonal going
// through the "frontier" point. If no matches are found, we advance the
// frontier towards the opposite corner.
// • This algorithm terminates when either the X coordinates or the
// Y coordinates of the forward and reverse frontier points intersect.
//
// This algorithm is correct even if searching only in the forward direction
// or in the reverse direction. We do both because it is commonly observed
// that two lists differ because elements were added to the front
// or end of one of the lists.
//
// Running the tests with the "cmp_debug" build tag prints a visualization
// of the algorithm running in real-time. This is educational for
// understanding how the algorithm works. See debug_enable.go.
|
||||
f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)
|
||||
for {
|
||||
// Forward search from the beginning.
|
||||
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
|
||||
break
|
||||
}
|
||||
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
|
||||
// Search in a diagonal pattern for a match.
|
||||
z := zigzag(i)
|
||||
p := point{fwdFrontier.X + z, fwdFrontier.Y - z}
|
||||
switch {
|
||||
case p.X >= revPath.X || p.Y < fwdPath.Y:
|
||||
stop1 = true // Hit top-right corner
|
||||
case p.Y >= revPath.Y || p.X < fwdPath.X:
|
||||
stop2 = true // Hit bottom-left corner
|
||||
case f(p.X, p.Y).Equal():
|
||||
// Match found, so connect the path to this point.
|
||||
fwdPath.connect(p, f)
|
||||
fwdPath.append(Identity)
|
||||
// Follow sequence of matches as far as possible.
|
||||
for fwdPath.X < revPath.X && fwdPath.Y < revPath.Y {
|
||||
if !f(fwdPath.X, fwdPath.Y).Equal() {
|
||||
break
|
||||
}
|
||||
fwdPath.append(Identity)
|
||||
}
|
||||
fwdFrontier = fwdPath.point
|
||||
stop1, stop2 = true, true
|
||||
default:
|
||||
searchBudget-- // Match not found
|
||||
}
|
||||
debug.Update()
|
||||
}
|
||||
// Advance the frontier towards reverse point.
|
||||
if revPath.X-fwdFrontier.X >= revPath.Y-fwdFrontier.Y {
|
||||
fwdFrontier.X++
|
||||
} else {
|
||||
fwdFrontier.Y++
|
||||
}
|
||||
|
||||
// Reverse search from the end.
|
||||
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
|
||||
break
|
||||
}
|
||||
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
|
||||
// Search in a diagonal pattern for a match.
|
||||
z := zigzag(i)
|
||||
p := point{revFrontier.X - z, revFrontier.Y + z}
|
||||
switch {
|
||||
case fwdPath.X >= p.X || revPath.Y < p.Y:
|
||||
stop1 = true // Hit bottom-left corner
|
||||
case fwdPath.Y >= p.Y || revPath.X < p.X:
|
||||
stop2 = true // Hit top-right corner
|
||||
case f(p.X-1, p.Y-1).Equal():
|
||||
// Match found, so connect the path to this point.
|
||||
revPath.connect(p, f)
|
||||
revPath.append(Identity)
|
||||
// Follow sequence of matches as far as possible.
|
||||
for fwdPath.X < revPath.X && fwdPath.Y < revPath.Y {
|
||||
if !f(revPath.X-1, revPath.Y-1).Equal() {
|
||||
break
|
||||
}
|
||||
revPath.append(Identity)
|
||||
}
|
||||
revFrontier = revPath.point
|
||||
stop1, stop2 = true, true
|
||||
default:
|
||||
searchBudget-- // Match not found
|
||||
}
|
||||
debug.Update()
|
||||
}
|
||||
// Advance the frontier towards forward point.
|
||||
if revFrontier.X-fwdPath.X >= revFrontier.Y-fwdPath.Y {
|
||||
revFrontier.X--
|
||||
} else {
|
||||
revFrontier.Y--
|
||||
}
|
||||
}
|
||||
|
||||
// Join the forward and reverse paths and then append the reverse path.
|
||||
fwdPath.connect(revPath.point, f)
|
||||
for i := len(revPath.es) - 1; i >= 0; i-- {
|
||||
t := revPath.es[i]
|
||||
revPath.es = revPath.es[:i]
|
||||
fwdPath.append(t)
|
||||
}
|
||||
debug.Finish()
|
||||
return fwdPath.es
|
||||
}
|
||||
|
||||
type path struct {
|
||||
dir int // +1 if forward, -1 if reverse
|
||||
point // Leading point of the EditScript path
|
||||
es EditScript
|
||||
}
|
||||
|
||||
// connect appends any necessary Identity, Modified, UniqueX, or UniqueY types
|
||||
// to the edit-script to connect p.point to dst.
|
||||
func (p *path) connect(dst point, f EqualFunc) {
|
||||
if p.dir > 0 {
|
||||
// Connect in forward direction.
|
||||
for dst.X > p.X && dst.Y > p.Y {
|
||||
switch r := f(p.X, p.Y); {
|
||||
case r.Equal():
|
||||
p.append(Identity)
|
||||
case r.Similar():
|
||||
p.append(Modified)
|
||||
case dst.X-p.X >= dst.Y-p.Y:
|
||||
p.append(UniqueX)
|
||||
default:
|
||||
p.append(UniqueY)
|
||||
}
|
||||
}
|
||||
for dst.X > p.X {
|
||||
p.append(UniqueX)
|
||||
}
|
||||
for dst.Y > p.Y {
|
||||
p.append(UniqueY)
|
||||
}
|
||||
} else {
|
||||
// Connect in reverse direction.
|
||||
for p.X > dst.X && p.Y > dst.Y {
|
||||
switch r := f(p.X-1, p.Y-1); {
|
||||
case r.Equal():
|
||||
p.append(Identity)
|
||||
case r.Similar():
|
||||
p.append(Modified)
|
||||
case p.Y-dst.Y >= p.X-dst.X:
|
||||
p.append(UniqueY)
|
||||
default:
|
||||
p.append(UniqueX)
|
||||
}
|
||||
}
|
||||
for p.X > dst.X {
|
||||
p.append(UniqueX)
|
||||
}
|
||||
for p.Y > dst.Y {
|
||||
p.append(UniqueY)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (p *path) append(t EditType) {
|
||||
p.es = append(p.es, t)
|
||||
switch t {
|
||||
case Identity, Modified:
|
||||
p.add(p.dir, p.dir)
|
||||
case UniqueX:
|
||||
p.add(p.dir, 0)
|
||||
case UniqueY:
|
||||
p.add(0, p.dir)
|
||||
}
|
||||
debug.Update()
|
||||
}
|
||||
|
||||
type point struct{ X, Y int }
|
||||
|
||||
func (p *point) add(dx, dy int) { p.X += dx; p.Y += dy }
|
||||
|
||||
// zigzag maps a consecutive sequence of integers to a zig-zag sequence.
// [0 1 2 3 4 5 ...] => [0 -1 +1 -2 +2 ...]
func zigzag(x int) int {
if x&1 != 0 {
x = ^x
}
return x >> 1
}
|
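Printing the first few values confirms the mapping; a standalone sketch with the function restated, since the package is internal:

```go
package main

import "fmt"

func zigzag(x int) int {
	if x&1 != 0 {
		x = ^x // odd inputs map to negative offsets
	}
	return x >> 1
}

func main() {
	for i := 0; i < 6; i++ {
		fmt.Print(zigzag(i), " ") // 0 -1 1 -2 2 -3
	}
}
```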
@@ -0,0 +1,9 @@
|
||||
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package flags

// Deterministic controls whether the output of Diff should be deterministic.
// This is only used for testing.
var Deterministic bool
|
@@ -0,0 +1,10 @@
|
||||
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// +build !go1.10

package flags

// AtLeastGo110 reports whether the Go toolchain is at least Go 1.10.
const AtLeastGo110 = false
|
@@ -0,0 +1,10 @@
|
||||
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// +build go1.10

package flags

// AtLeastGo110 reports whether the Go toolchain is at least Go 1.10.
const AtLeastGo110 = true
|
@@ -0,0 +1,99 @@
|
||||
// Copyright 2017, The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Package function provides functionality for identifying function types.
|
||||
package function
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"regexp"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type funcType int
|
||||
|
||||
const (
|
||||
_ funcType = iota
|
||||
|
||||
tbFunc // func(T) bool
|
||||
ttbFunc // func(T, T) bool
|
||||
trbFunc // func(T, R) bool
|
||||
tibFunc // func(T, I) bool
|
||||
trFunc // func(T) R
|
||||
|
||||
Equal = ttbFunc // func(T, T) bool
|
||||
EqualAssignable = tibFunc // func(T, I) bool; encapsulates func(T, T) bool
|
||||
Transformer = trFunc // func(T) R
|
||||
ValueFilter = ttbFunc // func(T, T) bool
|
||||
Less = ttbFunc // func(T, T) bool
|
||||
ValuePredicate = tbFunc // func(T) bool
|
||||
KeyValuePredicate = trbFunc // func(T, R) bool
|
||||
)
|
||||
|
||||
var boolType = reflect.TypeOf(true)
|
||||
|
||||
// IsType reports whether the reflect.Type is of the specified function type.
|
||||
func IsType(t reflect.Type, ft funcType) bool {
|
||||
if t == nil || t.Kind() != reflect.Func || t.IsVariadic() {
|
||||
return false
|
||||
}
|
||||
ni, no := t.NumIn(), t.NumOut()
|
||||
switch ft {
|
||||
case tbFunc: // func(T) bool
|
||||
if ni == 1 && no == 1 && t.Out(0) == boolType {
|
||||
return true
|
||||
}
|
||||
case ttbFunc: // func(T, T) bool
|
||||
if ni == 2 && no == 1 && t.In(0) == t.In(1) && t.Out(0) == boolType {
|
||||
return true
|
||||
}
|
||||
case trbFunc: // func(T, R) bool
|
||||
if ni == 2 && no == 1 && t.Out(0) == boolType {
|
||||
return true
|
||||
}
|
||||
case tibFunc: // func(T, I) bool
|
||||
if ni == 2 && no == 1 && t.In(0).AssignableTo(t.In(1)) && t.Out(0) == boolType {
|
||||
return true
|
||||
}
|
||||
case trFunc: // func(T) R
|
||||
if ni == 1 && no == 1 {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
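To make the ttbFunc case concrete, here is a standalone restatement of the same check applied to a candidate comparer function; the helper name checkTTB is hypothetical:

```go
package main

import (
	"fmt"
	"reflect"
)

// checkTTB (hypothetical name) restates the ttbFunc case of IsType:
// func(T, T) bool with identical, non-variadic input types.
func checkTTB(f interface{}) bool {
	t := reflect.TypeOf(f)
	return t != nil && t.Kind() == reflect.Func && !t.IsVariadic() &&
		t.NumIn() == 2 && t.NumOut() == 1 &&
		t.In(0) == t.In(1) && t.Out(0) == reflect.TypeOf(true)
}

func main() {
	fmt.Println(checkTTB(func(x, y string) bool { return x == y })) // true
	fmt.Println(checkTTB(func(x string) bool { return false }))     // false: only one input
}
```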
var lastIdentRx = regexp.MustCompile(`[_\p{L}][_\p{L}\p{N}]*$`)
|
||||
|
||||
// NameOf returns the name of the function value.
|
||||
func NameOf(v reflect.Value) string {
|
||||
fnc := runtime.FuncForPC(v.Pointer())
|
||||
if fnc == nil {
|
||||
return "<unknown>"
|
||||
}
|
||||
fullName := fnc.Name() // e.g., "long/path/name/mypkg.(*MyType).(long/path/name/mypkg.myMethod)-fm"
|
||||
|
||||
// Method closures have a "-fm" suffix.
|
||||
fullName = strings.TrimSuffix(fullName, "-fm")
|
||||
|
||||
var name string
|
||||
for len(fullName) > 0 {
|
||||
inParen := strings.HasSuffix(fullName, ")")
|
||||
fullName = strings.TrimSuffix(fullName, ")")
|
||||
|
||||
s := lastIdentRx.FindString(fullName)
|
||||
if s == "" {
|
||||
break
|
||||
}
|
||||
name = s + "." + name
|
||||
fullName = strings.TrimSuffix(fullName, s)
|
||||
|
||||
if i := strings.LastIndexByte(fullName, '('); inParen && i >= 0 {
|
||||
fullName = fullName[:i]
|
||||
}
|
||||
fullName = strings.TrimSuffix(fullName, ".")
|
||||
}
|
||||
return strings.TrimSuffix(name, ".")
|
||||
}
|
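The trimming that NameOf performs is easiest to see against the raw runtime name; a small sketch using only the standard library:

```go
package main

import (
	"fmt"
	"reflect"
	"runtime"
	"strings"
)

func main() {
	// The raw name from the runtime; for vendored or deeply nested packages
	// this carries a long import-path prefix, which NameOf reduces to the
	// trailing identifier components (e.g., "mypkg.MyType.myMethod").
	v := reflect.ValueOf(strings.ToUpper)
	fmt.Println(runtime.FuncForPC(v.Pointer()).Name()) // strings.ToUpper
}
```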
@@ -0,0 +1,23 @@
|
||||
// Copyright 2018, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

// +build purego

package value

import "reflect"

// Pointer is an opaque typed pointer and is guaranteed to be comparable.
type Pointer struct {
p uintptr
t reflect.Type
}

// PointerOf returns a Pointer from v, which must be a
// reflect.Ptr, reflect.Slice, or reflect.Map.
func PointerOf(v reflect.Value) Pointer {
// NOTE: Storing a pointer as an uintptr is technically incorrect as it
// assumes that the GC implementation does not use a moving collector.
return Pointer{v.Pointer(), v.Type()}
}
|
@@ -0,0 +1,26 @@
|
||||
// Copyright 2018, The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// +build !purego
|
||||
|
||||
package value
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// Pointer is an opaque typed pointer and is guaranteed to be comparable.
|
||||
type Pointer struct {
|
||||
p unsafe.Pointer
|
||||
t reflect.Type
|
||||
}
|
||||
|
||||
// PointerOf returns a Pointer from v, which must be a
|
||||
// reflect.Ptr, reflect.Slice, or reflect.Map.
|
||||
func PointerOf(v reflect.Value) Pointer {
|
||||
// The proper representation of a pointer is unsafe.Pointer,
|
||||
// which is necessary if the GC ever uses a moving collector.
|
||||
return Pointer{unsafe.Pointer(v.Pointer()), v.Type()}
|
||||
}
|
@@ -0,0 +1,106 @@
|
||||
// Copyright 2017, The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package value
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
)
|
||||
|
||||
// SortKeys sorts a list of map keys, deduplicating keys if necessary.
|
||||
// The type of each value must be comparable.
|
||||
func SortKeys(vs []reflect.Value) []reflect.Value {
|
||||
if len(vs) == 0 {
|
||||
return vs
|
||||
}
|
||||
|
||||
// Sort the map keys.
|
||||
sort.SliceStable(vs, func(i, j int) bool { return isLess(vs[i], vs[j]) })
|
||||
|
||||
// Deduplicate keys (fails for NaNs).
|
||||
vs2 := vs[:1]
|
||||
for _, v := range vs[1:] {
|
||||
if isLess(vs2[len(vs2)-1], v) {
|
||||
vs2 = append(vs2, v)
|
||||
}
|
||||
}
|
||||
return vs2
|
||||
}
|
||||
|
||||
// isLess is a generic function for sorting arbitrary map keys.
|
||||
// The inputs must be of the same type and must be comparable.
|
||||
func isLess(x, y reflect.Value) bool {
|
||||
switch x.Type().Kind() {
|
||||
case reflect.Bool:
|
||||
return !x.Bool() && y.Bool()
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
return x.Int() < y.Int()
|
||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
|
||||
return x.Uint() < y.Uint()
|
||||
case reflect.Float32, reflect.Float64:
|
||||
// NOTE: This does not sort -0 as less than +0
|
||||
// since Go maps treat -0 and +0 as equal keys.
|
||||
fx, fy := x.Float(), y.Float()
|
||||
return fx < fy || math.IsNaN(fx) && !math.IsNaN(fy)
|
||||
case reflect.Complex64, reflect.Complex128:
|
||||
cx, cy := x.Complex(), y.Complex()
|
||||
rx, ix, ry, iy := real(cx), imag(cx), real(cy), imag(cy)
|
||||
if rx == ry || (math.IsNaN(rx) && math.IsNaN(ry)) {
|
||||
return ix < iy || math.IsNaN(ix) && !math.IsNaN(iy)
|
||||
}
|
||||
return rx < ry || math.IsNaN(rx) && !math.IsNaN(ry)
|
||||
case reflect.Ptr, reflect.UnsafePointer, reflect.Chan:
|
||||
return x.Pointer() < y.Pointer()
|
||||
case reflect.String:
|
||||
return x.String() < y.String()
|
||||
case reflect.Array:
|
||||
for i := 0; i < x.Len(); i++ {
|
||||
if isLess(x.Index(i), y.Index(i)) {
|
||||
return true
|
||||
}
|
||||
if isLess(y.Index(i), x.Index(i)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return false
|
||||
case reflect.Struct:
|
||||
for i := 0; i < x.NumField(); i++ {
|
||||
if isLess(x.Field(i), y.Field(i)) {
|
||||
return true
|
||||
}
|
||||
if isLess(y.Field(i), x.Field(i)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return false
|
||||
case reflect.Interface:
|
||||
vx, vy := x.Elem(), y.Elem()
|
||||
if !vx.IsValid() || !vy.IsValid() {
|
||||
return !vx.IsValid() && vy.IsValid()
|
||||
}
|
||||
tx, ty := vx.Type(), vy.Type()
|
||||
if tx == ty {
|
||||
return isLess(x.Elem(), y.Elem())
|
||||
}
|
||||
if tx.Kind() != ty.Kind() {
|
||||
return vx.Kind() < vy.Kind()
|
||||
}
|
||||
if tx.String() != ty.String() {
|
||||
return tx.String() < ty.String()
|
||||
}
|
||||
if tx.PkgPath() != ty.PkgPath() {
|
||||
return tx.PkgPath() < ty.PkgPath()
|
||||
}
|
||||
// This can happen in rare situations, so we fall back to just comparing
// the unique pointer for a reflect.Type. This guarantees deterministic
// ordering within a program, but it is obviously not stable.
|
||||
return reflect.ValueOf(vx.Type()).Pointer() < reflect.ValueOf(vy.Type()).Pointer()
|
||||
default:
|
||||
// Must be Func, Map, or Slice, which are not comparable.
panic(fmt.Sprintf("%v is not comparable", x.Type()))
|
||||
}
|
||||
}
|
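For a simple int-keyed map, the effect of SortKeys can be approximated with the standard library alone; a sketch (the real implementation handles every comparable kind via isLess and also deduplicates):

```go
package main

import (
	"fmt"
	"reflect"
	"sort"
)

func main() {
	m := map[int]string{3: "c", 1: "a", 2: "b"}
	// MapKeys returns the keys in an unspecified order; sort them.
	keys := reflect.ValueOf(m).MapKeys()
	sort.Slice(keys, func(i, j int) bool { return keys[i].Int() < keys[j].Int() })
	for _, k := range keys {
		fmt.Print(k.Int(), " ") // 1 2 3
	}
}
```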
@@ -0,0 +1,48 @@
|
||||
// Copyright 2017, The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package value
|
||||
|
||||
import (
|
||||
"math"
|
||||
"reflect"
|
||||
)
|
||||
|
||||
// IsZero reports whether v is the zero value.
|
||||
// This does not rely on Interface and so can be used on unexported fields.
|
||||
func IsZero(v reflect.Value) bool {
|
||||
switch v.Kind() {
|
||||
case reflect.Bool:
|
||||
return !v.Bool()
|
||||
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
||||
return v.Int() == 0
|
||||
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
|
||||
return v.Uint() == 0
|
||||
case reflect.Float32, reflect.Float64:
|
||||
return math.Float64bits(v.Float()) == 0
|
||||
case reflect.Complex64, reflect.Complex128:
|
||||
return math.Float64bits(real(v.Complex())) == 0 && math.Float64bits(imag(v.Complex())) == 0
|
||||
case reflect.String:
|
||||
return v.String() == ""
|
||||
case reflect.UnsafePointer:
|
||||
return v.Pointer() == 0
|
||||
case reflect.Chan, reflect.Func, reflect.Interface, reflect.Ptr, reflect.Map, reflect.Slice:
|
||||
return v.IsNil()
|
||||
case reflect.Array:
|
||||
for i := 0; i < v.Len(); i++ {
|
||||
if !IsZero(v.Index(i)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
case reflect.Struct:
|
||||
for i := 0; i < v.NumField(); i++ {
|
||||
if !IsZero(v.Field(i)) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
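A few probes show what counts as zero; a standalone simplified sketch using reflect directly (IsZero itself is internal, and Go 1.13 later added an equivalent reflect.Value.IsZero):

```go
package main

import (
	"fmt"
	"reflect"
)

// isZero (restated, simplified) covers only a couple of kinds; the version
// above also recurses into arrays and structs and handles all other kinds.
func isZero(v reflect.Value) bool {
	switch v.Kind() {
	case reflect.String:
		return v.String() == ""
	case reflect.Slice, reflect.Map, reflect.Ptr:
		return v.IsNil()
	}
	return false
}

func main() {
	var s []int
	fmt.Println(isZero(reflect.ValueOf(s)))        // true: nil slice
	fmt.Println(isZero(reflect.ValueOf("")))       // true: empty string
	fmt.Println(isZero(reflect.ValueOf([]int{0}))) // false: non-nil slice
}
```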
@@ -0,0 +1,524 @@
|
||||
// Copyright 2017, The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package cmp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/google/go-cmp/cmp/internal/function"
|
||||
)
|
||||
|
||||
// Option configures specific behavior of Equal and Diff. In particular,
// the fundamental Option functions (Ignore, Transformer, and Comparer)
// configure how equality is determined.
|
||||
//
|
||||
// The fundamental options may be composed with filters (FilterPath and
|
||||
// FilterValues) to control the scope over which they are applied.
|
||||
//
|
||||
// The cmp/cmpopts package provides helper functions for creating options that
|
||||
// may be used with Equal and Diff.
|
||||
type Option interface {
|
||||
// filter applies all filters and returns the option that remains.
|
||||
// Each option may only read s.curPath and call s.callTTBFunc.
|
||||
//
|
||||
// An Options is returned only if multiple comparers or transformers
|
||||
// can apply simultaneously and will only contain values of those types
|
||||
// or sub-Options containing values of those types.
|
||||
filter(s *state, t reflect.Type, vx, vy reflect.Value) applicableOption
|
||||
}
|
||||
|
||||
// applicableOption represents the following types:
|
||||
// Fundamental: ignore | validator | *comparer | *transformer
|
||||
// Grouping: Options
|
||||
type applicableOption interface {
|
||||
Option
|
||||
|
||||
// apply executes the option, which may mutate s or panic.
|
||||
apply(s *state, vx, vy reflect.Value)
|
||||
}
|
||||
|
||||
// coreOption represents the following types:
|
||||
// Fundamental: ignore | validator | *comparer | *transformer
|
||||
// Filters: *pathFilter | *valuesFilter
|
||||
type coreOption interface {
|
||||
Option
|
||||
isCore()
|
||||
}
|
||||
|
||||
type core struct{}
|
||||
|
||||
func (core) isCore() {}
|
||||
|
||||
// Options is a list of Option values that also satisfies the Option interface.
|
||||
// Helper comparison packages may return an Options value when packing multiple
|
||||
// Option values into a single Option. When this package processes an Options,
|
||||
// it will be implicitly expanded into a flat list.
|
||||
//
|
||||
// Applying a filter on an Options is equivalent to applying that same filter
|
||||
// on all individual options held within.
|
||||
type Options []Option
|
||||
|
||||
func (opts Options) filter(s *state, t reflect.Type, vx, vy reflect.Value) (out applicableOption) {
|
||||
for _, opt := range opts {
|
||||
switch opt := opt.filter(s, t, vx, vy); opt.(type) {
|
||||
case ignore:
|
||||
return ignore{} // Only ignore can short-circuit evaluation
|
||||
case validator:
|
||||
out = validator{} // Takes precedence over comparer or transformer
|
||||
case *comparer, *transformer, Options:
|
||||
switch out.(type) {
|
||||
case nil:
|
||||
out = opt
|
||||
case validator:
|
||||
// Keep validator
|
||||
case *comparer, *transformer, Options:
|
||||
out = Options{out, opt} // Conflicting comparers or transformers
|
||||
}
|
||||
}
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
func (opts Options) apply(s *state, _, _ reflect.Value) {
|
||||
const warning = "ambiguous set of applicable options"
|
||||
const help = "consider using filters to ensure at most one Comparer or Transformer may apply"
|
||||
var ss []string
|
||||
for _, opt := range flattenOptions(nil, opts) {
|
||||
ss = append(ss, fmt.Sprint(opt))
|
||||
}
|
||||
set := strings.Join(ss, "\n\t")
|
||||
panic(fmt.Sprintf("%s at %#v:\n\t%s\n%s", warning, s.curPath, set, help))
|
||||
}
|
||||
|
||||
func (opts Options) String() string {
|
||||
var ss []string
|
||||
for _, opt := range opts {
|
||||
ss = append(ss, fmt.Sprint(opt))
|
||||
}
|
||||
return fmt.Sprintf("Options{%s}", strings.Join(ss, ", "))
|
||||
}
|
||||
|
||||
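Since Options itself satisfies Option, callers can bundle several settings and pass them as one value; a usage sketch with the companion cmpopts helper package:

```go
package main

import (
	"fmt"

	"github.com/google/go-cmp/cmp"
	"github.com/google/go-cmp/cmp/cmpopts"
)

func main() {
	// The grouped Options value is implicitly flattened by Equal.
	opts := cmp.Options{
		cmpopts.EquateEmpty(),          // nil and empty slices compare equal
		cmpopts.EquateApprox(0, 0.001), // floats compare with a small margin
	}
	fmt.Println(cmp.Equal([]float64{1.0}, []float64{1.0004}, opts)) // true
	fmt.Println(cmp.Equal([]int(nil), []int{}, opts))               // true
}
```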
// FilterPath returns a new Option where opt is only evaluated if filter f
|
||||
// returns true for the current Path in the value tree.
|
||||
//
|
||||
// This filter is called even if a slice element or map entry is missing and
|
||||
// provides an opportunity to ignore such cases. The filter function must be
|
||||
// symmetric such that the filter result is identical regardless of whether the
|
||||
// missing value is from x or y.
|
||||
//
|
||||
// The option passed in may be an Ignore, Transformer, Comparer, Options, or
|
||||
// a previously filtered Option.
|
||||
func FilterPath(f func(Path) bool, opt Option) Option {
|
||||
if f == nil {
|
||||
panic("invalid path filter function")
|
||||
}
|
||||
if opt := normalizeOption(opt); opt != nil {
|
||||
return &pathFilter{fnc: f, opt: opt}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
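A usage sketch of FilterPath, scoping an Ignore to a single struct field (an unfiltered Ignore is an error, per the Ignore documentation below):

```go
package main

import (
	"fmt"

	"github.com/google/go-cmp/cmp"
)

type User struct {
	Name     string
	LastSeen int64
}

func main() {
	// Ignore only applies where the path filter matches the LastSeen field.
	opt := cmp.FilterPath(func(p cmp.Path) bool {
		return p.Last().String() == ".LastSeen"
	}, cmp.Ignore())
	fmt.Println(cmp.Equal(User{"amy", 100}, User{"amy", 200}, opt)) // true
	fmt.Println(cmp.Equal(User{"amy", 100}, User{"bob", 100}, opt)) // false
}
```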
type pathFilter struct {
|
||||
core
|
||||
fnc func(Path) bool
|
||||
opt Option
|
||||
}
|
||||
|
||||
func (f pathFilter) filter(s *state, t reflect.Type, vx, vy reflect.Value) applicableOption {
|
||||
if f.fnc(s.curPath) {
|
||||
return f.opt.filter(s, t, vx, vy)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f pathFilter) String() string {
|
||||
return fmt.Sprintf("FilterPath(%s, %v)", function.NameOf(reflect.ValueOf(f.fnc)), f.opt)
|
||||
}
|
||||
|
||||
// FilterValues returns a new Option where opt is only evaluated if filter f,
|
||||
// which is a function of the form "func(T, T) bool", returns true for the
|
||||
// current pair of values being compared. If either value is invalid or
|
||||
// the type of the values is not assignable to T, then this filter implicitly
|
||||
// returns false.
|
||||
//
|
||||
// The filter function must be
|
||||
// symmetric (i.e., agnostic to the order of the inputs) and
|
||||
// deterministic (i.e., produces the same result when given the same inputs).
|
||||
// If T is an interface, it is possible that f is called with two values with
|
||||
// different concrete types that both implement T.
|
||||
//
|
||||
// The option passed in may be an Ignore, Transformer, Comparer, Options, or
|
||||
// a previously filtered Option.
|
||||
func FilterValues(f interface{}, opt Option) Option {
|
||||
v := reflect.ValueOf(f)
|
||||
if !function.IsType(v.Type(), function.ValueFilter) || v.IsNil() {
|
||||
panic(fmt.Sprintf("invalid values filter function: %T", f))
|
||||
}
|
||||
if opt := normalizeOption(opt); opt != nil {
|
||||
vf := &valuesFilter{fnc: v, opt: opt}
|
||||
if ti := v.Type().In(0); ti.Kind() != reflect.Interface || ti.NumMethod() > 0 {
|
||||
vf.typ = ti
|
||||
}
|
||||
return vf
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
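A usage sketch of FilterValues: treat any pair of NaNs as equal by scoping a trivial Comparer to exactly the value pairs where both sides are NaN (this is essentially what cmpopts.EquateNaNs does):

```go
package main

import (
	"fmt"
	"math"

	"github.com/google/go-cmp/cmp"
)

func main() {
	// The filter is symmetric and deterministic, as required.
	opt := cmp.FilterValues(func(x, y float64) bool {
		return math.IsNaN(x) && math.IsNaN(y)
	}, cmp.Comparer(func(x, y float64) bool { return true }))
	fmt.Println(cmp.Equal([]float64{math.NaN()}, []float64{math.NaN()}, opt)) // true
}
```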
type valuesFilter struct {
|
||||
core
|
||||
typ reflect.Type // T
|
||||
fnc reflect.Value // func(T, T) bool
|
||||
opt Option
|
||||
}
|
||||
|
||||
func (f valuesFilter) filter(s *state, t reflect.Type, vx, vy reflect.Value) applicableOption {
|
||||
if !vx.IsValid() || !vx.CanInterface() || !vy.IsValid() || !vy.CanInterface() {
|
||||
return nil
|
||||
}
|
||||
if (f.typ == nil || t.AssignableTo(f.typ)) && s.callTTBFunc(f.fnc, vx, vy) {
|
||||
return f.opt.filter(s, t, vx, vy)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f valuesFilter) String() string {
|
||||
return fmt.Sprintf("FilterValues(%s, %v)", function.NameOf(f.fnc), f.opt)
|
||||
}
|
||||
|
||||
// Ignore is an Option that causes all comparisons to be ignored.
|
||||
// This value is intended to be combined with FilterPath or FilterValues.
|
||||
// It is an error to pass an unfiltered Ignore option to Equal.
|
||||
func Ignore() Option { return ignore{} }
|
||||
|
||||
type ignore struct{ core }
|
||||
|
||||
func (ignore) isFiltered() bool { return false }
|
||||
func (ignore) filter(_ *state, _ reflect.Type, _, _ reflect.Value) applicableOption { return ignore{} }
|
||||
func (ignore) apply(s *state, _, _ reflect.Value) { s.report(true, reportByIgnore) }
|
||||
func (ignore) String() string { return "Ignore()" }
|
||||
|
||||
// validator is a sentinel Option type to indicate that some options could not
// be evaluated due to unexported fields, missing slice elements, or
// missing map entries. Only in the unexported-field case are both
// values present but inaccessible.
|
||||
type validator struct{ core }
|
||||
|
||||
func (validator) filter(_ *state, _ reflect.Type, vx, vy reflect.Value) applicableOption {
|
||||
if !vx.IsValid() || !vy.IsValid() {
|
||||
return validator{}
|
||||
}
|
||||
if !vx.CanInterface() || !vy.CanInterface() {
|
||||
return validator{}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
func (validator) apply(s *state, vx, vy reflect.Value) {
|
||||
// Implies missing slice element or map entry.
|
||||
if !vx.IsValid() || !vy.IsValid() {
|
||||
s.report(vx.IsValid() == vy.IsValid(), 0)
|
||||
return
|
||||
}
|
||||
|
||||
// Unable to Interface implies unexported field without visibility access.
|
||||
if !vx.CanInterface() || !vy.CanInterface() {
|
||||
const help = "consider using a custom Comparer; if you control the implementation of type, you can also consider AllowUnexported or cmpopts.IgnoreUnexported"
|
||||
panic(fmt.Sprintf("cannot handle unexported field: %#v\n%s", s.curPath, help))
|
||||
}
|
||||
|
||||
panic("not reachable")
|
||||
}
|
||||
|
||||
// identRx represents a valid identifier according to the Go specification.
|
||||
const identRx = `[_\p{L}][_\p{L}\p{N}]*`
|
||||
|
||||
var identsRx = regexp.MustCompile(`^` + identRx + `(\.` + identRx + `)*$`)
|
||||
|
||||
// Transformer returns an Option that applies a transformation function that
|
||||
// converts values of a certain type into that of another.
|
||||
//
|
||||
// The transformer f must be a function "func(T) R" that converts values of
|
||||
// type T to those of type R and is implicitly filtered to input values
|
||||
// assignable to T. The transformer must not mutate T in any way.
|
||||
//
|
||||
// To help prevent some cases of infinite recursion from applying the
// same transform to its own output (e.g., when the input and output types
// are the same), an implicit filter is added such that a transformer is
// applicable only if that exact transformer is not already in the tail of
// the Path since the last non-Transform step.
|
||||
// For situations where the implicit filter is still insufficient,
|
||||
// consider using cmpopts.AcyclicTransformer, which adds a filter
|
||||
// to prevent the transformer from being recursively applied upon itself.
|
||||
//
|
||||
// The name is a user provided label that is used as the Transform.Name in the
|
||||
// transformation PathStep (and eventually shown in the Diff output).
|
||||
// The name must be a valid identifier or qualified identifier in Go syntax.
|
||||
// If empty, an arbitrary name is used.
|
||||
func Transformer(name string, f interface{}) Option {
|
||||
v := reflect.ValueOf(f)
|
||||
if !function.IsType(v.Type(), function.Transformer) || v.IsNil() {
|
||||
panic(fmt.Sprintf("invalid transformer function: %T", f))
|
||||
}
|
||||
if name == "" {
|
||||
name = function.NameOf(v)
|
||||
if !identsRx.MatchString(name) {
|
||||
name = "λ" // Lambda-symbol as placeholder name
|
||||
}
|
||||
} else if !identsRx.MatchString(name) {
|
||||
panic(fmt.Sprintf("invalid name: %q", name))
|
||||
}
|
||||
tr := &transformer{name: name, fnc: reflect.ValueOf(f)}
|
||||
if ti := v.Type().In(0); ti.Kind() != reflect.Interface || ti.NumMethod() > 0 {
|
||||
tr.typ = ti
|
||||
}
|
||||
return tr
|
||||
}
|
||||
|
||||
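A usage sketch of Transformer, folding case before comparison. Input and output types are both string here, so the implicit filter described above is what keeps the transformer from being applied to its own output indefinitely:

```go
package main

import (
	"fmt"
	"strings"

	"github.com/google/go-cmp/cmp"
)

func main() {
	// Both sides are transformed with strings.ToLower before comparing.
	opt := cmp.Transformer("ToLower", strings.ToLower)
	fmt.Println(cmp.Equal("Hello", "hELLO", opt)) // true
}
```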
type transformer struct {
|
||||
core
|
||||
name string
|
||||
typ reflect.Type // T
|
||||
fnc reflect.Value // func(T) R
|
||||
}
|
||||
|
||||
func (tr *transformer) isFiltered() bool { return tr.typ != nil }
|
||||
|
||||
func (tr *transformer) filter(s *state, t reflect.Type, _, _ reflect.Value) applicableOption {
|
||||
for i := len(s.curPath) - 1; i >= 0; i-- {
|
||||
if t, ok := s.curPath[i].(Transform); !ok {
|
||||
break // Hit most recent non-Transform step
|
||||
} else if tr == t.trans {
|
||||
return nil // Cannot directly use same Transform
|
||||
}
|
||||
}
|
||||
if tr.typ == nil || t.AssignableTo(tr.typ) {
|
||||
return tr
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (tr *transformer) apply(s *state, vx, vy reflect.Value) {
|
||||
step := Transform{&transform{pathStep{typ: tr.fnc.Type().Out(0)}, tr}}
|
||||
vvx := s.callTRFunc(tr.fnc, vx, step)
|
||||
vvy := s.callTRFunc(tr.fnc, vy, step)
|
||||
step.vx, step.vy = vvx, vvy
|
||||
s.compareAny(step)
|
||||
}
|
||||
|
||||
func (tr transformer) String() string {
|
||||
return fmt.Sprintf("Transformer(%s, %s)", tr.name, function.NameOf(tr.fnc))
|
||||
}
|
||||
|
||||
// Comparer returns an Option that determines whether two values are equal
|
||||
// to each other.
|
||||
//
|
||||
// The comparer f must be a function "func(T, T) bool" and is implicitly
|
||||
// filtered to input values assignable to T. If T is an interface, it is
|
||||
// possible that f is called with two values of different concrete types that
|
||||
// both implement T.
|
||||
//
|
||||
// The equality function must be:
|
||||
// • Symmetric: equal(x, y) == equal(y, x)
|
||||
// • Deterministic: equal(x, y) == equal(x, y)
|
||||
// • Pure: equal(x, y) does not modify x or y
|
||||
func Comparer(f interface{}) Option {
|
||||
v := reflect.ValueOf(f)
|
||||
if !function.IsType(v.Type(), function.Equal) || v.IsNil() {
|
||||
panic(fmt.Sprintf("invalid comparer function: %T", f))
|
||||
}
|
||||
cm := &comparer{fnc: v}
|
||||
if ti := v.Type().In(0); ti.Kind() != reflect.Interface || ti.NumMethod() > 0 {
|
||||
cm.typ = ti
|
||||
}
|
||||
return cm
|
||||
}
|
||||
|
||||
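A usage sketch of Comparer. net.IP has 4- and 16-byte forms for the same address, so structural comparison is misleading; equality is better defined via the type's own Equal method:

```go
package main

import (
	"fmt"
	"net"

	"github.com/google/go-cmp/cmp"
)

func main() {
	// Symmetric, deterministic, and pure, as required of a Comparer.
	opt := cmp.Comparer(func(x, y net.IP) bool { return x.Equal(y) })
	ip := net.ParseIP("192.168.0.1") // 16-byte form
	fmt.Println(cmp.Equal(ip, ip.To4(), opt)) // true, despite differing lengths
}
```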
type comparer struct {
|
||||
core
|
||||
typ reflect.Type // T
|
||||
fnc reflect.Value // func(T, T) bool
|
||||
}
|
||||
|
||||
func (cm *comparer) isFiltered() bool { return cm.typ != nil }
|
||||
|
||||
func (cm *comparer) filter(_ *state, t reflect.Type, _, _ reflect.Value) applicableOption {
|
||||
if cm.typ == nil || t.AssignableTo(cm.typ) {
|
||||
return cm
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cm *comparer) apply(s *state, vx, vy reflect.Value) {
|
||||
eq := s.callTTBFunc(cm.fnc, vx, vy)
|
||||
s.report(eq, reportByFunc)
|
||||
}
|
||||
|
||||
func (cm comparer) String() string {
|
||||
return fmt.Sprintf("Comparer(%s)", function.NameOf(cm.fnc))
|
||||
}
|
||||
|
||||
// AllowUnexported returns an Option that forcibly allows operations on
|
||||
// unexported fields in certain structs, which are specified by passing in a
|
||||
// value of each struct type.
|
||||
//
|
||||
// Users of this option must understand that comparing on unexported fields
|
||||
// from external packages is not safe since changes in the internal
|
||||
// implementation of some external package may cause the result of Equal
|
||||
// to unexpectedly change. However, it may be valid to use this option on types
|
||||
// defined in an internal package where the semantic meaning of an unexported
|
||||
// field is in the control of the user.
|
||||
//
|
||||
// In many cases, a custom Comparer should be used instead that defines
|
||||
// equality as a function of the public API of a type rather than the underlying
|
||||
// unexported implementation.
|
||||
//
|
||||
// For example, the reflect.Type documentation defines equality to be determined
|
||||
// by the == operator on the interface (essentially performing a shallow pointer
|
||||
// comparison) and most attempts to compare *regexp.Regexp types are interested
|
||||
// in only checking that the regular expression strings are equal.
|
||||
// Both of these are accomplished using Comparers:
|
||||
//
|
||||
// Comparer(func(x, y reflect.Type) bool { return x == y })
|
||||
// Comparer(func(x, y *regexp.Regexp) bool { return x.String() == y.String() })
|
||||
//
|
||||
// In other cases, the cmpopts.IgnoreUnexported option can be used to ignore
|
||||
// all unexported fields on specified struct types.
|
||||
func AllowUnexported(types ...interface{}) Option {
|
||||
if !supportAllowUnexported {
|
||||
panic("AllowUnexported is not supported on purego builds, Google App Engine Standard, or GopherJS")
|
||||
}
|
||||
m := make(map[reflect.Type]bool)
|
||||
for _, typ := range types {
|
||||
t := reflect.TypeOf(typ)
|
||||
if t.Kind() != reflect.Struct {
|
||||
panic(fmt.Sprintf("invalid struct type: %T", typ))
|
||||
}
|
||||
m[t] = true
|
||||
}
|
||||
return visibleStructs(m)
|
||||
}
|
||||
|
||||
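A usage sketch of AllowUnexported on a type with an unexported field:

```go
package main

import (
	"fmt"

	"github.com/google/go-cmp/cmp"
)

type counter struct{ n int } // n is unexported

func main() {
	// Without the option, Equal panics on the unexported field n.
	opt := cmp.AllowUnexported(counter{})
	fmt.Println(cmp.Equal(counter{1}, counter{1}, opt)) // true
	fmt.Println(cmp.Equal(counter{1}, counter{2}, opt)) // false
}
```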
type visibleStructs map[reflect.Type]bool
|
||||
|
||||
func (visibleStructs) filter(_ *state, _ reflect.Type, _, _ reflect.Value) applicableOption {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
// Result represents the comparison result for a single node and
// is provided by cmp when calling Report (see Reporter).
|
||||
type Result struct {
|
||||
_ [0]func() // Make Result incomparable
|
||||
flags resultFlags
|
||||
}
|
||||
|
||||
// Equal reports whether the node was determined to be equal or not.
|
||||
// As a special case, ignored nodes are considered equal.
|
||||
func (r Result) Equal() bool {
|
||||
return r.flags&(reportEqual|reportByIgnore) != 0
|
||||
}
|
||||
|
||||
// ByIgnore reports whether the node is equal because it was ignored.
|
||||
// This never reports true if Equal reports false.
|
||||
func (r Result) ByIgnore() bool {
|
||||
return r.flags&reportByIgnore != 0
|
||||
}
|
||||
|
||||
// ByMethod reports whether the Equal method determined equality.
|
||||
func (r Result) ByMethod() bool {
|
||||
return r.flags&reportByMethod != 0
|
||||
}
|
||||
|
||||
// ByFunc reports whether a Comparer function determined equality.
|
||||
func (r Result) ByFunc() bool {
|
||||
return r.flags&reportByFunc != 0
|
||||
}
|
||||
|
||||
type resultFlags uint
|
||||
|
||||
const (
|
||||
_ resultFlags = (1 << iota) / 2
|
||||
|
||||
reportEqual
|
||||
reportUnequal
|
||||
reportByIgnore
|
||||
reportByMethod
|
||||
reportByFunc
|
||||
)
|
||||
|
||||
// Reporter is an Option that can be passed to Equal. When Equal traverses
|
||||
// the value trees, it calls PushStep as it descends into each node in the
|
||||
// tree and PopStep as it ascends out of the node. The leaves of the tree are
|
||||
// either compared (determined to be equal or not equal) or ignored and reported
|
||||
// as such by calling the Report method.
|
||||
func Reporter(r interface {
|
||||
// PushStep is called when a tree-traversal operation is performed.
|
||||
// The PathStep itself is only valid until the step is popped.
|
||||
// The PathStep.Values are valid for the duration of the entire traversal
|
||||
// and must not be mutated.
|
||||
//
|
||||
// Equal always calls PushStep at the start to provide an operation-less
|
||||
// PathStep used to report the root values.
|
||||
//
|
||||
// Within a slice, the exact set of inserted, removed, or modified elements
|
||||
// is unspecified and may change in future implementations.
|
||||
// The entries of a map are iterated through in an unspecified order.
|
||||
PushStep(PathStep)
|
||||
|
||||
// Report is called exactly once on leaf nodes to report whether the
|
||||
// comparison identified the node as equal, unequal, or ignored.
|
||||
// A leaf node is one that is immediately preceded by and followed by
|
||||
// a pair of PushStep and PopStep calls.
|
||||
Report(Result)
|
||||
|
||||
// PopStep ascends back up the value tree.
|
||||
// There is always a matching pop call for every push call.
|
||||
PopStep()
|
||||
}) Option {
|
||||
return reporter{r}
|
||||
}
|
||||
|
||||
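A minimal custom reporter modeled on this interface; it collects the simplified path of each leaf reported as unequal:

```go
package main

import (
	"fmt"

	"github.com/google/go-cmp/cmp"
)

// diffPaths records the path of every leaf node reported as unequal.
type diffPaths struct {
	path  cmp.Path
	diffs []string
}

func (r *diffPaths) PushStep(ps cmp.PathStep) { r.path = append(r.path, ps) }
func (r *diffPaths) PopStep()                 { r.path = r.path[:len(r.path)-1] }
func (r *diffPaths) Report(rs cmp.Result) {
	if !rs.Equal() {
		r.diffs = append(r.diffs, fmt.Sprint(r.path))
	}
}

func main() {
	type T struct{ A, B int }
	var r diffPaths
	cmp.Equal(T{1, 2}, T{1, 3}, cmp.Reporter(&r))
	fmt.Println(r.diffs) // [B]: only the .B leaf differed
}
```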
type reporter struct{ reporterIface }
|
||||
type reporterIface interface {
|
||||
PushStep(PathStep)
|
||||
Report(Result)
|
||||
PopStep()
|
||||
}
|
||||
|
||||
func (reporter) filter(_ *state, _ reflect.Type, _, _ reflect.Value) applicableOption {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
// normalizeOption normalizes the input options such that all Options groups
|
||||
// are flattened and groups with a single element are reduced to that element.
|
||||
// Only coreOptions and Options containing coreOptions are allowed.
|
||||
func normalizeOption(src Option) Option {
|
||||
switch opts := flattenOptions(nil, Options{src}); len(opts) {
|
||||
case 0:
|
||||
return nil
|
||||
case 1:
|
||||
return opts[0]
|
||||
default:
|
||||
return opts
|
||||
}
|
||||
}
|
||||
|
||||
// flattenOptions copies all options in src to dst as a flat list.
|
||||
// Only coreOptions and Options containing coreOptions are allowed.
|
||||
func flattenOptions(dst, src Options) Options {
|
||||
for _, opt := range src {
|
||||
switch opt := opt.(type) {
|
||||
case nil:
|
||||
continue
|
||||
case Options:
|
||||
dst = flattenOptions(dst, opt)
|
||||
case coreOption:
|
||||
dst = append(dst, opt)
|
||||
default:
|
||||
panic(fmt.Sprintf("invalid option type: %T", opt))
|
||||
}
|
||||
}
|
||||
return dst
|
||||
}
|
@@ -0,0 +1,308 @@
|
||||
// Copyright 2017, The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package cmp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
"strings"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Path is a list of PathSteps describing the sequence of operations to get
|
||||
// from some root type to the current position in the value tree.
|
||||
// The first Path element is always an operation-less PathStep that exists
|
||||
// simply to identify the initial type.
|
||||
//
|
||||
// When traversing structs with embedded structs, the embedded struct will
|
||||
// always be accessed as a field before traversing the fields of the
|
||||
// embedded struct themselves. That is, an exported field from the
|
||||
// embedded struct will never be accessed directly from the parent struct.
|
||||
type Path []PathStep
|
||||
|
||||
// PathStep is a union-type for specific operations to traverse
// a value's tree structure. Users of this package never need to implement
// these types, as values of them are only ever returned by this package.
|
||||
//
|
||||
// Implementations of this interface are
|
||||
// StructField, SliceIndex, MapIndex, Indirect, TypeAssertion, and Transform.
|
||||
type PathStep interface {
|
||||
String() string
|
||||
|
||||
// Type is the resulting type after performing the path step.
|
||||
Type() reflect.Type
|
||||
|
||||
// Values is the resulting values after performing the path step.
|
||||
// The type of each valid value is guaranteed to be identical to Type.
|
||||
//
|
||||
// In some cases, one or both may be invalid or have restrictions:
|
||||
// • For StructField, neither value is interface-able if the current field
// is unexported and the struct type is not explicitly permitted by
// AllowUnexported to traverse unexported fields.
|
||||
// • For SliceIndex, one may be invalid if an element is missing from
|
||||
// either the x or y slice.
|
||||
// • For MapIndex, one may be invalid if an entry is missing from
|
||||
// either the x or y map.
|
||||
//
|
||||
// The provided values must not be mutated.
|
||||
Values() (vx, vy reflect.Value)
|
||||
}
|
||||
|
||||
var (
|
||||
_ PathStep = StructField{}
|
||||
_ PathStep = SliceIndex{}
|
||||
_ PathStep = MapIndex{}
|
||||
_ PathStep = Indirect{}
|
||||
_ PathStep = TypeAssertion{}
|
||||
_ PathStep = Transform{}
|
||||
)
|
||||
|
||||
func (pa *Path) push(s PathStep) {
|
||||
*pa = append(*pa, s)
|
||||
}
|
||||
|
||||
func (pa *Path) pop() {
|
||||
*pa = (*pa)[:len(*pa)-1]
|
||||
}
|
||||
|
||||
// Last returns the last PathStep in the Path.
|
||||
// If the path is empty, this returns a non-nil PathStep that reports a nil Type.
|
||||
func (pa Path) Last() PathStep {
|
||||
return pa.Index(-1)
|
||||
}
|
||||
|
||||
// Index returns the ith step in the Path and supports negative indexing.
|
||||
// A negative index starts counting from the tail of the Path such that -1
|
||||
// refers to the last step, -2 refers to the second-to-last step, and so on.
|
||||
// If index is invalid, this returns a non-nil PathStep that reports a nil Type.
|
||||
func (pa Path) Index(i int) PathStep {
|
||||
if i < 0 {
|
||||
i = len(pa) + i
|
||||
}
|
||||
if i < 0 || i >= len(pa) {
|
||||
return pathStep{}
|
||||
}
|
||||
return pa[i]
|
||||
}
|
||||
|
||||
// String returns the simplified path to a node.
|
||||
// The simplified path only contains struct field accesses.
|
||||
//
|
||||
// For example:
|
||||
// MyMap.MySlices.MyField
|
||||
func (pa Path) String() string {
|
||||
var ss []string
|
||||
for _, s := range pa {
|
||||
if _, ok := s.(StructField); ok {
|
||||
ss = append(ss, s.String())
|
||||
}
|
||||
}
|
||||
return strings.TrimPrefix(strings.Join(ss, ""), ".")
|
||||
}
|
||||
|
||||
// GoString returns the path to a specific node using Go syntax.
|
||||
//
|
||||
// For example:
|
||||
// (*root.MyMap["key"].(*mypkg.MyStruct).MySlices)[2][3].MyField
|
||||
func (pa Path) GoString() string {
|
||||
var ssPre, ssPost []string
|
||||
var numIndirect int
|
||||
for i, s := range pa {
|
||||
var nextStep PathStep
|
||||
if i+1 < len(pa) {
|
||||
nextStep = pa[i+1]
|
||||
}
|
||||
switch s := s.(type) {
|
||||
case Indirect:
|
||||
numIndirect++
|
||||
pPre, pPost := "(", ")"
|
||||
switch nextStep.(type) {
|
||||
case Indirect:
|
||||
continue // Next step is indirection, so let them batch up
|
||||
case StructField:
|
||||
numIndirect-- // Automatic indirection on struct fields
|
||||
case nil:
|
||||
pPre, pPost = "", "" // Last step; no need for parentheses
|
||||
}
|
||||
if numIndirect > 0 {
|
||||
ssPre = append(ssPre, pPre+strings.Repeat("*", numIndirect))
|
||||
ssPost = append(ssPost, pPost)
|
||||
}
|
||||
numIndirect = 0
|
||||
continue
|
||||
case Transform:
|
||||
ssPre = append(ssPre, s.trans.name+"(")
|
||||
ssPost = append(ssPost, ")")
|
||||
continue
|
||||
}
|
||||
ssPost = append(ssPost, s.String())
|
||||
}
|
||||
for i, j := 0, len(ssPre)-1; i < j; i, j = i+1, j-1 {
|
||||
ssPre[i], ssPre[j] = ssPre[j], ssPre[i]
|
||||
}
|
||||
return strings.Join(ssPre, "") + strings.Join(ssPost, "")
|
||||
}
|
||||
|
||||
type pathStep struct {
|
||||
typ reflect.Type
|
||||
vx, vy reflect.Value
|
||||
}
|
||||
|
||||
func (ps pathStep) Type() reflect.Type { return ps.typ }
|
||||
func (ps pathStep) Values() (vx, vy reflect.Value) { return ps.vx, ps.vy }
|
||||
func (ps pathStep) String() string {
|
||||
if ps.typ == nil {
|
||||
return "<nil>"
|
||||
}
|
||||
s := ps.typ.String()
|
||||
if s == "" || strings.ContainsAny(s, "{}\n") {
|
||||
return "root" // Type too simple or complex to print
|
||||
}
|
||||
return fmt.Sprintf("{%s}", s)
|
||||
}
|
||||
|
||||
// StructField represents a struct field access on a field called Name.
|
||||
type StructField struct{ *structField }
|
||||
type structField struct {
|
||||
pathStep
|
||||
name string
|
||||
idx int
|
||||
|
||||
// These fields are used for forcibly accessing an unexported field.
|
||||
// pvx, pvy, and field are only valid if unexported is true.
|
||||
unexported bool
|
||||
mayForce bool // Forcibly allow visibility
|
||||
pvx, pvy reflect.Value // Parent values
|
||||
field reflect.StructField // Field information
|
||||
}
|
||||
|
||||
func (sf StructField) Type() reflect.Type { return sf.typ }
|
||||
func (sf StructField) Values() (vx, vy reflect.Value) {
|
||||
if !sf.unexported {
|
||||
return sf.vx, sf.vy // CanInterface reports true
|
||||
}
|
||||
|
||||
// Forcibly obtain read-write access to an unexported struct field.
|
||||
if sf.mayForce {
|
||||
vx = retrieveUnexportedField(sf.pvx, sf.field)
|
||||
vy = retrieveUnexportedField(sf.pvy, sf.field)
|
||||
return vx, vy // CanInterface reports true
|
||||
}
|
||||
return sf.vx, sf.vy // CanInterface reports false
|
||||
}
|
||||
func (sf StructField) String() string { return fmt.Sprintf(".%s", sf.name) }
|
||||
|
||||
// Name is the field name.
|
||||
func (sf StructField) Name() string { return sf.name }
|
||||
|
||||
// Index is the index of the field in the parent struct type.
|
||||
// See reflect.Type.Field.
|
||||
func (sf StructField) Index() int { return sf.idx }
|
||||
|
||||
// SliceIndex is an index operation on a slice or array at some index Key.
|
||||
type SliceIndex struct{ *sliceIndex }
|
||||
type sliceIndex struct {
|
||||
pathStep
|
||||
xkey, ykey int
|
||||
}
|
||||
|
||||
func (si SliceIndex) Type() reflect.Type { return si.typ }
|
||||
func (si SliceIndex) Values() (vx, vy reflect.Value) { return si.vx, si.vy }
|
||||
func (si SliceIndex) String() string {
|
||||
switch {
|
||||
case si.xkey == si.ykey:
|
||||
return fmt.Sprintf("[%d]", si.xkey)
|
||||
case si.ykey == -1:
|
||||
// [5->?] means "I don't know where X[5] went"
|
||||
return fmt.Sprintf("[%d->?]", si.xkey)
|
||||
case si.xkey == -1:
|
||||
// [?->3] means "I don't know where Y[3] came from"
|
||||
return fmt.Sprintf("[?->%d]", si.ykey)
|
||||
default:
|
||||
// [5->3] means "X[5] moved to Y[3]"
|
||||
return fmt.Sprintf("[%d->%d]", si.xkey, si.ykey)
|
||||
}
|
||||
}
|
||||
|
||||
// Key is the index key; it may return -1 if in a split state (see SplitKeys).
|
||||
func (si SliceIndex) Key() int {
|
||||
if si.xkey != si.ykey {
|
||||
return -1
|
||||
}
|
||||
return si.xkey
|
||||
}
|
||||
|
||||
// SplitKeys are the indexes for indexing into slices in the
|
||||
// x and y values, respectively. These indexes may differ due to the
|
||||
// insertion or removal of an element in one of the slices, causing
|
||||
// all of the indexes to be shifted. If an index is -1, then that
|
||||
// indicates that the element does not exist in the associated slice.
|
||||
//
|
||||
// Key is guaranteed to return -1 if and only if the indexes returned
|
||||
// by SplitKeys are not the same. SplitKeys will never return -1 for
|
||||
// both indexes.
|
||||
func (si SliceIndex) SplitKeys() (ix, iy int) { return si.xkey, si.ykey }
|
||||
|
||||
// MapIndex is an index operation on a map at some index Key.
|
||||
type MapIndex struct{ *mapIndex }
|
||||
type mapIndex struct {
|
||||
pathStep
|
||||
key reflect.Value
|
||||
}
|
||||
|
||||
func (mi MapIndex) Type() reflect.Type { return mi.typ }
|
||||
func (mi MapIndex) Values() (vx, vy reflect.Value) { return mi.vx, mi.vy }
|
||||
func (mi MapIndex) String() string { return fmt.Sprintf("[%#v]", mi.key) }
|
||||
|
||||
// Key is the value of the map key.
|
||||
func (mi MapIndex) Key() reflect.Value { return mi.key }
|
||||
|
||||
// Indirect represents pointer indirection on the parent type.
|
||||
type Indirect struct{ *indirect }
|
||||
type indirect struct {
|
||||
pathStep
|
||||
}
|
||||
|
||||
func (in Indirect) Type() reflect.Type { return in.typ }
|
||||
func (in Indirect) Values() (vx, vy reflect.Value) { return in.vx, in.vy }
|
||||
func (in Indirect) String() string { return "*" }
|
||||
|
||||
// TypeAssertion represents a type assertion on an interface.
|
||||
type TypeAssertion struct{ *typeAssertion }
|
||||
type typeAssertion struct {
|
||||
pathStep
|
||||
}
|
||||
|
||||
func (ta TypeAssertion) Type() reflect.Type { return ta.typ }
|
||||
func (ta TypeAssertion) Values() (vx, vy reflect.Value) { return ta.vx, ta.vy }
|
||||
func (ta TypeAssertion) String() string { return fmt.Sprintf(".(%v)", ta.typ) }
|
||||
|
||||
// Transform is a transformation from the parent type to the current type.
|
||||
type Transform struct{ *transform }
|
||||
type transform struct {
|
||||
pathStep
|
||||
trans *transformer
|
||||
}
|
||||
|
||||
func (tf Transform) Type() reflect.Type { return tf.typ }
|
||||
func (tf Transform) Values() (vx, vy reflect.Value) { return tf.vx, tf.vy }
|
||||
func (tf Transform) String() string { return fmt.Sprintf("%s()", tf.trans.name) }
|
||||
|
||||
// Name is the name of the Transformer.
|
||||
func (tf Transform) Name() string { return tf.trans.name }
|
||||
|
||||
// Func is the function pointer to the transformer function.
|
||||
func (tf Transform) Func() reflect.Value { return tf.trans.fnc }
|
||||
|
||||
// Option returns the originally constructed Transformer option.
|
||||
// The == operator can be used to detect the exact option used.
|
||||
func (tf Transform) Option() Option { return tf.trans }
|
||||
|
||||
// isExported reports whether the identifier is exported.
|
||||
func isExported(id string) bool {
|
||||
r, _ := utf8.DecodeRuneInString(id)
|
||||
return unicode.IsUpper(r)
|
||||
}
|
@@ -0,0 +1,51 @@
|
||||
// Copyright 2017, The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package cmp
|
||||
|
||||
// defaultReporter implements the reporter interface.
|
||||
//
|
||||
// As Equal serially calls the PushStep, Report, and PopStep methods, the
|
||||
// defaultReporter constructs a tree-based representation of the compared value
|
||||
// and the result of each comparison (see valueNode).
|
||||
//
|
||||
// When the String method is called, the FormatDiff method transforms the
|
||||
// valueNode tree into a textNode tree, which is a tree-based representation
|
||||
// of the textual output (see textNode).
|
||||
//
|
||||
// Lastly, the textNode.String method produces the final report as a string.
|
||||
type defaultReporter struct {
|
||||
root *valueNode
|
||||
curr *valueNode
|
||||
}
|
||||
|
||||
func (r *defaultReporter) PushStep(ps PathStep) {
|
||||
r.curr = r.curr.PushStep(ps)
|
||||
if r.root == nil {
|
||||
r.root = r.curr
|
||||
}
|
||||
}
|
||||
func (r *defaultReporter) Report(rs Result) {
|
||||
r.curr.Report(rs)
|
||||
}
|
||||
func (r *defaultReporter) PopStep() {
|
||||
r.curr = r.curr.PopStep()
|
||||
}
|
||||
|
||||
// String provides a full report of the differences detected as a structured
|
||||
// literal in pseudo-Go syntax. String may only be called after the entire tree
|
||||
// has been traversed.
|
||||
func (r *defaultReporter) String() string {
|
||||
assert(r.root != nil && r.curr == nil)
|
||||
if r.root.NumDiff == 0 {
|
||||
return ""
|
||||
}
|
||||
return formatOptions{}.FormatDiff(r.root).String()
|
||||
}
|
||||
|
||||
func assert(ok bool) {
|
||||
if !ok {
|
||||
panic("assertion failure")
|
||||
}
|
||||
}
|
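For reference, this reporter is what backs the public Diff function; a usage sketch:

```go
package main

import (
	"fmt"

	"github.com/google/go-cmp/cmp"
)

func main() {
	// Diff wires a defaultReporter into Equal and returns its String output:
	// empty for equal inputs, a structured pseudo-Go literal otherwise.
	type T struct{ A, B int }
	fmt.Println(cmp.Diff(T{1, 2}, T{1, 3}))
}
```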
@@ -0,0 +1,296 @@
|
||||
// Copyright 2019, The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package cmp
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"reflect"
|
||||
|
||||
"github.com/google/go-cmp/cmp/internal/value"
|
||||
)
|
||||
|
||||
// TODO: Enforce limits?
|
||||
// * Enforce maximum number of records to print per node?
|
||||
// * Enforce maximum size in bytes allowed?
|
||||
// * As a heuristic, use less verbosity for equal nodes than unequal nodes.
|
||||
// TODO: Enforce unique outputs?
|
||||
// * Avoid Stringer methods if it results in same output?
|
||||
// * Print pointer address if outputs still equal?
|
||||
|
||||
// numContextRecords is the number of surrounding equal records to print.
|
||||
const numContextRecords = 2
|
||||
|
||||
type diffMode byte
|
||||
|
||||
const (
|
||||
diffUnknown diffMode = 0
|
||||
diffIdentical diffMode = ' '
|
||||
diffRemoved diffMode = '-'
|
||||
diffInserted diffMode = '+'
|
||||
)
|
||||
|
||||
type typeMode int
|
||||
|
||||
const (
|
||||
// emitType always prints the type.
|
||||
emitType typeMode = iota
|
||||
// elideType never prints the type.
|
||||
elideType
|
||||
// autoType prints the type only for composite kinds
|
||||
// (i.e., structs, slices, arrays, and maps).
|
||||
autoType
|
||||
)
|
||||
|
||||
type formatOptions struct {
|
||||
// DiffMode controls the output mode of FormatDiff.
|
||||
//
|
||||
// If diffUnknown, then produce a diff of the x and y values.
|
||||
// If diffIdentical, then emit values as if they were equal.
|
||||
// If diffRemoved, then only emit x values (ignoring y values).
|
||||
// If diffInserted, then only emit y values (ignoring x values).
|
||||
DiffMode diffMode
|
||||
|
||||
// TypeMode controls whether to print the type for the current node.
|
||||
//
|
||||
// As a general rule of thumb, we always print the type of the next node
|
||||
// after an interface, and always elide the type of the next node after
|
||||
// a slice or map node.
|
||||
TypeMode typeMode
|
||||
|
||||
// formatValueOptions are options specific to printing reflect.Values.
|
||||
formatValueOptions
|
||||
}
|
||||
|
||||
func (opts formatOptions) WithDiffMode(d diffMode) formatOptions {
|
||||
opts.DiffMode = d
|
||||
return opts
|
||||
}
|
||||
func (opts formatOptions) WithTypeMode(t typeMode) formatOptions {
|
||||
opts.TypeMode = t
|
||||
return opts
|
||||
}
|
||||
|
||||
// FormatDiff converts a valueNode tree into a textNode tree, where the latter
// is a textual representation of the differences detected in the former.
|
||||
func (opts formatOptions) FormatDiff(v *valueNode) textNode {
|
||||
// Check whether we have specialized formatting for this node.
|
||||
// This is not necessary, but helpful for producing more readable outputs.
|
||||
if opts.CanFormatDiffSlice(v) {
|
||||
return opts.FormatDiffSlice(v)
|
||||
}
|
||||
|
||||
// For leaf nodes, format the value based on the reflect.Values alone.
|
||||
if v.MaxDepth == 0 {
|
||||
switch opts.DiffMode {
|
||||
case diffUnknown, diffIdentical:
|
||||
// Format Equal.
|
||||
if v.NumDiff == 0 {
|
||||
outx := opts.FormatValue(v.ValueX, visitedPointers{})
|
||||
outy := opts.FormatValue(v.ValueY, visitedPointers{})
|
||||
if v.NumIgnored > 0 && v.NumSame == 0 {
|
||||
return textEllipsis
|
||||
} else if outx.Len() < outy.Len() {
|
||||
return outx
|
||||
} else {
|
||||
return outy
|
||||
}
|
||||
}
|
||||
|
||||
// Format unequal.
|
||||
assert(opts.DiffMode == diffUnknown)
|
||||
var list textList
|
||||
outx := opts.WithTypeMode(elideType).FormatValue(v.ValueX, visitedPointers{})
|
||||
outy := opts.WithTypeMode(elideType).FormatValue(v.ValueY, visitedPointers{})
|
||||
if outx != nil {
|
||||
list = append(list, textRecord{Diff: '-', Value: outx})
|
||||
}
|
||||
if outy != nil {
|
||||
list = append(list, textRecord{Diff: '+', Value: outy})
|
||||
}
|
||||
return opts.WithTypeMode(emitType).FormatType(v.Type, list)
|
||||
case diffRemoved:
|
||||
return opts.FormatValue(v.ValueX, visitedPointers{})
|
||||
case diffInserted:
|
||||
return opts.FormatValue(v.ValueY, visitedPointers{})
|
||||
default:
|
||||
panic("invalid diff mode")
|
||||
}
|
||||
}
|
||||
|
||||
// Descend into the child value node.
|
||||
if v.TransformerName != "" {
|
||||
out := opts.WithTypeMode(emitType).FormatDiff(v.Value)
|
||||
out = textWrap{"Inverse(" + v.TransformerName + ", ", out, ")"}
|
||||
return opts.FormatType(v.Type, out)
|
||||
} else {
|
||||
switch k := v.Type.Kind(); k {
|
||||
case reflect.Struct, reflect.Array, reflect.Slice, reflect.Map:
|
||||
return opts.FormatType(v.Type, opts.formatDiffList(v.Records, k))
|
||||
case reflect.Ptr:
|
||||
return textWrap{"&", opts.FormatDiff(v.Value), ""}
|
||||
case reflect.Interface:
|
||||
return opts.WithTypeMode(emitType).FormatDiff(v.Value)
|
||||
default:
|
||||
panic(fmt.Sprintf("%v cannot have children", k))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (opts formatOptions) formatDiffList(recs []reportRecord, k reflect.Kind) textNode {
|
||||
// Derive record name based on the data structure kind.
|
||||
var name string
|
||||
var formatKey func(reflect.Value) string
|
||||
switch k {
|
||||
case reflect.Struct:
|
||||
name = "field"
|
||||
opts = opts.WithTypeMode(autoType)
|
||||
formatKey = func(v reflect.Value) string { return v.String() }
|
||||
case reflect.Slice, reflect.Array:
|
||||
name = "element"
|
||||
opts = opts.WithTypeMode(elideType)
|
||||
formatKey = func(reflect.Value) string { return "" }
|
||||
case reflect.Map:
|
||||
name = "entry"
|
||||
opts = opts.WithTypeMode(elideType)
|
||||
formatKey = formatMapKey
|
||||
}
|
||||
|
||||
// Handle unification.
|
||||
switch opts.DiffMode {
|
||||
case diffIdentical, diffRemoved, diffInserted:
|
||||
var list textList
|
||||
var deferredEllipsis bool // Add final "..." to indicate records were dropped
|
||||
for _, r := range recs {
|
||||
// Elide struct fields that are zero value.
|
||||
if k == reflect.Struct {
|
||||
var isZero bool
|
||||
switch opts.DiffMode {
|
||||
case diffIdentical:
|
||||
isZero = value.IsZero(r.Value.ValueX) || value.IsZero(r.Value.ValueY)
|
||||
case diffRemoved:
|
||||
isZero = value.IsZero(r.Value.ValueX)
|
||||
case diffInserted:
|
||||
isZero = value.IsZero(r.Value.ValueY)
|
||||
}
|
||||
if isZero {
|
||||
continue
|
||||
}
|
||||
}
|
||||
// Elide ignored nodes.
|
||||
if r.Value.NumIgnored > 0 && r.Value.NumSame+r.Value.NumDiff == 0 {
|
||||
deferredEllipsis = !(k == reflect.Slice || k == reflect.Array)
|
||||
if !deferredEllipsis {
|
||||
list.AppendEllipsis(diffStats{})
|
||||
}
|
||||
continue
|
||||
}
|
||||
if out := opts.FormatDiff(r.Value); out != nil {
|
||||
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
|
||||
}
|
||||
}
|
||||
if deferredEllipsis {
|
||||
list.AppendEllipsis(diffStats{})
|
||||
}
|
||||
return textWrap{"{", list, "}"}
|
||||
case diffUnknown:
|
||||
default:
|
||||
panic("invalid diff mode")
|
||||
}
|
||||
|
||||
// Handle differencing.
|
||||
var list textList
|
||||
groups := coalesceAdjacentRecords(name, recs)
|
||||
for i, ds := range groups {
|
||||
// Handle equal records.
|
||||
if ds.NumDiff() == 0 {
|
||||
// Compute the number of leading and trailing records to print.
|
||||
var numLo, numHi int
|
||||
numEqual := ds.NumIgnored + ds.NumIdentical
|
||||
for numLo < numContextRecords && numLo+numHi < numEqual && i != 0 {
|
||||
if r := recs[numLo].Value; r.NumIgnored > 0 && r.NumSame+r.NumDiff == 0 {
|
||||
break
|
||||
}
|
||||
numLo++
|
||||
}
|
||||
for numHi < numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
|
||||
if r := recs[numEqual-numHi-1].Value; r.NumIgnored > 0 && r.NumSame+r.NumDiff == 0 {
|
||||
break
|
||||
}
|
||||
numHi++
|
||||
}
|
||||
if numEqual-(numLo+numHi) == 1 && ds.NumIgnored == 0 {
|
||||
numHi++ // Avoid pointless coalescing of a single equal record
|
||||
}
|
||||
|
||||
// Format the equal values.
|
||||
for _, r := range recs[:numLo] {
|
||||
out := opts.WithDiffMode(diffIdentical).FormatDiff(r.Value)
|
||||
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
|
||||
}
|
||||
if numEqual > numLo+numHi {
|
||||
ds.NumIdentical -= numLo + numHi
|
||||
list.AppendEllipsis(ds)
|
||||
}
|
||||
for _, r := range recs[numEqual-numHi : numEqual] {
|
||||
out := opts.WithDiffMode(diffIdentical).FormatDiff(r.Value)
|
||||
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
|
||||
}
|
||||
recs = recs[numEqual:]
|
||||
continue
|
||||
}
|
||||
|
||||
// Handle unequal records.
|
||||
for _, r := range recs[:ds.NumDiff()] {
|
||||
switch {
|
||||
case opts.CanFormatDiffSlice(r.Value):
|
||||
out := opts.FormatDiffSlice(r.Value)
|
||||
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
|
||||
case r.Value.NumChildren == r.Value.MaxDepth:
|
||||
outx := opts.WithDiffMode(diffRemoved).FormatDiff(r.Value)
|
||||
outy := opts.WithDiffMode(diffInserted).FormatDiff(r.Value)
|
||||
if outx != nil {
|
||||
list = append(list, textRecord{Diff: diffRemoved, Key: formatKey(r.Key), Value: outx})
|
||||
}
|
||||
if outy != nil {
|
||||
list = append(list, textRecord{Diff: diffInserted, Key: formatKey(r.Key), Value: outy})
|
||||
}
|
||||
default:
|
||||
out := opts.FormatDiff(r.Value)
|
||||
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
|
||||
}
|
||||
}
|
||||
recs = recs[ds.NumDiff():]
|
||||
}
|
||||
assert(len(recs) == 0)
|
||||
return textWrap{"{", list, "}"}
|
||||
}
|
||||
|
||||
// coalesceAdjacentRecords coalesces the list of records into groups of
|
||||
// adjacent equal, or unequal counts.
|
||||
func coalesceAdjacentRecords(name string, recs []reportRecord) (groups []diffStats) {
|
||||
var prevCase int // Arbitrary index into which case last occurred
|
||||
lastStats := func(i int) *diffStats {
|
||||
if prevCase != i {
|
||||
groups = append(groups, diffStats{Name: name})
|
||||
prevCase = i
|
||||
}
|
||||
return &groups[len(groups)-1]
|
||||
}
|
||||
for _, r := range recs {
|
||||
switch rv := r.Value; {
|
||||
case rv.NumIgnored > 0 && rv.NumSame+rv.NumDiff == 0:
|
||||
lastStats(1).NumIgnored++
|
||||
case rv.NumDiff == 0:
|
||||
lastStats(1).NumIdentical++
|
||||
case rv.NumDiff > 0 && !rv.ValueY.IsValid():
|
||||
lastStats(2).NumRemoved++
|
||||
case rv.NumDiff > 0 && !rv.ValueX.IsValid():
|
||||
lastStats(2).NumInserted++
|
||||
default:
|
||||
lastStats(2).NumModified++
|
||||
}
|
||||
}
|
||||
return groups
|
||||
}
|
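coalesceAdjacentRecords is essentially a run-length encoding over the equal/unequal status of consecutive records. A self-contained sketch of the same idea over plain booleans (the names here are illustrative, not part of the package):

```go
package main

import "fmt"

// group is a simplified stand-in for diffStats: a run of records that
// are either all equal or all unequal.
type group struct {
	Equal bool
	N     int
}

// coalesce buckets adjacent records by their equality status, mirroring
// how coalesceAdjacentRecords accumulates records into diffStats groups.
func coalesce(equal []bool) (groups []group) {
	for _, e := range equal {
		if n := len(groups); n > 0 && groups[n-1].Equal == e {
			groups[n-1].N++
			continue
		}
		groups = append(groups, group{Equal: e, N: 1})
	}
	return groups
}

func main() {
	fmt.Println(coalesce([]bool{true, true, false, true}))
	// Output: [{true 2} {false 1} {true 1}]
}
```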
@ -0,0 +1,278 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package cmp

import (
	"fmt"
	"reflect"
	"strconv"
	"strings"
	"unicode"

	"github.com/google/go-cmp/cmp/internal/flags"
	"github.com/google/go-cmp/cmp/internal/value"
)

type formatValueOptions struct {
	// AvoidStringer controls whether to avoid calling custom stringer
	// methods like error.Error or fmt.Stringer.String.
	AvoidStringer bool

	// ShallowPointers controls whether to avoid descending into pointers.
	// Useful when printing map keys, where pointer comparison is performed
	// on the pointer address rather than the pointed-at value.
	ShallowPointers bool

	// PrintAddresses controls whether to print the address of all pointers,
	// slice elements, and maps.
	PrintAddresses bool
}

// FormatType prints the type as if it were wrapping s.
// This may return s as-is depending on the current type and TypeMode mode.
func (opts formatOptions) FormatType(t reflect.Type, s textNode) textNode {
	// Check whether to emit the type or not.
	switch opts.TypeMode {
	case autoType:
		switch t.Kind() {
		case reflect.Struct, reflect.Slice, reflect.Array, reflect.Map:
			if s.Equal(textNil) {
				return s
			}
		default:
			return s
		}
	case elideType:
		return s
	}

	// Determine the type label, applying special handling for unnamed types.
	typeName := t.String()
	if t.Name() == "" {
		// According to Go grammar, certain type literals contain symbols that
		// do not strongly bind to the next lexicographical token (e.g., *T).
		switch t.Kind() {
		case reflect.Chan, reflect.Func, reflect.Ptr:
			typeName = "(" + typeName + ")"
		}
		typeName = strings.Replace(typeName, "struct {", "struct{", -1)
		typeName = strings.Replace(typeName, "interface {", "interface{", -1)
	}

	// Avoid wrapping the value in parentheses if unnecessary.
	if s, ok := s.(textWrap); ok {
		hasParens := strings.HasPrefix(s.Prefix, "(") && strings.HasSuffix(s.Suffix, ")")
		hasBraces := strings.HasPrefix(s.Prefix, "{") && strings.HasSuffix(s.Suffix, "}")
		if hasParens || hasBraces {
			return textWrap{typeName, s, ""}
		}
	}
	return textWrap{typeName + "(", s, ")"}
}

// FormatValue prints the reflect.Value, taking extra care to avoid descending
// into pointers already in m. As pointers are visited, m is also updated.
func (opts formatOptions) FormatValue(v reflect.Value, m visitedPointers) (out textNode) {
	if !v.IsValid() {
		return nil
	}
	t := v.Type()

	// Check whether there is an Error or String method to call.
	if !opts.AvoidStringer && v.CanInterface() {
		// Avoid calling Error or String methods on nil receivers since many
		// implementations crash when doing so.
		if (t.Kind() != reflect.Ptr && t.Kind() != reflect.Interface) || !v.IsNil() {
			switch v := v.Interface().(type) {
			case error:
				return textLine("e" + formatString(v.Error()))
			case fmt.Stringer:
				return textLine("s" + formatString(v.String()))
			}
		}
	}

	// Check whether to explicitly wrap the result with the type.
	var skipType bool
	defer func() {
		if !skipType {
			out = opts.FormatType(t, out)
		}
	}()

	var ptr string
	switch t.Kind() {
	case reflect.Bool:
		return textLine(fmt.Sprint(v.Bool()))
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return textLine(fmt.Sprint(v.Int()))
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
		// Unnamed uints are usually bytes or words, so use hexadecimal.
		if t.PkgPath() == "" || t.Kind() == reflect.Uintptr {
			return textLine(formatHex(v.Uint()))
		}
		return textLine(fmt.Sprint(v.Uint()))
	case reflect.Float32, reflect.Float64:
		return textLine(fmt.Sprint(v.Float()))
	case reflect.Complex64, reflect.Complex128:
		return textLine(fmt.Sprint(v.Complex()))
	case reflect.String:
		return textLine(formatString(v.String()))
	case reflect.UnsafePointer, reflect.Chan, reflect.Func:
		return textLine(formatPointer(v))
	case reflect.Struct:
		var list textList
		for i := 0; i < v.NumField(); i++ {
			vv := v.Field(i)
			if value.IsZero(vv) {
				continue // Elide fields with zero values
			}
			s := opts.WithTypeMode(autoType).FormatValue(vv, m)
			list = append(list, textRecord{Key: t.Field(i).Name, Value: s})
		}
		return textWrap{"{", list, "}"}
	case reflect.Slice:
		if v.IsNil() {
			return textNil
		}
		if opts.PrintAddresses {
			ptr = formatPointer(v)
		}
		fallthrough
	case reflect.Array:
		var list textList
		for i := 0; i < v.Len(); i++ {
			vi := v.Index(i)
			if vi.CanAddr() { // Check for cyclic elements
				p := vi.Addr()
				if m.Visit(p) {
					var out textNode
					out = textLine(formatPointer(p))
					out = opts.WithTypeMode(emitType).FormatType(p.Type(), out)
					out = textWrap{"*", out, ""}
					list = append(list, textRecord{Value: out})
					continue
				}
			}
			s := opts.WithTypeMode(elideType).FormatValue(vi, m)
			list = append(list, textRecord{Value: s})
		}
		return textWrap{ptr + "{", list, "}"}
	case reflect.Map:
		if v.IsNil() {
			return textNil
		}
		if m.Visit(v) {
			return textLine(formatPointer(v))
		}

		var list textList
		for _, k := range value.SortKeys(v.MapKeys()) {
			sk := formatMapKey(k)
			sv := opts.WithTypeMode(elideType).FormatValue(v.MapIndex(k), m)
			list = append(list, textRecord{Key: sk, Value: sv})
		}
		if opts.PrintAddresses {
			ptr = formatPointer(v)
		}
		return textWrap{ptr + "{", list, "}"}
	case reflect.Ptr:
		if v.IsNil() {
			return textNil
		}
		if m.Visit(v) || opts.ShallowPointers {
			return textLine(formatPointer(v))
		}
		if opts.PrintAddresses {
			ptr = formatPointer(v)
		}
		skipType = true // Let the underlying value print the type instead
		return textWrap{"&" + ptr, opts.FormatValue(v.Elem(), m), ""}
	case reflect.Interface:
		if v.IsNil() {
			return textNil
		}
		// Interfaces accept different concrete types,
		// so configure the underlying value to explicitly print the type.
		skipType = true // Print the concrete type instead
		return opts.WithTypeMode(emitType).FormatValue(v.Elem(), m)
	default:
		panic(fmt.Sprintf("%v kind not handled", v.Kind()))
	}
}

// formatMapKey formats v as if it were a map key.
// The result is guaranteed to be a single line.
func formatMapKey(v reflect.Value) string {
	var opts formatOptions
	opts.TypeMode = elideType
	opts.ShallowPointers = true
	s := opts.FormatValue(v, visitedPointers{}).String()
	return strings.TrimSpace(s)
}

// formatString prints s as a double-quoted or backtick-quoted string.
func formatString(s string) string {
	// Use a quoted string if it is the same length as a raw string literal.
	// Otherwise, attempt to use the raw string form.
	qs := strconv.Quote(s)
	if len(qs) == 1+len(s)+1 {
		return qs
	}

	// Disallow newlines to ensure output is a single line.
	// Only allow printable runes for readability purposes.
	rawInvalid := func(r rune) bool {
		return r == '`' || r == '\n' || !(unicode.IsPrint(r) || r == '\t')
	}
	if strings.IndexFunc(s, rawInvalid) < 0 {
		return "`" + s + "`"
	}
	return qs
}

// formatHex prints u as a hexadecimal integer in Go notation.
func formatHex(u uint64) string {
	var f string
	switch {
	case u <= 0xff:
		f = "0x%02x"
	case u <= 0xffff:
		f = "0x%04x"
	case u <= 0xffffff:
		f = "0x%06x"
	case u <= 0xffffffff:
		f = "0x%08x"
	case u <= 0xffffffffff:
		f = "0x%010x"
	case u <= 0xffffffffffff:
		f = "0x%012x"
	case u <= 0xffffffffffffff:
		f = "0x%014x"
	case u <= 0xffffffffffffffff:
		f = "0x%016x"
	}
	return fmt.Sprintf(f, u)
}

// formatPointer prints the address of the pointer.
func formatPointer(v reflect.Value) string {
	p := v.Pointer()
	if flags.Deterministic {
		p = 0xdeadf00f // Only used for stable testing purposes
	}
	return fmt.Sprintf("⟪0x%x⟫", p)
}

type visitedPointers map[value.Pointer]struct{}

// Visit inserts pointer v into the visited map and reports whether it had
// already been visited before.
func (m visitedPointers) Visit(v reflect.Value) bool {
	p := value.PointerOf(v)
	_, visited := m[p]
	m[p] = struct{}{}
	return visited
}
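formatHex above always pads to an even number of hex digits so byte-oriented values line up in the report. A quick standalone demonstration of the same width selection, using only the standard library:

```go
package main

import "fmt"

func main() {
	// Mirrors formatHex: pick the narrowest even digit count that fits.
	for _, u := range []uint64{0x1, 0xab, 0x1234, 0x10000} {
		digits := 2
		for w := uint64(0xff); u > w && digits < 16; w = w<<8 | 0xff {
			digits += 2
		}
		fmt.Printf("0x%0*x\n", digits, u)
	}
	// Output: 0x01, 0xab, 0x1234, 0x010000
}
```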
@ -0,0 +1,333 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package cmp

import (
	"bytes"
	"fmt"
	"reflect"
	"strings"
	"unicode"
	"unicode/utf8"

	"github.com/google/go-cmp/cmp/internal/diff"
)

// CanFormatDiffSlice reports whether we support custom formatting for nodes
// that are slices of primitive kinds or strings.
func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
	switch {
	case opts.DiffMode != diffUnknown:
		return false // Must be formatting in diff mode
	case v.NumDiff == 0:
		return false // No differences detected
	case v.NumIgnored+v.NumCompared+v.NumTransformed > 0:
		// TODO: Handle the case where someone uses bytes.Equal on a large slice.
		return false // Some custom option was used to determine equality
	case !v.ValueX.IsValid() || !v.ValueY.IsValid():
		return false // Both values must be valid
	}

	switch t := v.Type; t.Kind() {
	case reflect.String:
	case reflect.Array, reflect.Slice:
		// Only slices of primitive types have specialized handling.
		switch t.Elem().Kind() {
		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
			reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
			reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
		default:
			return false
		}

		// If a sufficient number of elements already differ,
		// use specialized formatting even if length requirement is not met.
		if v.NumDiff > v.NumSame {
			return true
		}
	default:
		return false
	}

	// Use specialized string diffing for longer slices or strings.
	const minLength = 64
	return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength
}

// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
// This provides custom-tailored logic to make printing of differences in
// textual strings and slices of primitive kinds more readable.
func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
	assert(opts.DiffMode == diffUnknown)
	t, vx, vy := v.Type, v.ValueX, v.ValueY

	// Auto-detect the type of the data.
	var isLinedText, isText, isBinary bool
	var sx, sy string
	switch {
	case t.Kind() == reflect.String:
		sx, sy = vx.String(), vy.String()
		isText = true // Initial estimate, verify later
	case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
		sx, sy = string(vx.Bytes()), string(vy.Bytes())
		isBinary = true // Initial estimate, verify later
	case t.Kind() == reflect.Array:
		// Arrays need to be addressable for slice operations to work.
		vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
		vx2.Set(vx)
		vy2.Set(vy)
		vx, vy = vx2, vy2
	}
	if isText || isBinary {
		var numLines, lastLineIdx, maxLineLen int
		isBinary = false
		for i, r := range sx + sy {
			if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
				isBinary = true
				break
			}
			if r == '\n' {
				if maxLineLen < i-lastLineIdx {
					maxLineLen = i - lastLineIdx
				}
				lastLineIdx = i + 1
				numLines++
			}
		}
		isText = !isBinary
		isLinedText = isText && numLines >= 4 && maxLineLen <= 256
	}

	// Format the string into printable records.
	var list textList
	var delim string
	switch {
	// If the text appears to be multi-lined text,
	// then perform differencing across individual lines.
	case isLinedText:
		ssx := strings.Split(sx, "\n")
		ssy := strings.Split(sy, "\n")
		list = opts.formatDiffSlice(
			reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
			func(v reflect.Value, d diffMode) textRecord {
				s := formatString(v.Index(0).String())
				return textRecord{Diff: d, Value: textLine(s)}
			},
		)
		delim = "\n"
	// If the text appears to be single-lined text,
	// then perform differencing in approximately fixed-sized chunks.
	// The output is printed as quoted strings.
	case isText:
		list = opts.formatDiffSlice(
			reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
			func(v reflect.Value, d diffMode) textRecord {
				s := formatString(v.String())
				return textRecord{Diff: d, Value: textLine(s)}
			},
		)
		delim = ""
	// If the text appears to be binary data,
	// then perform differencing in approximately fixed-sized chunks.
	// The output is inspired by hexdump.
	case isBinary:
		list = opts.formatDiffSlice(
			reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
			func(v reflect.Value, d diffMode) textRecord {
				var ss []string
				for i := 0; i < v.Len(); i++ {
					ss = append(ss, formatHex(v.Index(i).Uint()))
				}
				s := strings.Join(ss, ", ")
				comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
				return textRecord{Diff: d, Value: textLine(s), Comment: comment}
			},
		)
	// For all other slices of primitive types,
	// perform differencing in approximately fixed-sized chunks.
	// The size of each chunk depends on the width of the element kind.
	default:
		var chunkSize int
		if t.Elem().Kind() == reflect.Bool {
			chunkSize = 16
		} else {
			switch t.Elem().Bits() {
			case 8:
				chunkSize = 16
			case 16:
				chunkSize = 12
			case 32:
				chunkSize = 8
			default:
				chunkSize = 8
			}
		}
		list = opts.formatDiffSlice(
			vx, vy, chunkSize, t.Elem().Kind().String(),
			func(v reflect.Value, d diffMode) textRecord {
				var ss []string
				for i := 0; i < v.Len(); i++ {
					switch t.Elem().Kind() {
					case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
						ss = append(ss, fmt.Sprint(v.Index(i).Int()))
					case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
						ss = append(ss, formatHex(v.Index(i).Uint()))
					case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
						ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
					}
				}
				s := strings.Join(ss, ", ")
				return textRecord{Diff: d, Value: textLine(s)}
			},
		)
	}

	// Wrap the output with appropriate type information.
	var out textNode = textWrap{"{", list, "}"}
	if !isText {
		// The "{...}" byte-sequence literal is not valid Go syntax for strings.
		// Emit the type for extra clarity (e.g. "string{...}").
		if t.Kind() == reflect.String {
			opts = opts.WithTypeMode(emitType)
		}
		return opts.FormatType(t, out)
	}
	switch t.Kind() {
	case reflect.String:
		out = textWrap{"strings.Join(", out, fmt.Sprintf(", %q)", delim)}
		if t != reflect.TypeOf(string("")) {
			out = opts.FormatType(t, out)
		}
	case reflect.Slice:
		out = textWrap{"bytes.Join(", out, fmt.Sprintf(", %q)", delim)}
		if t != reflect.TypeOf([]byte(nil)) {
			out = opts.FormatType(t, out)
		}
	}
	return out
}

// formatASCII formats s as an ASCII string.
// This is useful for printing binary strings in a semi-legible way.
func formatASCII(s string) string {
	b := bytes.Repeat([]byte{'.'}, len(s))
	for i := 0; i < len(s); i++ {
		if ' ' <= s[i] && s[i] <= '~' {
			b[i] = s[i]
		}
	}
	return string(b)
}

func (opts formatOptions) formatDiffSlice(
	vx, vy reflect.Value, chunkSize int, name string,
	makeRec func(reflect.Value, diffMode) textRecord,
) (list textList) {
	es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
		return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
	})

	appendChunks := func(v reflect.Value, d diffMode) int {
		n0 := v.Len()
		for v.Len() > 0 {
			n := chunkSize
			if n > v.Len() {
				n = v.Len()
			}
			list = append(list, makeRec(v.Slice(0, n), d))
			v = v.Slice(n, v.Len())
		}
		return n0 - v.Len()
	}

	groups := coalesceAdjacentEdits(name, es)
	groups = coalesceInterveningIdentical(groups, chunkSize/4)
	for i, ds := range groups {
		// Print equal.
		if ds.NumDiff() == 0 {
			// Compute the number of leading and trailing equal bytes to print.
			var numLo, numHi int
			numEqual := ds.NumIgnored + ds.NumIdentical
			for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
				numLo++
			}
			for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
				numHi++
			}
			if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
				numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
			}

			// Print the equal bytes.
			appendChunks(vx.Slice(0, numLo), diffIdentical)
			if numEqual > numLo+numHi {
				ds.NumIdentical -= numLo + numHi
				list.AppendEllipsis(ds)
			}
			appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
			vx = vx.Slice(numEqual, vx.Len())
			vy = vy.Slice(numEqual, vy.Len())
			continue
		}

		// Print unequal.
		nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
		vx = vx.Slice(nx, vx.Len())
		ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
		vy = vy.Slice(ny, vy.Len())
	}
	assert(vx.Len() == 0 && vy.Len() == 0)
	return list
}

// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
// equal or unequal counts.
func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
	var prevCase int // Arbitrary index into which case last occurred
	lastStats := func(i int) *diffStats {
		if prevCase != i {
			groups = append(groups, diffStats{Name: name})
			prevCase = i
		}
		return &groups[len(groups)-1]
	}
	for _, e := range es {
		switch e {
		case diff.Identity:
			lastStats(1).NumIdentical++
		case diff.UniqueX:
			lastStats(2).NumRemoved++
		case diff.UniqueY:
			lastStats(2).NumInserted++
		case diff.Modified:
			lastStats(2).NumModified++
		}
	}
	return groups
}

// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
// equal groups into adjacent unequal groups that currently result in a
// dual inserted/removed printout. This acts as a high-pass filter to smooth
// out high-frequency changes within the windowSize.
func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
	groups, groupsOrig := groups[:0], groups
	for i, ds := range groupsOrig {
		if len(groups) >= 2 && ds.NumDiff() > 0 {
			prev := &groups[len(groups)-2] // Unequal group
			curr := &groups[len(groups)-1] // Equal group
			next := &groupsOrig[i]         // Unequal group
			hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
			hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
			if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
				*prev = prev.Append(*curr).Append(*next)
				groups = groups[:len(groups)-1] // Truncate off equal group
				continue
			}
		}
		groups = append(groups, ds)
	}
	return groups
}
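coalesceInterveningIdentical merges an unequal/equal/unequal triple when the equal run in the middle is short, so the report shows one modified region instead of two. A simplified, self-contained sketch of that window rule (the stats type and the pairing condition are pared down from the real diffStats logic):

```go
package main

import "fmt"

// stats is a pared-down diffStats carrying only the counts this rule uses.
type stats struct{ identical, removed, inserted int }

// smooth collapses short equal runs sandwiched between unequal groups,
// analogous to coalesceInterveningIdentical (without the full X/Y pairing
// checks of the original).
func smooth(groups []stats, windowSize int) []stats {
	out := groups[:0:0]
	for _, g := range groups {
		n := len(out)
		if n >= 2 && g.removed+g.inserted > 0 && // current group is unequal
			out[n-1].removed+out[n-1].inserted == 0 && // previous is equal...
			out[n-1].identical <= windowSize && // ...and within the window
			out[n-2].removed+out[n-2].inserted > 0 { // the one before is unequal
			out[n-2].identical += out[n-1].identical
			out[n-2].removed += g.removed
			out[n-2].inserted += g.inserted
			out = out[:n-1] // truncate off the equal group
			continue
		}
		out = append(out, g)
	}
	return out
}

func main() {
	groups := []stats{{removed: 3}, {identical: 2}, {inserted: 5}}
	fmt.Printf("%+v\n", smooth(groups, 4))
	// Output: [{identical:2 removed:3 inserted:5}]
}
```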
@ -0,0 +1,121 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.

package cmp

import "reflect"

// valueNode represents a single node within a report, which is a
// structured representation of the value tree, containing information
// regarding which nodes are equal or not.
type valueNode struct {
	parent *valueNode

	Type   reflect.Type
	ValueX reflect.Value
	ValueY reflect.Value

	// NumSame is the number of leaf nodes that are equal.
	// All descendants are equal only if NumDiff is 0.
	NumSame int
	// NumDiff is the number of leaf nodes that are not equal.
	NumDiff int
	// NumIgnored is the number of leaf nodes that are ignored.
	NumIgnored int
	// NumCompared is the number of leaf nodes that were compared
	// using an Equal method or Comparer function.
	NumCompared int
	// NumTransformed is the number of non-leaf nodes that were transformed.
	NumTransformed int
	// NumChildren is the number of transitive descendants of this node.
	// This counts from zero; thus, leaf nodes have no descendants.
	NumChildren int
	// MaxDepth is the maximum depth of the tree. This counts from zero;
	// thus, leaf nodes have a depth of zero.
	MaxDepth int

	// Records is a list of struct fields, slice elements, or map entries.
	Records []reportRecord // If populated, implies Value is not populated

	// Value is the result of a transformation, pointer indirection, or
	// type assertion.
	Value *valueNode // If populated, implies Records is not populated

	// TransformerName is the name of the transformer.
	TransformerName string // If non-empty, implies Value is populated
}

type reportRecord struct {
	Key   reflect.Value // Invalid for slice element
	Value *valueNode
}

func (parent *valueNode) PushStep(ps PathStep) (child *valueNode) {
	vx, vy := ps.Values()
	child = &valueNode{parent: parent, Type: ps.Type(), ValueX: vx, ValueY: vy}
	switch s := ps.(type) {
	case StructField:
		assert(parent.Value == nil)
		parent.Records = append(parent.Records, reportRecord{Key: reflect.ValueOf(s.Name()), Value: child})
	case SliceIndex:
		assert(parent.Value == nil)
		parent.Records = append(parent.Records, reportRecord{Value: child})
	case MapIndex:
		assert(parent.Value == nil)
		parent.Records = append(parent.Records, reportRecord{Key: s.Key(), Value: child})
	case Indirect:
		assert(parent.Value == nil && parent.Records == nil)
		parent.Value = child
	case TypeAssertion:
		assert(parent.Value == nil && parent.Records == nil)
		parent.Value = child
	case Transform:
		assert(parent.Value == nil && parent.Records == nil)
		parent.Value = child
		parent.TransformerName = s.Name()
		parent.NumTransformed++
	default:
		assert(parent == nil) // Must be the root step
	}
	return child
}

func (r *valueNode) Report(rs Result) {
	assert(r.MaxDepth == 0) // May only be called on leaf nodes

	if rs.ByIgnore() {
		r.NumIgnored++
	} else {
		if rs.Equal() {
			r.NumSame++
		} else {
			r.NumDiff++
		}
	}
	assert(r.NumSame+r.NumDiff+r.NumIgnored == 1)

	if rs.ByMethod() {
		r.NumCompared++
	}
	if rs.ByFunc() {
		r.NumCompared++
	}
	assert(r.NumCompared <= 1)
}

func (child *valueNode) PopStep() (parent *valueNode) {
	if child.parent == nil {
		return nil
	}
	parent = child.parent
	parent.NumSame += child.NumSame
	parent.NumDiff += child.NumDiff
	parent.NumIgnored += child.NumIgnored
	parent.NumCompared += child.NumCompared
	parent.NumTransformed += child.NumTransformed
	parent.NumChildren += child.NumChildren + 1
	if parent.MaxDepth < child.MaxDepth+1 {
		parent.MaxDepth = child.MaxDepth + 1
	}
	return parent
}
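PopStep folds each child's counters into its parent as the traversal unwinds, so by the time the walk finishes, the root carries totals for the whole tree. A tiny standalone sketch of that bottom-up aggregation (the node type here is a simplified stand-in for valueNode):

```go
package main

import "fmt"

type node struct {
	parent            *node
	numDiff, maxDepth int
}

// pop mirrors valueNode.PopStep: fold the child's tallies into its parent
// and return the parent so the caller can continue unwinding.
func (child *node) pop() *node {
	p := child.parent
	if p == nil {
		return nil
	}
	p.numDiff += child.numDiff
	if p.maxDepth < child.maxDepth+1 {
		p.maxDepth = child.maxDepth + 1
	}
	return p
}

func main() {
	root := &node{}
	leaf := &node{parent: root, numDiff: 2}
	leaf.pop()
	fmt.Printf("%+v\n", *root) // {parent:<nil> numDiff:2 maxDepth:1}
}
```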
@ -1,9 +0,0 @@
y.output

# ignore intellij files
.idea
*.iml
*.ipr
*.iws

*.test
@ -1,13 +0,0 @@
sudo: false

language: go

go:
- 1.x
- tip

branches:
  only:
  - master

script: make test
@ -1,18 +0,0 @@
TEST?=./...

default: test

fmt: generate
	go fmt ./...

test: generate
	go get -t ./...
	go test $(TEST) $(TESTARGS)

generate:
	go generate ./...

updatedeps:
	go get -u golang.org/x/tools/cmd/stringer

.PHONY: default generate test updatedeps
@ -1,125 +0,0 @@
# HCL

[GoDoc](https://godoc.org/github.com/hashicorp/hcl) [Build Status](https://travis-ci.org/hashicorp/hcl)

HCL (HashiCorp Configuration Language) is a configuration language built
by HashiCorp. The goal of HCL is to build a structured configuration language
that is both human- and machine-friendly for use with command-line tools, but
specifically targeted towards DevOps tools, servers, etc.

HCL is also fully JSON compatible. That is, JSON can be used as completely
valid input to a system expecting HCL. This helps make systems
interoperable with other systems.

HCL is heavily inspired by
[libucl](https://github.com/vstakhov/libucl),
nginx configuration, and other similar languages.

## Why?

A common question when viewing HCL is: why not JSON, YAML, etc.?

Prior to HCL, the tools we built at [HashiCorp](http://www.hashicorp.com)
used a variety of configuration languages, from full programming languages
such as Ruby to complete data structure languages such as JSON. What we
learned is that some people wanted human-friendly configuration languages
and some people wanted machine-friendly languages.

JSON fits a nice balance in this, but is fairly verbose and, most
importantly, doesn't support comments. With YAML, we found that beginners
had a really hard time determining what the actual structure was, and
ended up guessing more often than not whether to use a hyphen, colon, etc.
in order to represent some configuration key.

Full programming languages such as Ruby enable complex behavior
a configuration language shouldn't usually allow, and also force
people to learn some subset of Ruby.

Because of this, we decided to create our own configuration language
that is JSON-compatible. Our configuration language (HCL) is designed
to be written and modified by humans. The API for HCL allows JSON
as an input so that it is also machine-friendly (machines can generate
JSON instead of trying to generate HCL).

Our goal with HCL is not to alienate other configuration languages.
It is instead to provide HCL as a specialized language for our tools,
and JSON as the interoperability layer.

## Syntax

For a complete grammar, please see the parser itself. A high-level overview
of the syntax and grammar is listed here.

* Single-line comments start with `#` or `//`.

* Multi-line comments are wrapped in `/*` and `*/`. Nested block comments
  are not allowed. A multi-line comment (also known as a block comment)
  terminates at the first `*/` found.

* Values are assigned with the syntax `key = value` (whitespace doesn't
  matter). The value can be any primitive: a string, number, boolean,
  object, or list.

* Strings are double-quoted and can contain any UTF-8 characters.
  Example: `"Hello, World"`

* Multi-line strings start with `<<EOF` at the end of a line, and end
  with `EOF` on its own line ([here documents](https://en.wikipedia.org/wiki/Here_document)).
  Any text may be used in place of `EOF`. Example:

  ```
  <<FOO
  hello
  world
  FOO
  ```

* Numbers are assumed to be base 10. If you prefix a number with `0x`,
  it is treated as hexadecimal. If it is prefixed with `0`, it is
  treated as octal. Numbers can be in scientific notation: "1e10".

* Boolean values: `true`, `false`

* Arrays can be made by wrapping values in `[]`. Example:
  `["foo", "bar", 42]`. Arrays can contain primitives,
  other arrays, and objects. As an alternative, lists
  of objects can be created with repeated blocks, using
  this structure:

  ```hcl
  service {
      key = "value"
  }

  service {
      key = "value"
  }
  ```

Objects and nested objects are created using the structure shown below:

```
variable "ami" {
    description = "the AMI to use"
}
```

This would be equivalent to the following JSON:

```json
{
  "variable": {
    "ami": {
      "description": "the AMI to use"
    }
  }
}
```

## Thanks

Thanks to:

* [@vstakhov](https://github.com/vstakhov) - The original libucl parser
  and syntax that HCL was based on.

* [@fatih](https://github.com/fatih) - The rewritten HCL parser
  in pure Go (no goyacc) and support for a printer.
@ -1,19 +0,0 @@
version: "build-{branch}-{build}"
image: Visual Studio 2015
clone_folder: c:\gopath\src\github.com\hashicorp\hcl
environment:
  GOPATH: c:\gopath
init:
  - git config --global core.autocrlf false
install:
  - cmd: >-
      echo %Path%

      go version

      go env

      go get -t ./...

build_script:
  - cmd: go test -v ./...
@ -1,729 +0,0 @@
package hcl

import (
	"errors"
	"fmt"
	"reflect"
	"sort"
	"strconv"
	"strings"

	"github.com/hashicorp/hcl/hcl/ast"
	"github.com/hashicorp/hcl/hcl/parser"
	"github.com/hashicorp/hcl/hcl/token"
)

// This is the tag to use with structures to have settings for HCL
const tagName = "hcl"

var (
	// nodeType holds a reference to the type of ast.Node
	nodeType reflect.Type = findNodeType()
)

// Unmarshal accepts a byte slice as input and writes the
// data to the value pointed to by v.
func Unmarshal(bs []byte, v interface{}) error {
	root, err := parse(bs)
	if err != nil {
		return err
	}

	return DecodeObject(v, root)
}

// Decode reads the given input and decodes it into the structure
// given by `out`.
func Decode(out interface{}, in string) error {
	obj, err := Parse(in)
	if err != nil {
		return err
	}

	return DecodeObject(out, obj)
}

// DecodeObject is a lower-level version of Decode. It decodes a
// raw Object into the given output.
func DecodeObject(out interface{}, n ast.Node) error {
	val := reflect.ValueOf(out)
	if val.Kind() != reflect.Ptr {
		return errors.New("result must be a pointer")
	}

	// If we have the file, we really decode the root node
	if f, ok := n.(*ast.File); ok {
		n = f.Node
	}

	var d decoder
	return d.decode("root", n, val.Elem())
}
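For context, here is how these (pre-HCL2) entry points were typically driven; a minimal sketch, with the Config/Service types and their fields invented for the example. The repeated `service` blocks are gathered into the slice by decodeSlice below, and the `hcl` tags are consumed by decodeStruct:

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/hcl"
)

// Service and Config are hypothetical target types for this sketch.
type Service struct {
	Key string `hcl:"key"`
}

type Config struct {
	Services []Service `hcl:"service"`
}

func main() {
	input := `
service {
    key = "value"
}

service {
    key = "other"
}
`
	var c Config
	if err := hcl.Decode(&c, input); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%+v\n", c) // {Services:[{Key:value} {Key:other}]}
}
```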
type decoder struct {
	stack []reflect.Kind
}

func (d *decoder) decode(name string, node ast.Node, result reflect.Value) error {
	k := result

	// If we have an interface with a valid value, we use that
	// for the check.
	if result.Kind() == reflect.Interface {
		elem := result.Elem()
		if elem.IsValid() {
			k = elem
		}
	}

	// Push current onto stack unless it is an interface.
	if k.Kind() != reflect.Interface {
		d.stack = append(d.stack, k.Kind())

		// Schedule a pop
		defer func() {
			d.stack = d.stack[:len(d.stack)-1]
		}()
	}

	switch k.Kind() {
	case reflect.Bool:
		return d.decodeBool(name, node, result)
	case reflect.Float32, reflect.Float64:
		return d.decodeFloat(name, node, result)
	case reflect.Int, reflect.Int32, reflect.Int64:
		return d.decodeInt(name, node, result)
	case reflect.Interface:
		// When we see an interface, we make our own thing
		return d.decodeInterface(name, node, result)
	case reflect.Map:
		return d.decodeMap(name, node, result)
	case reflect.Ptr:
		return d.decodePtr(name, node, result)
	case reflect.Slice:
		return d.decodeSlice(name, node, result)
	case reflect.String:
		return d.decodeString(name, node, result)
	case reflect.Struct:
		return d.decodeStruct(name, node, result)
	default:
		return &parser.PosError{
			Pos: node.Pos(),
			Err: fmt.Errorf("%s: unknown kind to decode into: %s", name, k.Kind()),
		}
	}
}

func (d *decoder) decodeBool(name string, node ast.Node, result reflect.Value) error {
	switch n := node.(type) {
	case *ast.LiteralType:
		if n.Token.Type == token.BOOL {
			v, err := strconv.ParseBool(n.Token.Text)
			if err != nil {
				return err
			}

			result.Set(reflect.ValueOf(v))
			return nil
		}
	}

	return &parser.PosError{
		Pos: node.Pos(),
		Err: fmt.Errorf("%s: unknown type %T", name, node),
	}
}

func (d *decoder) decodeFloat(name string, node ast.Node, result reflect.Value) error {
	switch n := node.(type) {
	case *ast.LiteralType:
		if n.Token.Type == token.FLOAT || n.Token.Type == token.NUMBER {
			v, err := strconv.ParseFloat(n.Token.Text, 64)
			if err != nil {
				return err
			}

			result.Set(reflect.ValueOf(v).Convert(result.Type()))
			return nil
		}
	}

	return &parser.PosError{
		Pos: node.Pos(),
		Err: fmt.Errorf("%s: unknown type %T", name, node),
	}
}

func (d *decoder) decodeInt(name string, node ast.Node, result reflect.Value) error {
	switch n := node.(type) {
	case *ast.LiteralType:
		switch n.Token.Type {
		case token.NUMBER:
			v, err := strconv.ParseInt(n.Token.Text, 0, 0)
			if err != nil {
				return err
			}

			if result.Kind() == reflect.Interface {
				result.Set(reflect.ValueOf(int(v)))
			} else {
				result.SetInt(v)
			}
			return nil
		case token.STRING:
			v, err := strconv.ParseInt(n.Token.Value().(string), 0, 0)
			if err != nil {
				return err
			}

			if result.Kind() == reflect.Interface {
				result.Set(reflect.ValueOf(int(v)))
			} else {
				result.SetInt(v)
			}
			return nil
		}
	}

	return &parser.PosError{
		Pos: node.Pos(),
		Err: fmt.Errorf("%s: unknown type %T", name, node),
	}
}

func (d *decoder) decodeInterface(name string, node ast.Node, result reflect.Value) error {
	// When we see an ast.Node, we retain the value to enable deferred decoding.
	// Very useful in situations where we want to preserve ast.Node information
	// like Pos
	if result.Type() == nodeType && result.CanSet() {
		result.Set(reflect.ValueOf(node))
		return nil
	}

	var set reflect.Value
	redecode := true

	// For testing types, ObjectType should just be treated as a list. We
	// set this to a temporary var because we want to pass in the real node.
	testNode := node
	if ot, ok := node.(*ast.ObjectType); ok {
		testNode = ot.List
	}

	switch n := testNode.(type) {
	case *ast.ObjectList:
		// If we're at the root or we're directly within a slice, then we
		// decode objects into map[string]interface{}, otherwise we decode
		// them into lists.
		if len(d.stack) == 0 || d.stack[len(d.stack)-1] == reflect.Slice {
			var temp map[string]interface{}
			tempVal := reflect.ValueOf(temp)
			result := reflect.MakeMap(
				reflect.MapOf(
					reflect.TypeOf(""),
					tempVal.Type().Elem()))

			set = result
		} else {
			var temp []map[string]interface{}
			tempVal := reflect.ValueOf(temp)
			result := reflect.MakeSlice(
				reflect.SliceOf(tempVal.Type().Elem()), 0, len(n.Items))
			set = result
		}
	case *ast.ObjectType:
		// If we're at the root or we're directly within a slice, then we
		// decode objects into map[string]interface{}, otherwise we decode
		// them into lists.
		if len(d.stack) == 0 || d.stack[len(d.stack)-1] == reflect.Slice {
			var temp map[string]interface{}
			tempVal := reflect.ValueOf(temp)
			result := reflect.MakeMap(
				reflect.MapOf(
					reflect.TypeOf(""),
					tempVal.Type().Elem()))

			set = result
		} else {
			var temp []map[string]interface{}
			tempVal := reflect.ValueOf(temp)
			result := reflect.MakeSlice(
				reflect.SliceOf(tempVal.Type().Elem()), 0, 1)
			set = result
		}
	case *ast.ListType:
		var temp []interface{}
		tempVal := reflect.ValueOf(temp)
		result := reflect.MakeSlice(
			reflect.SliceOf(tempVal.Type().Elem()), 0, 0)
		set = result
	case *ast.LiteralType:
		switch n.Token.Type {
		case token.BOOL:
			var result bool
			set = reflect.Indirect(reflect.New(reflect.TypeOf(result)))
		case token.FLOAT:
			var result float64
			set = reflect.Indirect(reflect.New(reflect.TypeOf(result)))
		case token.NUMBER:
			var result int
			set = reflect.Indirect(reflect.New(reflect.TypeOf(result)))
		case token.STRING, token.HEREDOC:
			set = reflect.Indirect(reflect.New(reflect.TypeOf("")))
		default:
			return &parser.PosError{
				Pos: node.Pos(),
				Err: fmt.Errorf("%s: cannot decode into interface: %T", name, node),
			}
		}
	default:
		return fmt.Errorf(
			"%s: cannot decode into interface: %T",
			name, node)
	}

	// Set the result to what it's supposed to be, then reset
	// result so we don't reflect into this method anymore.
	result.Set(set)

	if redecode {
		// Revisit the node so that we can use the newly instantiated
		// thing and populate it.
		if err := d.decode(name, node, result); err != nil {
			return err
		}
	}

	return nil
}

func (d *decoder) decodeMap(name string, node ast.Node, result reflect.Value) error {
	if item, ok := node.(*ast.ObjectItem); ok {
		node = &ast.ObjectList{Items: []*ast.ObjectItem{item}}
	}

	if ot, ok := node.(*ast.ObjectType); ok {
		node = ot.List
	}

	n, ok := node.(*ast.ObjectList)
	if !ok {
		return &parser.PosError{
			Pos: node.Pos(),
			Err: fmt.Errorf("%s: not an object type for map (%T)", name, node),
		}
	}

	// If we have an interface, then we can address the interface,
	// but not the slice itself, so get the element but set the interface
	set := result
	if result.Kind() == reflect.Interface {
		result = result.Elem()
	}

	resultType := result.Type()
	resultElemType := resultType.Elem()
	resultKeyType := resultType.Key()
	if resultKeyType.Kind() != reflect.String {
		return &parser.PosError{
			Pos: node.Pos(),
			Err: fmt.Errorf("%s: map must have string keys", name),
		}
	}

	// Make a map if it is nil
	resultMap := result
	if result.IsNil() {
		resultMap = reflect.MakeMap(
			reflect.MapOf(resultKeyType, resultElemType))
	}

	// Go through each element and decode it.
	done := make(map[string]struct{})
	for _, item := range n.Items {
		if item.Val == nil {
			continue
		}

		// github.com/hashicorp/terraform/issue/5740
		if len(item.Keys) == 0 {
			return &parser.PosError{
				Pos: node.Pos(),
				Err: fmt.Errorf("%s: map must have string keys", name),
			}
		}

		// Get the key we're dealing with, which is the first item
		keyStr := item.Keys[0].Token.Value().(string)

		// If we've already processed this key, then ignore it
		if _, ok := done[keyStr]; ok {
			continue
		}

		// Determine the value. If we have more than one key, then we
		// get the objectlist of only these keys.
		itemVal := item.Val
		if len(item.Keys) > 1 {
			itemVal = n.Filter(keyStr)
			done[keyStr] = struct{}{}
		}

		// Make the field name
		fieldName := fmt.Sprintf("%s.%s", name, keyStr)

		// Get the key/value as reflection values
		key := reflect.ValueOf(keyStr)
		val := reflect.Indirect(reflect.New(resultElemType))

		// If we have a pre-existing value in the map, use that
		oldVal := resultMap.MapIndex(key)
		if oldVal.IsValid() {
			val.Set(oldVal)
		}

		// Decode!
		if err := d.decode(fieldName, itemVal, val); err != nil {
			return err
		}

		// Set the value on the map
		resultMap.SetMapIndex(key, val)
	}

	// Set the final map if we can
	set.Set(resultMap)
	return nil
}
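decodeMap turns an HCL object into a Go map keyed by string. A short sketch of feeding it one through the public API (the config and field names here are hypothetical):

```go
package main

import (
	"fmt"
	"log"

	"github.com/hashicorp/hcl"
)

func main() {
	// Each key inside the object becomes a map entry; as enforced above,
	// the map's key type must be string.
	input := `
tags {
    env  = "prod"
    team = "infra"
}
`
	var out struct {
		Tags map[string]string `hcl:"tags"`
	}
	if err := hcl.Decode(&out, input); err != nil {
		log.Fatal(err)
	}
	fmt.Println(out.Tags["env"], out.Tags["team"]) // prod infra
}
```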
func (d *decoder) decodePtr(name string, node ast.Node, result reflect.Value) error {
	// Create an element of the concrete (non pointer) type and decode
	// into that. Then set the value of the pointer to this type.
	resultType := result.Type()
	resultElemType := resultType.Elem()
	val := reflect.New(resultElemType)
	if err := d.decode(name, node, reflect.Indirect(val)); err != nil {
		return err
	}

	result.Set(val)
	return nil
}

func (d *decoder) decodeSlice(name string, node ast.Node, result reflect.Value) error {
	// If we have an interface, then we can address the interface,
	// but not the slice itself, so get the element but set the interface
	set := result
	if result.Kind() == reflect.Interface {
		result = result.Elem()
	}
	// Create the slice if it is nil
	resultType := result.Type()
	resultElemType := resultType.Elem()
	if result.IsNil() {
		resultSliceType := reflect.SliceOf(resultElemType)
		result = reflect.MakeSlice(
			resultSliceType, 0, 0)
	}

	// Figure out the items we'll be copying into the slice
	var items []ast.Node
	switch n := node.(type) {
	case *ast.ObjectList:
		items = make([]ast.Node, len(n.Items))
		for i, item := range n.Items {
			items[i] = item
		}
	case *ast.ObjectType:
		items = []ast.Node{n}
	case *ast.ListType:
		items = n.List
	default:
		return &parser.PosError{
			Pos: node.Pos(),
			Err: fmt.Errorf("unknown slice type: %T", node),
		}
	}

	for i, item := range items {
		fieldName := fmt.Sprintf("%s[%d]", name, i)

		// Decode
		val := reflect.Indirect(reflect.New(resultElemType))

		// if item is an object that was decoded from ambiguous JSON and
		// flattened, make sure it's expanded if it needs to decode into a
		// defined structure.
		item := expandObject(item, val)

		if err := d.decode(fieldName, item, val); err != nil {
			return err
		}

		// Append it onto the slice
		result = reflect.Append(result, val)
	}

	set.Set(result)
	return nil
}

// expandObject detects if an ambiguous JSON object was flattened to a List which
// should be decoded into a struct, and expands the ast to properly decode.
func expandObject(node ast.Node, result reflect.Value) ast.Node {
	item, ok := node.(*ast.ObjectItem)
	if !ok {
		return node
	}

	elemType := result.Type()

	// our target type must be a struct
	switch elemType.Kind() {
	case reflect.Ptr:
		switch elemType.Elem().Kind() {
		case reflect.Struct:
			//OK
		default:
			return node
		}
	case reflect.Struct:
		//OK
	default:
		return node
	}

	// A list value will have a key and field name. If it had more fields,
	// it wouldn't have been flattened.
	if len(item.Keys) != 2 {
		return node
	}

	keyToken := item.Keys[0].Token
	item.Keys = item.Keys[1:]

	// we need to un-flatten the ast enough to decode
	newNode := &ast.ObjectItem{
		Keys: []*ast.ObjectKey{
			&ast.ObjectKey{
				Token: keyToken,
			},
		},
		Val: &ast.ObjectType{
			List: &ast.ObjectList{
				Items: []*ast.ObjectItem{item},
			},
		},
	}

	return newNode
}

func (d *decoder) decodeString(name string, node ast.Node, result reflect.Value) error {
	switch n := node.(type) {
	case *ast.LiteralType:
		switch n.Token.Type {
		case token.NUMBER:
			result.Set(reflect.ValueOf(n.Token.Text).Convert(result.Type()))
			return nil
		case token.STRING, token.HEREDOC:
			result.Set(reflect.ValueOf(n.Token.Value()).Convert(result.Type()))
			return nil
		}
	}

	return &parser.PosError{
		Pos: node.Pos(),
		Err: fmt.Errorf("%s: unknown type for string %T", name, node),
	}
}

func (d *decoder) decodeStruct(name string, node ast.Node, result reflect.Value) error {
	var item *ast.ObjectItem
	if it, ok := node.(*ast.ObjectItem); ok {
		item = it
		node = it.Val
	}

	if ot, ok := node.(*ast.ObjectType); ok {
		node = ot.List
	}

	// Handle the special case where the object itself is a literal. Previously
	// the yacc parser would always ensure top-level elements were arrays. The new
	// parser does not make the same guarantees, thus we need to convert any
	// top-level literal elements into a list.
	if _, ok := node.(*ast.LiteralType); ok && item != nil {
		node = &ast.ObjectList{Items: []*ast.ObjectItem{item}}
	}

	list, ok := node.(*ast.ObjectList)
	if !ok {
		return &parser.PosError{
			Pos: node.Pos(),
			Err: fmt.Errorf("%s: not an object type for struct (%T)", name, node),
		}
	}

	// This slice will keep track of all the structs we'll be decoding.
	// There can be more than one struct if there are embedded structs
	// that are squashed.
	structs := make([]reflect.Value, 1, 5)
	structs[0] = result

	// Compile the list of all the fields that we're going to be decoding
	// from all the structs.
	type field struct {
		field reflect.StructField
		val   reflect.Value
	}
	fields := []field{}
	for len(structs) > 0 {
		structVal := structs[0]
		structs = structs[1:]

		structType := structVal.Type()
		for i := 0; i < structType.NumField(); i++ {
			fieldType := structType.Field(i)
			tagParts := strings.Split(fieldType.Tag.Get(tagName), ",")

			// Ignore fields with tag name "-"
			if tagParts[0] == "-" {
				continue
			}

			if fieldType.Anonymous {
				fieldKind := fieldType.Type.Kind()
				if fieldKind != reflect.Struct {
					return &parser.PosError{
						Pos: node.Pos(),
						Err: fmt.Errorf("%s: unsupported type to struct: %s",
							fieldType.Name, fieldKind),
					}
				}

				// We have an embedded field. We "squash" the fields down
				// if specified in the tag.
				squash := false
				for _, tag := range tagParts[1:] {
					if tag == "squash" {
						squash = true
						break
					}
				}

				if squash {
					structs = append(
						structs, result.FieldByName(fieldType.Name))
					continue
				}
			}

			// Normal struct field, store it away
			fields = append(fields, field{fieldType, structVal.Field(i)})
		}
	}

	usedKeys := make(map[string]struct{})
	decodedFields := make([]string, 0, len(fields))
	decodedFieldsVal := make([]reflect.Value, 0)
	unusedKeysVal := make([]reflect.Value, 0)
	for _, f := range fields {
		field, fieldValue := f.field, f.val
		if !fieldValue.IsValid() {
			// This should never happen
			panic("field is not valid")
		}

		// If we can't set the field, then it is unexported or something,
		// and we just continue onwards.
		if !fieldValue.CanSet() {
			continue
		}

		fieldName := field.Name

		tagValue := field.Tag.Get(tagName)
		tagParts := strings.SplitN(tagValue, ",", 2)
		if len(tagParts) >= 2 {
			switch tagParts[1] {
			case "decodedFields":
				decodedFieldsVal = append(decodedFieldsVal, fieldValue)
				continue
			case "key":
				if item == nil {
					return &parser.PosError{
						Pos: node.Pos(),
						Err: fmt.Errorf("%s: %s asked for 'key', impossible",
							name, fieldName),
					}
				}

				fieldValue.SetString(item.Keys[0].Token.Value().(string))
				continue
			case "unusedKeys":
				unusedKeysVal = append(unusedKeysVal, fieldValue)
				continue
			}
		}

		if tagParts[0] != "" {
			fieldName = tagParts[0]
		}

		// Determine the element we'll use to decode. If it is a single
		// match (only object with the field), then we decode it exactly.
		// If it is a prefix match, then we decode the matches.
		filter := list.Filter(fieldName)
|
||||
|
||||
prefixMatches := filter.Children()
|
||||
matches := filter.Elem()
|
||||
if len(matches.Items) == 0 && len(prefixMatches.Items) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
// Track the used key
|
||||
usedKeys[fieldName] = struct{}{}
|
||||
|
||||
// Create the field name and decode. We range over the elements
|
||||
// because we actually want the value.
|
||||
fieldName = fmt.Sprintf("%s.%s", name, fieldName)
|
||||
if len(prefixMatches.Items) > 0 {
|
||||
if err := d.decode(fieldName, prefixMatches, fieldValue); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
for _, match := range matches.Items {
|
||||
var decodeNode ast.Node = match.Val
|
||||
if ot, ok := decodeNode.(*ast.ObjectType); ok {
|
||||
decodeNode = &ast.ObjectList{Items: ot.List.Items}
|
||||
}
|
||||
|
||||
if err := d.decode(fieldName, decodeNode, fieldValue); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
decodedFields = append(decodedFields, field.Name)
|
||||
}
|
||||
|
||||
if len(decodedFieldsVal) > 0 {
|
||||
// Sort it so that it is deterministic
|
||||
sort.Strings(decodedFields)
|
||||
|
||||
for _, v := range decodedFieldsVal {
|
||||
v.Set(reflect.ValueOf(decodedFields))
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
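
A sketch of the struct tags decodeStruct recognizes, using hypothetical Config/Server/Base types; ",key" captures the block label and ",squash" inlines an embedded struct, as handled above:

package main

import (
	"fmt"

	"github.com/hashicorp/hcl"
)

type Base struct {
	Region string `hcl:"region"`
}

type Server struct {
	Base `hcl:",squash"` // embedded fields decode as if declared inline
	Name string `hcl:",key"` // receives the block label, e.g. "web"
	Addr string `hcl:"addr"`
}

type Config struct {
	Servers []Server `hcl:"server"`
}

func main() {
	input := `
server "web" {
  region = "us-east-1"
  addr   = "10.0.0.1"
}
`
	var c Config
	if err := hcl.Decode(&c, input); err != nil {
		panic(err)
	}
	fmt.Println(c.Servers[0].Name, c.Servers[0].Region, c.Servers[0].Addr)
}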
|
||||
|
||||
// findNodeType returns the type of ast.Node
|
||||
func findNodeType() reflect.Type {
|
||||
var nodeContainer struct {
|
||||
Node ast.Node
|
||||
}
|
||||
value := reflect.ValueOf(nodeContainer).FieldByName("Node")
|
||||
return value.Type()
|
||||
}
|
@ -1,3 +0,0 @@
|
||||
module github.com/hashicorp/hcl
|
||||
|
||||
require github.com/davecgh/go-spew v1.1.1
|
@ -1,2 +0,0 @@
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
@ -1,11 +0,0 @@
|
||||
// Package hcl decodes HCL into usable Go structures.
|
||||
//
|
||||
// hcl input can come in either pure HCL format or JSON format.
|
||||
// It can be parsed into an AST, and then decoded into a structure,
|
||||
// or it can be decoded directly from a string into a structure.
|
||||
//
|
||||
// If you choose to parse HCL into a raw AST, the benefit is that you
|
||||
// can write custom visitor implementations to implement custom
|
||||
// semantic checks. By default, HCL does not perform any semantic
|
||||
// checks.
|
||||
package hcl
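
To make the package comment concrete, a minimal sketch of the direct string-to-struct path it mentions, assuming the hcl.Decode entry point defined elsewhere in this repository:

package main

import (
	"fmt"

	"github.com/hashicorp/hcl"
)

func main() {
	var out struct {
		Name string `hcl:"name"`
	}
	// Decode goes straight from source text to a Go structure, skipping
	// any custom AST visiting.
	if err := hcl.Decode(&out, `name = "hcl"`); err != nil {
		panic(err)
	}
	fmt.Println(out.Name) // hcl
}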
|
@ -1,219 +0,0 @@
|
||||
// Package ast declares the types used to represent syntax trees for HCL
|
||||
// (HashiCorp Configuration Language)
|
||||
package ast
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/hcl/hcl/token"
|
||||
)
|
||||
|
||||
// Node is an element in the abstract syntax tree.
|
||||
type Node interface {
|
||||
node()
|
||||
Pos() token.Pos
|
||||
}
|
||||
|
||||
func (File) node() {}
|
||||
func (ObjectList) node() {}
|
||||
func (ObjectKey) node() {}
|
||||
func (ObjectItem) node() {}
|
||||
func (Comment) node() {}
|
||||
func (CommentGroup) node() {}
|
||||
func (ObjectType) node() {}
|
||||
func (LiteralType) node() {}
|
||||
func (ListType) node() {}
|
||||
|
||||
// File represents a single HCL file
|
||||
type File struct {
|
||||
Node Node // usually a *ObjectList
|
||||
Comments []*CommentGroup // list of all comments in the source
|
||||
}
|
||||
|
||||
func (f *File) Pos() token.Pos {
|
||||
return f.Node.Pos()
|
||||
}
|
||||
|
||||
// ObjectList represents a list of ObjectItems. An HCL file itself is an
|
||||
// ObjectList.
|
||||
type ObjectList struct {
|
||||
Items []*ObjectItem
|
||||
}
|
||||
|
||||
func (o *ObjectList) Add(item *ObjectItem) {
|
||||
o.Items = append(o.Items, item)
|
||||
}
|
||||
|
||||
// Filter returns the objects that have the given key list as a prefix.
|
||||
//
|
||||
// The returned list of objects contains ObjectItems where the keys have
|
||||
// this prefix already stripped off. This might result in objects with
|
||||
// zero-length key lists if they have no children.
|
||||
//
|
||||
// If no matches are found, an empty ObjectList (non-nil) is returned.
|
||||
func (o *ObjectList) Filter(keys ...string) *ObjectList {
|
||||
var result ObjectList
|
||||
for _, item := range o.Items {
|
||||
// If there aren't enough keys, then ignore this
|
||||
if len(item.Keys) < len(keys) {
|
||||
continue
|
||||
}
|
||||
|
||||
match := true
|
||||
for i, key := range item.Keys[:len(keys)] {
|
||||
key := key.Token.Value().(string)
|
||||
if key != keys[i] && !strings.EqualFold(key, keys[i]) {
|
||||
match = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if !match {
|
||||
continue
|
||||
}
|
||||
|
||||
// Strip off the prefix from the children
|
||||
newItem := *item
|
||||
newItem.Keys = newItem.Keys[len(keys):]
|
||||
result.Add(&newItem)
|
||||
}
|
||||
|
||||
return &result
|
||||
}
|
||||
|
||||
// Children returns further nested objects (key length > 0) within this
|
||||
// ObjectList. This should be used with Filter to get at child items.
|
||||
func (o *ObjectList) Children() *ObjectList {
|
||||
var result ObjectList
|
||||
for _, item := range o.Items {
|
||||
if len(item.Keys) > 0 {
|
||||
result.Add(item)
|
||||
}
|
||||
}
|
||||
|
||||
return &result
|
||||
}
|
||||
|
||||
// Elem returns items in the list that are direct element assignments
|
||||
// (key length == 0). This should be used with Filter to get at elements.
|
||||
func (o *ObjectList) Elem() *ObjectList {
|
||||
var result ObjectList
|
||||
for _, item := range o.Items {
|
||||
if len(item.Keys) == 0 {
|
||||
result.Add(item)
|
||||
}
|
||||
}
|
||||
|
||||
return &result
|
||||
}
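
A short sketch of how Filter, Children and Elem combine, using the parser from this same change set; the input and key names are made up for illustration:

package main

import (
	"fmt"

	"github.com/hashicorp/hcl/hcl/ast"
	"github.com/hashicorp/hcl/hcl/parser"
)

func main() {
	src := []byte(`
service "web" { port = 80 }
service "api" { port = 81 }
region = "us-east-1"
`)
	f, err := parser.Parse(src)
	if err != nil {
		panic(err)
	}

	list := f.Node.(*ast.ObjectList)

	// Filter strips the "service" prefix; the remaining key is the label.
	services := list.Filter("service")
	fmt.Println(len(services.Items)) // 2

	// Children keeps items that still have keys ("web", "api"); Elem would
	// keep direct assignments such as `service = ...`.
	for _, item := range services.Children().Items {
		fmt.Println(item.Keys[0].Token.Value())
	}
}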
|
||||
|
||||
func (o *ObjectList) Pos() token.Pos {
|
||||
// Pos returns the position of the first item in the list.
|
||||
return o.Items[0].Pos()
|
||||
}
|
||||
|
||||
// ObjectItem represents an HCL Object Item. An item is represented with a key
|
||||
// (or keys). It can be an assignment or an object (both normal and nested)
|
||||
type ObjectItem struct {
|
||||
// Keys is only one element long if the item is an assignment. If it's a
|
||||
// nested object it can be longer than one. In that case Assign is
|
||||
// invalid, as there are no assignments for a nested object.
|
||||
Keys []*ObjectKey
|
||||
|
||||
// Assign contains the position of "=", if any
|
||||
Assign token.Pos
|
||||
|
||||
// Val is the item itself. It can be an object, list, number, bool or a
|
||||
// string. If key length is larger than one, val can be only of type
|
||||
// Object.
|
||||
Val Node
|
||||
|
||||
LeadComment *CommentGroup // associated lead comment
|
||||
LineComment *CommentGroup // associated line comment
|
||||
}
|
||||
|
||||
func (o *ObjectItem) Pos() token.Pos {
|
||||
// I'm not entirely sure what causes this, but removing this causes
|
||||
// a test failure. We should investigate at some point.
|
||||
if len(o.Keys) == 0 {
|
||||
return token.Pos{}
|
||||
}
|
||||
|
||||
return o.Keys[0].Pos()
|
||||
}
|
||||
|
||||
// An ObjectKey is either an identifier or a string.
|
||||
type ObjectKey struct {
|
||||
Token token.Token
|
||||
}
|
||||
|
||||
func (o *ObjectKey) Pos() token.Pos {
|
||||
return o.Token.Pos
|
||||
}
|
||||
|
||||
// LiteralType represents a literal of basic type. Valid types are:
|
||||
// token.NUMBER, token.FLOAT, token.BOOL and token.STRING
|
||||
type LiteralType struct {
|
||||
Token token.Token
|
||||
|
||||
// comment types, only used when in a list
|
||||
LeadComment *CommentGroup
|
||||
LineComment *CommentGroup
|
||||
}
|
||||
|
||||
func (l *LiteralType) Pos() token.Pos {
|
||||
return l.Token.Pos
|
||||
}
|
||||
|
||||
// ListType represents an HCL list type
|
||||
type ListType struct {
|
||||
Lbrack token.Pos // position of "["
|
||||
Rbrack token.Pos // position of "]"
|
||||
List []Node // the elements in lexical order
|
||||
}
|
||||
|
||||
func (l *ListType) Pos() token.Pos {
|
||||
return l.Lbrack
|
||||
}
|
||||
|
||||
func (l *ListType) Add(node Node) {
|
||||
l.List = append(l.List, node)
|
||||
}
|
||||
|
||||
// ObjectType represents an HCL object type
|
||||
type ObjectType struct {
|
||||
Lbrace token.Pos // position of "{"
|
||||
Rbrace token.Pos // position of "}"
|
||||
List *ObjectList // the nodes in lexical order
|
||||
}
|
||||
|
||||
func (o *ObjectType) Pos() token.Pos {
|
||||
return o.Lbrace
|
||||
}
|
||||
|
||||
// Comment node represents a single //, # style or /* style comment
|
||||
type Comment struct {
|
||||
Start token.Pos // position of / or #
|
||||
Text string
|
||||
}
|
||||
|
||||
func (c *Comment) Pos() token.Pos {
|
||||
return c.Start
|
||||
}
|
||||
|
||||
// CommentGroup node represents a sequence of comments with no other tokens and
|
||||
// no empty lines between.
|
||||
type CommentGroup struct {
|
||||
List []*Comment // len(List) > 0
|
||||
}
|
||||
|
||||
func (c *CommentGroup) Pos() token.Pos {
|
||||
return c.List[0].Pos()
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------
|
||||
// GoStringer
|
||||
//-------------------------------------------------------------------
|
||||
|
||||
func (o *ObjectKey) GoString() string { return fmt.Sprintf("*%#v", *o) }
|
||||
func (o *ObjectList) GoString() string { return fmt.Sprintf("*%#v", *o) }
|
@ -1,52 +0,0 @@
|
||||
package ast
|
||||
|
||||
import "fmt"
|
||||
|
||||
// WalkFunc describes a function to be called for each node during a Walk. The
|
||||
// returned node can be used to rewrite the AST. Walking stops if the
|
||||
// returned bool is false.
|
||||
type WalkFunc func(Node) (Node, bool)
|
||||
|
||||
// Walk traverses an AST in depth-first order: It starts by calling fn(node);
|
||||
// node must not be nil. If fn returns true, Walk invokes fn recursively for
|
||||
// each of the non-nil children of node, followed by a call of fn(nil). The
|
||||
// returned node of fn can be used to rewrite the node passed to fn.
|
||||
func Walk(node Node, fn WalkFunc) Node {
|
||||
rewritten, ok := fn(node)
|
||||
if !ok {
|
||||
return rewritten
|
||||
}
|
||||
|
||||
switch n := node.(type) {
|
||||
case *File:
|
||||
n.Node = Walk(n.Node, fn)
|
||||
case *ObjectList:
|
||||
for i, item := range n.Items {
|
||||
n.Items[i] = Walk(item, fn).(*ObjectItem)
|
||||
}
|
||||
case *ObjectKey:
|
||||
// nothing to do
|
||||
case *ObjectItem:
|
||||
for i, k := range n.Keys {
|
||||
n.Keys[i] = Walk(k, fn).(*ObjectKey)
|
||||
}
|
||||
|
||||
if n.Val != nil {
|
||||
n.Val = Walk(n.Val, fn)
|
||||
}
|
||||
case *LiteralType:
|
||||
// nothing to do
|
||||
case *ListType:
|
||||
for i, l := range n.List {
|
||||
n.List[i] = Walk(l, fn)
|
||||
}
|
||||
case *ObjectType:
|
||||
n.List = Walk(n.List, fn).(*ObjectList)
|
||||
default:
|
||||
// should we panic here?
|
||||
fmt.Printf("unknown type: %T\n", n)
|
||||
}
|
||||
|
||||
fn(nil)
|
||||
return rewritten
|
||||
}
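
A sketch of driving Walk over a parsed tree; returning the node unchanged with true keeps walking, so this visits the whole tree without rewriting it. The input is arbitrary:

package main

import (
	"fmt"

	"github.com/hashicorp/hcl/hcl/ast"
	"github.com/hashicorp/hcl/hcl/parser"
)

func main() {
	f, err := parser.Parse([]byte("a = \"1\"\nb = \"2\""))
	if err != nil {
		panic(err)
	}

	// Count literal values anywhere in the tree.
	count := 0
	ast.Walk(f.Node, func(n ast.Node) (ast.Node, bool) {
		if _, ok := n.(*ast.LiteralType); ok {
			count++
		}
		return n, true
	})
	fmt.Println(count) // 2
}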
|
@ -1,17 +0,0 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/hashicorp/hcl/hcl/token"
|
||||
)
|
||||
|
||||
// PosError is a parse error that contains a position.
|
||||
type PosError struct {
|
||||
Pos token.Pos
|
||||
Err error
|
||||
}
|
||||
|
||||
func (e *PosError) Error() string {
|
||||
return fmt.Sprintf("At %s: %s", e.Pos, e.Err)
|
||||
}
|
@ -1,532 +0,0 @@
|
||||
// Package parser implements a parser for HCL (HashiCorp Configuration
|
||||
// Language)
|
||||
package parser
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/hashicorp/hcl/hcl/ast"
|
||||
"github.com/hashicorp/hcl/hcl/scanner"
|
||||
"github.com/hashicorp/hcl/hcl/token"
|
||||
)
|
||||
|
||||
type Parser struct {
|
||||
sc *scanner.Scanner
|
||||
|
||||
// Last read token
|
||||
tok token.Token
|
||||
commaPrev token.Token
|
||||
|
||||
comments []*ast.CommentGroup
|
||||
leadComment *ast.CommentGroup // last lead comment
|
||||
lineComment *ast.CommentGroup // last line comment
|
||||
|
||||
enableTrace bool
|
||||
indent int
|
||||
n int // buffer size (max = 1)
|
||||
}
|
||||
|
||||
func newParser(src []byte) *Parser {
|
||||
return &Parser{
|
||||
sc: scanner.New(src),
|
||||
}
|
||||
}
|
||||
|
||||
// Parse parses the given source and returns the abstract syntax tree.
|
||||
func Parse(src []byte) (*ast.File, error) {
|
||||
// normalize all line endings
|
||||
// since the scanner and output only work with "\n" line endings, we would
|
||||
// otherwise end up with dangling "\r" characters in the parsed data.
|
||||
src = bytes.Replace(src, []byte("\r\n"), []byte("\n"), -1)
|
||||
|
||||
p := newParser(src)
|
||||
return p.Parse()
|
||||
}
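
A small sketch of calling this entry point and surfacing a *PosError; the exact message text comes from objectItem further down:

package main

import (
	"fmt"

	"github.com/hashicorp/hcl/hcl/parser"
)

func main() {
	// A lone key with neither '=' nor '{' cannot be completed, so Parse
	// returns a position-carrying error, roughly:
	// At 1:4: key 'foo' expected start of object ('{') or assignment ('=')
	if _, err := parser.Parse([]byte(`foo`)); err != nil {
		fmt.Println(err)
	}
}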
|
||||
|
||||
var errEofToken = errors.New("EOF token found")
|
||||
|
||||
// Parse parses the given source and returns the abstract syntax tree.
|
||||
func (p *Parser) Parse() (*ast.File, error) {
|
||||
f := &ast.File{}
|
||||
var err, scerr error
|
||||
p.sc.Error = func(pos token.Pos, msg string) {
|
||||
scerr = &PosError{Pos: pos, Err: errors.New(msg)}
|
||||
}
|
||||
|
||||
f.Node, err = p.objectList(false)
|
||||
if scerr != nil {
|
||||
return nil, scerr
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
f.Comments = p.comments
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// objectList parses a list of items within an object (generally k/v pairs).
|
||||
// The parameter" obj" tells this whether to we are within an object (braces:
|
||||
// '{', '}') or just at the top level. If we're within an object, we end
|
||||
// at an RBRACE.
|
||||
func (p *Parser) objectList(obj bool) (*ast.ObjectList, error) {
|
||||
defer un(trace(p, "ParseObjectList"))
|
||||
node := &ast.ObjectList{}
|
||||
|
||||
for {
|
||||
if obj {
|
||||
tok := p.scan()
|
||||
p.unscan()
|
||||
if tok.Type == token.RBRACE {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
n, err := p.objectItem()
|
||||
if err == errEofToken {
|
||||
break // we are finished
|
||||
}
|
||||
|
||||
// we don't return a nil node, because we might want to use the already
|
||||
// collected items.
|
||||
if err != nil {
|
||||
return node, err
|
||||
}
|
||||
|
||||
node.Add(n)
|
||||
|
||||
// object lists can be optionally comma-delimited e.g. when a list of maps
|
||||
// is being expressed, so a comma is allowed here - it's simply consumed
|
||||
tok := p.scan()
|
||||
if tok.Type != token.COMMA {
|
||||
p.unscan()
|
||||
}
|
||||
}
|
||||
return node, nil
|
||||
}
|
||||
|
||||
func (p *Parser) consumeComment() (comment *ast.Comment, endline int) {
|
||||
endline = p.tok.Pos.Line
|
||||
|
||||
// count the endline if it's a multiline comment, i.e. starting with /*
|
||||
if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' {
|
||||
// don't use range here - no need to decode Unicode code points
|
||||
for i := 0; i < len(p.tok.Text); i++ {
|
||||
if p.tok.Text[i] == '\n' {
|
||||
endline++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text}
|
||||
p.tok = p.sc.Scan()
|
||||
return
|
||||
}
|
||||
|
||||
func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) {
|
||||
var list []*ast.Comment
|
||||
endline = p.tok.Pos.Line
|
||||
|
||||
for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n {
|
||||
var comment *ast.Comment
|
||||
comment, endline = p.consumeComment()
|
||||
list = append(list, comment)
|
||||
}
|
||||
|
||||
// add comment group to the comments list
|
||||
comments = &ast.CommentGroup{List: list}
|
||||
p.comments = append(p.comments, comments)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// objectItem parses a single object item
|
||||
func (p *Parser) objectItem() (*ast.ObjectItem, error) {
|
||||
defer un(trace(p, "ParseObjectItem"))
|
||||
|
||||
keys, err := p.objectKey()
|
||||
if len(keys) > 0 && err == errEofToken {
|
||||
// We ignore eof token here since it is an error if we didn't
|
||||
// receive a value (but we did receive a key) for the item.
|
||||
err = nil
|
||||
}
|
||||
if len(keys) > 0 && err != nil && p.tok.Type == token.RBRACE {
|
||||
// This is a strange boolean statement, but what it means is:
|
||||
// We have keys with no value, and we're likely in an object
|
||||
// (since RBrace ends an object). For this, we set err to nil so
|
||||
// we continue and get the error below of having the wrong value
|
||||
// type.
|
||||
err = nil
|
||||
|
||||
// Reset the token type so we don't think it completed fine. See
|
||||
// objectType which uses p.tok.Type to check if we're done with
|
||||
// the object.
|
||||
p.tok.Type = token.EOF
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
o := &ast.ObjectItem{
|
||||
Keys: keys,
|
||||
}
|
||||
|
||||
if p.leadComment != nil {
|
||||
o.LeadComment = p.leadComment
|
||||
p.leadComment = nil
|
||||
}
|
||||
|
||||
switch p.tok.Type {
|
||||
case token.ASSIGN:
|
||||
o.Assign = p.tok.Pos
|
||||
o.Val, err = p.object()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
case token.LBRACE:
|
||||
o.Val, err = p.objectType()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
default:
|
||||
keyStr := make([]string, 0, len(keys))
|
||||
for _, k := range keys {
|
||||
keyStr = append(keyStr, k.Token.Text)
|
||||
}
|
||||
|
||||
return nil, &PosError{
|
||||
Pos: p.tok.Pos,
|
||||
Err: fmt.Errorf(
|
||||
"key '%s' expected start of object ('{') or assignment ('=')",
|
||||
strings.Join(keyStr, " ")),
|
||||
}
|
||||
}
|
||||
|
||||
// key=#comment
|
||||
// val
|
||||
if p.lineComment != nil {
|
||||
o.LineComment, p.lineComment = p.lineComment, nil
|
||||
}
|
||||
|
||||
// do a look-ahead for line comment
|
||||
p.scan()
|
||||
if len(keys) > 0 && o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil {
|
||||
o.LineComment = p.lineComment
|
||||
p.lineComment = nil
|
||||
}
|
||||
p.unscan()
|
||||
return o, nil
|
||||
}
|
||||
|
||||
// objectKey parses an object key and returns an ObjectKey AST
|
||||
func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
|
||||
keyCount := 0
|
||||
keys := make([]*ast.ObjectKey, 0)
|
||||
|
||||
for {
|
||||
tok := p.scan()
|
||||
switch tok.Type {
|
||||
case token.EOF:
|
||||
// It is very important to also return the keys here as well as
|
||||
// the error. This is because we need to be able to tell if we
|
||||
// did parse keys prior to finding the EOF, or if we just found
|
||||
// a bare EOF.
|
||||
return keys, errEofToken
|
||||
case token.ASSIGN:
|
||||
// assignment or object only, but not nested objects. this is not
|
||||
// allowed: `foo bar = {}`
|
||||
if keyCount > 1 {
|
||||
return nil, &PosError{
|
||||
Pos: p.tok.Pos,
|
||||
Err: fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type),
|
||||
}
|
||||
}
|
||||
|
||||
if keyCount == 0 {
|
||||
return nil, &PosError{
|
||||
Pos: p.tok.Pos,
|
||||
Err: errors.New("no object keys found!"),
|
||||
}
|
||||
}
|
||||
|
||||
return keys, nil
|
||||
case token.LBRACE:
|
||||
var err error
|
||||
|
||||
// If we have no keys, then it is a syntax error. i.e. {{}} is not
|
||||
// allowed.
|
||||
if len(keys) == 0 {
|
||||
err = &PosError{
|
||||
Pos: p.tok.Pos,
|
||||
Err: fmt.Errorf("expected: IDENT | STRING got: %s", p.tok.Type),
|
||||
}
|
||||
}
|
||||
|
||||
// object
|
||||
return keys, err
|
||||
case token.IDENT, token.STRING:
|
||||
keyCount++
|
||||
keys = append(keys, &ast.ObjectKey{Token: p.tok})
|
||||
case token.ILLEGAL:
|
||||
return keys, &PosError{
|
||||
Pos: p.tok.Pos,
|
||||
Err: fmt.Errorf("illegal character"),
|
||||
}
|
||||
default:
|
||||
return keys, &PosError{
|
||||
Pos: p.tok.Pos,
|
||||
Err: fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// object parses any type of object, such as number, bool, string, object or
|
||||
// list.
|
||||
func (p *Parser) object() (ast.Node, error) {
|
||||
defer un(trace(p, "ParseType"))
|
||||
tok := p.scan()
|
||||
|
||||
switch tok.Type {
|
||||
case token.NUMBER, token.FLOAT, token.BOOL, token.STRING, token.HEREDOC:
|
||||
return p.literalType()
|
||||
case token.LBRACE:
|
||||
return p.objectType()
|
||||
case token.LBRACK:
|
||||
return p.listType()
|
||||
case token.COMMENT:
|
||||
// implement comment
|
||||
case token.EOF:
|
||||
return nil, errEofToken
|
||||
}
|
||||
|
||||
return nil, &PosError{
|
||||
Pos: tok.Pos,
|
||||
Err: fmt.Errorf("Unknown token: %+v", tok),
|
||||
}
|
||||
}
|
||||
|
||||
// objectType parses an object type and returns an ObjectType AST
|
||||
func (p *Parser) objectType() (*ast.ObjectType, error) {
|
||||
defer un(trace(p, "ParseObjectType"))
|
||||
|
||||
// we assume that the currently scanned token is a LBRACE
|
||||
o := &ast.ObjectType{
|
||||
Lbrace: p.tok.Pos,
|
||||
}
|
||||
|
||||
l, err := p.objectList(true)
|
||||
|
||||
// if we hit RBRACE, we are good to go (it means we parsed all items); if it's
|
||||
// not an RBRACE, it's a syntax error and we just return it.
|
||||
if err != nil && p.tok.Type != token.RBRACE {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// No error, scan and expect the ending to be a brace
|
||||
if tok := p.scan(); tok.Type != token.RBRACE {
|
||||
return nil, &PosError{
|
||||
Pos: tok.Pos,
|
||||
Err: fmt.Errorf("object expected closing RBRACE got: %s", tok.Type),
|
||||
}
|
||||
}
|
||||
|
||||
o.List = l
|
||||
o.Rbrace = p.tok.Pos // advanced via parseObjectList
|
||||
return o, nil
|
||||
}
|
||||
|
||||
// listType parses a list type and returns a ListType AST
|
||||
func (p *Parser) listType() (*ast.ListType, error) {
|
||||
defer un(trace(p, "ParseListType"))
|
||||
|
||||
// we assume that the currently scanned token is a LBRACK
|
||||
l := &ast.ListType{
|
||||
Lbrack: p.tok.Pos,
|
||||
}
|
||||
|
||||
needComma := false
|
||||
for {
|
||||
tok := p.scan()
|
||||
if needComma {
|
||||
switch tok.Type {
|
||||
case token.COMMA, token.RBRACK:
|
||||
default:
|
||||
return nil, &PosError{
|
||||
Pos: tok.Pos,
|
||||
Err: fmt.Errorf(
|
||||
"error parsing list, expected comma or list end, got: %s",
|
||||
tok.Type),
|
||||
}
|
||||
}
|
||||
}
|
||||
switch tok.Type {
|
||||
case token.BOOL, token.NUMBER, token.FLOAT, token.STRING, token.HEREDOC:
|
||||
node, err := p.literalType()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// If there is a lead comment, apply it
|
||||
if p.leadComment != nil {
|
||||
node.LeadComment = p.leadComment
|
||||
p.leadComment = nil
|
||||
}
|
||||
|
||||
l.Add(node)
|
||||
needComma = true
|
||||
case token.COMMA:
|
||||
// get next list item or we are at the end
|
||||
// do a look-ahead for line comment
|
||||
p.scan()
|
||||
if p.lineComment != nil && len(l.List) > 0 {
|
||||
lit, ok := l.List[len(l.List)-1].(*ast.LiteralType)
|
||||
if ok {
|
||||
lit.LineComment = p.lineComment
|
||||
l.List[len(l.List)-1] = lit
|
||||
p.lineComment = nil
|
||||
}
|
||||
}
|
||||
p.unscan()
|
||||
|
||||
needComma = false
|
||||
continue
|
||||
case token.LBRACE:
|
||||
// Looks like a nested object, so parse it out
|
||||
node, err := p.objectType()
|
||||
if err != nil {
|
||||
return nil, &PosError{
|
||||
Pos: tok.Pos,
|
||||
Err: fmt.Errorf(
|
||||
"error while trying to parse object within list: %s", err),
|
||||
}
|
||||
}
|
||||
l.Add(node)
|
||||
needComma = true
|
||||
case token.LBRACK:
|
||||
node, err := p.listType()
|
||||
if err != nil {
|
||||
return nil, &PosError{
|
||||
Pos: tok.Pos,
|
||||
Err: fmt.Errorf(
|
||||
"error while trying to parse list within list: %s", err),
|
||||
}
|
||||
}
|
||||
l.Add(node)
|
||||
case token.RBRACK:
|
||||
// finished
|
||||
l.Rbrack = p.tok.Pos
|
||||
return l, nil
|
||||
default:
|
||||
return nil, &PosError{
|
||||
Pos: tok.Pos,
|
||||
Err: fmt.Errorf("unexpected token while parsing list: %s", tok.Type),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// literalType parses a literal type and returns a LiteralType AST
|
||||
func (p *Parser) literalType() (*ast.LiteralType, error) {
|
||||
defer un(trace(p, "ParseLiteral"))
|
||||
|
||||
return &ast.LiteralType{
|
||||
Token: p.tok,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// scan returns the next token from the underlying scanner. If a token has
|
||||
// been unscanned then read that instead. In the process, it collects any
|
||||
// comment groups encountered, and remembers the last lead and line comments.
|
||||
func (p *Parser) scan() token.Token {
|
||||
// If we have a token on the buffer, then return it.
|
||||
if p.n != 0 {
|
||||
p.n = 0
|
||||
return p.tok
|
||||
}
|
||||
|
||||
// Otherwise read the next token from the scanner and save it to the buffer
|
||||
// in case we unscan later.
|
||||
prev := p.tok
|
||||
p.tok = p.sc.Scan()
|
||||
|
||||
if p.tok.Type == token.COMMENT {
|
||||
var comment *ast.CommentGroup
|
||||
var endline int
|
||||
|
||||
// fmt.Printf("p.tok.Pos.Line = %+v prev: %d endline %d \n",
|
||||
// p.tok.Pos.Line, prev.Pos.Line, endline)
|
||||
if p.tok.Pos.Line == prev.Pos.Line {
|
||||
// The comment is on same line as the previous token; it
|
||||
// cannot be a lead comment but may be a line comment.
|
||||
comment, endline = p.consumeCommentGroup(0)
|
||||
if p.tok.Pos.Line != endline {
|
||||
// The next token is on a different line, thus
|
||||
// the last comment group is a line comment.
|
||||
p.lineComment = comment
|
||||
}
|
||||
}
|
||||
|
||||
// consume successor comments, if any
|
||||
endline = -1
|
||||
for p.tok.Type == token.COMMENT {
|
||||
comment, endline = p.consumeCommentGroup(1)
|
||||
}
|
||||
|
||||
if endline+1 == p.tok.Pos.Line && p.tok.Type != token.RBRACE {
|
||||
switch p.tok.Type {
|
||||
case token.RBRACE, token.RBRACK:
|
||||
// Do not count for these cases
|
||||
default:
|
||||
// The next token is following on the line immediately after the
|
||||
// comment group, thus the last comment group is a lead comment.
|
||||
p.leadComment = comment
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return p.tok
|
||||
}
|
||||
|
||||
// unscan pushes the previously read token back onto the buffer.
|
||||
func (p *Parser) unscan() {
|
||||
p.n = 1
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Parsing support
|
||||
|
||||
func (p *Parser) printTrace(a ...interface{}) {
|
||||
if !p.enableTrace {
|
||||
return
|
||||
}
|
||||
|
||||
const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
|
||||
const n = len(dots)
|
||||
fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
|
||||
|
||||
i := 2 * p.indent
|
||||
for i > n {
|
||||
fmt.Print(dots)
|
||||
i -= n
|
||||
}
|
||||
// i <= n
|
||||
fmt.Print(dots[0:i])
|
||||
fmt.Println(a...)
|
||||
}
|
||||
|
||||
func trace(p *Parser, msg string) *Parser {
|
||||
p.printTrace(msg, "(")
|
||||
p.indent++
|
||||
return p
|
||||
}
|
||||
|
||||
// Usage pattern: defer un(trace(p, "..."))
|
||||
func un(p *Parser) {
|
||||
p.indent--
|
||||
p.printTrace(")")
|
||||
}
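
The defer un(trace(p, "...")) pattern works because the arguments of a deferred call are evaluated immediately: trace runs on entry, un on return. A standalone sketch of the same enter/exit idiom with a hypothetical tracer type:

package main

import "fmt"

type tracer struct{ indent int }

func trace(t *tracer, msg string) *tracer {
	fmt.Printf("%*s%s (\n", 2*t.indent, "", msg) // printed on entry
	t.indent++
	return t
}

func un(t *tracer) {
	t.indent--
	fmt.Printf("%*s)\n", 2*t.indent, "") // printed on return
}

func (t *tracer) parseList() {
	defer un(trace(t, "ParseList")) // one line traces both entry and exit
	t.parseItem()
}

func (t *tracer) parseItem() {
	defer un(trace(t, "ParseItem"))
}

func main() {
	(&tracer{}).parseList()
}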
|
@ -1,652 +0,0 @@
|
||||
// Package scanner implements a scanner for HCL (HashiCorp Configuration
|
||||
// Language) source text.
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"regexp"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/hashicorp/hcl/hcl/token"
|
||||
)
|
||||
|
||||
// eof represents a marker rune for the end of the reader.
|
||||
const eof = rune(0)
|
||||
|
||||
// Scanner defines a lexical scanner
|
||||
type Scanner struct {
|
||||
buf *bytes.Buffer // Source buffer for advancing and scanning
|
||||
src []byte // Source buffer for immutable access
|
||||
|
||||
// Source Position
|
||||
srcPos token.Pos // current position
|
||||
prevPos token.Pos // previous position, used for peek() method
|
||||
|
||||
lastCharLen int // length of last character in bytes
|
||||
lastLineLen int // length of last line in characters (for correct column reporting)
|
||||
|
||||
tokStart int // token text start position
|
||||
tokEnd int // token text end position
|
||||
|
||||
// Error is called for each error encountered. If no Error
|
||||
// function is set, the error is reported to os.Stderr.
|
||||
Error func(pos token.Pos, msg string)
|
||||
|
||||
// ErrorCount is incremented by one for each error encountered.
|
||||
ErrorCount int
|
||||
|
||||
// tokPos is the start position of most recently scanned token; set by
|
||||
// Scan. The Filename field is always left untouched by the Scanner. If
|
||||
// an error is reported (via Error) and Position is invalid, the scanner is
|
||||
// not inside a token.
|
||||
tokPos token.Pos
|
||||
}
|
||||
|
||||
// New creates and initializes a new instance of Scanner using src as
|
||||
// its source content.
|
||||
func New(src []byte) *Scanner {
|
||||
// even though we accept a src, we read from an io.Reader-compatible type
|
||||
// (*bytes.Buffer). So in the future we might easily change it to streaming
|
||||
// read.
|
||||
b := bytes.NewBuffer(src)
|
||||
s := &Scanner{
|
||||
buf: b,
|
||||
src: src,
|
||||
}
|
||||
|
||||
// the source position always starts at line 1
|
||||
s.srcPos.Line = 1
|
||||
return s
|
||||
}
|
||||
|
||||
// next reads the next rune from the buffered reader. Returns the rune(0) if
|
||||
// an error occurs (or io.EOF is returned).
|
||||
func (s *Scanner) next() rune {
|
||||
ch, size, err := s.buf.ReadRune()
|
||||
if err != nil {
|
||||
// advance for error reporting
|
||||
s.srcPos.Column++
|
||||
s.srcPos.Offset += size
|
||||
s.lastCharLen = size
|
||||
return eof
|
||||
}
|
||||
|
||||
// remember last position
|
||||
s.prevPos = s.srcPos
|
||||
|
||||
s.srcPos.Column++
|
||||
s.lastCharLen = size
|
||||
s.srcPos.Offset += size
|
||||
|
||||
if ch == utf8.RuneError && size == 1 {
|
||||
s.err("illegal UTF-8 encoding")
|
||||
return ch
|
||||
}
|
||||
|
||||
if ch == '\n' {
|
||||
s.srcPos.Line++
|
||||
s.lastLineLen = s.srcPos.Column
|
||||
s.srcPos.Column = 0
|
||||
}
|
||||
|
||||
if ch == '\x00' {
|
||||
s.err("unexpected null character (0x00)")
|
||||
return eof
|
||||
}
|
||||
|
||||
if ch == '\uE123' {
|
||||
s.err("unicode code point U+E123 reserved for internal use")
|
||||
return utf8.RuneError
|
||||
}
|
||||
|
||||
// debug
|
||||
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
|
||||
return ch
|
||||
}
|
||||
|
||||
// unread unreads the previously read rune and updates the source position
|
||||
func (s *Scanner) unread() {
|
||||
if err := s.buf.UnreadRune(); err != nil {
|
||||
panic(err) // this is user fault, we should catch it
|
||||
}
|
||||
s.srcPos = s.prevPos // put back last position
|
||||
}
|
||||
|
||||
// peek returns the next rune without advancing the reader.
|
||||
func (s *Scanner) peek() rune {
|
||||
peek, _, err := s.buf.ReadRune()
|
||||
if err != nil {
|
||||
return eof
|
||||
}
|
||||
|
||||
s.buf.UnreadRune()
|
||||
return peek
|
||||
}
|
||||
|
||||
// Scan scans the next token and returns the token.
|
||||
func (s *Scanner) Scan() token.Token {
|
||||
ch := s.next()
|
||||
|
||||
// skip white space
|
||||
for isWhitespace(ch) {
|
||||
ch = s.next()
|
||||
}
|
||||
|
||||
var tok token.Type
|
||||
|
||||
// token text markings
|
||||
s.tokStart = s.srcPos.Offset - s.lastCharLen
|
||||
|
||||
// token position, initial next() is moving the offset by one(size of rune
|
||||
// actually), though we are interested with the starting point
|
||||
s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
|
||||
if s.srcPos.Column > 0 {
|
||||
// common case: last character was not a '\n'
|
||||
s.tokPos.Line = s.srcPos.Line
|
||||
s.tokPos.Column = s.srcPos.Column
|
||||
} else {
|
||||
// last character was a '\n'
|
||||
// (we cannot be at the beginning of the source
|
||||
// since we have called next() at least once)
|
||||
s.tokPos.Line = s.srcPos.Line - 1
|
||||
s.tokPos.Column = s.lastLineLen
|
||||
}
|
||||
|
||||
switch {
|
||||
case isLetter(ch):
|
||||
tok = token.IDENT
|
||||
lit := s.scanIdentifier()
|
||||
if lit == "true" || lit == "false" {
|
||||
tok = token.BOOL
|
||||
}
|
||||
case isDecimal(ch):
|
||||
tok = s.scanNumber(ch)
|
||||
default:
|
||||
switch ch {
|
||||
case eof:
|
||||
tok = token.EOF
|
||||
case '"':
|
||||
tok = token.STRING
|
||||
s.scanString()
|
||||
case '#', '/':
|
||||
tok = token.COMMENT
|
||||
s.scanComment(ch)
|
||||
case '.':
|
||||
tok = token.PERIOD
|
||||
ch = s.peek()
|
||||
if isDecimal(ch) {
|
||||
tok = token.FLOAT
|
||||
ch = s.scanMantissa(ch)
|
||||
ch = s.scanExponent(ch)
|
||||
}
|
||||
case '<':
|
||||
tok = token.HEREDOC
|
||||
s.scanHeredoc()
|
||||
case '[':
|
||||
tok = token.LBRACK
|
||||
case ']':
|
||||
tok = token.RBRACK
|
||||
case '{':
|
||||
tok = token.LBRACE
|
||||
case '}':
|
||||
tok = token.RBRACE
|
||||
case ',':
|
||||
tok = token.COMMA
|
||||
case '=':
|
||||
tok = token.ASSIGN
|
||||
case '+':
|
||||
tok = token.ADD
|
||||
case '-':
|
||||
if isDecimal(s.peek()) {
|
||||
ch := s.next()
|
||||
tok = s.scanNumber(ch)
|
||||
} else {
|
||||
tok = token.SUB
|
||||
}
|
||||
default:
|
||||
s.err("illegal char")
|
||||
}
|
||||
}
|
||||
|
||||
// finish token ending
|
||||
s.tokEnd = s.srcPos.Offset
|
||||
|
||||
// create token literal
|
||||
var tokenText string
|
||||
if s.tokStart >= 0 {
|
||||
tokenText = string(s.src[s.tokStart:s.tokEnd])
|
||||
}
|
||||
s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
|
||||
|
||||
return token.Token{
|
||||
Type: tok,
|
||||
Pos: s.tokPos,
|
||||
Text: tokenText,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scanner) scanComment(ch rune) {
|
||||
// single line comments
|
||||
if ch == '#' || (ch == '/' && s.peek() != '*') {
|
||||
if ch == '/' && s.peek() != '/' {
|
||||
s.err("expected '/' for comment")
|
||||
return
|
||||
}
|
||||
|
||||
ch = s.next()
|
||||
for ch != '\n' && ch >= 0 && ch != eof {
|
||||
ch = s.next()
|
||||
}
|
||||
if ch != eof && ch >= 0 {
|
||||
s.unread()
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// be sure we get the character after /* This allows us to find comment's
|
||||
// that are not erminated
|
||||
if ch == '/' {
|
||||
s.next()
|
||||
ch = s.next() // read character after "/*"
|
||||
}
|
||||
|
||||
// look for /* - style comments
|
||||
for {
|
||||
if ch < 0 || ch == eof {
|
||||
s.err("comment not terminated")
|
||||
break
|
||||
}
|
||||
|
||||
ch0 := ch
|
||||
ch = s.next()
|
||||
if ch0 == '*' && ch == '/' {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// scanNumber scans a HCL number definition starting with the given rune
|
||||
func (s *Scanner) scanNumber(ch rune) token.Type {
|
||||
if ch == '0' {
|
||||
// check for hexadecimal, octal or float
|
||||
ch = s.next()
|
||||
if ch == 'x' || ch == 'X' {
|
||||
// hexadecimal
|
||||
ch = s.next()
|
||||
found := false
|
||||
for isHexadecimal(ch) {
|
||||
ch = s.next()
|
||||
found = true
|
||||
}
|
||||
|
||||
if !found {
|
||||
s.err("illegal hexadecimal number")
|
||||
}
|
||||
|
||||
if ch != eof {
|
||||
s.unread()
|
||||
}
|
||||
|
||||
return token.NUMBER
|
||||
}
|
||||
|
||||
// now it's either something like 0421 (octal) or 0.1231 (float)
|
||||
illegalOctal := false
|
||||
for isDecimal(ch) {
|
||||
ch = s.next()
|
||||
if ch == '8' || ch == '9' {
|
||||
// this is just a possibility. For example 0159 is illegal, but
|
||||
// 0159.23 is valid. So we mark a possible illegal octal. If
|
||||
// the next character is not a period, we'll print the error.
|
||||
illegalOctal = true
|
||||
}
|
||||
}
|
||||
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.scanExponent(ch)
|
||||
return token.FLOAT
|
||||
}
|
||||
|
||||
if ch == '.' {
|
||||
ch = s.scanFraction(ch)
|
||||
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.next()
|
||||
ch = s.scanExponent(ch)
|
||||
}
|
||||
return token.FLOAT
|
||||
}
|
||||
|
||||
if illegalOctal {
|
||||
s.err("illegal octal number")
|
||||
}
|
||||
|
||||
if ch != eof {
|
||||
s.unread()
|
||||
}
|
||||
return token.NUMBER
|
||||
}
|
||||
|
||||
s.scanMantissa(ch)
|
||||
ch = s.next() // seek forward
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.scanExponent(ch)
|
||||
return token.FLOAT
|
||||
}
|
||||
|
||||
if ch == '.' {
|
||||
ch = s.scanFraction(ch)
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.next()
|
||||
ch = s.scanExponent(ch)
|
||||
}
|
||||
return token.FLOAT
|
||||
}
|
||||
|
||||
if ch != eof {
|
||||
s.unread()
|
||||
}
|
||||
return token.NUMBER
|
||||
}
|
||||
|
||||
// scanMantissa scans the mantissa beginning from the rune. It returns the next
|
||||
// non decimal rune. It's used to determine wheter it's a fraction or exponent.
|
||||
func (s *Scanner) scanMantissa(ch rune) rune {
|
||||
scanned := false
|
||||
for isDecimal(ch) {
|
||||
ch = s.next()
|
||||
scanned = true
|
||||
}
|
||||
|
||||
if scanned && ch != eof {
|
||||
s.unread()
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanFraction scans the fraction after the '.' rune
|
||||
func (s *Scanner) scanFraction(ch rune) rune {
|
||||
if ch == '.' {
|
||||
ch = s.peek() // we peek just to see if we can move forward
|
||||
ch = s.scanMantissa(ch)
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
|
||||
// rune.
|
||||
func (s *Scanner) scanExponent(ch rune) rune {
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.next()
|
||||
if ch == '-' || ch == '+' {
|
||||
ch = s.next()
|
||||
}
|
||||
ch = s.scanMantissa(ch)
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanHeredoc scans a heredoc string
|
||||
func (s *Scanner) scanHeredoc() {
|
||||
// Scan the second '<' in example: '<<EOF'
|
||||
if s.next() != '<' {
|
||||
s.err("heredoc expected second '<', didn't see it")
|
||||
return
|
||||
}
|
||||
|
||||
// Get the original offset so we can read just the heredoc ident
|
||||
offs := s.srcPos.Offset
|
||||
|
||||
// Scan the identifier
|
||||
ch := s.next()
|
||||
|
||||
// Indented heredoc syntax
|
||||
if ch == '-' {
|
||||
ch = s.next()
|
||||
}
|
||||
|
||||
for isLetter(ch) || isDigit(ch) {
|
||||
ch = s.next()
|
||||
}
|
||||
|
||||
// If we reached an EOF then that is not good
|
||||
if ch == eof {
|
||||
s.err("heredoc not terminated")
|
||||
return
|
||||
}
|
||||
|
||||
// Ignore the '\r' in Windows line endings
|
||||
if ch == '\r' {
|
||||
if s.peek() == '\n' {
|
||||
ch = s.next()
|
||||
}
|
||||
}
|
||||
|
||||
// If we didn't reach a newline then that is also not good
|
||||
if ch != '\n' {
|
||||
s.err("invalid characters in heredoc anchor")
|
||||
return
|
||||
}
|
||||
|
||||
// Read the identifier
|
||||
identBytes := s.src[offs : s.srcPos.Offset-s.lastCharLen]
|
||||
if len(identBytes) == 0 || (len(identBytes) == 1 && identBytes[0] == '-') {
|
||||
s.err("zero-length heredoc anchor")
|
||||
return
|
||||
}
|
||||
|
||||
var identRegexp *regexp.Regexp
|
||||
if identBytes[0] == '-' {
|
||||
identRegexp = regexp.MustCompile(fmt.Sprintf(`^[[:space:]]*%s\r*\z`, identBytes[1:]))
|
||||
} else {
|
||||
identRegexp = regexp.MustCompile(fmt.Sprintf(`^[[:space:]]*%s\r*\z`, identBytes))
|
||||
}
|
||||
|
||||
// Read the actual string value
|
||||
lineStart := s.srcPos.Offset
|
||||
for {
|
||||
ch := s.next()
|
||||
|
||||
// Special newline handling.
|
||||
if ch == '\n' {
|
||||
// Math is fast, so we first compare the byte counts to see if we have a chance
|
||||
// of seeing the same identifier - if the length is less than the number of bytes
|
||||
// in the identifier, this cannot be a valid terminator.
|
||||
lineBytesLen := s.srcPos.Offset - s.lastCharLen - lineStart
|
||||
if lineBytesLen >= len(identBytes) && identRegexp.Match(s.src[lineStart:s.srcPos.Offset-s.lastCharLen]) {
|
||||
break
|
||||
}
|
||||
|
||||
// Not an anchor match, record the start of a new line
|
||||
lineStart = s.srcPos.Offset
|
||||
}
|
||||
|
||||
if ch == eof {
|
||||
s.err("heredoc not terminated")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
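
A sketch of the heredoc syntax this scanner accepts, decoded end to end via the top-level hcl.Decode; it assumes the unindentHeredoc helper that Token.Value applies to HEREDOC tokens strips the marker lines, leaving only the body:

package main

import (
	"fmt"

	"github.com/hashicorp/hcl"
)

func main() {
	// The body runs until a line holding only the anchor (EOT).
	input := "motd = <<EOT\nHello,\nWorld!\nEOT\n"
	var out struct {
		MOTD string `hcl:"motd"`
	}
	if err := hcl.Decode(&out, input); err != nil {
		panic(err)
	}
	fmt.Printf("%q\n", out.MOTD) // expected to print "Hello,\nWorld!\n"
}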
|
||||
|
||||
// scanString scans a quoted string
|
||||
func (s *Scanner) scanString() {
|
||||
braces := 0
|
||||
for {
|
||||
// '"' opening already consumed
|
||||
// read character after quote
|
||||
ch := s.next()
|
||||
|
||||
if (ch == '\n' && braces == 0) || ch < 0 || ch == eof {
|
||||
s.err("literal not terminated")
|
||||
return
|
||||
}
|
||||
|
||||
if ch == '"' && braces == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
// If we're going into a ${} then we can ignore quotes for a while
|
||||
if braces == 0 && ch == '$' && s.peek() == '{' {
|
||||
braces++
|
||||
s.next()
|
||||
} else if braces > 0 && ch == '{' {
|
||||
braces++
|
||||
}
|
||||
if braces > 0 && ch == '}' {
|
||||
braces--
|
||||
}
|
||||
|
||||
if ch == '\\' {
|
||||
s.scanEscape()
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// scanEscape scans an escape sequence
|
||||
func (s *Scanner) scanEscape() rune {
|
||||
// http://en.cppreference.com/w/cpp/language/escape
|
||||
ch := s.next() // read character after '/'
|
||||
switch ch {
|
||||
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
|
||||
// nothing to do
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
// octal notation
|
||||
ch = s.scanDigits(ch, 8, 3)
|
||||
case 'x':
|
||||
// hexadecimal notation
|
||||
ch = s.scanDigits(s.next(), 16, 2)
|
||||
case 'u':
|
||||
// universal character name
|
||||
ch = s.scanDigits(s.next(), 16, 4)
|
||||
case 'U':
|
||||
// universal character name
|
||||
ch = s.scanDigits(s.next(), 16, 8)
|
||||
default:
|
||||
s.err("illegal char escape")
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanDigits scans a rune with the given base up to n times. For example an
|
||||
// octal escape such as \123 would result in scanDigits(ch, 8, 3)
|
||||
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
|
||||
start := n
|
||||
for n > 0 && digitVal(ch) < base {
|
||||
ch = s.next()
|
||||
if ch == eof {
|
||||
// If we see an EOF, we halt any more scanning of digits
|
||||
// immediately.
|
||||
break
|
||||
}
|
||||
|
||||
n--
|
||||
}
|
||||
if n > 0 {
|
||||
s.err("illegal char escape")
|
||||
}
|
||||
|
||||
if n != start && ch != eof {
|
||||
// we scanned all digits, put the last non digit char back,
|
||||
// only if we read anything at all
|
||||
s.unread()
|
||||
}
|
||||
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanIdentifier scans an identifier and returns the literal string
|
||||
func (s *Scanner) scanIdentifier() string {
|
||||
offs := s.srcPos.Offset - s.lastCharLen
|
||||
ch := s.next()
|
||||
for isLetter(ch) || isDigit(ch) || ch == '-' || ch == '.' {
|
||||
ch = s.next()
|
||||
}
|
||||
|
||||
if ch != eof {
|
||||
s.unread() // we got identifier, put back latest char
|
||||
}
|
||||
|
||||
return string(s.src[offs:s.srcPos.Offset])
|
||||
}
|
||||
|
||||
// recentPosition returns the position of the character immediately after the
|
||||
// character or token returned by the last call to Scan.
|
||||
func (s *Scanner) recentPosition() (pos token.Pos) {
|
||||
pos.Offset = s.srcPos.Offset - s.lastCharLen
|
||||
switch {
|
||||
case s.srcPos.Column > 0:
|
||||
// common case: last character was not a '\n'
|
||||
pos.Line = s.srcPos.Line
|
||||
pos.Column = s.srcPos.Column
|
||||
case s.lastLineLen > 0:
|
||||
// last character was a '\n'
|
||||
// (we cannot be at the beginning of the source
|
||||
// since we have called next() at least once)
|
||||
pos.Line = s.srcPos.Line - 1
|
||||
pos.Column = s.lastLineLen
|
||||
default:
|
||||
// at the beginning of the source
|
||||
pos.Line = 1
|
||||
pos.Column = 1
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// err prints the error of any scanning to s.Error function. If the function is
|
||||
// not defined, by default it prints them to os.Stderr
|
||||
func (s *Scanner) err(msg string) {
|
||||
s.ErrorCount++
|
||||
pos := s.recentPosition()
|
||||
|
||||
if s.Error != nil {
|
||||
s.Error(pos, msg)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
|
||||
}
|
||||
|
||||
// isLetter returns true if the given rune is a letter
|
||||
func isLetter(ch rune) bool {
|
||||
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
|
||||
}
|
||||
|
||||
// isDigit returns true if the given rune is a decimal digit
|
||||
func isDigit(ch rune) bool {
|
||||
return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
|
||||
}
|
||||
|
||||
// isDecimal returns true if the given rune is a decimal digit
|
||||
func isDecimal(ch rune) bool {
|
||||
return '0' <= ch && ch <= '9'
|
||||
}
|
||||
|
||||
// isHexadecimal returns true if the given rune is a hexadecimal digit
|
||||
func isHexadecimal(ch rune) bool {
|
||||
return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
|
||||
}
|
||||
|
||||
// isWhitespace returns true if the rune is a space, tab, newline or carriage return
|
||||
func isWhitespace(ch rune) bool {
|
||||
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
|
||||
}
|
||||
|
||||
// digitVal returns the integer value of a given octal, decimal or hexadecimal rune
|
||||
func digitVal(ch rune) int {
|
||||
switch {
|
||||
case '0' <= ch && ch <= '9':
|
||||
return int(ch - '0')
|
||||
case 'a' <= ch && ch <= 'f':
|
||||
return int(ch - 'a' + 10)
|
||||
case 'A' <= ch && ch <= 'F':
|
||||
return int(ch - 'A' + 10)
|
||||
}
|
||||
return 16 // larger than any legal digit val
|
||||
}
|
@ -1,241 +0,0 @@
|
||||
package strconv
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// ErrSyntax indicates that a value does not have the right syntax for the target type.
|
||||
var ErrSyntax = errors.New("invalid syntax")
|
||||
|
||||
// Unquote interprets s as a double-quoted HCL string literal (the only
|
||||
// quoting form this implementation accepts), returning the string value
|
||||
// that s quotes. Escape sequences are processed, except inside ${ }
|
||||
// interpolation sequences, which are copied through verbatim.
|
||||
func Unquote(s string) (t string, err error) {
|
||||
n := len(s)
|
||||
if n < 2 {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
quote := s[0]
|
||||
if quote != s[n-1] {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
s = s[1 : n-1]
|
||||
|
||||
if quote != '"' {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
if !contains(s, '$') && !contains(s, '{') && contains(s, '\n') {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
|
||||
// Is it trivial? Avoid allocation.
|
||||
if !contains(s, '\\') && !contains(s, quote) && !contains(s, '$') {
|
||||
switch quote {
|
||||
case '"':
|
||||
return s, nil
|
||||
case '\'':
|
||||
r, size := utf8.DecodeRuneInString(s)
|
||||
if size == len(s) && (r != utf8.RuneError || size != 1) {
|
||||
return s, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var runeTmp [utf8.UTFMax]byte
|
||||
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
|
||||
for len(s) > 0 {
|
||||
// If we're starting a '${}' then let it through un-unquoted.
|
||||
// Specifically: we don't unquote any characters within the `${}`
|
||||
// section.
|
||||
if s[0] == '$' && len(s) > 1 && s[1] == '{' {
|
||||
buf = append(buf, '$', '{')
|
||||
s = s[2:]
|
||||
|
||||
// Continue reading until we find the closing brace, copying as-is
|
||||
braces := 1
|
||||
for len(s) > 0 && braces > 0 {
|
||||
r, size := utf8.DecodeRuneInString(s)
|
||||
if r == utf8.RuneError {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
|
||||
s = s[size:]
|
||||
|
||||
n := utf8.EncodeRune(runeTmp[:], r)
|
||||
buf = append(buf, runeTmp[:n]...)
|
||||
|
||||
switch r {
|
||||
case '{':
|
||||
braces++
|
||||
case '}':
|
||||
braces--
|
||||
}
|
||||
}
|
||||
if braces != 0 {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
if len(s) == 0 {
|
||||
// If there's no string left, we're done!
|
||||
break
|
||||
} else {
|
||||
// If there's more left, we need to pop back up to the top of the loop
|
||||
// in case there's another interpolation in this string.
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if s[0] == '\n' {
|
||||
return "", ErrSyntax
|
||||
}
|
||||
|
||||
c, multibyte, ss, err := unquoteChar(s, quote)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
s = ss
|
||||
if c < utf8.RuneSelf || !multibyte {
|
||||
buf = append(buf, byte(c))
|
||||
} else {
|
||||
n := utf8.EncodeRune(runeTmp[:], c)
|
||||
buf = append(buf, runeTmp[:n]...)
|
||||
}
|
||||
if quote == '\'' && len(s) != 0 {
|
||||
// single-quoted must be single character
|
||||
return "", ErrSyntax
|
||||
}
|
||||
}
|
||||
return string(buf), nil
|
||||
}
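
A small usage sketch; the escape outside the interpolation is processed while the body of ${...} is preserved byte for byte, as described above. The interpolation content is arbitrary:

package main

import (
	"fmt"

	hclstrconv "github.com/hashicorp/hcl/hcl/strconv"
)

func main() {
	// \t outside the interpolation becomes a real tab; the \" inside
	// ${...} is left untouched for a later template stage.
	s, err := hclstrconv.Unquote(`"a\tb ${upper("x")} c"`)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%q\n", s) // "a\tb ${upper(\"x\")} c"
}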
|
||||
|
||||
// contains reports whether the string contains the byte c.
|
||||
func contains(s string, c byte) bool {
|
||||
for i := 0; i < len(s); i++ {
|
||||
if s[i] == c {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func unhex(b byte) (v rune, ok bool) {
|
||||
c := rune(b)
|
||||
switch {
|
||||
case '0' <= c && c <= '9':
|
||||
return c - '0', true
|
||||
case 'a' <= c && c <= 'f':
|
||||
return c - 'a' + 10, true
|
||||
case 'A' <= c && c <= 'F':
|
||||
return c - 'A' + 10, true
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
|
||||
// easy cases
|
||||
switch c := s[0]; {
|
||||
case c == quote && (quote == '\'' || quote == '"'):
|
||||
err = ErrSyntax
|
||||
return
|
||||
case c >= utf8.RuneSelf:
|
||||
r, size := utf8.DecodeRuneInString(s)
|
||||
return r, true, s[size:], nil
|
||||
case c != '\\':
|
||||
return rune(s[0]), false, s[1:], nil
|
||||
}
|
||||
|
||||
// hard case: c is backslash
|
||||
if len(s) <= 1 {
|
||||
err = ErrSyntax
|
||||
return
|
||||
}
|
||||
c := s[1]
|
||||
s = s[2:]
|
||||
|
||||
switch c {
|
||||
case 'a':
|
||||
value = '\a'
|
||||
case 'b':
|
||||
value = '\b'
|
||||
case 'f':
|
||||
value = '\f'
|
||||
case 'n':
|
||||
value = '\n'
|
||||
case 'r':
|
||||
value = '\r'
|
||||
case 't':
|
||||
value = '\t'
|
||||
case 'v':
|
||||
value = '\v'
|
||||
case 'x', 'u', 'U':
|
||||
n := 0
|
||||
switch c {
|
||||
case 'x':
|
||||
n = 2
|
||||
case 'u':
|
||||
n = 4
|
||||
case 'U':
|
||||
n = 8
|
||||
}
|
||||
var v rune
|
||||
if len(s) < n {
|
||||
err = ErrSyntax
|
||||
return
|
||||
}
|
||||
for j := 0; j < n; j++ {
|
||||
x, ok := unhex(s[j])
|
||||
if !ok {
|
||||
err = ErrSyntax
|
||||
return
|
||||
}
|
||||
v = v<<4 | x
|
||||
}
|
||||
s = s[n:]
|
||||
if c == 'x' {
|
||||
// single-byte string, possibly not UTF-8
|
||||
value = v
|
||||
break
|
||||
}
|
||||
if v > utf8.MaxRune {
|
||||
err = ErrSyntax
|
||||
return
|
||||
}
|
||||
value = v
|
||||
multibyte = true
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
v := rune(c) - '0'
|
||||
if len(s) < 2 {
|
||||
err = ErrSyntax
|
||||
return
|
||||
}
|
||||
for j := 0; j < 2; j++ { // one digit already; two more
|
||||
x := rune(s[j]) - '0'
|
||||
if x < 0 || x > 7 {
|
||||
err = ErrSyntax
|
||||
return
|
||||
}
|
||||
v = (v << 3) | x
|
||||
}
|
||||
s = s[2:]
|
||||
if v > 255 {
|
||||
err = ErrSyntax
|
||||
return
|
||||
}
|
||||
value = v
|
||||
case '\\':
|
||||
value = '\\'
|
||||
case '\'', '"':
|
||||
if c != quote {
|
||||
err = ErrSyntax
|
||||
return
|
||||
}
|
||||
value = rune(c)
|
||||
default:
|
||||
err = ErrSyntax
|
||||
return
|
||||
}
|
||||
tail = s
|
||||
return
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
package token
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Pos describes an arbitrary source position
|
||||
// including the file, line, and column location.
|
||||
// A Pos is valid if the line number is > 0.
|
||||
type Pos struct {
|
||||
Filename string // filename, if any
|
||||
Offset int // offset, starting at 0
|
||||
Line int // line number, starting at 1
|
||||
Column int // column number, starting at 1 (character count)
|
||||
}
|
||||
|
||||
// IsValid returns true if the position is valid.
|
||||
func (p *Pos) IsValid() bool { return p.Line > 0 }
|
||||
|
||||
// String returns a string in one of several forms:
|
||||
//
|
||||
//	file:line:column    valid position with file name
|
||||
//	line:column         valid position without file name
|
||||
//	file                invalid position with file name
|
||||
//	-                   invalid position without file name
|
||||
func (p Pos) String() string {
|
||||
s := p.Filename
|
||||
if p.IsValid() {
|
||||
if s != "" {
|
||||
s += ":"
|
||||
}
|
||||
s += fmt.Sprintf("%d:%d", p.Line, p.Column)
|
||||
}
|
||||
if s == "" {
|
||||
s = "-"
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Before reports whether the position p is before u.
|
||||
func (p Pos) Before(u Pos) bool {
|
||||
return u.Offset > p.Offset || u.Line > p.Line
|
||||
}
|
||||
|
||||
// After reports whether the position p is after u.
|
||||
func (p Pos) After(u Pos) bool {
|
||||
return u.Offset < p.Offset || u.Line < p.Line
|
||||
}
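
A short sketch of the position type in isolation; the values are arbitrary:

package main

import (
	"fmt"

	"github.com/hashicorp/hcl/hcl/token"
)

func main() {
	p := token.Pos{Filename: "main.hcl", Line: 3, Column: 7}
	fmt.Println(p) // main.hcl:3:7

	q := token.Pos{} // zero Line, so invalid
	fmt.Println(q.IsValid(), q) // false -
}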
|
@ -1,219 +0,0 @@
|
||||
// Package token defines constants representing the lexical tokens for HCL
|
||||
// (HashiCorp Configuration Language)
|
||||
package token
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
hclstrconv "github.com/hashicorp/hcl/hcl/strconv"
|
||||
)
|
||||
|
||||
// Token defines a single HCL token which can be obtained via the Scanner
|
||||
type Token struct {
|
||||
Type Type
|
||||
Pos Pos
|
||||
Text string
|
||||
JSON bool
|
||||
}
|
||||
|
||||
// Type is the set of lexical tokens of the HCL (HashiCorp Configuration Language)
|
||||
type Type int
|
||||
|
||||
const (
|
||||
// Special tokens
|
||||
ILLEGAL Type = iota
|
||||
EOF
|
||||
COMMENT
|
||||
|
||||
identifier_beg
|
||||
IDENT // literals
|
||||
literal_beg
|
||||
NUMBER // 12345
|
||||
FLOAT // 123.45
|
||||
BOOL // true,false
|
||||
STRING // "abc"
|
||||
HEREDOC // <<FOO\nbar\nFOO
|
||||
literal_end
|
||||
identifier_end
|
||||
|
||||
operator_beg
|
||||
LBRACK // [
|
||||
LBRACE // {
|
||||
COMMA // ,
|
||||
PERIOD // .
|
||||
|
||||
RBRACK // ]
|
||||
RBRACE // }
|
||||
|
||||
ASSIGN // =
|
||||
ADD // +
|
||||
SUB // -
|
||||
operator_end
|
||||
)
|
||||
|
||||
var tokens = [...]string{
|
||||
ILLEGAL: "ILLEGAL",
|
||||
|
||||
EOF: "EOF",
|
||||
COMMENT: "COMMENT",
|
||||
|
||||
IDENT: "IDENT",
|
||||
NUMBER: "NUMBER",
|
||||
FLOAT: "FLOAT",
|
||||
BOOL: "BOOL",
|
||||
STRING: "STRING",
|
||||
|
||||
LBRACK: "LBRACK",
|
||||
LBRACE: "LBRACE",
|
||||
COMMA: "COMMA",
|
||||
PERIOD: "PERIOD",
|
||||
HEREDOC: "HEREDOC",
|
||||
|
||||
RBRACK: "RBRACK",
|
||||
RBRACE: "RBRACE",
|
||||
|
||||
ASSIGN: "ASSIGN",
|
||||
ADD: "ADD",
|
||||
SUB: "SUB",
|
||||
}
|
||||
|
||||
// String returns the string corresponding to the token type t.
|
||||
func (t Type) String() string {
|
||||
s := ""
|
||||
if 0 <= t && t < Type(len(tokens)) {
|
||||
s = tokens[t]
|
||||
}
|
||||
if s == "" {
|
||||
s = "token(" + strconv.Itoa(int(t)) + ")"
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// IsIdentifier returns true for tokens corresponding to identifiers and basic
|
||||
// type literals; it returns false otherwise.
|
||||
func (t Type) IsIdentifier() bool { return identifier_beg < t && t < identifier_end }
|
||||
|
||||
// IsLiteral returns true for tokens corresponding to basic type literals; it
|
||||
// returns false otherwise.
|
||||
func (t Type) IsLiteral() bool { return literal_beg < t && t < literal_end }
|
||||
|
||||
// IsOperator returns true for tokens corresponding to operators and
|
||||
// delimiters; it returns false otherwise.
|
||||
func (t Type) IsOperator() bool { return operator_beg < t && t < operator_end }
|
||||
|
||||
// String returns the token's literal text. Note that this is only
|
||||
// applicable for certain token types, such as token.IDENT,
|
||||
// token.STRING, etc.
|
||||
func (t Token) String() string {
|
||||
return fmt.Sprintf("%s %s %s", t.Pos.String(), t.Type.String(), t.Text)
|
||||
}
|
||||
|
||||
// Value returns the properly typed value for this token. The type of
|
||||
// the returned interface{} is guaranteed based on the Type field.
|
||||
//
|
||||
// This can only be called for literal types. If it is called for any other
|
||||
// type, this will panic.
|
||||
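//
// For example (illustrative): a NUMBER token whose Text is "42" yields
// int64(42), and a FLOAT token whose Text is "4.2" yields float64(4.2).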
func (t Token) Value() interface{} {
|
||||
switch t.Type {
|
||||
case BOOL:
|
||||
if t.Text == "true" {
|
||||
return true
|
||||
} else if t.Text == "false" {
|
||||
return false
|
||||
}
|
||||
|
||||
panic("unknown bool value: " + t.Text)
|
||||
case FLOAT:
|
||||
v, err := strconv.ParseFloat(t.Text, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return float64(v)
|
||||
case NUMBER:
|
||||
v, err := strconv.ParseInt(t.Text, 0, 64)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return int64(v)
|
||||
case IDENT:
|
||||
return t.Text
|
||||
case HEREDOC:
|
||||
return unindentHeredoc(t.Text)
|
||||
case STRING:
|
||||
// Determine the Unquote method to use. If it came from JSON,
|
||||
// then we need to use the built-in unquote since we have to
|
||||
// escape interpolations there.
|
||||
f := hclstrconv.Unquote
|
||||
if t.JSON {
|
||||
f = strconv.Unquote
|
||||
}
|
||||
|
||||
// This case occurs if JSON null is used
|
||||
if t.Text == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
v, err := f(t.Text)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("unquote %s err: %s", t.Text, err))
|
||||
}
|
||||
|
||||
return v
|
||||
default:
|
||||
panic(fmt.Sprintf("unimplemented Value for type: %s", t.Type))
|
||||
}
|
||||
}
|
||||
|
||||
// unindentHeredoc returns the string content of a HEREDOC when it starts with <<,
|
||||
// or the content with the hanging indent removed when it starts with <<- and
|
||||
// every line is at least as indented as the terminating line.
|
||||
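//
// For example (illustrative): with a <<-EOF heredoc in which every content
// line and the closing EOF share the whitespace prefix "    ", that
// four-space prefix is stripped from each content line.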
func unindentHeredoc(heredoc string) string {
|
||||
// We need to find the end of the marker
|
||||
idx := strings.IndexByte(heredoc, '\n')
|
||||
if idx == -1 {
|
||||
panic("heredoc doesn't contain newline")
|
||||
}
|
||||
|
||||
unindent := heredoc[2] == '-'
|
||||
|
||||
// We can optimize if the heredoc isn't marked for indentation
|
||||
if !unindent {
|
||||
return string(heredoc[idx+1 : len(heredoc)-idx+1])
|
||||
}
|
||||
|
||||
// We need to unindent each line based on the indentation level of the marker
|
||||
lines := strings.Split(string(heredoc[idx+1:len(heredoc)-idx+2]), "\n")
|
||||
whitespacePrefix := lines[len(lines)-1]
|
||||
|
||||
isIndented := true
|
||||
for _, v := range lines {
|
||||
if strings.HasPrefix(v, whitespacePrefix) {
|
||||
continue
|
||||
}
|
||||
|
||||
isIndented = false
|
||||
break
|
||||
}
|
||||
|
||||
// If all lines are not at least as indented as the terminating mark, return the
|
||||
// heredoc as is, but trim the leading space from the marker on the final line.
|
||||
if !isIndented {
|
||||
return strings.TrimRight(string(heredoc[idx+1:len(heredoc)-idx+1]), " \t")
|
||||
}
|
||||
|
||||
unindentedLines := make([]string, len(lines))
|
||||
for k, v := range lines {
|
||||
if k == len(lines)-1 {
|
||||
unindentedLines[k] = ""
|
||||
break
|
||||
}
|
||||
|
||||
unindentedLines[k] = strings.TrimPrefix(v, whitespacePrefix)
|
||||
}
|
||||
|
||||
return strings.Join(unindentedLines, "\n")
|
||||
}
|
@ -1,117 +0,0 @@
|
||||
package parser
|
||||
|
||||
import "github.com/hashicorp/hcl/hcl/ast"
|
||||
|
||||
// flattenObjects takes an AST node, walks it, and flattens nested object items.
|
||||
func flattenObjects(node ast.Node) {
|
||||
ast.Walk(node, func(n ast.Node) (ast.Node, bool) {
|
||||
// We only care about lists, because this is what we modify
|
||||
list, ok := n.(*ast.ObjectList)
|
||||
if !ok {
|
||||
return n, true
|
||||
}
|
||||
|
||||
// Rebuild the item list
|
||||
items := make([]*ast.ObjectItem, 0, len(list.Items))
|
||||
frontier := make([]*ast.ObjectItem, len(list.Items))
|
||||
copy(frontier, list.Items)
|
||||
for len(frontier) > 0 {
|
||||
// Pop the current item
|
||||
n := len(frontier)
|
||||
item := frontier[n-1]
|
||||
frontier = frontier[:n-1]
|
||||
|
||||
switch v := item.Val.(type) {
|
||||
case *ast.ObjectType:
|
||||
items, frontier = flattenObjectType(v, item, items, frontier)
|
||||
case *ast.ListType:
|
||||
items, frontier = flattenListType(v, item, items, frontier)
|
||||
default:
|
||||
items = append(items, item)
|
||||
}
|
||||
}
|
||||
|
||||
// Reverse the list since the frontier model runs things backwards
|
||||
for i := len(items)/2 - 1; i >= 0; i-- {
|
||||
opp := len(items) - 1 - i
|
||||
items[i], items[opp] = items[opp], items[i]
|
||||
}
|
||||
|
||||
// Done! Set the original items
|
||||
list.Items = items
|
||||
return n, true
|
||||
})
|
||||
}
|
||||
|
||||
func flattenListType(
|
||||
ot *ast.ListType,
|
||||
item *ast.ObjectItem,
|
||||
items []*ast.ObjectItem,
|
||||
frontier []*ast.ObjectItem) ([]*ast.ObjectItem, []*ast.ObjectItem) {
|
||||
// If the list is empty, keep the original list
|
||||
if len(ot.List) == 0 {
|
||||
items = append(items, item)
|
||||
return items, frontier
|
||||
}
|
||||
|
||||
// All the elements of this object must also be objects!
|
||||
for _, subitem := range ot.List {
|
||||
if _, ok := subitem.(*ast.ObjectType); !ok {
|
||||
items = append(items, item)
|
||||
return items, frontier
|
||||
}
|
||||
}
|
||||
|
||||
// Great! We have a match. Go through all the items and flatten.
|
||||
for _, elem := range ot.List {
|
||||
// Add it to the frontier so that we can recurse
|
||||
frontier = append(frontier, &ast.ObjectItem{
|
||||
Keys: item.Keys,
|
||||
Assign: item.Assign,
|
||||
Val: elem,
|
||||
LeadComment: item.LeadComment,
|
||||
LineComment: item.LineComment,
|
||||
})
|
||||
}
|
||||
|
||||
return items, frontier
|
||||
}
|
||||
|
||||
func flattenObjectType(
|
||||
ot *ast.ObjectType,
|
||||
item *ast.ObjectItem,
|
||||
items []*ast.ObjectItem,
|
||||
frontier []*ast.ObjectItem) ([]*ast.ObjectItem, []*ast.ObjectItem) {
|
||||
// If the list has no items we do not have to flatten anything
|
||||
if ot.List.Items == nil {
|
||||
items = append(items, item)
|
||||
return items, frontier
|
||||
}
|
||||
|
||||
// All the elements of this object must also be objects!
|
||||
for _, subitem := range ot.List.Items {
|
||||
if _, ok := subitem.Val.(*ast.ObjectType); !ok {
|
||||
items = append(items, item)
|
||||
return items, frontier
|
||||
}
|
||||
}
|
||||
|
||||
// Great! We have a match. Go through all the items and flatten.
|
||||
for _, subitem := range ot.List.Items {
|
||||
// Copy the new key
|
||||
keys := make([]*ast.ObjectKey, len(item.Keys)+len(subitem.Keys))
|
||||
copy(keys, item.Keys)
|
||||
copy(keys[len(item.Keys):], subitem.Keys)
|
||||
|
||||
// Add it to the frontier so that we can recurse
|
||||
frontier = append(frontier, &ast.ObjectItem{
|
||||
Keys: keys,
|
||||
Assign: item.Assign,
|
||||
Val: subitem.Val,
|
||||
LeadComment: item.LeadComment,
|
||||
LineComment: item.LineComment,
|
||||
})
|
||||
}
|
||||
|
||||
return items, frontier
|
||||
}
|
@ -1,313 +0,0 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/hashicorp/hcl/hcl/ast"
|
||||
hcltoken "github.com/hashicorp/hcl/hcl/token"
|
||||
"github.com/hashicorp/hcl/json/scanner"
|
||||
"github.com/hashicorp/hcl/json/token"
|
||||
)
|
||||
|
||||
type Parser struct {
|
||||
sc *scanner.Scanner
|
||||
|
||||
// Last read token
|
||||
tok token.Token
|
||||
commaPrev token.Token
|
||||
|
||||
enableTrace bool
|
||||
indent int
|
||||
n int // buffer size (max = 1)
|
||||
}
|
||||
|
||||
func newParser(src []byte) *Parser {
|
||||
return &Parser{
|
||||
sc: scanner.New(src),
|
||||
}
|
||||
}
|
||||
|
||||
// Parse parses the given source and returns the abstract syntax tree.
|
||||
func Parse(src []byte) (*ast.File, error) {
|
||||
p := newParser(src)
|
||||
return p.Parse()
|
||||
}
|
||||
|
||||
var errEofToken = errors.New("EOF token found")
|
||||
|
||||
// Parse parses the given source and returns the abstract syntax tree.
|
||||
func (p *Parser) Parse() (*ast.File, error) {
|
||||
f := &ast.File{}
|
||||
var err, scerr error
|
||||
p.sc.Error = func(pos token.Pos, msg string) {
|
||||
scerr = fmt.Errorf("%s: %s", pos, msg)
|
||||
}
|
||||
|
||||
// The root must be an object in JSON
|
||||
object, err := p.object()
|
||||
if scerr != nil {
|
||||
return nil, scerr
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// We make our final node an object list so it is more HCL compatible
|
||||
f.Node = object.List
|
||||
|
||||
// Flatten it, which finds patterns and turns them into more HCL-like
|
||||
// AST trees.
|
||||
flattenObjects(f.Node)
|
||||
|
||||
return f, nil
|
||||
}
|
||||
|
||||
func (p *Parser) objectList() (*ast.ObjectList, error) {
|
||||
defer un(trace(p, "ParseObjectList"))
|
||||
node := &ast.ObjectList{}
|
||||
|
||||
for {
|
||||
n, err := p.objectItem()
|
||||
if err == errEofToken {
|
||||
break // we are finished
|
||||
}
|
||||
|
||||
// we don't return a nil node, because the caller might want to use already
|
||||
// collected items.
|
||||
if err != nil {
|
||||
return node, err
|
||||
}
|
||||
|
||||
node.Add(n)
|
||||
|
||||
// Check for a followup comma. If it isn't a comma, then we're done
|
||||
if tok := p.scan(); tok.Type != token.COMMA {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return node, nil
|
||||
}
|
||||
|
||||
// objectItem parses a single object item
|
||||
func (p *Parser) objectItem() (*ast.ObjectItem, error) {
|
||||
defer un(trace(p, "ParseObjectItem"))
|
||||
|
||||
keys, err := p.objectKey()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
o := &ast.ObjectItem{
|
||||
Keys: keys,
|
||||
}
|
||||
|
||||
switch p.tok.Type {
|
||||
case token.COLON:
|
||||
pos := p.tok.Pos
|
||||
o.Assign = hcltoken.Pos{
|
||||
Filename: pos.Filename,
|
||||
Offset: pos.Offset,
|
||||
Line: pos.Line,
|
||||
Column: pos.Column,
|
||||
}
|
||||
|
||||
o.Val, err = p.objectValue()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return o, nil
|
||||
}
|
||||
|
||||
// objectKey parses an object key and returns an ObjectKey AST
|
||||
func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
|
||||
keyCount := 0
|
||||
keys := make([]*ast.ObjectKey, 0)
|
||||
|
||||
for {
|
||||
tok := p.scan()
|
||||
switch tok.Type {
|
||||
case token.EOF:
|
||||
return nil, errEofToken
|
||||
case token.STRING:
|
||||
keyCount++
|
||||
keys = append(keys, &ast.ObjectKey{
|
||||
Token: p.tok.HCLToken(),
|
||||
})
|
||||
case token.COLON:
|
||||
// If we have a zero keycount it means that we never got
|
||||
// an object key, i.e. `{ :`. This is a syntax error.
|
||||
if keyCount == 0 {
|
||||
return nil, fmt.Errorf("expected: STRING got: %s", p.tok.Type)
|
||||
}
|
||||
|
||||
// Done
|
||||
return keys, nil
|
||||
case token.ILLEGAL:
|
||||
return nil, errors.New("illegal")
|
||||
default:
|
||||
return nil, fmt.Errorf("expected: STRING got: %s", p.tok.Type)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// objectValue parses any type of value, such as number, bool, string, object or
|
||||
// list.
|
||||
func (p *Parser) objectValue() (ast.Node, error) {
|
||||
defer un(trace(p, "ParseObjectValue"))
|
||||
tok := p.scan()
|
||||
|
||||
switch tok.Type {
|
||||
case token.NUMBER, token.FLOAT, token.BOOL, token.NULL, token.STRING:
|
||||
return p.literalType()
|
||||
case token.LBRACE:
|
||||
return p.objectType()
|
||||
case token.LBRACK:
|
||||
return p.listType()
|
||||
case token.EOF:
|
||||
return nil, errEofToken
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("Expected object value, got unknown token: %+v", tok)
|
||||
}
|
||||
|
||||
// object parses the root JSON object and returns the corresponding
|
||||
// ObjectType AST.
|
||||
func (p *Parser) object() (*ast.ObjectType, error) {
|
||||
defer un(trace(p, "ParseType"))
|
||||
tok := p.scan()
|
||||
|
||||
switch tok.Type {
|
||||
case token.LBRACE:
|
||||
return p.objectType()
|
||||
case token.EOF:
|
||||
return nil, errEofToken
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("Expected object, got unknown token: %+v", tok)
|
||||
}
|
||||
|
||||
// objectType parses an object type and returns an ObjectType AST
|
||||
func (p *Parser) objectType() (*ast.ObjectType, error) {
|
||||
defer un(trace(p, "ParseObjectType"))
|
||||
|
||||
// we assume that the currently scanned token is an LBRACE
|
||||
o := &ast.ObjectType{}
|
||||
|
||||
l, err := p.objectList()
|
||||
|
||||
// if we hit RBRACE, we are good to go (it means we parsed all items); if it's
|
||||
// not an RBRACE, it's a syntax error and we just return it.
|
||||
if err != nil && p.tok.Type != token.RBRACE {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
o.List = l
|
||||
return o, nil
|
||||
}
|
||||
|
||||
// listType parses a list type and returns a ListType AST
|
||||
func (p *Parser) listType() (*ast.ListType, error) {
|
||||
defer un(trace(p, "ParseListType"))
|
||||
|
||||
// we assume that the currently scanned token is an LBRACK
|
||||
l := &ast.ListType{}
|
||||
|
||||
for {
|
||||
tok := p.scan()
|
||||
switch tok.Type {
|
||||
case token.NUMBER, token.FLOAT, token.STRING:
|
||||
node, err := p.literalType()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
l.Add(node)
|
||||
case token.COMMA:
|
||||
continue
|
||||
case token.LBRACE:
|
||||
node, err := p.objectType()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
l.Add(node)
|
||||
case token.BOOL:
|
||||
// TODO(arslan) should we support it? Not supported by HCL yet
|
||||
case token.LBRACK:
|
||||
// TODO(arslan) should we support nested lists? Even though it's
|
||||
// written in the HCL README, it's not a part of the grammar
|
||||
// (not defined in parse.y)
|
||||
case token.RBRACK:
|
||||
// finished
|
||||
return l, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected token while parsing list: %s", tok.Type)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// literalType parses a literal type and returns a LiteralType AST
|
||||
func (p *Parser) literalType() (*ast.LiteralType, error) {
|
||||
defer un(trace(p, "ParseLiteral"))
|
||||
|
||||
return &ast.LiteralType{
|
||||
Token: p.tok.HCLToken(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// scan returns the next token from the underlying scanner. If a token has
|
||||
// been unscanned then read that instead.
|
||||
func (p *Parser) scan() token.Token {
|
||||
// If we have a token on the buffer, then return it.
|
||||
if p.n != 0 {
|
||||
p.n = 0
|
||||
return p.tok
|
||||
}
|
||||
|
||||
p.tok = p.sc.Scan()
|
||||
return p.tok
|
||||
}
|
||||
|
||||
// unscan pushes the previously read token back onto the buffer.
|
||||
func (p *Parser) unscan() {
|
||||
p.n = 1
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Parsing support
|
||||
|
||||
func (p *Parser) printTrace(a ...interface{}) {
|
||||
if !p.enableTrace {
|
||||
return
|
||||
}
|
||||
|
||||
const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
|
||||
const n = len(dots)
|
||||
fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
|
||||
|
||||
i := 2 * p.indent
|
||||
for i > n {
|
||||
fmt.Print(dots)
|
||||
i -= n
|
||||
}
|
||||
// i <= n
|
||||
fmt.Print(dots[0:i])
|
||||
fmt.Println(a...)
|
||||
}
|
||||
|
||||
func trace(p *Parser, msg string) *Parser {
|
||||
p.printTrace(msg, "(")
|
||||
p.indent++
|
||||
return p
|
||||
}
|
||||
|
||||
// Usage pattern: defer un(trace(p, "..."))
|
||||
func un(p *Parser) {
|
||||
p.indent--
|
||||
p.printTrace(")")
|
||||
}
|
@ -1,451 +0,0 @@
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"os"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/hashicorp/hcl/json/token"
|
||||
)
|
||||
|
||||
// eof represents a marker rune for the end of the reader.
|
||||
const eof = rune(0)
|
||||
|
||||
// Scanner defines a lexical scanner
|
||||
type Scanner struct {
|
||||
buf *bytes.Buffer // Source buffer for advancing and scanning
|
||||
src []byte // Source buffer for immutable access
|
||||
|
||||
// Source Position
|
||||
srcPos token.Pos // current position
|
||||
prevPos token.Pos // previous position, used for peek() method
|
||||
|
||||
lastCharLen int // length of last character in bytes
|
||||
lastLineLen int // length of last line in characters (for correct column reporting)
|
||||
|
||||
tokStart int // token text start position
|
||||
tokEnd int // token text end position
|
||||
|
||||
// Error is called for each error encountered. If no Error
|
||||
// function is set, the error is reported to os.Stderr.
|
||||
Error func(pos token.Pos, msg string)
|
||||
|
||||
// ErrorCount is incremented by one for each error encountered.
|
||||
ErrorCount int
|
||||
|
||||
// tokPos is the start position of the most recently scanned token; set by
|
||||
// Scan. The Filename field is always left untouched by the Scanner. If
|
||||
// an error is reported (via Error) and Position is invalid, the scanner is
|
||||
// not inside a token.
|
||||
tokPos token.Pos
|
||||
}
|
||||
|
||||
// New creates and initializes a new instance of Scanner using src as
|
||||
// its source content.
|
||||
func New(src []byte) *Scanner {
|
||||
// even though we accept src, we read from an io.Reader compatible type
|
||||
// (*bytes.Buffer). So in the future we might easily change it to streaming
|
||||
// read.
|
||||
b := bytes.NewBuffer(src)
|
||||
s := &Scanner{
|
||||
buf: b,
|
||||
src: src,
|
||||
}
|
||||
|
||||
// source positions always start at line 1
|
||||
s.srcPos.Line = 1
|
||||
return s
|
||||
}
|
||||
|
||||
// next reads the next rune from the buffered reader. Returns the rune(0) if
|
||||
// an error occurs (or io.EOF is returned).
|
||||
func (s *Scanner) next() rune {
|
||||
ch, size, err := s.buf.ReadRune()
|
||||
if err != nil {
|
||||
// advance for error reporting
|
||||
s.srcPos.Column++
|
||||
s.srcPos.Offset += size
|
||||
s.lastCharLen = size
|
||||
return eof
|
||||
}
|
||||
|
||||
if ch == utf8.RuneError && size == 1 {
|
||||
s.srcPos.Column++
|
||||
s.srcPos.Offset += size
|
||||
s.lastCharLen = size
|
||||
s.err("illegal UTF-8 encoding")
|
||||
return ch
|
||||
}
|
||||
|
||||
// remember last position
|
||||
s.prevPos = s.srcPos
|
||||
|
||||
s.srcPos.Column++
|
||||
s.lastCharLen = size
|
||||
s.srcPos.Offset += size
|
||||
|
||||
if ch == '\n' {
|
||||
s.srcPos.Line++
|
||||
s.lastLineLen = s.srcPos.Column
|
||||
s.srcPos.Column = 0
|
||||
}
|
||||
|
||||
// debug
|
||||
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
|
||||
return ch
|
||||
}
|
||||
|
||||
// unread unreads the previously read rune and updates the source position
|
||||
func (s *Scanner) unread() {
|
||||
if err := s.buf.UnreadRune(); err != nil {
|
||||
panic(err) // this is a user fault, we should catch it
|
||||
}
|
||||
s.srcPos = s.prevPos // put back last position
|
||||
}
|
||||
|
||||
// peek returns the next rune without advancing the reader.
|
||||
func (s *Scanner) peek() rune {
|
||||
peek, _, err := s.buf.ReadRune()
|
||||
if err != nil {
|
||||
return eof
|
||||
}
|
||||
|
||||
s.buf.UnreadRune()
|
||||
return peek
|
||||
}
|
||||
|
||||
// Scan scans the next token and returns the token.
|
||||
func (s *Scanner) Scan() token.Token {
|
||||
ch := s.next()
|
||||
|
||||
// skip white space
|
||||
for isWhitespace(ch) {
|
||||
ch = s.next()
|
||||
}
|
||||
|
||||
var tok token.Type
|
||||
|
||||
// token text markings
|
||||
s.tokStart = s.srcPos.Offset - s.lastCharLen
|
||||
|
||||
// token position: the initial next() moves the offset by one (the size of the
|
||||
// rune, actually), though we are interested in the starting point
|
||||
s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
|
||||
if s.srcPos.Column > 0 {
|
||||
// common case: last character was not a '\n'
|
||||
s.tokPos.Line = s.srcPos.Line
|
||||
s.tokPos.Column = s.srcPos.Column
|
||||
} else {
|
||||
// last character was a '\n'
|
||||
// (we cannot be at the beginning of the source
|
||||
// since we have called next() at least once)
|
||||
s.tokPos.Line = s.srcPos.Line - 1
|
||||
s.tokPos.Column = s.lastLineLen
|
||||
}
|
||||
|
||||
switch {
|
||||
case isLetter(ch):
|
||||
lit := s.scanIdentifier()
|
||||
if lit == "true" || lit == "false" {
|
||||
tok = token.BOOL
|
||||
} else if lit == "null" {
|
||||
tok = token.NULL
|
||||
} else {
|
||||
s.err("illegal char")
|
||||
}
|
||||
case isDecimal(ch):
|
||||
tok = s.scanNumber(ch)
|
||||
default:
|
||||
switch ch {
|
||||
case eof:
|
||||
tok = token.EOF
|
||||
case '"':
|
||||
tok = token.STRING
|
||||
s.scanString()
|
||||
case '.':
|
||||
tok = token.PERIOD
|
||||
ch = s.peek()
|
||||
if isDecimal(ch) {
|
||||
tok = token.FLOAT
|
||||
ch = s.scanMantissa(ch)
|
||||
ch = s.scanExponent(ch)
|
||||
}
|
||||
case '[':
|
||||
tok = token.LBRACK
|
||||
case ']':
|
||||
tok = token.RBRACK
|
||||
case '{':
|
||||
tok = token.LBRACE
|
||||
case '}':
|
||||
tok = token.RBRACE
|
||||
case ',':
|
||||
tok = token.COMMA
|
||||
case ':':
|
||||
tok = token.COLON
|
||||
case '-':
|
||||
if isDecimal(s.peek()) {
|
||||
ch := s.next()
|
||||
tok = s.scanNumber(ch)
|
||||
} else {
|
||||
s.err("illegal char")
|
||||
}
|
||||
default:
|
||||
s.err("illegal char: " + string(ch))
|
||||
}
|
||||
}
|
||||
|
||||
// finish token ending
|
||||
s.tokEnd = s.srcPos.Offset
|
||||
|
||||
// create token literal
|
||||
var tokenText string
|
||||
if s.tokStart >= 0 {
|
||||
tokenText = string(s.src[s.tokStart:s.tokEnd])
|
||||
}
|
||||
s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
|
||||
|
||||
return token.Token{
|
||||
Type: tok,
|
||||
Pos: s.tokPos,
|
||||
Text: tokenText,
|
||||
}
|
||||
}
|
||||
|
||||
// scanNumber scans a number definition starting with the given rune
|
||||
func (s *Scanner) scanNumber(ch rune) token.Type {
|
||||
zero := ch == '0'
|
||||
pos := s.srcPos
|
||||
|
||||
s.scanMantissa(ch)
|
||||
ch = s.next() // seek forward
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.scanExponent(ch)
|
||||
return token.FLOAT
|
||||
}
|
||||
|
||||
if ch == '.' {
|
||||
ch = s.scanFraction(ch)
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.next()
|
||||
ch = s.scanExponent(ch)
|
||||
}
|
||||
return token.FLOAT
|
||||
}
|
||||
|
||||
if ch != eof {
|
||||
s.unread()
|
||||
}
|
||||
|
||||
// If the number has more than one digit and starts with zero, it's an error
|
||||
if zero && pos != s.srcPos {
|
||||
s.err("numbers cannot start with 0")
|
||||
}
|
||||
|
||||
return token.NUMBER
|
||||
}
|
||||
|
||||
// scanMantissa scans the mantissa beginning from the rune. It returns the next
|
||||
// non-decimal rune. It's used to determine whether it's a fraction or exponent.
|
||||
func (s *Scanner) scanMantissa(ch rune) rune {
|
||||
scanned := false
|
||||
for isDecimal(ch) {
|
||||
ch = s.next()
|
||||
scanned = true
|
||||
}
|
||||
|
||||
if scanned && ch != eof {
|
||||
s.unread()
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanFraction scans the fraction after the '.' rune
|
||||
func (s *Scanner) scanFraction(ch rune) rune {
|
||||
if ch == '.' {
|
||||
ch = s.peek() // we peek just to see if we can move forward
|
||||
ch = s.scanMantissa(ch)
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
|
||||
// rune.
|
||||
func (s *Scanner) scanExponent(ch rune) rune {
|
||||
if ch == 'e' || ch == 'E' {
|
||||
ch = s.next()
|
||||
if ch == '-' || ch == '+' {
|
||||
ch = s.next()
|
||||
}
|
||||
ch = s.scanMantissa(ch)
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanString scans a quoted string
|
||||
func (s *Scanner) scanString() {
|
||||
braces := 0
|
||||
for {
|
||||
// '"' opening already consumed
|
||||
// read character after quote
|
||||
ch := s.next()
|
||||
|
||||
if ch == '\n' || ch < 0 || ch == eof {
|
||||
s.err("literal not terminated")
|
||||
return
|
||||
}
|
||||
|
||||
if ch == '"' {
|
||||
break
|
||||
}
|
||||
|
||||
// If we're going into a ${} then we can ignore quotes for a while
|
||||
if braces == 0 && ch == '$' && s.peek() == '{' {
|
||||
braces++
|
||||
s.next()
|
||||
} else if braces > 0 && ch == '{' {
|
||||
braces++
|
||||
}
|
||||
if braces > 0 && ch == '}' {
|
||||
braces--
|
||||
}
|
||||
|
||||
if ch == '\\' {
|
||||
s.scanEscape()
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// scanEscape scans an escape sequence
|
||||
func (s *Scanner) scanEscape() rune {
|
||||
// http://en.cppreference.com/w/cpp/language/escape
|
||||
ch := s.next() // read character after '\\'
|
||||
switch ch {
|
||||
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
|
||||
// nothing to do
|
||||
case '0', '1', '2', '3', '4', '5', '6', '7':
|
||||
// octal notation
|
||||
ch = s.scanDigits(ch, 8, 3)
|
||||
case 'x':
|
||||
// hexadecimal notation
|
||||
ch = s.scanDigits(s.next(), 16, 2)
|
||||
case 'u':
|
||||
// universal character name
|
||||
ch = s.scanDigits(s.next(), 16, 4)
|
||||
case 'U':
|
||||
// universal character name
|
||||
ch = s.scanDigits(s.next(), 16, 8)
|
||||
default:
|
||||
s.err("illegal char escape")
|
||||
}
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanDigits scans up to n digits in the given base. For example an
|
||||
// octal notation \184 would yield scanDigits(ch, 8, 3)
|
||||
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
|
||||
for n > 0 && digitVal(ch) < base {
|
||||
ch = s.next()
|
||||
n--
|
||||
}
|
||||
if n > 0 {
|
||||
s.err("illegal char escape")
|
||||
}
|
||||
|
||||
// we scanned all digits, put the last non-digit char back
|
||||
s.unread()
|
||||
return ch
|
||||
}
|
||||
|
||||
// scanIdentifier scans an identifier and returns the literal string
|
||||
func (s *Scanner) scanIdentifier() string {
|
||||
offs := s.srcPos.Offset - s.lastCharLen
|
||||
ch := s.next()
|
||||
for isLetter(ch) || isDigit(ch) || ch == '-' {
|
||||
ch = s.next()
|
||||
}
|
||||
|
||||
if ch != eof {
|
||||
s.unread() // we got the identifier, put back the latest char
|
||||
}
|
||||
|
||||
return string(s.src[offs:s.srcPos.Offset])
|
||||
}
|
||||
|
||||
// recentPosition returns the position of the character immediately after the
|
||||
// character or token returned by the last call to Scan.
|
||||
func (s *Scanner) recentPosition() (pos token.Pos) {
|
||||
pos.Offset = s.srcPos.Offset - s.lastCharLen
|
||||
switch {
|
||||
case s.srcPos.Column > 0:
|
||||
// common case: last character was not a '\n'
|
||||
pos.Line = s.srcPos.Line
|
||||
pos.Column = s.srcPos.Column
|
||||
case s.lastLineLen > 0:
|
||||
// last character was a '\n'
|
||||
// (we cannot be at the beginning of the source
|
||||
// since we have called next() at least once)
|
||||
pos.Line = s.srcPos.Line - 1
|
||||
pos.Column = s.lastLineLen
|
||||
default:
|
||||
// at the beginning of the source
|
||||
pos.Line = 1
|
||||
pos.Column = 1
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// err reports any scanning error to the s.Error function. If the function is
|
||||
// not defined, it prints them to os.Stderr by default
|
||||
func (s *Scanner) err(msg string) {
|
||||
s.ErrorCount++
|
||||
pos := s.recentPosition()
|
||||
|
||||
if s.Error != nil {
|
||||
s.Error(pos, msg)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
|
||||
}
|
||||
|
||||
// isLetter returns true if the given rune is a letter
|
||||
func isLetter(ch rune) bool {
|
||||
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
|
||||
}
|
||||
|
||||
// isDigit returns true if the given rune is a digit
|
||||
func isDigit(ch rune) bool {
|
||||
return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
|
||||
}
|
||||
|
||||
// isDecimal returns true if the given rune is a decimal digit
|
||||
func isDecimal(ch rune) bool {
|
||||
return '0' <= ch && ch <= '9'
|
||||
}
|
||||
|
||||
// isHexadecimal returns true if the given rune is a hexadecimal digit
|
||||
func isHexadecimal(ch rune) bool {
|
||||
return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
|
||||
}
|
||||
|
||||
// isWhitespace returns true if the rune is a space, tab, newline or carriage return
|
||||
func isWhitespace(ch rune) bool {
|
||||
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
|
||||
}
|
||||
|
||||
// digitVal returns the integer value of a given octal, decimal, or hexadecimal rune
|
||||
func digitVal(ch rune) int {
|
||||
switch {
|
||||
case '0' <= ch && ch <= '9':
|
||||
return int(ch - '0')
|
||||
case 'a' <= ch && ch <= 'f':
|
||||
return int(ch - 'a' + 10)
|
||||
case 'A' <= ch && ch <= 'F':
|
||||
return int(ch - 'A' + 10)
|
||||
}
|
||||
return 16 // larger than any legal digit val
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
package token
|
||||
|
||||
import "fmt"
|
||||
|
||||
// Pos describes an arbitrary source position
|
||||
// including the file, line, and column location.
|
||||
// A Pos is valid if the line number is > 0.
|
||||
type Pos struct {
|
||||
Filename string // filename, if any
|
||||
Offset int // offset, starting at 0
|
||||
Line int // line number, starting at 1
|
||||
Column int // column number, starting at 1 (character count)
|
||||
}
|
||||
|
||||
// IsValid returns true if the position is valid.
|
||||
func (p *Pos) IsValid() bool { return p.Line > 0 }
|
||||
|
||||
// String returns a string in one of several forms:
|
||||
//
|
||||
// file:line:column valid position with file name
|
||||
// line:column valid position without file name
|
||||
// file invalid position with file name
|
||||
// - invalid position without file name
|
||||
func (p Pos) String() string {
|
||||
s := p.Filename
|
||||
if p.IsValid() {
|
||||
if s != "" {
|
||||
s += ":"
|
||||
}
|
||||
s += fmt.Sprintf("%d:%d", p.Line, p.Column)
|
||||
}
|
||||
if s == "" {
|
||||
s = "-"
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// Before reports whether the position p is before u.
|
||||
func (p Pos) Before(u Pos) bool {
|
||||
return u.Offset > p.Offset || u.Line > p.Line
|
||||
}
|
||||
|
||||
// After reports whether the position p is after u.
|
||||
func (p Pos) After(u Pos) bool {
|
||||
return u.Offset < p.Offset || u.Line < p.Line
|
||||
}
|
@ -1,118 +0,0 @@
|
||||
package token
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
|
||||
hcltoken "github.com/hashicorp/hcl/hcl/token"
|
||||
)
|
||||
|
||||
// Token defines a single HCL token which can be obtained via the Scanner
|
||||
type Token struct {
|
||||
Type Type
|
||||
Pos Pos
|
||||
Text string
|
||||
}
|
||||
|
||||
// Type is the set of lexical tokens of the HCL (HashiCorp Configuration Language)
|
||||
type Type int
|
||||
|
||||
const (
|
||||
// Special tokens
|
||||
ILLEGAL Type = iota
|
||||
EOF
|
||||
|
||||
identifier_beg
|
||||
literal_beg
|
||||
NUMBER // 12345
|
||||
FLOAT // 123.45
|
||||
BOOL // true,false
|
||||
STRING // "abc"
|
||||
NULL // null
|
||||
literal_end
|
||||
identifier_end
|
||||
|
||||
operator_beg
|
||||
LBRACK // [
|
||||
LBRACE // {
|
||||
COMMA // ,
|
||||
PERIOD // .
|
||||
COLON // :
|
||||
|
||||
RBRACK // ]
|
||||
RBRACE // }
|
||||
|
||||
operator_end
|
||||
)
|
||||
|
||||
var tokens = [...]string{
|
||||
ILLEGAL: "ILLEGAL",
|
||||
|
||||
EOF: "EOF",
|
||||
|
||||
NUMBER: "NUMBER",
|
||||
FLOAT: "FLOAT",
|
||||
BOOL: "BOOL",
|
||||
STRING: "STRING",
|
||||
NULL: "NULL",
|
||||
|
||||
LBRACK: "LBRACK",
|
||||
LBRACE: "LBRACE",
|
||||
COMMA: "COMMA",
|
||||
PERIOD: "PERIOD",
|
||||
COLON: "COLON",
|
||||
|
||||
RBRACK: "RBRACK",
|
||||
RBRACE: "RBRACE",
|
||||
}
|
||||
|
||||
// String returns the string corresponding to the token type t.
|
||||
func (t Type) String() string {
|
||||
s := ""
|
||||
if 0 <= t && t < Type(len(tokens)) {
|
||||
s = tokens[t]
|
||||
}
|
||||
if s == "" {
|
||||
s = "token(" + strconv.Itoa(int(t)) + ")"
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// IsIdentifier returns true for tokens corresponding to identifiers and basic
|
||||
// type literals; it returns false otherwise.
|
||||
func (t Type) IsIdentifier() bool { return identifier_beg < t && t < identifier_end }
|
||||
|
||||
// IsLiteral returns true for tokens corresponding to basic type literals; it
|
||||
// returns false otherwise.
|
||||
func (t Type) IsLiteral() bool { return literal_beg < t && t < literal_end }
|
||||
|
||||
// IsOperator returns true for tokens corresponding to operators and
|
||||
// delimiters; it returns false otherwise.
|
||||
func (t Type) IsOperator() bool { return operator_beg < t && t < operator_end }
|
||||
|
||||
// String returns the token's literal text. Note that this is only
|
||||
// applicable for certain token types, such as token.IDENT,
|
||||
// token.STRING, etc.
|
||||
func (t Token) String() string {
|
||||
return fmt.Sprintf("%s %s %s", t.Pos.String(), t.Type.String(), t.Text)
|
||||
}
|
||||
|
||||
// HCLToken converts this token to an HCL token.
|
||||
//
|
||||
// The token type must be a literal type or this will panic.
|
||||
func (t Token) HCLToken() hcltoken.Token {
|
||||
switch t.Type {
|
||||
case BOOL:
|
||||
return hcltoken.Token{Type: hcltoken.BOOL, Text: t.Text}
|
||||
case FLOAT:
|
||||
return hcltoken.Token{Type: hcltoken.FLOAT, Text: t.Text}
|
||||
case NULL:
|
||||
return hcltoken.Token{Type: hcltoken.STRING, Text: ""}
|
||||
case NUMBER:
|
||||
return hcltoken.Token{Type: hcltoken.NUMBER, Text: t.Text}
|
||||
case STRING:
|
||||
return hcltoken.Token{Type: hcltoken.STRING, Text: t.Text, JSON: true}
|
||||
default:
|
||||
panic(fmt.Sprintf("unimplemented HCLToken for type: %s", t.Type))
|
||||
}
|
||||
}
|
@ -1,38 +0,0 @@
|
||||
package hcl
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
type lexModeValue byte
|
||||
|
||||
const (
|
||||
lexModeUnknown lexModeValue = iota
|
||||
lexModeHcl
|
||||
lexModeJson
|
||||
)
|
||||
|
||||
// lexMode returns whether we're going to be parsing in JSON
|
||||
// mode or HCL mode.
|
||||
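//
// For example (illustrative):
//
//	lexMode([]byte(`{"io_mode": "async"}`)) // lexModeJson
//	lexMode([]byte(`io_mode = "async"`))    // lexModeHcl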
func lexMode(v []byte) lexModeValue {
|
||||
var (
|
||||
r rune
|
||||
w int
|
||||
offset int
|
||||
)
|
||||
|
||||
for {
|
||||
r, w = utf8.DecodeRune(v[offset:])
|
||||
offset += w
|
||||
if unicode.IsSpace(r) {
|
||||
continue
|
||||
}
|
||||
if r == '{' {
|
||||
return lexModeJson
|
||||
}
|
||||
break
|
||||
}
|
||||
|
||||
return lexModeHcl
|
||||
}
|
@ -1,39 +0,0 @@
|
||||
package hcl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/hashicorp/hcl/hcl/ast"
|
||||
hclParser "github.com/hashicorp/hcl/hcl/parser"
|
||||
jsonParser "github.com/hashicorp/hcl/json/parser"
|
||||
)
|
||||
|
||||
// ParseBytes accepts a byte slice as input and returns the AST tree.
|
||||
//
|
||||
// Input can be either JSON or HCL
|
||||
func ParseBytes(in []byte) (*ast.File, error) {
|
||||
return parse(in)
|
||||
}
|
||||
|
||||
// ParseString accepts a string as input and returns the AST tree.
|
||||
func ParseString(input string) (*ast.File, error) {
|
||||
return parse([]byte(input))
|
||||
}
|
||||
|
||||
func parse(in []byte) (*ast.File, error) {
|
||||
switch lexMode(in) {
|
||||
case lexModeHcl:
|
||||
return hclParser.Parse(in)
|
||||
case lexModeJson:
|
||||
return jsonParser.Parse(in)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unknown config format")
|
||||
}
|
||||
|
||||
// Parse parses the given input and returns the root object.
|
||||
//
|
||||
// The input format can be either HCL or JSON.
|
||||
func Parse(input string) (*ast.File, error) {
|
||||
return parse([]byte(input))
|
||||
}
|
@ -0,0 +1,66 @@
|
||||
# HCL Changelog
|
||||
|
||||
## v2.4.0 (Apr 13, 2020)
|
||||
|
||||
### Enhancements
|
||||
|
||||
* The Unicode data tables that HCL uses to produce user-perceived "column" positions in diagnostics and other source ranges are now updated to Unicode 12.0.0, which will cause HCL to produce more accurate column numbers for combining characters introduced to Unicode since Unicode 9.0.0.
|
||||
|
||||
### Bugs Fixed
|
||||
|
||||
* json: Fix panic when parsing malformed JSON. ([#358](https://github.com/hashicorp/hcl/pull/358))
|
||||
|
||||
## v2.3.0 (Jan 3, 2020)
|
||||
|
||||
### Enhancements
|
||||
|
||||
* ext/tryfunc: Optional functions `try` and `can` to include in your `hcl.EvalContext` when evaluating expressions, which allow users to make decisions based on the success of expressions; an illustrative sketch of the wiring follows this list. ([#330](https://github.com/hashicorp/hcl/pull/330))
|
||||
* ext/typeexpr: Now has an optional function `convert` which you can include in your `hcl.EvalContext` when evaluating expressions, allowing users to convert values to specific type constraints using the type constraint expression syntax. ([#330](https://github.com/hashicorp/hcl/pull/330))
|
||||
* ext/typeexpr: A new `cty` capsule type `typeexpr.TypeConstraintType` which, when used as either a type constraint for a function parameter or as a type constraint for a `hcldec` attribute specification, will cause the given expression to be interpreted as a type constraint expression rather than a value expression. ([#330](https://github.com/hashicorp/hcl/pull/330))
|
||||
* ext/customdecode: An optional extension that allows overriding the static decoding behavior for expressions either in function arguments or `hcldec` attribute specifications. ([#330](https://github.com/hashicorp/hcl/pull/330))
|
||||
* ext/customdecode: New `cty` capsule types `customdecode.ExpressionType` and `customdecode.ExpressionClosureType` which, when used as either a type constraint for a function parameter or as a type constraint for a `hcldec` attribute specification, will cause the given expression (and, for the closure type, also the `hcl.EvalContext` it was evaluated in) to be captured for later analysis, rather than immediately evaluated. ([#330](https://github.com/hashicorp/hcl/pull/330))
|
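
A sketch of that wiring follows; the surrounding program is an assumption for illustration, not part of the release itself:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/ext/tryfunc"
	"github.com/zclconf/go-cty/cty/function"
)

func main() {
	// Registering the functions under the names "try" and "can" (the names
	// are conventional, not required) makes them available to user
	// expressions evaluated with this context.
	ctx := &hcl.EvalContext{
		Functions: map[string]function.Function{
			"try": tryfunc.TryFunc,
			"can": tryfunc.CanFunc,
		},
	}
	fmt.Printf("registered %d functions\n", len(ctx.Functions))
}
```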
||||
|
||||
## v2.2.0 (Dec 11, 2019)
|
||||
|
||||
### Enhancements
|
||||
|
||||
* hcldec: Attribute evaluation (as part of `AttrSpec` or `BlockAttrsSpec`) now captures expression evaluation metadata in any errors it produces during type conversions, allowing for better feedback in calling applications that are able to make use of this metadata when printing diagnostic messages; an illustrative sketch follows below. ([#329](https://github.com/hashicorp/hcl/pull/329))
|
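
A minimal sketch of the behavior; the attribute name and the malformed input are hypothetical:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hcldec"
	"github.com/hashicorp/hcl/v2/hclsyntax"
	"github.com/zclconf/go-cty/cty"
)

func main() {
	f, _ := hclsyntax.ParseConfig(
		[]byte(`port = "not a number"`), "inline.hcl", hcl.Pos{Line: 1, Column: 1},
	)

	// The value cannot convert to cty.Number, so Decode returns a diagnostic
	// that now also carries the offending expression as metadata.
	spec := &hcldec.AttrSpec{Name: "port", Type: cty.Number, Required: true}
	_, diags := hcldec.Decode(f.Body, spec, nil)
	for _, diag := range diags {
		fmt.Println(diag.Error(), diag.Expression != nil)
	}
}
```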
||||
|
||||
### Bugs Fixed
|
||||
|
||||
* hclsyntax: `IndexExpr`, `SplatExpr`, and `RelativeTraversalExpr` will now report a source range that covers all of their child expression nodes. Previously they would report only the operator part, such as `["foo"]`, `[*]`, or `.foo`, which was problematic for callers using source ranges for code analysis. ([#328](https://github.com/hashicorp/hcl/pull/328))
|
||||
* hclwrite: Parser will no longer panic when the input includes index, splat, or relative traversal syntax. ([#328](https://github.com/hashicorp/hcl/pull/328))
|
||||
|
||||
## v2.1.0 (Nov 19, 2019)
|
||||
|
||||
### Enhancements
|
||||
|
||||
* gohcl: When decoding into a struct value with some fields already populated, those values will be retained if not explicitly overwritten in the given HCL body, with similar overriding/merging behavior as `json.Unmarshal` in the Go standard library; an illustrative sketch follows this list.
|
||||
* hclwrite: New interface to set the expression for an attribute to be a raw token sequence, with no special processing. This has some caveats, so if you intend to use it please refer to the godoc comments. ([#320](https://github.com/hashicorp/hcl/pull/320))
|
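
A sketch of that retained-field behavior; the struct and the inline input are hypothetical:

```go
package main

import (
	"log"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/gohcl"
	"github.com/hashicorp/hcl/v2/hclsyntax"
)

// ServerConfig exists only for this illustration.
type ServerConfig struct {
	Addr string `hcl:"addr,optional"`
	Port int    `hcl:"port,optional"`
}

func main() {
	f, diags := hclsyntax.ParseConfig(
		[]byte(`addr = "0.0.0.0"`), "inline.hcl", hcl.Pos{Line: 1, Column: 1},
	)
	if diags.HasErrors() {
		log.Fatal(diags.Error())
	}

	cfg := ServerConfig{Port: 8080} // pre-populated before decoding
	if diags := gohcl.DecodeBody(f.Body, nil, &cfg); diags.HasErrors() {
		log.Fatal(diags.Error())
	}
	log.Printf("%+v", cfg) // the body never set port, so Port stays 8080
}
```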
||||
|
||||
### Bugs Fixed
|
||||
|
||||
* hclwrite: The `Body.Blocks` method was returning the blocks in an undefined order, rather than preserving the order of declaration in the source input. ([#313](https://github.com/hashicorp/hcl/pull/313))
|
||||
* hclwrite: The `TokensForTraversal` function (and thus in turn the `Body.SetAttributeTraversal` method) was not correctly handling index steps in traversals, and thus producing invalid results. ([#319](https://github.com/hashicorp/hcl/pull/319))
|
||||
|
||||
## v2.0.0 (Oct 2, 2019)
|
||||
|
||||
Initial release of HCL 2, which is a new implementation combining the HCL 1
|
||||
language with the HIL expression language to produce a single language
|
||||
supporting both nested configuration structures and arbitrary expressions.
|
||||
|
||||
HCL 2 has an entirely new Go library API and so is _not_ a drop-in upgrade
|
||||
relative to HCL 1. It's possible to import both versions of HCL into a single
|
||||
program using Go's _semantic import versioning_ mechanism:
|
||||
|
||||
```
|
||||
import (
|
||||
hcl1 "github.com/hashicorp/hcl"
|
||||
hcl2 "github.com/hashicorp/hcl/v2"
|
||||
)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
Prior to v2.0.0 there was no curated changelog. Consult the git history
|
||||
from the latest v1.x.x tag for information on the changes to HCL 1.
|
1
vendor/github.com/hashicorp/hcl/LICENSE → vendor/github.com/hashicorp/hcl/v2/LICENSE
generated
vendored
@ -0,0 +1,205 @@
|
||||
# HCL
|
||||
|
||||
HCL is a toolkit for creating structured configuration languages that are
|
||||
both human- and machine-friendly, for use with command-line tools.
|
||||
Although intended to be generally useful, it is primarily targeted
|
||||
towards devops tools, servers, etc.
|
||||
|
||||
> **NOTE:** This is major version 2 of HCL, whose Go API is incompatible with
|
||||
> major version 1. Both versions are available for selection in Go Modules
|
||||
> projects. HCL 2 _cannot_ be imported from Go projects that are not using Go Modules. For more information, see
|
||||
> [our version selection guide](https://github.com/hashicorp/hcl/wiki/Version-Selection).
|
||||
|
||||
HCL has both a _native syntax_, intended to be pleasant to read and write for
|
||||
humans, and a JSON-based variant that is easier for machines to generate
|
||||
and parse.
|
||||
|
||||
The HCL native syntax is inspired by [libucl](https://github.com/vstakhov/libucl),
|
||||
[nginx configuration](http://nginx.org/en/docs/beginners_guide.html#conf_structure),
|
||||
and others.
|
||||
|
||||
It includes an expression syntax that allows basic inline computation and,
|
||||
with support from the calling application, use of variables and functions
|
||||
for more dynamic configuration languages.
|
||||
|
||||
HCL provides a set of constructs that can be used by a calling application to
|
||||
construct a configuration language. The application defines which attribute
|
||||
names and nested block types are expected, and HCL parses the configuration
|
||||
file, verifies that it conforms to the expected structure, and returns
|
||||
high-level objects that the application can use for further processing.
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"github.com/hashicorp/hcl/v2/hclsimple"
|
||||
)
|
||||
|
||||
type Config struct {
|
||||
LogLevel string `hcl:"log_level"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
var config Config
|
||||
err := hclsimple.DecodeFile("config.hcl", nil, &config)
|
||||
if err != nil {
|
||||
log.Fatalf("Failed to load configuration: %s", err)
|
||||
}
|
||||
log.Printf("Configuration is %#v", config)
|
||||
}
|
||||
```
|
||||
|
||||
A lower-level API is available for applications that need more control over
|
||||
the parsing, decoding, and evaluation of configuration. For more information,
|
||||
see [the package documentation](https://pkg.go.dev/github.com/hashicorp/hcl/v2).
|
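
A minimal sketch of that lower-level flow (using the same hypothetical `config.hcl` and `Config` struct as above): parse with `hclparse`, then decode with `gohcl`:

```go
package main

import (
	"log"

	"github.com/hashicorp/hcl/v2/gohcl"
	"github.com/hashicorp/hcl/v2/hclparse"
)

type Config struct {
	LogLevel string `hcl:"log_level"`
}

func main() {
	parser := hclparse.NewParser()
	f, diags := parser.ParseHCLFile("config.hcl")
	if diags.HasErrors() {
		log.Fatal(diags.Error())
	}

	var config Config
	if diags := gohcl.DecodeBody(f.Body, nil, &config); diags.HasErrors() {
		log.Fatal(diags.Error())
	}
	log.Printf("Configuration is %#v", config)
}
```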
||||
|
||||
## Why?
|
||||
|
||||
Newcomers to HCL often ask: why not JSON, YAML, etc?
|
||||
|
||||
Whereas JSON and YAML are formats for serializing data structures, HCL is
|
||||
a syntax and API specifically designed for building structured configuration
|
||||
formats.
|
||||
|
||||
HCL attempts to strike a compromise between generic serialization formats
|
||||
such as JSON and configuration formats built around full programming languages
|
||||
such as Ruby. HCL syntax is designed to be easily read and written by humans,
|
||||
and allows _declarative_ logic to permit its use in more complex applications.
|
||||
|
||||
HCL is intended as a base syntax for configuration formats built
|
||||
around key-value pairs and hierarchical blocks whose structure is well-defined
|
||||
by the calling application, and this definition of the configuration structure
|
||||
allows for better error messages and more convenient definition within the
|
||||
calling application.
|
||||
|
||||
It can't be denied that JSON is very convenient as a _lingua franca_
|
||||
for interoperability between different pieces of software. Because of this,
|
||||
HCL defines a common configuration model that can be parsed from either its
|
||||
native syntax or from a well-defined equivalent JSON structure. This allows
|
||||
configuration to be provided as a mixture of human-authored configuration
|
||||
files in the native syntax and machine-generated files in JSON.
|
||||
|
||||
## Information Model and Syntax
|
||||
|
||||
HCL is built around two primary concepts: _attributes_ and _blocks_. In
|
||||
native syntax, a configuration file for a hypothetical application might look
|
||||
something like this:
|
||||
|
||||
```hcl
|
||||
io_mode = "async"
|
||||
|
||||
service "http" "web_proxy" {
|
||||
listen_addr = "127.0.0.1:8080"
|
||||
|
||||
process "main" {
|
||||
command = ["/usr/local/bin/awesome-app", "server"]
|
||||
}
|
||||
|
||||
process "mgmt" {
|
||||
command = ["/usr/local/bin/awesome-app", "mgmt"]
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
The JSON equivalent of this configuration is the following:
|
||||
|
||||
```json
|
||||
{
|
||||
"io_mode": "async",
|
||||
"service": {
|
||||
"http": {
|
||||
"web_proxy": {
|
||||
"listen_addr": "127.0.0.1:8080",
|
||||
"process": {
|
||||
"main": {
|
||||
"command": ["/usr/local/bin/awesome-app", "server"]
|
||||
},
|
||||
"mgmt": {
|
||||
"command": ["/usr/local/bin/awesome-app", "mgmt"]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Regardless of which syntax is used, the API within the calling application
|
||||
is the same. It can either work directly with the low-level attributes and
|
||||
blocks, for more advanced use-cases, or it can use one of the _decoder_
|
||||
packages to declaratively extract into either Go structs or dynamic value
|
||||
structures.
|
||||
|
||||
Attribute values can be expressions as well as just literal values:
|
||||
|
||||
```hcl
|
||||
# Arithmetic with literals and application-provided variables
|
||||
sum = 1 + addend
|
||||
|
||||
# String interpolation and templates
|
||||
message = "Hello, ${name}!"
|
||||
|
||||
# Application-provided functions
|
||||
shouty_message = upper(message)
|
||||
```
|
||||
|
||||
Although JSON syntax doesn't permit direct use of expressions, the interpolation
|
||||
syntax allows use of arbitrary expressions within JSON strings:
|
||||
|
||||
```json
|
||||
{
|
||||
"sum": "${1 + addend}",
|
||||
"message": "Hello, ${name}!",
|
||||
"shouty_message": "${upper(message)}"
|
||||
}
|
||||
```
|
||||
|
||||
For more information, see the detailed specifications:
|
||||
|
||||
* [Syntax-agnostic Information Model](spec.md)
|
||||
* [HCL Native Syntax](hclsyntax/spec.md)
|
||||
* [JSON Representation](json/spec.md)
|
||||
|
||||
## Changes in 2.0
|
||||
|
||||
Version 2.0 of HCL combines the features of HCL 1.0 with those of the
|
||||
interpolation language HIL to produce a single configuration language that
|
||||
supports arbitrary expressions.
|
||||
|
||||
This new version has a completely new parser and Go API, with no direct
|
||||
migration path. Although the syntax is similar, the implementation takes some
|
||||
very different approaches to improve on some "rough edges" that existed with
|
||||
the original implementation and to allow for more robust error handling.
|
||||
|
||||
It's possible to import both HCL 1 and HCL 2 into the same program using Go's
|
||||
_semantic import versioning_ mechanism:
|
||||
|
||||
```go
|
||||
import (
|
||||
hcl1 "github.com/hashicorp/hcl"
|
||||
hcl2 "github.com/hashicorp/hcl/v2"
|
||||
)
|
||||
```
|
||||
|
||||
## Acknowledgements
|
||||
|
||||
HCL was heavily inspired by [libucl](https://github.com/vstakhov/libucl),
|
||||
by [Vsevolod Stakhov](https://github.com/vstakhov).
|
||||
|
||||
HCL and HIL originate in [HashiCorp Terraform](https://terraform.io/),
|
||||
with the original parsers for each written by
|
||||
[Mitchell Hashimoto](https://github.com/mitchellh).
|
||||
|
||||
The original HCL parser was ported to pure Go (from yacc) by
|
||||
[Fatih Arslan](https://github.com/fatih). The structure-related portions of
|
||||
the new native syntax parser build on that work.
|
||||
|
||||
The original HIL parser was ported to pure Go (from yacc) by
|
||||
[Martin Atkins](https://github.com/apparentlymart). The expression-related
|
||||
portions of the new native syntax parser build on that work.
|
||||
|
||||
HCL 2, which merged the original HCL and HIL languages into this single new
|
||||
language, builds on design and prototyping work by
|
||||
[Martin Atkins](https://github.com/apparentlymart) in
|
||||
[zcl](https://github.com/zclconf/go-zcl).
|
@ -0,0 +1,13 @@
|
||||
build: off
|
||||
|
||||
clone_folder: c:\gopath\src\github.com\hashicorp\hcl
|
||||
|
||||
environment:
|
||||
GOPATH: c:\gopath
|
||||
GO111MODULE: on
|
||||
GOPROXY: https://goproxy.io
|
||||
|
||||
stack: go 1.12
|
||||
|
||||
test_script:
|
||||
- go test ./...
|
@ -0,0 +1,143 @@
|
||||
package hcl
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// DiagnosticSeverity represents the severity of a diagnostic.
|
||||
type DiagnosticSeverity int
|
||||
|
||||
const (
|
||||
// DiagInvalid is the invalid zero value of DiagnosticSeverity
|
||||
DiagInvalid DiagnosticSeverity = iota
|
||||
|
||||
// DiagError indicates that the problem reported by a diagnostic prevents
|
||||
// further progress in parsing and/or evaluating the subject.
|
||||
DiagError
|
||||
|
||||
// DiagWarning indicates that the problem reported by a diagnostic warrants
|
||||
// user attention but does not prevent further progress. It is most
|
||||
// commonly used for showing deprecation notices.
|
||||
DiagWarning
|
||||
)
|
||||
|
||||
// Diagnostic represents information to be presented to a user about an
|
||||
// error or anomaly in parsing or evaluating configuration.
|
||||
type Diagnostic struct {
|
||||
Severity DiagnosticSeverity
|
||||
|
||||
// Summary and Detail contain the English-language description of the
|
||||
// problem. Summary is a terse description of the general problem and
|
||||
// detail is a more elaborate, often multi-sentence description of
|
||||
// the problem and what might be done to solve it.
|
||||
Summary string
|
||||
Detail string
|
||||
|
||||
// Subject and Context are both source ranges relating to the diagnostic.
|
||||
//
|
||||
// Subject is a tight range referring to exactly the construct that
|
||||
// is problematic, while Context is an optional broader range (which should
|
||||
// fully contain Subject) that ought to be shown around Subject when
|
||||
// generating isolated source-code snippets in diagnostic messages.
|
||||
// If Context is nil, the Subject is also the Context.
|
||||
//
|
||||
// Some diagnostics have no source ranges at all. If Context is set then
|
||||
// Subject should always also be set.
|
||||
Subject *Range
|
||||
Context *Range
|
||||
|
||||
// For diagnostics that occur when evaluating an expression, Expression
|
||||
// may refer to that expression and EvalContext may point to the
|
||||
// EvalContext that was active when evaluating it. This may allow for the
|
||||
// inclusion of additional useful information when rendering a diagnostic
|
||||
// message to the user.
|
||||
//
|
||||
// It is not always possible to select a single EvalContext for a
|
||||
// diagnostic, and so in some cases this field may be nil even when an
|
||||
// expression causes a problem.
|
||||
//
|
||||
// EvalContexts form a tree, so the given EvalContext may refer to a parent
|
||||
// which in turn refers to another parent, etc. For a full picture of all
|
||||
// of the active variables and functions the caller must walk up this
|
||||
// chain, preferring definitions that are "closer" to the expression in
|
||||
// case of colliding names.
|
||||
Expression Expression
|
||||
EvalContext *EvalContext
|
||||
}
|
||||
|
||||
// Diagnostics is a list of Diagnostic instances.
|
||||
type Diagnostics []*Diagnostic
|
||||
|
||||
// error implementation, so that diagnostics can be returned via APIs
|
||||
// that normally deal in vanilla Go errors.
|
||||
//
|
||||
// This presents only minimal context about the error, for compatibility
|
||||
// with usual expectations about how errors will present as strings.
|
||||
func (d *Diagnostic) Error() string {
|
||||
return fmt.Sprintf("%s: %s; %s", d.Subject, d.Summary, d.Detail)
|
||||
}
|
||||
|
||||
// error implementation, so that sets of diagnostics can be returned via
|
||||
// APIs that normally deal in vanilla Go errors.
|
||||
func (d Diagnostics) Error() string {
|
||||
count := len(d)
|
||||
switch {
|
||||
case count == 0:
|
||||
return "no diagnostics"
|
||||
case count == 1:
|
||||
return d[0].Error()
|
||||
default:
|
||||
return fmt.Sprintf("%s, and %d other diagnostic(s)", d[0].Error(), count-1)
|
||||
}
|
||||
}
|
||||
|
||||
// Append appends a new error to a Diagnostics and returns the whole Diagnostics.
|
||||
//
|
||||
// This is provided as a convenience for returning from a function that
|
||||
// collects and then returns a set of diagnostics:
|
||||
//
|
||||
// return nil, diags.Append(&hcl.Diagnostic{ ... })
|
||||
//
|
||||
// Note that this modifies the array underlying the diagnostics slice, so
|
||||
// must be used carefully within a single codepath. It is incorrect (and rude)
|
||||
// to extend a diagnostics created by a different subsystem.
|
||||
func (d Diagnostics) Append(diag *Diagnostic) Diagnostics {
|
||||
return append(d, diag)
|
||||
}
|
||||
|
||||
// Extend concatenates the given Diagnostics with the receiver and returns
|
||||
// the whole new Diagnostics.
|
||||
//
|
||||
// This is similar to Append but accepts multiple diagnostics to add. It has
|
||||
// all the same caveats and constraints.
|
||||
func (d Diagnostics) Extend(diags Diagnostics) Diagnostics {
|
||||
return append(d, diags...)
|
||||
}
|
||||
|
||||
// HasErrors returns true if the receiver contains any diagnostics of
|
||||
// severity DiagError.
|
||||
func (d Diagnostics) HasErrors() bool {
|
||||
for _, diag := range d {
|
||||
if diag.Severity == DiagError {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (d Diagnostics) Errs() []error {
|
||||
var errs []error
|
||||
for _, diag := range d {
|
||||
if diag.Severity == DiagError {
|
||||
errs = append(errs, diag)
|
||||
}
|
||||
}
|
||||
|
||||
return errs
|
||||
}
|
||||
|
||||
// A DiagnosticWriter emits diagnostics somehow.
|
||||
type DiagnosticWriter interface {
|
||||
WriteDiagnostic(*Diagnostic) error
|
||||
WriteDiagnostics(Diagnostics) error
|
||||
}
|
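To make the API above concrete, here is a minimal sketch (not part of this diff) that builds a `Diagnostics` value with `Append` and then inspects it with `HasErrors` and `Errs`; the summary, detail, and range values are invented for illustration:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
)

func main() {
	subject := hcl.Range{
		Filename: "config.hcl", // illustrative filename
		Start:    hcl.Pos{Line: 2, Column: 1, Byte: 10},
		End:      hcl.Pos{Line: 2, Column: 7, Byte: 16},
	}

	var diags hcl.Diagnostics
	diags = diags.Append(&hcl.Diagnostic{
		Severity: hcl.DiagError,
		Summary:  "Unsupported argument",
		Detail:   "An argument named \"colour\" is not expected here.",
		Subject:  &subject,
	})

	if diags.HasErrors() {
		// Diagnostics itself implements error, so it can cross API
		// boundaries that expect a vanilla Go error.
		fmt.Println(diags.Error())
	}
	for _, err := range diags.Errs() {
		fmt.Println(err)
	}
}
```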
@ -0,0 +1,311 @@
package hcl

import (
	"bufio"
	"bytes"
	"errors"
	"fmt"
	"io"
	"sort"

	wordwrap "github.com/mitchellh/go-wordwrap"
	"github.com/zclconf/go-cty/cty"
)

type diagnosticTextWriter struct {
	files map[string]*File
	wr    io.Writer
	width uint
	color bool
}

// NewDiagnosticTextWriter creates a DiagnosticWriter that writes diagnostics
// to the given writer as formatted text.
//
// It is designed to produce text appropriate to print in a monospaced font
// in a terminal of a particular width, or optionally with no width limit.
//
// The given width may be zero to disable word-wrapping of the detail text
// and truncation of source code snippets.
//
// If color is set to true, the output will include VT100 escape sequences to
// color-code the severity indicators. It is suggested to turn this off if
// the target writer is not a terminal.
func NewDiagnosticTextWriter(wr io.Writer, files map[string]*File, width uint, color bool) DiagnosticWriter {
	return &diagnosticTextWriter{
		files: files,
		wr:    wr,
		width: width,
		color: color,
	}
}

func (w *diagnosticTextWriter) WriteDiagnostic(diag *Diagnostic) error {
	if diag == nil {
		return errors.New("nil diagnostic")
	}

	var colorCode, highlightCode, resetCode string
	if w.color {
		switch diag.Severity {
		case DiagError:
			colorCode = "\x1b[31m"
		case DiagWarning:
			colorCode = "\x1b[33m"
		}
		resetCode = "\x1b[0m"
		highlightCode = "\x1b[1;4m"
	}

	var severityStr string
	switch diag.Severity {
	case DiagError:
		severityStr = "Error"
	case DiagWarning:
		severityStr = "Warning"
	default:
		// should never happen
		severityStr = "???????"
	}

	fmt.Fprintf(w.wr, "%s%s%s: %s\n\n", colorCode, severityStr, resetCode, diag.Summary)

	if diag.Subject != nil {
		snipRange := *diag.Subject
		highlightRange := snipRange
		if diag.Context != nil {
			// Show enough of the source code to include both the subject
			// and context ranges, which overlap in all reasonable
			// situations.
			snipRange = RangeOver(snipRange, *diag.Context)
		}
		// We can't illustrate an empty range, so we'll turn such ranges into
		// single-character ranges, which might not be totally valid (may point
		// off the end of a line, or off the end of the file) but are good
		// enough for the bounds checks we do below.
		if snipRange.Empty() {
			snipRange.End.Byte++
			snipRange.End.Column++
		}
		if highlightRange.Empty() {
			highlightRange.End.Byte++
			highlightRange.End.Column++
		}

		file := w.files[diag.Subject.Filename]
		if file == nil || file.Bytes == nil {
			fmt.Fprintf(w.wr, "  on %s line %d:\n  (source code not available)\n\n", diag.Subject.Filename, diag.Subject.Start.Line)
		} else {

			var contextLine string
			if diag.Subject != nil {
				contextLine = contextString(file, diag.Subject.Start.Byte)
				if contextLine != "" {
					contextLine = ", in " + contextLine
				}
			}

			fmt.Fprintf(w.wr, "  on %s line %d%s:\n", diag.Subject.Filename, diag.Subject.Start.Line, contextLine)

			src := file.Bytes
			sc := NewRangeScanner(src, diag.Subject.Filename, bufio.ScanLines)

			for sc.Scan() {
				lineRange := sc.Range()
				if !lineRange.Overlaps(snipRange) {
					continue
				}

				beforeRange, highlightedRange, afterRange := lineRange.PartitionAround(highlightRange)
				if highlightedRange.Empty() {
					fmt.Fprintf(w.wr, "%4d: %s\n", lineRange.Start.Line, sc.Bytes())
				} else {
					before := beforeRange.SliceBytes(src)
					highlighted := highlightedRange.SliceBytes(src)
					after := afterRange.SliceBytes(src)
					fmt.Fprintf(
						w.wr, "%4d: %s%s%s%s%s\n",
						lineRange.Start.Line,
						before,
						highlightCode, highlighted, resetCode,
						after,
					)
				}

			}

			w.wr.Write([]byte{'\n'})
		}

		if diag.Expression != nil && diag.EvalContext != nil {
			// We will attempt to render the values for any variables
			// referenced in the given expression as additional context, for
			// situations where the same expression is evaluated multiple
			// times in different scopes.
			expr := diag.Expression
			ctx := diag.EvalContext

			vars := expr.Variables()
			stmts := make([]string, 0, len(vars))
			seen := make(map[string]struct{}, len(vars))
			for _, traversal := range vars {
				val, diags := traversal.TraverseAbs(ctx)
				if diags.HasErrors() {
					// Skip anything that generates errors, since we probably
					// already have the same error in our diagnostics set.
					continue
				}

				traversalStr := w.traversalStr(traversal)
				if _, exists := seen[traversalStr]; exists {
					continue // don't show duplicates when the same variable is referenced multiple times
				}
				switch {
				case !val.IsKnown():
					// Can't say anything about this yet, then.
					continue
				case val.IsNull():
					stmts = append(stmts, fmt.Sprintf("%s set to null", traversalStr))
				default:
					stmts = append(stmts, fmt.Sprintf("%s as %s", traversalStr, w.valueStr(val)))
				}
				seen[traversalStr] = struct{}{}
			}

			sort.Strings(stmts) // FIXME: Should maybe use a traversal-aware sort that can sort numeric indexes properly?
			last := len(stmts) - 1

			for i, stmt := range stmts {
				switch i {
				case 0:
					w.wr.Write([]byte{'w', 'i', 't', 'h', ' '})
				default:
					w.wr.Write([]byte{' ', ' ', ' ', ' ', ' '})
				}
				w.wr.Write([]byte(stmt))
				switch i {
				case last:
					w.wr.Write([]byte{'.', '\n', '\n'})
				default:
					w.wr.Write([]byte{',', '\n'})
				}
			}
		}
	}

	if diag.Detail != "" {
		detail := diag.Detail
		if w.width != 0 {
			detail = wordwrap.WrapString(detail, w.width)
		}
		fmt.Fprintf(w.wr, "%s\n\n", detail)
	}

	return nil
}

func (w *diagnosticTextWriter) WriteDiagnostics(diags Diagnostics) error {
	for _, diag := range diags {
		err := w.WriteDiagnostic(diag)
		if err != nil {
			return err
		}
	}
	return nil
}

func (w *diagnosticTextWriter) traversalStr(traversal Traversal) string {
	// This is a specialized subset of traversal rendering tailored to
	// producing helpful contextual messages in diagnostics. It is not
	// comprehensive nor intended to be used for other purposes.

	var buf bytes.Buffer
	for _, step := range traversal {
		switch tStep := step.(type) {
		case TraverseRoot:
			buf.WriteString(tStep.Name)
		case TraverseAttr:
			buf.WriteByte('.')
			buf.WriteString(tStep.Name)
		case TraverseIndex:
			buf.WriteByte('[')
			if keyTy := tStep.Key.Type(); keyTy.IsPrimitiveType() {
				buf.WriteString(w.valueStr(tStep.Key))
			} else {
				// We'll just use a placeholder for more complex values,
				// since otherwise our result could grow ridiculously long.
				buf.WriteString("...")
			}
			buf.WriteByte(']')
		}
	}
	return buf.String()
}

func (w *diagnosticTextWriter) valueStr(val cty.Value) string {
	// This is a specialized subset of value rendering tailored to producing
	// helpful but concise messages in diagnostics. It is not comprehensive
	// nor intended to be used for other purposes.

	ty := val.Type()
	switch {
	case val.IsNull():
		return "null"
	case !val.IsKnown():
		// Should never happen here because we should filter before we get
		// in here, but we'll do something reasonable rather than panic.
		return "(not yet known)"
	case ty == cty.Bool:
		if val.True() {
			return "true"
		}
		return "false"
	case ty == cty.Number:
		bf := val.AsBigFloat()
		return bf.Text('g', 10)
	case ty == cty.String:
		// Go string syntax is not exactly the same as HCL native string syntax,
		// but we'll accept the minor edge-cases where this is different here
		// for now, just to get something reasonable here.
		return fmt.Sprintf("%q", val.AsString())
	case ty.IsCollectionType() || ty.IsTupleType():
		l := val.LengthInt()
		switch l {
		case 0:
			return "empty " + ty.FriendlyName()
		case 1:
			return ty.FriendlyName() + " with 1 element"
		default:
			return fmt.Sprintf("%s with %d elements", ty.FriendlyName(), l)
		}
	case ty.IsObjectType():
		atys := ty.AttributeTypes()
		l := len(atys)
		switch l {
		case 0:
			return "object with no attributes"
		case 1:
			var name string
			for k := range atys {
				name = k
			}
			return fmt.Sprintf("object with 1 attribute %q", name)
		default:
			return fmt.Sprintf("object with %d attributes", l)
		}
	default:
		return ty.FriendlyName()
	}
}

func contextString(file *File, offset int) string {
	type contextStringer interface {
		ContextString(offset int) string
	}

	if cser, ok := file.Nav.(contextStringer); ok {
		return cser.ContextString(offset)
	}
	return ""
}
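A minimal sketch (not part of this diff) of wiring this writer up to a parser; the filename and terminal width here are illustrative assumptions:

```go
package main

import (
	"os"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclparse"
)

func main() {
	parser := hclparse.NewParser()
	_, diags := parser.ParseHCLFile("config.hcl")

	// parser.Files() supplies the source bytes that the writer uses to
	// render annotated code snippets under each diagnostic.
	wr := hcl.NewDiagnosticTextWriter(os.Stderr, parser.Files(), 78, true)
	if diags.HasErrors() {
		wr.WriteDiagnostics(diags)
		os.Exit(1)
	}
}
```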
@ -0,0 +1,24 @@
package hcl

import (
	"github.com/agext/levenshtein"
)

// nameSuggestion tries to find a name from the given slice of suggested names
// that is close to the given name and returns it if found. If no suggestion
// is close enough, returns the empty string.
//
// The suggestions are tried in order, so earlier suggestions take precedence
// if the given string is similar to two or more suggestions.
//
// This function is intended to be used with a relatively-small number of
// suggestions. It's not optimized for hundreds or thousands of them.
func nameSuggestion(given string, suggestions []string) string {
	for _, suggestion := range suggestions {
		dist := levenshtein.Distance(given, suggestion, nil)
		if dist < 3 { // threshold determined experimentally
			return suggestion
		}
	}
	return ""
}
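As a usage sketch, a parser or decoder in this package could fold `nameSuggestion` into a diagnostic detail like this; the helper and its wording are hypothetical, not part of the diff, and assume `fmt` is imported:

```go
// unknownVariableDetail is a hypothetical helper inside package hcl,
// shown for illustration only.
func unknownVariableDetail(name string, known []string) string {
	detail := fmt.Sprintf("There is no variable named %q.", name)
	if suggestion := nameSuggestion(name, known); suggestion != "" {
		detail += fmt.Sprintf(" Did you mean %q?", suggestion)
	}
	return detail
}
```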
@ -0,0 +1,34 @@
// Package hcl contains the main modelling types and general utility functions
// for HCL.
//
// For a simple entry point into HCL, see the package in the subdirectory
// "hclsimple", which has an opinionated function Decode that can decode HCL
// configurations in either native HCL syntax or JSON syntax into a Go struct
// type:
//
//	package main
//
//	import (
//		"log"
//		"github.com/hashicorp/hcl/v2/hclsimple"
//	)
//
//	type Config struct {
//		LogLevel string `hcl:"log_level"`
//	}
//
//	func main() {
//		var config Config
//		err := hclsimple.DecodeFile("config.hcl", nil, &config)
//		if err != nil {
//			log.Fatalf("Failed to load configuration: %s", err)
//		}
//		log.Printf("Configuration is %#v", config)
//	}
//
// If your application needs more control over the evaluation of the
// configuration, you can use the functions in the subdirectories hclparse,
// gohcl, hcldec, etc. Splitting the handling of configuration into multiple
// phases allows for advanced patterns such as allowing expressions in one
// part of the configuration to refer to data defined in another part.
package hcl
@ -0,0 +1,25 @@
package hcl

import (
	"github.com/zclconf/go-cty/cty"
	"github.com/zclconf/go-cty/cty/function"
)

// An EvalContext provides the variables and functions that should be used
// to evaluate an expression.
type EvalContext struct {
	Variables map[string]cty.Value
	Functions map[string]function.Function
	parent    *EvalContext
}

// NewChild returns a new EvalContext that is a child of the receiver.
func (ctx *EvalContext) NewChild() *EvalContext {
	return &EvalContext{parent: ctx}
}

// Parent returns the parent of the receiver, or nil if the receiver has
// no parent.
func (ctx *EvalContext) Parent() *EvalContext {
	return ctx.parent
}
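A brief sketch (not part of this diff) of constructing an evaluation context and deriving a child scope from it; the variable names and the `upper` function binding are illustrative:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/zclconf/go-cty/cty"
	"github.com/zclconf/go-cty/cty/function"
	"github.com/zclconf/go-cty/cty/function/stdlib"
)

func main() {
	root := &hcl.EvalContext{
		Variables: map[string]cty.Value{
			"greeting": cty.StringVal("Hello"),
		},
		Functions: map[string]function.Function{
			"upper": stdlib.UpperFunc,
		},
	}

	// A child context can add or shadow variables without mutating the
	// root; expression evaluators walk the parent chain to resolve names.
	child := root.NewChild()
	child.Variables = map[string]cty.Value{
		"name": cty.StringVal("Ermintrude"),
	}

	fmt.Println(child.Parent() == root) // true
}
```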
@ -0,0 +1,46 @@
package hcl

// ExprCall tests if the given expression is a function call and,
// if so, extracts the function name and the expressions that represent
// the arguments. If the given expression is not statically a function call,
// error diagnostics are returned.
//
// A particular Expression implementation can support this function by
// offering a method called ExprCall that takes no arguments and returns
// *StaticCall. This method should return nil if a static call cannot
// be extracted. Alternatively, an implementation can support
// UnwrapExpression to delegate handling of this function to a wrapped
// Expression object.
func ExprCall(expr Expression) (*StaticCall, Diagnostics) {
	type exprCall interface {
		ExprCall() *StaticCall
	}

	physExpr := UnwrapExpressionUntil(expr, func(expr Expression) bool {
		_, supported := expr.(exprCall)
		return supported
	})

	if exC, supported := physExpr.(exprCall); supported {
		if call := exC.ExprCall(); call != nil {
			return call, nil
		}
	}
	return nil, Diagnostics{
		&Diagnostic{
			Severity: DiagError,
			Summary:  "Invalid expression",
			Detail:   "A static function call is required.",
			Subject:  expr.StartRange().Ptr(),
		},
	}
}

// StaticCall represents a function call that was extracted statically from
// an expression using ExprCall.
type StaticCall struct {
	Name      string
	NameRange Range
	Arguments []Expression
	ArgsRange Range
}
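A sketch (not part of this diff) of how an application might use `ExprCall` against a parsed native-syntax expression; the source string is invented, and `hclsyntax.ParseExpression` is the usual entry point for this:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclsyntax"
)

func main() {
	expr, diags := hclsyntax.ParseExpression([]byte(`min(1, 2, 3)`), "example.hcl", hcl.InitialPos)
	if diags.HasErrors() {
		panic(diags.Error())
	}

	// Statically deconstruct the call without evaluating it; ExprList and
	// ExprMap follow the same pattern for list and map constructs.
	call, diags := hcl.ExprCall(expr)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	fmt.Printf("call to %q with %d arguments\n", call.Name, len(call.Arguments))
}
```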
@ -0,0 +1,37 @@
package hcl

// ExprList tests if the given expression is a static list construct and,
// if so, extracts the expressions that represent the list elements.
// If the given expression is not a static list, error diagnostics are
// returned.
//
// A particular Expression implementation can support this function by
// offering a method called ExprList that takes no arguments and returns
// []Expression. This method should return nil if a static list cannot
// be extracted. Alternatively, an implementation can support
// UnwrapExpression to delegate handling of this function to a wrapped
// Expression object.
func ExprList(expr Expression) ([]Expression, Diagnostics) {
	type exprList interface {
		ExprList() []Expression
	}

	physExpr := UnwrapExpressionUntil(expr, func(expr Expression) bool {
		_, supported := expr.(exprList)
		return supported
	})

	if exL, supported := physExpr.(exprList); supported {
		if list := exL.ExprList(); list != nil {
			return list, nil
		}
	}
	return nil, Diagnostics{
		&Diagnostic{
			Severity: DiagError,
			Summary:  "Invalid expression",
			Detail:   "A static list expression is required.",
			Subject:  expr.StartRange().Ptr(),
		},
	}
}
@ -0,0 +1,44 @@
package hcl

// ExprMap tests if the given expression is a static map construct and,
// if so, extracts the expressions that represent the map elements.
// If the given expression is not a static map, error diagnostics are
// returned.
//
// A particular Expression implementation can support this function by
// offering a method called ExprMap that takes no arguments and returns
// []KeyValuePair. This method should return nil if a static map cannot
// be extracted. Alternatively, an implementation can support
// UnwrapExpression to delegate handling of this function to a wrapped
// Expression object.
func ExprMap(expr Expression) ([]KeyValuePair, Diagnostics) {
	type exprMap interface {
		ExprMap() []KeyValuePair
	}

	physExpr := UnwrapExpressionUntil(expr, func(expr Expression) bool {
		_, supported := expr.(exprMap)
		return supported
	})

	if exM, supported := physExpr.(exprMap); supported {
		if pairs := exM.ExprMap(); pairs != nil {
			return pairs, nil
		}
	}
	return nil, Diagnostics{
		&Diagnostic{
			Severity: DiagError,
			Summary:  "Invalid expression",
			Detail:   "A static map expression is required.",
			Subject:  expr.StartRange().Ptr(),
		},
	}
}

// KeyValuePair represents a pair of expressions that serve as a single item
// within a map or object definition construct.
type KeyValuePair struct {
	Key   Expression
	Value Expression
}
@ -0,0 +1,68 @@
package hcl

type unwrapExpression interface {
	UnwrapExpression() Expression
}

// UnwrapExpression removes any "wrapper" expressions from the given expression,
// to recover the representation of the physical expression given in source
// code.
//
// Sometimes wrapping expressions are used to modify expression behavior, e.g.
// in extensions that need to make some local variables available to certain
// sub-trees of the configuration. This can make it difficult to reliably
// type-assert on the physical AST types used by the underlying syntax.
//
// Unwrapping an expression may modify its behavior by stripping away any
// additional constraints or capabilities being applied to the Value and
// Variables methods, so this function should generally only be used prior
// to operations that concern themselves with the static syntax of the input
// configuration, and not with the effective value of the expression.
//
// Wrapper expression types must support unwrapping by implementing a method
// called UnwrapExpression that takes no arguments and returns the embedded
// Expression. Implementations of this method should peel away only one level
// of wrapping, if multiple are present. This method may return nil to
// indicate _dynamically_ that no wrapped expression is available, for
// expression types that might only behave as wrappers in certain cases.
func UnwrapExpression(expr Expression) Expression {
	for {
		unwrap, wrapped := expr.(unwrapExpression)
		if !wrapped {
			return expr
		}
		innerExpr := unwrap.UnwrapExpression()
		if innerExpr == nil {
			return expr
		}
		expr = innerExpr
	}
}

// UnwrapExpressionUntil is similar to UnwrapExpression except it gives the
// caller an opportunity to test each level of unwrapping to see whether a
// particular expression is accepted.
//
// This could be used, for example, to unwrap until a particular other
// interface is satisfied, regardless of which wrapping level it is satisfied
// at.
//
// The given callback function must return false to continue unwrapping, or
// true to accept and return the proposed expression given. If the callback
// function rejects even the final, physical expression then the result of
// this function is nil.
func UnwrapExpressionUntil(expr Expression, until func(Expression) bool) Expression {
	for {
		if until(expr) {
			return expr
		}
		unwrap, wrapped := expr.(unwrapExpression)
		if !wrapped {
			return nil
		}
		expr = unwrap.UnwrapExpression()
		if expr == nil {
			return nil
		}
	}
}
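A sketch (not part of this diff) of a wrapper expression type cooperating with these helpers; the type is hypothetical and shows only the `UnwrapExpression` method contract described above:

```go
package wrapdemo

import (
	"github.com/hashicorp/hcl/v2"
)

// annotatedExpr is a hypothetical wrapper that carries extra metadata
// alongside a real expression while remaining transparent to static
// analysis helpers such as ExprCall and ExprList.
type annotatedExpr struct {
	hcl.Expression        // the wrapped expression, embedded so Value etc. pass through
	note           string // extra metadata carried by the wrapper
}

// UnwrapExpression peels away exactly one level of wrapping, which is all
// that UnwrapExpression and UnwrapExpressionUntil require of each level.
func (e *annotatedExpr) UnwrapExpression() hcl.Expression {
	return e.Expression
}
```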
@ -0,0 +1,209 @@
# HCL Custom Static Decoding Extension

This HCL extension provides a mechanism for defining arguments in an HCL-based
language whose values are derived using custom decoding rules against the
HCL expression syntax, overriding the usual behavior of normal expression
evaluation.

"Arguments", for the purpose of this extension, currently includes the
following two contexts:

* For applications using `hcldec` for dynamic decoding, a `hcldec.AttrSpec`
  or `hcldec.BlockAttrsSpec` can be given a special type constraint that
  opts in to custom decoding behavior for the attribute(s) that are selected
  by that specification.

* When working with the HCL native expression syntax, a function given in
  the `hcl.EvalContext` during evaluation can have parameters with special
  type constraints that opt in to custom decoding behavior for the argument
  expression associated with that parameter in any call.

The above use-cases are rather abstract, so we'll consider a motivating
real-world example: sometimes we (language designers) need to allow users
to specify type constraints directly in the language itself, such as in
[Terraform's Input Variables](https://www.terraform.io/docs/configuration/variables.html).
Terraform's `variable` blocks include an argument called `type` which takes
a type constraint given using HCL expression building-blocks as defined by
[the HCL `typeexpr` extension](../typeexpr/README.md).

A "type constraint expression" of that sort is not an expression intended to
be evaluated in the usual way. Instead, the physical expression is
deconstructed using [the static analysis operations](../../spec.md#static-analysis)
to produce a `cty.Type` as the result, rather than a `cty.Value`.

The purpose of this Custom Static Decoding Extension, then, is to provide a
bridge to allow that sort of custom decoding to be used via mechanisms that
normally deal in `cty.Value`, such as `hcldec` and native syntax function
calls as listed above.

(Note: [`gohcl`](https://pkg.go.dev/github.com/hashicorp/hcl/v2/gohcl) has
its own mechanism to support this use case, exploiting the fact that it is
working directly with "normal" Go types. Decoding into a struct field of
type `hcl.Expression` obtains the expression directly without evaluating it
first. The Custom Static Decoding Extension is not necessary for that `gohcl`
technique. You can also implement custom decoding by working directly with
the lowest-level HCL API, which separates extraction of and evaluation of
expressions into two steps.)

## Custom Decoding Types

This extension relies on a convention implemented in terms of
[_Capsule Types_ in the underlying `cty` type system](https://github.com/zclconf/go-cty/blob/master/docs/types.md#capsule-types). `cty` allows a capsule type to carry arbitrary
extension metadata values as an aid to creating higher-level abstractions like
this extension.

A custom argument decoding mode, then, is implemented by creating a new `cty`
capsule type that implements the `ExtensionData` custom operation to return
a decoding function when requested. For example:

```go
var keywordType cty.Type
keywordType = cty.CapsuleWithOps("keyword", reflect.TypeOf(""), &cty.CapsuleOps{
	ExtensionData: func(key interface{}) interface{} {
		switch key {
		case customdecode.CustomExpressionDecoder:
			return customdecode.CustomExpressionDecoderFunc(
				func(expr hcl.Expression, ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) {
					var diags hcl.Diagnostics
					kw := hcl.ExprAsKeyword(expr)
					if kw == "" {
						diags = append(diags, &hcl.Diagnostic{
							Severity: hcl.DiagError,
							Summary:  "Invalid keyword",
							Detail:   "A keyword is required",
							Subject:  expr.Range().Ptr(),
						})
						return cty.UnknownVal(keywordType), diags
					}
					return cty.CapsuleVal(keywordType, &kw), nil
				},
			)
		default:
			return nil
		}
	},
})
```

The boilerplate here is a bit fussy, but the important part for our purposes
is the `case customdecode.CustomExpressionDecoder:` clause, which uses
a custom extension key type defined in this package to recognize when a
component implementing this extension is checking to see if a target type
has a custom decode implementation.

In the above case we've defined a type that decodes expressions as static
keywords, so a keyword like `foo` would decode as an encapsulated `"foo"`
string, while any other sort of expression like `"baz"` or `1 + 1` would
return an error.

We could then use `keywordType` as a type constraint either for a function
parameter or a `hcldec` attribute specification, which would require the
argument for that function parameter or the expression for the matching
attributes to be a static keyword, rather than an arbitrary expression.
For example, in a `hcldec.AttrSpec`:

```go
keywordSpec := &hcldec.AttrSpec{
	Name: "keyword",
	Type: keywordType,
}
```

The above would accept input like the following and would set its result to
a `cty.Value` of `keywordType`, after decoding:

```hcl
keyword = foo
```

## The Expression and Expression Closure `cty` types

Building on the above, this package also includes two capsule types that use
the above mechanism to allow calling applications to capture expressions
directly and thus defer analysis to a later step, after initial decoding.

The `customdecode.ExpressionType` type encapsulates an `hcl.Expression` alone,
for situations like our type constraint expression example above where it's
the static structure of the expression we want to inspect, and thus any
variables and functions defined in the evaluation context are irrelevant.

The `customdecode.ExpressionClosureType` type encapsulates a
`*customdecode.ExpressionClosure` value, which binds the given expression to
the `hcl.EvalContext` it was asked to evaluate against and thus allows the
receiver of that result to later perform normal evaluation of the expression
with all the same variables and functions that would've been available to it
naturally.

Both of these types can be used as type constraints either for `hcldec`
attribute specifications or for function arguments. Here's an example of
`ExpressionClosureType` to implement a function that can evaluate
an expression with some additional variables defined locally, which we'll
call the `with(...)` function:

```go
var WithFunc = function.New(&function.Spec{
	Params: []function.Parameter{
		{
			Name: "variables",
			Type: cty.DynamicPseudoType,
		},
		{
			Name: "expression",
			Type: customdecode.ExpressionClosureType,
		},
	},
	Type: func(args []cty.Value) (cty.Type, error) {
		varsVal := args[0]
		exprVal := args[1]
		if !varsVal.Type().IsObjectType() {
			return cty.NilVal, function.NewArgErrorf(0, "must be an object defining local variables")
		}
		if !varsVal.IsKnown() {
			// We can't predict our result type until the variables object
			// is known.
			return cty.DynamicPseudoType, nil
		}
		vars := varsVal.AsValueMap()
		closure := customdecode.ExpressionClosureFromVal(exprVal)
		result, err := evalWithLocals(vars, closure)
		if err != nil {
			return cty.NilVal, err
		}
		return result.Type(), nil
	},
	Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
		varsVal := args[0]
		exprVal := args[1]
		vars := varsVal.AsValueMap()
		closure := customdecode.ExpressionClosureFromVal(exprVal)
		return evalWithLocals(vars, closure)
	},
})

func evalWithLocals(locals map[string]cty.Value, closure *customdecode.ExpressionClosure) (cty.Value, error) {
	childCtx := closure.EvalContext.NewChild()
	childCtx.Variables = locals
	val, diags := closure.Expression.Value(childCtx)
	if diags.HasErrors() {
		return cty.NilVal, function.NewArgErrorf(1, "couldn't evaluate expression: %s", diags.Error())
	}
	return val, nil
}
```

If the above function were placed into an `hcl.EvalContext` as `with`, it
could be used in a native syntax call to that function as follows:

```hcl
foo = with({name = "Cory"}, "${greeting}, ${name}!")
```

The above assumes a variable in the main context called `greeting`, to which
the `with` function adds `name` before evaluating the expression given in
its second argument. This makes that second argument context-sensitive -- it
would behave differently if the user wrote the same thing somewhere else -- so
this capability should be used with care to make sure it doesn't cause confusion
for the end-users of your language.

There are some other examples of this capability to evaluate expressions in
unusual ways in the `tryfunc` directory that is a sibling of this one.
@ -0,0 +1,56 @@
// Package customdecode contains an HCL extension that allows, in certain
// contexts, expression evaluation to be overridden by custom static analysis.
//
// This mechanism is only supported in certain specific contexts where
// expressions are decoded with a specific target type in mind. For more
// information, see the documentation on CustomExpressionDecoder.
package customdecode

import (
	"github.com/hashicorp/hcl/v2"
	"github.com/zclconf/go-cty/cty"
)

type customDecoderImpl int

// CustomExpressionDecoder is a value intended to be used as a cty capsule
// type ExtensionData key for capsule types whose values are to be obtained
// by static analysis of an expression rather than normal evaluation of that
// expression.
//
// When a cooperating capsule type is asked for ExtensionData with this key,
// it must return a non-nil CustomExpressionDecoderFunc value.
//
// This mechanism is not universally supported; instead, it's handled in a few
// specific places where expressions are evaluated with the intent of producing
// a cty.Value of a type given by the calling application.
//
// Specifically, this currently works for type constraints given in
// hcldec.AttrSpec and hcldec.BlockAttrsSpec, and it works for arguments to
// function calls in the HCL native syntax. HCL extensions implemented outside
// of the main HCL module may also implement this; consult their own
// documentation for details.
const CustomExpressionDecoder = customDecoderImpl(1)

// CustomExpressionDecoderFunc is the type of value that must be returned by
// a capsule type handling the key CustomExpressionDecoder in its ExtensionData
// implementation.
//
// If no error diagnostics are returned, the result value MUST be of the
// capsule type that the decoder function was derived from. If the returned
// error diagnostics prevent producing a value at all, return cty.NilVal.
type CustomExpressionDecoderFunc func(expr hcl.Expression, ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics)

// CustomExpressionDecoderForType takes any cty type and returns its
// custom expression decoder implementation if it has one. If it is not a
// capsule type or it does not implement a custom expression decoder, this
// function returns nil.
func CustomExpressionDecoderForType(ty cty.Type) CustomExpressionDecoderFunc {
	if !ty.IsCapsuleType() {
		return nil
	}
	if fn, ok := ty.CapsuleExtensionData(CustomExpressionDecoder).(CustomExpressionDecoderFunc); ok {
		return fn
	}
	return nil
}
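A sketch (not part of this diff) of the calling side, as an integration point like `hcldec` might implement it: given a target type, prefer its custom decoder over normal evaluation. The function name and fallback logic here are illustrative assumptions:

```go
package demo

import (
	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/ext/customdecode"
	"github.com/zclconf/go-cty/cty"
)

// decodeForType is a hypothetical helper showing the intended calling
// convention: use the target type's custom decoder when it provides one,
// and fall back to ordinary expression evaluation otherwise.
func decodeForType(expr hcl.Expression, ctx *hcl.EvalContext, ty cty.Type) (cty.Value, hcl.Diagnostics) {
	if decodeFn := customdecode.CustomExpressionDecoderForType(ty); decodeFn != nil {
		return decodeFn(expr, ctx)
	}
	val, diags := expr.Value(ctx)
	// A full implementation would also convert val to ty here; that step is
	// omitted to keep the sketch short.
	return val, diags
}
```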