Modify parsing functions and config structs to accept hcl changes

Signed-off-by: Patrick Van Stee <patrick@vanstee.me>
pull/192/head
Patrick Van Stee
parent 87c4bf1df9
commit 4121ae50b5
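This commit moves bake from the legacy github.com/hashicorp/hcl decoder to HCL2's hclsimple: groups and targets become labeled blocks decoded into slices of pointers (the hcl:"name,label" tags below), struct tags change from omitempty to optional, and attribute maps now need an explicit "=". For orientation, a bake file in the new syntax, borrowing names from the tests further down (illustrative only, not part of the commit):

    group "default" {
      targets = ["webapp"]
    }

    target "webapp" {
      dockerfile = "Dockerfile.webapp"
      args = {
        VAR_BOTH = "webapp"
      }
      inherits = ["webDEP"]
    }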

@@ -15,7 +15,7 @@ import (
"github.com/pkg/errors"
)
func ReadTargets(ctx context.Context, files, targets, overrides []string) (map[string]Target, error) {
func ReadTargets(ctx context.Context, files, targets, overrides []string) (map[string]*Target, error) {
var c Config
for _, f := range files {
cfg, err := ParseFile(f)
@@ -28,7 +28,7 @@ func ReadTargets(ctx context.Context, files, targets, overrides []string) (map[s
if err != nil {
return nil, err
}
m := map[string]Target{}
m := map[string]*Target{}
for _, n := range targets {
for _, n := range c.ResolveGroup(n) {
t, err := c.ResolveTarget(n, o)
@@ -36,7 +36,7 @@ func ReadTargets(ctx context.Context, files, targets, overrides []string) (map[s
return nil, err
}
if t != nil {
m[n] = *t
m[n] = t
}
}
}
@@ -55,12 +55,12 @@ func ParseFile(fn string) (*Config, error) {
}
if strings.HasSuffix(fnl, ".json") || strings.HasSuffix(fnl, ".hcl") {
return ParseHCL(dt)
return ParseHCL(dt, fn)
}
cfg, err := ParseCompose(dt)
if err != nil {
cfg, err2 := ParseHCL(dt)
cfg, err2 := ParseHCL(dt, fn)
if err2 != nil {
return nil, errors.Errorf("failed to parse %s: parsing yaml: %s, parsing hcl: %s", fn, err.Error(), err2.Error())
}
@@ -70,55 +70,76 @@ func ParseFile(fn string) (*Config, error) {
}
type Config struct {
Group map[string]Group
Target map[string]Target
Groups []*Group `hcl:"group,block"`
Targets []*Target `hcl:"target,block"`
}
func mergeConfig(c1, c2 Config) Config {
for k, g := range c2.Group {
if c1.Group == nil {
c1.Group = map[string]Group{}
if c1.Groups == nil {
c1.Groups = []*Group{}
}
for _, g2 := range c2.Groups {
var g1 *Group
for _, g := range c1.Groups {
if g2.Name == g.Name {
g1 = g
break
}
}
if g1, exists := c1.Group[k]; exists {
nextTarget:
for _, t := range g.Targets {
for _, t2 := range g1.Targets {
if t == t2 {
continue nextTarget
}
if g1 == nil {
c1.Groups = append(c1.Groups, g2)
continue
}
nextTarget:
for _, t2 := range g2.Targets {
for _, t1 := range g1.Targets {
if t1 == t2 {
continue nextTarget
}
g1.Targets = append(g1.Targets, t)
}
c1.Group[k] = g1
} else {
c1.Group[k] = g
g1.Targets = append(g1.Targets, t2)
}
c1.Groups = append(c1.Groups, g1)
}
for k, t := range c2.Target {
if c1.Target == nil {
c1.Target = map[string]Target{}
if c1.Targets == nil {
c1.Targets = []*Target{}
}
for _, t2 := range c2.Targets {
var t1 *Target
for _, t := range c1.Targets {
if t2.Name == t.Name {
t1 = t
break
}
}
if base, ok := c1.Target[k]; ok {
t = merge(base, t)
if t1 != nil {
t2 = merge(t1, t2)
}
c1.Target[k] = t
c1.Targets = append(c1.Targets, t2)
}
return c1
}
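mergeConfig now matches groups and targets by their Name field rather than by map key. A small sketch of the group union, assuming code running inside the bake package:

    a := Config{Groups: []*Group{{Name: "default", Targets: []string{"db"}}}}
    b := Config{Groups: []*Group{{Name: "default", Targets: []string{"db", "webapp"}}}}
    merged := mergeConfig(a, b)
    // The matching *Group is mutated in place: "db" is skipped as a duplicate
    // via the nextTarget label and "webapp" is appended.
    fmt.Println(merged.Groups[0].Targets) // [db webapp]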
func (c Config) expandTargets(pattern string) ([]string, error) {
if _, ok := c.Target[pattern]; ok {
return []string{pattern}, nil
for _, target := range c.Targets {
if target.Name == pattern {
return []string{pattern}, nil
}
}
var names []string
for name := range c.Target {
ok, err := path.Match(pattern, name)
for _, target := range c.Targets {
ok, err := path.Match(pattern, target.Name)
if err != nil {
return nil, errors.Wrapf(err, "could not match targets with '%s'", pattern)
}
if ok {
names = append(names, name)
names = append(names, target.Name)
}
}
if len(names) == 0 {
@@ -127,8 +148,8 @@ func (c Config) expandTargets(pattern string) ([]string, error) {
return names, nil
}
func (c Config) newOverrides(v []string) (map[string]Target, error) {
m := map[string]Target{}
func (c Config) newOverrides(v []string) (map[string]*Target, error) {
m := map[string]*Target{}
for _, v := range v {
parts := strings.SplitN(v, "=", 2)
@@ -148,7 +169,10 @@ func (c Config) newOverrides(v []string) (map[string]Target, error) {
}
for _, name := range names {
t := m[name]
t, ok := m[name]
if !ok {
t = &Target{}
}
switch keys[1] {
case "context":
@@ -224,8 +248,14 @@ func (c Config) group(name string, visited map[string]struct{}) []string {
if _, ok := visited[name]; ok {
return nil
}
g, ok := c.Group[name]
if !ok {
var g *Group
for _, group := range c.Groups {
if group.Name == name {
g = group
break
}
}
if g == nil {
return []string{name}
}
visited[name] = struct{}{}
@@ -236,7 +266,7 @@ func (c Config) group(name string, visited map[string]struct{}) []string {
return targets
}
func (c Config) ResolveTarget(name string, overrides map[string]Target) (*Target, error) {
func (c Config) ResolveTarget(name string, overrides map[string]*Target) (*Target, error) {
t, err := c.target(name, map[string]struct{}{}, overrides)
if err != nil {
return nil, err
@@ -252,54 +282,66 @@ func (c Config) ResolveTarget(name string, overrides map[string]Target) (*Target
return t, nil
}
func (c Config) target(name string, visited map[string]struct{}, overrides map[string]Target) (*Target, error) {
func (c Config) target(name string, visited map[string]struct{}, overrides map[string]*Target) (*Target, error) {
if _, ok := visited[name]; ok {
return nil, nil
}
visited[name] = struct{}{}
t, ok := c.Target[name]
if !ok {
var t *Target
for _, target := range c.Targets {
if target.Name == name {
t = target
break
}
}
if t == nil {
return nil, errors.Errorf("failed to find target %s", name)
}
var tt Target
tt := &Target{}
for _, name := range t.Inherits {
t, err := c.target(name, visited, overrides)
if err != nil {
return nil, err
}
if t != nil {
tt = merge(tt, *t)
tt = merge(tt, t)
}
}
t.Inherits = nil
tt = merge(merge(merge(defaultTarget(), tt), t), overrides[name])
tt = merge(merge(defaultTarget(), tt), t)
if override, ok := overrides[name]; ok {
tt = merge(tt, override)
}
tt.normalize()
return &tt, nil
return tt, nil
}
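Resolution order in c.target is: defaultTarget(), then each entry in Inherits (depth-first, with visited guarding cycles), then the target's own fields, then any --set override, with merge(t1, t2) letting t2 win field by field. Against the webDEP/webapp fixture used in the tests below, that works out to:

    m, err := ReadTargets(ctx, []string{fp}, []string{"webapp"}, nil)
    require.NoError(t, err)
    require.Equal(t, "webDEP", m["webapp"].Args["VAR_INHERITED"]) // only webDEP sets it
    require.Equal(t, "webapp", m["webapp"].Args["VAR_BOTH"])      // own value beats inherited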
type Group struct {
Targets []string
Name string `json:"-" hcl:"name,label"`
Targets []string `json:"targets" hcl:"targets"`
// Target // TODO?
}
type Target struct {
Name string `json:"-" hcl:"name,label"`
// Inherits is the only field that cannot be overridden with --set
Inherits []string `json:"inherits,omitempty" hcl:"inherits,omitempty"`
Context *string `json:"context,omitempty" hcl:"context,omitempty"`
Dockerfile *string `json:"dockerfile,omitempty" hcl:"dockerfile,omitempty"`
Args map[string]string `json:"args,omitempty" hcl:"args,omitempty"`
Labels map[string]string `json:"labels,omitempty" hcl:"labels,omitempty"`
Tags []string `json:"tags,omitempty" hcl:"tags,omitempty"`
CacheFrom []string `json:"cache-from,omitempty" hcl:"cache-from,omitempty"`
CacheTo []string `json:"cache-to,omitempty" hcl:"cache-to,omitempty"`
Target *string `json:"target,omitempty" hcl:"target,omitempty"`
Secrets []string `json:"secret,omitempty" hcl:"secret,omitempty"`
SSH []string `json:"ssh,omitempty" hcl:"ssh,omitempty"`
Platforms []string `json:"platforms,omitempty" hcl:"platforms,omitempty"`
Outputs []string `json:"output,omitempty" hcl:"output,omitempty"`
Pull bool `json:"pull,omitempty" hcl:"pull,omitempty"`
NoCache bool `json:"no-cache,omitempty" hcl:"no-cache,omitempty"`
Inherits []string `json:"inherits,omitempty" hcl:"inherits,optional"`
Context *string `json:"context,omitempty" hcl:"context,optional"`
Dockerfile *string `json:"dockerfile,omitempty" hcl:"dockerfile,optional"`
Args map[string]string `json:"args,omitempty" hcl:"args,optional"`
Labels map[string]string `json:"labels,omitempty" hcl:"labels,optional"`
Tags []string `json:"tags,omitempty" hcl:"tags,optional"`
CacheFrom []string `json:"cache-from,omitempty" hcl:"cache-from,optional"`
CacheTo []string `json:"cache-to,omitempty" hcl:"cache-to,optional"`
Target *string `json:"target,omitempty" hcl:"target,optional"`
Secrets []string `json:"secret,omitempty" hcl:"secret,optional"`
SSH []string `json:"ssh,omitempty" hcl:"ssh,optional"`
Platforms []string `json:"platforms,omitempty" hcl:"platforms,optional"`
Outputs []string `json:"output,omitempty" hcl:"output,optional"`
Pull bool `json:"pull,omitempty" hcl:"pull,optional"`
NoCache bool `json:"no-cache,omitempty" hcl:"no-cache,optional"`
// IMPORTANT: if you add more fields here, do not forget to update newOverrides and README.
}
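These hcl and json tags line up with the dotted keys handled by newOverrides for --set: the segment before the first dot names a target (or a path.Match pattern), the next segment picks the field. The PatternOverride test below exercises exactly that, for example:

    docker buildx bake --set 'web*.dockerfile=foo'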
@@ -313,7 +355,7 @@ func (t *Target) normalize() {
t.Outputs = removeDupes(t.Outputs)
}
func TargetsToBuildOpt(m map[string]Target) (map[string]build.Options, error) {
func TargetsToBuildOpt(m map[string]*Target) (map[string]build.Options, error) {
m2 := make(map[string]build.Options, len(m))
for k, v := range m {
bo, err := toBuildOpt(v)
@@ -325,7 +367,7 @@ func TargetsToBuildOpt(m map[string]Target) (map[string]build.Options, error) {
return m2, nil
}
func toBuildOpt(t Target) (*build.Options, error) {
func toBuildOpt(t *Target) (*build.Options, error) {
if v := t.Context; v != nil && *v == "-" {
return nil, errors.Errorf("context from stdin not allowed in bake")
}
@@ -403,11 +445,11 @@ func toBuildOpt(t Target) (*build.Options, error) {
return bo, nil
}
func defaultTarget() Target {
return Target{}
func defaultTarget() *Target {
return &Target{}
}
func merge(t1, t2 Target) Target {
func merge(t1, t2 *Target) *Target {
if t2.Context != nil {
t1.Context = t2.Context
}

@@ -19,7 +19,7 @@ func TestReadTargets(t *testing.T) {
fp := filepath.Join(tmpdir, "config.hcl")
err = ioutil.WriteFile(fp, []byte(`
target "webDEP" {
args {
args = {
VAR_INHERITED = "webDEP"
VAR_BOTH = "webDEP"
}
@@ -27,7 +27,7 @@ target "webDEP" {
target "webapp" {
dockerfile = "Dockerfile.webapp"
args {
args = {
VAR_BOTH = "webapp"
}
inherits = ["webDEP"]
@@ -108,7 +108,7 @@ target "webapp" {
t.Run("PatternOverride", func(t *testing.T) {
// same check for two cases
multiTargetCheck := func(t *testing.T, m map[string]Target, err error) {
multiTargetCheck := func(t *testing.T, m map[string]*Target, err error) {
require.NoError(t, err)
require.Equal(t, 2, len(m))
require.Equal(t, "foo", *m["webapp"].Dockerfile)
@@ -121,7 +121,7 @@ target "webapp" {
name string
targets []string
overrides []string
check func(*testing.T, map[string]Target, error)
check func(*testing.T, map[string]*Target, error)
}{
{
name: "multi target single pattern",
@@ -139,7 +139,7 @@ target "webapp" {
name: "single target",
targets: []string{"webapp"},
overrides: []string{"web*.dockerfile=foo"},
check: func(t *testing.T, m map[string]Target, err error) {
check: func(t *testing.T, m map[string]*Target, err error) {
require.NoError(t, err)
require.Equal(t, 1, len(m))
require.Equal(t, "foo", *m["webapp"].Dockerfile)
@@ -150,7 +150,7 @@ target "webapp" {
name: "nomatch",
targets: []string{"webapp"},
overrides: []string{"nomatch*.dockerfile=foo"},
check: func(t *testing.T, m map[string]Target, err error) {
check: func(t *testing.T, m map[string]*Target, err error) {
// NOTE: I am unsure whether failing to match should always error out
// instead of simply skipping that override.
// Let's enforce the error and we can relax it later if users complain.

@@ -46,10 +46,10 @@ func ParseCompose(dt []byte) (*Config, error) {
var c Config
var zeroBuildConfig composetypes.BuildConfig
if len(cfg.Services) > 0 {
c.Group = map[string]Group{}
c.Target = map[string]Target{}
c.Groups = []*Group{}
c.Targets = []*Target{}
var g Group
g := &Group{Name: "default"}
for _, s := range cfg.Services {
@@ -72,7 +72,8 @@ func ParseCompose(dt []byte) (*Config, error) {
dockerfilePathP = &dockerfilePath
}
g.Targets = append(g.Targets, s.Name)
t := Target{
t := &Target{
Name: s.Name,
Context: contextPathP,
Dockerfile: dockerfilePathP,
Labels: s.Build.Labels,
@@ -87,9 +88,9 @@ func ParseCompose(dt []byte) (*Config, error) {
if s.Image != "" {
t.Tags = []string{s.Image}
}
c.Target[s.Name] = t
c.Targets = append(c.Targets, t)
}
c.Group["default"] = g
c.Groups = append(c.Groups, g)
}

@@ -27,17 +27,23 @@ services:
c, err := ParseCompose(dt)
require.NoError(t, err)
require.Equal(t, 1, len(c.Group))
sort.Strings(c.Group["default"].Targets)
require.Equal(t, []string{"db", "webapp"}, c.Group["default"].Targets)
require.Equal(t, 2, len(c.Target))
require.Equal(t, "./db", *c.Target["db"].Context)
require.Equal(t, "./dir", *c.Target["webapp"].Context)
require.Equal(t, "Dockerfile-alternate", *c.Target["webapp"].Dockerfile)
require.Equal(t, 1, len(c.Target["webapp"].Args))
require.Equal(t, "123", c.Target["webapp"].Args["buildno"])
require.Equal(t, 1, len(c.Groups))
require.Equal(t, c.Groups[0].Name, "default")
sort.Strings(c.Groups[0].Targets)
require.Equal(t, []string{"db", "webapp"}, c.Groups[0].Targets)
require.Equal(t, 2, len(c.Targets))
sort.Slice(c.Targets, func(i, j int) bool {
return c.Targets[i].Name < c.Targets[j].Name
})
require.Equal(t, "db", c.Targets[0].Name)
require.Equal(t, "./db", *c.Targets[0].Context)
require.Equal(t, "webapp", c.Targets[1].Name)
require.Equal(t, "./dir", *c.Targets[1].Context)
require.Equal(t, "Dockerfile-alternate", *c.Targets[1].Dockerfile)
require.Equal(t, 1, len(c.Targets[1].Args))
require.Equal(t, "123", c.Targets[1].Args["buildno"])
}
func TestNoBuildOutOfTreeService(t *testing.T) {
@@ -52,7 +58,7 @@ services:
`)
c, err := ParseCompose(dt)
require.NoError(t, err)
require.Equal(t, 1, len(c.Group))
require.Equal(t, 1, len(c.Groups))
}
func TestParseComposeTarget(t *testing.T) {
@@ -73,8 +79,14 @@ services:
c, err := ParseCompose(dt)
require.NoError(t, err)
require.Equal(t, "db", *c.Target["db"].Target)
require.Equal(t, "webapp", *c.Target["webapp"].Target)
require.Equal(t, 2, len(c.Targets))
sort.Slice(c.Targets, func(i, j int) bool {
return c.Targets[i].Name < c.Targets[j].Name
})
require.Equal(t, "db", c.Targets[0].Name)
require.Equal(t, "db", *c.Targets[0].Target)
require.Equal(t, "webapp", c.Targets[1].Name)
require.Equal(t, "webapp", *c.Targets[1].Target)
}
func TestComposeBuildWithoutContext(t *testing.T) {
@@ -93,8 +105,14 @@ services:
c, err := ParseCompose(dt)
require.NoError(t, err)
require.Equal(t, "db", *c.Target["db"].Target)
require.Equal(t, "webapp", *c.Target["webapp"].Target)
require.Equal(t, 2, len(c.Targets))
sort.Slice(c.Targets, func(i, j int) bool {
return c.Targets[i].Name < c.Targets[j].Name
})
require.Equal(t, c.Targets[0].Name, "db")
require.Equal(t, "db", *c.Targets[0].Target)
require.Equal(t, c.Targets[1].Name, "webapp")
require.Equal(t, "webapp", *c.Targets[1].Target)
}
func TestBogusCompose(t *testing.T) {

@@ -1,10 +1,10 @@
package bake
import "github.com/hashicorp/hcl"
import "github.com/hashicorp/hcl/v2/hclsimple"
func ParseHCL(dt []byte) (*Config, error) {
func ParseHCL(dt []byte, fn string) (*Config, error) {
var c Config
if err := hcl.Unmarshal(dt, &c); err != nil {
if err := hclsimple.Decode(fn, dt, nil, &c); err != nil {
return nil, err
}
return &c, nil
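hclsimple.Decode picks the HCL or JSON syntax from the filename suffix, which is why ParseHCL now threads fn through from ParseFile. A minimal sketch of a direct call:

    dt, err := ioutil.ReadFile("docker-bake.hcl")
    if err != nil {
        return nil, err
    }
    c, err := ParseHCL(dt, "docker-bake.hcl")
    if err != nil {
        return nil, err
    }
    // c.Groups and c.Targets are now populated from the labeled blocks.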

@@ -40,18 +40,22 @@ func TestParseHCL(t *testing.T) {
}
`)
c, err := ParseHCL(dt)
c, err := ParseHCL(dt, "docker-bake.hcl")
require.NoError(t, err)
require.Equal(t, 1, len(c.Group))
require.Equal(t, []string{"db", "webapp"}, c.Group["default"].Targets)
require.Equal(t, 1, len(c.Groups))
require.Equal(t, "default", c.Groups[0].Name)
require.Equal(t, []string{"db", "webapp"}, c.Groups[0].Targets)
require.Equal(t, 4, len(c.Target))
require.Equal(t, "./db", *c.Target["db"].Context)
require.Equal(t, 4, len(c.Targets))
require.Equal(t, c.Targets[0].Name, "db")
require.Equal(t, "./db", *c.Targets[0].Context)
require.Equal(t, 1, len(c.Target["webapp"].Args))
require.Equal(t, "123", c.Target["webapp"].Args["buildno"])
require.Equal(t, c.Targets[1].Name, "webapp")
require.Equal(t, 1, len(c.Targets[1].Args))
require.Equal(t, "123", c.Targets[1].Args["buildno"])
require.Equal(t, 2, len(c.Target["cross"].Platforms))
require.Equal(t, []string{"linux/amd64", "linux/arm64"}, c.Target["cross"].Platforms)
require.Equal(t, c.Targets[2].Name, "cross")
require.Equal(t, 2, len(c.Targets[2].Platforms))
require.Equal(t, []string{"linux/amd64", "linux/arm64"}, c.Targets[2].Platforms)
}

@@ -59,7 +59,7 @@ func runBake(dockerCli command.Cli, targets []string, in bakeOptions) error {
}
if in.printOnly {
dt, err := json.MarshalIndent(map[string]map[string]bake.Target{"target": m}, "", " ")
dt, err := json.MarshalIndent(map[string]map[string]*bake.Target{"target": m}, "", " ")
if err != nil {
return err
}

@@ -0,0 +1,95 @@
Copyright (c) 2017 Martin Atkins
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---------
Unicode table generation programs are under a separate copyright and license:
Copyright (c) 2014 Couchbase, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific language governing permissions
and limitations under the License.
---------
Grapheme break data is provided as part of the Unicode character database,
copyright 2016 Unicode, Inc, which is provided with the following license:
Unicode Data Files include all data files under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
Unicode Data Files do not include PDF online code charts under the
directory http://www.unicode.org/Public/.
Software includes any source code published in the Unicode Standard
or under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2017 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.

@@ -0,0 +1,30 @@
package textseg
import (
"bufio"
"bytes"
)
// AllTokens is a utility that uses a bufio.SplitFunc to produce a slice of
// all of the recognized tokens in the given buffer.
func AllTokens(buf []byte, splitFunc bufio.SplitFunc) ([][]byte, error) {
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Split(splitFunc)
var ret [][]byte
for scanner.Scan() {
ret = append(ret, scanner.Bytes())
}
return ret, scanner.Err()
}
// TokenCount is a utility that uses a bufio.SplitFunc to count the number of
// recognized tokens in the given buffer.
func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) {
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Split(splitFunc)
var ret int
for scanner.Scan() {
ret++
}
return ret, scanner.Err()
}
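A minimal usage sketch pairing AllTokens with the package's ScanGraphemeClusters split function:

    clusters, err := textseg.AllTokens([]byte("héllo"), textseg.ScanGraphemeClusters)
    if err != nil {
        return err
    }
    for _, c := range clusters {
        fmt.Printf("%q\n", c) // one token per user-perceived character
    }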

@@ -0,0 +1,7 @@
package textseg
//go:generate go run make_tables.go -output tables.go
//go:generate go run make_test_tables.go -output tables_test.go
//go:generate ruby unicode2ragel.rb --url=http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty.txt -m GraphemeCluster -p "Prepend,CR,LF,Control,Extend,Regional_Indicator,SpacingMark,L,V,T,LV,LVT,E_Base,E_Modifier,ZWJ,Glue_After_Zwj,E_Base_GAZ" -o grapheme_clusters_table.rl
//go:generate ragel -Z grapheme_clusters.rl
//go:generate gofmt -w grapheme_clusters.go

File diff suppressed because it is too large

@@ -0,0 +1,132 @@
package textseg
import (
"errors"
"unicode/utf8"
)
// Generated from grapheme_clusters.rl. DO NOT EDIT
%%{
# (except you are actually in grapheme_clusters.rl here, so edit away!)
machine graphclust;
write data;
}%%
var Error = errors.New("invalid UTF8 text")
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on grapheme cluster boundaries.
func ScanGraphemeClusters(data []byte, atEOF bool) (int, []byte, error) {
if len(data) == 0 {
return 0, nil, nil
}
// Ragel state
cs := 0 // Current State
p := 0 // "Pointer" into data
pe := len(data) // End-of-data "pointer"
ts := 0
te := 0
act := 0
eof := pe
// Make Go compiler happy
_ = ts
_ = te
_ = act
_ = eof
startPos := 0
endPos := 0
%%{
include GraphemeCluster "grapheme_clusters_table.rl";
action start {
startPos = p
}
action end {
endPos = p
}
action emit {
return endPos+1, data[startPos:endPos+1], nil
}
ZWJGlue = ZWJ (Glue_After_Zwj | E_Base_GAZ Extend* E_Modifier?)?;
AnyExtender = Extend | ZWJGlue | SpacingMark;
Extension = AnyExtender*;
ReplacementChar = (0xEF 0xBF 0xBD);
CRLFSeq = CR LF;
ControlSeq = Control | ReplacementChar;
HangulSeq = (
L+ (((LV? V+ | LVT) T*)?|LV?) |
LV V* T* |
V+ T* |
LVT T* |
T+
) Extension;
EmojiSeq = (E_Base | E_Base_GAZ) Extend* E_Modifier? Extension;
ZWJSeq = ZWJGlue Extension;
EmojiFlagSeq = Regional_Indicator Regional_Indicator? Extension;
UTF8Cont = 0x80 .. 0xBF;
AnyUTF8 = (
0x00..0x7F |
0xC0..0xDF . UTF8Cont |
0xE0..0xEF . UTF8Cont . UTF8Cont |
0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
);
# OtherSeq is any character that isn't at the start of one of the extended sequences above, followed by extension
OtherSeq = (AnyUTF8 - (CR|LF|Control|ReplacementChar|L|LV|V|LVT|T|E_Base|E_Base_GAZ|ZWJ|Regional_Indicator|Prepend)) Extension;
# PrependSeq is prepend followed by any of the other patterns above, except control characters which explicitly break
PrependSeq = Prepend+ (HangulSeq|EmojiSeq|ZWJSeq|EmojiFlagSeq|OtherSeq)?;
CRLFTok = CRLFSeq >start @end;
ControlTok = ControlSeq >start @end;
HangulTok = HangulSeq >start @end;
EmojiTok = EmojiSeq >start @end;
ZWJTok = ZWJSeq >start @end;
EmojiFlagTok = EmojiFlagSeq >start @end;
OtherTok = OtherSeq >start @end;
PrependTok = PrependSeq >start @end;
main := |*
CRLFTok => emit;
ControlTok => emit;
HangulTok => emit;
EmojiTok => emit;
ZWJTok => emit;
EmojiFlagTok => emit;
PrependTok => emit;
OtherTok => emit;
# any single valid UTF-8 character would also be valid per spec,
# but we'll handle that separately after the loop so we can deal
# with requesting more bytes if we're not at EOF.
*|;
write init;
write exec;
}%%
// If we fall out here then we were unable to complete a sequence.
// If we weren't able to complete a sequence then either we've
// reached the end of a partial buffer (so there's more data to come)
// or we have an isolated symbol that would normally be part of a
// grapheme cluster but has appeared in isolation here.
if !atEOF {
// Request more
return 0, nil, nil
}
// Just take the first UTF-8 sequence and return that.
_, seqLen := utf8.DecodeRune(data)
return seqLen, data[:seqLen], nil
}

File diff suppressed because it is too large

File diff suppressed because it is too large

@@ -0,0 +1,335 @@
#!/usr/bin/env ruby
#
# This script has been updated to accept more command-line arguments:
#
# -u, --url URL to process
# -m, --machine Machine name
# -p, --properties Properties to add to the machine
# -o, --output Write output to file
#
# Updated by: Marty Schoch <marty.schoch@gmail.com>
#
# This script uses the unicode spec to generate a Ragel state machine
# that recognizes unicode alphanumeric characters. It generates 5
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
# Currently supported encodings are UTF-8 [default] and UCS-4.
#
# Usage: unicode2ragel.rb [options]
# -e, --encoding [ucs4 | utf8] Data encoding
# -h, --help Show this message
#
# This script was originally written as part of the Ferret search
# engine library.
#
# Author: Rakan El-Khalil <rakan@well.com>
require 'optparse'
require 'open-uri'
ENCODINGS = [ :utf8, :ucs4 ]
ALPHTYPES = { :utf8 => "byte", :ucs4 => "rune" }
DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
DEFAULT_MACHINE_NAME= "WChar"
###
# Display vars & default option
TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8
@chart_url = DEFAULT_CHART_URL
machine_name = DEFAULT_MACHINE_NAME
properties = []
@output = $stdout
###
# Option parsing
cli_opts = OptionParser.new do |opts|
opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
@encoding = o.downcase.to_sym
end
opts.on("-h", "--help", "Show this message") do
puts opts
exit
end
opts.on("-u", "--url URL", "URL to process") do |o|
@chart_url = o
end
opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o|
machine_name = o
end
opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o|
properties = o
end
opts.on("-o", "--output FILE", "output file") do |o|
@output = File.new(o, "w+")
end
end
cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
puts "Invalid encoding: #{@encoding}"
puts cli_opts
exit
end
##
# Downloads the document at url and yields every alpha line's hex
# range and description.
def each_alpha( url, property )
open( url ) do |file|
file.each_line do |line|
next if line =~ /^#/;
next if line !~ /; #{property} #/;
range, description = line.split(/;/)
range.strip!
description.gsub!(/.*#/, '').strip!
if range =~ /\.\./
start, stop = range.split '..'
else start = stop = range
end
yield start.hex .. stop.hex, description
end
end
end
###
# Formats to hex at minimum width
def to_hex( n )
r = "%0X" % n
r = "0#{r}" unless (r.length % 2).zero?
r
end
###
# UCS4 is just a straight hex conversion of the unicode codepoint.
def to_ucs4( range )
rangestr = "0x" + to_hex(range.begin)
rangestr << "..0x" + to_hex(range.end) if range.begin != range.end
[ rangestr ]
end
##
# 0x00 - 0x7f -> 0zzzzzzz[7]
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
def to_utf8_enc( n )
r = 0
if n <= 0x7f
r = n
elsif n <= 0x7ff
y = 0xc0 | (n >> 6)
z = 0x80 | (n & 0x3f)
r = y << 8 | z
elsif n <= 0xffff
x = 0xe0 | (n >> 12)
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = x << 16 | y << 8 | z
elsif n <= 0x10ffff
w = 0xf0 | (n >> 18)
x = 0x80 | (n >> 12) & 0x3f
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = w << 24 | x << 16 | y << 8 | z
end
to_hex(r)
end
def from_utf8_enc( n )
n = n.hex
r = 0
if n <= 0x7f
r = n
elsif n <= 0xdfff
y = (n >> 8) & 0x1f
z = n & 0x3f
r = y << 6 | z
elsif n <= 0xefffff
x = (n >> 16) & 0x0f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = x << 10 | y << 6 | z
elsif n <= 0xf7ffffff
w = (n >> 24) & 0x07
x = (n >> 16) & 0x3f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = w << 18 | x << 12 | y << 6 | z
end
r
end
###
# Given a range, splits it up into ranges that can be continuously
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
# This is not strictly needed since the current [5.1] unicode standard
# doesn't have ranges that straddle utf8 boundaries. This is included
# for completeness as there is no telling if that will ever change.
def utf8_ranges( range )
ranges = []
UTF8_BOUNDARIES.each do |max|
if range.begin <= max
if range.end <= max
ranges << range
return ranges
end
ranges << (range.begin .. max)
range = (max + 1) .. range.end
end
end
ranges
end
def build_range( start, stop )
size = start.size/2
left = size - 1
return [""] if size < 1
a = start[0..1]
b = stop[0..1]
###
# Shared prefix
if a == b
return build_range(start[2..-1], stop[2..-1]).map do |elt|
"0x#{a} " + elt
end
end
###
# Unshared prefix, end of run
return ["0x#{a}..0x#{b} "] if left.zero?
###
# Unshared prefix, not end of run
# Range can be 0x123456..0x56789A
# Which is equivalent to:
# 0x123456 .. 0x12FFFF
# 0x130000 .. 0x55FFFF
# 0x560000 .. 0x56789A
ret = []
ret << build_range(start, a + "FF" * left)
###
# Only generate middle range if need be.
if a.hex+1 != b.hex
max = to_hex(b.hex - 1)
max = "FF" if b == "FF"
ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
end
###
# Don't generate last range if it is covered by first range
ret << build_range(b + "00" * left, stop) unless b == "FF"
ret.flatten!
end
def to_utf8( range )
utf8_ranges( range ).map do |r|
begin_enc = to_utf8_enc(r.begin)
end_enc = to_utf8_enc(r.end)
build_range begin_enc, end_enc
end.flatten!
end
##
# Perform a 3-way comparison of the number of codepoints advertised by
# the unicode spec for the given range, the originally parsed range,
# and the resulting utf8 encoded range.
def count_codepoints( code )
code.split(' ').inject(1) do |acc, elt|
if elt =~ /0x(.+)\.\.0x(.+)/
if @encoding == :utf8
acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1)
else
acc * ($2.hex - $1.hex + 1)
end
else
acc
end
end
end
def is_valid?( range, desc, codes )
spec_count = 1
spec_count = $1.to_i if desc =~ /\[(\d+)\]/
range_count = range.end - range.begin + 1
sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) }
sum == spec_count and sum == range_count
end
##
# Generate the state machine to stdout
def generate_machine( name, property )
pipe = " "
@output.puts " #{name} = "
each_alpha( @chart_url, property ) do |range, desc|
codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)
#raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
# is_valid? range, desc, codes
range_width = codes.map { |a| a.size }.max
range_width = RANGE_WIDTH if range_width < RANGE_WIDTH
desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11
desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH
if desc.size > desc_width
desc = desc[0..desc_width - 4] + "..."
end
codes.each_with_index do |r, idx|
desc = "" unless idx.zero?
code = "%-#{range_width}s" % r
@output.puts " #{pipe} #{code} ##{desc}"
pipe = "|"
end
end
@output.puts " ;"
@output.puts ""
end
@output.puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{@chart_url}
#
# It defines #{properties}.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.
%%{
machine #{machine_name};
EOF
properties.each { |x| generate_machine( x, x ) }
@output.puts <<EOF
}%%
EOF

@@ -0,0 +1,19 @@
package textseg
import "unicode/utf8"
// ScanUTF8Sequences is a split function for bufio.Scanner that splits
// on UTF8 sequence boundaries.
//
// This is included largely for completeness, since this behavior is already
// built in to Go when ranging over a string.
func ScanUTF8Sequences(data []byte, atEOF bool) (int, []byte, error) {
if len(data) == 0 {
return 0, nil, nil
}
r, seqLen := utf8.DecodeRune(data)
if r == utf8.RuneError && !atEOF {
return 0, nil, nil
}
return seqLen, data[:seqLen], nil
}

@@ -0,0 +1,27 @@
Copyright (c) 2017 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@@ -0,0 +1,616 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package cmp determines equality of values.
//
// This package is intended to be a more powerful and safer alternative to
// reflect.DeepEqual for comparing whether two values are semantically equal.
//
// The primary features of cmp are:
//
// • When the default behavior of equality does not suit the needs of the test,
// custom equality functions can override the equality operation.
// For example, an equality function may report floats as equal so long as they
// are within some tolerance of each other.
//
// • Types that have an Equal method may use that method to determine equality.
// This allows package authors to determine the equality operation for the types
// that they define.
//
// • If no custom equality functions are used and no Equal method is defined,
// equality is determined by recursively comparing the primitive kinds on both
// values, much like reflect.DeepEqual. Unlike reflect.DeepEqual, unexported
// fields are not compared by default; they result in panics unless suppressed
// by using an Ignore option (see cmpopts.IgnoreUnexported) or explicitly compared
// using the AllowUnexported option.
package cmp
import (
"fmt"
"reflect"
"strings"
"github.com/google/go-cmp/cmp/internal/diff"
"github.com/google/go-cmp/cmp/internal/flags"
"github.com/google/go-cmp/cmp/internal/function"
"github.com/google/go-cmp/cmp/internal/value"
)
// Equal reports whether x and y are equal by recursively applying the
// following rules in the given order to x and y and all of their sub-values:
//
// • Let S be the set of all Ignore, Transformer, and Comparer options that
// remain after applying all path filters, value filters, and type filters.
// If at least one Ignore exists in S, then the comparison is ignored.
// If the number of Transformer and Comparer options in S is greater than one,
// then Equal panics because it is ambiguous which option to use.
// If S contains a single Transformer, then use that to transform the current
// values and recursively call Equal on the output values.
// If S contains a single Comparer, then use that to compare the current values.
// Otherwise, evaluation proceeds to the next rule.
//
// • If the values have an Equal method of the form "(T) Equal(T) bool" or
// "(T) Equal(I) bool" where T is assignable to I, then use the result of
// x.Equal(y) even if x or y is nil. Otherwise, no such method exists and
// evaluation proceeds to the next rule.
//
// • Lastly, try to compare x and y based on their basic kinds.
// Simple kinds like booleans, integers, floats, complex numbers, strings, and
// channels are compared using the equivalent of the == operator in Go.
// Functions are only equal if they are both nil, otherwise they are unequal.
//
// Structs are equal if recursively calling Equal on all fields reports equal.
// If a struct contains unexported fields, Equal panics unless an Ignore option
// (e.g., cmpopts.IgnoreUnexported) ignores that field or the AllowUnexported
// option explicitly permits comparing the unexported field.
//
// Slices are equal if they are both nil or both non-nil, where recursively
// calling Equal on all non-ignored slice or array elements reports equal.
// Empty non-nil slices and nil slices are not equal; to equate empty slices,
// consider using cmpopts.EquateEmpty.
//
// Maps are equal if they are both nil or both non-nil, where recursively
// calling Equal on all non-ignored map entries reports equal.
// Map keys are equal according to the == operator.
// To use custom comparisons for map keys, consider using cmpopts.SortMaps.
// Empty non-nil maps and nil maps are not equal; to equate empty maps,
// consider using cmpopts.EquateEmpty.
//
// Pointers and interfaces are equal if they are both nil or both non-nil,
// where they have the same underlying concrete type and recursively
// calling Equal on the underlying values reports equal.
func Equal(x, y interface{}, opts ...Option) bool {
vx := reflect.ValueOf(x)
vy := reflect.ValueOf(y)
// If the inputs are different types, auto-wrap them in an empty interface
// so that they have the same parent type.
var t reflect.Type
if !vx.IsValid() || !vy.IsValid() || vx.Type() != vy.Type() {
t = reflect.TypeOf((*interface{})(nil)).Elem()
if vx.IsValid() {
vvx := reflect.New(t).Elem()
vvx.Set(vx)
vx = vvx
}
if vy.IsValid() {
vvy := reflect.New(t).Elem()
vvy.Set(vy)
vy = vvy
}
} else {
t = vx.Type()
}
s := newState(opts)
s.compareAny(&pathStep{t, vx, vy})
return s.result.Equal()
}
// Diff returns a human-readable report of the differences between two values.
// It returns an empty string if and only if Equal returns true for the same
// input values and options.
//
// The output is displayed as a literal in pseudo-Go syntax.
// At the start of each line, a "-" prefix indicates an element removed from x,
// a "+" prefix to indicates an element added to y, and the lack of a prefix
// indicates an element common to both x and y. If possible, the output
// uses fmt.Stringer.String or error.Error methods to produce more
// human-readable outputs. In such cases, the string is prefixed with either an
// 's' or 'e' character, respectively, to indicate that the method was called.
//
// Do not depend on this output being stable. If you need the ability to
// programmatically interpret the difference, consider using a custom Reporter.
func Diff(x, y interface{}, opts ...Option) string {
r := new(defaultReporter)
eq := Equal(x, y, Options(opts), Reporter(r))
d := r.String()
if (d == "") != eq {
panic("inconsistent difference and equality results")
}
return d
}
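In practice these two entry points combine into the usual test idiom; a sketch:

    if diff := cmp.Diff(want, got); diff != "" {
        t.Errorf("unexpected result (-want +got):\n%s", diff)
    }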
type state struct {
// These fields represent the "comparison state".
// Calling statelessCompare must not result in observable changes to these.
result diff.Result // The current result of comparison
curPath Path // The current path in the value tree
reporters []reporter // Optional reporters
// recChecker checks for infinite cycles applying the same set of
// transformers upon the output of itself.
recChecker recChecker
// dynChecker triggers pseudo-random checks for option correctness.
// It is safe for statelessCompare to mutate this value.
dynChecker dynChecker
// These fields, once set by processOption, will not change.
exporters map[reflect.Type]bool // Set of structs with unexported field visibility
opts Options // List of all fundamental and filter options
}
func newState(opts []Option) *state {
// Always ensure a validator option exists to validate the inputs.
s := &state{opts: Options{validator{}}}
s.processOption(Options(opts))
return s
}
func (s *state) processOption(opt Option) {
switch opt := opt.(type) {
case nil:
case Options:
for _, o := range opt {
s.processOption(o)
}
case coreOption:
type filtered interface {
isFiltered() bool
}
if fopt, ok := opt.(filtered); ok && !fopt.isFiltered() {
panic(fmt.Sprintf("cannot use an unfiltered option: %v", opt))
}
s.opts = append(s.opts, opt)
case visibleStructs:
if s.exporters == nil {
s.exporters = make(map[reflect.Type]bool)
}
for t := range opt {
s.exporters[t] = true
}
case reporter:
s.reporters = append(s.reporters, opt)
default:
panic(fmt.Sprintf("unknown option %T", opt))
}
}
// statelessCompare compares two values and returns the result.
// This function is stateless in that it does not alter the current result,
// or output to any registered reporters.
func (s *state) statelessCompare(step PathStep) diff.Result {
// We do not save and restore the curPath because all of the compareX
// methods should properly push and pop from the path.
// It is an implementation bug if the contents of curPath differs from
// when calling this function to when returning from it.
oldResult, oldReporters := s.result, s.reporters
s.result = diff.Result{} // Reset result
s.reporters = nil // Remove reporters to avoid spurious printouts
s.compareAny(step)
res := s.result
s.result, s.reporters = oldResult, oldReporters
return res
}
func (s *state) compareAny(step PathStep) {
// Update the path stack.
s.curPath.push(step)
defer s.curPath.pop()
for _, r := range s.reporters {
r.PushStep(step)
defer r.PopStep()
}
s.recChecker.Check(s.curPath)
// Obtain the current type and values.
t := step.Type()
vx, vy := step.Values()
// Rule 1: Check whether an option applies on this node in the value tree.
if s.tryOptions(t, vx, vy) {
return
}
// Rule 2: Check whether the type has a valid Equal method.
if s.tryMethod(t, vx, vy) {
return
}
// Rule 3: Compare based on the underlying kind.
switch t.Kind() {
case reflect.Bool:
s.report(vx.Bool() == vy.Bool(), 0)
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
s.report(vx.Int() == vy.Int(), 0)
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
s.report(vx.Uint() == vy.Uint(), 0)
case reflect.Float32, reflect.Float64:
s.report(vx.Float() == vy.Float(), 0)
case reflect.Complex64, reflect.Complex128:
s.report(vx.Complex() == vy.Complex(), 0)
case reflect.String:
s.report(vx.String() == vy.String(), 0)
case reflect.Chan, reflect.UnsafePointer:
s.report(vx.Pointer() == vy.Pointer(), 0)
case reflect.Func:
s.report(vx.IsNil() && vy.IsNil(), 0)
case reflect.Struct:
s.compareStruct(t, vx, vy)
case reflect.Slice, reflect.Array:
s.compareSlice(t, vx, vy)
case reflect.Map:
s.compareMap(t, vx, vy)
case reflect.Ptr:
s.comparePtr(t, vx, vy)
case reflect.Interface:
s.compareInterface(t, vx, vy)
default:
panic(fmt.Sprintf("%v kind not handled", t.Kind()))
}
}
func (s *state) tryOptions(t reflect.Type, vx, vy reflect.Value) bool {
// Evaluate all filters and apply the remaining options.
if opt := s.opts.filter(s, t, vx, vy); opt != nil {
opt.apply(s, vx, vy)
return true
}
return false
}
func (s *state) tryMethod(t reflect.Type, vx, vy reflect.Value) bool {
// Check if this type even has an Equal method.
m, ok := t.MethodByName("Equal")
if !ok || !function.IsType(m.Type, function.EqualAssignable) {
return false
}
eq := s.callTTBFunc(m.Func, vx, vy)
s.report(eq, reportByMethod)
return true
}
func (s *state) callTRFunc(f, v reflect.Value, step Transform) reflect.Value {
v = sanitizeValue(v, f.Type().In(0))
if !s.dynChecker.Next() {
return f.Call([]reflect.Value{v})[0]
}
// Run the function twice and ensure that we get the same results back.
// We run in goroutines so that the race detector (if enabled) can detect
// unsafe mutations to the input.
c := make(chan reflect.Value)
go detectRaces(c, f, v)
got := <-c
want := f.Call([]reflect.Value{v})[0]
if step.vx, step.vy = got, want; !s.statelessCompare(step).Equal() {
// To avoid false-positives with non-reflexive equality operations,
// we sanity check whether a value is equal to itself.
if step.vx, step.vy = want, want; !s.statelessCompare(step).Equal() {
return want
}
panic(fmt.Sprintf("non-deterministic function detected: %s", function.NameOf(f)))
}
return want
}
func (s *state) callTTBFunc(f, x, y reflect.Value) bool {
x = sanitizeValue(x, f.Type().In(0))
y = sanitizeValue(y, f.Type().In(1))
if !s.dynChecker.Next() {
return f.Call([]reflect.Value{x, y})[0].Bool()
}
// Swapping the input arguments is sufficient to check that
// f is symmetric and deterministic.
// We run in goroutines so that the race detector (if enabled) can detect
// unsafe mutations to the input.
c := make(chan reflect.Value)
go detectRaces(c, f, y, x)
got := <-c
want := f.Call([]reflect.Value{x, y})[0].Bool()
if !got.IsValid() || got.Bool() != want {
panic(fmt.Sprintf("non-deterministic or non-symmetric function detected: %s", function.NameOf(f)))
}
return want
}
func detectRaces(c chan<- reflect.Value, f reflect.Value, vs ...reflect.Value) {
var ret reflect.Value
defer func() {
recover() // Ignore panics, let the other call to f panic instead
c <- ret
}()
ret = f.Call(vs)[0]
}
// sanitizeValue converts nil interfaces of type T to those of type R,
// assuming that T is assignable to R.
// Otherwise, it returns the input value as is.
func sanitizeValue(v reflect.Value, t reflect.Type) reflect.Value {
// TODO(dsnet): Workaround for reflect bug (https://golang.org/issue/22143).
if !flags.AtLeastGo110 {
if v.Kind() == reflect.Interface && v.IsNil() && v.Type() != t {
return reflect.New(t).Elem()
}
}
return v
}
func (s *state) compareStruct(t reflect.Type, vx, vy reflect.Value) {
var vax, vay reflect.Value // Addressable versions of vx and vy
step := StructField{&structField{}}
for i := 0; i < t.NumField(); i++ {
step.typ = t.Field(i).Type
step.vx = vx.Field(i)
step.vy = vy.Field(i)
step.name = t.Field(i).Name
step.idx = i
step.unexported = !isExported(step.name)
if step.unexported {
if step.name == "_" {
continue
}
// Defer checking of unexported fields until later to give an
// Ignore a chance to ignore the field.
if !vax.IsValid() || !vay.IsValid() {
// For retrieveUnexportedField to work, the parent struct must
// be addressable. Create a new copy of the values if
// necessary to make them addressable.
vax = makeAddressable(vx)
vay = makeAddressable(vy)
}
step.mayForce = s.exporters[t]
step.pvx = vax
step.pvy = vay
step.field = t.Field(i)
}
s.compareAny(step)
}
}
func (s *state) compareSlice(t reflect.Type, vx, vy reflect.Value) {
isSlice := t.Kind() == reflect.Slice
if isSlice && (vx.IsNil() || vy.IsNil()) {
s.report(vx.IsNil() && vy.IsNil(), 0)
return
}
// TODO: Support cyclic data structures.
step := SliceIndex{&sliceIndex{pathStep: pathStep{typ: t.Elem()}}}
withIndexes := func(ix, iy int) SliceIndex {
if ix >= 0 {
step.vx, step.xkey = vx.Index(ix), ix
} else {
step.vx, step.xkey = reflect.Value{}, -1
}
if iy >= 0 {
step.vy, step.ykey = vy.Index(iy), iy
} else {
step.vy, step.ykey = reflect.Value{}, -1
}
return step
}
// Ignore options are able to ignore missing elements in a slice.
// However, detecting these reliably requires an optimal differencing
// algorithm, which diff.Difference is not.
//
// Instead, we first iterate through both slices to detect which elements
// would be ignored if standing alone. The index of non-discarded elements
// are stored in a separate slice, which diffing is then performed on.
var indexesX, indexesY []int
var ignoredX, ignoredY []bool
for ix := 0; ix < vx.Len(); ix++ {
ignored := s.statelessCompare(withIndexes(ix, -1)).NumDiff == 0
if !ignored {
indexesX = append(indexesX, ix)
}
ignoredX = append(ignoredX, ignored)
}
for iy := 0; iy < vy.Len(); iy++ {
ignored := s.statelessCompare(withIndexes(-1, iy)).NumDiff == 0
if !ignored {
indexesY = append(indexesY, iy)
}
ignoredY = append(ignoredY, ignored)
}
// Compute an edit-script for slices vx and vy (excluding ignored elements).
edits := diff.Difference(len(indexesX), len(indexesY), func(ix, iy int) diff.Result {
return s.statelessCompare(withIndexes(indexesX[ix], indexesY[iy]))
})
// Replay the ignore-scripts and the edit-script.
var ix, iy int
for ix < vx.Len() || iy < vy.Len() {
var e diff.EditType
switch {
case ix < len(ignoredX) && ignoredX[ix]:
e = diff.UniqueX
case iy < len(ignoredY) && ignoredY[iy]:
e = diff.UniqueY
default:
e, edits = edits[0], edits[1:]
}
switch e {
case diff.UniqueX:
s.compareAny(withIndexes(ix, -1))
ix++
case diff.UniqueY:
s.compareAny(withIndexes(-1, iy))
iy++
default:
s.compareAny(withIndexes(ix, iy))
ix++
iy++
}
}
}
func (s *state) compareMap(t reflect.Type, vx, vy reflect.Value) {
if vx.IsNil() || vy.IsNil() {
s.report(vx.IsNil() && vy.IsNil(), 0)
return
}
// TODO: Support cyclic data structures.
// We combine and sort the two map keys so that we can perform the
// comparisons in a deterministic order.
step := MapIndex{&mapIndex{pathStep: pathStep{typ: t.Elem()}}}
for _, k := range value.SortKeys(append(vx.MapKeys(), vy.MapKeys()...)) {
step.vx = vx.MapIndex(k)
step.vy = vy.MapIndex(k)
step.key = k
if !step.vx.IsValid() && !step.vy.IsValid() {
// It is possible for both vx and vy to be invalid if the
// key contained a NaN value in it.
//
// Even with the ability to retrieve NaN keys in Go 1.12,
// there still isn't a sensible way to compare the values since
// a NaN key may map to multiple unordered values.
// The most reasonable way to compare NaNs would be to compare the
// set of values. However, this is impossible to do efficiently
// since set equality is provably an O(n^2) operation given only
// an Equal function. If we had a Less function or Hash function,
// this could be done in O(n*log(n)) or O(n), respectively.
//
// Rather than adding complex logic to deal with NaNs, make it
// the user's responsibility to compare such obscure maps.
const help = "consider providing a Comparer to compare the map"
panic(fmt.Sprintf("%#v has map key with NaNs\n%s", s.curPath, help))
}
s.compareAny(step)
}
}
func (s *state) comparePtr(t reflect.Type, vx, vy reflect.Value) {
if vx.IsNil() || vy.IsNil() {
s.report(vx.IsNil() && vy.IsNil(), 0)
return
}
// TODO: Support cyclic data structures.
vx, vy = vx.Elem(), vy.Elem()
s.compareAny(Indirect{&indirect{pathStep{t.Elem(), vx, vy}}})
}
func (s *state) compareInterface(t reflect.Type, vx, vy reflect.Value) {
if vx.IsNil() || vy.IsNil() {
s.report(vx.IsNil() && vy.IsNil(), 0)
return
}
vx, vy = vx.Elem(), vy.Elem()
if vx.Type() != vy.Type() {
s.report(false, 0)
return
}
s.compareAny(TypeAssertion{&typeAssertion{pathStep{vx.Type(), vx, vy}}})
}
func (s *state) report(eq bool, rf resultFlags) {
if rf&reportByIgnore == 0 {
if eq {
s.result.NumSame++
rf |= reportEqual
} else {
s.result.NumDiff++
rf |= reportUnequal
}
}
for _, r := range s.reporters {
r.Report(Result{flags: rf})
}
}
// recChecker tracks the state needed to periodically perform checks that
// user provided transformers are not stuck in an infinitely recursive cycle.
type recChecker struct{ next int }
// Check scans the Path for any recursive transformers and panics when any
// recursive transformers are detected. Note that the presence of a
// recursive Transformer does not necessarily imply an infinite cycle.
// As such, this check only activates after some minimal number of path steps.
func (rc *recChecker) Check(p Path) {
const minLen = 1 << 16
if rc.next == 0 {
rc.next = minLen
}
if len(p) < rc.next {
return
}
rc.next <<= 1
// Check whether the same transformer has appeared at least twice.
var ss []string
m := map[Option]int{}
for _, ps := range p {
if t, ok := ps.(Transform); ok {
t := t.Option()
if m[t] == 1 { // Transformer was used exactly once before
tf := t.(*transformer).fnc.Type()
ss = append(ss, fmt.Sprintf("%v: %v => %v", t, tf.In(0), tf.Out(0)))
}
m[t]++
}
}
if len(ss) > 0 {
const warning = "recursive set of Transformers detected"
const help = "consider using cmpopts.AcyclicTransformer"
set := strings.Join(ss, "\n\t")
panic(fmt.Sprintf("%s:\n\t%s\n%s", warning, set, help))
}
}
// dynChecker tracks the state needed to periodically perform checks that
// user provided functions are symmetric and deterministic.
// The zero value is safe for immediate use.
type dynChecker struct{ curr, next int }
// Next increments the state and reports whether a check should be performed.
//
// Checks occur every Nth function call, where N is a triangular number:
// 0 1 3 6 10 15 21 28 36 45 55 66 78 91 105 120 136 153 171 190 ...
// See https://en.wikipedia.org/wiki/Triangular_number
//
// This sequence ensures that the cost of checks drops significantly as
// the number of functions calls grows larger.
func (dc *dynChecker) Next() bool {
ok := dc.curr == dc.next
if ok {
dc.curr = 0
dc.next++
}
dc.curr++
return ok
}
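Tracing the zero value makes the cadence concrete; a quick sketch, runnable inside this package:

    var dc dynChecker
    for i := 1; i <= 16; i++ {
        if dc.Next() {
            fmt.Println("check on call", i) // prints on calls 1, 2, 4, 7, 11, 16
        }
    }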
// makeAddressable returns a value that is always addressable.
// It returns the input verbatim if it is already addressable,
// otherwise it creates a new value and returns an addressable copy.
func makeAddressable(v reflect.Value) reflect.Value {
if v.CanAddr() {
return v
}
vc := reflect.New(v.Type()).Elem()
vc.Set(v)
return vc
}

@@ -0,0 +1,15 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build purego
package cmp
import "reflect"
const supportAllowUnexported = false
func retrieveUnexportedField(reflect.Value, reflect.StructField) reflect.Value {
panic("retrieveUnexportedField is not implemented")
}

@@ -0,0 +1,23 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build !purego
package cmp
import (
"reflect"
"unsafe"
)
const supportAllowUnexported = true
// retrieveUnexportedField uses unsafe to forcibly retrieve any field from
// a struct such that the value has read-write permissions.
//
// The parent struct, v, must be addressable, while f must be a StructField
// describing the field to retrieve.
func retrieveUnexportedField(v reflect.Value, f reflect.StructField) reflect.Value {
return reflect.NewAt(f.Type, unsafe.Pointer(v.UnsafeAddr()+f.Offset)).Elem()
}

@@ -0,0 +1,17 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build !cmp_debug
package diff
var debug debugger
type debugger struct{}
func (debugger) Begin(_, _ int, f EqualFunc, _, _ *EditScript) EqualFunc {
return f
}
func (debugger) Update() {}
func (debugger) Finish() {}

@@ -0,0 +1,122 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build cmp_debug
package diff
import (
"fmt"
"strings"
"sync"
"time"
)
// The algorithm can be seen running in real-time by enabling debugging:
// go test -tags=cmp_debug -v
//
// Example output:
// === RUN TestDifference/#34
// ┌───────────────────────────────┐
// │ \ · · · · · · · · · · · · · · │
// │ · # · · · · · · · · · · · · · │
// │ · \ · · · · · · · · · · · · · │
// │ · · \ · · · · · · · · · · · · │
// │ · · · X # · · · · · · · · · · │
// │ · · · # \ · · · · · · · · · · │
// │ · · · · · # # · · · · · · · · │
// │ · · · · · # \ · · · · · · · · │
// │ · · · · · · · \ · · · · · · · │
// │ · · · · · · · · \ · · · · · · │
// │ · · · · · · · · · \ · · · · · │
// │ · · · · · · · · · · \ · · # · │
// │ · · · · · · · · · · · \ # # · │
// │ · · · · · · · · · · · # # # · │
// │ · · · · · · · · · · # # # # · │
// │ · · · · · · · · · # # # # # · │
// │ · · · · · · · · · · · · · · \ │
// └───────────────────────────────┘
// [.Y..M.XY......YXYXY.|]
//
// The grid represents the edit-graph where the horizontal axis represents
// list X and the vertical axis represents list Y. The start of the two lists
// is the top-left, while the ends are the bottom-right. The '·' represents
// an unexplored node in the graph. The '\' indicates that the two symbols
// from list X and Y are equal. The 'X' indicates that two symbols are similar
// (but not exactly equal) to each other. The '#' indicates that the two symbols
// are different (and not similar). The algorithm traverses this graph trying to
// make the paths starting in the top-left and the bottom-right connect.
//
// The series of '.', 'X', 'Y', and 'M' characters at the bottom represents
// the currently established path from the forward and reverse searches,
// separated by a '|' character.
const (
updateDelay = 100 * time.Millisecond
finishDelay = 500 * time.Millisecond
ansiTerminal = true // ANSI escape codes used to move terminal cursor
)
var debug debugger
type debugger struct {
sync.Mutex
p1, p2 EditScript
fwdPath, revPath *EditScript
grid []byte
lines int
}
func (dbg *debugger) Begin(nx, ny int, f EqualFunc, p1, p2 *EditScript) EqualFunc {
dbg.Lock()
dbg.fwdPath, dbg.revPath = p1, p2
top := "┌─" + strings.Repeat("──", nx) + "┐\n"
row := "│ " + strings.Repeat("· ", nx) + "│\n"
btm := "└─" + strings.Repeat("──", nx) + "┘\n"
dbg.grid = []byte(top + strings.Repeat(row, ny) + btm)
dbg.lines = strings.Count(dbg.String(), "\n")
fmt.Print(dbg)
// Wrap the EqualFunc so that we can intercept each result.
return func(ix, iy int) (r Result) {
cell := dbg.grid[len(top)+iy*len(row):][len("│ ")+len("· ")*ix:][:len("·")]
for i := range cell {
cell[i] = 0 // Zero out the multiple bytes of UTF-8 middle-dot
}
switch r = f(ix, iy); {
case r.Equal():
cell[0] = '\\'
case r.Similar():
cell[0] = 'X'
default:
cell[0] = '#'
}
return
}
}
func (dbg *debugger) Update() {
dbg.print(updateDelay)
}
func (dbg *debugger) Finish() {
dbg.print(finishDelay)
dbg.Unlock()
}
func (dbg *debugger) String() string {
dbg.p1, dbg.p2 = *dbg.fwdPath, dbg.p2[:0]
for i := len(*dbg.revPath) - 1; i >= 0; i-- {
dbg.p2 = append(dbg.p2, (*dbg.revPath)[i])
}
return fmt.Sprintf("%s[%v|%v]\n\n", dbg.grid, dbg.p1, dbg.p2)
}
func (dbg *debugger) print(d time.Duration) {
if ansiTerminal {
fmt.Printf("\x1b[%dA", dbg.lines) // Reset terminal cursor
}
fmt.Print(dbg)
time.Sleep(d)
}

@ -0,0 +1,372 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package diff implements an algorithm for producing edit-scripts.
// The edit-script is a sequence of operations needed to transform one list
// of symbols into another (or vice-versa). The edits allowed are insertions,
// deletions, and modifications. The summation of all edits is called the
// Levenshtein distance as this problem is well-known in computer science.
//
// This package prioritizes performance over accuracy. That is, the run time
// is more important than obtaining a minimal Levenshtein distance.
package diff
// EditType represents a single operation within an edit-script.
type EditType uint8
const (
// Identity indicates that a symbol pair is identical in both list X and Y.
Identity EditType = iota
// UniqueX indicates that a symbol only exists in X and not Y.
UniqueX
// UniqueY indicates that a symbol only exists in Y and not X.
UniqueY
// Modified indicates that a symbol pair is a modification of each other.
Modified
)
// EditScript represents the series of differences between two lists.
type EditScript []EditType
// String returns a human-readable string representing the edit-script where
// Identity, UniqueX, UniqueY, and Modified are represented by the
// '.', 'X', 'Y', and 'M' characters, respectively.
func (es EditScript) String() string {
b := make([]byte, len(es))
for i, e := range es {
switch e {
case Identity:
b[i] = '.'
case UniqueX:
b[i] = 'X'
case UniqueY:
b[i] = 'Y'
case Modified:
b[i] = 'M'
default:
panic("invalid edit-type")
}
}
return string(b)
}
// stats returns a histogram of the number of each type of edit operation.
func (es EditScript) stats() (s struct{ NI, NX, NY, NM int }) {
for _, e := range es {
switch e {
case Identity:
s.NI++
case UniqueX:
s.NX++
case UniqueY:
s.NY++
case Modified:
s.NM++
default:
panic("invalid edit-type")
}
}
return
}
// Dist is the Levenshtein distance and is guaranteed to be 0 if and only if
// lists X and Y are equal.
func (es EditScript) Dist() int { return len(es) - es.stats().NI }
// LenX is the length of the X list.
func (es EditScript) LenX() int { return len(es) - es.stats().NY }
// LenY is the length of the Y list.
func (es EditScript) LenY() int { return len(es) - es.stats().NX }
// EqualFunc reports whether the symbols at indexes ix and iy are equal.
// When called by Difference, the index is guaranteed to be within nx and ny.
type EqualFunc func(ix int, iy int) Result
// Result is the result of comparison.
// NumSame is the number of sub-elements that are equal.
// NumDiff is the number of sub-elements that are not equal.
type Result struct{ NumSame, NumDiff int }
// BoolResult returns a Result that is either Equal or not Equal.
func BoolResult(b bool) Result {
if b {
return Result{NumSame: 1} // Equal, Similar
} else {
return Result{NumDiff: 2} // Not Equal, not Similar
}
}
// Equal indicates whether the symbols are equal. Two symbols are equal
// if and only if NumDiff == 0. If Equal, then they are also Similar.
func (r Result) Equal() bool { return r.NumDiff == 0 }
// Similar indicates whether two symbols are similar and may be represented
// by using the Modified type. As a special case, we consider binary comparisons
// (i.e., those that return Result{1, 0} or Result{0, 1}) to be similar.
//
// The exact ratio of NumSame to NumDiff to determine similarity may change.
func (r Result) Similar() bool {
// Use NumSame+1 to offset NumSame so that binary comparisons are similar.
return r.NumSame+1 >= r.NumDiff
}
// Difference reports whether two lists of lengths nx and ny are equal
// given the definition of equality provided as f.
//
// This function returns an edit-script, which is a sequence of operations
// needed to convert one list into the other. The following invariants for
// the edit-script are maintained:
// • eq == (es.Dist()==0)
// • nx == es.LenX()
// • ny == es.LenY()
//
// This algorithm is not guaranteed to be an optimal solution (i.e., one that
// produces an edit-script with a minimal Levenshtein distance). This algorithm
// favors performance over optimality. The exact output is not guaranteed to
// be stable and may change over time.
func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// This algorithm is based on traversing what is known as an "edit-graph".
// See Figure 1 from "An O(ND) Difference Algorithm and Its Variations"
// by Eugene W. Myers. Since D can be as large as N itself, this is
// effectively O(N^2). Unlike the algorithm from that paper, we are not
// interested in the optimal path, but at least some "decent" path.
//
// For example, let X and Y be lists of symbols:
// X = [A B C A B B A]
// Y = [C B A B A C]
//
// The edit-graph can be drawn as the following:
// A B C A B B A
// ┌─────────────┐
// C │_|_|\|_|_|_|_│ 0
// B │_|\|_|_|\|\|_│ 1
// A │\|_|_|\|_|_|\│ 2
// B │_|\|_|_|\|\|_│ 3
// A │\|_|_|\|_|_|\│ 4
// C │ | |\| | | | │ 5
// └─────────────┘ 6
// 0 1 2 3 4 5 6 7
//
// List X is written along the horizontal axis, while list Y is written
// along the vertical axis. At any point on this grid, if the symbol in
// list X matches the corresponding symbol in list Y, then a '\' is drawn.
// The goal of any minimal edit-script algorithm is to find a path from the
// top-left corner to the bottom-right corner, while traveling through the
// fewest horizontal or vertical edges.
// A horizontal edge is equivalent to inserting a symbol from list X.
// A vertical edge is equivalent to inserting a symbol from list Y.
// A diagonal edge is equivalent to a matching symbol between both X and Y.
// Invariants:
// • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx
// • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny
//
// In general:
// • fwdFrontier.X < revFrontier.X
// • fwdFrontier.Y < revFrontier.Y
// unless it is time for the algorithm to terminate.
fwdPath := path{+1, point{0, 0}, make(EditScript, 0, (nx+ny)/2)}
revPath := path{-1, point{nx, ny}, make(EditScript, 0)}
fwdFrontier := fwdPath.point // Forward search frontier
revFrontier := revPath.point // Reverse search frontier
// Search budget bounds the cost of searching for better paths.
// The longest sequence of non-matching symbols that can be tolerated is
// approximately the square-root of the search budget.
searchBudget := 4 * (nx + ny) // O(n)
// The algorithm below is a greedy, meet-in-the-middle algorithm for
// computing sub-optimal edit-scripts between two lists.
//
// The algorithm is approximately as follows:
// • Searching for differences switches back-and-forth between
// a search that starts at the beginning (the top-left corner), and
// a search that starts at the end (the bottom-right corner). The goal of
// the search is to connect with the search from the opposite corner.
// • As we search, we build a path in a greedy manner, where the first
// match seen is added to the path (this is sub-optimal, but provides a
// decent result in practice). When matches are found, we try the next pair
// of symbols in the lists and follow all matches as far as possible.
// • When searching for matches, we search along a diagonal going through
// the "frontier" point. If no matches are found, we advance the
// frontier towards the opposite corner.
// • This algorithm terminates when either the X coordinates or the
// Y coordinates of the forward and reverse frontier points ever intersect.
//
// This algorithm is correct even if searching only in the forward direction
// or in the reverse direction. We do both because it is commonly observed
// that two lists differ because elements were added to the front or
// end of one of the lists.
//
// Running the tests with the "cmp_debug" build tag prints a visualization
// of the algorithm running in real-time. This is educational for
// understanding how the algorithm works. See debug_enable.go.
f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)
for {
// Forward search from the beginning.
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
break
}
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
z := zigzag(i)
p := point{fwdFrontier.X + z, fwdFrontier.Y - z}
switch {
case p.X >= revPath.X || p.Y < fwdPath.Y:
stop1 = true // Hit top-right corner
case p.Y >= revPath.Y || p.X < fwdPath.X:
stop2 = true // Hit bottom-left corner
case f(p.X, p.Y).Equal():
// Match found, so connect the path to this point.
fwdPath.connect(p, f)
fwdPath.append(Identity)
// Follow sequence of matches as far as possible.
for fwdPath.X < revPath.X && fwdPath.Y < revPath.Y {
if !f(fwdPath.X, fwdPath.Y).Equal() {
break
}
fwdPath.append(Identity)
}
fwdFrontier = fwdPath.point
stop1, stop2 = true, true
default:
searchBudget-- // Match not found
}
debug.Update()
}
// Advance the frontier towards the reverse point.
if revPath.X-fwdFrontier.X >= revPath.Y-fwdFrontier.Y {
fwdFrontier.X++
} else {
fwdFrontier.Y++
}
// Reverse search from the end.
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
break
}
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
z := zigzag(i)
p := point{revFrontier.X - z, revFrontier.Y + z}
switch {
case fwdPath.X >= p.X || revPath.Y < p.Y:
stop1 = true // Hit bottom-left corner
case fwdPath.Y >= p.Y || revPath.X < p.X:
stop2 = true // Hit top-right corner
case f(p.X-1, p.Y-1).Equal():
// Match found, so connect the path to this point.
revPath.connect(p, f)
revPath.append(Identity)
// Follow sequence of matches as far as possible.
for fwdPath.X < revPath.X && fwdPath.Y < revPath.Y {
if !f(revPath.X-1, revPath.Y-1).Equal() {
break
}
revPath.append(Identity)
}
revFrontier = revPath.point
stop1, stop2 = true, true
default:
searchBudget-- // Match not found
}
debug.Update()
}
// Advance the frontier towards the forward point.
if revFrontier.X-fwdPath.X >= revFrontier.Y-fwdPath.Y {
revFrontier.X--
} else {
revFrontier.Y--
}
}
// Join the forward and reverse paths and then append the reverse path.
fwdPath.connect(revPath.point, f)
for i := len(revPath.es) - 1; i >= 0; i-- {
t := revPath.es[i]
revPath.es = revPath.es[:i]
fwdPath.append(t)
}
debug.Finish()
return fwdPath.es
}
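// Illustrative sketch (not part of this change): diffing two strings with a
// boolean equality function:
//
//	x, y := "ABCABBA", "CBABAC"
//	es := Difference(len(x), len(y), func(ix, iy int) Result {
//		return BoolResult(x[ix] == y[iy])
//	})
//	// es.String() encodes the edits with '.', 'X', 'Y', and 'M'; the exact
//	// script is not guaranteed to be stable, but the invariants
//	// es.LenX() == 7 and es.LenY() == 6 always hold.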
type path struct {
dir int // +1 if forward, -1 if reverse
point // Leading point of the EditScript path
es EditScript
}
// connect appends any necessary Identity, Modified, UniqueX, or UniqueY types
// to the edit-script to connect p.point to dst.
func (p *path) connect(dst point, f EqualFunc) {
if p.dir > 0 {
// Connect in forward direction.
for dst.X > p.X && dst.Y > p.Y {
switch r := f(p.X, p.Y); {
case r.Equal():
p.append(Identity)
case r.Similar():
p.append(Modified)
case dst.X-p.X >= dst.Y-p.Y:
p.append(UniqueX)
default:
p.append(UniqueY)
}
}
for dst.X > p.X {
p.append(UniqueX)
}
for dst.Y > p.Y {
p.append(UniqueY)
}
} else {
// Connect in reverse direction.
for p.X > dst.X && p.Y > dst.Y {
switch r := f(p.X-1, p.Y-1); {
case r.Equal():
p.append(Identity)
case r.Similar():
p.append(Modified)
case p.Y-dst.Y >= p.X-dst.X:
p.append(UniqueY)
default:
p.append(UniqueX)
}
}
for p.X > dst.X {
p.append(UniqueX)
}
for p.Y > dst.Y {
p.append(UniqueY)
}
}
}
func (p *path) append(t EditType) {
p.es = append(p.es, t)
switch t {
case Identity, Modified:
p.add(p.dir, p.dir)
case UniqueX:
p.add(p.dir, 0)
case UniqueY:
p.add(0, p.dir)
}
debug.Update()
}
type point struct{ X, Y int }
func (p *point) add(dx, dy int) { p.X += dx; p.Y += dy }
// zigzag maps a consecutive sequence of integers to a zig-zag sequence.
// [0 1 2 3 4 5 ...] => [0 -1 +1 -2 +2 ...]
func zigzag(x int) int {
if x&1 != 0 {
x = ^x
}
return x >> 1
}
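// Illustrative sketch (not part of this change): the first few mappings are
//
//	zigzag(0) == 0   zigzag(1) == -1   zigzag(2) == +1
//	zigzag(3) == -2  zigzag(4) == +2   zigzag(5) == -3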

@ -0,0 +1,9 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package flags
// Deterministic controls whether the output of Diff should be deterministic.
// This is only used for testing.
var Deterministic bool

@ -0,0 +1,10 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build !go1.10
package flags
// AtLeastGo110 reports whether the Go toolchain is at least Go 1.10.
const AtLeastGo110 = false

@ -0,0 +1,10 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build go1.10
package flags
// AtLeastGo110 reports whether the Go toolchain is at least Go 1.10.
const AtLeastGo110 = true

@ -0,0 +1,99 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package function provides functionality for identifying function types.
package function
import (
"reflect"
"regexp"
"runtime"
"strings"
)
type funcType int
const (
_ funcType = iota
tbFunc // func(T) bool
ttbFunc // func(T, T) bool
trbFunc // func(T, R) bool
tibFunc // func(T, I) bool
trFunc // func(T) R
Equal = ttbFunc // func(T, T) bool
EqualAssignable = tibFunc // func(T, I) bool; encapsulates func(T, T) bool
Transformer = trFunc // func(T) R
ValueFilter = ttbFunc // func(T, T) bool
Less = ttbFunc // func(T, T) bool
ValuePredicate = tbFunc // func(T) bool
KeyValuePredicate = trbFunc // func(T, R) bool
)
var boolType = reflect.TypeOf(true)
// IsType reports whether the reflect.Type is of the specified function type.
func IsType(t reflect.Type, ft funcType) bool {
if t == nil || t.Kind() != reflect.Func || t.IsVariadic() {
return false
}
ni, no := t.NumIn(), t.NumOut()
switch ft {
case tbFunc: // func(T) bool
if ni == 1 && no == 1 && t.Out(0) == boolType {
return true
}
case ttbFunc: // func(T, T) bool
if ni == 2 && no == 1 && t.In(0) == t.In(1) && t.Out(0) == boolType {
return true
}
case trbFunc: // func(T, R) bool
if ni == 2 && no == 1 && t.Out(0) == boolType {
return true
}
case tibFunc: // func(T, I) bool
if ni == 2 && no == 1 && t.In(0).AssignableTo(t.In(1)) && t.Out(0) == boolType {
return true
}
case trFunc: // func(T) R
if ni == 1 && no == 1 {
return true
}
}
return false
}
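// Illustrative sketch (not part of this change): checking whether a value is
// a valid comparer function:
//
//	f := func(x, y string) bool { return x == y }
//	ok := IsType(reflect.TypeOf(f), Equal) // true: func(T, T) bool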
var lastIdentRx = regexp.MustCompile(`[_\p{L}][_\p{L}\p{N}]*$`)
// NameOf returns the name of the function value.
func NameOf(v reflect.Value) string {
fnc := runtime.FuncForPC(v.Pointer())
if fnc == nil {
return "<unknown>"
}
fullName := fnc.Name() // e.g., "long/path/name/mypkg.(*MyType).(long/path/name/mypkg.myMethod)-fm"
// Method closures have a "-fm" suffix.
fullName = strings.TrimSuffix(fullName, "-fm")
var name string
for len(fullName) > 0 {
inParen := strings.HasSuffix(fullName, ")")
fullName = strings.TrimSuffix(fullName, ")")
s := lastIdentRx.FindString(fullName)
if s == "" {
break
}
name = s + "." + name
fullName = strings.TrimSuffix(fullName, s)
if i := strings.LastIndexByte(fullName, '('); inParen && i >= 0 {
fullName = fullName[:i]
}
fullName = strings.TrimSuffix(fullName, ".")
}
return strings.TrimSuffix(name, ".")
}
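// Illustrative sketch (not part of this change):
//
//	name := NameOf(reflect.ValueOf(strings.TrimSpace))
//	// name == "strings.TrimSpace"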

@ -0,0 +1,23 @@
// Copyright 2018, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build purego
package value
import "reflect"
// Pointer is an opaque typed pointer and is guaranteed to be comparable.
type Pointer struct {
p uintptr
t reflect.Type
}
// PointerOf returns a Pointer from v, which must be a
// reflect.Ptr, reflect.Slice, or reflect.Map.
func PointerOf(v reflect.Value) Pointer {
// NOTE: Storing a pointer as an uintptr is technically incorrect as it
// assumes that the GC implementation does not use a moving collector.
return Pointer{v.Pointer(), v.Type()}
}

@ -0,0 +1,26 @@
// Copyright 2018, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build !purego
package value
import (
"reflect"
"unsafe"
)
// Pointer is an opaque typed pointer and is guaranteed to be comparable.
type Pointer struct {
p unsafe.Pointer
t reflect.Type
}
// PointerOf returns a Pointer from v, which must be a
// reflect.Ptr, reflect.Slice, or reflect.Map.
func PointerOf(v reflect.Value) Pointer {
// The proper representation of a pointer is unsafe.Pointer,
// which is necessary if the GC ever uses a moving collector.
return Pointer{unsafe.Pointer(v.Pointer()), v.Type()}
}

@ -0,0 +1,106 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package value
import (
"fmt"
"math"
"reflect"
"sort"
)
// SortKeys sorts a list of map keys, deduplicating keys if necessary.
// The type of each value must be comparable.
func SortKeys(vs []reflect.Value) []reflect.Value {
if len(vs) == 0 {
return vs
}
// Sort the map keys.
sort.SliceStable(vs, func(i, j int) bool { return isLess(vs[i], vs[j]) })
// Deduplicate keys (fails for NaNs).
vs2 := vs[:1]
for _, v := range vs[1:] {
if isLess(vs2[len(vs2)-1], v) {
vs2 = append(vs2, v)
}
}
return vs2
}
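// Illustrative sketch (not part of this change): sorting the keys of a map
// for deterministic iteration:
//
//	m := map[int]string{3: "c", 1: "a", 2: "b"}
//	ks := SortKeys(reflect.ValueOf(m).MapKeys())
//	// ks is now ordered: 1, 2, 3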
// isLess is a generic function for sorting arbitrary map keys.
// The inputs must be of the same type and must be comparable.
func isLess(x, y reflect.Value) bool {
switch x.Type().Kind() {
case reflect.Bool:
return !x.Bool() && y.Bool()
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return x.Int() < y.Int()
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
return x.Uint() < y.Uint()
case reflect.Float32, reflect.Float64:
// NOTE: This does not sort -0 as less than +0
// since Go maps treat -0 and +0 as equal keys.
fx, fy := x.Float(), y.Float()
return fx < fy || math.IsNaN(fx) && !math.IsNaN(fy)
case reflect.Complex64, reflect.Complex128:
cx, cy := x.Complex(), y.Complex()
rx, ix, ry, iy := real(cx), imag(cx), real(cy), imag(cy)
if rx == ry || (math.IsNaN(rx) && math.IsNaN(ry)) {
return ix < iy || math.IsNaN(ix) && !math.IsNaN(iy)
}
return rx < ry || math.IsNaN(rx) && !math.IsNaN(ry)
case reflect.Ptr, reflect.UnsafePointer, reflect.Chan:
return x.Pointer() < y.Pointer()
case reflect.String:
return x.String() < y.String()
case reflect.Array:
for i := 0; i < x.Len(); i++ {
if isLess(x.Index(i), y.Index(i)) {
return true
}
if isLess(y.Index(i), x.Index(i)) {
return false
}
}
return false
case reflect.Struct:
for i := 0; i < x.NumField(); i++ {
if isLess(x.Field(i), y.Field(i)) {
return true
}
if isLess(y.Field(i), x.Field(i)) {
return false
}
}
return false
case reflect.Interface:
vx, vy := x.Elem(), y.Elem()
if !vx.IsValid() || !vy.IsValid() {
return !vx.IsValid() && vy.IsValid()
}
tx, ty := vx.Type(), vy.Type()
if tx == ty {
return isLess(x.Elem(), y.Elem())
}
if tx.Kind() != ty.Kind() {
return vx.Kind() < vy.Kind()
}
if tx.String() != ty.String() {
return tx.String() < ty.String()
}
if tx.PkgPath() != ty.PkgPath() {
return tx.PkgPath() < ty.PkgPath()
}
// This can happen in rare situations, so we fall back to just comparing
// the unique pointer for a reflect.Type. This guarantees deterministic
// ordering within a program, but it is obviously not stable.
return reflect.ValueOf(vx.Type()).Pointer() < reflect.ValueOf(vy.Type()).Pointer()
default:
// Must be Func, Map, or Slice; which are not comparable.
panic(fmt.Sprintf("%T is not comparable", x.Type()))
}
}

@ -0,0 +1,48 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package value
import (
"math"
"reflect"
)
// IsZero reports whether v is the zero value.
// This does not rely on Interface and so can be used on unexported fields.
func IsZero(v reflect.Value) bool {
switch v.Kind() {
case reflect.Bool:
return v.Bool() == false
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return v.Int() == 0
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
return v.Uint() == 0
case reflect.Float32, reflect.Float64:
return math.Float64bits(v.Float()) == 0
case reflect.Complex64, reflect.Complex128:
return math.Float64bits(real(v.Complex())) == 0 && math.Float64bits(imag(v.Complex())) == 0
case reflect.String:
return v.String() == ""
case reflect.UnsafePointer:
return v.Pointer() == 0
case reflect.Chan, reflect.Func, reflect.Interface, reflect.Ptr, reflect.Map, reflect.Slice:
return v.IsNil()
case reflect.Array:
for i := 0; i < v.Len(); i++ {
if !IsZero(v.Index(i)) {
return false
}
}
return true
case reflect.Struct:
for i := 0; i < v.NumField(); i++ {
if !IsZero(v.Field(i)) {
return false
}
}
return true
}
return false
}
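// Illustrative sketch (not part of this change):
//
//	IsZero(reflect.ValueOf(0))        // true
//	IsZero(reflect.ValueOf(""))       // true
//	IsZero(reflect.ValueOf([2]int{})) // true
//	IsZero(reflect.ValueOf([]int{}))  // false: non-nil slice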

@ -0,0 +1,524 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"fmt"
"reflect"
"regexp"
"strings"
"github.com/google/go-cmp/cmp/internal/function"
)
// Option configures for specific behavior of Equal and Diff. In particular,
// the fundamental Option functions (Ignore, Transformer, and Comparer),
// configure how equality is determined.
//
// The fundamental options may be composed with filters (FilterPath and
// FilterValues) to control the scope over which they are applied.
//
// The cmp/cmpopts package provides helper functions for creating options that
// may be used with Equal and Diff.
type Option interface {
// filter applies all filters and returns the option that remains.
// Each option may only read s.curPath and call s.callTTBFunc.
//
// An Options is returned only if multiple comparers or transformers
// can apply simultaneously and will only contain values of those types
// or sub-Options containing values of those types.
filter(s *state, t reflect.Type, vx, vy reflect.Value) applicableOption
}
// applicableOption represents the following types:
// Fundamental: ignore | validator | *comparer | *transformer
// Grouping: Options
type applicableOption interface {
Option
// apply executes the option, which may mutate s or panic.
apply(s *state, vx, vy reflect.Value)
}
// coreOption represents the following types:
// Fundamental: ignore | validator | *comparer | *transformer
// Filters: *pathFilter | *valuesFilter
type coreOption interface {
Option
isCore()
}
type core struct{}
func (core) isCore() {}
// Options is a list of Option values that also satisfies the Option interface.
// Helper comparison packages may return an Options value when packing multiple
// Option values into a single Option. When this package processes an Options,
// it will be implicitly expanded into a flat list.
//
// Applying a filter on an Options is equivalent to applying that same filter
// on all individual options held within.
type Options []Option
func (opts Options) filter(s *state, t reflect.Type, vx, vy reflect.Value) (out applicableOption) {
for _, opt := range opts {
switch opt := opt.filter(s, t, vx, vy); opt.(type) {
case ignore:
return ignore{} // Only ignore can short-circuit evaluation
case validator:
out = validator{} // Takes precedence over comparer or transformer
case *comparer, *transformer, Options:
switch out.(type) {
case nil:
out = opt
case validator:
// Keep validator
case *comparer, *transformer, Options:
out = Options{out, opt} // Conflicting comparers or transformers
}
}
}
return out
}
func (opts Options) apply(s *state, _, _ reflect.Value) {
const warning = "ambiguous set of applicable options"
const help = "consider using filters to ensure at most one Comparer or Transformer may apply"
var ss []string
for _, opt := range flattenOptions(nil, opts) {
ss = append(ss, fmt.Sprint(opt))
}
set := strings.Join(ss, "\n\t")
panic(fmt.Sprintf("%s at %#v:\n\t%s\n%s", warning, s.curPath, set, help))
}
func (opts Options) String() string {
var ss []string
for _, opt := range opts {
ss = append(ss, fmt.Sprint(opt))
}
return fmt.Sprintf("Options{%s}", strings.Join(ss, ", "))
}
// FilterPath returns a new Option where opt is only evaluated if filter f
// returns true for the current Path in the value tree.
//
// This filter is called even if a slice element or map entry is missing and
// provides an opportunity to ignore such cases. The filter function must be
// symmetric such that the filter result is identical regardless of whether the
// missing value is from x or y.
//
// The option passed in may be an Ignore, Transformer, Comparer, Options, or
// a previously filtered Option.
func FilterPath(f func(Path) bool, opt Option) Option {
if f == nil {
panic("invalid path filter function")
}
if opt := normalizeOption(opt); opt != nil {
return &pathFilter{fnc: f, opt: opt}
}
return nil
}
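// Illustrative sketch (not part of this change; the Metadata field name is
// hypothetical): ignoring a struct field wherever it appears in the tree by
// inspecting the last path step:
//
//	opt := FilterPath(func(p Path) bool {
//		sf, ok := p.Last().(StructField)
//		return ok && sf.Name() == "Metadata"
//	}, Ignore())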
type pathFilter struct {
core
fnc func(Path) bool
opt Option
}
func (f pathFilter) filter(s *state, t reflect.Type, vx, vy reflect.Value) applicableOption {
if f.fnc(s.curPath) {
return f.opt.filter(s, t, vx, vy)
}
return nil
}
func (f pathFilter) String() string {
return fmt.Sprintf("FilterPath(%s, %v)", function.NameOf(reflect.ValueOf(f.fnc)), f.opt)
}
// FilterValues returns a new Option where opt is only evaluated if filter f,
// which is a function of the form "func(T, T) bool", returns true for the
// current pair of values being compared. If either value is invalid or
// the type of the values is not assignable to T, then this filter implicitly
// returns false.
//
// The filter function must be
// symmetric (i.e., agnostic to the order of the inputs) and
// deterministic (i.e., produces the same result when given the same inputs).
// If T is an interface, it is possible that f is called with two values with
// different concrete types that both implement T.
//
// The option passed in may be an Ignore, Transformer, Comparer, Options, or
// a previously filtered Option.
func FilterValues(f interface{}, opt Option) Option {
v := reflect.ValueOf(f)
if !function.IsType(v.Type(), function.ValueFilter) || v.IsNil() {
panic(fmt.Sprintf("invalid values filter function: %T", f))
}
if opt := normalizeOption(opt); opt != nil {
vf := &valuesFilter{fnc: v, opt: opt}
if ti := v.Type().In(0); ti.Kind() != reflect.Interface || ti.NumMethod() > 0 {
vf.typ = ti
}
return vf
}
return nil
}
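// Illustrative sketch (not part of this change, and assuming the standard
// math package): treating NaNs as equal, mirroring what cmpopts.EquateNaNs
// provides:
//
//	opt := FilterValues(func(x, y float64) bool {
//		return math.IsNaN(x) && math.IsNaN(y)
//	}, Comparer(func(x, y float64) bool { return true }))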
type valuesFilter struct {
core
typ reflect.Type // T
fnc reflect.Value // func(T, T) bool
opt Option
}
func (f valuesFilter) filter(s *state, t reflect.Type, vx, vy reflect.Value) applicableOption {
if !vx.IsValid() || !vx.CanInterface() || !vy.IsValid() || !vy.CanInterface() {
return nil
}
if (f.typ == nil || t.AssignableTo(f.typ)) && s.callTTBFunc(f.fnc, vx, vy) {
return f.opt.filter(s, t, vx, vy)
}
return nil
}
func (f valuesFilter) String() string {
return fmt.Sprintf("FilterValues(%s, %v)", function.NameOf(f.fnc), f.opt)
}
// Ignore is an Option that causes all comparisons to be ignored.
// This value is intended to be combined with FilterPath or FilterValues.
// It is an error to pass an unfiltered Ignore option to Equal.
func Ignore() Option { return ignore{} }
type ignore struct{ core }
func (ignore) isFiltered() bool { return false }
func (ignore) filter(_ *state, _ reflect.Type, _, _ reflect.Value) applicableOption { return ignore{} }
func (ignore) apply(s *state, _, _ reflect.Value) { s.report(true, reportByIgnore) }
func (ignore) String() string { return "Ignore()" }
// validator is a sentinel Option type to indicate that some options could not
// be evaluated due to unexported fields, missing slice elements, or
// missing map entries. Both values trip the validator only in the
// unexported-field case.
type validator struct{ core }
func (validator) filter(_ *state, _ reflect.Type, vx, vy reflect.Value) applicableOption {
if !vx.IsValid() || !vy.IsValid() {
return validator{}
}
if !vx.CanInterface() || !vy.CanInterface() {
return validator{}
}
return nil
}
func (validator) apply(s *state, vx, vy reflect.Value) {
// Implies missing slice element or map entry.
if !vx.IsValid() || !vy.IsValid() {
s.report(vx.IsValid() == vy.IsValid(), 0)
return
}
// Unable to Interface implies unexported field without visibility access.
if !vx.CanInterface() || !vy.CanInterface() {
const help = "consider using a custom Comparer; if you control the implementation of type, you can also consider AllowUnexported or cmpopts.IgnoreUnexported"
panic(fmt.Sprintf("cannot handle unexported field: %#v\n%s", s.curPath, help))
}
panic("not reachable")
}
// identRx represents a valid identifier according to the Go specification.
const identRx = `[_\p{L}][_\p{L}\p{N}]*`
var identsRx = regexp.MustCompile(`^` + identRx + `(\.` + identRx + `)*$`)
// Transformer returns an Option that applies a transformation function that
// converts values of a certain type into that of another.
//
// The transformer f must be a function "func(T) R" that converts values of
// type T to those of type R and is implicitly filtered to input values
// assignable to T. The transformer must not mutate T in any way.
//
// To help prevent some cases of infinite recursive cycles applying the
// same transform to the output of itself (e.g., in the case where the
// input and output types are the same), an implicit filter is added such that
// a transformer is applicable only if that exact transformer is not already
// in the tail of the Path since the last non-Transform step.
// For situations where the implicit filter is still insufficient,
// consider using cmpopts.AcyclicTransformer, which adds a filter
// to prevent the transformer from being recursively applied upon itself.
//
// The name is a user provided label that is used as the Transform.Name in the
// transformation PathStep (and eventually shown in the Diff output).
// The name must be a valid identifier or qualified identifier in Go syntax.
// If empty, an arbitrary name is used.
func Transformer(name string, f interface{}) Option {
v := reflect.ValueOf(f)
if !function.IsType(v.Type(), function.Transformer) || v.IsNil() {
panic(fmt.Sprintf("invalid transformer function: %T", f))
}
if name == "" {
name = function.NameOf(v)
if !identsRx.MatchString(name) {
name = "λ" // Lambda-symbol as placeholder name
}
} else if !identsRx.MatchString(name) {
panic(fmt.Sprintf("invalid name: %q", name))
}
tr := &transformer{name: name, fnc: reflect.ValueOf(f)}
if ti := v.Type().In(0); ti.Kind() != reflect.Interface || ti.NumMethod() > 0 {
tr.typ = ti
}
return tr
}
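// Illustrative sketch (not part of this change): comparing []byte values by
// their string form:
//
//	trans := Transformer("String", func(in []byte) string { return string(in) })
//	// Equal([]byte("foo"), []byte("foo"), trans) reports true, and any
//	// difference is reported under a Transform path step named String.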
type transformer struct {
core
name string
typ reflect.Type // T
fnc reflect.Value // func(T) R
}
func (tr *transformer) isFiltered() bool { return tr.typ != nil }
func (tr *transformer) filter(s *state, t reflect.Type, _, _ reflect.Value) applicableOption {
for i := len(s.curPath) - 1; i >= 0; i-- {
if t, ok := s.curPath[i].(Transform); !ok {
break // Hit most recent non-Transform step
} else if tr == t.trans {
return nil // Cannot directly use same Transform
}
}
if tr.typ == nil || t.AssignableTo(tr.typ) {
return tr
}
return nil
}
func (tr *transformer) apply(s *state, vx, vy reflect.Value) {
step := Transform{&transform{pathStep{typ: tr.fnc.Type().Out(0)}, tr}}
vvx := s.callTRFunc(tr.fnc, vx, step)
vvy := s.callTRFunc(tr.fnc, vy, step)
step.vx, step.vy = vvx, vvy
s.compareAny(step)
}
func (tr transformer) String() string {
return fmt.Sprintf("Transformer(%s, %s)", tr.name, function.NameOf(tr.fnc))
}
// Comparer returns an Option that determines whether two values are equal
// to each other.
//
// The comparer f must be a function "func(T, T) bool" and is implicitly
// filtered to input values assignable to T. If T is an interface, it is
// possible that f is called with two values of different concrete types that
// both implement T.
//
// The equality function must be:
// • Symmetric: equal(x, y) == equal(y, x)
// • Deterministic: equal(x, y) == equal(x, y)
// • Pure: equal(x, y) does not modify x or y
func Comparer(f interface{}) Option {
v := reflect.ValueOf(f)
if !function.IsType(v.Type(), function.Equal) || v.IsNil() {
panic(fmt.Sprintf("invalid comparer function: %T", f))
}
cm := &comparer{fnc: v}
if ti := v.Type().In(0); ti.Kind() != reflect.Interface || ti.NumMethod() > 0 {
cm.typ = ti
}
return cm
}
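// Illustrative sketch (not part of this change): approximate equality for
// floating-point values, similar in spirit to cmpopts.EquateApprox:
//
//	opt := Comparer(func(x, y float64) bool {
//		return math.Abs(x-y) < 1e-9
//	})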
type comparer struct {
core
typ reflect.Type // T
fnc reflect.Value // func(T, T) bool
}
func (cm *comparer) isFiltered() bool { return cm.typ != nil }
func (cm *comparer) filter(_ *state, t reflect.Type, _, _ reflect.Value) applicableOption {
if cm.typ == nil || t.AssignableTo(cm.typ) {
return cm
}
return nil
}
func (cm *comparer) apply(s *state, vx, vy reflect.Value) {
eq := s.callTTBFunc(cm.fnc, vx, vy)
s.report(eq, reportByFunc)
}
func (cm comparer) String() string {
return fmt.Sprintf("Comparer(%s)", function.NameOf(cm.fnc))
}
// AllowUnexported returns an Option that forcibly allows operations on
// unexported fields in certain structs, which are specified by passing in a
// value of each struct type.
//
// Users of this option must understand that comparing on unexported fields
// from external packages is not safe since changes in the internal
// implementation of some external package may cause the result of Equal
// to unexpectedly change. However, it may be valid to use this option on types
// defined in an internal package where the semantic meaning of an unexported
// field is in the control of the user.
//
// In many cases, a custom Comparer should be used instead that defines
// equality as a function of the public API of a type rather than the underlying
// unexported implementation.
//
// For example, the reflect.Type documentation defines equality to be determined
// by the == operator on the interface (essentially performing a shallow pointer
// comparison) and most attempts to compare *regexp.Regexp types are interested
// in only checking that the regular expression strings are equal.
// Both of these are accomplished using Comparers:
//
// Comparer(func(x, y reflect.Type) bool { return x == y })
// Comparer(func(x, y *regexp.Regexp) bool { return x.String() == y.String() })
//
// In other cases, the cmpopts.IgnoreUnexported option can be used to ignore
// all unexported fields on specified struct types.
func AllowUnexported(types ...interface{}) Option {
if !supportAllowUnexported {
panic("AllowUnexported is not supported on purego builds, Google App Engine Standard, or GopherJS")
}
m := make(map[reflect.Type]bool)
for _, typ := range types {
t := reflect.TypeOf(typ)
if t.Kind() != reflect.Struct {
panic(fmt.Sprintf("invalid struct type: %T", typ))
}
m[t] = true
}
return visibleStructs(m)
}
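// Illustrative sketch (not part of this change): permitting Equal to descend
// into the unexported fields of a locally defined type:
//
//	type point struct{ x, y int }
//	Equal(point{1, 2}, point{1, 2}, AllowUnexported(point{})) // true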
type visibleStructs map[reflect.Type]bool
func (visibleStructs) filter(_ *state, _ reflect.Type, _, _ reflect.Value) applicableOption {
panic("not implemented")
}
// Result represents the comparison result for a single node and
// is provided by cmp when calling Report (see Reporter).
type Result struct {
_ [0]func() // Make Result incomparable
flags resultFlags
}
// Equal reports whether the node was determined to be equal or not.
// As a special case, ignored nodes are considered equal.
func (r Result) Equal() bool {
return r.flags&(reportEqual|reportByIgnore) != 0
}
// ByIgnore reports whether the node is equal because it was ignored.
// This never reports true if Equal reports false.
func (r Result) ByIgnore() bool {
return r.flags&reportByIgnore != 0
}
// ByMethod reports whether the Equal method determined equality.
func (r Result) ByMethod() bool {
return r.flags&reportByMethod != 0
}
// ByFunc reports whether a Comparer function determined equality.
func (r Result) ByFunc() bool {
return r.flags&reportByFunc != 0
}
type resultFlags uint
const (
_ resultFlags = (1 << iota) / 2
reportEqual
reportUnequal
reportByIgnore
reportByMethod
reportByFunc
)
// Reporter is an Option that can be passed to Equal. When Equal traverses
// the value trees, it calls PushStep as it descends into each node in the
// tree and PopStep as it ascends out of the node. The leaves of the tree are
// either compared (determined to be equal or not equal) or ignored and reported
// as such by calling the Report method.
func Reporter(r interface {
// PushStep is called when a tree-traversal operation is performed.
// The PathStep itself is only valid until the step is popped.
// The PathStep.Values are valid for the duration of the entire traversal
// and must not be mutated.
//
// Equal always calls PushStep at the start to provide an operation-less
// PathStep used to report the root values.
//
// Within a slice, the exact set of inserted, removed, or modified elements
// is unspecified and may change in future implementations.
// The entries of a map are iterated through in an unspecified order.
PushStep(PathStep)
// Report is called exactly once on leaf nodes to report whether the
// comparison identified the node as equal, unequal, or ignored.
// A leaf node is one that is immediately preceded by and followed by
// a pair of PushStep and PopStep calls.
Report(Result)
// PopStep ascends back up the value tree.
// There is always a matching pop call for every push call.
PopStep()
}) Option {
return reporter{r}
}
type reporter struct{ reporterIface }
type reporterIface interface {
PushStep(PathStep)
Report(Result)
PopStep()
}
func (reporter) filter(_ *state, _ reflect.Type, _, _ reflect.Value) applicableOption {
panic("not implemented")
}
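// Illustrative sketch (not part of this change; the diffReporter name is
// hypothetical): a minimal reporter that records the paths of unequal leaves:
//
//	type diffReporter struct {
//		path  Path
//		diffs []string
//	}
//	func (r *diffReporter) PushStep(ps PathStep) { r.path = append(r.path, ps) }
//	func (r *diffReporter) Report(rs Result) {
//		if !rs.Equal() {
//			r.diffs = append(r.diffs, r.path.GoString())
//		}
//	}
//	func (r *diffReporter) PopStep() { r.path = r.path[:len(r.path)-1] }
//
//	var r diffReporter
//	Equal(x, y, Reporter(&r)) // r.diffs lists each differing path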
// normalizeOption normalizes the input options such that all Options groups
// are flattened and groups with a single element are reduced to that element.
// Only coreOptions and Options containing coreOptions are allowed.
func normalizeOption(src Option) Option {
switch opts := flattenOptions(nil, Options{src}); len(opts) {
case 0:
return nil
case 1:
return opts[0]
default:
return opts
}
}
// flattenOptions copies all options in src to dst as a flat list.
// Only coreOptions and Options containing coreOptions are allowed.
func flattenOptions(dst, src Options) Options {
for _, opt := range src {
switch opt := opt.(type) {
case nil:
continue
case Options:
dst = flattenOptions(dst, opt)
case coreOption:
dst = append(dst, opt)
default:
panic(fmt.Sprintf("invalid option type: %T", opt))
}
}
return dst
}

@ -0,0 +1,308 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"fmt"
"reflect"
"strings"
"unicode"
"unicode/utf8"
)
// Path is a list of PathSteps describing the sequence of operations to get
// from some root type to the current position in the value tree.
// The first Path element is always an operation-less PathStep that exists
// simply to identify the initial type.
//
// When traversing structs with embedded structs, the embedded struct will
// always be accessed as a field before traversing the fields of the
// embedded struct themselves. That is, an exported field from the
// embedded struct will never be accessed directly from the parent struct.
type Path []PathStep
// PathStep is a union-type for specific operations to traverse
// a value's tree structure. Users of this package never need to implement
// these types; values of these types are returned by this package.
//
// Implementations of this interface are
// StructField, SliceIndex, MapIndex, Indirect, TypeAssertion, and Transform.
type PathStep interface {
String() string
// Type is the resulting type after performing the path step.
Type() reflect.Type
// Values is the resulting values after performing the path step.
// The type of each valid value is guaranteed to be identical to Type.
//
// In some cases, one or both may be invalid or have restrictions:
// • For StructField, both are not interface-able if the current field
// is unexported and the struct type is not explicitly permitted by
// AllowUnexported to traverse unexported fields.
// • For SliceIndex, one may be invalid if an element is missing from
// either the x or y slice.
// • For MapIndex, one may be invalid if an entry is missing from
// either the x or y map.
//
// The provided values must not be mutated.
Values() (vx, vy reflect.Value)
}
var (
_ PathStep = StructField{}
_ PathStep = SliceIndex{}
_ PathStep = MapIndex{}
_ PathStep = Indirect{}
_ PathStep = TypeAssertion{}
_ PathStep = Transform{}
)
func (pa *Path) push(s PathStep) {
*pa = append(*pa, s)
}
func (pa *Path) pop() {
*pa = (*pa)[:len(*pa)-1]
}
// Last returns the last PathStep in the Path.
// If the path is empty, this returns a non-nil PathStep that reports a nil Type.
func (pa Path) Last() PathStep {
return pa.Index(-1)
}
// Index returns the ith step in the Path and supports negative indexing.
// A negative index starts counting from the tail of the Path such that -1
// refers to the last step, -2 refers to the second-to-last step, and so on.
// If index is invalid, this returns a non-nil PathStep that reports a nil Type.
func (pa Path) Index(i int) PathStep {
if i < 0 {
i = len(pa) + i
}
if i < 0 || i >= len(pa) {
return pathStep{}
}
return pa[i]
}
// String returns the simplified path to a node.
// The simplified path only contains struct field accesses.
//
// For example:
// MyMap.MySlices.MyField
func (pa Path) String() string {
var ss []string
for _, s := range pa {
if _, ok := s.(StructField); ok {
ss = append(ss, s.String())
}
}
return strings.TrimPrefix(strings.Join(ss, ""), ".")
}
// GoString returns the path to a specific node using Go syntax.
//
// For example:
// (*root.MyMap["key"].(*mypkg.MyStruct).MySlices)[2][3].MyField
func (pa Path) GoString() string {
var ssPre, ssPost []string
var numIndirect int
for i, s := range pa {
var nextStep PathStep
if i+1 < len(pa) {
nextStep = pa[i+1]
}
switch s := s.(type) {
case Indirect:
numIndirect++
pPre, pPost := "(", ")"
switch nextStep.(type) {
case Indirect:
continue // Next step is indirection, so let them batch up
case StructField:
numIndirect-- // Automatic indirection on struct fields
case nil:
pPre, pPost = "", "" // Last step; no need for parenthesis
}
if numIndirect > 0 {
ssPre = append(ssPre, pPre+strings.Repeat("*", numIndirect))
ssPost = append(ssPost, pPost)
}
numIndirect = 0
continue
case Transform:
ssPre = append(ssPre, s.trans.name+"(")
ssPost = append(ssPost, ")")
continue
}
ssPost = append(ssPost, s.String())
}
for i, j := 0, len(ssPre)-1; i < j; i, j = i+1, j-1 {
ssPre[i], ssPre[j] = ssPre[j], ssPre[i]
}
return strings.Join(ssPre, "") + strings.Join(ssPost, "")
}
type pathStep struct {
typ reflect.Type
vx, vy reflect.Value
}
func (ps pathStep) Type() reflect.Type { return ps.typ }
func (ps pathStep) Values() (vx, vy reflect.Value) { return ps.vx, ps.vy }
func (ps pathStep) String() string {
if ps.typ == nil {
return "<nil>"
}
s := ps.typ.String()
if s == "" || strings.ContainsAny(s, "{}\n") {
return "root" // Type too simple or complex to print
}
return fmt.Sprintf("{%s}", s)
}
// StructField represents a struct field access on a field called Name.
type StructField struct{ *structField }
type structField struct {
pathStep
name string
idx int
// These fields are used for forcibly accessing an unexported field.
// pvx, pvy, and field are only valid if unexported is true.
unexported bool
mayForce bool // Forcibly allow visibility
pvx, pvy reflect.Value // Parent values
field reflect.StructField // Field information
}
func (sf StructField) Type() reflect.Type { return sf.typ }
func (sf StructField) Values() (vx, vy reflect.Value) {
if !sf.unexported {
return sf.vx, sf.vy // CanInterface reports true
}
// Forcibly obtain read-write access to an unexported struct field.
if sf.mayForce {
vx = retrieveUnexportedField(sf.pvx, sf.field)
vy = retrieveUnexportedField(sf.pvy, sf.field)
return vx, vy // CanInterface reports true
}
return sf.vx, sf.vy // CanInterface reports false
}
func (sf StructField) String() string { return fmt.Sprintf(".%s", sf.name) }
// Name is the field name.
func (sf StructField) Name() string { return sf.name }
// Index is the index of the field in the parent struct type.
// See reflect.Type.Field.
func (sf StructField) Index() int { return sf.idx }
// SliceIndex is an index operation on a slice or array at some index Key.
type SliceIndex struct{ *sliceIndex }
type sliceIndex struct {
pathStep
xkey, ykey int
}
func (si SliceIndex) Type() reflect.Type { return si.typ }
func (si SliceIndex) Values() (vx, vy reflect.Value) { return si.vx, si.vy }
func (si SliceIndex) String() string {
switch {
case si.xkey == si.ykey:
return fmt.Sprintf("[%d]", si.xkey)
case si.ykey == -1:
// [5->?] means "I don't know where X[5] went"
return fmt.Sprintf("[%d->?]", si.xkey)
case si.xkey == -1:
// [?->3] means "I don't know where Y[3] came from"
return fmt.Sprintf("[?->%d]", si.ykey)
default:
// [5->3] means "X[5] moved to Y[3]"
return fmt.Sprintf("[%d->%d]", si.xkey, si.ykey)
}
}
// Key is the index key; it may return -1 if in a split state.
func (si SliceIndex) Key() int {
if si.xkey != si.ykey {
return -1
}
return si.xkey
}
// SplitKeys are the indexes for indexing into slices in the
// x and y values, respectively. These indexes may differ due to the
// insertion or removal of an element in one of the slices, causing
// all of the indexes to be shifted. If an index is -1, then that
// indicates that the element does not exist in the associated slice.
//
// Key is guaranteed to return -1 if and only if the indexes returned
// by SplitKeys are not the same. SplitKeys will never return -1 for
// both indexes.
func (si SliceIndex) SplitKeys() (ix, iy int) { return si.xkey, si.ykey }
// MapIndex is an index operation on a map at some index Key.
type MapIndex struct{ *mapIndex }
type mapIndex struct {
pathStep
key reflect.Value
}
func (mi MapIndex) Type() reflect.Type { return mi.typ }
func (mi MapIndex) Values() (vx, vy reflect.Value) { return mi.vx, mi.vy }
func (mi MapIndex) String() string { return fmt.Sprintf("[%#v]", mi.key) }
// Key is the value of the map key.
func (mi MapIndex) Key() reflect.Value { return mi.key }
// Indirect represents pointer indirection on the parent type.
type Indirect struct{ *indirect }
type indirect struct {
pathStep
}
func (in Indirect) Type() reflect.Type { return in.typ }
func (in Indirect) Values() (vx, vy reflect.Value) { return in.vx, in.vy }
func (in Indirect) String() string { return "*" }
// TypeAssertion represents a type assertion on an interface.
type TypeAssertion struct{ *typeAssertion }
type typeAssertion struct {
pathStep
}
func (ta TypeAssertion) Type() reflect.Type { return ta.typ }
func (ta TypeAssertion) Values() (vx, vy reflect.Value) { return ta.vx, ta.vy }
func (ta TypeAssertion) String() string { return fmt.Sprintf(".(%v)", ta.typ) }
// Transform is a transformation from the parent type to the current type.
type Transform struct{ *transform }
type transform struct {
pathStep
trans *transformer
}
func (tf Transform) Type() reflect.Type { return tf.typ }
func (tf Transform) Values() (vx, vy reflect.Value) { return tf.vx, tf.vy }
func (tf Transform) String() string { return fmt.Sprintf("%s()", tf.trans.name) }
// Name is the name of the Transformer.
func (tf Transform) Name() string { return tf.trans.name }
// Func is the function pointer to the transformer function.
func (tf Transform) Func() reflect.Value { return tf.trans.fnc }
// Option returns the originally constructed Transformer option.
// The == operator can be used to detect the exact option used.
func (tf Transform) Option() Option { return tf.trans }
// isExported reports whether the identifier is exported.
func isExported(id string) bool {
r, _ := utf8.DecodeRuneInString(id)
return unicode.IsUpper(r)
}

@ -0,0 +1,51 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
// defaultReporter implements the reporter interface.
//
// As Equal serially calls the PushStep, Report, and PopStep methods, the
// defaultReporter constructs a tree-based representation of the compared value
// and the result of each comparison (see valueNode).
//
// When the String method is called, the FormatDiff method transforms the
// valueNode tree into a textNode tree, which is a tree-based representation
// of the textual output (see textNode).
//
// Lastly, the textNode.String method produces the final report as a string.
type defaultReporter struct {
root *valueNode
curr *valueNode
}
func (r *defaultReporter) PushStep(ps PathStep) {
r.curr = r.curr.PushStep(ps)
if r.root == nil {
r.root = r.curr
}
}
func (r *defaultReporter) Report(rs Result) {
r.curr.Report(rs)
}
func (r *defaultReporter) PopStep() {
r.curr = r.curr.PopStep()
}
// String provides a full report of the differences detected as a structured
// literal in pseudo-Go syntax. String may only be called after the entire tree
// has been traversed.
func (r *defaultReporter) String() string {
assert(r.root != nil && r.curr == nil)
if r.root.NumDiff == 0 {
return ""
}
return formatOptions{}.FormatDiff(r.root).String()
}
func assert(ok bool) {
if !ok {
panic("assertion failure")
}
}

@ -0,0 +1,296 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"fmt"
"reflect"
"github.com/google/go-cmp/cmp/internal/value"
)
// TODO: Enforce limits?
// * Enforce maximum number of records to print per node?
// * Enforce maximum size in bytes allowed?
// * As a heuristic, use less verbosity for equal nodes than unequal nodes.
// TODO: Enforce unique outputs?
// * Avoid Stringer methods if it results in same output?
// * Print pointer address if outputs still equal?
// numContextRecords is the number of surrounding equal records to print.
const numContextRecords = 2
type diffMode byte
const (
diffUnknown diffMode = 0
diffIdentical diffMode = ' '
diffRemoved diffMode = '-'
diffInserted diffMode = '+'
)
type typeMode int
const (
// emitType always prints the type.
emitType typeMode = iota
// elideType never prints the type.
elideType
// autoType prints the type only for composite kinds
// (i.e., structs, slices, arrays, and maps).
autoType
)
type formatOptions struct {
// DiffMode controls the output mode of FormatDiff.
//
// If diffUnknown, then produce a diff of the x and y values.
// If diffIdentical, then emit values as if they were equal.
// If diffRemoved, then only emit x values (ignoring y values).
// If diffInserted, then only emit y values (ignoring x values).
DiffMode diffMode
// TypeMode controls whether to print the type for the current node.
//
// As a general rule of thumb, we always print the type of the next node
// after an interface, and always elide the type of the next node after
// a slice or map node.
TypeMode typeMode
// formatValueOptions are options specific to printing reflect.Values.
formatValueOptions
}
func (opts formatOptions) WithDiffMode(d diffMode) formatOptions {
opts.DiffMode = d
return opts
}
func (opts formatOptions) WithTypeMode(t typeMode) formatOptions {
opts.TypeMode = t
return opts
}
// FormatDiff converts a valueNode tree into a textNode tree, where the latter
// is a textual representation of the differences detected in the former.
func (opts formatOptions) FormatDiff(v *valueNode) textNode {
// Check whether we have specialized formatting for this node.
// This is not necessary, but helpful for producing more readable outputs.
if opts.CanFormatDiffSlice(v) {
return opts.FormatDiffSlice(v)
}
// For leaf nodes, format the value based on the reflect.Values alone.
if v.MaxDepth == 0 {
switch opts.DiffMode {
case diffUnknown, diffIdentical:
// Format Equal.
if v.NumDiff == 0 {
outx := opts.FormatValue(v.ValueX, visitedPointers{})
outy := opts.FormatValue(v.ValueY, visitedPointers{})
if v.NumIgnored > 0 && v.NumSame == 0 {
return textEllipsis
} else if outx.Len() < outy.Len() {
return outx
} else {
return outy
}
}
// Format unequal.
assert(opts.DiffMode == diffUnknown)
var list textList
outx := opts.WithTypeMode(elideType).FormatValue(v.ValueX, visitedPointers{})
outy := opts.WithTypeMode(elideType).FormatValue(v.ValueY, visitedPointers{})
if outx != nil {
list = append(list, textRecord{Diff: '-', Value: outx})
}
if outy != nil {
list = append(list, textRecord{Diff: '+', Value: outy})
}
return opts.WithTypeMode(emitType).FormatType(v.Type, list)
case diffRemoved:
return opts.FormatValue(v.ValueX, visitedPointers{})
case diffInserted:
return opts.FormatValue(v.ValueY, visitedPointers{})
default:
panic("invalid diff mode")
}
}
// Descend into the child value node.
if v.TransformerName != "" {
out := opts.WithTypeMode(emitType).FormatDiff(v.Value)
out = textWrap{"Inverse(" + v.TransformerName + ", ", out, ")"}
return opts.FormatType(v.Type, out)
} else {
switch k := v.Type.Kind(); k {
case reflect.Struct, reflect.Array, reflect.Slice, reflect.Map:
return opts.FormatType(v.Type, opts.formatDiffList(v.Records, k))
case reflect.Ptr:
return textWrap{"&", opts.FormatDiff(v.Value), ""}
case reflect.Interface:
return opts.WithTypeMode(emitType).FormatDiff(v.Value)
default:
panic(fmt.Sprintf("%v cannot have children", k))
}
}
}
func (opts formatOptions) formatDiffList(recs []reportRecord, k reflect.Kind) textNode {
// Derive record name based on the data structure kind.
var name string
var formatKey func(reflect.Value) string
switch k {
case reflect.Struct:
name = "field"
opts = opts.WithTypeMode(autoType)
formatKey = func(v reflect.Value) string { return v.String() }
case reflect.Slice, reflect.Array:
name = "element"
opts = opts.WithTypeMode(elideType)
formatKey = func(reflect.Value) string { return "" }
case reflect.Map:
name = "entry"
opts = opts.WithTypeMode(elideType)
formatKey = formatMapKey
}
// Handle unification.
switch opts.DiffMode {
case diffIdentical, diffRemoved, diffInserted:
var list textList
var deferredEllipsis bool // Add final "..." to indicate records were dropped
for _, r := range recs {
// Elide struct fields that are zero value.
if k == reflect.Struct {
var isZero bool
switch opts.DiffMode {
case diffIdentical:
isZero = value.IsZero(r.Value.ValueX) || value.IsZero(r.Value.ValueY)
case diffRemoved:
isZero = value.IsZero(r.Value.ValueX)
case diffInserted:
isZero = value.IsZero(r.Value.ValueY)
}
if isZero {
continue
}
}
// Elide ignored nodes.
if r.Value.NumIgnored > 0 && r.Value.NumSame+r.Value.NumDiff == 0 {
deferredEllipsis = !(k == reflect.Slice || k == reflect.Array)
if !deferredEllipsis {
list.AppendEllipsis(diffStats{})
}
continue
}
if out := opts.FormatDiff(r.Value); out != nil {
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
}
}
if deferredEllipsis {
list.AppendEllipsis(diffStats{})
}
return textWrap{"{", list, "}"}
case diffUnknown:
default:
panic("invalid diff mode")
}
// Handle differencing.
var list textList
groups := coalesceAdjacentRecords(name, recs)
for i, ds := range groups {
// Handle equal records.
if ds.NumDiff() == 0 {
// Compute the number of leading and trailing records to print.
var numLo, numHi int
numEqual := ds.NumIgnored + ds.NumIdentical
for numLo < numContextRecords && numLo+numHi < numEqual && i != 0 {
if r := recs[numLo].Value; r.NumIgnored > 0 && r.NumSame+r.NumDiff == 0 {
break
}
numLo++
}
for numHi < numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
if r := recs[numEqual-numHi-1].Value; r.NumIgnored > 0 && r.NumSame+r.NumDiff == 0 {
break
}
numHi++
}
if numEqual-(numLo+numHi) == 1 && ds.NumIgnored == 0 {
numHi++ // Avoid pointless coalescing of a single equal record
}
// Format the equal values.
for _, r := range recs[:numLo] {
out := opts.WithDiffMode(diffIdentical).FormatDiff(r.Value)
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
}
if numEqual > numLo+numHi {
ds.NumIdentical -= numLo + numHi
list.AppendEllipsis(ds)
}
for _, r := range recs[numEqual-numHi : numEqual] {
out := opts.WithDiffMode(diffIdentical).FormatDiff(r.Value)
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
}
recs = recs[numEqual:]
continue
}
// Handle unequal records.
for _, r := range recs[:ds.NumDiff()] {
switch {
case opts.CanFormatDiffSlice(r.Value):
out := opts.FormatDiffSlice(r.Value)
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
case r.Value.NumChildren == r.Value.MaxDepth:
outx := opts.WithDiffMode(diffRemoved).FormatDiff(r.Value)
outy := opts.WithDiffMode(diffInserted).FormatDiff(r.Value)
if outx != nil {
list = append(list, textRecord{Diff: diffRemoved, Key: formatKey(r.Key), Value: outx})
}
if outy != nil {
list = append(list, textRecord{Diff: diffInserted, Key: formatKey(r.Key), Value: outy})
}
default:
out := opts.FormatDiff(r.Value)
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
}
}
recs = recs[ds.NumDiff():]
}
assert(len(recs) == 0)
return textWrap{"{", list, "}"}
}
// coalesceAdjacentRecords coalesces the list of records into groups of
// adjacent equal or unequal counts.
func coalesceAdjacentRecords(name string, recs []reportRecord) (groups []diffStats) {
var prevCase int // Arbitrary index into which case last occurred
lastStats := func(i int) *diffStats {
if prevCase != i {
groups = append(groups, diffStats{Name: name})
prevCase = i
}
return &groups[len(groups)-1]
}
for _, r := range recs {
switch rv := r.Value; {
case rv.NumIgnored > 0 && rv.NumSame+rv.NumDiff == 0:
lastStats(1).NumIgnored++
case rv.NumDiff == 0:
lastStats(1).NumIdentical++
case rv.NumDiff > 0 && !rv.ValueY.IsValid():
lastStats(2).NumRemoved++
case rv.NumDiff > 0 && !rv.ValueX.IsValid():
lastStats(2).NumInserted++
default:
lastStats(2).NumModified++
}
}
return groups
}

@ -0,0 +1,278 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"fmt"
"reflect"
"strconv"
"strings"
"unicode"
"github.com/google/go-cmp/cmp/internal/flags"
"github.com/google/go-cmp/cmp/internal/value"
)
type formatValueOptions struct {
// AvoidStringer controls whether to avoid calling custom stringer
// methods like error.Error or fmt.Stringer.String.
AvoidStringer bool
// ShallowPointers controls whether to avoid descending into pointers.
// Useful when printing map keys, where pointer comparison is performed
// on the pointer address rather than the pointed-at value.
ShallowPointers bool
// PrintAddresses controls whether to print the address of all pointers,
// slice elements, and maps.
PrintAddresses bool
}
// FormatType prints the type as if it were wrapping s.
// This may return s as-is depending on the current type and TypeMode mode.
func (opts formatOptions) FormatType(t reflect.Type, s textNode) textNode {
// Check whether to emit the type or not.
switch opts.TypeMode {
case autoType:
switch t.Kind() {
case reflect.Struct, reflect.Slice, reflect.Array, reflect.Map:
if s.Equal(textNil) {
return s
}
default:
return s
}
case elideType:
return s
}
// Determine the type label, applying special handling for unnamed types.
typeName := t.String()
if t.Name() == "" {
// According to Go grammar, certain type literals contain symbols that
// do not strongly bind to the next lexical token (e.g., *T).
switch t.Kind() {
case reflect.Chan, reflect.Func, reflect.Ptr:
typeName = "(" + typeName + ")"
}
typeName = strings.Replace(typeName, "struct {", "struct{", -1)
typeName = strings.Replace(typeName, "interface {", "interface{", -1)
}
// Avoid wrapping the value in parentheses if unnecessary.
if s, ok := s.(textWrap); ok {
hasParens := strings.HasPrefix(s.Prefix, "(") && strings.HasSuffix(s.Suffix, ")")
hasBraces := strings.HasPrefix(s.Prefix, "{") && strings.HasSuffix(s.Suffix, "}")
if hasParens || hasBraces {
return textWrap{typeName, s, ""}
}
}
return textWrap{typeName + "(", s, ")"}
}
// FormatValue prints the reflect.Value, taking extra care to avoid descending
// into pointers already in m. As pointers are visited, m is also updated.
func (opts formatOptions) FormatValue(v reflect.Value, m visitedPointers) (out textNode) {
if !v.IsValid() {
return nil
}
t := v.Type()
// Check whether there is an Error or String method to call.
if !opts.AvoidStringer && v.CanInterface() {
// Avoid calling Error or String methods on nil receivers since many
// implementations crash when doing so.
if (t.Kind() != reflect.Ptr && t.Kind() != reflect.Interface) || !v.IsNil() {
switch v := v.Interface().(type) {
case error:
return textLine("e" + formatString(v.Error()))
case fmt.Stringer:
return textLine("s" + formatString(v.String()))
}
}
}
// Check whether to explicitly wrap the result with the type.
var skipType bool
defer func() {
if !skipType {
out = opts.FormatType(t, out)
}
}()
var ptr string
switch t.Kind() {
case reflect.Bool:
return textLine(fmt.Sprint(v.Bool()))
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return textLine(fmt.Sprint(v.Int()))
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
// Unnamed uints are usually bytes or words, so use hexadecimal.
if t.PkgPath() == "" || t.Kind() == reflect.Uintptr {
return textLine(formatHex(v.Uint()))
}
return textLine(fmt.Sprint(v.Uint()))
case reflect.Float32, reflect.Float64:
return textLine(fmt.Sprint(v.Float()))
case reflect.Complex64, reflect.Complex128:
return textLine(fmt.Sprint(v.Complex()))
case reflect.String:
return textLine(formatString(v.String()))
case reflect.UnsafePointer, reflect.Chan, reflect.Func:
return textLine(formatPointer(v))
case reflect.Struct:
var list textList
for i := 0; i < v.NumField(); i++ {
vv := v.Field(i)
if value.IsZero(vv) {
continue // Elide fields with zero values
}
s := opts.WithTypeMode(autoType).FormatValue(vv, m)
list = append(list, textRecord{Key: t.Field(i).Name, Value: s})
}
return textWrap{"{", list, "}"}
case reflect.Slice:
if v.IsNil() {
return textNil
}
if opts.PrintAddresses {
ptr = formatPointer(v)
}
fallthrough
case reflect.Array:
var list textList
for i := 0; i < v.Len(); i++ {
vi := v.Index(i)
if vi.CanAddr() { // Check for cyclic elements
p := vi.Addr()
if m.Visit(p) {
var out textNode
out = textLine(formatPointer(p))
out = opts.WithTypeMode(emitType).FormatType(p.Type(), out)
out = textWrap{"*", out, ""}
list = append(list, textRecord{Value: out})
continue
}
}
s := opts.WithTypeMode(elideType).FormatValue(vi, m)
list = append(list, textRecord{Value: s})
}
return textWrap{ptr + "{", list, "}"}
case reflect.Map:
if v.IsNil() {
return textNil
}
if m.Visit(v) {
return textLine(formatPointer(v))
}
var list textList
for _, k := range value.SortKeys(v.MapKeys()) {
sk := formatMapKey(k)
sv := opts.WithTypeMode(elideType).FormatValue(v.MapIndex(k), m)
list = append(list, textRecord{Key: sk, Value: sv})
}
if opts.PrintAddresses {
ptr = formatPointer(v)
}
return textWrap{ptr + "{", list, "}"}
case reflect.Ptr:
if v.IsNil() {
return textNil
}
if m.Visit(v) || opts.ShallowPointers {
return textLine(formatPointer(v))
}
if opts.PrintAddresses {
ptr = formatPointer(v)
}
skipType = true // Let the underlying value print the type instead
return textWrap{"&" + ptr, opts.FormatValue(v.Elem(), m), ""}
case reflect.Interface:
if v.IsNil() {
return textNil
}
// Interfaces accept different concrete types,
// so configure the underlying value to explicitly print the type.
skipType = true // Print the concrete type instead
return opts.WithTypeMode(emitType).FormatValue(v.Elem(), m)
default:
panic(fmt.Sprintf("%v kind not handled", v.Kind()))
}
}
// formatMapKey formats v as if it were a map key.
// The result is guaranteed to be a single line.
func formatMapKey(v reflect.Value) string {
var opts formatOptions
opts.TypeMode = elideType
opts.ShallowPointers = true
s := opts.FormatValue(v, visitedPointers{}).String()
return strings.TrimSpace(s)
}
// formatString prints s as a double-quoted or backtick-quoted string.
func formatString(s string) string {
// Use a quoted string if it is the same length as a raw string literal.
// Otherwise, attempt to use the raw string form.
qs := strconv.Quote(s)
if len(qs) == 1+len(s)+1 {
return qs
}
// Disallow newlines to ensure output is a single line.
// Only allow printable runes for readability purposes.
rawInvalid := func(r rune) bool {
return r == '`' || r == '\n' || !(unicode.IsPrint(r) || r == '\t')
}
if strings.IndexFunc(s, rawInvalid) < 0 {
return "`" + s + "`"
}
return qs
}
// formatHex prints u as a hexadecimal integer in Go notation.
func formatHex(u uint64) string {
var f string
switch {
case u <= 0xff:
f = "0x%02x"
case u <= 0xffff:
f = "0x%04x"
case u <= 0xffffff:
f = "0x%06x"
case u <= 0xffffffff:
f = "0x%08x"
case u <= 0xffffffffff:
f = "0x%010x"
case u <= 0xffffffffffff:
f = "0x%012x"
case u <= 0xffffffffffffff:
f = "0x%014x"
case u <= 0xffffffffffffffff:
f = "0x%016x"
}
return fmt.Sprintf(f, u)
}
// formatPointer prints the address of the pointer.
func formatPointer(v reflect.Value) string {
p := v.Pointer()
if flags.Deterministic {
p = 0xdeadf00f // Only used for stable testing purposes
}
return fmt.Sprintf("⟪0x%x⟫", p)
}
type visitedPointers map[value.Pointer]struct{}
// Visit inserts pointer v into the visited map and reports whether it had
// already been visited before.
func (m visitedPointers) Visit(v reflect.Value) bool {
p := value.PointerOf(v)
_, visited := m[p]
m[p] = struct{}{}
return visited
}

@ -0,0 +1,333 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"bytes"
"fmt"
"reflect"
"strings"
"unicode"
"unicode/utf8"
"github.com/google/go-cmp/cmp/internal/diff"
)
// CanFormatDiffSlice reports whether we support custom formatting for nodes
// that are slices of primitive kinds or strings.
func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
switch {
case opts.DiffMode != diffUnknown:
return false // Must be formatting in diff mode
case v.NumDiff == 0:
return false // No differences detected
case v.NumIgnored+v.NumCompared+v.NumTransformed > 0:
// TODO: Handle the case where someone uses bytes.Equal on a large slice.
return false // Some custom option was used to determine equality
case !v.ValueX.IsValid() || !v.ValueY.IsValid():
return false // Both values must be valid
}
switch t := v.Type; t.Kind() {
case reflect.String:
case reflect.Array, reflect.Slice:
// Only slices of primitive types have specialized handling.
switch t.Elem().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
default:
return false
}
// If a sufficient number of elements already differ,
// use specialized formatting even if the length requirement is not met.
if v.NumDiff > v.NumSame {
return true
}
default:
return false
}
// Use specialized string diffing for longer slices or strings.
const minLength = 64
return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength
}
// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
// This provides custom-tailored logic to make printing of differences in
// textual strings and slices of primitive kinds more readable.
func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
assert(opts.DiffMode == diffUnknown)
t, vx, vy := v.Type, v.ValueX, v.ValueY
// Auto-detect the type of the data.
var isLinedText, isText, isBinary bool
var sx, sy string
switch {
case t.Kind() == reflect.String:
sx, sy = vx.String(), vy.String()
isText = true // Initial estimate, verify later
case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
sx, sy = string(vx.Bytes()), string(vy.Bytes())
isBinary = true // Initial estimate, verify later
case t.Kind() == reflect.Array:
// Arrays need to be addressable for slice operations to work.
vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
vx2.Set(vx)
vy2.Set(vy)
vx, vy = vx2, vy2
}
if isText || isBinary {
var numLines, lastLineIdx, maxLineLen int
isBinary = false
for i, r := range sx + sy {
if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
isBinary = true
break
}
if r == '\n' {
if maxLineLen < i-lastLineIdx {
maxLineLen = i - lastLineIdx
}
lastLineIdx = i + 1
numLines++
}
}
isText = !isBinary
isLinedText = isText && numLines >= 4 && maxLineLen <= 256
}
// Format the string into printable records.
var list textList
var delim string
switch {
// If the text appears to be multi-lined text,
// then perform differencing across individual lines.
case isLinedText:
ssx := strings.Split(sx, "\n")
ssy := strings.Split(sy, "\n")
list = opts.formatDiffSlice(
reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
func(v reflect.Value, d diffMode) textRecord {
s := formatString(v.Index(0).String())
return textRecord{Diff: d, Value: textLine(s)}
},
)
delim = "\n"
// If the text appears to be single-lined text,
// then perform differencing in approximately fixed-sized chunks.
// The output is printed as quoted strings.
case isText:
list = opts.formatDiffSlice(
reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
func(v reflect.Value, d diffMode) textRecord {
s := formatString(v.String())
return textRecord{Diff: d, Value: textLine(s)}
},
)
delim = ""
// If the text appears to be binary data,
// then perform differencing in approximately fixed-sized chunks.
// The output is inspired by hexdump.
case isBinary:
list = opts.formatDiffSlice(
reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
func(v reflect.Value, d diffMode) textRecord {
var ss []string
for i := 0; i < v.Len(); i++ {
ss = append(ss, formatHex(v.Index(i).Uint()))
}
s := strings.Join(ss, ", ")
comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
return textRecord{Diff: d, Value: textLine(s), Comment: comment}
},
)
// For all other slices of primitive types,
// perform differencing in approximately fixed-sized chunks.
// The size of each chunk depends on the width of the element kind.
default:
var chunkSize int
if t.Elem().Kind() == reflect.Bool {
chunkSize = 16
} else {
switch t.Elem().Bits() {
case 8:
chunkSize = 16
case 16:
chunkSize = 12
case 32:
chunkSize = 8
default:
chunkSize = 8
}
}
list = opts.formatDiffSlice(
vx, vy, chunkSize, t.Elem().Kind().String(),
func(v reflect.Value, d diffMode) textRecord {
var ss []string
for i := 0; i < v.Len(); i++ {
switch t.Elem().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
ss = append(ss, fmt.Sprint(v.Index(i).Int()))
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
ss = append(ss, formatHex(v.Index(i).Uint()))
case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
}
}
s := strings.Join(ss, ", ")
return textRecord{Diff: d, Value: textLine(s)}
},
)
}
// Wrap the output with appropriate type information.
var out textNode = textWrap{"{", list, "}"}
if !isText {
// The "{...}" byte-sequence literal is not valid Go syntax for strings.
// Emit the type for extra clarity (e.g. "string{...}").
if t.Kind() == reflect.String {
opts = opts.WithTypeMode(emitType)
}
return opts.FormatType(t, out)
}
switch t.Kind() {
case reflect.String:
out = textWrap{"strings.Join(", out, fmt.Sprintf(", %q)", delim)}
if t != reflect.TypeOf(string("")) {
out = opts.FormatType(t, out)
}
case reflect.Slice:
out = textWrap{"bytes.Join(", out, fmt.Sprintf(", %q)", delim)}
if t != reflect.TypeOf([]byte(nil)) {
out = opts.FormatType(t, out)
}
}
return out
}
// formatASCII formats s as an ASCII string.
// This is useful for printing binary strings in a semi-legible way.
func formatASCII(s string) string {
b := bytes.Repeat([]byte{'.'}, len(s))
for i := 0; i < len(s); i++ {
if ' ' <= s[i] && s[i] <= '~' {
b[i] = s[i]
}
}
return string(b)
}
func (opts formatOptions) formatDiffSlice(
vx, vy reflect.Value, chunkSize int, name string,
makeRec func(reflect.Value, diffMode) textRecord,
) (list textList) {
es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
})
appendChunks := func(v reflect.Value, d diffMode) int {
n0 := v.Len()
for v.Len() > 0 {
n := chunkSize
if n > v.Len() {
n = v.Len()
}
list = append(list, makeRec(v.Slice(0, n), d))
v = v.Slice(n, v.Len())
}
return n0 - v.Len()
}
groups := coalesceAdjacentEdits(name, es)
groups = coalesceInterveningIdentical(groups, chunkSize/4)
for i, ds := range groups {
// Print equal.
if ds.NumDiff() == 0 {
// Compute the number of leading and trailing equal bytes to print.
var numLo, numHi int
numEqual := ds.NumIgnored + ds.NumIdentical
for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
numLo++
}
for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
numHi++
}
if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
}
// Print the equal bytes.
appendChunks(vx.Slice(0, numLo), diffIdentical)
if numEqual > numLo+numHi {
ds.NumIdentical -= numLo + numHi
list.AppendEllipsis(ds)
}
appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
vx = vx.Slice(numEqual, vx.Len())
vy = vy.Slice(numEqual, vy.Len())
continue
}
// Print unequal.
nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
vx = vx.Slice(nx, vx.Len())
ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
vy = vy.Slice(ny, vy.Len())
}
assert(vx.Len() == 0 && vy.Len() == 0)
return list
}
// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
// equal or unequal counts.
func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
var prevCase int // Arbitrary index into which case last occurred
lastStats := func(i int) *diffStats {
if prevCase != i {
groups = append(groups, diffStats{Name: name})
prevCase = i
}
return &groups[len(groups)-1]
}
for _, e := range es {
switch e {
case diff.Identity:
lastStats(1).NumIdentical++
case diff.UniqueX:
lastStats(2).NumRemoved++
case diff.UniqueY:
lastStats(2).NumInserted++
case diff.Modified:
lastStats(2).NumModified++
}
}
return groups
}
// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
// equal groups into adjacent unequal groups that currently result in a
// dual inserted/removed printout. This acts as a high-pass filter to smooth
// out high-frequency changes within the windowSize.
func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
groups, groupsOrig := groups[:0], groups
for i, ds := range groupsOrig {
if len(groups) >= 2 && ds.NumDiff() > 0 {
prev := &groups[len(groups)-2] // Unequal group
curr := &groups[len(groups)-1] // Equal group
next := &groupsOrig[i] // Unequal group
hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
*prev = prev.Append(*curr).Append(*next)
groups = groups[:len(groups)-1] // Truncate off equal group
continue
}
}
groups = append(groups, ds)
}
return groups
}

@ -0,0 +1,387 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"bytes"
"fmt"
"math/rand"
"strings"
"time"
"github.com/google/go-cmp/cmp/internal/flags"
)
var randBool = rand.New(rand.NewSource(time.Now().Unix())).Intn(2) == 0
type indentMode int
func (n indentMode) appendIndent(b []byte, d diffMode) []byte {
// The output of Diff is documented as being unstable to provide future
// flexibility in changing the output for more human-readable reports.
// This logic intentionally introduces instability to the exact output
// so that users can detect accidental reliance on stability early on,
// rather than much later when an actual change to the format occurs.
if flags.Deterministic || randBool {
// Use regular spaces (U+0020).
switch d {
case diffUnknown, diffIdentical:
b = append(b, " "...)
case diffRemoved:
b = append(b, "- "...)
case diffInserted:
b = append(b, "+ "...)
}
} else {
// Use non-breaking spaces (U+00a0).
switch d {
case diffUnknown, diffIdentical:
b = append(b, "  "...)
case diffRemoved:
b = append(b, "- "...)
case diffInserted:
b = append(b, "+ "...)
}
}
return repeatCount(n).appendChar(b, '\t')
}
type repeatCount int
func (n repeatCount) appendChar(b []byte, c byte) []byte {
for ; n > 0; n-- {
b = append(b, c)
}
return b
}
// textNode is a simplified tree-based representation of structured text.
// Possible node types are textWrap, textList, or textLine.
type textNode interface {
// Len reports the length in bytes of a single-line version of the tree.
// Nested textRecord.Diff and textRecord.Comment fields are ignored.
Len() int
// Equal reports whether the two trees are structurally identical.
// Nested textRecord.Diff and textRecord.Comment fields are compared.
Equal(textNode) bool
// String returns the string representation of the text tree.
// It is not guaranteed that len(x.String()) == x.Len(),
// nor that x.String() == y.String() implies that x.Equal(y).
String() string
// formatCompactTo formats the contents of the tree as a single-line string
// to the provided buffer. Any nested textRecord.Diff and textRecord.Comment
// fields are ignored.
//
// However, not all nodes in the tree should be collapsed into a single line.
// If a node can be collapsed into a single line, it is replaced by a textLine
// node. Since the top-level node cannot replace itself, this also returns
// the current node itself.
//
// This does not mutate the receiver.
formatCompactTo([]byte, diffMode) ([]byte, textNode)
// formatExpandedTo formats the contents of the tree as a multi-line string
// to the provided buffer. In order for column alignment to operate well,
// formatCompactTo must be called before calling formatExpandedTo.
formatExpandedTo([]byte, diffMode, indentMode) []byte
}
// textWrap is a wrapper that concatenates a prefix and/or a suffix
// to the underlying node.
type textWrap struct {
Prefix string // e.g., "bytes.Buffer{"
Value textNode // textWrap | textList | textLine
Suffix string // e.g., "}"
}
func (s textWrap) Len() int {
return len(s.Prefix) + s.Value.Len() + len(s.Suffix)
}
func (s1 textWrap) Equal(s2 textNode) bool {
if s2, ok := s2.(textWrap); ok {
return s1.Prefix == s2.Prefix && s1.Value.Equal(s2.Value) && s1.Suffix == s2.Suffix
}
return false
}
func (s textWrap) String() string {
var d diffMode
var n indentMode
_, s2 := s.formatCompactTo(nil, d)
b := n.appendIndent(nil, d) // Leading indent
b = s2.formatExpandedTo(b, d, n) // Main body
b = append(b, '\n') // Trailing newline
return string(b)
}
func (s textWrap) formatCompactTo(b []byte, d diffMode) ([]byte, textNode) {
n0 := len(b) // Original buffer length
b = append(b, s.Prefix...)
b, s.Value = s.Value.formatCompactTo(b, d)
b = append(b, s.Suffix...)
if _, ok := s.Value.(textLine); ok {
return b, textLine(b[n0:])
}
return b, s
}
func (s textWrap) formatExpandedTo(b []byte, d diffMode, n indentMode) []byte {
b = append(b, s.Prefix...)
b = s.Value.formatExpandedTo(b, d, n)
b = append(b, s.Suffix...)
return b
}
// textList is a comma-separated list of textWrap or textLine nodes.
// The list may be formatted as multi-lines or single-line at the discretion
// of the textList.formatCompactTo method.
type textList []textRecord
type textRecord struct {
Diff diffMode // e.g., 0 or '-' or '+'
Key string // e.g., "MyField"
Value textNode // textWrap | textLine
Comment fmt.Stringer // e.g., "6 identical fields"
}
// AppendEllipsis appends a new ellipsis node to the list if none already
// exists at the end. If ds is non-zero, it coalesces the statistics with the
// previous diffStats.
func (s *textList) AppendEllipsis(ds diffStats) {
hasStats := ds != diffStats{}
if len(*s) == 0 || !(*s)[len(*s)-1].Value.Equal(textEllipsis) {
if hasStats {
*s = append(*s, textRecord{Value: textEllipsis, Comment: ds})
} else {
*s = append(*s, textRecord{Value: textEllipsis})
}
return
}
if hasStats {
(*s)[len(*s)-1].Comment = (*s)[len(*s)-1].Comment.(diffStats).Append(ds)
}
}
func (s textList) Len() (n int) {
for i, r := range s {
n += len(r.Key)
if r.Key != "" {
n += len(": ")
}
n += r.Value.Len()
if i < len(s)-1 {
n += len(", ")
}
}
return n
}
func (s1 textList) Equal(s2 textNode) bool {
if s2, ok := s2.(textList); ok {
if len(s1) != len(s2) {
return false
}
for i := range s1 {
r1, r2 := s1[i], s2[i]
if !(r1.Diff == r2.Diff && r1.Key == r2.Key && r1.Value.Equal(r2.Value) && r1.Comment == r2.Comment) {
return false
}
}
return true
}
return false
}
func (s textList) String() string {
return textWrap{"{", s, "}"}.String()
}
func (s textList) formatCompactTo(b []byte, d diffMode) ([]byte, textNode) {
s = append(textList(nil), s...) // Avoid mutating original
// Determine whether we can collapse this list as a single line.
n0 := len(b) // Original buffer length
var multiLine bool
for i, r := range s {
if r.Diff == diffInserted || r.Diff == diffRemoved {
multiLine = true
}
b = append(b, r.Key...)
if r.Key != "" {
b = append(b, ": "...)
}
b, s[i].Value = r.Value.formatCompactTo(b, d|r.Diff)
if _, ok := s[i].Value.(textLine); !ok {
multiLine = true
}
if r.Comment != nil {
multiLine = true
}
if i < len(s)-1 {
b = append(b, ", "...)
}
}
// Force multi-lined output when printing a removed/inserted node that
// is sufficiently long.
if (d == diffInserted || d == diffRemoved) && len(b[n0:]) > 80 {
multiLine = true
}
if !multiLine {
return b, textLine(b[n0:])
}
return b, s
}
func (s textList) formatExpandedTo(b []byte, d diffMode, n indentMode) []byte {
alignKeyLens := s.alignLens(
func(r textRecord) bool {
_, isLine := r.Value.(textLine)
return r.Key == "" || !isLine
},
func(r textRecord) int { return len(r.Key) },
)
alignValueLens := s.alignLens(
func(r textRecord) bool {
_, isLine := r.Value.(textLine)
return !isLine || r.Value.Equal(textEllipsis) || r.Comment == nil
},
func(r textRecord) int { return len(r.Value.(textLine)) },
)
// Format the list as a multi-lined output.
n++
for i, r := range s {
b = n.appendIndent(append(b, '\n'), d|r.Diff)
if r.Key != "" {
b = append(b, r.Key+": "...)
}
b = alignKeyLens[i].appendChar(b, ' ')
b = r.Value.formatExpandedTo(b, d|r.Diff, n)
if !r.Value.Equal(textEllipsis) {
b = append(b, ',')
}
b = alignValueLens[i].appendChar(b, ' ')
if r.Comment != nil {
b = append(b, " // "+r.Comment.String()...)
}
}
n--
return n.appendIndent(append(b, '\n'), d)
}
func (s textList) alignLens(
skipFunc func(textRecord) bool,
lenFunc func(textRecord) int,
) []repeatCount {
var startIdx, endIdx, maxLen int
lens := make([]repeatCount, len(s))
for i, r := range s {
if skipFunc(r) {
for j := startIdx; j < endIdx && j < len(s); j++ {
lens[j] = repeatCount(maxLen - lenFunc(s[j]))
}
startIdx, endIdx, maxLen = i+1, i+1, 0
} else {
if maxLen < lenFunc(r) {
maxLen = lenFunc(r)
}
endIdx = i + 1
}
}
for j := startIdx; j < endIdx && j < len(s); j++ {
lens[j] = repeatCount(maxLen - lenFunc(s[j]))
}
return lens
}
// textLine is a single-line segment of text and is always a leaf node
// in the textNode tree.
type textLine []byte
var (
textNil = textLine("nil")
textEllipsis = textLine("...")
)
func (s textLine) Len() int {
return len(s)
}
func (s1 textLine) Equal(s2 textNode) bool {
if s2, ok := s2.(textLine); ok {
return bytes.Equal([]byte(s1), []byte(s2))
}
return false
}
func (s textLine) String() string {
return string(s)
}
func (s textLine) formatCompactTo(b []byte, d diffMode) ([]byte, textNode) {
return append(b, s...), s
}
func (s textLine) formatExpandedTo(b []byte, _ diffMode, _ indentMode) []byte {
return append(b, s...)
}
type diffStats struct {
Name string
NumIgnored int
NumIdentical int
NumRemoved int
NumInserted int
NumModified int
}
func (s diffStats) NumDiff() int {
return s.NumRemoved + s.NumInserted + s.NumModified
}
func (s diffStats) Append(ds diffStats) diffStats {
assert(s.Name == ds.Name)
s.NumIgnored += ds.NumIgnored
s.NumIdentical += ds.NumIdentical
s.NumRemoved += ds.NumRemoved
s.NumInserted += ds.NumInserted
s.NumModified += ds.NumModified
return s
}
// String prints a human-readable summary of coalesced records.
//
// Example:
// diffStats{Name: "Field", NumIgnored: 5}.String() => "5 ignored fields"
func (s diffStats) String() string {
var ss []string
var sum int
labels := [...]string{"ignored", "identical", "removed", "inserted", "modified"}
counts := [...]int{s.NumIgnored, s.NumIdentical, s.NumRemoved, s.NumInserted, s.NumModified}
for i, n := range counts {
if n > 0 {
ss = append(ss, fmt.Sprintf("%d %v", n, labels[i]))
}
sum += n
}
// Pluralize the name (adjusting for some obscure English grammar rules).
name := s.Name
if sum > 1 {
name += "s"
if strings.HasSuffix(name, "ys") {
name = name[:len(name)-2] + "ies" // e.g., "entrys" => "entries"
}
}
// Format the list according to English grammar (with Oxford comma).
switch n := len(ss); n {
case 0:
return ""
case 1, 2:
return strings.Join(ss, " and ") + " " + name
default:
return strings.Join(ss[:n-1], ", ") + ", and " + ss[n-1] + " " + name
}
}
type commentString string
func (s commentString) String() string { return string(s) }

@ -0,0 +1,121 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import "reflect"
// valueNode represents a single node within a report, which is a
// structured representation of the value tree, containing information
// regarding which nodes are equal or not.
type valueNode struct {
parent *valueNode
Type reflect.Type
ValueX reflect.Value
ValueY reflect.Value
// NumSame is the number of leaf nodes that are equal.
// All descendants are equal only if NumDiff is 0.
NumSame int
// NumDiff is the number of leaf nodes that are not equal.
NumDiff int
// NumIgnored is the number of leaf nodes that are ignored.
NumIgnored int
// NumCompared is the number of leaf nodes that were compared
// using an Equal method or Comparer function.
NumCompared int
// NumTransformed is the number of non-leaf nodes that were transformed.
NumTransformed int
// NumChildren is the number of transitive descendants of this node.
// This counts from zero; thus, leaf nodes have no descendants.
NumChildren int
// MaxDepth is the maximum depth of the tree. This counts from zero;
// thus, leaf nodes have a depth of zero.
MaxDepth int
// Records is a list of struct fields, slice elements, or map entries.
Records []reportRecord // If populated, implies Value is not populated
// Value is the result of a transformation, pointer indirection, or
// type assertion.
Value *valueNode // If populated, implies Records is not populated
// TransformerName is the name of the transformer.
TransformerName string // If non-empty, implies Value is populated
}
type reportRecord struct {
Key reflect.Value // Invalid for slice element
Value *valueNode
}
func (parent *valueNode) PushStep(ps PathStep) (child *valueNode) {
vx, vy := ps.Values()
child = &valueNode{parent: parent, Type: ps.Type(), ValueX: vx, ValueY: vy}
switch s := ps.(type) {
case StructField:
assert(parent.Value == nil)
parent.Records = append(parent.Records, reportRecord{Key: reflect.ValueOf(s.Name()), Value: child})
case SliceIndex:
assert(parent.Value == nil)
parent.Records = append(parent.Records, reportRecord{Value: child})
case MapIndex:
assert(parent.Value == nil)
parent.Records = append(parent.Records, reportRecord{Key: s.Key(), Value: child})
case Indirect:
assert(parent.Value == nil && parent.Records == nil)
parent.Value = child
case TypeAssertion:
assert(parent.Value == nil && parent.Records == nil)
parent.Value = child
case Transform:
assert(parent.Value == nil && parent.Records == nil)
parent.Value = child
parent.TransformerName = s.Name()
parent.NumTransformed++
default:
assert(parent == nil) // Must be the root step
}
return child
}
func (r *valueNode) Report(rs Result) {
assert(r.MaxDepth == 0) // May only be called on leaf nodes
if rs.ByIgnore() {
r.NumIgnored++
} else {
if rs.Equal() {
r.NumSame++
} else {
r.NumDiff++
}
}
assert(r.NumSame+r.NumDiff+r.NumIgnored == 1)
if rs.ByMethod() {
r.NumCompared++
}
if rs.ByFunc() {
r.NumCompared++
}
assert(r.NumCompared <= 1)
}
func (child *valueNode) PopStep() (parent *valueNode) {
if child.parent == nil {
return nil
}
parent = child.parent
parent.NumSame += child.NumSame
parent.NumDiff += child.NumDiff
parent.NumIgnored += child.NumIgnored
parent.NumCompared += child.NumCompared
parent.NumTransformed += child.NumTransformed
parent.NumChildren += child.NumChildren + 1
if parent.MaxDepth < child.MaxDepth+1 {
parent.MaxDepth = child.MaxDepth + 1
}
return parent
}

@ -0,0 +1,209 @@
# HCL Custom Static Decoding Extension
This HCL extension provides a mechanism for defining arguments in an HCL-based
language whose values are derived using custom decoding rules against the
HCL expression syntax, overriding the usual behavior of normal expression
evaluation.
"Arguments", for the purpose of this extension, currently includes the
following two contexts:
* For applications using `hcldec` for dynamic decoding, a `hcldec.AttrSpec`
or `hcldec.BlockAttrsSpec` can be given a special type constraint that
opts in to custom decoding behavior for the attribute(s) that are selected
by that specification.
* When working with the HCL native expression syntax, a function given in
the `hcl.EvalContext` during evaluation can have parameters with special
type constraints that opt in to custom decoding behavior for the argument
expression associated with that parameter in any call.
The above use-cases are rather abstract, so we'll consider a motivating
real-world example: sometimes we (language designers) need to allow users
to specify type constraints directly in the language itself, such as in
[Terraform's Input Variables](https://www.terraform.io/docs/configuration/variables.html).
Terraform's `variable` blocks include an argument called `type` which takes
a type constraint given using HCL expression building-blocks as defined by
[the HCL `typeexpr` extension](../typeexpr/README.md).
A "type constraint expression" of that sort is not an expression intended to
be evaluated in the usual way. Instead, the physical expression is
deconstructed using [the static analysis operations](../../spec.md#static-analysis)
to produce a `cty.Type` as the result, rather than a `cty.Value`.
The purpose of this Custom Static Decoding Extension, then, is to provide a
bridge to allow that sort of custom decoding to be used via mechanisms that
normally deal in `cty.Value`, such as `hcldec` and native syntax function
calls as listed above.
(Note: [`gohcl`](https://pkg.go.dev/github.com/hashicorp/hcl/v2/gohcl) has
its own mechanism to support this use case, exploiting the fact that it is
working directly with "normal" Go types. Decoding into a struct field of
type `hcl.Expression` obtains the expression directly without evaluating it
first. The Custom Static Decoding Extension is not necessary for that `gohcl`
technique. You can also implement custom decoding by working directly with
the lowest-level HCL API, which separates extraction of and evaluation of
expressions into two steps.)
## Custom Decoding Types
This extension relies on a convention implemented in terms of
[_Capsule Types_ in the underlying `cty` type system](https://github.com/zclconf/go-cty/blob/master/docs/types.md#capsule-types). `cty` allows a capsule type to carry arbitrary
extension metadata values as an aid to creating higher-level abstractions like
this extension.
A custom argument decoding mode, then, is implemented by creating a new `cty`
capsule type that implements the `ExtensionData` custom operation to return
a decoding function when requested. For example:
```go
var keywordType cty.Type
keywordType = cty.CapsuleWithOps("keyword", reflect.TypeOf(""), &cty.CapsuleOps{
ExtensionData: func(key interface{}) interface{} {
switch key {
case customdecode.CustomExpressionDecoder:
return customdecode.CustomExpressionDecoderFunc(
func(expr hcl.Expression, ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) {
var diags hcl.Diagnostics
kw := hcl.ExprAsKeyword(expr)
if kw == "" {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid keyword",
Detail: "A keyword is required",
Subject: expr.Range().Ptr(),
})
return cty.UnknownVal(keywordType), diags
}
return cty.CapsuleVal(keywordType, &kw), nil
},
)
default:
return nil
}
},
})
```
The boilerplate here is a bit fussy, but the important part for our purposes
is the `case customdecode.CustomExpressionDecoder:` clause, which uses
a custom extension key type defined in this package to recognize when a
component implementing this extension is checking to see if a target type
has a custom decode implementation.
In the above case we've defined a type that decodes expressions as static
keywords, so a keyword like `foo` would decode as an encapsulated `"foo"`
string, while any other sort of expression like `"baz"` or `1 + 1` would
return an error.
We could then use `keywordType` as a type constraint either for a function
parameter or a `hcldec` attribute specification, which would require the
argument for that function parameter or the expression for the matching
attributes to be a static keyword, rather than an arbitrary expression.
For example, in a `hcldec.AttrSpec`:
```go
keywordSpec := &hcldec.AttrSpec{
Name: "keyword",
Type: keywordType,
}
```
The above would accept input like the following and would set its result to
a `cty.Value` of `keywordType`, after decoding:
```hcl
keyword = foo
```
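As a minimal sketch of the decoding side (assuming `body` is an `hcl.Body` already parsed from input like the above; the variable names are illustrative only), the application would then receive a capsule value rather than an evaluated string:

```go
val, diags := hcldec.Decode(body, keywordSpec, nil)
if diags.HasErrors() {
	// handle the diagnostics
}
// val is a cty.Value of keywordType; unwrap the captured keyword.
kw := *(val.EncapsulatedValue().(*string))
fmt.Println(kw) // prints "foo" for the input above
```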
## The Expression and Expression Closure `cty` types
Building on the above, this package also includes two capsule types that use
the above mechanism to allow calling applications to capture expressions
directly and thus defer analysis to a later step, after initial decoding.
The `customdecode.ExpressionType` type encapsulates an `hcl.Expression` alone,
for situations like our type constraint expression example above where it's
the static structure of the expression we want to inspect, and thus any
variables and functions defined in the evaluation context are irrelevant.
The `customdecode.ExpressionClosureType` type encapsulates a
`*customdecode.ExpressionClosure` value, which binds the given expression to
the `hcl.EvalContext` it was asked to evaluate against and thus allows the
receiver of that result to later perform normal evaluation of the expression
with all the same variables and functions that would've been available to it
naturally.
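For example (a sketch only; the attribute name `filter` and the surrounding variables are hypothetical), an application that merely wants to know which variables an expression mentions could capture it with `ExpressionType` and analyze it without ever evaluating it:

```go
spec := &hcldec.AttrSpec{
	Name: "filter",
	Type: customdecode.ExpressionType,
}
val, _ := hcldec.Decode(body, spec, nil) // never evaluates the expression
expr := customdecode.ExpressionFromVal(val)
for _, traversal := range expr.Variables() {
	fmt.Println(traversal.RootName()) // each variable the expression refers to
}
```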
Both of these types can be used as type constraints either for `hcldec`
attribute specifications or for function arguments. Here's an example of
`ExpressionClosureType` to implement a function that can evaluate
an expression with some additional variables defined locally, which we'll
call the `with(...)` function:
```go
var WithFunc = function.New(&function.Spec{
Params: []function.Parameter{
{
Name: "variables",
Type: cty.DynamicPseudoType,
},
{
Name: "expression",
Type: customdecode.ExpressionClosureType,
},
},
Type: func(args []cty.Value) (cty.Type, error) {
varsVal := args[0]
exprVal := args[1]
if !varsVal.Type().IsObjectType() {
return cty.NilVal, function.NewArgErrorf(0, "must be an object defining local variables")
}
if !varsVal.IsKnown() {
// We can't predict our result type until the variables object
// is known.
return cty.DynamicPseudoType, nil
}
vars := varsVal.AsValueMap()
closure := customdecode.ExpressionClosureFromVal(exprVal)
result, err := evalWithLocals(vars, closure)
if err != nil {
return cty.NilVal, err
}
return result.Type(), nil
},
Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
varsVal := args[0]
exprVal := args[1]
vars := varsVal.AsValueMap()
closure := customdecode.ExpressionClosureFromVal(exprVal)
return evalWithLocals(vars, closure)
},
})
func evalWithLocals(locals map[string]cty.Value, closure *customdecode.ExpressionClosure) (cty.Value, error) {
childCtx := closure.EvalContext.NewChild()
childCtx.Variables = locals
val, diags := closure.Expression.Value(childCtx)
if diags.HasErrors() {
return cty.NilVal, function.NewArgErrorf(1, "couldn't evaluate expression: %s", diags.Error())
}
return val, nil
}
```
If the above function were placed into an `hcl.EvalContext` as `with`, it
could be used in a native syntax call to that function as follows:
```hcl
foo = with({name = "Cory"}, "${greeting}, ${name}!")
```
The above assumes a variable in the main context called `greeting`, to which
the `with` function adds `name` before evaluating the expression given in
its second argument. This makes that second argument context-sensitive -- it
would behave differently if the user wrote the same thing somewhere else -- so
this capability should be used with care to make sure it doesn't cause confusion
for the end-users of your language.
There are some other examples of this capability to evaluate expressions in
unusual ways in the `tryfunc` directory that is a sibling of this one.

@ -0,0 +1,56 @@
// Package customdecode contains an HCL extension that allows, in certain
// contexts, expression evaluation to be overridden by custom static analysis.
//
// This mechanism is only supported in certain specific contexts where
// expressions are decoded with a specific target type in mind. For more
// information, see the documentation on CustomExpressionDecoder.
package customdecode
import (
"github.com/hashicorp/hcl/v2"
"github.com/zclconf/go-cty/cty"
)
type customDecoderImpl int
// CustomExpressionDecoder is a value intended to be used as a cty capsule
// type ExtensionData key for capsule types whose values are to be obtained
// by static analysis of an expression rather than normal evaluation of that
// expression.
//
// When a cooperating capsule type is asked for ExtensionData with this key,
// it must return a non-nil CustomExpressionDecoderFunc value.
//
// This mechanism is not universally supported; instead, it's handled in a few
// specific places where expressions are evaluated with the intent of producing
// a cty.Value of a type given by the calling application.
//
// Specifically, this currently works for type constraints given in
// hcldec.AttrSpec and hcldec.BlockAttrsSpec, and it works for arguments to
// function calls in the HCL native syntax. HCL extensions implemented outside
// of the main HCL module may also implement this; consult their own
// documentation for details.
const CustomExpressionDecoder = customDecoderImpl(1)
// CustomExpressionDecoderFunc is the type of value that must be returned by
// a capsule type handling the key CustomExpressionDecoder in its ExtensionData
// implementation.
//
// If no error diagnostics are returned, the result value MUST be of the
// capsule type that the decoder function was derived from. If the returned
// error diagnostics prevent producing a value at all, return cty.NilVal.
type CustomExpressionDecoderFunc func(expr hcl.Expression, ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics)
// CustomExpressionDecoderForType takes any cty type and returns its
// custom expression decoder implementation if it has one. If it is not a
// capsule type or it does not implement a custom expression decoder, this
// function returns nil.
func CustomExpressionDecoderForType(ty cty.Type) CustomExpressionDecoderFunc {
if !ty.IsCapsuleType() {
return nil
}
if fn, ok := ty.CapsuleExtensionData(CustomExpressionDecoder).(CustomExpressionDecoderFunc); ok {
return fn
}
return nil
}
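// As a sketch of the consuming side (the names here are illustrative, not
// part of this package's API), a decoder honoring this extension would check
// the target type before falling back to normal evaluation:
//
//	if fn := customdecode.CustomExpressionDecoderForType(targetType); fn != nil {
//		return fn(expr, ctx) // custom static decoding
//	}
//	return expr.Value(ctx) // normal expression evaluation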

@ -0,0 +1,146 @@
package customdecode
import (
"fmt"
"reflect"
"github.com/hashicorp/hcl/v2"
"github.com/zclconf/go-cty/cty"
)
// ExpressionType is a cty capsule type that carries hcl.Expression values.
//
// This type implements custom decoding in the most general way possible: it
// just captures whatever expression is given to it, with no further processing
// whatsoever. It could therefore be useful in situations where an application
// must defer processing of the expression content until a later step.
//
// ExpressionType only captures the expression, not the evaluation context it
// was destined to be evaluated in. That means this type can be fine for
// situations where the recipient of the value only intends to do static
// analysis, but ExpressionClosureType is more appropriate in situations where
// the recipient will eventually evaluate the given expression.
var ExpressionType cty.Type
// ExpressionVal returns a new cty value of type ExpressionType, wrapping the
// given expression.
func ExpressionVal(expr hcl.Expression) cty.Value {
return cty.CapsuleVal(ExpressionType, &expr)
}
// ExpressionFromVal returns the expression encapsulated in the given value, or
// panics if the value is not a known value of ExpressionType.
func ExpressionFromVal(v cty.Value) hcl.Expression {
if !v.Type().Equals(ExpressionType) {
panic("value is not of ExpressionType")
}
ptr := v.EncapsulatedValue().(*hcl.Expression)
return *ptr
}
// ExpressionClosureType is a cty capsule type that carries hcl.Expression
// values along with their original evaluation contexts.
//
// This is similar to ExpressionType except that during custom decoding it
// also captures the hcl.EvalContext that was provided, allowing callers to
// evaluate the expression later in the same context where it would originally
// have been evaluated, or a context derived from that one.
var ExpressionClosureType cty.Type
// ExpressionClosure is the type encapsulated in ExpressionClosureType.
type ExpressionClosure struct {
Expression hcl.Expression
EvalContext *hcl.EvalContext
}
// ExpressionClosureVal returns a new cty value of type ExpressionClosureType,
// wrapping the given expression closure.
func ExpressionClosureVal(closure *ExpressionClosure) cty.Value {
return cty.CapsuleVal(ExpressionClosureType, closure)
}
// Value evaluates the closure's expression using the closure's EvalContext,
// returning the result.
func (c *ExpressionClosure) Value() (cty.Value, hcl.Diagnostics) {
return c.Expression.Value(c.EvalContext)
}
// ExpressionClosureFromVal returns the expression closure encapsulated in the
// given value, or panics if the value is not a known value of
// ExpressionClosureType.
//
// The caller MUST NOT modify the returned closure or the EvalContext inside
// it. To derive a new EvalContext, either create a child context or make
// a copy.
func ExpressionClosureFromVal(v cty.Value) *ExpressionClosure {
if !v.Type().Equals(ExpressionClosureType) {
panic("value is not of ExpressionClosureType")
}
return v.EncapsulatedValue().(*ExpressionClosure)
}
func init() {
// Getting hold of a reflect.Type for hcl.Expression is a bit tricky because
// it's an interface type, but we can do it with some indirection.
goExpressionType := reflect.TypeOf((*hcl.Expression)(nil)).Elem()
ExpressionType = cty.CapsuleWithOps("expression", goExpressionType, &cty.CapsuleOps{
ExtensionData: func(key interface{}) interface{} {
switch key {
case CustomExpressionDecoder:
return CustomExpressionDecoderFunc(
func(expr hcl.Expression, ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) {
return ExpressionVal(expr), nil
},
)
default:
return nil
}
},
TypeGoString: func(_ reflect.Type) string {
return "customdecode.ExpressionType"
},
GoString: func(raw interface{}) string {
exprPtr := raw.(*hcl.Expression)
return fmt.Sprintf("customdecode.ExpressionVal(%#v)", *exprPtr)
},
RawEquals: func(a, b interface{}) bool {
aPtr := a.(*hcl.Expression)
bPtr := b.(*hcl.Expression)
return reflect.DeepEqual(*aPtr, *bPtr)
},
})
ExpressionClosureType = cty.CapsuleWithOps("expression closure", reflect.TypeOf(ExpressionClosure{}), &cty.CapsuleOps{
ExtensionData: func(key interface{}) interface{} {
switch key {
case CustomExpressionDecoder:
return CustomExpressionDecoderFunc(
func(expr hcl.Expression, ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) {
return ExpressionClosureVal(&ExpressionClosure{
Expression: expr,
EvalContext: ctx,
}), nil
},
)
default:
return nil
}
},
TypeGoString: func(_ reflect.Type) string {
return "customdecode.ExpressionClosureType"
},
GoString: func(raw interface{}) string {
closure := raw.(*ExpressionClosure)
return fmt.Sprintf("customdecode.ExpressionClosureVal(%#v)", closure)
},
RawEquals: func(a, b interface{}) bool {
closureA := a.(*ExpressionClosure)
closureB := b.(*ExpressionClosure)
// The expression itself compares by deep equality, but EvalContexts
// conventionally compare by pointer identity, so we'll comply
// with both conventions here by testing them separately.
return closureA.EvalContext == closureB.EvalContext &&
reflect.DeepEqual(closureA.Expression, closureB.Expression)
},
})
}

@ -0,0 +1,322 @@
package gohcl
import (
"fmt"
"reflect"
"github.com/zclconf/go-cty/cty"
"github.com/hashicorp/hcl/v2"
"github.com/zclconf/go-cty/cty/convert"
"github.com/zclconf/go-cty/cty/gocty"
)
// DecodeBody extracts the configuration within the given body into the given
// value. This value must be a non-nil pointer to either a struct or
// a map, where in the former case the configuration will be decoded using
// struct tags and in the latter case only attributes are allowed and their
// values are decoded into the map.
//
// The given EvalContext is used to resolve any variables or functions in
// expressions encountered while decoding. This may be nil to require only
// constant values, for simple applications that do not support variables or
// functions.
//
// The returned diagnostics should be inspected with its HasErrors method to
// determine if the populated value is valid and complete. If error diagnostics
// are returned then the given value may have been partially-populated but
// may still be accessed by a careful caller for static analysis and editor
// integration use-cases.
func DecodeBody(body hcl.Body, ctx *hcl.EvalContext, val interface{}) hcl.Diagnostics {
rv := reflect.ValueOf(val)
if rv.Kind() != reflect.Ptr {
panic(fmt.Sprintf("target value must be a pointer, not %s", rv.Type().String()))
}
return decodeBodyToValue(body, ctx, rv.Elem())
}
func decodeBodyToValue(body hcl.Body, ctx *hcl.EvalContext, val reflect.Value) hcl.Diagnostics {
et := val.Type()
switch et.Kind() {
case reflect.Struct:
return decodeBodyToStruct(body, ctx, val)
case reflect.Map:
return decodeBodyToMap(body, ctx, val)
default:
panic(fmt.Sprintf("target value must be pointer to struct or map, not %s", et.String()))
}
}
func decodeBodyToStruct(body hcl.Body, ctx *hcl.EvalContext, val reflect.Value) hcl.Diagnostics {
schema, partial := ImpliedBodySchema(val.Interface())
var content *hcl.BodyContent
var leftovers hcl.Body
var diags hcl.Diagnostics
if partial {
content, leftovers, diags = body.PartialContent(schema)
} else {
content, diags = body.Content(schema)
}
if content == nil {
return diags
}
tags := getFieldTags(val.Type())
if tags.Remain != nil {
fieldIdx := *tags.Remain
field := val.Type().Field(fieldIdx)
fieldV := val.Field(fieldIdx)
switch {
case bodyType.AssignableTo(field.Type):
fieldV.Set(reflect.ValueOf(leftovers))
case attrsType.AssignableTo(field.Type):
attrs, attrsDiags := leftovers.JustAttributes()
if len(attrsDiags) > 0 {
diags = append(diags, attrsDiags...)
}
fieldV.Set(reflect.ValueOf(attrs))
default:
diags = append(diags, decodeBodyToValue(leftovers, ctx, fieldV)...)
}
}
for name, fieldIdx := range tags.Attributes {
attr := content.Attributes[name]
field := val.Type().Field(fieldIdx)
fieldV := val.Field(fieldIdx)
if attr == nil {
if !exprType.AssignableTo(field.Type) {
continue
}
// As a special case, if the target is of type hcl.Expression then
// we'll assign an actual expression that evaluates to a cty null,
// so the caller can deal with it within the cty realm rather
// than within the Go realm.
synthExpr := hcl.StaticExpr(cty.NullVal(cty.DynamicPseudoType), body.MissingItemRange())
fieldV.Set(reflect.ValueOf(synthExpr))
continue
}
switch {
case attrType.AssignableTo(field.Type):
fieldV.Set(reflect.ValueOf(attr))
case exprType.AssignableTo(field.Type):
fieldV.Set(reflect.ValueOf(attr.Expr))
default:
diags = append(diags, DecodeExpression(
attr.Expr, ctx, fieldV.Addr().Interface(),
)...)
}
}
blocksByType := content.Blocks.ByType()
for typeName, fieldIdx := range tags.Blocks {
blocks := blocksByType[typeName]
field := val.Type().Field(fieldIdx)
ty := field.Type
isSlice := false
isPtr := false
if ty.Kind() == reflect.Slice {
isSlice = true
ty = ty.Elem()
}
if ty.Kind() == reflect.Ptr {
isPtr = true
ty = ty.Elem()
}
if len(blocks) > 1 && !isSlice {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: fmt.Sprintf("Duplicate %s block", typeName),
Detail: fmt.Sprintf(
"Only one %s block is allowed. Another was defined at %s.",
typeName, blocks[0].DefRange.String(),
),
Subject: &blocks[1].DefRange,
})
continue
}
if len(blocks) == 0 {
if isSlice || isPtr {
if val.Field(fieldIdx).IsNil() {
val.Field(fieldIdx).Set(reflect.Zero(field.Type))
}
} else {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: fmt.Sprintf("Missing %s block", typeName),
Detail: fmt.Sprintf("A %s block is required.", typeName),
Subject: body.MissingItemRange().Ptr(),
})
}
continue
}
switch {
case isSlice:
elemType := ty
if isPtr {
elemType = reflect.PtrTo(ty)
}
sli := val.Field(fieldIdx)
if sli.IsNil() {
sli = reflect.MakeSlice(reflect.SliceOf(elemType), len(blocks), len(blocks))
}
for i, block := range blocks {
if isPtr {
if i >= sli.Len() {
sli = reflect.Append(sli, reflect.New(ty))
}
v := sli.Index(i)
if v.IsNil() {
v = reflect.New(ty)
}
diags = append(diags, decodeBlockToValue(block, ctx, v.Elem())...)
sli.Index(i).Set(v)
} else {
diags = append(diags, decodeBlockToValue(block, ctx, sli.Index(i))...)
}
}
if sli.Len() > len(blocks) {
sli.SetLen(len(blocks))
}
val.Field(fieldIdx).Set(sli)
default:
block := blocks[0]
if isPtr {
v := val.Field(fieldIdx)
if v.IsNil() {
v = reflect.New(ty)
}
diags = append(diags, decodeBlockToValue(block, ctx, v.Elem())...)
val.Field(fieldIdx).Set(v)
} else {
diags = append(diags, decodeBlockToValue(block, ctx, val.Field(fieldIdx))...)
}
}
}
return diags
}
func decodeBodyToMap(body hcl.Body, ctx *hcl.EvalContext, v reflect.Value) hcl.Diagnostics {
attrs, diags := body.JustAttributes()
if attrs == nil {
return diags
}
mv := reflect.MakeMap(v.Type())
for k, attr := range attrs {
switch {
case attrType.AssignableTo(v.Type().Elem()):
mv.SetMapIndex(reflect.ValueOf(k), reflect.ValueOf(attr))
case exprType.AssignableTo(v.Type().Elem()):
mv.SetMapIndex(reflect.ValueOf(k), reflect.ValueOf(attr.Expr))
default:
ev := reflect.New(v.Type().Elem())
diags = append(diags, DecodeExpression(attr.Expr, ctx, ev.Interface())...)
mv.SetMapIndex(reflect.ValueOf(k), ev.Elem())
}
}
v.Set(mv)
return diags
}
func decodeBlockToValue(block *hcl.Block, ctx *hcl.EvalContext, v reflect.Value) hcl.Diagnostics {
var diags hcl.Diagnostics
ty := v.Type()
switch {
case blockType.AssignableTo(ty):
v.Elem().Set(reflect.ValueOf(block))
case bodyType.AssignableTo(ty):
v.Elem().Set(reflect.ValueOf(block.Body))
case attrsType.AssignableTo(ty):
attrs, attrsDiags := block.Body.JustAttributes()
if len(attrsDiags) > 0 {
diags = append(diags, attrsDiags...)
}
v.Elem().Set(reflect.ValueOf(attrs))
default:
diags = append(diags, decodeBodyToValue(block.Body, ctx, v)...)
if len(block.Labels) > 0 {
blockTags := getFieldTags(ty)
for li, lv := range block.Labels {
lfieldIdx := blockTags.Labels[li].FieldIndex
v.Field(lfieldIdx).Set(reflect.ValueOf(lv))
}
}
}
return diags
}
// DecodeExpression extracts the value of the given expression into the given
// value. This value must be something that gocty is able to decode into,
// since the final decoding is delegated to that package.
//
// The given EvalContext is used to resolve any variables or functions in
// expressions encountered while decoding. This may be nil to require only
// constant values, for simple applications that do not support variables or
// functions.
//
// The returned diagnostics should be inspected with its HasErrors method to
// determine if the populated value is valid and complete. If error diagnostics
// are returned then the given value may have been partially-populated but
// may still be accessed by a careful caller for static analysis and editor
// integration use-cases.
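//
// For example, a caller could decode an expression directly into a Go int
// (a minimal sketch, assuming expr evaluates to a number):
//
//	var port int
//	diags := DecodeExpression(expr, nil, &port)
//	if diags.HasErrors() {
//	    // report the diagnostics to the user
//	}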
func DecodeExpression(expr hcl.Expression, ctx *hcl.EvalContext, val interface{}) hcl.Diagnostics {
srcVal, diags := expr.Value(ctx)
convTy, err := gocty.ImpliedType(val)
if err != nil {
panic(fmt.Sprintf("unsuitable DecodeExpression target: %s", err))
}
srcVal, err = convert.Convert(srcVal, convTy)
if err != nil {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unsuitable value type",
Detail: fmt.Sprintf("Unsuitable value: %s", err.Error()),
Subject: expr.StartRange().Ptr(),
Context: expr.Range().Ptr(),
})
return diags
}
err = gocty.FromCtyValue(srcVal, val)
if err != nil {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unsuitable value type",
Detail: fmt.Sprintf("Unsuitable value: %s", err.Error()),
Subject: expr.StartRange().Ptr(),
Context: expr.Range().Ptr(),
})
}
return diags
}

@ -0,0 +1,57 @@
// Package gohcl allows decoding HCL configurations into Go data structures.
//
// It provides a convenient and concise way of describing the schema for
// configuration and then accessing the resulting data via native Go
// types.
//
// A struct field tag scheme is used, similar to other decoding and
// unmarshalling libraries. The tags are formatted as in the following example:
//
// ThingType string `hcl:"thing_type,attr"`
//
// Within each tag there are two comma-separated tokens. The first is the
// name of the corresponding construct in configuration, while the second
// is a keyword giving the kind of construct expected. The following
// kind keywords are supported:
//
// attr (the default) indicates that the value is to be populated from an attribute
block indicates that the value is to be populated from a block
label indicates that the value is to be populated from a block label
// optional is the same as attr, but the field is optional
// remain indicates that the value is to be populated from the remaining body after populating other fields
//
// "attr" fields may either be of type *hcl.Expression, in which case the raw
// expression is assigned, or of any type accepted by gocty, in which case
// gocty will be used to assign the value to a native Go type.
//
// "block" fields may be of type *hcl.Block or hcl.Body, in which case the
// corresponding raw value is assigned, or may be a struct that recursively
// uses the same tags. Block fields may also be slices of any of these types,
// in which case multiple blocks of the corresponding type are decoded into
// the slice.
//
// "label" fields are considered only in a struct used as the type of a field
// marked as "block", and are used sequentially to capture the labels of
// the blocks being decoded. In this case, the name token is used only as
// an identifier for the label in diagnostic messages.
//
// "optional" fields behave like "attr" fields, but they are optional
// and will not give parsing errors if they are missing.
//
// "remain" can be placed on a single field that may be either of type
// hcl.Body or hcl.Attributes, in which case any remaining body content is
// placed into this field for delayed processing. If no "remain" field is
// present then any attributes or blocks not matched by another valid tag
// will cause an error diagnostic.
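//
// For example, the following hypothetical struct combines several of these
// tag kinds (all of the names are illustrative only):
//
//	type ServiceConfig struct {
//	    Type    string   `hcl:"type,label"`
//	    Name    string   `hcl:"name,label"`
//	    Command string   `hcl:"command"`
//	    Detach  bool     `hcl:"detach,optional"`
//	    Remain  hcl.Body `hcl:",remain"`
//	}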
//
// Only a subset of this tagging/typing vocabulary is supported for the
// "Encode" family of functions. See the EncodeIntoBody docs for full details
// on the constraints there.
//
// Broadly-speaking this package deals with two types of error. The first is
// errors in the configuration itself, which are returned as diagnostics
// written with the configuration author as the target audience. The second
// is bugs in the calling program, such as invalid struct tags, which are
// surfaced via panics since there can be no useful runtime handling of such
// errors and they should certainly not be returned to the user as diagnostics.
package gohcl

@ -0,0 +1,191 @@
package gohcl
import (
"fmt"
"reflect"
"sort"
"github.com/hashicorp/hcl/v2/hclwrite"
"github.com/zclconf/go-cty/cty/gocty"
)
// EncodeIntoBody replaces the contents of the given hclwrite Body with
// attributes and blocks derived from the given value, which must be a
// struct value or a pointer to a struct value with the struct tags defined
// in this package.
//
// This function can work only with fully-decoded data. It will ignore any
// fields tagged as "remain", any fields that decode attributes into either
// hcl.Attribute or hcl.Expression values, and any fields that decode blocks
// into hcl.Attributes values. This function does not have enough information
// to complete the decoding of these types.
//
// Any fields tagged as "label" are ignored by this function. Use EncodeAsBlock
// to produce a whole hclwrite.Block including block labels.
//
// As long as a suitable value is given to encode and the destination body
// is non-nil, this function will always complete. It will panic in case of
// any errors in the calling program, such as passing an inappropriate type
// or a nil body.
//
// The layout of the resulting HCL source is derived from the ordering of
// the struct fields, with blank lines around nested blocks of different types.
// Fields representing attributes should usually precede those representing
// blocks so that the attributes can group together in the result. For more
// control, use the hclwrite API directly.
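//
// A minimal usage sketch (assuming app is a struct value with gohcl field
// tags):
//
//	f := hclwrite.NewEmptyFile()
//	EncodeIntoBody(&app, f.Body())
//	fmt.Printf("%s", f.Bytes())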
func EncodeIntoBody(val interface{}, dst *hclwrite.Body) {
rv := reflect.ValueOf(val)
ty := rv.Type()
if ty.Kind() == reflect.Ptr {
rv = rv.Elem()
ty = rv.Type()
}
if ty.Kind() != reflect.Struct {
panic(fmt.Sprintf("value is %s, not struct", ty.Kind()))
}
tags := getFieldTags(ty)
populateBody(rv, ty, tags, dst)
}
// EncodeAsBlock creates a new hclwrite.Block populated with the data from
// the given value, which must be a struct or pointer to struct with the
// struct tags defined in this package.
//
// If the given struct type has fields tagged with "label" tags then they
// will be used in order to annotate the created block with labels.
//
// This function has the same constraints as EncodeIntoBody and will panic
// if they are violated.
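//
// A short sketch (assuming svc is a struct value whose label fields are
// already set):
//
//	block := EncodeAsBlock(&svc, "service")
//	f := hclwrite.NewEmptyFile()
//	f.Body().AppendBlock(block)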
func EncodeAsBlock(val interface{}, blockType string) *hclwrite.Block {
rv := reflect.ValueOf(val)
ty := rv.Type()
if ty.Kind() == reflect.Ptr {
rv = rv.Elem()
ty = rv.Type()
}
if ty.Kind() != reflect.Struct {
panic(fmt.Sprintf("value is %s, not struct", ty.Kind()))
}
tags := getFieldTags(ty)
labels := make([]string, len(tags.Labels))
for i, lf := range tags.Labels {
lv := rv.Field(lf.FieldIndex)
// We just stringify whatever we find. It should always be a string
// but if not then we'll still do something reasonable.
labels[i] = fmt.Sprintf("%s", lv.Interface())
}
block := hclwrite.NewBlock(blockType, labels)
populateBody(rv, ty, tags, block.Body())
return block
}
func populateBody(rv reflect.Value, ty reflect.Type, tags *fieldTags, dst *hclwrite.Body) {
nameIdxs := make(map[string]int, len(tags.Attributes)+len(tags.Blocks))
namesOrder := make([]string, 0, len(tags.Attributes)+len(tags.Blocks))
for n, i := range tags.Attributes {
nameIdxs[n] = i
namesOrder = append(namesOrder, n)
}
for n, i := range tags.Blocks {
nameIdxs[n] = i
namesOrder = append(namesOrder, n)
}
sort.SliceStable(namesOrder, func(i, j int) bool {
ni, nj := namesOrder[i], namesOrder[j]
return nameIdxs[ni] < nameIdxs[nj]
})
dst.Clear()
prevWasBlock := false
for _, name := range namesOrder {
fieldIdx := nameIdxs[name]
field := ty.Field(fieldIdx)
fieldTy := field.Type
fieldVal := rv.Field(fieldIdx)
if fieldTy.Kind() == reflect.Ptr {
fieldTy = fieldTy.Elem()
fieldVal = fieldVal.Elem()
}
if _, isAttr := tags.Attributes[name]; isAttr {
if exprType.AssignableTo(fieldTy) || attrType.AssignableTo(fieldTy) {
continue // ignore undecoded fields
}
if !fieldVal.IsValid() {
continue // ignore (field value is nil pointer)
}
if fieldTy.Kind() == reflect.Ptr && fieldVal.IsNil() {
continue // ignore
}
if prevWasBlock {
dst.AppendNewline()
prevWasBlock = false
}
valTy, err := gocty.ImpliedType(fieldVal.Interface())
if err != nil {
panic(fmt.Sprintf("cannot encode %T as HCL expression: %s", fieldVal.Interface(), err))
}
val, err := gocty.ToCtyValue(fieldVal.Interface(), valTy)
if err != nil {
// This should never happen, since we should always be able
// to decode into the implied type.
panic(fmt.Sprintf("failed to encode %T as %#v: %s", fieldVal.Interface(), valTy, err))
}
dst.SetAttributeValue(name, val)
} else { // must be a block, then
elemTy := fieldTy
isSeq := false
if elemTy.Kind() == reflect.Slice || elemTy.Kind() == reflect.Array {
isSeq = true
elemTy = elemTy.Elem()
}
if bodyType.AssignableTo(elemTy) || attrsType.AssignableTo(elemTy) {
continue // ignore undecoded fields
}
prevWasBlock = false
if isSeq {
l := fieldVal.Len()
for i := 0; i < l; i++ {
elemVal := fieldVal.Index(i)
if !elemVal.IsValid() {
continue // ignore (elem value is nil pointer)
}
if elemTy.Kind() == reflect.Ptr && elemVal.IsNil() {
continue // ignore
}
block := EncodeAsBlock(elemVal.Interface(), name)
if !prevWasBlock {
dst.AppendNewline()
prevWasBlock = true
}
dst.AppendBlock(block)
}
} else {
if !fieldVal.IsValid() {
continue // ignore (field value is nil pointer)
}
if elemTy.Kind() == reflect.Ptr && fieldVal.IsNil() {
continue // ignore
}
block := EncodeAsBlock(fieldVal.Interface(), name)
if !prevWasBlock {
dst.AppendNewline()
prevWasBlock = true
}
dst.AppendBlock(block)
}
}
}
}

@ -0,0 +1,174 @@
package gohcl
import (
"fmt"
"reflect"
"sort"
"strings"
"github.com/hashicorp/hcl/v2"
)
// ImpliedBodySchema produces a hcl.BodySchema derived from the type of the
// given value, which must be a struct value or a pointer to one. If an
// inappropriate value is passed, this function will panic.
//
// The second return argument indicates whether the given struct includes
// a "remain" field, and thus the returned schema is non-exhaustive.
//
// This uses the tags on the fields of the struct to discover how each
// field's value should be expressed within configuration. If an invalid
// mapping is attempted, this function will panic.
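//
// For example, this struct (with illustrative names):
//
//	type Config struct {
//	    Name    string   `hcl:"name"`
//	    Network *Network `hcl:"network,block"`
//	}
//
// implies a schema with one required "name" attribute and one "network"
// block type.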
func ImpliedBodySchema(val interface{}) (schema *hcl.BodySchema, partial bool) {
ty := reflect.TypeOf(val)
if ty.Kind() == reflect.Ptr {
ty = ty.Elem()
}
if ty.Kind() != reflect.Struct {
panic(fmt.Sprintf("given value must be struct, not %T", val))
}
var attrSchemas []hcl.AttributeSchema
var blockSchemas []hcl.BlockHeaderSchema
tags := getFieldTags(ty)
attrNames := make([]string, 0, len(tags.Attributes))
for n := range tags.Attributes {
attrNames = append(attrNames, n)
}
sort.Strings(attrNames)
for _, n := range attrNames {
idx := tags.Attributes[n]
optional := tags.Optional[n]
field := ty.Field(idx)
var required bool
switch {
case field.Type.AssignableTo(exprType):
// If we're decoding to hcl.Expression then absence can be
// indicated via a null value, so we don't specify that
// the field is required during decoding.
required = false
case field.Type.Kind() != reflect.Ptr && !optional:
required = true
default:
required = false
}
attrSchemas = append(attrSchemas, hcl.AttributeSchema{
Name: n,
Required: required,
})
}
blockNames := make([]string, 0, len(tags.Blocks))
for n := range tags.Blocks {
blockNames = append(blockNames, n)
}
sort.Strings(blockNames)
for _, n := range blockNames {
idx := tags.Blocks[n]
field := ty.Field(idx)
fty := field.Type
if fty.Kind() == reflect.Slice {
fty = fty.Elem()
}
if fty.Kind() == reflect.Ptr {
fty = fty.Elem()
}
if fty.Kind() != reflect.Struct {
panic(fmt.Sprintf(
"hcl 'block' tag kind cannot be applied to %s field %s: struct required", field.Type.String(), field.Name,
))
}
ftags := getFieldTags(fty)
var labelNames []string
if len(ftags.Labels) > 0 {
labelNames = make([]string, len(ftags.Labels))
for i, l := range ftags.Labels {
labelNames[i] = l.Name
}
}
blockSchemas = append(blockSchemas, hcl.BlockHeaderSchema{
Type: n,
LabelNames: labelNames,
})
}
partial = tags.Remain != nil
schema = &hcl.BodySchema{
Attributes: attrSchemas,
Blocks: blockSchemas,
}
return schema, partial
}
type fieldTags struct {
Attributes map[string]int
Blocks map[string]int
Labels []labelField
Remain *int
Optional map[string]bool
}
type labelField struct {
FieldIndex int
Name string
}
func getFieldTags(ty reflect.Type) *fieldTags {
ret := &fieldTags{
Attributes: map[string]int{},
Blocks: map[string]int{},
Optional: map[string]bool{},
}
ct := ty.NumField()
for i := 0; i < ct; i++ {
field := ty.Field(i)
tag := field.Tag.Get("hcl")
if tag == "" {
continue
}
comma := strings.Index(tag, ",")
var name, kind string
if comma != -1 {
name = tag[:comma]
kind = tag[comma+1:]
} else {
name = tag
kind = "attr"
}
switch kind {
case "attr":
ret.Attributes[name] = i
case "block":
ret.Blocks[name] = i
case "label":
ret.Labels = append(ret.Labels, labelField{
FieldIndex: i,
Name: name,
})
case "remain":
if ret.Remain != nil {
panic("only one 'remain' tag is permitted")
}
idx := i // copy, because this loop will continue assigning to i
ret.Remain = &idx
case "optional":
ret.Attributes[name] = i
ret.Optional[name] = true
default:
panic(fmt.Sprintf("invalid hcl field tag kind %q on %s %q", kind, field.Type.String(), field.Name))
}
}
return ret
}

@ -0,0 +1,16 @@
package gohcl
import (
"reflect"
"github.com/hashicorp/hcl/v2"
)
var victimExpr hcl.Expression
var victimBody hcl.Body
var exprType = reflect.TypeOf(&victimExpr).Elem()
var bodyType = reflect.TypeOf(&victimBody).Elem()
var blockType = reflect.TypeOf((*hcl.Block)(nil))
var attrType = reflect.TypeOf((*hcl.Attribute)(nil))
var attrsType = reflect.TypeOf(hcl.Attributes(nil))

@ -0,0 +1,108 @@
// Package hclsimple is a higher-level entry point for loading HCL
// configuration files directly into Go struct values in a single step.
//
// This package is more opinionated than the rest of the HCL API. See the
// documentation for function Decode for more information.
package hclsimple
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
"github.com/hashicorp/hcl/v2"
"github.com/hashicorp/hcl/v2/gohcl"
"github.com/hashicorp/hcl/v2/hclsyntax"
"github.com/hashicorp/hcl/v2/json"
)
// Decode parses, decodes, and evaluates expressions in the given HCL source
// code, in a single step.
//
// The main HCL API is built to allow applications that need to decompose
// the processing steps into a pipeline, with different tasks done by
// different parts of the program: parsing the source code into an abstract
// representation, analysing the block structure, evaluating expressions,
// and then extracting the results into a form consumable by the rest of
// the program.
//
// This function does all of those steps in one call, going directly from
// source code to a populated Go struct value.
//
// The "filename" and "src" arguments describe the input configuration. The
// filename is used to add source location context to any returned error
// messages and its suffix will choose one of the two supported syntaxes:
// ".hcl" for native syntax, and ".json" for HCL JSON. The src must therefore
// contain a sequence of bytes that is valid for the selected syntax.
//
// The "ctx" argument provides variables and functions for use during
// expression evaluation. Applications that need no variables nor functions
// can just pass nil.
//
// The "target" argument must be a pointer to a value of a struct type,
// with struct tags as defined by the sibling package "gohcl".
//
// The return type is error but any non-nil error is guaranteed to be
// type-assertable to hcl.Diagnostics for applications that wish to access
// the full error details.
//
// This is a very opinionated function that is intended to serve the needs of
// applications that are just using HCL for simple configuration and don't
// need detailed control over the decoding process. Because this function is
// just wrapping functionality elsewhere, if it doesn't meet your needs then
// please consider copying it into your program and adapting it as needed.
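//
// A minimal usage sketch (the struct shape and file name are illustrative):
//
//	type Config struct {
//	    LogLevel string `hcl:"log_level"`
//	}
//
//	var config Config
//	err := Decode("config.hcl", src, nil, &config)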
func Decode(filename string, src []byte, ctx *hcl.EvalContext, target interface{}) error {
var file *hcl.File
var diags hcl.Diagnostics
switch suffix := strings.ToLower(filepath.Ext(filename)); suffix {
case ".hcl":
file, diags = hclsyntax.ParseConfig(src, filename, hcl.Pos{Line: 1, Column: 1})
case ".json":
file, diags = json.Parse(src, filename)
default:
diags = diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unsupported file format",
Detail: fmt.Sprintf("Cannot read from %s: unrecognized file format suffix %q.", filename, suffix),
})
return diags
}
if diags.HasErrors() {
return diags
}
diags = gohcl.DecodeBody(file.Body, ctx, target)
if diags.HasErrors() {
return diags
}
return nil
}
// DecodeFile is a wrapper around Decode that first reads the given filename
// from disk. See the Decode documentation for more information.
func DecodeFile(filename string, ctx *hcl.EvalContext, target interface{}) error {
src, err := ioutil.ReadFile(filename)
if err != nil {
if os.IsNotExist(err) {
return hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Configuration file not found",
Detail: fmt.Sprintf("The configuration file %s does not exist.", filename),
},
}
}
return hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Failed to read configuration",
Detail: fmt.Sprintf("Can't read %s: %s.", filename, err),
},
}
}
return Decode(filename, src, ctx, target)
}

@ -0,0 +1,23 @@
package hclsyntax
import (
"github.com/hashicorp/hcl/v2"
)
// setDiagEvalContext is an internal helper that will impose a particular
// EvalContext on a set of diagnostics in-place, for any diagnostic that
// does not already have an EvalContext set.
//
// We generally expect diagnostics to be immutable, but this is safe to use
// on any Diagnostics where none of the contained Diagnostic objects have yet
// been seen by a caller. Its purpose is to apply additional context to a
// set of diagnostics produced by a "deeper" component as the stack unwinds
// during expression evaluation.
func setDiagEvalContext(diags hcl.Diagnostics, expr hcl.Expression, ctx *hcl.EvalContext) {
for _, diag := range diags {
if diag.Expression == nil {
diag.Expression = expr
diag.EvalContext = ctx
}
}
}

@ -0,0 +1,24 @@
package hclsyntax
import (
"github.com/agext/levenshtein"
)
// nameSuggestion tries to find a name from the given slice of suggested names
// that is close to the given name and returns it if found. If no suggestion
// is close enough, returns the empty string.
//
// The suggestions are tried in order, so earlier suggestions take precedence
// if the given string is similar to two or more suggestions.
//
// This function is intended to be used with a relatively-small number of
// suggestions. It's not optimized for hundreds or thousands of them.
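//
// For example, nameSuggestion("fro", []string{"for", "if"}) returns "for",
// since the edit distance between "fro" and "for" is under the threshold.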
func nameSuggestion(given string, suggestions []string) string {
for _, suggestion := range suggestions {
dist := levenshtein.Distance(given, suggestion, nil)
if dist < 3 { // threshold determined experimentally
return suggestion
}
}
return ""
}

@ -0,0 +1,7 @@
// Package hclsyntax contains the parser, AST, etc for HCL's native language,
// as opposed to the JSON variant.
//
// In normal use applications should rarely depend on this package directly,
// instead preferring the higher-level interface of the main hcl package and
// its companion package hclparse.
package hclsyntax

File diff suppressed because it is too large

@ -0,0 +1,268 @@
package hclsyntax
import (
"fmt"
"github.com/hashicorp/hcl/v2"
"github.com/zclconf/go-cty/cty"
"github.com/zclconf/go-cty/cty/convert"
"github.com/zclconf/go-cty/cty/function"
"github.com/zclconf/go-cty/cty/function/stdlib"
)
type Operation struct {
Impl function.Function
Type cty.Type
}
var (
OpLogicalOr = &Operation{
Impl: stdlib.OrFunc,
Type: cty.Bool,
}
OpLogicalAnd = &Operation{
Impl: stdlib.AndFunc,
Type: cty.Bool,
}
OpLogicalNot = &Operation{
Impl: stdlib.NotFunc,
Type: cty.Bool,
}
OpEqual = &Operation{
Impl: stdlib.EqualFunc,
Type: cty.Bool,
}
OpNotEqual = &Operation{
Impl: stdlib.NotEqualFunc,
Type: cty.Bool,
}
OpGreaterThan = &Operation{
Impl: stdlib.GreaterThanFunc,
Type: cty.Bool,
}
OpGreaterThanOrEqual = &Operation{
Impl: stdlib.GreaterThanOrEqualToFunc,
Type: cty.Bool,
}
OpLessThan = &Operation{
Impl: stdlib.LessThanFunc,
Type: cty.Bool,
}
OpLessThanOrEqual = &Operation{
Impl: stdlib.LessThanOrEqualToFunc,
Type: cty.Bool,
}
OpAdd = &Operation{
Impl: stdlib.AddFunc,
Type: cty.Number,
}
OpSubtract = &Operation{
Impl: stdlib.SubtractFunc,
Type: cty.Number,
}
OpMultiply = &Operation{
Impl: stdlib.MultiplyFunc,
Type: cty.Number,
}
OpDivide = &Operation{
Impl: stdlib.DivideFunc,
Type: cty.Number,
}
OpModulo = &Operation{
Impl: stdlib.ModuloFunc,
Type: cty.Number,
}
OpNegate = &Operation{
Impl: stdlib.NegateFunc,
Type: cty.Number,
}
)
var binaryOps []map[TokenType]*Operation
func init() {
// This operation table maps from the operator's token type
// to the AST operation type. All expressions produced from
// binary operators are BinaryOp nodes.
//
// Binary operator groups are listed in order of precedence, with
// the *lowest* precedence first. Operators within the same group
// have left-to-right associativity.
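// For example, because the multiplicative operators are in a later group
// than the additive ones, "1 + 2 * 3" parses as "1 + (2 * 3)".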
binaryOps = []map[TokenType]*Operation{
{
TokenOr: OpLogicalOr,
},
{
TokenAnd: OpLogicalAnd,
},
{
TokenEqualOp: OpEqual,
TokenNotEqual: OpNotEqual,
},
{
TokenGreaterThan: OpGreaterThan,
TokenGreaterThanEq: OpGreaterThanOrEqual,
TokenLessThan: OpLessThan,
TokenLessThanEq: OpLessThanOrEqual,
},
{
TokenPlus: OpAdd,
TokenMinus: OpSubtract,
},
{
TokenStar: OpMultiply,
TokenSlash: OpDivide,
TokenPercent: OpModulo,
},
}
}
type BinaryOpExpr struct {
LHS Expression
Op *Operation
RHS Expression
SrcRange hcl.Range
}
func (e *BinaryOpExpr) walkChildNodes(w internalWalkFunc) {
w(e.LHS)
w(e.RHS)
}
func (e *BinaryOpExpr) Value(ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) {
impl := e.Op.Impl // assumed to be a function taking exactly two arguments
params := impl.Params()
lhsParam := params[0]
rhsParam := params[1]
var diags hcl.Diagnostics
givenLHSVal, lhsDiags := e.LHS.Value(ctx)
givenRHSVal, rhsDiags := e.RHS.Value(ctx)
diags = append(diags, lhsDiags...)
diags = append(diags, rhsDiags...)
lhsVal, err := convert.Convert(givenLHSVal, lhsParam.Type)
if err != nil {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid operand",
Detail: fmt.Sprintf("Unsuitable value for left operand: %s.", err),
Subject: e.LHS.Range().Ptr(),
Context: &e.SrcRange,
Expression: e.LHS,
EvalContext: ctx,
})
}
rhsVal, err := convert.Convert(givenRHSVal, rhsParam.Type)
if err != nil {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid operand",
Detail: fmt.Sprintf("Unsuitable value for right operand: %s.", err),
Subject: e.RHS.Range().Ptr(),
Context: &e.SrcRange,
Expression: e.RHS,
EvalContext: ctx,
})
}
if diags.HasErrors() {
// Don't actually try the call if we have errors already, since
// this will probably just produce a confusing duplicative diagnostic.
return cty.UnknownVal(e.Op.Type), diags
}
args := []cty.Value{lhsVal, rhsVal}
result, err := impl.Call(args)
if err != nil {
diags = append(diags, &hcl.Diagnostic{
// FIXME: This diagnostic is useless.
Severity: hcl.DiagError,
Summary: "Operation failed",
Detail: fmt.Sprintf("Error during operation: %s.", err),
Subject: &e.SrcRange,
Expression: e,
EvalContext: ctx,
})
return cty.UnknownVal(e.Op.Type), diags
}
return result, diags
}
func (e *BinaryOpExpr) Range() hcl.Range {
return e.SrcRange
}
func (e *BinaryOpExpr) StartRange() hcl.Range {
return e.LHS.StartRange()
}
type UnaryOpExpr struct {
Op *Operation
Val Expression
SrcRange hcl.Range
SymbolRange hcl.Range
}
func (e *UnaryOpExpr) walkChildNodes(w internalWalkFunc) {
w(e.Val)
}
func (e *UnaryOpExpr) Value(ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) {
impl := e.Op.Impl // assumed to be a function taking exactly one argument
params := impl.Params()
param := params[0]
givenVal, diags := e.Val.Value(ctx)
val, err := convert.Convert(givenVal, param.Type)
if err != nil {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid operand",
Detail: fmt.Sprintf("Unsuitable value for unary operand: %s.", err),
Subject: e.Val.Range().Ptr(),
Context: &e.SrcRange,
Expression: e.Val,
EvalContext: ctx,
})
}
if diags.HasErrors() {
// Don't actually try the call if we have errors already, since
// this will probably just produce a confusing duplicative diagnostic.
return cty.UnknownVal(e.Op.Type), diags
}
args := []cty.Value{val}
result, err := impl.Call(args)
if err != nil {
diags = append(diags, &hcl.Diagnostic{
// FIXME: This diagnostic is useless.
Severity: hcl.DiagError,
Summary: "Operation failed",
Detail: fmt.Sprintf("Error during operation: %s.", err),
Subject: &e.SrcRange,
Expression: e,
EvalContext: ctx,
})
return cty.UnknownVal(e.Op.Type), diags
}
return result, diags
}
func (e *UnaryOpExpr) Range() hcl.Range {
return e.SrcRange
}
func (e *UnaryOpExpr) StartRange() hcl.Range {
return e.SymbolRange
}

@ -0,0 +1,220 @@
package hclsyntax
import (
"bytes"
"fmt"
"github.com/hashicorp/hcl/v2"
"github.com/zclconf/go-cty/cty"
"github.com/zclconf/go-cty/cty/convert"
)
type TemplateExpr struct {
Parts []Expression
SrcRange hcl.Range
}
func (e *TemplateExpr) walkChildNodes(w internalWalkFunc) {
for _, part := range e.Parts {
w(part)
}
}
func (e *TemplateExpr) Value(ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) {
buf := &bytes.Buffer{}
var diags hcl.Diagnostics
isKnown := true
for _, part := range e.Parts {
partVal, partDiags := part.Value(ctx)
diags = append(diags, partDiags...)
if partVal.IsNull() {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid template interpolation value",
Detail: "The expression result is null. Cannot include a null value in a string template.",
Subject: part.Range().Ptr(),
Context: &e.SrcRange,
Expression: part,
EvalContext: ctx,
})
continue
}
if !partVal.IsKnown() {
// If any part is unknown then the result as a whole must be
// unknown too. We'll keep on processing the rest of the parts
// anyway, because we want to still emit any diagnostics resulting
// from evaluating those.
isKnown = false
continue
}
strVal, err := convert.Convert(partVal, cty.String)
if err != nil {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid template interpolation value",
Detail: fmt.Sprintf(
"Cannot include the given value in a string template: %s.",
err.Error(),
),
Subject: part.Range().Ptr(),
Context: &e.SrcRange,
Expression: part,
EvalContext: ctx,
})
continue
}
buf.WriteString(strVal.AsString())
}
if !isKnown {
return cty.UnknownVal(cty.String), diags
}
return cty.StringVal(buf.String()), diags
}
func (e *TemplateExpr) Range() hcl.Range {
return e.SrcRange
}
func (e *TemplateExpr) StartRange() hcl.Range {
return e.Parts[0].StartRange()
}
// IsStringLiteral returns true if and only if the template consists only of
// a single string literal, as would be created for a simple quoted string like
// "foo".
//
// If this function returns true, then calling Value on the same expression
// with a nil EvalContext will return the literal value.
//
// Note that "${"foo"}", "${1}", etc aren't considered literal values for the
// purposes of this method, because the intent of this method is to identify
// situations where the user seems to be explicitly intending literal string
// interpretation, not situations that result in literals as a technicality
// of the template expression unwrapping behavior.
func (e *TemplateExpr) IsStringLiteral() bool {
if len(e.Parts) != 1 {
return false
}
_, ok := e.Parts[0].(*LiteralValueExpr)
return ok
}
// TemplateJoinExpr is used to convert tuples of strings produced by template
// constructs (i.e. for loops) into flat strings, by converting the values
// to strings and joining them. This AST node is not used directly; it's
// produced as part of the AST of a "for" loop in a template.
type TemplateJoinExpr struct {
Tuple Expression
}
func (e *TemplateJoinExpr) walkChildNodes(w internalWalkFunc) {
w(e.Tuple)
}
func (e *TemplateJoinExpr) Value(ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) {
tuple, diags := e.Tuple.Value(ctx)
if tuple.IsNull() {
// This indicates a bug in the code that constructed the AST.
panic("TemplateJoinExpr got null tuple")
}
if tuple.Type() == cty.DynamicPseudoType {
return cty.UnknownVal(cty.String), diags
}
if !tuple.Type().IsTupleType() {
// This indicates a bug in the code that constructed the AST.
panic("TemplateJoinExpr got non-tuple tuple")
}
if !tuple.IsKnown() {
return cty.UnknownVal(cty.String), diags
}
buf := &bytes.Buffer{}
it := tuple.ElementIterator()
for it.Next() {
_, val := it.Element()
if val.IsNull() {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid template interpolation value",
Detail: "An iteration result is null. Cannot include a null value in a string template.",
Subject: e.Range().Ptr(),
Expression: e,
EvalContext: ctx,
})
continue
}
if val.Type() == cty.DynamicPseudoType {
return cty.UnknownVal(cty.String), diags
}
strVal, err := convert.Convert(val, cty.String)
if err != nil {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid template interpolation value",
Detail: fmt.Sprintf(
"Cannot include one of the interpolation results into the string template: %s.",
err.Error(),
),
Subject: e.Range().Ptr(),
Expression: e,
EvalContext: ctx,
})
continue
}
if !val.IsKnown() {
return cty.UnknownVal(cty.String), diags
}
buf.WriteString(strVal.AsString())
}
return cty.StringVal(buf.String()), diags
}
func (e *TemplateJoinExpr) Range() hcl.Range {
return e.Tuple.Range()
}
func (e *TemplateJoinExpr) StartRange() hcl.Range {
return e.Tuple.StartRange()
}
// TemplateWrapExpr is used instead of a TemplateExpr when a template
// consists _only_ of a single interpolation sequence. In that case, the
// template's result is the single interpolation's result, verbatim with
// no type conversions.
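//
// For example, the quoted string "${foo}" alone yields a TemplateWrapExpr
// returning the value of foo verbatim, while "x${foo}" yields a
// TemplateExpr that converts foo to a string.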
type TemplateWrapExpr struct {
Wrapped Expression
SrcRange hcl.Range
}
func (e *TemplateWrapExpr) walkChildNodes(w internalWalkFunc) {
w(e.Wrapped)
}
func (e *TemplateWrapExpr) Value(ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) {
return e.Wrapped.Value(ctx)
}
func (e *TemplateWrapExpr) Range() hcl.Range {
return e.SrcRange
}
func (e *TemplateWrapExpr) StartRange() hcl.Range {
return e.SrcRange
}

@ -0,0 +1,76 @@
package hclsyntax
// Generated by expression_vars_gen.go. DO NOT EDIT.
// Run 'go generate' on this package to update the set of functions here.
import (
"github.com/hashicorp/hcl/v2"
)
func (e *AnonSymbolExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *BinaryOpExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *ConditionalExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *ForExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *FunctionCallExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *IndexExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *LiteralValueExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *ObjectConsExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *ObjectConsKeyExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *RelativeTraversalExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *ScopeTraversalExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *SplatExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *TemplateExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *TemplateJoinExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *TemplateWrapExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *TupleConsExpr) Variables() []hcl.Traversal {
return Variables(e)
}
func (e *UnaryOpExpr) Variables() []hcl.Traversal {
return Variables(e)
}

@ -0,0 +1,20 @@
package hclsyntax
import (
"github.com/hashicorp/hcl/v2"
)
// File is the top-level object resulting from parsing a configuration file.
type File struct {
Body *Body
Bytes []byte
}
func (f *File) AsHCLFile() *hcl.File {
return &hcl.File{
Body: f.Body,
Bytes: f.Bytes,
// TODO: The Nav object, once we have an implementation of it
}
}

@ -0,0 +1,9 @@
package hclsyntax
//go:generate go run expression_vars_gen.go
//go:generate ruby unicode2ragel.rb --url=http://www.unicode.org/Public/9.0.0/ucd/DerivedCoreProperties.txt -m UnicodeDerived -p ID_Start,ID_Continue -o unicode_derived.rl
//go:generate ragel -Z scan_tokens.rl
//go:generate gofmt -w scan_tokens.go
//go:generate ragel -Z scan_string_lit.rl
//go:generate gofmt -w scan_string_lit.go
//go:generate stringer -type TokenType -output token_type_string.go

@ -0,0 +1,21 @@
package hclsyntax
import (
"bytes"
)
type Keyword []byte
var forKeyword = Keyword([]byte{'f', 'o', 'r'})
var inKeyword = Keyword([]byte{'i', 'n'})
var ifKeyword = Keyword([]byte{'i', 'f'})
var elseKeyword = Keyword([]byte{'e', 'l', 's', 'e'})
var endifKeyword = Keyword([]byte{'e', 'n', 'd', 'i', 'f'})
var endforKeyword = Keyword([]byte{'e', 'n', 'd', 'f', 'o', 'r'})
func (kw Keyword) TokenMatches(token Token) bool {
if token.Type != TokenIdent {
return false
}
return bytes.Equal([]byte(kw), token.Bytes)
}

@ -0,0 +1,59 @@
package hclsyntax
import (
"bytes"
"fmt"
"github.com/hashicorp/hcl/v2"
)
type navigation struct {
root *Body
}
// Implementation of hcled.ContextString
func (n navigation) ContextString(offset int) string {
// We will walk our top-level blocks until we find one that contains
// the given offset, and then construct a representation of the header
// of the block.
var block *Block
for _, candidate := range n.root.Blocks {
if candidate.Range().ContainsOffset(offset) {
block = candidate
break
}
}
if block == nil {
return ""
}
if len(block.Labels) == 0 {
// Easy case!
return block.Type
}
buf := &bytes.Buffer{}
buf.WriteString(block.Type)
for _, label := range block.Labels {
fmt.Fprintf(buf, " %q", label)
}
return buf.String()
}
func (n navigation) ContextDefRange(offset int) hcl.Range {
var block *Block
for _, candidate := range n.root.Blocks {
if candidate.Range().ContainsOffset(offset) {
block = candidate
break
}
}
if block == nil {
return hcl.Range{}
}
return block.DefRange()
}

@ -0,0 +1,22 @@
package hclsyntax
import (
"github.com/hashicorp/hcl/v2"
)
// Node is the abstract type that every AST node implements.
//
// This is a closed interface, so it cannot be implemented from outside of
// this package.
type Node interface {
// This is the mechanism by which the public-facing walk functions
// are implemented. Implementations should call the given function
// for each child node and then replace that node with its return value.
// The return value might just be the same node, for non-transforming
// walks.
walkChildNodes(w internalWalkFunc)
Range() hcl.Range
}
type internalWalkFunc func(Node)

File diff suppressed because it is too large

@ -0,0 +1,799 @@
package hclsyntax
import (
"fmt"
"strings"
"unicode"
"github.com/apparentlymart/go-textseg/v12/textseg"
"github.com/hashicorp/hcl/v2"
"github.com/zclconf/go-cty/cty"
)
func (p *parser) ParseTemplate() (Expression, hcl.Diagnostics) {
return p.parseTemplate(TokenEOF, false)
}
func (p *parser) parseTemplate(end TokenType, flushHeredoc bool) (Expression, hcl.Diagnostics) {
exprs, passthru, rng, diags := p.parseTemplateInner(end, flushHeredoc)
if passthru {
if len(exprs) != 1 {
panic("passthru set with len(exprs) != 1")
}
return &TemplateWrapExpr{
Wrapped: exprs[0],
SrcRange: rng,
}, diags
}
return &TemplateExpr{
Parts: exprs,
SrcRange: rng,
}, diags
}
func (p *parser) parseTemplateInner(end TokenType, flushHeredoc bool) ([]Expression, bool, hcl.Range, hcl.Diagnostics) {
parts, diags := p.parseTemplateParts(end)
if flushHeredoc {
flushHeredocTemplateParts(parts) // Trim off leading spaces on lines per the flush heredoc spec
}
tp := templateParser{
Tokens: parts.Tokens,
SrcRange: parts.SrcRange,
}
exprs, exprsDiags := tp.parseRoot()
diags = append(diags, exprsDiags...)
passthru := false
if len(parts.Tokens) == 2 { // one real token and one synthetic "end" token
if _, isInterp := parts.Tokens[0].(*templateInterpToken); isInterp {
passthru = true
}
}
return exprs, passthru, parts.SrcRange, diags
}
type templateParser struct {
Tokens []templateToken
SrcRange hcl.Range
pos int
}
func (p *templateParser) parseRoot() ([]Expression, hcl.Diagnostics) {
var exprs []Expression
var diags hcl.Diagnostics
for {
next := p.Peek()
if _, isEnd := next.(*templateEndToken); isEnd {
break
}
expr, exprDiags := p.parseExpr()
diags = append(diags, exprDiags...)
exprs = append(exprs, expr)
}
return exprs, diags
}
func (p *templateParser) parseExpr() (Expression, hcl.Diagnostics) {
next := p.Peek()
switch tok := next.(type) {
case *templateLiteralToken:
p.Read() // eat literal
return &LiteralValueExpr{
Val: cty.StringVal(tok.Val),
SrcRange: tok.SrcRange,
}, nil
case *templateInterpToken:
p.Read() // eat interp
return tok.Expr, nil
case *templateIfToken:
return p.parseIf()
case *templateForToken:
return p.parseFor()
case *templateEndToken:
p.Read() // eat erroneous token
return errPlaceholderExpr(tok.SrcRange), hcl.Diagnostics{
{
// This is a particularly unhelpful diagnostic, so callers
// should attempt to pre-empt it and produce a more helpful
// diagnostic that is context-aware.
Severity: hcl.DiagError,
Summary: "Unexpected end of template",
Detail: "The control directives within this template are unbalanced.",
Subject: &tok.SrcRange,
},
}
case *templateEndCtrlToken:
p.Read() // eat erroneous token
return errPlaceholderExpr(tok.SrcRange), hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: fmt.Sprintf("Unexpected %s directive", tok.Name()),
Detail: "The control directives within this template are unbalanced.",
Subject: &tok.SrcRange,
},
}
default:
// should never happen, because above should be exhaustive
panic(fmt.Sprintf("unhandled template token type %T", next))
}
}
func (p *templateParser) parseIf() (Expression, hcl.Diagnostics) {
open := p.Read()
openIf, isIf := open.(*templateIfToken)
if !isIf {
// should never happen if caller is behaving
panic("parseIf called with peeker not pointing at if token")
}
var ifExprs, elseExprs []Expression
var diags hcl.Diagnostics
var endifRange hcl.Range
currentExprs := &ifExprs
Token:
for {
next := p.Peek()
if end, isEnd := next.(*templateEndToken); isEnd {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unexpected end of template",
Detail: fmt.Sprintf(
"The if directive at %s is missing its corresponding endif directive.",
openIf.SrcRange,
),
Subject: &end.SrcRange,
})
return errPlaceholderExpr(end.SrcRange), diags
}
if end, isCtrlEnd := next.(*templateEndCtrlToken); isCtrlEnd {
p.Read() // eat end directive
switch end.Type {
case templateElse:
if currentExprs == &ifExprs {
currentExprs = &elseExprs
continue Token
}
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unexpected else directive",
Detail: fmt.Sprintf(
"Already in the else clause for the if started at %s.",
openIf.SrcRange,
),
Subject: &end.SrcRange,
})
case templateEndIf:
endifRange = end.SrcRange
break Token
default:
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: fmt.Sprintf("Unexpected %s directive", end.Name()),
Detail: fmt.Sprintf(
"Expecting an endif directive for the if started at %s.",
openIf.SrcRange,
),
Subject: &end.SrcRange,
})
}
return errPlaceholderExpr(end.SrcRange), diags
}
expr, exprDiags := p.parseExpr()
diags = append(diags, exprDiags...)
*currentExprs = append(*currentExprs, expr)
}
if len(ifExprs) == 0 {
ifExprs = append(ifExprs, &LiteralValueExpr{
Val: cty.StringVal(""),
SrcRange: hcl.Range{
Filename: openIf.SrcRange.Filename,
Start: openIf.SrcRange.End,
End: openIf.SrcRange.End,
},
})
}
if len(elseExprs) == 0 {
elseExprs = append(elseExprs, &LiteralValueExpr{
Val: cty.StringVal(""),
SrcRange: hcl.Range{
Filename: endifRange.Filename,
Start: endifRange.Start,
End: endifRange.Start,
},
})
}
trueExpr := &TemplateExpr{
Parts: ifExprs,
SrcRange: hcl.RangeBetween(ifExprs[0].Range(), ifExprs[len(ifExprs)-1].Range()),
}
falseExpr := &TemplateExpr{
Parts: elseExprs,
SrcRange: hcl.RangeBetween(elseExprs[0].Range(), elseExprs[len(elseExprs)-1].Range()),
}
return &ConditionalExpr{
Condition: openIf.CondExpr,
TrueResult: trueExpr,
FalseResult: falseExpr,
SrcRange: hcl.RangeBetween(openIf.SrcRange, endifRange),
}, diags
}
func (p *templateParser) parseFor() (Expression, hcl.Diagnostics) {
open := p.Read()
openFor, isFor := open.(*templateForToken)
if !isFor {
// should never happen if caller is behaving
panic("parseFor called with peeker not pointing at for token")
}
var contentExprs []Expression
var diags hcl.Diagnostics
var endforRange hcl.Range
Token:
for {
next := p.Peek()
if end, isEnd := next.(*templateEndToken); isEnd {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unexpected end of template",
Detail: fmt.Sprintf(
"The for directive at %s is missing its corresponding endfor directive.",
openFor.SrcRange,
),
Subject: &end.SrcRange,
})
return errPlaceholderExpr(end.SrcRange), diags
}
if end, isCtrlEnd := next.(*templateEndCtrlToken); isCtrlEnd {
p.Read() // eat end directive
switch end.Type {
case templateElse:
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unexpected else directive",
Detail: "An else clause is not expected for a for directive.",
Subject: &end.SrcRange,
})
case templateEndFor:
endforRange = end.SrcRange
break Token
default:
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: fmt.Sprintf("Unexpected %s directive", end.Name()),
Detail: fmt.Sprintf(
"Expecting an endfor directive corresponding to the for directive at %s.",
openFor.SrcRange,
),
Subject: &end.SrcRange,
})
}
return errPlaceholderExpr(end.SrcRange), diags
}
expr, exprDiags := p.parseExpr()
diags = append(diags, exprDiags...)
contentExprs = append(contentExprs, expr)
}
if len(contentExprs) == 0 {
contentExprs = append(contentExprs, &LiteralValueExpr{
Val: cty.StringVal(""),
SrcRange: hcl.Range{
Filename: openFor.SrcRange.Filename,
Start: openFor.SrcRange.End,
End: openFor.SrcRange.End,
},
})
}
contentExpr := &TemplateExpr{
Parts: contentExprs,
SrcRange: hcl.RangeBetween(contentExprs[0].Range(), contentExprs[len(contentExprs)-1].Range()),
}
forExpr := &ForExpr{
KeyVar: openFor.KeyVar,
ValVar: openFor.ValVar,
CollExpr: openFor.CollExpr,
ValExpr: contentExpr,
SrcRange: hcl.RangeBetween(openFor.SrcRange, endforRange),
OpenRange: openFor.SrcRange,
CloseRange: endforRange,
}
return &TemplateJoinExpr{
Tuple: forExpr,
}, diags
}
func (p *templateParser) Peek() templateToken {
return p.Tokens[p.pos]
}
func (p *templateParser) Read() templateToken {
ret := p.Peek()
if _, end := ret.(*templateEndToken); !end {
p.pos++
}
return ret
}
// parseTemplateParts produces a flat sequence of "template tokens", which are
// either literal values (with any "trimming" already applied), interpolation
// sequences, or control flow markers.
//
// A further pass is required on the result to turn it into an AST.
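//
// For example, the template "hi ${name}!" produces a literal token for
// "hi ", an interpolation token holding the name expression, a literal
// token for "!", and a synthetic end token.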
func (p *parser) parseTemplateParts(end TokenType) (*templateParts, hcl.Diagnostics) {
var parts []templateToken
var diags hcl.Diagnostics
startRange := p.NextRange()
ltrimNext := false
nextCanTrimPrev := false
var endRange hcl.Range
Token:
for {
next := p.Read()
if next.Type == end {
// all done!
endRange = next.Range
break
}
ltrim := ltrimNext
ltrimNext = false
canTrimPrev := nextCanTrimPrev
nextCanTrimPrev = false
switch next.Type {
case TokenStringLit, TokenQuotedLit:
str, strDiags := ParseStringLiteralToken(next)
diags = append(diags, strDiags...)
if ltrim {
str = strings.TrimLeftFunc(str, unicode.IsSpace)
}
parts = append(parts, &templateLiteralToken{
Val: str,
SrcRange: next.Range,
})
nextCanTrimPrev = true
case TokenTemplateInterp:
// if the opener is ${~ then we want to eat any trailing whitespace
// in the preceding literal token, assuming it is indeed a literal
// token.
if canTrimPrev && len(next.Bytes) == 3 && next.Bytes[2] == '~' && len(parts) > 0 {
prevExpr := parts[len(parts)-1]
if lexpr, ok := prevExpr.(*templateLiteralToken); ok {
lexpr.Val = strings.TrimRightFunc(lexpr.Val, unicode.IsSpace)
}
}
p.PushIncludeNewlines(false)
expr, exprDiags := p.ParseExpression()
diags = append(diags, exprDiags...)
close := p.Peek()
if close.Type != TokenTemplateSeqEnd {
if !p.recovery {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Extra characters after interpolation expression",
Detail: "Expected a closing brace to end the interpolation expression, but found extra characters.",
Subject: &close.Range,
Context: hcl.RangeBetween(startRange, close.Range).Ptr(),
})
}
p.recover(TokenTemplateSeqEnd)
} else {
p.Read() // eat closing brace
// If the closer is ~} then we want to eat any leading
// whitespace on the next token, if it turns out to be a
// literal token.
if len(close.Bytes) == 2 && close.Bytes[0] == '~' {
ltrimNext = true
}
}
p.PopIncludeNewlines()
parts = append(parts, &templateInterpToken{
Expr: expr,
SrcRange: hcl.RangeBetween(next.Range, close.Range),
})
case TokenTemplateControl:
// if the opener is %{~ then we want to eat any trailing whitespace
// in the preceding literal token, assuming it is indeed a literal
// token.
if canTrimPrev && len(next.Bytes) == 3 && next.Bytes[2] == '~' && len(parts) > 0 {
prevExpr := parts[len(parts)-1]
if lexpr, ok := prevExpr.(*templateLiteralToken); ok {
lexpr.Val = strings.TrimRightFunc(lexpr.Val, unicode.IsSpace)
}
}
p.PushIncludeNewlines(false)
kw := p.Peek()
if kw.Type != TokenIdent {
if !p.recovery {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid template directive",
Detail: "A template directive keyword (\"if\", \"for\", etc) is expected at the beginning of a %{ sequence.",
Subject: &kw.Range,
Context: hcl.RangeBetween(next.Range, kw.Range).Ptr(),
})
}
p.recover(TokenTemplateSeqEnd)
p.PopIncludeNewlines()
continue Token
}
p.Read() // eat keyword token
switch {
case ifKeyword.TokenMatches(kw):
condExpr, exprDiags := p.ParseExpression()
diags = append(diags, exprDiags...)
parts = append(parts, &templateIfToken{
CondExpr: condExpr,
SrcRange: hcl.RangeBetween(next.Range, p.NextRange()),
})
case elseKeyword.TokenMatches(kw):
parts = append(parts, &templateEndCtrlToken{
Type: templateElse,
SrcRange: hcl.RangeBetween(next.Range, p.NextRange()),
})
case endifKeyword.TokenMatches(kw):
parts = append(parts, &templateEndCtrlToken{
Type: templateEndIf,
SrcRange: hcl.RangeBetween(next.Range, p.NextRange()),
})
case forKeyword.TokenMatches(kw):
var keyName, valName string
if p.Peek().Type != TokenIdent {
if !p.recovery {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid 'for' directive",
Detail: "For directive requires variable name after 'for'.",
Subject: p.Peek().Range.Ptr(),
})
}
p.recover(TokenTemplateSeqEnd)
p.PopIncludeNewlines()
continue Token
}
valName = string(p.Read().Bytes)
if p.Peek().Type == TokenComma {
// What we just read was actually the key, then.
keyName = valName
p.Read() // eat comma
if p.Peek().Type != TokenIdent {
if !p.recovery {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid 'for' directive",
Detail: "For directive requires value variable name after comma.",
Subject: p.Peek().Range.Ptr(),
})
}
p.recover(TokenTemplateSeqEnd)
p.PopIncludeNewlines()
continue Token
}
valName = string(p.Read().Bytes)
}
if !inKeyword.TokenMatches(p.Peek()) {
if !p.recovery {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid 'for' directive",
Detail: "For directive requires 'in' keyword after names.",
Subject: p.Peek().Range.Ptr(),
})
}
p.recover(TokenTemplateSeqEnd)
p.PopIncludeNewlines()
continue Token
}
p.Read() // eat 'in' keyword
collExpr, collDiags := p.ParseExpression()
diags = append(diags, collDiags...)
parts = append(parts, &templateForToken{
KeyVar: keyName,
ValVar: valName,
CollExpr: collExpr,
SrcRange: hcl.RangeBetween(next.Range, p.NextRange()),
})
case endforKeyword.TokenMatches(kw):
parts = append(parts, &templateEndCtrlToken{
Type: templateEndFor,
SrcRange: hcl.RangeBetween(next.Range, p.NextRange()),
})
default:
if !p.recovery {
suggestions := []string{"if", "for", "else", "endif", "endfor"}
given := string(kw.Bytes)
suggestion := nameSuggestion(given, suggestions)
if suggestion != "" {
suggestion = fmt.Sprintf(" Did you mean %q?", suggestion)
}
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid template control keyword",
Detail: fmt.Sprintf("%q is not a valid template control keyword.%s", given, suggestion),
Subject: &kw.Range,
Context: hcl.RangeBetween(next.Range, kw.Range).Ptr(),
})
}
p.recover(TokenTemplateSeqEnd)
p.PopIncludeNewlines()
continue Token
}
close := p.Peek()
if close.Type != TokenTemplateSeqEnd {
if !p.recovery {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: fmt.Sprintf("Extra characters in %s marker", kw.Bytes),
Detail: "Expected a closing brace to end the sequence, but found extra characters.",
Subject: &close.Range,
Context: hcl.RangeBetween(startRange, close.Range).Ptr(),
})
}
p.recover(TokenTemplateSeqEnd)
} else {
p.Read() // eat closing brace
// If the closer is ~} then we want to eat any leading
// whitespace on the next token, if it turns out to be a
// literal token.
if len(close.Bytes) == 2 && close.Bytes[0] == '~' {
ltrimNext = true
}
}
p.PopIncludeNewlines()
default:
if !p.recovery {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unterminated template string",
Detail: "No closing marker was found for the string.",
Subject: &next.Range,
Context: hcl.RangeBetween(startRange, next.Range).Ptr(),
})
}
final := p.recover(end)
endRange = final.Range
break Token
}
}
if len(parts) == 0 {
// If a sequence has no content, we'll treat it as if it had an
// empty string in it because that's what the user probably means
// if they write "" in configuration.
parts = append(parts, &templateLiteralToken{
Val: "",
SrcRange: hcl.Range{
// Range is the zero-character span immediately after the
// opening quote.
Filename: startRange.Filename,
Start: startRange.End,
End: startRange.End,
},
})
}
// Always end with an end token, so the parser can produce diagnostics
// about unclosed items with proper position information.
parts = append(parts, &templateEndToken{
SrcRange: endRange,
})
ret := &templateParts{
Tokens: parts,
SrcRange: hcl.RangeBetween(startRange, endRange),
}
return ret, diags
}
// flushHeredocTemplateParts modifies in-place the line-leading literal strings
// to apply the flush heredoc processing rule: find the line with the smallest
// number of whitespace characters as prefix and then trim that number of
// characters from all of the lines.
//
// This rule is applied to static tokens rather than to the rendered result,
// so interpolating a string with leading whitespace cannot affect the chosen
// prefix length.
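//
// For example, if one line of a flush heredoc starts with four spaces and
// another with two, the two-space minimum is trimmed from the start of
// every line.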
func flushHeredocTemplateParts(parts *templateParts) {
if len(parts.Tokens) == 0 {
// Nothing to do
return
}
const maxInt = int((^uint(0)) >> 1)
minSpaces := maxInt
newline := true
var adjust []*templateLiteralToken
for _, ttok := range parts.Tokens {
if newline {
newline = false
var spaces int
if lit, ok := ttok.(*templateLiteralToken); ok {
orig := lit.Val
trimmed := strings.TrimLeftFunc(orig, unicode.IsSpace)
// If a token is entirely spaces and ends with a newline
// then it's a "blank line" and thus not considered for
// space-prefix-counting purposes.
if len(trimmed) == 0 && strings.HasSuffix(orig, "\n") {
spaces = maxInt
} else {
spaceBytes := len(lit.Val) - len(trimmed)
spaces, _ = textseg.TokenCount([]byte(orig[:spaceBytes]), textseg.ScanGraphemeClusters)
adjust = append(adjust, lit)
}
} else if _, ok := ttok.(*templateEndToken); ok {
break // don't process the end token since it never has spaces before it
}
if spaces < minSpaces {
minSpaces = spaces
}
}
if lit, ok := ttok.(*templateLiteralToken); ok {
if strings.HasSuffix(lit.Val, "\n") {
newline = true // The following token, if any, begins a new line
}
}
}
for _, lit := range adjust {
// Since we want to count space _characters_ rather than space _bytes_,
// we can't just do a straightforward slice operation here and instead
// need to hunt for the split point with a scanner.
valBytes := []byte(lit.Val)
spaceByteCount := 0
for i := 0; i < minSpaces; i++ {
adv, _, _ := textseg.ScanGraphemeClusters(valBytes, true)
spaceByteCount += adv
valBytes = valBytes[adv:]
}
lit.Val = lit.Val[spaceByteCount:]
lit.SrcRange.Start.Column += minSpaces
lit.SrcRange.Start.Byte += spaceByteCount
}
}
type templateParts struct {
Tokens []templateToken
SrcRange hcl.Range
}
// templateToken is a higher-level token that represents a single atom within
// the template language. Our template parsing first raises the raw token
// stream to a sequence of templateToken, and then transforms the result into
// an expression tree.
type templateToken interface {
templateToken() templateToken
}
type templateLiteralToken struct {
Val string
SrcRange hcl.Range
isTemplateToken
}
type templateInterpToken struct {
Expr Expression
SrcRange hcl.Range
isTemplateToken
}
type templateIfToken struct {
CondExpr Expression
SrcRange hcl.Range
isTemplateToken
}
type templateForToken struct {
KeyVar string // empty if ignoring key
ValVar string
CollExpr Expression
SrcRange hcl.Range
isTemplateToken
}
type templateEndCtrlType int
const (
templateEndIf templateEndCtrlType = iota
templateElse
templateEndFor
)
type templateEndCtrlToken struct {
Type templateEndCtrlType
SrcRange hcl.Range
isTemplateToken
}
func (t *templateEndCtrlToken) Name() string {
switch t.Type {
case templateEndIf:
return "endif"
case templateElse:
return "else"
case templateEndFor:
return "endfor"
default:
// should never happen
panic("invalid templateEndCtrlType")
}
}
type templateEndToken struct {
SrcRange hcl.Range
isTemplateToken
}
type isTemplateToken [0]int
func (t isTemplateToken) templateToken() templateToken {
return t
}

@ -0,0 +1,159 @@
package hclsyntax
import (
"github.com/hashicorp/hcl/v2"
"github.com/zclconf/go-cty/cty"
)
// ParseTraversalAbs parses an absolute traversal that is assumed to consume
// all of the remaining tokens in the peeker. The usual parser recovery
// behavior is not supported here because traversals are not expected to
// be parsed as part of a larger program.
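//
// For example, the tokens for foo.bar[0] yield a traversal consisting of
// hcl.TraverseRoot ("foo"), hcl.TraverseAttr ("bar"), and
// hcl.TraverseIndex (0).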
func (p *parser) ParseTraversalAbs() (hcl.Traversal, hcl.Diagnostics) {
var ret hcl.Traversal
var diags hcl.Diagnostics
// Absolute traversal must always begin with a variable name
varTok := p.Read()
if varTok.Type != TokenIdent {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Variable name required",
Detail: "Must begin with a variable name.",
Subject: &varTok.Range,
})
return ret, diags
}
varName := string(varTok.Bytes)
ret = append(ret, hcl.TraverseRoot{
Name: varName,
SrcRange: varTok.Range,
})
for {
next := p.Peek()
if next.Type == TokenEOF {
return ret, diags
}
switch next.Type {
case TokenDot:
// Attribute access
dot := p.Read() // eat dot
nameTok := p.Read()
if nameTok.Type != TokenIdent {
if nameTok.Type == TokenStar {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Attribute name required",
Detail: "Splat expressions (.*) may not be used here.",
Subject: &nameTok.Range,
Context: hcl.RangeBetween(varTok.Range, nameTok.Range).Ptr(),
})
} else {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Attribute name required",
Detail: "Dot must be followed by attribute name.",
Subject: &nameTok.Range,
Context: hcl.RangeBetween(varTok.Range, nameTok.Range).Ptr(),
})
}
return ret, diags
}
attrName := string(nameTok.Bytes)
ret = append(ret, hcl.TraverseAttr{
Name: attrName,
SrcRange: hcl.RangeBetween(dot.Range, nameTok.Range),
})
case TokenOBrack:
// Index
open := p.Read() // eat open bracket
next := p.Peek()
switch next.Type {
case TokenNumberLit:
tok := p.Read() // eat number
numVal, numDiags := p.numberLitValue(tok)
diags = append(diags, numDiags...)
close := p.Read()
if close.Type != TokenCBrack {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unclosed index brackets",
Detail: "Index key must be followed by a closing bracket.",
Subject: &close.Range,
Context: hcl.RangeBetween(open.Range, close.Range).Ptr(),
})
}
ret = append(ret, hcl.TraverseIndex{
Key: numVal,
SrcRange: hcl.RangeBetween(open.Range, close.Range),
})
if diags.HasErrors() {
return ret, diags
}
case TokenOQuote:
str, _, strDiags := p.parseQuotedStringLiteral()
diags = append(diags, strDiags...)
close := p.Read()
if close.Type != TokenCBrack {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unclosed index brackets",
Detail: "Index key must be followed by a closing bracket.",
Subject: &close.Range,
Context: hcl.RangeBetween(open.Range, close.Range).Ptr(),
})
}
ret = append(ret, hcl.TraverseIndex{
Key: cty.StringVal(str),
SrcRange: hcl.RangeBetween(open.Range, close.Range),
})
if diags.HasErrors() {
return ret, diags
}
default:
if next.Type == TokenStar {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Attribute name required",
Detail: "Splat expressions ([*]) may not be used here.",
Subject: &next.Range,
Context: hcl.RangeBetween(varTok.Range, next.Range).Ptr(),
})
} else {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Index value required",
Detail: "Index brackets must contain either a literal number or a literal string.",
Subject: &next.Range,
Context: hcl.RangeBetween(varTok.Range, next.Range).Ptr(),
})
}
return ret, diags
}
default:
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid character",
Detail: "Expected an attribute access or an index operator.",
Subject: &next.Range,
Context: hcl.RangeBetween(varTok.Range, next.Range).Ptr(),
})
return ret, diags
}
}
}
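// Illustrative sketch (not part of the upstream file): for the input
// `foo.bar[0]`, ParseTraversalAbs produces a three-step traversal, roughly:
//
//	hcl.Traversal{
//		hcl.TraverseRoot{Name: "foo"},
//		hcl.TraverseAttr{Name: "bar"},
//		hcl.TraverseIndex{Key: cty.NumberIntVal(0)},
//	}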

@ -0,0 +1,212 @@
package hclsyntax
import (
"bytes"
"fmt"
"path/filepath"
"runtime"
"strings"
"github.com/hashicorp/hcl/v2"
)
// This is set to true at init() time in tests, to enable more useful output
// if a stack discipline error is detected. It should not be enabled in
// normal mode since there is a performance penalty from accessing the
// runtime stack to produce the traces, but could be temporarily set to
// true for debugging if desired.
var tracePeekerNewlinesStack = false
type peeker struct {
Tokens Tokens
NextIndex int
IncludeComments bool
IncludeNewlinesStack []bool
// used only when tracePeekerNewlinesStack is set
newlineStackChanges []peekerNewlineStackChange
}
// for use in debugging the stack usage only
type peekerNewlineStackChange struct {
Pushing bool // if false, then popping
Frame runtime.Frame
Include bool
}
func newPeeker(tokens Tokens, includeComments bool) *peeker {
return &peeker{
Tokens: tokens,
IncludeComments: includeComments,
IncludeNewlinesStack: []bool{true},
}
}
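// Illustrative sketch (not part of the upstream file): the parser brackets
// constructs that ignore newlines with matching push/pop pairs, roughly:
//
//	p := newPeeker(tokens, false)
//	p.PushIncludeNewlines(false) // e.g. just after reading "("
//	tok := p.Read()              // newline tokens are skipped here
//	p.PopIncludeNewlines()       // e.g. just after reading ")"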
func (p *peeker) Peek() Token {
ret, _ := p.nextToken()
return ret
}
func (p *peeker) Read() Token {
ret, nextIdx := p.nextToken()
p.NextIndex = nextIdx
return ret
}
func (p *peeker) NextRange() hcl.Range {
return p.Peek().Range
}
func (p *peeker) PrevRange() hcl.Range {
if p.NextIndex == 0 {
return p.NextRange()
}
return p.Tokens[p.NextIndex-1].Range
}
func (p *peeker) nextToken() (Token, int) {
for i := p.NextIndex; i < len(p.Tokens); i++ {
tok := p.Tokens[i]
switch tok.Type {
case TokenComment:
if !p.IncludeComments {
// Single-line comment tokens, starting with # or //, absorb
// the trailing newline that terminates them as part of their
// bytes. When we're filtering out comments, we must as a
// special case transform these to newline tokens in order
// to properly parse newline-terminated block items.
if p.includingNewlines() {
if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' {
fakeNewline := Token{
Type: TokenNewline,
Bytes: tok.Bytes[len(tok.Bytes)-1 : len(tok.Bytes)],
// We use the whole token range as the newline
// range, even though that's a little... weird,
// because otherwise we'd need to go count
// characters again in order to figure out the
// column of the newline, and that complexity
// isn't justified when ranges of newlines are
// so rarely printed anyway.
Range: tok.Range,
}
return fakeNewline, i + 1
}
}
continue
}
case TokenNewline:
if !p.includingNewlines() {
continue
}
}
return tok, i + 1
}
// if we fall out here then we'll return the EOF token, and leave
// our index pointed off the end of the array so we'll keep
// returning EOF in future too.
return p.Tokens[len(p.Tokens)-1], len(p.Tokens)
}
func (p *peeker) includingNewlines() bool {
return p.IncludeNewlinesStack[len(p.IncludeNewlinesStack)-1]
}
func (p *peeker) PushIncludeNewlines(include bool) {
if tracePeekerNewlinesStack {
// Record who called us so that we can more easily track down any
// mismanagement of the stack in the parser.
callers := []uintptr{0}
runtime.Callers(2, callers)
frames := runtime.CallersFrames(callers)
frame, _ := frames.Next()
p.newlineStackChanges = append(p.newlineStackChanges, peekerNewlineStackChange{
true, frame, include,
})
}
p.IncludeNewlinesStack = append(p.IncludeNewlinesStack, include)
}
func (p *peeker) PopIncludeNewlines() bool {
stack := p.IncludeNewlinesStack
remain, ret := stack[:len(stack)-1], stack[len(stack)-1]
p.IncludeNewlinesStack = remain
if tracePeekerNewlinesStack {
// Record who called us so that we can more easily track down any
// mismanagement of the stack in the parser.
callers := []uintptr{0}
runtime.Callers(2, callers)
frames := runtime.CallersFrames(callers)
frame, _ := frames.Next()
p.newlineStackChanges = append(p.newlineStackChanges, peekerNewlineStackChange{
false, frame, ret,
})
}
return ret
}
// AssertEmptyIncludeNewlinesStack checks that the IncludeNewlinesStack holds
// only its initial entry, panicking if it does not. This can be used to catch
// stack mismanagement that might otherwise just cause confusing downstream
// errors.
//
// This function is a no-op if the stack is in its initial state when called.
//
// If newlines stack tracing is enabled by setting the global variable
// tracePeekerNewlinesStack at init time, a full log of all of the push/pop
// calls will be produced to help identify which caller in the parser is
// misbehaving.
func (p *peeker) AssertEmptyIncludeNewlinesStack() {
if len(p.IncludeNewlinesStack) != 1 {
// Should never happen; indicates mismanagement of the stack inside
// the parser.
if p.newlineStackChanges != nil { // only if tracePeekerNewlinesStack is enabled above
panic(fmt.Errorf(
"non-empty IncludeNewlinesStack after parse with %d calls unaccounted for:\n%s",
len(p.IncludeNewlinesStack)-1,
formatPeekerNewlineStackChanges(p.newlineStackChanges),
))
} else {
panic(fmt.Errorf("non-empty IncludeNewlinesStack after parse: %#v", p.IncludeNewlinesStack))
}
}
}
func formatPeekerNewlineStackChanges(changes []peekerNewlineStackChange) string {
indent := 0
var buf bytes.Buffer
for _, change := range changes {
funcName := change.Frame.Function
if idx := strings.LastIndexByte(funcName, '.'); idx != -1 {
funcName = funcName[idx+1:]
}
filename := change.Frame.File
if idx := strings.LastIndexByte(filename, filepath.Separator); idx != -1 {
filename = filename[idx+1:]
}
switch change.Pushing {
case true:
buf.WriteString(strings.Repeat(" ", indent))
fmt.Fprintf(&buf, "PUSH %#v (%s at %s:%d)\n", change.Include, funcName, filename, change.Frame.Line)
indent++
case false:
indent--
buf.WriteString(strings.Repeat(" ", indent))
fmt.Fprintf(&buf, "POP %#v (%s at %s:%d)\n", change.Include, funcName, filename, change.Frame.Line)
}
}
return buf.String()
}

@ -0,0 +1,171 @@
package hclsyntax
import (
"github.com/hashicorp/hcl/v2"
)
// ParseConfig parses the given buffer as a whole HCL config file, returning
// a *hcl.File representing its contents. If HasErrors called on the returned
// diagnostics returns true, the returned body is likely to be incomplete
// and should therefore be used with care.
//
// The body in the returned file has dynamic type *hclsyntax.Body, so callers
// may freely type-assert this to get access to the full hclsyntax API in
// situations where detailed access is required. However, most common use-cases
// should be served using the hcl.Body interface to ensure compatibility with
// other configuration syntaxes, such as JSON.
func ParseConfig(src []byte, filename string, start hcl.Pos) (*hcl.File, hcl.Diagnostics) {
tokens, diags := LexConfig(src, filename, start)
peeker := newPeeker(tokens, false)
parser := &parser{peeker: peeker}
body, parseDiags := parser.ParseBody(TokenEOF)
diags = append(diags, parseDiags...)
// Panic if the parser uses incorrect stack discipline with the peeker's
// newlines stack, since otherwise it will produce confusing downstream
// errors.
peeker.AssertEmptyIncludeNewlinesStack()
return &hcl.File{
Body: body,
Bytes: src,
Nav: navigation{
root: body,
},
}, diags
}
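// Illustrative sketch (not part of the upstream file): typical use of
// ParseConfig, assuming src holds the bytes of a file named "example.hcl":
//
//	f, diags := ParseConfig(src, "example.hcl", hcl.Pos{Line: 1, Column: 1, Byte: 0})
//	if diags.HasErrors() {
//		// report the diagnostics and bail out
//	}
//	body := f.Body.(*Body) // full native-syntax API, as described above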
// ParseExpression parses the given buffer as a standalone HCL expression,
// returning it as an instance of Expression.
func ParseExpression(src []byte, filename string, start hcl.Pos) (Expression, hcl.Diagnostics) {
tokens, diags := LexExpression(src, filename, start)
peeker := newPeeker(tokens, false)
parser := &parser{peeker: peeker}
// Bare expressions are always parsed in "ignore newlines" mode, as if
// they were wrapped in parentheses.
parser.PushIncludeNewlines(false)
expr, parseDiags := parser.ParseExpression()
diags = append(diags, parseDiags...)
next := parser.Peek()
if next.Type != TokenEOF && !parser.recovery {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Extra characters after expression",
Detail: "An expression was successfully parsed, but extra characters were found after it.",
Subject: &next.Range,
})
}
parser.PopIncludeNewlines()
// Panic if the parser uses incorrect stack discipline with the peeker's
// newlines stack, since otherwise it will produce confusing downstream
// errors.
peeker.AssertEmptyIncludeNewlinesStack()
return expr, diags
}
// ParseTemplate parses the given buffer as a standalone HCL template,
// returning it as an instance of Expression.
func ParseTemplate(src []byte, filename string, start hcl.Pos) (Expression, hcl.Diagnostics) {
tokens, diags := LexTemplate(src, filename, start)
peeker := newPeeker(tokens, false)
parser := &parser{peeker: peeker}
expr, parseDiags := parser.ParseTemplate()
diags = append(diags, parseDiags...)
// Panic if the parser uses incorrect stack discipline with the peeker's
// newlines stack, since otherwise it will produce confusing downstream
// errors.
peeker.AssertEmptyIncludeNewlinesStack()
return expr, diags
}
// ParseTraversalAbs parses the given buffer as a standalone absolute traversal.
//
// Parsing as a traversal is more limited than parsing as an expression since
// it allows only attribute and indexing operations on variables. Traversals
// are useful as a syntax for referring to objects without necessarily
// evaluating them.
func ParseTraversalAbs(src []byte, filename string, start hcl.Pos) (hcl.Traversal, hcl.Diagnostics) {
tokens, diags := LexExpression(src, filename, start)
peeker := newPeeker(tokens, false)
parser := &parser{peeker: peeker}
// Bare traversals are always parsed in "ignore newlines" mode, as if
// they were wrapped in parentheses.
parser.PushIncludeNewlines(false)
expr, parseDiags := parser.ParseTraversalAbs()
diags = append(diags, parseDiags...)
parser.PopIncludeNewlines()
// Panic if the parser uses incorrect stack discipline with the peeker's
// newlines stack, since otherwise it will produce confusing downstream
// errors.
peeker.AssertEmptyIncludeNewlinesStack()
return expr, diags
}
// LexConfig performs lexical analysis on the given buffer, treating it as a
// whole HCL config file, and returns the resulting tokens.
//
// Only minimal validation is done during lexical analysis, so the returned
// diagnostics may include errors about lexical issues such as bad character
// encodings or unrecognized characters, but full parsing is required to
// detect _all_ syntax errors.
func LexConfig(src []byte, filename string, start hcl.Pos) (Tokens, hcl.Diagnostics) {
tokens := scanTokens(src, filename, start, scanNormal)
diags := checkInvalidTokens(tokens)
return tokens, diags
}
// LexExpression performs lexical analysis on the given buffer, treating it as
// a standalone HCL expression, and returns the resulting tokens.
//
// Only minimal validation is done during lexical analysis, so the returned
// diagnostics may include errors about lexical issues such as bad character
// encodings or unrecognized characters, but full parsing is required to
// detect _all_ syntax errors.
func LexExpression(src []byte, filename string, start hcl.Pos) (Tokens, hcl.Diagnostics) {
// This is actually just the same thing as LexConfig, since configs
// and expressions lex in the same way.
tokens := scanTokens(src, filename, start, scanNormal)
diags := checkInvalidTokens(tokens)
return tokens, diags
}
// LexTemplate performs lexical analysis on the given buffer, treating it as a
// standalone HCL template, and returns the resulting tokens.
//
// Only minimal validation is done during lexical analysis, so the returned
// diagnostics may include errors about lexical issues such as bad character
// encodings or unrecognized characters, but full parsing is required to
// detect _all_ syntax errors.
func LexTemplate(src []byte, filename string, start hcl.Pos) (Tokens, hcl.Diagnostics) {
tokens := scanTokens(src, filename, start, scanTemplate)
diags := checkInvalidTokens(tokens)
return tokens, diags
}
// ValidIdentifier tests if the given string could be a valid identifier in
// a native syntax expression.
//
// This is useful when accepting names from the user that will be used as
// variable or attribute names in the scope, to ensure that any name chosen
// will be traversable using the variable or attribute traversal syntax.
func ValidIdentifier(s string) bool {
// This is a kinda-expensive way to do something pretty simple, but it
// is easiest to do with our existing scanner-related infrastructure here
// and nobody should be validating identifiers in a tight loop.
tokens := scanTokens([]byte(s), "", hcl.Pos{}, scanIdentOnly)
return len(tokens) == 2 && tokens[0].Type == TokenIdent && tokens[1].Type == TokenEOF
}
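// Illustrative examples (not part of the upstream file):
//
//	ValidIdentifier("web-server") // true: dashes are allowed
//	ValidIdentifier("_private")   // true: a leading underscore is allowed
//	ValidIdentifier("0count")     // false: scans as a number then an identifier
//	ValidIdentifier("two words")  // false: scans as more than one token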

@ -0,0 +1,301 @@
//line scan_string_lit.rl:1
package hclsyntax
// This file is generated from scan_string_lit.rl. DO NOT EDIT.
//line scan_string_lit.go:9
var _hclstrtok_actions []byte = []byte{
0, 1, 0, 1, 1, 2, 1, 0,
}
var _hclstrtok_key_offsets []byte = []byte{
0, 0, 2, 4, 6, 10, 14, 18,
22, 27, 31, 36, 41, 46, 51, 57,
62, 74, 85, 96, 107, 118, 129, 140,
151,
}
var _hclstrtok_trans_keys []byte = []byte{
128, 191, 128, 191, 128, 191, 10, 13,
36, 37, 10, 13, 36, 37, 10, 13,
36, 37, 10, 13, 36, 37, 10, 13,
36, 37, 123, 10, 13, 36, 37, 10,
13, 36, 37, 92, 10, 13, 36, 37,
92, 10, 13, 36, 37, 92, 10, 13,
36, 37, 92, 10, 13, 36, 37, 92,
123, 10, 13, 36, 37, 92, 85, 117,
128, 191, 192, 223, 224, 239, 240, 247,
248, 255, 10, 13, 36, 37, 92, 48,
57, 65, 70, 97, 102, 10, 13, 36,
37, 92, 48, 57, 65, 70, 97, 102,
10, 13, 36, 37, 92, 48, 57, 65,
70, 97, 102, 10, 13, 36, 37, 92,
48, 57, 65, 70, 97, 102, 10, 13,
36, 37, 92, 48, 57, 65, 70, 97,
102, 10, 13, 36, 37, 92, 48, 57,
65, 70, 97, 102, 10, 13, 36, 37,
92, 48, 57, 65, 70, 97, 102, 10,
13, 36, 37, 92, 48, 57, 65, 70,
97, 102,
}
var _hclstrtok_single_lengths []byte = []byte{
0, 0, 0, 0, 4, 4, 4, 4,
5, 4, 5, 5, 5, 5, 6, 5,
2, 5, 5, 5, 5, 5, 5, 5,
5,
}
var _hclstrtok_range_lengths []byte = []byte{
0, 1, 1, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
5, 3, 3, 3, 3, 3, 3, 3,
3,
}
var _hclstrtok_index_offsets []byte = []byte{
0, 0, 2, 4, 6, 11, 16, 21,
26, 32, 37, 43, 49, 55, 61, 68,
74, 82, 91, 100, 109, 118, 127, 136,
145,
}
var _hclstrtok_indicies []byte = []byte{
0, 1, 2, 1, 3, 1, 5, 6,
7, 8, 4, 10, 11, 12, 13, 9,
14, 11, 12, 13, 9, 10, 11, 15,
13, 9, 10, 11, 12, 13, 14, 9,
10, 11, 12, 15, 9, 17, 18, 19,
20, 21, 16, 23, 24, 25, 26, 27,
22, 0, 24, 25, 26, 27, 22, 23,
24, 28, 26, 27, 22, 23, 24, 25,
26, 27, 0, 22, 23, 24, 25, 28,
27, 22, 29, 30, 22, 2, 3, 31,
22, 0, 23, 24, 25, 26, 27, 32,
32, 32, 22, 23, 24, 25, 26, 27,
33, 33, 33, 22, 23, 24, 25, 26,
27, 34, 34, 34, 22, 23, 24, 25,
26, 27, 30, 30, 30, 22, 23, 24,
25, 26, 27, 35, 35, 35, 22, 23,
24, 25, 26, 27, 36, 36, 36, 22,
23, 24, 25, 26, 27, 37, 37, 37,
22, 23, 24, 25, 26, 27, 0, 0,
0, 22,
}
var _hclstrtok_trans_targs []byte = []byte{
11, 0, 1, 2, 4, 5, 6, 7,
9, 4, 5, 6, 7, 9, 5, 8,
10, 11, 12, 13, 15, 16, 10, 11,
12, 13, 15, 16, 14, 17, 21, 3,
18, 19, 20, 22, 23, 24,
}
var _hclstrtok_trans_actions []byte = []byte{
0, 0, 0, 0, 0, 1, 1, 1,
1, 3, 5, 5, 5, 5, 0, 0,
0, 1, 1, 1, 1, 1, 3, 5,
5, 5, 5, 5, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0,
}
var _hclstrtok_eof_actions []byte = []byte{
0, 0, 0, 0, 0, 3, 3, 3,
3, 3, 0, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3,
3,
}
const hclstrtok_start int = 4
const hclstrtok_first_final int = 4
const hclstrtok_error int = 0
const hclstrtok_en_quoted int = 10
const hclstrtok_en_unquoted int = 4
//line scan_string_lit.rl:10
func scanStringLit(data []byte, quoted bool) [][]byte {
var ret [][]byte
//line scan_string_lit.rl:61
// Ragel state
p := 0 // "Pointer" into data
pe := len(data) // End-of-data "pointer"
ts := 0
te := 0
eof := pe
var cs int // current state
switch {
case quoted:
cs = hclstrtok_en_quoted
default:
cs = hclstrtok_en_unquoted
}
// Make Go compiler happy
_ = ts
_ = eof
/*token := func () {
ret = append(ret, data[ts:te])
}*/
//line scan_string_lit.go:154
{
}
//line scan_string_lit.go:158
{
var _klen int
var _trans int
var _acts int
var _nacts uint
var _keys int
if p == pe {
goto _test_eof
}
if cs == 0 {
goto _out
}
_resume:
_keys = int(_hclstrtok_key_offsets[cs])
_trans = int(_hclstrtok_index_offsets[cs])
_klen = int(_hclstrtok_single_lengths[cs])
if _klen > 0 {
_lower := int(_keys)
var _mid int
_upper := int(_keys + _klen - 1)
for {
if _upper < _lower {
break
}
_mid = _lower + ((_upper - _lower) >> 1)
switch {
case data[p] < _hclstrtok_trans_keys[_mid]:
_upper = _mid - 1
case data[p] > _hclstrtok_trans_keys[_mid]:
_lower = _mid + 1
default:
_trans += int(_mid - int(_keys))
goto _match
}
}
_keys += _klen
_trans += _klen
}
_klen = int(_hclstrtok_range_lengths[cs])
if _klen > 0 {
_lower := int(_keys)
var _mid int
_upper := int(_keys + (_klen << 1) - 2)
for {
if _upper < _lower {
break
}
_mid = _lower + (((_upper - _lower) >> 1) & ^1)
switch {
case data[p] < _hclstrtok_trans_keys[_mid]:
_upper = _mid - 2
case data[p] > _hclstrtok_trans_keys[_mid+1]:
_lower = _mid + 2
default:
_trans += int((_mid - int(_keys)) >> 1)
goto _match
}
}
_trans += _klen
}
_match:
_trans = int(_hclstrtok_indicies[_trans])
cs = int(_hclstrtok_trans_targs[_trans])
if _hclstrtok_trans_actions[_trans] == 0 {
goto _again
}
_acts = int(_hclstrtok_trans_actions[_trans])
_nacts = uint(_hclstrtok_actions[_acts])
_acts++
for ; _nacts > 0; _nacts-- {
_acts++
switch _hclstrtok_actions[_acts-1] {
case 0:
//line scan_string_lit.rl:40
// If te is behind p then we've skipped over some literal
// characters which we must now return.
if te < p {
ret = append(ret, data[te:p])
}
ts = p
case 1:
//line scan_string_lit.rl:48
te = p
ret = append(ret, data[ts:te])
//line scan_string_lit.go:253
}
}
_again:
if cs == 0 {
goto _out
}
p++
if p != pe {
goto _resume
}
_test_eof:
{
}
if p == eof {
__acts := _hclstrtok_eof_actions[cs]
__nacts := uint(_hclstrtok_actions[__acts])
__acts++
for ; __nacts > 0; __nacts-- {
__acts++
switch _hclstrtok_actions[__acts-1] {
case 1:
//line scan_string_lit.rl:48
te = p
ret = append(ret, data[ts:te])
//line scan_string_lit.go:278
}
}
}
_out:
{
}
}
//line scan_string_lit.rl:89
if te < p {
// Collect any leftover literal characters at the end of the input
ret = append(ret, data[te:p])
}
// If we fall out here without being in a final state then we've
// encountered something that the scanner can't match, which should
// be impossible (the scanner matches all bytes _somehow_) but we'll
// tolerate it and let the caller deal with it.
if cs < hclstrtok_first_final {
ret = append(ret, data[p:len(data)])
}
return ret
}

@ -0,0 +1,105 @@
package hclsyntax
// This file is generated from scan_string_lit.rl. DO NOT EDIT.
%%{
# (except you are actually in scan_string_lit.rl here, so edit away!)
machine hclstrtok;
write data;
}%%
func scanStringLit(data []byte, quoted bool) [][]byte {
var ret [][]byte
%%{
include UnicodeDerived "unicode_derived.rl";
UTF8Cont = 0x80 .. 0xBF;
AnyUTF8 = (
0x00..0x7F |
0xC0..0xDF . UTF8Cont |
0xE0..0xEF . UTF8Cont . UTF8Cont |
0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
);
BadUTF8 = any - AnyUTF8;
Hex = ('0'..'9' | 'a'..'f' | 'A'..'F');
# Our goal with these patterns is to capture user intent as best as
# possible, even if the input is invalid. The caller will then verify
# whether each token is valid and generate suitable error messages
# if not.
UnicodeEscapeShort = "\\u" . Hex{0,4};
UnicodeEscapeLong = "\\U" . Hex{0,8};
UnicodeEscape = (UnicodeEscapeShort | UnicodeEscapeLong);
SimpleEscape = "\\" . (AnyUTF8 - ('U'|'u'))?;
TemplateEscape = ("$" . ("$" . ("{"?))?) | ("%" . ("%" . ("{"?))?);
Newline = ("\r\n" | "\r" | "\n");
action Begin {
// If te is behind p then we've skipped over some literal
// characters which we must now return.
if te < p {
ret = append(ret, data[te:p])
}
ts = p;
}
action End {
te = p;
ret = append(ret, data[ts:te]);
}
QuotedToken = (UnicodeEscape | SimpleEscape | TemplateEscape | Newline) >Begin %End;
UnquotedToken = (TemplateEscape | Newline) >Begin %End;
QuotedLiteral = (any - ("\\" | "$" | "%" | "\r" | "\n"));
UnquotedLiteral = (any - ("$" | "%" | "\r" | "\n"));
quoted := (QuotedToken | QuotedLiteral)**;
unquoted := (UnquotedToken | UnquotedLiteral)**;
}%%
// Ragel state
p := 0 // "Pointer" into data
pe := len(data) // End-of-data "pointer"
ts := 0
te := 0
eof := pe
var cs int // current state
switch {
case quoted:
cs = hclstrtok_en_quoted
default:
cs = hclstrtok_en_unquoted
}
// Make Go compiler happy
_ = ts
_ = eof
/*token := func () {
ret = append(ret, data[ts:te])
}*/
%%{
write init nocs;
write exec;
}%%
if te < p {
// Collect any leftover literal characters at the end of the input
ret = append(ret, data[te:p])
}
// If we fall out here without being in a final state then we've
// encountered something that the scanner can't match, which should
// be impossible (the scanner matches all bytes _somehow_) but we'll
// tolerate it and let the caller deal with it.
if cs < hclstrtok_first_final {
ret = append(ret, data[p:len(data)])
}
return ret
}
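// Illustrative sketch (not part of the upstream file): for a quoted string
// body, the scanner separates literal runs from the sequences the caller
// must decode specially, roughly:
//
//	scanStringLit([]byte(`ab\ncd`), true)
//	// => ["ab", `\n`, "cd"]
//
// Template escapes such as "$${" are split out in the same way.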

File diff suppressed because it is too large

@ -0,0 +1,395 @@
package hclsyntax
import (
"bytes"
"github.com/hashicorp/hcl/v2"
)
// This file is generated from scan_tokens.rl. DO NOT EDIT.
%%{
# (except when you are actually in scan_tokens.rl here, so edit away!)
machine hcltok;
write data;
}%%
func scanTokens(data []byte, filename string, start hcl.Pos, mode scanMode) []Token {
stripData := stripUTF8BOM(data)
start.Byte += len(data) - len(stripData)
data = stripData
f := &tokenAccum{
Filename: filename,
Bytes: data,
Pos: start,
StartByte: start.Byte,
}
%%{
include UnicodeDerived "unicode_derived.rl";
UTF8Cont = 0x80 .. 0xBF;
AnyUTF8 = (
0x00..0x7F |
0xC0..0xDF . UTF8Cont |
0xE0..0xEF . UTF8Cont . UTF8Cont |
0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
);
BrokenUTF8 = any - AnyUTF8;
NumberLitContinue = (digit|'.'|('e'|'E') ('+'|'-')? digit);
NumberLit = digit ("" | (NumberLitContinue - '.') | (NumberLitContinue* (NumberLitContinue - '.')));
Ident = (ID_Start | '_') (ID_Continue | '-')*;
# Symbols that just represent themselves are handled as a single rule.
SelfToken = "[" | "]" | "(" | ")" | "." | "," | "*" | "/" | "%" | "+" | "-" | "=" | "<" | ">" | "!" | "?" | ":" | "\n" | "&" | "|" | "~" | "^" | ";" | "`" | "'";
EqualOp = "==";
NotEqual = "!=";
GreaterThanEqual = ">=";
LessThanEqual = "<=";
LogicalAnd = "&&";
LogicalOr = "||";
Ellipsis = "...";
FatArrow = "=>";
Newline = '\r' ? '\n';
EndOfLine = Newline;
BeginStringTmpl = '"';
BeginHeredocTmpl = '<<' ('-')? Ident Newline;
Comment = (
# The :>> operator in these is a "finish-guarded concatenation",
# which terminates the sequence on its left when it completes
# the sequence on its right.
# In the single-line comment cases this is allowing us to make
# the trailing EndOfLine optional while still having the overall
# pattern terminate. In the multi-line case it ensures that
# the first comment in the file ends at the first */, rather than
# gobbling up all of the "any*" until the _final_ */ in the file.
("#" (any - EndOfLine)* :>> EndOfLine?) |
("//" (any - EndOfLine)* :>> EndOfLine?) |
("/*" any* :>> "*/")
);
# Note: hclwrite assumes that only ASCII spaces appear between tokens,
# and uses this assumption to recreate the spaces between tokens by
# looking at byte offset differences. This means it will produce
# incorrect results in the presence of tabs, but that's acceptable
# because the canonical style (which hclwrite itself can impose
# automatically) is to never use tabs.
Spaces = (' ' | 0x09)+;
action beginStringTemplate {
token(TokenOQuote);
fcall stringTemplate;
}
action endStringTemplate {
token(TokenCQuote);
fret;
}
action beginHeredocTemplate {
token(TokenOHeredoc);
// the token is currently the whole heredoc introducer, like
// <<EOT or <<-EOT, followed by a newline. We want to extract
// just the "EOT" portion that we'll use as the closing marker.
marker := data[ts+2:te-1]
if marker[0] == '-' {
marker = marker[1:]
}
if marker[len(marker)-1] == '\r' {
marker = marker[:len(marker)-1]
}
heredocs = append(heredocs, heredocInProgress{
Marker: marker,
StartOfLine: true,
})
fcall heredocTemplate;
}
action heredocLiteralEOL {
// This action is called specifically when a heredoc literal
// ends with a newline character.
// This might actually be our end marker.
topdoc := &heredocs[len(heredocs)-1]
if topdoc.StartOfLine {
maybeMarker := bytes.TrimSpace(data[ts:te])
if bytes.Equal(maybeMarker, topdoc.Marker) {
// We actually emit two tokens here: the end-of-heredoc
// marker first, and then separately the newline that
// follows it. This then avoids issues with the closing
// marker consuming a newline that would normally be used
// to mark the end of an attribute definition.
// We might have either a \n sequence or an \r\n sequence
// here, so we must handle both.
nls := te-1
nle := te
te--
if data[te-1] == '\r' {
// back up one more byte
nls--
te--
}
token(TokenCHeredoc);
ts = nls
te = nle
token(TokenNewline);
heredocs = heredocs[:len(heredocs)-1]
fret;
}
}
topdoc.StartOfLine = true;
token(TokenStringLit);
}
action heredocLiteralMidline {
// This action is called when a heredoc literal _doesn't_ end
// with a newline character, e.g. because we're about to enter
// an interpolation sequence.
heredocs[len(heredocs)-1].StartOfLine = false;
token(TokenStringLit);
}
action bareTemplateLiteral {
token(TokenStringLit);
}
action beginTemplateInterp {
token(TokenTemplateInterp);
braces++;
retBraces = append(retBraces, braces);
if len(heredocs) > 0 {
heredocs[len(heredocs)-1].StartOfLine = false;
}
fcall main;
}
action beginTemplateControl {
token(TokenTemplateControl);
braces++;
retBraces = append(retBraces, braces);
if len(heredocs) > 0 {
heredocs[len(heredocs)-1].StartOfLine = false;
}
fcall main;
}
action openBrace {
token(TokenOBrace);
braces++;
}
action closeBrace {
if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
token(TokenTemplateSeqEnd);
braces--;
retBraces = retBraces[0:len(retBraces)-1]
fret;
} else {
token(TokenCBrace);
braces--;
}
}
action closeTemplateSeqEatWhitespace {
// Only consume from the retBraces stack and return if we are at
// a suitable brace nesting level, otherwise things will get
// confused. (Not entering this branch indicates a syntax error,
// which we will catch in the parser.)
if len(retBraces) > 0 && retBraces[len(retBraces)-1] == braces {
token(TokenTemplateSeqEnd);
braces--;
retBraces = retBraces[0:len(retBraces)-1]
fret;
} else {
// We intentionally generate a TokenTemplateSeqEnd here,
// even though the user apparently wanted a brace, because
// we want to allow the parser to catch the incorrect use
// of a ~} to balance a generic opening brace, rather than
// a template sequence.
token(TokenTemplateSeqEnd);
braces--;
}
}
TemplateInterp = "${" ("~")?;
TemplateControl = "%{" ("~")?;
EndStringTmpl = '"';
NewlineChars = ("\r"|"\n");
NewlineCharsSeq = NewlineChars+;
StringLiteralChars = (AnyUTF8 - NewlineChars);
TemplateIgnoredNonBrace = (^'{' %{ fhold; });
TemplateNotInterp = '$' (TemplateIgnoredNonBrace | TemplateInterp);
TemplateNotControl = '%' (TemplateIgnoredNonBrace | TemplateControl);
QuotedStringLiteralWithEsc = ('\\' StringLiteralChars) | (StringLiteralChars - ("$" | '%' | '"' | "\\"));
TemplateStringLiteral = (
(TemplateNotInterp) |
(TemplateNotControl) |
(QuotedStringLiteralWithEsc)+
);
HeredocStringLiteral = (
(TemplateNotInterp) |
(TemplateNotControl) |
(StringLiteralChars - ("$" | '%'))*
);
BareStringLiteral = (
(TemplateNotInterp) |
(TemplateNotControl) |
(StringLiteralChars - ("$" | '%'))*
) Newline?;
stringTemplate := |*
TemplateInterp => beginTemplateInterp;
TemplateControl => beginTemplateControl;
EndStringTmpl => endStringTemplate;
TemplateStringLiteral => { token(TokenQuotedLit); };
NewlineCharsSeq => { token(TokenQuotedNewline); };
AnyUTF8 => { token(TokenInvalid); };
BrokenUTF8 => { token(TokenBadUTF8); };
*|;
heredocTemplate := |*
TemplateInterp => beginTemplateInterp;
TemplateControl => beginTemplateControl;
HeredocStringLiteral EndOfLine => heredocLiteralEOL;
HeredocStringLiteral => heredocLiteralMidline;
BrokenUTF8 => { token(TokenBadUTF8); };
*|;
bareTemplate := |*
TemplateInterp => beginTemplateInterp;
TemplateControl => beginTemplateControl;
BareStringLiteral => bareTemplateLiteral;
BrokenUTF8 => { token(TokenBadUTF8); };
*|;
identOnly := |*
Ident => { token(TokenIdent) };
BrokenUTF8 => { token(TokenBadUTF8) };
AnyUTF8 => { token(TokenInvalid) };
*|;
main := |*
Spaces => {};
NumberLit => { token(TokenNumberLit) };
Ident => { token(TokenIdent) };
Comment => { token(TokenComment) };
Newline => { token(TokenNewline) };
EqualOp => { token(TokenEqualOp); };
NotEqual => { token(TokenNotEqual); };
GreaterThanEqual => { token(TokenGreaterThanEq); };
LessThanEqual => { token(TokenLessThanEq); };
LogicalAnd => { token(TokenAnd); };
LogicalOr => { token(TokenOr); };
Ellipsis => { token(TokenEllipsis); };
FatArrow => { token(TokenFatArrow); };
SelfToken => { selfToken() };
"{" => openBrace;
"}" => closeBrace;
"~}" => closeTemplateSeqEatWhitespace;
BeginStringTmpl => beginStringTemplate;
BeginHeredocTmpl => beginHeredocTemplate;
BrokenUTF8 => { token(TokenBadUTF8) };
AnyUTF8 => { token(TokenInvalid) };
*|;
}%%
// Ragel state
p := 0 // "Pointer" into data
pe := len(data) // End-of-data "pointer"
ts := 0
te := 0
act := 0
eof := pe
var stack []int
var top int
var cs int // current state
switch mode {
case scanNormal:
cs = hcltok_en_main
case scanTemplate:
cs = hcltok_en_bareTemplate
case scanIdentOnly:
cs = hcltok_en_identOnly
default:
panic("invalid scanMode")
}
braces := 0
var retBraces []int // stack of brace levels that cause us to use fret
var heredocs []heredocInProgress // stack of heredocs we're currently processing
%%{
prepush {
stack = append(stack, 0);
}
postpop {
stack = stack[:len(stack)-1];
}
}%%
// Make Go compiler happy
_ = ts
_ = te
_ = act
_ = eof
token := func (ty TokenType) {
f.emitToken(ty, ts, te)
}
selfToken := func () {
b := data[ts:te]
if len(b) != 1 {
// should never happen
panic("selfToken only works for single-character tokens")
}
f.emitToken(TokenType(b[0]), ts, te)
}
%%{
write init nocs;
write exec;
}%%
// If we fall out here without being in a final state then we've
// encountered something that the scanner can't match, which we'll
// deal with as an invalid.
if cs < hcltok_first_final {
if mode == scanTemplate && len(stack) == 0 {
// If we're scanning a bare template then any straggling
// top-level stuff is actually literal string, rather than
// invalid. This handles the case where the template ends
// with a single "$" or "%", which trips us up because we
// want to see another character to decide if it's a sequence
// or an escape.
f.emitToken(TokenStringLit, ts, len(data))
} else {
f.emitToken(TokenInvalid, ts, len(data))
}
}
// We always emit a synthetic EOF token at the end, since it gives the
// parser position information for an "unexpected EOF" diagnostic.
f.emitToken(TokenEOF, len(data), len(data))
return f.Tokens
}

@ -0,0 +1,941 @@
# HCL Native Syntax Specification
This is the specification of the syntax and semantics of the native syntax
for HCL. HCL is a system for defining configuration languages for applications.
The HCL information model is designed to support multiple concrete syntaxes
for configuration, but this native syntax is considered the primary format
and is optimized for human authoring and maintenance, as opposed to machine
generation of configuration.
The language consists of three integrated sub-languages:
- The _structural_ language defines the overall hierarchical configuration
structure, and is a serialization of HCL bodies, blocks and attributes.
- The _expression_ language is used to express attribute values, either as
literals or as derivations of other values.
- The _template_ language is used to compose values together into strings,
as one of several types of expression in the expression language.
In normal use these three sub-languages are used together within configuration
files to describe an overall configuration, with the structural language
being used at the top level. The expression and template languages can also
be used in isolation, to implement features such as REPLs, debuggers, and
integration into more limited HCL syntaxes such as the JSON profile.
## Syntax Notation
Within this specification a semi-formal notation is used to illustrate the
details of syntax. This notation is intended for human consumption rather
than machine consumption, with the following conventions:
- A naked name starting with an uppercase letter is a global production,
common to all of the syntax specifications in this document.
- A naked name starting with a lowercase letter is a local production,
meaningful only within the specification where it is defined.
- Double and single quotes (`"` and `'`) are used to mark literal character
sequences, which may be either punctuation markers or keywords.
- The default operator for combining items, which has no punctuation,
is concatenation.
- The symbol `|` indicates that any one of its left and right operands may
be present.
- The `*` symbol indicates zero or more repetitions of the item to its left.
- The `?` symbol indicates zero or one of the item to its left.
- Parentheses (`(` and `)`) are used to group items together to apply
the `|`, `*` and `?` operators to them collectively.
The grammar notation does not fully describe the language. The prose may
augment or conflict with the illustrated grammar. In case of conflict, prose
has priority.
## Source Code Representation
Source code is unicode text expressed in the UTF-8 encoding. The language
itself does not perform unicode normalization, so syntax features such as
identifiers are sequences of unicode code points and so e.g. a precombined
accented character is distinct from a letter associated with a combining
accent. (String literals have some special handling with regard to Unicode
normalization which will be covered later in the relevant section.)
UTF-8 encoded Unicode byte order marks are not permitted. Invalid or
non-normalized UTF-8 encoding is always a parse error.
## Lexical Elements
### Comments and Whitespace
Comments and Whitespace are recognized as lexical elements but are ignored
except as described below.
Whitespace is defined as a sequence of zero or more space characters
(U+0020). Newline sequences (either U+000A or U+000D followed by U+000A)
are _not_ considered whitespace but are ignored as such in certain contexts.
Horizontal tab characters (U+0009) are also treated as whitespace, but are
counted only as one "column" for the purpose of reporting source positions.
Comments serve as program documentation and come in two forms:
- _Line comments_ start with either the `//` or `#` sequences and end with
the next newline sequence. A line comment is considered equivalent to a
newline sequence.
- _Inline comments_ start with the `/*` sequence and end with the `*/`
sequence, and may have any characters within except the ending sequence.
An inline comments is considered equivalent to a whitespace sequence.
Comments and whitespace cannot begin within other comments, or within
template literals except inside an interpolation sequence or template directive.
### Identifiers
Identifiers name entities such as blocks, attributes and expression variables.
Identifiers are interpreted as per [UAX #31][uax31] Section 2. Specifically,
their syntax is defined in terms of the `ID_Start` and `ID_Continue`
character properties as follows:
```ebnf
Identifier = ID_Start (ID_Continue | '-')*;
```
The Unicode specification provides the normative requirements for identifier
parsing. Non-normatively, the spirit of this specification is that `ID_Start`
consists of Unicode letters and certain unambiguous punctuation tokens, while
`ID_Continue` augments that set with Unicode digits, combining marks, etc.
The dash character `-` is additionally allowed in identifiers, even though
that is not part of the Unicode `ID_Continue` definition. This is to allow
attribute names and block type names to contain dashes, although underscores
as word separators are considered the idiomatic usage.
[uax31]: http://unicode.org/reports/tr31/ "Unicode Identifier and Pattern Syntax"
### Keywords
There are no globally-reserved words, but in some contexts certain identifiers
are reserved to function as keywords. These are discussed further in the
relevant documentation sections that follow. In such situations, the
identifier's role as a keyword supersedes any other valid interpretation that
may be possible. Outside of these specific situations, the keywords have no
special meaning and are interpreted as regular identifiers.
### Operators and Delimiters
The following character sequences represent operators, delimiters, and other
special tokens:
```
+ && == < : { [ ( ${
- || != > ? } ] ) %{
* ! <= = .
/ >= => ,
% ...
```
### Numeric Literals
A numeric literal is a decimal representation of a real number. It has an
integer part, an optional fractional part, and an optional exponent part.
```ebnf
NumericLit = decimal+ ("." decimal+)? (expmark decimal+)?;
decimal = '0' .. '9';
expmark = ('e' | 'E') ("+" | "-")?;
```
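For illustration, the following are all valid numeric literals under this
production:
```
42
1.5
4e10
1.5e-2
```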
## Structural Elements
The structural language consists of syntax representing the following
constructs:
- _Attributes_, which assign a value to a specified name.
- _Blocks_, which create a child body annotated by a type and optional labels.
- _Body Content_, which consists of a collection of attributes and blocks.
These constructs correspond to the similarly-named concepts in the
language-agnostic HCL information model.
```ebnf
ConfigFile = Body;
Body = (Attribute | Block | OneLineBlock)*;
Attribute = Identifier "=" Expression Newline;
Block = Identifier (StringLit|Identifier)* "{" Newline Body "}" Newline;
OneLineBlock = Identifier (StringLit|Identifier)* "{" (Identifier "=" Expression)? "}" Newline;
```
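For illustration, the following configuration file (with hypothetical block
and attribute names) contains a top-level attribute, a block with one label,
and a one-line block:
```
io_mode = "async"
service "web" {
  listen_addr = "127.0.0.1:8080"
}
settings { verbose = true }
```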
### Configuration Files
A _configuration file_ is a sequence of characters whose top-level is
interpreted as a Body.
### Bodies
A _body_ is a collection of associated attributes and blocks. The meaning of
this association is defined by the calling application.
### Attribute Definitions
An _attribute definition_ assigns a value to a particular attribute name within
a body. Each distinct attribute name may be defined no more than once within a
single body.
The attribute value is given as an expression, which is retained literally
for later evaluation by the calling application.
### Blocks
A _block_ creates a child body that is annotated with a block _type_ and
zero or more block _labels_. Blocks create a structural hierarchy which can be
interpreted by the calling application.
Block labels can either be quoted literal strings or naked identifiers.
## Expressions
The expression sub-language is used within attribute definitions to specify
values.
```ebnf
Expression = (
ExprTerm |
Operation |
Conditional
);
```
### Types
The value types used within the expression language are those defined by the
syntax-agnostic HCL information model. An expression may return any valid
type, but only a subset of the available types have first-class syntax.
A calling application may make other types available via _variables_ and
_functions_.
### Expression Terms
Expression _terms_ are the operands for unary and binary expressions, as well
as acting as expressions in their own right.
```ebnf
ExprTerm = (
LiteralValue |
CollectionValue |
TemplateExpr |
VariableExpr |
FunctionCall |
ForExpr |
ExprTerm Index |
ExprTerm GetAttr |
ExprTerm Splat |
"(" Expression ")"
);
```
The productions for these different term types are given in their corresponding
sections.
Between the `(` and `)` characters denoting a sub-expression, newline
characters are ignored as whitespace.
### Literal Values
A _literal value_ immediately represents a particular value of a primitive
type.
```ebnf
LiteralValue = (
NumericLit |
"true" |
"false" |
"null"
);
```
- Numeric literals represent values of type _number_.
- The `true` and `false` keywords represent values of type _bool_.
- The `null` keyword represents a null value of the dynamic pseudo-type.
String literals are not directly available in the expression sub-language, but
are available via the template sub-language, which can in turn be incorporated
via _template expressions_.
### Collection Values
A _collection value_ combines zero or more other expressions to produce a
collection value.
```ebnf
CollectionValue = tuple | object;
tuple = "[" (
(Expression ("," Expression)* ","?)?
) "]";
object = "{" (
(objectelem ("," objectelem)* ","?)?
) "}";
objectelem = (Identifier | Expression) ("=" | ":") Expression;
```
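For illustration, a tuple value and an object value constructed with this
syntax (the attribute names are hypothetical):
```
ports  = [80, 443, 8080]
limits = { cpu = "100m", memory = "128Mi" }
```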
Only tuple and object values can be directly constructed via native syntax.
Tuple and object values can in turn be converted to list, set and map values
with other operations, which behave as defined by the syntax-agnostic HCL
information model.
When specifying an object element, an identifier is interpreted as a literal
attribute name as opposed to a variable reference. To populate an item key
from a variable, use parentheses to disambiguate:
- `{foo = "baz"}` is interpreted as an attribute literally named `foo`.
- `{(foo) = "baz"}` is interpreted as an attribute whose name is taken
from the variable named `foo`.
Between the open and closing delimiters of these sequences, newline sequences
are ignored as whitespace.
There is a syntax ambiguity between _for expressions_ and collection values
whose first element is a reference to a variable named `for`. The
_for expression_ interpretation has priority, so to produce a tuple whose
first element is the value of a variable named `for`, or an object with a
key named `for`, use parentheses to disambiguate:
- `[for, foo, baz]` is a syntax error.
- `[(for), foo, baz]` is a tuple whose first element is the value of variable
`for`.
- `{for: 1, baz: 2}` is a syntax error.
- `{(for): 1, baz: 2}` is an object with an attribute literally named `for`.
- `{baz: 2, for: 1}` is equivalent to the previous example, and resolves the
ambiguity by reordering.
### Template Expressions
A _template expression_ embeds a program written in the template sub-language
as an expression. Template expressions come in two forms:
- A _quoted_ template expression is delimited by quote characters (`"`) and
defines a template as a single-line expression with escape characters.
- A _heredoc_ template expression is introduced by a `<<` sequence and
defines a template via a multi-line sequence terminated by a user-chosen
delimiter.
In both cases the template interpolation and directive syntax is available for
use within the delimiters, and any text outside of these special sequences is
interpreted as a literal string.
In _quoted_ template expressions any literal string sequences within the
template behave in a special way: literal newline sequences are not permitted
and instead _escape sequences_ can be included, starting with the
backslash `\`:
```
\n Unicode newline control character
\r Unicode carriage return control character
\t Unicode tab control character
\" Literal quote mark, used to prevent interpretation as end of string
\\ Literal backslash, used to prevent interpretation as escape sequence
\uNNNN Unicode character from Basic Multilingual Plane (NNNN is four hexadecimal digits)
\UNNNNNNNN Unicode character from supplementary planes (NNNNNNNN is eight hexadecimal digits)
```
The _heredoc_ template expression type is introduced by either `<<` or `<<-`,
followed by an identifier. The template expression ends when the given
identifier subsequently appears again on a line of its own.
If a heredoc template is introduced with the `<<-` symbol, any literal string
at the start of each line is analyzed to find the minimum number of leading
spaces, and then that number of prefix spaces is removed from all line-leading
literal strings. The final closing marker may also have an arbitrary number
of spaces preceding it on its line.
```ebnf
TemplateExpr = quotedTemplate | heredocTemplate;
quotedTemplate = (as defined in prose above);
heredocTemplate = (
("<<" | "<<-") Identifier Newline
(content as defined in prose above)
Identifier Newline
);
```
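For illustration, a heredoc template introduced with `<<-`, which strips the
common leading indentation (here `name` is assumed to be a variable in scope):
```
message = <<-EOT
  Hello, ${name}!
  EOT
```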
A quoted template expression containing only a single literal string serves
as a syntax for defining literal string _expressions_. In certain contexts
the template syntax is restricted in this manner:
```ebnf
StringLit = '"' (quoted literals as defined in prose above) '"';
```
The `StringLit` production permits the escape sequences discussed for quoted
template expressions as above, but does _not_ permit template interpolation
or directive sequences.
### Variables and Variable Expressions
A _variable_ is a value that has been assigned a symbolic name. Variables are
made available for use in expressions by the calling application, by populating
the _global scope_ used for expression evaluation.
Variables can also be created by expressions themselves, which always creates
a _child scope_ that incorporates the variables from its parent scope but
(re-)defines zero or more names with new values.
The value of a variable is accessed using a _variable expression_, which is
a standalone `Identifier` whose name corresponds to a defined variable:
```ebnf
VariableExpr = Identifier;
```
Variables in a particular scope are immutable, but child scopes may _hide_
a variable from an ancestor scope by defining a new variable of the same name.
When looking up variables, the most locally-defined variable of the given name
is used, and ancestor-scoped variables of the same name cannot be accessed.
No direct syntax is provided for declaring or assigning variables, but other
expression constructs implicitly create child scopes and define variables as
part of their evaluation.
### Functions and Function Calls
A _function_ is an operation that has been assigned a symbolic name. Functions
are made available for use in expressions by the calling application, by
populating the _function table_ used for expression evaluation.
The namespace of functions is distinct from the namespace of variables. A
function and a variable may share the same name with no implication that they
are in any way related.
A function can be executed via a _function call_ expression:
```ebnf
FunctionCall = Identifier "(" Arguments ")";
Arguments = (
() |
(Expression ("," Expression)* ("," | "...")?)
);
```
The definition of functions and the semantics of calling them are defined by
the language-agnostic HCL information model. The given arguments are mapped
onto the function's _parameters_ and the result of a function call expression
is the return value of the named function when given those arguments.
If the final argument expression is followed by the ellipsis symbol (`...`),
the final argument expression must evaluate to either a list or tuple value.
The elements of the value are each mapped to a single parameter of the
named function, beginning at the first parameter remaining after all other
argument expressions have been mapped.
Within the parentheses that delimit the function arguments, newline sequences
are ignored as whitespace.
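For illustration, assuming the calling application provides a `min` function
taking a variable number of number arguments, the ellipsis form expands a
final list or tuple value into individual arguments:
```
min(1, 2, 3)
min(numbers...)
```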
### For Expressions
A _for expression_ is a construct for constructing a collection by projecting
the items from another collection.
```ebnf
ForExpr = forTupleExpr | forObjectExpr;
forTupleExpr = "[" forIntro Expression forCond? "]";
forObjectExpr = "{" forIntro Expression "=>" Expression "..."? forCond? "}";
forIntro = "for" Identifier ("," Identifier)? "in" Expression ":";
forCond = "if" Expression;
```
The punctuation used to delimit a for expression decides whether it will produce
a tuple value (`[` and `]`) or an object value (`{` and `}`).
The "introduction" is equivalent in both cases: the keyword `for` followed by
either one or two identifiers separated by a comma which define the temporary
variable names used for iteration, followed by the keyword `in` and then
an expression that must evaluate to a value that can be iterated. The
introduction is then terminated by the colon (`:`) symbol.
If only one identifier is provided, it is the name of a variable that will
be temporarily assigned the value of each element during iteration. If both
are provided, the first is the key and the second is the value.
Tuple, object, list, map, and set types are iterable. The type of collection
used defines how the key and value variables are populated:
- For tuple and list types, the _key_ is the zero-based index into the
sequence for each element, and the _value_ is the element value. The
elements are visited in index order.
- For object and map types, the _key_ is the string attribute name or element
key, and the _value_ is the attribute or element value. The elements are
visited in the order defined by a lexicographic sort of the attribute names
or keys.
- For set types, the _key_ and _value_ are both the element value. The elements
are visited in an undefined but consistent order.
The expression after the colon and (in the case of object `for`) the expression
after the `=>` are both evaluated once for each element of the source
collection, in a local scope that defines the key and value variable names
specified.
The results of evaluating these expressions for each input element are used
to populate an element in the new collection. In the case of tuple `for`, the
single expression becomes an element, appending values to the tuple in visit
order. In the case of object `for`, the pair of expressions is used as an
attribute name and value respectively, creating an element in the resulting
object.
In the case of object `for`, it is an error if two input elements produce
the same result from the attribute name expression, since duplicate
attributes are not possible. If the ellipsis symbol (`...`) appears
immediately after the value expression, this activates the grouping mode in
which each value in the resulting object is a _tuple_ of all of the values
that were produced against each distinct key.
- `[for v in ["a", "b"]: v]` returns `["a", "b"]`.
- `[for i, v in ["a", "b"]: i]` returns `[0, 1]`.
- `{for i, v in ["a", "b"]: v => i}` returns `{a = 0, b = 1}`.
- `{for i, v in ["a", "a", "b"]: v => i}` produces an error, because attribute
`a` is defined twice.
- `{for i, v in ["a", "a", "b"]: v => i...}` returns `{a = [0, 1], b = [2]}`.
If the `if` keyword is used after the element expression(s), it applies an
additional predicate that can be used to conditionally filter elements from
the source collection from consideration. The expression following `if` is
evaluated once for each source element, in the same scope used for the
element expression(s). It must evaluate to a boolean value; if `true`, the
element will be evaluated as normal, while if `false` the element will be
skipped.
- `[for i, v in ["a", "b", "c"]: v if i < 2]` returns `["a", "b"]`.
If the collection value, element expression(s) or condition expression return
unknown values that are otherwise type-valid, the result is a value of the
dynamic pseudo-type.
### Index Operator
The _index_ operator returns the value of a single element of a collection
value. It is a postfix operator and can be applied to any value that has
a tuple, object, map, or list type.
```ebnf
Index = "[" Expression "]";
```
The expression delimited by the brackets is the _key_ by which an element
will be looked up.
If the index operator is applied to a value of tuple or list type, the
key expression must be a non-negative integer number representing the
zero-based element index to access. If applied to a value of object or map
type, the key expression must be a string representing the attribute name
or element key. If the given key value is not of the appropriate type, a
conversion is attempted using the conversion rules from the HCL
syntax-agnostic information model.
An error is produced if the given key expression does not correspond to
an element in the collection, either because it is of an unconvertible type,
because it is outside the range of elements for a tuple or list, or because
the given attribute or key does not exist.
If either the collection or the key are an unknown value of an
otherwise-suitable type, the return value is an unknown value whose type
matches what type would be returned given known values, or a value of the
dynamic pseudo-type if type information alone cannot determine a suitable
return type.
Within the brackets that delimit the index key, newline sequences are ignored
as whitespace.
The HCL native syntax also includes a _legacy_ index operator that exists
only for compatibility with the precursor language HIL:
```ebnf
LegacyIndex = '.' digit+
```
This legacy index operator must be supported by the parser for compatibility
but should not be used in new configurations. It allows an attribute-access-like
syntax for indexing, but it must still be interpreted as an index operation
rather than attribute access.
The legacy syntax does not support chaining of index operations, like
`foo.0.0.bar`, because the interpretation of `0.0` as a number literal token
takes priority and thus renders the resulting sequence invalid.
### Attribute Access Operator
The _attribute access_ operator returns the value of a single attribute in
an object value. It is a postfix operator and can be applied to any value
that has an object type.
```ebnf
GetAttr = "." Identifier;
```
The given identifier is interpreted as the name of the attribute to access.
An error is produced if the object to which the operator is applied does not
have an attribute with the given name.
If the object is an unknown value of a type that has the attribute named, the
result is an unknown value of the attribute's type.
### Splat Operators
The _splat operators_ allow convenient access to attributes or elements of
elements in a tuple, list, or set value.
There are two kinds of "splat" operator:
- The _attribute-only_ splat operator supports only attribute lookups into
the elements from a list, but supports an arbitrary number of them.
- The _full_ splat operator additionally supports indexing into the elements
from a list, and allows any combination of attribute access and index
operations.
```ebnf
Splat = attrSplat | fullSplat;
attrSplat = "." "*" GetAttr*;
fullSplat = "[" "*" "]" (GetAttr | Index)*;
```
The splat operators can be thought of as shorthands for common operations that
could otherwise be performed using _for expressions_:
- `tuple.*.foo.bar[0]` is approximately equivalent to
`[for v in tuple: v.foo.bar][0]`.
- `tuple[*].foo.bar[0]` is approximately equivalent to
`[for v in tuple: v.foo.bar[0]]`
Note the difference in how the trailing index operator is interpreted in
each case. This different interpretation is the key difference between the
_attribute-only_ and _full_ splat operators.
Splat operators have one additional behavior compared to the equivalent
_for expressions_ shown above: if a splat operator is applied to a value that
is _not_ of tuple, list, or set type, the value is coerced automatically into
a single-value list of the value type:
- `any_object.*.id` is equivalent to `[any_object.id]`, assuming that `any_object`
is a single object.
- `any_number.*` is equivalent to `[any_number]`, assuming that `any_number`
is a single number.
If applied to a null value that is not tuple, list, or set, the result is always
an empty tuple, which allows conveniently converting a possibly-null scalar
value into a tuple of zero or one elements. It is illegal to apply a splat
operator to a null value of tuple, list, or set type.
### Operations
Operations apply a particular operator to either one or two expression terms.
```ebnf
Operation = unaryOp | binaryOp;
unaryOp = ("-" | "!") ExprTerm;
binaryOp = ExprTerm binaryOperator ExprTerm;
binaryOperator = compareOperator | arithmeticOperator | logicOperator;
compareOperator = "==" | "!=" | "<" | ">" | "<=" | ">=";
arithmeticOperator = "+" | "-" | "*" | "/" | "%";
logicOperator = "&&" | "||" | "!";
```
The unary operators have the highest precedence.
The binary operators are grouped into the following precedence levels:
```
Level Operators
6 * / %
5 + -
4 > >= < <=
3 == !=
2 &&
1 ||
```
Higher values of "level" bind tighter. Operators within the same precedence
level have left-to-right associativity. For example, `x / y * z` is equivalent
to `(x / y) * z`.
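A quick check of this associativity rule (an editorial sketch, not part of the
original specification), using the native syntax parser on a constant
expression:
```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclsyntax"
)

func main() {
	// Same-level operators associate left-to-right, so this evaluates as
	// (8 / 4) * 2 = 4 rather than 8 / (4 * 2) = 1.
	expr, diags := hclsyntax.ParseExpression([]byte("8 / 4 * 2"), "prec.hcl", hcl.InitialPos)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	val, diags := expr.Value(nil) // constant expression; no EvalContext needed
	if diags.HasErrors() {
		panic(diags.Error())
	}
	fmt.Println(val.AsBigFloat()) // 4
}
```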
### Comparison Operators
Comparison operators always produce boolean values, as a result of testing
the relationship between two values.
The two equality operators apply to values of any type:
```
a == b equal
a != b not equal
```
Two values are equal if they are of identical types and their values are
equal as defined in the HCL syntax-agnostic information model. The equality
operators are commutative and opposite, such that `(a == b) == !(a != b)`
and `(a == b) == (b == a)` for all values `a` and `b`.
The four numeric comparison operators apply only to numbers:
```
a < b less than
a <= b less than or equal to
a > b greater than
a >= b greater than or equal to
```
If either operand of a comparison operator is a correctly-typed unknown value
or a value of the dynamic pseudo-type, the result is an unknown boolean.
### Arithmetic Operators
Arithmetic operators apply only to number values and always produce number
values as results.
```
a + b sum (addition)
a - b difference (subtraction)
a * b product (multiplication)
a / b quotient (division)
a % b remainder (modulo)
-a negation
```
Arithmetic operations are considered to be performed in an arbitrary-precision
number space.
If either operand of an arithmetic operator is an unknown number or a value
of the dynamic pseudo-type, the result is an unknown number.
### Logic Operators
Logic operators apply only to boolean values and always produce boolean values
as results.
```
a && b logical AND
a || b logical OR
!a logical NOT
```
If either operand of a logic operator is an unknown bool value or a value
of the dynamic pseudo-type, the result is an unknown bool value.
### Conditional Operator
The conditional operator allows selecting from one of two expressions based on
the outcome of a boolean expression.
```ebnf
Conditional = Expression "?" Expression ":" Expression;
```
The first expression is the _predicate_, which is evaluated and must produce
a boolean result. If the predicate value is `true`, the result of the second
expression is the result of the conditional. If the predicate value is
`false`, the result of the third expression is the result of the conditional.
The second and third expressions must be of the same type or must be able to
unify into a common type using the type unification rules defined in the
HCL syntax-agnostic information model. This unified type is the result type
of the conditional, with both expressions converted as necessary to the
unified type.
If the predicate is an unknown boolean value or a value of the dynamic
pseudo-type then the result is an unknown value of the unified type of the
other two expressions.
If either the second or third expressions produce errors when evaluated,
these errors are passed through only if the erroneous expression is selected.
This allows for expressions such as
`length(some_list) > 0 ? some_list[0] : default` (given some suitable `length`
function) without producing an error when the predicate is `false`.
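The following editorial Go sketch (not part of the original specification)
demonstrates both the ordinary selection behavior and the unknown-predicate
rule; the `enabled` variable is an arbitrary example:
```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclsyntax"
	"github.com/zclconf/go-cty/cty"
)

func main() {
	expr, diags := hclsyntax.ParseExpression([]byte(`enabled ? "on" : "off"`), "cond.hcl", hcl.InitialPos)
	if diags.HasErrors() {
		panic(diags.Error())
	}

	known := &hcl.EvalContext{Variables: map[string]cty.Value{"enabled": cty.True}}
	v1, _ := expr.Value(known)
	fmt.Println(v1.AsString()) // on

	// An unknown predicate yields an unknown value of the unified result type.
	unknown := &hcl.EvalContext{Variables: map[string]cty.Value{"enabled": cty.UnknownVal(cty.Bool)}}
	v2, _ := expr.Value(unknown)
	fmt.Println(v2.IsKnown(), v2.Type() == cty.String) // false true
}
```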
## Templates
The template sub-language is used within template expressions to concisely
combine strings and other values to produce other strings. It can also be
used in isolation as a standalone template language.
```ebnf
Template = (
TemplateLiteral |
TemplateInterpolation |
TemplateDirective
)*
TemplateDirective = TemplateIf | TemplateFor;
```
A template behaves like an expression that always returns a string value.
The different elements of the template are evaluated and combined into a
single string to return. If any of the elements produce an unknown string
or a value of the dynamic pseudo-type, the result is an unknown string.
An important use-case for standalone templates is to enable the use of
expressions in alternative HCL syntaxes where a native expression grammar is
not available. For example, the HCL JSON profile treats the values of JSON
strings as standalone templates when attributes are evaluated in expression
mode.
### Template Literals
A template literal is a literal sequence of characters to include in the
resulting string. When the template sub-language is used standalone, a
template literal can contain any unicode character, with the exception
of the sequences that introduce interpolations and directives, and for the
sequences that escape those introductions.
The interpolation and directive introductions are escaped by doubling their
leading characters. The `${` sequence is escaped as `$${` and the `%{`
sequence is escaped as `%%{`.
When the template sub-language is embedded in the expression language via
_template expressions_, additional constraints and transforms are applied to
template literals as described in the definition of template expressions.
The value of a template literal can be modified by _strip markers_ in any
interpolations or directives that are adjacent to it. A strip marker is
a tilde (`~`) placed immediately after the opening `{` or before the closing
`}` of a template sequence:
- `hello ${~ "world" }` produces `"helloworld"`.
- `%{ if true ~} hello %{~ endif }` produces `"hello"`.
When a strip marker is present, any spaces adjacent to it in the corresponding
string literal (if any) are removed before producing the final value. Space
characters are interpreted as per Unicode's definition.
Stripping is done at syntax level rather than value level. Values returned
by interpolations or directives are not subject to stripping:
- `${"hello" ~}${" world"}` produces `"hello world"`, and not `"helloworld"`,
because the space is not in a template literal directly adjacent to the
strip marker.
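A minimal editorial Go sketch (not part of the original specification) showing
a strip marker at work through the standalone template parser:
```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclsyntax"
)

func main() {
	// The ~ marker strips the adjacent space in the literal "hello ".
	tmpl, diags := hclsyntax.ParseTemplate([]byte(`hello ${~ "world" }`), "t.hcl", hcl.InitialPos)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	val, diags := tmpl.Value(nil)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	fmt.Println(val.AsString()) // helloworld
}
```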
### Template Interpolations
An _interpolation sequence_ evaluates an expression (written in the
expression sub-language), converts the result to a string value, and
replaces itself with the resulting string.
```ebnf
TemplateInterpolation = ("${" | "${~") Expression ("}" | "~}");
```
If the expression result cannot be converted to a string, an error is
produced.
### Template If Directive
The template `if` directive is the template equivalent of the
_conditional expression_, allowing selection of one of two sub-templates based
on the value of a predicate expression.
```ebnf
TemplateIf = (
("%{" | "%{~") "if" Expression ("}" | "~}")
Template
(
("%{" | "%{~") "else" ("}" | "~}")
Template
)?
("%{" | "%{~") "endif" ("}" | "~}")
);
```
The evaluation of the `if` directive is equivalent to the conditional
expression, with the following exceptions:
- The two sub-templates always produce strings, and thus the result value is
also always a string.
- The `else` clause may be omitted, in which case the conditional's third
expression result is implied to be the empty string.
### Template For Directive
The template `for` directive is the template equivalent of the _for expression_,
producing zero or more copies of its sub-template based on the elements of
a collection.
```ebnf
TemplateFor = (
("%{" | "%{~") "for" Identifier ("," Identifier) "in" Expression ("}" | "~}")
Template
("%{" | "%{~") "endfor" ("}" | "~}")
);
```
The evaluation of the `for` directive is equivalent to the _for expression_
when producing a tuple, with the following exceptions:
- The sub-template always produces a string.
- There is no equivalent of the "if" clause on the for expression.
- The elements of the resulting tuple are all converted to strings and
concatenated to produce a flat string result.
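For example, the following editorial Go sketch (not part of the original
specification) renders a `for` directive into a flat string:
```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclsyntax"
)

func main() {
	// Each iteration's rendering is converted to string and concatenated.
	src := []byte(`%{ for s in ["a", "b"] }${s};%{ endfor }`)
	tmpl, diags := hclsyntax.ParseTemplate(src, "for.hcl", hcl.InitialPos)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	val, diags := tmpl.Value(&hcl.EvalContext{})
	if diags.HasErrors() {
		panic(diags.Error())
	}
	fmt.Println(val.AsString()) // a;b;
}
```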
### Template Interpolation Unwrapping
As a special case, a template that consists only of a single interpolation,
with no surrounding literals, directives or other interpolations, is
"unwrapped". In this case, the result of the interpolation expression is
returned verbatim, without conversion to string.
This special case exists primarily to enable the native template language
to be used inside strings in alternative HCL syntaxes that lack a first-class
template or expression syntax. Unwrapping allows arbitrary expressions to be
used to populate attributes when strings in such languages are interpreted
as templates.
- `${true}` produces the boolean value `true`
- `${"${true}"}` produces the boolean value `true`, because both the inner
and outer interpolations are subject to unwrapping.
- `hello ${true}` produces the string `"hello true"`
- `${""}${true}` produces the string `"true"` because there are two
interpolation sequences, even though one produces an empty result.
- `%{ for v in [true] }${v}%{ endfor }` produces the string `true` because
the presence of the `for` directive circumvents the unwrapping even though
the final result is a single value.
In some contexts this unwrapping behavior may be circumvented by the calling
application, by converting the final template result to string. This is
necessary, for example, if a standalone template is being used to produce
the direct contents of a file, since the result in that case must always be a
string.
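An editorial Go sketch of unwrapping (not part of the original specification):
```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclsyntax"
	"github.com/zclconf/go-cty/cty"
)

func main() {
	// A template that is exactly one interpolation unwraps to the
	// underlying value: the result here is a bool, not a string.
	tmpl, diags := hclsyntax.ParseTemplate([]byte(`${true}`), "t.hcl", hcl.InitialPos)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	val, _ := tmpl.Value(nil)
	fmt.Println(val.Type() == cty.Bool, val.True()) // true true
}
```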
## Static Analysis
The HCL static analysis operations are implemented for some expression types
in the native syntax, as described in the following sections.
A goal for static analysis of the native syntax is for the interpretation to
be as consistent as possible with the dynamic evaluation interpretation of
the given expression, though some deviations are intentionally made in order
to maximize the potential for analysis.
### Static List
The tuple construction syntax can be interpreted as a static list. All of
the expression elements given are returned as the static list elements,
with no further interpretation.
### Static Map
The object construction syntax can be interpreted as a static map. All of the
key/value pairs given are returned as the static pairs, with no further
interpretation.
The usual requirement that an attribute name be interpretable as a string
does not apply to this static analysis, allowing callers to provide map-like
constructs with different key types by building on the map syntax.
### Static Call
The function call syntax can be interpreted as a static call. The called
function name is returned verbatim and the given argument expressions are
returned as the static arguments, with no further interpretation.
### Static Traversal
A variable expression and any attached attribute access operations and
constant index operations can be interpreted as a static traversal.
The keywords `true`, `false` and `null` can also be interpreted as
static traversals, behaving as if they were references to variables of those
names, to allow callers to redefine the meaning of those keywords in certain
contexts.
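For example (an editorial sketch, not part of the original specification), the
generic `hcl.AbsTraversalForExpr` helper applies this static traversal
analysis to a parsed expression:
```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	"github.com/hashicorp/hcl/v2/hclsyntax"
)

func main() {
	expr, diags := hclsyntax.ParseExpression([]byte("aws_instance.example.id"), "trav.hcl", hcl.InitialPos)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	// Succeeds only for a variable reference with attribute access and
	// constant index steps attached.
	trav, diags := hcl.AbsTraversalForExpr(expr)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	fmt.Println(trav.RootName()) // aws_instance
}
```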

@ -0,0 +1,394 @@
package hclsyntax
import (
"fmt"
"strings"
"github.com/hashicorp/hcl/v2"
)
// AsHCLBlock returns the block data expressed as a *hcl.Block.
func (b *Block) AsHCLBlock() *hcl.Block {
if b == nil {
return nil
}
lastHeaderRange := b.TypeRange
if len(b.LabelRanges) > 0 {
lastHeaderRange = b.LabelRanges[len(b.LabelRanges)-1]
}
return &hcl.Block{
Type: b.Type,
Labels: b.Labels,
Body: b.Body,
DefRange: hcl.RangeBetween(b.TypeRange, lastHeaderRange),
TypeRange: b.TypeRange,
LabelRanges: b.LabelRanges,
}
}
// Body is the implementation of hcl.Body for the HCL native syntax.
type Body struct {
Attributes Attributes
Blocks Blocks
// These are used with PartialContent to produce a "remaining items"
// body to return. They are nil on all bodies fresh out of the parser.
hiddenAttrs map[string]struct{}
hiddenBlocks map[string]struct{}
SrcRange hcl.Range
EndRange hcl.Range // Final token of the body, for reporting missing items
}
// Assert that *Body implements hcl.Body
var assertBodyImplBody hcl.Body = &Body{}
func (b *Body) walkChildNodes(w internalWalkFunc) {
w(b.Attributes)
w(b.Blocks)
}
func (b *Body) Range() hcl.Range {
return b.SrcRange
}
func (b *Body) Content(schema *hcl.BodySchema) (*hcl.BodyContent, hcl.Diagnostics) {
content, remainHCL, diags := b.PartialContent(schema)
// Now we'll see if anything actually remains, to produce errors about
// extraneous items.
remain := remainHCL.(*Body)
for name, attr := range b.Attributes {
if _, hidden := remain.hiddenAttrs[name]; !hidden {
var suggestions []string
for _, attrS := range schema.Attributes {
if _, defined := content.Attributes[attrS.Name]; defined {
continue
}
suggestions = append(suggestions, attrS.Name)
}
suggestion := nameSuggestion(name, suggestions)
if suggestion != "" {
suggestion = fmt.Sprintf(" Did you mean %q?", suggestion)
} else {
// Is there a block of the same name?
for _, blockS := range schema.Blocks {
if blockS.Type == name {
suggestion = fmt.Sprintf(" Did you mean to define a block of type %q?", name)
break
}
}
}
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unsupported argument",
Detail: fmt.Sprintf("An argument named %q is not expected here.%s", name, suggestion),
Subject: &attr.NameRange,
})
}
}
for _, block := range b.Blocks {
blockTy := block.Type
if _, hidden := remain.hiddenBlocks[blockTy]; !hidden {
var suggestions []string
for _, blockS := range schema.Blocks {
suggestions = append(suggestions, blockS.Type)
}
suggestion := nameSuggestion(blockTy, suggestions)
if suggestion != "" {
suggestion = fmt.Sprintf(" Did you mean %q?", suggestion)
} else {
// Is there an attribute of the same name?
for _, attrS := range schema.Attributes {
if attrS.Name == blockTy {
suggestion = fmt.Sprintf(" Did you mean to define argument %q? If so, use the equals sign to assign it a value.", blockTy)
break
}
}
}
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unsupported block type",
Detail: fmt.Sprintf("Blocks of type %q are not expected here.%s", blockTy, suggestion),
Subject: &block.TypeRange,
})
}
}
return content, diags
}
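// Editorial usage sketch (illustrative only, not part of the original file):
// a caller typically pairs Content with a schema describing the items it
// expects, e.g.
//
//	schema := &hcl.BodySchema{
//		Attributes: []hcl.AttributeSchema{{Name: "name", Required: true}},
//		Blocks:     []hcl.BlockHeaderSchema{{Type: "target", LabelNames: []string{"name"}}},
//	}
//	content, diags := body.Content(schema)
//	_ = content.Attributes["name"]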
func (b *Body) PartialContent(schema *hcl.BodySchema) (*hcl.BodyContent, hcl.Body, hcl.Diagnostics) {
attrs := make(hcl.Attributes)
var blocks hcl.Blocks
var diags hcl.Diagnostics
hiddenAttrs := make(map[string]struct{})
hiddenBlocks := make(map[string]struct{})
if b.hiddenAttrs != nil {
for k, v := range b.hiddenAttrs {
hiddenAttrs[k] = v
}
}
if b.hiddenBlocks != nil {
for k, v := range b.hiddenBlocks {
hiddenBlocks[k] = v
}
}
for _, attrS := range schema.Attributes {
name := attrS.Name
attr, exists := b.Attributes[name]
_, hidden := hiddenAttrs[name]
if hidden || !exists {
if attrS.Required {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Missing required argument",
Detail: fmt.Sprintf("The argument %q is required, but no definition was found.", attrS.Name),
Subject: b.MissingItemRange().Ptr(),
})
}
continue
}
hiddenAttrs[name] = struct{}{}
attrs[name] = attr.AsHCLAttribute()
}
blocksWanted := make(map[string]hcl.BlockHeaderSchema)
for _, blockS := range schema.Blocks {
blocksWanted[blockS.Type] = blockS
}
for _, block := range b.Blocks {
if _, hidden := hiddenBlocks[block.Type]; hidden {
continue
}
blockS, wanted := blocksWanted[block.Type]
if !wanted {
continue
}
if len(block.Labels) > len(blockS.LabelNames) {
name := block.Type
if len(blockS.LabelNames) == 0 {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: fmt.Sprintf("Extraneous label for %s", name),
Detail: fmt.Sprintf(
"No labels are expected for %s blocks.", name,
),
Subject: block.LabelRanges[0].Ptr(),
Context: hcl.RangeBetween(block.TypeRange, block.OpenBraceRange).Ptr(),
})
} else {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: fmt.Sprintf("Extraneous label for %s", name),
Detail: fmt.Sprintf(
"Only %d labels (%s) are expected for %s blocks.",
len(blockS.LabelNames), strings.Join(blockS.LabelNames, ", "), name,
),
Subject: block.LabelRanges[len(blockS.LabelNames)].Ptr(),
Context: hcl.RangeBetween(block.TypeRange, block.OpenBraceRange).Ptr(),
})
}
continue
}
if len(block.Labels) < len(blockS.LabelNames) {
name := block.Type
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: fmt.Sprintf("Missing %s for %s", blockS.LabelNames[len(block.Labels)], name),
Detail: fmt.Sprintf(
"All %s blocks must have %d labels (%s).",
name, len(blockS.LabelNames), strings.Join(blockS.LabelNames, ", "),
),
Subject: &block.OpenBraceRange,
Context: hcl.RangeBetween(block.TypeRange, block.OpenBraceRange).Ptr(),
})
continue
}
blocks = append(blocks, block.AsHCLBlock())
}
// We hide blocks only after we've processed all of them, since otherwise
// we can't process more than one of the same type.
for _, blockS := range schema.Blocks {
hiddenBlocks[blockS.Type] = struct{}{}
}
remain := &Body{
Attributes: b.Attributes,
Blocks: b.Blocks,
hiddenAttrs: hiddenAttrs,
hiddenBlocks: hiddenBlocks,
SrcRange: b.SrcRange,
EndRange: b.EndRange,
}
return &hcl.BodyContent{
Attributes: attrs,
Blocks: blocks,
MissingItemRange: b.MissingItemRange(),
}, remain, diags
}
func (b *Body) JustAttributes() (hcl.Attributes, hcl.Diagnostics) {
attrs := make(hcl.Attributes)
var diags hcl.Diagnostics
if len(b.Blocks) > 0 {
example := b.Blocks[0]
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: fmt.Sprintf("Unexpected %q block", example.Type),
Detail: "Blocks are not allowed here.",
Subject: &example.TypeRange,
})
// we will continue processing anyway, and return the attributes
// we are able to find so that certain analyses can still be done
// in the face of errors.
}
if b.Attributes == nil {
return attrs, diags
}
for name, attr := range b.Attributes {
if _, hidden := b.hiddenAttrs[name]; hidden {
continue
}
attrs[name] = attr.AsHCLAttribute()
}
return attrs, diags
}
func (b *Body) MissingItemRange() hcl.Range {
return hcl.Range{
Filename: b.SrcRange.Filename,
Start: b.SrcRange.Start,
End: b.SrcRange.Start,
}
}
// Attributes is the collection of attribute definitions within a body.
type Attributes map[string]*Attribute
func (a Attributes) walkChildNodes(w internalWalkFunc) {
for _, attr := range a {
w(attr)
}
}
// Range returns the range of some arbitrary point within the set of
// attributes, or an invalid range if there are no attributes.
//
// This is provided only to complete the Node interface, but has no practical
// use.
func (a Attributes) Range() hcl.Range {
// An Attributes map doesn't really have a useful range to report, since
// it's just a grouping construct. So we'll arbitrarily take the
// range of one of the attributes, or produce an invalid range if we have
// none. In practice, there's little reason to ask for the range of
// an Attributes.
for _, attr := range a {
return attr.Range()
}
return hcl.Range{
Filename: "<unknown>",
}
}
// Attribute represents a single attribute definition within a body.
type Attribute struct {
Name string
Expr Expression
SrcRange hcl.Range
NameRange hcl.Range
EqualsRange hcl.Range
}
func (a *Attribute) walkChildNodes(w internalWalkFunc) {
w(a.Expr)
}
func (a *Attribute) Range() hcl.Range {
return a.SrcRange
}
// AsHCLAttribute returns the attribute data expressed as a *hcl.Attribute.
func (a *Attribute) AsHCLAttribute() *hcl.Attribute {
if a == nil {
return nil
}
return &hcl.Attribute{
Name: a.Name,
Expr: a.Expr,
Range: a.SrcRange,
NameRange: a.NameRange,
}
}
// Blocks is the list of nested blocks within a body.
type Blocks []*Block
func (bs Blocks) walkChildNodes(w internalWalkFunc) {
for _, block := range bs {
w(block)
}
}
// Range returns the range of some arbitrary point within the list of
// blocks, or an invalid range if there are no blocks.
//
// This is provided only to complete the Node interface, but has no practical
// use.
func (bs Blocks) Range() hcl.Range {
if len(bs) > 0 {
return bs[0].Range()
}
return hcl.Range{
Filename: "<unknown>",
}
}
// Block represents a nested block structure
type Block struct {
Type string
Labels []string
Body *Body
TypeRange hcl.Range
LabelRanges []hcl.Range
OpenBraceRange hcl.Range
CloseBraceRange hcl.Range
}
func (b *Block) walkChildNodes(w internalWalkFunc) {
w(b.Body)
}
func (b *Block) Range() hcl.Range {
return hcl.RangeBetween(b.TypeRange, b.CloseBraceRange)
}
func (b *Block) DefRange() hcl.Range {
return hcl.RangeBetween(b.TypeRange, b.OpenBraceRange)
}

@ -0,0 +1,118 @@
package hclsyntax
import (
"github.com/hashicorp/hcl/v2"
)
// -----------------------------------------------------------------------------
// The methods in this file are all optional extension methods that serve to
// implement the methods of the same name on *hcl.File when its root body
// is provided by this package.
// -----------------------------------------------------------------------------
// BlocksAtPos implements the method of the same name for an *hcl.File that
// is backed by a *Body.
func (b *Body) BlocksAtPos(pos hcl.Pos) []*hcl.Block {
list, _ := b.blocksAtPos(pos, true)
return list
}
// InnermostBlockAtPos implements the method of the same name for an *hcl.File
// that is backed by a *Body.
func (b *Body) InnermostBlockAtPos(pos hcl.Pos) *hcl.Block {
_, innermost := b.blocksAtPos(pos, false)
return innermost.AsHCLBlock()
}
// OutermostBlockAtPos implements the method of the same name for an *hcl.File
// that is backed by a *Body.
func (b *Body) OutermostBlockAtPos(pos hcl.Pos) *hcl.Block {
return b.outermostBlockAtPos(pos).AsHCLBlock()
}
// blocksAtPos is the internal engine of both BlocksAtPos and
// InnermostBlockAtPos, which both need to do the same logic but return a
// differently-shaped result.
//
// list is nil if makeList is false, avoiding an allocation. Innermost is
// always set, and if the returned list is non-nil it will always match the
// final element from that list.
func (b *Body) blocksAtPos(pos hcl.Pos, makeList bool) (list []*hcl.Block, innermost *Block) {
current := b
Blocks:
for current != nil {
for _, block := range current.Blocks {
wholeRange := hcl.RangeBetween(block.TypeRange, block.CloseBraceRange)
if wholeRange.ContainsPos(pos) {
innermost = block
if makeList {
list = append(list, innermost.AsHCLBlock())
}
current = block.Body
continue Blocks
}
}
// If we fall out here then none of the current body's nested blocks
// contain the position we are looking for, and so we're done.
break
}
return
}
// outermostBlockAtPos is the internal version of OutermostBlockAtPos that
// returns a hclsyntax.Block rather than an hcl.Block, allowing for further
// analysis if necessary.
func (b *Body) outermostBlockAtPos(pos hcl.Pos) *Block {
// This is similar to blocksAtPos, but simpler because we know it only
// ever needs to search the first level of nested blocks.
for _, block := range b.Blocks {
wholeRange := hcl.RangeBetween(block.TypeRange, block.CloseBraceRange)
if wholeRange.ContainsPos(pos) {
return block
}
}
return nil
}
// AttributeAtPos implements the method of the same name for an *hcl.File
// that is backed by a *Body.
func (b *Body) AttributeAtPos(pos hcl.Pos) *hcl.Attribute {
return b.attributeAtPos(pos).AsHCLAttribute()
}
// attributeAtPos is the internal version of AttributeAtPos that returns a
// hclsyntax.Attribute rather than an hcl.Attribute, allowing for further analysis if
// necessary.
func (b *Body) attributeAtPos(pos hcl.Pos) *Attribute {
searchBody := b
_, block := b.blocksAtPos(pos, false)
if block != nil {
searchBody = block.Body
}
for _, attr := range searchBody.Attributes {
if attr.SrcRange.ContainsPos(pos) {
return attr
}
}
return nil
}
// OutermostExprAtPos implements the method of the same name for an *hcl.File
// that is backed by a *Body.
func (b *Body) OutermostExprAtPos(pos hcl.Pos) hcl.Expression {
attr := b.attributeAtPos(pos)
if attr == nil {
return nil
}
if !attr.Expr.Range().ContainsPos(pos) {
return nil
}
return attr.Expr
}
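// Editorial usage sketch (illustrative only, not part of the original file):
// from a calling package, these position lookups are typically reached
// through the root body of a parsed file, e.g.
//
//	f, diags := hclsyntax.ParseConfig(src, "main.hcl", hcl.InitialPos)
//	body := f.Body.(*hclsyntax.Body)
//	block := body.InnermostBlockAtPos(hcl.Pos{Line: 3, Column: 5, Byte: 40})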

@ -0,0 +1,320 @@
package hclsyntax
import (
"bytes"
"fmt"
"github.com/apparentlymart/go-textseg/v12/textseg"
"github.com/hashicorp/hcl/v2"
)
// Token represents a sequence of bytes from some HCL code that has been
// tagged with a type and its range within the source file.
type Token struct {
Type TokenType
Bytes []byte
Range hcl.Range
}
// Tokens is a slice of Token.
type Tokens []Token
// TokenType is an enumeration used for the Type field on Token.
type TokenType rune
const (
// Single-character tokens are represented by their own character, for
// convenience in producing these within the scanner. However, the values
// are otherwise arbitrary and just intended to be mnemonic for humans
// who might see them in debug output.
TokenOBrace TokenType = '{'
TokenCBrace TokenType = '}'
TokenOBrack TokenType = '['
TokenCBrack TokenType = ']'
TokenOParen TokenType = '('
TokenCParen TokenType = ')'
TokenOQuote TokenType = '«'
TokenCQuote TokenType = '»'
TokenOHeredoc TokenType = 'H'
TokenCHeredoc TokenType = 'h'
TokenStar TokenType = '*'
TokenSlash TokenType = '/'
TokenPlus TokenType = '+'
TokenMinus TokenType = '-'
TokenPercent TokenType = '%'
TokenEqual TokenType = '='
TokenEqualOp TokenType = '≔'
TokenNotEqual TokenType = '≠'
TokenLessThan TokenType = '<'
TokenLessThanEq TokenType = '≤'
TokenGreaterThan TokenType = '>'
TokenGreaterThanEq TokenType = '≥'
TokenAnd TokenType = '∧'
TokenOr TokenType = '∨'
TokenBang TokenType = '!'
TokenDot TokenType = '.'
TokenComma TokenType = ','
TokenEllipsis TokenType = '…'
TokenFatArrow TokenType = '⇒'
TokenQuestion TokenType = '?'
TokenColon TokenType = ':'
TokenTemplateInterp TokenType = '∫'
TokenTemplateControl TokenType = 'λ'
TokenTemplateSeqEnd TokenType = '∎'
TokenQuotedLit TokenType = 'Q' // might contain backslash escapes
TokenStringLit TokenType = 'S' // cannot contain backslash escapes
TokenNumberLit TokenType = 'N'
TokenIdent TokenType = 'I'
TokenComment TokenType = 'C'
TokenNewline TokenType = '\n'
TokenEOF TokenType = '␄'
// The rest are not used in the language but recognized by the scanner so
// we can generate good diagnostics in the parser when users try to write
// things that might work in other languages they are familiar with, or
// simply make incorrect assumptions about the HCL language.
TokenBitwiseAnd TokenType = '&'
TokenBitwiseOr TokenType = '|'
TokenBitwiseNot TokenType = '~'
TokenBitwiseXor TokenType = '^'
TokenStarStar TokenType = '➚'
TokenApostrophe TokenType = '\''
TokenBacktick TokenType = '`'
TokenSemicolon TokenType = ';'
TokenTabs TokenType = '␉'
TokenInvalid TokenType = '�'
TokenBadUTF8 TokenType = '💩'
TokenQuotedNewline TokenType = '␤'
// TokenNil is a placeholder for when a token is required but none is
// available, e.g. when reporting errors. The scanner will never produce
// this as part of a token stream.
TokenNil TokenType = '\x00'
)
func (t TokenType) GoString() string {
return fmt.Sprintf("hclsyntax.%s", t.String())
}
type scanMode int
const (
scanNormal scanMode = iota
scanTemplate
scanIdentOnly
)
type tokenAccum struct {
Filename string
Bytes []byte
Pos hcl.Pos
Tokens []Token
StartByte int
}
func (f *tokenAccum) emitToken(ty TokenType, startOfs, endOfs int) {
// Walk through our buffer to figure out how much we need to adjust
// the start pos to get our end pos.
start := f.Pos
start.Column += startOfs + f.StartByte - f.Pos.Byte // Safe because only ASCII spaces can be in the offset
start.Byte = startOfs + f.StartByte
end := start
end.Byte = endOfs + f.StartByte
b := f.Bytes[startOfs:endOfs]
for len(b) > 0 {
advance, seq, _ := textseg.ScanGraphemeClusters(b, true)
if (len(seq) == 1 && seq[0] == '\n') || (len(seq) == 2 && seq[0] == '\r' && seq[1] == '\n') {
end.Line++
end.Column = 1
} else {
end.Column++
}
b = b[advance:]
}
f.Pos = end
f.Tokens = append(f.Tokens, Token{
Type: ty,
Bytes: f.Bytes[startOfs:endOfs],
Range: hcl.Range{
Filename: f.Filename,
Start: start,
End: end,
},
})
}
type heredocInProgress struct {
Marker []byte
StartOfLine bool
}
func tokenOpensFlushHeredoc(tok Token) bool {
if tok.Type != TokenOHeredoc {
return false
}
return bytes.HasPrefix(tok.Bytes, []byte{'<', '<', '-'})
}
// checkInvalidTokens does a simple pass across the given tokens and generates
// diagnostics for tokens that should _never_ appear in HCL source. This
// is intended to avoid the need for the parser to have special support
// for them all over.
//
// Returns a diagnostics with no errors if everything seems acceptable.
// Otherwise, returns zero or more error diagnostics, though tries to limit
// repetition of the same information.
func checkInvalidTokens(tokens Tokens) hcl.Diagnostics {
var diags hcl.Diagnostics
toldBitwise := 0
toldExponent := 0
toldBacktick := 0
toldApostrophe := 0
toldSemicolon := 0
toldTabs := 0
toldBadUTF8 := 0
for _, tok := range tokens {
// copy token so it's safe to point to it
tok := tok
switch tok.Type {
case TokenBitwiseAnd, TokenBitwiseOr, TokenBitwiseXor, TokenBitwiseNot:
if toldBitwise < 4 {
var suggestion string
switch tok.Type {
case TokenBitwiseAnd:
suggestion = " Did you mean boolean AND (\"&&\")?"
case TokenBitwiseOr:
suggestion = " Did you mean boolean OR (\"&&\")?"
case TokenBitwiseNot:
suggestion = " Did you mean boolean NOT (\"!\")?"
}
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unsupported operator",
Detail: fmt.Sprintf("Bitwise operators are not supported.%s", suggestion),
Subject: &tok.Range,
})
toldBitwise++
}
case TokenStarStar:
if toldExponent < 1 {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unsupported operator",
Detail: "\"**\" is not a supported operator. Exponentiation is not supported as an operator.",
Subject: &tok.Range,
})
toldExponent++
}
case TokenBacktick:
// Only report for alternating (even) backticks, so we won't report both start and ends of the same
// backtick-quoted string.
if (toldBacktick % 2) == 0 {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid character",
Detail: "The \"`\" character is not valid. To create a multi-line string, use the \"heredoc\" syntax, like \"<<EOT\".",
Subject: &tok.Range,
})
}
if toldBacktick <= 2 {
toldBacktick++
}
case TokenApostrophe:
if (toldApostrophe % 2) == 0 {
newDiag := &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid character",
Detail: "Single quotes are not valid. Use double quotes (\") to enclose strings.",
Subject: &tok.Range,
}
diags = append(diags, newDiag)
}
if toldApostrophe <= 2 {
toldApostrophe++
}
case TokenSemicolon:
if toldSemicolon < 1 {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid character",
Detail: "The \";\" character is not valid. Use newlines to separate arguments and blocks, and commas to separate items in collection values.",
Subject: &tok.Range,
})
toldSemicolon++
}
case TokenTabs:
if toldTabs < 1 {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid character",
Detail: "Tab characters may not be used. The recommended indentation style is two spaces per indent.",
Subject: &tok.Range,
})
toldTabs++
}
case TokenBadUTF8:
if toldBadUTF8 < 1 {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid character encoding",
Detail: "All input files must be UTF-8 encoded. Ensure that UTF-8 encoding is selected in your editor.",
Subject: &tok.Range,
})
toldBadUTF8++
}
case TokenQuotedNewline:
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid multi-line string",
Detail: "Quoted strings may not be split over multiple lines. To produce a multi-line string, either use the \\n escape to represent a newline character or use the \"heredoc\" multi-line template syntax.",
Subject: &tok.Range,
})
case TokenInvalid:
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid character",
Detail: "This character is not used within the language.",
Subject: &tok.Range,
})
}
}
return diags
}
var utf8BOM = []byte{0xef, 0xbb, 0xbf}
// stripUTF8BOM checks whether the given buffer begins with a UTF-8 byte order
// mark (0xEF 0xBB 0xBF) and, if so, returns a truncated slice with the same
// backing array but with the BOM skipped.
//
// If there is no BOM present, the given slice is returned verbatim.
func stripUTF8BOM(src []byte) []byte {
if bytes.HasPrefix(src, utf8BOM) {
return src[3:]
}
return src
}

@ -0,0 +1,131 @@
// Code generated by "stringer -type TokenType -output token_type_string.go"; DO NOT EDIT.
package hclsyntax
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[TokenOBrace-123]
_ = x[TokenCBrace-125]
_ = x[TokenOBrack-91]
_ = x[TokenCBrack-93]
_ = x[TokenOParen-40]
_ = x[TokenCParen-41]
_ = x[TokenOQuote-171]
_ = x[TokenCQuote-187]
_ = x[TokenOHeredoc-72]
_ = x[TokenCHeredoc-104]
_ = x[TokenStar-42]
_ = x[TokenSlash-47]
_ = x[TokenPlus-43]
_ = x[TokenMinus-45]
_ = x[TokenPercent-37]
_ = x[TokenEqual-61]
_ = x[TokenEqualOp-8788]
_ = x[TokenNotEqual-8800]
_ = x[TokenLessThan-60]
_ = x[TokenLessThanEq-8804]
_ = x[TokenGreaterThan-62]
_ = x[TokenGreaterThanEq-8805]
_ = x[TokenAnd-8743]
_ = x[TokenOr-8744]
_ = x[TokenBang-33]
_ = x[TokenDot-46]
_ = x[TokenComma-44]
_ = x[TokenEllipsis-8230]
_ = x[TokenFatArrow-8658]
_ = x[TokenQuestion-63]
_ = x[TokenColon-58]
_ = x[TokenTemplateInterp-8747]
_ = x[TokenTemplateControl-955]
_ = x[TokenTemplateSeqEnd-8718]
_ = x[TokenQuotedLit-81]
_ = x[TokenStringLit-83]
_ = x[TokenNumberLit-78]
_ = x[TokenIdent-73]
_ = x[TokenComment-67]
_ = x[TokenNewline-10]
_ = x[TokenEOF-9220]
_ = x[TokenBitwiseAnd-38]
_ = x[TokenBitwiseOr-124]
_ = x[TokenBitwiseNot-126]
_ = x[TokenBitwiseXor-94]
_ = x[TokenStarStar-10138]
_ = x[TokenApostrophe-39]
_ = x[TokenBacktick-96]
_ = x[TokenSemicolon-59]
_ = x[TokenTabs-9225]
_ = x[TokenInvalid-65533]
_ = x[TokenBadUTF8-128169]
_ = x[TokenQuotedNewline-9252]
_ = x[TokenNil-0]
}
const _TokenType_name = "TokenNilTokenNewlineTokenBangTokenPercentTokenBitwiseAndTokenApostropheTokenOParenTokenCParenTokenStarTokenPlusTokenCommaTokenMinusTokenDotTokenSlashTokenColonTokenSemicolonTokenLessThanTokenEqualTokenGreaterThanTokenQuestionTokenCommentTokenOHeredocTokenIdentTokenNumberLitTokenQuotedLitTokenStringLitTokenOBrackTokenCBrackTokenBitwiseXorTokenBacktickTokenCHeredocTokenOBraceTokenBitwiseOrTokenCBraceTokenBitwiseNotTokenOQuoteTokenCQuoteTokenTemplateControlTokenEllipsisTokenFatArrowTokenTemplateSeqEndTokenAndTokenOrTokenTemplateInterpTokenEqualOpTokenNotEqualTokenLessThanEqTokenGreaterThanEqTokenEOFTokenTabsTokenQuotedNewlineTokenStarStarTokenInvalidTokenBadUTF8"
var _TokenType_map = map[TokenType]string{
0: _TokenType_name[0:8],
10: _TokenType_name[8:20],
33: _TokenType_name[20:29],
37: _TokenType_name[29:41],
38: _TokenType_name[41:56],
39: _TokenType_name[56:71],
40: _TokenType_name[71:82],
41: _TokenType_name[82:93],
42: _TokenType_name[93:102],
43: _TokenType_name[102:111],
44: _TokenType_name[111:121],
45: _TokenType_name[121:131],
46: _TokenType_name[131:139],
47: _TokenType_name[139:149],
58: _TokenType_name[149:159],
59: _TokenType_name[159:173],
60: _TokenType_name[173:186],
61: _TokenType_name[186:196],
62: _TokenType_name[196:212],
63: _TokenType_name[212:225],
67: _TokenType_name[225:237],
72: _TokenType_name[237:250],
73: _TokenType_name[250:260],
78: _TokenType_name[260:274],
81: _TokenType_name[274:288],
83: _TokenType_name[288:302],
91: _TokenType_name[302:313],
93: _TokenType_name[313:324],
94: _TokenType_name[324:339],
96: _TokenType_name[339:352],
104: _TokenType_name[352:365],
123: _TokenType_name[365:376],
124: _TokenType_name[376:390],
125: _TokenType_name[390:401],
126: _TokenType_name[401:416],
171: _TokenType_name[416:427],
187: _TokenType_name[427:438],
955: _TokenType_name[438:458],
8230: _TokenType_name[458:471],
8658: _TokenType_name[471:484],
8718: _TokenType_name[484:503],
8743: _TokenType_name[503:511],
8744: _TokenType_name[511:518],
8747: _TokenType_name[518:537],
8788: _TokenType_name[537:549],
8800: _TokenType_name[549:562],
8804: _TokenType_name[562:577],
8805: _TokenType_name[577:595],
9220: _TokenType_name[595:603],
9225: _TokenType_name[603:612],
9252: _TokenType_name[612:630],
10138: _TokenType_name[630:643],
65533: _TokenType_name[643:655],
128169: _TokenType_name[655:667],
}
func (i TokenType) String() string {
if str, ok := _TokenType_map[i]; ok {
return str
}
return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")"
}

@ -0,0 +1,335 @@
#!/usr/bin/env ruby
#
# This script has been updated to accept more command-line arguments:
#
# -u, --url URL to process
# -m, --machine Machine name
# -p, --properties Properties to add to the machine
# -o, --output Write output to file
#
# Updated by: Marty Schoch <marty.schoch@gmail.com>
#
# This script uses the unicode spec to generate a Ragel state machine
# that recognizes unicode alphanumeric characters. It generates 5
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
# Currently supported encodings are UTF-8 [default] and UCS-4.
#
# Usage: unicode2ragel.rb [options]
# -e, --encoding [ucs4 | utf8] Data encoding
# -h, --help Show this message
#
# This script was originally written as part of the Ferret search
# engine library.
#
# Author: Rakan El-Khalil <rakan@well.com>
require 'optparse'
require 'open-uri'
ENCODINGS = [ :utf8, :ucs4 ]
ALPHTYPES = { :utf8 => "byte", :ucs4 => "rune" }
DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
DEFAULT_MACHINE_NAME= "WChar"
###
# Display vars & default option
TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8
@chart_url = DEFAULT_CHART_URL
machine_name = DEFAULT_MACHINE_NAME
properties = []
@output = $stdout
###
# Option parsing
cli_opts = OptionParser.new do |opts|
opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
@encoding = o.downcase.to_sym
end
opts.on("-h", "--help", "Show this message") do
puts opts
exit
end
opts.on("-u", "--url URL", "URL to process") do |o|
@chart_url = o
end
opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o|
machine_name = o
end
opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o|
properties = o
end
opts.on("-o", "--output FILE", "output file") do |o|
@output = File.new(o, "w+")
end
end
cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
puts "Invalid encoding: #{@encoding}"
puts cli_opts
exit
end
##
# Downloads the document at url and yields every alpha line's hex
# range and description.
def each_alpha( url, property )
open( url ) do |file|
file.each_line do |line|
next if line =~ /^#/;
next if line !~ /; #{property} #/;
range, description = line.split(/;/)
range.strip!
description.gsub!(/.*#/, '').strip!
if range =~ /\.\./
start, stop = range.split '..'
else start = stop = range
end
yield start.hex .. stop.hex, description
end
end
end
###
# Formats to hex at minimum width
def to_hex( n )
r = "%0X" % n
r = "0#{r}" unless (r.length % 2).zero?
r
end
###
# UCS4 is just a straight hex conversion of the unicode codepoint.
def to_ucs4( range )
rangestr = "0x" + to_hex(range.begin)
rangestr << "..0x" + to_hex(range.end) if range.begin != range.end
[ rangestr ]
end
##
# 0x00 - 0x7f -> 0zzzzzzz[7]
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
def to_utf8_enc( n )
r = 0
if n <= 0x7f
r = n
elsif n <= 0x7ff
y = 0xc0 | (n >> 6)
z = 0x80 | (n & 0x3f)
r = y << 8 | z
elsif n <= 0xffff
x = 0xe0 | (n >> 12)
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = x << 16 | y << 8 | z
elsif n <= 0x10ffff
w = 0xf0 | (n >> 18)
x = 0x80 | (n >> 12) & 0x3f
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = w << 24 | x << 16 | y << 8 | z
end
to_hex(r)
end
def from_utf8_enc( n )
n = n.hex
r = 0
if n <= 0x7f
r = n
elsif n <= 0xdfff
y = (n >> 8) & 0x1f
z = n & 0x3f
r = y << 6 | z
elsif n <= 0xefffff
x = (n >> 16) & 0x0f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = x << 10 | y << 6 | z
elsif n <= 0xf7ffffff
w = (n >> 24) & 0x07
x = (n >> 16) & 0x3f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = w << 18 | x << 12 | y << 6 | z
end
r
end
###
# Given a range, splits it up into ranges that can be continuously
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
# This is not strictly needed since the current [5.1] unicode standard
# doesn't have ranges that straddle utf8 boundaries. This is included
# for completeness as there is no telling if that will ever change.
def utf8_ranges( range )
ranges = []
UTF8_BOUNDARIES.each do |max|
if range.begin <= max
if range.end <= max
ranges << range
return ranges
end
ranges << (range.begin .. max)
range = (max + 1) .. range.end
end
end
ranges
end
def build_range( start, stop )
size = start.size/2
left = size - 1
return [""] if size < 1
a = start[0..1]
b = stop[0..1]
###
# Shared prefix
if a == b
return build_range(start[2..-1], stop[2..-1]).map do |elt|
"0x#{a} " + elt
end
end
###
# Unshared prefix, end of run
return ["0x#{a}..0x#{b} "] if left.zero?
###
# Unshared prefix, not end of run
# Range can be 0x123456..0x56789A
# Which is equivalent to:
# 0x123456 .. 0x12FFFF
# 0x130000 .. 0x55FFFF
# 0x560000 .. 0x56789A
ret = []
ret << build_range(start, a + "FF" * left)
###
# Only generate middle range if need be.
if a.hex+1 != b.hex
max = to_hex(b.hex - 1)
max = "FF" if b == "FF"
ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
end
###
# Don't generate last range if it is covered by first range
ret << build_range(b + "00" * left, stop) unless b == "FF"
ret.flatten!
end
def to_utf8( range )
utf8_ranges( range ).map do |r|
begin_enc = to_utf8_enc(r.begin)
end_enc = to_utf8_enc(r.end)
build_range begin_enc, end_enc
end.flatten!
end
##
# Perform a 3-way comparison of the number of codepoints advertised by
# the unicode spec for the given range, the originally parsed range,
# and the resulting utf8 encoded range.
def count_codepoints( code )
code.split(' ').inject(1) do |acc, elt|
if elt =~ /0x(.+)\.\.0x(.+)/
if @encoding == :utf8
acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1)
else
acc * ($2.hex - $1.hex + 1)
end
else
acc
end
end
end
def is_valid?( range, desc, codes )
spec_count = 1
spec_count = $1.to_i if desc =~ /\[(\d+)\]/
range_count = range.end - range.begin + 1
sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) }
sum == spec_count and sum == range_count
end
##
# Generate the state machine to the configured output (stdout by default)
def generate_machine( name, property )
pipe = " "
@output.puts " #{name} = "
each_alpha( @chart_url, property ) do |range, desc|
codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)
#raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
# is_valid? range, desc, codes
range_width = codes.map { |a| a.size }.max
range_width = RANGE_WIDTH if range_width < RANGE_WIDTH
desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11
desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH
if desc.size > desc_width
desc = desc[0..desc_width - 4] + "..."
end
codes.each_with_index do |r, idx|
desc = "" unless idx.zero?
code = "%-#{range_width}s" % r
@output.puts " #{pipe} #{code} ##{desc}"
pipe = "|"
end
end
@output.puts " ;"
@output.puts ""
end
@output.puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{@chart_url}
#
# It defines #{properties}.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.
%%{
machine #{machine_name};
EOF
properties.each { |x| generate_machine( x, x ) }
@output.puts <<EOF
}%%
EOF

@ -0,0 +1,86 @@
package hclsyntax
import (
"github.com/hashicorp/hcl/v2"
)
// Variables returns all of the variables referenced within a given expression.
//
// This is the implementation of the "Variables" method on every native
// expression.
func Variables(expr Expression) []hcl.Traversal {
var vars []hcl.Traversal
walker := &variablesWalker{
Callback: func(t hcl.Traversal) {
vars = append(vars, t)
},
}
Walk(expr, walker)
return vars
}
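// Editorial usage sketch (illustrative only, not part of the original file):
// discovering which root names an expression needs before building an
// EvalContext, e.g.
//
//	expr, _ := ParseExpression([]byte("a.b + c"), "x.hcl", hcl.InitialPos)
//	for _, t := range Variables(expr) {
//		fmt.Println(t.RootName()) // "a", then "c"
//	}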
// variablesWalker is a Walker implementation that calls its callback for any
// root scope traversal found while walking.
type variablesWalker struct {
Callback func(hcl.Traversal)
localScopes []map[string]struct{}
}
func (w *variablesWalker) Enter(n Node) hcl.Diagnostics {
switch tn := n.(type) {
case *ScopeTraversalExpr:
t := tn.Traversal
// Check if the given root name appears in any of the active
// local scopes. We don't want to return local variables here, since
// the goal of walking variables is to tell the calling application
// which names it needs to populate in the _root_ scope.
name := t.RootName()
for _, names := range w.localScopes {
if _, localized := names[name]; localized {
return nil
}
}
w.Callback(t)
case ChildScope:
w.localScopes = append(w.localScopes, tn.LocalNames)
}
return nil
}
func (w *variablesWalker) Exit(n Node) hcl.Diagnostics {
switch n.(type) {
case ChildScope:
// pop the latest local scope, assuming that the walker will
// behave symmetrically as promised.
w.localScopes = w.localScopes[:len(w.localScopes)-1]
}
return nil
}
// ChildScope is a synthetic AST node that is visited during a walk to
// indicate that its descendant will be evaluated in a child scope, which
// may mask certain variables from the parent scope as locals.
//
// ChildScope nodes don't really exist in the AST, but are rather synthesized
// on the fly during walk. Therefore it doesn't do any good to transform them;
// instead, transform either parent node that created a scope or the expression
// that the child scope struct wraps.
type ChildScope struct {
LocalNames map[string]struct{}
Expr Expression
}
func (e ChildScope) walkChildNodes(w internalWalkFunc) {
w(e.Expr)
}
// Range returns the range of the expression that the ChildScope is
// encapsulating. It isn't really very useful to call Range on a ChildScope.
func (e ChildScope) Range() hcl.Range {
return e.Expr.Range()
}

@ -0,0 +1,41 @@
package hclsyntax
import (
"github.com/hashicorp/hcl/v2"
)
// VisitFunc is the callback signature for VisitAll.
type VisitFunc func(node Node) hcl.Diagnostics
// VisitAll is a basic way to traverse the AST beginning with a particular
// node. The given function will be called once for each AST node in
// depth-first order, but no context is provided about the shape of the tree.
//
// The VisitFunc may return diagnostics, in which case they will be accumulated
// and returned as a single set.
func VisitAll(node Node, f VisitFunc) hcl.Diagnostics {
diags := f(node)
node.walkChildNodes(func(node Node) {
diags = append(diags, VisitAll(node, f)...)
})
return diags
}
// Walker is an interface used with Walk.
type Walker interface {
Enter(node Node) hcl.Diagnostics
Exit(node Node) hcl.Diagnostics
}
// Walk is a more complex way to traverse the AST starting with a particular
// node, which provides information about the tree structure via separate
// Enter and Exit functions.
func Walk(node Node, w Walker) hcl.Diagnostics {
diags := w.Enter(node)
node.walkChildNodes(func(node Node) {
diags = append(diags, Walk(node, w)...)
})
moreDiags := w.Exit(node)
diags = append(diags, moreDiags...)
return diags
}
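// Editorial usage sketch (illustrative only, not part of the original file):
// a minimal Walker that counts every node it enters, e.g.
//
//	type nodeCounter struct{ n int }
//
//	func (c *nodeCounter) Enter(node Node) hcl.Diagnostics { c.n++; return nil }
//	func (c *nodeCounter) Exit(node Node) hcl.Diagnostics  { return nil }
//
//	var c nodeCounter
//	diags := Walk(expr, &c)
//	_ = diags // c.n now holds the number of nodes visited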

@ -0,0 +1,121 @@
package hclwrite
import (
"bytes"
"io"
)
type File struct {
inTree
srcBytes []byte
body *node
}
// NewEmptyFile constructs a new file with no content, ready to be mutated
// by other calls that append to its body.
func NewEmptyFile() *File {
f := &File{
inTree: newInTree(),
}
body := newBody()
f.body = f.children.Append(body)
return f
}
// Body returns the root body of the file, which contains the top-level
// attributes and blocks.
func (f *File) Body() *Body {
return f.body.content.(*Body)
}
// WriteTo writes the tokens underlying the receiving file to the given writer.
//
// The tokens first have a simple formatting pass applied that adjusts only
// the spaces between them.
func (f *File) WriteTo(wr io.Writer) (int64, error) {
tokens := f.inTree.children.BuildTokens(nil)
format(tokens)
return tokens.WriteTo(wr)
}
// Bytes returns a buffer containing the source code resulting from the
// tokens underlying the receiving file. If any updates have been made via
// the AST API, these will be reflected in the result.
func (f *File) Bytes() []byte {
buf := &bytes.Buffer{}
f.WriteTo(buf)
return buf.Bytes()
}
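// Editorial usage sketch (illustrative only, not part of the original file):
// generating a new configuration from scratch, e.g.
//
//	f := NewEmptyFile()
//	f.Body().SetAttributeValue("enabled", cty.True)
//	fmt.Printf("%s", f.Bytes())
//
// which prints: enabled = true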
type comments struct {
leafNode
parent *node
tokens Tokens
}
func newComments(tokens Tokens) *comments {
return &comments{
tokens: tokens,
}
}
func (c *comments) BuildTokens(to Tokens) Tokens {
return c.tokens.BuildTokens(to)
}
type identifier struct {
leafNode
parent *node
token *Token
}
func newIdentifier(token *Token) *identifier {
return &identifier{
token: token,
}
}
func (i *identifier) BuildTokens(to Tokens) Tokens {
return append(to, i.token)
}
func (i *identifier) hasName(name string) bool {
return name == string(i.token.Bytes)
}
type number struct {
leafNode
parent *node
token *Token
}
func newNumber(token *Token) *number {
return &number{
token: token,
}
}
func (n *number) BuildTokens(to Tokens) Tokens {
return append(to, n.token)
}
type quoted struct {
leafNode
parent *node
tokens Tokens
}
func newQuoted(tokens Tokens) *quoted {
return &quoted{
tokens: tokens,
}
}
func (q *quoted) BuildTokens(to Tokens) Tokens {
return q.tokens.BuildTokens(to)
}

@ -0,0 +1,48 @@
package hclwrite
import (
"github.com/hashicorp/hcl/v2/hclsyntax"
)
type Attribute struct {
inTree
leadComments *node
name *node
expr *node
lineComments *node
}
func newAttribute() *Attribute {
return &Attribute{
inTree: newInTree(),
}
}
func (a *Attribute) init(name string, expr *Expression) {
expr.assertUnattached()
nameTok := newIdentToken(name)
nameObj := newIdentifier(nameTok)
a.leadComments = a.children.Append(newComments(nil))
a.name = a.children.Append(nameObj)
a.children.AppendUnstructuredTokens(Tokens{
{
Type: hclsyntax.TokenEqual,
Bytes: []byte{'='},
},
})
a.expr = a.children.Append(expr)
a.expr.list = a.children
a.lineComments = a.children.Append(newComments(nil))
a.children.AppendUnstructuredTokens(Tokens{
{
Type: hclsyntax.TokenNewline,
Bytes: []byte{'\n'},
},
})
}
func (a *Attribute) Expr() *Expression {
return a.expr.content.(*Expression)
}

@ -0,0 +1,118 @@
package hclwrite
import (
"github.com/hashicorp/hcl/v2/hclsyntax"
"github.com/zclconf/go-cty/cty"
)
type Block struct {
inTree
leadComments *node
typeName *node
labels nodeSet
open *node
body *node
close *node
}
func newBlock() *Block {
return &Block{
inTree: newInTree(),
labels: newNodeSet(),
}
}
// NewBlock constructs a new, empty block with the given type name and labels.
func NewBlock(typeName string, labels []string) *Block {
block := newBlock()
block.init(typeName, labels)
return block
}
func (b *Block) init(typeName string, labels []string) {
nameTok := newIdentToken(typeName)
nameObj := newIdentifier(nameTok)
b.leadComments = b.children.Append(newComments(nil))
b.typeName = b.children.Append(nameObj)
for _, label := range labels {
labelToks := TokensForValue(cty.StringVal(label))
labelObj := newQuoted(labelToks)
labelNode := b.children.Append(labelObj)
b.labels.Add(labelNode)
}
b.open = b.children.AppendUnstructuredTokens(Tokens{
{
Type: hclsyntax.TokenOBrace,
Bytes: []byte{'{'},
},
{
Type: hclsyntax.TokenNewline,
Bytes: []byte{'\n'},
},
})
body := newBody() // initially totally empty; caller can append to it subsequently
b.body = b.children.Append(body)
b.close = b.children.AppendUnstructuredTokens(Tokens{
{
Type: hclsyntax.TokenCBrace,
Bytes: []byte{'}'},
},
{
Type: hclsyntax.TokenNewline,
Bytes: []byte{'\n'},
},
})
}
// Body returns the body that represents the content of the receiving block.
//
// Appending to or otherwise modifying this body will make changes to the
// tokens that are generated between the block's open and close braces.
func (b *Block) Body() *Body {
return b.body.content.(*Body)
}
// Type returns the type name of the block.
func (b *Block) Type() string {
typeNameObj := b.typeName.content.(*identifier)
return string(typeNameObj.token.Bytes)
}
// Labels returns the labels of the block.
func (b *Block) Labels() []string {
labelNames := make([]string, 0, len(b.labels))
list := b.labels.List()
for _, label := range list {
switch labelObj := label.content.(type) {
case *identifier:
if labelObj.token.Type == hclsyntax.TokenIdent {
labelString := string(labelObj.token.Bytes)
labelNames = append(labelNames, labelString)
}
case *quoted:
tokens := labelObj.tokens
if len(tokens) == 3 &&
tokens[0].Type == hclsyntax.TokenOQuote &&
tokens[1].Type == hclsyntax.TokenQuotedLit &&
tokens[2].Type == hclsyntax.TokenCQuote {
// Note that TokenQuotedLit may contain escape sequences.
labelString, diags := hclsyntax.ParseStringLiteralToken(tokens[1].asHCLSyntax())
// If parsing the string literal returns error diagnostics
// then we can just assume the label doesn't match, because it's invalid in some way.
if !diags.HasErrors() {
labelNames = append(labelNames, labelString)
}
}
default:
// If neither of the previous cases are true (should be impossible)
// then we can just ignore it, because it's invalid too.
}
}
return labelNames
}

@ -0,0 +1,239 @@
package hclwrite
import (
"reflect"
"github.com/hashicorp/hcl/v2"
"github.com/hashicorp/hcl/v2/hclsyntax"
"github.com/zclconf/go-cty/cty"
)
type Body struct {
inTree
items nodeSet
}
func newBody() *Body {
return &Body{
inTree: newInTree(),
items: newNodeSet(),
}
}
func (b *Body) appendItem(c nodeContent) *node {
nn := b.children.Append(c)
b.items.Add(nn)
return nn
}
func (b *Body) appendItemNode(nn *node) *node {
nn.assertUnattached()
b.children.AppendNode(nn)
b.items.Add(nn)
return nn
}
// Clear removes all of the items from the body, making it empty.
func (b *Body) Clear() {
b.children.Clear()
}
func (b *Body) AppendUnstructuredTokens(ts Tokens) {
b.inTree.children.Append(ts)
}
// Attributes returns a new map of all of the attributes in the body, with
// the attribute names as the keys.
func (b *Body) Attributes() map[string]*Attribute {
ret := make(map[string]*Attribute)
for n := range b.items {
if attr, isAttr := n.content.(*Attribute); isAttr {
nameObj := attr.name.content.(*identifier)
name := string(nameObj.token.Bytes)
ret[name] = attr
}
}
return ret
}
// Blocks returns a new slice of all the blocks in the body.
func (b *Body) Blocks() []*Block {
ret := make([]*Block, 0, len(b.items))
for _, n := range b.items.List() {
if block, isBlock := n.content.(*Block); isBlock {
ret = append(ret, block)
}
}
return ret
}
// GetAttribute returns the attribute from the body that has the given name,
// or returns nil if there is currently no matching attribute.
func (b *Body) GetAttribute(name string) *Attribute {
for n := range b.items {
if attr, isAttr := n.content.(*Attribute); isAttr {
nameObj := attr.name.content.(*identifier)
if nameObj.hasName(name) {
// We've found it!
return attr
}
}
}
return nil
}
// getAttributeNode is like GetAttribute but it returns the node containing
// the selected attribute (if one is found) rather than the attribute itself.
func (b *Body) getAttributeNode(name string) *node {
for n := range b.items {
if attr, isAttr := n.content.(*Attribute); isAttr {
nameObj := attr.name.content.(*identifier)
if nameObj.hasName(name) {
// We've found it!
return n
}
}
}
return nil
}
// FirstMatchingBlock returns the first block from the body that has the
// given type name and labels, or nil if there is currently no matching
// block.
func (b *Body) FirstMatchingBlock(typeName string, labels []string) *Block {
for _, block := range b.Blocks() {
if typeName == block.Type() {
labelNames := block.Labels()
if len(labels) == 0 && len(labelNames) == 0 {
return block
}
if reflect.DeepEqual(labels, labelNames) {
return block
}
}
}
return nil
}
// RemoveBlock removes the given block from the body, if it's in that body.
// If it isn't present, this is a no-op.
//
// Returns true if it removed something, or false otherwise.
func (b *Body) RemoveBlock(block *Block) bool {
for n := range b.items {
if n.content == block {
n.Detach()
b.items.Remove(n)
return true
}
}
return false
}
// SetAttributeRaw either replaces the expression of an existing attribute
// of the given name or adds a new attribute definition to the end of the block,
// using the given tokens verbatim as the expression.
//
// The same caveats apply to this function as for NewExpressionRaw on which
// it is based. If possible, prefer to use SetAttributeValue or
// SetAttributeTraversal.
func (b *Body) SetAttributeRaw(name string, tokens Tokens) *Attribute {
attr := b.GetAttribute(name)
expr := NewExpressionRaw(tokens)
if attr != nil {
attr.expr = attr.expr.ReplaceWith(expr)
} else {
attr = newAttribute() // assign, not shadow, so the new attribute is returned
attr.init(name, expr)
b.appendItem(attr)
}
return attr
}
// SetAttributeValue either replaces the expression of an existing attribute
// of the given name or adds a new attribute definition to the end of the block.
//
// The value is given as a cty.Value, and must therefore be a literal. To set
// a variable reference or other traversal, use SetAttributeTraversal.
//
// The return value is the attribute that was either modified in-place or
// created.
func (b *Body) SetAttributeValue(name string, val cty.Value) *Attribute {
attr := b.GetAttribute(name)
expr := NewExpressionLiteral(val)
if attr != nil {
attr.expr = attr.expr.ReplaceWith(expr)
} else {
attr = newAttribute()
attr.init(name, expr)
b.appendItem(attr)
}
return attr
}
// SetAttributeTraversal either replaces the expression of an existing attribute
// of the given name or adds a new attribute definition to the end of the body.
//
// The new expression is given as a hcl.Traversal, which must be an absolute
// traversal. To set a literal value, use SetAttributeValue.
//
// The return value is the attribute that was either modified in-place or
// created.
func (b *Body) SetAttributeTraversal(name string, traversal hcl.Traversal) *Attribute {
attr := b.GetAttribute(name)
expr := NewExpressionAbsTraversal(traversal)
if attr != nil {
attr.expr = attr.expr.ReplaceWith(expr)
} else {
attr = newAttribute()
attr.init(name, expr)
b.appendItem(attr)
}
return attr
}
// RemoveAttribute removes the attribute with the given name from the body.
//
// The return value is the attribute that was removed, or nil if there was
// no such attribute (in which case the call was a no-op).
func (b *Body) RemoveAttribute(name string) *Attribute {
node := b.getAttributeNode(name)
if node == nil {
return nil
}
node.Detach()
b.items.Remove(node)
return node.content.(*Attribute)
}
// AppendBlock appends an existing block (which must not be already attached
// to a body) to the end of the receiving body.
func (b *Body) AppendBlock(block *Block) *Block {
b.appendItem(block)
return block
}
// AppendNewBlock appends a new nested block to the end of the receiving body
// with the given type name and labels.
func (b *Body) AppendNewBlock(typeName string, labels []string) *Block {
block := newBlock()
block.init(typeName, labels)
b.appendItem(block)
return block
}
// AppendNewline appends a newline token to the end of the receiving body,
// which generally serves as a separator between different sets of body
// contents.
func (b *Body) AppendNewline() {
b.AppendUnstructuredTokens(Tokens{
{
Type: hclsyntax.TokenNewline,
Bytes: []byte{'\n'},
},
})
}
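// As a rough illustration of how the editing methods above combine (a
// sketch only; "src", "region" and "servers" are made-up names, and the
// File and Block accessors Body() are assumed from the rest of this
// package):
//
//	f, diags := ParseConfig(src, "config.hcl", hcl.Pos{Line: 1, Column: 1})
//	if !diags.HasErrors() {
//		body := f.Body()
//		body.SetAttributeValue("region", cty.StringVal("us-east-1"))
//		body.AppendNewline()
//		block := body.AppendNewBlock("servers", []string{"web"})
//		block.Body().SetAttributeTraversal("ami", hcl.Traversal{
//			hcl.TraverseRoot{Name: "var"},
//			hcl.TraverseAttr{Name: "base_ami"},
//		})
//	}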

@ -0,0 +1,224 @@
package hclwrite
import (
"fmt"
"github.com/hashicorp/hcl/v2"
"github.com/hashicorp/hcl/v2/hclsyntax"
"github.com/zclconf/go-cty/cty"
)
type Expression struct {
inTree
absTraversals nodeSet
}
func newExpression() *Expression {
return &Expression{
inTree: newInTree(),
absTraversals: newNodeSet(),
}
}
// NewExpressionRaw constructs an expression containing the given raw tokens.
//
// There is no automatic validation that the given tokens produce a valid
// expression. Callers of this function must take care to produce valid
// expression tokens. Where possible, use the higher-level functions
// NewExpressionLiteral or NewExpressionAbsTraversal instead.
//
// Because NewExpressionRaw does not interpret the given tokens in any way,
// an expression created by NewExpressionRaw will produce an empty result
// for calls to its method Variables, even if the given token sequence
// contains a subslice that would normally be interpreted as a traversal under
// parsing.
func NewExpressionRaw(tokens Tokens) *Expression {
expr := newExpression()
// We copy the tokens here in order to make sure that later mutations
// by the caller don't inadvertently cause our expression to become
// invalid.
copyTokens := make(Tokens, len(tokens))
copy(copyTokens, tokens)
expr.children.AppendUnstructuredTokens(copyTokens)
return expr
}
// NewExpressionLiteral constructs an expression that represents the given
// literal value.
//
// Since an unknown value cannot be represented in source code, this function
// will panic if the given value is unknown or contains a nested unknown value.
// Use val.IsWhollyKnown before calling to be sure.
//
// HCL native syntax does not directly represent lists, maps, and sets, and
// instead relies on the automatic conversions to those collection types from
// either list or tuple constructor syntax. Therefore converting collection
// values to source code and re-reading them will lose type information, and
// the reader must provide a suitable type at decode time to recover the
// original value.
func NewExpressionLiteral(val cty.Value) *Expression {
toks := TokensForValue(val)
expr := newExpression()
expr.children.AppendUnstructuredTokens(toks)
return expr
}
// NewExpressionAbsTraversal constructs an expression that represents the
// given traversal, which must be absolute or this function will panic.
func NewExpressionAbsTraversal(traversal hcl.Traversal) *Expression {
if traversal.IsRelative() {
panic("can't construct expression from relative traversal")
}
physT := newTraversal()
rootName := traversal.RootName()
steps := traversal[1:]
{
tn := newTraverseName()
tn.name = tn.children.Append(newIdentifier(&Token{
Type: hclsyntax.TokenIdent,
Bytes: []byte(rootName),
}))
physT.steps.Add(physT.children.Append(tn))
}
for _, step := range steps {
switch ts := step.(type) {
case hcl.TraverseAttr:
tn := newTraverseName()
tn.children.AppendUnstructuredTokens(Tokens{
{
Type: hclsyntax.TokenDot,
Bytes: []byte{'.'},
},
})
tn.name = tn.children.Append(newIdentifier(&Token{
Type: hclsyntax.TokenIdent,
Bytes: []byte(ts.Name),
}))
physT.steps.Add(physT.children.Append(tn))
case hcl.TraverseIndex:
ti := newTraverseIndex()
ti.children.AppendUnstructuredTokens(Tokens{
{
Type: hclsyntax.TokenOBrack,
Bytes: []byte{'['},
},
})
indexExpr := NewExpressionLiteral(ts.Key)
ti.key = ti.children.Append(indexExpr)
ti.children.AppendUnstructuredTokens(Tokens{
{
Type: hclsyntax.TokenCBrack,
Bytes: []byte{']'},
},
})
physT.steps.Add(physT.children.Append(ti))
}
}
expr := newExpression()
expr.absTraversals.Add(expr.children.Append(physT))
return expr
}
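// As an illustration, an expression equivalent to the reference foo.bar[0]
// could be built like this ("foo" and "bar" are arbitrary names):
//
//	expr := NewExpressionAbsTraversal(hcl.Traversal{
//		hcl.TraverseRoot{Name: "foo"},
//		hcl.TraverseAttr{Name: "bar"},
//		hcl.TraverseIndex{Key: cty.NumberIntVal(0)},
//	})
//	// expr.BuildTokens(nil).Bytes() then yields: foo.bar[0]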
// Variables returns the absolute traversals that exist within the receiving
// expression.
func (e *Expression) Variables() []*Traversal {
nodes := e.absTraversals.List()
ret := make([]*Traversal, len(nodes))
for i, node := range nodes {
ret[i] = node.content.(*Traversal)
}
return ret
}
// RenameVariablePrefix examines each of the absolute traversals in the
// receiving expression to see if they have the given sequence of names as
// a prefix. If so, they are updated in place to have the given
// replacement names instead of that prefix.
//
// This can be used to implement symbol renaming. The calling application can
// visit all relevant expressions in its input and apply the same renaming
// to implement a global symbol rename.
//
// The search and replacement traversals must be the same length, or this
// method will panic. Only attribute access operations can be matched and
// replaced. Index steps never match the prefix.
func (e *Expression) RenameVariablePrefix(search, replacement []string) {
if len(search) != len(replacement) {
panic(fmt.Sprintf("search and replacement length mismatch (%d and %d)", len(search), len(replacement)))
}
Traversals:
for node := range e.absTraversals {
traversal := node.content.(*Traversal)
if len(traversal.steps) < len(search) {
// If it's shorter than the search prefix then it can't match
continue
}
stepNodes := traversal.steps.List()
for i, name := range search {
step, isName := stepNodes[i].content.(*TraverseName)
if !isName {
continue Traversals // only name nodes can match
}
foundNameBytes := step.name.content.(*identifier).token.Bytes
if len(foundNameBytes) != len(name) {
continue Traversals
}
if string(foundNameBytes) != name {
continue Traversals
}
}
// If we get here then the prefix matched, so now we'll swap in
// the replacement strings.
for i, name := range replacement {
step := stepNodes[i].content.(*TraverseName)
token := step.name.content.(*identifier).token
token.Bytes = []byte(name)
}
}
}
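// For example, to rewrite references under the made-up prefix data.old
// (such as data.old.id) to use data.new instead:
//
//	expr.RenameVariablePrefix(
//		[]string{"data", "old"},
//		[]string{"data", "new"},
//	)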
// Traversal represents a sequence of variable, attribute, and/or index
// operations.
type Traversal struct {
inTree
steps nodeSet
}
func newTraversal() *Traversal {
return &Traversal{
inTree: newInTree(),
steps: newNodeSet(),
}
}
type TraverseName struct {
inTree
name *node
}
func newTraverseName() *TraverseName {
return &TraverseName{
inTree: newInTree(),
}
}
type TraverseIndex struct {
inTree
key *node
}
func newTraverseIndex() *TraverseIndex {
return &TraverseIndex{
inTree: newInTree(),
}
}

@ -0,0 +1,11 @@
// Package hclwrite deals with the problem of generating HCL configuration
// and of making specific surgical changes to existing HCL configurations.
//
// It operates at a different level of abstraction than the main HCL parser
// and AST, since details such as the placement of comments and newlines
// are preserved when unchanged.
//
// The hclwrite API follows a similar principle to XML/HTML DOM, allowing nodes
// to be read out, created and inserted, etc. Nodes represent syntax constructs
// rather than semantic concepts.
package hclwrite

@ -0,0 +1,463 @@
package hclwrite
import (
"github.com/hashicorp/hcl/v2/hclsyntax"
)
var inKeyword = hclsyntax.Keyword([]byte{'i', 'n'})
// placeholder token used when we don't have a token but we don't want
// to pass a real "nil" and complicate things with nil pointer checks
var nilToken = &Token{
Type: hclsyntax.TokenNil,
Bytes: []byte{},
SpacesBefore: 0,
}
// format rewrites tokens within the given sequence, in-place, to adjust the
// whitespace around their content to achieve canonical formatting.
func format(tokens Tokens) {
// Formatting is a multi-pass process. More details on the passes below,
// but this is the overview:
// - adjust the leading space on each line to create appropriate
// indentation
// - adjust spaces between tokens in a single cell using a set of rules
// - adjust the leading space in the "assign" and "comment" cells on each
// line to vertically align with neighboring lines.
// All of these steps operate in-place on the given tokens, so a caller
// may collect a flat sequence of all of the tokens underlying an AST
// and pass it here and we will then indirectly modify the AST itself.
// Formatting must change only whitespace. Specifically, that means
// changing the SpacesBefore attribute on a token while leaving the
// other token attributes unchanged.
lines := linesForFormat(tokens)
formatIndent(lines)
formatSpaces(lines)
formatCells(lines)
}
func formatIndent(lines []formatLine) {
// Our methodology for indents is to take the input one line at a time
// and count the bracketing delimiters on each line. If a line has a net
// increase in open brackets, we increase the indent level by one and
// remember how many new openers we had. If the line has a net _decrease_,
// we'll compare it to the most recent number of openers and decrease the
// indent level by one each time we pass an indent level remembered
// earlier.
// The "indent stack" used here allows for us to recognize degenerate
// input where brackets are not symmetrical within lines and avoid
// pushing things too far left or right, creating confusion.
// We'll start our indent stack at a reasonable capacity to minimize the
// chance of us needing to grow it; 10 here means 10 levels of indent,
// which should be more than enough for reasonable HCL uses.
indents := make([]int, 0, 10)
for i := range lines {
line := &lines[i]
if len(line.lead) == 0 {
continue
}
if line.lead[0].Type == hclsyntax.TokenNewline {
// Never place spaces before a newline
line.lead[0].SpacesBefore = 0
continue
}
netBrackets := 0
for _, token := range line.lead {
netBrackets += tokenBracketChange(token)
if token.Type == hclsyntax.TokenOHeredoc {
break
}
}
for _, token := range line.assign {
netBrackets += tokenBracketChange(token)
}
switch {
case netBrackets > 0:
line.lead[0].SpacesBefore = 2 * len(indents)
indents = append(indents, netBrackets)
case netBrackets < 0:
closed := -netBrackets
for closed > 0 && len(indents) > 0 {
switch {
case closed > indents[len(indents)-1]:
closed -= indents[len(indents)-1]
indents = indents[:len(indents)-1]
case closed < indents[len(indents)-1]:
indents[len(indents)-1] -= closed
closed = 0
default:
indents = indents[:len(indents)-1]
closed = 0
}
}
line.lead[0].SpacesBefore = 2 * len(indents)
default:
line.lead[0].SpacesBefore = 2 * len(indents)
}
}
}
func formatSpaces(lines []formatLine) {
for _, line := range lines {
for i, token := range line.lead {
var before, after *Token
if i > 0 {
before = line.lead[i-1]
} else {
before = nilToken
}
if i < (len(line.lead) - 1) {
after = line.lead[i+1]
} else {
after = nilToken
}
if spaceAfterToken(token, before, after) {
after.SpacesBefore = 1
} else {
after.SpacesBefore = 0
}
}
for i, token := range line.assign {
if i == 0 {
// first token in "assign" always has one space before to
// separate the equals sign from what it's assigning.
token.SpacesBefore = 1
}
var before, after *Token
if i > 0 {
before = line.assign[i-1]
} else {
before = nilToken
}
if i < (len(line.assign) - 1) {
after = line.assign[i+1]
} else {
after = nilToken
}
if spaceAfterToken(token, before, after) {
after.SpacesBefore = 1
} else {
after.SpacesBefore = 0
}
}
}
}
func formatCells(lines []formatLine) {
chainStart := -1
maxColumns := 0
// We'll deal with the "assign" cell first, since moving that will
// also impact the "comment" cell.
closeAssignChain := func(i int) {
for _, chainLine := range lines[chainStart:i] {
columns := chainLine.lead.Columns()
spaces := (maxColumns - columns) + 1
chainLine.assign[0].SpacesBefore = spaces
}
chainStart = -1
maxColumns = 0
}
for i, line := range lines {
if line.assign == nil {
if chainStart != -1 {
closeAssignChain(i)
}
} else {
if chainStart == -1 {
chainStart = i
}
columns := line.lead.Columns()
if columns > maxColumns {
maxColumns = columns
}
}
}
if chainStart != -1 {
closeAssignChain(len(lines))
}
// Now we'll deal with the comments
closeCommentChain := func(i int) {
for _, chainLine := range lines[chainStart:i] {
columns := chainLine.lead.Columns() + chainLine.assign.Columns()
spaces := (maxColumns - columns) + 1
chainLine.comment[0].SpacesBefore = spaces
}
chainStart = -1
maxColumns = 0
}
for i, line := range lines {
if line.comment == nil {
if chainStart != -1 {
closeCommentChain(i)
}
} else {
if chainStart == -1 {
chainStart = i
}
columns := line.lead.Columns() + line.assign.Columns()
if columns > maxColumns {
maxColumns = columns
}
}
}
if chainStart != -1 {
closeCommentChain(len(lines))
}
}
// spaceAfterToken decides whether a particular subject token should have a
// space after it when surrounded by the given before and after tokens.
// "before" can be TokenNil, if the subject token is at the start of a sequence.
func spaceAfterToken(subject, before, after *Token) bool {
switch {
case after.Type == hclsyntax.TokenNewline || after.Type == hclsyntax.TokenNil:
// Never add spaces before a newline
return false
case subject.Type == hclsyntax.TokenIdent && after.Type == hclsyntax.TokenOParen:
// Don't split a function name from open paren in a call
return false
case subject.Type == hclsyntax.TokenDot || after.Type == hclsyntax.TokenDot:
// Don't use spaces around attribute access dots
return false
case after.Type == hclsyntax.TokenComma || after.Type == hclsyntax.TokenEllipsis:
// No space right before a comma or ... in an argument list
return false
case subject.Type == hclsyntax.TokenComma:
// Always a space after a comma
return true
case subject.Type == hclsyntax.TokenQuotedLit || subject.Type == hclsyntax.TokenStringLit || subject.Type == hclsyntax.TokenOQuote || subject.Type == hclsyntax.TokenOHeredoc || after.Type == hclsyntax.TokenQuotedLit || after.Type == hclsyntax.TokenStringLit || after.Type == hclsyntax.TokenCQuote || after.Type == hclsyntax.TokenCHeredoc:
// No extra spaces within templates
return false
case inKeyword.TokenMatches(subject.asHCLSyntax()) && before.Type == hclsyntax.TokenIdent:
// This is a special case for inside for expressions where a user
// might want to use a literal tuple constructor:
// [for x in [foo]: x]
// ... in that case, we would normally produce in[foo] thinking that
// in is a reference, but we'll recognize it as a keyword here instead
// to make the result less confusing.
return true
case after.Type == hclsyntax.TokenOBrack && (subject.Type == hclsyntax.TokenIdent || subject.Type == hclsyntax.TokenNumberLit || tokenBracketChange(subject) < 0):
return false
case subject.Type == hclsyntax.TokenMinus:
// Since a minus can either be subtraction or negation, and the latter
// should _not_ have a space after it, we need to use some heuristics
// to decide which case this is.
// We guess that we have a negation if the token before doesn't look
// like it could be the end of an expression.
switch before.Type {
case hclsyntax.TokenNil:
// Minus at the start of input must be a negation
return false
case hclsyntax.TokenOParen, hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenEqual, hclsyntax.TokenColon, hclsyntax.TokenComma, hclsyntax.TokenQuestion:
// Minus immediately after an opening bracket or separator must be a negation.
return false
case hclsyntax.TokenPlus, hclsyntax.TokenStar, hclsyntax.TokenSlash, hclsyntax.TokenPercent, hclsyntax.TokenMinus:
// Minus immediately after another arithmetic operator must be negation.
return false
case hclsyntax.TokenEqualOp, hclsyntax.TokenNotEqual, hclsyntax.TokenGreaterThan, hclsyntax.TokenGreaterThanEq, hclsyntax.TokenLessThan, hclsyntax.TokenLessThanEq:
// Minus immediately after another comparison operator must be negation.
return false
case hclsyntax.TokenAnd, hclsyntax.TokenOr, hclsyntax.TokenBang:
// Minus immediately after logical operator doesn't make sense but probably intended as negation.
return false
default:
return true
}
case subject.Type == hclsyntax.TokenOBrace || after.Type == hclsyntax.TokenCBrace:
// Unlike other bracket types, braces have spaces on both sides of them,
// both in single-line nested blocks foo { bar = baz } and in object
// constructor expressions foo = { bar = baz }.
if subject.Type == hclsyntax.TokenOBrace && after.Type == hclsyntax.TokenCBrace {
// An open brace followed by a close brace is an exception, however.
// e.g. foo {} rather than foo { }
return false
}
return true
// In the unlikely event that an interpolation expression is just
// a single object constructor, we'll put a space between the ${ and
// the following { to make this more obvious, and then the same
// thing for the two braces at the end.
case (subject.Type == hclsyntax.TokenTemplateInterp || subject.Type == hclsyntax.TokenTemplateControl) && after.Type == hclsyntax.TokenOBrace:
return true
case subject.Type == hclsyntax.TokenCBrace && after.Type == hclsyntax.TokenTemplateSeqEnd:
return true
// Don't add spaces between interpolated items
case subject.Type == hclsyntax.TokenTemplateSeqEnd && (after.Type == hclsyntax.TokenTemplateInterp || after.Type == hclsyntax.TokenTemplateControl):
return false
case tokenBracketChange(subject) > 0:
// No spaces after open brackets
return false
case tokenBracketChange(after) < 0:
// No spaces before close brackets
return false
default:
// Most tokens are space-separated
return true
}
}
func linesForFormat(tokens Tokens) []formatLine {
if len(tokens) == 0 {
return make([]formatLine, 0)
}
// first we'll count our lines, so we can allocate the array for them in
// a single block. (We want to minimize memory pressure in this codepath,
// so it can be run somewhat-frequently by editor integrations.)
lineCount := 1 // if there are zero newlines then there is one line
for _, tok := range tokens {
if tokenIsNewline(tok) {
lineCount++
}
}
// To start, we'll just put everything in the "lead" cell on each line,
// and then do another pass over the lines afterwards to adjust.
lines := make([]formatLine, lineCount)
li := 0
lineStart := 0
for i, tok := range tokens {
if tok.Type == hclsyntax.TokenEOF {
// The EOF token doesn't belong to any line, and terminates the
// token sequence.
lines[li].lead = tokens[lineStart:i]
break
}
if tokenIsNewline(tok) {
lines[li].lead = tokens[lineStart : i+1]
lineStart = i + 1
li++
}
}
// If a set of tokens doesn't end in TokenEOF (e.g. because it's a
// fragment of tokens from the middle of a file) then we might fall
// out here with a line still pending.
if lineStart < len(tokens) {
lines[li].lead = tokens[lineStart:]
if lines[li].lead[len(lines[li].lead)-1].Type == hclsyntax.TokenEOF {
lines[li].lead = lines[li].lead[:len(lines[li].lead)-1]
}
}
// Now we'll pick off any trailing comments and attribute assignments
// to shuffle off into the "comment" and "assign" cells.
for i := range lines {
line := &lines[i]
if len(line.lead) == 0 {
// if the line is empty then there's nothing for us to do
// (this should happen only for the final line, because all other
// lines would have a newline token of some kind)
continue
}
if len(line.lead) > 1 && line.lead[len(line.lead)-1].Type == hclsyntax.TokenComment {
line.comment = line.lead[len(line.lead)-1:]
line.lead = line.lead[:len(line.lead)-1]
}
for i, tok := range line.lead {
if i > 0 && tok.Type == hclsyntax.TokenEqual {
// We only move the tokens into "assign" if the RHS seems to
// be a whole expression, which we determine by counting
// brackets. If there's a net positive number of brackets
// then that suggests we're introducing a multi-line expression.
netBrackets := 0
for _, token := range line.lead[i:] {
netBrackets += tokenBracketChange(token)
}
if netBrackets == 0 {
line.assign = line.lead[i:]
line.lead = line.lead[:i]
}
break
}
}
}
return lines
}
func tokenIsNewline(tok *Token) bool {
if tok.Type == hclsyntax.TokenNewline {
return true
} else if tok.Type == hclsyntax.TokenComment {
// Single line tokens (# and //) consume their terminating newline,
// so we need to treat them as newline tokens as well.
if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' {
return true
}
}
return false
}
func tokenBracketChange(tok *Token) int {
switch tok.Type {
case hclsyntax.TokenOBrace, hclsyntax.TokenOBrack, hclsyntax.TokenOParen, hclsyntax.TokenTemplateControl, hclsyntax.TokenTemplateInterp:
return 1
case hclsyntax.TokenCBrace, hclsyntax.TokenCBrack, hclsyntax.TokenCParen, hclsyntax.TokenTemplateSeqEnd:
return -1
default:
return 0
}
}
// formatLine represents a single line of source code for formatting purposes,
// splitting its tokens into up to three "cells":
//
// lead: always present, representing everything up to one of the others
// assign: if line contains an attribute assignment, represents the tokens
// starting at (and including) the equals symbol
// comment: if line contains any non-comment tokens and ends with a
// single-line comment token, represents the comment.
//
// When formatting, the leading spaces of the first token in each of these
// cells are adjusted to vertically align their occurrences on consecutive
// rows.
type formatLine struct {
lead Tokens
assign Tokens
comment Tokens
}

@ -0,0 +1,252 @@
package hclwrite
import (
"fmt"
"unicode"
"unicode/utf8"
"github.com/hashicorp/hcl/v2"
"github.com/hashicorp/hcl/v2/hclsyntax"
"github.com/zclconf/go-cty/cty"
)
// TokensForValue returns a sequence of tokens that represents the given
// constant value.
//
// This function only supports types that are used by HCL. In particular, it
// does not support capsule types and will panic if given one.
//
// It is not possible to express an unknown value in source code, so this
// function will panic if the given value is unknown or contains any unknown
// values. A caller can call the value's IsWhollyKnown method to verify that
// no unknown values are present before calling TokensForValue.
func TokensForValue(val cty.Value) Tokens {
toks := appendTokensForValue(val, nil)
format(toks) // fiddle with the SpacesBefore field to get canonical spacing
return toks
}
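// A few illustrative mappings, assuming the cty values shown:
//
//	TokensForValue(cty.StringVal("hi")).Bytes()   // "hi" (with quotes)
//	TokensForValue(cty.NumberIntVal(5)).Bytes()   // 5
//	TokensForValue(cty.TupleVal([]cty.Value{
//		cty.NumberIntVal(1), cty.NumberIntVal(2),
//	})).Bytes() // [1, 2]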
// TokensForTraversal returns a sequence of tokens that represents the given
// traversal.
//
// If the traversal is absolute then the result is a self-contained, valid
// reference expression. If the traversal is relative then the returned tokens
// could be appended to some other expression tokens to traverse into the
// represented expression.
func TokensForTraversal(traversal hcl.Traversal) Tokens {
toks := appendTokensForTraversal(traversal, nil)
format(toks) // fiddle with the SpacesBefore field to get canonical spacing
return toks
}
func appendTokensForValue(val cty.Value, toks Tokens) Tokens {
switch {
case !val.IsKnown():
panic("cannot produce tokens for unknown value")
case val.IsNull():
toks = append(toks, &Token{
Type: hclsyntax.TokenIdent,
Bytes: []byte(`null`),
})
case val.Type() == cty.Bool:
var src []byte
if val.True() {
src = []byte(`true`)
} else {
src = []byte(`false`)
}
toks = append(toks, &Token{
Type: hclsyntax.TokenIdent,
Bytes: src,
})
case val.Type() == cty.Number:
bf := val.AsBigFloat()
srcStr := bf.Text('f', -1)
toks = append(toks, &Token{
Type: hclsyntax.TokenNumberLit,
Bytes: []byte(srcStr),
})
case val.Type() == cty.String:
// TODO: If it's a multi-line string ending in a newline, format
// it as a HEREDOC instead.
src := escapeQuotedStringLit(val.AsString())
toks = append(toks, &Token{
Type: hclsyntax.TokenOQuote,
Bytes: []byte{'"'},
})
if len(src) > 0 {
toks = append(toks, &Token{
Type: hclsyntax.TokenQuotedLit,
Bytes: src,
})
}
toks = append(toks, &Token{
Type: hclsyntax.TokenCQuote,
Bytes: []byte{'"'},
})
case val.Type().IsListType() || val.Type().IsSetType() || val.Type().IsTupleType():
toks = append(toks, &Token{
Type: hclsyntax.TokenOBrack,
Bytes: []byte{'['},
})
i := 0
for it := val.ElementIterator(); it.Next(); {
if i > 0 {
toks = append(toks, &Token{
Type: hclsyntax.TokenComma,
Bytes: []byte{','},
})
}
_, eVal := it.Element()
toks = appendTokensForValue(eVal, toks)
i++
}
toks = append(toks, &Token{
Type: hclsyntax.TokenCBrack,
Bytes: []byte{']'},
})
case val.Type().IsMapType() || val.Type().IsObjectType():
toks = append(toks, &Token{
Type: hclsyntax.TokenOBrace,
Bytes: []byte{'{'},
})
i := 0
for it := val.ElementIterator(); it.Next(); {
if i > 0 {
toks = append(toks, &Token{
Type: hclsyntax.TokenComma,
Bytes: []byte{','},
})
}
eKey, eVal := it.Element()
if hclsyntax.ValidIdentifier(eKey.AsString()) {
toks = append(toks, &Token{
Type: hclsyntax.TokenIdent,
Bytes: []byte(eKey.AsString()),
})
} else {
toks = appendTokensForValue(eKey, toks)
}
toks = append(toks, &Token{
Type: hclsyntax.TokenEqual,
Bytes: []byte{'='},
})
toks = appendTokensForValue(eVal, toks)
i++
}
toks = append(toks, &Token{
Type: hclsyntax.TokenCBrace,
Bytes: []byte{'}'},
})
default:
panic(fmt.Sprintf("cannot produce tokens for %#v", val))
}
return toks
}
func appendTokensForTraversal(traversal hcl.Traversal, toks Tokens) Tokens {
for _, step := range traversal {
toks = appendTokensForTraversalStep(step, toks)
}
return toks
}
func appendTokensForTraversalStep(step hcl.Traverser, toks Tokens) Tokens {
switch ts := step.(type) {
case hcl.TraverseRoot:
toks = append(toks, &Token{
Type: hclsyntax.TokenIdent,
Bytes: []byte(ts.Name),
})
case hcl.TraverseAttr:
toks = append(
toks,
&Token{
Type: hclsyntax.TokenDot,
Bytes: []byte{'.'},
},
&Token{
Type: hclsyntax.TokenIdent,
Bytes: []byte(ts.Name),
},
)
case hcl.TraverseIndex:
toks = append(toks, &Token{
Type: hclsyntax.TokenOBrack,
Bytes: []byte{'['},
})
toks = appendTokensForValue(ts.Key, toks)
toks = append(toks, &Token{
Type: hclsyntax.TokenCBrack,
Bytes: []byte{']'},
})
default:
panic(fmt.Sprintf("unsupported traversal step type %T", step))
}
return toks
}
func escapeQuotedStringLit(s string) []byte {
if len(s) == 0 {
return nil
}
buf := make([]byte, 0, len(s))
for i, r := range s {
switch r {
case '\n':
buf = append(buf, '\\', 'n')
case '\r':
buf = append(buf, '\\', 'r')
case '\t':
buf = append(buf, '\\', 't')
case '"':
buf = append(buf, '\\', '"')
case '\\':
buf = append(buf, '\\', '\\')
case '$', '%':
buf = appendRune(buf, r)
remain := s[i+1:]
if len(remain) > 0 && remain[0] == '{' {
// Double up our template introducer symbol to escape it.
buf = appendRune(buf, r)
}
default:
if !unicode.IsPrint(r) {
var fmted string
if r < 65536 {
fmted = fmt.Sprintf("\\u%04x", r)
} else {
fmted = fmt.Sprintf("\\U%08x", r)
}
buf = append(buf, fmted...)
} else {
buf = appendRune(buf, r)
}
}
}
return buf
}
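// Some illustrative input/output pairs for the escaping above:
//
//	escapeQuotedStringLit("a\nb")     // produces: a\nb (backslash-n, not a real newline)
//	escapeQuotedStringLit(`say "hi"`) // produces: say \"hi\"
//	escapeQuotedStringLit("${x}")     // produces: $${x} (template introducer doubled)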
func appendRune(b []byte, r rune) []byte {
l := utf8.RuneLen(r)
for i := 0; i < l; i++ {
b = append(b, 0) // make room at the end of our buffer
}
ch := b[len(b)-l:]
utf8.EncodeRune(ch, r)
return b
}

@ -0,0 +1,23 @@
package hclwrite
import (
"github.com/hashicorp/hcl/v2/hclsyntax"
)
type nativeNodeSorter struct {
Nodes []hclsyntax.Node
}
func (s nativeNodeSorter) Len() int {
return len(s.Nodes)
}
func (s nativeNodeSorter) Less(i, j int) bool {
rangeI := s.Nodes[i].Range()
rangeJ := s.Nodes[j].Range()
return rangeI.Start.Byte < rangeJ.Start.Byte
}
func (s nativeNodeSorter) Swap(i, j int) {
s.Nodes[i], s.Nodes[j] = s.Nodes[j], s.Nodes[i]
}

@ -0,0 +1,260 @@
package hclwrite
import (
"fmt"
"github.com/google/go-cmp/cmp"
)
// node represents a node in the AST.
type node struct {
content nodeContent
list *nodes
before, after *node
}
func newNode(c nodeContent) *node {
return &node{
content: c,
}
}
func (n *node) Equal(other *node) bool {
return cmp.Equal(n.content, other.content)
}
func (n *node) BuildTokens(to Tokens) Tokens {
return n.content.BuildTokens(to)
}
// Detach removes the receiver from the list it currently belongs to. If the
// node is not currently in a list, this is a no-op.
func (n *node) Detach() {
if n.list == nil {
return
}
if n.before != nil {
n.before.after = n.after
}
if n.after != nil {
n.after.before = n.before
}
if n.list.first == n {
n.list.first = n.after
}
if n.list.last == n {
n.list.last = n.before
}
n.list = nil
n.before = nil
n.after = nil
}
// ReplaceWith removes the receiver from the list it currently belongs to and
// inserts a new node with the given content in its place. If the node is not
// currently in a list, this function will panic.
//
// The return value is the newly-constructed node, containing the given content.
// After this function returns, the receiver is no longer attached to a list.
func (n *node) ReplaceWith(c nodeContent) *node {
if n.list == nil {
panic("can't replace node that is not in a list")
}
before := n.before
after := n.after
list := n.list
n.before, n.after, n.list = nil, nil, nil
nn := newNode(c)
nn.before = before
nn.after = after
nn.list = list
if before != nil {
before.after = nn
}
if after != nil {
after.before = nn
}
return nn
}
func (n *node) assertUnattached() {
if n.list != nil {
panic(fmt.Sprintf("attempt to attach already-attached node %#v", n))
}
}
// nodeContent is the interface type implemented by all AST content types.
type nodeContent interface {
walkChildNodes(w internalWalkFunc)
BuildTokens(to Tokens) Tokens
}
// nodes is a list of nodes.
type nodes struct {
first, last *node
}
func (ns *nodes) BuildTokens(to Tokens) Tokens {
for n := ns.first; n != nil; n = n.after {
to = n.BuildTokens(to)
}
return to
}
func (ns *nodes) Clear() {
ns.first = nil
ns.last = nil
}
func (ns *nodes) Append(c nodeContent) *node {
n := &node{
content: c,
}
ns.AppendNode(n)
n.list = ns
return n
}
func (ns *nodes) AppendNode(n *node) {
if ns.last != nil {
n.before = ns.last
ns.last.after = n
}
n.list = ns
ns.last = n
if ns.first == nil {
ns.first = n
}
}
func (ns *nodes) AppendUnstructuredTokens(tokens Tokens) *node {
if len(tokens) == 0 {
return nil
}
n := newNode(tokens)
ns.AppendNode(n)
n.list = ns
return n
}
// FindNodeWithContent searches the nodes for a node whose content equals
// the given content. If it finds one then it returns it. Otherwise it returns
// nil.
func (ns *nodes) FindNodeWithContent(content nodeContent) *node {
for n := ns.first; n != nil; n = n.after {
if n.content == content {
return n
}
}
return nil
}
// nodeSet is an unordered set of nodes. It is used to describe a set of nodes
// that all belong to the same list that have some role or characteristic
// in common.
type nodeSet map[*node]struct{}
func newNodeSet() nodeSet {
return make(nodeSet)
}
func (ns nodeSet) Has(n *node) bool {
if ns == nil {
return false
}
_, exists := ns[n]
return exists
}
func (ns nodeSet) Add(n *node) {
ns[n] = struct{}{}
}
func (ns nodeSet) Remove(n *node) {
delete(ns, n)
}
func (ns nodeSet) List() []*node {
if len(ns) == 0 {
return nil
}
ret := make([]*node, 0, len(ns))
// Determine which list we are working with. We assume here that all of
// the nodes belong to the same list, since that is part of the contract
// for nodeSet.
var list *nodes
for n := range ns {
list = n.list
break
}
// We recover the order by iterating over the whole list. This is not
// the most efficient way to do it, but our node lists should always be
// small so not worth making things more complex.
for n := list.first; n != nil; n = n.after {
if ns.Has(n) {
ret = append(ret, n)
}
}
return ret
}
// FindNodeWithContent searches the nodes for a node whose content equals
// the given content. If it finds one then it returns it. Otherwise it returns
// nil.
func (ns nodeSet) FindNodeWithContent(content nodeContent) *node {
for n := range ns {
if n.content == content {
return n
}
}
return nil
}
type internalWalkFunc func(*node)
// inTree can be embedded into a content struct that has child nodes to get
// a standard implementation of the nodeContent interface and a record of
// a potential parent node.
type inTree struct {
parent *node
children *nodes
}
func newInTree() inTree {
return inTree{
children: &nodes{},
}
}
func (it *inTree) assertUnattached() {
if it.parent != nil {
panic(fmt.Sprintf("node is already attached to %T", it.parent.content))
}
}
func (it *inTree) walkChildNodes(w internalWalkFunc) {
for n := it.children.first; n != nil; n = n.after {
w(n)
}
}
func (it *inTree) BuildTokens(to Tokens) Tokens {
for n := it.children.first; n != nil; n = n.after {
to = n.BuildTokens(to)
}
return to
}
// leafNode can be embedded into a content struct to give it a do-nothing
// implementation of walkChildNodes
type leafNode struct {
}
func (n *leafNode) walkChildNodes(w internalWalkFunc) {
}

@ -0,0 +1,599 @@
package hclwrite
import (
"fmt"
"sort"
"github.com/hashicorp/hcl/v2"
"github.com/hashicorp/hcl/v2/hclsyntax"
"github.com/zclconf/go-cty/cty"
)
// Our "parser" here is actually not doing any parsing of its own. Instead,
// it leans on the native parser in hclsyntax, and then uses the source ranges
// from the AST to partition the raw token sequence to match the raw tokens
// up to AST nodes.
//
// This strategy feels somewhat counter-intuitive, since most of the work the
// parser does is thrown away here, but this strategy is chosen because the
// normal parsing work done by hclsyntax is considered to be the "main case",
// while modifying and re-printing source is more of an edge case, used only
// in ancillary tools, and so it's good to keep all the main parsing logic
// with the main case but keep all of the extra complexity of token wrangling
// out of the main parser, which is already rather complex just serving the
// use-cases it already serves.
//
// If the parsing step produces any errors, the returned File is nil because
// we can't reliably extract tokens from the partial AST produced by an
// erroneous parse.
func parse(src []byte, filename string, start hcl.Pos) (*File, hcl.Diagnostics) {
file, diags := hclsyntax.ParseConfig(src, filename, start)
if diags.HasErrors() {
return nil, diags
}
// To do our work here, we use the "native" tokens (those from hclsyntax)
// to match against source ranges in the AST, but ultimately produce
// slices from our sequence of "writer" tokens, which contain only
// *relative* position information that is more appropriate for
// transformation/writing use-cases.
nativeTokens, diags := hclsyntax.LexConfig(src, filename, start)
if diags.HasErrors() {
// should never happen, since we would've caught these diags in
// the first call above.
return nil, diags
}
writerTokens := writerTokens(nativeTokens)
from := inputTokens{
nativeTokens: nativeTokens,
writerTokens: writerTokens,
}
before, root, after := parseBody(file.Body.(*hclsyntax.Body), from)
ret := &File{
inTree: newInTree(),
srcBytes: src,
body: root,
}
nodes := ret.inTree.children
nodes.Append(before.Tokens())
nodes.AppendNode(root)
nodes.Append(after.Tokens())
return ret, diags
}
type inputTokens struct {
nativeTokens hclsyntax.Tokens
writerTokens Tokens
}
func (it inputTokens) Partition(rng hcl.Range) (before, within, after inputTokens) {
start, end := partitionTokens(it.nativeTokens, rng)
before = it.Slice(0, start)
within = it.Slice(start, end)
after = it.Slice(end, len(it.nativeTokens))
return
}
func (it inputTokens) PartitionType(ty hclsyntax.TokenType) (before, within, after inputTokens) {
for i, t := range it.writerTokens {
if t.Type == ty {
return it.Slice(0, i), it.Slice(i, i+1), it.Slice(i+1, len(it.nativeTokens))
}
}
panic(fmt.Sprintf("didn't find any token of type %s", ty))
}
func (it inputTokens) PartitionTypeSingle(ty hclsyntax.TokenType) (before inputTokens, found *Token, after inputTokens) {
before, within, after := it.PartitionType(ty)
if within.Len() != 1 {
panic("PartitionType found more than one token")
}
return before, within.Tokens()[0], after
}
// PartitionIncludingComments is like Partition except the returned "within"
// range includes any lead and line comments associated with the range.
func (it inputTokens) PartitionIncludingComments(rng hcl.Range) (before, within, after inputTokens) {
start, end := partitionTokens(it.nativeTokens, rng)
start = partitionLeadCommentTokens(it.nativeTokens[:start])
_, afterNewline := partitionLineEndTokens(it.nativeTokens[end:])
end += afterNewline
before = it.Slice(0, start)
within = it.Slice(start, end)
after = it.Slice(end, len(it.nativeTokens))
return
}
// PartitionBlockItem is similar to PartitionIncludingComments but it returns
// the comments as separate token sequences so that they can be captured into
// AST attributes. It makes assumptions that apply only to block items, so
// should not be used for other constructs.
func (it inputTokens) PartitionBlockItem(rng hcl.Range) (before, leadComments, within, lineComments, newline, after inputTokens) {
before, within, after = it.Partition(rng)
before, leadComments = before.PartitionLeadComments()
lineComments, newline, after = after.PartitionLineEndTokens()
return
}
func (it inputTokens) PartitionLeadComments() (before, within inputTokens) {
start := partitionLeadCommentTokens(it.nativeTokens)
before = it.Slice(0, start)
within = it.Slice(start, len(it.nativeTokens))
return
}
func (it inputTokens) PartitionLineEndTokens() (comments, newline, after inputTokens) {
afterComments, afterNewline := partitionLineEndTokens(it.nativeTokens)
comments = it.Slice(0, afterComments)
newline = it.Slice(afterComments, afterNewline)
after = it.Slice(afterNewline, len(it.nativeTokens))
return
}
func (it inputTokens) Slice(start, end int) inputTokens {
// When we slice, we create a new slice with no additional capacity because
// we expect that these slices will be mutated in order to insert
// new code into the AST, and we want to ensure that a new underlying
// array gets allocated in that case, rather than writing into some
// following slice and corrupting it.
return inputTokens{
nativeTokens: it.nativeTokens[start:end:end],
writerTokens: it.writerTokens[start:end:end],
}
}
func (it inputTokens) Len() int {
return len(it.nativeTokens)
}
func (it inputTokens) Tokens() Tokens {
return it.writerTokens
}
func (it inputTokens) Types() []hclsyntax.TokenType {
ret := make([]hclsyntax.TokenType, len(it.nativeTokens))
for i, tok := range it.nativeTokens {
ret[i] = tok.Type
}
return ret
}
// parseBody locates the given body within the given input tokens and returns
// the resulting *Body object as well as the tokens that appeared before and
// after it.
func parseBody(nativeBody *hclsyntax.Body, from inputTokens) (inputTokens, *node, inputTokens) {
before, within, after := from.PartitionIncludingComments(nativeBody.SrcRange)
// The main AST doesn't retain the original source ordering of the
// body items, so we need to reconstruct that ordering by inspecting
// their source ranges.
nativeItems := make([]hclsyntax.Node, 0, len(nativeBody.Attributes)+len(nativeBody.Blocks))
for _, nativeAttr := range nativeBody.Attributes {
nativeItems = append(nativeItems, nativeAttr)
}
for _, nativeBlock := range nativeBody.Blocks {
nativeItems = append(nativeItems, nativeBlock)
}
sort.Sort(nativeNodeSorter{nativeItems})
body := &Body{
inTree: newInTree(),
items: newNodeSet(),
}
remain := within
for _, nativeItem := range nativeItems {
beforeItem, item, afterItem := parseBodyItem(nativeItem, remain)
if beforeItem.Len() > 0 {
body.AppendUnstructuredTokens(beforeItem.Tokens())
}
body.appendItemNode(item)
remain = afterItem
}
if remain.Len() > 0 {
body.AppendUnstructuredTokens(remain.Tokens())
}
return before, newNode(body), after
}
func parseBodyItem(nativeItem hclsyntax.Node, from inputTokens) (inputTokens, *node, inputTokens) {
before, leadComments, within, lineComments, newline, after := from.PartitionBlockItem(nativeItem.Range())
var item *node
switch tItem := nativeItem.(type) {
case *hclsyntax.Attribute:
item = parseAttribute(tItem, within, leadComments, lineComments, newline)
case *hclsyntax.Block:
item = parseBlock(tItem, within, leadComments, lineComments, newline)
default:
// should never happen if caller is behaving
panic("unsupported native item type")
}
return before, item, after
}
func parseAttribute(nativeAttr *hclsyntax.Attribute, from, leadComments, lineComments, newline inputTokens) *node {
attr := &Attribute{
inTree: newInTree(),
}
children := attr.inTree.children
{
cn := newNode(newComments(leadComments.Tokens()))
attr.leadComments = cn
children.AppendNode(cn)
}
before, nameTokens, from := from.Partition(nativeAttr.NameRange)
{
children.AppendUnstructuredTokens(before.Tokens())
if nameTokens.Len() != 1 {
// Should never happen with valid input
panic("attribute name is not exactly one token")
}
token := nameTokens.Tokens()[0]
in := newNode(newIdentifier(token))
attr.name = in
children.AppendNode(in)
}
before, equalsTokens, from := from.Partition(nativeAttr.EqualsRange)
children.AppendUnstructuredTokens(before.Tokens())
children.AppendUnstructuredTokens(equalsTokens.Tokens())
before, exprTokens, from := from.Partition(nativeAttr.Expr.Range())
{
children.AppendUnstructuredTokens(before.Tokens())
exprNode := parseExpression(nativeAttr.Expr, exprTokens)
attr.expr = exprNode
children.AppendNode(exprNode)
}
{
cn := newNode(newComments(lineComments.Tokens()))
attr.lineComments = cn
children.AppendNode(cn)
}
children.AppendUnstructuredTokens(newline.Tokens())
// Collect any stragglers, though there shouldn't be any
children.AppendUnstructuredTokens(from.Tokens())
return newNode(attr)
}
func parseBlock(nativeBlock *hclsyntax.Block, from, leadComments, lineComments, newline inputTokens) *node {
block := &Block{
inTree: newInTree(),
labels: newNodeSet(),
}
children := block.inTree.children
{
cn := newNode(newComments(leadComments.Tokens()))
block.leadComments = cn
children.AppendNode(cn)
}
before, typeTokens, from := from.Partition(nativeBlock.TypeRange)
{
children.AppendUnstructuredTokens(before.Tokens())
if typeTokens.Len() != 1 {
// Should never happen with valid input
panic("block type name is not exactly one token")
}
token := typeTokens.Tokens()[0]
in := newNode(newIdentifier(token))
block.typeName = in
children.AppendNode(in)
}
for _, rng := range nativeBlock.LabelRanges {
var labelTokens inputTokens
before, labelTokens, from = from.Partition(rng)
children.AppendUnstructuredTokens(before.Tokens())
tokens := labelTokens.Tokens()
var ln *node
if len(tokens) == 1 && tokens[0].Type == hclsyntax.TokenIdent {
ln = newNode(newIdentifier(tokens[0]))
} else {
ln = newNode(newQuoted(tokens))
}
block.labels.Add(ln)
children.AppendNode(ln)
}
before, oBrace, from := from.Partition(nativeBlock.OpenBraceRange)
children.AppendUnstructuredTokens(before.Tokens())
children.AppendUnstructuredTokens(oBrace.Tokens())
// We go a bit out of order here: we go hunting for the closing brace
// so that we have a delimited body, but then we'll deal with the body
// before we actually append the closing brace and any straggling tokens
// that appear after it.
bodyTokens, cBrace, from := from.Partition(nativeBlock.CloseBraceRange)
before, body, after := parseBody(nativeBlock.Body, bodyTokens)
children.AppendUnstructuredTokens(before.Tokens())
block.body = body
children.AppendNode(body)
children.AppendUnstructuredTokens(after.Tokens())
children.AppendUnstructuredTokens(cBrace.Tokens())
// stragglers
children.AppendUnstructuredTokens(from.Tokens())
if lineComments.Len() > 0 {
// blocks don't actually have line comments, so we'll just treat
// them as extra stragglers
children.AppendUnstructuredTokens(lineComments.Tokens())
}
children.AppendUnstructuredTokens(newline.Tokens())
return newNode(block)
}
func parseExpression(nativeExpr hclsyntax.Expression, from inputTokens) *node {
expr := newExpression()
children := expr.inTree.children
nativeVars := nativeExpr.Variables()
for _, nativeTraversal := range nativeVars {
before, traversal, after := parseTraversal(nativeTraversal, from)
children.AppendUnstructuredTokens(before.Tokens())
children.AppendNode(traversal)
expr.absTraversals.Add(traversal)
from = after
}
// Attach any stragglers that don't belong to a traversal to the expression
// itself. In an expression with no traversals at all, this is just the
// entirety of "from".
children.AppendUnstructuredTokens(from.Tokens())
return newNode(expr)
}
func parseTraversal(nativeTraversal hcl.Traversal, from inputTokens) (before inputTokens, n *node, after inputTokens) {
traversal := newTraversal()
children := traversal.inTree.children
before, from, after = from.Partition(nativeTraversal.SourceRange())
stepAfter := from
for _, nativeStep := range nativeTraversal {
before, step, after := parseTraversalStep(nativeStep, stepAfter)
children.AppendUnstructuredTokens(before.Tokens())
children.AppendNode(step)
traversal.steps.Add(step)
stepAfter = after
}
return before, newNode(traversal), after
}
func parseTraversalStep(nativeStep hcl.Traverser, from inputTokens) (before inputTokens, n *node, after inputTokens) {
var children *nodes
switch tNativeStep := nativeStep.(type) {
case hcl.TraverseRoot, hcl.TraverseAttr:
step := newTraverseName()
children = step.inTree.children
before, from, after = from.Partition(nativeStep.SourceRange())
inBefore, token, inAfter := from.PartitionTypeSingle(hclsyntax.TokenIdent)
name := newIdentifier(token)
children.AppendUnstructuredTokens(inBefore.Tokens())
step.name = children.Append(name)
children.AppendUnstructuredTokens(inAfter.Tokens())
return before, newNode(step), after
case hcl.TraverseIndex:
step := newTraverseIndex()
children = step.inTree.children
before, from, after = from.Partition(nativeStep.SourceRange())
var inBefore, oBrack, keyTokens, cBrack inputTokens
inBefore, oBrack, from = from.PartitionType(hclsyntax.TokenOBrack)
children.AppendUnstructuredTokens(inBefore.Tokens())
children.AppendUnstructuredTokens(oBrack.Tokens())
keyTokens, cBrack, from = from.PartitionType(hclsyntax.TokenCBrack)
keyVal := tNativeStep.Key
switch keyVal.Type() {
case cty.String:
key := newQuoted(keyTokens.Tokens())
step.key = children.Append(key)
case cty.Number:
valBefore, valToken, valAfter := keyTokens.PartitionTypeSingle(hclsyntax.TokenNumberLit)
children.AppendUnstructuredTokens(valBefore.Tokens())
key := newNumber(valToken)
step.key = children.Append(key)
children.AppendUnstructuredTokens(valAfter.Tokens())
}
children.AppendUnstructuredTokens(cBrack.Tokens())
children.AppendUnstructuredTokens(from.Tokens())
return before, newNode(step), after
default:
panic(fmt.Sprintf("unsupported traversal step type %T", nativeStep))
}
}
// writerTokens takes a sequence of tokens as produced by the main hclsyntax
// package and transforms it into an equivalent sequence of tokens using
// this package's own token model.
//
// The resulting list contains the same number of tokens and uses the same
// indices as the input, allowing the two sets of tokens to be correlated
// by index.
func writerTokens(nativeTokens hclsyntax.Tokens) Tokens {
// Ultimately we want a slice of token _pointers_, but since we can
// predict how much memory we're going to devote to tokens we'll allocate
// it all as a single flat buffer and thus give the GC less work to do.
tokBuf := make([]Token, len(nativeTokens))
var lastByteOffset int
for i, mainToken := range nativeTokens {
// Create a copy of the bytes so that we can mutate without
// corrupting the original token stream.
bytes := make([]byte, len(mainToken.Bytes))
copy(bytes, mainToken.Bytes)
tokBuf[i] = Token{
Type: mainToken.Type,
Bytes: bytes,
// We assume here that spaces are always ASCII spaces, since
// that's what the scanner also assumes, and thus the number
// of bytes skipped is also the number of space characters.
SpacesBefore: mainToken.Range.Start.Byte - lastByteOffset,
}
lastByteOffset = mainToken.Range.End.Byte
}
// Now make a slice of pointers into the previous slice.
ret := make(Tokens, len(tokBuf))
for i := range ret {
ret[i] = &tokBuf[i]
}
return ret
}
// partitionTokens takes a sequence of tokens and a hcl.Range and returns
// two indices within the token sequence that correspond with the range
// boundaries, such that the slice operator could be used to produce
// three token sequences for before, within, and after respectively:
//
// start, end := partitionTokens(toks, rng)
// before := toks[:start]
// within := toks[start:end]
// after := toks[end:]
//
// This works best when the range is aligned with token boundaries (e.g.
// because it was produced in terms of the scanner's result) but if that isn't
// true then it will make a best effort that may produce strange results at
// the boundaries.
//
// Native hclsyntax tokens are used here, because they contain the necessary
// absolute position information. However, since writerTokens produces a
// correlatable sequence of writer tokens, the resulting indices can be
// used also to index into its result, allowing the partitioning of writer
// tokens to be driven by the partitioning of native tokens.
//
// The tokens are assumed to be in source order and non-overlapping, which
// will be true if the token sequence from the scanner is used directly.
func partitionTokens(toks hclsyntax.Tokens, rng hcl.Range) (start, end int) {
// We use a linear search here because we assume that in most cases our
// target range is close to the beginning of the sequence, and the sequences
// are generally small for most reasonable files anyway.
for i := 0; ; i++ {
if i >= len(toks) {
// No tokens for the given range at all!
return len(toks), len(toks)
}
if toks[i].Range.Start.Byte >= rng.Start.Byte {
start = i
break
}
}
for i := start; ; i++ {
if i >= len(toks) {
// The range "hangs off" the end of the token sequence
return start, len(toks)
}
if toks[i].Range.Start.Byte >= rng.End.Byte {
end = i // end marker is exclusive
break
}
}
return start, end
}
// partitionLeadCommentTokens takes a sequence of tokens that is assumed
// to immediately precede a construct that can have lead comment tokens,
// and returns the index into that sequence where the lead comments begin.
//
// Lead comments are defined as whole lines containing only comment tokens
// with no blank lines between. If no such lines are found, the returned
// index will be len(toks).
func partitionLeadCommentTokens(toks hclsyntax.Tokens) int {
// single-line comments (which is what we're interested in here)
// consume their trailing newline, so we can just walk backwards
// until we stop seeing comment tokens.
for i := len(toks) - 1; i >= 0; i-- {
if toks[i].Type != hclsyntax.TokenComment {
return i + 1
}
}
return 0
}
// partitionLineEndTokens takes a sequence of tokens that is assumed
// to immediately follow a construct that can have a line comment, and
// returns first the index where any line comments end and then second
// the index immediately after the trailing newline.
//
// Line comments are defined as comments that appear immediately after
// a construct on the same line where its significant tokens ended.
//
// Since single-line comment tokens (# and //) include the newline that
// terminates them, in the presence of these the two returned indices
// will be the same since the comment itself serves as the line end.
func partitionLineEndTokens(toks hclsyntax.Tokens) (afterComment, afterNewline int) {
for i := 0; i < len(toks); i++ {
tok := toks[i]
if tok.Type != hclsyntax.TokenComment {
switch tok.Type {
case hclsyntax.TokenNewline:
return i, i + 1
case hclsyntax.TokenEOF:
// Although this is valid, we mustn't include the EOF
// itself as our "newline" or else strange things will
// happen when we try to append new items.
return i, i
default:
// If we have well-formed input here then nothing else should be
// possible. This path should never happen, because we only try
// to extract tokens from the sequence if the parser succeeded,
// and it should catch this problem itself.
panic("malformed line trailers: expected only comments and newlines")
}
}
if len(tok.Bytes) > 0 && tok.Bytes[len(tok.Bytes)-1] == '\n' {
// Newline at the end of a single-line comment serves both as
// the end of comments *and* the end of the line.
return i + 1, i + 1
}
}
return len(toks), len(toks)
}
// lexConfig uses the hclsyntax scanner to get a token stream and then
// rewrites it into this package's token model.
//
// Any errors produced during scanning are ignored, so the results of this
// function should be used with care.
func lexConfig(src []byte) Tokens {
mainTokens, _ := hclsyntax.LexConfig(src, "", hcl.Pos{Byte: 0, Line: 1, Column: 1})
return writerTokens(mainTokens)
}

@ -0,0 +1,44 @@
package hclwrite
import (
"bytes"
"github.com/hashicorp/hcl/v2"
)
// NewFile creates a new file object that is empty and ready to have constructs
// added to it.
func NewFile() *File {
body := &Body{
inTree: newInTree(),
items: newNodeSet(),
}
file := &File{
inTree: newInTree(),
}
file.body = file.inTree.children.Append(body)
return file
}
// ParseConfig interprets the given source bytes into a *hclwrite.File. The
// resulting AST can be used to perform surgical edits on the source code
// before turning it back into bytes again.
func ParseConfig(src []byte, filename string, start hcl.Pos) (*File, hcl.Diagnostics) {
return parse(src, filename, start)
}
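// A minimal parse-edit-serialize round trip might look like the sketch
// below (the filename is arbitrary, and Body and Bytes are the File
// accessors assumed from the rest of this package):
//
//	f, diags := ParseConfig(src, "example.hcl", hcl.Pos{Line: 1, Column: 1})
//	if diags.HasErrors() {
//		return diags
//	}
//	f.Body().SetAttributeValue("enabled", cty.True)
//	out := f.Bytes() // unchanged regions keep their original layout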
// Format takes source code and performs simple whitespace changes to transform
// it to a canonical layout style.
//
// Format skips constructing an AST and works directly with tokens, so it
// is less expensive than formatting via the AST for situations where no other
// changes will be made. It also ignores syntax errors and can thus be applied
// to partial source code, although the result in that case may not be
// desirable.
func Format(src []byte) []byte {
tokens := lexConfig(src)
format(tokens)
buf := &bytes.Buffer{}
tokens.WriteTo(buf)
return buf.Bytes()
}
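// For instance, Format normalizes only whitespace, so loosely-spaced input
// like the following is canonicalized without touching its content:
//
//	in := []byte("a   =    1\nb = [ 1,2 ]\n")
//	out := Format(in)
//	// string(out) == "a = 1\nb = [1, 2]\n"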

@ -0,0 +1,122 @@
package hclwrite
import (
"bytes"
"io"
"github.com/apparentlymart/go-textseg/v12/textseg"
"github.com/hashicorp/hcl/v2"
"github.com/hashicorp/hcl/v2/hclsyntax"
)
// Token is a single sequence of bytes annotated with a type. It is similar
// in purpose to hclsyntax.Token, but discards the source position information
// since that is not useful in code generation.
type Token struct {
Type hclsyntax.TokenType
Bytes []byte
// We record the number of spaces before each token so that we can
// reproduce the exact layout of the original file when we're making
// surgical changes in-place. When _new_ code is created it will always
// be in the canonical style, but we preserve layout of existing code.
SpacesBefore int
}
// asHCLSyntax returns the receiver expressed as an incomplete hclsyntax.Token.
// A complete token is not possible since we don't have source location
// information here, and so this method is unexported so we can be sure it will
// only be used for internal purposes where we know the range isn't important.
//
// This is primarily intended to allow us to re-use certain functionality from
// hclsyntax rather than re-implementing it against our own token type here.
func (t *Token) asHCLSyntax() hclsyntax.Token {
return hclsyntax.Token{
Type: t.Type,
Bytes: t.Bytes,
Range: hcl.Range{
Filename: "<invalid>",
},
}
}
// Tokens is a flat list of tokens.
type Tokens []*Token
func (ts Tokens) Bytes() []byte {
buf := &bytes.Buffer{}
ts.WriteTo(buf)
return buf.Bytes()
}
func (ts Tokens) testValue() string {
return string(ts.Bytes())
}
// Columns returns the number of columns (grapheme clusters) the token sequence
// occupies. The result is not meaningful if there are newline or single-line
// comment tokens in the sequence.
func (ts Tokens) Columns() int {
ret := 0
for _, token := range ts {
ret += token.SpacesBefore // spaces are always worth one column each
ct, _ := textseg.TokenCount(token.Bytes, textseg.ScanGraphemeClusters)
ret += ct
}
return ret
}
// WriteTo takes an io.Writer and writes the bytes for each token to it,
// along with the spacing that separates each token. In other words, this
// allows serializing the tokens to a file or other such byte stream.
func (ts Tokens) WriteTo(wr io.Writer) (int64, error) {
// We know we're going to be writing a lot of small chunks of repeated
// space characters, so we'll prepare a buffer of these that we can
// easily pass to wr.Write without any further allocation.
spaces := make([]byte, 40)
for i := range spaces {
spaces[i] = ' '
}
var n int64
var err error
for _, token := range ts {
if err != nil {
return n, err
}
for spacesBefore := token.SpacesBefore; spacesBefore > 0; spacesBefore -= len(spaces) {
thisChunk := spacesBefore
if thisChunk > len(spaces) {
thisChunk = len(spaces)
}
var thisN int
thisN, err = wr.Write(spaces[:thisChunk])
n += int64(thisN)
if err != nil {
return n, err
}
}
var thisN int
thisN, err = wr.Write(token.Bytes)
n += int64(thisN)
}
return n, err
}
func (ts Tokens) walkChildNodes(w internalWalkFunc) {
// Unstructured tokens have no child nodes
}
func (ts Tokens) BuildTokens(to Tokens) Tokens {
return append(to, ts...)
}
func newIdentToken(name string) *Token {
return &Token{
Type: hclsyntax.TokenIdent,
Bytes: []byte(name),
}
}
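A small sketch (not part of this diff) of how these pieces compose: a Tokens sequence can be built directly from the exported Token type and then measured and serialized.

```go
package main

import (
	"fmt"
	"os"

	"github.com/hashicorp/hcl/v2/hclsyntax"
	"github.com/hashicorp/hcl/v2/hclwrite"
)

func main() {
	toks := hclwrite.Tokens{
		{Type: hclsyntax.TokenIdent, Bytes: []byte("count")},
		{Type: hclsyntax.TokenEqual, Bytes: []byte("="), SpacesBefore: 1},
		{Type: hclsyntax.TokenNumberLit, Bytes: []byte("3"), SpacesBefore: 1},
	}
	// 9 columns: five for "count", two leading spaces, "=", and "3".
	fmt.Println(toks.Columns())
	toks.WriteTo(os.Stdout) // count = 3
	fmt.Println()
}
```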

@ -0,0 +1,121 @@
package json
import (
"math/big"
"github.com/hashicorp/hcl/v2"
)
type node interface {
Range() hcl.Range
StartRange() hcl.Range
}
type objectVal struct {
Attrs []*objectAttr
SrcRange hcl.Range // range of the entire object, brace-to-brace
OpenRange hcl.Range // range of the opening brace
CloseRange hcl.Range // range of the closing brace
}
func (n *objectVal) Range() hcl.Range {
return n.SrcRange
}
func (n *objectVal) StartRange() hcl.Range {
return n.OpenRange
}
type objectAttr struct {
Name string
Value node
NameRange hcl.Range // range of the name string
}
func (n *objectAttr) Range() hcl.Range {
return n.NameRange
}
func (n *objectAttr) StartRange() hcl.Range {
return n.NameRange
}
type arrayVal struct {
Values []node
SrcRange hcl.Range // range of the entire array, bracket-to-bracket
OpenRange hcl.Range // range of the opening bracket
}
func (n *arrayVal) Range() hcl.Range {
return n.SrcRange
}
func (n *arrayVal) StartRange() hcl.Range {
return n.OpenRange
}
type booleanVal struct {
Value bool
SrcRange hcl.Range
}
func (n *booleanVal) Range() hcl.Range {
return n.SrcRange
}
func (n *booleanVal) StartRange() hcl.Range {
return n.SrcRange
}
type numberVal struct {
Value *big.Float
SrcRange hcl.Range
}
func (n *numberVal) Range() hcl.Range {
return n.SrcRange
}
func (n *numberVal) StartRange() hcl.Range {
return n.SrcRange
}
type stringVal struct {
Value string
SrcRange hcl.Range
}
func (n *stringVal) Range() hcl.Range {
return n.SrcRange
}
func (n *stringVal) StartRange() hcl.Range {
return n.SrcRange
}
type nullVal struct {
SrcRange hcl.Range
}
func (n *nullVal) Range() hcl.Range {
return n.SrcRange
}
func (n *nullVal) StartRange() hcl.Range {
return n.SrcRange
}
// invalidVal is used as a placeholder where a value is needed for a valid
// parse tree but the input was invalid enough to prevent one from being
// created.
type invalidVal struct {
SrcRange hcl.Range
}
func (n invalidVal) Range() hcl.Range {
return n.SrcRange
}
func (n invalidVal) StartRange() hcl.Range {
return n.SrcRange
}

@ -0,0 +1,33 @@
package json
import (
"github.com/agext/levenshtein"
)
var keywords = []string{"false", "true", "null"}
// keywordSuggestion tries to find a valid JSON keyword that is close to the
// given string and returns it if found. If no keyword is close enough, returns
// the empty string.
func keywordSuggestion(given string) string {
return nameSuggestion(given, keywords)
}
// nameSuggestion tries to find a name from the given slice of suggested names
// that is close to the given name and returns it if found. If no suggestion
// is close enough, returns the empty string.
//
// The suggestions are tried in order, so earlier suggestions take precedence
// if the given string is similar to two or more suggestions.
//
// This function is intended to be used with a relatively-small number of
// suggestions. It's not optimized for hundreds or thousands of them.
func nameSuggestion(given string, suggestions []string) string {
for _, suggestion := range suggestions {
dist := levenshtein.Distance(given, suggestion, nil)
if dist < 3 { // threshold determined experimentally
return suggestion
}
}
return ""
}
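The same idea as a standalone sketch, with a hypothetical `suggest` helper mirroring the unexported nameSuggestion above:

```go
package main

import (
	"fmt"

	"github.com/agext/levenshtein"
)

// suggest mirrors nameSuggestion: return the first candidate within
// an edit distance of 3, or "" when nothing is close enough.
func suggest(given string, candidates []string) string {
	for _, c := range candidates {
		if levenshtein.Distance(given, c, nil) < 3 {
			return c
		}
	}
	return ""
}

func main() {
	fmt.Println(suggest("nul", []string{"false", "true", "null"})) // null
}
```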

@ -0,0 +1,12 @@
// Package json is the JSON parser for HCL. It parses JSON files and returns
// implementations of the core HCL structural interfaces in terms of the
// JSON data inside.
//
// This is not a generic JSON parser. Instead, it deals with the mapping from
// the JSON information model to the HCL information model, using a number
// of hard-coded structural conventions.
//
// In most cases applications will not import this package directly, but will
// instead access its functionality indirectly through functions in the main
// "hcl" package and in the "hclparse" package.
package json

@ -0,0 +1,70 @@
package json
import (
"fmt"
"strings"
)
type navigation struct {
root node
}
// Implementation of hcled.ContextString
func (n navigation) ContextString(offset int) string {
steps := navigationStepsRev(n.root, offset)
if steps == nil {
return ""
}
// We built our slice backwards, so we'll reverse it in-place now.
half := len(steps) / 2 // integer division
for i := 0; i < half; i++ {
steps[i], steps[len(steps)-1-i] = steps[len(steps)-1-i], steps[i]
}
ret := strings.Join(steps, "")
if len(ret) > 0 && ret[0] == '.' {
ret = ret[1:]
}
return ret
}
func navigationStepsRev(v node, offset int) []string {
switch tv := v.(type) {
case *objectVal:
// Do any of our properties have an object that contains the target
// offset?
for _, attr := range tv.Attrs {
k := attr.Name
av := attr.Value
switch av.(type) {
case *objectVal, *arrayVal:
// okay
default:
continue
}
if av.Range().ContainsOffset(offset) {
return append(navigationStepsRev(av, offset), "."+k)
}
}
case *arrayVal:
// Do any of our elements contain the target offset?
for i, elem := range tv.Values {
switch elem.(type) {
case *objectVal, *arrayVal:
// okay
default:
continue
}
if elem.Range().ContainsOffset(offset) {
return append(navigationStepsRev(elem, offset), fmt.Sprintf("[%d]", i))
}
}
}
return nil
}

@ -0,0 +1,496 @@
package json
import (
"encoding/json"
"fmt"
"github.com/hashicorp/hcl/v2"
"github.com/zclconf/go-cty/cty"
)
func parseFileContent(buf []byte, filename string) (node, hcl.Diagnostics) {
tokens := scan(buf, pos{
Filename: filename,
Pos: hcl.Pos{
Byte: 0,
Line: 1,
Column: 1,
},
})
p := newPeeker(tokens)
node, diags := parseValue(p)
if len(diags) == 0 && p.Peek().Type != tokenEOF {
diags = diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Extraneous data after value",
Detail: "Extra characters appear after the JSON value.",
Subject: p.Peek().Range.Ptr(),
})
}
return node, diags
}
func parseValue(p *peeker) (node, hcl.Diagnostics) {
tok := p.Peek()
wrapInvalid := func(n node, diags hcl.Diagnostics) (node, hcl.Diagnostics) {
if n != nil {
return n, diags
}
return invalidVal{tok.Range}, diags
}
switch tok.Type {
case tokenBraceO:
return wrapInvalid(parseObject(p))
case tokenBrackO:
return wrapInvalid(parseArray(p))
case tokenNumber:
return wrapInvalid(parseNumber(p))
case tokenString:
return wrapInvalid(parseString(p))
case tokenKeyword:
return wrapInvalid(parseKeyword(p))
case tokenBraceC:
return wrapInvalid(nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Missing JSON value",
Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.",
Subject: &tok.Range,
},
})
case tokenBrackC:
return wrapInvalid(nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Missing array element value",
Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.",
Subject: &tok.Range,
},
})
case tokenEOF:
return wrapInvalid(nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Missing value",
Detail: "The JSON data ends prematurely.",
Subject: &tok.Range,
},
})
default:
return wrapInvalid(nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Invalid start of value",
Detail: "A JSON value must start with a brace, a bracket, a number, a string, or a keyword.",
Subject: &tok.Range,
},
})
}
}
func tokenCanStartValue(tok token) bool {
switch tok.Type {
case tokenBraceO, tokenBrackO, tokenNumber, tokenString, tokenKeyword:
return true
default:
return false
}
}
func parseObject(p *peeker) (node, hcl.Diagnostics) {
var diags hcl.Diagnostics
open := p.Read()
attrs := []*objectAttr{}
// recover is used to shift the peeker to what seems to be the end of
// our object, so that when we encounter an error we leave the peeker
// at a reasonable point in the token stream to continue parsing.
recover := func(tok token) {
open := 1
for {
switch tok.Type {
case tokenBraceO:
open++
case tokenBraceC:
open--
if open <= 1 {
return
}
case tokenEOF:
// Ran out of source before we were able to recover,
// so we'll bail here and let the caller deal with it.
return
}
tok = p.Read()
}
}
Token:
for {
if p.Peek().Type == tokenBraceC {
break Token
}
keyNode, keyDiags := parseValue(p)
diags = diags.Extend(keyDiags)
if keyNode == nil {
return nil, diags
}
keyStrNode, ok := keyNode.(*stringVal)
if !ok {
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid object property name",
Detail: "A JSON object property name must be a string",
Subject: keyNode.StartRange().Ptr(),
})
}
key := keyStrNode.Value
colon := p.Read()
if colon.Type != tokenColon {
recover(colon)
if colon.Type == tokenBraceC || colon.Type == tokenComma {
// Catch the common mistake of writing an object where an
// array was intended, i.e. using braces instead of brackets.
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Missing object value",
Detail: "A JSON object attribute must have a value, introduced by a colon.",
Subject: &colon.Range,
})
}
if colon.Type == tokenEquals {
// Possible confusion with native HCL syntax.
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Missing property value colon",
Detail: "JSON uses a colon as its name/value delimiter, not an equals sign.",
Subject: &colon.Range,
})
}
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Missing property value colon",
Detail: "A colon must appear between an object property's name and its value.",
Subject: &colon.Range,
})
}
valNode, valDiags := parseValue(p)
diags = diags.Extend(valDiags)
if valNode == nil {
return nil, diags
}
attrs = append(attrs, &objectAttr{
Name: key,
Value: valNode,
NameRange: keyStrNode.SrcRange,
})
switch p.Peek().Type {
case tokenComma:
comma := p.Read()
if p.Peek().Type == tokenBraceC {
// Special error message for this common mistake
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Trailing comma in object",
Detail: "JSON does not permit a trailing comma after the final property in an object.",
Subject: &comma.Range,
})
}
continue Token
case tokenEOF:
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unclosed object",
Detail: "No closing brace was found for this JSON object.",
Subject: &open.Range,
})
case tokenBrackC:
// Consume the bracket anyway, so that we don't return with the peeker
// at a strange place.
p.Read()
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Mismatched braces",
Detail: "A JSON object must be closed with a brace, not a bracket.",
Subject: p.Peek().Range.Ptr(),
})
case tokenBraceC:
break Token
default:
recover(p.Read())
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Missing attribute seperator comma",
Detail: "A comma must appear between each property definition in an object.",
Subject: p.Peek().Range.Ptr(),
})
}
}
close := p.Read()
return &objectVal{
Attrs: attrs,
SrcRange: hcl.RangeBetween(open.Range, close.Range),
OpenRange: open.Range,
CloseRange: close.Range,
}, diags
}
func parseArray(p *peeker) (node, hcl.Diagnostics) {
var diags hcl.Diagnostics
open := p.Read()
vals := []node{}
// recover is used to shift the peeker to what seems to be the end of
// our array, so that when we encounter an error we leave the peeker
// at a reasonable point in the token stream to continue parsing.
recover := func(tok token) {
open := 1
for {
switch tok.Type {
case tokenBrackO:
open++
case tokenBrackC:
open--
if open <= 1 {
return
}
case tokenEOF:
// Ran out of source before we were able to recover,
// so we'll bail here and let the caller deal with it.
return
}
tok = p.Read()
}
}
Token:
for {
if p.Peek().Type == tokenBrackC {
break Token
}
valNode, valDiags := parseValue(p)
diags = diags.Extend(valDiags)
if valNode == nil {
return nil, diags
}
vals = append(vals, valNode)
switch p.Peek().Type {
case tokenComma:
comma := p.Read()
if p.Peek().Type == tokenBrackC {
// Special error message for this common mistake
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Trailing comma in array",
Detail: "JSON does not permit a trailing comma after the final value in an array.",
Subject: &comma.Range,
})
}
continue Token
case tokenColon:
recover(p.Read())
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid array value",
Detail: "A colon is not used to introduce values in a JSON array.",
Subject: p.Peek().Range.Ptr(),
})
case tokenEOF:
recover(p.Read())
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Unclosed object",
Detail: "No closing bracket was found for this JSON array.",
Subject: &open.Range,
})
case tokenBraceC:
recover(p.Read())
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Mismatched brackets",
Detail: "A JSON array must be closed with a bracket, not a brace.",
Subject: p.Peek().Range.Ptr(),
})
case tokenBrackC:
break Token
default:
recover(p.Read())
return nil, diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Missing attribute seperator comma",
Detail: "A comma must appear between each value in an array.",
Subject: p.Peek().Range.Ptr(),
})
}
}
close := p.Read()
return &arrayVal{
Values: vals,
SrcRange: hcl.RangeBetween(open.Range, close.Range),
OpenRange: open.Range,
}, diags
}
func parseNumber(p *peeker) (node, hcl.Diagnostics) {
tok := p.Read()
// Use encoding/json to validate the number syntax.
// TODO: Do this more directly to produce better diagnostics.
var num json.Number
err := json.Unmarshal(tok.Bytes, &num)
if err != nil {
return nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Invalid JSON number",
Detail: fmt.Sprintf("There is a syntax error in the given JSON number."),
Subject: &tok.Range,
},
}
}
// We want to guarantee that we parse numbers the same way as cty (and thus
// native syntax HCL) would here, so we'll use the cty parser even though
// in most other cases we don't actually introduce cty concepts until
// decoding time. We'll unwrap the parsed float immediately afterwards, so
// the cty value is just a temporary helper.
nv, err := cty.ParseNumberVal(string(num))
if err != nil {
// Should never happen if above passed, since JSON numbers are a subset
// of what cty can parse...
return nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Invalid JSON number",
Detail: "There is a syntax error in the given JSON number.",
Subject: &tok.Range,
},
}
}
return &numberVal{
Value: nv.AsBigFloat(),
SrcRange: tok.Range,
}, nil
}
func parseString(p *peeker) (node, hcl.Diagnostics) {
tok := p.Read()
var str string
err := json.Unmarshal(tok.Bytes, &str)
if err != nil {
var errRange hcl.Range
if serr, ok := err.(*json.SyntaxError); ok {
errOfs := serr.Offset
errPos := tok.Range.Start
errPos.Byte += int(errOfs)
// TODO: Use the byte offset to properly count unicode
// characters for the column, and mark the whole of the
// character that was wrong as part of our range.
errPos.Column += int(errOfs)
errEndPos := errPos
errEndPos.Byte++
errEndPos.Column++
errRange = hcl.Range{
Filename: tok.Range.Filename,
Start: errPos,
End: errEndPos,
}
} else {
errRange = tok.Range
}
var contextRange *hcl.Range
if errRange != tok.Range {
contextRange = &tok.Range
}
// FIXME: Eventually we should parse strings directly here so
// we can produce a more useful error message in the face of things
// such as invalid escapes, etc.
return nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Invalid JSON string",
Detail: "There is a syntax error in the given JSON string.",
Subject: &errRange,
Context: contextRange,
},
}
}
return &stringVal{
Value: str,
SrcRange: tok.Range,
}, nil
}
func parseKeyword(p *peeker) (node, hcl.Diagnostics) {
tok := p.Read()
s := string(tok.Bytes)
switch s {
case "true":
return &booleanVal{
Value: true,
SrcRange: tok.Range,
}, nil
case "false":
return &booleanVal{
Value: false,
SrcRange: tok.Range,
}, nil
case "null":
return &nullVal{
SrcRange: tok.Range,
}, nil
case "undefined", "NaN", "Infinity":
return nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Invalid JSON keyword",
Detail: fmt.Sprintf("The JavaScript identifier %q cannot be used in JSON.", s),
Subject: &tok.Range,
},
}
default:
var dym string
if suggest := keywordSuggestion(s); suggest != "" {
dym = fmt.Sprintf(" Did you mean %q?", suggest)
}
return nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Invalid JSON keyword",
Detail: fmt.Sprintf("%q is not a valid JSON keyword.%s", s, dym),
Subject: &tok.Range,
},
}
}
}

@ -0,0 +1,25 @@
package json
type peeker struct {
tokens []token
pos int
}
func newPeeker(tokens []token) *peeker {
return &peeker{
tokens: tokens,
pos: 0,
}
}
func (p *peeker) Peek() token {
return p.tokens[p.pos]
}
func (p *peeker) Read() token {
ret := p.tokens[p.pos]
if ret.Type != tokenEOF {
p.pos++
}
return ret
}
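Because Read refuses to advance past EOF, parse loops can read unconditionally without bounds checks. A standalone sketch of the same pattern (with a hypothetical `item` type, since this package's token is unexported):

```go
package main

import "fmt"

type item struct{ kind, text string }

type sketchPeeker struct {
	items []item
	pos   int
}

func (p *sketchPeeker) Peek() item { return p.items[p.pos] }

// Read returns the current item and advances, but sticks at EOF so
// callers can keep reading without running off the end of the slice.
func (p *sketchPeeker) Read() item {
	it := p.items[p.pos]
	if it.kind != "EOF" {
		p.pos++
	}
	return it
}

func main() {
	p := &sketchPeeker{items: []item{{"STRING", `"a"`}, {"EOF", ""}}}
	fmt.Println(p.Read().text) // "a"
	fmt.Println(p.Read().kind) // EOF
	fmt.Println(p.Read().kind) // still EOF, no panic
}
```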

@ -0,0 +1,94 @@
package json
import (
"fmt"
"io/ioutil"
"os"
"github.com/hashicorp/hcl/v2"
)
// Parse attempts to parse the given buffer as JSON and, if successful, returns
// a hcl.File for the HCL configuration represented by it.
//
// This is not a generic JSON parser. Instead, it deals only with the profile
// of JSON used to express HCL configuration.
//
// The returned file is valid only if the returned diagnostics returns false
// from its HasErrors method. If HasErrors returns true, the file represents
// the subset of data that was able to be parsed, which may be none.
func Parse(src []byte, filename string) (*hcl.File, hcl.Diagnostics) {
rootNode, diags := parseFileContent(src, filename)
switch rootNode.(type) {
case *objectVal, *arrayVal:
// okay
default:
diags = diags.Append(&hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Root value must be object",
Detail: "The root value in a JSON-based configuration must be either a JSON object or a JSON array of objects.",
Subject: rootNode.StartRange().Ptr(),
})
// Since we've already produced an error message for this being
// invalid, we'll return an empty placeholder here so that trying to
// extract content from our root body won't produce a redundant
// error saying the same thing again in more general terms.
fakePos := hcl.Pos{
Byte: 0,
Line: 1,
Column: 1,
}
fakeRange := hcl.Range{
Filename: filename,
Start: fakePos,
End: fakePos,
}
rootNode = &objectVal{
Attrs: []*objectAttr{},
SrcRange: fakeRange,
OpenRange: fakeRange,
}
}
file := &hcl.File{
Body: &body{
val: rootNode,
},
Bytes: src,
Nav: navigation{rootNode},
}
return file, diags
}
// ParseFile is a convenience wrapper around Parse that first attempts to load
// data from the given filename, passing the result to Parse if successful.
//
// If the file cannot be read, an error diagnostic with nil context is returned.
func ParseFile(filename string) (*hcl.File, hcl.Diagnostics) {
f, err := os.Open(filename)
if err != nil {
return nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Failed to open file",
Detail: fmt.Sprintf("The file %q could not be opened.", filename),
},
}
}
defer f.Close()
src, err := ioutil.ReadAll(f)
if err != nil {
return nil, hcl.Diagnostics{
{
Severity: hcl.DiagError,
Summary: "Failed to read file",
Detail: fmt.Sprintf("The file %q was opened, but an error occured while reading it.", filename),
},
}
}
return Parse(src, filename)
}
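A minimal sketch of driving this entry point from application code (the config content and filename are invented for illustration):

```go
package main

import (
	"fmt"

	hcljson "github.com/hashicorp/hcl/v2/json"
)

func main() {
	src := []byte(`{"greeting": "hello", "count": 2}`)
	f, diags := hcljson.Parse(src, "config.json")
	if diags.HasErrors() {
		panic(diags.Error())
	}
	// JustAttributes treats every property as an attribute; passing a
	// nil EvalContext evaluates strings in literal-only mode.
	attrs, diags := f.Body.JustAttributes()
	if diags.HasErrors() {
		panic(diags.Error())
	}
	for name, attr := range attrs {
		val, _ := attr.Expr.Value(nil)
		fmt.Printf("%s = %v\n", name, val)
	}
}
```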

@ -0,0 +1,306 @@
package json
import (
"fmt"
"github.com/apparentlymart/go-textseg/v12/textseg"
"github.com/hashicorp/hcl/v2"
)
//go:generate stringer -type tokenType scanner.go
type tokenType rune
const (
tokenBraceO tokenType = '{'
tokenBraceC tokenType = '}'
tokenBrackO tokenType = '['
tokenBrackC tokenType = ']'
tokenComma tokenType = ','
tokenColon tokenType = ':'
tokenKeyword tokenType = 'K'
tokenString tokenType = 'S'
tokenNumber tokenType = 'N'
tokenEOF tokenType = '␄'
tokenInvalid tokenType = 0
tokenEquals tokenType = '=' // used only for reminding the user of JSON syntax
)
type token struct {
Type tokenType
Bytes []byte
Range hcl.Range
}
// scan returns the primary tokens for the given JSON buffer in sequence.
//
// The responsibility of this pass is to just mark the slices of the buffer
// as being of various types. It is lax in how it interprets the multi-byte
// token types keyword, string and number, preferring to capture erroneous
// extra bytes that we presume the user intended to be part of the token
// so that we can generate more helpful diagnostics in the parser.
func scan(buf []byte, start pos) []token {
var tokens []token
p := start
for {
if len(buf) == 0 {
tokens = append(tokens, token{
Type: tokenEOF,
Bytes: nil,
Range: posRange(p, p),
})
return tokens
}
buf, p = skipWhitespace(buf, p)
if len(buf) == 0 {
tokens = append(tokens, token{
Type: tokenEOF,
Bytes: nil,
Range: posRange(p, p),
})
return tokens
}
start = p
first := buf[0]
switch {
case first == '{' || first == '}' || first == '[' || first == ']' || first == ',' || first == ':' || first == '=':
p.Pos.Column++
p.Pos.Byte++
tokens = append(tokens, token{
Type: tokenType(first),
Bytes: buf[0:1],
Range: posRange(start, p),
})
buf = buf[1:]
case first == '"':
var tokBuf []byte
tokBuf, buf, p = scanString(buf, p)
tokens = append(tokens, token{
Type: tokenString,
Bytes: tokBuf,
Range: posRange(start, p),
})
case byteCanStartNumber(first):
var tokBuf []byte
tokBuf, buf, p = scanNumber(buf, p)
tokens = append(tokens, token{
Type: tokenNumber,
Bytes: tokBuf,
Range: posRange(start, p),
})
case byteCanStartKeyword(first):
var tokBuf []byte
tokBuf, buf, p = scanKeyword(buf, p)
tokens = append(tokens, token{
Type: tokenKeyword,
Bytes: tokBuf,
Range: posRange(start, p),
})
default:
tokens = append(tokens, token{
Type: tokenInvalid,
Bytes: buf[:1],
Range: start.Range(1, 1),
})
// If we've encountered an invalid then we might as well stop
// scanning since the parser won't proceed beyond this point.
// We insert a synthetic EOF marker here to match the expectations
// of consumers of this data structure.
p.Pos.Column++
p.Pos.Byte++
tokens = append(tokens, token{
Type: tokenEOF,
Bytes: nil,
Range: posRange(p, p),
})
return tokens
}
}
}
func byteCanStartNumber(b byte) bool {
switch b {
// We are slightly more tolerant than JSON requires here since we
// expect the parser will make a stricter interpretation of the
// number bytes, but we specifically don't allow 'e' or 'E' here
// since we want the scanner to treat that as the start of an
// invalid keyword instead, to produce more intelligible error messages.
case '-', '+', '.', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
return true
default:
return false
}
}
func scanNumber(buf []byte, start pos) ([]byte, []byte, pos) {
// The scanner doesn't check that the sequence of digit-ish bytes is
// in a valid order. The parser must do this when decoding a number
// token.
var i int
p := start
Byte:
for i = 0; i < len(buf); i++ {
switch buf[i] {
case '-', '+', '.', 'e', 'E', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
p.Pos.Byte++
p.Pos.Column++
default:
break Byte
}
}
return buf[:i], buf[i:], p
}
func byteCanStartKeyword(b byte) bool {
switch {
// We allow any sequence of alphabetical characters here, even though
// JSON is more constrained, so that we can collect what we presume
// the user intended to be a single keyword and then check its validity
// in the parser, where we can generate better diagnostics.
// So e.g. we want to be able to say:
// unrecognized keyword "True". Did you mean "true"?
case isAlphabetical(b):
return true
default:
return false
}
}
func scanKeyword(buf []byte, start pos) ([]byte, []byte, pos) {
var i int
p := start
Byte:
for i = 0; i < len(buf); i++ {
b := buf[i]
switch {
case isAlphabetical(b) || b == '_':
p.Pos.Byte++
p.Pos.Column++
default:
break Byte
}
}
return buf[:i], buf[i:], p
}
func scanString(buf []byte, start pos) ([]byte, []byte, pos) {
// The scanner doesn't validate correct use of escapes, etc. It pays
// attention to escapes only for the purpose of identifying the closing
// quote character. It's the parser's responsibility to do proper
// validation.
//
// The scanner also doesn't specifically detect unterminated string
// literals, though they can be identified in the parser by checking if
// the final byte in a string token is the double-quote character.
// Skip the opening quote symbol
i := 1
p := start
p.Pos.Byte++
p.Pos.Column++
escaping := false
Byte:
for i < len(buf) {
b := buf[i]
switch {
case b == '\\':
escaping = !escaping
p.Pos.Byte++
p.Pos.Column++
i++
case b == '"':
p.Pos.Byte++
p.Pos.Column++
i++
if !escaping {
break Byte
}
escaping = false
case b < 32:
break Byte
default:
// Advance by one grapheme cluster, so that we consider each
// grapheme to be a "column".
// Ignoring error because this scanner cannot produce errors.
advance, _, _ := textseg.ScanGraphemeClusters(buf[i:], true)
p.Pos.Byte += advance
p.Pos.Column++
i += advance
escaping = false
}
}
return buf[:i], buf[i:], p
}
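The grapheme-cluster advance is what keeps column counts honest for multi-byte characters; a small sketch of the underlying textseg call:

```go
package main

import (
	"fmt"

	"github.com/apparentlymart/go-textseg/v12/textseg"
)

func main() {
	// "é" occupies two bytes in UTF-8 but a single on-screen column,
	// so the scanner advances the byte offset by 2 and the column by 1.
	advance, cluster, _ := textseg.ScanGraphemeClusters([]byte("é!"), true)
	fmt.Println(advance, string(cluster)) // 2 é
}
```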
func skipWhitespace(buf []byte, start pos) ([]byte, pos) {
var i int
p := start
Byte:
for i = 0; i < len(buf); i++ {
switch buf[i] {
case ' ':
p.Pos.Byte++
p.Pos.Column++
case '\n':
p.Pos.Byte++
p.Pos.Column = 1
p.Pos.Line++
case '\r':
// For the purpose of line/column counting we consider a
// carriage return to take up no space, assuming that it will
// be paired up with a newline (on Windows, for example) that
// will account for both of them.
p.Pos.Byte++
case '\t':
// We arbitrarily count a tab as if it were two spaces, because
// we need to choose _some_ number here. This means any system
// that renders code on-screen with markers must itself treat
// tabs as a pair of spaces for rendering purposes, or instead
// use the byte offset and derive its own column position.
p.Pos.Byte++
p.Pos.Column += 2
default:
break Byte
}
}
return buf[i:], p
}
type pos struct {
Filename string
Pos hcl.Pos
}
func (p *pos) Range(byteLen, charLen int) hcl.Range {
start := p.Pos
end := p.Pos
end.Byte += byteLen
end.Column += charLen
return hcl.Range{
Filename: p.Filename,
Start: start,
End: end,
}
}
func posRange(start, end pos) hcl.Range {
return hcl.Range{
Filename: start.Filename,
Start: start.Pos,
End: end.Pos,
}
}
func (t token) GoString() string {
return fmt.Sprintf("json.token{json.%s, []byte(%q), %#v}", t.Type, t.Bytes, t.Range)
}
func isAlphabetical(b byte) bool {
return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')
}

@ -0,0 +1,405 @@
# HCL JSON Syntax Specification
This is the specification for the JSON serialization for HCL. HCL is a system
for defining configuration languages for applications. The HCL information
model is designed to support multiple concrete syntaxes for configuration,
and this JSON-based format complements [the native syntax](../hclsyntax/spec.md)
by being easy to machine-generate, whereas the native syntax is oriented
towards human authoring and maintenance.
This syntax is defined in terms of JSON as defined in
[RFC7159](https://tools.ietf.org/html/rfc7159). As such it inherits the JSON
grammar as-is, and merely defines a specific methodology for interpreting
JSON constructs into HCL structural elements and expressions.
This mapping is defined such that valid JSON-serialized HCL input can be
_produced_ using standard JSON implementations in various programming languages.
_Parsing_ such JSON has some additional constraints beyond what is normally
supported by JSON parsers, so a specialized parser may be required that
is able to:
- Preserve the relative ordering of properties defined in an object.
- Preserve multiple definitions of the same property name.
- Preserve numeric values to the precision required by the number type
in [the HCL syntax-agnostic information model](../spec.md).
- Retain source location information for parsed tokens/constructs in order
to produce good error messages.
## Structural Elements
[The HCL syntax-agnostic information model](../spec.md) defines a _body_ as an
abstract container for attribute definitions and child blocks. A body is
represented in JSON as either a single JSON object or a JSON array of objects.
Body processing is in terms of JSON object properties, visited in the order
they appear in the input. Where a body is represented by a single JSON object,
the properties of that object are visited in order. Where a body is
represented by a JSON array, each of its elements are visited in order and
each element has its properties visited in order. If any element of the array
is not a JSON object then the input is erroneous.
When a body is being processed in the _dynamic attributes_ mode, the allowance
of a JSON array in the previous paragraph does not apply and instead a single
JSON object is always required.
As defined in the language-agnostic model, body processing is in terms
of a schema which provides context for interpreting the body's content. For
JSON bodies, the schema is crucial to allow differentiation of attribute
definitions and block definitions, both of which are represented via object
properties.
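As a non-normative sketch, the following Go program (using the `hcl` and `json` packages in this repository; the property names are invented) shows the schema disambiguating an attribute from a block:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	hcljson "github.com/hashicorp/hcl/v2/json"
)

func main() {
	src := []byte(`{"region": "us-east-1", "server": {"web": {"port": 80}}}`)
	f, _ := hcljson.Parse(src, "config.json")

	// The schema tells the JSON body which properties are attributes
	// and which are blocks; the JSON shapes alone are ambiguous.
	schema := &hcl.BodySchema{
		Attributes: []hcl.AttributeSchema{{Name: "region"}},
		Blocks:     []hcl.BlockHeaderSchema{{Type: "server", LabelNames: []string{"name"}}},
	}
	content, diags := f.Body.Content(schema)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	fmt.Println(len(content.Blocks), content.Blocks[0].Labels) // 1 [web]
}
```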
The special property name `"//"`, when used in an object representing a HCL
body, is parsed and ignored. A property with this name can be used to
include human-readable comments. (This special property name is _not_
processed in this way for any _other_ HCL constructs that are represented as
JSON objects.)
### Attributes
Where the given schema describes an attribute with a given name, the object
property with the matching name — if present — serves as the attribute's
definition.
When a body is being processed in the _dynamic attributes_ mode, each object
property serves as an attribute definition for the attribute whose name
matches the property name.
The value of an attribute definition property is interpreted as an _expression_,
as described in a later section.
Given a schema that calls for an attribute named "foo", a JSON object like
the following provides a definition for that attribute:
```json
{
"foo": "bar baz"
}
```
### Blocks
Where the given schema describes a block with a given type name, each object
property with the matching name serves as a definition of zero or more blocks
of that type.
Processing of child blocks is in terms of nested JSON objects and arrays.
If the schema defines one or more _labels_ for the block type, a nested JSON
object or JSON array of objects is required for each labelling level. These
are flattened to a single ordered sequence of object properties using the
same algorithm as for body content as defined above. Each object property
serves as a label value at the corresponding level.
After any labelling levels, the next nested value is either a JSON object
representing a single block body, or a JSON array of JSON objects that each
represent a single block body. Use of an array accommodates the definition
of multiple blocks that have identical type and labels.
Given a schema that calls for a block type named "foo" with no labels, the
following JSON objects are all valid definitions of zero or more blocks of this
type:
```json
{
"foo": {
"child_attr": "baz"
}
}
```
```json
{
"foo": [
{
"child_attr": "baz"
},
{
"child_attr": "boz"
}
]
}
```
```json
{
"foo": []
}
```
The first of these defines a single child block of type "foo". The second
defines _two_ such blocks. The final example shows a degenerate definition
of zero blocks, though generators should prefer to omit the property entirely
in this scenario.
Given a schema that calls for a block type named "foo" with _two_ labels, the
extra label levels must be represented as objects or arrays of objects as in
the following examples:
```json
{
"foo": {
"bar": {
"baz": {
"child_attr": "baz"
},
"boz": {
"child_attr": "baz"
}
},
"boz": {
"baz": {
"child_attr": "baz"
}
}
}
}
```
```json
{
"foo": {
"bar": {
"baz": {
"child_attr": "baz"
},
"boz": {
"child_attr": "baz"
}
},
"boz": {
"baz": [
{
"child_attr": "baz"
},
{
"child_attr": "boz"
}
]
}
}
}
```
```json
{
"foo": [
{
"bar": {
"baz": {
"child_attr": "baz"
},
"boz": {
"child_attr": "baz"
}
}
},
{
"bar": {
"baz": [
{
"child_attr": "baz"
},
{
"child_attr": "boz"
}
]
}
}
]
}
```
```json
{
"foo": {
"bar": {
"baz": {
"child_attr": "baz"
},
"boz": {
"child_attr": "baz"
}
},
"bar": {
"baz": [
{
"child_attr": "baz"
},
{
"child_attr": "boz"
}
]
}
}
}
```
Arrays can be introduced at either the label definition or block body
definition levels to define multiple definitions of the same block type
or labels while preserving order.
A JSON HCL parser _must_ support duplicate definitions of the same property
name within a single object, preserving all of them and the relative ordering
between them. The array-based forms are also required so that JSON HCL
configurations can be produced with JSON-producing libraries that are not
able to preserve property definition order and multiple definitions of
the same property.
## Expressions
JSON lacks a native expression syntax, so the HCL JSON syntax instead defines
a mapping for each of the JSON value types, including a special mapping for
strings that allows optional use of arbitrary expressions.
### Objects
When interpreted as an expression, a JSON object represents a value of a HCL
object type.
Each property of the JSON object represents an attribute of the HCL object type.
The property name string given in the JSON input is interpreted as a string
expression as described below, and its result is converted to string as defined
by the syntax-agnostic information model. If such a conversion is not possible,
an error is produced and evaluation fails.
An instance of the constructed object type is then created, whose values
are interpreted by again recursively applying the mapping rules defined in
this section to each of the property values.
If any evaluated property name strings produce null values, an error is
produced and evaluation fails. If any produce _unknown_ values, the _entire
object's_ result is an unknown value of the dynamic pseudo-type, signalling
that the type of the object cannot be determined.
It is an error to define the same property name multiple times within a single
JSON object interpreted as an expression. In full expression mode, this
constraint applies to the name expression results after conversion to string,
rather than the raw string that may contain interpolation expressions.
### Arrays
When interpreted as an expression, a JSON array represents a value of a HCL
tuple type.
Each element of the JSON array represents an element of the HCL tuple type.
The tuple type is constructed by enumerating the JSON array elements, creating
for each an element whose type is the result of recursively applying the
expression mapping rules. Correspondence is preserved between the array element
indices and the tuple element indices.
An instance of the constructed tuple type is then created, whose values are
interpreted by again recursively applying the mapping rules defined in this
section.
### Numbers
When interpreted as an expression, a JSON number represents a HCL number value.
HCL numbers are arbitrary-precision decimal values, so a JSON HCL parser must
be able to translate exactly the value given to a number of corresponding
precision, within the constraints set by the HCL syntax-agnostic information
model.
In practice, off-the-shelf JSON serializers often do not support customizing the
processing of numbers, and instead force processing as 32-bit or 64-bit
floating point values.
A _producer_ of JSON HCL that uses such a serializer can provide numeric values
as JSON strings where they have precision too great for representation in the
serializer's chosen numeric type in situations where the result will be
converted to number (using the standard conversion rules) by a calling
application.
Alternatively, for expressions that are evaluated in full expression mode an
embedded template interpolation can be used to faithfully represent a number,
such as `"${1e150}"`, which will then be evaluated by the underlying HCL native
syntax expression evaluator.
### Boolean Values
The JSON boolean values `true` and `false`, when interpreted as expressions,
represent the corresponding HCL boolean values.
### The Null Value
The JSON value `null`, when interpreted as an expression, represents a
HCL null value of the dynamic pseudo-type.
### Strings
When interpreted as an expression, a JSON string may be interpreted in one of
two ways depending on the evaluation mode.
If evaluating in literal-only mode (as defined by the syntax-agnostic
information model) the literal string is interpreted directly as a HCL string
value, using the exact sequence of unicode characters represented.
Template interpolations and directives MUST NOT be processed in this mode,
allowing any characters that appear as introduction sequences to pass through
literally:
```json
"Hello world! Template sequences like ${ are not intepreted here."
```
When evaluating in full expression mode (again, as defined by the syntax-
agnostic information model) the literal string is instead interpreted as a
_standalone template_ in the HCL Native Syntax. The expression evaluation
result is then the direct result of evaluating that template with the current
variable scope and function table.
```json
"Hello, ${name}! Template sequences are interpreted in full expression mode."
```
In particular the _Template Interpolation Unwrapping_ requirement from the
HCL native syntax specification must be implemented, allowing the use of
single-interpolation templates to represent expressions that would not
otherwise be representable in JSON, such as the following example where
the result must be a number, rather than a string representation of a number:
```json
"${ a + b }"
```
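A non-normative Go sketch of this unwrapping behaviour, with the names `a` and `b` supplied through the evaluation context:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	hcljson "github.com/hashicorp/hcl/v2/json"
	"github.com/zclconf/go-cty/cty"
)

func main() {
	src := []byte(`{"total": "${ a + b }"}`)
	f, _ := hcljson.Parse(src, "config.json")
	attrs, _ := f.Body.JustAttributes()

	// A non-nil EvalContext selects full expression mode, so the
	// single interpolation unwraps to a number, not a string.
	ctx := &hcl.EvalContext{
		Variables: map[string]cty.Value{
			"a": cty.NumberIntVal(2),
			"b": cty.NumberIntVal(3),
		},
	}
	val, diags := attrs["total"].Expr.Value(ctx)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	fmt.Println(val.AsBigFloat()) // 5
}
```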
## Static Analysis
The HCL static analysis operations are implemented for JSON values that
represent expressions, as described in the following sections.
Due to the limited expressive power of the JSON syntax alone, the use of these
static analysis functions rather than normal expression evaluation provides
additional context for how a JSON value is to be interpreted, which means
that static analysis can result in a different interpretation of a given
expression than normal evaluation.
### Static List
An expression interpreted as a static list must be a JSON array. Each of the
values in the array is interpreted as an expression and returned.
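A non-normative Go sketch, using this repository's `hcl.ExprList` entry point:

```go
package main

import (
	"fmt"

	"github.com/hashicorp/hcl/v2"
	hcljson "github.com/hashicorp/hcl/v2/json"
)

func main() {
	src := []byte(`{"ingredients": ["flour", "water"]}`)
	f, _ := hcljson.Parse(src, "config.json")
	attrs, _ := f.Body.JustAttributes()

	// hcl.ExprList applies the static-list analysis: the JSON array
	// is returned as one expression per element, without evaluation.
	exprs, diags := hcl.ExprList(attrs["ingredients"].Expr)
	if diags.HasErrors() {
		panic(diags.Error())
	}
	fmt.Println(len(exprs)) // 2
}
```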
### Static Map
An expression interpreted as a static map must be a JSON object. Each of the
key/value pairs in the object is presented as a pair of expressions. Since
object property names are always strings, evaluating the key expression with
a non-`nil` evaluation context will evaluate any template sequences given
in the property name.
### Static Call
An expression interpreted as a static call must be a string. The content of
the string is interpreted as a native syntax expression (not a _template_,
unlike normal evaluation) and then the static call analysis is delegated to
that expression.
If the original expression is not a string or its contents cannot be parsed
as a native syntax expression then static call analysis is not supported.
### Static Traversal
An expression interpreted as a static traversal must be a string. The content
of the string is interpreted as a native syntax expression (not a _template_,
unlike normal evaluation) and then static traversal analysis is delegated
to that expression.
If the original expression is not a string or its contents cannot be parsed
as a native syntax expression then static traversal analysis is not supported.
