Merge pull request #192 from vanstee/hcl2-with-interpolation

Upgrade to hcl2 to support variables and functions
pull/270/head v0.4.0
Tõnis Tiigi 5 years ago committed by GitHub
commit 205165bec5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -614,6 +614,128 @@ target "db" {
Complete list of valid target fields:
args, cache-from, cache-to, context, dockerfile, inherits, labels, no-cache, output, platform, pull, secrets, ssh, tags, target
#### HCL variables and functions
Similar to how Terraform provides a way to [define variables](https://www.terraform.io/docs/configuration/variables.html#declaring-an-input-variable), the HCL file format also supports variable block definitions. These can be used to define variables with values provided by the current environment or a default value when unset.
Example of using interpolation to tag an image with the git sha:
```
$ cat <<'EOF' > docker-bake.hcl
variable "TAG" {
default = "latest"
}
group "default" {
targets = ["webapp"]
}
target "webapp" {
tags = ["docker.io/username/webapp:${TAG}"]
}
EOF
$ docker buildx bake --print webapp
{
"target": {
"webapp": {
"context": ".",
"dockerfile": "Dockerfile",
"tags": [
"docker.io/username/webapp:latest"
]
}
}
}
$ TAG=$(git rev-parse --short HEAD) docker buildx bake --print webapp
{
"target": {
"webapp": {
"context": ".",
"dockerfile": "Dockerfile",
"tags": [
"docker.io/username/webapp:985e9e9"
]
}
}
}
```
A [set of generally useful functions](https://github.com/docker/buildx/blob/master/bake/hcl.go#L19-L65) provided by [go-cty](https://github.com/zclconf/go-cty/tree/master/cty/function/stdlib) are avaialble for use in HCL files. In addition, [user defined functions](https://github.com/hashicorp/hcl/tree/hcl2/ext/userfunc) are also supported.
Example of using the `add` function:
```
$ cat <<'EOF' > docker-bake.hcl
variable "TAG" {
default = "latest"
}
group "default" {
targets = ["webapp"]
}
target "webapp" {
args = {
buildno = "${add(123, 1)}"
}
}
EOF
$ docker buildx bake --print webapp
{
"target": {
"webapp": {
"context": ".",
"dockerfile": "Dockerfile",
"args": {
"buildno": "124"
}
}
}
}
```
Example of defining an `increment` function:
```
$ cat <<'EOF' > docker-bake.hcl
function "increment" {
params = [number]
result = number + 1
}
group "default" {
targets = ["webapp"]
}
target "webapp" {
args = {
buildno = "${increment(123)}"
}
}
EOF
$ docker buildx bake --print webapp
{
"target": {
"webapp": {
"context": ".",
"dockerfile": "Dockerfile",
"args": {
"buildno": "124"
}
}
}
}
```
### `buildx imagetools create [OPTIONS] [SOURCE] [SOURCE...]`
Imagetools contains commands for working with manifest lists in the registry. These commands are useful for inspecting multi-platform build results.

@ -11,11 +11,12 @@ import (
"github.com/docker/buildx/build"
"github.com/docker/buildx/util/platformutil"
"github.com/docker/docker/pkg/urlutil"
hcl "github.com/hashicorp/hcl/v2"
"github.com/moby/buildkit/session/auth/authprovider"
"github.com/pkg/errors"
)
func ReadTargets(ctx context.Context, files, targets, overrides []string) (map[string]Target, error) {
func ReadTargets(ctx context.Context, files, targets, overrides []string) (map[string]*Target, error) {
var c Config
for _, f := range files {
cfg, err := ParseFile(f)
@ -28,7 +29,7 @@ func ReadTargets(ctx context.Context, files, targets, overrides []string) (map[s
if err != nil {
return nil, err
}
m := map[string]Target{}
m := map[string]*Target{}
for _, n := range targets {
for _, n := range c.ResolveGroup(n) {
t, err := c.ResolveTarget(n, o)
@ -36,7 +37,7 @@ func ReadTargets(ctx context.Context, files, targets, overrides []string) (map[s
return nil, err
}
if t != nil {
m[n] = *t
m[n] = t
}
}
}
@ -55,12 +56,12 @@ func ParseFile(fn string) (*Config, error) {
}
if strings.HasSuffix(fnl, ".json") || strings.HasSuffix(fnl, ".hcl") {
return ParseHCL(dt)
return ParseHCL(dt, fn)
}
cfg, err := ParseCompose(dt)
if err != nil {
cfg, err2 := ParseHCL(dt)
cfg, err2 := ParseHCL(dt, fn)
if err2 != nil {
return nil, errors.Errorf("failed to parse %s: parsing yaml: %s, parsing hcl: %s", fn, err.Error(), err2.Error())
}
@ -70,55 +71,78 @@ func ParseFile(fn string) (*Config, error) {
}
type Config struct {
Group map[string]Group
Target map[string]Target
Variables []*Variable `json:"-" hcl:"variable,block"`
Groups []*Group `json:"groups" hcl:"group,block"`
Targets []*Target `json:"targets" hcl:"target,block"`
Remain hcl.Body `json:"-" hcl:",remain"`
}
func mergeConfig(c1, c2 Config) Config {
for k, g := range c2.Group {
if c1.Group == nil {
c1.Group = map[string]Group{}
if c1.Groups == nil {
c1.Groups = []*Group{}
}
for _, g2 := range c2.Groups {
var g1 *Group
for _, g := range c1.Groups {
if g2.Name == g.Name {
g1 = g
break
}
}
if g1, exists := c1.Group[k]; exists {
nextTarget:
for _, t := range g.Targets {
for _, t2 := range g1.Targets {
if t == t2 {
continue nextTarget
}
if g1 == nil {
c1.Groups = append(c1.Groups, g2)
continue
}
nextTarget:
for _, t2 := range g2.Targets {
for _, t1 := range g1.Targets {
if t1 == t2 {
continue nextTarget
}
g1.Targets = append(g1.Targets, t)
}
c1.Group[k] = g1
} else {
c1.Group[k] = g
g1.Targets = append(g1.Targets, t2)
}
c1.Groups = append(c1.Groups, g1)
}
if c1.Targets == nil {
c1.Targets = []*Target{}
}
for k, t := range c2.Target {
if c1.Target == nil {
c1.Target = map[string]Target{}
for _, t2 := range c2.Targets {
var t1 *Target
for _, t := range c1.Targets {
if t2.Name == t.Name {
t1 = t
break
}
}
if base, ok := c1.Target[k]; ok {
t = merge(base, t)
if t1 != nil {
t2 = merge(t1, t2)
}
c1.Target[k] = t
c1.Targets = append(c1.Targets, t2)
}
return c1
}
func (c Config) expandTargets(pattern string) ([]string, error) {
if _, ok := c.Target[pattern]; ok {
return []string{pattern}, nil
for _, target := range c.Targets {
if target.Name == pattern {
return []string{pattern}, nil
}
}
var names []string
for name := range c.Target {
ok, err := path.Match(pattern, name)
for _, target := range c.Targets {
ok, err := path.Match(pattern, target.Name)
if err != nil {
return nil, errors.Wrapf(err, "could not match targets with '%s'", pattern)
}
if ok {
names = append(names, name)
names = append(names, target.Name)
}
}
if len(names) == 0 {
@ -127,8 +151,8 @@ func (c Config) expandTargets(pattern string) ([]string, error) {
return names, nil
}
func (c Config) newOverrides(v []string) (map[string]Target, error) {
m := map[string]Target{}
func (c Config) newOverrides(v []string) (map[string]*Target, error) {
m := map[string]*Target{}
for _, v := range v {
parts := strings.SplitN(v, "=", 2)
@ -148,7 +172,10 @@ func (c Config) newOverrides(v []string) (map[string]Target, error) {
}
for _, name := range names {
t := m[name]
t, ok := m[name]
if !ok {
t = &Target{}
}
switch keys[1] {
case "context":
@ -224,8 +251,14 @@ func (c Config) group(name string, visited map[string]struct{}) []string {
if _, ok := visited[name]; ok {
return nil
}
g, ok := c.Group[name]
if !ok {
var g *Group
for _, group := range c.Groups {
if group.Name == name {
g = group
break
}
}
if g == nil {
return []string{name}
}
visited[name] = struct{}{}
@ -236,7 +269,7 @@ func (c Config) group(name string, visited map[string]struct{}) []string {
return targets
}
func (c Config) ResolveTarget(name string, overrides map[string]Target) (*Target, error) {
func (c Config) ResolveTarget(name string, overrides map[string]*Target) (*Target, error) {
t, err := c.target(name, map[string]struct{}{}, overrides)
if err != nil {
return nil, err
@ -252,54 +285,71 @@ func (c Config) ResolveTarget(name string, overrides map[string]Target) (*Target
return t, nil
}
func (c Config) target(name string, visited map[string]struct{}, overrides map[string]Target) (*Target, error) {
func (c Config) target(name string, visited map[string]struct{}, overrides map[string]*Target) (*Target, error) {
if _, ok := visited[name]; ok {
return nil, nil
}
visited[name] = struct{}{}
t, ok := c.Target[name]
if !ok {
var t *Target
for _, target := range c.Targets {
if target.Name == name {
t = target
break
}
}
if t == nil {
return nil, errors.Errorf("failed to find target %s", name)
}
var tt Target
tt := &Target{}
for _, name := range t.Inherits {
t, err := c.target(name, visited, overrides)
if err != nil {
return nil, err
}
if t != nil {
tt = merge(tt, *t)
tt = merge(tt, t)
}
}
t.Inherits = nil
tt = merge(merge(merge(defaultTarget(), tt), t), overrides[name])
tt = merge(merge(defaultTarget(), tt), t)
if override, ok := overrides[name]; ok {
tt = merge(tt, override)
}
tt.normalize()
return &tt, nil
return tt, nil
}
type Variable struct {
Name string `json:"-" hcl:"name,label"`
Default string `json:"default,omitempty" hcl:"default,optional"`
}
type Group struct {
Targets []string
Name string `json:"-" hcl:"name,label"`
Targets []string `json:"targets" hcl:"targets"`
// Target // TODO?
}
type Target struct {
Name string `json:"-" hcl:"name,label"`
// Inherits is the only field that cannot be overridden with --set
Inherits []string `json:"inherits,omitempty" hcl:"inherits,omitempty"`
Context *string `json:"context,omitempty" hcl:"context,omitempty"`
Dockerfile *string `json:"dockerfile,omitempty" hcl:"dockerfile,omitempty"`
Args map[string]string `json:"args,omitempty" hcl:"args,omitempty"`
Labels map[string]string `json:"labels,omitempty" hcl:"labels,omitempty"`
Tags []string `json:"tags,omitempty" hcl:"tags,omitempty"`
CacheFrom []string `json:"cache-from,omitempty" hcl:"cache-from,omitempty"`
CacheTo []string `json:"cache-to,omitempty" hcl:"cache-to,omitempty"`
Target *string `json:"target,omitempty" hcl:"target,omitempty"`
Secrets []string `json:"secret,omitempty" hcl:"secret,omitempty"`
SSH []string `json:"ssh,omitempty" hcl:"ssh,omitempty"`
Platforms []string `json:"platforms,omitempty" hcl:"platforms,omitempty"`
Outputs []string `json:"output,omitempty" hcl:"output,omitempty"`
Pull bool `json:"pull,omitempty": hcl:"pull,omitempty"`
NoCache bool `json:"no-cache,omitempty": hcl:"no-cache,omitempty"`
Inherits []string `json:"inherits,omitempty" hcl:"inherits,optional"`
Context *string `json:"context,omitempty" hcl:"context,optional"`
Dockerfile *string `json:"dockerfile,omitempty" hcl:"dockerfile,optional"`
Args map[string]string `json:"args,omitempty" hcl:"args,optional"`
Labels map[string]string `json:"labels,omitempty" hcl:"labels,optional"`
Tags []string `json:"tags,omitempty" hcl:"tags,optional"`
CacheFrom []string `json:"cache-from,omitempty" hcl:"cache-from,optional"`
CacheTo []string `json:"cache-to,omitempty" hcl:"cache-to,optional"`
Target *string `json:"target,omitempty" hcl:"target,optional"`
Secrets []string `json:"secret,omitempty" hcl:"secret,optional"`
SSH []string `json:"ssh,omitempty" hcl:"ssh,optional"`
Platforms []string `json:"platforms,omitempty" hcl:"platforms,optional"`
Outputs []string `json:"output,omitempty" hcl:"output,optional"`
Pull bool `json:"pull,omitempty": hcl:"pull,optional"`
NoCache bool `json:"no-cache,omitempty": hcl:"no-cache,optional"`
// IMPORTANT: if you add more fields here, do not forget to update newOverrides and README.
}
@ -313,7 +363,7 @@ func (t *Target) normalize() {
t.Outputs = removeDupes(t.Outputs)
}
func TargetsToBuildOpt(m map[string]Target) (map[string]build.Options, error) {
func TargetsToBuildOpt(m map[string]*Target) (map[string]build.Options, error) {
m2 := make(map[string]build.Options, len(m))
for k, v := range m {
bo, err := toBuildOpt(v)
@ -325,7 +375,7 @@ func TargetsToBuildOpt(m map[string]Target) (map[string]build.Options, error) {
return m2, nil
}
func toBuildOpt(t Target) (*build.Options, error) {
func toBuildOpt(t *Target) (*build.Options, error) {
if v := t.Context; v != nil && *v == "-" {
return nil, errors.Errorf("context from stdin not allowed in bake")
}
@ -403,11 +453,11 @@ func toBuildOpt(t Target) (*build.Options, error) {
return bo, nil
}
func defaultTarget() Target {
return Target{}
func defaultTarget() *Target {
return &Target{}
}
func merge(t1, t2 Target) Target {
func merge(t1, t2 *Target) *Target {
if t2.Context != nil {
t1.Context = t2.Context
}

@ -19,7 +19,7 @@ func TestReadTargets(t *testing.T) {
fp := filepath.Join(tmpdir, "config.hcl")
err = ioutil.WriteFile(fp, []byte(`
target "webDEP" {
args {
args = {
VAR_INHERITED = "webDEP"
VAR_BOTH = "webDEP"
}
@ -27,7 +27,7 @@ target "webDEP" {
target "webapp" {
dockerfile = "Dockerfile.webapp"
args {
args = {
VAR_BOTH = "webapp"
}
inherits = ["webDEP"]
@ -108,7 +108,7 @@ target "webapp" {
t.Run("PatternOverride", func(t *testing.T) {
// same check for two cases
multiTargetCheck := func(t *testing.T, m map[string]Target, err error) {
multiTargetCheck := func(t *testing.T, m map[string]*Target, err error) {
require.NoError(t, err)
require.Equal(t, 2, len(m))
require.Equal(t, "foo", *m["webapp"].Dockerfile)
@ -121,7 +121,7 @@ target "webapp" {
name string
targets []string
overrides []string
check func(*testing.T, map[string]Target, error)
check func(*testing.T, map[string]*Target, error)
}{
{
name: "multi target single pattern",
@ -139,7 +139,7 @@ target "webapp" {
name: "single target",
targets: []string{"webapp"},
overrides: []string{"web*.dockerfile=foo"},
check: func(t *testing.T, m map[string]Target, err error) {
check: func(t *testing.T, m map[string]*Target, err error) {
require.NoError(t, err)
require.Equal(t, 1, len(m))
require.Equal(t, "foo", *m["webapp"].Dockerfile)
@ -150,7 +150,7 @@ target "webapp" {
name: "nomatch",
targets: []string{"webapp"},
overrides: []string{"nomatch*.dockerfile=foo"},
check: func(t *testing.T, m map[string]Target, err error) {
check: func(t *testing.T, m map[string]*Target, err error) {
// NOTE: I am unsure whether failing to match should always error out
// instead of simply skipping that override.
// Let's enforce the error and we can relax it later if users complain.

@ -46,10 +46,10 @@ func ParseCompose(dt []byte) (*Config, error) {
var c Config
var zeroBuildConfig composetypes.BuildConfig
if len(cfg.Services) > 0 {
c.Group = map[string]Group{}
c.Target = map[string]Target{}
c.Groups = []*Group{}
c.Targets = []*Target{}
var g Group
g := &Group{Name: "default"}
for _, s := range cfg.Services {
@ -72,7 +72,8 @@ func ParseCompose(dt []byte) (*Config, error) {
dockerfilePathP = &dockerfilePath
}
g.Targets = append(g.Targets, s.Name)
t := Target{
t := &Target{
Name: s.Name,
Context: contextPathP,
Dockerfile: dockerfilePathP,
Labels: s.Build.Labels,
@ -87,9 +88,9 @@ func ParseCompose(dt []byte) (*Config, error) {
if s.Image != "" {
t.Tags = []string{s.Image}
}
c.Target[s.Name] = t
c.Targets = append(c.Targets, t)
}
c.Group["default"] = g
c.Groups = append(c.Groups, g)
}

@ -27,17 +27,23 @@ services:
c, err := ParseCompose(dt)
require.NoError(t, err)
require.Equal(t, 1, len(c.Group))
sort.Strings(c.Group["default"].Targets)
require.Equal(t, []string{"db", "webapp"}, c.Group["default"].Targets)
require.Equal(t, 2, len(c.Target))
require.Equal(t, "./db", *c.Target["db"].Context)
require.Equal(t, "./dir", *c.Target["webapp"].Context)
require.Equal(t, "Dockerfile-alternate", *c.Target["webapp"].Dockerfile)
require.Equal(t, 1, len(c.Target["webapp"].Args))
require.Equal(t, "123", c.Target["webapp"].Args["buildno"])
require.Equal(t, 1, len(c.Groups))
require.Equal(t, c.Groups[0].Name, "default")
sort.Strings(c.Groups[0].Targets)
require.Equal(t, []string{"db", "webapp"}, c.Groups[0].Targets)
require.Equal(t, 2, len(c.Targets))
sort.Slice(c.Targets, func(i, j int) bool {
return c.Targets[i].Name < c.Targets[j].Name
})
require.Equal(t, "db", c.Targets[0].Name)
require.Equal(t, "./db", *c.Targets[0].Context)
require.Equal(t, "webapp", c.Targets[1].Name)
require.Equal(t, "./dir", *c.Targets[1].Context)
require.Equal(t, "Dockerfile-alternate", *c.Targets[1].Dockerfile)
require.Equal(t, 1, len(c.Targets[1].Args))
require.Equal(t, "123", c.Targets[1].Args["buildno"])
}
func TestNoBuildOutOfTreeService(t *testing.T) {
@ -52,7 +58,7 @@ services:
`)
c, err := ParseCompose(dt)
require.NoError(t, err)
require.Equal(t, 1, len(c.Group))
require.Equal(t, 1, len(c.Groups))
}
func TestParseComposeTarget(t *testing.T) {
@ -73,8 +79,14 @@ services:
c, err := ParseCompose(dt)
require.NoError(t, err)
require.Equal(t, "db", *c.Target["db"].Target)
require.Equal(t, "webapp", *c.Target["webapp"].Target)
require.Equal(t, 2, len(c.Targets))
sort.Slice(c.Targets, func(i, j int) bool {
return c.Targets[i].Name < c.Targets[j].Name
})
require.Equal(t, "db", c.Targets[0].Name)
require.Equal(t, "db", *c.Targets[0].Target)
require.Equal(t, "webapp", c.Targets[1].Name)
require.Equal(t, "webapp", *c.Targets[1].Target)
}
func TestComposeBuildWithoutContext(t *testing.T) {
@ -93,8 +105,14 @@ services:
c, err := ParseCompose(dt)
require.NoError(t, err)
require.Equal(t, "db", *c.Target["db"].Target)
require.Equal(t, "webapp", *c.Target["webapp"].Target)
require.Equal(t, 2, len(c.Targets))
sort.Slice(c.Targets, func(i, j int) bool {
return c.Targets[i].Name < c.Targets[j].Name
})
require.Equal(t, c.Targets[0].Name, "db")
require.Equal(t, "db", *c.Targets[0].Target)
require.Equal(t, c.Targets[1].Name, "webapp")
require.Equal(t, "webapp", *c.Targets[1].Target)
}
func TestBogusCompose(t *testing.T) {

@ -1,10 +1,132 @@
package bake
import "github.com/hashicorp/hcl"
import (
"os"
"strings"
hcl "github.com/hashicorp/hcl/v2"
"github.com/hashicorp/hcl/v2/ext/userfunc"
"github.com/hashicorp/hcl/v2/hclsimple"
"github.com/hashicorp/hcl/v2/hclsyntax"
"github.com/zclconf/go-cty/cty"
"github.com/zclconf/go-cty/cty/function"
"github.com/zclconf/go-cty/cty/function/stdlib"
)
// Collection of generally useful functions in cty-using applications, which
// HCL supports. These functions are available for use in HCL files.
var (
stdlibFunctions = map[string]function.Function{
"absolute": stdlib.AbsoluteFunc,
"add": stdlib.AddFunc,
"and": stdlib.AndFunc,
"byteslen": stdlib.BytesLenFunc,
"bytesslice": stdlib.BytesSliceFunc,
"csvdecode": stdlib.CSVDecodeFunc,
"coalesce": stdlib.CoalesceFunc,
"concat": stdlib.ConcatFunc,
"divide": stdlib.DivideFunc,
"equal": stdlib.EqualFunc,
"formatdate": stdlib.FormatDateFunc,
"format": stdlib.FormatFunc,
"formatlist": stdlib.FormatListFunc,
"greaterthan": stdlib.GreaterThanFunc,
"greaterthanorequalto": stdlib.GreaterThanOrEqualToFunc,
"hasindex": stdlib.HasIndexFunc,
"index": stdlib.IndexFunc,
"int": stdlib.IntFunc,
"jsondecode": stdlib.JSONDecodeFunc,
"jsonencode": stdlib.JSONEncodeFunc,
"length": stdlib.LengthFunc,
"lessthan": stdlib.LessThanFunc,
"lessthanorequalto": stdlib.LessThanOrEqualToFunc,
"lower": stdlib.LowerFunc,
"max": stdlib.MaxFunc,
"min": stdlib.MinFunc,
"modulo": stdlib.ModuloFunc,
"multiply": stdlib.MultiplyFunc,
"negate": stdlib.NegateFunc,
"notequal": stdlib.NotEqualFunc,
"not": stdlib.NotFunc,
"or": stdlib.OrFunc,
"range": stdlib.RangeFunc,
"regexall": stdlib.RegexAllFunc,
"regex": stdlib.RegexFunc,
"reverse": stdlib.ReverseFunc,
"sethaselement": stdlib.SetHasElementFunc,
"setintersection": stdlib.SetIntersectionFunc,
"setsubtract": stdlib.SetSubtractFunc,
"setsymmetricdifference": stdlib.SetSymmetricDifferenceFunc,
"setunion": stdlib.SetUnionFunc,
"strlen": stdlib.StrlenFunc,
"substr": stdlib.SubstrFunc,
"subtract": stdlib.SubtractFunc,
"upper": stdlib.UpperFunc,
}
)
// Used in the first pass of decoding instead of the Config struct to disallow
// interpolation while parsing variable blocks.
type staticConfig struct {
Variables []*Variable `hcl:"variable,block"`
Remain hcl.Body `hcl:",remain"`
}
func ParseHCL(dt []byte, fn string) (*Config, error) {
// Decode user defined functions.
file, diags := hclsyntax.ParseConfig(dt, fn, hcl.Pos{Line: 1, Column: 1})
if diags.HasErrors() {
return nil, diags
}
userFunctions, _, diags := userfunc.DecodeUserFunctions(file.Body, "function", func() *hcl.EvalContext {
return &hcl.EvalContext{
Functions: stdlibFunctions,
}
})
if diags.HasErrors() {
return nil, diags
}
var sc staticConfig
// Decode only variable blocks without interpolation.
if err := hclsimple.Decode(fn, dt, nil, &sc); err != nil {
return nil, err
}
// Set all variables to their default value if defined.
variables := make(map[string]cty.Value)
for _, variable := range sc.Variables {
variables[variable.Name] = cty.StringVal(variable.Default)
}
// Override default with values from environment.
for _, env := range os.Environ() {
parts := strings.SplitN(env, "=", 2)
name, value := parts[0], parts[1]
if _, ok := variables[name]; ok {
variables[name] = cty.StringVal(value)
}
}
functions := make(map[string]function.Function)
for k, v := range stdlibFunctions {
functions[k] = v
}
for k, v := range userFunctions {
functions[k] = v
}
ctx := &hcl.EvalContext{
Variables: variables,
Functions: functions,
}
func ParseHCL(dt []byte) (*Config, error) {
var c Config
if err := hcl.Unmarshal(dt, &c); err != nil {
// Decode with variables and functions.
if err := hclsimple.Decode(fn, dt, ctx, &c); err != nil {
return nil, err
}
return &c, nil

@ -1,57 +1,193 @@
package bake
import (
"os"
"testing"
"github.com/stretchr/testify/require"
)
func TestParseHCL(t *testing.T) {
var dt = []byte(`
group "default" {
targets = ["db", "webapp"]
}
t.Parallel()
target "db" {
context = "./db"
tags = ["docker.io/tonistiigi/db"]
}
t.Run("Basic", func(t *testing.T) {
dt := []byte(`
group "default" {
targets = ["db", "webapp"]
}
target "db" {
context = "./db"
tags = ["docker.io/tonistiigi/db"]
}
target "webapp" {
context = "./dir"
dockerfile = "Dockerfile-alternate"
args = {
buildno = "123"
}
}
target "cross" {
platforms = [
"linux/amd64",
"linux/arm64"
]
}
target "webapp-plus" {
inherits = ["webapp", "cross"]
args = {
IAMCROSS = "true"
}
}
`)
c, err := ParseHCL(dt, "docker-bake.hcl")
require.NoError(t, err)
require.Equal(t, 1, len(c.Groups))
require.Equal(t, "default", c.Groups[0].Name)
require.Equal(t, []string{"db", "webapp"}, c.Groups[0].Targets)
target "webapp" {
context = "./dir"
dockerfile = "Dockerfile-alternate"
args = {
buildno = "123"
require.Equal(t, 4, len(c.Targets))
require.Equal(t, c.Targets[0].Name, "db")
require.Equal(t, "./db", *c.Targets[0].Context)
require.Equal(t, c.Targets[1].Name, "webapp")
require.Equal(t, 1, len(c.Targets[1].Args))
require.Equal(t, "123", c.Targets[1].Args["buildno"])
require.Equal(t, c.Targets[2].Name, "cross")
require.Equal(t, 2, len(c.Targets[2].Platforms))
require.Equal(t, []string{"linux/amd64", "linux/arm64"}, c.Targets[2].Platforms)
require.Equal(t, c.Targets[3].Name, "webapp-plus")
require.Equal(t, 1, len(c.Targets[3].Args))
require.Equal(t, map[string]string{"IAMCROSS": "true"}, c.Targets[3].Args)
})
t.Run("WithFunctions", func(t *testing.T) {
dt := []byte(`
group "default" {
targets = ["webapp"]
}
target "webapp" {
args = {
buildno = "${add(123, 1)}"
}
}
}
`)
target "cross" {
platforms = [
"linux/amd64",
"linux/arm64"
]
}
c, err := ParseHCL(dt, "docker-bake.hcl")
require.NoError(t, err)
target "webapp-plus" {
inherits = ["webapp", "cross"]
args = {
IAMCROSS = "true"
require.Equal(t, 1, len(c.Groups))
require.Equal(t, "default", c.Groups[0].Name)
require.Equal(t, []string{"webapp"}, c.Groups[0].Targets)
require.Equal(t, 1, len(c.Targets))
require.Equal(t, c.Targets[0].Name, "webapp")
require.Equal(t, "124", c.Targets[0].Args["buildno"])
})
t.Run("WithUserDefinedFunctions", func(t *testing.T) {
dt := []byte(`
function "increment" {
params = [number]
result = number + 1
}
}
`)
c, err := ParseHCL(dt)
require.NoError(t, err)
group "default" {
targets = ["webapp"]
}
target "webapp" {
args = {
buildno = "${increment(123)}"
}
}
`)
require.Equal(t, 1, len(c.Group))
require.Equal(t, []string{"db", "webapp"}, c.Group["default"].Targets)
c, err := ParseHCL(dt, "docker-bake.hcl")
require.NoError(t, err)
require.Equal(t, 4, len(c.Target))
require.Equal(t, "./db", *c.Target["db"].Context)
require.Equal(t, 1, len(c.Groups))
require.Equal(t, "default", c.Groups[0].Name)
require.Equal(t, []string{"webapp"}, c.Groups[0].Targets)
require.Equal(t, 1, len(c.Target["webapp"].Args))
require.Equal(t, "123", c.Target["webapp"].Args["buildno"])
require.Equal(t, 1, len(c.Targets))
require.Equal(t, c.Targets[0].Name, "webapp")
require.Equal(t, "124", c.Targets[0].Args["buildno"])
})
t.Run("WithVariables", func(t *testing.T) {
dt := []byte(`
variable "BUILD_NUMBER" {
default = "123"
}
group "default" {
targets = ["webapp"]
}
target "webapp" {
args = {
buildno = "${BUILD_NUMBER}"
}
}
`)
c, err := ParseHCL(dt, "docker-bake.hcl")
require.NoError(t, err)
require.Equal(t, 1, len(c.Groups))
require.Equal(t, "default", c.Groups[0].Name)
require.Equal(t, []string{"webapp"}, c.Groups[0].Targets)
require.Equal(t, 1, len(c.Targets))
require.Equal(t, c.Targets[0].Name, "webapp")
require.Equal(t, "123", c.Targets[0].Args["buildno"])
os.Setenv("BUILD_NUMBER", "456")
c, err = ParseHCL(dt, "docker-bake.hcl")
require.NoError(t, err)
require.Equal(t, 1, len(c.Groups))
require.Equal(t, "default", c.Groups[0].Name)
require.Equal(t, []string{"webapp"}, c.Groups[0].Targets)
require.Equal(t, 1, len(c.Targets))
require.Equal(t, c.Targets[0].Name, "webapp")
require.Equal(t, "456", c.Targets[0].Args["buildno"])
})
t.Run("WithIncorrectVariables", func(t *testing.T) {
dt := []byte(`
variable "DEFAULT_BUILD_NUMBER" {
default = "1"
}
variable "BUILD_NUMBER" {
default = "${DEFAULT_BUILD_NUMBER}"
}
group "default" {
targets = ["webapp"]
}
target "webapp" {
args = {
buildno = "${BUILD_NUMBER}"
}
}
`)
require.Equal(t, 2, len(c.Target["cross"].Platforms))
require.Equal(t, []string{"linux/amd64", "linux/arm64"}, c.Target["cross"].Platforms)
_, err := ParseHCL(dt, "docker-bake.hcl")
require.Error(t, err)
require.Contains(t, err.Error(), "docker-bake.hcl:7,17-37: Variables not allowed; Variables may not be used here.")
})
}

@ -59,7 +59,7 @@ func runBake(dockerCli command.Cli, targets []string, in bakeOptions) error {
}
if in.printOnly {
dt, err := json.MarshalIndent(map[string]map[string]bake.Target{"target": m}, "", " ")
dt, err := json.MarshalIndent(map[string]map[string]*bake.Target{"target": m}, "", " ")
if err != nil {
return err
}

@ -31,7 +31,7 @@ require (
github.com/googleapis/gnostic v0.3.1 // indirect
github.com/gophercloud/gophercloud v0.6.0 // indirect
github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed // indirect
github.com/hashicorp/hcl v1.0.0
github.com/hashicorp/hcl/v2 v2.4.0
github.com/jinzhu/gorm v1.9.2 // indirect
github.com/jinzhu/inflection v0.0.0-20180308033659-04140366298a // indirect
github.com/jinzhu/now v1.0.0 // indirect
@ -55,6 +55,7 @@ require (
github.com/theupdateframework/notary v0.6.1 // indirect
github.com/tonistiigi/units v0.0.0-20180711220420-6950e57a87ea
github.com/xlab/handysort v0.0.0-20150421192137-fb3537ed64a1 // indirect
github.com/zclconf/go-cty v1.2.0
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e
gopkg.in/dancannon/gorethink.v3 v3.0.5 // indirect
gopkg.in/fatih/pool.v2 v2.0.0 // indirect

@ -19,6 +19,7 @@ github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbt
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/Microsoft/go-winio v0.4.11/go.mod h1:VhR8bwka0BXejwEJY73c50VrPtXAaKcyvVC4A4RozmA=
github.com/Microsoft/go-winio v0.4.14 h1:+hMXMk01us9KgxGb7ftKQt2Xpf5hH/yky+TDA+qxleU=
github.com/Microsoft/go-winio v0.4.14/go.mod h1:qXqCSQ3Xa7+6tgxaGTIe4Kpcdsi+P8jBhyzoq1bpyYA=
github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5 h1:ygIc8M6trr62pF5DucadTWGdEB4mEyvzi0e2nbcmcyA=
github.com/Microsoft/go-winio v0.4.15-0.20190919025122-fc70bd9a86b5/go.mod h1:tTuCMEN+UleMWgg9dVx4Hu52b1bJo+59jBh3ajtinzw=
@ -30,9 +31,16 @@ github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbt
github.com/PuerkitoBio/urlesc v0.0.0-20160726150825-5bd2802263f2/go.mod h1:uGdkoq3SwY9Y+13GIhn11/XLaGBb4BfwItxLd5jeuXE=
github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d h1:UrqY+r/OJnIp5u0s1SbQ8dVfLCZJsnvazdBP5hS4iRs=
github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ=
github.com/agext/levenshtein v1.2.1 h1:QmvMAjj2aEICytGiWzmxoE0x2KZvE0fvmqMOfy2tjT8=
github.com/agext/levenshtein v1.2.1/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
github.com/agl/ed25519 v0.0.0-20170116200512-5312a6153412 h1:w1UutsfOrms1J05zt7ISrnJIXKzwaspym5BTKGx93EI=
github.com/agl/ed25519 v0.0.0-20170116200512-5312a6153412/go.mod h1:WPjqKcmVOxf0XSf3YxCJs6N6AOSrOx3obionmG7T0y0=
github.com/apache/thrift v0.0.0-20161221203622-b2a4d4ae21c7/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/apparentlymart/go-dump v0.0.0-20180507223929-23540a00eaa3/go.mod h1:oL81AME2rN47vu18xqj1S1jPIPuN7afo62yKTNn3XMM=
github.com/apparentlymart/go-textseg v1.0.0 h1:rRmlIsPEEhUTIKQb7T++Nz/A5Q6C9IuX2wFoYVvnCs0=
github.com/apparentlymart/go-textseg v1.0.0/go.mod h1:z96Txxhf3xSFMPmb5X/1W05FF/Nj9VFpLOpjS5yuumk=
github.com/apparentlymart/go-textseg/v12 v12.0.0 h1:bNEQyAGak9tojivJNkoqWErVCQbjdL7GzRt3F8NvfJ0=
github.com/apparentlymart/go-textseg/v12 v12.0.0/go.mod h1:S/4uRK2UtaQttw1GenVJEynmyUenKwP++x/+DdGV/Ec=
github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5doyWs3UAsr3K4I6qtAmlQcZDesFNEHPZAzj8=
github.com/aws/aws-sdk-go v1.15.11/go.mod h1:mFuSZ37Z9YOHbQEwBWztmVzqXrEkub65tZoCYDt7FT0=
github.com/beorn7/perks v0.0.0-20160804104726-4c0e84591b9a/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
@ -76,14 +84,12 @@ github.com/containerd/continuity v0.0.0-20181001140422-bd77b46c8352/go.mod h1:GL
github.com/containerd/continuity v0.0.0-20190426062206-aaeac12a7ffc/go.mod h1:GL3xCUCBDV3CZiTSEKksMWbLE66hEyuu9qyDOOqM47Y=
github.com/containerd/continuity v0.0.0-20200107194136-26c1120b8d41 h1:kIFnQBO7rQ0XkMe6xEwbybYHBEaWmh/f++laI6Emt7M=
github.com/containerd/continuity v0.0.0-20200107194136-26c1120b8d41/go.mod h1:Dq467ZllaHgAtVp4p1xUQWBrFXR9s/wyoTpG8zOJGkY=
github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448 h1:PUD50EuOMkXVcpBIA/R95d56duJR9VxhwncsFbNnxW4=
github.com/containerd/fifo v0.0.0-20190226154929-a9fb20d87448/go.mod h1:ODA38xgv3Kuk8dQz2ZQXpnv/UZZUHUCL7pnLehbXgQI=
github.com/containerd/fifo v0.0.0-20191213151349-ff969a566b00 h1:lsjC5ENBl+Zgf38+B0ymougXFp0BaubeIVETltYZTQw=
github.com/containerd/fifo v0.0.0-20191213151349-ff969a566b00/go.mod h1:jPQ2IAeZRCYxpS/Cm1495vGFww6ecHmMk1YJH2Q5ln0=
github.com/containerd/go-cni v0.0.0-20200107172653-c154a49e2c75/go.mod h1:0mg8r6FCdbxvLDqCXwAx2rO+KA37QICjKL8+wHOG5OE=
github.com/containerd/go-runc v0.0.0-20180907222934-5a6d9f37cfa3/go.mod h1:IV7qH3hrUgRmyYrtgEeGWJfWbgcHL9CSRruz2Vqcph0=
github.com/containerd/go-runc v0.0.0-20200220073739-7016d3ce2328/go.mod h1:PpyHrqVs8FTi9vpyHwPwiNEGaACDxT/N/pLcvMSRA9g=
github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de h1:dlfGmNcE3jDAecLqwKPMNX6nk2qh1c1Vg1/YTzpOOF4=
github.com/containerd/ttrpc v0.0.0-20190828154514-0e0f228740de/go.mod h1:PvCDdDGpgqzQIzDW1TphrGLssLDZp2GuS+X5DkEJB8o=
github.com/containerd/ttrpc v0.0.0-20191028202541-4f1b8fe65a5c/go.mod h1:LPm1u0xBw8r8NOKoOdNMeVHSawSsltak+Ihv+etqsE8=
github.com/containerd/ttrpc v0.0.0-20200121165050-0be804eadb15 h1:+jgiLE5QylzgADj0Yldb4id1NQNRrDOROj7KDvY9PEc=
@ -169,6 +175,8 @@ github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nA
github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I=
github.com/go-sql-driver/mysql v1.4.1 h1:g24URVg0OFbNUTx9qqY1IRZ9D9z3iPyi5zKhQZpNwpA=
github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w=
github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68=
github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e h1:BWhy2j3IXJhjCbC68FptL43tDKIq8FladmaTs3Xs7Z8=
github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e/go.mod h1:bBOAhwG1umN6/6ZUMtDFBMQR8jRg9O75tm9K00oMsK4=
github.com/godbus/dbus/v5 v5.0.3 h1:ZqHaoEF7TBzh4jzPmqVhE/5A1z9of6orkAe5uHoAeME=
@ -191,6 +199,7 @@ github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4er
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/protobuf v0.0.0-20161109072736-4bd1920723d7/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.1.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
@ -242,9 +251,10 @@ github.com/hashicorp/golang-lru v0.5.1 h1:0hERBMJE1eitiLkihrMvRVBYAkpHzc/J3QdDN+
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/hashicorp/hcl/v2 v2.4.0 h1:xwVa1aj4nCSoAjUnFPBAIfqlzPgSZEVMdkJv/mgj4jY=
github.com/hashicorp/hcl/v2 v2.4.0/go.mod h1:bQTN5mpo+jewjJgh8jr0JUguIi7qPHUF6yIfAEN3jqY=
github.com/hashicorp/uuid v0.0.0-20160311170451-ebb0a03e909c/go.mod h1:fHzc09UnyJyqyW+bFuq864eh+wC7dj65aXmXLRe5to0=
github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU=
github.com/imdario/mergo v0.3.5 h1:JboBksRwiiAJWvIYJVo46AfV+IAIKZpfrSzVKj42R4Q=
github.com/imdario/mergo v0.3.5/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/imdario/mergo v0.3.7 h1:Y+UAYTZ7gDEuOfhxKWy+dvb5dRQ6rJjFSdX2HZY1/gI=
github.com/imdario/mergo v0.3.7/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
@ -278,6 +288,8 @@ github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORN
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 h1:MtvEpTB6LX3vkb4ax0b5D2DHbNAUsen0Gx5wZoq3lV4=
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k=
github.com/lib/pq v1.0.0 h1:X5PMW56eZitiTeO7tKzZxFCSpbFZJtkMMooicw2us9A=
github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/magiconair/properties v1.8.0 h1:LLgXmsheXeRoUOBOjtwPQCWIYqM/LU1ayDtDePerRcY=
@ -292,6 +304,8 @@ github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0j
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/miekg/pkcs11 v0.0.0-20190322140431-074fd7a1ed19 h1:UEWeJCqsIp+93IcMCuqA3KFln2LAUd/tDtoItl0bgJM=
github.com/miekg/pkcs11 v0.0.0-20190322140431-074fd7a1ed19/go.mod h1:WCBAbTOdfhHhz7YXujeZMF7owC4tPb1naKFsgfUISjo=
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 h1:DpOJ2HYzCv8LZP15IdmG+YdwD2luVPHITV96TkirNBM=
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo=
github.com/mitchellh/hashstructure v0.0.0-20170609045927-2bca23e0e452/go.mod h1:QjSHrPWS+BGUVBYkbTZWEnOh3G1DutKwClXU/ABz6AQ=
github.com/mitchellh/mapstructure v1.1.2 h1:fmNYVwqnSfB9mZU6OS2O6GsXM+wcskZDuKQzvN1EDeE=
github.com/mitchellh/mapstructure v1.1.2/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y=
@ -372,6 +386,8 @@ github.com/prometheus/procfs v0.0.5/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDa
github.com/rogpeppe/go-charset v0.0.0-20180617210344-2471d30d28b4/go.mod h1:qgYeAmZ5ZIpBWTGllZSQnw97Dj+woV0toclVaRGI8pc=
github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0=
github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/serialx/hashring v0.0.0-20190422032157-8b2912629002 h1:ka9QPuQg2u4LGipiZGsgkg3rJCo4iIUCy75FddM0GRQ=
github.com/serialx/hashring v0.0.0-20190422032157-8b2912629002/go.mod h1:/yeG0My1xr/u+HZrFQ1tOQQQQrOawfyMUH13ai5brBc=
github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc=
@ -396,6 +412,7 @@ github.com/spf13/jwalterweatherman v1.0.0 h1:XHEdyB+EcvlqZamSM4ZOMGlc93t6AcsBEu9
github.com/spf13/jwalterweatherman v1.0.0/go.mod h1:cQK4TGJAtQXfYWX+Ddv3mKDzgVb68N+wFjFa4jdeBTo=
github.com/spf13/pflag v0.0.0-20170130214245-9ff6c6923cff/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.1-0.20171106142849-4c012f6dcd95/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.2/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.3 h1:zPAT6CGy6wXeQ7NtTnaTerfKOsV6V6F8agHXFiazDkg=
github.com/spf13/pflag v1.0.3/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
@ -430,6 +447,7 @@ github.com/urfave/cli v0.0.0-20171014202726-7bc6a0acffa5/go.mod h1:70zkFmudgCuE/
github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0=
github.com/vishvananda/netlink v1.0.0/go.mod h1:+SR5DhBJrl6ZM7CoCKvpw5BKroDKQ+PJqOg65H/2ktk=
github.com/vishvananda/netns v0.0.0-20180720170159-13995c7128cc/go.mod h1:ZjcWmFBXmLKZu9Nxj3WKYEafiSqer2rnvPr0en9UNpI=
github.com/vmihailenco/msgpack v3.3.3+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f h1:J9EGpcZtP0E/raorCMxlFGSTBrsSlaDGf3jU/qvAE2c=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
@ -442,6 +460,8 @@ github.com/xordataexchange/crypt v0.0.3-0.20170626215501-b2862e3d0a77/go.mod h1:
github.com/yvasiyarov/go-metrics v0.0.0-20140926110328-57bccd1ccd43/go.mod h1:aX5oPXxHm3bOH+xeAttToC8pqch2ScQN/JoXYupl6xs=
github.com/yvasiyarov/gorelic v0.0.0-20141212073537-a9bba5b9ab50/go.mod h1:NUSPSUX/bi6SeDMUh6brw0nXpxHnc96TguQh0+r/ssA=
github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go.mod h1:GlGEuHIJweS1mbCqG+7vt2nvWLzLLnRHbXz5JKd/Qbg=
github.com/zclconf/go-cty v1.2.0 h1:sPHsy7ADcIZQP3vILvTjrh74ZA175TFP5vqiNK1UmlI=
github.com/zclconf/go-cty v1.2.0/go.mod h1:hOPWgoHbaTUnI5k4D2ld+GRpFJSCe6bCM7m1q/N4PQ8=
go.etcd.io/bbolt v1.3.3 h1:MUGmc65QhB3pIlaQ5bB4LwqSj6GIonVJXpZiaKNyaKk=
go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
@ -453,6 +473,8 @@ golang.org/x/crypto v0.0.0-20181203042331-505ab145d0a9/go.mod h1:6SG95UA2DQfeDnf
golang.org/x/crypto v0.0.0-20190211182817-74369b46fc67/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2 h1:VklqNMn3ovrHsnt90PveolxSbWFaJdECFbxSq0Mqo2M=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734 h1:p/H982KKEjUnLJkM3tt/LemDnOc1GiZL5FCVlORJ5zo=
golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190820162420-60c769a6c586/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200128174031-69ecbb4d6d5d/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20200221231518-2aa609cf4a9d h1:1ZiEyfaQIg3Qh0EoqpwAakHVhecoE5wlSg5GjnafJGw=
@ -462,18 +484,20 @@ golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTk
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/net v0.0.0-20170114055629-f2499483f923/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180811021610-c39426892332/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a h1:oWX7TPOiFAMXLq8o0ikBYfCJVlRHBcsciT5bXOrH628=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3 h1:0GoQqolDA55aaLxZyTzK/Y2ePZzZTUrRacwib7cNsYQ=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190619014844-b5b0513f8c1b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191004110552-13f9640d40b9 h1:rjwSpXsdiK0dV8/Naq3kAw9ymfAeJIyd0upUIElB+lI=
golang.org/x/net v0.0.0-20191004110552-13f9640d40b9/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b h1:0mm1VjtFUOIlE1SbDlwjYaDxZVDP2S5ou6y0gSgXHu8=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
@ -501,6 +525,8 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190502175342-a43fa875dd82 h1:vsphBvatvfbhlb4PO1BYSr9dzugGxJ/SQHoNufZJq1w=
golang.org/x/sys v0.0.0-20190502175342-a43fa875dd82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190514135907-3a4b5fb9f71f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190522044717-8097e1b27ff5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -553,14 +579,12 @@ google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRn
google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb h1:i1Ppqkc3WQXikh8bXiwHqAN5Rv3/qDCcRk0/Otx73BY=
google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190522204451-c2c4e71fbf69/go.mod h1:z3L6/3dTEVtUr6QSP8miRzeRqwQOioJ9I66odjN4I7s=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55 h1:gSJIx1SDwno+2ElGhA4+qG2zF97qiUzTM+rQ0klBOcE=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20200117163144-32f20d992d24/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200227132054-3f1135a288c9 h1:Koy0f8zyrEVfIHetH7wjP5mQLUXiqDpubSg8V1fAxqc=
google.golang.org/genproto v0.0.0-20200227132054-3f1135a288c9/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/grpc v0.0.0-20160317175043-d3ddb4469d5a/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1 h1:Hz2g2wirWK7H0qIIhGIqRGTuMwTE8HEKFnDZZ7lm9NU=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
google.golang.org/grpc v1.21.0/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
google.golang.org/grpc v1.23.0 h1:AzbTB6ux+okLTzP8Ru1Xs41C303zdcfEht7MQnYJt5A=

@ -0,0 +1,2 @@
README.html
coverage.out

@ -0,0 +1,70 @@
language: go
sudo: false
go:
- 1.8
- 1.7.5
- 1.7.4
- 1.7.3
- 1.7.2
- 1.7.1
- 1.7
- tip
- 1.6.4
- 1.6.3
- 1.6.2
- 1.6.1
- 1.6
- 1.5.4
- 1.5.3
- 1.5.2
- 1.5.1
- 1.5
- 1.4.3
- 1.4.2
- 1.4.1
- 1.4
- 1.3.3
- 1.3.2
- 1.3.1
- 1.3
- 1.2.2
- 1.2.1
- 1.2
- 1.1.2
- 1.1.1
- 1.1
before_install:
- go get github.com/mattn/goveralls
script:
- $HOME/gopath/bin/goveralls -service=travis-ci
notifications:
email:
on_success: never
matrix:
fast_finish: true
allow_failures:
- go: tip
- go: 1.6.4
- go: 1.6.3
- go: 1.6.2
- go: 1.6.1
- go: 1.6
- go: 1.5.4
- go: 1.5.3
- go: 1.5.2
- go: 1.5.1
- go: 1.5
- go: 1.4.3
- go: 1.4.2
- go: 1.4.1
- go: 1.4
- go: 1.3.3
- go: 1.3.2
- go: 1.3.1
- go: 1.3
- go: 1.2.2
- go: 1.2.1
- go: 1.2
- go: 1.1.2
- go: 1.1.1
- go: 1.1

@ -0,0 +1,36 @@
Developer Certificate of Origin
Version 1.1
Copyright (C) 2004, 2006 The Linux Foundation and its contributors.
660 York Street, Suite 102,
San Francisco, CA 94110 USA
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.

@ -0,0 +1,201 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

@ -0,0 +1 @@
Alex Bucataru <alex@alrux.com> (@AlexBucataru)

@ -0,0 +1,5 @@
Alrux Go EXTensions (AGExt) - package levenshtein
Copyright 2016 ALRUX Inc.
This product includes software developed at ALRUX Inc.
(http://www.alrux.com/).

@ -0,0 +1,38 @@
# A Go package for calculating the Levenshtein distance between two strings
[![Release](https://img.shields.io/github/release/agext/levenshtein.svg?style=flat)](https://github.com/agext/levenshtein/releases/latest)
[![GoDoc](https://img.shields.io/badge/godoc-reference-blue.svg?style=flat)](https://godoc.org/github.com/agext/levenshtein) 
[![Build Status](https://travis-ci.org/agext/levenshtein.svg?branch=master&style=flat)](https://travis-ci.org/agext/levenshtein)
[![Coverage Status](https://coveralls.io/repos/github/agext/levenshtein/badge.svg?style=flat)](https://coveralls.io/github/agext/levenshtein)
[![Go Report Card](https://goreportcard.com/badge/github.com/agext/levenshtein?style=flat)](https://goreportcard.com/report/github.com/agext/levenshtein)
This package implements distance and similarity metrics for strings, based on the Levenshtein measure, in [Go](http://golang.org).
## Project Status
v1.2.1 Stable: Guaranteed no breaking changes to the API in future v1.x releases. Probably safe to use in production, though provided on "AS IS" basis.
This package is being actively maintained. If you encounter any problems or have any suggestions for improvement, please [open an issue](https://github.com/agext/levenshtein/issues). Pull requests are welcome.
## Overview
The Levenshtein `Distance` between two strings is the minimum total cost of edits that would convert the first string into the second. The allowed edit operations are insertions, deletions, and substitutions, all at character (one UTF-8 code point) level. Each operation has a default cost of 1, but each can be assigned its own cost equal to or greater than 0.
A `Distance` of 0 means the two strings are identical, and the higher the value the more different the strings. Since in practice we are interested in finding if the two strings are "close enough", it often does not make sense to continue the calculation once the result is mathematically guaranteed to exceed a desired threshold. Providing this value to the `Distance` function allows it to take a shortcut and return a lower bound instead of an exact cost when the threshold is exceeded.
The `Similarity` function calculates the distance, then converts it into a normalized metric within the range 0..1, with 1 meaning the strings are identical, and 0 that they have nothing in common. A minimum similarity threshold can be provided to speed up the calculation of the metric for strings that are far too dissimilar for the purpose at hand. All values under this threshold are rounded down to 0.
The `Match` function provides a similarity metric, with the same range and meaning as `Similarity`, but with a bonus for string pairs that share a common prefix and have a similarity above a "bonus threshold". It uses the same method as proposed by Winkler for the Jaro distance, and the reasoning behind it is that these string pairs are very likely spelling variations or errors, and they are more closely linked than the edit distance alone would suggest.
The underlying `Calculate` function is also exported, to allow the building of other derivative metrics, if needed.
## Installation
```
go get github.com/agext/levenshtein
```
## License
Package levenshtein is released under the Apache 2.0 license. See the [LICENSE](LICENSE) file for details.

@ -0,0 +1,290 @@
// Copyright 2016 ALRUX Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
Package levenshtein implements distance and similarity metrics for strings, based on the Levenshtein measure.
The Levenshtein `Distance` between two strings is the minimum total cost of edits that would convert the first string into the second. The allowed edit operations are insertions, deletions, and substitutions, all at character (one UTF-8 code point) level. Each operation has a default cost of 1, but each can be assigned its own cost equal to or greater than 0.
A `Distance` of 0 means the two strings are identical, and the higher the value the more different the strings. Since in practice we are interested in finding if the two strings are "close enough", it often does not make sense to continue the calculation once the result is mathematically guaranteed to exceed a desired threshold. Providing this value to the `Distance` function allows it to take a shortcut and return a lower bound instead of an exact cost when the threshold is exceeded.
The `Similarity` function calculates the distance, then converts it into a normalized metric within the range 0..1, with 1 meaning the strings are identical, and 0 that they have nothing in common. A minimum similarity threshold can be provided to speed up the calculation of the metric for strings that are far too dissimilar for the purpose at hand. All values under this threshold are rounded down to 0.
The `Match` function provides a similarity metric, with the same range and meaning as `Similarity`, but with a bonus for string pairs that share a common prefix and have a similarity above a "bonus threshold". It uses the same method as proposed by Winkler for the Jaro distance, and the reasoning behind it is that these string pairs are very likely spelling variations or errors, and they are more closely linked than the edit distance alone would suggest.
The underlying `Calculate` function is also exported, to allow the building of other derivative metrics, if needed.
*/
package levenshtein
// Calculate determines the Levenshtein distance between two strings, using
// the given costs for each edit operation. It returns the distance along with
// the lengths of the longest common prefix and suffix.
//
// If maxCost is non-zero, the calculation stops as soon as the distance is determined
// to be greater than maxCost. Therefore, any return value higher than maxCost is a
// lower bound for the actual distance.
func Calculate(str1, str2 []rune, maxCost, insCost, subCost, delCost int) (dist, prefixLen, suffixLen int) {
l1, l2 := len(str1), len(str2)
// trim common prefix, if any, as it doesn't affect the distance
for ; prefixLen < l1 && prefixLen < l2; prefixLen++ {
if str1[prefixLen] != str2[prefixLen] {
break
}
}
str1, str2 = str1[prefixLen:], str2[prefixLen:]
l1 -= prefixLen
l2 -= prefixLen
// trim common suffix, if any, as it doesn't affect the distance
for 0 < l1 && 0 < l2 {
if str1[l1-1] != str2[l2-1] {
str1, str2 = str1[:l1], str2[:l2]
break
}
l1--
l2--
suffixLen++
}
// if the first string is empty, the distance is the length of the second string times the cost of insertion
if l1 == 0 {
dist = l2 * insCost
return
}
// if the second string is empty, the distance is the length of the first string times the cost of deletion
if l2 == 0 {
dist = l1 * delCost
return
}
// variables used in inner "for" loops
var y, dy, c, l int
// if maxCost is greater than or equal to the maximum possible distance, it's equivalent to 'unlimited'
if maxCost > 0 {
if subCost < delCost+insCost {
if maxCost >= l1*subCost+(l2-l1)*insCost {
maxCost = 0
}
} else {
if maxCost >= l1*delCost+l2*insCost {
maxCost = 0
}
}
}
if maxCost > 0 {
// prefer the longer string first, to minimize time;
// a swap also transposes the meanings of insertion and deletion.
if l1 < l2 {
str1, str2, l1, l2, insCost, delCost = str2, str1, l2, l1, delCost, insCost
}
// the length differential times cost of deletion is a lower bound for the cost;
// if it is higher than the maxCost, there is no point going into the main calculation.
if dist = (l1 - l2) * delCost; dist > maxCost {
return
}
d := make([]int, l1+1)
// offset and length of d in the current row
doff, dlen := 0, 1
for y, dy = 1, delCost; y <= l1 && dy <= maxCost; dlen++ {
d[y] = dy
y++
dy = y * delCost
}
// fmt.Printf("%q -> %q: init doff=%d dlen=%d d[%d:%d]=%v\n", str1, str2, doff, dlen, doff, doff+dlen, d[doff:doff+dlen])
for x := 0; x < l2; x++ {
dy, d[doff] = d[doff], d[doff]+insCost
for d[doff] > maxCost && dlen > 0 {
if str1[doff] != str2[x] {
dy += subCost
}
doff++
dlen--
if c = d[doff] + insCost; c < dy {
dy = c
}
dy, d[doff] = d[doff], dy
}
for y, l = doff, doff+dlen-1; y < l; dy, d[y] = d[y], dy {
if str1[y] != str2[x] {
dy += subCost
}
if c = d[y] + delCost; c < dy {
dy = c
}
y++
if c = d[y] + insCost; c < dy {
dy = c
}
}
if y < l1 {
if str1[y] != str2[x] {
dy += subCost
}
if c = d[y] + delCost; c < dy {
dy = c
}
for ; dy <= maxCost && y < l1; dy, d[y] = dy+delCost, dy {
y++
dlen++
}
}
// fmt.Printf("%q -> %q: x=%d doff=%d dlen=%d d[%d:%d]=%v\n", str1, str2, x, doff, dlen, doff, doff+dlen, d[doff:doff+dlen])
if dlen == 0 {
dist = maxCost + 1
return
}
}
if doff+dlen-1 < l1 {
dist = maxCost + 1
return
}
dist = d[l1]
} else {
// ToDo: This is O(l1*l2) time and O(min(l1,l2)) space; investigate if it is
// worth to implement diagonal approach - O(l1*(1+dist)) time, up to O(l1*l2) space
// http://www.csse.monash.edu.au/~lloyd/tildeStrings/Alignment/92.IPL.html
// prefer the shorter string first, to minimize space; time is O(l1*l2) anyway;
// a swap also transposes the meanings of insertion and deletion.
if l1 > l2 {
str1, str2, l1, l2, insCost, delCost = str2, str1, l2, l1, delCost, insCost
}
d := make([]int, l1+1)
for y = 1; y <= l1; y++ {
d[y] = y * delCost
}
for x := 0; x < l2; x++ {
dy, d[0] = d[0], d[0]+insCost
for y = 0; y < l1; dy, d[y] = d[y], dy {
if str1[y] != str2[x] {
dy += subCost
}
if c = d[y] + delCost; c < dy {
dy = c
}
y++
if c = d[y] + insCost; c < dy {
dy = c
}
}
}
dist = d[l1]
}
return
}
// Distance returns the Levenshtein distance between str1 and str2, using the
// default or provided cost values. Pass nil for the third argument to use the
// default cost of 1 for all three operations, with no maximum.
func Distance(str1, str2 string, p *Params) int {
if p == nil {
p = defaultParams
}
dist, _, _ := Calculate([]rune(str1), []rune(str2), p.maxCost, p.insCost, p.subCost, p.delCost)
return dist
}
// Similarity returns a score in the range of 0..1 for how similar the two strings are.
// A score of 1 means the strings are identical, and 0 means they have nothing in common.
//
// A nil third argument uses the default cost of 1 for all three operations.
//
// If a non-zero MinScore value is provided in the parameters, scores lower than it
// will be returned as 0.
func Similarity(str1, str2 string, p *Params) float64 {
return Match(str1, str2, p.Clone().BonusThreshold(1.1)) // guaranteed no bonus
}
// Match returns a similarity score adjusted by the same method as proposed by Winkler for
// the Jaro distance - giving a bonus to string pairs that share a common prefix, only if their
// similarity score is already over a threshold.
//
// The score is in the range of 0..1, with 1 meaning the strings are identical,
// and 0 meaning they have nothing in common.
//
// A nil third argument uses the default cost of 1 for all three operations, maximum length of
// common prefix to consider for bonus of 4, scaling factor of 0.1, and bonus threshold of 0.7.
//
// If a non-zero MinScore value is provided in the parameters, scores lower than it
// will be returned as 0.
func Match(str1, str2 string, p *Params) float64 {
s1, s2 := []rune(str1), []rune(str2)
l1, l2 := len(s1), len(s2)
// two empty strings are identical; shortcut also avoids divByZero issues later on.
if l1 == 0 && l2 == 0 {
return 1
}
if p == nil {
p = defaultParams
}
// a min over 1 can never be satisfied, so the score is 0.
if p.minScore > 1 {
return 0
}
insCost, delCost, maxDist, max := p.insCost, p.delCost, 0, 0
if l1 > l2 {
l1, l2, insCost, delCost = l2, l1, delCost, insCost
}
if p.subCost < delCost+insCost {
maxDist = l1*p.subCost + (l2-l1)*insCost
} else {
maxDist = l1*delCost + l2*insCost
}
// a zero min is always satisfied, so no need to set a max cost.
if p.minScore > 0 {
// if p.minScore is lower than p.bonusThreshold, we can use a simplified formula
// for the max cost, because a sim score below min cannot receive a bonus.
if p.minScore < p.bonusThreshold {
// round down the max - a cost equal to a rounded up max would already be under min.
max = int((1 - p.minScore) * float64(maxDist))
} else {
// p.minScore <= sim + p.bonusPrefix*p.bonusScale*(1-sim)
// p.minScore <= (1-dist/maxDist) + p.bonusPrefix*p.bonusScale*(1-(1-dist/maxDist))
// p.minScore <= 1 - dist/maxDist + p.bonusPrefix*p.bonusScale*dist/maxDist
// 1 - p.minScore >= dist/maxDist - p.bonusPrefix*p.bonusScale*dist/maxDist
// (1-p.minScore)*maxDist/(1-p.bonusPrefix*p.bonusScale) >= dist
max = int((1 - p.minScore) * float64(maxDist) / (1 - float64(p.bonusPrefix)*p.bonusScale))
}
}
dist, pl, _ := Calculate(s1, s2, max, p.insCost, p.subCost, p.delCost)
if max > 0 && dist > max {
return 0
}
sim := 1 - float64(dist)/float64(maxDist)
if sim >= p.bonusThreshold && sim < 1 && p.bonusPrefix > 0 && p.bonusScale > 0 {
if pl > p.bonusPrefix {
pl = p.bonusPrefix
}
sim += float64(pl) * p.bonusScale * (1 - sim)
}
if sim < p.minScore {
return 0
}
return sim
}

@ -0,0 +1,152 @@
// Copyright 2016 ALRUX Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package levenshtein
// Params represents a set of parameter values for the various formulas involved
// in the calculation of the Levenshtein string metrics.
type Params struct {
insCost int
subCost int
delCost int
maxCost int
minScore float64
bonusPrefix int
bonusScale float64
bonusThreshold float64
}
var (
defaultParams = NewParams()
)
// NewParams creates a new set of parameters and initializes it with the default values.
func NewParams() *Params {
return &Params{
insCost: 1,
subCost: 1,
delCost: 1,
maxCost: 0,
minScore: 0,
bonusPrefix: 4,
bonusScale: .1,
bonusThreshold: .7,
}
}
// Clone returns a pointer to a copy of the receiver parameter set, or of a new
// default parameter set if the receiver is nil.
func (p *Params) Clone() *Params {
if p == nil {
return NewParams()
}
return &Params{
insCost: p.insCost,
subCost: p.subCost,
delCost: p.delCost,
maxCost: p.maxCost,
minScore: p.minScore,
bonusPrefix: p.bonusPrefix,
bonusScale: p.bonusScale,
bonusThreshold: p.bonusThreshold,
}
}
// InsCost overrides the default value of 1 for the cost of insertion.
// The new value must be zero or positive.
func (p *Params) InsCost(v int) *Params {
if v >= 0 {
p.insCost = v
}
return p
}
// SubCost overrides the default value of 1 for the cost of substitution.
// The new value must be zero or positive.
func (p *Params) SubCost(v int) *Params {
if v >= 0 {
p.subCost = v
}
return p
}
// DelCost overrides the default value of 1 for the cost of deletion.
// The new value must be zero or positive.
func (p *Params) DelCost(v int) *Params {
if v >= 0 {
p.delCost = v
}
return p
}
// MaxCost overrides the default value of 0 (meaning unlimited) for the maximum cost.
// The calculation of Distance() stops when the result is guaranteed to exceed
// this maximum, returning a lower-bound rather than exact value.
// The new value must be zero or positive.
func (p *Params) MaxCost(v int) *Params {
if v >= 0 {
p.maxCost = v
}
return p
}
// MinScore overrides the default value of 0 for the minimum similarity score.
// Scores below this threshold are returned as 0 by Similarity() and Match().
// The new value must be zero or positive. Note that a minimum greater than 1
// can never be satisfied, resulting in a score of 0 for any pair of strings.
func (p *Params) MinScore(v float64) *Params {
if v >= 0 {
p.minScore = v
}
return p
}
// BonusPrefix overrides the default value for the maximum length of
// common prefix to be considered for bonus by Match().
// The new value must be zero or positive.
func (p *Params) BonusPrefix(v int) *Params {
if v >= 0 {
p.bonusPrefix = v
}
return p
}
// BonusScale overrides the default value for the scaling factor used by Match()
// in calculating the bonus.
// The new value must be zero or positive. To guarantee that the similarity score
// remains in the interval 0..1, this scaling factor is not allowed to exceed
// 1 / BonusPrefix.
func (p *Params) BonusScale(v float64) *Params {
if v >= 0 {
p.bonusScale = v
}
// the bonus cannot exceed (1-sim), or the score may become greater than 1.
if float64(p.bonusPrefix)*p.bonusScale > 1 {
p.bonusScale = 1 / float64(p.bonusPrefix)
}
return p
}
// BonusThreshold overrides the default value for the minimum similarity score
// for which Match() can assign a bonus.
// The new value must be zero or positive. Note that a threshold greater than 1
// effectively makes Match() become the equivalent of Similarity().
func (p *Params) BonusThreshold(v float64) *Params {
if v >= 0 {
p.bonusThreshold = v
}
return p
}

@ -0,0 +1,95 @@
Copyright (c) 2017 Martin Atkins
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---------
Unicode table generation programs are under a separate copyright and license:
Copyright (c) 2014 Couchbase, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific language governing permissions
and limitations under the License.
---------
Grapheme break data is provided as part of the Unicode character database,
copright 2016 Unicode, Inc, which is provided with the following license:
Unicode Data Files include all data files under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
Unicode Data Files do not include PDF online code charts under the
directory http://www.unicode.org/Public/.
Software includes any source code published in the Unicode Standard
or under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2017 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.

@ -0,0 +1,30 @@
package textseg
import (
"bufio"
"bytes"
)
// AllTokens is a utility that uses a bufio.SplitFunc to produce a slice of
// all of the recognized tokens in the given buffer.
func AllTokens(buf []byte, splitFunc bufio.SplitFunc) ([][]byte, error) {
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Split(splitFunc)
var ret [][]byte
for scanner.Scan() {
ret = append(ret, scanner.Bytes())
}
return ret, scanner.Err()
}
// TokenCount is a utility that uses a bufio.SplitFunc to count the number of
// recognized tokens in the given buffer.
func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) {
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Split(splitFunc)
var ret int
for scanner.Scan() {
ret++
}
return ret, scanner.Err()
}

@ -0,0 +1,7 @@
package textseg
//go:generate go run make_tables.go -output tables.go
//go:generate go run make_test_tables.go -output tables_test.go
//go:generate ruby unicode2ragel.rb --url=http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakProperty.txt -m GraphemeCluster -p "Prepend,CR,LF,Control,Extend,Regional_Indicator,SpacingMark,L,V,T,LV,LVT,E_Base,E_Modifier,ZWJ,Glue_After_Zwj,E_Base_GAZ" -o grapheme_clusters_table.rl
//go:generate ragel -Z grapheme_clusters.rl
//go:generate gofmt -w grapheme_clusters.go

File diff suppressed because it is too large Load Diff

@ -0,0 +1,132 @@
package textseg
import (
"errors"
"unicode/utf8"
)
// Generated from grapheme_clusters.rl. DO NOT EDIT
%%{
# (except you are actually in grapheme_clusters.rl here, so edit away!)
machine graphclust;
write data;
}%%
var Error = errors.New("invalid UTF8 text")
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on grapheme cluster boundaries.
func ScanGraphemeClusters(data []byte, atEOF bool) (int, []byte, error) {
if len(data) == 0 {
return 0, nil, nil
}
// Ragel state
cs := 0 // Current State
p := 0 // "Pointer" into data
pe := len(data) // End-of-data "pointer"
ts := 0
te := 0
act := 0
eof := pe
// Make Go compiler happy
_ = ts
_ = te
_ = act
_ = eof
startPos := 0
endPos := 0
%%{
include GraphemeCluster "grapheme_clusters_table.rl";
action start {
startPos = p
}
action end {
endPos = p
}
action emit {
return endPos+1, data[startPos:endPos+1], nil
}
ZWJGlue = ZWJ (Glue_After_Zwj | E_Base_GAZ Extend* E_Modifier?)?;
AnyExtender = Extend | ZWJGlue | SpacingMark;
Extension = AnyExtender*;
ReplacementChar = (0xEF 0xBF 0xBD);
CRLFSeq = CR LF;
ControlSeq = Control | ReplacementChar;
HangulSeq = (
L+ (((LV? V+ | LVT) T*)?|LV?) |
LV V* T* |
V+ T* |
LVT T* |
T+
) Extension;
EmojiSeq = (E_Base | E_Base_GAZ) Extend* E_Modifier? Extension;
ZWJSeq = ZWJGlue Extension;
EmojiFlagSeq = Regional_Indicator Regional_Indicator? Extension;
UTF8Cont = 0x80 .. 0xBF;
AnyUTF8 = (
0x00..0x7F |
0xC0..0xDF . UTF8Cont |
0xE0..0xEF . UTF8Cont . UTF8Cont |
0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
);
# OtherSeq is any character that isn't at the start of one of the extended sequences above, followed by extension
OtherSeq = (AnyUTF8 - (CR|LF|Control|ReplacementChar|L|LV|V|LVT|T|E_Base|E_Base_GAZ|ZWJ|Regional_Indicator|Prepend)) Extension;
# PrependSeq is prepend followed by any of the other patterns above, except control characters which explicitly break
PrependSeq = Prepend+ (HangulSeq|EmojiSeq|ZWJSeq|EmojiFlagSeq|OtherSeq)?;
CRLFTok = CRLFSeq >start @end;
ControlTok = ControlSeq >start @end;
HangulTok = HangulSeq >start @end;
EmojiTok = EmojiSeq >start @end;
ZWJTok = ZWJSeq >start @end;
EmojiFlagTok = EmojiFlagSeq >start @end;
OtherTok = OtherSeq >start @end;
PrependTok = PrependSeq >start @end;
main := |*
CRLFTok => emit;
ControlTok => emit;
HangulTok => emit;
EmojiTok => emit;
ZWJTok => emit;
EmojiFlagTok => emit;
PrependTok => emit;
OtherTok => emit;
# any single valid UTF-8 character would also be valid per spec,
# but we'll handle that separately after the loop so we can deal
# with requesting more bytes if we're not at EOF.
*|;
write init;
write exec;
}%%
// If we fall out here then we were unable to complete a sequence.
// If we weren't able to complete a sequence then either we've
// reached the end of a partial buffer (so there's more data to come)
// or we have an isolated symbol that would normally be part of a
// grapheme cluster but has appeared in isolation here.
if !atEOF {
// Request more
return 0, nil, nil
}
// Just take the first UTF-8 sequence and return that.
_, seqLen := utf8.DecodeRune(data)
return seqLen, data[:seqLen], nil
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,335 @@
#!/usr/bin/env ruby
#
# This scripted has been updated to accept more command-line arguments:
#
# -u, --url URL to process
# -m, --machine Machine name
# -p, --properties Properties to add to the machine
# -o, --output Write output to file
#
# Updated by: Marty Schoch <marty.schoch@gmail.com>
#
# This script uses the unicode spec to generate a Ragel state machine
# that recognizes unicode alphanumeric characters. It generates 5
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
# Currently supported encodings are UTF-8 [default] and UCS-4.
#
# Usage: unicode2ragel.rb [options]
# -e, --encoding [ucs4 | utf8] Data encoding
# -h, --help Show this message
#
# This script was originally written as part of the Ferret search
# engine library.
#
# Author: Rakan El-Khalil <rakan@well.com>
require 'optparse'
require 'open-uri'
ENCODINGS = [ :utf8, :ucs4 ]
ALPHTYPES = { :utf8 => "byte", :ucs4 => "rune" }
DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
DEFAULT_MACHINE_NAME= "WChar"
###
# Display vars & default option
TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8
@chart_url = DEFAULT_CHART_URL
machine_name = DEFAULT_MACHINE_NAME
properties = []
@output = $stdout
###
# Option parsing
cli_opts = OptionParser.new do |opts|
opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
@encoding = o.downcase.to_sym
end
opts.on("-h", "--help", "Show this message") do
puts opts
exit
end
opts.on("-u", "--url URL", "URL to process") do |o|
@chart_url = o
end
opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o|
machine_name = o
end
opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o|
properties = o
end
opts.on("-o", "--output FILE", "output file") do |o|
@output = File.new(o, "w+")
end
end
cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
puts "Invalid encoding: #{@encoding}"
puts cli_opts
exit
end
##
# Downloads the document at url and yields every alpha line's hex
# range and description.
def each_alpha( url, property )
open( url ) do |file|
file.each_line do |line|
next if line =~ /^#/;
next if line !~ /; #{property} #/;
range, description = line.split(/;/)
range.strip!
description.gsub!(/.*#/, '').strip!
if range =~ /\.\./
start, stop = range.split '..'
else start = stop = range
end
yield start.hex .. stop.hex, description
end
end
end
###
# Formats to hex at minimum width
def to_hex( n )
r = "%0X" % n
r = "0#{r}" unless (r.length % 2).zero?
r
end
###
# UCS4 is just a straight hex conversion of the unicode codepoint.
def to_ucs4( range )
rangestr = "0x" + to_hex(range.begin)
rangestr << "..0x" + to_hex(range.end) if range.begin != range.end
[ rangestr ]
end
##
# 0x00 - 0x7f -> 0zzzzzzz[7]
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
def to_utf8_enc( n )
r = 0
if n <= 0x7f
r = n
elsif n <= 0x7ff
y = 0xc0 | (n >> 6)
z = 0x80 | (n & 0x3f)
r = y << 8 | z
elsif n <= 0xffff
x = 0xe0 | (n >> 12)
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = x << 16 | y << 8 | z
elsif n <= 0x10ffff
w = 0xf0 | (n >> 18)
x = 0x80 | (n >> 12) & 0x3f
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = w << 24 | x << 16 | y << 8 | z
end
to_hex(r)
end
def from_utf8_enc( n )
n = n.hex
r = 0
if n <= 0x7f
r = n
elsif n <= 0xdfff
y = (n >> 8) & 0x1f
z = n & 0x3f
r = y << 6 | z
elsif n <= 0xefffff
x = (n >> 16) & 0x0f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = x << 10 | y << 6 | z
elsif n <= 0xf7ffffff
w = (n >> 24) & 0x07
x = (n >> 16) & 0x3f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = w << 18 | x << 12 | y << 6 | z
end
r
end
###
# Given a range, splits it up into ranges that can be continuously
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
# This is not strictly needed since the current [5.1] unicode standard
# doesn't have ranges that straddle utf8 boundaries. This is included
# for completeness as there is no telling if that will ever change.
def utf8_ranges( range )
ranges = []
UTF8_BOUNDARIES.each do |max|
if range.begin <= max
if range.end <= max
ranges << range
return ranges
end
ranges << (range.begin .. max)
range = (max + 1) .. range.end
end
end
ranges
end
def build_range( start, stop )
size = start.size/2
left = size - 1
return [""] if size < 1
a = start[0..1]
b = stop[0..1]
###
# Shared prefix
if a == b
return build_range(start[2..-1], stop[2..-1]).map do |elt|
"0x#{a} " + elt
end
end
###
# Unshared prefix, end of run
return ["0x#{a}..0x#{b} "] if left.zero?
###
# Unshared prefix, not end of run
# Range can be 0x123456..0x56789A
# Which is equivalent to:
# 0x123456 .. 0x12FFFF
# 0x130000 .. 0x55FFFF
# 0x560000 .. 0x56789A
ret = []
ret << build_range(start, a + "FF" * left)
###
# Only generate middle range if need be.
if a.hex+1 != b.hex
max = to_hex(b.hex - 1)
max = "FF" if b == "FF"
ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
end
###
# Don't generate last range if it is covered by first range
ret << build_range(b + "00" * left, stop) unless b == "FF"
ret.flatten!
end
def to_utf8( range )
utf8_ranges( range ).map do |r|
begin_enc = to_utf8_enc(r.begin)
end_enc = to_utf8_enc(r.end)
build_range begin_enc, end_enc
end.flatten!
end
##
# Perform a 3-way comparison of the number of codepoints advertised by
# the unicode spec for the given range, the originally parsed range,
# and the resulting utf8 encoded range.
def count_codepoints( code )
code.split(' ').inject(1) do |acc, elt|
if elt =~ /0x(.+)\.\.0x(.+)/
if @encoding == :utf8
acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1)
else
acc * ($2.hex - $1.hex + 1)
end
else
acc
end
end
end
def is_valid?( range, desc, codes )
spec_count = 1
spec_count = $1.to_i if desc =~ /\[(\d+)\]/
range_count = range.end - range.begin + 1
sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) }
sum == spec_count and sum == range_count
end
##
# Generate the state maching to stdout
def generate_machine( name, property )
pipe = " "
@output.puts " #{name} = "
each_alpha( @chart_url, property ) do |range, desc|
codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)
#raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
# is_valid? range, desc, codes
range_width = codes.map { |a| a.size }.max
range_width = RANGE_WIDTH if range_width < RANGE_WIDTH
desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11
desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH
if desc.size > desc_width
desc = desc[0..desc_width - 4] + "..."
end
codes.each_with_index do |r, idx|
desc = "" unless idx.zero?
code = "%-#{range_width}s" % r
@output.puts " #{pipe} #{code} ##{desc}"
pipe = "|"
end
end
@output.puts " ;"
@output.puts ""
end
@output.puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{@chart_url}
#
# It defines #{properties}.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.
%%{
machine #{machine_name};
EOF
properties.each { |x| generate_machine( x, x ) }
@output.puts <<EOF
}%%
EOF

@ -0,0 +1,19 @@
package textseg
import "unicode/utf8"
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on UTF8 sequence boundaries.
//
// This is included largely for completeness, since this behavior is already
// built in to Go when ranging over a string.
func ScanUTF8Sequences(data []byte, atEOF bool) (int, []byte, error) {
if len(data) == 0 {
return 0, nil, nil
}
r, seqLen := utf8.DecodeRune(data)
if r == utf8.RuneError && !atEOF {
return 0, nil, nil
}
return seqLen, data[:seqLen], nil
}

@ -0,0 +1,95 @@
Copyright (c) 2017 Martin Atkins
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
---------
Unicode table generation programs are under a separate copyright and license:
Copyright (c) 2014 Couchbase, Inc.
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
except in compliance with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
either express or implied. See the License for the specific language governing permissions
and limitations under the License.
---------
Grapheme break data is provided as part of the Unicode character database,
copright 2016 Unicode, Inc, which is provided with the following license:
Unicode Data Files include all data files under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
Unicode Data Files do not include PDF online code charts under the
directory http://www.unicode.org/Public/.
Software includes any source code published in the Unicode Standard
or under the directories
http://www.unicode.org/Public/, http://www.unicode.org/reports/,
http://www.unicode.org/cldr/data/, http://source.icu-project.org/repos/icu/, and
http://www.unicode.org/utility/trac/browser/.
NOTICE TO USER: Carefully read the following legal agreement.
BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
TERMS AND CONDITIONS OF THIS AGREEMENT.
IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
THE DATA FILES OR SOFTWARE.
COPYRIGHT AND PERMISSION NOTICE
Copyright © 1991-2017 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in http://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.

@ -0,0 +1,30 @@
package textseg
import (
"bufio"
"bytes"
)
// AllTokens is a utility that uses a bufio.SplitFunc to produce a slice of
// all of the recognized tokens in the given buffer.
func AllTokens(buf []byte, splitFunc bufio.SplitFunc) ([][]byte, error) {
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Split(splitFunc)
var ret [][]byte
for scanner.Scan() {
ret = append(ret, scanner.Bytes())
}
return ret, scanner.Err()
}
// TokenCount is a utility that uses a bufio.SplitFunc to count the number of
// recognized tokens in the given buffer.
func TokenCount(buf []byte, splitFunc bufio.SplitFunc) (int, error) {
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Split(splitFunc)
var ret int
for scanner.Scan() {
ret++
}
return ret, scanner.Err()
}

@ -0,0 +1,290 @@
# The following Ragel file was autogenerated with unicode2ragel.rb
# from: https://www.unicode.org/Public/emoji/12.0/emoji-data.txt
#
# It defines ["Extended_Pictographic"].
#
# To use this, make sure that your alphtype is set to byte,
# and that your input is in utf8.
%%{
machine Emoji;
Extended_Pictographic =
0xC2 0xA9 #1.1 [1] (©️) copyright
| 0xC2 0xAE #1.1 [1] (®️) registered
| 0xE2 0x80 0xBC #1.1 [1] (‼️) double exclamation mark
| 0xE2 0x81 0x89 #3.0 [1] (⁉️) exclamation question mark
| 0xE2 0x84 0xA2 #1.1 [1] (™️) trade mark
| 0xE2 0x84 0xB9 #3.0 [1] () information
| 0xE2 0x86 0x94..0x99 #1.1 [6] (↔️..↙️) left-right arrow..down...
| 0xE2 0x86 0xA9..0xAA #1.1 [2] (↩️..↪️) right arrow curving le...
| 0xE2 0x8C 0x9A..0x9B #1.1 [2] (⌚..⌛) watch..hourglass done
| 0xE2 0x8C 0xA8 #1.1 [1] (⌨️) keyboard
| 0xE2 0x8E 0x88 #3.0 [1] (⎈) HELM SYMBOL
| 0xE2 0x8F 0x8F #4.0 [1] (⏏️) eject button
| 0xE2 0x8F 0xA9..0xB3 #6.0 [11] (⏩..⏳) fast-forward button..hou...
| 0xE2 0x8F 0xB8..0xBA #7.0 [3] (⏸️..⏺️) pause button..record b...
| 0xE2 0x93 0x82 #1.1 [1] (Ⓜ️) circled M
| 0xE2 0x96 0xAA..0xAB #1.1 [2] (▪️..▫️) black small square..wh...
| 0xE2 0x96 0xB6 #1.1 [1] (▶️) play button
| 0xE2 0x97 0x80 #1.1 [1] (◀️) reverse button
| 0xE2 0x97 0xBB..0xBE #3.2 [4] (◻️..◾) white medium square..bl...
| 0xE2 0x98 0x80..0x85 #1.1 [6] (☀️..★) sun..BLACK STAR
| 0xE2 0x98 0x87..0x92 #1.1 [12] (☇..☒) LIGHTNING..BALLOT BOX WI...
| 0xE2 0x98 0x94..0x95 #4.0 [2] (☔..☕) umbrella with rain drops...
| 0xE2 0x98 0x96..0x97 #3.2 [2] (☖..☗) WHITE SHOGI PIECE..BLACK...
| 0xE2 0x98 0x98 #4.1 [1] (☘️) shamrock
| 0xE2 0x98 0x99 #3.0 [1] (☙) REVERSED ROTATED FLORAL ...
| 0xE2 0x98 0x9A..0xFF #1.1 [86] (☚..♯) BLACK LEFT POINTING INDE...
| 0xE2 0x99 0x00..0xAF #
| 0xE2 0x99 0xB0..0xB1 #3.0 [2] (♰..♱) WEST SYRIAC CROSS..EAST ...
| 0xE2 0x99 0xB2..0xBD #3.2 [12] (♲..♽) UNIVERSAL RECYCLING SYMB...
| 0xE2 0x99 0xBE..0xBF #4.1 [2] (♾️..♿) infinity..wheelchair sy...
| 0xE2 0x9A 0x80..0x85 #3.2 [6] (⚀..⚅) DIE FACE-1..DIE FACE-6
| 0xE2 0x9A 0x90..0x91 #4.0 [2] (⚐..⚑) WHITE FLAG..BLACK FLAG
| 0xE2 0x9A 0x92..0x9C #4.1 [11] (⚒️..⚜️) hammer and pick..fleur...
| 0xE2 0x9A 0x9D #5.1 [1] (⚝) OUTLINED WHITE STAR
| 0xE2 0x9A 0x9E..0x9F #5.2 [2] (⚞..⚟) THREE LINES CONVERGING R...
| 0xE2 0x9A 0xA0..0xA1 #4.0 [2] (⚠️..⚡) warning..high voltage
| 0xE2 0x9A 0xA2..0xB1 #4.1 [16] (⚢..⚱️) DOUBLED FEMALE SIGN..fu...
| 0xE2 0x9A 0xB2 #5.0 [1] (⚲) NEUTER
| 0xE2 0x9A 0xB3..0xBC #5.1 [10] (⚳..⚼) CERES..SESQUIQUADRATE
| 0xE2 0x9A 0xBD..0xBF #5.2 [3] (⚽..⚿) soccer ball..SQUARED KEY
| 0xE2 0x9B 0x80..0x83 #5.1 [4] (⛀..⛃) WHITE DRAUGHTS MAN..BLAC...
| 0xE2 0x9B 0x84..0x8D #5.2 [10] (⛄..⛍) snowman without snow..DI...
| 0xE2 0x9B 0x8E #6.0 [1] (⛎) Ophiuchus
| 0xE2 0x9B 0x8F..0xA1 #5.2 [19] (⛏️..⛡) pick..RESTRICTED LEFT E...
| 0xE2 0x9B 0xA2 #6.0 [1] (⛢) ASTRONOMICAL SYMBOL FOR ...
| 0xE2 0x9B 0xA3 #5.2 [1] (⛣) HEAVY CIRCLE WITH STROKE...
| 0xE2 0x9B 0xA4..0xA7 #6.0 [4] (⛤..⛧) PENTAGRAM..INVERTED PENT...
| 0xE2 0x9B 0xA8..0xBF #5.2 [24] (⛨..⛿) BLACK CROSS ON SHIELD..W...
| 0xE2 0x9C 0x80 #7.0 [1] (✀) BLACK SAFETY SCISSORS
| 0xE2 0x9C 0x81..0x84 #1.1 [4] (✁..✄) UPPER BLADE SCISSORS..WH...
| 0xE2 0x9C 0x85 #6.0 [1] (✅) check mark button
| 0xE2 0x9C 0x88..0x89 #1.1 [2] (✈️..✉️) airplane..envelope
| 0xE2 0x9C 0x8A..0x8B #6.0 [2] (✊..✋) raised fist..raised hand
| 0xE2 0x9C 0x8C..0x92 #1.1 [7] (✌️..✒️) victory hand..black nib
| 0xE2 0x9C 0x94 #1.1 [1] (✔️) check mark
| 0xE2 0x9C 0x96 #1.1 [1] (✖️) multiplication sign
| 0xE2 0x9C 0x9D #1.1 [1] (✝️) latin cross
| 0xE2 0x9C 0xA1 #1.1 [1] (✡️) star of David
| 0xE2 0x9C 0xA8 #6.0 [1] (✨) sparkles
| 0xE2 0x9C 0xB3..0xB4 #1.1 [2] (✳️..✴️) eight-spoked asterisk....
| 0xE2 0x9D 0x84 #1.1 [1] (❄️) snowflake
| 0xE2 0x9D 0x87 #1.1 [1] (❇️) sparkle
| 0xE2 0x9D 0x8C #6.0 [1] (❌) cross mark
| 0xE2 0x9D 0x8E #6.0 [1] (❎) cross mark button
| 0xE2 0x9D 0x93..0x95 #6.0 [3] (❓..❕) question mark..white exc...
| 0xE2 0x9D 0x97 #5.2 [1] (❗) exclamation mark
| 0xE2 0x9D 0xA3..0xA7 #1.1 [5] (❣️..❧) heart exclamation..ROTA...
| 0xE2 0x9E 0x95..0x97 #6.0 [3] (..➗) plus sign..division sign
| 0xE2 0x9E 0xA1 #1.1 [1] (➡️) right arrow
| 0xE2 0x9E 0xB0 #6.0 [1] (➰) curly loop
| 0xE2 0x9E 0xBF #6.0 [1] (➿) double curly loop
| 0xE2 0xA4 0xB4..0xB5 #3.2 [2] (⤴️..⤵️) right arrow curving up...
| 0xE2 0xAC 0x85..0x87 #4.0 [3] (⬅️..⬇️) left arrow..down arrow
| 0xE2 0xAC 0x9B..0x9C #5.1 [2] (⬛..⬜) black large square..whit...
| 0xE2 0xAD 0x90 #5.1 [1] (⭐) star
| 0xE2 0xAD 0x95 #5.2 [1] (⭕) hollow red circle
| 0xE3 0x80 0xB0 #1.1 [1] (〰️) wavy dash
| 0xE3 0x80 0xBD #3.2 [1] (〽️) part alternation mark
| 0xE3 0x8A 0x97 #1.1 [1] (㊗️) Japanese “congratulatio...
| 0xE3 0x8A 0x99 #1.1 [1] (㊙️) Japanese “secret” button
| 0xF0 0x9F 0x80 0x80..0xAB #5.1 [44] (🀀..🀫) MAHJONG TILE EAST WIN...
| 0xF0 0x9F 0x80 0xAC..0xAF #NA [4] (🀬..🀯) <reserved-1F02C>..<res...
| 0xF0 0x9F 0x80 0xB0..0xFF #5.1[100] (🀰..🂓) DOMINO TILE HOR...
| 0xF0 0x9F 0x81..0x81 0x00..0xFF #
| 0xF0 0x9F 0x82 0x00..0x93 #
| 0xF0 0x9F 0x82 0x94..0x9F #NA [12] (🂔..🂟) <reserved-1F094>..<res...
| 0xF0 0x9F 0x82 0xA0..0xAE #6.0 [15] (🂠..🂮) PLAYING CARD BACK..PL...
| 0xF0 0x9F 0x82 0xAF..0xB0 #NA [2] (🂯..🂰) <reserved-1F0AF>..<res...
| 0xF0 0x9F 0x82 0xB1..0xBE #6.0 [14] (🂱..🂾) PLAYING CARD ACE OF H...
| 0xF0 0x9F 0x82 0xBF #7.0 [1] (🂿) PLAYING CARD RED JOKER
| 0xF0 0x9F 0x83 0x80 #NA [1] (🃀) <reserved-1F0C0>
| 0xF0 0x9F 0x83 0x81..0x8F #6.0 [15] (🃁..🃏) PLAYING CARD ACE OF D...
| 0xF0 0x9F 0x83 0x90 #NA [1] (🃐) <reserved-1F0D0>
| 0xF0 0x9F 0x83 0x91..0x9F #6.0 [15] (🃑..🃟) PLAYING CARD ACE OF C...
| 0xF0 0x9F 0x83 0xA0..0xB5 #7.0 [22] (🃠..🃵) PLAYING CARD FOOL..PL...
| 0xF0 0x9F 0x83 0xB6..0xBF #NA [10] (🃶..🃿) <reserved-1F0F6>..<res...
| 0xF0 0x9F 0x84 0x8D..0x8F #NA [3] (🄍..🄏) <reserved-1F10D>..<res...
| 0xF0 0x9F 0x84 0xAF #11.0 [1] (🄯) COPYLEFT SYMBOL
| 0xF0 0x9F 0x85 0xAC #12.0 [1] (🅬) RAISED MR SIGN
| 0xF0 0x9F 0x85 0xAD..0xAF #NA [3] (🅭..🅯) <reserved-1F16D>..<res...
| 0xF0 0x9F 0x85 0xB0..0xB1 #6.0 [2] (🅰️..🅱️) A button (blood typ...
| 0xF0 0x9F 0x85 0xBE #6.0 [1] (🅾️) O button (blood type)
| 0xF0 0x9F 0x85 0xBF #5.2 [1] (🅿️) P button
| 0xF0 0x9F 0x86 0x8E #6.0 [1] (🆎) AB button (blood type)
| 0xF0 0x9F 0x86 0x91..0x9A #6.0 [10] (🆑..🆚) CL button..VS button
| 0xF0 0x9F 0x86 0xAD..0xFF #NA [57] (🆭..🇥) <reserved-1F1AD>..<res...
| 0xF0 0x9F 0x87 0x00..0xA5 #
| 0xF0 0x9F 0x88 0x81..0x82 #6.0 [2] (🈁..🈂️) Japanese “here” butt...
| 0xF0 0x9F 0x88 0x83..0x8F #NA [13] (🈃..🈏) <reserved-1F203>..<res...
| 0xF0 0x9F 0x88 0x9A #5.2 [1] (🈚) Japanese “free of charge...
| 0xF0 0x9F 0x88 0xAF #5.2 [1] (🈯) Japanese “reserved” button
| 0xF0 0x9F 0x88 0xB2..0xBA #6.0 [9] (🈲..🈺) Japanese “prohibited”...
| 0xF0 0x9F 0x88 0xBC..0xBF #NA [4] (🈼..🈿) <reserved-1F23C>..<res...
| 0xF0 0x9F 0x89 0x89..0x8F #NA [7] (🉉..🉏) <reserved-1F249>..<res...
| 0xF0 0x9F 0x89 0x90..0x91 #6.0 [2] (🉐..🉑) Japanese “bargain” bu...
| 0xF0 0x9F 0x89 0x92..0x9F #NA [14] (🉒..🉟) <reserved-1F252>..<res...
| 0xF0 0x9F 0x89 0xA0..0xA5 #10.0 [6] (🉠..🉥) ROUNDED SYMBOL FOR F...
| 0xF0 0x9F 0x89 0xA6..0xFF #NA[154] (🉦..🋿) <reserved-1F266>...
| 0xF0 0x9F 0x8A..0x8A 0x00..0xFF #
| 0xF0 0x9F 0x8B 0x00..0xBF #
| 0xF0 0x9F 0x8C 0x80..0xA0 #6.0 [33] (🌀..🌠) cyclone..shooting star
| 0xF0 0x9F 0x8C 0xA1..0xAC #7.0 [12] (🌡️..🌬️) thermometer..wind face
| 0xF0 0x9F 0x8C 0xAD..0xAF #8.0 [3] (🌭..🌯) hot dog..burrito
| 0xF0 0x9F 0x8C 0xB0..0xB5 #6.0 [6] (🌰..🌵) chestnut..cactus
| 0xF0 0x9F 0x8C 0xB6 #7.0 [1] (🌶️) hot pepper
| 0xF0 0x9F 0x8C 0xB7..0xFF #6.0 [70] (🌷..🍼) tulip..baby bottle
| 0xF0 0x9F 0x8D 0x00..0xBC #
| 0xF0 0x9F 0x8D 0xBD #7.0 [1] (🍽️) fork and knife with plate
| 0xF0 0x9F 0x8D 0xBE..0xBF #8.0 [2] (🍾..🍿) bottle with popping c...
| 0xF0 0x9F 0x8E 0x80..0x93 #6.0 [20] (🎀..🎓) ribbon..graduation cap
| 0xF0 0x9F 0x8E 0x94..0x9F #7.0 [12] (🎔..🎟️) HEART WITH TIP ON TH...
| 0xF0 0x9F 0x8E 0xA0..0xFF #6.0 [37] (🎠..🏄) carousel horse..perso...
| 0xF0 0x9F 0x8F 0x00..0x84 #
| 0xF0 0x9F 0x8F 0x85 #7.0 [1] (🏅) sports medal
| 0xF0 0x9F 0x8F 0x86..0x8A #6.0 [5] (🏆..🏊) trophy..person swimming
| 0xF0 0x9F 0x8F 0x8B..0x8E #7.0 [4] (🏋️..🏎️) person lifting weig...
| 0xF0 0x9F 0x8F 0x8F..0x93 #8.0 [5] (🏏..🏓) cricket game..ping pong
| 0xF0 0x9F 0x8F 0x94..0x9F #7.0 [12] (🏔️..🏟️) snow-capped mountai...
| 0xF0 0x9F 0x8F 0xA0..0xB0 #6.0 [17] (🏠..🏰) house..castle
| 0xF0 0x9F 0x8F 0xB1..0xB7 #7.0 [7] (🏱..🏷️) WHITE PENNANT..label
| 0xF0 0x9F 0x8F 0xB8..0xBA #8.0 [3] (🏸..🏺) badminton..amphora
| 0xF0 0x9F 0x90 0x80..0xBE #6.0 [63] (🐀..🐾) rat..paw prints
| 0xF0 0x9F 0x90 0xBF #7.0 [1] (🐿️) chipmunk
| 0xF0 0x9F 0x91 0x80 #6.0 [1] (👀) eyes
| 0xF0 0x9F 0x91 0x81 #7.0 [1] (👁️) eye
| 0xF0 0x9F 0x91 0x82..0xFF #6.0[182] (👂..📷) ear..camera
| 0xF0 0x9F 0x92..0x92 0x00..0xFF #
| 0xF0 0x9F 0x93 0x00..0xB7 #
| 0xF0 0x9F 0x93 0xB8 #7.0 [1] (📸) camera with flash
| 0xF0 0x9F 0x93 0xB9..0xBC #6.0 [4] (📹..📼) video camera..videoca...
| 0xF0 0x9F 0x93 0xBD..0xBE #7.0 [2] (📽️..📾) film projector..PORT...
| 0xF0 0x9F 0x93 0xBF #8.0 [1] (📿) prayer beads
| 0xF0 0x9F 0x94 0x80..0xBD #6.0 [62] (🔀..🔽) shuffle tracks button...
| 0xF0 0x9F 0x95 0x86..0x8A #7.0 [5] (🕆..🕊️) WHITE LATIN CROSS..dove
| 0xF0 0x9F 0x95 0x8B..0x8F #8.0 [5] (🕋..🕏) kaaba..BOWL OF HYGIEIA
| 0xF0 0x9F 0x95 0x90..0xA7 #6.0 [24] (🕐..🕧) one oclock..twelve-t...
| 0xF0 0x9F 0x95 0xA8..0xB9 #7.0 [18] (🕨..🕹️) RIGHT SPEAKER..joystick
| 0xF0 0x9F 0x95 0xBA #9.0 [1] (🕺) man dancing
| 0xF0 0x9F 0x95 0xBB..0xFF #7.0 [41] (🕻..🖣) LEFT HAND TELEPHONE R...
| 0xF0 0x9F 0x96 0x00..0xA3 #
| 0xF0 0x9F 0x96 0xA4 #9.0 [1] (🖤) black heart
| 0xF0 0x9F 0x96 0xA5..0xFF #7.0 [86] (🖥️..🗺️) desktop computer..w...
| 0xF0 0x9F 0x97 0x00..0xBA #
| 0xF0 0x9F 0x97 0xBB..0xBF #6.0 [5] (🗻..🗿) mount fuji..moai
| 0xF0 0x9F 0x98 0x80 #6.1 [1] (😀) grinning face
| 0xF0 0x9F 0x98 0x81..0x90 #6.0 [16] (😁..😐) beaming face with smi...
| 0xF0 0x9F 0x98 0x91 #6.1 [1] (😑) expressionless face
| 0xF0 0x9F 0x98 0x92..0x94 #6.0 [3] (😒..😔) unamused face..pensiv...
| 0xF0 0x9F 0x98 0x95 #6.1 [1] (😕) confused face
| 0xF0 0x9F 0x98 0x96 #6.0 [1] (😖) confounded face
| 0xF0 0x9F 0x98 0x97 #6.1 [1] (😗) kissing face
| 0xF0 0x9F 0x98 0x98 #6.0 [1] (😘) face blowing a kiss
| 0xF0 0x9F 0x98 0x99 #6.1 [1] (😙) kissing face with smilin...
| 0xF0 0x9F 0x98 0x9A #6.0 [1] (😚) kissing face with closed...
| 0xF0 0x9F 0x98 0x9B #6.1 [1] (😛) face with tongue
| 0xF0 0x9F 0x98 0x9C..0x9E #6.0 [3] (😜..😞) winking face with ton...
| 0xF0 0x9F 0x98 0x9F #6.1 [1] (😟) worried face
| 0xF0 0x9F 0x98 0xA0..0xA5 #6.0 [6] (😠..😥) angry face..sad but r...
| 0xF0 0x9F 0x98 0xA6..0xA7 #6.1 [2] (😦..😧) frowning face with op...
| 0xF0 0x9F 0x98 0xA8..0xAB #6.0 [4] (😨..😫) fearful face..tired face
| 0xF0 0x9F 0x98 0xAC #6.1 [1] (😬) grimacing face
| 0xF0 0x9F 0x98 0xAD #6.0 [1] (😭) loudly crying face
| 0xF0 0x9F 0x98 0xAE..0xAF #6.1 [2] (😮..😯) face with open mouth....
| 0xF0 0x9F 0x98 0xB0..0xB3 #6.0 [4] (😰..😳) anxious face with swe...
| 0xF0 0x9F 0x98 0xB4 #6.1 [1] (😴) sleeping face
| 0xF0 0x9F 0x98 0xB5..0xFF #6.0 [12] (😵..🙀) dizzy face..weary cat
| 0xF0 0x9F 0x99 0x00..0x80 #
| 0xF0 0x9F 0x99 0x81..0x82 #7.0 [2] (🙁..🙂) slightly frowning fac...
| 0xF0 0x9F 0x99 0x83..0x84 #8.0 [2] (🙃..🙄) upside-down face..fac...
| 0xF0 0x9F 0x99 0x85..0x8F #6.0 [11] (🙅..🙏) person gesturing NO.....
| 0xF0 0x9F 0x9A 0x80..0xFF #6.0 [70] (🚀..🛅) rocket..left luggage
| 0xF0 0x9F 0x9B 0x00..0x85 #
| 0xF0 0x9F 0x9B 0x86..0x8F #7.0 [10] (🛆..🛏️) TRIANGLE WITH ROUNDE...
| 0xF0 0x9F 0x9B 0x90 #8.0 [1] (🛐) place of worship
| 0xF0 0x9F 0x9B 0x91..0x92 #9.0 [2] (🛑..🛒) stop sign..shopping cart
| 0xF0 0x9F 0x9B 0x93..0x94 #10.0 [2] (🛓..🛔) STUPA..PAGODA
| 0xF0 0x9F 0x9B 0x95 #12.0 [1] (🛕) hindu temple
| 0xF0 0x9F 0x9B 0x96..0x9F #NA [10] (🛖..🛟) <reserved-1F6D6>..<res...
| 0xF0 0x9F 0x9B 0xA0..0xAC #7.0 [13] (🛠️..🛬) hammer and wrench..a...
| 0xF0 0x9F 0x9B 0xAD..0xAF #NA [3] (🛭..🛯) <reserved-1F6ED>..<res...
| 0xF0 0x9F 0x9B 0xB0..0xB3 #7.0 [4] (🛰️..🛳️) satellite..passenge...
| 0xF0 0x9F 0x9B 0xB4..0xB6 #9.0 [3] (🛴..🛶) kick scooter..canoe
| 0xF0 0x9F 0x9B 0xB7..0xB8 #10.0 [2] (🛷..🛸) sled..flying saucer
| 0xF0 0x9F 0x9B 0xB9 #11.0 [1] (🛹) skateboard
| 0xF0 0x9F 0x9B 0xBA #12.0 [1] (🛺) auto rickshaw
| 0xF0 0x9F 0x9B 0xBB..0xBF #NA [5] (🛻..🛿) <reserved-1F6FB>..<res...
| 0xF0 0x9F 0x9D 0xB4..0xBF #NA [12] (🝴..🝿) <reserved-1F774>..<res...
| 0xF0 0x9F 0x9F 0x95..0x98 #11.0 [4] (🟕..🟘) CIRCLED TRIANGLE..NE...
| 0xF0 0x9F 0x9F 0x99..0x9F #NA [7] (🟙..🟟) <reserved-1F7D9>..<res...
| 0xF0 0x9F 0x9F 0xA0..0xAB #12.0 [12] (🟠..🟫) orange circle..brown...
| 0xF0 0x9F 0x9F 0xAC..0xBF #NA [20] (🟬..🟿) <reserved-1F7EC>..<res...
| 0xF0 0x9F 0xA0 0x8C..0x8F #NA [4] (🠌..🠏) <reserved-1F80C>..<res...
| 0xF0 0x9F 0xA1 0x88..0x8F #NA [8] (🡈..🡏) <reserved-1F848>..<res...
| 0xF0 0x9F 0xA1 0x9A..0x9F #NA [6] (🡚..🡟) <reserved-1F85A>..<res...
| 0xF0 0x9F 0xA2 0x88..0x8F #NA [8] (🢈..🢏) <reserved-1F888>..<res...
| 0xF0 0x9F 0xA2 0xAE..0xFF #NA [82] (🢮..🣿) <reserved-1F8AE>..<res...
| 0xF0 0x9F 0xA3 0x00..0xBF #
| 0xF0 0x9F 0xA4 0x8C #NA [1] (🤌) <reserved-1F90C>
| 0xF0 0x9F 0xA4 0x8D..0x8F #12.0 [3] (🤍..🤏) white heart..pinchin...
| 0xF0 0x9F 0xA4 0x90..0x98 #8.0 [9] (🤐..🤘) zipper-mouth face..si...
| 0xF0 0x9F 0xA4 0x99..0x9E #9.0 [6] (🤙..🤞) call me hand..crossed...
| 0xF0 0x9F 0xA4 0x9F #10.0 [1] (🤟) love-you gesture
| 0xF0 0x9F 0xA4 0xA0..0xA7 #9.0 [8] (🤠..🤧) cowboy hat face..snee...
| 0xF0 0x9F 0xA4 0xA8..0xAF #10.0 [8] (🤨..🤯) face with raised eye...
| 0xF0 0x9F 0xA4 0xB0 #9.0 [1] (🤰) pregnant woman
| 0xF0 0x9F 0xA4 0xB1..0xB2 #10.0 [2] (🤱..🤲) breast-feeding..palm...
| 0xF0 0x9F 0xA4 0xB3..0xBA #9.0 [8] (🤳..🤺) selfie..person fencing
| 0xF0 0x9F 0xA4 0xBC..0xBE #9.0 [3] (🤼..🤾) people wrestling..per...
| 0xF0 0x9F 0xA4 0xBF #12.0 [1] (🤿) diving mask
| 0xF0 0x9F 0xA5 0x80..0x85 #9.0 [6] (🥀..🥅) wilted flower..goal net
| 0xF0 0x9F 0xA5 0x87..0x8B #9.0 [5] (🥇..🥋) 1st place medal..mart...
| 0xF0 0x9F 0xA5 0x8C #10.0 [1] (🥌) curling stone
| 0xF0 0x9F 0xA5 0x8D..0x8F #11.0 [3] (🥍..🥏) lacrosse..flying disc
| 0xF0 0x9F 0xA5 0x90..0x9E #9.0 [15] (🥐..🥞) croissant..pancakes
| 0xF0 0x9F 0xA5 0x9F..0xAB #10.0 [13] (🥟..🥫) dumpling..canned food
| 0xF0 0x9F 0xA5 0xAC..0xB0 #11.0 [5] (🥬..🥰) leafy green..smiling...
| 0xF0 0x9F 0xA5 0xB1 #12.0 [1] (🥱) yawning face
| 0xF0 0x9F 0xA5 0xB2 #NA [1] (🥲) <reserved-1F972>
| 0xF0 0x9F 0xA5 0xB3..0xB6 #11.0 [4] (🥳..🥶) partying face..cold ...
| 0xF0 0x9F 0xA5 0xB7..0xB9 #NA [3] (🥷..🥹) <reserved-1F977>..<res...
| 0xF0 0x9F 0xA5 0xBA #11.0 [1] (🥺) pleading face
| 0xF0 0x9F 0xA5 0xBB #12.0 [1] (🥻) sari
| 0xF0 0x9F 0xA5 0xBC..0xBF #11.0 [4] (🥼..🥿) lab coat..flat shoe
| 0xF0 0x9F 0xA6 0x80..0x84 #8.0 [5] (🦀..🦄) crab..unicorn
| 0xF0 0x9F 0xA6 0x85..0x91 #9.0 [13] (🦅..🦑) eagle..squid
| 0xF0 0x9F 0xA6 0x92..0x97 #10.0 [6] (🦒..🦗) giraffe..cricket
| 0xF0 0x9F 0xA6 0x98..0xA2 #11.0 [11] (🦘..🦢) kangaroo..swan
| 0xF0 0x9F 0xA6 0xA3..0xA4 #NA [2] (🦣..🦤) <reserved-1F9A3>..<res...
| 0xF0 0x9F 0xA6 0xA5..0xAA #12.0 [6] (🦥..🦪) sloth..oyster
| 0xF0 0x9F 0xA6 0xAB..0xAD #NA [3] (🦫..🦭) <reserved-1F9AB>..<res...
| 0xF0 0x9F 0xA6 0xAE..0xAF #12.0 [2] (🦮..🦯) guide dog..probing cane
| 0xF0 0x9F 0xA6 0xB0..0xB9 #11.0 [10] (🦰..🦹) red hair..supervillain
| 0xF0 0x9F 0xA6 0xBA..0xBF #12.0 [6] (🦺..🦿) safety vest..mechani...
| 0xF0 0x9F 0xA7 0x80 #8.0 [1] (🧀) cheese wedge
| 0xF0 0x9F 0xA7 0x81..0x82 #11.0 [2] (🧁..🧂) cupcake..salt
| 0xF0 0x9F 0xA7 0x83..0x8A #12.0 [8] (🧃..🧊) beverage box..ice cube
| 0xF0 0x9F 0xA7 0x8B..0x8C #NA [2] (🧋..🧌) <reserved-1F9CB>..<res...
| 0xF0 0x9F 0xA7 0x8D..0x8F #12.0 [3] (🧍..🧏) person standing..dea...
| 0xF0 0x9F 0xA7 0x90..0xA6 #10.0 [23] (🧐..🧦) face with monocle..s...
| 0xF0 0x9F 0xA7 0xA7..0xBF #11.0 [25] (🧧..🧿) red envelope..nazar ...
| 0xF0 0x9F 0xA8 0x80..0xFF #12.0 [84] (🨀..🩓) NEUTRAL CHESS KING.....
| 0xF0 0x9F 0xA9 0x00..0x93 #
| 0xF0 0x9F 0xA9 0x94..0x9F #NA [12] (🩔..🩟) <reserved-1FA54>..<res...
| 0xF0 0x9F 0xA9 0xA0..0xAD #11.0 [14] (🩠..🩭) XIANGQI RED GENERAL....
| 0xF0 0x9F 0xA9 0xAE..0xAF #NA [2] (🩮..🩯) <reserved-1FA6E>..<res...
| 0xF0 0x9F 0xA9 0xB0..0xB3 #12.0 [4] (🩰..🩳) ballet shoes..shorts
| 0xF0 0x9F 0xA9 0xB4..0xB7 #NA [4] (🩴..🩷) <reserved-1FA74>..<res...
| 0xF0 0x9F 0xA9 0xB8..0xBA #12.0 [3] (🩸..🩺) drop of blood..steth...
| 0xF0 0x9F 0xA9 0xBB..0xBF #NA [5] (🩻..🩿) <reserved-1FA7B>..<res...
| 0xF0 0x9F 0xAA 0x80..0x82 #12.0 [3] (🪀..🪂) yo-yo..parachute
| 0xF0 0x9F 0xAA 0x83..0x8F #NA [13] (🪃..🪏) <reserved-1FA83>..<res...
| 0xF0 0x9F 0xAA 0x90..0x95 #12.0 [6] (🪐..🪕) ringed planet..banjo
| 0xF0 0x9F 0xAA 0x96..0xFF #NA[1384] (🪖..🿽) <reserved-1FA96>...
| 0xF0 0x9F 0xAB..0xBE 0x00..0xFF #
| 0xF0 0x9F 0xBF 0x00..0xBD #
;
}%%

@ -0,0 +1,8 @@
package textseg
//go:generate go run make_tables.go -output tables.go
//go:generate go run make_test_tables.go -output tables_test.go
//go:generate ruby unicode2ragel.rb --url=https://www.unicode.org/Public/12.0.0/ucd/auxiliary/GraphemeBreakProperty.txt -m GraphemeCluster -p "Prepend,CR,LF,Control,Extend,Regional_Indicator,SpacingMark,L,V,T,LV,LVT,ZWJ" -o grapheme_clusters_table.rl
//go:generate ruby unicode2ragel.rb --url=https://www.unicode.org/Public/emoji/12.0/emoji-data.txt -m Emoji -p "Extended_Pictographic" -o emoji_table.rl
//go:generate ragel -Z grapheme_clusters.rl
//go:generate gofmt -w grapheme_clusters.go

File diff suppressed because it is too large Load Diff

@ -0,0 +1,133 @@
package textseg
import (
"errors"
"unicode/utf8"
)
// Generated from grapheme_clusters.rl. DO NOT EDIT
%%{
# (except you are actually in grapheme_clusters.rl here, so edit away!)
machine graphclust;
write data;
}%%
var Error = errors.New("invalid UTF8 text")
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on grapheme cluster boundaries.
func ScanGraphemeClusters(data []byte, atEOF bool) (int, []byte, error) {
if len(data) == 0 {
return 0, nil, nil
}
// Ragel state
cs := 0 // Current State
p := 0 // "Pointer" into data
pe := len(data) // End-of-data "pointer"
ts := 0
te := 0
act := 0
eof := pe
// Make Go compiler happy
_ = ts
_ = te
_ = act
_ = eof
startPos := 0
endPos := 0
%%{
include GraphemeCluster "grapheme_clusters_table.rl";
include Emoji "emoji_table.rl";
action start {
startPos = p
}
action end {
endPos = p
}
action emit {
return endPos+1, data[startPos:endPos+1], nil
}
ZWJGlue = ZWJ (Extended_Pictographic Extend*)?;
AnyExtender = Extend | ZWJGlue | SpacingMark;
Extension = AnyExtender*;
ReplacementChar = (0xEF 0xBF 0xBD);
CRLFSeq = CR LF;
ControlSeq = Control | ReplacementChar;
HangulSeq = (
L+ (((LV? V+ | LVT) T*)?|LV?) |
LV V* T* |
V+ T* |
LVT T* |
T+
) Extension;
EmojiSeq = Extended_Pictographic Extend* Extension;
ZWJSeq = ZWJ (ZWJ | Extend | SpacingMark)*;
EmojiFlagSeq = Regional_Indicator Regional_Indicator? Extension;
UTF8Cont = 0x80 .. 0xBF;
AnyUTF8 = (
0x00..0x7F |
0xC0..0xDF . UTF8Cont |
0xE0..0xEF . UTF8Cont . UTF8Cont |
0xF0..0xF7 . UTF8Cont . UTF8Cont . UTF8Cont
);
# OtherSeq is any character that isn't at the start of one of the extended sequences above, followed by extension
OtherSeq = (AnyUTF8 - (CR|LF|Control|ReplacementChar|L|LV|V|LVT|T|Extended_Pictographic|ZWJ|Regional_Indicator|Prepend)) (Extend | ZWJ | SpacingMark)*;
# PrependSeq is prepend followed by any of the other patterns above, except control characters which explicitly break
PrependSeq = Prepend+ (HangulSeq|EmojiSeq|ZWJSeq|EmojiFlagSeq|OtherSeq)?;
CRLFTok = CRLFSeq >start @end;
ControlTok = ControlSeq >start @end;
HangulTok = HangulSeq >start @end;
EmojiTok = EmojiSeq >start @end;
ZWJTok = ZWJSeq >start @end;
EmojiFlagTok = EmojiFlagSeq >start @end;
OtherTok = OtherSeq >start @end;
PrependTok = PrependSeq >start @end;
main := |*
CRLFTok => emit;
ControlTok => emit;
HangulTok => emit;
EmojiTok => emit;
ZWJTok => emit;
EmojiFlagTok => emit;
PrependTok => emit;
OtherTok => emit;
# any single valid UTF-8 character would also be valid per spec,
# but we'll handle that separately after the loop so we can deal
# with requesting more bytes if we're not at EOF.
*|;
write init;
write exec;
}%%
// If we fall out here then we were unable to complete a sequence.
// If we weren't able to complete a sequence then either we've
// reached the end of a partial buffer (so there's more data to come)
// or we have an isolated symbol that would normally be part of a
// grapheme cluster but has appeared in isolation here.
if !atEOF {
// Request more
return 0, nil, nil
}
// Just take the first UTF-8 sequence and return that.
_, seqLen := utf8.DecodeRune(data)
return seqLen, data[:seqLen], nil
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -0,0 +1,335 @@
#!/usr/bin/env ruby
#
# This scripted has been updated to accept more command-line arguments:
#
# -u, --url URL to process
# -m, --machine Machine name
# -p, --properties Properties to add to the machine
# -o, --output Write output to file
#
# Updated by: Marty Schoch <marty.schoch@gmail.com>
#
# This script uses the unicode spec to generate a Ragel state machine
# that recognizes unicode alphanumeric characters. It generates 5
# character classes: uupper, ulower, ualpha, udigit, and ualnum.
# Currently supported encodings are UTF-8 [default] and UCS-4.
#
# Usage: unicode2ragel.rb [options]
# -e, --encoding [ucs4 | utf8] Data encoding
# -h, --help Show this message
#
# This script was originally written as part of the Ferret search
# engine library.
#
# Author: Rakan El-Khalil <rakan@well.com>
require 'optparse'
require 'open-uri'
ENCODINGS = [ :utf8, :ucs4 ]
ALPHTYPES = { :utf8 => "byte", :ucs4 => "rune" }
DEFAULT_CHART_URL = "http://www.unicode.org/Public/5.1.0/ucd/DerivedCoreProperties.txt"
DEFAULT_MACHINE_NAME= "WChar"
###
# Display vars & default option
TOTAL_WIDTH = 80
RANGE_WIDTH = 23
@encoding = :utf8
@chart_url = DEFAULT_CHART_URL
machine_name = DEFAULT_MACHINE_NAME
properties = []
@output = $stdout
###
# Option parsing
cli_opts = OptionParser.new do |opts|
opts.on("-e", "--encoding [ucs4 | utf8]", "Data encoding") do |o|
@encoding = o.downcase.to_sym
end
opts.on("-h", "--help", "Show this message") do
puts opts
exit
end
opts.on("-u", "--url URL", "URL to process") do |o|
@chart_url = o
end
opts.on("-m", "--machine MACHINE_NAME", "Machine name") do |o|
machine_name = o
end
opts.on("-p", "--properties x,y,z", Array, "Properties to add to machine") do |o|
properties = o
end
opts.on("-o", "--output FILE", "output file") do |o|
@output = File.new(o, "w+")
end
end
cli_opts.parse(ARGV)
unless ENCODINGS.member? @encoding
puts "Invalid encoding: #{@encoding}"
puts cli_opts
exit
end
##
# Downloads the document at url and yields every alpha line's hex
# range and description.
def each_alpha( url, property )
open( url ) do |file|
file.each_line do |line|
next if line =~ /^#/;
next if line !~ /; #{property} *#/;
range, description = line.split(/;/)
range.strip!
description.gsub!(/.*#/, '').strip!
if range =~ /\.\./
start, stop = range.split '..'
else start = stop = range
end
yield start.hex .. stop.hex, description
end
end
end
###
# Formats to hex at minimum width
def to_hex( n )
r = "%0X" % n
r = "0#{r}" unless (r.length % 2).zero?
r
end
###
# UCS4 is just a straight hex conversion of the unicode codepoint.
def to_ucs4( range )
rangestr = "0x" + to_hex(range.begin)
rangestr << "..0x" + to_hex(range.end) if range.begin != range.end
[ rangestr ]
end
##
# 0x00 - 0x7f -> 0zzzzzzz[7]
# 0x80 - 0x7ff -> 110yyyyy[5] 10zzzzzz[6]
# 0x800 - 0xffff -> 1110xxxx[4] 10yyyyyy[6] 10zzzzzz[6]
# 0x010000 - 0x10ffff -> 11110www[3] 10xxxxxx[6] 10yyyyyy[6] 10zzzzzz[6]
UTF8_BOUNDARIES = [0x7f, 0x7ff, 0xffff, 0x10ffff]
def to_utf8_enc( n )
r = 0
if n <= 0x7f
r = n
elsif n <= 0x7ff
y = 0xc0 | (n >> 6)
z = 0x80 | (n & 0x3f)
r = y << 8 | z
elsif n <= 0xffff
x = 0xe0 | (n >> 12)
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = x << 16 | y << 8 | z
elsif n <= 0x10ffff
w = 0xf0 | (n >> 18)
x = 0x80 | (n >> 12) & 0x3f
y = 0x80 | (n >> 6) & 0x3f
z = 0x80 | n & 0x3f
r = w << 24 | x << 16 | y << 8 | z
end
to_hex(r)
end
def from_utf8_enc( n )
n = n.hex
r = 0
if n <= 0x7f
r = n
elsif n <= 0xdfff
y = (n >> 8) & 0x1f
z = n & 0x3f
r = y << 6 | z
elsif n <= 0xefffff
x = (n >> 16) & 0x0f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = x << 10 | y << 6 | z
elsif n <= 0xf7ffffff
w = (n >> 24) & 0x07
x = (n >> 16) & 0x3f
y = (n >> 8) & 0x3f
z = n & 0x3f
r = w << 18 | x << 12 | y << 6 | z
end
r
end
###
# Given a range, splits it up into ranges that can be continuously
# encoded into utf8. Eg: 0x00 .. 0xff => [0x00..0x7f, 0x80..0xff]
# This is not strictly needed since the current [5.1] unicode standard
# doesn't have ranges that straddle utf8 boundaries. This is included
# for completeness as there is no telling if that will ever change.
def utf8_ranges( range )
ranges = []
UTF8_BOUNDARIES.each do |max|
if range.begin <= max
if range.end <= max
ranges << range
return ranges
end
ranges << (range.begin .. max)
range = (max + 1) .. range.end
end
end
ranges
end
def build_range( start, stop )
size = start.size/2
left = size - 1
return [""] if size < 1
a = start[0..1]
b = stop[0..1]
###
# Shared prefix
if a == b
return build_range(start[2..-1], stop[2..-1]).map do |elt|
"0x#{a} " + elt
end
end
###
# Unshared prefix, end of run
return ["0x#{a}..0x#{b} "] if left.zero?
###
# Unshared prefix, not end of run
# Range can be 0x123456..0x56789A
# Which is equivalent to:
# 0x123456 .. 0x12FFFF
# 0x130000 .. 0x55FFFF
# 0x560000 .. 0x56789A
ret = []
ret << build_range(start, a + "FF" * left)
###
# Only generate middle range if need be.
if a.hex+1 != b.hex
max = to_hex(b.hex - 1)
max = "FF" if b == "FF"
ret << "0x#{to_hex(a.hex+1)}..0x#{max} " + "0x00..0xFF " * left
end
###
# Don't generate last range if it is covered by first range
ret << build_range(b + "00" * left, stop) unless b == "FF"
ret.flatten!
end
def to_utf8( range )
utf8_ranges( range ).map do |r|
begin_enc = to_utf8_enc(r.begin)
end_enc = to_utf8_enc(r.end)
build_range begin_enc, end_enc
end.flatten!
end
##
# Perform a 3-way comparison of the number of codepoints advertised by
# the unicode spec for the given range, the originally parsed range,
# and the resulting utf8 encoded range.
def count_codepoints( code )
code.split(' ').inject(1) do |acc, elt|
if elt =~ /0x(.+)\.\.0x(.+)/
if @encoding == :utf8
acc * (from_utf8_enc($2) - from_utf8_enc($1) + 1)
else
acc * ($2.hex - $1.hex + 1)
end
else
acc
end
end
end
def is_valid?( range, desc, codes )
spec_count = 1
spec_count = $1.to_i if desc =~ /\[(\d+)\]/
range_count = range.end - range.begin + 1
sum = codes.inject(0) { |acc, elt| acc + count_codepoints(elt) }
sum == spec_count and sum == range_count
end
##
# Generate the state maching to stdout
def generate_machine( name, property )
pipe = " "
@output.puts " #{name} = "
each_alpha( @chart_url, property ) do |range, desc|
codes = (@encoding == :ucs4) ? to_ucs4(range) : to_utf8(range)
#raise "Invalid encoding of range #{range}: #{codes.inspect}" unless
# is_valid? range, desc, codes
range_width = codes.map { |a| a.size }.max
range_width = RANGE_WIDTH if range_width < RANGE_WIDTH
desc_width = TOTAL_WIDTH - RANGE_WIDTH - 11
desc_width -= (range_width - RANGE_WIDTH) if range_width > RANGE_WIDTH
if desc.size > desc_width
desc = desc[0..desc_width - 4] + "..."
end
codes.each_with_index do |r, idx|
desc = "" unless idx.zero?
code = "%-#{range_width}s" % r
@output.puts " #{pipe} #{code} ##{desc}"
pipe = "|"
end
end
@output.puts " ;"
@output.puts ""
end
@output.puts <<EOF
# The following Ragel file was autogenerated with #{$0}
# from: #{@chart_url}
#
# It defines #{properties}.
#
# To use this, make sure that your alphtype is set to #{ALPHTYPES[@encoding]},
# and that your input is in #{@encoding}.
%%{
machine #{machine_name};
EOF
properties.each { |x| generate_machine( x, x ) }
@output.puts <<EOF
}%%
EOF

@ -0,0 +1,19 @@
package textseg
import "unicode/utf8"
// ScanGraphemeClusters is a split function for bufio.Scanner that splits
// on UTF8 sequence boundaries.
//
// This is included largely for completeness, since this behavior is already
// built in to Go when ranging over a string.
func ScanUTF8Sequences(data []byte, atEOF bool) (int, []byte, error) {
if len(data) == 0 {
return 0, nil, nil
}
r, seqLen := utf8.DecodeRune(data)
if r == utf8.RuneError && !atEOF {
return 0, nil, nil
}
return seqLen, data[:seqLen], nil
}

@ -0,0 +1,27 @@
Copyright (c) 2017 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

@ -0,0 +1,616 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package cmp determines equality of values.
//
// This package is intended to be a more powerful and safer alternative to
// reflect.DeepEqual for comparing whether two values are semantically equal.
//
// The primary features of cmp are:
//
// • When the default behavior of equality does not suit the needs of the test,
// custom equality functions can override the equality operation.
// For example, an equality function may report floats as equal so long as they
// are within some tolerance of each other.
//
// • Types that have an Equal method may use that method to determine equality.
// This allows package authors to determine the equality operation for the types
// that they define.
//
// • If no custom equality functions are used and no Equal method is defined,
// equality is determined by recursively comparing the primitive kinds on both
// values, much like reflect.DeepEqual. Unlike reflect.DeepEqual, unexported
// fields are not compared by default; they result in panics unless suppressed
// by using an Ignore option (see cmpopts.IgnoreUnexported) or explicitly compared
// using the AllowUnexported option.
package cmp
import (
"fmt"
"reflect"
"strings"
"github.com/google/go-cmp/cmp/internal/diff"
"github.com/google/go-cmp/cmp/internal/flags"
"github.com/google/go-cmp/cmp/internal/function"
"github.com/google/go-cmp/cmp/internal/value"
)
// Equal reports whether x and y are equal by recursively applying the
// following rules in the given order to x and y and all of their sub-values:
//
// • Let S be the set of all Ignore, Transformer, and Comparer options that
// remain after applying all path filters, value filters, and type filters.
// If at least one Ignore exists in S, then the comparison is ignored.
// If the number of Transformer and Comparer options in S is greater than one,
// then Equal panics because it is ambiguous which option to use.
// If S contains a single Transformer, then use that to transform the current
// values and recursively call Equal on the output values.
// If S contains a single Comparer, then use that to compare the current values.
// Otherwise, evaluation proceeds to the next rule.
//
// • If the values have an Equal method of the form "(T) Equal(T) bool" or
// "(T) Equal(I) bool" where T is assignable to I, then use the result of
// x.Equal(y) even if x or y is nil. Otherwise, no such method exists and
// evaluation proceeds to the next rule.
//
// • Lastly, try to compare x and y based on their basic kinds.
// Simple kinds like booleans, integers, floats, complex numbers, strings, and
// channels are compared using the equivalent of the == operator in Go.
// Functions are only equal if they are both nil, otherwise they are unequal.
//
// Structs are equal if recursively calling Equal on all fields report equal.
// If a struct contains unexported fields, Equal panics unless an Ignore option
// (e.g., cmpopts.IgnoreUnexported) ignores that field or the AllowUnexported
// option explicitly permits comparing the unexported field.
//
// Slices are equal if they are both nil or both non-nil, where recursively
// calling Equal on all non-ignored slice or array elements report equal.
// Empty non-nil slices and nil slices are not equal; to equate empty slices,
// consider using cmpopts.EquateEmpty.
//
// Maps are equal if they are both nil or both non-nil, where recursively
// calling Equal on all non-ignored map entries report equal.
// Map keys are equal according to the == operator.
// To use custom comparisons for map keys, consider using cmpopts.SortMaps.
// Empty non-nil maps and nil maps are not equal; to equate empty maps,
// consider using cmpopts.EquateEmpty.
//
// Pointers and interfaces are equal if they are both nil or both non-nil,
// where they have the same underlying concrete type and recursively
// calling Equal on the underlying values reports equal.
func Equal(x, y interface{}, opts ...Option) bool {
vx := reflect.ValueOf(x)
vy := reflect.ValueOf(y)
// If the inputs are different types, auto-wrap them in an empty interface
// so that they have the same parent type.
var t reflect.Type
if !vx.IsValid() || !vy.IsValid() || vx.Type() != vy.Type() {
t = reflect.TypeOf((*interface{})(nil)).Elem()
if vx.IsValid() {
vvx := reflect.New(t).Elem()
vvx.Set(vx)
vx = vvx
}
if vy.IsValid() {
vvy := reflect.New(t).Elem()
vvy.Set(vy)
vy = vvy
}
} else {
t = vx.Type()
}
s := newState(opts)
s.compareAny(&pathStep{t, vx, vy})
return s.result.Equal()
}
// Diff returns a human-readable report of the differences between two values.
// It returns an empty string if and only if Equal returns true for the same
// input values and options.
//
// The output is displayed as a literal in pseudo-Go syntax.
// At the start of each line, a "-" prefix indicates an element removed from x,
// a "+" prefix to indicates an element added to y, and the lack of a prefix
// indicates an element common to both x and y. If possible, the output
// uses fmt.Stringer.String or error.Error methods to produce more humanly
// readable outputs. In such cases, the string is prefixed with either an
// 's' or 'e' character, respectively, to indicate that the method was called.
//
// Do not depend on this output being stable. If you need the ability to
// programmatically interpret the difference, consider using a custom Reporter.
func Diff(x, y interface{}, opts ...Option) string {
r := new(defaultReporter)
eq := Equal(x, y, Options(opts), Reporter(r))
d := r.String()
if (d == "") != eq {
panic("inconsistent difference and equality results")
}
return d
}
type state struct {
// These fields represent the "comparison state".
// Calling statelessCompare must not result in observable changes to these.
result diff.Result // The current result of comparison
curPath Path // The current path in the value tree
reporters []reporter // Optional reporters
// recChecker checks for infinite cycles applying the same set of
// transformers upon the output of itself.
recChecker recChecker
// dynChecker triggers pseudo-random checks for option correctness.
// It is safe for statelessCompare to mutate this value.
dynChecker dynChecker
// These fields, once set by processOption, will not change.
exporters map[reflect.Type]bool // Set of structs with unexported field visibility
opts Options // List of all fundamental and filter options
}
func newState(opts []Option) *state {
// Always ensure a validator option exists to validate the inputs.
s := &state{opts: Options{validator{}}}
s.processOption(Options(opts))
return s
}
func (s *state) processOption(opt Option) {
switch opt := opt.(type) {
case nil:
case Options:
for _, o := range opt {
s.processOption(o)
}
case coreOption:
type filtered interface {
isFiltered() bool
}
if fopt, ok := opt.(filtered); ok && !fopt.isFiltered() {
panic(fmt.Sprintf("cannot use an unfiltered option: %v", opt))
}
s.opts = append(s.opts, opt)
case visibleStructs:
if s.exporters == nil {
s.exporters = make(map[reflect.Type]bool)
}
for t := range opt {
s.exporters[t] = true
}
case reporter:
s.reporters = append(s.reporters, opt)
default:
panic(fmt.Sprintf("unknown option %T", opt))
}
}
// statelessCompare compares two values and returns the result.
// This function is stateless in that it does not alter the current result,
// or output to any registered reporters.
func (s *state) statelessCompare(step PathStep) diff.Result {
// We do not save and restore the curPath because all of the compareX
// methods should properly push and pop from the path.
// It is an implementation bug if the contents of curPath differs from
// when calling this function to when returning from it.
oldResult, oldReporters := s.result, s.reporters
s.result = diff.Result{} // Reset result
s.reporters = nil // Remove reporters to avoid spurious printouts
s.compareAny(step)
res := s.result
s.result, s.reporters = oldResult, oldReporters
return res
}
func (s *state) compareAny(step PathStep) {
// Update the path stack.
s.curPath.push(step)
defer s.curPath.pop()
for _, r := range s.reporters {
r.PushStep(step)
defer r.PopStep()
}
s.recChecker.Check(s.curPath)
// Obtain the current type and values.
t := step.Type()
vx, vy := step.Values()
// Rule 1: Check whether an option applies on this node in the value tree.
if s.tryOptions(t, vx, vy) {
return
}
// Rule 2: Check whether the type has a valid Equal method.
if s.tryMethod(t, vx, vy) {
return
}
// Rule 3: Compare based on the underlying kind.
switch t.Kind() {
case reflect.Bool:
s.report(vx.Bool() == vy.Bool(), 0)
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
s.report(vx.Int() == vy.Int(), 0)
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
s.report(vx.Uint() == vy.Uint(), 0)
case reflect.Float32, reflect.Float64:
s.report(vx.Float() == vy.Float(), 0)
case reflect.Complex64, reflect.Complex128:
s.report(vx.Complex() == vy.Complex(), 0)
case reflect.String:
s.report(vx.String() == vy.String(), 0)
case reflect.Chan, reflect.UnsafePointer:
s.report(vx.Pointer() == vy.Pointer(), 0)
case reflect.Func:
s.report(vx.IsNil() && vy.IsNil(), 0)
case reflect.Struct:
s.compareStruct(t, vx, vy)
case reflect.Slice, reflect.Array:
s.compareSlice(t, vx, vy)
case reflect.Map:
s.compareMap(t, vx, vy)
case reflect.Ptr:
s.comparePtr(t, vx, vy)
case reflect.Interface:
s.compareInterface(t, vx, vy)
default:
panic(fmt.Sprintf("%v kind not handled", t.Kind()))
}
}
func (s *state) tryOptions(t reflect.Type, vx, vy reflect.Value) bool {
// Evaluate all filters and apply the remaining options.
if opt := s.opts.filter(s, t, vx, vy); opt != nil {
opt.apply(s, vx, vy)
return true
}
return false
}
func (s *state) tryMethod(t reflect.Type, vx, vy reflect.Value) bool {
// Check if this type even has an Equal method.
m, ok := t.MethodByName("Equal")
if !ok || !function.IsType(m.Type, function.EqualAssignable) {
return false
}
eq := s.callTTBFunc(m.Func, vx, vy)
s.report(eq, reportByMethod)
return true
}
func (s *state) callTRFunc(f, v reflect.Value, step Transform) reflect.Value {
v = sanitizeValue(v, f.Type().In(0))
if !s.dynChecker.Next() {
return f.Call([]reflect.Value{v})[0]
}
// Run the function twice and ensure that we get the same results back.
// We run in goroutines so that the race detector (if enabled) can detect
// unsafe mutations to the input.
c := make(chan reflect.Value)
go detectRaces(c, f, v)
got := <-c
want := f.Call([]reflect.Value{v})[0]
if step.vx, step.vy = got, want; !s.statelessCompare(step).Equal() {
// To avoid false-positives with non-reflexive equality operations,
// we sanity check whether a value is equal to itself.
if step.vx, step.vy = want, want; !s.statelessCompare(step).Equal() {
return want
}
panic(fmt.Sprintf("non-deterministic function detected: %s", function.NameOf(f)))
}
return want
}
func (s *state) callTTBFunc(f, x, y reflect.Value) bool {
x = sanitizeValue(x, f.Type().In(0))
y = sanitizeValue(y, f.Type().In(1))
if !s.dynChecker.Next() {
return f.Call([]reflect.Value{x, y})[0].Bool()
}
// Swapping the input arguments is sufficient to check that
// f is symmetric and deterministic.
// We run in goroutines so that the race detector (if enabled) can detect
// unsafe mutations to the input.
c := make(chan reflect.Value)
go detectRaces(c, f, y, x)
got := <-c
want := f.Call([]reflect.Value{x, y})[0].Bool()
if !got.IsValid() || got.Bool() != want {
panic(fmt.Sprintf("non-deterministic or non-symmetric function detected: %s", function.NameOf(f)))
}
return want
}
func detectRaces(c chan<- reflect.Value, f reflect.Value, vs ...reflect.Value) {
var ret reflect.Value
defer func() {
recover() // Ignore panics, let the other call to f panic instead
c <- ret
}()
ret = f.Call(vs)[0]
}
// sanitizeValue converts nil interfaces of type T to those of type R,
// assuming that T is assignable to R.
// Otherwise, it returns the input value as is.
func sanitizeValue(v reflect.Value, t reflect.Type) reflect.Value {
// TODO(dsnet): Workaround for reflect bug (https://golang.org/issue/22143).
if !flags.AtLeastGo110 {
if v.Kind() == reflect.Interface && v.IsNil() && v.Type() != t {
return reflect.New(t).Elem()
}
}
return v
}
func (s *state) compareStruct(t reflect.Type, vx, vy reflect.Value) {
var vax, vay reflect.Value // Addressable versions of vx and vy
step := StructField{&structField{}}
for i := 0; i < t.NumField(); i++ {
step.typ = t.Field(i).Type
step.vx = vx.Field(i)
step.vy = vy.Field(i)
step.name = t.Field(i).Name
step.idx = i
step.unexported = !isExported(step.name)
if step.unexported {
if step.name == "_" {
continue
}
// Defer checking of unexported fields until later to give an
// Ignore a chance to ignore the field.
if !vax.IsValid() || !vay.IsValid() {
// For retrieveUnexportedField to work, the parent struct must
// be addressable. Create a new copy of the values if
// necessary to make them addressable.
vax = makeAddressable(vx)
vay = makeAddressable(vy)
}
step.mayForce = s.exporters[t]
step.pvx = vax
step.pvy = vay
step.field = t.Field(i)
}
s.compareAny(step)
}
}
func (s *state) compareSlice(t reflect.Type, vx, vy reflect.Value) {
isSlice := t.Kind() == reflect.Slice
if isSlice && (vx.IsNil() || vy.IsNil()) {
s.report(vx.IsNil() && vy.IsNil(), 0)
return
}
// TODO: Support cyclic data structures.
step := SliceIndex{&sliceIndex{pathStep: pathStep{typ: t.Elem()}}}
withIndexes := func(ix, iy int) SliceIndex {
if ix >= 0 {
step.vx, step.xkey = vx.Index(ix), ix
} else {
step.vx, step.xkey = reflect.Value{}, -1
}
if iy >= 0 {
step.vy, step.ykey = vy.Index(iy), iy
} else {
step.vy, step.ykey = reflect.Value{}, -1
}
return step
}
// Ignore options are able to ignore missing elements in a slice.
// However, detecting these reliably requires an optimal differencing
// algorithm, for which diff.Difference is not.
//
// Instead, we first iterate through both slices to detect which elements
// would be ignored if standing alone. The index of non-discarded elements
// are stored in a separate slice, which diffing is then performed on.
var indexesX, indexesY []int
var ignoredX, ignoredY []bool
for ix := 0; ix < vx.Len(); ix++ {
ignored := s.statelessCompare(withIndexes(ix, -1)).NumDiff == 0
if !ignored {
indexesX = append(indexesX, ix)
}
ignoredX = append(ignoredX, ignored)
}
for iy := 0; iy < vy.Len(); iy++ {
ignored := s.statelessCompare(withIndexes(-1, iy)).NumDiff == 0
if !ignored {
indexesY = append(indexesY, iy)
}
ignoredY = append(ignoredY, ignored)
}
// Compute an edit-script for slices vx and vy (excluding ignored elements).
edits := diff.Difference(len(indexesX), len(indexesY), func(ix, iy int) diff.Result {
return s.statelessCompare(withIndexes(indexesX[ix], indexesY[iy]))
})
// Replay the ignore-scripts and the edit-script.
var ix, iy int
for ix < vx.Len() || iy < vy.Len() {
var e diff.EditType
switch {
case ix < len(ignoredX) && ignoredX[ix]:
e = diff.UniqueX
case iy < len(ignoredY) && ignoredY[iy]:
e = diff.UniqueY
default:
e, edits = edits[0], edits[1:]
}
switch e {
case diff.UniqueX:
s.compareAny(withIndexes(ix, -1))
ix++
case diff.UniqueY:
s.compareAny(withIndexes(-1, iy))
iy++
default:
s.compareAny(withIndexes(ix, iy))
ix++
iy++
}
}
}
func (s *state) compareMap(t reflect.Type, vx, vy reflect.Value) {
if vx.IsNil() || vy.IsNil() {
s.report(vx.IsNil() && vy.IsNil(), 0)
return
}
// TODO: Support cyclic data structures.
// We combine and sort the two map keys so that we can perform the
// comparisons in a deterministic order.
step := MapIndex{&mapIndex{pathStep: pathStep{typ: t.Elem()}}}
for _, k := range value.SortKeys(append(vx.MapKeys(), vy.MapKeys()...)) {
step.vx = vx.MapIndex(k)
step.vy = vy.MapIndex(k)
step.key = k
if !step.vx.IsValid() && !step.vy.IsValid() {
// It is possible for both vx and vy to be invalid if the
// key contained a NaN value in it.
//
// Even with the ability to retrieve NaN keys in Go 1.12,
// there still isn't a sensible way to compare the values since
// a NaN key may map to multiple unordered values.
// The most reasonable way to compare NaNs would be to compare the
// set of values. However, this is impossible to do efficiently
// since set equality is provably an O(n^2) operation given only
// an Equal function. If we had a Less function or Hash function,
// this could be done in O(n*log(n)) or O(n), respectively.
//
// Rather than adding complex logic to deal with NaNs, make it
// the user's responsibility to compare such obscure maps.
const help = "consider providing a Comparer to compare the map"
panic(fmt.Sprintf("%#v has map key with NaNs\n%s", s.curPath, help))
}
s.compareAny(step)
}
}
func (s *state) comparePtr(t reflect.Type, vx, vy reflect.Value) {
if vx.IsNil() || vy.IsNil() {
s.report(vx.IsNil() && vy.IsNil(), 0)
return
}
// TODO: Support cyclic data structures.
vx, vy = vx.Elem(), vy.Elem()
s.compareAny(Indirect{&indirect{pathStep{t.Elem(), vx, vy}}})
}
func (s *state) compareInterface(t reflect.Type, vx, vy reflect.Value) {
if vx.IsNil() || vy.IsNil() {
s.report(vx.IsNil() && vy.IsNil(), 0)
return
}
vx, vy = vx.Elem(), vy.Elem()
if vx.Type() != vy.Type() {
s.report(false, 0)
return
}
s.compareAny(TypeAssertion{&typeAssertion{pathStep{vx.Type(), vx, vy}}})
}
func (s *state) report(eq bool, rf resultFlags) {
if rf&reportByIgnore == 0 {
if eq {
s.result.NumSame++
rf |= reportEqual
} else {
s.result.NumDiff++
rf |= reportUnequal
}
}
for _, r := range s.reporters {
r.Report(Result{flags: rf})
}
}
// recChecker tracks the state needed to periodically perform checks that
// user provided transformers are not stuck in an infinitely recursive cycle.
type recChecker struct{ next int }
// Check scans the Path for any recursive transformers and panics when any
// recursive transformers are detected. Note that the presence of a
// recursive Transformer does not necessarily imply an infinite cycle.
// As such, this check only activates after some minimal number of path steps.
func (rc *recChecker) Check(p Path) {
const minLen = 1 << 16
if rc.next == 0 {
rc.next = minLen
}
if len(p) < rc.next {
return
}
rc.next <<= 1
// Check whether the same transformer has appeared at least twice.
var ss []string
m := map[Option]int{}
for _, ps := range p {
if t, ok := ps.(Transform); ok {
t := t.Option()
if m[t] == 1 { // Transformer was used exactly once before
tf := t.(*transformer).fnc.Type()
ss = append(ss, fmt.Sprintf("%v: %v => %v", t, tf.In(0), tf.Out(0)))
}
m[t]++
}
}
if len(ss) > 0 {
const warning = "recursive set of Transformers detected"
const help = "consider using cmpopts.AcyclicTransformer"
set := strings.Join(ss, "\n\t")
panic(fmt.Sprintf("%s:\n\t%s\n%s", warning, set, help))
}
}
// dynChecker tracks the state needed to periodically perform checks that
// user provided functions are symmetric and deterministic.
// The zero value is safe for immediate use.
type dynChecker struct{ curr, next int }
// Next increments the state and reports whether a check should be performed.
//
// Checks occur every Nth function call, where N is a triangular number:
// 0 1 3 6 10 15 21 28 36 45 55 66 78 91 105 120 136 153 171 190 ...
// See https://en.wikipedia.org/wiki/Triangular_number
//
// This sequence ensures that the cost of checks drops significantly as
// the number of functions calls grows larger.
func (dc *dynChecker) Next() bool {
ok := dc.curr == dc.next
if ok {
dc.curr = 0
dc.next++
}
dc.curr++
return ok
}
// makeAddressable returns a value that is always addressable.
// It returns the input verbatim if it is already addressable,
// otherwise it creates a new value and returns an addressable copy.
func makeAddressable(v reflect.Value) reflect.Value {
if v.CanAddr() {
return v
}
vc := reflect.New(v.Type()).Elem()
vc.Set(v)
return vc
}

@ -0,0 +1,15 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build purego
package cmp
import "reflect"
const supportAllowUnexported = false
func retrieveUnexportedField(reflect.Value, reflect.StructField) reflect.Value {
panic("retrieveUnexportedField is not implemented")
}

@ -0,0 +1,23 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build !purego
package cmp
import (
"reflect"
"unsafe"
)
const supportAllowUnexported = true
// retrieveUnexportedField uses unsafe to forcibly retrieve any field from
// a struct such that the value has read-write permissions.
//
// The parent struct, v, must be addressable, while f must be a StructField
// describing the field to retrieve.
func retrieveUnexportedField(v reflect.Value, f reflect.StructField) reflect.Value {
return reflect.NewAt(f.Type, unsafe.Pointer(v.UnsafeAddr()+f.Offset)).Elem()
}

@ -0,0 +1,17 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build !cmp_debug
package diff
var debug debugger
type debugger struct{}
func (debugger) Begin(_, _ int, f EqualFunc, _, _ *EditScript) EqualFunc {
return f
}
func (debugger) Update() {}
func (debugger) Finish() {}

@ -0,0 +1,122 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build cmp_debug
package diff
import (
"fmt"
"strings"
"sync"
"time"
)
// The algorithm can be seen running in real-time by enabling debugging:
// go test -tags=cmp_debug -v
//
// Example output:
// === RUN TestDifference/#34
// ┌───────────────────────────────┐
// │ \ · · · · · · · · · · · · · · │
// │ · # · · · · · · · · · · · · · │
// │ · \ · · · · · · · · · · · · · │
// │ · · \ · · · · · · · · · · · · │
// │ · · · X # · · · · · · · · · · │
// │ · · · # \ · · · · · · · · · · │
// │ · · · · · # # · · · · · · · · │
// │ · · · · · # \ · · · · · · · · │
// │ · · · · · · · \ · · · · · · · │
// │ · · · · · · · · \ · · · · · · │
// │ · · · · · · · · · \ · · · · · │
// │ · · · · · · · · · · \ · · # · │
// │ · · · · · · · · · · · \ # # · │
// │ · · · · · · · · · · · # # # · │
// │ · · · · · · · · · · # # # # · │
// │ · · · · · · · · · # # # # # · │
// │ · · · · · · · · · · · · · · \ │
// └───────────────────────────────┘
// [.Y..M.XY......YXYXY.|]
//
// The grid represents the edit-graph where the horizontal axis represents
// list X and the vertical axis represents list Y. The start of the two lists
// is the top-left, while the ends are the bottom-right. The '·' represents
// an unexplored node in the graph. The '\' indicates that the two symbols
// from list X and Y are equal. The 'X' indicates that two symbols are similar
// (but not exactly equal) to each other. The '#' indicates that the two symbols
// are different (and not similar). The algorithm traverses this graph trying to
// make the paths starting in the top-left and the bottom-right connect.
//
// The series of '.', 'X', 'Y', and 'M' characters at the bottom represents
// the currently established path from the forward and reverse searches,
// separated by a '|' character.
const (
updateDelay = 100 * time.Millisecond
finishDelay = 500 * time.Millisecond
ansiTerminal = true // ANSI escape codes used to move terminal cursor
)
var debug debugger
type debugger struct {
sync.Mutex
p1, p2 EditScript
fwdPath, revPath *EditScript
grid []byte
lines int
}
func (dbg *debugger) Begin(nx, ny int, f EqualFunc, p1, p2 *EditScript) EqualFunc {
dbg.Lock()
dbg.fwdPath, dbg.revPath = p1, p2
top := "┌─" + strings.Repeat("──", nx) + "┐\n"
row := "│ " + strings.Repeat("· ", nx) + "│\n"
btm := "└─" + strings.Repeat("──", nx) + "┘\n"
dbg.grid = []byte(top + strings.Repeat(row, ny) + btm)
dbg.lines = strings.Count(dbg.String(), "\n")
fmt.Print(dbg)
// Wrap the EqualFunc so that we can intercept each result.
return func(ix, iy int) (r Result) {
cell := dbg.grid[len(top)+iy*len(row):][len("│ ")+len("· ")*ix:][:len("·")]
for i := range cell {
cell[i] = 0 // Zero out the multiple bytes of UTF-8 middle-dot
}
switch r = f(ix, iy); {
case r.Equal():
cell[0] = '\\'
case r.Similar():
cell[0] = 'X'
default:
cell[0] = '#'
}
return
}
}
func (dbg *debugger) Update() {
dbg.print(updateDelay)
}
func (dbg *debugger) Finish() {
dbg.print(finishDelay)
dbg.Unlock()
}
func (dbg *debugger) String() string {
dbg.p1, dbg.p2 = *dbg.fwdPath, dbg.p2[:0]
for i := len(*dbg.revPath) - 1; i >= 0; i-- {
dbg.p2 = append(dbg.p2, (*dbg.revPath)[i])
}
return fmt.Sprintf("%s[%v|%v]\n\n", dbg.grid, dbg.p1, dbg.p2)
}
func (dbg *debugger) print(d time.Duration) {
if ansiTerminal {
fmt.Printf("\x1b[%dA", dbg.lines) // Reset terminal cursor
}
fmt.Print(dbg)
time.Sleep(d)
}

@ -0,0 +1,372 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package diff implements an algorithm for producing edit-scripts.
// The edit-script is a sequence of operations needed to transform one list
// of symbols into another (or vice-versa). The edits allowed are insertions,
// deletions, and modifications. The summation of all edits is called the
// Levenshtein distance as this problem is well-known in computer science.
//
// This package prioritizes performance over accuracy. That is, the run time
// is more important than obtaining a minimal Levenshtein distance.
package diff
// EditType represents a single operation within an edit-script.
type EditType uint8
const (
// Identity indicates that a symbol pair is identical in both list X and Y.
Identity EditType = iota
// UniqueX indicates that a symbol only exists in X and not Y.
UniqueX
// UniqueY indicates that a symbol only exists in Y and not X.
UniqueY
// Modified indicates that a symbol pair is a modification of each other.
Modified
)
// EditScript represents the series of differences between two lists.
type EditScript []EditType
// String returns a human-readable string representing the edit-script where
// Identity, UniqueX, UniqueY, and Modified are represented by the
// '.', 'X', 'Y', and 'M' characters, respectively.
func (es EditScript) String() string {
b := make([]byte, len(es))
for i, e := range es {
switch e {
case Identity:
b[i] = '.'
case UniqueX:
b[i] = 'X'
case UniqueY:
b[i] = 'Y'
case Modified:
b[i] = 'M'
default:
panic("invalid edit-type")
}
}
return string(b)
}
// stats returns a histogram of the number of each type of edit operation.
func (es EditScript) stats() (s struct{ NI, NX, NY, NM int }) {
for _, e := range es {
switch e {
case Identity:
s.NI++
case UniqueX:
s.NX++
case UniqueY:
s.NY++
case Modified:
s.NM++
default:
panic("invalid edit-type")
}
}
return
}
// Dist is the Levenshtein distance and is guaranteed to be 0 if and only if
// lists X and Y are equal.
func (es EditScript) Dist() int { return len(es) - es.stats().NI }
// LenX is the length of the X list.
func (es EditScript) LenX() int { return len(es) - es.stats().NY }
// LenY is the length of the Y list.
func (es EditScript) LenY() int { return len(es) - es.stats().NX }
// EqualFunc reports whether the symbols at indexes ix and iy are equal.
// When called by Difference, the index is guaranteed to be within nx and ny.
type EqualFunc func(ix int, iy int) Result
// Result is the result of comparison.
// NumSame is the number of sub-elements that are equal.
// NumDiff is the number of sub-elements that are not equal.
type Result struct{ NumSame, NumDiff int }
// BoolResult returns a Result that is either Equal or not Equal.
func BoolResult(b bool) Result {
if b {
return Result{NumSame: 1} // Equal, Similar
} else {
return Result{NumDiff: 2} // Not Equal, not Similar
}
}
// Equal indicates whether the symbols are equal. Two symbols are equal
// if and only if NumDiff == 0. If Equal, then they are also Similar.
func (r Result) Equal() bool { return r.NumDiff == 0 }
// Similar indicates whether two symbols are similar and may be represented
// by using the Modified type. As a special case, we consider binary comparisons
// (i.e., those that return Result{1, 0} or Result{0, 1}) to be similar.
//
// The exact ratio of NumSame to NumDiff to determine similarity may change.
func (r Result) Similar() bool {
// Use NumSame+1 to offset NumSame so that binary comparisons are similar.
return r.NumSame+1 >= r.NumDiff
}
// Difference reports whether two lists of lengths nx and ny are equal
// given the definition of equality provided as f.
//
// This function returns an edit-script, which is a sequence of operations
// needed to convert one list into the other. The following invariants for
// the edit-script are maintained:
// • eq == (es.Dist()==0)
// • nx == es.LenX()
// • ny == es.LenY()
//
// This algorithm is not guaranteed to be an optimal solution (i.e., one that
// produces an edit-script with a minimal Levenshtein distance). This algorithm
// favors performance over optimality. The exact output is not guaranteed to
// be stable and may change over time.
func Difference(nx, ny int, f EqualFunc) (es EditScript) {
// This algorithm is based on traversing what is known as an "edit-graph".
// See Figure 1 from "An O(ND) Difference Algorithm and Its Variations"
// by Eugene W. Myers. Since D can be as large as N itself, this is
// effectively O(N^2). Unlike the algorithm from that paper, we are not
// interested in the optimal path, but at least some "decent" path.
//
// For example, let X and Y be lists of symbols:
// X = [A B C A B B A]
// Y = [C B A B A C]
//
// The edit-graph can be drawn as the following:
// A B C A B B A
// ┌─────────────┐
// C │_|_|\|_|_|_|_│ 0
// B │_|\|_|_|\|\|_│ 1
// A │\|_|_|\|_|_|\│ 2
// B │_|\|_|_|\|\|_│ 3
// A │\|_|_|\|_|_|\│ 4
// C │ | |\| | | | │ 5
// └─────────────┘ 6
// 0 1 2 3 4 5 6 7
//
// List X is written along the horizontal axis, while list Y is written
// along the vertical axis. At any point on this grid, if the symbol in
// list X matches the corresponding symbol in list Y, then a '\' is drawn.
// The goal of any minimal edit-script algorithm is to find a path from the
// top-left corner to the bottom-right corner, while traveling through the
// fewest horizontal or vertical edges.
// A horizontal edge is equivalent to inserting a symbol from list X.
// A vertical edge is equivalent to inserting a symbol from list Y.
// A diagonal edge is equivalent to a matching symbol between both X and Y.
// Invariants:
// • 0 ≤ fwdPath.X ≤ (fwdFrontier.X, revFrontier.X) ≤ revPath.X ≤ nx
// • 0 ≤ fwdPath.Y ≤ (fwdFrontier.Y, revFrontier.Y) ≤ revPath.Y ≤ ny
//
// In general:
// • fwdFrontier.X < revFrontier.X
// • fwdFrontier.Y < revFrontier.Y
// Unless, it is time for the algorithm to terminate.
fwdPath := path{+1, point{0, 0}, make(EditScript, 0, (nx+ny)/2)}
revPath := path{-1, point{nx, ny}, make(EditScript, 0)}
fwdFrontier := fwdPath.point // Forward search frontier
revFrontier := revPath.point // Reverse search frontier
// Search budget bounds the cost of searching for better paths.
// The longest sequence of non-matching symbols that can be tolerated is
// approximately the square-root of the search budget.
searchBudget := 4 * (nx + ny) // O(n)
// The algorithm below is a greedy, meet-in-the-middle algorithm for
// computing sub-optimal edit-scripts between two lists.
//
// The algorithm is approximately as follows:
// • Searching for differences switches back-and-forth between
// a search that starts at the beginning (the top-left corner), and
// a search that starts at the end (the bottom-right corner). The goal of
// the search is connect with the search from the opposite corner.
// • As we search, we build a path in a greedy manner, where the first
// match seen is added to the path (this is sub-optimal, but provides a
// decent result in practice). When matches are found, we try the next pair
// of symbols in the lists and follow all matches as far as possible.
// • When searching for matches, we search along a diagonal going through
// through the "frontier" point. If no matches are found, we advance the
// frontier towards the opposite corner.
// • This algorithm terminates when either the X coordinates or the
// Y coordinates of the forward and reverse frontier points ever intersect.
//
// This algorithm is correct even if searching only in the forward direction
// or in the reverse direction. We do both because it is commonly observed
// that two lists commonly differ because elements were added to the front
// or end of the other list.
//
// Running the tests with the "cmp_debug" build tag prints a visualization
// of the algorithm running in real-time. This is educational for
// understanding how the algorithm works. See debug_enable.go.
f = debug.Begin(nx, ny, f, &fwdPath.es, &revPath.es)
for {
// Forward search from the beginning.
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
break
}
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
z := zigzag(i)
p := point{fwdFrontier.X + z, fwdFrontier.Y - z}
switch {
case p.X >= revPath.X || p.Y < fwdPath.Y:
stop1 = true // Hit top-right corner
case p.Y >= revPath.Y || p.X < fwdPath.X:
stop2 = true // Hit bottom-left corner
case f(p.X, p.Y).Equal():
// Match found, so connect the path to this point.
fwdPath.connect(p, f)
fwdPath.append(Identity)
// Follow sequence of matches as far as possible.
for fwdPath.X < revPath.X && fwdPath.Y < revPath.Y {
if !f(fwdPath.X, fwdPath.Y).Equal() {
break
}
fwdPath.append(Identity)
}
fwdFrontier = fwdPath.point
stop1, stop2 = true, true
default:
searchBudget-- // Match not found
}
debug.Update()
}
// Advance the frontier towards reverse point.
if revPath.X-fwdFrontier.X >= revPath.Y-fwdFrontier.Y {
fwdFrontier.X++
} else {
fwdFrontier.Y++
}
// Reverse search from the end.
if fwdFrontier.X >= revFrontier.X || fwdFrontier.Y >= revFrontier.Y || searchBudget == 0 {
break
}
for stop1, stop2, i := false, false, 0; !(stop1 && stop2) && searchBudget > 0; i++ {
// Search in a diagonal pattern for a match.
z := zigzag(i)
p := point{revFrontier.X - z, revFrontier.Y + z}
switch {
case fwdPath.X >= p.X || revPath.Y < p.Y:
stop1 = true // Hit bottom-left corner
case fwdPath.Y >= p.Y || revPath.X < p.X:
stop2 = true // Hit top-right corner
case f(p.X-1, p.Y-1).Equal():
// Match found, so connect the path to this point.
revPath.connect(p, f)
revPath.append(Identity)
// Follow sequence of matches as far as possible.
for fwdPath.X < revPath.X && fwdPath.Y < revPath.Y {
if !f(revPath.X-1, revPath.Y-1).Equal() {
break
}
revPath.append(Identity)
}
revFrontier = revPath.point
stop1, stop2 = true, true
default:
searchBudget-- // Match not found
}
debug.Update()
}
// Advance the frontier towards forward point.
if revFrontier.X-fwdPath.X >= revFrontier.Y-fwdPath.Y {
revFrontier.X--
} else {
revFrontier.Y--
}
}
// Join the forward and reverse paths and then append the reverse path.
fwdPath.connect(revPath.point, f)
for i := len(revPath.es) - 1; i >= 0; i-- {
t := revPath.es[i]
revPath.es = revPath.es[:i]
fwdPath.append(t)
}
debug.Finish()
return fwdPath.es
}
type path struct {
dir int // +1 if forward, -1 if reverse
point // Leading point of the EditScript path
es EditScript
}
// connect appends any necessary Identity, Modified, UniqueX, or UniqueY types
// to the edit-script to connect p.point to dst.
func (p *path) connect(dst point, f EqualFunc) {
if p.dir > 0 {
// Connect in forward direction.
for dst.X > p.X && dst.Y > p.Y {
switch r := f(p.X, p.Y); {
case r.Equal():
p.append(Identity)
case r.Similar():
p.append(Modified)
case dst.X-p.X >= dst.Y-p.Y:
p.append(UniqueX)
default:
p.append(UniqueY)
}
}
for dst.X > p.X {
p.append(UniqueX)
}
for dst.Y > p.Y {
p.append(UniqueY)
}
} else {
// Connect in reverse direction.
for p.X > dst.X && p.Y > dst.Y {
switch r := f(p.X-1, p.Y-1); {
case r.Equal():
p.append(Identity)
case r.Similar():
p.append(Modified)
case p.Y-dst.Y >= p.X-dst.X:
p.append(UniqueY)
default:
p.append(UniqueX)
}
}
for p.X > dst.X {
p.append(UniqueX)
}
for p.Y > dst.Y {
p.append(UniqueY)
}
}
}
func (p *path) append(t EditType) {
p.es = append(p.es, t)
switch t {
case Identity, Modified:
p.add(p.dir, p.dir)
case UniqueX:
p.add(p.dir, 0)
case UniqueY:
p.add(0, p.dir)
}
debug.Update()
}
type point struct{ X, Y int }
func (p *point) add(dx, dy int) { p.X += dx; p.Y += dy }
// zigzag maps a consecutive sequence of integers to a zig-zag sequence.
// [0 1 2 3 4 5 ...] => [0 -1 +1 -2 +2 ...]
func zigzag(x int) int {
if x&1 != 0 {
x = ^x
}
return x >> 1
}

@ -0,0 +1,9 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package flags
// Deterministic controls whether the output of Diff should be deterministic.
// This is only used for testing.
var Deterministic bool

@ -0,0 +1,10 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build !go1.10
package flags
// AtLeastGo110 reports whether the Go toolchain is at least Go 1.10.
const AtLeastGo110 = false

@ -0,0 +1,10 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build go1.10
package flags
// AtLeastGo110 reports whether the Go toolchain is at least Go 1.10.
const AtLeastGo110 = true

@ -0,0 +1,99 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package function provides functionality for identifying function types.
package function
import (
"reflect"
"regexp"
"runtime"
"strings"
)
type funcType int
const (
_ funcType = iota
tbFunc // func(T) bool
ttbFunc // func(T, T) bool
trbFunc // func(T, R) bool
tibFunc // func(T, I) bool
trFunc // func(T) R
Equal = ttbFunc // func(T, T) bool
EqualAssignable = tibFunc // func(T, I) bool; encapsulates func(T, T) bool
Transformer = trFunc // func(T) R
ValueFilter = ttbFunc // func(T, T) bool
Less = ttbFunc // func(T, T) bool
ValuePredicate = tbFunc // func(T) bool
KeyValuePredicate = trbFunc // func(T, R) bool
)
var boolType = reflect.TypeOf(true)
// IsType reports whether the reflect.Type is of the specified function type.
func IsType(t reflect.Type, ft funcType) bool {
if t == nil || t.Kind() != reflect.Func || t.IsVariadic() {
return false
}
ni, no := t.NumIn(), t.NumOut()
switch ft {
case tbFunc: // func(T) bool
if ni == 1 && no == 1 && t.Out(0) == boolType {
return true
}
case ttbFunc: // func(T, T) bool
if ni == 2 && no == 1 && t.In(0) == t.In(1) && t.Out(0) == boolType {
return true
}
case trbFunc: // func(T, R) bool
if ni == 2 && no == 1 && t.Out(0) == boolType {
return true
}
case tibFunc: // func(T, I) bool
if ni == 2 && no == 1 && t.In(0).AssignableTo(t.In(1)) && t.Out(0) == boolType {
return true
}
case trFunc: // func(T) R
if ni == 1 && no == 1 {
return true
}
}
return false
}
var lastIdentRx = regexp.MustCompile(`[_\p{L}][_\p{L}\p{N}]*$`)
// NameOf returns the name of the function value.
func NameOf(v reflect.Value) string {
fnc := runtime.FuncForPC(v.Pointer())
if fnc == nil {
return "<unknown>"
}
fullName := fnc.Name() // e.g., "long/path/name/mypkg.(*MyType).(long/path/name/mypkg.myMethod)-fm"
// Method closures have a "-fm" suffix.
fullName = strings.TrimSuffix(fullName, "-fm")
var name string
for len(fullName) > 0 {
inParen := strings.HasSuffix(fullName, ")")
fullName = strings.TrimSuffix(fullName, ")")
s := lastIdentRx.FindString(fullName)
if s == "" {
break
}
name = s + "." + name
fullName = strings.TrimSuffix(fullName, s)
if i := strings.LastIndexByte(fullName, '('); inParen && i >= 0 {
fullName = fullName[:i]
}
fullName = strings.TrimSuffix(fullName, ".")
}
return strings.TrimSuffix(name, ".")
}

@ -0,0 +1,23 @@
// Copyright 2018, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build purego
package value
import "reflect"
// Pointer is an opaque typed pointer and is guaranteed to be comparable.
type Pointer struct {
p uintptr
t reflect.Type
}
// PointerOf returns a Pointer from v, which must be a
// reflect.Ptr, reflect.Slice, or reflect.Map.
func PointerOf(v reflect.Value) Pointer {
// NOTE: Storing a pointer as an uintptr is technically incorrect as it
// assumes that the GC implementation does not use a moving collector.
return Pointer{v.Pointer(), v.Type()}
}

@ -0,0 +1,26 @@
// Copyright 2018, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build !purego
package value
import (
"reflect"
"unsafe"
)
// Pointer is an opaque typed pointer and is guaranteed to be comparable.
type Pointer struct {
p unsafe.Pointer
t reflect.Type
}
// PointerOf returns a Pointer from v, which must be a
// reflect.Ptr, reflect.Slice, or reflect.Map.
func PointerOf(v reflect.Value) Pointer {
// The proper representation of a pointer is unsafe.Pointer,
// which is necessary if the GC ever uses a moving collector.
return Pointer{unsafe.Pointer(v.Pointer()), v.Type()}
}

@ -0,0 +1,106 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package value
import (
"fmt"
"math"
"reflect"
"sort"
)
// SortKeys sorts a list of map keys, deduplicating keys if necessary.
// The type of each value must be comparable.
func SortKeys(vs []reflect.Value) []reflect.Value {
if len(vs) == 0 {
return vs
}
// Sort the map keys.
sort.SliceStable(vs, func(i, j int) bool { return isLess(vs[i], vs[j]) })
// Deduplicate keys (fails for NaNs).
vs2 := vs[:1]
for _, v := range vs[1:] {
if isLess(vs2[len(vs2)-1], v) {
vs2 = append(vs2, v)
}
}
return vs2
}
// isLess is a generic function for sorting arbitrary map keys.
// The inputs must be of the same type and must be comparable.
func isLess(x, y reflect.Value) bool {
switch x.Type().Kind() {
case reflect.Bool:
return !x.Bool() && y.Bool()
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return x.Int() < y.Int()
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
return x.Uint() < y.Uint()
case reflect.Float32, reflect.Float64:
// NOTE: This does not sort -0 as less than +0
// since Go maps treat -0 and +0 as equal keys.
fx, fy := x.Float(), y.Float()
return fx < fy || math.IsNaN(fx) && !math.IsNaN(fy)
case reflect.Complex64, reflect.Complex128:
cx, cy := x.Complex(), y.Complex()
rx, ix, ry, iy := real(cx), imag(cx), real(cy), imag(cy)
if rx == ry || (math.IsNaN(rx) && math.IsNaN(ry)) {
return ix < iy || math.IsNaN(ix) && !math.IsNaN(iy)
}
return rx < ry || math.IsNaN(rx) && !math.IsNaN(ry)
case reflect.Ptr, reflect.UnsafePointer, reflect.Chan:
return x.Pointer() < y.Pointer()
case reflect.String:
return x.String() < y.String()
case reflect.Array:
for i := 0; i < x.Len(); i++ {
if isLess(x.Index(i), y.Index(i)) {
return true
}
if isLess(y.Index(i), x.Index(i)) {
return false
}
}
return false
case reflect.Struct:
for i := 0; i < x.NumField(); i++ {
if isLess(x.Field(i), y.Field(i)) {
return true
}
if isLess(y.Field(i), x.Field(i)) {
return false
}
}
return false
case reflect.Interface:
vx, vy := x.Elem(), y.Elem()
if !vx.IsValid() || !vy.IsValid() {
return !vx.IsValid() && vy.IsValid()
}
tx, ty := vx.Type(), vy.Type()
if tx == ty {
return isLess(x.Elem(), y.Elem())
}
if tx.Kind() != ty.Kind() {
return vx.Kind() < vy.Kind()
}
if tx.String() != ty.String() {
return tx.String() < ty.String()
}
if tx.PkgPath() != ty.PkgPath() {
return tx.PkgPath() < ty.PkgPath()
}
// This can happen in rare situations, so we fallback to just comparing
// the unique pointer for a reflect.Type. This guarantees deterministic
// ordering within a program, but it is obviously not stable.
return reflect.ValueOf(vx.Type()).Pointer() < reflect.ValueOf(vy.Type()).Pointer()
default:
// Must be Func, Map, or Slice; which are not comparable.
panic(fmt.Sprintf("%T is not comparable", x.Type()))
}
}

@ -0,0 +1,48 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package value
import (
"math"
"reflect"
)
// IsZero reports whether v is the zero value.
// This does not rely on Interface and so can be used on unexported fields.
func IsZero(v reflect.Value) bool {
switch v.Kind() {
case reflect.Bool:
return v.Bool() == false
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return v.Int() == 0
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
return v.Uint() == 0
case reflect.Float32, reflect.Float64:
return math.Float64bits(v.Float()) == 0
case reflect.Complex64, reflect.Complex128:
return math.Float64bits(real(v.Complex())) == 0 && math.Float64bits(imag(v.Complex())) == 0
case reflect.String:
return v.String() == ""
case reflect.UnsafePointer:
return v.Pointer() == 0
case reflect.Chan, reflect.Func, reflect.Interface, reflect.Ptr, reflect.Map, reflect.Slice:
return v.IsNil()
case reflect.Array:
for i := 0; i < v.Len(); i++ {
if !IsZero(v.Index(i)) {
return false
}
}
return true
case reflect.Struct:
for i := 0; i < v.NumField(); i++ {
if !IsZero(v.Field(i)) {
return false
}
}
return true
}
return false
}

@ -0,0 +1,524 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"fmt"
"reflect"
"regexp"
"strings"
"github.com/google/go-cmp/cmp/internal/function"
)
// Option configures for specific behavior of Equal and Diff. In particular,
// the fundamental Option functions (Ignore, Transformer, and Comparer),
// configure how equality is determined.
//
// The fundamental options may be composed with filters (FilterPath and
// FilterValues) to control the scope over which they are applied.
//
// The cmp/cmpopts package provides helper functions for creating options that
// may be used with Equal and Diff.
type Option interface {
// filter applies all filters and returns the option that remains.
// Each option may only read s.curPath and call s.callTTBFunc.
//
// An Options is returned only if multiple comparers or transformers
// can apply simultaneously and will only contain values of those types
// or sub-Options containing values of those types.
filter(s *state, t reflect.Type, vx, vy reflect.Value) applicableOption
}
// applicableOption represents the following types:
// Fundamental: ignore | validator | *comparer | *transformer
// Grouping: Options
type applicableOption interface {
Option
// apply executes the option, which may mutate s or panic.
apply(s *state, vx, vy reflect.Value)
}
// coreOption represents the following types:
// Fundamental: ignore | validator | *comparer | *transformer
// Filters: *pathFilter | *valuesFilter
type coreOption interface {
Option
isCore()
}
type core struct{}
func (core) isCore() {}
// Options is a list of Option values that also satisfies the Option interface.
// Helper comparison packages may return an Options value when packing multiple
// Option values into a single Option. When this package processes an Options,
// it will be implicitly expanded into a flat list.
//
// Applying a filter on an Options is equivalent to applying that same filter
// on all individual options held within.
type Options []Option
func (opts Options) filter(s *state, t reflect.Type, vx, vy reflect.Value) (out applicableOption) {
for _, opt := range opts {
switch opt := opt.filter(s, t, vx, vy); opt.(type) {
case ignore:
return ignore{} // Only ignore can short-circuit evaluation
case validator:
out = validator{} // Takes precedence over comparer or transformer
case *comparer, *transformer, Options:
switch out.(type) {
case nil:
out = opt
case validator:
// Keep validator
case *comparer, *transformer, Options:
out = Options{out, opt} // Conflicting comparers or transformers
}
}
}
return out
}
func (opts Options) apply(s *state, _, _ reflect.Value) {
const warning = "ambiguous set of applicable options"
const help = "consider using filters to ensure at most one Comparer or Transformer may apply"
var ss []string
for _, opt := range flattenOptions(nil, opts) {
ss = append(ss, fmt.Sprint(opt))
}
set := strings.Join(ss, "\n\t")
panic(fmt.Sprintf("%s at %#v:\n\t%s\n%s", warning, s.curPath, set, help))
}
func (opts Options) String() string {
var ss []string
for _, opt := range opts {
ss = append(ss, fmt.Sprint(opt))
}
return fmt.Sprintf("Options{%s}", strings.Join(ss, ", "))
}
// FilterPath returns a new Option where opt is only evaluated if filter f
// returns true for the current Path in the value tree.
//
// This filter is called even if a slice element or map entry is missing and
// provides an opportunity to ignore such cases. The filter function must be
// symmetric such that the filter result is identical regardless of whether the
// missing value is from x or y.
//
// The option passed in may be an Ignore, Transformer, Comparer, Options, or
// a previously filtered Option.
func FilterPath(f func(Path) bool, opt Option) Option {
if f == nil {
panic("invalid path filter function")
}
if opt := normalizeOption(opt); opt != nil {
return &pathFilter{fnc: f, opt: opt}
}
return nil
}
type pathFilter struct {
core
fnc func(Path) bool
opt Option
}
func (f pathFilter) filter(s *state, t reflect.Type, vx, vy reflect.Value) applicableOption {
if f.fnc(s.curPath) {
return f.opt.filter(s, t, vx, vy)
}
return nil
}
func (f pathFilter) String() string {
return fmt.Sprintf("FilterPath(%s, %v)", function.NameOf(reflect.ValueOf(f.fnc)), f.opt)
}
// FilterValues returns a new Option where opt is only evaluated if filter f,
// which is a function of the form "func(T, T) bool", returns true for the
// current pair of values being compared. If either value is invalid or
// the type of the values is not assignable to T, then this filter implicitly
// returns false.
//
// The filter function must be
// symmetric (i.e., agnostic to the order of the inputs) and
// deterministic (i.e., produces the same result when given the same inputs).
// If T is an interface, it is possible that f is called with two values with
// different concrete types that both implement T.
//
// The option passed in may be an Ignore, Transformer, Comparer, Options, or
// a previously filtered Option.
func FilterValues(f interface{}, opt Option) Option {
v := reflect.ValueOf(f)
if !function.IsType(v.Type(), function.ValueFilter) || v.IsNil() {
panic(fmt.Sprintf("invalid values filter function: %T", f))
}
if opt := normalizeOption(opt); opt != nil {
vf := &valuesFilter{fnc: v, opt: opt}
if ti := v.Type().In(0); ti.Kind() != reflect.Interface || ti.NumMethod() > 0 {
vf.typ = ti
}
return vf
}
return nil
}
type valuesFilter struct {
core
typ reflect.Type // T
fnc reflect.Value // func(T, T) bool
opt Option
}
func (f valuesFilter) filter(s *state, t reflect.Type, vx, vy reflect.Value) applicableOption {
if !vx.IsValid() || !vx.CanInterface() || !vy.IsValid() || !vy.CanInterface() {
return nil
}
if (f.typ == nil || t.AssignableTo(f.typ)) && s.callTTBFunc(f.fnc, vx, vy) {
return f.opt.filter(s, t, vx, vy)
}
return nil
}
func (f valuesFilter) String() string {
return fmt.Sprintf("FilterValues(%s, %v)", function.NameOf(f.fnc), f.opt)
}
// Ignore is an Option that causes all comparisons to be ignored.
// This value is intended to be combined with FilterPath or FilterValues.
// It is an error to pass an unfiltered Ignore option to Equal.
func Ignore() Option { return ignore{} }
type ignore struct{ core }
func (ignore) isFiltered() bool { return false }
func (ignore) filter(_ *state, _ reflect.Type, _, _ reflect.Value) applicableOption { return ignore{} }
func (ignore) apply(s *state, _, _ reflect.Value) { s.report(true, reportByIgnore) }
func (ignore) String() string { return "Ignore()" }
// validator is a sentinel Option type to indicate that some options could not
// be evaluated due to unexported fields, missing slice elements, or
// missing map entries. Both values are validator only for unexported fields.
type validator struct{ core }
func (validator) filter(_ *state, _ reflect.Type, vx, vy reflect.Value) applicableOption {
if !vx.IsValid() || !vy.IsValid() {
return validator{}
}
if !vx.CanInterface() || !vy.CanInterface() {
return validator{}
}
return nil
}
func (validator) apply(s *state, vx, vy reflect.Value) {
// Implies missing slice element or map entry.
if !vx.IsValid() || !vy.IsValid() {
s.report(vx.IsValid() == vy.IsValid(), 0)
return
}
// Unable to Interface implies unexported field without visibility access.
if !vx.CanInterface() || !vy.CanInterface() {
const help = "consider using a custom Comparer; if you control the implementation of type, you can also consider AllowUnexported or cmpopts.IgnoreUnexported"
panic(fmt.Sprintf("cannot handle unexported field: %#v\n%s", s.curPath, help))
}
panic("not reachable")
}
// identRx represents a valid identifier according to the Go specification.
const identRx = `[_\p{L}][_\p{L}\p{N}]*`
var identsRx = regexp.MustCompile(`^` + identRx + `(\.` + identRx + `)*$`)
// Transformer returns an Option that applies a transformation function that
// converts values of a certain type into that of another.
//
// The transformer f must be a function "func(T) R" that converts values of
// type T to those of type R and is implicitly filtered to input values
// assignable to T. The transformer must not mutate T in any way.
//
// To help prevent some cases of infinite recursive cycles applying the
// same transform to the output of itself (e.g., in the case where the
// input and output types are the same), an implicit filter is added such that
// a transformer is applicable only if that exact transformer is not already
// in the tail of the Path since the last non-Transform step.
// For situations where the implicit filter is still insufficient,
// consider using cmpopts.AcyclicTransformer, which adds a filter
// to prevent the transformer from being recursively applied upon itself.
//
// The name is a user provided label that is used as the Transform.Name in the
// transformation PathStep (and eventually shown in the Diff output).
// The name must be a valid identifier or qualified identifier in Go syntax.
// If empty, an arbitrary name is used.
func Transformer(name string, f interface{}) Option {
v := reflect.ValueOf(f)
if !function.IsType(v.Type(), function.Transformer) || v.IsNil() {
panic(fmt.Sprintf("invalid transformer function: %T", f))
}
if name == "" {
name = function.NameOf(v)
if !identsRx.MatchString(name) {
name = "λ" // Lambda-symbol as placeholder name
}
} else if !identsRx.MatchString(name) {
panic(fmt.Sprintf("invalid name: %q", name))
}
tr := &transformer{name: name, fnc: reflect.ValueOf(f)}
if ti := v.Type().In(0); ti.Kind() != reflect.Interface || ti.NumMethod() > 0 {
tr.typ = ti
}
return tr
}
type transformer struct {
core
name string
typ reflect.Type // T
fnc reflect.Value // func(T) R
}
func (tr *transformer) isFiltered() bool { return tr.typ != nil }
func (tr *transformer) filter(s *state, t reflect.Type, _, _ reflect.Value) applicableOption {
for i := len(s.curPath) - 1; i >= 0; i-- {
if t, ok := s.curPath[i].(Transform); !ok {
break // Hit most recent non-Transform step
} else if tr == t.trans {
return nil // Cannot directly use same Transform
}
}
if tr.typ == nil || t.AssignableTo(tr.typ) {
return tr
}
return nil
}
func (tr *transformer) apply(s *state, vx, vy reflect.Value) {
step := Transform{&transform{pathStep{typ: tr.fnc.Type().Out(0)}, tr}}
vvx := s.callTRFunc(tr.fnc, vx, step)
vvy := s.callTRFunc(tr.fnc, vy, step)
step.vx, step.vy = vvx, vvy
s.compareAny(step)
}
func (tr transformer) String() string {
return fmt.Sprintf("Transformer(%s, %s)", tr.name, function.NameOf(tr.fnc))
}
// Comparer returns an Option that determines whether two values are equal
// to each other.
//
// The comparer f must be a function "func(T, T) bool" and is implicitly
// filtered to input values assignable to T. If T is an interface, it is
// possible that f is called with two values of different concrete types that
// both implement T.
//
// The equality function must be:
// • Symmetric: equal(x, y) == equal(y, x)
// • Deterministic: equal(x, y) == equal(x, y)
// • Pure: equal(x, y) does not modify x or y
func Comparer(f interface{}) Option {
v := reflect.ValueOf(f)
if !function.IsType(v.Type(), function.Equal) || v.IsNil() {
panic(fmt.Sprintf("invalid comparer function: %T", f))
}
cm := &comparer{fnc: v}
if ti := v.Type().In(0); ti.Kind() != reflect.Interface || ti.NumMethod() > 0 {
cm.typ = ti
}
return cm
}
type comparer struct {
core
typ reflect.Type // T
fnc reflect.Value // func(T, T) bool
}
func (cm *comparer) isFiltered() bool { return cm.typ != nil }
func (cm *comparer) filter(_ *state, t reflect.Type, _, _ reflect.Value) applicableOption {
if cm.typ == nil || t.AssignableTo(cm.typ) {
return cm
}
return nil
}
func (cm *comparer) apply(s *state, vx, vy reflect.Value) {
eq := s.callTTBFunc(cm.fnc, vx, vy)
s.report(eq, reportByFunc)
}
func (cm comparer) String() string {
return fmt.Sprintf("Comparer(%s)", function.NameOf(cm.fnc))
}
// AllowUnexported returns an Option that forcibly allows operations on
// unexported fields in certain structs, which are specified by passing in a
// value of each struct type.
//
// Users of this option must understand that comparing on unexported fields
// from external packages is not safe since changes in the internal
// implementation of some external package may cause the result of Equal
// to unexpectedly change. However, it may be valid to use this option on types
// defined in an internal package where the semantic meaning of an unexported
// field is in the control of the user.
//
// In many cases, a custom Comparer should be used instead that defines
// equality as a function of the public API of a type rather than the underlying
// unexported implementation.
//
// For example, the reflect.Type documentation defines equality to be determined
// by the == operator on the interface (essentially performing a shallow pointer
// comparison) and most attempts to compare *regexp.Regexp types are interested
// in only checking that the regular expression strings are equal.
// Both of these are accomplished using Comparers:
//
// Comparer(func(x, y reflect.Type) bool { return x == y })
// Comparer(func(x, y *regexp.Regexp) bool { return x.String() == y.String() })
//
// In other cases, the cmpopts.IgnoreUnexported option can be used to ignore
// all unexported fields on specified struct types.
func AllowUnexported(types ...interface{}) Option {
if !supportAllowUnexported {
panic("AllowUnexported is not supported on purego builds, Google App Engine Standard, or GopherJS")
}
m := make(map[reflect.Type]bool)
for _, typ := range types {
t := reflect.TypeOf(typ)
if t.Kind() != reflect.Struct {
panic(fmt.Sprintf("invalid struct type: %T", typ))
}
m[t] = true
}
return visibleStructs(m)
}
type visibleStructs map[reflect.Type]bool
func (visibleStructs) filter(_ *state, _ reflect.Type, _, _ reflect.Value) applicableOption {
panic("not implemented")
}
// Result represents the comparison result for a single node and
// is provided by cmp when calling Result (see Reporter).
type Result struct {
_ [0]func() // Make Result incomparable
flags resultFlags
}
// Equal reports whether the node was determined to be equal or not.
// As a special case, ignored nodes are considered equal.
func (r Result) Equal() bool {
return r.flags&(reportEqual|reportByIgnore) != 0
}
// ByIgnore reports whether the node is equal because it was ignored.
// This never reports true if Equal reports false.
func (r Result) ByIgnore() bool {
return r.flags&reportByIgnore != 0
}
// ByMethod reports whether the Equal method determined equality.
func (r Result) ByMethod() bool {
return r.flags&reportByMethod != 0
}
// ByFunc reports whether a Comparer function determined equality.
func (r Result) ByFunc() bool {
return r.flags&reportByFunc != 0
}
type resultFlags uint
const (
_ resultFlags = (1 << iota) / 2
reportEqual
reportUnequal
reportByIgnore
reportByMethod
reportByFunc
)
// Reporter is an Option that can be passed to Equal. When Equal traverses
// the value trees, it calls PushStep as it descends into each node in the
// tree and PopStep as it ascend out of the node. The leaves of the tree are
// either compared (determined to be equal or not equal) or ignored and reported
// as such by calling the Report method.
func Reporter(r interface {
// PushStep is called when a tree-traversal operation is performed.
// The PathStep itself is only valid until the step is popped.
// The PathStep.Values are valid for the duration of the entire traversal
// and must not be mutated.
//
// Equal always calls PushStep at the start to provide an operation-less
// PathStep used to report the root values.
//
// Within a slice, the exact set of inserted, removed, or modified elements
// is unspecified and may change in future implementations.
// The entries of a map are iterated through in an unspecified order.
PushStep(PathStep)
// Report is called exactly once on leaf nodes to report whether the
// comparison identified the node as equal, unequal, or ignored.
// A leaf node is one that is immediately preceded by and followed by
// a pair of PushStep and PopStep calls.
Report(Result)
// PopStep ascends back up the value tree.
// There is always a matching pop call for every push call.
PopStep()
}) Option {
return reporter{r}
}
type reporter struct{ reporterIface }
type reporterIface interface {
PushStep(PathStep)
Report(Result)
PopStep()
}
func (reporter) filter(_ *state, _ reflect.Type, _, _ reflect.Value) applicableOption {
panic("not implemented")
}
// normalizeOption normalizes the input options such that all Options groups
// are flattened and groups with a single element are reduced to that element.
// Only coreOptions and Options containing coreOptions are allowed.
func normalizeOption(src Option) Option {
switch opts := flattenOptions(nil, Options{src}); len(opts) {
case 0:
return nil
case 1:
return opts[0]
default:
return opts
}
}
// flattenOptions copies all options in src to dst as a flat list.
// Only coreOptions and Options containing coreOptions are allowed.
func flattenOptions(dst, src Options) Options {
for _, opt := range src {
switch opt := opt.(type) {
case nil:
continue
case Options:
dst = flattenOptions(dst, opt)
case coreOption:
dst = append(dst, opt)
default:
panic(fmt.Sprintf("invalid option type: %T", opt))
}
}
return dst
}

@ -0,0 +1,308 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"fmt"
"reflect"
"strings"
"unicode"
"unicode/utf8"
)
// Path is a list of PathSteps describing the sequence of operations to get
// from some root type to the current position in the value tree.
// The first Path element is always an operation-less PathStep that exists
// simply to identify the initial type.
//
// When traversing structs with embedded structs, the embedded struct will
// always be accessed as a field before traversing the fields of the
// embedded struct themselves. That is, an exported field from the
// embedded struct will never be accessed directly from the parent struct.
type Path []PathStep
// PathStep is a union-type for specific operations to traverse
// a value's tree structure. Users of this package never need to implement
// these types as values of this type will be returned by this package.
//
// Implementations of this interface are
// StructField, SliceIndex, MapIndex, Indirect, TypeAssertion, and Transform.
type PathStep interface {
String() string
// Type is the resulting type after performing the path step.
Type() reflect.Type
// Values is the resulting values after performing the path step.
// The type of each valid value is guaranteed to be identical to Type.
//
// In some cases, one or both may be invalid or have restrictions:
// • For StructField, both are not interface-able if the current field
// is unexported and the struct type is not explicitly permitted by
// AllowUnexported to traverse unexported fields.
// • For SliceIndex, one may be invalid if an element is missing from
// either the x or y slice.
// • For MapIndex, one may be invalid if an entry is missing from
// either the x or y map.
//
// The provided values must not be mutated.
Values() (vx, vy reflect.Value)
}
var (
_ PathStep = StructField{}
_ PathStep = SliceIndex{}
_ PathStep = MapIndex{}
_ PathStep = Indirect{}
_ PathStep = TypeAssertion{}
_ PathStep = Transform{}
)
func (pa *Path) push(s PathStep) {
*pa = append(*pa, s)
}
func (pa *Path) pop() {
*pa = (*pa)[:len(*pa)-1]
}
// Last returns the last PathStep in the Path.
// If the path is empty, this returns a non-nil PathStep that reports a nil Type.
func (pa Path) Last() PathStep {
return pa.Index(-1)
}
// Index returns the ith step in the Path and supports negative indexing.
// A negative index starts counting from the tail of the Path such that -1
// refers to the last step, -2 refers to the second-to-last step, and so on.
// If index is invalid, this returns a non-nil PathStep that reports a nil Type.
func (pa Path) Index(i int) PathStep {
if i < 0 {
i = len(pa) + i
}
if i < 0 || i >= len(pa) {
return pathStep{}
}
return pa[i]
}
// String returns the simplified path to a node.
// The simplified path only contains struct field accesses.
//
// For example:
// MyMap.MySlices.MyField
func (pa Path) String() string {
var ss []string
for _, s := range pa {
if _, ok := s.(StructField); ok {
ss = append(ss, s.String())
}
}
return strings.TrimPrefix(strings.Join(ss, ""), ".")
}
// GoString returns the path to a specific node using Go syntax.
//
// For example:
// (*root.MyMap["key"].(*mypkg.MyStruct).MySlices)[2][3].MyField
func (pa Path) GoString() string {
var ssPre, ssPost []string
var numIndirect int
for i, s := range pa {
var nextStep PathStep
if i+1 < len(pa) {
nextStep = pa[i+1]
}
switch s := s.(type) {
case Indirect:
numIndirect++
pPre, pPost := "(", ")"
switch nextStep.(type) {
case Indirect:
continue // Next step is indirection, so let them batch up
case StructField:
numIndirect-- // Automatic indirection on struct fields
case nil:
pPre, pPost = "", "" // Last step; no need for parenthesis
}
if numIndirect > 0 {
ssPre = append(ssPre, pPre+strings.Repeat("*", numIndirect))
ssPost = append(ssPost, pPost)
}
numIndirect = 0
continue
case Transform:
ssPre = append(ssPre, s.trans.name+"(")
ssPost = append(ssPost, ")")
continue
}
ssPost = append(ssPost, s.String())
}
for i, j := 0, len(ssPre)-1; i < j; i, j = i+1, j-1 {
ssPre[i], ssPre[j] = ssPre[j], ssPre[i]
}
return strings.Join(ssPre, "") + strings.Join(ssPost, "")
}
type pathStep struct {
typ reflect.Type
vx, vy reflect.Value
}
func (ps pathStep) Type() reflect.Type { return ps.typ }
func (ps pathStep) Values() (vx, vy reflect.Value) { return ps.vx, ps.vy }
func (ps pathStep) String() string {
if ps.typ == nil {
return "<nil>"
}
s := ps.typ.String()
if s == "" || strings.ContainsAny(s, "{}\n") {
return "root" // Type too simple or complex to print
}
return fmt.Sprintf("{%s}", s)
}
// StructField represents a struct field access on a field called Name.
type StructField struct{ *structField }
type structField struct {
pathStep
name string
idx int
// These fields are used for forcibly accessing an unexported field.
// pvx, pvy, and field are only valid if unexported is true.
unexported bool
mayForce bool // Forcibly allow visibility
pvx, pvy reflect.Value // Parent values
field reflect.StructField // Field information
}
func (sf StructField) Type() reflect.Type { return sf.typ }
func (sf StructField) Values() (vx, vy reflect.Value) {
if !sf.unexported {
return sf.vx, sf.vy // CanInterface reports true
}
// Forcibly obtain read-write access to an unexported struct field.
if sf.mayForce {
vx = retrieveUnexportedField(sf.pvx, sf.field)
vy = retrieveUnexportedField(sf.pvy, sf.field)
return vx, vy // CanInterface reports true
}
return sf.vx, sf.vy // CanInterface reports false
}
func (sf StructField) String() string { return fmt.Sprintf(".%s", sf.name) }
// Name is the field name.
func (sf StructField) Name() string { return sf.name }
// Index is the index of the field in the parent struct type.
// See reflect.Type.Field.
func (sf StructField) Index() int { return sf.idx }
// SliceIndex is an index operation on a slice or array at some index Key.
type SliceIndex struct{ *sliceIndex }
type sliceIndex struct {
pathStep
xkey, ykey int
}
func (si SliceIndex) Type() reflect.Type { return si.typ }
func (si SliceIndex) Values() (vx, vy reflect.Value) { return si.vx, si.vy }
func (si SliceIndex) String() string {
switch {
case si.xkey == si.ykey:
return fmt.Sprintf("[%d]", si.xkey)
case si.ykey == -1:
// [5->?] means "I don't know where X[5] went"
return fmt.Sprintf("[%d->?]", si.xkey)
case si.xkey == -1:
// [?->3] means "I don't know where Y[3] came from"
return fmt.Sprintf("[?->%d]", si.ykey)
default:
// [5->3] means "X[5] moved to Y[3]"
return fmt.Sprintf("[%d->%d]", si.xkey, si.ykey)
}
}
// Key is the index key; it may return -1 if in a split state
func (si SliceIndex) Key() int {
if si.xkey != si.ykey {
return -1
}
return si.xkey
}
// SplitKeys are the indexes for indexing into slices in the
// x and y values, respectively. These indexes may differ due to the
// insertion or removal of an element in one of the slices, causing
// all of the indexes to be shifted. If an index is -1, then that
// indicates that the element does not exist in the associated slice.
//
// Key is guaranteed to return -1 if and only if the indexes returned
// by SplitKeys are not the same. SplitKeys will never return -1 for
// both indexes.
func (si SliceIndex) SplitKeys() (ix, iy int) { return si.xkey, si.ykey }
// MapIndex is an index operation on a map at some index Key.
type MapIndex struct{ *mapIndex }
type mapIndex struct {
pathStep
key reflect.Value
}
func (mi MapIndex) Type() reflect.Type { return mi.typ }
func (mi MapIndex) Values() (vx, vy reflect.Value) { return mi.vx, mi.vy }
func (mi MapIndex) String() string { return fmt.Sprintf("[%#v]", mi.key) }
// Key is the value of the map key.
func (mi MapIndex) Key() reflect.Value { return mi.key }
// Indirect represents pointer indirection on the parent type.
type Indirect struct{ *indirect }
type indirect struct {
pathStep
}
func (in Indirect) Type() reflect.Type { return in.typ }
func (in Indirect) Values() (vx, vy reflect.Value) { return in.vx, in.vy }
func (in Indirect) String() string { return "*" }
// TypeAssertion represents a type assertion on an interface.
type TypeAssertion struct{ *typeAssertion }
type typeAssertion struct {
pathStep
}
func (ta TypeAssertion) Type() reflect.Type { return ta.typ }
func (ta TypeAssertion) Values() (vx, vy reflect.Value) { return ta.vx, ta.vy }
func (ta TypeAssertion) String() string { return fmt.Sprintf(".(%v)", ta.typ) }
// Transform is a transformation from the parent type to the current type.
type Transform struct{ *transform }
type transform struct {
pathStep
trans *transformer
}
func (tf Transform) Type() reflect.Type { return tf.typ }
func (tf Transform) Values() (vx, vy reflect.Value) { return tf.vx, tf.vy }
func (tf Transform) String() string { return fmt.Sprintf("%s()", tf.trans.name) }
// Name is the name of the Transformer.
func (tf Transform) Name() string { return tf.trans.name }
// Func is the function pointer to the transformer function.
func (tf Transform) Func() reflect.Value { return tf.trans.fnc }
// Option returns the originally constructed Transformer option.
// The == operator can be used to detect the exact option used.
func (tf Transform) Option() Option { return tf.trans }
// isExported reports whether the identifier is exported.
func isExported(id string) bool {
r, _ := utf8.DecodeRuneInString(id)
return unicode.IsUpper(r)
}

@ -0,0 +1,51 @@
// Copyright 2017, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
// defaultReporter implements the reporter interface.
//
// As Equal serially calls the PushStep, Report, and PopStep methods, the
// defaultReporter constructs a tree-based representation of the compared value
// and the result of each comparison (see valueNode).
//
// When the String method is called, the FormatDiff method transforms the
// valueNode tree into a textNode tree, which is a tree-based representation
// of the textual output (see textNode).
//
// Lastly, the textNode.String method produces the final report as a string.
type defaultReporter struct {
root *valueNode
curr *valueNode
}
func (r *defaultReporter) PushStep(ps PathStep) {
r.curr = r.curr.PushStep(ps)
if r.root == nil {
r.root = r.curr
}
}
func (r *defaultReporter) Report(rs Result) {
r.curr.Report(rs)
}
func (r *defaultReporter) PopStep() {
r.curr = r.curr.PopStep()
}
// String provides a full report of the differences detected as a structured
// literal in pseudo-Go syntax. String may only be called after the entire tree
// has been traversed.
func (r *defaultReporter) String() string {
assert(r.root != nil && r.curr == nil)
if r.root.NumDiff == 0 {
return ""
}
return formatOptions{}.FormatDiff(r.root).String()
}
func assert(ok bool) {
if !ok {
panic("assertion failure")
}
}

@ -0,0 +1,296 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"fmt"
"reflect"
"github.com/google/go-cmp/cmp/internal/value"
)
// TODO: Enforce limits?
// * Enforce maximum number of records to print per node?
// * Enforce maximum size in bytes allowed?
// * As a heuristic, use less verbosity for equal nodes than unequal nodes.
// TODO: Enforce unique outputs?
// * Avoid Stringer methods if it results in same output?
// * Print pointer address if outputs still equal?
// numContextRecords is the number of surrounding equal records to print.
const numContextRecords = 2
type diffMode byte
const (
diffUnknown diffMode = 0
diffIdentical diffMode = ' '
diffRemoved diffMode = '-'
diffInserted diffMode = '+'
)
type typeMode int
const (
// emitType always prints the type.
emitType typeMode = iota
// elideType never prints the type.
elideType
// autoType prints the type only for composite kinds
// (i.e., structs, slices, arrays, and maps).
autoType
)
type formatOptions struct {
// DiffMode controls the output mode of FormatDiff.
//
// If diffUnknown, then produce a diff of the x and y values.
// If diffIdentical, then emit values as if they were equal.
// If diffRemoved, then only emit x values (ignoring y values).
// If diffInserted, then only emit y values (ignoring x values).
DiffMode diffMode
// TypeMode controls whether to print the type for the current node.
//
// As a general rule of thumb, we always print the type of the next node
// after an interface, and always elide the type of the next node after
// a slice or map node.
TypeMode typeMode
// formatValueOptions are options specific to printing reflect.Values.
formatValueOptions
}
func (opts formatOptions) WithDiffMode(d diffMode) formatOptions {
opts.DiffMode = d
return opts
}
func (opts formatOptions) WithTypeMode(t typeMode) formatOptions {
opts.TypeMode = t
return opts
}
// FormatDiff converts a valueNode tree into a textNode tree, where the later
// is a textual representation of the differences detected in the former.
func (opts formatOptions) FormatDiff(v *valueNode) textNode {
// Check whether we have specialized formatting for this node.
// This is not necessary, but helpful for producing more readable outputs.
if opts.CanFormatDiffSlice(v) {
return opts.FormatDiffSlice(v)
}
// For leaf nodes, format the value based on the reflect.Values alone.
if v.MaxDepth == 0 {
switch opts.DiffMode {
case diffUnknown, diffIdentical:
// Format Equal.
if v.NumDiff == 0 {
outx := opts.FormatValue(v.ValueX, visitedPointers{})
outy := opts.FormatValue(v.ValueY, visitedPointers{})
if v.NumIgnored > 0 && v.NumSame == 0 {
return textEllipsis
} else if outx.Len() < outy.Len() {
return outx
} else {
return outy
}
}
// Format unequal.
assert(opts.DiffMode == diffUnknown)
var list textList
outx := opts.WithTypeMode(elideType).FormatValue(v.ValueX, visitedPointers{})
outy := opts.WithTypeMode(elideType).FormatValue(v.ValueY, visitedPointers{})
if outx != nil {
list = append(list, textRecord{Diff: '-', Value: outx})
}
if outy != nil {
list = append(list, textRecord{Diff: '+', Value: outy})
}
return opts.WithTypeMode(emitType).FormatType(v.Type, list)
case diffRemoved:
return opts.FormatValue(v.ValueX, visitedPointers{})
case diffInserted:
return opts.FormatValue(v.ValueY, visitedPointers{})
default:
panic("invalid diff mode")
}
}
// Descend into the child value node.
if v.TransformerName != "" {
out := opts.WithTypeMode(emitType).FormatDiff(v.Value)
out = textWrap{"Inverse(" + v.TransformerName + ", ", out, ")"}
return opts.FormatType(v.Type, out)
} else {
switch k := v.Type.Kind(); k {
case reflect.Struct, reflect.Array, reflect.Slice, reflect.Map:
return opts.FormatType(v.Type, opts.formatDiffList(v.Records, k))
case reflect.Ptr:
return textWrap{"&", opts.FormatDiff(v.Value), ""}
case reflect.Interface:
return opts.WithTypeMode(emitType).FormatDiff(v.Value)
default:
panic(fmt.Sprintf("%v cannot have children", k))
}
}
}
func (opts formatOptions) formatDiffList(recs []reportRecord, k reflect.Kind) textNode {
// Derive record name based on the data structure kind.
var name string
var formatKey func(reflect.Value) string
switch k {
case reflect.Struct:
name = "field"
opts = opts.WithTypeMode(autoType)
formatKey = func(v reflect.Value) string { return v.String() }
case reflect.Slice, reflect.Array:
name = "element"
opts = opts.WithTypeMode(elideType)
formatKey = func(reflect.Value) string { return "" }
case reflect.Map:
name = "entry"
opts = opts.WithTypeMode(elideType)
formatKey = formatMapKey
}
// Handle unification.
switch opts.DiffMode {
case diffIdentical, diffRemoved, diffInserted:
var list textList
var deferredEllipsis bool // Add final "..." to indicate records were dropped
for _, r := range recs {
// Elide struct fields that are zero value.
if k == reflect.Struct {
var isZero bool
switch opts.DiffMode {
case diffIdentical:
isZero = value.IsZero(r.Value.ValueX) || value.IsZero(r.Value.ValueY)
case diffRemoved:
isZero = value.IsZero(r.Value.ValueX)
case diffInserted:
isZero = value.IsZero(r.Value.ValueY)
}
if isZero {
continue
}
}
// Elide ignored nodes.
if r.Value.NumIgnored > 0 && r.Value.NumSame+r.Value.NumDiff == 0 {
deferredEllipsis = !(k == reflect.Slice || k == reflect.Array)
if !deferredEllipsis {
list.AppendEllipsis(diffStats{})
}
continue
}
if out := opts.FormatDiff(r.Value); out != nil {
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
}
}
if deferredEllipsis {
list.AppendEllipsis(diffStats{})
}
return textWrap{"{", list, "}"}
case diffUnknown:
default:
panic("invalid diff mode")
}
// Handle differencing.
var list textList
groups := coalesceAdjacentRecords(name, recs)
for i, ds := range groups {
// Handle equal records.
if ds.NumDiff() == 0 {
// Compute the number of leading and trailing records to print.
var numLo, numHi int
numEqual := ds.NumIgnored + ds.NumIdentical
for numLo < numContextRecords && numLo+numHi < numEqual && i != 0 {
if r := recs[numLo].Value; r.NumIgnored > 0 && r.NumSame+r.NumDiff == 0 {
break
}
numLo++
}
for numHi < numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
if r := recs[numEqual-numHi-1].Value; r.NumIgnored > 0 && r.NumSame+r.NumDiff == 0 {
break
}
numHi++
}
if numEqual-(numLo+numHi) == 1 && ds.NumIgnored == 0 {
numHi++ // Avoid pointless coalescing of a single equal record
}
// Format the equal values.
for _, r := range recs[:numLo] {
out := opts.WithDiffMode(diffIdentical).FormatDiff(r.Value)
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
}
if numEqual > numLo+numHi {
ds.NumIdentical -= numLo + numHi
list.AppendEllipsis(ds)
}
for _, r := range recs[numEqual-numHi : numEqual] {
out := opts.WithDiffMode(diffIdentical).FormatDiff(r.Value)
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
}
recs = recs[numEqual:]
continue
}
// Handle unequal records.
for _, r := range recs[:ds.NumDiff()] {
switch {
case opts.CanFormatDiffSlice(r.Value):
out := opts.FormatDiffSlice(r.Value)
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
case r.Value.NumChildren == r.Value.MaxDepth:
outx := opts.WithDiffMode(diffRemoved).FormatDiff(r.Value)
outy := opts.WithDiffMode(diffInserted).FormatDiff(r.Value)
if outx != nil {
list = append(list, textRecord{Diff: diffRemoved, Key: formatKey(r.Key), Value: outx})
}
if outy != nil {
list = append(list, textRecord{Diff: diffInserted, Key: formatKey(r.Key), Value: outy})
}
default:
out := opts.FormatDiff(r.Value)
list = append(list, textRecord{Key: formatKey(r.Key), Value: out})
}
}
recs = recs[ds.NumDiff():]
}
assert(len(recs) == 0)
return textWrap{"{", list, "}"}
}
// coalesceAdjacentRecords coalesces the list of records into groups of
// adjacent equal, or unequal counts.
func coalesceAdjacentRecords(name string, recs []reportRecord) (groups []diffStats) {
var prevCase int // Arbitrary index into which case last occurred
lastStats := func(i int) *diffStats {
if prevCase != i {
groups = append(groups, diffStats{Name: name})
prevCase = i
}
return &groups[len(groups)-1]
}
for _, r := range recs {
switch rv := r.Value; {
case rv.NumIgnored > 0 && rv.NumSame+rv.NumDiff == 0:
lastStats(1).NumIgnored++
case rv.NumDiff == 0:
lastStats(1).NumIdentical++
case rv.NumDiff > 0 && !rv.ValueY.IsValid():
lastStats(2).NumRemoved++
case rv.NumDiff > 0 && !rv.ValueX.IsValid():
lastStats(2).NumInserted++
default:
lastStats(2).NumModified++
}
}
return groups
}

@ -0,0 +1,278 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"fmt"
"reflect"
"strconv"
"strings"
"unicode"
"github.com/google/go-cmp/cmp/internal/flags"
"github.com/google/go-cmp/cmp/internal/value"
)
type formatValueOptions struct {
// AvoidStringer controls whether to avoid calling custom stringer
// methods like error.Error or fmt.Stringer.String.
AvoidStringer bool
// ShallowPointers controls whether to avoid descending into pointers.
// Useful when printing map keys, where pointer comparison is performed
// on the pointer address rather than the pointed-at value.
ShallowPointers bool
// PrintAddresses controls whether to print the address of all pointers,
// slice elements, and maps.
PrintAddresses bool
}
// FormatType prints the type as if it were wrapping s.
// This may return s as-is depending on the current type and TypeMode mode.
func (opts formatOptions) FormatType(t reflect.Type, s textNode) textNode {
// Check whether to emit the type or not.
switch opts.TypeMode {
case autoType:
switch t.Kind() {
case reflect.Struct, reflect.Slice, reflect.Array, reflect.Map:
if s.Equal(textNil) {
return s
}
default:
return s
}
case elideType:
return s
}
// Determine the type label, applying special handling for unnamed types.
typeName := t.String()
if t.Name() == "" {
// According to Go grammar, certain type literals contain symbols that
// do not strongly bind to the next lexicographical token (e.g., *T).
switch t.Kind() {
case reflect.Chan, reflect.Func, reflect.Ptr:
typeName = "(" + typeName + ")"
}
typeName = strings.Replace(typeName, "struct {", "struct{", -1)
typeName = strings.Replace(typeName, "interface {", "interface{", -1)
}
// Avoid wrap the value in parenthesis if unnecessary.
if s, ok := s.(textWrap); ok {
hasParens := strings.HasPrefix(s.Prefix, "(") && strings.HasSuffix(s.Suffix, ")")
hasBraces := strings.HasPrefix(s.Prefix, "{") && strings.HasSuffix(s.Suffix, "}")
if hasParens || hasBraces {
return textWrap{typeName, s, ""}
}
}
return textWrap{typeName + "(", s, ")"}
}
// FormatValue prints the reflect.Value, taking extra care to avoid descending
// into pointers already in m. As pointers are visited, m is also updated.
func (opts formatOptions) FormatValue(v reflect.Value, m visitedPointers) (out textNode) {
if !v.IsValid() {
return nil
}
t := v.Type()
// Check whether there is an Error or String method to call.
if !opts.AvoidStringer && v.CanInterface() {
// Avoid calling Error or String methods on nil receivers since many
// implementations crash when doing so.
if (t.Kind() != reflect.Ptr && t.Kind() != reflect.Interface) || !v.IsNil() {
switch v := v.Interface().(type) {
case error:
return textLine("e" + formatString(v.Error()))
case fmt.Stringer:
return textLine("s" + formatString(v.String()))
}
}
}
// Check whether to explicitly wrap the result with the type.
var skipType bool
defer func() {
if !skipType {
out = opts.FormatType(t, out)
}
}()
var ptr string
switch t.Kind() {
case reflect.Bool:
return textLine(fmt.Sprint(v.Bool()))
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
return textLine(fmt.Sprint(v.Int()))
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
// Unnamed uints are usually bytes or words, so use hexadecimal.
if t.PkgPath() == "" || t.Kind() == reflect.Uintptr {
return textLine(formatHex(v.Uint()))
}
return textLine(fmt.Sprint(v.Uint()))
case reflect.Float32, reflect.Float64:
return textLine(fmt.Sprint(v.Float()))
case reflect.Complex64, reflect.Complex128:
return textLine(fmt.Sprint(v.Complex()))
case reflect.String:
return textLine(formatString(v.String()))
case reflect.UnsafePointer, reflect.Chan, reflect.Func:
return textLine(formatPointer(v))
case reflect.Struct:
var list textList
for i := 0; i < v.NumField(); i++ {
vv := v.Field(i)
if value.IsZero(vv) {
continue // Elide fields with zero values
}
s := opts.WithTypeMode(autoType).FormatValue(vv, m)
list = append(list, textRecord{Key: t.Field(i).Name, Value: s})
}
return textWrap{"{", list, "}"}
case reflect.Slice:
if v.IsNil() {
return textNil
}
if opts.PrintAddresses {
ptr = formatPointer(v)
}
fallthrough
case reflect.Array:
var list textList
for i := 0; i < v.Len(); i++ {
vi := v.Index(i)
if vi.CanAddr() { // Check for cyclic elements
p := vi.Addr()
if m.Visit(p) {
var out textNode
out = textLine(formatPointer(p))
out = opts.WithTypeMode(emitType).FormatType(p.Type(), out)
out = textWrap{"*", out, ""}
list = append(list, textRecord{Value: out})
continue
}
}
s := opts.WithTypeMode(elideType).FormatValue(vi, m)
list = append(list, textRecord{Value: s})
}
return textWrap{ptr + "{", list, "}"}
case reflect.Map:
if v.IsNil() {
return textNil
}
if m.Visit(v) {
return textLine(formatPointer(v))
}
var list textList
for _, k := range value.SortKeys(v.MapKeys()) {
sk := formatMapKey(k)
sv := opts.WithTypeMode(elideType).FormatValue(v.MapIndex(k), m)
list = append(list, textRecord{Key: sk, Value: sv})
}
if opts.PrintAddresses {
ptr = formatPointer(v)
}
return textWrap{ptr + "{", list, "}"}
case reflect.Ptr:
if v.IsNil() {
return textNil
}
if m.Visit(v) || opts.ShallowPointers {
return textLine(formatPointer(v))
}
if opts.PrintAddresses {
ptr = formatPointer(v)
}
skipType = true // Let the underlying value print the type instead
return textWrap{"&" + ptr, opts.FormatValue(v.Elem(), m), ""}
case reflect.Interface:
if v.IsNil() {
return textNil
}
// Interfaces accept different concrete types,
// so configure the underlying value to explicitly print the type.
skipType = true // Print the concrete type instead
return opts.WithTypeMode(emitType).FormatValue(v.Elem(), m)
default:
panic(fmt.Sprintf("%v kind not handled", v.Kind()))
}
}
// formatMapKey formats v as if it were a map key.
// The result is guaranteed to be a single line.
func formatMapKey(v reflect.Value) string {
var opts formatOptions
opts.TypeMode = elideType
opts.ShallowPointers = true
s := opts.FormatValue(v, visitedPointers{}).String()
return strings.TrimSpace(s)
}
// formatString prints s as a double-quoted or backtick-quoted string.
func formatString(s string) string {
// Use quoted string if it the same length as a raw string literal.
// Otherwise, attempt to use the raw string form.
qs := strconv.Quote(s)
if len(qs) == 1+len(s)+1 {
return qs
}
// Disallow newlines to ensure output is a single line.
// Only allow printable runes for readability purposes.
rawInvalid := func(r rune) bool {
return r == '`' || r == '\n' || !(unicode.IsPrint(r) || r == '\t')
}
if strings.IndexFunc(s, rawInvalid) < 0 {
return "`" + s + "`"
}
return qs
}
// formatHex prints u as a hexadecimal integer in Go notation.
func formatHex(u uint64) string {
var f string
switch {
case u <= 0xff:
f = "0x%02x"
case u <= 0xffff:
f = "0x%04x"
case u <= 0xffffff:
f = "0x%06x"
case u <= 0xffffffff:
f = "0x%08x"
case u <= 0xffffffffff:
f = "0x%010x"
case u <= 0xffffffffffff:
f = "0x%012x"
case u <= 0xffffffffffffff:
f = "0x%014x"
case u <= 0xffffffffffffffff:
f = "0x%016x"
}
return fmt.Sprintf(f, u)
}
// formatPointer prints the address of the pointer.
func formatPointer(v reflect.Value) string {
p := v.Pointer()
if flags.Deterministic {
p = 0xdeadf00f // Only used for stable testing purposes
}
return fmt.Sprintf("⟪0x%x⟫", p)
}
type visitedPointers map[value.Pointer]struct{}
// Visit inserts pointer v into the visited map and reports whether it had
// already been visited before.
func (m visitedPointers) Visit(v reflect.Value) bool {
p := value.PointerOf(v)
_, visited := m[p]
m[p] = struct{}{}
return visited
}

@ -0,0 +1,333 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"bytes"
"fmt"
"reflect"
"strings"
"unicode"
"unicode/utf8"
"github.com/google/go-cmp/cmp/internal/diff"
)
// CanFormatDiffSlice reports whether we support custom formatting for nodes
// that are slices of primitive kinds or strings.
func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
switch {
case opts.DiffMode != diffUnknown:
return false // Must be formatting in diff mode
case v.NumDiff == 0:
return false // No differences detected
case v.NumIgnored+v.NumCompared+v.NumTransformed > 0:
// TODO: Handle the case where someone uses bytes.Equal on a large slice.
return false // Some custom option was used to determined equality
case !v.ValueX.IsValid() || !v.ValueY.IsValid():
return false // Both values must be valid
}
switch t := v.Type; t.Kind() {
case reflect.String:
case reflect.Array, reflect.Slice:
// Only slices of primitive types have specialized handling.
switch t.Elem().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
default:
return false
}
// If a sufficient number of elements already differ,
// use specialized formatting even if length requirement is not met.
if v.NumDiff > v.NumSame {
return true
}
default:
return false
}
// Use specialized string diffing for longer slices or strings.
const minLength = 64
return v.ValueX.Len() >= minLength && v.ValueY.Len() >= minLength
}
// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
// This provides custom-tailored logic to make printing of differences in
// textual strings and slices of primitive kinds more readable.
func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
assert(opts.DiffMode == diffUnknown)
t, vx, vy := v.Type, v.ValueX, v.ValueY
// Auto-detect the type of the data.
var isLinedText, isText, isBinary bool
var sx, sy string
switch {
case t.Kind() == reflect.String:
sx, sy = vx.String(), vy.String()
isText = true // Initial estimate, verify later
case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
sx, sy = string(vx.Bytes()), string(vy.Bytes())
isBinary = true // Initial estimate, verify later
case t.Kind() == reflect.Array:
// Arrays need to be addressable for slice operations to work.
vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
vx2.Set(vx)
vy2.Set(vy)
vx, vy = vx2, vy2
}
if isText || isBinary {
var numLines, lastLineIdx, maxLineLen int
isBinary = false
for i, r := range sx + sy {
if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
isBinary = true
break
}
if r == '\n' {
if maxLineLen < i-lastLineIdx {
maxLineLen = i - lastLineIdx
}
lastLineIdx = i + 1
numLines++
}
}
isText = !isBinary
isLinedText = isText && numLines >= 4 && maxLineLen <= 256
}
// Format the string into printable records.
var list textList
var delim string
switch {
// If the text appears to be multi-lined text,
// then perform differencing across individual lines.
case isLinedText:
ssx := strings.Split(sx, "\n")
ssy := strings.Split(sy, "\n")
list = opts.formatDiffSlice(
reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
func(v reflect.Value, d diffMode) textRecord {
s := formatString(v.Index(0).String())
return textRecord{Diff: d, Value: textLine(s)}
},
)
delim = "\n"
// If the text appears to be single-lined text,
// then perform differencing in approximately fixed-sized chunks.
// The output is printed as quoted strings.
case isText:
list = opts.formatDiffSlice(
reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
func(v reflect.Value, d diffMode) textRecord {
s := formatString(v.String())
return textRecord{Diff: d, Value: textLine(s)}
},
)
delim = ""
// If the text appears to be binary data,
// then perform differencing in approximately fixed-sized chunks.
// The output is inspired by hexdump.
case isBinary:
list = opts.formatDiffSlice(
reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
func(v reflect.Value, d diffMode) textRecord {
var ss []string
for i := 0; i < v.Len(); i++ {
ss = append(ss, formatHex(v.Index(i).Uint()))
}
s := strings.Join(ss, ", ")
comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
return textRecord{Diff: d, Value: textLine(s), Comment: comment}
},
)
// For all other slices of primitive types,
// then perform differencing in approximately fixed-sized chunks.
// The size of each chunk depends on the width of the element kind.
default:
var chunkSize int
if t.Elem().Kind() == reflect.Bool {
chunkSize = 16
} else {
switch t.Elem().Bits() {
case 8:
chunkSize = 16
case 16:
chunkSize = 12
case 32:
chunkSize = 8
default:
chunkSize = 8
}
}
list = opts.formatDiffSlice(
vx, vy, chunkSize, t.Elem().Kind().String(),
func(v reflect.Value, d diffMode) textRecord {
var ss []string
for i := 0; i < v.Len(); i++ {
switch t.Elem().Kind() {
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
ss = append(ss, fmt.Sprint(v.Index(i).Int()))
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
ss = append(ss, formatHex(v.Index(i).Uint()))
case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
}
}
s := strings.Join(ss, ", ")
return textRecord{Diff: d, Value: textLine(s)}
},
)
}
// Wrap the output with appropriate type information.
var out textNode = textWrap{"{", list, "}"}
if !isText {
// The "{...}" byte-sequence literal is not valid Go syntax for strings.
// Emit the type for extra clarity (e.g. "string{...}").
if t.Kind() == reflect.String {
opts = opts.WithTypeMode(emitType)
}
return opts.FormatType(t, out)
}
switch t.Kind() {
case reflect.String:
out = textWrap{"strings.Join(", out, fmt.Sprintf(", %q)", delim)}
if t != reflect.TypeOf(string("")) {
out = opts.FormatType(t, out)
}
case reflect.Slice:
out = textWrap{"bytes.Join(", out, fmt.Sprintf(", %q)", delim)}
if t != reflect.TypeOf([]byte(nil)) {
out = opts.FormatType(t, out)
}
}
return out
}
// formatASCII formats s as an ASCII string.
// This is useful for printing binary strings in a semi-legible way.
func formatASCII(s string) string {
b := bytes.Repeat([]byte{'.'}, len(s))
for i := 0; i < len(s); i++ {
if ' ' <= s[i] && s[i] <= '~' {
b[i] = s[i]
}
}
return string(b)
}
func (opts formatOptions) formatDiffSlice(
vx, vy reflect.Value, chunkSize int, name string,
makeRec func(reflect.Value, diffMode) textRecord,
) (list textList) {
es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
})
appendChunks := func(v reflect.Value, d diffMode) int {
n0 := v.Len()
for v.Len() > 0 {
n := chunkSize
if n > v.Len() {
n = v.Len()
}
list = append(list, makeRec(v.Slice(0, n), d))
v = v.Slice(n, v.Len())
}
return n0 - v.Len()
}
groups := coalesceAdjacentEdits(name, es)
groups = coalesceInterveningIdentical(groups, chunkSize/4)
for i, ds := range groups {
// Print equal.
if ds.NumDiff() == 0 {
// Compute the number of leading and trailing equal bytes to print.
var numLo, numHi int
numEqual := ds.NumIgnored + ds.NumIdentical
for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
numLo++
}
for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
numHi++
}
if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
}
// Print the equal bytes.
appendChunks(vx.Slice(0, numLo), diffIdentical)
if numEqual > numLo+numHi {
ds.NumIdentical -= numLo + numHi
list.AppendEllipsis(ds)
}
appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
vx = vx.Slice(numEqual, vx.Len())
vy = vy.Slice(numEqual, vy.Len())
continue
}
// Print unequal.
nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
vx = vx.Slice(nx, vx.Len())
ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
vy = vy.Slice(ny, vy.Len())
}
assert(vx.Len() == 0 && vy.Len() == 0)
return list
}
// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
// equal or unequal counts.
func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
var prevCase int // Arbitrary index into which case last occurred
lastStats := func(i int) *diffStats {
if prevCase != i {
groups = append(groups, diffStats{Name: name})
prevCase = i
}
return &groups[len(groups)-1]
}
for _, e := range es {
switch e {
case diff.Identity:
lastStats(1).NumIdentical++
case diff.UniqueX:
lastStats(2).NumRemoved++
case diff.UniqueY:
lastStats(2).NumInserted++
case diff.Modified:
lastStats(2).NumModified++
}
}
return groups
}
// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
// equal groups into adjacent unequal groups that currently result in a
// dual inserted/removed printout. This acts as a high-pass filter to smooth
// out high-frequency changes within the windowSize.
func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
groups, groupsOrig := groups[:0], groups
for i, ds := range groupsOrig {
if len(groups) >= 2 && ds.NumDiff() > 0 {
prev := &groups[len(groups)-2] // Unequal group
curr := &groups[len(groups)-1] // Equal group
next := &groupsOrig[i] // Unequal group
hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
*prev = prev.Append(*curr).Append(*next)
groups = groups[:len(groups)-1] // Truncate off equal group
continue
}
}
groups = append(groups, ds)
}
return groups
}

@ -0,0 +1,387 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import (
"bytes"
"fmt"
"math/rand"
"strings"
"time"
"github.com/google/go-cmp/cmp/internal/flags"
)
var randBool = rand.New(rand.NewSource(time.Now().Unix())).Intn(2) == 0
type indentMode int
func (n indentMode) appendIndent(b []byte, d diffMode) []byte {
// The output of Diff is documented as being unstable to provide future
// flexibility in changing the output for more humanly readable reports.
// This logic intentionally introduces instability to the exact output
// so that users can detect accidental reliance on stability early on,
// rather than much later when an actual change to the format occurs.
if flags.Deterministic || randBool {
// Use regular spaces (U+0020).
switch d {
case diffUnknown, diffIdentical:
b = append(b, " "...)
case diffRemoved:
b = append(b, "- "...)
case diffInserted:
b = append(b, "+ "...)
}
} else {
// Use non-breaking spaces (U+00a0).
switch d {
case diffUnknown, diffIdentical:
b = append(b, "  "...)
case diffRemoved:
b = append(b, "- "...)
case diffInserted:
b = append(b, "+ "...)
}
}
return repeatCount(n).appendChar(b, '\t')
}
type repeatCount int
func (n repeatCount) appendChar(b []byte, c byte) []byte {
for ; n > 0; n-- {
b = append(b, c)
}
return b
}
// textNode is a simplified tree-based representation of structured text.
// Possible node types are textWrap, textList, or textLine.
type textNode interface {
// Len reports the length in bytes of a single-line version of the tree.
// Nested textRecord.Diff and textRecord.Comment fields are ignored.
Len() int
// Equal reports whether the two trees are structurally identical.
// Nested textRecord.Diff and textRecord.Comment fields are compared.
Equal(textNode) bool
// String returns the string representation of the text tree.
// It is not guaranteed that len(x.String()) == x.Len(),
// nor that x.String() == y.String() implies that x.Equal(y).
String() string
// formatCompactTo formats the contents of the tree as a single-line string
// to the provided buffer. Any nested textRecord.Diff and textRecord.Comment
// fields are ignored.
//
// However, not all nodes in the tree should be collapsed as a single-line.
// If a node can be collapsed as a single-line, it is replaced by a textLine
// node. Since the top-level node cannot replace itself, this also returns
// the current node itself.
//
// This does not mutate the receiver.
formatCompactTo([]byte, diffMode) ([]byte, textNode)
// formatExpandedTo formats the contents of the tree as a multi-line string
// to the provided buffer. In order for column alignment to operate well,
// formatCompactTo must be called before calling formatExpandedTo.
formatExpandedTo([]byte, diffMode, indentMode) []byte
}
// textWrap is a wrapper that concatenates a prefix and/or a suffix
// to the underlying node.
type textWrap struct {
Prefix string // e.g., "bytes.Buffer{"
Value textNode // textWrap | textList | textLine
Suffix string // e.g., "}"
}
func (s textWrap) Len() int {
return len(s.Prefix) + s.Value.Len() + len(s.Suffix)
}
func (s1 textWrap) Equal(s2 textNode) bool {
if s2, ok := s2.(textWrap); ok {
return s1.Prefix == s2.Prefix && s1.Value.Equal(s2.Value) && s1.Suffix == s2.Suffix
}
return false
}
func (s textWrap) String() string {
var d diffMode
var n indentMode
_, s2 := s.formatCompactTo(nil, d)
b := n.appendIndent(nil, d) // Leading indent
b = s2.formatExpandedTo(b, d, n) // Main body
b = append(b, '\n') // Trailing newline
return string(b)
}
func (s textWrap) formatCompactTo(b []byte, d diffMode) ([]byte, textNode) {
n0 := len(b) // Original buffer length
b = append(b, s.Prefix...)
b, s.Value = s.Value.formatCompactTo(b, d)
b = append(b, s.Suffix...)
if _, ok := s.Value.(textLine); ok {
return b, textLine(b[n0:])
}
return b, s
}
func (s textWrap) formatExpandedTo(b []byte, d diffMode, n indentMode) []byte {
b = append(b, s.Prefix...)
b = s.Value.formatExpandedTo(b, d, n)
b = append(b, s.Suffix...)
return b
}
// textList is a comma-separated list of textWrap or textLine nodes.
// The list may be formatted as multi-lines or single-line at the discretion
// of the textList.formatCompactTo method.
type textList []textRecord
type textRecord struct {
Diff diffMode // e.g., 0 or '-' or '+'
Key string // e.g., "MyField"
Value textNode // textWrap | textLine
Comment fmt.Stringer // e.g., "6 identical fields"
}
// AppendEllipsis appends a new ellipsis node to the list if none already
// exists at the end. If cs is non-zero it coalesces the statistics with the
// previous diffStats.
func (s *textList) AppendEllipsis(ds diffStats) {
hasStats := ds != diffStats{}
if len(*s) == 0 || !(*s)[len(*s)-1].Value.Equal(textEllipsis) {
if hasStats {
*s = append(*s, textRecord{Value: textEllipsis, Comment: ds})
} else {
*s = append(*s, textRecord{Value: textEllipsis})
}
return
}
if hasStats {
(*s)[len(*s)-1].Comment = (*s)[len(*s)-1].Comment.(diffStats).Append(ds)
}
}
func (s textList) Len() (n int) {
for i, r := range s {
n += len(r.Key)
if r.Key != "" {
n += len(": ")
}
n += r.Value.Len()
if i < len(s)-1 {
n += len(", ")
}
}
return n
}
func (s1 textList) Equal(s2 textNode) bool {
if s2, ok := s2.(textList); ok {
if len(s1) != len(s2) {
return false
}
for i := range s1 {
r1, r2 := s1[i], s2[i]
if !(r1.Diff == r2.Diff && r1.Key == r2.Key && r1.Value.Equal(r2.Value) && r1.Comment == r2.Comment) {
return false
}
}
return true
}
return false
}
func (s textList) String() string {
return textWrap{"{", s, "}"}.String()
}
func (s textList) formatCompactTo(b []byte, d diffMode) ([]byte, textNode) {
s = append(textList(nil), s...) // Avoid mutating original
// Determine whether we can collapse this list as a single line.
n0 := len(b) // Original buffer length
var multiLine bool
for i, r := range s {
if r.Diff == diffInserted || r.Diff == diffRemoved {
multiLine = true
}
b = append(b, r.Key...)
if r.Key != "" {
b = append(b, ": "...)
}
b, s[i].Value = r.Value.formatCompactTo(b, d|r.Diff)
if _, ok := s[i].Value.(textLine); !ok {
multiLine = true
}
if r.Comment != nil {
multiLine = true
}
if i < len(s)-1 {
b = append(b, ", "...)
}
}
// Force multi-lined output when printing a removed/inserted node that
// is sufficiently long.
if (d == diffInserted || d == diffRemoved) && len(b[n0:]) > 80 {
multiLine = true
}
if !multiLine {
return b, textLine(b[n0:])
}
return b, s
}
func (s textList) formatExpandedTo(b []byte, d diffMode, n indentMode) []byte {
alignKeyLens := s.alignLens(
func(r textRecord) bool {
_, isLine := r.Value.(textLine)
return r.Key == "" || !isLine
},
func(r textRecord) int { return len(r.Key) },
)
alignValueLens := s.alignLens(
func(r textRecord) bool {
_, isLine := r.Value.(textLine)
return !isLine || r.Value.Equal(textEllipsis) || r.Comment == nil
},
func(r textRecord) int { return len(r.Value.(textLine)) },
)
// Format the list as a multi-lined output.
n++
for i, r := range s {
b = n.appendIndent(append(b, '\n'), d|r.Diff)
if r.Key != "" {
b = append(b, r.Key+": "...)
}
b = alignKeyLens[i].appendChar(b, ' ')
b = r.Value.formatExpandedTo(b, d|r.Diff, n)
if !r.Value.Equal(textEllipsis) {
b = append(b, ',')
}
b = alignValueLens[i].appendChar(b, ' ')
if r.Comment != nil {
b = append(b, " // "+r.Comment.String()...)
}
}
n--
return n.appendIndent(append(b, '\n'), d)
}
func (s textList) alignLens(
skipFunc func(textRecord) bool,
lenFunc func(textRecord) int,
) []repeatCount {
var startIdx, endIdx, maxLen int
lens := make([]repeatCount, len(s))
for i, r := range s {
if skipFunc(r) {
for j := startIdx; j < endIdx && j < len(s); j++ {
lens[j] = repeatCount(maxLen - lenFunc(s[j]))
}
startIdx, endIdx, maxLen = i+1, i+1, 0
} else {
if maxLen < lenFunc(r) {
maxLen = lenFunc(r)
}
endIdx = i + 1
}
}
for j := startIdx; j < endIdx && j < len(s); j++ {
lens[j] = repeatCount(maxLen - lenFunc(s[j]))
}
return lens
}
// textLine is a single-line segment of text and is always a leaf node
// in the textNode tree.
type textLine []byte
var (
textNil = textLine("nil")
textEllipsis = textLine("...")
)
func (s textLine) Len() int {
return len(s)
}
func (s1 textLine) Equal(s2 textNode) bool {
if s2, ok := s2.(textLine); ok {
return bytes.Equal([]byte(s1), []byte(s2))
}
return false
}
func (s textLine) String() string {
return string(s)
}
func (s textLine) formatCompactTo(b []byte, d diffMode) ([]byte, textNode) {
return append(b, s...), s
}
func (s textLine) formatExpandedTo(b []byte, _ diffMode, _ indentMode) []byte {
return append(b, s...)
}
type diffStats struct {
Name string
NumIgnored int
NumIdentical int
NumRemoved int
NumInserted int
NumModified int
}
func (s diffStats) NumDiff() int {
return s.NumRemoved + s.NumInserted + s.NumModified
}
func (s diffStats) Append(ds diffStats) diffStats {
assert(s.Name == ds.Name)
s.NumIgnored += ds.NumIgnored
s.NumIdentical += ds.NumIdentical
s.NumRemoved += ds.NumRemoved
s.NumInserted += ds.NumInserted
s.NumModified += ds.NumModified
return s
}
// String prints a humanly-readable summary of coalesced records.
//
// Example:
// diffStats{Name: "Field", NumIgnored: 5}.String() => "5 ignored fields"
func (s diffStats) String() string {
var ss []string
var sum int
labels := [...]string{"ignored", "identical", "removed", "inserted", "modified"}
counts := [...]int{s.NumIgnored, s.NumIdentical, s.NumRemoved, s.NumInserted, s.NumModified}
for i, n := range counts {
if n > 0 {
ss = append(ss, fmt.Sprintf("%d %v", n, labels[i]))
}
sum += n
}
// Pluralize the name (adjusting for some obscure English grammar rules).
name := s.Name
if sum > 1 {
name += "s"
if strings.HasSuffix(name, "ys") {
name = name[:len(name)-2] + "ies" // e.g., "entrys" => "entries"
}
}
// Format the list according to English grammar (with Oxford comma).
switch n := len(ss); n {
case 0:
return ""
case 1, 2:
return strings.Join(ss, " and ") + " " + name
default:
return strings.Join(ss[:n-1], ", ") + ", and " + ss[n-1] + " " + name
}
}
type commentString string
func (s commentString) String() string { return string(s) }

@ -0,0 +1,121 @@
// Copyright 2019, The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package cmp
import "reflect"
// valueNode represents a single node within a report, which is a
// structured representation of the value tree, containing information
// regarding which nodes are equal or not.
type valueNode struct {
parent *valueNode
Type reflect.Type
ValueX reflect.Value
ValueY reflect.Value
// NumSame is the number of leaf nodes that are equal.
// All descendants are equal only if NumDiff is 0.
NumSame int
// NumDiff is the number of leaf nodes that are not equal.
NumDiff int
// NumIgnored is the number of leaf nodes that are ignored.
NumIgnored int
// NumCompared is the number of leaf nodes that were compared
// using an Equal method or Comparer function.
NumCompared int
// NumTransformed is the number of non-leaf nodes that were transformed.
NumTransformed int
// NumChildren is the number of transitive descendants of this node.
// This counts from zero; thus, leaf nodes have no descendants.
NumChildren int
// MaxDepth is the maximum depth of the tree. This counts from zero;
// thus, leaf nodes have a depth of zero.
MaxDepth int
// Records is a list of struct fields, slice elements, or map entries.
Records []reportRecord // If populated, implies Value is not populated
// Value is the result of a transformation, pointer indirect, of
// type assertion.
Value *valueNode // If populated, implies Records is not populated
// TransformerName is the name of the transformer.
TransformerName string // If non-empty, implies Value is populated
}
type reportRecord struct {
Key reflect.Value // Invalid for slice element
Value *valueNode
}
func (parent *valueNode) PushStep(ps PathStep) (child *valueNode) {
vx, vy := ps.Values()
child = &valueNode{parent: parent, Type: ps.Type(), ValueX: vx, ValueY: vy}
switch s := ps.(type) {
case StructField:
assert(parent.Value == nil)
parent.Records = append(parent.Records, reportRecord{Key: reflect.ValueOf(s.Name()), Value: child})
case SliceIndex:
assert(parent.Value == nil)
parent.Records = append(parent.Records, reportRecord{Value: child})
case MapIndex:
assert(parent.Value == nil)
parent.Records = append(parent.Records, reportRecord{Key: s.Key(), Value: child})
case Indirect:
assert(parent.Value == nil && parent.Records == nil)
parent.Value = child
case TypeAssertion:
assert(parent.Value == nil && parent.Records == nil)
parent.Value = child
case Transform:
assert(parent.Value == nil && parent.Records == nil)
parent.Value = child
parent.TransformerName = s.Name()
parent.NumTransformed++
default:
assert(parent == nil) // Must be the root step
}
return child
}
func (r *valueNode) Report(rs Result) {
assert(r.MaxDepth == 0) // May only be called on leaf nodes
if rs.ByIgnore() {
r.NumIgnored++
} else {
if rs.Equal() {
r.NumSame++
} else {
r.NumDiff++
}
}
assert(r.NumSame+r.NumDiff+r.NumIgnored == 1)
if rs.ByMethod() {
r.NumCompared++
}
if rs.ByFunc() {
r.NumCompared++
}
assert(r.NumCompared <= 1)
}
func (child *valueNode) PopStep() (parent *valueNode) {
if child.parent == nil {
return nil
}
parent = child.parent
parent.NumSame += child.NumSame
parent.NumDiff += child.NumDiff
parent.NumIgnored += child.NumIgnored
parent.NumCompared += child.NumCompared
parent.NumTransformed += child.NumTransformed
parent.NumChildren += child.NumChildren + 1
if parent.MaxDepth < child.MaxDepth+1 {
parent.MaxDepth = child.MaxDepth + 1
}
return parent
}

@ -1,9 +0,0 @@
y.output
# ignore intellij files
.idea
*.iml
*.ipr
*.iws
*.test

@ -1,13 +0,0 @@
sudo: false
language: go
go:
- 1.x
- tip
branches:
only:
- master
script: make test

@ -1,18 +0,0 @@
TEST?=./...
default: test
fmt: generate
go fmt ./...
test: generate
go get -t ./...
go test $(TEST) $(TESTARGS)
generate:
go generate ./...
updatedeps:
go get -u golang.org/x/tools/cmd/stringer
.PHONY: default generate test updatedeps

@ -1,125 +0,0 @@
# HCL
[![GoDoc](https://godoc.org/github.com/hashicorp/hcl?status.png)](https://godoc.org/github.com/hashicorp/hcl) [![Build Status](https://travis-ci.org/hashicorp/hcl.svg?branch=master)](https://travis-ci.org/hashicorp/hcl)
HCL (HashiCorp Configuration Language) is a configuration language built
by HashiCorp. The goal of HCL is to build a structured configuration language
that is both human and machine friendly for use with command-line tools, but
specifically targeted towards DevOps tools, servers, etc.
HCL is also fully JSON compatible. That is, JSON can be used as completely
valid input to a system expecting HCL. This helps makes systems
interoperable with other systems.
HCL is heavily inspired by
[libucl](https://github.com/vstakhov/libucl),
nginx configuration, and others similar.
## Why?
A common question when viewing HCL is to ask the question: why not
JSON, YAML, etc.?
Prior to HCL, the tools we built at [HashiCorp](http://www.hashicorp.com)
used a variety of configuration languages from full programming languages
such as Ruby to complete data structure languages such as JSON. What we
learned is that some people wanted human-friendly configuration languages
and some people wanted machine-friendly languages.
JSON fits a nice balance in this, but is fairly verbose and most
importantly doesn't support comments. With YAML, we found that beginners
had a really hard time determining what the actual structure was, and
ended up guessing more often than not whether to use a hyphen, colon, etc.
in order to represent some configuration key.
Full programming languages such as Ruby enable complex behavior
a configuration language shouldn't usually allow, and also forces
people to learn some set of Ruby.
Because of this, we decided to create our own configuration language
that is JSON-compatible. Our configuration language (HCL) is designed
to be written and modified by humans. The API for HCL allows JSON
as an input so that it is also machine-friendly (machines can generate
JSON instead of trying to generate HCL).
Our goal with HCL is not to alienate other configuration languages.
It is instead to provide HCL as a specialized language for our tools,
and JSON as the interoperability layer.
## Syntax
For a complete grammar, please see the parser itself. A high-level overview
of the syntax and grammar is listed here.
* Single line comments start with `#` or `//`
* Multi-line comments are wrapped in `/*` and `*/`. Nested block comments
are not allowed. A multi-line comment (also known as a block comment)
terminates at the first `*/` found.
* Values are assigned with the syntax `key = value` (whitespace doesn't
matter). The value can be any primitive: a string, number, boolean,
object, or list.
* Strings are double-quoted and can contain any UTF-8 characters.
Example: `"Hello, World"`
* Multi-line strings start with `<<EOF` at the end of a line, and end
with `EOF` on its own line ([here documents](https://en.wikipedia.org/wiki/Here_document)).
Any text may be used in place of `EOF`. Example:
```
<<FOO
hello
world
FOO
```
* Numbers are assumed to be base 10. If you prefix a number with 0x,
it is treated as a hexadecimal. If it is prefixed with 0, it is
treated as an octal. Numbers can be in scientific notation: "1e10".
* Boolean values: `true`, `false`
* Arrays can be made by wrapping it in `[]`. Example:
`["foo", "bar", 42]`. Arrays can contain primitives,
other arrays, and objects. As an alternative, lists
of objects can be created with repeated blocks, using
this structure:
```hcl
service {
key = "value"
}
service {
key = "value"
}
```
Objects and nested objects are created using the structure shown below:
```
variable "ami" {
description = "the AMI to use"
}
```
This would be equivalent to the following json:
``` json
{
"variable": {
"ami": {
"description": "the AMI to use"
}
}
}
```
## Thanks
Thanks to:
* [@vstakhov](https://github.com/vstakhov) - The original libucl parser
and syntax that HCL was based off of.
* [@fatih](https://github.com/fatih) - The rewritten HCL parser
in pure Go (no goyacc) and support for a printer.

@ -1,19 +0,0 @@
version: "build-{branch}-{build}"
image: Visual Studio 2015
clone_folder: c:\gopath\src\github.com\hashicorp\hcl
environment:
GOPATH: c:\gopath
init:
- git config --global core.autocrlf false
install:
- cmd: >-
echo %Path%
go version
go env
go get -t ./...
build_script:
- cmd: go test -v ./...

@ -1,729 +0,0 @@
package hcl
import (
"errors"
"fmt"
"reflect"
"sort"
"strconv"
"strings"
"github.com/hashicorp/hcl/hcl/ast"
"github.com/hashicorp/hcl/hcl/parser"
"github.com/hashicorp/hcl/hcl/token"
)
// This is the tag to use with structures to have settings for HCL
const tagName = "hcl"
var (
// nodeType holds a reference to the type of ast.Node
nodeType reflect.Type = findNodeType()
)
// Unmarshal accepts a byte slice as input and writes the
// data to the value pointed to by v.
func Unmarshal(bs []byte, v interface{}) error {
root, err := parse(bs)
if err != nil {
return err
}
return DecodeObject(v, root)
}
// Decode reads the given input and decodes it into the structure
// given by `out`.
func Decode(out interface{}, in string) error {
obj, err := Parse(in)
if err != nil {
return err
}
return DecodeObject(out, obj)
}
// DecodeObject is a lower-level version of Decode. It decodes a
// raw Object into the given output.
func DecodeObject(out interface{}, n ast.Node) error {
val := reflect.ValueOf(out)
if val.Kind() != reflect.Ptr {
return errors.New("result must be a pointer")
}
// If we have the file, we really decode the root node
if f, ok := n.(*ast.File); ok {
n = f.Node
}
var d decoder
return d.decode("root", n, val.Elem())
}
type decoder struct {
stack []reflect.Kind
}
func (d *decoder) decode(name string, node ast.Node, result reflect.Value) error {
k := result
// If we have an interface with a valid value, we use that
// for the check.
if result.Kind() == reflect.Interface {
elem := result.Elem()
if elem.IsValid() {
k = elem
}
}
// Push current onto stack unless it is an interface.
if k.Kind() != reflect.Interface {
d.stack = append(d.stack, k.Kind())
// Schedule a pop
defer func() {
d.stack = d.stack[:len(d.stack)-1]
}()
}
switch k.Kind() {
case reflect.Bool:
return d.decodeBool(name, node, result)
case reflect.Float32, reflect.Float64:
return d.decodeFloat(name, node, result)
case reflect.Int, reflect.Int32, reflect.Int64:
return d.decodeInt(name, node, result)
case reflect.Interface:
// When we see an interface, we make our own thing
return d.decodeInterface(name, node, result)
case reflect.Map:
return d.decodeMap(name, node, result)
case reflect.Ptr:
return d.decodePtr(name, node, result)
case reflect.Slice:
return d.decodeSlice(name, node, result)
case reflect.String:
return d.decodeString(name, node, result)
case reflect.Struct:
return d.decodeStruct(name, node, result)
default:
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: unknown kind to decode into: %s", name, k.Kind()),
}
}
}
func (d *decoder) decodeBool(name string, node ast.Node, result reflect.Value) error {
switch n := node.(type) {
case *ast.LiteralType:
if n.Token.Type == token.BOOL {
v, err := strconv.ParseBool(n.Token.Text)
if err != nil {
return err
}
result.Set(reflect.ValueOf(v))
return nil
}
}
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: unknown type %T", name, node),
}
}
func (d *decoder) decodeFloat(name string, node ast.Node, result reflect.Value) error {
switch n := node.(type) {
case *ast.LiteralType:
if n.Token.Type == token.FLOAT || n.Token.Type == token.NUMBER {
v, err := strconv.ParseFloat(n.Token.Text, 64)
if err != nil {
return err
}
result.Set(reflect.ValueOf(v).Convert(result.Type()))
return nil
}
}
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: unknown type %T", name, node),
}
}
func (d *decoder) decodeInt(name string, node ast.Node, result reflect.Value) error {
switch n := node.(type) {
case *ast.LiteralType:
switch n.Token.Type {
case token.NUMBER:
v, err := strconv.ParseInt(n.Token.Text, 0, 0)
if err != nil {
return err
}
if result.Kind() == reflect.Interface {
result.Set(reflect.ValueOf(int(v)))
} else {
result.SetInt(v)
}
return nil
case token.STRING:
v, err := strconv.ParseInt(n.Token.Value().(string), 0, 0)
if err != nil {
return err
}
if result.Kind() == reflect.Interface {
result.Set(reflect.ValueOf(int(v)))
} else {
result.SetInt(v)
}
return nil
}
}
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: unknown type %T", name, node),
}
}
func (d *decoder) decodeInterface(name string, node ast.Node, result reflect.Value) error {
// When we see an ast.Node, we retain the value to enable deferred decoding.
// Very useful in situations where we want to preserve ast.Node information
// like Pos
if result.Type() == nodeType && result.CanSet() {
result.Set(reflect.ValueOf(node))
return nil
}
var set reflect.Value
redecode := true
// For testing types, ObjectType should just be treated as a list. We
// set this to a temporary var because we want to pass in the real node.
testNode := node
if ot, ok := node.(*ast.ObjectType); ok {
testNode = ot.List
}
switch n := testNode.(type) {
case *ast.ObjectList:
// If we're at the root or we're directly within a slice, then we
// decode objects into map[string]interface{}, otherwise we decode
// them into lists.
if len(d.stack) == 0 || d.stack[len(d.stack)-1] == reflect.Slice {
var temp map[string]interface{}
tempVal := reflect.ValueOf(temp)
result := reflect.MakeMap(
reflect.MapOf(
reflect.TypeOf(""),
tempVal.Type().Elem()))
set = result
} else {
var temp []map[string]interface{}
tempVal := reflect.ValueOf(temp)
result := reflect.MakeSlice(
reflect.SliceOf(tempVal.Type().Elem()), 0, len(n.Items))
set = result
}
case *ast.ObjectType:
// If we're at the root or we're directly within a slice, then we
// decode objects into map[string]interface{}, otherwise we decode
// them into lists.
if len(d.stack) == 0 || d.stack[len(d.stack)-1] == reflect.Slice {
var temp map[string]interface{}
tempVal := reflect.ValueOf(temp)
result := reflect.MakeMap(
reflect.MapOf(
reflect.TypeOf(""),
tempVal.Type().Elem()))
set = result
} else {
var temp []map[string]interface{}
tempVal := reflect.ValueOf(temp)
result := reflect.MakeSlice(
reflect.SliceOf(tempVal.Type().Elem()), 0, 1)
set = result
}
case *ast.ListType:
var temp []interface{}
tempVal := reflect.ValueOf(temp)
result := reflect.MakeSlice(
reflect.SliceOf(tempVal.Type().Elem()), 0, 0)
set = result
case *ast.LiteralType:
switch n.Token.Type {
case token.BOOL:
var result bool
set = reflect.Indirect(reflect.New(reflect.TypeOf(result)))
case token.FLOAT:
var result float64
set = reflect.Indirect(reflect.New(reflect.TypeOf(result)))
case token.NUMBER:
var result int
set = reflect.Indirect(reflect.New(reflect.TypeOf(result)))
case token.STRING, token.HEREDOC:
set = reflect.Indirect(reflect.New(reflect.TypeOf("")))
default:
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: cannot decode into interface: %T", name, node),
}
}
default:
return fmt.Errorf(
"%s: cannot decode into interface: %T",
name, node)
}
// Set the result to what its supposed to be, then reset
// result so we don't reflect into this method anymore.
result.Set(set)
if redecode {
// Revisit the node so that we can use the newly instantiated
// thing and populate it.
if err := d.decode(name, node, result); err != nil {
return err
}
}
return nil
}
func (d *decoder) decodeMap(name string, node ast.Node, result reflect.Value) error {
if item, ok := node.(*ast.ObjectItem); ok {
node = &ast.ObjectList{Items: []*ast.ObjectItem{item}}
}
if ot, ok := node.(*ast.ObjectType); ok {
node = ot.List
}
n, ok := node.(*ast.ObjectList)
if !ok {
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: not an object type for map (%T)", name, node),
}
}
// If we have an interface, then we can address the interface,
// but not the slice itself, so get the element but set the interface
set := result
if result.Kind() == reflect.Interface {
result = result.Elem()
}
resultType := result.Type()
resultElemType := resultType.Elem()
resultKeyType := resultType.Key()
if resultKeyType.Kind() != reflect.String {
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: map must have string keys", name),
}
}
// Make a map if it is nil
resultMap := result
if result.IsNil() {
resultMap = reflect.MakeMap(
reflect.MapOf(resultKeyType, resultElemType))
}
// Go through each element and decode it.
done := make(map[string]struct{})
for _, item := range n.Items {
if item.Val == nil {
continue
}
// github.com/hashicorp/terraform/issue/5740
if len(item.Keys) == 0 {
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: map must have string keys", name),
}
}
// Get the key we're dealing with, which is the first item
keyStr := item.Keys[0].Token.Value().(string)
// If we've already processed this key, then ignore it
if _, ok := done[keyStr]; ok {
continue
}
// Determine the value. If we have more than one key, then we
// get the objectlist of only these keys.
itemVal := item.Val
if len(item.Keys) > 1 {
itemVal = n.Filter(keyStr)
done[keyStr] = struct{}{}
}
// Make the field name
fieldName := fmt.Sprintf("%s.%s", name, keyStr)
// Get the key/value as reflection values
key := reflect.ValueOf(keyStr)
val := reflect.Indirect(reflect.New(resultElemType))
// If we have a pre-existing value in the map, use that
oldVal := resultMap.MapIndex(key)
if oldVal.IsValid() {
val.Set(oldVal)
}
// Decode!
if err := d.decode(fieldName, itemVal, val); err != nil {
return err
}
// Set the value on the map
resultMap.SetMapIndex(key, val)
}
// Set the final map if we can
set.Set(resultMap)
return nil
}
func (d *decoder) decodePtr(name string, node ast.Node, result reflect.Value) error {
// Create an element of the concrete (non pointer) type and decode
// into that. Then set the value of the pointer to this type.
resultType := result.Type()
resultElemType := resultType.Elem()
val := reflect.New(resultElemType)
if err := d.decode(name, node, reflect.Indirect(val)); err != nil {
return err
}
result.Set(val)
return nil
}
func (d *decoder) decodeSlice(name string, node ast.Node, result reflect.Value) error {
// If we have an interface, then we can address the interface,
// but not the slice itself, so get the element but set the interface
set := result
if result.Kind() == reflect.Interface {
result = result.Elem()
}
// Create the slice if it isn't nil
resultType := result.Type()
resultElemType := resultType.Elem()
if result.IsNil() {
resultSliceType := reflect.SliceOf(resultElemType)
result = reflect.MakeSlice(
resultSliceType, 0, 0)
}
// Figure out the items we'll be copying into the slice
var items []ast.Node
switch n := node.(type) {
case *ast.ObjectList:
items = make([]ast.Node, len(n.Items))
for i, item := range n.Items {
items[i] = item
}
case *ast.ObjectType:
items = []ast.Node{n}
case *ast.ListType:
items = n.List
default:
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("unknown slice type: %T", node),
}
}
for i, item := range items {
fieldName := fmt.Sprintf("%s[%d]", name, i)
// Decode
val := reflect.Indirect(reflect.New(resultElemType))
// if item is an object that was decoded from ambiguous JSON and
// flattened, make sure it's expanded if it needs to decode into a
// defined structure.
item := expandObject(item, val)
if err := d.decode(fieldName, item, val); err != nil {
return err
}
// Append it onto the slice
result = reflect.Append(result, val)
}
set.Set(result)
return nil
}
// expandObject detects if an ambiguous JSON object was flattened to a List which
// should be decoded into a struct, and expands the ast to properly deocode.
func expandObject(node ast.Node, result reflect.Value) ast.Node {
item, ok := node.(*ast.ObjectItem)
if !ok {
return node
}
elemType := result.Type()
// our target type must be a struct
switch elemType.Kind() {
case reflect.Ptr:
switch elemType.Elem().Kind() {
case reflect.Struct:
//OK
default:
return node
}
case reflect.Struct:
//OK
default:
return node
}
// A list value will have a key and field name. If it had more fields,
// it wouldn't have been flattened.
if len(item.Keys) != 2 {
return node
}
keyToken := item.Keys[0].Token
item.Keys = item.Keys[1:]
// we need to un-flatten the ast enough to decode
newNode := &ast.ObjectItem{
Keys: []*ast.ObjectKey{
&ast.ObjectKey{
Token: keyToken,
},
},
Val: &ast.ObjectType{
List: &ast.ObjectList{
Items: []*ast.ObjectItem{item},
},
},
}
return newNode
}
func (d *decoder) decodeString(name string, node ast.Node, result reflect.Value) error {
switch n := node.(type) {
case *ast.LiteralType:
switch n.Token.Type {
case token.NUMBER:
result.Set(reflect.ValueOf(n.Token.Text).Convert(result.Type()))
return nil
case token.STRING, token.HEREDOC:
result.Set(reflect.ValueOf(n.Token.Value()).Convert(result.Type()))
return nil
}
}
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: unknown type for string %T", name, node),
}
}
func (d *decoder) decodeStruct(name string, node ast.Node, result reflect.Value) error {
var item *ast.ObjectItem
if it, ok := node.(*ast.ObjectItem); ok {
item = it
node = it.Val
}
if ot, ok := node.(*ast.ObjectType); ok {
node = ot.List
}
// Handle the special case where the object itself is a literal. Previously
// the yacc parser would always ensure top-level elements were arrays. The new
// parser does not make the same guarantees, thus we need to convert any
// top-level literal elements into a list.
if _, ok := node.(*ast.LiteralType); ok && item != nil {
node = &ast.ObjectList{Items: []*ast.ObjectItem{item}}
}
list, ok := node.(*ast.ObjectList)
if !ok {
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: not an object type for struct (%T)", name, node),
}
}
// This slice will keep track of all the structs we'll be decoding.
// There can be more than one struct if there are embedded structs
// that are squashed.
structs := make([]reflect.Value, 1, 5)
structs[0] = result
// Compile the list of all the fields that we're going to be decoding
// from all the structs.
type field struct {
field reflect.StructField
val reflect.Value
}
fields := []field{}
for len(structs) > 0 {
structVal := structs[0]
structs = structs[1:]
structType := structVal.Type()
for i := 0; i < structType.NumField(); i++ {
fieldType := structType.Field(i)
tagParts := strings.Split(fieldType.Tag.Get(tagName), ",")
// Ignore fields with tag name "-"
if tagParts[0] == "-" {
continue
}
if fieldType.Anonymous {
fieldKind := fieldType.Type.Kind()
if fieldKind != reflect.Struct {
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: unsupported type to struct: %s",
fieldType.Name, fieldKind),
}
}
// We have an embedded field. We "squash" the fields down
// if specified in the tag.
squash := false
for _, tag := range tagParts[1:] {
if tag == "squash" {
squash = true
break
}
}
if squash {
structs = append(
structs, result.FieldByName(fieldType.Name))
continue
}
}
// Normal struct field, store it away
fields = append(fields, field{fieldType, structVal.Field(i)})
}
}
usedKeys := make(map[string]struct{})
decodedFields := make([]string, 0, len(fields))
decodedFieldsVal := make([]reflect.Value, 0)
unusedKeysVal := make([]reflect.Value, 0)
for _, f := range fields {
field, fieldValue := f.field, f.val
if !fieldValue.IsValid() {
// This should never happen
panic("field is not valid")
}
// If we can't set the field, then it is unexported or something,
// and we just continue onwards.
if !fieldValue.CanSet() {
continue
}
fieldName := field.Name
tagValue := field.Tag.Get(tagName)
tagParts := strings.SplitN(tagValue, ",", 2)
if len(tagParts) >= 2 {
switch tagParts[1] {
case "decodedFields":
decodedFieldsVal = append(decodedFieldsVal, fieldValue)
continue
case "key":
if item == nil {
return &parser.PosError{
Pos: node.Pos(),
Err: fmt.Errorf("%s: %s asked for 'key', impossible",
name, fieldName),
}
}
fieldValue.SetString(item.Keys[0].Token.Value().(string))
continue
case "unusedKeys":
unusedKeysVal = append(unusedKeysVal, fieldValue)
continue
}
}
if tagParts[0] != "" {
fieldName = tagParts[0]
}
// Determine the element we'll use to decode. If it is a single
// match (only object with the field), then we decode it exactly.
// If it is a prefix match, then we decode the matches.
filter := list.Filter(fieldName)
prefixMatches := filter.Children()
matches := filter.Elem()
if len(matches.Items) == 0 && len(prefixMatches.Items) == 0 {
continue
}
// Track the used key
usedKeys[fieldName] = struct{}{}
// Create the field name and decode. We range over the elements
// because we actually want the value.
fieldName = fmt.Sprintf("%s.%s", name, fieldName)
if len(prefixMatches.Items) > 0 {
if err := d.decode(fieldName, prefixMatches, fieldValue); err != nil {
return err
}
}
for _, match := range matches.Items {
var decodeNode ast.Node = match.Val
if ot, ok := decodeNode.(*ast.ObjectType); ok {
decodeNode = &ast.ObjectList{Items: ot.List.Items}
}
if err := d.decode(fieldName, decodeNode, fieldValue); err != nil {
return err
}
}
decodedFields = append(decodedFields, field.Name)
}
if len(decodedFieldsVal) > 0 {
// Sort it so that it is deterministic
sort.Strings(decodedFields)
for _, v := range decodedFieldsVal {
v.Set(reflect.ValueOf(decodedFields))
}
}
return nil
}
// findNodeType returns the type of ast.Node
func findNodeType() reflect.Type {
var nodeContainer struct {
Node ast.Node
}
value := reflect.ValueOf(nodeContainer).FieldByName("Node")
return value.Type()
}

@ -1,3 +0,0 @@
module github.com/hashicorp/hcl
require github.com/davecgh/go-spew v1.1.1

@ -1,2 +0,0 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=

@ -1,11 +0,0 @@
// Package hcl decodes HCL into usable Go structures.
//
// hcl input can come in either pure HCL format or JSON format.
// It can be parsed into an AST, and then decoded into a structure,
// or it can be decoded directly from a string into a structure.
//
// If you choose to parse HCL into a raw AST, the benefit is that you
// can write custom visitor implementations to implement custom
// semantic checks. By default, HCL does not perform any semantic
// checks.
package hcl

@ -1,219 +0,0 @@
// Package ast declares the types used to represent syntax trees for HCL
// (HashiCorp Configuration Language)
package ast
import (
"fmt"
"strings"
"github.com/hashicorp/hcl/hcl/token"
)
// Node is an element in the abstract syntax tree.
type Node interface {
node()
Pos() token.Pos
}
func (File) node() {}
func (ObjectList) node() {}
func (ObjectKey) node() {}
func (ObjectItem) node() {}
func (Comment) node() {}
func (CommentGroup) node() {}
func (ObjectType) node() {}
func (LiteralType) node() {}
func (ListType) node() {}
// File represents a single HCL file
type File struct {
Node Node // usually a *ObjectList
Comments []*CommentGroup // list of all comments in the source
}
func (f *File) Pos() token.Pos {
return f.Node.Pos()
}
// ObjectList represents a list of ObjectItems. An HCL file itself is an
// ObjectList.
type ObjectList struct {
Items []*ObjectItem
}
func (o *ObjectList) Add(item *ObjectItem) {
o.Items = append(o.Items, item)
}
// Filter filters out the objects with the given key list as a prefix.
//
// The returned list of objects contain ObjectItems where the keys have
// this prefix already stripped off. This might result in objects with
// zero-length key lists if they have no children.
//
// If no matches are found, an empty ObjectList (non-nil) is returned.
func (o *ObjectList) Filter(keys ...string) *ObjectList {
var result ObjectList
for _, item := range o.Items {
// If there aren't enough keys, then ignore this
if len(item.Keys) < len(keys) {
continue
}
match := true
for i, key := range item.Keys[:len(keys)] {
key := key.Token.Value().(string)
if key != keys[i] && !strings.EqualFold(key, keys[i]) {
match = false
break
}
}
if !match {
continue
}
// Strip off the prefix from the children
newItem := *item
newItem.Keys = newItem.Keys[len(keys):]
result.Add(&newItem)
}
return &result
}
// Children returns further nested objects (key length > 0) within this
// ObjectList. This should be used with Filter to get at child items.
func (o *ObjectList) Children() *ObjectList {
var result ObjectList
for _, item := range o.Items {
if len(item.Keys) > 0 {
result.Add(item)
}
}
return &result
}
// Elem returns items in the list that are direct element assignments
// (key length == 0). This should be used with Filter to get at elements.
func (o *ObjectList) Elem() *ObjectList {
var result ObjectList
for _, item := range o.Items {
if len(item.Keys) == 0 {
result.Add(item)
}
}
return &result
}
func (o *ObjectList) Pos() token.Pos {
// always returns the uninitiliazed position
return o.Items[0].Pos()
}
// ObjectItem represents a HCL Object Item. An item is represented with a key
// (or keys). It can be an assignment or an object (both normal and nested)
type ObjectItem struct {
// keys is only one length long if it's of type assignment. If it's a
// nested object it can be larger than one. In that case "assign" is
// invalid as there is no assignments for a nested object.
Keys []*ObjectKey
// assign contains the position of "=", if any
Assign token.Pos
// val is the item itself. It can be an object,list, number, bool or a
// string. If key length is larger than one, val can be only of type
// Object.
Val Node
LeadComment *CommentGroup // associated lead comment
LineComment *CommentGroup // associated line comment
}
func (o *ObjectItem) Pos() token.Pos {
// I'm not entirely sure what causes this, but removing this causes
// a test failure. We should investigate at some point.
if len(o.Keys) == 0 {
return token.Pos{}
}
return o.Keys[0].Pos()
}
// ObjectKeys are either an identifier or of type string.
type ObjectKey struct {
Token token.Token
}
func (o *ObjectKey) Pos() token.Pos {
return o.Token.Pos
}
// LiteralType represents a literal of basic type. Valid types are:
// token.NUMBER, token.FLOAT, token.BOOL and token.STRING
type LiteralType struct {
Token token.Token
// comment types, only used when in a list
LeadComment *CommentGroup
LineComment *CommentGroup
}
func (l *LiteralType) Pos() token.Pos {
return l.Token.Pos
}
// ListStatement represents a HCL List type
type ListType struct {
Lbrack token.Pos // position of "["
Rbrack token.Pos // position of "]"
List []Node // the elements in lexical order
}
func (l *ListType) Pos() token.Pos {
return l.Lbrack
}
func (l *ListType) Add(node Node) {
l.List = append(l.List, node)
}
// ObjectType represents a HCL Object Type
type ObjectType struct {
Lbrace token.Pos // position of "{"
Rbrace token.Pos // position of "}"
List *ObjectList // the nodes in lexical order
}
func (o *ObjectType) Pos() token.Pos {
return o.Lbrace
}
// Comment node represents a single //, # style or /*- style commment
type Comment struct {
Start token.Pos // position of / or #
Text string
}
func (c *Comment) Pos() token.Pos {
return c.Start
}
// CommentGroup node represents a sequence of comments with no other tokens and
// no empty lines between.
type CommentGroup struct {
List []*Comment // len(List) > 0
}
func (c *CommentGroup) Pos() token.Pos {
return c.List[0].Pos()
}
//-------------------------------------------------------------------
// GoStringer
//-------------------------------------------------------------------
func (o *ObjectKey) GoString() string { return fmt.Sprintf("*%#v", *o) }
func (o *ObjectList) GoString() string { return fmt.Sprintf("*%#v", *o) }

@ -1,52 +0,0 @@
package ast
import "fmt"
// WalkFunc describes a function to be called for each node during a Walk. The
// returned node can be used to rewrite the AST. Walking stops the returned
// bool is false.
type WalkFunc func(Node) (Node, bool)
// Walk traverses an AST in depth-first order: It starts by calling fn(node);
// node must not be nil. If fn returns true, Walk invokes fn recursively for
// each of the non-nil children of node, followed by a call of fn(nil). The
// returned node of fn can be used to rewrite the passed node to fn.
func Walk(node Node, fn WalkFunc) Node {
rewritten, ok := fn(node)
if !ok {
return rewritten
}
switch n := node.(type) {
case *File:
n.Node = Walk(n.Node, fn)
case *ObjectList:
for i, item := range n.Items {
n.Items[i] = Walk(item, fn).(*ObjectItem)
}
case *ObjectKey:
// nothing to do
case *ObjectItem:
for i, k := range n.Keys {
n.Keys[i] = Walk(k, fn).(*ObjectKey)
}
if n.Val != nil {
n.Val = Walk(n.Val, fn)
}
case *LiteralType:
// nothing to do
case *ListType:
for i, l := range n.List {
n.List[i] = Walk(l, fn)
}
case *ObjectType:
n.List = Walk(n.List, fn).(*ObjectList)
default:
// should we panic here?
fmt.Printf("unknown type: %T\n", n)
}
fn(nil)
return rewritten
}

@ -1,17 +0,0 @@
package parser
import (
"fmt"
"github.com/hashicorp/hcl/hcl/token"
)
// PosError is a parse error that contains a position.
type PosError struct {
Pos token.Pos
Err error
}
func (e *PosError) Error() string {
return fmt.Sprintf("At %s: %s", e.Pos, e.Err)
}

@ -1,532 +0,0 @@
// Package parser implements a parser for HCL (HashiCorp Configuration
// Language)
package parser
import (
"bytes"
"errors"
"fmt"
"strings"
"github.com/hashicorp/hcl/hcl/ast"
"github.com/hashicorp/hcl/hcl/scanner"
"github.com/hashicorp/hcl/hcl/token"
)
type Parser struct {
sc *scanner.Scanner
// Last read token
tok token.Token
commaPrev token.Token
comments []*ast.CommentGroup
leadComment *ast.CommentGroup // last lead comment
lineComment *ast.CommentGroup // last line comment
enableTrace bool
indent int
n int // buffer size (max = 1)
}
func newParser(src []byte) *Parser {
return &Parser{
sc: scanner.New(src),
}
}
// Parse returns the fully parsed source and returns the abstract syntax tree.
func Parse(src []byte) (*ast.File, error) {
// normalize all line endings
// since the scanner and output only work with "\n" line endings, we may
// end up with dangling "\r" characters in the parsed data.
src = bytes.Replace(src, []byte("\r\n"), []byte("\n"), -1)
p := newParser(src)
return p.Parse()
}
var errEofToken = errors.New("EOF token found")
// Parse returns the fully parsed source and returns the abstract syntax tree.
func (p *Parser) Parse() (*ast.File, error) {
f := &ast.File{}
var err, scerr error
p.sc.Error = func(pos token.Pos, msg string) {
scerr = &PosError{Pos: pos, Err: errors.New(msg)}
}
f.Node, err = p.objectList(false)
if scerr != nil {
return nil, scerr
}
if err != nil {
return nil, err
}
f.Comments = p.comments
return f, nil
}
// objectList parses a list of items within an object (generally k/v pairs).
// The parameter" obj" tells this whether to we are within an object (braces:
// '{', '}') or just at the top level. If we're within an object, we end
// at an RBRACE.
func (p *Parser) objectList(obj bool) (*ast.ObjectList, error) {
defer un(trace(p, "ParseObjectList"))
node := &ast.ObjectList{}
for {
if obj {
tok := p.scan()
p.unscan()
if tok.Type == token.RBRACE {
break
}
}
n, err := p.objectItem()
if err == errEofToken {
break // we are finished
}
// we don't return a nil node, because might want to use already
// collected items.
if err != nil {
return node, err
}
node.Add(n)
// object lists can be optionally comma-delimited e.g. when a list of maps
// is being expressed, so a comma is allowed here - it's simply consumed
tok := p.scan()
if tok.Type != token.COMMA {
p.unscan()
}
}
return node, nil
}
func (p *Parser) consumeComment() (comment *ast.Comment, endline int) {
endline = p.tok.Pos.Line
// count the endline if it's multiline comment, ie starting with /*
if len(p.tok.Text) > 1 && p.tok.Text[1] == '*' {
// don't use range here - no need to decode Unicode code points
for i := 0; i < len(p.tok.Text); i++ {
if p.tok.Text[i] == '\n' {
endline++
}
}
}
comment = &ast.Comment{Start: p.tok.Pos, Text: p.tok.Text}
p.tok = p.sc.Scan()
return
}
func (p *Parser) consumeCommentGroup(n int) (comments *ast.CommentGroup, endline int) {
var list []*ast.Comment
endline = p.tok.Pos.Line
for p.tok.Type == token.COMMENT && p.tok.Pos.Line <= endline+n {
var comment *ast.Comment
comment, endline = p.consumeComment()
list = append(list, comment)
}
// add comment group to the comments list
comments = &ast.CommentGroup{List: list}
p.comments = append(p.comments, comments)
return
}
// objectItem parses a single object item
func (p *Parser) objectItem() (*ast.ObjectItem, error) {
defer un(trace(p, "ParseObjectItem"))
keys, err := p.objectKey()
if len(keys) > 0 && err == errEofToken {
// We ignore eof token here since it is an error if we didn't
// receive a value (but we did receive a key) for the item.
err = nil
}
if len(keys) > 0 && err != nil && p.tok.Type == token.RBRACE {
// This is a strange boolean statement, but what it means is:
// We have keys with no value, and we're likely in an object
// (since RBrace ends an object). For this, we set err to nil so
// we continue and get the error below of having the wrong value
// type.
err = nil
// Reset the token type so we don't think it completed fine. See
// objectType which uses p.tok.Type to check if we're done with
// the object.
p.tok.Type = token.EOF
}
if err != nil {
return nil, err
}
o := &ast.ObjectItem{
Keys: keys,
}
if p.leadComment != nil {
o.LeadComment = p.leadComment
p.leadComment = nil
}
switch p.tok.Type {
case token.ASSIGN:
o.Assign = p.tok.Pos
o.Val, err = p.object()
if err != nil {
return nil, err
}
case token.LBRACE:
o.Val, err = p.objectType()
if err != nil {
return nil, err
}
default:
keyStr := make([]string, 0, len(keys))
for _, k := range keys {
keyStr = append(keyStr, k.Token.Text)
}
return nil, &PosError{
Pos: p.tok.Pos,
Err: fmt.Errorf(
"key '%s' expected start of object ('{') or assignment ('=')",
strings.Join(keyStr, " ")),
}
}
// key=#comment
// val
if p.lineComment != nil {
o.LineComment, p.lineComment = p.lineComment, nil
}
// do a look-ahead for line comment
p.scan()
if len(keys) > 0 && o.Val.Pos().Line == keys[0].Pos().Line && p.lineComment != nil {
o.LineComment = p.lineComment
p.lineComment = nil
}
p.unscan()
return o, nil
}
// objectKey parses an object key and returns a ObjectKey AST
func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
keyCount := 0
keys := make([]*ast.ObjectKey, 0)
for {
tok := p.scan()
switch tok.Type {
case token.EOF:
// It is very important to also return the keys here as well as
// the error. This is because we need to be able to tell if we
// did parse keys prior to finding the EOF, or if we just found
// a bare EOF.
return keys, errEofToken
case token.ASSIGN:
// assignment or object only, but not nested objects. this is not
// allowed: `foo bar = {}`
if keyCount > 1 {
return nil, &PosError{
Pos: p.tok.Pos,
Err: fmt.Errorf("nested object expected: LBRACE got: %s", p.tok.Type),
}
}
if keyCount == 0 {
return nil, &PosError{
Pos: p.tok.Pos,
Err: errors.New("no object keys found!"),
}
}
return keys, nil
case token.LBRACE:
var err error
// If we have no keys, then it is a syntax error. i.e. {{}} is not
// allowed.
if len(keys) == 0 {
err = &PosError{
Pos: p.tok.Pos,
Err: fmt.Errorf("expected: IDENT | STRING got: %s", p.tok.Type),
}
}
// object
return keys, err
case token.IDENT, token.STRING:
keyCount++
keys = append(keys, &ast.ObjectKey{Token: p.tok})
case token.ILLEGAL:
return keys, &PosError{
Pos: p.tok.Pos,
Err: fmt.Errorf("illegal character"),
}
default:
return keys, &PosError{
Pos: p.tok.Pos,
Err: fmt.Errorf("expected: IDENT | STRING | ASSIGN | LBRACE got: %s", p.tok.Type),
}
}
}
}
// object parses any type of object, such as number, bool, string, object or
// list.
func (p *Parser) object() (ast.Node, error) {
defer un(trace(p, "ParseType"))
tok := p.scan()
switch tok.Type {
case token.NUMBER, token.FLOAT, token.BOOL, token.STRING, token.HEREDOC:
return p.literalType()
case token.LBRACE:
return p.objectType()
case token.LBRACK:
return p.listType()
case token.COMMENT:
// implement comment
case token.EOF:
return nil, errEofToken
}
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf("Unknown token: %+v", tok),
}
}
// objectType parses an object type and returns a ObjectType AST
func (p *Parser) objectType() (*ast.ObjectType, error) {
defer un(trace(p, "ParseObjectType"))
// we assume that the currently scanned token is a LBRACE
o := &ast.ObjectType{
Lbrace: p.tok.Pos,
}
l, err := p.objectList(true)
// if we hit RBRACE, we are good to go (means we parsed all Items), if it's
// not a RBRACE, it's an syntax error and we just return it.
if err != nil && p.tok.Type != token.RBRACE {
return nil, err
}
// No error, scan and expect the ending to be a brace
if tok := p.scan(); tok.Type != token.RBRACE {
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf("object expected closing RBRACE got: %s", tok.Type),
}
}
o.List = l
o.Rbrace = p.tok.Pos // advanced via parseObjectList
return o, nil
}
// listType parses a list type and returns a ListType AST
func (p *Parser) listType() (*ast.ListType, error) {
defer un(trace(p, "ParseListType"))
// we assume that the currently scanned token is a LBRACK
l := &ast.ListType{
Lbrack: p.tok.Pos,
}
needComma := false
for {
tok := p.scan()
if needComma {
switch tok.Type {
case token.COMMA, token.RBRACK:
default:
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf(
"error parsing list, expected comma or list end, got: %s",
tok.Type),
}
}
}
switch tok.Type {
case token.BOOL, token.NUMBER, token.FLOAT, token.STRING, token.HEREDOC:
node, err := p.literalType()
if err != nil {
return nil, err
}
// If there is a lead comment, apply it
if p.leadComment != nil {
node.LeadComment = p.leadComment
p.leadComment = nil
}
l.Add(node)
needComma = true
case token.COMMA:
// get next list item or we are at the end
// do a look-ahead for line comment
p.scan()
if p.lineComment != nil && len(l.List) > 0 {
lit, ok := l.List[len(l.List)-1].(*ast.LiteralType)
if ok {
lit.LineComment = p.lineComment
l.List[len(l.List)-1] = lit
p.lineComment = nil
}
}
p.unscan()
needComma = false
continue
case token.LBRACE:
// Looks like a nested object, so parse it out
node, err := p.objectType()
if err != nil {
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf(
"error while trying to parse object within list: %s", err),
}
}
l.Add(node)
needComma = true
case token.LBRACK:
node, err := p.listType()
if err != nil {
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf(
"error while trying to parse list within list: %s", err),
}
}
l.Add(node)
case token.RBRACK:
// finished
l.Rbrack = p.tok.Pos
return l, nil
default:
return nil, &PosError{
Pos: tok.Pos,
Err: fmt.Errorf("unexpected token while parsing list: %s", tok.Type),
}
}
}
}
// literalType parses a literal type and returns a LiteralType AST
func (p *Parser) literalType() (*ast.LiteralType, error) {
defer un(trace(p, "ParseLiteral"))
return &ast.LiteralType{
Token: p.tok,
}, nil
}
// scan returns the next token from the underlying scanner. If a token has
// been unscanned then read that instead. In the process, it collects any
// comment groups encountered, and remembers the last lead and line comments.
func (p *Parser) scan() token.Token {
// If we have a token on the buffer, then return it.
if p.n != 0 {
p.n = 0
return p.tok
}
// Otherwise read the next token from the scanner and Save it to the buffer
// in case we unscan later.
prev := p.tok
p.tok = p.sc.Scan()
if p.tok.Type == token.COMMENT {
var comment *ast.CommentGroup
var endline int
// fmt.Printf("p.tok.Pos.Line = %+v prev: %d endline %d \n",
// p.tok.Pos.Line, prev.Pos.Line, endline)
if p.tok.Pos.Line == prev.Pos.Line {
// The comment is on same line as the previous token; it
// cannot be a lead comment but may be a line comment.
comment, endline = p.consumeCommentGroup(0)
if p.tok.Pos.Line != endline {
// The next token is on a different line, thus
// the last comment group is a line comment.
p.lineComment = comment
}
}
// consume successor comments, if any
endline = -1
for p.tok.Type == token.COMMENT {
comment, endline = p.consumeCommentGroup(1)
}
if endline+1 == p.tok.Pos.Line && p.tok.Type != token.RBRACE {
switch p.tok.Type {
case token.RBRACE, token.RBRACK:
// Do not count for these cases
default:
// The next token is following on the line immediately after the
// comment group, thus the last comment group is a lead comment.
p.leadComment = comment
}
}
}
return p.tok
}
// unscan pushes the previously read token back onto the buffer.
func (p *Parser) unscan() {
p.n = 1
}
// ----------------------------------------------------------------------------
// Parsing support
func (p *Parser) printTrace(a ...interface{}) {
if !p.enableTrace {
return
}
const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
const n = len(dots)
fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
i := 2 * p.indent
for i > n {
fmt.Print(dots)
i -= n
}
// i <= n
fmt.Print(dots[0:i])
fmt.Println(a...)
}
func trace(p *Parser, msg string) *Parser {
p.printTrace(msg, "(")
p.indent++
return p
}
// Usage pattern: defer un(trace(p, "..."))
func un(p *Parser) {
p.indent--
p.printTrace(")")
}

@ -1,652 +0,0 @@
// Package scanner implements a scanner for HCL (HashiCorp Configuration
// Language) source text.
package scanner
import (
"bytes"
"fmt"
"os"
"regexp"
"unicode"
"unicode/utf8"
"github.com/hashicorp/hcl/hcl/token"
)
// eof represents a marker rune for the end of the reader.
const eof = rune(0)
// Scanner defines a lexical scanner
type Scanner struct {
buf *bytes.Buffer // Source buffer for advancing and scanning
src []byte // Source buffer for immutable access
// Source Position
srcPos token.Pos // current position
prevPos token.Pos // previous position, used for peek() method
lastCharLen int // length of last character in bytes
lastLineLen int // length of last line in characters (for correct column reporting)
tokStart int // token text start position
tokEnd int // token text end position
// Error is called for each error encountered. If no Error
// function is set, the error is reported to os.Stderr.
Error func(pos token.Pos, msg string)
// ErrorCount is incremented by one for each error encountered.
ErrorCount int
// tokPos is the start position of most recently scanned token; set by
// Scan. The Filename field is always left untouched by the Scanner. If
// an error is reported (via Error) and Position is invalid, the scanner is
// not inside a token.
tokPos token.Pos
}
// New creates and initializes a new instance of Scanner using src as
// its source content.
func New(src []byte) *Scanner {
// even though we accept a src, we read from a io.Reader compatible type
// (*bytes.Buffer). So in the future we might easily change it to streaming
// read.
b := bytes.NewBuffer(src)
s := &Scanner{
buf: b,
src: src,
}
// srcPosition always starts with 1
s.srcPos.Line = 1
return s
}
// next reads the next rune from the bufferred reader. Returns the rune(0) if
// an error occurs (or io.EOF is returned).
func (s *Scanner) next() rune {
ch, size, err := s.buf.ReadRune()
if err != nil {
// advance for error reporting
s.srcPos.Column++
s.srcPos.Offset += size
s.lastCharLen = size
return eof
}
// remember last position
s.prevPos = s.srcPos
s.srcPos.Column++
s.lastCharLen = size
s.srcPos.Offset += size
if ch == utf8.RuneError && size == 1 {
s.err("illegal UTF-8 encoding")
return ch
}
if ch == '\n' {
s.srcPos.Line++
s.lastLineLen = s.srcPos.Column
s.srcPos.Column = 0
}
if ch == '\x00' {
s.err("unexpected null character (0x00)")
return eof
}
if ch == '\uE123' {
s.err("unicode code point U+E123 reserved for internal use")
return utf8.RuneError
}
// debug
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
return ch
}
// unread unreads the previous read Rune and updates the source position
func (s *Scanner) unread() {
if err := s.buf.UnreadRune(); err != nil {
panic(err) // this is user fault, we should catch it
}
s.srcPos = s.prevPos // put back last position
}
// peek returns the next rune without advancing the reader.
func (s *Scanner) peek() rune {
peek, _, err := s.buf.ReadRune()
if err != nil {
return eof
}
s.buf.UnreadRune()
return peek
}
// Scan scans the next token and returns the token.
func (s *Scanner) Scan() token.Token {
ch := s.next()
// skip white space
for isWhitespace(ch) {
ch = s.next()
}
var tok token.Type
// token text markings
s.tokStart = s.srcPos.Offset - s.lastCharLen
// token position, initial next() is moving the offset by one(size of rune
// actually), though we are interested with the starting point
s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
if s.srcPos.Column > 0 {
// common case: last character was not a '\n'
s.tokPos.Line = s.srcPos.Line
s.tokPos.Column = s.srcPos.Column
} else {
// last character was a '\n'
// (we cannot be at the beginning of the source
// since we have called next() at least once)
s.tokPos.Line = s.srcPos.Line - 1
s.tokPos.Column = s.lastLineLen
}
switch {
case isLetter(ch):
tok = token.IDENT
lit := s.scanIdentifier()
if lit == "true" || lit == "false" {
tok = token.BOOL
}
case isDecimal(ch):
tok = s.scanNumber(ch)
default:
switch ch {
case eof:
tok = token.EOF
case '"':
tok = token.STRING
s.scanString()
case '#', '/':
tok = token.COMMENT
s.scanComment(ch)
case '.':
tok = token.PERIOD
ch = s.peek()
if isDecimal(ch) {
tok = token.FLOAT
ch = s.scanMantissa(ch)
ch = s.scanExponent(ch)
}
case '<':
tok = token.HEREDOC
s.scanHeredoc()
case '[':
tok = token.LBRACK
case ']':
tok = token.RBRACK
case '{':
tok = token.LBRACE
case '}':
tok = token.RBRACE
case ',':
tok = token.COMMA
case '=':
tok = token.ASSIGN
case '+':
tok = token.ADD
case '-':
if isDecimal(s.peek()) {
ch := s.next()
tok = s.scanNumber(ch)
} else {
tok = token.SUB
}
default:
s.err("illegal char")
}
}
// finish token ending
s.tokEnd = s.srcPos.Offset
// create token literal
var tokenText string
if s.tokStart >= 0 {
tokenText = string(s.src[s.tokStart:s.tokEnd])
}
s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
return token.Token{
Type: tok,
Pos: s.tokPos,
Text: tokenText,
}
}
func (s *Scanner) scanComment(ch rune) {
// single line comments
if ch == '#' || (ch == '/' && s.peek() != '*') {
if ch == '/' && s.peek() != '/' {
s.err("expected '/' for comment")
return
}
ch = s.next()
for ch != '\n' && ch >= 0 && ch != eof {
ch = s.next()
}
if ch != eof && ch >= 0 {
s.unread()
}
return
}
// be sure we get the character after /* This allows us to find comment's
// that are not erminated
if ch == '/' {
s.next()
ch = s.next() // read character after "/*"
}
// look for /* - style comments
for {
if ch < 0 || ch == eof {
s.err("comment not terminated")
break
}
ch0 := ch
ch = s.next()
if ch0 == '*' && ch == '/' {
break
}
}
}
// scanNumber scans a HCL number definition starting with the given rune
func (s *Scanner) scanNumber(ch rune) token.Type {
if ch == '0' {
// check for hexadecimal, octal or float
ch = s.next()
if ch == 'x' || ch == 'X' {
// hexadecimal
ch = s.next()
found := false
for isHexadecimal(ch) {
ch = s.next()
found = true
}
if !found {
s.err("illegal hexadecimal number")
}
if ch != eof {
s.unread()
}
return token.NUMBER
}
// now it's either something like: 0421(octal) or 0.1231(float)
illegalOctal := false
for isDecimal(ch) {
ch = s.next()
if ch == '8' || ch == '9' {
// this is just a possibility. For example 0159 is illegal, but
// 0159.23 is valid. So we mark a possible illegal octal. If
// the next character is not a period, we'll print the error.
illegalOctal = true
}
}
if ch == 'e' || ch == 'E' {
ch = s.scanExponent(ch)
return token.FLOAT
}
if ch == '.' {
ch = s.scanFraction(ch)
if ch == 'e' || ch == 'E' {
ch = s.next()
ch = s.scanExponent(ch)
}
return token.FLOAT
}
if illegalOctal {
s.err("illegal octal number")
}
if ch != eof {
s.unread()
}
return token.NUMBER
}
s.scanMantissa(ch)
ch = s.next() // seek forward
if ch == 'e' || ch == 'E' {
ch = s.scanExponent(ch)
return token.FLOAT
}
if ch == '.' {
ch = s.scanFraction(ch)
if ch == 'e' || ch == 'E' {
ch = s.next()
ch = s.scanExponent(ch)
}
return token.FLOAT
}
if ch != eof {
s.unread()
}
return token.NUMBER
}
// scanMantissa scans the mantissa beginning from the rune. It returns the next
// non decimal rune. It's used to determine wheter it's a fraction or exponent.
func (s *Scanner) scanMantissa(ch rune) rune {
scanned := false
for isDecimal(ch) {
ch = s.next()
scanned = true
}
if scanned && ch != eof {
s.unread()
}
return ch
}
// scanFraction scans the fraction after the '.' rune
func (s *Scanner) scanFraction(ch rune) rune {
if ch == '.' {
ch = s.peek() // we peek just to see if we can move forward
ch = s.scanMantissa(ch)
}
return ch
}
// scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
// rune.
func (s *Scanner) scanExponent(ch rune) rune {
if ch == 'e' || ch == 'E' {
ch = s.next()
if ch == '-' || ch == '+' {
ch = s.next()
}
ch = s.scanMantissa(ch)
}
return ch
}
// scanHeredoc scans a heredoc string
func (s *Scanner) scanHeredoc() {
// Scan the second '<' in example: '<<EOF'
if s.next() != '<' {
s.err("heredoc expected second '<', didn't see it")
return
}
// Get the original offset so we can read just the heredoc ident
offs := s.srcPos.Offset
// Scan the identifier
ch := s.next()
// Indented heredoc syntax
if ch == '-' {
ch = s.next()
}
for isLetter(ch) || isDigit(ch) {
ch = s.next()
}
// If we reached an EOF then that is not good
if ch == eof {
s.err("heredoc not terminated")
return
}
// Ignore the '\r' in Windows line endings
if ch == '\r' {
if s.peek() == '\n' {
ch = s.next()
}
}
// If we didn't reach a newline then that is also not good
if ch != '\n' {
s.err("invalid characters in heredoc anchor")
return
}
// Read the identifier
identBytes := s.src[offs : s.srcPos.Offset-s.lastCharLen]
if len(identBytes) == 0 || (len(identBytes) == 1 && identBytes[0] == '-') {
s.err("zero-length heredoc anchor")
return
}
var identRegexp *regexp.Regexp
if identBytes[0] == '-' {
identRegexp = regexp.MustCompile(fmt.Sprintf(`^[[:space:]]*%s\r*\z`, identBytes[1:]))
} else {
identRegexp = regexp.MustCompile(fmt.Sprintf(`^[[:space:]]*%s\r*\z`, identBytes))
}
// Read the actual string value
lineStart := s.srcPos.Offset
for {
ch := s.next()
// Special newline handling.
if ch == '\n' {
// Math is fast, so we first compare the byte counts to see if we have a chance
// of seeing the same identifier - if the length is less than the number of bytes
// in the identifier, this cannot be a valid terminator.
lineBytesLen := s.srcPos.Offset - s.lastCharLen - lineStart
if lineBytesLen >= len(identBytes) && identRegexp.Match(s.src[lineStart:s.srcPos.Offset-s.lastCharLen]) {
break
}
// Not an anchor match, record the start of a new line
lineStart = s.srcPos.Offset
}
if ch == eof {
s.err("heredoc not terminated")
return
}
}
return
}
// scanString scans a quoted string
func (s *Scanner) scanString() {
braces := 0
for {
// '"' opening already consumed
// read character after quote
ch := s.next()
if (ch == '\n' && braces == 0) || ch < 0 || ch == eof {
s.err("literal not terminated")
return
}
if ch == '"' && braces == 0 {
break
}
// If we're going into a ${} then we can ignore quotes for awhile
if braces == 0 && ch == '$' && s.peek() == '{' {
braces++
s.next()
} else if braces > 0 && ch == '{' {
braces++
}
if braces > 0 && ch == '}' {
braces--
}
if ch == '\\' {
s.scanEscape()
}
}
return
}
// scanEscape scans an escape sequence
func (s *Scanner) scanEscape() rune {
// http://en.cppreference.com/w/cpp/language/escape
ch := s.next() // read character after '/'
switch ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
// nothing to do
case '0', '1', '2', '3', '4', '5', '6', '7':
// octal notation
ch = s.scanDigits(ch, 8, 3)
case 'x':
// hexademical notation
ch = s.scanDigits(s.next(), 16, 2)
case 'u':
// universal character name
ch = s.scanDigits(s.next(), 16, 4)
case 'U':
// universal character name
ch = s.scanDigits(s.next(), 16, 8)
default:
s.err("illegal char escape")
}
return ch
}
// scanDigits scans a rune with the given base for n times. For example an
// octal notation \184 would yield in scanDigits(ch, 8, 3)
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
start := n
for n > 0 && digitVal(ch) < base {
ch = s.next()
if ch == eof {
// If we see an EOF, we halt any more scanning of digits
// immediately.
break
}
n--
}
if n > 0 {
s.err("illegal char escape")
}
if n != start && ch != eof {
// we scanned all digits, put the last non digit char back,
// only if we read anything at all
s.unread()
}
return ch
}
// scanIdentifier scans an identifier and returns the literal string
func (s *Scanner) scanIdentifier() string {
offs := s.srcPos.Offset - s.lastCharLen
ch := s.next()
for isLetter(ch) || isDigit(ch) || ch == '-' || ch == '.' {
ch = s.next()
}
if ch != eof {
s.unread() // we got identifier, put back latest char
}
return string(s.src[offs:s.srcPos.Offset])
}
// recentPosition returns the position of the character immediately after the
// character or token returned by the last call to Scan.
func (s *Scanner) recentPosition() (pos token.Pos) {
pos.Offset = s.srcPos.Offset - s.lastCharLen
switch {
case s.srcPos.Column > 0:
// common case: last character was not a '\n'
pos.Line = s.srcPos.Line
pos.Column = s.srcPos.Column
case s.lastLineLen > 0:
// last character was a '\n'
// (we cannot be at the beginning of the source
// since we have called next() at least once)
pos.Line = s.srcPos.Line - 1
pos.Column = s.lastLineLen
default:
// at the beginning of the source
pos.Line = 1
pos.Column = 1
}
return
}
// err prints the error of any scanning to s.Error function. If the function is
// not defined, by default it prints them to os.Stderr
func (s *Scanner) err(msg string) {
s.ErrorCount++
pos := s.recentPosition()
if s.Error != nil {
s.Error(pos, msg)
return
}
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
}
// isHexadecimal returns true if the given rune is a letter
func isLetter(ch rune) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
}
// isDigit returns true if the given rune is a decimal digit
func isDigit(ch rune) bool {
return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
}
// isDecimal returns true if the given rune is a decimal number
func isDecimal(ch rune) bool {
return '0' <= ch && ch <= '9'
}
// isHexadecimal returns true if the given rune is an hexadecimal number
func isHexadecimal(ch rune) bool {
return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
}
// isWhitespace returns true if the rune is a space, tab, newline or carriage return
func isWhitespace(ch rune) bool {
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
}
// digitVal returns the integer value of a given octal,decimal or hexadecimal rune
func digitVal(ch rune) int {
switch {
case '0' <= ch && ch <= '9':
return int(ch - '0')
case 'a' <= ch && ch <= 'f':
return int(ch - 'a' + 10)
case 'A' <= ch && ch <= 'F':
return int(ch - 'A' + 10)
}
return 16 // larger than any legal digit val
}

@ -1,241 +0,0 @@
package strconv
import (
"errors"
"unicode/utf8"
)
// ErrSyntax indicates that a value does not have the right syntax for the target type.
var ErrSyntax = errors.New("invalid syntax")
// Unquote interprets s as a single-quoted, double-quoted,
// or backquoted Go string literal, returning the string value
// that s quotes. (If s is single-quoted, it would be a Go
// character literal; Unquote returns the corresponding
// one-character string.)
func Unquote(s string) (t string, err error) {
n := len(s)
if n < 2 {
return "", ErrSyntax
}
quote := s[0]
if quote != s[n-1] {
return "", ErrSyntax
}
s = s[1 : n-1]
if quote != '"' {
return "", ErrSyntax
}
if !contains(s, '$') && !contains(s, '{') && contains(s, '\n') {
return "", ErrSyntax
}
// Is it trivial? Avoid allocation.
if !contains(s, '\\') && !contains(s, quote) && !contains(s, '$') {
switch quote {
case '"':
return s, nil
case '\'':
r, size := utf8.DecodeRuneInString(s)
if size == len(s) && (r != utf8.RuneError || size != 1) {
return s, nil
}
}
}
var runeTmp [utf8.UTFMax]byte
buf := make([]byte, 0, 3*len(s)/2) // Try to avoid more allocations.
for len(s) > 0 {
// If we're starting a '${}' then let it through un-unquoted.
// Specifically: we don't unquote any characters within the `${}`
// section.
if s[0] == '$' && len(s) > 1 && s[1] == '{' {
buf = append(buf, '$', '{')
s = s[2:]
// Continue reading until we find the closing brace, copying as-is
braces := 1
for len(s) > 0 && braces > 0 {
r, size := utf8.DecodeRuneInString(s)
if r == utf8.RuneError {
return "", ErrSyntax
}
s = s[size:]
n := utf8.EncodeRune(runeTmp[:], r)
buf = append(buf, runeTmp[:n]...)
switch r {
case '{':
braces++
case '}':
braces--
}
}
if braces != 0 {
return "", ErrSyntax
}
if len(s) == 0 {
// If there's no string left, we're done!
break
} else {
// If there's more left, we need to pop back up to the top of the loop
// in case there's another interpolation in this string.
continue
}
}
if s[0] == '\n' {
return "", ErrSyntax
}
c, multibyte, ss, err := unquoteChar(s, quote)
if err != nil {
return "", err
}
s = ss
if c < utf8.RuneSelf || !multibyte {
buf = append(buf, byte(c))
} else {
n := utf8.EncodeRune(runeTmp[:], c)
buf = append(buf, runeTmp[:n]...)
}
if quote == '\'' && len(s) != 0 {
// single-quoted must be single character
return "", ErrSyntax
}
}
return string(buf), nil
}
// contains reports whether the string contains the byte c.
func contains(s string, c byte) bool {
for i := 0; i < len(s); i++ {
if s[i] == c {
return true
}
}
return false
}
func unhex(b byte) (v rune, ok bool) {
c := rune(b)
switch {
case '0' <= c && c <= '9':
return c - '0', true
case 'a' <= c && c <= 'f':
return c - 'a' + 10, true
case 'A' <= c && c <= 'F':
return c - 'A' + 10, true
}
return
}
func unquoteChar(s string, quote byte) (value rune, multibyte bool, tail string, err error) {
// easy cases
switch c := s[0]; {
case c == quote && (quote == '\'' || quote == '"'):
err = ErrSyntax
return
case c >= utf8.RuneSelf:
r, size := utf8.DecodeRuneInString(s)
return r, true, s[size:], nil
case c != '\\':
return rune(s[0]), false, s[1:], nil
}
// hard case: c is backslash
if len(s) <= 1 {
err = ErrSyntax
return
}
c := s[1]
s = s[2:]
switch c {
case 'a':
value = '\a'
case 'b':
value = '\b'
case 'f':
value = '\f'
case 'n':
value = '\n'
case 'r':
value = '\r'
case 't':
value = '\t'
case 'v':
value = '\v'
case 'x', 'u', 'U':
n := 0
switch c {
case 'x':
n = 2
case 'u':
n = 4
case 'U':
n = 8
}
var v rune
if len(s) < n {
err = ErrSyntax
return
}
for j := 0; j < n; j++ {
x, ok := unhex(s[j])
if !ok {
err = ErrSyntax
return
}
v = v<<4 | x
}
s = s[n:]
if c == 'x' {
// single-byte string, possibly not UTF-8
value = v
break
}
if v > utf8.MaxRune {
err = ErrSyntax
return
}
value = v
multibyte = true
case '0', '1', '2', '3', '4', '5', '6', '7':
v := rune(c) - '0'
if len(s) < 2 {
err = ErrSyntax
return
}
for j := 0; j < 2; j++ { // one digit already; two more
x := rune(s[j]) - '0'
if x < 0 || x > 7 {
err = ErrSyntax
return
}
v = (v << 3) | x
}
s = s[2:]
if v > 255 {
err = ErrSyntax
return
}
value = v
case '\\':
value = '\\'
case '\'', '"':
if c != quote {
err = ErrSyntax
return
}
value = rune(c)
default:
err = ErrSyntax
return
}
tail = s
return
}

@ -1,46 +0,0 @@
package token
import "fmt"
// Pos describes an arbitrary source position
// including the file, line, and column location.
// A Position is valid if the line number is > 0.
type Pos struct {
Filename string // filename, if any
Offset int // offset, starting at 0
Line int // line number, starting at 1
Column int // column number, starting at 1 (character count)
}
// IsValid returns true if the position is valid.
func (p *Pos) IsValid() bool { return p.Line > 0 }
// String returns a string in one of several forms:
//
// file:line:column valid position with file name
// line:column valid position without file name
// file invalid position with file name
// - invalid position without file name
func (p Pos) String() string {
s := p.Filename
if p.IsValid() {
if s != "" {
s += ":"
}
s += fmt.Sprintf("%d:%d", p.Line, p.Column)
}
if s == "" {
s = "-"
}
return s
}
// Before reports whether the position p is before u.
func (p Pos) Before(u Pos) bool {
return u.Offset > p.Offset || u.Line > p.Line
}
// After reports whether the position p is after u.
func (p Pos) After(u Pos) bool {
return u.Offset < p.Offset || u.Line < p.Line
}

@ -1,219 +0,0 @@
// Package token defines constants representing the lexical tokens for HCL
// (HashiCorp Configuration Language)
package token
import (
"fmt"
"strconv"
"strings"
hclstrconv "github.com/hashicorp/hcl/hcl/strconv"
)
// Token defines a single HCL token which can be obtained via the Scanner
type Token struct {
Type Type
Pos Pos
Text string
JSON bool
}
// Type is the set of lexical tokens of the HCL (HashiCorp Configuration Language)
type Type int
const (
// Special tokens
ILLEGAL Type = iota
EOF
COMMENT
identifier_beg
IDENT // literals
literal_beg
NUMBER // 12345
FLOAT // 123.45
BOOL // true,false
STRING // "abc"
HEREDOC // <<FOO\nbar\nFOO
literal_end
identifier_end
operator_beg
LBRACK // [
LBRACE // {
COMMA // ,
PERIOD // .
RBRACK // ]
RBRACE // }
ASSIGN // =
ADD // +
SUB // -
operator_end
)
var tokens = [...]string{
ILLEGAL: "ILLEGAL",
EOF: "EOF",
COMMENT: "COMMENT",
IDENT: "IDENT",
NUMBER: "NUMBER",
FLOAT: "FLOAT",
BOOL: "BOOL",
STRING: "STRING",
LBRACK: "LBRACK",
LBRACE: "LBRACE",
COMMA: "COMMA",
PERIOD: "PERIOD",
HEREDOC: "HEREDOC",
RBRACK: "RBRACK",
RBRACE: "RBRACE",
ASSIGN: "ASSIGN",
ADD: "ADD",
SUB: "SUB",
}
// String returns the string corresponding to the token tok.
func (t Type) String() string {
s := ""
if 0 <= t && t < Type(len(tokens)) {
s = tokens[t]
}
if s == "" {
s = "token(" + strconv.Itoa(int(t)) + ")"
}
return s
}
// IsIdentifier returns true for tokens corresponding to identifiers and basic
// type literals; it returns false otherwise.
func (t Type) IsIdentifier() bool { return identifier_beg < t && t < identifier_end }
// IsLiteral returns true for tokens corresponding to basic type literals; it
// returns false otherwise.
func (t Type) IsLiteral() bool { return literal_beg < t && t < literal_end }
// IsOperator returns true for tokens corresponding to operators and
// delimiters; it returns false otherwise.
func (t Type) IsOperator() bool { return operator_beg < t && t < operator_end }
// String returns the token's literal text. Note that this is only
// applicable for certain token types, such as token.IDENT,
// token.STRING, etc..
func (t Token) String() string {
return fmt.Sprintf("%s %s %s", t.Pos.String(), t.Type.String(), t.Text)
}
// Value returns the properly typed value for this token. The type of
// the returned interface{} is guaranteed based on the Type field.
//
// This can only be called for literal types. If it is called for any other
// type, this will panic.
func (t Token) Value() interface{} {
switch t.Type {
case BOOL:
if t.Text == "true" {
return true
} else if t.Text == "false" {
return false
}
panic("unknown bool value: " + t.Text)
case FLOAT:
v, err := strconv.ParseFloat(t.Text, 64)
if err != nil {
panic(err)
}
return float64(v)
case NUMBER:
v, err := strconv.ParseInt(t.Text, 0, 64)
if err != nil {
panic(err)
}
return int64(v)
case IDENT:
return t.Text
case HEREDOC:
return unindentHeredoc(t.Text)
case STRING:
// Determine the Unquote method to use. If it came from JSON,
// then we need to use the built-in unquote since we have to
// escape interpolations there.
f := hclstrconv.Unquote
if t.JSON {
f = strconv.Unquote
}
// This case occurs if json null is used
if t.Text == "" {
return ""
}
v, err := f(t.Text)
if err != nil {
panic(fmt.Sprintf("unquote %s err: %s", t.Text, err))
}
return v
default:
panic(fmt.Sprintf("unimplemented Value for type: %s", t.Type))
}
}
// unindentHeredoc returns the string content of a HEREDOC if it is started with <<
// and the content of a HEREDOC with the hanging indent removed if it is started with
// a <<-, and the terminating line is at least as indented as the least indented line.
func unindentHeredoc(heredoc string) string {
// We need to find the end of the marker
idx := strings.IndexByte(heredoc, '\n')
if idx == -1 {
panic("heredoc doesn't contain newline")
}
unindent := heredoc[2] == '-'
// We can optimize if the heredoc isn't marked for indentation
if !unindent {
return string(heredoc[idx+1 : len(heredoc)-idx+1])
}
// We need to unindent each line based on the indentation level of the marker
lines := strings.Split(string(heredoc[idx+1:len(heredoc)-idx+2]), "\n")
whitespacePrefix := lines[len(lines)-1]
isIndented := true
for _, v := range lines {
if strings.HasPrefix(v, whitespacePrefix) {
continue
}
isIndented = false
break
}
// If all lines are not at least as indented as the terminating mark, return the
// heredoc as is, but trim the leading space from the marker on the final line.
if !isIndented {
return strings.TrimRight(string(heredoc[idx+1:len(heredoc)-idx+1]), " \t")
}
unindentedLines := make([]string, len(lines))
for k, v := range lines {
if k == len(lines)-1 {
unindentedLines[k] = ""
break
}
unindentedLines[k] = strings.TrimPrefix(v, whitespacePrefix)
}
return strings.Join(unindentedLines, "\n")
}

@ -1,117 +0,0 @@
package parser
import "github.com/hashicorp/hcl/hcl/ast"
// flattenObjects takes an AST node, walks it, and flattens
func flattenObjects(node ast.Node) {
ast.Walk(node, func(n ast.Node) (ast.Node, bool) {
// We only care about lists, because this is what we modify
list, ok := n.(*ast.ObjectList)
if !ok {
return n, true
}
// Rebuild the item list
items := make([]*ast.ObjectItem, 0, len(list.Items))
frontier := make([]*ast.ObjectItem, len(list.Items))
copy(frontier, list.Items)
for len(frontier) > 0 {
// Pop the current item
n := len(frontier)
item := frontier[n-1]
frontier = frontier[:n-1]
switch v := item.Val.(type) {
case *ast.ObjectType:
items, frontier = flattenObjectType(v, item, items, frontier)
case *ast.ListType:
items, frontier = flattenListType(v, item, items, frontier)
default:
items = append(items, item)
}
}
// Reverse the list since the frontier model runs things backwards
for i := len(items)/2 - 1; i >= 0; i-- {
opp := len(items) - 1 - i
items[i], items[opp] = items[opp], items[i]
}
// Done! Set the original items
list.Items = items
return n, true
})
}
func flattenListType(
ot *ast.ListType,
item *ast.ObjectItem,
items []*ast.ObjectItem,
frontier []*ast.ObjectItem) ([]*ast.ObjectItem, []*ast.ObjectItem) {
// If the list is empty, keep the original list
if len(ot.List) == 0 {
items = append(items, item)
return items, frontier
}
// All the elements of this object must also be objects!
for _, subitem := range ot.List {
if _, ok := subitem.(*ast.ObjectType); !ok {
items = append(items, item)
return items, frontier
}
}
// Great! We have a match go through all the items and flatten
for _, elem := range ot.List {
// Add it to the frontier so that we can recurse
frontier = append(frontier, &ast.ObjectItem{
Keys: item.Keys,
Assign: item.Assign,
Val: elem,
LeadComment: item.LeadComment,
LineComment: item.LineComment,
})
}
return items, frontier
}
func flattenObjectType(
ot *ast.ObjectType,
item *ast.ObjectItem,
items []*ast.ObjectItem,
frontier []*ast.ObjectItem) ([]*ast.ObjectItem, []*ast.ObjectItem) {
// If the list has no items we do not have to flatten anything
if ot.List.Items == nil {
items = append(items, item)
return items, frontier
}
// All the elements of this object must also be objects!
for _, subitem := range ot.List.Items {
if _, ok := subitem.Val.(*ast.ObjectType); !ok {
items = append(items, item)
return items, frontier
}
}
// Great! We have a match go through all the items and flatten
for _, subitem := range ot.List.Items {
// Copy the new key
keys := make([]*ast.ObjectKey, len(item.Keys)+len(subitem.Keys))
copy(keys, item.Keys)
copy(keys[len(item.Keys):], subitem.Keys)
// Add it to the frontier so that we can recurse
frontier = append(frontier, &ast.ObjectItem{
Keys: keys,
Assign: item.Assign,
Val: subitem.Val,
LeadComment: item.LeadComment,
LineComment: item.LineComment,
})
}
return items, frontier
}

@ -1,313 +0,0 @@
package parser
import (
"errors"
"fmt"
"github.com/hashicorp/hcl/hcl/ast"
hcltoken "github.com/hashicorp/hcl/hcl/token"
"github.com/hashicorp/hcl/json/scanner"
"github.com/hashicorp/hcl/json/token"
)
type Parser struct {
sc *scanner.Scanner
// Last read token
tok token.Token
commaPrev token.Token
enableTrace bool
indent int
n int // buffer size (max = 1)
}
func newParser(src []byte) *Parser {
return &Parser{
sc: scanner.New(src),
}
}
// Parse returns the fully parsed source and returns the abstract syntax tree.
func Parse(src []byte) (*ast.File, error) {
p := newParser(src)
return p.Parse()
}
var errEofToken = errors.New("EOF token found")
// Parse returns the fully parsed source and returns the abstract syntax tree.
func (p *Parser) Parse() (*ast.File, error) {
f := &ast.File{}
var err, scerr error
p.sc.Error = func(pos token.Pos, msg string) {
scerr = fmt.Errorf("%s: %s", pos, msg)
}
// The root must be an object in JSON
object, err := p.object()
if scerr != nil {
return nil, scerr
}
if err != nil {
return nil, err
}
// We make our final node an object list so it is more HCL compatible
f.Node = object.List
// Flatten it, which finds patterns and turns them into more HCL-like
// AST trees.
flattenObjects(f.Node)
return f, nil
}
func (p *Parser) objectList() (*ast.ObjectList, error) {
defer un(trace(p, "ParseObjectList"))
node := &ast.ObjectList{}
for {
n, err := p.objectItem()
if err == errEofToken {
break // we are finished
}
// we don't return a nil node, because might want to use already
// collected items.
if err != nil {
return node, err
}
node.Add(n)
// Check for a followup comma. If it isn't a comma, then we're done
if tok := p.scan(); tok.Type != token.COMMA {
break
}
}
return node, nil
}
// objectItem parses a single object item
func (p *Parser) objectItem() (*ast.ObjectItem, error) {
defer un(trace(p, "ParseObjectItem"))
keys, err := p.objectKey()
if err != nil {
return nil, err
}
o := &ast.ObjectItem{
Keys: keys,
}
switch p.tok.Type {
case token.COLON:
pos := p.tok.Pos
o.Assign = hcltoken.Pos{
Filename: pos.Filename,
Offset: pos.Offset,
Line: pos.Line,
Column: pos.Column,
}
o.Val, err = p.objectValue()
if err != nil {
return nil, err
}
}
return o, nil
}
// objectKey parses an object key and returns a ObjectKey AST
func (p *Parser) objectKey() ([]*ast.ObjectKey, error) {
keyCount := 0
keys := make([]*ast.ObjectKey, 0)
for {
tok := p.scan()
switch tok.Type {
case token.EOF:
return nil, errEofToken
case token.STRING:
keyCount++
keys = append(keys, &ast.ObjectKey{
Token: p.tok.HCLToken(),
})
case token.COLON:
// If we have a zero keycount it means that we never got
// an object key, i.e. `{ :`. This is a syntax error.
if keyCount == 0 {
return nil, fmt.Errorf("expected: STRING got: %s", p.tok.Type)
}
// Done
return keys, nil
case token.ILLEGAL:
return nil, errors.New("illegal")
default:
return nil, fmt.Errorf("expected: STRING got: %s", p.tok.Type)
}
}
}
// object parses any type of object, such as number, bool, string, object or
// list.
func (p *Parser) objectValue() (ast.Node, error) {
defer un(trace(p, "ParseObjectValue"))
tok := p.scan()
switch tok.Type {
case token.NUMBER, token.FLOAT, token.BOOL, token.NULL, token.STRING:
return p.literalType()
case token.LBRACE:
return p.objectType()
case token.LBRACK:
return p.listType()
case token.EOF:
return nil, errEofToken
}
return nil, fmt.Errorf("Expected object value, got unknown token: %+v", tok)
}
// object parses any type of object, such as number, bool, string, object or
// list.
func (p *Parser) object() (*ast.ObjectType, error) {
defer un(trace(p, "ParseType"))
tok := p.scan()
switch tok.Type {
case token.LBRACE:
return p.objectType()
case token.EOF:
return nil, errEofToken
}
return nil, fmt.Errorf("Expected object, got unknown token: %+v", tok)
}
// objectType parses an object type and returns a ObjectType AST
func (p *Parser) objectType() (*ast.ObjectType, error) {
defer un(trace(p, "ParseObjectType"))
// we assume that the currently scanned token is a LBRACE
o := &ast.ObjectType{}
l, err := p.objectList()
// if we hit RBRACE, we are good to go (means we parsed all Items), if it's
// not a RBRACE, it's an syntax error and we just return it.
if err != nil && p.tok.Type != token.RBRACE {
return nil, err
}
o.List = l
return o, nil
}
// listType parses a list type and returns a ListType AST
func (p *Parser) listType() (*ast.ListType, error) {
defer un(trace(p, "ParseListType"))
// we assume that the currently scanned token is a LBRACK
l := &ast.ListType{}
for {
tok := p.scan()
switch tok.Type {
case token.NUMBER, token.FLOAT, token.STRING:
node, err := p.literalType()
if err != nil {
return nil, err
}
l.Add(node)
case token.COMMA:
continue
case token.LBRACE:
node, err := p.objectType()
if err != nil {
return nil, err
}
l.Add(node)
case token.BOOL:
// TODO(arslan) should we support? not supported by HCL yet
case token.LBRACK:
// TODO(arslan) should we support nested lists? Even though it's
// written in README of HCL, it's not a part of the grammar
// (not defined in parse.y)
case token.RBRACK:
// finished
return l, nil
default:
return nil, fmt.Errorf("unexpected token while parsing list: %s", tok.Type)
}
}
}
// literalType parses a literal type and returns a LiteralType AST
func (p *Parser) literalType() (*ast.LiteralType, error) {
defer un(trace(p, "ParseLiteral"))
return &ast.LiteralType{
Token: p.tok.HCLToken(),
}, nil
}
// scan returns the next token from the underlying scanner. If a token has
// been unscanned then read that instead.
func (p *Parser) scan() token.Token {
// If we have a token on the buffer, then return it.
if p.n != 0 {
p.n = 0
return p.tok
}
p.tok = p.sc.Scan()
return p.tok
}
// unscan pushes the previously read token back onto the buffer.
func (p *Parser) unscan() {
p.n = 1
}
// ----------------------------------------------------------------------------
// Parsing support
func (p *Parser) printTrace(a ...interface{}) {
if !p.enableTrace {
return
}
const dots = ". . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . "
const n = len(dots)
fmt.Printf("%5d:%3d: ", p.tok.Pos.Line, p.tok.Pos.Column)
i := 2 * p.indent
for i > n {
fmt.Print(dots)
i -= n
}
// i <= n
fmt.Print(dots[0:i])
fmt.Println(a...)
}
func trace(p *Parser, msg string) *Parser {
p.printTrace(msg, "(")
p.indent++
return p
}
// Usage pattern: defer un(trace(p, "..."))
func un(p *Parser) {
p.indent--
p.printTrace(")")
}

@ -1,451 +0,0 @@
package scanner
import (
"bytes"
"fmt"
"os"
"unicode"
"unicode/utf8"
"github.com/hashicorp/hcl/json/token"
)
// eof represents a marker rune for the end of the reader.
const eof = rune(0)
// Scanner defines a lexical scanner
type Scanner struct {
buf *bytes.Buffer // Source buffer for advancing and scanning
src []byte // Source buffer for immutable access
// Source Position
srcPos token.Pos // current position
prevPos token.Pos // previous position, used for peek() method
lastCharLen int // length of last character in bytes
lastLineLen int // length of last line in characters (for correct column reporting)
tokStart int // token text start position
tokEnd int // token text end position
// Error is called for each error encountered. If no Error
// function is set, the error is reported to os.Stderr.
Error func(pos token.Pos, msg string)
// ErrorCount is incremented by one for each error encountered.
ErrorCount int
// tokPos is the start position of most recently scanned token; set by
// Scan. The Filename field is always left untouched by the Scanner. If
// an error is reported (via Error) and Position is invalid, the scanner is
// not inside a token.
tokPos token.Pos
}
// New creates and initializes a new instance of Scanner using src as
// its source content.
func New(src []byte) *Scanner {
// even though we accept a src, we read from a io.Reader compatible type
// (*bytes.Buffer). So in the future we might easily change it to streaming
// read.
b := bytes.NewBuffer(src)
s := &Scanner{
buf: b,
src: src,
}
// srcPosition always starts with 1
s.srcPos.Line = 1
return s
}
// next reads the next rune from the bufferred reader. Returns the rune(0) if
// an error occurs (or io.EOF is returned).
func (s *Scanner) next() rune {
ch, size, err := s.buf.ReadRune()
if err != nil {
// advance for error reporting
s.srcPos.Column++
s.srcPos.Offset += size
s.lastCharLen = size
return eof
}
if ch == utf8.RuneError && size == 1 {
s.srcPos.Column++
s.srcPos.Offset += size
s.lastCharLen = size
s.err("illegal UTF-8 encoding")
return ch
}
// remember last position
s.prevPos = s.srcPos
s.srcPos.Column++
s.lastCharLen = size
s.srcPos.Offset += size
if ch == '\n' {
s.srcPos.Line++
s.lastLineLen = s.srcPos.Column
s.srcPos.Column = 0
}
// debug
// fmt.Printf("ch: %q, offset:column: %d:%d\n", ch, s.srcPos.Offset, s.srcPos.Column)
return ch
}
// unread unreads the previous read Rune and updates the source position
func (s *Scanner) unread() {
if err := s.buf.UnreadRune(); err != nil {
panic(err) // this is user fault, we should catch it
}
s.srcPos = s.prevPos // put back last position
}
// peek returns the next rune without advancing the reader.
func (s *Scanner) peek() rune {
peek, _, err := s.buf.ReadRune()
if err != nil {
return eof
}
s.buf.UnreadRune()
return peek
}
// Scan scans the next token and returns the token.
func (s *Scanner) Scan() token.Token {
ch := s.next()
// skip white space
for isWhitespace(ch) {
ch = s.next()
}
var tok token.Type
// token text markings
s.tokStart = s.srcPos.Offset - s.lastCharLen
// token position, initial next() is moving the offset by one(size of rune
// actually), though we are interested with the starting point
s.tokPos.Offset = s.srcPos.Offset - s.lastCharLen
if s.srcPos.Column > 0 {
// common case: last character was not a '\n'
s.tokPos.Line = s.srcPos.Line
s.tokPos.Column = s.srcPos.Column
} else {
// last character was a '\n'
// (we cannot be at the beginning of the source
// since we have called next() at least once)
s.tokPos.Line = s.srcPos.Line - 1
s.tokPos.Column = s.lastLineLen
}
switch {
case isLetter(ch):
lit := s.scanIdentifier()
if lit == "true" || lit == "false" {
tok = token.BOOL
} else if lit == "null" {
tok = token.NULL
} else {
s.err("illegal char")
}
case isDecimal(ch):
tok = s.scanNumber(ch)
default:
switch ch {
case eof:
tok = token.EOF
case '"':
tok = token.STRING
s.scanString()
case '.':
tok = token.PERIOD
ch = s.peek()
if isDecimal(ch) {
tok = token.FLOAT
ch = s.scanMantissa(ch)
ch = s.scanExponent(ch)
}
case '[':
tok = token.LBRACK
case ']':
tok = token.RBRACK
case '{':
tok = token.LBRACE
case '}':
tok = token.RBRACE
case ',':
tok = token.COMMA
case ':':
tok = token.COLON
case '-':
if isDecimal(s.peek()) {
ch := s.next()
tok = s.scanNumber(ch)
} else {
s.err("illegal char")
}
default:
s.err("illegal char: " + string(ch))
}
}
// finish token ending
s.tokEnd = s.srcPos.Offset
// create token literal
var tokenText string
if s.tokStart >= 0 {
tokenText = string(s.src[s.tokStart:s.tokEnd])
}
s.tokStart = s.tokEnd // ensure idempotency of tokenText() call
return token.Token{
Type: tok,
Pos: s.tokPos,
Text: tokenText,
}
}
// scanNumber scans a HCL number definition starting with the given rune
func (s *Scanner) scanNumber(ch rune) token.Type {
zero := ch == '0'
pos := s.srcPos
s.scanMantissa(ch)
ch = s.next() // seek forward
if ch == 'e' || ch == 'E' {
ch = s.scanExponent(ch)
return token.FLOAT
}
if ch == '.' {
ch = s.scanFraction(ch)
if ch == 'e' || ch == 'E' {
ch = s.next()
ch = s.scanExponent(ch)
}
return token.FLOAT
}
if ch != eof {
s.unread()
}
// If we have a larger number and this is zero, error
if zero && pos != s.srcPos {
s.err("numbers cannot start with 0")
}
return token.NUMBER
}
// scanMantissa scans the mantissa beginning from the rune. It returns the next
// non decimal rune. It's used to determine wheter it's a fraction or exponent.
func (s *Scanner) scanMantissa(ch rune) rune {
scanned := false
for isDecimal(ch) {
ch = s.next()
scanned = true
}
if scanned && ch != eof {
s.unread()
}
return ch
}
// scanFraction scans the fraction after the '.' rune
func (s *Scanner) scanFraction(ch rune) rune {
if ch == '.' {
ch = s.peek() // we peek just to see if we can move forward
ch = s.scanMantissa(ch)
}
return ch
}
// scanExponent scans the remaining parts of an exponent after the 'e' or 'E'
// rune.
func (s *Scanner) scanExponent(ch rune) rune {
if ch == 'e' || ch == 'E' {
ch = s.next()
if ch == '-' || ch == '+' {
ch = s.next()
}
ch = s.scanMantissa(ch)
}
return ch
}
// scanString scans a quoted string
func (s *Scanner) scanString() {
braces := 0
for {
// '"' opening already consumed
// read character after quote
ch := s.next()
if ch == '\n' || ch < 0 || ch == eof {
s.err("literal not terminated")
return
}
if ch == '"' {
break
}
// If we're going into a ${} then we can ignore quotes for awhile
if braces == 0 && ch == '$' && s.peek() == '{' {
braces++
s.next()
} else if braces > 0 && ch == '{' {
braces++
}
if braces > 0 && ch == '}' {
braces--
}
if ch == '\\' {
s.scanEscape()
}
}
return
}
// scanEscape scans an escape sequence
func (s *Scanner) scanEscape() rune {
// http://en.cppreference.com/w/cpp/language/escape
ch := s.next() // read character after '/'
switch ch {
case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', '"':
// nothing to do
case '0', '1', '2', '3', '4', '5', '6', '7':
// octal notation
ch = s.scanDigits(ch, 8, 3)
case 'x':
// hexademical notation
ch = s.scanDigits(s.next(), 16, 2)
case 'u':
// universal character name
ch = s.scanDigits(s.next(), 16, 4)
case 'U':
// universal character name
ch = s.scanDigits(s.next(), 16, 8)
default:
s.err("illegal char escape")
}
return ch
}
// scanDigits scans a rune with the given base for n times. For example an
// octal notation \184 would yield in scanDigits(ch, 8, 3)
func (s *Scanner) scanDigits(ch rune, base, n int) rune {
for n > 0 && digitVal(ch) < base {
ch = s.next()
n--
}
if n > 0 {
s.err("illegal char escape")
}
// we scanned all digits, put the last non digit char back
s.unread()
return ch
}
// scanIdentifier scans an identifier and returns the literal string
func (s *Scanner) scanIdentifier() string {
offs := s.srcPos.Offset - s.lastCharLen
ch := s.next()
for isLetter(ch) || isDigit(ch) || ch == '-' {
ch = s.next()
}
if ch != eof {
s.unread() // we got identifier, put back latest char
}
return string(s.src[offs:s.srcPos.Offset])
}
// recentPosition returns the position of the character immediately after the
// character or token returned by the last call to Scan.
func (s *Scanner) recentPosition() (pos token.Pos) {
pos.Offset = s.srcPos.Offset - s.lastCharLen
switch {
case s.srcPos.Column > 0:
// common case: last character was not a '\n'
pos.Line = s.srcPos.Line
pos.Column = s.srcPos.Column
case s.lastLineLen > 0:
// last character was a '\n'
// (we cannot be at the beginning of the source
// since we have called next() at least once)
pos.Line = s.srcPos.Line - 1
pos.Column = s.lastLineLen
default:
// at the beginning of the source
pos.Line = 1
pos.Column = 1
}
return
}
// err prints the error of any scanning to s.Error function. If the function is
// not defined, by default it prints them to os.Stderr
func (s *Scanner) err(msg string) {
s.ErrorCount++
pos := s.recentPosition()
if s.Error != nil {
s.Error(pos, msg)
return
}
fmt.Fprintf(os.Stderr, "%s: %s\n", pos, msg)
}
// isHexadecimal returns true if the given rune is a letter
func isLetter(ch rune) bool {
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
}
// isHexadecimal returns true if the given rune is a decimal digit
func isDigit(ch rune) bool {
return '0' <= ch && ch <= '9' || ch >= 0x80 && unicode.IsDigit(ch)
}
// isHexadecimal returns true if the given rune is a decimal number
func isDecimal(ch rune) bool {
return '0' <= ch && ch <= '9'
}
// isHexadecimal returns true if the given rune is an hexadecimal number
func isHexadecimal(ch rune) bool {
return '0' <= ch && ch <= '9' || 'a' <= ch && ch <= 'f' || 'A' <= ch && ch <= 'F'
}
// isWhitespace returns true if the rune is a space, tab, newline or carriage return
func isWhitespace(ch rune) bool {
return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r'
}
// digitVal returns the integer value of a given octal,decimal or hexadecimal rune
func digitVal(ch rune) int {
switch {
case '0' <= ch && ch <= '9':
return int(ch - '0')
case 'a' <= ch && ch <= 'f':
return int(ch - 'a' + 10)
case 'A' <= ch && ch <= 'F':
return int(ch - 'A' + 10)
}
return 16 // larger than any legal digit val
}

@ -1,46 +0,0 @@
package token
import "fmt"
// Pos describes an arbitrary source position
// including the file, line, and column location.
// A Position is valid if the line number is > 0.
type Pos struct {
Filename string // filename, if any
Offset int // offset, starting at 0
Line int // line number, starting at 1
Column int // column number, starting at 1 (character count)
}
// IsValid returns true if the position is valid.
func (p *Pos) IsValid() bool { return p.Line > 0 }
// String returns a string in one of several forms:
//
// file:line:column valid position with file name
// line:column valid position without file name
// file invalid position with file name
// - invalid position without file name
func (p Pos) String() string {
s := p.Filename
if p.IsValid() {
if s != "" {
s += ":"
}
s += fmt.Sprintf("%d:%d", p.Line, p.Column)
}
if s == "" {
s = "-"
}
return s
}
// Before reports whether the position p is before u.
func (p Pos) Before(u Pos) bool {
return u.Offset > p.Offset || u.Line > p.Line
}
// After reports whether the position p is after u.
func (p Pos) After(u Pos) bool {
return u.Offset < p.Offset || u.Line < p.Line
}

@ -1,118 +0,0 @@
package token
import (
"fmt"
"strconv"
hcltoken "github.com/hashicorp/hcl/hcl/token"
)
// Token defines a single HCL token which can be obtained via the Scanner
type Token struct {
Type Type
Pos Pos
Text string
}
// Type is the set of lexical tokens of the HCL (HashiCorp Configuration Language)
type Type int
const (
// Special tokens
ILLEGAL Type = iota
EOF
identifier_beg
literal_beg
NUMBER // 12345
FLOAT // 123.45
BOOL // true,false
STRING // "abc"
NULL // null
literal_end
identifier_end
operator_beg
LBRACK // [
LBRACE // {
COMMA // ,
PERIOD // .
COLON // :
RBRACK // ]
RBRACE // }
operator_end
)
var tokens = [...]string{
ILLEGAL: "ILLEGAL",
EOF: "EOF",
NUMBER: "NUMBER",
FLOAT: "FLOAT",
BOOL: "BOOL",
STRING: "STRING",
NULL: "NULL",
LBRACK: "LBRACK",
LBRACE: "LBRACE",
COMMA: "COMMA",
PERIOD: "PERIOD",
COLON: "COLON",
RBRACK: "RBRACK",
RBRACE: "RBRACE",
}
// String returns the string corresponding to the token tok.
func (t Type) String() string {
s := ""
if 0 <= t && t < Type(len(tokens)) {
s = tokens[t]
}
if s == "" {
s = "token(" + strconv.Itoa(int(t)) + ")"
}
return s
}
// IsIdentifier returns true for tokens corresponding to identifiers and basic
// type literals; it returns false otherwise.
func (t Type) IsIdentifier() bool { return identifier_beg < t && t < identifier_end }
// IsLiteral returns true for tokens corresponding to basic type literals; it
// returns false otherwise.
func (t Type) IsLiteral() bool { return literal_beg < t && t < literal_end }
// IsOperator returns true for tokens corresponding to operators and
// delimiters; it returns false otherwise.
func (t Type) IsOperator() bool { return operator_beg < t && t < operator_end }
// String returns the token's literal text. Note that this is only
// applicable for certain token types, such as token.IDENT,
// token.STRING, etc..
func (t Token) String() string {
return fmt.Sprintf("%s %s %s", t.Pos.String(), t.Type.String(), t.Text)
}
// HCLToken converts this token to an HCL token.
//
// The token type must be a literal type or this will panic.
func (t Token) HCLToken() hcltoken.Token {
switch t.Type {
case BOOL:
return hcltoken.Token{Type: hcltoken.BOOL, Text: t.Text}
case FLOAT:
return hcltoken.Token{Type: hcltoken.FLOAT, Text: t.Text}
case NULL:
return hcltoken.Token{Type: hcltoken.STRING, Text: ""}
case NUMBER:
return hcltoken.Token{Type: hcltoken.NUMBER, Text: t.Text}
case STRING:
return hcltoken.Token{Type: hcltoken.STRING, Text: t.Text, JSON: true}
default:
panic(fmt.Sprintf("unimplemented HCLToken for type: %s", t.Type))
}
}

@ -1,38 +0,0 @@
package hcl
import (
"unicode"
"unicode/utf8"
)
type lexModeValue byte
const (
lexModeUnknown lexModeValue = iota
lexModeHcl
lexModeJson
)
// lexMode returns whether we're going to be parsing in JSON
// mode or HCL mode.
func lexMode(v []byte) lexModeValue {
var (
r rune
w int
offset int
)
for {
r, w = utf8.DecodeRune(v[offset:])
offset += w
if unicode.IsSpace(r) {
continue
}
if r == '{' {
return lexModeJson
}
break
}
return lexModeHcl
}

@ -1,39 +0,0 @@
package hcl
import (
"fmt"
"github.com/hashicorp/hcl/hcl/ast"
hclParser "github.com/hashicorp/hcl/hcl/parser"
jsonParser "github.com/hashicorp/hcl/json/parser"
)
// ParseBytes accepts as input byte slice and returns ast tree.
//
// Input can be either JSON or HCL
func ParseBytes(in []byte) (*ast.File, error) {
return parse(in)
}
// ParseString accepts input as a string and returns ast tree.
func ParseString(input string) (*ast.File, error) {
return parse([]byte(input))
}
func parse(in []byte) (*ast.File, error) {
switch lexMode(in) {
case lexModeHcl:
return hclParser.Parse(in)
case lexModeJson:
return jsonParser.Parse(in)
}
return nil, fmt.Errorf("unknown config format")
}
// Parse parses the given input and returns the root object.
//
// The input format can be either HCL or JSON.
func Parse(input string) (*ast.File, error) {
return parse([]byte(input))
}

@ -0,0 +1,66 @@
# HCL Changelog
## v2.4.0 (Apr 13, 2020)
### Enhancements
* The Unicode data tables that HCL uses to produce user-perceived "column" positions in diagnostics and other source ranges are now updated to Unicode 12.0.0, which will cause HCL to produce more accurate column numbers for combining characters introduced to Unicode since Unicode 9.0.0.
### Bugs Fixed
* json: Fix panic when parsing malformed JSON. ([#358](https://github.com/hashicorp/hcl/pull/358))
## v2.3.0 (Jan 3, 2020)
### Enhancements
* ext/tryfunc: Optional functions `try` and `can` to include in your `hcl.EvalContext` when evaluating expressions, which allow users to make decisions based on the success of expressions. ([#330](https://github.com/hashicorp/hcl/pull/330))
* ext/typeexpr: Now has an optional function `convert` which you can include in your `hcl.EvalContext` when evaluating expressions, allowing users to convert values to specific type constraints using the type constraint expression syntax. ([#330](https://github.com/hashicorp/hcl/pull/330))
* ext/typeexpr: A new `cty` capsule type `typeexpr.TypeConstraintType` which, when used as either a type constraint for a function parameter or as a type constraint for a `hcldec` attribute specification will cause the given expression to be interpreted as a type constraint expression rather than a value expression. ([#330](https://github.com/hashicorp/hcl/pull/330))
* ext/customdecode: An optional extension that allows overriding the static decoding behavior for expressions either in function arguments or `hcldec` attribute specifications. ([#330](https://github.com/hashicorp/hcl/pull/330))
* ext/customdecode: New `cty` capsuletypes `customdecode.ExpressionType` and `customdecode.ExpressionClosureType` which, when used as either a type constraint for a function parameter or as a type constraint for a `hcldec` attribute specification will cause the given expression (and, for the closure type, also the `hcl.EvalContext` it was evaluated in) to be captured for later analysis, rather than immediately evaluated. ([#330](https://github.com/hashicorp/hcl/pull/330))
## v2.2.0 (Dec 11, 2019)
### Enhancements
* hcldec: Attribute evaluation (as part of `AttrSpec` or `BlockAttrsSpec`) now captures expression evaluation metadata in any errors it produces during type conversions, allowing for better feedback in calling applications that are able to make use of this metadata when printing diagnostic messages. ([#329](https://github.com/hashicorp/hcl/pull/329))
### Bugs Fixed
* hclsyntax: `IndexExpr`, `SplatExpr`, and `RelativeTraversalExpr` will now report a source range that covers all of their child expression nodes. Previously they would report only the operator part, such as `["foo"]`, `[*]`, or `.foo`, which was problematic for callers using source ranges for code analysis. ([#328](https://github.com/hashicorp/hcl/pull/328))
* hclwrite: Parser will no longer panic when the input includes index, splat, or relative traversal syntax. ([#328](https://github.com/hashicorp/hcl/pull/328))
## v2.1.0 (Nov 19, 2019)
### Enhancements
* gohcl: When decoding into a struct value with some fields already populated, those values will be retained if not explicitly overwritten in the given HCL body, with similar overriding/merging behavior as `json.Unmarshal` in the Go standard library.
* hclwrite: New interface to set the expression for an attribute to be a raw token sequence, with no special processing. This has some caveats, so if you intend to use it please refer to the godoc comments. ([#320](https://github.com/hashicorp/hcl/pull/320))
### Bugs Fixed
* hclwrite: The `Body.Blocks` method was returing the blocks in an indefined order, rather than preserving the order of declaration in the source input. ([#313](https://github.com/hashicorp/hcl/pull/313))
* hclwrite: The `TokensForTraversal` function (and thus in turn the `Body.SetAttributeTraversal` method) was not correctly handling index steps in traversals, and thus producing invalid results. ([#319](https://github.com/hashicorp/hcl/pull/319))
## v2.0.0 (Oct 2, 2019)
Initial release of HCL 2, which is a new implementating combining the HCL 1
language with the HIL expression language to produce a single language
supporting both nested configuration structures and arbitrary expressions.
HCL 2 has an entirely new Go library API and so is _not_ a drop-in upgrade
relative to HCL 1. It's possible to import both versions of HCL into a single
program using Go's _semantic import versioning_ mechanism:
```
import (
hcl1 "github.com/hashicorp/hcl"
hcl2 "github.com/hashicorp/hcl/v2"
)
```
---
Prior to v2.0.0 there was not a curated changelog. Consult the git history
from the latest v1.x.x tag for information on the changes to HCL 1.

@ -351,4 +351,3 @@ Exhibit B - “Incompatible With Secondary Licenses” Notice
This Source Code Form is “Incompatible
With Secondary Licenses”, as defined by
the Mozilla Public License, v. 2.0.

@ -0,0 +1,205 @@
# HCL
HCL is a toolkit for creating structured configuration languages that are
both human- and machine-friendly, for use with command-line tools.
Although intended to be generally useful, it is primarily targeted
towards devops tools, servers, etc.
> **NOTE:** This is major version 2 of HCL, whose Go API is incompatible with
> major version 1. Both versions are available for selection in Go Modules
> projects. HCL 2 _cannot_ be imported from Go projects that are not using Go Modules. For more information, see
> [our version selection guide](https://github.com/hashicorp/hcl/wiki/Version-Selection).
HCL has both a _native syntax_, intended to be pleasant to read and write for
humans, and a JSON-based variant that is easier for machines to generate
and parse.
The HCL native syntax is inspired by [libucl](https://github.com/vstakhov/libucl),
[nginx configuration](http://nginx.org/en/docs/beginners_guide.html#conf_structure),
and others.
It includes an expression syntax that allows basic inline computation and,
with support from the calling application, use of variables and functions
for more dynamic configuration languages.
HCL provides a set of constructs that can be used by a calling application to
construct a configuration language. The application defines which attribute
names and nested block types are expected, and HCL parses the configuration
file, verifies that it conforms to the expected structure, and returns
high-level objects that the application can use for further processing.
```go
package main
import (
"log"
"github.com/hashicorp/hcl/v2/hclsimple"
)
type Config struct {
LogLevel string `hcl:"log_level"`
}
func main() {
var config Config
err := hclsimple.DecodeFile("config.hcl", nil, &config)
if err != nil {
log.Fatalf("Failed to load configuration: %s", err)
}
log.Printf("Configuration is %#v", config)
}
```
A lower-level API is available for applications that need more control over
the parsing, decoding, and evaluation of configuration. For more information,
see [the package documentation](https://pkg.go.dev/github.com/hashicorp/hcl/v2).
## Why?
Newcomers to HCL often ask: why not JSON, YAML, etc?
Whereas JSON and YAML are formats for serializing data structures, HCL is
a syntax and API specifically designed for building structured configuration
formats.
HCL attempts to strike a compromise between generic serialization formats
such as JSON and configuration formats built around full programming languages
such as Ruby. HCL syntax is designed to be easily read and written by humans,
and allows _declarative_ logic to permit its use in more complex applications.
HCL is intended as a base syntax for configuration formats built
around key-value pairs and hierarchical blocks whose structure is well-defined
by the calling application, and this definition of the configuration structure
allows for better error messages and more convenient definition within the
calling application.
It can't be denied that JSON is very convenient as a _lingua franca_
for interoperability between different pieces of software. Because of this,
HCL defines a common configuration model that can be parsed from either its
native syntax or from a well-defined equivalent JSON structure. This allows
configuration to be provided as a mixture of human-authored configuration
files in the native syntax and machine-generated files in JSON.
## Information Model and Syntax
HCL is built around two primary concepts: _attributes_ and _blocks_. In
native syntax, a configuration file for a hypothetical application might look
something like this:
```hcl
io_mode = "async"
service "http" "web_proxy" {
listen_addr = "127.0.0.1:8080"
process "main" {
command = ["/usr/local/bin/awesome-app", "server"]
}
process "mgmt" {
command = ["/usr/local/bin/awesome-app", "mgmt"]
}
}
```
The JSON equivalent of this configuration is the following:
```json
{
"io_mode": "async",
"service": {
"http": {
"web_proxy": {
"listen_addr": "127.0.0.1:8080",
"process": {
"main": {
"command": ["/usr/local/bin/awesome-app", "server"]
},
"mgmt": {
"command": ["/usr/local/bin/awesome-app", "mgmt"]
},
}
}
}
}
}
```
Regardless of which syntax is used, the API within the calling application
is the same. It can either work directly with the low-level attributes and
blocks, for more advanced use-cases, or it can use one of the _decoder_
packages to declaratively extract into either Go structs or dynamic value
structures.
Attribute values can be expressions as well as just literal values:
```hcl
# Arithmetic with literals and application-provided variables
sum = 1 + addend
# String interpolation and templates
message = "Hello, ${name}!"
# Application-provided functions
shouty_message = upper(message)
```
Although JSON syntax doesn't permit direct use of expressions, the interpolation
syntax allows use of arbitrary expressions within JSON strings:
```json
{
"sum": "${1 + addend}",
"message": "Hello, ${name}!",
"shouty_message": "${upper(message)}"
}
```
For more information, see the detailed specifications:
* [Syntax-agnostic Information Model](spec.md)
* [HCL Native Syntax](hclsyntax/spec.md)
* [JSON Representation](json/spec.md)
## Changes in 2.0
Version 2.0 of HCL combines the features of HCL 1.0 with those of the
interpolation language HIL to produce a single configuration language that
supports arbitrary expressions.
This new version has a completely new parser and Go API, with no direct
migration path. Although the syntax is similar, the implementation takes some
very different approaches to improve on some "rough edges" that existed with
the original implementation and to allow for more robust error handling.
It's possible to import both HCL 1 and HCL 2 into the same program using Go's
_semantic import versioning_ mechanism:
```go
import (
hcl1 "github.com/hashicorp/hcl"
hcl2 "github.com/hashicorp/hcl/v2"
)
```
## Acknowledgements
HCL was heavily inspired by [libucl](https://github.com/vstakhov/libucl),
by [Vsevolod Stakhov](https://github.com/vstakhov).
HCL and HIL originate in [HashiCorp Terraform](https://terraform.io/),
with the original parsers for each written by
[Mitchell Hashimoto](https://github.com/mitchellh).
The original HCL parser was ported to pure Go (from yacc) by
[Fatih Arslan](https://github.com/fatih). The structure-related portions of
the new native syntax parser build on that work.
The original HIL parser was ported to pure Go (from yacc) by
[Martin Atkins](https://github.com/apparentlymart). The expression-related
portions of the new native syntax parser build on that work.
HCL 2, which merged the original HCL and HIL languages into this single new
language, builds on design and prototyping work by
[Martin Atkins](https://github.com/apparentlymart) in
[zcl](https://github.com/zclconf/go-zcl).

@ -0,0 +1,13 @@
build: off
clone_folder: c:\gopath\src\github.com\hashicorp\hcl
environment:
GOPATH: c:\gopath
GO111MODULE: on
GOPROXY: https://goproxy.io
stack: go 1.12
test_script:
- go test ./...

@ -0,0 +1,143 @@
package hcl
import (
"fmt"
)
// DiagnosticSeverity represents the severity of a diagnostic.
type DiagnosticSeverity int
const (
// DiagInvalid is the invalid zero value of DiagnosticSeverity
DiagInvalid DiagnosticSeverity = iota
// DiagError indicates that the problem reported by a diagnostic prevents
// further progress in parsing and/or evaluating the subject.
DiagError
// DiagWarning indicates that the problem reported by a diagnostic warrants
// user attention but does not prevent further progress. It is most
// commonly used for showing deprecation notices.
DiagWarning
)
// Diagnostic represents information to be presented to a user about an
// error or anomoly in parsing or evaluating configuration.
type Diagnostic struct {
Severity DiagnosticSeverity
// Summary and Detail contain the English-language description of the
// problem. Summary is a terse description of the general problem and
// detail is a more elaborate, often-multi-sentence description of
// the probem and what might be done to solve it.
Summary string
Detail string
// Subject and Context are both source ranges relating to the diagnostic.
//
// Subject is a tight range referring to exactly the construct that
// is problematic, while Context is an optional broader range (which should
// fully contain Subject) that ought to be shown around Subject when
// generating isolated source-code snippets in diagnostic messages.
// If Context is nil, the Subject is also the Context.
//
// Some diagnostics have no source ranges at all. If Context is set then
// Subject should always also be set.
Subject *Range
Context *Range
// For diagnostics that occur when evaluating an expression, Expression
// may refer to that expression and EvalContext may point to the
// EvalContext that was active when evaluating it. This may allow for the
// inclusion of additional useful information when rendering a diagnostic
// message to the user.
//
// It is not always possible to select a single EvalContext for a
// diagnostic, and so in some cases this field may be nil even when an
// expression causes a problem.
//
// EvalContexts form a tree, so the given EvalContext may refer to a parent
// which in turn refers to another parent, etc. For a full picture of all
// of the active variables and functions the caller must walk up this
// chain, preferring definitions that are "closer" to the expression in
// case of colliding names.
Expression Expression
EvalContext *EvalContext
}
// Diagnostics is a list of Diagnostic instances.
type Diagnostics []*Diagnostic
// error implementation, so that diagnostics can be returned via APIs
// that normally deal in vanilla Go errors.
//
// This presents only minimal context about the error, for compatibility
// with usual expectations about how errors will present as strings.
func (d *Diagnostic) Error() string {
return fmt.Sprintf("%s: %s; %s", d.Subject, d.Summary, d.Detail)
}
// error implementation, so that sets of diagnostics can be returned via
// APIs that normally deal in vanilla Go errors.
func (d Diagnostics) Error() string {
count := len(d)
switch {
case count == 0:
return "no diagnostics"
case count == 1:
return d[0].Error()
default:
return fmt.Sprintf("%s, and %d other diagnostic(s)", d[0].Error(), count-1)
}
}
// Append appends a new error to a Diagnostics and return the whole Diagnostics.
//
// This is provided as a convenience for returning from a function that
// collects and then returns a set of diagnostics:
//
// return nil, diags.Append(&hcl.Diagnostic{ ... })
//
// Note that this modifies the array underlying the diagnostics slice, so
// must be used carefully within a single codepath. It is incorrect (and rude)
// to extend a diagnostics created by a different subsystem.
func (d Diagnostics) Append(diag *Diagnostic) Diagnostics {
return append(d, diag)
}
// Extend concatenates the given Diagnostics with the receiver and returns
// the whole new Diagnostics.
//
// This is similar to Append but accepts multiple diagnostics to add. It has
// all the same caveats and constraints.
func (d Diagnostics) Extend(diags Diagnostics) Diagnostics {
return append(d, diags...)
}
// HasErrors returns true if the receiver contains any diagnostics of
// severity DiagError.
func (d Diagnostics) HasErrors() bool {
for _, diag := range d {
if diag.Severity == DiagError {
return true
}
}
return false
}
func (d Diagnostics) Errs() []error {
var errs []error
for _, diag := range d {
if diag.Severity == DiagError {
errs = append(errs, diag)
}
}
return errs
}
// A DiagnosticWriter emits diagnostics somehow.
type DiagnosticWriter interface {
WriteDiagnostic(*Diagnostic) error
WriteDiagnostics(Diagnostics) error
}

@ -0,0 +1,311 @@
package hcl
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"sort"
wordwrap "github.com/mitchellh/go-wordwrap"
"github.com/zclconf/go-cty/cty"
)
type diagnosticTextWriter struct {
files map[string]*File
wr io.Writer
width uint
color bool
}
// NewDiagnosticTextWriter creates a DiagnosticWriter that writes diagnostics
// to the given writer as formatted text.
//
// It is designed to produce text appropriate to print in a monospaced font
// in a terminal of a particular width, or optionally with no width limit.
//
// The given width may be zero to disable word-wrapping of the detail text
// and truncation of source code snippets.
//
// If color is set to true, the output will include VT100 escape sequences to
// color-code the severity indicators. It is suggested to turn this off if
// the target writer is not a terminal.
func NewDiagnosticTextWriter(wr io.Writer, files map[string]*File, width uint, color bool) DiagnosticWriter {
return &diagnosticTextWriter{
files: files,
wr: wr,
width: width,
color: color,
}
}
func (w *diagnosticTextWriter) WriteDiagnostic(diag *Diagnostic) error {
if diag == nil {
return errors.New("nil diagnostic")
}
var colorCode, highlightCode, resetCode string
if w.color {
switch diag.Severity {
case DiagError:
colorCode = "\x1b[31m"
case DiagWarning:
colorCode = "\x1b[33m"
}
resetCode = "\x1b[0m"
highlightCode = "\x1b[1;4m"
}
var severityStr string
switch diag.Severity {
case DiagError:
severityStr = "Error"
case DiagWarning:
severityStr = "Warning"
default:
// should never happen
severityStr = "???????"
}
fmt.Fprintf(w.wr, "%s%s%s: %s\n\n", colorCode, severityStr, resetCode, diag.Summary)
if diag.Subject != nil {
snipRange := *diag.Subject
highlightRange := snipRange
if diag.Context != nil {
// Show enough of the source code to include both the subject
// and context ranges, which overlap in all reasonable
// situations.
snipRange = RangeOver(snipRange, *diag.Context)
}
// We can't illustrate an empty range, so we'll turn such ranges into
// single-character ranges, which might not be totally valid (may point
// off the end of a line, or off the end of the file) but are good
// enough for the bounds checks we do below.
if snipRange.Empty() {
snipRange.End.Byte++
snipRange.End.Column++
}
if highlightRange.Empty() {
highlightRange.End.Byte++
highlightRange.End.Column++
}
file := w.files[diag.Subject.Filename]
if file == nil || file.Bytes == nil {
fmt.Fprintf(w.wr, " on %s line %d:\n (source code not available)\n\n", diag.Subject.Filename, diag.Subject.Start.Line)
} else {
var contextLine string
if diag.Subject != nil {
contextLine = contextString(file, diag.Subject.Start.Byte)
if contextLine != "" {
contextLine = ", in " + contextLine
}
}
fmt.Fprintf(w.wr, " on %s line %d%s:\n", diag.Subject.Filename, diag.Subject.Start.Line, contextLine)
src := file.Bytes
sc := NewRangeScanner(src, diag.Subject.Filename, bufio.ScanLines)
for sc.Scan() {
lineRange := sc.Range()
if !lineRange.Overlaps(snipRange) {
continue
}
beforeRange, highlightedRange, afterRange := lineRange.PartitionAround(highlightRange)
if highlightedRange.Empty() {
fmt.Fprintf(w.wr, "%4d: %s\n", lineRange.Start.Line, sc.Bytes())
} else {
before := beforeRange.SliceBytes(src)
highlighted := highlightedRange.SliceBytes(src)
after := afterRange.SliceBytes(src)
fmt.Fprintf(
w.wr, "%4d: %s%s%s%s%s\n",
lineRange.Start.Line,
before,
highlightCode, highlighted, resetCode,
after,
)
}
}
w.wr.Write([]byte{'\n'})
}
if diag.Expression != nil && diag.EvalContext != nil {
// We will attempt to render the values for any variables
// referenced in the given expression as additional context, for
// situations where the same expression is evaluated multiple
// times in different scopes.
expr := diag.Expression
ctx := diag.EvalContext
vars := expr.Variables()
stmts := make([]string, 0, len(vars))
seen := make(map[string]struct{}, len(vars))
for _, traversal := range vars {
val, diags := traversal.TraverseAbs(ctx)
if diags.HasErrors() {
// Skip anything that generates errors, since we probably
// already have the same error in our diagnostics set
// already.
continue
}
traversalStr := w.traversalStr(traversal)
if _, exists := seen[traversalStr]; exists {
continue // don't show duplicates when the same variable is referenced multiple times
}
switch {
case !val.IsKnown():
// Can't say anything about this yet, then.
continue
case val.IsNull():
stmts = append(stmts, fmt.Sprintf("%s set to null", traversalStr))
default:
stmts = append(stmts, fmt.Sprintf("%s as %s", traversalStr, w.valueStr(val)))
}
seen[traversalStr] = struct{}{}
}
sort.Strings(stmts) // FIXME: Should maybe use a traversal-aware sort that can sort numeric indexes properly?
last := len(stmts) - 1
for i, stmt := range stmts {
switch i {
case 0:
w.wr.Write([]byte{'w', 'i', 't', 'h', ' '})
default:
w.wr.Write([]byte{' ', ' ', ' ', ' ', ' '})
}
w.wr.Write([]byte(stmt))
switch i {
case last:
w.wr.Write([]byte{'.', '\n', '\n'})
default:
w.wr.Write([]byte{',', '\n'})
}
}
}
}
if diag.Detail != "" {
detail := diag.Detail
if w.width != 0 {
detail = wordwrap.WrapString(detail, w.width)
}
fmt.Fprintf(w.wr, "%s\n\n", detail)
}
return nil
}
func (w *diagnosticTextWriter) WriteDiagnostics(diags Diagnostics) error {
for _, diag := range diags {
err := w.WriteDiagnostic(diag)
if err != nil {
return err
}
}
return nil
}
func (w *diagnosticTextWriter) traversalStr(traversal Traversal) string {
// This is a specialized subset of traversal rendering tailored to
// producing helpful contextual messages in diagnostics. It is not
// comprehensive nor intended to be used for other purposes.
var buf bytes.Buffer
for _, step := range traversal {
switch tStep := step.(type) {
case TraverseRoot:
buf.WriteString(tStep.Name)
case TraverseAttr:
buf.WriteByte('.')
buf.WriteString(tStep.Name)
case TraverseIndex:
buf.WriteByte('[')
if keyTy := tStep.Key.Type(); keyTy.IsPrimitiveType() {
buf.WriteString(w.valueStr(tStep.Key))
} else {
// We'll just use a placeholder for more complex values,
// since otherwise our result could grow ridiculously long.
buf.WriteString("...")
}
buf.WriteByte(']')
}
}
return buf.String()
}
func (w *diagnosticTextWriter) valueStr(val cty.Value) string {
// This is a specialized subset of value rendering tailored to producing
// helpful but concise messages in diagnostics. It is not comprehensive
// nor intended to be used for other purposes.
ty := val.Type()
switch {
case val.IsNull():
return "null"
case !val.IsKnown():
// Should never happen here because we should filter before we get
// in here, but we'll do something reasonable rather than panic.
return "(not yet known)"
case ty == cty.Bool:
if val.True() {
return "true"
}
return "false"
case ty == cty.Number:
bf := val.AsBigFloat()
return bf.Text('g', 10)
case ty == cty.String:
// Go string syntax is not exactly the same as HCL native string syntax,
// but we'll accept the minor edge-cases where this is different here
// for now, just to get something reasonable here.
return fmt.Sprintf("%q", val.AsString())
case ty.IsCollectionType() || ty.IsTupleType():
l := val.LengthInt()
switch l {
case 0:
return "empty " + ty.FriendlyName()
case 1:
return ty.FriendlyName() + " with 1 element"
default:
return fmt.Sprintf("%s with %d elements", ty.FriendlyName(), l)
}
case ty.IsObjectType():
atys := ty.AttributeTypes()
l := len(atys)
switch l {
case 0:
return "object with no attributes"
case 1:
var name string
for k := range atys {
name = k
}
return fmt.Sprintf("object with 1 attribute %q", name)
default:
return fmt.Sprintf("object with %d attributes", l)
}
default:
return ty.FriendlyName()
}
}
func contextString(file *File, offset int) string {
type contextStringer interface {
ContextString(offset int) string
}
if cser, ok := file.Nav.(contextStringer); ok {
return cser.ContextString(offset)
}
return ""
}

@ -0,0 +1,24 @@
package hcl
import (
"github.com/agext/levenshtein"
)
// nameSuggestion tries to find a name from the given slice of suggested names
// that is close to the given name and returns it if found. If no suggestion
// is close enough, returns the empty string.
//
// The suggestions are tried in order, so earlier suggestions take precedence
// if the given string is similar to two or more suggestions.
//
// This function is intended to be used with a relatively-small number of
// suggestions. It's not optimized for hundreds or thousands of them.
func nameSuggestion(given string, suggestions []string) string {
for _, suggestion := range suggestions {
dist := levenshtein.Distance(given, suggestion, nil)
if dist < 3 { // threshold determined experimentally
return suggestion
}
}
return ""
}

@ -0,0 +1,34 @@
// Package hcl contains the main modelling types and general utility functions
// for HCL.
//
// For a simple entry point into HCL, see the package in the subdirectory
// "hclsimple", which has an opinionated function Decode that can decode HCL
// configurations in either native HCL syntax or JSON syntax into a Go struct
// type:
//
// package main
//
// import (
// "log"
// "github.com/hashicorp/hcl/v2/hclsimple"
// )
//
// type Config struct {
// LogLevel string `hcl:"log_level"`
// }
//
// func main() {
// var config Config
// err := hclsimple.DecodeFile("config.hcl", nil, &config)
// if err != nil {
// log.Fatalf("Failed to load configuration: %s", err)
// }
// log.Printf("Configuration is %#v", config)
// }
//
// If your application needs more control over the evaluation of the
// configuration, you can use the functions in the subdirectories hclparse,
// gohcl, hcldec, etc. Splitting the handling of configuration into multiple
// phases allows for advanced patterns such as allowing expressions in one
// part of the configuration to refer to data defined in another part.
package hcl

@ -0,0 +1,25 @@
package hcl
import (
"github.com/zclconf/go-cty/cty"
"github.com/zclconf/go-cty/cty/function"
)
// An EvalContext provides the variables and functions that should be used
// to evaluate an expression.
type EvalContext struct {
Variables map[string]cty.Value
Functions map[string]function.Function
parent *EvalContext
}
// NewChild returns a new EvalContext that is a child of the receiver.
func (ctx *EvalContext) NewChild() *EvalContext {
return &EvalContext{parent: ctx}
}
// Parent returns the parent of the receiver, or nil if the receiver has
// no parent.
func (ctx *EvalContext) Parent() *EvalContext {
return ctx.parent
}

@ -0,0 +1,46 @@
package hcl
// ExprCall tests if the given expression is a function call and,
// if so, extracts the function name and the expressions that represent
// the arguments. If the given expression is not statically a function call,
// error diagnostics are returned.
//
// A particular Expression implementation can support this function by
// offering a method called ExprCall that takes no arguments and returns
// *StaticCall. This method should return nil if a static call cannot
// be extracted. Alternatively, an implementation can support
// UnwrapExpression to delegate handling of this function to a wrapped
// Expression object.
func ExprCall(expr Expression) (*StaticCall, Diagnostics) {
type exprCall interface {
ExprCall() *StaticCall
}
physExpr := UnwrapExpressionUntil(expr, func(expr Expression) bool {
_, supported := expr.(exprCall)
return supported
})
if exC, supported := physExpr.(exprCall); supported {
if call := exC.ExprCall(); call != nil {
return call, nil
}
}
return nil, Diagnostics{
&Diagnostic{
Severity: DiagError,
Summary: "Invalid expression",
Detail: "A static function call is required.",
Subject: expr.StartRange().Ptr(),
},
}
}
// StaticCall represents a function call that was extracted statically from
// an expression using ExprCall.
type StaticCall struct {
Name string
NameRange Range
Arguments []Expression
ArgsRange Range
}

@ -0,0 +1,37 @@
package hcl
// ExprList tests if the given expression is a static list construct and,
// if so, extracts the expressions that represent the list elements.
// If the given expression is not a static list, error diagnostics are
// returned.
//
// A particular Expression implementation can support this function by
// offering a method called ExprList that takes no arguments and returns
// []Expression. This method should return nil if a static list cannot
// be extracted. Alternatively, an implementation can support
// UnwrapExpression to delegate handling of this function to a wrapped
// Expression object.
func ExprList(expr Expression) ([]Expression, Diagnostics) {
type exprList interface {
ExprList() []Expression
}
physExpr := UnwrapExpressionUntil(expr, func(expr Expression) bool {
_, supported := expr.(exprList)
return supported
})
if exL, supported := physExpr.(exprList); supported {
if list := exL.ExprList(); list != nil {
return list, nil
}
}
return nil, Diagnostics{
&Diagnostic{
Severity: DiagError,
Summary: "Invalid expression",
Detail: "A static list expression is required.",
Subject: expr.StartRange().Ptr(),
},
}
}

@ -0,0 +1,44 @@
package hcl
// ExprMap tests if the given expression is a static map construct and,
// if so, extracts the expressions that represent the map elements.
// If the given expression is not a static map, error diagnostics are
// returned.
//
// A particular Expression implementation can support this function by
// offering a method called ExprMap that takes no arguments and returns
// []KeyValuePair. This method should return nil if a static map cannot
// be extracted. Alternatively, an implementation can support
// UnwrapExpression to delegate handling of this function to a wrapped
// Expression object.
func ExprMap(expr Expression) ([]KeyValuePair, Diagnostics) {
type exprMap interface {
ExprMap() []KeyValuePair
}
physExpr := UnwrapExpressionUntil(expr, func(expr Expression) bool {
_, supported := expr.(exprMap)
return supported
})
if exM, supported := physExpr.(exprMap); supported {
if pairs := exM.ExprMap(); pairs != nil {
return pairs, nil
}
}
return nil, Diagnostics{
&Diagnostic{
Severity: DiagError,
Summary: "Invalid expression",
Detail: "A static map expression is required.",
Subject: expr.StartRange().Ptr(),
},
}
}
// KeyValuePair represents a pair of expressions that serve as a single item
// within a map or object definition construct.
type KeyValuePair struct {
Key Expression
Value Expression
}

@ -0,0 +1,68 @@
package hcl
type unwrapExpression interface {
UnwrapExpression() Expression
}
// UnwrapExpression removes any "wrapper" expressions from the given expression,
// to recover the representation of the physical expression given in source
// code.
//
// Sometimes wrapping expressions are used to modify expression behavior, e.g.
// in extensions that need to make some local variables available to certain
// sub-trees of the configuration. This can make it difficult to reliably
// type-assert on the physical AST types used by the underlying syntax.
//
// Unwrapping an expression may modify its behavior by stripping away any
// additional constraints or capabilities being applied to the Value and
// Variables methods, so this function should generally only be used prior
// to operations that concern themselves with the static syntax of the input
// configuration, and not with the effective value of the expression.
//
// Wrapper expression types must support unwrapping by implementing a method
// called UnwrapExpression that takes no arguments and returns the embedded
// Expression. Implementations of this method should peel away only one level
// of wrapping, if multiple are present. This method may return nil to
// indicate _dynamically_ that no wrapped expression is available, for
// expression types that might only behave as wrappers in certain cases.
func UnwrapExpression(expr Expression) Expression {
for {
unwrap, wrapped := expr.(unwrapExpression)
if !wrapped {
return expr
}
innerExpr := unwrap.UnwrapExpression()
if innerExpr == nil {
return expr
}
expr = innerExpr
}
}
// UnwrapExpressionUntil is similar to UnwrapExpression except it gives the
// caller an opportunity to test each level of unwrapping to see each a
// particular expression is accepted.
//
// This could be used, for example, to unwrap until a particular other
// interface is satisfied, regardless of wrap wrapping level it is satisfied
// at.
//
// The given callback function must return false to continue wrapping, or
// true to accept and return the proposed expression given. If the callback
// function rejects even the final, physical expression then the result of
// this function is nil.
func UnwrapExpressionUntil(expr Expression, until func(Expression) bool) Expression {
for {
if until(expr) {
return expr
}
unwrap, wrapped := expr.(unwrapExpression)
if !wrapped {
return nil
}
expr = unwrap.UnwrapExpression()
if expr == nil {
return nil
}
}
}

@ -0,0 +1,209 @@
# HCL Custom Static Decoding Extension
This HCL extension provides a mechanism for defining arguments in an HCL-based
language whose values are derived using custom decoding rules against the
HCL expression syntax, overriding the usual behavior of normal expression
evaluation.
"Arguments", for the purpose of this extension, currently includes the
following two contexts:
* For applications using `hcldec` for dynamic decoding, a `hcldec.AttrSpec`
or `hcldec.BlockAttrsSpec` can be given a special type constraint that
opts in to custom decoding behavior for the attribute(s) that are selected
by that specification.
* When working with the HCL native expression syntax, a function given in
the `hcl.EvalContext` during evaluation can have parameters with special
type constraints that opt in to custom decoding behavior for the argument
expression associated with that parameter in any call.
The above use-cases are rather abstract, so we'll consider a motivating
real-world example: sometimes we (language designers) need to allow users
to specify type constraints directly in the language itself, such as in
[Terraform's Input Variables](https://www.terraform.io/docs/configuration/variables.html).
Terraform's `variable` blocks include an argument called `type` which takes
a type constraint given using HCL expression building-blocks as defined by
[the HCL `typeexpr` extension](../typeexpr/README.md).
A "type constraint expression" of that sort is not an expression intended to
be evaluated in the usual way. Instead, the physical expression is
deconstructed using [the static analysis operations](../../spec.md#static-analysis)
to produce a `cty.Type` as the result, rather than a `cty.Value`.
The purpose of this Custom Static Decoding Extension, then, is to provide a
bridge to allow that sort of custom decoding to be used via mechanisms that
normally deal in `cty.Value`, such as `hcldec` and native syntax function
calls as listed above.
(Note: [`gohcl`](https://pkg.go.dev/github.com/hashicorp/hcl/v2/gohcl) has
its own mechanism to support this use case, exploiting the fact that it is
working directly with "normal" Go types. Decoding into a struct field of
type `hcl.Expression` obtains the expression directly without evaluating it
first. The Custom Static Decoding Extension is not necessary for that `gohcl`
technique. You can also implement custom decoding by working directly with
the lowest-level HCL API, which separates extraction of and evaluation of
expressions into two steps.)
## Custom Decoding Types
This extension relies on a convention implemented in terms of
[_Capsule Types_ in the underlying `cty` type system](https://github.com/zclconf/go-cty/blob/master/docs/types.md#capsule-types). `cty` allows a capsule type to carry arbitrary
extension metadata values as an aid to creating higher-level abstractions like
this extension.
A custom argument decoding mode, then, is implemented by creating a new `cty`
capsule type that implements the `ExtensionData` custom operation to return
a decoding function when requested. For example:
```go
var keywordType cty.Type
keywordType = cty.CapsuleWithOps("keyword", reflect.TypeOf(""), &cty.CapsuleOps{
ExtensionData: func(key interface{}) interface{} {
switch key {
case customdecode.CustomExpressionDecoder:
return customdecode.CustomExpressionDecoderFunc(
func(expr hcl.Expression, ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics) {
var diags hcl.Diagnostics
kw := hcl.ExprAsKeyword(expr)
if kw == "" {
diags = append(diags, &hcl.Diagnostic{
Severity: hcl.DiagError,
Summary: "Invalid keyword",
Detail: "A keyword is required",
Subject: expr.Range().Ptr(),
})
return cty.UnkownVal(keywordType), diags
}
return cty.CapsuleVal(keywordType, &kw)
},
)
default:
return nil
}
},
})
```
The boilerplate here is a bit fussy, but the important part for our purposes
is the `case customdecode.CustomExpressionDecoder:` clause, which uses
a custom extension key type defined in this package to recognize when a
component implementing this extension is checking to see if a target type
has a custom decode implementation.
In the above case we've defined a type that decodes expressions as static
keywords, so a keyword like `foo` would decode as an encapsulated `"foo"`
string, while any other sort of expression like `"baz"` or `1 + 1` would
return an error.
We could then use `keywordType` as a type constraint either for a function
parameter or a `hcldec` attribute specification, which would require the
argument for that function parameter or the expression for the matching
attributes to be a static keyword, rather than an arbitrary expression.
For example, in a `hcldec.AttrSpec`:
```go
keywordSpec := &hcldec.AttrSpec{
Name: "keyword",
Type: keywordType,
}
```
The above would accept input like the following and would set its result to
a `cty.Value` of `keywordType`, after decoding:
```hcl
keyword = foo
```
## The Expression and Expression Closure `cty` types
Building on the above, this package also includes two capsule types that use
the above mechanism to allow calling applications to capture expressions
directly and thus defer analysis to a later step, after initial decoding.
The `customdecode.ExpressionType` type encapsulates an `hcl.Expression` alone,
for situations like our type constraint expression example above where it's
the static structure of the expression we want to inspect, and thus any
variables and functions defined in the evaluation context are irrelevant.
The `customdecode.ExpressionClosureType` type encapsulates a
`*customdecode.ExpressionClosure` value, which binds the given expression to
the `hcl.EvalContext` it was asked to evaluate against and thus allows the
receiver of that result to later perform normal evaluation of the expression
with all the same variables and functions that would've been available to it
naturally.
Both of these types can be used as type constraints either for `hcldec`
attribute specifications or for function arguments. Here's an example of
`ExpressionClosureType` to implement a function that can evaluate
an expression with some additional variables defined locally, which we'll
call the `with(...)` function:
```go
var WithFunc = function.New(&function.Spec{
Params: []function.Parameter{
{
Name: "variables",
Type: cty.DynamicPseudoType,
},
{
Name: "expression",
Type: customdecode.ExpressionClosureType,
},
},
Type: func(args []cty.Value) (cty.Type, error) {
varsVal := args[0]
exprVal := args[1]
if !varsVal.Type().IsObjectType() {
return cty.NilVal, function.NewArgErrorf(0, "must be an object defining local variables")
}
if !varsVal.IsKnown() {
// We can't predict our result type until the variables object
// is known.
return cty.DynamicPseudoType, nil
}
vars := varsVal.AsValueMap()
closure := customdecode.ExpressionClosureFromVal(exprVal)
result, err := evalWithLocals(vars, closure)
if err != nil {
return cty.NilVal, err
}
return result.Type(), nil
},
Impl: func(args []cty.Value, retType cty.Type) (cty.Value, error) {
varsVal := args[0]
exprVal := args[1]
vars := varsVal.AsValueMap()
closure := customdecode.ExpressionClosureFromVal(exprVal)
return evalWithLocals(vars, closure)
},
})
func evalWithLocals(locals map[string]cty.Value, closure *customdecode.ExpressionClosure) (cty.Value, error) {
childCtx := closure.EvalContext.NewChild()
childCtx.Variables = locals
val, diags := closure.Expression.Value(childCtx)
if diags.HasErrors() {
return cty.NilVal, function.NewArgErrorf(1, "couldn't evaluate expression: %s", diags.Error())
}
return val, nil
}
```
If the above function were placed into an `hcl.EvalContext` as `with`, it
could be used in a native syntax call to that function as follows:
```hcl
foo = with({name = "Cory"}, "${greeting}, ${name}!")
```
The above assumes a variable in the main context called `greeting`, to which
the `with` function adds `name` before evaluating the expression given in
its second argument. This makes that second argument context-sensitive -- it
would behave differently if the user wrote the same thing somewhere else -- so
this capability should be used with care to make sure it doesn't cause confusion
for the end-users of your language.
There are some other examples of this capability to evaluate expressions in
unusual ways in the `tryfunc` directory that is a sibling of this one.

@ -0,0 +1,56 @@
// Package customdecode contains a HCL extension that allows, in certain
// contexts, expression evaluation to be overridden by custom static analysis.
//
// This mechanism is only supported in certain specific contexts where
// expressions are decoded with a specific target type in mind. For more
// information, see the documentation on CustomExpressionDecoder.
package customdecode
import (
"github.com/hashicorp/hcl/v2"
"github.com/zclconf/go-cty/cty"
)
type customDecoderImpl int
// CustomExpressionDecoder is a value intended to be used as a cty capsule
// type ExtensionData key for capsule types whose values are to be obtained
// by static analysis of an expression rather than normal evaluation of that
// expression.
//
// When a cooperating capsule type is asked for ExtensionData with this key,
// it must return a non-nil CustomExpressionDecoderFunc value.
//
// This mechanism is not universally supported; instead, it's handled in a few
// specific places where expressions are evaluated with the intent of producing
// a cty.Value of a type given by the calling application.
//
// Specifically, this currently works for type constraints given in
// hcldec.AttrSpec and hcldec.BlockAttrsSpec, and it works for arguments to
// function calls in the HCL native syntax. HCL extensions implemented outside
// of the main HCL module may also implement this; consult their own
// documentation for details.
const CustomExpressionDecoder = customDecoderImpl(1)
// CustomExpressionDecoderFunc is the type of value that must be returned by
// a capsule type handling the key CustomExpressionDecoder in its ExtensionData
// implementation.
//
// If no error diagnostics are returned, the result value MUST be of the
// capsule type that the decoder function was derived from. If the returned
// error diagnostics prevent producing a value at all, return cty.NilVal.
type CustomExpressionDecoderFunc func(expr hcl.Expression, ctx *hcl.EvalContext) (cty.Value, hcl.Diagnostics)
// CustomExpressionDecoderForType takes any cty type and returns its
// custom expression decoder implementation if it has one. If it is not a
// capsule type or it does not implement a custom expression decoder, this
// function returns nil.
func CustomExpressionDecoderForType(ty cty.Type) CustomExpressionDecoderFunc {
if !ty.IsCapsuleType() {
return nil
}
if fn, ok := ty.CapsuleExtensionData(CustomExpressionDecoder).(CustomExpressionDecoderFunc); ok {
return fn
}
return nil
}

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save