You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
466 lines
15 KiB
Go
466 lines
15 KiB
Go
// Copyright 2019, The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package cmp
|
|
|
|
import (
|
|
"bytes"
|
|
"fmt"
|
|
"reflect"
|
|
"strconv"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
|
|
"github.com/google/go-cmp/cmp/internal/diff"
|
|
)
|
|
|
|
// CanFormatDiffSlice reports whether we support custom formatting for nodes
|
|
// that are slices of primitive kinds or strings.
|
|
func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
|
|
switch {
|
|
case opts.DiffMode != diffUnknown:
|
|
return false // Must be formatting in diff mode
|
|
case v.NumDiff == 0:
|
|
return false // No differences detected
|
|
case !v.ValueX.IsValid() || !v.ValueY.IsValid():
|
|
return false // Both values must be valid
|
|
case v.NumIgnored > 0:
|
|
return false // Some ignore option was used
|
|
case v.NumTransformed > 0:
|
|
return false // Some transform option was used
|
|
case v.NumCompared > 1:
|
|
return false // More than one comparison was used
|
|
case v.NumCompared == 1 && v.Type.Name() != "":
|
|
// The need for cmp to check applicability of options on every element
|
|
// in a slice is a significant performance detriment for large []byte.
|
|
// The workaround is to specify Comparer(bytes.Equal),
|
|
// which enables cmp to compare []byte more efficiently.
|
|
// If they differ, we still want to provide batched diffing.
|
|
// The logic disallows named types since they tend to have their own
|
|
// String method, with nicer formatting than what this provides.
|
|
return false
|
|
}
|
|
|
|
// Check whether this is an interface with the same concrete types.
|
|
t := v.Type
|
|
vx, vy := v.ValueX, v.ValueY
|
|
if t.Kind() == reflect.Interface && !vx.IsNil() && !vy.IsNil() && vx.Elem().Type() == vy.Elem().Type() {
|
|
vx, vy = vx.Elem(), vy.Elem()
|
|
t = vx.Type()
|
|
}
|
|
|
|
// Check whether we provide specialized diffing for this type.
|
|
switch t.Kind() {
|
|
case reflect.String:
|
|
case reflect.Array, reflect.Slice:
|
|
// Only slices of primitive types have specialized handling.
|
|
switch t.Elem().Kind() {
|
|
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
|
|
reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
|
|
reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
|
|
default:
|
|
return false
|
|
}
|
|
|
|
// Both slice values have to be non-empty.
|
|
if t.Kind() == reflect.Slice && (vx.Len() == 0 || vy.Len() == 0) {
|
|
return false
|
|
}
|
|
|
|
// If a sufficient number of elements already differ,
|
|
// use specialized formatting even if length requirement is not met.
|
|
if v.NumDiff > v.NumSame {
|
|
return true
|
|
}
|
|
default:
|
|
return false
|
|
}
|
|
|
|
// Use specialized string diffing for longer slices or strings.
|
|
const minLength = 64
|
|
return vx.Len() >= minLength && vy.Len() >= minLength
|
|
}
|
|
|
|
// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
|
|
// This provides custom-tailored logic to make printing of differences in
|
|
// textual strings and slices of primitive kinds more readable.
|
|
func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
|
|
assert(opts.DiffMode == diffUnknown)
|
|
t, vx, vy := v.Type, v.ValueX, v.ValueY
|
|
if t.Kind() == reflect.Interface {
|
|
vx, vy = vx.Elem(), vy.Elem()
|
|
t = vx.Type()
|
|
opts = opts.WithTypeMode(emitType)
|
|
}
|
|
|
|
// Auto-detect the type of the data.
|
|
var isLinedText, isText, isBinary bool
|
|
var sx, sy string
|
|
switch {
|
|
case t.Kind() == reflect.String:
|
|
sx, sy = vx.String(), vy.String()
|
|
isText = true // Initial estimate, verify later
|
|
case t.Kind() == reflect.Slice && t.Elem() == reflect.TypeOf(byte(0)):
|
|
sx, sy = string(vx.Bytes()), string(vy.Bytes())
|
|
isBinary = true // Initial estimate, verify later
|
|
case t.Kind() == reflect.Array:
|
|
// Arrays need to be addressable for slice operations to work.
|
|
vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
|
|
vx2.Set(vx)
|
|
vy2.Set(vy)
|
|
vx, vy = vx2, vy2
|
|
}
|
|
if isText || isBinary {
|
|
var numLines, lastLineIdx, maxLineLen int
|
|
isBinary = !utf8.ValidString(sx) || !utf8.ValidString(sy)
|
|
for i, r := range sx + sy {
|
|
if !(unicode.IsPrint(r) || unicode.IsSpace(r)) || r == utf8.RuneError {
|
|
isBinary = true
|
|
break
|
|
}
|
|
if r == '\n' {
|
|
if maxLineLen < i-lastLineIdx {
|
|
maxLineLen = i - lastLineIdx
|
|
}
|
|
lastLineIdx = i + 1
|
|
numLines++
|
|
}
|
|
}
|
|
isText = !isBinary
|
|
isLinedText = isText && numLines >= 4 && maxLineLen <= 1024
|
|
}
|
|
|
|
// Format the string into printable records.
|
|
var list textList
|
|
var delim string
|
|
switch {
|
|
// If the text appears to be multi-lined text,
|
|
// then perform differencing across individual lines.
|
|
case isLinedText:
|
|
ssx := strings.Split(sx, "\n")
|
|
ssy := strings.Split(sy, "\n")
|
|
list = opts.formatDiffSlice(
|
|
reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
|
|
func(v reflect.Value, d diffMode) textRecord {
|
|
s := formatString(v.Index(0).String())
|
|
return textRecord{Diff: d, Value: textLine(s)}
|
|
},
|
|
)
|
|
delim = "\n"
|
|
|
|
// If possible, use a custom triple-quote (""") syntax for printing
|
|
// differences in a string literal. This format is more readable,
|
|
// but has edge-cases where differences are visually indistinguishable.
|
|
// This format is avoided under the following conditions:
|
|
// • A line starts with `"""`
|
|
// • A line starts with "..."
|
|
// • A line contains non-printable characters
|
|
// • Adjacent different lines differ only by whitespace
|
|
//
|
|
// For example:
|
|
// """
|
|
// ... // 3 identical lines
|
|
// foo
|
|
// bar
|
|
// - baz
|
|
// + BAZ
|
|
// """
|
|
isTripleQuoted := true
|
|
prevRemoveLines := map[string]bool{}
|
|
prevInsertLines := map[string]bool{}
|
|
var list2 textList
|
|
list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
|
|
for _, r := range list {
|
|
if !r.Value.Equal(textEllipsis) {
|
|
line, _ := strconv.Unquote(string(r.Value.(textLine)))
|
|
line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support
|
|
normLine := strings.Map(func(r rune) rune {
|
|
if unicode.IsSpace(r) {
|
|
return -1 // drop whitespace to avoid visually indistinguishable output
|
|
}
|
|
return r
|
|
}, line)
|
|
isPrintable := func(r rune) bool {
|
|
return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable
|
|
}
|
|
isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == ""
|
|
switch r.Diff {
|
|
case diffRemoved:
|
|
isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine]
|
|
prevRemoveLines[normLine] = true
|
|
case diffInserted:
|
|
isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine]
|
|
prevInsertLines[normLine] = true
|
|
}
|
|
if !isTripleQuoted {
|
|
break
|
|
}
|
|
r.Value = textLine(line)
|
|
r.ElideComma = true
|
|
}
|
|
if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group
|
|
prevRemoveLines = map[string]bool{}
|
|
prevInsertLines = map[string]bool{}
|
|
}
|
|
list2 = append(list2, r)
|
|
}
|
|
if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 {
|
|
list2 = list2[:len(list2)-1] // elide single empty line at the end
|
|
}
|
|
list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
|
|
if isTripleQuoted {
|
|
var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"}
|
|
switch t.Kind() {
|
|
case reflect.String:
|
|
if t != reflect.TypeOf(string("")) {
|
|
out = opts.FormatType(t, out)
|
|
}
|
|
case reflect.Slice:
|
|
// Always emit type for slices since the triple-quote syntax
|
|
// looks like a string (not a slice).
|
|
opts = opts.WithTypeMode(emitType)
|
|
out = opts.FormatType(t, out)
|
|
}
|
|
return out
|
|
}
|
|
|
|
// If the text appears to be single-lined text,
|
|
// then perform differencing in approximately fixed-sized chunks.
|
|
// The output is printed as quoted strings.
|
|
case isText:
|
|
list = opts.formatDiffSlice(
|
|
reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
|
|
func(v reflect.Value, d diffMode) textRecord {
|
|
s := formatString(v.String())
|
|
return textRecord{Diff: d, Value: textLine(s)}
|
|
},
|
|
)
|
|
delim = ""
|
|
|
|
// If the text appears to be binary data,
|
|
// then perform differencing in approximately fixed-sized chunks.
|
|
// The output is inspired by hexdump.
|
|
case isBinary:
|
|
list = opts.formatDiffSlice(
|
|
reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
|
|
func(v reflect.Value, d diffMode) textRecord {
|
|
var ss []string
|
|
for i := 0; i < v.Len(); i++ {
|
|
ss = append(ss, formatHex(v.Index(i).Uint()))
|
|
}
|
|
s := strings.Join(ss, ", ")
|
|
comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
|
|
return textRecord{Diff: d, Value: textLine(s), Comment: comment}
|
|
},
|
|
)
|
|
|
|
// For all other slices of primitive types,
|
|
// then perform differencing in approximately fixed-sized chunks.
|
|
// The size of each chunk depends on the width of the element kind.
|
|
default:
|
|
var chunkSize int
|
|
if t.Elem().Kind() == reflect.Bool {
|
|
chunkSize = 16
|
|
} else {
|
|
switch t.Elem().Bits() {
|
|
case 8:
|
|
chunkSize = 16
|
|
case 16:
|
|
chunkSize = 12
|
|
case 32:
|
|
chunkSize = 8
|
|
default:
|
|
chunkSize = 8
|
|
}
|
|
}
|
|
list = opts.formatDiffSlice(
|
|
vx, vy, chunkSize, t.Elem().Kind().String(),
|
|
func(v reflect.Value, d diffMode) textRecord {
|
|
var ss []string
|
|
for i := 0; i < v.Len(); i++ {
|
|
switch t.Elem().Kind() {
|
|
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
|
|
ss = append(ss, fmt.Sprint(v.Index(i).Int()))
|
|
case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64:
|
|
ss = append(ss, fmt.Sprint(v.Index(i).Uint()))
|
|
case reflect.Uint8, reflect.Uintptr:
|
|
ss = append(ss, formatHex(v.Index(i).Uint()))
|
|
case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
|
|
ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
|
|
}
|
|
}
|
|
s := strings.Join(ss, ", ")
|
|
return textRecord{Diff: d, Value: textLine(s)}
|
|
},
|
|
)
|
|
}
|
|
|
|
// Wrap the output with appropriate type information.
|
|
var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}
|
|
if !isText {
|
|
// The "{...}" byte-sequence literal is not valid Go syntax for strings.
|
|
// Emit the type for extra clarity (e.g. "string{...}").
|
|
if t.Kind() == reflect.String {
|
|
opts = opts.WithTypeMode(emitType)
|
|
}
|
|
return opts.FormatType(t, out)
|
|
}
|
|
switch t.Kind() {
|
|
case reflect.String:
|
|
out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
|
|
if t != reflect.TypeOf(string("")) {
|
|
out = opts.FormatType(t, out)
|
|
}
|
|
case reflect.Slice:
|
|
out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
|
|
if t != reflect.TypeOf([]byte(nil)) {
|
|
out = opts.FormatType(t, out)
|
|
}
|
|
}
|
|
return out
|
|
}
|
|
|
|
// formatASCII formats s as an ASCII string.
|
|
// This is useful for printing binary strings in a semi-legible way.
|
|
func formatASCII(s string) string {
|
|
b := bytes.Repeat([]byte{'.'}, len(s))
|
|
for i := 0; i < len(s); i++ {
|
|
if ' ' <= s[i] && s[i] <= '~' {
|
|
b[i] = s[i]
|
|
}
|
|
}
|
|
return string(b)
|
|
}
|
|
|
|
func (opts formatOptions) formatDiffSlice(
|
|
vx, vy reflect.Value, chunkSize int, name string,
|
|
makeRec func(reflect.Value, diffMode) textRecord,
|
|
) (list textList) {
|
|
es := diff.Difference(vx.Len(), vy.Len(), func(ix int, iy int) diff.Result {
|
|
return diff.BoolResult(vx.Index(ix).Interface() == vy.Index(iy).Interface())
|
|
})
|
|
|
|
appendChunks := func(v reflect.Value, d diffMode) int {
|
|
n0 := v.Len()
|
|
for v.Len() > 0 {
|
|
n := chunkSize
|
|
if n > v.Len() {
|
|
n = v.Len()
|
|
}
|
|
list = append(list, makeRec(v.Slice(0, n), d))
|
|
v = v.Slice(n, v.Len())
|
|
}
|
|
return n0 - v.Len()
|
|
}
|
|
|
|
var numDiffs int
|
|
maxLen := -1
|
|
if opts.LimitVerbosity {
|
|
maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc...
|
|
opts.VerbosityLevel--
|
|
}
|
|
|
|
groups := coalesceAdjacentEdits(name, es)
|
|
groups = coalesceInterveningIdentical(groups, chunkSize/4)
|
|
maxGroup := diffStats{Name: name}
|
|
for i, ds := range groups {
|
|
if maxLen >= 0 && numDiffs >= maxLen {
|
|
maxGroup = maxGroup.Append(ds)
|
|
continue
|
|
}
|
|
|
|
// Print equal.
|
|
if ds.NumDiff() == 0 {
|
|
// Compute the number of leading and trailing equal bytes to print.
|
|
var numLo, numHi int
|
|
numEqual := ds.NumIgnored + ds.NumIdentical
|
|
for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
|
|
numLo++
|
|
}
|
|
for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
|
|
numHi++
|
|
}
|
|
if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
|
|
numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
|
|
}
|
|
|
|
// Print the equal bytes.
|
|
appendChunks(vx.Slice(0, numLo), diffIdentical)
|
|
if numEqual > numLo+numHi {
|
|
ds.NumIdentical -= numLo + numHi
|
|
list.AppendEllipsis(ds)
|
|
}
|
|
appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
|
|
vx = vx.Slice(numEqual, vx.Len())
|
|
vy = vy.Slice(numEqual, vy.Len())
|
|
continue
|
|
}
|
|
|
|
// Print unequal.
|
|
len0 := len(list)
|
|
nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
|
|
vx = vx.Slice(nx, vx.Len())
|
|
ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
|
|
vy = vy.Slice(ny, vy.Len())
|
|
numDiffs += len(list) - len0
|
|
}
|
|
if maxGroup.IsZero() {
|
|
assert(vx.Len() == 0 && vy.Len() == 0)
|
|
} else {
|
|
list.AppendEllipsis(maxGroup)
|
|
}
|
|
return list
|
|
}
|
|
|
|
// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
|
|
// equal or unequal counts.
|
|
func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
|
|
var prevCase int // Arbitrary index into which case last occurred
|
|
lastStats := func(i int) *diffStats {
|
|
if prevCase != i {
|
|
groups = append(groups, diffStats{Name: name})
|
|
prevCase = i
|
|
}
|
|
return &groups[len(groups)-1]
|
|
}
|
|
for _, e := range es {
|
|
switch e {
|
|
case diff.Identity:
|
|
lastStats(1).NumIdentical++
|
|
case diff.UniqueX:
|
|
lastStats(2).NumRemoved++
|
|
case diff.UniqueY:
|
|
lastStats(2).NumInserted++
|
|
case diff.Modified:
|
|
lastStats(2).NumModified++
|
|
}
|
|
}
|
|
return groups
|
|
}
|
|
|
|
// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
|
|
// equal groups into adjacent unequal groups that currently result in a
|
|
// dual inserted/removed printout. This acts as a high-pass filter to smooth
|
|
// out high-frequency changes within the windowSize.
|
|
func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
|
|
groups, groupsOrig := groups[:0], groups
|
|
for i, ds := range groupsOrig {
|
|
if len(groups) >= 2 && ds.NumDiff() > 0 {
|
|
prev := &groups[len(groups)-2] // Unequal group
|
|
curr := &groups[len(groups)-1] // Equal group
|
|
next := &groupsOrig[i] // Unequal group
|
|
hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
|
|
hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
|
|
if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
|
|
*prev = prev.Append(*curr).Append(*next)
|
|
groups = groups[:len(groups)-1] // Truncate off equal group
|
|
continue
|
|
}
|
|
}
|
|
groups = append(groups, ds)
|
|
}
|
|
return groups
|
|
}
|