You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
153 lines
4.2 KiB
Go
153 lines
4.2 KiB
Go
5 years ago
|
// Copyright 2016 ALRUX Inc.
|
||
|
//
|
||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
// you may not use this file except in compliance with the License.
|
||
|
// You may obtain a copy of the License at
|
||
|
//
|
||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||
|
//
|
||
|
// Unless required by applicable law or agreed to in writing, software
|
||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
// See the License for the specific language governing permissions and
|
||
|
// limitations under the License.
|
||
|
|
||
|
package levenshtein
|
||
|
|
||
|
// Params holds the tunable values consumed by the formulas that compute the
// Levenshtein string metrics. Values are changed through the chainable
// setter methods defined on *Params.
type Params struct {
	// Costs charged for a single insertion, substitution and deletion.
	insCost, subCost, delCost int

	// maxCost is the maximum cost; 0 means unlimited.
	maxCost int

	// minScore is the minimum similarity score; lower scores are reported as 0.
	minScore float64

	// bonusPrefix is the maximum length of common prefix considered for bonus.
	bonusPrefix int

	// bonusScale is the scaling factor used in calculating the bonus.
	bonusScale float64

	// bonusThreshold is the minimum similarity score eligible for a bonus.
	bonusThreshold float64
}
|
||
|
|
||
|
var (
|
||
|
defaultParams = NewParams()
|
||
|
)
|
||
|
|
||
|
// NewParams creates a new set of parameters and initializes it with the default values.
|
||
|
func NewParams() *Params {
|
||
|
return &Params{
|
||
|
insCost: 1,
|
||
|
subCost: 1,
|
||
|
delCost: 1,
|
||
|
maxCost: 0,
|
||
|
minScore: 0,
|
||
|
bonusPrefix: 4,
|
||
|
bonusScale: .1,
|
||
|
bonusThreshold: .7,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Clone returns a pointer to a copy of the receiver parameter set, or of a new
|
||
|
// default parameter set if the receiver is nil.
|
||
|
func (p *Params) Clone() *Params {
|
||
|
if p == nil {
|
||
|
return NewParams()
|
||
|
}
|
||
|
return &Params{
|
||
|
insCost: p.insCost,
|
||
|
subCost: p.subCost,
|
||
|
delCost: p.delCost,
|
||
|
maxCost: p.maxCost,
|
||
|
minScore: p.minScore,
|
||
|
bonusPrefix: p.bonusPrefix,
|
||
|
bonusScale: p.bonusScale,
|
||
|
bonusThreshold: p.bonusThreshold,
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// InsCost overrides the default value of 1 for the cost of insertion.
|
||
|
// The new value must be zero or positive.
|
||
|
func (p *Params) InsCost(v int) *Params {
|
||
|
if v >= 0 {
|
||
|
p.insCost = v
|
||
|
}
|
||
|
return p
|
||
|
}
|
||
|
|
||
|
// SubCost overrides the default value of 1 for the cost of substitution.
|
||
|
// The new value must be zero or positive.
|
||
|
func (p *Params) SubCost(v int) *Params {
|
||
|
if v >= 0 {
|
||
|
p.subCost = v
|
||
|
}
|
||
|
return p
|
||
|
}
|
||
|
|
||
|
// DelCost overrides the default value of 1 for the cost of deletion.
|
||
|
// The new value must be zero or positive.
|
||
|
func (p *Params) DelCost(v int) *Params {
|
||
|
if v >= 0 {
|
||
|
p.delCost = v
|
||
|
}
|
||
|
return p
|
||
|
}
|
||
|
|
||
|
// MaxCost overrides the default value of 0 (meaning unlimited) for the maximum cost.
|
||
|
// The calculation of Distance() stops when the result is guaranteed to exceed
|
||
|
// this maximum, returning a lower-bound rather than exact value.
|
||
|
// The new value must be zero or positive.
|
||
|
func (p *Params) MaxCost(v int) *Params {
|
||
|
if v >= 0 {
|
||
|
p.maxCost = v
|
||
|
}
|
||
|
return p
|
||
|
}
|
||
|
|
||
|
// MinScore overrides the default value of 0 for the minimum similarity score.
|
||
|
// Scores below this threshold are returned as 0 by Similarity() and Match().
|
||
|
// The new value must be zero or positive. Note that a minimum greater than 1
|
||
|
// can never be satisfied, resulting in a score of 0 for any pair of strings.
|
||
|
func (p *Params) MinScore(v float64) *Params {
|
||
|
if v >= 0 {
|
||
|
p.minScore = v
|
||
|
}
|
||
|
return p
|
||
|
}
|
||
|
|
||
|
// BonusPrefix overrides the default value for the maximum length of
|
||
|
// common prefix to be considered for bonus by Match().
|
||
|
// The new value must be zero or positive.
|
||
|
func (p *Params) BonusPrefix(v int) *Params {
|
||
|
if v >= 0 {
|
||
|
p.bonusPrefix = v
|
||
|
}
|
||
|
return p
|
||
|
}
|
||
|
|
||
|
// BonusScale overrides the default value for the scaling factor used by Match()
|
||
|
// in calculating the bonus.
|
||
|
// The new value must be zero or positive. To guarantee that the similarity score
|
||
|
// remains in the interval 0..1, this scaling factor is not allowed to exceed
|
||
|
// 1 / BonusPrefix.
|
||
|
func (p *Params) BonusScale(v float64) *Params {
|
||
|
if v >= 0 {
|
||
|
p.bonusScale = v
|
||
|
}
|
||
|
|
||
|
// the bonus cannot exceed (1-sim), or the score may become greater than 1.
|
||
|
if float64(p.bonusPrefix)*p.bonusScale > 1 {
|
||
|
p.bonusScale = 1 / float64(p.bonusPrefix)
|
||
|
}
|
||
|
|
||
|
return p
|
||
|
}
|
||
|
|
||
|
// BonusThreshold overrides the default value for the minimum similarity score
|
||
|
// for which Match() can assign a bonus.
|
||
|
// The new value must be zero or positive. Note that a threshold greater than 1
|
||
|
// effectively makes Match() become the equivalent of Similarity().
|
||
|
func (p *Params) BonusThreshold(v float64) *Params {
|
||
|
if v >= 0 {
|
||
|
p.bonusThreshold = v
|
||
|
}
|
||
|
return p
|
||
|
}
|