// Package levenshtein is a Go implementation to calculate Levenshtein Distance. // // Implementation taken from // https://gist.github.com/andrei-m/982927#gistcomment-1931258 package levenshtein import "unicode/utf8" // minLengthThreshold is the length of the string beyond which // an allocation will be made. Strings smaller than this will be // zero alloc. const minLengthThreshold = 32 // ComputeDistance computes the levenshtein distance between the two // strings passed as an argument. The return value is the levenshtein distance // // Works on runes (Unicode code points) but does not normalize // the input strings. See https://blog.golang.org/normalization // and the golang.org/x/text/unicode/norm package. func ComputeDistance(a, b string) int { if len(a) == 0 { return utf8.RuneCountInString(b) } if len(b) == 0 { return utf8.RuneCountInString(a) } if a == b { return 0 } // We need to convert to []rune if the strings are non-ASCII. // This could be avoided by using utf8.RuneCountInString // and then doing some juggling with rune indices, // but leads to far more bounds checks. It is a reasonable trade-off. s1 := []rune(a) s2 := []rune(b) // swap to save some memory O(min(a,b)) instead of O(a) if len(s1) > len(s2) { s1, s2 = s2, s1 } lenS1 := len(s1) lenS2 := len(s2) // Init the row. var x []uint16 if lenS1+1 > minLengthThreshold { x = make([]uint16, lenS1+1) } else { // We make a small optimization here for small strings. // Because a slice of constant length is effectively an array, // it does not allocate. So we can re-slice it to the right length // as long as it is below a desired threshold. x = make([]uint16, minLengthThreshold) x = x[:lenS1+1] } // we start from 1 because index 0 is already 0. for i := 1; i < len(x); i++ { x[i] = uint16(i) } // make a dummy bounds check to prevent the 2 bounds check down below. // The one inside the loop is particularly costly. _ = x[lenS1] // fill in the rest for i := 1; i <= lenS2; i++ { prev := uint16(i) for j := 1; j <= lenS1; j++ { current := x[j-1] // match if s2[i-1] != s1[j-1] { current = min(min(x[j-1]+1, prev+1), x[j]+1) } x[j-1] = prev prev = current } x[lenS1] = prev } return int(x[lenS1]) } func min(a, b uint16) uint16 { if a < b { return a } return b }