Changed Jaro-Winkler to perform the Winkler step only if both strings are at least 4 characters long
This commit is contained in:
parent
8e7689fba8
commit
f5a907c4de
|
@@ -105,7 +105,7 @@ double _jaro_winkler(const char *ying, const char *yang,
|
|||
weight /= 3.0;
|
||||
|
||||
// Continue to boost the weight if the strings are similar
|
||||
if (winklerize && weight > 0.7) {
|
||||
if (winklerize && weight > 0.7 && ying_length > 3 && yang_length > 3) {
|
||||
|
||||
// Adjust for having up to the first 4 characters in common
|
||||
j = (min_len >= 4) ? 4 : min_len;
|
||||
|
|
|
@@ -84,7 +84,7 @@ def _jaro_winkler(ying, yang, long_tolerance, winklerize):
|
|||
(common_chars-trans_count) / common_chars)) / 3
|
||||
|
||||
# winkler modification: continue to boost if strings are similar
|
||||
if winklerize and weight > 0.7:
|
||||
if winklerize and weight > 0.7 and ying_len > 3 and yang_len > 3:
|
||||
# adjust for up to first 4 chars in common
|
||||
j = max(min_len, 4)
|
||||
i = 0
|
||||
|
|
Loading…
Reference in New Issue