From f5a907c4de07ea6c1f62b83c5998af70a8851dbf Mon Sep 17 00:00:00 2001 From: David McKean Date: Thu, 24 Jul 2014 16:19:27 -0600 Subject: [PATCH] Changed jaro-winkler to only perform winkler step if both strings are at least 4 characters long --- cjellyfish/jaro.c | 2 +- jellyfish/_jellyfish.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cjellyfish/jaro.c b/cjellyfish/jaro.c index c6a16f1..e9e548c 100644 --- a/cjellyfish/jaro.c +++ b/cjellyfish/jaro.c @@ -105,7 +105,7 @@ double _jaro_winkler(const char *ying, const char *yang, weight /= 3.0; // Continue to boost the weight if the strings are similar - if (winklerize && weight > 0.7) { + if (winklerize && weight > 0.7 && ying_length > 3 && yang_length > 3) { // Adjust for having up to the first 4 characters in common j = (min_len >= 4) ? 4 : min_len; diff --git a/jellyfish/_jellyfish.py b/jellyfish/_jellyfish.py index 8805bef..66ef072 100644 --- a/jellyfish/_jellyfish.py +++ b/jellyfish/_jellyfish.py @@ -84,7 +84,7 @@ def _jaro_winkler(ying, yang, long_tolerance, winklerize): (common_chars-trans_count) / common_chars)) / 3 # winkler modification: continue to boost if strings are similar - if winklerize and weight > 0.7: + if winklerize and weight > 0.7 and ying_len > 3 and yang_len > 3: # adjust for up to first 4 chars in common j = max(min_len, 4) i = 0