Fix segfault in Damerau-Levenshtein C code.
If one of the characters had a value of 128 or above, it would be treated as a signed char and converted to a negative number, resulting in an array lookup with a negative index. The somewhat contrived test case given here -- comparing a space with a non-breaking space -- reproduces the segmentation fault that occurred prior to the fix. This also makes a Clang warning go away. Thanks, compiler! :-)
This commit is contained in:
parent
77df97b59b
commit
b9bbb0d450
|
@ -40,7 +40,7 @@ int damerau_levenshtein_distance(const char *s1, const char *s2)
|
|||
for (i = 1; i <= len1; i++) {
|
||||
db = 0;
|
||||
for (j = 1; j <= len2; j++) {
|
||||
i1 = da[(size_t)(s2[j-1])];
|
||||
i1 = da[(unsigned char)s2[j-1]];
|
||||
j1 = db;
|
||||
|
||||
if (s1[i - 1] == s2[j - 1]) {
|
||||
|
@ -58,7 +58,7 @@ int damerau_levenshtein_distance(const char *s1, const char *s2)
|
|||
dist[((i+1)*cols) + j + 1] = MIN(MIN(d1, d2), MIN(d3, d4));
|
||||
}
|
||||
|
||||
da[s1[i-1]] = i;
|
||||
da[(unsigned char)s1[i-1]] = i;
|
||||
}
|
||||
|
||||
result = dist[((len1+1) * cols) + len2 + 1];
|
||||
|
|
|
@ -59,7 +59,8 @@ class JellyfishTests(object):
|
|||
("abcd", "acb", 2),
|
||||
("cape sand recycling ", "edith ann graham", 17),
|
||||
("jellyifhs", "jellyfish", 2),
|
||||
("ifhs", "fish", 2)]
|
||||
("ifhs", "fish", 2),
|
||||
("Hello, world!", "Hello,\xc2\xa0world!", 2)]
|
||||
|
||||
for (s1, s2, value) in cases:
|
||||
self.assertEqual(self.jf.damerau_levenshtein_distance(s1, s2), value)
|
||||
|
|
Loading…
Reference in New Issue