bugfix: generic search wasn't finding matches ending with a substitution

This commit is contained in:
Tal Einat 2014-03-19 23:46:34 +02:00
parent 7148d3b0d5
commit 39000bda80
3 changed files with 24 additions and 0 deletions

View File

@ -83,6 +83,16 @@ def find_near_matches_generic_linear_programming(subsequence, sequence,
subseq_index=cand.subseq_index + 1,
l_dist=cand.l_dist + 1,
))
else:
# cand.subseq_index == _subseq_len - 1
if (
cand.n_subs < max_substitutions or
(
cand.n_dels < max_deletions and
cand.n_ins < max_insertions
)
):
yield Match(cand.start, index + 1, cand.l_dist + 1)
# try skipping subsequence chars
for n_skipped in xrange(1, min(max_deletions - cand.n_dels, max_l_dist - cand.l_dist) + 1):

View File

@ -3,6 +3,7 @@ from tests.test_levenshtein import TestFindNearMatchesLevenshteinBase
from fuzzysearch.common import Match, get_best_match_in_group, group_matches
from fuzzysearch.generic_search import \
find_near_matches_generic_linear_programming as fnm_generic_lp
from tests.test_substitutions_only import TestSubstitionsOnlyBase
class TestGenericSearchAsLevenshtein(TestFindNearMatchesLevenshteinBase,
@ -17,6 +18,14 @@ class TestGenericSearchAsLevenshtein(TestFindNearMatchesLevenshteinBase,
]
class TestGenericSearchAsSubstitutionsOnly(TestSubstitionsOnlyBase,
                                           unittest.TestCase):
    """Run the test cases inherited from TestSubstitionsOnlyBase against
    the generic linear-programming search."""

    def search(self, subsequence, sequence, max_subs):
        """Return a list of the matches the generic search yields.

        ``max_subs`` is passed twice and the two limits in between are
        fixed at zero.
        """
        # NOTE(review): the four trailing positional arguments are presumably
        # (max_substitutions, max_insertions, max_deletions, max_l_dist);
        # confirm against the signature of the searched function.
        found = fnm_generic_lp(
            subsequence, sequence, max_subs, 0, 0, max_subs,
        )
        return list(found)
class TestGenericSearch(unittest.TestCase):
def search(self, pattern, sequence, max_subs, max_ins, max_dels,
max_l_dist=None):

View File

@ -198,6 +198,11 @@ class TestSubstitionsOnlyBase(object):
Match(start=99, end=109, dist=0)],
)
def test_missing_at_beginning(self):
    """Searching for "ATTEST" in "TESTOSTERONE" with max_subs=2 finds nothing.

    Only the tail of the pattern ("TEST") occurs in the sequence, right at
    its start; the leading "AT" has no counterpart there.
    """
    # NOTE(review): presumably guards against reporting a match that would
    # have to begin before the start of the sequence -- confirm intent.
    found = self.search("ATTEST","TESTOSTERONE", max_subs=2)
    self.assertEqual(found, [])
class TestFindNearMatchesSubstitionsLinearProgramming(TestSubstitionsOnlyBase, unittest.TestCase):
def search(self, subsequence, sequence, max_subs):