bugfix: generic search wasn't finding matches ending with a substitution
This commit is contained in:
parent
7148d3b0d5
commit
39000bda80
|
@ -83,6 +83,16 @@ def find_near_matches_generic_linear_programming(subsequence, sequence,
|
|||
subseq_index=cand.subseq_index + 1,
|
||||
l_dist=cand.l_dist + 1,
|
||||
))
|
||||
else:
|
||||
# cand.subseq_index == _subseq_len - 1
|
||||
if (
|
||||
cand.n_subs < max_substitutions or
|
||||
(
|
||||
cand.n_dels < max_deletions and
|
||||
cand.n_ins < max_insertions
|
||||
)
|
||||
):
|
||||
yield Match(cand.start, index + 1, cand.l_dist + 1)
|
||||
|
||||
# try skipping subsequence chars
|
||||
for n_skipped in xrange(1, min(max_deletions - cand.n_dels, max_l_dist - cand.l_dist) + 1):
|
||||
|
|
|
@ -3,6 +3,7 @@ from tests.test_levenshtein import TestFindNearMatchesLevenshteinBase
|
|||
from fuzzysearch.common import Match, get_best_match_in_group, group_matches
|
||||
from fuzzysearch.generic_search import \
|
||||
find_near_matches_generic_linear_programming as fnm_generic_lp
|
||||
from tests.test_substitutions_only import TestSubstitionsOnlyBase
|
||||
|
||||
|
||||
class TestGenericSearchAsLevenshtein(TestFindNearMatchesLevenshteinBase,
|
||||
|
@ -17,6 +18,14 @@ class TestGenericSearchAsLevenshtein(TestFindNearMatchesLevenshteinBase,
|
|||
]
|
||||
|
||||
|
||||
class TestGenericSearchAsSubstitutionsOnly(TestSubstitionsOnlyBase,
|
||||
unittest.TestCase):
|
||||
def search(self, subsequence, sequence, max_subs):
|
||||
return list(
|
||||
fnm_generic_lp(subsequence, sequence, max_subs, 0, 0, max_subs)
|
||||
)
|
||||
|
||||
|
||||
class TestGenericSearch(unittest.TestCase):
|
||||
def search(self, pattern, sequence, max_subs, max_ins, max_dels,
|
||||
max_l_dist=None):
|
||||
|
|
|
@ -198,6 +198,11 @@ class TestSubstitionsOnlyBase(object):
|
|||
Match(start=99, end=109, dist=0)],
|
||||
)
|
||||
|
||||
def test_missing_at_beginning(self):
|
||||
self.assertEqual(
|
||||
self.search("ATTEST","TESTOSTERONE", max_subs=2),
|
||||
[],
|
||||
)
|
||||
|
||||
class TestFindNearMatchesSubstitionsLinearProgramming(TestSubstitionsOnlyBase, unittest.TestCase):
|
||||
def search(self, subsequence, sequence, max_subs):
|
||||
|
|
Loading…
Reference in New Issue