From 39000bda807a1fe8a8dae6ecf0b1c0df780b8487 Mon Sep 17 00:00:00 2001 From: Tal Einat Date: Wed, 19 Mar 2014 23:46:34 +0200 Subject: [PATCH] bugfix: generic search wasn't finding matches ending with a sub --- fuzzysearch/generic_search.py | 10 ++++++++++ tests/test_generic_search.py | 9 +++++++++ tests/test_substitutions_only.py | 5 +++++ 3 files changed, 24 insertions(+) diff --git a/fuzzysearch/generic_search.py b/fuzzysearch/generic_search.py index ad359bb..14b74fd 100644 --- a/fuzzysearch/generic_search.py +++ b/fuzzysearch/generic_search.py @@ -83,6 +83,16 @@ def find_near_matches_generic_linear_programming(subsequence, sequence, subseq_index=cand.subseq_index + 1, l_dist=cand.l_dist + 1, )) + else: + # cand.subseq_index == _subseq_len - 1 + if ( + cand.n_subs < max_substitutions or + ( + cand.n_dels < max_deletions and + cand.n_ins < max_insertions + ) + ): + yield Match(cand.start, index + 1, cand.l_dist + 1) # try skipping subsequence chars for n_skipped in xrange(1, min(max_deletions - cand.n_dels, max_l_dist - cand.l_dist) + 1): diff --git a/tests/test_generic_search.py b/tests/test_generic_search.py index 1869f2a..74d768a 100644 --- a/tests/test_generic_search.py +++ b/tests/test_generic_search.py @@ -3,6 +3,7 @@ from tests.test_levenshtein import TestFindNearMatchesLevenshteinBase from fuzzysearch.common import Match, get_best_match_in_group, group_matches from fuzzysearch.generic_search import \ find_near_matches_generic_linear_programming as fnm_generic_lp +from tests.test_substitutions_only import TestSubstitionsOnlyBase class TestGenericSearchAsLevenshtein(TestFindNearMatchesLevenshteinBase, @@ -17,6 +18,14 @@ class TestGenericSearchAsLevenshtein(TestFindNearMatchesLevenshteinBase, ] +class TestGenericSearchAsSubstitutionsOnly(TestSubstitionsOnlyBase, + unittest.TestCase): + def search(self, subsequence, sequence, max_subs): + return list( + fnm_generic_lp(subsequence, sequence, max_subs, 0, 0, max_subs) + ) + + class TestGenericSearch(unittest.TestCase): def search(self, pattern, sequence, max_subs, max_ins, max_dels, max_l_dist=None): diff --git a/tests/test_substitutions_only.py b/tests/test_substitutions_only.py index 2163680..f59bc50 100644 --- a/tests/test_substitutions_only.py +++ b/tests/test_substitutions_only.py @@ -198,6 +198,11 @@ class TestSubstitionsOnlyBase(object): Match(start=99, end=109, dist=0)], ) + def test_missing_at_beginning(self): + self.assertEqual( + self.search("ATTEST","TESTOSTERONE", max_subs=2), + [], + ) class TestFindNearMatchesSubstitionsLinearProgramming(TestSubstitionsOnlyBase, unittest.TestCase): def search(self, subsequence, sequence, max_subs):