2014-03-28 07:49:21 +00:00
|
|
|
from tests.compat import unittest
|
|
|
|
from tests.test_levenshtein import TestFindNearMatchesLevenshteinBase
|
|
|
|
from fuzzysearch.common import Match, get_best_match_in_group, group_matches
|
|
|
|
from tests.test_substitutions_only import TestSubstitionsOnlyBase
|
2014-05-06 17:51:04 +00:00
|
|
|
from tests.test_generic_search import TestGenericSearchBase
|
2014-03-28 07:49:21 +00:00
|
|
|
|
2014-04-18 22:31:32 +00:00
|
|
|
try:
|
|
|
|
from fuzzysearch._generic_search import \
|
2014-04-24 16:37:59 +00:00
|
|
|
c_find_near_matches_generic_linear_programming as c_fnm_generic_lp, \
|
|
|
|
c_find_near_matches_generic_ngrams as c_fnm_generic_ngrams
|
2014-04-18 22:31:32 +00:00
|
|
|
except ImportError:
|
|
|
|
pass
|
|
|
|
else:
|
2014-04-24 16:37:59 +00:00
|
|
|
class TestGenericSearchLpAsLevenshtein(TestFindNearMatchesLevenshteinBase,
                                       unittest.TestCase):
    """Levenshtein test cases, run through ``c_fnm_generic_lp``.

    The single ``max_l_dist`` budget is supplied for all four numeric
    limits of the generic search function.
    """

    def search(self, subsequence, sequence, max_l_dist):
        """Return the best match from each group of search results."""
        # The search function is handed ASCII-encoded bytes, not text.
        needle = subsequence.encode('ascii')
        haystack = sequence.encode('ascii')
        candidates = c_fnm_generic_lp(needle, haystack,
                                      max_l_dist, max_l_dist,
                                      max_l_dist, max_l_dist)

        # Group the raw results and keep a single representative per group.
        best_matches = []
        for match_group in group_matches(candidates):
            best_matches.append(get_best_match_in_group(match_group))
        return best_matches
|
|
|
|
|
2014-04-24 16:37:59 +00:00
|
|
|
class TestGenericSearchNgramsAsLevenshtein(
        TestFindNearMatchesLevenshteinBase, unittest.TestCase):
    """Levenshtein test cases, run through ``c_fnm_generic_ngrams``.

    The single ``max_l_dist`` budget is supplied for all four numeric
    limits of the generic search function.
    """

    def search(self, subsequence, sequence, max_l_dist):
        """Return the best match from each group of search results."""
        # The search function is handed ASCII-encoded bytes, not text.
        needle = subsequence.encode('ascii')
        haystack = sequence.encode('ascii')
        candidates = c_fnm_generic_ngrams(needle, haystack,
                                          max_l_dist, max_l_dist,
                                          max_l_dist, max_l_dist)

        # Group the raw results and keep a single representative per group.
        best_matches = []
        for match_group in group_matches(candidates):
            best_matches.append(get_best_match_in_group(match_group))
        return best_matches
|
2014-04-18 22:31:32 +00:00
|
|
|
|
2014-04-24 16:37:59 +00:00
|
|
|
|
|
|
|
class TestGenericSearchLpAsSubstitutionsOnly(TestSubstitionsOnlyBase,
                                             unittest.TestCase):
    """Substitutions-only test cases, run through ``c_fnm_generic_lp``."""

    def search(self, subsequence, sequence, max_subs):
        """Return all results of the search as a list.

        ``max_subs`` is passed as the first and the last numeric limit and
        the two limits in between are zero.
        """
        # The search function is handed ASCII-encoded bytes, not text.
        needle = subsequence.encode('ascii')
        haystack = sequence.encode('ascii')
        found = c_fnm_generic_lp(needle, haystack, max_subs, 0, 0, max_subs)
        return list(found)
|
2014-03-28 07:49:21 +00:00
|
|
|
|
|
|
|
|
2014-04-24 16:37:59 +00:00
|
|
|
class TestGenericSearchNgramsAsSubstitutionsOnly(TestSubstitionsOnlyBase,
                                                 unittest.TestCase):
    """Substitutions-only test cases, run through ``c_fnm_generic_ngrams``."""

    def search(self, subsequence, sequence, max_subs):
        """Return the best match from each group of search results.

        ``max_subs`` is passed as the first and the last numeric limit and
        the two limits in between are zero.
        """
        # The search function is handed ASCII-encoded bytes, not text.
        needle = subsequence.encode('ascii')
        haystack = sequence.encode('ascii')
        candidates = c_fnm_generic_ngrams(needle, haystack,
                                          max_subs, 0, 0, max_subs)

        # Group the raw results and keep a single representative per group.
        best_matches = []
        for match_group in group_matches(candidates):
            best_matches.append(get_best_match_in_group(match_group))
        return best_matches

    # Kept as an explicit override so the inherited test is reported as
    # skipped for this search implementation.
    @unittest.skip("Ngrams search doesn't return overlapping matches")
    def test_double_first_item(self):
        parent = super(TestGenericSearchNgramsAsSubstitutionsOnly, self)
        return parent.test_double_first_item()
|
|
|
|
|
2014-04-18 22:31:32 +00:00
|
|
|
|
2014-04-24 16:37:59 +00:00
|
|
|
class TestGenericSearchLp(TestGenericSearchBase, unittest.TestCase):
    """Generic-search test cases, run through ``c_fnm_generic_lp``."""

    def search(self, pattern, sequence, max_subs, max_ins, max_dels,
               max_l_dist=None):
        """Return every result of ``c_fnm_generic_lp`` as a list."""
        # The search function is handed ASCII-encoded bytes, not text.
        encoded_pattern = pattern.encode('ascii')
        encoded_sequence = sequence.encode('ascii')
        found = c_fnm_generic_lp(encoded_pattern, encoded_sequence,
                                 max_subs, max_ins, max_dels, max_l_dist)
        return list(found)

    def expectedOutcomes(self, search_result, expected_outcomes,
                         *args, **kw):
        """Assert that ``search_result`` equals ``expected_outcomes``."""
        self.assertEqual(search_result, expected_outcomes, *args, **kw)

    def test_double_first_item_two_results(self):
        # pattern = 'def', sequence = 'abcddefg'
        # limits: 0 substitutions, 1 insertion, 0 deletions
        found = self.search('def', 'abcddefg', 0, 1, 0)
        expected = [
            Match(start=3, end=7, dist=1),
            Match(start=4, end=7, dist=0),
        ]
        self.assertEqual(found, expected)

    def test_missing_second_item_complex(self):
        # With an overall distance limit of 1, the result list must be
        # exactly these three matches.
        found_strict = self.search('bde', 'abcdefg', 1, 1, 1, 1)
        expected_strict = [
            Match(start=1, end=5, dist=1),
            Match(start=2, end=5, dist=1),
            Match(start=3, end=5, dist=1),
        ]
        self.assertEqual(found_strict, expected_strict)

        # With an overall distance limit of 3, these matches must all be
        # present; additional matches are tolerated.
        required = set([
            Match(start=1, end=5, dist=1),
            Match(start=2, end=5, dist=1),
            Match(start=3, end=5, dist=1),
            Match(start=2, end=5, dist=3),
        ])
        found_loose = set(self.search('bde', 'abcdefg', 1, 1, 1, 3))
        self.assertTrue(required.issubset(found_loose))
|
|
|
|
|
|
|
|
class TestGenericSearchNgrams(TestGenericSearchBase, unittest.TestCase):
    """Generic-search test cases, run through ``c_fnm_generic_ngrams``.

    ``search()`` reduces the raw results to one best match per group, so
    ``expectedOutcomes()`` reduces the expected matches the same way
    before comparing.
    """

    def search(self, pattern, sequence, max_subs, max_ins, max_dels,
               max_l_dist=None):
        """Return the best match from each group of search results."""
        return [
            get_best_match_in_group(group)
            for group in group_matches(
                # The search function is handed ASCII-encoded bytes.
                c_fnm_generic_ngrams(pattern.encode('ascii'),
                                     sequence.encode('ascii'),
                                     max_subs, max_ins,
                                     max_dels, max_l_dist)
            )
        ]

    def expectedOutcomes(self, search_result, expected_outcomes,
                         *args, **kw):
        """Assert ``search_result`` equals the grouped expected outcomes.

        Extra positional and keyword arguments are forwarded to
        ``assertEqual``.
        """
        # Group the *expected* matches (not the search result itself):
        # comparing the search result against a reduction of itself would
        # never look at ``expected_outcomes`` at all.
        best_from_groups = [
            get_best_match_in_group(group)
            for group in group_matches(expected_outcomes)
        ]
        self.assertEqual(search_result, best_from_groups, *args, **kw)

    def test_missing_second_item_complex(self):
        # Only a subset check: every match returned must be one of these
        # three, but not all of them need to be returned.
        self.assertTrue(
            set(self.search('bde', 'abcdefg', 1, 1, 1, 1)).issubset([
                Match(start=1, end=5, dist=1),
                Match(start=2, end=5, dist=1),
                Match(start=3, end=5, dist=1),
            ])
        )
|