fuzzysearch/tests/test_common.py

176 lines
5.6 KiB
Python

from fuzzysearch.common import Match, group_matches, GroupOfMatches, \
search_exact, _count_differences_with_maximum
from tests.compat import unittest
from six import b, u
class TestGroupOfMatches(unittest.TestCase):
def test_is_match_in_group(self):
match = Match(2, 4, 0)
group = GroupOfMatches(match)
self.assertTrue(group.is_match_in_group(match))
self.assertTrue(group.is_match_in_group(Match(2, 4, 0)))
class TestGroupMatches(unittest.TestCase):
def test_separate(self):
matches = [
Match(start=19, end=29, dist=1),
Match(start=42, end=52, dist=1),
Match(start=99, end=109, dist=0),
]
self.assertEqual(
group_matches(matches),
[set([m]) for m in matches],
)
def test_separate_with_duplicate(self):
matches = [
Match(start=19, end=29, dist=1),
Match(start=42, end=52, dist=1),
Match(start=99, end=109, dist=0),
]
self.assertEqual(
group_matches(matches + [matches[1]]),
[set([m]) for m in matches],
)
class TestSearchExactBase(object):
def search(self, sequence, subsequence):
raise NotImplementedError
def test_empty_sequence(self):
self.assertEqual(self.search('PATTERN', ''), [])
def test_empty_subsequence(self):
with self.assertRaises(ValueError):
self.search('', 'TEXT')
def test_match_identical_sequence(self):
self.assertEqual(self.search('PATTERN', 'PATTERN'), [0])
def test_substring(self):
substring = 'PATTERN'
text = 'aaaaaaaaaaPATTERNaaaaaaaaa'
self.assertEqual(self.search(substring, text), [10])
def test_double_first_item(self):
self.assertEqual(self.search('def', 'abcddefg'), [4])
def test_missing_second_item(self):
self.assertEqual(self.search('bde', 'abcdefg'), [])
def test_completely_different(self):
self.assertEqual(self.search('abc', 'def'), [])
def test_startswith(self):
self.assertEqual(self.search('abc', 'abcd'), [0])
def test_endswith(self):
self.assertEqual(self.search('bcd', 'abcd'), [1])
class TestSearchExact(TestSearchExactBase, unittest.TestCase):
def search(self, sequence, subsequence):
return list(search_exact(sequence, subsequence))
def test_bytes(self):
text = b('abc')
self.assertEqual(self.search(text, text), [0])
def test_unicode_identical(self):
text = u('abc')
self.assertEqual(self.search(text, text), [0])
def test_unicode_substring(self):
pattern = u('\u03A3\u0393')
text = u('\u03A0\u03A3\u0393\u0394')
self.assertEqual(self.search(pattern, text), [1])
def test_biopython_Seq(self):
try:
from Bio.Seq import Seq
except ImportError:
raise unittest.SkipTest('Test requires BioPython')
else:
self.assertEqual(self.search(Seq('abc'), Seq('abc')), [0])
class TestCountDifferencesWithMaximumBase(object):
def count_diffs(self, seq1, seq2, max_diffs):
raise NotImplementedError
def test_empty(self):
result = self.count_diffs(b'', b'', 1)
self.assertEqual(result, 0)
def test_identical_one_character(self):
result = self.count_diffs(b'a', b'a', 1)
self.assertEqual(result, 0)
def test_identical_word(self):
result = self.count_diffs(b'word', b'word', 1)
self.assertEqual(result, 0)
def test_identical_long(self):
result = self.count_diffs(b'long'*10, b'long'*10, 1)
self.assertEqual(result, 0)
def test_different_less_than_max(self):
result = self.count_diffs(b'abc', b'def', 4)
self.assertEqual(result, 3)
def test_different_more_than_max(self):
result = self.count_diffs(b'abc', b'def', 2)
self.assertEqual(result, 2)
def test_partially_different_in_middle(self):
result = self.count_diffs(b'abcdef', b'a--d-f', 4)
self.assertEqual(result, 3)
result = self.count_diffs(b'abcdef', b'a--d-f', 2)
self.assertEqual(result, 2)
def test_partially_different_at_start(self):
result = self.count_diffs(b'abcdef', b'--c-ef', 4)
self.assertEqual(result, 3)
result = self.count_diffs(b'abcdef', b'--c-ef', 2)
self.assertEqual(result, 2)
def test_partially_different_at_end(self):
result = self.count_diffs(b'abcdef', b'ab-d--', 4)
self.assertEqual(result, 3)
result = self.count_diffs(b'abcdef', b'ab-d--', 2)
self.assertEqual(result, 2)
class TestCountDifferencesWithMaximum(TestCountDifferencesWithMaximumBase,
unittest.TestCase):
def count_diffs(self, seq1, seq2, max_diffs):
return _count_differences_with_maximum(seq1, seq2, max_diffs)
try:
from fuzzysearch._common import count_differences_with_maximum_byteslike, \
search_exact_byteslike
except ImportError:
pass
else:
class TestCountDifferencesWithMaximumByteslike(
TestCountDifferencesWithMaximumBase, unittest.TestCase):
def count_diffs(self, seq1, seq2, max_diffs):
return count_differences_with_maximum_byteslike(seq1, seq2,
max_diffs)
class TestSearchExactByteslike(TestSearchExactBase, unittest.TestCase):
def search(self, sequence, subsequence):
return search_exact_byteslike(sequence, subsequence)
def test_bytes(self):
text = b('abc')
self.assertEqual(self.search(text, text), [0])