# fuzzysearch/tests/test_common.py

from fuzzysearch.common import Match, group_matches, GroupOfMatches, \
search_exact, _count_differences_with_maximum
from tests.compat import unittest
from six import b, u
class TestGroupOfMatches(unittest.TestCase):
    """Tests for GroupOfMatches membership checks."""

    def test_is_match_in_group(self):
        # A group built from a single match must report that match as a
        # member, both for the same object and for an equal-valued one.
        seed = Match(2, 4, 0)
        members = GroupOfMatches(seed)

        # The very object the group was constructed from.
        self.assertTrue(members.is_match_in_group(seed))

        # A separately constructed match with identical field values.
        equal_copy = Match(2, 4, 0)
        self.assertTrue(members.is_match_in_group(equal_copy))
class TestGroupMatches(unittest.TestCase):
    """Tests for group_matches() on matches located at disjoint positions."""

    @staticmethod
    def _disjoint_matches():
        """Return a fresh list of three matches whose spans do not overlap."""
        spans = [(19, 29, 1), (42, 52, 1), (99, 109, 0)]
        return [Match(start=begin, end=stop, dist=distance)
                for begin, stop, distance in spans]

    def test_separate(self):
        # Every match is expected to land alone in its own group, with the
        # groups listed in the same order as the input matches.
        found = self._disjoint_matches()
        grouped = group_matches(found)
        expected_groups = [set([match]) for match in found]
        self.assertListEqual(grouped, expected_groups)

    def test_separate_with_duplicate(self):
        # Feeding the second match in twice must not change the outcome:
        # still one singleton group per distinct match.
        found = self._disjoint_matches()
        with_repeat = found + [found[1]]
        grouped = group_matches(with_repeat)
        expected_groups = [set([match]) for match in found]
        self.assertListEqual(grouped, expected_groups)
class TestSearchExact(unittest.TestCase):
    """Tests for search_exact().

    The cases below expect the start indices at which the searched-for
    subsequence occurs exactly within the sequence.
    """

    def search(self, subsequence, sequence):
        """Run search_exact() and return everything it produces as a list.

        subsequence -- the pattern to look for (first argument)
        sequence    -- the text to look in (second argument)

        The arguments are forwarded positionally in this order, which is the
        order every test in this class uses: pattern first, text second.
        """
        return list(search_exact(subsequence, sequence))

    def test_bytes(self):
        # Byte strings (via six.b) are searchable.
        data = b('abc')
        self.assertEqual(self.search(data, data), [0])

    def test_unicode(self):
        # Unicode strings (via six.u) are searchable.
        data = u('abc')
        self.assertEqual(self.search(data, data), [0])

    def test_biopython_Seq(self):
        # BioPython is optional; skip rather than fail when it is missing.
        try:
            from Bio.Seq import Seq
        except ImportError:
            raise unittest.SkipTest('Test requires BioPython')
        self.assertEqual(self.search(Seq('abc'), Seq('abc')), [0])

    def test_empty_sequence(self):
        # Nothing can be found inside an empty text.
        self.assertEqual(self.search('PATTERN', ''), [])

    def test_empty_subsequence(self):
        # Searching for an empty pattern is rejected with ValueError.
        with self.assertRaises(ValueError):
            self.search('', 'TEXT')

    def test_match_identical_sequence(self):
        self.assertEqual(self.search('PATTERN', 'PATTERN'), [0])

    def test_substring(self):
        pattern = 'PATTERN'
        haystack = 'aaaaaaaaaaPATTERNaaaaaaaaa'
        self.assertEqual(self.search(pattern, haystack), [10])

    def test_double_first_item(self):
        # The pattern's first item appears twice in a row in the text; only
        # the second 'd' starts a real occurrence.
        self.assertEqual(self.search('def', 'abcddefg'), [4])

    def test_missing_second_item(self):
        # 'b', 'd', 'e' all occur in the text, but not contiguously.
        self.assertEqual(self.search('bde', 'abcdefg'), [])

    def test_completely_different(self):
        self.assertEqual(self.search('abc', 'def'), [])

    def test_startswith(self):
        self.assertEqual(self.search('abc', 'abcd'), [0])

    def test_endswith(self):
        self.assertEqual(self.search('bcd', 'abcd'), [1])
class TestCountDifferencesWithMaximumBase(object):
def count_diffs(self, seq1, seq2, max_diffs):
raise NotImplementedError
def test_empty(self):
result = self.count_diffs(b'', b'', 1)
self.assertEqual(result, 0)
def test_identical_one_character(self):
result = self.count_diffs(b'a', b'a', 1)
self.assertEqual(result, 0)
def test_identical_word(self):
result = self.count_diffs(b'word', b'word', 1)
self.assertEqual(result, 0)
def test_identical_long(self):
result = self.count_diffs(b'long'*10, b'long'*10, 1)
self.assertEqual(result, 0)
def test_different_less_than_max(self):
result = self.count_diffs(b'abc', b'def', 4)
self.assertEqual(result, 3)
def test_different_more_than_max(self):
result = self.count_diffs(b'abc', b'def', 2)
self.assertEqual(result, 2)
def test_partially_different_in_middle(self):
result = self.count_diffs(b'abcdef', b'a--d-f', 4)
self.assertEqual(result, 3)
result = self.count_diffs(b'abcdef', b'a--d-f', 2)
self.assertEqual(result, 2)
def test_partially_different_at_start(self):
result = self.count_diffs(b'abcdef', b'--c-ef', 4)
self.assertEqual(result, 3)
result = self.count_diffs(b'abcdef', b'--c-ef', 2)
self.assertEqual(result, 2)
def test_partially_different_at_end(self):
result = self.count_diffs(b'abcdef', b'ab-d--', 4)
self.assertEqual(result, 3)
result = self.count_diffs(b'abcdef', b'ab-d--', 2)
self.assertEqual(result, 2)
class TestCountDifferencesWithMaximum(TestCountDifferencesWithMaximumBase,
                                      unittest.TestCase):
    """Runs the shared cases against _count_differences_with_maximum()."""

    def count_diffs(self, seq1, seq2, max_diffs):
        """Delegate to _count_differences_with_maximum() from fuzzysearch.common."""
        n_diffs = _count_differences_with_maximum(seq1, seq2, max_diffs)
        return n_diffs
try:
    from fuzzysearch._common import count_differences_with_maximum_byteslike
except ImportError:
    # fuzzysearch._common is not importable here (presumably an optional
    # compiled module -- confirm), so there is nothing extra to test.
    pass
else:
    # This class needs its own name.  Calling it
    # TestCountDifferencesWithMaximum would rebind that module-level name,
    # and test loaders that scan module attributes would then no longer see
    # the tests for _count_differences_with_maximum().
    class TestCountDifferencesWithMaximumByteslike(
            TestCountDifferencesWithMaximumBase, unittest.TestCase):
        """Runs the shared cases against
        count_differences_with_maximum_byteslike()."""

        def count_diffs(self, seq1, seq2, max_diffs):
            """Delegate to count_differences_with_maximum_byteslike()."""
            return count_differences_with_maximum_byteslike(seq1, seq2,
                                                            max_diffs)