fuzzysearch/tests/test_search_exact.py

172 lines
8.0 KiB
Python

from fuzzysearch.compat import text_type
from fuzzysearch.search_exact import search_exact
from tests.compat import b, u, unittest
class TestSearchExactBase(object):
def search(self, subsequence, sequence, start_index=0, end_index=None):
raise NotImplementedError
def test_empty_sequence(self):
self.assertEqual(self.search('PATTERN', ''), [])
def test_empty_subsequence(self):
with self.assertRaises(ValueError):
self.search('', 'TEXT')
def test_match_identical_sequence(self):
self.assertEqual(self.search('PATTERN', 'PATTERN'), [0])
def test_substring(self):
substring = 'PATTERN'
text = 'aaaaaaaaaaPATTERNaaaaaaaaa'
self.assertEqual(self.search(substring, text), [10])
def test_double_first_item(self):
self.assertEqual(self.search('def', 'abcddefg'), [4])
def test_missing_second_item(self):
self.assertEqual(self.search('bde', 'abcdefg'), [])
def test_completely_different(self):
self.assertEqual(self.search('abc', 'def'), [])
def test_startswith(self):
self.assertEqual(self.search('abc', 'abcd'), [0])
def test_endswith(self):
self.assertEqual(self.search('bcd', 'abcd'), [1])
@classmethod
def get_supported_sequence_types(cls):
raise NotImplementedError
def test_identical(self):
# search for a pattern in itself, should match once at index 0
for initializer in self.get_supported_sequence_types():
pattern = initializer('abc')
with self.subTest("search_exact({0!r}, {0!r})".format(pattern)):
self.assertEqual(self.search(pattern, pattern), [0])
def test_subsequence(self):
# search for a pattern appearing once at index 4
for initializer in self.get_supported_sequence_types():
pattern = initializer('abc')
sequence = initializer('-ab-abc-ab-')
with self.subTest("search_exact({0!r}, {1!r})".format(pattern, sequence)):
self.assertEqual(self.search(pattern, sequence), [4])
def test_multiple_matches(self):
# search for a pattern appearing at indexes 1, 5 and 9
for initializer in self.get_supported_sequence_types():
pattern = initializer('abc')
sequence = initializer('-abc-abc-abc-')
with self.subTest("search_exact({0!r}, {1!r})".format(pattern, sequence)):
self.assertEqual(self.search(pattern, sequence), [1, 5, 9])
def test_outside_range_limits(self):
for initializer in self.get_supported_sequence_types():
pattern = initializer('abc')
sequence = initializer('-abc-abc-abc')
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 3)):
self.assertEqual(self.search(pattern, sequence, 0, 3), [])
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 4)):
self.assertEqual(self.search(pattern, sequence, 0, 4), [1])
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 7)):
self.assertEqual(self.search(pattern, sequence, 0, 7), [1])
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 8)):
self.assertEqual(self.search(pattern, sequence, 0, 8), [1, 5])
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 11)):
self.assertEqual(self.search(pattern, sequence, 0, 11), [1, 5])
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 12)):
self.assertEqual(self.search(pattern, sequence, 0, 12), [1, 5, 9])
with self.subTest("search_exact({0!r}, {1!r}, {2})".format(pattern, sequence, 1)):
self.assertEqual(self.search(pattern, sequence, 1), [1, 5, 9])
with self.subTest("search_exact({0!r}, {1!r}, {2})".format(pattern, sequence, 2)):
self.assertEqual(self.search(pattern, sequence, 2), [5, 9])
with self.subTest("search_exact({0!r}, {1!r}, {2})".format(pattern, sequence, 5)):
self.assertEqual(self.search(pattern, sequence, 5), [5, 9])
with self.subTest("search_exact({0!r}, {1!r}, {2})".format(pattern, sequence, 6)):
self.assertEqual(self.search(pattern, sequence, 6), [9])
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 4, 10)):
self.assertEqual(self.search(pattern, sequence, 4, 10), [5])
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 3, 7)):
self.assertEqual(self.search(pattern, sequence, 3, 7), [])
class TestSearchExact(TestSearchExactBase, unittest.TestCase):
    """Run the shared exact-search test cases against ``search_exact``."""

    def search(self, subsequence, sequence, start_index=0, end_index=None):
        """Call ``search_exact`` and collect its results into a list."""
        return list(search_exact(subsequence, sequence, start_index, end_index))

    @classmethod
    def get_supported_sequence_types(cls):
        """Return the sequence constructors to run the shared tests with.

        Bytes, unicode, list and tuple are always included. Biopython's
        ``Seq`` is added when Biopython is installed, and a DNA-alphabet
        ``Seq`` variant is added only when ``Bio.Alphabet`` is available.
        """
        types_to_test = [b, u, list, tuple]

        try:
            from Bio.Seq import Seq
        except ImportError:
            # Biopython is optional; without it only the built-in types run.
            return types_to_test
        types_to_test.append(Seq)

        # Bio.Alphabet was removed in Biopython 1.78. Import it separately so
        # that its absence no longer silently drops the plain Seq tests too.
        try:
            from Bio.Alphabet import IUPAC
        except ImportError:
            return types_to_test

        # 'b' and '-' are mapped to 'g' and 't' so that the test strings only
        # contain letters of the unambiguous DNA alphabet.
        types_to_test.append(
            lambda text: Seq(text.replace('b', 'g').replace('-', 't'),
                             alphabet=IUPAC.unambiguous_dna))
        return types_to_test

    def test_unicode_subsequence(self):
        # Non-ASCII (Greek) characters: the pattern starts at index 1.
        self.assertEqual(
            self.search(u('\u03A3\u0393'), u('\u03A0\u03A3\u0393\u0394')),
            [1])
# search_exact_byteslike is optional: when it cannot be imported from
# fuzzysearch._common, its test class is simply not defined.
try:
    from fuzzysearch._common import search_exact_byteslike
except ImportError:
    pass
else:
    class TestSearchExactByteslike(TestSearchExactBase, unittest.TestCase):
        """Run the shared exact-search tests against ``search_exact_byteslike``."""

        _NON_ASCII_SKIP_REASON = (
            "skipping test with non-ascii-encodable string for byteslike function")

        def _to_ascii_bytes(self, value):
            """Return ``value`` ASCII-encoded if it is text, else unchanged.

            The current test is skipped when the text cannot be encoded as
            ASCII.
            """
            if not isinstance(value, text_type):
                return value
            try:
                return value.encode('ascii')
            except UnicodeEncodeError:
                # skipTest() raises SkipTest itself, so no explicit ``raise``
                # is needed and nothing after this call is executed.
                self.skipTest(self._NON_ASCII_SKIP_REASON)

        def search(self, subsequence, sequence, start_index=0, end_index=None):
            """Call ``search_exact_byteslike``, encoding text inputs first."""
            subsequence = self._to_ascii_bytes(subsequence)
            sequence = self._to_ascii_bytes(sequence)

            # When no end index was given, omit the argument entirely instead
            # of passing None, so the function's own default is used.
            if end_index is None:
                return search_exact_byteslike(subsequence, sequence, start_index)
            return search_exact_byteslike(subsequence, sequence,
                                          start_index, end_index)

        @classmethod
        def get_supported_sequence_types(cls):
            """Return the sequence constructors to test: bytes only."""
            return [b]

        def test_input_argument_handling(self):
            # Positional and keyword spellings of the index arguments.
            self.assertEqual(search_exact_byteslike(b'abc', b'abc'), [0])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0), [0])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 1), [])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0, 3), [0])
            self.assertEqual(
                search_exact_byteslike(b'abc', b'abc', 0, end_index=3), [0])
            self.assertEqual(
                search_exact_byteslike(b'abc', b'abc', end_index=3, start_index=0),
                [0])
            self.assertEqual(
                search_exact_byteslike(subsequence=b'abc', sequence=b'abc',
                                       start_index=0, end_index=3),
                [0])

            # End indexes past the end, negative, too small, or before the
            # start index.
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0, 4), [0])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0, -1), [0])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0, 2), [])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 2, 1), [])

            # A missing required argument, and an argument given both
            # positionally and by keyword, must both be rejected.
            with self.assertRaises(Exception):
                search_exact_byteslike(b'abc', subsequence=b'abc')
            with self.assertRaises(Exception):
                search_exact_byteslike(b'abc', b'abc', 0, start_index=0)