172 lines
8.0 KiB
Python
172 lines
8.0 KiB
Python
from fuzzysearch.compat import text_type
|
|
from fuzzysearch.search_exact import search_exact
|
|
from tests.compat import b, u, unittest
|
|
|
|
|
|
class TestSearchExactBase(object):
|
|
def search(self, subsequence, sequence, start_index=0, end_index=None):
|
|
raise NotImplementedError
|
|
|
|
def test_empty_sequence(self):
|
|
self.assertEqual(self.search('PATTERN', ''), [])
|
|
|
|
def test_empty_subsequence(self):
|
|
with self.assertRaises(ValueError):
|
|
self.search('', 'TEXT')
|
|
|
|
def test_match_identical_sequence(self):
|
|
self.assertEqual(self.search('PATTERN', 'PATTERN'), [0])
|
|
|
|
def test_substring(self):
|
|
substring = 'PATTERN'
|
|
text = 'aaaaaaaaaaPATTERNaaaaaaaaa'
|
|
self.assertEqual(self.search(substring, text), [10])
|
|
|
|
def test_double_first_item(self):
|
|
self.assertEqual(self.search('def', 'abcddefg'), [4])
|
|
|
|
def test_missing_second_item(self):
|
|
self.assertEqual(self.search('bde', 'abcdefg'), [])
|
|
|
|
def test_completely_different(self):
|
|
self.assertEqual(self.search('abc', 'def'), [])
|
|
|
|
def test_startswith(self):
|
|
self.assertEqual(self.search('abc', 'abcd'), [0])
|
|
|
|
def test_endswith(self):
|
|
self.assertEqual(self.search('bcd', 'abcd'), [1])
|
|
|
|
@classmethod
|
|
def get_supported_sequence_types(cls):
|
|
raise NotImplementedError
|
|
|
|
def test_identical(self):
|
|
# search for a pattern in itself, should match once at index 0
|
|
for initializer in self.get_supported_sequence_types():
|
|
pattern = initializer('abc')
|
|
with self.subTest("search_exact({0!r}, {0!r})".format(pattern)):
|
|
self.assertEqual(self.search(pattern, pattern), [0])
|
|
|
|
def test_subsequence(self):
|
|
# search for a pattern appearing once at index 4
|
|
for initializer in self.get_supported_sequence_types():
|
|
pattern = initializer('abc')
|
|
sequence = initializer('-ab-abc-ab-')
|
|
with self.subTest("search_exact({0!r}, {1!r})".format(pattern, sequence)):
|
|
self.assertEqual(self.search(pattern, sequence), [4])
|
|
|
|
def test_multiple_matches(self):
|
|
# search for a pattern appearing at indexes 1, 5 and 9
|
|
for initializer in self.get_supported_sequence_types():
|
|
pattern = initializer('abc')
|
|
sequence = initializer('-abc-abc-abc-')
|
|
with self.subTest("search_exact({0!r}, {1!r})".format(pattern, sequence)):
|
|
self.assertEqual(self.search(pattern, sequence), [1, 5, 9])
|
|
|
|
def test_outside_range_limits(self):
|
|
for initializer in self.get_supported_sequence_types():
|
|
pattern = initializer('abc')
|
|
sequence = initializer('-abc-abc-abc')
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 3)):
|
|
self.assertEqual(self.search(pattern, sequence, 0, 3), [])
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 4)):
|
|
self.assertEqual(self.search(pattern, sequence, 0, 4), [1])
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 7)):
|
|
self.assertEqual(self.search(pattern, sequence, 0, 7), [1])
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 8)):
|
|
self.assertEqual(self.search(pattern, sequence, 0, 8), [1, 5])
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 11)):
|
|
self.assertEqual(self.search(pattern, sequence, 0, 11), [1, 5])
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 0, 12)):
|
|
self.assertEqual(self.search(pattern, sequence, 0, 12), [1, 5, 9])
|
|
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2})".format(pattern, sequence, 1)):
|
|
self.assertEqual(self.search(pattern, sequence, 1), [1, 5, 9])
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2})".format(pattern, sequence, 2)):
|
|
self.assertEqual(self.search(pattern, sequence, 2), [5, 9])
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2})".format(pattern, sequence, 5)):
|
|
self.assertEqual(self.search(pattern, sequence, 5), [5, 9])
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2})".format(pattern, sequence, 6)):
|
|
self.assertEqual(self.search(pattern, sequence, 6), [9])
|
|
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 4, 10)):
|
|
self.assertEqual(self.search(pattern, sequence, 4, 10), [5])
|
|
|
|
with self.subTest("search_exact({0!r}, {1!r}, {2}, {3})".format(pattern, sequence, 3, 7)):
|
|
self.assertEqual(self.search(pattern, sequence, 3, 7), [])
|
|
|
|
|
|
class TestSearchExact(TestSearchExactBase, unittest.TestCase):
    """Run the shared exact-search test cases against ``search_exact``."""

    def search(self, subsequence, sequence, start_index=0, end_index=None):
        """Return the results of ``search_exact`` collected into a list."""
        return list(search_exact(subsequence, sequence, start_index, end_index))

    @classmethod
    def get_supported_sequence_types(cls):
        """Return callables that build each sequence type to test.

        Always includes ``b``, ``u``, ``list`` and ``tuple``. Biopython's
        ``Seq`` is added when it can be imported, and an IUPAC-alphabet
        ``Seq`` factory is added only when ``Bio.Alphabet`` is importable
        as well.
        """
        types_to_test = [b, u, list, tuple]

        try:
            from Bio.Seq import Seq
        except ImportError:
            # Biopython is not installed; test the built-in types only.
            return types_to_test
        types_to_test.append(Seq)

        # Imported separately from Seq: recent Biopython releases no longer
        # provide Bio.Alphabet, and that must not stop plain Seq objects
        # from being tested.
        try:
            from Bio.Alphabet import IUPAC
        except ImportError:
            pass
        else:
            # Map the test strings onto DNA letters before building the Seq.
            types_to_test.append(
                lambda text: Seq(text.replace('b', 'g').replace('-', 't'),
                                 alphabet=IUPAC.unambiguous_dna))

        return types_to_test

    def test_unicode_subsequence(self):
        """A non-ASCII unicode pattern is found inside a unicode sequence."""
        self.assertEqual(
            self.search(u('\u03A3\u0393'), u('\u03A0\u03A3\u0393\u0394')),
            [1],
        )
|
|
|
|
|
|
try:
    from fuzzysearch._common import search_exact_byteslike
except ImportError:
    # search_exact_byteslike is unavailable, so there is nothing to test.
    pass
else:
    class TestSearchExactByteslike(TestSearchExactBase, unittest.TestCase):
        """Run the shared exact-search cases against ``search_exact_byteslike``."""

        def _to_ascii_bytes(self, value):
            """Return text ``value`` encoded as ASCII; pass other types through.

            Skips the running test when the text cannot be encoded as ASCII.
            """
            if not isinstance(value, text_type):
                return value
            try:
                return value.encode('ascii')
            except UnicodeEncodeError:
                # skipTest() raises SkipTest itself, so no ``raise`` is needed.
                self.skipTest("skipping test with non-ascii-encodable string for byteslike function")

        def search(self, subsequence, sequence, start_index=0, end_index=None):
            """Call ``search_exact_byteslike`` with text arguments ASCII-encoded."""
            subsequence = self._to_ascii_bytes(subsequence)
            sequence = self._to_ascii_bytes(sequence)

            # end_index is only forwarded when given, so that the function's
            # own default applies otherwise.
            if end_index is not None:
                return search_exact_byteslike(subsequence, sequence, start_index, end_index)
            else:
                return search_exact_byteslike(subsequence, sequence, start_index)

        @classmethod
        def get_supported_sequence_types(cls):
            """Return the sequence factories to test: only ``b``."""
            types_to_test = [b]
            return types_to_test

        def test_input_argument_handling(self):
            """Check positional/keyword argument combinations and index limits."""
            # start_index and end_index given positionally, by keyword, or omitted
            self.assertEqual(search_exact_byteslike(b'abc', b'abc'), [0])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0), [0])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 1), [])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0, 3), [0])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0, end_index=3), [0])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', end_index=3, start_index=0), [0])
            self.assertEqual(search_exact_byteslike(subsequence=b'abc', sequence=b'abc',
                                                    start_index=0, end_index=3), [0])

            # end_index past the end of the sequence, or -1, still finds the match
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0, 4), [0])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0, -1), [0])

            # ranges too short to contain the pattern yield no matches
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 0, 2), [])
            self.assertEqual(search_exact_byteslike(b'abc', b'abc', 2, 1), [])

            # the same argument supplied both positionally and by keyword
            with self.assertRaises(Exception):
                search_exact_byteslike(b'abc', subsequence=b'abc')

            with self.assertRaises(Exception):
                search_exact_byteslike(b'abc', b'abc', 0, start_index=0)
|