mirror of https://github.com/Yomguithereal/fog.git
39 lines
1.3 KiB
Python
39 lines
1.3 KiB
Python
# =============================================================================
# Fog Ngrams Unit Tests
# =============================================================================
|
|
import math
|
|
from fog.tokenizers import ngrams, bigrams, trigrams, quadrigrams
|
|
|
|
# Shortcut tokenizers laid out so that index i matches n = i + 1. No alias is
# exercised for n = 1, hence the leading None placeholder.
ALIASES = [
    None,
    bigrams,
    trigrams,
    quadrigrams,
]
|
|
|
|
# Character-level input: n-grams over a plain string are substrings.
STRING = 'Bonjour'

# Expected n-grams of STRING; the entry at index i holds the result for
# n = i + 1 (unigrams first, quadrigrams last).
STRING_TESTS = [
    ('B', 'o', 'n', 'j', 'o', 'u', 'r'),
    ('Bo', 'on', 'nj', 'jo', 'ou', 'ur'),
    ('Bon', 'onj', 'njo', 'jou', 'our'),
    ('Bonj', 'onjo', 'njou', 'jour')
]

# Token-level input: the sentence is split on single spaces and frozen into a
# tuple, so its n-grams are tuples of words.
SENTENCE = tuple('the cat eats the mouse'.split(' '))

# Expected n-grams of SENTENCE, using the same index convention as
# STRING_TESTS (index i holds the result for n = i + 1).
SENTENCE_TEST = [
    (('the',), ('cat',), ('eats',), ('the',), ('mouse',)),
    (('the', 'cat'), ('cat', 'eats'), ('eats', 'the'), ('the', 'mouse')),
    (('the', 'cat', 'eats'), ('cat', 'eats', 'the'), ('eats', 'the', 'mouse')),
    (('the', 'cat', 'eats', 'the'), ('cat', 'eats', 'the', 'mouse'))
]
|
|
|
|
|
|
class TestNgrams(object):
    """Unit tests for `ngrams` and its fixed-size aliases."""

    def test_basics(self):
        """Check n-grams for every fixture size on a string and a token tuple.

        For each n, `ngrams(n, sequence)` is materialized into a tuple and
        compared with the matching entry of STRING_TESTS / SENTENCE_TEST.
        When ALIASES provides a shortcut for that n, the shortcut must
        produce the same result from the sequence alone.
        """
        # The three fixture lists are parallel (entry i describes n = i + 1),
        # so walk them together instead of indexing with a hard-coded range.
        cases = zip(STRING_TESTS, SENTENCE_TEST, ALIASES)

        for n, (chars, tokens, alias) in enumerate(cases, start=1):
            assert tuple(ngrams(n, STRING)) == chars
            assert tuple(ngrams(n, SENTENCE)) == tokens

            # A None entry means there is no shortcut to check for this n.
            if alias is None:
                continue

            assert tuple(alias(STRING)) == chars
            assert tuple(alias(SENTENCE)) == tokens