# Patch overview: adds two new pytest modules for the spaCy English pipeline.
#
# tests/test_span.py
#   * Builds one module-level English() pipeline (EN) at import time.
#   * Fixture `doc` returns EN applied to a fixed three-sentence string.
#   * test_sent_spans materialises doc.sents and checks that the first
#     sentence has start == 0 and end == 5, that there are exactly three
#     sentences, and that the sentence lengths sum to len(doc).
#   * NOTE(review): `Spans` and `re` are imported but not referenced anywhere
#     in the 23 added lines -- confirm whether they are still needed.
diff --git a/tests/test_span.py b/tests/test_span.py
new file mode 100644
index 000000000..cc9e59018
--- /dev/null
+++ b/tests/test_span.py
@@ -0,0 +1,23 @@
+from __future__ import unicode_literals
+
+from spacy.en import English
+from spacy.spans import Spans
+
+import pytest
+import re
+
+
+EN = English()
+
+
+@pytest.fixture
+def doc():
+    return EN('This is a sentence. This is another sentence. And a third.')
+
+
+def test_sent_spans(doc):
+    sents = list(doc.sents)
+    assert sents[0].start == 0
+    assert sents[0].end == 5
+    assert len(sents) == 3
+    assert sum(len(sent) for sent in sents) == len(doc)
# tests/test_times.py
#   * Builds one module-level English() pipeline (NLU) at import time.
#   * test_am_pm formats "The meeting was at <num><space><var> wasn't it?"
#     for 12 hour numbers x 4 am/pm spellings x 2 spacings (96 strings) and,
#     for each parsed string, asserts that:
#       - the token at index 4 has orth_ equal to the whole time expression;
#       - the parse has exactly one entity, labelled 'TIME' (the input string
#         is used as the assertion message for the label check);
#       - when that entity starts at 4 and ends at 5, its orth_ equals the
#         time expression.
#   * NOTE(review): for the ' ' spacing the expected tokens[4] text contains
#     a space (e.g. "1 am"), so the test presumably relies on the pipeline
#     yielding the time expression as a single token -- confirm against the
#     targeted spaCy version.
diff --git a/tests/test_times.py b/tests/test_times.py
new file mode 100644
index 000000000..b45fb4f7c
--- /dev/null
+++ b/tests/test_times.py
@@ -0,0 +1,23 @@
+from __future__ import unicode_literals
+
+from spacy.en import English
+
+import pytest
+
+NLU = English()
+
+def test_am_pm():
+    numbers = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12']
+    variants = ['a.m.', 'am', 'p.m.', 'pm']
+    spaces = ['', ' ']
+    for num in numbers:
+        for var in variants:
+            for space in spaces:
+                string = u"The meeting was at %s%s%s wasn't it?" % (num, space, var)
+                tokens = NLU(string)
+                assert tokens[4].orth_ == '%s%s%s' % (num, space, var)
+                ents = list(tokens.ents)
+                assert len(ents) == 1
+                assert ents[0].label_ == 'TIME', string
+                if ents[0].start == 4 and ents[0].end == 5:
+                    assert ents[0].orth_ == '%s%s%s' % (num, space, var)