Auto-format

This commit is contained in:
Ines Montani 2019-02-06 13:32:18 +01:00
parent f25bd9f5e4
commit e51a238b3f
1 changed file with 30 additions and 28 deletions

View File

@@ -181,74 +181,76 @@ def test_matcher_any_token_operator(en_vocab):
def test_matcher_extension_attribute(en_vocab):
    """Check that a pattern can match on a custom token extension attribute.

    Registers an ``is_fruit`` getter extension on ``Token`` and matches the
    token "an" followed by a token whose ``_.is_fruit`` value is True.
    """
    matcher = Matcher(en_vocab)

    def get_is_fruit(token):
        # True only for the two texts treated as fruit in this test.
        return token.text in ("apple", "banana")

    # force=True overwrites any extension of the same name left by other tests.
    Token.set_extension("is_fruit", getter=get_is_fruit, force=True)
    pattern = [{"ORTH": "an"}, {"_": {"is_fruit": True}}]
    matcher.add("HAVING_FRUIT", None, pattern)

    # "apple" satisfies the getter, so the two-token pattern matches once.
    doc = Doc(en_vocab, words=["an", "apple"])
    matches = matcher(doc)
    assert len(matches) == 1

    # "aardvark" does not satisfy the getter, so nothing matches.
    doc = Doc(en_vocab, words=["an", "aardvark"])
    matches = matcher(doc)
    assert len(matches) == 0
def test_matcher_set_value(en_vocab):
    """Check the ``IN`` set-membership predicate on the ``ORTH`` attribute."""
    matcher = Matcher(en_vocab)
    pattern = [{"ORTH": {"IN": ["an", "a"]}}]
    matcher.add("A_OR_AN", None, pattern)

    # Two of the three words are members of the set.
    doc = Doc(en_vocab, words=["an", "a", "apple"])
    matches = matcher(doc)
    assert len(matches) == 2

    # No word is a member of the set.
    doc = Doc(en_vocab, words=["aardvark"])
    matches = matcher(doc)
    assert len(matches) == 0
def test_matcher_regex(en_vocab):
    """Check the ``REGEX`` predicate on the ``ORTH`` attribute."""
    matcher = Matcher(en_vocab)
    pattern = [{"ORTH": {"REGEX": r"(?:a|an)"}}]
    matcher.add("A_OR_AN", None, pattern)

    # The test expects "an" and "a" to match and "hi" not to.
    doc = Doc(en_vocab, words=["an", "a", "hi"])
    matches = matcher(doc)
    assert len(matches) == 2

    # "bye" is expected not to match the expression.
    doc = Doc(en_vocab, words=["bye"])
    matches = matcher(doc)
    assert len(matches) == 0
def test_matcher_regex_shape(en_vocab):
    """Check the ``REGEX`` predicate on the ``SHAPE`` attribute.

    The expression ``^[^x]+$`` accepts only values that contain no "x"
    character.
    """
    matcher = Matcher(en_vocab)
    pattern = [{"SHAPE": {"REGEX": r"^[^x]+$"}}]
    matcher.add("NON_ALPHA", None, pattern)

    # The test expects exactly two of these three tokens to match.
    doc = Doc(en_vocab, words=["99", "problems", "!"])
    matches = matcher(doc)
    assert len(matches) == 2

    # The test expects the single token "bye" not to match.
    doc = Doc(en_vocab, words=["bye"])
    matches = matcher(doc)
    assert len(matches) == 0
def test_matcher_compare_length(en_vocab):
    """Check the ``>=`` comparison predicate on the ``LENGTH`` attribute."""
    matcher = Matcher(en_vocab)
    pattern = [{"LENGTH": {">=": 2}}]
    matcher.add("LENGTH_COMPARE", None, pattern)

    # "aa" and "aaa" have at least two characters; "a" does not.
    doc = Doc(en_vocab, words=["a", "aa", "aaa"])
    matches = matcher(doc)
    assert len(matches) == 2

    # A lone one-character token is below the threshold.
    doc = Doc(en_vocab, words=["a"])
    matches = matcher(doc)
    assert len(matches) == 0
def test_matcher_extension_set_membership(en_vocab):
    """Check the ``IN`` predicate applied to a custom extension attribute.

    Registers a ``reversed`` getter extension on ``Token`` that returns the
    token text reversed, then matches tokens whose reversed text is in a
    fixed set.
    """
    matcher = Matcher(en_vocab)

    def get_reversed(token):
        # The token text with its characters in reverse order.
        return "".join(reversed(token.text))

    # force=True overwrites any extension of the same name left by other tests.
    Token.set_extension("reversed", getter=get_reversed, force=True)
    pattern = [{"_": {"reversed": {"IN": ["eyb", "ih"]}}}]
    matcher.add("REVERSED", None, pattern)

    # "hi" -> "ih" and "bye" -> "eyb" are in the set; "hello" -> "olleh" is not.
    doc = Doc(en_vocab, words=["hi", "bye", "hello"])
    matches = matcher(doc)
    assert len(matches) == 2

    # "aardvark" reversed is not in the set.
    doc = Doc(en_vocab, words=["aardvark"])
    matches = matcher(doc)
    assert len(matches) == 0