mirror of https://github.com/explosion/spaCy.git
Auto-format
This commit is contained in:
parent
f25bd9f5e4
commit
e51a238b3f
|
@ -181,74 +181,76 @@ def test_matcher_any_token_operator(en_vocab):
|
|||
|
||||
def test_matcher_extension_attribute(en_vocab):
    """Match on a boolean custom extension attribute via the ``_`` pattern key."""
    matcher = Matcher(en_vocab)
    # force=True lets the extension be (re)registered even if another test
    # already defined "is_fruit" on Token.
    get_is_fruit = lambda token: token.text in ("apple", "banana")
    Token.set_extension("is_fruit", getter=get_is_fruit, force=True)
    pattern = [{"ORTH": "an"}, {"_": {"is_fruit": True}}]
    matcher.add("HAVING_FRUIT", None, pattern)
    # "an" followed by a token whose is_fruit getter returns True.
    doc = Doc(en_vocab, words=["an", "apple"])
    matches = matcher(doc)
    assert len(matches) == 1
    # "aardvark" is not in the getter's fruit tuple, so nothing matches.
    doc = Doc(en_vocab, words=["an", "aardvark"])
    matches = matcher(doc)
    assert len(matches) == 0
|
||||
|
||||
|
||||
def test_matcher_set_value(en_vocab):
    """Match a token attribute against a set of values with the ``IN`` operator."""
    matcher = Matcher(en_vocab)
    pattern = [{"ORTH": {"IN": ["an", "a"]}}]
    matcher.add("A_OR_AN", None, pattern)
    # Two of the three tokens ("an", "a") are in the allowed set.
    doc = Doc(en_vocab, words=["an", "a", "apple"])
    matches = matcher(doc)
    assert len(matches) == 2
    # No token text is in the allowed set.
    doc = Doc(en_vocab, words=["aardvark"])
    matches = matcher(doc)
    assert len(matches) == 0
|
||||
|
||||
|
||||
def test_matcher_regex(en_vocab):
    """Match a token's ORTH value against a regular expression via ``REGEX``."""
    matcher = Matcher(en_vocab)
    pattern = [{"ORTH": {"REGEX": r"(?:a|an)"}}]
    matcher.add("A_OR_AN", None, pattern)
    # "an" and "a" are expected to match; "hi" is not.
    doc = Doc(en_vocab, words=["an", "a", "hi"])
    matches = matcher(doc)
    assert len(matches) == 2
    doc = Doc(en_vocab, words=["bye"])
    matches = matcher(doc)
    assert len(matches) == 0
|
||||
|
||||
|
||||
def test_matcher_regex_shape(en_vocab):
    """Apply a ``REGEX`` predicate to the SHAPE attribute instead of the text."""
    matcher = Matcher(en_vocab)
    # The regex accepts only shape strings that contain no "x" character.
    pattern = [{"SHAPE": {"REGEX": r"^[^x]+$"}}]
    matcher.add("NON_ALPHA", None, pattern)
    # Two of the three tokens are expected to match.
    doc = Doc(en_vocab, words=["99", "problems", "!"])
    matches = matcher(doc)
    assert len(matches) == 2
    doc = Doc(en_vocab, words=["bye"])
    matches = matcher(doc)
    assert len(matches) == 0
|
||||
|
||||
|
||||
def test_matcher_compare_length(en_vocab):
    """Match on the LENGTH attribute using a ``>=`` comparison predicate."""
    matcher = Matcher(en_vocab)
    pattern = [{"LENGTH": {">=": 2}}]
    matcher.add("LENGTH_COMPARE", None, pattern)
    # "aa" and "aaa" have length >= 2; "a" does not.
    doc = Doc(en_vocab, words=["a", "aa", "aaa"])
    matches = matcher(doc)
    assert len(matches) == 2
    doc = Doc(en_vocab, words=["a"])
    matches = matcher(doc)
    assert len(matches) == 0
|
||||
|
||||
|
||||
def test_matcher_extension_set_membership(en_vocab):
    """Combine a custom extension attribute with the ``IN`` set predicate."""
    matcher = Matcher(en_vocab)
    # The getter yields the token text reversed; force=True allows the
    # extension to be re-registered across test runs.
    get_reversed = lambda token: "".join(reversed(token.text))
    Token.set_extension("reversed", getter=get_reversed, force=True)
    pattern = [{"_": {"reversed": {"IN": ["eyb", "ih"]}}}]
    matcher.add("REVERSED", None, pattern)
    # "hi" -> "ih" and "bye" -> "eyb" are in the set; "hello" -> "olleh" is not.
    doc = Doc(en_vocab, words=["hi", "bye", "hello"])
    matches = matcher(doc)
    assert len(matches) == 2
    doc = Doc(en_vocab, words=["aardvark"])
    matches = matcher(doc)
    assert len(matches) == 0
|
||||
|
||||
|
|
Loading…
Reference in New Issue