From 275c9ad872a87c4ba131987656be4861617d3d90 Mon Sep 17 00:00:00 2001 From: adrianeboyd Date: Wed, 16 Oct 2019 13:40:18 +0200 Subject: [PATCH] Allow int values in token patterns (#4444) * Add missing int value option to top-level pattern validation in Matcher * Adjust existing tests accordingly * Add new test for valid pattern `{"LENGTH": int}` --- spacy/matcher/matcher.pyx | 2 +- spacy/tests/matcher/test_pattern_validation.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 4a5f69ff4..5dd6eab77 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -658,7 +658,7 @@ def _get_attr_values(spec, string_store): value = string_store.add(value) elif isinstance(value, bool): value = int(value) - elif isinstance(value, dict): + elif isinstance(value, (dict, int)): continue else: raise ValueError(Errors.E153.format(vtype=type(value).__name__)) diff --git a/spacy/tests/matcher/test_pattern_validation.py b/spacy/tests/matcher/test_pattern_validation.py index 665bcf935..80f08e40c 100644 --- a/spacy/tests/matcher/test_pattern_validation.py +++ b/spacy/tests/matcher/test_pattern_validation.py @@ -12,24 +12,25 @@ from spacy.util import get_json_validator, validate_json TEST_PATTERNS = [ # Bad patterns flagged in all cases ([{"XX": "foo"}], 1, 1), - ([{"LENGTH": "2", "TEXT": 2}, {"LOWER": "test"}], 2, 1), ([{"IS_ALPHA": {"==": True}}, {"LIKE_NUM": None}], 2, 1), ([{"IS_PUNCT": True, "OP": "$"}], 1, 1), - ([{"IS_DIGIT": -1}], 1, 1), - ([{"ORTH": -1}], 1, 1), ([{"_": "foo"}], 1, 1), ('[{"TEXT": "foo"}, {"LOWER": "bar"}]', 1, 1), ([1, 2, 3], 3, 1), # Bad patterns flagged outside of Matcher ([{"_": {"foo": "bar", "baz": {"IN": "foo"}}}], 1, 0), # Bad patterns not flagged with minimal checks + ([{"LENGTH": "2", "TEXT": 2}, {"LOWER": "test"}], 2, 0), ([{"LENGTH": {"IN": [1, 2, "3"]}}, {"POS": {"IN": "VERB"}}], 2, 0), ([{"LENGTH": {"VALUE": 5}}], 1, 0), ([{"TEXT": {"VALUE": "foo"}}], 1, 0), + ([{"IS_DIGIT": -1}], 1, 0), + ([{"ORTH": -1}], 1, 0), # Good patterns ([{"TEXT": "foo"}, {"LOWER": "bar"}], 0, 0), ([{"LEMMA": {"IN": ["love", "like"]}}, {"POS": "DET", "OP": "?"}], 0, 0), ([{"LIKE_NUM": True, "LENGTH": {">=": 5}}], 0, 0), + ([{"LENGTH": 2}], 0, 0), ([{"LOWER": {"REGEX": "^X", "NOT_IN": ["XXX", "XY"]}}], 0, 0), ([{"NORM": "a"}, {"POS": {"IN": ["NOUN"]}}], 0, 0), ([{"_": {"foo": {"NOT_IN": ["bar", "baz"]}, "a": 5, "b": {">": 10}}}], 0, 0),