mirror of https://github.com/explosion/spaCy.git
Allow Matcher to match on ENT_ID and ENT_KB_ID (#9688)
* Added ENT_ID and ENT_KB_ID into the list of the attributes that Matcher matches on
* Added ENT_ID and ENT_KB_ID to TEST_PATTERNS in test_pattern_validation.py. Disabled tests that I added before
* Update website/docs/api/matcher.md
* Format
* Remove skipped tests

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
7fec5fd647
commit
a4c43e5c57
|
@ -222,6 +222,8 @@ class TokenPattern(BaseModel):
|
||||||
lemma: Optional[StringValue] = None
|
lemma: Optional[StringValue] = None
|
||||||
shape: Optional[StringValue] = None
|
shape: Optional[StringValue] = None
|
||||||
ent_type: Optional[StringValue] = None
|
ent_type: Optional[StringValue] = None
|
||||||
|
ent_id: Optional[StringValue] = None
|
||||||
|
ent_kb_id: Optional[StringValue] = None
|
||||||
norm: Optional[StringValue] = None
|
norm: Optional[StringValue] = None
|
||||||
length: Optional[NumberValue] = None
|
length: Optional[NumberValue] = None
|
||||||
spacy: Optional[StrictBool] = None
|
spacy: Optional[StrictBool] = None
|
||||||
|
|
|
@ -22,6 +22,8 @@ TEST_PATTERNS = [
|
||||||
([{"TEXT": {"VALUE": "foo"}}], 2, 0), # prev: (1, 0)
|
([{"TEXT": {"VALUE": "foo"}}], 2, 0), # prev: (1, 0)
|
||||||
([{"IS_DIGIT": -1}], 1, 0),
|
([{"IS_DIGIT": -1}], 1, 0),
|
||||||
([{"ORTH": -1}], 1, 0),
|
([{"ORTH": -1}], 1, 0),
|
||||||
|
([{"ENT_ID": -1}], 1, 0),
|
||||||
|
([{"ENT_KB_ID": -1}], 1, 0),
|
||||||
# Good patterns
|
# Good patterns
|
||||||
([{"TEXT": "foo"}, {"LOWER": "bar"}], 0, 0),
|
([{"TEXT": "foo"}, {"LOWER": "bar"}], 0, 0),
|
||||||
([{"LEMMA": {"IN": ["love", "like"]}}, {"POS": "DET", "OP": "?"}], 0, 0),
|
([{"LEMMA": {"IN": ["love", "like"]}}, {"POS": "DET", "OP": "?"}], 0, 0),
|
||||||
|
@ -33,6 +35,8 @@ TEST_PATTERNS = [
|
||||||
([{"orth": "foo"}], 0, 0), # prev: xfail
|
([{"orth": "foo"}], 0, 0), # prev: xfail
|
||||||
([{"IS_SENT_START": True}], 0, 0),
|
([{"IS_SENT_START": True}], 0, 0),
|
||||||
([{"SENT_START": True}], 0, 0),
|
([{"SENT_START": True}], 0, 0),
|
||||||
|
([{"ENT_ID": "STRING"}], 0, 0),
|
||||||
|
([{"ENT_KB_ID": "STRING"}], 0, 0),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,8 @@ rule-based matching are:
|
||||||
| `SPACY` | Token has a trailing space. ~~bool~~ |
|
| `SPACY` | Token has a trailing space. ~~bool~~ |
|
||||||
| `POS`, `TAG`, `MORPH`, `DEP`, `LEMMA`, `SHAPE` | The token's simple and extended part-of-speech tag, morphological analysis, dependency label, lemma, shape. ~~str~~ |
|
| `POS`, `TAG`, `MORPH`, `DEP`, `LEMMA`, `SHAPE` | The token's simple and extended part-of-speech tag, morphological analysis, dependency label, lemma, shape. ~~str~~ |
|
||||||
| `ENT_TYPE` | The token's entity label. ~~str~~ |
|
| `ENT_TYPE` | The token's entity label. ~~str~~ |
|
||||||
|
| `ENT_ID` | The token's entity ID (`ent_id`). ~~str~~ |
|
||||||
|
| `ENT_KB_ID` | The token's entity knowledge base ID (`ent_kb_id`). ~~str~~ |
|
||||||
| `_` <Tag variant="new">2.1</Tag> | Properties in [custom extension attributes](/usage/processing-pipelines#custom-components-attributes). ~~Dict[str, Any]~~ |
|
| `_` <Tag variant="new">2.1</Tag> | Properties in [custom extension attributes](/usage/processing-pipelines#custom-components-attributes). ~~Dict[str, Any]~~ |
|
||||||
| `OP` | Operator or quantifier to determine how often to match a token pattern. ~~str~~ |
|
| `OP` | Operator or quantifier to determine how often to match a token pattern. ~~str~~ |
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue