mirror of https://github.com/explosion/spaCy.git
support IS_SENT_START in PhraseMatcher (#6771)
* support IS_SENT_START in PhraseMatcher
* add unit test and friendlier error
* use IDS.get instead
This commit is contained in:
parent
bc7d83d4be
commit
fdf8c77630
|
@@ -8,6 +8,7 @@ from preshed.maps cimport map_init, map_set, map_get, map_clear, map_iter
|
|||
|
||||
import warnings
|
||||
|
||||
from ..attrs import IDS
|
||||
from ..attrs cimport ORTH, POS, TAG, DEP, LEMMA
|
||||
from ..structs cimport TokenC
|
||||
from ..tokens.token cimport Token
|
||||
|
@@ -58,9 +59,11 @@ cdef class PhraseMatcher:
|
|||
attr = attr.upper()
|
||||
if attr == "TEXT":
|
||||
attr = "ORTH"
|
||||
if attr == "IS_SENT_START":
|
||||
attr = "SENT_START"
|
||||
if attr not in TOKEN_PATTERN_SCHEMA["items"]["properties"]:
|
||||
raise ValueError(Errors.E152.format(attr=attr))
|
||||
self.attr = self.vocab.strings[attr]
|
||||
self.attr = IDS.get(attr)
|
||||
|
||||
def __len__(self):
|
||||
"""Get the number of match IDs added to the matcher.
|
||||
|
|
|
@@ -290,3 +290,8 @@ def test_phrase_matcher_pickle(en_vocab):
|
|||
# clunky way to vaguely check that callback is unpickled
|
||||
(vocab, docs, callbacks, attr) = matcher_unpickled.__reduce__()[1]
|
||||
assert isinstance(callbacks.get("TEST2"), Mock)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("attr", ["SENT_START", "IS_SENT_START"])
|
||||
def test_phrase_matcher_sent_start(en_vocab, attr):
|
||||
matcher = PhraseMatcher(en_vocab, attr=attr)
|
||||
|
|
Loading…
Reference in New Issue