mirror of https://github.com/explosion/spaCy.git
Add merge phrases from https://github.com/explosion/spaCy/issues/523#issuecomment-255172782
This commit is contained in:
parent
6b30cbaf0b
commit
cf7b6f7a9d
|
@ -9,6 +9,18 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens.
|
|||
|
||||
# Load the 'en' pipeline, passing parser=False and entity=False.
# NOTE(review): presumably these flags skip loading the dependency parser and
# the entity recognizer -- confirm against the spaCy version in use.
nlp = spacy.load('en', parser=False, entity=False)
|
||||
|
||||
def merge_phrases(matcher, doc, i, matches):
    """Merge every matched phrase in *doc* into a single token.

    Intended for use as a Matcher ``on_match`` callback. Merging changes the
    token indices, so nothing is done until the callback is invoked for the
    last match; at that point all phrases are merged in one pass.

    Args:
        matcher: The matcher that produced the matches (unused here).
        doc: The document being matched; sliced as ``doc[start:end]`` to
            obtain the span for each match.
        i: Index into ``matches`` of the match this call is for.
        matches: Sequence of ``(ent_id, label, start, end)`` tuples.

    Returns:
        None. The document is modified in place through ``span.merge``.
    """
    # Guard: act only once, when called for the final match.
    if i != len(matches) - 1:
        return
    # Build every span up front, before any merge runs: the start/end values
    # in ``matches`` refer to the tokenization as it is right now.
    spans = [(label, doc[start:end]) for _ent_id, label, start, end in matches]
    for label, span in spans:
        # Labelled phrases are tagged 'NNP'; unlabelled ones keep the tag of
        # the span's root token.
        span.merge(label=label, tag='NNP' if label else span.root.tag_)
|
||||
|
||||
# Create the rule matcher from the vocabulary of the pipeline loaded above.
matcher = Matcher(nlp.vocab)
|
||||
|
||||
matcher.add_entity(
|
||||
|
@ -17,6 +29,7 @@ p.u-text-large spaCy features a rule-matching engine that operates over tokens.
|
|||
acceptor=None, # Accept or modify the match
|
||||
on_match=merge_phrases # Callback to act on the matches
|
||||
)
|
||||
|
||||
matcher.add_pattern(
|
||||
"GoogleNow", # Entity ID -- Created if doesn't exist.
|
||||
[ # The pattern is a list of *Token Specifiers*.
|
||||
|
|
Loading…
Reference in New Issue