Fix attributeruler

This commit is contained in:
Matthew Honnibal 2020-09-26 00:23:09 +02:00
parent 821f37254c
commit 702edf52a0
1 changed file with 14 additions and 8 deletions

View File

@ -79,18 +79,24 @@ class AttributeRuler(Pipe):
DOCS: https://nightly.spacy.io/api/attributeruler#call
"""
matches = sorted(self.matcher(doc, allow_missing=True))
print("Attrs", self.attrs)
print("Matches", matches)
for match_id, start, end in matches:
matches = self.matcher(doc, allow_missing=True)
# Sort by the attribute ID, so that later rules have precedence
matches = [
(_parse_key(self.vocab.strings[m_id]), m_id, s, e)
for m_id, s, e in matches
]
matches.sort()
for attr_id, match_id, start, end in matches:
span = Span(doc, start, end, label=match_id)
attr_id = _parse_key(span.label_)
attrs = self.attrs[attr_id]
index = self.indices[attr_id]
try:
# The index can be negative, which makes it annoying to do
# the boundscheck. Let Span do it instead.
token = span[index]
except IndexError:
# The original exception is just our conditional logic, so we
# raise from.
raise ValueError(
Errors.E1001.format(
patterns=self.matcher.get(span.label),
@ -98,7 +104,7 @@ class AttributeRuler(Pipe):
index=index,
)
) from None
set_token_attrs(token, attrs)
set_token_attrs(span[index], attrs)
return doc
def pipe(self, stream, *, batch_size=128):