Fix attributeruler

This commit is contained in:
Matthew Honnibal 2020-09-26 00:23:09 +02:00
parent 821f37254c
commit 702edf52a0
1 changed files with 14 additions and 8 deletions

View File

@ -79,26 +79,32 @@ class AttributeRuler(Pipe):
DOCS: https://nightly.spacy.io/api/attributeruler#call DOCS: https://nightly.spacy.io/api/attributeruler#call
""" """
matches = sorted(self.matcher(doc, allow_missing=True)) matches = self.matcher(doc, allow_missing=True)
print("Attrs", self.attrs) # Sort by the attribute ID, so that later rules have precedence
print("Matches", matches) matches = [
(_parse_key(self.vocab.strings[m_id]), m_id, s, e)
for match_id, start, end in matches: for m_id, s, e in matches
]
matches.sort()
for attr_id, match_id, start, end in matches:
span = Span(doc, start, end, label=match_id) span = Span(doc, start, end, label=match_id)
attr_id = _parse_key(span.label_)
attrs = self.attrs[attr_id] attrs = self.attrs[attr_id]
index = self.indices[attr_id] index = self.indices[attr_id]
try: try:
# The index can be negative, which makes it annoying to do
# the boundscheck. Let Span do it instead.
token = span[index] token = span[index]
except IndexError: except IndexError:
# The original IndexError is just our conditional logic, so we
# raise from None to suppress it in the traceback.
raise ValueError( raise ValueError(
Errors.E1001.format( Errors.E1001.format(
patterns=self.matcher.get(span.label), patterns=self.matcher.get(span.label),
span=[t.text for t in span], span=[t.text for t in span],
index=index, index=index,
) )
) from None ) from None
set_token_attrs(token, attrs) set_token_attrs(span[index], attrs)
return doc return doc
def pipe(self, stream, *, batch_size=128): def pipe(self, stream, *, batch_size=128):