Merge pull request #6149 from adrianeboyd/feature/attributeruler-match-ids

Simplify string match IDs for AttributeRuler
This commit is contained in:
Ines Montani 2020-09-26 15:31:30 +02:00 committed by GitHub
commit 8fea06d55e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 3 additions and 9 deletions

View File

@ -82,7 +82,7 @@ class AttributeRuler(Pipe):
matches = self.matcher(doc, allow_missing=True) matches = self.matcher(doc, allow_missing=True)
# Sort by the attribute ID, so that later rules have precedence # Sort by the attribute ID, so that later rules have precedence
matches = [ matches = [
(_parse_key(self.vocab.strings[m_id]), m_id, s, e) (int(self.vocab.strings[m_id]), m_id, s, e)
for m_id, s, e in matches for m_id, s, e in matches
] ]
matches.sort() matches.sort()
@ -184,7 +184,7 @@ class AttributeRuler(Pipe):
""" """
# We need to make a string here, because otherwise the ID we pass back # We need to make a string here, because otherwise the ID we pass back
# will be interpreted as the hash of a string, rather than an ordinal. # will be interpreted as the hash of a string, rather than an ordinal.
key = _make_key(len(self.attrs)) key = str(len(self.attrs))
self.matcher.add(self.vocab.strings.add(key), patterns) self.matcher.add(self.vocab.strings.add(key), patterns)
self._attrs_unnormed.append(attrs) self._attrs_unnormed.append(attrs)
attrs = normalize_token_attrs(self.vocab, attrs) attrs = normalize_token_attrs(self.vocab, attrs)
@ -209,7 +209,7 @@ class AttributeRuler(Pipe):
all_patterns = [] all_patterns = []
for i in range(len(self.attrs)): for i in range(len(self.attrs)):
p = {} p = {}
p["patterns"] = self.matcher.get(_make_key(i))[1] p["patterns"] = self.matcher.get(str(i))[1]
p["attrs"] = self._attrs_unnormed[i] p["attrs"] = self._attrs_unnormed[i]
p["index"] = self.indices[i] p["index"] = self.indices[i]
all_patterns.append(p) all_patterns.append(p)
@ -313,12 +313,6 @@ class AttributeRuler(Pipe):
return self return self
def _make_key(n_attr):
return f"attr_rule_{n_attr}"
def _parse_key(key):
return int(key.rsplit("_", 1)[1])
def _split_morph_attrs(attrs): def _split_morph_attrs(attrs):
"""Split entries from a tag map or morph rules dict into two dicts, one """Split entries from a tag map or morph rules dict into two dicts, one