From 6c25e60089931e4801a4c74cc807ea31f2c02bee Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Sat, 26 Sep 2020 11:12:39 +0200
Subject: [PATCH] Simplify string match IDs for AttributeRuler

---
 spacy/pipeline/attributeruler.py | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py
index 1dc2a10dd..4243ebcfb 100644
--- a/spacy/pipeline/attributeruler.py
+++ b/spacy/pipeline/attributeruler.py
@@ -82,7 +82,7 @@ class AttributeRuler(Pipe):
         matches = self.matcher(doc, allow_missing=True)
         # Sort by the attribute ID, so that later rules have precendence
         matches = [
-            (_parse_key(self.vocab.strings[m_id]), m_id, s, e)
+            (int(self.vocab.strings[m_id]), m_id, s, e)
             for m_id, s, e in matches
         ]
         matches.sort()
@@ -184,7 +184,7 @@ class AttributeRuler(Pipe):
         """
         # We need to make a string here, because otherwise the ID we pass back
         # will be interpreted as the hash of a string, rather than an ordinal.
-        key = _make_key(len(self.attrs))
+        key = str(len(self.attrs))
         self.matcher.add(self.vocab.strings.add(key), patterns)
         self._attrs_unnormed.append(attrs)
         attrs = normalize_token_attrs(self.vocab, attrs)
@@ -209,7 +209,7 @@ class AttributeRuler(Pipe):
         all_patterns = []
         for i in range(len(self.attrs)):
             p = {}
-            p["patterns"] = self.matcher.get(_make_key(i))[1]
+            p["patterns"] = self.matcher.get(str(i))[1]
             p["attrs"] = self._attrs_unnormed[i]
             p["index"] = self.indices[i]
             all_patterns.append(p)
@@ -313,12 +313,6 @@ class AttributeRuler(Pipe):
         return self
 
 
-def _make_key(n_attr):
-    return f"attr_rule_{n_attr}"
-
-def _parse_key(key):
-    return int(key.rsplit("_", 1)[1])
-
 def _split_morph_attrs(attrs):
     """Split entries from a tag map or morph rules dict into to two dicts, one