From 801bf14f4ff63cc88a896239a3384330cbe85232 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 14 Mar 2015 11:10:27 -0400
Subject: [PATCH] * Clean up handling of dep_strings and ent_strings, using StringStore to encode the label names.

---
 spacy/en/__init__.py | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/spacy/en/__init__.py b/spacy/en/__init__.py
index 1aae1e3a7..2f906b2d4 100644
--- a/spacy/en/__init__.py
+++ b/spacy/en/__init__.py
@@ -104,14 +104,16 @@ class English(object):
     @property
     def parser(self):
         if self._parser is None:
-            self._parser = GreedyParser(path.join(self._data_dir, 'deps'),
+            self._parser = GreedyParser(self.vocab.strings,
+                                        path.join(self._data_dir, 'deps'),
                                         self.ParserTransitionSystem)
         return self._parser
 
     @property
     def entity(self):
         if self._entity is None:
-            self._entity = GreedyParser(path.join(self._data_dir, 'ner'),
+            self._entity = GreedyParser(self.vocab.strings,
+                                        path.join(self._data_dir, 'ner'),
                                         self.EntityTransitionSystem)
         return self._entity
 
@@ -180,13 +182,7 @@ class English(object):
         if parse and self.has_parser_model:
             self.parser(tokens)
         if entity and self.has_entity_model:
-            # TODO: Clean this up
             self.entity(tokens)
-            ent_strings = [None] * (max(self.entity.moves.label_ids.values()) + 1)
-            for label, i in self.entity.moves.label_ids.items():
-                if i >= 0:
-                    ent_strings[i] = label
-            tokens._ent_strings = tuple(ent_strings)
         return tokens
 
     @property