* Use the symbols file in vocab instead of the symbols subfiles like attrs.pxd

2015-10-10 22:12:06 +11:00 · 2015-10-10 22:12:06 +11:00 · 8f0f47b9a6
parent 6b30d1cf7b
commit 8f0f47b9a6
1 changed files with 2 additions and 11 deletions
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -20,7 +20,7 @@ from .cfile cimport CFile
 from .lemmatizer import Lemmatizer
 from . import attrs
-from . import parts_of_speech
+from . import symbols
 from cymem.cymem cimport Address
 from . import util
@@ -75,18 +75,9 @@ cdef class Vocab:
        # is the frequency rank of the word, plus a certain offset. The structural
        # strings are loaded first, because the vocab is open-class, and these
        # symbols are closed class.
-        for name in attrs.NAMES:
+        for name in symbols.NAMES + list(sorted(tag_map.keys())):
            if name:
                _ = self.strings[name]
-        for name in parts_of_speech.NAMES:
-            if name:
-                _ = self.strings[name]
-        #for morph_name in UNIV_MORPH_NAMES:
-        #    _ = self.strings[morph_name]
-        #for entity_type_name in entity_types.NAMES:
-        #    _ = self.strings[entity_type_name]
-        #for tag_name in sorted(tag_map.keys()):
-        #    _ = self.strings[tag_name]
        self.get_lex_attr = get_lex_attr
        self.morphology = Morphology(self.strings, tag_map, lemmatizer)
        self.serializer_freqs = serializer_freqs