From 8f0f47b9a6f2ce34a75173e24e5a29f34f6b1197 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sat, 10 Oct 2015 22:12:06 +1100 Subject: [PATCH] * Use the symbols file in vocab instead of the symbols subfiles like attrs.pxd --- spacy/vocab.pyx | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 6cf829344..0f43967bb 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -20,7 +20,7 @@ from .cfile cimport CFile from .lemmatizer import Lemmatizer from . import attrs -from . import parts_of_speech +from . import symbols from cymem.cymem cimport Address from . import util @@ -75,18 +75,9 @@ cdef class Vocab: # is the frequency rank of the word, plus a certain offset. The structural # strings are loaded first, because the vocab is open-class, and these # symbols are closed class. - for name in attrs.NAMES: + for name in symbols.NAMES + list(sorted(tag_map.keys())): if name: _ = self.strings[name] - for name in parts_of_speech.NAMES: - if name: - _ = self.strings[name] - #for morph_name in UNIV_MORPH_NAMES: - # _ = self.strings[morph_name] - #for entity_type_name in entity_types.NAMES: - # _ = self.strings[entity_type_name] - #for tag_name in sorted(tag_map.keys()): - # _ = self.strings[tag_name] self.get_lex_attr = get_lex_attr self.morphology = Morphology(self.strings, tag_map, lemmatizer) self.serializer_freqs = serializer_freqs