mirror of https://github.com/explosion/spaCy.git
This commit is contained in:
parent
87613edf8f
commit
1e0f566d95
|
@ -20,6 +20,8 @@ from .orth cimport word_shape
|
||||||
from .typedefs cimport attr_t
|
from .typedefs cimport attr_t
|
||||||
from .cfile cimport CFile
|
from .cfile cimport CFile
|
||||||
from .lemmatizer import Lemmatizer
|
from .lemmatizer import Lemmatizer
|
||||||
|
from .attrs import intify_attrs
|
||||||
|
from .tokens.token cimport Token
|
||||||
|
|
||||||
from . import attrs
|
from . import attrs
|
||||||
from . import symbols
|
from . import symbols
|
||||||
|
@ -336,16 +338,14 @@ cdef class Vocab:
|
||||||
cdef int i
|
cdef int i
|
||||||
tokens = <TokenC*>self.mem.alloc(len(substrings) + 1, sizeof(TokenC))
|
tokens = <TokenC*>self.mem.alloc(len(substrings) + 1, sizeof(TokenC))
|
||||||
for i, props in enumerate(substrings):
|
for i, props in enumerate(substrings):
|
||||||
|
props = intify_attrs(props, strings_map=self.strings, _do_deprecated=True)
|
||||||
token = &tokens[i]
|
token = &tokens[i]
|
||||||
# Set the special tokens up to have morphology and lemmas if
|
# Set the special tokens up to have arbitrary attributes
|
||||||
# specified, otherwise use the part-of-speech tag (if specified)
|
token.lex = <LexemeC*>self.get_by_orth(self.mem, props[attrs.ORTH])
|
||||||
token.lex = <LexemeC*>self.get(self.mem, props['F'])
|
if attrs.TAG in props:
|
||||||
if 'pos' in props:
|
self.morphology.assign_tag(token, props[attrs.TAG])
|
||||||
self.morphology.assign_tag(token, props['pos'])
|
for attr_id, value in props.items():
|
||||||
if 'L' in props:
|
Token.set_struct_attr(token, attr_id, value)
|
||||||
tokens[i].lemma = self.strings[props['L']]
|
|
||||||
for feature, value in props.get('morph', {}).items():
|
|
||||||
self.morphology.assign_feature(&token.morph, feature, value)
|
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
def dump(self, loc):
|
def dump(self, loc):
|
||||||
|
|
Loading…
Reference in New Issue