Set lex ID correctly for new tokens in Vocab

This commit is contained in:
Matthew Honnibal 2017-11-15 13:58:03 +01:00
parent fe3c42a06b
commit 2f169fdb0a
1 changed file with 3 additions and 4 deletions

View File

@@ -1,4 +1,5 @@
# coding: utf8
# cython: profile=True
from __future__ import unicode_literals
import numpy
@@ -154,7 +155,7 @@ cdef class Vocab:
lex = <LexemeC*>mem.alloc(sizeof(LexemeC), 1)
lex.orth = self.strings.add(string)
lex.length = len(string)
lex.id = self.length
lex.id = self.vectors.key2row.get(lex.orth, 0)
if self.lex_attr_getters is not None:
for attr, func in self.lex_attr_getters.items():
value = func(string)
@@ -164,9 +165,7 @@ cdef class Vocab:
lex.prob = value
elif value is not None:
Lexeme.set_struct_attr(lex, attr, value)
if is_oov:
lex.id = 0
else:
if not is_oov:
key = hash_string(string)
self._add_lex_to_vocab(key, lex)
assert lex != NULL, string