Add Vocab.cfg attribute to hold settings such as the out-of-vocabulary (OOV) probability

This commit is contained in:
Explosion Bot 2017-10-30 16:08:50 +01:00
parent ab5d5ed880
commit 7b56b2f04b
2 changed files with 3 additions and 1 deletion

View File

@ -32,6 +32,7 @@ cdef class Vocab:
cdef readonly int length cdef readonly int length
cdef public object data_dir cdef public object data_dir
cdef public object lex_attr_getters cdef public object lex_attr_getters
cdef public object cfg
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL

View File

@ -27,7 +27,7 @@ cdef class Vocab:
C-data that is shared between `Doc` objects. C-data that is shared between `Doc` objects.
""" """
def __init__(self, lex_attr_getters=None, tag_map=None, lemmatizer=None, def __init__(self, lex_attr_getters=None, tag_map=None, lemmatizer=None,
strings=tuple(), **deprecated_kwargs): strings=tuple(), oov_prob=-20., **deprecated_kwargs):
"""Create the vocabulary. """Create the vocabulary.
lex_attr_getters (dict): A dictionary mapping attribute IDs to lex_attr_getters (dict): A dictionary mapping attribute IDs to
@ -43,6 +43,7 @@ cdef class Vocab:
tag_map = tag_map if tag_map is not None else {} tag_map = tag_map if tag_map is not None else {}
if lemmatizer in (None, True, False): if lemmatizer in (None, True, False):
lemmatizer = Lemmatizer({}, {}, {}) lemmatizer = Lemmatizer({}, {}, {})
self.cfg = {'oov_prob': oov_prob}
self.mem = Pool() self.mem = Pool()
self._by_hash = PreshMap() self._by_hash = PreshMap()
self._by_orth = PreshMap() self._by_orth = PreshMap()