mirror of https://github.com/explosion/spaCy.git
Add Vocab.cfg attr, to hold stuff like oov probs
This commit is contained in:
parent
ab5d5ed880
commit
7b56b2f04b
|
@ -32,6 +32,7 @@ cdef class Vocab:
|
||||||
cdef readonly int length
|
cdef readonly int length
|
||||||
cdef public object data_dir
|
cdef public object data_dir
|
||||||
cdef public object lex_attr_getters
|
cdef public object lex_attr_getters
|
||||||
|
cdef public object cfg
|
||||||
|
|
||||||
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
|
cdef const LexemeC* get(self, Pool mem, unicode string) except NULL
|
||||||
cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
|
cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
|
||||||
|
|
|
@ -27,7 +27,7 @@ cdef class Vocab:
|
||||||
C-data that is shared between `Doc` objects.
|
C-data that is shared between `Doc` objects.
|
||||||
"""
|
"""
|
||||||
def __init__(self, lex_attr_getters=None, tag_map=None, lemmatizer=None,
|
def __init__(self, lex_attr_getters=None, tag_map=None, lemmatizer=None,
|
||||||
strings=tuple(), **deprecated_kwargs):
|
strings=tuple(), oov_prob=-20., **deprecated_kwargs):
|
||||||
"""Create the vocabulary.
|
"""Create the vocabulary.
|
||||||
|
|
||||||
lex_attr_getters (dict): A dictionary mapping attribute IDs to
|
lex_attr_getters (dict): A dictionary mapping attribute IDs to
|
||||||
|
@ -43,6 +43,7 @@ cdef class Vocab:
|
||||||
tag_map = tag_map if tag_map is not None else {}
|
tag_map = tag_map if tag_map is not None else {}
|
||||||
if lemmatizer in (None, True, False):
|
if lemmatizer in (None, True, False):
|
||||||
lemmatizer = Lemmatizer({}, {}, {})
|
lemmatizer = Lemmatizer({}, {}, {})
|
||||||
|
self.cfg = {'oov_prob': oov_prob}
|
||||||
self.mem = Pool()
|
self.mem = Pool()
|
||||||
self._by_hash = PreshMap()
|
self._by_hash = PreshMap()
|
||||||
self._by_orth = PreshMap()
|
self._by_orth = PreshMap()
|
||||||
|
|
Loading…
Reference in New Issue