Fix Issue #536: oov_prob was 0 for OOV words.

This commit is contained in:
Matthew Honnibal 2016-10-19 23:38:14 +02:00
parent dfa752d064
commit d10c17f2a4
1 changed file with 7 additions and 2 deletions

View File

@@ -50,12 +50,17 @@ cdef class Vocab:
''' '''
@classmethod @classmethod
def load(cls, path, lex_attr_getters=None, vectors=True, lemmatizer=True, def load(cls, path, lex_attr_getters=None, vectors=True, lemmatizer=True,
tag_map=True, serializer_freqs=True, **deprecated_kwargs): tag_map=True, serializer_freqs=True, oov_prob=True, **deprecated_kwargs):
util.check_renamed_kwargs({'get_lex_attr': 'lex_attr_getters'}, deprecated_kwargs) util.check_renamed_kwargs({'get_lex_attr': 'lex_attr_getters'}, deprecated_kwargs)
if tag_map is True and (path / 'vocab' / 'tag_map.json').exists(): if tag_map is True and (path / 'vocab' / 'tag_map.json').exists():
with (path / 'vocab' / 'tag_map.json').open() as file_: with (path / 'vocab' / 'tag_map.json').open() as file_:
tag_map = json.load(file_) tag_map = json.load(file_)
if lex_attr_getters is not None \
and oov_prob is True \
and (path / 'vocab' / 'oov_prob').exists():
with (path / 'vocab' / 'oov_prob').open() as file_:
oov_prob = float(file_.read())
lex_attr_getters[PROB] = lambda text: oov_prob
if lemmatizer is True: if lemmatizer is True:
lemmatizer = Lemmatizer.load(path) lemmatizer = Lemmatizer.load(path)
if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists(): if serializer_freqs is True and (path / 'vocab' / 'serializer.json').exists():