* Allow an OOV probability to be specified in get_lex_props

This commit is contained in:
Matthew Honnibal 2015-07-26 00:01:46 +02:00
parent 5c04dcd7c1
commit cd6e25132b
1 changed file with 4 additions and 3 deletions

View File

@ -25,7 +25,7 @@ from ..util import read_lang_data
from ..attrs import TAG, HEAD, DEP, ENT_TYPE, ENT_IOB
def get_lex_props(string):
def get_lex_props(string, oov_prob=-30):
return {
'flags': get_flags(string),
'length': len(string),
@ -36,7 +36,7 @@ def get_lex_props(string):
'prefix': string[0],
'suffix': string[-3:],
'cluster': 0,
'prob': -22,
'prob': oov_prob,
'sentiment': 0
}
@ -83,7 +83,8 @@ class English(object):
self.vocab = Vocab(data_dir=path.join(data_dir, 'vocab') if data_dir else None,
get_lex_props=get_lex_props, load_vectors=load_vectors,
pos_tags=POS_TAGS)
pos_tags=POS_TAGS,
float(open(path.join(data_dir, 'vocab', 'oov_prob')).read()))
if Tagger is True:
Tagger = EnPosTagger
if Parser is True: