Merge pull request #4208 from adrianeboyd/bugfix/orth-vs-noise

Add separate noise vs orth level to train CLI
This commit is contained in:
Matthew Honnibal 2019-08-29 10:26:42 +02:00 committed by GitHub
commit 216f63a987
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 3 additions and 1 deletion

View File

@@ -65,6 +65,7 @@ from .. import about
str,
),
noise_level=("Amount of corruption for data augmentation", "option", "nl", float),
+ orth_variant_level=("Amount of orthography variation for data augmentation", "option", "ovl", float),
eval_beam_widths=("Beam widths to evaluate, e.g. 4,8", "option", "bw", str),
gold_preproc=("Use gold preprocessing", "flag", "G", bool),
learn_tokens=("Make parser learn gold-standard tokenization", "flag", "T", bool),
@@ -90,6 +91,7 @@ def train(
parser_multitasks="",
entity_multitasks="",
noise_level=0.0,
+ orth_variant_level=0.0,
eval_beam_widths="",
gold_preproc=False,
learn_tokens=False,
@@ -240,7 +242,7 @@ def train(
best_score = 0.0
for i in range(n_iter):
train_docs = corpus.train_docs(
- nlp, orth_variant_level=noise_level, gold_preproc=gold_preproc, max_length=0
+ nlp, noise_level=noise_level, orth_variant_level=orth_variant_level, gold_preproc=gold_preproc, max_length=0
)
if raw_text:
random.shuffle(raw_text)