From f8d1e2f2142575d6c1209c96ff5724302ac79fdf Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 28 Sep 2019 13:12:30 +0200 Subject: [PATCH] Update CLI docs [ci skip] --- spacy/cli/train.py | 7 +------ website/docs/api/cli.md | 8 +++++--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index d32e4b79d..2588a81a2 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -40,12 +40,7 @@ from .. import about parser_multitasks=("Side objectives for parser CNN, e.g. 'dep' or 'dep,tag'", "option", "pt", str), entity_multitasks=("Side objectives for NER CNN, e.g. 'dep' or 'dep,tag'", "option", "et", str), noise_level=("Amount of corruption for data augmentation", "option", "nl", float), - orth_variant_level=( - "Amount of orthography variation for data augmentation", - "option", - "ovl", - float, - ), + orth_variant_level=("Amount of orthography variation for data augmentation", "option", "ovl", float), eval_beam_widths=("Beam widths to evaluate, e.g. 4,8", "option", "bw", str), gold_preproc=("Use gold preprocessing", "flag", "G", bool), learn_tokens=("Make parser learn gold-standard tokenization", "flag", "T", bool), diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md index 8c6caa443..7b20b76de 100644 --- a/website/docs/api/cli.md +++ b/website/docs/api/cli.md @@ -181,6 +181,7 @@ All output files generated by this command are compatible with | `conll`, `conllu`, `conllubio` | Universal Dependencies `.conllu` or `.conll` format. | | `ner` | NER with IOB/IOB2 tags, one token per line with columns separated by whitespace. The first column is the token and the final column is the IOB tag. Sentences are separated by blank lines and documents are separated by the line `-DOCSTART- -X- O O`. Supports CoNLL 2003 NER format. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). 
| | `iob` | NER with IOB/IOB2 tags, one sentence per line with tokens separated by whitespace and annotation separated by `|`, either `word|B-ENT` or `word|POS|B-ENT`. See [sample data](https://github.com/explosion/spaCy/tree/master/examples/training/ner_example_data). | + ## Debug data {#debug-data new="2.2"} Analyze, debug and validate your training and development data, get useful @@ -363,8 +364,8 @@ $ python -m spacy train [lang] [output_path] [train_path] [dev_path] [--base-model] [--pipeline] [--vectors] [--n-iter] [--n-early-stopping] [--n-examples] [--use-gpu] [--version] [--meta-path] [--init-tok2vec] [--parser-multitasks] [--entity-multitasks] [--gold-preproc] [--noise-level] -[--learn-tokens] [--textcat-arch] [--textcat-multilabel] [--textcat-positive-label] -[--verbose] +[--orth-variant-level] [--learn-tokens] [--textcat-arch] [--textcat-multilabel] +[--textcat-positive-label] [--verbose] ``` | Argument | Type | Description | @@ -386,6 +387,7 @@ $ python -m spacy train [lang] [output_path] [train_path] [dev_path] | `--parser-multitasks`, `-pt` | option | Side objectives for parser CNN, e.g. `'dep'` or `'dep,tag'` | | `--entity-multitasks`, `-et` | option | Side objectives for NER CNN, e.g. `'dep'` or `'dep,tag'` | | `--noise-level`, `-nl` | option | Float indicating the amount of corruption for data augmentation. | +| `--orth-variant-level`, `-ovl` 2.2 | option | Float indicating the orthography variation for data augmentation (e.g. `0.3` for making 30% of occurrences of some tokens subject to replacement). | | `--gold-preproc`, `-G` | flag | Use gold preprocessing. | | `--learn-tokens`, `-T` | flag | Make parser learn gold-standard tokenization by merging subtokens. Typically used for languages like Chinese. | | `--textcat-multilabel`, `-TML` 2.2 | flag | Text classification classes aren't mutually exclusive (multilabel). 
| @@ -538,7 +540,7 @@ $ python -m spacy init-model [lang] [output_dir] [--jsonl-loc] [--vectors-loc] | `--jsonl-loc`, `-j` | option | Optional location of JSONL-formatted [vocabulary file](/api/annotation#vocab-jsonl) with lexical attributes. | | `--vectors-loc`, `-v` | option | Optional location of vectors. Should be a file where the first row contains the dimensions of the vectors, followed by a space-separated Word2Vec table. File can be provided in `.txt` format or as a zipped text file in `.zip` or `.tar.gz` format. | | `--prune-vectors`, `-V` | flag | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning. | -| `--vectors-name`, `-vn` | option | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. | +| `--vectors-name`, `-vn` | option | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. | | **CREATES** | model | A spaCy model containing the vocab and vectors. | ## Evaluate {#evaluate new="2"}