diff --git a/spacy/cli/convert.py b/spacy/cli/convert.py
index 2a24bd145..84040a712 100644
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -44,7 +44,7 @@ def convert_cli(
     file_type: FileTypes = Opt("spacy", "--file-type", "-t", help="Type of data to produce"),
     n_sents: int = Opt(1, "--n-sents", "-n", help="Number of sentences per doc (0 to disable)"),
     seg_sents: bool = Opt(False, "--seg-sents", "-s", help="Segment sentences (for -c ner)"),
-    model: Optional[str] = Opt(None, "--model", "-b", help="Trained spaCy pipeline for sentence segmentation (for -s)"),
+    model: Optional[str] = Opt(None, "--model", "--base", "-b", help="Trained spaCy pipeline for sentence segmentation to use as base (for --seg-sents)"),
     morphology: bool = Opt(False, "--morphology", "-m", help="Enable appending morphology to tags"),
     merge_subtokens: bool = Opt(False, "--merge-subtokens", "-T", help="Merge CoNLL-U subtokens"),
     converter: str = Opt("auto", "--converter", "-c", help=f"Converter: {tuple(CONVERTERS.keys())}"),
diff --git a/spacy/cli/init_model.py b/spacy/cli/init_model.py
index 071d5f659..4fd3025fd 100644
--- a/spacy/cli/init_model.py
+++ b/spacy/cli/init_model.py
@@ -46,8 +46,8 @@ def init_model_cli(
     prune_vectors: int = Opt(-1, "--prune-vectors", "-V", help="Optional number of vectors to prune to"),
     truncate_vectors: int = Opt(0, "--truncate-vectors", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
     vectors_name: Optional[str] = Opt(None, "--vectors-name", "-vn", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
-    model_name: Optional[str] = Opt(None, "--model-name", "-mn", help="Optional name for the pipeline meta"),
-    base_model: Optional[str] = Opt(None, "--base-model", "-b", help="Base pipeline (for languages with custom tokenizers)")
+    model_name: Optional[str] = Opt(None, "--meta-name", "-mn", help="Optional name of the package for the pipeline meta"),
+    base_model: Optional[str] = Opt(None, "--base", "-b", help="Name of or path to base pipeline to start with (mostly relevant for pipelines with custom tokenizers)")
     # fmt: on
 ):
     """
diff --git a/website/docs/api/cli.md b/website/docs/api/cli.md
index 98da62eb3..aee285763 100644
--- a/website/docs/api/cli.md
+++ b/website/docs/api/cli.md
@@ -183,7 +183,7 @@ This command was previously called `init-model`.
 
 ```cli
-$ python -m spacy init vocab [lang] [output_dir] [--jsonl-loc] [--vectors-loc] [--prune-vectors]
+$ python -m spacy init vocab [lang] [output_dir] [--jsonl-loc] [--vectors-loc] [--prune-vectors] [--vectors-name] [--meta-name] [--base]
 ```
 
 | Name | Description |
@@ -194,7 +194,9 @@ $ python -m spacy init vocab [lang] [output_dir] [--jsonl-loc] [--vectors-loc] [
 | `--vectors-loc`, `-v` | Optional location of vectors. Should be a file where the first row contains the dimensions of the vectors, followed by a space-separated Word2Vec table. File can be provided in `.txt` format or as a zipped text file in `.zip` or `.tar.gz` format. ~~Optional[Path] \(option)~~ |
 | `--truncate-vectors`, `-t` <Tag variant="new">2.3</Tag> | Number of vectors to truncate to when reading in vectors file. Defaults to `0` for no truncation. ~~int (option)~~ |
 | `--prune-vectors`, `-V` | Number of vectors to prune the vocabulary to. Defaults to `-1` for no pruning. ~~int (option)~~ |
-| `--vectors-name`, `-vn` | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. ~~str (option)~~ |
+| `--vectors-name`, `-vn` | Name to assign to the word vectors in the `meta.json`, e.g. `en_core_web_md.vectors`. ~~Optional[str] \(option)~~ |
+| `--meta-name`, `-mn` | Optional name of the package for the pipeline meta. ~~Optional[str] \(option)~~ |
+| `--base`, `-b` | Optional name of or path to base pipeline to start with (mostly relevant for pipelines with custom tokenizers). ~~Optional[str] \(option)~~ |
 | `--help`, `-h` | Show help message and available arguments. ~~bool (flag)~~ |
 | **CREATES** | A spaCy pipeline directory containing the vocab and vectors. |
 
@@ -207,7 +209,7 @@ management functions. The converter can be specified on the command line, or
 chosen based on the file extension of the input file.
 
 ```cli
-$ python -m spacy convert [input_file] [output_dir] [--converter] [--file-type] [--n-sents] [--seg-sents] [--model] [--morphology] [--merge-subtokens] [--ner-map] [--lang]
+$ python -m spacy convert [input_file] [output_dir] [--converter] [--file-type] [--n-sents] [--seg-sents] [--base] [--morphology] [--merge-subtokens] [--ner-map] [--lang]
 ```
 
 | Name | Description |
@@ -218,7 +220,7 @@ $ python -m spacy convert [input_file] [output_dir] [--converter] [--file-type]
 | `--file-type`, `-t` <Tag variant="new">2.1</Tag> | Type of file to create. Either `spacy` (default) for binary [`DocBin`](/api/docbin) data or `json` for v2.x JSON format. ~~str (option)~~ |
 | `--n-sents`, `-n` | Number of sentences per document. ~~int (option)~~ |
 | `--seg-sents`, `-s` <Tag variant="new">2.2</Tag> | Segment sentences (for `--converter ner`). ~~bool (flag)~~ |
-| `--model`, `-b` <Tag variant="new">2.2</Tag> | Model for parser-based sentence segmentation (for `--seg-sents`). ~~Optional[str](option)~~ |
+| `--base`, `-b` | Trained spaCy pipeline for sentence segmentation to use as base (for `--seg-sents`). ~~Optional[str](option)~~ |
 | `--morphology`, `-m` | Enable appending morphology to tags. ~~bool (flag)~~ |
 | `--ner-map`, `-nm` | NER tag mapping (as JSON-encoded dict of entity types). ~~Optional[Path](option)~~ |
 | `--lang`, `-l` <Tag variant="new">2.1</Tag> | Language code (if tokenizer required). ~~Optional[str] \(option)~~ |
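
For illustration, a minimal sketch of how the renamed options could be invoked after this change. The input paths, output directories, the `my_pipeline` package name, and the `en_core_web_sm` pipeline are placeholder assumptions for the example, not part of the diff:

```cli
# Convert NER data, using a trained pipeline as base for sentence segmentation
$ python -m spacy convert ./train.iob ./corpus --converter ner --seg-sents --base en_core_web_sm
# Initialize a vocab, setting the package name for the meta and a base pipeline
$ python -m spacy init vocab en ./vocab_out --jsonl-loc ./lexemes.jsonl --meta-name my_pipeline --base en_core_web_sm
```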