Hide jsonl_loc on init vectors and tidy up [ci skip]

2020-10-01 16:44:17 +02:00 · 2020-10-01 16:44:17 +02:00 · 7f68f4bd92
parent 27cbffff1b
commit 7f68f4bd92
1 changed files with 13 additions and 7 deletions
--- a/spacy/cli/init_pipeline.py
+++ b/spacy/cli/init_pipeline.py
@ -7,6 +7,7 @@ import srsly
 from .. import util
 from ..training.initialize import init_nlp, convert_vectors
 from ..language import Language
 from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
 from ._util import import_code, setup_gpu
@ -19,9 +20,9 @@ def init_vectors_cli(
    output_dir: Path = Arg(..., help="Pipeline output directory"),
    prune: int = Opt(-1, "--prune", "-p", help="Optional number of vectors to prune to"),
    truncate: int = Opt(0, "--truncate", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
    jsonl_loc: Optional[Path]=Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file"),
    name: Optional[str] = Opt(None, "--name", "-n", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
    verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
    jsonl_loc: Optional[Path] = Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file", hidden=True),
    # fmt: on
 ):
    """Convert word vectors for use with spaCy. Will export an nlp object that
@ -32,12 +33,7 @@ def init_vectors_cli(
    msg.info(f"Creating blank nlp object for language '{lang}'")
    nlp = util.get_lang_class(lang)()
    if jsonl_loc is not None:
-        lex_attrs = srsly.read_jsonl(jsonl_loc)
+        update_lexemes(nlp, jsonl_loc)
        for attrs in lex_attrs:
            if "settings" in attrs:
                continue
            lexeme = nlp.vocab[attrs["orth"]]
            lexeme.set_attrs(**attrs)
    convert_vectors(nlp, vectors_loc, truncate=truncate, prune=prune, name=name)
    msg.good(f"Successfully converted {len(nlp.vocab.vectors)} vectors")
    nlp.to_disk(output_dir)
@ -48,6 +44,16 @@ def init_vectors_cli(
    )
 def update_lexemes(nlp: Language, jsonl_loc: Path) -> None:
    """Apply lexeme attributes from a JSONL file to the vocab of `nlp`.

    Each record read from `jsonl_loc` is looked up in `nlp.vocab` by its
    "orth" value, and the whole record is passed to that lexeme's
    `set_attrs` as keyword arguments. Records that contain a "settings"
    key are skipped.
    """
    # Mostly used for backwards-compatibility and may be removed in the future
    for entry in srsly.read_jsonl(jsonl_loc):
        if "settings" not in entry:
            nlp.vocab[entry["orth"]].set_attrs(**entry)
@init_cli.command(
    "nlp",
    context_settings={"allow_extra_args": True, "ignore_unknown_options": True},