mirror of https://github.com/explosion/spaCy.git
Hide jsonl_loc on init vectors and tidy up [ci skip]
This commit is contained in:
parent
27cbffff1b
commit
7f68f4bd92
|
@ -7,6 +7,7 @@ import srsly
|
|||
|
||||
from .. import util
|
||||
from ..training.initialize import init_nlp, convert_vectors
|
||||
from ..language import Language
|
||||
from ._util import init_cli, Arg, Opt, parse_config_overrides, show_validation_error
|
||||
from ._util import import_code, setup_gpu
|
||||
|
||||
|
@ -19,9 +20,9 @@ def init_vectors_cli(
|
|||
output_dir: Path = Arg(..., help="Pipeline output directory"),
|
||||
prune: int = Opt(-1, "--prune", "-p", help="Optional number of vectors to prune to"),
|
||||
truncate: int = Opt(0, "--truncate", "-t", help="Optional number of vectors to truncate to when reading in vectors file"),
|
||||
jsonl_loc: Optional[Path]=Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file"),
|
||||
name: Optional[str] = Opt(None, "--name", "-n", help="Optional name for the word vectors, e.g. en_core_web_lg.vectors"),
|
||||
verbose: bool = Opt(False, "--verbose", "-V", "-VV", help="Display more information for debugging purposes"),
|
||||
jsonl_loc: Optional[Path] = Opt(None, "--lexemes-jsonl", "-j", help="Location of JSONL-formatted attributes file", hidden=True),
|
||||
# fmt: on
|
||||
):
|
||||
"""Convert word vectors for use with spaCy. Will export an nlp object that
|
||||
|
@ -32,12 +33,7 @@ def init_vectors_cli(
|
|||
msg.info(f"Creating blank nlp object for language '{lang}'")
|
||||
nlp = util.get_lang_class(lang)()
|
||||
if jsonl_loc is not None:
|
||||
lex_attrs = srsly.read_jsonl(jsonl_loc)
|
||||
for attrs in lex_attrs:
|
||||
if "settings" in attrs:
|
||||
continue
|
||||
lexeme = nlp.vocab[attrs["orth"]]
|
||||
lexeme.set_attrs(**attrs)
|
||||
update_lexemes(nlp, jsonl_loc)
|
||||
convert_vectors(nlp, vectors_loc, truncate=truncate, prune=prune, name=name)
|
||||
msg.good(f"Successfully converted {len(nlp.vocab.vectors)} vectors")
|
||||
nlp.to_disk(output_dir)
|
||||
|
@ -48,6 +44,16 @@ def init_vectors_cli(
|
|||
)
|
||||
|
||||
|
||||
def update_lexemes(nlp: Language, jsonl_loc: Path) -> None:
    """Apply lexeme attributes read from a JSONL file to the vocab of ``nlp``.

    Every record read from ``jsonl_loc`` is looked up in ``nlp.vocab`` by its
    "orth" value, and the whole record is passed to that lexeme's
    ``set_attrs``. Records that contain a "settings" key are skipped.

    nlp (Language): The pipeline object whose vocab entries are updated.
    jsonl_loc (Path): Location of the JSONL-formatted attributes file.
    """
    # Mostly used for backwards-compatibility and may be removed in the future
    for entry in srsly.read_jsonl(jsonl_loc):
        if "settings" not in entry:
            nlp.vocab[entry["orth"]].set_attrs(**entry)
|
||||
|
||||
|
||||
@init_cli.command(
|
||||
"nlp",
|
||||
context_settings={"allow_extra_args": True, "ignore_unknown_options": True},
|
||||
|
|
Loading…
Reference in New Issue