mirror of https://github.com/explosion/spaCy.git
Add option to omit extra lexeme tables in CLI
This commit is contained in:
parent 40e65d6f63
commit daaa7bf451
|
@ -18,6 +18,7 @@ from wasabi import msg
|
||||||
from ..vectors import Vectors
|
from ..vectors import Vectors
|
||||||
from ..errors import Errors, Warnings
|
from ..errors import Errors, Warnings
|
||||||
from ..util import ensure_path, get_lang_class, OOV_RANK
|
from ..util import ensure_path, get_lang_class, OOV_RANK
|
||||||
|
from ..lookups import Lookups
|
||||||
|
|
||||||
try:
|
try:
|
||||||
import ftfy
|
import ftfy
|
||||||
|
@ -49,6 +50,7 @@ DEFAULT_OOV_PROB = -20
|
||||||
str,
|
str,
|
||||||
),
|
),
|
||||||
model_name=("Optional name for the model meta", "option", "mn", str),
|
model_name=("Optional name for the model meta", "option", "mn", str),
|
||||||
|
omit_extra_lookups=("Don't include extra lookups in model", "flag", "OEL", bool),
|
||||||
)
|
)
|
||||||
def init_model(
|
def init_model(
|
||||||
lang,
|
lang,
|
||||||
|
@ -61,6 +63,7 @@ def init_model(
|
||||||
prune_vectors=-1,
|
prune_vectors=-1,
|
||||||
vectors_name=None,
|
vectors_name=None,
|
||||||
model_name=None,
|
model_name=None,
|
||||||
|
omit_extra_lookups=False,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Create a new model from raw data, like word frequencies, Brown clusters
|
Create a new model from raw data, like word frequencies, Brown clusters
|
||||||
|
@ -93,6 +96,15 @@ def init_model(
|
||||||
|
|
||||||
with msg.loading("Creating model..."):
|
with msg.loading("Creating model..."):
|
||||||
nlp = create_model(lang, lex_attrs, name=model_name)
|
nlp = create_model(lang, lex_attrs, name=model_name)
|
||||||
|
|
||||||
|
# Create empty extra lexeme tables so the data from spacy-lookups-data
|
||||||
|
# isn't loaded if these features are accessed
|
||||||
|
if omit_extra_lookups:
|
||||||
|
nlp.vocab.lookups_extra = Lookups()
|
||||||
|
nlp.vocab.lookups_extra.add_table("lexeme_cluster")
|
||||||
|
nlp.vocab.lookups_extra.add_table("lexeme_prob")
|
||||||
|
nlp.vocab.lookups_extra.add_table("lexeme_settings")
|
||||||
|
|
||||||
msg.good("Successfully created model")
|
msg.good("Successfully created model")
|
||||||
if vectors_loc is not None:
|
if vectors_loc is not None:
|
||||||
add_vectors(nlp, vectors_loc, truncate_vectors, prune_vectors, vectors_name)
|
add_vectors(nlp, vectors_loc, truncate_vectors, prune_vectors, vectors_name)
|
||||||
|
|
|
@ -17,6 +17,7 @@ from .._ml import create_default_optimizer
|
||||||
from ..util import use_gpu as set_gpu
|
from ..util import use_gpu as set_gpu
|
||||||
from ..gold import GoldCorpus
|
from ..gold import GoldCorpus
|
||||||
from ..compat import path2str
|
from ..compat import path2str
|
||||||
|
from ..lookups import Lookups
|
||||||
from .. import util
|
from .. import util
|
||||||
from .. import about
|
from .. import about
|
||||||
|
|
||||||
|
@ -57,6 +58,7 @@ from .. import about
|
||||||
textcat_arch=("Textcat model architecture", "option", "ta", str),
|
textcat_arch=("Textcat model architecture", "option", "ta", str),
|
||||||
textcat_positive_label=("Textcat positive label for binary classes with two labels", "option", "tpl", str),
|
textcat_positive_label=("Textcat positive label for binary classes with two labels", "option", "tpl", str),
|
||||||
tag_map_path=("Location of JSON-formatted tag map", "option", "tm", Path),
|
tag_map_path=("Location of JSON-formatted tag map", "option", "tm", Path),
|
||||||
|
omit_extra_lookups=("Don't include extra lookups in model", "flag", "OEL", bool),
|
||||||
verbose=("Display more information for debug", "flag", "VV", bool),
|
verbose=("Display more information for debug", "flag", "VV", bool),
|
||||||
debug=("Run data diagnostics before training", "flag", "D", bool),
|
debug=("Run data diagnostics before training", "flag", "D", bool),
|
||||||
# fmt: on
|
# fmt: on
|
||||||
|
@ -96,6 +98,7 @@ def train(
|
||||||
textcat_arch="bow",
|
textcat_arch="bow",
|
||||||
textcat_positive_label=None,
|
textcat_positive_label=None,
|
||||||
tag_map_path=None,
|
tag_map_path=None,
|
||||||
|
omit_extra_lookups=False,
|
||||||
verbose=False,
|
verbose=False,
|
||||||
debug=False,
|
debug=False,
|
||||||
):
|
):
|
||||||
|
@ -247,6 +250,14 @@ def train(
|
||||||
# Update tag map with provided mapping
|
# Update tag map with provided mapping
|
||||||
nlp.vocab.morphology.tag_map.update(tag_map)
|
nlp.vocab.morphology.tag_map.update(tag_map)
|
||||||
|
|
||||||
|
# Create empty extra lexeme tables so the data from spacy-lookups-data
|
||||||
|
# isn't loaded if these features are accessed
|
||||||
|
if omit_extra_lookups:
|
||||||
|
nlp.vocab.lookups_extra = Lookups()
|
||||||
|
nlp.vocab.lookups_extra.add_table("lexeme_cluster")
|
||||||
|
nlp.vocab.lookups_extra.add_table("lexeme_prob")
|
||||||
|
nlp.vocab.lookups_extra.add_table("lexeme_settings")
|
||||||
|
|
||||||
if vectors:
|
if vectors:
|
||||||
msg.text("Loading vector from model '{}'".format(vectors))
|
msg.text("Loading vector from model '{}'".format(vectors))
|
||||||
_load_vectors(nlp, vectors)
|
_load_vectors(nlp, vectors)
|
||||||
|
|
Loading…
Reference in New Issue