mirror of https://github.com/explosion/spaCy.git
cleanup
This commit is contained in:
parent
fa5c416db6
commit
45b29c4a5b
|
@ -78,7 +78,9 @@ def debug_model_cli(
|
||||||
debug_model(config, nlp, model, print_settings=print_settings)
|
debug_model(config, nlp, model, print_settings=print_settings)
|
||||||
|
|
||||||
|
|
||||||
def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] = None):
|
def debug_model(
|
||||||
|
config, nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] = None
|
||||||
|
):
|
||||||
if not isinstance(model, Model):
|
if not isinstance(model, Model):
|
||||||
msg.fail(
|
msg.fail(
|
||||||
f"Requires a Thinc Model to be analysed, but found {type(model)} instead.",
|
f"Requires a Thinc Model to be analysed, but found {type(model)} instead.",
|
||||||
|
@ -97,7 +99,6 @@ def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str,
|
||||||
X = _get_docs()
|
X = _get_docs()
|
||||||
# The output vector might differ from the official type of the output layer
|
# The output vector might differ from the official type of the output layer
|
||||||
with data_validation(False):
|
with data_validation(False):
|
||||||
# msg.info(f"Could not initialize the model with dummy data - using the train_corpus.")
|
|
||||||
try:
|
try:
|
||||||
train_corpus = dot_to_object(config, config["training"]["train_corpus"])
|
train_corpus = dot_to_object(config, config["training"]["train_corpus"])
|
||||||
nlp.begin_training(lambda: train_corpus(nlp))
|
nlp.begin_training(lambda: train_corpus(nlp))
|
||||||
|
@ -108,7 +109,10 @@ def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str,
|
||||||
nlp.begin_training(lambda: [Example.from_dict(x, {}) for x in X])
|
nlp.begin_training(lambda: [Example.from_dict(x, {}) for x in X])
|
||||||
msg.info("Initialized the model with dummy data.")
|
msg.info("Initialized the model with dummy data.")
|
||||||
except:
|
except:
|
||||||
msg.fail("Could not initialize the model: you'll have to provide a valid train_corpus argument in the config file.", exits=1)
|
msg.fail(
|
||||||
|
"Could not initialize the model: you'll have to provide a valid train_corpus argument in the config file.",
|
||||||
|
exits=1,
|
||||||
|
)
|
||||||
|
|
||||||
if print_settings.get("print_after_init"):
|
if print_settings.get("print_after_init"):
|
||||||
msg.divider(f"STEP 1 - after initialization")
|
msg.divider(f"STEP 1 - after initialization")
|
||||||
|
@ -121,7 +125,6 @@ def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str,
|
||||||
tok2vec = None
|
tok2vec = None
|
||||||
if model.has_ref("tok2vec") and model.get_ref("tok2vec").name == "tok2vec-listener":
|
if model.has_ref("tok2vec") and model.get_ref("tok2vec").name == "tok2vec-listener":
|
||||||
tok2vec = nlp.get_pipe("tok2vec")
|
tok2vec = nlp.get_pipe("tok2vec")
|
||||||
tok2vec.model.initialize(X=X)
|
|
||||||
goldY = None
|
goldY = None
|
||||||
for e in range(3):
|
for e in range(3):
|
||||||
if tok2vec:
|
if tok2vec:
|
||||||
|
@ -145,6 +148,10 @@ def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str,
|
||||||
msg.good(f"Succesfully ended analysis - model looks good.")
|
msg.good(f"Succesfully ended analysis - model looks good.")
|
||||||
|
|
||||||
|
|
||||||
|
def get_gradient(goldY, Y, ops):
|
||||||
|
return ops.asarray(Y) - ops.asarray(goldY)
|
||||||
|
|
||||||
|
|
||||||
def _simulate_gold(element, counter=1):
|
def _simulate_gold(element, counter=1):
|
||||||
if isinstance(element, Iterable):
|
if isinstance(element, Iterable):
|
||||||
for i in range(len(element)):
|
for i in range(len(element)):
|
||||||
|
@ -154,10 +161,6 @@ def _simulate_gold(element, counter=1):
|
||||||
return 1 / counter
|
return 1 / counter
|
||||||
|
|
||||||
|
|
||||||
def get_gradient(goldY, Y, ops):
|
|
||||||
return ops.asarray(Y) - ops.asarray(goldY)
|
|
||||||
|
|
||||||
|
|
||||||
def _sentences():
|
def _sentences():
|
||||||
return [
|
return [
|
||||||
"Apple is looking at buying U.K. startup for $1 billion",
|
"Apple is looking at buying U.K. startup for $1 billion",
|
||||||
|
@ -229,12 +232,3 @@ def _print_matrix(value):
|
||||||
sample_matrix = sample_matrix[0:5]
|
sample_matrix = sample_matrix[0:5]
|
||||||
result = result + str(sample_matrix)
|
result = result + str(sample_matrix)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _set_output_dim(model, nO):
|
|
||||||
# the dim inference doesn't always work 100%, we need this hack like we have it in pipe.pyx
|
|
||||||
if model.has_dim("nO") is None:
|
|
||||||
model.set_dim("nO", nO)
|
|
||||||
if model.has_ref("output_layer"):
|
|
||||||
if model.get_ref("output_layer").has_dim("nO") is None:
|
|
||||||
model.get_ref("output_layer").set_dim("nO", nO)
|
|
Loading…
Reference in New Issue