Tidy up [ci skip]

This commit is contained in:
Ines Montani 2021-07-17 13:43:15 +10:00
parent 1ba2e8a646
commit 483f3175cb
1 changed file with 5 additions and 3 deletions

View File

@ -71,10 +71,13 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
nlp._link_components() nlp._link_components()
with nlp.select_pipes(disable=[*frozen_components, *resume_components]): with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
if T["max_epochs"] == -1: if T["max_epochs"] == -1:
sample_size = 100
logger.debug( logger.debug(
"Due to streamed train corpus, using only first 100 examples for initialization. If necessary, provide all labels in [initialize]. More info: https://spacy.io/api/cli#init_labels" f"Due to streamed train corpus, using only first {sample_size} "
f"examples for initialization. If necessary, provide all labels "
f"in [initialize]. More info: https://spacy.io/api/cli#init_labels"
) )
nlp.initialize(lambda: islice(train_corpus(nlp), 100), sgd=optimizer) nlp.initialize(lambda: islice(train_corpus(nlp), sample_size), sgd=optimizer)
else: else:
nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer) nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer)
logger.info(f"Initialized pipeline components: {nlp.pipe_names}") logger.info(f"Initialized pipeline components: {nlp.pipe_names}")
@ -86,7 +89,6 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
# Don't warn about components not in the pipeline # Don't warn about components not in the pipeline
if listener not in nlp.pipe_names: if listener not in nlp.pipe_names:
continue continue
if listener in frozen_components and name not in frozen_components: if listener in frozen_components and name not in frozen_components:
logger.warning(Warnings.W087.format(name=name, listener=listener)) logger.warning(Warnings.W087.format(name=name, listener=listener))
# We always check this regardless, in case user freezes tok2vec # We always check this regardless, in case user freezes tok2vec