Fix linking resumed components (#6859)

* link components across enabled, resumed and frozen

* revert renaming

* revert renaming, the sequel
Sofie Van Landeghem 2021-02-01 12:19:58 +01:00 committed by GitHub
parent 8a245076c4
commit acabb284dd
2 changed files with 9 additions and 3 deletions

spacy/language.py

@@ -1190,6 +1190,7 @@ class Language:
         get_examples: Optional[Callable[[], Iterable[Example]]] = None,
         *,
         sgd: Optional[Optimizer] = None,
+        link_components: bool = True,
     ) -> Optimizer:
         """Initialize the pipe for training, using data examples if available.
 
@@ -1197,6 +1198,8 @@ class Language:
             returns gold-standard Example objects.
         sgd (Optional[Optimizer]): An optimizer to use for updates. If not
             provided, will be created using the .create_optimizer() method.
+        link_components (bool): Link listener components automatically or not
+            (default True)
         RETURNS (thinc.api.Optimizer): The optimizer.
 
         DOCS: https://spacy.io/api/language#initialize
@@ -1244,7 +1247,8 @@ class Language:
                     proc.initialize, p_settings, section="components", name=name
                 )
                 proc.initialize(get_examples, nlp=self, **p_settings)
-        self._link_components()
+        if link_components:
+            self._link_components()
         self._optimizer = sgd
         if sgd is not None:
             self._optimizer = sgd
@@ -1528,7 +1532,7 @@ class Language:
         """Register 'listeners' within pipeline components, to allow them to
         effectively share weights.
         """
-        # I had though, "Why do we do this inside the Language object? Shouldn't
+        # I had thought, "Why do we do this inside the Language object? Shouldn't
        # it be the tok2vec/transformer/etc's job?
        # The problem is we need to do it during deserialization...And the
        # components don't receive the pipeline then. So this does have to be
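
The net effect of the Language changes above is a new link_components keyword argument on Language.initialize. The following is a minimal sketch, not part of this commit, of how the flag can be used: listeners are linked explicitly via nlp._link_components() and the automatic linking inside initialize() is then skipped. The pipeline setup is an assumption for illustration only: a blank English pipeline with a tok2vec component and a tagger whose tok2vec sublayer is a spacy.Tok2VecListener.v1 with width 96, matching the default tok2vec width.

# Minimal sketch of the new link_components flag (not from this commit).
# Assumptions: blank English pipeline, default tok2vec width of 96, and a
# tagger built on a Tok2VecListener so there is actually a listener to link.
import spacy
from spacy.training import Example

nlp = spacy.blank("en")
nlp.add_pipe("tok2vec")
nlp.add_pipe(
    "tagger",
    config={
        "model": {
            "@architectures": "spacy.Tagger.v1",
            "tok2vec": {
                "@architectures": "spacy.Tok2VecListener.v1",
                "width": 96,
                "upstream": "*",
            },
        }
    },
)

# One example so the tagger can infer its labels during initialization.
doc = nlp.make_doc("Listeners get linked")
example = Example.from_dict(doc, {"tags": ["NNS", "VBP", "VBN"]})

# Link the listener to its tok2vec explicitly, then tell initialize() not to
# repeat the linking step.
nlp._link_components()
optimizer = nlp.initialize(lambda: [example], link_components=False)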

spacy/training/initialize.py

@@ -64,8 +64,10 @@ def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
         with nlp.select_pipes(enable=resume_components):
             logger.info(f"Resuming training for: {resume_components}")
             nlp.resume_training(sgd=optimizer)
+    # Make sure that listeners are defined before initializing further
+    nlp._link_components()
     with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
-        nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer)
+        nlp.initialize(lambda: train_corpus(nlp), sgd=optimizer, link_components=False)
         logger.info(f"Initialized pipeline components: {nlp.pipe_names}")
     # Detect components with listeners that are not frozen consistently
     for name, proc in nlp.pipeline:
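
Putting the init_nlp changes together, the ordering is: resume the sourced components, link listeners once across the full pipeline, then initialize the remaining components with link_components=False, because inside the select_pipes block the resumed and frozen components are disabled and a second linking pass would not see them. The sketch below is a simplified, hypothetical reduction of that flow, not the actual training code; the tok2vec-only pipeline and the empty component lists are placeholders for what the config would normally provide.

# Simplified sketch of the init_nlp ordering (placeholder pipeline and lists).
import spacy

nlp = spacy.blank("en")
nlp.add_pipe("tok2vec")

resume_components = []   # e.g. ["tok2vec"] when sourcing a trained component
frozen_components = []   # e.g. ["ner"] for components that should stay fixed

optimizer = nlp.create_optimizer()
if resume_components:
    with nlp.select_pipes(enable=resume_components):
        nlp.resume_training(sgd=optimizer)

# Make sure that listeners are defined before initializing further.
nlp._link_components()
with nlp.select_pipes(disable=[*frozen_components, *resume_components]):
    # Linking already happened on the full pipeline, so skip it here.
    # In spacy train, a corpus callback is passed as get_examples; omitted here.
    nlp.initialize(sgd=optimizer, link_components=False)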