From 35a393106404d8f69d69e6c12d62e21a7d517065 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Thu, 17 Sep 2020 16:36:27 +0200
Subject: [PATCH 1/7] fix typo

---
 spacy/cli/debug_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index a4899a458..58908c5e8 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -54,7 +54,7 @@ def debug_model_cli(
     config_overrides = parse_config_overrides(ctx.args)
     with show_validation_error(config_path):
         config = util.load_config(config_path, overrides=config_overrides)
-        nlp, config = util.load_model_from_config(config_path)
+        nlp, config = util.load_model_from_config(config)
     seed = config["training"]["seed"]
     if seed is not None:
         msg.info(f"Fixing random seed: {seed}")

From e4fc7e0222621c40b6d0aa025d3fc0450a672079 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Thu, 17 Sep 2020 22:34:36 +0200
Subject: [PATCH 2/7] fixing output sample to proper 2D array

---
 spacy/cli/debug_model.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 58908c5e8..04a14bdc9 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -60,13 +60,12 @@ def debug_model_cli(
         msg.info(f"Fixing random seed: {seed}")
         fix_random_seed(seed)
     pipe = nlp.get_pipe(component)
-    if hasattr(pipe, "model"):
-        model = pipe.model
-    else:
+    if not hasattr(pipe, "model"):
         msg.fail(
             f"The component '{component}' does not specify an object that holds a Model.",
             exits=1,
         )
+    model = pipe.model
     debug_model(model, print_settings=print_settings)
 
 
@@ -87,7 +86,7 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
 
     # STEP 1: Initializing the model and printing again
     X = _get_docs()
-    Y = _get_output(model.ops.xp)
+    Y = _get_output(model.ops)
     # The output vector might differ from the official type of the output layer
     with data_validation(False):
         model.initialize(X=X, Y=Y)
@@ -113,9 +112,11 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
         msg.divider(f"STEP 3 - prediction")
         msg.info(str(prediction))
 
+    msg.good(f"Succesfully ended analysis - model looks good!")
+
 
 def get_gradient(model, Y):
-    goldY = _get_output(model.ops.xp)
+    goldY = _get_output(model.ops)
     return Y - goldY
 
 
@@ -133,8 +134,14 @@ def _get_docs(lang: str = "en"):
     return list(nlp.pipe(_sentences()))
 
 
-def _get_output(xp):
-    return xp.asarray([i + 10 for i, _ in enumerate(_get_docs())], dtype="float32")
+def _get_output(ops):
+    docs = len(_get_docs())
+    labels = 6
+    output = ops.alloc2f(d0=docs, d1=labels)
+    for i in range(docs):
+        for j in range(labels):
+            output[i, j] = 1 / (i+j+0.01)
+    return ops.xp.asarray(output)
 
 
 def _print_model(model, print_settings):

From 73ff52b9ec9e61ae2d7faeacfef1b7bee53ea10e Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Fri, 18 Sep 2020 16:43:15 +0200
Subject: [PATCH 3/7] hack for tok2vec listener

---
 spacy/cli/debug_model.py | 26 +++++++++++++++++---------
 spacy/errors.py          |  3 ++-
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 04a14bdc9..1d8d043fd 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -66,10 +66,12 @@ def debug_model_cli(
             exits=1,
         )
     model = pipe.model
-    debug_model(model, print_settings=print_settings)
+    # call _link_components directly as we won't call nlp.begin_training
+    nlp._link_components()
+    debug_model(nlp, model, print_settings=print_settings)
 
 
-def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None):
+def debug_model(nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] = None):
     if not isinstance(model, Model):
         msg.fail(
             f"Requires a Thinc Model to be analysed, but found {type(model)} instead.",
@@ -86,10 +88,10 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
 
     # STEP 1: Initializing the model and printing again
     X = _get_docs()
-    Y = _get_output(model.ops)
+    goldY = _get_output(model.ops)
     # The output vector might differ from the official type of the output layer
     with data_validation(False):
-        model.initialize(X=X, Y=Y)
+        model.initialize(X=X, Y=goldY)
     if print_settings.get("print_after_init"):
         msg.divider(f"STEP 1 - after initialization")
         _print_model(model, print_settings)
@@ -97,9 +99,16 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
     # STEP 2: Updating the model and printing again
     optimizer = Adam(0.001)
     set_dropout_rate(model, 0.2)
+    # ugly hack to deal with Tok2Vec listeners
+    tok2vec = None
+    if model.has_ref("tok2vec") and model.get_ref("tok2vec").name == "tok2vec-listener":
+        tok2vec = nlp.get_pipe("tok2vec")
+        tok2vec.model.initialize(X=X)
     for e in range(3):
-        Y, get_dX = model.begin_update(_get_docs())
-        dY = get_gradient(model, Y)
+        if tok2vec:
+            tok2vec.predict(X)
+        Y, get_dX = model.begin_update(X)
+        dY = get_gradient(goldY, Y)
         get_dX(dY)
         model.finish_update(optimizer)
     if print_settings.get("print_after_training"):
@@ -107,7 +116,7 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
         _print_model(model, print_settings)
 
     # STEP 3: the final prediction
-    prediction = model.predict(_get_docs())
+    prediction = model.predict(X)
     if print_settings.get("print_prediction"):
         msg.divider(f"STEP 3 - prediction")
         msg.info(str(prediction))
@@ -115,8 +124,7 @@ def debug_model(model: Model, *, print_settings: Optional[Dict[str, Any]] = None
     msg.good(f"Succesfully ended analysis - model looks good!")
 
 
-def get_gradient(model, Y):
-    goldY = _get_output(model.ops)
+def get_gradient(goldY, Y):
     return Y - goldY
 
 
diff --git a/spacy/errors.py b/spacy/errors.py
index 173aedab9..af307e069 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -545,7 +545,8 @@ class Errors:
     E949 = ("Can only create an alignment when the texts are the same.")
     E952 = ("The section '{name}' is not a valid section in the provided config.")
     E953 = ("Mismatched IDs received by the Tok2Vec listener: {id1} vs. {id2}")
-    E954 = ("The Tok2Vec listener did not receive a valid input.")
+    E954 = ("The Tok2Vec listener did not receive any valid input from an upstream "
+            "component.")
     E955 = ("Can't find table(s) '{table}' for language '{lang}' in spacy-lookups-data.")
     E956 = ("Can't find component '{name}' in [components] block in the config. "
             "Available components: {opts}")

From 6db1d5dc0dff848dded3d2990543f749707afc45 Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Sat, 19 Sep 2020 19:11:30 +0200
Subject: [PATCH 4/7] trying some stuff

---
 spacy/cli/debug_model.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 1d8d043fd..09feaf671 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -89,6 +89,7 @@ def debug_model(nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] =
     # STEP 1: Initializing the model and printing again
     X = _get_docs()
     goldY = _get_output(model.ops)
+    # _set_output_dim(nO=goldY.shape[-1], model=model)
     # The output vector might differ from the official type of the output layer
     with data_validation(False):
         model.initialize(X=X, Y=goldY)
@@ -108,6 +109,7 @@ def debug_model(nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] =
         if tok2vec:
             tok2vec.predict(X)
         Y, get_dX = model.begin_update(X)
+        print("get_dX", get_dX)
         dY = get_gradient(goldY, Y)
         get_dX(dY)
         model.finish_update(optimizer)
@@ -152,6 +154,10 @@ def _get_output(ops):
     return ops.xp.asarray(output)
 
 
+def _get_output_old(xp):
+    return xp.asarray([i + 10 for i, _ in enumerate(_get_docs())], dtype="float32")
+
+
 def _print_model(model, print_settings):
     layers = print_settings.get("layers", "")
     parameters = print_settings.get("parameters", False)
@@ -200,3 +206,12 @@ def _print_matrix(value):
     sample_matrix = sample_matrix[0:5]
     result = result + str(sample_matrix)
     return result
+
+
+def _set_output_dim(model, nO):
+    # the dim inference doesn't always work 100%, we need this hack like we have it in pipe.pyx
+    if model.has_dim("nO") is None:
+        model.set_dim("nO", nO)
+    if model.has_ref("output_layer"):
+        if model.get_ref("output_layer").has_dim("nO") is None:
+            model.get_ref("output_layer").set_dim("nO", nO)
\ No newline at end of file

From 447b3e5787dec59f2ed4b8a96c4b2ceb808d182f Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Mon, 21 Sep 2020 16:58:40 +0200
Subject: [PATCH 5/7] Merge remote-tracking branch 'upstream/develop' into
 fix/debug_model

# Conflicts:
#	spacy/cli/debug_model.py
---
 spacy/cli/debug_model.py | 48 ++++++++++++++++++++++------------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index af961d033..3d76cdbde 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -1,4 +1,4 @@
-from typing import Dict, Any, Optional
+from typing import Dict, Any, Optional, Iterable
 from pathlib import Path
 from wasabi import msg
 from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam
@@ -93,11 +93,10 @@ def debug_model(nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] =
 
     # STEP 1: Initializing the model and printing again
     X = _get_docs()
-    goldY = _get_output(model.ops)
-    # _set_output_dim(nO=goldY.shape[-1], model=model)
+    _set_output_dim(nO=7, model=model)
     # The output vector might differ from the official type of the output layer
     with data_validation(False):
-        model.initialize(X=X, Y=goldY)
+        model.initialize(X=X)
     if print_settings.get("print_after_init"):
         msg.divider(f"STEP 1 - after initialization")
         _print_model(model, print_settings)
@@ -110,12 +109,15 @@ def debug_model(nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] =
     if model.has_ref("tok2vec") and model.get_ref("tok2vec").name == "tok2vec-listener":
         tok2vec = nlp.get_pipe("tok2vec")
         tok2vec.model.initialize(X=X)
+    goldY = None
     for e in range(3):
         if tok2vec:
             tok2vec.predict(X)
         Y, get_dX = model.begin_update(X)
-        print("get_dX", get_dX)
-        dY = get_gradient(goldY, Y)
+        # simulate a goldY value
+        if not goldY:
+            goldY = _simulate_gold(Y)
+        dY = get_gradient(goldY, Y, model.ops)
         get_dX(dY)
         model.finish_update(optimizer)
     if print_settings.get("print_after_training"):
@@ -128,11 +130,20 @@ def debug_model(nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] =
         msg.divider(f"STEP 3 - prediction")
         msg.info(str(prediction))
 
-    msg.good(f"Succesfully ended analysis - model looks good!")
+    msg.good(f"Succesfully ended analysis - model looks good.")
 
 
-def get_gradient(goldY, Y):
-    return Y - goldY
+def _simulate_gold(element, counter=1):
+    if isinstance(element, Iterable):
+        for i in range(len(element)):
+            element[i] = _simulate_gold(element[i], counter+i)
+        return element
+    else:
+        return 1/counter
+
+
+def get_gradient(goldY, Y, ops):
+    return ops.asarray(Y) - ops.asarray(goldY)
 
 
 def _sentences():
@@ -149,18 +160,13 @@ def _get_docs(lang: str = "en"):
     return list(nlp.pipe(_sentences()))
 
 
-def _get_output(ops):
-    docs = len(_get_docs())
-    labels = 6
-    output = ops.alloc2f(d0=docs, d1=labels)
-    for i in range(docs):
-        for j in range(labels):
-            output[i, j] = 1 / (i+j+0.01)
-    return ops.xp.asarray(output)
-
-
-def _get_output_old(xp):
-    return xp.asarray([i + 10 for i, _ in enumerate(_get_docs())], dtype="float32")
+def _set_output_dim(model, nO):
+    # simulating dim inference by directly setting the nO argument of the model
+    if model.has_dim("nO") is None:
+        model.set_dim("nO", nO)
+    if model.has_ref("output_layer"):
+        if model.get_ref("output_layer").has_dim("nO") is None:
+            model.get_ref("output_layer").set_dim("nO", nO)
 
 
 def _print_model(model, print_settings):

From fa5c416db646b919153a362c02f842c7a19dbb9e Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Mon, 21 Sep 2020 23:09:22 +0200
Subject: [PATCH 6/7] initialize through nlp object and with train_corpus

---
 spacy/cli/debug_model.py | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 3d76cdbde..017bcd239 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -1,5 +1,9 @@
+import warnings
 from typing import Dict, Any, Optional, Iterable
 from pathlib import Path
+
+from spacy.training import Example
+from spacy.util import dot_to_object
 from wasabi import msg
 from thinc.api import require_gpu, fix_random_seed, set_dropout_rate, Adam
 from thinc.api import Model, data_validation, set_gpu_allocator
@@ -71,12 +75,10 @@ def debug_model_cli(
             exits=1,
         )
     model = pipe.model
-    # call _link_components directly as we won't call nlp.begin_training
-    nlp._link_components()
-    debug_model(nlp, model, print_settings=print_settings)
+    debug_model(config, nlp, model, print_settings=print_settings)
 
 
-def debug_model(nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] = None):
+def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] = None):
     if not isinstance(model, Model):
         msg.fail(
             f"Requires a Thinc Model to be analysed, but found {type(model)} instead.",
@@ -93,10 +95,21 @@ def debug_model(nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] =
 
     # STEP 1: Initializing the model and printing again
     X = _get_docs()
-    _set_output_dim(nO=7, model=model)
     # The output vector might differ from the official type of the output layer
     with data_validation(False):
-        model.initialize(X=X)
+        # msg.info(f"Could not initialize the model with dummy data - using the train_corpus.")
+        try:
+            train_corpus = dot_to_object(config, config["training"]["train_corpus"])
+            nlp.begin_training(lambda: train_corpus(nlp))
+            msg.info("Initialized the model with the training corpus.")
+        except ValueError:
+            try:
+                _set_output_dim(nO=7, model=model)
+                nlp.begin_training(lambda: [Example.from_dict(x, {}) for x in X])
+                msg.info("Initialized the model with dummy data.")
+            except:
+                msg.fail("Could not initialize the model: you'll have to provide a valid train_corpus argument in the config file.", exits=1)
+
     if print_settings.get("print_after_init"):
         msg.divider(f"STEP 1 - after initialization")
         _print_model(model, print_settings)
@@ -114,8 +127,7 @@ def debug_model(nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] =
         if tok2vec:
             tok2vec.predict(X)
         Y, get_dX = model.begin_update(X)
-        # simulate a goldY value
-        if not goldY:
+        if goldY is None:
             goldY = _simulate_gold(Y)
         dY = get_gradient(goldY, Y, model.ops)
         get_dX(dY)

From 45b29c4a5b926c8f85b0a2ed4a9b8be13c5bf7eb Mon Sep 17 00:00:00 2001
From: svlandeg <sofie.vanlandeghem@gmail.com>
Date: Mon, 21 Sep 2020 23:17:23 +0200
Subject: [PATCH 7/7] cleanup

---
 spacy/cli/debug_model.py | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/spacy/cli/debug_model.py b/spacy/cli/debug_model.py
index 017bcd239..1d27c7c52 100644
--- a/spacy/cli/debug_model.py
+++ b/spacy/cli/debug_model.py
@@ -78,7 +78,9 @@ def debug_model_cli(
     debug_model(config, nlp, model, print_settings=print_settings)
 
 
-def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] = None):
+def debug_model(
+    config, nlp, model: Model, *, print_settings: Optional[Dict[str, Any]] = None
+):
     if not isinstance(model, Model):
         msg.fail(
             f"Requires a Thinc Model to be analysed, but found {type(model)} instead.",
@@ -97,7 +99,6 @@ def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str,
     X = _get_docs()
     # The output vector might differ from the official type of the output layer
     with data_validation(False):
-        # msg.info(f"Could not initialize the model with dummy data - using the train_corpus.")
         try:
             train_corpus = dot_to_object(config, config["training"]["train_corpus"])
             nlp.begin_training(lambda: train_corpus(nlp))
@@ -108,7 +109,10 @@ def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str,
                 nlp.begin_training(lambda: [Example.from_dict(x, {}) for x in X])
                 msg.info("Initialized the model with dummy data.")
             except:
-                msg.fail("Could not initialize the model: you'll have to provide a valid train_corpus argument in the config file.", exits=1)
+                msg.fail(
+                    "Could not initialize the model: you'll have to provide a valid train_corpus argument in the config file.",
+                    exits=1,
+                )
 
     if print_settings.get("print_after_init"):
         msg.divider(f"STEP 1 - after initialization")
@@ -121,7 +125,6 @@ def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str,
     tok2vec = None
     if model.has_ref("tok2vec") and model.get_ref("tok2vec").name == "tok2vec-listener":
         tok2vec = nlp.get_pipe("tok2vec")
-        tok2vec.model.initialize(X=X)
     goldY = None
     for e in range(3):
         if tok2vec:
@@ -145,17 +148,17 @@ def debug_model(config, nlp, model: Model, *, print_settings: Optional[Dict[str,
     msg.good(f"Succesfully ended analysis - model looks good.")
 
 
+def get_gradient(goldY, Y, ops):
+    return ops.asarray(Y) - ops.asarray(goldY)
+
+
 def _simulate_gold(element, counter=1):
     if isinstance(element, Iterable):
         for i in range(len(element)):
-            element[i] = _simulate_gold(element[i], counter+i)
+            element[i] = _simulate_gold(element[i], counter + i)
         return element
     else:
-        return 1/counter
-
-
-def get_gradient(goldY, Y, ops):
-    return ops.asarray(Y) - ops.asarray(goldY)
+        return 1 / counter
 
 
 def _sentences():
@@ -229,12 +232,3 @@ def _print_matrix(value):
     sample_matrix = sample_matrix[0:5]
     result = result + str(sample_matrix)
     return result
-
-
-def _set_output_dim(model, nO):
-    # the dim inference doesn't always work 100%, we need this hack like we have it in pipe.pyx
-    if model.has_dim("nO") is None:
-        model.set_dim("nO", nO)
-    if model.has_ref("output_layer"):
-        if model.get_ref("output_layer").has_dim("nO") is None:
-            model.get_ref("output_layer").set_dim("nO", nO)
\ No newline at end of file