Allow Tagger.add_label() before training

2017-11-01 21:49:24 +01:00 · 2017-11-01 21:49:24 +01:00 · b30dd36179
parent e033162a1d
commit b30dd36179
1 changed file with 23 additions and 10 deletions
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -431,18 +431,31 @@ class Tagger(Pipe):
    def Model(cls, n_tags, **cfg):
        return build_tagger_model(n_tags, **cfg)
-    def add_label(self, label):
+    def add_label(self, label, values=None):
        if label in self.labels:
            return 0
-        raise NotImplementedError
+        if self.model not in (True, False, None):
-        #if self.model not in (True, False, None):
+            # Here's how the model resizing will work, once the
-        #    smaller = self.model._layers[-1]
+            # neuron-to-tag mapping is no longer controlled by
-        #    larger = Softmax(len(self.labels)+1, smaller.nI)
+            # the Morphology class, which sorts the tag names.
-        #    copy_array(larger.W[:smaller.nO], smaller.W)
+            # The sorting makes adding labels difficult.
-        #    copy_array(larger.b[:smaller.nO], smaller.b)
+            # smaller = self.model._layers[-1]
-        #    self.model._layers[-1] = larger
+            # larger = Softmax(len(self.labels)+1, smaller.nI)
-        #self.labels.append(label)
+            # copy_array(larger.W[:smaller.nO], smaller.W)
-        #return 1
+            # copy_array(larger.b[:smaller.nO], smaller.b)
            # self.model._layers[-1] = larger
            raise ValueError(
                "Resizing pre-trained Tagger models is not "
                "currently supported.")
        tag_map = dict(self.vocab.morphology.tag_map)
        if values is None:
            values = {POS: "X"}
        tag_map[label] = values
        self.vocab.morphology = Morphology(
            self.vocab.strings, tag_map=tag_map,
            lemmatizer=self.vocab.morphology.lemmatizer,
            exc=self.vocab.morphology.exc)
        return 1
    def use_params(self, params):
        with self.model.use_params(params):