diff --git a/spacy/errors.py b/spacy/errors.py index 54f206339..2713f76c8 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -239,6 +239,11 @@ class Errors(object): "existing extension, set `force=True` on `{obj}.set_extension`.") E091 = ("Invalid extension attribute {name}: expected callable or None, " "but got: {value}") + E092 = ("Could not find or assign name for word vectors. Usually, the " + "name is read from the model's meta.json in vector.name. " + "Alternatively, it is built from the 'lang' and 'name' keys in " + "the meta.json. Vector names are required to avoid issue #1660.") + E093 = ("token.ent_iob values make invalid sequence: I without B\n{seq}") @add_codes @@ -252,6 +257,10 @@ class TempErrors(object): T006 = ("Currently history width is hard-coded to 0. Received: {value}.") T007 = ("Can't yet set {attr} from Span. Vote for this feature on the " "issue tracker: http://github.com/explosion/spaCy/issues") + T008 = ("Bad configuration of Tagger. This is probably a bug within " + "spaCy. 
We changed the name of an internal attribute for loading " + "pre-trained vectors, and the class has been passed the old name " + "(pretrained_dims) but not the new name (pretrained_vectors).") class ModelsWarning(UserWarning): diff --git a/spacy/language.py b/spacy/language.py index 8d2122c40..5f2b52e93 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -707,7 +707,7 @@ def _fix_pretrained_vectors_name(nlp): vectors_name = '%s_%s.vectors' % (nlp.meta['lang'], nlp.meta['name']) nlp.vocab.vectors.name = vectors_name else: - raise ValueError("Unnamed vectors") + raise ValueError(Errors.E092) for name, proc in nlp.pipeline: if not hasattr(proc, 'cfg'): continue diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx index 41b71a13b..e0cc1951d 100644 --- a/spacy/pipeline.pyx +++ b/spacy/pipeline.pyx @@ -207,7 +207,7 @@ class Pipe(object): def load_model(b): # TODO: Remove this once we don't have to handle previous models if 'pretrained_dims' in self.cfg and 'pretrained_vectors' not in self.cfg: - self.cfg['pretrained_vectors'] = self.vocab.vectors.name + self.cfg['pretrained_vectors'] = self.vocab.vectors.name if self.model is True: self.model = self.Model(**self.cfg) self.model.from_bytes(b) @@ -234,7 +234,7 @@ class Pipe(object): def load_model(p): # TODO: Remove this once we don't have to handle previous models if 'pretrained_dims' in self.cfg and 'pretrained_vectors' not in self.cfg: - self.cfg['pretrained_vectors'] = self.vocab.vectors.name + self.cfg['pretrained_vectors'] = self.vocab.vectors.name if self.model is True: self.model = self.Model(**self.cfg) self.model.from_bytes(p.open('rb').read()) @@ -531,12 +531,7 @@ class Tagger(Pipe): @classmethod def Model(cls, n_tags, **cfg): if cfg.get('pretrained_dims') and not cfg.get('pretrained_vectors'): - raise ValueError( - "Bad configuration of Tagger --- this is probably a bug " - "within spaCy. 
We changed the name of an internal attribute " - "for loading pre-trained vectors, and the class has been " - "passed the old name (pretrained_dims) but not the new name " - "(pretrained_vectors)") + raise ValueError(TempErrors.T008) return build_tagger_model(n_tags, **cfg) def add_label(self, label, values=None): @@ -584,8 +579,8 @@ class Tagger(Pipe): def load_model(b): # TODO: Remove this once we don't have to handle previous models if 'pretrained_dims' in self.cfg and 'pretrained_vectors' not in self.cfg: - self.cfg['pretrained_vectors'] = self.vocab.vectors.name - + self.cfg['pretrained_vectors'] = self.vocab.vectors.name + if self.model is True: token_vector_width = util.env_opt( 'token_vector_width', diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index c81ace212..b89d6a1c7 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -430,10 +430,7 @@ cdef class Doc: if token.ent_iob == 1: if start == -1: seq = ['%s|%s' % (t.text, t.ent_iob_) for t in self[i-5:i+5]] - raise ValueError( - "token.ent_iob values make invalid sequence: " - "I without B\n" - "{seq}".format(seq=' '.join(seq))) + raise ValueError(Errors.E093.format(seq=' '.join(seq))) elif token.ent_iob == 2 or token.ent_iob == 0: if start != -1: output.append(Span(self, start, i, label=label))