From 73e095923f93b81fe40b80c384be644d2a829749 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 24 Jul 2019 11:27:34 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=AB=20Improve=20error=20message=20when?= =?UTF-8?q?=20model.from=5Fbytes()=20dies=20(#4014)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Improve error message when model.from_bytes() dies When Thinc's model.from_bytes() is called with a mismatched model, often we get a particularly ungraceful error, e.g. "AttributeError: FunctionLayer has no attribute G" This is because we're trying to load the parameters for something like a LayerNorm layer, and the model architecture has some other layer there instead. This is obviously terrible, especially since the error *type* is wrong. I've changed it to raise a ValueError. The error message is still probably a bit terse, but it's hard to be sure exactly what's gone wrong. * Update spacy/pipeline/pipes.pyx * Update spacy/pipeline/pipes.pyx * Update spacy/pipeline/pipes.pyx * Update spacy/syntax/nn_parser.pyx * Update spacy/syntax/nn_parser.pyx * Update spacy/pipeline/pipes.pyx Co-Authored-By: Matthew Honnibal * Update spacy/pipeline/pipes.pyx Co-Authored-By: Matthew Honnibal Co-authored-by: Ines Montani --- spacy/errors.py | 3 ++- spacy/pipeline/pipes.pyx | 27 +++++++++++++++++++++------ spacy/syntax/nn_parser.pyx | 10 ++++++++-- 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index 4af8b756c..1699809a7 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -413,7 +413,8 @@ class Errors(object): "This is likely a bug in spaCy, so feel free to open an issue.") E148 = ("Expected {ents} KB identifiers but got {ids}. Make sure that each entity in `doc.ents` " "is assigned to a KB identifier.") - + E149 = ("Error deserializing model. Check that the config used to create the " + "component matches the model being loaded.") @add_codes class TempErrors(object): diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index 609c4e852..ca166607f 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -167,7 +167,10 @@ class Pipe(object): self.cfg["pretrained_vectors"] = self.vocab.vectors.name if self.model is True: self.model = self.Model(**self.cfg) - self.model.from_bytes(b) + try: + self.model.from_bytes(b) + except AttributeError: + raise ValueError(Errors.E149) deserialize = OrderedDict() deserialize["cfg"] = lambda b: self.cfg.update(srsly.json_loads(b)) @@ -196,7 +199,10 @@ class Pipe(object): self.cfg["pretrained_vectors"] = self.vocab.vectors.name if self.model is True: self.model = self.Model(**self.cfg) - self.model.from_bytes(p.open("rb").read()) + try: + self.model.from_bytes(p.open("rb").read()) + except AttributeError: + raise ValueError(Errors.E149) deserialize = OrderedDict() deserialize["cfg"] = lambda p: self.cfg.update(_load_cfg(p)) @@ -562,7 +568,10 @@ class Tagger(Pipe): "token_vector_width", self.cfg.get("token_vector_width", 96)) self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg) - self.model.from_bytes(b) + try: + self.model.from_bytes(b) + except AttributeError: + raise ValueError(Errors.E149) def load_tag_map(b): tag_map = srsly.msgpack_loads(b) @@ -600,7 +609,10 @@ class Tagger(Pipe): if self.model is True: self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg) with p.open("rb") as file_: - self.model.from_bytes(file_.read()) + try: + self.model.from_bytes(file_.read()) + except AttributeError: + raise ValueError(Errors.E149) def load_tag_map(p): tag_map = srsly.read_msgpack(p) @@ -1315,9 +1327,12 @@ class EntityLinker(Pipe): def from_disk(self, path, exclude=tuple(), **kwargs): def load_model(p): - if self.model is True: + if self.model is True: self.model = self.Model(**self.cfg) - self.model.from_bytes(p.open("rb").read()) + try: + self.model.from_bytes(p.open("rb").read()) + except AttributeError: + raise ValueError(Errors.E149) def load_kb(p): kb = KnowledgeBase(vocab=self.vocab, entity_vector_length=self.cfg["entity_width"]) diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx index a6a476901..fa1a41fa4 100644 --- a/spacy/syntax/nn_parser.pyx +++ b/spacy/syntax/nn_parser.pyx @@ -631,7 +631,10 @@ cdef class Parser: cfg = {} with (path / 'model').open('rb') as file_: bytes_data = file_.read() - self.model.from_bytes(bytes_data) + try: + self.model.from_bytes(bytes_data) + except AttributeError: + raise ValueError(Errors.E149) self.cfg.update(cfg) return self @@ -663,6 +666,9 @@ cdef class Parser: else: cfg = {} if 'model' in msg: - self.model.from_bytes(msg['model']) + try: + self.model.from_bytes(msg['model']) + except AttributeError: + raise ValueError(Errors.E149) self.cfg.update(cfg) return self