From a361df00cd65dd829a07d1256476dfd9639be4a9 Mon Sep 17 00:00:00 2001
From: Sofie Van Landeghem
Date: Mon, 27 Sep 2021 20:43:03 +0200
Subject: [PATCH] Raise E983 early on in docbin init (#9247)

* raise E983 early on in docbin init

* catch situation before error is raised

* add more info on the spacy debug command
---
 spacy/errors.py            | 6 ++++--
 spacy/tokens/_serialize.py | 8 ++++++--
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/spacy/errors.py b/spacy/errors.py
index 135aacf92..b6659a041 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -657,7 +657,9 @@ class Errors:
             "{nO} - cannot add any more labels.")
     E923 = ("It looks like there is no proper sample data to initialize the "
             "Model of component '{name}'. To check your input data paths and "
-            "annotation, run: python -m spacy debug data config.cfg")
+            "annotation, run: python -m spacy debug data config.cfg "
+            "and include the same config override values you would specify "
+            "for the 'spacy train' command.")
     E924 = ("The '{name}' component does not seem to be initialized properly. "
             "This is likely a bug in spaCy, so feel free to open an issue: "
             "https://github.com/explosion/spaCy/issues")
@@ -792,7 +794,7 @@ class Errors:
             "to token boundaries.")
     E982 = ("The `Token.ent_iob` attribute should be an integer indexing "
             "into {values}, but found {value}.")
-    E983 = ("Invalid key for '{dict}': {key}. Available keys: "
+    E983 = ("Invalid key(s) for '{dict}': {key}. Available keys: "
             "{keys}")
     E984 = ("Invalid component config for '{name}': component block needs either "
             "a key `factory` specifying the registered function used to "
diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py
index 868eb3eab..5be66c801 100644
--- a/spacy/tokens/_serialize.py
+++ b/spacy/tokens/_serialize.py
@@ -8,7 +8,7 @@ from thinc.api import NumpyOps
 from .doc import Doc
 from ..vocab import Vocab
 from ..compat import copy_reg
-from ..attrs import SPACY, ORTH, intify_attr
+from ..attrs import SPACY, ORTH, intify_attr, IDS
 from ..errors import Errors
 from ..util import ensure_path, SimpleFrozenList
 
@@ -64,7 +64,11 @@ class DocBin:
 
         DOCS: https://spacy.io/api/docbin#init
         """
-        attrs = sorted([intify_attr(attr) for attr in attrs])
+        int_attrs = [intify_attr(attr) for attr in attrs]
+        if None in int_attrs:
+            non_valid = [attr for attr in attrs if intify_attr(attr) is None]
+            raise KeyError(Errors.E983.format(dict="attrs", key=non_valid, keys=IDS.keys())) from None
+        attrs = sorted(int_attrs)
         self.version = "0.1"
         self.attrs = [attr for attr in attrs if attr != ORTH and attr != SPACY]
         self.attrs.insert(0, ORTH)  # Ensure ORTH is always attrs[0]