From f3f7afa21f05b19f84f2bc1692253f5b8ff5410a Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 20 Sep 2021 15:49:31 +0200 Subject: [PATCH] raise E983 early on in docbin init --- spacy/errors.py | 2 +- spacy/tokens/_serialize.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/spacy/errors.py b/spacy/errors.py index a206826ff..d1b33b1c9 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -792,7 +792,7 @@ class Errors: "to token boundaries.") E982 = ("The `Token.ent_iob` attribute should be an integer indexing " "into {values}, but found {value}.") - E983 = ("Invalid key for '{dict}': {key}. Available keys: " + E983 = ("Invalid key(s) for '{dict}': {key}. Available keys: " "{keys}") E984 = ("Invalid component config for '{name}': component block needs either " "a key `factory` specifying the registered function used to " diff --git a/spacy/tokens/_serialize.py b/spacy/tokens/_serialize.py index 868eb3eab..064885b3b 100644 --- a/spacy/tokens/_serialize.py +++ b/spacy/tokens/_serialize.py @@ -8,7 +8,7 @@ from thinc.api import NumpyOps from .doc import Doc from ..vocab import Vocab from ..compat import copy_reg -from ..attrs import SPACY, ORTH, intify_attr +from ..attrs import SPACY, ORTH, intify_attr, IDS from ..errors import Errors from ..util import ensure_path, SimpleFrozenList @@ -64,7 +64,11 @@ class DocBin: DOCS: https://spacy.io/api/docbin#init """ - attrs = sorted([intify_attr(attr) for attr in attrs]) + try: + attrs = sorted([intify_attr(attr) for attr in attrs]) + except TypeError: + non_valid = [attr for attr in attrs if intify_attr(attr) is None] + raise KeyError(Errors.E983.format(dict="attrs", key=non_valid, keys=IDS.keys())) from None self.version = "0.1" self.attrs = [attr for attr in attrs if attr != ORTH and attr != SPACY] self.attrs.insert(0, ORTH) # Ensure ORTH is always attrs[0]