Raise E983 early in DocBin init

This commit is contained in:
svlandeg 2021-09-20 15:49:31 +02:00
parent 79c7c62970
commit f3f7afa21f
2 changed files with 7 additions and 3 deletions

View File

@@ -792,7 +792,7 @@ class Errors:
"to token boundaries.") "to token boundaries.")
E982 = ("The `Token.ent_iob` attribute should be an integer indexing " E982 = ("The `Token.ent_iob` attribute should be an integer indexing "
"into {values}, but found {value}.") "into {values}, but found {value}.")
E983 = ("Invalid key for '{dict}': {key}. Available keys: " E983 = ("Invalid key(s) for '{dict}': {key}. Available keys: "
"{keys}") "{keys}")
E984 = ("Invalid component config for '{name}': component block needs either " E984 = ("Invalid component config for '{name}': component block needs either "
"a key `factory` specifying the registered function used to " "a key `factory` specifying the registered function used to "

View File

@@ -8,7 +8,7 @@ from thinc.api import NumpyOps
from .doc import Doc from .doc import Doc
from ..vocab import Vocab from ..vocab import Vocab
from ..compat import copy_reg from ..compat import copy_reg
from ..attrs import SPACY, ORTH, intify_attr from ..attrs import SPACY, ORTH, intify_attr, IDS
from ..errors import Errors from ..errors import Errors
from ..util import ensure_path, SimpleFrozenList from ..util import ensure_path, SimpleFrozenList
@@ -64,7 +64,11 @@ class DocBin:
DOCS: https://spacy.io/api/docbin#init DOCS: https://spacy.io/api/docbin#init
""" """
try:
attrs = sorted([intify_attr(attr) for attr in attrs]) attrs = sorted([intify_attr(attr) for attr in attrs])
except TypeError:
non_valid = [attr for attr in attrs if intify_attr(attr) is None]
raise KeyError(Errors.E983.format(dict="attrs", key=non_valid, keys=IDS.keys())) from None
self.version = "0.1" self.version = "0.1"
self.attrs = [attr for attr in attrs if attr != ORTH and attr != SPACY] self.attrs = [attr for attr in attrs if attr != ORTH and attr != SPACY]
self.attrs.insert(0, ORTH) # Ensure ORTH is always attrs[0] self.attrs.insert(0, ORTH) # Ensure ORTH is always attrs[0]