Raise E983 early in DocBin init (#9247)

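* Raise E983 early in DocBin init when `attrs` contains keys that `intify_attr` cannot resolve

* Check for invalid attribute keys up front, before the error would otherwise be raised

* Extend E923 to say that `spacy debug data` should be run with the same config overrides as `spacy train`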
This commit is contained in:
Sofie Van Landeghem 2021-09-27 20:43:03 +02:00 committed by GitHub
parent effae12cbd
commit a361df00cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 4 deletions

View File

@ -657,7 +657,9 @@ class Errors:
"{nO} - cannot add any more labels.") "{nO} - cannot add any more labels.")
E923 = ("It looks like there is no proper sample data to initialize the " E923 = ("It looks like there is no proper sample data to initialize the "
"Model of component '{name}'. To check your input data paths and " "Model of component '{name}'. To check your input data paths and "
"annotation, run: python -m spacy debug data config.cfg") "annotation, run: python -m spacy debug data config.cfg "
"and include the same config override values you would specify "
"for the 'spacy train' command.")
E924 = ("The '{name}' component does not seem to be initialized properly. " E924 = ("The '{name}' component does not seem to be initialized properly. "
"This is likely a bug in spaCy, so feel free to open an issue: " "This is likely a bug in spaCy, so feel free to open an issue: "
"https://github.com/explosion/spaCy/issues") "https://github.com/explosion/spaCy/issues")
@ -792,7 +794,7 @@ class Errors:
"to token boundaries.") "to token boundaries.")
E982 = ("The `Token.ent_iob` attribute should be an integer indexing " E982 = ("The `Token.ent_iob` attribute should be an integer indexing "
"into {values}, but found {value}.") "into {values}, but found {value}.")
E983 = ("Invalid key for '{dict}': {key}. Available keys: " E983 = ("Invalid key(s) for '{dict}': {key}. Available keys: "
"{keys}") "{keys}")
E984 = ("Invalid component config for '{name}': component block needs either " E984 = ("Invalid component config for '{name}': component block needs either "
"a key `factory` specifying the registered function used to " "a key `factory` specifying the registered function used to "

View File

@ -8,7 +8,7 @@ from thinc.api import NumpyOps
from .doc import Doc from .doc import Doc
from ..vocab import Vocab from ..vocab import Vocab
from ..compat import copy_reg from ..compat import copy_reg
from ..attrs import SPACY, ORTH, intify_attr from ..attrs import SPACY, ORTH, intify_attr, IDS
from ..errors import Errors from ..errors import Errors
from ..util import ensure_path, SimpleFrozenList from ..util import ensure_path, SimpleFrozenList
@ -64,7 +64,11 @@ class DocBin:
DOCS: https://spacy.io/api/docbin#init DOCS: https://spacy.io/api/docbin#init
""" """
attrs = sorted([intify_attr(attr) for attr in attrs]) int_attrs = [intify_attr(attr) for attr in attrs]
if None in int_attrs:
non_valid = [attr for attr in attrs if intify_attr(attr) is None]
raise KeyError(Errors.E983.format(dict="attrs", key=non_valid, keys=IDS.keys())) from None
attrs = sorted(int_attrs)
self.version = "0.1" self.version = "0.1"
self.attrs = [attr for attr in attrs if attr != ORTH and attr != SPACY] self.attrs = [attr for attr in attrs if attr != ORTH and attr != SPACY]
self.attrs.insert(0, ORTH) # Ensure ORTH is always attrs[0] self.attrs.insert(0, ORTH) # Ensure ORTH is always attrs[0]