diff --git a/spacy/cli/debug_config.py b/spacy/cli/debug_config.py
index ed0a43921..5f9759c8c 100644
--- a/spacy/cli/debug_config.py
+++ b/spacy/cli/debug_config.py
@@ -7,7 +7,7 @@ import typer
 
 from ._util import Arg, Opt, show_validation_error, parse_config_overrides
 from ._util import import_code, debug_cli
-from ..schemas import ConfigSchemaTraining
+from ..schemas import ConfigSchemaInit, ConfigSchemaTraining
 from ..util import registry
 from .. import util
 
@@ -55,6 +55,11 @@ def debug_config(
         config = util.load_config(config_path, overrides=overrides)
         nlp = util.load_model_from_config(config)
         config = nlp.config.interpolate()
+    msg.divider("Config validation for [initialize]")
+    with show_validation_error(config_path):
+        T = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
+    msg.divider("Config validation for [training]")
+    with show_validation_error(config_path):
         T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
         dot_names = [T["train_corpus"], T["dev_corpus"]]
         util.resolve_dot_names(config, dot_names)
diff --git a/spacy/errors.py b/spacy/errors.py
index a286a5ac3..8cbcbe6d9 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -730,6 +730,8 @@ class Errors:
              "DocBin (.spacy) format. If your data is in spaCy v2's JSON "
              "training format, convert it using `python -m spacy convert "
              "file.json .`.")
+    E1015 = ("Can't initialize model from config: no {value} found. For more "
+             "information, run: python -m spacy debug config config.cfg")
 
 
 # Deprecated model shortcuts, only used in errors and warnings
diff --git a/spacy/training/initialize.py b/spacy/training/initialize.py
index e8a0a46de..1947e7c27 100644
--- a/spacy/training/initialize.py
+++ b/spacy/training/initialize.py
@@ -23,6 +23,10 @@ if TYPE_CHECKING:
 def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
     raw_config = config
     config = raw_config.interpolate()
+    if "seed" not in config["training"]:
+        raise ValueError(Errors.E1015.format(value="[training] seed"))
+    if "gpu_allocator" not in config["training"]:
+        raise ValueError(Errors.E1015.format(value="[training] gpu_allocator"))
     if config["training"]["seed"] is not None:
         fix_random_seed(config["training"]["seed"])
     allocator = config["training"]["gpu_allocator"]