Merge pull request #6720 from adrianeboyd/feature/improved-init-training-config-validation

This commit is contained in:
Ines Montani 2021-01-15 11:45:24 +11:00 committed by GitHub
commit e8a97a2bd6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 12 additions and 1 deletion

View File

@@ -7,7 +7,7 @@ import typer
from ._util import Arg, Opt, show_validation_error, parse_config_overrides
from ._util import import_code, debug_cli
from ..schemas import ConfigSchemaTraining
from ..schemas import ConfigSchemaInit, ConfigSchemaTraining
from ..util import registry
from .. import util
@@ -55,6 +55,11 @@ def debug_config(
config = util.load_config(config_path, overrides=overrides)
nlp = util.load_model_from_config(config)
config = nlp.config.interpolate()
msg.divider("Config validation for [initialize]")
with show_validation_error(config_path):
T = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
msg.divider("Config validation for [training]")
with show_validation_error(config_path):
T = registry.resolve(config["training"], schema=ConfigSchemaTraining)
dot_names = [T["train_corpus"], T["dev_corpus"]]
util.resolve_dot_names(config, dot_names)

View File

@@ -730,6 +730,8 @@ class Errors:
"DocBin (.spacy) format. If your data is in spaCy v2's JSON "
"training format, convert it using `python -m spacy convert "
"file.json .`.")
E1015 = ("Can't initialize model from config: no {value} found. For more "
"information, run: python -m spacy debug config config.cfg")
# Deprecated model shortcuts, only used in errors and warnings

View File

@@ -23,6 +23,10 @@ if TYPE_CHECKING:
def init_nlp(config: Config, *, use_gpu: int = -1) -> "Language":
raw_config = config
config = raw_config.interpolate()
if "seed" not in config["training"]:
raise ValueError(Errors.E1015.format(value="[training] seed"))
if "gpu_allocator" not in config["training"]:
raise ValueError(Errors.E1015.format(value="[training] gpu_allocator"))
if config["training"]["seed"] is not None:
fix_random_seed(config["training"]["seed"])
allocator = config["training"]["gpu_allocator"]