Merge pull request #5516 from explosion/feature/improve-model-version-deps

This commit is contained in:
Ines Montani 2020-05-31 12:54:01 +02:00 committed by GitHub
commit b5ae2edcba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 87 additions and 43 deletions

View File

@ -13,8 +13,11 @@ numpy>=1.15.0
requests>=2.13.0,<3.0.0
plac>=0.9.6,<1.2.0
tqdm>=4.38.0,<5.0.0
importlib_metadata>=0.20; python_version < "3.8"
pydantic>=1.3.0,<2.0.0
# Official Python utilities
setuptools
packaging
importlib_metadata>=0.20; python_version < "3.8"
# Development dependencies
cython>=0.25
pytest>=4.6.5

View File

@ -50,11 +50,13 @@ install_requires =
ml_datasets>=0.1.1
# Third-party dependencies
tqdm>=4.38.0,<5.0.0
setuptools
numpy>=1.15.0
plac>=0.9.6,<1.2.0
requests>=2.13.0,<3.0.0
pydantic>=1.3.0,<2.0.0
# Official Python utilities
setuptools
packaging
importlib_metadata>=0.20; python_version < "3.8"
[options.extras_require]

View File

@ -5,7 +5,7 @@ import sys
from wasabi import msg
from .. import about
from ..util import is_package
from ..util import is_package, get_base_version
def download(
@ -63,8 +63,7 @@ def get_json(url, desc):
def get_compatibility():
version = about.__version__
version = version.rsplit(".dev", 1)[0]
version = get_base_version(about.__version__)
comp_table = get_json(about.__compatibility__, "compatibility table")
comp = comp_table["spacy"]
if version not in comp:
@ -73,7 +72,7 @@ def get_compatibility():
def get_version(model, comp):
model = model.rsplit(".dev", 1)[0]
model = get_base_version(model)
if model not in comp:
msg.fail(
f"No compatible model found for '{model}' (spaCy v{about.__version__})",

View File

@ -90,7 +90,7 @@ def generate_meta(model_path, existing_meta, msg):
("license", "License", meta.get("license", "MIT")),
]
nlp = util.load_model_from_path(Path(model_path))
meta["spacy_version"] = about.__version__
meta["spacy_version"] = util.get_model_version_range(about.__version__)
meta["pipeline"] = nlp.pipe_names
meta["vectors"] = {
"width": nlp.vocab.vectors_length,

View File

@ -467,7 +467,6 @@ def train(
# Update model meta.json
meta["lang"] = nlp.lang
meta["pipeline"] = nlp.pipe_names
meta["spacy_version"] = about.__version__
if beam_width == 1:
meta["speed"] = {
"nwords": nwords,

View File

@ -4,8 +4,8 @@ import requests
from wasabi import msg
from .. import about
from ..util import get_package_version, get_installed_models, split_version
from ..util import get_package_path, get_model_meta, is_compatible_model
from ..util import get_package_version, get_installed_models, get_base_version
from ..util import get_package_path, get_model_meta, is_compatible_version
def validate():
@ -14,7 +14,7 @@ def validate():
with the installed models. Should be run after `pip install -U spacy`.
"""
model_pkgs, compat = get_model_pkgs()
spacy_version = about.__version__.rsplit(".dev", 1)[0]
spacy_version = get_base_version(about.__version__)
current_compat = compat.get(spacy_version, {})
if not current_compat:
msg.warn(f"No compatible models found for v{spacy_version} of spaCy")
@ -78,13 +78,12 @@ def get_model_pkgs():
version = get_package_version(pkg_name)
if package in compat:
is_compat = version in compat[package]
v_maj, v_min = split_version(about.__version__)
spacy_version = f"{v_maj}.{v_min}"
spacy_version = about.__version__
else:
model_path = get_package_path(package)
model_meta = get_model_meta(model_path)
is_compat = is_compatible_model(model_meta)
spacy_version = model_meta.get("spacy_version", "n/a")
is_compat = is_compatible_version(about.__version__, spacy_version)
pkgs[pkg_name] = {
"name": package,
"version": version,

View File

@ -104,6 +104,12 @@ class Warnings(object):
"string \"Field1=Value1,Value2|Field2=Value3\".")
# TODO: fix numbering after merging develop into master
W095 = ("Model '{model}' ({model_version}) requires spaCy {version} and is "
"incompatible with the current version ({current}). This may lead "
"to unexpected results or runtime errors. To resolve this, "
"download a newer compatible model or retrain your custom model "
"with the current spaCy version. For more details and available "
"updates, run: python -m spacy validate")
W096 = ("The method 'disable_pipes' has become deprecated - use 'select_pipes' "
"instead.")
W097 = ("No Model config was provided to create the '{name}' component, "

View File

@ -191,13 +191,14 @@ class Language(object):
@property
def meta(self):
spacy_version = util.get_model_version_range(about.__version__)
if self.vocab.lang:
self._meta.setdefault("lang", self.vocab.lang)
else:
self._meta.setdefault("lang", self.lang)
self._meta.setdefault("name", "model")
self._meta.setdefault("version", "0.0.0")
self._meta.setdefault("spacy_version", about.__version__)
self._meta.setdefault("spacy_version", spacy_version)
self._meta.setdefault("description", "")
self._meta.setdefault("author", "")
self._meta.setdefault("email", "")

View File

@ -94,8 +94,18 @@ def test_ascii_filenames():
@pytest.mark.parametrize(
"version,compatible",
[(spacy_version, True), ("2.0.0", False), (">=1.2.3,<4.5.6", False)],
"version,constraint,compatible",
[
(spacy_version, spacy_version, True),
(spacy_version, f">={spacy_version}", True),
("3.0.0", "2.0.0", False),
("3.2.1", ">=2.0.0", True),
("2.2.10a1", ">=1.0.0,<2.1.1", False),
("3.0.0.dev3", ">=1.2.3,<4.5.6", True),
("n/a", ">=1.2.3,<4.5.6", None),
("1.2.3", "n/a", None),
("n/a", "n/a", None),
],
)
def test_is_compatible_model(version, compatible):
assert util.is_compatible_model({"spacy_version": version}) is compatible
def test_is_compatible_version(version, constraint, compatible):
assert util.is_compatible_version(version, constraint) is compatible

View File

@ -14,6 +14,8 @@ import srsly
import catalogue
import sys
import warnings
from packaging.specifiers import SpecifierSet, InvalidSpecifier
from packaging.version import Version, InvalidVersion
try:
@ -236,33 +238,46 @@ def get_package_version(name):
return None
def split_version(version):
"""RETURNS (tuple): Two integers, the major and minor spaCy version."""
pieces = version.split(".", 3)
return int(pieces[0]), int(pieces[1])
def is_compatible_version(version, constraint, prereleases=True):
"""Check if a version (e.g. "2.0.0") is compatible given a version
constraint (e.g. ">=1.9.0,<2.2.1"). If the constraint is a specific version,
it's interpreted as =={version}.
def is_compatible_model(meta):
"""Check if a model is compatible with the current version of spaCy, based
on its meta.json. We compare the version of spaCy the model was created with
with the current version. If the minor version is different, it's considered
incompatible.
meta (dict): The model's meta.
RETURNS (bool / None): Whether the model is compatible with the current
spaCy or None if we don't have enough info.
version (str): The version to check.
constraint (str): The constraint string.
prereleases (bool): Whether to allow prereleases. If set to False,
prerelease versions will be considered incompatible.
RETURNS (bool / None): Whether the version is compatible, or None if the
version or constraint are invalid.
"""
cur_v = about.__version__
pkg_v = meta.get("spacy_version")
if not pkg_v or not isinstance(pkg_v, str):
# Handle cases where exact version is provided as constraint
if constraint[0].isdigit():
constraint = f"=={constraint}"
try:
spec = SpecifierSet(constraint)
version = Version(version)
except (InvalidSpecifier, InvalidVersion):
return None
# Handle spacy_version values like >=x,<y, just in case
pkg_v = re.sub(r"[^0-9.]", "", pkg_v.split(",")[0])
cur_major, cur_minor = split_version(cur_v)
pkg_major, pkg_minor = split_version(pkg_v)
if cur_major != pkg_major or cur_minor != pkg_minor:
return False
return True
spec.prereleases = prereleases
return version in spec
def get_model_version_range(spacy_version):
    """Generate a version range like >=1.2.3,<1.3.0 based on a given spaCy
    version. Models are always compatible across patch versions but not
    across minor or major versions.

    spacy_version (str): The spaCy version, e.g. "1.2.3".
    RETURNS (str): The version range, e.g. ">=1.2.3,<1.3.0". The lower bound
        is the given version string, the upper bound is the next minor
        version.
    """
    release = Version(spacy_version).release
    major = release[0]
    # A version such as "3" parses to a one-element release tuple. Treat the
    # missing minor segment as 0 instead of failing with an IndexError.
    minor = release[1] if len(release) > 1 else 0
    return f">={spacy_version},<{major}.{minor + 1}.0"
def get_base_version(version):
    """Strip prerelease identifiers from a version string.

    version (str): The full version, e.g. "3.0.0.dev1".
    RETURNS (str): The base version of the parsed version, e.g. "3.0.0".
    """
    parsed = Version(version)
    return parsed.base_version
def load_config(path, create_objects=False):
@ -315,6 +330,16 @@ def get_model_meta(path):
for setting in ["lang", "name", "version"]:
if setting not in meta or not meta[setting]:
raise ValueError(Errors.E054.format(setting=setting))
if "spacy_version" in meta:
if not is_compatible_version(about.__version__, meta["spacy_version"]):
warnings.warn(
Warnings.W095.format(
model=f"{meta['lang']}_{meta['name']}",
model_version=meta["version"],
version=meta["spacy_version"],
current=about.__version__,
)
)
return meta