From 4fd087572a1c597781fef8ca4fbcfebed825c0fb Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Thu, 28 May 2020 12:51:37 +0200 Subject: [PATCH 1/6] WIP: improve model version deps --- spacy/cli/package.py | 2 +- spacy/util.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/spacy/cli/package.py b/spacy/cli/package.py index cf93c872f..15ae2033c 100644 --- a/spacy/cli/package.py +++ b/spacy/cli/package.py @@ -138,7 +138,7 @@ def list_files(data_dir): def list_requirements(meta): parent_package = meta.get('parent_package', 'spacy') - requirements = [parent_package + meta['spacy_version']] + requirements = [parent_package + '>=' + meta['spacy_version']] if 'setup_requires' in meta: requirements += meta['setup_requires'] if 'requirements' in meta: diff --git a/spacy/util.py b/spacy/util.py index b614c29c7..4e468ef9d 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -265,6 +265,15 @@ def is_compatible_model(meta): return True +def get_model_version_range(version): + """Generate a version range like >=1.2.3,<1.3.0 based on a given spaCy + version. Models are always compatible across patch versions but not + across minor or major versions. + """ + major, minor = split_version(version) + return f">={version},<{major}.{minor + 1}.0" + + def load_config(path, create_objects=False): """Load a Thinc-formatted config file, optionally filling in objects where the config references registry entries. See "Thinc config files" for details. From bed62991add4ff12282a00dd1d321441878b27ef Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 30 May 2020 14:59:55 +0200 Subject: [PATCH 2/6] Tidy up requirements --- requirements.txt | 5 ++++- setup.cfg | 7 ++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index add083a05..a104b68ba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,8 +13,11 @@ numpy>=1.15.0 requests>=2.13.0,<3.0.0 plac>=0.9.6,<1.2.0 tqdm>=4.38.0,<5.0.0 -importlib_metadata>=0.20; python_version < "3.8" pydantic>=1.3.0,<2.0.0 +# Official Python utilities +setuptools +packaging +importlib_metadata>=0.20; python_version < "3.8" # Development dependencies cython>=0.25 pytest>=4.6.5 diff --git a/setup.cfg b/setup.cfg index eb7608c4e..ae09d071c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,15 +47,16 @@ install_requires = wasabi>=0.4.0,<1.1.0 srsly>=2.0.0,<3.0.0 catalogue>=0.0.7,<1.1.0 - ml_datasets + ml_datasets>=0.1.1 # Third-party dependencies tqdm>=4.38.0,<5.0.0 - setuptools numpy>=1.15.0 plac>=0.9.6,<1.2.0 requests>=2.13.0,<3.0.0 pydantic>=1.3.0,<2.0.0 - tqdm>=4.38.0,<5.0.0 + # Official Python utilities + setuptools + packaging importlib_metadata>=0.20; python_version < "3.8" [options.extras_require] From e47e5a4b10e0d3c5b6fed255040cebc019173e39 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 30 May 2020 15:01:58 +0200 Subject: [PATCH 3/6] Use more sophisticated version parsing logic --- spacy/cli/download.py | 7 +++--- spacy/cli/package.py | 4 ++-- spacy/cli/validate.py | 9 ++++---- spacy/language.py | 3 ++- spacy/tests/test_misc.py | 12 ++++++++-- spacy/util.py | 49 ++++++++++++++++------------------------ 6 files changed, 41 insertions(+), 43 deletions(-) diff --git a/spacy/cli/download.py b/spacy/cli/download.py index af132bbbe..3d56822a5 100644 --- a/spacy/cli/download.py +++ b/spacy/cli/download.py @@ -5,7 +5,7 @@ import sys from wasabi import msg from .. import about -from ..util import is_package +from ..util import is_package, get_base_version def download( @@ -63,8 +63,7 @@ def get_json(url, desc): def get_compatibility(): - version = about.__version__ - version = version.rsplit(".dev", 1)[0] + version = get_base_version(about.__version__) comp_table = get_json(about.__compatibility__, "compatibility table") comp = comp_table["spacy"] if version not in comp: @@ -73,7 +72,7 @@ def get_compatibility(): def get_version(model, comp): - model = model.rsplit(".dev", 1)[0] + model = get_base_version(model) if model not in comp: msg.fail( f"No compatible model found for '{model}' (spaCy v{about.__version__})", diff --git a/spacy/cli/package.py b/spacy/cli/package.py index 15ae2033c..153e61ba3 100644 --- a/spacy/cli/package.py +++ b/spacy/cli/package.py @@ -90,7 +90,7 @@ def generate_meta(model_path, existing_meta, msg): ("license", "License", meta.get("license", "MIT")), ] nlp = util.load_model_from_path(Path(model_path)) - meta["spacy_version"] = about.__version__ + meta["spacy_version"] = util.get_model_version_range(about.__version__) meta["pipeline"] = nlp.pipe_names meta["vectors"] = { "width": nlp.vocab.vectors_length, @@ -138,7 +138,7 @@ def list_files(data_dir): def list_requirements(meta): parent_package = meta.get('parent_package', 'spacy') - requirements = [parent_package + '>=' + meta['spacy_version']] + requirements = [parent_package + meta['spacy_version']] if 'setup_requires' in meta: requirements += meta['setup_requires'] if 'requirements' in meta: diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py index c39cadc7b..3c49abb3e 100644 --- a/spacy/cli/validate.py +++ b/spacy/cli/validate.py @@ -4,7 +4,7 @@ import requests from wasabi import msg from .. import about -from ..util import get_package_version, get_installed_models, split_version +from ..util import get_package_version, get_installed_models, get_base_version from ..util import get_package_path, get_model_meta, is_compatible_model @@ -14,7 +14,7 @@ def validate(): with the installed models. Should be run after `pip install -U spacy`. """ model_pkgs, compat = get_model_pkgs() - spacy_version = about.__version__.rsplit(".dev", 1)[0] + spacy_version = get_base_version(about.__version__) current_compat = compat.get(spacy_version, {}) if not current_compat: msg.warn(f"No compatible models found for v{spacy_version} of spaCy") @@ -78,13 +78,12 @@ def get_model_pkgs(): version = get_package_version(pkg_name) if package in compat: is_compat = version in compat[package] - v_maj, v_min = split_version(about.__version__) - spacy_version = f"{v_maj}.{v_min}" + spacy_version = about.__version__ else: model_path = get_package_path(package) model_meta = get_model_meta(model_path) - is_compat = is_compatible_model(model_meta) spacy_version = model_meta.get("spacy_version", "n/a") + is_compat = is_compatible_model(spacy_version) pkgs[pkg_name] = { "name": package, "version": version, diff --git a/spacy/language.py b/spacy/language.py index 551b8c9af..61d69b63e 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -191,13 +191,14 @@ class Language(object): @property def meta(self): + spacy_version = util.get_model_version_range(about.__version__) if self.vocab.lang: self._meta.setdefault("lang", self.vocab.lang) else: self._meta.setdefault("lang", self.lang) self._meta.setdefault("name", "model") self._meta.setdefault("version", "0.0.0") - self._meta.setdefault("spacy_version", about.__version__) + self._meta.setdefault("spacy_version", spacy_version) self._meta.setdefault("description", "") self._meta.setdefault("author", "") self._meta.setdefault("email", "") diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py index 9e67ae83b..9aa95c431 100644 --- a/spacy/tests/test_misc.py +++ b/spacy/tests/test_misc.py @@ -95,7 +95,15 @@ def test_ascii_filenames(): @pytest.mark.parametrize( "version,compatible", - [(spacy_version, True), ("2.0.0", False), (">=1.2.3,<4.5.6", False)], + [ + (spacy_version, True), + (f">={spacy_version}", True), + ("2.0.0", False), + (">=2.0.0", True), + (">=1.0.0,<2.1.1", False), + (">=1.2.3,<4.5.6", True), + ("n/a", None), + ], ) def test_is_compatible_model(version, compatible): - assert util.is_compatible_model({"spacy_version": version}) is compatible + assert util.is_compatible_model(version) is compatible diff --git a/spacy/util.py b/spacy/util.py index 4e468ef9d..835e46fc6 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -14,6 +14,8 @@ import srsly import catalogue import sys import warnings +from packaging.specifiers import SpecifierSet, InvalidSpecifier +from packaging.version import Version, InvalidVersion try: @@ -236,42 +238,31 @@ def get_package_version(name): return None -def split_version(version): - """RETURNS (tuple): Two integers, the major and minor spaCy version.""" - pieces = version.split(".", 3) - return int(pieces[0]), int(pieces[1]) - - -def is_compatible_model(meta): - """Check if a model is compatible with the current version of spaCy, based - on its meta.json. We compare the version of spaCy the model was created with - with the current version. If the minor version is different, it's considered - incompatible. - - meta (dict): The model's meta. - RETURNS (bool / None): Whether the model is compatible with the current - spaCy or None if we don't have enough info. - """ - cur_v = about.__version__ - pkg_v = meta.get("spacy_version") - if not pkg_v or not isinstance(pkg_v, str): +def is_compatible_model(constraint): + version = Version(about.__version__) + if constraint[0].isdigit(): + # Handle cases where exact version is provided as constraint + constraint = f"=={constraint}" + try: + spec = SpecifierSet(constraint) + except InvalidSpecifier: return None - # Handle spacy_version values like >=x,=1.2.3,<1.3.0 based on a given spaCy version. Models are always compatible across patch versions but not across minor or major versions. """ - major, minor = split_version(version) - return f">={version},<{major}.{minor + 1}.0" + release = Version(spacy_version).release + return f">={spacy_version},<{release[0]}.{release[1] + 1}.0" + + +def get_base_version(version): + return Version(version).base_version def load_config(path, create_objects=False): From a7e370bcbfd4234b53061a004c0b588e3ec76c06 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 30 May 2020 15:03:18 +0200 Subject: [PATCH 4/6] Don't override spaCy version --- spacy/cli/train.py | 1 - 1 file changed, 1 deletion(-) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index c205fa5b2..590ce4f13 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -467,7 +467,6 @@ def train( # Update model meta.json meta["lang"] = nlp.lang meta["pipeline"] = nlp.pipe_names - meta["spacy_version"] = about.__version__ if beam_width == 1: meta["speed"] = { "nwords": nwords, From b7aff6020c34ecae3bb0891b469193d8772b8197 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 30 May 2020 15:18:53 +0200 Subject: [PATCH 5/6] Make functions more general purpose and update docstrings and tests --- spacy/cli/validate.py | 4 ++-- spacy/tests/test_misc.py | 22 ++++++++++++---------- spacy/util.py | 27 +++++++++++++++++++++------ 3 files changed, 35 insertions(+), 18 deletions(-) diff --git a/spacy/cli/validate.py b/spacy/cli/validate.py index 3c49abb3e..080cd77e2 100644 --- a/spacy/cli/validate.py +++ b/spacy/cli/validate.py @@ -5,7 +5,7 @@ from wasabi import msg from .. import about from ..util import get_package_version, get_installed_models, get_base_version -from ..util import get_package_path, get_model_meta, is_compatible_model +from ..util import get_package_path, get_model_meta, is_compatible_version def validate(): @@ -83,7 +83,7 @@ def get_model_pkgs(): model_path = get_package_path(package) model_meta = get_model_meta(model_path) spacy_version = model_meta.get("spacy_version", "n/a") - is_compat = is_compatible_model(spacy_version) + is_compat = is_compatible_version(about.__version__, spacy_version) pkgs[pkg_name] = { "name": package, "version": version, diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py index 9aa95c431..e4b4e570c 100644 --- a/spacy/tests/test_misc.py +++ b/spacy/tests/test_misc.py @@ -94,16 +94,18 @@ def test_ascii_filenames(): @pytest.mark.parametrize( - "version,compatible", + "version,constraint,compatible", [ - (spacy_version, True), - (f">={spacy_version}", True), - ("2.0.0", False), - (">=2.0.0", True), - (">=1.0.0,<2.1.1", False), - (">=1.2.3,<4.5.6", True), - ("n/a", None), + (spacy_version, spacy_version, True), + (spacy_version, f">={spacy_version}", True), + ("3.0.0", "2.0.0", False), + ("3.2.1", ">=2.0.0", True), + ("2.2.10a1", ">=1.0.0,<2.1.1", False), + ("3.0.0.dev3", ">=1.2.3,<4.5.6", True), + ("n/a", ">=1.2.3,<4.5.6", None), + ("1.2.3", "n/a", None), + ("n/a", "n/a", None), ], ) -def test_is_compatible_model(version, compatible): - assert util.is_compatible_model(version) is compatible +def test_is_compatible_version(version, constraint, compatible): + assert util.is_compatible_version(version, constraint) is compatible diff --git a/spacy/util.py b/spacy/util.py index 835e46fc6..741b289c1 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -238,17 +238,27 @@ def get_package_version(name): return None -def is_compatible_model(constraint): - version = Version(about.__version__) +def is_compatible_version(version, constraint, prereleases=True): + """Check if a version (e.g. "2.0.0") is compatible given a version + constraint (e.g. ">=1.9.0,<2.2.1"). If the constraint is a specific version, + it's interpreted as =={version}. + + version (str): The version to check. + constraint (str): The constraint string. + prereleases (bool): Whether to allow prereleases. If set to False, + prerelease versions will be considered incompatible. + RETURNS (bool / None): Whether the version is compatible, or None if the + version or constraint are invalid. + """ + # Handle cases where exact version is provided as constraint if constraint[0].isdigit(): - # Handle cases where exact version is provided as constraint constraint = f"=={constraint}" try: spec = SpecifierSet(constraint) - except InvalidSpecifier: + version = Version(version) + except (InvalidSpecifier, InvalidVersion): return None - # Allow prereleases and dev versions - spec.prereleases = True + spec.prereleases = prereleases return version in spec @@ -262,6 +272,11 @@ def get_model_version_range(spacy_version): def get_base_version(version): + """Generate the base version without any prerelease identifiers. + + version (str): The version, e.g. "3.0.0.dev1". + RETURNS (str): The base version, e.g. "3.0.0". + """ return Version(version).base_version From dc186afdc5b7f42dd32eeafb239b3d5604b8fbbd Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Sat, 30 May 2020 15:34:54 +0200 Subject: [PATCH 6/6] Add warning --- spacy/errors.py | 6 ++++++ spacy/util.py | 10 ++++++++++ 2 files changed, 16 insertions(+) diff --git a/spacy/errors.py b/spacy/errors.py index 932bb1eff..da2cfdf04 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -104,6 +104,12 @@ class Warnings(object): "string \"Field1=Value1,Value2|Field2=Value3\".") # TODO: fix numbering after merging develop into master + W095 = ("Model '{model}' ({model_version}) requires spaCy {version} and is " + "incompatible with the current version ({current}). This may lead " + "to unexpected results or runtime errors. To resolve this, " + "download a newer compatible model or retrain your custom model " + "with the current spaCy version. For more details and available " + "updates, run: python -m spacy validate") W096 = ("The method 'disable_pipes' has become deprecated - use 'select_pipes' " "instead.") W097 = ("No Model config was provided to create the '{name}' component, " diff --git a/spacy/util.py b/spacy/util.py index 741b289c1..79134400c 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -330,6 +330,16 @@ def get_model_meta(path): for setting in ["lang", "name", "version"]: if setting not in meta or not meta[setting]: raise ValueError(Errors.E054.format(setting=setting)) + if "spacy_version" in meta: + if not is_compatible_version(about.__version__, meta["spacy_version"]): + warnings.warn( + Warnings.W095.format( + model=f"{meta['lang']}_{meta['name']}", + model_version=meta["version"], + version=meta["spacy_version"], + current=about.__version__, + ) + ) return meta