mirror of https://github.com/explosion/spaCy.git
Auto-detect package dependencies in spacy package (#8948)
* Auto-detect package dependencies in spacy package * Add simple get_third_party_dependencies test * Import packages_distributions explicitly * Inline packages_distributions * Fix docstring [ci skip] * Relax catalogue requirement * Move importlib_metadata to spacy.compat with note * Include license information [ci skip]
This commit is contained in:
parent
0a6b68848f
commit
d94ddd5686
|
@ -104,3 +104,26 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
||||
|
||||
importlib_metadata
|
||||
------------------
|
||||
|
||||
* Files: util.py
|
||||
|
||||
The implementation of packages_distributions() is adapted from
|
||||
importlib_metadata, which is distributed under the following license:
|
||||
|
||||
Copyright 2017-2019 Jason R. Coombs, Barry Warsaw
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
|
|
@ -2,6 +2,8 @@ from typing import Optional, Union, Any, Dict, List, Tuple
|
|||
import shutil
|
||||
from pathlib import Path
|
||||
from wasabi import Printer, MarkdownRenderer, get_raw_input
|
||||
from thinc.api import Config
|
||||
from collections import defaultdict
|
||||
import srsly
|
||||
import sys
|
||||
|
||||
|
@ -99,6 +101,12 @@ def package(
|
|||
msg.fail("Can't load pipeline meta.json", meta_path, exits=1)
|
||||
meta = srsly.read_json(meta_path)
|
||||
meta = get_meta(input_dir, meta)
|
||||
if meta["requirements"]:
|
||||
msg.good(
|
||||
f"Including {len(meta['requirements'])} package requirement(s) from "
|
||||
f"meta and config",
|
||||
", ".join(meta["requirements"]),
|
||||
)
|
||||
if name is not None:
|
||||
meta["name"] = name
|
||||
if version is not None:
|
||||
|
@ -175,6 +183,51 @@ def has_wheel() -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def get_third_party_dependencies(
    config: Config, exclude: List[str] = util.SimpleFrozenList()
) -> List[str]:
    """If the config includes references to registered functions that are
    provided by third-party packages (spacy-transformers, other libraries), we
    want to include them in meta["requirements"] so that the package specifies
    them as dependencies and the user won't have to do it manually.

    We do this by:
    - traversing the config to check for registered function (@ keys)
    - looking up the functions and getting their module
    - looking up the module version and generating an appropriate version range

    The result is ordered by top-level module name and contains each
    distribution at most once, so repeated runs on the same environment
    produce the same list.

    config (Config): The pipeline config.
    exclude (list): List of packages to exclude (e.g. that already exist in meta).
    RETURNS (list): The versioned requirements.
    """
    own_packages = ("spacy", "spacy-nightly", "thinc", "srsly")
    distributions = util.packages_distributions()
    funcs = defaultdict(set)
    for path, value in util.walk_dict(config):
        if path[-1].startswith("@"):  # collect all function references by registry
            funcs[path[-1][1:]].add(value)
    modules = set()
    for reg_name, func_names in funcs.items():
        sub_registry = getattr(util.registry, reg_name)
        for func_name in func_names:
            func_info = sub_registry.find(func_name)
            module_name = func_info.get("module")
            if module_name:  # the code is part of a module, not a --code file
                # Only the top-level package name is used for the lookup below
                modules.add(module_name.split(".")[0])
    dependencies = []
    seen_packages = set()
    # Iterate in sorted order: plain set iteration order is not stable across
    # interpreter runs, which would shuffle the generated requirements.
    for module_name in sorted(modules):
        dist = distributions.get(module_name)
        if not dist:  # unknown module or no distribution recorded for it
            continue
        pkg = dist[0]
        if pkg in own_packages or pkg in exclude:
            continue
        if pkg in seen_packages:
            # Several top-level modules can belong to the same distribution;
            # list the requirement only once.
            continue
        seen_packages.add(pkg)
        version = util.get_package_version(pkg)
        version_range = util.get_minor_version_range(version)
        dependencies.append(f"{pkg}{version_range}")
    return dependencies
|
||||
|
||||
|
||||
def get_build_formats(formats: List[str]) -> Tuple[bool, bool]:
|
||||
supported = ["sdist", "wheel", "none"]
|
||||
for form in formats:
|
||||
|
@ -208,7 +261,7 @@ def get_meta(
|
|||
nlp = util.load_model_from_path(Path(model_path))
|
||||
meta.update(nlp.meta)
|
||||
meta.update(existing_meta)
|
||||
meta["spacy_version"] = util.get_model_version_range(about.__version__)
|
||||
meta["spacy_version"] = util.get_minor_version_range(about.__version__)
|
||||
meta["vectors"] = {
|
||||
"width": nlp.vocab.vectors_length,
|
||||
"vectors": len(nlp.vocab.vectors),
|
||||
|
@ -217,6 +270,11 @@ def get_meta(
|
|||
}
|
||||
if about.__title__ != "spacy":
|
||||
meta["parent_package"] = about.__title__
|
||||
meta.setdefault("requirements", [])
|
||||
# Update the requirements with all third-party packages in the config
|
||||
existing_reqs = [util.split_requirement(req)[0] for req in meta["requirements"]]
|
||||
reqs = get_third_party_dependencies(nlp.config, exclude=existing_reqs)
|
||||
meta["requirements"].extend(reqs)
|
||||
return meta
|
||||
|
||||
|
||||
|
|
|
@ -27,6 +27,14 @@ try: # Python 3.8+
|
|||
except ImportError:
|
||||
from typing_extensions import Literal # noqa: F401
|
||||
|
||||
# Important note: The importlib_metadata "backport" includes functionality
|
||||
# that's not part of the built-in importlib.metadata. We should treat this
|
||||
# import like the built-in and only use what's available there.
|
||||
try: # Python 3.8+
|
||||
import importlib.metadata as importlib_metadata
|
||||
except ImportError:
|
||||
from catalogue import _importlib_metadata as importlib_metadata # noqa: F401
|
||||
|
||||
from thinc.api import Optimizer # noqa: F401
|
||||
|
||||
pickle = pickle
|
||||
|
|
|
@ -199,7 +199,7 @@ class Language:
|
|||
|
||||
DOCS: https://spacy.io/api/language#meta
|
||||
"""
|
||||
spacy_version = util.get_model_version_range(about.__version__)
|
||||
spacy_version = util.get_minor_version_range(about.__version__)
|
||||
if self.vocab.lang:
|
||||
self._meta.setdefault("lang", self.vocab.lang)
|
||||
else:
|
||||
|
|
|
@ -14,6 +14,7 @@ from spacy import about
|
|||
from spacy.util import get_minor_version
|
||||
from spacy.cli.validate import get_model_pkgs
|
||||
from spacy.cli.download import get_compatibility, get_version
|
||||
from spacy.cli.package import get_third_party_dependencies
|
||||
from thinc.api import ConfigValidationError, Config
|
||||
import srsly
|
||||
import os
|
||||
|
@ -532,3 +533,10 @@ def test_init_labels(component_name):
|
|||
assert len(nlp2.get_pipe(component_name).labels) == 0
|
||||
nlp2.initialize()
|
||||
assert len(nlp2.get_pipe(component_name).labels) == 4
|
||||
|
||||
|
||||
def test_get_third_party_dependencies_runs():
    # Detecting real third-party packages is hard to set up in a unit test,
    # so this is a smoke test: the function and its importlib-based lookup
    # must run on a blank pipeline config and report no extra dependencies.
    blank_config = Dutch().config
    detected = get_third_party_dependencies(blank_config)
    assert detected == []
|
||||
|
|
|
@ -20,8 +20,10 @@ import sys
|
|||
import warnings
|
||||
from packaging.specifiers import SpecifierSet, InvalidSpecifier
|
||||
from packaging.version import Version, InvalidVersion
|
||||
from packaging.requirements import Requirement
|
||||
import subprocess
|
||||
from contextlib import contextmanager
|
||||
from collections import defaultdict
|
||||
import tempfile
|
||||
import shutil
|
||||
import shlex
|
||||
|
@ -33,11 +35,6 @@ try:
|
|||
except ImportError:
|
||||
cupy = None
|
||||
|
||||
try: # Python 3.8
|
||||
import importlib.metadata as importlib_metadata
|
||||
except ImportError:
|
||||
from catalogue import _importlib_metadata as importlib_metadata
|
||||
|
||||
# These are functions that were previously (v2.x) available from spacy.util
|
||||
# and have since moved to Thinc. We're importing them here so people's code
|
||||
# doesn't break, but they should always be imported from Thinc from now on,
|
||||
|
@ -46,7 +43,7 @@ from thinc.api import fix_random_seed, compounding, decaying # noqa: F401
|
|||
|
||||
|
||||
from .symbols import ORTH
|
||||
from .compat import cupy, CudaStream, is_windows
|
||||
from .compat import cupy, CudaStream, is_windows, importlib_metadata
|
||||
from .errors import Errors, Warnings, OLD_MODEL_SHORTCUTS
|
||||
from . import about
|
||||
|
||||
|
@ -639,13 +636,18 @@ def is_unconstrained_version(
|
|||
return True
|
||||
|
||||
|
||||
def get_model_version_range(spacy_version: str) -> str:
|
||||
"""Generate a version range like >=1.2.3,<1.3.0 based on a given spaCy
|
||||
version. Models are always compatible across patch versions but not
|
||||
across minor or major versions.
|
||||
def split_requirement(requirement: str) -> Tuple[str, str]:
    """Separate a requirement string into its package name and its version
    specifier, e.g. spacy>=1.2.3 becomes ("spacy", ">=1.2.3").

    requirement (str): The requirement string to parse.
    RETURNS (Tuple[str, str]): The name and the stringified specifier.
    """
    parsed = Requirement(requirement)
    name = parsed.name
    specifier = str(parsed.specifier)
    return (name, specifier)
|
||||
|
||||
|
||||
def get_minor_version_range(version: str) -> str:
    """Generate a version range like >=1.2.3,<1.3.0 based on a given version
    (e.g. of spaCy).

    version (str): The version to base the range on, e.g. "1.2.3".
    RETURNS (str): The range, using the given version as the inclusive lower
        bound and the next minor release as the exclusive upper bound.
    """
    release = Version(version).release
    # Pad with a zero minor component so that short versions like "1" are
    # handled like "1.0" instead of failing on the minor lookup.
    major, minor = (tuple(release) + (0,))[:2]
    return f">={version},<{major}.{minor + 1}.0"
|
||||
|
||||
|
||||
def get_model_lower_version(constraint: str) -> Optional[str]:
|
||||
|
@ -733,7 +735,7 @@ def load_meta(path: Union[str, Path]) -> Dict[str, Any]:
|
|||
model=f"{meta['lang']}_{meta['name']}",
|
||||
model_version=meta["version"],
|
||||
version=meta["spacy_version"],
|
||||
example=get_model_version_range(about.__version__),
|
||||
example=get_minor_version_range(about.__version__),
|
||||
)
|
||||
warnings.warn(warn_msg)
|
||||
return meta
|
||||
|
@ -1549,3 +1551,19 @@ def to_ternary_int(val) -> int:
|
|||
return 0
|
||||
else:
|
||||
return -1
|
||||
|
||||
|
||||
# The following implementation of packages_distributions() is adapted from
|
||||
# importlib_metadata, which is distributed under the Apache 2.0 License.
|
||||
# Copyright (c) 2017-2019 Jason R. Coombs, Barry Warsaw
|
||||
# See licenses/3rd_party_licenses.txt
|
||||
def packages_distributions() -> Dict[str, List[str]]:
    """Build a lookup from each top-level importable package name to the names
    of the installed distributions that declare it in their top_level.txt.
    This helper is inlined from the importlib_metadata "backport", since it's
    not available in the builtin importlib.metadata.

    RETURNS (Dict[str, List[str]]): Distribution names keyed by top-level
        package name.
    """
    mapping: Dict[str, List[str]] = {}
    for distribution in importlib_metadata.distributions():
        top_level = distribution.read_text("top_level.txt")
        if not top_level:  # file missing or empty: nothing to register
            continue
        for module in top_level.split():
            mapping.setdefault(module, []).append(distribution.metadata["Name"])
    return mapping
|
||||
|
|
Loading…
Reference in New Issue