mirror of https://github.com/explosion/spaCy.git
Update util functions for model loading
This commit is contained in:
parent
c8543c8237
commit
c1983621fb
|
@ -1,9 +1,6 @@
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import importlib
|
|
||||||
|
|
||||||
from .compat import basestring_
|
|
||||||
from .cli.info import info as cli_info
|
from .cli.info import info as cli_info
|
||||||
from .glossary import explain
|
from .glossary import explain
|
||||||
from .deprecated import resolve_load_name
|
from .deprecated import resolve_load_name
|
||||||
|
@ -12,14 +9,7 @@ from . import util
|
||||||
|
|
||||||
def load(name, **overrides):
|
def load(name, **overrides):
    """Load a model via `util.load_model` after resolving its name.

    name: Model identifier; passed through `resolve_load_name` first.
    **overrides: Keyword arguments handed to `resolve_load_name`.
    RETURNS: The result of `util.load_model` for the resolved name.
    """
    # NOTE(review): `overrides` is only seen by `resolve_load_name`; nothing
    # is forwarded to `util.load_model` -- confirm that this is intended.
    resolved_name = resolve_load_name(name, **overrides)
    return util.load_model(resolved_name)
|
||||||
meta = util.parse_package_meta(model_path)
|
|
||||||
if 'lang' not in meta:
|
|
||||||
raise IOError('No language setting found in model meta.')
|
|
||||||
cls = util.get_lang_class(meta['lang'])
|
|
||||||
overrides['meta'] = meta
|
|
||||||
overrides['path'] = model_path
|
|
||||||
return cls(**overrides)
|
|
||||||
|
|
||||||
|
|
||||||
def info(model=None, markdown=False):
|
def info(model=None, markdown=False):
|
||||||
|
|
|
@ -20,8 +20,14 @@ def info(cmd, model=None, markdown=False):
|
||||||
prints details in Markdown for easy copy-pasting to GitHub issues.
|
prints details in Markdown for easy copy-pasting to GitHub issues.
|
||||||
"""
|
"""
|
||||||
if model:
|
if model:
|
||||||
model_path = util.resolve_model_path(model)
|
if util.is_package(model):
|
||||||
meta = util.parse_package_meta(model_path)
|
model_path = util.get_package_path(model)
|
||||||
|
else:
|
||||||
|
model_path = util.get_data_path() / model
|
||||||
|
meta_path = model_path / 'meta.json'
|
||||||
|
if not meta_path.is_file():
|
||||||
|
prints(meta_path, title="Can't find model meta.json", exits=1)
|
||||||
|
meta = read_json(meta_path)
|
||||||
if model_path.resolve() != model_path:
|
if model_path.resolve() != model_path:
|
||||||
meta['link'] = path2str(model_path)
|
meta['link'] = path2str(model_path)
|
||||||
meta['source'] = path2str(model_path.resolve())
|
meta['source'] = path2str(model_path.resolve())
|
||||||
|
|
|
@ -21,7 +21,7 @@ def link(cmd, origin, link_name, force=False):
|
||||||
directory. Linking models allows loading them via spacy.load(link_name).
|
directory. Linking models allows loading them via spacy.load(link_name).
|
||||||
"""
|
"""
|
||||||
if util.is_package(origin):
|
if util.is_package(origin):
|
||||||
model_path = util.get_model_package_path(origin)
|
model_path = util.get_package_path(model)
|
||||||
else:
|
else:
|
||||||
model_path = Path(origin)
|
model_path = Path(origin)
|
||||||
if not model_path.exists():
|
if not model_path.exists():
|
||||||
|
|
111
spacy/util.py
111
spacy/util.py
|
@ -78,27 +78,86 @@ def ensure_path(path):
|
||||||
return path
|
return path
|
||||||
|
|
||||||
|
|
||||||
def resolve_model_path(name):
|
def load_model(name):
    """Load a model from a shortcut link, an installed package or a path.

    String names are tried in this order: an entry inside the spaCy data
    directory, an installed package, and finally a path to a model data
    directory. Non-string objects exposing an `exists` attribute are
    treated as a path to a model data directory.

    name (unicode): Package name, shortcut link or model path.
    RETURNS (Language): `Language` class with the loaded model.
    """
    data_dir = get_data_path()
    if not (data_dir and data_dir.exists()):
        raise IOError("Can't find spaCy data path: %s" % path2str(data_dir))

    if not isinstance(name, basestring_):
        # Path or Path-like object pointing at model data
        if hasattr(name, 'exists'):
            return load_data_from_path(name)
        raise IOError("Can't find model '%s'" % name)

    # Entry in the data directory (or shortcut link)
    linked = data_dir / name
    if linked.exists():
        return load_model_from_path(linked)
    # Installed as a package
    if is_package(name):
        return load_model_from_pkg(name)
    # Path to a model data directory
    local = Path(name)
    if local.exists():
        return load_data_from_path(local)
    raise IOError("Can't find model '%s'" % name)
|
||||||
|
|
||||||
|
|
||||||
|
def load_model_from_init_py(init_file):
    """Load a model from inside its own package.

    Meant to be called from the `load()` function in a model package's
    __init__.py, passing that file's `__file__`.

    init_file (unicode): Path to model's __init__.py, i.e. `__file__`.
    RETURNS (Language): `Language` class with loaded model.
    """
    # The directory holding __init__.py is the package directory, hence
    # package=True when handing over to the data loader.
    return load_data_from_path(Path(init_file).parent, package=True)
|
||||||
|
|
||||||
|
|
||||||
|
def load_model_from_path(model_path):
    """Import and load a model package from its file path.

    model_path (unicode or Path): Path to the package directory, or directly
        to the package's __init__.py.
    RETURNS (Language): `Language` class with loaded model.
    RAISES (IOError): If no importable package file is found at the path.
    """
    # `import importlib` alone does not guarantee that the `util` submodule
    # has been loaded, so import it explicitly where it is needed.
    import importlib.util

    model_path = Path(model_path)
    # A bare directory has no file suffix, so importlib cannot choose a loader
    # for it and returns no spec. Point at the package's __init__.py instead.
    if model_path.is_dir():
        init_file = model_path / '__init__.py'
    else:
        init_file = model_path
    if not init_file.is_file():
        raise IOError("Can't find model package: %s" % init_file)
    spec = importlib.util.spec_from_file_location('model', str(init_file))
    if spec is None or spec.loader is None:
        raise IOError("Can't import model package: %s" % init_file)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module.load()
|
||||||
|
|
||||||
|
|
||||||
|
def load_model_from_pkg(name):
    """Import an installed model package and return what its `load()` gives.

    name (unicode): Name of model package installed via pip.
    RETURNS (Language): `Language` class with loaded model.
    """
    return importlib.import_module(name).load()
|
||||||
|
|
||||||
|
|
||||||
|
def load_data_from_path(model_path, package=False):
    """Initialise a `Language` class with a loaded model from a model data path.

    model_path (unicode or Path): Path to model data directory.
    package (bool): Does the path point to the parent package directory?
    RETURNS (Language): `Language` class with loaded model.
    RAISES (IOError): If meta.json is missing, or lacks one of the 'lang',
        'name' or 'version' settings.
    RAISES (ValueError): If the resulting model directory does not exist.
    """
    model_path = ensure_path(model_path)
    meta_path = model_path / 'meta.json'
    if not meta_path.is_file():
        raise IOError("Could not read meta.json from %s" % path2str(meta_path))
    meta = read_json(meta_path)
    for setting in ['lang', 'name', 'version']:
        if setting not in meta:
            raise IOError('No %s setting found in model meta.json' % setting)
    if package:
        # Inside a package, the data lives in a subdirectory named after the
        # model's language, name and version, e.g. en_core_web_sm-1.2.0
        model_data_path = '%s_%s-%s' % (meta['lang'], meta['name'],
                                        meta['version'])
        model_path = model_path / model_data_path
    if not model_path.exists():
        raise ValueError("Can't find model directory: %s" % path2str(model_path))
    cls = get_lang_class(meta['lang'])
    # Fall back to `True` for the pipeline if meta.json doesn't specify one
    nlp = cls(pipeline=meta.get('pipeline', True))
    return nlp.from_disk(model_path)
|
||||||
|
|
||||||
|
|
||||||
def is_package(name):
|
def is_package(name):
|
||||||
"""Check if string maps to a package installed via pip.
|
"""Check if string maps to a package installed via pip.
|
||||||
|
|
||||||
|
@ -112,36 +171,16 @@ def is_package(name):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def get_model_package_path(package_name):
|
def get_package_path(name):
    """Get the path to an installed package.

    name (unicode): Package name.
    RETURNS (Path): Path to installed package.
    """
    # Here we're importing the module just to find it. This is worryingly
    # indirect, but it's otherwise very difficult to find the package.
    pkg = importlib.import_module(name)
    return Path(pkg.__file__).parent
|
||||||
meta = parse_package_meta(package_path / package_name)
|
|
||||||
model_name = '%s-%s' % (package_name, meta['version'])
|
|
||||||
return package_path / package_name / model_name
|
|
||||||
|
|
||||||
|
|
||||||
def parse_package_meta(package_path, require=True):
|
|
||||||
"""Check if a meta.json exists in a package and return its contents.
|
|
||||||
|
|
||||||
package_path (Path): Path to model package directory.
|
|
||||||
require (bool): If True, raise error if no meta.json is found.
|
|
||||||
RETURNS (dict or None): Model meta.json data or None.
|
|
||||||
"""
|
|
||||||
location = package_path / 'meta.json'
|
|
||||||
if location.is_file():
|
|
||||||
return read_json(location)
|
|
||||||
elif require:
|
|
||||||
raise IOError("Could not read meta.json from %s" % location)
|
|
||||||
else:
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def is_in_jupyter():
|
def is_in_jupyter():
|
||||||
|
|
|
@ -1,12 +1,10 @@
|
||||||
//- 💫 DOCS > API > ANNOTATION SPECS
|
//- 💫 DOCS > API > UTIL
|
||||||
|
|
||||||
include ../../_includes/_mixins
|
include ../../_includes/_mixins
|
||||||
|
|
||||||
p
|
p
|
||||||
| spaCy comes with a small collection of utility functions located in
|
| spaCy comes with a small collection of utility functions located in
|
||||||
| #[+src(gh("spaCy", "spacy/util.py")) spacy/util.py].
|
| #[+src(gh("spaCy", "spacy/util.py")) spacy/util.py].
|
||||||
|
|
||||||
+infobox("Important note")
|
|
||||||
| Because utility functions are mostly intended for
|
| Because utility functions are mostly intended for
|
||||||
| #[strong internal use within spaCy], their behaviour may change with
|
| #[strong internal use within spaCy], their behaviour may change with
|
||||||
| future releases. The functions documented on this page should be safe
|
| future releases. The functions documented on this page should be safe
|
||||||
|
@ -74,15 +72,23 @@ p
|
||||||
+cell #[code Language]
|
+cell #[code Language]
|
||||||
+cell Language class.
|
+cell Language class.
|
||||||
|
|
||||||
+h(2, "resolve_model_path") util.resolve_model_path
|
+h(2, "load_model") util.load_model
|
||||||
+tag function
|
+tag function
|
||||||
+tag-new(2)
|
+tag-new(2)
|
||||||
|
|
||||||
p Resolve a model name or string to a model path.
|
p
|
||||||
|
| Load a model from a shortcut link, package or data path. If called with a
|
||||||
|
| shortcut link or package name, spaCy will assume the model is a Python
|
||||||
|
| package and import and call its #[code load()] method. If called with a
|
||||||
|
| path, spaCy will assume it's a data directory, read the language and
|
||||||
|
| pipeline settings from the meta.json and initialise a #[code Language]
|
||||||
|
| class. The model data will then be loaded in via
|
||||||
|
| #[+api("language#from_disk") #[code Language.from_disk()]].
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
model_path = util.resolve_model_path('en')
|
nlp = util.load_model('en')
|
||||||
model_path = util.resolve_model_path('/path/to/en')
|
nlp = util.load_model('en_core_web_sm')
|
||||||
|
nlp = util.load_model('/path/to/data')
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
|
@ -92,8 +98,33 @@ p Resolve a model name or string to a model path.
|
||||||
|
|
||||||
+footrow
|
+footrow
|
||||||
+cell returns
|
+cell returns
|
||||||
+cell #[code Path]
|
+cell #[code Language]
|
||||||
+cell Path to model data directory.
|
+cell #[code Language] class with the loaded model.
|
||||||
|
|
||||||
|
+h(2, "load_model_from_init_py") util.load_model_from_init_py
|
||||||
|
+tag function
|
||||||
|
+tag-new(2)
|
||||||
|
|
||||||
|
p
|
||||||
|
| A helper function to use in the #[code load()] method of a model package's
|
||||||
|
| #[+src(gh("spacy-dev-resources", "templates/model/en_model_name/__init__.py")) __init__.py].
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
from spacy.util import load_model_from_init_py
|
||||||
|
|
||||||
|
def load():
|
||||||
|
return load_model_from_init_py(__file__)
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code init_file]
|
||||||
|
+cell unicode
|
||||||
|
+cell Path to model's __init__.py, i.e. #[code __file__].
|
||||||
|
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell #[code Language]
|
||||||
|
+cell #[code Language] class with the loaded model.
|
||||||
|
|
||||||
+h(2, "is_package") util.is_package
|
+h(2, "is_package") util.is_package
|
||||||
+tag function
|
+tag function
|
||||||
|
@ -117,16 +148,18 @@ p
|
||||||
+cell #[code bool]
|
+cell #[code bool]
|
||||||
+cell #[code True] if installed package, #[code False] if not.
|
+cell #[code True] if installed package, #[code False] if not.
|
||||||
|
|
||||||
+h(2, "get_model_package_path") util.get_model_package_path
|
+h(2, "get_package_path") util.get_package_path
|
||||||
+tag function
|
+tag function
|
||||||
|
+tag-new(2)
|
||||||
|
|
||||||
p
|
p
|
||||||
| Get path to a #[+a("/docs/usage/models") model package] installed via pip.
|
| Get path to an installed package. Mainly used to resolve the location of
|
||||||
| Currently imports the package to find it and parse its meta data.
|
| #[+a("/docs/usage/models") model packages]. Currently imports the package
|
||||||
|
| to find its path.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
util.get_model_package_path('en_core_web_sm')
|
util.get_package_path('en_core_web_sm')
|
||||||
# /usr/lib/python3.6/site-packages/en_core_web_sm/en_core_web_sm-1.2.0
|
# /usr/lib/python3.6/site-packages/en_core_web_sm
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
|
@ -137,37 +170,8 @@ p
|
||||||
+footrow
|
+footrow
|
||||||
+cell returns
|
+cell returns
|
||||||
+cell #[code Path]
|
+cell #[code Path]
|
||||||
+cell Path to model data directory.
|
|
||||||
|
|
||||||
+h(2, "parse_package_meta") util.parse_package_meta
|
|
||||||
+tag function
|
|
||||||
|
|
||||||
p
|
|
||||||
| Check if a #[code meta.json] exists in a model package and return its
|
|
||||||
| contents.
|
|
||||||
|
|
||||||
+aside-code("Example").
|
|
||||||
if util.is_package('en_core_web_sm'):
|
|
||||||
path = util.get_model_package_path('en_core_web_sm')
|
|
||||||
meta = util.parse_package_meta(path, require=True)
|
|
||||||
# {'name': 'core_web_sm', 'lang': 'en', ...}
|
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
|
||||||
+row
|
|
||||||
+cell #[code package_path]
|
|
||||||
+cell #[code Path]
|
|
||||||
+cell Path to model package directory.
|
+cell Path to model package directory.
|
||||||
|
|
||||||
+row
|
|
||||||
+cell #[code require]
|
|
||||||
+cell #[code bool]
|
|
||||||
+cell If #[code True], raise error if no #[code meta.json] is found.
|
|
||||||
|
|
||||||
+footrow
|
|
||||||
+cell returns
|
|
||||||
+cell dict / #[code None]
|
|
||||||
+cell Model meta data or #[code None].
|
|
||||||
|
|
||||||
+h(2, "is_in_jupyter") util.is_in_jupyter
|
+h(2, "is_in_jupyter") util.is_in_jupyter
|
||||||
+tag function
|
+tag function
|
||||||
+tag-new(2)
|
+tag-new(2)
|
||||||
|
|
Loading…
Reference in New Issue