From c1983621fbe34659b9243b1af603ed9b85495ac6 Mon Sep 17 00:00:00 2001 From: ines Date: Sun, 28 May 2017 00:22:00 +0200 Subject: [PATCH] Update util functions for model loading --- spacy/__init__.py | 12 +--- spacy/cli/info.py | 10 +++- spacy/cli/link.py | 2 +- spacy/util.py | 111 +++++++++++++++++++++++++------------ website/docs/api/util.jade | 90 ++++++++++++++++-------------- 5 files changed, 132 insertions(+), 93 deletions(-) diff --git a/spacy/__init__.py b/spacy/__init__.py index 6beb7955e..f9e29037f 100644 --- a/spacy/__init__.py +++ b/spacy/__init__.py @@ -1,9 +1,6 @@ # coding: utf8 from __future__ import unicode_literals -import importlib - -from .compat import basestring_ from .cli.info import info as cli_info from .glossary import explain from .deprecated import resolve_load_name @@ -12,14 +9,7 @@ from . import util def load(name, **overrides): name = resolve_load_name(name, **overrides) - model_path = util.resolve_model_path(name) - meta = util.parse_package_meta(model_path) - if 'lang' not in meta: - raise IOError('No language setting found in model meta.') - cls = util.get_lang_class(meta['lang']) - overrides['meta'] = meta - overrides['path'] = model_path - return cls(**overrides) + return util.load_model(name) def info(model=None, markdown=False): diff --git a/spacy/cli/info.py b/spacy/cli/info.py index 75aac10c7..70f054d84 100644 --- a/spacy/cli/info.py +++ b/spacy/cli/info.py @@ -20,8 +20,14 @@ def info(cmd, model=None, markdown=False): prints details in Markdown for easy copy-pasting to GitHub issues. """ if model: - model_path = util.resolve_model_path(model) - meta = util.parse_package_meta(model_path) + if util.is_package(model): + model_path = util.get_package_path(model) + else: + model_path = util.get_data_path() / model + meta_path = model_path / 'meta.json' + if not meta_path.is_file(): + prints(meta_path, title="Can't find model meta.json", exits=1) + meta = read_json(meta_path) if model_path.resolve() != model_path: meta['link'] = path2str(model_path) meta['source'] = path2str(model_path.resolve()) diff --git a/spacy/cli/link.py b/spacy/cli/link.py index 9aecdabfe..66824c042 100644 --- a/spacy/cli/link.py +++ b/spacy/cli/link.py @@ -21,7 +21,7 @@ def link(cmd, origin, link_name, force=False): directory. Linking models allows loading them via spacy.load(link_name). """ if util.is_package(origin): - model_path = util.get_model_package_path(origin) + model_path = util.get_package_path(model) else: model_path = Path(origin) if not model_path.exists(): diff --git a/spacy/util.py b/spacy/util.py index a30b35a06..25fe198f4 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -78,27 +78,86 @@ def ensure_path(path): return path -def resolve_model_path(name): - """Resolve a model name or string to a model path. +def load_model(name): + """Load a model from a shortcut link, package or data path. name (unicode): Package name, shortcut link or model path. - RETURNS (Path): Path to model data directory. + RETURNS (Language): `Language` class with the loaded model. """ data_path = get_data_path() if not data_path or not data_path.exists(): raise IOError("Can't find spaCy data path: %s" % path2str(data_path)) if isinstance(name, basestring_): - if (data_path / name).exists(): # in data dir or shortcut link - return (data_path / name) - if is_package(name): # installed as a package - return get_model_package_path(name) - if Path(name).exists(): # path to model - return Path(name) - elif hasattr(name, 'exists'): # Path or Path-like object - return name + if (data_path / name).exists(): # in data dir or shortcut + return load_model_from_path(data_path / name) + if is_package(name): # installed as package + return load_model_from_pkg(name) + if Path(name).exists(): # path to model data directory + return load_data_from_path(Path(name)) + elif hasattr(name, 'exists'): # Path or Path-like to model data + return load_data_from_path(name) raise IOError("Can't find model '%s'" % name) +def load_model_from_init_py(init_file): + """Helper function to use in the `load()` method of a model package's + __init__.py. + + init_file (unicode): Path to model's __init__.py, i.e. `__file__`. + RETURNS (Language): `Language` class with loaded model. + """ + model_path = Path(init_file).parent + return load_data_from_path(model_path, package=True) + + +def load_model_from_path(model_path): + """Import and load a model package from its file path. + + path (unicode or Path): Path to package directory. + RETURNS (Language): `Language` class with loaded model. + """ + model_path = ensure_path(model_path) + spec = importlib.util.spec_from_file_location('model', model_path) + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) + return module.load() + + +def load_model_from_pkg(name): + """Import and load a model package. + + name (unicode): Name of model package installed via pip. + RETURNS (Language): `Language` class with loaded model. + """ + module = importlib.import_module(name) + return module.load() + + +def load_data_from_path(model_path, package=False): + """Initialie a `Language` class with a loaded model from a model data path. + + model_path (unicode or Path): Path to model data directory. + package (bool): Does the path point to the parent package directory? + RETURNS (Language): `Language` class with loaded model. + """ + model_path = ensure_path(model_path) + meta_path = model_path / 'meta.json' + if not meta_path.is_file(): + raise IOError("Could not read meta.json from %s" % location) + meta = read_json(location) + for setting in ['lang', 'name', 'version']: + if setting not in meta: + raise IOError('No %s setting found in model meta.json' % setting) + if package: + model_data_path = '%s_%s-%s' % (meta['lang'], meta['name'], meta['version']) + model_path = model_path / model_data_path + if not model_path.exists(): + raise ValueError("Can't find model directory: %s" % path2str(model_path)) + cls = get_lang_class(meta['lang']) + nlp = cls(pipeline=meta.get('pipeline', True)) + return nlp.from_disk(model_path) + + def is_package(name): """Check if string maps to a package installed via pip. @@ -112,36 +171,16 @@ def is_package(name): return False -def get_model_package_path(package_name): - """Get path to a model package installed via pip. +def get_package_path(name): + """Get the path to an installed package. - package_name (unicode): Name of installed package. - RETURNS (Path): Path to model data directory. + name (unicode): Package name. + RETURNS (Path): Path to installed package. """ # Here we're importing the module just to find it. This is worryingly # indirect, but it's otherwise very difficult to find the package. - # Python's installation and import rules are very complicated. pkg = importlib.import_module(package_name) - package_path = Path(pkg.__file__).parent.parent - meta = parse_package_meta(package_path / package_name) - model_name = '%s-%s' % (package_name, meta['version']) - return package_path / package_name / model_name - - -def parse_package_meta(package_path, require=True): - """Check if a meta.json exists in a package and return its contents. - - package_path (Path): Path to model package directory. - require (bool): If True, raise error if no meta.json is found. - RETURNS (dict or None): Model meta.json data or None. - """ - location = package_path / 'meta.json' - if location.is_file(): - return read_json(location) - elif require: - raise IOError("Could not read meta.json from %s" % location) - else: - return None + return Path(pkg.__file__).parent def is_in_jupyter(): diff --git a/website/docs/api/util.jade b/website/docs/api/util.jade index 717abf34a..3e132b7b4 100644 --- a/website/docs/api/util.jade +++ b/website/docs/api/util.jade @@ -1,12 +1,10 @@ -//- 💫 DOCS > API > ANNOTATION SPECS +//- 💫 DOCS > API > UTIL include ../../_includes/_mixins p | spaCy comes with a small collection of utility functions located in | #[+src(gh("spaCy", "spacy/util.py")) spacy/util.py]. - -+infobox("Important note") | Because utility functions are mostly intended for | #[strong internal use within spaCy], their behaviour may change with | future releases. The functions documented on this page should be safe @@ -74,15 +72,23 @@ p +cell #[code Language] +cell Language class. -+h(2, "resolve_model_path") util.resolve_model_path ++h(2, "load_model") util.load_model +tag function +tag-new(2) -p Resolve a model name or string to a model path. +p + | Load a model from a shortcut link, package or data path. If called with a + | shortcut link or package name, spaCy will assume the model is a Python + | package and import and call its #[code load()] method. If called with a + | path, spaCy will assume it's a data directory, read the language and + | pipeline settings from the meta.json and initialise a #[code Language] + | class. The model data will then be loaded in via + | #[+api("language#from_disk") #[code Language.from_disk()]]. +aside-code("Example"). - model_path = util.resolve_model_path('en') - model_path = util.resolve_model_path('/path/to/en') + nlp = util.load_model('en') + nlp = util.load_model('en_core_web_sm') + nlp = util.load_model('/path/to/data') +table(["Name", "Type", "Description"]) +row @@ -92,8 +98,33 @@ p Resolve a model name or string to a model path. +footrow +cell returns - +cell #[code Path] - +cell Path to model data directory. + +cell #[code Language] + +cell #[code Language] class with the loaded model. + ++h(2, "load_model_from_init_py") util.load_model_from_init_py + +tag function + +tag-new(2) + +p + | A helper function to use in the #[code load()] method of a model package's + | #[+src(gh("spacy-dev-resources", "templates/model/en_model_name/__init__.py")) __init__.py]. + ++aside-code("Example"). + from spacy.util import load_model_from_init_py + + def load(): + return load_model_from_init_py(__file__) + ++table(["Name", "Type", "Description"]) + +row + +cell #[code init_file] + +cell unicode + +cell Path to model's __init__.py, i.e. #[code __file__]. + + +footrow + +cell returns + +cell #[code Language] + +cell #[code Language] class with the loaded model. +h(2, "is_package") util.is_package +tag function @@ -117,16 +148,18 @@ p +cell #[code bool] +cell #[code True] if installed package, #[code False] if not. -+h(2, "get_model_package_path") util.get_model_package_path ++h(2, "get_package_path") util.get_package_path +tag function + +tag-new(2) p - | Get path to a #[+a("/docs/usage/models") model package] installed via pip. - | Currently imports the package to find it and parse its meta data. + | Get path to an installed package. Mainly used to resolve the location of + | #[+a("/docs/usage/models") model packages]. Currently imports the package + | to find its path. +aside-code("Example"). - util.get_model_package_path('en_core_web_sm') - # /usr/lib/python3.6/site-packages/en_core_web_sm/en_core_web_sm-1.2.0 + util.get_package_path('en_core_web_sm') + # /usr/lib/python3.6/site-packages/en_core_web_sm +table(["Name", "Type", "Description"]) +row @@ -137,37 +170,8 @@ p +footrow +cell returns +cell #[code Path] - +cell Path to model data directory. - -+h(2, "parse_package_meta") util.parse_package_meta - +tag function - -p - | Check if a #[code meta.json] exists in a model package and return its - | contents. - -+aside-code("Example"). - if util.is_package('en_core_web_sm'): - path = util.get_model_package_path('en_core_web_sm') - meta = util.parse_package_meta(path, require=True) - # {'name': 'core_web_sm', 'lang': 'en', ...} - -+table(["Name", "Type", "Description"]) - +row - +cell #[code package_path] - +cell #[code Path] +cell Path to model package directory. - +row - +cell #[code require] - +cell #[code bool] - +cell If #[code True], raise error if no #[code meta.json] is found. - - +footrow - +cell returns - +cell dict / #[code None] - +cell Model meta data or #[code None]. - +h(2, "is_in_jupyter") util.is_in_jupyter +tag function +tag-new(2)