From 567485a8183ebbdcfbf9d7f6db321c24fb009478 Mon Sep 17 00:00:00 2001 From: ines Date: Mon, 29 May 2017 14:10:10 +0200 Subject: [PATCH] Fix and document model loading with pipeline and overrides --- spacy/__init__.py | 2 +- spacy/util.py | 83 +++++++++++++++++--------------------- website/docs/api/util.jade | 37 +++++++++++++++-- 3 files changed, 72 insertions(+), 50 deletions(-) diff --git a/spacy/__init__.py b/spacy/__init__.py index f9e29037f..05822c177 100644 --- a/spacy/__init__.py +++ b/spacy/__init__.py @@ -9,7 +9,7 @@ from . import util def load(name, **overrides): name = resolve_load_name(name, **overrides) - return util.load_model(name) + return util.load_model(name, **overrides) def info(model=None, markdown=False): diff --git a/spacy/util.py b/spacy/util.py index fbcf3ae6b..c2b46e9b9 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -84,10 +84,11 @@ def ensure_path(path): return path -def load_model(name): +def load_model(name, **overrides): """Load a model from a shortcut link, package or data path. name (unicode): Package name, shortcut link or model path. + **overrides: Specific overrides, like pipeline components to disable. RETURNS (Language): `Language` class with the loaded model. """ data_path = get_data_path() @@ -95,73 +96,63 @@ def load_model(name): raise IOError("Can't find spaCy data path: %s" % path2str(data_path)) if isinstance(name, basestring_): if (data_path / name).exists(): # in data dir or shortcut - return load_model_from_path(data_path / name) + spec = importlib.util.spec_from_file_location('model', data_path / name) + cls = importlib.util.module_from_spec(spec) + spec.loader.exec_module(cls) + return cls.load(**overrides) if is_package(name): # installed as package - return load_model_from_pkg(name) + cls = importlib.import_module(name) + return cls.load(**overrides) if Path(name).exists(): # path to model data directory - return load_data_from_path(Path(name)) + model_path = Path(name) + meta = get_package_meta(model_path) + cls = get_lang_class(meta['lang']) + nlp = cls(pipeline=meta.get('pipeline', True)) + return nlp.from_disk(model_path, **overrides) elif hasattr(name, 'exists'): # Path or Path-like to model data - return load_data_from_path(name) + meta = get_package_meta(name) + cls = get_lang_class(meta['lang']) + nlp = cls(pipeline=meta.get('pipeline', True)) + return nlp.from_disk(name, **overrides) raise IOError("Can't find model '%s'" % name) -def load_model_from_init_py(init_file): +def load_model_from_init_py(init_file, **overrides): """Helper function to use in the `load()` method of a model package's __init__.py. init_file (unicode): Path to model's __init__.py, i.e. `__file__`. + **overrides: Specific overrides, like pipeline components to disable. RETURNS (Language): `Language` class with loaded model. """ model_path = Path(init_file).parent - return load_data_from_path(model_path, package=True) + meta = get_model_meta(model_path) + data_dir = '%s_%s-%s' % (meta['lang'], meta['name'], meta['version']) + data_path = model_path / data_dir + if not model_path.exists(): + raise ValueError("Can't find model directory: %s" % path2str(data_path)) + cls = get_lang_class(meta['lang']) + nlp = cls(pipeline=meta.get('pipeline', True)) + return nlp.from_disk(data_path, **overrides) -def load_model_from_path(model_path): - """Import and load a model package from its file path. +def get_model_meta(path): + """Get model meta.json from a directory path and validate its contents. - path (unicode or Path): Path to package directory. - RETURNS (Language): `Language` class with loaded model. + path (unicode or Path): Path to model directory. + RETURNS (dict): The model's meta data. """ - model_path = ensure_path(model_path) - spec = importlib.util.spec_from_file_location('model', model_path) - module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(module) - return module.load() - - -def load_model_from_pkg(name): - """Import and load a model package. - - name (unicode): Name of model package installed via pip. - RETURNS (Language): `Language` class with loaded model. - """ - module = importlib.import_module(name) - return module.load() - - -def load_data_from_path(model_path, package=False): - """Initialie a `Language` class with a loaded model from a model data path. - - model_path (unicode or Path): Path to model data directory. - package (bool): Does the path point to the parent package directory? - RETURNS (Language): `Language` class with loaded model. - """ - model_path = ensure_path(model_path) + model_path = ensure_path(path) + if not model_path.exists(): + raise ValueError("Can't find model directory: %s" % path2str(model_path)) meta_path = model_path / 'meta.json' if not meta_path.is_file(): - raise IOError("Could not read meta.json from %s" % location) - meta = read_json(location) + raise IOError("Could not read meta.json from %s" % meta_path) + meta = read_json(meta_path) for setting in ['lang', 'name', 'version']: if setting not in meta: raise IOError('No %s setting found in model meta.json' % setting) - if package: - model_data_path = '%s_%s-%s' % (meta['lang'], meta['name'], meta['version']) - model_path = model_path / model_data_path - if not model_path.exists(): - raise ValueError("Can't find model directory: %s" % path2str(model_path)) - cls = get_lang_class(meta['lang']) - nlp = cls(pipeline=meta.get('pipeline', True)) - return nlp.from_disk(model_path) + return meta def is_package(name): diff --git a/website/docs/api/util.jade b/website/docs/api/util.jade index 3e132b7b4..f45dc7120 100644 --- a/website/docs/api/util.jade +++ b/website/docs/api/util.jade @@ -87,7 +87,7 @@ p +aside-code("Example"). nlp = util.load_model('en') - nlp = util.load_model('en_core_web_sm') + nlp = util.load_model('en_core_web_sm', disable=['ner']) nlp = util.load_model('/path/to/data') +table(["Name", "Type", "Description"]) @@ -96,6 +96,11 @@ p +cell unicode +cell Package name, shortcut link or model path. + +row + +cell #[code **overrides] + +cell - + +cell Specific overrides, like pipeline components to disable. + +footrow +cell returns +cell #[code Language] @@ -112,8 +117,8 @@ p +aside-code("Example"). from spacy.util import load_model_from_init_py - def load(): - return load_model_from_init_py(__file__) + def load(**overrides): + return load_model_from_init_py(__file__, **overrides) +table(["Name", "Type", "Description"]) +row @@ -121,11 +126,37 @@ p +cell unicode +cell Path to model's __init__.py, i.e. #[code __file__]. + +row + +cell #[code **overrides] + +cell - + +cell Specific overrides, like pipeline components to disable. + +footrow +cell returns +cell #[code Language] +cell #[code Language] class with the loaded model. ++h(2, "get_model_meta") util.get_model_meta + +tag function + +tag-new(2) + +p + | Get a model's meta.json from a directory path and validate its contents. + ++aside-code("Example"). + meta = util.get_model_meta('/path/to/model') + ++table(["Name", "Type", "Description"]) + +row + +cell #[code path] + +cell unicode or #[code Path] + +cell Path to model directory. + + +footrow + +cell returns + +cell dict + +cell The model's meta data. + +h(2, "is_package") util.is_package +tag function