Fix and document model loading with pipeline and overrides

This commit is contained in:
ines 2017-05-29 14:10:10 +02:00
parent deac7eb01c
commit 567485a818
3 changed files with 72 additions and 50 deletions

View File

@ -9,7 +9,7 @@ from . import util
def load(name, **overrides): def load(name, **overrides):
name = resolve_load_name(name, **overrides) name = resolve_load_name(name, **overrides)
return util.load_model(name) return util.load_model(name, **overrides)
def info(model=None, markdown=False): def info(model=None, markdown=False):

View File

@ -84,10 +84,11 @@ def ensure_path(path):
return path return path
def load_model(name): def load_model(name, **overrides):
"""Load a model from a shortcut link, package or data path. """Load a model from a shortcut link, package or data path.
name (unicode): Package name, shortcut link or model path. name (unicode): Package name, shortcut link or model path.
**overrides: Specific overrides, like pipeline components to disable.
RETURNS (Language): `Language` class with the loaded model. RETURNS (Language): `Language` class with the loaded model.
""" """
data_path = get_data_path() data_path = get_data_path()
@ -95,73 +96,63 @@ def load_model(name):
raise IOError("Can't find spaCy data path: %s" % path2str(data_path)) raise IOError("Can't find spaCy data path: %s" % path2str(data_path))
if isinstance(name, basestring_): if isinstance(name, basestring_):
if (data_path / name).exists(): # in data dir or shortcut if (data_path / name).exists(): # in data dir or shortcut
return load_model_from_path(data_path / name) spec = importlib.util.spec_from_file_location('model', data_path / name)
cls = importlib.util.module_from_spec(spec)
spec.loader.exec_module(cls)
return cls.load(**overrides)
if is_package(name): # installed as package if is_package(name): # installed as package
return load_model_from_pkg(name) cls = importlib.import_module(name)
return cls.load(**overrides)
if Path(name).exists(): # path to model data directory if Path(name).exists(): # path to model data directory
return load_data_from_path(Path(name)) model_path = Path(name)
meta = get_package_meta(model_path)
cls = get_lang_class(meta['lang'])
nlp = cls(pipeline=meta.get('pipeline', True))
return nlp.from_disk(model_path, **overrides)
elif hasattr(name, 'exists'): # Path or Path-like to model data elif hasattr(name, 'exists'): # Path or Path-like to model data
return load_data_from_path(name) meta = get_package_meta(name)
cls = get_lang_class(meta['lang'])
nlp = cls(pipeline=meta.get('pipeline', True))
return nlp.from_disk(name, **overrides)
raise IOError("Can't find model '%s'" % name) raise IOError("Can't find model '%s'" % name)
def load_model_from_init_py(init_file): def load_model_from_init_py(init_file, **overrides):
"""Helper function to use in the `load()` method of a model package's """Helper function to use in the `load()` method of a model package's
__init__.py. __init__.py.
init_file (unicode): Path to model's __init__.py, i.e. `__file__`. init_file (unicode): Path to model's __init__.py, i.e. `__file__`.
**overrides: Specific overrides, like pipeline components to disable.
RETURNS (Language): `Language` class with loaded model. RETURNS (Language): `Language` class with loaded model.
""" """
model_path = Path(init_file).parent model_path = Path(init_file).parent
return load_data_from_path(model_path, package=True) meta = get_model_meta(model_path)
data_dir = '%s_%s-%s' % (meta['lang'], meta['name'], meta['version'])
data_path = model_path / data_dir
if not model_path.exists():
raise ValueError("Can't find model directory: %s" % path2str(data_path))
cls = get_lang_class(meta['lang'])
nlp = cls(pipeline=meta.get('pipeline', True))
return nlp.from_disk(data_path, **overrides)
def load_model_from_path(model_path): def get_model_meta(path):
"""Import and load a model package from its file path. """Get model meta.json from a directory path and validate its contents.
path (unicode or Path): Path to package directory. path (unicode or Path): Path to model directory.
RETURNS (Language): `Language` class with loaded model. RETURNS (dict): The model's meta data.
""" """
model_path = ensure_path(model_path) model_path = ensure_path(path)
spec = importlib.util.spec_from_file_location('model', model_path) if not model_path.exists():
module = importlib.util.module_from_spec(spec) raise ValueError("Can't find model directory: %s" % path2str(model_path))
spec.loader.exec_module(module)
return module.load()
def load_model_from_pkg(name):
"""Import and load a model package.
name (unicode): Name of model package installed via pip.
RETURNS (Language): `Language` class with loaded model.
"""
module = importlib.import_module(name)
return module.load()
def load_data_from_path(model_path, package=False):
"""Initialie a `Language` class with a loaded model from a model data path.
model_path (unicode or Path): Path to model data directory.
package (bool): Does the path point to the parent package directory?
RETURNS (Language): `Language` class with loaded model.
"""
model_path = ensure_path(model_path)
meta_path = model_path / 'meta.json' meta_path = model_path / 'meta.json'
if not meta_path.is_file(): if not meta_path.is_file():
raise IOError("Could not read meta.json from %s" % location) raise IOError("Could not read meta.json from %s" % meta_path)
meta = read_json(location) meta = read_json(meta_path)
for setting in ['lang', 'name', 'version']: for setting in ['lang', 'name', 'version']:
if setting not in meta: if setting not in meta:
raise IOError('No %s setting found in model meta.json' % setting) raise IOError('No %s setting found in model meta.json' % setting)
if package: return meta
model_data_path = '%s_%s-%s' % (meta['lang'], meta['name'], meta['version'])
model_path = model_path / model_data_path
if not model_path.exists():
raise ValueError("Can't find model directory: %s" % path2str(model_path))
cls = get_lang_class(meta['lang'])
nlp = cls(pipeline=meta.get('pipeline', True))
return nlp.from_disk(model_path)
def is_package(name): def is_package(name):

View File

@ -87,7 +87,7 @@ p
+aside-code("Example"). +aside-code("Example").
nlp = util.load_model('en') nlp = util.load_model('en')
nlp = util.load_model('en_core_web_sm') nlp = util.load_model('en_core_web_sm', disable=['ner'])
nlp = util.load_model('/path/to/data') nlp = util.load_model('/path/to/data')
+table(["Name", "Type", "Description"]) +table(["Name", "Type", "Description"])
@ -96,6 +96,11 @@ p
+cell unicode +cell unicode
+cell Package name, shortcut link or model path. +cell Package name, shortcut link or model path.
+row
+cell #[code **overrides]
+cell -
+cell Specific overrides, like pipeline components to disable.
+footrow +footrow
+cell returns +cell returns
+cell #[code Language] +cell #[code Language]
@ -112,8 +117,8 @@ p
+aside-code("Example"). +aside-code("Example").
from spacy.util import load_model_from_init_py from spacy.util import load_model_from_init_py
def load(): def load(**overrides):
return load_model_from_init_py(__file__) return load_model_from_init_py(__file__, **overrides)
+table(["Name", "Type", "Description"]) +table(["Name", "Type", "Description"])
+row +row
@ -121,11 +126,37 @@ p
+cell unicode +cell unicode
+cell Path to model's __init__.py, i.e. #[code __file__]. +cell Path to model's __init__.py, i.e. #[code __file__].
+row
+cell #[code **overrides]
+cell -
+cell Specific overrides, like pipeline components to disable.
+footrow +footrow
+cell returns +cell returns
+cell #[code Language] +cell #[code Language]
+cell #[code Language] class with the loaded model. +cell #[code Language] class with the loaded model.
+h(2, "get_model_meta") util.get_model_meta
+tag function
+tag-new(2)
p
| Get a model's meta.json from a directory path and validate its contents.
+aside-code("Example").
meta = util.get_model_meta('/path/to/model')
+table(["Name", "Type", "Description"])
+row
+cell #[code path]
+cell unicode or #[code Path]
+cell Path to model directory.
+footrow
+cell returns
+cell dict
+cell The model's meta data.
+h(2, "is_package") util.is_package +h(2, "is_package") util.is_package
+tag function +tag function