2017-03-20 21:50:13 +00:00
|
|
|
|
# coding: utf8
|
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
2017-05-22 10:28:58 +00:00
|
|
|
|
import plac
|
2017-03-21 01:06:29 +00:00
|
|
|
|
import shutil
|
2017-03-20 21:50:13 +00:00
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
2018-04-03 13:50:31 +00:00
|
|
|
|
from ._messages import Messages
|
2017-05-07 21:25:29 +00:00
|
|
|
|
from ..compat import path2str, json_dumps
|
|
|
|
|
from ..util import prints
|
2017-03-20 21:50:13 +00:00
|
|
|
|
from .. import util
|
2017-05-07 21:25:29 +00:00
|
|
|
|
from .. import about
|
2017-03-20 21:50:13 +00:00
|
|
|
|
|
|
|
|
|
|
2017-05-22 10:28:58 +00:00
|
|
|
|
@plac.annotations(
    input_dir=("directory with model data", "positional", None, str),
    output_dir=("output parent directory", "positional", None, str),
    meta_path=("path to meta.json", "option", "m", str),
    create_meta=("create meta.json, even if one exists in directory – if "
                 "existing meta is found, entries are shown as defaults in "
                 "the command line prompt", "flag", "c", bool),
    force=("force overwriting of existing model directory in output directory",
           "flag", "f", bool))
def package(input_dir, output_dir, meta_path=None, create_meta=False,
            force=False):
    """
    Generate Python package for model data, including meta and required
    installation files. A new directory will be created in the specified
    output directory, and model data will be copied over.

    input_dir: Directory containing the model data to package.
    output_dir: Existing parent directory; the package is created in a
        '<lang>_<name>-<version>' sub-directory of it.
    meta_path: Optional path to a meta.json. Defaults to 'meta.json' inside
        input_dir.
    create_meta: If set, prompt for the meta values on the command line,
        offering the values from the loaded meta.json as defaults.
    force: If set, an already existing package directory is removed first.
    """
    input_path = util.ensure_path(input_dir)
    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
    if not input_path or not input_path.exists():
        prints(input_path, title=Messages.M008, exits=1)
    if not output_path or not output_path.exists():
        prints(output_path, title=Messages.M040, exits=1)
    if meta_path and not meta_path.exists():
        prints(meta_path, title=Messages.M020, exits=1)

    meta_path = meta_path or input_path / 'meta.json'
    # Without a readable meta.json there is nothing to package. Report it
    # through the same error used for a missing meta path above, instead of
    # falling through and hitting a NameError on the unassigned `meta`.
    # NOTE(review): like the checks above, this relies on
    # prints(..., exits=1) terminating the program.
    if not meta_path.is_file():
        prints(meta_path, title=Messages.M020, exits=1)
    meta = util.read_json(meta_path)
    if not create_meta:  # only print this if user doesn't want to overwrite
        prints(meta_path, title=Messages.M041)
    else:
        meta = generate_meta(input_dir, meta)
    meta = validate_meta(meta, ['lang', 'name', 'version'])
    model_name = meta['lang'] + '_' + meta['name']
    model_name_v = model_name + '-' + meta['version']
    # Layout: <output>/<name>-<version>/<name>/<name>-<version>/<model data>
    main_path = output_path / model_name_v
    package_path = main_path / model_name

    create_dirs(package_path, force)
    shutil.copytree(path2str(input_path),
                    path2str(package_path / model_name_v))
    create_file(main_path / 'meta.json', json_dumps(meta))
    create_file(main_path / 'setup.py', TEMPLATE_SETUP)
    create_file(main_path / 'MANIFEST.in', TEMPLATE_MANIFEST)
    create_file(package_path / '__init__.py', TEMPLATE_INIT)
    prints(main_path, Messages.M043,
           title=Messages.M042.format(name=model_name_v))
|
2017-03-20 21:50:13 +00:00
|
|
|
|
|
|
|
|
|
|
2017-03-21 01:06:53 +00:00
|
|
|
|
def create_dirs(package_path, force):
    """
    Create the package directory, including any missing parents.

    package_path: Path of the directory to create.
    force: If the directory already exists, remove it first when True;
        otherwise report the conflict via prints(..., exits=1).
    """
    already_exists = package_path.exists()
    if already_exists and force:
        # Clear out the previous package before re-creating it.
        shutil.rmtree(path2str(package_path))
    elif already_exists:
        prints(package_path, Messages.M045, title=Messages.M044, exits=1)
    package_path.mkdir(parents=True)
|
|
|
|
|
|
|
|
|
|
|
2017-03-20 21:50:13 +00:00
|
|
|
|
def create_file(file_path, contents):
    """
    Write text to a file as UTF-8, creating or overwriting it.

    file_path: Path object of the file to write.
    contents: Text to write.
    """
    # Opening in 'w' mode already creates the file, so no separate touch()
    # is needed. The context manager guarantees the handle is flushed and
    # closed, which the previous open(...).write(...) chain left to the
    # garbage collector.
    with file_path.open('w', encoding='utf-8') as file_:
        file_.write(contents)
|
2017-03-20 21:50:13 +00:00
|
|
|
|
|
|
|
|
|
|
2017-10-30 17:39:38 +00:00
|
|
|
|
def generate_meta(model_path, existing_meta):
    """
    Build the package meta by prompting for each setting on the command line.

    model_path: Path of the model directory; the model is loaded from it to
        read its pipeline names and vector statistics.
    existing_meta: Previously loaded meta dict, or a falsy value. Its entries
        are offered as prompt defaults and the dict is updated in place.
    RETURNS: The meta dict with the prompted settings, 'pipeline' and
        'vectors' filled in.
    """
    meta = existing_meta or {}
    # (key, prompt text, default) in the order the prompts are shown.
    # 'spacy_version' is always derived from the running package version
    # rather than taken from the existing meta.
    prompts = [
        ('lang', 'Model language', meta.get('lang', 'en')),
        ('name', 'Model name', meta.get('name', 'model')),
        ('version', 'Model version', meta.get('version', '0.0.0')),
        ('spacy_version', 'Required spaCy version',
         '>=%s,<3.0.0' % about.__version__),
        ('description', 'Model description', meta.get('description', False)),
        ('author', 'Author', meta.get('author', False)),
        ('email', 'Author email', meta.get('email', False)),
        ('url', 'Author website', meta.get('url', False)),
        ('license', 'License', meta.get('license', 'CC BY-SA 3.0')),
    ]
    nlp = util.load_model_from_path(Path(model_path))
    vectors = nlp.vocab.vectors
    meta['pipeline'] = nlp.pipe_names
    meta['vectors'] = {'width': nlp.vocab.vectors_length,
                       'vectors': len(vectors),
                       'keys': vectors.n_keys}
    prints(Messages.M047, title=Messages.M046)
    for key, label, fallback in prompts:
        answer = util.get_raw_input(label, fallback)
        # An empty answer keeps the default, but only if the default is
        # truthy; with a False default the (empty) answer itself is stored.
        if answer == '' and fallback:
            meta[key] = fallback
        else:
            meta[key] = answer
    if about.__title__ != 'spacy':
        meta['parent_package'] = about.__title__
    return meta
|
|
|
|
|
|
|
|
|
|
|
2017-04-16 11:13:17 +00:00
|
|
|
|
def validate_meta(meta, keys):
    """
    Check that every required key is present in the meta and not empty.

    meta: The meta dict to check.
    keys: Iterable of required keys.
    RETURNS: The same meta object, unchanged.
    """
    for required in keys:
        is_missing = required not in meta
        # Only look the value up once the key is known to exist.
        if is_missing or meta[required] == '':
            prints(Messages.M049, title=Messages.M048.format(key=required),
                   exits=1)
    return meta
|
2017-04-16 11:13:17 +00:00
|
|
|
|
|
|
|
|
|
|
2017-11-07 11:15:35 +00:00
|
|
|
|
TEMPLATE_SETUP = """
|
|
|
|
|
#!/usr/bin/env python
|
|
|
|
|
# coding: utf8
|
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
|
|
import io
|
|
|
|
|
import json
|
|
|
|
|
from os import path, walk
|
|
|
|
|
from shutil import copy
|
|
|
|
|
from setuptools import setup
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_meta(fp):
|
|
|
|
|
with io.open(fp, encoding='utf8') as f:
|
|
|
|
|
return json.load(f)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_files(data_dir):
|
|
|
|
|
output = []
|
|
|
|
|
for root, _, filenames in walk(data_dir):
|
|
|
|
|
for filename in filenames:
|
|
|
|
|
if not filename.startswith('.'):
|
|
|
|
|
output.append(path.join(root, filename))
|
|
|
|
|
output = [path.relpath(p, path.dirname(data_dir)) for p in output]
|
|
|
|
|
output.append('meta.json')
|
|
|
|
|
return output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_requirements(meta):
|
|
|
|
|
parent_package = meta.get('parent_package', 'spacy')
|
2018-05-22 18:50:46 +00:00
|
|
|
|
requirements = [parent_package + meta['spacy_version']]
|
2017-11-07 11:15:35 +00:00
|
|
|
|
if 'setup_requires' in meta:
|
|
|
|
|
requirements += meta['setup_requires']
|
|
|
|
|
return requirements
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def setup_package():
|
|
|
|
|
root = path.abspath(path.dirname(__file__))
|
|
|
|
|
meta_path = path.join(root, 'meta.json')
|
|
|
|
|
meta = load_meta(meta_path)
|
|
|
|
|
model_name = str(meta['lang'] + '_' + meta['name'])
|
|
|
|
|
model_dir = path.join(model_name, model_name + '-' + meta['version'])
|
|
|
|
|
|
|
|
|
|
copy(meta_path, path.join(model_name))
|
|
|
|
|
copy(meta_path, model_dir)
|
|
|
|
|
|
|
|
|
|
setup(
|
|
|
|
|
name=model_name,
|
|
|
|
|
description=meta['description'],
|
|
|
|
|
author=meta['author'],
|
|
|
|
|
author_email=meta['email'],
|
|
|
|
|
url=meta['url'],
|
|
|
|
|
version=meta['version'],
|
|
|
|
|
license=meta['license'],
|
|
|
|
|
packages=[model_name],
|
|
|
|
|
package_data={model_name: list_files(model_dir)},
|
|
|
|
|
install_requires=list_requirements(meta),
|
|
|
|
|
zip_safe=False,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
setup_package()
|
|
|
|
|
""".strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TEMPLATE_MANIFEST = """
|
|
|
|
|
include meta.json
|
|
|
|
|
""".strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
TEMPLATE_INIT = """
|
|
|
|
|
# coding: utf8
|
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from spacy.util import load_model_from_init_py, get_model_meta
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
__version__ = get_model_meta(Path(__file__).parent)['version']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load(**overrides):
|
|
|
|
|
return load_model_from_init_py(__file__, **overrides)
|
|
|
|
|
""".strip()
|