Tidy up CLI and fix print functions

This commit is contained in:
ines 2017-05-07 23:25:29 +02:00
parent 311704674d
commit 59c3b9d4dd
11 changed files with 167 additions and 291 deletions

View File

@ -2,6 +2,7 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from . import util from . import util
from .util import prints
from .deprecated import resolve_model_name from .deprecated import resolve_model_name
from .cli.info import info from .cli.info import info
from .glossary import explain from .glossary import explain
@ -26,9 +27,8 @@ def load(name, **overrides):
if not model_path.exists(): if not model_path.exists():
lang_name = util.get_lang_class(name).lang lang_name = util.get_lang_class(name).lang
model_path = None model_path = None
util.print_msg( prints("Only loading the '%s' tokenizer." % lang_name,
"Only loading the '{}' tokenizer.".format(lang_name), title="Warning: no model found for '%s'" % name)
title="Warning: no model found for '{}'".format(name))
else: else:
model_path = util.ensure_path(overrides['path']) model_path = util.ensure_path(overrides['path'])
data_path = model_path.parent data_path = model_path.parent

View File

@ -4,7 +4,7 @@ from __future__ import unicode_literals
from pathlib import Path from pathlib import Path
from .converters import conllu2json from .converters import conllu2json
from .. import util from ..util import prints
# Converters are matched by file extension. To add a converter, add a new entry # Converters are matched by file extension. To add a converter, add a new entry
@ -19,17 +19,12 @@ CONVERTERS = {
def convert(input_file, output_dir, *args):
    """
    Convert a data file using the converter registered for its file
    extension in CONVERTERS. Prints an error and exits if the input file or
    the output directory doesn't exist, or if no converter is registered for
    the extension. Additional positional arguments are passed through to the
    converter unchanged.
    """
    input_path = Path(input_file)
    output_path = Path(output_dir)
    if not input_path.exists():
        prints(input_path, title="Input file not found", exits=True)
    if not output_path.exists():
        prints(output_path, title="Output directory not found", exits=True)
    file_ext = input_path.suffix
    if file_ext not in CONVERTERS:
        prints("Can't find converter for %s" % input_path.parts[-1],
               title="Unknown format", exits=True)
    CONVERTERS[file_ext](input_path, output_path, *args)

View File

@ -1,9 +1,8 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import json from ...compat import json_dumps, path2str
from ...compat import json_dumps from ...util import prints
from ... import util
def conllu2json(input_path, output_path, n_sents=10, use_morphology=False): def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
@ -32,8 +31,8 @@ def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
output_file = output_path / output_filename output_file = output_path / output_filename
with output_file.open('w', encoding='utf-8') as f: with output_file.open('w', encoding='utf-8') as f:
f.write(json_dumps(docs)) f.write(json_dumps(docs))
util.print_msg("Created {} documents".format(len(docs)), prints("Created %d documents" % len(docs),
title="Generated output file {}".format(output_file)) title="Generated output file %s" % path2str(output_file))
def read_conllx(input_path, use_morphology=False, n=0): def read_conllx(input_path, use_morphology=False, n=0):

View File

@ -6,78 +6,52 @@ import os
import subprocess import subprocess
import sys import sys
from .link import link_package from .link import link
from ..util import prints
from .. import about from .. import about
from .. import util
def download(model, direct=False):
    """
    Download a model archive and install it via pip. With direct=True, the
    given name is used as-is to build the archive path and no link is
    created. Otherwise the name is resolved against the shortcuts table, the
    first compatible version is looked up, the archive is installed and the
    package is linked under the name the user asked for.
    """
    if direct:
        download_model('{m}/{m}.tar.gz'.format(m=model))
        return
    shortcuts = get_json(about.__shortcuts__, "available shortcuts")
    model_name = shortcuts.get(model, model)
    version = get_version(model_name, get_compatibility())
    archive = '{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version)
    download_model(archive)
    link(model_name, model, force=True)
def get_json(url, desc):
    """
    Fetch a URL and return the decoded JSON body. If the server doesn't
    answer with status 200, print an error that names the resource (desc)
    and points to the docs, then exit.
    """
    response = requests.get(url)
    if response.status_code != 200:
        message = ("Couldn't fetch %s. Please find a model for your spaCy "
                   "installation (v%s), and download it manually.")
        prints(message % (desc, about.__version__), about.__docs__,
               title="Server error (%d)" % response.status_code, exits=True)
    return response.json()
def check_shortcut(model):
shortcuts = get_json(about.__shortcuts__, "available shortcuts")
return shortcuts.get(model, model)
def get_compatibility():
    """
    Fetch the compatibility table and return the entry for the installed
    spaCy version. Prints an error and exits if the table has no entry for
    this version.
    """
    spacy_version = about.__version__
    table = get_json(about.__compatibility__, "compatibility table")
    by_version = table['spacy']
    if spacy_version not in by_version:
        prints("No compatible models found for v%s of spaCy." % spacy_version,
               title="Compatibility error", exits=True)
    return by_version[spacy_version]
def get_version(model, comp):
    """
    Return the first version listed for a model in the compatibility entry
    (a dictionary mapping model names to lists of versions). Prints an error
    and exits if the model is not listed, or if its version list is empty.
    """
    versions = comp.get(model)
    # An empty list is treated like a missing model instead of failing with
    # a bare IndexError on the lookup below.
    if not versions:
        version = about.__version__
        prints("No compatible model found for '%s' (spaCy v%s)." % (model, version),
               title="Compatibility error", exits=True)
    return versions[0]
def download_model(filename):
    """
    Install a model archive with pip. The archive URL is built from the
    download base URL and the given file name; pip is run with the current
    interpreter and a copy of the current environment.
    """
    download_url = about.__download_url__ + '/' + filename
    pip_command = [sys.executable, '-m',
                   'pip', 'install', '--no-cache-dir', download_url]
    subprocess.call(pip_command, env=os.environ.copy())
def check_error_depr(model):
if not model:
util.sys_exit(
"python -m spacy.download [name or shortcut]",
title="Missing model name or shortcut")
if model == 'all':
util.sys_exit(
"As of v1.7.0, the download all command is deprecated. Please "
"download the models individually via spacy.download [model name] "
"or pip install. For more info on this, see the documentation: "
"{d}".format(d=about.__docs__),
title="Deprecated command")

View File

@ -4,49 +4,46 @@ from __future__ import unicode_literals
import platform import platform
from pathlib import Path from pathlib import Path
from ..compat import unicode_ from ..compat import path2str
from .. import about from .. import about
from .. import util from .. import util
def info(model=None, markdown=False):
    """
    Print information about the spaCy installation or, if a model name is
    given, about that model (its meta data plus its location). If the model
    path resolves to a different location, both the link and its source are
    listed. Output is Markdown if markdown is set, a plain table otherwise.
    """
    if not model:
        data = {'spaCy version': about.__version__,
                'Location': path2str(Path(__file__).parent.parent),
                'Platform': platform.platform(),
                'Python version': platform.python_version(),
                'Models': list_models()}
        print_info(data, 'spaCy', markdown)
        return
    data_path = util.get_data_path()
    data = util.parse_package_meta(data_path, model, require=True)
    model_path = Path(__file__).parent / data_path / model
    resolved_path = model_path.resolve()
    if resolved_path != model_path:
        data['link'] = path2str(model_path)
        data['source'] = path2str(resolved_path)
    else:
        data['source'] = path2str(model_path)
    print_info(data, 'model %s' % model, markdown)
def print_info(data, title, markdown):
    """
    Print data under an "Info about ..." heading, as Markdown if markdown is
    set and as a plain table otherwise.
    """
    title = 'Info about %s' % title
    printer = util.print_markdown if markdown else util.print_table
    printer(data, title=title)
def get_spacy_data():
return {
'spaCy version': about.__version__,
'Location': unicode_(Path(__file__).parent.parent),
'Platform': platform.platform(),
'Python version': platform.python_version(),
'Installed models': ', '.join(list_models())
}
def list_models():
    """
    Return the names of the directories in spaCy's data path as one
    comma-separated string. Common cache directories and hidden directories
    are left out. Returns '-' if there is no data path or if it contains no
    model directories.
    """
    # exclude common cache directories and hidden directories
    exclude = ('cache', 'pycache', '__pycache__')
    data_path = util.get_data_path()
    if not data_path:
        return '-'
    names = [f.parts[-1] for f in data_path.iterdir() if f.is_dir()]
    models = [name for name in names
              if name not in exclude and not name.startswith('.')]
    # Same placeholder as for a missing data path, rather than an empty string.
    return ', '.join(models) or '-'

View File

@ -1,78 +1,37 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import pip
from pathlib import Path from pathlib import Path
import importlib from ..compat import symlink_to, path2str
from ..compat import unicode_, symlink_to from ..util import prints
from .. import util from .. import util
def link(origin, link_name, force=False):
    """
    Create a symlink called link_name in spaCy's data directory, pointing to
    the model data. origin is either the name of an installed model package
    or a path to a model data directory. An existing link is only replaced if
    force is set; otherwise an error is printed and the command exits.
    Errors raised while creating the symlink are reported and re-raised.
    """
    if util.is_package(origin):
        model_path = util.get_model_package_path(origin)
    else:
        model_path = Path(origin)
    if not model_path.exists():
        prints("The data should be located in %s" % path2str(model_path),
               title="Can't locate model data", exits=True)
    link_path = util.get_data_path() / link_name
    # exists() follows symlinks and is False for a dangling link, so also
    # check is_symlink() - otherwise a stale link is neither reported nor
    # removed, and creating the new symlink fails.
    link_exists = link_path.is_symlink() or link_path.exists()
    if link_exists and not force:
        prints("To overwrite an existing link, use the --force flag.",
               title="Link %s already exists" % link_name, exits=True)
    elif link_exists:
        link_path.unlink()
    try:
        symlink_to(link_path, model_path)
    except:
        # This is quite dirty, but just making sure other errors are caught.
        # The exception is re-raised after the message is printed.
        prints("Creating a symlink in spacy/data failed. Make sure you have "
               "the required permissions and try re-running the command as "
               "admin, or use a virtualenv. You can still import the model as a "
               "module and call its load() method, or create the symlink manually.",
               "%s --> %s" % (path2str(model_path), path2str(link_path)),
               title="Error: Couldn't link model to '%s'" % link_name)
        raise
    prints("%s --> %s" % (path2str(model_path), path2str(link_path)),
           "You can now load the model via spacy.load('%s')." % link_name,
           title="Linking successful")
def get_meta(package_path, package):
meta = util.parse_package_meta(package_path, package)
return meta
def is_package(origin):
packages = pip.get_installed_distributions()
for package in packages:
if package.project_name.replace('-', '_') == origin:
return True
return False

View File

@ -4,21 +4,25 @@ from __future__ import unicode_literals
import gzip import gzip
import math import math
from ast import literal_eval from ast import literal_eval
from pathlib import Path
from preshed.counter import PreshCounter from preshed.counter import PreshCounter
from ..vocab import write_binary_vectors from ..vocab import write_binary_vectors
from ..compat import fix_text from ..compat import fix_text, path2str
from ..util import prints
from .. import util from .. import util
def model(lang, model_dir, freqs_data, clusters_data, vectors_data): def model(lang, model_dir, freqs_data, clusters_data, vectors_data):
model_path = Path(model_dir) model_path = util.ensure_path(model_dir)
freqs_path = Path(freqs_data) freqs_path = util.ensure_path(freqs_data)
clusters_path = Path(clusters_data) if clusters_data else None clusters_path = util.ensure_path(clusters_data)
vectors_path = Path(vectors_data) if vectors_data else None vectors_path = util.ensure_path(vectors_data)
if not freqs_path.is_file():
check_dirs(freqs_path, clusters_path, vectors_path) prints(freqs_path, title="No frequencies file found", exits=True)
if clusters_path and not clusters_path.is_file():
prints(clusters_path, title="No Brown clusters file found", exits=True)
if vectors_path and not vectors_path.is_file():
prints(vectors_path, title="No word vectors file found", exits=True)
vocab = util.get_lang_class(lang).Defaults.create_vocab() vocab = util.get_lang_class(lang).Defaults.create_vocab()
probs, oov_prob = read_probs(freqs_path) probs, oov_prob = read_probs(freqs_path)
clusters = read_clusters(clusters_path) if clusters_path else {} clusters = read_clusters(clusters_path) if clusters_path else {}
@ -36,14 +40,14 @@ def create_model(model_path, vectors_path, vocab, oov_prob):
model_path.mkdir() model_path.mkdir()
if not vocab_path.exists(): if not vocab_path.exists():
vocab_path.mkdir() vocab_path.mkdir()
vocab.dump(lexemes_path.as_posix()) vocab.dump(path2str(lexemes_path))
with strings_path.open('w') as f: with strings_path.open('w') as f:
vocab.strings.dump(f) vocab.strings.dump(f)
with oov_path.open('w') as f: with oov_path.open('w') as f:
f.write('%f' % oov_prob) f.write('%f' % oov_prob)
if vectors_path: if vectors_path:
vectors_dest = vocab_path / 'vec.bin' vectors_dest = vocab_path / 'vec.bin'
write_binary_vectors(vectors_path.as_posix(), vectors_dest.as_posix()) write_binary_vectors(path2str(vectors_path), path2str(vectors_dest))
def read_probs(freqs_path, max_length=100, min_doc_freq=5, min_freq=200): def read_probs(freqs_path, max_length=100, min_doc_freq=5, min_freq=200):
@ -115,17 +119,8 @@ def populate_vocab(vocab, clusters, probs, oov_prob):
def check_unzip(file_path):
    """
    Open a file for reading and return the file object. Paths ending in 'gz'
    are opened with gzip, all others with the path's own open() method.
    """
    location = path2str(file_path)
    if not location.endswith('gz'):
        return file_path.open()
    return gzip.open(location)
def check_dirs(freqs_data, clusters_data, vectors_data):
if not freqs_data.is_file():
util.sys_exit(freqs_data.as_posix(), title="No frequencies file found")
if clusters_data and not clusters_data.is_file():
util.sys_exit(clusters_data.as_posix(), title="No Brown clusters file found")
if vectors_data and not vectors_data.is_file():
util.sys_exit(vectors_data.as_posix(), title="No word vectors file found")

View File

@ -5,64 +5,57 @@ import shutil
import requests import requests
from pathlib import Path from pathlib import Path
from ..compat import unicode_, json_dumps from ..compat import path2str, json_dumps
from ..util import prints
from .. import util from .. import util
from .. import about
def package(input_dir, output_dir, meta_path, force):
    """
    Generate a model package directory from a model data directory. Prints
    an error and exits if the model directory, the output directory or an
    explicitly given meta.json can't be found. The meta data is read from
    meta.json if the file exists and is entered interactively otherwise. The
    package directory is created (see create_dirs for the force flag), the
    model data is copied into it and the package files are written from the
    downloaded templates.
    """
    input_path = util.ensure_path(input_dir)
    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
    if not (input_path and input_path.exists()):
        prints(input_path, title="Model directory not found", exits=True)
    if not (output_path and output_path.exists()):
        prints(output_path, title="Output directory not found", exits=True)
    if meta_path and not meta_path.exists():
        prints(meta_path, title="meta.json not found", exits=True)

    # All templates are fetched before anything is written to disk.
    template_setup = get_template('setup.py')
    template_manifest = get_template('MANIFEST.in')
    template_init = get_template('en_model_name/__init__.py')

    if not meta_path:
        meta_path = input_path / 'meta.json'
    if meta_path.is_file():
        prints(meta_path, title="Reading meta.json from file")
        meta = util.read_json(meta_path)
    else:
        meta = generate_meta()
    validate_meta(meta, ['lang', 'name', 'version'])

    model_name = meta['lang'] + '_' + meta['name']
    model_name_v = model_name + '-' + meta['version']
    main_path = output_path / model_name_v
    package_path = main_path / model_name

    create_dirs(package_path, force)
    shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
    package_files = ((main_path / 'meta.json', json_dumps(meta)),
                     (main_path / 'setup.py', template_setup),
                     (main_path / 'MANIFEST.in', template_manifest),
                     (package_path / '__init__.py', template_init))
    for file_path, contents in package_files:
        create_file(file_path, contents)
    prints(main_path, "To build the package, run `python setup.py sdist` in this "
           "directory.", title="Successfully created package '%s'" % model_name_v)
def check_dirs(input_path, output_path, meta_path):
if not input_path.exists():
util.sys_exit(unicode_(input_path.as_poisx), title="Model directory not found")
if not output_path.exists():
util.sys_exit(unicode_(output_path), title="Output directory not found")
if meta_path and not meta_path.exists():
util.sys_exit(unicode_(meta_path), title="meta.json not found")
def create_dirs(package_path, force):
    """
    Create the package directory, including missing parent directories. If
    the directory already exists, it is deleted first when force is set;
    otherwise an error is printed and the command exits.
    """
    if package_path.exists():
        if force:
            shutil.rmtree(path2str(package_path))
        else:
            prints(package_path, "Please delete the directory and try again, or "
                   "use the --force flag to overwrite existing directories.",
                   title="Package directory already exists", exits=True)
    package_path.mkdir(parents=True)
@ -75,15 +68,14 @@ def generate_meta():
settings = [('lang', 'Model language', 'en'), settings = [('lang', 'Model language', 'en'),
('name', 'Model name', 'model'), ('name', 'Model name', 'model'),
('version', 'Model version', '0.0.0'), ('version', 'Model version', '0.0.0'),
('spacy_version', 'Required spaCy version', '>=1.7.0,<2.0.0'), ('spacy_version', 'Required spaCy version', '>=2.0.0,<3.0.0'),
('description', 'Model description', False), ('description', 'Model description', False),
('author', 'Author', False), ('author', 'Author', False),
('email', 'Author email', False), ('email', 'Author email', False),
('url', 'Author website', False), ('url', 'Author website', False),
('license', 'License', 'CC BY-NC 3.0')] ('license', 'License', 'CC BY-NC 3.0')]
util.print_msg("Enter the package settings for your model.", title="Generating meta.json") prints("Enter the package settings for your model.", title="Generating meta.json")
meta = {} meta = {}
for setting, desc, default in settings: for setting, desc, default in settings:
response = util.get_raw_input(desc, default) response = util.get_raw_input(desc, default)
@ -94,16 +86,13 @@ def generate_meta():
def validate_meta(meta, keys):
    """
    Check that every required key is present in the meta data and has a
    value. Prints an error and exits for the first key that is missing, set
    to None or set to an empty string.
    """
    for key in keys:
        value = meta.get(key)
        # None (e.g. a JSON null) is rejected as well: the required settings
        # are concatenated into the package name later on.
        if value is None or value == '':
            prints("This setting is required to build your package.",
                   title='No "%s" setting found in meta.json' % key, exits=True)
def get_template(filepath):
    """
    Download a package template file and return its text. The URL is built
    from the model files base URL and the given file path. Prints an error
    and exits if the server doesn't answer with status 200.
    """
    response = requests.get(about.__model_files__ + filepath)
    if response.status_code != 200:
        prints("Couldn't fetch template files from GitHub.",
               title="Server error (%d)" % response.status_code, exits=True)
    return response.text

View File

@ -4,19 +4,24 @@ from __future__ import unicode_literals, division, print_function
import json import json
from collections import defaultdict from collections import defaultdict
from ..util import ensure_path
from ..scorer import Scorer from ..scorer import Scorer
from ..gold import GoldParse, merge_sents from ..gold import GoldParse, merge_sents
from ..gold import read_json_file as read_gold_json from ..gold import read_json_file as read_gold_json
from ..util import prints
from .. import util from .. import util
def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ner, def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ner,
parser_L1): parser_L1):
output_path = ensure_path(output_dir) output_path = util.ensure_path(output_dir)
train_path = ensure_path(train_data) train_path = util.ensure_path(train_data)
dev_path = ensure_path(dev_data) dev_path = util.ensure_path(dev_data)
check_dirs(output_path, train_path, dev_path) if not output_path.exists():
prints(output_path, title="Output directory not found", exits=True)
if not train_path.exists():
prints(train_path, title="Training data not found", exits=True)
if dev_path and not dev_path.exists():
prints(dev_path, title="Development data not found", exits=True)
lang = util.get_lang_class(language) lang = util.get_lang_class(language)
parser_cfg = { parser_cfg = {
@ -44,14 +49,13 @@ def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ne
def train_config(config):
    """
    Load a JSON config file and check it for required settings. Prints an
    error and exits if the file doesn't exist or a required setting is
    missing. The list of required settings is currently empty.
    """
    config_path = util.ensure_path(config)
    if not config_path.is_file():
        prints(config_path, title="Config file not found", exits=True)
    # json.load() needs a file object, not a path.
    with config_path.open('r', encoding='utf8') as file_:
        config = json.load(file_)
    for setting in []:
        if setting not in config:
            prints("%s not found in config file." % setting,
                   title="Missing setting", exits=True)
def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_cfg, def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_cfg,
@ -88,16 +92,8 @@ def evaluate(Language, gold_tuples, output_path):
return scorer return scorer
def check_dirs(output_path, train_path, dev_path):
if not output_path.exists():
util.sys_exit(output_path.as_posix(), title="Output directory not found")
if not train_path.exists():
util.sys_exit(train_path.as_posix(), title="Training data not found")
if dev_path and not dev_path.exists():
util.sys_exit(dev_path.as_posix(), title="Development data not found")
def print_progress(itn, nr_weight, nr_active_feat, **scores):
    """
    Print one tab-separated progress row: the three integer counters,
    followed by the uas, ents_f, tags_acc and token_acc scores with three
    decimal places.
    """
    # TODO: Fix!
    columns = ['{:d}', '{:d}', '{:d}', '{uas:.3f}', '{ents_f:.3f}',
               '{tags_acc:.3f}', '{token_acc:.3f}']
    row = '\t'.join(columns)
    print(row.format(itn, nr_weight, nr_active_feat, **scores))

View File

@ -5,6 +5,8 @@ from pathlib import Path
from . import about from . import about
from . import util from . import util
from .util import prints
from .compat import path2str
from .cli import download from .cli import download
from .cli import link from .cli import link
@ -114,9 +116,9 @@ def resolve_model_name(name):
""" """
if name == 'en' or name == 'de': if name == 'en' or name == 'de':
versions = ['1.0.0', '1.1.0'] versions = ['1.0.0', '1.1.0']
data_path = Path(util.get_data_path()) data_path = util.get_data_path()
model_path = data_path / name model_path = data_path / name
v_model_paths = [data_path / Path(name + '-' + v) for v in versions] v_model_paths = [data_path / '%s-%s' % (name, v) for v in versions]
if not model_path.exists(): # no shortcut found if not model_path.exists(): # no shortcut found
for v_path in v_model_paths: for v_path in v_model_paths:
@ -126,10 +128,10 @@ def resolve_model_name(name):
return name return name
else: else:
raise ValueError( raise ValueError(
"Found English model at {p}. This model is not " "Found English model at %s. This model is not "
"compatible with the current version. See " "compatible with the current version. See "
"https://spacy.io/docs/usage/models to download the " "https://spacy.io/docs/usage/models to download the "
"new model.".format(p=v_path)) "new model." % path2str(v_path))
return name return name
@ -142,12 +144,11 @@ class ModelDownload():
@classmethod
def load(self, lang):
    """
    Print a deprecation warning for the old spacy.[lang].download command,
    then download the default model for the given language.
    """
    deprecation = ("The spacy.%s.download command is now deprecated. Please use "
                   "python -m spacy download [model name or shortcut] instead. For "
                   "more info, see the docs: %s." % (lang, about.__docs__))
    notice = "Downloading default '%s' model now..." % lang
    prints(deprecation, notice, title="Warning: deprecated command")
    download(lang)
@classmethod @classmethod

View File

@ -9,7 +9,7 @@ from pathlib import Path
import sys import sys
import textwrap import textwrap
from .compat import basestring_, unicode_, input_ from .compat import path2str, basestring_, input_
LANGUAGES = {} LANGUAGES = {}
@ -151,95 +151,66 @@ def parse_package_meta(package_path, package, require=True):
def get_raw_input(description, default=False):
    """
    Prompt the user for a value and return what was entered. The prompt
    consists of the description, followed by the default value in
    parentheses if a default is given.
    """
    additional = ' (default: %s)' % default if default else ''
    return input_(' %s%s: ' % (description, additional))
def print_table(data, title=None):
    """
    Print data in table format. Can either take a list of tuples or a
    dictionary, which will be converted to a list of tuples. Each row is
    padded to columns of 15 characters and may have any number of cells. An
    optional highlighted title is printed above the table. Empty data prints
    an empty table instead of raising an error.
    """
    if isinstance(data, dict):
        data = list(data.items())
    # Build the rows first, so nothing is printed if formatting a row fails.
    # The row template is sized per row rather than from the first row only.
    rows = [(' {:<15}' * len(row)).format(*row) for row in data]
    table = '\n'.join(rows)
    if title:
        print('\n \033[93m{}\033[0m'.format(title))
    print('\n{}\n'.format(table))
def print_markdown(data, title=None):
    """
    Print listed data in GitHub-flavoured Markdown format so it can be
    copy-pasted into issues. Can either take a list of tuples or a dictionary.
    Values that are paths to existing files or directories are left out, all
    other values (including non-string values) are printed.
    """
    def excl_value(value):
        # contains path (personal info)
        try:
            return Path(value).exists()
        except (TypeError, ValueError, OSError):
            # Not usable as a path (e.g. a number, list or dictionary), so
            # it can't point to a location on disk.
            return False

    if isinstance(data, dict):
        data = list(data.items())
    markdown = ["* **{}:** {}".format(l, v) for l, v in data if not excl_value(v)]
    if title:
        print("\n## {}".format(title))
    print('\n{}\n'.format('\n'.join(markdown)))
def prints(*texts, **kwargs):
    """
    Print formatted message. Each positional argument is rendered as newline-
    separated paragraph. An optional highlighted title is printed above the text
    (using ANSI escape sequences manually to avoid unnecessary dependency).

    Keyword arguments:
    title: text printed above the message and highlighted (default: None).
    exits: if set, perform a SystemExit after printing (default: False).
    """
    # The keyword arguments are read from **kwargs, because keyword-only
    # arguments after *texts are a syntax error on Python 2.
    title = kwargs.pop('title', None)
    exits = kwargs.pop('exits', False)
    if kwargs:
        raise TypeError("prints() got an unexpected keyword argument '%s'"
                        % sorted(kwargs)[0])
    title = '\033[93m{}\033[0m\n'.format(_wrap(title)) if title else ''
    message = '\n\n'.join([_wrap(text) for text in texts])
    print('\n{}{}\n'.format(title, message))
    if exits:
        # NOTE(review): the exit status is 0 even when an error was reported.
        sys.exit(0)
def _wrap_text(text): def _wrap(text, wrap_max=80, indent=4):
""" """
Wrap text at given width using textwrap module. Indent should consist of Wrap text at given width using textwrap module. Indent should consist of
spaces. Its length is deducted from wrap width to ensure exact wrapping. spaces. Its length is deducted from wrap width to ensure exact wrapping.
""" """
wrap_max = 80 indent = indent * ' '
indent = ' '
wrap_width = wrap_max - len(indent) wrap_width = wrap_max - len(indent)
if isinstance(text, Path):
text = path2str(text)
return textwrap.fill(text, width=wrap_width, initial_indent=indent, return textwrap.fill(text, width=wrap_width, initial_indent=indent,
subsequent_indent=indent, break_long_words=False, subsequent_indent=indent, break_long_words=False,
break_on_hyphens=False) break_on_hyphens=False)
def sys_exit(*messages, **kwargs):
"""
Performs SystemExit. For modules used from the command line, like
download and link. To print message, use the same arguments as for
print_msg().
"""
if messages:
print_msg(*messages, **kwargs)
sys.exit(0)