Tidy up CLI and fix print functions

2017-05-07 23:25:29 +02:00 · 2017-05-07 23:25:29 +02:00 · 59c3b9d4dd
parent 311704674d
commit 59c3b9d4dd
11 changed files with 167 additions and 291 deletions
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -2,6 +2,7 @@
 from __future__ import unicode_literals
 from . import util
 from .util import prints
 from .deprecated import resolve_model_name
 from .cli.info import info
 from .glossary import explain
@@ -26,9 +27,8 @@ def load(name, **overrides):
        if not model_path.exists():
            lang_name = util.get_lang_class(name).lang
            model_path = None
-            util.print_msg(
+            prints("Only loading the '%s' tokenizer." % lang_name,
-                "Only loading the '{}' tokenizer.".format(lang_name),
+                   title="Warning: no model found for '%s'" % name)
                title="Warning: no model found for '{}'".format(name))
    else:
        model_path = util.ensure_path(overrides['path'])
        data_path = model_path.parent
--- a/spacy/cli/convert.py
+++ b/spacy/cli/convert.py
@@ -4,7 +4,7 @@ from __future__ import unicode_literals
 from pathlib import Path
 from .converters import conllu2json
-from .. import util
+from ..util import prints
 # Converters are matched by file extension. To add a converter, add a new entry
@@ -19,17 +19,12 @@ CONVERTERS = {
 def convert(input_file, output_dir, *args):
    input_path = Path(input_file)
    output_path = Path(output_dir)
-    check_dirs(input_path, output_path)
+    if not input_path.exists():
-    file_ext = input_path.suffix
+        prints(input_path, title="Input file not found", exits=True)
    if file_ext in CONVERTERS:
        CONVERTERS[file_ext](input_path, output_path, *args)
    else:
        util.sys_exit("Can't find converter for {}".format(input_path.parts[-1]),
                      title="Unknown format")
 def check_dirs(input_file, output_path):
    if not input_file.exists():
        util.sys_exit(input_file.as_posix(), title="Input file not found")
    if not output_path.exists():
-        util.sys_exit(output_path.as_posix(), title="Output directory not found")
+        prints(output_path, title="Output directory not found", exits=True)
    file_ext = input_path.suffix
    if not file_ext in CONVERTERS:
        prints("Can't find converter for %s" % input_path.parts[-1],
               title="Unknown format", exits=True)
    CONVERTERS[file_ext](input_path, output_path, *args)
--- a/spacy/cli/converters/conllu2json.py
+++ b/spacy/cli/converters/conllu2json.py
@@ -1,9 +1,8 @@
 # coding: utf8
 from __future__ import unicode_literals
-import json
+from ...compat import json_dumps, path2str
-from ...compat import json_dumps
+from ...util import prints
 from ... import util
 def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
@@ -32,8 +31,8 @@ def conllu2json(input_path, output_path, n_sents=10, use_morphology=False):
    output_file = output_path / output_filename
    with output_file.open('w', encoding='utf-8') as f:
        f.write(json_dumps(docs))
-    util.print_msg("Created {} documents".format(len(docs)),
+    prints("Created %d documents" % len(docs),
-                   title="Generated output file {}".format(output_file))
+           title="Generated output file %s" % path2str(output_file))
 def read_conllx(input_path, use_morphology=False, n=0):
--- a/spacy/cli/download.py
+++ b/spacy/cli/download.py
@@ -6,78 +6,52 @@ import os
 import subprocess
 import sys
-from .link import link_package
+from .link import link
 from ..util import prints
 from .. import about
 from .. import util
-def download(model=None, direct=False):
+def download(model, direct=False):
    check_error_depr(model)
    if direct:
        download_model('{m}/{m}.tar.gz'.format(m=model))
    else:
-        model_name = check_shortcut(model)
+        shortcuts = get_json(about.__shortcuts__, "available shortcuts")
        model_name = shortcuts.get(model, model)
        compatibility = get_compatibility()
        version = get_version(model_name, compatibility)
        download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model_name, v=version))
-        link_package(model_name, model, force=True)
+        link(model_name, model, force=True)
 def get_json(url, desc):
    r = requests.get(url)
    if r.status_code != 200:
-        util.sys_exit(
+        prints("Couldn't fetch %s. Please find a model for your spaCy installation "
-            "Couldn't fetch {d}. Please find the right model for your spaCy "
+               "(v%s), and download it manually." % (desc, about.__version__),
-            "installation (v{v}), and download it manually:".format(d=desc, v=about.__version__),
+               about.__docs__, title="Server error (%d)" % r.status_code, exits=True)
            "python -m spacy.download [full model name + version] --direct",
            title="Server error ({c})".format(c=r.status_code))
    return r.json()
 def check_shortcut(model):
    shortcuts = get_json(about.__shortcuts__, "available shortcuts")
    return shortcuts.get(model, model)
 def get_compatibility():
    version = about.__version__
    comp_table = get_json(about.__compatibility__, "compatibility table")
    comp = comp_table['spacy']
    if version not in comp:
-        util.sys_exit(
+        prints("No compatible models found for v%s of spaCy." % version,
-            "No compatible models found for v{v} of spaCy.".format(v=version),
+               title="Compatibility error", exits=True)
            title="Compatibility error")
    return comp[version]
 def get_version(model, comp):
    if model not in comp:
-        util.sys_exit(
+        version = about.__version__
-            "No compatible model found for "
+        prints("No compatible model found for '%s' (spaCy v%s)." % (model, version),
-            "'{m}' (spaCy v{v}).".format(m=model, v=about.__version__),
+               title="Compatibility error", exits=True)
            title="Compatibility error")
    return comp[model][0]
 def download_model(filename):
    util.print_msg("Downloading {f}".format(f=filename))
    download_url = about.__download_url__ + '/' + filename
    subprocess.call([sys.executable, '-m',
        'pip', 'install', '--no-cache-dir', download_url],
        env=os.environ.copy())
 def check_error_depr(model):
    if not model:
        util.sys_exit(
            "python -m spacy.download [name or shortcut]",
            title="Missing model name or shortcut")
    if model == 'all':
        util.sys_exit(
            "As of v1.7.0, the download all command is deprecated. Please "
            "download the models individually via spacy.download [model name] "
            "or pip install. For more info on this, see the documentation: "
            "{d}".format(d=about.__docs__),
            title="Deprecated command")
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -4,49 +4,46 @@ from __future__ import unicode_literals
 import platform
 from pathlib import Path
-from ..compat import unicode_
+from ..compat import path2str
 from .. import about
 from .. import util
 def info(model=None, markdown=False):
    if model:
-        data = util.parse_package_meta(util.get_data_path(), model, require=True)
+        data_path = util.get_data_path()
-        model_path = Path(__file__).parent / util.get_data_path() / model
+        data = util.parse_package_meta(data_path, model, require=True)
        model_path = Path(__file__).parent / data_path / model
        if model_path.resolve() != model_path:
-            data['link'] = unicode_(model_path)
+            data['link'] = path2str(model_path)
-            data['source'] = unicode_(model_path.resolve())
+            data['source'] = path2str(model_path.resolve())
        else:
-            data['source'] = unicode_(model_path)
+            data['source'] = path2str(model_path)
-        print_info(data, "model " + model, markdown)
+        print_info(data, 'model %s' % model, markdown)
    else:
-        data = get_spacy_data()
+        data = {'spaCy version': about.__version__,
-        print_info(data, "spaCy", markdown)
+                'Location': path2str(Path(__file__).parent.parent),
                'Platform': platform.platform(),
                'Python version': platform.python_version(),
                'Models': list_models()}
        print_info(data, 'spaCy', markdown)
 def print_info(data, title, markdown):
-    title = "Info about {title}".format(title=title)
+    title = 'Info about %s' % title
    if markdown:
        util.print_markdown(data, title=title)
    else:
        util.print_table(data, title=title)
 def get_spacy_data():
    return {
        'spaCy version': about.__version__,
        'Location': unicode_(Path(__file__).parent.parent),
        'Platform': platform.platform(),
        'Python version': platform.python_version(),
        'Installed models': ', '.join(list_models())
    }
 def list_models():
-    # exclude common cache directories – this means models called "cache" etc.
+    def exclude_dir(dir_name):
-    # won't show up in list, but it seems worth it
+        # exclude common cache directories and hidden directories
-    exclude = ['cache', 'pycache', '__pycache__']
+        exclude = ['cache', 'pycache', '__pycache__']
        return dir_name in exclude or dir_name.startswith('.')
    data_path = util.get_data_path()
    if data_path:
        models = [f.parts[-1] for f in data_path.iterdir() if f.is_dir()]
-        return [m for m in models if m not in exclude]
+        return ', '.join([m for m in models if not exclude_dir(m)])
    return '-'
--- a/spacy/cli/link.py
+++ b/spacy/cli/link.py
@@ -1,78 +1,37 @@
 # coding: utf8
 from __future__ import unicode_literals
 import pip
 from pathlib import Path
-import importlib
+from ..compat import symlink_to, path2str
-from ..compat import unicode_, symlink_to
+from ..util import prints
 from .. import util
 def link(origin, link_name, force=False):
-    if is_package(origin):
+    if util.is_package(origin):
-        link_package(origin, link_name, force)
+        model_path = util.get_model_package_path(origin)
    else:
-        symlink(origin, link_name, force)
+        model_path = Path(origin)
 def link_package(package_name, link_name, force=False):
    # Here we're importing the module just to find it. This is worryingly
    # indirect, but it's otherwise very difficult to find the package.
    # Python's installation and import rules are very complicated.
    pkg = importlib.import_module(package_name)
    package_path = Path(pkg.__file__).parent.parent
    meta = get_meta(package_path, package_name)
    model_name = package_name + '-' + meta['version']
    model_path = package_path / package_name / model_name
    symlink(model_path, link_name, force)
 def symlink(model_path, link_name, force):
    model_path = Path(model_path)
    if not model_path.exists():
-        util.sys_exit(
+        prints("The data should be located in %s" % path2str(model_path),
-            "The data should be located in {p}".format(p=model_path),
+               title="Can't locate model data", exits=True)
            title="Can't locate model data")
    link_path = util.get_data_path() / link_name
    if link_path.exists() and not force:
-        util.sys_exit(
+        prints("To overwrite an existing link, use the --force flag.",
-            "To overwrite an existing link, use the --force flag.",
+               title="Link %s already exists" % link_name, exits=True)
            title="Link {l} already exists".format(l=link_name))
    elif link_path.exists():
        link_path.unlink()
    try:
        symlink_to(link_path, model_path)
    except:
-        # This is quite dirty, but just making sure other errors are caught so
+        # This is quite dirty, but just making sure other errors are caught.
-        # users at least see a proper message.
+        prints("Creating a symlink in spacy/data failed. Make sure you have "
-        util.print_msg(
+               "the required permissions and try re-running the command as "
-            "Creating a symlink in spacy/data failed. Make sure you have the "
+               "admin, or use a virtualenv. You can still import the model as a "
-            "required permissions and try re-running the command as admin, or "
+               "module and call its load() method, or create the symlink manually.",
-            "use a virtualenv to install spaCy in a user directory, instead of "
+               "%s --> %s" % (path2str(model_path), path2str(link_path)),
-            "doing a system installation.",
+               title="Error: Couldn't link model to '%s'" % link_name)
            "You can still import the model as a Python package and call its "
            "load() method, or create the symlink manually:",
            "{a} --> {b}".format(a=unicode_(model_path), b=unicode_(link_path)),
            title="Error: Couldn't link model to '{l}'".format(l=link_name))
        raise
-
+    prints("%s --> %s" % (path2str(model_path), path2str(link_path)),
-    util.print_msg(
+           "You can now load the model via spacy.load('%s')." % link_name,
-        "{a} --> {b}".format(a=model_path.as_posix(), b=link_path.as_posix()),
+           title="Linking successful")
        "You can now load the model via spacy.load('{l}').".format(l=link_name),
        title="Linking successful")
 def get_meta(package_path, package):
    meta = util.parse_package_meta(package_path, package)
    return meta
 def is_package(origin):
    packages = pip.get_installed_distributions()
    for package in packages:
        if package.project_name.replace('-', '_') == origin:
            return True
    return False
--- a/spacy/cli/model.py
+++ b/spacy/cli/model.py
@@ -4,21 +4,25 @@ from __future__ import unicode_literals
 import gzip
 import math
 from ast import literal_eval
 from pathlib import Path
 from preshed.counter import PreshCounter
 from ..vocab import write_binary_vectors
-from ..compat import fix_text
+from ..compat import fix_text, path2str
 from ..util import prints
 from .. import util
 def model(lang, model_dir, freqs_data, clusters_data, vectors_data):
-    model_path = Path(model_dir)
+    model_path = util.ensure_path(model_dir)
-    freqs_path = Path(freqs_data)
+    freqs_path = util.ensure_path(freqs_data)
-    clusters_path = Path(clusters_data) if clusters_data else None
+    clusters_path = util.ensure_path(clusters_data)
-    vectors_path = Path(vectors_data) if vectors_data else None
+    vectors_path = util.ensure_path(vectors_data)
-
+    if not freqs_path.is_file():
-    check_dirs(freqs_path, clusters_path, vectors_path)
+        prints(freqs_path, title="No frequencies file found", exits=True)
    if clusters_path and not clusters_path.is_file():
        prints(clusters_path, title="No Brown clusters file found", exits=True)
    if vectors_path and not vectors_path.is_file():
        prints(vectors_path, title="No word vectors file found", exits=True)
    vocab = util.get_lang_class(lang).Defaults.create_vocab()
    probs, oov_prob = read_probs(freqs_path)
    clusters = read_clusters(clusters_path) if clusters_path else {}
@@ -36,14 +40,14 @@ def create_model(model_path, vectors_path, vocab, oov_prob):
        model_path.mkdir()
    if not vocab_path.exists():
        vocab_path.mkdir()
-    vocab.dump(lexemes_path.as_posix())
+    vocab.dump(path2str(lexemes_path))
    with strings_path.open('w') as f:
        vocab.strings.dump(f)
    with oov_path.open('w') as f:
        f.write('%f' % oov_prob)
    if vectors_path:
        vectors_dest = vocab_path / 'vec.bin'
-        write_binary_vectors(vectors_path.as_posix(), vectors_dest.as_posix())
+        write_binary_vectors(path2str(vectors_path), path2str(vectors_dest))
 def read_probs(freqs_path, max_length=100, min_doc_freq=5, min_freq=200):
@@ -115,17 +119,8 @@ def populate_vocab(vocab, clusters, probs, oov_prob):
 def check_unzip(file_path):
-    file_path_str = file_path.as_posix()
+    file_path_str = path2str(file_path)
    if file_path_str.endswith('gz'):
        return gzip.open(file_path_str)
    else:
        return file_path.open()
 def check_dirs(freqs_data, clusters_data, vectors_data):
    if not freqs_data.is_file():
        util.sys_exit(freqs_data.as_posix(), title="No frequencies file found")
    if clusters_data and not clusters_data.is_file():
        util.sys_exit(clusters_data.as_posix(), title="No Brown clusters file found")
    if vectors_data and not vectors_data.is_file():
        util.sys_exit(vectors_data.as_posix(), title="No word vectors file found")
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -5,64 +5,57 @@ import shutil
 import requests
 from pathlib import Path
-from ..compat import unicode_, json_dumps
+from ..compat import path2str, json_dumps
 from ..util import prints
 from .. import util
 from .. import about
 def package(input_dir, output_dir, meta_path, force):
-    input_path = Path(input_dir)
+    input_path = util.ensure_path(input_dir)
-    output_path = Path(output_dir)
+    output_path = util.ensure_path(output_dir)
    meta_path = util.ensure_path(meta_path)
-    check_dirs(input_path, output_path, meta_path)
+    if not input_path or not input_path.exists():
        prints(input_path, title="Model directory not found", exits=True)
    if not output_path or not output_path.exists():
        prints(output_path, title="Output directory not found", exits=True)
    if meta_path and not meta_path.exists():
        prints(meta_path, title="meta.json not found", exits=True)
    template_setup = get_template('setup.py')
    template_manifest = get_template('MANIFEST.in')
    template_init = get_template('en_model_name/__init__.py')
    meta_path = meta_path or input_path / 'meta.json'
    if meta_path.is_file():
-        util.print_msg(unicode_(meta_path), title="Reading meta.json from file")
+        prints(meta_path, title="Reading meta.json from file")
        meta = util.read_json(meta_path)
    else:
        meta = generate_meta()
    validate_meta(meta, ['lang', 'name', 'version'])
    model_name = meta['lang'] + '_' + meta['name']
    model_name_v = model_name + '-' + meta['version']
    main_path = output_path / model_name_v
    package_path = main_path / model_name
    create_dirs(package_path, force)
-    shutil.copytree(unicode_(input_path), unicode_(package_path / model_name_v))
+    shutil.copytree(path2str(input_path), path2str(package_path / model_name_v))
    create_file(main_path / 'meta.json', json_dumps(meta))
    create_file(main_path / 'setup.py', template_setup)
    create_file(main_path / 'MANIFEST.in', template_manifest)
    create_file(package_path / '__init__.py', template_init)
-
+    prints(main_path, "To build the package, run `python setup.py sdist` in this "
-    util.print_msg(
+           "directory.", title="Successfully created package '%s'" % model_name_v)
        unicode_(main_path),
        "To build the package, run `python setup.py sdist` in that directory.",
        title="Successfully created package {p}".format(p=model_name_v))
 def check_dirs(input_path, output_path, meta_path):
    if not input_path.exists():
        util.sys_exit(unicode_(input_path.as_poisx), title="Model directory not found")
    if not output_path.exists():
        util.sys_exit(unicode_(output_path), title="Output directory not found")
    if meta_path and not meta_path.exists():
        util.sys_exit(unicode_(meta_path), title="meta.json not found")
 def create_dirs(package_path, force):
    if package_path.exists():
        if force:
-            shutil.rmtree(unicode_(package_path))
+            shutil.rmtree(path2str(package_path))
        else:
-            util.sys_exit(unicode_(package_path),
+            prints(package_path, "Please delete the directory and try again, or "
-                "Please delete the directory and try again, or use the --force "
+                   "use the --force flag to overwrite existing directories.",
-                "flag to overwrite existing directories.",
+                   title="Package directory already exists", exits=True)
                title="Package directory already exists")
    Path.mkdir(package_path, parents=True)
@@ -75,15 +68,14 @@ def generate_meta():
    settings = [('lang', 'Model language', 'en'),
                ('name', 'Model name', 'model'),
                ('version', 'Model version', '0.0.0'),
-                ('spacy_version', 'Required spaCy version', '>=1.7.0,<2.0.0'),
+                ('spacy_version', 'Required spaCy version', '>=2.0.0,<3.0.0'),
                ('description', 'Model description', False),
                ('author', 'Author', False),
                ('email', 'Author email', False),
                ('url', 'Author website', False),
                ('license', 'License', 'CC BY-NC 3.0')]
-    util.print_msg("Enter the package settings for your model.", title="Generating meta.json")
+    prints("Enter the package settings for your model.", title="Generating meta.json")
    meta = {}
    for setting, desc, default in settings:
        response = util.get_raw_input(desc, default)
@@ -94,16 +86,13 @@ def generate_meta():
 def validate_meta(meta, keys):
    for key in keys:
        if key not in meta or meta[key] == '':
-            util.sys_exit(
+            prints("This setting is required to build your package.",
-                "This setting is required to build your package.",
+                   title='No "%s" setting found in meta.json' % key, exits=True)
                title='No "{k}" setting found in meta.json'.format(k=key))
 def get_template(filepath):
-    url = 'https://raw.githubusercontent.com/explosion/spacy-dev-resources/master/templates/model/'
+    r = requests.get(about.__model_files__ + filepath)
    r = requests.get(url + filepath)
    if r.status_code != 200:
-        util.sys_exit(
+        prints("Couldn't fetch template files from GitHub.",
-            "Couldn't fetch template files from GitHub.",
+               title="Server error (%d)" % r.status_code, exits=True)
            title="Server error ({c})".format(c=r.status_code))
    return r.text
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -4,19 +4,24 @@ from __future__ import unicode_literals, division, print_function
 import json
 from collections import defaultdict
 from ..util import ensure_path
 from ..scorer import Scorer
 from ..gold import GoldParse, merge_sents
 from ..gold import read_json_file as read_gold_json
 from ..util import prints
 from .. import util
 def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ner,
          parser_L1):
-    output_path = ensure_path(output_dir)
+    output_path = util.ensure_path(output_dir)
-    train_path = ensure_path(train_data)
+    train_path = util.ensure_path(train_data)
-    dev_path = ensure_path(dev_data)
+    dev_path = util.ensure_path(dev_data)
-    check_dirs(output_path, train_path, dev_path)
+    if not output_path.exists():
        prints(output_path, title="Output directory not found", exits=True)
    if not train_path.exists():
        prints(train_path, title="Training data not found", exits=True)
    if dev_path and not dev_path.exists():
        prints(dev_path, title="Development data not found", exits=True)
    lang = util.get_lang_class(language)
    parser_cfg = {
@@ -44,14 +49,13 @@ def train(language, output_dir, train_data, dev_data, n_iter, tagger, parser, ne
 def train_config(config):
-    config_path = ensure_path(config)
+    config_path = util.ensure_path(config)
    if not config_path.is_file():
-        util.sys_exit(config_path.as_posix(), title="Config file not found")
+        prints(config_path, title="Config file not found", exits=True)
    config = json.load(config_path)
    for setting in []:
        if setting not in config.keys():
-            util.sys_exit("{s} not found in config file.".format(s=setting),
+            prints("%s not found in config file." % setting, title="Missing setting")
                          title="Missing setting")
 def train_model(Language, train_data, dev_data, output_path, tagger_cfg, parser_cfg,
@@ -88,16 +92,8 @@ def evaluate(Language, gold_tuples, output_path):
    return scorer
 def check_dirs(output_path, train_path, dev_path):
    if not output_path.exists():
        util.sys_exit(output_path.as_posix(), title="Output directory not found")
    if not train_path.exists():
        util.sys_exit(train_path.as_posix(), title="Training data not found")
    if dev_path and not dev_path.exists():
        util.sys_exit(dev_path.as_posix(), title="Development data not found")
 def print_progress(itn, nr_weight, nr_active_feat, **scores):
    # TODO: Fix!
    tpl = '{:d}\t{:d}\t{:d}\t{uas:.3f}\t{ents_f:.3f}\t{tags_acc:.3f}\t{token_acc:.3f}'
    print(tpl.format(itn, nr_weight, nr_active_feat, **scores))
--- a/spacy/deprecated.py
+++ b/spacy/deprecated.py
@@ -5,6 +5,8 @@ from pathlib import Path
 from . import about
 from . import util
 from .util import prints
 from .compat import path2str
 from .cli import download
 from .cli import link
@@ -114,9 +116,9 @@ def resolve_model_name(name):
    """
    if name == 'en' or name == 'de':
        versions = ['1.0.0', '1.1.0']
-        data_path = Path(util.get_data_path())
+        data_path = util.get_data_path()
        model_path = data_path / name
-        v_model_paths = [data_path / Path(name + '-' + v) for v in versions]
+        v_model_paths = [data_path / '%s-%s' % (name, v) for v in versions]
        if not model_path.exists(): # no shortcut found
            for v_path in v_model_paths:
@@ -126,10 +128,10 @@ def resolve_model_name(name):
                        return name
                    else:
                        raise ValueError(
-                            "Found English model at {p}. This model is not "
+                            "Found English model at %s. This model is not "
                            "compatible with the current version. See "
                            "https://spacy.io/docs/usage/models to download the "
-                            "new model.".format(p=v_path))
+                            "new model." % path2str(v_path))
    return name
@@ -142,12 +144,11 @@ class ModelDownload():
    @classmethod
    def load(self, lang):
-        util.print_msg(
+        prints("The spacy.%s.download command is now deprecated. Please use "
-            "The spacy.{l}.download command is now deprecated. Please use "
+               "python -m spacy download [model name or shortcut] instead. For "
-            "python -m spacy download [model name or shortcut] instead. For more "
+               "more info, see the docs: %s." % (lang, about.__docs__),
-            "info and available models, see the documentation: {d}. "
+               "Downloading default '%s' model now..." % lang,
-            "Downloading default '{l}' model now...".format(d=about.__docs__, l=lang),
+               title="Warning: deprecated command")
            title="Warning: deprecated command")
        download(lang)
    @classmethod
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -9,7 +9,7 @@ from pathlib import Path
 import sys
 import textwrap
-from .compat import basestring_, unicode_, input_
+from .compat import path2str, basestring_, input_
 LANGUAGES = {}
@@ -151,95 +151,66 @@ def parse_package_meta(package_path, package, require=True):
 def get_raw_input(description, default=False):
    """
    Get user input via raw_input / input and return input value. Takes a
-    description for the prompt, and an optional default value that's displayed
+    description, and an optional default value to display with the prompt.
    with the prompt.
    """
-    additional = ' (default: {d})'.format(d=default) if default else ''
+    additional = ' (default: %s)' % default if default else ''
-    prompt = '    {d}{a}: '.format(d=description, a=additional)
+    prompt = '    %s%s: ' % (description, additional)
    user_input = input_(prompt)
    return user_input
-def print_table(data, **kwargs):
+def print_table(data, title=None):
    """
    Print data in table format. Can either take a list of tuples or a
    dictionary, which will be converted to a list of tuples.
    """
    if type(data) == dict:
        data = list(data.items())
-
+    tpl_row = '    {:<15}' * len(data[0])
    tpl_msg = '\n{msg}\n'
    tpl_title = '\n    \033[93m{msg}\033[0m'
    tpl_row ="    {:<15}" * len(data[0])
    table = '\n'.join([tpl_row.format(l, v) for l, v in data])
-
+    if title:
-    if 'title' in kwargs and kwargs['title']:
+        print('\n    \033[93m{}\033[0m'.format(title))
-        print(tpl_title.format(msg=kwargs['title']))
+    print('\n{}\n'.format(table))
    print(tpl_msg.format(msg=table))
-def print_markdown(data, **kwargs):
+def print_markdown(data, title=None):
    """
    Print listed data in GitHub-flavoured Markdown format so it can be
-    copy-pasted into issues. Can either take a list of tuples or a dictionary,
+    copy-pasted into issues. Can either take a list of tuples or a dictionary.
    which will be converted to a list of tuples.
    """
    def excl_value(value):
-        # don't print value if it contains absolute path of directory (i.e.
+        return Path(value).exists() # contains path (personal info)
        # personal info). Other conditions can be included here if necessary.
        if unicode_(Path(__file__).parent) in value:
            return True
    if type(data) == dict:
        data = list(data.items())
-
+    markdown = ["* **{}:** {}".format(l, v) for l, v in data if not excl_value(v)]
-    tpl_msg = "\n{msg}\n"
+    if title:
-    tpl_title = "\n## {msg}"
+        print("\n## {}".format(title))
-    tpl_row = "* **{l}:** {v}"
+    print('\n{}\n'.format('\n'.join(markdown)))
    markdown = '\n'.join([tpl_row.format(l=l, v=v) for l, v in data if not excl_value(v)])
    if 'title' in kwargs and kwargs['title']:
        print(tpl_title.format(msg=kwargs['title']))
    print(tpl_msg.format(msg=markdown))
-def print_msg(*text, **kwargs):
+def prints(*texts, title=None, exits=False):
    """
    Print formatted message. Each positional argument is rendered as newline-
-    separated paragraph. If kwarg 'title' exist, title is printed above the text
+    separated paragraph. An optional highlighted title is printed above the text
-    and highlighted (using ANSI escape sequences manually to avoid unnecessary
+    (using ANSI escape sequences manually to avoid unnecessary dependency).
    dependency).
    """
-    message = '\n\n'.join([_wrap_text(t) for t in text])
+    title = '\033[93m{}\033[0m\n'.format(_wrap(title)) if title else ''
-    tpl_msg = '\n{msg}\n'
+    message = '\n\n'.join([_wrap(text) for text in texts])
-    tpl_title = '\n\033[93m{msg}\033[0m'
+    print('\n{}{}\n'.format(title, message))
-
+    if exits:
-    if 'title' in kwargs and kwargs['title']:
+        sys.exit(0)
        title = _wrap_text(kwargs['title'])
        print(tpl_title.format(msg=title))
    print(tpl_msg.format(msg=message))
-def _wrap_text(text):
+def _wrap(text, wrap_max=80, indent=4):
    """
    Wrap text at given width using textwrap module. Indent should consist of
    spaces. Its length is deducted from wrap width to ensure exact wrapping.
    """
-    wrap_max = 80
+    indent = indent * ' '
    indent = '    '
    wrap_width = wrap_max - len(indent)
    if isinstance(text, Path):
        text = path2str(text)
    return textwrap.fill(text, width=wrap_width, initial_indent=indent,
-                               subsequent_indent=indent, break_long_words=False,
+                         subsequent_indent=indent, break_long_words=False,
-                               break_on_hyphens=False)
+                         break_on_hyphens=False)
 def sys_exit(*messages, **kwargs):
    """
    Performs SystemExit. For modules used from the command line, like
    download and link. To print message, use the same arguments as for
    print_msg().
    """
    if messages:
        print_msg(*messages, **kwargs)
    sys.exit(0)