spaCy/spacy/__main__.py

# coding: utf8
from __future__ import print_function
# NB: Do not enable unicode_literals here -- it breaks plac on Python 2.
#from __future__ import unicode_literals

import plac
from spacy.cli import download as cli_download
from spacy.cli import link as cli_link
from spacy.cli import info as cli_info
from spacy.cli import package as cli_package
from spacy.cli import train as cli_train
from spacy.cli import model as cli_model
from spacy.cli import convert as cli_convert


class CLI(object):
    """
    Command-line interface for spaCy
    """
    # Names of the methods below that are exposed as sub-commands; the class
    # is handed to plac.Interpreter in the __main__ guard of this module.
    commands = ('download', 'link', 'info', 'package', 'train', 'model', 'convert')

    # Each annotation tuple is (help text, kind, abbreviation, type).
    # The method docstrings are kept as user-facing text and only have
    # typos corrected.

    @plac.annotations(
        model=("model to download (shortcut or model name)", "positional", None, str),
        direct=("force direct download. Needs model name with version and won't "
                "perform compatibility check", "flag", "d", bool)
    )
    def download(self, model, direct=False):
        """
        Download compatible model from default download path using pip. Model
        can be shortcut, model name or, if --direct flag is set, full model name
        with version.
        """
        cli_download(model, direct)

    @plac.annotations(
        origin=("package name or local path to model", "positional", None, str),
        link_name=("name of shortcut link to create", "positional", None, str),
        force=("force overwriting of existing link", "flag", "f", bool)
    )
    def link(self, origin, link_name, force=False):
        """
        Create a symlink for models within the spacy/data directory. Accepts
        either the name of a pip package, or the local path to the model data
        directory. Linking models allows loading them via spacy.load(link_name).
        """
        cli_link(origin, link_name, force)

    @plac.annotations(
        model=("optional: shortcut link of model", "positional", None, str),
        # A flag is a boolean switch; declare it as bool like every other flag.
        markdown=("generate Markdown for GitHub issues", "flag", "md", bool)
    )
    def info(self, model=None, markdown=False):
        """
        Print info about spaCy installation. If a model shortcut link is
        specified as an argument, print model information. Flag --markdown
        prints details in Markdown for easy copy-pasting to GitHub issues.
        """
        cli_info(model, markdown)

    @plac.annotations(
        input_dir=("directory with model data", "positional", None, str),
        output_dir=("output parent directory", "positional", None, str),
        meta=("path to meta.json", "option", "m", str),
        force=("force overwriting of existing folder in output directory", "flag", "f", bool)
    )
    def package(self, input_dir, output_dir, meta=None, force=False):
        """
        Generate Python package for model data, including meta and required
        installation files. A new directory will be created in the specified
        output directory, and model data will be copied over.
        """
        cli_package(input_dir, output_dir, meta, force)

    @plac.annotations(
        lang=("model language", "positional", None, str),
        output_dir=("output directory to store model in", "positional", None, str),
        train_data=("location of JSON-formatted training data", "positional", None, str),
        dev_data=("location of JSON-formatted development data (optional)", "positional", None, str),
        n_iter=("number of iterations", "option", "n", int),
        nsents=("number of sentences", "option", None, int),
        parser_L1=("L1 regularization penalty for parser", "option", "L", float),
        use_gpu=("Use GPU", "flag", "g", bool),
        no_tagger=("Don't train tagger", "flag", "T", bool),
        no_parser=("Don't train parser", "flag", "P", bool),
        no_entities=("Don't train NER", "flag", "N", bool)
    )
    def train(self, lang, output_dir, train_data, dev_data=None, n_iter=15,
              nsents=0, parser_L1=0.0, use_gpu=False,
              no_tagger=False, no_parser=False, no_entities=False):
        """
        Train a model. Expects data in spaCy's JSON format.
        """
        # 0 is the command-line default meaning "not set"; hand None to the
        # training routine in that case.
        nsents = nsents or None
        # NOTE: parser_L1 is passed last, after the boolean switches, which
        # differs from the order of this method's own signature.
        cli_train(lang, output_dir, train_data, dev_data, n_iter, nsents,
                  use_gpu, no_tagger, no_parser, no_entities, parser_L1)

    @plac.annotations(
        lang=("model language", "positional", None, str),
        model_dir=("output directory to store model in", "positional", None, str),
        freqs_data=("tab-separated frequencies file", "positional", None, str),
        clusters_data=("Brown clusters file", "positional", None, str),
        vectors_data=("word vectors file", "positional", None, str)
    )
    def model(self, lang, model_dir, freqs_data, clusters_data=None, vectors_data=None):
        """
        Initialize a new model and its data directory.
        """
        cli_model(lang, model_dir, freqs_data, clusters_data, vectors_data)

    @plac.annotations(
        input_file=("input file", "positional", None, str),
        output_dir=("output directory for converted file", "positional", None, str),
        # A sentence count is a whole number; parsing it as int keeps the
        # command-line value the same type as the default (10).
        n_sents=("Number of sentences per doc", "option", "n", int),
        morphology=("Enable appending morphology to tags", "flag", "m", bool)
    )
    def convert(self, input_file, output_dir, n_sents=10, morphology=False):
        """
        Convert files into JSON format for use with train command and other
        experiment management functions.
        """
        cli_convert(input_file, output_dir, n_sents, morphology)

    def __missing__(self, name):
        # Fallback for a command name that is not listed in `commands`:
        # report it and point the user to --help instead of raising.
        print("\n   Command %r does not exist."
              "\n   Use the --help flag for a list of available commands.\n" % name)

if __name__ == '__main__':
    # Script entry point: hand the CLI class to plac's interpreter.
    import sys
    import plac

    # Replace the script path in argv[0] with the name 'spacy' before plac
    # runs (presumably so usage/help output shows 'spacy' -- confirm).
    sys.argv[0] = 'spacy'
    plac.Interpreter.call(CLI)
Add directory cli and set up command line interface 2017-03-18 14:14:48 +00:00			`# coding: utf8`
Fix CLI for Python 2 2017-03-18 17:14:03 +00:00			`from __future__ import print_function`
			`# NB! This breaks in plac on Python 2!!`
Fix formatting 2017-03-23 10:08:30 +00:00			`#from __future__ import unicode_literals`
Add directory cli and set up command line interface 2017-03-18 14:14:48 +00:00
			`import plac`
			`from spacy.cli import download as cli_download`
			`from spacy.cli import link as cli_link`
			`from spacy.cli import info as cli_info`
Add cli.package command to build model packages 2017-03-20 21:50:13 +00:00			`from spacy.cli import package as cli_package`
Add spacy train work in progress 2017-03-23 10:08:41 +00:00			`from spacy.cli import train as cli_train`
Add experimental model init command 2017-03-26 18:51:40 +00:00			`from spacy.cli import model as cli_model`
Add convert command 2017-04-07 11:04:17 +00:00			`from spacy.cli import convert as cli_convert`
Add directory cli and set up command line interface 2017-03-18 14:14:48 +00:00

			`class CLI(object):`
Use consistent formatting for docstrings 2017-04-15 09:59:21 +00:00			`"""`
			`Command-line interface for spaCy`
			`"""`
Add convert command 2017-04-07 11:04:17 +00:00			`commands = ('download', 'link', 'info', 'package', 'train', 'model', 'convert')`
Add directory cli and set up command line interface 2017-03-18 14:14:48 +00:00
			`@plac.annotations(`
			`model=("model to download (shortcut or model name)", "positional", None, str),`
			`direct=("force direct download. Needs model name with version and won't "`
			`"perform compatibility check", "flag", "d", bool)`
			`)`
Don't set default value for model 2017-05-07 21:22:21 +00:00			`def download(self, model, direct=False):`
Add directory cli and set up command line interface 2017-03-18 14:14:48 +00:00			`"""`
			`Download compatible model from default download path using pip. Model`
			`can be shortcut, model name or, if --direct flag is set, full model name`
			`with version.`
			`"""`
			`cli_download(model, direct)`


			`@plac.annotations(`
			`origin=("package name or local path to model", "positional", None, str),`
Fix formatting 2017-03-21 01:05:14 +00:00			`link_name=("name of shortuct link to create", "positional", None, str),`
			`force=("force overwriting of existing link", "flag", "f", bool)`
Add directory cli and set up command line interface 2017-03-18 14:14:48 +00:00			`)`
			`def link(self, origin, link_name, force=False):`
			`"""`
			`Create a symlink for models within the spacy/data directory. Accepts`
			`either the name of a pip package, or the local path to the model data`
			`directory. Linking models allows loading them via spacy.load(link_name).`
			`"""`
			`cli_link(origin, link_name, force)`


			`@plac.annotations(`
			`model=("optional: shortcut link of model", "positional", None, str),`
			`markdown=("generate Markdown for GitHub issues", "flag", "md", str)`
			`)`
			`def info(self, model=None, markdown=False):`
			`"""`
			`Print info about spaCy installation. If a model shortcut link is`
			`speficied as an argument, print model information. Flag --markdown`
			`prints details in Markdown for easy copy-pasting to GitHub issues.`
			`"""`
			`cli_info(model, markdown)`


Add cli.package command to build model packages 2017-03-20 21:50:13 +00:00			`@plac.annotations(`
			`input_dir=("directory with model data", "positional", None, str),`
Add spacy train work in progress 2017-03-23 10:08:41 +00:00			`output_dir=("output parent directory", "positional", None, str),`
Add more options to read in meta data in package command Add meta option to supply path to meta.json. If no meta path is set, check if meta.json exists in input directory and use it. Otherwise, prompt for details on the command line. 2017-04-16 11:06:02 +00:00			`meta=("path to meta.json", "option", "m", str),`
Add docs for package command 2017-03-21 10:19:21 +00:00			`force=("force overwriting of existing folder in output directory", "flag", "f", bool)`
Add cli.package command to build model packages 2017-03-20 21:50:13 +00:00			`)`
Add more options to read in meta data in package command Add meta option to supply path to meta.json. If no meta path is set, check if meta.json exists in input directory and use it. Otherwise, prompt for details on the command line. 2017-04-16 11:06:02 +00:00			`def package(self, input_dir, output_dir, meta=None, force=False):`
Add cli.package command to build model packages 2017-03-20 21:50:13 +00:00			`"""`
			`Generate Python package for model data, including meta and required`
			`installation files. A new directory will be created in the specified`
Update docstring 2017-03-20 21:50:55 +00:00			`output directory, and model data will be copied over.`
Add cli.package command to build model packages 2017-03-20 21:50:13 +00:00			`"""`
Add more options to read in meta data in package command Add meta option to supply path to meta.json. If no meta path is set, check if meta.json exists in input directory and use it. Otherwise, prompt for details on the command line. 2017-04-16 11:06:02 +00:00			`cli_package(input_dir, output_dir, meta, force)`
Add cli.package command to build model packages 2017-03-20 21:50:13 +00:00

Add spacy train work in progress 2017-03-23 10:08:41 +00:00			`@plac.annotations(`
Update spacy train CLI documentation 2017-03-26 13:33:48 +00:00			`lang=("model language", "positional", None, str),`
			`output_dir=("output directory to store model in", "positional", None, str),`
			`train_data=("location of JSON-formatted training data", "positional", None, str),`
			`dev_data=("location of JSON-formatted development data (optional)", "positional", None, str),`
Update train CLI 2017-03-26 12:16:52 +00:00			`n_iter=("number of iterations", "option", "n", int),`
Get spaCy train command working with neural network * Integrate models into pipeline * Add basic serialization (maybe incorrect) * Fix pickle on vocab 2017-05-17 10:04:50 +00:00			`nsents=("number of sentences", "option", None, int),`
Connect parser L1 option to train CLI 2017-03-26 12:24:07 +00:00			`parser_L1=("L1 regularization penalty for parser", "option", "L", float),`
Add option to use gpu to spacy train 2017-05-18 09:21:49 +00:00			`use_gpu=("Use GPU", "flag", "g", bool),`
Update train CLI 2017-03-26 12:16:52 +00:00			`no_tagger=("Don't train tagger", "flag", "T", bool),`
			`no_parser=("Don't train parser", "flag", "P", bool),`
Rename no_ner arg to no_entities 2017-05-19 18:23:11 +00:00			`no_entities=("Don't train NER", "flag", "N", bool)`
Add spacy train work in progress 2017-03-23 10:08:41 +00:00			`)`
Update spacy train CLI documentation 2017-03-26 13:33:48 +00:00			`def train(self, lang, output_dir, train_data, dev_data=None, n_iter=15,`
Add option to use gpu to spacy train 2017-05-18 09:21:49 +00:00			`nsents=0, parser_L1=0.0, use_gpu=False,`
Rename no_ner arg to no_entities 2017-05-19 18:23:11 +00:00			`no_tagger=False, no_parser=False, no_entities=False):`
Update spacy train CLI documentation 2017-03-26 13:33:48 +00:00			`"""`
			`Train a model. Expects data in spaCy's JSON format.`
			`"""`
Get spaCy train command working with neural network * Integrate models into pipeline * Add basic serialization (maybe incorrect) * Fix pickle on vocab 2017-05-17 10:04:50 +00:00			`nsents = nsents or None`
Add option to use gpu to spacy train 2017-05-18 09:21:49 +00:00			`cli_train(lang, output_dir, train_data, dev_data, n_iter, nsents,`
Rename no_ner arg to no_entities 2017-05-19 18:23:11 +00:00			`use_gpu, no_tagger, no_parser, no_entities, parser_L1)`
Add spacy train work in progress 2017-03-23 10:08:41 +00:00
Add experimental model init command 2017-03-26 18:51:40 +00:00			`@plac.annotations(`
			`lang=("model language", "positional", None, str),`
			`model_dir=("output directory to store model in", "positional", None, str),`
			`freqs_data=("tab-separated frequencies file", "positional", None, str),`
			`clusters_data=("Brown clusters file", "positional", None, str),`
			`vectors_data=("word vectors file", "positional", None, str)`
			`)`
			`def model(self, lang, model_dir, freqs_data, clusters_data=None, vectors_data=None):`
			`"""`
			`Initialize a new model and its data directory.`
			`"""`
			`cli_model(lang, model_dir, freqs_data, clusters_data, vectors_data)`

Add convert command 2017-04-07 11:04:17 +00:00			`@plac.annotations(`
			`input_file=("input file", "positional", None, str),`
			`output_dir=("output directory for converted file", "positional", None, str),`
			`n_sents=("Number of sentences per doc", "option", "n", float),`
			`morphology=("Enable appending morphology to tags", "flag", "m", bool)`
			`)`
			`def convert(self, input_file, output_dir, n_sents=10, morphology=False):`
			`"""`
			`Convert files into JSON format for use with train command and other`
			`experiment management functions.`
			`"""`
			`cli_convert(input_file, output_dir, n_sents, morphology)`

Add spacy train work in progress 2017-03-23 10:08:41 +00:00
Add directory cli and set up command line interface 2017-03-18 14:14:48 +00:00			`def __missing__(self, name):`
Update error message for missing commands 2017-03-26 13:40:02 +00:00			`print("\n Command %r does not exist."`
			`"\n Use the --help flag for a list of available commands.\n" % name)`
Add directory cli and set up command line interface 2017-03-18 14:14:48 +00:00

			`if __name__ == '__main__':`
			`import plac`
			`import sys`
			`sys.argv[0] = 'spacy'`
			`plac.Interpreter.call(CLI)`