diff --git a/.travis.yml b/.travis.yml index c90da84d2..2a0295156 100644 --- a/.travis.yml +++ b/.travis.yml @@ -21,7 +21,9 @@ install: - "mv WordNet-3.0 wordnet" - "cd ../../" - "export PYTHONPATH=`pwd`" - - "python bin/init_model.py en lang_data/ corpora/ spacy/en/data" + - "python bin/init_model.py en lang_data/ corpora/ data" + - "sputnik build ." + - "sputnik install en_default-*.sputnik" # run tests script: diff --git a/package.json b/package.json new file mode 100644 index 000000000..563009699 --- /dev/null +++ b/package.json @@ -0,0 +1,10 @@ +{ + "name": "en_default", + "version": "0.99.0", + "description": "english default model", + "license": "public domain", + "include": ["data/**/*"], + "compatibility": { + "spacy": "==0.99.0" + } +} diff --git a/spacy/language.py b/spacy/language.py index 2dce0ea0d..83b91cdca 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -144,13 +144,15 @@ class Language(object): @classmethod def default_parser(cls, package, vocab): - data_dir = package.dir_path('data', 'deps') - return Parser.from_dir(data_dir, vocab.strings, ArcEager) + data_dir = package.dir_path('data', 'deps', require=False) + if data_dir and path.exists(data_dir): + return Parser.from_dir(data_dir, vocab.strings, ArcEager) @classmethod def default_entity(cls, package, vocab): - data_dir = package.dir_path('data', 'ner') - return Parser.from_dir(data_dir, vocab.strings, BiluoPushDown) + data_dir = package.dir_path('data', 'ner', require=False) + if data_dir and path.exists(data_dir): + return Parser.from_dir(data_dir, vocab.strings, BiluoPushDown) def __init__(self, package=None, vocab=None, tokenizer=None, tagger=None, parser=None, entity=None, matcher=None, serializer=None, diff --git a/spacy/tagger.pyx b/spacy/tagger.pyx index 91f574348..1c345c6e8 100644 --- a/spacy/tagger.pyx +++ b/spacy/tagger.pyx @@ -155,7 +155,8 @@ cdef class Tagger: model = TaggerModel(vocab.morphology.n_tags, ConjunctionExtracter(N_CONTEXT_FIELDS, templates)) - model.load(package.file_path('data', 'pos', 'model', require=False)) # TODO: really optional? + if package.has_file('data', 'pos', 'model'): # TODO: really optional? + model.load(package.file_path('data', 'pos', 'model')) return cls(vocab, model)