From 24ed3fc25c9350b8e3a4342a40fcc9f40ce8ba59 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Sun, 13 Sep 2015 10:45:21 +1000 Subject: [PATCH] * Check file existence before opening in lemmatizer --- spacy/lemmatizer.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py index e2626b4d3..ed04e2d77 100644 --- a/spacy/lemmatizer.py +++ b/spacy/lemmatizer.py @@ -18,7 +18,10 @@ class Lemmatizer(object): for pos in ['adj', 'noun', 'verb']: index[pos] = read_index(path.join(data_dir, 'wordnet', 'index.%s' % pos)) exc[pos] = read_exc(path.join(data_dir, 'wordnet', '%s.exc' % pos)) - rules = json.load(open(path.join(data_dir, 'vocab', 'lemma_rules.json'))) + if path.exists(path.join(data_dir, 'vocab', 'lemma_rules.json')): + rules = json.load(open(path.join(data_dir, 'vocab', 'lemma_rules.json'))) + else: + rules = {} return cls(index, exc, rules) def __init__(self, index, exceptions, rules): @@ -64,6 +67,8 @@ def lemmatize(string, index, exceptions, rules): def read_index(loc): index = set() + if not path.exists(loc): + return index for line in codecs.open(loc, 'r', 'utf8'): if line.startswith(' '): continue @@ -76,6 +81,8 @@ def read_index(loc): def read_exc(loc): exceptions = {} + if not path.exists(loc): + return exceptions for line in codecs.open(loc, 'r', 'utf8'): if line.startswith(' '): continue