* Lazy-load tagger and parser

This commit is contained in:
Matthew Honnibal 2014-12-30 23:25:09 +11:00
parent 9976aa976e
commit 30e5805656
1 changed file with 19 additions and 14 deletions

View File

@ -41,25 +41,32 @@ class English(object):
parser (spacy.syntax.parser.GreedyParser): parser (spacy.syntax.parser.GreedyParser):
A greedy shift-reduce dependency parser. A greedy shift-reduce dependency parser.
""" """
def __init__(self, data_dir=None): def __init__(self, data_dir=None):
if data_dir is None: if data_dir is None:
data_dir = path.join(path.dirname(__file__), 'data') data_dir = path.join(path.dirname(__file__), 'data')
self._data_dir = data_dir
self.vocab = Vocab(data_dir=data_dir, get_lex_props=get_lex_props) self.vocab = Vocab(data_dir=data_dir, get_lex_props=get_lex_props)
self.tokenizer = Tokenizer.from_dir(self.vocab, data_dir) tag_names = list(POS_TAGS.keys())
if path.exists(path.join(data_dir, 'pos')): tag_names.sort()
self.tagger = EnPosTagger(self.vocab.strings, data_dir) self.tokenizer = Tokenizer.from_dir(self.vocab, data_dir, POS_TAGS, tag_names)
else:
self.tagger = None
if path.exists(path.join(data_dir, 'deps')):
self.parser = GreedyParser(path.join(data_dir, 'deps'))
else:
self.parser = None
self.strings = self.vocab.strings self.strings = self.vocab.strings
self._tagger = None
self._parser = None
def __call__(self, text, tag=True, parse=True): @property
def tagger(self):
if self._tagger is None:
self._tagger = EnPosTagger(self.vocab.strings, self._data_dir)
return self._tagger
@property
def parser(self):
if self._parser is None:
self._parser = GreedyParser(path.join(self._data_dir, 'deps'))
return self._parser
def __call__(self, text, tag=True, parse=False):
"""Apply the pipeline to some text. """Apply the pipeline to some text.
Args: Args:
@ -88,5 +95,3 @@ class English(object):
return [] return []
else: else:
return self.tagger.tag_names return self.tagger.tag_names