Patch summary: moves the pymorphy2 import out of the RussianTokenizer class
body and into __init__, so the ImportError is raised when a tokenizer is
constructed rather than when the class body executes. The shared MorphAnalyzer
is now created on first use by the new _create_morph classmethod and stored
on the class attribute _morph (initially None).

NOTE(review): this patch was restored from a copy whose line breaks and
leading whitespace had been collapsed. Line counts match the hunk headers,
but indentation (4-space Python nesting) is reconstructed - verify against
the target file before applying.

diff --git a/spacy/ru/__init__.py b/spacy/ru/__init__.py
index d8f38e199..12b480a8a 100644
--- a/spacy/ru/__init__.py
+++ b/spacy/ru/__init__.py
@@ -8,17 +8,19 @@ from .language_data import *
 
 
 class RussianTokenizer(object):
-    try:
-        from pymorphy2 import MorphAnalyzer
-    except ImportError:
-        raise ImportError(
-            "The Russian tokenizer requires the pymorphy2 library: "
-            "try to fix it with "
-            "pip install pymorphy2==0.8")
-
-    _morph = MorphAnalyzer()
+    _morph = None
 
     def __init__(self, spacy_tokenizer, cls, nlp=None):
+        try:
+            from pymorphy2 import MorphAnalyzer
+        except ImportError:
+            raise ImportError(
+                "The Russian tokenizer requires the pymorphy2 library: "
+                "try to fix it with "
+                "pip install pymorphy2==0.8")
+
+        RussianTokenizer._morph = RussianTokenizer._create_morph(MorphAnalyzer)
+
         self.vocab = nlp.vocab if nlp else cls.create_vocab(nlp)
         self._spacy_tokenizer = spacy_tokenizer
 
@@ -36,6 +38,12 @@ class RussianTokenizer(object):
     def _normalize(cls, word):
         return cls._morph.parse(word)[0].normal_form
 
+    @classmethod
+    def _create_morph(cls, morph_analyzer_class):
+        if not cls._morph:
+            cls._morph = morph_analyzer_class()
+        return cls._morph
+
 
 class RussianDefaults(Language.Defaults):
     lex_attr_getters = dict(Language.Defaults.lex_attr_getters)