mirror of https://github.com/explosion/spaCy.git
updated Russian tokenizer
moved the attempt to import pymorphy2 into __init__
This commit is contained in:
parent
3aad66cf00
commit
7401152289
|
@ -8,6 +8,9 @@ from .language_data import *
|
||||||
|
|
||||||
|
|
||||||
class RussianTokenizer(object):
|
class RussianTokenizer(object):
|
||||||
|
_morph = None
|
||||||
|
|
||||||
|
def __init__(self, spacy_tokenizer, cls, nlp=None):
|
||||||
try:
|
try:
|
||||||
from pymorphy2 import MorphAnalyzer
|
from pymorphy2 import MorphAnalyzer
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -16,9 +19,8 @@ class RussianTokenizer(object):
|
||||||
"try to fix it with "
|
"try to fix it with "
|
||||||
"pip install pymorphy2==0.8")
|
"pip install pymorphy2==0.8")
|
||||||
|
|
||||||
_morph = MorphAnalyzer()
|
RussianTokenizer._morph = RussianTokenizer._create_morph(MorphAnalyzer)
|
||||||
|
|
||||||
def __init__(self, spacy_tokenizer, cls, nlp=None):
|
|
||||||
self.vocab = nlp.vocab if nlp else cls.create_vocab(nlp)
|
self.vocab = nlp.vocab if nlp else cls.create_vocab(nlp)
|
||||||
self._spacy_tokenizer = spacy_tokenizer
|
self._spacy_tokenizer = spacy_tokenizer
|
||||||
|
|
||||||
|
@ -36,6 +38,12 @@ class RussianTokenizer(object):
|
||||||
def _normalize(cls, word):
|
def _normalize(cls, word):
|
||||||
return cls._morph.parse(word)[0].normal_form
|
return cls._morph.parse(word)[0].normal_form
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _create_morph(cls, morph_analyzer_class):
|
||||||
|
if not cls._morph:
|
||||||
|
cls._morph = morph_analyzer_class()
|
||||||
|
return cls._morph
|
||||||
|
|
||||||
|
|
||||||
class RussianDefaults(Language.Defaults):
|
class RussianDefaults(Language.Defaults):
|
||||||
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
|
lex_attr_getters = dict(Language.Defaults.lex_attr_getters)
|
||||||
|
|
Loading…
Reference in New Issue