Merge load_lang_class and get_lang_class

ines 2017-05-14 01:31:10 +02:00
parent 36bebe7164
commit b462076d80
5 changed files with 39 additions and 39 deletions
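
The change collapses a two-step API into one: previously set_lang_class registered classes in the LANGUAGES dict and load_lang_class did the actual import, while get_lang_class only read the registry. The merged get_lang_class imports lazily on first use and caches the result, and every call site is renamed accordingly. A minimal sketch of the caller-facing difference (the util import path matches this snapshot; the rest is illustrative):

    from spacy import util

    # before this commit: cls = util.load_lang_class('en')
    cls = util.get_lang_class('en')  # imports spacy.lang.en on first call, then caches
    nlp = cls()                      # instantiate the Language subclass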


@@ -16,7 +16,7 @@ def load(name, **overrides):
     meta = util.parse_package_meta(model_path)
     if 'lang' not in meta:
         raise IOError('No language setting found in model meta.')
-    cls = util.load_lang_class(meta['lang'])
+    cls = util.get_lang_class(meta['lang'])
     overrides['meta'] = meta
     overrides['path'] = model_path
     return cls(**overrides)
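
The hunk above is spacy.load() resolving the language class from the 'lang' field of the package meta, then instantiating it with the overrides. A hedged sketch of that resolution step, with an assumed minimal meta dict (real meta.json files carry more fields):

    from spacy import util

    meta = {'lang': 'en'}                    # assumed meta.json contents
    cls = util.get_lang_class(meta['lang'])  # resolves to the English class
    nlp = cls(meta=meta)                     # load() also passes the model path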


@@ -18,67 +18,67 @@ _languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'it', 'nb',
 @pytest.fixture(params=_languages)
 def tokenizer(request):
-    lang = util.load_lang_class(request.param)
+    lang = util.get_lang_class(request.param)
     return lang.Defaults.create_tokenizer()

 @pytest.fixture
 def en_tokenizer():
-    return util.load_lang_class('en').Defaults.create_tokenizer()
+    return util.get_lang_class('en').Defaults.create_tokenizer()

 @pytest.fixture
 def en_vocab():
-    return util.load_lang_class('en').Defaults.create_vocab()
+    return util.get_lang_class('en').Defaults.create_vocab()

 @pytest.fixture
 def en_parser():
-    return util.load_lang_class('en').Defaults.create_parser()
+    return util.get_lang_class('en').Defaults.create_parser()

 @pytest.fixture
 def es_tokenizer():
-    return util.load_lang_class('es').Defaults.create_tokenizer()
+    return util.get_lang_class('es').Defaults.create_tokenizer()

 @pytest.fixture
 def de_tokenizer():
-    return util.load_lang_class('de').Defaults.create_tokenizer()
+    return util.get_lang_class('de').Defaults.create_tokenizer()

 @pytest.fixture(scope='module')
 def fr_tokenizer():
-    return util.load_lang_class('fr').Defaults.create_tokenizer()
+    return util.get_lang_class('fr').Defaults.create_tokenizer()

 @pytest.fixture
 def hu_tokenizer():
-    return util.load_lang_class('hu').Defaults.create_tokenizer()
+    return util.get_lang_class('hu').Defaults.create_tokenizer()

 @pytest.fixture
 def fi_tokenizer():
-    return util.load_lang_class('fi').Defaults.create_tokenizer()
+    return util.get_lang_class('fi').Defaults.create_tokenizer()

 @pytest.fixture
 def sv_tokenizer():
-    return util.load_lang_class('sv').Defaults.create_tokenizer()
+    return util.get_lang_class('sv').Defaults.create_tokenizer()

 @pytest.fixture
 def bn_tokenizer():
-    return util.load_lang_class('bn').Defaults.create_tokenizer()
+    return util.get_lang_class('bn').Defaults.create_tokenizer()

 @pytest.fixture
 def he_tokenizer():
-    return util.load_lang_class('he').Defaults.create_tokenizer()
+    return util.get_lang_class('he').Defaults.create_tokenizer()

 @pytest.fixture
 def nb_tokenizer():
-    return util.load_lang_class('nb').Defaults.create_tokenizer()
+    return util.get_lang_class('nb').Defaults.create_tokenizer()

 @pytest.fixture
@@ -88,12 +88,12 @@ def stringstore():
 @pytest.fixture
 def en_entityrecognizer():
-    return util.load_lang_class('en').Defaults.create_entity()
+    return util.get_lang_class('en').Defaults.create_entity()

 @pytest.fixture
 def lemmatizer():
-    return util.load_lang_class('en').Defaults.create_lemmatizer()
+    return util.get_lang_class('en').Defaults.create_lemmatizer()

 @pytest.fixture


@@ -17,30 +17,30 @@ LANGUAGES = {}
 _data_path = Path(__file__).parent / 'data'

-def set_lang_class(name, cls):
+def get_lang_class(lang):
+    """Import and load a Language class.
+
+    lang (unicode): Two-letter language code, e.g. 'en'.
+    RETURNS (Language): Language class.
+    """
     global LANGUAGES
-    LANGUAGES[name] = cls
-
-
-def get_lang_class(name):
-    if name in LANGUAGES:
-        return LANGUAGES[name]
-    lang = re.split('[^a-zA-Z0-9]', name, 1)[0]
-    if lang not in LANGUAGES:
-        raise RuntimeError('Language not supported: %s' % name)
+    if not lang in LANGUAGES:
+        try:
+            module = importlib.import_module('.lang.%s' % lang, 'spacy')
+        except ImportError:
+            raise ImportError("Can't import language %s from spacy.lang." %lang)
+        LANGUAGES[lang] = getattr(module, module.__all__[0])
     return LANGUAGES[lang]

-def load_lang_class(lang):
-    """Import and load a Language class.
-
-    Args:
-        lang (unicode): Two-letter language code, e.g. 'en'.
-
-    Returns:
-        Language: Language class.
+def set_lang_class(name, cls):
+    """Set a custom Language class name that can be loaded via get_lang_class.
+
+    name (unicode): Name of Language class.
+    cls (Language): Language class.
     """
-    module = importlib.import_module('.lang.%s' % lang, 'spacy')
-    return getattr(module, module.__all__[0])
+    global LANGUAGES
+    LANGUAGES[name] = cls

 def get_data_path(require_exists=True):
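
After the merge, the first get_lang_class call for a given code performs the import and stores the class in the LANGUAGES cache; every later call is a plain dict lookup. set_lang_class survives as a way to pre-seed that cache, which is how a custom class can bypass the spacy.lang import path entirely. A sketch against this snapshot of the API (CustomLanguage is a made-up stand-in, not part of the commit):

    from spacy import util
    from spacy.language import Language

    fr_cls = util.get_lang_class('fr')          # imports spacy.lang.fr once
    assert util.get_lang_class('fr') is fr_cls  # cached: no second import

    class CustomLanguage(Language):
        lang = 'xx_custom'

    # Registering first means get_lang_class finds the class in the cache
    # and never attempts to import spacy.lang.xx_custom.
    util.set_lang_class('xx_custom', CustomLanguage)
    assert util.get_lang_class('xx_custom') is CustomLanguage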


@@ -49,7 +49,7 @@ p
         +cell unicode or #[code Path]
         +cell Path to new data directory.

-+h(2, "load_lang_class") load_lang_class
++h(2, "get_lang_class") get_lang_class
 +tag function

 p
@@ -59,7 +59,7 @@ p
 +aside-code("Example").
     for lang_id in ['en', 'de']:
-        lang_class = util.load_lang_class(lang_id)
+        lang_class = util.get_lang_class(lang_id)
         lang = lang_class()
         tokenizer = lang.Defaults.create_tokenizer()


@@ -80,7 +80,7 @@ p
     | compute. As of spaCy v2.0, #[code Language] classes are not imported on
     | initialisation and are only loaded when you import them directly, or load
     | a model that requires a language to be loaded. To lazy-load languages in
-    | your application, you can use the #[code util.load_lang_class()] helper
+    | your application, you can use the #[code util.get_lang_class()] helper
     | function with the two-letter language code as its argument.

 +h(2, "language-data") Adding language data
@@ -486,7 +486,7 @@ p
     | #[+src(gh("spaCy", "spacy/tests/lang")) tests/lang] in a directory named
     | after the language ID. You'll also need to create a fixture for your
     | tokenizer in the #[+src(gh("spaCy", "spacy/tests/conftest.py")) conftest.py].
-    | Always use the #[code load_lang_class()] helper function within the fixture,
+    | Always use the #[code get_lang_class()] helper function within the fixture,
     | instead of importing the class at the top of the file. This will load the
     | language data only when it's needed. (Otherwise, #[em all data] would be
     | loaded every time you run a test.)
@@ -494,7 +494,7 @@ p
 +code.
     @pytest.fixture
     def en_tokenizer():
-        return util.load_lang_class('en').Defaults.create_tokenizer()
+        return util.get_lang_class('en').Defaults.create_tokenizer()

 p
     | When adding test cases, always
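
For completeness, a sketch of a test consuming such a fixture; the test name, input string and expected tokens are illustrative and not part of this commit:

    def test_en_tokenizer_splits_trailing_punct(en_tokenizer):
        tokens = en_tokenizer("Hello world!")
        assert [t.text for t in tokens] == ['Hello', 'world', '!']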