diff --git a/spacy/__init__.py b/spacy/__init__.py
index 344fc427e..8dc0937f5 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -16,7 +16,7 @@ def load(name, **overrides):
     meta = util.parse_package_meta(model_path)
     if 'lang' not in meta:
         raise IOError('No language setting found in model meta.')
-    cls = util.load_lang_class(meta['lang'])
+    cls = util.get_lang_class(meta['lang'])
     overrides['meta'] = meta
     overrides['path'] = model_path
     return cls(**overrides)
diff --git a/spacy/tests/conftest.py b/spacy/tests/conftest.py
index 66ba94ea6..6b577be62 100644
--- a/spacy/tests/conftest.py
+++ b/spacy/tests/conftest.py
@@ -18,67 +18,67 @@ _languages = ['bn', 'da', 'de', 'en', 'es', 'fi', 'fr', 'he', 'hu', 'it', 'nb',
 
 @pytest.fixture(params=_languages)
 def tokenizer(request):
-    lang = util.load_lang_class(request.param)
+    lang = util.get_lang_class(request.param)
     return lang.Defaults.create_tokenizer()
 
 
 @pytest.fixture
 def en_tokenizer():
-    return util.load_lang_class('en').Defaults.create_tokenizer()
+    return util.get_lang_class('en').Defaults.create_tokenizer()
 
 
 @pytest.fixture
 def en_vocab():
-    return util.load_lang_class('en').Defaults.create_vocab()
+    return util.get_lang_class('en').Defaults.create_vocab()
 
 
 @pytest.fixture
 def en_parser():
-    return util.load_lang_class('en').Defaults.create_parser()
+    return util.get_lang_class('en').Defaults.create_parser()
 
 
 @pytest.fixture
 def es_tokenizer():
-    return util.load_lang_class('es').Defaults.create_tokenizer()
+    return util.get_lang_class('es').Defaults.create_tokenizer()
 
 
 @pytest.fixture
 def de_tokenizer():
-    return util.load_lang_class('de').Defaults.create_tokenizer()
+    return util.get_lang_class('de').Defaults.create_tokenizer()
 
 
 @pytest.fixture(scope='module')
 def fr_tokenizer():
-    return util.load_lang_class('fr').Defaults.create_tokenizer()
+    return util.get_lang_class('fr').Defaults.create_tokenizer()
 
 
 @pytest.fixture
 def hu_tokenizer():
-    return util.load_lang_class('hu').Defaults.create_tokenizer()
+    return util.get_lang_class('hu').Defaults.create_tokenizer()
 
 
 @pytest.fixture
 def fi_tokenizer():
-    return util.load_lang_class('fi').Defaults.create_tokenizer()
+    return util.get_lang_class('fi').Defaults.create_tokenizer()
 
 
 @pytest.fixture
 def sv_tokenizer():
-    return util.load_lang_class('sv').Defaults.create_tokenizer()
+    return util.get_lang_class('sv').Defaults.create_tokenizer()
 
 
 @pytest.fixture
 def bn_tokenizer():
-    return util.load_lang_class('bn').Defaults.create_tokenizer()
+    return util.get_lang_class('bn').Defaults.create_tokenizer()
 
 
 @pytest.fixture
 def he_tokenizer():
-    return util.load_lang_class('he').Defaults.create_tokenizer()
+    return util.get_lang_class('he').Defaults.create_tokenizer()
 
 
 @pytest.fixture
 def nb_tokenizer():
-    return util.load_lang_class('nb').Defaults.create_tokenizer()
+    return util.get_lang_class('nb').Defaults.create_tokenizer()
 
 
 @pytest.fixture
@@ -88,12 +88,12 @@ def stringstore():
 
 @pytest.fixture
 def en_entityrecognizer():
-    return util.load_lang_class('en').Defaults.create_entity()
+    return util.get_lang_class('en').Defaults.create_entity()
 
 
 @pytest.fixture
 def lemmatizer():
-    return util.load_lang_class('en').Defaults.create_lemmatizer()
+    return util.get_lang_class('en').Defaults.create_lemmatizer()
 
 
 @pytest.fixture
diff --git a/spacy/util.py b/spacy/util.py
index 4f916409d..db7be7e69 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -17,30 +17,30 @@ LANGUAGES = {}
 _data_path = Path(__file__).parent / 'data'
 
 
-def set_lang_class(name, cls):
+def get_lang_class(lang):
+    """Import and load a Language class.
+
+    lang (unicode): Two-letter language code, e.g. 'en'.
+    RETURNS (Language): Language class.
+    """
     global LANGUAGES
-    LANGUAGES[name] = cls
-
-
-def get_lang_class(name):
-    if name in LANGUAGES:
-        return LANGUAGES[name]
-    lang = re.split('[^a-zA-Z0-9]', name, 1)[0]
-    if lang not in LANGUAGES:
-        raise RuntimeError('Language not supported: %s' % name)
+    if not lang in LANGUAGES:
+        try:
+            module = importlib.import_module('.lang.%s' % lang, 'spacy')
+        except ImportError:
+            raise ImportError("Can't import language %s from spacy.lang." % lang)
+        LANGUAGES[lang] = getattr(module, module.__all__[0])
     return LANGUAGES[lang]
 
 
-def load_lang_class(lang):
-    """Import and load a Language class.
+def set_lang_class(name, cls):
+    """Set a custom Language class name that can be loaded via get_lang_class.
 
-    Args:
-        lang (unicode): Two-letter language code, e.g. 'en'.
-    Returns:
-        Language: Language class.
+    name (unicode): Name of Language class.
+    cls (Language): Language class.
     """
-    module = importlib.import_module('.lang.%s' % lang, 'spacy')
-    return getattr(module, module.__all__[0])
+    global LANGUAGES
+    LANGUAGES[name] = cls
 
 
 def get_data_path(require_exists=True):
diff --git a/website/docs/api/util.jade b/website/docs/api/util.jade
index 3b1f305a9..97ed7c6e0 100644
--- a/website/docs/api/util.jade
+++ b/website/docs/api/util.jade
@@ -49,7 +49,7 @@ p
         +cell unicode or #[code Path]
         +cell Path to new data directory.
 
-+h(2, "load_lang_class") load_lang_class
++h(2, "get_lang_class") get_lang_class
     +tag function
 
 p
@@ -59,7 +59,7 @@ p
 
     +aside-code("Example").
         for lang_id in ['en', 'de']:
-            lang_class = util.load_lang_class(lang_id)
+            lang_class = util.get_lang_class(lang_id)
             lang = lang_class()
             tokenizer = lang.Defaults.create_tokenizer()
 
diff --git a/website/docs/usage/adding-languages.jade b/website/docs/usage/adding-languages.jade
index d8f4a9a06..2d90028f0 100644
--- a/website/docs/usage/adding-languages.jade
+++ b/website/docs/usage/adding-languages.jade
@@ -80,7 +80,7 @@ p
     |  compute. As of spaCy v2.0, #[code Language] classes are not imported on
     |  initialisation and are only loaded when you import them directly, or load
     |  a model that requires a language to be loaded. To lazy-load languages in
-    |  your application, you can use the #[code util.load_lang_class()] helper
+    |  your application, you can use the #[code util.get_lang_class()] helper
     |  function with the two-letter language code as its argument.
 
 +h(2, "language-data") Adding language data
@@ -486,7 +486,7 @@ p
     |  #[+src(gh("spaCy", "spacy/tests/lang")) tests/lang] in a directory named
     |  after the language ID. You'll also need to create a fixture for your
     |  tokenizer in the #[+src(gh("spaCy", "spacy/tests/conftest.py")) conftest.py].
-    |  Always use the #[code load_lang_class()] helper function within the fixture,
+    |  Always use the #[code get_lang_class()] helper function within the fixture,
     |  instead of importing the class at the top of the file. This will load the
     |  language data only when it's needed. (Otherwise, #[em all data] would be
     |  loaded every time you run a test.)
@@ -494,7 +494,7 @@ p
 +code.
     @pytest.fixture
     def en_tokenizer():
-        return util.load_lang_class('en').Defaults.create_tokenizer()
+        return util.get_lang_class('en').Defaults.create_tokenizer()
 
 p
     |  When adding test cases, always
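Usage note: after this patch, util.get_lang_class() lazily imports spacy.lang.<code> on first call and caches the class in util.LANGUAGES, while util.set_lang_class() only registers a class under a name for later lookup. A minimal sketch of the two helpers working together, based on the patched util.py above; the subclass CustomEnglish and the registry name 'custom_en' are illustrative and not part of the patch:

    from spacy import util
    from spacy.lang.en import English

    # First call imports spacy.lang.en and caches English in util.LANGUAGES;
    # later calls return the cached class instead of re-importing.
    cls = util.get_lang_class('en')
    assert cls is English

    # A hypothetical subclass, registered under an illustrative name so that
    # get_lang_class() can resolve it without touching spacy.lang.
    class CustomEnglish(English):
        pass

    util.set_lang_class('custom_en', CustomEnglish)
    assert util.get_lang_class('custom_en') is CustomEnglish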