Improve entry points and allow custom language classes via entry points (#3080)

* Remove check for overwritten factory

This needs to be handled differently – on first initialization, a new factory will be added and any subsequent initializations will trigger this warning, even if it's a new entry point that doesn't overwrite a built-in.

* Add helper to only load specific entry point

Useful for loading languages via entry points, so that they can be lazy-loaded. Otherwise, all entry point languages would have to be loaded upfront.

* Check entry points for custom languages
This commit is contained in:
Ines Montani 2018-12-20 23:58:43 +01:00 committed by Matthew Honnibal
parent ca244f5f84
commit bb9ad37e05
2 changed files with 19 additions and 4 deletions

View File

@ -28,7 +28,7 @@ from .lang.punctuation import TOKENIZER_INFIXES
from .lang.tokenizer_exceptions import TOKEN_MATCH
from .lang.tag_map import TAG_MAP
from .lang.lex_attrs import LEX_ATTRS, is_stop
from .errors import Errors, Warnings, user_warning
from .errors import Errors
from . import util
from . import about
@ -146,9 +146,6 @@ class Language(object):
RETURNS (Language): The newly constructed object.
"""
user_factories = util.get_entry_points("spacy_factories")
for factory in user_factories.keys():
if factory in self.factories:
user_warning(Warnings.W009.format(name=factory))
self.factories.update(user_factories)
self._meta = dict(meta)
self._path = None

View File

@ -43,6 +43,11 @@ def get_lang_class(lang):
RETURNS (Language): Language class.
"""
global LANGUAGES
# Check if an entry point is exposed for the language code
entry_point = get_entry_point("spacy_languages", lang)
if entry_point is not None:
LANGUAGES[lang] = entry_point
return entry_point
if lang not in LANGUAGES:
try:
module = importlib.import_module(".lang.%s" % lang, "spacy")
@ -230,6 +235,19 @@ def get_entry_points(key):
return result
def get_entry_point(key, value):
    """Look up a single registered entry point by name and load it.

    Only the first entry point whose name matches is loaded; the remaining
    entry points in the group are never loaded, which keeps the lookup lazy.

    key (unicode): Entry point group to search (passed to
        pkg_resources.iter_entry_points).
    value (unicode): Name of the entry point to load.
    RETURNS: The loaded entry point, or None if no entry point matches.
    """
    candidates = (
        candidate
        for candidate in pkg_resources.iter_entry_points(key)
        if candidate.name == value
    )
    match = next(candidates, None)
    if match is None:
        return None
    return match.load()
def is_in_jupyter():
"""Check if user is running spaCy from a Jupyter notebook by detecting the
IPython kernel. Mainly used for the displaCy visualizer.