mirror of https://github.com/explosion/spaCy.git
Check if full string is found in lang classes first
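This allows users to register lang classes under arbitrary strings. (Otherwise, a custom lang class named "my_custom_class" would always load the Burmese "my" tokenizer if one was available, because only the prefix before the first non-alphanumeric character was looked up.)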
This commit is contained in:
parent
13d30b6c01
commit
5c5f8c0a72
|
@ -20,9 +20,11 @@ def set_lang_class(name, cls):
|
||||||
|
|
||||||
|
|
||||||
def get_lang_class(name):
|
def get_lang_class(name):
|
||||||
|
if name in LANGUAGES:
|
||||||
|
return LANGUAGES[name]
|
||||||
lang = re.split('[^a-zA-Z0-9]', name, 1)[0]
|
lang = re.split('[^a-zA-Z0-9]', name, 1)[0]
|
||||||
if lang not in LANGUAGES:
|
if lang not in LANGUAGES:
|
||||||
raise RuntimeError('Language not supported: %s' % lang)
|
raise RuntimeError('Language not supported: %s' % name)
|
||||||
return LANGUAGES[lang]
|
return LANGUAGES[lang]
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue