mirror of https://github.com/explosion/spaCy.git
Check if full string is found in lang classes first
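This allows users to register language classes under arbitrary names. (Otherwise, a custom lang class named "my_custom_class" would always load the Burmese "my" tokenizer if one was available, because the name is split at the first non-alphanumeric character.)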
This commit is contained in:
parent
13d30b6c01
commit
5c5f8c0a72
|
@@ -20,9 +20,11 @@ def set_lang_class(name, cls):
|
|||
|
||||
|
||||
def get_lang_class(name):
|
||||
if name in LANGUAGES:
|
||||
return LANGUAGES[name]
|
||||
lang = re.split('[^a-zA-Z0-9]', name, 1)[0]
|
||||
if lang not in LANGUAGES:
|
||||
raise RuntimeError('Language not supported: %s' % lang)
|
||||
raise RuntimeError('Language not supported: %s' % name)
|
||||
return LANGUAGES[lang]
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue