Check if full string is found in lang classes first

This allows users to register language classes under arbitrary names.
(Otherwise, a custom lang class "my_custom_class" would always load the
Burmese "my" tokenizer if one was available.)
This commit is contained in:
ines 2017-04-16 22:14:38 +02:00
parent 13d30b6c01
commit 5c5f8c0a72
1 changed file with 3 additions and 1 deletion

View File

@ -20,9 +20,11 @@ def set_lang_class(name, cls):
def get_lang_class(name):
    """Return the language class registered in LANGUAGES for ``name``.

    The full string is tried first, so a class registered under an
    arbitrary name (e.g. "my_custom_class") is not shadowed by a language
    code that merely matches its prefix (e.g. "my"). If there is no exact
    match, the part of ``name`` before its first non-alphanumeric
    character is used as the lookup key instead.

    Raises:
        RuntimeError: if neither the full name nor its leading
            alphanumeric prefix is a key in LANGUAGES.
    """
    if name in LANGUAGES:
        return LANGUAGES[name]
    # Keep only the text before the first non-alphanumeric character,
    # e.g. "en_core" and "en-US" both reduce to "en".
    # maxsplit is passed by keyword: positional use is deprecated in 3.13.
    lang = re.split(r'[^a-zA-Z0-9]', name, maxsplit=1)[0]
    if lang not in LANGUAGES:
        # Report the string the caller actually passed, not the prefix.
        raise RuntimeError('Language not supported: %s' % name)
    return LANGUAGES[lang]