# Patch for spacy/lang/char_classes.py (git unified diff, two hunks).
#
# Hunk 1 (@@ -9,6 +9,8 @@): adds a new character-range constant
#   `_hindi = r"\u0900-\u097F"` after the existing `_hebrew` range.
# Hunk 2 (@@ -193,7 +195,7 @@): appends `_hindi` to `_uncased`. The trailing
#   context shows `_uncased` being concatenated into the strings passed to
#   `group_chars(...)` for `ALPHA` and `ALPHA_LOWER`, so the new range flows
#   into both of those character classes.
#
# NOTE(review): \u0900-\u097F appears to be the whole Unicode Devanagari block,
#   which is used by more languages than Hindi; confirm the `_hindi` name is the
#   intended one.
# NOTE(review): the hunks below are collapsed onto a single line. The hunk
#   headers declare 6->8 and 7->7 lines, so the original line breaks (including
#   blank context lines) must be restored before this patch can be applied.
diff --git a/spacy/lang/char_classes.py b/spacy/lang/char_classes.py index cb2e817d5..fb320b2ff 100644 --- a/spacy/lang/char_classes.py +++ b/spacy/lang/char_classes.py @@ -9,6 +9,8 @@ _bengali = r"\u0980-\u09FF" _hebrew = r"\u0591-\u05F4\uFB1D-\uFB4F" +_hindi = r"\u0900-\u097F" + # Latin standard _latin_u_standard = r"A-Z" _latin_l_standard = r"a-z" @@ -193,7 +195,7 @@ _ukrainian = r"а-щюяіїєґА-ЩЮЯІЇЄҐ" _upper = LATIN_UPPER + _russian_upper + _tatar_upper + _greek_upper + _ukrainian_upper _lower = LATIN_LOWER + _russian_lower + _tatar_lower + _greek_lower + _ukrainian_lower -_uncased = _bengali + _hebrew + _persian + _sinhala +_uncased = _bengali + _hebrew + _persian + _sinhala + _hindi ALPHA = group_chars(LATIN + _russian + _tatar + _greek + _ukrainian + _uncased) ALPHA_LOWER = group_chars(_lower + _uncased)