mirror of https://github.com/explosion/spaCy.git
Fix default punctuation rules for Hindi text (#3625 explosion)
This commit is contained in:
parent
d5311b3c42
commit
815f8d13dd
|
@@ -9,6 +9,8 @@ _bengali = r"\u0980-\u09FF"
|
|||
|
||||
_hebrew = r"\u0591-\u05F4\uFB1D-\uFB4F"
|
||||
|
||||
+_hindi = r"\u0900-\u097F"
|
||||
|
||||
# Latin standard
|
||||
_latin_u_standard = r"A-Z"
|
||||
_latin_l_standard = r"a-z"
|
||||
|
@@ -193,7 +195,7 @@ _ukrainian = r"а-щюяіїєґА-ЩЮЯІЇЄҐ"
|
|||
_upper = LATIN_UPPER + _russian_upper + _tatar_upper + _greek_upper + _ukrainian_upper
|
||||
_lower = LATIN_LOWER + _russian_lower + _tatar_lower + _greek_lower + _ukrainian_lower
|
||||
|
||||
-_uncased = _bengali + _hebrew + _persian + _sinhala
|
||||
+_uncased = _bengali + _hebrew + _persian + _sinhala + _hindi
|
||||
|
||||
ALPHA = group_chars(LATIN + _russian + _tatar + _greek + _ukrainian + _uncased)
|
||||
ALPHA_LOWER = group_chars(_lower + _uncased)
|
||||
|
|
Loading…
Reference in New Issue