mirror of https://github.com/explosion/spaCy.git
Add Ukrainian unicode
This commit is contained in:
parent
e2d93e4852
commit
402d133c90
|
@ -32,9 +32,11 @@ _tatar_lower = r"әөүҗңһ"
|
|||
# Upper-case letters contributed for Tatar (per the variable name); this raw
# string is one of the pieces concatenated into `_upper`.
_tatar_upper = r"ӘӨҮҖҢҺ"
|
||||
# Lower-case Greek: the range α-ω followed by individually listed accented
# vowels. Written in character-range notation, so it is presumably meant to be
# embedded inside a regex character class.
_greek_lower = r"α-ωάέίόώήύ"
|
||||
# Upper-case Greek: the range Α-Ω followed by individually listed accented
# vowels (upper-case counterpart of `_greek_lower`).
_greek_upper = r"Α-ΩΆΈΊΌΏΉΎ"
|
||||
# Lower-case Ukrainian alphabet in character-range notation. The range а-щ
# stops at U+0449 on purpose so that ъ, ы and э (not Ukrainian letters) stay
# excluded; the letters past that point are listed individually. The soft sign
# ь (U+044C) is a Ukrainian letter and must be listed explicitly because it
# falls outside the а-щ range.
_ukrainian_lower = r"а-щьюяіїєґ"
|
||||
# Upper-case Ukrainian alphabet in character-range notation. The range А-Щ
# stops at U+0429 on purpose so that Ъ, Ы and Э (not Ukrainian letters) stay
# excluded; the letters past that point are listed individually. The soft sign
# Ь (U+042C) is a Ukrainian letter and must be listed explicitly because it
# falls outside the А-Щ range.
_ukrainian_upper = r"А-ЩЬЮЯІЇЄҐ"
|
||||
|
||||
_upper = _latin_upper + _russian_upper + _tatar_upper + _greek_upper
|
||||
_lower = _latin_lower + _russian_lower + _tatar_lower + _greek_lower
|
||||
# All upper-case letter sets, joined in a fixed order into a single string.
_upper = "".join(
    (
        _latin_upper,
        _russian_upper,
        _tatar_upper,
        _greek_upper,
        _ukrainian_upper,
    )
)
|
||||
# All lower-case letter sets, joined in a fixed order into a single string.
_lower = "".join(
    (
        _latin_lower,
        _russian_lower,
        _tatar_lower,
        _greek_lower,
        _ukrainian_lower,
    )
)
|
||||
# Letter sets kept apart from `_upper` / `_lower` (scripts with no case
# distinction, per the name), joined into a single string.
_uncased = "".join((_bengali, _hebrew, _persian, _sinhala))
|
||||
|
||||
# Every alphabetic character set (cased and uncased), passed through
# `group_chars` to build the public ALPHA constant.
_all_alpha = "".join((_upper, _lower, _uncased))
ALPHA = group_chars(_all_alpha)
|
||||
|
@ -48,7 +50,7 @@ _units = (
|
|||
"кг г мг м/с км/ч кПа Па мбар Кб КБ кб Мб МБ мб Гб ГБ гб Тб ТБ тб"
|
||||
"كم كم² كم³ م م² م³ سم سم² سم³ مم مم² مم³ كم غرام جرام جم كغ ملغ كوب اكواب"
|
||||
)
|
||||
_currency = r"\$ £ € ¥ ฿ US\$ C\$ A\$ ₽ ﷼"
|
||||
# Space-separated currency symbols and prefixed-dollar forms. `$` is
# backslash-escaped, which suggests the entries are later used in a regex.
# The trailing ₴ is the Ukrainian hryvnia sign (U+20B4).
_currency = r"\$ £ € ¥ ฿ US\$ C\$ A\$ ₽ ﷼ ₴"
|
||||
|
||||
# These expressions contain various unicode variations, including characters
|
||||
# used in Chinese (see #1333, #1340, #1351) – unless there are cross-language
|
||||
|
|
Loading…
Reference in New Issue