mirror of https://github.com/explosion/spaCy.git
Added USD to the currency rules
This commit is contained in:
parent
2572a9ddf0
commit
ba07e23c87
|
@@ -11,7 +11,7 @@ _units = (_units + 's bit Gbps Mbps mbps Kbps kbps ƒ ppi px '
|
|||
'ratus rb ribu ribuan '
|
||||
'juta jt jutaan mill?iar million bil[l]?iun bilyun billion '
|
||||
)
|
||||
-_currency = (_currency + r' Rp IDR RMB SGD S\$')
|
||||
+_currency = (_currency + r' USD Rp IDR RMB SGD S\$')
|
||||
_months = ('Januari Februari Maret April Mei Juni Juli Agustus September '
|
||||
'Oktober November Desember January February March May June '
|
||||
'July August October December Jan Feb Mar Jun Jul Aug Sept '
|
||||
|
@@ -29,7 +29,7 @@ TOKENIZER_PREFIXES.remove('#') # hashtag
|
|||
_prefixes = TOKENIZER_PREFIXES + LIST_CURRENCY + [HTML_PREFIX] + ['/', '—']
|
||||
|
||||
_suffixes = TOKENIZER_SUFFIXES + [r'\-[Nn]ya', '-[KkMm]u', '[—-]'] + [
|
||||
r'(?<=[0-9])(?:{c})'.format(c=CURRENCY),
|
||||
r'(?<={c})(?:[0-9]+)'.format(c=CURRENCY),
|
||||
r'(?<=[0-9])(?:{u})'.format(u=UNITS),
|
||||
r'(?<=[0-9])%',
|
||||
r'(?<=[0-9{a}]{h})(?:[\.,:-])'.format(a=ALPHA, h=HTML_SUFFIX),
|
||||
|
|
Loading…
Reference in New Issue