2019-08-12 08:37:48 +00:00
|
|
|
|
# coding: utf8
|
|
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
import re
|
|
|
|
|
from ...attrs import LIKE_NUM
|
|
|
|
|
|
|
|
|
|
_single_num_words = [
|
|
|
|
|
"〇",
|
|
|
|
|
"一",
|
|
|
|
|
"二",
|
|
|
|
|
"三",
|
|
|
|
|
"四",
|
|
|
|
|
"五",
|
|
|
|
|
"六",
|
|
|
|
|
"七",
|
|
|
|
|
"八",
|
|
|
|
|
"九",
|
|
|
|
|
"十",
|
|
|
|
|
"十一",
|
|
|
|
|
"十二",
|
|
|
|
|
"十三",
|
|
|
|
|
"十四",
|
|
|
|
|
"十五",
|
|
|
|
|
"十六",
|
|
|
|
|
"十七",
|
|
|
|
|
"十八",
|
|
|
|
|
"十九",
|
|
|
|
|
"廿",
|
|
|
|
|
"卅",
|
|
|
|
|
"卌",
|
|
|
|
|
"皕",
|
|
|
|
|
"零",
|
|
|
|
|
"壹",
|
|
|
|
|
"贰",
|
|
|
|
|
"叁",
|
|
|
|
|
"肆",
|
|
|
|
|
"伍",
|
|
|
|
|
"陆",
|
|
|
|
|
"柒",
|
|
|
|
|
"捌",
|
|
|
|
|
"玖",
|
|
|
|
|
"拾",
|
|
|
|
|
"拾壹",
|
|
|
|
|
"拾贰",
|
|
|
|
|
"拾叁",
|
|
|
|
|
"拾肆",
|
|
|
|
|
"拾伍",
|
|
|
|
|
"拾陆",
|
|
|
|
|
"拾柒",
|
|
|
|
|
"拾捌",
|
2019-08-20 15:36:34 +00:00
|
|
|
|
"拾玖",
|
2019-08-12 08:37:48 +00:00
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
_count_num_words = [
|
|
|
|
|
"一",
|
|
|
|
|
"二",
|
|
|
|
|
"三",
|
|
|
|
|
"四",
|
|
|
|
|
"五",
|
|
|
|
|
"六",
|
|
|
|
|
"七",
|
|
|
|
|
"八",
|
|
|
|
|
"九",
|
|
|
|
|
"壹",
|
|
|
|
|
"贰",
|
|
|
|
|
"叁",
|
|
|
|
|
"肆",
|
|
|
|
|
"伍",
|
|
|
|
|
"陆",
|
|
|
|
|
"柒",
|
|
|
|
|
"捌",
|
2019-08-20 15:36:34 +00:00
|
|
|
|
"玖",
|
2019-08-12 08:37:48 +00:00
|
|
|
|
]
|
|
|
|
|
|
2019-08-20 15:36:34 +00:00
|
|
|
|
_base_num_words = ["十", "百", "千", "万", "亿", "兆", "拾", "佰", "仟"]
|
2019-08-12 08:37:48 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def like_num(text):
    """Return True if *text* looks like a number, otherwise False.

    After removing one leading sign character and every comma / full-stop
    separator (ASCII and CJK forms), the text is accepted when it is:

    * made up only of digit characters (``str.isdigit``),
    * a fraction ``digits/digits`` with exactly one slash,
    * one of the stand-alone numerals in ``_single_num_words``, or
    * a compound numeral: one or more (counting digit + base unit) pairs,
      optionally followed by a single trailing counting digit.
    """
    # Drop a single leading sign / approximation marker.
    if text.startswith(("+", "-", "±", "~")):
        text = text[1:]
    # Strip thousands / decimal separators so "1,000.5" is judged by its digits.
    text = text.replace(",", "").replace(".", "").replace(",", "").replace("。", "")
    if text.isdigit():
        return True
    if text.count("/") == 1:
        num, denom = text.split("/")
        if num.isdigit() and denom.isdigit():
            return True
    if text in _single_num_words:
        return True
    # Compound numerals: (digit)(unit) repeated, then an optional last digit.
    # The alternations are built from the module-level word lists at call time.
    count_alt = "|".join(_count_num_words)
    base_alt = "|".join(_base_num_words)
    pattern = "^((" + count_alt + "){1}(" + base_alt + "){1})+(" + count_alt + ")?$"
    if re.match(pattern, text):
        return True
    return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Maps the imported LIKE_NUM attribute key to the like_num predicate defined
# in this module.
LEX_ATTRS = {
    LIKE_NUM: like_num,
}
|