Skip to content

Commit

Permalink
v0.3.11 cn2an 支持 smart 模式
Browse files Browse the repository at this point in the history
  • Loading branch information
Ailln committed Dec 15, 2019
1 parent 5c38146 commit 37667e1
Show file tree
Hide file tree
Showing 4 changed files with 121 additions and 31 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

- 支持 `中文数字` => `阿拉伯数字`
- 支持 `大写中文数字` => `阿拉伯数字`
- 支持 `中文数字和阿拉伯数字` => `阿拉伯数字`(开发中,暂不能使用)
- 支持 `中文数字和阿拉伯数字` => `阿拉伯数字`

### 1.2 `阿拉伯数字` => `中文数字`

Expand Down Expand Up @@ -78,7 +78,7 @@ output = cn2an.cn2an("一二三", "normal")
print(output)
# 123

# 在 smart 模式下,还可以将混合描述的 1百23 进行转化 (开发中,暂不能使用)
# 在 smart 模式下,还可以将混合描述的 1百23 进行转化
output = cn2an.cn2an("1百23", "smart")
print(output)
# 123
Expand Down Expand Up @@ -164,6 +164,7 @@ bash local_test.sh

### 6.3 性能测试

- 测试版本:0.3.10
- 测试设备:`2.3 GHz 双核Intel Core i5 MacBook Pro`
- 测试代码:[performance.py](https://github.com/Ailln/cn2an/tree/master/cn2an/performance.py)
- 测试方法:
Expand Down
89 changes: 61 additions & 28 deletions cn2an/cn2an.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,22 @@
import re

from . import utils
from .an2cn import An2Cn


class Cn2An(object):
def __init__(self):
self.conf = utils.get_default_conf()
self.ac = An2Cn()

def cn2an(self, inputs=None, mode="strict"):
if inputs is not None:
# 检查转换模式是否有效
if mode not in ["strict", "normal"]:
raise ValueError("mode 仅支持 strict normal 两种!")

negative = 1
if inputs[0] == "负":
negative = -1
inputs = inputs[1:]
if mode not in ["strict", "normal", "smart"]:
raise ValueError("mode 仅支持 strict normal smart 三种!")

# 检查输入数据是否有效
data_type = self.check_input_data_is_valid(inputs, mode)
negative, inputs, data_type = self.check_input_data_is_valid(inputs, mode)

if data_type == "integer":
# 不包含小数的输入
Expand All @@ -35,16 +32,52 @@ def cn2an(self, inputs=None, mode="strict"):
else:
raise ValueError("输入数据为空!")

return negative*output
return negative * output

def check_input_data_is_valid(self, check_data, mode):
# 检查输入数据是否在规定的字典中
all_check_keys = list(self.conf["number_unit"].keys())
all_check_keys.append("点")

for data in check_data:
if data not in all_check_keys:
raise ValueError(f"输入的数据不在转化范围内:{data}!")
# 正负号
nag = 1

if mode == "strict":
strict_check_key = self.conf["number_low"] + self.conf["number_up"] + ["十", "拾", "百", "佰", "千", "仟", "万",
"亿", "兆", "点", "负"]
for data in check_data:
if data not in strict_check_key:
raise ValueError(f"当前为{mode}模式,输入的数据不在转化范围内:{data}!")

# 确定正负号
if check_data[0] == "负":
check_data = check_data[1:]
nag = -1

elif mode == "normal":
normal_check_key = list(self.conf["number_unit"].keys()) + ["点", "负"]
for data in check_data:
if data not in normal_check_key:
raise ValueError(f"当前为{mode}模式,输入的数据不在转化范围内:{data}!")

# 确定正负号
if check_data[0] == "负":
check_data = check_data[1:]
nag = -1

elif mode == "smart":
smart_check_key = list(self.conf["number_unit"].keys()) + ["点", "负", "0", "1", "2", "3", "4", "5", "6", "7",
"8", "9", ".", "-"]
for data in check_data:
if data not in smart_check_key:
raise ValueError(f"当前为{mode}模式,输入的数据不在转化范围内:{data}!")

# 确定正负号
if check_data[0] in ["负", "-"]:
check_data = check_data[1:]
nag = -1

def an2cn_sub(matched):
return self.ac.an2cn(matched.group())

check_data = re.sub(r"\d+", an2cn_sub, check_data)
mode = "normal"

if "点" in check_data:
split_data = check_data.split("点")
Expand All @@ -56,7 +89,7 @@ def check_input_data_is_valid(self, check_data, mode):
integer_data = check_data
decimal_data = None

all_num = "".join(set(self.conf["number_low"] + self.conf["number_up"])) + ""
all_num = "".join(set(self.conf["number_low"] + self.conf["number_up"])) + "两幺"
all_unit = "".join(set(self.conf["unit_low"] + self.conf["unit_up"]))

# 整数部分检查
Expand All @@ -76,40 +109,40 @@ def check_input_data_is_valid(self, check_data, mode):
if re_all_num.group() != integer_data:
raise ValueError(f"不符合格式的数据:{integer_data}")
else:
return "all_num"
return nag, check_data, "all_num"
else:
raise ValueError(f"不符合格式的数据:{integer_data}")
else:
if decimal_data:
return "decimal"
return nag, check_data, "decimal"
else:
if check_data[-1] == "点":
if mode == "strict":
raise ValueError(f"不符合格式的数据:{check_data}")
elif mode == "normal":
return "decimal"
return nag, check_data, "decimal"
else:
return "integer"
return nag, check_data, "integer"
else:
if mode == "strict":
raise ValueError(f"不符合格式的数据:{integer_data}")
elif mode == "normal":
if decimal_data:
return "decimal"
return nag, check_data, "decimal"
else:
raise ValueError(f"不符合格式的数据:{integer_data}")
else:
raise ValueError(f"不符合格式的数据:{integer_data}")

def integer_convert(self, integer_data):
all_num = "".join(set(self.conf["number_low"] + self.conf["number_up"])) + ""
all_num = "".join(set(self.conf["number_low"] + self.conf["number_up"])) + "两幺"
# 口语模式 比如:两千三
ptn_speaking_mode = re.compile(f"^[{all_num}][万千百][{all_num}]$")
result = ptn_speaking_mode.search(integer_data)

if result:
high_num = self.conf["number_unit"].get(integer_data[0]) * self.conf["number_unit"].get(integer_data[1])
low_num = self.conf["number_unit"].get(integer_data[2]) * self.conf["number_unit"].get(integer_data[1])/10
low_num = self.conf["number_unit"].get(integer_data[2]) * self.conf["number_unit"].get(integer_data[1]) / 10
output_integer = high_num + low_num
else:
# 核心
Expand Down Expand Up @@ -162,7 +195,7 @@ def decimal_convert(self, decimal_data):
len_decimal_data = 15

output_decimal = 0
for index in range(len(decimal_data)-1, -1, -1):
for index in range(len(decimal_data) - 1, -1, -1):
unit_key = self.conf["number_unit"].get(decimal_data[index])
output_decimal += unit_key * 10 ** -(index + 1)

Expand All @@ -179,15 +212,15 @@ def direct_convert(self, data):
unit_key = self.conf["number_unit"].get(data[index_integer])
output_data += unit_key * 10 ** (point_index - index_integer - 1)

for index_decimal in range(len(data)-1, point_index, -1):
for index_decimal in range(len(data) - 1, point_index, -1):
unit_key = self.conf["number_unit"].get(data[index_decimal])
output_data += unit_key * 10 ** -(index_decimal - point_index)

# 处理精度溢出问题
output_data = round(output_data, len(data) - point_index)
else:
for index in range(len(data)-1, -1, -1):
for index in range(len(data) - 1, -1, -1):
unit_key = self.conf["number_unit"].get(data[index])
output_data += unit_key * 10 ** (len(data)-index-1)
output_data += unit_key * 10 ** (len(data) - index - 1)

return output_data
26 changes: 25 additions & 1 deletion cn2an/cn2an_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ class Cn2anTest(unittest.TestCase):
def setUp(self):
self.strict_data_dict = {
"一": 1,
"": 2,
"": 2,
"十": 10,
"十一": 11,
"一十一": 11,
Expand Down Expand Up @@ -112,10 +112,22 @@ def setUp(self):
}
self.normal_data_dict.update(self.strict_data_dict)

self.smart_data_dict = {
"100万": 1000000,
"100万三千": 1003000,
"200亿零四千230": 20000004230
}
self.smart_data_dict.update(self.normal_data_dict)

self.error_smart_datas = [
"10.1万"
]

self.error_normal_datas = [
"零点点",
"零点零大"
]
self.error_normal_datas.extend(self.error_smart_datas)

self.error_strict_datas = [
"一一",
Expand All @@ -137,6 +149,10 @@ def test_cn2an(self):
self.assertEqual(self.ca.cn2an(normal_item, "normal"),
self.normal_data_dict[normal_item])

for smart_item in self.smart_data_dict.keys():
self.assertEqual(self.ca.cn2an(smart_item, "smart"),
self.smart_data_dict[smart_item])

for error_strict_item in self.error_strict_datas:
try:
self.ca.cn2an(error_strict_item)
Expand All @@ -153,6 +169,14 @@ def test_cn2an(self):
else:
raise Exception(f'ValueError not raised: {error_normal_item}')

for error_smart_item in self.error_smart_datas:
try:
self.ca.cn2an(error_smart_item)
except ValueError as e:
self.assertEqual(type(e), ValueError)
else:
raise Exception(f'ValueError not raised: {error_smart_item}')


if __name__ == '__main__':
unittest.main()
32 changes: 32 additions & 0 deletions example/cn2an_core1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Minimal demo of the core Chinese-numeral -> integer conversion.

Only the digits 一..九 and the units 十/百/千 are supported here; the
script converts ``input_data`` and prints the resulting integer.
"""

# Maps every supported character to its numeric value. Values below 10
# are digits; values of 10 and above are positional units.
number_map = {
    "千": 1000,
    "百": 100,
    "十": 10,
    "一": 1,
    "二": 2,
    "三": 3,
    "四": 4,
    "五": 5,
    "六": 6,
    "七": 7,
    "八": 8,
    "九": 9
}


def convert(text):
    """Convert a Chinese numeral made of digits and 十/百/千 to an int.

    Args:
        text: String whose characters are all keys of ``number_map``.

    Returns:
        The integer value of ``text``; ``0`` for an empty string.

    Raises:
        ValueError: If ``text`` contains a character not in ``number_map``.
    """
    total = 0
    pending_digit = 0  # digit read but not yet scaled by a unit

    for char in text:
        number = number_map.get(char)
        if number is None:
            raise ValueError(f"unsupported character: {char!r}")

        if number < 10:
            # A digit: hold it until we know which unit (if any) follows.
            pending_digit = number
        else:
            # A unit: scale the pending digit right away, so inputs that
            # end in a unit (e.g. 九千八百) are handled correctly. A unit
            # with no digit before it counts as one of that unit (十 -> 10).
            total += (pending_digit or 1) * number
            pending_digit = 0

    # A trailing digit with no unit is the ones place.
    return total + pending_digit


input_data = "九千八百七十六"
result = convert(input_data)

print(result)

0 comments on commit 37667e1

Please sign in to comment.