#coding=utf-8
# Lookup table mapping a single numeral character to its integer value.
# Covers everyday and financial Chinese numerals in both traditional and
# simplified forms, plus ASCII digits.  Values >= 10 are multiplier units
# (十/百/千/萬/億); values < 10 are plain digits.
#
# The earlier literal listed u'萬' and u'億' twice each (the second entry
# silently overwrote the first) and had no simplified forms, so input such
# as u'万' or u'亿' was not recognised.
chs_arabic_map = {
    # Everyday digits.
    u'零': 0, u'〇': 0, u'一': 1, u'二': 2, u'三': 3, u'四': 4,
    u'五': 5, u'六': 6, u'七': 7, u'八': 8, u'九': 9,
    # Colloquial alternates for 1 and 2.
    u'幺': 1, u'兩': 2, u'两': 2,
    # Financial digits (traditional forms).
    u'壹': 1, u'貳': 2, u'叄': 3, u'肆': 4, u'伍': 5,
    u'陸': 6, u'柒': 7, u'捌': 8, u'玖': 9,
    # Financial digits whose simplified form differs.
    u'贰': 2, u'叁': 3, u'陆': 6,
    # Multiplier units (everyday, financial, simplified).
    u'十': 10, u'拾': 10,
    u'百': 100, u'佰': 100,
    u'千': 10 ** 3, u'仟': 10 ** 3,
    u'萬': 10 ** 4, u'万': 10 ** 4,
    u'億': 10 ** 8, u'亿': 10 ** 8,
    # ASCII digits.
    u'0': 0, u'1': 1, u'2': 2, u'3': 3, u'4': 4,
    u'5': 5, u'6': 6, u'7': 7, u'8': 8, u'9': 9,
}
def convertChineseDigitsToArabic(chinese_digits, encoding="utf-8"):
    """Convert a Chinese numeral string to an integer.

    Characters are looked up in ``chs_arabic_map`` and folded left to right.
    Parsing stops at the first character that is not in the map, and the
    value of everything parsed up to that point is returned.

    Args:
        chinese_digits: The numeral, as text or as an encoded byte string.
        encoding: Codec used to decode ``chinese_digits`` when it is a byte
            string.  Ignored for text input.

    Returns:
        The integer value of the recognised leading part of the input;
        0 for an empty string or when the first character is unknown.
    """
    # Decode byte strings only.  On Python 2 ``bytes`` is ``str`` so this
    # matches the old check; on Python 3 text has no ``decode`` method.
    if isinstance(chinese_digits, bytes):
        chinese_digits = chinese_digits.decode(encoding)

    result = 0   # value of the current section (everything after the last 億)
    tmp = 0      # digit(s) read but not yet attached to a unit
    hnd_mln = 0  # total carried by the sections already closed by 億

    for curr_char in chinese_digits:
        curr_digit = chs_arabic_map.get(curr_char)

        if curr_digit is None:
            # Unknown character: stop here, but fall through to the final
            # sum so the pending digit and the closed 億 sections are kept
            # (returning ``result`` directly used to drop both).
            break

        if curr_digit == 10 ** 8:
            # 億: scale the whole current section, bank it, start afresh.
            result = (result + tmp) * curr_digit
            hnd_mln = hnd_mln * 10 ** 8 + result
            result = 0
            tmp = 0
        elif curr_digit == 10 ** 4:
            # 萬: scales everything accumulated so far in this section.
            result = (result + tmp) * curr_digit
            tmp = 0
        elif curr_digit >= 10:
            # 十 / 百 / 千: a unit with no digit in front means "one" unit.
            result += curr_digit * (tmp or 1)
            tmp = 0
        else:
            # Plain digit.  Accumulating positionally lets runs such as
            # "2021" work, and a leading 零 contributes nothing.
            tmp = tmp * 10 + curr_digit

    return result + tmp + hnd_mln
# Demo call.  The parenthesised single-argument form prints the same thing
# on Python 2 and is also valid syntax on Python 3.
print(convertChineseDigitsToArabic("十一萬三千六百二十一"))
# Version 2: an alternative implementation of the same conversion.
#coding=utf-8
# Lookup table mapping a single numeral character to its integer value.
# Covers everyday and financial Chinese numerals in both traditional and
# simplified forms, plus ASCII digits.  Values >= 10 are multiplier units
# (十/百/千/萬/億); values < 10 are plain digits.
#
# The earlier literal listed u'萬' and u'億' twice each (the second entry
# silently overwrote the first) and had no simplified forms, so input such
# as u'万' or u'亿' raised KeyError on lookup.
dic = {
    # Everyday digits.
    u'零': 0, u'〇': 0, u'一': 1, u'二': 2, u'三': 3, u'四': 4,
    u'五': 5, u'六': 6, u'七': 7, u'八': 8, u'九': 9,
    # Colloquial alternates for 1 and 2.
    u'幺': 1, u'兩': 2, u'两': 2,
    # Financial digits (traditional forms).
    u'壹': 1, u'貳': 2, u'叄': 3, u'肆': 4, u'伍': 5,
    u'陸': 6, u'柒': 7, u'捌': 8, u'玖': 9,
    # Financial digits whose simplified form differs.
    u'贰': 2, u'叁': 3, u'陆': 6,
    # Multiplier units (everyday, financial, simplified).
    u'十': 10, u'拾': 10,
    u'百': 100, u'佰': 100,
    u'千': 10 ** 3, u'仟': 10 ** 3,
    u'萬': 10 ** 4, u'万': 10 ** 4,
    u'億': 10 ** 8, u'亿': 10 ** 8,
    # ASCII digits.
    u'0': 0, u'1': 1, u'2': 2, u'3': 3, u'4': 4,
    u'5': 5, u'6': 6, u'7': 7, u'8': 8, u'9': 9,
}
def convert(s, encoding="utf-8"):
    """Convert a Chinese numeral string to an integer.

    Every character is looked up in ``dic`` and folded left to right.

    Args:
        s: The numeral, as text or as an encoded byte string.
        encoding: Codec used to decode ``s`` when it is a byte string.
            Ignored for text input.

    Returns:
        The integer value of the numeral; 0 for an empty string.

    Raises:
        KeyError: If ``s`` contains a character that is not a key of ``dic``.
    """
    # Decode byte strings only.  Calling ``decode`` unconditionally broke on
    # text input (no such method on Python 3; implicit ASCII encode on 2).
    if isinstance(s, bytes):
        s = s.decode(encoding)

    ret = 0   # total carried by sections already closed by 億
    temp = 0  # value of the current section
    t = 0     # digit(s) read but not yet attached to a unit

    for c in s:
        n = dic[c]
        if n == 10 ** 8:
            # 億: fold the current section into the total and scale it all.
            ret = (ret + temp + t) * n
            temp = 0
            t = 0
        elif n == 10 ** 4:
            # 萬: scales everything accumulated so far in this section.
            temp = (temp + t) * n
            t = 0
        elif n >= 10:
            # 十 / 百 / 千: a unit with no digit in front means "one" unit,
            # so a leading 十 (as in 十一) counts as ten rather than zero.
            temp += (t or 1) * n
            t = 0
        else:
            # Plain digit.  Zero is stored as 0 (it used to be stored as 1,
            # which made a lone or trailing 零 add one to the result), and
            # consecutive digits accumulate positionally so "2021" works.
            t = t * 10 + n

    return ret + temp + t
# Demo call.  The parenthesised single-argument form prints the same thing
# on Python 2 and is also valid syntax on Python 3.
print(convert("三萬零二十一億億九千三百萬三千零二十一"))