[Algorithm] 中文數字轉換爲阿拉伯數字

#coding=utf-8
# Lookup table: a single numeral character -> its integer value.
# Covers everyday numerals, the formal ("financial") numerals, ASCII digits,
# and both traditional and simplified spellings of the characters that differ.
chs_arabic_map = {
    # Everyday digits.
    u'零': 0, u'〇': 0, u'一': 1, u'二': 2, u'三': 3, u'四': 4,
    u'五': 5, u'六': 6, u'七': 7, u'八': 8, u'九': 9,
    # Alternative spoken/written forms of 1 and 2.
    u'幺': 1, u'兩': 2, u'两': 2,
    # Formal digits (traditional forms, then simplified variants).
    u'壹': 1, u'貳': 2, u'叄': 3, u'肆': 4,
    u'伍': 5, u'陸': 6, u'柒': 7, u'捌': 8, u'玖': 9,
    u'贰': 2, u'叁': 3, u'陆': 6,
    # Units within a four-digit section.
    u'十': 10, u'百': 100, u'千': 10 ** 3,
    u'拾': 10, u'佰': 100, u'仟': 10 ** 3,
    # Section multipliers (traditional and simplified).
    u'萬': 10 ** 4, u'万': 10 ** 4,
    u'億': 10 ** 8, u'亿': 10 ** 8,
    # ASCII digits.
    u'0': 0, u'1': 1, u'2': 2, u'3': 3, u'4': 4,
    u'5': 5, u'6': 6, u'7': 7, u'8': 8, u'9': 9,
}


def convertChineseDigitsToArabic(chinese_digits, encoding="utf-8"):
    """Convert a Chinese numeral string to an integer.

    Each character is looked up in the module-level ``chs_arabic_map`` table.
    Parsing stops at the first character that is not in the table, and the
    value of everything parsed up to that point is returned.

    Args:
        chinese_digits: The numeral, as text or as an encoded byte string.
        encoding: Codec used to decode ``chinese_digits`` when it is a byte
            string. Ignored for text input.

    Returns:
        The integer value of the leading numeral (0 if there is none).

    Example:
        "十一萬三千六百二十一" -> 113621
    """
    # Only byte strings need decoding. ``bytes`` is an alias of ``str`` on
    # Python 2, so behaviour there is unchanged, and text input on Python 3
    # (which has no ``decode`` method) is passed through untouched.
    if isinstance(chinese_digits, bytes):
        chinese_digits = chinese_digits.decode(encoding)

    hnd_mln = 0  # value already closed off by a 10**8 marker
    result = 0   # value accumulated since the last 10**8 marker
    tmp = 0      # digit(s) read but not yet attached to a unit

    for curr_char in chinese_digits:
        curr_digit = chs_arabic_map.get(curr_char)

        if curr_digit is None:
            # Unknown character: stop here but keep what was parsed so far.
            # Checked first so the numeric comparisons below never see None.
            break

        if curr_digit == 10 ** 8:
            # Close the current section, scale it, and fold it into the
            # running total; an earlier total is shifted up by 10**8 as well,
            # which lets stacked markers compound.
            result = (result + tmp) * curr_digit
            hnd_mln = hnd_mln * 10 ** 8 + result
            result = 0
            tmp = 0
        elif curr_digit == 10 ** 4:
            # Everything read in this section so far is a count of 10**4.
            result = (result + tmp) * curr_digit
            tmp = 0
        elif curr_digit >= 10:
            # Ten / hundred / thousand: a missing leading digit means "one".
            result += curr_digit * (tmp or 1)
            tmp = 0
        else:
            # Single digit. Accumulating in base 10 lets runs of digits be
            # read positionally; a leading zero contributes nothing.
            tmp = tmp * 10 + curr_digit

    return hnd_mln + result + tmp


# Demo call; the expected value is 113621.
print(convertChineseDigitsToArabic("十一萬三千六百二十一"))

version 2:

#coding=utf-8
# Lookup table: a single numeral character -> its integer value.
# Covers everyday numerals, the formal ("financial") numerals, ASCII digits,
# and both traditional and simplified spellings of the characters that differ.
dic = {
    # Everyday digits.
    u'零': 0, u'〇': 0, u'一': 1, u'二': 2, u'三': 3, u'四': 4,
    u'五': 5, u'六': 6, u'七': 7, u'八': 8, u'九': 9,
    # Alternative spoken/written forms of 1 and 2.
    u'幺': 1, u'兩': 2, u'两': 2,
    # Formal digits (traditional forms, then simplified variants).
    u'壹': 1, u'貳': 2, u'叄': 3, u'肆': 4,
    u'伍': 5, u'陸': 6, u'柒': 7, u'捌': 8, u'玖': 9,
    u'贰': 2, u'叁': 3, u'陆': 6,
    # Units within a four-digit section.
    u'十': 10, u'百': 100, u'千': 10 ** 3,
    u'拾': 10, u'佰': 100, u'仟': 10 ** 3,
    # Section multipliers (traditional and simplified).
    u'萬': 10 ** 4, u'万': 10 ** 4,
    u'億': 10 ** 8, u'亿': 10 ** 8,
    # ASCII digits.
    u'0': 0, u'1': 1, u'2': 2, u'3': 3, u'4': 4,
    u'5': 5, u'6': 6, u'7': 7, u'8': 8, u'9': 9,
}


def convert(s, encoding="utf-8"):
    """Convert a Chinese numeral string to an integer.

    Each character is looked up in the module-level ``dic`` table.

    Args:
        s: The numeral, as text or as an encoded byte string.
        encoding: Codec used to decode ``s`` when it is a byte string.
            Ignored for text input.

    Returns:
        The integer value of the numeral (0 for an empty string).

    Raises:
        KeyError: If ``s`` contains a character that is not in ``dic``.

    Example:
        "三萬零二十一億億九千三百萬三千零二十一" -> 300210000000000093003021
    """
    # Only byte strings need decoding. ``bytes`` is an alias of ``str`` on
    # Python 2; text input is used as-is.
    if isinstance(s, bytes):
        s = s.decode(encoding)

    ret = 0   # value already closed off by a 10**8 marker
    temp = 0  # value accumulated since the last 10**8 marker
    t = 0     # digit(s) read but not yet attached to a unit

    for c in s:
        n = dic[c]

        if n == 10 ** 8:
            # Close the current section and scale the whole running total,
            # so stacked markers compound.
            ret = (ret + temp + t) * n
            temp = 0
            t = 0
        elif n == 10 ** 4:
            # Everything read in this section so far is a count of 10**4.
            temp = (temp + t) * n
            t = 0
        elif n >= 10:
            # Ten / hundred / thousand: a missing leading digit means "one",
            # so a bare leading ten is worth 10 rather than 0.
            temp += (t or 1) * n
            t = 0
        else:
            # Single digit. Zero must contribute 0, and accumulating in
            # base 10 lets runs of digits be read positionally.
            t = t * 10 + n

    return ret + temp + t

# Demo call; the expected value is 300210000000000093003021.
print(convert("三萬零二十一億億九千三百萬三千零二十一"))
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章