字符串處理技巧
拆分字符串
# coding=utf-8
import re
# Whitespace-separated input: split() with no argument breaks the text on
# every run of whitespace.
s1 = "a b c d e f g h i j k"
res1 = s1.split()
print(res1)

# Mixed delimiters: a single character class lists every separator, so one
# regex split handles them all at once.
s2 = 'a;b|c,d-e|f\tg;h,i&j k'
res2 = re.compile('[-,;|\t& ]').split(s2)
print(res2)
endswith() startswith()
# coding=utf-8
import os
# Read the directory listing once and reuse the entry names below.
path = './data'
files = os.listdir(path)

# Lazy selection of Python sources; nothing below consumes it, so no
# filtering work is actually performed for it.
py = (name for name in files if name.endswith('.py'))

# endswith() accepts a tuple of suffixes, so a single call matches both
# shell scripts and C sources.
sh_c = (name for name in files if name.endswith(('.sh', '.c')))
print(list(sh_c))
調整字符串順序
2017-10-28 ==》10/28/2017
import re
# Rewrite dates written as YYYY-MM-DD into MM/DD/YYYY for each line of the log.
filename = './data/log'

# Compiled once, outside the loop. Raw strings keep `\d` and `\g<...>` as
# regex syntax rather than (invalid) string escape sequences.
# The same rewrite with numbered groups would use the pattern
# r'(\d{4})-(\d{2})-(\d{2})' and the replacement r'\2/\3/\1'.
date_pattern = re.compile(r'(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})')
us_date = r'\g<month>/\g<day>/\g<year>'

with open(filename) as f:
    for line in f:
        # Only the first 100 characters of each line are converted and printed.
        rline = date_pattern.sub(us_date, line[:100])
        print(rline)
拼接字符串
# Repeated `str + str` creates and releases an intermediate object for every
# element, which gets expensive as the number of pieces grows.
# [x for x in something] is a list comprehension: high memory use.
# (x for x in something) is a generator expression: small memory footprint,
# which is effective for large files.
aa = ['h', 'el', 'lo']
resaa = str.join('', aa)
bb = ['q', 1, '2', 'qwe']
# join() only accepts strings, so every element is passed through str() first.
resbb = ''.join(map(str, bb))
print(resaa, resbb)
對齊
# Sample record whose keys have different lengths.
person = dict(
    name='xiaoming',
    age='23',
    sex='male',
    good_friends='a,b,v',
    hobby='socker',
)

# Width of the longest key; every key is padded out to this width.
w = max(len(key) for key in person)

# str.ljust / str.rjust / str.center give left / right / centered alignment.
# The format-spec markers < > ^ do the same job and are used here.
for k, v in person.items():
    print(f'{k:<{w}} : {v}')
# A fixed-width variant with the built-in format() would be:
# for k, v in person.items():
#     print(format(k, '<3'), v)
去掉不要的字符串
import re
# lstrip() / rstrip() / strip() remove the listed characters from the left
# end, the right end, or both ends of a string; the middle is never touched.
# The sample is padded with '-', so '-' is the character that must be passed.
s = '-----hello-----world-----'
lstripped = s.lstrip('-')
rstripped = s.rstrip('-')
stripped = s.strip('-')
print(lstripped)
print(rstripped)
print(stripped)

# Drop a character at a known position by slicing around it, or drop every
# occurrence with replace().
s = '123-qwe'
print(s[:3] + s[4:])
print(s.replace('-', ''))

# Remove several different characters in one pass with a regex character class.
s2 = 'a;b|c,d-e|f\tg;h,i&j k'
s2 = re.sub('[-,;|\t& ]', '', s2)
print(s2)