Python基礎知識(六)--字符串

#字符串  
 
#字符串是用固定的str數據類型表示的，用來存放Unicode字符序列  
#str數據類型可以用來創建一個字符串對象，參數爲空時返回一個空字符串  
a = str()  
print(a)                        #  
a = str("abcdef")  
print(a)                        #abcdef  
#str()函數可以用來進行類型轉換  
a = str(123)  
print(a)                        #123  
#字符串是使用引號創建的，可以使用雙引號，也可以使用單引號，  
#字符串兩端所用引號必須相同  
#還可以使用三引號包含的字符串，這是Python對兩端都使用三個引號的字符串的叫法  
text = """A triple quoted string like this can include 'quotes' and  
"quotes" without formality. We can also escape newlines \  
so this particular string is actually only two lines long.""" 
#如果字符串中使用的引號與包含字符串所用引號不同時，  
#可以直接使用，如果相同時，需要進行轉義  
a = "Single 'quotes' are fine; \"doubles\" must be escaped." 
b ='single \'quotes\' must be escaped; "doubles" are fine.' 
 
#在三引號內可以直接使用換行，通過\n可以在任何字符串中包含換行

#Python字符串轉義  
\newline            #忽略換行?  
\\                  #反斜槓  
\'                  #單引號  
\"                  #雙引號  
\a                  #ASCII蜂鳴 (BEL)  
\b                  #ASCII退格(BS)  
\f                  #ASCII走紙(FF)  
\n                  #ASCII換行(LF)  
\N{name}            #給定名稱的Unicode字符  
\ooo                #給定八進制的字符  
\r                  #ASCII回車符(CR)  
\t                  #ASCII製表符(TAB)  
\uhhhh              #給定16位十六進制的Unicode字符  
\Uhhhhhhhh          #給定32位十六進制的Unicode字符  
\v                  #ASCII垂直製表符(VT)  
\xhh                #給定8位十六進制的Unicode字符

#在使用正則表達式的時候，由於需要使用大量字面意義反斜槓，  
#由於每個反斜槓都需要進行轉義處理，從而造成了不便：  
import re  
phone1 = re.compile("^((?:[(]\\d+[)])?\\s*\\d+(?:-\\d+)?)$")  
#解決的方法是使用原始字符串  
#這種引號或三引號包含的字符串的第一個引號由r引導  
phone2 = re.compile(r"^((?:[(]\d+[)])?\s*\d+(?:-\d+)?)$")  
 
#如果有一個長字符串跨越了兩行或更多行，但不使用三引號包含，有兩種方法：  
t = "This is not the best way to join two long strings " + \  
    "together since it relies on ugly newline escaping" 
      
s = ("this is the nice way to join two long strings" 
     "together; it relies on string literal concatenation.")  
#第二種情況，用圓括號將其包含在一起，構成一個單獨的表達式，  
#如果不使用圓括號就只會對第一個字符串賦值，  
#第二個字符串會引起IndentationError異常  
 
#.py文件默認使用UTF-8 Unicode編碼，因此可以寫入任何Unicode字符  
#(這點我就遇到過問題，出現SyntaxError: Non-UTF-8 code，難道是Eclipse搞的鬼?)  
#(改變文件編碼可以解決這個問題)  
#(但IDLE支持倒是真的)  
euros = "€\N{euro sign}\u20AC\U000020AC" 
                          #\N{...}中的Unicode字符名稱不區分大小寫  
print(euros)              #€€€€  
#而且連標識符也可以使用Unicode字符  
姓名 = "張小三" 
print(姓名)               #張小三  
#也就是說支持中文變量名的，雖然這樣用的人很少，但我倒是覺得以後可以這麼試試了  
#如果想知道字符串中某個字符的Unicode碼點，可以用內置的ord()函數  
print(ord(euros[0]))      #8364  
print(hex(ord(euros[0]))) #0x20ac  
#同樣，也可以把表示有效碼點的任意整數轉換成Unicode字符  
#這需要使用內置chr()函數  
s = "anarchists are " + chr(8734) + chr(0x23B7)  
print(s)                        #anarchists are ∞⎷  
print(ascii(s))                 #'anarchists are \u221e\u23b7'

#比較字符串  
#字符串支持的比較運算符包括：< <= == != > >=  
#對於使用Unicode的字符串，比較運算存在兩個問題：  
#1.同一個字符可能有多種不同的碼點序列表示方式(有的字符多達三種)，  
#  因此編碼爲UTF-8後的字節序列也可能不同  
#  解決方法導入unicodedata模塊  
#  以"NFKD"爲第一個參數，調用unicodedata.normalize()  
#  經該函數規範化之後，同一個字符編碼爲UTF-8時總是得到相同的字節序列  
#2.有些字符的排序是特定於某種語言的，而有些字符並不具備有意義的排序位置  
 
 
#字符串分片與步距  
#序列中的單個數據或字符串中的單個字符可以用數據項存取操作符[]來提取  
#索引值從0開始，直到字符串長度-1  
#負索引值最後一個字符爲-1，向前逐漸遞減  
#存取超過索引範圍的字符會產生IndexError  
#分片操作符的語法格式  
#seq[start:]                        #提取start開始到字符串結尾  
#seq[start:end]                     #提取start到end-1的字符串  
#seq[start:end:step]                #提取start到end-1的字符串，每次間隔step  
text = "abcdefghijklmnopqrstuvwxyz" 
print(text[0])                      #a  
print(text[0:])                     #abcdefghijklmnopqrstuvwxyz  
print(text[2:10])                   #cdefghij  
print(text[:20])                    #abcdefghijklmnopqrst  
print(text[::2])                    #acegikmoqsuwy  
print(text[10::2])                  #kmoqsuwy  
print(text[10:26:2])                #kmoqsuwy  
print(text[26::-1])                 #zyxwvutsrqponmlkjihgfedcba  
print(text[::-1])                   #zyxwvutsrqponmlkjihgfedcba  
 
#字符串操作符與方法  
#字符串是固定序列，所有用於固定序列的功能都可用於字符串  
#包括in進行成員關係測試，+=進行追加操作  * 進行復制  *= 進行增加的複製  
subtext = "def" 
print(subtext in text)              #True  
subtext += "ghi" 
print(subtext)                      #defghi  
subtext *= 3 
print(subtext)                      #defghidefghidefghi

#字符串方法  
#--------------------------------------------------------------------  
s.capitalize()          #返回字符串s的副本，並將首字符大寫  
text = "this is a test text" 
print(text.capitalize())            #This is a test text  
#--------------------------------------------------------------------  
s.center(width, char)   #返回一個長度爲width的字符串  
                        #字符串s在返回字符串的中間位置  
                        #其餘部份用char添充，char默認爲空格  
s = "abd" 
print(s.center(20))                 #        abd           
print(s.center(20, "*"))            #********abd*********  
#--------------------------------------------------------------------  
s.count(t, start, end)  #返回在s字符串中，start:end分片中，  
                        #子串t出現的次數  
s = "abcdabcdabcd" 
s.count("bc")                       #3  
s.count("bcda")                     #2  
s.count("bcda", 1, 8)               #1  
#--------------------------------------------------------------------  
s.encode(encoding, err) #返回一個bytes對象用指定編碼格式來表示該字符串  
                        #並根據可選的err處理錯誤  
s = "中国"                #此處須用簡體「中国」：下面的輸出對應簡體字，且繁體「國」不在GB2312字符集中  
print(s.encode(encoding='utf_8', errors='strict'))  
                                    #b'\xe4\xb8\xad\xe5\x9b\xbd'   
print(s.encode(encoding='GB2312', errors='strict'))  
                                    #b'\xd6\xd0\xb9\xfa'  
print(s.encode(errors='strict'))    #b'\xe4\xb8\xad\xe5\x9b\xbd'  
                                    #默認的encoding是'utf_8'  
#--------------------------------------------------------------------  
s.endswith(x, start, end)   #如果s或s[start:end]分片以字符串x或  
                            #元組x中的任意字符串結尾，則返回True，否則返回False  
s = "中華人民共和國" 
x = "國" 
print(s.endswith(x))                #True  
print(s.endswith(x, 2, 5))          #False  
x = ('一', '國')  
print(s.endswith(x))                #True  
#--------------------------------------------------------------------  
s.expandtabs(size)      #返回s的一個副本，其中製表符用8(默認)或size個空格替換  
                        #這個替換不是直接在tab的位置上插入size個空格，而是與前文相關聯計算空格數  
s = "abc\tdef\tghi" 
print(s.expandtabs(4))              #abc def ghi  
print(s.expandtabs(8))              #abc     def     ghi  
print(s.expandtabs())               #abc     def     ghi  
#--------------------------------------------------------------------  
s.find(t, start, end)   #返回t在s或s[start:end]之中的最左位置，如果沒有找到返回-1  
                        #使用s.rfind()可以返回相應的最右位置  
s = "this is a test text" 
print(s.find('is'))                 #2  
print(s.rfind('is'))                #5  
#--------------------------------------------------------------------  
s.format(...)           #格式化字符串，這個在後面詳細解釋  
#--------------------------------------------------------------------  
s.index(t, start, end)  #返回t在s或s[start:end]之中的最左位置，如果沒有找到則拋出ValueError異常  
                        #使用s.rindex()可以從最右邊開始搜索  
                        #用法同s.find()                                
#--------------------------------------------------------------------  
s.isalnum()             #如果s非空，並且其中每個字符都是字母數字的就返回True  
s = "abd123" 
print(s.isalnum())                  #True  
s += "_" 
print(s.isalnum())                  #False  
#--------------------------------------------------------------------  
s.isalpha()             #如果s非空，並且其中每個字符都是字母的就返回True  
s = "abd" 
print(s.isalpha())                  #True  
s += "123" 
print(s.isalpha())                  #False  
#--------------------------------------------------------------------  
s.isdecimal()           #如果s非空，並且每個字符都是Unicode的基數爲10的數字就返回True  
s = "1234" 
print(s.isdecimal())                #True  
s = "0x1304" 
print(s.isdecimal())                #False  
#--------------------------------------------------------------------  
s.isdigit()             #如果非空，並且每個字符都是數字字符(不限於ASCII，還包括上標數字等Unicode數字)，則返回True  
s = "1234" 
print(s.isdigit())                  #True  
s += "a" 
print(s.isdigit())                  #False  
#--------------------------------------------------------------------  
s.isidentifier()        #如果s非空，並且是一個有效的標識符，則返回True  
s = "abc" 
print(s.isidentifier())             #True  
s = "abc#%^#" 
print(s.isidentifier())             #False  
#--------------------------------------------------------------------  
s.islower()             #如果s有至少一個可小寫的字符，並且所有可小寫的字符都是小寫就返回True  
s = "abc" 
print(s.islower())                  #True  
s = "Abc" 
print(s.islower())                  #False  
s = "123" 
print(s.islower())                  #False  
#--------------------------------------------------------------------  
s.isnumeric()           #如果s非空，並且每個字符都是Unicode數值字符(數字、分數、羅馬數字等)就返回True，範圍比s.isdigit()更廣  
#--------------------------------------------------------------------  
s.isprintable()         #如果s爲空，或者每個字符都是可打印字符，  
                        #包括空格但不包括換行，則返回True  
s = "this is a text" 
print(s.isprintable())              #True  
s = "this is a text\n" 
print(s.isprintable())              #False  
#--------------------------------------------------------------------  
s.isspace()             #如果s非空，並且所有字符都是空白，則返回True  
s = "   " 
print(s.isspace())                  #True  
s = "  1 " 
print(s.isspace())                  #False  
#--------------------------------------------------------------------  
s.istitle()             #如果s非空，且每個單詞都是首字母大寫、其餘字母小寫，就返回True  
s = "This is a test" 
print(s.istitle())                  #False  
s = "This Is A Test" 
print(s.istitle())                  #True  
#--------------------------------------------------------------------  
s.isupper()             #如果s有至少一個可大寫字符且所有可大寫字符均爲大寫則返回True  
                        #可參考s.islower()  
#--------------------------------------------------------------------  
s.join(seq)             #返回序列中所有項連接起來的結果，  
                        #並以s(可以爲空)在每兩項之間分隔  
s = "*" 
seqs = ("this", "is", "a", "test")  
print(s.join(seqs))                 #this*is*a*test  
print(" ".join(seqs))               #this is a test  
print("".join(seqs))                #thisisatest  
print(" ".join(["this", "is", "a", "test"]))  
                                    #this is a test  
#--------------------------------------------------------------------  
s.ljust(width, char)    #返回一個長度爲width的字符串，s在其中左對齊，右側以char(默認爲空格)添充  
                        #可參考s.center()，s.rjust()爲右對齊(在左側添充)  
#--------------------------------------------------------------------  
s.lower()               #將s中的字符變爲小寫  
s = "ABC123" 
print(s.lower())                    #abc123  
#--------------------------------------------------------------------  
s.maketrans()           #與s.translate()對應，可以產生一個轉換表  
a = "abcde" 
b = "Hello" 
x = a.maketrans(a, b)  
print(a.translate(x))               #Hello  
                        #貌似可以進行替換，或是小小的加密也不錯  
#--------------------------------------------------------------------  
s.partition(t)          #返回包含三個字符串的元組，分別是：  
                        #s中在t子串之前的部分  
                        #t  
                        #s中在t子串之後的部分  
                        #如果t不在s中，則返回s與兩個空字符串  
                        #使用s.rpartition(t)可以在s中最右邊的t處分區  
s = "My country is China" 
t = "country" 
print(s.partition(t))               #('My ', 'country', ' is China')  
t = "ABCD" 
print(s.partition(t))               #('My country is China', '', '')  
t = "is" 
print(s.rpartition(t))              #('My country ', 'is', ' China')  
#--------------------------------------------------------------------  
s.replace(t, u, n)      #返回字符串s的一個副本，其中每個或n個t用u替換  
s = "this is a text" 
print(s.replace("is", "Is"))        #thIs Is a text  
print(s.replace("is", "Is", 1))     #thIs is a text  
#--------------------------------------------------------------------  
s.split(t, n)           #返回一個字符串列表，在t處最多分割n次  
                        #如果n沒指定，就儘可能分割多次  
                        #如果t沒指定，就以空白處分割  
                        #s.rsplit(t, n)是從右側開始分割，只有指定n，  
                        #且n小於可分割的最大次數時纔有效  
s = "this is a test text" 
print(s.split("s"))                 #['thi', ' i', ' a te', 't text']  
print(s.split('s', 2))              #['thi', ' i', ' a test text']  
print(s.rsplit('s', 2))             #['this i', ' a te', 't text']  
#--------------------------------------------------------------------  
s.splitlines(f)         #返回在行終結符處分割產生的行列表  
                        #並剝離行終結符(除非f爲True)  
s = "this\nis\na\ntest\ntext" 
print(s.splitlines())               #['this', 'is', 'a', 'test', 'text']  
print(s.splitlines(True))           #['this\n', 'is\n', 'a\n', 'test\n', 'text']  
#--------------------------------------------------------------------  
s.startswith(x, start, end)  
                        #如果字符串s或s[start:end]是以字符串x，  
                        #或元組中任一項開始，則返回True，否則返回False  
                        #可參考s.endswith()  
#--------------------------------------------------------------------  
s.strip(chars)          #將字符串開始和結尾處的chars中的字符移除  
                        #chars默認爲空白字符(空格、製表符、換行等)  
                        #s.lstrip(chars)爲去除字符串開始處的chars  
                        #s.rstrip(chars)爲去除字符串結尾處的chars  
print(" my name is Xiaoming. ".strip())  
                                    #my name is Xiaoming.   
print(" my name is Xiaoming. ".lstrip())  
                                    #my name is Xiaoming.  
print(" my name is Xiaoming. ".rstrip())  
                                    # my name is Xiaoming.  
#--------------------------------------------------------------------  
s.swapcase()            #將字符串中大寫轉換成小寫，小寫轉換成大寫  
s = "This Is A Test Text" 
print(s.swapcase())                 #tHIS iS a tEST tEXT  
#--------------------------------------------------------------------  
s.title()               #將每個單詞的首字母轉成大寫，其它字母轉成小寫  
s ="tHIS iS a tEST tEXT" 
print(s.title())                    #This Is A Test Text  
#--------------------------------------------------------------------  
s.upper()               #將字符全部轉換成大寫，可參考s.lower()  
#--------------------------------------------------------------------  
s.zfill(width)          #返回s的副本，如果s長度小於width則在開始處添加0使之長度爲width  
s = "test" 
print(s.zfill(15))                  #00000000000test  
#--------------------------------------------------------------------

Python基礎知識(六)--字符串

Python基礎知識(五)--數據類型

Python基礎知識(六)--字符串

Python基礎知識(七)--字符串詳解

我的友情鏈接

Python基礎知識（一）

Mac下配置sublime實現LaTeX

https://yachay.unat.edu.pe/blog/index.php?comment_area=format_blog&comment_component=blog&comment_co

linux以太網驅動總結