from urllib import parse as p
import urllib.request
def use_urlparse(url):
    """Demonstrate ``urlparse`` and ``urlunparse`` on *url*.

    Splits the URL into its components, prints the first five of them with
    a label, then reassembles the components and prints the resulting URL.

    Args:
        url: The URL string to take apart.
    """
    # 1. urlparse(url, scheme=..., allow_fragments=...) recognises and
    #    splits a URL; the ``scheme`` argument only takes effect when the
    #    URL itself does not contain a scheme.
    result = p.urlparse(url=url)
    print("協議:" + result.scheme)
    print("域名:" + result.netloc)
    print("訪問路徑:" + result.path)
    print("參數:" + result.params)
    print("查詢條件:" + result.query)

    # 2. urlunparse() is the inverse: it rebuilds a URL from the components.
    #    The parse result is a tuple, so a plain list copy is all it needs.
    print(p.urlunparse(list(result)))
def use_urlsplit(url):
    """Demonstrate ``urlsplit`` and ``urlunsplit`` on *url*.

    Prints the split result, then the URL rebuilt from that result.

    Args:
        url: The URL string to take apart.
    """
    # 3. urlsplit() is similar to urlparse(), except that the params are
    #    left merged into the path instead of being split out.
    data_url = p.urlsplit(url=url)
    print(data_url)

    # 4. urlunsplit() is the counterpart of urlunparse() for split results.
    print(p.urlunsplit(data_url))
# 5、urljoin 鏈接合成 第一個參數爲基礎鏈接,第二個參數爲新鏈接
def use_urljoin():
    """Demonstrate ``urljoin`` by printing four joined URLs.

    Each call passes a base URL first and a new link second. The base URL
    is analysed and used to fill in the parts the new link is missing.
    """
    # The URL literals carry no stray whitespace: urljoin would otherwise
    # keep a trailing space from the new link in the joined URL.
    print(p.urljoin('http://www.baidu.com', 'FAQ.html'))
    print(p.urljoin('http://www.baidu.com', 'https://cuiqingcai.com/FAQ.html'))
    print(p.urljoin('http://www.baidu.com/about.html', 'https://www.cuiqingcai.com/FAQ.html'))
    print(p.urljoin('http://www.baidu.com/about.html', 'https://cuiqingcai.com/FAQ.html?question=2'))
# 6、urlencode 字典轉請求參數
def use_urlopen_data():
    """Send a form-encoded payload to Baidu and print the raw response body.

    The payload dictionary is turned into a query-string style body with
    ``urlencode`` and encoded to bytes, because ``urlopen`` expects its
    ``data`` argument as bytes rather than ``str``.
    """
    url = "http://www.baidu.com"
    # Payload passed through urlopen's ``data`` parameter.
    data = {
        'wd': 'csdn',
    }
    # Format the parameters: dictionary -> string -> bytes.
    form_data = p.urlencode(data).encode()
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(url=url, data=form_data) as response:
        print(response.read())
# 7、將請求參數化成字典或元組
def use_parse_qs_qsl(query):
    """Demonstrate ``parse_qs`` and ``parse_qsl`` on a query string.

    Args:
        query: The query-string portion of a URL (``key=value&...``).
    """
    # parse_qs returns a dict mapping each key to a list of its values.
    print(p.parse_qs(query))
    # parse_qsl returns a list of (key, value) tuples in their original order.
    print(p.parse_qsl(query))
# 8、quote 中文參數編碼與解碼
def use_quote(word):
    """Demonstrate percent-encoding and decoding of a search keyword.

    Builds a Baidu search URL with *word* percent-encoded, prints it, then
    prints the same URL decoded back with ``unquote``.

    Args:
        word: The keyword to embed in the URL; may contain non-ASCII text.
    """
    # URL encoding
    url = 'https://www.baidu.com/s?wd=' + p.quote(word)
    print(url)
    # URL decoding
    print(p.unquote(url))
if __name__ == '__main__':
    # Sample Baidu search query string. The full URL is built from it so
    # the long literal is written once and the two values cannot drift.
    query = 'ie=utf-8&f=8&rsv_bp=1&rsv_idx=1&tn=baidu&wd=%E6%83%85%E6%AD%8C&oq=yue&rsv_pq=dacbcb5f00043218&rsv_t=fae8tkAseCzI6Y%2FJ3EvIm%2BG4Zy1%2BnWO3oZ%2BaCJXmJwazl7yBr48QuZswiF4&rqlang=cn&rsv_enter=1&rsv_dl=tb&inputT=1744&rsv_sug3=11&rsv_sug1=6&rsv_sug7=100&rsv_sug2=0&rsv_sug4=1744'
    url = 'https://www.baidu.com/s?' + query

    # Run every demo in the order the sections are numbered.
    use_urlparse(url)
    use_urlsplit(url)
    use_urljoin()
    use_urlopen_data()
    use_parse_qs_qsl(query)
    use_quote('魏振東')
爬蟲請求urllib.parse模塊你知多少
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.