#! /usr/bin/env python
#encoding=utf-8
__author__ = 'chw'
import re
# from urllib import parse, request
import cookielib
import urllib2,urllib
from PIL import Image
import time
import json
# Cookie store backed by 'cookie.txt'. MozillaCookieJar reads and writes the
# Mozilla/Netscape cookies.txt format (LWPCookieJar would use the Set-Cookie3
# format instead).
cookie = cookielib.MozillaCookieJar('cookie.txt')

# Reuse cookies saved by an earlier run so the credentials need not be posted
# again; if the file cannot be loaded, only a notice is printed.
try:
    cookie.load(ignore_discard=True)
except IOError:
    print('Cookie未加載!')

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36',
    "Host": "www.zhihu.com",
    "Referer": "https://www.zhihu.com/",
}

# Every request made through this opener shares the cookie jar above.
cookie_processor = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(cookie_processor)
# addheaders expects a list of (name, value) tuples rather than a dict.
opener.addheaders = list(headers.items())
def get_xsrf():
    """
    Fetch the Zhihu home page and return the value of its hidden ``_xsrf``
    form field.

    Raises IndexError when the page contains no such field.
    """
    page = opener.open('https://www.zhihu.com').read().decode('utf-8')
    tokens = re.findall(r'<input type="hidden" name="_xsrf" value="(.*?)"', page)
    return tokens[0]
def get_captcha():
    """
    Download a login captcha, show it locally and return what the user types.

    The image is fetched through the shared module-level ``opener``, written
    to 'cptcha.gif' in the current directory and displayed with PIL.

    Returns the string entered at the prompt.
    """
    # Millisecond timestamp sent as the ``r`` query parameter.
    t = str(int(time.time() * 1000))
    # Use https like the rest of the session: cookies marked Secure are not
    # sent over plain http, so an http request may not be tied to the same
    # session as the login POST.
    captcha_url = 'https://www.zhihu.com/captcha.gif?r=' + t + "&type=login"
    # Fetch the captcha with the same opener so it shares the cookie jar.
    image_data = opener.open(captcha_url).read()
    with open('cptcha.gif', 'wb') as f:
        f.write(image_data)
    # Open the image only after the file has been closed, so the full
    # content is on disk before PIL reads it.
    im = Image.open('cptcha.gif')
    im.show()
    captcha = raw_input('本次登錄需要輸入驗證碼: ')
    return captcha
def login(username, password):
    """
    Log in to Zhihu with the given account name and password.

    A ``username`` that starts with exactly 11 digits and nothing else is
    posted to the phone-number login endpoint; anything else is posted to the
    email endpoint. If the first attempt is answered with ``r == 1`` the
    captcha is requested through ``get_captcha()``, the login is retried once
    with it, and the server's ``msg`` field is printed. The cookie jar is
    saved to disk afterwards in either case.

    Returns None.
    """
    # Decide on the endpoint from the ``username`` argument itself rather
    # than from a module-level variable, so the function also works when it
    # is imported and called from elsewhere.
    if re.match(r'\d{11}$', username):
        # Credentials are always posted over https.
        url = 'https://www.zhihu.com/login/phone_num'
        account_field = 'phone_num'
    else:
        url = 'https://www.zhihu.com/login/email'
        account_field = 'email'

    data = {
        '_xsrf': get_xsrf(),
        'password': password,
        'remember_me': 'true',
        account_field: username,
    }

    # First attempt: try to log in without a captcha.
    post_data = urllib.urlencode(data).encode('utf-8')
    r = opener.open(url, post_data)
    result = r.read().decode('utf-8')

    # r == 1 in the JSON reply marks a failed attempt (msg carries the
    # reason); retry once with a captcha supplied by the user.
    if (json.loads(result))["r"] == 1:
        data['captcha'] = get_captcha()
        post_data = urllib.urlencode(data).encode('utf-8')
        r = opener.open(url, post_data)
        result = r.read().decode('utf-8')
        print((json.loads(result))['msg'])

    # Persist the session cookies so that later runs can skip the login.
    cookie.save(ignore_discard=True, ignore_expires=True)
def isLogin():
    """
    Report whether the current cookies belong to a logged-in session.

    The profile settings page is requested through the shared ``opener``;
    True is returned only when the final URL, after any redirects, is still
    that page.
    """
    profile_url = 'https://www.zhihu.com/settings/profile'
    return opener.open(profile_url).geturl() == profile_url
if __name__ == '__main__':
    # Only prompt for credentials when the login check fails.
    if not isLogin():
        account = raw_input('輸入賬號:')
        secret = raw_input('輸入密碼:')
        login(account, secret)
    else:
        print('您已經登錄')
利用cookie訪問其他頁面
#! /usr/bin/env python
#encoding=utf-8
__author__ = 'chw'
import cookielib,urllib2
# Read the cookies stored in 'cookie.txt' into a fresh jar; entries marked
# as discardable or already expired are loaded as well.
cookie = cookielib.MozillaCookieJar()
cookie.load('cookie.txt', ignore_discard=True, ignore_expires=True)

# Build an opener that sends those cookies with every request.
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)

user_agent = ('Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36')
opener.addheaders = [
    ('User-Agent', user_agent),
    ('Referer', 'https://www.zhihu.com/'),
    ("Host", "www.zhihu.com"),
]

# Request a profile page with the saved cookies and print the raw response.
res = opener.open('https://www.zhihu.com/people/lao-chang-58/activities')
body = res.read()
print(body)
參考:http://www.jianshu.com/p/3debfb110ad9
http://www.tuicool.com/articles/UJfaM3A