python3發新浪微博
創建微博應用
登陸http://open.weibo.com/apps
創建微博應用,獲取App Key和App Secret,並填寫OAuth2.0授權回調頁;如果不知道是什麼,就寫微博默認的https://api.weibo.com/oauth2/default.html
在 應用信息–>高級信息 中,授權回調頁和取消授權回調頁都填一樣的。
這個網上很多。
獲取微博的python接口
微博官網提供的python接口是python2.7的,經網友改寫成py3,這裏貼出代碼。
出處:http://www.cnblogs.com/txw1958/archive/2012/08/13/weibo-oauth2-python3.html
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Module metadata: SDK version, original author and where this port was published.
__version__ = '1.04'
__author__ = 'Liao Xuefeng ([email protected])'
__publish__ = 'http://www.cnblogs.com/txw1958/'
# NOTE: this string follows the assignments above, so Python evaluates it as a
# plain expression statement; it does not become the module docstring.
'''
Python3 client SDK for sina weibo API using OAuth 2.
'''
try:
import json
except ImportError:
import simplejson as json
import time
import urllib.request
import logging
def _obj_hook(pairs):
    '''
    Turn one decoded JSON object (a dict) into a JsonObject.

    Passed to json.loads as object_hook; every key is coerced to str
    before it is stored.
    '''
    converted = JsonObject()
    for key in pairs:
        converted[str(key)] = pairs[key]
    return converted
class APIError(Exception):
    '''
    Error raised when the API answers with a failure JSON message.

    Keeps the error code, the error text and the request that caused it;
    only the error text is forwarded to Exception as its argument.
    '''

    def __init__(self, error_code, error, request):
        super().__init__(error)
        self.error_code = error_code
        self.error = error
        self.request = request

    def __str__(self):
        details = (self.error_code, self.error, self.request)
        return 'APIError: %s: %s, request: %s' % details
class JsonObject(dict):
    '''
    General JSON object: a dict whose keys can also be read and written
    as attributes (obj.name is obj['name']).
    '''

    def __getattr__(self, attr):
        # Only called when normal attribute lookup fails. A missing key must
        # surface as AttributeError (not KeyError) so that hasattr() and
        # getattr(obj, name, default) behave as the attribute protocol expects.
        try:
            return self[attr]
        except KeyError:
            raise AttributeError(
                "'%s' object has no attribute '%s'" % (type(self).__name__, attr)
            ) from None

    def __setattr__(self, attr, value):
        # Attribute assignment is stored as a dict item, never in __dict__.
        self[attr] = value
def _encode_params(**kw):
    '''
    Encode keyword arguments as a URL query string.

    Each value is percent-quoted: str values are quoted as UTF-8, bytes
    values are quoted byte-for-byte, and any other value is converted with
    str() first. Keys are emitted unchanged. '/' is left unquoted because
    that is the default safe set of urllib.parse.quote.

    Returns the 'k1=v1&k2=v2' string, or '' when no arguments are given.
    '''
    def quote_value(value):
        # quote() accepts both str and bytes; everything else (numbers, ...)
        # is stringified so it is not sent as a Python repr.
        if isinstance(value, (str, bytes)):
            return urllib.parse.quote(value)
        return urllib.parse.quote(str(value))

    return '&'.join('%s=%s' % (k, quote_value(v)) for k, v in kw.items())
def _encode_multipart(**kw):
    '''
    Build a multipart/form-data body and the boundary that delimits it.

    Each keyword becomes one form part. A value exposing read() is sent as
    a file part: its content type is guessed from the extension of its
    name attribute and its content is read in full. For plain fields, str
    values are used as-is, bytes values are decoded as UTF-8 and any other
    value (for example an int) is converted with str().

    Returns a (body, boundary) tuple of strings. The boundary is derived
    from the current time in milliseconds, not from a random source.
    '''
    boundary = '----------%s' % hex(int(time.time() * 1000))
    data = []
    for k, v in kw.items():
        data.append('--%s' % boundary)
        if hasattr(v, 'read'):
            filename = getattr(v, 'name', '')
            n = filename.rfind('.')
            ext = filename[n:].lower() if n != -1 else ''
            content = v.read()
            # Binary content is mapped one byte -> one code point so it can be
            # joined into the str body; text-mode content is already str.
            if isinstance(content, (bytes, bytearray)):
                content = content.decode('ISO-8859-1')
            # NOTE(review): _http_call encodes the whole body as UTF-8, so code
            # points >= 0x80 would be sent as two bytes each - confirm that
            # binary uploads arrive intact.
            data.append('Content-Disposition: form-data; name="%s"; filename="hidden"' % k)
            data.append('Content-Length: %d' % len(content))
            data.append('Content-Type: %s\r\n' % _guess_content_type(ext))
            data.append(content)
        else:
            data.append('Content-Disposition: form-data; name="%s"\r\n' % k)
            if isinstance(v, str):
                text = v
            elif isinstance(v, (bytes, bytearray)):
                text = v.decode('utf-8')
            else:
                # Numbers and other objects have no decode(); stringify them.
                text = str(v)
            data.append(text)
    data.append('--%s--\r\n' % boundary)
    return '\r\n'.join(data), boundary
# Lower-case file extensions (with the leading dot) and their MIME types.
_CONTENT_TYPES = {
    '.png': 'image/png',
    '.gif': 'image/gif',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.jpe': 'image/jpeg',
}


def _guess_content_type(ext):
    '''
    Return the MIME type registered for ext, falling back to
    'application/octet-stream' for any extension not in _CONTENT_TYPES.
    '''
    if ext in _CONTENT_TYPES:
        return _CONTENT_TYPES[ext]
    return 'application/octet-stream'
# Request kinds understood by _http_call.
_HTTP_GET = 0  # parameters are appended to the URL as a query string
_HTTP_POST = 1  # parameters are sent url-encoded in the request body
_HTTP_UPLOAD = 2  # parameters are sent as a multipart/form-data body
def _http_get(url, authorization=None, **kw):
    '''Log the URL, then perform the request as a GET via _http_call.'''
    message = 'GET %s' % url
    logging.info(message)
    return _http_call(url, _HTTP_GET, authorization, **kw)
def _http_post(url, authorization=None, **kw):
    '''Log the URL, then perform the request as a POST via _http_call.'''
    message = 'POST %s' % url
    logging.info(message)
    return _http_call(url, _HTTP_POST, authorization, **kw)
def _http_upload(url, authorization=None, **kw):
    '''Log the URL, then perform the request as a multipart POST via _http_call.'''
    message = 'MULTIPART POST %s' % url
    logging.info(message)
    return _http_call(url, _HTTP_UPLOAD, authorization, **kw)
def _http_call(url, method, authorization, **kw):
    '''
    Send an HTTP request and return the decoded JSON response.

    url: endpoint URL without a query string.
    method: one of _HTTP_GET, _HTTP_POST or _HTTP_UPLOAD.
    authorization: access token sent as 'Authorization: OAuth2 <token>';
        no header is added when it is falsy.
    kw: request parameters, encoded according to method.

    Returns the parsed JSON (objects are decoded through _obj_hook).
    Raises APIError when the response object carries an 'error_code' field.
    '''
    boundary = None
    if method == _HTTP_UPLOAD:
        params, boundary = _encode_multipart(**kw)
    else:
        params = _encode_params(**kw)
    if method == _HTTP_GET:
        # GET carries its parameters in the URL and has no body.
        http_url = '%s?%s' % (url, params)
        http_body = None
    else:
        http_url = url
        http_body = params.encode(encoding='utf-8')
    req = urllib.request.Request(http_url, data=http_body)
    if authorization:
        req.add_header('Authorization', 'OAuth2 %s' % authorization)
    if boundary:
        req.add_header('Content-Type', 'multipart/form-data; boundary=%s' % boundary)
    # Close the response (and its socket) as soon as the body has been read.
    with urllib.request.urlopen(req) as resp:
        body = resp.read().decode('utf-8')
    r = json.loads(body, object_hook=_obj_hook)
    if isinstance(r, dict) and 'error_code' in r:
        # Report the server's error text, not the code twice; 'error' and
        # 'request' are read defensively in case the server omits them.
        raise APIError(r['error_code'], r.get('error', ''), r.get('request', ''))
    return r
class HttpObject(object):
    '''
    Proxy that turns attribute access into API calls for one HTTP method.

    Reading any attribute yields a function taking keyword parameters; the
    attribute name, with '__' replaced by '/', plus '.json' is appended to
    the client's api_url to form the endpoint.
    '''

    def __init__(self, client, method):
        self.client, self.method = client, method

    def __getattr__(self, attr):
        def call_api(**params):
            owner = self.client
            # Refuse to call the API with a missing or expired token.
            if owner.is_expires():
                raise APIError('21327', 'expired_token', attr)
            path = attr.replace('__', '/')
            endpoint = '%s%s.json' % (owner.api_url, path)
            return _http_call(endpoint, self.method, owner.access_token, **params)
        return call_api
class APIClient(object):
    '''
    API client using synchronized invocation.

    API calls are made through the get, post and upload proxies, e.g.
    client.post.statuses__update(status=...). Unknown attributes on the
    client itself are forwarded to the get proxy.
    '''

    def __init__(self, app_key, app_secret, redirect_uri=None, response_type='code', domain='api.weibo.com', version='2'):
        self.client_id = app_key
        self.client_secret = app_secret
        self.redirect_uri = redirect_uri
        self.response_type = response_type
        self.auth_url = 'https://%s/oauth2/' % domain
        self.api_url = 'https://%s/%s/' % (domain, version)
        # No token yet: is_expires() stays True until set_access_token().
        self.access_token = None
        self.expires = 0.0
        self.get = HttpObject(self, _HTTP_GET)
        self.post = HttpObject(self, _HTTP_POST)
        self.upload = HttpObject(self, _HTTP_UPLOAD)

    def set_access_token(self, access_token, expires_in):
        '''
        Store the token and its expiry; expires_in is compared against
        time.time() by is_expires(), so it must be an absolute timestamp.
        '''
        self.access_token = str(access_token)
        self.expires = float(expires_in)

    def get_authorize_url(self, redirect_uri=None, display='default'):
        '''
        Return the authorize URL the user should be redirected to.

        redirect_uri falls back to the one given to the constructor;
        APIError is raised when neither is set.
        '''
        redirect = redirect_uri if redirect_uri else self.redirect_uri
        if not redirect:
            raise APIError('21305', 'Parameter absent: redirect_uri', 'OAuth2 request')
        # Honour the response_type configured on the client (default 'code').
        query = _encode_params(client_id=self.client_id,
                               response_type=self.response_type,
                               display=display,
                               redirect_uri=redirect)
        return '%s%s?%s' % (self.auth_url, 'authorize', query)

    def request_access_token(self, code, redirect_uri=None):
        '''
        Exchange an authorization code for an access token.

        Returns the response object, e.g.
        {"access_token": "your-access-token", "expires_in": 12345678},
        where expires_in has been shifted by the current time so that it is
        an absolute unix timestamp. Raises APIError when no redirect_uri
        is available.
        '''
        redirect = redirect_uri if redirect_uri else self.redirect_uri
        if not redirect:
            raise APIError('21305', 'Parameter absent: redirect_uri', 'OAuth2 request')
        r = _http_post('%s%s' % (self.auth_url, 'access_token'),
                       client_id=self.client_id,
                       client_secret=self.client_secret,
                       redirect_uri=redirect,
                       code=code, grant_type='authorization_code')
        r.expires_in += int(time.time())
        return r

    def is_expires(self):
        '''Return True when there is no token or its expiry time has passed.'''
        return not self.access_token or time.time() > self.expires

    def __getattr__(self, attr):
        # Only reached when normal lookup fails. If 'get' itself is missing
        # (bare instance, copy, unpickling) forwarding to self.get would
        # recurse without end, and special-method probes must not be turned
        # into API calls - both get a plain AttributeError.
        if attr == 'get' or (attr.startswith('__') and attr.endswith('__')):
            raise AttributeError(attr)
        return getattr(self.get, attr)
def main():
    '''
    Interactive demo: print the authorize URL, exchange the pasted code for
    an access token, then call two API endpoints. Any exception is printed.
    '''
    try:
        # Step 1: app key, app secret and callback URL of the application.
        app_key = "12345678912"
        app_secret = "7be6f636faf7b17d048888888888888888888"
        callback_url = 'https://api.weibo.com/oauth2/default.html'

        # Step 2: send the user to the authorization page.
        client = APIClient(app_key=app_key, app_secret=app_secret, redirect_uri=callback_url)
        authorize_url = client.get_authorize_url()
        print(authorize_url)

        # Step 3: exchange the code for an access token. Open the printed URL
        # in a browser, log in and authorize; the redirected URL holds the code.
        code = input("Input code:")
        token = client.request_access_token(code)
        client.set_access_token(token.access_token, token.expires_in)
        # The access_token and expires_in obtained here are worth saving: the
        # token stays valid for a while and can be reused for further posts
        # instead of being requested again each time.

        # Step 4: call the API with the OAuth 2.0 access token.
        uid = client.get.account__get_uid()
        print(uid)
        status = '測試Python3 + OAuth 2.0發微博 ' + str(time.time())
        posted = client.post.statuses__update(status=status)
        print(posted)
        # To post with a picture, call client.upload.statuses__upload(status=...,
        # pic=open('test.png', 'rb')) instead.
    except Exception as err:
        print(err)


if __name__ == '__main__':
    main()
selenium操作瀏覽器獲取code
這裏面有個不好的地方是,每次發微博都要將網址複製出來用瀏覽器打開才能得到code,豈不是很麻煩,還不如直接用瀏覽器發呢,現在的瀏覽器還能保存密碼自動登陸呢。
順便提一句,用selenium操作瀏覽器真的就是跟你實際打開瀏覽器上網的操作一樣,根本不用操心反爬蟲問題,還可以獲取cookie,傳給其他模塊(如urllib)使用,同樣可以獲取網頁源碼進而爬取數據。
這裏介紹用selenium操作瀏覽器自動登陸獲取code的方法。
方法中使用的是chrome瀏覽器,目前最新版本的內核是55,需要下載對應的瀏覽器驅動(版本是2.27),並將驅動程序所在路徑添加到環境變量中。
selenium的chrome驅動下載地址:http://chromedriver.storage.googleapis.com/index.html
chrome內核版本與驅動對應關係參考:http://blog.csdn.net/huilan_same/article/details/51896672
需要注意的是,每操作一步記得等一等,time.sleep()一下;如果不等瀏覽器加載完,會看不到結果的。
import time
from urllib.parse import parse_qs, urlparse

from selenium import webdriver
from selenium.webdriver.common.keys import Keys  # key constants such as Keys.ENTER

# Open the Chrome browser.
browser = webdriver.Chrome()
# browser.implicitly_wait(30)  # alternative: implicit wait of up to 30 seconds
time.sleep(30)

# Open the authorization page. This URL is the one returned by
# weibo.APIClient.get_authorize_url() in the previous step.
url = r'https://api.weibo.com/oauth2/authorize?display=default&client_id=88888888&redirect_uri=https%3A//api.weibo.com/oauth2/default.html&response_type=code'
browser.get(url)

# Fill in user name and password, then press the login button. The element
# id / class names come from inspecting the page source.
browser.find_element_by_id('userId').send_keys('your_user_name')
time.sleep(1)
browser.find_element_by_id('passwd').send_keys('your_password')
time.sleep(3)
browser.find_element_by_class_name('WB_btn_login').send_keys(Keys.ENTER)
time.sleep(3)

# The browser is then redirected; read the URL and take the code from its
# query string. This code is what weibo.APIClient needs to request a token.
# Parsing the query (instead of splitting on 'code=') ignores any other
# parameters and yields '' when the URL carries no code.
code_url = browser.current_url
code = parse_qs(urlparse(code_url).query).get('code', [''])[-1]

# Page source of the current page.
page_source = browser.page_source
# Cookies of the current session.
cookies = browser.get_cookies()
模擬鼠標鍵盤操作獲取url
我發現,360極速瀏覽器保存賬號密碼後,下次直接輸入url就會自動跳轉到帶code的頁面,根本不需要輸入賬號密碼,省心很多。這裏通過模擬鼠標操作和鍵盤按鍵來獲取url。
原理是很簡單,就是打開瀏覽器輸入網址,將鼠標移動到指定的像素點位置,然後右鍵,F鍵,複製網址,最後通過獲取剪貼板內容得到url。
import time
import webbrowser  # opens the URL in the browser
from urllib.parse import parse_qs, urlparse

import win32clipboard  # clipboard access
import win32con
from pykeyboard import PyKeyboard  # keyboard simulation
from pymouse import PyMouse  # mouse simulation

# Open the authorization URL in the browser.
url = r'https://api.weibo.com/oauth2/authorize?display=default&client_id=88888888&redirect_uri=https%3A//api.weibo.com/oauth2/default.html&response_type=code'
webbrowser.open_new_tab(url)
time.sleep(10)

# Create the mouse and keyboard controllers.
m = PyMouse()
k = PyKeyboard()
x_dim, y_dim = m.screen_size()  # screen resolution
m.position()  # current mouse position; useful for finding the address bar's pixel coordinates

# The URL is copied by right-clicking the address bar and pressing F; try it
# by hand in the browser first.
# Move to the address bar.
time.sleep(1)
m.click(int(x_dim * 0.5), int(y_dim * 0.045), button=2, n=1)  # move there, then right-click
time.sleep(1)
k.press_key('F')  # press F
time.sleep(1)
k.release_key('F')  # release F
time.sleep(1)

# Close the browser.
m.click(int(x_dim * 0.985), int(y_dim * 0.015), button=1, n=1)
# alt+tab key combination
# k.press_key(k.alt_key)
# k.tap_key(k.tab_key)
# k.release_key(k.alt_key)

# Read the clipboard. The clipboard is always closed again, even when the
# read fails, so that it is not left locked.
win32clipboard.OpenClipboard()
try:
    time.sleep(1)
    new_url = win32clipboard.GetClipboardData(win32con.CF_TEXT)
    time.sleep(1)
finally:
    win32clipboard.CloseClipboard()
new_url = new_url.decode('gbk')

# Take the code from the query string; parsing (instead of splitting on
# 'code=') ignores any other parameters and yields '' when there is no code.
code = parse_qs(urlparse(new_url).query).get('code', [''])[-1]
總結:
這種每次發微博都需要獲取code的方法,真的太笨太繁瑣了,只適合自娛自樂罷了。真正商業環境中肯定是用c或者java的接口去做。
selenium很強大,還能登陸微博發微博呢,但是有一種情況不能處理,就是輸入驗證碼啦。
參考
微博的python3接口
http://www.cnblogs.com/txw1958/archive/2012/08/13/weibo-oauth2-python3.html
微博接口的詳細使用方法
http://blog.csdn.net/dongtingzhizi/article/details/9098527
selenium使用方法
http://blog.163.com/ly676830315@126/blog/static/1017337222013102310617946/
http://www.dataguru.cn/thread-477510-1-1.html
https://my.oschina.net/yangyanxing/blog/280871?p=1
http://www.aichengxu.com/view/11949
http://blog.csdn.net/five3/article/details/50013159
chrome內核版本和驅動版本對應參考
http://blog.csdn.net/huilan_same/article/details/51896672
chrome驅動下載
http://chromedriver.storage.googleapis.com/index.html
模擬鼠標鍵盤操作
http://blog.csdn.net/shij19/article/details/53046048