python3發新浪微博

創建微博應用

登錄 http://open.weibo.com/apps 創建微博應用，獲取App Key和App Secret，並填寫OAuth2.0授權回調頁；如果不知道該填什麼，就寫微博默認的 https://api.weibo.com/oauth2/default.html 。在「應用信息 –> 高級信息」中，授權回調頁和取消授權回調頁都填一樣的地址。
這方面的教程網上很多。

獲取微博的python接口

微博官網提供的python接口是python2.7的，經網友改寫成py3，這裏貼出代碼。
出處：http://www.cnblogs.com/txw1958/archive/2012/08/13/weibo-oauth2-python3.html

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

'''
Python3 client SDK for sina weibo API using OAuth 2.
'''

# Module metadata. The docstring above must stay the first statement of the
# module, otherwise Python does not expose it as the module's __doc__.
__version__ = '1.04'
__author__ = 'Liao Xuefeng ([email protected])'
__publish__ = 'http://www.cnblogs.com/txw1958/'

try:
    import json
except ImportError:
    import simplejson as json
import time
import urllib.request
import logging

def _obj_hook(pairs):
    '''
    Turn a decoded JSON object (a mapping) into a JsonObject.

    Every key is coerced to str; the values are stored unchanged.
    '''
    converted = JsonObject()
    converted.update((str(key), value) for key, value in pairs.items())
    return converted

class APIError(Exception):
    '''
    Exception raised when the API answers with a failure JSON message.

    Instances carry three fields: error_code, error (also used as the
    exception message) and request.
    '''
    def __init__(self, error_code, error, request):
        super().__init__(error)
        self.request = request
        self.error = error
        self.error_code = error_code

    def __str__(self):
        details = (self.error_code, self.error, self.request)
        return 'APIError: %s: %s, request: %s' % details

class JsonObject(dict):
    '''
    General json object that can bind any fields but also acts as a dict.

    Attribute reads and writes are mapped onto dictionary items, so
    ``obj.name`` and ``obj['name']`` refer to the same value.
    '''
    def __getattr__(self, attr):
        '''
        Return the item stored under ``attr``.

        Raises AttributeError (not KeyError) when the item is missing, so
        that hasattr() and getattr(obj, name, default) behave as usual.
        '''
        # __getattr__ only runs after normal attribute lookup has failed.
        try:
            return self[attr]
        except KeyError:
            raise AttributeError(
                "'%s' object has no attribute '%s'" % (type(self).__name__, attr)
            ) from None

    def __setattr__(self, attr, value):
        '''Store ``value`` as the dictionary item named ``attr``.'''
        self[attr] = value

def _encode_params(**kw):
    '''
    Serialize the keyword arguments into a URL query string.

    str values are UTF-8 encoded before quoting; any other value is passed
    through str() first. Names are emitted as given, pairs are joined
    with '&'.
    '''
    def _quoted(value):
        raw = value.encode('utf-8') if isinstance(value, str) else str(value)
        return urllib.parse.quote(raw)

    return '&'.join('%s=%s' % (name, _quoted(value)) for name, value in kw.items())

def _to_bytes(value):
    '''Return a form field value as bytes (text is UTF-8 encoded).'''
    if isinstance(value, (bytes, bytearray)):
        return bytes(value)
    return str(value).encode('utf-8')

def _encode_multipart(**kw):
    '''
    Build a multipart/form-data body.

    Values that expose a ``read`` method are sent as file parts; their
    content type is guessed from the extension of their ``name`` attribute
    (if any). All other values are sent as plain form fields.

    Returns a tuple ``(body, boundary)`` where ``body`` is bytes and
    ``boundary`` is the str to announce in the Content-Type header.
    '''
    # The boundary is derived from the current time in milliseconds.
    boundary = '----------%s' % hex(int(time.time() * 1000))
    delimiter = ('--%s' % boundary).encode('utf-8')
    parts = []
    for k, v in kw.items():
        parts.append(delimiter)
        if hasattr(v, 'read'):
            filename = str(getattr(v, 'name', ''))
            n = filename.rfind('.')
            ext = filename[n:].lower() if n != -1 else ''
            content = v.read()
            if isinstance(content, str):
                # Stream opened in text mode: send its text as UTF-8.
                content = content.encode('utf-8')
            headers = [
                'Content-Disposition: form-data; name="%s"; filename="hidden"' % k,
                'Content-Length: %d' % len(content),
                # The trailing CRLF yields the blank line that ends the part headers.
                'Content-Type: %s\r\n' % _guess_content_type(ext),
            ]
            parts.extend(header.encode('utf-8') for header in headers)
            # File bytes are appended untouched: the body is assembled as
            # bytes so that binary content is never re-encoded.
            parts.append(content)
        else:
            parts.append(('Content-Disposition: form-data; name="%s"\r\n' % k).encode('utf-8'))
            parts.append(_to_bytes(v))
    parts.append(('--%s--\r\n' % boundary).encode('utf-8'))
    return b'\r\n'.join(parts), boundary

# Content types recognised for uploads, keyed by lower-case file extension.
_CONTENT_TYPES = { '.png': 'image/png', '.gif': 'image/gif', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.jpe': 'image/jpeg' }

def _guess_content_type(ext):
    '''Map a file extension to a content type; unknown ones become application/octet-stream.'''
    return _CONTENT_TYPES.get(ext, 'application/octet-stream')

# Request kinds understood by _http_call.
_HTTP_GET = 0
_HTTP_POST = 1
_HTTP_UPLOAD = 2

def _http_get(url, authorization=None, **kw):
    '''Log and perform a GET request through _http_call.'''
    logging.info('GET %s', url)
    return _http_call(url, _HTTP_GET, authorization, **kw)

def _http_post(url, authorization=None, **kw):
    '''Log and perform a form-encoded POST request through _http_call.'''
    logging.info('POST %s', url)
    return _http_call(url, _HTTP_POST, authorization, **kw)

def _http_upload(url, authorization=None, **kw):
    '''Log and perform a multipart POST request through _http_call.'''
    logging.info('MULTIPART POST %s', url)
    return _http_call(url, _HTTP_UPLOAD, authorization, **kw)

def _parse_json(raw):
    '''Decode a UTF-8 response body and parse it as JSON using _obj_hook.'''
    return json.loads(raw.decode('utf-8'), object_hook=_obj_hook)

def _raise_on_api_error(r):
    '''Raise APIError when the parsed response is an object carrying ``error_code``.'''
    if isinstance(r, dict) and 'error_code' in r:
        raise APIError(r['error_code'], r.get('error', ''), r.get('request', ''))

def _http_call(url, method, authorization, **kw):
    '''
    Send an http request and return the parsed JSON response.

    ``method`` is one of _HTTP_GET, _HTTP_POST or _HTTP_UPLOAD. For GET the
    keyword arguments go into the query string, otherwise into the request
    body. When ``authorization`` is truthy it is sent as an ``OAuth2``
    Authorization header.

    Raises APIError when the response JSON contains ``error_code``; this
    includes JSON bodies delivered with an HTTP error status. An HTTP error
    whose body carries no such JSON is re-raised unchanged.
    '''
    from urllib.error import HTTPError

    boundary = None
    if method == _HTTP_UPLOAD:
        params, boundary = _encode_multipart(**kw)
    else:
        params = _encode_params(**kw)

    if method == _HTTP_GET:
        http_url = '%s?%s' % (url, params)
        http_body = None
    else:
        http_url = url
        # Multipart bodies are already bytes; query-style bodies are text.
        http_body = params if isinstance(params, bytes) else params.encode('utf-8')

    req = urllib.request.Request(http_url, data=http_body)
    if authorization:
        req.add_header('Authorization', 'OAuth2 %s' % authorization)
    if boundary:
        req.add_header('Content-Type', 'multipart/form-data; boundary=%s' % boundary)

    try:
        with urllib.request.urlopen(req) as resp:
            body = resp.read()
    except HTTPError as http_error:
        # The error response may still describe the failure as JSON; prefer
        # that description over the bare HTTP status when it is available.
        try:
            failure = _parse_json(http_error.read())
        except (ValueError, OSError):
            failure = None
        _raise_on_api_error(failure)
        raise

    r = _parse_json(body)
    _raise_on_api_error(r)
    return r

class HttpObject(object):
    '''
    Proxy that binds an API client to one HTTP method.

    Reading any attribute that is not found normally yields a callable; the
    attribute name, with every '__' turned into '/', selects the API path.
    '''

    def __init__(self, client, method):
        self.method = method
        self.client = client

    def __getattr__(self, attr):
        def wrap(**kw):
            owner = self.client
            # Refuse to call the API without a valid (unexpired) token.
            if owner.is_expires():
                raise APIError('21327', 'expired_token', attr)
            path = attr.replace('__', '/')
            endpoint = '%s%s.json' % (owner.api_url, path)
            return _http_call(endpoint, self.method, owner.access_token, **kw)
        return wrap

class APIClient(object):
    '''
    API client using synchronized invocation.

    API calls are made through the ``get``, ``post`` and ``upload`` proxies,
    e.g. ``client.post.statuses__update(status=...)``. Unknown attributes of
    the client itself are forwarded to the ``get`` proxy.
    '''
    def __init__(self, app_key, app_secret, redirect_uri=None, response_type='code', domain='api.weibo.com', version='2'):
        self.client_id = app_key
        self.client_secret = app_secret
        self.redirect_uri = redirect_uri
        self.response_type = response_type
        self.auth_url = 'https://%s/oauth2/' % domain
        self.api_url = 'https://%s/%s/' % (domain, version)
        self.access_token = None
        # Absolute expiry time (seconds since the epoch); 0.0 means "no token yet".
        self.expires = 0.0
        self.get = HttpObject(self, _HTTP_GET)
        self.post = HttpObject(self, _HTTP_POST)
        self.upload = HttpObject(self, _HTTP_UPLOAD)

    def set_access_token(self, access_token, expires_in):
        '''
        Store the token and its expiry.

        ``expires_in`` is compared directly against time.time() by
        is_expires(), so it must be an absolute epoch timestamp, such as the
        value returned by request_access_token().
        '''
        self.access_token = str(access_token)
        self.expires = float(expires_in)

    def get_authorize_url(self, redirect_uri=None, display='default'):
        '''
        Return the authorize url the user should be redirected to.

        ``redirect_uri`` overrides the one given to the constructor.
        Raises APIError('21305') when neither is set.
        '''
        redirect = redirect_uri if redirect_uri else self.redirect_uri
        if not redirect:
            raise APIError('21305', 'Parameter absent: redirect_uri', 'OAuth2 request')
        query = _encode_params(client_id=self.client_id,
                               # Honour the response_type chosen at construction time.
                               response_type=self.response_type or 'code',
                               display=display,
                               redirect_uri=redirect)
        return '%s%s?%s' % (self.auth_url, 'authorize', query)

    def request_access_token(self, code, redirect_uri=None):
        '''
        Exchange an authorization code for an access token.

        Returns the response object, e.g.
        {"access_token":"your-access-token","expires_in":12345678}; before
        returning, the current time is added to ``expires_in`` so that it
        becomes an absolute unix-epoch time.
        Raises APIError('21305') when no redirect uri is available.
        '''
        redirect = redirect_uri if redirect_uri else self.redirect_uri
        if not redirect:
            raise APIError('21305', 'Parameter absent: redirect_uri', 'OAuth2 request')

        r = _http_post('%s%s' % (self.auth_url, 'access_token'),
                       client_id=self.client_id,
                       client_secret=self.client_secret,
                       redirect_uri=redirect,
                       code=code, grant_type='authorization_code')

        r.expires_in += int(time.time())
        return r

    def is_expires(self):
        '''Return True when no token is set or the stored expiry time has passed.'''
        return not self.access_token or time.time() > self.expires

    def __getattr__(self, attr):
        '''Forward unknown attributes to the ``get`` proxy.'''
        # __getattr__ runs only when normal lookup fails. If 'get' itself is
        # missing (instance created without __init__), forwarding would look
        # up self.get again and recurse without end.
        if attr == 'get':
            raise AttributeError(attr)
        return getattr(self.get, attr)


def main():
    '''
    Walk through the OAuth 2 flow interactively and post a test status.

    Any exception raised along the way is printed instead of propagated.
    '''
    # Step 1: define the app key, app secret and callback address.
    app_key = "12345678912"
    app_secret = "7be6f636faf7b17d048888888888888888888"
    callback_url = 'https://api.weibo.com/oauth2/default.html'
    try:
        # Step 2: direct the user to the authorization address.
        client = APIClient(app_key=app_key, app_secret=app_secret, redirect_uri=callback_url)
        authorize_url = client.get_authorize_url()
        print(authorize_url)

        # Step 3: exchange the code for an access token.
        # Open the printed address in a browser and authorize with account
        # and password; the address of the resulting page contains the code.
        code = input("Input code:")
        token = client.request_access_token(code)
        client.set_access_token(token.access_token, token.expires_in)
        # The access_token and expires_in obtained here should be saved: the
        # token stays valid for a while, so it can be reused for several
        # posts instead of being requested every time.

        # Step 4: call the API with the OAuth 2.0 access token.
        uid_reply = client.get.account__get_uid()
        print(uid_reply)
        status_text = '測試Python3 + OAuth 2.0發微博 ' + str(time.time())
        print(client.post.statuses__update(status=status_text))
        # Example of posting with a picture (disabled):
        #print(client.upload.statuses__upload(status='測試Python3 OAuth 2.0帶圖片發微博 ' + str(time.time()), pic=open('test.png', 'rb')))
    except Exception as err:
        print(err)


if __name__ == '__main__':
    main()

selenium操作瀏覽器獲取code

這裏面有個不好的地方是，每次發微博都要將網址複製出來用瀏覽器打開才能得到code，豈不是很麻煩，還不如直接用瀏覽器發呢，現在的瀏覽器還能保存密碼自動登陸呢。

順便提一句，用selenium操作瀏覽器，真的就跟你實際打開瀏覽器上網的操作一樣，根本不用操心反爬蟲問題；還可以獲取cookie，傳給其他模塊（如urllib）使用，同樣也可以獲取網頁源碼，進而爬取數據。

這裏介紹用selenium操作瀏覽器自動登陸獲取code的方法。
本方法使用的是chrome瀏覽器，其最新版本的內核是55，需要下載與之對應的瀏覽器驅動（版本是2.27），並將驅動程序所在路徑添加到環境變量中。
selenium的chrome驅動下載地址：http://chromedriver.storage.googleapis.com/index.html
chrome內核版本與驅動對應關係參考：http://blog.csdn.net/huilan_same/article/details/51896672
需要注意的是，每操作一步記得等一等，用time.sleep()停一下；如果不等瀏覽器加載完，是看不到結果的。

from selenium import webdriver
from selenium.webdriver.common.keys import Keys #引入keys類操作
import time

# Script: drive Chrome through the Weibo authorization page and read the
# OAuth code from the address the browser ends up on.

# Open the Chrome browser.
browser = webdriver.Chrome()
# browser.implicitly_wait(30)  # implicit wait, at most 30 seconds
time.sleep(30)

# Open the authorization page.
# This address is the one returned by weibo.APIClient.get_authorize_url()
# in the previous step.
url=r'https://api.weibo.com/oauth2/authorize?display=default&client_id=88888888&redirect_uri=https%3A//api.weibo.com/oauth2/default.html&response_type=code'
browser.get(url)

# Type the account name and password, then press the login button.
# The element id / class names used here come from the page's HTML source.
# NOTE(review): the find_element_by_* helpers were removed in Selenium 4 --
# confirm the installed Selenium version still provides them.
browser.find_element_by_id('userId').send_keys('your_user_name')
time.sleep(1)
browser.find_element_by_id('passwd').send_keys('your_password')
time.sleep(3)
browser.find_element_by_class_name('WB_btn_login').send_keys(Keys.ENTER)
time.sleep(3)

# The browser is then redirected automatically; read the current url and
# extract the code from it.
# This code can be used to log in through weibo.APIClient.
code_url = browser.current_url
# Keeps everything after the last 'code='. If the url contains no 'code='
# the whole url is returned, and any parameter following the code would be
# included as well.
code = code_url.split('code=')[-1]

# Read the page source (the value is not stored here; shown as an example).
browser.page_source
# Read the cookies (likewise not stored here).
browser.get_cookies()

模擬鼠標鍵盤操作獲取url

我發現，360極速瀏覽器保存賬號密碼後，下次直接輸入url就會自動跳轉到帶code的頁面，根本不需要輸入賬號密碼，省心很多。這裏通過模擬鼠標操作和鍵盤按鍵來獲取url。
原理是很簡單，就是打開瀏覽器輸入網址，將鼠標移動到指定的像素點位置，然後右鍵，F鍵，複製網址，最後通過獲取剪貼板內容得到url。

import webbrowser  # 打開瀏覽器
from pymouse import PyMouse  # 模擬鼠標
from pykeyboard import PyKeyboard   # 模擬鍵盤
import time
import win32clipboard, win32con   # 剪切板

# Script: open the authorization address in the default browser, copy the
# address bar through simulated mouse/keyboard input, and read the OAuth
# code from the clipboard text.

# Open the address in the browser.
url=r'https://api.weibo.com/oauth2/authorize?display=default&client_id=88888888&redirect_uri=https%3A//api.weibo.com/oauth2/default.html&response_type=code'
webbrowser.open_new_tab(url)
time.sleep(10)

# Instantiate the mouse and the keyboard.
m = PyMouse()
k = PyKeyboard()

x_dim, y_dim = m.screen_size()  # screen resolution

m.position()  # mouse position; can be used to find the pixel position of the address bar (value not stored here)

# The address is copied by right-clicking it and then pressing F; try it
# manually in the browser first.
# Move to the address bar.
time.sleep(1)
m.click(int(x_dim*0.5), int(y_dim*0.045), button=2, n=1)  # move to the given position, then right-click once
time.sleep(1)

k.press_key('F')  # press the F key
time.sleep(1)
k.release_key('F')  # release the F key
time.sleep(1)

# Close the browser.
# NOTE(review): presumably this clicks the window's close button near the
# top-right corner -- depends on window layout, confirm on the target machine.
m.click(int(x_dim * 0.985), int(y_dim * 0.015), button=1, n=1)

#  alt+tab key combination
# k.press_key(k.alt_key)
# k.tap_key(k.tab_key)
# k.release_key(k.alt_key)

# Read the clipboard content.
# NOTE(review): if GetClipboardData raises, CloseClipboard is never reached.
win32clipboard.OpenClipboard()
time.sleep(1)
new_url = win32clipboard.GetClipboardData(win32con.CF_TEXT)
time.sleep(1)
win32clipboard.CloseClipboard()
# assumes the clipboard text is GBK-encoded -- TODO confirm for the
# target system locale
new_url = new_url.decode('gbk')

# Extract the code: everything after the last 'code=' in the copied url.
code = new_url.split('code=')[-1]