python2.7代碼實現:
爬蟲主文件:
# -*- coding: utf-8 -*-
import sys
from scrapy.http import Request
# Python 2 idiom: reload(sys) so that sys.setdefaultencoding is callable, then
# make UTF-8 the process-wide default string encoding.
# The original author marks this step as not strictly required.
reload(sys)
sys.setdefaultencoding("utf-8")
import scrapy
class DmozSpider(scrapy.Spider):
    """Minimal spider that requests the Baidu home page and prints its cookies.

    The class must inherit from ``scrapy.Spider``.
    """

    name = "itcast"  # spider name

    # Browser-like User-Agent header sent with the initial request.
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'}

    def start_requests(self):
        """Build the initial request; used instead of ``start_urls``.

        Returns:
            A one-element list with the request for the Baidu home page.
            ``meta['cookiejar']`` is set to 1 and the response is routed to
            ``parse``.
        """
        return [
            Request(
                'https://www.baidu.com',
                # Attach the User-Agent defined on the class; previously the
                # ``header`` attribute was declared but never sent.
                headers=self.header,
                meta={'cookiejar': 1},
                callback=self.parse
            )
        ]

    def parse(self, response):
        """Print the cookies the server set on ``response``.

        Returns:
            None. The previous ``return 0`` is dropped: Scrapy expects a
            callback to return None or an iterable of requests/items, and a
            bare int is reported as an error.
        """
        # Cookies issued by the server arrive in the ``Set-Cookie`` response
        # header; ``Cookie`` is the request-side header and is not the one
        # carrying the cookies a server hands out.
        cookies = response.headers.getlist('Set-Cookie')
        # Marker lines make the cookie dump easy to spot in the crawl output.
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print(1111111111111111111111)
        print(cookies)
        print(1111111111111111111111)