# Create the project first (run in a shell): scrapy startproject tutorial
# Code for Scrapy\tutorial\tutorial\spiders\demo_spider1.py:
# -*- coding: utf-8 -*-
# Define here the models for your scraped items
# See documentation in:
# https://doc.scrapy.org/en/latest/topics/items.html
import json

import scrapy
from scrapy.selector import Selector
# Shared accumulator for the scraped results, kept at module level so they
# are easy to output.
item = []
class DemoSpider(scrapy.Spider):
    """Collect one download link per movie from the 80s.tw movie listings.

    ``parse`` walks each listing page and schedules a request for every
    movie detail page, ``parse_item`` records ``{title: link}`` in the
    module-level ``item`` list, and ``closed`` prints that list as JSON once
    the crawl has finished.
    """

    name = "80s"
    allowed_domains = ["80s.tw"]
    # A real list rather than a one-shot generator, so the attribute can be
    # inspected or iterated more than once.
    # NOTE(review): range(3) requests p0, p1 and p2; the examples that used
    # to be commented out here listed only p1 and p2 -- confirm p0 is valid.
    start_urls = [
        "https://www.80s.tw/movie/list/-----p%s" % page for page in range(3)
    ]

    def parse(self, response):
        """Schedule a detail-page request for every movie on a listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            scrapy.Request: One request per list entry that has both a title
            and a link; its callback is ``parse_item`` with the title bound.
        """
        for entry in response.xpath('//ul[@class="me1 clearfix"]/li'):
            titles = entry.xpath('a/@title').extract()
            hrefs = entry.xpath('a/@href').extract()
            if not titles or not hrefs:
                # Skip entries without a usable anchor instead of failing
                # later with an IndexError or requesting the bare site root.
                continue
            yield scrapy.Request(
                # urljoin resolves relative and absolute hrefs alike; only
                # the first href is used so several anchors in one entry
                # cannot be concatenated into a broken URL.
                url=response.urljoin(hrefs[0]),
                # Bind the titles as a default argument so every callback
                # keeps its own value rather than the loop's last one.
                callback=lambda response, name=titles: self.parse_item(
                    response, name
                ),
            )

    def parse_item(self, response, name):
        """Record the first download link found on a movie detail page.

        Appends ``{name[0]: link}`` to the module-level ``item`` list. Pages
        without a title or without a download input are logged and skipped.

        Args:
            response: The downloaded detail page.
            name: List of title strings taken from the listing page; only
                the first one is used as the key.
        """
        links = response.xpath(
            '//span[@class="dlname nm"]/input/@value'
        ).extract()
        if not name or not links:
            self.logger.warning(
                "No title or download link found on %s", response.url
            )
            return
        item.append({name[0]: links[0]})

    def closed(self, reason):
        """Print everything collected in ``item`` as JSON.

        The dump lives in this hook rather than in ``parse`` because the
        requests yielded there are only scheduled, so ``item`` is still
        incomplete when ``parse`` returns.

        Args:
            reason: Why the spider closed; passed by the caller, unused here.
        """
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(json.dumps(item))
Scrapy 簡單爬取80s下載鏈接
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.