Scrapy 簡單爬取80s下載鏈接

scrapy startproject tutorial      #創建

#Scrapy\tutorial\tutorial\spiders\demo_spider1.py 中代碼
# -*- coding: utf-8 -*-
# Spider that crawls the 80s.tw movie list pages and collects download links
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spiders.html

import scrapy
from scrapy.selector import Selector

item = []  # module-level accumulator of scraped results, kept for easy output

class DemoSpider(scrapy.Spider):
    """Crawl 80s.tw movie list pages and record one download link per movie.

    ``parse`` handles each list page and schedules a request for every movie
    detail page; ``parse_item`` handles a detail page and appends
    ``{title: first_download_link}`` to the module-level ``item`` list.
    The collected list is printed as JSON once, when the spider closes.
    """
    name = "80s"
    allowed_domains = ["80s.tw"]
    # Materialized as a tuple: a bare generator expression can only be
    # iterated once, which breaks if the URLs are ever read a second time.
    start_urls = tuple(
        "https://www.80s.tw/movie/list/-----p%s" % x for x in range(3)
    )

    def parse(self, response):
        """Yield one detail-page request per movie found on a list page.

        Entries without a title or without a link are skipped, since they
        cannot produce a usable ``{title: link}`` record.
        """
        sel = Selector(response)
        for site in sel.xpath('//ul[@class="me1 clearfix"]/li'):
            mvname = site.xpath('a/@title').extract()
            href = ''.join(site.xpath('a/@href').extract())
            if not mvname or not href:
                continue
            mvurl = 'https://www.80s.tw' + href
            # ``name=mvname`` binds the current title at definition time;
            # a plain closure would see only the last loop value.
            yield scrapy.Request(
                url=mvurl,
                callback=lambda response, name=mvname: self.parse_item(response, name),
            )

    def parse_item(self, response, name):
        """Store the first download link of a movie detail page.

        ``name`` is the list of title strings extracted on the list page;
        its first element is used as the key. Pages with no download link
        (or an empty ``name``) are logged and skipped instead of raising
        IndexError.
        """
        sel = Selector(response)
        links = sel.xpath('//span[@class="dlname nm"]/input/@value').extract()
        if not name or not links:
            self.log("no title or download link found on %s" % response.url)
            return
        item.append({name[0]: links[0]})

    def closed(self, reason):
        """Print all collected results as JSON once the crawl has finished.

        Scrapy calls this when the spider closes, i.e. after every
        ``parse_item`` callback has run, so the list is complete here.
        """
        import json  # local import: the module does not import json at top level

        print(json.dumps(item))
發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章