scrapy——京東

import scrapy
import json


class CatalogSpider(scrapy.Spider):
    """Spider that fetches the category JSON from dc.3.cn and prints it as a tree.

    The response is expected to be a JSON object with a ``data`` list. Every
    node carries an ``n`` string of ``|``-separated fields (the first field is
    used as the URL, the second as the title) and, for the levels that are
    walked here, an ``s`` list holding its child nodes.
    """

    name = 'catalog'
    allowed_domains = ['3.cn']
    start_urls = ['https://dc.3.cn/category/get']

    def parse(self, response):
        """Build the nested category list from ``response`` and print it.

        The entries of ``data`` themselves are not emitted; their ``s``
        children form the first level of the result. Two further levels are
        nested below each of them under the ``"child"`` key; the deepest level
        has only ``"url"`` and ``"title"``.

        Args:
            response: Scrapy response whose body is GBK-encoded JSON.

        Raises:
            KeyError: if an expected ``data``/``s``/``n`` key is missing.
            IndexError: if an ``n`` string has fewer than two ``|`` fields.
        """
        # Decode the bytes explicitly: json.loads() no longer accepts an
        # ``encoding`` keyword (removed in Python 3.9, ignored before that).
        jd_json = json.loads(response.body.decode('gbk'))

        # Each node is added to its parent exactly once, so the printed tree
        # contains no repeated categories.
        result = [
            self._build_node(section, 2)
            for group in jd_json['data']
            for section in group['s']
        ]
        print(result)

    @classmethod
    def _build_node(cls, node, child_levels):
        """Convert one raw category node into a ``{"url", "title"[, "child"]}`` dict.

        Args:
            node: raw dict with an ``n`` string and, when ``child_levels`` is
                positive, an ``s`` list of child nodes.
            child_levels: how many further levels to descend below this node.
                At ``0`` the ``s`` key is not read and no ``"child"`` key is
                added.

        Returns:
            The converted node.
        """
        # Split once and reuse the fields for both url and title.
        fields = node['n'].split('|')
        entry = {
            "url": fields[0],
            "title": fields[1],
        }
        if child_levels > 0:
            entry["child"] = [
                cls._build_node(sub, child_levels - 1)
                for sub in node['s']
            ]
        return entry

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章