Python 爬取某瓣 Top250 demo:輸出到 HTML

import html
import re
import urllib.request

from bs4 import BeautifulSoup


# Scrape the Douban Top 250 listing (10 pages, 25 entries per page) and write
# each film's rank, title and poster thumbnail as a row of an HTML table in
# douban250.html. Rank and title are also echoed to stdout as progress.
url = 'https://movie.douban.com/top250?start='

# Send a browser-like User-Agent: urllib's default "Python-urllib/x.y" value
# is commonly rejected by servers that filter scripted clients.
headers = {'User-Agent': 'Mozilla/5.0'}

# The context manager guarantees the output file is flushed and closed even
# if a request or a parsing step raises part-way through the crawl.
with open('douban250.html', 'w', encoding='utf-8') as fout:
    fout.write(
        "<html>"
        "<head>"
        "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">"
        "<title>豆瓣"
        "</title>"
        "</head>"
        "<body>"
        "<table border = '1'>"
    )

    for pageNum in range(10):
        # The listing is paginated through the ``start`` offset: 0, 25, ... 225.
        page = pageNum * 25
        request = urllib.request.Request(url + str(page), headers=headers)
        with urllib.request.urlopen(request) as resp:
            doc = resp.read()

        soup = BeautifulSoup(doc, 'html.parser', from_encoding='utf-8')
        card = soup.find('ol', class_='grid_view')
        if card is None:
            # Page layout changed or the request was answered with something
            # other than the listing; report it instead of failing with an
            # AttributeError on None.
            print('no movie list found at start=' + str(page) + ', skipping')
            continue

        for item in card.find_all('div', class_='item'):
            for pic in item.find_all('div', class_='pic'):
                index = pic.find('em').get_text()
                img = pic.find('img')
                name = img.get('alt', '')
                src = img.get('src', '')
                print(index, name)

                # Escape scraped text before embedding it in markup so that
                # characters such as & < > ' " cannot break the table or the
                # single-quoted src attribute.
                fout.write(
                    "<tr>"
                    "<td>" + html.escape(index) + "</td>"
                    "<td>" + html.escape(name) + "</td>"
                    "<td><img src='" + html.escape(src, quote=True)
                    + "' width =50 ></td>"
                    "</tr>"
                )

    fout.write("</table>")
    fout.write("</body>")
    fout.write("</html>")

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章