實踐目的:
獲取安居客網站上北京二手房的數據。
網頁地址:
https://beijing.anjuke.com/sale/
實踐代碼:
import requests
import time
from bs4 import BeautifulSoup
# Browser-like request headers sent with every page fetch.
# The User-Agent is assembled from adjacent string literals; the trailing
# space at the end of the value is kept exactly as in the original string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.25 '
        'Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400 '
    ),
}
# Crawl the first 10 result pages of the listing and print one line per house.
for i in range(1, 11):
    link = 'https://beijing.anjuke.com/sale/p' + str(i)
    # A timeout keeps a single stalled connection from hanging the whole crawl.
    r = requests.get(link, headers=headers, timeout=10)
    print('現在爬取的是第', i, '頁')
    soup = BeautifulSoup(r.text, 'lxml')
    # Every listing on the page is an <li class="list-item"> element.
    house_list = soup.find_all('li', class_="list-item")
    for house in house_list:
        # Listing title.
        name = house.find('div', class_='house-title').a.text.strip()
        # Total price.
        price = house.find('span', class_="price-det").text.strip()
        # Price per unit of area.
        price_area = house.find('span', class_="unit-price").text.strip()
        # Look the first "details-item" row up once; the remaining fields are
        # read from its children by position.
        # NOTE(review): the odd indexes (3, 5, 7) presumably skip interleaved
        # text nodes between the <span> children -- confirm against the page.
        details = house.find('div', class_="details-item")
        no_room = details.span.text
        area = details.contents[3].text
        floor = details.contents[5].text
        # The fourth field is not present on every listing. Default it so the
        # print below never hits an undefined name or reuses the value left
        # over from the previous listing.
        year = ''
        try:
            year = details.contents[7].text
        except (IndexError, AttributeError):
            print("此項爲空", end="")
        broker = house.find('div', class_='broker-item').contents[3].text
        address = house.find('span', class_='comm-address').text.strip()
        tag_list = house.find_all('span', class_='item-tags')
        # Use a dedicated loop name so the page index `i` is not shadowed.
        tags = [tag.text for tag in tag_list]
        print(name, price, price_area, no_room, area, floor, year, broker,
              address, tags)
    # Pause between pages to avoid hammering the site.
    # NOTE(review): indentation was lost in the source; the sleep is placed at
    # page level (once per page) -- confirm this matches the original intent.
    time.sleep(5)