python爬蟲實戰 爬取一比分網的球員數據存入MySQL數據庫

#代碼比較粗糙:一次只能獲取一支球隊最近大名單的球員數據;若要獲取整個聯賽的數據,還需要再寫一層迭代(當時偷懶沒寫)。當時爬取了歐洲五大頂級聯賽的數據,用於tableau可視化。
import pymysql
from urllib.parse import urlencode
import requests
from lxml import etree
# Scrape one team's squad page on data.13322.com, follow every player link
# found there, and store each player's profile as one row in the MySQL table
# ``fm_copy``.
#
# Relies on the file-level imports: ``pymysql``, ``requests`` and
# ``lxml.etree``.

# Squad page of the team to scrape (one team per run).
TEAM_URL = "https://data.13322.com/team-362/2.html"
# Team name written into the first column of every inserted row.
TEAM_NAME = '甘岡'
USER_AGENT = ('Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36')
# Seconds to wait for the server; without a timeout a stalled connection
# would block the script forever.
REQUEST_TIMEOUT = 10

# Links to the individual player pages on the squad page.
PLAYER_LINKS_XPATH = ("//div[@class='team_zj']//td[@align='left']"
                      "//a[@target='_blank']/@href")
# Text nodes of the 1st / 2nd / 3rd table column of the player profile box,
# and the texts of the links inside that box.
FIRST_COLUMN_XPATH = "//div[@class='player-info']//tr/td[1]/text()"
SECOND_COLUMN_XPATH = "//div[@class='player-info']//tr/td[2]/text()"
THIRD_COLUMN_XPATH = "//div[@class='player-info']//tr/td[3]/text()"
LINK_TEXTS_XPATH = "//div[@class='player-info']//td//a/text()"

# Positional insert: the 11 values must match the column order of fm_copy.
# NOTE(review): the table schema is not visible here; confirm the order.
INSERT_SQL = "insert into fm_copy values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"


def fetch_dom(url):
    """Download *url* and return the DOM root parsed by ``lxml.etree.HTML``."""
    with requests.request('GET', url, headers={'User-agent': USER_AGENT},
                          timeout=REQUEST_TIMEOUT) as res:
        return etree.HTML(res.text)


def build_player_row(first_col, second_col, third_col, link_texts,
                     team=TEAM_NAME):
    """Turn the raw text lists of a player page into one ``fm_copy`` row.

    Each value is cut out of its cell text with a fixed slice; the dropped
    leading characters are presumably the field label and its separator
    (assumption based on the original inline comments, confirm on the site).

    Args:
        first_col: texts of ``td[1]``; name, age, shirt number, market value.
        second_col: texts of ``td[2]``; only index 1 (height) is used.
        third_col: texts of ``td[3]``; birth date, weight, preferred foot.
        link_texts: link texts of the profile box; the first is used as
            nationality and the last as playing position.
        team: value stored in the first column of the row.

    Returns:
        An 11-tuple ``(team, name, nationality, birthdate, age, height,
        weight, number, position, preferred_foot, market_value)``.

    Raises:
        ValueError: if any of the lists is shorter than the layout expects.
    """
    try:
        name = first_col[0][3:]
        age = first_col[1][3:]
        number = first_col[2][5:]
        market_value = first_col[3][7:]
        height = second_col[1][3:]
        birthdate = third_col[0][5:]
        weight = third_col[1][3:]
        preferred_foot = third_col[2][-2:]  # value is the last two characters
        nationality = link_texts[0]
        position = link_texts[-1]
    except IndexError as err:
        raise ValueError(
            "player page is missing expected profile fields") from err
    return (team, name, nationality, birthdate, age, height, weight,
            number, position, preferred_foot, market_value)


def main():
    """Scrape every player of ``TEAM_URL`` and insert them into MySQL."""
    # Function-scope import: ``urljoin`` is not among the file-level imports.
    from urllib.parse import urljoin

    # NOTE(review): credentials are hard-coded as in the original script;
    # consider reading them from the environment.
    conn = pymysql.connect(host='localhost', port=3306, user='root',
                           password='123456789', db='bra')
    try:
        with conn.cursor() as cur:
            team_dom = fetch_dom(TEAM_URL)
            for href in team_dom.xpath(PLAYER_LINKS_XPATH):
                # Absolute hrefs pass through unchanged; relative or
                # protocol-relative ones are resolved against the team page.
                player_url = urljoin(TEAM_URL, href)
                player_dom = fetch_dom(player_url)
                try:
                    row = build_player_row(
                        player_dom.xpath(FIRST_COLUMN_XPATH),
                        player_dom.xpath(SECOND_COLUMN_XPATH),
                        player_dom.xpath(THIRD_COLUMN_XPATH),
                        player_dom.xpath(LINK_TEXTS_XPATH),
                    )
                except ValueError as err:
                    # One incomplete profile should not abort the whole run.
                    print("skipping {}: {}".format(player_url, err))
                    continue
                cur.execute(INSERT_SQL, row)
                # Commit per player so rows already scraped survive a later
                # network or parsing failure.
                conn.commit()
    finally:
        # Always release the connection, even when a request fails.
        conn.close()


if __name__ == "__main__":
    main()

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章