# 爬蟲2_2019年549所中國大學排名

"""
# - 實戰
# - 教程:https://www.bilibili.com/video/av9784617?p=32
"""
import requests
from bs4 import BeautifulSoup
import bs4


def request_url(url, *params):
    """
    Send an HTTP GET to ``url`` and return the response.

    :param url: address to request
    :param params: optional query-string arguments. Each positional item may
        be a dict (e.g. ``{'wd': 'search text'}`` for a Baidu search) or a
        single ``(key, value)`` pair; all items are merged into one query.
    :return: the ``requests.Response`` when the status code is 200, ``None``
        for any other status code, and the string "返回異常" when the request
        itself fails. Callers must check the result before using ``.text``.
    """
    # ``*params`` always arrives as a tuple, so a dict passed positionally
    # would reach requests wrapped in a tuple and be mis-encoded. Flatten
    # every item into a list of (key, value) pairs, which requests accepts.
    query = []
    for item in params:
        if isinstance(item, dict):
            query.extend(item.items())
        elif item is not None:
            query.append(item)

    try:
        # Explicit User-Agent; the original author sets it to avoid 403s.
        headers = {'user-agent': 'my-app/0.0.1'}
        # A timeout keeps a stalled server from blocking the script forever;
        # requests.Timeout is a RequestException, so it is handled below.
        response = requests.get(url, headers=headers, params=query, timeout=10)
        print(response.request.url)
        print('訪問狀態:', response.status_code)
        print('編碼方式:', response.encoding)
        # Decode the body with the encoding detected from its content rather
        # than the one declared in the headers.
        response.encoding = response.apparent_encoding
        if response.status_code == 200:
            return response
        return None
    except requests.RequestException:
        return "返回異常"


def fillUnivList(ulist, html):
    """
    Parse the first ``<tbody>`` in ``html`` and append one entry per table row.

    :param ulist: list to extend in place; each appended entry holds the
        ``.string`` of the row's first four ``<td>`` cells (presumably rank,
        university, location and score, matching the header PrintUnivList
        prints -- confirm against the page layout)
    :param html: page source (``response.text``)
    :return: the same ``ulist`` object
    """
    soup = BeautifulSoup(html, 'html.parser')
    tbody = soup.find('tbody')
    if tbody is None:
        # No table body in the page (layout changed or an error page was
        # returned): nothing to add.
        return ulist

    for tr in tbody.children:
        # ``children`` also yields whitespace text nodes between rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        td_list = tr.find_all('td')
        if len(td_list) < 4:
            # Header/separator rows without the four expected cells.
            continue
        # NOTE: ``.string`` is None when a cell contains more than one child.
        ulist.append([td.string for td in td_list[:4]])

    return ulist


def PrintUnivList(list2):
    """
    Print the ranking rows as a centred, tab-separated table.

    :param list2: list of rows; the first four items of each row are printed
        under the headers rank / university / location / score. A ``None``
        item is printed as an empty cell instead of raising ``TypeError``.
    :return: None (output goes to stdout)
    """
    # The two middle columns are padded with the full-width ideographic space
    # (U+3000) so CJK text lines up; the outer columns use ordinary spaces.
    fill = chr(12288)
    tplt = '{0:^5}\t{1:{fill}^10}\t{2:{fill}^6}\t{3:^6}'

    print(tplt.format('排名', '大學', '位置', '評分', fill=fill))
    for per_univ in list2:
        # None has no support for alignment format specs, so blank it out.
        cells = ['' if per_univ[i] is None else per_univ[i] for i in range(4)]
        print(tplt.format(*cells, fill=fill))


if __name__ == "__main__":
    # Scrape and print the 2019 ranking of Chinese universities.
    list1 = []
    url1 = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2019.html'
    response = request_url(url=url1)
    # request_url returns None for a non-200 status and an error string when
    # the request fails; neither has a ``.text`` attribute, so stop here.
    if not isinstance(response, requests.Response):
        raise SystemExit('無法取得網頁內容: ' + url1)
    ulist = fillUnivList(ulist=list1, html=response.text)
    PrintUnivList(list2=ulist)
# 發表評論
# 所有評論
# 還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
# 相關文章