Python代碼實現中國日報網雙語文章訂閱至郵箱

import requests
from lxml import etree
import random
import smtplib
import requests
from email.mime.text import MIMEText
import re
import schedule
import time

# Request a URL and return the page content.
def deal_url(url):
    """Download *url* and return the response body as text.

    One address is picked at random from a hard-coded proxy list and tried
    first. If the request through that proxy fails for any reason, the page
    is fetched again over a direct connection.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        The decoded response body (``Response.text``).

    Raises:
        requests.RequestException: If the direct request fails as well.
    """
    header = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
    proxyIPs = ['27.188.64.70','163.204.246.139','121.13.252.60','180.118.247.69','111.75.223.9','1.193.244.92']
    proxyIP = random.choice(proxyIPs)
    proxies = {
        'http': proxyIP,
        'https': proxyIP,
    }
    # Seconds to wait before giving up on a request; without a timeout a
    # dead proxy or server would block the caller forever.
    timeout = 10
    try:
        # Keyword arguments are required here: the second positional
        # parameter of requests.get() is ``params``, so a positional proxy
        # dict would be sent as a query string instead of being used as the
        # proxy configuration.
        response = requests.get(url, headers=header, proxies=proxies, timeout=timeout)
    except requests.RequestException:
        # The proxy list is static and entries may be unreachable, so fall
        # back to a direct connection rather than failing the fetch.
        response = requests.get(url, headers=header, timeout=timeout)
    return response.text

# Collect the article links listed on the index page.
def geturls():
    """Return the article links found on the bilingual-news index page.

    The index HTML is downloaded with ``deal_url``. For every occurrence of
    the marker ``gy_box_txt2`` a fixed window (offsets 68 to 150 after the
    start of the marker) is inspected, and the text between the first two
    double quotes inside that window is taken as a link. Windows holding
    fewer than two double quotes contribute nothing.

    Returns:
        A list of link strings in page order, exactly as they appear
        between the quotes.
    """
    index_html = deal_url('https://language.chinadaily.com.cn/news_bilingual/')
    links = []
    for marker in re.finditer('gy_box_txt2', index_html):
        offset = marker.start()
        window = index_html[offset + 68:offset + 150]
        # Splitting on '"' yields at least three pieces exactly when the
        # window holds two or more quotes; piece 1 is the quoted text.
        pieces = window.split('"')
        if len(pieces) < 3:
            continue
        links.append(pieces[1])
    return links

# Fetch one article and return its title and body text.
def getContent():
    """Pick a random article from the index page and return its title and body.

    The index is fetched once through ``geturls``, one link is chosen at
    random, and that article page is downloaded with ``deal_url``.

    Returns:
        A ``(title, body)`` tuple of strings.

        ``title`` is the text between the first ``>`` and the following
        ``<`` after the ``main_title1`` marker, or ``''`` when the marker or
        either delimiter cannot be found.

        ``body`` is the direct text of each ``<p>`` child of the element
        with ``id="Content"``, with non-breaking spaces removed and each
        paragraph followed by a blank line. Collection stops at the first
        paragraph containing the source marker; that paragraph is not
        included.

    Raises:
        ValueError: If the index page yields no article links.
    """
    # Fetch the index a single time and really pick a random entry.
    urls = geturls()
    if not urls:
        raise ValueError('no article links found on the index page')
    url = 'https:' + random.choice(urls)
    html = deal_url(url)

    # Title: locate the marker, skip past it (marker length + closing
    # quote = 12 characters), then take the text between the next '>' and
    # the '<' that follows it.
    title0 = ''
    marker = html.find('main_title1')
    if marker != -1:
        start = html.find('>', marker + 12)
        end = html.find('<', start + 1) if start != -1 else -1
        if end != -1:
            title0 = html[start + 1:end]

    # Body: join the text nodes of each paragraph directly instead of
    # going through str(list), which would leak list punctuation and
    # escape sequences into the e-mail text.
    paragraphs = []
    for node in etree.HTML(html).xpath('//*[@id="Content"]/p'):
        text = ''.join(node.xpath('text()')).replace('\xa0', '')
        # The source line marks the end of the article text.
        if '來源:' in text:
            break
        paragraphs.append(text + '\n' * 2)
    return title0, ''.join(paragraphs)

# Send the article by e-mail.
def sendtoEmail():
    """Fetch one article and mail it from ``user`` to ``to`` over SMTP/SSL.

    Reads the module-level names ``user``, ``pwd`` and ``to``. The article
    is fetched exactly once so the subject and the body always belong to
    the same article. Fetch and delivery failures are printed rather than
    raised, so a failing run does not propagate an exception to the caller.
    """
    try:
        title, body = getContent()
    except (requests.RequestException, etree.LxmlError, ValueError) as e:
        print("發送出錯,%s" % e)
        return

    msg = MIMEText(body)  # message body
    msg["Subject"] = '【原視界文章推送】' + title  # message subject
    msg["From"] = user
    msg["To"] = to
    try:
        # The context manager closes the connection even when login or
        # sendmail raises.
        with smtplib.SMTP_SSL("smtp.qq.com", 465) as s:
            s.login(user, pwd)
            s.sendmail(user, to, msg.as_string())
        print("發送成功!", time.ctime(time.time()))
    except (smtplib.SMTPException, OSError) as e:
        # OSError covers socket-level failures while connecting.
        print("發送出錯,%s" % e)

# Set up the timed delivery.
def send_mail_by_schedule():
    """Register ``sendtoEmail`` for 12:00 and 18:00 daily, then poll forever.

    This function never returns: once both jobs are registered it checks
    for due jobs, sleeps one second, and repeats.
    """
    for clock_time in ("12:00", "18:00"):
        schedule.every().day.at(clock_time).do(sendtoEmail)
    while True:
        schedule.run_pending()
        time.sleep(1)

if __name__ == "__main__":
    # Start-up banner.
    print('=======訂閱系統=======')
    # user, pwd and to are assigned at module scope, where sendtoEmail
    # looks them up; a ``global`` statement has no effect at this level.
    mail_domain = '@qq.com'
    user = input('請輸入QQ號:') + mail_domain
    pwd = input('請輸入郵箱授權碼:')
    to = input('請輸入訂閱號:') + mail_domain
    print('蟲子管家已爲您開啓訂閱,請勿退出此程序並保持網絡通暢,訂閱文章於每天12:00和18:00準時發送至您的郵箱!')
    # Blocks forever running the scheduler loop.
    send_mail_by_schedule()

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章