Create an EPUB with Python

import logging
import re
from html import escape
from urllib.parse import urljoin

import certifi
import urllib3
from ebooklib import epub
from lxml import html

# Module logger: records at INFO and above are emitted through a dedicated
# StreamHandler, one timestamped line per record.
log = logging.getLogger(__name__)

formatter = logging.Formatter(
    fmt='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
log_handler = logging.StreamHandler()
log_handler.setFormatter(formatter)

log.addHandler(log_handler)
log.setLevel(logging.INFO)

# Shared HTTP client.  TLS certificates are verified against certifi's CA
# bundle.  An explicit timeout is configured because, without one, a single
# stalled connection would block the whole scrape indefinitely.
http = urllib3.PoolManager(
    cert_reqs='CERT_REQUIRED',
    ca_certs=certifi.where(),
    timeout=urllib3.Timeout(connect=10.0, read=30.0),
)
# URL prefix of the volume index pages; the volume number is appended to it.
base = 'https://www.qingyunian.net/qingyunian'

def _fetch_page(url):
    """Download ``url`` and return its body parsed as an lxml HTML tree.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        The root element produced by ``lxml.html.fromstring``.

    Raises:
        RuntimeError: If the server does not answer with HTTP 200, so that an
            error page is never parsed as if it were book content.
    """
    response = http.request(method='GET', url=url)
    if response.status != 200:
        raise RuntimeError(f'GET {url} failed with HTTP status {response.status}')
    # The body is decoded as UTF-8 before parsing.
    return html.fromstring(response.data.decode('utf-8'))


def _chapter_body(ch_page):
    """Return the paragraphs of a chapter page as a single line of HTML.

    Args:
        ch_page: Parsed chapter page (lxml element tree root).

    Returns:
        The serialized ``<p>`` elements of the content div, with runs of
        non-breaking spaces replaced by four plain spaces and newlines removed.
    """
    paragraph_list = ch_page.xpath('/html/body/div[@class="main"]/div[@class="content"]/p')
    # The last <p> is deliberately left out (presumably site boilerplate rather
    # than chapter text -- TODO confirm against the live page).
    ch_content = ''.join(
        html.tostring(p, encoding='utf-8').decode('utf-8') for p in paragraph_list[:-1]
    )
    ch_content = re.sub(pattern=r'(\xa0)+', repl='    ', string=ch_content)
    return ch_content.replace('\n', '')


book = epub.EpubBook()
book.set_identifier('id123456')
book.set_title('慶餘年')
book.set_language('zh')
book.add_author('貓膩')

# EpubBook.set_cover() stores its ``content`` argument verbatim as the cover
# *image* file, so passing HTML markup to it produced a corrupt cover.jpg.
# The markup is published as an ordinary XHTML title page instead.
title_page = epub.EpubHtml(title='慶餘年', file_name='title.xhtml', lang='zh')
title_page.content = '<h2>慶餘年</h2><b>貓膩&nbsp;</b>'
book.add_item(title_page)

chapter_count = 1  # running chapter number across all volumes, used in file names
toc_list = []
spine_list = [title_page, 'nav']
for vol_number in range(1, 8):
    url = f'{base}{vol_number}'
    vol_page = _fetch_page(url)
    vol_titles = vol_page.xpath('/html/body/div[@class="main"]/h1/a/text()')
    if not vol_titles:
        raise RuntimeError(f'no volume title found on {url}')
    vol_title = vol_titles[0]
    vol_title_short = vol_title.replace('慶餘年 ', '')
    log.info('vol_title_short: %s', vol_title_short)
    # The volume entry in the TOC links to the first chapter of that volume.
    vol_section = epub.Section(title=vol_title_short, href=f'ch_{chapter_count:03}.xhtml')

    ch_toc_list = []
    for ch_item in vol_page.xpath('/html/body/div[@class="main"]/div[@class="content"]/ul/li'):
        ch_titles = ch_item.xpath('a/text()')
        ch_hrefs = ch_item.xpath('a/@href')
        if not ch_titles or not ch_hrefs:
            # A list item without a link is not a chapter; skip it instead of
            # aborting the whole run with an IndexError.
            log.warning('skipping list item without a chapter link on %s', url)
            continue
        ch_title = ch_titles[0].replace(vol_title, '').lstrip(' ').rstrip('?')
        # Resolve against the volume page so relative links work too; absolute
        # links are returned unchanged by urljoin.
        ch_url = urljoin(url, ch_hrefs[0])
        log.debug('%s: %s', ch_title, ch_url)

        ch_content = _chapter_body(_fetch_page(ch_url))
        ch_file_name = f'ch_{chapter_count:03}.xhtml'
        chapter = epub.EpubHtml(title=ch_title, file_name=ch_file_name, lang='zh')
        # The title is scraped text, so escape it before embedding it in markup.
        chapter.content = f'<h1>{escape(ch_title, quote=False)}</h1>{ch_content}'
        book.add_item(chapter)
        spine_list.append(chapter)
        ch_toc_list.append(epub.Link(href=ch_file_name, title=ch_title, uid=f'ch_{chapter_count:03}'))
        chapter_count += 1
    # One TOC node per volume: (section, (chapter links...)).
    toc_list.append((vol_section, tuple(ch_toc_list)))
book.toc = tuple(toc_list)
# add default NCX and Nav file
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
# define CSS style
style = 'BODY {color: white;}'
nav_css = epub.EpubItem(uid="style_nav", file_name="style/nav.css", media_type="text/css", content=style)
# add CSS file
book.add_item(nav_css)
# basic spine
book.spine = spine_list
# write to the file
epub.write_epub('慶餘年.epub', book)
發表評論
所有評論
還沒有人評論，想成為第一個評論的人麼？請在上方評論欄輸入並且點擊發布。
相關文章