|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- # -*- coding:UTF-8 -*-
- import requests
- import re
- import unicodedata
def get_novel_list_url():
    """Download the novel's table-of-contents page and cache its HTML.

    Reads the module-level ``novel_url`` and publishes three module-level
    globals for the later stages:

    * ``headers``      -- request headers carrying a browser User-Agent
    * ``response``     -- the ``requests`` response for the catalogue page
    * ``response_all`` -- the catalogue page decoded as UTF-8 text

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status (an error page would otherwise be parsed as if it
            were a chapter list and silently yield nothing).
    """
    global response, headers, response_all

    # Send a browser-like User-Agent in the request headers.
    headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0'}

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(novel_url, headers=headers, timeout=30)
    response.raise_for_status()

    # Decode the body as UTF-8 instead of relying on requests' guess.
    response.encoding = 'UTF-8'
    response_all = response.text
def get_novel_title():
    """Collect the raw chapter-title fragments from the catalogue HTML.

    Scans the module-level ``response_all`` and stores every match in the
    module-level list ``novel_title_``. Each entry still carries the
    ``.html' >`` prefix and ``</a></dd>`` suffix used to locate it; this
    function does not strip them.
    """
    global novel_title_
    title_pattern = re.compile(r'.html\' >.*?</a></dd>')
    novel_title_ = title_pattern.findall(response_all)
# Characters that are not allowed in file names on Windows ('/' is also
# invalid on POSIX); they are removed from chapter titles before use.
_FORBIDDEN_FILENAME_CHARS = str.maketrans('', '', '\\/:*?"<>|')


def _chapter_path(link_fragment):
    """Return the site-relative path held in a ``<dd><a href='...' >`` fragment."""
    return link_fragment.replace('<dd><a href=\'', '').replace('\' >', '')


def _clean_title(title_fragment):
    """Strip the match wrapper from a title fragment and drop file-name-unsafe characters."""
    title = title_fragment.replace('</a></dd>', '').replace('.html\' >', '')
    return title.translate(_FORBIDDEN_FILENAME_CHARS)


def _extract_chapter_text(page_html):
    """Return the cleaned chapter body from a chapter page, or '' if none is found."""
    # re.S lets '.' cross line breaks, so a content block spread over
    # several lines is still captured; single-line matches are unaffected.
    fragments = re.findall(r'<div id="content">.*?<a href=', page_html, re.S)
    if not fragments:
        return ''
    # As before, the last match wins when there is more than one.
    text = fragments[-1].replace('<div id="content">', '', 1)
    # NOTE(review): the pasted code replaced ' ' with ' ' (a no-op);
    # presumably the '&nbsp;' entity was lost when the post was rendered.
    for junk in ('&nbsp;', '\xa0', '<br />', '<p><a href='):
        text = text.replace(junk, ' ')
    return text


def get_novel_text():
    """Download every chapter and append it to ``<title>_.txt`` in the current directory.

    Reads the module-level ``response_all`` (catalogue HTML), ``novel_title_``
    (title fragments) and ``headers`` (request headers), and publishes the
    module-level list ``novel_list_all`` holding the raw chapter-link
    fragments, e.g. ``<dd><a href='/15/15003/6795760.html' >``.

    Links and titles are paired positionally. A chapter whose page cannot be
    fetched, or whose content block cannot be found, is reported and skipped
    rather than aborting the whole run.
    """
    global novel_list_all
    novel_list_all = re.findall(r'<dd><a href=.*?\' >', response_all)

    for link_fragment, title_fragment in zip(novel_list_all, novel_title_):
        title = _clean_title(title_fragment)
        chapter_url = 'http://www.xbiquge.la' + _chapter_path(link_fragment)

        try:
            # Same browser headers as the catalogue request; bounded wait.
            page = requests.get(chapter_url, headers=headers, timeout=30)
        except requests.RequestException:
            print(title + '下载失败,已跳过')
            continue
        page.encoding = 'UTF-8'

        chapter_text = _extract_chapter_text(page.text)
        if not chapter_text:
            print(title + '下载失败,已跳过')
            continue

        # Append mode is kept from the original; 'with' guarantees the file
        # is closed, and an explicit encoding avoids locale-dependent
        # UnicodeEncodeError on non-UTF-8 systems.
        with open(title + '_.txt', 'a', encoding='utf-8') as out:
            out.write(title)
            out.write('\n')
            out.write(chapter_text)
            out.write('\n')
        print(title + '已下载至当前目录')

    print("已全部下载完成")
def main():
    """Run the download pipeline: fetch the catalogue, collect titles, save chapters."""
    for stage in (get_novel_list_url, get_novel_title, get_novel_text):
        stage()
if __name__ == "__main__":
    # Interactive entry point: confirm, ask for the catalogue URL, then run
    # the pipeline. ``novel_url`` is assigned at module level because
    # get_novel_list_url() reads it as a global.
    print('------------------------------------------------')
    b = input("输入(Y/y)进入下载小说\n")
    # Strip surrounding whitespace so an accidental space does not cancel.
    if b.strip() in ('Y', 'y'):
        print('------------------------------------------------')
        # The stray '"' after the example URL is removed from the prompt, and
        # the answer is stripped so trailing whitespace cannot corrupt the URL.
        novel_url = input('请输入您要在新笔趣网下载小说的URL(http://www.xbiquge.la/):\n例如http://www.xbiquge.la/10/10489/\n').strip()
        print('------------------------------------------------')
        print("---------------------正在下载---------------------")
        main()
    else:
        print("已退出程序~")
复制代码
|
|