|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import requests
- from lxml import etree
- import os
- #设计模式--面向对象
- class Spider(object):
- #1.请求一级页面HTML源代码
- def start_request(self):
- response = requests.get("https://www.qidian.com/all")
- html=etree.HTML(response.text)
- Bigtit_list=html.xpath('//div[@class="book-mid-info"]/h4/a/text()')
- Bigsrc_list=html.xpath('//div[@class="book-mid-info"]/h4/a/@href')
- for Bigtit, Bigsrc in zip(Bigtit_list,Bigsrc_list):
- if os.path.exists(Bigtit)==False:
- os.mkdir(Bigtit)
- self.next_file(Bigtit,Bigsrc)
- def next_file(self,Bigtit,Bigsrc):
- response=requests.get("https:" + Bigsrc)
- html=etree.HTML(response.text)
- Littit_list=html.xpath('//ul[@class="cf"]/li/a/text')
- Litsrc_list=html.xpath('//ul[@class="cf"]/li/a/@href')
- for Littit,Litsrc in zip(Littit_list,Litsrc_list):
- self.finally_file(Littit,Litsrc,Bigtit)
-
- def finally_file(self, Littit,Litsrc,Bigtit):
- response=requests.get("https:"+Litsrc)
- html=etree.HTML(response.text)
- content = "\n".join(html.xpath('//div[@class="read-content j_readContent"]/p/text()'))
- file_name=Bigtit+"\"+Littit+".txt"
- print("正在保存文件:"+file_name)
- with open(file_name,"w",encoding="utf-8")as f:
- f.file_name
-
- spider = Spider()
- spider.start_request()
复制代码
哪里错了,怎么弄,点不会,怎么爬 |
|