|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
直接上代码(从 bifeige.com 的小说目录页抓取各章节链接,逐章下载正文并保存为 txt 文件):
- import requests
- import time
- import re
- import os
- from bs4 import BeautifulSoup
# Local folder the chapter files are written to.
path = "D:/APython/spider/douluo/斗罗大陆3龙王传说/"  # remember to change this path
# Index page of the novel; its <dd> entries link to the individual chapters.
url = "https://www.bifeige.com/9_9235/"
def openurl(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page body as ``str``, decoded with the encoding detected from
        the content (``apparent_encoding``).

    Raises:
        requests.exceptions.RequestException: if the request fails or the
            server does not answer within *timeout* seconds.
    """
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36 Edg/88.0.705.81"}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Decode with the encoding detected from the body instead of the one
    # derived from the HTTP headers.
    response.encoding = response.apparent_encoding
    return response.text
def save_text(respound, name):
    """Extract the chapter text from an HTML page and append it to a file.

    The text of every ``<div id="content">`` element is written to the file
    ``path + name`` (``path`` is the module-level output folder).

    Args:
        respound: HTML source of the chapter page.
        name: File name of the chapter inside the output folder.

    Returns:
        ``None`` when the chapter was saved; ``""`` when nothing was saved
        (the value the original code returned on failure).
    """
    soup = BeautifulSoup(respound, "html.parser")
    targets = soup.find_all("div", id="content")
    if not targets:
        # Do not create an empty file: the download loop in this file skips
        # names that already exist, so an empty file would block any retry.
        print("下载失败;" + name + "(未找到正文)")
        return ""
    try:
        with open(path + name, "a", encoding="utf-8") as f:
            f.write("".join(target.text for target in targets))
    except OSError as err:
        # Report the failure instead of dropping the chapter silently.
        print("下载失败;" + name + ":" + str(err))
        return ""
    print("下载成功;" + name)
    return None
# Download every chapter listed on the index page that is not on disk yet.
# save_text() opens files inside `path`, which fails if the folder is missing.
os.makedirs(path, exist_ok=True)
respound = openurl(url)
soup = BeautifulSoup(respound, "html.parser")
targets = soup.find_all("dd")
i = 0  # number of chapters actually downloaded in this run
for target in targets:
    link = target.a
    if link is None or not link.get("href"):
        # A <dd> without a link has nothing to download.
        continue
    name = str(target.text) + ".txt"
    if os.path.exists(path + name):
        # Already saved by an earlier run.
        continue
    # Use a separate name so the module-level index `url` is not overwritten.
    chapter_url = "https://www.bifeige.com" + str(link.get("href"))
    save_text(openurl(chapter_url), name)
    i += 1
    if i % 10 == 0:
        # Pause after every tenth download to throttle requests to the site.
        time.sleep(10)
复制代码
|
|