|
发表于 2018-12-11 13:55:21
|
显示全部楼层
抱歉，我看了两三遍代码，还是不知道该改哪里。比如我想把图片保存到绝对路径“D:\jiandan”，应该把下面代码的哪一行修改成什么样呢？
- import requests
- import os
- import time
- from bs4 import BeautifulSoup as bs
- # 打开网页函数
def get_response(url):
    """Fetch ``url`` with a browser-like User-Agent and return the response.

    Args:
        url: Address of the page to request.

    Returns:
        The ``requests`` response object, with ``encoding`` forced to UTF-8
        so that ``response.text`` is decoded as UTF-8.
    """
    headers = {
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.75 Safari/537.36",
    }
    # Pass the headers by keyword: the second positional parameter of
    # ``requests.get`` is ``params``, so a positional dict would be sent as a
    # query string and the browser User-Agent would never reach the server.
    response = requests.get(url, headers=headers)
    response.encoding = 'utf-8'  # decode the body as UTF-8
    return response
- # 写一个js代码
def writeFile(content):
    """Generate ``js/cest.js``, a Node script built around one hash string.

    The script loads ``./main``, stores ``content`` in the variable ``e`` and
    passes it to the ``JianDan`` constructor. The ``js`` directory must
    already exist.

    Args:
        content: Text to embed in the script as a JavaScript string literal.
    """
    import json  # local import: only needed to build the JS string literal

    # ``content`` comes from a scraped page and the generated file is executed
    # by Node, so it must not be able to break out of the string literal.
    # json.dumps produces a double-quoted literal with quotes, backslashes and
    # newlines escaped; for plain text the output is unchanged.
    js_literal = json.dumps(str(content))
    with open('js/cest.js', 'w', encoding='utf-8') as txt_file:
        txt_file.write("var JianDan = require('./main');\n")
        txt_file.write(f'var e = {js_literal};\n')
        txt_file.write('hello = new JianDan(e);\n')
    # No explicit close needed: the ``with`` block closes the file.
- # 获取并下载图片
def get_img(save_dir='download_img'):
    """Run the generated Node script, download the image it prints, save it.

    ``node js/cest.js`` is run and its standard output is taken as an image
    URL without the scheme; ``http:`` is prepended before downloading. The
    image is stored in ``save_dir`` under a timestamp-based file name that
    keeps the URL's file extension.

    Args:
        save_dir: Directory the image is written to. It is created when
            missing and may be an absolute path such as ``'D:/jiandan'``.
            Defaults to ``'download_img'``.
    """
    # Run the JS decoder; strip the trailing newline from its output.
    with os.popen("node js/cest.js") as pipe:
        decoded = pipe.read().strip()
    if not decoded:
        print('node js/cest.js printed no URL, skipping')
        return
    url = 'http:' + decoded

    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
    }
    response = requests.get(url=url, headers=headers)
    if not response.ok:
        # Do not store an HTTP error page under an image file name.
        print(f'skipping {url}: HTTP {response.status_code}')
        return

    # Keep the real extension (e.g. ".jpg"); ``url[-4]`` would only yield a
    # single character. Any query string is ignored.
    suffix = os.path.splitext(url.split('?', 1)[0])[1]
    # Name files by local time. ':' is avoided because it is not allowed in
    # Windows file names.
    file_time = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())

    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, f'{file_time}{suffix}')
    # Several images can be saved within one second; add a counter instead
    # of overwriting an earlier download.
    counter = 1
    while os.path.exists(path):
        path = os.path.join(save_dir, f'{file_time}_{counter}{suffix}')
        counter += 1

    with open(path, 'wb') as img_file:
        img_file.write(response.content)
def main():
    """Walk the pages of jandan.net ``ooxx`` and download every hashed image.

    The landing page is fetched first; the number shown in its
    ``.current-comment-page`` element is used as the last page to visit.
    Pages 1 through that number are then fetched in order, and each
    ``.img-hash`` element found is written into the JS script and downloaded.
    """
    landing_page = get_response('http://jandan.net/ooxx')
    landing_soup = bs(landing_page.text, 'lxml')
    # The pager renders the current page as "[N]"; drop the brackets.
    current_label = landing_soup.select('.cp-pagenavi .current-comment-page')[0].text
    last_page = int(current_label.replace('[', '').replace(']', ''))

    for page_no in range(1, last_page + 1):
        page = get_response(f'http://jandan.net/ooxx/page-{page_no}')
        page_soup = bs(page.text, 'lxml')
        print(f'>>>>>>>>>>>>>>>>>>>>>>当前第{page_no}页')
        # Each .img-hash element holds the encoded form of one image link.
        for hash_tag in page_soup.select('.commentlist .img-hash'):
            writeFile(hash_tag.text)  # generate the JS that decodes the hash
            get_img()                 # run it and download the image
# Start the crawl only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
复制代码 |
|