|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
网上照抄了个爬虫代码,IDLE运行没有报错,却找不到输出文件在哪里,各位大神帮忙解答下怎么在windows下寻找文件存储路径或者帮忙指出哪里出问题了。
系统windows 10
代码 如下:
import requests
import os
import time
import threading
from bs4 import BeautifulSoup
def download_page (url):
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; x64;rv:61.0) Gecko/20100101 Firefox/61.0"}
r = requests.get(url,headers=headers)
r.encoding="gb2312"
return r.text
def get_pic_list(html):
soup = BeautifulSoup(html,"html.parser")
pic_list = soup.find_all("li",class_="wp-item")
for i in pic_list:
a_tag=i.find("h3",class_="tit").find("a")
link=a_tag.get("href")
text=a_tag.get_text()
get_pic(link,text)
def get_pic(link,text):
html = download_page(link)
soup = BeautifulSoup(html,"html.parser")
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; x64;rv:61.0) Gecko/20100101 Firefox/61.0"}
create_dir("pic/{}".format(text))
for i in pic_list:
pic_link=i.get("src")
r = requests.get(url,headers=headers)
with open ("pic/{}/{}".format(text,link.split('/')[-1]),"wb")as f:
f.write(r.content)
time.sleep(1)
def creat_dir(name):
if not os.path.exists(name):
os.makedirs(name)
def execute(url):
page_html=download_page(url)
get_pic_list(page_html)
def main():
create_dir("pic")
queue=[i for i in range(1,72)]
thread=[]
while len(queue)>0:
for thread in threads:
if not thread.is_alive():
threads.remove(thread)
while len(threads)<5 and len(queue)>0:
cur_page = queue.pop(0)
url="http://meizitu.com/a/more_{}.html".format(cur_page)
thread=threading.Thread(target=execue,args=(url,))
thread,setDaemon(True)
thread.start()
print("{}正在下载{}".format(threading.current_thread().name,cur_page))
threads.append(thread)
if __name__=="___main__":
main()
|
|