逍遥恺
发表于 2020-2-28 23:50:52
感谢
步川邱酷
发表于 2020-2-29 00:21:20
让我康康
ckypamym
发表于 2020-2-29 00:23:47
这是用什么软件运行的啊
13817592317
发表于 2020-2-29 00:39:02
谢谢分享
老壁虎
发表于 2020-2-29 03:03:34
KANKAN
felix8513
发表于 2020-2-29 08:56:56
{:10_297:}
Neglect
发表于 2020-2-29 10:13:28
打卡
言空白
发表于 2020-2-29 10:20:15
咳咳咳~我来试验一下真假
xiao206
发表于 2020-2-29 10:22:46
看一看瞧一瞧
lujunnan
发表于 2020-2-29 10:26:15
咖喱鸡块
Blw
发表于 2020-2-29 11:31:49
优秀
lqh爱学习
发表于 2020-2-29 12:11:05
网警路过
君王的君
发表于 2020-2-29 14:39:41
66666,眼睛都看花了
evileyes
发表于 2020-2-29 14:48:20
主要是想看第三步的车
kevino2o
发表于 2020-2-29 15:02:41
我来审核
qs2000
发表于 2020-2-29 15:26:19
#creator:qs
#description:download picture from https://www.mzitu.com/
#structure:download---year---month---file---picture
#date:2020.2.28
#Need:延时自动切换
#借鉴了楼主!没写注释!有点bug!但能用!
import requests as r
from bs4 import BeautifulSoup as bs
import easygui as g
import lxml
import os
import time as t
def get_soup(url):
    """Download ``url`` and return its HTML parsed into a BeautifulSoup tree.

    The request carries a ``Referer`` and a browser-like ``User-Agent``
    header. The response text is parsed with the ``lxml`` parser.

    Args:
        url: Address of the page to fetch.

    Returns:
        The ``BeautifulSoup`` object built from the response body.
    """
    # url ------ html ------ soup
    headers = {
        'Referer': 'https://www.mzitu.com',
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/50.0.2661.102 UBrowser/6.1.2107.204 Safari/537.36'
        ),
    }
    # Without a timeout requests waits indefinitely on a stalled server.
    response = r.get(url, headers=headers, timeout=30)
    return bs(response.text, "lxml")
def get_pic(url):
    """Download ``url`` and return the raw response body as bytes.

    The request carries the same ``Referer`` and browser-like
    ``User-Agent`` headers that are used for HTML pages.

    Args:
        url: Address of the picture to fetch.

    Returns:
        The undecoded response content (``bytes``).
    """
    # url ------ pic
    headers = {
        'Referer': 'https://www.mzitu.com',
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/50.0.2661.102 UBrowser/6.1.2107.204 Safari/537.36'
        ),
    }
    # Without a timeout requests waits indefinitely on a stalled server.
    response = r.get(url, headers=headers, timeout=30)
    return response.content
def init_file(file, soup):
    """Create the year/month/album folder tree and download every album.

    For each ``class="year"`` element paired with the ``class="archives"``
    element at the same position, a year folder is created under ``file``.
    Each ``<li>`` inside the archive becomes a month folder, and each
    ``<a>`` inside that ``<li>`` becomes an album folder whose pictures
    are fetched with ``download_pics``. After a month is processed, a
    ``<month>.txt`` index listing the album links is written into the
    month folder.

    Args:
        file: Root directory under which the tree is created.
        soup: Parsed index page containing the year/archive elements.
    """
    # soup ---- year - month - file in download_file
    year_names = soup.find_all(class_="year")
    years = soup.find_all(class_="archives")
    for year_tag, year in zip(year_names, years):
        year_name = year_tag.text
        year_dir = os.path.join(file, year_name)
        # exist_ok lets an interrupted crawl be started again.
        os.makedirs(year_dir, exist_ok=True)
        for month_item in year.find_all("li"):
            month_name = month_item.p.em.text
            month_text = month_item.p.text
            month_dir = os.path.join(year_dir, month_name)
            os.makedirs(month_dir, exist_ok=True)
            index_lines = [year_name + "\t" + month_name + "\t" + month_text]
            # NOTE(review): the pasted original looked the links up through
            # ``.contents`` with a subscript that was lost; searching the
            # whole <li> is assumed to yield the same anchors -- confirm.
            for k, link in enumerate(month_item.find_all("a"), start=1):
                file_name = link.text
                file_address = os.path.join(month_dir, file_name)
                os.makedirs(file_address, exist_ok=True)
                file_href = link["href"]
                index_lines.append(str(k) + "\t" + file_href)
                download_pics(file_address, file_href, file_name)
                # Pause between albums to avoid hammering the server.
                t.sleep(3)
            index_path = os.path.join(month_dir, month_name + ".txt")
            # Explicit UTF-8: the index contains Chinese text.
            with open(index_path, "w", encoding="utf-8") as index_file:
                index_file.write("\n".join(index_lines))
def download_pics(file_address, file_href, file_name):
    """Download every picture of one album into ``file_address``.

    file_href ----- pic_first_url --- pages ------ pics in file_address

    The album page at ``file_href`` is fetched. The first picture URL is
    read from the ``main-image`` element and the page count from the
    second-to-last link of the ``pagenavi`` element. Pictures are saved
    as ``1<ext>``, ``2<ext>``, ... where ``<ext>`` is the last four
    characters of the first picture URL. A ``<file_name>.txt`` index
    listing each picture URL is written alongside them.

    Args:
        file_address: Existing directory that receives the pictures.
        file_href: URL of the album's first page.
        file_name: Album title; used for the index file name and header.
    """
    soup = get_soup(file_href)
    pic_first_url = soup.find(class_="main-image").p.a.img["src"]
    page_num = int(soup.find(class_="pagenavi").find_all("a")[-2].text)
    extension = pic_first_url[-4:]
    index_lines = [file_name + "\t" + str(page_num) + "张"]
    print(index_lines[0])  # testing
    for page in range(1, page_num + 1):
        pic_url = get_pic_url(pic_first_url, page)
        index_lines.append(str(page) + "\t" + pic_url)
        # Fetch first so a failed download does not leave an empty file.
        pic = get_pic(pic_url)
        pic_address = os.path.join(file_address, str(page) + extension)
        with open(pic_address, "wb") as pic_file:
            pic_file.write(pic)
    file_save_address = os.path.join(file_address, file_name + ".txt")
    # Explicit UTF-8: the index contains Chinese text.
    with open(file_save_address, "w", encoding="utf-8") as addresses_txt:
        addresses_txt.write("\n".join(index_lines))
def get_pic_url(pic_first_url, i):
    """Build the URL of picture number ``i`` from the first picture's URL.

    pic_first_url ---------- pic_i_url

    The last four characters of ``pic_first_url`` are kept as the
    extension. For ``i <= 9`` only the single character before the
    extension is replaced, so the character in front of it is preserved.
    For larger ``i`` the two characters before the extension are replaced
    by ``str(i)``.

    Assumes the first URL ends in a two-character page number followed by
    a four-character extension (e.g. ``...01.jpg``) -- TODO confirm
    against the site.

    Args:
        pic_first_url: URL of the album's first picture.
        i: 1-based picture number.

    Returns:
        The URL string for picture ``i``.
    """
    extension = pic_first_url[-4:]
    # Characters to cut from the end: extension plus one or two digits.
    cut = 5 if i <= 9 else 6
    return pic_first_url[:-cut] + str(i) + extension
# Index pages to crawl; each one lists albums grouped by year and month.
urls = ["https://www.mzitu.com/all/", "https://www.mzitu.com/old/"]
# Ask the user for the download root folder.
file = g.diropenbox("选择储藏物质")
# NOTE(review): the right-hand side of ``files =`` was lost in the pasted
# original. One sub-folder per index page, named after the last path
# segment of its URL ("all", "old"), is an assumption -- confirm.
# ``file`` is None when the dialog is cancelled; nothing is crawled then.
files = [] if file is None else [
    os.path.join(file, url.rstrip("/").rsplit("/", 1)[-1]) for url in urls
]
for url, target in zip(urls, files):
    soup = get_soup(url)
    # exist_ok lets the script be run again on the same folder.
    os.makedirs(target, exist_ok=True)
    init_file(target, soup)
kezi
发表于 2020-2-29 15:38:12
: 爬取妹子图 三部曲 [修改]
学Python不开车
发表于 2020-2-29 16:17:53
play98 发表于 2020-2-28 12:37
干哈呢 干哈呢! 岛国网址?
说啥呢 大家都是正经司机
学Python不开车
发表于 2020-2-29 16:18:25
ckypamym 发表于 2020-2-29 00:23
这是用什么软件运行的啊
python我用的vscode 自带的软件就行
学Python不开车
发表于 2020-2-29 16:19:49
qs2000 发表于 2020-2-29 15:26
#creator:qs
#description:download picture from https://www.mzitu.com/
#structure:download---year-- ...
谢谢大佬指点
页:
1
2
[3]
4
5
6
7
8
9
10
11
12