kuangshigubei
Posted on 2020-11-13 08:51:35
{:5_109:}
大耳朵
Posted on 2020-11-13 18:32:35
Writing Python on a phone
青松100
Posted on 2020-11-13 21:44:41
Here to learn
小帅小帅
Posted on 2020-11-13 23:34:41
Learning this
小帅小帅
Posted on 2020-11-13 23:35:17
Not sure if it works
zk18336238932
Posted on 2020-11-14 06:24:19
。。。
临时号
Posted on 2020-11-14 11:19:25
Taking a look
大昊
Posted on 2020-11-14 15:51:02
Got it
筱陌
Posted on 2020-11-14 16:55:24
Taking a look
6xl
Posted on 2020-11-30 14:25:55
Learning!
shaol2008
Posted on 2020-11-30 22:16:26
Thanks for sharing
景行维贤
Posted on 2020-11-30 23:38:24
1
伊格利斯
Posted on 2020-12-1 00:04:02
Let me have a look first
zhangshilin98
Posted on 2020-12-1 09:32:29
Learning! Respect!
今天又起雾了
Posted on 2020-12-1 09:53:49
1
1240413749
Posted on 2020-12-1 10:01:42
666
thon怪
Posted on 2020-12-1 15:00:47
Checking in
QuengC
Posted on 2020-12-1 17:13:06
6666
yangminer
Posted on 2020-12-1 21:31:19
Taking a look
日暮途远Ez
Posted on 2020-12-3 01:30:55
import os
import re
import time

import requests
from bs4 import BeautifulSoup

# Shared request headers; the Referer is required or the site refuses image requests.
HEADERS = {
    'Referer': 'https://www.mzitu.com/',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0'
}

def url_open(url):
    # Fetch the list page and extract the gallery links inside the postlist block.
    response = requests.get(url, headers=HEADERS)
    soup = BeautifulSoup(response.text, 'lxml')
    postlist = str(soup.find(class_='postlist'))
    return re.findall(r'<a href="([^"]+\d)"', postlist)

def url_open2(url):
    # Plain GET used to download the image bytes.
    return requests.get(url, headers=HEADERS)

def save_jpg(jpg, x):
    # Prefix the file name with a counter so images from one gallery stay ordered.
    filename = str(x) + '_' + jpg.split('/')[-1]
    print('Saving file ' + filename)
    print('=' * 50)
    with open(filename, 'wb') as f:
        f.write(url_open2(jpg).content)

def find_imgs(url):
    os.system('cls')  # Windows-only screen clear
    x = 0
    while True:
        response = requests.get(url, headers=HEADERS)
        soup = BeautifulSoup(response.text, 'lxml')
        main = str(soup.find(class_='main-image'))
        pics = re.findall(r'src="([^"]+\.jpg)"', main)
        nexts = re.findall(r'href="([^"]+\d)"', main)
        if not pics:  # no image on this page: the gallery is finished
            break
        print()
        print('Processing image: %s \nNext page: %s' % (pics[0], nexts))
        save_jpg(pics[0], x)
        x += 1
        if not nexts or nexts[0] == url:  # no further page to follow
            break
        url = nexts[0]

def url_open1(htmls):
    print(htmls)
    for url in htmls:
        find_imgs(url)
    print('Press Ctrl+C to quit.')
    time.sleep(555)

def download_new(folder='OOXX'):
    # Remove any previous download folder (Windows shell commands), then recreate it.
    os.system('cls')
    os.system(r'del /q OOXX\*.*')
    os.system('rmdir OOXX')
    os.mkdir(folder)
    os.chdir(folder)
    print(url)
    htmls = url_open(url)
    url_open1(htmls)

if __name__ == '__main__':
    url = 'http://www.mzitu.com/'
    download_new()