妹子图的源码，包括处理错误信息，加上进程池，加上IP替换，加上进度条

python_mysql · 发表于 2019-7-6 15:25:14

马上注册，结交更多好友，享用更多功能^_^

您需要登录才可以下载或查看，没有账号？立即注册

x

本帖最后由 python_mysql 于 2019-7-7 10:23 编辑

import urllib3,random,os
from tqdm import tqdm
from lxml import etree
import requests,time
from multiprocessing import Pool
class Open_go(object):
    """Interactive command-line scraper for picture albums on mzitu.com.

    Call chain: ``nameMei`` prints the link texts found in the footer of the
    tag page and asks the user to pick one, ``cenmMsibo`` lists the albums on
    the chosen page and queues the selected albums on a process pool, and
    ``domowoer`` saves the images of one album to disk.
    """

    # Browser-like headers sent with every mzitu.com HTML page request.
    _PAGE_HEADERS = {
        'cookie': 'Hm_lvt_dbc355aef238b6c32b43eacbbf161c3c=1559885691; Hm_lpvt_dbc355aef238b6c32b43eacbbf161c3c=1559885733',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
    }
    # Headers for the image request itself; ``referer`` is added per page.
    _IMAGE_HEADERS = {
        'upgrade-insecure-requests': '1',
        'cookie': 'UM_distinctid=16b3128b8341-0d238ded195901-4048032c-100200-16b3128b8353f4; Hm_lvt_dbc355aef238b6c32b43eacbbf161c3c=1559885691,1559901291,1559971044; Hm_lpvt_dbc355aef238b6c32b43eacbbf161c3c=1559976366',
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
    }
    # Seconds to wait on any single HTTP request before giving up.
    _REQUEST_TIMEOUT = 10
    # Number of prompts shown by each interactive menu.
    _MAX_ATTEMPTS = 4
    # Directory (relative to the working directory) that receives all albums.
    _SAVE_ROOT = '妹子图'

    def ip_proxy(self):
        """Return a ``proxies`` mapping built from one randomly chosen proxy.

        Returns:
            ``{'http': 'http://<ip>:<port>'}`` or ``None`` when no proxy
            address could be obtained (a message is printed in that case).
        """
        # Fetch the list exactly once: every getProxyIp() call issues several
        # HTTP requests, and the result may be empty (or None) on failure.
        candidates = self.getProxyIp()
        if not candidates:
            print('代理列表获取失败')
            return None
        return {'http': 'http://' + random.choice(candidates)}

    def getProxyIp(self):
        """Scrape ``ip:port`` strings from pages 1-4 of the kuaidaili free list.

        Returns:
            A list of ``'ip:port'`` strings. If a page cannot be fetched or
            parsed, scraping stops and the addresses collected so far are
            returned (possibly an empty list); this method never returns
            ``None``.
        """
        proxy = []
        headers = {
            'Cookie': 'channelid=0; sid=1559955051965303; _ga=GA1.2.885541243.1559956488; _gid=GA1.2.474522862.1559956488; Hm_lvt_7ed65b1cc4b810e9fd37959c9bb51b31=1559956487,1559971332; Hm_lpvt_7ed65b1cc4b810e9fd37959c9bb51b31=1559971354',
            'Host': 'www.kuaidaili.com',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
        }
        for i in range(1, 5):
            url = 'https://www.kuaidaili.com/free/inha/{}/'.format(i)
            try:
                response = requests.get(url, headers=headers,
                                        timeout=self._REQUEST_TIMEOUT)
                html = etree.HTML(response.text)
                ip = html.xpath('//td[@data-title="IP"]/text()')
                ip_port = html.xpath('//td[@data-title="PORT"]/text()')
            except (requests.RequestException, etree.LxmlError,
                    ValueError, AttributeError) as exc:
                # Keep whatever earlier pages produced instead of dropping it.
                print('代理没有获取到。', exc)
                break
            proxy.extend(x + ':' + z for x, z in zip(ip, ip_port))
        return proxy

    def nameMei(self):
        """Show the top-level menu and hand the chosen entry to ``cenmMsibo``.

        Prints the link texts found in the tag page's footer div, then prompts
        up to ``_MAX_ATTEMPTS`` times. Entering ``1`` leaves the menu; an
        entry that matches a printed text is opened with ``cenmMsibo``.
        """
        print('马上开始挑选你们的妹子图片吧')
        time.sleep(3)
        # ip_proxy() already returns a complete mapping (or None for "no
        # proxy"). It only has an 'http' key, so requests applies it to
        # http:// URLs only.
        proxies = self.ip_proxy()
        url = 'https://www.mzitu.com/tag/youhuo/'
        try:
            html = requests.get(url, headers=self._PAGE_HEADERS,
                                proxies=proxies, timeout=self._REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print('错误访问', exc)
            return
        if html.status_code != 200:
            print('错误访问')
            return

        print('很成功的，我们程序没有被屏蔽，(╥╯^╰╥)')
        time.sleep(2)
        html_xpath = etree.HTML(html.text)
        url_html = html_xpath.xpath('//div[@class="footer"]/a/@href')
        url_html_text = html_xpath.xpath('//div[@class="footer"]/a/text()')
        # The first and the last footer link are not offered in the menu.
        for _, T_tex in zip(url_html[1:-1], url_html_text[1:-1]):
            print(T_tex)
        try:
            for x in range(self._MAX_ATTEMPTS, 0, -1):
                name = input('请输入你您要下载的图片是：')
                # Test the exit command first: '1' is alphanumeric, so
                # checking it after isalnum() would make it unreachable.
                if name == '1':
                    return
                if name.isalnum() or name == '':
                    if name in url_html_text:
                        url_data = url_html[url_html_text.index(name)]
                        self.cenmMsibo(url_data, name)
                    else:
                        print('请认真对待我们的程序哦')
                else:
                    print('请输入正确的美女图片' + '您还有%s机会' % (x))
        except Exception as exc:
            # Boundary of the interactive session: report and fall back to
            # the caller. Ctrl-C still propagates.
            print('程序错误等待更新。', exc)

    def cenmMsibo(self, url_data, name):
        """List the albums on ``url_data`` and queue the chosen ones for download.

        Args:
            url_data: URL of the listing page to open.
            name: Menu text the user picked; only used in the greeting.

        The user is prompted ``_MAX_ATTEMPTS`` times for an album index. Each
        valid index is submitted to a process pool, and the method waits for
        all queued downloads before it returns.
        """
        print(name + '美女很多要好好看哦')
        time.sleep(3)
        html_URL = urllib3.PoolManager()
        html = html_URL.request('GET', url_data, headers=self._PAGE_HEADERS,
                                timeout=self._REQUEST_TIMEOUT)
        if html.status != 200:
            print('访问错误')
            return

        html_xpath = etree.HTML(html.data.decode('utf-8'))
        ul_html = html_xpath.xpath('//ul[@id="pins"]/li/a/@href')
        ul_txt = html_xpath.xpath('//ul[@id="pins"]/li/a/img/@alt')
        for i, x in zip(ul_txt, range(0, 30)):
            print(str(x) + ':' + i)

        pending = []
        try:
            # One pool for the whole menu. It is closed and joined explicitly
            # because leaving the ``with`` block only terminates the workers.
            with Pool(processes=self._MAX_ATTEMPTS) as P:
                for x in range(self._MAX_ATTEMPTS, 0, -1):
                    name = input('请输入你要下载美女的图片的数字：')
                    if name.isdecimal() and int(name) < len(ul_html):
                        url000 = ul_html[int(name)]
                        # Pass the callable and its arguments separately so
                        # the download runs inside a worker process.
                        pending.append(
                            P.apply_async(self.domowoer, (url000,)))
                    else:
                        print('您输入有错了，还有%s次机会' % (x))
                P.close()
                print('阻塞程序')
                P.join()
        except Exception as exc:
            print('程序错误,等待更新', exc)
            return
        # Surface errors raised inside the workers; apply_async stores them
        # silently until get() is called.
        for task in pending:
            try:
                task.get()
            except Exception as exc:
                print('程序错误,等待更新', exc)

    def domowoer(self, url000):
        """Download the images of one album into ``_SAVE_ROOT``.

        Args:
            url000: Base URL of the album; ``/<n>`` is appended for
                n = 2..49 to address the individual pages.

        Each image is stored as ``<page number>.jpg`` so that pages never
        overwrite each other. The loop stops at the first page that does not
        answer with HTTP 200.
        """
        print('开始下载了哦')
        time.sleep(3)
        html_URL = urllib3.PoolManager()
        for page_no in range(2, 50):
            html_url_name = url000 + '/%s' % (page_no)
            html = html_URL.request('GET', html_url_name,
                                    headers=self._PAGE_HEADERS,
                                    timeout=self._REQUEST_TIMEOUT)
            if html.status != 200:
                # Stop instead of re-entering the menu: this method runs in a
                # pool worker, and the caller prompts again once it returns.
                print('抱歉，网站不存在' + str(html_url_name))
                break

            html_xpath = etree.HTML(html.data.decode('utf-8'))
            main_title = html_xpath.xpath('//h2[@class="main-title"]/text()')
            main_img = html_xpath.xpath('//div[@class="main-image"]/p/a/img/@src')
            wenjianming = ''.join(main_title)
            # The [8:19] slice is kept from the original code; presumably it
            # trims the title to a short folder name -- TODO confirm.
            album_dir = os.path.join(self._SAVE_ROOT, wenjianming[8:19])
            os.makedirs(album_dir, exist_ok=True)
            if not main_img:
                continue

            url = ''.join(main_img)
            # Send the page that embeds the image as the referer.
            headers = dict(self._IMAGE_HEADERS, referer=html_url_name)
            try:
                html1 = requests.get(url, headers=headers,
                                     timeout=self._REQUEST_TIMEOUT)
                if html1.status_code == 200:
                    # Cosmetic progress bar followed by a pause between pages.
                    for _ in tqdm(range(1, 101)):
                        time.sleep(0.001)
                    time.sleep(2)
                    target = os.path.join(album_dir, str(page_no) + '.jpg')
                    with open(target, 'wb') as f:
                        f.write(html1.content)
                else:
                    print('下载失败' + url)
            except (requests.RequestException, OSError) as exc:
                # Skip this image and carry on with the next page.
                print('网站获取失败等待更新', exc)
if __name__ == '__main__':
    # Script entry point: print the welcome banner, then start the
    # interactive scraper. The guard also keeps this code from running again
    # when multiprocessing re-imports the module in a worker process.
    print('--- --- 欢迎下载妹子图，热烈欢迎 --- ---')
    time.sleep(3)
    print('我们有大量的妹子图供你们下载')
    time.sleep(3)
    print('启动程序了哦')
    K_1 = Open_go()
    # Cosmetic start-up progress bar; the loop only sleeps.
    for i in tqdm(range(100)):
        time.sleep(0.05)
    print('成功启动了')
    time.sleep(3)
    K_1.nameMei()

复制代码

python_mysql · 发表于 2019-7-7 10:20:07

顶

python_mysql · 发表于 2019-7-7 10:22:04

这是我爬取妹子图的源码，可以直接下载使用。

只是新手007 · 发表于 2019-7-22 15:17:08

哥，你知道怎么把自己的 IP 地址改成别的地方的吗？不然新疆地区用不了百度网盘。

python_mysql · 发表于 2019-8-4 14:25:22

顶一下

一颗小甲鱼 · 发表于 2021-7-7 21:41:45

这个是真的顶，我给您跪下了

swanseabrian · 发表于 2021-9-1 09:53:36

python_mysql 发表于 2019-7-7 10:20
顶

tkinter怎么加这样的进度条？可以用于生产的

账号		自动登录	找回密码
密码			立即注册

[作品展示] 妹子图的源码，包括处理错误信息，加上进程池，加上IP替换，加上进度条

马上注册，结交更多好友，享用更多功能^_^

浏览过的版块