|
|
发表于 2018-4-21 23:09:00
|
显示全部楼层
<img src="http://wx3.sinaimg.cn/mw600/0076BSS5ly1fqjjkv6w6rj30p00go0u3.jpg" style="max-width: 480px; max-height: 750px;">
- import os
- import urllib2
- import random
def url_open(url):
    """Fetch ``url`` with a desktop-browser User-Agent and return the raw body.

    Args:
        url: Address to request.

    Returns:
        The response body exactly as read from the connection (not decoded).

    Errors raised by ``urllib2.urlopen`` or ``read`` are not caught here and
    propagate to the caller.
    """
    request = urllib2.Request(url)
    request.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36')

    # NOTE: going through a proxy was tried and disabled -- with a proxy the
    # downloads turned out to be unrelated junk pictures instead of the
    # expected ones. The disabled code picked one of
    #   '60.190.199.68:808', '183.159.82.206:18118', '222.186.45.127:55336'
    # with random.choice(), wrapped it in urllib2.ProxyHandler({'https': proxy})
    # and installed the resulting opener with urllib2.install_opener().
    response = urllib2.urlopen(request)
    try:
        html = response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
    # Progress trace; parenthesised so the line parses on Python 2 and 3.
    print(url)
    return html
def get_pagenumber(url):
    """Return the current comment-page number found on the page at ``url``.

    The page is fetched with ``url_open`` and decoded as UTF-8. The number is
    the text that starts three characters after the first occurrence of
    ``current-comment-page`` and runs up to the next ``]``.

    Args:
        url: Address of the page to inspect.

    Returns:
        The page number as text; callers convert it with ``int()``.

    Raises:
        ValueError: If the marker or the closing ``]`` is not present, instead
            of silently returning an unrelated slice of the page.
    """
    html = url_open(url).decode('utf-8')
    marker = 'current-comment-page'
    found = html.find(marker)
    if found == -1:
        raise ValueError("'current-comment-page' marker not found in " + url)
    # Skip the marker plus the three characters that sit between it and the
    # number (presumably `">[` -- verify against the live page markup).
    start = found + len(marker) + 3
    end = html.find(']', start)
    if end == -1:
        raise ValueError("closing ']' after page number not found in " + url)
    return html[start:end]
def find_image(url):
    """Collect the ``.jpg`` addresses referenced by ``img src=`` on a page.

    The page is fetched with ``url_open`` and decoded as UTF-8. For every
    ``img src=`` occurrence the text after the opening quote is scanned for
    ``.jpg``; the scan stops at the closing quote (or 255 characters after
    the ``img src=`` occurrence, whichever comes first) so that a ``.jpg``
    belonging to a later attribute or tag is never glued onto this address.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of address strings in page order, each ending in ``.jpg``.
        The list is empty when the fetched markup contains no such
        attribute.
    """
    html = url_open(url).decode('utf-8')
    marker = 'img src='
    image_address = []

    pos = html.find(marker)
    while pos != -1:
        # The address starts after the marker and one quote character.
        start = pos + len(marker) + 1
        quote = html[start - 1:start]

        limit = pos + 255
        closing = html.find(quote, start) if quote in ('"', "'") else -1
        if closing != -1:
            limit = min(limit, closing)

        ext = html.find('.jpg', start, limit)
        if ext != -1:
            image_address.append(html[start:ext + 4])
            resume = ext
        else:
            resume = start
        pos = html.find(marker, resume)
    return image_address
def save_image(folder, image_address):
    """Download each address in ``image_address`` into the working directory.

    Args:
        folder: Not used by this function; kept so existing callers keep
            working. Files are written relative to the current working
            directory.
        image_address: Iterable of image addresses. The part after the last
            ``/`` of each address is used as the file name.

    Returns:
        None.
    """
    for each in image_address:
        filename = each.split('/')[-1]
        # Download before opening the file so that a failed request does not
        # leave an empty file behind.
        image = url_open(each)
        with open(filename, 'wb') as f:
            f.write(image)
-
def download_mm(folder='OOXX', pages=10):
    """Download the images of several comment pages into ``folder``.

    The current page number is read from ``http://jandan.net/ooxx`` via
    ``get_pagenumber``; the loop then walks backwards, starting with the page
    just before that one, and hands each page's addresses (from
    ``find_image``) to ``save_image``.

    Args:
        folder: Directory to store the images in. It is created when missing
            and becomes the process's working directory (this function does
            not change back afterwards).
        pages: Maximum number of pages to visit.

    Returns:
        None.
    """
    # Reuse an existing directory so a second run does not fail in os.mkdir.
    if not os.path.isdir(folder):
        os.mkdir(folder)
    os.chdir(folder)

    url = 'http://jandan.net/ooxx'
    page_num = int(get_pagenumber(url))
    for _ in range(pages):
        page_num -= 1
        if page_num < 1:
            # Asked for more pages than are available; page numbers below 1
            # are presumed not to exist.
            break
        page_url = url + '/page-' + str(page_num) + '#comments'
        image_address = find_image(page_url)
        save_image(folder, image_address)
# Script entry point: run the downloader with its default folder and page count.
if __name__ == "__main__":
    download_mm()
复制代码
和楼主的问题一样:代码正常执行,但OOXX文件夹为空。图片地址的格式还是一样的,没有变,感觉问题出在find_image()方法上——image_address这个列表没有成功存入图片的地址,但不知道为什么。还是说find_image()方法本身也是对的,原因是图片地址被加密了?求大神解答~ |
|