|
|

楼主 |
发表于 2017-11-22 22:46:47
|
显示全部楼层
- import urllib.request
- import os
def url_open(url):
    """Fetch *url* and return the raw response body.

    The request carries a desktop-browser ``User-Agent`` header
    (presumably so the server treats it like a normal browser visit).

    Args:
        url: Address of the resource to download.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36'}
    # Request the address the caller passed in; a fixed address here would
    # make every caller receive the same page regardless of its argument.
    req = urllib.request.Request(url, data=None, headers=headers)
    # The context manager closes the response even if read() raises.
    with urllib.request.urlopen(req) as response:
        return response.read()
def get_page(url):
    """Return the current comment-page number found on the page at *url*.

    The page is downloaded with ``url_open`` and decoded as UTF-8. The
    result is the text that starts three characters after the
    ``current-comment-page`` marker and ends just before the next ``]``.

    Args:
        url: Address of the page to inspect.

    Returns:
        The extracted page number as a ``str``.

    Raises:
        ValueError: If the marker or the closing ``]`` is not present.
    """
    marker = 'current-comment-page'
    html = url_open(url).decode('utf-8')

    marker_pos = html.find(marker)
    if marker_pos == -1:
        # Without this check, find()'s -1 would be offset into an
        # unrelated position and an arbitrary slice would be returned.
        raise ValueError("'current-comment-page' marker not found in page")

    # Skip the marker itself plus the three characters after it
    # (presumably the `">[` that precedes the number -- confirm against
    # the live markup).
    start = marker_pos + len(marker) + 3
    end = html.find(']', start)
    if end == -1:
        raise ValueError("closing ']' after page marker not found")
    return html[start:end]
-
def find_img(url):
    """Collect the ``.jpg`` image addresses referenced on the page at *url*.

    The page is downloaded with ``url_open``, decoded as UTF-8 and scanned
    for ``img src=`` occurrences. For each occurrence that has a ``.jpg``
    within the following 255 characters, the text between the opening
    quote and the end of ``.jpg`` is taken as the address.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of address strings. Addresses that do not already start
        with ``http://`` or ``https://`` get ``http:`` prepended (the page
        presumably uses protocol-relative ``//host/...`` links).
    """
    html = url_open(url).decode('utf-8')
    img_address = []

    tag_pos = html.find('img src=')
    while tag_pos != -1:
        # Limit the look-ahead so a '.jpg' far away from this attribute is
        # not mistaken for its target.
        ext_pos = html.find('.jpg', tag_pos, tag_pos + 255)
        if ext_pos != -1:
            # +9 skips 'img src=' and the opening quote character;
            # +4 keeps the '.jpg' extension in the slice.
            address = html[tag_pos + 9:ext_pos + 4]
            # Only add a scheme when one is missing; prepending blindly
            # would turn an absolute address into 'http:http://...'.
            if not address.startswith(('http://', 'https://')):
                address = 'http:' + address
            img_address.append(address)
            resume_from = ext_pos
        else:
            resume_from = tag_pos + 9
        tag_pos = html.find('img src=', resume_from)

    return img_address
-
-
- def save_imgs(img_address):
-
复制代码 |
|