|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
直接上代码
- import urllib.request
- import urllib.parse
- import json
- import os
def url_open(url):
    """Fetch *url* and return the raw response body as bytes.

    The request carries a mobile-Chrome ``User-Agent`` header. The
    ``Request`` object (not the bare URL string) must be handed to
    ``urlopen``; otherwise the header is silently dropped.

    Args:
        url: Absolute URL to fetch.

    Returns:
        The undecoded response body.

    Raises:
        urllib.error.URLError: if the request fails.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Mobile Safari/537.36')
    # Open the prepared request so the custom header is actually sent, and
    # close the response deterministically once the body has been read.
    with urllib.request.urlopen(req) as response:
        return response.read()
-
def get_page(url):
    """Return the page number found in the first ``/home/<n>`` link on *url*.

    The page is fetched with ``url_open``, decoded as UTF-8, and scanned for
    the first occurrence of ``href="/home/``. The text between that marker
    and the next double quote is returned unchanged.

    Args:
        url: URL of the page to scan.

    Returns:
        The link's trailing path component as a string (callers convert it
        with ``int``).

    Raises:
        ValueError: if the page contains no ``href="/home/`` link, or the
            link's closing quote is missing.
    """
    html = url_open(url).decode('utf-8')
    marker = 'href="/home/'
    found = html.find(marker)
    if found == -1:
        # Without this guard, -1 + len(marker) would silently slice garbage.
        raise ValueError('no href="/home/ link found in ' + url)
    start = found + len(marker)
    end = html.find('"', start)
    if end == -1:
        raise ValueError('unterminated href="/home/ link in ' + url)
    return html[start:end]
def _album_urls(html):
    """Return album page URLs derived from the thumbnail links in *html*."""
    prefix = 'http://img.mmjpg.com/small/2018/'
    albums = []
    start = html.find(prefix)
    while start != -1:
        end = html.find('.jpg', start, start + 255)
        if end == -1:
            # No extension nearby: skip past this prefix and keep scanning.
            end = start + 32
        elif end - start == 36:
            # 32-character prefix + 4-character album id: the id doubles as
            # the path of the album's first page.
            albums.append('http://www.mmjpg.com/mm/' + str(html[start + 32:end]))
        start = html.find(prefix, end)
    return albums


def _album_page_count(html):
    """Return the total number of pages of an album, read from its pager.

    Heuristic kept from the original code: the first ``href="/mm/`` link
    that follows the text ``/6`` is taken to be the link to the last page.
    NOTE(review): this depends on the site's pager markup -- confirm.

    Raises:
        ValueError: if no link of the form ``/mm/<id>/<page>`` is found.
    """
    anchor = html.find('/6')
    link = html.find('href="/mm/', anchor + 4)
    if link == -1:
        raise ValueError('could not locate the last-page link of the album')
    href_start = link + len('href="')
    href_end = html.find('"', href_start)
    if href_end == -1:
        raise ValueError('unterminated last-page link of the album')
    # Read the number up to the closing quote instead of a fixed two
    # characters, so 1-digit and 3-digit page counts also parse.
    parts = html[href_start:href_end].strip('/').split('/')
    if len(parts) != 3:
        raise ValueError('unexpected last-page link: ' + html[href_start:href_end])
    return int(parts[2])


def _image_url(html):
    """Return the first ``img src`` address ending in ``.jpg``, or None."""
    start = html.find('img src=')
    if start == -1:
        return None
    end = html.find('.jpg', start, start + 255)
    if end == -1:
        return None
    # +9 skips 'img src=' plus the opening quote; +4 keeps the '.jpg' suffix.
    return html[start + 9:end + 4]


def find_urls(url):
    """Download the images of every album linked from the listing page *url*.

    For each album found on the listing page, the album's page count is
    read, pages 2..N are fetched, the first ``.jpg`` image of each page is
    located, and every image is saved into the current working directory
    under its own file name. Album URLs, page counts and image URLs are
    printed as progress output.

    Args:
        url: URL of a listing ("home") page.

    Returns:
        None.

    Raises:
        ValueError: if an album's page count cannot be parsed.
        urllib.error.URLError: if any request fails.
    """
    listing = url_open(url).decode('utf-8')
    for album_url in _album_urls(listing):
        print(album_url)
        first_page = url_open(album_url).decode('utf-8')
        pages = _album_page_count(first_page)
        print(pages)

        img_addrs = []
        # NOTE(review): as in the original, page 1 (the album URL itself) is
        # not scanned for an image -- confirm whether that is intended.
        # A bounded range also terminates when pages < 2, where the former
        # `while i != pages` loop could spin forever.
        for number in range(2, pages + 1):
            page_html = url_open(album_url + '/' + str(number)).decode('utf-8')
            img_url = _image_url(page_html)
            if img_url is not None:
                img_addrs.append(img_url)

        for img_url in img_addrs:
            print(img_url)
            filename = img_url.split('/')[-1]
            # Fetch the image itself. The original fetched the album page
            # URL here, so every saved file had the same, wrong content.
            # NOTE(review): the image host may also require a Referer header
            # (hotlink protection) -- verify against the site.
            data = url_open(img_url)
            with open(filename, 'wb') as f:
                f.write(data)
-
def download_mm(folder='ooxx',pages=1):
    """Download albums from *pages* listing pages into *folder*.

    The working directory is switched to *folder* (created first if it does
    not exist), the starting page number is read from the site's front page
    via ``get_page``, and ``find_urls`` is run on each of the following
    *pages* listing pages.

    Args:
        folder: Directory in which the images are saved.
        pages: Number of listing pages to process.

    Returns:
        None.
    """
    # Create the target directory up front; os.chdir alone raises
    # FileNotFoundError on a first run.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    url = "http://www.mmjpg.com/"
    page_num = int(get_page(url))
    for _ in range(pages):
        page_num += 1
        page_url = url + 'home/' + str(page_num)
        find_urls(page_url)
# Script entry point: run the downloader with its default folder and page
# count when the file is executed directly rather than imported.
if __name__ == '__main__':
    download_mm()
复制代码
代码有点乱,图片地址都能获取到,但是保存下来都是同一张无关的图片。
返回结果:
http://img.mmjpg.com/2018/1323/2iqw.jpg
http://img.mmjpg.com/2018/1323/3ikb.jpg
http://img.mmjpg.com/2018/1323/4iak.jpg
http://img.mmjpg.com/2018/1323/5ijf.jpg
http://img.mmjpg.com/2018/1323/6i0f.jpg
http://img.mmjpg.com/2018/1323/7i3u.jpg
http://img.mmjpg.com/2018/1323/8iqc.jpg
http://img.mmjpg.com/2018/1323/9i9y.jpg |
|