|
|

楼主 |
发表于 2016-12-31 16:57:35
|
显示全部楼层
不是的 这里连‘>>>’都没有 还有 我只import urllib.request 之后也一样
- #python3.5.2
- import urllib.request
- import os
- def url_open(url):
- req=urllib.request.Request(url)
- req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36')
- resp=urllib.request.urlopen(url)
- html=resp.read()
- return html
- def get_page(url):
-
- html=url_open(url).decode('utf-8')
- a= html.find('current-comment-page') + 23#.find是从搜索的第一个字符开始计算的包括'<之类的
- b= html.find(']',a)
- h=html[a:b]
- return html[a:b]
-
- def find_imgs(url):
- html=url_open(url).decode('utf-8')
- img_addrs=[]
- a=html.find('img src=')
- while a != -1:
- b= html.find('.jpg',a,a+255)
- if b!= -1:
- img_addrs.append(html[a+9:b+4])
- else:
- b=a+9
- a= html.find('img src=',b)
- return img_addrs
- def save_imgs(folder, img_addrs):
- for each in img_addrs:
- filename=each.split('/')[-1]
- with open(filename,'wb')as f:
- img = url_open(each)
- f.write(img)
- def download_mm(folder='ooxx',pages=50):
- url='http://jandan.net/ooxx'
- a=os.getcwd()
- '''if not os.path.exits(a):
- path=a+os.sep+folder
- os.mkdir(path)
- else:
- path=a+os.sep+folder
- os.chdir(path)'''
- os.mkdir(folder)
- os.chdir(folder)
- page_num = int(get_page(url))
- for i in range(pages):
- page_num-=i
- page_url=url+'page-'+str(page_num)+'#comments'
- img_addrs=find_imgs(page_url)
- save_imgs(folder,img_addrs)
-
- if __name__='__main__':
- download_mm()
复制代码
按F5之后
就是如上图的样子 |
|