|
发表于 2018-5-2 19:58:28
|
显示全部楼层
- from selenium import webdriver
- import urllib.request
- import os,time
def _extract_jpg_urls(html):
    """Return the ``.jpg`` addresses that follow each ``img src=`` in *html*."""
    marker = 'img src='
    # Skip the marker plus the single opening-quote character after it.
    skip = len(marker) + 1
    urls = []
    start = html.find(marker)
    while start != -1:
        # Only accept a '.jpg' that appears within 255 characters of the
        # marker, so an extension belonging to a later tag is not picked up.
        end = html.find('.jpg', start, start + 255)
        if end != -1:
            # +4 keeps the '.jpg' extension itself in the address.
            urls.append(html[start + skip:end + 4])
            resume = end
        else:
            resume = start + skip
        start = html.find(marker, resume)
    return urls


def find_add(url):
    """Load *url* in Chrome and return the ``.jpg`` image addresses on it.

    The page is rendered through Selenium and the resulting page source is
    scanned for ``img src=`` occurrences.

    Args:
        url: Address of the page to load.

    Returns:
        A list of address strings, in the order they appear in the page
        source. Empty when no matching image is found.
    """
    # Raw string: same value as before, but without relying on invalid
    # escape sequences such as "\G" being passed through unchanged.
    driver = webdriver.Chrome(r'E:\Google\Chrome\Application\chromedriver.exe')
    try:
        driver.get(url)
        source = driver.page_source
    finally:
        # quit() ends the whole WebDriver session (close() only closes the
        # current window), and the finally clause guarantees the browser is
        # released even when loading the page fails.
        driver.quit()
    return _extract_jpg_urls(source)
def open_url(url):
    """Fetch *url* and return the raw response body.

    The request is sent with a desktop Chrome ``User-Agent`` header.

    Args:
        url: Address to fetch.

    Returns:
        The complete response body as ``bytes``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on failure.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36'}
    request = urllib.request.Request(url, headers=headers)
    # The context manager closes the response (and its underlying
    # connection) even if read() raises; previously it was never closed.
    with urllib.request.urlopen(request) as response:
        return response.read()
def save(add):
    """Download every address in *add* into the current working directory.

    Each file is named after the last ``/``-separated segment of its
    address and is written in binary mode.

    Args:
        add: Iterable of address strings to download.
    """
    for address in add:
        payload = open_url(address)
        # Everything after the final '/' becomes the local file name.
        filename = address.rsplit('/', 1)[-1]
        with open(filename, 'wb') as out:
            out.write(payload)
def load(folder='煎蛋妹子图片', page=10, start=47):
    """Download the images of several consecutive listing pages.

    Creates *folder* if needed, makes it the current working directory,
    then walks backwards from page *start*, collecting the image addresses
    of each page with ``find_add`` and storing them with ``save``.

    Args:
        folder: Directory the images are written to.
        page: Maximum number of pages to process.
        start: Page number to begin with; later iterations use
            ``start - 1``, ``start - 2`` and so on.
    """
    # exist_ok avoids a FileExistsError when the script is run a second time.
    os.makedirs(folder, exist_ok=True)
    # NOTE: this changes the working directory of the whole process.
    os.chdir(folder)
    base_url = 'http://jandan.net/ooxx/'
    # Count down from `start`, but never go below page 1.
    for num in range(start, max(start - page, 0), -1):
        page_url = base_url + 'page-' + str(num) + '#comments'
        save(find_add(page_url))
        # Pause between pages to avoid hammering the server.
        time.sleep(2)
# Start the download with the default settings when run as a script.
if __name__ == '__main__':
    load()
复制代码
这样贴出来 好看些 |
|