|
|

楼主 |
发表于 2018-1-8 14:50:23
|
显示全部楼层
import re
import urllib.request
def open_url(url):
    """Fetch ``url`` and return the response body as text.

    The request is sent with a desktop-browser ``User-Agent`` header, and the
    raw response bytes are decoded as UTF-8.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 UBrowser/6.1.2107.204 Safari/537.36')
    # Use the response as a context manager so the underlying connection is
    # always closed, even when read() or decode() raises.
    with urllib.request.urlopen(req) as page:
        return page.read().decode('utf-8')
def get_img(html):
    """Download every ``.jpg`` referenced by a ``hdFirstImgObj`` image tag.

    The page text is searched for ``<img id="hdFirstImgObj" ...>`` tags whose
    attributes match the expected markup exactly. Each captured ``src`` URL
    is saved into the current working directory under the last path segment
    of the URL.

    Args:
        html: Page source to scan.

    Returns:
        None. Matching images are written to disk as a side effect.
    """
    # Literal markup that precedes the image URL. It contains regex
    # metacharacters ("(", ")", ".", "+"), so it must be escaped: otherwise
    # the parentheses become capture groups, the tag never matches, and
    # findall would yield tuples instead of URL strings.
    tag_prefix = '''<img id="hdFirstImgObj" data-replace="1" onload="document.getElementById('currentImg').src=this.src; this.setAttribute('data-replace', '1'); alog && alog('speed.set', 'c_imgobjshow', +new Date); alog.fire && alog.fire('mark'); speed && speed.mark('firstSc');" src="'''
    pattern = re.escape(tag_prefix) + r'([^"]+\.jpg)"'

    # With exactly one capture group, findall returns a list of URL strings.
    for img_url in re.findall(pattern, html):
        filename = img_url.split("/")[-1]
        urllib.request.urlretrieve(img_url, filename, None)
if __name__ == '__main__':
    # Script entry point: fetch the Baidu image search page below, then
    # download whatever images get_img finds in its source.
    url = 'http://image.baidu.com/search/index?tn=baiduimage&ipn=r&ct=201326592&cl=2&lm=-1&st=-1&sf=1&fmq=1389861203899_R&pv=&ic=0&nc=1&z=&se=1&showtab=0&fb=0&width=&height=&face=0&istype=2&ie=utf-8&ala=6&ori_query=%E5%94%AF%E7%BE%8E%E5%9B%BE%E7%89%87&fr=ala&ala=1&alatpl=adress&pos=2&oriquery=%E5%94%AF%E7%BE%8E%E5%9B%BE%E7%89%87&alaTag=0&&word=%E5%94%AF%E7%BE%8E%E5%9B%BE%E7%89%87%20%E6%84%8F%E5%A2%83&hs=2&xthttps=000000'
    page_source = open_url(url)
    get_img(page_source)
谢谢您!!
|
|