|

楼主 |
发表于 2018-3-31 13:14:50
|
显示全部楼层
import os
import re
import requests
import urllib.request as ur
def openurl(url):
req=ur.Request(url)
req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.90 Safari/537.36 2345Explorer/9.2.1.17116')
response=ur.urlopen(req)
html=response.read().decode('gbk')
return html
def getimage(html):
r=r'<img id="[^"]+" aid="\d+" src="([^"]+\.jpg)" zoomfile="[^"]+\.jpg" file="[^"]+\.jpg" class="zoom" onclick="zoom(this, this.src, 0, 0, 0)" width="600" alt="楚楚可怜的小学生萝莉音波最新高价收购的极品- 淘女吧资源站 " title="楚楚可怜的小学生萝莉音波最新高价收购的极品- 淘女吧资源站 " inpost="1" onmouseover="[^5]+" lazyloaded="true" height="336" initialized="true"'
p=re.findall(r,html)
for each in p:
f=each.split('/')[-1]
ur.urlretrieve(each,f)
if __name__=='__main__':
url='http://www.cl4.cc/thread-8196-1-1.html'
getimage(openurl(url))
|
|