|
|

楼主 |
发表于 2017-12-13 23:17:01
|
显示全部楼层
import urllib.request
import os
import re
def url_open(url):
req = urllib.request.Request(url)
req.add_header('user-agent',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')
return html
def get_img(html):
p=r'<img class="BDE_Image".*?src=([^"]*\.jpg)".*?>'
imglist=re.findall(p,html)
os.mkdir('newpics')
os.chdir('newpics')
for each in imglist:
filename=each.split('/')[-1]
urllib.request.urlretrieve(each,filename,None)
if __name__=='__main__':
url='https://tieba.baidu.com/p/3823765471'
get_img(url_open(url))
我把网址都换了好多次了,每一次能抓到地址的,print那个imglist是空的 |
|