爬取贴吧图片
import os
import re
import urllib.request as req
def open_url(url):
    """Fetch a page and return its body decoded as UTF-8 text.

    A desktop-browser ``user-agent`` header is attached to the request
    in place of the default one sent by ``urllib``.

    Args:
        url: Address of the page to download.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    request = req.Request(url)
    request.add_header('user-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36')
    # Use the response as a context manager so the underlying connection
    # is closed even when read() or decode() raises.
    with req.urlopen(request) as response:
        return response.read().decode('utf-8')
def get_img(html_data):
    """Download every matching post image into the ``Img`` folder.

    Images are found by matching ``<img class="BDE_Image" ... src="....jpg">``
    tags in ``html_data``. They are saved as ``Img/1.jpg``, ``Img/2.jpg``,
    ... in the order the tags appear.

    Args:
        html_data: HTML source of the page to scan.

    Raises:
        urllib.error.URLError: If one of the images cannot be downloaded.
    """
    img = r'<img class="BDE_Image".*?src="([^"]+\.jpg)"'
    img_list = re.findall(img, html_data)
    # A single folder holds all downloaded images. exist_ok lets the
    # function run again when the folder is already there.
    os.makedirs('Img', exist_ok=True)
    # Name images with an increasing number. Writing through a joined path
    # (instead of chdir-ing into the folder) leaves the process working
    # directory untouched, so repeated calls keep using the same folder.
    for number, each in enumerate(img_list, start=1):
        filename = os.path.join('Img', str(number) + '.jpg')
        req.urlretrieve(each, filename, None)
if __name__ == '__main__':
    # Script entry point: fetch one Tieba thread page, then download the
    # images found in it.
    page_html = open_url('https://tieba.baidu.com/p/7138577846')
    get_img(page_html)
页:
[1]