爬取糗事百科图片求助
import re

import requests

if __name__ == '__main__':
    # Fetch page 2 of the image ranking, presenting a mobile-browser
    # User-Agent to the server.
    url = 'https://www.qiushibaike.com/imgrank/page/2/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Mobile Safari/537.36'
    }
    response = requests.get(url=url, headers=headers).text
    print(response)

    # Capture the <img> src (up to the "?" that starts its query string)
    # inside each "author clearfix" div.  Raw string so that "\?" reaches
    # the regex engine as an escaped literal question mark.
    # NOTE(review): this is the pattern the question is about -- it is
    # reported to produce an empty list on the fetched page; it is kept
    # unchanged here on purpose.
    regex = r'div class="author clearfix">.*?<img src="(.*?)\?.*?</div>'
    image_src_list = re.findall(regex, response, re.S)
    print(image_src_list)
为啥 image_src_list 是空列表呢?
正则不对呗,参考代码:
import requests
import re
def extract_image_urls(html):
    """Return absolute URLs of the pictures referenced in ``html``.

    Looks for ``<img src="//pic.qiushibaike.com/system/pictures...">``
    attributes.  Those sources are protocol-relative (they start with
    ``//``), so each one is prefixed with ``https:``.

    Args:
        html: Page markup as a string.

    Returns:
        A list of URL strings in the order they appear in ``html``;
        an empty list when nothing matches.
    """
    # Raw string with escaped dots, so "." in the host name matches only a
    # literal dot rather than any character.
    regex = r'<img src="(//pic\.qiushibaike\.com/system/pictures.+?)"'
    return ['https:' + src for src in re.findall(regex, html, re.S)]


if __name__ == '__main__':
    url = 'https://www.qiushibaike.com/imgrank/page/2/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Mobile Safari/537.36'}
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url=url, headers=headers, timeout=10).text
    print(response)
    for image_url in extract_image_urls(response):
        print(image_url)
页:
[1]