爬虫求助
import requests

if __name__ == '__main__':
    # Fetch the Pear Video videoStatus endpoint and print the raw response body.
    # NOTE: this request sends only a User-Agent header -- no Cookie and no
    # Referer. The thread reports that the server answers it with a
    # "resultCode": "5" payload instead of the data shown in the browser.
    url = 'https://www.pearvideo.com/videoStatus.jsp?contId=1735519&mrd=0.25104991488736483'
    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Mobile Safari/537.36',
    }
    # A timeout keeps the script from hanging forever if the server never
    # responds; requests applies no timeout by default.
    page_data = requests.get(url=url, headers=headers, timeout=10).text
    print(page_data)
结果
{
"resultCode":"5",
"resultMsg":"该文章已经下线!",
"systemTime": "1626613181774"
}
为什么和网页上显示的不一样呢?加上 cookie。你应该是被反爬了,加上 Cookie 和 Referer 就好了,这两个在那个包里会有。
import requests
if __name__ == '__main__':
    # Fetch the Pear Video videoStatus endpoint and print the raw response body.
    # Unlike a bare User-Agent request, this one also sends Cookie and Referer
    # headers, which the thread suggests are needed to get past the site's
    # anti-scraping check.
    url = 'https://www.pearvideo.com/videoStatus.jsp?contId=1735519&mrd=0.5638120245206848'
    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Mobile Safari/537.36',
        # Hard-coded cookie string; presumably copied from a browser session
        # and likely to expire -- TODO confirm and refresh when requests fail.
        "Cookie": "__secdyid=82d51ae29235d33d1ecfc291a45a6930dfc640cfe892543f021626621462; acw_tc=76b20f4716266214622415126e913e92c4ade2252cc87b1eeb2e9941c7a8de; JSESSIONID=CCED1D635A872559F59130C598200E2C; PEAR_UUID=ceed637c-05c6-4dea-aab7-2931d707ebb2; _uab_collina=162662147669689960966628; UM_distinctid=17aba320173894-04ca3b0358cd75-6373264-144000-17aba32017482a; CNZZDATA1260553744=38181339-1626621355-%7C1626621355; Hm_lvt_9707bc8d5f6bba210e7218b8496f076a=1626621477; p_h5_u=2F61B91A-DAD2-4BBB-AA8E-5DA9E4CD17D5; Hm_lpvt_9707bc8d5f6bba210e7218b8496f076a=1626621508; SERVERID=ed8d5ad7d9b044d0dd5993c7c771ef48|1626621650|1626621462",
        # The Referer points at the video page whose id matches contId in the URL.
        "Referer": "https://www.pearvideo.com/video_1735519",
    }
    # A timeout keeps the script from hanging forever if the server never
    # responds; requests applies no timeout by default.
    page_data = requests.get(url=url, headers=headers, timeout=10).text
    print(page_data)
页:
[1]