|

楼主 |
发表于 2020-3-8 20:39:13
|
显示全部楼层
# Fetch the haokan.baidu.com recommendation feed (tab=yingshi), then download
# every recommended video's page and print the nodes matched by a fixed XPath.
import json

import requests
from lxml import etree

# Seconds to wait on each HTTP request; without a timeout `requests` can
# block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# XPath evaluated against each video page: the third <li> under the
# <hk-definition> element of the player controls.
DEFINITION_XPATH = '/html/body/div/div/div[1]/div[1]/div/hk-controls/hk-definition/ul/li[3]'

url ="https://haokan.baidu.com/videoui/api/videorec?tab=yingshi&act=pcFeed&pd=pc&num=5&shuaxin_id=1583468488484"

# SECURITY(review): the cookie below is a hard-coded browser session and
# includes a BDUSS value. It is reproduced unchanged so the request stays
# identical, but it should be loaded from configuration or the environment
# rather than committed or posted publicly.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "cookie": "BIDUPSID=46B223B8A7EAECCA159168C5EF538730; PSTM=1580279175; BAIDUID=46B223B8A7EAECCA1D299D6661AB1F78:FG=1; BDUSS=wyaHJzcUFsOWdtYkVMQ0FneWFCfjVUOTcyTXZMRHJmUWJtbWlrOW1abW1zbGhlSVFBQUFBJCQAAAAAAAAAAAEAAAC9Y9sXYTE1Mzg4NjU0AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKYlMV6mJTFeY0; Hm_lvt_4aadd610dfd2f5972f1efee2653a2bc5=1581861785,1581914947,1582809107; COMMON_LID=8d29357345ecabe02bcad89b3bd55ab6; reptileData=%7B%22data%22%3A%227e946299daa801da748c32b90adc9d7587f263b748089c6f531c52f9288451033df59d8bcd2c41c5ea90515a8a59833e572833311e74af94f7e2aa1794a63686198f2e47c079610fbaf9740df8731397ca5a287539edecd7194117534d906ad3d67750c2ddd9d756c1352c874ec21387f39c3f33f49f4f9d63f2d0f530ed45645ab8b1fe1841446ca7f3e2bdc2badd48%22%2C%22key_id%22%3A%2230%22%2C%22sign%22%3A%22b3439eb1%22%7D; PC_TAB_LOG=haokan_website_page; Hm_lpvt_4aadd610dfd2f5972f1efee2653a2bc5=1582809193",
}

# Step 1: request the feed and decode its JSON body.
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
# Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
response.raise_for_status()
json_data = json.loads(response.text)

# Step 2: collect the page URL of every video entry in the feed.
# A KeyError here means the response did not have the
# data -> response -> videos -> [ {url: ...} ] layout this script relies on.
video_list = json_data['data']['response']['videos']
urls = [video['url'] for video in video_list]

# Step 3: download each video page and print what the XPath matches.
for page_url in urls:
    page_response = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    page_response.raise_for_status()
    page_tree = etree.HTML(page_response.text)
    # NOTE(review): xpath() returns a list of element objects, so this prints
    # their reprs rather than any text or attribute. If the player controls
    # are injected by JavaScript they will be absent from the downloaded HTML
    # and the list will be empty -- confirm against a real page.
    matched_nodes = page_tree.xpath(DEFINITION_XPATH)
    print(matched_nodes)
复制代码 |
|