本帖最后由 suchocolate 于 2020-9-25 10:43 编辑
soup没深入研究,xpath比较熟悉,所以用xpath给你写了一个。
import requests
from lxml import etree
def main():
    """Fetch the xiaozhu.com Beijing listing page and print each listing.

    For every ``<li>`` element carrying a ``lodgeunitid`` attribute, prints
    the image title, the whitespace-normalized description text and the
    price, followed by a separator line of 100 ``=`` characters.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = 'https://bj.xiaozhu.com/'
    # NOTE(review): the cookie is a captured browser session and has
    # presumably expired; refresh it if the site starts rejecting requests.
    headers = {
        'user-agent': 'firefox',
        'cookie': 'abtest_ABTest4SearchDate=b; xzuuid=60edda4c; sajssdk_2015_cross_new_user=1; distinctId=174bb0e4812102-034d2bbd6334d-6373664-1327104-174bb0e481374; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22174bb0e4812102-034d2bbd6334d-6373664-1327104-174bb0e481374%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22%24device_id%22%3A%22174bb0e47ee1d9-07ed0f67d639cd-6373664-1327104-174bb0e47f019e%22%7D; Hm_lvt_92e8bc890f374994dd570aa15afc99e1=1600866110; _uab_collina=160086611042232800553623; wttXMuWwbC=3bb7e13a2cada458f866cd09b3158a88e4a443f4; ATNgmRNkrw=1600866157; Hm_lpvt_92e8bc890f374994dd570aa15afc99e1=1600866160',
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    r = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of silently parsing an error page.
    r.raise_for_status()
    # Debugging tip: write r.text to a file to inspect the markup that the
    # XPath expressions below are evaluated against.
    html = etree.HTML(r.text)
    lis = html.xpath('//li[@lodgeunitid]')
    for item in lis:
        titles = item.xpath('./a/img/@title')
        prices = item.xpath('./div[2]/div[1]/span/i/text()')
        if not titles or not prices:
            # Listing markup differs from what the XPath expects; skip it
            # rather than abort the whole run with an IndexError.
            continue
        # normalize-space() yields a plain string (empty when nothing matches).
        txt = item.xpath('normalize-space(./div[2]/div[2]/em//text())')
        print(titles[0], txt, prices[0] + '每晚')
        print('=' * 100)
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()
|