xpath定位不到
本帖最后由 a2554146036 于 2022-11-27 00:37 编辑
url = "https://tieba.baidu.com/f?kw=%E5%94%AF%E7%BE%8E%E5%9B%BE%E7%89%87"
# HTTP request headers passed to requests.get() for the Tieba list page.
headers = {
# Desktop browser identification string (QQBrowser / Chrome 94 on Windows 10).
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36 Core/1.94.1004.400 QQBrowser/11.0.5127.400',
# Referring page: a Baidu search-result redirect link.
'Referer': 'https://www.baidu.com/link?url=OaZ4HcAH6v5bSG8_W1osu0lB_fcXlhZhqO2QjZuJutc263_yK-6VChEYzhY_qFE_TKkwY2thf4fqHUitDJ5LaAvrJL3LnT9fte5x29r3lfm&wd=&eqid=964f9d5f00005690000000066380ed01',
# NOTE(review): this looks like a cookie string copied from a live browser
# session. Presumably it expires and identifies the account/browser it was
# taken from -- confirm, and avoid sharing or committing it.
'Cookie': 'BIDUPSID=D7E776C5D1BE70572BA3FA9129E2C5FA; PSTM=1669037450; BAIDUID=D7E776C5D1BE7057610E592608BC84AF:FG=1; BDORZ=FFFB88E999055A3F8A630C64834BD6D0; FPTOKEN=30$+wQpPxcG+Pghasa25c5gp47I0T6QG+HIK5iY1UHvU3mUhEHMlZvFMxGBIANowuSG2DOibnal1eNlSDkDiSASY/8fqr1WtY9O7RyuM/YJCLkedRG6Yw+Uf5HAaIYO3Y+6/jYOV+CLqP4equ0hwJTz0ztEEnuxsIA59CZE6pqPc8RoCO6SV/d3rLg7rkCHHsJHSCf5so3R4Bsl8RsFXS7hm4BVIzpQmvkyrRXf6YHctZHDqz6hpGNPT/hHFSIUhzAV8A0k43t1jnluzwpUzHZxJb4JIm3/l3gciywD/ADq4nVMLLSpMau2L5afwMRU19WuUfhne1eSgPIiV/JmdUVi1mgk5hvW+kSAvKByyPHwXu+gUiW6CpSS54Z5O1uyzgWV|HIxh3nAn8z01eyDnKe6UjnVwHchRVTE2fntiSqOuonM=|10|5154fad491d2597c71e640882f728a7a; BAIDUID_BFESS=D7E776C5D1BE7057610E592608BC84AF:FG=1; BDRCVFR=mk3SLVN4HKm; H_PS_PSSID=36561_37773_37828_37303_37797_37761_37740_26350; BA_HECTOR=21a40h800l01a5818g8l0jre1ho462e1e; ZFY=2ip3OF1mH75GiIUwsUTomvMzStl9nP1brsHd:BFZ5r0c:C; BAIDU_WISE_UID=wapp_1669478419880_542; USER_JUMP=-1; Hm_lvt_98b9d8c2fd6608d564bf2ac2ae642948=1669371183,1669393670,1669478420; Hm_lpvt_98b9d8c2fd6608d564bf2ac2ae642948=1669478420; XFI=6e6f05e0-6da3-11ed-964a-e59b06015c54; ab_sr=1.0.1_OGJiNmYxMDVmYzY4MWYwNzEyYjUyOWRmODViNmY4MzkxZjkxYzliODgyOGFhYzI5M2NlYzYxMTFiYWMwZWNlZWY2NWQ2M2QzYjQyNmFiMWM4OGM2ZmUzMTJhYjQ5MTVhMzkyNDgwODUyNzI0Zjk3MzUzNjBjYjAwYWM4Y2U2YTdlNjliMDdhOWMxMjg1N2U4NGRmZDU5ZDhkNmM3MDA4OQ==; st_data=e74b1a9879fcdfa97f99275a6307d87c6ad1513f6362736c11073fa53d90032d1ef61599291bcfa05062a9983f51d8f16d690bdc49a354ca900be880e2cbabd2f8d0c2e948c9c119da03998614908c84108c82601dc700d01211f10424a3481e; st_key_id=17; st_sign=339c20ee; __bid_n=184af9de8e0c581c1e4207; XFCS=15326BBD2E07003334036634232C4A89CB42A455CF1287BE3F6BF3D861F8E8AB; XFT=nHssokOp9JswrguE6sLDsdZNjtvDo9zPZYOkvUkyTVA=; RT="z=1&dm=baidu.com&si=9a8bae5f-5f11-4b80-9210-89c6ed68f598&ss=lay4cgp4&sl=3&tt=4ah&bcn=https%3A%2F%2Ffclog.baidu.com%2Flog%2Fweirwood%3Ftype%3Dperf&ld=w4z&ul=101f"'
}
# Fetch the Tieba list page and print the title of every thread on it.
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of parsing an error page.
html = requests.get(url, headers=headers, timeout=10)
html.raise_for_status()

# NOTE(review): likely cause of the empty XPath result -- Tieba appears to
# deliver the thread list wrapped in HTML comments (<!-- ... -->) and relies
# on browser-side JavaScript to un-comment it. A browser tool such as XPath
# Helper sees the final DOM, while lxml parses the raw text and treats the
# whole list as comment nodes. Confirm by inspecting html.text. Removing the
# comment markers makes the markup visible to the parser.
page_source = html.text.replace('<!--', '').replace('-->', '')
soup = etree.HTML(page_source)

# contains() instead of an exact @class match: an exact comparison fails on
# any extra/trailing whitespace or additional class in the raw attribute.
ss = soup.xpath('//div[contains(@class, "threadlist_title")]/a')
for i in ss:
    # Print the thread title rather than the lxml Element object; fall back
    # to the link text when the <a> has no title attribute.
    print(i.get('title') or ''.join(i.itertext()).strip())
// 这行代码无法取到数据,但是在网页的 XPath Helper 中调试又可以取到。
//
爬取这个链接 https://tieba.baidu.com/f?kw=%E5%94%AF%E7%BE%8E%E5%9B%BE%E7%89%87 下的帖子标题
页:
[1]