|

楼主 |
发表于 2024-11-20 21:47:51
|
显示全部楼层
import requests
from bs4 import BeautifulSoup
import time
# Request headers sent with every page fetch; only a desktop-Chrome
# User-Agent string is set.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/55.0.2883.87 Safari/537.36'
    ),
}
def get_info(url):
    """Fetch a news listing page and extract headline/time pairs.

    The page at ``url`` is downloaded with the module-level ``headers``,
    decoded as UTF-8 and parsed with BeautifulSoup's ``html.parser``.
    Headline links and timestamps are looked up under the element whose
    id is ``feedCardContent`` and paired positionally.

    Args:
        url: Address of the listing page to download.

    Returns:
        A list of ``{'title': ..., 'time': ...}`` dicts, one per matched
        headline/timestamp pair; empty when nothing matches.

    Raises:
        requests.exceptions.RequestException: If the download fails or
            does not complete within the timeout.
    """
    # Without a timeout requests.get() can block forever on a stalled server.
    wb_data = requests.get(url, headers=headers, timeout=10)
    wb_data.encoding = 'utf-8'
    soup = BeautifulSoup(wb_data.text, 'html.parser')

    # '#feedCardContent' is an id selector. Written without the '#' it is a
    # tag-name selector (<feedCardContent>), which never matches anything.
    titles = soup.select('#feedCardContent > div > div > h2 > a')
    # Diagnostic output: shows the raw matches (an empty list means the
    # selector found nothing in the downloaded HTML).
    print(titles)
    times = soup.select(
        '#feedCardContent > div > div > '
        'div.feed-card-a.feed-card-clearfix > div.feed-card-time'
    )
    # NOTE(review): if the site fills this container with JavaScript after
    # the page loads, the HTML returned by requests will not contain these
    # nodes and both lists stay empty -- confirm by inspecting wb_data.text.

    # TODO: comment-count extraction is not implemented yet.
    # 'stamp' rather than 'time' so the imported time module is not shadowed.
    return [
        {
            'title': title.get_text(),
            'time': stamp.get_text(),
        }
        for title, stamp in zip(titles, times)
    ]
if __name__ == '__main__':
    # Script entry point: scrape the listing page once.
    url = 'http://news.sina.com.cn/china/'
    get_info(url)
    # Pause for two seconds after the request (presumably a politeness
    # delay; with a single request it only postpones exit).
    time.sleep(2)
这是我的代码。请教一下：我按照你说的修改了，但还是没有抓取到任何内容，可以帮忙看一下吗？ |
|