|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
from bs4 import BeautifulSoup
import pandas as pd
def pars_soup(soup):
    """Extract question/answer records from a parsed feed page.

    Args:
        soup: Parsed document (for example a ``BeautifulSoup`` object) that
            exposes ``find_all``.

    Returns:
        A list with one dict per ``<div class="m_feed_item">`` element, each
        holding the keys ``id``, ``name``, ``question`` and ``answer``.
        Fields whose markup is missing are returned as empty strings
        (``id`` is ``None`` when the attribute is absent).
    """
    items = []
    for feed_item in soup.find_all('div', 'm_feed_item'):
        # find() returns None when the item has no <p>; fall back to ''
        # instead of raising AttributeError on ``.text``.
        name_tag = feed_item.find('p')
        texts = feed_item.find_all('div', 'm_feed_txt')
        items.append({
            'id': feed_item.get('id'),
            'name': name_tag.text if name_tag is not None else '',
            # An item without any text block yields an empty question
            # rather than an IndexError.
            'question': texts[0].text.strip() if texts else '',
            # The answer is taken only when exactly two text blocks exist
            # (question followed by answer); otherwise it is left empty.
            'answer': texts[1].text.strip() if len(texts) == 2 else '',
        })
    return items
# Feed endpoint; the query string is kept exactly as in the original request.
url = 'https://sns.sseinfo.com/ajax/feeds.do?type=11&pageSize=10&lastid=-1&show=1&page=1&_=1690729988214'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36 Edg/91.0.864.48',
}
# A timeout keeps the script from blocking forever if the server stalls.
res = requests.get(url, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty spreadsheet.
res.raise_for_status()
soup = BeautifulSoup(res.text, features="html.parser")
data = pars_soup(soup)  # list of parsed records

# Load the records into a DataFrame
df = pd.DataFrame(data)

# Write the DataFrame to an Excel file
output_path = 'f://123.xlsx'
df.to_excel(output_path, index=False)
print(f"数据已成功写入到文件:{output_path}")
以上程序(第1个程序)可以正常提取到内容。
但对于在搜索框输入关键字后进入的搜索结果页面,下面的第2个程序却提取不到内容,到底哪里错了?
import requests
from bs4 import BeautifulSoup
import pandas as pd
def pars_soup(soup):
    """Extract question/answer records from a parsed search-result page.

    Args:
        soup: Parsed document (for example a ``BeautifulSoup`` object) that
            exposes ``find_all``.

    Returns:
        A list with one dict per ``<div class="m_feed_item">`` element, each
        holding the keys ``id``, ``name``, ``question`` and ``answer``.
        Fields whose markup is missing are returned as empty strings
        (``id`` is ``None`` when the attribute is absent).
    """
    items = []
    for feed_item in soup.find_all('div', 'm_feed_item'):
        # find() returns None when the item has no <p>; fall back to ''
        # instead of raising AttributeError on ``.text``.
        name_tag = feed_item.find('p')
        texts = feed_item.find_all('div', 'm_feed_txt')
        items.append({
            'id': feed_item.get('id'),
            'name': name_tag.text if name_tag is not None else '',
            # An item without any text block yields an empty question
            # rather than an IndexError.
            'question': texts[0].text.strip() if texts else '',
            # The answer is taken only when exactly two text blocks exist
            # (question followed by answer); otherwise it is left empty.
            'answer': texts[1].text.strip() if len(texts) == 2 else '',
        })
    return items
url = "https://sns.sseinfo.com/qasearch.do"
# Request headers.
# "Accept-Encoding" is intentionally not set: advertising "br" lets the server
# reply with Brotli, which requests only decodes when a Brotli package is
# installed; otherwise response.text is undecoded and nothing can be parsed.
# "Content-Length" and "Host" are likewise left for requests to derive from
# the actual body and URL.
headers = {
    "Accept": "*/*",
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "Connection": "keep-alive",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "Origin": "https://sns.sseinfo.com",
    "Referer": "https://sns.sseinfo.com/search.do?keyword=%E7%A9%BA%E9%97%B4%E8%AE%A1%E7%AE%97&keywordEnd=%E7%A9%BA%E9%97%B4%E8%AE%A1%E7%AE%97",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.188",
    "X-Requested-With": "XMLHttpRequest",
}
# Form fields sent in the request body.
# NOTE(review): the previously hard-coded Content-Length was 65, but this body
# url-encodes to 44 bytes, so the browser request this was copied from
# probably carried extra form fields. TODO: compare with the form data shown
# in the browser's developer tools and add any missing fields here.
payload = {
    "keyword": "空间计算",
}
# Send the POST request; the timeout keeps the script from hanging forever.
response = requests.post(url, data=payload, headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page into an
# empty spreadsheet.
response.raise_for_status()
soup = BeautifulSoup(response.text, features="html.parser")
data = pars_soup(soup)  # list of parsed records

# Load the records into a DataFrame
df = pd.DataFrame(data)

# Write the DataFrame to an Excel file
output_path = 'f://123.xlsx'
df.to_excel(output_path, index=False)
print(f"数据已成功写入到文件:{output_path}")
|
-
第1个程序图片
-
第2个程序图片
-
第2个程序图片
-
第2个程序图片
|