|
1鱼币
Steam 评测页面每次下拉会加载十条新内容,但代码运行时无法获取下拉后才加载的内容,求教。
- import requests
- from bs4 import BeautifulSoup
- import json
- import time
- # from selenium import webdriver
- #
- # driver = webdriver.Chrome(r'C:\Users\m1359\AppData\Local\Google\Chrome\Application\chromedriver.exe')
def sen_from_text(text):
    """Return the first sentiment score the BosonNLP API reports for *text*.

    *text* is JSON-encoded and POSTed to the sentiment-analysis endpoint.
    The reply is indexed as ``reply[0][0]`` and converted to ``float``.
    NOTE(review): that value is presumably the "positive" probability of
    the first result -- confirm against the BosonNLP API documentation.

    Raises:
        requests.HTTPError: if the service answers with an error status
            (for example when the placeholder token below is not replaced).
        requests.Timeout: if the service does not answer within 10 seconds.
    """
    SENTIMENT_URL = 'http://api.bosonnlp.com/sentiment/analysis'
    h = {'X-Token': 'balbala'}  # your token
    payload = json.dumps(text).encode('utf-8')
    # Without a timeout, requests waits forever on a stalled connection.
    resp = requests.post(SENTIMENT_URL, headers=h, data=payload, timeout=10)
    # Fail loudly on 4xx/5xx instead of indexing into an error payload.
    resp.raise_for_status()
    scores = resp.json()
    return float(scores[0][0])
headers = {"Accept-Language": "zh-CN,zh;q=0.9"}

# Endpoint the Steam community review page calls while the user scrolls.
# Query layout adapted from https://www.tinymind.net.cn/articles/6c517fc1b33931
REVIEWS_URL = 'http://steamcommunity.com/app/578080/homecontent/'
PAGE_SIZE = 10

# Every one of these query parameters carries the current page number.
PAGED_SECTIONS = (
    'workshopitemspage',
    'readytouseitemspage',
    'mtxitemspage',
    'itemspage',
    'screenshotspage',
    'videospage',
    'artpage',
    'allguidepage',
    'webguidepage',
    'integratedguidepage',
    'discussionspage',
)


def build_query(page):
    """Return the query parameters for review page number *page* (1-based)."""
    query = {'userreviewsoffset': PAGE_SIZE * (page - 1), 'p': page}
    query.update((name, page) for name in PAGED_SECTIONS)
    query.update({
        'numperpage': PAGE_SIZE,
        'browsefilter': 'toprated',
        'appid': 578080,
        'l': 'schinese',
        'appHubSubSection': 10,
        'filterLanguage': 'default',
        'searchText': '',
        'forceanon': 1,
    })
    return query


def extract_hours(hours_text):
    """Return the second space-separated token of *hours_text*, or ''."""
    parts = hours_text.split(' ')
    return parts[1] if len(parts) > 1 else ''


def extract_comment(card_text):
    """Return the third line of *card_text* with edge tabs removed, or ''."""
    lines = card_text.split('\n')
    return lines[2].strip('\t') if len(lines) > 2 else ''


def parse_review(card):
    """Extract ``(title, hours, link, comment)`` from one review card.

    Returns ``None`` when the card has no title or no text container, so
    that a single malformed card does not abort the whole run.  A missing
    hours div or link yields an empty string for that field.
    """
    title_tag = card.find('div', {'class': 'title'})  # recommended or not
    hours_tag = card.find('div', {'class': 'hours'})  # play time
    link_tag = card.find('a')  # link to the reviewer's profile
    text_tag = card.find('div', {'class': 'apphub_CardTextContent'})
    if title_tag is None or text_tag is None:
        return None
    hours = extract_hours(hours_tag.text) if hours_tag is not None else ''
    link = link_tag.attrs.get('href', '') if link_tag is not None else ''
    return title_tag.text, hours, link, extract_comment(text_tag.text)


def main():
    """Fetch up to nine review pages, print each review and save it.

    Each review is printed and also written to ``steam.txt`` as one
    tab-separated line.  Fetching stops early at the first page that
    contains no review cards.
    """
    cursor = None
    with open('steam.txt', 'w', encoding='utf-8') as out:
        for page in range(1, 10):
            query = build_query(page)
            if cursor:
                query['userreviewscursor'] = cursor
            resp = requests.get(REVIEWS_URL, params=query, headers=headers,
                                timeout=10)
            resp.raise_for_status()
            soup = BeautifulSoup(resp.text, 'html.parser')
            cards = soup.find_all(
                'div', {'class': "apphub_Card modalContentLink interactable"})
            if not cards:
                break
            for card in cards:
                record = parse_review(card)
                if record is None:
                    continue
                # sen_from_text(record[3]) can be used here to score a comment.
                print(*record)
                out.write('\t'.join(record) + '\n')
            # NOTE(review): Steam's "load more" form appears to carry a hidden
            # `userreviewscursor` input that must accompany the next request;
            # it is forwarded only when present -- confirm on a live response.
            cursor_tag = soup.find('input', {'name': 'userreviewscursor'})
            cursor = cursor_tag.get('value') if cursor_tag is not None else None


if __name__ == '__main__':
    main()
复制代码 |
|