|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
from bs4 import BeautifulSoup as bs
import json
import csv
import re
# Module-level list of target comment-page JS URLs.
# Filled by Get_Url() and consumed page-by-page by GetInfo().
COMMENT_PAGE_URL = []
#生成链接列表
def Get_Url(num, url_front='', url_rear=''):
    """Generate comment-page URLs and append them to COMMENT_PAGE_URL.

    Pages are numbered 1..num on the target site; each URL is built as
    url_front + page_number + url_rear.

    Args:
        num: number of pages to generate URLs for.
        url_front: URL prefix placed before the page number
            (default '' — kept for backward compatibility with the
            original hard-coded empty value; fill in the real prefix).
        url_rear: URL suffix placed after the page number (default '').
    """
    # extend() with a generator replaces the manual append loop;
    # range(1, num + 1) makes the 1-based page numbering explicit.
    COMMENT_PAGE_URL.extend(
        url_front + str(page) + url_rear for page in range(1, num + 1)
    )
#获取评论数据
def GetInfo(num):
    """Fetch the first `num` comment pages, scrape rating fields with
    regexes, and append one comma-separated line per comment to
    E:\\TmallContent.txt.

    Args:
        num: number of entries of COMMENT_PAGE_URL to fetch.

    Side effects:
        - Network GET per page via `requests`.
        - Appends lines "<nick>,<date>,<sku>,<content>" to the output file.
        - Prints scraped nicknames and a per-line success message.
    """
    # Accumulators for the scraped fields, one list per field.
    nicknames = []
    skus = []
    contents = []
    dates = []

    # Request headers are identical for every page, so build them once
    # (the original rebuilt this dict on every loop iteration).
    # Without browser-like headers the site returns bogus JS.
    headers = {
        'cookie': '',
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'referer': '',
        'accept': '*/*',
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9'
    }

    # Fetch each page and pull the fields out of the embedded JSON-ish JS.
    for page_url in COMMENT_PAGE_URL[:num]:
        body = requests.get(page_url, headers=headers).text
        nk = re.findall('"displayUserNick":"(.*?)"', body)
        nicknames.extend(nk)
        print(nk)
        skus.extend(re.findall('"auctionSku":"(.*?)"', body))
        contents.extend(re.findall('"rateContent":"(.*?)"', body))
        dates.extend(re.findall('"rateDate":"(.*?)"', body))

    # Open the output file once and write all rows (the original reopened
    # the file for every single line). zip() also stops at the shortest
    # list, avoiding the IndexError the original risked when a regex
    # matched a different number of times than the others.
    with open(r"E:\TmallContent.txt", 'a+', encoding='UTF-8') as file:
        for idx, row in enumerate(zip(nicknames, dates, skus, contents), 1):
            file.write(','.join(row) + '\n')
            print(idx, ":写入成功")
#主函数
# Script entry point: build the URL list, then scrape every page.
if __name__ == "__main__":
    # Number of comment pages to scrape.
    Page_Num = 900
    Get_Url(Page_Num)
    # Bug fix: was the hard-coded GetInfo(900); pass Page_Num so both
    # calls always stay in sync when the page count changes.
    GetInfo(Page_Num)
|