马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
从网上爬取的评论存储不到TXT文件中了,老报错,求大神:

import requests
import csv
import multiprocessing
from bs4 import BeautifulSoup
from requests.exceptions import RequestException
from multiprocessing import Pool
def get_comments(url):
    """Download the comment page at *url* and return its HTML text.

    Returns None when the request fails or the server does not answer
    with HTTP 200.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    try:
        resp = requests.get(url, headers=headers)
        # The site serves GBK-encoded pages; override requests' guess.
        resp.encoding = 'gbk'
        if resp.status_code != 200:
            print('请求评论错误')
            return None
        return resp.text
    except RequestException:
        return None
def parse_comments(html):
    """Yield one ``{'comments': text}`` dict per comment found in *html*.

    Comments live in ``.comment-item`` containers; the text is taken from
    the first ``.commenttext`` node and stripped of whitespace characters.
    """
    soup = BeautifulSoup(html, 'lxml')
    for node in soup.find_all(class_='comment-item'):
        hits = node.select('.commenttext')
        if not hits:
            continue
        text = hits[0].text
        # Strip newlines, NBSP, ideographic space, CR and plain spaces.
        for ch in ('\n', '\xa0', '\u3000', '\r', ' '):
            text = text.replace(ch, '')
        yield {'comments': text}
def save_to_file(content, path=r'C:\Users\lenovo\Desktop\数据\白沙comment.txt'):
    """Append each string in *content* to *path*, one per line.

    Bug fix: the original called ``codecs.open`` but ``codecs`` was never
    imported, so every call raised ``NameError`` which the broad except
    silently reported as '存储到文件失败' — this is why nothing ever reached
    the TXT file.  The builtin ``open`` with an explicit encoding does the
    same job without the extra import.

    Args:
        content: iterable of strings.  NOTE(review): passing a dict (as the
            original ``main`` did) iterates its *keys*, not its values —
            callers should pass the comment text itself.
        path: output file; defaults to the original hard-coded location so
            existing callers are unaffected.
    """
    try:
        with open(path, 'a', encoding='utf-8') as f:
            for line in content:
                f.write(line.strip() + '\r\n')
    except Exception:
        # Best-effort persistence: report, don't crash the scrape loop.
        print('存储到文件失败')
def main(page):
url = 'http://www.yanyue.cn/product/comments/15?paramsend=postget&productid=15¶msend=postget&page_offset='+str(page)
html=get_comments(url)
for item in parse_comments(html):
print(item)
save_to_file(item)
if __name__ == '__main__':
    # Scrape pages 1 through 53 inclusive.
    for page in range(1, 54):
        main(page=page)
|