|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
从网上爬取的评论存储不到TXT文件中了,老报错,求大神:
- import requests
- import csv
- import multiprocessing
- from bs4 import BeautifulSoup
- from requests.exceptions import RequestException
- from multiprocessing import Pool
- def get_comments(url):
- try:
- headers = {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
- }
- response=requests.get(url,headers=headers)
- response.encoding = 'gbk'
- if response.status_code==200:
- return response.text
- print('请求评论错误')
- return None
- except RequestException:
- return None
- def parse_comments(html):
- soup = BeautifulSoup(html,'lxml')
- items = soup.find_all(class_='comment-item')
- for item in items:
- comments = item.select('.commenttext')
- if comments:
- comments = comments[0].text.replace('\n','').replace('\xa0','').replace('\u3000','').replace('\r','').replace(' ','')
- data={
- 'comments': comments
- }
- yield data
- def save_to_file(content):
- try:
- with codecs.open(r'C:\Users\lenovo\Desktop\数据\白沙comment.txt', 'a', encoding='utf-8') as f:
- for i in content:
- f.write(i.strip() + '\r\n')
- except Exception:
- print('存储到文件失败')
- def main(page):
- url = 'http://www.yanyue.cn/product/comments/15?paramsend=postget&productid=15¶msend=postget&page_offset='+str(page)
- html=get_comments(url)
- for item in parse_comments(html):
- print(item)
- save_to_file(item)
- if __name__=='__main__':
- for i in range(53):
- main(page=i+1)
复制代码 |
|