|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import requests
- import re
- import json
- from bs4 import BeautifulSoup
- from requests.exceptions import RequestException
def get_one_page(url):
    """Fetch the raw HTML of *url*.

    Returns the response body as text on HTTP 200, otherwise prints an
    error notice and returns None.  Any network-level failure (DNS,
    connection, timeout) also yields None.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    try:
        # A timeout is essential: without it a stalled server hangs the crawler forever.
        response = requests.get(url, headers=headers, timeout=10)
    except RequestException:
        # Timeout is a RequestException subclass, so it is covered here too.
        return None
    if response.status_code == 200:
        return response.text
    print('请求错误')
    return None
def parse_one_page(html):
    """Parse one board page and return its movie entries.

    Returns a list of dicts, one per movie.  The original version reused
    a single dict across iterations and returned None, which is why the
    output file ended up containing only ``null`` lines.
    """
    soup = BeautifulSoup(html, 'lxml')
    ranks = soup.select('.board-index')
    names = soup.select('.name')
    stars = soup.select('.star')
    release_times = soup.select('.releasetime')
    integers = soup.select('.integer')
    fractions = soup.select('.fraction')
    results = []
    # Iterate over however many entries the page actually has
    # (the board normally shows 10 per page).
    for d in range(len(ranks)):
        item = {
            '排名': ranks[d].text,
            '片名': names[d].text,
            # strip() then slice off the "主演:" prefix
            '主演': stars[d].text.strip()[3:],
            # strip() then slice off the "上映时间:" prefix
            '上映时间': release_times[d].text.strip()[5:],
            # Score is rendered as integer part + fractional part.
            '评分': integers[d].text + fractions[d].text,
        }
        print(item)
        results.append(item)
    return results
def write_to_file(content, path=r'C:\Users\lenovo\Desktop\爬虫\猫眼\猫眼TOP100(BS).txt'):
    """Append *content* as one JSON line to *path*.

    The file is opened in UTF-8; ``ensure_ascii=False`` keeps Chinese
    characters readable instead of ``\\uXXXX`` escapes.  *path* defaults
    to the original hard-coded location for backward compatibility.
    Note: an explicit ``f.close()`` inside ``with`` was redundant and
    has been removed — the context manager closes the file.
    """
    with open(path, 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
def main(offset):
    """Crawl one board page at the given *offset* and persist its entries.

    Skips the page entirely when the fetch fails — the original passed
    None straight into BeautifulSoup and crashed.
    """
    url = 'https://maoyan.com/board/4?offset=' + str(offset)
    html = get_one_page(url)
    if html is None:
        # Fetch failed (non-200 or network error); nothing to parse.
        return
    items = parse_one_page(html)
    write_to_file(items)
if __name__ == '__main__':
    # Walk the 10 pages of the TOP100 board: offsets 0, 10, ..., 90.
    for offset in range(0, 100, 10):
        main(offset=offset)
复制代码
这是我爬取的猫眼榜单的TOP100,但为什么不能存储到txt文件中,txt文件中都是乱码的,求大神指教 |
|