马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
import re
import json
from bs4 import BeautifulSoup
from requests.exceptions import RequestException
def get_one_page(url):
    """Fetch one board page and return its HTML text.

    Returns the page body on HTTP 200, or None on any non-200 status
    or on a requests-level failure (connection error, timeout, ...).
    """
    headers = {
        # Pretend to be a desktop Chrome browser; maoyan blocks the
        # default python-requests user agent.
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    try:
        # timeout so a stalled connection cannot hang the whole crawl
        response = requests.get(url, headers=headers, timeout=10)
        if response.status_code == 200:
            return response.text
        print('请求错误')
        return None
    except RequestException:
        return None
def parse_one_page(html):
    """Extract the ten movies on one board page.

    Returns a list of dicts (one per movie) with keys
    排名/片名/主演/上映时间/评分. The original version mutated a single
    dict and returned None, so the caller serialized ``null`` to the
    output file — building and returning one dict per movie fixes that.
    """
    soup = BeautifulSoup(html, 'lxml')
    # Select each field list once instead of re-querying the soup on
    # every loop iteration.
    ranks = soup.select('.board-index')
    names = soup.select('.name')
    stars = soup.select('.star')
    times = soup.select('.releasetime')
    integers = soup.select('.integer')
    fractions = soup.select('.fraction')
    results = []
    for d in range(len(ranks)):
        item = {
            '排名': ranks[d].text,
            '片名': names[d].text,
            # strip the "主演:" prefix (3 chars)
            '主演': stars[d].text.strip()[3:],
            # strip the "上映时间:" prefix (5 chars)
            '上映时间': times[d].text.strip()[5:],
            # score is split into integer and fraction parts in the markup
            '评分': integers[d].text + fractions[d].text,
        }
        print(item)
        results.append(item)
    return results
def write_to_file(content, path=r'C:\Users\lenovo\Desktop\爬虫\猫眼\猫眼TOP100(BS).txt'):
    """Append *content* to *path* as one JSON line.

    ``ensure_ascii=False`` keeps Chinese text readable in the file;
    the file is opened in append mode with UTF-8 encoding. *path*
    defaults to the original hard-coded location for compatibility.
    The ``with`` statement closes the file, so no explicit close()
    is needed (the old ``f.close()`` inside the block was redundant).
    """
    with open(path, 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')
def main(offset):
    """Crawl one page of the board at the given *offset* and persist it.

    Skips the page entirely when the download fails (get_one_page
    returns None) instead of passing None into the parser, and only
    writes a non-empty parse result, so the output file never receives
    ``null`` lines.
    """
    url = 'https://maoyan.com/board/4?offset=' + str(offset)
    html = get_one_page(url)
    if html is None:
        return
    items = parse_one_page(html)
    if items:
        write_to_file(items)
if __name__ == '__main__':
    # The TOP-100 board spans ten pages of ten movies each,
    # addressed by offsets 0, 10, ..., 90.
    for offset in range(0, 100, 10):
        main(offset=offset)
这是我爬取猫眼榜单TOP100的代码,但为什么数据不能正常存储到txt文件中?txt文件里的内容都是乱码,求大神指教。