How do I save the data to a CSV file, and why does the data I scraped come out garbled (乱码) when it is written to the CSV file?

import requests
import json
import re
import pandas
from multiprocessing import Pool
from requests.exceptions import RequestException
def get_one_page(url):
    # Fetch one listing page and return its HTML, or None on any failure.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
    }
    try:
        response = requests.get(url, headers=headers)
        if response.status_code == 200:
            return response.text
        return None
    except RequestException:
        return None

def parse_one_page(html):
    # Pull the title and date out of each <li> entry.
    pattern = re.compile('<li>.*?blank">(.*?)</a></em><span>(.*?)</span></li>', re.S)
    items = re.findall(pattern, html)
    for item in items:
        yield {
            'title': item[0],
            'time': item[1]
        }

def write_to_file(content):
    # Append one JSON record per line; the with block closes the file by itself,
    # so no explicit f.close() is needed.
    with open(r'C:\Users\lenovo\Desktop\爬虫\烟草\烟草咨询.csv', 'a', encoding='utf-8') as f:
        f.write(json.dumps(content, ensure_ascii=False) + '\n')

def main(page):
    if page != 1:  # the first page does not use the numbered index_<page> form
        url = 'http://www.echinatobacco.com/html/site27/ynzlyns/index' + '_' + str(page) + '.html'
    else:
        url = 'http://www.echinatobacco.com//html/site27/ynzlyns/index.html'
    html = get_one_page(url)
    for item in parse_one_page(html):
        print(item)
        write_to_file(item)

if __name__ == '__main__':
    pool = Pool()
    pool.map(main, [i + 1 for i in range(53)])
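
The garbled text usually comes from opening a plain UTF-8 file in Excel, which on a Chinese Windows system falls back to the local ANSI code page (GBK) instead of UTF-8. Below is a minimal sketch of an alternative write function, assuming the same 'title'/'time' keys and the same output path: it writes real CSV rows with the standard csv module and uses the 'utf-8-sig' encoding (UTF-8 with a BOM), which Excel recognizes. The name write_to_csv and the header check are illustrative additions, not part of the original code.

import csv
import os

CSV_PATH = r'C:\Users\lenovo\Desktop\爬虫\烟草\烟草咨询.csv'  # same path as write_to_file uses

def write_to_csv(content):
    # Write a header row only when the file does not exist yet or is still empty.
    need_header = not os.path.exists(CSV_PATH) or os.path.getsize(CSV_PATH) == 0
    # 'utf-8-sig' prepends a BOM so Excel decodes the file as UTF-8 rather than
    # the local code page (the usual cause of the garbled text); newline='' keeps
    # the csv module from inserting blank rows on Windows.
    with open(CSV_PATH, 'a', encoding='utf-8-sig', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=['title', 'time'])
        if need_header:
            writer.writeheader()
        writer.writerow(content)

In main, write_to_csv(item) would then replace write_to_file(item). Because every Pool worker appends to the same file, it can also be safer to return the parsed items from main and write them once in the parent process, for example with pandas.DataFrame(rows).to_csv(path, index=False, encoding='utf-8-sig'), since pandas is already imported.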