爬取高考网历年分数线并可视化
import requests
from lxml import etree
from pyecharts import options
from pyecharts.charts import Bar
from pypinyin import lazy_pinyin
def _clean_cells(cells):
    """Return *cells* with every CR, LF and tab character removed from each string."""
    drop = str.maketrans('', '', '\r\n\t')
    return [cell.translate(drop) for cell in cells]


def get(url, pro=None):
    """Download a score-line page, scrape its table cells and chart them.

    The page is decoded as GBK, the header cells with ``width="72"`` are
    used as the x-axis labels, and the text of the ``c_blue`` / ``c_white``
    table rows is collected twice: once page-wide (``wen``) and once limited
    to the ``cjArea tm15`` container (``li``). The results are handed to
    ``visulize``.

    Args:
        url: Address of the page to scrape.
        pro: Province name forwarded to ``visulize`` for the chart title and
            output file name. When omitted, the module-level ``pro`` set by
            the script entry point is used, which keeps ``get(url)`` working.

    Raises:
        ValueError: if ``pro`` is omitted and no module-level ``pro`` exists.
        requests.RequestException: on connection failure, timeout or an
            HTTP error status.
        UnicodeDecodeError: if the response body is not valid GBK.
    """
    if pro is None:
        # Backward compatibility: the script entry point calls get(url) and
        # leaves the province name in a module-level variable.
        pro = globals().get('pro')
        if pro is None:
            raise ValueError('province name is required: pass pro=... to get()')

    # A timeout keeps the script from hanging forever on a stalled server,
    # and raise_for_status stops an error page from being parsed as data.
    reply = requests.get(url, timeout=10)
    reply.raise_for_status()
    response = reply.content.decode('gbk')
    html = etree.HTML(response)

    year = html.xpath('//th[@width="72"]/text()')

    # Blue rows first, then white rows -- the same order in both lists.
    wenke = html.xpath('//tr[@class="c_blue"]/td/text()')+html.xpath('//tr[@class="c_white"]/td/text()')
    wen = _clean_cells(wenke)

    like = html.xpath('//div[@class="cjArea tm15"]/table/tr[@class="c_blue"]/td/text()')+html.xpath('//div[@class="cjArea tm15"]/table/tr[@class="c_white"]/td/text()')
    li = _clean_cells(like)

    visulize(pro,li,wen,year)
def visulize(pro,li,wen,year):
    """Render the scraped values as a bar chart saved to an HTML file.

    Args:
        pro: Province name; prefixes the chart title and the output file name.
        li: Values plotted for both science series.
        wen: Values plotted for both liberal-arts series.
        year: Labels for the x-axis.
    """
    chart = Bar()  # bar chart
    chart.add_xaxis(year)

    # NOTE(review): the tier-1 and tier-2 series of each track receive the
    # very same list, exactly as the original code did. This looks like a
    # placeholder for splitting the scraped values per tier -- confirm
    # against the page layout before relying on the chart.
    series = (
        ('文科一本', wen),
        ('文科二本', wen),
        ('理科一本', li),
        ('理科二本', li),
    )
    for label, values in series:
        chart.add_yaxis(label, values)

    heading = options.TitleOpts(title=pro+"历年高考分数线",subtitle="2009-2019年")
    chart.set_global_opts(title_opts=heading)
    chart.render(pro+"历年高考分数线.html")
if __name__ == '__main__':
    # `pro` must stay a module-level name: `get` uses it for the chart title
    # and the output file name.
    pro = input('想要获得的省份:').strip()
    if not pro:
        # An empty name would produce a URL with an empty path segment.
        raise SystemExit('省份不能为空')
    # The URL path segment is the province name's pinyin joined without
    # separators.
    prov = "".join(lazy_pinyin(pro))
    url = 'http://www.gaokao.com/'+prov+'/fsx/'
    get(url)

# Author's note from the original post: "I reckon this is the closest I have
# come to data visualization so far."
# Sample run -- prompt and input: 想要获得的省份:湖南
C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\pyecharts\charts\chart.py:14: PendingDeprecationWarning: pyecharts 所有图表类型将在 v1.9.0 版本开始强制使用 ChartItem 进行数据项配置 :)
super().__init__(init_opts=init_opts)
Process finished with exit code 0
报错呀，跑不起来
页:
[1]