萧易天 发表于 2020-5-1 12:54:09

爬取高考网历年分数线并可视化

import requests
from lxml import etree
from pyecharts import options
from pyecharts.charts import Bar
from pypinyin import lazy_pinyin
def _clean_cells(cells):
    """Return the given text nodes with every CR, LF and TAB character removed."""
    return [
        cell.replace('\r', '').replace('\n', '').replace('\t', '')
        for cell in cells
    ]


def get(url):
    """Download a score-line page, scrape its tables and render the chart.

    The page is fetched, decoded as GBK, and three lists are extracted with
    XPath: the year headers, the "wen" cells and the "li" cells. They are
    then handed to ``visulize`` together with the province name.

    Note: the province name is read from the module-level variable ``pro``,
    which is only assigned by the ``__main__`` section of this script.
    Calling ``get`` without that global defined raises ``NameError``.

    Args:
        url: Address of the score-line page to scrape.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with a 4xx/5xx status.
        UnicodeDecodeError: If the response body is not valid GBK.
    """
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(url, timeout=10)
    # Fail loudly on an error page (e.g. an unknown province slug) instead of
    # parsing it and silently rendering an empty chart.
    response.raise_for_status()
    html = etree.HTML(response.content.decode('gbk'))

    year = html.xpath('//th[@width="72"]/text()')

    # All "c_blue" rows are collected first, then all "c_white" rows, so the
    # cells are grouped by row class rather than kept in page order.
    # NOTE(review): this query is document-wide, while the "li" query below is
    # limited to div.cjArea.tm15 -- presumably "wen" therefore also contains
    # the cells matched for "li"; confirm against the live page.
    wen = _clean_cells(
        html.xpath('//tr[@class="c_blue"]/td/text()')
        + html.xpath('//tr[@class="c_white"]/td/text()')
    )
    li = _clean_cells(
        html.xpath('//div[@class="cjArea tm15"]/table/tr[@class="c_blue"]/td/text()')
        + html.xpath('//div[@class="cjArea tm15"]/table/tr[@class="c_white"]/td/text()')
    )

    visulize(pro, li, wen, year)
def visulize(pro,li,wen,year):
    """Render the scraped score lines as a bar chart in an HTML file.

    Args:
        pro: Province name; used as the prefix of the chart title and of the
            output file name.
        li: Values plotted for both science series.
        wen: Values plotted for both liberal-arts series.
        year: Labels for the x axis.

    The chart is written to ``<pro>历年高考分数线.html``; nothing is returned.
    """
    # NOTE(review): both liberal-arts series receive the same ``wen`` list and
    # both science series the same ``li`` list, so the first-tier and
    # second-tier bars are identical. Presumably each list should be split per
    # tier -- confirm the table layout before changing this.
    series = (
        ('文科一本', wen),
        ('文科二本', wen),
        ('理科一本', li),
        ('理科二本', li),
    )

    chart = Bar()
    chart.add_xaxis(year)
    for label, scores in series:
        chart.add_yaxis(label, scores)

    heading = pro+"历年高考分数线"
    chart.set_global_opts(
        title_opts=options.TitleOpts(title=heading, subtitle="2009-2019年")
    )
    chart.render(heading + ".html")
if __name__ == '__main__':
    # Ask for the province name; ``get`` reads this module-level ``pro`` when
    # it builds the chart title.
    pro = input('想要获得的省份:')
    # The URL path segment is the pinyin of the name joined without separators.
    prov = "".join(lazy_pinyin(pro))
    url = f'http://www.gaokao.com/{prov}/fsx/'
    get(url)

海月清辉 发表于 2020-7-10 16:34:04

我估计这是离我最近的可视化数据了

海月清辉 发表于 2020-7-10 16:46:19

想要获得的省份:湖南
C:\Users\Administrator\AppData\Local\Programs\Python\Python38\lib\site-packages\pyecharts\charts\chart.py:14: PendingDeprecationWarning: pyecharts 所有图表类型将在 v1.9.0 版本开始强制使用 ChartItem 进行数据项配置 :)
super().__init__(init_opts=init_opts)

Process finished with exit code 0

leizi666 发表于 2020-8-3 14:53:53

报错呀,跑不起来
页: [1]
查看完整版本: 爬取高考网历年分数线并可视化