|
发表于 2019-6-12 07:33:32
|
显示全部楼层
本帖最后由 wp231957 于 2019-6-12 12:26 编辑
# coding: utf-8
import requests,re
from lxml import etree
NAV_URL = "http://www.digquant.com.cn/document/2"
OUTPUT_PATH = "123.txt"
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
}

# One-pass clean-up table for navigation titles:
#   * full-width "(" (U+FF08) becomes ASCII "(" so that splitting on "(" works;
#   * full-width ")" (U+FF09) and ASCII ")" are dropped, the closing bracket
#     of the first group is re-added explicitly in split_title();
#   * newlines and spaces are dropped.
# NOTE(review): the pasted original replaced "(" with "(" and ")" with ")",
# which is a no-op; it is assumed here that the full-width forms were meant
# and were lost when the post was copied -- confirm against the live page.
_TITLE_CLEANUP = str.maketrans({
    "\uff08": "(",
    "\uff09": None,
    ")": None,
    "\n": None,
    " ": None,
})


def split_title(raw):
    """Split one raw navigation title into its output fields.

    The text is cleaned with ``_TITLE_CLEANUP`` and then split on "(" at most
    twice.  When that yields three pieces, the first two are glued back
    together as ``first(second)`` so the result never has more than two
    fields.

    Args:
        raw: Title text as taken from the page.

    Returns:
        A list of one or two strings.
    """
    parts = raw.translate(_TITLE_CLEANUP).split("(", 2)
    if len(parts) > 2:
        # Re-attach the first parenthesised group to the leading field.
        return [parts[0] + "(" + parts[1] + ")", parts[2]]
    return parts


def format_title(raw):
    """Return the output line (without newline) for one raw title.

    Fields are joined with ", ".  Joining the strings directly, instead of
    post-processing ``str(list)``, keeps any brackets or quotes that are
    part of the title itself.
    """
    return ", ".join(split_title(raw))


def fetch_nav_titles(url=NAV_URL):
    """Download *url* and return the text of each left-nav entry.

    For every ``<li>`` directly under ``<ul id="left-nav">`` the text of its
    first ``<span>`` child is collected.  Items without a ``<span>`` are
    skipped; a ``<span>`` whose ``.text`` is ``None`` contributes "".

    Raises:
        requests.RequestException: on network failure, timeout, or an HTTP
            error status.
    """
    # A timeout keeps the script from hanging forever on a dead server.
    response = requests.get(url=url, headers=HEADERS, timeout=10)
    response.raise_for_status()
    tree = etree.HTML(response.text)
    if tree is None:
        # Defensive: no root element was produced, so there is nothing to scan.
        return []
    titles = []
    # Walk the <li> nodes once rather than re-running an indexed XPath from
    # the document root for every item.
    for item in tree.xpath("//ul[@id='left-nav']/li"):
        span = item.find("span")
        if span is not None:
            titles.append(span.text or "")
    return titles


def main():
    """Fetch the navigation titles and write one formatted line per title."""
    titles = fetch_nav_titles()
    # Explicit UTF-8 so the Chinese text is written the same way on every
    # platform; the context manager closes the file even if a write fails.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as out:
        out.writelines(format_title(title) + "\n" for title in titles)
    print("解析完毕,请查阅123.txt")


if __name__ == '__main__':
    main()
复制代码 |
|