|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
# Pages to scrape, one URL per monthly report.
# NOTE(review): every URL repeats the "/wljsite/ydtj/" path right after
# "list2.shtml", which looks like a relative href glued onto the list-page
# URL rather than resolved against the site root -- confirm these addresses
# really serve the article pages before relying on the output.
ls = [
    'http://lwj.sanya.gov.cn/wljsite/ydtj/list2.shtml/wljsite/ydtj/202203/b43442ca2f8c4ca5a9f8d10186b8a8f1.shtml',
    'http://lwj.sanya.gov.cn/wljsite/ydtj/list2.shtml/wljsite/ydtj/202202/d7498541d147437ea352be69f0080ded.shtml',
    'http://lwj.sanya.gov.cn/wljsite/ydtj/list2.shtml/wljsite/ydtj/202201/446aa96ff45c49bcb52a2facc02b6cd2.shtml',
]


def sylv(link):
    """Fetch one page and append its content text to the output file.

    The response is decoded as UTF-8 and parsed with ``etree.HTML``. Every
    text node below ``//*[@id="news_content"]/ucapcontent`` is collected,
    CR+LF pairs are removed from each fragment, and the resulting list is
    printed. Each fragment is then appended to the output file on its own
    line (followed by ``"d "``), and a row of 50 asterisks closes the page.

    ``requests``, ``etree`` and ``headers`` are not defined in this snippet;
    they must be provided elsewhere in the file.

    Args:
        link: URL of the page to download.
    """
    response = requests.get(link, headers=headers)
    response.encoding = 'utf-8'
    tree = etree.HTML(response.text)
    fragments = tree.xpath('//*[@id="news_content"]/ucapcontent//text()')
    # Strip unneeded characters (only CR+LF pairs are removed).
    fragments = [text.replace('\r\n', '') for text in fragments]
    print(fragments)

    out_path = r"C:\Users\13783\Desktop\test.txt"
    # Write UTF-8 explicitly: without `encoding`, open() uses the locale
    # codec, which may be unable to encode the scraped text or may produce a
    # file in a different encoding than the one the page was decoded with.
    with open(out_path, "a", encoding="utf-8") as file:
        for text in fragments:
            file.write(str(text) + "d" + " " + "\n")
        # End the separator row with a newline so that, in append mode, the
        # next page's first fragment does not run on from the asterisks.
        file.write('*' * 50 + "\n")


for t in ls:
    sylv(t)
复制代码 |
|