|

楼主 |
发表于 2022-9-10 22:02:45
|
显示全部楼层
误解上面意思了,代码如下:
- ### 爬取ADX ###
- import urllib.request,urllib.error
- import re
- from bs4 import BeautifulSoup
- import xlwt
def main():
    """Entry point: fetch the Binance ETH/USDT futures page and extract data.

    The scraped rows are returned by ``getData``; nothing is persisted yet.
    NOTE(review): nothing in this script ever calls ``main()`` — add an
    ``if __name__ == "__main__": main()`` guard at the end of the file.
    """
    baseurl = "https://www.binance.com/zh-CN/futures/ETHUSDT"
    # Crawl the page and parse it into a list of rows.
    datalist = getData(baseurl)
# Crawl the page
def getData(url):
    """Download *url* and collect data from its HTML.

    Bug fix: the original was declared ``def getData():`` yet referenced
    ``url`` in its body (NameError) and was called as ``getData(baseurl)``
    from ``main`` (TypeError) — the parameter was simply missing.

    Returns a list of extracted rows. Currently the list stays empty:
    matched elements are only printed while the selector is developed.
    """
    datalist = []       # rows to return to the caller
    html = askURL(url)  # raw page source ("" when the request failed)
    # Parse the fetched source with the built-in HTML parser.
    soup = BeautifulSoup(html, "html.parser")
    # NOTE(review): Binance draws this chart with JavaScript, so the
    # container may not exist in the static HTML — confirm with a browser
    # "view source", or switch to an API/automation approach if empty.
    for item in soup.find_all('div', class_="chart-title-indicator-container"):
        print(item)
    return datalist
# Fetch the content of a single web page.
def askURL(url):
    """Return the UTF-8-decoded body of *url*, or "" if the request fails.

    HTTP/URL errors are reported to stdout rather than raised, so callers
    always receive a string.
    """
    # Masquerade as a regular browser so the server does not reject us.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
    }
    # Renamed local from ``requests`` to avoid confusion with the
    # popular third-party module of the same name.
    request = urllib.request.Request(url=url, headers=headers)
    html = ""  # fall back to an empty string on any failure
    try:
        response = urllib.request.urlopen(request)
        html = response.read().decode("utf-8")  # read and decode the body
    except urllib.error.URLError as e:  # also catches HTTPError (404, 500, ...)
        # Bug fix: the original tested hasattr(e, "reasin") — a typo —
        # so the failure reason was never printed.
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    return html
复制代码
|
|