|
发表于 2022-4-25 08:17:55
|
显示全部楼层
- import chardet as cha ##查询编码器
- import urllib.request
class Urlread:
    """Download a web page and decode it using a chardet-detected charset.

    After a successful ``url_read`` call the instance exposes:
      * ``html``            - the URL that was requested
      * ``response``        - the raw response body (bytes)
      * ``encoding_method`` - the codec name used to decode ``response``
    """

    # Codec used when chardet cannot determine an encoding; detect() reports
    # ``None`` in that case (for example on an empty body).
    _FALLBACK_ENCODING = 'utf-8'

    def url_read(self, html=''):
        """Fetch the page at ``html`` and return its body as text.

        Args:
            html: The URL to open. Despite the name this is an address,
                not markup. The empty default is rejected by ``urlopen``.

        Returns:
            The response body decoded with the detected encoding.

        Raises:
            ValueError: If ``html`` is not a usable URL (e.g. empty string).
            urllib.error.URLError: If the request itself fails.
            UnicodeDecodeError: If the body does not match the detected codec.
        """
        self.html = html
        # Use the response as a context manager so the connection is closed
        # even when read() raises.
        with urllib.request.urlopen(self.html) as page:
            self.response = page.read()
        self._load_encode()
        return self.response.decode(self.encoding_method)

    def _load_encode(self):
        """Detect the charset of ``self.response`` and store/return its name."""
        detected = cha.detect(self.response)['encoding']
        if detected is None:
            # Nothing detected: fall back instead of passing None to decode().
            detected = self._FALLBACK_ENCODING
        elif detected.upper() == 'GB2312':
            # GBK is a superset of GB2312, so decoding as GBK also copes with
            # pages that contain characters outside the GB2312 repertoire.
            detected = 'GBK'
        self.encoding_method = detected
        return self.encoding_method
-
-
# Demo: download the FishC front page and write the decoded text to stdout.
u = Urlread()
page_text = u.url_read(r'http://www.fishc.com')
print(page_text)
复制代码 |
|