编码
import urllib.request

import chardet
def main() -> None:
    """Download every URL listed in ``urls.txt`` and save each page to disk.

    Each non-blank line of ``urls.txt`` is treated as one URL. The page at
    the k-th URL (counting from 1) is fetched, its character encoding is
    guessed with ``chardet``, and the decoded text is written to
    ``urls<k>.txt`` using that same encoding.

    Raises:
        OSError: if ``urls.txt`` cannot be read, an output file cannot be
            written, or a URL cannot be fetched (``urllib.error.URLError``
            is an ``OSError`` subclass).
    """
    with open('urls.txt') as f:
        # Skip blank lines: urlopen() rejects an empty URL with ValueError.
        urls = [line.strip() for line in f if line.strip()]

    # enumerate() owns the counter, so it cannot be accidentally reset to its
    # initial value inside the loop; start=1 makes the first file urls1.txt.
    for i, each_url in enumerate(urls, start=1):
        # The context manager closes the connection even if read() fails.
        with urllib.request.urlopen(each_url) as response:
            html = response.read()

        # chardet reports None when it cannot guess (e.g. an empty body);
        # fall back to UTF-8 so decode() is never handed a None codec name.
        encode = chardet.detect(html)['encoding'] or 'utf-8'
        if encode.upper() == 'GB2312':
            # GBK is a superset of GB2312, so it decodes the same pages while
            # also covering characters GB2312 lacks.
            encode = 'GBK'

        file_name = 'urls%d.txt' % i
        # Write with the same encoding used for decoding so the saved file
        # stays consistent with the page's original encoding.
        with open(file_name, 'w', encoding=encode) as each_file:
            each_file.write(html.decode(encode, 'ignore'))
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
open() 中的 encoding=encode 参数用于让写入文件时使用的编码与解码网页内容时使用的编码保持一致。
问：Python 对编码很看重吗？答：当然，不使用正确的编码就无法正确打开（读取）文件。
页:
[1]