|  | 
 
| 
x
马上注册,结交更多好友,享用更多功能^_^您需要 登录 才可以下载或查看,没有账号?立即注册  复制代码
import urllib.request as urt
import chardet as ch
def write_http(num, res, encode):
    """Decode a downloaded page and write it to ``url_<num>.txt``.

    num    -- sequence number used to build the output file name.
    res    -- passed by the caller as the raw page bytes.
              NOTE(review): this parameter is never read in the body.
    encode -- codec name handed to ``bytes.decode``.
    """
    name = 'url_' + str(num) + '.txt'
    # NOTE(review): no ``encoding=`` is given, so the text file is written
    # with the platform default codec; any decoded character that codec
    # cannot represent makes ``write`` raise UnicodeEncodeError.
    with open(name, 'wt') as each_file:
        # NOTE(review): this reads the module-level name ``html`` rather
        # than the ``res`` parameter; it only works because the calling
        # loop happens to assign a global called ``html``.
        each_file.write(html.decode(encode))
# Read urls.txt line by line, download each URL and hand the raw bytes to
# write_http together with a running file number.
with open('urls.txt', 'rt') as f:
    num = 0  # incremented before use, so output files are numbered from 1
    for each in f:
        num += 1
        # NOTE(review): the response object is never closed explicitly.
        response = urt.urlopen(each)
        html = response.read()  # raw response body
        # Let chardet guess the codec from the downloaded bytes.
        encode = ch.detect(html)['encoding']
        # Pages reported as GB2312 are decoded with GBK instead
        # (presumably because GBK covers more characters -- confirm).
        if encode == 'GB2312':
            encode = 'GBK'
        write_http(num, html, encode)
 感觉没有太大问题,结果出现了一个
 UnicodeEncodeError: 'gbk' codec can't encode character '\xbb' in position 29836: illegal multibyte sequence
 这样的问题,还希望大家能够解答一下啊。
 
复制代码
import urllib.request as urt
import chardet as ch
def write_http(num, res, encode):
    """Decode a downloaded page and save it as UTF-8 text.

    The output file is named ``url_<num>.txt`` and is created in the
    current working directory.

    Args:
        num: Sequence number used to build the output file name.
        res: Raw page content as bytes.
        encode: Name of the codec used to decode ``res``. A false value
            (for example the ``None`` that chardet reports when it cannot
            guess) falls back to UTF-8.

    Raises:
        UnicodeDecodeError: If ``res`` is not valid in the chosen codec.
    """
    name = 'url_{}.txt'.format(num)

    # Decode the ``res`` argument (not a global) and do it before opening
    # the file, so a decode failure does not leave an empty file behind.
    # ``bytes.decode(None)`` would raise TypeError, hence the fallback.
    text = res.decode(encode or 'utf-8')

    # An explicit encoding is required here: without it the platform
    # default codec (e.g. GBK) is used and may be unable to represent
    # some of the decoded characters.
    with open(name, 'wt', encoding='utf-8') as each_file:
        each_file.write(text)
# Download every URL listed in urls.txt (one per line) and save each page
# to its own numbered text file via write_http.
with open('urls.txt', 'rt') as f:
    num = 0
    for each in f:
        url = each.strip()
        if not url:
            # A blank line (e.g. a trailing newline at the end of the
            # file) would make urlopen raise ValueError; skip it.
            continue
        num += 1

        # The context manager closes the HTTP response once the body has
        # been read, instead of leaving the connection open.
        with urt.urlopen(url) as response:
            html = response.read()

        # Let chardet guess the codec from the downloaded bytes.
        encode = ch.detect(html)['encoding']
        # Decode pages reported as GB2312 with GBK, which is a superset.
        if encode == 'GB2312':
            encode = 'GBK'

        write_http(num, html, encode)
 | 
 |