|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import urllib.request
- import urllib.parse
- import chardet
- def response(fileurl,filename):
- print('正在加载%s'% filename)
- Requ=urllib.request.Request(fileurl)
- Requ.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.3368.400 QQBrowser/9.6.11974.400')
- res=urllib.request.urlopen(Requ).read()
- resde=chardet.detect(res)['encoding']
- if resde == 'GB2312':
- resde='GBK'
- name=res.decode(resde)
- return name,resde
- def write_tie(html,resde,filename):
- print('正在保存%s' % filename)
- with open(filename,'w',encoding=resde)as f:
- f.write(html)
- print('-'*20)
- def tieba_page(url,start_page,end_page):
- for i in range(start_page,end_page+1):
- pn=(i-1)*50
- filename='第%s页.html' % i
- fileurl=url+('&pn=%d' % pn)
- html,resde=response(fileurl,filename)
- write_tie(html,resde,filename)
-
- if __name__=='__main__':
- kw=input('请输入贴吧名:')
- start_page=int(input('请输入起始页:'))
- end_page=int(input('请输入结束页:'))
- urld='https://tieba.baidu.com/f?'
- key=urllib.parse.urlencode({'kw':kw})
- print(type(key))
- url=urld+key
- tieba_page(url,start_page,end_page)
复制代码
Traceback (most recent call last):
File "C:\Users\Administrator\Desktop\tieba\789.py", line 41, in <module>
tieba_page(url,start_page,end_page)
File "C:\Users\Administrator\Desktop\tieba\789.py", line 28, in tieba_page
html,resde=response(fileurl,filename)
File "C:\Users\Administrator\Desktop\tieba\789.py", line 13, in response
name=res.decode(resde)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python35-32\lib\encodings\cp1254.py", line 15, in decode
return codecs.charmap_decode(input,errors,decoding_table)
UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 266: character maps to <undefined>
|
|