|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import urllib.request
import os
import re
def open_url(url):
    """Fetch *url* and return the raw response body as bytes.

    Args:
        url: Address to request. It may contain raw non-ASCII characters
            (for example a Chinese file name); they are percent-encoded
            before the request is sent.

    Returns:
        The undecoded response body (``bytes``).

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request
            fails.
    """
    # Imported locally so the function does not depend on a new file-level
    # import being added.
    from urllib.parse import quote

    # The HTTP request line is encoded as ASCII, so a URL holding raw
    # non-ASCII characters fails with "UnicodeEncodeError: 'ascii' codec
    # can't encode characters ...". Percent-encode those characters while
    # leaving URL delimiters and already-present %-escapes untouched.
    safe_url = quote(url, safe=":/?#[]@!$&'()*+,;=%")

    req = urllib.request.Request(safe_url)
    # The header name is "User-Agent" (hyphen); with an underscore the
    # browser-like identification is never sent under the real header name.
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36',
    )
    # Context manager closes the connection even if read() raises.
    with urllib.request.urlopen(req) as response:
        return response.read()
def get_num(url):
    """Return the link fragments captured from the page at *url*.

    The page is fetched with ``open_url`` and decoded as UTF-8. Every
    ``<a href="/....html"><img src`` occurrence contributes the text between
    the leading ``/`` and the ``.html`` suffix to the returned list.
    """
    page_text = open_url(url).decode('utf-8')
    link_pattern = '<a href="/(.*?).html"><img src'
    return re.findall(link_pattern, page_text)
def get_downloadurl(note_url):
    """Collect the ``.txt`` links found on each page in *note_url*.

    Args:
        note_url: Iterable of page URLs. Each page is fetched with
            ``open_url`` and decoded as UTF-8.

    Returns:
        A list with one entry per input page, in input order. Each entry is
        the (possibly empty) list of ``href`` values ending in ``.txt`` that
        were found on that page.
    """
    # Raw string: the previous non-raw literal contained the invalid escape
    # "\.", which newer Python versions warn about. The pattern value itself
    # is unchanged. Only single-quoted href attributes are matched.
    txt_link = re.compile(r"href='(.*?\.txt)'")
    return [
        txt_link.findall(open_url(page).decode('utf-8'))
        for page in note_url
    ]
def save_note(download_url):
    """Download every linked file and store it as UTF-8 in the current directory.

    Args:
        download_url: Iterable of iterables of URLs (the shape returned by
            ``get_downloadurl``). Each file is saved under the last path
            component of its URL.

    The downloaded bytes are decoded as GB18030 and re-encoded as UTF-8
    before being written.
    """
    for links in download_url:
        for link in links:
            filename = link.split('/')[-1]
            # Fetch and transcode BEFORE opening the file: opening with 'wb'
            # truncates immediately, so a failed request or decode used to
            # leave an empty (or clobbered) file behind.
            note = open_url(link).decode('gb18030').encode('utf-8')
            with open(filename, 'wb') as f:
                f.write(note)
def download_qishu(folder = '都市'):
    """Download the text files linked from the sort04 listing into *folder*.

    Args:
        folder: Directory that receives the downloads. It is created if
            missing, and the process working directory is changed to it
            (and left there when the function returns).

    Steps: read the listing page, build one detail-page URL per captured
    link fragment, extract the ``.txt`` links from those pages, then save
    each file.
    """
    # exist_ok avoids the FileExistsError that os.mkdir raised on every
    # re-run once the folder had been created.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    url = 'http://www.qisuu.com/soft/sort04/'
    url1 = 'http://www.qisuu.com/'

    note_url = [url1 + each + '.html' for each in get_num(url)]
    print(note_url)

    download_url = get_downloadurl(note_url)
    save_note(download_url)
# Run the downloader with its default folder only when executed as a script.
if '__main__' == __name__:
    download_qishu()
报错信息:UnicodeEncodeError: 'ascii' codec can't encode characters in position 9-16: ordinal not in range(128) 求大神带我飞 |
|