|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 zua 于 2017-1-8 01:03 编辑
代码很简单,复制后即可直接运行。目前还没有加代理,主要是用来学习爬取逻辑。
- import urllib.request
- import traceback
- import sys
- import os
- import re
def open_url(url, timeout=30):
    """Fetch *url* and return the raw response body as bytes.

    A desktop-browser User-Agent header is sent with the request.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.
        timeout: Seconds to wait on a blocking socket operation before
            giving up. Without one, a stalled server hangs the script.

    Returns:
        The undecoded response body (``bytes``).

    Raises:
        urllib.error.URLError: If the request fails (propagated from
            ``urlopen``).
    """
    req = urllib.request.Request(url)
    req.add_header("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393")
    # The context manager closes the connection even if read() raises.
    with urllib.request.urlopen(req, timeout=timeout) as response:
        return response.read()
def get_url(r, html):
    """Return every non-overlapping match of pattern *r* found in *html*.

    The result has the shape produced by ``re.findall``: a list of strings,
    or a list of tuples when the pattern contains more than one group.
    """
    pattern = re.compile(r)
    matches = pattern.findall(html)
    return matches
-
def chdir(folder):
    """Make *folder* the current working directory, creating it if needed.

    Missing parent directories are created as well, and an already
    existing directory is not an error.

    Args:
        folder: Path of the directory to enter (absolute, or relative to
            the current working directory).

    Raises:
        OSError: If the directory cannot be created or entered.
    """
    # makedirs(exist_ok=True) replaces the racy "check, then mkdir" pair and
    # also handles paths whose parent directories do not exist yet.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
-
def main(folder=r"C:\Users\Administrator\Desktop\_Download_Sound"):
    """Download the .wav sound effects linked from sc.chinaz.com/yinxiao.

    The index page is scanned for menu links. One sub-directory is created
    per menu entry, each linked page is scanned for sound-detail links, and
    the first ``.wav`` URL found on every detail page is saved under the
    sound's ``alt`` text.

    Args:
        folder: Root download directory; created if it does not exist.
            The process's working directory is changed into it.

    Raises:
        urllib.error.URLError: If any page or file cannot be fetched.
        OSError: If a directory or file cannot be created.
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.parse import urljoin

    site_root = "http://sc.chinaz.com/"
    index_url = "http://sc.chinaz.com/yinxiao"
    menu_pattern = r'<a target="_blank" href="/(.+?)</a>'  # main-menu links
    sound_pattern = r'<p class="z"><a target="_blank" href="/(.+ alt.+?)">'
    wav_pattern = r'http.+?\.wav'
    duplicate_counter = 0  # shared suffix counter for clashing file names

    chdir(folder)  # create the download root and work inside it
    download_root = os.getcwd()

    index_html = open_url(index_url).decode("utf-8")
    for menu_match in get_url(menu_pattern, index_html):
        # Each match looks like: <relative path>"><link text>
        menu_parts = menu_match.split('">')
        if len(menu_parts) < 2:
            continue  # unexpected markup: no link text to name the folder
        menu_path, menu_name = menu_parts[0], menu_parts[1]
        if "/" in menu_name:
            # Drops the first 3 and last 4 characters; presumably a wrapping
            # tag pair such as <b>...</b> -- verify against the page markup.
            menu_name = menu_name[3:-4]

        chdir(menu_name)  # one sub-directory per menu entry
        try:
            menu_html = open_url(urljoin(site_root, menu_path)).decode("utf-8")
            for sound_match in get_url(sound_pattern, menu_html):
                # Each match looks like: <relative path>" alt="<sound name>
                sound_parts = sound_match.split('" alt="')
                if len(sound_parts) < 2:
                    continue
                sound_path, sound_name = sound_parts[0], sound_parts[1]

                # The detail page carries the direct .wav download link.
                detail_html = open_url(urljoin(site_root, sound_path)).decode("utf-8")
                wav_links = get_url(wav_pattern, detail_html)
                if not wav_links:
                    continue  # no .wav link on this page; nothing to save

                # Keep appending the counter until the name is free so an
                # existing download is never overwritten.
                file_name = sound_name + ".wav"
                while os.path.exists(file_name):
                    file_name = sound_name + "%d" % duplicate_counter + ".wav"
                    duplicate_counter += 1

                print("正在下载:%s" % sound_name)
                # Download first so a failed request leaves no empty file.
                wav_data = open_url(wav_links[0])
                with open(file_name, "wb") as wav_file:
                    wav_file.write(wav_data)
        finally:
            # Return to the download root, even after an error.
            os.chdir(download_root)
if __name__ == "__main__":
    # Script entry point: run the downloader, report any failure, then wait
    # for Enter before exiting.
    exit_code = 0
    try:
        main()
    except SystemExit:
        pass
    except Exception:
        # KeyboardInterrupt is deliberately not caught, so Ctrl-C still
        # aborts the program immediately.
        traceback.print_exc()
        exit_code = 1
    input()  # pause before exit so printed output can be read
    # sys.quit() does not exist; sys.exit() is the supported call.
    sys.exit(exit_code)
复制代码 |
-
|