|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 紫枫醉听雨 于 2017-1-10 13:55 编辑
代码如下:
import urllib.request
import os
import re
def url_open(url):
req = urllib.request.Request(url)
req.add_header('User-Agent','Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0')
response = urllib.request.urlopen(req)
html = response.read()
return html
def get_imgs(url):
html = url_open(url).decode('utf-8')
p = r'<img class="BDE_Image" src="(.*?\.jpg)"'
img_adders = re.findall(p,html)
return img_adders
def download(folder='xxoo'):
os.mkdir(folder)
os.chdir(folder)
url = 'http://tieba.baidu.com/p/4913057524'
page_url = url_open(url)
img_adders = get_imgs(page_url)
print(img_adders)
for each in img_adders:
filename = each.split('/')[-1]
urllib.request.urlretrieve(each,filename)
if __name__ == '__main__':
download()
报错信息:
Traceback (most recent call last):
File "C:/Users/Administrator/AppData/Local/Programs/Python/Python36-32/shunvba.py", line 31, in <module>
download()
File "C:/Users/Administrator/AppData/Local/Programs/Python/Python36-32/shunvba.py", line 24, in download
img_adders = get_imgs(page_url)
File "C:/Users/Administrator/AppData/Local/Programs/Python/Python36-32/shunvba.py", line 14, in get_imgs
html = url_open(url).decode('utf-8')
File "C:/Users/Administrator/AppData/Local/Programs/Python/Python36-32/shunvba.py", line 7, in url_open
req = urllib.request.Request(url)
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 329, in __init__
self.full_url = url
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 355, in full_url
self._parse()
File "C:\Users\Administrator\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 384, in _parse
raise ValueError("unknown url type: %r" % self.full_url)
ValueError: unknown url type: 'b\'\\n<!DOCTYPE html><!--STATUS OK--><html><head><meta name="keywords" content="\\xe7\\x99\ |
|