|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 冰封雪舞 于 2018-1-23 13:24 编辑
import urllib.request
import re
def open_url(url):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    A browser-like User-Agent header is sent with the request.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36')
    # Use the response as a context manager so the connection is closed
    # even when reading or decoding raises.
    with urllib.request.urlopen(req) as page:
        html = page.read().decode('utf-8')
    return html
def get_img(html):
    """Print and return the .jpg URLs of ``BDE_Image`` images in *html*.

    Args:
        html: HTML text to scan.

    Returns:
        A list of the matched image URLs, in order of appearance.  Each
        URL is also printed on its own line.
    """
    # The capture group must be closed with ")" before the final quote;
    # leaving it open makes re raise "unbalanced parenthesis".
    # \s*=\s* accepts both src="..." and src = "...".
    p = r'<img class="BDE_Image" src\s*=\s*"([^"]+\.jpg)"'
    imglist = re.findall(p, html)
    for each in imglist:
        print(each)
    return imglist
if __name__ == '__main__':
    # Download the thread page, then list the images found in it.
    url = 'http://tieba.baidu.com/p/3563409202'
    page_html = open_url(url)
    get_img(page_html)
按照老师讲的敲了代码,但是运行时出现错误,不知道错在哪里,希望大神指点迷津。错误信息如下:
======================== RESTART: D:/Python34/下载图片.py ========================
Traceback (most recent call last):
File "D:/Python34/下载图片.py", line 18, in <module>
get_img(open_url(url))
File "D:/Python34/下载图片.py", line 11, in get_img
imglist = re.findall(p,html)
File "D:\Python34\lib\re.py", line 210, in findall
return _compile(pattern, flags).findall(string)
File "D:\Python34\lib\re.py", line 294, in _compile
p = sre_compile.compile(pattern, flags)
File "D:\Python34\lib\sre_compile.py", line 568, in compile
p = sre_parse.parse(p, flags)
File "D:\Python34\lib\sre_parse.py", line 780, in parse
p = _parse_sub(source, pattern, 0)
File "D:\Python34\lib\sre_parse.py", line 377, in _parse_sub
itemsappend(_parse(source, state))
File "D:\Python34\lib\sre_parse.py", line 730, in _parse
raise error("unbalanced parenthesis")
sre_constants.error: unbalanced parenthesis
>>>
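正则表达式 p = r'<img class="BDE_Image" src = "([^"]+\.jpg"' 少了个右括号,捕获组没有闭合,所以 re 报 unbalanced parenthesis。应改为:p = r'<img class="BDE_Image" src="([^"]+\.jpg)"'(另外,网页源码里 src 和等号之间通常没有空格,建议一并去掉)。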
|
|