Python 3 环境
import re
import urllib.request
def getHtml(url):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to open; passed unchanged to ``urllib.request.urlopen``.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises for a failed request
        (e.g. ``urllib.error.URLError``) propagates to the caller.
    """
    # The context manager closes the response even if read() raises,
    # so the underlying connection is never leaked.
    with urllib.request.urlopen(url) as page:
        return page.read()
def getImg(html):
    """Download every ``.jpg`` image referenced in *html* to the working directory.

    Images are saved as ``1.jpg``, ``2.jpg``, ... in the order their ``src``
    attributes appear, so each download gets its own file instead of all of
    them overwriting ``1.jpg``.

    Args:
        html: Page source, either raw ``bytes`` (decoded as UTF-8) or ``str``.

    Returns:
        List of the file names written, in download order; empty when the
        page contains no matching ``src`` attribute.
    """
    # re needs str; undecodable bytes are replaced rather than aborting the run.
    if isinstance(html, (bytes, bytearray)):
        text = html.decode('utf-8', errors='replace')
    else:
        text = html
    # Raw string so that \. reaches the regex engine as an escaped dot.
    imglist = re.findall(r'src="(.*?\.jpg)"', text)
    saved = []
    for index, imgurl in enumerate(imglist, start=1):
        filename = '{}.jpg'.format(index)
        urllib.request.urlretrieve(imgurl, filename)
        saved.append(filename)
    return saved
# Script entry: fetch the Tieba thread page, then download the .jpg images
# it references and print whatever getImg returns.
html = getHtml("https://tieba.baidu.com/p/3740796143#!/l/p1")
print(getImg(html))
遇到两个报错:
AttributeError: module 'urllib' has no attribute 'urlretrieve'
原因:Python 3 中 urlretrieve 位于 urllib.request 子模块,通过 urllib.request.urlretrieve() 解决
TypeError: cannot use a string pattern on a bytes-like object
原因:page.read() 返回的是 bytes,而正则模式是 str,通过 html.decode('utf-8') 解决