|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import urllib.request
import re
def open_url(url):
req=urllib.request.Request(url)
req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.6788.400 QQBrowser/10.3.2843.400')
page=urllib.request.urlopen(req)
html=page.read().decode('utf-8')
return html
def get_img(html):
p=r'<img src="([^"]+\.jpeg)"'
imglist=re.findall(p,html)
for each in imglist:
print(each)
if __name__=='__main__':
url="http://pic.sogou.com/d?query=%CD%F5%D2%BB%B2%A9%B1%DA%D6%BD&st=255&mode=13&dm=4&cwidth=1280&cheight=720&did=18#did17"
get_img(open_url(url))
Traceback (most recent call last):
File "C:\Users\lenovo\Desktop\tieba.py", line 26, in <module>
get_img(open_url(url))
File "C:\Users\lenovo\Desktop\tieba.py", line 8, in open_url
html=page.read().decode('utf-8')
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xbb in position 880: invalid start byte |
|