求助python
新人求助:为什么我这段代码打印不出图片地址?运行后既不报错,也没有任何输出。希望有人能帮忙解答一下,源码如下:
import urllib.request
import re
def url_open(url):
    """Download ``url`` and return the response body decoded as UTF-8 text.

    The request carries a desktop-browser ``User-Agent`` header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as a ``str``.

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
        Any error raised by ``urllib.request.urlopen`` is propagated unchanged.
    """
    req = urllib.request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 '
        'Core/1.70.3766.400 QQBrowser/10.6.4163.400',
    )
    # Disabled Referer header. Original note: without this request header the
    # downloaded images are all anti-hotlinking placeholders.
    # NOTE(review): the host below is misspelled ("wwww") and uses http.
    # req.add_header('Referer','http://wwww.mzitu.com')

    # Use the response as a context manager so the connection is always
    # closed, even if read() or decode() raises.
    with urllib.request.urlopen(req) as page:
        return page.read().decode('utf-8')
def get_img(html):
    """Print each lazy-image ``.jpg`` match found in ``html``, one per line.

    A printed line is the complete matched text: it starts at
    ``<img class="lazy" src="`` and ends with the closing double quote of a
    ``src`` value that ends in ``.jpg``. Nothing is printed when there is no
    match.

    Args:
        html: Page markup to search.

    Returns:
        None. The matches are written to standard output.
    """
    lazy_jpg_tag = re.compile(r'<img class="lazy" src="[^"]+\.jpg"')
    for found in lazy_jpg_tag.finditer(html):
        # The pattern has no capture group, so group(0) is the whole match.
        print(found.group(0))
if __name__== '__main__':
url = "https://www.mzitu.com/page/2/"
get_img(url_open(url)) 没人吗{:5_96:}
你的正则表达式匹配不到数据,我改了一下。
参考代码:
import urllib.request
import re
def url_open(url):
    """Fetch the page at ``url`` and return its body as UTF-8 decoded text.

    A desktop-browser ``User-Agent`` header is attached to the request.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body (``str``).

    Raises:
        UnicodeDecodeError: If the body cannot be decoded as UTF-8.
        Errors raised by ``urllib.request.urlopen`` propagate to the caller.
    """
    req = urllib.request.Request(url)
    req.add_header(
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 '
        'Core/1.70.3766.400 QQBrowser/10.6.4163.400',
    )
    # Disabled Referer header. Original note: without this request header the
    # downloaded images are all anti-hotlinking placeholders.
    # req.add_header('Referer','https://www.mzitu.com')

    # The with-block closes the response (and its socket) on every path;
    # the original left it open.
    with urllib.request.urlopen(req) as page:
        html = page.read().decode('utf-8')
    return html
def get_img(html):
    """Print the ``data-original`` ``.jpg`` address of each ``<img>`` tag.

    A tag is reported when it starts with ``<img class=`` and, after at least
    one other attribute, carries a single-quoted ``data-original`` value
    ending in ``.jpg``. Each address is printed on its own line, still wrapped
    in its single quotes.

    Args:
        html: Page markup to search.

    Returns:
        None. The addresses are written to standard output.
    """
    # [^>] keeps the attribute scan inside one tag and [^'] keeps the capture
    # inside one quoted value. The earlier greedy ".+" runs spanned several
    # tags on the same line, so only the last image per line was reported.
    # The dot before "jpg" is escaped so it matches a literal ".".
    pattern = re.compile(r"<img class=[^>]+? [^>]+ data-original=('[^']+\.jpg')")
    for address in pattern.findall(html):
        print(address)
if __name__ == '__main__':
    # Script entry point: fetch listing page 2 and print the image addresses.
    url = "https://www.mzitu.com/page/2/"
    get_img(url_open(url))
另外,你 Referer 的网址多了个 w,少了个 s(应为 https://www.mzitu.com),已帮你改了。
页:
[1]