|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import urllib.request
import urllib.parse
import re
def movietop():
    """Fetch the Douban sci-fi ranking page and print the movie links in it.

    The page is requested with a browser-like ``User-Agent`` header, decoded
    as UTF-8, and searched for ``movie-name`` anchors. The result printed is
    a list of ``(url, title)`` tuples, or ``[]`` when nothing matches.

    Raises:
        urllib.error.URLError: if the HTTP request fails.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    url = 'https://movie.douban.com/typerank?type_name=%E7%A7%91%E5%B9%BB%E7%89%87&type=17&interval_id=100:90&action=playable'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
    }
    request = urllib.request.Request(url, headers=headers)

    # Use a context manager so the connection is closed even if reading or
    # decoding raises.
    with urllib.request.urlopen(request) as response:
        html = response.read().decode('utf-8')

    # Markup the pattern targets, e.g.:
    #   <div class="movie-name"><a href="https://movie.douban.com/subject/3541415/" target="_blank">TITLE</a></div>
    # NOTE(review): if this prints an empty list, the ranking rows are
    # presumably injected by JavaScript after page load and therefore absent
    # from the raw HTML fetched here -- confirm by inspecting `html` (or the
    # browser's network tab) before changing the pattern.
    reg = r'<div class="movie-name"><a href="(.*?)" target="_blank">(.*?)</a></div>'
    chapt_url = re.findall(reg, html)
    print(chapt_url)
# Script entry point: runs the scraper once when this file is executed.
movietop()
为什么这里输出的 chapt_url 是空列表?
跪求大神讲解
|
|