| 
 | 
 
5鱼币 
 
import urllib.request 
 
def download_html(url):
    """Fetch *url* and return the response body decoded as text.

    The request is sent with a desktop-Chrome ``User-Agent`` header. The
    body is decoded with the charset declared in the response's
    ``Content-Type`` header, falling back to UTF-8 when none is declared.

    Args:
        url: URL string of the page to fetch.

    Returns:
        The decoded response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError``, a
            subclass, for HTTP error statuses).
        UnicodeDecodeError: If the body is not valid in the chosen charset.
    """
    # Adjacent string literals are concatenated verbatim, so every fragment
    # carries its own trailing space to form a well-formed UA string.
    header = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/90.0.4430.85 Safari/537.36"
        ),
    }

    req = urllib.request.Request(url=url, headers=header)

    # The context manager closes the response even if read/decode raises.
    with urllib.request.urlopen(req) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset)
 
import re

# Fetch the Douban Top 250 landing page (the original call misspelled the
# function name as "duwnload_html", which raised NameError).
html = download_html("https://movie.douban.com/top250")

# Raw string with escaped dots so "." only matches a literal dot in the host.
pattern = r'https://movie\.douban\.com/subject/[0-9]+/'

# Collapse repeated matches into a set of unique movie URLs.
urls = re.findall(pattern, html)
urls = set(urls)

print("urls count=%d" % len(urls))
# Set iteration order is not stable between runs; sort for repeatable output.
for url in sorted(urls):
    print(url)
改好之后我这运行正常
 
 
 
 
 
 |   
 
 
 
 
 
 |