|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import urllib.request
import http.cookiejar
# Download a single NetEase news article using browser-like request headers
# and a cookie jar, then save the raw response bytes to a local HTML file.
url = 'https://news.163.com/20/0218/08/F5LFGKKS000189FH.html'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; rv:73.0) Gecko/20100101 Firefox/73.0",
    "Accept-Language": "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2",
    # "utf-8" is a charset, not a content-coding. urllib does not decompress
    # response bodies, so explicitly ask for an unencoded ("identity") body.
    "Accept-Encoding": "identity",
    "Connection": "keep-alive",
    "referer": "http://www.163.com/",
    "Accept": "image/webp,*/*",
}

# Cookies set by the server are kept in memory for the lifetime of the opener.
cjar = http.cookiejar.CookieJar()

# The proxy address must be "host:port" (colon before the port, not a dot).
# NOTE(review): this mapping only lists the "http" scheme, so the https:// URL
# above is presumably fetched directly; add an "https" entry if it should be
# proxied as well.
proxy = urllib.request.ProxyHandler({'http': '127.0.0.1:8888'})
opener = urllib.request.build_opener(
    proxy,
    urllib.request.HTTPHandler,
    urllib.request.HTTPCookieProcessor(cjar),
)

# OpenerDirector.addheaders expects a list of (name, value) tuples.
headall = list(headers.items())
opener.addheaders = headall
print(opener)

# Make this opener the default one used by urllib.request.urlopen().
urllib.request.install_opener(opener)

# Close the HTTP response deterministically once the body has been read.
with urllib.request.urlopen(url) as response:
    data = response.read()

# Binary mode: the bytes are written exactly as received, with no re-encoding.
with open("C:/Users/Administrator/Desktop/网易新闻/1.html", "wb") as fhandle:
    fhandle.write(data)
你爬取的只是这个网页的源代码,它还调用了其他的js文件、css文件和图片。
|
|