打印html,Python交流,编程语言专区,鱼C论坛

cyy6666 发表于 2021-2-12 22:36:06

打印html

本帖最后由 cyy6666 于 2021-2-16 03:17 编辑

import urllib.request
import urllib.parse
import urllib.error
import json
import time
import random
import os
import re
import gzip
'''html中以"b’\x1f\x8b\x08"开头的，说明它是gzip压缩过的数据'''
'''不是所有网页都用utf-8'''

def url_open(url,data=None):
head={}
head['User-Agent']='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36 Edg/87.0.664.75'
req=urllib.request.Request(url,data,head)
response=urllib.request.urlopen(req)
html=response.read()
'''html=gzip.decompress(html)'''
return html

'''收集ip'''
def get_ip():
url=''
html=url_open(url).decode('utf-8')
p=r'(?:(?:25|2\d|{0,1}\d{0,1}\d)\.){3}(?:25|2\d|{0,1}\d{0,1}\d):[^@]{1,}'
iplist=re.findall(p,html)
return iplist

'''代理'''
def proxy_url_open(url,data=None):
ip=get_ip()
while True:
   try:
         proxy_support=urllib.request.ProxyHandler({'http':random.choice(ip)})
         '''创建opener'''
         opener=urllib.request.build_opener(proxy_support)
         '''隐藏'''
         opener.addheaders=[('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36 Edg/87.0.664.75')]
         '''安装opener'''
         response=opener.open(url,data)
   except:
         pass
   else:
         break
html=response.read()
'''html=gzip.decompress(html)'''
return html

'''打印网页'''
def print_(url,proxy=0):
if proxy==0:
   html=url_open(url).decode('utf-8')
else:
   html=proxy_url_open(url).decode('utf-8')
print(html)

if __name__=='__main__':
while True:
url=input('请输入网站(按0退出):')
   if url=='0':
         break
   proxy=input('是否使用代理(0不使用,1使用):')
   proxy=int(proxy)
   print_(url,proxy)

出现错误，可能是html文件需要解压，也可能是编码格式错误
新人，请大佬指教{:10_298:}

页: [1]

鱼C论坛's Archiver

打印html