# Tried it out: after crawling, you are lucky to end up with even 2 or 3 working free proxy IPs.
import requests
from lxml import etree
import re
import time
class Get_Free_Ip():
    """Scrape free proxy listings from kxdaili.com and keep the ones that respond.

    Pipeline (see ``Run``): download the listing pages, turn each table row
    into a ``requests``-style proxy mapping, probe every proxy once, and save
    the proxies that answered to a text file.
    """

    # Table cells are pulled out with a non-greedy match; "." does not match
    # newlines here, so only cells written on a single line are captured.
    _CELL_PATTERN = re.compile('<td>(.*?)</td>')
    # The parser assumes every listing row has 7 cells, with the IP, port and
    # proxy type at these positions -- TODO confirm against the live page.
    _CELLS_PER_ROW = 7
    _IP_COLUMN = 0
    _PORT_COLUMN = 1
    _TYPE_COLUMN = 3
    # Seconds to wait for a listing page before giving up instead of hanging.
    _PAGE_TIMEOUT = 10
    # Probe target per proxy scheme. Insertion order matters: 'http' is tried
    # first when a mapping carries both keys.
    _CHECK_URLS = {
        # http://icanhazip.com answers with the caller's current IP address.
        'http': 'http://icanhazip.com',
        'https': 'https://foundation.youdao.com/ip/ipinfo',
    }

    def __init__(self):
        """Set the listing URL template and the browser-like request headers."""
        # Placeholders: listing type number, then page number.
        self.url1 = 'http://www.kxdaili.com/dailiip/{}/{}.html'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                          'AppleWebKit/537.36 (KHTML, like Gecko) '
                          'Chrome/85.0.4183.102 Safari/537.36 Edg/85.0.564.51'
        }

    @classmethod
    def _parse_rows(cls, html_str):
        """Return one record per table row found in a listing page's HTML."""
        cells = cls._CELL_PATTERN.findall(html_str)
        records = []
        for start in range(0, len(cells), cls._CELLS_PER_ROW):
            row = cells[start:start + cls._CELLS_PER_ROW]
            if len(row) <= cls._TYPE_COLUMN:
                # Trailing fragment too short to hold the fields we read;
                # indexing it would raise IndexError.
                continue
            records.append({
                'IP地址': row[cls._IP_COLUMN],
                '端口': row[cls._PORT_COLUMN],
                '代理类型': row[cls._TYPE_COLUMN],
            })
        return records

    def Get_Url1_data(self):
        """Download the listing pages and collect the proxies they advertise.

        Fetches listing types 1-2, pages 1-10 of each, printing every URL and
        pausing one second after each page.

        Returns:
            A list of dicts with the keys 'IP地址' (IP address), '端口' (port)
            and '代理类型' (comma-separated proxy types), all as strings.

        Raises:
            requests.RequestException: if a page cannot be fetched, including
                when the server does not answer within the timeout.
            UnicodeDecodeError: if a page body is not valid UTF-8.
        """
        ip_data_list1 = []
        for type_num in range(1, 3):
            for page_num in range(1, 11):
                start_url = self.url1.format(type_num, page_num)
                print(start_url)
                # Without a timeout a stalled server would block the crawl forever.
                response = requests.get(
                    url=start_url,
                    headers=self.headers,
                    timeout=self._PAGE_TIMEOUT,
                )
                html_str = response.content.decode()
                ip_data_list1.extend(self._parse_rows(html_str))
                # Be polite to the site between page requests.
                time.sleep(1)
        return ip_data_list1

    def Change_data(self, ip_data_list):
        """Convert scraped records into ``requests``-style proxy mappings.

        A record whose type field lists several schemes (e.g. 'HTTP,HTTPS')
        yields one single-key mapping per scheme. Types other than HTTP and
        HTTPS are skipped. The input records are left unmodified.

        Args:
            ip_data_list: records as returned by ``Get_Url1_data``.

        Returns:
            A list of dicts such as ``{'http': '1.2.3.4:80'}``.
        """
        new_list = []
        for each in ip_data_list:
            address = each['IP地址'] + ':' + each['端口']
            for proxy_type in each['代理类型'].split(','):
                # Tolerate stray whitespace and letter case in the scraped cell.
                scheme = proxy_type.strip().lower()
                if scheme in self._CHECK_URLS:
                    new_list.append({scheme: address})
        print(new_list)
        return new_list

    def Check_httpip(self, new_list):
        """Probe every proxy once and save the ones that return a response.

        The source listing is labelled high-anonymity; this method only checks
        that a request through the proxy completes within 3 seconds and does
        not verify anonymity. Any completed response counts as success.

        Args:
            new_list: proxy mappings as returned by ``Change_data``. Mappings
                with neither an 'http' nor an 'https' key are skipped.

        Returns:
            None. Working proxies are written via ``Save_To_Txt``; if there
            are none, a message is printed instead.
        """
        new_list_active = []
        for proxy in new_list:
            print(f'开始检测{proxy}...')
            check_url = next(
                (url for scheme, url in self._CHECK_URLS.items() if scheme in proxy),
                None,
            )
            if check_url is None:
                continue
            try:
                response = requests.get(
                    url=check_url,
                    headers=self.headers,
                    proxies=proxy,
                    timeout=3,
                )
            except Exception:
                # Free proxies fail in many ways; any ordinary error just means
                # "unusable". Exception (not a bare except) lets Ctrl-C and
                # SystemExit still abort the scan.
                print(f'{proxy}未返回结果,无效...')
                continue
            print(f'返回结果:{response.text}')
            new_list_active.append(proxy)
            # Pause only after a successful probe, to avoid hammering the echo service.
            time.sleep(1)
        print(new_list_active)
        if new_list_active:
            self.Save_To_Txt(new_list_active)
        else:
            print('无有效免费代理IP地址')

    def Save_To_Txt(self, list):
        """Write one proxy mapping per line to '免费代理IP地址.txt'.

        The file is created in the current working directory and overwritten
        if it already exists.

        Args:
            list: iterable of proxy mappings; each is written as ``str(item)``.
                The name shadows the builtin but is kept so existing keyword
                callers keep working.
        """
        with open('免费代理IP地址.txt', mode='w', encoding='utf-8') as f:
            f.writelines(str(each) + '\n' for each in list)

    def Run(self):
        """Run the full pipeline: scrape, convert, then check and save."""
        # Example listing URL: http://www.kxdaili.com/dailiip/1/1.html
        ip_data_list1 = self.Get_Url1_data()
        new_list = self.Change_data(ip_data_list1)
        self.Check_httpip(new_list)
# Script entry point: build the scraper and run the whole pipeline once.
if __name__ == '__main__':
    get_freeip = Get_Free_Ip()
    get_freeip.Run()