|
发表于 2020-9-22 12:39:48
|
显示全部楼层
试一下,爬下来,难得会有2、3个有效的免费代理IP
- import requests
- from lxml import etree
- import re
- import time
class Get_Free_Ip():
    """Scrape free proxies from kxdaili.com, probe them and save the live ones.

    Workflow (see ``Run``): download the listing pages, turn every table row
    into a ``requests``-style proxy mapping, send one test request through
    each proxy and write the ones that answered to a text file.
    """

    # Matches the text of every plain ``<td>`` cell on a listing page.
    _CELL_PATTERN = re.compile(r'<td>(.*?)</td>')
    # The parser consumes cells in groups of seven; within a group, cell 0 is
    # the IP, cell 1 the port and cell 3 the comma-separated proxy types.
    _CELLS_PER_ROW = 7
    # Seconds to wait for a listing page before giving up on it.
    _PAGE_TIMEOUT = 10
    # (proxy scheme, URL fetched through the proxy to see whether it works).
    # Order matters: 'http' is checked before 'https'.
    _CHECK_URLS = (
        ('http', 'http://icanhazip.com'),
        ('https', 'https://foundation.youdao.com/ip/ipinfo'),
    )

    def __init__(self):
        # Template placeholders: first the listing type, then the page number.
        self.url1 = 'http://www.kxdaili.com/dailiip/{}/{}.html'
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36 Edg/85.0.564.51'}

    @staticmethod
    def _parse_rows(html_str):
        """Extract proxy records from the HTML of one listing page.

        Args:
            html_str: Decoded HTML text of a listing page.

        Returns:
            A list of dicts with the keys ``'IP地址'``, ``'端口'`` and
            ``'代理类型'``, one per complete seven-cell row. A trailing
            incomplete row is ignored instead of raising ``IndexError``.
        """
        cells = Get_Free_Ip._CELL_PATTERN.findall(html_str)
        row_len = Get_Free_Ip._CELLS_PER_ROW
        records = []
        # Stop early enough that every row visited has all of its cells.
        for start in range(0, len(cells) - row_len + 1, row_len):
            records.append({
                'IP地址': cells[start],
                '端口': cells[start + 1],
                '代理类型': cells[start + 3],
            })
        return records

    def Get_Url1_data(self):
        """Download listing types 1-2, pages 1-10, and collect their rows.

        A page that cannot be fetched is reported and skipped so that one
        network error does not discard the results of the other pages.

        Returns:
            A list of record dicts as produced by ``_parse_rows``.
        """
        ip_data_list1 = []
        for type_num in range(1, 3):
            for page_num in range(1, 11):
                start_url = self.url1.format(type_num, page_num)
                print(start_url)
                try:
                    response = requests.get(
                        url=start_url,
                        headers=self.headers,
                        timeout=self._PAGE_TIMEOUT,
                    )
                except requests.RequestException as err:
                    print(f'{start_url}请求失败,跳过: {err}')
                else:
                    # Tolerate stray bytes rather than aborting the crawl.
                    html_str = response.content.decode('utf-8', errors='replace')
                    ip_data_list1.extend(self._parse_rows(html_str))
                # Pause between pages to avoid hammering the site.
                time.sleep(1)
        return ip_data_list1

    def Change_data(self, ip_data_list):
        """Convert scraped records into ``requests``-style proxy mappings.

        Each record yields one single-key dict per supported proxy type, e.g.
        ``{'http': '1.2.3.4:80'}``. Types other than HTTP/HTTPS are skipped,
        and the input records are left unmodified.

        Args:
            ip_data_list: Records with the keys ``'IP地址'``, ``'端口'`` and
                ``'代理类型'`` (a comma-separated string of types).

        Returns:
            A list of ``{'http': 'ip:port'}`` / ``{'https': 'ip:port'}`` dicts.
        """
        new_list = []
        for record in ip_data_list:
            address = record['IP地址'] + ':' + record['端口']
            for proxy_type in record['代理类型'].split(','):
                scheme = proxy_type.strip().lower()
                if scheme in ('http', 'https'):
                    new_list.append({scheme: address})
        print(new_list)
        return new_list

    def Check_httpip(self, new_list):
        """Probe every proxy and save the ones that return a response.

        A proxy counts as live when the test request for its scheme completes
        within three seconds; the response status is not inspected.

        Args:
            new_list: Proxy mappings as produced by ``Change_data``.

        Returns:
            The list of proxy mappings that answered (also written to disk
            through ``Save_To_Txt`` when it is non-empty).
        """
        new_list_active = []
        for proxy in new_list:
            print(f'开始检测{proxy}...')
            check_url = next(
                (url for scheme, url in self._CHECK_URLS if scheme in proxy),
                None,
            )
            if check_url is None:
                # Neither an http nor an https entry: nothing to test.
                continue
            try:
                response = requests.get(
                    url=check_url,
                    headers=self.headers,
                    proxies=proxy,
                    timeout=3,
                )
            except requests.RequestException:
                # Only network-level failures mark a proxy as dead; Ctrl-C
                # and programming errors still propagate.
                print(f'{proxy}未返回结果,无效...')
                continue
            print(f'返回结果:{response.text}')
            new_list_active.append(proxy)
            time.sleep(1)
        print(new_list_active)
        if new_list_active:
            self.Save_To_Txt(new_list_active)
        else:
            print('无有效免费代理IP地址')
        return new_list_active

    def Save_To_Txt(self, list):
        """Write one proxy mapping per line to ``免费代理IP地址.txt``.

        Args:
            list: Iterable of proxy mappings. The parameter name shadows the
                builtin but is kept so existing keyword callers keep working.
        """
        with open('免费代理IP地址.txt', mode='w', encoding='utf-8') as f:
            f.writelines(str(each) + '\n' for each in list)

    def Run(self):
        """Scrape, convert, probe and save; return the live proxies."""
        # Example listing URL: http://www.kxdaili.com/dailiip/1/1.html
        ip_data_list1 = self.Get_Url1_data()
        new_list = self.Change_data(ip_data_list1)
        return self.Check_httpip(new_list)
if __name__ == '__main__':
    # Script entry point: run the full scrape -> probe -> save pipeline.
    Get_Free_Ip().Run()
复制代码 |
|