快速获取ip代理并检测获取代理的有效性
# -*- coding: utf-8 -*-
# @清凉
# @快速获取ip代理并检测获取代理的有效性.py
import requests
import pprint
import parsel
import time
# Request headers sent with every HTTP call in this script: a desktop browser
# User-Agent plus a previously captured session cookie.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36 Edg/81.0.416.68',
    'Cookie': 'channelid=0; sid=1588905849664960; Hm_lvt_7ed65b1cc4b810e9fd37959c9bb51b31=1588905972; _ga=GA1.2.620440764.1588905972; _gid=GA1.2.1803663967.1588905972; MEIQIA_TRACK_ID=1bbg0JvBZbe016Kxzb1t3ZCj7tC; MEIQIA_VISIT_ID=1bbg0Lh0pypxP2MrSeKFT8zQg8V; _gat=1; Hm_lpvt_7ed65b1cc4b810e9fd37959c9bb51b31=1588906165'}

# Scraped proxies, each stored as an "ip:port" string.
iplist = []

num = int(input("请输入你想要获取的代理ip数量,建议范围15以上,1000以内:"))
# Convert the requested proxy count into the exclusive upper bound used by
# range(1, num) below. Presumably a listing page holds about 15 rows, so this
# always fetches at least one page more than strictly needed -- TODO confirm.
num = num // 15 + 2
# Seconds to pause after each page request.
timewait = int(input("请输入你想要的速度,1最快,10最慢"))

for i in range(1, num):
    url = 'https://www.kuaidaili.com/free/inha/{}/'.format(str(i))
    try:
        response = requests.get(url, headers=headers, timeout=5)
    except requests.RequestException as error:
        # One unreachable page must not throw away the proxies collected so
        # far; report it and move on to the next page.
        print("第{}页获取失败: {}".format(i, error))
        time.sleep(timewait)
        continue
    # The original author noted that this decode has failed before; replace
    # undecodable bytes instead of aborting the whole run.
    data = response.content.decode("utf-8", errors="replace")
    html = parsel.Selector(data)
    temp = html.xpath(
        "//table[@class='table table-bordered table-striped']/tbody/tr")
    for item in temp:
        # Select each column explicitly. A bare './td/text()' always yields
        # the first cell, which produced "ip:ip" instead of "ip:port".
        # Assumes the IP is the first cell and the port the second --
        # TODO confirm against the live page layout.
        ip_num = item.xpath('./td[1]/text()').extract_first()
        ip_port = item.xpath('./td[2]/text()').extract_first()
        if not ip_num or not ip_port:
            # Row without both cells (e.g. an unexpected layout): skip it
            # rather than fail on None + str.
            continue
        iplist.append(ip_num.strip() + ':' + ip_port.strip())
    # Throttle between pages so the site is not hammered.
    time.sleep(timewait)
    print("第{}页已经获取完毕".format(i))

print(len(iplist))
print("*" * 100)
def checkbaidu(iplist):
    """Probe every proxy against https://www.baidu.com and keep the working ones.

    Args:
        iplist: Iterable of proxies. A string entry is treated as a proxy
            address such as ``"ip:port"`` (a ``scheme://`` prefix is kept if
            present); any other entry is handed to ``requests`` unchanged as
            its ``proxies`` argument.

    Returns:
        The list of entries that answered with HTTP 200 within the 2-second
        timeout. The same list is also stored in the module-level ``can_use``
        variable, which existing callers read.
    """
    global can_use
    can_use = []
    for ip in iplist:
        if isinstance(ip, str):
            # requests expects a mapping of URL scheme -> proxy URL; a bare
            # "ip:port" string is rejected, which made every proxy look dead.
            proxy_url = ip if "://" in ip else "http://" + ip
            proxies = {"http": proxy_url, "https": proxy_url}
        else:
            proxies = ip
        try:
            temp = requests.get(
                'https://www.baidu.com',
                headers=headers,
                timeout=2,
                proxies=proxies)
        except Exception:
            # Best-effort probe: any failure just marks this proxy unusable.
            # Exception rather than BaseException so that Ctrl-C and
            # SystemExit still stop the loop.
            print(ip, "不能使用或超时了")
        else:
            if temp.status_code == 200:
                can_use.append(ip)
    print("一共有{}个可以有效使用".format(len(can_use)))
    return can_use
# Run the validity check over the scraped proxies, framed by separator lines,
# then show the proxies that passed (checkbaidu stores them in can_use).
divider = "##" * 20
print(divider)
checkbaidu(iplist)
print(divider)
print("他们分别是:")
print(can_use)
效果如下,希望指点
页:
[1]