|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- # -*- coding: utf-8 -*-
- # @清凉
- # @快速获取ip代理并检测获取代理的有效性.py
- import requests
- import pprint
- import parsel
- import time
# Browser-like request headers sent with every HTTP call made by this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36 Edg/81.0.416.68',
    'Cookie': 'channelid=0; sid=1588905849664960; Hm_lvt_7ed65b1cc4b810e9fd37959c9bb51b31=1588905972; _ga=GA1.2.620440764.1588905972; _gid=GA1.2.1803663967.1588905972; MEIQIA_TRACK_ID=1bbg0JvBZbe016Kxzb1t3ZCj7tC; MEIQIA_VISIT_ID=1bbg0Lh0pypxP2MrSeKFT8zQg8V; _gat=1; Hm_lpvt_7ed65b1cc4b810e9fd37959c9bb51b31=1588906165',
}

# Scraped proxies; each entry is a one-item dict {protocol label: "ip:port"}.
iplist = []

num = int(input("请输入你想要获取的代理ip数量,建议范围15以上,1000以内:"))
# Convert the requested proxy count into the exclusive upper bound of the page
# range below: the count is divided by 15 and padded so that range(1, num)
# visits (count // 15) + 1 pages.
num = num // 15 + 2
# Passed to time.sleep() after every page, i.e. a delay in seconds.
timewait = int(input("请输入你想要的速度,1最快,10最慢"))

for i in range(1, num):
    url = 'https://www.kuaidaili.com/free/inha/{}/'.format(i)
    try:
        response = requests.get(url, headers=headers, timeout=5)
        response.raise_for_status()
    except requests.RequestException as err:
        # One unreachable or rejected page must not abort the whole run.
        print("第{}页获取失败: {}".format(i, err))
        time.sleep(timewait)
        continue

    # Replace undecodable bytes instead of raising UnicodeDecodeError.
    data = response.content.decode("utf-8", errors="replace")
    html = parsel.Selector(data)
    rows = html.xpath(
        "//table[@class='table table-bordered table-striped']/tbody/tr")
    for item in rows:
        ip_class = item.xpath('./td[4]/text()').extract_first()
        ip_num = item.xpath('./td[1]/text()').extract_first()
        ip_port = item.xpath('./td[2]/text()').extract_first()
        # extract_first() yields None for a missing cell; skip such rows
        # rather than failing on None + str concatenation.
        if not (ip_class and ip_num and ip_port):
            continue
        iplist.append({ip_class.strip(): ip_num.strip() + ':' + ip_port.strip()})

    # Throttle between page requests.
    time.sleep(timewait)
    print("第{}页已经获取完毕".format(i))

print(len(iplist))
print("*" * 100)
def checkbaidu(iplist):
    """Probe every scraped proxy against Baidu and collect the working ones.

    Args:
        iplist: Iterable of one-item dicts mapping a protocol label
            (e.g. "HTTP") to an "ip:port" string.

    Returns:
        The list of entries from ``iplist`` whose probe answered with HTTP
        200. The same list is also stored in the module-level ``can_use``.
    """
    global can_use
    can_use = []
    for ip in iplist:
        address = next(iter(ip.values()), None)
        if not address:
            print(ip, "不能使用或超时了")
            continue
        # requests selects a proxy by the lowercase scheme of the target URL.
        # Passing the scraped entry as-is (key such as "HTTP") would match
        # nothing and the probe would silently bypass the proxy, so route
        # both schemes through this proxy explicitly.
        proxy_url = address if '://' in address else 'http://' + address
        proxies = {'http': proxy_url, 'https': proxy_url}
        try:
            temp = requests.get(
                'https://www.baidu.com',
                headers=headers,
                timeout=2,
                proxies=proxies)
        except Exception:
            # Best-effort probe: any failure marks the proxy as unusable.
            # Exception (not BaseException) so Ctrl-C still stops the run.
            print(ip, "不能使用或超时了")
            continue
        if temp.status_code == 200:
            can_use.append(ip)
    print("一共有{}个可以有效使用".format(len(can_use)))
    return can_use
# Run the proxy check framed by separator lines, then list the usable proxies
# that checkbaidu() stored in the module-level can_use.
separator = "##" * 20
print(separator)
checkbaidu(iplist)
print(separator)
print("他们分别是:", can_use, sep="\n")
复制代码
效果如下,希望指点
|
|