# Practice script: applying the regular expressions learned from the Xiaojiayu
# tutorial to scrape proxy "ip:port" pairs. Suggestions for improvement welcome.
import re
import urllib.request

PROXY_LIST_URL = 'http://cn-proxy.com/'
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
}

# Compiled once at import time instead of on every loop iteration.
ROW_RE = re.compile(r"<tr>.+?</tr>", re.S)
CELL_RE = re.compile(r"<td>.+?</td>", re.S)
IP_RE = re.compile(r'(([0-9]{1,3}\.){3}([0-9]{1,3}))')
PORT_RE = re.compile(r'[0-9]{1,5}')


def build_request(url=PROXY_LIST_URL):
    """Return a GET request for *url* carrying the browser-like User-Agent.

    No ``data`` argument is passed: urllib switches the method to POST as
    soon as ``data`` is not None, even when it is an empty byte string.
    """
    return urllib.request.Request(url, headers=REQUEST_HEADERS)


def fetch_html(url=PROXY_LIST_URL, timeout=10):
    """Download *url* and return its body decoded as UTF-8.

    Raises whatever ``urllib.request.urlopen`` raises on network failure
    (e.g. ``urllib.error.URLError``) and ``UnicodeDecodeError`` if the body
    is not valid UTF-8.
    """
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(build_request(url), timeout=timeout) as response:
        return response.read().decode('utf-8')


def parse_proxies(html):
    """Extract ``"ip:port"`` strings from the table rows found in *html*.

    The first ``<td>`` of a row is searched for a dotted-quad address and the
    second ``<td>`` for the port number. Rows with fewer than two cells, or
    where either value is missing, are skipped instead of raising.
    """
    proxies = []
    for row in ROW_RE.findall(html):
        cells = CELL_RE.findall(row)
        if len(cells) < 2:
            # Header rows (<th> only) and malformed rows have no ip/port pair.
            continue
        ip = IP_RE.search(cells[0])
        port = PORT_RE.search(cells[1])
        if ip and port:
            proxies.append(ip.group(0) + ":" + port.group(0))
    return proxies


def main():
    """Fetch the proxy list page and print one ``ip:port`` per line."""
    for proxy in parse_proxies(fetch_html()):
        print(proxy)


if __name__ == "__main__":
    main()