|
100鱼币
本帖最后由 欣欣celin 于 2016-1-3 13:18 编辑
- import urllib.request as r
- from urllib.error import URLError
- import random
- import re
- iplist = {} # Holds proxy IPs and ports. NOTE(review): initialised as a dict, but the commented-out code in get_ip calls .append on it - confirm whether a list was intended.
def open_url(url='http://www.xicidaili.com/wn/'):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    A browser-like ``User-Agent`` header is sent with the request.

    Args:
        url: Address to fetch. Defaults to the proxy-list page.

    Returns:
        The decoded body as ``str``, or ``None`` when the request fails
        (the failure is reported on stdout).
    """
    req = r.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36')
    try:
        # The context manager closes the connection even if read() raises.
        with r.urlopen(req) as page:
            html = page.read().decode('utf-8')
    except URLError as e:
        # HTTPError is a URLError subclass that carries both ``code`` and
        # ``reason``; testing ``code`` first keeps the HTTP-status branch
        # reachable.
        if hasattr(e, 'code'):
            print('the server could not fulfill the request,')
            print('error code:', e.code)
        else:
            print('we failed to reach a server,')
            print('reason: ', e.reason)
        return None
    return html
def get_ip(html):
    """Extract every dotted-quad IPv4 address found in *html*.

    Args:
        html: Page text to scan. ``None`` or an empty string is accepted.

    Returns:
        A list of address strings in order of appearance; empty when
        nothing matches or *html* is falsy.
    """
    if not html:
        return []
    # Longest alternatives come first so a final octet such as "255" is not
    # cut short to "25". '(?:' marks a non-capturing group, which makes
    # findall return the whole match instead of a group.
    octet = r'(?:25[0-5]|2[0-4]\d|[01]?\d?\d)'
    # The digit look-arounds stop a match from starting or ending in the
    # middle of a longer number.
    pattern = r'(?<!\d)' + octet + r'(?:\.' + octet + r'){3}(?!\d)'
    return re.findall(pattern, html)
def url_opener(url):
    """Fetch *url* through a randomly chosen proxy and return the raw body.

    Candidate proxies are taken from ``get_ip(open_url())``, i.e. from the
    page that ``open_url`` fetches by default. The proxy opener is used for
    this request only and is not installed globally, so later calls that
    fetch the proxy list are not themselves routed through a proxy.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as ``bytes``, or ``None`` when no proxy is
        available or the request fails (the failure is reported on stdout).
    """
    # Scrape proxies from the proxy-list page, not from the target page.
    proxies = get_ip(open_url())
    if not proxies:
        print('no proxy address available,')
        return None
    proxy = random.choice(proxies)
    # NOTE(review): the proxy string is handed to ProxyHandler as-is; if it
    # carries no port the handler's default applies - confirm what get_ip
    # returns.
    opener = r.build_opener(r.ProxyHandler({'http': proxy}))

    # Send the Request object so the User-Agent header is actually used.
    req = r.Request(url)
    req.add_header('User-Agent','Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36')
    try:
        # The context manager closes the connection even if read() raises.
        with opener.open(req) as response:
            html2 = response.read()
    except URLError as e:
        # HTTPError carries both ``code`` and ``reason``; test ``code``
        # first so the HTTP-status branch stays reachable.
        if hasattr(e, 'code'):
            print('the server could not fulfill the request,')
            print('error code:', e.code)
        else:
            print('we failed to reach a server,')
            print('reason: ', e.reason)
        return None
    return html2
if __name__ == '__main__':
    # Script entry point: ask for a URL, fetch it through a proxy, print it.
    url = input('请输入要打开的网页:')
    # Keep the return value: a name local to url_opener is not visible here,
    # so printing it without this assignment raises NameError.
    html2 = url_opener(url)
    print(html2)
-
复制代码 |
|