|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
我的代码如下:
- import requests
- import re
- from bs4 import BeautifulSoup
- import random
- import urllib.request
def get_ip(ip_url, headers):
    """Scrape a proxy-list page and return one random "ip:port" string.

    Parameters
    ----------
    ip_url : str
        URL of the proxy-list page; proxies are expected in table rows
        (<tr>) with the IP in the first <td> and the port in the second.
    headers : dict
        HTTP request headers (User-Agent) used for the fetch.

    Returns
    -------
    str
        A randomly chosen "ip:port" entry from the scraped table.

    Raises
    ------
    requests.exceptions.RequestException
        On network failure or timeout.
    IndexError
        If the page yields no usable proxy rows (random.choice on []).
    """
    # timeout so an unreachable proxy-list site cannot hang the script forever
    web_data = requests.get(ip_url, headers=headers, timeout=10)
    soup = BeautifulSoup(web_data.text, 'lxml')
    ip_list = []  # all scraped "ip:port" proxy candidates
    # Skip the first two rows (page header/title rows) and any remaining
    # header row whose cells contain the column label '服务器地址'.
    for tr in soup.find_all('tr')[2:]:
        if '服务器地址' not in tr.text:
            tds = tr.find_all('td')
            ip_list.append(tds[0].text + ':' + tds[1].text)
    return random.choice(ip_list)
def set_opener(url, headers, proxy=None):
    """Fetch *url* through an HTTP/HTTPS proxy and return the decoded body.

    Parameters
    ----------
    url : str
        Page to download.
    headers : dict
        HTTP request headers (User-Agent) for the request.
    proxy : str, optional
        "ip:port" proxy address. When omitted, falls back to the
        module-level ``proxy_ip`` global, matching the original
        implicit-global behavior so existing callers are unaffected.

    Returns
    -------
    str
        Response body decoded as UTF-8; undecodable bytes are ignored.
    """
    proxy = proxy if proxy is not None else proxy_ip  # original global fallback
    proxy_handler = urllib.request.ProxyHandler({'http': proxy, 'https': proxy})
    # Use the opener directly instead of install_opener(): the original
    # mutated process-global urllib state as a side effect, silently
    # proxying every later urlopen() call anywhere in the process.
    opener = urllib.request.build_opener(proxy_handler)
    request = urllib.request.Request(url, headers=headers)
    response = opener.open(request)
    try:
        return response.read().decode('utf-8', 'ignore')
    finally:
        # guarantee the socket is released even if read()/decode() raises
        response.close()
if __name__ == '__main__':
    # Source of free proxies, and the target page to fetch through one.
    proxy_list_url = 'https://cn-proxy.com/'
    target_url = 'https://baike.baidu.com/item/%E7%BD%91%E7%BB%9C%E7%88%AC%E8%99%AB'
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36',
    }
    # Must keep the name `proxy_ip`: set_opener reads it as a module global.
    proxy_ip = get_ip(proxy_list_url, request_headers)
    print(set_opener(target_url, request_headers))
复制代码 |
|