爬虫代理求助
import requests

if __name__ == '__main__':
    # Fetch a Baidu search-results page (query "ip") through an HTTP proxy and
    # save the returned HTML to aa.html in the current working directory.
    url = 'https://www.baidu.com/s?ie=utf-8&f=8&rsv_bp=1&tn=baidu&wd=ip&oq=%25E8%25A5%25BF%25E5%2588%25BA%25E4%25BB%25A3%25E7%2590%2586&rsv_pq=8cad34a90000513c&rsv_t=e257huE%2FVv1N529AfMbS8aP6bOMYKmEd5rwbqhAZnkKfReOVyfMe52kWcDU&rqlang=cn&rsv_enter=1&rsv_dl=tb&rsv_btype=t&inputT=750&rsv_sug3=48&rsv_sug1=39&rsv_sug7=100&rsv_sug2=0&rsv_sug4=750'
    headers = {
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Mobile Safari/537.36'
    }

    # The dict key selects which target-URL scheme the proxy applies to; the
    # value must be a full proxy URL *including its own scheme*. A bare
    # "host:port" makes urllib3 raise ProxySchemeUnknown, which requests
    # re-raises as InvalidURL ("Proxy URL had no scheme").
    # NOTE(review): "http://" assumes a plain HTTP (CONNECT-capable) proxy,
    # which is the usual case; use "https://" only if the proxy itself
    # speaks TLS.
    proxies = {"https": "http://181.30.220.116:8080"}

    # A timeout keeps the script from hanging forever on a dead proxy.
    response = requests.get(url=url, headers=headers, proxies=proxies, timeout=10)
    page_data = response.text

    # Explicit UTF-8: without it, open() uses the locale's default codec,
    # which may fail to encode characters present in the page text.
    with open('aa.html', 'w', encoding='utf-8') as fp:
        fp.write(page_data)
C:\Users\liu\AppData\Local\Programs\Python\Python39\python.exe C:/Users/liu/PycharmProjects/pythonProject1/venv/爬虫/test.py
Traceback (most recent call last):
File "C:\Users\liu\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\adapters.py", line 412, in send
conn = self.get_connection(request.url, proxies)
File "C:\Users\liu\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\adapters.py", line 309, in get_connection
proxy_manager = self.proxy_manager_for(proxy)
File "C:\Users\liu\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\adapters.py", line 193, in proxy_manager_for
manager = self.proxy_manager = proxy_from_url(
File "C:\Users\liu\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\poolmanager.py", line 536, in proxy_from_url
return ProxyManager(proxy_url=url, **kw)
File "C:\Users\liu\AppData\Local\Programs\Python\Python39\lib\site-packages\urllib3\poolmanager.py", line 480, in __init__
raise ProxySchemeUnknown(proxy.scheme)
urllib3.exceptions.ProxySchemeUnknown: Proxy URL had no scheme, should start with http:// or https://
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\liu\PycharmProjects\pythonProject1\venv\爬虫\test.py", line 9, in <module>
page_data=requests.get(url=url,headers=headers,proxies={"https":"181.30.220.116:8080"}).text
File "C:\Users\liu\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "C:\Users\liu\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\liu\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\sessions.py", line 542, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\liu\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\sessions.py", line 655, in send
r = adapter.send(request, **kwargs)
File "C:\Users\liu\AppData\Local\Programs\Python\Python39\lib\site-packages\requests\adapters.py", line 414, in send
raise InvalidURL(e, request=request)
requests.exceptions.InvalidURL: Proxy URL had no scheme, should start with http:// or https://
为啥会报以上错误呢?是代理失效了吗?
page_data=requests.get(url=url,headers=headers,proxies={"https":"181.30.220.116:8080"}).text
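这里的代理地址缺少协议前缀,不是代理失效。把它改成带 http:// 或 https:// 的完整地址试试看,例如 proxies={"https":"http://181.30.220.116:8080"},因为报错的是 Proxy URL had no scheme, should start with http:// or https://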
页:
[1]