【作品分享】妹子图片硬盘爬满!
import requestsfrom bs4 import BeautifulSoup
import re
import time
url = 'https://pic.netbian.com/new/index.html'
url1 = 'https://pic.netbian.com'
header = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"
}
def p(n):
resp = requests.get(url,headers=header)
resp.encoding = 'utf-8'
page = resp.text
soup = BeautifulSoup(page,'html.parser')
result1 = soup.find('ul',class_="clearfix").find_all('a')#搜索后的源代码1,find_all(把找到的a标签形成一个列表)
for i in result1:
new_url = i.get('href')
url2 = url1 + new_url
#print(url2) #测试
last_resp = requests.get(str(url2),headers=header)
last_resp.encoding = 'utf-8'
last_page = last_resp.text
last_soup = BeautifulSoup(last_page, 'html.parser')
result = last_soup.find('div',class_="photo-pic").find_all("img")
for i in result:
lastlast=i.get('src')
lastlast= url1+lastlast
#print(lastlast) #测试
time.sleep(0.5)
file_name = '%d.jpg' % n
with open('D://爬取的壁纸/%s' % file_name,'wb') as f:
img_content = requests.get(lastlast,headers = header)
f.write(img_content.content)
print("第%d张over!" % n)
n += 1
print('20张结束')
p(1)
m = 2
x = 21
while x < 3000:
url='https://pic.netbian.com/new/index_%d.html' % m
p(x)
m += 1
x +=20 爬了两张就报错了
Traceback (most recent call last):
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 703, in urlopen
httplib_response = self._make_request(
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 386, in _make_request
self._validate_conn(conn)
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 1042, in _validate_conn
conn.connect()
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\connection.py", line 414, in connect
self.sock = ssl_wrap_socket(
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\util\ssl_.py", line 449, in ssl_wrap_socket
ssl_sock = _ssl_wrap_socket_impl(
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\util\ssl_.py", line 493, in _ssl_wrap_socket_impl
return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
File "D:\Programs\Python\Python310\lib\ssl.py", line 513, in wrap_socket
return self.sslsocket_class._create(
File "D:\Programs\Python\Python310\lib\ssl.py", line 1071, in _create
self.do_handshake()
File "D:\Programs\Python\Python310\lib\ssl.py", line 1342, in do_handshake
self._sslobj.do_handshake()
ssl.SSLError: length mismatch (_ssl.c:997)
During handling of the above exception, another exception occurred:
页:
[1]