pythonlbc 发表于 2022-9-1 20:31:52

【作品分享】妹子图片硬盘爬满!

import requests
from bs4 import BeautifulSoup
import re
import time
# Site root; the relative links and image paths scraped from pages are appended to it.
url1 = 'https://pic.netbian.com'
# Listing page that p() fetches; the driver loop rebinds this name for later pages.
url = 'https://pic.netbian.com/new/index.html'
# Request headers sent with every HTTP call (a desktop Chrome User-Agent).
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
}
def p(n):
    """Download every wallpaper linked from the current listing page.

    Fetches the module-level ``url`` (the listing page), follows each ``<a>``
    inside ``<ul class="clearfix">`` to its detail page, takes the last
    ``<img>`` with a ``src`` inside ``<div class="photo-pic">`` and saves it
    as ``D://爬取的壁纸/<number>.jpg``.

    Args:
        n: File number given to the first link on the page. Each following
            link gets the next number, whether or not its download succeeds,
            so numbering depends only on a link's position on the page.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched. Failures on individual images are reported with
            ``print`` and skipped instead of aborting the whole page.
        OSError: If the output directory cannot be created or a file cannot
            be written.
    """
    import os  # local import: only needed to create the output directory

    timeout = 10  # seconds; without a timeout a stalled connection hangs forever
    out_dir = 'D://爬取的壁纸'
    os.makedirs(out_dir, exist_ok=True)

    resp = requests.get(url, headers=header, timeout=timeout)
    resp.raise_for_status()
    resp.encoding = 'utf-8'
    soup = BeautifulSoup(resp.text, 'html.parser')

    gallery = soup.find('ul', class_="clearfix")
    if gallery is None:
        # Page layout differs from what is expected (or we ran past the last page).
        print('未找到图片列表: %s' % url)
        return

    # find_all returns every <a> tag inside the gallery list as a list.
    for number, link in enumerate(gallery.find_all('a'), start=n):
        href = link.get('href')
        if not href:
            continue
        try:
            detail = requests.get(url1 + href, headers=header, timeout=timeout)
            detail.raise_for_status()
            detail.encoding = 'utf-8'
            detail_soup = BeautifulSoup(detail.text, 'html.parser')

            holder = detail_soup.find('div', class_="photo-pic")
            sources = []
            if holder is not None:
                sources = [img.get('src') for img in holder.find_all('img')]
                sources = [src for src in sources if src]
            if not sources:
                print('第%d张未找到图片地址,跳过' % number)
                continue
            # The original kept the last <img> found; preserve that choice.
            image_url = url1 + sources[-1]

            time.sleep(0.5)  # small pause between requests to the site
            image = requests.get(image_url, headers=header, timeout=timeout)
            image.raise_for_status()
        except requests.RequestException as exc:
            # Covers timeouts, connection resets and SSL handshake failures.
            print('第%d张下载失败: %s' % (number, exc))
            continue

        # Open the file only after the download succeeded, so a failed
        # request never leaves an empty .jpg behind.
        file_name = '%d.jpg' % number
        with open('%s/%s' % (out_dir, file_name), 'wb') as f:
            f.write(image.content)
        print("第%d张over!" % number)
    print('20张结束')
# The first listing page uses the initial value of `url`; its files start at 1.
p(1)
# Later listing pages are index_<m>.html starting at m = 2. Each page is given
# a block of 20 file numbers (21, 41, ...), stopping before number 3000.
for m, x in enumerate(range(21, 3000, 20), start=2):
    url = 'https://pic.netbian.com/new/index_%d.html' % m
    p(x)

ldjan 发表于 2022-10-4 21:41:00

爬了两张就报错了
Traceback (most recent call last):
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 386, in _make_request
    self._validate_conn(conn)
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\connectionpool.py", line 1042, in _validate_conn
    conn.connect()
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\connection.py", line 414, in connect
    self.sock = ssl_wrap_socket(
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\util\ssl_.py", line 449, in ssl_wrap_socket
    ssl_sock = _ssl_wrap_socket_impl(
File "D:\Programs\Python\Python310\lib\site-packages\urllib3\util\ssl_.py", line 493, in _ssl_wrap_socket_impl
    return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
File "D:\Programs\Python\Python310\lib\ssl.py", line 513, in wrap_socket
    return self.sslsocket_class._create(
File "D:\Programs\Python\Python310\lib\ssl.py", line 1071, in _create
    self.do_handshake()
File "D:\Programs\Python\Python310\lib\ssl.py", line 1342, in do_handshake
    self._sslobj.do_handshake()
ssl.SSLError: length mismatch (_ssl.c:997)
During handling of the above exception, another exception occurred:
页: [1]
查看完整版本: 【作品分享】妹子图片硬盘爬满!