飘雨之城 发表于 2021-3-19 23:30:13

煎蛋网下图片

网站已改版,不能再用 page 参数来翻页了,不过可以通过“下一页”这个链接来跳转。
import requests
from bs4 import BeautifulSoup
import os
import random

def open_url(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the response.

    Args:
        url: Absolute URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server or dead proxy would block forever.

    Returns:
        The ``requests.Response`` object for the GET request.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.90 Safari/537.36'}
    proxies = ['103.233.152.140:8080', '106.14.198.6:8080', '118.190.152.166:1080']
    # Pick one proxy at random per request. The mapping only has an 'https'
    # key, so it only configures a proxy for https:// URLs.
    proxy = {'https': random.choice(proxies)}

    return requests.get(url, headers=headers, proxies=proxy, timeout=timeout)

def get_next(url):
    """Return the URL of the next (older) page linked from *url*.

    The page is searched for an element whose ``title`` attribute is
    ``'Older Comments'``; its ``href`` is resolved against *url*.

    Args:
        url: URL of the page currently being scraped.

    Returns:
        The absolute URL of the next page, or ``None`` when the page has no
        such link (or the link has no usable ``href``).
    """
    # Local import keeps this block self-contained; urljoin handles
    # protocol-relative ('//host/path'), absolute and relative hrefs alike.
    from urllib.parse import urljoin

    html = open_url(url).text
    soup = BeautifulSoup(html, 'html.parser')

    link = soup.find(title='Older Comments')
    if link is None:
        # No "older" link on this page: nothing further to visit.
        return None

    href = link.get('href')
    if not href:
        return None

    return urljoin(url, href)

def find_img(url):
    """Collect the ``src`` value of every ``<img>`` tag on the page at *url*.

    Args:
        url: URL of the page to scan.

    Returns:
        A list of ``src`` attribute strings in document order. ``<img>`` tags
        without a (non-empty) ``src`` are skipped instead of raising
        ``KeyError``.
    """
    html = open_url(url).text
    soup = BeautifulSoup(html, 'html.parser')

    img_list = []
    for img in soup.find_all('img'):
        src = img.get('src')
        # Tags without a src carry no downloadable address.
        if src:
            img_list.append(src)
    return img_list
      

def save_img(folder, img_addrs):
    """Download each image address and save it in the current directory.

    Args:
        folder: Not used by this function; files are written relative to the
            current working directory, so the caller must already have
            changed into the destination folder.
        img_addrs: Iterable of image addresses, either protocol-relative
            (``//host/path/name.jpg``) or absolute ``http(s)://`` URLs.

    Raises:
        requests.exceptions.RequestException: If a download fails.
        OSError: If a file cannot be written.
    """
    for each in img_addrs:
        if each.startswith('//'):
            # Protocol-relative address: supply the scheme.
            img_url = 'http:' + each
        elif each.startswith(('http://', 'https://')):
            # Already absolute; prefixing 'http:' would corrupt it.
            img_url = each
        else:
            # Cannot be resolved without knowing the page it came from.
            continue

        # Drop any query string so it does not end up in the file name.
        filename = each.split('?')[0].split('/')[-1]
        if not filename:
            continue

        # Download before opening the file so a failed request does not
        # leave an empty file behind.
        img = open_url(img_url).content
        with open(filename, 'wb') as f:
            f.write(img)
   
def download_mm(folder='美女', page=10):
    """Download images from up to *page* consecutive pages into *folder*.

    Starts at ``http://jandan.net/ooxx`` and follows the "next page" link
    returned by ``get_next`` until *page* pages have been processed or no
    further link is found.

    Args:
        folder: Directory to store the images in; created if missing.
        page: Maximum number of pages to visit.
    """
    print('正在下载美女图片。。。。。')
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(folder, exist_ok=True)

    # save_img writes into the current directory, so work inside the folder,
    # but restore the original directory afterwards: otherwise a second call
    # would create a nested folder inside the first one.
    start_dir = os.getcwd()
    os.chdir(folder)
    try:
        url = 'http://jandan.net/ooxx'
        for _ in range(page):
            save_img(folder, find_img(url))
            url = get_next(url)
            if not url:
                break
    finally:
        os.chdir(start_dir)
    print('下载结束!!!!!')

if __name__ == '__main__':
    download_mm()
    # 'pause' is a Windows cmd built-in that keeps the console window open;
    # on other platforms the command does not exist, so skip it there.
    if os.name == 'nt':
        os.system('pause')
页: [1]
查看完整版本: 煎蛋网下图片