dongli2020 发表于 2020-8-25 22:52:22

第一次写的爬MM图片,请指教

学 Python 有一段时间了,看到论坛里很多人写了爬取 MM 图片的爬虫,今天心血来潮,自己也练练手,请指教:
爬取的网页为:https://www.mzitu.com/zipai/

import requests
import re
import os.path
import time

def get_one_page(url):
    """Fetch a single page and return its HTML text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as text when the server answers with HTTP 200,
        otherwise ``None`` (non-200 status or a network-level failure).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'
    }
    try:
        # Without a timeout a stalled connection would block the crawl forever.
        r = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        # A network error on one page should not abort the whole run;
        # report it and signal failure the same way as a non-200 status.
        print('获取失败:', url, exc)
        return None
    if r.status_code != 200:
        print('获取失败:', url, r.status_code)
        return None
    print('获取成功...')
    # Fetching a page does not touch the working directory: changing it to a
    # machine-specific folder here would fail wherever that folder is missing.
    return r.text
      

# Matches a <p>...</p> span containing class="lazy" and captures the quoted
# value that follows `original=` (e.g. a data-original attribute; presumably
# the real image URL of a lazy-loaded <img> -- verify against the live markup).
# re.S lets `.` cross newlines because the markup may span several lines.
_LAZY_IMAGE_PATTERN = re.compile(
    r'<p>.*?class="lazy".*?original="(.*?)".*?</p>', re.S
)


def parse_one_page(content):
    """Extract the list of image addresses from a page's HTML.

    Args:
        content: HTML text of the page, or ``None``/empty when the page
            could not be fetched.

    Returns:
        A list with one captured address string per match, in document
        order. The list is empty when *content* is ``None`` or empty, or
        when nothing matches.
    """
    if not content:
        # A failed fetch yields None; treat it as "no images" instead of
        # letting the regex engine raise TypeError.
        return []
    return _LAZY_IMAGE_PATTERN.findall(content)

def write_to_file(download_url_list, save_dir=r'C:\Users\Administrator\Desktop\mzt'):
    """Download every image in *download_url_list* into *save_dir*.

    Args:
        download_url_list: Iterable of image URLs to download.
        save_dir: Folder the images are written to. It is created (with any
            missing parents) when it does not exist. Defaults to the folder
            this script has always used.

    Each file is named after the last path segment of its URL, so the saved
    file keeps the extension the server used. URLs that fail to download or
    do not answer with HTTP 200 are reported and skipped.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'
    }
    # Build absolute target paths instead of chdir-ing: the process working
    # directory stays untouched, and exist_ok avoids a check-then-create race.
    os.makedirs(save_dir, exist_ok=True)
    for each in download_url_list:
        # Last path segment of the URL, with any fragment/query stripped.
        filename = each.split('#', 1)[0].split('?', 1)[0].rstrip('/').rsplit('/', 1)[-1]
        if not filename:
            continue
        try:
            # Without a timeout one stalled download would block the batch.
            r = requests.get(each, headers=headers, timeout=10)
        except requests.RequestException as exc:
            print('下载失败:', each, exc)
            continue
        if r.status_code != 200:
            # Do not save an error page's body as if it were an image.
            print('下载失败:', each, r.status_code)
            continue
        with open(os.path.join(save_dir, filename), 'wb') as f:
            f.write(r.content)
def main(url):
    """Process one listing page: fetch it, extract image URLs, download them.

    Args:
        url: Address of the page to process.
    """
    html = get_one_page(url)
    if html is None:
        # get_one_page returns None when the response status is not 200;
        # skip this page instead of handing None to the parser.
        return
    download_url_list = parse_one_page(html)
    write_to_file(download_url_list)

if __name__ == '__main__':
    # Visit comment pages 1 through 399 in order, pausing two seconds after
    # each page before requesting the next one.
    base_url = 'https://www.mzitu.com/zipai/'
    page_number = 1
    while page_number < 400:
        url = ''.join([base_url, 'comment-page-', str(page_number), '/#comments'])
        main(url)
        time.sleep(2)
        page_number += 1
   
页: [1]
查看完整版本: 第一次写的爬MM图片,请指教