第一次写的爬MM图片,请指教
学 Python 有一段时间了,看到论坛里很多人写了爬 MM 图的程序,今天心血来潮,自己也练练手,请指教。爬取的网页为:https://www.mzitu.com/zipai/
import requests
import re
import os.path
import time
def get_one_page(url, timeout=10):
    """Fetch a single page and return its HTML text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before requests raises a
            timeout error. Without it a stalled connection would block
            the crawl indefinitely.

    Returns:
        The response text when the server answers with HTTP 200,
        otherwise None.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """
    # Send a desktop Firefox User-Agent instead of the requests default.
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'
    }
    r = requests.get(url, headers=headers, timeout=timeout)
    if r.status_code != 200:
        # Signal failure explicitly so the caller can skip this page.
        return None
    print('获取成功...')
    return r.text
def parse_one_page(content):
    """Extract image addresses from a page's HTML.

    Looks inside each ``<p>...</p>`` element for a tag carrying
    ``class="lazy"`` and captures the value of the following
    ``original="..."`` attribute.

    Args:
        content: HTML text of the page, or None/empty when the page could
            not be fetched.

    Returns:
        A list of the captured attribute values in document order; an
        empty list when content is None or empty.
    """
    if not content:
        # A failed fetch yields None; there is nothing to parse.
        return []
    # re.S lets '.' span newlines so a <p> broken across lines still matches.
    pattern = re.compile(r'<p>.*?class="lazy".*?original="(.*?)".*?</p>', re.S)
    return pattern.findall(content)
def write_to_file(download_url_list, save_dir=r'C:\Users\Administrator\Desktop\mzt', timeout=10):
    """Download each image address into save_dir.

    The directory is created if it does not exist. Files are written using
    full paths, so the process working directory is left untouched.

    Args:
        download_url_list: Iterable of image addresses to download.
        save_dir: Directory that receives the downloaded files.
        timeout: Seconds to wait for each download before giving up.

    Returns:
        None. A download that fails (network error or non-200 status) is
        reported on stdout and skipped so the remaining images still get
        saved.
    """
    # Send a desktop Firefox User-Agent instead of the requests default.
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:79.0) Gecko/20100101 Firefox/79.0'
    }
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(save_dir, exist_ok=True)
    for each in download_url_list:
        try:
            r = requests.get(each, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            print('下载失败:', each, exc)
            continue
        if r.status_code != 200:
            # Do not save an error page under an image file name.
            print('下载失败:', each, r.status_code)
            continue
        # File name: the six characters before the address's last four
        # characters, always stored with a .png suffix.
        filename = os.path.join(save_dir, each[-10:-4] + '.png')
        with open(filename, 'wb') as f:
            f.write(r.content)
def main(url):
    """Process one page: fetch it, extract image addresses, download them.

    Args:
        url: Address of the page to process.

    Returns:
        None. When the page cannot be fetched (get_one_page returns None)
        the page is reported on stdout and skipped.
    """
    html = get_one_page(url)
    if html is None:
        # Nothing to parse; passing None on would raise inside the regex call.
        print('获取失败,跳过:', url)
        return
    download_url_list = parse_one_page(html)
    write_to_file(download_url_list)
if __name__ == '__main__':
    base_url = 'https://www.mzitu.com/zipai/'
    # Lazily build the addresses of comment pages 1 through 399.
    page_urls = (
        ''.join((base_url, 'comment-page-', str(page_no), '/#comments'))
        for page_no in range(1, 400)
    )
    for page_url in page_urls:
        main(page_url)
        # Pause two seconds between pages.
        time.sleep(2)
页:
[1]