爬取明日方舟立绘,Python交流,编程语言专区,鱼C论坛

Narcissus1 发表于 2021-3-10 09:25:16

爬取明日方舟立绘

代码写得很烂，看看就行。

爬虫基于明日方舟的Wiki：http://prts.wiki/w/%E9%A6%96%E9%A1%B5

import requests
import urllib
import os
from bs4 import BeautifulSoup

# Base URL of the wiki; a percent-encoded page name is appended to it.
pre_url = "http://prts.wiki/w/"
# Output root below the current working directory (Windows-style separators).
PATH = "{}\\Arknights\\".format(os.getcwd())
# Echo the output root so the user can see where files will be written.
print(PATH)

def get_res(url):
    """Fetch *url* with an HTTP GET and return the ``requests`` response.

    An unsuccessful response is reported on stdout but is still returned,
    so the caller always receives the response object.
    """
    response = requests.get(url)
    # ``response.ok`` is exactly what the truthiness of a Response evaluates.
    if not response.ok:
        print(url, "is not a correct url;")
    return response

def get_names(html):
    """Collect the ``data-cn`` values of the ``div.smwdata`` elements.

    ``html`` is a response-like object whose ``.text`` attribute holds the
    page source. Elements whose ``data-cn`` attribute is missing or empty
    are skipped; the remaining values are returned with surrounding
    whitespace stripped, in the order they were found.
    """
    document = BeautifulSoup(html.text, 'html.parser')
    raw_values = (
        tag.get("data-cn")
        for tag in document.find_all('div', class_='smwdata')
    )
    return [value.strip() for value in raw_values if value]

def get_url(name):
    """Build the wiki page URL for *name* and announce it on stdout.

    The name is percent-encoded and appended to the module-level
    ``pre_url``. Returns the resulting URL string.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not load
    # ``urllib.parse``; it is only reachable when another module happens to
    # have imported it already.
    import urllib.parse

    url = pre_url + urllib.parse.quote(name)
    print("正在爬取", name, "的网页，网址为：", url)
    return url

def pic_url(url):
    """Return the image URLs found on the wiki page at *url*.

    Every ``div#charimg`` is searched for the ``div#img-stage1`` and
    ``div#img-stage2`` containers (in that order), and the ``src`` of the
    first ``<img>`` inside each one is collected. A container that has no
    ``<img>``, or an ``<img>`` without a ``src``, is skipped. Each ``src`` is
    resolved against *url* so relative or scheme-less links become absolute;
    links that are already absolute are left as they are.

    Prints a success line when at least one image was found and an error
    line otherwise. Returns the (possibly empty) list of URLs.
    """
    from urllib.parse import urljoin

    html = get_res(url)
    soup = BeautifulSoup(html.text, 'html.parser')
    images = []
    for container in soup.find_all('div', id='charimg'):
        for stage_id in ('img-stage1', 'img-stage2'):
            stage = container.find('div', id=stage_id)
            if stage is None:
                continue
            img = stage.find('img')
            # Guard both lookups: a stage without an <img>, or an <img>
            # without a src, must neither crash nor add a None entry.
            src = img.get('src') if img is not None else None
            if src:
                images.append(urljoin(url, src))
    if images:
        print("成功从网页中找到图片链接!")
    else:
        print("ERROR!!!in", url)
    return images

def ins_png(urls, name):
    """Download each image in *urls* into the ``立绘`` folder under ``PATH``.

    Files are named ``<name>_<index>.png`` with a 1-based index that follows
    the position of the URL in *urls*. The folder is created when missing.
    An image whose target file already exists is skipped before any request
    is made, and an image whose request is unsuccessful is skipped as well;
    in both cases the remaining images are still processed.
    """
    path = PATH + '立绘\\'
    os.makedirs(path, exist_ok=True)
    for index, image_url in enumerate(urls, start=1):
        target = path + name + '_' + str(index) + '.png'
        # Check first so an already-saved image costs no network request,
        # and skip only this file rather than abandoning the rest.
        if os.path.exists(target):
            continue
        # Request the single image URL, not the whole list.
        res = requests.get(image_url)
        if not res:
            continue
        with open(target, 'wb') as img:
            img.write(res.content)
        print(name, index, "已经保存到", path)

def main_code(home_url):
    """Run the whole crawl starting from the listing page *home_url*.

    The listing page is fetched and the names on it are extracted; for each
    name the page URL is built, its image links are collected and the images
    are saved. A completion message is printed at the end.
    """
    listing = get_res(home_url)
    for name in get_names(listing):
        # Same pipeline per name: page URL -> image links -> saved files.
        ins_png(pic_url(get_url(name)), name)
    print("爬取皮肤函数运行完毕, 请于文件夹中查看\n")

if __name__ == "__main__":
    # Script entry point: start the crawl from the wiki's listing page
    # (the percent-encoded page title decodes to "干员一览").
    main_code("http://prts.wiki/w/%E5%B9%B2%E5%91%98%E4%B8%80%E8%A7%88")

良木发表于 2021-3-12 13:09:52

瞅瞅

页: [1]

鱼C论坛's Archiver

爬取明日方舟立绘