爬取明日方舟立绘

Narcissus1 · 发表于 2021-3-10 09:25:16

马上注册，结交更多好友，享用更多功能^_^

您需要登录才可以下载或查看，没有账号？立即注册

x

代码写得很烂，看看就行

爬虫基于明日方舟的Wiki：http://prts.wiki/w/%E9%A6%96%E9%A1%B5

import requests
import urllib
import os
from bs4 import BeautifulSoup
# Prefix of every wiki page URL; get_url() appends the URL-quoted operator name.
pre_url = "http://prts.wiki/w/"
# Output root under the current working directory.  Backslash separators are
# kept, including the trailing one, because ins_png() appends its sub-folder
# name directly to this string.  The trailing separator must be written as an
# escaped backslash ("\\"); a single "\" would escape the closing quote and
# leave the string literal unterminated.
PATH = os.getcwd() + "\\Arknights\\"
print(PATH)
def get_res(url):
    """Fetch *url* with an HTTP GET and return the response object.

    A falsy response is reported on stdout but is still returned, so the
    caller always receives the response object.
    """
    response = requests.get(url)
    if response:
        return response
    # Falsy response: warn, then hand it back unchanged.
    print(url, "is not a correct url;")
    return response
def get_names(html):
    """Extract names from the ``data-cn`` attributes of a fetched page.

    *html* is a response object whose ``.text`` holds the page markup.
    Every ``<div class="smwdata">`` element is inspected; elements with a
    missing or empty ``data-cn`` attribute are skipped, and the remaining
    values are returned, whitespace-stripped, in document order.
    """
    document = BeautifulSoup(html.text, 'html.parser')
    raw_values = (
        div.get("data-cn")
        for div in document.find_all('div', class_='smwdata')
    )
    # The emptiness filter is applied before stripping, as in the loop form.
    return [value.strip() for value in raw_values if value]
def get_url(name):
    """Build the wiki page URL for *name* and announce it on stdout.

    The name is percent-encoded and appended to the module-level
    ``pre_url`` prefix.  Returns the resulting URL string.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded, so relying on it only
    # works when some other module happens to have imported it first.
    from urllib.parse import quote

    url = pre_url + quote(name)
    print("正在爬取", name, "的网页，网址为：", url)
    return url
def pic_url(url):
    """Collect image links from the page at *url*.

    Inside every ``<div id="charimg">`` the ``img-stage1`` and ``img-stage2``
    containers are searched, in that order, for an ``<img>`` tag whose
    ``src`` attribute is non-empty.  A success or error line is printed
    depending on whether anything was found.

    Returns a list of ``src`` strings (possibly empty).
    """
    html = get_res(url)
    soup = BeautifulSoup(html.text, 'html.parser')
    images = []
    for container in soup.find_all('div', id='charimg'):
        # Both stages go through the same checks, so a stage without an
        # <img> tag, or with an empty src, is skipped instead of raising
        # AttributeError or putting None into the result list.
        for stage_id in ('img-stage1', 'img-stage2'):
            stage = container.find('div', id=stage_id)
            if stage is None:
                continue
            img = stage.find('img')
            if img is None:
                continue
            src = img.get('src')
            if src:
                images.append(src)
    if images:
        print("成功从网页中找到图片链接!")
    else:
        print("ERROR!!!in", url)
    return images
def ins_png(urls, name):
    """Download each link in *urls* and save it as ``<name>_<n>.png``.

    Files are written to the ``立绘`` sub-folder of the module-level ``PATH``
    (created on demand) and numbered from 1 in the order of *urls*.  An image
    whose target file already exists is skipped without being downloaded
    again, and a failed download is reported and skipped; in both cases the
    remaining links are still processed.
    """
    path = PATH + '立绘\\'
    os.makedirs(path, exist_ok=True)
    for index, link in enumerate(urls, start=1):
        target = path + name + '_' + str(index) + '.png'
        # Check before requesting so an existing file costs no network
        # traffic, and continue (not return) so later images are not dropped.
        if os.path.exists(target):
            continue
        res = requests.get(link)
        if not res:
            print(link, "is not a correct url;")
            continue
        # The context manager closes the file even if the write fails.
        with open(target, 'wb') as img:
            img.write(res.content)
        print(name, index, "已经保存到", path)
def main_code(home_url):
    """Run the whole crawl starting from the listing page *home_url*.

    The listing page is fetched and the names on it extracted; for each
    name the page URL is built, its image links are collected, and the
    images are saved.  A completion message is printed at the end.
    """
    listing = get_res(home_url)
    for operator in get_names(listing):
        page = get_url(operator)
        ins_png(pic_url(page), operator)
    print("爬取皮肤函数运行完毕, 请于文件夹中查看\n")
if __name__ == "__main__":
    # Entry point when run as a script: crawl starting from this wiki page.
    url0 = "http://prts.wiki/w/%E5%B9%B2%E5%91%98%E4%B8%80%E8%A7%88"
    main_code(home_url=url0)

复制代码

良木 · 发表于 2021-3-12 13:09:52

瞅瞅

账号		自动登录	找回密码
密码			立即注册

[技术交流] 爬取明日方舟立绘

马上注册，结交更多好友，享用更多功能^_^