import requests
from pyquery import PyQuery
from bs4 import BeautifulSoup
import os
from selenium import webdriver
from time import sleep
from PIL import Image
driver = webdriver.Chrome()
driver.get("https://pvp.qq.com/web201605/herolist.shtml")
sleep(1) # 等待js加载
source = driver.page_source
driver.close()
#html = requests.get("https://pvp.qq.com/web201605/herolist.shtml").content
doc = PyQuery(source)
#print(doc)
items = doc('.herolist > li').items()
href_datas = {}
for item in items:
url = item.find('img').attr('src')
urls = 'https:' + url
name = item.find('a').text()
href = item.find('a').attr('href')
# href_lists.append(href)
href_datas[name] = "https://pvp.qq.com/web201605/" + href
'''
# os.path.exists(path) 判断文件是否存在 固定语法
path = f"E:/Pythonfile/王者荣耀英雄图片"
isexists = os.path.exists(path)
if not isexists:
os.makedirs(path)
url_content = requests.get(urls).content
print(f"正在爬取{name}图片中...",urls)
with open("./王者荣耀英雄图片/" + name + '.jpg','wb') as file:
file.write(url_content)
'''
for name,pf_url in href_datas.items():
print(name,pf_url)
driver = webdriver.Chrome()
driver.get(pf_url)
sleep(1) # 等待js加载
pf_html = driver.page_source
driver.close()
# html = requests.get(url).content
doc = PyQuery(pf_html)
pf_items = doc('.pic-pf-list.pic-pf-list3>li').items()
for pf_item in pf_items:
pf_url = pf_item.find('img').attr('src')
pf_urls = 'https:' + pf_url
#print(pf_urls)
pf_name = pf_item.find('p').text()
url_content = requests.get(pf_urls).content
print(f"正在爬取{name}_{pf_name}皮肤图片中...", pf_urls)
# 调整分辨率为统一大小
image = Image.open(BytesIO(url_content))
image = image.resize((500, 500)) # 设置统一的图片尺寸,这里假设设置为500x500
url_content = image.tobytes()
# os.path.exists(path) 判断文件是否存在 固定语法
pf_path = f"E:/Pythonfile/王者荣耀皮肤图片"
isexists = os.path.exists(pf_path)
if not isexists:
os.makedirs(pf_path)
with open("./王者荣耀皮肤图片/" + name + '_' + pf_name + '.jpg', 'wb') as file:
file.write(url_content)
# 打开图片
img = Image.open(file.name)
# 统一图片大小
img = img.resize((800, 600))
# 保存修改后的图片
img.save(file.name)
D:\python39\python.exe E:/Pythonfile/herolist.py
亚连 https://pvp.qq.com/web201605/herodetail/514.shtml
正在爬取亚连_追忆之刃皮肤图片中... https://game.gtimg.cn/images/yxzj/img201606/heroimg/514/514-smallskin-1.jpg
Traceback (most recent call last):
File "E:\Pythonfile\herolist.py", line 76, in <module>
img = Image.open(file.name)
File "D:\python39\lib\site-packages\PIL\Image.py", line 3023, in open
raise UnidentifiedImageError(
PIL.UnidentifiedImageError: cannot identify image file './王者荣耀皮肤图片/亚连_追忆之刃.jpg'
报错了 |