| 
 | 
 
 
 楼主 |
发表于 2023-8-18 15:16:47
|
显示全部楼层
 
 
 
- import requests
 
 - from pyquery import PyQuery
 
 - from bs4 import BeautifulSoup
 
 - import os
 
 - from selenium import webdriver
 
 - from time import sleep
 
 - from PIL import Image
 
  
- driver = webdriver.Chrome()
 
 - driver.get("https://pvp.qq.com/web201605/herolist.shtml")
 
 - sleep(1) # 等待js加载
 
 - source = driver.page_source
 
 - driver.close()
 
  
- #html = requests.get("https://pvp.qq.com/web201605/herolist.shtml").content
 
 - doc = PyQuery(source)
 
  
- #print(doc)
 
 - items = doc('.herolist > li').items()
 
 - href_datas = {}
 
 - for item in items:
 
 -     url = item.find('img').attr('src')
 
 -     urls = 'https:' + url
 
  
-     name = item.find('a').text()
 
 -     href = item.find('a').attr('href')
 
 -     # href_lists.append(href)
 
 -     href_datas[name] = "https://pvp.qq.com/web201605/" + href
 
 -     '''
 
 -     # os.path.exists(path) 判断文件是否存在 固定语法
 
 -     path = f"E:/Pythonfile/王者荣耀英雄图片"
 
 -     isexists = os.path.exists(path)
 
 -     if not isexists:
 
 -         os.makedirs(path)
 
 -     url_content = requests.get(urls).content
 
 -     print(f"正在爬取{name}图片中...",urls)
 
 -     with open("./王者荣耀英雄图片/" + name + '.jpg','wb') as file:
 
 -         file.write(url_content)
 
 -     '''
 
 - for name,pf_url in href_datas.items():
 
 -     print(name,pf_url)
 
 -     driver = webdriver.Chrome()
 
 -     driver.get(pf_url)
 
 -     sleep(1)  # 等待js加载
 
 -     pf_html = driver.page_source
 
 -     driver.close()
 
 -     # html = requests.get(url).content
 
 -     doc = PyQuery(pf_html)
 
  
-     pf_items = doc('.pic-pf-list.pic-pf-list3>li').items()
 
  
-     for pf_item in pf_items:
 
 -         pf_url = pf_item.find('img').attr('src')
 
 -         pf_urls = 'https:' + pf_url
 
 -         #print(pf_urls)
 
 -         pf_name = pf_item.find('p').text()
 
 -         url_content = requests.get(pf_urls).content
 
 -         print(f"正在爬取{name}_{pf_name}皮肤图片中...", pf_urls)
 
 -       
 
 -         # os.path.exists(path) 判断文件是否存在 固定语法
 
 -         pf_path = f"E:/Pythonfile/王者荣耀皮肤图片"
 
 -         isexists = os.path.exists(pf_path)
 
 -         if not isexists:
 
 -             os.makedirs(pf_path)
 
 -         with open("./王者荣耀皮肤图片/" + name + '_' + pf_name + '.jpg', 'wb') as file:
 
 -             file.write(url_content)
 
  
-         # 打开图片并调整大小
 
 -         img = Image.open(f"{pf_path}/" + name + '_' + pf_name + '.jpg')
 
 -         img = img.resize((500, 500))  # 设置统一的图片大小,修改为你想要的大小
 
 -         img.save(f"{pf_path}/" + name + '_' + pf_name + '.jpg')
 
  复制代码D:\python39\python.exe E:/Pythonfile/herolist.py 
亚连 https://pvp.qq.com/web201605/herodetail/514.shtml 
正在爬取亚连_追忆之刃皮肤图片中... https://game.gtimg.cn/images/yxzj/img201606/heroimg/514/514-smallskin-1.jpg 
姬小满 https://pvp.qq.com/web201605/herodetail/564.shtml 
正在爬取姬小满_武道奇才皮肤图片中... https://game.gtimg.cn/images/yxzj/img201606/heroimg/564/564-smallskin-1.jpg 
正在爬取姬小满_零食大作战皮肤图片中... https://game.gtimg.cn/images/yxzj/img201606/heroimg/564/564-smallskin-2.jpg 
Traceback (most recent call last): 
  File "D:\python39\lib\site-packages\PIL\JpegImagePlugin.py", line 639, in _save 
    rawmode = RAWMODE[im.mode] 
KeyError: 'RGBA' 
 
The above exception was the direct cause of the following exception: 
 
Traceback (most recent call last): 
  File "E:\Pythonfile\herolist.py", line 78, in <module> 
    img = Image.open(f"{pf_path}/" + name + '_' + pf_name + '.jpg') 
  File "D:\python39\lib\site-packages\PIL\Image.py", line 2413, in save 
    save_handler(self, fp, filename) 
  File "D:\python39\lib\site-packages\PIL\JpegImagePlugin.py", line 642, in _save 
    raise OSError(msg) from e 
OSError: cannot write mode RGBA as JPEG  
 
前面的几张图片可以正常保存,后面的报错了OSError: cannot write mode RGBA as JPEG |   
 
 
 
 |