|

楼主 |
发表于 2023-8-18 15:16:47
|
显示全部楼层
- import requests
- from pyquery import PyQuery
- from bs4 import BeautifulSoup
- import os
- from selenium import webdriver
- from time import sleep
- from PIL import Image
- driver = webdriver.Chrome()
- driver.get("https://pvp.qq.com/web201605/herolist.shtml")
- sleep(1) # 等待js加载
- source = driver.page_source
- driver.close()
- #html = requests.get("https://pvp.qq.com/web201605/herolist.shtml").content
- doc = PyQuery(source)
- #print(doc)
- items = doc('.herolist > li').items()
- href_datas = {}
- for item in items:
- url = item.find('img').attr('src')
- urls = 'https:' + url
- name = item.find('a').text()
- href = item.find('a').attr('href')
- # href_lists.append(href)
- href_datas[name] = "https://pvp.qq.com/web201605/" + href
- '''
- # os.path.exists(path) 判断文件是否存在 固定语法
- path = f"E:/Pythonfile/王者荣耀英雄图片"
- isexists = os.path.exists(path)
- if not isexists:
- os.makedirs(path)
- url_content = requests.get(urls).content
- print(f"正在爬取{name}图片中...",urls)
- with open("./王者荣耀英雄图片/" + name + '.jpg','wb') as file:
- file.write(url_content)
- '''
- for name,pf_url in href_datas.items():
- print(name,pf_url)
- driver = webdriver.Chrome()
- driver.get(pf_url)
- sleep(1) # 等待js加载
- pf_html = driver.page_source
- driver.close()
- # html = requests.get(url).content
- doc = PyQuery(pf_html)
- pf_items = doc('.pic-pf-list.pic-pf-list3>li').items()
- for pf_item in pf_items:
- pf_url = pf_item.find('img').attr('src')
- pf_urls = 'https:' + pf_url
- #print(pf_urls)
- pf_name = pf_item.find('p').text()
- url_content = requests.get(pf_urls).content
- print(f"正在爬取{name}_{pf_name}皮肤图片中...", pf_urls)
-
- # os.path.exists(path) 判断文件是否存在 固定语法
- pf_path = f"E:/Pythonfile/王者荣耀皮肤图片"
- isexists = os.path.exists(pf_path)
- if not isexists:
- os.makedirs(pf_path)
- with open("./王者荣耀皮肤图片/" + name + '_' + pf_name + '.jpg', 'wb') as file:
- file.write(url_content)
- # 打开图片并调整大小
- img = Image.open(f"{pf_path}/" + name + '_' + pf_name + '.jpg')
- img = img.resize((500, 500)) # 设置统一的图片大小,修改为你想要的大小
- img.save(f"{pf_path}/" + name + '_' + pf_name + '.jpg')
复制代码D:\python39\python.exe E:/Pythonfile/herolist.py
亚连 https://pvp.qq.com/web201605/herodetail/514.shtml
正在爬取亚连_追忆之刃皮肤图片中... https://game.gtimg.cn/images/yxzj/img201606/heroimg/514/514-smallskin-1.jpg
姬小满 https://pvp.qq.com/web201605/herodetail/564.shtml
正在爬取姬小满_武道奇才皮肤图片中... https://game.gtimg.cn/images/yxzj/img201606/heroimg/564/564-smallskin-1.jpg
正在爬取姬小满_零食大作战皮肤图片中... https://game.gtimg.cn/images/yxzj/img201606/heroimg/564/564-smallskin-2.jpg
Traceback (most recent call last):
File "D:\python39\lib\site-packages\PIL\JpegImagePlugin.py", line 639, in _save
rawmode = RAWMODE[im.mode]
KeyError: 'RGBA'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "E:\Pythonfile\herolist.py", line 78, in <module>
img = Image.open(f"{pf_path}/" + name + '_' + pf_name + '.jpg')
File "D:\python39\lib\site-packages\PIL\Image.py", line 2413, in save
save_handler(self, fp, filename)
File "D:\python39\lib\site-packages\PIL\JpegImagePlugin.py", line 642, in _save
raise OSError(msg) from e
OSError: cannot write mode RGBA as JPEG
前面的几张图片可以正常保存,后面的报错了OSError: cannot write mode RGBA as JPEG |
|