import base64
import re
from typing import Optional
from urllib.parse import quote_plus, urljoin

import requests
from pyquery import PyQuery
from selenium import webdriver
# Base URL of the Google mirror used both for the search and for resolving
# relative links found in the result page.
SEARCH_HOST = 'https://www.google.com.hk/'
# Visible labels of the "images" tab in the English and Chinese UI.
IMAGE_TAB_LABELS = ('Image', '图片')
# Seconds to wait for an image download before giving up.
REQUEST_TIMEOUT = 10
# Header of an inline ``data:image/...;base64,`` URI (stripped before decoding).
DATA_URI_PREFIX = re.compile(r'^data:image/.+?;base64,')


def build_search_url(word: str) -> str:
    """Return the search URL for *word*.

    The word is percent-encoded so that spaces, ``&``, ``#`` and non-ASCII
    characters cannot corrupt the query string.
    """
    return SEARCH_HOST + 'search?q=' + quote_plus(word)


def find_image_tab_url(page_source) -> Optional[str]:
    """Return the absolute URL of the image-results tab in *page_source*.

    Scans the ``.hdtb-mitem`` elements for one whose text is one of
    ``IMAGE_TAB_LABELS`` and resolves its link against ``SEARCH_HOST``.
    Returns ``None`` when no such tab (or no link inside it) is present.
    """
    for item in PyQuery(page_source)('.hdtb-mitem').items():
        if item.text() not in IMAGE_TAB_LABELS:
            continue
        href = item('a').attr('href')
        if href:
            # urljoin copes with both root-relative ("/search?...") and
            # absolute hrefs, and never produces a double slash.
            return urljoin(SEARCH_HOST, href)
    return None


def pick_image_source(src: Optional[str], data_src: Optional[str]) -> Optional[str]:
    """Choose the usable image source of an ``<img>`` element.

    ``src`` wins when it is non-empty; otherwise ``data-src`` is used.
    Returns ``None`` when neither attribute carries a value.
    """
    return src or data_src or None


def decode_data_uri(source: str) -> bytes:
    """Decode a ``data:image/...;base64,`` URI into raw image bytes.

    A string without the data-URI header is decoded as plain base64.
    Raises ``binascii.Error`` when the payload is not valid base64.
    """
    return base64.b64decode(DATA_URI_PREFIX.sub('', source, count=1))


def fetch_image_bytes(source: str) -> bytes:
    """Return the image bytes behind *source*.

    ``http(s)`` sources are downloaded (with a timeout, and an HTTP error
    status raises ``requests.HTTPError`` instead of yielding an error page);
    anything else is treated as an inline base64 data URI.
    """
    if source.startswith('http'):
        response = requests.get(source, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        return response.content
    return decode_data_uri(source)


def main() -> None:
    """Prompt for a word and save the first image result as ``<word>.jpg``.

    Opens an Edge WebDriver session, runs the search, follows the image tab
    and writes the first image that has a usable source to the current
    working directory. Does nothing when the image tab cannot be found.
    """
    word = input('输入爬取的单词图片:')
    driver = webdriver.Edge()
    try:
        driver.get(build_search_url(word))
        # NOTE(review): page_source is read right after get(); if the page
        # finishes rendering later, an explicit wait may be needed here.
        image_page = find_image_tab_url(driver.page_source)
        if image_page is None:
            return
        driver.get(image_page)

        img_name = '%s.jpg' % word
        thumbnails = PyQuery(driver.page_source)('#islrg div div a div img')
        for img in thumbnails.items():
            source = pick_image_source(img.attr('src'), img.attr('data-src'))
            if source is None:
                # No src and no data-src: nothing to download, try the next.
                continue
            with open(img_name, 'wb') as f:
                f.write(fetch_image_bytes(str(source)))
            # Only the first image is wanted.
            break
    finally:
        # Always shut the browser down, even if scraping failed midway.
        driver.quit()


if __name__ == '__main__':
    main()
记得安装 pyquery 库;另外,代码中的 webdriver.Edge() 记得传入驱动路径——我是因为已经把驱动所在目录加进了环境变量,才不用传入路径。
你也可以改用 webdriver.Chrome(),都行。