|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- from selenium import webdriver
- from selenium.webdriver.common.by import By
- from selenium.webdriver.support import expected_conditions as EC
- from selenium.webdriver.support.wait import WebDriverWait
- from selenium.common.exceptions import TimeoutException
- import time
- import requests
- import os
# Run Chrome in headless mode so no browser window is shown while crawling.
URL = 'http://jandan.net/ooxx'
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
# `options=` is the supported keyword; the older `chrome_options=` spelling is
# deprecated and rejected by newer Selenium releases.
browser = webdriver.Chrome(options=chrome_options)
# Shared explicit wait: lookups made through `wait` give up after 10 seconds.
wait = WebDriverWait(browser, 10)
browser.get(URL)
def next_page():
    """Wait for the 'previous-comment-page' element to be clickable and return it.

    The element is only located here; clicking it is left to the caller.
    """
    locator = (By.CLASS_NAME, 'previous-comment-page')
    return wait.until(EC.element_to_be_clickable(locator))
def cur_page():
    """Return the text of the 'current-comment-page' element once it is present."""
    locator = (By.CLASS_NAME, 'current-comment-page')
    indicator = wait.until(EC.presence_of_element_located(locator))
    return indicator.text
def parse_html(lyst):
    """Collect the JPEG image links found on the currently loaded page.

    :param lyst: list that receives the image URLs; it is extended in place.
    """
    # find_elements(By.XPATH, ...) is used instead of find_elements_by_xpath,
    # which newer Selenium releases no longer provide.
    imgs_info = browser.find_elements(By.XPATH, '//*[@id="comments"]/ol/li//p/img')
    for element in imgs_info:
        src = element.get_attribute('src')
        # An <img> without a src yields None; skip it instead of crashing.
        # Keeping only 'jpg' links drops the '.gif' advertisement images.
        if src and src.endswith('jpg'):
            lyst.append(src)
def dowmloader(url, timeout=10):
    """Download one image and return its raw bytes.

    :param url: address of the image to fetch.
    :param timeout: seconds to wait for the server before giving up, so a
        stalled connection cannot hang the whole crawl.
    :return: the response body as bytes when the server answers 200;
        None for any other status code or when the request itself fails.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Best effort: a single failed download must not abort the run.
        return None
    if response.status_code == 200:
        return response.content
    return None
def save_img(img_content, num):
    """Write one downloaded image to '<num>.jpg' in the current directory.

    :param img_content: binary image data, or None when the download failed.
    :param num: sequence number used as the file name.
    """
    if img_content is None:
        # Nothing was downloaded: avoid leaving an empty file behind and
        # avoid the TypeError that writing None would raise.
        return
    with open(str(num) + '.jpg', 'wb') as f:
        f.write(img_content)
def jandan_crawlers(lyst):
    """Walk from the current page back to page 1, collecting image links.

    :param lyst: list that receives the image URLs; it is extended in place.
    :return: the same list, always -- including when a page element times
        out, so callers can iterate the result unconditionally.
    """
    try:
        current_page = cur_page()
        # Drop the first and last character around the number
        # (presumably the text looks like '[123]' -- confirm against the site).
        page_no = int(current_page[1:-1])
        while page_no >= 1:
            print('正在抓取煎蛋网第%d页图片' % page_no)
            parse_html(lyst)
            if page_no == 1:
                # Last page to scrape: stop here instead of waiting on a
                # further button whose click is not needed.
                break
            time.sleep(3)  # pause between page loads
            next_page().click()
            page_no -= 1
    except TimeoutException:
        # A page element did not appear in time; keep what was collected.
        pass
    return lyst
if __name__ == '__main__':
    try:
        # Create the output directory on first run, then save images into it.
        os.makedirs('picture', exist_ok=True)
        os.chdir('picture')
        lyst = []
        num = 1
        # The crawler fills `lyst` in place; fall back to it if nothing is returned.
        url_info = jandan_crawlers(lyst) or lyst
        for url in url_info:
            img_content = dowmloader(url)
            if img_content is None:
                # Download failed: skip it so one bad link does not stop the run.
                continue
            print('正在保存图片:' + str(num) + '.jpg')
            save_img(img_content, num)
            num += 1
        print('煎蛋网图片抓取完成')
    finally:
        # quit() also shuts down the driver process, and runs even on errors.
        browser.quit()
复制代码 |
|