|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
"""Scrape the Dangdang e-book "新书榜" ranking page into an Excel workbook.

The script opens the ranking page in Firefox, switches to the "新书榜" tab,
scrolls to the bottom repeatedly so lazily loaded entries are rendered, then
parses the resulting HTML and writes one spreadsheet row per book.
"""
import re
import time

from bs4 import BeautifulSoup
from openpyxl import Workbook
from selenium import webdriver
from selenium.webdriver.common.by import By

GECKODRIVER_PATH = "C:\\Program Files\\Mozilla Firefox\\geckodriver.exe"
RANK_URL = 'http://e.dangdang.com/rank_detail_page.html?listType=ddds_sale&channelType=all'
OUTPUT_FILE = "当当网电子书新书排名.xlsx"
SHEET_TITLE = "Sheet1"
# Column headers: rank, title, author, price, summary.
HEADERS = ['排名', '书名', '作者', '价格', '内容介绍']
# Matches the price starting at the currency sign, up to the end of the line.
PRICE_PATTERN = re.compile(r'¥[0-9].*')

PAGE_LOAD_WAIT = 3   # seconds to wait after opening the page
TAB_SWITCH_WAIT = 5  # seconds to wait after clicking the tab
SCROLL_WAIT = 3      # seconds to wait after each scroll
SCROLL_ROUNDS = 10   # number of scroll-to-bottom passes


def _start_firefox(driver_path):
    """Launch Firefox using the geckodriver at *driver_path*.

    Newer Selenium releases take the driver path through a ``Service``
    object, while Selenium 3 only accepts ``executable_path``; try the
    former and fall back to the latter.
    """
    try:
        from selenium.webdriver.firefox.service import Service
        return webdriver.Firefox(service=Service(executable_path=driver_path))
    except TypeError:
        # This Selenium version does not know the ``service`` keyword.
        return webdriver.Firefox(executable_path=driver_path)


def _load_ranking_html(browser):
    """Open the ranking page in *browser* and return its HTML after scrolling."""
    browser.get(RANK_URL)
    time.sleep(PAGE_LOAD_WAIT)
    browser.find_element(By.XPATH, '//div[contains(text(),"新书榜")]').click()
    time.sleep(TAB_SWITCH_WAIT)
    # Scroll to the bottom several times so further entries get loaded.
    for _ in range(SCROLL_ROUNDS):
        browser.execute_script('window.scrollTo(0,document.body.scrollHeight)')
        time.sleep(SCROLL_WAIT)
    return browser.page_source


def _text_of(node, class_name):
    """Return the text of the first descendant with *class_name*, or ''."""
    found = node.find(class_=class_name)
    return found.text if found is not None else ''


def _book_row(book):
    """Build the spreadsheet row (in HEADERS order) for one ``book`` element."""
    price = PRICE_PATTERN.search(_text_of(book, 'price'))
    return [
        _text_of(book, 'index').replace('、', ''),
        _text_of(book, 'tit'),
        _text_of(book, 'auth'),
        # No "¥<digit>" price on the entry: leave the cell empty.
        price.group(0) if price else '',
        _text_of(book, 'summury'),
    ]


def main():
    """Scrape the ranking and save it to OUTPUT_FILE."""
    browser = _start_firefox(GECKODRIVER_PATH)
    try:
        html = _load_ranking_html(browser)
    finally:
        # Always close the browser, even if navigation fails.
        browser.quit()

    # Save to a spreadsheet.
    wb = Workbook()
    ws = wb.active
    ws.title = SHEET_TITLE
    for column, header in enumerate(HEADERS, start=1):
        ws.cell(1, column, header)

    soup = BeautifulSoup(html, 'html.parser')
    # Row 1 holds the headers, so data starts on row 2.
    for row_number, book in enumerate(soup.find_all(class_='book'), start=2):
        for column, value in enumerate(_book_row(book), start=1):
            ws.cell(row_number, column, str(value))

    # Write the file once, after all rows are in place.
    wb.save(OUTPUT_FILE)


if __name__ == "__main__":
    main()
复制代码 [/hide][/hide] |
|