爬虫
急!急!急!题目:访问https://www.suning.com
使用selenium实现输入iphone进行搜索,跳转到iphone商品页面
爬取所有商品的名称、价格和评论数据,存为json文件
有没有兄弟会这个题目啊,我已经爬取出内容了,但是不会截取和存为json文件
我的代码:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from time import sleep
import json
# Search suning.com for "iphone" and print the raw text of the result container.
driver = webdriver.Chrome()
try:
    driver.get('https://www.suning.com')
    driver.maximize_window()
    wait = WebDriverWait(driver, 10)

    # Type the keyword into the search box and submit it with ENTER.
    _input = wait.until(EC.presence_of_element_located((By.ID, 'searchKeywords')))
    _input.clear()
    _input.send_keys('iphone')
    _input.send_keys(Keys.ENTER)

    # Wait for the result container first; scrolling before it exists would
    # act on whatever page happens to be loaded at that moment.
    t = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#product-list')))

    # Scroll to the bottom, then pause so the page can finish rendering
    # (presumably items are loaded lazily on scroll -- confirm on the site).
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    driver.execute_script(js)
    sleep(10)

    print(t)
    for item in t:
        # .text is read from the live element, i.e. after the pause above.
        print(item.text)
finally:
    # Always close the browser, even when a wait times out.
    driver.quit()
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import json
import time
from lxml import etree
# Search suning.com for "iphone", extract price / description / comment count
# for every product entry, and save the records to test.json.


def _first_text(nodes):
    """Return the first XPath string result, stripped, or '' when there is none."""
    return nodes[0].strip() if nodes else ''


driver = webdriver.Chrome()
try:
    driver.get('https://www.suning.com')
    driver.maximize_window()
    wait = WebDriverWait(driver, 10)

    # Type the keyword into the search box and submit it with ENTER.
    _input = wait.until(EC.presence_of_element_located((By.ID, 'searchKeywords')))
    _input.clear()
    _input.send_keys('iphone')
    _input.send_keys(Keys.ENTER)

    # Wait for the result container before scrolling, so the scroll acts on
    # the search result page rather than on the page being navigated away from.
    wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '#product-list')))
    js = 'window.scrollTo(0,document.body.scrollHeight)'
    driver.execute_script(js)
    # Pause so the page can finish rendering after the scroll
    # (presumably items are loaded lazily -- confirm on the site).
    time.sleep(3)

    # Parse a snapshot of the rendered page with lxml.
    html = etree.HTML(driver.page_source)
finally:
    # The snapshot is all that is needed; always close the browser.
    driver.quit()

result = []
# '//li' matches every list item on the page, not only product entries.
for li in html.xpath('//li'):
    prices = li.xpath('.//span[@class="def-price"]/text()')
    if not prices:
        # No price span here, so indexing [-1] would raise IndexError; skip it.
        continue
    result.append({
        # The last text node of the price span is used as the price.
        'price': prices[-1].strip(),
        'description': _first_text(li.xpath('.//a[@tabindex="0"]/@aria-label')),
        'comment': _first_text(li.xpath('.//a[@tabindex="-1"]/i/text()')),
    })

with open('test.json', 'w', encoding='utf-8') as f:
    # ensure_ascii=False keeps Chinese text readable in the file;
    # add indent=2 to the call for a pretty-printed file.
    json.dump(result, f, ensure_ascii=False)
页:
[1]