使用openpyxl 怎么把第二页的数据保存下来
RT内容已经拿到手了,测试第一页的时候很正常
但开始弄翻页的的时候,第二页的数据会把第一页的覆盖
求各位大神帮帮,第一时间给最佳{:5_110:}
#应该就是这部分代码缺少什么
wb = openpyxl.Workbook()
wb.guess_types = True
ws = wb.active
ws.append(['投诉编号','投诉品牌','投诉车系','投诉车型','问题简述','投诉时间','投诉状态'])
for each in datalist:
print(each)
ws.append(each)
time.sleep(0.5)
wb.save("车辆问题.xlsx") 麻烦发个完整代码 import requests
import parsel
import openpyxl
import time
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3760.400 QQBrowser/10.5.4083.400',
'Referer': 'https://image.baidu.com/'
}
for page in range(1,10):
target = f"http://www.12365auto.com/zlts/273-0-0-0-0-0_0-0-0-0-0-0-0-{page}.shtml"
print(target)
response = requests.get(target,headers=headers).text
html = parsel.Selector(response)
# 搜索编号
num = html.xpath('//tr/td/text()').getall()
# 公司名称
name = html.xpath('//tr/td/text()').getall()
# 车辆品牌
car = html.xpath('//tr/td/text()').getall()
# 车辆款式
cars = html.xpath('//tr/td/text()').getall()
# 问题信息
carinfo = html.xpath('//tr/td/a/text()').getall()
# 投诉时间
timer = html.xpath('//tr/td/text()').getall()
# 投诉状态
infos = html.xpath('//tr/td/em/text()').getall()
datalist = []
for i in range(len(num)):
#创建一个新列表接受
data = []
data.append(num)
data.append(name)
data.append(car)
data.append(cars)
data.append(carinfo)
data.append(timer)
data.append(infos)
#再次添加进一个列表
datalist.append(data)
wb = openpyxl.Workbook()
wb.guess_types = True
ws = wb.active
ws.append(['投诉编号','投诉品牌','投诉车系','投诉车型','问题简述','投诉时间','投诉状态'])
for each in datalist:
print(each)
ws.append(each)
time.sleep(0.5)
wb.save("车辆问题.xlsx") 永恒的蓝色梦想 发表于 2020-8-13 10:48
麻烦发个完整代码
import requests
import parsel
import openpyxl
import time
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3760.400 QQBrowser/10.5.4083.400',
'Referer': 'https://image.baidu.com/'
}
for page in range(1,10):
target = f"http://www.12365auto.com/zlts/273-0-0-0-0-0_0-0-0-0-0-0-0-{page}.shtml"
print(target)
response = requests.get(target,headers=headers).text
html = parsel.Selector(response)
# 搜索编号
num = html.xpath('//tr/td/text()').getall()
# 公司名称
name = html.xpath('//tr/td/text()').getall()
# 车辆品牌
car = html.xpath('//tr/td/text()').getall()
# 车辆款式
cars = html.xpath('//tr/td/text()').getall()
# 问题信息
carinfo = html.xpath('//tr/td/a/text()').getall()
# 投诉时间
timer = html.xpath('//tr/td/text()').getall()
# 投诉状态
infos = html.xpath('//tr/td/em/text()').getall()
datalist = []
for i in range(len(num)):
#创建一个新列表接受
data = []
data.append(num)
data.append(name)
data.append(car)
data.append(cars)
data.append(carinfo)
data.append(timer)
data.append(infos)
#再次添加进一个列表
datalist.append(data)
wb = openpyxl.Workbook()
wb.guess_types = True
ws = wb.active
ws.append(['投诉编号','投诉品牌','投诉车系','投诉车型','问题简述','投诉时间','投诉状态'])
for each in datalist:
print(each)
ws.append(each)
time.sleep(0.5)
wb.save("车辆问题.xlsx") 永恒的蓝色梦想 发表于 2020-8-13 10:48
麻烦发个完整代码
回复需要审核{:10_266:} import requests
import parsel
import openpyxl
import time
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3760.400 QQBrowser/10.5.4083.400',
'Referer': 'https://image.baidu.com/'
}
for page in range(1,10):
target = f"http://www.12365auto.com/zlts/273-0-0-0-0-0_0-0-0-0-0-0-0-{page}.shtml"
print(target)
response = requests.get(target,headers=headers).text
html = parsel.Selector(response)
# 搜索编号
num = html.xpath('//tr/td/text()').getall()
# 公司名称
name = html.xpath('//tr/td/text()').getall()
# 车辆品牌
car = html.xpath('//tr/td/text()').getall()
# 车辆款式
cars = html.xpath('//tr/td/text()').getall()
# 问题信息
carinfo = html.xpath('//tr/td/a/text()').getall()
# 投诉时间
timer = html.xpath('//tr/td/text()').getall()
# 投诉状态
infos = html.xpath('//tr/td/em/text()').getall()
datalist = []
for i in range(len(num)):
#创建一个新列表接受
data = []
data.append(num)
data.append(name)
data.append(car)
data.append(cars)
data.append(carinfo)
data.append(timer)
data.append(infos)
#再次添加进一个列表
datalist.append(data)
wb = openpyxl.Workbook()
wb.guess_types = True
ws = wb.active
ws.append(['投诉编号','投诉品牌','投诉车系','投诉车型','问题简述','投诉时间','投诉状态'])
for each in datalist:
print(each)
ws.append(each)
time.sleep(0.5)
wb.save("车辆问题.xlsx") 切页(用一个对象):
ws = wb['Sheet1']
# your code
# switch to sheet2
ws = wb['Sheet2']
多页对象:
ws1 = wb['Sheet1']
ws2 = wb['Sheet2']
suchocolate 发表于 2020-8-13 11:52
切页(用一个对象):
我想放在一起,到时候好做数据分析{:10_254:} jtxs0000 发表于 2020-8-13 11:58
我想放在一起,到时候好做数据分析
你的标题容易引起歧义,我以为你是要openpyxl操作sheet翻页。
wb = Workbook()是新建excel表格,第二次使用保存就会把之前的文件覆盖掉,你要用这个:
from openpyxl import load_workbook
wb = load_workbook('test.xls') jtxs0000 发表于 2020-8-13 11:58
我想放在一起,到时候好做数据分析
帮你写了一个
import requests
import parsel
#from lxml import etree
from openpyxl import load_workbook
import time
def main():
wb = load_workbook('test.xlsx')
wb.guess_types = True
ws = wb.active
ws.append(['投诉编号', '投诉品牌', '投诉车系', '投诉车型', '问题简述', '投诉时间', '投诉状态'])
headers = {'User-Agent': 'firefox'}
for page in range(1, 10):
target = f"http://www.12365auto.com/zlts/273-0-0-0-0-0_0-0-0-0-0-0-0-{page}.shtml"
print(target)
r = requests.get(target, headers=headers)
#html = etree.HTML(r.text)
html = parsel.Selector(r.text)
# 搜索编号
num = html.xpath('//tr/td/text()')
# 公司名称
name = html.xpath('//tr/td/text()')
# 车辆品牌
serial = html.xpath('//tr/td/text()')
# 车辆款式
type = html.xpath('//tr/td/text()')
# 问题信息
description = html.xpath('//tr/td/a/text()')
# 投诉时间
stime = html.xpath('//tr/td/text()')
# 投诉状态
status = html.xpath('//tr/td/em/text()')
offset = ws.max_row + 1
for n, v in enumerate(num):
row = str(n + offset)
ws['a' + row] = num
ws['b' + row] = name
ws['c' + row] = serial
ws['d' + row] = type
ws['e' + row] = description
ws['f' + row] = stime
ws['g' + row] = status
wb.save("test.xlsx")
if __name__ == '__main__':
main()
suchocolate 发表于 2020-8-13 13:36
帮你写了一个
抱歉,今天有点忙,没及时上来看,等我看看代码 suchocolate 发表于 2020-8-13 13:36
帮你写了一个
谢谢,设最佳了,果然是我吧数据存储这块弄错了{:10_266:}
页:
[1]