马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 allearn_Python 于 2020-4-2 10:30 编辑
工作原因需要经常核对word中一些数据是否准确。有时候工作量一大很容易出现遗漏。学习完 python操作excel后,发现可以利用openpyxl 和 docx 来帮助自己核对word中的内容,于是就写了一个很简单的小程序,基本实现了自动提取数据,自动分类比对,错误提示的功能。
思路是:
1. 从excle中读取需要的数据,编写了独立程序去实现。
2. 从word中读取需要对比的数据,编写独立程序。
3. 按类别分类对比。
希望得到大家指点 ,
另外我想对比的数据每一个都有 “型号”,“公司”,“证书号”等若干信息,我就把源数据和待对比数据分别封装成一个独立的类对象,再把这些类对象储存到一个列表中,然后去依次对比。但感觉这样不是很方便,请问有什么更好的办法吗?
效果GIF如下图:
代码如下,请指教:
程序代码.zip
(91.66 KB, 下载次数: 2)
'''
名称:安全部件信息比对程序 V1.0
功能:提取Word中安全部件信息与Excel中的正确信息进行对比。
最后更新:2020/3/28
'''
import openpyxl
from docx import Document
from ComparisonData import *
from OriginalData import *
#打开目标文件
wb = openpyxl.load_workbook('Excel file.xlsx')
ws = wb.active
document = Document('Word file.docx')
class DateError():
'定义错误类,用于存放错误型号'
def __init__(self):
self.type = 'None'
self.model = 'None'
def check(original_data_list, comparison_data_list, name):
'检查两个数据类的每个属性值是否相同'
error_list = []
print('正在对比', name)
for B in comparison_data_list:
date_error = DateError()
model_ok = False
company_ok = False
cert_ok = False
nb_ok = False
#依次比较
for A in original_data_list:
if B.model == A.model:
model_ok = True
#有同一型号,不同制造商的情况,单独判断
if B.model in ['AQ1', 'XS1']:
for A in original_data_list:
if B.model == A.model and B.company == A.company:
model_ok = True
company_ok = True
if B.cert == A.cert:
cert_ok = True
if B.nb == A.nb:
nb_ok = True
print('%s | %s --> OK' % (B.model, B.company))
break
else:
print('%s该型号公告机构错误!' % B.model)
date_error.type = '公告机构错误'
date_error.model = B.model
error_list.append(date_error)
break
else:
print('%s该型号证书号错误!' % B.model)
date_error.type = '证书号错误'
date_error.model = B.model
error_list.append(date_error)
break
if not company_ok:
print('%s该型号制造商错误!' % B.model)
print(B.company)
date_error.type = '制造商错误'
date_error.model = B.model
error_list.append(date_error)
break
if B.company == A.company:
company_ok = True
if B.cert == A.cert:
cert_ok = True
if B.nb == A.nb:
nb_ok = True
print('%s --> OK' % B.model)
break
else:
print('%s该型号公告机构错误!' % B.model)
print(A.nb, B.nb)
date_error.type = '公告机构错误'
date_error.model = B.model
error_list.append(date_error)
break
else:
print('%s该型号证书号错误!' % B.model)
date_error.type = '证书号错误'
date_error.model = B.model
error_list.append(date_error)
break
else:
print('%s该型号制造商错误!' % B.model)
print(A.company, B.company, sep='\n')
date_error.type = '制造商错误'
date_error.model = B.model
error_list.append(date_error)
break
if not model_ok:
#遍历完列表找不到该型号
print('%s没有对应型号!' % B.model)
date_error.type = '没有此型号'
date_error.model = B.model
error_list.append(date_error)
return error_list
def sort(original_data, comparison_data):
'按照安全部件的名称分类'
for name in comparison_data.__dict__:
if name == 'SG':
original_data_list = original_data.SG
comparison_data_list = comparison_data.SG
if name == 'OG':
original_data_list = original_data.OG
comparison_data_list = comparison_data.OG
if name == 'BRAKE':
original_data_list = original_data.BRAKE
comparison_data_list = comparison_data.BRAKE
if name == 'BF':
original_data_list = original_data.BF
comparison_data_list = comparison_data.BF
if name == 'DL':
original_data_list = original_data.DL
comparison_data_list = comparison_data.DL
if name == 'UCMP':
original_data_list = original_data.UCMP
comparison_data_list = comparison_data.UCMP
#检查
error = check(original_data_list, comparison_data_list, name)
#集中打印错误
if error:
print('\n%s的错误有:' % name)
for each in error:
print(each.model, each.type)
else:
print('\n%s无错误^_^' % name)
input('\n按任意键继续:')
def main():
comparison_data = Comparison()
original_data = Original()
#从Word获取数据
get_data_from_word(document, comparison_data)
#从Excel获取数据
get_data_from_excel(original_data, ws)
#开始对比
sort(original_data, comparison_data)
if __name__ == '__main__':
main()
'从word获得待对比数据'
from docx import Document
#定义安全部件的类
class Component:
def __init__(self):
self.infos = []
self.model = 'None'
self.cert = 'None'
self.company = 'None'
self.nb = 'None'
self.nb_name = 'None'
self.type_ = 'None'
def info(self):
self.infos.append(self.model)
self.infos.append(self.company)
self.infos.append(self.cert)
self.infos.append(self.nb)
self.infos.append(self.nb_name)
return self.infos
def print(self):
self.info(self)
for each in self.infos:
print(each, sep=' | ')
#定义Comparison类
class Comparison:
def __init__(self):
self.SG = []
self.BF = []
self.OG = []
self.BRAKE = []
self.UCMP = []
self.DL = []
#定义添加到列表的函数
def add2comparison_list(table, component, comparison_data):
'table是哪一个安全部件的表格,component是相应的类对象'
if table == sg_table:
comparison_data.SG.append(component)
if table == og_table:
comparison_data.OG.append(component)
if table == brake_table:
comparison_data.BRAKE.append(component)
if table == bf_table:
comparison_data.BF.append(component)
if table == dl_table:
comparison_data.DL.append(component)
if table == ucmp_table:
comparison_data.UCMP.append(component)
#定义从表格获取数据的函数
def get_data_from_table(table, comparison_data):
'从表格中获取数据,每一行的数据储存在实例化的component对象中,每个component对象再分类储存在comparison_data类中'
for each in range(1, len(table.rows)): #第0行和第1行非正文,默认从第1行开始读取数据
if 'Type' not in table.row_cells(each)[0].text:
#每个部件实例化一个对象
component = Component()
#model
component.model = table.row_cells(each)[0].text
#Manufacturer Name
component.company = table.row_cells(each)[1].text
#Cert. No
component.cert = table.row_cells(each)[2].text
#Notified body
component.nb = table.row_cells(each)[3].text
#component.print()
#component 以列表形式添加进comparison类中
add2comparison_list(table, component, comparison_data)
def get_data_from_word(document, comparison_data):
'定位各个安全部件所在表格位置\
document: Word文件对象; comparison_data: 要对比的数据类'
table_list = []
global sg_table, og_table, brake_table, bf_table, dl_table, ucmp_table
#判断word中每一个表格
for each_table in document.tables:
#以每个表格左上角内容是否匹配关键字的方式来确定是否属于目标表格,是的话命名并存入列表
if 'gear' in each_table.cell(0, 0).text:
sg_table = each_table
table_list.append(each_table)
if 'governor' in each_table.cell(0, 0).text:
og_table = each_table
table_list.append(each_table)
if 'overspeed' in each_table.cell(0, 0).text:
brake_table = each_table
table_list.append(each_table)
if 'Buffer' in each_table.cell(0, 0).text:
bf_table = each_table
table_list.append(each_table)
if 'Locking' in each_table.cell(0, 0).text:
dl_table = each_table
table_list.append(each_table)
if 'UCMP' in each_table.cell(0, 0).text:
ucmp_table = each_table
table_list.append(each_table)
#从各个表格获取数据
for each_table in table_list:
get_data_from_table(each_table, comparison_data)
def main():
'测试函数'
document = Document('LF_A_C_18897_19-FINAL.docx')
comparison_data = Comparison()
get_data_from_word(document, comparison_data)
print('共统计得到:')
print('安全钳:%d个' % len(comparison_data.SG))
print('限速器:%d个' % len(comparison_data.OG))
print('制动器:%d个' % len(comparison_data.BRAKE))
print('缓冲器:%d个' % len(comparison_data.BF))
print('门锁:%d个' % len(comparison_data.DL))
print('UCMP:%d个' % len(comparison_data.UCMP))
if __name__ == '__main__':
main()
''' 从Excel中获取原始的数据信息,存储在Original类中以便于对比 '''
import openpyxl
from ComparisonData import Component
#定义Original类
class Original:
def __init__(self):
self.SG = []
self.BF = []
self.OG = []
self.BRAKE = []
self.UCMP = []
self.DL = []
def get_data_from_excel(original_data, worksheet):
#识别安全部件类型, 默认从39行开始
#detect_component('B39', 'D200')
#安全钳
for each in worksheet['B42':'E69']:
component = Component()
component.model = each[0].value
component.cert = each[1].value
component.nb = each[2].value
component.company = each[3].value
original_data.SG.append(component)
#限速器
for each in worksheet['B73':'E84']:
component = Component()
component.model = each[0].value
component.cert = each[1].value
component.nb = each[2].value
component.company = each[3].value
original_data.OG.append(component)
#制动器
for each in worksheet['B88':'E108']:
component = Component()
component.model = each[0].value
component.cert = each[1].value
component.nb = each[2].value
component.company = each[3].value
original_data.BRAKE.append(component)
#缓冲器
for each in worksheet['B112':'E126']:
component = Component()
component.model = each[0].value
component.cert = each[1].value
component.nb = each[2].value
component.company = each[3].value
original_data.BF.append(component)
#门锁
for each in worksheet['B130':'E159']:
component = Component()
component.model = each[0].value
component.cert = each[1].value
component.nb = each[2].value
component.company = each[3].value
original_data.DL.append(component)
#UCMP
for each in worksheet['B163':'E188']:
component = Component()
component.model = each[0].value
component.cert = each[1].value
component.nb = each[2].value
component.company = each[3].value
original_data.UCMP.append(component)
def main():
'测试函数'
wb = openpyxl.load_workbook('A3-TECHNICAL SPECIFICATION SHEET-FUJI-MR.xlsx')
ws = wb.active
original_data = Original()
get_data_from_excel(original_data, ws)
print('共统计得到:')
print('安全钳:%d个' % len(original_data.SG))
print('限速器:%d个' % len(original_data.OG))
print('制动器:%d个' % len(original_data.BRAKE))
print('缓冲器:%d个' % len(original_data.BF))
print('门锁:%d个' % len(original_data.DL))
print('UCMP:%d个' % len(original_data.UCMP))
if __name__ == '__main__':
main()
|