|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 allearn_Python 于 2020-4-2 10:30 编辑
工作原因需要经常核对word中一些数据是否准确。有时候工作量一大很容易出现遗漏。学习完 python操作excel后,发现可以利用openpyxl 和 docx 来帮助自己核对word中的内容,于是就写了一个很简单的小程序,基本实现了自动提取数据,自动分类比对,错误提示的功能。
思路是:
1. 从excle中读取需要的数据,编写了独立程序去实现。
2. 从word中读取需要对比的数据,编写独立程序。
3. 按类别分类对比。
希望得到大家指点 ,
另外我想对比的数据每一个都有 “型号”,“公司”,“证书号”等若干信息,我就把源数据和待对比数据分别封装成一个独立的类对象,再把这些类对象储存到一个列表中,然后去依次对比。但感觉这样不是很方便,请问有什么更好的办法吗?
效果GIF如下图:
代码如下,请指教:
程序代码.zip
(91.66 KB, 下载次数: 2)
- '''
- 名称:安全部件信息比对程序 V1.0
- 功能:提取Word中安全部件信息与Excel中的正确信息进行对比。
- 最后更新:2020/3/28
- '''
- import openpyxl
- from docx import Document
- from ComparisonData import *
- from OriginalData import *
- #打开目标文件
- wb = openpyxl.load_workbook('Excel file.xlsx')
- ws = wb.active
- document = Document('Word file.docx')
- class DateError():
- '定义错误类,用于存放错误型号'
- def __init__(self):
- self.type = 'None'
- self.model = 'None'
-
- def check(original_data_list, comparison_data_list, name):
- '检查两个数据类的每个属性值是否相同'
- error_list = []
- print('正在对比', name)
- for B in comparison_data_list:
- date_error = DateError()
- model_ok = False
- company_ok = False
- cert_ok = False
- nb_ok = False
-
- #依次比较
- for A in original_data_list:
- if B.model == A.model:
- model_ok = True
- #有同一型号,不同制造商的情况,单独判断
- if B.model in ['AQ1', 'XS1']:
- for A in original_data_list:
- if B.model == A.model and B.company == A.company:
- model_ok = True
- company_ok = True
- if B.cert == A.cert:
- cert_ok = True
- if B.nb == A.nb:
- nb_ok = True
- print('%s | %s --> OK' % (B.model, B.company))
- break
- else:
- print('%s该型号公告机构错误!' % B.model)
- date_error.type = '公告机构错误'
- date_error.model = B.model
- error_list.append(date_error)
- break
- else:
- print('%s该型号证书号错误!' % B.model)
- date_error.type = '证书号错误'
- date_error.model = B.model
- error_list.append(date_error)
- break
- if not company_ok:
- print('%s该型号制造商错误!' % B.model)
- print(B.company)
- date_error.type = '制造商错误'
- date_error.model = B.model
- error_list.append(date_error)
- break
- if B.company == A.company:
- company_ok = True
- if B.cert == A.cert:
- cert_ok = True
- if B.nb == A.nb:
- nb_ok = True
- print('%s --> OK' % B.model)
- break
- else:
- print('%s该型号公告机构错误!' % B.model)
- print(A.nb, B.nb)
- date_error.type = '公告机构错误'
- date_error.model = B.model
- error_list.append(date_error)
- break
- else:
- print('%s该型号证书号错误!' % B.model)
- date_error.type = '证书号错误'
- date_error.model = B.model
- error_list.append(date_error)
- break
- else:
- print('%s该型号制造商错误!' % B.model)
- print(A.company, B.company, sep='\n')
- date_error.type = '制造商错误'
- date_error.model = B.model
- error_list.append(date_error)
- break
- if not model_ok:
- #遍历完列表找不到该型号
- print('%s没有对应型号!' % B.model)
- date_error.type = '没有此型号'
- date_error.model = B.model
- error_list.append(date_error)
- return error_list
-
-
-
- def sort(original_data, comparison_data):
- '按照安全部件的名称分类'
- for name in comparison_data.__dict__:
- if name == 'SG':
- original_data_list = original_data.SG
- comparison_data_list = comparison_data.SG
- if name == 'OG':
- original_data_list = original_data.OG
- comparison_data_list = comparison_data.OG
- if name == 'BRAKE':
- original_data_list = original_data.BRAKE
- comparison_data_list = comparison_data.BRAKE
- if name == 'BF':
- original_data_list = original_data.BF
- comparison_data_list = comparison_data.BF
- if name == 'DL':
- original_data_list = original_data.DL
- comparison_data_list = comparison_data.DL
- if name == 'UCMP':
- original_data_list = original_data.UCMP
- comparison_data_list = comparison_data.UCMP
- #检查
- error = check(original_data_list, comparison_data_list, name)
- #集中打印错误
- if error:
- print('\n%s的错误有:' % name)
- for each in error:
- print(each.model, each.type)
- else:
- print('\n%s无错误^_^' % name)
- input('\n按任意键继续:')
- def main():
- comparison_data = Comparison()
- original_data = Original()
-
- #从Word获取数据
- get_data_from_word(document, comparison_data)
- #从Excel获取数据
- get_data_from_excel(original_data, ws)
- #开始对比
- sort(original_data, comparison_data)
-
-
-
-
- if __name__ == '__main__':
- main()
-
-
-
-
复制代码
- '从word获得待对比数据'
- from docx import Document
- #定义安全部件的类
- class Component:
- def __init__(self):
- self.infos = []
- self.model = 'None'
- self.cert = 'None'
- self.company = 'None'
- self.nb = 'None'
- self.nb_name = 'None'
- self.type_ = 'None'
- def info(self):
- self.infos.append(self.model)
- self.infos.append(self.company)
- self.infos.append(self.cert)
- self.infos.append(self.nb)
- self.infos.append(self.nb_name)
- return self.infos
- def print(self):
- self.info(self)
- for each in self.infos:
- print(each, sep=' | ')
- #定义Comparison类
- class Comparison:
- def __init__(self):
- self.SG = []
- self.BF = []
- self.OG = []
- self.BRAKE = []
- self.UCMP = []
- self.DL = []
- #定义添加到列表的函数
- def add2comparison_list(table, component, comparison_data):
- 'table是哪一个安全部件的表格,component是相应的类对象'
- if table == sg_table:
- comparison_data.SG.append(component)
- if table == og_table:
- comparison_data.OG.append(component)
- if table == brake_table:
- comparison_data.BRAKE.append(component)
- if table == bf_table:
- comparison_data.BF.append(component)
- if table == dl_table:
- comparison_data.DL.append(component)
- if table == ucmp_table:
- comparison_data.UCMP.append(component)
- #定义从表格获取数据的函数
- def get_data_from_table(table, comparison_data):
- '从表格中获取数据,每一行的数据储存在实例化的component对象中,每个component对象再分类储存在comparison_data类中'
- for each in range(1, len(table.rows)): #第0行和第1行非正文,默认从第1行开始读取数据
- if 'Type' not in table.row_cells(each)[0].text:
- #每个部件实例化一个对象
- component = Component()
- #model
- component.model = table.row_cells(each)[0].text
- #Manufacturer Name
- component.company = table.row_cells(each)[1].text
- #Cert. No
- component.cert = table.row_cells(each)[2].text
- #Notified body
- component.nb = table.row_cells(each)[3].text
- #component.print()
- #component 以列表形式添加进comparison类中
- add2comparison_list(table, component, comparison_data)
- def get_data_from_word(document, comparison_data):
- '定位各个安全部件所在表格位置\
- document: Word文件对象; comparison_data: 要对比的数据类'
- table_list = []
- global sg_table, og_table, brake_table, bf_table, dl_table, ucmp_table
- #判断word中每一个表格
- for each_table in document.tables:
- #以每个表格左上角内容是否匹配关键字的方式来确定是否属于目标表格,是的话命名并存入列表
- if 'gear' in each_table.cell(0, 0).text:
- sg_table = each_table
- table_list.append(each_table)
- if 'governor' in each_table.cell(0, 0).text:
- og_table = each_table
- table_list.append(each_table)
- if 'overspeed' in each_table.cell(0, 0).text:
- brake_table = each_table
- table_list.append(each_table)
- if 'Buffer' in each_table.cell(0, 0).text:
- bf_table = each_table
- table_list.append(each_table)
- if 'Locking' in each_table.cell(0, 0).text:
- dl_table = each_table
- table_list.append(each_table)
- if 'UCMP' in each_table.cell(0, 0).text:
- ucmp_table = each_table
- table_list.append(each_table)
-
- #从各个表格获取数据
- for each_table in table_list:
- get_data_from_table(each_table, comparison_data)
- def main():
- '测试函数'
- document = Document('LF_A_C_18897_19-FINAL.docx')
- comparison_data = Comparison()
- get_data_from_word(document, comparison_data)
- print('共统计得到:')
- print('安全钳:%d个' % len(comparison_data.SG))
- print('限速器:%d个' % len(comparison_data.OG))
- print('制动器:%d个' % len(comparison_data.BRAKE))
- print('缓冲器:%d个' % len(comparison_data.BF))
- print('门锁:%d个' % len(comparison_data.DL))
- print('UCMP:%d个' % len(comparison_data.UCMP))
- if __name__ == '__main__':
- main()
-
-
复制代码
|
|