根据总参会名单,统计10个中队的参会情况:
D盘的文件夹250中存放总参会名单(Excel文件)和10个中队的名单(txt文件):
import os
import pandas as pd
import re
# Folder holding the attendance workbooks (*.xlsx) and the squadron rosters (*.txt).
# NOTE(review): this is a raw string, so the path really contains two
# backslashes; Windows appears to tolerate the doubled separator - confirm.
folder_path = r"D:\\250"
# Worksheet that is read from every workbook; workbooks without it are skipped.
sheet_name = "sheet1"

# Matches every character outside the CJK range U+4E00..U+9FA5.
_NON_CHINESE = re.compile("[^\u4e00-\u9fa5]")


def keep_chinese(text):
    """Return *text* with every character outside U+4E00..U+9FA5 removed.

    Non-string values (for example numeric or NaN spreadsheet cells) are
    converted with ``str`` first, so they never raise and simply reduce to
    whatever Chinese characters their text form contains (usually none).
    """
    return _NON_CHINESE.sub("", str(text))


def roster_name(line):
    """Return the comparable name for one roster line.

    Only the part before the first ``$`` is kept, then everything that is
    not a Chinese character is stripped.
    """
    return keep_chinese(str(line).partition("$")[0])


def load_attendees(folder, sheet):
    """Collect the first-column cells of *sheet* from every workbook in *folder*.

    Returns a list of strings; empty cells are dropped.  Workbooks that do
    not contain *sheet* are ignored.
    """
    cells = []
    for file_name in os.listdir(folder):
        # "~$name.xlsx" entries are lock files Excel leaves next to an open
        # workbook; they are not readable workbooks.
        if file_name.startswith("~$") or not file_name.endswith(".xlsx"):
            continue
        file_path = os.path.join(folder, file_name)
        if not os.path.isfile(file_path):
            continue
        # The context manager closes the workbook handle even on errors.
        with pd.ExcelFile(file_path) as xls:
            if sheet not in xls.sheet_names:
                continue
            # Skip the first six rows and read only the first column,
            # labelled "A".
            # NOTE(review): header is left at its default, so the first
            # remaining row is presumably consumed as the header row -
            # confirm against the workbook layout.
            df = pd.read_excel(
                xls, sheet_name=sheet, usecols=[0], skiprows=6, names=["A"]
            )
        cells.extend(df["A"].dropna().astype(str).tolist())
    return cells


def load_roster(path):
    """Return the lines of the gb18030-encoded roster file at *path*."""
    with open(path, "r", encoding="gb18030") as f:
        return f.read().splitlines()


def split_attendance(roster, attendees):
    """Split *roster* into attended / absent against *attendees*.

    A roster entry counts as attended when its cleaned name (see
    ``roster_name``) is a substring of any cleaned attendee entry.  Roster
    lines that contain no Chinese characters (blank lines in particular)
    are skipped: an empty string is a substring of everything and would
    otherwise be counted as an attendee.

    Returns ``(attended_count, absent_names)`` where *absent_names* holds
    the cleaned names in roster order.
    """
    # Clean the attendee list once per call instead of once per comparison.
    cleaned_attendees = [c for c in map(keep_chinese, attendees) if c]
    attended = 0
    absent = []
    for line in roster:
        name = roster_name(line)
        if not name:
            continue
        if any(name in attendee for attendee in cleaned_attendees):
            attended += 1
        else:
            absent.append(name)
    return attended, absent


def main():
    """Print, for every roster (*.txt) in ``folder_path``, who attended."""
    list1 = load_attendees(folder_path, sheet_name)
    for txt_file in os.listdir(folder_path):
        if not txt_file.endswith(".txt"):
            continue
        roster = load_roster(os.path.join(folder_path, txt_file))
        attended, absent = split_attendance(roster, list1)
        # The roster file name (without extension) is used as the squadron name.
        print(f"{os.path.splitext(txt_file)[0]}中队参加会议人员数量: {attended}人,未参加会议{len(absent)}人,名单为{absent}\n\n")


if __name__ == "__main__":
    main()