import pandas as pd


class Comparator:
    """Compare two Excel workbooks sheet by sheet and record a text report.

    ``file1`` and ``file2`` are workbook paths given WITHOUT the ``.xlsx``
    extension; the extension is appended when the files are opened.

    Every report line is printed to stdout and also appended to
    ``log_messages`` (each entry is prefixed with a newline character).
    """

    def __init__(self, file1, file2):
        self.file1 = file1
        self.file2 = file2
        # Accumulated report lines, in the order they were emitted.
        self.log_messages = []

    def _log(self, message):
        """Print *message* and append it, newline-prefixed, to the log."""
        print(message)
        self.log_messages.append(f"\n{message}")

    def compare_ExcelFiles(self):
        """Compare every sheet of the two workbooks and log the outcome.

        For each sheet name found in either workbook:

        * a sheet missing from one workbook is reported and skipped;
        * otherwise both sheets are read with the first row skipped, their
          columns are sorted by name, and the frames are compared with
          ``DataFrame.equals``; on mismatch the ``DataFrame.compare`` output
          is logged.

        Sheets are visited in the order they appear in the first workbook,
        followed by sheets that exist only in the second one, so the report
        order is the same on every run.

        Raises:
            FileNotFoundError: if either ``<file>.xlsx`` does not exist
                (propagated from pandas).
        """
        # Open each workbook once and make sure both handles are closed,
        # even if a sheet fails to parse part-way through.
        with pd.ExcelFile(f"{self.file1}.xlsx", engine="openpyxl") as xls1:
            with pd.ExcelFile(f"{self.file2}.xlsx", engine="openpyxl") as xls2:
                # Collect the sheet names of both workbooks.
                sheet_names1 = set(xls1.sheet_names)
                sheet_names2 = set(xls2.sheet_names)

                # Deterministic order: workbook-1 order, then the sheets
                # that only workbook 2 has (a plain set union is unordered).
                all_sheet_names = list(xls1.sheet_names) + [
                    name
                    for name in xls2.sheet_names
                    if name not in sheet_names1
                ]

                self._log(
                    f"文件1有{len(sheet_names1)}个sheet,文件2有{len(sheet_names2)}个sheet"
                )

                for sheet in all_sheet_names:
                    if sheet not in sheet_names1:
                        self._log(f"文件1中不存在sheet: {sheet}")
                        continue
                    if sheet not in sheet_names2:
                        self._log(f"文件2中不存在sheet: {sheet}")
                        continue

                    # Parse from the already-open workbooks instead of
                    # re-opening the files for every sheet.
                    raw1 = xls1.parse(sheet_name=sheet, skiprows=1)
                    raw2 = xls2.parse(sheet_name=sheet, skiprows=1)

                    # Sort the columns by name so column order alone does
                    # not make two sheets count as different.
                    df1 = raw1.reindex(sorted(raw1.columns), axis=1)
                    df2 = raw2.reindex(sorted(raw2.columns), axis=1)

                    if df1.equals(df2):
                        self._log(f"第{sheet}个sheet的数据相同")
                        continue

                    self._log(
                        f"第{sheet}个sheet的数据不同\n----------------------------\n不同数据如下:"
                    )
                    try:
                        diff = df1.compare(df2)
                    except ValueError:
                        # compare() requires identically-labelled frames;
                        # it raises when columns (or the row index) differ.
                        self._log("数据列名称不同")
                    else:
                        self._log(diff)
                    self._log("----------------------------")


if __name__ == "__main__":
    pass