Compare-PBI-Data/compareData.py
chenwu c104fe51ed 比较函数逻辑修改
将列名称重新排序后再比较,解决因为列名称顺序不同导致的结果不同
2024-12-25 11:26:36 +08:00

72 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
class Comparator:
    """Compare two Excel workbooks sheet by sheet and collect a text report.

    ``file1`` and ``file2`` are workbook paths *without* the ``.xlsx``
    suffix; the suffix is appended when the files are opened. Every report
    line is printed and also appended, prefixed with a newline, to
    ``log_messages`` so a caller can persist the report afterwards.
    """

    def __init__(self, file1, file2):
        """Store both workbook base paths and start with an empty report."""
        self.file1 = file1
        self.file2 = file2
        self.log_messages = []

    def _log(self, message):
        """Print *message* and record it in ``log_messages``."""
        print(message)
        self.log_messages.append(f"\n{message}")

    @staticmethod
    def _sort_columns(frame):
        """Return *frame* with its columns reordered by sorted label."""
        try:
            ordered = sorted(frame.columns)
        except TypeError:
            # Excel headers may mix numbers and text, which Python cannot
            # order directly; fall back to a textual, type-stable key so the
            # comparison still runs instead of crashing.
            ordered = sorted(
                frame.columns, key=lambda label: (str(label), type(label).__name__)
            )
        return frame.reindex(ordered, axis=1)

    def _compare_frames(self, sheet_name, left, right):
        """Report whether two sheets hold the same data, ignoring column order."""
        # Sort columns by name first so that a different column order alone
        # is not reported as a difference.
        left = self._sort_columns(left)
        right = self._sort_columns(right)

        if left.equals(right):
            self._log(f"{sheet_name}个sheet的数据相同")
            return

        self._log(
            f"{sheet_name}个sheet的数据不同\n----------------------------\n不同数据如下:"
        )
        try:
            difference = left.compare(right)
        except ValueError:
            # DataFrame.compare refuses frames whose row OR column labels
            # differ, so tell the two causes apart instead of always blaming
            # the column names.
            if not left.columns.equals(right.columns):
                self._log("数据列名称不同")
            else:
                self._log(
                    f"数据行数不同: 文件1有{len(left)}行, 文件2有{len(right)}行"
                )
        else:
            self._log(difference)
        self._log("----------------------------")

    def compare_ExcelFiles(self):
        """Compare every sheet of the two workbooks and log the outcome.

        Sheets present in only one workbook are reported and skipped. For
        sheets present in both, the first row is skipped on read
        (``skiprows=1``) and the remaining data is compared. Sheets are
        visited in workbook order: the first file's sheets, then any sheets
        that exist only in the second file.

        Raises:
            FileNotFoundError: if either ``<file>.xlsx`` does not exist.
        """
        # Context managers close both workbooks even when a sheet fails to
        # parse; each file is opened once and reused for every sheet.
        with pd.ExcelFile(f"{self.file1}.xlsx", engine="openpyxl") as book1:
            with pd.ExcelFile(f"{self.file2}.xlsx", engine="openpyxl") as book2:
                names1 = set(book1.sheet_names)
                names2 = set(book2.sheet_names)
                self._log(f"文件1有{len(names1)}个sheet文件2有{len(names2)}个sheet")

                # dict.fromkeys de-duplicates while keeping first-seen order,
                # which makes the report order reproducible between runs.
                ordered_names = dict.fromkeys(
                    [*book1.sheet_names, *book2.sheet_names]
                )
                for name in ordered_names:
                    if name not in names1:
                        self._log(f"文件1中不存在sheet: {name}")
                        continue
                    if name not in names2:
                        self._log(f"文件2中不存在sheet: {name}")
                        continue
                    self._compare_frames(
                        name,
                        book1.parse(name, skiprows=1),
                        book2.parse(name, skiprows=1),
                    )
# Script entry point: intentionally a no-op, so running this file directly
# performs no comparison.
if __name__ == "__main__":
    pass