"""Report people that appear more than once across Excel workbooks.

Every row of every sheet is read as a ``(first name, last name)`` pair.
Two rows denote the same person when their names match after lowercasing,
replacing hyphens with spaces and transliterating to ASCII.
"""
import argparse

from unidecode import unidecode
from xlrd import open_workbook


class Person:
    """A first name / last name pair compared on a normalized form."""

    def __init__(self, fname, lname):
        # Cell values may not be strings (e.g. numeric cells), hence str().
        self.fname = str(fname).lower()
        self.lname = str(lname).lower()

    @staticmethod
    def normalize(name):
        """Return *name* with hyphens turned into spaces, transliterated."""
        return unidecode(name.replace('-', ' '))

    def _key(self):
        """Return the tuple that both equality and hashing are based on."""
        return Person.normalize(self.fname), Person.normalize(self.lname)

    def __eq__(self, other):
        if not isinstance(other, Person):
            # Let Python try the reflected comparison instead of raising
            # AttributeError on an unrelated object.
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        # Must be derived from the same key as __eq__: objects that compare
        # equal are required to hash equal, otherwise sets and dicts treat
        # them as distinct entries.
        return hash(self._key())

    def __str__(self):
        return "{0} {1}".format(self.fname.title(), self.lname.title())

    def __repr__(self):
        return "Person({0!r}, {1!r})".format(self.fname, self.lname)


def _find_duplicates(files):
    """Return the set of persons seen more than once in *files*.

    Each row must contain exactly two cells (first name, last name);
    any other column count makes ``Person(...)`` raise ``TypeError``.
    """
    seen = set()
    dupes = set()
    for path in files:
        with open_workbook(path) as wb:
            for sheet in wb.sheets():
                for row in range(sheet.nrows):
                    values = [sheet.cell(row, col).value
                              for col in range(sheet.ncols)]
                    person = Person(*values)
                    if person in seen:
                        dupes.add(person)
                    else:
                        seen.add(person)
    return dupes


def main(files):
    """Print the number of duplicated persons in *files*, then each one."""
    dupes = _find_duplicates(files)
    print('Nombre de duplicatas : {0}'.format(len(dupes)))
    for dupe in dupes:
        print(dupe)


if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument('-f', '--files', type=str, nargs='+',
                    help='Files to compare.', required=True)
    args = ap.parse_args()
    print('WARNING')
    print('All your sheets should have the same column order.\n')
    main(args.files)