# Source: src/main.py as added by commit 5c19d4ee48d8cf41952e2915b6b3cfcd0a2bedff
# ("Initial commit", Gabriel Augendre, Sat, 19 Nov 2016 23:33:37 +0100).
# The same commit adds `Unidecode==0.4.19` to requirements.txt, next to the
# existing `xlrd==1.0.0`.
"""Report people who appear more than once across Excel workbooks.

Every row of every sheet is read as a (first name, last name) pair.  Two
people are the same when their names match after lower-casing, replacing
hyphens with spaces and transliterating to ASCII.
"""
import argparse

from unidecode import unidecode
from xlrd import open_workbook


class Person:
    """A first name / last name pair compared on its normalized spelling."""

    def __init__(self, fname, lname):
        # Cell values are not necessarily strings, hence the str() conversion.
        self.fname = str(fname).lower()
        self.lname = str(lname).lower()

    @staticmethod
    def normalize(name):
        """Return *name* with hyphens turned into spaces, transliterated to ASCII."""
        return unidecode(name.replace('-', ' '))

    def _key(self):
        """Return the normalized (first name, last name) tuple.

        Both ``__eq__`` and ``__hash__`` derive from this single key so that
        equal people always hash alike, which sets and dicts rely on.
        """
        return (Person.normalize(self.fname), Person.normalize(self.lname))

    def __eq__(self, other):
        if not isinstance(other, Person):
            # Let Python try the reflected comparison instead of failing on a
            # missing attribute.
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        # Must use the normalized key: hashing the raw names would give
        # "jean-pierre" and "jean pierre" different hashes although they
        # compare equal.
        return hash(self._key())

    def __str__(self):
        return "{0} {1}".format(self.fname.title(), self.lname.title())


def _iter_people(files):
    """Yield one Person per row of every sheet of every workbook in *files*.

    Raises TypeError (from ``Person``) when a row does not hold exactly two
    cells.
    """
    for path in files:
        with open_workbook(path) as workbook:
            for sheet in workbook.sheets():
                for row in range(sheet.nrows):
                    values = [sheet.cell(row, col).value
                              for col in range(sheet.ncols)]
                    yield Person(*values)


def _find_duplicates(people):
    """Return the set of people that occur more than once in *people*."""
    seen = set()
    dupes = set()
    for person in people:
        if person in seen:
            dupes.add(person)
        else:
            seen.add(person)
    return dupes


def main(files):
    """Print the number of duplicated people found in *files*, then each one.

    :param files: paths of the Excel workbooks to scan.
    """
    dupes = _find_duplicates(_iter_people(files))

    print('Nombre de duplicatas : {0}'.format(len(dupes)))
    for dupe in dupes:
        print(dupe)


if __name__ == '__main__':
    ap = argparse.ArgumentParser()
    ap.add_argument('-f', '--files', type=str, nargs='+', help='Files to upload.', required=True)

    args = ap.parse_args()
    files = args.files

    main(files)