Initial commit

This commit is contained in:
Gabriel Augendre 2016-11-19 23:33:37 +01:00
parent e997b66d7c
commit 5c19d4ee48
No known key found for this signature in database
GPG Key ID: D2B6A5B41FC438B1
2 changed files with 62 additions and 0 deletions

View File

@ -1 +1,2 @@
Unidecode==0.4.19
xlrd==1.0.0

61
src/main.py Normal file
View File

@ -0,0 +1,61 @@
import argparse
from unidecode import unidecode
from xlrd import open_workbook
class Person:
    """A person identified by a first name and a last name.

    Both names are stored as lower-cased strings. Two people compare equal
    when their names match after ``normalize`` has been applied to each of
    them, and equal people always share the same hash.
    """

    def __init__(self, fname, lname):
        """Store the lower-cased string form of ``fname`` and ``lname``."""
        self.fname = str(fname).lower()
        self.lname = str(lname).lower()

    @staticmethod
    def normalize(name):
        """Return ``name`` with hyphens turned into spaces, passed through ``unidecode``."""
        return unidecode(name.replace('-', ' '))

    def _key(self):
        """Return the normalized ``(fname, lname)`` pair that defines identity."""
        return Person.normalize(self.fname), Person.normalize(self.lname)

    def __eq__(self, other):
        """Compare by normalized names; defer to the other operand for foreign types."""
        if not isinstance(other, Person):
            # Lets Python fall back to its default handling instead of
            # failing on a missing ``fname`` attribute.
            return NotImplemented
        return self._key() == other._key()

    def __str__(self):
        """Return ``"<First> <Last>"`` with each name title-cased."""
        return "{0} {1}".format(self.fname.title(), self.lname.title())

    def __hash__(self):
        """Hash the same normalized key that ``__eq__`` compares."""
        # Hashing the raw names would give equal people (e.g. "jean-pierre"
        # and "jean pierre") different hashes and break set/dict lookups.
        return hash(self._key())
def main(files):
    """Print the people that appear more than once across the given workbooks.

    Every row of every sheet of every workbook is turned into a ``Person``
    by passing the row's cell values positionally. A person is reported as a
    duplicate when another row with the same normalized first and last name
    was already seen. Each duplicated person is counted and printed once.

    Args:
        files: Iterable of workbook paths handed to ``open_workbook``.

    Raises:
        TypeError: If a row does not hold exactly two cells, because the
            cells are unpacked into ``Person(fname, lname)``.
    """
    seen = set()
    # Keyed by normalized name so one person is never reported twice; the
    # value is the first repeated row, which is what gets printed.
    dupes = {}
    for path in files:
        with open_workbook(path) as wb:
            for sheet in wb.sheets():
                column_indexes = range(sheet.ncols)
                for row in range(sheet.nrows):
                    values = [sheet.cell(row, col).value for col in column_indexes]
                    item = Person(*values)
                    # Same criterion as Person.__eq__, but hashable, so the
                    # membership test is O(1) instead of a list scan per row.
                    key = (Person.normalize(item.fname),
                           Person.normalize(item.lname))
                    if key in seen:
                        dupes.setdefault(key, item)
                    else:
                        seen.add(key)
    print('Nombre de duplicatas : {0}'.format(len(dupes)))
    for dupe in dupes.values():
        print(dupe)
if __name__ == '__main__':
    # Command-line entry point: collect one or more workbook paths from
    # -f/--files and run the duplicate search on them.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-f', '--files',
        type=str,
        nargs='+',
        help='Files to upload.',
        required=True,
    )
    main(parser.parse_args().files)