test neutralizer
This commit is contained in:
parent
bf91c40ceb
commit
0ce1788be1
23 changed files with 1874 additions and 1951 deletions
BIN
de.xlsx
BIN
de.xlsx
Binary file not shown.
BIN
eng.xlsx
BIN
eng.xlsx
Binary file not shown.
BIN
fr.xlsx
BIN
fr.xlsx
Binary file not shown.
BIN
it.xlsx
BIN
it.xlsx
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -1,5 +0,0 @@
|
|||
Name,Geburtsdatum,Adresse,Telefon,Email,Arbeitgeber,Mitarbeiter-ID,Kontonummer,Abteilung,Beruf,Sprache,Arbeitsstatus,Lieblingsfarbe
|
||||
Anna-Lena Schmitt,1985-03-14,"Rosenstraße 12, 80331 München",+49 89 12345678,anna.schmitt85@gmail.com,"Müller & Partner GmbH",DE19854321,"DE89 3704 0044 0532 0130 00",Marketing,Projektmanagerin,Deutsch,Vollzeit,Blau
|
||||
John Michael Anderson,1990-08-22,"1250 W Addison St, Apt 4B, Chicago, IL 60613",+1 312-555-7890,jm.anderson90@yahoo.com,"ClearTech Solutions",US65432109,"US12 1234 5678 9012 3456 78",Engineering,Software Developer,English,Full-time,Green
|
||||
Sophie Martin,1978-07-02,"18 rue des Lilas, 69003 Lyon",+33 6 12 34 56 78,sophie.martin78@orange.fr,"Banque Populaire",FR30219876,"FR76 3000 6000 0112 3456 7890 189","Ressources humaines","Responsable RH",Français,"Temps plein",Violet
|
||||
Giulia Bianchi,1983-11-09,"Via della Pergola 25, 50121 Firenze",+39 055 1234567,giulia.bianchi83@libero.it,"Enel Energia",IT98127643,"IT60 X054 2811 1010 0000 0123 456",Amministrazione,Contabile,Italiano,"Tempo pieno",Rosso
|
||||
|
|
|
@ -1,38 +0,0 @@
|
|||
[
|
||||
{
|
||||
"name": "Anna-Lena Schmitt",
|
||||
"geburtsdatum": "1985-03-14",
|
||||
"adresse": "Rosenstraße 12, 80331 München",
|
||||
"telefon": "+49 89 12345678",
|
||||
"email": "anna.schmitt85@gmail.com",
|
||||
"arbeitgeber": "Müller & Partner GmbH",
|
||||
"mitarbeiter_id": "DE19854321",
|
||||
},
|
||||
{
|
||||
"name": "John Michael Anderson",
|
||||
"birthdate": "1990-08-22",
|
||||
"address": "1250 W Addison St, Apt 4B, Chicago, IL 60613",
|
||||
"phone": "+1 312-555-7890",
|
||||
"email": "jm.anderson90@yahoo.com",
|
||||
"employer": "ClearTech Solutions",
|
||||
"employee_id": "US65432109",
|
||||
},
|
||||
{
|
||||
"nom": "Sophie Martin",
|
||||
"date_naissance": "1978-07-02",
|
||||
"adresse": "18 rue des Lilas, 69003 Lyon",
|
||||
"téléphone": "+33 6 12 34 56 78",
|
||||
"email": "sophie.martin78@orange.fr",
|
||||
"employeur": "Banque Populaire",
|
||||
"id_employé": "FR30219876",
|
||||
},
|
||||
{
|
||||
"nome": "Giulia Bianchi",
|
||||
"data_nascita": "1983-11-09",
|
||||
"indirizzo": "Via della Pergola 25, 50121 Firenze",
|
||||
"telefono": "+39 055 1234567",
|
||||
"email": "giulia.bianchi83@libero.it",
|
||||
"datore_di_lavoro": "Enel Energia",
|
||||
"id_dipendente": "IT98127643",
|
||||
}
|
||||
]
|
||||
|
|
@ -1 +0,0 @@
|
|||
Mein Name ist Anna-Lena Schmitt, ich wurde am 14. März 1985 in München geboren. Meine aktuelle Adresse lautet: Rosenstraße 12, 80331 München. Du erreichst mich unter +49 89 12345678 oder per E-Mail an anna.schmitt85@gmail.com. Ich arbeite bei der Müller & Partner GmbH, meine Mitarbeiter-ID ist DE19854321. Meine Kontonummer bei der Deutschen Bank lautet DE89 3704 0044 0532 0130 00. Mein Ehemann Thomas Schmitt, geb. am 3. Mai 1982, arbeitet bei BMW. Unsere Tochter Lena Schmitt, geboren am 10. Oktober 2012, geht auf das Ludwig-Gymnasium.
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
Name,Birthdate,Address,Phone,Email,Employer,Employee ID,Bank Account,Department,Position,Language,Employment Status,Favorite Color
|
||||
Anna-Lena Schmitt,1985-03-14,"Rosenstraße 12, 80331 München",+49 89 12345678,anna.schmitt85@gmail.com,"Müller & Partner GmbH",DE19854321,"DE89 3704 0044 0532 0130 00",Marketing,Projektmanagerin,Deutsch,Vollzeit,Blau
|
||||
John Michael Anderson,1990-08-22,"1250 W Addison St, Apt 4B, Chicago, IL 60613",+1 312-555-7890,jm.anderson90@yahoo.com,"ClearTech Solutions",US65432109,"US12 1234 5678 9012 3456 78",Engineering,Software Developer,English,Full-time,Green
|
||||
Sophie Martin,1978-07-02,"18 rue des Lilas, 69003 Lyon",+33 6 12 34 56 78,sophie.martin78@orange.fr,"Banque Populaire",FR30219876,"FR76 3000 6000 0112 3456 7890 189","Ressources humaines","Responsable RH",Français,"Temps plein",Violet
|
||||
Giulia Bianchi,1983-11-09,"Via della Pergola 25, 50121 Firenze",+39 055 1234567,giulia.bianchi83@libero.it,"Enel Energia",IT98127643,"IT60 X054 2811 1010 0000 0123 456",Amministrazione,Contabile,Italiano,"Tempo pieno",Rosso
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
[
|
||||
{
|
||||
"name": "John Michael Anderson",
|
||||
"birthdate": "1990-08-22",
|
||||
"address": "1250 W Addison St, Apt 4B, Chicago, IL 60613",
|
||||
"phone": "+1 312-555-7890",
|
||||
"email": "jm.anderson90@yahoo.com",
|
||||
"employer": "ClearTech Solutions",
|
||||
"employee_id": "US65432109",
|
||||
"bank_account": "US12 1234 5678 9012 3456 78",
|
||||
"department": "Engineering",
|
||||
"position": "Software Developer",
|
||||
"language": "English",
|
||||
"employment_status": "Full-time",
|
||||
"favorite_color": "Green"
|
||||
}
|
||||
]
|
||||
|
|
@ -1 +0,0 @@
|
|||
My name is John Michael Anderson, born on August 22, 1990, in Chicago, Illinois. I currently live at 1250 W Addison St, Apt 4B, Chicago, IL 60613. You can reach me at +1 312-555-7890 or via email: jm.anderson90@yahoo.com. I work at ClearTech Solutions, employee number US65432109. My Chase Bank account number is US12 1234 5678 9012 3456 78. My wife Emily Anderson, born June 18, 1991, is a nurse at Northwestern Memorial Hospital. Our son Jacob, born January 5, 2019, attends Lincoln Park Preschool.
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
Nom,Date de naissance,Adresse,Téléphone,Email,Employeur,ID Employé,RIB,Département,Poste,Langue,Statut emploi,Couleur préféréeAnna-Lena Schmitt,1985-03-14,"Rosenstraße 12, 80331 München",+49 89 12345678,anna.schmitt85@gmail.com,"Müller & Partner GmbH",DE19854321,"DE89 3704 0044 0532 0130 00",Marketing,Projektmanagerin,Deutsch,Vollzeit,Blau
|
||||
John Michael Anderson,1990-08-22,"1250 W Addison St, Apt 4B, Chicago, IL 60613",+1 312-555-7890,jm.anderson90@yahoo.com,"ClearTech Solutions",US65432109,"US12 1234 5678 9012 3456 78",Engineering,Software Developer,English,Full-time,Green
|
||||
Sophie Martin,1978-07-02,"18 rue des Lilas, 69003 Lyon",+33 6 12 34 56 78,sophie.martin78@orange.fr,"Banque Populaire",FR30219876,"FR76 3000 6000 0112 3456 7890 189","Ressources humaines","Responsable RH",Français,"Temps plein",Violet
|
||||
Giulia Bianchi,1983-11-09,"Via della Pergola 25, 50121 Firenze",+39 055 1234567,giulia.bianchi83@libero.it,"Enel Energia",IT98127643,"IT60 X054 2811 1010 0000 0123 456",Amministrazione,Contabile,Italiano,"Tempo pieno",Rosso
|
||||
|
Can't render this file because it has a wrong number of fields in line 2.
|
|
|
@ -1,17 +0,0 @@
|
|||
[
|
||||
{
|
||||
"nom": "Sophie Martin",
|
||||
"date_naissance": "1978-07-02",
|
||||
"adresse": "18 rue des Lilas, 69003 Lyon",
|
||||
"téléphone": "+33 6 12 34 56 78",
|
||||
"email": "sophie.martin78@orange.fr",
|
||||
"employeur": "Banque Populaire",
|
||||
"id_employé": "FR30219876",
|
||||
"rib": "FR76 3000 6000 0112 3456 7890 189",
|
||||
"département": "Ressources humaines",
|
||||
"poste": "Responsable RH",
|
||||
"langue": "Français",
|
||||
"statut_emploi": "Temps plein",
|
||||
"couleur_préférée": "Violet"
|
||||
}
|
||||
]
|
||||
|
|
@ -1 +0,0 @@
|
|||
Je m'appelle Sophie Martin, née le 2 juillet 1978 à Lyon. J'habite au 18 rue des Lilas, 69003 Lyon. Mon numéro de téléphone est le +33 6 12 34 56 78, mon adresse e-mail est sophie.martin78@orange.fr. Je travaille chez Banque Populaire, numéro d'employée FR30219876. Mon RIB est FR76 3000 6000 0112 3456 7890 189. Mon mari Laurent Martin, né le 12 janvier 1975, est professeur à l'Université Lyon 2. Notre fille Camille, née le 15 avril 2008, est au Collège Jean Moulin.
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
Nome,Data di nascita,Indirizzo,Telefono,Email,Datore di lavoro,ID Dipendente,IBAN,Reparto,Posizione,Lingua,Stato lavorativo,Colore preferitoAnna-Lena Schmitt,1985-03-14,"Rosenstraße 12, 80331 München",+49 89 12345678,anna.schmitt85@gmail.com,"Müller & Partner GmbH",DE19854321,"DE89 3704 0044 0532 0130 00",Marketing,Projektmanagerin,Deutsch,Vollzeit,Blau
|
||||
John Michael Anderson,1990-08-22,"1250 W Addison St, Apt 4B, Chicago, IL 60613",+1 312-555-7890,jm.anderson90@yahoo.com,"ClearTech Solutions",US65432109,"US12 1234 5678 9012 3456 78",Engineering,Software Developer,English,Full-time,Green
|
||||
Sophie Martin,1978-07-02,"18 rue des Lilas, 69003 Lyon",+33 6 12 34 56 78,sophie.martin78@orange.fr,"Banque Populaire",FR30219876,"FR76 3000 6000 0112 3456 7890 189","Ressources humaines","Responsable RH",Français,"Temps plein",Violet
|
||||
Giulia Bianchi,1983-11-09,"Via della Pergola 25, 50121 Firenze",+39 055 1234567,giulia.bianchi83@libero.it,"Enel Energia",IT98127643,"IT60 X054 2811 1010 0000 0123 456",Amministrazione,Contabile,Italiano,"Tempo pieno",Rosso
|
||||
|
Can't render this file because it has a wrong number of fields in line 2.
|
|
|
@ -1,17 +0,0 @@
|
|||
[
|
||||
{
|
||||
"nome": "Giulia Bianchi",
|
||||
"data_nascita": "1983-11-09",
|
||||
"indirizzo": "Via della Pergola 25, 50121 Firenze",
|
||||
"telefono": "+39 055 1234567",
|
||||
"email": "giulia.bianchi83@libero.it",
|
||||
"datore_di_lavoro": "Enel Energia",
|
||||
"id_dipendente": "IT98127643",
|
||||
"iban": "IT60 X054 2811 1010 0000 0123 456",
|
||||
"reparto": "Amministrazione",
|
||||
"posizione": "Contabile",
|
||||
"lingua": "Italiano",
|
||||
"stato_lavorativo": "Tempo pieno",
|
||||
"colore_preferito": "Rosso"
|
||||
}
|
||||
]
|
||||
|
|
@ -1 +0,0 @@
|
|||
Mi chiamo Giulia Bianchi, sono nata il 9 novembre 1983 a Firenze. Abito in Via della Pergola 25, 50121 Firenze. Il mio numero di telefono è +39 055 1234567 e la mia email è giulia.bianchi83@libero.it. Lavoro presso Enel Energia, numero impiegata IT98127643. Il mio IBAN è IT60 X054 2811 1010 0000 0123 456. Mio marito Marco Rossi, nato il 30 giugno 1980, è ingegnere alla Piaggio. Nostro figlio Luca, nato il 22 febbraio 2014, frequenta la Scuola Elementare Galileo Galilei.
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
2025-06-07 18:12:07,994 - __main__ - INFO - Starting file processing...
|
||||
2025-06-07 18:12:07,995 - __main__ - INFO - Processing file: Case.md
|
||||
2025-06-07 18:12:08,020 - __main__ - INFO - Anonymization completed for Case.md
|
||||
2025-06-07 18:12:08,020 - __main__ - INFO - Processing file: customers.csv
|
||||
2025-06-07 18:12:08,050 - __main__ - INFO - Anonymization completed for customers.csv
|
||||
2025-06-07 18:12:08,050 - __main__ - INFO - Processing file: cv_lara_meier.txt
|
||||
2025-06-07 18:12:08,076 - __main__ - INFO - Anonymization completed for cv_lara_meier.txt
|
||||
2025-06-07 18:12:08,076 - __main__ - INFO - Processing file: employees.csv
|
||||
2025-06-07 18:12:08,098 - __main__ - INFO - Anonymization completed for employees.csv
|
||||
2025-06-07 18:12:08,099 - __main__ - INFO - Processing file: english.txt
|
||||
2025-06-07 18:12:08,119 - __main__ - INFO - Anonymization completed for english.txt
|
||||
2025-06-07 18:12:08,119 - __main__ - INFO - Processing file: example.json
|
||||
2025-06-07 18:12:08,143 - __main__ - INFO - Anonymization completed for example.json
|
||||
2025-06-07 18:12:08,143 - __main__ - INFO - Processing file: example.xml
|
||||
2025-06-07 18:12:08,174 - __main__ - INFO - Anonymization completed for example.xml
|
||||
2025-06-07 18:12:08,176 - __main__ - INFO - Processing file: french.txt
|
||||
2025-06-07 18:12:08,198 - __main__ - INFO - Anonymization completed for french.txt
|
||||
2025-06-07 18:12:08,199 - __main__ - INFO - Processing file: german.txt
|
||||
2025-06-07 18:12:08,222 - __main__ - INFO - Anonymization completed for german.txt
|
||||
2025-06-07 18:12:08,222 - __main__ - INFO - Processing file: geschaeftsstrategie.txt
|
||||
2025-06-07 18:12:08,251 - __main__ - INFO - Anonymization completed for geschaeftsstrategie.txt
|
||||
2025-06-07 18:12:08,251 - __main__ - INFO - Processing file: geschäfte.csv
|
||||
2025-06-07 18:12:08,281 - __main__ - INFO - Anonymization completed for geschäfte.csv
|
||||
2025-06-07 18:12:08,281 - __main__ - INFO - Processing file: italian.txt
|
||||
2025-06-07 18:12:08,310 - __main__ - INFO - Anonymization completed for italian.txt
|
||||
2025-06-07 18:12:08,311 - __main__ - INFO - Processing file: kunden.csv
|
||||
2025-06-07 18:12:08,346 - __main__ - INFO - Anonymization completed for kunden.csv
|
||||
2025-06-07 18:12:08,346 - __main__ - INFO - Processing file: mitarbeiter.csv
|
||||
2025-06-07 18:12:08,387 - __main__ - INFO - Anonymization completed for mitarbeiter.csv
|
||||
2025-06-07 18:12:08,387 - __main__ - INFO - Processing file: swiss.txt
|
||||
2025-06-07 18:12:08,427 - __main__ - INFO - Anonymization completed for swiss.txt
|
||||
2025-06-07 18:12:08,427 - __main__ - INFO - Processing file: transactions.csv
|
||||
2025-06-07 18:12:08,470 - __main__ - INFO - Anonymization completed for transactions.csv
|
||||
2025-06-07 18:12:08,470 - __main__ - INFO - Processing completed!
|
||||
|
|
@ -1,17 +1,17 @@
|
|||
timestamp,success,file_name,replaced_fields,content_type,headers,row_count
|
||||
2025-06-07T18:01:20.434978,True,Case.md,,text,,0
|
||||
2025-06-07T18:01:20.456980,True,customers.csv,address;phone;email;credit_card;name;ahv_number;iban,table,id;name;email;phone;address;iban;credit_card;ahv_number,5
|
||||
2025-06-07T18:01:20.478871,True,cv_lara_meier.txt,,text,,0
|
||||
2025-06-07T18:01:20.494871,True,employees.csv,first_name;last_name;phone;email;office_address;uid_number;bank_account,table,employee_id;first_name;last_name;email;phone;department;office_address;uid_number;bank_account,5
|
||||
2025-06-07T18:01:20.515325,True,english.txt,,text,,0
|
||||
2025-06-07T18:01:20.540410,True,example.json,,json
|
||||
2025-06-07T18:01:20.574010,True,example.xml,,xml
|
||||
2025-06-07T18:01:20.597827,True,french.txt,,text,,0
|
||||
2025-06-07T18:01:20.619198,True,german.txt,,text,,0
|
||||
2025-06-07T18:01:20.646196,True,geschaeftsstrategie.txt,,text,,0
|
||||
2025-06-07T18:01:20.673711,True,geschäfte.csv,datum;zahlungsdetails;kundenemail;kundenname;lieferadresse,table,geschäft_id;datum;kundenname;kundenemail;betrag;zahlungsmethode;zahlungsdetails;lieferadresse,5
|
||||
2025-06-07T18:01:20.703980,True,italian.txt,,text,,0
|
||||
2025-06-07T18:01:20.734539,True,kunden.csv,vorname;adresse;email;telefon;steuernummer;kreditkarte;nachname;iban,table,kunden_id;vorname;nachname;email;telefon;adresse;iban;kreditkarte;steuernummer,5
|
||||
2025-06-07T18:01:20.770287,True,mitarbeiter.csv,vorname;sozialversicherungsnummer;email;telefon;steuernummer;büroadresse;nachname,table,mitarbeiter_id;vorname;nachname;email;telefon;abteilung;büroadresse;steuernummer;sozialversicherungsnummer,5
|
||||
2025-06-07T18:01:20.809477,True,swiss.txt,,text,,0
|
||||
2025-06-07T18:01:20.852367,True,transactions.csv,shipping_address;customer_email;date;customer_name;payment_details,table,transaction_id;date;customer_name;customer_email;amount;payment_method;payment_details;shipping_address,5
|
||||
2025-06-07T18:12:08.019661,True,Case.md,,text,,0
|
||||
2025-06-07T18:12:08.040653,True,customers.csv,ahv_number;phone;credit_card;address;name;iban;email,table,id;name;email;phone;address;iban;credit_card;ahv_number,5
|
||||
2025-06-07T18:12:08.064201,True,cv_lara_meier.txt,,text,,0
|
||||
2025-06-07T18:12:08.080961,True,employees.csv,bank_account;first_name;last_name;office_address;phone;uid_number;email,table,employee_id;first_name;last_name;email;phone;department;office_address;uid_number;bank_account,5
|
||||
2025-06-07T18:12:08.101660,True,english.txt,,text,,0
|
||||
2025-06-07T18:12:08.125634,True,example.json,,json
|
||||
2025-06-07T18:12:08.157397,True,example.xml,,xml
|
||||
2025-06-07T18:12:08.178030,True,french.txt,,text,,0
|
||||
2025-06-07T18:12:08.201071,True,german.txt,,text,,0
|
||||
2025-06-07T18:12:08.228652,True,geschaeftsstrategie.txt,,text,,0
|
||||
2025-06-07T18:12:08.255652,True,geschäfte.csv,datum;kundenname;kundenemail;zahlungsdetails;lieferadresse,table,geschäft_id;datum;kundenname;kundenemail;betrag;zahlungsmethode;zahlungsdetails;lieferadresse,5
|
||||
2025-06-07T18:12:08.284172,True,italian.txt,,text,,0
|
||||
2025-06-07T18:12:08.314527,True,kunden.csv,kreditkarte;vorname;adresse;telefon;iban;steuernummer;email;nachname,table,kunden_id;vorname;nachname;email;telefon;adresse;iban;kreditkarte;steuernummer,5
|
||||
2025-06-07T18:12:08.349750,True,mitarbeiter.csv,vorname;büroadresse;telefon;steuernummer;email;sozialversicherungsnummer;nachname,table,mitarbeiter_id;vorname;nachname;email;telefon;abteilung;büroadresse;steuernummer;sozialversicherungsnummer,5
|
||||
2025-06-07T18:12:08.389324,True,swiss.txt,,text,,0
|
||||
2025-06-07T18:12:08.431916,True,transactions.csv,date;customer_email;payment_details;customer_name;shipping_address,table,transaction_id;date;customer_name;customer_email;amount;payment_method;payment_details;shipping_address,5
|
||||
|
|
|
|||
|
File diff suppressed because it is too large
Load diff
|
|
@ -138,8 +138,8 @@ class DataAnonymizer:
|
|||
replacement = self.mapping[matched_text]
|
||||
|
||||
if pattern_name == 'email':
|
||||
logger.debug(f"DEBUG: Replacing email '{matched_text}' with '{replacement}'")
|
||||
logger.debug(f"DEBUG: Text after replacement: {current_text[:start] + replacement + current_text[end:]}")
|
||||
print(f"DEBUG: Replacing email '{matched_text}' with '{replacement}'")
|
||||
print(f"DEBUG: Text after replacement: {current_text[:start] + replacement + current_text[end:]}")
|
||||
|
||||
# Replace the matched text while preserving surrounding whitespace
|
||||
current_text = current_text[:start] + replacement + current_text[end:]
|
||||
|
|
|
|||
Loading…
Reference in a new issue