chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,495 @@
{
"source_file": "CRH 23100690.pdf",
"total_replacements": 277,
"regex_replacements": 218,
"ner_replacements": 6,
"sweep_replacements": 53,
"entities_found": [
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002111572",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "10002111572",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "10002111572",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "MARTIN Lilian",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9364063739776611
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9565077424049377
},
{
"original": "Grellety",
"replacement": "[ADRESSE_2]",
"source": "ner",
"score": 0.962624728679657
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9431359767913818
},
{
"original": "MARTIN Lilian",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9641620516777039
},
{
"original": "MARTIN Lilian",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9818165898323059
}
]
}

View File

@@ -0,0 +1,501 @@
{
"source_file": "trackare-BA053801-23100690_BA053801_23100690.pdf",
"total_replacements": 918,
"regex_replacements": 357,
"ner_replacements": 37,
"sweep_replacements": 524,
"entities_found": [
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23100690",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "HAARLEM",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "MARTIN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARTIN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARTIN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARTIN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARTIN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "LILIAN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "LILIAN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "LILIAN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "LILIAN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "LILIAN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "148 AVENUE JEAN JAURES Ville de résidence: TARNOS",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "Marion PUJOS",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9894607663154602
},
{
"original": "BURTIN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.989393949508667
},
{
"original": "SAUTIER Aurore",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9749075174331665
},
{
"original": "SOL GUILNGAR Anne",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.849865198135376
},
{
"original": "GUILNGAR Anne",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.8478538393974304
},
{
"original": "VERGEZ Magali",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9937296509742737
},
{
"original": "VERGEZ Magali",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.984062671661377
},
{
"original": "VERGEZ Magali",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.8882125020027161
},
{
"original": "Florence BIVES",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9950606822967529
},
{
"original": "VERGEZ Magali",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.8729881644248962
},
{
"original": "Florence TOILETTE",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9881629347801208
},
{
"original": "Aurore SOMMEIL",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.994674563407898
},
{
"original": "Segolene BANDELETTE URINAIRE",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.8850006461143494
},
{
"original": "BANDELETTE URINAIRE",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9217853546142578
},
{
"original": "DR.",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.856285810470581
},
{
"original": "Stéphane Signé",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9980824589729309
},
{
"original": "Stéphane Signé",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9978163242340088
},
{
"original": "Stéphane Signé",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9973459839820862
},
{
"original": "Stéphane Signé",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9972022175788879
},
{
"original": "Florence A JEUN Signé",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9663448929786682
},
{
"original": "Marion PUJOS",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9644891023635864
},
{
"original": "Capucine CHAPPE",
"replacement": "[SOIGNANT_10]",
"source": "ner",
"score": 0.9671915173530579
},
{
"original": "Lucile CHEVALIER",
"replacement": "[SOIGNANT_9]",
"source": "ner",
"score": 0.9965578317642212
},
{
"original": "Segolene RODRIGO",
"replacement": "[MEDECIN_19]",
"source": "ner",
"score": 0.999019980430603
},
{
"original": "VENLAFAXINE ARW Segolene",
"replacement": "[MEDECIN_19]",
"source": "ner",
"score": 0.8128743171691895
},
{
"original": "Florence BIVES",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9912522435188293
},
{
"original": "OLIVIA LALANNE",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.96076500415802
},
{
"original": "Anne-Laure SORIN",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9962477684020996
},
{
"original": "OLIVIA LALANNE",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9970515370368958
},
{
"original": "Anne-Laure SORIN",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9978489875793457
},
{
"original": "OLIVIA LALANNE",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8911562561988831
},
{
"original": "Anne-Laure SORIN",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9976954460144043
},
{
"original": "OLIVIA LALANNE",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9803857803344727
},
{
"original": "Anne-Laure SORIN",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.996042788028717
},
{
"original": "Segolene RODRIGO",
"replacement": "[MEDECIN_19]",
"source": "ner",
"score": 0.9649170637130737
},
{
"original": "GUILINGARD",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.918915867805481
},
{
"original": "Charles De Gaulle",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.9074727892875671
}
]
}