chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,459 @@
{
"source_file": "CRH 23076141.pdf",
"total_replacements": 245,
"regex_replacements": 214,
"ner_replacements": 7,
"sweep_replacements": 24,
"entities_found": [
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100292134",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "10100292134",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9878210425376892
},
{
"original": "ROLLET Marie-jeanne",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.987621009349823
},
{
"original": "PANNECAU",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8655059933662415
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.890143096446991
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9878210425376892
},
{
"original": "Dr M.",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9154891967773438
},
{
"original": "ROLLET Marie-jeanne",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9869318008422852
}
]
}

View File

@@ -0,0 +1,471 @@
{
"source_file": "trackare-99249436-23076141_99249436_23076141.pdf",
"total_replacements": 535,
"regex_replacements": 251,
"ner_replacements": 29,
"sweep_replacements": 255,
"entities_found": [
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "99249436",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23076141",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "242122424000575",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "24240",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "LIMEUIL",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_3]",
"source": "regex",
"category": "patient"
},
{
"original": "ROLLET",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CHASSAGNE",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "MARIE-JEANNE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARIE-JEANNE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "70 ALLEE BORDENAVE Ville de résidence: ANGLET",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "GUILLEMAUD DEROURE JAOUEN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9981071949005127
},
{
"original": "Hedy NEMEUR",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9985736608505249
},
{
"original": "THOMAS Chloé",
"replacement": "[SOIGNANT_23]",
"source": "ner",
"score": 0.9655766487121582
},
{
"original": "Wirsung",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8040562868118286
},
{
"original": "Wirsung",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8831466436386108
},
{
"original": "SOL NIVET Dorothée",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9110928177833557
},
{
"original": "IVET Dorothée",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8166471719741821
},
{
"original": "BOUBE Mathilde",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.809947669506073
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_10]",
"source": "ner",
"score": 0.9862568974494934
},
{
"original": "GAUTHIER Isabelle",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9964271783828735
},
{
"original": "SOL NIVET Dorothée",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9120680689811707
},
{
"original": "IVET Dorothée",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8201793432235718
},
{
"original": "BOUBE Mathilde",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.9036867022514343
},
{
"original": "Mathilde PELL",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.8758963942527771
},
{
"original": "BOUBE Mathilde",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.8430468440055847
},
{
"original": "BOUBE Mathilde",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.9753868579864502
},
{
"original": "PAR",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8388725519180298
},
{
"original": "BOUBE Mathilde",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.805560290813446
},
{
"original": "PONTIER Bénédicte S",
"replacement": "[MEDECIN_10]",
"source": "ner",
"score": 0.919536292552948
},
{
"original": "Isabelle SOMMEIL",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.8948761820793152
},
{
"original": "Isabelle REPAS",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9622352719306946
},
{
"original": "Isabelle",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9068058729171753
},
{
"original": "Isabelle",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.810939610004425
},
{
"original": "Eva LEPLEY",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9759193658828735
},
{
"original": "Sarah DUTREY",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9986661672592163
},
{
"original": "Sarah DUTREY",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9980900287628174
},
{
"original": "Sarah DUTREY",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9885814189910889
},
{
"original": "Evane BOULARD",
"replacement": "[SOIGNANT_10]",
"source": "ner",
"score": 0.9129526019096375
},
{
"original": "Pannecau",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.8290410041809082
}
]
}