chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,645 @@
{
"source_file": "CRH 23077240.pdf",
"total_replacements": 239,
"regex_replacements": 219,
"ner_replacements": 4,
"sweep_replacements": 16,
"entities_found": [
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "10102326468",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100422012",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100673481",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100402527",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002083912",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004401718",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10000598366",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102326468",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100422012",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100673481",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100402527",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002083912",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004401718",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10000598366",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102326468",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100422012",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100673481",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100402527",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002083912",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004401718",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10000598366",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102326468",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100422012",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100673481",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100402527",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002083912",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004401718",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10000598366",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102326468",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100422012",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100673481",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100402527",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002083912",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004401718",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10000598366",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102326468",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100422012",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100673481",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100402527",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002083912",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10000598366",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002083912",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "code_barre"
},
{
"original": "10002083912",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "code_barre"
},
{
"original": "secr.csmed@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.endocrino@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "arajot@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "mauzi@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "mgschwind@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "emauryy@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lritz@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "ddemarsy@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "sbordes-couecou@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "secr.csmed@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.endocrino@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "arajot@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "mauzi@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "mgschwind@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "emauryy@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lritz@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "ddemarsy@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "sbordes-couecou@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "secr.csmed@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.endocrino@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "arajot@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "mauzi@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "mgschwind@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "emauryy@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lritz@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "ddemarsy@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "sbordes-couecou@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "secr.csmed@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.endocrino@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "arajot@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "mauzi@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "mgschwind@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "emauryy@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lritz@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "ddemarsy@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "sbordes-couecou@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "secr.csmed@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.endocrino@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "arajot@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "mauzi@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "mgschwind@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "emauryy@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lritz@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "ddemarsy@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "sbordes-couecou@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "secr.csmed@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.endocrino@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "arajot@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "mauzi@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "mgschwind@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "emauryy@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lritz@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "sbordes-couecou@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "LOUTRE Anne-marie",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9657219052314758
},
{
"original": "QUILLACQ",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.803705096244812
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.814069926738739
},
{
"original": "LOUTRE Anne-marie",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9931214451789856
}
]
}

View File

@@ -0,0 +1,405 @@
{
"source_file": "trackare-BA184895-23077240_BA184895_23077240.pdf",
"total_replacements": 648,
"regex_replacements": 248,
"ner_replacements": 34,
"sweep_replacements": 366,
"entities_found": [
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23077240",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "260039935043584",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "99350",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "BENI MELLAL",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "BENQUET Delphine",
"replacement": "[MEDECIN_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MEDECINE ENDOCRINOLOGIE - Endocrino - Diabétologie\nDossier Patient\nDétails des patients",
"replacement": "[MEDECIN_6]",
"source": "regex",
"category": "patient"
},
{
"original": "LOUTRE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CAMPIGLIA",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "ANNE-MARIE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ANNE-MARIE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "10 AV OIHAN ARGI VILLA SYMPHONIE Ville de résidence: ST PIERRE D IRUBE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "GUILLEMAUD GUILLEMAUD",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9987524747848511
},
{
"original": "SCANNER THORACO-ABDOMINO-PELVIEN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9888858795166016
},
{
"original": "RITZ-QUILLACQ Laurence S",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9791138768196106
},
{
"original": "RITZ-QUILLACQ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9972130656242371
},
{
"original": "RITZ-QUILLACQ Laurence",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9963743090629578
},
{
"original": "RITZ-QUILLACQ Laurence",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9940347671508789
},
{
"original": "DEMARSY Delphine S",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.897955060005188
},
{
"original": "RITZ-QUILLACQ Laurence Signé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9529110193252563
},
{
"original": "RITZ-QUILLACQ Laurence",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9948678016662598
},
{
"original": "RITZ-QUILLACQ Laurence",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9982442855834961
},
{
"original": "DEMARSY Delphine S",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.8895190954208374
},
{
"original": "RITZ- QUILLACQ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9588702321052551
},
{
"original": "QUILLACQ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9748414754867554
},
{
"original": "RITZ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9942448139190674
},
{
"original": "QUILLACQ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9971765875816345
},
{
"original": "RITZ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9962194561958313
},
{
"original": "QUILLACQ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9967455267906189
},
{
"original": "RITZ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9951821565628052
},
{
"original": "QUILLACQ DR",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9739920496940613
},
{
"original": "RI",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8423864245414734
},
{
"original": "QUILLACQ DR",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9581697583198547
},
{
"original": "RITZ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9823956489562988
},
{
"original": "Laurence Signé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9617998600006104
},
{
"original": "QUILLACQ DR",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9054034352302551
},
{
"original": "QUILLACQ DR",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9777330160140991
},
{
"original": "QUILLACQ DR",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9017960429191589
},
{
"original": "QUILLACQ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9302743077278137
},
{
"original": "QUILLACQ",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9793306589126587
},
{
"original": "Me L",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.8923859596252441
},
{
"original": "Mathieu",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9264497756958008
},
{
"original": "Xabi BERRO",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9923381805419922
},
{
"original": "veunac",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.9882298707962036
},
{
"original": "berneau",
"replacement": "[PERSONNE_7]",
"source": "ner",
"score": 0.9889025688171387
},
{
"original": "Jacques Laffitte",
"replacement": "[PERSONNE_8]",
"source": "ner",
"score": 0.9832095503807068
}
]
}