chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,537 @@
{
"source_file": "CRH 23106204.pdf",
"total_replacements": 313,
"regex_replacements": 229,
"ner_replacements": 6,
"sweep_replacements": 78,
"entities_found": [
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101856135",
"replacement": "[RPPS_10]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101856135",
"replacement": "[RPPS_10]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101856135",
"replacement": "[RPPS_10]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101856135",
"replacement": "[RPPS_10]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "Babinski",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9955669641494751
},
{
"original": "Schaeverbeke",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9974033236503601
},
{
"original": "GONZALEZ Isabelle",
"replacement": "[MEDECIN_2]",
"source": "ner",
"score": 0.9948249459266663
},
{
"original": "Babinski",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9955669641494751
},
{
"original": "Schaeverbeke",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9974033236503601
},
{
"original": "GONZALEZ Isabelle",
"replacement": "[MEDECIN_2]",
"source": "ner",
"score": 0.995349645614624
}
]
}

View File

@@ -0,0 +1,471 @@
{
"source_file": "trackare-BA038066-23106204_BA038066_23106204.pdf",
"total_replacements": 423,
"regex_replacements": 226,
"ner_replacements": 26,
"sweep_replacements": 171,
"entities_found": [
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23106204",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "274126410218232",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "274126410218232",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "274126410218232",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "274126410218232",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "64102",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "64102",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "64102",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "64102",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "BAYONNE",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "MED INTERNE RHUMATOLOGIE - RHUMATOLOGIE\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_3]",
"source": "regex",
"category": "patient"
},
{
"original": "GONZALEZ",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "SOUQUIERES",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "GONZALEZ",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "SOUQUIERES",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "GONZALEZ",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "SOUQUIERES",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "GONZALEZ",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "SOUQUIERES",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "ISABELLE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ISABELLE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ISABELLE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ISABELLE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ISABELLE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ISABELLE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ISABELLE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ISABELLE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "238 COTE DE NOGARET Ville de résidence: HASTINGUES",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "JAOUEN BURTIN BURTIN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9887604713439941
},
{
"original": "Kappa",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9625274538993835
},
{
"original": "Hutchison",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9971087574958801
},
{
"original": "Kappa",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9603561758995056
},
{
"original": "ABDOMINO-PELVIEN",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9635387063026428
},
{
"original": "VILLETTE Paul-Emile S",
"replacement": "[SOIGNANT_4]",
"source": "ner",
"score": 0.9736319780349731
},
{
"original": "VILLETTE Paul-Emile Signé",
"replacement": "[SOIGNANT_4]",
"source": "ner",
"score": 0.9411766529083252
},
{
"original": "VILLETTE Paul-Emile",
"replacement": "[SOIGNANT_4]",
"source": "ner",
"score": 0.9929646253585815
},
{
"original": "VILLETTE Paul-Emile",
"replacement": "[SOIGNANT_4]",
"source": "ner",
"score": 0.9638868570327759
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_4]",
"source": "ner",
"score": 0.9982150197029114
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_4]",
"source": "ner",
"score": 0.9957097172737122
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_4]",
"source": "ner",
"score": 0.9979967474937439
},
{
"original": "Paul-Emile",
"replacement": "[SOIGNANT_4]",
"source": "ner",
"score": 0.9166670441627502
},
{
"original": "Babinski",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9952602386474609
},
{
"original": "Schaverberke",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.9971520900726318
},
{
"original": "Babinski",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9905867576599121
},
{
"original": "Humira",
"replacement": "[PERSONNE_7]",
"source": "ner",
"score": 0.9053311347961426
},
{
"original": "Schaeverbeke",
"replacement": "[PERSONNE_8]",
"source": "ner",
"score": 0.9973140954971313
},
{
"original": "ARDILOUZE",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9717352390289307
},
{
"original": "Paul-Emile",
"replacement": "[SOIGNANT_4]",
"source": "ner",
"score": 0.82752925157547
},
{
"original": "Babinski",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9901454448699951
},
{
"original": "Humira",
"replacement": "[PERSONNE_7]",
"source": "ner",
"score": 0.968502938747406
},
{
"original": "Schaeverbeke",
"replacement": "[PERSONNE_8]",
"source": "ner",
"score": 0.9972837567329407
},
{
"original": "Babinski",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9958868026733398
},
{
"original": "Schaeverbeke",
"replacement": "[PERSONNE_8]",
"source": "ner",
"score": 0.9972837567329407
},
{
"original": "Epoux gonzalez xavier",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9548711776733398
}
]
}