chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,513 @@
{
"source_file": "CRH 23090597.pdf",
"total_replacements": 238,
"regex_replacements": 208,
"ner_replacements": 12,
"sweep_replacements": 18,
"entities_found": [
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "Grellety",
"replacement": "[MEDECIN_2]",
"source": "ner",
"score": 0.9527943134307861
},
{
"original": "ne J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.863017201423645
},
{
"original": "Grellety",
"replacement": "[MEDECIN_2]",
"source": "ner",
"score": 0.9806966781616211
},
{
"original": "neau",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8127477169036865
},
{
"original": "DARRIGADE Bernard",
"replacement": "[PATIENT_2]",
"source": "ner",
"score": 0.992107093334198
},
{
"original": "BARTHASSOT",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9376484155654907
},
{
"original": "Grellety",
"replacement": "[MEDECIN_2]",
"source": "ner",
"score": 0.9527943134307861
},
{
"original": "ne J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.863017201423645
},
{
"original": "Grellety",
"replacement": "[MEDECIN_2]",
"source": "ner",
"score": 0.9806966781616211
},
{
"original": "neau",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8127477169036865
},
{
"original": "DARRIGADE Bernard",
"replacement": "[PATIENT_2]",
"source": "ner",
"score": 0.992107093334198
},
{
"original": "RUE BARTHASSOT",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8546881675720215
}
]
}

View File

@@ -0,0 +1,411 @@
{
"source_file": "trackare-BA042686-23090597_BA042686_23090597.pdf",
"total_replacements": 552,
"regex_replacements": 187,
"ner_replacements": 30,
"sweep_replacements": 335,
"entities_found": [
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23090597",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "156114031202512",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "156114031202512",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "40312",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "40312",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "TARNOS",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "DARRIGADE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "DARRIGADE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "DARRIGADE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "DARRIGADE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BERNARD",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BERNARD",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BERNARD",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BERNARD",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "4 RUE DU 19 MARS 1962 Ville de résidence: BOUCAU",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "JAOUEN BURTIN BURTIN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9678107500076294
},
{
"original": "MELAINE Léna GELULE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9319809675216675
},
{
"original": "ELAINE Léna GELULE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9075828790664673
},
{
"original": "MELAINE Léna Signé",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.91854327917099
},
{
"original": "MELAINE Léna",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9293058514595032
},
{
"original": "Léna GELULE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9941021203994751
},
{
"original": "Léna GELULE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.993756115436554
},
{
"original": "MELAINE Léna",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9556295275688171
},
{
"original": "MELAINE Léna Signé",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9171546101570129
},
{
"original": "Capucine CHAPPE",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.992388904094696
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9976258277893066
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.997351348400116
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9589370489120483
},
{
"original": "Capucine KWIKPEN",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.973297119140625
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.998430609703064
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9982582926750183
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9984056353569031
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9985054135322571
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9983503222465515
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9989615678787231
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.998916745185852
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.998957633972168
},
{
"original": "LANSOPRAZOLE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9821999669075012
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9764553904533386
},
{
"original": "Tomas AINCIART",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.993716299533844
},
{
"original": "Juliette GENDRE",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9821590185165405
},
{
"original": "Tiffeneau",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9458867907524109
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9310070872306824
},
{
"original": "Léna MELAINE",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.9921495318412781
},
{
"original": "Tiffeneau",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9776116013526917
}
]
}