chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,321 @@
{
"source_file": "CRH 23115483.pdf",
"total_replacements": 178,
"regex_replacements": 144,
"ner_replacements": 6,
"sweep_replacements": 28,
"entities_found": [
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002456746",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "10002456746",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "Grellety",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9573414325714111
},
{
"original": "FOLFOX A",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9052630066871643
},
{
"original": "POTTIER Edgard",
"replacement": "[PATIENT_2]",
"source": "ner",
"score": 0.9913237690925598
},
{
"original": "Grellety",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.8851580619812012
},
{
"original": "POTTIER Edgard",
"replacement": "[PATIENT_2]",
"source": "ner",
"score": 0.9874683618545532
},
{
"original": "POTTIER EDGARD",
"replacement": "[PATIENT_2]",
"source": "ner",
"score": 0.996470034122467
}
]
}

View File

@@ -0,0 +1,381 @@
{
"source_file": "trackare-07010206-23115483_07010206_23115483.pdf",
"total_replacements": 549,
"regex_replacements": 220,
"ner_replacements": 20,
"sweep_replacements": 309,
"entities_found": [
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "07010206",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23115483",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "VENDOME",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "BAYLE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "8MG Caroline",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9827437996864319
},
{
"original": "SETOFILM",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9584081172943115
},
{
"original": "8MG Caroline",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9817571640014648
},
{
"original": "SETOFILM",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9592560529708862
},
{
"original": "MG Caroline",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9158672094345093
},
{
"original": "PRIMPERAN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9325525760650635
},
{
"original": "Caroline GELULE",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9927664995193481
},
{
"original": "Caroline GELULE",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9821792244911194
},
{
"original": "MARGUERITE Caroline S",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9036006927490234
},
{
"original": "8MG Caroline",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9827437996864319
},
{
"original": "SETOFILM",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9584081172943115
},
{
"original": "8MG Caroline",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9817571640014648
},
{
"original": "SETOFILM",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9592560529708862
},
{
"original": "MG Caroline",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9154716730117798
},
{
"original": "PRIMPERAN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9323108196258545
},
{
"original": "Caroline GELULE",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9921978712081909
},
{
"original": "Caroline GELULE",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9804865121841431
},
{
"original": "MARGUERITE Caroline S",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.866780698299408
},
{
"original": "Anne-Laure SORIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9968778491020203
},
{
"original": "Anne-Laure SORIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9902318120002747
}
]
}