chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,27 @@
{
"source_file": "CRO-23084754.pdf",
"total_replacements": 17,
"regex_replacements": 6,
"ner_replacements": 3,
"sweep_replacements": 8,
"entities_found": [
{
"original": "HEMOLOCK",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8775874972343445
},
{
"original": "Callot",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9380627870559692
},
{
"original": "ANDRIANAVALOMIONONA",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9936531782150269
}
]
}

View File

@@ -0,0 +1,27 @@
{
"source_file": "CRO 23084754.pdf",
"total_replacements": 17,
"regex_replacements": 6,
"ner_replacements": 3,
"sweep_replacements": 8,
"entities_found": [
{
"original": "HEMOLOCK",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8775874972343445
},
{
"original": "Callot",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9380627870559692
},
{
"original": "ANDRIANAVALOMIONONA",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9936531782150269
}
]
}

View File

@@ -0,0 +1,711 @@
{
"source_file": "trackare-00253758-23084754_00253758_23084754.pdf",
"total_replacements": 1034,
"regex_replacements": 379,
"ner_replacements": 46,
"sweep_replacements": 609,
"entities_found": [
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "00253758",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23084754",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "ARBONNE",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "NGUYEN Philippe",
"replacement": "[MEDECIN_6]",
"source": "regex",
"category": "patient"
},
{
"original": "NGUYEN Philippe",
"replacement": "[MEDECIN_6]",
"source": "regex",
"category": "patient"
},
{
"original": "AGUIRRE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ECHEVERRIA",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "AGUIRRE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ECHEVERRIA",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "MARIE-THEREZE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARIE-THEREZE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARIE-THEREZE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARIE-THEREZE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "13 ALLEE DES CAMELIAS Ville de résidence: ANGLET",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "Paul Jean MATERNOWSKI",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9971949458122253
},
{
"original": "MATERNOWSKI",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9473606944084167
},
{
"original": "AGUIRRE",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.8010779619216919
},
{
"original": "GUILLEMAUD GUILLEMAUD",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9966429471969604
},
{
"original": "TARDIO Aurélia",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9747874140739441
},
{
"original": "HENRIOT Jeremy",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9938110113143921
},
{
"original": "HENRIOT Jeremy",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.99663907289505
},
{
"original": "HENRIOT Jeremy",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9971486926078796
},
{
"original": "HENRIOT Jeremy",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9803449511528015
},
{
"original": "HENRIOT Jeremy",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9781505465507507
},
{
"original": "Admin",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8571096658706665
},
{
"original": "HENRIOT Jeremy S",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9600256085395813
},
{
"original": "HENRIOT Jeremy",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9958184957504272
},
{
"original": "LACLAU- S",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9071234464645386
},
{
"original": "LACROUTS",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9170938730239868
},
{
"original": "LACLAU",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9274430274963379
},
{
"original": "HENRIOT Jeremy",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9667332768440247
},
{
"original": "HENRIOT Jeremy",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9970366358757019
},
{
"original": "HENRIOT Jeremy",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9972242116928101
},
{
"original": "HENRIOT Jeremy",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9805771112442017
},
{
"original": "Admin",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8581966757774353
},
{
"original": "HENRIOT Jeremy S",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9437782168388367
},
{
"original": "Paul Jean Signé",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9318066835403442
},
{
"original": "Paul Jean 100",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.8514785766601562
},
{
"original": "Paul Jean Réalisé",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9367172718048096
},
{
"original": "Paul Jean",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9604936838150024
},
{
"original": "Paul Jean",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9185220003128052
},
{
"original": "Paul Jean",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.8356407284736633
},
{
"original": "Paul Jean",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9325695037841797
},
{
"original": "Paul Jean",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9086077809333801
},
{
"original": "Paul Jean",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.8616173267364502
},
{
"original": "Sophie KT COURT",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9831474423408508
},
{
"original": "Sophie KT COURT",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9798388481140137
},
{
"original": "Paul Jean",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9354991912841797
},
{
"original": "HENRIOT",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9973776340484619
},
{
"original": "Paul Jean MATERNOWSKI",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9987330436706543
},
{
"original": "Paul Jean MATERNOWSKI",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9989485144615173
},
{
"original": "Paul Jean MATERNOWSKI",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9990648031234741
},
{
"original": "Paul Jean MATERNOWSKI",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9978137612342834
},
{
"original": "Paul Jean MATERNOWSKI",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9977149963378906
},
{
"original": "Paul Jean MATERNOWSKI",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9975112676620483
},
{
"original": "Paul Jean",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.8704956769943237
},
{
"original": "Nathalie DUHAU",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9966807961463928
},
{
"original": "Amélie DEBIEZ",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9964140057563782
},
{
"original": "NGUYEN Philippe",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9823210835456848
},
{
"original": "Iñaki",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.9872614741325378
}
]
}