chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,45 @@
{
"source_file": "CRO 23104446.pdf",
"total_replacements": 14,
"regex_replacements": 6,
"ner_replacements": 6,
"sweep_replacements": 2,
"entities_found": [
{
"original": "MARIE-THERESE DAGORRET",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9980680346488953
},
{
"original": "LAPISTOY",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9911671876907349
},
{
"original": "Marine",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9739218354225159
},
{
"original": "LEGAYE Aurélie",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9920905232429504
},
{
"original": "MARIE-THERESE DAGORRET",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9925521016120911
},
{
"original": "LAPISTOY",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9774647951126099
}
]
}

View File

@@ -0,0 +1,681 @@
{
"source_file": "trackare-03008209-23104446_03008209_23104446.pdf",
"total_replacements": 490,
"regex_replacements": 208,
"ner_replacements": 68,
"sweep_replacements": 214,
"entities_found": [
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "03008209",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104446",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "MACAYE",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "GYNECOLOGIE BAYONNE - GYNECOLOGIE\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_3]",
"source": "regex",
"category": "patient"
},
{
"original": "315 MAISON GNAGNOIA Ville de résidence: ILHARRE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "DEROURE",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8904464244842529
},
{
"original": "Myriam MAIA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9831429123878479
},
{
"original": "Myriam MAIA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9964204430580139
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9988381862640381
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9989517331123352
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9982332587242126
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9980830550193787
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9971656799316406
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.998489260673523
},
{
"original": "Myriam MAIA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9975073933601379
},
{
"original": "Myriam MAIA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9978794455528259
},
{
"original": "Myriam MAIA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9970610737800598
},
{
"original": "KADDOURI Nora",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9965422749519348
},
{
"original": "KADDOURI Nora",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9867899417877197
},
{
"original": "KADDOURI Nora",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9872907996177673
},
{
"original": "KARAM Lydia",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9602617025375366
},
{
"original": "KARAM Lydia",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.967383861541748
},
{
"original": "KARAM Lydia",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9776641130447388
},
{
"original": "Signé — DOULEUR",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8552605509757996
},
{
"original": "KARAM Lydia",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9827429056167603
},
{
"original": "Myriam MAIA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9962542653083801
},
{
"original": "Myriam MAIA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9951801300048828
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9987874031066895
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.998649001121521
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9981468915939331
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9982117414474487
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9975424408912659
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9980124235153198
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9981576204299927
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9981698989868164
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9980830550193787
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9971656799316406
},
{
"original": "David MALABAT",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.998489260673523
},
{
"original": "Myriam MAIA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9975073933601379
},
{
"original": "Myriam MAIA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9978794455528259
},
{
"original": "Myriam MAIA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9970610737800598
},
{
"original": "KADDOURI Nora",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9965422749519348
},
{
"original": "KADDOURI Nora",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9933724999427795
},
{
"original": "KADDOURI Nora",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9926764369010925
},
{
"original": "KARAM Lydia",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9889391660690308
},
{
"original": "KARAM Lydia",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9883062243461609
},
{
"original": "KARAM Lydia",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9874131083488464
},
{
"original": "KARAM Lydia",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9843629598617554
},
{
"original": "Lise S",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9053037166595459
},
{
"original": "Lise SOINS",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9508402943611145
},
{
"original": "Lise SOINS",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9657267332077026
},
{
"original": "Lise SOINS",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.8864014744758606
},
{
"original": "Lise Pansement",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.8627662658691406
},
{
"original": "Lise SOINS",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9442092180252075
},
{
"original": "Lise SOINS",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.8830267190933228
},
{
"original": "Myriam TOILETTE",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9857131242752075
},
{
"original": "David SOMMEIL",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9766817092895508
},
{
"original": "David POCHE A",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9379512071609497
},
{
"original": "Myriam S",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9693726897239685
},
{
"original": "Myriam LEVER",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9525647759437561
},
{
"original": "Lise BRANET",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.997637152671814
},
{
"original": "Marine BERTHOU",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9976328611373901
},
{
"original": "Lise BRANET",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9984577894210815
},
{
"original": "Nora BUZZIAN",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9975428581237793
},
{
"original": "Lise BRANET",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9986082911491394
},
{
"original": "Lise BRANET",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9973065257072449
},
{
"original": "Camille ROUFIAT",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9958539605140686
},
{
"original": "Agnes SAINT-",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.822978138923645
},
{
"original": "M.ACHERITOGARAY",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.8980792760848999
},
{
"original": "M.ACHERITOGARAY",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.9331843256950378
},
{
"original": "Nora BUZZIAN",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9612149000167847
},
{
"original": "Elodie JAYET",
"replacement": "[PERSONNE_7]",
"source": "ner",
"score": 0.8609872460365295
},
{
"original": "lapistoy andré",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9801124334335327
}
]
}