chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,57 @@
{
"source_file": "CRH 23079402.pdf",
"total_replacements": 141,
"regex_replacements": 127,
"ner_replacements": 7,
"sweep_replacements": 7,
"entities_found": [
{
"original": "10101772613",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "BARBEYRAC",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.991329550743103
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8698606491088867
},
{
"original": "M.PUSTETTO",
"replacement": "[PATIENT_5]",
"source": "ner",
"score": 0.9867308735847473
},
{
"original": "Mr K. GLADELPédiat",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9006365537643433
},
{
"original": "BARBEYRAC",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9882445931434631
},
{
"original": "TOULOUZET Josh",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.993985116481781
},
{
"original": "TOULOUZET JOSH",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.990885317325592
}
]
}

View File

@@ -0,0 +1,681 @@
{
"source_file": "trackare-23009267-23079402_23009267_23079402.pdf",
"total_replacements": 516,
"regex_replacements": 263,
"ner_replacements": 49,
"sweep_replacements": 204,
"entities_found": [
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23009267",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23079402",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "PAU",
"replacement": "[ADRESSE_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "TOULOUZET",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "TOULOUZET",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "TOULOUZET",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "TOULOUZET",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "JOSH",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "JOSH",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "JOSH",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "JOSH",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "12 PLACE GRAMONT Ville de résidence: PAU",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "DEROURE GUILLEMAUD",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9922608733177185
},
{
"original": "Oui",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8828151226043701
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.990545928478241
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9895639419555664
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9903874397277832
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9892802834510803
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9912241101264954
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9901577830314636
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9347937703132629
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8537516593933105
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.828201413154602
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8146060109138489
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.948283314704895
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.910869300365448
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9016795754432678
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.907725989818573
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8990790247917175
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9239907264709473
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8756997585296631
},
{
"original": "PICAMILH Marie- Signé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8123010993003845
},
{
"original": "Elise S",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.863130509853363
},
{
"original": "PICAMILH Marie-",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8607170581817627
},
{
"original": "PICAMILH Marie-",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9532921314239502
},
{
"original": "PICAMILH Marie-",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9662632346153259
},
{
"original": "PICAMILH Marie- Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.979305624961853
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9911680817604065
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9900614619255066
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.990545928478241
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9895639419555664
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8465457558631897
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8590439558029175
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8216238617897034
},
{
"original": "Elise Réalisé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.817462146282196
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9041643142700195
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8997451066970825
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8458263874053955
},
{
"original": "PICAMILH Marie- Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9923776388168335
},
{
"original": "Elise",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8571558594703674
},
{
"original": "PICAMILH Marie",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8936671018600464
},
{
"original": "PICAMILH Marie-",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8598709106445312
},
{
"original": "PICAMILH Marie-",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9074783325195312
},
{
"original": "PICAMILH Marie- Elise Signé",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9874225854873657
},
{
"original": "Elise SURV",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9845698475837708
},
{
"original": "Eugénie PESÉE",
"replacement": "[SOIGNANT_9]",
"source": "ner",
"score": 0.9219857454299927
},
{
"original": "Elise SURV",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9701398611068726
},
{
"original": "Josh",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9143946170806885
},
{
"original": "Josh",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9519485235214233
},
{
"original": "Eugénie BENEY",
"replacement": "[SOIGNANT_9]",
"source": "ner",
"score": 0.9697074890136719
},
{
"original": "Guillaume PORTET",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9689823985099792
}
]
}