chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,93 @@
{
"source_file": "CRH 23102469.pdf",
"total_replacements": 121,
"regex_replacements": 89,
"ner_replacements": 4,
"sweep_replacements": 28,
"entities_found": [
{
"original": "10100843241",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "secr.hdjcancero@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.oncogenetique@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "secr.onco@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "secr.hdjcancero@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.oncogenetique@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "secr.onco@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "secr.hdjcancero@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.oncogenetique@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "secr.onco@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8326364755630493
},
{
"original": "PEMBROLIZUMAB",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8164262175559998
},
{
"original": "Antoine DOUARD",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9895920753479004
},
{
"original": "DUFOURG Patricia",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9858732223510742
}
]
}

View File

@@ -0,0 +1,417 @@
{
"source_file": "trackare-04020706-23102469_04020706_23102469.pdf",
"total_replacements": 396,
"regex_replacements": 150,
"ner_replacements": 29,
"sweep_replacements": 217,
"entities_found": [
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "04020706",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102469",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "254086410208476",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "64102",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "BAYONNE",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "FRANCOIS Louis",
"replacement": "[MEDECIN_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ONCOLOGIE - ONCOLOGIE HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_3]",
"source": "regex",
"category": "patient"
},
{
"original": "DUFOURG",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ARANAGA",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "PATRICIA",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "PATRICIA",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "129 BIS AVENUE LENINE Ville de résidence: TARNOS",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "MG LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9465932846069336
},
{
"original": "SERESTA",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.8057416081428528
},
{
"original": "MG LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9011602401733398
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9425837397575378
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9567314982414246
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9794477224349976
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9806877970695496
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9415130615234375
},
{
"original": "MG LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9287955164909363
},
{
"original": "SERESTA",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.9775089621543884
},
{
"original": "MG LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9255757331848145
},
{
"original": "SERESTA",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.9525420069694519
},
{
"original": "Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.8234625458717346
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9797960519790649
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.965001106262207
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.8976554870605469
},
{
"original": "Mathilde LAFON COMPRIME",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.939756453037262
},
{
"original": "Mathilde LAFON COMPRIME",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.906286895275116
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9984822273254395
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9985731840133667
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9987765550613403
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9985570311546326
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9980286359786987
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9976605772972107
},
{
"original": "A FRANCOIS",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.850176990032196
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9983214139938354
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9976925253868103
},
{
"original": "Antoine DOUARD",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9899287819862366
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.970710277557373
}
]
}