chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,171 @@
{
"source_file": "CRH 23093202.pdf",
"total_replacements": 83,
"regex_replacements": 72,
"ner_replacements": 2,
"sweep_replacements": 9,
"entities_found": [
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002456746",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "Murphy",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8365517854690552
},
{
"original": "DUBOUE Yvonne",
"replacement": "[PATIENT_2]",
"source": "ner",
"score": 0.9706795811653137
}
]
}

View File

@@ -0,0 +1,8 @@
{
"source_file": "endoscopie 142_23093202.pdf",
"total_replacements": 0,
"regex_replacements": 0,
"ner_replacements": 0,
"sweep_replacements": 0,
"entities_found": []
}

View File

@@ -0,0 +1,429 @@
{
"source_file": "trackare--23093202__23093202.pdf",
"total_replacements": 503,
"regex_replacements": 117,
"ner_replacements": 52,
"sweep_replacements": 334,
"entities_found": [
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23093202",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "SAUVETERRE DE BEARN",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_3]",
"source": "regex",
"category": "patient"
},
{
"original": "5 AVENUE DES PYRENEES Ville de résidence: SALIES DE BEARN",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "JAOUEN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.94847571849823
},
{
"original": "MEURAT Aurore GELULE",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9812020063400269
},
{
"original": "MEURAT Aurore GELULE",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.983947217464447
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9828202724456787
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9435399770736694
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9420435428619385
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9453245401382446
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.819678783416748
},
{
"original": "OXYNORMO",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9261814951896667
},
{
"original": "MOLSIDOMINE",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8042247891426086
},
{
"original": "MEURAT Aurore Signé",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9230003952980042
},
{
"original": "MEURAT Aurore GELULE",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9801673889160156
},
{
"original": "MEURAT Aurore GELULE",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9836487174034119
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9826489686965942
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9431301951408386
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9406644105911255
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9481397271156311
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.8253654837608337
},
{
"original": "OXYNORMO",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9337372779846191
},
{
"original": "MOLSIDOMINE",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8250940442085266
},
{
"original": "MEURAT Aurore Signé",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.886906087398529
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9947999715805054
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9985347390174866
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9986424446105957
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9983470439910889
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9985253810882568
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9982880353927612
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9984630942344666
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9980663657188416
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9981490969657898
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9981296062469482
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9982612133026123
},
{
"original": "KHUONG HUU",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9719927310943604
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9985482692718506
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9987385272979736
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9987306594848633
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9986267685890198
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.998258650302887
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.998162031173706
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9982490539550781
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.998477041721344
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9982790946960449
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9983238577842712
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9988064169883728
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9987790584564209
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9987673759460449
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9987876415252686
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9982732534408569
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9973545670509338
},
{
"original": "Anne-Laure SORIN",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9964650273323059
},
{
"original": "Anne-Laure SORIN",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9976375102996826
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9704724550247192
}
]
}