chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,21 @@
{
"source_file": "CRO 23098082.pdf",
"total_replacements": 11,
"regex_replacements": 7,
"ner_replacements": 2,
"sweep_replacements": 2,
"entities_found": [
{
"original": "Hemolock",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.991529643535614
},
{
"original": "VELASCO",
"replacement": "[ADRESSE_1]",
"source": "ner",
"score": 0.981950581073761
}
]
}

View File

@@ -0,0 +1,417 @@
{
"source_file": "trackare-BA086556-23098082_BA086556_23098082.pdf",
"total_replacements": 467,
"regex_replacements": 208,
"ner_replacements": 36,
"sweep_replacements": 223,
"entities_found": [
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23098082",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "160046410212221",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "64102",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "BAYONNE",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "VELASCO",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "VELASCO",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "DANIEL",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "DANIEL",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "VILLA ELODIE 26 AV DU CAMPAS SOULAN Ville de résidence: ST MARTIN DE SEIGNANX",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "BURTIN BURTIN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9953117370605469
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9775737524032593
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9775729179382324
},
{
"original": "MEURAT Aurore S",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.8756847381591797
},
{
"original": "ISERENTANT Jules",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.857349693775177
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.880556583404541
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9977841377258301
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9948892593383789
},
{
"original": "PONTIER Bénédicte S",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9522582292556763
},
{
"original": "MEURAT Aurore",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9848564863204956
},
{
"original": "ISERENTANT Jules",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.8493807911872864
},
{
"original": "PHLOROGLUCINOL ARW MEURAT Aurore",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.8711116313934326
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9839240908622742
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9736313223838806
},
{
"original": "Annabelle",
"replacement": "[SOIGNANT_14]",
"source": "ner",
"score": 0.9777320623397827
},
{
"original": "Annabelle",
"replacement": "[SOIGNANT_14]",
"source": "ner",
"score": 0.9773586988449097
},
{
"original": "Annabelle",
"replacement": "[SOIGNANT_14]",
"source": "ner",
"score": 0.9512907266616821
},
{
"original": "Annabelle",
"replacement": "[SOIGNANT_14]",
"source": "ner",
"score": 0.9713605642318726
},
{
"original": "Paul Jean MATERNOWSKI",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9797369837760925
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9979270696640015
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9982620477676392
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9985063076019287
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9982371926307678
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9980502724647522
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.998734176158905
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.8739939332008362
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9982857704162598
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9980360269546509
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9974215030670166
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9588079452514648
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9179856777191162
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9927630424499512
},
{
"original": "Elise MOLINE",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9957805871963501
},
{
"original": "Anne-Laure SORIN",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9934665560722351
},
{
"original": "Anne-Laure SORIN",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9530022740364075
},
{
"original": "Wirsung",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9853911399841309
}
]
}