chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,39 @@
{
"source_file": "CRH 23066188.pdf",
"total_replacements": 139,
"regex_replacements": 128,
"ner_replacements": 4,
"sweep_replacements": 7,
"entities_found": [
{
"original": "10101772613",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "Attachés",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9190148711204529
},
{
"original": "GALL",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8843303918838501
},
{
"original": "M.PUSTETTO",
"replacement": "[PATIENT_5]",
"source": "ner",
"score": 0.9910038113594055
},
{
"original": "HURTADO Eneko",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.8595902323722839
}
]
}

View File

@@ -0,0 +1,495 @@
{
"source_file": "trackare-17001141-23066188_17001141_23066188.pdf",
"total_replacements": 591,
"regex_replacements": 216,
"ner_replacements": 37,
"sweep_replacements": 338,
"entities_found": [
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17001141",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23066188",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "BAYONNE CEDEX",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "MEDECINE PEDIATRIE - PEDIATRIE GENERALE\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "33 RUE JEAN FOURCADE Ville de résidence: URRUGNE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "ENEKO HURTADO",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9971879124641418
},
{
"original": "JAOUEN JAOUEN JAOUEN JAOUEN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9971573948860168
},
{
"original": "BELLOSO Sylvie",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.9883405566215515
},
{
"original": "BOHN Quentin",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9645351767539978
},
{
"original": "BOHN Quentin",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9404496550559998
},
{
"original": "CARRIERE Juliette",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.8441716432571411
},
{
"original": "CARRIERE Juliette",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.8766684532165527
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9348111152648926
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.955315887928009
},
{
"original": "BELLEAU Céline S",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9268406629562378
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9919153451919556
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9895777702331543
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9844146966934204
},
{
"original": "nubain",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9399969577789307
},
{
"original": "nubain",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9356703758239746
},
{
"original": "CARRIERE Juliette",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.8655610680580139
},
{
"original": "BELLOSO Sylvie",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.959860622882843
},
{
"original": "BOHN Quentin",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9726643562316895
},
{
"original": "BOHN Quentin",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9317216873168945
},
{
"original": "CARRIERE Juliette",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.8533228635787964
},
{
"original": "CARRIERE Juliette",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.883411705493927
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.989759087562561
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9846604466438293
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9810514450073242
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.985490620136261
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9302145838737488
},
{
"original": "BELLEAU Céline",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9524434804916382
},
{
"original": "CARRIERE Juliette",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.8662012815475464
},
{
"original": "Quentin BOHN",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9934483766555786
},
{
"original": "Quentin BOHN",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9958497285842896
},
{
"original": "Quentin BOHN SACHET",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9262264370918274
},
{
"original": "Quentin BOHN",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9978212118148804
},
{
"original": "BELLEAU",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.8218538165092468
},
{
"original": "Juliette CARRIERE",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.8824392557144165
},
{
"original": "Quentin BOHN",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9673187732696533
},
{
"original": "Kernig",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9417259097099304
},
{
"original": "Bernard de Coral",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9543604850769043
}
]
}