chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,273 @@
{
"source_file": "CRH 23133268.pdf",
"total_replacements": 151,
"regex_replacements": 115,
"ner_replacements": 5,
"sweep_replacements": 31,
"entities_found": [
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101856135",
"replacement": "[RPPS_10]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101856135",
"replacement": "[RPPS_10]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "code_barre"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "Behçet",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9079658389091492
},
{
"original": "Behçet",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8924194574356079
},
{
"original": "Bechet",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.887986958026886
},
{
"original": "Behçet",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8251274824142456
},
{
"original": "HLIB SABIR Luisa",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.995819091796875
}
]
}

View File

@@ -0,0 +1,501 @@
{
"source_file": "trackare-18023563-23133268_18023563_23133268.pdf",
"total_replacements": 475,
"regex_replacements": 159,
"ner_replacements": 33,
"sweep_replacements": 283,
"entities_found": [
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "18023563",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23133268",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "99134",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "VITORIA",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "JIMENEZ JIMENEZ",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "HLIB",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "contact"
},
{
"original": "HLIB SABIR",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "LUISA",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "LUISA",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "17 ALLEE DU PETIT JEANGUIGNAN Ville de résidence: BAYONNE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "Laure CURUTCHET",
"replacement": "[MEDECIN_2]",
"source": "ner",
"score": 0.9980528354644775
},
{
"original": "Behçet",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9260626435279846
},
{
"original": "Paul-Emile GELULE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9816436767578125
},
{
"original": "Paul-Emile GELULE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9854688048362732
},
{
"original": "ILLETTE Paul-Emile GELULE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9356408715248108
},
{
"original": "ILLETTE Paul-Emile GELULE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9014372229576111
},
{
"original": "VILLETTE Paul-Emile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9351859092712402
},
{
"original": "VILLETTE Paul-Emile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9708938002586365
},
{
"original": "SACHET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8160562515258789
},
{
"original": "Paul-Emile SACHET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9986287355422974
},
{
"original": "Paul-Emile SACHET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9983199238777161
},
{
"original": "Paul-Emile SACHET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9774131774902344
},
{
"original": "SACHET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8043932318687439
},
{
"original": "Paul-Emile SACHET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8732007145881653
},
{
"original": "Paul-Emile GELULE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9813079237937927
},
{
"original": "Paul-Emile GELULE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9856926202774048
},
{
"original": "ILLETTE Paul-Emile GELULE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9345122575759888
},
{
"original": "ETTE Paul-Emile GELULE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9506801962852478
},
{
"original": "VILLETTE Paul-Emile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9263460040092468
},
{
"original": "VILLETTE Paul-Emile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9365009665489197
},
{
"original": "SACHET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8022711277008057
},
{
"original": "Paul-Emile SACHET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9986217617988586
},
{
"original": "Paul-Emile SACHET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.998304545879364
},
{
"original": "Paul-Emile SACHET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9705367088317871
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9987853169441223
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9985471367835999
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9988533854484558
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9977638125419617
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9983574151992798
},
{
"original": "Bechet",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.8123688697814941
},
{
"original": "Behcet",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9049016237258911
},
{
"original": "Behçet",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.831231415271759
},
{
"original": "Behcet",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9258153438568115
}
]
}