chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,81 @@
{
"source_file": "CRH 23104564.pdf",
"total_replacements": 114,
"regex_replacements": 94,
"ner_replacements": 2,
"sweep_replacements": 18,
"entities_found": [
{
"original": "10101431459",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "secr.hdjcancero@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.oncogenetique@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "secr.onco@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "secr.hdjcancero@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.oncogenetique@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "secr.onco@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "secr.hdjcancero@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.oncogenetique@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "secr.onco@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8244937062263489
},
{
"original": "PAILLOT Genevieve",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9833872318267822
}
]
}

View File

@@ -0,0 +1,405 @@
{
"source_file": "trackare-19002072-23104564_19002072_23104564.pdf",
"total_replacements": 286,
"regex_replacements": 138,
"ner_replacements": 29,
"sweep_replacements": 119,
"entities_found": [
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "19002072",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23104564",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "NIORT",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "ONCOLOGIE - ONCOLOGIE HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_3]",
"source": "regex",
"category": "patient"
},
{
"original": "PAILLOT",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CORNUAU",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "GENEVIEVE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "GENEVIEVE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "29 CHEMIN DE L ESTANQUET RES EMPEREUR Ville de résidence: BAYONNE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "Willis",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9622949361801147
},
{
"original": "Raphael",
"replacement": "[MEDECIN_9]",
"source": "ner",
"score": 0.9829978346824646
},
{
"original": "GASSIAT Anne",
"replacement": "[CONTACT_1]",
"source": "ner",
"score": 0.9546640515327454
},
{
"original": "MG DA SILVA Fabio",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9189867973327637
},
{
"original": "PRIMPERAN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9492479562759399
},
{
"original": "DA SILVA Fabio",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9826717972755432
},
{
"original": "PRIMPERAN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9278037548065186
},
{
"original": "Fabio GELULE",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9793413281440735
},
{
"original": "Fabio GELULE",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9875615239143372
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9340648055076599
},
{
"original": "GASSIAT Anne",
"replacement": "[CONTACT_1]",
"source": "ner",
"score": 0.9557799100875854
},
{
"original": "Fabio",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.824968695640564
},
{
"original": "MG DA SILVA Fabio",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9154313206672668
},
{
"original": "PRIMPERAN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.946074903011322
},
{
"original": "DA SILVA Fabio",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9815697073936462
},
{
"original": "PRIMPERAN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9243323802947998
},
{
"original": "Fabio GELULE",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9788462519645691
},
{
"original": "Fabio GELULE",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9868971705436707
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.915564775466919
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9980685710906982
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9981911182403564
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9983707070350647
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9982085824012756
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9985663890838623
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9986586570739746
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9806984663009644
},
{
"original": "Hérvé Beauvois",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9932999610900879
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9636020660400391
},
{
"original": "Dubrocq",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.8270697593688965
}
]
}