chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,243 @@
{
"source_file": "CRH 23071970.pdf",
"total_replacements": 125,
"regex_replacements": 115,
"ner_replacements": 5,
"sweep_replacements": 5,
"entities_found": [
{
"original": "10101990900",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "Mackin",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9560713768005371
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8379188179969788
},
{
"original": "Macklin",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9760656356811523
},
{
"original": "MATHIEU",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.915472686290741
},
{
"original": "BALHADERE Cabenson",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9918733239173889
}
]
}

View File

@@ -0,0 +1,543 @@
{
"source_file": "trackare-23008358-23071970_23008358_23071970.pdf",
"total_replacements": 723,
"regex_replacements": 297,
"ner_replacements": 23,
"sweep_replacements": 403,
"entities_found": [
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23008358",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23071970",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "SAINT MICHEL DE L ATALAYE",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "MEDECINE PNEUMOLOGIE - PNEUMOLOGIE PHTISIOLOGIE HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "BALHADERE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BALHADERE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BALHADERE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BALHADERE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BALHADERE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BALHADERE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BALHADERE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "BALHADERE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CABENSON",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CABENSON",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CABENSON",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CABENSON",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CABENSON",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CABENSON",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CABENSON",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "CABENSON",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "4 BOULEVARD MARENSIN Ville de résidence: VIEUX BOUCAU LES BAINS",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "GUILLEMAUD BURTIN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9983226656913757
},
{
"original": "GAUTHIER Isabelle",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9883964657783508
},
{
"original": "MASSE Laurence",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9056572914123535
},
{
"original": "SOL BONNET Daniel INHAL",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9447402954101562
},
{
"original": "SOL BONNET Daniel INHAL",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9467064738273621
},
{
"original": "BONNET Daniel INHAL",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8967077136039734
},
{
"original": "GAUTHIER Isabelle",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.997058093547821
},
{
"original": "MASSE Laurence",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9336331486701965
},
{
"original": "NET Daniel INHAL",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8748958706855774
},
{
"original": "BONNET Daniel",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9088730812072754
},
{
"original": "SOL BONNET Daniel",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.976989209651947
},
{
"original": "SOL BONNET Daniel",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9267868995666504
},
{
"original": "AD BONNET Daniel",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8216885924339294
},
{
"original": "AD BONNET Daniel",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9497919678688049
},
{
"original": "SDZ",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.803423285484314
},
{
"original": "Baptiste DROUET GELULE",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9850649833679199
},
{
"original": "Pascale PISANI",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9921207427978516
},
{
"original": "Sheila PELAEZ DEL",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9976763129234314
},
{
"original": "Sheila PELAEZ DEL",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9948893785476685
},
{
"original": "Sheila PELAEZ DEL",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9938464164733887
},
{
"original": "Laetitia",
"replacement": "[MEDECIN_18]",
"source": "ner",
"score": 0.8973441123962402
},
{
"original": "Pascale PISANI",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9938381910324097
},
{
"original": "Macklin",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.9254959225654602
}
]
}