chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,45 @@
{
"source_file": "CONSULTATION ANESTHESISTE 23050890.pdf",
"total_replacements": 22,
"regex_replacements": 20,
"ner_replacements": 2,
"sweep_replacements": 0,
"entities_found": [
{
"original": "23593387",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23593387",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "10 RUE DE BEHOBIE N° Tél : [TEL_1]",
"replacement": "[ADRESSE_2]",
"source": "regex",
"category": "adresse"
},
{
"original": "10 RUE DE BEHOBIE N° Tél : [TEL_1]",
"replacement": "[ADRESSE_2]",
"source": "regex",
"category": "adresse"
},
{
"original": "ELEJALDE Jacinta",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9949061274528503
},
{
"original": "ELEJALDE Jacinta",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9975634813308716
}
]
}

View File

@@ -0,0 +1,15 @@
{
"source_file": "CRH 23050890.pdf",
"total_replacements": 14,
"regex_replacements": 8,
"ner_replacements": 1,
"sweep_replacements": 5,
"entities_found": [
{
"original": "ELEJALDE Jacinta",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.907014012336731
}
]
}

View File

@@ -0,0 +1,33 @@
{
"source_file": "CRO 23050890.pdf",
"total_replacements": 18,
"regex_replacements": 10,
"ner_replacements": 4,
"sweep_replacements": 4,
"entities_found": [
{
"original": "BRUNETEAU P",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.8540693521499634
},
{
"original": "BRUNETEAU",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9746828079223633
},
{
"original": "ELEJALDE JACINTA",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9916704893112183
},
{
"original": "Christian RAZAFINDRANDEHA",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9988396167755127
}
]
}

View File

@@ -0,0 +1,699 @@
{
"source_file": "trackare-09009875-23050890_09009875_23050890.pdf",
"total_replacements": 538,
"regex_replacements": 282,
"ner_replacements": 67,
"sweep_replacements": 189,
"entities_found": [
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "09009875",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23050890",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "ST JEAN DE LUZ",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "CHIRURGIE ORTHOPEDIQUE\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "10 RUE DE BEHOBIE Ville de résidence: HENDAYE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "DUFOUR Eric Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8356322646141052
},
{
"original": "Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9833895564079285
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9976780414581299
},
{
"original": "DUFOUR Eric Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9641603827476501
},
{
"original": "DUFOUR Eric Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8699536323547363
},
{
"original": "URINAL",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8256017565727234
},
{
"original": "Laétitia S",
"replacement": "[SOIGNANT_10]",
"source": "ner",
"score": 0.8323132991790771
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9441294074058533
},
{
"original": "URINAL",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9295366406440735
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9398418068885803
},
{
"original": "URINAL",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9197612404823303
},
{
"original": "VIDER",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9340912699699402
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9691035151481628
},
{
"original": "URINAL",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9581315517425537
},
{
"original": "VIDER",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9117892980575562
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9663861393928528
},
{
"original": "URINAL",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9440365433692932
},
{
"original": "Laétitia",
"replacement": "[SOIGNANT_10]",
"source": "ner",
"score": 0.9508371949195862
},
{
"original": "FORDIN Manon",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9839631915092468
},
{
"original": "HERAUD Laura",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.8755786418914795
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9475892782211304
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.941400408744812
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.973997175693512
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.971599817276001
},
{
"original": "DUFOUR Eric Signé",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9222881197929382
},
{
"original": "Eric Signé",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.976438045501709
},
{
"original": "Eric Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9519787430763245
},
{
"original": "Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9718229174613953
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9976780414581299
},
{
"original": "DUFOUR Eric Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9641603827476501
},
{
"original": "DUFOUR Eric Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8699536323547363
},
{
"original": "BASSIN : VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9066290855407715
},
{
"original": "URINAL",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9848185181617737
},
{
"original": "VIDER",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9775452613830566
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9878584742546082
},
{
"original": "URINAL",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9781915545463562
},
{
"original": "VIDER",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.832006573677063
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9189426898956299
},
{
"original": "URINAL",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8983125686645508
},
{
"original": "VIDER",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8761799335479736
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9201973080635071
},
{
"original": "URINAL",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8893820643424988
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.906650960445404
},
{
"original": "URINAL",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8578572273254395
},
{
"original": "DUFOUR",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.8416035771369934
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9715151786804199
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9697731733322144
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.942922055721283
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.934730589389801
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9703347086906433
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9923639297485352
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9885381460189819
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9844707250595093
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9860362410545349
},
{
"original": "DUFOUR Eric",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9162486791610718
},
{
"original": "DUFOUR Eric S",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9372367858886719
},
{
"original": "Laétitia",
"replacement": "[SOIGNANT_10]",
"source": "ner",
"score": 0.9764806628227234
},
{
"original": "Manon Signé",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9892941117286682
},
{
"original": "Laétitia Signé",
"replacement": "[SOIGNANT_10]",
"source": "ner",
"score": 0.9956066012382507
},
{
"original": "Manon Signé",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9259222149848938
},
{
"original": "Laétitia Signé",
"replacement": "[SOIGNANT_10]",
"source": "ner",
"score": 0.9372614622116089
},
{
"original": "Laétitia",
"replacement": "[SOIGNANT_10]",
"source": "ner",
"score": 0.9686622023582458
},
{
"original": "Manon Signé",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.9441601634025574
},
{
"original": "Laétitia S",
"replacement": "[SOIGNANT_10]",
"source": "ner",
"score": 0.9126425385475159
},
{
"original": "Manon",
"replacement": "[SOIGNANT_11]",
"source": "ner",
"score": 0.8496630191802979
},
{
"original": "Laétitia S",
"replacement": "[SOIGNANT_10]",
"source": "ner",
"score": 0.8803927898406982
},
{
"original": "Kattina ARTOLA",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9890163540840149
}
]
}