chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,87 @@
{
"source_file": "CRH 23109279.pdf",
"total_replacements": 111,
"regex_replacements": 90,
"ner_replacements": 3,
"sweep_replacements": 18,
"entities_found": [
{
"original": "10101431459",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "secr.hdjcancero@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.oncogenetique@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "secr.onco@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "secr.hdjcancero@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.oncogenetique@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "secr.onco@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "secr.hdjcancero@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.oncogenetique@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "secr.onco@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "SILVA",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9765211939811707
},
{
"original": "Belustine",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8119329810142517
},
{
"original": "HAISSAGUERRE Marie",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9903615117073059
}
]
}

View File

@@ -0,0 +1,495 @@
{
"source_file": "trackare-20020367-23109279_20020367_23109279.pdf",
"total_replacements": 384,
"regex_replacements": 170,
"ner_replacements": 31,
"sweep_replacements": 183,
"entities_found": [
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "20020367",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23109279",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "257096449303105",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "257096449303105",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "64493",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "64493",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "ST PALAIS",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "ONCOLOGIE - ONCOLOGIE HC\nDossier Patient\nDétails des patients",
"replacement": "[MEDECIN_5]",
"source": "regex",
"category": "patient"
},
{
"original": "HAISSAGUERRE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "SABAROTS",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "HAISSAGUERRE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "SABAROTS",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "MARIE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARIE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARIE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARIE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "832 CHEMIN DE CIGARO Ville de résidence: MOUGUERRE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "BURTIN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9900565147399902
},
{
"original": "Stephanie REGNIER",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9977933764457703
},
{
"original": "Stephanie REGNIER",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9986053109169006
},
{
"original": "Stephanie REGNIER",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9984363317489624
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9652057886123657
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9798720479011536
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9813047647476196
},
{
"original": "Stephanie REGNIER",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9980685114860535
},
{
"original": "Stephanie REGNIER",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9980543851852417
},
{
"original": "Stephanie REGNIER",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.997908353805542
},
{
"original": "LANSOPRAZOLE BGA",
"replacement": "[MEDECIN_13]",
"source": "ner",
"score": 0.8477557897567749
},
{
"original": "LAFON Mathilde",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9603459239006042
},
{
"original": "Guillaume",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9117331504821777
},
{
"original": "Guillaume",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8171425461769104
},
{
"original": "Guillaume",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9440146088600159
},
{
"original": "Guillaume",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8355476260185242
},
{
"original": "Guillaume",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8240005373954773
},
{
"original": "GOLDZAK",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.9844144582748413
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9974366426467896
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9971708655357361
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9979793429374695
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9984355568885803
},
{
"original": "Mathilde LAFON ECHODOPPLER",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9984610676765442
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9821275472640991
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.998617947101593
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.998386561870575
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9988256096839905
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9984393119812012
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9983559250831604
},
{
"original": "Avastin",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8000807762145996
},
{
"original": "Mathilde LAFON",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.977577269077301
}
]
}