chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,87 @@
{
"source_file": "BACTERIO 23105874.pdf",
"total_replacements": 28,
"regex_replacements": 25,
"ner_replacements": 1,
"sweep_replacements": 2,
"entities_found": [
{
"original": "98200064",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "GOUANERE Annie",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "GOUANERE Annie",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "UHALDEBORDE PNEUMOLOGIE PHTISIOLOGIE HC",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "UHALDEBORDE PNEUMOLOGIE PHTISIOLOGIE HC",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "10/03/1950",
"replacement": "[DATE_NAISS_1]",
"source": "regex",
"category": "date_naissance"
},
{
"original": "10/03/1950",
"replacement": "[DATE_NAISS_1]",
"source": "regex",
"category": "date_naissance"
},
{
"original": "DOBOSZ Mathilde\nReçu le",
"replacement": "[SOIGNANT_1]",
"source": "regex",
"category": "soignant"
},
{
"original": "ABRAHAM Elise\nPrélevé le",
"replacement": "[SOIGNANT_2]",
"source": "regex",
"category": "soignant"
},
{
"original": "2300121141",
"replacement": "[IDENTIFIANT_1]",
"source": "regex",
"category": "identifiant"
},
{
"original": "2300121141",
"replacement": "[IDENTIFIANT_1]",
"source": "regex",
"category": "identifiant"
},
{
"original": "23105874",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "Jacques Loëb",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9893258810043335
}
]
}

View File

@@ -0,0 +1,393 @@
{
"source_file": "CRH 23105874.pdf",
"total_replacements": 213,
"regex_replacements": 192,
"ner_replacements": 8,
"sweep_replacements": 13,
"entities_found": [
{
"original": "10001613875",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "MATHIEU",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8567638993263245
},
{
"original": "lopez lea",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9984113574028015
},
{
"original": "Grandcher",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9102364182472229
},
{
"original": "Landouzy",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.939244270324707
},
{
"original": "Toki Eder",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9800830483436584
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.8102761507034302
},
{
"original": "MATHIEU",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9790584444999695
},
{
"original": "UHALDEBORDE Annie",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9927913546562195
}
]
}