chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,219 @@
{
"source_file": "CRH 23127286.pdf",
"total_replacements": 126,
"regex_replacements": 106,
"ner_replacements": 1,
"sweep_replacements": 19,
"entities_found": [
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100532760",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002456746",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "fprevost@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "boui@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "dnivet@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "tkhuong-huu@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "aguilngar@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "fgoutorbe@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "mcboudier@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "mbrugel@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "mboube@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "faudemar@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "Grellety",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.8966193199157715
}
]
}

View File

@@ -0,0 +1,27 @@
{
"source_file": "CRO 23127286.pdf",
"total_replacements": 8,
"regex_replacements": 5,
"ner_replacements": 3,
"sweep_replacements": 0,
"entities_found": [
{
"original": "R. DIDAILLER",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9757986068725586
},
{
"original": "ANDRIANAVALOMIONONA",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9930287599563599
},
{
"original": "MARC FREYNET",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9783095121383667
}
]
}

View File

@@ -0,0 +1,555 @@
{
"source_file": "trackare-21020800-23127286_21020800_23127286.pdf",
"total_replacements": 549,
"regex_replacements": 224,
"ner_replacements": 37,
"sweep_replacements": 288,
"entities_found": [
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21020800",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127286",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "198041616632053",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "16166",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "L ISLE D ESPAGNAC",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "FREYNET",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "FREYNET",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARC",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "MARC",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "49 AVENUE HENRI GRENET Ville de résidence: BAYONNE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "JAOUEN GUILLEMAUD GUILLEMAUD",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9977097511291504
},
{
"original": "LE BAIL Antoine",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.8118376731872559
},
{
"original": "GABON Annabelle",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.961482048034668
},
{
"original": "GABON Annabelle",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.9241671562194824
},
{
"original": "CHEVALIER Lucile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9569379687309265
},
{
"original": "ACTISKENAN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.99072265625
},
{
"original": "CHEVALIER Lucile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9673080444335938
},
{
"original": "ACTISKENAN 10",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9042820334434509
},
{
"original": "CHEVALIER Lucile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.8596128225326538
},
{
"original": "GABON Annabelle",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.8696037530899048
},
{
"original": "GABON Annabelle",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.9583513140678406
},
{
"original": "Anne GELULE",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8837799429893494
},
{
"original": "GELULE",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9144318699836731
},
{
"original": "CHEVALIER Lucile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9294397234916687
},
{
"original": "CHEVALIER Lucile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.971193790435791
},
{
"original": "ACTISKENAN 10MG G",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8721816539764404
},
{
"original": "CHEVALIER Lucile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9695871472358704
},
{
"original": "ACTISKENAN 10MG",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9340677261352539
},
{
"original": "CHEVALIER Lucile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9928911328315735
},
{
"original": "Lucile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9330402612686157
},
{
"original": "Annabelle",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.9341849088668823
},
{
"original": "Annabelle",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.952471137046814
},
{
"original": "Annabelle",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.8981753587722778
},
{
"original": "Lucile BANDELETTE",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.853449285030365
},
{
"original": "Léna MELAINE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9985434412956238
},
{
"original": "Léna MELAINE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9983968734741211
},
{
"original": "BAIL",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9530472159385681
},
{
"original": "Lucile CHEVALIER",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9955784678459167
},
{
"original": "Lucile",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.8664941787719727
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9930249452590942
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9986113905906677
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9986014366149902
},
{
"original": "Aurore MEURAT",
"replacement": "[SOIGNANT_7]",
"source": "ner",
"score": 0.9985669851303101
},
{
"original": "Jérémy SAUGER",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.994288444519043
},
{
"original": "Lucile CHEVALIER",
"replacement": "[MEDECIN_12]",
"source": "ner",
"score": 0.9798873662948608
},
{
"original": "Pujos",
"replacement": "[MEDECIN_2]",
"source": "ner",
"score": 0.9804922938346863
},
{
"original": "vidal rudy",
"replacement": "[CONTACT_1]",
"source": "ner",
"score": 0.9960243701934814
}
]
}