chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,243 @@
{
"source_file": "CRH 23097531.pdf",
"total_replacements": 128,
"regex_replacements": 119,
"ner_replacements": 5,
"sweep_replacements": 4,
"entities_found": [
{
"original": "10003854436",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "secr.pneumo@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "sschneider@ch-cotebasque.fr",
"replacement": "[EMAIL_2]",
"source": "regex",
"category": "email"
},
{
"original": "msabatini@ch-cotebasque.fr",
"replacement": "[EMAIL_3]",
"source": "regex",
"category": "email"
},
{
"original": "prigaud@ch-cotebasque.fr",
"replacement": "[EMAIL_4]",
"source": "regex",
"category": "email"
},
{
"original": "cnocent@ch-cotebasque.fr",
"replacement": "[EMAIL_5]",
"source": "regex",
"category": "email"
},
{
"original": "jpmathieu@ch-cotebasque.fr",
"replacement": "[EMAIL_6]",
"source": "regex",
"category": "email"
},
{
"original": "lmasse@ch-cotebasque.fr",
"replacement": "[EMAIL_7]",
"source": "regex",
"category": "email"
},
{
"original": "clethrosne@ch-cotebasque.fr",
"replacement": "[EMAIL_8]",
"source": "regex",
"category": "email"
},
{
"original": "dbonnet@ch-cotebasque.fr",
"replacement": "[EMAIL_9]",
"source": "regex",
"category": "email"
},
{
"original": "eabraham@ch-cotebasque.fr",
"replacement": "[EMAIL_10]",
"source": "regex",
"category": "email"
},
{
"original": "eellie@ch-cotebasque.fr",
"replacement": "[EMAIL_11]",
"source": "regex",
"category": "email"
},
{
"original": "MATHIEU",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9059529900550842
},
{
"original": "Pierre Rigaud",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9827147126197815
},
{
"original": "Pneu",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.823718786239624
},
{
"original": "MATHIEU CAT",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9677302241325378
},
{
"original": "LESCAT Anne-marie",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9788333773612976
}
]
}

View File

@@ -0,0 +1,681 @@
{
"source_file": "trackare-02010052-23097531_02010052_23097531.pdf",
"total_replacements": 527,
"regex_replacements": 168,
"ner_replacements": 74,
"sweep_replacements": 285,
"entities_found": [
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "02010052",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23097531",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "CASABLANCA",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "MEDECINE PNEUMOLOGIE - PNEUMOLOGIE PHTISIOLOGIE HC\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "9 RUE ST FORCET Ville de résidence: BAYONNE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9962010979652405
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9713833928108215
},
{
"original": "NOCENT-EJNAINI Cécilia Signé",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9962087273597717
},
{
"original": "NOCENT-EJNAINI Cécilia S",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.986583411693573
},
{
"original": "NOCENT-EJNAINI Cécilia Signé",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9768854975700378
},
{
"original": "NOCENT-EJNAINI Cécilia S",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9951922297477722
},
{
"original": "HYDROCORTISONE ROUSS 10MG",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9616023898124695
},
{
"original": "NOCENT-EJNAINI Cécilia Signé",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.99609375
},
{
"original": "HYDROCORTISONE ROUSS",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.997081995010376
},
{
"original": "NOCENT-EJNAINI Cécilia S",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9970112442970276
},
{
"original": "DAONIL",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9085171818733215
},
{
"original": "NOCENT-EJNAINI Cécilia Signé",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.962679922580719
},
{
"original": "NOCENT-EJNAINI Cécilia S",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9748985767364502
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9722592234611511
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9984172582626343
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9980573058128357
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9714818596839905
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9904401302337646
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9973537921905518
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9971930384635925
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9928559064865112
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9784151315689087
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9948411583900452
},
{
"original": "Cécilia INHAL",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9263215661048889
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9984602928161621
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9984579086303711
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9986034035682678
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9986034035682678
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9977575540542603
},
{
"original": "NICOPASS",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8170772790908813
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9986754059791565
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9985149502754211
},
{
"original": "NOCENT-EJNAINI Cécilia Signé",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9985886812210083
},
{
"original": "NOCENT-EJNAINI Cécilia Signé",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9859687089920044
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.8771359324455261
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9986799359321594
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.998671293258667
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9988832473754883
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.998837411403656
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9988554120063782
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9985926151275635
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9986005425453186
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9935305118560791
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9958094954490662
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9888385534286499
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.8378026485443115
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9974005222320557
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9972805976867676
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.994172215461731
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9901843667030334
},
{
"original": "Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9138231873512268
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.997428834438324
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9973223209381104
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.994253396987915
},
{
"original": "NOCENT-EJNAINI",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9850424528121948
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9986146688461304
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9986152052879333
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9978153109550476
},
{
"original": "NICOPASS",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8014658689498901
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9986839294433594
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9985370635986328
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9988048672676086
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9984458684921265
},
{
"original": "NOCENT-EJNAINI Cécilia",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9886226058006287
},
{
"original": "Alexandra Signé",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9970399737358093
},
{
"original": "Alexandra Signé",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9925352931022644
},
{
"original": "NICOPASS",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8102948069572449
},
{
"original": "NICOPATCHLIB",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9808580279350281
},
{
"original": "EJNAINI DR.",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9221240878105164
},
{
"original": "Sheila PELAEZ DEL",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9902235269546509
},
{
"original": "Pierre Rigaud",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.958838164806366
},
{
"original": "Père Lescat Jean",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.8382512331008911
},
{
"original": "Mère Lescat Monique",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9075232148170471
},
{
"original": "Lescat Jean",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9812251925468445
}
]
}