chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,63 @@
{
"source_file": "BACTERIO 23138778.pdf",
"total_replacements": 21,
"regex_replacements": 19,
"ner_replacements": 2,
"sweep_replacements": 0,
"entities_found": [
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "ALEXANDRE Patrick",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "21/04/1956",
"replacement": "[DATE_NAISS_1]",
"source": "regex",
"category": "date_naissance"
},
{
"original": "DALLEMANE Carine\nReçu le",
"replacement": "[SOIGNANT_1]",
"source": "regex",
"category": "soignant"
},
{
"original": "VILLETTE Paul-Emile\nPrélevé le",
"replacement": "[SOIGNANT_2]",
"source": "regex",
"category": "soignant"
},
{
"original": "2300157421",
"replacement": "[IDENTIFIANT_1]",
"source": "regex",
"category": "identifiant"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "ALEXANDRE",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9154006242752075
},
{
"original": "Jacques Loëb",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9813205599784851
}
]
}

View File

@@ -0,0 +1,339 @@
{
"source_file": "CRH 23138778.pdf",
"total_replacements": 161,
"regex_replacements": 153,
"ner_replacements": 3,
"sweep_replacements": 5,
"entities_found": [
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101856135",
"replacement": "[RPPS_10]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101856135",
"replacement": "[RPPS_10]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101856135",
"replacement": "[RPPS_10]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "code_barre"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "LOPEZ",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9977224469184875
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8251490592956543
},
{
"original": "ALEXANDRE Patrick",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9276596307754517
}
]
}

View File

@@ -0,0 +1,567 @@
{
"source_file": "trackare-05025000-23138778_05025000_23138778.pdf",
"total_replacements": 372,
"regex_replacements": 192,
"ner_replacements": 35,
"sweep_replacements": 145,
"entities_found": [
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05025000",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23138778",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "156049935079880",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "156049935079880",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "156049935079880",
"replacement": "[NIR_1]",
"source": "regex",
"category": "nir"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "99350",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "99350",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "CASABLANCA",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "ALEXANDRE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ALEXANDRE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ALEXANDRE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ALEXANDRE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "PATRICK",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "PATRICK",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "PATRICK",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "PATRICK",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "23 RUE DE SULLY Ville de résidence: BIARRITZ",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "BURTIN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9912121891975403
},
{
"original": "PATRICK",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9818107485771179
},
{
"original": "PATRICK",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9760265946388245
},
{
"original": "ALEXANDRE",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9843394160270691
},
{
"original": "ALEXANDRE Nom",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9356931447982788
},
{
"original": "ANNER THORACO-ABDOMINO-PELVIEN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9373758435249329
},
{
"original": "VILLETTE Paul-Emile Signé",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9482550024986267
},
{
"original": "VILLETTE Paul-Emile",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9859574437141418
},
{
"original": "Bernadette SOMMEIL",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8867559432983398
},
{
"original": "Bernadette",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9193010330200195
},
{
"original": "Bernadette",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8616361021995544
},
{
"original": "Paul-Emile A",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.8506852388381958
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9982785582542419
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9982942342758179
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.998555064201355
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9929669499397278
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9986178278923035
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9870741963386536
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9980136156082153
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9976998567581177
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9980810880661011
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9970659017562866
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9978504180908203
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9979763627052307
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9982691407203674
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9982608556747437
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.998308002948761
},
{
"original": "Paul-Emile VILLETTE",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9976710081100464
},
{
"original": "Maria Del",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9500057101249695
},
{
"original": "Madame Sabine",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.8413289189338684
},
{
"original": "Madame Sabine",
"replacement": "[MEDECIN_14]",
"source": "ner",
"score": 0.8126353025436401
},
{
"original": "Paul-Emile",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9236652851104736
},
{
"original": "LOPEZ",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9946433901786804
},
{
"original": "LOPEZ",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9898214936256409
},
{
"original": "Alexis HAMON",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.991778552532196
}
]
}