chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,93 @@
{
"source_file": "BACTERIO 23099769.pdf",
"total_replacements": 28,
"regex_replacements": 23,
"ner_replacements": 4,
"sweep_replacements": 1,
"entities_found": [
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "GUERY Michel",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "GUERY Michel",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "19/01/1948",
"replacement": "[DATE_NAISS_1]",
"source": "regex",
"category": "date_naissance"
},
{
"original": "19/01/1948",
"replacement": "[DATE_NAISS_1]",
"source": "regex",
"category": "date_naissance"
},
{
"original": "CARRERA Marion\nReçu le",
"replacement": "[SOIGNANT_1]",
"source": "regex",
"category": "soignant"
},
{
"original": "LEROY Alexandre\nPrélevé le",
"replacement": "[SOIGNANT_2]",
"source": "regex",
"category": "soignant"
},
{
"original": "2300113363",
"replacement": "[IDENTIFIANT_1]",
"source": "regex",
"category": "identifiant"
},
{
"original": "2300113363",
"replacement": "[IDENTIFIANT_1]",
"source": "regex",
"category": "identifiant"
},
{
"original": "23002477",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "GUERY",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9778995513916016
},
{
"original": "JAOUEN",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.922277569770813
},
{
"original": "GUERY",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9589006304740906
},
{
"original": "Jacques Loëb",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9900330901145935
}
]
}

View File

@@ -0,0 +1,447 @@
{
"source_file": "trackare-17023408-23099769_17023408_23099769.pdf",
"total_replacements": 434,
"regex_replacements": 154,
"ner_replacements": 41,
"sweep_replacements": 239,
"entities_found": [
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "17023408",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23099769",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "MARMANDE",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "MEDECINE HEMATOLOGIE CLINIQUE - HEMATOLOGIE CLINIQUE\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "SAINTE SUZANNE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "CAPDUPUY Claudie DEBUT",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.979243278503418
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9973158240318298
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9988004565238953
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9986621141433716
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9988299608230591
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.998656153678894
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.997369647026062
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9962088465690613
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9983986020088196
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9978131055831909
},
{
"original": "Patricia URRUTIA",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9792571067810059
},
{
"original": "CAPDUPUY Claudie",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8859272003173828
},
{
"original": "ATOVAQUONE",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.9214233160018921
},
{
"original": "CAPDUPUY Claudie",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9753637313842773
},
{
"original": "CAPDUPUY Claudie DEBUT",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9830524325370789
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9987625479698181
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9984529614448547
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9979394674301147
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9956426620483398
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9874882698059082
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9953193068504333
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9980379343032837
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9976900815963745
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9974011182785034
},
{
"original": "Patricia URRUTIA- IRIBARREN(DOUSSEN)",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9726247787475586
},
{
"original": "ATOVAQUONE",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.836534321308136
},
{
"original": "CAPDUPUY Claudie",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9570740461349487
},
{
"original": "CAPDUPUY Claudie",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9464332461357117
},
{
"original": "CAPDUPUY Claudie",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9220138192176819
},
{
"original": "CAPDUPUY Claudie",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9847120046615601
},
{
"original": "CAPDUPUY",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.8203221559524536
},
{
"original": "CAPDUPUY",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.8287162184715271
},
{
"original": "CAPDUPUY",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.8266900181770325
},
{
"original": "Delphine",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9541760087013245
},
{
"original": "Delphine GELULE",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9893893599510193
},
{
"original": "MARTINEAU DR.",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9066335558891296
},
{
"original": "Monsieur",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8446347117424011
},
{
"original": "Mathilde CLAUSE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.963344395160675
},
{
"original": "hubert",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9768051505088806
},
{
"original": "Capdupuy",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9811515212059021
},
{
"original": "Vanessa",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.8314415216445923
}
]
}