chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,63 @@
{
"source_file": "CRO 23127321.pdf",
"total_replacements": 23,
"regex_replacements": 11,
"ner_replacements": 9,
"sweep_replacements": 3,
"entities_found": [
{
"original": "ADRIEN",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.8564140200614929
},
{
"original": "HUBERT-ETCHEVERRY",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.994333803653717
},
{
"original": "Adrien HUBERT-ETCHEVERRY",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9954462051391602
},
{
"original": "ADRIEN",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9658953547477722
},
{
"original": "HUBERT-ETCHEVERRY",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9933086633682251
},
{
"original": "davier de Verbrugge",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9986215233802795
},
{
"original": "C. DERUY",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9953195452690125
},
{
"original": "ADRIEN",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.8724865317344666
},
{
"original": "HUBERT-ETCHEVERRY",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9950833320617676
}
]
}

View File

@@ -0,0 +1,747 @@
{
"source_file": "trackare-BA060655-23127321_BA060655_23127321.pdf",
"total_replacements": 1534,
"regex_replacements": 457,
"ner_replacements": 65,
"sweep_replacements": 1012,
"entities_found": [
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23127321",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "99999",
"replacement": "[LIEU_NAISS_2]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "BAYONNE",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "MEZZANO Thomas",
"replacement": "[MEDECIN_7]",
"source": "regex",
"category": "patient"
},
{
"original": "CHIRURGIE ORTHOPEDIQUE\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "HUBERT",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "contact"
},
{
"original": "Hubert",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "contact"
},
{
"original": "HUBERT-ETCHEVERRY",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "HUBERT-ETCHEVERRY",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "HUBERT-ETCHEVERRY",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "HUBERT-ETCHEVERRY",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "HUBERT-ETCHEVERRY",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "HUBERT-ETCHEVERRY",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ADRIEN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ADRIEN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ADRIEN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ADRIEN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ADRIEN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "ADRIEN",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "75 RUE DE SALON Ville de résidence: BIARRITZ",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "ELICAGARAY Arnaud",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9756957292556763
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.997791588306427
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9956895112991333
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9978934526443481
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9960446357727051
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9958305954933167
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9929541945457458
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9962515234947205
},
{
"original": "ELICAGARAY Arnaud",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9875360727310181
},
{
"original": "HERAUD Laura",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9563214778900146
},
{
"original": "BESSONART Mélanie",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9334554672241211
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9954563975334167
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9988836050033569
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9953944087028503
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9882310628890991
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9631856679916382
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9923845529556274
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9911132454872131
},
{
"original": "BESSONART Mélanie",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9904767870903015
},
{
"original": "Admin",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8099757432937622
},
{
"original": "BESSONART Mélanie",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9644150733947754
},
{
"original": "Arnaud GEL",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9188685417175293
},
{
"original": "Arnaud",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9594337940216064
},
{
"original": "Arnaud",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9453325867652893
},
{
"original": "Charlotte",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.879085898399353
},
{
"original": "ALPRAZOLAM ARW",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.8476254343986511
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9971165060997009
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9962759613990784
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9957548379898071
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9962000250816345
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9955812096595764
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9984784722328186
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9956223368644714
},
{
"original": "HERAUD Laura",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9767059683799744
},
{
"original": "HERAUD Laura",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9677546620368958
},
{
"original": "BESSONART Mélanie",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9937002062797546
},
{
"original": "HERAUD Laura",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9572766423225403
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9988096952438354
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9988200068473816
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9988221526145935
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9988301396369934
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9974544644355774
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9975453019142151
},
{
"original": "Valerie MANCICIDOR",
"replacement": "[MEDECIN_15]",
"source": "ner",
"score": 0.9972466826438904
},
{
"original": "BESSONART Mélanie",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9739809036254883
},
{
"original": "BESSONART Mélanie",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9892011880874634
},
{
"original": "COUTRIX Charlotte",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.8947831988334656
},
{
"original": "ELICAGARAY",
"replacement": "[SOIGNANT_16]",
"source": "ner",
"score": 0.9219810962677002
},
{
"original": "Laura VESSIE DE GLACE",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9725115895271301
},
{
"original": "BASSIN : VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9347565770149231
},
{
"original": "Mélanie URINAL",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9988512992858887
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8573927879333496
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9606509208679199
},
{
"original": "Laura URINAL",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9986175298690796
},
{
"original": "Mélanie",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.8487902879714966
},
{
"original": "Mélanie SOMMEIL",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.9542282819747925
},
{
"original": "Laura SOMMEIL",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9588227272033691
},
{
"original": "Mélanie",
"replacement": "[SOIGNANT_18]",
"source": "ner",
"score": 0.8253406286239624
},
{
"original": "Charles DERUY",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9913523197174072
},
{
"original": "Amélie DEBIEZ",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9946175813674927
},
{
"original": "Kattina ARTOLA",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9614532589912415
},
{
"original": "Kattina ARTOLA",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9968552589416504
},
{
"original": "G Olatz",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.9511386156082153
},
{
"original": "Patricia FRANCE",
"replacement": "[CONTACT_1]",
"source": "ner",
"score": 0.860893964767456
},
{
"original": "Laurent FRANCE",
"replacement": "[CONTACT_1]",
"source": "ner",
"score": 0.8227070569992065
}
]
}