chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,63 @@
{
"source_file": "BACTERIO 23018396.pdf",
"total_replacements": 21,
"regex_replacements": 20,
"ner_replacements": 1,
"sweep_replacements": 0,
"entities_found": [
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "GHRISSI Rabha",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "GHRISSI CHIRURGIE ORTHOPEDIQUE",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "15/12/1947",
"replacement": "[DATE_NAISS_1]",
"source": "regex",
"category": "date_naissance"
},
{
"original": "ANDRIAMIANDRASOA Justin\nReçu le",
"replacement": "[SOIGNANT_1]",
"source": "regex",
"category": "soignant"
},
{
"original": "ANDRIAMIANDRASOA Justin\nPrélevé le",
"replacement": "[SOIGNANT_1]",
"source": "regex",
"category": "soignant"
},
{
"original": "2300114426",
"replacement": "[IDENTIFIANT_1]",
"source": "regex",
"category": "identifiant"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "Jacques Loëb",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9892126321792603
}
]
}

View File

@@ -0,0 +1,21 @@
{
"source_file": "CRH 23018396.pdf",
"total_replacements": 14,
"regex_replacements": 8,
"ner_replacements": 2,
"sweep_replacements": 4,
"entities_found": [
{
"original": "GHRISSI",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.8391801714897156
},
{
"original": "GHRISSI Rabha",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9489634037017822
}
]
}

View File

@@ -0,0 +1,921 @@
{
"source_file": "trackare-23000862-23018396_23000862_23018396.pdf",
"total_replacements": 1100,
"regex_replacements": 341,
"ner_replacements": 100,
"sweep_replacements": 659,
"entities_found": [
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23000862",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23018396",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "COURCELLES LES LENS",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "CHIRURGIE ORTHOPEDIQUE\nDossier Patient\nDétails des patients",
"replacement": "[PATIENT_2]",
"source": "regex",
"category": "patient"
},
{
"original": "80 IMPASSES DES CHENES Ville de résidence: SERRESLOUS ET ARRIBANS",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "GUILLEMAUD",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9750199317932129
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9797468185424805
},
{
"original": "ONTIER Bénédicte Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8101521134376526
},
{
"original": "Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8814635872840881
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9834274649620056
},
{
"original": "PONTIER Bénédicte Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9550154209136963
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9266887903213501
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9854373335838318
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9778075218200684
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9220559597015381
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.97491455078125
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9450704455375671
},
{
"original": "HERAUD Laura S",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9259770512580872
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9456329941749573
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9820137023925781
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9773168563842773
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9329472780227661
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9764602780342102
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9670381546020508
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9250944256782532
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9028052687644958
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9313097596168518
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9236277937889099
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9595574736595154
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9696699976921082
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9638180136680603
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9844565987586975
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9758985638618469
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9616854190826416
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9423827528953552
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8611497282981873
},
{
"original": "PIPER/TAZOB",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9045613408088684
},
{
"original": "Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9683322310447693
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9738919734954834
},
{
"original": "PONTIER Bénédicte Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9959189891815186
},
{
"original": "PONTIER Bénédicte Glyc",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9963873624801636
},
{
"original": "KT COURT",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.8363161087036133
},
{
"original": "KT COURT",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.8190720081329346
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9266887903213501
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9854373335838318
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9778075218200684
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9220559597015381
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.97491455078125
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9450704455375671
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9268276691436768
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9858763217926025
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.978825569152832
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9235278367996216
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9759274125099182
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9481039643287659
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9495140910148621
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9844425320625305
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9708201289176941
},
{
"original": "VIDER",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9253542423248291
},
{
"original": "BASSIN",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.972852885723114
},
{
"original": "URINAL",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9368593096733093
},
{
"original": "CAMANO ALVARADO Ana Cécilia",
"replacement": "[SOIGNANT_15]",
"source": "ner",
"score": 0.9943796396255493
},
{
"original": "ALVARADO Ana Cécilia S",
"replacement": "[SOIGNANT_15]",
"source": "ner",
"score": 0.9360173344612122
},
{
"original": "ALVARADO Ana Cécilia Signé",
"replacement": "[SOIGNANT_15]",
"source": "ner",
"score": 0.9763692617416382
},
{
"original": "ALVARADO Ana Cécilia Signé",
"replacement": "[SOIGNANT_15]",
"source": "ner",
"score": 0.9481626749038696
},
{
"original": "ALVARADO Ana Cécilia Signé",
"replacement": "[SOIGNANT_15]",
"source": "ner",
"score": 0.9958453178405762
},
{
"original": "ALVARADO Ana Cécilia Signé",
"replacement": "[SOIGNANT_15]",
"source": "ner",
"score": 0.9773902893066406
},
{
"original": "ANDRIAMIANDRASOA Justin",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9974877238273621
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9474138617515564
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.958077609539032
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8145690560340881
},
{
"original": "PONTIER Bénédicte S",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9768133759498596
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8819968104362488
},
{
"original": "PIPER/TAZOB",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.8553078174591064
},
{
"original": "PONTIER Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8854700922966003
},
{
"original": "PIPER/TAZOB",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.8576228022575378
},
{
"original": "Christian",
"replacement": "[PERSONNE_7]",
"source": "ner",
"score": 0.8244688510894775
},
{
"original": "Christian Signé",
"replacement": "[PERSONNE_7]",
"source": "ner",
"score": 0.9928109049797058
},
{
"original": "Christian Signé",
"replacement": "[PERSONNE_7]",
"source": "ner",
"score": 0.9431058764457703
},
{
"original": "Christian Signé",
"replacement": "[PERSONNE_7]",
"source": "ner",
"score": 0.9618098139762878
},
{
"original": "Bénédicte",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8006708025932312
},
{
"original": "Sophie KT COURT",
"replacement": "[SOIGNANT_5]",
"source": "ner",
"score": 0.8544895052909851
},
{
"original": "Laura VESSIE DE GLACE",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9929372072219849
},
{
"original": "Laure VESSIE DE GLACE",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.9964620471000671
},
{
"original": "Laura URINAL",
"replacement": "[SOIGNANT_19]",
"source": "ner",
"score": 0.940993070602417
},
{
"original": "Karine Signé",
"replacement": "[SOIGNANT_20]",
"source": "ner",
"score": 0.990338146686554
},
{
"original": "Laure Signé",
"replacement": "[PERSONNE_8]",
"source": "ner",
"score": 0.9961590766906738
},
{
"original": "Laure Signé",
"replacement": "[PERSONNE_8]",
"source": "ner",
"score": 0.9931009411811829
},
{
"original": "Laure Signé",
"replacement": "[PERSONNE_8]",
"source": "ner",
"score": 0.9972451329231262
},
{
"original": "Ana Cécilia",
"replacement": "[SOIGNANT_15]",
"source": "ner",
"score": 0.8694664835929871
},
{
"original": "Laure Signé",
"replacement": "[PERSONNE_8]",
"source": "ner",
"score": 0.9028523564338684
},
{
"original": "Justin",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9666572213172913
},
{
"original": "Laure Signé",
"replacement": "[PERSONNE_8]",
"source": "ner",
"score": 0.9431261420249939
},
{
"original": "Laure",
"replacement": "[ADRESSE_3]",
"source": "ner",
"score": 0.8310956358909607
},
{
"original": "Laure Signé",
"replacement": "[PERSONNE_8]",
"source": "ner",
"score": 0.8953642845153809
},
{
"original": "Justin ANDRIAMIANDRASOA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.998911440372467
},
{
"original": "Justin ANDRIAMIANDRASOA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9989106059074402
},
{
"original": "Justin ANDRIAMIANDRASOA",
"replacement": "[SOIGNANT_17]",
"source": "ner",
"score": 0.9988903403282166
},
{
"original": "SERING",
"replacement": "[PERSONNE_9]",
"source": "ner",
"score": 0.8193433284759521
},
{
"original": "Christian SERINGUE",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9553429484367371
},
{
"original": "P. Bruneteau",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9861077070236206
},
{
"original": "P. Bruneteau",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9722352027893066
},
{
"original": "Redon",
"replacement": "[PERSONNE_10]",
"source": "ner",
"score": 0.8905139565467834
},
{
"original": "bruneteau",
"replacement": "[MEDECIN_1]",
"source": "ner",
"score": 0.9771625399589539
},
{
"original": "Quincke",
"replacement": "[PERSONNE_11]",
"source": "ner",
"score": 0.8849055767059326
}
]
}