chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,63 @@
{
"source_file": "BACTERIO 23102610.pdf",
"total_replacements": 22,
"regex_replacements": 19,
"ner_replacements": 2,
"sweep_replacements": 1,
"entities_found": [
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "BRONSWICK Gildas",
"replacement": "[PATIENT_1]",
"source": "regex",
"category": "patient"
},
{
"original": "30/07/1950",
"replacement": "[DATE_NAISS_1]",
"source": "regex",
"category": "date_naissance"
},
{
"original": "BARGAIN Beatrice\nReçu le",
"replacement": "[SOIGNANT_1]",
"source": "regex",
"category": "soignant"
},
{
"original": "GSCHWIND Marion\nPrélevé le",
"replacement": "[SOIGNANT_2]",
"source": "regex",
"category": "soignant"
},
{
"original": "2300119413",
"replacement": "[IDENTIFIANT_1]",
"source": "regex",
"category": "identifiant"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "BRONSWICK",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9041751623153687
},
{
"original": "Jacques Loëb",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9833078384399414
}
]
}

View File

@@ -0,0 +1,183 @@
{
"source_file": "CRH 23102610.pdf",
"total_replacements": 414,
"regex_replacements": 358,
"ner_replacements": 26,
"sweep_replacements": 30,
"entities_found": [
{
"original": "10100402527",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "10100402527",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "10100402527",
"replacement": "[CODE_BARRE_1]",
"source": "regex",
"category": "code_barre"
},
{
"original": "PR. LOISEAU Hugues",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8880854249000549
},
{
"original": "VAREIL",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.857205331325531
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9572899341583252
},
{
"original": "Matutes",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9746289253234863
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9518731832504272
},
{
"original": "Pr Loiseau",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8719029426574707
},
{
"original": "Janumet",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9500665068626404
},
{
"original": "BRONSWICK Gildas",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.985684871673584
},
{
"original": "Ch",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.8453240394592285
},
{
"original": "AMELIE RABA LEON",
"replacement": "[PERSONNE_7]",
"source": "ner",
"score": 0.8604055047035217
},
{
"original": "PR. LOISEAU Hugues",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8880854249000549
},
{
"original": "VAREIL",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.857205331325531
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9572899341583252
},
{
"original": "Matutes",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9746289253234863
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9518731832504272
},
{
"original": "Pr Loiseau",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8719029426574707
},
{
"original": "Janumet",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9500665068626404
},
{
"original": "BRONSWICK Gildas",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.985684871673584
},
{
"original": "PR. LOISEAU Hugues",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8880854249000549
},
{
"original": "VAREIL",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.857205331325531
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9572899341583252
},
{
"original": "Matutes",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9746289253234863
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9518731832504272
},
{
"original": "Pr Loiseau",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.8719029426574707
},
{
"original": "BRONSWICK Gildas",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9899542927742004
},
{
"original": "BRONSWICK GILDAS",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9979207515716553
}
]
}

View File

@@ -0,0 +1,573 @@
{
"source_file": "trackare-01290152-23102610_01290152_23102610.pdf",
"total_replacements": 336,
"regex_replacements": 159,
"ner_replacements": 57,
"sweep_replacements": 120,
"entities_found": [
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "01290152",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23102610",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "CHAMPIGNY-SUR-MARNE",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "24 AVENUE DE LA BAIE DE TXIGUNDI Ville de résidence: HENDAYE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "JAOUEN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9871866703033447
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9979681968688965
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9969624876976013
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.953896164894104
},
{
"original": "BRAYER Céline",
"replacement": "[SOIGNANT_5]",
"source": "ner",
"score": 0.979704737663269
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9962416291236877
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9960997700691223
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.810707151889801
},
{
"original": "Camille PATIENT",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.9097305536270142
},
{
"original": "Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9106920957565308
},
{
"original": "Marie Signé",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9363949298858643
},
{
"original": "Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9444496035575867
},
{
"original": "BRAYER Céline",
"replacement": "[SOIGNANT_5]",
"source": "ner",
"score": 0.9500954747200012
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9979681968688965
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9969624876976013
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9648771286010742
},
{
"original": "BRAYER Céline",
"replacement": "[SOIGNANT_5]",
"source": "ner",
"score": 0.979671061038971
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9962416291236877
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9960997700691223
},
{
"original": "GLYCEMIE CAPILLAIRE",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.810707151889801
},
{
"original": "Camille PATIENT",
"replacement": "[MEDECIN_7]",
"source": "ner",
"score": 0.9126949906349182
},
{
"original": "Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9111253619194031
},
{
"original": "Marie Signé",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9348947405815125
},
{
"original": "Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9404518008232117
},
{
"original": "BRAYER Céline",
"replacement": "[SOIGNANT_5]",
"source": "ner",
"score": 0.9725162982940674
},
{
"original": "Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.8638518452644348
},
{
"original": "Camille OZEMPIC",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.8133776783943176
},
{
"original": "Marie ORALE",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9380481243133545
},
{
"original": "Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.8897591233253479
},
{
"original": "Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9703838229179382
},
{
"original": "BGA Camille",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9406434893608093
},
{
"original": "Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9764019250869751
},
{
"original": "MAJCHRZAK Camille",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9779800772666931
},
{
"original": "Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9774014949798584
},
{
"original": "Camille Marie MAJCHRZAK",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9440662860870361
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9739792346954346
},
{
"original": "MAJCHRZAK",
"replacement": "[MEDECIN_8]",
"source": "ner",
"score": 0.9522646069526672
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9486829042434692
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9008316397666931
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.8720721006393433
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.8382924795150757
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9084925651550293
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9797958135604858
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.977617084980011
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9789612293243408
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9875625371932983
},
{
"original": "Bronswick",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9406649470329285
},
{
"original": "Loiseau",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9950379133224487
},
{
"original": "Janumet",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.8945621848106384
},
{
"original": "Loiseau",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9947525858879089
},
{
"original": "Matutes",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.9717926979064941
},
{
"original": "Pr Loiseau",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8556192517280579
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.9073517322540283
},
{
"original": "Loiseau",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9895035624504089
},
{
"original": "Camille Marie",
"replacement": "[SOIGNANT_3]",
"source": "ner",
"score": 0.920089840888977
},
{
"original": "LAGNAOUI",
"replacement": "[PERSONNE_6]",
"source": "ner",
"score": 0.8377308249473572
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_7]",
"source": "ner",
"score": 0.8426556587219238
}
]
}