chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,27 @@
{
"source_file": "CRO 23028431.pdf",
"total_replacements": 11,
"regex_replacements": 6,
"ner_replacements": 3,
"sweep_replacements": 2,
"entities_found": [
{
"original": "Hemolock",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.935861349105835
},
{
"original": "NATHALIE ABELLARD",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9692195653915405
},
{
"original": "EYQUEM",
"replacement": "[ADRESSE_1]",
"source": "ner",
"score": 0.9260977506637573
}
]
}

View File

@@ -0,0 +1,507 @@
{
"source_file": "trackare-05010445-23028431_05010445_23028431.pdf",
"total_replacements": 390,
"regex_replacements": 172,
"ner_replacements": 48,
"sweep_replacements": 170,
"entities_found": [
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "05010445",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23028431",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "DAX",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "240 RUE CALAMARDIN Ville de résidence: BIDART",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "EYQUEM ABELLARD NATHALIE",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9213985800743103
},
{
"original": "JAOUEN BURTIN",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9973164200782776
},
{
"original": "CAZENAVE Xabi Quentin Réalisé",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.9822101593017578
},
{
"original": "BONNEBAS Pascale",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9878706932067871
},
{
"original": "BONNEBAS Pascale",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9819172620773315
},
{
"original": "CARRAZÉ Anaïs",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9236248135566711
},
{
"original": "CARRAZÉ Anaïs",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.921068549156189
},
{
"original": "BONNEBAS Pascale",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.961296558380127
},
{
"original": "ONDANSETRON A",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.8667423129081726
},
{
"original": "BONNEBAS Pascale",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9440414309501648
},
{
"original": "Quentin",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.8268662691116333
},
{
"original": "Quentin",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.8215237855911255
},
{
"original": "Quentin Réalisé",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.8149619102478027
},
{
"original": "Quentin Réalisé",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.8780404329299927
},
{
"original": "Quentin",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.935181200504303
},
{
"original": "JOEST Loïc",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.9819827079772949
},
{
"original": "BONNEBAS Pascale",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.8736136555671692
},
{
"original": "BONNEBAS Pascale",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9732561111450195
},
{
"original": "BONNEBAS Pascale",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9717990756034851
},
{
"original": "KETOPROF",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8650211095809937
},
{
"original": "KETOPROF",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.8660323023796082
},
{
"original": "CARRAZÉ Anaïs",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9603657722473145
},
{
"original": "CARRAZÉ Anaïs",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9228758811950684
},
{
"original": "SOL BONNEBAS Pascale",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9661476016044617
},
{
"original": "SOL BONNEBAS Pascale",
"replacement": "[MEDECIN_4]",
"source": "ner",
"score": 0.9452807903289795
},
{
"original": "Loïc JOEST",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.9974126219749451
},
{
"original": "Loïc JOEST",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.9966721534729004
},
{
"original": "Xabi Quentin",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.9910724759101868
},
{
"original": "Loïc JOEST",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.9970163702964783
},
{
"original": "Xabi Quentin",
"replacement": "[SOIGNANT_12]",
"source": "ner",
"score": 0.9935845732688904
},
{
"original": "Loïc JOEST",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.9967995285987854
},
{
"original": "Loïc JOEST",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.9970676898956299
},
{
"original": "Loïc JOEST",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.9965763688087463
},
{
"original": "Loïc JOEST",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.9028632640838623
},
{
"original": "Loïc JOEST",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.9940513968467712
},
{
"original": "Loïc JOEST",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.9956267476081848
},
{
"original": "Anaïs",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.82716304063797
},
{
"original": "Anaïs CARRAZÉ",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9918802976608276
},
{
"original": "Anaïs CARRAZÉ",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9987406730651855
},
{
"original": "Anaïs CARRAZÉ",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9987810850143433
},
{
"original": "Anaïs CARRAZÉ",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9988628625869751
},
{
"original": "Anaïs CARRAZÉ",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9988614916801453
},
{
"original": "Anaïs CARRAZÉ",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9987770318984985
},
{
"original": "Anaïs CARRAZÉ",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9984853863716125
},
{
"original": "Anaïs CARRAZÉ",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9984027147293091
},
{
"original": "Anaïs CARRAZÉ",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.998410701751709
},
{
"original": "Anaïs CARRAZÉ",
"replacement": "[SOIGNANT_6]",
"source": "ner",
"score": 0.9639517664909363
},
{
"original": "eyquem hervé",
"replacement": "[PATIENT_1]",
"source": "ner",
"score": 0.9695360064506531
}
]
}