chore: mise à jour output pipeline (anonymized + structured)

Résultats de re-traitement pipeline v2 sur 261 dossiers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-03-07 23:14:42 +01:00
parent c73515ac89
commit 13fe9fa666
734 changed files with 157158 additions and 304963 deletions

View File

@@ -0,0 +1,159 @@
{
"source_file": "CRH 23056749.pdf",
"total_replacements": 81,
"regex_replacements": 79,
"ner_replacements": 0,
"sweep_replacements": 2,
"entities_found": [
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101856135",
"replacement": "[RPPS_10]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "10110601324",
"replacement": "[RPPS_1]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101096005",
"replacement": "[RPPS_2]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100163277",
"replacement": "[RPPS_3]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101639580",
"replacement": "[RPPS_4]",
"source": "regex",
"category": "rpps"
},
{
"original": "10004606595",
"replacement": "[RPPS_5]",
"source": "regex",
"category": "rpps"
},
{
"original": "10101480506",
"replacement": "[RPPS_6]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100858090",
"replacement": "[RPPS_7]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102268702",
"replacement": "[RPPS_8]",
"source": "regex",
"category": "rpps"
},
{
"original": "10102272209",
"replacement": "[RPPS_9]",
"source": "regex",
"category": "rpps"
},
{
"original": "10002815024",
"replacement": "[RPPS_11]",
"source": "regex",
"category": "rpps"
},
{
"original": "10100817005",
"replacement": "[RPPS_12]",
"source": "regex",
"category": "rpps"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
},
{
"original": "secr.medint@ch-cotebasque.fr",
"replacement": "[EMAIL_1]",
"source": "regex",
"category": "email"
}
]
}

View File

@@ -0,0 +1,423 @@
{
"source_file": "trackare-21008244-23056749_21008244_23056749.pdf",
"total_replacements": 320,
"regex_replacements": 198,
"ner_replacements": 32,
"sweep_replacements": 90,
"entities_found": [
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "21008244",
"replacement": "[IPP_1]",
"source": "regex",
"category": "ipp"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "23056749",
"replacement": "[EPISODE_1]",
"source": "regex",
"category": "episode"
},
{
"original": "640000162",
"replacement": "[FINESS]",
"source": "regex",
"category": "finess"
},
{
"original": "CARACAS",
"replacement": "[LIEU_NAISS_1]",
"source": "regex",
"category": "lieu_naissance"
},
{
"original": "10 RUE MAUBEC Ville de résidence: BAYONNE",
"replacement": "[ADRESSE_1]",
"source": "regex",
"category": "adresse"
},
{
"original": "Laure CURUTCHET",
"replacement": "[MEDECIN_21]",
"source": "ner",
"score": 0.9981613755226135
},
{
"original": "THERALENE",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.8046954274177551
},
{
"original": "Alexia ORALE",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9978717565536499
},
{
"original": "Alexia ORALE",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9973835945129395
},
{
"original": "Alexia",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9202207326889038
},
{
"original": "HOURDILLE Alexia",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9710190296173096
},
{
"original": "HOURDILLE Alexia",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.8682654500007629
},
{
"original": "THERALENE",
"replacement": "[SOIGNANT_13]",
"source": "ner",
"score": 0.8047897219657898
},
{
"original": "Alexia ORALE",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9978717565536499
},
{
"original": "Alexia ORALE",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9973835945129395
},
{
"original": "Alexia",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9202207326889038
},
{
"original": "HOURDILLE Alexia",
"replacement": "[MEDECIN_11]",
"source": "ner",
"score": 0.9873743057250977
},
{
"original": "Typhaine",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9905490279197693
},
{
"original": "Typhaine",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9875137805938721
},
{
"original": "Typhaine",
"replacement": "[MEDECIN_3]",
"source": "ner",
"score": 0.9865841865539551
},
{
"original": "Caroline HAMID",
"replacement": "[MEDECIN_23]",
"source": "ner",
"score": 0.9935200214385986
},
{
"original": "Caroline HAMID",
"replacement": "[MEDECIN_23]",
"source": "ner",
"score": 0.9989202618598938
},
{
"original": "Caroline HAMID",
"replacement": "[MEDECIN_23]",
"source": "ner",
"score": 0.9989671111106873
},
{
"original": "Caroline HAMID",
"replacement": "[MEDECIN_23]",
"source": "ner",
"score": 0.9988827109336853
},
{
"original": "Caroline HAMID",
"replacement": "[MEDECIN_23]",
"source": "ner",
"score": 0.9989028573036194
},
{
"original": "Elise MOLINE",
"replacement": "[SOIGNANT_8]",
"source": "ner",
"score": 0.9799536466598511
},
{
"original": "Mikael DA GRACA",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9983142018318176
},
{
"original": "Mikael DA GRACA",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9990451335906982
},
{
"original": "Mikael DA GRACA",
"replacement": "[PERSONNE_1]",
"source": "ner",
"score": 0.9990527033805847
},
{
"original": "Sheila PELAEZ DEL",
"replacement": "[PERSONNE_2]",
"source": "ner",
"score": 0.9965109825134277
},
{
"original": "Caroline HAMID",
"replacement": "[MEDECIN_23]",
"source": "ner",
"score": 0.9969545602798462
},
{
"original": "Sandrine PAGOAGA",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9968077540397644
},
{
"original": "Martin S",
"replacement": "[PERSONNE_4]",
"source": "ner",
"score": 0.9687907695770264
},
{
"original": "Pagoaga S",
"replacement": "[PERSONNE_3]",
"source": "ner",
"score": 0.9771784543991089
},
{
"original": "Caroline HAMID",
"replacement": "[MEDECIN_23]",
"source": "ner",
"score": 0.8771003484725952
},
{
"original": "dr larricq",
"replacement": "[MEDECIN_6]",
"source": "ner",
"score": 0.9439975619316101
},
{
"original": "J. LOEB",
"replacement": "[PERSONNE_5]",
"source": "ner",
"score": 0.838388204574585
}
]
}