- Token min length relevé de 2-3 → 4 chars (élimine FP EPO, IRC, SIB...) - Stop-words enrichis : acronymes médicaux 3 lettres, termes pharma, soins infirmiers - BDPM stop-words : ~7300 noms commerciaux + DCI/substances actives - Gazetteers adresses FINESS : 63K patterns Aho-Corasick (position-preserving normalization) - Filtre contextuel anatomique pour FINESS établissements - Nouvelles regex : RE_CIVILITE_COMMA_LIST, RE_EXTRACT_NOM_UTILISE, RE_EXTRACT_PRENOM, RE_NUM_EXAMEN_PATIENT, RE_ADRESSE_LIEU_DIT, RE_CIVILITE_INITIALE, Dr X.NOM - URLs complètes (RE_URL) + détection multiline - N° venue inversé (layout-aware) + EPISODE/NDA dans _CRITICAL_PII_TYPES - HospitalFilter désactivé pour ADRESSE/TEL/VILLE/EPISODE (identifient le patient) - Batch silver export parallélisé (multiprocessing spawn, N workers) - Seuil sur-masquage relevé à 8%, server.py enrichi (source regex/ner) - Blacklist villes : COURANT, PARIS ; contexte villes étendu (UHCD, spécialités) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
254 lines
6.3 KiB
JSON
254 lines
6.3 KiB
JSON
{
|
|
"date": "2026-03-12T17:16:25.993851",
|
|
"scores": {
|
|
"global_score": 97.0,
|
|
"leak_score": 100.0,
|
|
"fp_score": 90,
|
|
"totals": {
|
|
"documents": 29,
|
|
"audit_hits": 3186,
|
|
"name_tokens_known": 457,
|
|
"leak_audit": 0,
|
|
"leak_occurrences": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 570,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 2
|
|
}
|
|
},
|
|
"per_file": {
|
|
"BACTERIO 23232115": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 3,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"CONSULTATION ANESTHESISTE 23056022": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 11,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"CONSULTATION ANESTHESISTE 23060661": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 6,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"CONSULTATION ANESTHESISTE 23139653": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 6,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"CRH 60_23106634": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 5,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 1
|
|
},
|
|
"CRO 23159905": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 5,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 1
|
|
},
|
|
"CRO 23160703": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 2,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"LETTRE DE SORTIE 23087212": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 0,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-00260974-23070213_00260974_23070213": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 29,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-03020576-23175616_03020576_23175616": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 31,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-05000272-23074376_05000272_23074376": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 11,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-05012679-23098722_05012679_23098722": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 24,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-05012965-23060770_05012965_23060770": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 31,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-07003136-23135847_07003136_23135847": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 35,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-11004431-23124019_11004431_23124019": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 20,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-13013848-23165708_13013848_23165708": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 17,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-14025311-23034958_14025311_23034958": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 12,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-17015185-23043950_17015185_23043950": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 19,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-23000862-23018396_23000862_23018396": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 32,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-99246761-23159905_99246761_23159905": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 34,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-99252128-23177582_99252128_23177582": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 33,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-BA042686-23090597_BA042686_23090597": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 23,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-BA065989-23102874_BA065989_23102874": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 11,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-BA067657-23076655_BA067657_23076655": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 33,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-BA093659-23074520_BA093659_23074520": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 30,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-BA121804-23016863_BA121804_23016863": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 32,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-BA127127-23135726_BA127127_23135726": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 26,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-BA171849-23214501_BA171849_23214501": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 22,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
},
|
|
"trackare-BA192486-23127395_BA192486_23127395": {
|
|
"leak_audit": 0,
|
|
"leak_regex": 0,
|
|
"leak_insee_high": 0,
|
|
"leak_insee_medium": 27,
|
|
"fp_medical": 0,
|
|
"fp_overmasking": 0
|
|
}
|
|
}
|
|
} |