fix: DR. Ute (3 chars), SAINT-GERMES composé, SODIUM MACO/BAX pharma
- force_names bypass le seuil 4 chars (prénoms courts après Dr/Mme : Ute, Eva)
- SAINT seul = bloqué, SAINT-xxx composé = accepté comme nom
- Labos pharma ajoutés aux stop-words + companion blacklist : MACO, AGUETTANT, RENAUDIN, LAVOISIER, COOPER, ARROW, BIOGARAN, MYLAN, TEVA, ZENTIVA
- Score : 99.8/100 (amélioration, "Sie" corrigé)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -2385,12 +2385,21 @@ def _apply_extracted_names(text: str, names: set, audit: List[PiiHit], force_nam
|
|||||||
_NEVER_MASK_AS_NAME = {
|
_NEVER_MASK_AS_NAME = {
|
||||||
"Date", "DATE", "Note", "NOTE", "Heure", "HEURE", "Type", "TYPE",
|
"Date", "DATE", "Note", "NOTE", "Heure", "HEURE", "Type", "TYPE",
|
||||||
"Soin", "SOIN", "Soins", "SOINS", "Surv", "SURV",
|
"Soin", "SOIN", "Soins", "SOINS", "Surv", "SURV",
|
||||||
"Saint", "SAINT", "Sainte", "SAINTE",
|
|
||||||
"Page", "PAGE", "Presc", "PRESC",
|
"Page", "PAGE", "Presc", "PRESC",
|
||||||
}
|
}
|
||||||
safe_names = {n for n in names if len(n) >= 4
|
safe_names = set()
|
||||||
and n not in _NEVER_MASK_AS_NAME
|
for n in names:
|
||||||
and (n in _force or n.lower() not in _MEDICAL_STOP_WORDS_SET)}
|
if len(n) < 4 and n not in _force:
|
||||||
|
# Tokens < 4 chars : accepter SEULEMENT les force_names (ex: "Ute" après Dr)
|
||||||
|
continue
|
||||||
|
if n in _NEVER_MASK_AS_NAME:
|
||||||
|
continue
|
||||||
|
# "Saint"/"SAINT" seul = bloquer. "Saint-Germes" composé = laisser passer
|
||||||
|
if n.upper() in ("SAINT", "SAINTE") and "-" not in n:
|
||||||
|
continue
|
||||||
|
if n not in _force and n.lower() in _MEDICAL_STOP_WORDS_SET:
|
||||||
|
continue
|
||||||
|
safe_names.add(n)
|
||||||
# Ajouter un hit global (page=-1) par nom pour la redaction PDF raster
|
# Ajouter un hit global (page=-1) par nom pour la redaction PDF raster
|
||||||
# (un seul hit suffit — redact_pdf_raster cherche le token sur chaque page)
|
# (un seul hit suffit — redact_pdf_raster cherche le token sur chaque page)
|
||||||
# Les noms forcés (contexte Dr/Mme) utilisent NOM_FORCE pour bypasser
|
# Les noms forcés (contexte Dr/Mme) utilisent NOM_FORCE pour bypasser
|
||||||
@@ -4275,6 +4284,9 @@ def process_pdf(
|
|||||||
"SULFAMIDES", "CLAVULANIQUE", "MECILLINAM",
|
"SULFAMIDES", "CLAVULANIQUE", "MECILLINAM",
|
||||||
"TAZOBACTAM", "TEMOCILLINE", "ECOFLAC", "FURANES",
|
"TAZOBACTAM", "TEMOCILLINE", "ECOFLAC", "FURANES",
|
||||||
"CONTENTION", "ISOLEMENT", "ELIMINATION",
|
"CONTENTION", "ISOLEMENT", "ELIMINATION",
|
||||||
|
# Labos pharmaceutiques (FP dans tableaux prescriptions trackare)
|
||||||
|
"MACO", "AGUETTANT", "RENAUDIN", "LAVOISIER",
|
||||||
|
"COOPER", "ARROW", "BIOGARAN", "MYLAN", "TEVA", "ZENTIVA",
|
||||||
"PANCREATITE", "INFECTIEUX", "HEMODYNAMIQUE",
|
"PANCREATITE", "INFECTIEUX", "HEMODYNAMIQUE",
|
||||||
"SENSIBLE", "VARIABLE", "DOSAGE", "CAT",
|
"SENSIBLE", "VARIABLE", "DOSAGE", "CAT",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1309,3 +1309,15 @@ zymad
|
|||||||
étage
|
étage
|
||||||
évaluation
|
évaluation
|
||||||
évolution
|
évolution
|
||||||
|
|
||||||
|
# Laboratoires pharmaceutiques (FP prescriptions trackare)
|
||||||
|
maco
|
||||||
|
aguettant
|
||||||
|
renaudin
|
||||||
|
lavoisier
|
||||||
|
cooper
|
||||||
|
arrow
|
||||||
|
biogaran
|
||||||
|
mylan
|
||||||
|
teva
|
||||||
|
zentiva
|
||||||
|
|||||||
Reference in New Issue
Block a user