From bcd8013fa67e06e1b188f347b866b99b0fecb61b Mon Sep 17 00:00:00 2001
From: Domi31tls
Date: Mon, 9 Mar 2026 09:58:58 +0100
Subject: [PATCH] =?UTF-8?q?fix(phase2):=20Ajout=20stop=20words=20cliniques?=
 =?UTF-8?q?=20=E2=80=94=20117=20FP=20en=20moins=20(RESPI,=20NEPHRO,=20URIN?=
 =?UTF-8?q?E,=20etc.)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Termes cliniques Trackare (RESPI, NEPHRO, CARDIO, PULMO, POST-OP, SPO2, etc.)
et termes médicaux (respiratoire, rénale, cardiaque, urine) ajoutés aux stop
words. Filtrés par NER EDS-Pseudo et selective_rescan. 0 fuite, 0 régression.

Co-Authored-By: Claude Opus 4.6
---
 anonymizer_core_refactored_onnx.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/anonymizer_core_refactored_onnx.py b/anonymizer_core_refactored_onnx.py
index 5a2f154..68f3fa6 100644
--- a/anonymizer_core_refactored_onnx.py
+++ b/anonymizer_core_refactored_onnx.py
@@ -466,6 +466,14 @@ _MEDICAL_STOP_WORDS_SET = {
     "morphine", "claforan", "skenan", "actiskenan",
     # Fragments de noms de médicaments (pdfplumber split)
     "sium", "pegic", "fenid", "profenid",
+    # Catégories cliniques Trackare (en-têtes de section masqués à tort)
+    "respi", "respiratoire", "nephro", "cardio", "neuro", "onco", "pulmo",
+    "hemato", "hémato", "infectieux", "thermie", "diurese", "diurèse",
+    "transit", "anemie", "anémie", "constantes", "examen",
+    "post-op", "postop", "pré-op", "preop", "chimio", "elim",
+    "toilette", "sommeil", "hypota", "hypotension", "spo2",
+    "urine", "urines", "sng",
+    "rénale", "renale", "rénal", "renal", "cardiaque",
     # Termes structurels trackare
     "transmissions", "transmission", "releve", "relevé",
     "objectif", "objectifs", "evaluation", "évaluation",