feat(server): sanitize_event — assainissement PII au niveau event
sanitize_event(event, mapping) applique le principe « Léa apprend l'interface, pas la donnée » (décision Dom 28/06) avant persistance :

- text_input -> contenu (text + raw_keys) remplacé par [SAISIE] (option b) : résout la fuite la plus grave (contenu médical) SANS NER ni détection ;
- titres de fenêtre (active_window_title + window/to/from.title) : identité patient tokenisée (anonymize_text), app/écran gardés ; cohérence par mapping.

Copie défensive (ne mute pas l'event d'origine). 4 tests (9 au total) verts.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -16,6 +16,7 @@ Branche feat/push-log-dgx — assainissement PII clinique.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import copy
|
||||
import re
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
@@ -131,3 +132,43 @@ def anonymize_text(
|
||||
)
|
||||
entities.reverse()
|
||||
return out, entities
|
||||
|
||||
|
||||
# Event keys whose value may be a dict carrying a window `title`
# (window_focus_change, click and text-input events).
_TITLE_CONTAINERS = ("window", "to", "from")
# Marker substituted for anything the user typed.
_PLACEHOLDER_SAISIE = "[SAISIE]"


def sanitize_event(event: Dict, *, mapping: Optional[Dict] = None) -> Dict:
    """Return a sanitized deep copy of a captured event before it is persisted.

    The input event is never mutated; all changes are made on a deep copy.

    Guiding principle (decision "Dom 28/06"): learn the interface, not the data.

    - ``text_input`` events: the typed content (``text``, ``raw_keys``) is
      treated as health data and replaced by ``[SAISIE]``. The key is kept,
      the value is dropped ("option b"). ``None`` and ``""`` are left as-is.
      Note that ``raw_keys`` becomes the placeholder string whatever its
      original type was.
    - Window titles (``active_window_title`` at the top level, and ``title``
      inside the ``window`` / ``to`` / ``from`` dicts): passed through
      ``anonymize_text`` so the patient identity is tokenized while the
      application/screen context is kept.

    Args:
        event: The captured event.
        mapping: Optional mapping forwarded to every ``anonymize_text`` call
            so that tokens stay consistent across titles (and across events
            when the caller reuses the same dict). A fresh dict is used when
            omitted.

    Returns:
        The sanitized copy of ``event``.
    """
    shared_mapping = {} if mapping is None else mapping
    cleaned = copy.deepcopy(event)

    # Typed content is never kept; only the fact that a field was filled.
    if cleaned.get("type") == "text_input":
        for field in ("text", "raw_keys"):
            if cleaned.get(field) in (None, ""):
                continue
            cleaned[field] = _PLACEHOLDER_SAISIE

    # Title stored directly on the event (heartbeat).
    direct_title = cleaned.get("active_window_title")
    if isinstance(direct_title, str):
        cleaned["active_window_title"] = anonymize_text(
            direct_title, mapping=shared_mapping
        )[0]

    # Titles nested one level down; non-dict containers (e.g. None) are skipped.
    for container_key in _TITLE_CONTAINERS:
        container = cleaned.get(container_key)
        if not isinstance(container, dict):
            continue
        nested_title = container.get("title")
        if isinstance(nested_title, str):
            container["title"] = anonymize_text(
                nested_title, mapping=shared_mapping
            )[0]

    return cleaned
|
||||
|
||||
@@ -79,3 +79,60 @@ def test_texte_sans_pii_inchange():
|
||||
out, ents = anonymize_text(t)
|
||||
assert out == t
|
||||
assert ents == []
|
||||
|
||||
|
||||
# --- sanitize_event : assainissement au niveau event (option b pour text_input) ---
|
||||
|
||||
def test_sanitize_text_input_remplace_contenu_par_saisie():
    """Option b (Dom): typed content is not kept; it is replaced by [SAISIE]."""
    from agent_v0.server_v1.pii_sanitizer import sanitize_event

    window = {
        "title": "VIOLA (VIOLA) Liliane 90 ans - IPP: 168246 - Firefox",
        "app_name": "firefox.exe",
    }
    ev = {
        "type": "text_input",
        "text": "hemorragie post-operatoire saignement",  # medical content
        "raw_keys": ["h", "e", "m"],
        "window": window,
    }

    out = sanitize_event(ev)
    sanitized_title = out["window"]["title"]

    # Both typed-content fields collapse to the placeholder.
    assert out["text"] == "[SAISIE]"
    assert out["raw_keys"] == "[SAISIE]"

    # The window title is sanitized: identity tokenized, application kept.
    assert "168246" not in sanitized_title
    assert "VIOLA" not in sanitized_title
    assert "[IPP_1]" in sanitized_title
    assert "Firefox" in sanitized_title

    # The original event must NOT have been mutated.
    assert ev["text"].startswith("hemorragie")
|
||||
|
||||
|
||||
def test_sanitize_heartbeat_titre_direct():
    """A heartbeat's top-level `active_window_title` is anonymized."""
    from agent_v0.server_v1.pii_sanitizer import sanitize_event

    heartbeat = {
        "type": "heartbeat",
        "active_window_title": "GXD5 Pacs CIM ARES - [DATTIN Alix] - Firefox",
    }

    title = sanitize_event(heartbeat)["active_window_title"]

    # The name is replaced by a token; the application context survives.
    assert "DATTIN" not in title
    assert "[NOM_1]" in title
    assert "Pacs" in title
|
||||
|
||||
|
||||
def test_sanitize_focus_change_to_from_window():
    """Nested `to`/`window` titles are anonymized; a null `from` is tolerated."""
    from agent_v0.server_v1.pii_sanitizer import sanitize_event

    patient_title = "LAVAL (BARTHELEMY) Nicole 86 ans - Expert Sante"
    focus_event = {
        "type": "window_focus_change",
        "from": None,
        "to": {"title": patient_title, "app_name": "firefox.exe"},
        "window": {"title": patient_title},
    }

    out = sanitize_event(focus_event)
    to_title = out["to"]["title"]

    assert out["from"] is None  # null container handled
    assert "LAVAL" not in to_title
    assert "[NOM_1]" in to_title
    # Consistency: same patient in `to` and `window` -> same token.
    assert out["window"]["title"] == to_title
|
||||
|
||||
|
||||
def test_sanitize_action_result_inchange():
    """An event without typed content or window titles comes back equal."""
    from agent_v0.server_v1.pii_sanitizer import sanitize_event

    action_result = {
        "type": "action_result",
        "base_shot_id": "shot_0003",
        "image": "x.png",
    }

    sanitized = sanitize_event(action_result)

    assert sanitized == action_result
|
||||
|
||||
Reference in New Issue
Block a user