feat(server): sanitize_event — assainissement PII au niveau event

sanitize_event(event, mapping) applique le principe « Léa apprend l'interface,
pas la donnée » (décision Dom 28/06) avant persistance :
- text_input -> contenu (text + raw_keys) remplacé par [SAISIE] (option b) :
  résout la fuite la plus grave (contenu médical) SANS NER ni détection ;
- titres de fenêtre (active_window_title + window/to/from.title) : identité
  patient tokenisée (anonymize_text), app/écran gardés ; cohérence par mapping.
Copie défensive (ne mute pas l'event d'origine). 4 tests (9 au total) verts.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-06-28 19:53:09 +02:00
parent 8e4d09594c
commit 30d8f65e9a
2 changed files with 98 additions and 0 deletions

View File

@@ -79,3 +79,60 @@ def test_texte_sans_pii_inchange():
out, ents = anonymize_text(t)
assert out == t
assert ents == []
# --- sanitize_event : assainissement au niveau event (option b pour text_input) ---
def test_sanitize_text_input_remplace_contenu_par_saisie():
    """Option b (Dom): typed content is not kept, it becomes [SAISIE]."""
    from agent_v0.server_v1.pii_sanitizer import sanitize_event

    window = {
        "title": "VIOLA (VIOLA) Liliane 90 ans - IPP: 168246 - Firefox",
        "app_name": "firefox.exe",
    }
    event = {
        "type": "text_input",
        "text": "hemorragie post-operatoire saignement",  # medical content
        "raw_keys": ["h", "e", "m"],
        "window": window,
    }

    sanitized = sanitize_event(event)

    # Both the typed text and the raw keystrokes are replaced by the
    # placeholder.
    assert sanitized["text"] == "[SAISIE]"
    assert sanitized["raw_keys"] == "[SAISIE]"

    # The window title is sanitized: patient identifiers are tokenised while
    # the application part of the title is kept.
    title = sanitized["window"]["title"]
    assert "168246" not in title
    assert "VIOLA" not in title
    assert "[IPP_1]" in title and "Firefox" in title

    # The original event must NOT have been mutated.
    assert event["text"].startswith("hemorragie")
def test_sanitize_heartbeat_titre_direct():
    """A heartbeat's top-level ``active_window_title`` is sanitized."""
    from agent_v0.server_v1.pii_sanitizer import sanitize_event

    heartbeat = {
        "type": "heartbeat",
        "active_window_title": "GXD5 Pacs CIM ARES - [DATTIN Alix] - Firefox",
    }

    title = sanitize_event(heartbeat)["active_window_title"]

    # The patient name is gone, replaced by a token; the rest of the title
    # (here the "Pacs" screen label) is still present.
    assert "DATTIN" not in title
    assert "[NOM_1]" in title and "Pacs" in title
def test_sanitize_focus_change_to_from_window():
    """Focus-change events get their ``to`` and ``window`` titles sanitized.

    ``from`` is ``None`` in this fixture and must come back as ``None``.
    """
    from agent_v0.server_v1.pii_sanitizer import sanitize_event

    patient_title = "LAVAL (BARTHELEMY) Nicole 86 ans - Expert Sante"
    event = {
        "type": "window_focus_change",
        "from": None,
        "to": {"title": patient_title, "app_name": "firefox.exe"},
        "window": {"title": patient_title},
    }

    sanitized = sanitize_event(event)

    # A null "from" window is handled and preserved.
    assert sanitized["from"] is None

    to_title = sanitized["to"]["title"]
    assert "LAVAL" not in to_title
    assert "[NOM_1]" in to_title

    # Consistency: the same patient in "to" and "window" yields the same
    # token, so both sanitized titles are identical.
    assert sanitized["window"]["title"] == to_title
def test_sanitize_action_result_inchange():
    """An ``action_result`` event without text or title fields is unchanged."""
    from agent_v0.server_v1.pii_sanitizer import sanitize_event

    event = {
        "type": "action_result",
        "base_shot_id": "shot_0003",
        "image": "x.png",
    }

    sanitized = sanitize_event(event)

    # Equality only: the test does not care whether a copy was returned.
    assert sanitized == event