feat(gui): module diagnostics — payload liste-blanche RGPD (E2)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-30 10:36:16 +02:00
parent 4412512d4b
commit 4b7a31b9df
2 changed files with 125 additions and 0 deletions

79
gui_v6/diagnostics.py Normal file
View File

@@ -0,0 +1,79 @@
"""Diagnostics structurés de la GUI V6 (E2/E3) — RGPD strict.
On n'émet QUE des métadonnées techniques liste-blanche : type d'exception
(nom de classe), catégorie d'erreur d'un ensemble fermé, statut, ordinal,
durée. JAMAIS de nom/chemin/texte de document, ni de message d'exception brut.
L'envoi est non bloquant : un échec réseau n'interrompt jamais le traitement.
Patron : gui_v6/usage_telemetry.py (télémétrie d'usage).
"""
from __future__ import annotations
import json
import uuid
from pathlib import Path
from typing import Any, Callable, Iterable, Optional
# Keys allowed in each diagnostic item (GDPR filter applied when the payload
# is built): any other key of an item is dropped.
_ALLOWED_ITEM_KEYS = {"ordinal", "status", "error_type", "error_code", "duration_ms"}
# NOTE(review): presumably the server-side path diagnostic reports are sent
# to; no use of it is visible in this part of the file - confirm with the sender.
REPORT_PATH = "/api/v1/diagnostics/report"
def new_run_id() -> str:
    """Return a fresh random run identifier.

    The identifier is the hexadecimal form (32 lowercase hex digits) of a
    random version-4 UUID.
    """
    run_uuid = uuid.uuid4()
    return run_uuid.hex
def items_from_summary(summary: Any) -> list[dict]:
    """Build one diagnostic item per document of a ``RunSummary``.

    Only the whitelisted attributes (``ordinal``, ``status``, ``error_type``,
    ``error_code``, ``duration_ms``) are read from each document; no other
    attribute is accessed.

    Args:
        summary: Object exposing a ``documents`` iterable. A missing, ``None``
            or empty ``documents`` attribute yields an empty list.

    Returns:
        A list with one dict per document. An attribute missing on a document
        falls back to ``0`` for ``ordinal``, ``"success"`` for ``status`` and
        ``None`` for the three remaining fields.
    """
    documents = getattr(summary, "documents", None)
    if not documents:
        return []
    # (attribute name, fallback used when the document lacks the attribute)
    fallbacks = (
        ("ordinal", 0),
        ("status", "success"),
        ("error_type", None),
        ("error_code", None),
        ("duration_ms", None),
    )
    return [
        {field: getattr(document, field, fallback) for field, fallback in fallbacks}
        for document in documents
    ]
def build_diagnostics_payload(
    *,
    run_id: str,
    app_name: str,
    app_version: Optional[str],
    license_ref: Optional[str],
    machine_id: Optional[str],
    duration_ms: Optional[int],
    items: Iterable[dict],
) -> dict:
    """Build the diagnostics report payload from per-document items.

    Every item is reduced to the keys listed in ``_ALLOWED_ITEM_KEYS``; any
    other key (file name, path, raw error message, ...) is dropped, even when
    it was supplied by mistake.

    NOTE(review): the filter is key-based only. A value stored under an
    allowed key is copied unchanged, so callers remain responsible for never
    putting free text (e.g. a raw exception message) into ``error_type`` or
    ``error_code``.

    Args:
        run_id: Identifier of the run being reported.
        app_name: Name of the reporting application.
        app_version: Version of the reporting application, if known.
        license_ref: License reference, if known.
        machine_id: Machine identifier, if known.
        duration_ms: Duration of the whole run in milliseconds, if known.
        items: Per-document item dicts; consumed exactly once.

    Returns:
        The payload dict. ``document_count`` is the number of items,
        ``succeeded_count`` / ``failed_count`` count the items whose
        ``status`` equals ``"success"`` / ``"failed"``; an item with any other
        (or no) status is counted in ``document_count`` only.
    """
    clean_items: list[dict] = []
    succeeded = failed = 0
    for raw in items:
        # Follow the item's own key order instead of iterating the whitelist
        # set: string hashing is randomized per interpreter run, so iterating
        # the set would make the key order of the emitted items (and thus the
        # serialized payload) differ from one run to the next.
        item = {key: value for key, value in raw.items() if key in _ALLOWED_ITEM_KEYS}
        status = item.get("status")
        if status == "success":
            succeeded += 1
        elif status == "failed":
            failed += 1
        clean_items.append(item)
    return {
        "run_id": run_id,
        "license_ref": license_ref,
        "machine_id": machine_id,
        "app_name": app_name,
        "app_version": app_version,
        "duration_ms": duration_ms,
        "document_count": len(clean_items),
        "succeeded_count": succeeded,
        "failed_count": failed,
        "items": clean_items,
    }

View File

@@ -0,0 +1,46 @@
import json
from types import SimpleNamespace
from gui_v6 import diagnostics
def _doc(**kw):
    """Build a fake document record for the tests.

    The defaults describe a successful document (ordinal 0, 12 ms, no error);
    keyword arguments override these defaults or add extra attributes.
    """
    defaults = {
        "ordinal": 0,
        "status": "success",
        "error_type": None,
        "error_code": None,
        "duration_ms": 12,
    }
    return SimpleNamespace(**{**defaults, **kw})
def test_new_run_id_is_hex():
    """A run id is a reasonably long string made only of hexadecimal digits."""
    rid = diagnostics.new_run_id()
    assert isinstance(rid, str) and len(rid) >= 16
    # Pin the "hex" part of the contract promised by the test name.
    assert all(ch in "0123456789abcdefABCDEF" for ch in rid)
def test_items_from_summary_whitelist_only():
    """Only whitelisted attributes of each document end up in the items."""
    allowed = {"ordinal", "status", "error_type", "error_code", "duration_ms"}
    # The documents carry non-whitelisted attributes holding fake PII, so the
    # test fails if the extraction ever copies more than the allowed fields.
    summary = SimpleNamespace(documents=[
        _doc(ordinal=0, status="success", filename="LETTRE Dupont 1980.pdf"),
        _doc(ordinal=1, status="failed", error_type="ValueError",
             error_code="processing_error", error_message="patient Dupont Jean"),
    ])
    items = diagnostics.items_from_summary(summary)
    assert len(items) == 2
    assert items[1]["error_type"] == "ValueError"
    # Check every item, not only the first one.
    for item in items:
        assert set(item) <= allowed
    assert "dupont" not in json.dumps(items).lower()
def test_build_payload_counts_and_no_pii_leak():
    """The payload counts documents correctly and drops every forbidden key."""
    allowed_keys = {"ordinal", "status", "error_type", "error_code", "duration_ms"}
    banned_fragments = (
        "filename", "path", "secret", "dupont", "lettre", "error_message", "patient",
    )
    # PII is injected on purpose through forbidden keys, plus a fake raw
    # error message on the failed document.
    poisoned_success = {
        "ordinal": 0,
        "status": "success",
        "duration_ms": 5,
        "filename": "LETTRE Dupont 1980.pdf",
        "path": "/home/dom/secret.pdf",
    }
    poisoned_failure = {
        "ordinal": 1,
        "status": "failed",
        "error_type": "ValueError",
        "error_code": "processing_error",
        "error_message": "patient Dupont Jean",
    }
    payload = diagnostics.build_diagnostics_payload(
        run_id="r" * 16,
        app_name="gui_v6",
        app_version="6.0.0-g1",
        license_ref="LIC-1",
        machine_id="m" * 12,
        duration_ms=999,
        items=[poisoned_success, poisoned_failure],
    )
    assert payload["document_count"] == 2
    assert payload["succeeded_count"] == 1 and payload["failed_count"] == 1
    # Search the lower-cased serialized payload so that neither a forbidden
    # key nor any fragment of the injected values can hide in it.
    serialized = json.dumps(payload).lower()
    for forbidden in banned_fragments:
        assert forbidden not in serialized, f"fuite RGPD : {forbidden}"
    for item in payload["items"]:
        assert set(item) <= allowed_keys