From 4b7a31b9df8dc5a38ce7cfabdb780ad0546d9958 Mon Sep 17 00:00:00 2001
From: Domi31tls
Date: Tue, 30 Jun 2026 10:36:16 +0200
Subject: [PATCH] =?UTF-8?q?feat(gui):=20module=20diagnostics=20=E2=80=94?=
 =?UTF-8?q?=20payload=20liste-blanche=20RGPD=20(E2)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Opus 4.8 (1M context)
---
Review notes (this section is ignored by git am):
- values stored under allow-listed item keys are now sanitized, not only
  the keys themselves;
- item keys are emitted in a deterministic (sorted) order;
- hand-edited after review, so the blob ids on the "index" lines below no
  longer describe the patched content.

 gui_v6/diagnostics.py                 | 113 ++++++++++++++++++++++++++
 tests/unit/test_gui_v6_diagnostics.py |  63 +++++++++++++++
 2 files changed, 176 insertions(+)
 create mode 100644 gui_v6/diagnostics.py
 create mode 100644 tests/unit/test_gui_v6_diagnostics.py

diff --git a/gui_v6/diagnostics.py b/gui_v6/diagnostics.py
new file mode 100644
index 0000000..d27d63f
--- /dev/null
+++ b/gui_v6/diagnostics.py
@@ -0,0 +1,113 @@
+"""Structured diagnostics for the V6 GUI (E2/E3) — strict GDPR compliance.
+
+Only allow-listed technical metadata is ever emitted: exception type
+(class name), error category from a closed set, status, ordinal and
+duration. NEVER a document name, path or text, nor a raw exception message.
+Sending is non-blocking: a network failure never interrupts processing.
+Modelled on gui_v6/usage_telemetry.py (usage telemetry).
+"""
+
+from __future__ import annotations
+
+import json
+import re
+import uuid
+from pathlib import Path
+from typing import Any, Callable, Iterable, Optional
+
+# Keys allowed in a diagnostic item (GDPR filter applied at build time).
+_ALLOWED_ITEM_KEYS = {"ordinal", "status", "error_type", "error_code", "duration_ms"}
+
+# Allow-listed keys whose values must be plain numbers.
+_NUMERIC_ITEM_KEYS = {"ordinal", "duration_ms"}
+
+# Text values must look like a class name or a closed-set code: one short
+# token without whitespace or path separators, so free text cannot ride along.
+_SAFE_TOKEN_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_.-]{0,63}")
+
+REPORT_PATH = "/api/v1/diagnostics/report"
+
+
+def new_run_id() -> str:
+    """Return a fresh random run identifier (32 lowercase hex characters)."""
+    return uuid.uuid4().hex
+
+
+def items_from_summary(summary: Any) -> list[dict]:
+    """Extract the GDPR-safe diagnostic items from a ``RunSummary``.
+
+    Only the allow-listed attributes are read; no name, path or message is
+    ever touched. Attributes missing on a document fall back to a default.
+    """
+    items: list[dict] = []
+    for item in getattr(summary, "documents", None) or []:
+        items.append(
+            {
+                "ordinal": getattr(item, "ordinal", 0),
+                "status": getattr(item, "status", "success"),
+                "error_type": getattr(item, "error_type", None),
+                "error_code": getattr(item, "error_code", None),
+                "duration_ms": getattr(item, "duration_ms", None),
+            }
+        )
+    return items
+
+
+def _safe_item_value(key: str, value: Any) -> Any:
+    """Return ``value`` when it is harmless for ``key``, else ``None``.
+
+    Numeric keys only accept real numbers (not booleans); every other key only
+    accepts a short identifier-like token, so values containing whitespace or
+    a path separator (messages, sentences, paths) are replaced by ``None``.
+    """
+    if key in _NUMERIC_ITEM_KEYS:
+        is_number = isinstance(value, (int, float)) and not isinstance(value, bool)
+        return value if is_number else None
+    if isinstance(value, str) and _SAFE_TOKEN_RE.fullmatch(value):
+        return value
+    return None
+
+
+def build_diagnostics_payload(
+    *,
+    run_id: str,
+    app_name: str,
+    app_version: Optional[str],
+    license_ref: Optional[str],
+    machine_id: Optional[str],
+    duration_ms: Optional[int],
+    items: Iterable[dict],
+) -> dict:
+    """Build the diagnostics payload.
+
+    Each item is reduced to the allow-listed keys and every kept value is
+    sanitized: a name, path or message can neither leak through an unexpected
+    key nor as free text (whitespace, path separators) under an allowed key.
+    Items count as succeeded or failed according to their sanitized
+    ``status``; any other status only contributes to ``document_count``.
+    """
+    clean_items: list[dict] = []
+    succeeded = failed = 0
+    # Sorted keys keep the item layout identical from one process to the next
+    # (plain set iteration order depends on string hash randomization).
+    allowed_keys = sorted(_ALLOWED_ITEM_KEYS)
+    for raw in items:
+        it = {k: _safe_item_value(k, raw[k]) for k in allowed_keys if k in raw}
+        status = it.get("status")
+        if status == "success":
+            succeeded += 1
+        elif status == "failed":
+            failed += 1
+        clean_items.append(it)
+    return {
+        "run_id": run_id,
+        "license_ref": license_ref,
+        "machine_id": machine_id,
+        "app_name": app_name,
+        "app_version": app_version,
+        "duration_ms": duration_ms,
+        "document_count": len(clean_items),
+        "succeeded_count": succeeded,
+        "failed_count": failed,
+        "items": clean_items,
+    }
diff --git a/tests/unit/test_gui_v6_diagnostics.py b/tests/unit/test_gui_v6_diagnostics.py
new file mode 100644
index 0000000..452a09c
--- /dev/null
+++ b/tests/unit/test_gui_v6_diagnostics.py
@@ -0,0 +1,63 @@
+import json
+from types import SimpleNamespace
+
+from gui_v6 import diagnostics
+
+
+def _doc(**kw):
+    base = dict(ordinal=0, status="success", error_type=None, error_code=None, duration_ms=12)
+    base.update(kw)
+    return SimpleNamespace(**base)
+
+
+def test_new_run_id_is_hex():
+    rid = diagnostics.new_run_id()
+    assert isinstance(rid, str) and len(rid) == 32
+    int(rid, 16)  # raises ValueError if the id is not pure hexadecimal
+
+
+def test_items_from_summary_whitelist_only():
+    summary = SimpleNamespace(documents=[
+        _doc(ordinal=0, status="success"),
+        _doc(ordinal=1, status="failed", error_type="ValueError", error_code="processing_error"),
+    ])
+    items = diagnostics.items_from_summary(summary)
+    assert items[1]["error_type"] == "ValueError"
+    assert set(items[0]) <= {"ordinal", "status", "error_type", "error_code", "duration_ms"}
+
+
+def test_build_payload_counts_and_no_pii_leak():
+    # PII is INJECTED through forbidden keys plus a fake error message:
+    raw_docs = [
+        {"ordinal": 0, "status": "success", "duration_ms": 5,
+         "filename": "LETTRE Dupont 1980.pdf", "path": "/home/dom/secret.pdf"},
+        {"ordinal": 1, "status": "failed", "error_type": "ValueError",
+         "error_code": "processing_error", "error_message": "patient Dupont Jean"},
+    ]
+    payload = diagnostics.build_diagnostics_payload(
+        run_id="r" * 16, app_name="gui_v6", app_version="6.0.0-g1",
+        license_ref="LIC-1", machine_id="m" * 12, duration_ms=999, items=raw_docs,
+    )
+    assert payload["document_count"] == 2
+    assert payload["succeeded_count"] == 1 and payload["failed_count"] == 1
+    blob = json.dumps(payload).lower()
+    for forbidden in ("filename", "path", "secret", "dupont", "lettre", "error_message", "patient"):
+        assert forbidden not in blob, f"fuite RGPD : {forbidden}"
+    for item in payload["items"]:
+        assert set(item) <= {"ordinal", "status", "error_type", "error_code", "duration_ms"}
+
+
+def test_build_payload_drops_free_text_under_allowed_keys():
+    # PII smuggled as the VALUE of allow-listed keys must not survive either.
+    raw_docs = [
+        {"ordinal": "LETTRE Dupont 1980.pdf", "status": "failed",
+         "error_type": "patient Dupont Jean", "error_code": "/home/dom/secret.pdf"},
+    ]
+    payload = diagnostics.build_diagnostics_payload(
+        run_id="r" * 16, app_name="gui_v6", app_version="6.0.0-g1",
+        license_ref="LIC-1", machine_id="m" * 12, duration_ms=999, items=raw_docs,
+    )
+    assert payload["failed_count"] == 1
+    assert payload["items"] == [
+        {"ordinal": None, "status": "failed", "error_type": None, "error_code": None},
+    ]