From 952a1c6ca0ef8ac109ea6863ede8a6abf7449f44 Mon Sep 17 00:00:00 2001 From: Domi31tls Date: Tue, 30 Jun 2026 10:28:42 +0200 Subject: [PATCH] =?UTF-8?q?feat(gui):=20DocResult=20porte=20type+cat=C3=A9?= =?UTF-8?q?gorie=20d'erreur=20RGPD-safe=20(E2)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 (1M context) --- gui_v6/processing_runner.py | 31 +++++++++++++++ tests/unit/test_gui_v6_processing_runner.py | 42 +++++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/gui_v6/processing_runner.py b/gui_v6/processing_runner.py index 1c34f85..7f9455d 100644 --- a/gui_v6/processing_runner.py +++ b/gui_v6/processing_runner.py @@ -87,6 +87,27 @@ def _engine_result_error(result: object) -> str | None: return None +# Ensemble FERMÉ de catégories d'erreur (aucune PII ne peut y entrer). +_ERROR_CODES = ("ner_unavailable", "quarantined", "no_output", "processing_error") + + +def classify_error_code(exc: Exception) -> str: + """Catégorise une exception de run en une valeur de l'ensemble fermé _ERROR_CODES. + + Lit le type et d'éventuels préfixes de message GÉNÉRÉS PAR NOUS pour classer ; + ne renvoie JAMAIS le message lui-même (RGPD). Inconnu → 'processing_error'. + """ + name = type(exc).__name__ + if name == "EngineUnavailableError": + return "ner_unavailable" + msg = str(exc) + if "quarantaine" in msg: + return "quarantined" + if "Aucune sortie" in msg: + return "no_output" + return "processing_error" + + def discover_documents(input_path, extensions: Optional[Sequence[str]] = None) -> list[Path]: """Liste les documents à traiter (fichier unique ou dossier récursif).""" path = Path(input_path) @@ -115,6 +136,10 @@ class DocResult: status: str # "success" | "failed" duration_ms: Optional[int] extension: Optional[str] + # Diagnostics RGPD-safe : nom de classe d'exception + catégorie fermée. + # JAMAIS le message d'exception (str(exc)) ni nom/chemin de document. + error_type: Optional[str] = None + error_code: Optional[str] = None @dataclass @@ -224,6 +249,8 @@ class ProcessingRunner: page_count = page_count_for(doc) started = time.monotonic() status = "success" + error_type = None + error_code = None try: if input_path.is_dir(): doc_out = build_batch_output_dir(root_dir, out_root, doc) @@ -238,6 +265,8 @@ class ProcessingRunner: log(f"OK : {doc.name}") except Exception as exc: # un échec n'interrompt pas le lot status = "failed" + error_type = type(exc).__name__ + error_code = classify_error_code(exc) summary.failed += 1 summary.errors.append((doc.name, str(exc))) log(f"ÉCHEC : {doc.name} — {exc}") @@ -248,6 +277,8 @@ class ProcessingRunner: status=status, duration_ms=int((time.monotonic() - started) * 1000), extension=extension, + error_type=error_type, + error_code=error_code, ) ) if on_progress: diff --git a/tests/unit/test_gui_v6_processing_runner.py b/tests/unit/test_gui_v6_processing_runner.py index 1a3565a..8afc1b1 100644 --- a/tests/unit/test_gui_v6_processing_runner.py +++ b/tests/unit/test_gui_v6_processing_runner.py @@ -261,3 +261,45 @@ def test_run_records_per_document_details(tmp_path): assert not hasattr(doc, "path") assert not hasattr(doc, "filename") assert not hasattr(doc, "name") + + +# -- diagnostics d'erreur RGPD-safe (E2) ----------------------------------- + +def test_failed_doc_carries_rgpd_safe_error_fields(tmp_path): + from gui_v6.processing_runner import ProcessingRunner + + secret = "Dupont Jean 1980" # simulacre de PII dans un message d'exception + + def boom(_inp, _out): + raise ValueError(f"échec sur patient {secret}") + + inp = tmp_path / "in"; inp.mkdir() + (inp / "a.pdf").write_bytes(b"%PDF-1.4\n") + out = tmp_path / "out"; out.mkdir() + runner = ProcessingRunner(process_fn=boom) + summary = runner.run(inp, out) + + assert summary.failed == 1 + doc = summary.documents[0] + assert doc.error_type == "ValueError" + assert doc.error_code in {"ner_unavailable", "quarantined", "no_output", "processing_error"} + blob = repr(vars(doc)).lower() + assert "dupont" not in blob and "patient" not in blob and secret.lower() not in blob + + +def test_success_doc_has_no_error_fields(tmp_path): + from gui_v6.processing_runner import ProcessingRunner + + def ok(_inp, out_dir): + # process_fn reçoit le DOSSIER de sortie : on y écrit un PDF livrable. + pdf = out_dir / "a.redacted_raster.pdf" + pdf.write_bytes(b"%PDF-1.4\n") + return {"status": "ok", "pdf_raster": str(pdf)} + + inp = tmp_path / "in"; inp.mkdir() + (inp / "a.pdf").write_bytes(b"%PDF-1.4\n") + out = tmp_path / "out"; out.mkdir() + summary = ProcessingRunner(process_fn=ok).run(inp, out) + doc = summary.documents[0] + assert doc.status == "success" + assert doc.error_type is None and doc.error_code is None