feat(gui): DocResult porte type+catégorie d'erreur RGPD-safe (E2)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-30 10:28:42 +02:00
parent 675e328d8c
commit 952a1c6ca0
2 changed files with 73 additions and 0 deletions

View File

@@ -87,6 +87,27 @@ def _engine_result_error(result: object) -> str | None:
return None
# CLOSED set of error categories: every member is a fixed string literal, so
# no PII can ever end up in it.
_ERROR_CODES = ("ner_unavailable", "quarantined", "no_output", "processing_error")
def classify_error_code(exc: Exception) -> str:
    """Map a run exception onto one value of the closed error-category set.

    The exception's class name is inspected first (base classes included, so
    subclasses of ``EngineUnavailableError`` are recognised too), then the
    message is searched for marker substrings that are expected to come from
    messages generated by this project. The message itself is NEVER returned
    (GDPR): only one of the fixed category strings below leaves this function.

    Args:
        exc: The exception raised while processing a document.

    Returns:
        ``"ner_unavailable"``, ``"quarantined"``, ``"no_output"``, or
        ``"processing_error"`` for anything unrecognised.
    """
    # The match is by class *name* (no import of the exception type needed).
    # Walking the MRO keeps subclasses of EngineUnavailableError in the same
    # category instead of silently degrading them to "processing_error".
    if any(cls.__name__ == "EngineUnavailableError" for cls in type(exc).__mro__):
        return "ner_unavailable"

    message = str(exc)  # read for classification only; never returned
    if "quarantaine" in message:
        return "quarantined"
    if "Aucune sortie" in message:
        return "no_output"
    return "processing_error"
def discover_documents(input_path, extensions: Optional[Sequence[str]] = None) -> list[Path]:
"""Liste les documents à traiter (fichier unique ou dossier récursif)."""
path = Path(input_path)
@@ -115,6 +136,10 @@ class DocResult:
status: str # "success" | "failed"
duration_ms: Optional[int]
extension: Optional[str]
# Diagnostics RGPD-safe : nom de classe d'exception + catégorie fermée.
# JAMAIS le message d'exception (str(exc)) ni nom/chemin de document.
error_type: Optional[str] = None
error_code: Optional[str] = None
@dataclass
@@ -224,6 +249,8 @@ class ProcessingRunner:
page_count = page_count_for(doc)
started = time.monotonic()
status = "success"
error_type = None
error_code = None
try:
if input_path.is_dir():
doc_out = build_batch_output_dir(root_dir, out_root, doc)
@@ -238,6 +265,8 @@ class ProcessingRunner:
log(f"OK : {doc.name}")
except Exception as exc: # un échec n'interrompt pas le lot
status = "failed"
error_type = type(exc).__name__
error_code = classify_error_code(exc)
summary.failed += 1
summary.errors.append((doc.name, str(exc)))
log(f"ÉCHEC : {doc.name}{exc}")
@@ -248,6 +277,8 @@ class ProcessingRunner:
status=status,
duration_ms=int((time.monotonic() - started) * 1000),
extension=extension,
error_type=error_type,
error_code=error_code,
)
)
if on_progress:

View File

@@ -261,3 +261,45 @@ def test_run_records_per_document_details(tmp_path):
assert not hasattr(doc, "path")
assert not hasattr(doc, "filename")
assert not hasattr(doc, "name")
# -- diagnostics d'erreur RGPD-safe (E2) -----------------------------------
def test_failed_doc_carries_rgpd_safe_error_fields(tmp_path):
    """A failed document records the exception class name and a closed category only."""
    from gui_v6.processing_runner import ProcessingRunner

    # Stand-in for PII that ends up inside an exception message.
    fake_pii = "Dupont Jean 1980"

    def boom(_inp, _out):
        raise ValueError(f"échec sur patient {fake_pii}")

    source_dir = tmp_path / "in"
    source_dir.mkdir()
    (source_dir / "a.pdf").write_bytes(b"%PDF-1.4\n")
    target_dir = tmp_path / "out"
    target_dir.mkdir()

    summary = ProcessingRunner(process_fn=boom).run(source_dir, target_dir)

    assert summary.failed == 1
    record = summary.documents[0]
    assert record.error_type == "ValueError"
    allowed_codes = {"ner_unavailable", "quarantined", "no_output", "processing_error"}
    assert record.error_code in allowed_codes

    # Nothing from the exception message may survive in the per-document record.
    dumped = repr(vars(record)).lower()
    for leaked in ("dupont", "patient", fake_pii.lower()):
        assert leaked not in dumped
def test_success_doc_has_no_error_fields(tmp_path):
    """A successfully processed document leaves both error fields unset."""
    from gui_v6.processing_runner import ProcessingRunner

    def ok(_inp, out_dir):
        # process_fn is handed the output DIRECTORY: write a deliverable PDF into it.
        deliverable = out_dir / "a.redacted_raster.pdf"
        deliverable.write_bytes(b"%PDF-1.4\n")
        return {"status": "ok", "pdf_raster": str(deliverable)}

    source_dir = tmp_path / "in"
    source_dir.mkdir()
    (source_dir / "a.pdf").write_bytes(b"%PDF-1.4\n")
    target_dir = tmp_path / "out"
    target_dir.mkdir()

    runner = ProcessingRunner(process_fn=ok)
    summary = runner.run(source_dir, target_dir)

    record = summary.documents[0]
    assert record.status == "success"
    assert record.error_type is None
    assert record.error_code is None