feat(gui): DocResult porte type+catégorie d'erreur RGPD-safe (E2)
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -87,6 +87,27 @@ def _engine_result_error(result: object) -> str | None:
|
||||
return None
|
||||
|
||||
|
||||
# Closed set of error categories (no PII can ever enter it).
|
||||
_ERROR_CODES = ("ner_unavailable", "quarantined", "no_output", "processing_error")
|
||||
|
||||
|
||||
def classify_error_code(exc: Exception) -> str:
    """Map a run exception onto one value of the closed ``_ERROR_CODES`` set.

    Classification looks at the class names in the exception's type
    hierarchy and at marker substrings in its message. The message itself
    is never returned (GDPR): only one of four fixed literals can come out
    of this function.

    Args:
        exc: The exception raised while processing a document.

    Returns:
        ``"ner_unavailable"`` when the exception class, or any of its base
        classes, is named ``EngineUnavailableError``; ``"quarantined"`` when
        the message contains "quarantaine"; ``"no_output"`` when it contains
        "aucune sortie" (both compared case-insensitively); otherwise
        ``"processing_error"``.
    """
    # Matching on the class *name* (presumably to avoid importing the engine
    # module here -- confirm) and walking the MRO so that subclasses of
    # EngineUnavailableError are classified the same way as the base class.
    if any(cls.__name__ == "EngineUnavailableError" for cls in type(exc).__mro__):
        return "ner_unavailable"

    # Case-insensitive comparison: a marker at the start of a sentence
    # ("Quarantaine ...") or in lower case ("aucune sortie") must still match.
    message = str(exc).casefold()
    if "quarantaine" in message:
        return "quarantined"
    if "aucune sortie" in message:
        return "no_output"

    # Anything unrecognised falls back to the generic category.
    return "processing_error"
|
||||
|
||||
|
||||
def discover_documents(input_path, extensions: Optional[Sequence[str]] = None) -> list[Path]:
|
||||
"""Liste les documents à traiter (fichier unique ou dossier récursif)."""
|
||||
path = Path(input_path)
|
||||
@@ -115,6 +136,10 @@ class DocResult:
|
||||
status: str # "success" | "failed"
|
||||
duration_ms: Optional[int]
|
||||
extension: Optional[str]
|
||||
# Diagnostics RGPD-safe : nom de classe d'exception + catégorie fermée.
|
||||
# JAMAIS le message d'exception (str(exc)) ni nom/chemin de document.
|
||||
error_type: Optional[str] = None
|
||||
error_code: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -224,6 +249,8 @@ class ProcessingRunner:
|
||||
page_count = page_count_for(doc)
|
||||
started = time.monotonic()
|
||||
status = "success"
|
||||
error_type = None
|
||||
error_code = None
|
||||
try:
|
||||
if input_path.is_dir():
|
||||
doc_out = build_batch_output_dir(root_dir, out_root, doc)
|
||||
@@ -238,6 +265,8 @@ class ProcessingRunner:
|
||||
log(f"OK : {doc.name}")
|
||||
except Exception as exc: # un échec n'interrompt pas le lot
|
||||
status = "failed"
|
||||
error_type = type(exc).__name__
|
||||
error_code = classify_error_code(exc)
|
||||
summary.failed += 1
|
||||
summary.errors.append((doc.name, str(exc)))
|
||||
log(f"ÉCHEC : {doc.name} — {exc}")
|
||||
@@ -248,6 +277,8 @@ class ProcessingRunner:
|
||||
status=status,
|
||||
duration_ms=int((time.monotonic() - started) * 1000),
|
||||
extension=extension,
|
||||
error_type=error_type,
|
||||
error_code=error_code,
|
||||
)
|
||||
)
|
||||
if on_progress:
|
||||
|
||||
@@ -261,3 +261,45 @@ def test_run_records_per_document_details(tmp_path):
|
||||
assert not hasattr(doc, "path")
|
||||
assert not hasattr(doc, "filename")
|
||||
assert not hasattr(doc, "name")
|
||||
|
||||
|
||||
# -- diagnostics d'erreur RGPD-safe (E2) -----------------------------------
|
||||
|
||||
def test_failed_doc_carries_rgpd_safe_error_fields(tmp_path):
    """A failed document records the exception class name and a closed category only.

    The processing callable raises a ``ValueError`` whose message embeds fake
    personal data; the test checks that the per-document record carries the
    class name and the fallback category, and that no fragment of the message
    shows up in the record's attributes.
    """
    from gui_v6.processing_runner import ProcessingRunner

    secret = "Dupont Jean 1980"  # stand-in for PII inside an exception message

    def boom(_inp, _out):
        raise ValueError(f"échec sur patient {secret}")

    inp = tmp_path / "in"
    inp.mkdir()
    (inp / "a.pdf").write_bytes(b"%PDF-1.4\n")
    out = tmp_path / "out"
    out.mkdir()

    runner = ProcessingRunner(process_fn=boom)
    summary = runner.run(inp, out)

    assert summary.failed == 1
    doc = summary.documents[0]
    assert doc.error_type == "ValueError"
    # The message matches none of the classifier's markers, so the only
    # correct category is the fallback; a looser membership check would let
    # a misclassification slip through.
    assert doc.error_code == "processing_error"

    # Nothing from the exception message may leak into the record.
    blob = repr(vars(doc)).lower()
    assert "dupont" not in blob
    assert "patient" not in blob
    assert secret.lower() not in blob
|
||||
|
||||
|
||||
def test_success_doc_has_no_error_fields(tmp_path):
    """A successfully processed document leaves both error fields at ``None``."""
    from gui_v6.processing_runner import ProcessingRunner

    def ok(_inp, out_dir):
        # process_fn receives the output DIRECTORY: write a deliverable PDF into it.
        raster_pdf = out_dir / "a.redacted_raster.pdf"
        raster_pdf.write_bytes(b"%PDF-1.4\n")
        return {"status": "ok", "pdf_raster": str(raster_pdf)}

    source_dir = tmp_path / "in"
    source_dir.mkdir()
    (source_dir / "a.pdf").write_bytes(b"%PDF-1.4\n")

    target_dir = tmp_path / "out"
    target_dir.mkdir()

    runner = ProcessingRunner(process_fn=ok)
    summary = runner.run(source_dir, target_dir)

    first = summary.documents[0]
    assert first.status == "success"
    assert first.error_type is None
    assert first.error_code is None
|
||||
|
||||
Reference in New Issue
Block a user