feat(gui): DocResult porte type+catégorie d'erreur RGPD-safe (E2)
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -87,6 +87,27 @@ def _engine_result_error(result: object) -> str | None:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
# Ensemble FERMÉ de catégories d'erreur (aucune PII ne peut y entrer).
|
||||||
|
# Closed vocabulary of error categories: these are the four string literals
# that classify_error_code returns. Free-form exception text is never stored here.
_ERROR_CODES = ("ner_unavailable", "quarantined", "no_output", "processing_error")
|
||||||
|
|
||||||
|
|
||||||
|
def classify_error_code(exc: Exception) -> str:
    """Map a run exception onto one value of the closed ``_ERROR_CODES`` set.

    Classification inspects the exception's class names and looks for marker
    substrings in its message. The message itself is NEVER returned (GDPR):
    only one of four fixed literals can come out of this function.

    Args:
        exc: The exception raised while processing a document.

    Returns:
        ``"ner_unavailable"`` when the exception class, or any class it
        inherits from, is named ``EngineUnavailableError``;
        ``"quarantined"`` when the message contains ``"quarantaine"``;
        ``"no_output"`` when the message contains ``"Aucune sortie"``;
        ``"processing_error"`` for everything else.
    """
    # The class is compared by name rather than with isinstance. Walking the
    # MRO keeps that approach while also recognising subclasses of
    # EngineUnavailableError, which an exact-name check on type(exc) misses.
    if any(cls.__name__ == "EngineUnavailableError" for cls in type(exc).__mro__):
        return "ner_unavailable"

    # The message is only read to pick a category; it never leaves this function.
    # NOTE(review): the markers are expected to come from our own messages —
    # confirm against the code that raises them.
    msg = str(exc)
    if "quarantaine" in msg:
        return "quarantined"
    if "Aucune sortie" in msg:
        return "no_output"
    return "processing_error"
|
||||||
|
|
||||||
|
|
||||||
def discover_documents(input_path, extensions: Optional[Sequence[str]] = None) -> list[Path]:
|
def discover_documents(input_path, extensions: Optional[Sequence[str]] = None) -> list[Path]:
|
||||||
"""Liste les documents à traiter (fichier unique ou dossier récursif)."""
|
"""Liste les documents à traiter (fichier unique ou dossier récursif)."""
|
||||||
path = Path(input_path)
|
path = Path(input_path)
|
||||||
@@ -115,6 +136,10 @@ class DocResult:
|
|||||||
status: str # "success" | "failed"
|
status: str # "success" | "failed"
|
||||||
duration_ms: Optional[int]
|
duration_ms: Optional[int]
|
||||||
extension: Optional[str]
|
extension: Optional[str]
|
||||||
|
# Diagnostics RGPD-safe : nom de classe d'exception + catégorie fermée.
|
||||||
|
# JAMAIS le message d'exception (str(exc)) ni nom/chemin de document.
|
||||||
|
error_type: Optional[str] = None
|
||||||
|
error_code: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -224,6 +249,8 @@ class ProcessingRunner:
|
|||||||
page_count = page_count_for(doc)
|
page_count = page_count_for(doc)
|
||||||
started = time.monotonic()
|
started = time.monotonic()
|
||||||
status = "success"
|
status = "success"
|
||||||
|
error_type = None
|
||||||
|
error_code = None
|
||||||
try:
|
try:
|
||||||
if input_path.is_dir():
|
if input_path.is_dir():
|
||||||
doc_out = build_batch_output_dir(root_dir, out_root, doc)
|
doc_out = build_batch_output_dir(root_dir, out_root, doc)
|
||||||
@@ -238,6 +265,8 @@ class ProcessingRunner:
|
|||||||
log(f"OK : {doc.name}")
|
log(f"OK : {doc.name}")
|
||||||
except Exception as exc: # un échec n'interrompt pas le lot
|
except Exception as exc: # un échec n'interrompt pas le lot
|
||||||
status = "failed"
|
status = "failed"
|
||||||
|
error_type = type(exc).__name__
|
||||||
|
error_code = classify_error_code(exc)
|
||||||
summary.failed += 1
|
summary.failed += 1
|
||||||
summary.errors.append((doc.name, str(exc)))
|
summary.errors.append((doc.name, str(exc)))
|
||||||
log(f"ÉCHEC : {doc.name} — {exc}")
|
log(f"ÉCHEC : {doc.name} — {exc}")
|
||||||
@@ -248,6 +277,8 @@ class ProcessingRunner:
|
|||||||
status=status,
|
status=status,
|
||||||
duration_ms=int((time.monotonic() - started) * 1000),
|
duration_ms=int((time.monotonic() - started) * 1000),
|
||||||
extension=extension,
|
extension=extension,
|
||||||
|
error_type=error_type,
|
||||||
|
error_code=error_code,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if on_progress:
|
if on_progress:
|
||||||
|
|||||||
@@ -261,3 +261,45 @@ def test_run_records_per_document_details(tmp_path):
|
|||||||
assert not hasattr(doc, "path")
|
assert not hasattr(doc, "path")
|
||||||
assert not hasattr(doc, "filename")
|
assert not hasattr(doc, "filename")
|
||||||
assert not hasattr(doc, "name")
|
assert not hasattr(doc, "name")
|
||||||
|
|
||||||
|
|
||||||
|
# -- diagnostics d'erreur RGPD-safe (E2) -----------------------------------
|
||||||
|
|
||||||
|
def test_failed_doc_carries_rgpd_safe_error_fields(tmp_path):
    """A failed document exposes only the exception class name and a closed category.

    The processing function raises a ``ValueError`` whose message embeds fake
    PII; none of that text may appear in the resulting per-document record.
    """
    from gui_v6.processing_runner import ProcessingRunner

    secret = "Dupont Jean 1980"  # stand-in for PII inside an exception message

    def boom(_inp, _out):
        raise ValueError(f"échec sur patient {secret}")

    inp = tmp_path / "in"
    inp.mkdir()
    (inp / "a.pdf").write_bytes(b"%PDF-1.4\n")
    out = tmp_path / "out"
    out.mkdir()
    runner = ProcessingRunner(process_fn=boom)
    summary = runner.run(inp, out)

    assert summary.failed == 1
    doc = summary.documents[0]
    assert doc.error_type == "ValueError"
    # The message carries none of the classification markers, so the category
    # must be exactly the fallback. A membership check over the whole closed
    # set would also pass on a misclassification.
    assert doc.error_code == "processing_error"
    # Dump every field of the record and make sure no message text leaked in.
    blob = repr(vars(doc)).lower()
    assert "dupont" not in blob and "patient" not in blob and secret.lower() not in blob
|
||||||
|
|
||||||
|
|
||||||
|
def test_success_doc_has_no_error_fields(tmp_path):
    """A successfully processed document leaves both error fields at ``None``."""
    from gui_v6.processing_runner import ProcessingRunner

    def ok(_inp, out_dir):
        # process_fn is handed the output DIRECTORY; drop a deliverable PDF into it.
        raster_pdf = out_dir / "a.redacted_raster.pdf"
        raster_pdf.write_bytes(b"%PDF-1.4\n")
        return {"status": "ok", "pdf_raster": str(raster_pdf)}

    source_dir = tmp_path / "in"
    source_dir.mkdir()
    (source_dir / "a.pdf").write_bytes(b"%PDF-1.4\n")
    target_dir = tmp_path / "out"
    target_dir.mkdir()

    runner = ProcessingRunner(process_fn=ok)
    record = runner.run(source_dir, target_dir).documents[0]

    assert record.status == "success"
    assert record.error_type is None
    assert record.error_code is None
|
||||||
|
|||||||
Reference in New Issue
Block a user