feat(core): garde-fou adresse burn + doc chemins conservateurs (P1-2/F-3)
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
160
tests/unit/test_core_address_burn_guard.py
Normal file
160
tests/unit/test_core_address_burn_guard.py
Normal file
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Plan 1b — Task 4 (P1-2/F-3) : garde-fou du burn adresse géométrique.
|
||||
|
||||
`_search_pdf_address_lines` est un chemin de caviardage INDÉPENDANT de
|
||||
l'audit : il noircit directement les lignes d'adresse trouvées
|
||||
géométriquement sur la page (cf. `test_pdf_redaction_directly_masks_finess_address_range`).
|
||||
Le filtre d'audit de la Task 1 ne le couvre donc PAS.
|
||||
|
||||
Ces tests vérifient que ce chemin est gaté sous la catégorie ADRESSE :
|
||||
- ADRESSE désactivée → `_search_pdf_address_lines` n'est PAS appliqué ;
|
||||
- ADRESSE activée (ou disabled vide) → il est appelé comme avant.
|
||||
"""
|
||||
import anonymizer_core_refactored_onnx as core
|
||||
from anonymizer_core_refactored_onnx import (
|
||||
PiiHit,
|
||||
fitz,
|
||||
redact_pdf_raster,
|
||||
redact_pdf_vector,
|
||||
)
|
||||
|
||||
|
||||
def _make_address_pdf(tmp_path):
    """Build a one-page PDF fixture with an address line and a clinical line.

    Args:
        tmp_path: Directory path object in which ``addr.pdf`` is written
            (callers in this file pass pytest's ``tmp_path``).

    Returns:
        The path of the saved PDF, ``tmp_path / "addr.pdf"``.
    """
    source = tmp_path / "addr.pdf"
    doc = fitz.open()
    try:
        page = doc.new_page()
        # Street-address line placed near the top of the page.
        page.insert_text((72, 72), "15 à 35 rue Claude Boucher Bordeaux Cedex")
        # Clinical line placed below the address.
        page.insert_text((72, 108), "Motif d'hospitalisation : contrôle clinique.")
        doc.save(source)
    finally:
        # Release the document even when page creation or saving fails.
        doc.close()
    return source
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# VECTOR
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_vector_address_search_called_when_adresse_enabled(tmp_path, monkeypatch):
    """Vector path with an empty ``disabled_kinds``: the address search runs.

    Asserts that ``core._search_pdf_address_lines`` is invoked, that the
    address text is absent from the redacted output, and that the clinical
    line is still present.
    """
    # `fitz` is None: nothing can be exercised, so the test body is skipped.
    if fitz is None:
        return
    source = _make_address_pdf(tmp_path)
    output = tmp_path / "addr.enabled.pdf"

    calls = []
    real = core._search_pdf_address_lines

    def _spy(page):
        # Record which page was searched, then delegate to the real function
        # so the redaction itself still happens.
        calls.append(page.number)
        return real(page)

    monkeypatch.setattr(core, "_search_pdf_address_lines", _spy)

    # Empty disabled set -> default behaviour (the address is searched).
    redact_pdf_vector(source, [], output, disabled_kinds=set())

    assert calls, "ADRESSE activée : _search_pdf_address_lines doit être appelé"

    redacted = fitz.open(output)
    try:
        text = redacted[0].get_text()
    finally:
        # Close the output document even if text extraction raises.
        redacted.close()

    # The address was redacted (the geometric burn was applied).
    assert "rue Claude Boucher" not in text
    # The clinical line remains readable.
    assert "Motif d'hospitalisation" in text
|
||||
|
||||
|
||||
def test_vector_address_search_not_applied_when_adresse_disabled(tmp_path, monkeypatch):
    """Vector path with ``disabled_kinds={"ADRESSE"}``: no address search.

    Asserts that ``core._search_pdf_address_lines`` is never invoked and that
    the address text is still present in the output document.
    """
    # `fitz` is None: nothing can be exercised, so the test body is skipped.
    if fitz is None:
        return
    source = _make_address_pdf(tmp_path)
    output = tmp_path / "addr.disabled.pdf"

    calls = []
    real = core._search_pdf_address_lines

    def _spy(page):
        # Record any invocation; delegate so behaviour is unchanged if called.
        calls.append(page.number)
        return real(page)

    monkeypatch.setattr(core, "_search_pdf_address_lines", _spy)

    redact_pdf_vector(source, [], output, disabled_kinds={"ADRESSE"})

    # The geometric address burn must NOT be applied.
    assert not calls, (
        "ADRESSE désactivée : _search_pdf_address_lines ne doit pas être appliqué"
    )

    redacted = fitz.open(output)
    try:
        text = redacted[0].get_text()
    finally:
        # Close the output document even if text extraction raises.
        redacted.close()

    # The address stays readable because the category is switched off.
    assert "rue Claude Boucher" in text
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RASTER
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_raster_address_search_called_when_adresse_enabled(tmp_path, monkeypatch):
    """Raster path with an empty ``disabled_kinds``: the address search runs."""
    if fitz is None:
        return

    pdf_in = _make_address_pdf(tmp_path)
    pdf_out = tmp_path / "addr.raster.enabled.pdf"

    original_search = core._search_pdf_address_lines
    searched_pages = []

    def _recording_search(page):
        # Note the page number before handing over to the real search.
        searched_pages.append(page.number)
        return original_search(page)

    monkeypatch.setattr(core, "_search_pdf_address_lines", _recording_search)

    redact_pdf_raster(pdf_in, [], pdf_out, disabled_kinds=set())

    assert searched_pages, "ADRESSE activée (raster) : _search_pdf_address_lines doit être appelé"
|
||||
|
||||
|
||||
def test_raster_address_search_not_applied_when_adresse_disabled(tmp_path, monkeypatch):
    """Raster path with ``disabled_kinds={"ADRESSE"}``: no address search."""
    if fitz is None:
        return

    pdf_in = _make_address_pdf(tmp_path)
    pdf_out = tmp_path / "addr.raster.disabled.pdf"

    original_search = core._search_pdf_address_lines
    searched_pages = []

    def _recording_search(page):
        # Any entry appended here means the gated path was taken.
        searched_pages.append(page.number)
        return original_search(page)

    monkeypatch.setattr(core, "_search_pdf_address_lines", _recording_search)

    redact_pdf_raster(pdf_in, [], pdf_out, disabled_kinds={"ADRESSE"})

    assert not searched_pages, (
        "ADRESSE désactivée (raster) : _search_pdf_address_lines ne doit pas être appliqué"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Non-régression : signature positionnelle d'origine + défaut byte-for-byte
|
||||
# ---------------------------------------------------------------------------
|
||||
def test_vector_default_signature_still_calls_address_search(tmp_path, monkeypatch):
    """Without ``disabled_kinds`` (original positional call), the address
    burn stays active; this is a strict non-regression check."""
    if fitz is None:
        return

    pdf_in = _make_address_pdf(tmp_path)
    pdf_out = tmp_path / "addr.default.pdf"

    original_search = core._search_pdf_address_lines
    searched_pages = []

    def _recording_search(page):
        # Track the call, then run the genuine search.
        searched_pages.append(page.number)
        return original_search(page)

    monkeypatch.setattr(core, "_search_pdf_address_lines", _recording_search)

    # Original call shape: no `disabled` argument at all.
    hits = [PiiHit(0, "OGC", "14", "[OGC]")]
    redact_pdf_vector(pdf_in, hits, pdf_out)

    assert searched_pages, "Défaut (pas de disabled) : burn adresse doit rester actif"
|
||||
Reference in New Issue
Block a user