Files
anonymisation/tests/unit/test_real_world_identifier_layouts.py
Domi31tls f2375d6be2 test: non-régression F5 + batch paths + masquage manuel + layouts réels
- test_f5_nom_compose_orphelin.py : 13 tests (regex F5, application, scénario Trackare EJNAINI)
- test_gui_batch_paths.py / test_manual_masking.py : couverture des modules
- test_real_world_identifier_layouts.py : non-régression layouts réels (D-15)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 16:30:56 +02:00

47 lines
1.2 KiB
Python

#!/usr/bin/env python3
"""
Tests de non-régression sur des layouts d'identifiants vus en documents réels.
"""
from anonymizer_core_refactored_onnx import (
RE_SCAN_FILENAME_ARTIFACT,
anonymise_document_regex,
load_dictionaries,
)
def test_bacterio_multiline_venue_number_before_ipp_is_masked():
    """Check that a venue number split away from its label is masked as NDA.

    The sample puts the "N° venue :" label, a date, a name and then the
    number on separate lines, just before the "IPP :" label. The test
    requires the number to be absent from the output, a ``[NDA]``
    placeholder to be present, and an audit entry of kind ``NDA`` to carry
    the original number.
    """
    venue_number = "23176885"
    dictionaries = load_dictionaries(None)
    sample = (
        "Diffusé le :\n"
        "à\n"
        "N° venue :\n"
        "31/07/1973\n"
        "VAN DE GRAAF\n"
        "23176885\n"
        "IPP :\n"
        "2300201230\n"
    )

    # One page of text, with an empty companion list for that page.
    outcome = anonymise_document_regex([sample], [[]], dictionaries)

    assert venue_number not in outcome.text_out
    assert "[NDA]" in outcome.text_out

    nda_was_audited = any(
        entry.kind == "NDA" and entry.original == venue_number
        for entry in outcome.audit
    )
    assert nda_was_audited
def test_scan_filename_artifact_suffix_is_masked():
    """Check that the trailing number of a scanned-file name is masked.

    The sample holds an "IPP:" label with its number, a scanned-document
    notice, and a file name of the form ``EXT2-<ipp>-<number>.TIF``. The
    test requires that ``RE_SCAN_FILENAME_ARTIFACT`` matches the file name
    once the IPP part is already a placeholder, that the trailing number is
    absent from the output, and that the output contains
    ``EXT2-[IPP]-[DOSSIER].TIF``.
    """
    trailing_number = "2300249096"
    dictionaries = load_dictionaries(None)
    sample = (
        "IPP:\n"
        "16014215\n"
        "Document scanné non\n"
        "éditable pour patient (dont\ngénétique)\n"
        "EXT2-16014215-2300249096.TIF\n"
    )

    # One page of text, with an empty companion list for that page.
    outcome = anonymise_document_regex([sample], [[]], dictionaries)

    # The pattern is probed against a literal in which the IPP is already
    # replaced, independently of the anonymiser output.
    artifact_match = RE_SCAN_FILENAME_ARTIFACT.search("EXT2-[IPP]-2300249096.TIF")
    assert artifact_match is not None

    assert trailing_number not in outcome.text_out
    assert "EXT2-[IPP]-[DOSSIER].TIF" in outcome.text_out