- test_f5_nom_compose_orphelin.py : 13 tests (regex F5, application, scénario Trackare EJNAINI) - test_gui_batch_paths.py / test_manual_masking.py : couverture des modules - test_real_world_identifier_layouts.py : non-régression layouts réels (D-15) Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
47 lines
1.2 KiB
Python
47 lines
1.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
Tests de non-régression sur des layouts d'identifiants vus en documents réels.
"""
|
|
from anonymizer_core_refactored_onnx import (
|
|
RE_SCAN_FILENAME_ARTIFACT,
|
|
anonymise_document_regex,
|
|
load_dictionaries,
|
|
)
|
|
|
|
|
|
def test_bacterio_multiline_venue_number_before_ipp_is_masked():
    """Check that the number sitting just above the ``IPP :`` label is masked.

    In the sample page the ``N° venue :`` label is followed by a date-like
    line and an upper-case name line before the number itself, and the
    ``IPP :`` label comes right after it. The test expects that number to
    disappear from the output, an ``[NDA]`` placeholder to be present, and
    an audit entry of kind ``"NDA"`` to record the original value.
    """
    dictionaries = load_dictionaries(None)

    # One string per physical line of the sample layout.
    page_fragments = [
        "Diffusé le :\n",
        "à\n",
        "N° venue :\n",
        "31/07/1973\n",
        "VAN DE GRAAF\n",
        "23176885\n",
        "IPP :\n",
        "2300201230\n",
    ]
    page = "".join(page_fragments)

    result = anonymise_document_regex([page], [[]], dictionaries)

    # The raw number must be gone and replaced by the placeholder.
    assert "23176885" not in result.text_out
    assert "[NDA]" in result.text_out

    # The replacement must also be traceable in the audit entries.
    recorded_in_audit = any(
        entry.kind == "NDA" and entry.original == "23176885"
        for entry in result.audit
    )
    assert recorded_in_audit
|
|
|
|
|
|
def test_scan_filename_artifact_suffix_is_masked():
    """Check that the trailing number of a scan filename line is masked.

    The sample page contains an ``IPP:`` label with its value, followed by
    a filename-like line ``EXT2-<number>-<number>.TIF`` whose first number
    repeats the value under the label. The test expects the second number
    to be absent from the output and the line to read
    ``EXT2-[IPP]-[DOSSIER].TIF``.
    """
    dictionaries = load_dictionaries(None)

    # Same fragments as the observed layout, including the embedded newline
    # inside the fourth fragment.
    page = "".join(
        (
            "IPP:\n",
            "16014215\n",
            "Document scanné non\n",
            "éditable pour patient (dont\ngénétique)\n",
            "EXT2-16014215-2300249096.TIF\n",
        )
    )

    result = anonymise_document_regex([page], [[]], dictionaries)

    # The pattern itself must match a filename whose first number has
    # already been replaced by the [IPP] placeholder.
    partially_masked_name = "EXT2-[IPP]-2300249096.TIF"
    pattern_match = RE_SCAN_FILENAME_ARTIFACT.search(partially_masked_name)
    assert pattern_match is not None

    # End-to-end: the suffix number is gone and both placeholders are present.
    assert "2300249096" not in result.text_out
    assert "EXT2-[IPP]-[DOSSIER].TIF" in result.text_out
|