test: non-régression F5 + batch paths + masquage manuel + layouts réels
- test_f5_nom_compose_orphelin.py : 13 tests (regex F5, application, scénario Trackare EJNAINI)
- test_gui_batch_paths.py / test_manual_masking.py : couverture des modules
- test_real_world_identifier_layouts.py : non-régression layouts réels (D-15)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
46
tests/unit/test_real_world_identifier_layouts.py
Normal file
46
tests/unit/test_real_world_identifier_layouts.py
Normal file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Tests de non-régression sur des layouts d'identifiants vus en documents réels.
|
||||
"""
|
||||
from anonymizer_core_refactored_onnx import (
|
||||
RE_SCAN_FILENAME_ARTIFACT,
|
||||
anonymise_document_regex,
|
||||
load_dictionaries,
|
||||
)
|
||||
|
||||
|
||||
def test_bacterio_multiline_venue_number_before_ipp_is_masked():
    """Check that a venue number separated from its label is still masked.

    The sample text puts the "N° venue :" label several lines above the
    number itself (a date and a name sit in between), and the number is
    immediately followed by an "IPP :" label. The test expects the number
    to be removed from the output, an "[NDA]" placeholder to be present,
    and an audit entry of kind "NDA" recording the original value.
    """
    dictionaries = load_dictionaries(None)
    page = (
        "Diffusé le :\n"
        "à\n"
        "N° venue :\n"
        "31/07/1973\n"
        "VAN DE GRAAF\n"
        "23176885\n"
        "IPP :\n"
        "2300201230\n"
    )

    result = anonymise_document_regex([page], [[]], dictionaries)
    masked_text = result.text_out

    # The raw venue number must be gone and replaced by the NDA placeholder.
    assert "23176885" not in masked_text
    assert "[NDA]" in masked_text

    # The audit trail must contain the NDA hit with its original value.
    nda_hit = next(
        (
            entry
            for entry in result.audit
            if entry.kind == "NDA" and entry.original == "23176885"
        ),
        None,
    )
    assert nda_hit is not None
|
||||
|
||||
|
||||
def test_scan_filename_artifact_suffix_is_masked():
    """Check that the trailing number of a scan file name is masked.

    The sample text contains an "IPP:" label with its value and, further
    down, a scan file name "EXT2-16014215-2300249096.TIF" embedding that
    same value plus a second number. The test expects the scan-filename
    pattern to match the partially masked name, the second number to be
    absent from the output, and the fully masked file name to be present.
    """
    dictionaries = load_dictionaries(None)
    page = (
        "IPP:\n"
        "16014215\n"
        "Document scanné non\n"
        "éditable pour patient (dont\ngénétique)\n"
        "EXT2-16014215-2300249096.TIF\n"
    )

    result = anonymise_document_regex([page], [[]], dictionaries)
    masked_text = result.text_out

    # The pattern has to recognise the file name once its IPP part is masked.
    partially_masked_name = "EXT2-[IPP]-2300249096.TIF"
    assert RE_SCAN_FILENAME_ARTIFACT.search(partially_masked_name) is not None

    # Both identifiers embedded in the file name end up as placeholders.
    assert "2300249096" not in masked_text
    assert "EXT2-[IPP]-[DOSSIER].TIF" in masked_text
|
||||
Reference in New Issue
Block a user