fix(core): renforcer la détection PII et FINESS Corse
Couvre les corrections PII batch A/A-2, le NIR multi-ligne en flux réel, le gazetteer FINESS Corse dérivé depuis la base locale, et les tests de régression associés. Aucun build ni diffusion.
This commit is contained in:
@@ -109,6 +109,31 @@ def test_ogc_pdf_redaction_does_not_mask_numeric_substrings(tmp_path):
|
||||
assert "142 : La facturation" in text
|
||||
|
||||
|
||||
def test_pdf_redaction_directly_masks_finess_address_range(tmp_path):
    """Check that a facility address range is redacted without an audit hit.

    Regression for the "Dom" case of 2026-06-16: a facility address visible
    in the PDF must be redacted even when the audit did not supply the exact
    hit. The test builds a one-page PDF holding an address line and a
    clinical line, calls ``redact_pdf_vector`` with an empty hit list, and
    expects the address to be absent from the extracted text while the
    clinical line is still present.
    """
    if fitz is None:
        # `fitz` is unavailable (presumably the optional PyMuPDF import
        # failed — confirm at the top of the file); the test then returns
        # without exercising anything.
        return

    source = tmp_path / "finess_address_range.pdf"
    output = tmp_path / "finess_address_range.redacted.pdf"

    doc = fitz.open()
    try:
        page = doc.new_page()
        page.insert_text((72, 72), "15 à 35 rue Claude Boucher Bordeaux Cedex")
        page.insert_text((72, 108), "Motif d'hospitalisation : contrôle clinique.")
        doc.save(source)
    finally:
        # Release the document even if building or saving the fixture fails.
        doc.close()

    # The hit list is deliberately empty: the redaction step itself is
    # expected to find and mask the address.
    redact_pdf_vector(source, [], output)

    redacted = fitz.open(output)
    try:
        text = redacted[0].get_text()
    finally:
        # Close before asserting so a failed extraction never leaves the
        # redacted file open.
        redacted.close()

    assert "Claude Boucher" not in text
    assert "15 à 35" not in text
    assert "Motif d'hospitalisation" in text
|
||||
|
||||
|
||||
def test_crop_epi_header_name_is_masked():
|
||||
cfg = load_dictionaries(None)
|
||||
text = (
|
||||
|
||||
Reference in New Issue
Block a user