# anonymisation/tests/unit/test_leak_scanner.py

#!/usr/bin/env python3
"""
Tests unitaires pour le scanner de fuite.
"""
import pytest
from pathlib import Path
from evaluation.leak_scanner import LeakScanner, LeakReport


class TestLeakScanner:
    """Unit tests for LeakScanner.scan_text and the LeakReport container."""

    def test_scan_text_no_leak(self):
        """Text that only contains placeholders should produce no leaks."""
        masked_text = "Le patient a été examiné par le Dr. [NOM] le [DATE]."
        known_pii = [
            {"kind": "NOM", "original": "DUPONT"},
            {"kind": "DATE", "original": "15/01/2024"},
        ]

        findings = LeakScanner().scan_text(masked_text, known_pii)

        assert len(findings) == 0

    def test_scan_text_original_pii_present(self):
        """Original PII values left in the text should each be reported."""
        unmasked_text = "Le patient DUPONT a été examiné le 15/01/2024."
        known_pii = [
            {"kind": "NOM", "original": "DUPONT"},
            {"kind": "DATE", "original": "15/01/2024"},
        ]

        findings = LeakScanner().scan_text(unmasked_text, known_pii)

        assert len(findings) == 2
        # Check every severity first, then every type, one finding at a time.
        for finding in findings:
            assert finding["severity"] == "CRITIQUE"
        for finding in findings:
            assert finding["type"] == "original_pii_present"

    def test_scan_text_new_pii_detected(self):
        """PII that was never declared should still be picked up."""
        findings = LeakScanner().scan_text(
            "Contact: jean.dupont@example.com ou 01 23 45 67 89",
            [],
        )

        # Both the e-mail address and the phone number should be reported.
        assert len(findings) >= 2

        def first_with_pii_type(wanted):
            # Return the first finding tagged with `wanted`, or None.
            for finding in findings:
                if finding["pii_type"] == wanted:
                    return finding
            return None

        email_finding = first_with_pii_type("EMAIL")
        assert email_finding is not None
        assert email_finding["severity"] == "HAUTE"

        phone_finding = first_with_pii_type("TEL")
        assert phone_finding is not None
        assert phone_finding["severity"] == "HAUTE"

    def test_leak_report_is_safe(self):
        """A report built as safe exposes is_safe and a zero leak count."""
        safe_report = LeakReport(is_safe=True, leak_count=0, leaks=[], severity_counts={})

        assert safe_report.is_safe
        assert safe_report.leak_count == 0

    def test_leak_report_not_safe(self):
        """A report built as unsafe exposes its count and severity tallies."""
        leak_entries = [
            {"severity": "CRITIQUE", "type": "original_pii_present"},
            {"severity": "HAUTE", "type": "new_pii_detected"},
        ]
        tallies = {"CRITIQUE": 1, "HAUTE": 1}
        unsafe_report = LeakReport(
            is_safe=False,
            leak_count=2,
            leaks=leak_entries,
            severity_counts=tallies,
        )

        assert not unsafe_report.is_safe
        assert unsafe_report.leak_count == 2
        for level in ("CRITIQUE", "HAUTE"):
            assert unsafe_report.severity_counts[level] == 1

    def test_leak_report_to_dict(self):
        """to_dict() should expose the fields the report was built with."""
        source_report = LeakReport(
            is_safe=False,
            leak_count=1,
            leaks=[{"severity": "CRITIQUE"}],
            severity_counts={"CRITIQUE": 1},
        )

        as_dict = source_report.to_dict()

        assert as_dict["is_safe"] is False
        assert as_dict["leak_count"] == 1
        assert len(as_dict["leaks"]) == 1
        assert as_dict["severity_counts"]["CRITIQUE"] == 1


if __name__ == "__main__":
    # Run this file's tests verbosely when executed as a script, and pass
    # pytest's exit code to the shell so a failing run exits non-zero.
    raise SystemExit(pytest.main([__file__, "-v"]))