Aivanov_scan_ogc/tests/test_checkboxes.py

"""Tests unitaires pour pipeline.checkboxes."""
from __future__ import annotations

import numpy as np
import pytest
from PIL import Image

from pipeline.checkboxes import (
    AMBIGU_MARGIN,
    CheckboxZones,
    RECUEIL_ACCORD_DESACCORD,
    dark_ratio,
    detect_accord_desaccord,
    parse_ghs_injustifie,
)


# ============================================================
# parse_ghs_injustifie
# ============================================================

class TestParseGhsInjustifie:
    """Table-driven checks for ``parse_ghs_injustifie``.

    Every row pairs a raw input with the exact string the parser is expected
    to return; in this table that is always "0", "1" or the empty string.
    """

    @pytest.mark.parametrize(
        ("raw", "expected"),
        [
            # A bare flag digit comes back unchanged.
            ("0", "0"),
            ("1", "1"),
            # Leading flag digit followed by other tokens.
            ("0 SE 1 2 3 4 ATU FFM FSD", "0"),
            ("1 SE 2 ATU", "1"),
            # Surrounding whitespace.
            (" 0 ", "0"),
            # Empty or missing input.
            ("", ""),
            (None, ""),
            # No leading digit at all.
            ("SE 1 2 3 4 ATU FFM FSD", ""),
            ("abc", ""),
            # Leading digit that is neither 0 nor 1.
            ("2 SE 1", ""),
        ],
    )
    def test_cas_varies(self, raw, expected):
        """The parsed value must equal the expected string for each row."""
        actual = parse_ghs_injustifie(raw)
        assert actual == expected


# ============================================================
# dark_ratio (avec images synthétiques)
# ============================================================

def _solid_image(w: int, h: int, gray_value: int = 255) -> Image.Image:
    """Return a ``w`` x ``h`` RGB image uniformly filled with one gray level.

    Args:
        w: Image width in pixels.
        h: Image height in pixels.
        gray_value: Gray level written to every pixel (defaults to 255).

    Returns:
        The single-channel uint8 array converted to an RGB PIL image.
    """
    # NumPy arrays are indexed (rows, columns), hence the (h, w) shape.
    pixels = np.full((h, w), gray_value, dtype=np.uint8)
    # No explicit ``mode``: a 2-D uint8 array is already read as mode "L",
    # and passing ``mode=`` raises a DeprecationWarning on Pillow 11.3.
    return Image.fromarray(pixels).convert("RGB")


def _image_with_dark_square(w: int, h: int,
                             square_bbox: tuple[float, float, float, float]) -> Image.Image:
    """Return a white RGB image with a black rectangle painted inside it.

    Args:
        w: Image width in pixels.
        h: Image height in pixels.
        square_bbox: ``(x1, y1, x2, y2)`` rectangle in relative coordinates;
            x values are scaled by ``w`` and y values by ``h``, then truncated
            to integer pixel indices.

    Returns:
        The single-channel uint8 array converted to an RGB PIL image.
    """
    pixels = np.full((h, w), 255, dtype=np.uint8)
    x1, y1, x2, y2 = square_bbox
    # Rows follow y, columns follow x; the slice end is exclusive.
    top, bottom = int(y1 * h), int(y2 * h)
    left, right = int(x1 * w), int(x2 * w)
    pixels[top:bottom, left:right] = 0
    # No explicit ``mode``: a 2-D uint8 array is already read as mode "L",
    # and passing ``mode=`` raises a DeprecationWarning on Pillow 11.3.
    return Image.fromarray(pixels).convert("RGB")


class TestDarkRatio:
    """Checks of ``dark_ratio`` on small synthetic images.

    NOTE(review): ``inner_frac`` presumably restricts the measurement to a
    central part of the zone; confirm against ``pipeline.checkboxes``.
    """

    def test_image_blanche(self):
        """An all-white image yields a ratio of exactly 0.0."""
        img = _solid_image(100, 100, 255)
        ratio = dark_ratio(img, (0.2, 0.2, 0.8, 0.8))
        assert ratio == 0.0

    def test_image_noire(self):
        """An all-black image yields a ratio of exactly 1.0."""
        img = _solid_image(100, 100, 0)
        ratio = dark_ratio(img, (0.2, 0.2, 0.8, 0.8))
        assert ratio == 1.0

    def test_inner_frac_ignore_les_bords(self):
        """A fully black zone stays at 1.0 when ``inner_frac`` is supplied."""
        img = _image_with_dark_square(100, 100, (0.0, 0.0, 1.0, 1.0))
        # Every pixel is black, so the value of inner_frac cannot matter.
        assert dark_ratio(img, (0.0, 0.0, 1.0, 1.0), inner_frac=0.35) == 1.0

    def test_cadre_seul_vs_contenu_central(self):
        """A framed box with a central cross must score higher than the
        frame alone when measured with ``inner_frac=0.35``."""
        side = 100
        border = 5

        # Empty checkbox: white background with a black frame on all four edges.
        frame = np.full((side, side), 255, dtype=np.uint8)
        frame[:border, :] = 0
        frame[-border:, :] = 0
        frame[:, :border] = 0
        frame[:, -border:] = 0
        # No explicit ``mode``: a 2-D uint8 array is already read as mode "L",
        # and passing ``mode=`` raises a DeprecationWarning on Pillow 11.3.
        frame_only = Image.fromarray(frame).convert("RGB")

        # Ticked checkbox: same frame plus a one-pixel-wide cross drawn along
        # both diagonals, for rows 20 to 79.
        crossed = frame.copy()
        diag = np.arange(20, 80)
        crossed[diag, diag] = 0
        crossed[diag, side - 1 - diag] = 0
        checked = Image.fromarray(crossed).convert("RGB")

        zone = (0.0, 0.0, 1.0, 1.0)
        ratio_empty = dark_ratio(frame_only, zone, inner_frac=0.35)
        ratio_full = dark_ratio(checked, zone, inner_frac=0.35)

        # The ticked box must be darker by a clear margin, not marginally.
        assert ratio_full > ratio_empty + 0.05


# ============================================================
# detect_accord_desaccord (fixtures cache)
# ============================================================

class TestDetectAccordDesaccord:
    """Run ``detect_accord_desaccord`` on real cached page images.

    Expected decisions are keyed by OGC number. The cache directory for each
    PDF is looked up at run time through ``pipeline.ingest.pdf_hash`` so that
    no hash has to be hard-coded in this file.
    """

    # Visually verified expected decision per OGC number (2018 CARC files).
    # NOTE(review): the original note mentions 18 files, but 17 entries are
    # listed here; confirm whether one is missing.
    GROUND_TRUTH_BY_OGC = {
        1: "accord",
        7: "accord",
        9: "désaccord",
        18: "désaccord",
        20: "désaccord",
        27: "désaccord",
        29: "accord",
        55: "accord",
        66: "désaccord",
        68: "accord",
        69: "accord",
        74: "désaccord",
        76: "désaccord",
        84: "accord",
        86: "désaccord",
        97: "accord",
        99: "désaccord",
    }

    @pytest.fixture
    def cached_pages_with_truth(self):
        """Map "OGC <n>" to ``(first-page image path, expected decision)``.

        Only entries whose PDF and cached ``page_01.png`` both exist are
        returned. The test is skipped when the PDF directory is missing or
        when no cached image is found at all.
        """
        from pathlib import Path
        from pipeline.ingest import pdf_hash

        source_dir = Path("2018 CARC")
        if not source_dir.is_dir():
            pytest.skip("répertoire 2018 CARC/ absent")

        available = {}
        for number, verdict in self.GROUND_TRUTH_BY_OGC.items():
            label = f"OGC {number}"
            pdf_path = source_dir / f"{label}.pdf"
            if pdf_path.exists():
                # The cache folder name is the hash of the source PDF.
                digest = pdf_hash(str(pdf_path))
                first_page = Path(f".cache/images/{digest}/page_01.png")
                if first_page.exists():
                    available[label] = (str(first_page), verdict)

        if available:
            return available
        pytest.skip("pas de cache d'images disponible — lance le pipeline d'abord")

    def test_ground_truth_echantillon(self, cached_pages_with_truth):
        """The detector's decision must match every available ground-truth case."""
        mismatches = []
        for label, (image_path, wanted) in cached_pages_with_truth.items():
            result = detect_accord_desaccord(image_path)
            if result["decision"] == wanted:
                continue
            mismatches.append(f"{label}: attendu={wanted}, got={result}")
        # Report all mismatches together rather than stopping at the first one.
        assert not mismatches, "\n".join(mismatches)