rpa_vision_v3/core/semantic/phase25_analyzer.py

"""Phase 2.5 — Analyseur sémantique post-apprentissage.

Module isolé qui prend en entrée un ensemble de screenshots capturés
pendant la phase Shadow et produit un payload structuré
``{tables, forms, buttons, text_blocks}`` par écran distinct,
stocké dans un fichier ``.semantic.yaml`` séparé.

Specs : ``docs/POC/SPECS_PHASE_25_SEMANTIQUE_2026-06-01.md``

Garde-fous :
- Wrapper try/except global autour de chaque appel OmniParser.
- Fallback OCR-seul (docTR) si OmniParser indisponible ou KO.
- Healthcheck OmniParser au démarrage : KO ⇒ bascule auto en dégradé.
- Cache disque ``data/cache/omniparser/<session>/<index>.json``.
- Cap 10 écrans distincts par session.
- Aucun import de FastAPI, aucun appel réseau direct.
"""

from __future__ import annotations

import concurrent.futures
import hashlib
import io
import json
import logging
import re
import time
import traceback
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Iterable, List, Optional, Sequence, Tuple

try:  # pragma: no cover - dépendance externe déjà présente dans le projet
    import yaml
except ImportError as exc:  # pragma: no cover
    raise RuntimeError("PyYAML est requis pour core.semantic.phase25_analyzer") from exc

try:  # PIL toujours présent côté Linux dev / DGX
    from PIL import Image
    _HAS_PIL = True
except ImportError:  # pragma: no cover
    Image = None  # type: ignore[assignment]
    _HAS_PIL = False

try:
    import imagehash  # type: ignore
    _HAS_IMAGEHASH = True
except ImportError:  # pragma: no cover - fallback MD5 thumbnail
    imagehash = None  # type: ignore[assignment]
    _HAS_IMAGEHASH = False


logger = logging.getLogger(__name__)


# ----------------------------------------------------------------------------
# Constantes et chemins
# ----------------------------------------------------------------------------

REPO_ROOT = Path(__file__).resolve().parents[2]
DATA_ROOT = REPO_ROOT / "data"
SEMANTIC_DIR = DATA_ROOT / "competences" / "candidate"
OMNIPARSER_CACHE_ROOT = DATA_ROOT / "cache" / "omniparser"
OMNIPARSER_CACHE_DIR = OMNIPARSER_CACHE_ROOT  # alias public
LOGS_DIR = REPO_ROOT / "logs"
OMNIPARSER_ERROR_LOG = LOGS_DIR / "omniparser_errors.log"

# Heuristique de regroupement perceptuel (cf. specs §3).
PHASH_HAMMING_THRESHOLD = 8
MAX_SCREENS_PER_SESSION = 10
THUMBNAIL_SIZE = (256, 256)  # fallback MD5

# Timeout par screenshot (cf. specs §2).
OMNIPARSER_TIMEOUT_SEC = 30.0

# Slug autorisé (réutilisation du pattern persist : a-z0-9_).
SLUG_PATTERN = re.compile(r"^[a-z][a-z0-9_]{2,79}$")
# session_id autorisé : caractères inoffensifs uniquement.
SESSION_ID_PATTERN = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_\-]{0,127}$")


# ----------------------------------------------------------------------------
# Dataclasses
# ----------------------------------------------------------------------------


@dataclass
class SemanticStructure:
    """Structure sémantique d'un écran (cf. specs §2)."""

    tables: List[dict] = field(default_factory=list)
    forms: List[dict] = field(default_factory=list)
    buttons: List[dict] = field(default_factory=list)
    text_blocks: List[dict] = field(default_factory=list)

    def to_dict(self) -> dict:
        return {
            "tables": list(self.tables),
            "forms": list(self.forms),
            "buttons": list(self.buttons),
            "text_blocks": list(self.text_blocks),
        }


@dataclass
class ScreenAnalysis:
    """Analyse d'un écran représentatif (cf. specs §3)."""

    index: int
    phash: str
    screen_id: str
    screenshot_path: Optional[str]
    structure: SemanticStructure
    degraded: bool = False
    degraded_reason: Optional[str] = None
    elapsed_sec: float = 0.0
    window_title: Optional[str] = None
    # Snapshot "contrat Codex" : représentation aplatie destinée à
    # l'agent-chat / dashboard. Calculée à la volée par to_dict().

    def to_dict(self) -> dict:
        elements = _structure_to_elements(self.structure)
        return {
            "index": self.index,
            "hash": self.phash,
            "screen_id": self.screen_id,
            "window_title": self.window_title,
            "screenshot_path": self.screenshot_path,
            "structure": self.structure.to_dict(),
            "elements": elements,
            "degraded": self.degraded,
            "degraded_reason": self.degraded_reason,
            "elapsed_sec": round(self.elapsed_sec, 3),
        }


@dataclass
class Phase25Result:
    """Résultat global d'une analyse Phase 2.5."""

    session_id: str
    generated_at: str
    omniparser_available: bool
    degraded: bool
    too_complex: bool
    screens: List[ScreenAnalysis] = field(default_factory=list)
    healthcheck_passed: bool = True
    healthcheck_reason: Optional[str] = None

    def to_dict(self) -> dict:
        return {
            "session_id": self.session_id,
            "generated_at": self.generated_at,
            "omniparser_available": self.omniparser_available,
            "degraded": self.degraded,
            "too_complex": self.too_complex,
            "healthcheck_passed": self.healthcheck_passed,
            "healthcheck_reason": self.healthcheck_reason,
            "screens": [s.to_dict() for s in self.screens],
        }


# ----------------------------------------------------------------------------
# Helpers : validation et FS
# ----------------------------------------------------------------------------


def _validate_session_id(session_id: Any) -> str:
    if not isinstance(session_id, str) or not session_id.strip():
        raise ValueError("session_id doit etre une chaine non vide")
    sid = session_id.strip()
    if not SESSION_ID_PATTERN.match(sid):
        raise ValueError(
            "session_id invalide (autorise : [A-Za-z0-9][A-Za-z0-9_-]{0,127})"
        )
    # Anti path-traversal de ceinture-bretelles : on refuse explicitement
    # toute tentative ../ même si le regex ne devrait pas la laisser passer.
    if ".." in sid or "/" in sid or "\\" in sid:
        raise ValueError("session_id invalide (path-traversal interdit)")
    return sid


def _validate_slug(slug: Any) -> str:
    if not isinstance(slug, str):
        raise ValueError("slug doit etre une chaine")
    s = slug.strip()
    if not SLUG_PATTERN.match(s):
        raise ValueError(
            f"slug invalide '{s}' (regle : {SLUG_PATTERN.pattern})"
        )
    return s


def _ensure_dir(path: Path) -> None:
    path.mkdir(parents=True, exist_ok=True)


def _log_omniparser_error(session_id: str, frame_index: int, exc: BaseException) -> None:
    """Append-only sur ``logs/omniparser_errors.log`` (cf. specs §7)."""
    try:
        _ensure_dir(LOGS_DIR)
        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "session_id": session_id,
            "frame_index": frame_index,
            "error_type": type(exc).__name__,
            "error_message": str(exc),
            "traceback": traceback.format_exception_only(type(exc), exc),
        }
        with OMNIPARSER_ERROR_LOG.open("a", encoding="utf-8") as fh:
            fh.write(json.dumps(entry, ensure_ascii=False) + "\n")
    except OSError as log_exc:  # pragma: no cover - log best-effort
        logger.warning("[PHASE25] echec ecriture omniparser_errors.log : %s", log_exc)


# ----------------------------------------------------------------------------
# Hash perceptuel (avec fallback MD5)
# ----------------------------------------------------------------------------


def compute_phash(image: "Image.Image") -> str:
    """Calcule un hash perceptuel ou un hash MD5 thumbnail (fallback)."""
    if _HAS_IMAGEHASH and imagehash is not None:
        try:
            return str(imagehash.phash(image))
        except Exception as exc:  # pragma: no cover
            logger.warning("[PHASE25] phash imagehash KO, fallback MD5 : %s", exc)
    # Fallback MD5 sur thumbnail.
    thumb = image.copy()
    thumb.thumbnail(THUMBNAIL_SIZE)
    buf = io.BytesIO()
    thumb.convert("RGB").save(buf, format="PNG")
    return "md5:" + hashlib.md5(buf.getvalue()).hexdigest()


def _hamming_distance(h1: str, h2: str) -> int:
    """Distance de Hamming entre deux phash imagehash, ou fallback MD5.

    - Cas imagehash : on reconvertit via ``imagehash.hex_to_hash``.
    - Cas MD5 (préfixe ``md5:``) : 0 si égal, sinon distance "haute" pour ne
      jamais les considérer comme similaires (heuristique conservative).
    """
    if h1.startswith("md5:") or h2.startswith("md5:"):
        return 0 if h1 == h2 else PHASH_HAMMING_THRESHOLD + 1
    if not _HAS_IMAGEHASH or imagehash is None:
        # Pas d'imagehash mais les hashes hex présents (rare) : XOR brut.
        try:
            i1 = int(h1, 16)
            i2 = int(h2, 16)
            return bin(i1 ^ i2).count("1")
        except ValueError:
            return PHASH_HAMMING_THRESHOLD + 1
    try:
        return abs(imagehash.hex_to_hash(h1) - imagehash.hex_to_hash(h2))
    except Exception:
        return PHASH_HAMMING_THRESHOLD + 1


def identify_distinct_screens(
    frames: Sequence[Tuple[int, "Image.Image"]],
    threshold: int = PHASH_HAMMING_THRESHOLD,
) -> List[Tuple[int, "Image.Image", str]]:
    """Regroupe les frames par similarité phash et retourne un représentant par groupe.

    Args:
        frames: séquence ``(frame_index, PIL.Image)``.
        threshold: Hamming distance max pour considérer deux frames identiques.

    Returns:
        Liste ``(frame_index, image, phash)`` — un représentant par groupe,
        dans l'ordre temporel d'apparition (premier vu = représentant).
    """
    representatives: List[Tuple[int, Image.Image, str]] = []
    for idx, img in frames:
        h = compute_phash(img)
        matched = False
        for ridx, _rimg, rhash in representatives:
            if _hamming_distance(h, rhash) <= threshold:
                matched = True
                logger.debug(
                    "[PHASE25] frame %d regroupee avec representant %d (phash=%s)",
                    idx, ridx, h,
                )
                break
        if not matched:
            representatives.append((idx, img, h))
    return representatives


# ----------------------------------------------------------------------------
# Conversion structure ⇄ "elements" (contrat Codex)
# ----------------------------------------------------------------------------


def _structure_to_elements(struct: SemanticStructure) -> List[dict]:
    """Aplatissement structure -> liste d'éléments {kind, label, bbox, confidence}."""
    elements: List[dict] = []
    for tbl in struct.tables:
        elements.append({
            "kind": "table",
            "label": tbl.get("label", "table"),
            "bbox": tbl.get("bbox", []),
            "confidence": float(tbl.get("confidence", 0.5)),
        })
    for frm in struct.forms:
        elements.append({
            "kind": "field",
            "label": frm.get("label", "field"),
            "bbox": frm.get("bbox", []),
            "confidence": float(frm.get("confidence", 0.5)),
        })
    for btn in struct.buttons:
        elements.append({
            "kind": "button",
            "label": btn.get("label", "button"),
            "bbox": btn.get("bbox", []),
            "confidence": float(btn.get("confidence", 0.5)),
        })
    for tb in struct.text_blocks:
        elements.append({
            "kind": "text_block",
            "label": tb.get("label", tb.get("text", "")),
            "bbox": tb.get("bbox", []),
            "confidence": float(tb.get("confidence", 0.5)),
        })
    return elements


def _classify_element(label: str, kind_hint: str | None = None) -> str:
    """Heuristique de classification d'un élément OmniParser.

    Cohérente avec ``OmniParserAdapter._classify_element``, mais retourne
    nos catégories sémantiques : ``table | field | button | text_block``.
    """
    lab = (label or "").lower()
    if kind_hint:
        kh = kind_hint.lower()
        if "table" in kh:
            return "table"
        if "input" in kh or "field" in kh or "edit" in kh:
            return "field"
        if "button" in kh or "btn" in kh:
            return "button"
    if any(kw in lab for kw in ("button", "btn", "submit", "valider", "annuler", "ok", "close")):
        return "button"
    if any(kw in lab for kw in ("input", "field", "saisie", "textbox", "champ")):
        return "field"
    if "table" in lab or "grille" in lab:
        return "table"
    return "text_block"


# ----------------------------------------------------------------------------
# Adapter wrappers : OmniParser et docTR (fallback)
# ----------------------------------------------------------------------------


class _OmniParserSafeWrapper:
    """Wrap fragile OmniParserAdapter avec garde-fou anti-exception.

    - Import paresseux (lazy) pour ne pas casser l'import du module si
      OmniParser n'est pas installé.
    - ``available=False`` ⇒ caller bascule en fallback OCR-seul.
    - Timeout effectif appliqué autour de chaque appel ``detect`` via
      ``ThreadPoolExecutor`` + ``future.result(timeout=...)``.
    """

    # Executor module-level pour ne pas créer un pool par appel.
    _TIMEOUT_EXECUTOR: Optional[concurrent.futures.ThreadPoolExecutor] = None

    @classmethod
    def _get_executor(cls) -> concurrent.futures.ThreadPoolExecutor:
        if cls._TIMEOUT_EXECUTOR is None:
            cls._TIMEOUT_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
                max_workers=2, thread_name_prefix="phase25-omniparser-timeout",
            )
        return cls._TIMEOUT_EXECUTOR

    def __init__(self) -> None:
        self._adapter: Any = None
        self._available: bool = False
        self._import_error: Optional[str] = None
        self._try_import()

    def _try_import(self) -> None:
        try:
            from core.detection.omniparser_adapter import OmniParserAdapter  # type: ignore
            self._adapter = OmniParserAdapter()
            self._available = bool(getattr(self._adapter, "available", False))
            if not self._available:
                # L'adapter existe mais le check de disponibilité a échoué.
                self._import_error = "OmniParser adapter installé mais modèles non disponibles"
        except Exception as exc:
            self._adapter = None
            self._available = False
            self._import_error = f"{type(exc).__name__}: {exc}"

    @property
    def available(self) -> bool:
        return self._available

    @property
    def import_error(self) -> Optional[str]:
        return self._import_error

    def detect(
        self,
        image: "Image.Image",
        *,
        timeout: Optional[float] = None,
    ) -> List[Any]:
        """Appel sécurisé : enrobé d'un timeout dur, lève en cas d'exception.

        Args:
            image: image PIL à analyser.
            timeout: timeout en secondes (défaut : ``OMNIPARSER_TIMEOUT_SEC``).
                Si dépassé ⇒ ``concurrent.futures.TimeoutError`` propagée au
                caller, qui bascule en fallback docTR + ``degraded=True``.
        """
        if not self._available or self._adapter is None:
            return []
        effective_timeout = (
            timeout if timeout is not None else OMNIPARSER_TIMEOUT_SEC
        )
        executor = self._get_executor()
        future = executor.submit(self._adapter.detect, image)
        try:
            return list(future.result(timeout=effective_timeout))
        except concurrent.futures.TimeoutError as exc:
            # Le thread OmniParser continue son travail en arrière-plan mais
            # le résultat est ignoré ; le caller bascule en fallback docTR.
            logger.warning(
                "[PHASE25] OmniParser.detect timeout (%.1fs) -> fallback",
                effective_timeout,
            )
            raise
        except Exception as exc:
            logger.warning("[PHASE25] OmniParser.detect KO : %s", exc)
            raise  # remonté au caller pour log + fallback


def _detect_via_omniparser(
    wrapper: _OmniParserSafeWrapper,
    image: "Image.Image",
    *,
    timeout: Optional[float] = None,
) -> List[Any]:
    return wrapper.detect(image, timeout=timeout)


def _detect_via_doctr(image: "Image.Image", screenshot_path: Optional[str]) -> List[dict]:
    """Fallback OCR-seul (docTR). Retourne une liste de text_blocks bruts.

    Aucun VLM, aucune classification fine — juste OCR ⇒ ``text_blocks``.
    """
    if not _HAS_PIL or image is None:
        return []
    try:
        from doctr.io import DocumentFile  # type: ignore
        from doctr.models import ocr_predictor  # type: ignore
    except ImportError:
        logger.info("[PHASE25] docTR non disponible pour fallback OCR")
        return []

    # Cache predictor module-level pour éviter rechargement.
    global _DOCTR_PREDICTOR
    try:
        _DOCTR_PREDICTOR  # type: ignore[used-before-def]
    except NameError:
        _DOCTR_PREDICTOR = None  # type: ignore[assignment]

    try:
        if _DOCTR_PREDICTOR is None:  # type: ignore[has-type]
            _DOCTR_PREDICTOR = ocr_predictor(  # type: ignore[assignment]
                det_arch="db_resnet50", reco_arch="crnn_vgg16_bn", pretrained=True,
            )
    except Exception as exc:  # pragma: no cover
        logger.warning("[PHASE25] docTR init KO : %s", exc)
        return []

    # docTR prend un fichier ou un array numpy ; on privilégie le chemin si fourni.
    blocks: List[dict] = []
    try:
        if screenshot_path and Path(screenshot_path).exists():
            doc = DocumentFile.from_images([screenshot_path])
        else:
            buf = io.BytesIO()
            image.convert("RGB").save(buf, format="PNG")
            buf.seek(0)
            doc = DocumentFile.from_images([buf.getvalue()])
        result = _DOCTR_PREDICTOR(doc)  # type: ignore[misc]
        W, H = image.size
        for page in result.pages:
            for block in page.blocks:
                for line_obj in block.lines:
                    text = " ".join(w.value for w in line_obj.words).strip()
                    if not text:
                        continue
                    geom = line_obj.geometry  # ((x1,y1), (x2,y2)) norm 0-1
                    x1 = int(geom[0][0] * W)
                    y1 = int(geom[0][1] * H)
                    x2 = int(geom[1][0] * W)
                    y2 = int(geom[1][1] * H)
                    blocks.append({
                        "label": text,
                        "text": text,
                        "bbox": [x1, y1, x2, y2],
                        "confidence": 0.6,  # docTR ne donne pas de score line-level facilement
                    })
    except Exception as exc:  # pragma: no cover
        logger.warning("[PHASE25] docTR predict KO : %s", exc)
        return []

    return blocks


def _elements_to_structure(elements: Iterable[Any]) -> SemanticStructure:
    """Convertit la liste OmniParser ``DetectedElement`` en SemanticStructure."""
    struct = SemanticStructure()
    for el in elements:
        # Compatible avec DetectedElement (dataclass) et dict.
        if hasattr(el, "label"):
            label = getattr(el, "label", "") or ""
            bbox = list(getattr(el, "bbox", ()) or ())
            conf = float(getattr(el, "confidence", 0.5) or 0.5)
            kind_hint = getattr(el, "element_type", None)
        elif isinstance(el, dict):
            label = str(el.get("label") or el.get("text") or "")
            bbox = list(el.get("bbox") or [])
            conf = float(el.get("confidence", el.get("score", 0.5)) or 0.5)
            kind_hint = el.get("element_type") or el.get("type")
        else:
            continue

        kind = _classify_element(label, kind_hint)
        entry = {"label": label, "bbox": bbox, "confidence": conf}
        if kind == "table":
            struct.tables.append(entry)
        elif kind == "field":
            struct.forms.append(entry)
        elif kind == "button":
            struct.buttons.append(entry)
        else:
            struct.text_blocks.append({**entry, "text": label})
    return struct


# ----------------------------------------------------------------------------
# Cache disque
# ----------------------------------------------------------------------------


def _cache_path(session_id: str, frame_index: int) -> Path:
    sid = _validate_session_id(session_id)
    return OMNIPARSER_CACHE_ROOT / sid / f"{int(frame_index)}.json"


def _cache_read(session_id: str, frame_index: int) -> Optional[dict]:
    path = _cache_path(session_id, frame_index)
    if not path.exists():
        return None
    try:
        with path.open("r", encoding="utf-8") as fh:
            return json.load(fh)
    except (OSError, json.JSONDecodeError) as exc:
        logger.warning("[PHASE25] cache illisible %s : %s", path, exc)
        return None


def _cache_write(session_id: str, frame_index: int, payload: dict) -> None:
    path = _cache_path(session_id, frame_index)
    try:
        _ensure_dir(path.parent)
        tmp = path.with_suffix(".json.tmp")
        with tmp.open("w", encoding="utf-8") as fh:
            json.dump(payload, fh, ensure_ascii=False, indent=2)
        tmp.replace(path)
    except OSError as exc:  # pragma: no cover
        logger.warning("[PHASE25] cache ecriture KO %s : %s", path, exc)


# ----------------------------------------------------------------------------
# Analyseur principal
# ----------------------------------------------------------------------------


class Phase25Analyzer:
    """Analyseur sémantique post-apprentissage.

    Usage minimal :

        analyzer = Phase25Analyzer(session_id="abc123")
        result = analyzer.analyze_frames(frames=[(0, img0), (12, img12), ...])
        path = analyzer.write_semantic_yaml(result, slug="ma_competence")

    ``frames`` est une séquence ``(frame_index, PIL.Image[, screenshot_path])``.
    """

    def __init__(
        self,
        session_id: str,
        *,
        omniparser: Optional[_OmniParserSafeWrapper] = None,
        max_screens: int = MAX_SCREENS_PER_SESSION,
        timeout_sec: float = OMNIPARSER_TIMEOUT_SEC,
    ) -> None:
        self.session_id = _validate_session_id(session_id)
        self.omniparser = omniparser if omniparser is not None else _OmniParserSafeWrapper()
        self.max_screens = max_screens
        self.timeout_sec = timeout_sec
        self._healthcheck_passed = True
        self._healthcheck_reason: Optional[str] = None

    # -- healthcheck -------------------------------------------------------

    def healthcheck(self) -> bool:
        """Vérifie qu'OmniParser répond sur une image bidon (cf. specs §7).

        - Si l'adapter est ``available=False`` ⇒ healthcheck KO (mais on
          continuera quand même en mode dégradé OCR-seul).
        - Si l'adapter lève une exception ⇒ KO + log dédié.
        """
        if not _HAS_PIL:
            self._healthcheck_passed = False
            self._healthcheck_reason = "PIL indisponible"
            return False
        if not self.omniparser.available:
            self._healthcheck_passed = False
            self._healthcheck_reason = (
                self.omniparser.import_error or "OmniParser indisponible"
            )
            return False
        try:
            dummy = Image.new("RGB", (64, 64), color=(255, 255, 255))
            _ = self.omniparser.detect(dummy, timeout=self.timeout_sec)
            self._healthcheck_passed = True
            self._healthcheck_reason = None
            return True
        except Exception as exc:
            _log_omniparser_error(self.session_id, -1, exc)
            self._healthcheck_passed = False
            self._healthcheck_reason = f"{type(exc).__name__}: {exc}"
            return False

    # -- analyse écran ----------------------------------------------------

    def analyze_screen(
        self,
        frame_index: int,
        image: "Image.Image",
        phash: str,
        *,
        screenshot_path: Optional[str] = None,
        window_title: Optional[str] = None,
        force_fallback: bool = False,
    ) -> ScreenAnalysis:
        """Analyse un écran représentatif.

        Stratégie :
        1. Cache disque (idempotence par session_id+frame_index).
        2. OmniParser via wrapper safe → sinon fallback OCR-seul docTR.
        3. Exception ⇒ log dédié + ``degraded=True`` + structure docTR.
        """
        # 1. Cache
        cached = _cache_read(self.session_id, frame_index)
        if cached is not None:
            struct = SemanticStructure(
                tables=cached.get("structure", {}).get("tables", []),
                forms=cached.get("structure", {}).get("forms", []),
                buttons=cached.get("structure", {}).get("buttons", []),
                text_blocks=cached.get("structure", {}).get("text_blocks", []),
            )
            return ScreenAnalysis(
                index=frame_index,
                phash=cached.get("phash", phash),
                screen_id=cached.get("screen_id", f"screen_{frame_index:03d}"),
                screenshot_path=cached.get("screenshot_path", screenshot_path),
                structure=struct,
                degraded=bool(cached.get("degraded", False)),
                degraded_reason=cached.get("degraded_reason"),
                elapsed_sec=float(cached.get("elapsed_sec", 0.0)),
                window_title=cached.get("window_title", window_title),
            )

        t0 = time.monotonic()
        degraded = False
        degraded_reason: Optional[str] = None
        structure: SemanticStructure

        use_omniparser = self.omniparser.available and not force_fallback
        if use_omniparser:
            try:
                elements = _detect_via_omniparser(
                    self.omniparser, image, timeout=self.timeout_sec,
                )
                structure = _elements_to_structure(elements)
                if not (structure.tables or structure.forms or structure.buttons or structure.text_blocks):
                    # OmniParser n'a rien produit : on ajoute en complément docTR text_blocks.
                    blocks = _detect_via_doctr(image, screenshot_path)
                    structure.text_blocks.extend(blocks)
            except Exception as exc:
                _log_omniparser_error(self.session_id, frame_index, exc)
                degraded = True
                degraded_reason = f"omniparser_exception: {type(exc).__name__}"
                blocks = _detect_via_doctr(image, screenshot_path)
                structure = SemanticStructure(text_blocks=blocks)
        else:
            degraded = True
            degraded_reason = (
                "omniparser_unavailable: " + (self.omniparser.import_error or "n/a")
                if not self.omniparser.available
                else "forced_fallback"
            )
            blocks = _detect_via_doctr(image, screenshot_path)
            structure = SemanticStructure(text_blocks=blocks)

        elapsed = time.monotonic() - t0
        analysis = ScreenAnalysis(
            index=frame_index,
            phash=phash,
            screen_id=f"screen_{frame_index:03d}",
            screenshot_path=screenshot_path,
            structure=structure,
            degraded=degraded,
            degraded_reason=degraded_reason,
            elapsed_sec=elapsed,
            window_title=window_title,
        )

        # Cache écriture (best-effort).
        _cache_write(self.session_id, frame_index, analysis.to_dict())
        return analysis

    # -- pipeline complet -------------------------------------------------

    def analyze_frames(
        self,
        frames: Sequence[Tuple[int, "Image.Image"]],
        *,
        screenshot_paths: Optional[dict[int, str]] = None,
        window_titles: Optional[dict[int, str]] = None,
        run_healthcheck: bool = True,
    ) -> Phase25Result:
        """Pipeline complet : grouping phash → analyse → cap → résultat.

        Args:
            frames: liste ``(frame_index, PIL.Image)``.
            screenshot_paths: mapping ``frame_index -> path`` (optionnel).
            window_titles: mapping ``frame_index -> window_title`` (optionnel).
            run_healthcheck: lancer le healthcheck OmniParser avant analyse.

        Returns:
            ``Phase25Result`` avec ``too_complex=True`` si > max_screens.
        """
        if not _HAS_PIL:
            raise RuntimeError("PIL est requis pour Phase25Analyzer.analyze_frames")

        if run_healthcheck:
            self.healthcheck()
            if not self._healthcheck_passed:
                logger.warning(
                    "[PHASE25] healthcheck OmniParser KO (%s) -> mode degrade docTR",
                    self._healthcheck_reason,
                )

        force_fallback = not self._healthcheck_passed

        # 1. Regrouper par similarité perceptuelle.
        reps = identify_distinct_screens(frames)

        # 2. Cap MAX_SCREENS_PER_SESSION.
        too_complex = len(reps) > self.max_screens
        if too_complex:
            logger.warning(
                "[PHASE25] session %s : %d ecrans distincts > cap %d -> too_complex",
                self.session_id, len(reps), self.max_screens,
            )
            reps = reps[: self.max_screens]

        # 3. Analyser chaque représentant.
        sp = screenshot_paths or {}
        wt = window_titles or {}
        screens: List[ScreenAnalysis] = []
        any_degraded = False
        for idx, img, phash in reps:
            analysis = self.analyze_screen(
                idx,
                img,
                phash,
                screenshot_path=sp.get(idx),
                window_title=wt.get(idx),
                force_fallback=force_fallback,
            )
            screens.append(analysis)
            any_degraded = any_degraded or analysis.degraded

        return Phase25Result(
            session_id=self.session_id,
            generated_at=datetime.now(timezone.utc).isoformat(),
            omniparser_available=self.omniparser.available and self._healthcheck_passed,
            degraded=any_degraded or not self._healthcheck_passed,
            too_complex=too_complex,
            screens=screens,
            healthcheck_passed=self._healthcheck_passed,
            healthcheck_reason=self._healthcheck_reason,
        )

    # -- écriture YAML -----------------------------------------------------

    def write_semantic_yaml(
        self,
        result: Phase25Result,
        slug: str,
        *,
        target_dir: Optional[Path] = None,
    ) -> Path:
        """Écrit le ``.semantic.yaml`` à côté du YAML compétence candidate.

        Args:
            result: Résultat d'analyse Phase 2.5.
            slug: slug compétence (validé contre SLUG_PATTERN).
            target_dir: répertoire cible (défaut : ``data/competences/candidate/``).

        Returns:
            Path absolu du fichier écrit.

        Raises:
            ValueError: slug invalide.
            OSError: écriture impossible.
        """
        s = _validate_slug(slug)
        out_dir = target_dir if target_dir is not None else SEMANTIC_DIR
        out_dir = Path(out_dir)
        _ensure_dir(out_dir)

        # Anti écrasement supervised/stable : on refuse explicitement.
        forbidden = {"supervised", "stable"}
        if out_dir.name in forbidden:
            raise ValueError(
                f"target_dir interdit '{out_dir.name}' (autorise : candidate uniquement)"
            )

        payload = {
            "competence_id": s,
            "semantic_version": 1,
            "generated_at": result.generated_at,
            "session_id": result.session_id,
            "omniparser_available": result.omniparser_available,
            "degraded": result.degraded,
            "too_complex": result.too_complex,
            "healthcheck_passed": result.healthcheck_passed,
            "healthcheck_reason": result.healthcheck_reason,
            "screens": [],
        }
        for sc in result.screens:
            payload["screens"].append({
                "screen_id": sc.screen_id,
                "phash": sc.phash,
                "representative_frame_index": sc.index,
                "screenshot_path": sc.screenshot_path,
                "window_title": sc.window_title,
                "degraded": sc.degraded,
                "degraded_reason": sc.degraded_reason,
                "elapsed_sec": round(sc.elapsed_sec, 3),
                "structure": sc.structure.to_dict(),
                "annotations": [],  # placeholder — annotation humaine ultérieure
            })

        target = out_dir / f"{s}.semantic.yaml"
        tmp = target.with_suffix(".yaml.tmp")
        with tmp.open("w", encoding="utf-8") as fh:
            yaml.safe_dump(payload, fh, allow_unicode=True, sort_keys=False)
        tmp.replace(target)
        logger.info(
            "[PHASE25] semantic yaml ecrit : %s (screens=%d, degraded=%s)",
            target, len(result.screens), result.degraded,
        )
        return target


# ----------------------------------------------------------------------------
# Helpers utilitaires (chargement frames)
# ----------------------------------------------------------------------------


def load_frames_from_paths(paths_by_index: dict[int, str]) -> List[Tuple[int, "Image.Image"]]:
    """Charge des images PIL à partir d'un mapping ``frame_index -> path``.

    Ignore silencieusement les chemins inexistants (avec log warning).
    """
    if not _HAS_PIL:
        raise RuntimeError("PIL est requis pour load_frames_from_paths")
    frames: List[Tuple[int, Image.Image]] = []
    for idx in sorted(paths_by_index.keys()):
        p = paths_by_index[idx]
        try:
            img = Image.open(p)
            img.load()
            frames.append((int(idx), img))
        except (FileNotFoundError, OSError) as exc:
            logger.warning("[PHASE25] frame %d illisible (%s) : %s", idx, p, exc)
    return frames


__all__ = [
    "Phase25Analyzer",
    "Phase25Result",
    "ScreenAnalysis",
    "SemanticStructure",
    "SEMANTIC_DIR",
    "OMNIPARSER_CACHE_DIR",
    "OMNIPARSER_CACHE_ROOT",
    "OMNIPARSER_ERROR_LOG",
    "PHASH_HAMMING_THRESHOLD",
    "MAX_SCREENS_PER_SESSION",
    "compute_phash",
    "identify_distinct_screens",
    "load_frames_from_paths",
]