feat(p1): persist workflows and semantic learning artifacts

2026-06-02 16:20:38 +02:00
parent 7a1a5cb6fd
commit 86b3c8f7e7
21 changed files with 3816 additions and 31 deletions
--- a/core/competences/persist.py
+++ b/core/competences/persist.py
@@ -0,0 +1,518 @@
+"""Helpers de persistance pour les competences candidates (POC Lea-first).
+
+Couvre :
+- slugification stricte (ASCII, regex ^[a-z][a-z0-9_]{2,79}$)
+- detection PII (regex MVP, paramétrable)
+- atomic write + rename POSIX
+- append-only audit JSONL avec verrou fcntl
+- detection de collision cross-states (candidate / supervised / stable)
+
+Le module est volontairement minimal : il n'importe pas FastAPI ni le pipeline
+VWB, il ne fait pas de logique reseau. Il est consomme depuis
+``agent_v0/server_v1/api_stream.py`` endpoint ``/persist``.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+import time
+import unicodedata
+import uuid
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Iterable, Optional
+
+try:  # pragma: no cover - dependance externe deja presente dans le projet
+    import yaml
+except ImportError as exc:  # pragma: no cover
+    raise RuntimeError("PyYAML est requis pour core.competences.persist") from exc
+
+try:
+    import fcntl  # POSIX uniquement
+    _HAS_FCNTL = True
+except ImportError:  # pragma: no cover - Windows
+    fcntl = None  # type: ignore[assignment]
+    _HAS_FCNTL = False
+
+
+# Repository root: third ancestor directory of this file's resolved path.
+REPO_ROOT = Path(__file__).resolve().parents[2]
+# On-disk layout of the competence store: one sub-directory per learning
+# state, plus two JSONL files stored next to them.
+COMPETENCES_ROOT = REPO_ROOT / "data" / "competences"
+CANDIDATE_DIR = COMPETENCES_ROOT / "candidate"
+SUPERVISED_DIR = COMPETENCES_ROOT / "supervised"
+STABLE_DIR = COMPETENCES_ROOT / "stable"
+AUDIT_PATH = COMPETENCES_ROOT / "persist_audit.jsonl"
+INCOMPLETE_PATH = COMPETENCES_ROOT / "incomplete_learnings.jsonl"
+
+# Final pattern a competence slug must match: a lowercase letter followed by
+# 2 to 79 characters taken from [a-z0-9_] (3 to 80 characters in total).
+SLUG_PATTERN = re.compile(r"^[a-z][a-z0-9_]{2,79}$")
+
+# MVP PII detection: the regex list can be overridden through the
+# RPA_PII_PATTERNS environment variable ("|"-separated). Because the override
+# is split on "|", a pattern supplied that way cannot itself contain an
+# alternation. The defaults cover simple shapes (IPP, NIR, e-mail, FR phone).
+_DEFAULT_PII_PATTERNS = [
+    r"\b\d{13}\b",                          # French NIR (13 digits)
+    r"\b\d{15}\b",                          # French NIR + key (15 digits)
+    r"\bIPP[\s:_-]*\d{6,}\b",              # hospital IPP identifier
+    r"[\w\.-]+@[\w\.-]+\.\w{2,}",          # e-mail address
+    r"\b0[1-9](?:[ .-]?\d{2}){4}\b",       # French phone number
+]
+
+
+def _compile_pii_patterns() -> list[re.Pattern[str]]:
+    """Compile the active PII regexes (case-insensitive).
+
+    The pattern sources come from the ``RPA_PII_PATTERNS`` environment
+    variable, split on ``|``, when it is set to a non-empty value; otherwise
+    the module defaults are used. Blank entries and entries that are not
+    valid regular expressions are skipped silently.
+    """
+    override = os.environ.get("RPA_PII_PATTERNS")
+    sources = override.split("|") if override else _DEFAULT_PII_PATTERNS
+
+    result: list[re.Pattern[str]] = []
+    for source in (candidate.strip() for candidate in sources):
+        if source:
+            try:
+                result.append(re.compile(source, re.IGNORECASE))
+            except re.error:
+                # Invalid expression: ignore it and keep the other patterns.
+                pass
+    return result
+
+
+# ----------------------------------------------------------------------------
+# Slugification
+# ----------------------------------------------------------------------------
+
+
+def slugify(name: str) -> str:
+    """Turn a free-form name into a strict ASCII slug.
+
+    Steps, in order:
+    - NFKD decomposition, then every non-ASCII code point is dropped
+      (this removes accents);
+    - lowercase; runs of whitespace, dashes, dots and slashes become ``_``;
+    - any character outside ``[a-z0-9_]`` is removed;
+    - repeated underscores collapse to one, edge underscores are stripped;
+    - a leading digit gets a ``c_`` prefix;
+    - the result is cut to 80 characters at most.
+
+    Raises:
+        ValueError: if ``name`` is not a string, is blank, or if the final
+            slug does not match ``SLUG_PATTERN``.
+    """
+    if not isinstance(name, str):
+        raise ValueError("name doit etre une chaine non vide")
+    stripped = name.strip()
+    if not stripped:
+        raise ValueError("name est vide")
+
+    # Decompose accented characters, keep only their ASCII part, lowercase.
+    folded = (
+        unicodedata.normalize("NFKD", stripped)
+        .encode("ascii", "ignore")
+        .decode("ascii")
+        .lower()
+    )
+    # Separators first, so that they survive the character filter as "_".
+    slug = re.sub(r"[\s\-./\\]+", "_", folded)
+    slug = re.sub(r"[^a-z0-9_]+", "", slug)
+    slug = re.sub(r"_+", "_", slug).strip("_")
+
+    # A slug must start with a letter: prefix the ones starting with a digit.
+    if slug[:1].isdigit():
+        slug = f"c_{slug}"
+    if len(slug) > 80:
+        slug = slug[:80].rstrip("_")
+
+    if SLUG_PATTERN.match(slug) is None:
+        raise ValueError(
+            f"slug invalide '{slug}' (regle : {SLUG_PATTERN.pattern})"
+        )
+    return slug
+
+
+# ----------------------------------------------------------------------------
+# Collisions cross-states
+# ----------------------------------------------------------------------------
+
+
+def detect_cross_state_collision(
+    slug: str,
+    *,
+    competences_root: Path = COMPETENCES_ROOT,
+) -> Optional[str]:
+    """Return the state sub-directory that already holds ``<slug>.yaml``.
+
+    ``candidate/``, ``supervised/`` and ``stable/`` are probed in that order
+    under ``competences_root``; the first one containing the file wins.
+    Returns None when the file exists in none of them.
+    """
+    states = ("candidate", "supervised", "stable")
+    filename = f"{slug}.yaml"
+    return next(
+        (state for state in states if (competences_root / state / filename).exists()),
+        None,
+    )
+
+
+# ----------------------------------------------------------------------------
+# Detection PII
+# ----------------------------------------------------------------------------
+
+
+def detect_pii(payload: Any) -> list[str]:
+    """Recursively scan a payload and return the PII patterns that matched.
+
+    Strings are searched with every active pattern. Dicts are walked through
+    both their keys and their values (a key is persisted just like a value,
+    so PII used as a mapping key must be caught too). Lists and tuples are
+    walked element by element. Any other node type is ignored.
+
+    Returns:
+        The source text of each pattern that matched at least once, without
+        duplicates, in order of first match. An empty list means nothing was
+        detected (or no pattern is active).
+
+    The caller decides what to do with the result (HTTP 400 + non-sensitive
+    log).
+    """
+    patterns = _compile_pii_patterns()
+    if not patterns:
+        return []
+
+    matches: list[str] = []
+
+    def _walk(node: Any) -> None:
+        if isinstance(node, str):
+            matches.extend(pat.pattern for pat in patterns if pat.search(node))
+        elif isinstance(node, dict):
+            for key, value in node.items():
+                _walk(key)
+                _walk(value)
+        elif isinstance(node, (list, tuple)):
+            for item in node:
+                _walk(item)
+
+    _walk(payload)
+    # dict.fromkeys de-duplicates while preserving first-seen order.
+    return list(dict.fromkeys(matches))
+
+
+# ----------------------------------------------------------------------------
+# Atomic write
+# ----------------------------------------------------------------------------
+
+
+def atomic_write_yaml(
+    target_path: Path,
+    data: dict[str, Any],
+    *,
+    persist_id: str,
+) -> Path:
+    """Write a dict as YAML atomically.
+
+    1. Dump into ``<target_dir>/.<basename>.tmp.<persist_id>``.
+    2. Move the temporary file onto ``target_path`` with ``os.replace``.
+    3. On any failure, remove the temporary file if possible and re-raise.
+
+    ``os.replace`` overwrites an existing target on every platform, whereas
+    ``os.rename`` raises on Windows when the target exists. Collision
+    detection is therefore the caller's job and must happen before this call.
+
+    Args:
+        target_path: final location of the YAML file; its parent directory
+            is created when missing.
+        data: mapping serialised with ``yaml.safe_dump``.
+        persist_id: suffix that makes the temporary file name unique.
+
+    Returns:
+        The final path (``target_path``).
+    """
+    target_path = Path(target_path)
+    target_dir = target_path.parent
+    target_dir.mkdir(parents=True, exist_ok=True)
+    tmp_path = target_dir / f".{target_path.name}.tmp.{persist_id}"
+
+    try:
+        with tmp_path.open("w", encoding="utf-8") as handle:
+            yaml.safe_dump(
+                data,
+                handle,
+                allow_unicode=True,
+                sort_keys=False,
+                default_flow_style=False,
+            )
+            handle.flush()
+            try:
+                os.fsync(handle.fileno())
+            except OSError:
+                # Durability is best-effort: some filesystems refuse fsync.
+                pass
+        os.replace(tmp_path, target_path)
+    except Exception:
+        # Leave no stray temporary file behind, then propagate the error.
+        try:
+            tmp_path.unlink(missing_ok=True)
+        except OSError:
+            pass
+        raise
+
+    return target_path
+
+
+# ----------------------------------------------------------------------------
+# Audit append (JSONL + verrou)
+# ----------------------------------------------------------------------------
+
+
+def audit_append(
+    entry: dict[str, Any],
+    *,
+    audit_path: Path = AUDIT_PATH,
+) -> int:
+    """Append one JSON line to the audit file and return its audit_entry_id.
+
+    The id is the number of lines already present in the file plus one. When
+    ``fcntl`` is available, counting and writing happen while an exclusive
+    ``flock`` is held on the file. Without ``fcntl`` (Windows) the write is
+    best-effort and unlocked.
+
+    ``entry`` is modified in place: ``timestamp`` is added when missing
+    (current time with the local UTC offset, second precision) and
+    ``audit_entry_id`` is always set.
+
+    Args:
+        entry: JSON-serialisable record to append.
+        audit_path: JSONL file to append to; its parent directory is created
+            when missing.
+
+    Returns:
+        The ``audit_entry_id`` assigned to the appended record.
+    """
+    audit_path = Path(audit_path)
+    audit_path.parent.mkdir(parents=True, exist_ok=True)
+
+    if "timestamp" not in entry:
+        entry["timestamp"] = (
+            datetime.now(timezone.utc).astimezone().isoformat(timespec="seconds")
+        )
+
+    # "a+" opens for append and read, so the existing lines can be counted
+    # through the same handle before writing (audit_entry_id).
+    flags = "a+"
+    with open(audit_path, flags, encoding="utf-8") as handle:
+        if _HAS_FCNTL:
+            try:
+                fcntl.flock(handle.fileno(), fcntl.LOCK_EX)  # type: ignore[union-attr]
+            except OSError:
+                # A failed lock is ignored: the write proceeds unlocked.
+                pass
+        try:
+            # Rewind to count the lines already in the file.
+            handle.seek(0)
+            line_count = sum(1 for _ in handle)
+            audit_entry_id = line_count + 1
+            entry["audit_entry_id"] = audit_entry_id
+            handle.write(json.dumps(entry, ensure_ascii=False) + "\n")
+            handle.flush()
+            try:
+                os.fsync(handle.fileno())
+            except OSError:
+                # fsync is best-effort.
+                pass
+        finally:
+            if _HAS_FCNTL:
+                try:
+                    fcntl.flock(handle.fileno(), fcntl.LOCK_UN)  # type: ignore[union-attr]
+                except OSError:
+                    pass
+    return audit_entry_id
+
+
+def find_existing_audit_entry(
+    persist_id: str,
+    *,
+    audit_path: Path = AUDIT_PATH,
+) -> Optional[dict[str, Any]]:
+    """Look up an audit record by ``persist_id`` (idempotence check).
+
+    The JSONL file is read line by line. Blank lines, lines that are not
+    valid JSON and lines whose JSON value is not an object are skipped.
+
+    Args:
+        persist_id: identifier to search for; a falsy value returns None.
+        audit_path: JSONL audit file to scan.
+
+    Returns:
+        The first record whose ``persist_id`` equals the requested one, or
+        None when there is no match or the file cannot be opened or read.
+    """
+    if not persist_id:
+        return None
+    try:
+        # A missing file raises FileNotFoundError, an OSError handled below.
+        with Path(audit_path).open("r", encoding="utf-8") as handle:
+            for line in handle:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    record = json.loads(line)
+                except json.JSONDecodeError:
+                    continue
+                # A valid JSON line may still be a list, number or string,
+                # none of which has .get(): only objects can match.
+                if isinstance(record, dict) and record.get("persist_id") == persist_id:
+                    return record
+    except OSError:
+        return None
+    return None
+
+
+# ----------------------------------------------------------------------------
+# YAML body construction
+# ----------------------------------------------------------------------------
+
+
+# Top-level keys whose presence validate_yaml_schema() checks in a competence
+# YAML body. Only presence is checked, not the type or content of the values.
+REQUIRED_YAML_FIELDS = (
+    "schema_version",
+    "id",
+    "name",
+    "version",
+    "learning_state",
+    "intent",
+    "parameters",
+    "preconditions",
+    "methods",
+    "success_marker",
+    "failure_message_template",
+    "promotion",
+    "generalisation",
+    "failure_log",
+    "created_at",
+    "last_updated_at",
+    "methods_execution",
+)
+
+
+def build_competence_yaml(
+    *,
+    slug: str,
+    name: str,
+    workflow_ir: dict[str, Any],
+    parameters: Optional[list[dict[str, Any]]],
+    intent_fr: str,
+    learning_state: str,
+    session_id: Optional[str],
+    machine_id: Optional[str],
+    external_agent_id: Optional[str] = None,
+) -> dict[str, Any]:
+    """Build the competence dict that gets serialised to YAML.
+
+    The layout follows the reference file
+    ``data/competences/candidate/key_win_r_wait_explorer_exe.yaml``.
+
+    - Each dict in ``workflow_ir["steps"]`` becomes one method: the step is
+      copied, then ``id`` (``step_<position>_<kind or 'action'>``),
+      ``primitive_ref`` (the step kind, when there is one) and ``observed``
+      (False) are filled in when absent. Non-dict steps are dropped but
+      still consume a position number.
+    - ``parameters`` entries that are dicts with a truthy ``name`` are
+      re-keyed by that name.
+    - ``intent_fr`` falls back to ``name`` when empty.
+    - ``chain_refs`` is only added when at least one of ``session_id``,
+      ``machine_id`` or ``external_agent_id`` is truthy.
+    """
+    timestamp = datetime.now(timezone.utc).astimezone().isoformat(timespec="seconds")
+    intent_text = intent_fr or name
+
+    raw_steps = list(workflow_ir.get("steps") or [])
+    preconditions = list(workflow_ir.get("preconditions") or [])
+
+    success_marker = workflow_ir.get("success_marker")
+    if not success_marker:
+        success_marker = {
+            "mode": "all_of",
+            "timeout_ms": 5000,
+            "markers": [],
+        }
+
+    failure_template = workflow_ir.get("failure_message_template")
+    if not failure_template:
+        failure_template = {
+            "intention": intent_text,
+            "attendu": "",
+            "vu": "{observed_human_state}",
+            "demande": "indiquer la correction attendue",
+        }
+
+    methods: list[dict[str, Any]] = []
+    for position, raw_step in enumerate(raw_steps, start=1):
+        if isinstance(raw_step, dict):
+            entry = dict(raw_step)
+            kind = raw_step.get("kind")
+            if "id" not in entry:
+                entry["id"] = f"step_{position}_{kind or 'action'}"
+            if "primitive_ref" not in entry and kind:
+                entry["primitive_ref"] = kind
+            if "observed" not in entry:
+                entry["observed"] = False
+            methods.append(entry)
+
+    params_by_name: dict[str, Any] = {
+        str(spec["name"]): {
+            "type": spec.get("type", "string"),
+            "required": bool(spec.get("required", False)),
+            "description": spec.get("description", ""),
+        }
+        for spec in (parameters or [])
+        if isinstance(spec, dict) and spec.get("name")
+    }
+
+    # Single history record: the transition performed by this persist call.
+    first_transition = {
+        "at": timestamp,
+        "from": "observed",
+        "to": learning_state,
+        "by": "lea_persist_endpoint",
+        "reason": "persisted via /api/v1/lea/competences/candidate/persist",
+    }
+    promotion = {
+        "history": [first_transition],
+        "candidate_requires": [
+            "method_trace_present",
+            "success_marker_defined",
+            "failure_message_template_valid",
+        ],
+        "supervised_requires": ["replay_verified_once", "human_validation"],
+        "stable_requires": {
+            "min_successes": 3,
+            "distinct_contexts": 3,
+            "max_unexplained_failures": 0,
+        },
+        "t2_known_gaps": [],
+    }
+    generalisation = {
+        "seen_contexts": [],
+        "method_success_rate": {},
+        "variance_log": [],
+    }
+
+    # Key order matters: the dict is dumped with sort_keys=False.
+    body: dict[str, Any] = {
+        "schema_version": 1,
+        "id": slug,
+        "name": name,
+        "version": 1,
+        "learning_state": learning_state,
+        "intent": {"fr": intent_text},
+        "parameters": params_by_name,
+        "preconditions": preconditions,
+        "methods": methods,
+        "success_marker": success_marker,
+        "failure_message_template": failure_template,
+        "promotion": promotion,
+        "generalisation": generalisation,
+        "failure_log": [],
+        "created_at": timestamp,
+        "last_updated_at": timestamp,
+        "methods_execution": "sequence",
+    }
+
+    if any((session_id, machine_id, external_agent_id)):
+        body["chain_refs"] = {
+            "source_session": session_id,
+            "machine_id": machine_id,
+            "external_agent_id": external_agent_id,
+        }
+    return body
+
+
+def validate_yaml_schema(data: dict[str, Any]) -> list[str]:
+    """Return the required top-level fields that are absent from ``data``.
+
+    Only key presence is checked. The result follows the order of
+    ``REQUIRED_YAML_FIELDS`` and is empty when nothing is missing.
+    """
+    missing: list[str] = []
+    for required in REQUIRED_YAML_FIELDS:
+        if required not in data:
+            missing.append(required)
+    return missing
+
+
+# ----------------------------------------------------------------------------
+# Rate limit token-bucket simple (en memoire, par machine_id)
+# ----------------------------------------------------------------------------
+
+
+class PersistRateLimiter:
+    """Minimal in-memory rate limiter for /persist, keyed by machine_id.
+
+    Implemented as a sliding-window log: the timestamps of accepted calls
+    are kept per machine_id and a call is refused once ``max_per_minute``
+    of them fall inside the last ``window_seconds``.
+
+    Default: 10 requests / minute / machine_id (cf. specs §6).
+    A single shared instance is expected; all state is guarded by a lock so
+    concurrent callers cannot exceed the quota or corrupt a bucket.
+    """
+
+    def __init__(self, *, max_per_minute: int = 10, window_seconds: int = 60) -> None:
+        # Local import: keeps this class self-contained without touching the
+        # module-level import block.
+        import threading
+
+        self.max_per_minute = max_per_minute
+        self.window_seconds = window_seconds
+        self._timestamps: dict[str, list[float]] = {}
+        self._lock = threading.Lock()
+
+    def allow(self, machine_id: str) -> tuple[bool, int]:
+        """Record a call attempt and tell whether it is allowed.
+
+        Returns:
+            ``(allowed, retry_after_seconds)``. ``retry_after_seconds`` is 0
+            when the call is allowed and at least 1 otherwise. A falsy
+            ``machine_id`` is always allowed and never recorded.
+        """
+        if not machine_id:
+            return True, 0
+        with self._lock:
+            now = time.time()
+            bucket = self._timestamps.setdefault(machine_id, [])
+            # Drop the timestamps that slid out of the window.
+            bucket[:] = [ts for ts in bucket if now - ts < self.window_seconds]
+            if len(bucket) >= self.max_per_minute:
+                # With a non-positive quota the bucket can be empty here;
+                # in that case wait a full window.
+                oldest = bucket[0] if bucket else now
+                retry_after = max(1, int(self.window_seconds - (now - oldest)))
+                return False, retry_after
+            bucket.append(now)
+            return True, 0
+
+    def reset(self, machine_id: Optional[str] = None) -> None:
+        """Forget recorded calls for one machine_id, or for all when None."""
+        with self._lock:
+            if machine_id is None:
+                self._timestamps.clear()
+            else:
+                self._timestamps.pop(machine_id, None)
+
+
+# Shared module-level instance, meant to be imported by api_stream.
+persist_rate_limiter = PersistRateLimiter()
+
+
+# Public API of the module. SUPERVISED_DIR, STABLE_DIR and REPO_ROOT are
+# defined above but are not part of the exported names.
+__all__ = [
+    "SLUG_PATTERN",
+    "COMPETENCES_ROOT",
+    "CANDIDATE_DIR",
+    "AUDIT_PATH",
+    "INCOMPLETE_PATH",
+    "REQUIRED_YAML_FIELDS",
+    "slugify",
+    "detect_cross_state_collision",
+    "detect_pii",
+    "atomic_write_yaml",
+    "audit_append",
+    "find_existing_audit_entry",
+    "build_competence_yaml",
+    "validate_yaml_schema",
+    "PersistRateLimiter",
+    "persist_rate_limiter",
+]
--- a/core/detection/ollama_client.py
+++ b/core/detection/ollama_client.py
@@ -16,6 +16,48 @@ import io
 logger = logging.getLogger(__name__)


+def _extract_first_json_object(text: str) -> Optional[Dict[str, Any]]:
+    """Extract the JSON object that starts at the first ``{`` of ``text``.
+
+    Anything after the object is ignored, which is typical of VLM output
+    where the model appends an explanation after its JSON answer. Text
+    before the first ``{`` is ignored as well.
+
+    The parsing is delegated to ``json.JSONDecoder.raw_decode``, which stops
+    at the end of the first complete value, instead of a hand-written
+    brace/quote scanner followed by a second pass through ``json.loads``.
+
+    Returns:
+        The decoded dict, or None when ``text`` is empty, contains no ``{``,
+        or when what starts at the first ``{`` is not a valid JSON object.
+    """
+    if not text:
+        return None
+    start = text.find("{")
+    if start < 0:
+        return None
+    try:
+        obj, _end = json.JSONDecoder().raw_decode(text, start)
+    except json.JSONDecodeError:
+        return None
+    # Decoding starts on "{", so a successful result is a JSON object.
+    return obj if isinstance(obj, dict) else None
+
+
 class OllamaClient:
    """
    Client Ollama pour VLM
@@ -219,7 +261,93 @@ class OllamaClient:
                "success": False,
                "error": str(e)
            }
-    
+
+    def generate_grounding(
+        self,
+        prompt: str,
+        image_path: Optional[str] = None,
+        image: Optional[Image.Image] = None,
+        extra_images_b64: Optional[List[str]] = None,
+        profile: Optional[Dict[str, Any]] = None,
+    ) -> Dict[str, Any]:
+        """D5-v2 (2026-05-25): centralised, prefill-aware grounding VLM call.
+
+        Calls ``generate()`` with the model, temperature, ``num_predict``,
+        ``num_ctx`` and assistant prefill taken from the grounding profile,
+        then tries to parse a JSON object out of the response.
+
+        When ``profile`` is None it is obtained from
+        ``vlm_config.get_grounding_profile()``; an explicit profile can be
+        passed instead (useful in tests).
+
+        The response text is expected to already contain the prefill
+        (per the original author, ``generate()`` re-attaches it — confirm
+        against ``generate()``), so the JSON can be parsed as-is.
+
+        Args:
+            prompt: text prompt (typically "Find element X").
+            image_path / image / extra_images_b64: forwarded to generate().
+            profile: grounding profile override.
+
+        Returns:
+            The dict returned by ``generate()``, with two keys added:
+            ``parsed_json`` (the first JSON object found in the response, or
+            None) and ``profile_used`` (a shallow copy of the profile).
+
+        Notes:
+            - There is no automatic retry on ``fallback_model`` here; the
+              caller decides whether to retry with another model.
+            - The profile's ``keep_alive`` and ``think`` entries are not
+              forwarded to ``generate()`` by this method.
+        """
+        if profile is None:
+            from core.detection.vlm_config import get_grounding_profile
+            profile = get_grounding_profile(endpoint=self.endpoint)
+
+        # Temporarily switch the client to the profile's model and always
+        # restore the previous one.
+        # NOTE(review): this mutates shared instance state for the duration
+        # of the call — confirm the client is not used concurrently.
+        original_model = self.model
+        self.model = profile["model"]
+        try:
+            result = self.generate(
+                prompt=prompt,
+                image_path=image_path,
+                image=image,
+                extra_images_b64=extra_images_b64,
+                temperature=profile["temperature"],
+                max_tokens=profile["num_predict"],
+                assistant_prefill=profile["prefill"],
+                num_ctx=profile["num_ctx"],
+                force_json=False,  # prefill is enough; format=json slows qwen3.5 down
+            )
+        finally:
+            self.model = original_model
+
+        # Quiet logging: one line per grounding call.
+        logger.info(
+            "[PERF] vlm.grounding model=%s ctx=%d prefill=%s success=%s",
+            profile["model"], profile["num_ctx"],
+            "yes" if profile["prefill"] else "no",
+            result.get("success", False),
+        )
+
+        # The JSON may be followed by stray text (the model sometimes ends
+        # with an explanation), hence the "first object" extraction.
+        parsed = None
+        content = (result.get("response") or "").strip()
+        if content:
+            try:
+                parsed = _extract_first_json_object(content)
+            except Exception as e:  # best-effort: parsing must never fail the call
+                logger.debug("[PERF] vlm.grounding parse failed: %s — content=%r", e, content[:160])
+            else:
+                # The helper reports "nothing parsable" by returning None, not
+                # by raising, so that case has to be logged here.
+                if parsed is None:
+                    logger.debug(
+                        "[PERF] vlm.grounding parse failed: %s — content=%r",
+                        "no JSON object found", content[:160],
+                    )
+
+        result["parsed_json"] = parsed
+        result["profile_used"] = dict(profile)
+        return result
+
    def detect_ui_elements(self, image_path: str) -> Dict[str, Any]:
        """
        Détecter les éléments UI dans une image
--- a/core/detection/vlm_config.py
+++ b/core/detection/vlm_config.py
@@ -134,13 +134,13 @@ def reset_vlm_model_cache():


 def is_thinking_model(model_name: str) -> bool:
-    """Détermine si un modèle est un modèle 'thinking' (qwen3).
+    """Détermine si un modèle est un modèle 'thinking' (qwen3, qwen3.5).

    Les modèles thinking nécessitent un assistant prefill pour éviter
    le mode réflexion interne qui peut durer >180s avec des images.

    Args:
-        model_name: Nom du modèle (ex: "qwen3-vl:8b", "gemma4:e4b")
+        model_name: Nom du modèle (ex: "qwen3-vl:8b", "qwen3.5:9b", "gemma4:e4b")

    Returns:
        True si le modèle est de type thinking (nécessite prefill workaround)
@@ -148,6 +148,92 @@ def is_thinking_model(model_name: str) -> bool:
    return "qwen3" in model_name.lower()


+# ────────────────────────────────────────────────────────────────────────────
+# D5-v2 (2026-05-25) : profil grounding dédié, centralisé, env-overridable
+# ────────────────────────────────────────────────────────────────────────────
+
+# Default grounding profile — qwen3.5:9b with ctx 4096 and a JSON prefill.
+# Per the original author: consistent with the Codex decision taken after the
+# Gemini review, i.e. prevent qwen2.5vl from being warmed up again with
+# ctx 8192 and guarantee a reproducible grounding path.
+DEFAULT_GROUNDING_MODEL = "qwen3.5:9b"
+DEFAULT_GROUNDING_CTX = 4096
+DEFAULT_GROUNDING_PREFILL = '{"x_pct":'
+DEFAULT_GROUNDING_TEMPERATURE = 0.0
+DEFAULT_GROUNDING_NUM_PREDICT = 96  # ~80 tokens are enough for `{x_pct,y_pct,confidence}`
+DEFAULT_GROUNDING_KEEP_ALIVE = "30m"  # avoid a cold reload between actions
+
+# Grounding fallback: qwen2.5vl kept for existing compatibility (rpa tag).
+DEFAULT_GROUNDING_FALLBACK = "qwen2.5vl:7b-rpa"
+
+
+def get_grounding_profile(endpoint: str = DEFAULT_OLLAMA_ENDPOINT) -> dict:
+    """Return the VLM profile for **JSON-format** grounding calls
+    (response shaped like `{"x_pct": ..., "y_pct": ..., "confidence": ...}`).
+
+    Scope warning, D5-v3a (2026-05-25), as recorded by the original author:
+    this profile targets calls that consume the output through a JSON prefill
+    (typically qwen3.5:9b with prefill `{"x_pct":`). It is NOT suited to the
+    native **bbox_2d** grounding calls of qwen2.5vl (used in
+    `agent_v0/server_v1/resolve_engine.py:959-1013, 3008-3045` and parsed by
+    `core.grounding.bbox_parser.parse_bbox_to_norm`).
+
+    Known env-var conflict (same source): `resolve_engine.py:959` also reads
+    `RPA_GROUNDING_MODEL` but expects a bbox_2d model (qwen2.5vl). Setting
+    `RPA_GROUNDING_MODEL=qwen3.5:9b` is fine for this profile but hands an
+    incompatible model to that legacy bbox site. Deferred to D5-v3b: rename
+    to `RPA_BBOX_GROUNDING_MODEL` on the legacy side and introduce
+    `OllamaClient.generate_bbox_grounding()`.
+
+    The profile centralises the policy so that VLM paths do not fall back to
+    qwen2.5vl with num_ctx=8192 (Modelfile). It is consumed by
+    OllamaClient.generate_grounding().
+
+    Supported env vars (an unset, empty or blank value means "use default"):
+      - RPA_GROUNDING_MODEL : main model (default qwen3.5:9b)
+      - RPA_GROUNDING_CTX   : context window (default 4096; a non-integer or
+        non-positive value falls back to the default)
+      - RPA_GROUNDING_FALLBACK : fallback model (default qwen2.5vl:7b-rpa)
+      - RPA_VLM_PREFILL=false : disables the JSON prefill (rare, debug);
+        "0", "no" and "off" are accepted as well
+
+    Args:
+        endpoint: accepted for call-site compatibility; not used when
+            building the profile.
+
+    Returns:
+        dict with keys:
+          - model: str
+          - num_ctx: int
+          - prefill: str or None
+          - temperature: float
+          - num_predict: int
+          - think: bool (False for qwen3 and qwen3.5)
+          - keep_alive: str
+          - fallback_model: str
+    """
+    # `or DEFAULT`: an env var that is set but empty/blank must not yield an
+    # empty model name.
+    model = os.environ.get("RPA_GROUNDING_MODEL", "").strip() or DEFAULT_GROUNDING_MODEL
+    try:
+        num_ctx = int(os.environ.get("RPA_GROUNDING_CTX", str(DEFAULT_GROUNDING_CTX)))
+    except (TypeError, ValueError):
+        num_ctx = DEFAULT_GROUNDING_CTX
+    if num_ctx <= 0:
+        # A zero or negative context window is never a valid setting.
+        num_ctx = DEFAULT_GROUNDING_CTX
+    fallback = (
+        os.environ.get("RPA_GROUNDING_FALLBACK", "").strip()
+        or DEFAULT_GROUNDING_FALLBACK
+    )
+    prefill_enabled = os.environ.get("RPA_VLM_PREFILL", "true").strip().lower() not in (
+        "0", "false", "no", "off"
+    )
+    prefill = DEFAULT_GROUNDING_PREFILL if prefill_enabled else None
+
+    # think=False is mandatory for qwen3/qwen3.5 (prefill is the main
+    # mechanism) and for gemma4 (otherwise empty tokens with Ollama >=0.20).
+    think_false = is_thinking_model(model) or needs_think_false(model)
+
+    return {
+        "model": model,
+        "num_ctx": num_ctx,
+        "prefill": prefill,
+        "temperature": DEFAULT_GROUNDING_TEMPERATURE,
+        "num_predict": DEFAULT_GROUNDING_NUM_PREDICT,
+        "think": not think_false,  # Ollama API: think must be False -> value is False
+        "keep_alive": DEFAULT_GROUNDING_KEEP_ALIVE,
+        "fallback_model": fallback,
+    }
+
+
 def needs_think_false(model_name: str) -> bool:
    """Détermine si un modèle nécessite think=false dans le payload.

--- a/core/embedding/clip_embedder.py
+++ b/core/embedding/clip_embedder.py
@@ -59,8 +59,13 @@ class CLIPEmbedder(EmbedderBase):
            )
        
        if device is None:
+            # NOTE: utiliser le `torch` du scope module (l. 8). Un import local
+            # ici rendait `torch` LOCAL à __init__ pour tout le scope, faisant
+            # planter `with torch.no_grad():` plus bas en UnboundLocalError
+            # quand l'appelant passait device="cpu" (l'import local n'était
+            # alors pas exécuté). Voir inbox_codex/2026-05-25_1235_..._enquete-
+            # feedbackbus-5004.md.
            try:
-                import torch
                if torch.cuda.is_available():
                    free_vram = torch.cuda.mem_get_info()[0] / 1024**3
                    if free_vram > 1.5:
--- a/core/llm/init.py
+++ b/core/llm/init.py
@@ -6,7 +6,11 @@ from .t2a_decision import (
    analyze_dpi,
    build_dpi_enriched,
 )
-from .ocr_extractor import extract_table_from_image, extract_text_from_image
+from .ocr_extractor import (
+    extract_digits_tesseract_from_image,
+    extract_table_from_image,
+    extract_text_from_image,
+)

 __all__ = [
    "PROMPT_TEMPLATE",
@@ -15,4 +19,5 @@ __all__ = [
    "build_dpi_enriched",
    "extract_text_from_image",
    "extract_table_from_image",
+    "extract_digits_tesseract_from_image",
 ]
--- a/core/llm/ocr_extractor.py
+++ b/core/llm/ocr_extractor.py
@@ -1,6 +1,7 @@
 """Extracteur OCR — texte depuis une image (screenshot d'écran).

 Utilise EasyOCR fr+en. Singleton (chargement modèle ~3s au premier appel).
+Ajoute un chemin Tesseract spécialisé pour les chiffres/IPP d'écrans propres.

 Conçu pour le pipeline streaming serveur (actions `extract_text` /
 `extract_table`) : récupère un screenshot fresh (dernier heartbeat ou
@@ -11,6 +12,7 @@ pour analyse downstream (ex: t2a_decision, boucle sur N patients).
 from __future__ import annotations

 import logging
+import os
 import re
 from pathlib import Path
 from typing import List, Optional, Tuple
@@ -20,6 +22,19 @@ logger = logging.getLogger(__name__)
 _easyocr_reader = None


+def easyocr_gpu_enabled(default: bool = False) -> bool:
+    """Return whether EasyOCR may allocate GPU memory.
+
+    The replay server shares the GPU with Ollama. Defaulting EasyOCR to CPU
+    keeps VRAM available for the VLM; set RPA_EASYOCR_GPU=1 only for a measured
+    OCR benchmark or a runtime that has spare VRAM.
+
+    Args:
+        default: value returned when RPA_EASYOCR_GPU is unset, empty or
+            contains only whitespace.
+
+    Returns:
+        True when the variable is one of "1", "true", "yes", "on"
+        (case-insensitive, surrounding whitespace ignored); False for any
+        other non-blank value; ``default`` otherwise.
+    """
+    # Strip before the emptiness test so that a blank value counts as unset.
+    raw = os.getenv("RPA_EASYOCR_GPU", "").strip()
+    if not raw:
+        return default
+    return raw.lower() in {"1", "true", "yes", "on"}
+
+
 def _get_reader():
    """Initialise EasyOCR fr+en au premier appel (singleton, CPU forcé).

@@ -29,8 +44,9 @@ def _get_reader():
    global _easyocr_reader
    if _easyocr_reader is None:
        import easyocr
-        _easyocr_reader = easyocr.Reader(['fr', 'en'], gpu=False, verbose=False)
-        logger.info("EasyOCR initialisé (fr+en, CPU)")
+        gpu = easyocr_gpu_enabled(default=False)
+        _easyocr_reader = easyocr.Reader(['fr', 'en'], gpu=gpu, verbose=False)
+        logger.info("EasyOCR initialisé (fr+en, %s)", "GPU" if gpu else "CPU")
    return _easyocr_reader


@@ -73,17 +89,86 @@ def extract_text_from_image(
        return ""


+def extract_digits_tesseract_from_image(
+    image_path: str,
+    region: Optional[Tuple[int, int, int, int]] = None,
+    pattern: Optional[str] = None,
+    limit: Optional[int] = None,
+    psm: int = 6,
+    lang: str = "eng",
+    whitelist: str = "0123456789",
+) -> List[str]:
+    """Extract digit sequences from an image with Tesseract.
+
+    Intended for numeric fields (e.g. IPP identifiers) in on-screen
+    tables. Kept as a separate, explicit code path so that the general
+    EasyOCR behaviour behind `extract_text` stays untouched.
+
+    Args:
+        image_path: path of the PNG/JPG file on disk.
+        region: (x, y, w, h) crop applied before OCR. None = whole image.
+        pattern: Python regex matched (from the start, via ``re.match``)
+                 against each digit run. IPP example: r"^25\\d{6}$".
+        limit: maximum number of values returned; a falsy value disables it.
+        psm: Tesseract page segmentation mode. 6 = uniform block of text.
+        lang: Tesseract language.
+        whitelist: allowed characters; empty disables the whitelist option.
+
+    Returns:
+        Digit sequences in the order they appear in Tesseract's output.
+        On any error, logs a warning and returns an empty list.
+    """
+    source = Path(image_path)
+    if not source.exists():
+        logger.warning("extract_digits_tesseract: fichier introuvable %s", image_path)
+        return []
+
+    try:
+        # Imported lazily so a missing dependency is handled like any OCR error.
+        from PIL import Image
+        import pytesseract
+
+        with Image.open(source) as opened:
+            frame = opened
+            if region:
+                left, top, width, height = region
+                frame = frame.crop((left, top, left + width, top + height))
+            # Any mode other than grayscale ("L") or RGB is converted to RGB.
+            if frame.mode not in {"L", "RGB"}:
+                frame = frame.convert("RGB")
+
+            options = ["--psm", str(psm)]
+            if whitelist:
+                options += ["-c", f"tessedit_char_whitelist={whitelist}"]
+            raw_text = pytesseract.image_to_string(
+                frame, lang=lang, config=" ".join(options),
+            )
+
+        # Keep only digit runs, then apply the optional regex filter and cap.
+        digits = re.findall(r"\d+", raw_text)
+        if pattern:
+            matcher = re.compile(pattern)
+            digits = [seq for seq in digits if matcher.match(seq)]
+        return digits[:limit] if limit else digits
+    except Exception as e:
+        logger.warning("extract_digits_tesseract échoué sur %s : %s", image_path, e)
+        return []
+
+
 def extract_table_from_image(
    image_path: str,
    region: Optional[Tuple[int, int, int, int]] = None,
    pattern: Optional[str] = None,
    limit: Optional[int] = None,
+    engine: str = "easyocr",
 ) -> List[str]:
    """Extrait une liste de valeurs d'un tableau via OCR.

    Cas d'usage principal : lire la liste des IPP d'un tableau de patients
-    pour boucler dessus. EasyOCR retourne tous les tokens avec leur bbox,
-    on filtre par regex puis on trie par position (y croissant).
+    pour boucler dessus. Par défaut, EasyOCR retourne tous les tokens avec
+    leur bbox, on filtre par regex puis on trie par position (y croissant).
+    Pour des champs chiffres/IPP, `engine="tesseract"` active le chemin
+    spécialisé Tesseract validé sur captures Easily.

    Args:
        image_path: chemin du PNG sur disque.
@@ -92,6 +177,7 @@ def extract_table_from_image(
                 Si None : tous les tokens non vides sont retournés.
                 Exemple IPP : r"^\\d{8,10}$" ou r"^25\\d{6}$"
        limit: nombre maximal d'entrées à retourner (None = sans limite).
+        engine: "easyocr" (defaut) ou "tesseract" / "digits" / "ipp".

    Returns:
        Liste de strings dans l'ordre top → bottom (par y de bbox).
@@ -102,6 +188,15 @@ def extract_table_from_image(
        logger.warning("extract_table: fichier introuvable %s", image_path)
        return []

+    engine_name = (engine or "easyocr").strip().lower()
+    if engine_name in {"tesseract", "digits", "ipp"}:
+        return extract_digits_tesseract_from_image(
+            image_path,
+            region=region,
+            pattern=pattern,
+            limit=limit,
+        )
+
    try:
        from PIL import Image
        import numpy as np
--- a/core/pipeline/workflow_pipeline.py
+++ b/core/pipeline/workflow_pipeline.py
@@ -99,10 +99,17 @@ class WorkflowPipeline:
        logger.info("✓ Fusion Engine initialized")
        
        # 3. State Embedding Builder
+        clip_embedders = {
+            "image": self.clip_embedder,
+            "text": self.clip_embedder,
+            "title": self.clip_embedder,
+            "ui": self.clip_embedder,
+        }
        self.embedding_builder = StateEmbeddingBuilder(
            fusion_engine=self.fusion_engine,
+            embedders=clip_embedders,
            output_dir=self.embeddings_dir,
-            use_clip=True
+            use_clip=False
        )
        logger.info("✓ State Embedding Builder initialized")
        
--- a/core/semantic/__init__.py
+++ b/core/semantic/__init__.py
@@ -0,0 +1,38 @@
+"""Phase 2.5 — Post-learning semantic analysis.
+
+Module dedicated to the semantic analysis of screens captured during the
+Shadow phase, **after** ``/api/v1/shadow/stop`` and **before** the Option C restitution.
+
+Specs: ``docs/POC/SPECS_PHASE_25_SEMANTIQUE_2026-06-01.md``
+
+Principles (Plato arbitration, 2026-06-01):
+- Post-learning only, **never on the replay hot path**.
+- OmniParser is wrapped behind an anti-fragility safeguard.
+- Systematic OCR-only (docTR) fallback on any exception.
+- ``.semantic.yaml`` storage kept separate from the main competence YAML.
+- Opt-in per competence (fully backward compatible).
+"""
+
+from .phase25_analyzer import (
+    Phase25Analyzer,
+    Phase25Result,
+    ScreenAnalysis,
+    SemanticStructure,
+    SEMANTIC_DIR,
+    OMNIPARSER_CACHE_DIR,
+    OMNIPARSER_ERROR_LOG,
+    PHASH_HAMMING_THRESHOLD,
+    MAX_SCREENS_PER_SESSION,
+)
+
+__all__ = [
+    "Phase25Analyzer",
+    "Phase25Result",
+    "ScreenAnalysis",
+    "SemanticStructure",
+    "SEMANTIC_DIR",
+    "OMNIPARSER_CACHE_DIR",
+    "OMNIPARSER_ERROR_LOG",
+    "PHASH_HAMMING_THRESHOLD",
+    "MAX_SCREENS_PER_SESSION",
+]
--- a/core/semantic/phase25_analyzer.py
+++ b/core/semantic/phase25_analyzer.py
@@ -0,0 +1,920 @@
+"""Phase 2.5 — Post-learning semantic analyzer.
+
+Standalone module that takes a set of screenshots captured during the
+Shadow phase and produces a structured payload
+``{tables, forms, buttons, text_blocks}`` per distinct screen,
+stored in a separate ``.semantic.yaml`` file.
+
+Specs: ``docs/POC/SPECS_PHASE_25_SEMANTIQUE_2026-06-01.md``
+
+Safeguards:
+- Global try/except wrapper around every OmniParser call.
+- OCR-only fallback (docTR) when OmniParser is unavailable or fails.
+- OmniParser healthcheck at startup: failure ⇒ automatic degraded mode.
+- Disk cache ``data/cache/omniparser/<session>/<index>.json``.
+- Cap of 10 distinct screens per session.
+- No FastAPI import, no direct network call.
+"""
+
+from __future__ import annotations
+
+import concurrent.futures
+import hashlib
+import io
+import json
+import logging
+import re
+import time
+import traceback
+from dataclasses import asdict, dataclass, field
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Iterable, List, Optional, Sequence, Tuple
+
+try:  # pragma: no cover - dépendance externe déjà présente dans le projet
+    import yaml
+except ImportError as exc:  # pragma: no cover
+    raise RuntimeError("PyYAML est requis pour core.semantic.phase25_analyzer") from exc
+
+try:  # PIL toujours présent côté Linux dev / DGX
+    from PIL import Image
+    _HAS_PIL = True
+except ImportError:  # pragma: no cover
+    Image = None  # type: ignore[assignment]
+    _HAS_PIL = False
+
+try:
+    import imagehash  # type: ignore
+    _HAS_IMAGEHASH = True
+except ImportError:  # pragma: no cover - fallback MD5 thumbnail
+    imagehash = None  # type: ignore[assignment]
+    _HAS_IMAGEHASH = False
+
+
+logger = logging.getLogger(__name__)
+
+
+# ----------------------------------------------------------------------------
+# Constantes et chemins
+# ----------------------------------------------------------------------------
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+DATA_ROOT = REPO_ROOT / "data"
+SEMANTIC_DIR = DATA_ROOT / "competences" / "candidate"
+OMNIPARSER_CACHE_ROOT = DATA_ROOT / "cache" / "omniparser"
+OMNIPARSER_CACHE_DIR = OMNIPARSER_CACHE_ROOT  # public alias
+LOGS_DIR = REPO_ROOT / "logs"
+OMNIPARSER_ERROR_LOG = LOGS_DIR / "omniparser_errors.log"
+
+# Perceptual grouping heuristic (see specs §3).
+PHASH_HAMMING_THRESHOLD = 8
+MAX_SCREENS_PER_SESSION = 10
+THUMBNAIL_SIZE = (256, 256)  # thumbnail size used by the MD5 fallback hash
+
+# Per-screenshot timeout (see specs §2).
+OMNIPARSER_TIMEOUT_SEC = 30.0
+
+# Allowed slug (same pattern as persist: a-z0-9_).
+SLUG_PATTERN = re.compile(r"^[a-z][a-z0-9_]{2,79}$")
+# Allowed session_id: harmless characters only.
+SESSION_ID_PATTERN = re.compile(r"^[A-Za-z0-9][A-Za-z0-9_\-]{0,127}$")
+
+
+# ----------------------------------------------------------------------------
+# Dataclasses
+# ----------------------------------------------------------------------------
+
+
+@dataclass
+class SemanticStructure:
+    """Semantic layout of one screen, split into four element lists (specs §2)."""
+
+    tables: List[dict] = field(default_factory=list)
+    forms: List[dict] = field(default_factory=list)
+    buttons: List[dict] = field(default_factory=list)
+    text_blocks: List[dict] = field(default_factory=list)
+
+    def to_dict(self) -> dict:
+        """Return the four lists as shallow copies keyed by field name."""
+        # Key order (tables, forms, buttons, text_blocks) is part of the output.
+        return {
+            name: list(getattr(self, name))
+            for name in ("tables", "forms", "buttons", "text_blocks")
+        }
+
+
+@dataclass
+class ScreenAnalysis:
+    """Analysis of one representative screen (see specs §3)."""
+
+    index: int
+    phash: str
+    screen_id: str
+    screenshot_path: Optional[str]
+    structure: SemanticStructure
+    degraded: bool = False
+    degraded_reason: Optional[str] = None
+    elapsed_sec: float = 0.0
+    window_title: Optional[str] = None
+    # "Codex contract" snapshot (flattened view for agent-chat / dashboard): built on the fly by to_dict().
+
+    def to_dict(self) -> dict:
+        """Serialize the analysis; ``phash`` is emitted under the ``hash`` key."""
+        structure_payload = self.structure.to_dict()
+        return {
+            "index": self.index,
+            "hash": self.phash,
+            "screen_id": self.screen_id,
+            "window_title": self.window_title,
+            "screenshot_path": self.screenshot_path,
+            "structure": structure_payload,
+            "elements": _structure_to_elements(self.structure),
+            "degraded": self.degraded,
+            "degraded_reason": self.degraded_reason,
+            "elapsed_sec": round(self.elapsed_sec, 3),
+        }
+
+
+@dataclass
+class Phase25Result:
+    """Overall outcome of one Phase 2.5 analysis run."""
+
+    session_id: str
+    generated_at: str
+    omniparser_available: bool
+    degraded: bool
+    too_complex: bool
+    screens: List[ScreenAnalysis] = field(default_factory=list)
+    healthcheck_passed: bool = True
+    healthcheck_reason: Optional[str] = None
+
+    def to_dict(self) -> dict:
+        """Serialize the run summary, expanding each screen via its own ``to_dict``."""
+        summary = {
+            key: getattr(self, key)
+            for key in (
+                "session_id", "generated_at", "omniparser_available", "degraded",
+                "too_complex", "healthcheck_passed", "healthcheck_reason",
+            )
+        }
+        summary["screens"] = [screen.to_dict() for screen in self.screens]
+        return summary
+
+
+# ----------------------------------------------------------------------------
+# Helpers : validation et FS
+# ----------------------------------------------------------------------------
+
+
+def _validate_session_id(session_id: Any) -> str:
+    """Return the stripped session id, or raise ``ValueError`` if it is not a safe token."""
+    cleaned = session_id.strip() if isinstance(session_id, str) else ""
+    if not cleaned:
+        raise ValueError("session_id doit etre une chaine non vide")
+    if SESSION_ID_PATTERN.match(cleaned) is None:
+        raise ValueError(
+            "session_id invalide (autorise : [A-Za-z0-9][A-Za-z0-9_-]{0,127})"
+        )
+    # Belt-and-braces path-traversal check; the regex should already reject these.
+    if any(token in cleaned for token in ("..", "/", "\\")):
+        raise ValueError("session_id invalide (path-traversal interdit)")
+    return cleaned
+
+
+def _validate_slug(slug: Any) -> str:
+    """Return the stripped slug, or raise ``ValueError`` if it violates ``SLUG_PATTERN``."""
+    if not isinstance(slug, str):
+        raise ValueError("slug doit etre une chaine")
+    candidate = slug.strip()
+    if SLUG_PATTERN.match(candidate) is None:
+        message = f"slug invalide '{candidate}' (regle : {SLUG_PATTERN.pattern})"
+        raise ValueError(message)
+    return candidate
+
+
+def _ensure_dir(path: Path) -> None:
+    path.mkdir(parents=True, exist_ok=True)  # creates missing parents; no error if the directory exists
+
+
+def _log_omniparser_error(session_id: str, frame_index: int, exc: BaseException) -> None:
+    """Append one JSON line describing ``exc`` to ``logs/omniparser_errors.log`` (specs §7)."""
+    record = dict(
+        timestamp=datetime.now(timezone.utc).isoformat(),
+        session_id=session_id,
+        frame_index=frame_index,
+        error_type=type(exc).__name__,
+        error_message=str(exc),
+        traceback=traceback.format_exception_only(type(exc), exc),
+    )
+    try:
+        _ensure_dir(LOGS_DIR)
+        with OMNIPARSER_ERROR_LOG.open("a", encoding="utf-8") as fh:
+            fh.write(json.dumps(record, ensure_ascii=False) + "\n")
+    except OSError as log_exc:  # pragma: no cover - log best-effort
+        logger.warning("[PHASE25] echec ecriture omniparser_errors.log : %s", log_exc)
+
+
+# ----------------------------------------------------------------------------
+# Hash perceptuel (avec fallback MD5)
+# ----------------------------------------------------------------------------
+
+
+def compute_phash(image: "Image.Image") -> str:
+    """Return imagehash's perceptual hash, else ``md5:<hex>`` of a PNG thumbnail."""
+    if _HAS_IMAGEHASH and imagehash is not None:
+        try:
+            return str(imagehash.phash(image))
+        except Exception as exc:  # pragma: no cover
+            logger.warning("[PHASE25] phash imagehash KO, fallback MD5 : %s", exc)
+    # MD5 fallback: hash the PNG bytes of an RGB thumbnail copy.
+    reduced = image.copy()
+    reduced.thumbnail(THUMBNAIL_SIZE)
+    png_bytes = io.BytesIO()
+    reduced.convert("RGB").save(png_bytes, format="PNG")
+    return f"md5:{hashlib.md5(png_bytes.getvalue()).hexdigest()}"
+
+
+def _hamming_distance(h1: str, h2: str) -> int:
+    """Hamming distance between two hashes produced by ``compute_phash``.
+
+    - ``md5:`` hashes are not comparable bit-wise: 0 when both strings are
+      equal, otherwise a value just above the threshold (never "similar").
+    - With imagehash available, both hex strings go through
+      ``imagehash.hex_to_hash``; without it, a raw XOR bit count is used.
+    - Any parsing failure also yields the above-threshold value.
+    """
+    too_far = PHASH_HAMMING_THRESHOLD + 1
+    if "md5:" in (h1[:4], h2[:4]):
+        return 0 if h1 == h2 else too_far
+    if _HAS_IMAGEHASH and imagehash is not None:
+        try:
+            return abs(imagehash.hex_to_hash(h1) - imagehash.hex_to_hash(h2))
+        except Exception:
+            return too_far
+    try:
+        return bin(int(h1, 16) ^ int(h2, 16)).count("1")
+    except ValueError:
+        return too_far
+
+
+def identify_distinct_screens(
+    frames: Sequence[Tuple[int, "Image.Image"]],
+    threshold: int = PHASH_HAMMING_THRESHOLD,
+) -> List[Tuple[int, "Image.Image", str]]:
+    """Group frames by perceptual-hash similarity; keep one representative per group.
+
+    Args:
+        frames: sequence of ``(frame_index, PIL.Image)`` pairs.
+        threshold: maximum Hamming distance for two frames to count as the same screen.
+
+    Returns:
+        List of ``(frame_index, image, phash)``, one entry per group, in order of
+        first appearance (the first frame seen represents its group).
+    """
+    kept: List[Tuple[int, Image.Image, str]] = []
+    for frame_idx, frame in frames:
+        digest = compute_phash(frame)
+        for rep_idx, _rep_img, rep_digest in kept:
+            if _hamming_distance(digest, rep_digest) > threshold:
+                continue
+            logger.debug(
+                "[PHASE25] frame %d regroupee avec representant %d (phash=%s)",
+                frame_idx, rep_idx, digest,
+            )
+            break
+        else:
+            # No earlier representative is close enough: this frame opens a new group.
+            kept.append((frame_idx, frame, digest))
+    return kept
+
+
+# ----------------------------------------------------------------------------
+# Conversion structure ⇄ "elements" (contrat Codex)
+# ----------------------------------------------------------------------------
+
+
+def _structure_to_elements(struct: SemanticStructure) -> List[dict]:
+    """Flatten a structure into ``{kind, label, bbox, confidence}`` dicts.
+
+    Output order is tables, forms, buttons, then text blocks. Missing keys
+    fall back to a default label (the kind name, or the ``text`` value for
+    text blocks), an empty bbox and a confidence of 0.5.
+    """
+
+    # Shared shape of every flattened element.
+    def _flatten(kind: str, item: dict, fallback_label: str) -> dict:
+        return {
+            "kind": kind,
+            "label": item.get("label", fallback_label),
+            "bbox": item.get("bbox", []),
+            "confidence": float(item.get("confidence", 0.5)),
+        }
+
+    # For these three groups the default label is the kind name itself.
+    groups = (
+        ("table", struct.tables),
+        ("field", struct.forms),
+        ("button", struct.buttons),
+    )
+    flattened: List[dict] = [
+        _flatten(kind, item, kind) for kind, items in groups for item in items
+    ]
+    # Text blocks default their label to their own "text" value instead.
+    flattened.extend(
+        _flatten("text_block", block, block.get("text", ""))
+        for block in struct.text_blocks
+    )
+    return flattened
+
+
+def _classify_element(label: str, kind_hint: str | None = None) -> str:
+    """Classify an element as ``table | field | button | text_block``.
+
+    ``kind_hint`` (any object; its ``str()`` is used) takes precedence over the
+    label keywords. The short keyword ``ok`` only matches as a standalone word,
+    so labels such as "Stock" or "Token" are no longer treated as buttons.
+    """
+    lab = str(label or "").lower()
+    kh = str(kind_hint).lower() if kind_hint else ""
+    if "table" in kh:
+        return "table"
+    if any(kw in kh for kw in ("input", "field", "edit")):
+        return "field"
+    if "button" in kh or "btn" in kh:
+        return "button"
+    if any(kw in lab for kw in ("button", "btn", "submit", "valider", "annuler", "close")):
+        return "button"
+    if re.search(r"(?<![a-z0-9])ok(?![a-z0-9])", lab):
+        return "button"
+    if any(kw in lab for kw in ("input", "field", "saisie", "textbox", "champ")):
+        return "field"
+    return "table" if "table" in lab or "grille" in lab else "text_block"
+
+
+# ----------------------------------------------------------------------------
+# Adapter wrappers : OmniParser et docTR (fallback)
+# ----------------------------------------------------------------------------
+
+
+class _OmniParserSafeWrapper:
+    """Guarded wrapper around the fragile ``OmniParserAdapter``.
+
+    - Lazy import, so importing this module does not fail when OmniParser
+      is not installed.
+    - ``available=False`` tells the caller to use the OCR-only fallback.
+    - Every ``detect`` call runs in a ``ThreadPoolExecutor`` and is bounded
+      by ``future.result(timeout=...)``.
+    """
+
+    # Class-level executor shared by all instances (no new pool per call).
+    _TIMEOUT_EXECUTOR: Optional[concurrent.futures.ThreadPoolExecutor] = None
+
+    @classmethod
+    def _get_executor(cls) -> concurrent.futures.ThreadPoolExecutor:
+        if cls._TIMEOUT_EXECUTOR is None:
+            cls._TIMEOUT_EXECUTOR = concurrent.futures.ThreadPoolExecutor(
+                max_workers=2, thread_name_prefix="phase25-omniparser-timeout",
+            )
+        return cls._TIMEOUT_EXECUTOR
+
+    def __init__(self) -> None:
+        self._adapter: Any = None
+        self._available: bool = False
+        self._import_error: Optional[str] = None
+        self._try_import()
+
+    def _try_import(self) -> None:
+        try:
+            from core.detection.omniparser_adapter import OmniParserAdapter  # type: ignore
+            self._adapter = OmniParserAdapter()
+            self._available = bool(getattr(self._adapter, "available", False))
+            if not self._available:
+                # The adapter was created but reports itself as not available.
+                self._import_error = "OmniParser adapter installé mais modèles non disponibles"
+        except Exception as exc:
+            self._adapter = None
+            self._available = False
+            self._import_error = f"{type(exc).__name__}: {exc}"
+
+    @property
+    def available(self) -> bool:
+        return self._available
+
+    @property
+    def import_error(self) -> Optional[str]:
+        return self._import_error
+
+    def detect(
+        self,
+        image: "Image.Image",
+        *,
+        timeout: Optional[float] = None,
+    ) -> List[Any]:
+        """Return the adapter's detections as a list ([] when unavailable); errors propagate.
+
+        Args:
+            image: PIL image to analyse.
+            timeout: timeout in seconds (default: ``OMNIPARSER_TIMEOUT_SEC``).
+                When exceeded, ``concurrent.futures.TimeoutError`` propagates
+                to the caller, which is expected to fall back to docTR.
+        """
+        if not self._available or self._adapter is None:
+            return []
+        effective_timeout = OMNIPARSER_TIMEOUT_SEC if timeout is None else timeout
+        executor = self._get_executor()
+        future = executor.submit(self._adapter.detect, image)
+        try:
+            return list(future.result(timeout=effective_timeout))
+        except concurrent.futures.TimeoutError:
+            # A call that is already running cannot be interrupted and keeps
+            # going in the background; cancel() only drops one still queued.
+            future.cancel()
+            logger.warning(
+                "[PHASE25] OmniParser.detect timeout (%.1fs) -> fallback",
+                effective_timeout,
+            )
+            raise
+        except Exception as exc:
+            logger.warning("[PHASE25] OmniParser.detect KO : %s", exc)
+            raise  # re-raised so the caller can log and fall back
+
+
+def _detect_via_omniparser(
+    wrapper: _OmniParserSafeWrapper,
+    image: "Image.Image",
+    *,
+    timeout: Optional[float] = None,
+) -> List[Any]:
+    return wrapper.detect(image, timeout=timeout)  # thin pass-through; wrapper errors are not caught here
+
+
+def _detect_via_doctr(image: "Image.Image", screenshot_path: Optional[str]) -> List[dict]:
+    """OCR-only fallback (docTR): return raw text blocks, or [] when docTR is missing or fails.
+
+    No VLM, no fine-grained classification — plain OCR, one ``{label, text, bbox, confidence}`` dict per line.
+    """
+    if not _HAS_PIL or image is None:
+        return []
+    try:
+        from doctr.io import DocumentFile  # type: ignore
+        from doctr.models import ocr_predictor  # type: ignore
+    except ImportError:
+        logger.info("[PHASE25] docTR non disponible pour fallback OCR")
+        return []
+
+    # Module-level predictor cache, to avoid reloading the model on every call.
+    global _DOCTR_PREDICTOR
+    try:
+        _DOCTR_PREDICTOR  # type: ignore[used-before-def]
+    except NameError:
+        _DOCTR_PREDICTOR = None  # type: ignore[assignment]
+
+    try:
+        if _DOCTR_PREDICTOR is None:  # type: ignore[has-type]
+            _DOCTR_PREDICTOR = ocr_predictor(  # type: ignore[assignment]
+                det_arch="db_resnet50", reco_arch="crnn_vgg16_bn", pretrained=True,
+            )
+    except Exception as exc:  # pragma: no cover
+        logger.warning("[PHASE25] docTR init KO : %s", exc)
+        return []
+
+    # Prefer the on-disk screenshot when it exists; otherwise pass the image as in-memory PNG bytes.
+    blocks: List[dict] = []
+    try:
+        if screenshot_path and Path(screenshot_path).exists():
+            doc = DocumentFile.from_images([screenshot_path])
+        else:
+            buf = io.BytesIO()
+            image.convert("RGB").save(buf, format="PNG")
+            buf.seek(0)
+            doc = DocumentFile.from_images([buf.getvalue()])
+        result = _DOCTR_PREDICTOR(doc)  # type: ignore[misc]
+        W, H = image.size
+        for page in result.pages:
+            for block in page.blocks:
+                for line_obj in block.lines:
+                    text = " ".join(w.value for w in line_obj.words).strip()
+                    if not text:
+                        continue
+                    geom = line_obj.geometry  # ((x1,y1), (x2,y2)), treated as normalized 0-1 and scaled to pixels below
+                    x1 = int(geom[0][0] * W)
+                    y1 = int(geom[0][1] * H)
+                    x2 = int(geom[1][0] * W)
+                    y2 = int(geom[1][1] * H)
+                    blocks.append({
+                        "label": text,
+                        "text": text,
+                        "bbox": [x1, y1, x2, y2],
+                        "confidence": 0.6,  # fixed placeholder: no line-level score is read from docTR here
+                    })
+    except Exception as exc:  # pragma: no cover
+        logger.warning("[PHASE25] docTR predict KO : %s", exc)
+        return []
+
+    return blocks
+
+
+def _elements_to_structure(elements: Iterable[Any]) -> SemanticStructure:
+    """Sort detections (objects with a ``label`` attribute, or dicts) into a SemanticStructure."""
+    struct = SemanticStructure()
+    # Destination list per classified kind; text blocks are handled separately.
+    buckets = {"table": struct.tables, "field": struct.forms, "button": struct.buttons}
+    for el in elements:
+        if hasattr(el, "label"):
+            # Attribute-style element (e.g. a DetectedElement dataclass).
+            label = getattr(el, "label", "") or ""
+            bbox = list(getattr(el, "bbox", ()) or ())
+            conf = float(getattr(el, "confidence", 0.5) or 0.5)
+            kind_hint = getattr(el, "element_type", None)
+        elif isinstance(el, dict):
+            label = str(el.get("label") or el.get("text") or "")
+            bbox = list(el.get("bbox") or [])
+            conf = float(el.get("confidence", el.get("score", 0.5)) or 0.5)
+            kind_hint = el.get("element_type") or el.get("type")
+        else:
+            # Unsupported element type: silently skipped.
+            continue
+
+        entry = {"label": label, "bbox": bbox, "confidence": conf}
+        target = buckets.get(_classify_element(label, kind_hint))
+        if target is None:
+            # Anything that is not a table/field/button is kept as a text block.
+            struct.text_blocks.append({**entry, "text": label})
+        else:
+            target.append(entry)
+    return struct
+
+
+# ----------------------------------------------------------------------------
+# Cache disque
+# ----------------------------------------------------------------------------
+
+
+def _cache_path(session_id: str, frame_index: int) -> Path:
+    sid = _validate_session_id(session_id)  # raises ValueError on an unsafe session id
+    return OMNIPARSER_CACHE_ROOT / sid / f"{int(frame_index)}.json"  # <cache root>/<session>/<index>.json
+
+
+def _cache_read(session_id: str, frame_index: int) -> Optional[dict]:
+    """Return the cached payload dict, or None if absent, unreadable or not a JSON object."""
+    path = _cache_path(session_id, frame_index)
+    try:
+        payload = json.loads(path.read_text(encoding="utf-8")) if path.exists() else None
+    except (OSError, ValueError) as exc:
+        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
+        logger.warning("[PHASE25] cache illisible %s : %s", path, exc)
+        return None
+    return payload if isinstance(payload, dict) else None
+
+
+def _cache_write(session_id: str, frame_index: int, payload: dict) -> None:
+    """Best-effort cache write via temp file + rename; write failures are logged, not raised."""
+    path = _cache_path(session_id, frame_index)
+    tmp = path.with_suffix(".json.tmp")
+    try:
+        _ensure_dir(path.parent)
+        tmp.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
+        tmp.replace(path)
+    except (OSError, TypeError, ValueError) as exc:  # pragma: no cover
+        logger.warning("[PHASE25] cache ecriture KO %s : %s", path, exc)
+
+
+# ----------------------------------------------------------------------------
+# Analyseur principal
+# ----------------------------------------------------------------------------
+
+
+class Phase25Analyzer:
+    """Analyseur sémantique post-apprentissage.
+
+    Usage minimal :
+
+        analyzer = Phase25Analyzer(session_id="abc123")
+        result = analyzer.analyze_frames(frames=[(0, img0), (12, img12), ...])
+        path = analyzer.write_semantic_yaml(result, slug="ma_competence")
+
+    ``frames`` est une séquence ``(frame_index, PIL.Image[, screenshot_path])``.
+    """
+
+    def __init__(
+        self,
+        session_id: str,
+        *,
+        omniparser: Optional[_OmniParserSafeWrapper] = None,
+        max_screens: int = MAX_SCREENS_PER_SESSION,
+        timeout_sec: float = OMNIPARSER_TIMEOUT_SEC,
+    ) -> None:
+        self.session_id = _validate_session_id(session_id)
+        self.omniparser = omniparser if omniparser is not None else _OmniParserSafeWrapper()
+        self.max_screens = max_screens
+        self.timeout_sec = timeout_sec
+        self._healthcheck_passed = True
+        self._healthcheck_reason: Optional[str] = None
+
+    # -- healthcheck -------------------------------------------------------
+
+    def healthcheck(self) -> bool:
+        """Vérifie qu'OmniParser répond sur une image bidon (cf. specs §7).
+
+        - Si l'adapter est ``available=False`` ⇒ healthcheck KO (mais on
+          continuera quand même en mode dégradé OCR-seul).
+        - Si l'adapter lève une exception ⇒ KO + log dédié.
+        """
+        if not _HAS_PIL:
+            self._healthcheck_passed = False
+            self._healthcheck_reason = "PIL indisponible"
+            return False
+        if not self.omniparser.available:
+            self._healthcheck_passed = False
+            self._healthcheck_reason = (
+                self.omniparser.import_error or "OmniParser indisponible"
+            )
+            return False
+        try:
+            dummy = Image.new("RGB", (64, 64), color=(255, 255, 255))
+            _ = self.omniparser.detect(dummy, timeout=self.timeout_sec)
+            self._healthcheck_passed = True
+            self._healthcheck_reason = None
+            return True
+        except Exception as exc:
+            _log_omniparser_error(self.session_id, -1, exc)
+            self._healthcheck_passed = False
+            self._healthcheck_reason = f"{type(exc).__name__}: {exc}"
+            return False
+
+    # -- analyse écran ----------------------------------------------------
+
+    def analyze_screen(
+        self,
+        frame_index: int,
+        image: "Image.Image",
+        phash: str,
+        *,
+        screenshot_path: Optional[str] = None,
+        window_title: Optional[str] = None,
+        force_fallback: bool = False,
+    ) -> ScreenAnalysis:
+        """Analyse un écran représentatif.
+
+        Stratégie :
+        1. Cache disque (idempotence par session_id+frame_index).
+        2. OmniParser via wrapper safe → sinon fallback OCR-seul docTR.
+        3. Exception ⇒ log dédié + ``degraded=True`` + structure docTR.
+        """
+        # 1. Cache
+        cached = _cache_read(self.session_id, frame_index)
+        if cached is not None:
+            struct = SemanticStructure(
+                tables=cached.get("structure", {}).get("tables", []),
+                forms=cached.get("structure", {}).get("forms", []),
+                buttons=cached.get("structure", {}).get("buttons", []),
+                text_blocks=cached.get("structure", {}).get("text_blocks", []),
+            )
+            return ScreenAnalysis(
+                index=frame_index,
+                phash=cached.get("phash", phash),
+                screen_id=cached.get("screen_id", f"screen_{frame_index:03d}"),
+                screenshot_path=cached.get("screenshot_path", screenshot_path),
+                structure=struct,
+                degraded=bool(cached.get("degraded", False)),
+                degraded_reason=cached.get("degraded_reason"),
+                elapsed_sec=float(cached.get("elapsed_sec", 0.0)),
+                window_title=cached.get("window_title", window_title),
+            )
+
+        t0 = time.monotonic()
+        degraded = False
+        degraded_reason: Optional[str] = None
+        structure: SemanticStructure
+
+        use_omniparser = self.omniparser.available and not force_fallback
+        if use_omniparser:
+            try:
+                elements = _detect_via_omniparser(
+                    self.omniparser, image, timeout=self.timeout_sec,
+                )
+                structure = _elements_to_structure(elements)
+                if not (structure.tables or structure.forms or structure.buttons or structure.text_blocks):
+                    # OmniParser n'a rien produit : on ajoute en complément docTR text_blocks.
+                    blocks = _detect_via_doctr(image, screenshot_path)
+                    structure.text_blocks.extend(blocks)
+            except Exception as exc:
+                _log_omniparser_error(self.session_id, frame_index, exc)
+                degraded = True
+                degraded_reason = f"omniparser_exception: {type(exc).__name__}"
+                blocks = _detect_via_doctr(image, screenshot_path)
+                structure = SemanticStructure(text_blocks=blocks)
+        else:
+            degraded = True
+            degraded_reason = (
+                "omniparser_unavailable: " + (self.omniparser.import_error or "n/a")
+                if not self.omniparser.available
+                else "forced_fallback"
+            )
+            blocks = _detect_via_doctr(image, screenshot_path)
+            structure = SemanticStructure(text_blocks=blocks)
+
+        elapsed = time.monotonic() - t0
+        analysis = ScreenAnalysis(
+            index=frame_index,
+            phash=phash,
+            screen_id=f"screen_{frame_index:03d}",
+            screenshot_path=screenshot_path,
+            structure=structure,
+            degraded=degraded,
+            degraded_reason=degraded_reason,
+            elapsed_sec=elapsed,
+            window_title=window_title,
+        )
+
+        # Cache écriture (best-effort).
+        _cache_write(self.session_id, frame_index, analysis.to_dict())
+        return analysis
+
+    # -- pipeline complet -------------------------------------------------
+
+    def analyze_frames(
+        self,
+        frames: Sequence[Tuple[int, "Image.Image"]],
+        *,
+        screenshot_paths: Optional[dict[int, str]] = None,
+        window_titles: Optional[dict[int, str]] = None,
+        run_healthcheck: bool = True,
+    ) -> Phase25Result:
+        """Pipeline complet : grouping phash → analyse → cap → résultat.
+
+        Args:
+            frames: liste ``(frame_index, PIL.Image)``.
+            screenshot_paths: mapping ``frame_index -> path`` (optionnel).
+            window_titles: mapping ``frame_index -> window_title`` (optionnel).
+            run_healthcheck: lancer le healthcheck OmniParser avant analyse.
+
+        Returns:
+            ``Phase25Result`` avec ``too_complex=True`` si > max_screens.
+        """
+        if not _HAS_PIL:
+            raise RuntimeError("PIL est requis pour Phase25Analyzer.analyze_frames")
+
+        if run_healthcheck:
+            self.healthcheck()
+            if not self._healthcheck_passed:
+                logger.warning(
+                    "[PHASE25] healthcheck OmniParser KO (%s) -> mode degrade docTR",
+                    self._healthcheck_reason,
+                )
+
+        force_fallback = not self._healthcheck_passed
+
+        # 1. Regrouper par similarité perceptuelle.
+        reps = identify_distinct_screens(frames)
+
+        # 2. Cap MAX_SCREENS_PER_SESSION.
+        too_complex = len(reps) > self.max_screens
+        if too_complex:
+            logger.warning(
+                "[PHASE25] session %s : %d ecrans distincts > cap %d -> too_complex",
+                self.session_id, len(reps), self.max_screens,
+            )
+            reps = reps[: self.max_screens]
+
+        # 3. Analyser chaque représentant.
+        sp = screenshot_paths or {}
+        wt = window_titles or {}
+        screens: List[ScreenAnalysis] = []
+        any_degraded = False
+        for idx, img, phash in reps:
+            analysis = self.analyze_screen(
+                idx,
+                img,
+                phash,
+                screenshot_path=sp.get(idx),
+                window_title=wt.get(idx),
+                force_fallback=force_fallback,
+            )
+            screens.append(analysis)
+            any_degraded = any_degraded or analysis.degraded
+
+        return Phase25Result(
+            session_id=self.session_id,
+            generated_at=datetime.now(timezone.utc).isoformat(),
+            omniparser_available=self.omniparser.available and self._healthcheck_passed,
+            degraded=any_degraded or not self._healthcheck_passed,
+            too_complex=too_complex,
+            screens=screens,
+            healthcheck_passed=self._healthcheck_passed,
+            healthcheck_reason=self._healthcheck_reason,
+        )
+
+    # -- écriture YAML -----------------------------------------------------
+
+    def write_semantic_yaml(
+        self,
+        result: Phase25Result,
+        slug: str,
+        *,
+        target_dir: Optional[Path] = None,
+    ) -> Path:
+        """Écrit le ``.semantic.yaml`` à côté du YAML compétence candidate.
+
+        Args:
+            result: Résultat d'analyse Phase 2.5.
+            slug: slug compétence (validé contre SLUG_PATTERN).
+            target_dir: répertoire cible (défaut : ``data/competences/candidate/``).
+
+        Returns:
+            Path absolu du fichier écrit.
+
+        Raises:
+            ValueError: slug invalide.
+            OSError: écriture impossible.
+        """
+        s = _validate_slug(slug)
+        out_dir = target_dir if target_dir is not None else SEMANTIC_DIR
+        out_dir = Path(out_dir)
+        _ensure_dir(out_dir)
+
+        # Anti écrasement supervised/stable : on refuse explicitement.
+        forbidden = {"supervised", "stable"}
+        if out_dir.name in forbidden:
+            raise ValueError(
+                f"target_dir interdit '{out_dir.name}' (autorise : candidate uniquement)"
+            )
+
+        payload = {
+            "competence_id": s,
+            "semantic_version": 1,
+            "generated_at": result.generated_at,
+            "session_id": result.session_id,
+            "omniparser_available": result.omniparser_available,
+            "degraded": result.degraded,
+            "too_complex": result.too_complex,
+            "healthcheck_passed": result.healthcheck_passed,
+            "healthcheck_reason": result.healthcheck_reason,
+            "screens": [],
+        }
+        for sc in result.screens:
+            payload["screens"].append({
+                "screen_id": sc.screen_id,
+                "phash": sc.phash,
+                "representative_frame_index": sc.index,
+                "screenshot_path": sc.screenshot_path,
+                "window_title": sc.window_title,
+                "degraded": sc.degraded,
+                "degraded_reason": sc.degraded_reason,
+                "elapsed_sec": round(sc.elapsed_sec, 3),
+                "structure": sc.structure.to_dict(),
+                "annotations": [],  # placeholder — annotation humaine ultérieure
+            })
+
+        target = out_dir / f"{s}.semantic.yaml"
+        tmp = target.with_suffix(".yaml.tmp")
+        with tmp.open("w", encoding="utf-8") as fh:
+            yaml.safe_dump(payload, fh, allow_unicode=True, sort_keys=False)
+        tmp.replace(target)
+        logger.info(
+            "[PHASE25] semantic yaml ecrit : %s (screens=%d, degraded=%s)",
+            target, len(result.screens), result.degraded,
+        )
+        return target
+
+
+# ----------------------------------------------------------------------------
+# Helpers utilitaires (chargement frames)
+# ----------------------------------------------------------------------------
+
+
+def load_frames_from_paths(paths_by_index: dict[int, str]) -> List[Tuple[int, "Image.Image"]]:
+    """Charge des images PIL à partir d'un mapping ``frame_index -> path``.
+
+    Ignore silencieusement les chemins inexistants (avec log warning).
+    """
+    if not _HAS_PIL:
+        raise RuntimeError("PIL est requis pour load_frames_from_paths")
+    frames: List[Tuple[int, Image.Image]] = []
+    for idx in sorted(paths_by_index.keys()):
+        p = paths_by_index[idx]
+        try:
+            img = Image.open(p)
+            img.load()
+            frames.append((int(idx), img))
+        except (FileNotFoundError, OSError) as exc:
+            logger.warning("[PHASE25] frame %d illisible (%s) : %s", idx, p, exc)
+    return frames
+
+
+# Names exposed by ``from <this module> import *``: the analyzer and its
+# result types first, then the module constants, then the helper functions.
+__all__ = [
+    "Phase25Analyzer",
+    "Phase25Result",
+    "ScreenAnalysis",
+    "SemanticStructure",
+    "SEMANTIC_DIR",
+    "OMNIPARSER_CACHE_DIR",
+    "OMNIPARSER_CACHE_ROOT",
+    "OMNIPARSER_ERROR_LOG",
+    "PHASH_HAMMING_THRESHOLD",
+    "MAX_SCREENS_PER_SESSION",
+    "compute_phash",
+    "identify_distinct_screens",
+    "load_frames_from_paths",
+]