feat(analytics): normalise API + contrat explicite get_next_action (Lot A)

Contrat get_next_action() — suppression du None ambigu : {"status": "selected", "edge": ..., ...} {"status": "terminal"} {"status": "blocked", "reason": "no_valid_edge" | ...} ExecutionLoop dispatche proprement : blocked -> PAUSED + _pause_requested, terminal -> succès légitime. Rétrocompat défensive (None legacy -> blocked). Analytics API normalisée (kwargs-only) : on_execution_complete(duration_ms, status, steps_total|completed|failed) on_step_complete(duration_ms, ...) on_recovery_attempt(duration_ms, ...) Découverte critique : les anciens appels utilisaient des méthodes et champs inexistants (ExecutionMetrics.duration, metrics_collector.record_execution). Le code n'avait jamais tourné au runtime — zéro analytics remontée. L'exception était avalée par le try/except englobant. 58 tests (18 analytics + 11 contrat + 20 ExecutionLoop + 12 edge_scorer non-régression). Migration complète, pas de pont legacy. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 09:06:19 +02:00
parent 42f571d496
commit af4ffa189a
9 changed files with 1573 additions and 233 deletions
--- a/core/analytics/collection/metrics_collector.py
+++ b/core/analytics/collection/metrics_collector.py
@@ -76,7 +76,16 @@ class StepMetrics:
    confidence_score: float
    retry_count: int = 0
    error_details: Optional[str] = None
-    
+    # C1 — Instrumentation vision-aware (ExecutionLoop)
+    # Ces champs proviennent de `StepResult` (core/execution/execution_loop.py).
+    # Tous optionnels avec valeurs par défaut pour rétrocompatibilité.
+    ocr_ms: float = 0.0          # Temps OCR sur ce step
+    ui_ms: float = 0.0           # Temps détection UI sur ce step
+    analyze_ms: float = 0.0      # Temps analyse ScreenState (OCR + UI + reste)
+    total_ms: float = 0.0        # Temps total du step (alias duration_ms)
+    cache_hit: bool = False      # True si ScreenState vient du cache perceptuel
+    degraded: bool = False       # True si mode dégradé (timeout analyse)
+
    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for storage."""
        return {
@@ -92,9 +101,15 @@ class StepMetrics:
            'status': self.status,
            'confidence_score': self.confidence_score,
            'retry_count': self.retry_count,
-            'error_details': self.error_details
+            'error_details': self.error_details,
+            'ocr_ms': self.ocr_ms,
+            'ui_ms': self.ui_ms,
+            'analyze_ms': self.analyze_ms,
+            'total_ms': self.total_ms,
+            'cache_hit': self.cache_hit,
+            'degraded': self.degraded,
        }
-    
+
    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> 'StepMetrics':
        """Create from dictionary."""
@@ -111,7 +126,13 @@ class StepMetrics:
            status=data['status'],
            confidence_score=data['confidence_score'],
            retry_count=data.get('retry_count', 0),
-            error_details=data.get('error_details')
+            error_details=data.get('error_details'),
+            ocr_ms=float(data.get('ocr_ms') or 0.0),
+            ui_ms=float(data.get('ui_ms') or 0.0),
+            analyze_ms=float(data.get('analyze_ms') or 0.0),
+            total_ms=float(data.get('total_ms') or 0.0),
+            cache_hit=bool(data.get('cache_hit') or False),
+            degraded=bool(data.get('degraded') or False),
        )


--- a/core/analytics/integration/execution_integration.py
+++ b/core/analytics/integration/execution_integration.py
@@ -1,8 +1,8 @@
 """Integration of analytics with ExecutionLoop."""

 import logging
-from typing import Optional
-from datetime import datetime
+from typing import Any, Optional
+from datetime import datetime, timedelta
 import uuid

 from ..analytics_system import get_analytics_system
@@ -14,17 +14,35 @@ logger = logging.getLogger(__name__)
 class AnalyticsExecutionIntegration:
    """Integrate analytics collection with workflow execution."""
    
-    def __init__(self, enabled: bool = True):
+    def __init__(self, analytics_system: Any = True, enabled: Optional[bool] = None):
        """
        Initialize analytics integration.
-        
+
+        Accepte deux formes d'appel pour la rétrocompatibilité :
+          - ``AnalyticsExecutionIntegration(enabled=True)`` → auto-load du système
+          - ``AnalyticsExecutionIntegration(analytics_system_instance)`` →
+            utilise l'instance fournie (utilisé par ExecutionLoop)
+
        Args:
-            enabled: Whether analytics collection is enabled
+            analytics_system: Instance d'AnalyticsSystem pré-construite, ou
+                True/False pour activer/désactiver (legacy).
+            enabled: Legacy — si défini, prime sur analytics_system.
        """
-        self.enabled = enabled
-        self.analytics = None
-        
-        if enabled:
+        # Détection de la forme d'appel
+        if enabled is not None:
+            # Appel legacy explicite: AnalyticsExecutionIntegration(enabled=...)
+            self.enabled = bool(enabled)
+            self.analytics = None
+        elif isinstance(analytics_system, bool):
+            # Appel legacy: AnalyticsExecutionIntegration(True/False)
+            self.enabled = analytics_system
+            self.analytics = None
+        else:
+            # Nouvelle forme: instance injectée
+            self.enabled = analytics_system is not None
+            self.analytics = analytics_system
+
+        if self.enabled and self.analytics is None:
            try:
                self.analytics = get_analytics_system()
                logger.info("Analytics integration enabled")
@@ -36,37 +54,50 @@ class AnalyticsExecutionIntegration:
        self,
        workflow_id: str,
        execution_id: Optional[str] = None,
-        total_steps: int = 0
+        total_steps: int = 0,
+        mode: Optional[str] = None,
    ) -> str:
        """
-        Called when workflow execution starts.
-        
+        Appelé au démarrage d'une exécution de workflow.
+
        Args:
-            workflow_id: Workflow identifier
-            execution_id: Execution identifier (generated if None)
-            total_steps: Total number of steps
-        
+            workflow_id: Identifiant du workflow
+            execution_id: Identifiant d'exécution (généré si None)
+            total_steps: Nombre total d'étapes prévues
+            mode: Mode d'exécution (OBSERVATION / COACHING / SUPERVISED /
+                AUTOMATIC). Propagé en contexte pour MetricsCollector.
+
        Returns:
-            Execution ID
+            Identifiant d'exécution (celui fourni ou nouvellement généré).
        """
        if not self.enabled or not self.analytics:
            return execution_id or str(uuid.uuid4())
-        
+
        if execution_id is None:
            execution_id = str(uuid.uuid4())
-        
+
        try:
-            # Start real-time tracking
+            # Démarrage du tracking temps réel
            self.analytics.realtime_analytics.track_execution(
                execution_id=execution_id,
                workflow_id=workflow_id,
-                total_steps=total_steps
+                total_steps=total_steps,
            )
-            
+
+            # Ouverture de l'ExecutionMetrics côté collector (état "running").
+            # Cela permet à `on_execution_complete` d'appeler
+            # `record_execution_complete` qui clôture proprement.
+            context = {"mode": mode} if mode else {}
+            self.analytics.metrics_collector.record_execution_start(
+                execution_id=execution_id,
+                workflow_id=workflow_id,
+                context=context,
+            )
+
            logger.debug(f"Started tracking execution: {execution_id}")
        except Exception as e:
            logger.error(f"Error starting execution tracking: {e}")
-        
+
        return execution_id
    
    def on_step_start(
@@ -101,110 +132,249 @@ class AnalyticsExecutionIntegration:
        execution_id: str,
        workflow_id: str,
        node_id: str,
-        action_type: str,
-        started_at: datetime,
-        completed_at: datetime,
-        duration: float,
+        *,
+        duration_ms: float,
        success: bool,
-        error_message: Optional[str] = None
+        action_type: str = "",
+        started_at: Optional[datetime] = None,
+        completed_at: Optional[datetime] = None,
+        error_message: Optional[str] = None,
+        confidence: float = 0.0,
+        target_element: str = "",
+        retry_count: int = 0,
+        ocr_ms: float = 0.0,
+        ui_ms: float = 0.0,
+        analyze_ms: float = 0.0,
+        total_ms: float = 0.0,
+        cache_hit: bool = False,
+        degraded: bool = False,
+        step_id: Optional[str] = None,
    ) -> None:
        """
-        Called when a step completes.
-        
+        Appelé à la fin d'un step.
+
+        Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` est
+        obligatoire et en millisecondes. Plus de rétrocompat silencieuse
+        sur ``duration`` en secondes.
+
        Args:
-            execution_id: Execution identifier
-            workflow_id: Workflow identifier
-            node_id: Node identifier
-            action_type: Type of action
-            started_at: Start timestamp
-            completed_at: Completion timestamp
-            duration: Duration in seconds
-            success: Whether step succeeded
-            error_message: Error message if failed
+            execution_id: Identifiant d'exécution
+            workflow_id: Identifiant du workflow
+            node_id: Identifiant du node
+            duration_ms: Durée du step en millisecondes (obligatoire)
+            success: Vrai si le step a réussi
+            action_type: Type d'action (``click``, ``type``, …)
+            started_at: Timestamp de début (déduit de duration_ms si None)
+            completed_at: Timestamp de fin (``now()`` si None)
+            error_message: Message d'erreur si ``success=False``
+            confidence: Score de matching [0, 1]
+            target_element: Élément ciblé (optionnel)
+            retry_count: Nombre de retries
+            ocr_ms: Temps OCR (C1)
+            ui_ms: Temps détection UI (C1)
+            analyze_ms: Temps analyse ScreenState (C1)
+            total_ms: Temps total du step (C1, alias duration_ms)
+            cache_hit: ScreenState depuis cache perceptuel (C1)
+            degraded: Mode dégradé activé (C1)
+            step_id: ID unique du step (généré si None)
        """
        if not self.enabled or not self.analytics:
            return
-        
+
        try:
-            # Record step metrics
+            duration_ms_final = float(duration_ms)
+
+            # Normaliser les timestamps
+            if completed_at is None:
+                completed_at = datetime.now()
+            if started_at is None:
+                started_at = completed_at - timedelta(milliseconds=duration_ms_final)
+
            step_metrics = StepMetrics(
+                step_id=step_id or f"{execution_id}:{node_id}:{completed_at.isoformat()}",
                execution_id=execution_id,
                workflow_id=workflow_id,
                node_id=node_id,
-                action_type=action_type,
+                action_type=action_type or "unknown",
+                target_element=target_element,
                started_at=started_at,
                completed_at=completed_at,
-                duration=duration,
-                success=success,
-                error_message=error_message
+                duration_ms=duration_ms_final,
+                status="completed" if success else "failed",
+                confidence_score=float(confidence),
+                retry_count=retry_count,
+                error_details=error_message,
+                # C1 — vision-aware
+                ocr_ms=float(ocr_ms or 0.0),
+                ui_ms=float(ui_ms or 0.0),
+                analyze_ms=float(analyze_ms or 0.0),
+                total_ms=float(total_ms or duration_ms_final),
+                cache_hit=bool(cache_hit),
+                degraded=bool(degraded),
            )
-            
+
            self.analytics.metrics_collector.record_step(step_metrics)
-            
-            # Update real-time tracking
-            self.analytics.realtime_analytics.record_step_complete(
-                execution_id=execution_id,
-                success=success
+
+            # Tracking temps réel
+            try:
+                self.analytics.realtime_analytics.record_step_complete(
+                    execution_id=execution_id,
+                    success=success,
+                )
+            except Exception as rt_err:
+                logger.debug(f"Realtime tracking skipped: {rt_err}")
+
+            logger.debug(
+                f"Recorded step: {node_id} "
+                f"({'success' if success else 'failed'}, "
+                f"analyze_ms={analyze_ms:.0f}, cache_hit={cache_hit}, "
+                f"degraded={degraded})"
            )
-            
-            logger.debug(f"Recorded step: {node_id} ({'success' if success else 'failed'})")
        except Exception as e:
            logger.error(f"Error recording step completion: {e}")
+
+    def on_step_result(
+        self,
+        execution_id: str,
+        workflow_id: str,
+        step_result: Any,
+    ) -> None:
+        """
+        Raccourci C1 — enregistre un `StepResult` complet.
+
+        Évite aux appelants d'extraire manuellement les champs vision-aware.
+        Utilisé par ExecutionLoop pour pousser StepResult au système analytics.
+
+        Args:
+            execution_id: Identifiant d'exécution
+            workflow_id: Identifiant de workflow
+            step_result: Instance de `core.execution.execution_loop.StepResult`
+        """
+        if not self.enabled or not self.analytics:
+            return
+
+        action_type = "unknown"
+        try:
+            if getattr(step_result, "action_result", None) is not None:
+                ar = step_result.action_result
+                # ExecutionResult.action est optionnel selon la branche
+                action_type = (
+                    getattr(ar, "action_type", None)
+                    or getattr(ar, "action", None)
+                    or "unknown"
+                )
+        except Exception:
+            action_type = "unknown"
+
+        self.on_step_complete(
+            execution_id=execution_id,
+            workflow_id=workflow_id,
+            node_id=getattr(step_result, "node_id", "unknown"),
+            action_type=str(action_type),
+            success=bool(getattr(step_result, "success", False)),
+            error_message=None
+            if getattr(step_result, "success", False)
+            else getattr(step_result, "message", None),
+            duration_ms=float(getattr(step_result, "duration_ms", 0.0) or 0.0),
+            confidence=float(getattr(step_result, "match_confidence", 0.0) or 0.0),
+            ocr_ms=float(getattr(step_result, "ocr_ms", 0.0) or 0.0),
+            ui_ms=float(getattr(step_result, "ui_ms", 0.0) or 0.0),
+            analyze_ms=float(getattr(step_result, "analyze_ms", 0.0) or 0.0),
+            total_ms=float(getattr(step_result, "total_ms", 0.0) or 0.0),
+            cache_hit=bool(getattr(step_result, "cache_hit", False)),
+            degraded=bool(getattr(step_result, "degraded", False)),
+        )
    
    def on_execution_complete(
        self,
        execution_id: str,
        workflow_id: str,
-        started_at: datetime,
-        completed_at: datetime,
-        duration: float,
+        *,
+        duration_ms: float,
        status: str,
-        error_message: Optional[str] = None,
+        steps_total: Optional[int] = None,
        steps_completed: int = 0,
-        steps_failed: int = 0
+        steps_failed: int = 0,
+        error_message: Optional[str] = None,
    ) -> None:
        """
-        Called when workflow execution completes.
-        
+        Appelé à la fin d'une exécution de workflow.
+
+        Contrat normalisé (Lot A — avril 2026) :
+          - ``duration_ms`` en millisecondes, toujours. Plus de rétrocompat
+            silencieuse sur ``duration`` en secondes.
+          - ``status`` est une chaîne libre (``"completed"``, ``"failed"``,
+            ``"stopped"``, ``"timeout"``, …). L'appelant décide.
+          - ``steps_total`` / ``steps_completed`` / ``steps_failed`` : noms
+            alignés sur le dataclass ``ExecutionMetrics``. Si ``steps_total``
+            n'est pas fourni, on le déduit par somme.
+
        Args:
-            execution_id: Execution identifier
-            workflow_id: Workflow identifier
-            started_at: Start timestamp
-            completed_at: Completion timestamp
-            duration: Duration in seconds
-            status: Final status (success, failed, timeout)
-            error_message: Error message if failed
-            steps_completed: Number of steps completed
-            steps_failed: Number of steps failed
+            execution_id: Identifiant d'exécution
+            workflow_id: Identifiant du workflow
+            duration_ms: Durée totale en millisecondes
+            status: Statut final (``"completed"`` / ``"failed"`` / ``"stopped"``)
+            steps_total: Nombre total de steps exécutés (tous statuts confondus)
+            steps_completed: Nombre de steps réussis
+            steps_failed: Nombre de steps en échec
+            error_message: Message d'erreur si ``status != "completed"``
        """
        if not self.enabled or not self.analytics:
            return
-        
+
+        # steps_total dérivé si non fourni explicitement
+        if steps_total is None:
+            steps_total = int(steps_completed) + int(steps_failed)
+
        try:
-            # Record execution metrics
-            execution_metrics = ExecutionMetrics(
-                execution_id=execution_id,
-                workflow_id=workflow_id,
-                started_at=started_at,
-                completed_at=completed_at,
-                duration=duration,
-                status=status,
-                error_message=error_message,
-                steps_completed=steps_completed,
-                steps_failed=steps_failed
-            )
-            
-            self.analytics.metrics_collector.record_execution(execution_metrics)
-            
-            # Flush to ensure persistence
-            self.analytics.metrics_collector.flush()
-            
-            # Complete real-time tracking
+            collector = self.analytics.metrics_collector
+
+            # record_execution_complete clôture proprement un ExecutionMetrics
+            # ouvert par record_execution_start (chemin nominal via
+            # on_execution_start). Si l'état n'est pas présent (tests, legacy),
+            # on pousse un ExecutionMetrics synthétique directement.
+            completed_at = datetime.now()
+            started_at = completed_at - timedelta(milliseconds=float(duration_ms))
+
+            active = getattr(collector, "_active_executions", None)
+            if active is not None and execution_id in active:
+                collector.record_execution_complete(
+                    execution_id=execution_id,
+                    status=status,
+                    steps_total=int(steps_total),
+                    steps_completed=int(steps_completed),
+                    steps_failed=int(steps_failed),
+                    error_message=error_message,
+                )
+            else:
+                # Fallback explicite : on construit directement un ExecutionMetrics
+                # aligné sur le dataclass (duration_ms, status, steps_*).
+                execution_metrics = ExecutionMetrics(
+                    execution_id=execution_id,
+                    workflow_id=workflow_id,
+                    started_at=started_at,
+                    completed_at=completed_at,
+                    duration_ms=float(duration_ms),
+                    status=status,
+                    steps_total=int(steps_total),
+                    steps_completed=int(steps_completed),
+                    steps_failed=int(steps_failed),
+                    error_message=error_message,
+                )
+                # Le collector n'expose pas record_execution(...) : on pousse
+                # dans le buffer protégé par lock pour rester cohérent.
+                with collector._lock:
+                    collector._buffer.append(execution_metrics)
+
+            # Flush pour garantir la persistance immédiate
+            collector.flush()
+
+            # Clôture du tracking temps réel
            self.analytics.realtime_analytics.complete_execution(
                execution_id=execution_id,
-                status=status
+                status=status,
            )
-            
+
            logger.info(f"Recorded execution: {execution_id} ({status})")
        except Exception as e:
            logger.error(f"Error recording execution completion: {e}")
@@ -216,39 +386,54 @@ class AnalyticsExecutionIntegration:
        node_id: str,
        strategy: str,
        success: bool,
-        duration: float
+        duration_ms: float,
    ) -> None:
        """
-        Called when self-healing attempts recovery.
-        
+        Appelé quand le self-healing tente une récupération.
+
+        Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` en
+        millisecondes, cohérent avec ``on_execution_complete`` et
+        ``on_step_complete``. Le StepMetrics construit respecte strictement
+        le dataclass (``status``, ``duration_ms``, ``error_details``,
+        ``confidence_score``, ``target_element``, ``step_id``).
+
        Args:
-            execution_id: Execution identifier
-            workflow_id: Workflow identifier
-            node_id: Node identifier
-            strategy: Recovery strategy used
-            success: Whether recovery succeeded
-            duration: Recovery duration
+            execution_id: Identifiant d'exécution
+            workflow_id: Identifiant du workflow
+            node_id: Node où la récupération est tentée
+            strategy: Stratégie de récupération employée
+            success: Vrai si la récupération a réussi
+            duration_ms: Durée de la tentative en millisecondes
        """
        if not self.enabled or not self.analytics:
            return
-        
+
        try:
-            # Record as a special step metric
+            now = datetime.now()
+            started_at = now - timedelta(milliseconds=float(duration_ms))
+
            recovery_metrics = StepMetrics(
+                step_id=f"{execution_id}:{node_id}:recovery:{now.isoformat()}",
                execution_id=execution_id,
                workflow_id=workflow_id,
                node_id=f"{node_id}_recovery",
                action_type=f"recovery_{strategy}",
-                started_at=datetime.now(),
-                completed_at=datetime.now(),
-                duration=duration,
-                success=success,
-                error_message=None if success else f"Recovery failed: {strategy}"
+                target_element="",
+                started_at=started_at,
+                completed_at=now,
+                duration_ms=float(duration_ms),
+                status="completed" if success else "failed",
+                confidence_score=0.0,
+                retry_count=0,
+                error_details=None if success else f"Recovery failed: {strategy}",
            )
-            
+
            self.analytics.metrics_collector.record_step(recovery_metrics)
-            
-            logger.debug(f"Recorded recovery: {strategy} ({'success' if success else 'failed'})")
+
+            logger.debug(
+                f"Recorded recovery: {strategy} "
+                f"({'success' if success else 'failed'})"
+            )
        except Exception as e:
            logger.error(f"Error recording recovery attempt: {e}")
    
--- a/core/analytics/storage/timeseries_store.py
+++ b/core/analytics/storage/timeseries_store.py
@@ -42,6 +42,8 @@ class TimeSeriesStore:
        ON execution_metrics(started_at);
    
    -- Step metrics table
+    -- Les colonnes ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded
+    -- proviennent de l'instrumentation vision-aware (C1) de ExecutionLoop.
    CREATE TABLE IF NOT EXISTS step_metrics (
        step_id TEXT PRIMARY KEY,
        execution_id TEXT NOT NULL,
@@ -56,6 +58,12 @@ class TimeSeriesStore:
        confidence_score REAL,
        retry_count INTEGER DEFAULT 0,
        error_details TEXT,
+        ocr_ms REAL DEFAULT 0.0,
+        ui_ms REAL DEFAULT 0.0,
+        analyze_ms REAL DEFAULT 0.0,
+        total_ms REAL DEFAULT 0.0,
+        cache_hit INTEGER DEFAULT 0,
+        degraded INTEGER DEFAULT 0,
        FOREIGN KEY (execution_id) REFERENCES execution_metrics(execution_id)
    );
    
@@ -101,11 +109,40 @@ class TimeSeriesStore:
        
        logger.info(f"TimeSeriesStore initialized at {self.db_path}")
    
+    # Colonnes ajoutées ultérieurement — appliquées via ALTER TABLE si absentes.
+    # (C1 — instrumentation vision-aware, avril 2026)
+    _STEP_METRICS_MIGRATIONS = [
+        ("ocr_ms", "REAL DEFAULT 0.0"),
+        ("ui_ms", "REAL DEFAULT 0.0"),
+        ("analyze_ms", "REAL DEFAULT 0.0"),
+        ("total_ms", "REAL DEFAULT 0.0"),
+        ("cache_hit", "INTEGER DEFAULT 0"),
+        ("degraded", "INTEGER DEFAULT 0"),
+    ]
+
    def _init_database(self) -> None:
-        """Initialize database schema."""
+        """Initialize database schema and apply lightweight migrations."""
        with self._get_connection() as conn:
            conn.executescript(self.SCHEMA)
+            self._migrate_step_metrics(conn)
            conn.commit()
+
+    def _migrate_step_metrics(self, conn: sqlite3.Connection) -> None:
+        """Ajoute les colonnes C1 sur une base `step_metrics` pré-existante."""
+        cursor = conn.execute("PRAGMA table_info(step_metrics)")
+        existing = {row[1] for row in cursor.fetchall()}
+        for column, ddl in self._STEP_METRICS_MIGRATIONS:
+            if column not in existing:
+                try:
+                    conn.execute(
+                        f"ALTER TABLE step_metrics ADD COLUMN {column} {ddl}"
+                    )
+                    logger.info(
+                        f"Migration step_metrics: ajout colonne {column}"
+                    )
+                except sqlite3.OperationalError as e:
+                    # Collision bénigne (colonne déjà ajoutée par un autre process)
+                    logger.debug(f"Migration colonne {column} ignorée: {e}")
    
    @contextmanager
    def _get_connection(self):
@@ -164,13 +201,14 @@ class TimeSeriesStore:
        ))
    
    def _write_step_metric(self, conn: sqlite3.Connection, metric: StepMetrics) -> None:
-        """Write step metric."""
+        """Write step metric (inclut les champs vision-aware C1)."""
        conn.execute("""
            INSERT OR REPLACE INTO step_metrics
            (step_id, execution_id, workflow_id, node_id, action_type, target_element,
             started_at, completed_at, duration_ms, status, confidence_score,
-             retry_count, error_details)
-            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+             retry_count, error_details,
+             ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            metric.step_id,
            metric.execution_id,
@@ -184,7 +222,13 @@ class TimeSeriesStore:
            metric.status,
            metric.confidence_score,
            metric.retry_count,
-            metric.error_details
+            metric.error_details,
+            getattr(metric, 'ocr_ms', 0.0),
+            getattr(metric, 'ui_ms', 0.0),
+            getattr(metric, 'analyze_ms', 0.0),
+            getattr(metric, 'total_ms', 0.0),
+            1 if getattr(metric, 'cache_hit', False) else 0,
+            1 if getattr(metric, 'degraded', False) else 0,
        ))
    
    def _write_resource_metric(self, conn: sqlite3.Connection, metric: ResourceMetrics) -> None:
--- a/core/pipeline/workflow_pipeline.py
+++ b/core/pipeline/workflow_pipeline.py
@@ -354,66 +354,306 @@ class WorkflowPipeline:
    # =========================================================================
    # Mode MATCHING : Reconnaissance de l'état actuel
    # =========================================================================
-    
+
+    def match_current_state_from_state(
+        self,
+        screen_state: ScreenState,
+        workflow_id: Optional[str] = None,
+        *,
+        min_similarity: float = 0.5,
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Matcher un ``ScreenState`` enrichi contre les nodes d'un workflow.
+
+        Lot E — premier vrai matching context-aware. Cette méthode consomme
+        directement le ``ScreenState`` déjà construit par ``ExecutionLoop``
+        (avec ``window_title``, ``detected_text`` et ``ui_elements``
+        renseignés par le ``ScreenAnalyzer``) au lieu de reconstruire un
+        stub vide avec ``window_title="Unknown"``.
+
+        Stratégie :
+          1. Si le ``HierarchicalMatcher`` est disponible ET que le workflow
+             cible est chargeable, on privilégie le matching multi-niveau
+             (fenêtre → région → élément) qui exploite pleinement les
+             ``ui_elements`` et le ``window_title``.
+          2. Sinon on retombe sur le matching par embedding via FAISS
+             (même logique que l'ancien ``match_current_state``, mais avec
+             le ``ScreenState`` fourni, pas un stub).
+
+        Args:
+            screen_state: ``ScreenState`` complet (ui_elements + detected_text
+                + window_info) construit en amont par l'``ExecutionLoop``.
+            workflow_id: ID du workflow cible (tous si None).
+            min_similarity: seuil minimum de confidence pour considérer un
+                match valide. Conserve la sémantique historique (0.5 pour
+                le hiérarchique, 0.85 pour le FAISS fallback).
+
+        Returns:
+            Dict avec ``node_id``, ``workflow_id``, ``confidence`` (+ détails
+            du matching hiérarchique si applicable), ou ``None`` si aucun
+            match ne dépasse le seuil.
+        """
+        logger.debug(
+            "Matching ScreenState (app=%s, title=%s, ui_elements=%d, "
+            "detected_text=%d)",
+            screen_state.window.app_name,
+            screen_state.window.window_title,
+            len(screen_state.ui_elements),
+            len(screen_state.perception.detected_text),
+        )
+
+        # --- Stratégie 1 : matching hiérarchique si workflow disponible ---
+        if workflow_id:
+            workflow = self.load_workflow(workflow_id)
+            if workflow is not None and getattr(workflow, "nodes", None):
+                try:
+                    hier_result = self._match_hierarchical_from_state(
+                        screen_state=screen_state,
+                        workflow=workflow,
+                        workflow_id=workflow_id,
+                        min_similarity=min_similarity,
+                    )
+                    if hier_result is not None:
+                        return hier_result
+                except Exception as exc:
+                    # Ne jamais casser le matching sur une erreur du
+                    # matcher hiérarchique : on retombe sur FAISS.
+                    logger.debug(
+                        f"Hierarchical matching failed, fallback FAISS: {exc}"
+                    )
+
+        # --- Stratégie 2 : fallback embedding + FAISS ---
+        return self._match_via_faiss(
+            screen_state=screen_state,
+            workflow_id=workflow_id,
+            min_similarity=min_similarity,
+        )
+
+    def _match_hierarchical_from_state(
+        self,
+        screen_state: ScreenState,
+        workflow: Workflow,
+        workflow_id: str,
+        min_similarity: float,
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Déléguer le matching au ``HierarchicalMatcher`` en extrayant
+        ``window_info``, ``detected_elements`` et le screenshot à partir du
+        ``ScreenState`` fourni. Factorise la logique de ``match_hierarchical``
+        sans re-ouvrir l'image si ce n'est pas nécessaire.
+        """
+        # Reconstruire window_info à partir du ScreenState (pas "Unknown")
+        window_info = {
+            "title": screen_state.window.window_title,
+            "app_name": screen_state.window.app_name,
+            "window_title": screen_state.window.window_title,
+        }
+        detected_elements = list(screen_state.ui_elements)
+
+        # Ouvrir le screenshot si nécessaire (le matcher peut en avoir besoin
+        # pour du matching au niveau région). Si le chemin n'existe pas, on
+        # passe None et laisse le matcher travailler avec window + elements.
+        screenshot = None
+        path = screen_state.raw.screenshot_path
+        if path:
+            try:
+                from PIL import Image
+                screenshot = Image.open(path)
+            except Exception as exc:
+                logger.debug(f"Screenshot unavailable for hierarchical match: {exc}")
+
+        # Contexte temporel par workflow
+        if workflow_id not in self._temporal_context:
+            self._temporal_context[workflow_id] = TemporalContext()
+        temporal_context = self._temporal_context[workflow_id]
+
+        result: MatchResult = self.hierarchical_matcher.match(
+            screenshot=screenshot,
+            workflow=workflow,
+            window_info=window_info,
+            detected_elements=detected_elements,
+            temporal_context=temporal_context,
+        )
+
+        if result.confidence < min_similarity:
+            logger.debug(
+                f"Hierarchical match below threshold: {result.confidence:.3f} "
+                f"(min={min_similarity})"
+            )
+            return None
+
+        # Mémoriser le match pour le boost temporel suivant
+        temporal_context.add_match(result.node_id, result.confidence)
+
+        return {
+            "node_id": result.node_id,
+            "workflow_id": workflow_id,
+            "confidence": result.confidence,
+            "window_confidence": result.window_confidence,
+            "region_confidence": result.region_confidence,
+            "element_confidence": result.element_confidence,
+            "temporal_boost": result.temporal_boost,
+            "matched_variant": result.matched_variant,
+            "alternatives": [
+                {"node_id": alt.node_id, "confidence": alt.confidence}
+                for alt in result.alternatives
+            ],
+            "match_time_ms": result.match_time_ms,
+            "match_type": "hierarchical",
+        }
+
+    def _match_via_faiss(
+        self,
+        screen_state: ScreenState,
+        workflow_id: Optional[str],
+        min_similarity: float,
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Fallback embedding + recherche FAISS. On réutilise le ``ScreenState``
+        fourni (donc ses ``ui_elements`` et son ``window_title`` réels)
+        au lieu d'en recréer un stub.
+        """
+        # Le seuil FAISS historique était 0.85. On l'honore comme plancher
+        # par défaut mais on respecte un ``min_similarity`` plus permissif
+        # si l'appelant en fournit un (hiérarchique pouvant déjà avoir échoué).
+        threshold = max(min_similarity, 0.85)
+
+        state_embedding = self.embedding_builder.build(screen_state)
+        query_vector = state_embedding.get_vector()
+
+        results = self.faiss_manager.search(query_vector, k=5)
+        if not results:
+            logger.debug("No match found in FAISS")
+            return None
+
+        for result in results:
+            metadata = result.get("metadata", {})
+            result_workflow_id = metadata.get("workflow_id")
+
+            if workflow_id and result_workflow_id != workflow_id:
+                continue
+
+            similarity = result.get("similarity", 0)
+            if similarity >= threshold:
+                return {
+                    "node_id": metadata.get("node_id"),
+                    "workflow_id": result_workflow_id,
+                    "confidence": similarity,
+                    "state_embedding_id": state_embedding.embedding_id,
+                    "match_type": "faiss",
+                }
+
+        logger.debug(
+            f"Best FAISS match below threshold: "
+            f"{results[0].get('similarity', 0):.3f} (min={threshold})"
+        )
+        return None
+
    def match_current_state(
        self,
        screenshot_path: str,
        workflow_id: Optional[str] = None,
-        window_title: Optional[str] = None
+        window_title: Optional[str] = None,
    ) -> Optional[Dict[str, Any]]:
        """
-        Identifier dans quel node se trouve l'écran actuel.
-        
+        Identifier dans quel node se trouve l'écran actuel (API legacy).
+
+        Lot E — cette méthode est désormais un **wrapper** de rétrocompat :
+        elle construit un ``ScreenState`` enrichi via ``ScreenAnalyzer``
+        (au lieu d'un stub avec ``window_title="Unknown"``) puis délègue
+        à ``match_current_state_from_state``. Garantit la compat pour les
+        callers externes qui ne manipulent que le chemin du screenshot.
+
        Args:
-            screenshot_path: Chemin vers le screenshot actuel
-            workflow_id: ID du workflow à matcher (tous si None)
-            window_title: Titre de fenêtre pour contexte
-        
+            screenshot_path: Chemin vers le screenshot actuel.
+            workflow_id: ID du workflow à matcher (tous si None).
+            window_title: Titre de fenêtre pour contexte (utilisé comme
+                hint si le ScreenAnalyzer n'est pas disponible).
+
        Returns:
-            Dict avec node_id, workflow_id, confidence, ou None si pas de match
+            Dict avec ``node_id``, ``workflow_id``, ``confidence``, ou
+            ``None`` si pas de match.
        """
        logger.debug(f"Matching screenshot: {screenshot_path}")
-        
-        # Créer un ScreenState temporaire
+
+        # Construire un ScreenState enrichi via le ScreenAnalyzer partagé.
+        screen_state = self._build_screen_state_for_matching(
+            screenshot_path=screenshot_path,
+            workflow_id=workflow_id,
+            window_title=window_title,
+        )
+
+        return self.match_current_state_from_state(
+            screen_state=screen_state,
+            workflow_id=workflow_id,
+        )
+
+    def _build_screen_state_for_matching(
+        self,
+        screenshot_path: str,
+        workflow_id: Optional[str],
+        window_title: Optional[str],
+    ) -> ScreenState:
+        """
+        Construire un ``ScreenState`` pour l'API legacy ``match_current_state``.
+
+        Tente d'utiliser le ``ScreenAnalyzer`` partagé ; en cas d'échec,
+        retombe sur un stub minimaliste (équivalent fonctionnel de l'ancien
+        comportement, mais clairement isolé ici).
+        """
        from core.models.screen_state import (
            WindowContext, RawLevel, PerceptionLevel, ContextLevel, EmbeddingRef
        )
-        
-        screenshot_path = Path(screenshot_path)
-        
+
+        path = Path(screenshot_path)
+
+        # Tentative 1 : ScreenAnalyzer partagé (résultat enrichi)
+        try:
+            from core.pipeline import get_screen_analyzer
+            analyzer = get_screen_analyzer()
+            if analyzer is not None:
+                window_info = None
+                if window_title:
+                    window_info = {"title": window_title, "app_name": "unknown"}
+                return analyzer.analyze(
+                    str(path),
+                    window_info=window_info,
+                )
+        except Exception as exc:
+            logger.debug(
+                f"ScreenAnalyzer unavailable in match_current_state wrapper: {exc}"
+            )
+
+        # Tentative 2 : stub minimal (comportement legacy d'urgence)
        window = WindowContext(
            app_name="unknown",
            window_title=window_title or "Unknown",
            screen_resolution=[1920, 1080],
-            workspace="main"
+            workspace="main",
        )
-        
        raw = RawLevel(
-            screenshot_path=str(screenshot_path),
+            screenshot_path=str(path),
            capture_method="manual",
-            file_size_bytes=screenshot_path.stat().st_size if screenshot_path.exists() else 0
+            file_size_bytes=path.stat().st_size if path.exists() else 0,
        )
-        
        perception = PerceptionLevel(
            embedding=EmbeddingRef(
                provider="openclip_ViT-B-32",
                vector_id="temp",
-                dimensions=512
+                dimensions=512,
            ),
            detected_text=[],
            text_detection_method="pending",
-            confidence_avg=0.0
+            confidence_avg=0.0,
        )
-        
        context = ContextLevel(
            current_workflow_candidate=workflow_id,
            workflow_step=None,
            user_id="matcher",
            tags=[],
-            business_variables={}
+            business_variables={},
        )
-        
-        current_state = ScreenState(
+        return ScreenState(
            screen_state_id=f"match_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            timestamp=datetime.now(),
            session_id="matching",
@@ -421,39 +661,8 @@ class WorkflowPipeline:
            raw=raw,
            perception=perception,
            context=context,
-            ui_elements=[]
+            ui_elements=[],
        )
-        
-        # Calculer embedding
-        state_embedding = self.embedding_builder.build(current_state)
-        query_vector = state_embedding.get_vector()
-        
-        # Rechercher dans FAISS
-        results = self.faiss_manager.search(query_vector, k=5)
-        
-        if not results:
-            logger.debug("No match found in FAISS")
-            return None
-        
-        # Filtrer par workflow si spécifié
-        for result in results:
-            metadata = result.get("metadata", {})
-            result_workflow_id = metadata.get("workflow_id")
-            
-            if workflow_id and result_workflow_id != workflow_id:
-                continue
-            
-            similarity = result.get("similarity", 0)
-            if similarity >= 0.85:  # Seuil de matching
-                return {
-                    "node_id": metadata.get("node_id"),
-                    "workflow_id": result_workflow_id,
-                    "confidence": similarity,
-                    "state_embedding_id": state_embedding.embedding_id
-                }
-        
-        logger.debug(f"Best match below threshold: {results[0].get('similarity', 0):.3f}")
-        return None
    
    def match_hierarchical(
        self,
@@ -548,17 +757,56 @@ class WorkflowPipeline:
    def get_next_action(
        self,
        workflow_id: str,
-        current_node_id: str
-    ) -> Optional[Dict[str, Any]]:
+        current_node_id: str,
+        screen_state: Optional[ScreenState] = None,
+        strategy: str = "best",
+        source_similarity: float = 1.0,
+    ) -> Dict[str, Any]:
        """
        Obtenir la prochaine action à exécuter.
-        
+
+        Contrat normalisé (Lot A — avril 2026) : retourne **toujours** un
+        dict avec une clé ``status`` non-ambiguë. Le ``None`` ambigu qui
+        confondait "workflow terminé" et "aucun edge valide" a été
+        supprimé : l'appelant (ExecutionLoop) peut désormais distinguer
+        ces cas pour déclencher une pause supervisée plutôt qu'une fin
+        de workflow faux-positive.
+
+        Sélection d'edge (C3) :
+          - Filtre dur sur ``pre_conditions`` (EdgeConstraints)
+          - Ranking par score composite (success_rate, target_match, recency)
+          - Tiebreak : success_rate le plus haut
+
        Args:
            workflow_id: ID du workflow
            current_node_id: ID du node actuel
-        
+            screen_state: État courant, requis pour évaluer les
+                ``pre_conditions`` et le match ``target_spec``. Si None,
+                fallback sur la logique sans filtre de contraintes.
+            strategy: ``"best"`` (défaut, scoring complet) ou ``"first"``
+                (mode legacy, premier edge sans tri)
+            source_similarity: confiance du matching (``match_current_state``)
+                qui a identifié ``current_node_id``. Propagée à l'EdgeScorer
+                pour activer la précondition ``min_source_similarity`` des
+                edges. Défaut ``1.0`` pour compat avec les appelants qui
+                ne la fournissent pas encore (Lot B — avril 2026).
+
        Returns:
-            Dict avec action, target_node, confidence, ou None
+            Dict avec l'une des formes suivantes :
+
+            - ``{"status": "selected", "edge_id": str, "action": dict,
+              "target_node": str, "confidence": float, "score": float}``
+              → edge sélectionné, l'ExecutionLoop doit l'exécuter.
+
+            - ``{"status": "terminal"}`` → le node courant n'a pas
+              d'outgoing_edge (fin légitime de workflow).
+
+            - ``{"status": "blocked", "reason": str}`` → il existe des
+              outgoing_edges mais aucun ne satisfait les conditions
+              (``reason="no_valid_edge"``), ou le workflow est introuvable
+              (``reason="workflow_not_found"``). L'ExecutionLoop doit
+              déclencher une pause supervisée et ne **jamais** traiter
+              ce cas comme un succès.
        """
        workflow = self._workflows.get(workflow_id)
        if not workflow:
@@ -569,23 +817,44 @@ class WorkflowPipeline:
                self._workflows[workflow_id] = workflow
            else:
                logger.error(f"Workflow not found: {workflow_id}")
-                return None
-        
+                return {"status": "blocked", "reason": "workflow_not_found"}
+
        # Trouver les edges sortants du node actuel
        outgoing_edges = workflow.get_outgoing_edges(current_node_id)
-        
+
        if not outgoing_edges:
+            # Aucun outgoing_edge = fin légitime du workflow
            logger.info(f"No outgoing edges from node {current_node_id}")
-            return None
-        
-        # Pour l'instant, prendre le premier edge (TODO: logique de sélection)
-        edge = outgoing_edges[0]
-        
+            return {"status": "terminal"}
+
+        # Sélection robuste via EdgeScorer (C3)
+        from core.pipeline.edge_scorer import EdgeScorer
+
+        scorer = EdgeScorer()
+        edge = scorer.select_best(
+            outgoing_edges,
+            screen_state=screen_state,
+            strategy=strategy,
+            source_similarity=source_similarity,
+        )
+
+        if edge is None:
+            # Il y avait des candidats mais aucun n'a passé les filtres.
+            # On NE retourne PAS "terminal" : l'ExecutionLoop doit traiter
+            # ce cas comme un blocage et demander de l'aide.
+            logger.warning(
+                f"No valid edge from {current_node_id} "
+                f"({len(outgoing_edges)} candidates rejected)"
+            )
+            return {"status": "blocked", "reason": "no_valid_edge"}
+
        return {
+            "status": "selected",
            "edge_id": edge.edge_id,
            "action": edge.action.to_dict(),
            "target_node": edge.to_node,
-            "confidence": edge.stats.success_rate if edge.stats else 1.0
+            "confidence": edge.stats.success_rate if edge.stats else 1.0,
+            "score": edge.stats.success_rate if edge.stats else 1.0,
        }
    
    def should_execute_automatically(self, workflow_id: str) -> bool:
@@ -759,10 +1028,11 @@ class WorkflowPipeline:
            current_node_id = match_result["node_id"]
            logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
            
-            # 2. Obtenir la prochaine action
+            # 2. Obtenir la prochaine action (contrat dict avec status explicite)
            action_info = self.get_next_action(workflow_id, current_node_id)
-            
-            if not action_info:
+            action_status = action_info.get("status")
+
+            if action_status == "terminal":
                return {
                    "execution_id": execution_id,
                    "workflow_id": workflow_id,
@@ -771,9 +1041,21 @@ class WorkflowPipeline:
                    "message": "Workflow completed - no more actions",
                    "current_node": current_node_id,
                    "execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
-                    "correlation_id": execution_id
+                    "correlation_id": execution_id,
                }
-            
+
+            if action_status == "blocked":
+                return {
+                    "execution_id": execution_id,
+                    "workflow_id": workflow_id,
+                    "success": False,
+                    "step_type": "action_selection",
+                    "error": f"No valid edge: {action_info.get('reason', 'unknown')}",
+                    "current_node": current_node_id,
+                    "execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
+                    "correlation_id": execution_id,
+                }
+
            logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
            
            # 3. Charger le workflow pour obtenir l'edge complet
--- a/core/pipeline/workflow_pipeline_enhanced.py
+++ b/core/pipeline/workflow_pipeline_enhanced.py
@@ -125,25 +125,47 @@ class WorkflowPipelineEnhanced:
            current_node_id = match_result["node_id"]
            logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
            
-            # 2. Obtenir la prochaine action
+            # 2. Obtenir la prochaine action (contrat dict avec status explicite)
            action_info = self.get_next_action(workflow_id, current_node_id)
-            
-            if not action_info:
-                # Workflow terminé
+            action_status = action_info.get("status")
+
+            if action_status == "terminal":
+                # Workflow terminé (aucun outgoing_edge = fin légitime)
                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
-                
+
                result = WorkflowExecutionResult.workflow_complete(
                    execution_id=execution_id,
                    workflow_id=workflow_id,
                    current_node=current_node_id,
-                    performance_metrics=performance_metrics
+                    performance_metrics=performance_metrics,
                )
                result.correlation_id = correlation_id
                result.match_result = match_result
-                
+
                logger.info(f"Workflow {workflow_id} completed at node {current_node_id}")
                return result
-            
+
+            if action_status == "blocked":
+                # Des edges existent mais aucun ne passe les filtres :
+                # c'est un blocage, pas une fin de workflow.
+                performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
+
+                result = WorkflowExecutionResult.error(
+                    execution_id=execution_id,
+                    workflow_id=workflow_id,
+                    error_message=f"No valid edge: {action_info.get('reason', 'unknown')}",
+                    step_type="action_selection",
+                    current_node=current_node_id,
+                    performance_metrics=performance_metrics,
+                )
+                result.correlation_id = correlation_id
+
+                logger.warning(
+                    f"Workflow {workflow_id} blocked at node {current_node_id}: "
+                    f"{action_info.get('reason')}"
+                )
+                return result
+
            logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
            
            # 3. Charger le workflow pour obtenir l'edge complet