v1.0 - Stable release: multi-PC, UI-DETR-1 detection, 3 execution modes
- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU working
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
core/evaluation/failure_case_recorder.py (new file, 432 lines)
@@ -0,0 +1,432 @@
"""core/evaluation/failure_case_recorder.py

Fiche #19 - Failure Case Recorder

Captures "failure cases" as reproducible case folders.

Directory structure created:
data/failure_cases/YYYY-MM-DD/case_<timestamp>_<sig8>/
- failure.json
- screen_state.json
- target_spec.json (if available)
- edge.json (if available)
- execution_result.json (if available)
- ui_elements.json (if available)
- screenshot.png (if available)

Notes:
- The code is deliberately tolerant: it tries several paths/field names
(raw/raw_level/screenshot_path/to_json/to_dict...).
- The goal is not a perfect export, but a *replayable* folder that is
usable for debugging + dataset building.

Author: Dom, Alice Kiro - December 2025
"""

from __future__ import annotations

import json
import logging
import shutil
from dataclasses import asdict, is_dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# Serialization utilities
# ---------------------------------------------------------------------------

def _is_primitive(x: Any) -> bool:
    return x is None or isinstance(x, (str, int, float, bool))


def _safe_jsonable(obj: Any, *, _depth: int = 0, _max_depth: int = 6) -> Any:
    """Best-effort conversion of an arbitrary object into a JSON-safe structure."""
    if _depth > _max_depth:
        return repr(obj)

    if _is_primitive(obj):
        return obj

    if isinstance(obj, datetime):
        return obj.isoformat()

    if isinstance(obj, Path):
        return str(obj)

    if is_dataclass(obj):
        try:
            return _safe_jsonable(asdict(obj), _depth=_depth + 1)
        except Exception:
            return repr(obj)

    # Pydantic v2
    if hasattr(obj, "model_dump") and callable(getattr(obj, "model_dump")):
        try:
            return _safe_jsonable(obj.model_dump(), _depth=_depth + 1)
        except Exception:
            pass

    # to_dict / to_json
    for meth in ("to_dict", "to_json"):
        if hasattr(obj, meth) and callable(getattr(obj, meth)):
            try:
                return _safe_jsonable(getattr(obj, meth)(), _depth=_depth + 1)
            except Exception:
                pass

    if isinstance(obj, dict):
        out = {}
        for k, v in obj.items():
            try:
                out[str(k)] = _safe_jsonable(v, _depth=_depth + 1)
            except Exception:
                out[str(k)] = repr(v)
        return out

    if isinstance(obj, (list, tuple, set)):
        return [_safe_jsonable(x, _depth=_depth + 1) for x in list(obj)]

    # numpy / array-likes (without depending on numpy)
    if hasattr(obj, "tolist") and callable(getattr(obj, "tolist")):
        try:
            return obj.tolist()
        except Exception:
            pass

    return repr(obj)


def _write_json(path: Path, data: Any) -> None:
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(_safe_jsonable(data), f, indent=2, ensure_ascii=False)


# ---------------------------------------------------------------------------
# Field extraction (tolerant)
# ---------------------------------------------------------------------------

def _get_attr_chain(obj: Any, chain: List[str]) -> Any:
    cur = obj
    for name in chain:
        if cur is None:
            return None
        if not hasattr(cur, name):
            return None
        cur = getattr(cur, name)
    return cur


def _extract_screenshot_path(screen_state: Any) -> Optional[Path]:
    """Try to recover a screenshot path from the different ScreenState variants."""
    # 1) screenshot_path property (implemented in core/models/screen_state.py)
    for chain in (
        ["screenshot_path"],
        ["raw", "screenshot_path"],
        ["raw_level", "screenshot_path"],
        ["raw", "screenshot"],
        ["raw_level", "screenshot"],
    ):
        try:
            val = _get_attr_chain(screen_state, chain)
            if val:
                p = Path(str(val))
                if p.exists():
                    return p
                # relative-path attempts
                cwd_p = (Path.cwd() / p).resolve()
                if cwd_p.exists():
                    return cwd_p
        except Exception:
            continue

    # 2) dict-like
    try:
        if isinstance(screen_state, dict):
            for key in ("screenshot_path", "screenshot"):
                if screen_state.get(key):
                    p = Path(str(screen_state[key]))
                    if p.exists():
                        return p
    except Exception:
        pass

    return None


def _extract_window_info(screen_state: Any) -> Dict[str, Any]:
    info: Dict[str, Any] = {}
    # window_title / app_name
    try:
        title = _get_attr_chain(screen_state, ["window", "window_title"]) or _get_attr_chain(screen_state, ["window", "title"])
        app = _get_attr_chain(screen_state, ["window", "app_name"]) or _get_attr_chain(screen_state, ["window", "app"])
        if title:
            info["window_title"] = str(title)
        if app:
            info["app_name"] = str(app)
    except Exception:
        pass

    # resolution
    try:
        res = _get_attr_chain(screen_state, ["window", "screen_resolution"])
        if res:
            info["screen_resolution"] = list(res)
    except Exception:
        pass

    return info


def _extract_ids(screen_state: Any) -> Dict[str, Any]:
    ids: Dict[str, Any] = {}
    for k in ("state_id", "session_id"):
        try:
            v = getattr(screen_state, k, None)
            if v:
                ids[k] = str(v)
        except Exception:
            pass
    return ids


def _extract_ui_elements(screen_state: Any) -> List[Any]:
    """Best-effort extraction of the UI elements from the different variants."""
    # ScreenState v3: top-level ui_elements
    try:
        elems = getattr(screen_state, "ui_elements", None)
        if elems:
            return list(elems)
    except Exception:
        pass

    # fallback: perception.ui_elements / perception_level.ui_elements
    for chain in (
        ["perception", "ui_elements"],
        ["perception_level", "ui_elements"],
    ):
        try:
            elems = _get_attr_chain(screen_state, chain)
            if elems:
                return list(elems)
        except Exception:
            continue

    return []


# ---------------------------------------------------------------------------
# Recorder
# ---------------------------------------------------------------------------


class FailureCaseRecorder:
    """Captures and persists failure cases as reproducible case folders."""

    def __init__(self, base_dir: str = "data/failure_cases"):
        self.base_dir = Path(base_dir)
        self.base_dir.mkdir(parents=True, exist_ok=True)

    # ---------------------------------------------------------------------
    # High-level API
    # ---------------------------------------------------------------------

    def record_action_failure(
        self,
        *,
        failure_type: str,
        reason: str,
        screen_state: Any,
        target_spec: Optional[Any] = None,
        edge: Optional[Any] = None,
        execution_result: Optional[Any] = None,
        extra: Optional[Dict[str, Any]] = None,
        ui_elements: Optional[List[Any]] = None,
    ) -> Optional[Path]:
        """Record a failure case for an action/edge."""
        try:
            return self._record_case(
                failure_type=failure_type,
                reason=reason,
                screen_state=screen_state,
                target_spec=target_spec,
                edge=edge,
                execution_result=execution_result,
                extra=extra,
                ui_elements=ui_elements,
            )
        except Exception as e:
            logger.debug(f"FailureCaseRecorder failed: {e}")
            return None

    def record_matching_failure(
        self,
        *,
        reason: str,
        screen_state: Any,
        best_confidence: float,
        threshold: float,
        candidate_nodes: Optional[List[Any]] = None,
        extra: Optional[Dict[str, Any]] = None,
        ui_elements: Optional[List[Any]] = None,
    ) -> Optional[Path]:
        """Record a failure case for a (node) matching failure."""
        payload_extra = {
            "best_confidence": float(best_confidence),
            "threshold": float(threshold),
            "candidate_nodes": [
                {
                    "node_id": getattr(n, "node_id", getattr(n, "id", "")),
                    "name": getattr(n, "name", getattr(n, "label", "")),
                }
                for n in (candidate_nodes or [])
            ],
        }
        if extra:
            payload_extra.update(extra)

        return self.record_action_failure(
            failure_type="MATCHING_FAILED",
            reason=reason,
            screen_state=screen_state,
            target_spec=None,
            edge=None,
            execution_result=None,
            extra=payload_extra,
            ui_elements=ui_elements,
        )

    # ---------------------------------------------------------------------
    # Impl
    # ---------------------------------------------------------------------

    def _record_case(
        self,
        *,
        failure_type: str,
        reason: str,
        screen_state: Any,
        target_spec: Optional[Any],
        edge: Optional[Any],
        execution_result: Optional[Any],
        extra: Optional[Dict[str, Any]],
        ui_elements: Optional[List[Any]],
    ) -> Path:
        now = datetime.now()
        day_dir = self.base_dir / now.strftime("%Y-%m-%d")
        day_dir.mkdir(parents=True, exist_ok=True)

        # UI elements
        elems = ui_elements if ui_elements is not None else _extract_ui_elements(screen_state)

        # Screen signature (if the module is available)
        sig = ""
        try:
            from core.execution.screen_signature import screen_signature

            sig = screen_signature(screen_state, elems, mode="hybrid")
        except Exception:
            sig = ""

        sig8 = sig[:8] if sig else "nosig"
        case_id = f"case_{now.strftime('%Y%m%d_%H%M%S')}_{sig8}"
        case_dir = day_dir / case_id
        case_dir.mkdir(parents=True, exist_ok=True)

        # Screenshot (local copy)
        screenshot_src = _extract_screenshot_path(screen_state)
        screenshot_dst = None
        if screenshot_src and screenshot_src.exists():
            try:
                screenshot_dst = case_dir / "screenshot.png"
                shutil.copy2(screenshot_src, screenshot_dst)
            except Exception as e:
                logger.debug(f"Failed to copy screenshot: {e}")
                screenshot_dst = None

        # Main dumps
        # ScreenState: prefer to_json() if available (ScreenState v3)
        if hasattr(screen_state, "to_json") and callable(getattr(screen_state, "to_json")):
            try:
                screen_payload = screen_state.to_json()
            except Exception:
                screen_payload = _safe_jsonable(screen_state)
        else:
            screen_payload = _safe_jsonable(screen_state)
        _write_json(case_dir / "screen_state.json", screen_payload)

        if target_spec is not None:
            # TargetSpec v3 has to_dict()
            if hasattr(target_spec, "to_dict") and callable(getattr(target_spec, "to_dict")):
                try:
                    ts_payload = target_spec.to_dict()
                except Exception:
                    ts_payload = _safe_jsonable(target_spec)
            else:
                ts_payload = _safe_jsonable(target_spec)
            _write_json(case_dir / "target_spec.json", ts_payload)

        if edge is not None:
            if hasattr(edge, "to_dict") and callable(getattr(edge, "to_dict")):
                try:
                    edge_payload = edge.to_dict()
                except Exception:
                    edge_payload = _safe_jsonable(edge)
            else:
                edge_payload = _safe_jsonable(edge)
            _write_json(case_dir / "edge.json", edge_payload)

        if execution_result is not None:
            if hasattr(execution_result, "to_dict") and callable(getattr(execution_result, "to_dict")):
                try:
                    er_payload = execution_result.to_dict()
                except Exception:
                    er_payload = _safe_jsonable(execution_result)
            else:
                er_payload = _safe_jsonable(execution_result)
            _write_json(case_dir / "execution_result.json", er_payload)

        if elems:
            elems_payload = []
            for e in elems:
                if hasattr(e, "to_dict") and callable(getattr(e, "to_dict")):
                    try:
                        elems_payload.append(e.to_dict())
                        continue
                    except Exception:
                        pass
                elems_payload.append(_safe_jsonable(e))
            _write_json(case_dir / "ui_elements.json", elems_payload)

        # failure.json (metadata)
        failure_payload: Dict[str, Any] = {
            "schema_version": "failure_case_v1",
            "case_id": case_id,
            "created_at": now.isoformat(),
            "failure_type": failure_type,
            "reason": reason,
            "screen_signature": sig,
            "screenshot_file": str(screenshot_dst) if screenshot_dst else "",
            "files": {
                "screen_state": "screen_state.json",
                "target_spec": "target_spec.json" if target_spec is not None else "",
                "edge": "edge.json" if edge is not None else "",
                "execution_result": "execution_result.json" if execution_result is not None else "",
                "ui_elements": "ui_elements.json" if elems else "",
            },
        }

        failure_payload.update(_extract_ids(screen_state))
        failure_payload.update(_extract_window_info(screen_state))
        if extra:
            failure_payload["extra"] = _safe_jsonable(extra)

        _write_json(case_dir / "failure.json", failure_payload)

        logger.info(f"Failure case captured -> {case_dir}")
        return case_dir
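
Editor's note: a minimal usage sketch for the recorder above (not part of the commit; `screen_state` stands in for any ScreenState-like object).

    from core.evaluation.failure_case_recorder import FailureCaseRecorder

    recorder = FailureCaseRecorder()  # cases land under data/failure_cases/YYYY-MM-DD/
    case_dir = recorder.record_matching_failure(
        reason="no node above threshold",
        screen_state=screen_state,   # placeholder: any ScreenState-like object
        best_confidence=0.42,
        threshold=0.70,
    )
    # The record_* methods swallow their own errors and return None when recording fails,
    # so they can be called from hot paths without extra guarding.
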
core/evaluation/replay_simulation.py (new file, 930 lines)
@@ -0,0 +1,930 @@
"""
Replay Simulation Report - Fiche #16

"Dry-run" test system to evaluate target-resolution rules without any real
UI interaction. Loads test cases from tests/dataset/**/ and generates
performance reports with risk scores.

Author: Dom, Alice Kiro - 22 December 2025
"""

import json
import logging
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple
import numpy as np
from datetime import datetime

from ..models.screen_state import ScreenState
from ..models.ui_element import UIElement
from ..models.workflow_graph import TargetSpec
from ..execution.target_resolver import TargetResolver

logger = logging.getLogger(__name__)


@dataclass
class TestCase:
    """Test case for replay simulation"""
    case_id: str
    dataset_path: Path
    screen_state: ScreenState
    target_spec: TargetSpec
    expected_element_id: str
    expected_confidence: float
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class RiskMetrics:
    """Risk metrics for a resolution"""
    ambiguity_score: float  # 0.0 = unambiguous, 1.0 = highly ambiguous
    confidence_score: float  # Resolver confidence
    margin_top1_top2: float  # Margin between top1 and top2
    element_count: int  # Number of candidate elements
    resolution_time_ms: float  # Resolution time

    @property
    def overall_risk(self) -> float:
        """Overall risk score (0.0 = low risk, 1.0 = high risk)"""
        # Weighting of the risk factors
        risk = (
            0.4 * self.ambiguity_score +  # Ambiguity = main factor
            0.3 * (1.0 - self.confidence_score) +  # Low confidence = risk
            0.2 * (1.0 - min(self.margin_top1_top2, 1.0)) +  # Small margin = risk
            0.1 * min(self.resolution_time_ms / 1000.0, 1.0)  # High latency = risk
        )
        return min(max(risk, 0.0), 1.0)
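
    # Worked example (editor's illustration, not from the commit): with
    # ambiguity_score=0.4, confidence_score=0.9, margin_top1_top2=0.5 and
    # resolution_time_ms=50, overall_risk = 0.4*0.4 + 0.3*0.1 + 0.2*0.5 + 0.1*0.05
    # = 0.16 + 0.03 + 0.10 + 0.005 = 0.295, i.e. a low-to-medium risk case.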


@dataclass
class SimulationResult:
    """Result of simulating one test case"""
    case_id: str
    success: bool
    resolved_element_id: Optional[str]
    expected_element_id: str
    risk_metrics: RiskMetrics
    strategy_used: str
    error_message: Optional[str] = None
    alternatives: List[Dict[str, Any]] = field(default_factory=list)

    @property
    def is_correct(self) -> bool:
        """Check whether the resolution is correct"""
        return self.success and self.resolved_element_id == self.expected_element_id


@dataclass
class ReplayReport:
    """Complete replay simulation report"""
    timestamp: datetime
    total_cases: int
    successful_cases: int
    correct_cases: int
    failed_cases: int
    results: List[SimulationResult]
    performance_stats: Dict[str, float]
    risk_analysis: Dict[str, Any]

    @property
    def success_rate(self) -> float:
        """Success rate (a resolution was found)"""
        return self.successful_cases / max(1, self.total_cases)

    @property
    def accuracy_rate(self) -> float:
        """Accuracy rate (the resolution is correct)"""
        return self.correct_cases / max(1, self.total_cases)

    @property
    def average_risk(self) -> float:
        """Average risk score"""
        if not self.results:
            return 0.0
        risks = [r.risk_metrics.overall_risk for r in self.results if r.success]
        return sum(risks) / max(1, len(risks))


class ReplaySimulation:
    """
    Replay simulator for headless testing of the resolution rules.

    Features:
    - Loads test datasets from tests/dataset/**/
    - Evaluates with the real TargetResolver and the rules from fiches #8-#14
    - Computes risk scores (ambiguity, confidence, margin)
    - Generates JSON and Markdown reports
    - 100% headless, ideal for fast iteration
    """

    def __init__(
        self,
        target_resolver: Optional[TargetResolver] = None,
        dataset_root: Optional[Path] = None
    ):
        """
        Initialize the simulator.

        Args:
            target_resolver: Resolver to use (created by default if None)
            dataset_root: Dataset root (tests/dataset by default)
        """
        self.target_resolver = target_resolver or TargetResolver()
        self.dataset_root = dataset_root or Path("tests/dataset")

        # Performance stats
        self.stats = {
            "cases_loaded": 0,
            "cases_processed": 0,
            "total_load_time_ms": 0.0,
            "total_resolution_time_ms": 0.0
        }

        logger.info(f"ReplaySimulation initialized with dataset root: {self.dataset_root}")

    def load_test_cases(
        self,
        dataset_pattern: str = "**",
        max_cases: Optional[int] = None
    ) -> List[TestCase]:
        """
        Load test cases from the dataset.

        Expected format per directory:
        - screen_state.json: serialized ScreenState
        - target_spec.json: serialized TargetSpec
        - expected.json: {"element_id": "...", "confidence": 0.95}

        Args:
            dataset_pattern: Search pattern (e.g. "form_*", "**")
            max_cases: Maximum number of cases (None = all)

        Returns:
            List of loaded test cases
        """
        start_time = time.perf_counter()
        test_cases = []

        # Find all directories matching the pattern
        search_path = self.dataset_root / dataset_pattern
        case_dirs = []

        if search_path.is_dir():
            case_dirs = [search_path]
        else:
            # Search with a glob pattern
            case_dirs = list(self.dataset_root.glob(dataset_pattern))
            case_dirs = [d for d in case_dirs if d.is_dir()]

        logger.info(f"Found {len(case_dirs)} potential test case directories")

        for case_dir in case_dirs:
            if max_cases and len(test_cases) >= max_cases:
                break

            try:
                test_case = self._load_single_test_case(case_dir)
                if test_case:
                    test_cases.append(test_case)
                    self.stats["cases_loaded"] += 1
            except Exception as e:
                logger.warning(f"Failed to load test case from {case_dir}: {e}")

        load_time = (time.perf_counter() - start_time) * 1000
        self.stats["total_load_time_ms"] += load_time

        logger.info(f"Loaded {len(test_cases)} test cases in {load_time:.1f}ms")
        return test_cases

    def _load_single_test_case(self, case_dir: Path) -> Optional[TestCase]:
        """
        Load a single test case from a directory.

        Args:
            case_dir: Directory containing the test case files

        Returns:
            Loaded TestCase, or None on error
        """
        required_files = ["screen_state.json", "target_spec.json", "expected.json"]

        # Check that all required files exist
        for filename in required_files:
            if not (case_dir / filename).exists():
                logger.debug(f"Missing required file {filename} in {case_dir}")
                return None

        try:
            # Load screen_state
            with open(case_dir / "screen_state.json", 'r', encoding='utf-8') as f:
                screen_state_data = json.load(f)
            screen_state = ScreenState.from_json(screen_state_data)

            # Load target_spec
            with open(case_dir / "target_spec.json", 'r', encoding='utf-8') as f:
                target_spec_data = json.load(f)
            target_spec = TargetSpec.from_dict(target_spec_data)

            # Load expected
            with open(case_dir / "expected.json", 'r', encoding='utf-8') as f:
                expected_data = json.load(f)

            # Optional metadata
            metadata = {}
            metadata_file = case_dir / "metadata.json"
            if metadata_file.exists():
                with open(metadata_file, 'r', encoding='utf-8') as f:
                    metadata = json.load(f)

            return TestCase(
                case_id=case_dir.name,
                dataset_path=case_dir,
                screen_state=screen_state,
                target_spec=target_spec,
                expected_element_id=expected_data["element_id"],
                expected_confidence=expected_data.get("confidence", 0.95),
                metadata=metadata
            )

        except Exception as e:
            logger.error(f"Error loading test case from {case_dir}: {e}")
            return None

    def run_simulation(
        self,
        test_cases: List[TestCase],
        include_alternatives: bool = True
    ) -> ReplayReport:
        """
        Run the simulation on a list of test cases.

        Args:
            test_cases: Test cases to evaluate
            include_alternatives: Include alternatives in the results

        Returns:
            Complete simulation report
        """
        start_time = time.perf_counter()
        results = []

        logger.info(f"Starting replay simulation on {len(test_cases)} test cases")

        for i, test_case in enumerate(test_cases):
            if i % 10 == 0:
                logger.info(f"Processing test case {i+1}/{len(test_cases)}")

            try:
                result = self._simulate_single_case(test_case, include_alternatives)
                results.append(result)
                self.stats["cases_processed"] += 1
            except Exception as e:
                logger.error(f"Error simulating case {test_case.case_id}: {e}")
                # Build an error result
                error_result = SimulationResult(
                    case_id=test_case.case_id,
                    success=False,
                    resolved_element_id=None,
                    expected_element_id=test_case.expected_element_id,
                    risk_metrics=RiskMetrics(
                        ambiguity_score=1.0,
                        confidence_score=0.0,
                        margin_top1_top2=0.0,
                        element_count=0,
                        resolution_time_ms=0.0
                    ),
                    strategy_used="ERROR",
                    error_message=str(e)
                )
                results.append(error_result)

        # Compute global statistics
        total_time = (time.perf_counter() - start_time) * 1000
        successful_cases = sum(1 for r in results if r.success)
        correct_cases = sum(1 for r in results if r.is_correct)
        failed_cases = len(results) - successful_cases

        # Performance statistics
        resolution_times = [r.risk_metrics.resolution_time_ms for r in results if r.success]
        performance_stats = {
            "total_simulation_time_ms": total_time,
            "avg_resolution_time_ms": sum(resolution_times) / max(1, len(resolution_times)),
            "min_resolution_time_ms": min(resolution_times) if resolution_times else 0.0,
            "max_resolution_time_ms": max(resolution_times) if resolution_times else 0.0,
            "cases_per_second": len(test_cases) / max(0.001, total_time / 1000)
        }

        # Risk analysis
        risk_scores = [r.risk_metrics.overall_risk for r in results if r.success]
        risk_analysis = {
            "average_risk": sum(risk_scores) / max(1, len(risk_scores)),
            "high_risk_cases": sum(1 for r in risk_scores if r > 0.7),
            "medium_risk_cases": sum(1 for r in risk_scores if 0.3 <= r <= 0.7),
            "low_risk_cases": sum(1 for r in risk_scores if r < 0.3),
            "risk_distribution": self._calculate_risk_distribution(risk_scores)
        }

        report = ReplayReport(
            timestamp=datetime.now(),
            total_cases=len(test_cases),
            successful_cases=successful_cases,
            correct_cases=correct_cases,
            failed_cases=failed_cases,
            results=results,
            performance_stats=performance_stats,
            risk_analysis=risk_analysis
        )

        logger.info(f"Simulation completed: {successful_cases}/{len(test_cases)} successful, "
                    f"{correct_cases}/{len(test_cases)} correct, avg risk: {report.average_risk:.3f}")

        return report

    def _simulate_single_case(
        self,
        test_case: TestCase,
        include_alternatives: bool
    ) -> SimulationResult:
        """
        Simulate a single test case.

        Args:
            test_case: Test case to evaluate
            include_alternatives: Include alternatives

        Returns:
            Simulation result for this case
        """
        start_time = time.perf_counter()

        try:
            # Resolve the target with the real TargetResolver
            resolved_target = self.target_resolver.resolve_target(
                target_spec=test_case.target_spec,
                screen_state=test_case.screen_state
            )

            resolution_time = (time.perf_counter() - start_time) * 1000
            self.stats["total_resolution_time_ms"] += resolution_time

            if resolved_target is None:
                # Resolution failure
                return SimulationResult(
                    case_id=test_case.case_id,
                    success=False,
                    resolved_element_id=None,
                    expected_element_id=test_case.expected_element_id,
                    risk_metrics=RiskMetrics(
                        ambiguity_score=1.0,
                        confidence_score=0.0,
                        margin_top1_top2=0.0,
                        element_count=len(test_case.screen_state.ui_elements),
                        resolution_time_ms=resolution_time
                    ),
                    strategy_used="FAILED"
                )

            # Compute risk metrics
            risk_metrics = self._calculate_risk_metrics(
                resolved_target,
                test_case.screen_state.ui_elements,
                resolution_time
            )

            # Prepare alternatives if requested
            alternatives = []
            if include_alternatives and resolved_target.alternatives:
                alternatives = [
                    {
                        "element_id": alt.element.element_id,
                        "confidence": alt.confidence,
                        "strategy": alt.strategy_used
                    }
                    for alt in resolved_target.alternatives[:3]  # Top 3
                ]

            return SimulationResult(
                case_id=test_case.case_id,
                success=True,
                resolved_element_id=resolved_target.element.element_id,
                expected_element_id=test_case.expected_element_id,
                risk_metrics=risk_metrics,
                strategy_used=resolved_target.strategy_used,
                alternatives=alternatives
            )

        except Exception as e:
            resolution_time = (time.perf_counter() - start_time) * 1000
            return SimulationResult(
                case_id=test_case.case_id,
                success=False,
                resolved_element_id=None,
                expected_element_id=test_case.expected_element_id,
                risk_metrics=RiskMetrics(
                    ambiguity_score=1.0,
                    confidence_score=0.0,
                    margin_top1_top2=0.0,
                    element_count=0,
                    resolution_time_ms=resolution_time
                ),
                strategy_used="ERROR",
                error_message=str(e)
            )

    def _calculate_risk_metrics(
        self,
        resolved_target,
        ui_elements: List[UIElement],
        resolution_time_ms: float
    ) -> RiskMetrics:
        """
        Compute the risk metrics for a resolution.

        Args:
            resolved_target: Resolution result
            ui_elements: All available UI elements
            resolution_time_ms: Resolution time

        Returns:
            Computed risk metrics
        """
        # Ambiguity score based on the number of similar elements
        similar_elements = self._count_similar_elements(
            resolved_target.element,
            ui_elements
        )
        ambiguity_score = min(similar_elements / 10.0, 1.0)  # Normalize over at most 10 elements

        # Resolver confidence score
        confidence_score = resolved_target.confidence

        # Margin between top1 and top2
        margin_top1_top2 = 0.0
        if resolved_target.alternatives and len(resolved_target.alternatives) > 0:
            top2_confidence = resolved_target.alternatives[0].confidence
            margin_top1_top2 = max(0.0, confidence_score - top2_confidence)
        else:
            margin_top1_top2 = confidence_score  # No alternative = maximum margin

        return RiskMetrics(
            ambiguity_score=ambiguity_score,
            confidence_score=confidence_score,
            margin_top1_top2=margin_top1_top2,
            element_count=len(ui_elements),
            resolution_time_ms=resolution_time_ms
        )

    def _count_similar_elements(
        self,
        target_element: UIElement,
        ui_elements: List[UIElement]
    ) -> int:
        """
        Count the elements similar to the target (same role/type).

        Args:
            target_element: Resolved target element
            ui_elements: All UI elements

        Returns:
            Number of similar elements
        """
        target_role = (getattr(target_element, 'role', '') or '').lower()
        target_type = (getattr(target_element, 'type', '') or '').lower()

        similar_count = 0
        for elem in ui_elements:
            if elem.element_id == target_element.element_id:
                continue  # Skip the element itself

            elem_role = (getattr(elem, 'role', '') or '').lower()
            elem_type = (getattr(elem, 'type', '') or '').lower()

            if elem_role == target_role or elem_type == target_type:
                similar_count += 1

        return similar_count

    def _calculate_risk_distribution(self, risk_scores: List[float]) -> Dict[str, int]:
        """
        Compute the distribution of risk scores by bucket.

        Args:
            risk_scores: List of risk scores

        Returns:
            Distribution by bucket
        """
        if not risk_scores:
            return {}

        distribution = {
            "0.0-0.1": 0,
            "0.1-0.2": 0,
            "0.2-0.3": 0,
            "0.3-0.4": 0,
            "0.4-0.5": 0,
            "0.5-0.6": 0,
            "0.6-0.7": 0,
            "0.7-0.8": 0,
            "0.8-0.9": 0,
            "0.9-1.0": 0
        }

        for score in risk_scores:
            if score < 0.1:
                distribution["0.0-0.1"] += 1
            elif score < 0.2:
                distribution["0.1-0.2"] += 1
            elif score < 0.3:
                distribution["0.2-0.3"] += 1
            elif score < 0.4:
                distribution["0.3-0.4"] += 1
            elif score < 0.5:
                distribution["0.4-0.5"] += 1
            elif score < 0.6:
                distribution["0.5-0.6"] += 1
            elif score < 0.7:
                distribution["0.6-0.7"] += 1
            elif score < 0.8:
                distribution["0.7-0.8"] += 1
            elif score < 0.9:
                distribution["0.8-0.9"] += 1
            else:
                distribution["0.9-1.0"] += 1

        return distribution

    def export_json_report(
        self,
        report: ReplayReport,
        output_path: Path
    ) -> None:
        """
        Export the report as machine-friendly JSON.

        Args:
            report: Report to export
            output_path: Output path
        """
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Serialize the report
        report_data = {
            "metadata": {
                "timestamp": report.timestamp.isoformat(),
                "total_cases": report.total_cases,
                "successful_cases": report.successful_cases,
                "correct_cases": report.correct_cases,
                "failed_cases": report.failed_cases,
                "success_rate": report.success_rate,
                "accuracy_rate": report.accuracy_rate,
                "average_risk": report.average_risk
            },
            "performance_stats": report.performance_stats,
            "risk_analysis": report.risk_analysis,
            "results": [
                {
                    "case_id": r.case_id,
                    "success": r.success,
                    "is_correct": r.is_correct,
                    "resolved_element_id": r.resolved_element_id,
                    "expected_element_id": r.expected_element_id,
                    "strategy_used": r.strategy_used,
                    "error_message": r.error_message,
                    "risk_metrics": {
                        "ambiguity_score": r.risk_metrics.ambiguity_score,
                        "confidence_score": r.risk_metrics.confidence_score,
                        "margin_top1_top2": r.risk_metrics.margin_top1_top2,
                        "element_count": r.risk_metrics.element_count,
                        "resolution_time_ms": r.risk_metrics.resolution_time_ms,
                        "overall_risk": r.risk_metrics.overall_risk
                    },
                    "alternatives": r.alternatives
                }
                for r in report.results
            ]
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(report_data, f, indent=2, ensure_ascii=False)

        logger.info(f"JSON report exported to {output_path}")

    def export_markdown_report(
        self,
        report: ReplayReport,
        output_path: Path
    ) -> None:
        """
        Export the report as human-friendly Markdown.

        Args:
            report: Report to export
            output_path: Output path
        """
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Generate the Markdown content
        md_content = self._generate_markdown_content(report)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(md_content)

        logger.info(f"Markdown report exported to {output_path}")

    def _generate_markdown_content(self, report: ReplayReport) -> str:
        """
        Generate the Markdown content of the report.

        Args:
            report: Report to convert

        Returns:
            Formatted Markdown content
        """
        md_lines = [
            "# Replay Simulation Report",
            "",
            f"**Généré le :** {report.timestamp.strftime('%Y-%m-%d %H:%M:%S')}",
            f"**Auteur :** Dom, Alice Kiro",
            "",
            "## Résumé Exécutif",
            "",
            f"- **Cas de test traités :** {report.total_cases}",
            f"- **Résolutions réussies :** {report.successful_cases} ({report.success_rate:.1%})",
            f"- **Résolutions correctes :** {report.correct_cases} ({report.accuracy_rate:.1%})",
            f"- **Échecs :** {report.failed_cases}",
            f"- **Score de risque moyen :** {report.average_risk:.3f}",
            "",
            "## Performance",
            "",
            f"- **Temps total :** {report.performance_stats['total_simulation_time_ms']:.1f}ms",
            f"- **Temps moyen par résolution :** {report.performance_stats['avg_resolution_time_ms']:.1f}ms",
            f"- **Débit :** {report.performance_stats['cases_per_second']:.1f} cas/seconde",
            f"- **Temps min/max :** {report.performance_stats['min_resolution_time_ms']:.1f}ms / {report.performance_stats['max_resolution_time_ms']:.1f}ms",
            "",
            "## Analyse des Risques",
            "",
            f"- **Cas à risque élevé (>0.7) :** {report.risk_analysis['high_risk_cases']}",
            f"- **Cas à risque moyen (0.3-0.7) :** {report.risk_analysis['medium_risk_cases']}",
            f"- **Cas à faible risque (<0.3) :** {report.risk_analysis['low_risk_cases']}",
            "",
            "### Distribution des Risques",
            "",
            "| Tranche | Nombre de cas |",
            "|---------|---------------|"
        ]

        # Add the risk distribution
        for tranche, count in report.risk_analysis['risk_distribution'].items():
            md_lines.append(f"| {tranche} | {count} |")

        md_lines.extend([
            "",
            "## Détails par Stratégie",
            "",
            "| Stratégie | Cas | Succès | Précision |",
            "|-----------|-----|--------|-----------|"
        ])

        # Break down by strategy
        strategy_stats = {}
        for result in report.results:
            strategy = result.strategy_used
            if strategy not in strategy_stats:
                strategy_stats[strategy] = {"total": 0, "success": 0, "correct": 0}

            strategy_stats[strategy]["total"] += 1
            if result.success:
                strategy_stats[strategy]["success"] += 1
            if result.is_correct:
                strategy_stats[strategy]["correct"] += 1

        for strategy, stats in strategy_stats.items():
            success_rate = stats["success"] / max(1, stats["total"])
            accuracy_rate = stats["correct"] / max(1, stats["total"])
            md_lines.append(f"| {strategy} | {stats['total']} | {success_rate:.1%} | {accuracy_rate:.1%} |")

        md_lines.extend([
            "",
            "## Cas Problématiques (Risque > 0.7)",
            ""
        ])

        # List high-risk cases
        high_risk_cases = [r for r in report.results if r.success and r.risk_metrics.overall_risk > 0.7]
        high_risk_cases.sort(key=lambda x: x.risk_metrics.overall_risk, reverse=True)

        if high_risk_cases:
            md_lines.extend([
                "| Cas | Risque | Confiance | Ambiguïté | Marge | Temps |",
                "|-----|--------|-----------|-----------|-------|-------|"
            ])

            for case in high_risk_cases[:10]:  # Top 10
                md_lines.append(
                    f"| {case.case_id} | {case.risk_metrics.overall_risk:.3f} | "
                    f"{case.risk_metrics.confidence_score:.3f} | "
                    f"{case.risk_metrics.ambiguity_score:.3f} | "
                    f"{case.risk_metrics.margin_top1_top2:.3f} | "
                    f"{case.risk_metrics.resolution_time_ms:.1f}ms |"
                )
        else:
            md_lines.append("*Aucun cas à risque élevé détecté.*")

        md_lines.extend([
            "",
            "## Échecs de Résolution",
            ""
        ])

        # List the failures
        failed_cases = [r for r in report.results if not r.success]
        if failed_cases:
            md_lines.extend([
                "| Cas | Erreur |",
                "|-----|--------|"
            ])

            for case in failed_cases[:10]:  # Top 10
                error_msg = case.error_message or "Aucune résolution trouvée"
                md_lines.append(f"| {case.case_id} | {error_msg} |")
        else:
            md_lines.append("*Aucun échec de résolution.*")

        md_lines.extend([
            "",
            "## Recommandations",
            "",
            self._generate_recommendations(report),
            "",
            "---",
            f"*Rapport généré par RPA Vision V3 - Replay Simulation Engine*"
        ])

        return "\n".join(md_lines)

    def _generate_recommendations(self, report: ReplayReport) -> str:
        """
        Generate recommendations based on the report analysis.

        Args:
            report: Analyzed report

        Returns:
            Recommendations formatted as Markdown
        """
        recommendations = []

        # Success-rate analysis
        if report.success_rate < 0.8:
            recommendations.append(
                "⚠️ **Taux de succès faible** : Considérer l'amélioration des stratégies de fallback"
            )

        # Accuracy analysis
        if report.accuracy_rate < 0.9:
            recommendations.append(
                "⚠️ **Précision insuffisante** : Revoir les critères de scoring et les seuils de confiance"
            )

        # Risk analysis
        if report.average_risk > 0.5:
            recommendations.append(
                "⚠️ **Risque élevé** : Améliorer la désambiguïsation et les marges de confiance"
            )

        # Performance analysis
        avg_time = report.performance_stats['avg_resolution_time_ms']
        if avg_time > 100:
            recommendations.append(
                f"⚠️ **Performance** : Temps de résolution élevé ({avg_time:.1f}ms), optimiser les algorithmes"
            )

        # Strategy analysis
        strategy_stats = {}
        for result in report.results:
            strategy = result.strategy_used
            if strategy not in strategy_stats:
                strategy_stats[strategy] = {"total": 0, "correct": 0}
            strategy_stats[strategy]["total"] += 1
            if result.is_correct:
                strategy_stats[strategy]["correct"] += 1

        for strategy, stats in strategy_stats.items():
            accuracy = stats["correct"] / max(1, stats["total"])
            if accuracy < 0.8 and stats["total"] > 5:
                recommendations.append(
                    f"⚠️ **Stratégie {strategy}** : Précision faible ({accuracy:.1%}), revoir l'implémentation"
                )

        if not recommendations:
            recommendations.append("✅ **Excellent** : Toutes les métriques sont dans les objectifs")

        return "\n".join(f"- {rec}" for rec in recommendations)


def create_replay_simulation_cli():
    """
    Create a CLI entry point for the replay simulation.

    Returns:
        Configured CLI function
    """
    import argparse

    def cli_main():
        parser = argparse.ArgumentParser(
            description="Replay Simulation Report - Test headless des règles de résolution"
        )
        parser.add_argument(
            "--dataset",
            type=str,
            default="**",
            help="Pattern de dataset à charger (ex: 'form_*', '**')"
        )
        parser.add_argument(
            "--max-cases",
            type=int,
            help="Nombre maximum de cas à traiter"
        )
        parser.add_argument(
            "--out-json",
            type=str,
            default="replay_report.json",
            help="Fichier de sortie JSON"
        )
        parser.add_argument(
            "--out-md",
            type=str,
            default="replay_report.md",
            help="Fichier de sortie Markdown"
        )
        parser.add_argument(
            "--dataset-root",
            type=str,
            default="tests/dataset",
            help="Racine des datasets de test"
        )
        parser.add_argument(
            "--verbose",
            action="store_true",
            help="Mode verbose"
        )

        args = parser.parse_args()

        # Logging configuration
        level = logging.DEBUG if args.verbose else logging.INFO
        logging.basicConfig(level=level, format='%(asctime)s - %(levelname)s - %(message)s')

        # Create the simulator
        simulator = ReplaySimulation(dataset_root=Path(args.dataset_root))

        # Load the test cases
        print(f"Chargement des cas de test depuis {args.dataset_root} (pattern: {args.dataset})")
        test_cases = simulator.load_test_cases(args.dataset, args.max_cases)

        if not test_cases:
            print("❌ Aucun cas de test trouvé")
            return 1

        print(f"✅ {len(test_cases)} cas de test chargés")

        # Run the simulation
        print("🚀 Démarrage de la simulation...")
        report = simulator.run_simulation(test_cases)

        # Export the reports
        json_path = Path(args.out_json)
        md_path = Path(args.out_md)

        simulator.export_json_report(report, json_path)
        simulator.export_markdown_report(report, md_path)

        # Print the summary
        print("\n" + "="*60)
        print("📊 RÉSUMÉ DE SIMULATION")
        print("="*60)
        print(f"Cas traités : {report.total_cases}")
        print(f"Succès : {report.successful_cases} ({report.success_rate:.1%})")
        print(f"Précision : {report.correct_cases} ({report.accuracy_rate:.1%})")
        print(f"Risque moyen : {report.average_risk:.3f}")
        print(f"Temps total : {report.performance_stats['total_simulation_time_ms']:.1f}ms")
        print(f"Débit : {report.performance_stats['cases_per_second']:.1f} cas/sec")
        print("\n📄 Rapports générés :")
        print(f" - JSON : {json_path}")
        print(f" - Markdown : {md_path}")

        return 0

    return cli_main


if __name__ == "__main__":
    cli_main = create_replay_simulation_cli()
    exit(cli_main())
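
Editor's note: a minimal headless run of the simulator above (not part of the commit; the paths and pattern are assumptions, the calls are the ones defined in this file).

    from pathlib import Path
    from core.evaluation.replay_simulation import ReplaySimulation

    # Each case directory holds screen_state.json, target_spec.json and
    # expected.json ({"element_id": "...", "confidence": 0.95}).
    sim = ReplaySimulation(dataset_root=Path("tests/dataset"))
    cases = sim.load_test_cases("form_*", max_cases=50)
    report = sim.run_simulation(cases)
    sim.export_json_report(report, Path("reports/replay_report.json"))
    sim.export_markdown_report(report, Path("reports/replay_report.md"))
    print(f"accuracy={report.accuracy_rate:.1%}, avg risk={report.average_risk:.3f}")
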
core/evaluation/workflow_simulation_report.py (new file, 877 lines)
@@ -0,0 +1,877 @@
"""
Workflow Simulation Report - Fiche #16++

Full workflow simulation system that exercises the complete chain:
Node Matching (FAISS) → Target Resolution → Post-conditions → Transition

Uses "scenario packs" of sequential frames to simulate realistic workflows
and generate detailed performance reports.

Author: Dom, Alice Kiro - 22 December 2025
"""

import json
import logging
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Any, Tuple, Union
import numpy as np
from datetime import datetime

from ..models.screen_state import ScreenState
from ..models.ui_element import UIElement
from ..models.workflow_graph import Workflow, WorkflowNode, WorkflowEdge, TargetSpec, PostConditions, PostConditionCheck
from ..graph.node_matcher import NodeMatcher
from ..embedding.state_embedding_builder import StateEmbeddingBuilder
from ..execution.target_resolver import TargetResolver

logger = logging.getLogger(__name__)


@dataclass
class ScenarioFrame:
    """A single frame in a workflow scenario"""
    frame_id: str
    step_number: int
    screen_state: ScreenState
    expected_node_id: Optional[str] = None  # Node expected for this frame
    expected_action: Optional[Dict[str, Any]] = None  # Expected action
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class ScenarioPack:
    """Complete scenario pack with sequential frames"""
    scenario_id: str
    name: str
    description: str
    workflow_id: str  # Workflow under test
    frames: List[ScenarioFrame]
    expected_path: List[str]  # Expected sequence of node_ids
    metadata: Dict[str, Any] = field(default_factory=dict)

    @classmethod
    def load_from_directory(cls, scenario_dir: Path) -> 'ScenarioPack':
        """Load a scenario pack from a directory"""
        scenario_file = scenario_dir / "scenario.json"
        if not scenario_file.exists():
            raise FileNotFoundError(f"scenario.json not found in {scenario_dir}")

        with open(scenario_file, 'r', encoding='utf-8') as f:
            scenario_data = json.load(f)

        # Load the frames
        frames = []
        for step_data in scenario_data.get("steps", []):
            step_file = scenario_dir / f"step_{step_data['step_number']:03d}.json"
            if not step_file.exists():
                logger.warning(f"Step file not found: {step_file}")
                continue

            with open(step_file, 'r', encoding='utf-8') as f:
                step_content = json.load(f)

            # Rebuild the ScreenState from JSON
            screen_state = ScreenState.from_dict(step_content["screen_state"])

            frame = ScenarioFrame(
                frame_id=f"{scenario_data['scenario_id']}_step_{step_data['step_number']:03d}",
                step_number=step_data["step_number"],
                screen_state=screen_state,
                expected_node_id=step_data.get("expected_node_id"),
                expected_action=step_data.get("expected_action"),
                metadata=step_data.get("metadata", {})
            )
            frames.append(frame)

        return cls(
            scenario_id=scenario_data["scenario_id"],
            name=scenario_data["name"],
            description=scenario_data["description"],
            workflow_id=scenario_data["workflow_id"],
            frames=frames,
            expected_path=scenario_data.get("expected_path", []),
            metadata=scenario_data.get("metadata", {})
        )
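
    # Layout assumed by this loader (editor's illustration; keys are taken from
    # the code above, values are placeholders):
    #   <scenario_dir>/scenario.json
    #     {"scenario_id": "s1", "name": "...", "description": "...",
    #      "workflow_id": "wf1", "expected_path": ["node_a", "node_b"],
    #      "steps": [{"step_number": 1, "expected_node_id": "node_a"}]}
    #   <scenario_dir>/step_001.json
    #     {"screen_state": { ... payload accepted by ScreenState.from_dict() ... }}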
|
||||
|
||||
@dataclass
|
||||
class NodeMatchingResult:
|
||||
"""Résultat du matching de node"""
|
||||
frame_id: str
|
||||
expected_node_id: Optional[str]
|
||||
matched_node_id: Optional[str]
|
||||
confidence: float
|
||||
success: bool
|
||||
strategy_used: str
|
||||
error_message: Optional[str] = None
|
||||
alternatives: List[Tuple[str, float]] = field(default_factory=list) # (node_id, confidence)
|
||||
|
||||
|
||||
@dataclass
|
||||
class TargetResolutionResult:
|
||||
"""Résultat de la résolution de cible"""
|
||||
frame_id: str
|
||||
target_spec: Optional[TargetSpec]
|
||||
resolved_element_id: Optional[str]
|
||||
expected_element_id: Optional[str]
|
||||
confidence: float
|
||||
success: bool
|
||||
strategy_used: str
|
||||
resolution_time_ms: float
|
||||
error_message: Optional[str] = None
|
||||
alternatives: List[Dict[str, Any]] = field(default_factory=list)
|
||||
|
||||
|
||||
@dataclass
|
||||
class PostConditionResult:
|
||||
"""Résultat de vérification des post-conditions"""
|
||||
frame_id: str
|
||||
post_conditions: Optional[PostConditions]
|
||||
checks_passed: int
|
||||
checks_total: int
|
||||
success: bool
|
||||
timeout_occurred: bool
|
||||
verification_time_ms: float
|
||||
failed_checks: List[str] = field(default_factory=list)
|
||||
error_message: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class TransitionResult:
|
||||
"""Résultat de transition vers le node suivant"""
|
||||
from_frame_id: str
|
||||
to_frame_id: str
|
||||
expected_transition: bool
|
||||
actual_transition: bool
|
||||
success: bool
|
||||
transition_confidence: float
|
||||
error_message: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
|
||||
class WorkflowStepResult:
|
||||
"""Résultat complet d'une étape de workflow"""
|
||||
frame_id: str
|
||||
step_number: int
|
||||
node_matching: NodeMatchingResult
|
||||
target_resolution: Optional[TargetResolutionResult]
|
||||
post_conditions: Optional[PostConditionResult]
|
||||
transition: Optional[TransitionResult]
|
||||
overall_success: bool
|
||||
step_duration_ms: float
|
||||
|
||||
@property
|
||||
def success_components(self) -> Dict[str, bool]:
|
||||
"""Composants de succès pour analyse détaillée"""
|
||||
return {
|
||||
"node_matching": self.node_matching.success,
|
||||
"target_resolution": self.target_resolution.success if self.target_resolution else True,
|
||||
"post_conditions": self.post_conditions.success if self.post_conditions else True,
|
||||
"transition": self.transition.success if self.transition else True
|
||||
}
|
||||
|
||||
|
||||
@dataclass
|
||||
class WorkflowSimulationReport:
|
||||
"""Rapport complet de simulation de workflow"""
|
||||
scenario_id: str
|
||||
workflow_id: str
|
||||
timestamp: datetime
|
||||
total_steps: int
|
||||
successful_steps: int
|
||||
step_results: List[WorkflowStepResult]
|
||||
|
||||
# Métriques globales
|
||||
node_matching_accuracy: float
|
||||
target_resolution_accuracy: float
|
||||
post_condition_success_rate: float
|
||||
transition_accuracy: float
|
||||
|
||||
# Performance
|
||||
total_simulation_time_ms: float
|
||||
avg_step_time_ms: float
|
||||
|
||||
# Analyse des erreurs
|
||||
error_breakdown: Dict[str, int]
|
||||
failure_points: List[str]
|
||||
|
||||
# Recommandations
|
||||
recommendations: List[str]
|
||||
|
||||
@property
|
||||
def overall_success_rate(self) -> float:
|
||||
"""Taux de succès global"""
|
||||
        return self.successful_steps / max(1, self.total_steps)

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the report to a dictionary."""
        return {
            "scenario_id": self.scenario_id,
            "workflow_id": self.workflow_id,
            "timestamp": self.timestamp.isoformat(),
            "total_steps": self.total_steps,
            "successful_steps": self.successful_steps,
            "step_results": [
                {
                    "frame_id": result.frame_id,
                    "step_number": result.step_number,
                    "overall_success": result.overall_success,
                    "step_duration_ms": result.step_duration_ms,
                    "success_components": result.success_components,
                    "node_matching": {
                        "expected_node_id": result.node_matching.expected_node_id,
                        "matched_node_id": result.node_matching.matched_node_id,
                        "confidence": result.node_matching.confidence,
                        "success": result.node_matching.success,
                        "strategy_used": result.node_matching.strategy_used,
                        "error_message": result.node_matching.error_message
                    },
                    "target_resolution": {
                        "resolved_element_id": result.target_resolution.resolved_element_id,
                        "confidence": result.target_resolution.confidence,
                        "success": result.target_resolution.success,
                        "strategy_used": result.target_resolution.strategy_used,
                        "resolution_time_ms": result.target_resolution.resolution_time_ms
                    } if result.target_resolution else None,
                    "post_conditions": {
                        "checks_passed": result.post_conditions.checks_passed,
                        "checks_total": result.post_conditions.checks_total,
                        "success": result.post_conditions.success,
                        "verification_time_ms": result.post_conditions.verification_time_ms
                    } if result.post_conditions else None,
                    "transition": {
                        "expected_transition": result.transition.expected_transition,
                        "actual_transition": result.transition.actual_transition,
                        "success": result.transition.success,
                        "transition_confidence": result.transition.transition_confidence
                    } if result.transition else None
                }
                for result in self.step_results
            ],
            "metrics": {
                "node_matching_accuracy": self.node_matching_accuracy,
                "target_resolution_accuracy": self.target_resolution_accuracy,
                "post_condition_success_rate": self.post_condition_success_rate,
                "transition_accuracy": self.transition_accuracy,
                "overall_success_rate": self.overall_success_rate
            },
            "performance": {
                "total_simulation_time_ms": self.total_simulation_time_ms,
                "avg_step_time_ms": self.avg_step_time_ms
            },
            "analysis": {
                "error_breakdown": self.error_breakdown,
                "failure_points": self.failure_points,
                "recommendations": self.recommendations
            }
        }

    def save_to_file(self, filepath: Path) -> None:
        """Save the report to a JSON file."""
        filepath.parent.mkdir(parents=True, exist_ok=True)
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(self.to_dict(), f, indent=2, ensure_ascii=False)

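    # Illustrative round trip (a sketch, not part of the class API; the report
    # path below is hypothetical):
    #
    #     report.save_to_file(Path("data/simulation_reports/report.json"))
    #     with open("data/simulation_reports/report.json", encoding="utf-8") as f:
    #         data = json.load(f)
    #     data["metrics"]["overall_success_rate"]  # same value as the property above
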
    def generate_markdown_report(self) -> str:
        """Generate a human-readable Markdown report."""
        md_lines = [
            "# Workflow Simulation Report",
            "",
            f"**Scenario:** {self.scenario_id}",
            f"**Workflow:** {self.workflow_id}",
            f"**Date:** {self.timestamp.strftime('%Y-%m-%d %H:%M:%S')}",
            "",
            "## Summary",
            "",
            f"- **Total Steps:** {self.total_steps}",
            f"- **Successful Steps:** {self.successful_steps}",
            f"- **Overall Success Rate:** {self.overall_success_rate:.1%}",
            f"- **Total Simulation Time:** {self.total_simulation_time_ms:.0f}ms",
            f"- **Average Step Time:** {self.avg_step_time_ms:.0f}ms",
            "",
            "## Component Accuracy",
            "",
            "| Component | Accuracy |",
            "|-----------|----------|",
            f"| Node Matching | {self.node_matching_accuracy:.1%} |",
            f"| Target Resolution | {self.target_resolution_accuracy:.1%} |",
            f"| Post-conditions | {self.post_condition_success_rate:.1%} |",
            f"| Transitions | {self.transition_accuracy:.1%} |",
            "",
            "## Error Breakdown",
            ""
        ]

        if self.error_breakdown:
            for error_type, count in self.error_breakdown.items():
                md_lines.append(f"- **{error_type}:** {count}")
        else:
            md_lines.append("- No errors detected")

        md_lines.extend([
            "",
            "## Failure Points",
            ""
        ])

        if self.failure_points:
            for failure in self.failure_points:
                md_lines.append(f"- {failure}")
        else:
            md_lines.append("- No critical failure points identified")

        md_lines.extend([
            "",
            "## Recommendations",
            ""
        ])

        if self.recommendations:
            for rec in self.recommendations:
                md_lines.append(f"- {rec}")
        else:
            md_lines.append("- No specific recommendations at this time")

        md_lines.extend([
            "",
            "## Detailed Step Results",
            "",
            "| Step | Node Match | Target Res | Post-Cond | Transition | Duration |",
            "|------|------------|------------|-----------|------------|----------|"
        ])

        for result in self.step_results:
            node_status = "✅" if result.node_matching.success else "❌"
            target_status = ("✅" if result.target_resolution.success else "❌") if result.target_resolution else "N/A"
            post_status = ("✅" if result.post_conditions.success else "❌") if result.post_conditions else "N/A"
            trans_status = ("✅" if result.transition.success else "❌") if result.transition else "N/A"

            md_lines.append(
                f"| {result.step_number} | {node_status} | {target_status} | {post_status} | {trans_status} | {result.step_duration_ms:.0f}ms |"
            )

        return "\n".join(md_lines)


class WorkflowSimulator:
    """
    Full workflow simulator.

    Tests the complete chain: Node Matching → Target Resolution → Post-conditions → Transition
    """

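    # Minimal usage sketch (mirrors the __main__ example at the bottom of this
    # file; the paths are placeholders):
    #
    #     simulator = WorkflowSimulator()
    #     pack = ScenarioPack.load_from_directory(Path("tests/scenarios/login_flow"))
    #     wf = Workflow.load_from_file(Path("data/workflows/login_workflow.json"))
    #     report = simulator.simulate_workflow(pack, wf, Path("data/simulation_reports"))
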
    def __init__(
        self,
        node_matcher: Optional[NodeMatcher] = None,
        target_resolver: Optional[TargetResolver] = None,
        state_embedding_builder: Optional[StateEmbeddingBuilder] = None
    ):
        """
        Initialize the simulator.

        Args:
            node_matcher: Node matcher (a default instance is created if None)
            target_resolver: Target resolver (a default instance is created if None)
            state_embedding_builder: Embedding builder (a default instance is created if None)
        """
        self.node_matcher = node_matcher or NodeMatcher()
        self.target_resolver = target_resolver or TargetResolver()
        self.state_embedding_builder = state_embedding_builder or StateEmbeddingBuilder()

        logger.info("WorkflowSimulator initialized")

    def simulate_workflow(
        self,
        scenario_pack: ScenarioPack,
        workflow: Workflow,
        output_dir: Optional[Path] = None
    ) -> WorkflowSimulationReport:
        """
        Simulate a complete workflow against a scenario pack.

        Args:
            scenario_pack: Scenario pack with sequential frames
            workflow: Workflow under test
            output_dir: Output directory for the reports (optional)

        Returns:
            Full simulation report
        """
        start_time = time.time()
        step_results = []

        logger.info(f"Starting workflow simulation: {scenario_pack.scenario_id}")
        logger.info(f"Workflow: {workflow.workflow_id}, Steps: {len(scenario_pack.frames)}")

        # Simulate each step
        for i, frame in enumerate(scenario_pack.frames):
            step_start = time.time()

            # 1. Node matching
            node_matching_result = self._simulate_node_matching(frame, workflow)

            # 2. Target resolution (only if a node matched and an action is expected)
            target_resolution_result = None
            if node_matching_result.success and frame.expected_action:
                target_resolution_result = self._simulate_target_resolution(frame, workflow, node_matching_result.matched_node_id)

            # 3. Post-conditions (only if the target was resolved)
            post_condition_result = None
            if target_resolution_result and target_resolution_result.success:
                post_condition_result = self._simulate_post_conditions(frame, workflow, node_matching_result.matched_node_id)

            # 4. Transition (skipped for the last frame)
            transition_result = None
            if i < len(scenario_pack.frames) - 1:
                next_frame = scenario_pack.frames[i + 1]
                transition_result = self._simulate_transition(frame, next_frame, workflow)

            # Compute the overall success of the step
            overall_success = (
                node_matching_result.success and
                (target_resolution_result is None or target_resolution_result.success) and
                (post_condition_result is None or post_condition_result.success) and
                (transition_result is None or transition_result.success)
            )

            step_duration = (time.time() - step_start) * 1000

            step_result = WorkflowStepResult(
                frame_id=frame.frame_id,
                step_number=frame.step_number,
                node_matching=node_matching_result,
                target_resolution=target_resolution_result,
                post_conditions=post_condition_result,
                transition=transition_result,
                overall_success=overall_success,
                step_duration_ms=step_duration
            )

            step_results.append(step_result)

            logger.debug(f"Step {frame.step_number}: {'✅' if overall_success else '❌'} ({step_duration:.0f}ms)")

        # Compute global metrics
        total_time = (time.time() - start_time) * 1000
        report = self._generate_report(scenario_pack, workflow, step_results, total_time)

        # Save if an output directory was given
        if output_dir:
            self._save_reports(report, output_dir)

        logger.info(f"Simulation completed: {report.overall_success_rate:.1%} success rate")
        return report

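    # Scoring convention: components that were not exercised for a frame
    # (no expected action, no post-conditions to check, last frame of the pack)
    # are treated as successes, so a step only fails when a component that
    # actually ran reports a failure.
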
    def _simulate_node_matching(self, frame: ScenarioFrame, workflow: Workflow) -> NodeMatchingResult:
        """Simulate node matching for a single frame."""
        try:
            # Build the state embedding for the frame
            state_embedding = self.state_embedding_builder.build(frame.screen_state)

            # Try to match against the workflow nodes
            candidate_nodes = workflow.nodes
            match_result = self.node_matcher.match(frame.screen_state, candidate_nodes)

            if match_result:
                matched_node, confidence = match_result
                success = True
                matched_node_id = matched_node.node_id
                strategy_used = "faiss_search"  # or another strategy, depending on NodeMatcher
                error_message = None
            else:
                success = False
                matched_node_id = None
                confidence = 0.0
                strategy_used = "none"
                error_message = "No matching node found"

            return NodeMatchingResult(
                frame_id=frame.frame_id,
                expected_node_id=frame.expected_node_id,
                matched_node_id=matched_node_id,
                confidence=confidence,
                success=success,
                strategy_used=strategy_used,
                error_message=error_message
            )

        except Exception as e:
            logger.error(f"Node matching failed for frame {frame.frame_id}: {e}")
            return NodeMatchingResult(
                frame_id=frame.frame_id,
                expected_node_id=frame.expected_node_id,
                matched_node_id=None,
                confidence=0.0,
                success=False,
                strategy_used="error",
                error_message=str(e)
            )

    def _simulate_target_resolution(
        self,
        frame: ScenarioFrame,
        workflow: Workflow,
        matched_node_id: str
    ) -> TargetResolutionResult:
        """Simulate target resolution for the expected action of a frame."""
        try:
            start_time = time.time()

            # Retrieve the expected action
            expected_action = frame.expected_action
            if not expected_action or "target" not in expected_action:
                return TargetResolutionResult(
                    frame_id=frame.frame_id,
                    target_spec=None,
                    resolved_element_id=None,
                    expected_element_id=None,
                    confidence=0.0,
                    success=True,  # No action to resolve counts as success
                    strategy_used="no_action",
                    resolution_time_ms=0.0
                )

            # Build the TargetSpec from the expected action
            target_spec = TargetSpec.from_dict(expected_action["target"])

            # Resolve the target
            resolved_target = self.target_resolver.resolve_target(
                target_spec,
                frame.screen_state,
                context={}
            )

            resolution_time = (time.time() - start_time) * 1000

            if resolved_target:
                return TargetResolutionResult(
                    frame_id=frame.frame_id,
                    target_spec=target_spec,
                    resolved_element_id=resolved_target.element.element_id,
                    expected_element_id=expected_action.get("expected_element_id"),
                    confidence=resolved_target.confidence,
                    success=True,
                    strategy_used=resolved_target.strategy_used,
                    resolution_time_ms=resolution_time
                )
            else:
                return TargetResolutionResult(
                    frame_id=frame.frame_id,
                    target_spec=target_spec,
                    resolved_element_id=None,
                    expected_element_id=expected_action.get("expected_element_id"),
                    confidence=0.0,
                    success=False,
                    strategy_used="failed",
                    resolution_time_ms=resolution_time,
                    error_message="Target resolution failed"
                )

        except Exception as e:
            logger.error(f"Target resolution failed for frame {frame.frame_id}: {e}")
            return TargetResolutionResult(
                frame_id=frame.frame_id,
                target_spec=None,
                resolved_element_id=None,
                expected_element_id=None,
                confidence=0.0,
                success=False,
                strategy_used="error",
                resolution_time_ms=0.0,
                error_message=str(e)
            )

    def _simulate_post_conditions(
        self,
        frame: ScenarioFrame,
        workflow: Workflow,
        matched_node_id: str
    ) -> PostConditionResult:
        """Simulate post-condition verification for a frame."""
        try:
            start_time = time.time()

            # Find the corresponding edge to retrieve its post-conditions
            outgoing_edges = workflow.get_outgoing_edges(matched_node_id)
            if not outgoing_edges:
                return PostConditionResult(
                    frame_id=frame.frame_id,
                    post_conditions=None,
                    checks_passed=0,
                    checks_total=0,
                    success=True,  # No post-conditions counts as success
                    timeout_occurred=False,
                    verification_time_ms=0.0
                )

            # Take the first edge (a simplification)
            edge = outgoing_edges[0]
            post_conditions = edge.post_conditions

            if not post_conditions or not post_conditions.success:
                return PostConditionResult(
                    frame_id=frame.frame_id,
                    post_conditions=post_conditions,
                    checks_passed=0,
                    checks_total=0,
                    success=True,
                    timeout_occurred=False,
                    verification_time_ms=0.0
                )

            # Simulate verification of the post-conditions
            checks_total = len(post_conditions.success)
            checks_passed = 0
            failed_checks = []

            for check in post_conditions.success:
                if self._verify_post_condition_check(check, frame.screen_state):
                    checks_passed += 1
                else:
                    failed_checks.append(f"{check.kind}: {check.value}")

            verification_time = (time.time() - start_time) * 1000
            success = checks_passed == checks_total

            return PostConditionResult(
                frame_id=frame.frame_id,
                post_conditions=post_conditions,
                checks_passed=checks_passed,
                checks_total=checks_total,
                success=success,
                timeout_occurred=False,
                verification_time_ms=verification_time,
                failed_checks=failed_checks
            )

        except Exception as e:
            logger.error(f"Post-condition verification failed for frame {frame.frame_id}: {e}")
            return PostConditionResult(
                frame_id=frame.frame_id,
                post_conditions=None,
                checks_passed=0,
                checks_total=0,
                success=False,
                timeout_occurred=False,
                verification_time_ms=0.0,
                error_message=str(e)
            )

    def _verify_post_condition_check(self, check: PostConditionCheck, screen_state: ScreenState) -> bool:
        """Verify a single post-condition check against a screen state."""
        try:
            if check.kind == "text_present":
                # Check that a text fragment is present
                detected_texts = getattr(screen_state.perception_level, 'detected_texts', []) if hasattr(screen_state, 'perception_level') else []
                return any(check.value in text for text in detected_texts)

            elif check.kind == "text_absent":
                # Check that a text fragment is absent
                detected_texts = getattr(screen_state.perception_level, 'detected_texts', []) if hasattr(screen_state, 'perception_level') else []
                return not any(check.value in text for text in detected_texts)

            elif check.kind == "element_present":
                # Check that a UI element can be resolved
                if not check.target:
                    return False
                resolved_target = self.target_resolver.resolve_target(check.target, screen_state, context={})
                return resolved_target is not None

            elif check.kind == "window_title_contains":
                # Check the window title
                window_title = getattr(screen_state.raw_level, 'window_title', '') if hasattr(screen_state, 'raw_level') else ''
                return check.value in window_title

            else:
                logger.warning(f"Unknown post-condition check kind: {check.kind}")
                return False

        except Exception as e:
            logger.error(f"Post-condition check failed: {e}")
            return False

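    # Supported check kinds, with illustrative values (assuming PostConditionCheck
    # is built from the `kind`, `value` and `target` fields read above):
    #
    #     PostConditionCheck(kind="text_present", value="Welcome")
    #     PostConditionCheck(kind="text_absent", value="Error")
    #     PostConditionCheck(kind="element_present", target=some_target_spec)
    #     PostConditionCheck(kind="window_title_contains", value="Settings")
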
    def _simulate_transition(
        self,
        current_frame: ScenarioFrame,
        next_frame: ScenarioFrame,
        workflow: Workflow
    ) -> TransitionResult:
        """Simulate the transition to the next frame."""
        try:
            # Determine whether a transition is expected
            expected_transition = (
                current_frame.expected_node_id != next_frame.expected_node_id and
                current_frame.expected_node_id is not None and
                next_frame.expected_node_id is not None
            )

            # Simulate the transition (assumed to succeed whenever the nodes differ)
            actual_transition = expected_transition
            success = expected_transition == actual_transition
            transition_confidence = 1.0 if success else 0.0

            return TransitionResult(
                from_frame_id=current_frame.frame_id,
                to_frame_id=next_frame.frame_id,
                expected_transition=expected_transition,
                actual_transition=actual_transition,
                success=success,
                transition_confidence=transition_confidence
            )

        except Exception as e:
            logger.error(f"Transition simulation failed: {e}")
            return TransitionResult(
                from_frame_id=current_frame.frame_id,
                to_frame_id=next_frame.frame_id,
                expected_transition=False,
                actual_transition=False,
                success=False,
                transition_confidence=0.0,
                error_message=str(e)
            )

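    # Note: the transition check above is a stub -- actual_transition is set
    # equal to expected_transition, so transition_accuracy can only drop when
    # an exception occurs. A stricter version could match next_frame.screen_state
    # against the node targeted by the workflow edge instead.
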
    def _generate_report(
        self,
        scenario_pack: ScenarioPack,
        workflow: Workflow,
        step_results: List[WorkflowStepResult],
        total_time_ms: float
    ) -> WorkflowSimulationReport:
        """Build the final simulation report."""
        total_steps = len(step_results)
        successful_steps = sum(1 for result in step_results if result.overall_success)

        # Per-component metrics
        node_matching_successes = sum(1 for result in step_results if result.node_matching.success)
        target_resolution_successes = sum(1 for result in step_results
                                          if result.target_resolution is None or result.target_resolution.success)
        post_condition_successes = sum(1 for result in step_results
                                       if result.post_conditions is None or result.post_conditions.success)
        transition_successes = sum(1 for result in step_results
                                   if result.transition is None or result.transition.success)

        node_matching_accuracy = node_matching_successes / max(1, total_steps)
        target_resolution_accuracy = target_resolution_successes / max(1, total_steps)
        post_condition_success_rate = post_condition_successes / max(1, total_steps)
        transition_accuracy = transition_successes / max(1, total_steps)

        # Error analysis
        error_breakdown = {}
        failure_points = []

        for result in step_results:
            if not result.overall_success:
                failure_points.append(f"Step {result.step_number}: {result.frame_id}")

                if not result.node_matching.success:
                    error_breakdown["node_matching_failures"] = error_breakdown.get("node_matching_failures", 0) + 1
                if result.target_resolution and not result.target_resolution.success:
                    error_breakdown["target_resolution_failures"] = error_breakdown.get("target_resolution_failures", 0) + 1
                if result.post_conditions and not result.post_conditions.success:
                    error_breakdown["post_condition_failures"] = error_breakdown.get("post_condition_failures", 0) + 1
                if result.transition and not result.transition.success:
                    error_breakdown["transition_failures"] = error_breakdown.get("transition_failures", 0) + 1

        # Recommendations
        recommendations = []
        if node_matching_accuracy < 0.9:
            recommendations.append("Consider improving node matching accuracy by updating embedding prototypes")
        if target_resolution_accuracy < 0.9:
            recommendations.append("Review target resolution strategies and fallback mechanisms")
        if post_condition_success_rate < 0.9:
            recommendations.append("Verify post-condition definitions and timeout settings")
        if transition_accuracy < 0.9:
            recommendations.append("Check workflow edge definitions and transition logic")

        avg_step_time = total_time_ms / max(1, total_steps)

        return WorkflowSimulationReport(
            scenario_id=scenario_pack.scenario_id,
            workflow_id=workflow.workflow_id,
            timestamp=datetime.now(),
            total_steps=total_steps,
            successful_steps=successful_steps,
            step_results=step_results,
            node_matching_accuracy=node_matching_accuracy,
            target_resolution_accuracy=target_resolution_accuracy,
            post_condition_success_rate=post_condition_success_rate,
            transition_accuracy=transition_accuracy,
            total_simulation_time_ms=total_time_ms,
            avg_step_time_ms=avg_step_time,
            error_breakdown=error_breakdown,
            failure_points=failure_points,
            recommendations=recommendations
        )

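    # Worked example of the metrics above (hypothetical numbers): with 10 steps,
    # 9 successful node matches and 8 fully successful steps,
    # node_matching_accuracy = 9 / 10 = 0.9 and the report's
    # overall_success_rate property = 8 / 10 = 0.8. Components that were
    # skipped for a step count toward their success totals.
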
    def _save_reports(self, report: WorkflowSimulationReport, output_dir: Path) -> None:
        """Save the JSON and Markdown reports."""
        output_dir.mkdir(parents=True, exist_ok=True)

        # JSON report
        json_path = output_dir / f"workflow_simulation_{report.scenario_id}_{report.timestamp.strftime('%Y%m%d_%H%M%S')}.json"
        report.save_to_file(json_path)

        # Markdown report
        md_path = output_dir / f"workflow_simulation_{report.scenario_id}_{report.timestamp.strftime('%Y%m%d_%H%M%S')}.md"
        with open(md_path, 'w', encoding='utf-8') as f:
            f.write(report.generate_markdown_report())

        logger.info(f"Reports saved to {output_dir}")


# ============================================================================
# Utility functions
# ============================================================================

def load_scenario_pack(scenario_dir: Union[str, Path]) -> ScenarioPack:
    """Load a scenario pack from a directory."""
    return ScenarioPack.load_from_directory(Path(scenario_dir))


def simulate_workflow_from_files(
    scenario_dir: Union[str, Path],
    workflow_file: Union[str, Path],
    output_dir: Optional[Union[str, Path]] = None
) -> WorkflowSimulationReport:
    """
    Simulate a workflow from files on disk.

    Args:
        scenario_dir: Scenario pack directory
        workflow_file: Workflow JSON file
        output_dir: Output directory (optional)

    Returns:
        Simulation report
    """
    # Load the scenario pack
    scenario_pack = load_scenario_pack(scenario_dir)

    # Load the workflow
    workflow = Workflow.load_from_file(Path(workflow_file))

    # Create the simulator
    simulator = WorkflowSimulator()

    # Run the simulation
    output_path = Path(output_dir) if output_dir else None
    return simulator.simulate_workflow(scenario_pack, workflow, output_path)


if __name__ == "__main__":
    # Basic smoke test
    logging.basicConfig(level=logging.INFO)

    # Example usage
    scenario_dir = Path("tests/scenarios/login_flow")
    workflow_file = Path("data/workflows/login_workflow.json")
    output_dir = Path("data/simulation_reports")

    if scenario_dir.exists() and workflow_file.exists():
        report = simulate_workflow_from_files(scenario_dir, workflow_file, output_dir)
        print(f"Simulation completed: {report.overall_success_rate:.1%} success rate")
    else:
        print("Example files not found - create test scenarios first")