feat(matching): match_current_state_from_state consomme le ScreenState enrichi (Lot E)

Nouvelle méthode match_current_state_from_state(screen_state, workflow_id) qui utilise directement le ScreenState enrichi (window_title, detected_text, ui_elements) fourni par ExecutionLoop au lieu de reconstruire un stub ScreenState("Unknown", ui_elements=[], ...). Préfère HierarchicalMatcher si workflow chargeable, fallback FAISS sinon. L'ancienne API match_current_state(screenshot_path, workflow_id) est convertie en wrapper : appelle ScreenAnalyzer.analyze() puis délègue. Rétrocompat préservée. ExecutionLoop._execute_step utilise la nouvelle méthode -> plus de double analyze() dans le chemin d'exécution (économie latence). Premier vrai matching context-aware. 11 nouveaux tests + 2 tests integration loop. 172 tests non-régression verts. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-15 09:07:04 +02:00
parent c8a3618e27
commit 78ee962918
2 changed files with 922 additions and 87 deletions
--- a/tests/unit/test_workflow_pipeline_match_from_state.py
+++ b/tests/unit/test_workflow_pipeline_match_from_state.py
@@ -0,0 +1,400 @@
+"""
+Tests unitaires du matching context-aware — Lot E.
+
+Vérifient que ``WorkflowPipeline.match_current_state_from_state`` :
+  - Consomme réellement le ``ScreenState`` fourni (window_title,
+    detected_text, ui_elements) au lieu de le reconstruire en stub.
+  - Ne réinvoque PAS ``ScreenAnalyzer.analyze`` (le state est déjà prêt).
+  - Préfère le matching hiérarchique si un workflow est chargeable.
+  - Retombe sur FAISS quand le hiérarchique n'est pas applicable.
+
+Vérifient aussi que l'ancienne API ``match_current_state(screenshot_path, ...)``
+continue à fonctionner comme un **wrapper** qui invoque bien le
+``ScreenAnalyzer`` puis délègue à ``match_current_state_from_state``.
+"""
+
+from __future__ import annotations
+
+from datetime import datetime
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+from PIL import Image
+
+from core.models.screen_state import (
+    ContextLevel,
+    EmbeddingRef,
+    PerceptionLevel,
+    RawLevel,
+    ScreenState,
+    WindowContext,
+)
+from core.pipeline.workflow_pipeline import WorkflowPipeline
+
+
+# -----------------------------------------------------------------------------
+# Helpers
+# -----------------------------------------------------------------------------
+
+
+def _make_enriched_state(
+    *,
+    window_title: str = "Bloc-Notes - Sans titre",
+    app_name: str = "notepad",
+    detected_text=None,
+    ui_elements=None,
+    screenshot_path: str = "",
+) -> ScreenState:
+    """Build an enriched ScreenState simulating what ExecutionLoop provides.
+
+    ``detected_text`` falls back to ``["Fichier", "Édition"]`` and
+    ``ui_elements`` to an empty list only when left as ``None``; an
+    explicitly passed empty list is kept as-is.
+    """
+    if detected_text is None:
+        detected_text = ["Fichier", "Édition"]
+    if ui_elements is None:
+        ui_elements = []
+
+    window = WindowContext(
+        app_name=app_name,
+        window_title=window_title,
+        screen_resolution=[1920, 1080],
+    )
+    raw = RawLevel(
+        screenshot_path=screenshot_path,
+        capture_method="test",
+        file_size_bytes=0,
+    )
+    perception = PerceptionLevel(
+        embedding=EmbeddingRef(provider="t", vector_id="v", dimensions=512),
+        detected_text=detected_text,
+        text_detection_method="test",
+        confidence_avg=0.9,
+    )
+
+    return ScreenState(
+        screen_state_id="state_lot_e",
+        timestamp=datetime.now(),
+        session_id="sess_lot_e",
+        window=window,
+        raw=raw,
+        perception=perception,
+        context=ContextLevel(),
+        ui_elements=ui_elements,
+    )
+
+
+def _make_pipeline_with_mocks(tmp_path) -> WorkflowPipeline:
+    """Assemble a minimal WorkflowPipeline whose collaborators are mocks.
+
+    ``__init__`` is bypassed through ``__new__`` so that no real
+    CLIPEmbedder / UIDetector / VLM gets instantiated; the collaborators
+    are injected directly instead. Faster and more deterministic.
+    """
+    pipeline = WorkflowPipeline.__new__(WorkflowPipeline)
+
+    # Working directories, all created below the given temporary directory.
+    pipeline.data_dir = Path(tmp_path)
+    for attr_name, folder in (
+        ("workflows_dir", "workflows"),
+        ("embeddings_dir", "embeddings"),
+        ("screenshots_dir", "screenshots"),
+    ):
+        setattr(pipeline, attr_name, pipeline.data_dir / folder)
+        getattr(pipeline, attr_name).mkdir(parents=True, exist_ok=True)
+
+    # The embedding builder hands back a canned 8-dimensional zero vector.
+    canned_embedding = MagicMock()
+    canned_embedding.get_vector.return_value = [0.0] * 8
+    canned_embedding.embedding_id = "emb_test"
+    pipeline.embedding_builder = MagicMock()
+    pipeline.embedding_builder.build.return_value = canned_embedding
+
+    # FAISS finds nothing unless a test overrides the search result.
+    pipeline.faiss_manager = MagicMock()
+    pipeline.faiss_manager.search.return_value = []
+
+    # Remaining collaborators: plain mocks with no configured behaviour.
+    for attr_name in (
+        "hierarchical_matcher",
+        "clip_embedder",
+        "fusion_engine",
+        "graph_builder",
+        "node_matcher",
+        "learning_manager",
+        "target_resolver",
+        "error_handler",
+        "action_executor",
+    ):
+        setattr(pipeline, attr_name, MagicMock())
+
+    # Optional components are left disabled.
+    pipeline.ui_detector = None
+    pipeline.vlm_client = None
+
+    # Internal state starts out empty.
+    pipeline._workflows = {}
+    pipeline._temporal_context = {}
+
+    return pipeline
+
+
+def _fake_hierarchical_result(
+    node_id: str = "node_ok",
+    confidence: float = 0.82,
+):
+    """Return a mock MatchResult (compatible with the HierarchicalMatcher API).
+
+    Only ``node_id`` and ``confidence`` are configurable; the per-level
+    confidences and the remaining fields are fixed canned values.
+    """
+    # Keyword arguments given to MagicMock are set as attributes on the mock.
+    return MagicMock(
+        node_id=node_id,
+        confidence=confidence,
+        window_confidence=0.9,
+        region_confidence=0.8,
+        element_confidence=0.85,
+        temporal_boost=0.0,
+        matched_variant=None,
+        alternatives=[],
+        match_time_ms=1.0,
+    )
+
+
+# -----------------------------------------------------------------------------
+# 1. match_current_state_from_state — chemin hiérarchique
+# -----------------------------------------------------------------------------
+
+
+class TestMatchFromStateHierarchical:
+    """
+    Hierarchical path of ``match_current_state_from_state``.
+
+    When a workflow can be loaded, matching is expected to go through the
+    HierarchicalMatcher, fed with the window title and ui_elements of the
+    provided ScreenState — this is the core of Lot E.
+    """
+
+    @staticmethod
+    def _pipeline_with_workflow(tmp_path, **result_kwargs):
+        """Return a mocked pipeline set up for the hierarchical path.
+
+        ``load_workflow`` is replaced by a mock returning a workflow with one
+        node, and the hierarchical matcher answers with a fake result built
+        from ``result_kwargs`` (forwarded to ``_fake_hierarchical_result``).
+        """
+        pipe = _make_pipeline_with_mocks(tmp_path)
+        pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
+        pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result(
+            **result_kwargs
+        )
+        return pipe
+
+    def test_match_from_state_uses_provided_window_title(self, tmp_path):
+        """The provided window_title (Bloc-Notes) is handed to the matcher,
+        not an "Unknown" stub."""
+        pipe = self._pipeline_with_workflow(tmp_path)
+
+        state = _make_enriched_state(window_title="Bloc-Notes - Sans titre")
+        result = pipe.match_current_state_from_state(state, workflow_id="wf1")
+
+        assert result is not None
+        # The hierarchical matcher was called with the real window title
+        call_kwargs = pipe.hierarchical_matcher.match.call_args.kwargs
+        window_info = call_kwargs["window_info"]
+        assert window_info["title"] == "Bloc-Notes - Sans titre"
+        assert window_info["window_title"] == "Bloc-Notes - Sans titre"
+        # No "Unknown" placeholder
+        assert "Unknown" not in window_info["title"]
+
+    def test_match_from_state_uses_ui_elements(self, tmp_path):
+        """The ScreenState's ui_elements are handed to the matcher as
+        detected_elements, not replaced by []."""
+        pipe = self._pipeline_with_workflow(tmp_path)
+
+        # Three dummy elements
+        ui_elements = [MagicMock(), MagicMock(), MagicMock()]
+        state = _make_enriched_state(ui_elements=ui_elements)
+        pipe.match_current_state_from_state(state, workflow_id="wf1")
+
+        call_kwargs = pipe.hierarchical_matcher.match.call_args.kwargs
+        passed_elements = call_kwargs["detected_elements"]
+        assert len(passed_elements) == 3
+        assert passed_elements == ui_elements
+
+    def test_match_from_state_uses_detected_text(self, tmp_path):
+        """A ScreenState with non-empty detected_text must go through
+        matching without being rewritten into an empty stub."""
+        pipe = self._pipeline_with_workflow(tmp_path)
+
+        detected_text = ["Fichier", "Édition", "Affichage", "Aide"]
+        # Independent snapshot: the state may hold the very same list object,
+        # in which case comparing against ``detected_text`` itself could
+        # never reveal an in-place mutation.
+        expected_text = list(detected_text)
+        state = _make_enriched_state(detected_text=detected_text)
+        pipe.match_current_state_from_state(state, workflow_id="wf1")
+
+        # The matching path really ran (otherwise the check below is moot).
+        assert pipe.hierarchical_matcher.match.called
+        # The state itself is not passed to the matcher, so the check is
+        # indirect: the original state must still carry its enriched text.
+        assert state.perception.detected_text == expected_text
+
+    def test_match_from_state_no_reconstruction(self, tmp_path):
+        """``ScreenAnalyzer.analyze`` must NOT be called by
+        ``match_current_state_from_state`` — the state is already built."""
+        pipe = self._pipeline_with_workflow(tmp_path)
+
+        state = _make_enriched_state()
+
+        # get_screen_analyzer is patched: if the new method invoked the
+        # analyzer, the mock would record it. Expectation: ZERO calls.
+        with patch(
+            "core.pipeline.get_screen_analyzer"
+        ) as mock_get_analyzer:
+            fake_analyzer = MagicMock()
+            mock_get_analyzer.return_value = fake_analyzer
+
+            pipe.match_current_state_from_state(state, workflow_id="wf1")
+
+            # get_screen_analyzer itself may or may not be called (no strong
+            # guarantee), but analyze() must not be.
+            fake_analyzer.analyze.assert_not_called()
+
+    def test_match_from_state_below_threshold_returns_none(self, tmp_path):
+        """If the hierarchical confidence is < min_similarity, matching falls
+        back to FAISS; if FAISS finds nothing either, the result is None."""
+        pipe = self._pipeline_with_workflow(tmp_path, confidence=0.1)
+        pipe.faiss_manager.search.return_value = []
+
+        state = _make_enriched_state()
+        result = pipe.match_current_state_from_state(
+            state, workflow_id="wf1", min_similarity=0.5
+        )
+        assert result is None
+
+    def test_match_from_state_returns_hierarchical_metadata(self, tmp_path):
+        """The result must include the per-level confidences (window,
+        region, element) when the hierarchical path is taken."""
+        pipe = self._pipeline_with_workflow(
+            tmp_path, node_id="node_42", confidence=0.77
+        )
+
+        state = _make_enriched_state()
+        result = pipe.match_current_state_from_state(
+            state, workflow_id="wf1", min_similarity=0.5
+        )
+        assert result is not None
+        assert result["node_id"] == "node_42"
+        assert result["confidence"] == 0.77
+        assert result["workflow_id"] == "wf1"
+        assert result["match_type"] == "hierarchical"
+        assert "window_confidence" in result
+        assert "region_confidence" in result
+        assert "element_confidence" in result
+
+
+# -----------------------------------------------------------------------------
+# 2. match_current_state_from_state — fallback FAISS
+# -----------------------------------------------------------------------------
+
+
+class TestMatchFromStateFAISSFallback:
+    """If no workflow can be loaded, matching falls back to FAISS with the provided state."""
+
+    def test_fallback_faiss_when_no_workflow_id(self, tmp_path):
+        """Without a workflow_id, a FAISS hit with similarity 0.91 is returned
+        and tagged ``faiss``."""
+        pipeline = _make_pipeline_with_mocks(tmp_path)
+        strong_hit = {
+            "similarity": 0.91,
+            "metadata": {"node_id": "n_faiss", "workflow_id": None},
+        }
+        pipeline.faiss_manager.search.return_value = [strong_hit]
+
+        enriched = _make_enriched_state()
+        outcome = pipeline.match_current_state_from_state(enriched, workflow_id=None)
+
+        # No hierarchical matching (there is no workflow_id)
+        pipeline.hierarchical_matcher.match.assert_not_called()
+        # The embedding was built from the enriched state itself
+        pipeline.embedding_builder.build.assert_called_once_with(enriched)
+        assert outcome is not None
+        assert outcome["node_id"] == "n_faiss"
+        assert outcome["match_type"] == "faiss"
+
+    def test_faiss_returns_none_below_threshold(self, tmp_path):
+        """A FAISS hit with similarity 0.6 yields no match when
+        ``min_similarity`` is left at its default."""
+        pipeline = _make_pipeline_with_mocks(tmp_path)
+        weak_hit = {
+            "similarity": 0.6,  # < 0.85
+            "metadata": {"node_id": "n_low", "workflow_id": None},
+        }
+        pipeline.faiss_manager.search.return_value = [weak_hit]
+
+        enriched = _make_enriched_state()
+        outcome = pipeline.match_current_state_from_state(enriched, workflow_id=None)
+        assert outcome is None
+
+
+# -----------------------------------------------------------------------------
+# 3. Wrapper legacy match_current_state(screenshot_path, ...)
+# -----------------------------------------------------------------------------
+
+
+class TestLegacyWrapper:
+    """
+    The legacy API ``match_current_state(screenshot_path, ...)`` must:
+      1. Call ScreenAnalyzer.analyze to enrich the state.
+      2. Delegate to match_current_state_from_state.
+    """
+
+    def test_match_current_state_wrapper_calls_analyzer(self, tmp_path):
+        """The legacy wrapper MUST call ScreenAnalyzer.analyze."""
+        pipeline = _make_pipeline_with_mocks(tmp_path)
+        pipeline.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
+        pipeline.hierarchical_matcher.match.return_value = _fake_hierarchical_result()
+
+        # A real image file on disk for the wrapper to work with
+        image_path = tmp_path / "shot.png"
+        Image.new("RGB", (64, 64), color=(100, 100, 100)).save(str(image_path))
+
+        # Stand-in for the shared analyzer, so its invocation can be checked
+        analyzer = MagicMock()
+        analyzer.analyze.return_value = _make_enriched_state(
+            window_title="Calc", screenshot_path=str(image_path)
+        )
+
+        analyzer_patch = patch(
+            "core.pipeline.get_screen_analyzer",
+            return_value=analyzer,
+        )
+        with analyzer_patch:
+            outcome = pipeline.match_current_state(str(image_path), workflow_id="wf1")
+
+        # The analyzer was invoked
+        analyzer.analyze.assert_called_once()
+        # And a result came back (the hierarchical matcher ran behind it)
+        assert outcome is not None
+        assert pipeline.hierarchical_matcher.match.called
+
+    def test_match_current_state_wrapper_delegates_to_from_state(self, tmp_path):
+        """The wrapper really delegates to match_current_state_from_state."""
+        pipeline = _make_pipeline_with_mocks(tmp_path)
+
+        image_path = tmp_path / "shot.png"
+        Image.new("RGB", (32, 32), color=(50, 50, 50)).save(str(image_path))
+
+        analyzer = MagicMock()
+        analyzer.analyze.return_value = _make_enriched_state(
+            screenshot_path=str(image_path)
+        )
+
+        # Spy on the new method (it exists; it is replaced for this test)
+        analyzer_patch = patch(
+            "core.pipeline.get_screen_analyzer",
+            return_value=analyzer,
+        )
+        delegate_patch = patch.object(
+            pipeline,
+            "match_current_state_from_state",
+            return_value={"node_id": "x", "workflow_id": "wf1", "confidence": 0.9},
+        )
+        with analyzer_patch, delegate_patch as delegate_spy:
+            outcome = pipeline.match_current_state(str(image_path), workflow_id="wf1")
+
+        delegate_spy.assert_called_once()
+        # The forwarded state (positional or keyword) is the analyzer's output
+        recorded = delegate_spy.call_args
+        if recorded.args:
+            forwarded_state = recorded.args[0]
+        else:
+            forwarded_state = recorded.kwargs["screen_state"]
+        assert forwarded_state is analyzer.analyze.return_value
+        assert outcome == {"node_id": "x", "workflow_id": "wf1", "confidence": 0.9}
+
+    def test_wrapper_fallback_to_stub_when_analyzer_fails(self, tmp_path):
+        """If the analyzer is unavailable or crashes, the wrapper falls back
+        to a minimal stub to preserve backward compatibility."""
+        pipeline = _make_pipeline_with_mocks(tmp_path)
+
+        image_path = tmp_path / "shot.png"
+        Image.new("RGB", (32, 32)).save(str(image_path))
+
+        analyzer_patch = patch(
+            "core.pipeline.get_screen_analyzer",
+            side_effect=RuntimeError("analyzer down"),
+        )
+        delegate_patch = patch.object(
+            pipeline, "match_current_state_from_state", return_value=None
+        )
+        with analyzer_patch, delegate_patch as delegate_spy:
+            pipeline.match_current_state(
+                str(image_path), workflow_id="wf1", window_title="Hint"
+            )
+
+        delegate_spy.assert_called_once()
+        # The forwarded state (positional or keyword) is a stub that honours
+        # the window_title hint
+        recorded = delegate_spy.call_args
+        if recorded.args:
+            forwarded_state = recorded.args[0]
+        else:
+            forwarded_state = recorded.kwargs["screen_state"]
+        assert forwarded_state.window.window_title == "Hint"