feat(matching): match_current_state_from_state consomme le ScreenState enrichi (Lot E)
Nouvelle méthode match_current_state_from_state(screen_state, workflow_id)
qui utilise directement le ScreenState enrichi (window_title, detected_text,
ui_elements) fourni par ExecutionLoop au lieu de reconstruire un stub
ScreenState("Unknown", ui_elements=[], ...).
Préfère HierarchicalMatcher si workflow chargeable, fallback FAISS sinon.
L'ancienne API match_current_state(screenshot_path, workflow_id) est
convertie en wrapper : appelle ScreenAnalyzer.analyze() puis délègue.
Rétrocompat préservée.
ExecutionLoop._execute_step utilise la nouvelle méthode -> plus de double
analyze() dans le chemin d'exécution (économie latence).
Premier vrai matching context-aware. 11 nouveaux tests + 2 tests
integration loop. 172 tests non-régression verts.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
400
tests/unit/test_workflow_pipeline_match_from_state.py
Normal file
400
tests/unit/test_workflow_pipeline_match_from_state.py
Normal file
@@ -0,0 +1,400 @@
|
||||
"""
|
||||
Unit tests for context-aware matching — Lot E.

They check that ``WorkflowPipeline.match_current_state_from_state``:
- Actually consumes the provided ``ScreenState`` (window_title,
  detected_text, ui_elements) instead of rebuilding it as a stub.
- Does NOT re-invoke ``ScreenAnalyzer.analyze`` (the state is already built).
- Prefers hierarchical matching when a workflow can be loaded.
- Falls back to FAISS when hierarchical matching is not applicable.

They also check that the legacy API ``match_current_state(screenshot_path, ...)``
keeps working as a **wrapper** that invokes the ``ScreenAnalyzer`` and then
delegates to ``match_current_state_from_state``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from PIL import Image
|
||||
|
||||
from core.models.screen_state import (
|
||||
ContextLevel,
|
||||
EmbeddingRef,
|
||||
PerceptionLevel,
|
||||
RawLevel,
|
||||
ScreenState,
|
||||
WindowContext,
|
||||
)
|
||||
from core.pipeline.workflow_pipeline import WorkflowPipeline
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_enriched_state(
    *,
    window_title: str = "Bloc-Notes - Sans titre",
    app_name: str = "notepad",
    detected_text=None,
    ui_elements=None,
    screenshot_path: str = "",
) -> ScreenState:
    """Build an enriched ScreenState standing in for what ExecutionLoop provides.

    Args:
        window_title: Title stored in the state's ``WindowContext``.
        app_name: Application name stored in the state's ``WindowContext``.
        detected_text: Texts stored in the perception level; when ``None``
            the default ``["Fichier", "Édition"]`` is used.
        ui_elements: UI elements attached to the state; when ``None`` an
            empty list is used.
        screenshot_path: Path stored in the raw level (may be empty).

    Returns:
        A ``ScreenState`` with fixed ids and test-only metadata.
    """
    return ScreenState(
        screen_state_id="state_lot_e",
        timestamp=datetime.now(),
        session_id="sess_lot_e",
        window=WindowContext(
            app_name=app_name,
            window_title=window_title,
            screen_resolution=[1920, 1080],
        ),
        raw=RawLevel(
            screenshot_path=screenshot_path,
            capture_method="test",
            file_size_bytes=0,
        ),
        perception=PerceptionLevel(
            embedding=EmbeddingRef(provider="t", vector_id="v", dimensions=512),
            # ``is not None`` (not truthiness) so an explicit [] is honoured.
            detected_text=detected_text if detected_text is not None else ["Fichier", "Édition"],
            text_detection_method="test",
            confidence_avg=0.9,
        ),
        context=ContextLevel(),
        ui_elements=ui_elements if ui_elements is not None else [],
    )
|
||||
|
||||
|
||||
def _make_pipeline_with_mocks(tmp_path) -> WorkflowPipeline:
    """Build a minimal WorkflowPipeline whose collaborators are mocks.

    ``__init__`` is bypassed via ``__new__`` so that no real
    CLIPEmbedder / UIDetector / VLM gets instantiated; the collaborators
    are injected directly as attributes. Faster and more deterministic.

    Args:
        tmp_path: Base directory under which the ``workflows``,
            ``embeddings`` and ``screenshots`` directories are created.

    Returns:
        The pipeline instance with mocked collaborators and empty
        ``_workflows`` / ``_temporal_context`` dicts.
    """
    pipe = WorkflowPipeline.__new__(WorkflowPipeline)

    # Directories
    pipe.data_dir = Path(tmp_path)
    pipe.workflows_dir = pipe.data_dir / "workflows"
    pipe.workflows_dir.mkdir(parents=True, exist_ok=True)
    pipe.embeddings_dir = pipe.data_dir / "embeddings"
    pipe.embeddings_dir.mkdir(parents=True, exist_ok=True)
    pipe.screenshots_dir = pipe.data_dir / "screenshots"
    pipe.screenshots_dir.mkdir(parents=True, exist_ok=True)

    # Mocked collaborators
    fake_embedding = MagicMock()
    fake_embedding.get_vector.return_value = [0.0] * 8
    fake_embedding.embedding_id = "emb_test"

    pipe.embedding_builder = MagicMock()
    pipe.embedding_builder.build.return_value = fake_embedding

    # FAISS finds nothing by default; individual tests override this.
    pipe.faiss_manager = MagicMock()
    pipe.faiss_manager.search.return_value = []

    pipe.hierarchical_matcher = MagicMock()

    pipe.clip_embedder = MagicMock()
    pipe.fusion_engine = MagicMock()
    pipe.ui_detector = None
    pipe.vlm_client = None
    pipe.graph_builder = MagicMock()
    pipe.node_matcher = MagicMock()
    pipe.learning_manager = MagicMock()
    pipe.target_resolver = MagicMock()
    pipe.error_handler = MagicMock()
    pipe.action_executor = MagicMock()

    pipe._workflows = {}
    pipe._temporal_context = {}

    return pipe
|
||||
|
||||
|
||||
def _fake_hierarchical_result(
|
||||
node_id: str = "node_ok",
|
||||
confidence: float = 0.82,
|
||||
):
|
||||
"""Construit un MatchResult factice (compat avec l'API du HierarchicalMatcher)."""
|
||||
result = MagicMock()
|
||||
result.node_id = node_id
|
||||
result.confidence = confidence
|
||||
result.window_confidence = 0.9
|
||||
result.region_confidence = 0.8
|
||||
result.element_confidence = 0.85
|
||||
result.temporal_boost = 0.0
|
||||
result.matched_variant = None
|
||||
result.alternatives = []
|
||||
result.match_time_ms = 1.0
|
||||
return result
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 1. match_current_state_from_state — chemin hiérarchique
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMatchFromStateHierarchical:
    """
    When a workflow can be loaded, matching goes through the
    HierarchicalMatcher, which consumes window_title + ui_elements —
    this is the core of Lot E.
    """

    def test_match_from_state_uses_provided_window_title(self, tmp_path):
        """The provided window_title (Bloc-Notes) reaches the matcher,
        not an "Unknown" stub."""
        pipe = _make_pipeline_with_mocks(tmp_path)
        pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
        pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result()

        state = _make_enriched_state(window_title="Bloc-Notes - Sans titre")
        result = pipe.match_current_state_from_state(state, workflow_id="wf1")

        assert result is not None
        # The hierarchical matcher was called with the real window_title
        call_kwargs = pipe.hierarchical_matcher.match.call_args.kwargs
        window_info = call_kwargs["window_info"]
        assert window_info["title"] == "Bloc-Notes - Sans titre"
        assert window_info["window_title"] == "Bloc-Notes - Sans titre"
        # No "Unknown"
        assert "Unknown" not in window_info["title"]

    def test_match_from_state_uses_ui_elements(self, tmp_path):
        """The ScreenState's ui_elements reach the matcher as
        detected_elements instead of being replaced by []."""
        pipe = _make_pipeline_with_mocks(tmp_path)
        pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
        pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result()

        # 3 fake elements
        ui_elements = [MagicMock(), MagicMock(), MagicMock()]
        state = _make_enriched_state(ui_elements=ui_elements)
        pipe.match_current_state_from_state(state, workflow_id="wf1")

        call_kwargs = pipe.hierarchical_matcher.match.call_args.kwargs
        passed_elements = call_kwargs["detected_elements"]
        assert len(passed_elements) == 3
        assert passed_elements == ui_elements

    def test_match_from_state_uses_detected_text(self, tmp_path):
        """A ScreenState with non-empty detected_text must be carried
        through intact (not replaced by an empty stub)."""
        pipe = _make_pipeline_with_mocks(tmp_path)
        pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
        pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result()

        detected_text = ["Fichier", "Édition", "Affichage", "Aide"]
        state = _make_enriched_state(detected_text=detected_text)
        pipe.match_current_state_from_state(state, workflow_id="wf1")

        # The state itself is not handed to the matcher directly, but it
        # must not have been rewritten into a stub beforehand: this is
        # checked indirectly through its preserved properties. The
        # original state must stay enriched.
        assert state.perception.detected_text == detected_text
        assert state.perception.detected_text != []

    def test_match_from_state_no_reconstruction(self, tmp_path):
        """``ScreenAnalyzer.analyze`` must NOT be called by
        ``match_current_state_from_state`` — the state is already built."""
        pipe = _make_pipeline_with_mocks(tmp_path)
        pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
        pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result()

        state = _make_enriched_state()

        # get_screen_analyzer is patched globally: if the new method
        # invokes the analyzer, the mock gets called. Expectation: ZERO calls.
        with patch(
            "core.pipeline.get_screen_analyzer"
        ) as mock_get_analyzer:
            fake_analyzer = MagicMock()
            mock_get_analyzer.return_value = fake_analyzer

            pipe.match_current_state_from_state(state, workflow_id="wf1")

        # get_screen_analyzer may or may not be called (no strong
        # guarantee), but analyze() must NOT be in any case.
        fake_analyzer.analyze.assert_not_called()

    def test_match_from_state_below_threshold_returns_none(self, tmp_path):
        """If the hierarchical matcher yields a confidence < min_similarity,
        FAISS is the fallback; if FAISS finds nothing either, None."""
        pipe = _make_pipeline_with_mocks(tmp_path)
        pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
        pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result(
            confidence=0.1
        )
        pipe.faiss_manager.search.return_value = []

        state = _make_enriched_state()
        result = pipe.match_current_state_from_state(
            state, workflow_id="wf1", min_similarity=0.5
        )
        assert result is None

    def test_match_from_state_returns_hierarchical_metadata(self, tmp_path):
        """The result must include the per-level confidences (window,
        region, element) when the hierarchical path is taken."""
        pipe = _make_pipeline_with_mocks(tmp_path)
        pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
        pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result(
            node_id="node_42", confidence=0.77
        )

        state = _make_enriched_state()
        result = pipe.match_current_state_from_state(
            state, workflow_id="wf1", min_similarity=0.5
        )
        assert result is not None
        assert result["node_id"] == "node_42"
        assert result["confidence"] == 0.77
        assert result["workflow_id"] == "wf1"
        assert result["match_type"] == "hierarchical"
        assert "window_confidence" in result
        assert "region_confidence" in result
        assert "element_confidence" in result
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 2. match_current_state_from_state — fallback FAISS
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMatchFromStateFAISSFallback:
    """If no workflow can be loaded, matching falls back to FAISS with the provided state."""

    def test_fallback_faiss_when_no_workflow_id(self, tmp_path):
        """Without a workflow_id the hierarchical matcher is skipped and
        the FAISS hit is returned with match_type "faiss"."""
        pipe = _make_pipeline_with_mocks(tmp_path)
        pipe.faiss_manager.search.return_value = [
            {
                "similarity": 0.91,
                "metadata": {"node_id": "n_faiss", "workflow_id": None},
            }
        ]

        state = _make_enriched_state()
        result = pipe.match_current_state_from_state(state, workflow_id=None)

        # No hierarchical matching (no workflow_id)
        pipe.hierarchical_matcher.match.assert_not_called()
        # The embedding was built from the enriched state itself
        pipe.embedding_builder.build.assert_called_once_with(state)
        assert result is not None
        assert result["node_id"] == "n_faiss"
        assert result["match_type"] == "faiss"

    def test_faiss_returns_none_below_threshold(self, tmp_path):
        """A FAISS hit whose similarity is too low yields None."""
        pipe = _make_pipeline_with_mocks(tmp_path)
        pipe.faiss_manager.search.return_value = [
            {
                "similarity": 0.6,  # < 0.85
                "metadata": {"node_id": "n_low", "workflow_id": None},
            }
        ]

        state = _make_enriched_state()
        result = pipe.match_current_state_from_state(state, workflow_id=None)
        assert result is None
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 3. Wrapper legacy match_current_state(screenshot_path, ...)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLegacyWrapper:
    """
    The legacy API ``match_current_state(screenshot_path, ...)`` must:
    1. Call ScreenAnalyzer.analyze to enrich the state.
    2. Delegate to match_current_state_from_state.
    """

    def test_match_current_state_wrapper_calls_analyzer(self, tmp_path):
        """The legacy wrapper MUST call ScreenAnalyzer.analyze."""
        pipe = _make_pipeline_with_mocks(tmp_path)
        pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
        pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result()

        # Prepare a real image file for the wrapper
        shot = tmp_path / "shot.png"
        Image.new("RGB", (64, 64), color=(100, 100, 100)).save(str(shot))

        # Patch the shared analyzer so the call can be verified
        fake_analyzer = MagicMock()
        fake_analyzer.analyze.return_value = _make_enriched_state(
            window_title="Calc", screenshot_path=str(shot)
        )

        with patch(
            "core.pipeline.get_screen_analyzer",
            return_value=fake_analyzer,
        ):
            result = pipe.match_current_state(str(shot), workflow_id="wf1")

        # The analyzer was invoked
        fake_analyzer.analyze.assert_called_once()
        # And there is a result (the hierarchical matcher ran behind it)
        assert result is not None
        assert pipe.hierarchical_matcher.match.called

    def test_match_current_state_wrapper_delegates_to_from_state(self, tmp_path):
        """The wrapper does delegate to match_current_state_from_state."""
        pipe = _make_pipeline_with_mocks(tmp_path)

        shot = tmp_path / "shot.png"
        Image.new("RGB", (32, 32), color=(50, 50, 50)).save(str(shot))

        fake_analyzer = MagicMock()
        fake_analyzer.analyze.return_value = _make_enriched_state(
            screenshot_path=str(shot)
        )

        # Spy on the new method (it exists; it is replaced by a mock here)
        with patch(
            "core.pipeline.get_screen_analyzer",
            return_value=fake_analyzer,
        ), patch.object(
            pipe,
            "match_current_state_from_state",
            return_value={"node_id": "x", "workflow_id": "wf1", "confidence": 0.9},
        ) as mock_from_state:
            result = pipe.match_current_state(str(shot), workflow_id="wf1")

        mock_from_state.assert_called_once()
        # The state passed (args or kwargs) is the one the analyzer returned
        call = mock_from_state.call_args
        passed_state = call.args[0] if call.args else call.kwargs["screen_state"]
        assert passed_state is fake_analyzer.analyze.return_value
        assert result == {"node_id": "x", "workflow_id": "wf1", "confidence": 0.9}

    def test_wrapper_fallback_to_stub_when_analyzer_fails(self, tmp_path):
        """If the analyzer is unavailable/crashes, the wrapper falls back to
        a minimal stub to preserve backward compatibility."""
        pipe = _make_pipeline_with_mocks(tmp_path)

        shot = tmp_path / "shot.png"
        Image.new("RGB", (32, 32)).save(str(shot))

        with patch(
            "core.pipeline.get_screen_analyzer",
            side_effect=RuntimeError("analyzer down"),
        ), patch.object(
            pipe, "match_current_state_from_state", return_value=None
        ) as mock_from_state:
            pipe.match_current_state(str(shot), workflow_id="wf1", window_title="Hint")

        mock_from_state.assert_called_once()
        # The state passed (args or kwargs) is a stub (window_title hint honoured)
        call = mock_from_state.call_args
        passed_state = call.args[0] if call.args else call.kwargs["screen_state"]
        assert passed_state.window.window_title == "Hint"
|
||||
Reference in New Issue
Block a user