feat(matching): match_current_state_from_state consomme enrichi (Lot E)
Nouvelle méthode match_current_state_from_state(screen_state, workflow_id)
qui utilise directement le ScreenState enrichi (window_title, detected_text,
ui_elements) fourni par ExecutionLoop au lieu de reconstruire un stub
ScreenState("Unknown", ui_elements=[], ...).
Préfère HierarchicalMatcher si workflow chargeable, fallback FAISS sinon.
L'ancienne API match_current_state(screenshot_path, workflow_id) est
convertie en wrapper : appelle ScreenAnalyzer.analyze() puis délègue.
Rétrocompat préservée.
ExecutionLoop._execute_step utilise la nouvelle méthode -> plus de double
analyze() dans le chemin d'exécution (économie latence).
Premier vrai matching context-aware. 11 nouveaux tests + 2 tests
integration loop. 172 tests non-régression verts.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -151,6 +151,13 @@ class StepResult:
|
||||
duration_ms: float
|
||||
message: str
|
||||
screenshot_path: Optional[str] = None
|
||||
# C1 — Instrumentation vision-aware
|
||||
ocr_ms: float = 0.0 # Temps OCR du ScreenState de ce step
|
||||
ui_ms: float = 0.0 # Temps détection UI de ce step
|
||||
total_ms: float = 0.0 # Temps total (alias de duration_ms pour cohérence)
|
||||
analyze_ms: float = 0.0 # Temps total analyse ScreenState (OCR + UI + reste)
|
||||
cache_hit: bool = False # True si ScreenState vient du cache
|
||||
degraded: bool = False # True si mode dégradé activé (timeout analyse)
|
||||
|
||||
|
||||
class ExecutionLoop:
|
||||
@@ -175,7 +182,13 @@ class ExecutionLoop:
|
||||
capture_interval_ms: int = 500,
|
||||
max_no_match_retries: int = 5,
|
||||
confirmation_callback: Optional[Callable[[str, Dict], bool]] = None,
|
||||
coaching_callback: Optional[Callable[[str, Dict], "CoachingResponse"]] = None
|
||||
coaching_callback: Optional[Callable[[str, Dict], "CoachingResponse"]] = None,
|
||||
screen_analyzer: Optional[Any] = None,
|
||||
screen_state_cache: Optional[Any] = None,
|
||||
enable_ui_detection: bool = True,
|
||||
enable_ocr: bool = True,
|
||||
analyze_timeout_ms: int = 8000,
|
||||
window_info_provider: Optional[Callable[[], Optional[Dict[str, Any]]]] = None,
|
||||
):
|
||||
"""
|
||||
Initialiser la boucle d'exécution.
|
||||
@@ -188,6 +201,15 @@ class ExecutionLoop:
|
||||
max_no_match_retries: Nombre max de tentatives si pas de match
|
||||
confirmation_callback: Callback pour demander confirmation (SUPERVISED)
|
||||
coaching_callback: Callback pour décisions coaching (COACHING)
|
||||
screen_analyzer: ScreenAnalyzer pour construire un ScreenState enrichi
|
||||
(lazy init via singleton si None)
|
||||
screen_state_cache: Cache perceptuel (lazy init via singleton si None)
|
||||
enable_ui_detection: Active la détection UI (True par défaut, flag d'urgence)
|
||||
enable_ocr: Active l'OCR (True par défaut)
|
||||
analyze_timeout_ms: Timeout soft pour l'analyse d'un ScreenState.
|
||||
Au-delà, on active le mode dégradé pour les steps suivants.
|
||||
window_info_provider: Callable renvoyant un dict window_info. Si None,
|
||||
on tente `screen_capturer.get_active_window()`.
|
||||
"""
|
||||
self.pipeline = pipeline
|
||||
self.action_executor = action_executor or ActionExecutor()
|
||||
@@ -204,6 +226,27 @@ class ExecutionLoop:
|
||||
self.confirmation_callback = confirmation_callback
|
||||
self.coaching_callback = coaching_callback
|
||||
|
||||
# C1 — Vision-aware execution
|
||||
self._screen_analyzer = screen_analyzer # lazy init si None
|
||||
self._screen_state_cache = screen_state_cache # lazy init si None
|
||||
self.enable_ui_detection = enable_ui_detection
|
||||
self.enable_ocr = enable_ocr
|
||||
self.analyze_timeout_ms = analyze_timeout_ms
|
||||
self._window_info_provider = window_info_provider
|
||||
# Mode dégradé déclenché par un timeout analyse — persiste tant qu'un
|
||||
# probe n'a pas démontré la récupération (voir ci-dessous).
|
||||
self._degraded_mode = False
|
||||
# Auto-rétablissement : compteur de steps rapides consécutifs.
|
||||
# Si l'analyse tourne vite (< analyze_timeout_ms / 2) pendant
|
||||
# _fast_steps_recovery_threshold steps → on quitte le mode dégradé.
|
||||
self._successive_fast_steps = 0
|
||||
self._fast_steps_recovery_threshold = 3
|
||||
# En mode dégradé, on retente l'analyse tous les _probe_interval steps
|
||||
# pour détecter la récupération (les autres steps restent en stub pour
|
||||
# éviter de re-saturer le GPU). 10 par défaut = ~5s à 500ms/step.
|
||||
self._probe_interval = 10
|
||||
self._degraded_step_counter = 0
|
||||
|
||||
# État interne
|
||||
self.state = ExecutionState.IDLE
|
||||
self.context: Optional[ExecutionContext] = None
|
||||
@@ -464,15 +507,15 @@ class ExecutionLoop:
|
||||
})
|
||||
|
||||
# Notify Analytics about step completion
|
||||
# C1 — transmet tous les champs vision-aware (ocr_ms, ui_ms,
|
||||
# analyze_ms, cache_hit, degraded) au système analytics via
|
||||
# on_step_result qui accepte un StepResult complet.
|
||||
if self._analytics_integration and step_result:
|
||||
try:
|
||||
self._analytics_integration.on_step_complete(
|
||||
workflow_id=self.context.workflow_id,
|
||||
self._analytics_integration.on_step_result(
|
||||
execution_id=self.context.execution_id,
|
||||
step_id=step_result.node_id,
|
||||
success=step_result.success,
|
||||
duration_ms=step_result.duration_ms,
|
||||
confidence=step_result.match_confidence
|
||||
workflow_id=self.context.workflow_id,
|
||||
step_result=step_result,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Analytics step notification failed: {e}")
|
||||
@@ -505,10 +548,32 @@ class ExecutionLoop:
|
||||
self._notify_state_change(ExecutionState.STOPPED)
|
||||
|
||||
# Notify Analytics about execution completion
|
||||
# Contrat normalisé (Lot A) : duration_ms + status explicite
|
||||
# au lieu du booléen success + duration ambigu.
|
||||
if self._analytics_integration and self.context:
|
||||
try:
|
||||
success = self.state == ExecutionState.COMPLETED
|
||||
duration_ms = (datetime.now() - self.context.started_at).total_seconds() * 1000
|
||||
duration_ms = (
|
||||
datetime.now() - self.context.started_at
|
||||
).total_seconds() * 1000
|
||||
|
||||
# Mapping ExecutionState → status analytics
|
||||
if self.state == ExecutionState.COMPLETED:
|
||||
status = "completed"
|
||||
elif self.state == ExecutionState.FAILED:
|
||||
status = "failed"
|
||||
elif self.state == ExecutionState.STOPPED:
|
||||
status = "stopped"
|
||||
elif self.state == ExecutionState.PAUSED:
|
||||
# Pause non résolue à la sortie = blocage non récupéré
|
||||
status = "blocked"
|
||||
else:
|
||||
status = self.state.value
|
||||
|
||||
error_message = (
|
||||
None
|
||||
if status == "completed"
|
||||
else f"Execution ended in state: {self.state.value}"
|
||||
)
|
||||
|
||||
# Stop resource monitoring
|
||||
self._analytics_integration.stop_resource_monitoring(
|
||||
@@ -518,12 +583,12 @@ class ExecutionLoop:
|
||||
self._analytics_integration.on_execution_complete(
|
||||
workflow_id=self.context.workflow_id,
|
||||
execution_id=self.context.execution_id,
|
||||
success=success,
|
||||
duration_ms=duration_ms,
|
||||
steps_executed=self.context.steps_executed,
|
||||
steps_succeeded=self.context.steps_succeeded,
|
||||
status=status,
|
||||
steps_total=self.context.steps_executed,
|
||||
steps_completed=self.context.steps_succeeded,
|
||||
steps_failed=self.context.steps_failed,
|
||||
error_message=None if success else f"Execution ended in state: {self.state.value}"
|
||||
error_message=error_message,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Analytics completion notification failed: {e}")
|
||||
@@ -547,10 +612,23 @@ class ExecutionLoop:
|
||||
|
||||
self.context.last_screenshot_path = screenshot_path
|
||||
|
||||
# 1bis. Construire un ScreenState enrichi (C1) — avec cache perceptuel
|
||||
screen_state, timings = self._build_screen_state(screenshot_path)
|
||||
logger.debug(
|
||||
f"[Step] ScreenState analyze={timings['analyze_ms']:.0f}ms "
|
||||
f"ocr={timings['ocr_ms']:.0f}ms ui={timings['ui_ms']:.0f}ms "
|
||||
f"cache_hit={timings['cache_hit']} degraded={timings['degraded']}"
|
||||
)
|
||||
|
||||
# 2. Identifier l'état actuel (matching)
|
||||
match = self.pipeline.match_current_state(
|
||||
screenshot_path,
|
||||
workflow_id=self.context.workflow_id
|
||||
#
|
||||
# Lot E — on consomme le ScreenState enrichi déjà construit en 1bis
|
||||
# (avec ui_elements, detected_text, window_title réels) au lieu de
|
||||
# laisser le pipeline reconstruire un stub avec window_title="Unknown".
|
||||
# Premier vrai matching context-aware.
|
||||
match = self.pipeline.match_current_state_from_state(
|
||||
screen_state,
|
||||
workflow_id=self.context.workflow_id,
|
||||
)
|
||||
|
||||
if not match:
|
||||
@@ -564,25 +642,98 @@ class ExecutionLoop:
|
||||
|
||||
logger.info(f"Matched node: {current_node_id} (confidence: {confidence:.3f})")
|
||||
|
||||
# 3. Obtenir la prochaine action
|
||||
# 3. Obtenir la prochaine action (C3 : sélection d'edge robuste)
|
||||
#
|
||||
# Lot A — contrat dict avec status explicite :
|
||||
# "terminal" → fin légitime du workflow (success=True)
|
||||
# "blocked" → pause supervisée (plus JAMAIS traité comme un succès
|
||||
# pour ne pas déclencher un faux _is_workflow_complete)
|
||||
# "selected" → action à exécuter
|
||||
#
|
||||
# Lot B — on propage la confidence du match courant (source_similarity)
|
||||
# pour que l'EdgeScorer puisse vérifier la précondition
|
||||
# `min_source_similarity` de chaque edge. Sans cette propagation, la
|
||||
# contrainte était silencieusement désactivée (hardcodé à 1.0).
|
||||
next_action = self.pipeline.get_next_action(
|
||||
self.context.workflow_id,
|
||||
current_node_id
|
||||
current_node_id,
|
||||
screen_state=screen_state,
|
||||
source_similarity=confidence,
|
||||
)
|
||||
|
||||
if not next_action:
|
||||
# Pas d'action suivante = fin du workflow ou node terminal
|
||||
# Rétrocompat défensive : si un pipeline legacy renvoie None ou un dict
|
||||
# sans status, on considère ça comme un blocage (safe default).
|
||||
if not isinstance(next_action, dict) or "status" not in next_action:
|
||||
logger.error(
|
||||
"get_next_action a renvoyé un résultat sans status "
|
||||
f"(legacy?). Valeur reçue: {next_action!r}"
|
||||
)
|
||||
next_action = {"status": "blocked", "reason": "legacy_none_return"}
|
||||
|
||||
action_status = next_action.get("status")
|
||||
|
||||
if action_status == "terminal":
|
||||
# Fin légitime : aucun outgoing_edge sur le node courant
|
||||
total_ms = (time.time() - start_time) * 1000
|
||||
return StepResult(
|
||||
success=True,
|
||||
node_id=current_node_id,
|
||||
edge_id=None,
|
||||
action_result=None,
|
||||
match_confidence=confidence,
|
||||
duration_ms=(time.time() - start_time) * 1000,
|
||||
message="No next action (terminal node)",
|
||||
screenshot_path=screenshot_path
|
||||
duration_ms=total_ms,
|
||||
message="Workflow terminated (terminal node)",
|
||||
screenshot_path=screenshot_path,
|
||||
ocr_ms=timings["ocr_ms"],
|
||||
ui_ms=timings["ui_ms"],
|
||||
analyze_ms=timings["analyze_ms"],
|
||||
total_ms=total_ms,
|
||||
cache_hit=timings["cache_hit"],
|
||||
degraded=timings["degraded"],
|
||||
)
|
||||
|
||||
if action_status == "blocked":
|
||||
# Blocage : des edges existent mais aucun n'est valide.
|
||||
# On déclenche une pause supervisée (paused_need_help) et on
|
||||
# remonte l'erreur. On ne retourne PAS success=True.
|
||||
reason = next_action.get("reason", "unknown")
|
||||
logger.warning(
|
||||
f"ExecutionLoop bloqué sur {current_node_id}: {reason} "
|
||||
f"→ pause supervisée demandée"
|
||||
)
|
||||
# On bascule en PAUSED et on arme _pause_requested pour que la
|
||||
# boucle principale attende un resume() humain.
|
||||
self.state = ExecutionState.PAUSED
|
||||
self._pause_requested = True
|
||||
self._notify_state_change(ExecutionState.PAUSED)
|
||||
if self._on_error:
|
||||
try:
|
||||
self._on_error(
|
||||
"blocked",
|
||||
Exception(f"No valid edge from {current_node_id}: {reason}"),
|
||||
)
|
||||
except Exception as cb_err:
|
||||
logger.debug(f"on_error callback failed: {cb_err}")
|
||||
|
||||
total_ms = (time.time() - start_time) * 1000
|
||||
return StepResult(
|
||||
success=False,
|
||||
node_id=current_node_id,
|
||||
edge_id=None,
|
||||
action_result=None,
|
||||
match_confidence=confidence,
|
||||
duration_ms=total_ms,
|
||||
message=f"Blocked: {reason}",
|
||||
screenshot_path=screenshot_path,
|
||||
ocr_ms=timings["ocr_ms"],
|
||||
ui_ms=timings["ui_ms"],
|
||||
analyze_ms=timings["analyze_ms"],
|
||||
total_ms=total_ms,
|
||||
cache_hit=timings["cache_hit"],
|
||||
degraded=timings["degraded"],
|
||||
)
|
||||
|
||||
# À partir d'ici, on est forcément en status="selected"
|
||||
edge_id = next_action["edge_id"]
|
||||
self.context.current_edge_id = edge_id
|
||||
|
||||
@@ -604,7 +755,7 @@ class ExecutionLoop:
|
||||
if coaching_response.decision == CoachingDecision.ACCEPT:
|
||||
# Utilisateur accepte : exécuter l'action suggérée
|
||||
self._coaching_stats['accepted'] += 1
|
||||
action_result = self._execute_action(next_action)
|
||||
action_result = self._execute_action(next_action, screen_state=screen_state)
|
||||
self._record_coaching_feedback(
|
||||
next_action, coaching_response, action_result, success=True
|
||||
)
|
||||
@@ -615,15 +766,22 @@ class ExecutionLoop:
|
||||
self._record_coaching_feedback(
|
||||
next_action, coaching_response, None, success=False
|
||||
)
|
||||
total_ms = (time.time() - start_time) * 1000
|
||||
return StepResult(
|
||||
success=False,
|
||||
node_id=current_node_id,
|
||||
edge_id=edge_id,
|
||||
action_result=None,
|
||||
match_confidence=confidence,
|
||||
duration_ms=(time.time() - start_time) * 1000,
|
||||
duration_ms=total_ms,
|
||||
message="Action rejected by user in COACHING mode",
|
||||
screenshot_path=screenshot_path
|
||||
screenshot_path=screenshot_path,
|
||||
ocr_ms=timings["ocr_ms"],
|
||||
ui_ms=timings["ui_ms"],
|
||||
analyze_ms=timings["analyze_ms"],
|
||||
total_ms=total_ms,
|
||||
cache_hit=timings["cache_hit"],
|
||||
degraded=timings["degraded"],
|
||||
)
|
||||
|
||||
elif coaching_response.decision == CoachingDecision.CORRECT:
|
||||
@@ -632,7 +790,7 @@ class ExecutionLoop:
|
||||
corrected_action = self._apply_coaching_correction(
|
||||
next_action, coaching_response.correction
|
||||
)
|
||||
action_result = self._execute_action(corrected_action)
|
||||
action_result = self._execute_action(corrected_action, screen_state=screen_state)
|
||||
self._record_coaching_feedback(
|
||||
next_action, coaching_response, action_result,
|
||||
success=action_result.status == ExecutionStatus.SUCCESS if action_result else False
|
||||
@@ -658,23 +816,30 @@ class ExecutionLoop:
|
||||
# Mode supervisé : demander confirmation
|
||||
if not self._request_confirmation(next_action):
|
||||
logger.info("Action rejected by user")
|
||||
total_ms = (time.time() - start_time) * 1000
|
||||
return StepResult(
|
||||
success=False,
|
||||
node_id=current_node_id,
|
||||
edge_id=edge_id,
|
||||
action_result=None,
|
||||
match_confidence=confidence,
|
||||
duration_ms=(time.time() - start_time) * 1000,
|
||||
duration_ms=total_ms,
|
||||
message="Action rejected by user",
|
||||
screenshot_path=screenshot_path
|
||||
screenshot_path=screenshot_path,
|
||||
ocr_ms=timings["ocr_ms"],
|
||||
ui_ms=timings["ui_ms"],
|
||||
analyze_ms=timings["analyze_ms"],
|
||||
total_ms=total_ms,
|
||||
cache_hit=timings["cache_hit"],
|
||||
degraded=timings["degraded"],
|
||||
)
|
||||
|
||||
# Exécuter l'action
|
||||
action_result = self._execute_action(next_action)
|
||||
action_result = self._execute_action(next_action, screen_state=screen_state)
|
||||
|
||||
elif self.context.mode == ExecutionMode.AUTOMATIC:
|
||||
# Mode automatique : exécuter directement
|
||||
action_result = self._execute_action(next_action)
|
||||
action_result = self._execute_action(next_action, screen_state=screen_state)
|
||||
|
||||
# 5. Mettre à jour les compteurs
|
||||
self.context.steps_executed += 1
|
||||
@@ -693,7 +858,13 @@ class ExecutionLoop:
|
||||
match_confidence=confidence,
|
||||
duration_ms=duration_ms,
|
||||
message=action_result.message if action_result else "Observed",
|
||||
screenshot_path=screenshot_path
|
||||
screenshot_path=screenshot_path,
|
||||
ocr_ms=timings["ocr_ms"],
|
||||
ui_ms=timings["ui_ms"],
|
||||
analyze_ms=timings["analyze_ms"],
|
||||
total_ms=duration_ms,
|
||||
cache_hit=timings["cache_hit"],
|
||||
degraded=timings["degraded"],
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
@@ -718,8 +889,18 @@ class ExecutionLoop:
|
||||
logger.error(f"Screen capture failed: {e}")
|
||||
return None
|
||||
|
||||
def _execute_action(self, action_info: Dict[str, Any]) -> ExecutionResult:
|
||||
"""Exécuter une action via l'ActionExecutor."""
|
||||
def _execute_action(
|
||||
self,
|
||||
action_info: Dict[str, Any],
|
||||
screen_state: Optional[Any] = None,
|
||||
) -> ExecutionResult:
|
||||
"""
|
||||
Exécuter une action via l'ActionExecutor.
|
||||
|
||||
Args:
|
||||
action_info: dict action {edge_id, action, target_node, ...}
|
||||
screen_state: ScreenState enrichi (si None, fallback stub minimal)
|
||||
"""
|
||||
try:
|
||||
# Charger le workflow et l'edge
|
||||
workflow = self.pipeline.load_workflow(self.context.workflow_id)
|
||||
@@ -732,36 +913,10 @@ class ExecutionLoop:
|
||||
duration_ms=0
|
||||
)
|
||||
|
||||
# Créer un ScreenState minimal pour l'exécution
|
||||
from core.models.screen_state import (
|
||||
ScreenState, WindowContext, RawLevel, PerceptionLevel,
|
||||
ContextLevel, EmbeddingRef
|
||||
)
|
||||
|
||||
screen_state = ScreenState(
|
||||
screen_state_id=f"exec_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
|
||||
timestamp=datetime.now(),
|
||||
session_id=self.context.execution_id,
|
||||
window=WindowContext(
|
||||
app_name="unknown",
|
||||
window_title="Unknown",
|
||||
screen_resolution=[1920, 1080],
|
||||
workspace="main"
|
||||
),
|
||||
raw=RawLevel(
|
||||
screenshot_path=self.context.last_screenshot_path or "",
|
||||
capture_method="execution",
|
||||
file_size_bytes=0
|
||||
),
|
||||
perception=PerceptionLevel(
|
||||
embedding=EmbeddingRef(provider="", vector_id="", dimensions=512),
|
||||
detected_text=[],
|
||||
text_detection_method="none",
|
||||
confidence_avg=0.0
|
||||
),
|
||||
context=ContextLevel(),
|
||||
ui_elements=[]
|
||||
)
|
||||
# Utiliser le ScreenState enrichi fourni par le loop ; fallback minimal
|
||||
# uniquement si on n'en a pas (legacy, tests).
|
||||
if screen_state is None:
|
||||
screen_state = self._build_stub_screen_state()
|
||||
|
||||
# Exécuter l'action
|
||||
result = self.action_executor.execute_edge(
|
||||
@@ -782,6 +937,286 @@ class ExecutionLoop:
|
||||
error=e
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# C1 — Construction du ScreenState (vision-aware)
|
||||
# =========================================================================
|
||||
|
||||
def _get_screen_analyzer(self):
|
||||
"""
|
||||
Récupérer le ScreenAnalyzer (singleton partagé, lazy).
|
||||
|
||||
Retourne None si indisponible (import error, etc.) — le loop
|
||||
bascule alors en fallback stub.
|
||||
|
||||
Note Lot C : on ne passe plus `session_id` au singleton. Le session_id
|
||||
est désormais un paramètre d'appel de `analyze()`, pour éviter que deux
|
||||
ExecutionLoop partageant le même analyzer se marchent dessus.
|
||||
"""
|
||||
if self._screen_analyzer is not None:
|
||||
return self._screen_analyzer
|
||||
try:
|
||||
from core.pipeline import get_screen_analyzer
|
||||
self._screen_analyzer = get_screen_analyzer()
|
||||
return self._screen_analyzer
|
||||
except Exception as e:
|
||||
logger.warning(f"ScreenAnalyzer indisponible: {e}")
|
||||
return None
|
||||
|
||||
def _get_screen_state_cache(self):
|
||||
"""Récupérer le cache de ScreenState (singleton partagé, lazy)."""
|
||||
if self._screen_state_cache is not None:
|
||||
return self._screen_state_cache
|
||||
try:
|
||||
from core.pipeline import get_screen_state_cache
|
||||
self._screen_state_cache = get_screen_state_cache()
|
||||
return self._screen_state_cache
|
||||
except Exception as e:
|
||||
logger.warning(f"ScreenStateCache indisponible: {e}")
|
||||
return None
|
||||
|
||||
def _resolve_window_info(self) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Récupérer les infos de la fenêtre active.
|
||||
|
||||
Ordre de préférence :
|
||||
1. `window_info_provider` fourni au constructeur
|
||||
2. `screen_capturer.get_active_window()`
|
||||
3. None → ScreenAnalyzer utilisera les valeurs par défaut
|
||||
"""
|
||||
if self._window_info_provider is not None:
|
||||
try:
|
||||
return self._window_info_provider()
|
||||
except Exception as e:
|
||||
logger.debug(f"window_info_provider failed: {e}")
|
||||
|
||||
try:
|
||||
raw = self.screen_capturer.get_active_window()
|
||||
if raw:
|
||||
# Normaliser vers le format attendu par ScreenAnalyzer
|
||||
return {
|
||||
"title": raw.get("title", "Unknown"),
|
||||
"app_name": raw.get("app", "unknown"),
|
||||
"window_bounds": [
|
||||
raw.get("x", 0),
|
||||
raw.get("y", 0),
|
||||
raw.get("width", 0),
|
||||
raw.get("height", 0),
|
||||
],
|
||||
}
|
||||
except Exception as e:
|
||||
logger.debug(f"get_active_window failed: {e}")
|
||||
return None
|
||||
|
||||
def _build_screen_state(
|
||||
self,
|
||||
screenshot_path: str,
|
||||
) -> tuple:
|
||||
"""
|
||||
Construire un ScreenState enrichi depuis un screenshot.
|
||||
|
||||
Logique :
|
||||
- Si enable_ui_detection=False ET enable_ocr=False → stub
|
||||
- Si analyseur indisponible → stub
|
||||
- Sinon : cache.get_or_compute(analyzer.analyze)
|
||||
- Timeout soft : si l'analyse dépasse `analyze_timeout_ms`, on log
|
||||
un warning et on active le mode dégradé pour les prochains steps.
|
||||
|
||||
Returns:
|
||||
(screen_state, timings_dict)
|
||||
timings_dict: {
|
||||
"analyze_ms", "ocr_ms", "ui_ms", "cache_hit", "degraded"
|
||||
}
|
||||
"""
|
||||
timings = {
|
||||
"analyze_ms": 0.0,
|
||||
"ocr_ms": 0.0,
|
||||
"ui_ms": 0.0,
|
||||
"cache_hit": False,
|
||||
"degraded": False,
|
||||
}
|
||||
|
||||
# Mode "tout désactivé" (flag d'urgence) → stub
|
||||
if not self.enable_ui_detection and not self.enable_ocr:
|
||||
timings["degraded"] = True
|
||||
return self._build_stub_screen_state(screenshot_path), timings
|
||||
|
||||
analyzer = self._get_screen_analyzer()
|
||||
if analyzer is None:
|
||||
timings["degraded"] = True
|
||||
return self._build_stub_screen_state(screenshot_path), timings
|
||||
|
||||
# Mode dégradé : on reste sur stub, sauf "probe" périodique qui teste
|
||||
# si le GPU est redevenu performant. Si oui, on accumule les steps
|
||||
# rapides ; après _fast_steps_recovery_threshold probes rapides
|
||||
# consécutifs on retourne en mode complet.
|
||||
if self._degraded_mode:
|
||||
self._degraded_step_counter += 1
|
||||
if self._degraded_step_counter < self._probe_interval:
|
||||
timings["degraded"] = True
|
||||
return self._build_stub_screen_state(screenshot_path), timings
|
||||
# Sinon on tente un probe réel ci-dessous
|
||||
self._degraded_step_counter = 0
|
||||
|
||||
cache = self._get_screen_state_cache()
|
||||
|
||||
# Invalidation proactive : si l'écran a massivement changé depuis
|
||||
# la dernière entrée du cache, on purge. Le TTL seul (2s) laisserait
|
||||
# passer des entrées obsolètes sur des changements rapides (popup, nav).
|
||||
if cache is not None:
|
||||
try:
|
||||
cache.invalidate_if_changed(screenshot_path, threshold=0.3)
|
||||
except Exception as e:
|
||||
logger.debug(f"invalidate_if_changed a échoué: {e}")
|
||||
|
||||
window_info = self._resolve_window_info()
|
||||
|
||||
# Fonction de calcul (cache miss)
|
||||
# Les flags runtime (enable_ocr, enable_ui_detection) et le session_id
|
||||
# sont passés en kwargs-only à analyze() : AUCUNE mutation de l'analyseur
|
||||
# singleton (Lot C — thread-safety, deux ExecutionLoop peuvent partager
|
||||
# le même analyzer sans se contaminer).
|
||||
execution_id = self.context.execution_id if self.context else ""
|
||||
|
||||
def compute(path: str):
|
||||
t_start = time.time()
|
||||
state = analyzer.analyze(
|
||||
path,
|
||||
window_info=window_info,
|
||||
enable_ocr=self.enable_ocr,
|
||||
enable_ui_detection=self.enable_ui_detection,
|
||||
session_id=execution_id,
|
||||
)
|
||||
elapsed = (time.time() - t_start) * 1000
|
||||
# Annoter le temps dans les métadonnées
|
||||
if hasattr(state, "metadata"):
|
||||
state.metadata["analyze_ms"] = elapsed
|
||||
return state
|
||||
|
||||
t0 = time.time()
|
||||
try:
|
||||
if cache is not None:
|
||||
# Lot D — clé composite context-aware : deux contextes
|
||||
# différents partageant le même screenshot n'entrent plus
|
||||
# en collision. Le workflow_id isole les replays par workflow,
|
||||
# les flags différencient les modes d'analyse (OCR on/off,
|
||||
# UI on/off), et le (window_title, app_name) distingue deux
|
||||
# applications qui présenteraient un rendu visuel similaire.
|
||||
ctx_window_title = (window_info or {}).get("title", "") or ""
|
||||
ctx_app_name = (window_info or {}).get("app_name", "") or ""
|
||||
ctx_workflow_id = (
|
||||
self.context.workflow_id if self.context else ""
|
||||
)
|
||||
state, cache_hit, _ = cache.get_or_compute(
|
||||
screenshot_path,
|
||||
compute,
|
||||
window_title=ctx_window_title,
|
||||
app_name=ctx_app_name,
|
||||
enable_ocr=self.enable_ocr,
|
||||
enable_ui_detection=self.enable_ui_detection,
|
||||
workflow_id=ctx_workflow_id,
|
||||
)
|
||||
else:
|
||||
state = compute(screenshot_path)
|
||||
cache_hit = False
|
||||
except Exception as e:
|
||||
logger.warning(f"ScreenState build failed: {e} — fallback stub")
|
||||
timings["degraded"] = True
|
||||
return self._build_stub_screen_state(screenshot_path), timings
|
||||
|
||||
analyze_ms = (time.time() - t0) * 1000
|
||||
timings["analyze_ms"] = analyze_ms
|
||||
timings["cache_hit"] = cache_hit
|
||||
|
||||
# Décomposer OCR vs UI si possible (métadonnées)
|
||||
meta = getattr(state, "metadata", {}) or {}
|
||||
timings["ocr_ms"] = float(meta.get("ocr_ms", 0.0))
|
||||
timings["ui_ms"] = float(meta.get("ui_ms", 0.0))
|
||||
|
||||
# Timeout soft : activer le mode dégradé si > seuil
|
||||
# (cache_hit ignoré : un hit ne prouve rien sur la santé du GPU)
|
||||
if analyze_ms > self.analyze_timeout_ms and not cache_hit:
|
||||
logger.warning(
|
||||
f"ScreenState analysis slow: {analyze_ms:.0f}ms > "
|
||||
f"{self.analyze_timeout_ms}ms → activation mode dégradé"
|
||||
)
|
||||
self._degraded_mode = True
|
||||
self._successive_fast_steps = 0
|
||||
timings["degraded"] = True
|
||||
else:
|
||||
# Step "rapide" : incrémenter le compteur si < timeout / 2.
|
||||
# On ignore les cache hits (pas représentatifs de la perf GPU).
|
||||
fast_threshold_ms = self.analyze_timeout_ms / 2
|
||||
if not cache_hit and analyze_ms < fast_threshold_ms:
|
||||
self._successive_fast_steps += 1
|
||||
|
||||
# Auto-rétablissement : si on était en dégradé et qu'on a
|
||||
# enchaîné assez de steps rapides → retour en mode complet.
|
||||
if (
|
||||
self._degraded_mode
|
||||
and self._successive_fast_steps
|
||||
>= self._fast_steps_recovery_threshold
|
||||
):
|
||||
logger.info(
|
||||
"Mode complet restauré après %d steps rapides "
|
||||
"(dernier analyze_ms=%.0fms < seuil=%.0fms)",
|
||||
self._successive_fast_steps,
|
||||
analyze_ms,
|
||||
fast_threshold_ms,
|
||||
)
|
||||
self._degraded_mode = False
|
||||
self._successive_fast_steps = 0
|
||||
elif not cache_hit:
|
||||
# Step ni lent ni rapide (entre timeout/2 et timeout) : reset
|
||||
self._successive_fast_steps = 0
|
||||
|
||||
# On propage l'état dégradé courant dans les timings (utile pour le
|
||||
# StepResult : tant qu'on n'a pas récupéré assez de steps rapides,
|
||||
# on continue à signaler "degraded=True").
|
||||
timings["degraded"] = self._degraded_mode
|
||||
|
||||
return state, timings
|
||||
|
||||
def _build_stub_screen_state(self, screenshot_path: Optional[str] = None):
|
||||
"""
|
||||
Construire un ScreenState minimal (fallback legacy).
|
||||
|
||||
Utilisé quand l'analyseur est indisponible ou que tous les flags
|
||||
de détection sont désactivés (flag d'urgence).
|
||||
"""
|
||||
from core.models.screen_state import (
|
||||
ScreenState, WindowContext, RawLevel, PerceptionLevel,
|
||||
ContextLevel, EmbeddingRef
|
||||
)
|
||||
|
||||
path = screenshot_path or (
|
||||
self.context.last_screenshot_path if self.context else ""
|
||||
) or ""
|
||||
|
||||
return ScreenState(
|
||||
screen_state_id=f"exec_{datetime.now().strftime('%Y%m%d_%H%M%S_%f')}",
|
||||
timestamp=datetime.now(),
|
||||
session_id=self.context.execution_id if self.context else "stub",
|
||||
window=WindowContext(
|
||||
app_name="unknown",
|
||||
window_title="Unknown",
|
||||
screen_resolution=[1920, 1080],
|
||||
workspace="main",
|
||||
),
|
||||
raw=RawLevel(
|
||||
screenshot_path=path,
|
||||
capture_method="execution",
|
||||
file_size_bytes=0,
|
||||
),
|
||||
perception=PerceptionLevel(
|
||||
embedding=EmbeddingRef(provider="", vector_id="", dimensions=512),
|
||||
detected_text=[],
|
||||
text_detection_method="none",
|
||||
confidence_avg=0.0,
|
||||
),
|
||||
context=ContextLevel(),
|
||||
ui_elements=[],
|
||||
)
|
||||
|
||||
def _request_confirmation(self, action_info: Dict[str, Any]) -> bool:
|
||||
"""Demander confirmation à l'utilisateur."""
|
||||
if self.confirmation_callback:
|
||||
|
||||
400
tests/unit/test_workflow_pipeline_match_from_state.py
Normal file
400
tests/unit/test_workflow_pipeline_match_from_state.py
Normal file
@@ -0,0 +1,400 @@
|
||||
"""
|
||||
Tests unitaires du matching context-aware — Lot E.
|
||||
|
||||
Vérifient que ``WorkflowPipeline.match_current_state_from_state`` :
|
||||
- Consomme réellement le ``ScreenState`` fourni (window_title,
|
||||
detected_text, ui_elements) au lieu de le reconstruire en stub.
|
||||
- Ne réinvoque PAS ``ScreenAnalyzer.analyze`` (le state est déjà prêt).
|
||||
- Préfère le matching hiérarchique si un workflow est chargeable.
|
||||
- Retombe sur FAISS quand le hiérarchique n'est pas applicable.
|
||||
|
||||
Vérifient aussi que l'ancienne API ``match_current_state(screenshot_path, ...)``
|
||||
continue à fonctionner comme un **wrapper** qui invoque bien le
|
||||
``ScreenAnalyzer`` puis délègue à ``match_current_state_from_state``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from PIL import Image
|
||||
|
||||
from core.models.screen_state import (
|
||||
ContextLevel,
|
||||
EmbeddingRef,
|
||||
PerceptionLevel,
|
||||
RawLevel,
|
||||
ScreenState,
|
||||
WindowContext,
|
||||
)
|
||||
from core.pipeline.workflow_pipeline import WorkflowPipeline
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _make_enriched_state(
|
||||
*,
|
||||
window_title: str = "Bloc-Notes - Sans titre",
|
||||
app_name: str = "notepad",
|
||||
detected_text=None,
|
||||
ui_elements=None,
|
||||
screenshot_path: str = "",
|
||||
) -> ScreenState:
|
||||
"""ScreenState enrichi utilisé pour simuler ce que ExecutionLoop fournit."""
|
||||
return ScreenState(
|
||||
screen_state_id="state_lot_e",
|
||||
timestamp=datetime.now(),
|
||||
session_id="sess_lot_e",
|
||||
window=WindowContext(
|
||||
app_name=app_name,
|
||||
window_title=window_title,
|
||||
screen_resolution=[1920, 1080],
|
||||
),
|
||||
raw=RawLevel(
|
||||
screenshot_path=screenshot_path,
|
||||
capture_method="test",
|
||||
file_size_bytes=0,
|
||||
),
|
||||
perception=PerceptionLevel(
|
||||
embedding=EmbeddingRef(provider="t", vector_id="v", dimensions=512),
|
||||
detected_text=detected_text if detected_text is not None else ["Fichier", "Édition"],
|
||||
text_detection_method="test",
|
||||
confidence_avg=0.9,
|
||||
),
|
||||
context=ContextLevel(),
|
||||
ui_elements=ui_elements if ui_elements is not None else [],
|
||||
)
|
||||
|
||||
|
||||
def _make_pipeline_with_mocks(tmp_path) -> WorkflowPipeline:
|
||||
"""Construit une WorkflowPipeline minimale avec composants mockés.
|
||||
|
||||
On évite d'instancier réellement CLIPEmbedder / UIDetector / VLM :
|
||||
on bypass ``__init__`` et on injecte directement les collaborateurs
|
||||
mockés. Plus rapide et plus déterministe.
|
||||
"""
|
||||
pipe = WorkflowPipeline.__new__(WorkflowPipeline)
|
||||
|
||||
# Répertoires
|
||||
pipe.data_dir = Path(tmp_path)
|
||||
pipe.workflows_dir = pipe.data_dir / "workflows"
|
||||
pipe.workflows_dir.mkdir(parents=True, exist_ok=True)
|
||||
pipe.embeddings_dir = pipe.data_dir / "embeddings"
|
||||
pipe.embeddings_dir.mkdir(parents=True, exist_ok=True)
|
||||
pipe.screenshots_dir = pipe.data_dir / "screenshots"
|
||||
pipe.screenshots_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Collaborateurs mockés
|
||||
fake_embedding = MagicMock()
|
||||
fake_embedding.get_vector.return_value = [0.0] * 8
|
||||
fake_embedding.embedding_id = "emb_test"
|
||||
|
||||
pipe.embedding_builder = MagicMock()
|
||||
pipe.embedding_builder.build.return_value = fake_embedding
|
||||
|
||||
pipe.faiss_manager = MagicMock()
|
||||
pipe.faiss_manager.search.return_value = []
|
||||
|
||||
pipe.hierarchical_matcher = MagicMock()
|
||||
|
||||
pipe.clip_embedder = MagicMock()
|
||||
pipe.fusion_engine = MagicMock()
|
||||
pipe.ui_detector = None
|
||||
pipe.vlm_client = None
|
||||
pipe.graph_builder = MagicMock()
|
||||
pipe.node_matcher = MagicMock()
|
||||
pipe.learning_manager = MagicMock()
|
||||
pipe.target_resolver = MagicMock()
|
||||
pipe.error_handler = MagicMock()
|
||||
pipe.action_executor = MagicMock()
|
||||
|
||||
pipe._workflows = {}
|
||||
pipe._temporal_context = {}
|
||||
|
||||
return pipe
|
||||
|
||||
|
||||
def _fake_hierarchical_result(
|
||||
node_id: str = "node_ok",
|
||||
confidence: float = 0.82,
|
||||
):
|
||||
"""Construit un MatchResult factice (compat avec l'API du HierarchicalMatcher)."""
|
||||
result = MagicMock()
|
||||
result.node_id = node_id
|
||||
result.confidence = confidence
|
||||
result.window_confidence = 0.9
|
||||
result.region_confidence = 0.8
|
||||
result.element_confidence = 0.85
|
||||
result.temporal_boost = 0.0
|
||||
result.matched_variant = None
|
||||
result.alternatives = []
|
||||
result.match_time_ms = 1.0
|
||||
return result
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 1. match_current_state_from_state — chemin hiérarchique
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMatchFromStateHierarchical:
|
||||
"""
|
||||
Quand un workflow est chargeable, on passe par le HierarchicalMatcher
|
||||
qui consomme window_title + ui_elements — c'est le cœur du Lot E.
|
||||
"""
|
||||
|
||||
def test_match_from_state_uses_provided_window_title(self, tmp_path):
|
||||
"""Le window_title fourni (Bloc-Notes) est transmis au matcher,
|
||||
pas un stub "Unknown"."""
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
|
||||
pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result()
|
||||
|
||||
state = _make_enriched_state(window_title="Bloc-Notes - Sans titre")
|
||||
result = pipe.match_current_state_from_state(state, workflow_id="wf1")
|
||||
|
||||
assert result is not None
|
||||
# Le matcher hiérarchique a été appelé avec le vrai window_title
|
||||
call_kwargs = pipe.hierarchical_matcher.match.call_args.kwargs
|
||||
window_info = call_kwargs["window_info"]
|
||||
assert window_info["title"] == "Bloc-Notes - Sans titre"
|
||||
assert window_info["window_title"] == "Bloc-Notes - Sans titre"
|
||||
# Pas de "Unknown"
|
||||
assert "Unknown" not in window_info["title"]
|
||||
|
||||
def test_match_from_state_uses_ui_elements(self, tmp_path):
|
||||
"""Les ui_elements du ScreenState sont transmis au matcher comme
|
||||
detected_elements, pas remplacés par []."""
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
|
||||
pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result()
|
||||
|
||||
# 3 éléments factices
|
||||
ui_elements = [MagicMock(), MagicMock(), MagicMock()]
|
||||
state = _make_enriched_state(ui_elements=ui_elements)
|
||||
pipe.match_current_state_from_state(state, workflow_id="wf1")
|
||||
|
||||
call_kwargs = pipe.hierarchical_matcher.match.call_args.kwargs
|
||||
passed_elements = call_kwargs["detected_elements"]
|
||||
assert len(passed_elements) == 3
|
||||
assert passed_elements == ui_elements
|
||||
|
||||
def test_match_from_state_uses_detected_text(self, tmp_path):
|
||||
"""Un ScreenState avec detected_text non vide doit être entièrement
|
||||
transmis (pas remplacé par un stub vide)."""
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
|
||||
pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result()
|
||||
|
||||
detected_text = ["Fichier", "Édition", "Affichage", "Aide"]
|
||||
state = _make_enriched_state(detected_text=detected_text)
|
||||
pipe.match_current_state_from_state(state, workflow_id="wf1")
|
||||
|
||||
# Le state lui-même n'est pas passé directement au matcher, mais il
|
||||
# ne doit pas avoir été réécrit en stub avant : on le vérifie
|
||||
# indirectement via ses propriétés conservées. Le state original
|
||||
# doit rester enrichi.
|
||||
assert state.perception.detected_text == detected_text
|
||||
assert state.perception.detected_text != []
|
||||
|
||||
def test_match_from_state_no_reconstruction(self, tmp_path):
|
||||
"""``ScreenAnalyzer.analyze`` ne doit PAS être appelé par
|
||||
``match_current_state_from_state`` — le state est déjà construit."""
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
|
||||
pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result()
|
||||
|
||||
state = _make_enriched_state()
|
||||
|
||||
# On patche get_screen_analyzer globalement : si la nouvelle méthode
|
||||
# invoque l'analyseur, le mock sera appelé. Attente : ZÉRO appel.
|
||||
with patch(
|
||||
"core.pipeline.get_screen_analyzer"
|
||||
) as mock_get_analyzer:
|
||||
fake_analyzer = MagicMock()
|
||||
mock_get_analyzer.return_value = fake_analyzer
|
||||
|
||||
pipe.match_current_state_from_state(state, workflow_id="wf1")
|
||||
|
||||
# get_screen_analyzer peut ou non être appelé (pas de garantie
|
||||
# forte), mais en tout cas analyze() ne doit PAS l'être.
|
||||
fake_analyzer.analyze.assert_not_called()
|
||||
|
||||
def test_match_from_state_below_threshold_returns_none(self, tmp_path):
|
||||
"""Si le hiérarchique rend une confidence < min_similarity, on
|
||||
retombe sur FAISS ; si FAISS ne trouve rien non plus, None."""
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
|
||||
pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result(
|
||||
confidence=0.1
|
||||
)
|
||||
pipe.faiss_manager.search.return_value = []
|
||||
|
||||
state = _make_enriched_state()
|
||||
result = pipe.match_current_state_from_state(
|
||||
state, workflow_id="wf1", min_similarity=0.5
|
||||
)
|
||||
assert result is None
|
||||
|
||||
def test_match_from_state_returns_hierarchical_metadata(self, tmp_path):
|
||||
"""Le résultat doit inclure les confidences par niveau (window,
|
||||
region, element) quand on passe par le hiérarchique."""
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
|
||||
pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result(
|
||||
node_id="node_42", confidence=0.77
|
||||
)
|
||||
|
||||
state = _make_enriched_state()
|
||||
result = pipe.match_current_state_from_state(
|
||||
state, workflow_id="wf1", min_similarity=0.5
|
||||
)
|
||||
assert result is not None
|
||||
assert result["node_id"] == "node_42"
|
||||
assert result["confidence"] == 0.77
|
||||
assert result["workflow_id"] == "wf1"
|
||||
assert result["match_type"] == "hierarchical"
|
||||
assert "window_confidence" in result
|
||||
assert "region_confidence" in result
|
||||
assert "element_confidence" in result
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 2. match_current_state_from_state — fallback FAISS
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestMatchFromStateFAISSFallback:
|
||||
"""Si aucun workflow n'est chargeable, on tombe sur FAISS avec le state fourni."""
|
||||
|
||||
def test_fallback_faiss_when_no_workflow_id(self, tmp_path):
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
pipe.faiss_manager.search.return_value = [
|
||||
{
|
||||
"similarity": 0.91,
|
||||
"metadata": {"node_id": "n_faiss", "workflow_id": None},
|
||||
}
|
||||
]
|
||||
|
||||
state = _make_enriched_state()
|
||||
result = pipe.match_current_state_from_state(state, workflow_id=None)
|
||||
|
||||
# Pas de hiérarchique (pas de workflow_id)
|
||||
pipe.hierarchical_matcher.match.assert_not_called()
|
||||
# FAISS a reçu le vecteur calculé sur le state enrichi
|
||||
pipe.embedding_builder.build.assert_called_once_with(state)
|
||||
assert result is not None
|
||||
assert result["node_id"] == "n_faiss"
|
||||
assert result["match_type"] == "faiss"
|
||||
|
||||
def test_faiss_returns_none_below_threshold(self, tmp_path):
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
pipe.faiss_manager.search.return_value = [
|
||||
{
|
||||
"similarity": 0.6, # < 0.85
|
||||
"metadata": {"node_id": "n_low", "workflow_id": None},
|
||||
}
|
||||
]
|
||||
|
||||
state = _make_enriched_state()
|
||||
result = pipe.match_current_state_from_state(state, workflow_id=None)
|
||||
assert result is None
|
||||
|
||||
|
||||
# -----------------------------------------------------------------------------
|
||||
# 3. Wrapper legacy match_current_state(screenshot_path, ...)
|
||||
# -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestLegacyWrapper:
|
||||
"""
|
||||
L'ancienne API ``match_current_state(screenshot_path, ...)`` doit :
|
||||
1. Appeler ScreenAnalyzer.analyze pour enrichir le state.
|
||||
2. Déléguer à match_current_state_from_state.
|
||||
"""
|
||||
|
||||
def test_match_current_state_wrapper_calls_analyzer(self, tmp_path):
|
||||
"""Le wrapper legacy DOIT appeler ScreenAnalyzer.analyze."""
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
pipe.load_workflow = MagicMock(return_value=MagicMock(nodes=[MagicMock()]))
|
||||
pipe.hierarchical_matcher.match.return_value = _fake_hierarchical_result()
|
||||
|
||||
# Préparer un vrai fichier image pour le wrapper
|
||||
shot = tmp_path / "shot.png"
|
||||
Image.new("RGB", (64, 64), color=(100, 100, 100)).save(str(shot))
|
||||
|
||||
# Patcher l'analyseur partagé pour vérifier l'appel
|
||||
fake_analyzer = MagicMock()
|
||||
fake_analyzer.analyze.return_value = _make_enriched_state(
|
||||
window_title="Calc", screenshot_path=str(shot)
|
||||
)
|
||||
|
||||
with patch(
|
||||
"core.pipeline.get_screen_analyzer",
|
||||
return_value=fake_analyzer,
|
||||
):
|
||||
result = pipe.match_current_state(str(shot), workflow_id="wf1")
|
||||
|
||||
# L'analyseur a été invoqué
|
||||
fake_analyzer.analyze.assert_called_once()
|
||||
# Et on a un résultat (le hiérarchique a été appelé derrière)
|
||||
assert result is not None
|
||||
assert pipe.hierarchical_matcher.match.called
|
||||
|
||||
def test_match_current_state_wrapper_delegates_to_from_state(self, tmp_path):
|
||||
"""Le wrapper délègue bien à match_current_state_from_state."""
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
|
||||
shot = tmp_path / "shot.png"
|
||||
Image.new("RGB", (32, 32), color=(50, 50, 50)).save(str(shot))
|
||||
|
||||
fake_analyzer = MagicMock()
|
||||
fake_analyzer.analyze.return_value = _make_enriched_state(
|
||||
screenshot_path=str(shot)
|
||||
)
|
||||
|
||||
# Espionner la nouvelle méthode (elle existe, on wrap)
|
||||
with patch(
|
||||
"core.pipeline.get_screen_analyzer",
|
||||
return_value=fake_analyzer,
|
||||
), patch.object(
|
||||
pipe,
|
||||
"match_current_state_from_state",
|
||||
return_value={"node_id": "x", "workflow_id": "wf1", "confidence": 0.9},
|
||||
) as mock_from_state:
|
||||
result = pipe.match_current_state(str(shot), workflow_id="wf1")
|
||||
|
||||
mock_from_state.assert_called_once()
|
||||
# Le state passé (args ou kwargs) est bien celui renvoyé par l'analyseur
|
||||
call = mock_from_state.call_args
|
||||
passed_state = call.args[0] if call.args else call.kwargs["screen_state"]
|
||||
assert passed_state is fake_analyzer.analyze.return_value
|
||||
assert result == {"node_id": "x", "workflow_id": "wf1", "confidence": 0.9}
|
||||
|
||||
def test_wrapper_fallback_to_stub_when_analyzer_fails(self, tmp_path):
|
||||
"""Si l'analyseur est indisponible/plante, le wrapper retombe sur un
|
||||
stub minimal pour garder la rétrocompat."""
|
||||
pipe = _make_pipeline_with_mocks(tmp_path)
|
||||
|
||||
shot = tmp_path / "shot.png"
|
||||
Image.new("RGB", (32, 32)).save(str(shot))
|
||||
|
||||
with patch(
|
||||
"core.pipeline.get_screen_analyzer",
|
||||
side_effect=RuntimeError("analyzer down"),
|
||||
), patch.object(
|
||||
pipe, "match_current_state_from_state", return_value=None
|
||||
) as mock_from_state:
|
||||
pipe.match_current_state(str(shot), workflow_id="wf1", window_title="Hint")
|
||||
|
||||
mock_from_state.assert_called_once()
|
||||
# Le state passé (args ou kwargs) est un stub (hint window_title respecté)
|
||||
call = mock_from_state.call_args
|
||||
passed_state = call.args[0] if call.args else call.kwargs["screen_state"]
|
||||
assert passed_state.window.window_title == "Hint"
|
||||
Reference in New Issue
Block a user