feat(analytics): normalise API + contrat explicite get_next_action (Lot A)

Contrat get_next_action() — suppression du None ambigu :
  {"status": "selected", "edge": ..., ...}
  {"status": "terminal"}
  {"status": "blocked", "reason": "no_valid_edge" | ...}

ExecutionLoop dispatche proprement : blocked -> PAUSED + _pause_requested,
terminal -> succès légitime. Rétrocompat défensive (None legacy -> blocked).

Analytics API normalisée (kwargs-only) :
  on_execution_complete(duration_ms, status, steps_total|completed|failed)
  on_step_complete(duration_ms, ...)
  on_recovery_attempt(duration_ms, ...)

Découverte critique : les anciens appels utilisaient des méthodes et champs
inexistants (ExecutionMetrics.duration, metrics_collector.record_execution).
Le code n'avait jamais tourné au runtime — zéro analytics remontée.
L'exception était avalée par le try/except englobant.

61 tests (18 analytics + 11 contrat + 20 ExecutionLoop + 12 edge_scorer
non-régression). Migration complète, pas de pont legacy.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-04-15 09:06:19 +02:00
parent 42f571d496
commit af4ffa189a
9 changed files with 1573 additions and 233 deletions

View File

@@ -76,7 +76,16 @@ class StepMetrics:
confidence_score: float confidence_score: float
retry_count: int = 0 retry_count: int = 0
error_details: Optional[str] = None error_details: Optional[str] = None
# C1 — Instrumentation vision-aware (ExecutionLoop)
# Ces champs proviennent de `StepResult` (core/execution/execution_loop.py).
# Tous optionnels avec valeurs par défaut pour rétrocompatibilité.
ocr_ms: float = 0.0 # Temps OCR sur ce step
ui_ms: float = 0.0 # Temps détection UI sur ce step
analyze_ms: float = 0.0 # Temps analyse ScreenState (OCR + UI + reste)
total_ms: float = 0.0 # Temps total du step (alias duration_ms)
cache_hit: bool = False # True si ScreenState vient du cache perceptuel
degraded: bool = False # True si mode dégradé (timeout analyse)
def to_dict(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for storage.""" """Convert to dictionary for storage."""
return { return {
@@ -92,9 +101,15 @@ class StepMetrics:
'status': self.status, 'status': self.status,
'confidence_score': self.confidence_score, 'confidence_score': self.confidence_score,
'retry_count': self.retry_count, 'retry_count': self.retry_count,
'error_details': self.error_details 'error_details': self.error_details,
'ocr_ms': self.ocr_ms,
'ui_ms': self.ui_ms,
'analyze_ms': self.analyze_ms,
'total_ms': self.total_ms,
'cache_hit': self.cache_hit,
'degraded': self.degraded,
} }
@classmethod @classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'StepMetrics': def from_dict(cls, data: Dict[str, Any]) -> 'StepMetrics':
"""Create from dictionary.""" """Create from dictionary."""
@@ -111,7 +126,13 @@ class StepMetrics:
status=data['status'], status=data['status'],
confidence_score=data['confidence_score'], confidence_score=data['confidence_score'],
retry_count=data.get('retry_count', 0), retry_count=data.get('retry_count', 0),
error_details=data.get('error_details') error_details=data.get('error_details'),
ocr_ms=float(data.get('ocr_ms') or 0.0),
ui_ms=float(data.get('ui_ms') or 0.0),
analyze_ms=float(data.get('analyze_ms') or 0.0),
total_ms=float(data.get('total_ms') or 0.0),
cache_hit=bool(data.get('cache_hit') or False),
degraded=bool(data.get('degraded') or False),
) )

View File

@@ -1,8 +1,8 @@
"""Integration of analytics with ExecutionLoop.""" """Integration of analytics with ExecutionLoop."""
import logging import logging
from typing import Optional from typing import Any, Optional
from datetime import datetime from datetime import datetime, timedelta
import uuid import uuid
from ..analytics_system import get_analytics_system from ..analytics_system import get_analytics_system
@@ -14,17 +14,35 @@ logger = logging.getLogger(__name__)
class AnalyticsExecutionIntegration: class AnalyticsExecutionIntegration:
"""Integrate analytics collection with workflow execution.""" """Integrate analytics collection with workflow execution."""
def __init__(self, enabled: bool = True): def __init__(self, analytics_system: Any = True, enabled: Optional[bool] = None):
""" """
Initialize analytics integration. Initialize analytics integration.
Accepte deux formes d'appel pour la rétrocompatibilité :
- ``AnalyticsExecutionIntegration(enabled=True)`` → auto-load du système
- ``AnalyticsExecutionIntegration(analytics_system_instance)`` →
utilise l'instance fournie (utilisé par ExecutionLoop)
Args: Args:
enabled: Whether analytics collection is enabled analytics_system: Instance d'AnalyticsSystem pré-construite, ou
True/False pour activer/désactiver (legacy).
enabled: Legacy — si défini, prime sur analytics_system.
""" """
self.enabled = enabled # Détection de la forme d'appel
self.analytics = None if enabled is not None:
# Appel legacy explicite: AnalyticsExecutionIntegration(enabled=...)
if enabled: self.enabled = bool(enabled)
self.analytics = None
elif isinstance(analytics_system, bool):
# Appel legacy: AnalyticsExecutionIntegration(True/False)
self.enabled = analytics_system
self.analytics = None
else:
# Nouvelle forme: instance injectée
self.enabled = analytics_system is not None
self.analytics = analytics_system
if self.enabled and self.analytics is None:
try: try:
self.analytics = get_analytics_system() self.analytics = get_analytics_system()
logger.info("Analytics integration enabled") logger.info("Analytics integration enabled")
@@ -36,37 +54,50 @@ class AnalyticsExecutionIntegration:
self, self,
workflow_id: str, workflow_id: str,
execution_id: Optional[str] = None, execution_id: Optional[str] = None,
total_steps: int = 0 total_steps: int = 0,
mode: Optional[str] = None,
) -> str: ) -> str:
""" """
Called when workflow execution starts. Appelé au démarrage d'une exécution de workflow.
Args: Args:
workflow_id: Workflow identifier workflow_id: Identifiant du workflow
execution_id: Execution identifier (generated if None) execution_id: Identifiant d'exécution (généré si None)
total_steps: Total number of steps total_steps: Nombre total d'étapes prévues
mode: Mode d'exécution (OBSERVATION / COACHING / SUPERVISED /
AUTOMATIC). Propagé en contexte pour MetricsCollector.
Returns: Returns:
Execution ID Identifiant d'exécution (celui fourni ou nouvellement généré).
""" """
if not self.enabled or not self.analytics: if not self.enabled or not self.analytics:
return execution_id or str(uuid.uuid4()) return execution_id or str(uuid.uuid4())
if execution_id is None: if execution_id is None:
execution_id = str(uuid.uuid4()) execution_id = str(uuid.uuid4())
try: try:
# Start real-time tracking # Démarrage du tracking temps réel
self.analytics.realtime_analytics.track_execution( self.analytics.realtime_analytics.track_execution(
execution_id=execution_id, execution_id=execution_id,
workflow_id=workflow_id, workflow_id=workflow_id,
total_steps=total_steps total_steps=total_steps,
) )
# Ouverture de l'ExecutionMetrics côté collector (état "running").
# Cela permet à `on_execution_complete` d'appeler
# `record_execution_complete` qui clôture proprement.
context = {"mode": mode} if mode else {}
self.analytics.metrics_collector.record_execution_start(
execution_id=execution_id,
workflow_id=workflow_id,
context=context,
)
logger.debug(f"Started tracking execution: {execution_id}") logger.debug(f"Started tracking execution: {execution_id}")
except Exception as e: except Exception as e:
logger.error(f"Error starting execution tracking: {e}") logger.error(f"Error starting execution tracking: {e}")
return execution_id return execution_id
def on_step_start( def on_step_start(
@@ -101,110 +132,249 @@ class AnalyticsExecutionIntegration:
execution_id: str, execution_id: str,
workflow_id: str, workflow_id: str,
node_id: str, node_id: str,
action_type: str, *,
started_at: datetime, duration_ms: float,
completed_at: datetime,
duration: float,
success: bool, success: bool,
error_message: Optional[str] = None action_type: str = "",
started_at: Optional[datetime] = None,
completed_at: Optional[datetime] = None,
error_message: Optional[str] = None,
confidence: float = 0.0,
target_element: str = "",
retry_count: int = 0,
ocr_ms: float = 0.0,
ui_ms: float = 0.0,
analyze_ms: float = 0.0,
total_ms: float = 0.0,
cache_hit: bool = False,
degraded: bool = False,
step_id: Optional[str] = None,
) -> None: ) -> None:
""" """
Called when a step completes. Appelé à la fin d'un step.
Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` est
obligatoire et en millisecondes. Plus de rétrocompat silencieuse
sur ``duration`` en secondes.
Args: Args:
execution_id: Execution identifier execution_id: Identifiant d'exécution
workflow_id: Workflow identifier workflow_id: Identifiant du workflow
node_id: Node identifier node_id: Identifiant du node
action_type: Type of action duration_ms: Durée du step en millisecondes (obligatoire)
started_at: Start timestamp success: Vrai si le step a réussi
completed_at: Completion timestamp action_type: Type d'action (``click``, ``type``, …)
duration: Duration in seconds started_at: Timestamp de début (déduit de duration_ms si None)
success: Whether step succeeded completed_at: Timestamp de fin (``now()`` si None)
error_message: Error message if failed error_message: Message d'erreur si ``success=False``
confidence: Score de matching [0, 1]
target_element: Élément ciblé (optionnel)
retry_count: Nombre de retries
ocr_ms: Temps OCR (C1)
ui_ms: Temps détection UI (C1)
analyze_ms: Temps analyse ScreenState (C1)
total_ms: Temps total du step (C1, alias duration_ms)
cache_hit: ScreenState depuis cache perceptuel (C1)
degraded: Mode dégradé activé (C1)
step_id: ID unique du step (généré si None)
""" """
if not self.enabled or not self.analytics: if not self.enabled or not self.analytics:
return return
try: try:
# Record step metrics duration_ms_final = float(duration_ms)
# Normaliser les timestamps
if completed_at is None:
completed_at = datetime.now()
if started_at is None:
started_at = completed_at - timedelta(milliseconds=duration_ms_final)
step_metrics = StepMetrics( step_metrics = StepMetrics(
step_id=step_id or f"{execution_id}:{node_id}:{completed_at.isoformat()}",
execution_id=execution_id, execution_id=execution_id,
workflow_id=workflow_id, workflow_id=workflow_id,
node_id=node_id, node_id=node_id,
action_type=action_type, action_type=action_type or "unknown",
target_element=target_element,
started_at=started_at, started_at=started_at,
completed_at=completed_at, completed_at=completed_at,
duration=duration, duration_ms=duration_ms_final,
success=success, status="completed" if success else "failed",
error_message=error_message confidence_score=float(confidence),
retry_count=retry_count,
error_details=error_message,
# C1 — vision-aware
ocr_ms=float(ocr_ms or 0.0),
ui_ms=float(ui_ms or 0.0),
analyze_ms=float(analyze_ms or 0.0),
total_ms=float(total_ms or duration_ms_final),
cache_hit=bool(cache_hit),
degraded=bool(degraded),
) )
self.analytics.metrics_collector.record_step(step_metrics) self.analytics.metrics_collector.record_step(step_metrics)
# Update real-time tracking # Tracking temps réel
self.analytics.realtime_analytics.record_step_complete( try:
execution_id=execution_id, self.analytics.realtime_analytics.record_step_complete(
success=success execution_id=execution_id,
success=success,
)
except Exception as rt_err:
logger.debug(f"Realtime tracking skipped: {rt_err}")
logger.debug(
f"Recorded step: {node_id} "
f"({'success' if success else 'failed'}, "
f"analyze_ms={analyze_ms:.0f}, cache_hit={cache_hit}, "
f"degraded={degraded})"
) )
logger.debug(f"Recorded step: {node_id} ({'success' if success else 'failed'})")
except Exception as e: except Exception as e:
logger.error(f"Error recording step completion: {e}") logger.error(f"Error recording step completion: {e}")
def on_step_result(
    self,
    execution_id: str,
    workflow_id: str,
    step_result: Any,
) -> None:
    """
    C1 shortcut — record a complete ``StepResult`` in one call.

    Spares callers from manually extracting the vision-aware fields.
    Used by ExecutionLoop to push StepResult into the analytics system.

    Args:
        execution_id: Execution identifier
        workflow_id: Workflow identifier
        step_result: Instance of ``core.execution.execution_loop.StepResult``
    """
    if not self.enabled or not self.analytics:
        return

    # Best-effort extraction of the action type from the nested
    # action_result; any failure falls back to "unknown".
    action_type = "unknown"
    try:
        action_result = getattr(step_result, "action_result", None)
        if action_result is not None:
            # ExecutionResult.action is optional depending on the branch
            candidate = getattr(action_result, "action_type", None)
            if not candidate:
                candidate = getattr(action_result, "action", None)
            action_type = candidate or "unknown"
    except Exception:
        action_type = "unknown"

    succeeded = bool(getattr(step_result, "success", False))
    failure_message = None if succeeded else getattr(step_result, "message", None)

    self.on_step_complete(
        execution_id=execution_id,
        workflow_id=workflow_id,
        node_id=getattr(step_result, "node_id", "unknown"),
        action_type=str(action_type),
        success=succeeded,
        error_message=failure_message,
        duration_ms=float(getattr(step_result, "duration_ms", 0.0) or 0.0),
        confidence=float(getattr(step_result, "match_confidence", 0.0) or 0.0),
        ocr_ms=float(getattr(step_result, "ocr_ms", 0.0) or 0.0),
        ui_ms=float(getattr(step_result, "ui_ms", 0.0) or 0.0),
        analyze_ms=float(getattr(step_result, "analyze_ms", 0.0) or 0.0),
        total_ms=float(getattr(step_result, "total_ms", 0.0) or 0.0),
        cache_hit=bool(getattr(step_result, "cache_hit", False)),
        degraded=bool(getattr(step_result, "degraded", False)),
    )
def on_execution_complete( def on_execution_complete(
self, self,
execution_id: str, execution_id: str,
workflow_id: str, workflow_id: str,
started_at: datetime, *,
completed_at: datetime, duration_ms: float,
duration: float,
status: str, status: str,
error_message: Optional[str] = None, steps_total: Optional[int] = None,
steps_completed: int = 0, steps_completed: int = 0,
steps_failed: int = 0 steps_failed: int = 0,
error_message: Optional[str] = None,
) -> None: ) -> None:
""" """
Called when workflow execution completes. Appelé à la fin d'une exécution de workflow.
Contrat normalisé (Lot A — avril 2026) :
- ``duration_ms`` en millisecondes, toujours. Plus de rétrocompat
silencieuse sur ``duration`` en secondes.
- ``status`` est une chaîne libre (``"completed"``, ``"failed"``,
``"stopped"``, ``"timeout"``, …). L'appelant décide.
- ``steps_total`` / ``steps_completed`` / ``steps_failed`` : noms
alignés sur le dataclass ``ExecutionMetrics``. Si ``steps_total``
n'est pas fourni, on le déduit par somme.
Args: Args:
execution_id: Execution identifier execution_id: Identifiant d'exécution
workflow_id: Workflow identifier workflow_id: Identifiant du workflow
started_at: Start timestamp duration_ms: Durée totale en millisecondes
completed_at: Completion timestamp status: Statut final (``"completed"`` / ``"failed"`` / ``"stopped"``)
duration: Duration in seconds steps_total: Nombre total de steps exécutés (tous statuts confondus)
status: Final status (success, failed, timeout) steps_completed: Nombre de steps réussis
error_message: Error message if failed steps_failed: Nombre de steps en échec
steps_completed: Number of steps completed error_message: Message d'erreur si ``status != "completed"``
steps_failed: Number of steps failed
""" """
if not self.enabled or not self.analytics: if not self.enabled or not self.analytics:
return return
# steps_total dérivé si non fourni explicitement
if steps_total is None:
steps_total = int(steps_completed) + int(steps_failed)
try: try:
# Record execution metrics collector = self.analytics.metrics_collector
execution_metrics = ExecutionMetrics(
execution_id=execution_id, # record_execution_complete clôture proprement un ExecutionMetrics
workflow_id=workflow_id, # ouvert par record_execution_start (chemin nominal via
started_at=started_at, # on_execution_start). Si l'état n'est pas présent (tests, legacy),
completed_at=completed_at, # on pousse un ExecutionMetrics synthétique directement.
duration=duration, completed_at = datetime.now()
status=status, started_at = completed_at - timedelta(milliseconds=float(duration_ms))
error_message=error_message,
steps_completed=steps_completed, active = getattr(collector, "_active_executions", None)
steps_failed=steps_failed if active is not None and execution_id in active:
) collector.record_execution_complete(
execution_id=execution_id,
self.analytics.metrics_collector.record_execution(execution_metrics) status=status,
steps_total=int(steps_total),
# Flush to ensure persistence steps_completed=int(steps_completed),
self.analytics.metrics_collector.flush() steps_failed=int(steps_failed),
error_message=error_message,
# Complete real-time tracking )
else:
# Fallback explicite : on construit directement un ExecutionMetrics
# aligné sur le dataclass (duration_ms, status, steps_*).
execution_metrics = ExecutionMetrics(
execution_id=execution_id,
workflow_id=workflow_id,
started_at=started_at,
completed_at=completed_at,
duration_ms=float(duration_ms),
status=status,
steps_total=int(steps_total),
steps_completed=int(steps_completed),
steps_failed=int(steps_failed),
error_message=error_message,
)
# Le collector n'expose pas record_execution(...) : on pousse
# dans le buffer protégé par lock pour rester cohérent.
with collector._lock:
collector._buffer.append(execution_metrics)
# Flush pour garantir la persistance immédiate
collector.flush()
# Clôture du tracking temps réel
self.analytics.realtime_analytics.complete_execution( self.analytics.realtime_analytics.complete_execution(
execution_id=execution_id, execution_id=execution_id,
status=status status=status,
) )
logger.info(f"Recorded execution: {execution_id} ({status})") logger.info(f"Recorded execution: {execution_id} ({status})")
except Exception as e: except Exception as e:
logger.error(f"Error recording execution completion: {e}") logger.error(f"Error recording execution completion: {e}")
@@ -216,39 +386,54 @@ class AnalyticsExecutionIntegration:
node_id: str, node_id: str,
strategy: str, strategy: str,
success: bool, success: bool,
duration: float duration_ms: float,
) -> None: ) -> None:
""" """
Called when self-healing attempts recovery. Appelé quand le self-healing tente une récupération.
Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` en
millisecondes, cohérent avec ``on_execution_complete`` et
``on_step_complete``. Le StepMetrics construit respecte strictement
le dataclass (``status``, ``duration_ms``, ``error_details``,
``confidence_score``, ``target_element``, ``step_id``).
Args: Args:
execution_id: Execution identifier execution_id: Identifiant d'exécution
workflow_id: Workflow identifier workflow_id: Identifiant du workflow
node_id: Node identifier node_id: Node où la récupération est tentée
strategy: Recovery strategy used strategy: Stratégie de récupération employée
success: Whether recovery succeeded success: Vrai si la récupération a réussi
duration: Recovery duration duration_ms: Durée de la tentative en millisecondes
""" """
if not self.enabled or not self.analytics: if not self.enabled or not self.analytics:
return return
try: try:
# Record as a special step metric now = datetime.now()
started_at = now - timedelta(milliseconds=float(duration_ms))
recovery_metrics = StepMetrics( recovery_metrics = StepMetrics(
step_id=f"{execution_id}:{node_id}:recovery:{now.isoformat()}",
execution_id=execution_id, execution_id=execution_id,
workflow_id=workflow_id, workflow_id=workflow_id,
node_id=f"{node_id}_recovery", node_id=f"{node_id}_recovery",
action_type=f"recovery_{strategy}", action_type=f"recovery_{strategy}",
started_at=datetime.now(), target_element="",
completed_at=datetime.now(), started_at=started_at,
duration=duration, completed_at=now,
success=success, duration_ms=float(duration_ms),
error_message=None if success else f"Recovery failed: {strategy}" status="completed" if success else "failed",
confidence_score=0.0,
retry_count=0,
error_details=None if success else f"Recovery failed: {strategy}",
) )
self.analytics.metrics_collector.record_step(recovery_metrics) self.analytics.metrics_collector.record_step(recovery_metrics)
logger.debug(f"Recorded recovery: {strategy} ({'success' if success else 'failed'})") logger.debug(
f"Recorded recovery: {strategy} "
f"({'success' if success else 'failed'})"
)
except Exception as e: except Exception as e:
logger.error(f"Error recording recovery attempt: {e}") logger.error(f"Error recording recovery attempt: {e}")

View File

@@ -42,6 +42,8 @@ class TimeSeriesStore:
ON execution_metrics(started_at); ON execution_metrics(started_at);
-- Step metrics table -- Step metrics table
-- Les colonnes ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded
-- proviennent de l'instrumentation vision-aware (C1) de ExecutionLoop.
CREATE TABLE IF NOT EXISTS step_metrics ( CREATE TABLE IF NOT EXISTS step_metrics (
step_id TEXT PRIMARY KEY, step_id TEXT PRIMARY KEY,
execution_id TEXT NOT NULL, execution_id TEXT NOT NULL,
@@ -56,6 +58,12 @@ class TimeSeriesStore:
confidence_score REAL, confidence_score REAL,
retry_count INTEGER DEFAULT 0, retry_count INTEGER DEFAULT 0,
error_details TEXT, error_details TEXT,
ocr_ms REAL DEFAULT 0.0,
ui_ms REAL DEFAULT 0.0,
analyze_ms REAL DEFAULT 0.0,
total_ms REAL DEFAULT 0.0,
cache_hit INTEGER DEFAULT 0,
degraded INTEGER DEFAULT 0,
FOREIGN KEY (execution_id) REFERENCES execution_metrics(execution_id) FOREIGN KEY (execution_id) REFERENCES execution_metrics(execution_id)
); );
@@ -101,11 +109,40 @@ class TimeSeriesStore:
logger.info(f"TimeSeriesStore initialized at {self.db_path}") logger.info(f"TimeSeriesStore initialized at {self.db_path}")
# Colonnes ajoutées ultérieurement — appliquées via ALTER TABLE si absentes.
# (C1 — instrumentation vision-aware, avril 2026)
_STEP_METRICS_MIGRATIONS = [
("ocr_ms", "REAL DEFAULT 0.0"),
("ui_ms", "REAL DEFAULT 0.0"),
("analyze_ms", "REAL DEFAULT 0.0"),
("total_ms", "REAL DEFAULT 0.0"),
("cache_hit", "INTEGER DEFAULT 0"),
("degraded", "INTEGER DEFAULT 0"),
]
def _init_database(self) -> None: def _init_database(self) -> None:
"""Initialize database schema.""" """Initialize database schema and apply lightweight migrations."""
with self._get_connection() as conn: with self._get_connection() as conn:
conn.executescript(self.SCHEMA) conn.executescript(self.SCHEMA)
self._migrate_step_metrics(conn)
conn.commit() conn.commit()
def _migrate_step_metrics(self, conn: sqlite3.Connection) -> None:
    """Add the C1 columns to a pre-existing ``step_metrics`` table."""
    # Column names already present in the table; in the PRAGMA output,
    # index 1 of each row is the column name.
    rows = conn.execute("PRAGMA table_info(step_metrics)").fetchall()
    present = {row[1] for row in rows}

    missing = [
        (column, ddl)
        for column, ddl in self._STEP_METRICS_MIGRATIONS
        if column not in present
    ]
    for column, ddl in missing:
        try:
            conn.execute(f"ALTER TABLE step_metrics ADD COLUMN {column} {ddl}")
            logger.info(f"Migration step_metrics: ajout colonne {column}")
        except sqlite3.OperationalError as e:
            # Benign collision (column already added by another process)
            logger.debug(f"Migration colonne {column} ignorée: {e}")
@contextmanager @contextmanager
def _get_connection(self): def _get_connection(self):
@@ -164,13 +201,14 @@ class TimeSeriesStore:
)) ))
def _write_step_metric(self, conn: sqlite3.Connection, metric: StepMetrics) -> None: def _write_step_metric(self, conn: sqlite3.Connection, metric: StepMetrics) -> None:
"""Write step metric.""" """Write step metric (inclut les champs vision-aware C1)."""
conn.execute(""" conn.execute("""
INSERT OR REPLACE INTO step_metrics INSERT OR REPLACE INTO step_metrics
(step_id, execution_id, workflow_id, node_id, action_type, target_element, (step_id, execution_id, workflow_id, node_id, action_type, target_element,
started_at, completed_at, duration_ms, status, confidence_score, started_at, completed_at, duration_ms, status, confidence_score,
retry_count, error_details) retry_count, error_details,
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", ( """, (
metric.step_id, metric.step_id,
metric.execution_id, metric.execution_id,
@@ -184,7 +222,13 @@ class TimeSeriesStore:
metric.status, metric.status,
metric.confidence_score, metric.confidence_score,
metric.retry_count, metric.retry_count,
metric.error_details metric.error_details,
getattr(metric, 'ocr_ms', 0.0),
getattr(metric, 'ui_ms', 0.0),
getattr(metric, 'analyze_ms', 0.0),
getattr(metric, 'total_ms', 0.0),
1 if getattr(metric, 'cache_hit', False) else 0,
1 if getattr(metric, 'degraded', False) else 0,
)) ))
def _write_resource_metric(self, conn: sqlite3.Connection, metric: ResourceMetrics) -> None: def _write_resource_metric(self, conn: sqlite3.Connection, metric: ResourceMetrics) -> None:

View File

@@ -354,66 +354,306 @@ class WorkflowPipeline:
# ========================================================================= # =========================================================================
# Mode MATCHING : Reconnaissance de l'état actuel # Mode MATCHING : Reconnaissance de l'état actuel
# ========================================================================= # =========================================================================
def match_current_state_from_state(
    self,
    screen_state: ScreenState,
    workflow_id: Optional[str] = None,
    *,
    min_similarity: float = 0.5,
) -> Optional[Dict[str, Any]]:
    """
    Match an enriched ``ScreenState`` against the nodes of a workflow.

    Lot E — first real context-aware matching. This method consumes the
    ``ScreenState`` already built by ``ExecutionLoop`` (with
    ``window_title``, ``detected_text`` and ``ui_elements`` filled in by
    the ``ScreenAnalyzer``) instead of rebuilding an empty stub with
    ``window_title="Unknown"``.

    Strategy:
        1. If the ``HierarchicalMatcher`` is usable AND the target
           workflow can be loaded, prefer the multi-level matching
           (window → region → element) that fully exploits the
           ``ui_elements`` and the ``window_title``.
        2. Otherwise fall back to embedding matching through FAISS
           (same logic as the legacy ``match_current_state``, but with
           the provided ``ScreenState``, not a stub).

    Args:
        screen_state: Complete ``ScreenState`` (ui_elements +
            detected_text + window_info) built upstream by the
            ``ExecutionLoop``.
        workflow_id: Target workflow ID (all workflows if None).
        min_similarity: Minimum confidence threshold for a valid match.
            Keeps the historical semantics (0.5 for hierarchical, 0.85
            for the FAISS fallback).

    Returns:
        Dict with ``node_id``, ``workflow_id``, ``confidence`` (plus the
        hierarchical matching details when applicable), or ``None`` when
        no match reaches the threshold.
    """
    logger.debug(
        "Matching ScreenState (app=%s, title=%s, ui_elements=%d, "
        "detected_text=%d)",
        screen_state.window.app_name,
        screen_state.window.window_title,
        len(screen_state.ui_elements),
        len(screen_state.perception.detected_text),
    )

    # --- Strategy 1: hierarchical matching when a workflow is available ---
    if workflow_id:
        candidate = self.load_workflow(workflow_id)
        if candidate is not None and getattr(candidate, "nodes", None):
            try:
                hierarchical = self._match_hierarchical_from_state(
                    screen_state=screen_state,
                    workflow=candidate,
                    workflow_id=workflow_id,
                    min_similarity=min_similarity,
                )
            except Exception as exc:
                # Never break matching on a hierarchical matcher error:
                # fall back to FAISS instead.
                logger.debug(
                    f"Hierarchical matching failed, fallback FAISS: {exc}"
                )
            else:
                if hierarchical is not None:
                    return hierarchical

    # --- Strategy 2: embedding + FAISS fallback ---
    return self._match_via_faiss(
        screen_state=screen_state,
        workflow_id=workflow_id,
        min_similarity=min_similarity,
    )
def _match_hierarchical_from_state(
    self,
    screen_state: ScreenState,
    workflow: Workflow,
    workflow_id: str,
    min_similarity: float,
) -> Optional[Dict[str, Any]]:
    """
    Delegate matching to the ``HierarchicalMatcher``, extracting
    ``window_info``, ``detected_elements`` and the screenshot from the
    provided ``ScreenState``. Factors out the logic of
    ``match_hierarchical`` without re-opening the image when unnecessary.
    """
    window = screen_state.window
    # Rebuild window_info from the ScreenState (not "Unknown")
    window_info = {
        "title": window.window_title,
        "app_name": window.app_name,
        "window_title": window.window_title,
    }
    detected_elements = list(screen_state.ui_elements)

    # Open the screenshot when available (the matcher may need it for
    # region-level matching). If the path is unusable, pass None and let
    # the matcher work from window + elements alone.
    screenshot = None
    screenshot_path = screen_state.raw.screenshot_path
    if screenshot_path:
        try:
            from PIL import Image

            screenshot = Image.open(screenshot_path)
        except Exception as exc:
            logger.debug(f"Screenshot unavailable for hierarchical match: {exc}")

    # Per-workflow temporal context, created lazily on first use
    if workflow_id not in self._temporal_context:
        self._temporal_context[workflow_id] = TemporalContext()
    temporal_context = self._temporal_context[workflow_id]

    result: MatchResult = self.hierarchical_matcher.match(
        screenshot=screenshot,
        workflow=workflow,
        window_info=window_info,
        detected_elements=detected_elements,
        temporal_context=temporal_context,
    )

    if result.confidence < min_similarity:
        logger.debug(
            f"Hierarchical match below threshold: {result.confidence:.3f} "
            f"(min={min_similarity})"
        )
        return None

    # Remember this match to feed the next temporal boost
    temporal_context.add_match(result.node_id, result.confidence)

    alternatives = [
        {"node_id": alt.node_id, "confidence": alt.confidence}
        for alt in result.alternatives
    ]
    return {
        "node_id": result.node_id,
        "workflow_id": workflow_id,
        "confidence": result.confidence,
        "window_confidence": result.window_confidence,
        "region_confidence": result.region_confidence,
        "element_confidence": result.element_confidence,
        "temporal_boost": result.temporal_boost,
        "matched_variant": result.matched_variant,
        "alternatives": alternatives,
        "match_time_ms": result.match_time_ms,
        "match_type": "hierarchical",
    }
def _match_via_faiss(
    self,
    screen_state: ScreenState,
    workflow_id: Optional[str],
    min_similarity: float,
) -> Optional[Dict[str, Any]]:
    """
    Embedding + FAISS search fallback. Reuses the provided
    ``ScreenState`` (hence its real ``ui_elements`` and ``window_title``)
    instead of recreating a stub.
    """
    # The historical FAISS threshold was 0.85. Honour it as the default
    # floor, but respect a stricter ``min_similarity`` from the caller
    # (the hierarchical pass may already have failed at that level).
    threshold = max(min_similarity, 0.85)

    state_embedding = self.embedding_builder.build(screen_state)
    results = self.faiss_manager.search(state_embedding.get_vector(), k=5)
    if not results:
        logger.debug("No match found in FAISS")
        return None

    for candidate in results:
        metadata = candidate.get("metadata", {})
        candidate_workflow = metadata.get("workflow_id")
        # Skip candidates from other workflows when a target is given
        if workflow_id and candidate_workflow != workflow_id:
            continue
        similarity = candidate.get("similarity", 0)
        if similarity < threshold:
            continue
        return {
            "node_id": metadata.get("node_id"),
            "workflow_id": candidate_workflow,
            "confidence": similarity,
            "state_embedding_id": state_embedding.embedding_id,
            "match_type": "faiss",
        }

    logger.debug(
        f"Best FAISS match below threshold: "
        f"{results[0].get('similarity', 0):.3f} (min={threshold})"
    )
    return None
def match_current_state( def match_current_state(
self, self,
screenshot_path: str, screenshot_path: str,
workflow_id: Optional[str] = None, workflow_id: Optional[str] = None,
window_title: Optional[str] = None window_title: Optional[str] = None,
) -> Optional[Dict[str, Any]]: ) -> Optional[Dict[str, Any]]:
""" """
Identifier dans quel node se trouve l'écran actuel. Identifier dans quel node se trouve l'écran actuel (API legacy).
Lot E — cette méthode est désormais un **wrapper** de rétrocompat :
elle construit un ``ScreenState`` enrichi via ``ScreenAnalyzer``
(au lieu d'un stub avec ``window_title="Unknown"``) puis délègue
à ``match_current_state_from_state``. Garantit la compat pour les
callers externes qui ne manipulent que le chemin du screenshot.
Args: Args:
screenshot_path: Chemin vers le screenshot actuel screenshot_path: Chemin vers le screenshot actuel.
workflow_id: ID du workflow à matcher (tous si None) workflow_id: ID du workflow à matcher (tous si None).
window_title: Titre de fenêtre pour contexte window_title: Titre de fenêtre pour contexte (utilisé comme
hint si le ScreenAnalyzer n'est pas disponible).
Returns: Returns:
Dict avec node_id, workflow_id, confidence, ou None si pas de match Dict avec ``node_id``, ``workflow_id``, ``confidence``, ou
``None`` si pas de match.
""" """
logger.debug(f"Matching screenshot: {screenshot_path}") logger.debug(f"Matching screenshot: {screenshot_path}")
# Créer un ScreenState temporaire # Construire un ScreenState enrichi via le ScreenAnalyzer partagé.
screen_state = self._build_screen_state_for_matching(
screenshot_path=screenshot_path,
workflow_id=workflow_id,
window_title=window_title,
)
return self.match_current_state_from_state(
screen_state=screen_state,
workflow_id=workflow_id,
)
def _build_screen_state_for_matching(
self,
screenshot_path: str,
workflow_id: Optional[str],
window_title: Optional[str],
) -> ScreenState:
"""
Construire un ``ScreenState`` pour l'API legacy ``match_current_state``.
Tente d'utiliser le ``ScreenAnalyzer`` partagé ; en cas d'échec,
retombe sur un stub minimaliste (équivalent fonctionnel de l'ancien
comportement, mais clairement isolé ici).
"""
from core.models.screen_state import ( from core.models.screen_state import (
WindowContext, RawLevel, PerceptionLevel, ContextLevel, EmbeddingRef WindowContext, RawLevel, PerceptionLevel, ContextLevel, EmbeddingRef
) )
screenshot_path = Path(screenshot_path) path = Path(screenshot_path)
# Tentative 1 : ScreenAnalyzer partagé (résultat enrichi)
try:
from core.pipeline import get_screen_analyzer
analyzer = get_screen_analyzer()
if analyzer is not None:
window_info = None
if window_title:
window_info = {"title": window_title, "app_name": "unknown"}
return analyzer.analyze(
str(path),
window_info=window_info,
)
except Exception as exc:
logger.debug(
f"ScreenAnalyzer unavailable in match_current_state wrapper: {exc}"
)
# Tentative 2 : stub minimal (comportement legacy d'urgence)
window = WindowContext( window = WindowContext(
app_name="unknown", app_name="unknown",
window_title=window_title or "Unknown", window_title=window_title or "Unknown",
screen_resolution=[1920, 1080], screen_resolution=[1920, 1080],
workspace="main" workspace="main",
) )
raw = RawLevel( raw = RawLevel(
screenshot_path=str(screenshot_path), screenshot_path=str(path),
capture_method="manual", capture_method="manual",
file_size_bytes=screenshot_path.stat().st_size if screenshot_path.exists() else 0 file_size_bytes=path.stat().st_size if path.exists() else 0,
) )
perception = PerceptionLevel( perception = PerceptionLevel(
embedding=EmbeddingRef( embedding=EmbeddingRef(
provider="openclip_ViT-B-32", provider="openclip_ViT-B-32",
vector_id="temp", vector_id="temp",
dimensions=512 dimensions=512,
), ),
detected_text=[], detected_text=[],
text_detection_method="pending", text_detection_method="pending",
confidence_avg=0.0 confidence_avg=0.0,
) )
context = ContextLevel( context = ContextLevel(
current_workflow_candidate=workflow_id, current_workflow_candidate=workflow_id,
workflow_step=None, workflow_step=None,
user_id="matcher", user_id="matcher",
tags=[], tags=[],
business_variables={} business_variables={},
) )
return ScreenState(
current_state = ScreenState(
screen_state_id=f"match_{datetime.now().strftime('%Y%m%d_%H%M%S')}", screen_state_id=f"match_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
timestamp=datetime.now(), timestamp=datetime.now(),
session_id="matching", session_id="matching",
@@ -421,39 +661,8 @@ class WorkflowPipeline:
raw=raw, raw=raw,
perception=perception, perception=perception,
context=context, context=context,
ui_elements=[] ui_elements=[],
) )
# Calculer embedding
state_embedding = self.embedding_builder.build(current_state)
query_vector = state_embedding.get_vector()
# Rechercher dans FAISS
results = self.faiss_manager.search(query_vector, k=5)
if not results:
logger.debug("No match found in FAISS")
return None
# Filtrer par workflow si spécifié
for result in results:
metadata = result.get("metadata", {})
result_workflow_id = metadata.get("workflow_id")
if workflow_id and result_workflow_id != workflow_id:
continue
similarity = result.get("similarity", 0)
if similarity >= 0.85: # Seuil de matching
return {
"node_id": metadata.get("node_id"),
"workflow_id": result_workflow_id,
"confidence": similarity,
"state_embedding_id": state_embedding.embedding_id
}
logger.debug(f"Best match below threshold: {results[0].get('similarity', 0):.3f}")
return None
def match_hierarchical( def match_hierarchical(
self, self,
@@ -548,17 +757,56 @@ class WorkflowPipeline:
def get_next_action( def get_next_action(
self, self,
workflow_id: str, workflow_id: str,
current_node_id: str current_node_id: str,
) -> Optional[Dict[str, Any]]: screen_state: Optional[ScreenState] = None,
strategy: str = "best",
source_similarity: float = 1.0,
) -> Dict[str, Any]:
""" """
Obtenir la prochaine action à exécuter. Obtenir la prochaine action à exécuter.
Contrat normalisé (Lot A — avril 2026) : retourne **toujours** un
dict avec une clé ``status`` non-ambiguë. Le ``None`` ambigu qui
confondait "workflow terminé" et "aucun edge valide" a été
supprimé : l'appelant (ExecutionLoop) peut désormais distinguer
ces cas pour déclencher une pause supervisée plutôt qu'une fin
de workflow faux-positive.
Sélection d'edge (C3) :
- Filtre dur sur ``pre_conditions`` (EdgeConstraints)
- Ranking par score composite (success_rate, target_match, recency)
- Tiebreak : success_rate le plus haut
Args: Args:
workflow_id: ID du workflow workflow_id: ID du workflow
current_node_id: ID du node actuel current_node_id: ID du node actuel
screen_state: État courant, requis pour évaluer les
``pre_conditions`` et le match ``target_spec``. Si None,
fallback sur la logique sans filtre de contraintes.
strategy: ``"best"`` (défaut, scoring complet) ou ``"first"``
(mode legacy, premier edge sans tri)
source_similarity: confiance du matching (``match_current_state``)
qui a identifié ``current_node_id``. Propagée à l'EdgeScorer
pour activer la précondition ``min_source_similarity`` des
edges. Défaut ``1.0`` pour compat avec les appelants qui
ne la fournissent pas encore (Lot B — avril 2026).
Returns: Returns:
Dict avec action, target_node, confidence, ou None Dict avec l'une des formes suivantes :
- ``{"status": "selected", "edge_id": str, "action": dict,
"target_node": str, "confidence": float, "score": float}``
→ edge sélectionné, l'ExecutionLoop doit l'exécuter.
- ``{"status": "terminal"}`` → le node courant n'a pas
d'outgoing_edge (fin légitime de workflow).
- ``{"status": "blocked", "reason": str}`` → il existe des
outgoing_edges mais aucun ne satisfait les conditions
(``reason="no_valid_edge"``), ou le workflow est introuvable
(``reason="workflow_not_found"``). L'ExecutionLoop doit
déclencher une pause supervisée et ne **jamais** traiter
ce cas comme un succès.
""" """
workflow = self._workflows.get(workflow_id) workflow = self._workflows.get(workflow_id)
if not workflow: if not workflow:
@@ -569,23 +817,44 @@ class WorkflowPipeline:
self._workflows[workflow_id] = workflow self._workflows[workflow_id] = workflow
else: else:
logger.error(f"Workflow not found: {workflow_id}") logger.error(f"Workflow not found: {workflow_id}")
return None return {"status": "blocked", "reason": "workflow_not_found"}
# Trouver les edges sortants du node actuel # Trouver les edges sortants du node actuel
outgoing_edges = workflow.get_outgoing_edges(current_node_id) outgoing_edges = workflow.get_outgoing_edges(current_node_id)
if not outgoing_edges: if not outgoing_edges:
# Aucun outgoing_edge = fin légitime du workflow
logger.info(f"No outgoing edges from node {current_node_id}") logger.info(f"No outgoing edges from node {current_node_id}")
return None return {"status": "terminal"}
# Pour l'instant, prendre le premier edge (TODO: logique de sélection) # Sélection robuste via EdgeScorer (C3)
edge = outgoing_edges[0] from core.pipeline.edge_scorer import EdgeScorer
scorer = EdgeScorer()
edge = scorer.select_best(
outgoing_edges,
screen_state=screen_state,
strategy=strategy,
source_similarity=source_similarity,
)
if edge is None:
# Il y avait des candidats mais aucun n'a passé les filtres.
# On NE retourne PAS "terminal" : l'ExecutionLoop doit traiter
# ce cas comme un blocage et demander de l'aide.
logger.warning(
f"No valid edge from {current_node_id} "
f"({len(outgoing_edges)} candidates rejected)"
)
return {"status": "blocked", "reason": "no_valid_edge"}
return { return {
"status": "selected",
"edge_id": edge.edge_id, "edge_id": edge.edge_id,
"action": edge.action.to_dict(), "action": edge.action.to_dict(),
"target_node": edge.to_node, "target_node": edge.to_node,
"confidence": edge.stats.success_rate if edge.stats else 1.0 "confidence": edge.stats.success_rate if edge.stats else 1.0,
"score": edge.stats.success_rate if edge.stats else 1.0,
} }
def should_execute_automatically(self, workflow_id: str) -> bool: def should_execute_automatically(self, workflow_id: str) -> bool:
@@ -759,10 +1028,11 @@ class WorkflowPipeline:
current_node_id = match_result["node_id"] current_node_id = match_result["node_id"]
logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})") logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
# 2. Obtenir la prochaine action # 2. Obtenir la prochaine action (contrat dict avec status explicite)
action_info = self.get_next_action(workflow_id, current_node_id) action_info = self.get_next_action(workflow_id, current_node_id)
action_status = action_info.get("status")
if not action_info:
if action_status == "terminal":
return { return {
"execution_id": execution_id, "execution_id": execution_id,
"workflow_id": workflow_id, "workflow_id": workflow_id,
@@ -771,9 +1041,21 @@ class WorkflowPipeline:
"message": "Workflow completed - no more actions", "message": "Workflow completed - no more actions",
"current_node": current_node_id, "current_node": current_node_id,
"execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000, "execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
"correlation_id": execution_id "correlation_id": execution_id,
} }
if action_status == "blocked":
return {
"execution_id": execution_id,
"workflow_id": workflow_id,
"success": False,
"step_type": "action_selection",
"error": f"No valid edge: {action_info.get('reason', 'unknown')}",
"current_node": current_node_id,
"execution_time_ms": (datetime.now() - start_time).total_seconds() * 1000,
"correlation_id": execution_id,
}
logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}") logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
# 3. Charger le workflow pour obtenir l'edge complet # 3. Charger le workflow pour obtenir l'edge complet

View File

@@ -125,25 +125,47 @@ class WorkflowPipelineEnhanced:
current_node_id = match_result["node_id"] current_node_id = match_result["node_id"]
logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})") logger.info(f"Matched current state to node: {current_node_id} (confidence: {match_result['confidence']:.3f})")
# 2. Obtenir la prochaine action # 2. Obtenir la prochaine action (contrat dict avec status explicite)
action_info = self.get_next_action(workflow_id, current_node_id) action_info = self.get_next_action(workflow_id, current_node_id)
action_status = action_info.get("status")
if not action_info:
# Workflow terminé if action_status == "terminal":
# Workflow terminé (aucun outgoing_edge = fin légitime)
performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000 performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
result = WorkflowExecutionResult.workflow_complete( result = WorkflowExecutionResult.workflow_complete(
execution_id=execution_id, execution_id=execution_id,
workflow_id=workflow_id, workflow_id=workflow_id,
current_node=current_node_id, current_node=current_node_id,
performance_metrics=performance_metrics performance_metrics=performance_metrics,
) )
result.correlation_id = correlation_id result.correlation_id = correlation_id
result.match_result = match_result result.match_result = match_result
logger.info(f"Workflow {workflow_id} completed at node {current_node_id}") logger.info(f"Workflow {workflow_id} completed at node {current_node_id}")
return result return result
if action_status == "blocked":
# Des edges existent mais aucun ne passe les filtres :
# c'est un blocage, pas une fin de workflow.
performance_metrics.total_execution_time_ms = (datetime.now() - start_time).total_seconds() * 1000
result = WorkflowExecutionResult.error(
execution_id=execution_id,
workflow_id=workflow_id,
error_message=f"No valid edge: {action_info.get('reason', 'unknown')}",
step_type="action_selection",
current_node=current_node_id,
performance_metrics=performance_metrics,
)
result.correlation_id = correlation_id
logger.warning(
f"Workflow {workflow_id} blocked at node {current_node_id}: "
f"{action_info.get('reason')}"
)
return result
logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}") logger.info(f"Next action: {action_info['action']['type']} -> {action_info['target_node']}")
# 3. Charger le workflow pour obtenir l'edge complet # 3. Charger le workflow pour obtenir l'edge complet

View File

@@ -96,14 +96,16 @@ class TestWorkflowPipelineEnhanced:
"confidence": 0.92 "confidence": 0.92
} }
# Mock de l'action suivante # Mock de l'action suivante (contrat dict normalisé Lot A)
mock_workflow_pipeline.get_next_action.return_value = { mock_workflow_pipeline.get_next_action.return_value = {
"status": "selected",
"edge_id": "edge_1", "edge_id": "edge_1",
"action": {"type": "click", "target": "button"}, "action": {"type": "click", "target": "button"},
"target_node": "node_2", "target_node": "node_2",
"confidence": 0.95 "confidence": 0.95,
"score": 0.95,
} }
# Mock du workflow # Mock du workflow
mock_workflow = Mock(spec=Workflow) mock_workflow = Mock(spec=Workflow)
mock_edge = Mock(spec=WorkflowEdge) mock_edge = Mock(spec=WorkflowEdge)
@@ -112,7 +114,7 @@ class TestWorkflowPipelineEnhanced:
mock_edge.to_node = "node_2" mock_edge.to_node = "node_2"
mock_workflow.edges = [mock_edge] mock_workflow.edges = [mock_edge]
mock_workflow_pipeline.load_workflow.return_value = mock_workflow mock_workflow_pipeline.load_workflow.return_value = mock_workflow
# Mock du résultat d'exécution # Mock du résultat d'exécution
mock_execution_result = Mock(spec=ExecutionResult) mock_execution_result = Mock(spec=ExecutionResult)
mock_execution_result.status = ExecutionStatus.SUCCESS mock_execution_result.status = ExecutionStatus.SUCCESS
@@ -121,24 +123,24 @@ class TestWorkflowPipelineEnhanced:
mock_execution_result.target_resolved = None mock_execution_result.target_resolved = None
mock_execution_result.error = None mock_execution_result.error = None
mock_workflow_pipeline.action_executor.execute_edge.return_value = mock_execution_result mock_workflow_pipeline.action_executor.execute_edge.return_value = mock_execution_result
# Créer l'instance enhanced # Créer l'instance enhanced
enhanced = WorkflowPipelineEnhanced() enhanced = WorkflowPipelineEnhanced()
# Lier les méthodes du pipeline mock # Lier les méthodes du pipeline mock
enhanced.match_current_state = mock_workflow_pipeline.match_current_state enhanced.match_current_state = mock_workflow_pipeline.match_current_state
enhanced.get_next_action = mock_workflow_pipeline.get_next_action enhanced.get_next_action = mock_workflow_pipeline.get_next_action
enhanced.load_workflow = mock_workflow_pipeline.load_workflow enhanced.load_workflow = mock_workflow_pipeline.load_workflow
enhanced.action_executor = mock_workflow_pipeline.action_executor enhanced.action_executor = mock_workflow_pipeline.action_executor
enhanced.error_handler = mock_workflow_pipeline.error_handler enhanced.error_handler = mock_workflow_pipeline.error_handler
# Act # Act
result = enhanced.execute_workflow_step_enhanced( result = enhanced.execute_workflow_step_enhanced(
workflow_id=workflow_id, workflow_id=workflow_id,
current_state=mock_screen_state, current_state=mock_screen_state,
context={"test_context": "value"} context={"test_context": "value"}
) )
# Assert # Assert
assert isinstance(result, WorkflowExecutionResult) assert isinstance(result, WorkflowExecutionResult)
assert result.success is True assert result.success is True
@@ -242,7 +244,8 @@ class TestWorkflowPipelineEnhanced:
} }
# Mock de l'action suivante (pas d'action = workflow terminé) # Mock de l'action suivante (pas d'action = workflow terminé)
mock_workflow_pipeline.get_next_action.return_value = None # Contrat dict normalisé Lot A : status="terminal" pour fin légitime
mock_workflow_pipeline.get_next_action.return_value = {"status": "terminal"}
# Créer l'instance enhanced # Créer l'instance enhanced
enhanced = WorkflowPipelineEnhanced() enhanced = WorkflowPipelineEnhanced()
@@ -347,14 +350,16 @@ class TestWorkflowPipelineEnhanced:
"confidence": 0.92 "confidence": 0.92
} }
# Mock de l'action suivante # Mock de l'action suivante (contrat dict normalisé Lot A)
mock_workflow_pipeline.get_next_action.return_value = { mock_workflow_pipeline.get_next_action.return_value = {
"status": "selected",
"edge_id": "edge_1", "edge_id": "edge_1",
"action": {"type": "click", "target": "button"}, "action": {"type": "click", "target": "button"},
"target_node": "node_2", "target_node": "node_2",
"confidence": 0.95 "confidence": 0.95,
"score": 0.95,
} }
# Mock du workflow # Mock du workflow
mock_workflow = Mock(spec=Workflow) mock_workflow = Mock(spec=Workflow)
mock_edge = Mock(spec=WorkflowEdge) mock_edge = Mock(spec=WorkflowEdge)
@@ -363,7 +368,7 @@ class TestWorkflowPipelineEnhanced:
mock_edge.to_node = "node_2" mock_edge.to_node = "node_2"
mock_workflow.edges = [mock_edge] mock_workflow.edges = [mock_edge]
mock_workflow_pipeline.load_workflow.return_value = mock_workflow mock_workflow_pipeline.load_workflow.return_value = mock_workflow
# Mock du résultat d'exécution # Mock du résultat d'exécution
mock_execution_result = Mock(spec=ExecutionResult) mock_execution_result = Mock(spec=ExecutionResult)
mock_execution_result.status = ExecutionStatus.SUCCESS mock_execution_result.status = ExecutionStatus.SUCCESS
@@ -372,17 +377,17 @@ class TestWorkflowPipelineEnhanced:
mock_execution_result.target_resolved = None mock_execution_result.target_resolved = None
mock_execution_result.error = None mock_execution_result.error = None
mock_workflow_pipeline.action_executor.execute_edge.return_value = mock_execution_result mock_workflow_pipeline.action_executor.execute_edge.return_value = mock_execution_result
# Créer l'instance enhanced # Créer l'instance enhanced
enhanced = WorkflowPipelineEnhanced() enhanced = WorkflowPipelineEnhanced()
# Lier les méthodes du pipeline mock # Lier les méthodes du pipeline mock
enhanced.match_current_state = mock_workflow_pipeline.match_current_state enhanced.match_current_state = mock_workflow_pipeline.match_current_state
enhanced.get_next_action = mock_workflow_pipeline.get_next_action enhanced.get_next_action = mock_workflow_pipeline.get_next_action
enhanced.load_workflow = mock_workflow_pipeline.load_workflow enhanced.load_workflow = mock_workflow_pipeline.load_workflow
enhanced.action_executor = mock_workflow_pipeline.action_executor enhanced.action_executor = mock_workflow_pipeline.action_executor
enhanced.error_handler = mock_workflow_pipeline.error_handler enhanced.error_handler = mock_workflow_pipeline.error_handler
# Act # Act
result = enhanced.execute_workflow_step_enhanced( result = enhanced.execute_workflow_step_enhanced(
workflow_id=workflow_id, workflow_id=workflow_id,

View File

@@ -0,0 +1,520 @@
"""
Tests unitaires pour la remontée des champs vision-aware (C1) vers analytics.
Couvre :
- StepMetrics.to_dict / from_dict avec les nouveaux champs
- AnalyticsExecutionIntegration.on_step_result passe bien les champs
- Persistance SQLite (schema + migration) des colonnes C1
"""
from __future__ import annotations
import sqlite3
import tempfile
from datetime import datetime
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from core.analytics.collection.metrics_collector import StepMetrics
# -----------------------------------------------------------------------------
# StepMetrics : sérialisation des champs C1
# -----------------------------------------------------------------------------
def _make_step_metrics(**overrides) -> StepMetrics:
    """Build a fully-populated StepMetrics; any field can be overridden."""
    defaults = {
        "step_id": "s1",
        "execution_id": "exec1",
        "workflow_id": "wf1",
        "node_id": "n1",
        "action_type": "click",
        "target_element": "",
        "started_at": datetime(2026, 4, 13, 10, 0, 0),
        "completed_at": datetime(2026, 4, 13, 10, 0, 1),
        "duration_ms": 1000.0,
        "status": "completed",
        "confidence_score": 0.9,
        "retry_count": 0,
        "error_details": None,
    }
    defaults.update(overrides)
    return StepMetrics(**defaults)
class TestStepMetricsVisionFields:
    """Serialization of the C1 vision-aware fields on StepMetrics."""

    def test_default_vision_fields(self):
        metrics = _make_step_metrics()
        # All timing fields default to zero when not provided.
        for timing_field in ("ocr_ms", "ui_ms", "analyze_ms", "total_ms"):
            assert getattr(metrics, timing_field) == 0.0
        assert metrics.cache_hit is False
        assert metrics.degraded is False

    def test_to_dict_includes_vision_fields(self):
        timings = {
            "ocr_ms": 120.5,
            "ui_ms": 45.0,
            "analyze_ms": 200.0,
            "total_ms": 1050.0,
        }
        payload = _make_step_metrics(
            cache_hit=True, degraded=True, **timings
        ).to_dict()
        for key, expected in timings.items():
            assert payload[key] == expected
        assert payload["cache_hit"] is True
        assert payload["degraded"] is True

    def test_from_dict_roundtrip(self):
        source = _make_step_metrics(
            ocr_ms=10.0, ui_ms=20.0, analyze_ms=30.0,
            total_ms=100.0, cache_hit=True, degraded=False,
        )
        clone = StepMetrics.from_dict(source.to_dict())
        assert (clone.ocr_ms, clone.ui_ms, clone.analyze_ms, clone.total_ms) \
            == (10.0, 20.0, 30.0, 100.0)
        assert clone.cache_hit is True
        assert clone.degraded is False

    def test_from_dict_missing_vision_fields_defaults_to_zero(self):
        """Backward compatibility: a dict without C1 fields yields 0/False."""
        legacy_payload = {
            'step_id': 's1',
            'execution_id': 'e1',
            'workflow_id': 'w1',
            'node_id': 'n1',
            'action_type': 'click',
            'target_element': '',
            'started_at': datetime.now().isoformat(),
            'completed_at': datetime.now().isoformat(),
            'duration_ms': 100.0,
            'status': 'completed',
            'confidence_score': 0.5,
        }
        restored = StepMetrics.from_dict(legacy_payload)
        assert restored.ocr_ms == 0.0
        assert restored.cache_hit is False
        assert restored.degraded is False
# -----------------------------------------------------------------------------
# AnalyticsExecutionIntegration.on_step_result
# -----------------------------------------------------------------------------
class _FakeStepResult:
"""Stand-in minimal pour core.execution.execution_loop.StepResult."""
def __init__(self, **kw):
self.success = kw.get("success", True)
self.node_id = kw.get("node_id", "n1")
self.edge_id = kw.get("edge_id", None)
self.action_result = kw.get("action_result", None)
self.match_confidence = kw.get("match_confidence", 0.9)
self.duration_ms = kw.get("duration_ms", 100.0)
self.message = kw.get("message", "")
self.ocr_ms = kw.get("ocr_ms", 0.0)
self.ui_ms = kw.get("ui_ms", 0.0)
self.analyze_ms = kw.get("analyze_ms", 0.0)
self.total_ms = kw.get("total_ms", 0.0)
self.cache_hit = kw.get("cache_hit", False)
self.degraded = kw.get("degraded", False)
class TestAnalyticsOnStepResult:
    """``on_step_result`` must forward the C1 vision fields into StepMetrics."""

    @staticmethod
    def _make_integration():
        # Helper (not a test): returns (integration, mocked analytics system).
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        analytics_system = MagicMock()
        return AnalyticsExecutionIntegration(analytics_system), analytics_system

    def test_on_step_result_passes_vision_fields(self):
        integration, analytics_system = self._make_integration()
        step = _FakeStepResult(
            node_id="node_click",
            success=True,
            match_confidence=0.87,
            duration_ms=1234.0,
            ocr_ms=111.0,
            ui_ms=222.0,
            analyze_ms=333.0,
            total_ms=1234.0,
            cache_hit=True,
            degraded=False,
        )

        integration.on_step_result(
            execution_id="exec1",
            workflow_id="wf1",
            step_result=step,
        )

        # Exactly one StepMetrics must have been recorded, carrying the
        # vision-aware timings untouched.
        record_calls = analytics_system.metrics_collector.record_step.call_args_list
        assert len(record_calls) == 1
        recorded: StepMetrics = record_calls[0].args[0]
        assert isinstance(recorded, StepMetrics)
        assert recorded.node_id == "node_click"
        assert recorded.workflow_id == "wf1"
        assert recorded.execution_id == "exec1"
        assert recorded.confidence_score == 0.87
        assert recorded.duration_ms == 1234.0
        assert recorded.ocr_ms == 111.0
        assert recorded.ui_ms == 222.0
        assert recorded.analyze_ms == 333.0
        assert recorded.total_ms == 1234.0
        assert recorded.cache_hit is True
        assert recorded.degraded is False
        assert recorded.status == "completed"

    def test_on_step_result_failed_step(self):
        integration, analytics_system = self._make_integration()
        step = _FakeStepResult(
            success=False,
            message="Click failed",
            degraded=True,
        )

        integration.on_step_result("e1", "w1", step)

        recorded: StepMetrics = (
            analytics_system.metrics_collector.record_step.call_args.args[0]
        )
        assert recorded.status == "failed"
        assert recorded.error_details == "Click failed"
        assert recorded.degraded is True

    def test_on_step_result_disabled_integration_is_noop(self):
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        integration = AnalyticsExecutionIntegration(None)  # disabled
        assert integration.enabled is False
        # Must be a silent no-op: no exception, no recording.
        integration.on_step_result("e1", "w1", _FakeStepResult())
# -----------------------------------------------------------------------------
# AnalyticsExecutionIntegration.on_execution_complete (Lot A — avril 2026)
# -----------------------------------------------------------------------------
class TestAnalyticsOnExecutionComplete:
    """Normalised contract: ``duration_ms`` (ms) + ``status`` (str), no magic.

    Covers both paths of ``on_execution_complete``:
    - fallback: no active execution -> a synthetic ExecutionMetrics is
      pushed straight into the collector ``_buffer``;
    - nominal: an active execution exists -> delegation to
      ``record_execution_complete``.
    """
    def _make_integration(self):
        # Helper (not a test): returns (integration, mocked analytics system).
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        fake_system = MagicMock()
        # No active execution: the integration must take the fallback path
        # ("synthetic ExecutionMetrics pushed into _buffer").
        fake_system.metrics_collector._active_executions = {}
        # The collector lock is used as a context manager; wire
        # __enter__/__exit__ explicitly so the fallback path can acquire it.
        fake_system.metrics_collector._lock = MagicMock()
        fake_system.metrics_collector._lock.__enter__ = MagicMock(
            return_value=None
        )
        fake_system.metrics_collector._lock.__exit__ = MagicMock(
            return_value=None
        )
        fake_system.metrics_collector._buffer = []
        return AnalyticsExecutionIntegration(fake_system), fake_system
    def test_fallback_builds_execution_metrics_with_correct_fields(self):
        """Without a prior record_execution_start, a synthetic
        ExecutionMetrics must be built with the correct field names."""
        from core.analytics.collection.metrics_collector import ExecutionMetrics
        integration, fake_system = self._make_integration()
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=1500.0,
            status="completed",
            steps_total=3,
            steps_completed=3,
            steps_failed=0,
        )
        # An ExecutionMetrics must have been pushed into the buffer
        buffer = fake_system.metrics_collector._buffer
        assert len(buffer) == 1
        metric: ExecutionMetrics = buffer[0]
        assert isinstance(metric, ExecutionMetrics)
        assert metric.execution_id == "exec1"
        assert metric.workflow_id == "wf1"
        assert metric.duration_ms == 1500.0
        assert metric.status == "completed"
        assert metric.steps_total == 3
        assert metric.steps_completed == 3
        assert metric.steps_failed == 0
        # started_at / completed_at must be consistent with duration_ms
        delta_ms = (
            metric.completed_at - metric.started_at
        ).total_seconds() * 1000
        assert abs(delta_ms - 1500.0) < 1.0
    def test_uses_record_execution_complete_if_active(self):
        """If the execution was opened via on_execution_start, delegate to
        record_execution_complete (nominal path)."""
        integration, fake_system = self._make_integration()
        # Simulate an active execution
        fake_system.metrics_collector._active_executions = {"exec1": object()}
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=800.0,
            status="failed",
            steps_total=2,
            steps_completed=1,
            steps_failed=1,
            error_message="timeout",
        )
        call = fake_system.metrics_collector.record_execution_complete.call_args
        assert call is not None
        kwargs = call.kwargs
        assert kwargs["execution_id"] == "exec1"
        assert kwargs["status"] == "failed"
        assert kwargs["steps_total"] == 2
        assert kwargs["steps_completed"] == 1
        assert kwargs["steps_failed"] == 1
        assert kwargs["error_message"] == "timeout"
    def test_steps_total_derived_when_not_provided(self):
        """steps_total is derived by summing completed + failed when absent,
        instead of failing silently."""
        integration, fake_system = self._make_integration()
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=500.0,
            status="completed",
            steps_completed=2,
            steps_failed=1,
        )
        metric = fake_system.metrics_collector._buffer[0]
        assert metric.steps_total == 3  # 2 + 1
    def test_disabled_integration_is_noop(self):
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        integration = AnalyticsExecutionIntegration(None)
        assert integration.enabled is False
        # Must do nothing and raise nothing
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=100.0,
            status="completed",
        )
    def test_realtime_complete_called(self):
        """Real-time tracking is closed with the propagated status."""
        integration, fake_system = self._make_integration()
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=100.0,
            status="stopped",
        )
        fake_system.realtime_analytics.complete_execution.assert_called_once_with(
            execution_id="exec1",
            status="stopped",
        )
# -----------------------------------------------------------------------------
# AnalyticsExecutionIntegration.on_recovery_attempt (Lot A — avril 2026)
# -----------------------------------------------------------------------------
class TestAnalyticsOnRecoveryAttempt:
    """Normalised contract: StepMetrics built with the real dataclass fields."""

    @staticmethod
    def _make_integration():
        # Helper (not a test): returns (integration, mocked analytics system).
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        analytics_system = MagicMock()
        return AnalyticsExecutionIntegration(analytics_system), analytics_system

    def test_success_recovery_builds_valid_step_metrics(self):
        from core.analytics.collection.metrics_collector import StepMetrics
        integration, analytics_system = self._make_integration()

        integration.on_recovery_attempt(
            execution_id="exec1",
            workflow_id="wf1",
            node_id="node_click",
            strategy="retry_with_delay",
            success=True,
            duration_ms=250.0,
        )

        call = analytics_system.metrics_collector.record_step.call_args
        assert call is not None
        recorded: StepMetrics = call.args[0]
        assert isinstance(recorded, StepMetrics)
        assert recorded.execution_id == "exec1"
        assert recorded.workflow_id == "wf1"
        assert recorded.node_id == "node_click_recovery"
        assert recorded.action_type == "recovery_retry_with_delay"
        assert recorded.duration_ms == 250.0
        assert recorded.status == "completed"
        assert recorded.error_details is None
        # Mandatory dataclass fields are populated with safe defaults.
        assert recorded.step_id  # non-empty
        assert recorded.target_element == ""
        assert recorded.confidence_score == 0.0

    def test_failed_recovery_sets_status_and_error_details(self):
        from core.analytics.collection.metrics_collector import StepMetrics
        integration, analytics_system = self._make_integration()

        integration.on_recovery_attempt(
            execution_id="e1",
            workflow_id="w1",
            node_id="n1",
            strategy="fallback_to_parent",
            success=False,
            duration_ms=80.0,
        )

        recorded: StepMetrics = (
            analytics_system.metrics_collector.record_step.call_args.args[0]
        )
        assert recorded.status == "failed"
        assert recorded.error_details == "Recovery failed: fallback_to_parent"
        assert recorded.duration_ms == 80.0

    def test_disabled_integration_is_noop(self):
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        integration = AnalyticsExecutionIntegration(None)
        # Disabled integration: must be a silent no-op.
        integration.on_recovery_attempt(
            execution_id="e1",
            workflow_id="w1",
            node_id="n1",
            strategy="x",
            success=True,
            duration_ms=10.0,
        )
# -----------------------------------------------------------------------------
# Persistance SQLite : schema + migration
# -----------------------------------------------------------------------------
class TestTimeSeriesStoreSchema:
    """SQLite persistence: fresh schema creation and legacy migration."""

    @staticmethod
    def _column_names(db_path):
        # Column names of the step_metrics table as reported by SQLite.
        with sqlite3.connect(str(db_path)) as conn:
            rows = conn.execute("PRAGMA table_info(step_metrics)")
            return {row[1] for row in rows}

    def test_new_store_has_vision_columns(self, tmp_path):
        from core.analytics.storage.timeseries_store import TimeSeriesStore

        store = TimeSeriesStore(tmp_path)
        columns = self._column_names(store.db_path)
        # Legacy columns are still there.
        for legacy_col in ("duration_ms", "confidence_score"):
            assert legacy_col in columns
        # C1 vision-aware columns.
        for vision_col in (
            "ocr_ms", "ui_ms", "analyze_ms", "total_ms", "cache_hit", "degraded",
        ):
            assert vision_col in columns

    def test_migration_adds_missing_columns(self, tmp_path):
        """A pre-existing DB without the C1 columns must be migrated in place."""
        from core.analytics.storage.timeseries_store import TimeSeriesStore

        # Hand-craft a "legacy" database lacking the new columns.
        storage_dir = tmp_path / "legacy"
        storage_dir.mkdir()
        legacy_db = storage_dir / "timeseries.db"
        with sqlite3.connect(str(legacy_db)) as conn:
            conn.executescript("""
                CREATE TABLE step_metrics (
                    step_id TEXT PRIMARY KEY,
                    execution_id TEXT NOT NULL,
                    workflow_id TEXT NOT NULL,
                    node_id TEXT NOT NULL,
                    action_type TEXT NOT NULL,
                    target_element TEXT,
                    started_at TIMESTAMP NOT NULL,
                    completed_at TIMESTAMP NOT NULL,
                    duration_ms REAL NOT NULL,
                    status TEXT NOT NULL,
                    confidence_score REAL,
                    retry_count INTEGER DEFAULT 0,
                    error_details TEXT
                );
            """)
            conn.commit()

        # Instantiating the store must run the migration.
        _ = TimeSeriesStore(storage_dir)
        migrated_columns = self._column_names(legacy_db)
        assert {"ocr_ms", "cache_hit", "degraded"} <= migrated_columns

    def test_write_and_read_vision_metrics(self, tmp_path):
        from core.analytics.storage.timeseries_store import TimeSeriesStore

        store = TimeSeriesStore(tmp_path)
        metric = _make_step_metrics(
            ocr_ms=50.0, ui_ms=60.0, analyze_ms=110.0,
            total_ms=500.0, cache_hit=True, degraded=True,
        )
        store.write_metrics([metric])

        with sqlite3.connect(str(store.db_path)) as conn:
            conn.row_factory = sqlite3.Row
            stored = conn.execute(
                "SELECT * FROM step_metrics WHERE step_id = ?", (metric.step_id,)
            ).fetchone()
            assert stored is not None
            for column, expected in (
                ("ocr_ms", 50.0),
                ("ui_ms", 60.0),
                ("analyze_ms", 110.0),
                ("total_ms", 500.0),
            ):
                assert stored[column] == expected
            # SQLite persists booleans as INTEGER 0/1.
            assert stored["cache_hit"] == 1
            assert stored["degraded"] == 1

View File

@@ -0,0 +1,264 @@
"""
Tests de la sélection robuste d'edge dans WorkflowPipeline.get_next_action (C3).
Vérifie que la nouvelle API utilise EdgeScorer et expose le contrat dict
normalisé (Lot A — avril 2026) :
- status="selected" → edge choisi
- status="terminal" → aucun outgoing_edge (fin légitime)
- status="blocked" → candidats rejetés (NE DOIT PAS être traité comme fin)
"""
from __future__ import annotations
from datetime import datetime
from unittest.mock import MagicMock, patch
import pytest
from core.models.screen_state import (
ContextLevel,
EmbeddingRef,
PerceptionLevel,
RawLevel,
ScreenState,
WindowContext,
)
from core.models.workflow_graph import (
Action,
EdgeConstraints,
EdgeStats,
PostConditions,
TargetSpec,
Workflow,
WorkflowEdge,
WorkflowNode,
)
def _edge(
    edge_id: str,
    required_window_title: str = "",
    success_rate: float = 0.5,
    execution_count: int = 10,
    min_source_similarity: float = 0.80,
) -> WorkflowEdge:
    """Build a minimal WorkflowEdge with synthetic execution statistics.

    Stats are derived from ``success_rate`` (rounded) only when
    ``execution_count`` is positive; otherwise the default EdgeStats is kept.
    """
    stats = EdgeStats()
    if execution_count > 0:
        successes = int(round(success_rate * execution_count))
        stats.execution_count = execution_count
        stats.success_count = successes
        stats.failure_count = execution_count - successes
    constraints = EdgeConstraints(
        required_window_title=required_window_title,
        min_source_similarity=min_source_similarity,
    )
    return WorkflowEdge(
        edge_id=edge_id,
        from_node="n1",
        to_node="n2",
        action=Action(type="mouse_click", target=TargetSpec()),
        constraints=constraints,
        post_conditions=PostConditions(),
        stats=stats,
    )
def _state(window_title: str = "AppA") -> ScreenState:
    """Minimal ScreenState whose only meaningful field is the window title."""
    window = WindowContext(
        app_name="app", window_title=window_title, screen_resolution=[1920, 1080]
    )
    perception = PerceptionLevel(
        embedding=EmbeddingRef(provider="t", vector_id="v", dimensions=512),
        detected_text=[],
        text_detection_method="none",
        confidence_avg=0.0,
    )
    return ScreenState(
        screen_state_id="s",
        timestamp=datetime.now(),
        session_id="sess",
        window=window,
        raw=RawLevel(screenshot_path="", capture_method="t", file_size_bytes=0),
        perception=perception,
        context=ContextLevel(),
        ui_elements=[],
    )
@pytest.fixture
def pipeline_with_workflow(tmp_path):
    """Minimal pipeline holding one in-memory workflow (Workflow mocked).

    Building a real Workflow (ScreenTemplate is too heavyweight) is avoided
    by configuring a MagicMock for the single method `get_next_action`
    relies on: `get_outgoing_edges`.
    """
    from core.pipeline.workflow_pipeline import WorkflowPipeline

    # Stub every heavy (GPU-bound) component before constructing the pipeline.
    heavy_components = dict(
        UIDetector=MagicMock(),
        CLIPEmbedder=MagicMock(),
        StateEmbeddingBuilder=MagicMock(),
        FusionEngine=MagicMock(),
        FAISSManager=MagicMock(),
        GraphBuilder=MagicMock(),
        NodeMatcher=MagicMock(),
        HierarchicalMatcher=MagicMock(),
        LearningManager=MagicMock(),
        ActionExecutor=MagicMock(),
        TargetResolver=MagicMock(),
        ErrorHandler=MagicMock(),
    )
    with patch.multiple("core.pipeline.workflow_pipeline", **heavy_components):
        pipeline = WorkflowPipeline(data_dir=str(tmp_path), use_gpu=False)

    workflow = MagicMock(spec=Workflow)
    workflow.workflow_id = "wf1"
    workflow.edges = []

    def _outgoing_edges(node_id):
        # Closure over `workflow` so tests can reassign workflow.edges later.
        return [edge for edge in workflow.edges if edge.from_node == node_id]

    workflow.get_outgoing_edges = _outgoing_edges
    pipeline._workflows["wf1"] = workflow
    return pipeline, workflow
class TestGetNextActionC3:
    """Robust edge selection (C3) and the normalised result contract (Lot A).

    The returned dict always carries a "status" key:
    - "selected" -> an edge was chosen,
    - "terminal" -> no outgoing edge (legitimate end of workflow),
    - "blocked"  -> candidates exist but all were rejected.
    """

    def test_picks_highest_success_rate(self, pipeline_with_workflow):
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [
            _edge("low", success_rate=0.1, execution_count=20),
            _edge("high", success_rate=0.9, execution_count=20),
        ]
        outcome = pipeline.get_next_action("wf1", "n1", screen_state=_state())
        assert outcome["status"] == "selected"
        assert outcome["edge_id"] == "high"

    def test_filters_out_invalid_preconditions(self, pipeline_with_workflow):
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [
            _edge(
                "bad",
                required_window_title="NopeApp",
                success_rate=0.99,
                execution_count=20,
            ),
            _edge("ok", success_rate=0.50, execution_count=20),
        ]
        outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(window_title="AppA")
        )
        assert outcome["status"] == "selected"
        assert outcome["edge_id"] == "ok"

    def test_blocked_when_no_valid_edge(self, pipeline_with_workflow):
        """Candidates exist but none passes its constraints.

        Lot A critical case: this must NOT be reported as "terminal". A
        blockage has to surface explicitly to trigger a supervised pause.
        """
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [
            _edge("e1", required_window_title="AppB"),
            _edge("e2", required_window_title="AppC"),
        ]
        outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(window_title="AppA")
        )
        assert outcome["status"] == "blocked"
        assert outcome["reason"] == "no_valid_edge"

    def test_strategy_first_keeps_legacy_behavior(self, pipeline_with_workflow):
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [
            _edge("e1", success_rate=0.1, execution_count=20),
            _edge("e2", success_rate=0.9, execution_count=20),
        ]
        outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(), strategy="first"
        )
        # Legacy mode: first edge wins, no ranking applied.
        assert outcome["status"] == "selected"
        assert outcome["edge_id"] == "e1"

    def test_no_screen_state_still_works(self, pipeline_with_workflow):
        """Without a ScreenState the scorer cannot filter but can still rank."""
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [
            _edge("e1", success_rate=0.1, execution_count=20),
            _edge("e2", success_rate=0.9, execution_count=20),
        ]
        outcome = pipeline.get_next_action("wf1", "n1", screen_state=None)
        assert outcome["status"] == "selected"
        # Ranking by success_rate still applies.
        assert outcome["edge_id"] == "e2"

    def test_no_outgoing_edges_is_terminal(self, pipeline_with_workflow):
        """No outgoing edge = legitimate workflow end (status="terminal")."""
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = []
        outcome = pipeline.get_next_action("wf1", "n1", screen_state=_state())
        assert outcome["status"] == "terminal"

    def test_blocked_distinct_from_terminal(self, pipeline_with_workflow):
        """Lot A regression: blocked != terminal.

        The historical bug conflated these two cases, so a blocked workflow
        showed up as "completed successfully" on the ExecutionLoop side.
        """
        pipeline, workflow = pipeline_with_workflow
        # Terminal case: no outgoing edge at all.
        workflow.edges = []
        terminal_outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state()
        )
        # Blocked case: outgoing edges exist but are all rejected.
        workflow.edges = [_edge("bad", required_window_title="NopeApp")]
        blocked_outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(window_title="AppA")
        )
        assert terminal_outcome["status"] == "terminal"
        assert blocked_outcome["status"] == "blocked"
        # Callers must be able to tell the two apart unambiguously.
        assert terminal_outcome["status"] != blocked_outcome["status"]

    def test_workflow_not_found_is_blocked(self, pipeline_with_workflow):
        """Unknown workflow = blocked with an explicit reason (never silent)."""
        pipeline, _workflow = pipeline_with_workflow
        outcome = pipeline.get_next_action(
            "wf_inexistant", "n1", screen_state=_state()
        )
        assert outcome["status"] == "blocked"
        assert outcome["reason"] == "workflow_not_found"
class TestGetNextActionSourceSimilarity:
    """Lot B — source_similarity is propagated all the way to EdgeScorer."""

    def test_high_similarity_passes_min_source_similarity(
        self, pipeline_with_workflow
    ):
        """A high source_similarity lets the edge through."""
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [_edge("e1", min_source_similarity=0.80)]
        outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(), source_similarity=0.95
        )
        assert outcome["status"] == "selected"
        assert outcome["edge_id"] == "e1"

    def test_low_similarity_blocks_edge(self, pipeline_with_workflow):
        """source_similarity below min_source_similarity rejects the edge.

        This proves the min_source_similarity precondition is effective again
        (Lot B). Before this lot, EdgeScorer always received a hardcoded 1.0
        and never rejected an edge on that ground.
        """
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [_edge("e1", min_source_similarity=0.80)]
        outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(), source_similarity=0.40
        )
        assert outcome["status"] == "blocked"
        assert outcome["reason"] == "no_valid_edge"

    def test_default_source_similarity_is_one(self, pipeline_with_workflow):
        """Without source_similarity the caller default is 1.0, so there is
        no rejection on that ground (compat with call sites not yet passing
        the argument)."""
        pipeline, workflow = pipeline_with_workflow
        # Very strict min_source_similarity, but the caller default is 1.0.
        workflow.edges = [_edge("e1", min_source_similarity=0.99)]
        outcome = pipeline.get_next_action("wf1", "n1", screen_state=_state())
        assert outcome["status"] == "selected"

View File

@@ -227,12 +227,10 @@ class VisualWorkflowExecutor:
self.analytics_integration.on_execution_complete( self.analytics_integration.on_execution_complete(
execution_id=execution_id, execution_id=execution_id,
workflow_id=workflow_graph.workflow_id, workflow_id=workflow_graph.workflow_id,
started_at=result.start_time, duration_ms=float(result._calculate_duration() or 0.0),
completed_at=result.end_time, status='completed',
duration=result._calculate_duration() / 1000.0, # en secondes
status='success',
steps_completed=len(workflow_graph.nodes), steps_completed=len(workflow_graph.nodes),
steps_failed=0 steps_failed=0,
) )
# Collecter les métriques Analytics pour l'UI # Collecter les métriques Analytics pour l'UI
@@ -265,13 +263,11 @@ class VisualWorkflowExecutor:
self.analytics_integration.on_execution_complete( self.analytics_integration.on_execution_complete(
execution_id=execution_id, execution_id=execution_id,
workflow_id=visual_workflow.workflow_id, workflow_id=visual_workflow.workflow_id,
started_at=result.start_time, duration_ms=float(result._calculate_duration() or 0.0),
completed_at=result.end_time,
duration=result._calculate_duration() / 1000.0 if result._calculate_duration() else 0,
status='failed', status='failed',
error_message=str(e), error_message=str(e),
steps_completed=0, steps_completed=0,
steps_failed=1 steps_failed=1,
) )
# Enregistrer l'échec dans le système d'apprentissage # Enregistrer l'échec dans le système d'apprentissage
@@ -312,7 +308,8 @@ class VisualWorkflowExecutor:
if result.success: if result.success:
self._log(execution_id, 'info', f'Workflow exécuté avec succès') self._log(execution_id, 'info', f'Workflow exécuté avec succès')
# Notifier Analytics pour chaque étape # Notifier Analytics pour chaque étape (contrat normalisé Lot A :
# duration_ms en millisecondes, plus de "duration" en secondes)
for i, step_result in enumerate(result.step_results): for i, step_result in enumerate(result.step_results):
if self.analytics_integration: if self.analytics_integration:
self.analytics_integration.on_step_complete( self.analytics_integration.on_step_complete(
@@ -322,8 +319,8 @@ class VisualWorkflowExecutor:
action_type=step_result.action_type, action_type=step_result.action_type,
started_at=step_result.start_time, started_at=step_result.start_time,
completed_at=step_result.end_time, completed_at=step_result.end_time,
duration=step_result.duration_seconds, duration_ms=float(step_result.duration_seconds or 0.0) * 1000.0,
success=step_result.success success=step_result.success,
) )
# Notifier la progression # Notifier la progression
@@ -383,8 +380,8 @@ class VisualWorkflowExecutor:
action_type=getattr(node, 'action_type', 'unknown'), action_type=getattr(node, 'action_type', 'unknown'),
started_at=step_start_time, started_at=step_start_time,
completed_at=step_end_time, completed_at=step_end_time,
duration=step_duration, duration_ms=float(step_duration or 0.0) * 1000.0,
success=True success=True,
) )
progress = (i + 1) / total_nodes * 100 progress = (i + 1) / total_nodes * 100