feat(analytics): normalise API + contrat explicite get_next_action (Lot A)

Contrat get_next_action() — suppression du None ambigu :
  {"status": "selected", "edge": ..., ...}
  {"status": "terminal"}
  {"status": "blocked", "reason": "no_valid_edge" | ...}

ExecutionLoop dispatche proprement : blocked -> PAUSED + _pause_requested,
terminal -> succès légitime. Rétrocompat défensive (None legacy -> blocked).

Analytics API normalisée (kwargs-only) :
  on_execution_complete(duration_ms, status, steps_total|completed|failed)
  on_step_complete(duration_ms, ...)
  on_recovery_attempt(duration_ms, ...)

Découverte critique : les anciens appels utilisaient des méthodes et champs
inexistants (ExecutionMetrics.duration, metrics_collector.record_execution).
Le code n'avait jamais tourné au runtime — zéro analytics remontée.
L'exception était avalée par le try/except englobant.

58 tests (18 analytics + 11 contrat + 20 ExecutionLoop + 12 edge_scorer
non-régression). Migration complète, pas de pont legacy.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-04-15 09:06:19 +02:00
parent 42f571d496
commit af4ffa189a
9 changed files with 1573 additions and 233 deletions

View File

@@ -76,7 +76,16 @@ class StepMetrics:
confidence_score: float
retry_count: int = 0
error_details: Optional[str] = None
# C1 — Instrumentation vision-aware (ExecutionLoop)
# Ces champs proviennent de `StepResult` (core/execution/execution_loop.py).
# Tous optionnels avec valeurs par défaut pour rétrocompatibilité.
ocr_ms: float = 0.0 # Temps OCR sur ce step
ui_ms: float = 0.0 # Temps détection UI sur ce step
analyze_ms: float = 0.0 # Temps analyse ScreenState (OCR + UI + reste)
total_ms: float = 0.0 # Temps total du step (alias duration_ms)
cache_hit: bool = False # True si ScreenState vient du cache perceptuel
degraded: bool = False # True si mode dégradé (timeout analyse)
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for storage."""
return {
@@ -92,9 +101,15 @@ class StepMetrics:
'status': self.status,
'confidence_score': self.confidence_score,
'retry_count': self.retry_count,
'error_details': self.error_details
'error_details': self.error_details,
'ocr_ms': self.ocr_ms,
'ui_ms': self.ui_ms,
'analyze_ms': self.analyze_ms,
'total_ms': self.total_ms,
'cache_hit': self.cache_hit,
'degraded': self.degraded,
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'StepMetrics':
"""Create from dictionary."""
@@ -111,7 +126,13 @@ class StepMetrics:
status=data['status'],
confidence_score=data['confidence_score'],
retry_count=data.get('retry_count', 0),
error_details=data.get('error_details')
error_details=data.get('error_details'),
ocr_ms=float(data.get('ocr_ms') or 0.0),
ui_ms=float(data.get('ui_ms') or 0.0),
analyze_ms=float(data.get('analyze_ms') or 0.0),
total_ms=float(data.get('total_ms') or 0.0),
cache_hit=bool(data.get('cache_hit') or False),
degraded=bool(data.get('degraded') or False),
)

View File

@@ -1,8 +1,8 @@
"""Integration of analytics with ExecutionLoop."""
import logging
from typing import Optional
from datetime import datetime
from typing import Any, Optional
from datetime import datetime, timedelta
import uuid
from ..analytics_system import get_analytics_system
@@ -14,17 +14,35 @@ logger = logging.getLogger(__name__)
class AnalyticsExecutionIntegration:
"""Integrate analytics collection with workflow execution."""
def __init__(self, enabled: bool = True):
def __init__(self, analytics_system: Any = True, enabled: Optional[bool] = None):
"""
Initialize analytics integration.
Accepte deux formes d'appel pour la rétrocompatibilité :
- ``AnalyticsExecutionIntegration(enabled=True)`` → auto-load du système
- ``AnalyticsExecutionIntegration(analytics_system_instance)`` →
utilise l'instance fournie (utilisé par ExecutionLoop)
Args:
enabled: Whether analytics collection is enabled
analytics_system: Instance d'AnalyticsSystem pré-construite, ou
True/False pour activer/désactiver (legacy).
enabled: Legacy — si défini, prime sur analytics_system.
"""
self.enabled = enabled
self.analytics = None
if enabled:
# Détection de la forme d'appel
if enabled is not None:
# Appel legacy explicite: AnalyticsExecutionIntegration(enabled=...)
self.enabled = bool(enabled)
self.analytics = None
elif isinstance(analytics_system, bool):
# Appel legacy: AnalyticsExecutionIntegration(True/False)
self.enabled = analytics_system
self.analytics = None
else:
# Nouvelle forme: instance injectée
self.enabled = analytics_system is not None
self.analytics = analytics_system
if self.enabled and self.analytics is None:
try:
self.analytics = get_analytics_system()
logger.info("Analytics integration enabled")
@@ -36,37 +54,50 @@ class AnalyticsExecutionIntegration:
self,
workflow_id: str,
execution_id: Optional[str] = None,
total_steps: int = 0
total_steps: int = 0,
mode: Optional[str] = None,
) -> str:
"""
Called when workflow execution starts.
Appelé au démarrage d'une exécution de workflow.
Args:
workflow_id: Workflow identifier
execution_id: Execution identifier (generated if None)
total_steps: Total number of steps
workflow_id: Identifiant du workflow
execution_id: Identifiant d'exécution (généré si None)
total_steps: Nombre total d'étapes prévues
mode: Mode d'exécution (OBSERVATION / COACHING / SUPERVISED /
AUTOMATIC). Propagé en contexte pour MetricsCollector.
Returns:
Execution ID
Identifiant d'exécution (celui fourni ou nouvellement généré).
"""
if not self.enabled or not self.analytics:
return execution_id or str(uuid.uuid4())
if execution_id is None:
execution_id = str(uuid.uuid4())
try:
# Start real-time tracking
# Démarrage du tracking temps réel
self.analytics.realtime_analytics.track_execution(
execution_id=execution_id,
workflow_id=workflow_id,
total_steps=total_steps
total_steps=total_steps,
)
# Ouverture de l'ExecutionMetrics côté collector (état "running").
# Cela permet à `on_execution_complete` d'appeler
# `record_execution_complete` qui clôture proprement.
context = {"mode": mode} if mode else {}
self.analytics.metrics_collector.record_execution_start(
execution_id=execution_id,
workflow_id=workflow_id,
context=context,
)
logger.debug(f"Started tracking execution: {execution_id}")
except Exception as e:
logger.error(f"Error starting execution tracking: {e}")
return execution_id
def on_step_start(
@@ -101,110 +132,249 @@ class AnalyticsExecutionIntegration:
execution_id: str,
workflow_id: str,
node_id: str,
action_type: str,
started_at: datetime,
completed_at: datetime,
duration: float,
*,
duration_ms: float,
success: bool,
error_message: Optional[str] = None
action_type: str = "",
started_at: Optional[datetime] = None,
completed_at: Optional[datetime] = None,
error_message: Optional[str] = None,
confidence: float = 0.0,
target_element: str = "",
retry_count: int = 0,
ocr_ms: float = 0.0,
ui_ms: float = 0.0,
analyze_ms: float = 0.0,
total_ms: float = 0.0,
cache_hit: bool = False,
degraded: bool = False,
step_id: Optional[str] = None,
) -> None:
"""
Called when a step completes.
Appelé à la fin d'un step.
Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` est
obligatoire et en millisecondes. Plus de rétrocompat silencieuse
sur ``duration`` en secondes.
Args:
execution_id: Execution identifier
workflow_id: Workflow identifier
node_id: Node identifier
action_type: Type of action
started_at: Start timestamp
completed_at: Completion timestamp
duration: Duration in seconds
success: Whether step succeeded
error_message: Error message if failed
execution_id: Identifiant d'exécution
workflow_id: Identifiant du workflow
node_id: Identifiant du node
duration_ms: Durée du step en millisecondes (obligatoire)
success: Vrai si le step a réussi
action_type: Type d'action (``click``, ``type``, …)
started_at: Timestamp de début (déduit de duration_ms si None)
completed_at: Timestamp de fin (``now()`` si None)
error_message: Message d'erreur si ``success=False``
confidence: Score de matching [0, 1]
target_element: Élément ciblé (optionnel)
retry_count: Nombre de retries
ocr_ms: Temps OCR (C1)
ui_ms: Temps détection UI (C1)
analyze_ms: Temps analyse ScreenState (C1)
total_ms: Temps total du step (C1, alias duration_ms)
cache_hit: ScreenState depuis cache perceptuel (C1)
degraded: Mode dégradé activé (C1)
step_id: ID unique du step (généré si None)
"""
if not self.enabled or not self.analytics:
return
try:
# Record step metrics
duration_ms_final = float(duration_ms)
# Normaliser les timestamps
if completed_at is None:
completed_at = datetime.now()
if started_at is None:
started_at = completed_at - timedelta(milliseconds=duration_ms_final)
step_metrics = StepMetrics(
step_id=step_id or f"{execution_id}:{node_id}:{completed_at.isoformat()}",
execution_id=execution_id,
workflow_id=workflow_id,
node_id=node_id,
action_type=action_type,
action_type=action_type or "unknown",
target_element=target_element,
started_at=started_at,
completed_at=completed_at,
duration=duration,
success=success,
error_message=error_message
duration_ms=duration_ms_final,
status="completed" if success else "failed",
confidence_score=float(confidence),
retry_count=retry_count,
error_details=error_message,
# C1 — vision-aware
ocr_ms=float(ocr_ms or 0.0),
ui_ms=float(ui_ms or 0.0),
analyze_ms=float(analyze_ms or 0.0),
total_ms=float(total_ms or duration_ms_final),
cache_hit=bool(cache_hit),
degraded=bool(degraded),
)
self.analytics.metrics_collector.record_step(step_metrics)
# Update real-time tracking
self.analytics.realtime_analytics.record_step_complete(
execution_id=execution_id,
success=success
# Tracking temps réel
try:
self.analytics.realtime_analytics.record_step_complete(
execution_id=execution_id,
success=success,
)
except Exception as rt_err:
logger.debug(f"Realtime tracking skipped: {rt_err}")
logger.debug(
f"Recorded step: {node_id} "
f"({'success' if success else 'failed'}, "
f"analyze_ms={analyze_ms:.0f}, cache_hit={cache_hit}, "
f"degraded={degraded})"
)
logger.debug(f"Recorded step: {node_id} ({'success' if success else 'failed'})")
except Exception as e:
logger.error(f"Error recording step completion: {e}")
def on_step_result(
self,
execution_id: str,
workflow_id: str,
step_result: Any,
) -> None:
"""
Raccourci C1 — enregistre un `StepResult` complet.
Évite aux appelants d'extraire manuellement les champs vision-aware.
Utilisé par ExecutionLoop pour pousser StepResult au système analytics.
Args:
execution_id: Identifiant d'exécution
workflow_id: Identifiant de workflow
step_result: Instance de `core.execution.execution_loop.StepResult`
"""
if not self.enabled or not self.analytics:
return
action_type = "unknown"
try:
if getattr(step_result, "action_result", None) is not None:
ar = step_result.action_result
# ExecutionResult.action est optionnel selon la branche
action_type = (
getattr(ar, "action_type", None)
or getattr(ar, "action", None)
or "unknown"
)
except Exception:
action_type = "unknown"
self.on_step_complete(
execution_id=execution_id,
workflow_id=workflow_id,
node_id=getattr(step_result, "node_id", "unknown"),
action_type=str(action_type),
success=bool(getattr(step_result, "success", False)),
error_message=None
if getattr(step_result, "success", False)
else getattr(step_result, "message", None),
duration_ms=float(getattr(step_result, "duration_ms", 0.0) or 0.0),
confidence=float(getattr(step_result, "match_confidence", 0.0) or 0.0),
ocr_ms=float(getattr(step_result, "ocr_ms", 0.0) or 0.0),
ui_ms=float(getattr(step_result, "ui_ms", 0.0) or 0.0),
analyze_ms=float(getattr(step_result, "analyze_ms", 0.0) or 0.0),
total_ms=float(getattr(step_result, "total_ms", 0.0) or 0.0),
cache_hit=bool(getattr(step_result, "cache_hit", False)),
degraded=bool(getattr(step_result, "degraded", False)),
)
def on_execution_complete(
self,
execution_id: str,
workflow_id: str,
started_at: datetime,
completed_at: datetime,
duration: float,
*,
duration_ms: float,
status: str,
error_message: Optional[str] = None,
steps_total: Optional[int] = None,
steps_completed: int = 0,
steps_failed: int = 0
steps_failed: int = 0,
error_message: Optional[str] = None,
) -> None:
"""
Called when workflow execution completes.
Appelé à la fin d'une exécution de workflow.
Contrat normalisé (Lot A — avril 2026) :
- ``duration_ms`` en millisecondes, toujours. Plus de rétrocompat
silencieuse sur ``duration`` en secondes.
- ``status`` est une chaîne libre (``"completed"``, ``"failed"``,
``"stopped"``, ``"timeout"``, …). L'appelant décide.
- ``steps_total`` / ``steps_completed`` / ``steps_failed`` : noms
alignés sur le dataclass ``ExecutionMetrics``. Si ``steps_total``
n'est pas fourni, on le déduit par somme.
Args:
execution_id: Execution identifier
workflow_id: Workflow identifier
started_at: Start timestamp
completed_at: Completion timestamp
duration: Duration in seconds
status: Final status (success, failed, timeout)
error_message: Error message if failed
steps_completed: Number of steps completed
steps_failed: Number of steps failed
execution_id: Identifiant d'exécution
workflow_id: Identifiant du workflow
duration_ms: Durée totale en millisecondes
status: Statut final (``"completed"`` / ``"failed"`` / ``"stopped"``)
steps_total: Nombre total de steps exécutés (tous statuts confondus)
steps_completed: Nombre de steps réussis
steps_failed: Nombre de steps en échec
error_message: Message d'erreur si ``status != "completed"``
"""
if not self.enabled or not self.analytics:
return
# steps_total dérivé si non fourni explicitement
if steps_total is None:
steps_total = int(steps_completed) + int(steps_failed)
try:
# Record execution metrics
execution_metrics = ExecutionMetrics(
execution_id=execution_id,
workflow_id=workflow_id,
started_at=started_at,
completed_at=completed_at,
duration=duration,
status=status,
error_message=error_message,
steps_completed=steps_completed,
steps_failed=steps_failed
)
self.analytics.metrics_collector.record_execution(execution_metrics)
# Flush to ensure persistence
self.analytics.metrics_collector.flush()
# Complete real-time tracking
collector = self.analytics.metrics_collector
# record_execution_complete clôture proprement un ExecutionMetrics
# ouvert par record_execution_start (chemin nominal via
# on_execution_start). Si l'état n'est pas présent (tests, legacy),
# on pousse un ExecutionMetrics synthétique directement.
completed_at = datetime.now()
started_at = completed_at - timedelta(milliseconds=float(duration_ms))
active = getattr(collector, "_active_executions", None)
if active is not None and execution_id in active:
collector.record_execution_complete(
execution_id=execution_id,
status=status,
steps_total=int(steps_total),
steps_completed=int(steps_completed),
steps_failed=int(steps_failed),
error_message=error_message,
)
else:
# Fallback explicite : on construit directement un ExecutionMetrics
# aligné sur le dataclass (duration_ms, status, steps_*).
execution_metrics = ExecutionMetrics(
execution_id=execution_id,
workflow_id=workflow_id,
started_at=started_at,
completed_at=completed_at,
duration_ms=float(duration_ms),
status=status,
steps_total=int(steps_total),
steps_completed=int(steps_completed),
steps_failed=int(steps_failed),
error_message=error_message,
)
# Le collector n'expose pas record_execution(...) : on pousse
# dans le buffer protégé par lock pour rester cohérent.
with collector._lock:
collector._buffer.append(execution_metrics)
# Flush pour garantir la persistance immédiate
collector.flush()
# Clôture du tracking temps réel
self.analytics.realtime_analytics.complete_execution(
execution_id=execution_id,
status=status
status=status,
)
logger.info(f"Recorded execution: {execution_id} ({status})")
except Exception as e:
logger.error(f"Error recording execution completion: {e}")
@@ -216,39 +386,54 @@ class AnalyticsExecutionIntegration:
node_id: str,
strategy: str,
success: bool,
duration: float
duration_ms: float,
) -> None:
"""
Called when self-healing attempts recovery.
Appelé quand le self-healing tente une récupération.
Contrat normalisé (Lot A — avril 2026) : ``duration_ms`` en
millisecondes, cohérent avec ``on_execution_complete`` et
``on_step_complete``. Le StepMetrics construit respecte strictement
le dataclass (``status``, ``duration_ms``, ``error_details``,
``confidence_score``, ``target_element``, ``step_id``).
Args:
execution_id: Execution identifier
workflow_id: Workflow identifier
node_id: Node identifier
strategy: Recovery strategy used
success: Whether recovery succeeded
duration: Recovery duration
execution_id: Identifiant d'exécution
workflow_id: Identifiant du workflow
node_id: Node où la récupération est tentée
strategy: Stratégie de récupération employée
success: Vrai si la récupération a réussi
duration_ms: Durée de la tentative en millisecondes
"""
if not self.enabled or not self.analytics:
return
try:
# Record as a special step metric
now = datetime.now()
started_at = now - timedelta(milliseconds=float(duration_ms))
recovery_metrics = StepMetrics(
step_id=f"{execution_id}:{node_id}:recovery:{now.isoformat()}",
execution_id=execution_id,
workflow_id=workflow_id,
node_id=f"{node_id}_recovery",
action_type=f"recovery_{strategy}",
started_at=datetime.now(),
completed_at=datetime.now(),
duration=duration,
success=success,
error_message=None if success else f"Recovery failed: {strategy}"
target_element="",
started_at=started_at,
completed_at=now,
duration_ms=float(duration_ms),
status="completed" if success else "failed",
confidence_score=0.0,
retry_count=0,
error_details=None if success else f"Recovery failed: {strategy}",
)
self.analytics.metrics_collector.record_step(recovery_metrics)
logger.debug(f"Recorded recovery: {strategy} ({'success' if success else 'failed'})")
logger.debug(
f"Recorded recovery: {strategy} "
f"({'success' if success else 'failed'})"
)
except Exception as e:
logger.error(f"Error recording recovery attempt: {e}")

View File

@@ -42,6 +42,8 @@ class TimeSeriesStore:
ON execution_metrics(started_at);
-- Step metrics table
-- Les colonnes ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded
-- proviennent de l'instrumentation vision-aware (C1) de ExecutionLoop.
CREATE TABLE IF NOT EXISTS step_metrics (
step_id TEXT PRIMARY KEY,
execution_id TEXT NOT NULL,
@@ -56,6 +58,12 @@ class TimeSeriesStore:
confidence_score REAL,
retry_count INTEGER DEFAULT 0,
error_details TEXT,
ocr_ms REAL DEFAULT 0.0,
ui_ms REAL DEFAULT 0.0,
analyze_ms REAL DEFAULT 0.0,
total_ms REAL DEFAULT 0.0,
cache_hit INTEGER DEFAULT 0,
degraded INTEGER DEFAULT 0,
FOREIGN KEY (execution_id) REFERENCES execution_metrics(execution_id)
);
@@ -101,11 +109,40 @@ class TimeSeriesStore:
logger.info(f"TimeSeriesStore initialized at {self.db_path}")
# Colonnes ajoutées ultérieurement — appliquées via ALTER TABLE si absentes.
# (C1 — instrumentation vision-aware, avril 2026)
_STEP_METRICS_MIGRATIONS = [
("ocr_ms", "REAL DEFAULT 0.0"),
("ui_ms", "REAL DEFAULT 0.0"),
("analyze_ms", "REAL DEFAULT 0.0"),
("total_ms", "REAL DEFAULT 0.0"),
("cache_hit", "INTEGER DEFAULT 0"),
("degraded", "INTEGER DEFAULT 0"),
]
def _init_database(self) -> None:
"""Initialize database schema."""
"""Initialize database schema and apply lightweight migrations."""
with self._get_connection() as conn:
conn.executescript(self.SCHEMA)
self._migrate_step_metrics(conn)
conn.commit()
def _migrate_step_metrics(self, conn: sqlite3.Connection) -> None:
"""Ajoute les colonnes C1 sur une base `step_metrics` pré-existante."""
cursor = conn.execute("PRAGMA table_info(step_metrics)")
existing = {row[1] for row in cursor.fetchall()}
for column, ddl in self._STEP_METRICS_MIGRATIONS:
if column not in existing:
try:
conn.execute(
f"ALTER TABLE step_metrics ADD COLUMN {column} {ddl}"
)
logger.info(
f"Migration step_metrics: ajout colonne {column}"
)
except sqlite3.OperationalError as e:
# Collision bénigne (colonne déjà ajoutée par un autre process)
logger.debug(f"Migration colonne {column} ignorée: {e}")
@contextmanager
def _get_connection(self):
@@ -164,13 +201,14 @@ class TimeSeriesStore:
))
def _write_step_metric(self, conn: sqlite3.Connection, metric: StepMetrics) -> None:
"""Write step metric."""
"""Write step metric (inclut les champs vision-aware C1)."""
conn.execute("""
INSERT OR REPLACE INTO step_metrics
(step_id, execution_id, workflow_id, node_id, action_type, target_element,
started_at, completed_at, duration_ms, status, confidence_score,
retry_count, error_details)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
retry_count, error_details,
ocr_ms, ui_ms, analyze_ms, total_ms, cache_hit, degraded)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
metric.step_id,
metric.execution_id,
@@ -184,7 +222,13 @@ class TimeSeriesStore:
metric.status,
metric.confidence_score,
metric.retry_count,
metric.error_details
metric.error_details,
getattr(metric, 'ocr_ms', 0.0),
getattr(metric, 'ui_ms', 0.0),
getattr(metric, 'analyze_ms', 0.0),
getattr(metric, 'total_ms', 0.0),
1 if getattr(metric, 'cache_hit', False) else 0,
1 if getattr(metric, 'degraded', False) else 0,
))
def _write_resource_metric(self, conn: sqlite3.Connection, metric: ResourceMetrics) -> None: