feat(analytics): normalise API + contrat explicite get_next_action (Lot A)

Contrat get_next_action() — suppression du None ambigu :
  {"status": "selected", "edge": ..., ...}
  {"status": "terminal"}
  {"status": "blocked", "reason": "no_valid_edge" | ...}

ExecutionLoop dispatche proprement : blocked -> PAUSED + _pause_requested,
terminal -> succès légitime. Rétrocompat défensive (None legacy -> blocked).

Analytics API normalisée (kwargs-only) :
  on_execution_complete(duration_ms, status, steps_total|completed|failed)
  on_step_complete(duration_ms, ...)
  on_recovery_attempt(duration_ms, ...)

Découverte critique : les anciens appels utilisaient des méthodes et champs
inexistants (ExecutionMetrics.duration, metrics_collector.record_execution).
Le code n'avait jamais tourné au runtime — zéro analytics remontée.
L'exception était avalée par le try/except englobant.

58 tests (18 analytics + 11 contrat + 20 ExecutionLoop + 12 edge_scorer
non-régression). Migration complète, pas de pont legacy.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-04-15 09:06:19 +02:00
parent 42f571d496
commit af4ffa189a
9 changed files with 1573 additions and 233 deletions

View File

@@ -96,14 +96,16 @@ class TestWorkflowPipelineEnhanced:
"confidence": 0.92
}
# Mock de l'action suivante
# Mock de l'action suivante (contrat dict normalisé Lot A)
mock_workflow_pipeline.get_next_action.return_value = {
"status": "selected",
"edge_id": "edge_1",
"action": {"type": "click", "target": "button"},
"target_node": "node_2",
"confidence": 0.95
"confidence": 0.95,
"score": 0.95,
}
# Mock du workflow
mock_workflow = Mock(spec=Workflow)
mock_edge = Mock(spec=WorkflowEdge)
@@ -112,7 +114,7 @@ class TestWorkflowPipelineEnhanced:
mock_edge.to_node = "node_2"
mock_workflow.edges = [mock_edge]
mock_workflow_pipeline.load_workflow.return_value = mock_workflow
# Mock du résultat d'exécution
mock_execution_result = Mock(spec=ExecutionResult)
mock_execution_result.status = ExecutionStatus.SUCCESS
@@ -121,24 +123,24 @@ class TestWorkflowPipelineEnhanced:
mock_execution_result.target_resolved = None
mock_execution_result.error = None
mock_workflow_pipeline.action_executor.execute_edge.return_value = mock_execution_result
# Créer l'instance enhanced
enhanced = WorkflowPipelineEnhanced()
# Lier les méthodes du pipeline mock
enhanced.match_current_state = mock_workflow_pipeline.match_current_state
enhanced.get_next_action = mock_workflow_pipeline.get_next_action
enhanced.load_workflow = mock_workflow_pipeline.load_workflow
enhanced.action_executor = mock_workflow_pipeline.action_executor
enhanced.error_handler = mock_workflow_pipeline.error_handler
# Act
result = enhanced.execute_workflow_step_enhanced(
workflow_id=workflow_id,
current_state=mock_screen_state,
context={"test_context": "value"}
)
# Assert
assert isinstance(result, WorkflowExecutionResult)
assert result.success is True
@@ -242,7 +244,8 @@ class TestWorkflowPipelineEnhanced:
}
# Mock de l'action suivante (pas d'action = workflow terminé)
mock_workflow_pipeline.get_next_action.return_value = None
# Contrat dict normalisé Lot A : status="terminal" pour fin légitime
mock_workflow_pipeline.get_next_action.return_value = {"status": "terminal"}
# Créer l'instance enhanced
enhanced = WorkflowPipelineEnhanced()
@@ -347,14 +350,16 @@ class TestWorkflowPipelineEnhanced:
"confidence": 0.92
}
# Mock de l'action suivante
# Mock de l'action suivante (contrat dict normalisé Lot A)
mock_workflow_pipeline.get_next_action.return_value = {
"status": "selected",
"edge_id": "edge_1",
"action": {"type": "click", "target": "button"},
"target_node": "node_2",
"confidence": 0.95
"confidence": 0.95,
"score": 0.95,
}
# Mock du workflow
mock_workflow = Mock(spec=Workflow)
mock_edge = Mock(spec=WorkflowEdge)
@@ -363,7 +368,7 @@ class TestWorkflowPipelineEnhanced:
mock_edge.to_node = "node_2"
mock_workflow.edges = [mock_edge]
mock_workflow_pipeline.load_workflow.return_value = mock_workflow
# Mock du résultat d'exécution
mock_execution_result = Mock(spec=ExecutionResult)
mock_execution_result.status = ExecutionStatus.SUCCESS
@@ -372,17 +377,17 @@ class TestWorkflowPipelineEnhanced:
mock_execution_result.target_resolved = None
mock_execution_result.error = None
mock_workflow_pipeline.action_executor.execute_edge.return_value = mock_execution_result
# Créer l'instance enhanced
enhanced = WorkflowPipelineEnhanced()
# Lier les méthodes du pipeline mock
enhanced.match_current_state = mock_workflow_pipeline.match_current_state
enhanced.get_next_action = mock_workflow_pipeline.get_next_action
enhanced.load_workflow = mock_workflow_pipeline.load_workflow
enhanced.action_executor = mock_workflow_pipeline.action_executor
enhanced.error_handler = mock_workflow_pipeline.error_handler
# Act
result = enhanced.execute_workflow_step_enhanced(
workflow_id=workflow_id,

View File

@@ -0,0 +1,520 @@
"""
Tests unitaires pour la remontée des champs vision-aware (C1) vers analytics.
Couvre :
- StepMetrics.to_dict / from_dict avec les nouveaux champs
- AnalyticsExecutionIntegration.on_step_result passe bien les champs
- Persistance SQLite (schema + migration) des colonnes C1
"""
from __future__ import annotations
import sqlite3
import tempfile
from datetime import datetime
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from core.analytics.collection.metrics_collector import StepMetrics
# -----------------------------------------------------------------------------
# StepMetrics : sérialisation des champs C1
# -----------------------------------------------------------------------------
def _make_step_metrics(**overrides) -> StepMetrics:
    """Build a StepMetrics with sensible defaults, overridable via kwargs."""
    defaults = {
        "step_id": "s1",
        "execution_id": "exec1",
        "workflow_id": "wf1",
        "node_id": "n1",
        "action_type": "click",
        "target_element": "",
        "started_at": datetime(2026, 4, 13, 10, 0, 0),
        "completed_at": datetime(2026, 4, 13, 10, 0, 1),
        "duration_ms": 1000.0,
        "status": "completed",
        "confidence_score": 0.9,
        "retry_count": 0,
        "error_details": None,
    }
    return StepMetrics(**{**defaults, **overrides})
class TestStepMetricsVisionFields:
    """Serialization round-trips for the C1 vision-aware fields."""

    def test_default_vision_fields(self):
        """A bare StepMetrics carries zeroed vision timings and flags."""
        metrics = _make_step_metrics()
        assert metrics.ocr_ms == 0.0
        assert metrics.ui_ms == 0.0
        assert metrics.analyze_ms == 0.0
        assert metrics.total_ms == 0.0
        assert metrics.cache_hit is False
        assert metrics.degraded is False

    def test_to_dict_includes_vision_fields(self):
        """to_dict() exposes every C1 field verbatim."""
        metrics = _make_step_metrics(
            ocr_ms=120.5,
            ui_ms=45.0,
            analyze_ms=200.0,
            total_ms=1050.0,
            cache_hit=True,
            degraded=True,
        )
        payload = metrics.to_dict()
        assert payload["ocr_ms"] == 120.5
        assert payload["ui_ms"] == 45.0
        assert payload["analyze_ms"] == 200.0
        assert payload["total_ms"] == 1050.0
        assert payload["cache_hit"] is True
        assert payload["degraded"] is True

    def test_from_dict_roundtrip(self):
        """to_dict() followed by from_dict() preserves every C1 field."""
        source = _make_step_metrics(
            ocr_ms=10.0, ui_ms=20.0, analyze_ms=30.0,
            total_ms=100.0, cache_hit=True, degraded=False,
        )
        clone = StepMetrics.from_dict(source.to_dict())
        assert clone.ocr_ms == 10.0
        assert clone.ui_ms == 20.0
        assert clone.analyze_ms == 30.0
        assert clone.total_ms == 100.0
        assert clone.cache_hit is True
        assert clone.degraded is False

    def test_from_dict_missing_vision_fields_defaults_to_zero(self):
        """Backward compatibility: a dict without C1 fields yields 0/False."""
        timestamp = datetime.now().isoformat()
        legacy = StepMetrics.from_dict({
            'step_id': 's1',
            'execution_id': 'e1',
            'workflow_id': 'w1',
            'node_id': 'n1',
            'action_type': 'click',
            'target_element': '',
            'started_at': timestamp,
            'completed_at': timestamp,
            'duration_ms': 100.0,
            'status': 'completed',
            'confidence_score': 0.5,
        })
        assert legacy.ocr_ms == 0.0
        assert legacy.cache_hit is False
        assert legacy.degraded is False
# -----------------------------------------------------------------------------
# AnalyticsExecutionIntegration.on_step_result
# -----------------------------------------------------------------------------
class _FakeStepResult:
"""Stand-in minimal pour core.execution.execution_loop.StepResult."""
def __init__(self, **kw):
self.success = kw.get("success", True)
self.node_id = kw.get("node_id", "n1")
self.edge_id = kw.get("edge_id", None)
self.action_result = kw.get("action_result", None)
self.match_confidence = kw.get("match_confidence", 0.9)
self.duration_ms = kw.get("duration_ms", 100.0)
self.message = kw.get("message", "")
self.ocr_ms = kw.get("ocr_ms", 0.0)
self.ui_ms = kw.get("ui_ms", 0.0)
self.analyze_ms = kw.get("analyze_ms", 0.0)
self.total_ms = kw.get("total_ms", 0.0)
self.cache_hit = kw.get("cache_hit", False)
self.degraded = kw.get("degraded", False)
class TestAnalyticsOnStepResult:
    """on_step_result must forward the C1 vision fields into StepMetrics."""

    @staticmethod
    def _build_integration():
        """Return (integration, mocked analytics system) wired together."""
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        system = MagicMock()
        return AnalyticsExecutionIntegration(system), system

    def test_on_step_result_passes_vision_fields(self):
        integration, system = self._build_integration()
        fake_step = _FakeStepResult(
            node_id="node_click",
            success=True,
            match_confidence=0.87,
            duration_ms=1234.0,
            ocr_ms=111.0,
            ui_ms=222.0,
            analyze_ms=333.0,
            total_ms=1234.0,
            cache_hit=True,
            degraded=False,
        )
        integration.on_step_result(
            execution_id="exec1",
            workflow_id="wf1",
            step_result=fake_step,
        )
        # A StepMetrics with the right fields must have been recorded
        calls = system.metrics_collector.record_step.call_args_list
        assert len(calls) == 1
        stored: StepMetrics = calls[0].args[0]
        assert isinstance(stored, StepMetrics)
        assert stored.node_id == "node_click"
        assert stored.workflow_id == "wf1"
        assert stored.execution_id == "exec1"
        assert stored.confidence_score == 0.87
        assert stored.duration_ms == 1234.0
        assert stored.ocr_ms == 111.0
        assert stored.ui_ms == 222.0
        assert stored.analyze_ms == 333.0
        assert stored.total_ms == 1234.0
        assert stored.cache_hit is True
        assert stored.degraded is False
        assert stored.status == "completed"

    def test_on_step_result_failed_step(self):
        integration, system = self._build_integration()
        failing_step = _FakeStepResult(
            success=False,
            message="Click failed",
            degraded=True,
        )
        integration.on_step_result("e1", "w1", failing_step)
        stored: StepMetrics = system.metrics_collector.record_step.call_args.args[0]
        assert stored.status == "failed"
        assert stored.error_details == "Click failed"
        assert stored.degraded is True

    def test_on_step_result_disabled_integration_is_noop(self):
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        integration = AnalyticsExecutionIntegration(None)  # disabled
        assert integration.enabled is False
        # Must neither do anything nor raise
        integration.on_step_result("e1", "w1", _FakeStepResult())
# -----------------------------------------------------------------------------
# AnalyticsExecutionIntegration.on_execution_complete (Lot A — avril 2026)
# -----------------------------------------------------------------------------
class TestAnalyticsOnExecutionComplete:
    """Normalized contract: duration_ms (ms) + status (str) — no magic.

    Exercises both code paths of on_execution_complete: the fallback that
    builds a synthetic ExecutionMetrics when no execution is active, and
    the nominal delegation to record_execution_complete.
    """

    def _make_integration(self):
        """Return (integration, mocked system) with NO active execution."""
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        fake_system = MagicMock()
        # No active execution: the integration must take the fallback path
        # "synthetic ExecutionMetrics pushed into _buffer".
        fake_system.metrics_collector._active_executions = {}
        # The collector's lock is entered with `with`; stub both dunders so
        # the context-manager protocol works on the MagicMock.
        fake_system.metrics_collector._lock = MagicMock()
        fake_system.metrics_collector._lock.__enter__ = MagicMock(
            return_value=None
        )
        fake_system.metrics_collector._lock.__exit__ = MagicMock(
            return_value=None
        )
        fake_system.metrics_collector._buffer = []
        return AnalyticsExecutionIntegration(fake_system), fake_system

    def test_fallback_builds_execution_metrics_with_correct_fields(self):
        """Without a prior record_execution_start, a synthetic
        ExecutionMetrics is built with the correct field names."""
        from core.analytics.collection.metrics_collector import ExecutionMetrics
        integration, fake_system = self._make_integration()
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=1500.0,
            status="completed",
            steps_total=3,
            steps_completed=3,
            steps_failed=0,
        )
        # An ExecutionMetrics was pushed into the buffer
        buffer = fake_system.metrics_collector._buffer
        assert len(buffer) == 1
        metric: ExecutionMetrics = buffer[0]
        assert isinstance(metric, ExecutionMetrics)
        assert metric.execution_id == "exec1"
        assert metric.workflow_id == "wf1"
        assert metric.duration_ms == 1500.0
        assert metric.status == "completed"
        assert metric.steps_total == 3
        assert metric.steps_completed == 3
        assert metric.steps_failed == 0
        # started_at / completed_at must be mutually consistent with
        # duration_ms (1 ms tolerance)
        delta_ms = (
            metric.completed_at - metric.started_at
        ).total_seconds() * 1000
        assert abs(delta_ms - 1500.0) < 1.0

    def test_uses_record_execution_complete_if_active(self):
        """If the execution was opened via on_execution_start, delegate to
        record_execution_complete (nominal path)."""
        integration, fake_system = self._make_integration()
        # Simulate an active execution
        fake_system.metrics_collector._active_executions = {"exec1": object()}
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=800.0,
            status="failed",
            steps_total=2,
            steps_completed=1,
            steps_failed=1,
            error_message="timeout",
        )
        call = fake_system.metrics_collector.record_execution_complete.call_args
        assert call is not None
        kwargs = call.kwargs
        assert kwargs["execution_id"] == "exec1"
        assert kwargs["status"] == "failed"
        assert kwargs["steps_total"] == 2
        assert kwargs["steps_completed"] == 1
        assert kwargs["steps_failed"] == 1
        assert kwargs["error_message"] == "timeout"

    def test_steps_total_derived_when_not_provided(self):
        """steps_total is derived by summing when absent — no silent error."""
        integration, fake_system = self._make_integration()
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=500.0,
            status="completed",
            steps_completed=2,
            steps_failed=1,
        )
        metric = fake_system.metrics_collector._buffer[0]
        assert metric.steps_total == 3  # 2 + 1

    def test_disabled_integration_is_noop(self):
        """A disabled integration (analytics system None) is a silent no-op."""
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        integration = AnalyticsExecutionIntegration(None)
        assert integration.enabled is False
        # Must neither do anything nor raise
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=100.0,
            status="completed",
        )

    def test_realtime_complete_called(self):
        """Real-time tracking is closed with the correct status."""
        integration, fake_system = self._make_integration()
        integration.on_execution_complete(
            execution_id="exec1",
            workflow_id="wf1",
            duration_ms=100.0,
            status="stopped",
        )
        fake_system.realtime_analytics.complete_execution.assert_called_once_with(
            execution_id="exec1",
            status="stopped",
        )
# -----------------------------------------------------------------------------
# AnalyticsExecutionIntegration.on_recovery_attempt (Lot A — avril 2026)
# -----------------------------------------------------------------------------
class TestAnalyticsOnRecoveryAttempt:
    """Normalized contract: StepMetrics built with the real dataclass fields."""

    def test_success_recovery_builds_valid_step_metrics(self):
        from core.analytics.collection.metrics_collector import StepMetrics
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        system = MagicMock()
        AnalyticsExecutionIntegration(system).on_recovery_attempt(
            execution_id="exec1",
            workflow_id="wf1",
            node_id="node_click",
            strategy="retry_with_delay",
            success=True,
            duration_ms=250.0,
        )
        recorded_call = system.metrics_collector.record_step.call_args
        assert recorded_call is not None
        step: StepMetrics = recorded_call.args[0]
        assert isinstance(step, StepMetrics)
        assert step.execution_id == "exec1"
        assert step.workflow_id == "wf1"
        assert step.node_id == "node_click_recovery"
        assert step.action_type == "recovery_retry_with_delay"
        assert step.duration_ms == 250.0
        assert step.status == "completed"
        assert step.error_details is None
        # Mandatory dataclass fields
        assert step.step_id  # non-empty
        assert step.target_element == ""
        assert step.confidence_score == 0.0

    def test_failed_recovery_sets_status_and_error_details(self):
        from core.analytics.collection.metrics_collector import StepMetrics
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        system = MagicMock()
        AnalyticsExecutionIntegration(system).on_recovery_attempt(
            execution_id="e1",
            workflow_id="w1",
            node_id="n1",
            strategy="fallback_to_parent",
            success=False,
            duration_ms=80.0,
        )
        step: StepMetrics = (
            system.metrics_collector.record_step.call_args.args[0]
        )
        assert step.status == "failed"
        assert step.error_details == "Recovery failed: fallback_to_parent"
        assert step.duration_ms == 80.0

    def test_disabled_integration_is_noop(self):
        from core.analytics.integration.execution_integration import (
            AnalyticsExecutionIntegration,
        )
        # Disabled integration: the call must be a silent no-op.
        AnalyticsExecutionIntegration(None).on_recovery_attempt(
            execution_id="e1",
            workflow_id="w1",
            node_id="n1",
            strategy="x",
            success=True,
            duration_ms=10.0,
        )
# -----------------------------------------------------------------------------
# Persistance SQLite : schema + migration
# -----------------------------------------------------------------------------
class TestTimeSeriesStoreSchema:
    """SQLite persistence: schema creation and legacy migration (C1 columns)."""

    def test_new_store_has_vision_columns(self, tmp_path):
        """A freshly created store exposes both legacy and C1 columns."""
        from core.analytics.storage.timeseries_store import TimeSeriesStore
        store = TimeSeriesStore(tmp_path)
        with sqlite3.connect(str(store.db_path)) as conn:
            # row[1] is the column name in PRAGMA table_info output
            cols = {row[1] for row in conn.execute(
                "PRAGMA table_info(step_metrics)"
            )}
            # Legacy columns
            assert "duration_ms" in cols
            assert "confidence_score" in cols
            # C1 columns
            assert "ocr_ms" in cols
            assert "ui_ms" in cols
            assert "analyze_ms" in cols
            assert "total_ms" in cols
            assert "cache_hit" in cols
            assert "degraded" in cols

    def test_migration_adds_missing_columns(self, tmp_path):
        """Pre-existing database without the C1 columns — migration must add them."""
        from core.analytics.storage.timeseries_store import TimeSeriesStore
        # Create a "legacy" database by hand, without the new columns
        storage_dir = tmp_path / "legacy"
        storage_dir.mkdir()
        legacy_db = storage_dir / "timeseries.db"
        with sqlite3.connect(str(legacy_db)) as conn:
            conn.executescript("""
                CREATE TABLE step_metrics (
                    step_id TEXT PRIMARY KEY,
                    execution_id TEXT NOT NULL,
                    workflow_id TEXT NOT NULL,
                    node_id TEXT NOT NULL,
                    action_type TEXT NOT NULL,
                    target_element TEXT,
                    started_at TIMESTAMP NOT NULL,
                    completed_at TIMESTAMP NOT NULL,
                    duration_ms REAL NOT NULL,
                    status TEXT NOT NULL,
                    confidence_score REAL,
                    retry_count INTEGER DEFAULT 0,
                    error_details TEXT
                );
            """)
            conn.commit()
        # Instantiating TimeSeriesStore must trigger the migration
        _ = TimeSeriesStore(storage_dir)
        with sqlite3.connect(str(legacy_db)) as conn:
            cols = {row[1] for row in conn.execute(
                "PRAGMA table_info(step_metrics)"
            )}
            assert "ocr_ms" in cols
            assert "cache_hit" in cols
            assert "degraded" in cols

    def test_write_and_read_vision_metrics(self, tmp_path):
        """C1 fields written through write_metrics survive a raw SQL read."""
        from core.analytics.storage.timeseries_store import TimeSeriesStore
        store = TimeSeriesStore(tmp_path)
        metric = _make_step_metrics(
            ocr_ms=50.0, ui_ms=60.0, analyze_ms=110.0,
            total_ms=500.0, cache_hit=True, degraded=True,
        )
        store.write_metrics([metric])
        with sqlite3.connect(str(store.db_path)) as conn:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT * FROM step_metrics WHERE step_id = ?", (metric.step_id,)
            ).fetchone()
            assert row is not None
            assert row["ocr_ms"] == 50.0
            assert row["ui_ms"] == 60.0
            assert row["analyze_ms"] == 110.0
            assert row["total_ms"] == 500.0
            # SQLite stores booleans as INTEGER
            assert row["cache_hit"] == 1
            assert row["degraded"] == 1

View File

@@ -0,0 +1,264 @@
"""
Tests de la sélection robuste d'edge dans WorkflowPipeline.get_next_action (C3).
Vérifie que la nouvelle API utilise EdgeScorer et expose le contrat dict
normalisé (Lot A — avril 2026) :
- status="selected" → edge choisi
- status="terminal" → aucun outgoing_edge (fin légitime)
- status="blocked" → candidats rejetés (NE DOIT PAS être traité comme fin)
"""
from __future__ import annotations
from datetime import datetime
from unittest.mock import MagicMock, patch
import pytest
from core.models.screen_state import (
ContextLevel,
EmbeddingRef,
PerceptionLevel,
RawLevel,
ScreenState,
WindowContext,
)
from core.models.workflow_graph import (
Action,
EdgeConstraints,
EdgeStats,
PostConditions,
TargetSpec,
Workflow,
WorkflowEdge,
WorkflowNode,
)
def _edge(
    edge_id: str,
    required_window_title: str = "",
    success_rate: float = 0.5,
    execution_count: int = 10,
    min_source_similarity: float = 0.80,
) -> WorkflowEdge:
    """Build an n1 -> n2 WorkflowEdge carrying synthetic execution stats."""
    edge_stats = EdgeStats()
    if execution_count > 0:
        successes = int(round(success_rate * execution_count))
        edge_stats.execution_count = execution_count
        edge_stats.success_count = successes
        edge_stats.failure_count = execution_count - successes
    return WorkflowEdge(
        edge_id=edge_id,
        from_node="n1",
        to_node="n2",
        action=Action(type="mouse_click", target=TargetSpec()),
        constraints=EdgeConstraints(
            required_window_title=required_window_title,
            min_source_similarity=min_source_similarity,
        ),
        post_conditions=PostConditions(),
        stats=edge_stats,
    )
def _state(window_title: str = "AppA") -> ScreenState:
    """Build a minimal ScreenState whose window bears *window_title*."""
    window = WindowContext(
        app_name="app", window_title=window_title, screen_resolution=[1920, 1080]
    )
    perception = PerceptionLevel(
        embedding=EmbeddingRef(provider="t", vector_id="v", dimensions=512),
        detected_text=[],
        text_detection_method="none",
        confidence_avg=0.0,
    )
    return ScreenState(
        screen_state_id="s",
        timestamp=datetime.now(),
        session_id="sess",
        window=window,
        raw=RawLevel(screenshot_path="", capture_method="t", file_size_bytes=0),
        perception=perception,
        context=ContextLevel(),
        ui_elements=[],
    )
@pytest.fixture
def pipeline_with_workflow(tmp_path):
    """Minimal pipeline with an in-memory workflow (mocked Workflow).

    Building a real Workflow is avoided (ScreenTemplate is too heavy);
    instead a MagicMock is configured with the only methods
    `get_next_action` relies on: `get_outgoing_edges`.

    Yields (pipeline, workflow) so tests can mutate `workflow.edges`.
    """
    from core.pipeline.workflow_pipeline import WorkflowPipeline
    # Stub out the heavy imports (mocks over GPU-backed components)
    with patch.multiple(
        "core.pipeline.workflow_pipeline",
        UIDetector=MagicMock(),
        CLIPEmbedder=MagicMock(),
        StateEmbeddingBuilder=MagicMock(),
        FusionEngine=MagicMock(),
        FAISSManager=MagicMock(),
        GraphBuilder=MagicMock(),
        NodeMatcher=MagicMock(),
        HierarchicalMatcher=MagicMock(),
        LearningManager=MagicMock(),
        ActionExecutor=MagicMock(),
        TargetResolver=MagicMock(),
        ErrorHandler=MagicMock(),
    ):
        pipeline = WorkflowPipeline(data_dir=str(tmp_path), use_gpu=False)
        workflow = MagicMock(spec=Workflow)
        workflow.workflow_id = "wf1"
        workflow.edges = []
        # Closure over `workflow.edges`: tests can reassign the list and the
        # lookup sees the new value.
        workflow.get_outgoing_edges = lambda node_id: [
            e for e in workflow.edges if e.from_node == node_id
        ]
        pipeline._workflows["wf1"] = workflow
        return pipeline, workflow
class TestGetNextActionC3:
    """Normalized dict contract of get_next_action (C3 edge selection)."""

    def test_picks_highest_success_rate(self, pipeline_with_workflow):
        """The edge with the best success rate wins the ranking."""
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [
            _edge("low", success_rate=0.1, execution_count=20),
            _edge("high", success_rate=0.9, execution_count=20),
        ]
        outcome = pipeline.get_next_action("wf1", "n1", screen_state=_state())
        assert outcome["status"] == "selected"
        assert outcome["edge_id"] == "high"

    def test_filters_out_invalid_preconditions(self, pipeline_with_workflow):
        """An edge whose window-title constraint fails is filtered out."""
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [
            _edge("bad", required_window_title="NopeApp", success_rate=0.99, execution_count=20),
            _edge("ok", success_rate=0.50, execution_count=20),
        ]
        outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(window_title="AppA")
        )
        assert outcome["status"] == "selected"
        assert outcome["edge_id"] == "ok"

    def test_blocked_when_no_valid_edge(self, pipeline_with_workflow):
        """Candidates exist but none passes the constraints.

        Lot A — critical case: "terminal" must NOT be returned here. A
        blockage must surface explicitly to trigger a supervised pause.
        """
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [
            _edge("e1", required_window_title="AppB"),
            _edge("e2", required_window_title="AppC"),
        ]
        outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(window_title="AppA")
        )
        assert outcome["status"] == "blocked"
        assert outcome["reason"] == "no_valid_edge"

    def test_strategy_first_keeps_legacy_behavior(self, pipeline_with_workflow):
        """strategy="first" keeps the historical no-ranking behaviour."""
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [
            _edge("e1", success_rate=0.1, execution_count=20),
            _edge("e2", success_rate=0.9, execution_count=20),
        ]
        outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(), strategy="first"
        )
        # Legacy mode: first edge, no sorting
        assert outcome["status"] == "selected"
        assert outcome["edge_id"] == "e1"

    def test_no_screen_state_still_works(self, pipeline_with_workflow):
        """Without a ScreenState the scorer cannot filter but can rank."""
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [
            _edge("e1", success_rate=0.1, execution_count=20),
            _edge("e2", success_rate=0.9, execution_count=20),
        ]
        outcome = pipeline.get_next_action("wf1", "n1", screen_state=None)
        assert outcome["status"] == "selected"
        # Ranking by success_rate still applies
        assert outcome["edge_id"] == "e2"

    def test_no_outgoing_edges_is_terminal(self, pipeline_with_workflow):
        """No outgoing edge = legitimate workflow end (status="terminal")."""
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = []
        outcome = pipeline.get_next_action("wf1", "n1", screen_state=_state())
        assert outcome["status"] == "terminal"

    def test_blocked_distinct_from_terminal(self, pipeline_with_workflow):
        """Lot A regression: blocked != terminal.

        The historical bug conflated the two cases; a blocked workflow
        showed up as "completed successfully" on the ExecutionLoop side.
        """
        pipeline, workflow = pipeline_with_workflow
        # Terminal case: no outgoing edge
        workflow.edges = []
        terminal_outcome = pipeline.get_next_action("wf1", "n1", screen_state=_state())
        # Blocked case: outgoing edges present but all rejected
        workflow.edges = [_edge("bad", required_window_title="NopeApp")]
        blocked_outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(window_title="AppA")
        )
        assert terminal_outcome["status"] == "terminal"
        assert blocked_outcome["status"] == "blocked"
        # The caller must be able to tell them apart unambiguously
        assert terminal_outcome["status"] != blocked_outcome["status"]

    def test_workflow_not_found_is_blocked(self, pipeline_with_workflow):
        """Unknown workflow = blocked with an explicit reason (not silent)."""
        pipeline, _workflow = pipeline_with_workflow
        outcome = pipeline.get_next_action(
            "wf_inexistant", "n1", screen_state=_state()
        )
        assert outcome["status"] == "blocked"
        assert outcome["reason"] == "workflow_not_found"
class TestGetNextActionSourceSimilarity:
    """Lot B — source_similarity propagation down to EdgeScorer."""

    def test_high_similarity_passes_min_source_similarity(
        self, pipeline_with_workflow
    ):
        """A high source_similarity lets the edge through."""
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [_edge("e1", min_source_similarity=0.80)]
        outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(), source_similarity=0.95
        )
        assert outcome["status"] == "selected"
        assert outcome["edge_id"] == "e1"

    def test_low_similarity_blocks_edge(self, pipeline_with_workflow):
        """source_similarity < min_source_similarity → edge rejected → blocked.

        This proves the min_source_similarity precondition is effective
        again (Lot B). Before that lot, EdgeScorer always received a
        hard-coded 1.0 and never rejected an edge on this ground.
        """
        pipeline, workflow = pipeline_with_workflow
        workflow.edges = [_edge("e1", min_source_similarity=0.80)]
        outcome = pipeline.get_next_action(
            "wf1", "n1", screen_state=_state(), source_similarity=0.40
        )
        assert outcome["status"] == "blocked"
        assert outcome["reason"] == "no_valid_edge"

    def test_default_source_similarity_is_one(self, pipeline_with_workflow):
        """Without source_similarity the default is 1.0 → no rejection on
        this ground (compat with call sites that do not pass it yet)."""
        pipeline, workflow = pipeline_with_workflow
        # Very strict min_source_similarity, but the caller default is 1.0
        workflow.edges = [_edge("e1", min_source_similarity=0.99)]
        outcome = pipeline.get_next_action("wf1", "n1", screen_state=_state())
        assert outcome["status"] == "selected"