Files
rpa_vision_v3/tests/integration/test_t2a_extract.py
Dom ec1fb81054
Some checks failed
tests / Lint (ruff + black) (push) Failing after 1m46s
tests / Tests unitaires (sans GPU) (push) Failing after 2m0s
tests / Tests sécurité (critique) (push) Has been skipped
fix(dashboard,worker): vérité produit P0 — dashboard+worker+VWB export
War-room clôture DGX 2026-06-18 (recadrage Dom : graphe/apprentissage/mémoire/dashboard = surface produit P0).
Le dashboard et le statut worker affichaient des états faux ; corrige pour refléter la vérité du produit.

- dashboard FAISS: distingue index brut / metadata HMAC invalide / runtime / absent (plus de faux "inactif")
- dashboard process-mining: 503 explicite missing_dependency (plus de message trompeur)
- dashboard /api/workflows + system/status: lecture DB VWB v3 canonique (total réel = 24, plus de 0)
- worker /processing/status: véridique (lit _worker_health.json) + statut "idle/armé (lazy)" distinct de "dégradé (échec)"
- VWB export: N steps -> N actions/edges (dernière action n'est plus perdue)
- tests: dashboard routes, worker status truthfulness, export VWB

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-18 17:50:12 +02:00

469 lines
17 KiB
Python

"""Tests des actions extract_text et t2a_decision (C+.5/.6).
Covers:
- _resolve_runtime_vars: {{var}} / {{var.field}} templating
- _handle_extract_text_action: mocked OCR, variable storage
- _handle_t2a_decision_action: mocked analyze_dpi, JSON storage
- _edge_to_normalized_actions for extract_text, extract_table, t2a_decision
  and the extract_text_scroll expansion
- _handle_concat_text_vars_action: merging of the scroll sub-results
- VWB → core bridge (mapping + parameters)
"""
from unittest.mock import patch
import pytest
from agent_v0.server_v1.replay_engine import (
_ALLOWED_ACTION_TYPES,
_SERVER_SIDE_ACTION_TYPES,
_resolve_runtime_vars,
_handle_extract_text_action,
_handle_t2a_decision_action,
_handle_concat_text_vars_action,
_edge_to_normalized_actions,
_create_replay_state,
SCROLL_PAUSE_MS,
)
from visual_workflow_builder.backend.services.learned_workflow_bridge import (
VWB_ACTION_TO_CORE,
convert_vwb_to_core_workflow,
_vwb_params_to_core,
)
# ----------------------------------------------------------------------
# Templating runtime
# ----------------------------------------------------------------------
def test_resolve_simple_var():
    """A single ``{{ipp}}`` placeholder is replaced by the variable's value."""
    variables = {"ipp": "25003284"}
    rendered = _resolve_runtime_vars("Patient {{ipp}}", variables)
    assert rendered == "Patient 25003284"
def test_resolve_field_access():
    """Dotted placeholders read a field of a dict-valued variable."""
    template = "{{result.decision}} car {{result.justification}}"
    variables = {
        "result": {"decision": "UHCD", "justification": "asthme + insuf coro"},
    }
    rendered = _resolve_runtime_vars(template, variables)
    assert rendered == "UHCD car asthme + insuf coro"
def test_resolve_missing_var_kept_intact():
    """A placeholder naming an unknown variable is returned unchanged."""
    template = "Hello {{absent}} world"
    rendered = _resolve_runtime_vars(template, {"x": "y"})
    assert rendered == "Hello {{absent}} world"
def test_resolve_missing_field_kept_intact():
    """A dotted placeholder whose field is absent is returned unchanged."""
    variables = {"var": {"present": "x"}}
    rendered = _resolve_runtime_vars("{{var.absent}}", variables)
    assert rendered == "{{var.absent}}"
def test_resolve_in_dict_recursive():
    """Placeholders are resolved inside nested dicts and lists.

    The integer value ``77`` is expected to be rendered as the string ``"77"``.
    """
    payload = {
        "msg": "IPP {{ipp}}",
        "nested": {"k": "{{ipp}}"},
        "list": ["{{age}}"],
    }
    variables = {"ipp": "X", "age": 77}
    expected = {"msg": "IPP X", "nested": {"k": "X"}, "list": ["77"]}
    assert _resolve_runtime_vars(payload, variables) == expected
def test_resolve_empty_vars_noop():
    """With an empty dict or ``None`` as variables, the value compares equal to the input."""
    val = {"k": "{{var}}"}
    for no_variables in ({}, None):
        assert _resolve_runtime_vars(val, no_variables) == val
def test_resolve_non_string_passthrough():
    """Non-string scalars (an int, ``None``) are returned as-is."""
    resolved_int = _resolve_runtime_vars(42, {"x": "y"})
    assert resolved_int == 42
    resolved_none = _resolve_runtime_vars(None, {"x": "y"})
    assert resolved_none is None
def test_resolve_handles_whitespace_in_braces():
    """Spaces around the variable name inside the braces are tolerated."""
    rendered = _resolve_runtime_vars("{{ ipp }}", {"ipp": "X"})
    assert rendered == "X"
# ----------------------------------------------------------------------
# Action types & types serveur
# ----------------------------------------------------------------------
def test_extract_text_in_allowed():
    """``extract_text`` is part of the allowed action types."""
    action_type = "extract_text"
    assert action_type in _ALLOWED_ACTION_TYPES
def test_t2a_decision_in_allowed():
    """``t2a_decision`` is part of the allowed action types."""
    action_type = "t2a_decision"
    assert action_type in _ALLOWED_ACTION_TYPES
def test_server_side_types():
    """The two historical server-side action types are still registered."""
    # The set is extensible: only the two historical types are required here.
    # extract_text_scroll adds `_concat_text_vars` (internal server action).
    historical_types = {"extract_text", "t2a_decision"}
    assert historical_types.issubset(_SERVER_SIDE_ACTION_TYPES)
# ----------------------------------------------------------------------
# Handler extract_text
# ----------------------------------------------------------------------
def test_handle_extract_text_stores_variable():
    """The (mocked) OCR text is stored under the requested ``output_var``."""
    state = _create_replay_state("rep1", "wf", "sess", 3)
    heartbeats = {"sess": {"path": "/fake/heartbeat.png", "timestamp": 0}}
    action = {
        "type": "extract_text",
        "parameters": {"output_var": "texte_motif", "paragraph": True},
    }

    # The OCR backend is replaced by a fixed string for the duration of the call.
    ocr_mock = patch(
        "core.llm.extract_text_from_image",
        return_value="Patient asthme peakflow 260",
    )
    with ocr_mock:
        succeeded = _handle_extract_text_action(action, state, "sess", heartbeats)

    assert succeeded is True
    assert state["variables"]["texte_motif"] == "Patient asthme peakflow 260"
def test_handle_extract_text_no_heartbeat_stores_empty():
    """Without a heartbeat for the session, the handler fails and stores ``""``."""
    state = _create_replay_state("rep1", "wf", "sess", 3)
    heartbeats = {}  # no heartbeat recorded
    action = {"type": "extract_text", "parameters": {"output_var": "v"}}

    succeeded = _handle_extract_text_action(action, state, "sess", heartbeats)

    assert succeeded is False
    assert state["variables"]["v"] == ""
def test_handle_extract_text_default_var_name():
    """With no ``output_var`` parameter, the ``extracted_text`` variable is set."""
    state = _create_replay_state("rep1", "wf", "sess", 3)
    heartbeats = {"sess": {"path": "/x.png", "timestamp": 0}}
    action = {"type": "extract_text", "parameters": {}}

    with patch("core.llm.extract_text_from_image", return_value="abc"):
        _handle_extract_text_action(action, state, "sess", heartbeats)

    stored_names = state["variables"]
    assert "extracted_text" in stored_names
# ----------------------------------------------------------------------
# Handler t2a_decision
# ----------------------------------------------------------------------
def test_handle_t2a_decision_stores_json():
    """The (mocked) ``analyze_dpi`` result is stored under ``output_var``."""
    state = _create_replay_state("rep1", "wf", "sess", 3)
    parameters = {
        "input_template": "Patient 78 ans, asthme, peakflow 260",
        "output_var": "decision_t2a",
        "model": "qwen2.5:7b",
    }
    action = {"type": "t2a_decision", "parameters": parameters}
    fake_result = {
        "decision": "REQUALIFICATION_HOSPITALISATION",
        "justification": "Surveillance continue requise",
        "confiance": "elevee",
        "_elapsed_s": 4.2,
    }

    with patch("core.llm.analyze_dpi", return_value=fake_result):
        succeeded = _handle_t2a_decision_action(action, state)

    assert succeeded is True
    stored = state["variables"]["decision_t2a"]
    assert stored["decision"] == "REQUALIFICATION_HOSPITALISATION"
def test_handle_t2a_decision_empty_input_returns_indetermine():
    """An empty ``input_template`` fails and stores an ``INDETERMINE`` decision."""
    state = _create_replay_state("rep1", "wf", "sess", 3)
    parameters = {"input_template": "", "output_var": "r"}
    action = {"type": "t2a_decision", "parameters": parameters}

    succeeded = _handle_t2a_decision_action(action, state)

    assert succeeded is False
    assert state["variables"]["r"]["decision"] == "INDETERMINE"
def test_handle_t2a_decision_analyze_exception():
    """An exception raised by ``analyze_dpi`` is reported in the stored result.

    The handler is expected to return ``False``, store ``INDETERMINE`` and
    include the exception message in the ``_error`` field.
    """
    state = _create_replay_state("rep1", "wf", "sess", 3)
    parameters = {"input_template": "x", "output_var": "r"}
    action = {"type": "t2a_decision", "parameters": parameters}

    failing_llm = patch(
        "core.llm.analyze_dpi",
        side_effect=RuntimeError("ollama down"),
    )
    with failing_llm:
        succeeded = _handle_t2a_decision_action(action, state)

    assert succeeded is False
    stored = state["variables"]["r"]
    assert stored["decision"] == "INDETERMINE"
    assert "ollama down" in stored["_error"]
# ----------------------------------------------------------------------
# Edge → action normalisée
# ----------------------------------------------------------------------
class _FakeAction:
def __init__(self, type_, parameters=None):
self.type = type_
self.target = None
self.parameters = parameters or {}
class _FakeEdge:
def __init__(self, action, edge_id="e1", from_node="n1", to_node="n2"):
self.edge_id = edge_id
self.from_node = from_node
self.to_node = to_node
self.action = action
def test_edge_to_action_extract_text():
    """An ``extract_text`` edge yields one action with its parameters kept."""
    source_action = _FakeAction(
        "extract_text",
        parameters={"output_var": "texte_examens", "paragraph": True},
    )
    edge = _FakeEdge(source_action)

    actions = _edge_to_normalized_actions(edge, params={})

    assert len(actions) == 1
    normalized = actions[0]
    assert normalized["type"] == "extract_text"
    normalized_params = normalized["parameters"]
    assert normalized_params["output_var"] == "texte_examens"
    assert normalized_params["paragraph"] is True
def test_edge_to_action_extract_table_accepts_tesseract_engine_and_variable_name():
    """``extract_table`` maps ``variable_name`` to ``output_var`` and keeps pattern/engine."""
    source_params = {
        "variable_name": "t_extraction_liste",
        "pattern": r"^25\d{6}$",
        "engine": "tesseract",
    }
    edge = _FakeEdge(_FakeAction("extract_table", parameters=source_params))

    actions = _edge_to_normalized_actions(edge, params={})

    assert len(actions) == 1
    normalized = actions[0]
    assert normalized["type"] == "extract_table"
    normalized_params = normalized["parameters"]
    assert normalized_params["output_var"] == "t_extraction_liste"
    assert normalized_params["pattern"] == r"^25\d{6}$"
    assert normalized_params["engine"] == "tesseract"
def test_edge_to_action_t2a_decision():
    """A ``t2a_decision`` edge yields exactly one action with its parameters kept.

    The template is expected to be carried through verbatim (placeholders
    unresolved) along with ``output_var`` and ``model``.
    """
    edge = _FakeEdge(_FakeAction(
        "t2a_decision",
        parameters={
            "input_template": "{{texte_motif}}",
            "output_var": "result",
            "model": "qwen2.5:7b",
        },
    ))
    actions = _edge_to_normalized_actions(edge, params={})
    # Same guard as the sibling edge tests: without it, extra emitted
    # actions would go unnoticed because only actions[0] is inspected.
    assert len(actions) == 1
    a = actions[0]
    assert a["type"] == "t2a_decision"
    assert a["parameters"]["input_template"] == "{{texte_motif}}"
    assert a["parameters"]["output_var"] == "result"
    assert a["parameters"]["model"] == "qwen2.5:7b"
# ----------------------------------------------------------------------
# Bridge VWB → core
# ----------------------------------------------------------------------
def test_vwb_extract_text_passthrough():
    """The VWB ``extract_text`` action maps to the core type of the same name."""
    core_type = VWB_ACTION_TO_CORE["extract_text"]
    assert core_type == "extract_text"
def test_vwb_t2a_decision_passthrough():
    """The VWB ``t2a_decision`` action maps to the core type of the same name."""
    core_type = VWB_ACTION_TO_CORE["t2a_decision"]
    assert core_type == "t2a_decision"
def test_vwb_params_extract_text_preserves_output_var():
    """``extract_text`` parameters come out of the bridge unchanged."""
    vwb_params = {"output_var": "v", "paragraph": False}
    core_params = _vwb_params_to_core("extract_text", vwb_params)
    assert core_params == {"output_var": "v", "paragraph": False}
def test_vwb_params_extract_text_legacy_variable_name():
    """Backward compatibility with the legacy VWB-side ``variable_name`` parameter."""
    legacy_params = {"variable_name": "v_legacy"}
    core_params = _vwb_params_to_core("extract_text", legacy_params)
    assert core_params["output_var"] == "v_legacy"
def test_vwb_params_t2a_decision_preserves_all():
    """All three ``t2a_decision`` parameters come out of the bridge unchanged."""
    vwb_params = {
        "input_template": "DPI {{ipp}}",
        "output_var": "dec",
        "model": "qwen2.5:7b",
    }
    core_params = _vwb_params_to_core("t2a_decision", vwb_params)
    expected = {
        "input_template": "DPI {{ipp}}",
        "output_var": "dec",
        "model": "qwen2.5:7b",
    }
    assert core_params == expected
def test_export_workflow_with_t2a_chain():
    """A VWB workflow chaining extract_text → t2a_decision → pause_for_human exports cleanly.

    Five steps must produce five edges, the final ``click_anchor`` must be
    exported as ``mouse_click``, and the ``{{dpi}}`` template must reach the
    ``t2a_decision`` edge unchanged.
    """
    workflow_data = {"id": "wf_t2a", "name": "Demo T2A"}
    steps_data = [
        {
            "id": "s1",
            "action_type": "click_anchor",
            "parameters": {"target_text": "25003284"},
            "label": "Clic IPP",
        },
        {
            "id": "s2",
            "action_type": "extract_text",
            "parameters": {"output_var": "dpi"},
            "label": "OCR",
        },
        {
            "id": "s3",
            "action_type": "t2a_decision",
            "parameters": {
                "input_template": "{{dpi}}",
                "output_var": "dec",
                "model": "qwen2.5:7b",
            },
            "label": "Analyse",
        },
        {
            "id": "s4",
            "action_type": "pause_for_human",
            "parameters": {
                "message": "Décision : {{dec.decision}} — {{dec.justification}}",
            },
            "label": "Validation",
        },
        {
            "id": "s5",
            "action_type": "click_anchor",
            "parameters": {"target_text": "Enregistrer"},
            "label": "Clic Enregistrer",
        },
    ]

    core = convert_vwb_to_core_workflow(workflow_data, steps_data)
    edges = core["edges"]
    edge_types = [edge["action"]["type"] for edge in edges]

    assert len(edges) == len(steps_data)
    for expected_type in ("extract_text", "t2a_decision", "pause_for_human"):
        assert expected_type in edge_types
    assert edge_types[-1] == "mouse_click"

    # Check that the template is carried through the export untouched.
    t2a_edge = next(
        edge for edge in edges if edge["action"]["type"] == "t2a_decision"
    )
    assert t2a_edge["action"]["parameters"]["input_template"] == "{{dpi}}"
# ----------------------------------------------------------------------
# extract_text_scroll — expansion + handler concat
# ----------------------------------------------------------------------
def test_extract_text_scroll_in_allowed():
    """Both ``extract_text_scroll`` and ``_concat_text_vars`` are allowed types."""
    for action_type in ("extract_text_scroll", "_concat_text_vars"):
        assert action_type in _ALLOWED_ACTION_TYPES
def test_extract_text_scroll_concat_is_server_side():
    """``_concat_text_vars`` is registered as a server-side action type."""
    action_type = "_concat_text_vars"
    assert action_type in _SERVER_SIDE_ACTION_TYPES
def test_edge_to_action_extract_text_scroll_expands_to_six_steps():
    """``extract_text_scroll`` expands into the six-action scroll/OCR sequence."""
    source_action = _FakeAction(
        "extract_text_scroll",
        parameters={"variable_name": "t_full", "paragraph": True},
    )
    actions = _edge_to_normalized_actions(_FakeEdge(source_action), params={})

    # Expected sequence: OCR(top), Ctrl+End, wait, OCR(bottom), concat, Ctrl+Home
    expected_types = [
        "extract_text",
        "key_combo",
        "wait",
        "extract_text",
        "_concat_text_vars",
        "key_combo",
    ]
    assert len(actions) == 6
    assert [step["type"] for step in actions] == expected_types

    ocr_top, go_end, pause, ocr_bottom, concat, go_home = actions

    # The two OCR sub-actions write to distinct internal variables.
    assert ocr_top["parameters"]["output_var"] == "__t_full_top"
    assert ocr_bottom["parameters"]["output_var"] == "__t_full_bottom"

    # Ctrl+End before the second OCR, Ctrl+Home at the very end.
    assert go_end["keys"] == ["ctrl", "end"]
    assert go_home["keys"] == ["ctrl", "home"]

    # The wait lasts SCROLL_PAUSE_MS.
    assert pause["duration_ms"] == SCROLL_PAUSE_MS

    # The concat step reads both internal variables and writes the final one.
    concat_params = concat["parameters"]
    assert concat_params["top_var"] == "__t_full_top"
    assert concat_params["bottom_var"] == "__t_full_bottom"
    assert concat_params["output_var"] == "t_full"
    assert concat_params["separator"] == "\n\n"

    # Every action_id is unique and every action inherits the edge identity.
    distinct_ids = {step["action_id"] for step in actions}
    assert len(distinct_ids) == 6
    for step in actions:
        assert step["edge_id"] == "e1"
        assert step["from_node"] == "n1"
def test_edge_to_action_extract_text_scroll_default_var_name():
    """Without a variable name, the expansion falls back to ``extracted_text``.

    The internal OCR variables are then ``__extracted_text_top`` and
    ``__extracted_text_bottom``, and the concat step writes ``extracted_text``.
    """
    edge = _FakeEdge(_FakeAction("extract_text_scroll", parameters={}))
    actions = _edge_to_normalized_actions(edge, params={})
    # Default = extracted_text, so internal vars = __extracted_text_top/bottom
    assert actions[0]["parameters"]["output_var"] == "__extracted_text_top"
    # The bottom variable was described but never checked; assert it as well.
    assert actions[3]["parameters"]["output_var"] == "__extracted_text_bottom"
    assert actions[4]["parameters"]["output_var"] == "extracted_text"
def test_edge_to_action_extract_text_scroll_accepts_output_var_legacy():
    """Compatibility: ``output_var`` is accepted in addition to ``variable_name``."""
    source_action = _FakeAction(
        "extract_text_scroll",
        parameters={"output_var": "legacy_var"},
    )
    actions = _edge_to_normalized_actions(_FakeEdge(source_action), params={})
    concat_step = actions[4]
    assert concat_step["parameters"]["output_var"] == "legacy_var"
def test_handle_concat_text_vars_merges_top_and_bottom():
    """Top and bottom texts are joined with the separator into ``output_var``.

    The ``__``-prefixed source variables must be removed afterwards while
    unrelated variables stay in place.
    """
    state = _create_replay_state("rep1", "wf", "sess", 3)
    state["variables"] = {
        "__t_full_top": "Lignes du haut",
        "__t_full_bottom": "Lignes du bas",
        "other": "intact",
    }
    concat_params = {
        "top_var": "__t_full_top",
        "bottom_var": "__t_full_bottom",
        "output_var": "t_full",
        "separator": "\n\n",
    }
    action = {"type": "_concat_text_vars", "parameters": concat_params}

    succeeded = _handle_concat_text_vars_action(action, state)

    assert succeeded is True
    variables = state["variables"]
    assert variables["t_full"] == "Lignes du haut\n\nLignes du bas"
    # Internal variables are cleaned up.
    assert "__t_full_top" not in variables
    assert "__t_full_bottom" not in variables
    # Other variables are preserved.
    assert variables["other"] == "intact"
def test_handle_concat_text_vars_handles_empty_top():
    """An empty top text yields the bottom text alone, without a separator."""
    state = _create_replay_state("rep1", "wf", "sess", 3)
    state["variables"] = {"__a_top": "", "__a_bottom": "Bas seul"}
    concat_params = {
        "top_var": "__a_top",
        "bottom_var": "__a_bottom",
        "output_var": "a",
        "separator": "\n\n",
    }
    action = {"type": "_concat_text_vars", "parameters": concat_params}

    succeeded = _handle_concat_text_vars_action(action, state)

    assert succeeded is True
    # No leading/trailing separator when one of the variables is empty.
    assert state["variables"]["a"] == "Bas seul"
def test_handle_concat_text_vars_handles_both_empty():
    """With both inputs empty, the handler returns ``False`` and stores ``""``."""
    state = _create_replay_state("rep1", "wf", "sess", 3)
    state["variables"] = {"__a_top": "", "__a_bottom": ""}
    concat_params = {
        "top_var": "__a_top",
        "bottom_var": "__a_bottom",
        "output_var": "a",
    }
    action = {"type": "_concat_text_vars", "parameters": concat_params}

    succeeded = _handle_concat_text_vars_action(action, state)

    assert succeeded is False  # nothing useful was produced
    assert state["variables"]["a"] == ""
def test_handle_concat_text_vars_preserves_user_named_vars():
    """Source variables that do not start with ``__`` are not deleted."""
    state = _create_replay_state("rep1", "wf", "sess", 3)
    state["variables"] = {"user_top": "haut", "user_bottom": "bas"}
    concat_params = {
        "top_var": "user_top",
        "bottom_var": "user_bottom",
        "output_var": "merged",
    }
    action = {"type": "_concat_text_vars", "parameters": concat_params}

    _handle_concat_text_vars_action(action, state)

    variables = state["variables"]
    assert variables["user_top"] == "haut"
    assert variables["user_bottom"] == "bas"
    assert variables["merged"] == "haut\n\nbas"
def test_vwb_extract_text_scroll_passthrough():
    """The VWB ``extract_text_scroll`` action maps to the core type of the same name."""
    core_type = VWB_ACTION_TO_CORE["extract_text_scroll"]
    assert core_type == "extract_text_scroll"
def test_vwb_params_extract_text_scroll_preserves_variable_name():
    """``variable_name`` comes out of the bridge unchanged for ``extract_text_scroll``."""
    vwb_params = {"variable_name": "t_full"}
    core_params = _vwb_params_to_core("extract_text_scroll", vwb_params)
    assert core_params == {"variable_name": "t_full"}
def test_vwb_params_extract_text_scroll_legacy_output_var():
    """A legacy ``output_var`` is exposed as ``variable_name`` for ``extract_text_scroll``."""
    legacy_params = {"output_var": "legacy"}
    core_params = _vwb_params_to_core("extract_text_scroll", legacy_params)
    assert core_params["variable_name"] == "legacy"