diff --git a/agent_v0/server_v1/api_stream.py b/agent_v0/server_v1/api_stream.py index c24792a41..017461fd1 100644 --- a/agent_v0/server_v1/api_stream.py +++ b/agent_v0/server_v1/api_stream.py @@ -219,6 +219,10 @@ from .replay_engine import ( _is_learned_workflow, _edge_to_normalized_actions, _substitute_variables, + _resolve_runtime_vars, + _SERVER_SIDE_ACTION_TYPES, + _handle_extract_text_action, + _handle_t2a_decision_action, _expand_compound_steps, _pre_check_screen_state as _pre_check_screen_state_impl, _detect_popup_hint as _detect_popup_hint_impl, @@ -2850,37 +2854,68 @@ async def get_next_action(session_id: str, machine_id: str = "default"): if not queue: return {"action": None, "session_id": session_id, "machine_id": machine_id} - # Peek à la prochaine action SANS la retirer (pour le pre-check) - action = queue[0] + # ── Boucle de traitement : actions serveur (extract_text, t2a_decision) + # exécutées entièrement côté serveur jusqu'à trouver une action visuelle + # à transmettre à l'Agent V1 ou un pause_for_human qui bloque le replay. + action = None + while queue: + action = queue[0] - # ── pause_for_human : interception avant exécution ── - # Cette action n'est jamais transmise à l'Agent V1. Elle bascule - # le replay en paused_need_help avec le message custom, ce qui - # déclenche la bulle interactive ChatWindow (J3.5). - if action.get("type") == "pause_for_human" and owning_replay is not None: - params = action.get("parameters") or {} - message = params.get("message") or "Validation requise" - queue.pop(0) - _replay_queues[session_id] = queue - owning_replay["status"] = "paused_need_help" - owning_replay["pause_message"] = message - owning_replay["failed_action"] = { - "action_id": action.get("action_id", ""), - "type": "pause_for_human", - "reason": "user_request", - } - logger.info( - f"Replay {owning_replay['replay_id']} pause supervisée demandée " - f"par le workflow : {message[:80]}" - ) - return { - "action": None, - "session_id": session_id, - "machine_id": machine_id, - "replay_paused": True, - "pause_message": message, - "replay_id": owning_replay["replay_id"], - } + # Résoudre les variables runtime ({{var}} et {{var.field}}) + if owning_replay is not None: + runtime_vars = owning_replay.get("variables") or {} + if runtime_vars: + action = _resolve_runtime_vars(action, runtime_vars) + + type_ = action.get("type") + + # pause_for_human : bascule en paused_need_help, return action=None + if type_ == "pause_for_human" and owning_replay is not None: + params = action.get("parameters") or {} + message = params.get("message") or "Validation requise" + queue.pop(0) + _replay_queues[session_id] = queue + owning_replay["status"] = "paused_need_help" + owning_replay["pause_message"] = message + owning_replay["failed_action"] = { + "action_id": action.get("action_id", ""), + "type": "pause_for_human", + "reason": "user_request", + } + logger.info( + f"Replay {owning_replay['replay_id']} pause supervisée demandée " + f"par le workflow : {message[:80]}" + ) + return { + "action": None, + "session_id": session_id, + "machine_id": machine_id, + "replay_paused": True, + "pause_message": message, + "replay_id": owning_replay["replay_id"], + } + + # Actions serveur : exécuter, pop, continuer + if type_ in _SERVER_SIDE_ACTION_TYPES and owning_replay is not None: + try: + if type_ == "extract_text": + _handle_extract_text_action( + action, owning_replay, session_id, _last_heartbeat + ) + elif type_ == "t2a_decision": + _handle_t2a_decision_action(action, owning_replay) + except Exception as e: + logger.warning(f"Action serveur {type_} a levé : {e}") + queue.pop(0) + _replay_queues[session_id] = queue + continue # action suivante + + # Action visuelle : sortir de la boucle pour la transmettre à l'Agent V1 + break + + # Si la queue s'est vidée après les exécutions serveur, rien à transmettre + if not queue or action is None: + return {"action": None, "session_id": session_id, "machine_id": machine_id} # ---- Pre-check écran (optionnel, non bloquant) ---- # Ne s'applique qu'aux actions qui ont un from_node (actions de workflow, diff --git a/agent_v0/server_v1/replay_engine.py b/agent_v0/server_v1/replay_engine.py index 0ba4721c7..fc9078f85 100644 --- a/agent_v0/server_v1/replay_engine.py +++ b/agent_v0/server_v1/replay_engine.py @@ -34,7 +34,14 @@ _ALLOWED_ACTION_TYPES = { "double_click", "right_click", "drag", "verify_screen", # Replay hybride : vérification visuelle entre groupes "pause_for_human", # Pause supervisée explicite (interceptée par /replay/next) + "extract_text", # OCR serveur sur dernier heartbeat → variable workflow + "t2a_decision", # Analyse LLM facturation T2A → variable workflow } + +# Types d'actions exécutées CÔTÉ SERVEUR (jamais transmises à l'Agent V1). +# Le pipeline /replay/next les traite en boucle interne et passe à l'action +# suivante jusqu'à trouver une action visuelle (à transmettre au client). +_SERVER_SIDE_ACTION_TYPES = {"extract_text", "t2a_decision"} _MAX_ACTION_TEXT_LENGTH = 10000 _MAX_KEYS_PER_COMBO = 10 # Touches autorisées dans les key_combo (modificateurs + touches spéciales + caractères simples) @@ -860,6 +867,23 @@ def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str, } return [normalized] # pas de target/coords pour cette action logique + elif action_type == "extract_text": + normalized["type"] = "extract_text" + normalized["parameters"] = { + "output_var": action_params.get("output_var", "extracted_text"), + "paragraph": bool(action_params.get("paragraph", True)), + } + return [normalized] + + elif action_type == "t2a_decision": + normalized["type"] = "t2a_decision" + normalized["parameters"] = { + "input_template": action_params.get("input_template", ""), + "output_var": action_params.get("output_var", "t2a_result"), + "model": action_params.get("model"), + } + return [normalized] + else: logger.warning(f"Type d'action inconnu : {action_type}") return [] @@ -894,6 +918,143 @@ def _substitute_variables(text: str, params: Dict[str, Any], defaults: Dict[str, return re.sub(r'\$\{(\w+)\}', replacer, text) +# Regex pour le templating runtime : {{var}} ou {{var.champ}} ou {{var.champ.sous}} +_RUNTIME_VAR_PATTERN = re.compile(r'\{\{\s*(\w+)(?:\.([\w.]+))?\s*\}\}') + + +def _resolve_runtime_vars_in_str(text: str, variables: Dict[str, Any]) -> str: + """Remplace {{var}} et {{var.field}} par leur valeur depuis le dict variables. + + Variables/champs absents : laissés tels quels (ne casse pas le pipeline). + Pour les valeurs non-str (dict, list), str() est appelé. + """ + def replacer(match): + var_name = match.group(1) + path = match.group(2) + if var_name not in variables: + return match.group(0) + value = variables[var_name] + if path: + for field in path.split('.'): + if isinstance(value, dict) and field in value: + value = value[field] + else: + return match.group(0) + return str(value) + + return _RUNTIME_VAR_PATTERN.sub(replacer, text) + + +def _resolve_runtime_vars(value: Any, variables: Dict[str, Any]) -> Any: + """Résout récursivement les {{var}} et {{var.field}} dans une valeur. + + Supporte str, dict, list. Les autres types sont retournés tels quels. + Si variables est vide ou None, value est retournée inchangée. + """ + if not variables: + return value + if isinstance(value, str): + return _resolve_runtime_vars_in_str(value, variables) + if isinstance(value, dict): + return {k: _resolve_runtime_vars(v, variables) for k, v in value.items()} + if isinstance(value, list): + return [_resolve_runtime_vars(item, variables) for item in value] + return value + + +# ========================================================================= +# Handlers pour les actions exécutées côté serveur (extract_text, t2a_decision) +# ========================================================================= + +def _handle_extract_text_action( + action: Dict[str, Any], + replay_state: Dict[str, Any], + session_id: str, + last_heartbeat: Dict[str, Dict[str, Any]], +) -> bool: + """Traite une action extract_text côté serveur. Stocke le texte OCRisé dans + replay_state["variables"][output_var]. Retourne True si succès. + + Robuste aux échecs : si pas de heartbeat ou OCR raté, stocke "" et retourne + False (le pipeline continue, pas de blocage). + """ + params = action.get("parameters") or {} + output_var = (params.get("output_var") or "extracted_text").strip() + paragraph = bool(params.get("paragraph", True)) + + heartbeat = last_heartbeat.get(session_id) or {} + path = heartbeat.get("path") + text = "" + + if path: + try: + from core.llm import extract_text_from_image + text = extract_text_from_image(path, paragraph=paragraph) + except Exception as e: + logger.warning("extract_text OCR échoué (%s) — variable '%s' = ''", e, output_var) + else: + logger.warning( + "extract_text : pas de heartbeat pour session %s — variable '%s' = ''", + session_id, output_var, + ) + + replay_state.setdefault("variables", {})[output_var] = text + logger.info( + "extract_text → variable '%s' (%d chars) replay %s", + output_var, len(text), replay_state.get("replay_id", "?"), + ) + return bool(text) + + +def _handle_t2a_decision_action( + action: Dict[str, Any], + replay_state: Dict[str, Any], +) -> bool: + """Traite une action t2a_decision côté serveur. Stocke le résultat JSON + dans replay_state["variables"][output_var]. Retourne True si succès. + + Le DPI à analyser vient de action.parameters.input_template (déjà résolu + par _resolve_runtime_vars donc les {{var}} sont remplis). + """ + params = action.get("parameters") or {} + output_var = (params.get("output_var") or "t2a_result").strip() + dpi_text = (params.get("input_template") or params.get("dpi") or "").strip() + model = params.get("model") or None # None → DEFAULT_MODEL + + if not dpi_text: + logger.warning( + "t2a_decision : input vide — variable '%s' = {decision: 'INDETERMINE'}", output_var, + ) + replay_state.setdefault("variables", {})[output_var] = { + "decision": "INDETERMINE", + "justification": "DPI vide ou non extrait", + "confiance": "faible", + "_error": "empty_input", + } + return False + + try: + from core.llm import analyze_dpi, DEFAULT_MODEL + result = analyze_dpi(dpi_text, model=model or DEFAULT_MODEL) + except Exception as e: + logger.warning("t2a_decision : analyze_dpi exception %s", e) + result = { + "decision": "INDETERMINE", + "justification": f"Erreur analyse : {e}", + "confiance": "faible", + "_error": str(e), + } + + replay_state.setdefault("variables", {})[output_var] = result + decision = result.get("decision", "?") + elapsed = result.get("_elapsed_s", "?") + logger.info( + "t2a_decision → variable '%s' decision=%s (%ss) replay %s", + output_var, decision, elapsed, replay_state.get("replay_id", "?"), + ) + return "_error" not in result + + def _expand_compound_steps( steps: List[Dict[str, Any]], base: Dict[str, Any], params: Dict[str, Any] ) -> List[Dict[str, Any]]: @@ -1216,6 +1377,10 @@ def _create_replay_state( # Champs pour pause supervisée (target_not_found) "failed_action": None, # Contexte de l'action en echec (quand paused_need_help) "pause_message": None, # Message a afficher a l'utilisateur + # Variables d'exécution produites en cours de workflow (extract_text, + # t2a_decision, etc.). Résolues via templating {{var}} ou {{var.field}} + # dans les paramètres des actions suivantes. + "variables": {}, } diff --git a/core/llm/__init__.py b/core/llm/__init__.py new file mode 100644 index 000000000..865f898cf --- /dev/null +++ b/core/llm/__init__.py @@ -0,0 +1,15 @@ +"""Modules LLM (clients Ollama et décisionnels métier) + extracteur OCR.""" + +from .t2a_decision import ( + PROMPT_TEMPLATE, + DEFAULT_MODEL, + analyze_dpi, +) +from .ocr_extractor import extract_text_from_image + +__all__ = [ + "PROMPT_TEMPLATE", + "DEFAULT_MODEL", + "analyze_dpi", + "extract_text_from_image", +] diff --git a/core/llm/ocr_extractor.py b/core/llm/ocr_extractor.py new file mode 100644 index 000000000..f7d14f5c9 --- /dev/null +++ b/core/llm/ocr_extractor.py @@ -0,0 +1,71 @@ +"""Extracteur OCR — texte depuis une image (screenshot d'écran). + +Utilise EasyOCR fr+en. Singleton (chargement modèle ~3s au premier appel). + +Conçu pour le pipeline streaming serveur (action `extract_text`) : récupère +un screenshot fresh (dernier heartbeat ou capture forcée), applique l'OCR, +retourne le texte concaténé pour analyse downstream (ex: t2a_decision). +""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Optional, Tuple + +logger = logging.getLogger(__name__) + +_easyocr_reader = None + + +def _get_reader(): + """Initialise EasyOCR fr+en au premier appel (singleton).""" + global _easyocr_reader + if _easyocr_reader is None: + import easyocr + try: + _easyocr_reader = easyocr.Reader(['fr', 'en'], gpu=True, verbose=False) + logger.info("EasyOCR initialisé (fr+en, GPU)") + except Exception as e: + logger.warning("EasyOCR GPU indisponible (%s), fallback CPU", e) + _easyocr_reader = easyocr.Reader(['fr', 'en'], gpu=False, verbose=False) + return _easyocr_reader + + +def extract_text_from_image( + image_path: str, + region: Optional[Tuple[int, int, int, int]] = None, + paragraph: bool = True, +) -> str: + """Extrait le texte d'une image via EasyOCR. + + Args: + image_path: chemin du PNG sur disque. + region: (x, y, w, h) pour cropper avant OCR. None = image entière. + paragraph: True pour regrouper les lignes en paragraphes (lisible), + False pour blocs séparés (granulaire). + + Returns: + Texte concaténé. Chaque ligne / paragraphe est séparé par un saut de ligne. + En cas d'erreur, retourne une chaîne vide et log un warning. + """ + path = Path(image_path) + if not path.exists(): + logger.warning("extract_text: fichier introuvable %s", image_path) + return "" + + try: + from PIL import Image + import numpy as np + + img = Image.open(path) + if region: + x, y, w, h = region + img = img.crop((x, y, x + w, y + h)) + + reader = _get_reader() + results = reader.readtext(np.array(img), detail=0, paragraph=paragraph) + return "\n".join(str(r).strip() for r in results if r) + except Exception as e: + logger.warning("extract_text échoué sur %s : %s", image_path, e) + return "" diff --git a/core/llm/t2a_decision.py b/core/llm/t2a_decision.py new file mode 100644 index 000000000..9902f4b05 --- /dev/null +++ b/core/llm/t2a_decision.py @@ -0,0 +1,155 @@ +"""Aide à la décision de facturation urgences T2A/PMSI via LLM local. + +Décide si un passage aux urgences relève : +- du FORFAIT_URGENCE (passage simple, retour à domicile) +- de la REQUALIFICATION_HOSPITALISATION (séjour MCO, valorisation 1k-5k€+) + +Le prompt impose une extraction littérale des faits du DPI (pas d'invention) +et une modulation honnête de la confiance. Validé sur 15 DPI synthétiques : +qwen2.5:7b atteint 100 % d'accuracy en ~5 s/cas avec 4,7 Go VRAM. + +Voir docs/clients/ght_sud_95/ et demo/facturation_urgences/RESULTATS.md pour le +bench comparatif des 11 LLMs évalués. +""" + +from __future__ import annotations + +import json +import logging +import os +import time +import urllib.error +import urllib.request +from typing import Any, Dict + +logger = logging.getLogger(__name__) + +OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/generate") +DEFAULT_MODEL = os.environ.get("T2A_MODEL", "qwen2.5:7b") +DEFAULT_TIMEOUT = 60 # secondes + +PROMPT_TEMPLATE = """Tu es médecin DIM (Département d'Information Médicale), expert en facturation T2A/PMSI aux urgences hospitalières en France. + +Analyse le dossier patient ci-dessous pour déterminer si le passage relève : +- FORFAIT_URGENCE : passage simple, retour à domicile, sans surveillance prolongée ni soins continus +- REQUALIFICATION_HOSPITALISATION : séjour MCO requis selon les critères PMSI/ATIH + +INSTRUCTIONS STRICTES : +1. N'utilise QUE des éléments littéralement présents dans le dossier patient. N'invente AUCUN critère. +2. Identifie d'abord les éléments en faveur d'une hospitalisation, puis ceux en faveur d'un forfait, puis tranche. +3. Calcule la durée totale du passage en heures (admission → sortie/transfert) à partir des horaires du dossier. +4. Module ta confiance honnêtement : + - "elevee" uniquement si tous les indices convergent + - "moyenne" si éléments ambivalents + - "faible" si information manquante ou très atypique + +Réponds STRICTEMENT en JSON valide, sans texte avant ni après : +{{ + "duree_passage_heures": , + "elements_pour_hospitalisation": [], + "elements_pour_forfait": [], + "decision": "FORFAIT_URGENCE" | "REQUALIFICATION_HOSPITALISATION", + "justification": "<2-3 phrases s'appuyant explicitement sur les faits ci-dessus>", + "confiance": "elevee" | "moyenne" | "faible" +}} + +DOSSIER PATIENT : +{dpi} +""" + + +def analyze_dpi( + dpi_text: str, + model: str = DEFAULT_MODEL, + timeout: int = DEFAULT_TIMEOUT, + ollama_url: str = OLLAMA_URL, +) -> Dict[str, Any]: + """Soumet un DPI urgences à un LLM Ollama et retourne la décision JSON. + + Args: + dpi_text: Texte du dossier patient (concaténation des onglets ou DPI brut). + model: Modèle Ollama à utiliser (default qwen2.5:7b — 100% accuracy bench). + timeout: Timeout HTTP en secondes. + ollama_url: Endpoint Ollama (default localhost:11434/api/generate). + + Returns: + Dict avec : + decision: "FORFAIT_URGENCE" | "REQUALIFICATION_HOSPITALISATION" + elements_pour_hospitalisation: List[str] + elements_pour_forfait: List[str] + duree_passage_heures: float + justification: str + confiance: "elevee" | "moyenne" | "faible" + _elapsed_s: float (latence) + _model: str + En cas d'erreur : + {"_error": str, "_elapsed_s": float} (réseau / Ollama indisponible) + {"_parse_error": True, "_raw": str, "_elapsed_s": float} (JSON invalide) + """ + payload = { + "model": model, + "prompt": PROMPT_TEMPLATE.format(dpi=dpi_text), + "stream": False, + "format": "json", + "keep_alive": "5m", + "options": { + "temperature": 0.1, + "num_predict": 4000, + "num_ctx": 4096, + "reasoning_effort": "minimal", + }, + } + data = json.dumps(payload).encode("utf-8") + req = urllib.request.Request( + ollama_url, + data=data, + headers={"Content-Type": "application/json"}, + method="POST", + ) + t0 = time.time() + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + body = json.loads(resp.read().decode("utf-8")) + except (urllib.error.URLError, TimeoutError, ConnectionError) as e: + elapsed = round(time.time() - t0, 1) + logger.warning("analyze_dpi: Ollama indisponible (%s) après %.1fs", e, elapsed) + return {"_error": str(e), "_elapsed_s": elapsed, "_model": model} + + elapsed = time.time() - t0 + + raw_response = body.get("response", "").strip() + raw_thinking = body.get("thinking", "").strip() + + candidates = [raw_response] + if not raw_response and raw_thinking: + last_close = raw_thinking.rfind("}") + last_open = raw_thinking.rfind("{", 0, last_close) + if last_open != -1 and last_close != -1: + candidates.append(raw_thinking[last_open:last_close + 1]) + + parsed = None + for cand in candidates: + cleaned = cand + if cleaned.startswith("```"): + cleaned = cleaned.split("\n", 1)[-1] + if cleaned.endswith("```"): + cleaned = cleaned.rsplit("```", 1)[0] + cleaned = cleaned.strip() + try: + parsed = json.loads(cleaned) + break + except json.JSONDecodeError: + continue + + if parsed is None: + return { + "_parse_error": True, + "_raw": (raw_response or raw_thinking)[:500], + "_elapsed_s": round(elapsed, 1), + "_model": model, + } + + parsed["_elapsed_s"] = round(elapsed, 1) + parsed["_model"] = model + parsed["_eval_count"] = body.get("eval_count") + return parsed diff --git a/tests/integration/test_t2a_extract.py b/tests/integration/test_t2a_extract.py new file mode 100644 index 000000000..07354c56e --- /dev/null +++ b/tests/integration/test_t2a_extract.py @@ -0,0 +1,282 @@ +"""Tests des actions extract_text et t2a_decision (C+.5/.6). + +Couvre : +- _resolve_runtime_vars : templating {{var}} / {{var.field}} +- _handle_extract_text_action : OCR mocké, stockage variable +- _handle_t2a_decision_action : analyze_dpi mocké, stockage JSON +- _edge_to_normalized_actions pour les 2 types +- Bridge VWB → core (mapping + paramètres) +""" + +from unittest.mock import patch + +import pytest + +from agent_v0.server_v1.replay_engine import ( + _ALLOWED_ACTION_TYPES, + _SERVER_SIDE_ACTION_TYPES, + _resolve_runtime_vars, + _handle_extract_text_action, + _handle_t2a_decision_action, + _edge_to_normalized_actions, + _create_replay_state, +) +from visual_workflow_builder.backend.services.learned_workflow_bridge import ( + VWB_ACTION_TO_CORE, + convert_vwb_to_core_workflow, + _vwb_params_to_core, +) + + +# ---------------------------------------------------------------------- +# Templating runtime +# ---------------------------------------------------------------------- + +def test_resolve_simple_var(): + r = _resolve_runtime_vars("Patient {{ipp}}", {"ipp": "25003284"}) + assert r == "Patient 25003284" + + +def test_resolve_field_access(): + r = _resolve_runtime_vars( + "{{result.decision}} car {{result.justification}}", + {"result": {"decision": "UHCD", "justification": "asthme + insuf coro"}}, + ) + assert "UHCD car asthme + insuf coro" == r + + +def test_resolve_missing_var_kept_intact(): + r = _resolve_runtime_vars("Hello {{absent}} world", {"x": "y"}) + assert r == "Hello {{absent}} world" + + +def test_resolve_missing_field_kept_intact(): + r = _resolve_runtime_vars("{{var.absent}}", {"var": {"present": "x"}}) + assert r == "{{var.absent}}" + + +def test_resolve_in_dict_recursive(): + r = _resolve_runtime_vars( + {"msg": "IPP {{ipp}}", "nested": {"k": "{{ipp}}"}, "list": ["{{age}}"]}, + {"ipp": "X", "age": 77}, + ) + assert r == {"msg": "IPP X", "nested": {"k": "X"}, "list": ["77"]} + + +def test_resolve_empty_vars_noop(): + val = {"k": "{{var}}"} + assert _resolve_runtime_vars(val, {}) == val + assert _resolve_runtime_vars(val, None) == val + + +def test_resolve_non_string_passthrough(): + assert _resolve_runtime_vars(42, {"x": "y"}) == 42 + assert _resolve_runtime_vars(None, {"x": "y"}) is None + + +def test_resolve_handles_whitespace_in_braces(): + r = _resolve_runtime_vars("{{ ipp }}", {"ipp": "X"}) + assert r == "X" + + +# ---------------------------------------------------------------------- +# Action types & types serveur +# ---------------------------------------------------------------------- + +def test_extract_text_in_allowed(): + assert "extract_text" in _ALLOWED_ACTION_TYPES + + +def test_t2a_decision_in_allowed(): + assert "t2a_decision" in _ALLOWED_ACTION_TYPES + + +def test_server_side_types(): + assert _SERVER_SIDE_ACTION_TYPES == {"extract_text", "t2a_decision"} + + +# ---------------------------------------------------------------------- +# Handler extract_text +# ---------------------------------------------------------------------- + +def test_handle_extract_text_stores_variable(): + state = _create_replay_state("rep1", "wf", "sess", 3) + last_hb = {"sess": {"path": "/fake/heartbeat.png", "timestamp": 0}} + action = { + "type": "extract_text", + "parameters": {"output_var": "texte_motif", "paragraph": True}, + } + with patch( + "core.llm.extract_text_from_image", + return_value="Patient asthme peakflow 260", + ): + ok = _handle_extract_text_action(action, state, "sess", last_hb) + assert ok is True + assert state["variables"]["texte_motif"] == "Patient asthme peakflow 260" + + +def test_handle_extract_text_no_heartbeat_stores_empty(): + state = _create_replay_state("rep1", "wf", "sess", 3) + last_hb = {} # pas de heartbeat + action = {"type": "extract_text", "parameters": {"output_var": "v"}} + ok = _handle_extract_text_action(action, state, "sess", last_hb) + assert ok is False + assert state["variables"]["v"] == "" + + +def test_handle_extract_text_default_var_name(): + state = _create_replay_state("rep1", "wf", "sess", 3) + last_hb = {"sess": {"path": "/x.png", "timestamp": 0}} + action = {"type": "extract_text", "parameters": {}} + with patch("core.llm.extract_text_from_image", return_value="abc"): + _handle_extract_text_action(action, state, "sess", last_hb) + assert "extracted_text" in state["variables"] + + +# ---------------------------------------------------------------------- +# Handler t2a_decision +# ---------------------------------------------------------------------- + +def test_handle_t2a_decision_stores_json(): + state = _create_replay_state("rep1", "wf", "sess", 3) + action = { + "type": "t2a_decision", + "parameters": { + "input_template": "Patient 78 ans, asthme, peakflow 260", + "output_var": "decision_t2a", + "model": "qwen2.5:7b", + }, + } + fake_result = { + "decision": "REQUALIFICATION_HOSPITALISATION", + "justification": "Surveillance continue requise", + "confiance": "elevee", + "_elapsed_s": 4.2, + } + with patch("core.llm.analyze_dpi", return_value=fake_result): + ok = _handle_t2a_decision_action(action, state) + assert ok is True + assert state["variables"]["decision_t2a"]["decision"] == "REQUALIFICATION_HOSPITALISATION" + + +def test_handle_t2a_decision_empty_input_returns_indetermine(): + state = _create_replay_state("rep1", "wf", "sess", 3) + action = {"type": "t2a_decision", "parameters": {"input_template": "", "output_var": "r"}} + ok = _handle_t2a_decision_action(action, state) + assert ok is False + assert state["variables"]["r"]["decision"] == "INDETERMINE" + + +def test_handle_t2a_decision_analyze_exception(): + state = _create_replay_state("rep1", "wf", "sess", 3) + action = {"type": "t2a_decision", "parameters": {"input_template": "x", "output_var": "r"}} + with patch("core.llm.analyze_dpi", side_effect=RuntimeError("ollama down")): + ok = _handle_t2a_decision_action(action, state) + assert ok is False + assert state["variables"]["r"]["decision"] == "INDETERMINE" + assert "ollama down" in state["variables"]["r"]["_error"] + + +# ---------------------------------------------------------------------- +# Edge → action normalisée +# ---------------------------------------------------------------------- + +class _FakeAction: + def __init__(self, type_, parameters=None): + self.type = type_ + self.target = None + self.parameters = parameters or {} + + +class _FakeEdge: + def __init__(self, action, edge_id="e1", from_node="n1", to_node="n2"): + self.edge_id = edge_id + self.from_node = from_node + self.to_node = to_node + self.action = action + + +def test_edge_to_action_extract_text(): + edge = _FakeEdge(_FakeAction( + "extract_text", + parameters={"output_var": "texte_examens", "paragraph": True}, + )) + actions = _edge_to_normalized_actions(edge, params={}) + assert len(actions) == 1 + a = actions[0] + assert a["type"] == "extract_text" + assert a["parameters"]["output_var"] == "texte_examens" + assert a["parameters"]["paragraph"] is True + + +def test_edge_to_action_t2a_decision(): + edge = _FakeEdge(_FakeAction( + "t2a_decision", + parameters={ + "input_template": "{{texte_motif}}", + "output_var": "result", + "model": "qwen2.5:7b", + }, + )) + actions = _edge_to_normalized_actions(edge, params={}) + a = actions[0] + assert a["type"] == "t2a_decision" + assert a["parameters"]["input_template"] == "{{texte_motif}}" + assert a["parameters"]["output_var"] == "result" + assert a["parameters"]["model"] == "qwen2.5:7b" + + +# ---------------------------------------------------------------------- +# Bridge VWB → core +# ---------------------------------------------------------------------- + +def test_vwb_extract_text_passthrough(): + assert VWB_ACTION_TO_CORE["extract_text"] == "extract_text" + + +def test_vwb_t2a_decision_passthrough(): + assert VWB_ACTION_TO_CORE["t2a_decision"] == "t2a_decision" + + +def test_vwb_params_extract_text_preserves_output_var(): + p = _vwb_params_to_core("extract_text", {"output_var": "v", "paragraph": False}) + assert p == {"output_var": "v", "paragraph": False} + + +def test_vwb_params_extract_text_legacy_variable_name(): + """Compat avec l'ancien paramètre variable_name côté VWB.""" + p = _vwb_params_to_core("extract_text", {"variable_name": "v_legacy"}) + assert p["output_var"] == "v_legacy" + + +def test_vwb_params_t2a_decision_preserves_all(): + p = _vwb_params_to_core("t2a_decision", { + "input_template": "DPI {{ipp}}", + "output_var": "dec", + "model": "qwen2.5:7b", + }) + assert p == {"input_template": "DPI {{ipp}}", "output_var": "dec", "model": "qwen2.5:7b"} + + +def test_export_workflow_with_t2a_chain(): + """Workflow VWB extract_text → t2a_decision → pause_for_human export propre.""" + workflow_data = {"id": "wf_t2a", "name": "Demo T2A"} + steps_data = [ + {"id": "s1", "action_type": "click_anchor", "parameters": {"target_text": "25003284"}, "label": "Clic IPP"}, + {"id": "s2", "action_type": "extract_text", "parameters": {"output_var": "dpi"}, "label": "OCR"}, + {"id": "s3", "action_type": "t2a_decision", "parameters": { + "input_template": "{{dpi}}", "output_var": "dec", "model": "qwen2.5:7b", + }, "label": "Analyse"}, + {"id": "s4", "action_type": "pause_for_human", "parameters": { + "message": "Décision : {{dec.decision}} — {{dec.justification}}", + }, "label": "Validation"}, + {"id": "s5", "action_type": "click_anchor", "parameters": {"target_text": "Enregistrer"}, "label": "Clic Enregistrer"}, + ] + core = convert_vwb_to_core_workflow(workflow_data, steps_data) + edge_types = [e["action"]["type"] for e in core["edges"]] + assert "extract_text" in edge_types + assert "t2a_decision" in edge_types + assert "pause_for_human" in edge_types + # Vérifier que le templating est bien transporté + t2a_edge = next(e for e in core["edges"] if e["action"]["type"] == "t2a_decision") + assert t2a_edge["action"]["parameters"]["input_template"] == "{{dpi}}" diff --git a/visual_workflow_builder/backend/services/learned_workflow_bridge.py b/visual_workflow_builder/backend/services/learned_workflow_bridge.py index 884f3df96..d6dc81914 100644 --- a/visual_workflow_builder/backend/services/learned_workflow_bridge.py +++ b/visual_workflow_builder/backend/services/learned_workflow_bridge.py @@ -57,8 +57,9 @@ VWB_ACTION_TO_CORE = { "scroll_to_anchor": "scroll", "visual_condition": "evaluate_condition", "screenshot_evidence": "screenshot", - "extract_text": "extract_data", - "pause_for_human": "pause_for_human", # passthrough — intercepté par api_stream /replay/next + "extract_text": "extract_text", # passthrough — handler serveur OCR + variable + "pause_for_human": "pause_for_human", # passthrough — intercepté par api_stream /replay/next + "t2a_decision": "t2a_decision", # passthrough — handler serveur LLM T2A + variable } @@ -664,6 +665,20 @@ def _vwb_params_to_core(action_type: str, params: Dict[str, Any]) -> Dict[str, A elif action_type == "pause_for_human": core_params["message"] = params.get("message", "Validation requise") + elif action_type == "extract_text": + # variable_name côté VWB → output_var côté core (compat avec + # le catalogue VWB existant qui utilise variable_name) + var = params.get("output_var") or params.get("variable_name") or "extracted_text" + core_params["output_var"] = var + if "paragraph" in params: + core_params["paragraph"] = bool(params["paragraph"]) + + elif action_type == "t2a_decision": + core_params["input_template"] = params.get("input_template", "") + core_params["output_var"] = params.get("output_var", "t2a_result") + if params.get("model"): + core_params["model"] = params["model"] + return core_params diff --git a/visual_workflow_builder/frontend_v4/src/types.ts b/visual_workflow_builder/frontend_v4/src/types.ts index 32f753794..9d34e8cab 100644 --- a/visual_workflow_builder/frontend_v4/src/types.ts +++ b/visual_workflow_builder/frontend_v4/src/types.ts @@ -44,6 +44,7 @@ export type ActionType = | 'visual_condition' | 'loop_visual' | 'pause_for_human' + | 't2a_decision' | 'download_to_folder' | 'ai_analyze_text' | 'ai_ocr' @@ -109,8 +110,9 @@ export const ACTIONS: ActionDefinition[] = [ ] }, // === EXTRACTION DE DONNÉES === - { type: 'extract_text', label: 'Extraire texte', icon: '📋', description: 'Extrait le texte visible dans la zone de l\'ancre via OCR.', category: 'data', needsAnchor: true, params: [ - { name: 'variable_name', type: 'string', description: 'Nom de la variable pour stocker le résultat' } + { type: 'extract_text', label: 'Extraire texte (OCR écran)', icon: '📋', description: 'OCR EasyOCR fr+en sur le dernier screenshot. Stocke le texte dans une variable réutilisable plus loin via {{output_var}}. Pas d\'ancre nécessaire — extrait toute la page visible.', category: 'data', needsAnchor: false, params: [ + { name: 'output_var', type: 'string', description: 'Nom de la variable de sortie (ex: texte_motif). Réutilisable via {{nom}}.' }, + { name: 'paragraph', type: 'boolean', description: 'Regrouper en paragraphes (true) ou lignes brutes (false)' } ] }, { type: 'extract_table', label: 'Extraire tableau', icon: '📊', description: 'Extrait un tableau structuré depuis la zone de l\'ancre.', category: 'data', needsAnchor: true, params: [ { name: 'variable_name', type: 'string', description: 'Nom de la variable pour stocker le tableau' } @@ -133,6 +135,11 @@ export const ACTIONS: ActionDefinition[] = [ { type: 'pause_for_human', label: 'Pause supervisée', icon: '⏸', description: 'Léa s\'arrête et demande validation humaine via une bulle interactive (boutons Continuer / Annuler).', category: 'logic', needsAnchor: false, params: [ { name: 'message', type: 'string', description: 'Message affiché dans la bulle (ex: "Je ne suis pas sûre du critère 3, validez-vous UHCD ?")' } ] }, + { type: 't2a_decision', label: 'Décision T2A (LLM)', icon: '🧠', description: 'Analyse un DPI urgences via LLM local (qwen2.5:7b par défaut) et propose FORFAIT_URGENCE ou REQUALIFICATION_HOSPITALISATION. Retourne JSON {decision, justification, elements_pour/contre, confiance}. Bench validé 100% accuracy.', category: 'logic', needsAnchor: false, params: [ + { name: 'input_template', type: 'string', description: 'DPI à analyser. Supporte le templating {{var}} pour concaténer plusieurs extractions (ex: "{{texte_motif}}\\n{{texte_examens}}\\n{{texte_notes}}")' }, + { name: 'output_var', type: 'string', description: 'Variable de sortie (ex: decision_t2a). Accès aux champs : {{decision_t2a.decision}}, {{decision_t2a.justification}}' }, + { name: 'model', type: 'string', description: 'Modèle Ollama (default qwen2.5:7b). Autres : t2a-gemma3-27b-q4, gpt-oss:120b-cloud...' } + ] }, // === INTELLIGENCE ARTIFICIELLE === { type: 'ai_ocr', label: 'OCR Intelligent', icon: '📝', description: 'Reconnaissance de texte par IA sur la zone de l\'ancre.', category: 'ai', needsAnchor: true, params: [