feat(t2a): build_dpi_enriched - extraction déterministe horaires + classifications cliniques

Préprocesseur Python qui injecte un bloc FAITS_CALCULÉS en tête du DPI avant l'appel LLM, pour neutraliser l'hallucination de durée (bug "23h" sur cas MOREL, confusion avec "depuis 23h" de l'Observ. IDE Urg). Extrait depuis le bandeau Easily Assure et la Synthèse Urgences : - âge (dateutil.relativedelta) - date admission / sortie + durée passage (format humain + décimal) - CCMU / GEMSA libellé complet (parser multi-ligne) - priorité IAO, mode de venue / médicalisation / mode d'entrée - diagnostic principal - decision_terrain + orientation_terrain (metadata only, jamais injectés dans le prompt pour ne pas biaiser le LLM) Retour tuple (dpi_enriched, metadata) pour permettre les garde-fous serveur Python ↔ LLM au commit 2. Robustesse : - re.search 1re occurrence + WARNING si bandeau divergent multi-occurrences - Synthèse Urgences priorité sur bandeau pour dates - Valeur exigée sur même ligne que label (évite capture de section title) - Cas négatif (horaires absents) → "NON CALCULABLE" + parsing_warnings - Jamais de crash, retour tuple toujours valide Tests : 4/4 verts (golden MOREL string + metadata, négatif sortie absente, DPI vide). Pas de régression sur tests/integration/test_t2a_extract.py. Brief complet : docs/handoffs/2026-05-12_brief_S1_build_dpi_enriched.md Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-12 18:49:49 +02:00
parent 2eeaa806bb
commit 9872f4510c
4 changed files with 937 additions and 9 deletions
--- a/agent_v0/server_v1/replay_engine.py
+++ b/agent_v0/server_v1/replay_engine.py
@@ -15,6 +15,7 @@ Extrait de api_stream.py pour clarifier l'architecture.

 import json
 import logging
+import os
 import re
 import threading
 import time
@@ -35,13 +36,27 @@ _ALLOWED_ACTION_TYPES = {
    "verify_screen",     # Replay hybride : vérification visuelle entre groupes
    "pause_for_human",   # Pause supervisée explicite (interceptée par /replay/next)
    "extract_text",      # OCR serveur sur dernier heartbeat → variable workflow
+    "extract_table",     # OCR serveur + filtre regex → liste structurée (boucle)
+    "extract_text_scroll", # Marker côté graphe — expansé en sous-actions par _edge_to_normalized_actions
+    "_concat_text_vars",   # Action serveur interne (générée par expansion extract_text_scroll)
    "t2a_decision",      # Analyse LLM facturation T2A → variable workflow
+    "llm_generate",      # Génération texte libre côté serveur → variable workflow
 }

 # Types d'actions exécutées CÔTÉ SERVEUR (jamais transmises à l'Agent V1).
 # Le pipeline /replay/next les traite en boucle interne et passe à l'action
 # suivante jusqu'à trouver une action visuelle (à transmettre au client).
-_SERVER_SIDE_ACTION_TYPES = {"extract_text", "t2a_decision"}
+_SERVER_SIDE_ACTION_TYPES = {
+    "extract_text",
+    "extract_table",
+    "t2a_decision",
+    "llm_generate",
+    "_concat_text_vars",
+}
+
+# Pause par défaut entre Ctrl+End/Home et la capture suivante (ms).
+# Configurable par step via parameters.scroll_pause_ms ; default ici.
+SCROLL_PAUSE_MS = 500
 _MAX_ACTION_TEXT_LENGTH = 10000
 _MAX_KEYS_PER_COMBO = 10
 # Touches autorisées dans les key_combo (modificateurs + touches spéciales + caractères simples)
@@ -875,6 +890,42 @@ def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str,
        }
        return [normalized]

+    elif action_type == "extract_table":
+        normalized["type"] = "extract_table"
+        normalized["parameters"] = {
+            "output_var": action_params.get("output_var", "table_rows"),
+            "pattern": action_params.get("pattern"),
+            "limit": action_params.get("limit"),
+            "region": action_params.get("region"),
+        }
+        return [normalized]
+
+    elif action_type == "extract_text_scroll":
+        # Expansion en séquence : OCR(top) → Ctrl+End → wait → OCR(bottom)
+        # → concat(top, bottom → final) → Ctrl+Home.
+        # variable_name (préféré) ou output_var (compat extract_text).
+        final_var = (
+            action_params.get("variable_name")
+            or action_params.get("output_var")
+            or "extracted_text"
+        )
+        paragraph = bool(action_params.get("paragraph", True))
+        # Pause après scroll Ctrl+End — configurable au step.
+        # Default 500ms (Wikipedia) ; cible 1500-2000ms pour DPI Citrix lent.
+        try:
+            scroll_pause = int(action_params.get("scroll_pause_ms", SCROLL_PAUSE_MS))
+        except (TypeError, ValueError):
+            scroll_pause = SCROLL_PAUSE_MS
+        # Variables internes nommées par préfixe : invisibles à l'utilisateur.
+        # Préfixe `_` pour signaler "interne" et éviter collision.
+        top_var = f"__{final_var}_top"
+        bottom_var = f"__{final_var}_bottom"
+
+        return _expand_extract_text_scroll(
+            base, final_var, top_var, bottom_var, paragraph,
+            scroll_pause_ms=scroll_pause,
+        )
+
    elif action_type == "t2a_decision":
        normalized["type"] = "t2a_decision"
        normalized["parameters"] = {
@@ -884,6 +935,22 @@ def _edge_to_normalized_actions(edge, params: Dict[str, Any]) -> List[Dict[str,
        }
        return [normalized]

+    elif action_type == "llm_generate":
+        normalized["type"] = "llm_generate"
+        normalized["parameters"] = {
+            "prompt": action_params.get("prompt", ""),
+            "context": action_params.get("context", ""),
+            "output_var": (
+                action_params.get("output_var")
+                or action_params.get("variable_name")
+                or "generated_text"
+            ),
+            "model": action_params.get("model"),
+        }
+        if action_params.get("temperature") is not None:
+            normalized["parameters"]["temperature"] = action_params.get("temperature")
+        return [normalized]
+
    else:
        logger.warning(f"Type d'action inconnu : {action_type}")
        return []
@@ -966,6 +1033,18 @@ def _resolve_runtime_vars(value: Any, variables: Dict[str, Any]) -> Any:
 # Handlers pour les actions exécutées côté serveur (extract_text, t2a_decision)
 # =========================================================================

+def _normalize_ollama_endpoint(raw_url: str) -> str:
+    """Normalise une URL Ollama pour les clients qui attendent l'endpoint racine.
+
+    `OLLAMA_URL` est parfois configuré vers `/api/generate` alors que
+    `LLMActionHandler` attend la racine `http://host:port`.
+    """
+    endpoint = (raw_url or "http://localhost:11434").strip().rstrip("/")
+    for suffix in ("/api/generate", "/api/chat"):
+        if endpoint.endswith(suffix):
+            return endpoint[: -len(suffix)]
+    return endpoint
+
 def _handle_extract_text_action(
    action: Dict[str, Any],
    replay_state: Dict[str, Any],
@@ -979,11 +1058,40 @@ def _handle_extract_text_action(
    False (le pipeline continue, pas de blocage).
    """
    params = action.get("parameters") or {}
-    output_var = (params.get("output_var") or "extracted_text").strip()
+    # Compatibilité VWB : "variable_name" (VWB) et "output_var" (agent libre)
+    output_var = (params.get("output_var") or params.get("variable_name") or "extracted_text").strip()
    paragraph = bool(params.get("paragraph", True))

-    heartbeat = last_heartbeat.get(session_id) or {}
-    path = heartbeat.get("path")
+    # Source prioritaire : screenshot envoyé par l'agent après la dernière action.
+    # Si c'est du base64, on le sauvegarde dans un fichier temp pour l'OCR.
+    # Fallback : heartbeat de fond (vrai chemin serveur, via "bg_{machine_id}").
+    path = None
+    raw_screenshot = replay_state.get("last_screenshot") or ""
+    if raw_screenshot:
+        if raw_screenshot.startswith("data:"):
+            # base64 → fichier temp
+            try:
+                import base64, tempfile
+                header, b64data = raw_screenshot.split(",", 1)
+                suffix = ".jpg" if "jpeg" in header else ".png"
+                tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
+                tmp.write(base64.b64decode(b64data))
+                tmp.close()
+                path = tmp.name
+            except Exception as e:
+                logger.warning("extract_text: décodage base64 screenshot échoué: %s", e)
+        elif os.path.isfile(raw_screenshot):
+            path = raw_screenshot
+
+    if not path:
+        machine_id = replay_state.get("machine_id", "")
+        bg_session = f"bg_{machine_id}" if machine_id and machine_id != "default" else None
+        heartbeat = (
+            last_heartbeat.get(session_id)
+            or (last_heartbeat.get(bg_session) if bg_session else None)
+            or {}
+        )
+        path = heartbeat.get("path")
    text = ""

    if path:
@@ -1006,6 +1114,93 @@ def _handle_extract_text_action(
    return bool(text)


+def _handle_extract_table_action(
+    action: Dict[str, Any],
+    replay_state: Dict[str, Any],
+    session_id: str,
+    last_heartbeat: Dict[str, Dict[str, Any]],
+) -> bool:
+    """Traite une action extract_table côté serveur. OCR + filtre regex pour
+    retourner une liste structurée (ex : IPP d'un tableau de patients) qui
+    pourra être bouclée par le templating ${patients[i]}.
+
+    Paramètres reconnus :
+        output_var : nom de variable runtime (default "table_rows")
+        pattern    : regex à matcher sur chaque token OCR (ex : r"^25\\d{6}$")
+        limit      : nb max d'entrées à retourner
+        region     : (x, y, w, h) en pixels pour cropper avant OCR
+                     (None = image entière)
+
+    Robuste aux échecs : si pas de heartbeat ou OCR raté, stocke [] et
+    retourne False — le pipeline continue.
+    """
+    params = action.get("parameters") or {}
+    output_var = (params.get("output_var") or params.get("variable_name") or "table_rows").strip()
+    pattern = params.get("pattern") or None
+    limit = params.get("limit")
+    region = params.get("region") or None
+    if isinstance(limit, str):
+        try:
+            limit = int(limit)
+        except ValueError:
+            limit = None
+
+    # Source : screenshot du heartbeat (idem extract_text)
+    path = None
+    raw_screenshot = replay_state.get("last_screenshot") or ""
+    if raw_screenshot:
+        if raw_screenshot.startswith("data:"):
+            try:
+                import base64, tempfile
+                header, b64data = raw_screenshot.split(",", 1)
+                suffix = ".jpg" if "jpeg" in header else ".png"
+                tmp = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
+                tmp.write(base64.b64decode(b64data))
+                tmp.close()
+                path = tmp.name
+            except Exception as e:
+                logger.warning("extract_table: décodage base64 screenshot échoué: %s", e)
+        elif os.path.isfile(raw_screenshot):
+            path = raw_screenshot
+
+    if not path:
+        machine_id = replay_state.get("machine_id", "")
+        bg_session = f"bg_{machine_id}" if machine_id and machine_id != "default" else None
+        heartbeat = (
+            last_heartbeat.get(session_id)
+            or (last_heartbeat.get(bg_session) if bg_session else None)
+            or {}
+        )
+        path = heartbeat.get("path")
+
+    rows: list = []
+    if path:
+        try:
+            from core.llm import extract_table_from_image
+            rows = extract_table_from_image(
+                path,
+                region=tuple(region) if region else None,
+                pattern=pattern,
+                limit=limit,
+            )
+        except Exception as e:
+            logger.warning(
+                "extract_table OCR échoué (%s) — variable '%s' = []", e, output_var,
+            )
+    else:
+        logger.warning(
+            "extract_table : pas de heartbeat pour session %s — variable '%s' = []",
+            session_id, output_var,
+        )
+
+    replay_state.setdefault("variables", {})[output_var] = rows
+    logger.info(
+        "extract_table → variable '%s' (%d entrées, pattern=%r, limit=%s) replay %s",
+        output_var, len(rows), pattern, limit, replay_state.get("replay_id", "?"),
+    )
+    return bool(rows)
+
+
 def _handle_t2a_decision_action(
    action: Dict[str, Any],
    replay_state: Dict[str, Any],
@@ -1034,8 +1229,19 @@ def _handle_t2a_decision_action(
        return False

    try:
-        from core.llm import analyze_dpi, DEFAULT_MODEL
-        result = analyze_dpi(dpi_text, model=model or DEFAULT_MODEL)
+        from core.llm import analyze_dpi, DEFAULT_MODEL, build_dpi_enriched
+        # Enrichissement déterministe avant LLM : injection FAITS_CALCULÉS en
+        # tête (durée, âge, CCMU/GEMSA/priorité…) pour neutraliser les
+        # hallucinations de durée (cf. bug "23h" MOREL). La metadata est
+        # capturée pour les garde-fous serveur (commit 2 — Python ↔ LLM).
+        dpi_enriched, metadata = build_dpi_enriched(dpi_text)
+        logger.info(
+            "[build_dpi_enriched] duree_python=%sh decision_terrain=%r warnings=%d",
+            metadata.get("duree_heures_decimales"),
+            metadata.get("decision_terrain"),
+            len(metadata.get("parsing_warnings", [])),
+        )
+        result = analyze_dpi(dpi_enriched, model=model or DEFAULT_MODEL)
    except Exception as e:
        logger.warning("t2a_decision : analyze_dpi exception %s", e)
        result = {
@@ -1045,6 +1251,18 @@ def _handle_t2a_decision_action(
            "_error": str(e),
        }

+    # Si parse_error, injecter des valeurs de fallback pour que les templates restent lisibles
+    if result.get("_parse_error"):
+        raw_preview = result.get("_raw", "")[:200]
+        logger.warning("t2a_decision parse_error — raw: %s", raw_preview)
+        result.setdefault("decision", "INDETERMINE")
+        result.setdefault("decision_court", "À vérifier")
+        result.setdefault("preuve_critere1", "Analyse non disponible (erreur LLM)")
+        result.setdefault("preuve_critere2", "Analyse non disponible (erreur LLM)")
+        result.setdefault("preuve_critere3", "Analyse non disponible (erreur LLM)")
+        result.setdefault("justification", f"Réponse LLM non parsable : {raw_preview}")
+        result.setdefault("confiance", "faible")
+
    replay_state.setdefault("variables", {})[output_var] = result
    decision = result.get("decision", "?")
    elapsed = result.get("_elapsed_s", "?")
@@ -1052,7 +1270,176 @@ def _handle_t2a_decision_action(
        "t2a_decision → variable '%s' decision=%s (%ss) replay %s",
        output_var, decision, elapsed, replay_state.get("replay_id", "?"),
    )
-    return "_error" not in result
+    return "_error" not in result and not result.get("_parse_error")
+
+
+def _handle_llm_generate_action(
+    action: Dict[str, Any],
+    replay_state: Dict[str, Any],
+) -> bool:
+    """Traite une action llm_generate côté serveur.
+
+    Stocke le texte généré dans replay_state["variables"][output_var].
+    Les paramètres `prompt` et `context` sont déjà résolus via le templating
+    runtime avant d'arriver ici.
+    """
+    params = action.get("parameters") or {}
+    output_var = (
+        params.get("output_var")
+        or params.get("variable_name")
+        or "generated_text"
+    ).strip()
+    prompt = str(params.get("prompt") or "").strip()
+    context = str(params.get("context") or "")
+    model = params.get("model") or None
+
+    temperature = None
+    if params.get("temperature") is not None:
+        try:
+            temperature = float(params.get("temperature"))
+        except (TypeError, ValueError):
+            logger.warning(
+                "llm_generate : temperature invalide %r — fallback valeur par défaut",
+                params.get("temperature"),
+            )
+
+    if not prompt:
+        logger.warning(
+            "llm_generate : prompt vide — variable '%s' = ''",
+            output_var,
+        )
+        replay_state.setdefault("variables", {})[output_var] = ""
+        return False
+
+    generated = ""
+    try:
+        from core.execution.llm_actions import LLMActionHandler
+
+        handler = LLMActionHandler(
+            ollama_endpoint=_normalize_ollama_endpoint(
+                os.environ.get("OLLAMA_URL", "http://localhost:11434")
+            ),
+            timeout=180,
+        )
+        generated = handler.generate_text(
+            prompt=prompt,
+            context=context,
+            model=model,
+            temperature=temperature,
+        ).strip()
+    except Exception as e:
+        logger.warning("llm_generate : génération échouée (%s) — variable '%s' = ''", e, output_var)
+
+    replay_state.setdefault("variables", {})[output_var] = generated
+    logger.info(
+        "llm_generate → variable '%s' (%d chars, model=%s) replay %s",
+        output_var,
+        len(generated),
+        model or "default",
+        replay_state.get("replay_id", "?"),
+    )
+    return bool(generated)
+
+
+def _handle_concat_text_vars_action(
+    action: Dict[str, Any],
+    replay_state: Dict[str, Any],
+) -> bool:
+    """Traite une action serveur interne `_concat_text_vars`.
+
+    Concatène deux variables runtime existantes (top_var + separator + bottom_var)
+    et écrit le résultat dans output_var. Variables manquantes traitées comme "".
+
+    Action générée par l'expansion de `extract_text_scroll` ; pas exposée à
+    l'utilisateur final. Robuste aux échecs OCR amont (l'une ou l'autre var vide).
+    """
+    params = action.get("parameters") or {}
+    top_var = (params.get("top_var") or "").strip()
+    bottom_var = (params.get("bottom_var") or "").strip()
+    output_var = (params.get("output_var") or "extracted_text").strip()
+    separator = params.get("separator", "\n\n")
+
+    variables = replay_state.setdefault("variables", {})
+    top_text = str(variables.get(top_var, "") or "")
+    bottom_text = str(variables.get(bottom_var, "") or "")
+
+    # Si les deux sont vides, output reste "" (cohérent avec _handle_extract_text_action).
+    # Si un seul est vide, on évite un séparateur inutile en début/fin.
+    if top_text and bottom_text:
+        merged = top_text + separator + bottom_text
+    else:
+        merged = top_text or bottom_text
+
+    variables[output_var] = merged
+    # Nettoyage des variables internes pour ne pas polluer l'état.
+    if top_var.startswith("__"):
+        variables.pop(top_var, None)
+    if bottom_var.startswith("__"):
+        variables.pop(bottom_var, None)
+
+    logger.info(
+        "extract_text_scroll concat → variable '%s' (%d chars) replay %s",
+        output_var, len(merged), replay_state.get("replay_id", "?"),
+    )
+    return bool(merged)
+
+
+def _expand_extract_text_scroll(
+    base: Dict[str, Any],
+    final_var: str,
+    top_var: str,
+    bottom_var: str,
+    paragraph: bool,
+    scroll_pause_ms: int = SCROLL_PAUSE_MS,
+) -> List[Dict[str, Any]]:
+    """Expanse un step extract_text_scroll en séquence d'actions atomiques.
+
+    Séquence générée :
+        1. extract_text(top_var)               — OCR zone visible (haut de page)
+        2. key_combo(ctrl+end)                 — scroll bas (côté client Léa V1)
+        3. wait(scroll_pause_ms)               — laisse DOM/UI se redessiner
+        4. extract_text(bottom_var)            — OCR zone visible (bas de page)
+        5. _concat_text_vars(top, bottom→final) — action serveur interne
+        6. key_combo(ctrl+home)                — remet en haut
+
+    Toutes les sous-actions héritent de `base` (edge_id, from_node, to_node)
+    pour la traçabilité. Chaque action obtient un action_id unique.
+
+    `scroll_pause_ms` : configurable au step (défaut SCROLL_PAUSE_MS=500ms).
+    """
+    def _new_action() -> Dict[str, Any]:
+        return {**base, "action_id": f"act_{uuid.uuid4().hex[:8]}"}
+
+    a1 = _new_action()
+    a1["type"] = "extract_text"
+    a1["parameters"] = {"output_var": top_var, "paragraph": paragraph}
+
+    a2 = _new_action()
+    a2["type"] = "key_combo"
+    a2["keys"] = ["ctrl", "end"]
+
+    a3 = _new_action()
+    a3["type"] = "wait"
+    a3["duration_ms"] = scroll_pause_ms
+
+    a4 = _new_action()
+    a4["type"] = "extract_text"
+    a4["parameters"] = {"output_var": bottom_var, "paragraph": paragraph}
+
+    a5 = _new_action()
+    a5["type"] = "_concat_text_vars"
+    a5["parameters"] = {
+        "top_var": top_var,
+        "bottom_var": bottom_var,
+        "output_var": final_var,
+        "separator": "\n\n",
+    }
+
+    a6 = _new_action()
+    a6["type"] = "key_combo"
+    a6["keys"] = ["ctrl", "home"]
+
+    return [a1, a2, a3, a4, a5, a6]


 def _expand_compound_steps(