rpa_vision_v3/agent_chat/urgences_orchestrator.py

"""Orchestrateur démo GHT Sud 95 — pilotage du scénario "traite N dossiers".

Reçoit une commande naturelle de Léa (chat) et orchestre :
  1. Parsing intent via gemma3:1b (mini-LLM local, ~400 ms)
  2. Setup Chrome (Win+R → URL maquette → Enter) via /replay/raw
  3. extract_table sur la liste des patients (regex IPP, limit=N)
  4. Boucle : pour chaque IPP, lance le workflow "Urgence_unit" via /replay
     avec `variables={"patient_id": ipp}` pour la résolution `{{patient_id}}`
  5. Synthèse finale postée dans le chat

L'orchestration tourne dans un thread daemon. L'état est stocké en mémoire,
poll-able via /api/urgences/status/<orch_id>.
"""

from __future__ import annotations

import json
import logging
import os
import re
import threading
import time
import urllib.error
import urllib.request
import uuid
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional

logger = logging.getLogger(__name__)

# Chargement explicite de .env.local du repo (le service systemd peut ne pas
# voir cet env file). Cherche dans le parent de agent_chat/.
def _load_env_local() -> None:
    env_path = Path(__file__).resolve().parent.parent / ".env.local"
    if not env_path.is_file():
        return
    try:
        for line in env_path.read_text().splitlines():
            line = line.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            k, v = line.split("=", 1)
            k = k.strip()
            v = v.strip().strip('"').strip("'")
            os.environ.setdefault(k, v)
    except Exception as e:
        logger.warning("Erreur chargement .env.local: %s", e)


# Executed at import time so the os.environ.get() calls below can see values
# coming from .env.local.
_load_env_local()

# ─────────────────────────────────────────────────────────────────────
# Config
# ─────────────────────────────────────────────────────────────────────
# Each setting is read once from the environment, with a demo default.
STREAM_BASE = os.environ.get("RPA_STREAM_BASE", "http://localhost:5005")
OLLAMA_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434/api/generate")
NLP_MODEL = os.environ.get("LEA_NLP_MODEL", "gemma3:1b")
RPA_API_TOKEN = os.environ.get("RPA_API_TOKEN", "")

URGENCE_WORKFLOW_ID = os.environ.get("LEA_URGENCE_WORKFLOW_ID", "wf_urgence_unit")
# Local LAN URL (no Basic Auth, no HTTPS) to avoid Chrome's Windows Hello
# prompt (fingerprint reader), which blocks the automated replay.
# The public HTTPS URL remains available (https://urgence.labs.laurinebazin.design)
# for human use, but is NOT used by Léa during the demo.
MAQUETTE_URL = os.environ.get("LEA_MAQUETTE_URL", "http://192.168.1.40:8765/index.html")


# Stable replay session of agent V1. The agent polls /replay/next on
# `agent_<user_id>` independently of the recording sessions (sess_*).
# Default user_id on the agent V1 side = "demo_user" (cf. agent_v1/main.py:62).
AGENT_SESSION_ID = os.environ.get("LEA_AGENT_SESSION_ID", "agent_demo_user")

# machine_id of the target agent V1. MUST match self.machine_id on the agent V1
# side (otherwise /replay/next does not hand the queue to this machine — the
# server isolates machines to prevent cross-machine theft of actions).
# Default value = hostname of the GHT demo Windows PC.
AGENT_MACHINE_ID = os.environ.get("LEA_AGENT_MACHINE_ID", "DESKTOP-58D5CAC_windows")

# IPP pattern: 8 digits, leading group "25" (2025 cohort), remaining digits free
IPP_PATTERN = r"^25\d{6}$"

# ─────────────────────────────────────────────────────────────────────
# NLP: natural-language command parsing via gemma3:1b
# ─────────────────────────────────────────────────────────────────────

# System prompt (in French, sent verbatim to the model) describing the JSON
# intent schema {action, count, order, ipp} and the mapping rules.
# parse_lea_command() appends the user text to it.
NLP_PROMPT = """Tu es un parseur d'intentions pour Léa, assistant RPA médical.
Réponds UNIQUEMENT en JSON valide, sans texte avant/après, selon ce schéma :
{"action": "process_patients" | "stop" | "unknown", "count": <int|null>, "order": "first" | "last" | "all" | "specific" | null, "ipp": "<string>" | null}

Règles :
- "traite N dossiers" / "code N dossiers" / "fais les N premiers" → action=process_patients, count=N, order="first"
- "traite tous les dossiers" → action=process_patients, count=null, order="all"
- "traite le dossier 25003364" → action=process_patients, count=1, order="specific", ipp="25003364"
- "stop" / "arrête" / "annule" → action=stop
- Question ("comment", "pourquoi") → action=unknown
- Si tu ne comprends pas → action=unknown"""


def parse_lea_command(text: str, model: str = NLP_MODEL, timeout: int = 8) -> Dict[str, Any]:
    """Parse a natural-language command into a structured intent via the local LLM.

    Sends ``NLP_PROMPT`` followed by the user text to the Ollama endpoint
    (``OLLAMA_URL``) and expects a JSON object in the ``response`` field.
    Whenever the call fails or the reply is unusable (network error, timeout,
    invalid JSON, JSON that is not an object), the regex fallback
    ``_parse_fallback_regex`` is used instead so the demo is never blocked.

    Args:
        text: raw command typed by the user.
        model: Ollama model name.
        timeout: HTTP timeout in seconds for the Ollama call.

    Returns:
        dict with the keys produced by the model (expected: action, count,
        order, ipp) after clean-up, or the result of the regex fallback.
    """
    payload = {
        "model": model,
        "prompt": NLP_PROMPT + "\n\nUtilisateur : " + text + "\n\nJSON :",
        "stream": False,
        "format": "json",
        "options": {"temperature": 0.0, "num_predict": 120, "num_ctx": 1024},
    }
    data = json.dumps(payload).encode("utf-8")
    req = urllib.request.Request(OLLAMA_URL, data=data, headers={"Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            body = json.loads(resp.read().decode("utf-8"))
        if not isinstance(body, dict):
            raise ValueError("réponse Ollama inattendue (objet JSON attendu)")
        raw = str(body.get("response") or "").strip()
        # Some models wrap the JSON in a Markdown code fence: strip it.
        if raw.startswith("```"):
            raw = raw.split("\n", 1)[-1].rsplit("```", 1)[0].strip()
        intent = json.loads(raw)
        if not isinstance(intent, dict):
            # Valid JSON but not an object (list, string, number…): the
            # clean-up below relies on dict access, so treat it as unusable.
            raise ValueError("intent inattendu (objet JSON attendu)")
    except (OSError, ValueError) as e:
        # OSError covers URLError, TimeoutError/socket.timeout and connection
        # resets; ValueError covers JSONDecodeError and UnicodeDecodeError.
        logger.warning("parse_lea_command: gemma3:1b indisponible (%s), fallback regex", e)
        return _parse_fallback_regex(text)

    # Post-processing: the small model tends to fill every field even when it
    # is not relevant, so the intent is cleaned up.

    # count must be an int or None; accept "3" / 3.0 as produced by the model.
    count = intent.get("count")
    if isinstance(count, str):
        digits = count.strip()
        intent["count"] = int(digits) if digits.isdecimal() else None
    elif isinstance(count, float):
        intent["count"] = int(count) if count.is_integer() else None

    # ipp is kept only if it appears LITERALLY in the source text (otherwise
    # the LLM is hallucinating a plausible-looking IPP).
    if intent.get("ipp") and str(intent["ipp"]) not in text:
        intent["ipp"] = None
        # The LLM forced order=specific without a real IPP: fall back to first.
        if intent.get("order") == "specific":
            intent["order"] = "first"
    # ipp is kept only when order="specific" AND it has a valid IPP format.
    if intent.get("ipp") and intent.get("order") != "specific":
        intent["ipp"] = None
    if intent.get("ipp") and not re.match(r"^\d{8,10}$", str(intent["ipp"])):
        intent["ipp"] = None
    if intent.get("ipp"):
        # The schema says ipp is a string; the model may emit a JSON number.
        intent["ipp"] = str(intent["ipp"])
    # count set together with order="all": the user asked for "N dossiers",
    # not "all dossiers", so switch to "first" for semantic consistency.
    if intent.get("count") and intent.get("order") == "all":
        intent["order"] = "first"
    return intent


def _parse_fallback_regex(text: str) -> Dict[str, Any]:
    """Fallback regex robuste si LLM HS — couvre les phrasings classiques."""
    t = text.lower()
    if any(w in t for w in ("stop", "arrête", "annule", "annuler")):
        return {"action": "stop", "count": None, "order": None, "ipp": None}
    # IPP spécifique : "traite le dossier 25003364"
    m = re.search(r"\b(25\d{6})\b", text)
    if m and any(w in t for w in ("traite", "code", "analyse")):
        return {"action": "process_patients", "count": 1, "order": "specific", "ipp": m.group(1)}
    if any(w in t for w in ("tous", "toutes")) and any(w in t for w in ("traite", "code")):
        return {"action": "process_patients", "count": None, "order": "all", "ipp": None}
    # Quantifié : "traite 3 dossiers"
    m = re.search(r"(\d+)\s*(?:premiers?\s*)?(?:dossiers?|cas|patients?)", t)
    if m and any(w in t for w in ("traite", "code", "fais", "analyse")):
        return {"action": "process_patients", "count": int(m.group(1)), "order": "first", "ipp": None}
    return {"action": "unknown", "count": None, "order": None, "ipp": None}


# ─────────────────────────────────────────────────────────────────────
# Helpers HTTP vers le streaming server (port 5005)
# ─────────────────────────────────────────────────────────────────────

def _stream_headers() -> Dict[str, str]:
    """Build the HTTP headers for streaming-server calls.

    Always declares a JSON content type; adds a Bearer ``Authorization``
    header only when ``RPA_API_TOKEN`` is non-empty.
    """
    headers = {"Content-Type": "application/json"}
    if not RPA_API_TOKEN:
        return headers
    headers["Authorization"] = f"Bearer {RPA_API_TOKEN}"
    return headers


def _post(path: str, body: dict, timeout: int = 30) -> dict:
    """POST ``body`` as JSON to ``STREAM_BASE + path`` and return the decoded JSON reply.

    Errors raised by ``urllib`` or by JSON decoding propagate to the caller.
    """
    encoded_body = json.dumps(body).encode("utf-8")
    request = urllib.request.Request(
        STREAM_BASE + path,
        data=encoded_body,
        headers=_stream_headers(),
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw_reply = response.read()
    return json.loads(raw_reply.decode("utf-8"))


def _get(path: str, timeout: int = 10) -> dict:
    """GET ``STREAM_BASE + path`` and return the decoded JSON reply.

    Errors raised by ``urllib`` or by JSON decoding propagate to the caller.
    """
    request = urllib.request.Request(
        STREAM_BASE + path,
        headers=_stream_headers(),
        method="GET",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        raw_reply = response.read()
    return json.loads(raw_reply.decode("utf-8"))


# ─────────────────────────────────────────────────────────────────────
# Orchestration : état + thread d'exécution
# ─────────────────────────────────────────────────────────────────────

@dataclass
class DossierResult:
    """Outcome of processing one patient dossier, identified by its IPP.

    Every field except ``ipp`` starts as ``None``. ``_process_dossiers`` fills
    them from the replay's ``t2a_result`` variable, or sets ``error`` when
    processing the dossier raised.
    """
    ipp: str
    decision: Optional[str] = None         # "REQUALIFICATION_HOSPITALISATION" | "FORFAIT_URGENCE"
    decision_court: Optional[str] = None   # short label: "UHCD" | "Forfait Urgences"
    confiance: Optional[str] = None        # confidence as reported in t2a_result
    duree_passage_heures: Optional[float] = None  # rendered as hours ("{:.1f}h") in the synthesis
    concordance: Optional[bool] = None     # reported as "concordance vérité-terrain" in the synthesis
    error: Optional[str] = None            # str(exception) when processing this dossier failed


@dataclass
class OrchestrationState:
    """In-memory state of one orchestration run.

    The instance is created by ``start_orchestration``, mutated by the worker
    thread running ``_run_orchestration`` and read through ``to_dict()``.
    """
    orch_id: str
    status: str = "starting"   # starting | running | done | error | cancelled
    progress: int = 0          # 0 → count
    count: int = 0
    current_step: str = ""     # "setup_chrome" | "extract_table" | "process_dossier_X" | "synthese"
    intent: Dict[str, Any] = field(default_factory=dict)
    patients: List[str] = field(default_factory=list)
    results: List[DossierResult] = field(default_factory=list)
    synthese: Optional[str] = None
    error: Optional[str] = None
    started_at: float = field(default_factory=time.time)
    finished_at: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return a snapshot of the state as plain dicts and lists.

        ``intent``, ``patients`` and each result are shallow-copied so that a
        caller mutating the snapshot cannot alter the live state shared with
        the worker thread. ``elapsed_s`` is measured up to ``finished_at``
        when set, otherwise up to now, rounded to 0.1 s.
        """
        end_time = self.finished_at or time.time()
        return {
            "orch_id": self.orch_id,
            "status": self.status,
            "progress": self.progress,
            "count": self.count,
            "current_step": self.current_step,
            "intent": dict(self.intent),
            "patients": list(self.patients),
            # list(...) first: take a stable view of the results list, which
            # the worker thread may still be appending to.
            "results": [dict(vars(r)) for r in list(self.results)],
            "synthese": self.synthese,
            "error": self.error,
            "elapsed_s": round(end_time - self.started_at, 1),
        }


# Global registry of orchestrations, keyed by orch_id. _ORCH_LOCK guards
# access to the dict itself (insert / lookup / listing); the state objects it
# holds are mutated by their worker thread without taking this lock.
_ORCH_REGISTRY: Dict[str, OrchestrationState] = {}
_ORCH_LOCK = threading.Lock()


def get_orchestration(orch_id: str) -> Optional[OrchestrationState]:
    """Return the registered state for ``orch_id``, or ``None`` if unknown."""
    with _ORCH_LOCK:
        found = _ORCH_REGISTRY.get(orch_id)
    return found


def list_orchestrations() -> List[Dict[str, Any]]:
    """Return ``to_dict()`` snapshots of every registered orchestration."""
    snapshots: List[Dict[str, Any]] = []
    with _ORCH_LOCK:
        for registered in _ORCH_REGISTRY.values():
            snapshots.append(registered.to_dict())
    return snapshots


def start_orchestration(
    intent: Dict[str, Any],
    session_id: str = "",
    machine_id: Optional[str] = None,
) -> OrchestrationState:
    """Register a new orchestration and run it in a daemon thread.

    Args:
        intent: dict {action, count, order, ipp} (output of parse_lea_command).
        session_id: replay session; an empty value falls back to
            AGENT_SESSION_ID.
        machine_id: target machine; an empty value falls back to
            AGENT_MACHINE_ID.

    Returns:
        The freshly created state (status "starting"); the worker thread
        keeps updating this same object.
    """
    effective_session = session_id or AGENT_SESSION_ID
    effective_machine = machine_id or AGENT_MACHINE_ID

    orch_id = f"orch_{uuid.uuid4().hex[:10]}"
    # Defaults to 3 when the intent carries no count ("all", or "first"
    # without a number).
    requested = intent.get("count") or 3
    state = OrchestrationState(orch_id=orch_id, status="starting", count=requested, intent=intent)

    with _ORCH_LOCK:
        _ORCH_REGISTRY[orch_id] = state

    worker = threading.Thread(
        target=_run_orchestration,
        args=(state, effective_session, effective_machine),
        daemon=True,
        name=f"orch-{orch_id}",
    )
    worker.start()
    return state


def _run_orchestration(state: OrchestrationState, session_id: str, machine_id: Optional[str]) -> None:
    """Worker-thread body: drive one orchestration from start to synthesis.

    With a "specific" intent carrying an IPP, only that dossier is processed
    and the Chrome setup / table extraction are skipped. Otherwise:
      1. Chrome setup (raw actions)
      2. extract_table on the patient list
      3. Urgence_unit workflow for each IPP
    Both paths end with the synthesis. Any exception is logged, stored in
    ``state.error`` and turns the status to "error"; ``finished_at`` is
    always stamped.
    """
    try:
        state.status = "running"
        intent = state.intent
        single_ipp = intent.get("ipp") if intent.get("order") == "specific" else None

        if not single_ipp:
            # 1. Chrome setup → maquette URL
            state.current_step = "setup_chrome"
            _setup_chrome(session_id, machine_id)

            # 2. Read the IPP list through extract_table
            state.current_step = "extract_table"
            state.patients = _extract_patient_list(session_id, machine_id, limit=state.count)
            if not state.patients:
                raise RuntimeError("extract_table n'a trouvé aucun IPP — vérifier que Chrome est sur index.html")
        else:
            # "specific" shortcut: exactly one IPP, no setup needed
            state.patients = [single_ipp]
            state.count = 1
            state.current_step = "process_dossier"

        # 3. Run the Urgence_unit workflow for each IPP
        _process_dossiers(state, session_id, machine_id)

        # 4. Synthesis
        state.current_step = "synthese"
        state.synthese = _build_synthese(state)
        state.status = "done"
    except Exception as exc:
        logger.exception("Orchestration %s : erreur fatale", state.orch_id)
        state.status = "error"
        state.error = str(exc)
    finally:
        state.finished_at = time.time()


# ─────────────────────────────────────────────────────────────────────
# Phases de l'orchestration
# ─────────────────────────────────────────────────────────────────────

def _setup_chrome(session_id: str, machine_id: Optional[str]) -> None:
    """Open Chrome on the maquette URL by composing catalogue reflexes.

    This step is not a learned workflow: it chains native primitives from the
    gesture catalogue with one text entry, sent as a single raw replay:
      1. reflex `win_minimize_all` (Win+D), then a 400 ms wait
      2. reflex `sys_run` (Win+R), then an 800 ms wait
      3. type "chrome.exe <URL>"
      4. reflex `nav_enter` (Enter), then a 3500 ms wait for the page load

    Raises:
        RuntimeError: a reflex is missing from the catalogue, or the server
            returned no replay_id.
        TimeoutError: the replay did not reach a terminal status within 60 s.
    """
    from agent_chat.gesture_catalog import get_gesture_catalog

    def _pause(prefix, duration_ms, intention):
        # Build one "wait" action with a unique id.
        return {
            "action_id": f"{prefix}_{uuid.uuid4().hex[:6]}",
            "type": "wait",
            "duration_ms": duration_ms,
            "intention": intention,
        }

    catalog = get_gesture_catalog()
    reflexes = {
        gesture_id: catalog.get_by_id(gesture_id)
        for gesture_id in ("win_minimize_all", "sys_run", "nav_enter")
    }
    if any(reflex is None for reflex in reflexes.values()):
        raise RuntimeError("Réflexes catalogue manquants : win_minimize_all / sys_run / nav_enter")

    type_command = {
        "action_id": f"setup_typeurl_{uuid.uuid4().hex[:6]}",
        "type": "type",
        "text": f"chrome.exe {MAQUETTE_URL}",
        "intention": "Taper la commande Chrome + URL maquette",
    }
    actions = [
        reflexes["win_minimize_all"].to_replay_action(),   # Win+D — minimises everything
        _pause("setup_wait_desktop", 400, "Attendre que le bureau soit affiché"),
        reflexes["sys_run"].to_replay_action(),            # Win+R
        _pause("setup_wait", 800, "Attendre que la boîte Exécuter soit prête"),
        type_command,
        reflexes["nav_enter"].to_replay_action(),          # Enter
        _pause("setup_wait_load", 3500, "Attendre le chargement de la maquette"),
    ]

    request_body = {
        "actions": actions,
        "session_id": session_id,
        "task_description": "Setup démo GHT — composition réflexes (sys_run + type + nav_enter)",
    }
    if machine_id:
        request_body["machine_id"] = machine_id

    started = _post("/api/v1/traces/stream/replay/raw", request_body, timeout=20)
    replay_id = started.get("replay_id")
    if not replay_id:
        raise RuntimeError(f"setup_chrome : pas de replay_id ({started})")
    # Setup was observed to take about 13 s; 60 s leaves margin for a busy PC.
    _wait_replay_done(replay_id, timeout_s=60)


def _extract_patient_list(session_id: str, machine_id: Optional[str], limit: int) -> List[str]:
    """Run a single extract_table action and return the IPPs it collected.

    The action is sent as a raw replay with ``IPP_PATTERN`` and ``limit`` as
    parameters; the result is read from the replay's ``patients_list``
    variable. Returns an empty list when that variable is missing or empty.

    Raises:
        RuntimeError: the server returned no replay_id.
        TimeoutError: the replay did not reach a terminal status within 20 s.
    """
    extract_action = {
        "action_id": f"extract_table_{uuid.uuid4().hex[:6]}",
        "type": "extract_table",
        "parameters": {
            "output_var": "patients_list",
            "pattern": IPP_PATTERN,
            "limit": limit,
        },
        "intention": "Lire la liste des IPP visible à l'écran",
    }
    request_body = {
        "actions": [extract_action],
        "session_id": session_id,
        "task_description": "Extraction liste patients GHT",
    }
    if machine_id:
        request_body["machine_id"] = machine_id

    started = _post("/api/v1/traces/stream/replay/raw", request_body, timeout=15)
    replay_id = started.get("replay_id")
    if not replay_id:
        raise RuntimeError(f"extract_table : pas de replay_id ({started})")

    outcome = _wait_replay_done(replay_id, timeout_s=20)
    extracted = outcome.get("variables", {}).get("patients_list")
    return list(extracted) if extracted else []


def _process_dossiers(state: OrchestrationState, session_id: str, machine_id: Optional[str]) -> None:
    """Run the Urgence_unit workflow once per IPP in ``state.patients``.

    For each IPP a replay of ``URGENCE_WORKFLOW_ID`` is started with
    ``variables={"patient_id": ipp}`` and awaited (180 s max). On success the
    ``t2a_result`` replay variable is copied into a ``DossierResult``. Any
    failure — HTTP error, missing replay_id, timeout, or a replay that ended
    in a failure status — is recorded in ``result.error`` and the loop moves
    on to the next dossier.

    ``state.current_step``, ``state.results`` and ``state.progress`` are
    updated as the loop advances.
    """
    # Terminal statuses returned by _wait_replay_done that do NOT mean the
    # workflow completed; without this check such a dossier would be reported
    # as processed with an empty decision.
    failure_statuses = ("error", "cancelled", "paused_need_help")

    for i, ipp in enumerate(state.patients):
        state.current_step = f"process_dossier_{i+1}_of_{len(state.patients)}"
        result = DossierResult(ipp=ipp)
        try:
            payload = {
                "workflow_id": URGENCE_WORKFLOW_ID,
                "session_id": session_id,
                "variables": {"patient_id": ipp},
            }
            if machine_id:
                payload["machine_id"] = machine_id
            resp = _post("/api/v1/traces/stream/replay", payload, timeout=20)
            replay_id = resp.get("replay_id")
            if not replay_id:
                raise RuntimeError(f"replay_id manquant ({resp})")
            final = _wait_replay_done(replay_id, timeout_s=180)

            status = final.get("status", "")
            if status in failure_statuses:
                # NOTE(review): assumes the server may expose a detail under
                # "error" — not confirmed from this file; .get() keeps this
                # safe when the key is absent.
                detail = final.get("error")
                raise RuntimeError(
                    f"replay {replay_id} terminé en status={status}"
                    + (f" ({detail})" if detail else "")
                )

            # "or {}" also covers an explicit null for "variables".
            t2a = (final.get("variables") or {}).get("t2a_result") or {}
            result.decision = t2a.get("decision")
            result.decision_court = t2a.get("decision_court")
            result.confiance = t2a.get("confiance")
            result.duree_passage_heures = t2a.get("duree_passage_heures")
            result.concordance = t2a.get("concordance")
        except Exception as e:
            # Best effort: one failing dossier must not stop the batch.
            result.error = str(e)
            logger.warning("Dossier %s : erreur %s", ipp, e)
        state.results.append(result)
        state.progress = i + 1


def _wait_replay_done(replay_id: str, timeout_s: int = 60, poll_s: float = 1.0) -> Dict[str, Any]:
    """Poll /replay/<id> jusqu'à status terminal."""
    deadline = time.time() + timeout_s
    last = {}
    while time.time() < deadline:
        try:
            last = _get(f"/api/v1/traces/stream/replay/{replay_id}", timeout=5)
        except Exception as e:
            logger.warning("poll replay %s : %s", replay_id, e)
        status = last.get("status", "")
        if status in ("done", "completed", "finished", "error", "cancelled", "paused_need_help"):
            return last
        time.sleep(poll_s)
    raise TimeoutError(f"replay {replay_id} non terminé après {timeout_s}s (status={last.get('status')})")


# ─────────────────────────────────────────────────────────────────────
# Synthèse finale
# ─────────────────────────────────────────────────────────────────────

def _build_synthese(state: OrchestrationState) -> str:
    """Construit le message de synthèse posté dans le chat à la fin."""
    n = len(state.results)
    if n == 0:
        return "Aucun dossier traité."
    n_uhcd = sum(1 for r in state.results if r.decision == "REQUALIFICATION_HOSPITALISATION")
    n_forfait = sum(1 for r in state.results if r.decision == "FORFAIT_URGENCE")
    n_concord = sum(1 for r in state.results if r.concordance is True)
    lines = [f"✅ Terminé. {n} dossier(s) traité(s) : {n_forfait} forfait(s) urgences, {n_uhcd} UHCD."]
    if any(r.concordance is not None for r in state.results):
        lines.append(f"Concordance vérité-terrain : {n_concord}/{n}.")
    lines.append("")
    for r in state.results:
        if r.error:
            lines.append(f"  • {r.ipp} : ❌ erreur — {r.error}")
            continue
        decision_label = r.decision_court or r.decision or "—"
        conf = f"confiance {r.confiance}" if r.confiance else ""
        duree = f"{r.duree_passage_heures:.1f}h" if r.duree_passage_heures else ""
        concord_mark = ""
        if r.concordance is True:
            concord_mark = " ✓"
        elif r.concordance is False:
            concord_mark = " ⚠ écart vérité-terrain"
        details = ", ".join(x for x in (conf, duree) if x)
        lines.append(f"  • {r.ipp} : {decision_label}{concord_mark}" + (f" ({details})" if details else ""))
    return "\n".join(lines)