Files
rpa_vision_v3/tests/unit/test_domain_personality.py
Dom 4dc7d840d6 feat(p1x): de-hardcode VLM models/endpoints to vlm_config (DGX-ready)
Migre les call-sites VLM serveur vers la configuration centrale pour
fonctionner sur DGX (tunnel Ollama 11434), où gemma4:* est absent et le
port Docker 11435 est mort.

- task_planner, replay_verifier, domain_context, ir_builder, resolve_engine
  (popup): modele -> vlm_config.get_vlm_model(), defaut 11435 -> 11434
  (override GEMMA4_PORT legacy conserve)
- resolve_engine (grounding bbox x2): nouvel helper
  vlm_config.get_bbox_grounding_model() (var dediee RPA_BBOX_GROUNDING_MODEL,
  fallback RPA_GROUNDING_MODEL puis qwen2.5vl:7b-rpa) -> desambiguise le
  conflit D5-v3b, bbox_2d + num_ctx 4096 preserves
- safety_checks_provider: defaut -> get_vlm_model(), override
  RPA_SAFETY_CHECKS_LLM_MODEL preserve
- ui_detector: default_factory + resolution lazy (corrige aussi un gel a
  l'import), pas d'appel reseau a l'import
- field_extractor: property lazy via vlm_config

TDD strict (RED->GREEN), 305 tests verts, tests mockes HTTP (zero dependance
DGX reel), aucun alias Ollama.

Hors perimetre (arbitrage Dom): client Lea agent_v1/executor.py (gele),
chemin V4 observe_reason_act (RPA_REASONING_MODEL), core/config.py defaults.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-03 14:06:03 +02:00

594 lines
21 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests unitaires pour la personnalité métier de Léa.
Couvre :
- summarize_action : résumé d'actions en langage métier par domaine
- pose_clarification_question : questions contextuelles quand Léa bloque
- describe_workflow_outcome : rapports de fin en langage métier
- Fallback domaine inconnu / vocabulaire synonyme
- Intégration avec agent_v0.agent_v1.ui.messages (formatters enrichis)
- Appel gemma4 mocké pour le raffinement de résumé
Tous les tests sont 100% offline : aucun appel réseau réel.
"""
from __future__ import annotations
import sys
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
# Make sure the project root is importable (same approach as the other unit tests).
ROOT = Path(__file__).resolve().parents[2]
if sys.path.count(str(ROOT)) == 0:
    sys.path.insert(0, str(ROOT))
from agent_v0.server_v1.domain_context import (
DomainContext,
get_domain_context,
list_domains,
register_domain,
)
# ============================================================================
# Domaines pré-configurés
# ============================================================================
class TestDomainesDisponibles:
    """Check that every planned business domain is registered."""

    def test_tim_codage_present(self):
        domain = get_domain_context("tim_codage")
        assert domain.domain_id == "tim_codage"
        assert "CIM-10" in domain.vocabulary
        # The remaining collections only have to be non-empty.
        assert domain.common_actions
        assert domain.clarification_templates
        assert domain.summary_templates

    def test_comptabilite_present(self):
        domain = get_domain_context("comptabilite")
        assert domain.domain_id == "comptabilite"
        assert "facture" in domain.vocabulary
        plural_label = domain.summary_templates["item_plural"]
        assert plural_label == "factures"

    def test_rh_paie_present(self):
        domain = get_domain_context("rh_paie")
        assert domain.domain_id == "rh_paie"
        assert "bulletin" in domain.vocabulary
        plural_label = domain.summary_templates["item_plural"]
        assert plural_label == "bulletins"

    def test_stocks_logistique_present(self):
        domain = get_domain_context("stocks_logistique")
        assert domain.domain_id == "stocks_logistique"
        # Either the abbreviation or the spelled-out term is accepted.
        assert any(
            term in domain.vocabulary for term in ("BC", "bon de commande")
        )
        plural_label = domain.summary_templates["item_plural"]
        assert plural_label == "bons"

    def test_generic_fallback(self):
        """An unknown domain id yields the generic context."""
        domain = get_domain_context("n_existe_pas_42")
        assert domain.domain_id == "generic"

    def test_list_domains_contains_all(self):
        expected_ids = {
            "tim_codage",
            "comptabilite",
            "rh_paie",
            "stocks_logistique",
            "generic",
        }
        registered_ids = {entry["domain_id"] for entry in list_domains()}
        assert expected_ids <= registered_ids
# ============================================================================
# summarize_action — résumé d'actions
# ============================================================================
class TestSummarizeAction:
    """Action summaries phrased in each domain's business language."""

    def test_tim_click_dp_saisir_diagnostic_principal(self):
        domain = get_domain_context("tim_codage")
        summary = domain.summarize_action("click", {"target": "DP"})
        assert summary == "saisir le diagnostic principal"

    def test_tim_click_valider_codage(self):
        domain = get_domain_context("tim_codage")
        action_params = {"target": "Valider le codage"}
        summary = domain.summarize_action("click", action_params)
        assert summary == "valider le codage"

    def test_tim_click_dossier_patient(self):
        domain = get_domain_context("tim_codage")
        action_params = {"target": "Ouvrir le dossier patient"}
        summary = domain.summarize_action("click", action_params)
        assert summary == "ouvrir le dossier patient"

    def test_compta_type_ht(self):
        domain = get_domain_context("comptabilite")
        action_params = {"target": "Montant HT", "text": "1500"}
        summary = domain.summarize_action("type", action_params)
        # The "ht" mention in the target is what triggers the mapping.
        assert summary == "saisir le montant hors taxes"

    def test_compta_click_lettrer(self):
        domain = get_domain_context("comptabilite")
        summary = domain.summarize_action("click", {"target": "Lettrer"})
        assert summary == "lettrer les écritures"

    def test_rh_click_bulletin(self):
        domain = get_domain_context("rh_paie")
        action_params = {"target": "Bulletin de paie"}
        summary = domain.summarize_action("click", action_params)
        assert summary == "ouvrir le bulletin de paie"

    def test_stocks_type_quantite(self):
        domain = get_domain_context("stocks_logistique")
        action_params = {"target": "Quantité reçue", "text": "42"}
        summary = domain.summarize_action("type", action_params)
        assert summary == "saisir la quantité"

    def test_generic_click_fallback(self):
        domain = get_domain_context("generic")
        action_params = {"target": "Bouton quelconque"}
        summary = domain.summarize_action("click", action_params)
        # No mapping, but a description is given -> "cliquer sur ...".
        assert "cliquer sur" in summary

    def test_unknown_domain_click(self):
        """An unknown domain must not crash."""
        domain = get_domain_context("inconnu")
        summary = domain.summarize_action("click", {"target": "Quelque chose"})
        assert summary  # must not be empty
        assert "cliquer" in summary

    def test_tim_synonymes_dp_dans_cible_longue(self):
        """No exact mapping, but the target contains DP -> synonym substitution."""
        domain = get_domain_context("tim_codage")
        # There is no direct mapping for "saisir le", yet "DP" is a known synonym.
        summary = domain.summarize_action("click", {"target": "Saisir le DP"})
        assert summary == "saisir le diagnostic principal"

    def test_key_combo_generic(self):
        domain = get_domain_context("generic")
        action_params = {"keys": ["ctrl", "s"]}
        summary = domain.summarize_action("key_combo", action_params)
        assert "ctrl+s" in summary

    def test_wait_and_scroll(self):
        domain = get_domain_context("tim_codage")
        wait_summary = domain.summarize_action("wait", {})
        assert "attendre" in wait_summary
        scroll_summary = domain.summarize_action("scroll", {})
        assert "défiler" in scroll_summary

    def test_type_no_target(self):
        domain = get_domain_context("generic")
        summary = domain.summarize_action("type", {"text": "hello"})
        assert "hello" in summary
# ============================================================================
# pose_clarification_question — questions de blocage
# ============================================================================
class TestClarification:
    """Questions Léa asks when she is blocked."""

    def test_tim_fichier_patient_avec_nom(self):
        domain = get_domain_context("tim_codage")
        blocking = {
            "blocked_on": "target_not_found",
            "target": "Fichier patient",
            "params": {"nom_patient": "Mme Durand"},
        }
        question = domain.pose_clarification_question(blocking)
        assert "Mme Durand" in question
        # Business wording: the question says "dossier", not merely "fichier".
        assert "dossier" in question.lower()

    def test_compta_montant_avec_num_facture(self):
        domain = get_domain_context("comptabilite")
        blocking = {
            "blocked_on": "target_not_found",
            "target": "Montant HT",
            "params": {"num_facture": "F2026-0145"},
        }
        question = domain.pose_clarification_question(blocking)
        assert "F2026-0145" in question
        assert "Montant HT" in question or "Montant" in question

    def test_rh_employe_non_trouve(self):
        domain = get_domain_context("rh_paie")
        blocking = {
            "blocked_on": "target_not_found",
            "target": "Fiche employé",
            "params": {"nom_employe": "Jean Martin"},
        }
        question = domain.pose_clarification_question(blocking)
        assert "Jean Martin" in question

    def test_stocks_article_non_trouve(self):
        domain = get_domain_context("stocks_logistique")
        blocking = {
            "blocked_on": "target_not_found",
            "target": "Article",
            "params": {"ref_article": "REF-4242", "num_bc": "BC-2026-042"},
        }
        question = domain.pose_clarification_question(blocking)
        # At least one of the two identifiers has to show up.
        assert "REF-4242" in question or "BC-2026-042" in question

    def test_ambiguous_code_tim(self):
        domain = get_domain_context("tim_codage")
        blocking = {
            "blocked_on": "ambiguous_code",
            "params": {"code_a": "E11.9", "code_b": "E11.8"},
        }
        question = domain.pose_clarification_question(blocking)
        assert "E11.9" in question
        assert "E11.8" in question

    def test_clarification_unknown_domain_fallback(self):
        """Unknown domain -> generic message, never a crash."""
        domain = get_domain_context("inconnu")
        blocking = {"blocked_on": "target_not_found", "target": "Un champ"}
        question = domain.pose_clarification_question(blocking)
        assert question
        assert "trouve pas" in question.lower()

    def test_clarification_empty_context(self):
        """No context at all -> fallback question."""
        domain = get_domain_context("tim_codage")
        question = domain.pose_clarification_question(None)
        assert question  # must not be empty
        assert isinstance(question, str)

    def test_clarification_missing_params_no_crash(self):
        """A template may mention {nom_patient} without it being provided.

        The call must not crash in that case.
        """
        domain = get_domain_context("tim_codage")
        # Deliberately no "params" entry, hence no nom_patient.
        blocking = {
            "blocked_on": "target_not_found",
            "target": "Fichier patient",
        }
        question = domain.pose_clarification_question(blocking)
        assert isinstance(question, str)
        assert question
# ============================================================================
# describe_workflow_outcome — rapports finaux
# ============================================================================
class TestWorkflowOutcome:
    """End-of-workflow reports phrased in business language."""

    def test_tim_succes_complet(self):
        domain = get_domain_context("tim_codage")
        report = domain.describe_workflow_outcome(
            workflow_name="Codage janvier",
            success=True,
            items_count=15,
            failed_count=0,
        )
        assert "15 dossiers" in report
        assert "codé" in report

    def test_tim_succes_partiel(self):
        domain = get_domain_context("tim_codage")
        report = domain.describe_workflow_outcome(
            workflow_name="Codage janvier",
            success=True,
            items_count=15,
            failed_count=1,
        )
        assert "14 dossiers" in report
        assert "15" in report
        # Pending count. NOTE(review): "1" is already a substring of "14"/"15",
        # so this check cannot fail on its own.
        assert "1" in report

    def test_tim_echec_complet(self):
        domain = get_domain_context("tim_codage")
        report = domain.describe_workflow_outcome(
            workflow_name="Codage janvier",
            success=False,
            items_count=15,
            failed_count=15,
        )
        assert "Codage janvier" in report
        lowered = report.lower()
        assert any(marker in lowered for marker in ("pas", "rends la main"))

    def test_compta_success_factures(self):
        domain = get_domain_context("comptabilite")
        report = domain.describe_workflow_outcome(
            workflow_name="Saisie factures mars",
            success=True,
            items_count=30,
            failed_count=0,
        )
        assert "30 factures" in report

    def test_rh_success_bulletins(self):
        domain = get_domain_context("rh_paie")
        report = domain.describe_workflow_outcome(
            workflow_name="Paie avril",
            success=True,
            items_count=50,
            failed_count=2,
        )
        for fragment in ("48", "50", "bulletins"):
            assert fragment in report

    def test_stocks_success_bons(self):
        domain = get_domain_context("stocks_logistique")
        report = domain.describe_workflow_outcome(
            workflow_name="Réceptions semaine 14",
            success=True,
            items_count=12,
            failed_count=0,
        )
        assert "12 bons" in report

    def test_generic_fallback(self):
        """Unknown domain -> a coherent generic report."""
        domain = get_domain_context("inconnu")
        report = domain.describe_workflow_outcome(
            workflow_name="Mon workflow",
            success=True,
            items_count=5,
            failed_count=0,
        )
        assert report
        assert "Mon workflow" in report or "5" in report

    def test_tim_success_one_avec_nom_patient(self):
        """Single-item case: a business parameter is passed through ``extra``."""
        domain = get_domain_context("tim_codage")
        report = domain.describe_workflow_outcome(
            workflow_name="Codage urgent",
            success=True,
            items_count=1,
            failed_count=0,
            elapsed_s=42,
            extra={"nom_patient": "M. Dupont"},
        )
        assert "M. Dupont" in report
        assert "42" in report
class TestWorkflowOutcomeLLM:
    """LLM refinement of the final report, exercised with mocks only."""

    def test_use_llm_success_mocked(self):
        """With use_llm=True and a non-empty refinement, that text is used."""
        domain = get_domain_context("tim_codage")

        def fake_refine(self, template, subs, success):
            return "Voilà, j'ai codé tous tes dossiers, bon café !"

        with patch.object(DomainContext, "_llm_refine_summary", fake_refine):
            report = domain.describe_workflow_outcome(
                workflow_name="Codage",
                success=True,
                items_count=10,
                use_llm=True,
            )

        assert "bon café" in report

    def test_use_llm_failure_falls_back_to_template(self):
        """If the refinement returns "", the template text is used instead."""
        domain = get_domain_context("tim_codage")

        def fake_refine(self, template, subs, success):
            # An empty string stands for a failed LLM call.
            return ""

        with patch.object(DomainContext, "_llm_refine_summary", fake_refine):
            report = domain.describe_workflow_outcome(
                workflow_name="Codage",
                success=True,
                items_count=10,
                failed_count=0,
                use_llm=True,
            )

        assert "10 dossiers" in report

    def test_refine_modele_via_vlm_config(self):
        """The _llm_refine_summary payload carries the model from vlm_config."""
        domain = get_domain_context("tim_codage")
        captured = {}

        def fake_post(url, json=None, **kwargs):
            payload = json or {}
            captured["url"] = url
            captured["model"] = payload.get("model")
            response = MagicMock(ok=True)
            response.json.return_value = {"message": {"content": "ok"}}
            return response

        # Stand-in for the ``requests`` module, injected through sys.modules.
        requests_stub = MagicMock()
        requests_stub.post.side_effect = fake_post

        with patch.dict("sys.modules", {"requests": requests_stub}):
            with patch(
                "agent_v0.server_v1.domain_context.vlm_config.get_vlm_model",
                return_value="modele-resolu:test",
            ):
                domain._llm_refine_summary(
                    template="ok", subs={"workflow_name": "x"}, success=True
                )

        assert captured["model"] == "modele-resolu:test"

    def test_refine_endpoint_par_defaut_11434(self, monkeypatch):
        """Without GEMMA4_PORT, the request URL uses port 11434, not 11435."""
        monkeypatch.delenv("GEMMA4_PORT", raising=False)
        domain = get_domain_context("tim_codage")
        captured = {}

        def fake_post(url, json=None, **kwargs):
            captured["url"] = url
            response = MagicMock(ok=True)
            response.json.return_value = {"message": {"content": "ok"}}
            return response

        requests_stub = MagicMock()
        requests_stub.post.side_effect = fake_post

        with patch.dict("sys.modules", {"requests": requests_stub}):
            domain._llm_refine_summary(
                template="ok", subs={"workflow_name": "x"}, success=True
            )

        requested_url = captured["url"]
        assert ":11434" in requested_url
        assert ":11435" not in requested_url

    def test_llm_refine_network_error_safe(self):
        """_llm_refine_summary must never raise, even when requests fails."""
        domain = get_domain_context("tim_codage")
        requests_stub = MagicMock()
        requests_stub.post.side_effect = RuntimeError("boom")

        with patch.dict("sys.modules", {"requests": requests_stub}):
            refined = domain._llm_refine_summary(
                template="ok", subs={"workflow_name": "x"}, success=True
            )

        assert refined == ""
# ============================================================================
# Domaine custom enregistré dynamiquement
# ============================================================================
class TestRegisterDomain:
    """A custom domain registered at runtime can be fetched and used."""

    def test_register_custom_domain(self):
        templates = {
            "item_singular": "truc",
            "item_plural": "trucs",
            "success": "J'ai fait {done} trucs sur {items_count}.",
            "partial": "Partiel : {done}/{items_count}.",
            "failure": "Echec.",
        }
        custom_domain = DomainContext(
            domain_id="test_custom_xyz",
            name="Test",
            description="test",
            common_actions={"click:foo": "faire foo"},
            summary_templates=templates,
        )
        # NOTE(review): nothing here unregisters the domain afterwards —
        # confirm that leaving it registered is acceptable for other tests.
        register_domain(custom_domain)

        looked_up = get_domain_context("test_custom_xyz")
        assert looked_up.name == "Test"
        summary = looked_up.summarize_action("click", {"target": "FOO"})
        assert summary == "faire foo"
# ============================================================================
# Intégration avec ui.messages
# ============================================================================
class TestMessagesIntegration:
    """Message formatters take the domain into account when one is given."""

    def test_cible_non_trouvee_domain_tim(self):
        from agent_v0.agent_v1.ui.messages import formatter_cible_non_trouvee

        message = formatter_cible_non_trouvee(
            description_cible="Fichier patient",
            titre_fenetre="DxCare",
            domain_id="tim_codage",
            params={"nom_patient": "Mme Durand"},
        )
        assert "Mme Durand" in message.corps

    def test_cible_non_trouvee_domain_comptabilite(self):
        from agent_v0.agent_v1.ui.messages import formatter_cible_non_trouvee

        message = formatter_cible_non_trouvee(
            description_cible="Montant HT",
            titre_fenetre="Sage",
            domain_id="comptabilite",
            params={"num_facture": "F2026-007"},
        )
        assert "F2026-007" in message.corps

    def test_cible_non_trouvee_sans_domain_retrocompat(self):
        """Without domain_id, the historical behaviour is kept."""
        from agent_v0.agent_v1.ui.messages import formatter_cible_non_trouvee

        message = formatter_cible_non_trouvee(
            description_cible="bonjour",
            titre_fenetre="Test Bloc-notes",
        )
        body = message.corps
        assert "bonjour" in body
        assert "Bloc-notes" in body

    def test_fin_workflow_tim_partiel(self):
        from agent_v0.agent_v1.ui.messages import (
            NiveauMessage,
            formatter_fin_workflow,
        )

        message = formatter_fin_workflow(
            succes=True,
            nom_workflow="Codage janvier",
            nb_etapes=120,
            duree_s=900,
            domain_id="tim_codage",
            items_count=15,
            failed_count=1,
        )
        # Business wording is expected here, not a raw step count.
        assert "14 dossiers" in message.corps
        # A partial success is reported at the ATTENTION level.
        assert message.niveau == NiveauMessage.ATTENTION

    def test_fin_workflow_tim_complet(self):
        from agent_v0.agent_v1.ui.messages import (
            NiveauMessage,
            formatter_fin_workflow,
        )

        message = formatter_fin_workflow(
            succes=True,
            nom_workflow="Codage janvier",
            nb_etapes=120,
            duree_s=900,
            domain_id="tim_codage",
            items_count=15,
            failed_count=0,
        )
        assert "15 dossiers" in message.corps
        assert message.niveau == NiveauMessage.INFO

    def test_fin_workflow_sans_domain_retrocompat(self):
        from agent_v0.agent_v1.ui.messages import formatter_fin_workflow

        message = formatter_fin_workflow(
            succes=True,
            nom_workflow="Demo",
            nb_etapes=5,
            duree_s=10,
        )
        body = message.corps
        assert "Demo" in body
        assert "5 étapes" in body

    def test_erreur_generique_propagate_domain(self):
        from agent_v0.agent_v1.ui.messages import formatter_erreur_generique

        message = formatter_erreur_generique(
            "target_not_found: Montant HT",
            domain_id="comptabilite",
            params={"num_facture": "F-001"},
        )
        assert "F-001" in message.corps

    def test_friendly_target_tim_synonyme(self):
        from agent_v0.agent_v1.ui.messages import _friendly_target

        assert _friendly_target("DP", "tim_codage") == "diagnostic principal"
        # No domain, or an unknown one: the target comes back unchanged.
        assert _friendly_target("DP", None) == "DP"
        assert _friendly_target("DP", "domaine_inexistant") == "DP"