feat(agent): add learn action flow and grounding guards
This commit is contained in:
280
tests/unit/test_lea_message_contract.py
Normal file
280
tests/unit/test_lea_message_contract.py
Normal file
@@ -0,0 +1,280 @@
|
||||
"""Tests du contrat de messages humains pour Lea."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from agent_v0.agent_v1.ui.message_contract import (
|
||||
MAX_FIELD_CHARS,
|
||||
MessageContractError,
|
||||
coerce_supervised_pause_message,
|
||||
format_supervised_pause_from_mapping,
|
||||
format_supervised_pause_message,
|
||||
validate_supervised_pause_message,
|
||||
validate_visible_message,
|
||||
warn_visible_message,
|
||||
)
|
||||
|
||||
|
||||
def _valid_pause(**overrides: str) -> str:
    """Build a supervised-pause message through the real formatter.

    A fixed set of default field values is used; any keyword passed in
    ``overrides`` replaces (or adds to) those defaults before they are
    forwarded to ``format_supervised_pause_message``.
    """
    defaults = dict(
        intention="ouvrir le dossier patient dans Aiva Urgence",
        attendu="voir la fiche du patient ouverte avec la liste des passages",
        vu="la page d'accueil Aiva Urgence sans le dossier patient",
        demande="ouvrir le dossier patient puis me rendre la main",
    )
    # Overrides win over defaults; key order of the defaults is preserved.
    return format_supervised_pause_message(**{**defaults, **overrides})
|
||||
|
||||
|
||||
def _raw_pause(**overrides: str) -> str:
|
||||
fields = {
|
||||
"intention": "ouvrir le dossier patient dans Aiva Urgence",
|
||||
"attendu": "voir la fiche du patient ouverte avec la liste des passages",
|
||||
"vu": "la page d'accueil Aiva Urgence sans le dossier patient",
|
||||
"demande": "ouvrir le dossier patient puis me rendre la main",
|
||||
}
|
||||
fields.update(overrides)
|
||||
return "\n".join(
|
||||
[
|
||||
f"J'essaie de : {fields['intention']}",
|
||||
f"J'attendais : {fields['attendu']}",
|
||||
f"Je vois : {fields['vu']}",
|
||||
f"Peux-tu : {fields['demande']}",
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
def _issue_codes(message: str) -> set[str]:
    """Return the distinct issue codes the supervised-pause validator reports for ``message``."""
    report = validate_supervised_pause_message(message)
    codes = set()
    for issue in report.issues:
        codes.add(issue.code)
    return codes
|
||||
|
||||
|
||||
def test_format_supervised_pause_has_exact_four_field_structure():
    """The formatter output is exactly the four labelled lines, in order, and validates."""
    expected_lines = [
        "J'essaie de : ouvrir le dossier patient dans Aiva Urgence",
        "J'attendais : voir la fiche du patient ouverte avec la liste des passages",
        "Je vois : la page d'accueil Aiva Urgence sans le dossier patient",
        "Peux-tu : ouvrir le dossier patient puis me rendre la main",
    ]

    formatted = _valid_pause()

    assert formatted.splitlines() == expected_lines
    assert validate_supervised_pause_message(formatted).valid
|
||||
|
||||
|
||||
def test_format_from_mapping_accepts_runtime_aliases():
    """A mapping keyed by trying_to/expected/observed/request is formatted into a valid message."""
    aliased_fields = {
        "trying_to": "selectionner le passage aux urgences",
        "expected": "voir le formulaire de codage du passage",
        "observed": "la liste des passages reste affichee",
        "request": "selectionner le bon passage puis me rendre la main",
    }

    formatted = format_supervised_pause_from_mapping(aliased_fields)

    assert "J'essaie de : selectionner le passage aux urgences" in formatted
    assert validate_supervised_pause_message(formatted).valid
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "bad_phrase",
    [
        "un element",
        "un élément",
        "cette action",
        "Validation requise",
        "cible inconnue",
    ],
)
def test_blacklist_refuses_generic_formulations(bad_phrase):
    """A generic phrase in the observed field makes the message invalid with ``generic_phrase``."""
    observed = f"je vois {bad_phrase}"

    outcome = validate_supervised_pause_message(_raw_pause(vu=observed))

    assert not outcome.valid
    reported_codes = {issue.code for issue in outcome.issues}
    assert "generic_phrase" in reported_codes
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "technical_text",
    [
        "action_click_12ab34",
        "replay_9f8e7d6c",
        "session_id",
        "target_spec.by_text",
        "550e8400-e29b-41d4-a716-446655440000",
        "a3f6c9d8e1b24567",
    ],
)
def test_refuses_raw_technical_identifiers(technical_text):
    """A raw technical identifier in the expected field is flagged.

    Either ``technical_identifier`` or ``technical_field`` is accepted as
    the reported code.
    """
    message = _raw_pause(attendu=f"voir le dossier patient apres {technical_text}")

    # Validate once and keep the codes: avoids re-running the validator for
    # the second membership check and lets a failing assert show what was
    # actually reported.
    codes = _issue_codes(message)

    assert codes & {"technical_identifier", "technical_field"}, codes
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "technical_text",
    [
        "(123, 456)",
        "x=120 y=340",
        "340px",
        "score=0.87",
        "confidence=0.91",
        "similarité=0.42",
    ],
)
def test_refuses_pixels_and_raw_scores(technical_text):
    """Coordinates, pixel sizes or raw scores in the observed field are flagged.

    Either ``raw_coordinates`` or ``raw_score`` is accepted as the reported code.
    """
    observed = f"la page Aiva avec {technical_text}"
    reported = _issue_codes(_raw_pause(vu=observed))

    assert any(code in reported for code in ("raw_coordinates", "raw_score"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "technical_english",
    [
        "target_not_found",
        "no_screen_change",
        "wrong_window",
        "validation required",
        "retry",
        "screenshot",
    ],
)
def test_refuses_technical_english(technical_english):
    """English technical wording inside the observed field yields ``technical_english``."""
    observed = f"le message {technical_english} est affiche"
    reported = _issue_codes(_raw_pause(vu=observed))

    assert "technical_english" in reported
|
||||
|
||||
|
||||
def test_refuses_raw_english_instruction():
    """An English request line is reported as both ``technical_english`` and ``not_actionable``."""
    reported = _issue_codes(_raw_pause(demande="please click the target button"))

    for expected_code in ("technical_english", "not_actionable"):
        assert expected_code in reported
|
||||
|
||||
|
||||
def test_refuses_messages_without_four_required_lines():
    """A single free-text sentence is invalid and reported as ``invalid_structure``."""
    free_text = "Je ne trouve pas le dossier patient."

    outcome = validate_supervised_pause_message(free_text)

    assert not outcome.valid
    reported_codes = {issue.code for issue in outcome.issues}
    assert "invalid_structure" in reported_codes
|
||||
|
||||
|
||||
def test_refuses_wrong_label_order():
    """Swapping the first two labelled lines is reported as ``invalid_structure``."""
    swapped_lines = (
        "J'attendais : voir la fiche patient",
        "J'essaie de : ouvrir le dossier patient",
        "Je vois : la page d'accueil",
        "Peux-tu : ouvrir le dossier puis me rendre la main",
    )

    reported = _issue_codes("\n".join(swapped_lines))

    assert "invalid_structure" in reported
|
||||
|
||||
|
||||
def test_demande_must_be_actionable_in_french():
    """A request line that only says thanks is reported as ``not_actionable``."""
    lines = (
        "J'essaie de : ouvrir le dossier patient",
        "J'attendais : voir la fiche patient ouverte",
        "Je vois : la page d'accueil Aiva Urgence",
        "Peux-tu : merci beaucoup",
    )

    reported = _issue_codes("\n".join(lines))

    assert "not_actionable" in reported
|
||||
|
||||
|
||||
def test_visible_message_validator_accepts_clear_french_actionable_text():
    """A free-form French message stating the problem and a request passes ``validate_visible_message``."""
    problem = "Je ne trouve pas le dossier patient dans Aiva Urgence. "
    request = "Peux-tu ouvrir le dossier puis me rendre la main ?"

    outcome = validate_visible_message(problem + request)

    assert outcome.valid
|
||||
|
||||
|
||||
def test_formatter_raises_instead_of_emitting_generic_message():
    """The formatter raises ``MessageContractError`` when given generic field values."""
    generic_fields = {
        "intention": "faire cette action",
        "attendu": "validation requise",
        "vu": "un element",
        "demande": "corriger",
    }

    with pytest.raises(MessageContractError):
        format_supervised_pause_message(**generic_fields)
|
||||
|
||||
|
||||
def test_formatter_raises_on_too_short_request():
    """The formatter raises ``MessageContractError`` when only the request is the single word "corriger"."""
    fields = {
        "intention": "ouvrir le dossier patient dans Aiva Urgence",
        "attendu": "voir la fiche du patient ouverte",
        "vu": "la page d'accueil Aiva Urgence",
        "demande": "corriger",
    }

    with pytest.raises(MessageContractError):
        format_supervised_pause_message(**fields)
|
||||
|
||||
|
||||
def test_coerce_turns_legacy_validation_required_into_structured_pause():
    """Coercing the legacy "Validation requise" text gives a valid structured message without that phrase."""
    legacy_text = "Validation requise"

    coerced = coerce_supervised_pause_message(legacy_text)

    assert validate_supervised_pause_message(coerced).valid
    assert legacy_text not in coerced
    first_line = coerced.splitlines()[0]
    assert first_line.startswith("J'essaie de :")
|
||||
|
||||
|
||||
def test_coerce_keeps_clear_legacy_request_as_demande():
    """A clear legacy request survives coercion verbatim and the result validates."""
    legacy_request = "Valider le dossier patient avant enregistrement"
    context = {
        "intention": "enregistrer le dossier patient",
        "attendu": "avoir ton accord avant l'enregistrement",
        "vu": "le formulaire patient est pret a etre enregistre",
    }

    coerced = coerce_supervised_pause_message(legacy_request, **context)

    assert validate_supervised_pause_message(coerced).valid
    assert legacy_request in coerced
|
||||
|
||||
|
||||
def test_warn_visible_message_logs_without_modifying_message(caplog):
    """``warn_visible_message`` returns its input unchanged and logs the problem.

    The captured log must mention the source tag and the ``generic_phrase`` code.
    """
    original_text = "Validation requise"

    result = warn_visible_message(original_text, source="unit.raw")

    assert result == original_text
    for expected_fragment in ("invalid_message source=unit.raw", "generic_phrase"):
        assert expected_fragment in caplog.text
|
||||
|
||||
|
||||
def test_warn_visible_message_accepts_supervised_pause_without_log(caplog):
    """A well-formed supervised pause is returned as-is and no ``invalid_message`` entry is captured."""
    pause_text = _valid_pause()
    call_options = {"source": "unit.final", "supervised_pause": True}

    result = warn_visible_message(pause_text, **call_options)

    assert result == pause_text
    assert "invalid_message" not in caplog.text
|
||||
|
||||
|
||||
def test_refuses_overlong_fields_and_messages():
    """An oversized first field triggers both ``field_too_long`` and ``message_too_long``."""
    repeated_part = "le dossier patient " * 45
    long_field = "ouvrir " + repeated_part
    # Guard: the fixture must really exceed the contract's per-field limit.
    assert len(long_field) > MAX_FIELD_CHARS

    lines = (
        f"J'essaie de : {long_field}",
        "J'attendais : voir la fiche patient ouverte",
        "Je vois : la page d'accueil Aiva Urgence",
        "Peux-tu : ouvrir le dossier patient puis me rendre la main",
    )
    reported = _issue_codes("\n".join(lines))

    for expected_code in ("field_too_long", "message_too_long"):
        assert expected_code in reported
|
||||
Reference in New Issue
Block a user