# Listing metadata left by the code viewer: 485 lines, 14 KiB, Python.
"""Readability contract for human-visible messages.

This module is not wired into any runtime entry point yet. It provides a pure,
testable building block so that Lea's UI outputs can reject messages that are
too generic or too technical before they are displayed.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import re
|
|
import unicodedata
|
|
from dataclasses import dataclass
|
|
from typing import Iterable, Mapping
|
|
|
|
|
|
# Module-level logger; warn_visible_message reports contract violations on it.
logger = logging.getLogger(__name__)

# Line prefixes of a supervised pause message, in the order the four lines
# must appear.
SUPERVISED_PAUSE_LABELS = (
    "J'essaie de",
    "J'attendais",
    "Je vois",
    "Peux-tu",
)

# Length limits, counted in characters.
MAX_VISIBLE_MESSAGE_CHARS = 720  # upper bound for a whole visible message
MAX_FIELD_CHARS = 180  # upper bound for one supervised-pause field
MIN_FIELD_CHARS = 4  # lower bound for one supervised-pause field
|
|
|
|
# Wording considered too vague for a human-visible message. Both accented and
# unaccented spellings are listed; validate_visible_message compares them
# after accent/case folding and reports each folded form only once.
_GENERIC_PHRASES = (
    "un element",
    "un élément",
    "l'element",
    "l'élément",
    "element inconnu",
    "élément inconnu",
    "cette action",
    "cette cible",
    "cible inconnue",
    "validation requise",
    "action requise",
)
|
|
|
|
# French verbs/phrases whose presence marks a request as actionable. They are
# matched as folded substrings by _contains_actionable_french.
_ACTIONABLE_FRENCH_HINTS = (
    "peux-tu",
    "cliquer",
    "ouvrir",
    "selectionner",
    "sélectionner",
    "choisir",
    "saisir",
    "corriger",
    "montrer",
    "indiquer",
    "valider",
    "fermer",
    "placer",
    "mettre",
    "reprendre",
)
|
|
|
|
# English/technical vocabulary that must not leak into a human-visible
# message. validate_visible_message looks for each term as a folded substring
# of the message.
_TECHNICAL_ENGLISH_TERMS = (
    "target_not_found",
    "target not found",
    "no_screen_change",
    "no screen change",
    "wrong_window",
    "wrong window",
    "validation required",
    "retry",
    "fallback",
    "timeout",
    "screenshot",
    "validator",
    "failure",
    "failed",
    "resolve target",
    "postcondition",
    "please",
    "click",
    "button",
    "target",
    "expected",
    "actual",
    "observed",
)
|
|
|
|
# Raw payload field names (case-insensitive, whole word).
_TECHNICAL_FIELD_RE = re.compile(
    r"\b(?:"
    r"action_id|replay_id|session_id|workflow_id|machine_id|target_spec|"
    r"vlm_description|resolution_method|resolution_score|retry_count|"
    r"x_pct|y_pct|screenshot_b64|expected_window_title|current_action_index"
    r")\b",
    re.IGNORECASE,
)

# Prefixed identifiers such as "session_ab12cd" (case-sensitive prefix followed
# by an underscore and at least four identifier characters).
_TECHNICAL_IDENTIFIER_RE = re.compile(
    r"\b(?:action|replay|session|sess|workflow|node|edge|target|retry|"
    r"precheck|wait|trace|event|machine|run)_[A-Za-z0-9][A-Za-z0-9_.:-]{3,}\b"
)

# Canonical 8-4-4-4-12 hexadecimal UUID.
_UUID_RE = re.compile(
    r"\b[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}\b",
    re.IGNORECASE,
)

# Any run of 16 or more hexadecimal digits.
_LONG_HEX_RE = re.compile(r"\b[0-9a-f]{16,}\b", re.IGNORECASE)

# Parenthesised pair of 2-5 digit integers, e.g. "(120, 340)".
_PIXEL_TUPLE_RE = re.compile(r"\(\s*\d{2,5}\s*,\s*\d{2,5}\s*\)")

# "name=value" / "name: value" geometry fields, e.g. "x=12" or "width: 300".
_PIXEL_FIELD_RE = re.compile(
    r"\b(?:x|y|left|top|width|height|w|h|x_pct|y_pct)\s*[=:]\s*-?\d+(?:[.,]\d+)?",
    re.IGNORECASE,
)

# Sizes expressed in pixels, e.g. "120 px".
_PX_RE = re.compile(r"\b\d{2,5}\s*px\b", re.IGNORECASE)

# Numeric score/confidence/threshold assignments, in English or French.
_SCORE_RE = re.compile(
    r"\b(?:score|confidence|confiance|similarit[eé]|threshold|seuil|"
    r"probabilit[eé])\s*[:=]\s*\d+(?:[.,]\d+)?%?\b",
    re.IGNORECASE,
)
|
|
|
|
@dataclass(frozen=True)
class MessageValidationIssue:
    """A single problem detected in a human-visible message."""

    # Short category identifier, e.g. "generic_phrase" or "invalid_structure".
    code: str
    # Explanation attached to this particular occurrence.
    detail: str
|
|
|
|
|
|
@dataclass(frozen=True)
class MessageValidationResult:
    """Outcome of validating a user-facing message."""

    # Every issue found; the default empty tuple means the message passed.
    issues: tuple[MessageValidationIssue, ...] = ()

    @property
    def valid(self) -> bool:
        """Return True when no issue was recorded."""
        return not self.issues

    def raise_for_errors(self) -> None:
        """Raise ``MessageContractError`` if at least one issue was recorded."""
        if self.valid:
            return
        raise MessageContractError(self)
|
|
|
|
|
|
class MessageContractError(ValueError):
    """Raised when a message breaks the human-readability contract.

    The failing validation result stays available as ``self.result``; the
    exception text lists every issue as ``code: detail`` separated by ``; ``.
    """

    def __init__(self, result: MessageValidationResult):
        self.result = result
        rendered = [f"{issue.code}: {issue.detail}" for issue in result.issues]
        summary = "; ".join(rendered)
        super().__init__(f"Message humain invalide: {summary}")
|
|
|
|
|
|
@dataclass(frozen=True)
class SupervisedPauseFields:
    """The four mandatory fields that explain a supervised pause."""

    # Text rendered after the "J'essaie de" label.
    intention: str
    # Text rendered after the "J'attendais" label.
    attendu: str
    # Text rendered after the "Je vois" label.
    vu: str
    # Text rendered after the "Peux-tu" label.
    demande: str
|
|
|
|
|
|
# Fallback wording used by coerce_supervised_pause_message whenever a
# caller-supplied field is missing or rejected.
DEFAULT_SUPERVISED_PAUSE_FIELDS = SupervisedPauseFields(
    intention="continuer une etape supervisee",
    attendu="un accord humain clair avant de continuer",
    vu="je suis sur une etape qui demande une verification humaine",
    demande="indiquer si je peux continuer ou corriger l'action attendue",
)
|
|
|
|
|
|
def format_supervised_pause_message(
    *,
    intention: str,
    attendu: str,
    vu: str,
    demande: str,
) -> str:
    """Build a clear, actionable supervised pause message.

    Each field is collapsed to a single line and rendered after its label, so
    the result always has exactly four lines.

    Raises:
        MessageContractError: if the assembled message fails
            ``validate_supervised_pause_message`` (for instance a vague or
            technical field), instead of returning a degraded message.
    """
    labelled_values = (
        ("J'essaie de", _one_line(intention)),
        ("J'attendais", _one_line(attendu)),
        ("Je vois", _one_line(vu)),
        ("Peux-tu", _one_line(demande)),
    )
    message = "\n".join(f"{label} : {value}" for label, value in labelled_values)
    validate_supervised_pause_message(message).raise_for_errors()
    return message
|
|
|
|
|
|
def format_supervised_pause_from_mapping(payload: Mapping[str, object]) -> str:
    """Build a supervised pause message from a runtime mapping.

    Each field is looked up under its French name first, then under an English
    alias kept to ease progressive integration:
    ``intention|trying_to``, ``attendu|expected``, ``vu|observed``,
    ``demande|request``.

    Raises:
        MessageContractError: propagated from
            ``format_supervised_pause_message`` when the result is invalid.
    """
    lookup_keys = {
        "intention": ("intention", "trying_to"),
        "attendu": ("attendu", "expected"),
        "vu": ("vu", "observed"),
        "demande": ("demande", "request"),
    }
    resolved = {
        field: _mapping_text(payload, *keys) for field, keys in lookup_keys.items()
    }
    return format_supervised_pause_message(**resolved)
|
|
|
|
|
|
def coerce_supervised_pause_message(
    message: object = "",
    *,
    intention: object = "",
    attendu: object = "",
    vu: object = "",
    demande: object = "",
) -> str:
    """Return a valid supervised pause message, even from a legacy message.

    If ``message`` already satisfies the strict four-line contract it is kept
    (with whitespace normalised inside each line and blank lines dropped).
    Otherwise the four fields are composed from the explicit values supplied;
    any value that is too short, too long, vague or technical is replaced by
    the matching entry of ``DEFAULT_SUPERVISED_PAUSE_FIELDS``. When no explicit
    ``demande`` is given, the flattened legacy ``message`` is tried in its
    place.

    Args:
        message: Legacy or already formatted message; any object is accepted
            and converted with ``str``.
        intention: Candidate text for the "J'essaie de" line.
        attendu: Candidate text for the "J'attendais" line.
        vu: Candidate text for the "Je vois" line.
        demande: Candidate text for the "Peux-tu" line.

    Returns:
        A message that passes ``validate_supervised_pause_message``.
    """
    # Normalise line by line. Flattening the whole text first would merge the
    # four lines into one, and the "exactly 4 lines" structural check would
    # then reject every already valid message.
    structured = "\n".join(
        _one_line(line) for line in str(message or "").splitlines() if line.strip()
    )
    if structured and validate_supervised_pause_message(structured).valid:
        return structured

    # Legacy free text: the single-line form may stand in for the request.
    raw_message = _one_line(message)
    defaults = DEFAULT_SUPERVISED_PAUSE_FIELDS
    candidates = SupervisedPauseFields(
        intention=_safe_field_text(intention, defaults.intention),
        attendu=_safe_field_text(attendu, defaults.attendu),
        vu=_safe_field_text(vu, defaults.vu),
        demande=_safe_field_text(demande or raw_message, defaults.demande),
    )

    try:
        return format_supervised_pause_message(
            intention=candidates.intention,
            attendu=candidates.attendu,
            vu=candidates.vu,
            demande=candidates.demande,
        )
    except MessageContractError:
        # Fields that are acceptable one by one can still fail together (for
        # example a request that is not actionable); fall back to the defaults.
        return format_supervised_pause_message(
            intention=defaults.intention,
            attendu=defaults.attendu,
            vu=defaults.vu,
            demande=defaults.demande,
        )
|
|
|
|
|
|
def warn_visible_message(
    message: object,
    *,
    source: str,
    supervised_pause: bool = False,
) -> str:
    """Log contract violations without modifying the visible message.

    Args:
        message: Value about to be displayed; converted with ``str`` (falsy
            values become the empty string).
        source: Caller identifier included in the warning.
        supervised_pause: When true, apply the strict supervised-pause
            validation instead of the generic visible-message validation.

    Returns:
        The message text, unchanged by validation.
    """
    text = str(message or "")
    if supervised_pause:
        result = validate_supervised_pause_message(text)
    else:
        result = validate_visible_message(text)

    if result.valid:
        return text

    codes = [issue.code for issue in result.issues]
    logger.warning(
        "[message_contract] invalid_message source=%s codes=%s",
        source,
        codes,
    )
    return text
|
|
|
|
|
|
def validate_supervised_pause_message(message: str) -> MessageValidationResult:
    """Validate the strict contract of a supervised pause message.

    Combines the generic visible-message checks with the four-line structure
    check. When the structure parses, it also checks each field's length and
    that the request is actionable (contains a French action hint and has at
    least four words).
    """
    collected = [*validate_visible_message(message).issues]
    fields, structure_issues = _parse_supervised_pause(message)
    collected += structure_issues

    # Field-level checks only make sense once all four fields were parsed.
    if not fields:
        return _dedupe_issues(collected)

    for name, value in fields.items():
        size = len(value)
        if size < MIN_FIELD_CHARS:
            collected.append(
                MessageValidationIssue(
                    "field_too_short",
                    f"{name} doit etre explicite",
                )
            )
        if size > MAX_FIELD_CHARS:
            collected.append(
                MessageValidationIssue(
                    "field_too_long",
                    f"{name} depasse {MAX_FIELD_CHARS} caracteres",
                )
            )

    demande = fields.get("demande", "")
    actionable = _contains_actionable_french(demande) and len(demande.split()) >= 4
    if not actionable:
        collected.append(
            MessageValidationIssue(
                "not_actionable",
                "la demande doit contenir une action concrete en francais",
            )
        )

    return _dedupe_issues(collected)
|
|
|
|
|
|
def validate_visible_message(message: str) -> MessageValidationResult:
    """Check that a visible message is neither generic nor technical.

    Issues are reported in this order: length, generic phrases, technical
    English terms, then raw technical patterns (field names, identifiers,
    coordinates, scores). An empty message short-circuits with a single
    ``empty_message`` issue.
    """
    text = str(message or "").strip()
    if not text:
        return MessageValidationResult(
            (MessageValidationIssue("empty_message", "message vide"),)
        )

    issues: list[MessageValidationIssue] = []
    if len(text) > MAX_VISIBLE_MESSAGE_CHARS:
        issues.append(
            MessageValidationIssue(
                "message_too_long",
                f"message au-dela de {MAX_VISIBLE_MESSAGE_CHARS} caracteres",
            )
        )

    folded = _fold(text)

    # Accented and unaccented spellings fold to the same key; only the first
    # spelling listed is kept for reporting.
    first_spelling: dict[str, str] = {}
    for phrase in _GENERIC_PHRASES:
        first_spelling.setdefault(_fold(phrase), phrase)
    issues.extend(
        MessageValidationIssue(
            "generic_phrase",
            f"formulation trop generique: {phrase}",
        )
        for folded_phrase, phrase in first_spelling.items()
        if folded_phrase in folded
    )

    issues.extend(
        MessageValidationIssue(
            "technical_english",
            f"anglais technique visible: {term}",
        )
        for term in _TECHNICAL_ENGLISH_TERMS
        if _fold(term) in folded
    )

    # The regex checks run on the stripped text, not on the folded copy.
    pattern_checks = (
        ("technical_field", _TECHNICAL_FIELD_RE, "champ technique brut"),
        ("technical_identifier", _TECHNICAL_IDENTIFIER_RE, "identifiant technique brut"),
        ("technical_identifier", _UUID_RE, "UUID brut"),
        ("technical_identifier", _LONG_HEX_RE, "hash technique brut"),
        ("raw_coordinates", _PIXEL_TUPLE_RE, "coordonnees pixel brutes"),
        ("raw_coordinates", _PIXEL_FIELD_RE, "coordonnees techniques brutes"),
        ("raw_coordinates", _PX_RE, "coordonnees pixel brutes"),
        ("raw_score", _SCORE_RE, "score ou confiance brut"),
    )
    issues.extend(
        MessageValidationIssue(code, detail)
        for code, pattern, detail in pattern_checks
        if pattern.search(text)
    )

    return _dedupe_issues(issues)
|
|
|
|
|
|
def is_valid_visible_message(message: str) -> bool:
    """Boolean shortcut over ``validate_visible_message`` for UI call sites."""
    outcome = validate_visible_message(message)
    return outcome.valid
|
|
|
|
|
|
def is_valid_supervised_pause_message(message: str) -> bool:
    """Boolean shortcut over ``validate_supervised_pause_message``."""
    outcome = validate_supervised_pause_message(message)
    return outcome.valid
|
|
|
|
|
|
def _parse_supervised_pause(
    message: str,
) -> tuple[dict[str, str], list[MessageValidationIssue]]:
    """Split a supervised pause message into its four labelled fields.

    Blank lines are ignored and trailing whitespace is dropped. Each of the
    four remaining lines must start with its label, followed by a colon and
    some text.

    Returns:
        ``(fields, issues)``. ``fields`` maps ``intention``, ``attendu``,
        ``vu`` and ``demande`` to their text when all four lines parse; it is
        empty as soon as any structural issue is found. ``issues`` holds one
        ``invalid_structure`` entry per problem.
    """
    lines = [line.rstrip() for line in str(message or "").splitlines() if line.strip()]
    issues: list[MessageValidationIssue] = []

    if len(lines) != 4:
        issues.append(
            MessageValidationIssue(
                "invalid_structure",
                "une pause supervisee doit contenir exactement 4 lignes",
            )
        )
        return {}, issues

    specs = (
        ("intention", r"^J'essaie de\s*:\s*(.+)$"),
        ("attendu", r"^J'attendais\s*:\s*(.+)$"),
        ("vu", r"^Je vois\s*:\s*(.+)$"),
        ("demande", r"^Peux-tu\s*:\s*(.+)$"),
    )
    fields: dict[str, str] = {}
    # Use the position of the line itself when reporting. Counting the fields
    # parsed so far would point at the wrong line and label after any earlier
    # failure or success, and would merge distinct failures once deduplicated.
    for index, (line, (name, pattern)) in enumerate(zip(lines, specs)):
        match = re.match(pattern, line)
        if match is None:
            issues.append(
                MessageValidationIssue(
                    "invalid_structure",
                    f"ligne {index + 1} doit commencer par {SUPERVISED_PAUSE_LABELS[index]}",
                )
            )
            continue
        fields[name] = match.group(1).strip()

    # A partial parse is reported as no parse at all.
    if len(fields) != len(specs):
        return {}, issues

    return fields, issues
|
|
|
|
|
|
def _contains_actionable_french(text: str) -> bool:
    """Return True if ``text`` contains any actionable French hint.

    Both sides are compared after accent/case folding, as plain substrings.
    """
    haystack = _fold(text)
    for hint in _ACTIONABLE_FRENCH_HINTS:
        if _fold(hint) in haystack:
            return True
    return False
|
|
|
|
|
|
def _one_line(value: object) -> str:
    """Render ``value`` as text on one line with single spaces between words.

    Falsy values become the empty string; every run of whitespace (newlines
    included) collapses to one space and the ends are trimmed.
    """
    words = str(value or "").split()
    return " ".join(words)
|
|
|
|
|
|
def _mapping_text(payload: Mapping[str, object], *keys: str) -> str:
    """Return the text of the first usable value found under ``keys``.

    Keys are tried in order. A key that is missing, maps to ``None``, or maps
    to a value whose text is empty or whitespace-only is skipped, so a blank
    primary field does not hide a populated alias.

    Returns:
        ``str(value)`` for the first usable value, or ``""`` when none of the
        keys yields one.
    """
    for key in keys:
        value = payload.get(key)
        if value is None:
            continue
        text = str(value)
        if text.strip():
            return text
    return ""
|
|
|
|
|
|
def _safe_field_text(value: object, fallback: str) -> str:
    """Return ``value`` as a one-line field, or ``fallback`` if it is unusable.

    A candidate is unusable when its length falls outside
    ``MIN_FIELD_CHARS``..``MAX_FIELD_CHARS`` or when it fails
    ``validate_visible_message``.
    """
    candidate = _one_line(value)
    within_bounds = MIN_FIELD_CHARS <= len(candidate) <= MAX_FIELD_CHARS
    if within_bounds and validate_visible_message(candidate).valid:
        return candidate
    return fallback
|
|
|
|
|
|
def _fold(text: str) -> str:
    """Return ``text`` without combining marks and case-folded.

    The text is NFKD-decomposed so accents become separate combining
    characters, which are then dropped. Other non-ASCII characters are kept.
    """
    decomposed = unicodedata.normalize("NFKD", str(text or ""))
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    return "".join(base_chars).casefold()
|
|
|
|
|
|
def _dedupe_issues(issues: Iterable[MessageValidationIssue]) -> MessageValidationResult:
    """Wrap ``issues`` in a result, dropping repeated ``(code, detail)`` pairs.

    The first occurrence of each pair is kept and the original order is
    preserved.
    """
    first_seen: dict[tuple[str, str], MessageValidationIssue] = {}
    for issue in issues:
        # setdefault keeps the earliest issue; dicts preserve insertion order.
        first_seen.setdefault((issue.code, issue.detail), issue)
    return MessageValidationResult(tuple(first_seen.values()))
|
|
|
|
|
|
# Names exported by ``from <module> import *``.
__all__ = [
    "MAX_FIELD_CHARS",
    "MAX_VISIBLE_MESSAGE_CHARS",
    "MessageContractError",
    "MessageValidationIssue",
    "MessageValidationResult",
    "SUPERVISED_PAUSE_LABELS",
    "SupervisedPauseFields",
    "coerce_supervised_pause_message",
    "format_supervised_pause_from_mapping",
    "format_supervised_pause_message",
    "is_valid_supervised_pause_message",
    "is_valid_visible_message",
    "validate_supervised_pause_message",
    "validate_visible_message",
    "warn_visible_message",
]
|