Nouveaux patterns : - dialog_overwrite : "voulez-vous remplacer/écraser", "fichier existe déjà" → Oui - dialog_dont_save : "ne pas enregistrer", "quitter sans enregistrer" → Ne pas enregistrer Handler amélioré (handle_detected_pattern) : - EasyOCR au lieu de docTR (meilleure lecture des boutons GUI) - Match par inclusion (pas seulement exact) - Suppression fallback VLM (Ollama n'a plus de VRAM) - Prints visibles pour diagnostic 28 patterns au total, testés sur 6 dialogues types. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
524 lines
18 KiB
Python
524 lines
18 KiB
Python
"""
|
|
Base de connaissances des patterns d'interface utilisateur.
|
|
|
|
Donne à Léa des "réflexes natifs" : quand elle reconnaît un pattern UI
|
|
connu (dialogue OK/Annuler, menu, barre d'outils), elle sait immédiatement
|
|
quoi faire sans avoir besoin de l'apprendre par observation.
|
|
|
|
Sources :
|
|
- GUI-R1 dataset (3K exemples annotés, ritzzai/GUI-R1)
|
|
- Patterns Windows/Linux courants
|
|
- Conventions UI universelles
|
|
|
|
Utilisation :
|
|
from core.knowledge.ui_patterns import UIPatternLibrary
|
|
lib = UIPatternLibrary()
|
|
match = lib.find_pattern("Voulez-vous enregistrer ?")
|
|
# → {'action': 'click', 'target': 'Enregistrer', 'typical_zone': 'dialog_center', ...}
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class UIPattern:
    """A known user-interface pattern and the action to perform when it is seen."""

    # Unique identifier of the pattern (e.g. "dialog_save").
    name: str
    # Grouping key; values used in this file include "dialog", "window",
    # "menu", "form", "popup" and "shortcut".
    category: str
    # Text fragments searched for in the analysed text by
    # UIPatternLibrary.find_pattern.
    triggers: List[str]
    # Kind of action to perform; this file uses "click" and "hotkey".
    action: str
    # What the action applies to: a button label for "click",
    # a key combination such as "ctrl+s" for "hotkey".
    target: str
    # Symbolic screen area where the target usually lives
    # (e.g. "dialog_center", "titlebar", "menu_toolbar", "content", "keyboard").
    typical_zone: str
    # Optional list of four floats in the 0..1 range.
    # NOTE(review): presumably a normalised [x1, y1, x2, y2] box — confirm with consumers.
    typical_bbox: Optional[List[float]] = None
    # Operating system the pattern applies to; "any" matches every OS filter.
    os: str = "any"
    # Base confidence; find_pattern scales it by the match score and caps it at 1.0.
    confidence: float = 0.9
    # Free-form extra data (e.g. "alternatives", "source", "learned_at").
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
|
|
|
|
# Built-in native patterns — baseline reflexes.
#
# Each entry is a plain dict converted to a UIPattern by
# UIPatternLibrary._load_builtin. Keys read there: "name", "category",
# "triggers", "action", "target", "typical_zone", "typical_bbox", "os" and
# the optional "alternatives" (stored in the pattern metadata).
BUILTIN_PATTERNS: List[Dict[str, Any]] = [
    # === CONFIRMATION DIALOGS ===
    {
        "name": "dialog_save",
        "category": "dialog",
        "triggers": [
            "voulez-vous enregistrer", "do you want to save",
            "save changes", "enregistrer les modifications",
            "enregistrer sous", "save as",
            "sauvegarder", "unsaved changes",
        ],
        "action": "click",
        "target": "Enregistrer",
        "alternatives": ["Save", "Oui", "Yes"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.35, 0.55, 0.50, 0.65],
        "os": "any",
    },
    {
        "name": "dialog_cancel",
        "category": "dialog",
        "triggers": [
            "annuler", "cancel", "abandonner", "discard",
        ],
        "action": "click",
        "target": "Annuler",
        "alternatives": ["Cancel", "Non", "No"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.50, 0.55, 0.65, 0.65],
        "os": "any",
    },
    {
        "name": "dialog_ok",
        "category": "dialog",
        "triggers": [
            "ok", "d'accord", "compris", "information",
            "erreur", "error", "warning", "avertissement",
        ],
        "action": "click",
        "target": "OK",
        "alternatives": ["Fermer", "Close", "Compris"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.45, 0.60, 0.55, 0.70],
        "os": "any",
    },
    {
        "name": "dialog_yes_no",
        "category": "dialog",
        "triggers": [
            "êtes-vous sûr", "are you sure", "confirmer",
            "confirm", "supprimer", "delete",
        ],
        "action": "click",
        "target": "Oui",
        "alternatives": ["Yes", "Confirmer", "Confirm"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.35, 0.60, 0.45, 0.68],
        "os": "any",
    },
    {
        # File-overwrite confirmation: answer "yes" to replace the file.
        "name": "dialog_overwrite",
        "category": "dialog",
        "triggers": [
            "voulez-vous remplacer", "voulez-vous écraser",
            "remplacer le fichier", "replace existing",
            "fichier existe déjà", "already exists",
            "overwrite", "écraser",
        ],
        "action": "click",
        "target": "Oui",
        "alternatives": ["Yes", "Remplacer", "Replace", "Confirmer"],
        "typical_zone": "dialog_center",
        "os": "any",
    },
    {
        # "Quit without saving" dialogs: click the don't-save button.
        "name": "dialog_dont_save",
        "category": "dialog",
        "triggers": [
            "ne pas enregistrer", "don't save",
            "ne pas sauvegarder", "quitter sans enregistrer",
            "discard changes",
        ],
        "action": "click",
        "target": "Ne pas enregistrer",
        "alternatives": ["Don't Save", "Ne pas sauvegarder", "Non"],
        "typical_zone": "dialog_center",
        "os": "any",
    },

    # === WINDOW NAVIGATION ===
    {
        "name": "window_close",
        "category": "window",
        "triggers": ["fermer la fenêtre", "close window"],
        "action": "click",
        "target": "X",
        "typical_zone": "titlebar",
        "typical_bbox": [0.96, 0.0, 1.0, 0.04],
        "os": "windows",
    },
    {
        "name": "window_minimize",
        "category": "window",
        "triggers": ["minimiser", "minimize"],
        "action": "click",
        "target": "_",
        "typical_zone": "titlebar",
        "typical_bbox": [0.90, 0.0, 0.94, 0.04],
        "os": "windows",
    },
    {
        "name": "window_maximize",
        "category": "window",
        "triggers": ["maximiser", "maximize", "agrandir"],
        "action": "click",
        "target": "□",
        "typical_zone": "titlebar",
        "typical_bbox": [0.94, 0.0, 0.96, 0.04],
        "os": "windows",
    },

    # === MENUS ===
    {
        "name": "menu_file",
        "category": "menu",
        "triggers": ["menu fichier", "menu file", "ouvrir fichier", "open file"],
        "action": "click",
        "target": "Fichier",
        "alternatives": ["File"],
        "typical_zone": "menu_toolbar",
        "typical_bbox": [0.0, 0.03, 0.06, 0.06],
        "os": "any",
    },
    {
        "name": "menu_edit",
        "category": "menu",
        "triggers": ["édition", "edit", "modifier"],
        "action": "click",
        "target": "Édition",
        "alternatives": ["Edit"],
        "typical_zone": "menu_toolbar",
        "typical_bbox": [0.06, 0.03, 0.12, 0.06],
        "os": "any",
    },

    # === FORMS ===
    {
        "name": "form_submit",
        "category": "form",
        "triggers": [
            "valider", "submit", "envoyer", "send",
            "connexion", "login", "se connecter", "sign in",
        ],
        "action": "click",
        "target": "Valider",
        "alternatives": ["Submit", "Envoyer", "Connexion", "Login", "OK"],
        "typical_zone": "content",
        "typical_bbox": [0.35, 0.70, 0.65, 0.80],
        "os": "any",
    },
    {
        "name": "form_search",
        "category": "form",
        "triggers": ["rechercher", "search", "chercher", "find"],
        "action": "click",
        "target": "Rechercher",
        "alternatives": ["Search", "🔍", "Go"],
        "typical_zone": "menu_toolbar",
        "typical_bbox": [0.30, 0.03, 0.70, 0.06],
        "os": "any",
    },

    # === WEB NAVIGATION ===
    {
        "name": "cookie_accept",
        "category": "popup",
        "triggers": [
            "accepter les cookies", "accept cookies",
            "utilise des cookies", "uses cookies",
            "j'accepte", "accept all", "tout accepter",
            "consent", "consentement",
        ],
        "action": "click",
        "target": "Accepter",
        "alternatives": ["Accept", "Accept All", "Tout accepter", "J'accepte"],
        "typical_zone": "content",
        "typical_bbox": [0.30, 0.80, 0.70, 0.90],
        "os": "any",
    },

    # === UNIVERSAL SHORTCUTS ===
    {
        "name": "shortcut_save",
        "category": "shortcut",
        "triggers": ["sauvegarder", "enregistrer", "save"],
        "action": "hotkey",
        "target": "ctrl+s",
        "typical_zone": "keyboard",
        "os": "any",
    },
    {
        "name": "shortcut_undo",
        "category": "shortcut",
        "triggers": ["annuler action", "undo", "défaire"],
        "action": "hotkey",
        "target": "ctrl+z",
        "typical_zone": "keyboard",
        "os": "any",
    },
    {
        "name": "shortcut_copy",
        "category": "shortcut",
        "triggers": ["copier", "copy"],
        "action": "hotkey",
        "target": "ctrl+c",
        "typical_zone": "keyboard",
        "os": "any",
    },
    {
        "name": "shortcut_paste",
        "category": "shortcut",
        "triggers": ["coller", "paste"],
        "action": "hotkey",
        "target": "ctrl+v",
        "typical_zone": "keyboard",
        "os": "any",
    },
]
|
|
|
|
|
|
class UIPatternLibrary:
    """Library of known UI patterns.

    Gives Léa "native reflexes": when a pattern is recognised in OCR text
    or in the visual context, she immediately knows what to do.
    """

    # Default locations of the additional pattern files.
    _PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent
    _GUI_R1_PATTERNS_PATH = _PROJECT_ROOT / "data" / "gui_r1_ui_patterns.json"
    _LEARNED_PATTERNS_PATH = _PROJECT_ROOT / "data" / "learned_patterns.json"

    def __init__(self, extra_patterns_path: Optional[str] = None):
        """Load built-in patterns, then the optional pattern files.

        Args:
            extra_patterns_path: Optional path to a custom JSON pattern file,
                loaded after the default files.
        """
        self._patterns: List[UIPattern] = []
        self._load_builtin()

        # Patterns extracted from GUI-R1 (static, generated once).
        self._load_from_file(str(self._GUI_R1_PATTERNS_PATH))

        # Patterns learned through Shadow observation (dynamic).
        self._load_from_file(str(self._LEARNED_PATTERNS_PATH))

        # Custom file explicitly provided by the caller.
        if extra_patterns_path:
            self._load_from_file(extra_patterns_path)

        logger.info(f"UIPatternLibrary: {len(self._patterns)} patterns chargés")

    def _load_builtin(self) -> None:
        """Append every entry of BUILTIN_PATTERNS to the in-memory list."""
        for p in BUILTIN_PATTERNS:
            self._patterns.append(UIPattern(
                name=p["name"],
                category=p["category"],
                triggers=p["triggers"],
                action=p["action"],
                target=p["target"],
                typical_zone=p.get("typical_zone", "content"),
                typical_bbox=p.get("typical_bbox"),
                os=p.get("os", "any"),
                metadata={
                    "alternatives": p.get("alternatives", []),
                    "source": "builtin",
                },
            ))

    def _load_from_file(self, path: str) -> None:
        """Load patterns from a JSON file shaped like ``{"patterns": [...]}``.

        A missing file is not an error (it is expected on first launch). Any
        failure while reading or parsing is logged and swallowed so that a
        bad optional file never prevents the library from starting.

        Args:
            path: Path of the JSON file to read.
        """
        filepath = Path(path)
        if not filepath.exists():
            logger.debug(f"Fichier patterns non trouvé (OK si premier lancement): {path}")
            return
        try:
            # The files are written as UTF-8 with ensure_ascii=False, so they
            # must be read as UTF-8 too instead of the locale default
            # (e.g. cp1252 on Windows), which would mangle accented triggers.
            with open(filepath, encoding="utf-8") as f:
                data = json.load(f)
            entries = data.get("patterns", [])
            for p in entries:
                # Start from the nested metadata, then lift the well-known
                # top-level keys into it when they are present.
                meta = dict(p.get("metadata", {}))
                for key in ("source", "learned_at", "gui_r1_id", "alternatives"):
                    if key in p:
                        meta[key] = p[key]
                self._patterns.append(UIPattern(
                    name=p["name"],
                    category=p.get("category", "custom"),
                    triggers=p.get("triggers", []),
                    action=p.get("action", "click"),
                    target=p.get("target", ""),
                    typical_zone=p.get("typical_zone", "content"),
                    typical_bbox=p.get("typical_bbox"),
                    os=p.get("os", "any"),
                    confidence=p.get("confidence", 0.9),
                    metadata=meta,
                ))
            logger.info(f"Chargé {len(entries)} patterns depuis {path}")
        except Exception as e:
            # Deliberately broad: loading extra patterns is best-effort.
            logger.error(f"Erreur chargement patterns: {e}")

    def find_pattern(
        self,
        text: str,
        os_filter: Optional[str] = None,
        category: Optional[str] = None,
    ) -> Optional[Dict[str, Any]]:
        """Look for a known UI pattern in a piece of text (OCR, window title...).

        Matching is case-insensitive. Triggers of three characters or fewer
        must match as whole words; longer triggers match as substrings. The
        score of a match is the share of the text covered by the trigger, and
        the first pattern reaching the highest score wins.

        Args:
            text: Text to analyse (may contain OCR noise). Empty or ``None``
                text yields ``None``.
            os_filter: Restrict to patterns for this OS ("windows", "linux");
                patterns whose OS is "any" always pass. ``None`` means no filter.
            category: Restrict to patterns of this category. ``None`` means
                every category.

        Returns:
            A dict describing the best match (pattern, category, action,
            target, alternatives, typical_zone, typical_bbox, confidence,
            matched_trigger, os), or ``None`` when nothing matches.
        """
        # Local import: `re` is not part of the module-level imports.
        import re

        if not text:
            return None

        text_lower = text.lower()
        text_len = len(text_lower)

        best_pattern = None
        best_trigger = None
        best_score = 0.0

        for pattern in self._patterns:
            if os_filter and pattern.os not in ("any", os_filter):
                continue
            if category is not None and pattern.category != category:
                continue

            for trigger in pattern.triggers:
                # The text is lowercased, so the trigger must be as well;
                # otherwise mixed-case triggers from files never match.
                needle = trigger.lower()
                if not needle:
                    continue
                if len(needle) <= 3:
                    # Short triggers such as "ok" only count as whole words,
                    # so that "book" does not trigger the OK dialog.
                    found = re.search(
                        r"\b" + re.escape(needle) + r"\b", text_lower
                    ) is not None
                else:
                    found = needle in text_lower
                if not found:
                    continue

                score = len(needle) / text_len
                # Strict comparison keeps the earliest pattern on ties.
                if score > best_score:
                    best_score = score
                    best_pattern = pattern
                    best_trigger = trigger

        if best_pattern is None:
            return None

        return {
            "pattern": best_pattern.name,
            "category": best_pattern.category,
            "action": best_pattern.action,
            "target": best_pattern.target,
            "alternatives": best_pattern.metadata.get("alternatives", []),
            "typical_zone": best_pattern.typical_zone,
            "typical_bbox": best_pattern.typical_bbox,
            "confidence": min(best_pattern.confidence * (1 + best_score), 1.0),
            "matched_trigger": best_trigger,
            "os": best_pattern.os,
        }

    def find_by_category(self, category: str) -> List[Dict[str, Any]]:
        """Return a summary dict for every pattern of the given category."""
        return [
            {
                "name": p.name,
                "action": p.action,
                "target": p.target,
                "triggers": p.triggers,
                "typical_zone": p.typical_zone,
            }
            for p in self._patterns
            if p.category == category
        ]

    def get_dialog_handler(self, dialog_text: str) -> Optional[Dict[str, Any]]:
        """Shortcut: look for a dialog pattern first, then for any pattern.

        Args:
            dialog_text: Text read from the dialog.

        Returns:
            The best match among "dialog" patterns when there is one,
            otherwise the best match over all patterns, otherwise ``None``.
        """
        match = self.find_pattern(dialog_text, category="dialog")
        if match is not None:
            return match
        return self.find_pattern(dialog_text)

    def add_pattern(self, pattern_dict: Dict[str, Any]) -> None:
        """Add a pattern at runtime (e.g. one learned by observation).

        Args:
            pattern_dict: Pattern description; only "name" is required.
                "metadata", "source", "learned_at" and "alternatives" are
                kept in the pattern metadata; "source" defaults to "learned".
        """
        metadata = dict(pattern_dict.get("metadata") or {})
        for key in ("source", "learned_at", "alternatives"):
            if key in pattern_dict:
                metadata[key] = pattern_dict[key]
        metadata.setdefault("source", "learned")

        self._patterns.append(UIPattern(
            name=pattern_dict["name"],
            category=pattern_dict.get("category", "learned"),
            triggers=pattern_dict.get("triggers", []),
            action=pattern_dict.get("action", "click"),
            target=pattern_dict.get("target", ""),
            typical_zone=pattern_dict.get("typical_zone", "content"),
            typical_bbox=pattern_dict.get("typical_bbox"),
            os=pattern_dict.get("os", "any"),
            confidence=pattern_dict.get("confidence", 0.7),
            metadata=metadata,
        ))

    def save_to_file(self, path: str) -> None:
        """Write every in-memory pattern (built-in and learned) to a JSON file.

        Args:
            path: Destination file; it is overwritten.
        """
        data = {
            "patterns": [
                {
                    "name": p.name,
                    "category": p.category,
                    "triggers": p.triggers,
                    "action": p.action,
                    "target": p.target,
                    "typical_zone": p.typical_zone,
                    "typical_bbox": p.typical_bbox,
                    "os": p.os,
                    "confidence": p.confidence,
                    "metadata": p.metadata,
                }
                for p in self._patterns
            ]
        }
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        logger.info(f"Sauvegardé {len(self._patterns)} patterns dans {path}")

    def save_learned_pattern(self, pattern_dict: Dict[str, Any]) -> None:
        """Persist a pattern learned by Shadow observation in learned_patterns.json.

        The pattern is added in memory AND saved to disk. The file is created
        when missing; existing patterns are preserved. A pattern with the same
        trigger set and the same target (case-insensitive) as an existing
        entry is skipped.

        Args:
            pattern_dict: Pattern description. Missing fields get defaults
                and a missing name is auto-numbered.
        """
        from datetime import datetime as dt

        filepath = self._LEARNED_PATTERNS_PATH
        filepath.parent.mkdir(parents=True, exist_ok=True)

        # Load the existing file, or start from an empty structure.
        existing: Dict[str, Any] = {"patterns": []}
        if filepath.exists():
            try:
                with open(filepath, encoding="utf-8") as f:
                    loaded = json.load(f)
            except (ValueError, OSError):
                # ValueError covers both invalid JSON and invalid UTF-8.
                logger.warning(f"Fichier {filepath} corrompu, recréation")
            else:
                if isinstance(loaded, dict) and isinstance(loaded.get("patterns", []), list):
                    existing = loaded
                else:
                    # Valid JSON but not the expected structure.
                    logger.warning(f"Fichier {filepath} corrompu, recréation")

        stored = existing.setdefault("patterns", [])

        # Avoid duplicates (same triggers + same target).
        new_triggers = {t.lower() for t in pattern_dict.get("triggers", [])}
        new_target = pattern_dict.get("target", "").lower()
        for existing_p in stored:
            existing_triggers = {t.lower() for t in existing_p.get("triggers", [])}
            if existing_triggers == new_triggers and existing_p.get("target", "").lower() == new_target:
                logger.debug(f"Pattern déjà connu, skip: triggers={new_triggers}, target={new_target}")
                return

        # Auto-number and build the complete entry.
        count = len(stored)
        entry = {
            "name": pattern_dict.get("name", f"learned_dialog_{count + 1:03d}"),
            "category": pattern_dict.get("category", "dialog"),
            "triggers": pattern_dict.get("triggers", []),
            "action": pattern_dict.get("action", "click"),
            "target": pattern_dict.get("target", ""),
            "os": pattern_dict.get("os", "windows"),
            "source": "shadow_learning",
            "learned_at": dt.now().isoformat(timespec="seconds"),
            "confidence": pattern_dict.get("confidence", 0.8),
        }
        # Keep optional details supplied by the caller instead of dropping them.
        for optional_key in ("alternatives", "typical_zone", "typical_bbox"):
            if optional_key in pattern_dict:
                entry[optional_key] = pattern_dict[optional_key]

        # Add in memory (with the auto-generated name), then persist.
        self.add_pattern(entry)
        stored.append(entry)

        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(existing, f, indent=2, ensure_ascii=False)
        logger.info(f"Pattern appris sauvegardé: {entry['name']} → {entry['target']}")

    @property
    def stats(self) -> Dict[str, Any]:
        """Return the total number of patterns and the count per category."""
        from collections import Counter

        cats = Counter(p.category for p in self._patterns)
        return {"total": len(self._patterns), "by_category": dict(cats)}
|