# Module path: core/knowledge/ui_patterns.py
# (the package also needs an empty core/knowledge/__init__.py)
"""
Knowledge base of user-interface patterns.

Gives Léa "native reflexes": when she recognises a known UI pattern
(OK/Cancel dialog, menu, toolbar), she immediately knows what to do
without having to learn it by observation.

Sources:
- GUI-R1 dataset (3K annotated examples, ritzzai/GUI-R1)
- Common Windows/Linux patterns
- Universal UI conventions

Usage:
    from core.knowledge.ui_patterns import UIPatternLibrary
    lib = UIPatternLibrary()
    match = lib.find_pattern("Voulez-vous enregistrer ?")
    # → {'action': 'click', 'target': 'Enregistrer',
    #    'typical_zone': 'dialog_center', ...}
"""

import json
import logging
from collections import Counter
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)


@dataclass
class UIPattern:
    """A known user-interface pattern."""
    name: str
    category: str
    # Substrings searched (case-insensitively) in the analysed text.
    triggers: List[str]
    action: str
    target: str
    typical_zone: str
    typical_bbox: Optional[List[float]] = None
    os: str = "any"
    confidence: float = 0.9
    metadata: Dict[str, Any] = field(default_factory=dict)


# Built-in patterns — baseline reflexes
BUILTIN_PATTERNS: List[Dict[str, Any]] = [
    # === CONFIRMATION DIALOGS ===
    {
        "name": "dialog_save",
        "category": "dialog",
        "triggers": [
            "voulez-vous enregistrer", "do you want to save",
            "save changes", "enregistrer les modifications",
            "sauvegarder", "unsaved changes",
        ],
        "action": "click",
        "target": "Enregistrer",
        "alternatives": ["Save", "Oui", "Yes"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.35, 0.55, 0.50, 0.65],
        "os": "any",
    },
    {
        "name": "dialog_cancel",
        "category": "dialog",
        "triggers": [
            "annuler", "cancel", "abandonner", "discard",
        ],
        "action": "click",
        "target": "Annuler",
        "alternatives": ["Cancel", "Non", "No"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.50, 0.55, 0.65, 0.65],
        "os": "any",
    },
    {
        "name": "dialog_ok",
        "category": "dialog",
        "triggers": [
            "ok", "d'accord", "compris", "information",
            "erreur", "error", "warning", "avertissement",
        ],
        "action": "click",
        "target": "OK",
        "alternatives": ["Fermer", "Close", "Compris"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.45, 0.60, 0.55, 0.70],
        "os": "any",
    },
    {
        "name": "dialog_yes_no",
        "category": "dialog",
        "triggers": [
            "êtes-vous sûr", "are you sure", "confirmer",
            "confirm", "supprimer", "delete",
        ],
        "action": "click",
        "target": "Oui",
        "alternatives": ["Yes", "Confirmer", "Confirm"],
        "typical_zone": "dialog_center",
        "typical_bbox": [0.35, 0.60, 0.45, 0.68],
        "os": "any",
    },

    # === WINDOW NAVIGATION ===
    {
        "name": "window_close",
        "category": "window",
        "triggers": ["fermer la fenêtre", "close window"],
        "action": "click",
        "target": "X",
        "typical_zone": "titlebar",
        "typical_bbox": [0.96, 0.0, 1.0, 0.04],
        "os": "windows",
    },
    {
        "name": "window_minimize",
        "category": "window",
        "triggers": ["minimiser", "minimize"],
        "action": "click",
        "target": "_",
        "typical_zone": "titlebar",
        "typical_bbox": [0.90, 0.0, 0.94, 0.04],
        "os": "windows",
    },
    {
        "name": "window_maximize",
        "category": "window",
        "triggers": ["maximiser", "maximize", "agrandir"],
        "action": "click",
        "target": "□",
        "typical_zone": "titlebar",
        "typical_bbox": [0.94, 0.0, 0.96, 0.04],
        "os": "windows",
    },

    # === MENUS ===
    {
        "name": "menu_file",
        "category": "menu",
        "triggers": ["menu fichier", "menu file", "ouvrir fichier", "open file"],
        "action": "click",
        "target": "Fichier",
        "alternatives": ["File"],
        "typical_zone": "menu_toolbar",
        "typical_bbox": [0.0, 0.03, 0.06, 0.06],
        "os": "any",
    },
    {
        "name": "menu_edit",
        "category": "menu",
        "triggers": ["édition", "edit", "modifier"],
        "action": "click",
        "target": "Édition",
        "alternatives": ["Edit"],
        "typical_zone": "menu_toolbar",
        "typical_bbox": [0.06, 0.03, 0.12, 0.06],
        "os": "any",
    },

    # === FORMS ===
    {
        "name": "form_submit",
        "category": "form",
        "triggers": [
            "valider", "submit", "envoyer", "send",
            "connexion", "login", "se connecter", "sign in",
        ],
        "action": "click",
        "target": "Valider",
        "alternatives": ["Submit", "Envoyer", "Connexion", "Login", "OK"],
        "typical_zone": "content",
        "typical_bbox": [0.35, 0.70, 0.65, 0.80],
        "os": "any",
    },
    {
        "name": "form_search",
        "category": "form",
        "triggers": ["rechercher", "search", "chercher", "find"],
        "action": "click",
        "target": "Rechercher",
        "alternatives": ["Search", "🔍", "Go"],
        "typical_zone": "menu_toolbar",
        "typical_bbox": [0.30, 0.03, 0.70, 0.06],
        "os": "any",
    },

    # === WEB NAVIGATION ===
    {
        "name": "cookie_accept",
        "category": "popup",
        "triggers": [
            "accepter les cookies", "accept cookies",
            "j'accepte", "accept all", "tout accepter",
            "consent", "consentement",
        ],
        "action": "click",
        "target": "Accepter",
        "alternatives": ["Accept", "Accept All", "Tout accepter", "J'accepte"],
        "typical_zone": "content",
        "typical_bbox": [0.30, 0.80, 0.70, 0.90],
        "os": "any",
    },

    # === UNIVERSAL SHORTCUTS ===
    {
        "name": "shortcut_save",
        "category": "shortcut",
        "triggers": ["sauvegarder", "enregistrer", "save"],
        "action": "hotkey",
        "target": "ctrl+s",
        "typical_zone": "keyboard",
        "os": "any",
    },
    {
        "name": "shortcut_undo",
        "category": "shortcut",
        "triggers": ["annuler action", "undo", "défaire"],
        "action": "hotkey",
        "target": "ctrl+z",
        "typical_zone": "keyboard",
        "os": "any",
    },
    {
        "name": "shortcut_copy",
        "category": "shortcut",
        "triggers": ["copier", "copy"],
        "action": "hotkey",
        "target": "ctrl+c",
        "typical_zone": "keyboard",
        "os": "any",
    },
    {
        "name": "shortcut_paste",
        "category": "shortcut",
        "triggers": ["coller", "paste"],
        "action": "hotkey",
        "target": "ctrl+v",
        "typical_zone": "keyboard",
        "os": "any",
    },
]


class UIPatternLibrary:
    """Library of known UI patterns.

    Gives Léa "native reflexes": when a pattern is recognised in OCR
    text or in the visual context, she immediately knows what to do.
    """

    # A trigger must cover more than this fraction of the analysed text
    # for its pattern to be considered a match.
    _MIN_SCORE = 0.05

    def __init__(self, extra_patterns_path: Optional[str] = None):
        """Load the built-in patterns, then optional extra ones.

        Args:
            extra_patterns_path: Path of a JSON file in the format written
                by ``save_to_file``. A missing or unreadable file is logged
                and otherwise ignored.
        """
        self._patterns: List[UIPattern] = []
        self._load_builtin()
        if extra_patterns_path:
            self._load_from_file(extra_patterns_path)
        logger.info("UIPatternLibrary: %d patterns chargés", len(self._patterns))

    @staticmethod
    def _build_pattern(
        data: Dict[str, Any],
        default_category: str,
        default_confidence: float,
        metadata: Dict[str, Any],
    ) -> UIPattern:
        """Build a ``UIPattern`` from a plain dict.

        Lists are copied so the resulting pattern never aliases the
        caller's data (in particular the module-level BUILTIN_PATTERNS).

        Raises:
            KeyError: If ``data`` has no ``"name"`` entry.
        """
        bbox = data.get("typical_bbox")
        return UIPattern(
            name=data["name"],
            category=data.get("category", default_category),
            triggers=list(data.get("triggers", [])),
            action=data.get("action", "click"),
            target=data.get("target", ""),
            typical_zone=data.get("typical_zone", "content"),
            typical_bbox=list(bbox) if bbox is not None else None,
            os=data.get("os", "any"),
            confidence=data.get("confidence", default_confidence),
            metadata=metadata,
        )

    @staticmethod
    def _merged_metadata(data: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``data["metadata"]`` including alternatives.

        A top-level ``"alternatives"`` list (the BUILTIN_PATTERNS layout)
        is folded into the metadata unless the metadata already has one.
        """
        metadata = dict(data.get("metadata") or {})
        if "alternatives" in data and "alternatives" not in metadata:
            metadata["alternatives"] = list(data["alternatives"])
        return metadata

    def _load_builtin(self) -> None:
        """Append every entry of BUILTIN_PATTERNS to the library."""
        for entry in BUILTIN_PATTERNS:
            self._patterns.append(self._build_pattern(
                entry,
                default_category="custom",
                default_confidence=0.9,
                metadata={
                    "alternatives": list(entry.get("alternatives", [])),
                    "source": "builtin",
                },
            ))

    def _load_from_file(self, path: str) -> None:
        """Append the patterns stored in a JSON file.

        The file must hold an object with a ``"patterns"`` list. Loading
        is all-or-nothing: if the file cannot be read or any entry is
        malformed, the error is logged and no pattern is added.
        """
        filepath = Path(path)
        if not filepath.exists():
            logger.warning(f"Fichier patterns non trouvé: {path}")
            return
        try:
            # save_to_file writes UTF-8, so read it back the same way
            # instead of relying on the platform default encoding.
            with open(filepath, encoding="utf-8") as f:
                data = json.load(f)
            loaded = [
                self._build_pattern(
                    entry,
                    default_category="custom",
                    default_confidence=0.9,
                    metadata=self._merged_metadata(entry),
                )
                for entry in data.get("patterns", [])
            ]
        except Exception as e:
            # Best-effort boundary: extra patterns are optional.
            logger.error(f"Erreur chargement patterns: {e}")
            return
        self._patterns.extend(loaded)
        logger.info(f"Chargé {len(loaded)} patterns depuis {path}")

    def find_pattern(
        self,
        text: str,
        os_filter: Optional[str] = None,
        category: Optional[str] = None,
    ) -> Optional[Dict[str, Any]]:
        """Look for a UI pattern in some text (OCR, window title, etc.).

        A pattern matches when one of its triggers is a case-insensitive
        substring of ``text``. The score of a match is the trigger length
        divided by the text length, so a trigger that covers more of the
        text wins; on equal scores the earliest pattern is kept.

        Args:
            text: Text to analyse (may contain OCR noise).
            os_filter: Restrict to patterns for this OS or "any"
                ("windows", "linux", None = all).
            category: Restrict to patterns of this category
                (None = all).

        Returns:
            Dict with pattern, category, action, target, alternatives,
            typical_zone, typical_bbox, confidence, matched_trigger and
            os, or None when nothing matches (including empty text).
        """
        if not text:
            return None
        text_lower = text.lower()
        text_len = len(text_lower)

        best_pattern: Optional[UIPattern] = None
        best_trigger: Optional[str] = None
        best_score = 0.0

        for pattern in self._patterns:
            if os_filter and pattern.os not in ("any", os_filter):
                continue
            if category is not None and pattern.category != category:
                continue

            score = 0.0
            matched_trigger = None
            for trigger in pattern.triggers:
                # Lowercase the trigger too: custom/learned triggers are
                # not guaranteed to be stored in lowercase.
                needle = trigger.lower()
                if needle in text_lower:
                    trigger_score = len(needle) / text_len
                    if trigger_score > score:
                        score = trigger_score
                        matched_trigger = trigger

            if score > best_score and score > self._MIN_SCORE:
                best_score = score
                best_pattern = pattern
                best_trigger = matched_trigger

        if best_pattern is None:
            return None
        return {
            "pattern": best_pattern.name,
            "category": best_pattern.category,
            "action": best_pattern.action,
            "target": best_pattern.target,
            "alternatives": best_pattern.metadata.get("alternatives", []),
            "typical_zone": best_pattern.typical_zone,
            "typical_bbox": best_pattern.typical_bbox,
            # Boost the base confidence by the match score, capped at 1.0.
            "confidence": min(best_pattern.confidence * (1 + best_score), 1.0),
            "matched_trigger": best_trigger,
            "os": best_pattern.os,
        }

    def find_by_category(self, category: str) -> List[Dict[str, Any]]:
        """Return a summary dict for every pattern of a category."""
        return [
            {
                "name": p.name,
                "action": p.action,
                "target": p.target,
                "triggers": p.triggers,
                "typical_zone": p.typical_zone,
            }
            for p in self._patterns
            if p.category == category
        ]

    def get_dialog_handler(self, dialog_text: str) -> Optional[Dict[str, Any]]:
        """Shortcut: look for a dialog pattern.

        Patterns of category "dialog" are preferred; if none matches,
        the best match among all categories is returned instead.
        """
        dialog_match = self.find_pattern(dialog_text, category="dialog")
        if dialog_match is not None:
            return dialog_match
        return self.find_pattern(dialog_text)

    def add_pattern(self, pattern_dict: Dict[str, Any]) -> None:
        """Add a pattern dynamically (e.g. learned by observation).

        ``"alternatives"`` and ``"metadata"`` entries of the dict are
        kept; the metadata ``"source"`` is always set to "learned".

        Raises:
            KeyError: If ``pattern_dict`` has no ``"name"`` entry.
        """
        metadata = self._merged_metadata(pattern_dict)
        metadata["source"] = "learned"
        self._patterns.append(self._build_pattern(
            pattern_dict,
            default_category="learned",
            default_confidence=0.7,
            metadata=metadata,
        ))

    def save_to_file(self, path: str) -> None:
        """Save all patterns (built-in + learned) to a UTF-8 JSON file."""
        data = {
            "patterns": [
                {
                    "name": p.name,
                    "category": p.category,
                    "triggers": p.triggers,
                    "action": p.action,
                    "target": p.target,
                    "typical_zone": p.typical_zone,
                    "typical_bbox": p.typical_bbox,
                    "os": p.os,
                    "confidence": p.confidence,
                    "metadata": p.metadata,
                }
                for p in self._patterns
            ]
        }
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        logger.info(f"Sauvegardé {len(self._patterns)} patterns dans {path}")

    @property
    def stats(self) -> Dict[str, Any]:
        """Return the total pattern count and the count per category."""
        cats = Counter(p.category for p in self._patterns)
        return {"total": len(self._patterns), "by_category": dict(cats)}