refactor: factorisation input_handler partagé + page cartographie processus
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 11s
security-audit / Scan secrets (grep) (push) Successful in 8s
tests / Lint (ruff + black) (push) Successful in 14s
tests / Tests unitaires (sans GPU) (push) Failing after 14s
tests / Tests sécurité (critique) (push) Has been skipped
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 11s
security-audit / Scan secrets (grep) (push) Successful in 8s
tests / Lint (ruff + black) (push) Successful in 14s
tests / Tests unitaires (sans GPU) (push) Failing after 14s
tests / Tests sécurité (critique) (push) Has been skipped
core/execution/input_handler.py (NOUVEAU) : - safe_type_text() : setxkbmap fr + xdotool, partagé entre les 2 executors - check_screen_for_patterns() : détection dialogues UI via OCR - handle_detected_pattern() : clic bouton par OCR (mot exact, le plus bas) - post_execution_cleanup() : vérification post-workflow VWB executor : suppression du code dupliqué, alias vers input_handler Core executor : pyautogui.write() remplacé par safe_type_text() Page dashboard "Cartographie des processus" : - GET /process-mining : vue analyse des flux de travail - POST /api/process-mining/discover : génère BPMN + indicateurs - 4 cartes indicateurs, diagramme, points d'attention, variantes - Dark theme, français, zéro jargon technique - Onglet ajouté dans la navigation Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
243
core/execution/input_handler.py
Normal file
243
core/execution/input_handler.py
Normal file
@@ -0,0 +1,243 @@
|
||||
"""
|
||||
Module partagé de saisie texte et gestion des dialogues.
|
||||
|
||||
Utilisé par les deux executors :
|
||||
- VWB executor (visual_workflow_builder/backend/api_v3/execute.py)
|
||||
- Core executor (core/execution/action_executor.py)
|
||||
|
||||
Garantit le même comportement AZERTY/VM/Citrix partout.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
import shutil
|
||||
import time
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# pyautogui is optional: it requires a display server, which headless hosts
# may lack.  Record availability so callers can degrade gracefully.
try:
    import pyautogui
except ImportError:
    PYAUTOGUI_AVAILABLE = False
else:
    PYAUTOGUI_AVAILABLE = True
|
||||
|
||||
|
||||
def safe_type_text(text: str, layout: str = 'fr') -> None:
    """Type *text* into the focused window, robust to VM/Citrix and AZERTY/QWERTY.

    Tries, in order of reliability:

    1. ``xdotool type`` after refreshing the keyboard layout — traverses
       spice/QEMU virtual machines correctly.
    2. Clipboard (``xclip``) + Ctrl+V — fallback when xdotool is unavailable.
    3. ``pyautogui.write()`` — last resort; AZERTY mapping not guaranteed.

    Args:
        text: Text to type.  Empty or ``None`` is a no-op.
        layout: X keyboard layout passed to ``setxkbmap`` (default ``'fr'``,
            preserving the historical behavior).
    """
    if not text:
        return

    # Method 1: xdotool type after refreshing the keyboard layout.
    if shutil.which('xdotool') and shutil.which('setxkbmap'):
        try:
            # Best effort: a stale layout is the main cause of wrong characters.
            subprocess.run(['setxkbmap', layout], timeout=2)
            subprocess.run(
                ['xdotool', 'type', '--delay', '0', '--clearmodifiers', '--', text],
                # Scale the timeout with text length, never below 30 s.
                timeout=max(30, len(text) * 0.05),
                check=True
            )
            logger.debug("Saisie via xdotool type (%d car.)", len(text))
            return
        except Exception as e:
            logger.debug("xdotool type échoué: %s", e)

    # Method 2: clipboard paste (needs pyautogui for the Ctrl+V chord).
    xclip = shutil.which('xclip')
    if xclip and PYAUTOGUI_AVAILABLE:
        try:
            p = subprocess.Popen(
                ['xclip', '-selection', 'clipboard'],
                stdin=subprocess.PIPE,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL
            )
            p.stdin.write(text.encode('utf-8'))
            p.stdin.close()
            time.sleep(0.2)  # let xclip take ownership of the selection
            pyautogui.hotkey('ctrl', 'v')
            time.sleep(0.3)  # let the target application process the paste
            logger.debug("Saisie via presse-papier (%d car.)", len(text))
            return
        except Exception as e:
            logger.debug("xclip échoué: %s", e)

    # Method 3: direct pyautogui typing (layout mapping not guaranteed).
    if PYAUTOGUI_AVAILABLE:
        logger.warning("Saisie via pyautogui.write() (AZERTY non garanti)")
        pyautogui.write(text, interval=0.02)
    else:
        logger.warning("Aucune méthode de saisie disponible pour: %s", text[:50])
|
||||
|
||||
|
||||
def check_screen_for_patterns() -> Optional[Dict[str, Any]]:
    """Check whether the screen shows a known UI pattern (dialog, popup).

    Grabs the primary monitor, extracts its text via OCR and looks the
    text up in the ``UIPatternLibrary``.

    Returns:
        The matching pattern dict (only for the 'dialog'/'popup'
        categories), or ``None`` — including when screen capture or OCR
        is unavailable (this function is deliberately best-effort).
    """
    try:
        from core.knowledge.ui_patterns import UIPatternLibrary
        import mss
        from PIL import Image

        lib = UIPatternLibrary()

        with mss.mss() as sct:
            monitor = sct.monitors[1]  # index 1 = primary physical display
            screenshot = sct.grab(monitor)
            screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')

        try:
            # Prefer the docTR-backed service; fall back to the field
            # extractor when the service module cannot be imported.
            try:
                from services.ocr_service import ocr_extract_text
            except ImportError:
                from core.extraction.field_extractor import FieldExtractor
                extractor = FieldExtractor()

                def ocr_extract_text(img):
                    return extractor.extract_text_from_image(img)

            ocr_text = ocr_extract_text(screen)
        except ImportError:
            logger.debug("OCR non disponible pour pattern check")
            return None

        # Too little text to be a meaningful dialog.
        if not ocr_text or len(ocr_text) < 5:
            return None

        pattern = lib.find_pattern(ocr_text)
        if pattern and pattern['category'] in ('dialog', 'popup'):
            logger.info(
                "Pattern UI détecté: %s → %s '%s'",
                pattern['pattern'], pattern['action'], pattern['target']
            )
            return pattern

        return None

    except Exception as e:
        logger.debug("Pattern check échoué: %s", e)
        return None
|
||||
|
||||
|
||||
def _ocr_words_on_screen() -> List[Dict[str, Any]]:
    """Grab the primary monitor and return OCR word boxes for it.

    Raises ImportError (or any OCR/capture error) to the caller, which
    treats the pattern as unhandled.
    """
    import mss
    from PIL import Image

    # OCR backend: prefer the service module, fall back to the field extractor.
    try:
        from services.ocr_service import ocr_extract_words
    except ImportError:
        from core.extraction.field_extractor import FieldExtractor
        extractor = FieldExtractor()

        def ocr_extract_words(img):
            return extractor.extract_words_from_image(img)

    with mss.mss() as sct:
        monitor = sct.monitors[1]  # index 1 = primary physical display
        screenshot = sct.grab(monitor)
        screen = Image.frombytes('RGB', screenshot.size, screenshot.bgra, 'raw', 'BGRX')

    return ocr_extract_words(screen)


def _match_record(word: Dict[str, Any], match_type: str) -> Dict[str, Any]:
    """Build a click candidate (text + bbox centre) from one OCR word box."""
    x1, y1, x2, y2 = word['bbox']
    return {
        'text': word['text'],
        'x': int((x1 + x2) / 2),
        'y': int((y1 + y2) / 2),
        'match_type': match_type,
    }


def _exact_matches(labels: List[str], words: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Exact (case-insensitive) label matches among OCR word boxes."""
    matches = []
    for label in labels:
        wanted = label.lower()
        if len(wanted) < 2:
            continue  # single characters match far too easily
        for word in words:
            text = word['text'].lower()
            if len(text) >= 2 and text == wanted:
                matches.append(_match_record(word, 'exact'))
    return matches


def _partial_matches(labels: List[str], words: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Substring matches with the first letter dropped.

    OCR often misses the underlined accelerator letter of a button label,
    so retry with the label minus its first character.
    """
    matches = []
    for label in labels:
        if len(label) <= 3:
            continue  # too short to match reliably without the first letter
        fragment = label[1:].lower()
        for word in words:
            if fragment in word['text'].lower():
                matches.append(_match_record(word, 'partial'))
    return matches


def handle_detected_pattern(pattern: Dict[str, Any]) -> bool:
    """Automatically handle a detected UI pattern.

    Locates the target button via OCR (its real on-screen position).
    100% vision-based — no hardcoded coordinates.

    Args:
        pattern: Pattern dict carrying 'action' ('click' or 'hotkey'),
            'target' and optionally 'alternatives'.

    Returns:
        True if the pattern was handled successfully.
    """
    if not PYAUTOGUI_AVAILABLE:
        logger.warning("pyautogui non disponible — impossible de gérer le pattern")
        return False

    action = pattern.get('action')
    target = pattern.get('target', '')
    alternatives = pattern.get('alternatives', [])

    if action == 'click':
        candidates_labels = [target] + alternatives

        try:
            words = _ocr_words_on_screen()

            # Collect every match; prefer exact, fall back to partial.
            all_matches = _exact_matches(candidates_labels, words)
            if not all_matches:
                all_matches = _partial_matches(candidates_labels, words)

            if all_matches:
                # Dialog buttons sit at the bottom: click the lowest match.
                best = max(all_matches, key=lambda m: m['y'])
                logger.info("Clic sur '%s' à (%d, %d)", best['text'], best['x'], best['y'])
                pyautogui.click(best['x'], best['y'])
                time.sleep(1.0)
                return True

            logger.info("Bouton '%s' introuvable par OCR", target)
            return False

        except Exception as e:
            logger.warning("OCR bouton échoué: %s", e)
            return False

    elif action == 'hotkey':
        keys = target.split('+')
        logger.info("Raccourci automatique: %s", target)
        pyautogui.hotkey(*keys)
        time.sleep(0.5)
        return True

    return False
|
||||
|
||||
|
||||
def post_execution_cleanup(execution_mode: str = 'debug'):
    """Inspect the screen after execution and dismiss any leftover dialogs.

    Called after the last step of a workflow so the screen is left clean.
    Only active for the 'intelligent' and 'debug' execution modes.
    """
    if execution_mode not in ('intelligent', 'debug'):
        return

    logger.info("Vérification écran final...")
    time.sleep(1.0)

    # Up to three passes: dismissing one dialog may reveal another.
    remaining = 3
    while remaining > 0:
        detected = check_screen_for_patterns()
        if not detected:
            break
        logger.info(f"Dialogue résiduel détecté: {detected.get('pattern')}")
        handle_detected_pattern(detected)
        time.sleep(1.0)
        remaining -= 1
|
||||
Reference in New Issue
Block a user