feat(grounding): Phase 6 — Shadow Learning Hook

ShadowLearningHook (core/grounding/shadow_learning_hook.py) :
- Hook optionnel pour le ShadowObserver
- Chaque clic humain observé → FastDetector détecte l'élément sous le clic
- SignatureStore enrichie avec texte, type, position, voisins (conf=1.0)
- Au replay : SmartMatcher utilise la signature apprise → matching < 1ms

Validé : 3 clics simulés → 3 signatures créées avec les bonnes métadonnées.
Module standalone — ne modifie pas le ShadowObserver existant.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-25 21:00:11 +02:00
parent e2046837cf
commit 73cea2385e
1 changed files with 156 additions and 0 deletions
--- a/core/grounding/shadow_learning_hook.py
+++ b/core/grounding/shadow_learning_hook.py
@@ -0,0 +1,156 @@
+"""
+core/grounding/shadow_learning_hook.py — Hook d'apprentissage Shadow
+
+Connecte le ShadowObserver au SignatureStore : chaque clic observé pendant
+une session Shadow enrichit la base de signatures d'éléments.
+
+L'humain clique quelque part → on détecte quel élément UI est sous le clic →
+on stocke sa signature (texte, type, position, voisins) pour le replay.
+
+Ce module est un HOOK optionnel — il ne modifie pas le ShadowObserver,
+il s'y branche via callback.
+
+Utilisation :
+    from core.grounding.shadow_learning_hook import ShadowLearningHook
+
+    hook = ShadowLearningHook()
+
+    # Dans le ShadowObserver ou l'API de capture :
+    hook.on_click_observed(
+        click_x=542, click_y=318,
+        screenshot_pil=screen,
+        window_title="Bloc-notes",
+        target_label="Bouton Valider",
+    )
+"""
+
+from __future__ import annotations
+
+import threading
+import time
+from typing import Any, Dict, Optional
+
+from core.grounding.element_signature import SignatureStore
+from core.grounding.fast_types import DetectedUIElement
+
+
+class ShadowLearningHook:
+    """Learning hook for Shadow mode.
+
+    For every observed human click, detects the UI element under the click
+    and records its signature in the SignatureStore.
+    """
+
+    def __init__(self, signature_store: Optional[SignatureStore] = None):
+        self._store = signature_store or SignatureStore()
+        self._detector = None  # Lazy-loaded so RF-DETR is not loaded at startup
+        self._lock = threading.Lock()  # Guards the lazy detector creation
+
+    def _get_detector(self):
+        """Return the shared FastDetector, creating it once on first use."""
+        if self._detector is None:
+            with self._lock:
+                # Re-check under the lock: another thread may have won the race.
+                if self._detector is None:
+                    from core.grounding.fast_detector import FastDetector
+                    self._detector = FastDetector()
+        return self._detector
+
+    def on_click_observed(
+        self,
+        click_x: int,
+        click_y: int,
+        screenshot_pil: Optional[Any] = None,
+        window_title: str = "",
+        target_label: str = "",
+        target_description: str = "",
+    ) -> Optional[Dict[str, Any]]:
+        """Record the signature of the element a human clicked in Shadow mode.
+
+        Args:
+            click_x, click_y: Click position in screen pixels; compared
+                directly against the bboxes of the detected elements.
+            screenshot_pil: Screenshot taken at click time, forwarded to the detector.
+            window_title: Title of the active window (part of the screen context).
+            target_label: Step label if known; falls back to the element's OCR text.
+            target_description: Description of the element, if known.
+
+        Returns:
+            Dict describing the recorded element, or None when no element was
+            found under the click or any error occurred (errors are printed).
+        """
+        t0 = time.time()
+
+        try:
+            snapshot = self._get_detector().detect(screenshot_pil=screenshot_pil)
+
+            if not snapshot.elements:
+                print(f"📝 [Shadow/learn] Aucun élément détecté à ({click_x}, {click_y})")
+                return None
+
+            clicked_element = self._find_element_at(click_x, click_y, snapshot.elements)
+
+            if clicked_element is None:
+                print(f"📝 [Shadow/learn] Aucun élément sous ({click_x}, {click_y})")
+                return None
+
+            target_key = SignatureStore.make_target_key(
+                target_label or clicked_element.ocr_text,
+                target_description,
+            )
+            screen_ctx = SignatureStore.make_screen_context(window_title, snapshot.resolution)
+
+            self._store.record_success(
+                target_key=target_key,
+                screen_context=screen_ctx,
+                element=clicked_element,
+                confidence=1.0,  # A human clicked it, so confidence is maximal
+            )
+
+            dt = (time.time() - t0) * 1000
+            print(f"📝 [Shadow/learn] Signature '{clicked_element.ocr_text}' "
+                  f"type={clicked_element.element_type} "
+                  f"pos={clicked_element.relative_position} "
+                  f"voisins={clicked_element.neighbors[:3]} ({dt:.0f}ms)")
+
+            return {
+                "target_key": target_key,
+                "text": clicked_element.ocr_text,
+                "element_type": clicked_element.element_type,
+                "relative_position": clicked_element.relative_position,
+                "neighbors": clicked_element.neighbors,
+                "center": clicked_element.center,
+            }
+
+        except Exception as e:
+            # Failures are reported on stdout and signalled to the caller as None.
+            print(f"⚠️ [Shadow/learn] Erreur: {e}")
+            return None
+
+    @staticmethod
+    def _find_element_at(
+        x: int, y: int,
+        elements: list,
+        margin: int = 20,
+    ) -> Optional[DetectedUIElement]:
+        """Return the element under (x, y), or the nearest one within `margin`.
+
+        When several bboxes contain the point (e.g. a button inside a panel),
+        the smallest one wins instead of whichever was detected first. If no
+        bbox contains it, the element whose center is closest, and strictly
+        less than `margin` pixels away, is returned; otherwise None.
+        """
+        def area(elem):
+            x1, y1, x2, y2 = elem.bbox
+            return (x2 - x1) * (y2 - y1)
+
+        hits = [e for e in elements
+                if e.bbox[0] <= x <= e.bbox[2] and e.bbox[1] <= y <= e.bbox[3]]
+        if hits:
+            return min(hits, key=area)  # min() keeps the first on ties
+
+        def dist(elem):
+            return ((elem.center[0] - x) ** 2 + (elem.center[1] - y) ** 2) ** 0.5
+
+        near = [e for e in elements if dist(e) < margin]
+        return min(near, key=dist) if near else None