docs: cartographie complète des chemins d'exécution + fix target_text ORA + worker InfiGUI via fichiers
docs/CARTOGRAPHY.md :
- Carte complète des 2 chemins d'exécution (Legacy vs ORA)
- 12 systèmes de grounding identifiés dont 3 morts
- Trace du champ target_text de la capture au clic
- Fonctions existantes non branchées (verify, recovery, ShadowLearningHook)
- Budget VRAM, fichiers critiques, règles de modification
Fix target_text ORA (observe_reason_act.py:217) :
- Détecte les target_text invalides : vides ou égaux à un nom d'action (p. ex. "click_anchor")
- Appelle alors _describe_anchor_image() (VLM) pour décrire le crop, si l'ancre fournit un screenshot
- Même logique que le legacy execute.py:893
Worker InfiGUI via fichiers /tmp :
- Communication par fichiers (ni pipes de subprocess, ni HTTP)
- Process indépendant lancé avant le backend
- Résout le crash CUDA dans Flask/FastAPI/uvicorn
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -213,8 +213,31 @@ class ORALoop:
|
||||
|
||||
# --- Mapper action_type vers action Decision ---
|
||||
|
||||
# Types d'action qui ne sont PAS des descriptions valides
|
||||
_action_type_names = {'click_anchor', 'double_click_anchor', 'right_click_anchor',
|
||||
'hover_anchor', 'focus_anchor', 'scroll_to_anchor',
|
||||
'click', 'type_text', 'keyboard_shortcut', 'wait_for_anchor'}
|
||||
|
||||
if action_type in ('click_anchor', 'click', 'double_click_anchor', 'right_click_anchor'):
|
||||
target_text = anchor.get('target_text', '') or label
|
||||
target_text = anchor.get('target_text', '') or anchor.get('description', '')
|
||||
|
||||
# Si target_text est vide ou est un nom d'action → décrire le crop
|
||||
if not target_text or target_text in _action_type_names:
|
||||
screenshot_b64 = anchor.get('screenshot', '')
|
||||
if screenshot_b64:
|
||||
try:
|
||||
from core.execution.input_handler import _describe_anchor_image
|
||||
desc = _describe_anchor_image(screenshot_b64)
|
||||
if desc and len(desc) > 2:
|
||||
target_text = desc
|
||||
print(f"🏷️ [ORA/reason] Ancre décrite par VLM: '{target_text}'")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Dernier fallback : label si pas un nom d'action
|
||||
if not target_text or target_text in _action_type_names:
|
||||
target_text = label if label not in _action_type_names else ''
|
||||
|
||||
action = 'click'
|
||||
value = 'double' if action_type == 'double_click_anchor' else (
|
||||
'right' if action_type == 'right_click_anchor' else 'left')
|
||||
@@ -1234,27 +1257,25 @@ Règles:
|
||||
# --- 1. Observer l'état pré-action ---
|
||||
pre = self.observe()
|
||||
|
||||
# --- 1b. Réflexe Check : popup/dialogue inattendu ? ---
|
||||
# Déclenché UNIQUEMENT si le pHash a changé de manière inattendue
|
||||
# (= un popup est probablement apparu). Sinon → 0ms, pas d'OCR.
|
||||
# --- 1b. Réflexe : dialogue inattendu ? ---
|
||||
# Déclenché si le pHash a changé de manière inattendue.
|
||||
# Flux : titre fenêtre (50ms) → dialogue connu ? → InfiGUI clique (3s)
|
||||
if i > 0 and hasattr(self, '_last_post_phash') and self._last_post_phash:
|
||||
_phash_distance = self._phash_distance(pre.phash, self._last_post_phash)
|
||||
if _phash_distance > 10: # Changement significatif inattendu
|
||||
print(f"🧠 [ORA/réflexe] pHash changé (distance={_phash_distance}) → vérification popup")
|
||||
if _phash_distance > 10:
|
||||
print(f"🧠 [ORA/réflexe] pHash changé (distance={_phash_distance}) → vérification dialogue")
|
||||
try:
|
||||
from core.execution.input_handler import check_screen_for_patterns, handle_detected_pattern
|
||||
_reflex_pattern = check_screen_for_patterns()
|
||||
if _reflex_pattern:
|
||||
_reflex_name = _reflex_pattern.get('pattern', '?')
|
||||
_reflex_target = _reflex_pattern.get('target', '?')
|
||||
print(f"🧠 [ORA/réflexe] Pattern détecté: '{_reflex_name}' → clic '{_reflex_target}'")
|
||||
_handled = handle_detected_pattern(_reflex_pattern)
|
||||
if _handled:
|
||||
print(f"✅ [ORA/réflexe] Dialogue '{_reflex_name}' géré automatiquement")
|
||||
time.sleep(0.5)
|
||||
pre = self.observe()
|
||||
else:
|
||||
print(f"⚠️ [ORA/réflexe] Pattern '{_reflex_name}' détecté mais non géré")
|
||||
from core.grounding.dialog_handler import DialogHandler
|
||||
_dh = DialogHandler()
|
||||
_dh_result = _dh.handle_if_dialog(pre.screenshot)
|
||||
if _dh_result.get('handled'):
|
||||
print(f"✅ [ORA/réflexe] Dialogue '{_dh_result['title'][:30]}' géré → {_dh_result['action']}")
|
||||
time.sleep(0.5)
|
||||
pre = self.observe()
|
||||
elif _dh_result.get('dialog_type'):
|
||||
print(f"⚠️ [ORA/réflexe] Dialogue '{_dh_result.get('dialog_type')}' détecté mais non géré: {_dh_result.get('reason')}")
|
||||
else:
|
||||
print(f"🧠 [ORA/réflexe] Pas de dialogue détecté: {_dh_result.get('reason', '?')}")
|
||||
except Exception as _reflex_err:
|
||||
print(f"⚠️ [ORA/réflexe] Erreur: {_reflex_err}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user