feat(ORA): vérification pré-action — VLM confirme avant chaque clic
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 12s
security-audit / Scan secrets (grep) (push) Successful in 9s
tests / Lint (ruff + black) (push) Successful in 15s
tests / Tests unitaires (sans GPU) (push) Failing after 16s
tests / Tests sécurité (critique) (push) Has been skipped
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 12s
security-audit / Scan secrets (grep) (push) Successful in 9s
tests / Lint (ruff + black) (push) Successful in 15s
tests / Tests unitaires (sans GPU) (push) Failing after 16s
tests / Tests sécurité (critique) (push) Has been skipped
Avant de cliquer, un crop 200x100 autour de la position cible est envoyé au VLM (qwen2.5vl:3b) avec la question : "Is this UI element 'CR_patient_demo'? YES/NO". Si NO → abandon du clic, ce qui évite les clics erronés. Si erreur VLM → on laisse passer (non bloquant). Vérification ignorée pour le template matching (confiance pixel suffisante). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -925,6 +925,16 @@ Règles:
|
||||
logger.error(f"❌ [ORA/click] Impossible de localiser '{target_text}' — aucune méthode n'a fonctionné")
|
||||
return False
|
||||
|
||||
# --- Vérification pré-action : est-ce le bon élément ? ---
|
||||
if target_text and method_used not in ('template',) and MSS_AVAILABLE and PIL_AVAILABLE:
|
||||
try:
|
||||
pre_check = self._verify_pre_click(x, y, target_text, target_desc)
|
||||
if not pre_check:
|
||||
print(f"⛔ [ORA/pre-check] L'élément à ({x}, {y}) ne correspond PAS à '{target_text}' — abandon du clic")
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f"⚠️ [ORA/pre-check] Erreur vérification: {e}")
|
||||
|
||||
print(f"🖱️ [ORA/click] {decision.value} à ({x}, {y}) via {method_used}")
|
||||
|
||||
if decision.value == 'double':
|
||||
@@ -1079,6 +1089,55 @@ Règles:
|
||||
pass
|
||||
return ''
|
||||
|
||||
def _verify_pre_click(self, x: int, y: int, target_text: str, target_desc: str = "") -> bool:
    """Ask the VLM whether the element at (x, y) is the intended click target.

    Grabs ``sct.monitors[0]``, crops a window of up to 200x100 pixels around
    (x, y), and sends it to the Ollama ``/api/generate`` endpoint with a
    YES/NO question about the label (``target_desc`` if non-empty, otherwise
    ``target_text``).

    The check is fail-open: any exception, or a non-200 HTTP status, returns
    True so that a broken or unreachable VLM never blocks the click.

    Args:
        x: Horizontal position of the planned click.
        y: Vertical position of the planned click.
        target_text: Text of the element expected at that position.
        target_desc: Optional description; used as the label when non-empty.

    Returns:
        True if the first YES/NO word in the model's answer is YES, or if the
        verification could not be carried out. False if the model answered NO
        or gave an answer containing neither word.
    """
    try:
        import io as _io
        import re as _re

        import requests as _requests

        with mss_lib.mss() as sct:
            mon = sct.monitors[0]
            grab = sct.grab(mon)
            screen = Image.frombytes('RGB', grab.size, grab.bgra, 'raw', 'BGRX')

        # NOTE(review): (x, y) is used directly as a pixel index into the
        # screenshot. This presumably assumes the capture origin is (0, 0)
        # and no HiDPI scaling — confirm for multi-monitor setups.
        crop_w, crop_h = 200, 100
        # Shift the window inward near the right/bottom edges so the crop
        # keeps its full size instead of being truncated on those sides only.
        left = min(max(0, x - crop_w // 2), max(0, screen.width - crop_w))
        top = min(max(0, y - crop_h // 2), max(0, screen.height - crop_h))
        right = min(screen.width, left + crop_w)
        bottom = min(screen.height, top + crop_h)
        crop = screen.crop((left, top, right, bottom))

        buffer = _io.BytesIO()
        crop.save(buffer, format='JPEG', quality=70)
        crop_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')

        label = target_desc or target_text
        ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
        resp = _requests.post(f"{ollama_url}/api/generate", json={
            "model": "qwen2.5vl:3b",
            "prompt": f"Is this UI element '{label}'? Answer only YES or NO.",
            "images": [crop_b64],
            "stream": False,
            "options": {"temperature": 0.1, "num_predict": 5}
        }, timeout=15)

        if resp.status_code == 200:
            answer = resp.json().get("response", "").strip().upper()
            # Decide on the first standalone YES/NO word rather than a
            # substring test, so "NO, THIS IS YES" or "EYES" is not a match.
            verdict = next(
                (word for word in _re.findall(r"[A-Z]+", answer) if word in ("YES", "NO")),
                None,
            )
            is_match = verdict == "YES"
            print(f"🔍 [ORA/pre-check] '{label}' → {answer} → {'✅' if is_match else '❌'}")
            return is_match

        # HTTP error: report it instead of passing silently, then fail open.
        print(f"⚠️ [ORA/pre-check] HTTP {resp.status_code} — vérification ignorée")
        return True

    except Exception as e:
        # Deliberate best-effort: a verification failure must not block the click.
        print(f"⚠️ [ORA/pre-check] Erreur: {e}")
        return True
|
||||
|
||||
def _phash_distance(self, hash1: Any, hash2: Any) -> int:
|
||||
"""Distance de Hamming entre deux pHash. Retourne 999 si non calculable."""
|
||||
if hash1 is None or hash2 is None:
|
||||
|
||||
Reference in New Issue
Block a user