feat(ORA): vérification pré-action — VLM confirme avant chaque clic
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 12s
security-audit / Scan secrets (grep) (push) Successful in 9s
tests / Lint (ruff + black) (push) Successful in 15s
tests / Tests unitaires (sans GPU) (push) Failing after 16s
tests / Tests sécurité (critique) (push) Has been skipped
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 12s
security-audit / pip-audit (CVE dépendances) (push) Successful in 12s
security-audit / Scan secrets (grep) (push) Successful in 9s
tests / Lint (ruff + black) (push) Successful in 15s
tests / Tests unitaires (sans GPU) (push) Failing after 16s
tests / Tests sécurité (critique) (push) Has been skipped
Avant de cliquer, un crop 200x100 autour de la position cible est envoyé au VLM (qwen2.5vl:3b) avec la question : "Is this UI element 'CR_patient_demo'? YES/NO". Si NO → abandon du clic, ce qui évite les clics erronés. Si erreur VLM → le clic est autorisé (vérification non bloquante). Vérification ignorée pour le template matching (confiance pixel suffisante). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -925,6 +925,16 @@ Règles:
|
|||||||
logger.error(f"❌ [ORA/click] Impossible de localiser '{target_text}' — aucune méthode n'a fonctionné")
|
logger.error(f"❌ [ORA/click] Impossible de localiser '{target_text}' — aucune méthode n'a fonctionné")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# --- Vérification pré-action : est-ce le bon élément ? ---
|
||||||
|
if target_text and method_used not in ('template',) and MSS_AVAILABLE and PIL_AVAILABLE:
|
||||||
|
try:
|
||||||
|
pre_check = self._verify_pre_click(x, y, target_text, target_desc)
|
||||||
|
if not pre_check:
|
||||||
|
print(f"⛔ [ORA/pre-check] L'élément à ({x}, {y}) ne correspond PAS à '{target_text}' — abandon du clic")
|
||||||
|
return False
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠️ [ORA/pre-check] Erreur vérification: {e}")
|
||||||
|
|
||||||
print(f"🖱️ [ORA/click] {decision.value} à ({x}, {y}) via {method_used}")
|
print(f"🖱️ [ORA/click] {decision.value} à ({x}, {y}) via {method_used}")
|
||||||
|
|
||||||
if decision.value == 'double':
|
if decision.value == 'double':
|
||||||
@@ -1079,6 +1089,55 @@ Règles:
|
|||||||
pass
|
pass
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
def _verify_pre_click(self, x: int, y: int, target_text: str, target_desc: str = "") -> bool:
|
||||||
|
"""Vérifie que l'élément à la position (x,y) correspond au target AVANT de cliquer.
|
||||||
|
|
||||||
|
Fait un crop 200x100 autour de (x,y), envoie au VLM avec la question
|
||||||
|
"est-ce que c'est bien {target} ?"
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
import requests as _requests
|
||||||
|
|
||||||
|
with mss_lib.mss() as sct:
|
||||||
|
mon = sct.monitors[0]
|
||||||
|
grab = sct.grab(mon)
|
||||||
|
screen = Image.frombytes('RGB', grab.size, grab.bgra, 'raw', 'BGRX')
|
||||||
|
|
||||||
|
# Crop 200x100 autour du point de clic
|
||||||
|
crop_w, crop_h = 200, 100
|
||||||
|
left = max(0, x - crop_w // 2)
|
||||||
|
top = max(0, y - crop_h // 2)
|
||||||
|
right = min(screen.width, left + crop_w)
|
||||||
|
bottom = min(screen.height, top + crop_h)
|
||||||
|
crop = screen.crop((left, top, right, bottom))
|
||||||
|
|
||||||
|
import io as _io
|
||||||
|
buffer = _io.BytesIO()
|
||||||
|
crop.save(buffer, format='JPEG', quality=70)
|
||||||
|
crop_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
|
||||||
|
|
||||||
|
label = target_desc or target_text
|
||||||
|
ollama_url = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||||
|
resp = _requests.post(f"{ollama_url}/api/generate", json={
|
||||||
|
"model": "qwen2.5vl:3b",
|
||||||
|
"prompt": f"Is this UI element '{label}'? Answer only YES or NO.",
|
||||||
|
"images": [crop_b64],
|
||||||
|
"stream": False,
|
||||||
|
"options": {"temperature": 0.1, "num_predict": 5}
|
||||||
|
}, timeout=15)
|
||||||
|
|
||||||
|
if resp.status_code == 200:
|
||||||
|
answer = resp.json().get("response", "").strip().upper()
|
||||||
|
is_match = "YES" in answer
|
||||||
|
print(f"🔍 [ORA/pre-check] '{label}' → {answer} → {'✅' if is_match else '❌'}")
|
||||||
|
return is_match
|
||||||
|
|
||||||
|
return True # En cas d'erreur HTTP, on laisse passer
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠️ [ORA/pre-check] Erreur: {e}")
|
||||||
|
return True # En cas d'erreur, on laisse passer
|
||||||
|
|
||||||
def _phash_distance(self, hash1: Any, hash2: Any) -> int:
|
def _phash_distance(self, hash1: Any, hash2: Any) -> int:
|
||||||
"""Distance de Hamming entre deux pHash. Retourne 999 si non calculable."""
|
"""Distance de Hamming entre deux pHash. Retourne 999 si non calculable."""
|
||||||
if hash1 is None or hash2 is None:
|
if hash1 is None or hash2 is None:
|
||||||
|
|||||||
Reference in New Issue
Block a user