From 4f5c518d3af8249a3fde2e5bd284fa3617ad3c42 Mon Sep 17 00:00:00 2001 From: Dom Date: Tue, 31 Mar 2026 15:51:18 +0200 Subject: [PATCH] =?UTF-8?q?fix:=20anchor=20match=20sur=20screenshot=20enti?= =?UTF-8?q?er=20+=20proximit=C3=A9=20=C3=A9l=C3=A9ment=20SomEngine?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le template matching du crop anchor contre les régions YOLO échouait car l'anchor (150x150) est plus grand que les éléments détectés. Maintenant : match sur le screenshot entier → centre du match → élément SomEngine le plus proche (max 100px). Fonctionne pour les icônes mais limité par la taille du crop (150x150 de barre de titre matche à plusieurs endroits). Co-Authored-By: Claude Opus 4.6 (1M context) --- agent_v0/server_v1/api_stream.py | 82 ++++++++++++++++---------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/agent_v0/server_v1/api_stream.py b/agent_v0/server_v1/api_stream.py index b0bf983bf..8c04bb02a 100644 --- a/agent_v0/server_v1/api_stream.py +++ b/agent_v0/server_v1/api_stream.py @@ -3547,51 +3547,51 @@ def _resolve_by_som( screenshot_cv = cv2.imread(screenshot_path, cv2.IMREAD_GRAYSCALE) if anc_img is not None and screenshot_cv is not None: - best_elem = None - best_score = 0.0 + # Template matching de l'anchor sur le SCREENSHOT ENTIER + # (pas sur les régions individuelles — l'anchor est souvent plus grand) anc_h, anc_w = anc_img.shape[:2] + if screenshot_cv.shape[0] >= anc_h and screenshot_cv.shape[1] >= anc_w: + res = cv2.matchTemplate(screenshot_cv, anc_img, cv2.TM_CCOEFF_NORMED) + _, max_score, _, max_loc = cv2.minMaxLoc(res) - for elem in som_result.elements: - x1, y1, x2, y2 = elem.bbox - # Agrandir la zone de 20% pour tolérer les différences - margin_x = int((x2 - x1) * 0.2) - margin_y = int((y2 - y1) * 0.2) - rx1 = max(0, x1 - margin_x) - ry1 = max(0, y1 - margin_y) - rx2 = min(screenshot_cv.shape[1], x2 + margin_x) - ry2 = min(screenshot_cv.shape[0], y2 + margin_y) - region = screenshot_cv[ry1:ry2, rx1:rx2] + if max_score >= 0.5: + # Centre du match + match_cx = max_loc[0] + anc_w // 2 + match_cy = max_loc[1] + anc_h // 2 - if region.shape[0] < anc_h or region.shape[1] < anc_w: - continue + # Trouver l'élément SomEngine le plus proche du centre du match + best_elem = None + best_dist = float("inf") + for elem in som_result.elements: + cx, cy = elem.center + dist = ((match_cx - cx) ** 2 + (match_cy - cy) ** 2) ** 0.5 + if dist < best_dist: + best_dist = dist + best_elem = elem - res = cv2.matchTemplate(region, anc_img, cv2.TM_CCOEFF_NORMED) - _, score, _, _ = cv2.minMaxLoc(res) - if score > best_score: - best_score = score - best_elem = elem - - if best_elem and best_score >= 0.6: - elapsed = time.time() - t0 - cx_norm, cy_norm = best_elem.center_norm - logger.info( - "SoM resolve ANCHOR : match crop '#%d' score=%.3f → (%.4f, %.4f) en %.1fs", - best_elem.id, best_score, cx_norm, cy_norm, elapsed, - ) - return { - "resolved": True, - "method": "som_anchor_match", - "x_pct": round(cx_norm, 6), - "y_pct": round(cy_norm, 6), - "matched_element": { - "label": best_elem.label or f"icon #{best_elem.id}", - "type": best_elem.source, - "role": "som_anchor_match", - "confidence": best_score, - "som_id": best_elem.id, - }, - "score": best_score, - } + if best_elem and best_dist < 100: # Max 100px de distance + elapsed = time.time() - t0 + cx_norm, cy_norm = best_elem.center_norm + logger.info( + "SoM resolve ANCHOR : match crop score=%.3f → " + "elem '#%d %s' (dist=%.0fpx) → (%.4f, %.4f) en %.1fs", + max_score, best_elem.id, best_elem.label, + best_dist, cx_norm, cy_norm, elapsed, + ) + return { + "resolved": True, + "method": "som_anchor_match", + "x_pct": round(cx_norm, 6), + "y_pct": round(cy_norm, 6), + "matched_element": { + "label": best_elem.label or f"icon #{best_elem.id}", + "type": best_elem.source, + "role": "som_anchor_match", + "confidence": max_score, + "som_id": best_elem.id, + }, + "score": max_score, + } except ImportError: pass except Exception as e: