Compare commits
3 Commits
ef5d595d98
...
4f5c518d3a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4f5c518d3a | ||
|
|
7dec3ab63a | ||
|
|
68d5bb7dd1 |
@@ -3532,7 +3532,8 @@ def _resolve_by_som(
|
|||||||
# Pour les icônes sans texte : comparer le crop de référence contre
|
# Pour les icônes sans texte : comparer le crop de référence contre
|
||||||
# chaque région YOLO détectée par SomEngine.
|
# chaque région YOLO détectée par SomEngine.
|
||||||
anchor_b64 = target_spec.get("anchor_image_base64", "")
|
anchor_b64 = target_spec.get("anchor_image_base64", "")
|
||||||
if anchor_b64 and not anchor_label:
|
by_text = target_spec.get("by_text", "").strip()
|
||||||
|
if anchor_b64 and (not anchor_label or not by_text):
|
||||||
try:
|
try:
|
||||||
import cv2
|
import cv2
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -3546,51 +3547,51 @@ def _resolve_by_som(
|
|||||||
screenshot_cv = cv2.imread(screenshot_path, cv2.IMREAD_GRAYSCALE)
|
screenshot_cv = cv2.imread(screenshot_path, cv2.IMREAD_GRAYSCALE)
|
||||||
|
|
||||||
if anc_img is not None and screenshot_cv is not None:
|
if anc_img is not None and screenshot_cv is not None:
|
||||||
best_elem = None
|
# Template matching de l'anchor sur le SCREENSHOT ENTIER
|
||||||
best_score = 0.0
|
# (pas sur les régions individuelles — l'anchor est souvent plus grand)
|
||||||
anc_h, anc_w = anc_img.shape[:2]
|
anc_h, anc_w = anc_img.shape[:2]
|
||||||
|
if screenshot_cv.shape[0] >= anc_h and screenshot_cv.shape[1] >= anc_w:
|
||||||
|
res = cv2.matchTemplate(screenshot_cv, anc_img, cv2.TM_CCOEFF_NORMED)
|
||||||
|
_, max_score, _, max_loc = cv2.minMaxLoc(res)
|
||||||
|
|
||||||
for elem in som_result.elements:
|
if max_score >= 0.5:
|
||||||
x1, y1, x2, y2 = elem.bbox
|
# Centre du match
|
||||||
# Agrandir la zone de 20% pour tolérer les différences
|
match_cx = max_loc[0] + anc_w // 2
|
||||||
margin_x = int((x2 - x1) * 0.2)
|
match_cy = max_loc[1] + anc_h // 2
|
||||||
margin_y = int((y2 - y1) * 0.2)
|
|
||||||
rx1 = max(0, x1 - margin_x)
|
|
||||||
ry1 = max(0, y1 - margin_y)
|
|
||||||
rx2 = min(screenshot_cv.shape[1], x2 + margin_x)
|
|
||||||
ry2 = min(screenshot_cv.shape[0], y2 + margin_y)
|
|
||||||
region = screenshot_cv[ry1:ry2, rx1:rx2]
|
|
||||||
|
|
||||||
if region.shape[0] < anc_h or region.shape[1] < anc_w:
|
# Trouver l'élément SomEngine le plus proche du centre du match
|
||||||
continue
|
best_elem = None
|
||||||
|
best_dist = float("inf")
|
||||||
|
for elem in som_result.elements:
|
||||||
|
cx, cy = elem.center
|
||||||
|
dist = ((match_cx - cx) ** 2 + (match_cy - cy) ** 2) ** 0.5
|
||||||
|
if dist < best_dist:
|
||||||
|
best_dist = dist
|
||||||
|
best_elem = elem
|
||||||
|
|
||||||
res = cv2.matchTemplate(region, anc_img, cv2.TM_CCOEFF_NORMED)
|
if best_elem and best_dist < 100: # Max 100px de distance
|
||||||
_, score, _, _ = cv2.minMaxLoc(res)
|
elapsed = time.time() - t0
|
||||||
if score > best_score:
|
cx_norm, cy_norm = best_elem.center_norm
|
||||||
best_score = score
|
logger.info(
|
||||||
best_elem = elem
|
"SoM resolve ANCHOR : match crop score=%.3f → "
|
||||||
|
"elem '#%d %s' (dist=%.0fpx) → (%.4f, %.4f) en %.1fs",
|
||||||
if best_elem and best_score >= 0.6:
|
max_score, best_elem.id, best_elem.label,
|
||||||
elapsed = time.time() - t0
|
best_dist, cx_norm, cy_norm, elapsed,
|
||||||
cx_norm, cy_norm = best_elem.center_norm
|
)
|
||||||
logger.info(
|
return {
|
||||||
"SoM resolve ANCHOR : match crop '#%d' score=%.3f → (%.4f, %.4f) en %.1fs",
|
"resolved": True,
|
||||||
best_elem.id, best_score, cx_norm, cy_norm, elapsed,
|
"method": "som_anchor_match",
|
||||||
)
|
"x_pct": round(cx_norm, 6),
|
||||||
return {
|
"y_pct": round(cy_norm, 6),
|
||||||
"resolved": True,
|
"matched_element": {
|
||||||
"method": "som_anchor_match",
|
"label": best_elem.label or f"icon #{best_elem.id}",
|
||||||
"x_pct": round(cx_norm, 6),
|
"type": best_elem.source,
|
||||||
"y_pct": round(cy_norm, 6),
|
"role": "som_anchor_match",
|
||||||
"matched_element": {
|
"confidence": max_score,
|
||||||
"label": best_elem.label or f"icon #{best_elem.id}",
|
"som_id": best_elem.id,
|
||||||
"type": best_elem.source,
|
},
|
||||||
"role": "som_anchor_match",
|
"score": max_score,
|
||||||
"confidence": best_score,
|
}
|
||||||
"som_id": best_elem.id,
|
|
||||||
},
|
|
||||||
"score": best_score,
|
|
||||||
}
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -502,14 +502,24 @@ def _vlm_identify_element(anchor_b64: str, window_title: str = "") -> str:
|
|||||||
if raw.lower().startswith(prefix.lower()):
|
if raw.lower().startswith(prefix.lower()):
|
||||||
raw = raw[len(prefix):]
|
raw = raw[len(prefix):]
|
||||||
break
|
break
|
||||||
|
# Rejeter les réponses qui sont du bavardage, pas un label
|
||||||
|
reject_patterns = (
|
||||||
|
"several", "multiple", "various", "image",
|
||||||
|
"I can", "there are", "there is", "elements",
|
||||||
|
"the following", "here are",
|
||||||
|
)
|
||||||
|
if any(p in raw.lower()[:30] for p in reject_patterns):
|
||||||
|
logger.debug("VLM identify : réponse bavarde rejetée (raw='%s')", raw[:60])
|
||||||
|
return ""
|
||||||
|
|
||||||
# Prendre les 5 premiers mots utiles
|
# Prendre les 5 premiers mots utiles
|
||||||
words = raw.split()[:5]
|
words = raw.split()[:5]
|
||||||
label = " ".join(words).strip('",.\' ').rstrip(".")
|
label = " ".join(words).strip('",.\' ').rstrip(".")
|
||||||
if label and 2 <= len(label) <= 60:
|
if label and 2 <= len(label) <= 40:
|
||||||
logger.info("VLM identify element : '%s'", label)
|
logger.info("VLM identify element : '%s'", label)
|
||||||
return label
|
return label
|
||||||
else:
|
else:
|
||||||
logger.debug("VLM identify : label trop court ou vide après nettoyage (raw='%s')", raw[:80])
|
logger.debug("VLM identify : label trop court/long après nettoyage (raw='%s')", raw[:80])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("VLM identify element échoué : %s", e)
|
logger.debug("VLM identify element échoué : %s", e)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user