fix: prompt natif bbox_2d pour le grounding Qwen2.5-VL
Le prompt JSON ("Answer ONLY: {x, y}") ne fonctionne plus : il retourne
systématiquement [0.0, 0.0]. Le prompt natif "Detect X with a bounding
box" retourne des bbox_2d précises. C'est le format pour lequel
Qwen2.5-VL a été entraîné.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4565,12 +4565,8 @@ def _resolve_by_grounding(
|
||||
logger.warning("Grounding : erreur redimensionnement — %s", e)
|
||||
return None
|
||||
|
||||
# Construire le prompt — format JSON universel (fonctionne avec gemma4, qwen2.5vl, qwen3)
|
||||
prompt = (
|
||||
f"Look at this screenshot. Find: {description}\n"
|
||||
"Where is it? Give the center position as percentage of the image.\n"
|
||||
'Answer ONLY with JSON: {"x": 0.XX, "y": 0.YY}'
|
||||
)
|
||||
# Prompt natif Qwen2.5-VL — format bbox_2d (le seul fiable)
|
||||
prompt = f"Detect '{description}' in this image with a bounding box."
|
||||
|
||||
# Le grounding nécessite un modèle entraîné pour les coordonnées (bbox_2d).
|
||||
# Qwen2.5-VL est le seul qui retourne des positions précises.
|
||||
@@ -4610,18 +4606,16 @@ def _resolve_by_grounding(
|
||||
except Exception as e:
|
||||
logger.debug("vLLM non disponible (%s), fallback Ollama", e)
|
||||
|
||||
# Essai 2 : Ollama (qwen2.5vl:7b pour le grounding)
|
||||
# Essai 2 : Ollama (qwen2.5vl:7b pour le grounding — format bbox_2d natif)
|
||||
if not content:
|
||||
try:
|
||||
resp = _requests.post("http://localhost:11434/api/chat", json={
|
||||
"model": _grounding_model,
|
||||
"messages": [
|
||||
{"role": "system", "content": "You locate UI elements on screenshots. Return coordinates as JSON."},
|
||||
{"role": "user", "content": prompt, "images": [shot_b64]},
|
||||
],
|
||||
"stream": False,
|
||||
"think": False,
|
||||
"options": {"temperature": 0.1, "num_predict": 200},
|
||||
"options": {"temperature": 0.1, "num_predict": 100},
|
||||
}, timeout=60)
|
||||
content = resp.json().get("message", {}).get("content", "")
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user