diff --git a/visual_workflow_builder/backend/api_v3/capture.py b/visual_workflow_builder/backend/api_v3/capture.py index 0d28b1c6d..35a9fb5bd 100644 --- a/visual_workflow_builder/backend/api_v3/capture.py +++ b/visual_workflow_builder/backend/api_v3/capture.py @@ -199,11 +199,21 @@ def select_anchor(): thumbnail.save(thumbnail_path, 'PNG') # ── Analyse automatique du crop : OCR + VLM ──────────────────── + # Zone élargie autour de l'ancre pour capturer le texte à côté + margin = 50 + expanded = img.crop(( + max(0, x - margin), + max(0, y - margin), + min(img.width, x + w + margin * 3), + min(img.height, y + h + margin) + )) target_text = "" ocr_description = "" try: from services.ocr_service import ocr_extract_text - target_text = ocr_extract_text(thumbnail).strip() + target_text = ocr_extract_text(expanded).strip() + if not target_text: + target_text = ocr_extract_text(thumbnail).strip() print(f"🔍 [OCR] Texte extrait de l'ancre: '{target_text}'") # Si le texte OCR est trop court ou vide, décrire via VLM @@ -226,7 +236,7 @@ def select_anchor(): "stream": False, "options": {"temperature": 0.1, "num_predict": 15} }, - timeout=15 + timeout=60 ) if resp.status_code == 200: ocr_description = resp.json().get("response", "").strip()