|
|
|
|
@@ -1040,16 +1040,27 @@ def _load_anchor_image_b64(anchor_id: str) -> Optional[str]:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _first_non_empty(*values: Any) -> str:
|
|
|
|
|
for value in values:
|
|
|
|
|
text = str(value or "").strip()
|
|
|
|
|
if text and text.casefold() not in {"none", "null"}:
|
|
|
|
|
return text
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _load_anchor_metadata(anchor_id: str) -> Optional[Dict]:
|
|
|
|
|
"""Charger les métadonnées d'une ancre (bounding_box, taille, etc.)."""
|
|
|
|
|
backend_dir = Path(__file__).resolve().parent.parent
|
|
|
|
|
metadata: Dict[str, Any] = {}
|
|
|
|
|
|
|
|
|
|
# 1. Ancien format : metadata.json
|
|
|
|
|
meta_path = backend_dir / 'data' / 'anchor_images' / anchor_id / 'metadata.json'
|
|
|
|
|
if meta_path.exists():
|
|
|
|
|
try:
|
|
|
|
|
with open(meta_path, 'r', encoding='utf-8') as f:
|
|
|
|
|
return json.load(f)
|
|
|
|
|
loaded = json.load(f)
|
|
|
|
|
if isinstance(loaded, dict):
|
|
|
|
|
metadata.update(loaded)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
@@ -1059,19 +1070,55 @@ def _load_anchor_metadata(anchor_id: str) -> Optional[Dict]:
|
|
|
|
|
db_path = backend_dir / 'instance' / 'workflows.db'
|
|
|
|
|
conn = sqlite3.connect(str(db_path))
|
|
|
|
|
row = conn.execute(
|
|
|
|
|
"SELECT bbox_x, bbox_y, bbox_width, bbox_height, screen_width, screen_height "
|
|
|
|
|
"SELECT bbox_x, bbox_y, bbox_width, bbox_height, screen_width, screen_height, "
|
|
|
|
|
"target_text, ocr_description, description "
|
|
|
|
|
"FROM visual_anchors WHERE id=?", (anchor_id,)
|
|
|
|
|
).fetchone()
|
|
|
|
|
conn.close()
|
|
|
|
|
if row:
|
|
|
|
|
return {
|
|
|
|
|
db_metadata = {
|
|
|
|
|
'bounding_box': {'x': row[0], 'y': row[1], 'width': row[2], 'height': row[3]},
|
|
|
|
|
'original_size': {'width': row[4] or 1920, 'height': row[5] or 1080},
|
|
|
|
|
'target_text': row[6] or '',
|
|
|
|
|
'ocr_description': row[7] or '',
|
|
|
|
|
'description': row[8] or '',
|
|
|
|
|
}
|
|
|
|
|
metadata.setdefault('bounding_box', db_metadata['bounding_box'])
|
|
|
|
|
metadata.setdefault('original_size', db_metadata['original_size'])
|
|
|
|
|
for key in ('target_text', 'ocr_description', 'description'):
|
|
|
|
|
value = _first_non_empty(metadata.get(key), db_metadata.get(key))
|
|
|
|
|
if value:
|
|
|
|
|
metadata[key] = value
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
return metadata or None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _anchor_semantic_target_spec(anchor_id: str, anchor_meta: Optional[Dict]) -> Dict[str, Any]:
    """Build the stable, human-readable labels of a visual anchor.

    Args:
        anchor_id: Identifier of the anchor; a falsy value yields ``{}``.
        anchor_meta: Anchor metadata mapping, or ``None``.  Only the
            ``target_text``, ``ocr_description`` and ``description``
            entries are read, each cleaned through ``_first_non_empty``.

    Returns:
        A dict that always contains ``anchor_id`` and, depending on which
        labels are non-empty:
        ``target_text`` / ``by_text`` / ``by_text_source`` (the latter set
        to ``'visual_anchor'``), ``ocr_description``, ``description`` and
        ``vlm_description``.
    """
    if not anchor_id:
        return {}

    meta = anchor_meta or {}
    label, ocr_text, summary = (
        _first_non_empty(meta.get(field))
        for field in ('target_text', 'ocr_description', 'description')
    )

    spec: Dict[str, Any] = {'anchor_id': anchor_id}
    if label:
        # Keyword order is the insertion order of the resulting keys.
        spec.update(
            target_text=label,
            by_text=label,
            by_text_source='visual_anchor',
        )
    if ocr_text:
        spec['ocr_description'] = ocr_text
    if summary:
        spec['description'] = summary

    # The free-form description takes precedence over the OCR text as the
    # value of 'vlm_description'.
    vlm_hint = summary or ocr_text
    if vlm_hint:
        spec['vlm_description'] = vlm_hint

    return spec
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None:
|
|
|
|
|
@@ -1101,19 +1148,17 @@ def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None:
|
|
|
|
|
action['x_pct'] = round(cx, 4)
|
|
|
|
|
action['y_pct'] = round(cy, 4)
|
|
|
|
|
|
|
|
|
|
target_spec = _anchor_semantic_target_spec(anchor_id, anchor_meta)
|
|
|
|
|
|
|
|
|
|
# Image de l'ancre pour template matching côté agent
|
|
|
|
|
anchor_b64 = _load_anchor_image_b64(anchor_id)
|
|
|
|
|
if anchor_b64:
|
|
|
|
|
target_spec = {
|
|
|
|
|
'anchor_image_base64': anchor_b64,
|
|
|
|
|
'anchor_id': anchor_id,
|
|
|
|
|
}
|
|
|
|
|
target_spec['anchor_image_base64'] = anchor_b64
|
|
|
|
|
if anchor_meta:
|
|
|
|
|
target_spec['anchor_bbox'] = anchor_meta.get('bounding_box', {})
|
|
|
|
|
target_spec['original_size'] = anchor_meta.get('original_size', {})
|
|
|
|
|
|
|
|
|
|
action['visual_mode'] = True
|
|
|
|
|
action['target_spec'] = target_spec
|
|
|
|
|
logger.info(
|
|
|
|
|
"Action %s : ancre '%s' chargée (%d Ko), visual_mode activé",
|
|
|
|
|
action.get('action_id', '?'),
|
|
|
|
|
@@ -1127,6 +1172,23 @@ def _inject_anchor_targeting(action: Dict, anchor_id: str) -> None:
|
|
|
|
|
anchor_id,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
if target_spec:
|
|
|
|
|
action['target_spec'] = target_spec
|
|
|
|
|
if target_spec.get('by_text') or target_spec.get('vlm_description'):
|
|
|
|
|
action['visual_mode'] = True
|
|
|
|
|
|
|
|
|
|
label = _first_non_empty(
|
|
|
|
|
target_spec.get('by_text'),
|
|
|
|
|
target_spec.get('target_text'),
|
|
|
|
|
target_spec.get('description'),
|
|
|
|
|
target_spec.get('ocr_description'),
|
|
|
|
|
target_spec.get('vlm_description'),
|
|
|
|
|
)
|
|
|
|
|
if label:
|
|
|
|
|
action.setdefault('target_text', target_spec.get('target_text') or label)
|
|
|
|
|
action.setdefault('target_description', label)
|
|
|
|
|
action.setdefault('description', target_spec.get('description') or label)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@api_v3_bp.route('/execute-windows', methods=['POST'])
|
|
|
|
|
def execute_windows():
|
|
|
|
|
|