feat(vwb): wire up 19 blocks, real OCR, anchor screenshots, deployment configs
Some checks failed
security-audit / Bandit (scan statique) (push) Successful in 13s
security-audit / pip-audit (CVE dépendances) (push) Successful in 11s
security-audit / Scan secrets (grep) (push) Successful in 8s
tests / Lint (ruff + black) (push) Successful in 13s
tests / Tests unitaires (sans GPU) (push) Failing after 14s
tests / Tests sécurité (critique) (push) Has been skipped
execute_action dispatch expanded from 12 to 19 operational blocks:
- 4 mouse blocks (hover, drag_drop, scroll, focus) using pyautogui
- extract_text via Ollama VLM (replaces the hardcoded stub; see the usage sketch below)
- 5 ai_* blocks routed to execute_ai_analyze with tailored prompts
- screenshot_evidence (screen capture + PNG saved to disk)
- verify_element_exists (CLIP-based visual detection)

Léa workflow import enriched:
- Bridge extracts anchor_image_base64 from the edges
- Import creates VisualAnchor rows in the DB + thumbnail files on disk
- PropertiesPanel automatically displays the screenshots

Frontend:
- visual_condition and loop_visual hidden (hidden: true)
- Filter in ToolPalette to exclude hidden blocks

Deployment:
- 2 agent configs (TIM Pauline + Dev Windows), each with a unique machine_id
- 2 demo workflows in the database (invoice batch + AI extraction)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
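Usage sketch (illustrative, not part of the diff): the snippet below shows how the new extract_text block could be driven through the execute_action dispatch added in this commit. The parameter names (visual_anchor, bounding_box, extraction_mode, text_filters, output_variable, ollama_url, timeout_ms) and the result shape match the code in the diff; the concrete values, the montant_facture variable name, and the assumption that the agent executor module is importable in the calling context are hypothetical.

# Hypothetical usage sketch: calls the new extract_text block through the
# execute_action dispatcher added in this commit.
params = {
    "visual_anchor": {
        # base64 PNG of the anchor region; None lets the block capture the
        # bounding-box region (or the full screen) itself via ImageGrab.
        "screenshot": None,
        "bounding_box": {"x": 640, "y": 320, "width": 420, "height": 180},  # example values
    },
    "extraction_mode": "numbers",          # 'full' | 'numbers' | 'lines' | 'words'
    "text_filters": ["digits_only", "trim_whitespace"],
    "output_variable": "montant_facture",  # illustrative variable name
    "ollama_url": "http://localhost:11434",
    "timeout_ms": 60000,
}

result = execute_action("extract_text", params)
if result["success"]:
    print(result["output"]["extracted_text"])
else:
    print("OCR failed:", result["error"])

On success the extracted text is also written into _execution_state['variables'][output_variable], which is how downstream blocks read it.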
@@ -388,7 +388,7 @@ def execute_ai_analyze(params: dict) -> dict:
 
     try:
         prompt = params.get('analysis_prompt', params.get('prompt', ''))
-        model = params.get('model', params.get('ollama_model', 'qwen3-vl:8b'))
+        model = params.get('model', params.get('ollama_model', os.environ.get("RPA_VLM_MODEL", os.environ.get("VLM_MODEL", "gemma4:e4b"))))
         output_variable = params.get('output_variable', 'resultat_analyse')
         timeout_ms = params.get('timeout_ms', 120000) # 2 minutes par défaut
         temperature = params.get('temperature', 0.7) # Même défaut que CLI Ollama
@@ -532,6 +532,125 @@ def execute_ai_analyze(params: dict) -> dict:
         return {'success': False, 'error': str(e)}
 
 
+def execute_extract_text(params: dict) -> dict:
+    """
+    Extrait du texte depuis l'écran via Ollama VLM.
+    Capture la zone de l'ancre (ou l'écran entier) et demande au VLM d'extraire le texte.
+    """
+    import requests
+    import re
+    global _execution_state
+
+    try:
+        anchor = params.get('visual_anchor', {})
+        model = params.get('model', os.environ.get("RPA_VLM_MODEL", os.environ.get("VLM_MODEL", "gemma4:e4b")))
+        output_variable = params.get('output_variable', 'texte_extrait')
+        timeout_ms = params.get('timeout_ms', 60000)
+        extraction_mode = params.get('extraction_mode', 'full')
+        text_filters = params.get('text_filters', [])
+
+        screenshot_base64 = anchor.get('screenshot') if anchor else None
+
+        if not screenshot_base64:
+            try:
+                from PIL import ImageGrab
+                import io
+
+                bbox = anchor.get('bounding_box', {}) if anchor else {}
+
+                if bbox:
+                    x, y = int(bbox.get('x', 0)), int(bbox.get('y', 0))
+                    w, h = int(bbox.get('width', 100)), int(bbox.get('height', 100))
+                    print(f"📸 [OCR] Capture zone: ({x}, {y}) -> ({x+w}, {y+h})")
+                    screenshot = ImageGrab.grab(bbox=(x, y, x + w, y + h))
+                else:
+                    print(f"📸 [OCR] Capture écran complet")
+                    screenshot = ImageGrab.grab()
+
+                buffer = io.BytesIO()
+                screenshot.save(buffer, format='PNG')
+                screenshot_base64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+            except Exception as cap_err:
+                return {'success': False, 'error': f"Erreur capture: {cap_err}"}
+
+        if not screenshot_base64:
+            return {'success': False, 'error': "Pas d'image à analyser"}
+
+        prompt_map = {
+            'full': "Extrais TOUT le texte visible dans cette image. Retourne uniquement le texte brut, sans commentaire.",
+            'numbers': "Extrais uniquement les nombres et chiffres visibles dans cette image. Retourne-les séparés par des espaces.",
+            'lines': "Extrais tout le texte visible ligne par ligne. Une ligne par ligne de texte visible.",
+            'words': "Extrais tous les mots visibles dans cette image, séparés par des espaces.",
+        }
+        prompt = prompt_map.get(extraction_mode, prompt_map['full'])
+
+        if 'qwen' in model.lower() and not prompt.startswith('/no_think'):
+            prompt = f"/no_think\n{prompt}"
+
+        print(f"📝 [OCR] Extraction texte avec {model} (mode: {extraction_mode})...")
+
+        ollama_url = params.get('ollama_url', 'http://localhost:11434')
+        payload = {
+            "model": model,
+            "prompt": prompt,
+            "images": [screenshot_base64],
+            "stream": False,
+            "options": {"temperature": 0.1, "num_predict": 4000}
+        }
+
+        response = requests.post(
+            f"{ollama_url}/api/generate",
+            json=payload,
+            timeout=timeout_ms / 1000
+        )
+
+        if response.status_code != 200:
+            return {'success': False, 'error': f"Erreur Ollama: {response.status_code}"}
+
+        result = response.json()
+        extracted_text = result.get('response', '').strip()
+
+        if not extracted_text and result.get('thinking'):
+            extracted_text = result.get('thinking', '').strip()
+
+        for f in text_filters:
+            if f == 'digits_only':
+                extracted_text = re.sub(r'[^\d\s]', '', extracted_text)
+            elif f == 'letters_only':
+                extracted_text = re.sub(r'[^a-zA-ZÀ-ÿ\s]', '', extracted_text)
+            elif f == 'trim_whitespace':
+                extracted_text = extracted_text.strip()
+            elif f == 'uppercase':
+                extracted_text = extracted_text.upper()
+            elif f == 'lowercase':
+                extracted_text = extracted_text.lower()
+
+        print(f"✅ [OCR] Texte extrait ({len(extracted_text)} caractères)")
+        if extracted_text:
+            print(f" Résultat: {extracted_text[:150]}...")
+
+        _execution_state['variables'][output_variable] = extracted_text
+
+        return {
+            'success': True,
+            'output': {
+                'extracted_text': extracted_text,
+                'variable': output_variable,
+                'character_count': len(extracted_text),
+                'word_count': len(extracted_text.split()) if extracted_text else 0,
+                'mode': extraction_mode,
+                'model': model
+            }
+        }
+
+    except requests.exceptions.Timeout:
+        return {'success': False, 'error': f"Timeout Ollama après {timeout_ms}ms"}
+    except requests.exceptions.ConnectionError:
+        return {'success': False, 'error': "Ollama non accessible"}
+    except Exception as e:
+        return {'success': False, 'error': str(e)}
+
+
 def execute_action_with_coords(action_type: str, params: dict, coords: dict) -> dict:
     """
     Exécute une action avec des coordonnées spécifiées par l'utilisateur (self-healing).
@@ -792,6 +911,167 @@ def execute_action(action_type: str, params: dict) -> dict:
         # Analyse de texte avec IA (Ollama)
         return execute_ai_analyze(params)
 
+    elif action_type in ['hover_anchor', 'hover']:
+        anchor = params.get('visual_anchor', {})
+        bbox = anchor.get('bounding_box', {})
+        if not bbox:
+            return {'success': False, 'error': 'Pas de bounding_box dans visual_anchor'}
+
+        x = bbox.get('x', 0) + bbox.get('width', 0) / 2
+        y = bbox.get('y', 0) + bbox.get('height', 0) / 2
+        duration_ms = params.get('hover_duration_ms', params.get('duration_ms', 1000))
+
+        print(f"🖱️ [Action] Survol à ({x}, {y}) pendant {duration_ms}ms")
+        pyautogui.moveTo(x, y, duration=0.3)
+        time.sleep(duration_ms / 1000)
+        return {'success': True, 'output': {'hovered_at': {'x': x, 'y': y}, 'duration_ms': duration_ms}}
+
+    elif action_type in ['drag_drop_anchor', 'drag_drop']:
+        source_anchor = params.get('source_anchor', params.get('visual_anchor', {}))
+        dest_anchor = params.get('destination_anchor', {})
+        source_bbox = source_anchor.get('bounding_box', {})
+        dest_bbox = dest_anchor.get('bounding_box', {})
+
+        if not source_bbox or not dest_bbox:
+            return {'success': False, 'error': 'bounding_box source et destination requis'}
+
+        src_x = source_bbox.get('x', 0) + source_bbox.get('width', 0) / 2
+        src_y = source_bbox.get('y', 0) + source_bbox.get('height', 0) / 2
+        dst_x = dest_bbox.get('x', 0) + dest_bbox.get('width', 0) / 2
+        dst_y = dest_bbox.get('y', 0) + dest_bbox.get('height', 0) / 2
+        duration_ms = params.get('drag_duration_ms', 500)
+
+        print(f"🖱️ [Action] Glisser de ({src_x}, {src_y}) vers ({dst_x}, {dst_y})")
+        pyautogui.moveTo(src_x, src_y, duration=0.2)
+        time.sleep(0.1)
+        pyautogui.drag(dst_x - src_x, dst_y - src_y, duration=duration_ms / 1000, button='left')
+        return {'success': True, 'output': {'from': {'x': src_x, 'y': src_y}, 'to': {'x': dst_x, 'y': dst_y}}}
+
+    elif action_type in ['scroll_to_anchor', 'scroll']:
+        direction = params.get('scroll_direction', 'down')
+        amount = params.get('scroll_amount', params.get('scroll_step_pixels', 3))
+        anchor = params.get('visual_anchor', {})
+        bbox = anchor.get('bounding_box', {})
+
+        if bbox:
+            x = bbox.get('x', 0) + bbox.get('width', 0) / 2
+            y = bbox.get('y', 0) + bbox.get('height', 0) / 2
+            pyautogui.moveTo(x, y, duration=0.1)
+
+        scroll_value = amount if direction in ['up', 'left'] else -amount
+
+        print(f"📜 [Action] Scroll {direction} ({amount})")
+        if direction in ['left', 'right']:
+            pyautogui.hscroll(scroll_value)
+        else:
+            pyautogui.scroll(scroll_value)
+
+        time.sleep(0.5)
+        return {'success': True, 'output': {'direction': direction, 'amount': amount}}
+
+    elif action_type in ['focus_anchor', 'focus']:
+        anchor = params.get('visual_anchor', {})
+        bbox = anchor.get('bounding_box', {})
+        if not bbox:
+            return {'success': False, 'error': 'Pas de bounding_box dans visual_anchor'}
+
+        x = bbox.get('x', 0) + bbox.get('width', 0) / 2
+        y = bbox.get('y', 0) + bbox.get('height', 0) / 2
+
+        print(f"🎯 [Action] Focus à ({x}, {y})")
+        pyautogui.click(x, y)
+        time.sleep(0.3)
+        return {'success': True, 'output': {'focused_at': {'x': x, 'y': y}}}
+
+    elif action_type == 'extract_text':
+        return execute_extract_text(params)
+
+    elif action_type == 'ai_ocr':
+        params.setdefault('analysis_prompt', "Extrais TOUT le texte visible dans cette image. Retourne uniquement le texte brut, ligne par ligne, sans commentaire.")
+        return execute_ai_analyze(params)
+
+    elif action_type == 'ai_summarize':
+        params.setdefault('analysis_prompt', "Résume le contenu visible dans cette image en 3-5 phrases concises. Identifie les informations clés.")
+        return execute_ai_analyze(params)
+
+    elif action_type == 'ai_extract':
+        params.setdefault('analysis_prompt', "Extrais les données structurées visibles (noms, dates, montants, identifiants). Retourne un JSON structuré.")
+        return execute_ai_analyze(params)
+
+    elif action_type == 'ai_classify':
+        categories = params.get('categories', [])
+        cats_str = ', '.join(categories) if categories else 'les catégories pertinentes'
+        params.setdefault('analysis_prompt', f"Classe le contenu visible parmi : {cats_str}. Retourne la catégorie et un score de confiance.")
+        return execute_ai_analyze(params)
+
+    elif action_type == 'ai_custom':
+        system_prompt = params.get('system_prompt', '')
+        if system_prompt and 'analysis_prompt' not in params:
+            params['analysis_prompt'] = system_prompt
+        return execute_ai_analyze(params)
+
+    elif action_type == 'screenshot_evidence':
+        import pyautogui
+        from PIL import Image
+        from pathlib import Path
+        import io
+
+        label = params.get('label', params.get('description', 'evidence'))
+        output_variable = params.get('output_variable', 'screenshot_evidence')
+
+        screenshot = pyautogui.screenshot()
+
+        # Sauvegarder la preuve
+        evidence_dir = Path('data/evidence')
+        evidence_dir.mkdir(parents=True, exist_ok=True)
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        filepath = evidence_dir / f"evidence_{timestamp}_{label[:30]}.png"
+        screenshot.save(str(filepath))
+
+        # Encoder en base64 pour la variable
+        buffer = io.BytesIO()
+        screenshot.save(buffer, format='PNG')
+        screenshot_b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+
+        _execution_state['variables'][output_variable] = screenshot_b64
+
+        print(f"📸 [Evidence] Capture sauvegardée: {filepath}")
+        return {'success': True, 'output': {'filepath': str(filepath), 'variable': output_variable}}
+
+    elif action_type in ['verify_element_exists', 'verify_element']:
+        anchor = params.get('visual_anchor', {})
+        screenshot_base64 = anchor.get('screenshot')
+        bbox = anchor.get('bounding_box', {})
+        expected = params.get('expected', True)
+        output_variable = params.get('output_variable', 'element_exists')
+
+        found = False
+        confidence = 0.0
+
+        if screenshot_base64 and execution_mode in ['intelligent', 'debug']:
+            try:
+                from services.intelligent_executor import find_and_click
+                result = find_and_click(
+                    anchor_image_base64=screenshot_base64,
+                    anchor_bbox=bbox,
+                    method='clip',
+                    detection_threshold=0.35
+                )
+                found = result.get('found', False)
+                confidence = result.get('confidence', 0.0)
+            except Exception as e:
+                print(f"⚠️ [Verify] Erreur vision: {e}")
+        elif bbox:
+            found = True
+            confidence = 0.5
+
+        match = (found == expected)
+        _execution_state['variables'][output_variable] = found
+
+        status = "trouvé" if found else "absent"
+        print(f"🔍 [Verify] Élément {status} (confiance: {confidence:.2f}, attendu: {expected})")
+        return {'success': match, 'output': {'found': found, 'confidence': confidence, 'expected': expected, 'match': match}}
+
     else:
         return {'success': False, 'error': f"Type d'action non supporté: {action_type}"}
 
@@ -27,7 +27,7 @@ from flask import jsonify, request
 
 from . import api_v3_bp
 from .workflow import generate_id
-from db.models import db, Workflow, Step
+from db.models import db, Workflow, Step, VisualAnchor
 
 logger = logging.getLogger(__name__)
 
@@ -303,7 +303,7 @@ def import_learned_workflow(workflow_id: str):
 
     db.session.add(workflow)
 
-    # Créer les steps
+    # Créer les steps (avec sauvegarde des screenshots d'ancres)
     for step_data in steps_list:
         step = Step(
             id=generate_id("step"),
@@ -314,7 +314,57 @@ def import_learned_workflow(workflow_id: str):
             position_y=step_data.get("position_y", 200),
             label=step_data.get("label", step_data["action_type"]),
         )
-        step.parameters = step_data.get("parameters", {})
+        params = dict(step_data.get("parameters", {}))
+
+        # Extraire et sauvegarder le screenshot d'ancre si présent
+        anchor_b64 = params.pop("_anchor_image_base64", None)
+        params.pop("_anchor_bbox", None)
+        if anchor_b64:
+            try:
+                from services.anchor_image_service import (
+                    save_anchor_image, generate_anchor_id
+                )
+                from PIL import Image
+                from io import BytesIO
+                import base64 as b64mod
+
+                if ',' in anchor_b64:
+                    anchor_b64 = anchor_b64.split(',', 1)[1]
+                img_data = b64mod.b64decode(anchor_b64)
+                img = Image.open(BytesIO(img_data))
+                bbox = {
+                    "x": 0, "y": 0,
+                    "width": img.width, "height": img.height
+                }
+                anchor_id = generate_anchor_id()
+                result = save_anchor_image(
+                    anchor_id=anchor_id,
+                    image_base64=anchor_b64,
+                    bounding_box=bbox,
+                    metadata={"source": "learned_import", "workflow_id": wf_id}
+                )
+                if result.get("success"):
+                    from services.anchor_image_service import (
+                        get_original_path, get_thumbnail_path
+                    )
+                    va = VisualAnchor(
+                        id=anchor_id,
+                        image_path=str(get_original_path(anchor_id) or ""),
+                        thumbnail_path=str(get_thumbnail_path(anchor_id) or ""),
+                        bbox_x=0, bbox_y=0,
+                        bbox_width=img.width, bbox_height=img.height,
+                        description=step_data.get("label", ""),
+                        capture_method="learned_import",
+                    )
+                    db.session.add(va)
+                    step.anchor_id = anchor_id
+                    logger.info("Ancre sauvegardée: %s pour step %s",
+                                anchor_id, step.id)
+            except Exception as e:
+                logger.warning("Échec sauvegarde ancre pour step %s: %s",
+                               step_data.get("order"), e)
+
+        step.parameters = params
         db.session.add(step)
 
     db.session.commit()