fix(vision): Corriger les seuils CLIP/Template pour éviter les clics erronés

Problème résolu:
- Le workflow cliquait au mauvais endroit (200-500px de distance)
- Les seuils de matching étaient trop permissifs

Corrections apportées:
- CLIP: MAX_DISTANCE=120px, MIN_SCORE=0.55, MIN_COMBINED=0.5
- Template zonée: MAX_DISTANCE=150px
- Template global: MAX_DISTANCE=150px (était 500px)
- Ajout de logs détaillés pour debug des candidats rejetés
- Désactivation de l'overlay debug (polling intensif inutile)

Fichiers modifiés:
- intelligent_executor.py: Seuils stricts + logs
- execute.py: Logique d'exécution modes basic/intelligent/debug
- ui_detection_service.py: Backend UI-DETR-1
- App.tsx: Overlay désactivé
- ExecutionOverlay.tsx: URLs API corrigées

Documentation:
- docs/REFERENCE_VISION_RPA.md: Guide complet de référence

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Dom
2026-01-24 02:15:04 +01:00
parent d8d086dac5
commit f04f156144
6 changed files with 2088 additions and 156 deletions

View File

@@ -17,9 +17,14 @@ import StepNode from './components/StepNode';
import ToolPalette from './components/ToolPalette';
import PropertiesPanel from './components/PropertiesPanel';
import CapturePanel from './components/CapturePanel';
import WorkflowList from './components/WorkflowList';
import WorkflowSelector from './components/WorkflowSelector';
import WorkflowManagerModal from './components/WorkflowManagerModal';
import ExecutionControls from './components/ExecutionControls';
import ExecutionModeToggle from './components/ExecutionModeToggle';
import ExecutionOverlay from './components/ExecutionOverlay';
import VariableManager from './components/VariableManager';
import type { Variable } from './components/VariableManager';
import CaptureLibrary from './components/CaptureLibrary';
const nodeTypes: NodeTypes = {
step: StepNode,
@@ -32,6 +37,12 @@ function App() {
// Capture set after a screen capture or a library selection; null until then.
const [capture, setCapture] = useState<Capture | null>(null);
// Last error message reported by a handler's catch block; null when there is none.
const [error, setError] = useState<string | null>(null);
// Execution mode forwarded to api.startExecution and shown by ExecutionModeToggle.
const [executionMode, setExecutionMode] = useState<ExecutionMode>('basic');
// Drives ExecutionOverlay visibility. In the visible code it is never set to true
// (the activation in handleStartExecution is commented out), only reset to false.
const [showDebugOverlay, setShowDebugOverlay] = useState(false);
// True between a start request and the moment the backend reports is_running = false;
// gates the status polling effect.
const [isExecutionRunning, setIsExecutionRunning] = useState(false);
// Optional detection rectangle shared with the capture panel props and ExecutionOverlay.
const [detectionZone, setDetectionZone] = useState<{x: number; y: number; width: number; height: number} | null>(null);
// Variables edited through VariableManager; held in component state only in the visible code.
const [variables, setVariables] = useState<Variable[]>([]);
// Toggles the WorkflowManagerModal.
const [showWorkflowManager, setShowWorkflowManager] = useState(false);
// Capture handed to CaptureLibrary as the current one.
const [currentCapture, setCurrentCapture] = useState<Capture | null>(null);
// Charger l'état initial
const loadState = useCallback(async () => {
@@ -48,6 +59,31 @@ function App() {
loadState();
}, [loadState]);
// Poll the backend execution status every 500 ms while an execution is running.
// When the backend reports the run is over, the app state is reloaded once.
// Note: the debug overlay is intentionally NOT closed here so the user can
// still inspect detection results and close it manually.
useEffect(() => {
  if (!isExecutionRunning) return;

  // `cancelled` stops a late response from touching state after cleanup.
  // `inFlight` prevents requests from piling up (and loadState from being
  // called several times) when a response takes longer than the poll period.
  let cancelled = false;
  let inFlight = false;

  const pollStatus = async () => {
    if (inFlight) return;
    inFlight = true;
    try {
      const status = await api.getExecutionStatus();
      if (cancelled) return;
      setIsExecutionRunning(status.is_running);
      if (!status.is_running) {
        // Execution finished: refresh the state shown in the UI.
        await loadState();
      }
    } catch (err) {
      console.error('Erreur polling status:', err);
    } finally {
      inFlight = false;
    }
  };

  const interval = setInterval(() => {
    void pollStatus();
  }, 500);

  return () => {
    cancelled = true;
    clearInterval(interval);
  };
}, [isExecutionRunning, loadState]);
// Convertir les étapes en nœuds React Flow
const updateNodesFromWorkflow = (steps: Step[]) => {
const newNodes: Node[] = steps.map((step, index) => ({
@@ -97,7 +133,6 @@ function App() {
};
const handleDeleteWorkflow = async (id: string) => {
if (!confirm('Supprimer ce workflow ?')) return;
try {
await api.deleteWorkflow(id);
await loadState();
@@ -106,6 +141,29 @@ function App() {
}
};
/** Renames the workflow `id` through the API, then reloads the application state. */
const handleRenameWorkflow = async (id: string, newName: string) => {
  const patch = { name: newName };
  try {
    await api.updateWorkflow(id, patch);
    await loadState();
  } catch (renameError) {
    // Surface the failure in the UI error state.
    const { message } = renameError as Error;
    setError(message);
  }
};
/**
 * Updates a workflow's metadata through the API, then reloads the application state.
 * Only the fields that are defined on `metadata` are sent; `trigger_examples`
 * is renamed to `triggerExamples` for the API payload.
 */
const handleUpdateWorkflowMeta = async (id: string, metadata: { description?: string; tags?: string[]; trigger_examples?: string[] }) => {
  try {
    const { description, tags, trigger_examples: triggerExamples } = metadata;
    const apiData: { description?: string; tags?: string[]; triggerExamples?: string[] } = {
      ...(description !== undefined ? { description } : {}),
      ...(tags !== undefined ? { tags } : {}),
      ...(triggerExamples !== undefined ? { triggerExamples } : {}),
    };
    await api.updateWorkflow(id, apiData);
    await loadState();
  } catch (updateError) {
    const { message } = updateError as Error;
    setError(message);
  }
};
const handleAddStep = async (actionType: ActionType, position?: { x: number; y: number }) => {
if (!appState?.session.active_workflow_id) {
setError('Sélectionnez un workflow d\'abord');
@@ -163,11 +221,17 @@ function App() {
try {
const result = await api.captureScreen();
setCapture(result.capture);
setCurrentCapture(result.capture);
} catch (err) {
setError((err as Error).message);
}
};
/** Makes a capture picked from the library both the active capture and the current one. */
const handleSelectCaptureFromLibrary = (selected: Capture) => {
  for (const assign of [setCapture, setCurrentCapture]) {
    assign(selected);
  }
};
const handleSelectAnchor = async (bbox: { x: number; y: number; width: number; height: number }, screenshotBase64?: string) => {
if (!appState?.session.selected_step_id) {
setError('Sélectionnez une étape d\'abord');
@@ -183,7 +247,14 @@ function App() {
const handleStartExecution = async () => {
try {
await api.startExecution();
await api.startExecution(undefined, executionMode);
setIsExecutionRunning(true);
// Overlay désactivé - génère trop de requêtes et n'est pas utile
// if (executionMode === 'debug') {
// setShowDebugOverlay(true);
// }
await loadState();
} catch (err) {
setError((err as Error).message);
@@ -193,12 +264,31 @@ function App() {
/**
 * Asks the backend to stop the execution, clears the running and overlay flags,
 * then reloads the application state. Any failure is reported through `setError`.
 */
const handleStopExecution = async () => {
  try {
    await api.stopExecution();
    // Flags are only cleared once the stop request has succeeded.
    setIsExecutionRunning(false);
    setShowDebugOverlay(false);
    await loadState();
  } catch (stopError) {
    const { message } = stopError as Error;
    setError(message);
  }
};
// Variable management

/** Appends a new variable, giving it an id derived from the current timestamp. */
const handleVariableCreate = (data: Omit<Variable, 'id'>) => {
  const id = `var_${Date.now()}`;
  setVariables(existing => {
    const created: Variable = { ...data, id };
    return [...existing, created];
  });
};
/** Merges `data` into the variable whose id matches; other variables are left untouched. */
const handleVariableUpdate = (id: string, data: Partial<Variable>) => {
  setVariables(existing =>
    existing.map(variable => {
      if (variable.id !== id) {
        return variable;
      }
      return { ...variable, ...data };
    })
  );
};
/** Removes the variable whose id matches `id`. */
const handleVariableDelete = (id: string) => {
  const isKept = (variable: Variable) => variable.id !== id;
  setVariables(existing => existing.filter(isKept));
};
// Drop d'un outil sur le canvas
const onDrop = useCallback(
(event: React.DragEvent) => {
@@ -230,7 +320,15 @@ function App() {
<div className="app">
{/* Header */}
<header className="header">
<h1>VWB - Visual Workflow Builder</h1>
<h1>VWB</h1>
<WorkflowSelector
workflows={appState?.workflows_list || []}
activeWorkflow={appState?.workflow ? { id: appState.workflow.id, name: appState.workflow.name } : null}
onSelect={handleSelectWorkflow}
onCreate={handleCreateWorkflow}
onOpenManager={() => setShowWorkflowManager(true)}
onRename={handleRenameWorkflow}
/>
<ExecutionModeToggle
mode={executionMode}
onChange={setExecutionMode}
@@ -251,15 +349,8 @@ function App() {
)}
<div className="main-layout">
{/* Sidebar gauche: Workflows + Outils */}
{/* Sidebar gauche: Outils */}
<aside className="sidebar left">
<WorkflowList
workflows={appState?.workflows_list || []}
activeId={appState?.session.active_workflow_id || null}
onSelect={handleSelectWorkflow}
onCreate={handleCreateWorkflow}
onDelete={handleDeleteWorkflow}
/>
<ToolPalette />
</aside>
@@ -286,7 +377,7 @@ function App() {
)}
</main>
{/* Sidebar droite: Propriétés + Capture */}
{/* Sidebar droite: Propriétés + Capture + Variables */}
<aside className="sidebar right">
<PropertiesPanel
step={selectedStep || null}
@@ -299,6 +390,19 @@ function App() {
onSelectAnchor={handleSelectAnchor}
hasSelectedStep={!!appState?.session.selected_step_id}
executionMode={executionMode}
detectionZone={detectionZone}
onSetDetectionZone={setDetectionZone}
/>
<CaptureLibrary
currentCapture={currentCapture}
onSelectCapture={handleSelectCaptureFromLibrary}
onCapture={handleCapture}
/>
<VariableManager
variables={variables}
onVariableCreate={handleVariableCreate}
onVariableUpdate={handleVariableUpdate}
onVariableDelete={handleVariableDelete}
/>
</aside>
</div>
@@ -308,6 +412,27 @@ function App() {
<span>{EXECUTION_MODES[executionMode].icon}</span>
<span>Mode {EXECUTION_MODES[executionMode].label}</span>
</div>
{/* Overlay de debug en temps réel */}
<ExecutionOverlay
isVisible={showDebugOverlay}
isRunning={isExecutionRunning}
onClose={() => setShowDebugOverlay(false)}
initialDetectionZone={detectionZone}
/>
{/* Modal de gestion des workflows */}
{showWorkflowManager && (
<WorkflowManagerModal
workflows={appState?.workflows_list || []}
activeWorkflowId={appState?.session.active_workflow_id || null}
onSelect={handleSelectWorkflow}
onDelete={handleDeleteWorkflow}
onRename={handleRenameWorkflow}
onUpdateMetadata={handleUpdateWorkflowMeta}
onClose={() => setShowWorkflowManager(false)}
/>
)}
</div>
);
}

View File

@@ -0,0 +1,436 @@
/**
* Overlay de debug en temps réel pendant l'exécution
* Affiche la détection UI et les actions en cours
*/
import { useState, useEffect, useCallback, useRef } from 'react';
import type { UIElement, DetectionResult } from '../services/uiDetection';
import { detectUIElements } from '../services/uiDetection';
/**
 * Event describing what the executor is doing at a given moment.
 * In this file only `step_start` events are built, from the polled status.
 */
interface ExecutionEvent {
// Kind of event.
type: 'step_start' | 'detection' | 'click' | 'step_end' | 'error';
// Zero-based index of the step (displayed as stepIndex + 1 in the header).
stepIndex: number;
// Action type of the step.
stepType: string;
// Creation time in milliseconds (Date.now()).
timestamp: number;
// Optional details attached to the event; never populated in this file.
data?: {
elements?: UIElement[];
targetElement?: UIElement;
clickCoordinates?: { x: number; y: number };
confidence?: number;
method?: string;
error?: string;
};
}
/**
 * Rectangle restricting UI detection, expressed in screenshot pixels
 * (it is used as the source rectangle when cropping and multiplied by the
 * display scale when drawn).
 */
interface DetectionZone {
// Left edge.
x: number;
// Top edge.
y: number;
width: number;
height: number;
}
/** Props of the ExecutionOverlay component. */
interface Props {
// When false the component renders nothing and does not poll.
isVisible: boolean;
// When true the overlay polls for screenshots and execution status.
isRunning: boolean;
// Called by the close button and the Escape key.
onClose: () => void;
// Zone used to initialise (and later overwrite) the local detection zone when non-null.
initialDetectionZone?: DetectionZone | null;
}
export default function ExecutionOverlay({ isVisible, isRunning, onClose, initialDetectionZone }: Props) {
// Latest screenshot as a PNG data URL; null until the first capture succeeds.
const [screenshot, setScreenshot] = useState<string | null>(null);
// Elements returned by the last detection, in full-screenshot coordinates.
const [elements, setElements] = useState<UIElement[]>([]);
// Element currently highlighted (set on hover).
const [targetElement, setTargetElement] = useState<UIElement | null>(null);
// Position of the click indicator, in screenshot coordinates.
const [clickPoint, setClickPoint] = useState<{ x: number; y: number } | null>(null);
// True while a capture + detection round-trip is in progress.
const [isDetecting, setIsDetecting] = useState(false);
// Last event derived from the polled execution status.
const [lastEvent, setLastEvent] = useState<ExecutionEvent | null>(null);
// Confidence of the highlighted element, shown as a percentage in the header.
const [confidence, setConfidence] = useState<number | null>(null);
// Screenshot dimensions; defaults to 1920x1080 until a capture reports its size.
const [imageSize, setImageSize] = useState({ width: 1920, height: 1080 });
// Active detection zone, seeded from the initialDetectionZone prop.
const [detectionZone, setDetectionZone] = useState<DetectionZone | null>(initialDetectionZone || null);
// True while the user is drawing a zone with the mouse.
const [isSelectingZone, setIsSelectingZone] = useState(false);
// Point where the current drag started, in screenshot coordinates.
const [zoneStart, setZoneStart] = useState<{ x: number; y: number } | null>(null);
// Rectangle being drawn during the drag, before it is committed.
const [tempZone, setTempZone] = useState<DetectionZone | null>(null);
// Crops a base64 data-URL image to `zone` (in image pixels) and resolves with
// a PNG data URL of the cropped region.
//
// If no 2D canvas context is available, the original image is returned unchanged.
// If the image cannot be loaded, the promise rejects. Without that, the promise
// would stay pending forever and the caller's cleanup code would never run.
const cropImage = useCallback((
  imageBase64: string,
  zone: DetectionZone
): Promise<string> => {
  return new Promise<string>((resolve, reject) => {
    const img = new Image();

    img.onload = () => {
      const canvas = document.createElement('canvas');
      canvas.width = zone.width;
      canvas.height = zone.height;

      const ctx = canvas.getContext('2d');
      if (!ctx) {
        // Best-effort fallback: hand back the uncropped image.
        resolve(imageBase64);
        return;
      }

      // Copy the zone from the source image to the canvas origin, unscaled.
      ctx.drawImage(
        img,
        zone.x, zone.y, zone.width, zone.height,
        0, 0, zone.width, zone.height
      );
      resolve(canvas.toDataURL('image/png'));
    };

    img.onerror = () => {
      reject(new Error("Échec du chargement de l'image à recadrer"));
    };

    img.src = imageBase64;
  });
}, []);
// In-flight guard for captureAndDetect. It lives in a ref rather than in state
// so that toggling it does not change the callback identity: effects that
// depend on captureAndDetect would otherwise re-run (and capture again) every
// time a capture starts or finishes.
const isDetectingRef = useRef(false);

// Captures the screen through the backend, then runs UI element detection on
// the screenshot (or on the detection zone only, when one is set) and stores
// the results in state. Capture is allowed even when the execution is over,
// so the final screen can be inspected. Errors are logged, never thrown.
const captureAndDetect = useCallback(async () => {
  // Skip this tick if a previous capture is still running.
  if (isDetectingRef.current) return;
  isDetectingRef.current = true;
  setIsDetecting(true); // drives the "Détection..." indicator only

  try {
    // Capture API exposed by the backend (port 5001).
    const API_BASE = 'http://localhost:5001';
    const response = await fetch(`${API_BASE}/api/v3/capture/screen`, { method: 'POST' });
    if (!response.ok) {
      throw new Error(`HTTP ${response.status}`);
    }
    const data = await response.json();

    if (data.success && data.capture) {
      const screenshotBase64 = `data:image/png;base64,${data.capture.screenshot_base64}`;
      setScreenshot(screenshotBase64);
      setImageSize({
        width: data.capture.width,
        height: data.capture.height
      });

      // When a detection zone is set, detect on the cropped region only and
      // remember its origin so results can be mapped back to the full image.
      let imageToDetect = screenshotBase64;
      let offsetX = 0;
      let offsetY = 0;

      if (detectionZone) {
        imageToDetect = await cropImage(screenshotBase64, detectionZone);
        offsetX = detectionZone.x;
        offsetY = detectionZone.y;
      }

      const detectionResult = await detectUIElements(imageToDetect, {
        threshold: 0.30 // Lower threshold to catch small elements
      });

      // Shift coordinates back into full-screenshot space.
      const adjustedElements = detectionResult.elements.map(elem => ({
        ...elem,
        bbox: {
          x1: elem.bbox.x1 + offsetX,
          y1: elem.bbox.y1 + offsetY,
          x2: elem.bbox.x2 + offsetX,
          y2: elem.bbox.y2 + offsetY,
        },
        center: {
          x: elem.center.x + offsetX,
          y: elem.center.y + offsetY,
        }
      }));

      setElements(adjustedElements);
    }
  } catch (err) {
    console.error('Erreur capture/détection:', err);
  } finally {
    isDetectingRef.current = false;
    setIsDetecting(false);
  }
}, [detectionZone, cropImage]);
// Screenshot refresh: one capture as soon as the overlay is visible (so the
// current screen is shown even when nothing is running), then one capture
// every 500 ms for as long as an execution is in progress.
useEffect(() => {
  if (!isVisible) return;

  captureAndDetect();

  if (!isRunning) return;

  const timerId = setInterval(captureAndDetect, 500);
  return () => clearInterval(timerId);
}, [isVisible, isRunning, captureAndDetect]);
// Polls the backend execution status every 200 ms while the overlay is
// visible and an execution is running, and records the current step as the
// last event.
useEffect(() => {
  if (!isVisible || !isRunning) return;

  // Set by the cleanup so a response that arrives late cannot update state
  // after the overlay was hidden or the execution stopped.
  let cancelled = false;

  const pollStatus = async () => {
    try {
      const API_BASE = 'http://localhost:5001';
      const response = await fetch(`${API_BASE}/api/v3/execute/status`);
      if (!response.ok) {
        throw new Error(`HTTP ${response.status}`);
      }
      const data = await response.json();
      if (cancelled) return;

      if (data.success && data.execution) {
        // The status endpoint is not an event stream: synthesise a
        // step_start event from the reported step index.
        const event: ExecutionEvent = {
          type: 'step_start',
          stepIndex: data.execution.current_step_index || 0,
          stepType: 'click',
          timestamp: Date.now()
        };
        setLastEvent(event);
      }
    } catch (err) {
      console.error('Erreur polling status:', err);
    }
  };

  const interval = setInterval(() => {
    void pollStatus();
  }, 200);

  return () => {
    cancelled = true;
    clearInterval(interval);
  };
}, [isVisible, isRunning]);
// Zone selection handlers

/** Starts a zone drag: records the pointer position converted to screenshot coordinates. */
const handleMouseDown = (e: React.MouseEvent) => {
  if (isSelectingZone) {
    const { left, top } = e.currentTarget.getBoundingClientRect();
    const origin = {
      x: (e.clientX - left) / scale,
      y: (e.clientY - top) / scale,
    };
    setZoneStart(origin);
    setTempZone({ ...origin, width: 0, height: 0 });
  }
};
/**
 * Updates the rectangle being drawn. The rectangle is normalised so that
 * (x, y) is always its top-left corner, whatever the drag direction.
 */
const handleMouseMove = (e: React.MouseEvent) => {
  if (!isSelectingZone || zoneStart === null) return;

  const { left, top } = e.currentTarget.getBoundingClientRect();
  const pointerX = (e.clientX - left) / scale;
  const pointerY = (e.clientY - top) / scale;

  const deltaX = pointerX - zoneStart.x;
  const deltaY = pointerY - zoneStart.y;

  const originX = deltaX < 0 ? pointerX : zoneStart.x;
  const originY = deltaY < 0 ? pointerY : zoneStart.y;

  setTempZone({
    x: originX,
    y: originY,
    width: Math.abs(deltaX),
    height: Math.abs(deltaY)
  });
};
/**
 * Ends a zone drag. The drawn rectangle becomes the detection zone (rounded
 * to whole pixels) only when both sides exceed 50 px; selection mode and the
 * temporary drag state are reset in every case.
 */
const handleMouseUp = () => {
  if (!isSelectingZone || tempZone === null) return;

  const { x, y, width, height } = tempZone;
  const isLargeEnough = width > 50 && height > 50;

  if (isLargeEnough) {
    const [left, top, w, h] = [x, y, width, height].map(Math.round);
    setDetectionZone({ x: left, y: top, width: w, height: h });
  }

  setIsSelectingZone(false);
  setZoneStart(null);
  setTempZone(null);
};
/** Removes the detection zone and discards the elements detected so far. */
const clearDetectionZone = () => {
  const noElements: UIElement[] = [];
  setDetectionZone(null);
  setElements(noElements);
};
// Demo helper: highlights the hovered element as if it were the click target,
// placing the click indicator at its center and showing its confidence.
const handleElementHover = (elem: UIElement) => {
  const { center, confidence: score } = elem;
  setTargetElement(elem);
  setClickPoint({ x: center.x, y: center.y });
  setConfidence(score);
};
// Adopt the detection zone supplied through props whenever a non-null one
// is provided; a null/undefined prop leaves the local zone untouched.
useEffect(() => {
  if (!initialDetectionZone) return;
  setDetectionZone(initialDetectionZone);
}, [initialDetectionZone]);
// Clear the highlighted target, click indicator and confidence badge
// whenever no execution is running.
useEffect(() => {
  if (isRunning) return;
  setTargetElement(null);
  setClickPoint(null);
  setConfidence(null);
}, [isRunning]);
// Escape key closes the overlay; the listener is only installed while visible.
useEffect(() => {
  if (!isVisible) return;

  const closeOnEscape = (keyEvent: KeyboardEvent) => {
    const isEscape = keyEvent.key === 'Escape';
    if (isEscape) onClose();
  };

  document.addEventListener('keydown', closeOnEscape);
  return () => {
    document.removeEventListener('keydown', closeOnEscape);
  };
}, [isVisible, onClose]);
// Display scale: the screenshot is shown at most 1400 px wide (or 90% of the
// window), and `scale` converts screenshot pixels to display pixels.
// Note: these constants are declared AFTER the mouse handlers that read
// `scale`; that works because the handlers only run once rendering is done.
const displayWidth = Math.min(window.innerWidth * 0.9, 1400);
const scale = displayWidth / imageSize.width;
const displayHeight = imageSize.height * scale;
// Early exit placed after every hook call so the hook order never changes.
if (!isVisible) return null;
return (
<div className="execution-overlay-modal">
<div className="execution-overlay-header">
<div className="header-left">
<span className="status-indicator running" />
<span className="status-text">
{isRunning ? 'Exécution en cours' : 'En pause'}
</span>
{lastEvent && (
<span className="step-info">
Étape {lastEvent.stepIndex + 1}
</span>
)}
</div>
<div className="header-center">
<button
className={`zone-btn ${isSelectingZone ? 'active' : ''}`}
onClick={() => setIsSelectingZone(!isSelectingZone)}
>
{isSelectingZone ? '✋ Annuler' : '✂️ Sélectionner zone'}
</button>
{detectionZone && (
<button className="zone-btn clear" onClick={clearDetectionZone}>
Effacer zone
</button>
)}
<span className="detection-count">
{elements.length} éléments détectés
{detectionZone && ' (zone)'}
</span>
{confidence !== null && (
<span className="confidence-badge">
Confiance: {(confidence * 100).toFixed(0)}%
</span>
)}
</div>
<div className="header-right">
<button onClick={onClose}>Fermer (Échap)</button>
</div>
</div>
<div className="execution-overlay-content">
{screenshot ? (
<div
className={`screen-container ${isSelectingZone ? 'selecting' : ''}`}
style={{
width: displayWidth,
height: displayHeight,
position: 'relative',
cursor: isSelectingZone ? 'crosshair' : 'default'
}}
onMouseDown={handleMouseDown}
onMouseMove={handleMouseMove}
onMouseUp={handleMouseUp}
onMouseLeave={handleMouseUp}
>
<img
src={screenshot}
alt="Écran en temps réel"
style={{ width: '100%', height: '100%', display: 'block', pointerEvents: 'none' }}
/>
{/* Committed detection zone */}
{detectionZone && (
<div
className="detection-zone"
style={{
position: 'absolute',
left: detectionZone.x * scale,
top: detectionZone.y * scale,
width: detectionZone.width * scale,
height: detectionZone.height * scale,
}}
/>
)}
{/* Zone currently being drawn */}
{tempZone && tempZone.width > 0 && (
<div
className="detection-zone temp"
style={{
position: 'absolute',
left: tempZone.x * scale,
top: tempZone.y * scale,
width: tempZone.width * scale,
height: tempZone.height * scale,
}}
/>
)}
{/* Detected elements (hidden while a zone is being selected) */}
{!isSelectingZone && elements.map((elem) => {
const isTarget = targetElement?.id === elem.id;
return (
<div
key={elem.id}
className={`overlay-bbox ${isTarget ? 'target' : ''}`}
style={{
position: 'absolute',
left: elem.bbox.x1 * scale,
top: elem.bbox.y1 * scale,
width: (elem.bbox.x2 - elem.bbox.x1) * scale,
height: (elem.bbox.y2 - elem.bbox.y1) * scale,
}}
onMouseEnter={() => handleElementHover(elem)}
onMouseLeave={() => {
if (!isRunning) {
setTargetElement(null);
setClickPoint(null);
}
}}
>
<span className="bbox-id">{elem.id}</span>
</div>
);
})}
{/* Animated click point (offset by 20 px on each axis) */}
{clickPoint && (
<div
className="click-indicator"
style={{
position: 'absolute',
left: clickPoint.x * scale - 20,
top: clickPoint.y * scale - 20,
}}
>
<div className="click-ring" />
<div className="click-center" />
</div>
)}
{/* Loading indicator shown while a capture/detection is in progress */}
{isDetecting && (
<div className="detecting-indicator">
<span>Détection...</span>
</div>
)}
</div>
) : (
<div className="loading-screen">
<span>Capture de l'écran...</span>
</div>
)}
</div>
{/* Bottom information bar */}
<div className="execution-overlay-footer">
<span>Mode Debug - Vision AI activée</span>
<span>UI-DETR-1 | Template Matching</span>
<span>Survolez un élément pour voir le point de clic</span>
</div>
</div>
);
}