feat(vwb): Intégration UI-DETR-1 + Toggle mode Basique/Intelligent/Debug

- Toggle 3 modes dans le header: Basique (coords fixes), Intelligent (vision IA), Debug (overlay)
- Service UI-DETR-1 pour détection d'éléments UI (510MB model, ~800ms/image)
- API endpoints: /api/ui-detection/detect, /preload, /status, /find-element
- Overlay des bboxes détectées en mode Debug (miniature + plein écran)
- Clic sur élément détecté pour le sélectionner comme ancre
- Document de vision produit: docs/VISION_RPA_INTELLIGENT.md
- Configuration CORS étendue pour ports locaux

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Dom
2026-01-23 14:13:32 +01:00
parent 483653a0b4
commit d8d086dac5
11 changed files with 1456 additions and 19 deletions

View File

@@ -11,14 +11,15 @@ import type { Node, Edge, NodeTypes } from '@xyflow/react';
import '@xyflow/react/dist/style.css';
import * as api from './services/api';
import type { AppState, Step, ActionType, Capture } from './types';
import { ACTIONS } from './types';
import type { AppState, Step, ActionType, Capture, ExecutionMode } from './types';
import { ACTIONS, EXECUTION_MODES } from './types';
import StepNode from './components/StepNode';
import ToolPalette from './components/ToolPalette';
import PropertiesPanel from './components/PropertiesPanel';
import CapturePanel from './components/CapturePanel';
import WorkflowList from './components/WorkflowList';
import ExecutionControls from './components/ExecutionControls';
import ExecutionModeToggle from './components/ExecutionModeToggle';
const nodeTypes: NodeTypes = {
step: StepNode,
@@ -30,6 +31,7 @@ function App() {
const [edges, setEdges, onEdgesChange] = useEdgesState<Edge>([]);
const [capture, setCapture] = useState<Capture | null>(null);
const [error, setError] = useState<string | null>(null);
const [executionMode, setExecutionMode] = useState<ExecutionMode>('basic');
// Charger l'état initial
const loadState = useCallback(async () => {
@@ -229,6 +231,10 @@ function App() {
{/* Header */}
<header className="header">
<h1>VWB - Visual Workflow Builder</h1>
<ExecutionModeToggle
mode={executionMode}
onChange={setExecutionMode}
/>
<ExecutionControls
execution={appState?.execution || null}
onStart={handleStartExecution}
@@ -292,9 +298,16 @@ function App() {
onCapture={handleCapture}
onSelectAnchor={handleSelectAnchor}
hasSelectedStep={!!appState?.session.selected_step_id}
executionMode={executionMode}
/>
</aside>
</div>
{/* Indicateur de mode flottant */}
<div className={`mode-indicator ${executionMode}`}>
<span>{EXECUTION_MODES[executionMode].icon}</span>
<span>Mode {EXECUTION_MODES[executionMode].label}</span>
</div>
</div>
);
}

View File

@@ -1,11 +1,15 @@
import { useState, useRef, useEffect } from 'react';
import type { Capture } from '../types';
import type { Capture, ExecutionMode } from '../types';
import DetectionOverlay from './DetectionOverlay';
import type { UIElement, DetectionResult } from '../services/uiDetection';
interface Props {
capture: Capture | null;
onCapture: () => void;
onSelectAnchor: (bbox: { x: number; y: number; width: number; height: number }, screenshotBase64?: string) => void;
hasSelectedStep: boolean;
executionMode?: ExecutionMode;
onDetectionComplete?: (result: DetectionResult) => void;
}
interface LibraryItem {
@@ -14,12 +18,42 @@ interface LibraryItem {
timestamp: Date;
}
export default function CapturePanel({ capture, onCapture, onSelectAnchor, hasSelectedStep }: Props) {
export default function CapturePanel({
capture,
onCapture,
onSelectAnchor,
hasSelectedStep,
executionMode = 'basic',
onDetectionComplete
}: Props) {
const [isFullscreen, setIsFullscreen] = useState(false);
const [library, setLibrary] = useState<LibraryItem[]>([]);
const [currentCapture, setCurrentCapture] = useState<Capture | null>(null);
const [timerSeconds, setTimerSeconds] = useState(0);
const [countdown, setCountdown] = useState<number | null>(null);
const [lastDetection, setLastDetection] = useState<DetectionResult | null>(null);
const isDebugMode = executionMode === 'debug';
const handleDetectionComplete = (result: DetectionResult) => {
setLastDetection(result);
if (onDetectionComplete) {
onDetectionComplete(result);
}
};
const handleElementClick = (element: UIElement) => {
// En mode debug, cliquer sur un élément détecté le sélectionne comme ancre
if (hasSelectedStep && currentCapture) {
const bbox = {
x: element.bbox.x1,
y: element.bbox.y1,
width: element.bbox.x2 - element.bbox.x1,
height: element.bbox.y2 - element.bbox.y1,
};
onSelectAnchor(bbox, currentCapture.screenshot_base64);
}
};
// Charger la bibliothèque depuis sessionStorage
useEffect(() => {
@@ -99,13 +133,26 @@ export default function CapturePanel({ capture, onCapture, onSelectAnchor, hasSe
{/* Aperçu de la capture */}
{currentCapture && (
<div className="capture-preview">
<img
src={`data:image/png;base64,${currentCapture.screenshot_base64}`}
alt="Capture"
onClick={() => setIsFullscreen(true)}
/>
{isDebugMode ? (
<DetectionOverlay
imageBase64={`data:image/png;base64,${currentCapture.screenshot_base64}`}
enabled={true}
threshold={0.35}
onDetectionComplete={handleDetectionComplete}
onElementClick={handleElementClick}
/>
) : (
<img
src={`data:image/png;base64,${currentCapture.screenshot_base64}`}
alt="Capture"
onClick={() => setIsFullscreen(true)}
/>
)}
<p className="capture-info">
{currentCapture.width}x{currentCapture.height}
{isDebugMode && lastDetection && (
<span className="detection-summary"> | {lastDetection.count} éléments détectés</span>
)}
<button onClick={() => setIsFullscreen(true)}>Plein écran</button>
</p>
</div>
@@ -147,6 +194,7 @@ export default function CapturePanel({ capture, onCapture, onSelectAnchor, hasSe
setIsFullscreen(false);
}}
enabled={hasSelectedStep}
debugMode={isDebugMode}
/>
)}
</div>
@@ -158,18 +206,68 @@ function FullscreenSelector({
capture,
onClose,
onSelect,
enabled
enabled,
debugMode = false
}: {
capture: Capture;
onClose: () => void;
onSelect: (bbox: { x: number; y: number; width: number; height: number }) => void;
enabled: boolean;
debugMode?: boolean;
}) {
const imgRef = useRef<HTMLImageElement>(null);
const overlayRef = useRef<HTMLDivElement>(null);
const [isSelecting, setIsSelecting] = useState(false);
const [startPos, setStartPos] = useState({ x: 0, y: 0 });
const [selection, setSelection] = useState({ x: 0, y: 0, width: 0, height: 0 });
const [detectedElements, setDetectedElements] = useState<UIElement[]>([]);
const [isDetecting, setIsDetecting] = useState(false);
const [imageScale, setImageScale] = useState({ x: 1, y: 1 });
// Lancer la détection en mode Debug
useEffect(() => {
if (!debugMode) return;
const runDetection = async () => {
setIsDetecting(true);
try {
const { detectUIElements } = await import('../services/uiDetection');
const result = await detectUIElements(
`data:image/png;base64,${capture.screenshot_base64}`,
{ threshold: 0.35 }
);
setDetectedElements(result.elements);
} catch (err) {
console.error('Erreur détection:', err);
} finally {
setIsDetecting(false);
}
};
runDetection();
}, [debugMode, capture.screenshot_base64]);
// Calculer le scale quand l'image est chargée
const handleImageLoad = () => {
if (imgRef.current) {
setImageScale({
x: imgRef.current.width / imgRef.current.naturalWidth,
y: imgRef.current.height / imgRef.current.naturalHeight
});
}
};
// Cliquer sur un élément détecté
const handleElementClick = (elem: UIElement) => {
if (!enabled) return;
const bbox = {
x: elem.bbox.x1,
y: elem.bbox.y1,
width: elem.bbox.x2 - elem.bbox.x1,
height: elem.bbox.y2 - elem.bbox.y1,
};
onSelect(bbox);
};
useEffect(() => {
const handleKeyDown = (e: KeyboardEvent) => {
@@ -232,7 +330,11 @@ function FullscreenSelector({
return (
<div className="fullscreen-modal">
<div className="fullscreen-header">
<span>{enabled ? 'Dessinez un rectangle pour sélectionner l\'ancre' : 'Sélectionnez d\'abord une étape'}</span>
<span>
{debugMode && isDetecting && '🔍 Détection en cours... '}
{debugMode && !isDetecting && `🎯 ${detectedElements.length} éléments détectés - `}
{enabled ? 'Dessinez un rectangle ou cliquez sur un élément détecté' : 'Sélectionnez d\'abord une étape'}
</span>
<button onClick={onClose}>Fermer (Échap)</button>
</div>
<div
@@ -241,12 +343,55 @@ function FullscreenSelector({
onMouseMove={handleMouseMove}
onMouseUp={handleMouseUp}
>
<img
ref={imgRef}
src={`data:image/png;base64,${capture.screenshot_base64}`}
alt="Capture plein écran"
draggable={false}
/>
{/* Conteneur relatif pour positionner les bboxes par rapport à l'image */}
<div style={{ position: 'relative', display: 'inline-block' }}>
<img
ref={imgRef}
src={`data:image/png;base64,${capture.screenshot_base64}`}
alt="Capture plein écran"
draggable={false}
onLoad={handleImageLoad}
style={{ display: 'block' }}
/>
{/* Overlay des éléments détectés en mode Debug */}
{debugMode && detectedElements.map((elem) => (
<div
key={elem.id}
className="fullscreen-detection-bbox"
style={{
position: 'absolute',
left: elem.bbox.x1 * imageScale.x,
top: elem.bbox.y1 * imageScale.y,
width: (elem.bbox.x2 - elem.bbox.x1) * imageScale.x,
height: (elem.bbox.y2 - elem.bbox.y1) * imageScale.y,
border: '2px solid #e94560',
background: 'rgba(233, 69, 96, 0.15)',
cursor: enabled ? 'pointer' : 'default',
zIndex: 10,
}}
onClick={(e) => {
e.stopPropagation();
handleElementClick(elem);
}}
title={`ID: ${elem.id} | Confiance: ${(elem.confidence * 100).toFixed(0)}%`}
>
<span style={{
position: 'absolute',
top: -20,
left: 0,
background: '#e94560',
color: 'white',
padding: '2px 6px',
borderRadius: '3px',
fontSize: '12px',
fontWeight: 'bold',
}}>
{elem.id}
</span>
</div>
))}
</div>
{(isSelecting || selection.width > 0) && (
<div
ref={overlayRef}

View File

@@ -0,0 +1,120 @@
/**
 * UI detection overlay.
 * Runs UI-DETR-1 detection on the given screenshot and renders the resulting
 * bounding boxes over it, plus a small status bar (count, timing, model name).
 */
import { useState, useEffect } from 'react';
import type { UIElement, DetectionResult } from '../services/uiDetection';
import { detectUIElements } from '../services/uiDetection';

interface DetectionOverlayProps {
  /** Screenshot to analyse; raw base64 or a full `data:` URL. */
  imageBase64: string | null;
  /** When false, nothing is rendered and no detection request is made. */
  enabled: boolean;
  /** Confidence threshold forwarded to the detection API (default 0.35). */
  threshold?: number;
  /** Called with the full result each time a detection finishes. */
  onDetectionComplete?: (result: DetectionResult) => void;
  /** Called when the user clicks one of the rendered boxes. */
  onElementClick?: (element: UIElement) => void;
}

export default function DetectionOverlay({
  imageBase64,
  enabled,
  threshold = 0.35,
  onDetectionComplete,
  onElementClick,
}: DetectionOverlayProps) {
  const [elements, setElements] = useState<UIElement[]>([]);
  const [imageSize, setImageSize] = useState<{ width: number; height: number } | null>(null);
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [processingTime, setProcessingTime] = useState<number | null>(null);
  const [hoveredElement, setHoveredElement] = useState<number | null>(null);

  useEffect(() => {
    if (!enabled || !imageBase64) {
      setElements([]);
      setImageSize(null);
      return;
    }

    // Guard against out-of-order responses: if the image (or settings) change
    // while a request is in flight, the stale result must not overwrite the
    // state belonging to the newer request.
    let cancelled = false;

    const runDetection = async () => {
      setLoading(true);
      setError(null);
      try {
        const result = await detectUIElements(imageBase64, {
          threshold,
          annotate: false,
        });
        if (cancelled) return;
        setElements(result.elements);
        setImageSize(result.image_size);
        setProcessingTime(result.processing_time_ms);
        if (onDetectionComplete) {
          onDetectionComplete(result);
        }
      } catch (err) {
        if (cancelled) return;
        setError((err as Error).message);
        setElements([]);
      } finally {
        if (!cancelled) {
          setLoading(false);
        }
      }
    };

    void runDetection();
    return () => {
      cancelled = true;
    };
  }, [imageBase64, enabled, threshold]);

  if (!enabled || !imageBase64) {
    return null;
  }

  return (
    <div className="detection-overlay-container">
      {/* Background screenshot */}
      <img
        src={imageBase64.startsWith('data:') ? imageBase64 : `data:image/png;base64,${imageBase64}`}
        alt="Screenshot"
        className="detection-image"
      />
      {/* Detected bounding boxes.
          NOTE(review): bbox coordinates are in natural-image pixels while the
          image is scaled to 100% width (.detection-image), so boxes will drift
          when the rendered size differs from the natural size — consider
          applying a scale factor like FullscreenSelector does. Confirm. */}
      <div className="detection-bboxes">
        {elements.map((elem) => (
          <div
            key={elem.id}
            className={`detection-bbox ${hoveredElement === elem.id ? 'hovered' : ''}`}
            style={{
              left: elem.bbox.x1,
              top: elem.bbox.y1,
              width: elem.bbox.x2 - elem.bbox.x1,
              height: elem.bbox.y2 - elem.bbox.y1,
            }}
            onMouseEnter={() => setHoveredElement(elem.id)}
            onMouseLeave={() => setHoveredElement(null)}
            onClick={() => onElementClick?.(elem)}
            title={`ID: ${elem.id} | Confiance: ${(elem.confidence * 100).toFixed(0)}%`}
          >
            <span className="detection-id">{elem.id}</span>
          </div>
        ))}
      </div>
      {/* Status bar */}
      <div className="detection-info-bar">
        {loading ? (
          <span className="detection-loading">🔍 Détection en cours...</span>
        ) : error ? (
          <span className="detection-error"> {error}</span>
        ) : (
          <>
            <span className="detection-count">🎯 {elements.length} éléments</span>
            {/* Compare against null (not truthiness) so a legitimate 0 ms
                measurement is still displayed. */}
            {processingTime !== null && (
              <span className="detection-time"> {processingTime.toFixed(0)}ms</span>
            )}
            <span className="detection-model">🧠 UI-DETR-1</span>
          </>
        )}
      </div>
    </div>
  );
}

View File

@@ -0,0 +1,33 @@
import type { ExecutionMode } from '../types';
import { EXECUTION_MODES } from '../types';
interface ExecutionModeToggleProps {
mode: ExecutionMode;
onChange: (mode: ExecutionMode) => void;
}
export default function ExecutionModeToggle({ mode, onChange }: ExecutionModeToggleProps) {
const modes: ExecutionMode[] = ['basic', 'intelligent', 'debug'];
return (
<div className="execution-mode-toggle">
<span className="mode-label">Mode:</span>
<div className="mode-buttons">
{modes.map((m) => {
const config = EXECUTION_MODES[m];
return (
<button
key={m}
className={`mode-btn ${mode === m ? 'active' : ''} mode-${m}`}
onClick={() => onChange(m)}
title={config.description}
>
<span className="mode-icon">{config.icon}</span>
<span className="mode-text">{config.label}</span>
</button>
);
})}
</div>
</div>
);
}

View File

@@ -0,0 +1,138 @@
/**
* Service de détection UI (UI-DETR-1)
*/
const API_BASE = 'http://localhost:5001';
export interface UIElement {
id: number;
bbox: {
x1: number;
y1: number;
x2: number;
y2: number;
};
center: {
x: number;
y: number;
};
confidence: number;
area: number;
}
export interface DetectionResult {
elements: UIElement[];
count: number;
processing_time_ms: number;
image_size: {
width: number;
height: number;
};
model: string;
annotated_image_base64?: string;
}
export interface DetectionOptions {
threshold?: number;
annotate?: boolean;
showConfidence?: boolean;
}
/**
* Détecte les éléments UI dans une image
*/
export async function detectUIElements(
imageBase64: string,
options: DetectionOptions = {}
): Promise<DetectionResult> {
const response = await fetch(`${API_BASE}/api/ui-detection/detect`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
image_base64: imageBase64,
threshold: options.threshold ?? 0.35,
annotate: options.annotate ?? false,
show_confidence: options.showConfidence ?? false,
}),
});
const data = await response.json();
if (!data.success) {
throw new Error(data.error || 'Erreur de détection');
}
return data.result;
}
/**
* Précharge le modèle UI-DETR-1
*/
export async function preloadModel(): Promise<void> {
const response = await fetch(`${API_BASE}/api/ui-detection/preload`, {
method: 'POST',
});
const data = await response.json();
if (!data.success) {
throw new Error(data.error || 'Erreur de préchargement');
}
}
/**
* Récupère le statut du service de détection
*/
export async function getDetectionStatus(): Promise<{
model_path: string;
model_exists: boolean;
model_loaded: boolean;
model_name: string;
default_threshold: number;
}> {
const response = await fetch(`${API_BASE}/api/ui-detection/status`);
const data = await response.json();
if (!data.success) {
throw new Error(data.error || 'Erreur de statut');
}
return data.status;
}
/**
* Trouve un élément spécifique en utilisant une ancre de référence
*/
export async function findElement(
imageBase64: string,
anchorBase64?: string,
threshold?: number
): Promise<{
found: boolean;
element: UIElement | null;
all_elements: UIElement[];
count: number;
match_score: number;
}> {
const response = await fetch(`${API_BASE}/api/ui-detection/find-element`, {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({
image_base64: imageBase64,
anchor_base64: anchorBase64,
threshold: threshold ?? 0.35,
}),
});
const data = await response.json();
if (!data.success) {
throw new Error(data.error || 'Erreur de recherche');
}
return data.result;
}

View File

@@ -646,6 +646,70 @@ body {
pointer-events: none;
}
/* Execution Mode Toggle */
/* Pill-shaped switcher rendered in the header; one button per mode. */
.execution-mode-toggle {
  display: flex;
  align-items: center;
  gap: 0.75rem;
  padding: 0.25rem;
  background: #0f3460;
  border-radius: 8px;
}
.mode-label {
  font-size: 0.8rem;
  color: #888;
  padding-left: 0.5rem;
}
.mode-buttons {
  display: flex;
  gap: 2px;
}
/* Inactive buttons are flat/grey; hover and active states add contrast. */
.mode-btn {
  display: flex;
  align-items: center;
  gap: 0.35rem;
  padding: 0.4rem 0.65rem;
  background: transparent;
  border: none;
  color: #888;
  border-radius: 6px;
  cursor: pointer;
  transition: all 0.15s;
  font-size: 0.8rem;
}
.mode-btn:hover {
  background: rgba(255, 255, 255, 0.1);
  color: #ccc;
}
.mode-btn.active {
  color: white;
}
/* Per-mode accent colors for the active button (mirrors .mode-indicator.*). */
.mode-btn.active.mode-basic {
  background: #4caf50; /* green */
}
.mode-btn.active.mode-intelligent {
  background: #e94560; /* pink/red */
}
.mode-btn.active.mode-debug {
  background: #ff9800; /* orange */
}
.mode-icon {
  font-size: 1rem;
}
.mode-text {
  font-weight: 500;
}
/* Execution Controls */
.execution-controls {
display: flex;
@@ -740,3 +804,121 @@ body {
.react-flow__background {
background: #1a1a2e;
}
/* Detection Overlay */
/* Container is the positioning context for the absolutely-placed boxes. */
.detection-overlay-container {
  position: relative;
  width: 100%;
  overflow: hidden;
}
.detection-image {
  width: 100%;
  display: block;
  border-radius: 4px;
}
/* Full-size layer over the image. It is transparent to the mouse
   (pointer-events: none) so the image underneath stays interactive, while
   each individual bbox re-enables pointer events for itself. */
.detection-bboxes {
  position: absolute;
  top: 0;
  left: 0;
  width: 100%;
  height: 100%;
  pointer-events: none;
}
.detection-bbox {
  position: absolute;
  border: 2px solid #e94560;
  background: rgba(233, 69, 96, 0.1);
  pointer-events: auto;
  cursor: pointer;
  transition: all 0.15s;
}
/* Hover feedback (mouse hover or the React-driven .hovered class) switches
   the accent from red to green and raises the box above its neighbours. */
.detection-bbox:hover,
.detection-bbox.hovered {
  border-color: #4caf50;
  background: rgba(76, 175, 80, 0.2);
  z-index: 10;
}
/* Small numeric ID badge anchored just above the box's top-left corner. */
.detection-id {
  position: absolute;
  top: -18px;
  left: -2px;
  background: #e94560;
  color: white;
  font-size: 10px;
  font-weight: bold;
  padding: 2px 5px;
  border-radius: 3px;
  min-width: 16px;
  text-align: center;
}
.detection-bbox:hover .detection-id,
.detection-bbox.hovered .detection-id {
  background: #4caf50;
}
/* Status bar under the preview (element count / timing / model name). */
.detection-info-bar {
  display: flex;
  justify-content: space-between;
  align-items: center;
  padding: 0.5rem;
  background: #0f3460;
  border-radius: 0 0 4px 4px;
  font-size: 0.75rem;
  gap: 0.5rem;
}
.detection-count {
  color: #4caf50;
}
.detection-time {
  color: #888;
}
.detection-model {
  color: #e94560;
}
.detection-loading {
  color: #ff9800;
}
.detection-error {
  color: #e94560;
}
/* Mode indicator */
/* Floating badge pinned bottom-right; the background color mirrors the
   per-mode accent colors used by the header toggle. */
.mode-indicator {
  position: fixed;
  bottom: 1rem;
  right: 1rem;
  padding: 0.5rem 1rem;
  border-radius: 8px;
  font-size: 0.85rem;
  font-weight: 500;
  z-index: 100;
  display: flex;
  align-items: center;
  gap: 0.5rem;
}
.mode-indicator.basic {
  background: rgba(76, 175, 80, 0.9);
  color: white;
}
.mode-indicator.intelligent {
  background: rgba(233, 69, 96, 0.9);
  color: white;
}
.mode-indicator.debug {
  background: rgba(255, 152, 0, 0.9);
  color: white;
}

View File

@@ -1,5 +1,26 @@
// Types pour l'API v3
// Execution mode for workflow runs:
//  - basic: fixed coordinates, fast and predictable
//  - intelligent: AI vision, adaptive
//  - debug: intelligent + detection overlay
export type ExecutionMode = 'basic' | 'intelligent' | 'debug';

// Display metadata for each execution mode (labels/descriptions are
// user-facing French strings rendered by the header toggle and the floating
// mode indicator). `satisfies` (TS 4.9+) validates completeness against
// ExecutionMode — a missing or misspelled key is a compile error — while
// keeping the literal types of each entry instead of widening to the record.
export const EXECUTION_MODES = {
  basic: {
    label: 'Basique',
    icon: '⚡',
    description: 'Coordonnées fixes, rapide et prévisible'
  },
  intelligent: {
    label: 'Intelligent',
    icon: '🧠',
    description: 'Vision IA, adaptatif, self-healing'
  },
  debug: {
    label: 'Debug',
    icon: '🔍',
    description: 'Intelligent + overlay détection'
  }
} satisfies Record<ExecutionMode, { label: string; icon: string; description: string }>;
export type ActionType =
| 'click_anchor'
| 'double_click_anchor'