feat: unified chat, GestureCatalog, Copilot, Léa UI, data extraction, replay verification

Major overhaul of the Agent Chat system, plus several new modules:
- Unified chat: the dual Workflows/Free-Agent split is removed; everything goes through /api/chat with three-level resolution (workflow → gesture → "montre-moi")
- GestureCatalog: 38 universal Windows keyboard shortcuts with semantic matching, automatic substitution during replays, and an /api/gestures endpoint
- Copilot mode: step-by-step workflow execution with human validation over WebSocket (approve/skip/abort) before each action
- Léa UI (agent_v0/lea_ui/): PyQt5 interface for Windows with a transparent overlay giving visual feedback during replay
- Data Extraction (core/extraction/): visual data-extraction engine (OCR + VLM → SQLite), with YAML schemas and CSV/Excel export
- ReplayVerifier (agent_v0/server_v1/): post-action verification by screenshot comparison, with retry logic (max 3)
- Hardened IntentParser: better regex fallback, new GREETING type, improved patterns
- Dashboard: new gestures, streaming, and extractions pages
- Tests: 63 GestureCatalog tests, 47 extraction tests, fixes to existing tests
- Deprecation: /api/agent/plan and /api/agent/execute now return HTTP 410; the hardcoded _plan_to_replay_actions code is removed

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
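As a minimal sketch, the three-level resolution might look like this — only the order (workflow, then gesture, then "montre-moi" / "show me") comes from this commit; every function and variable name below is hypothetical, not the actual handler:

    # Hypothetical sketch of the /api/chat three-level resolution.
    async def handle_chat(message: str) -> dict:
        workflow = resolve_workflow(message)        # level 1: match a recorded workflow
        if workflow is not None:
            return await run_workflow(workflow)
        gesture = gesture_catalog.match(message)    # level 2: semantic match in GestureCatalog
        if gesture is not None:
            return await play_gesture(gesture)
        return await start_demonstration(message)   # level 3: "montre-moi" (show me) mode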
core/capture/__init__.py
@@ -1,4 +1,11 @@
 """Screen capture module"""
 from .screen_capturer import ScreenCapturer

-__all__ = ['ScreenCapturer']
+try:
+    from .event_listener import EventListener
+except ImportError:
+    EventListener = None
+
+from .session_recorder import SessionRecorder
+
+__all__ = ['ScreenCapturer', 'EventListener', 'SessionRecorder']
core/capture/event_listener.py (new file)
@@ -0,0 +1,258 @@
"""
EventListener - Keyboard/mouse event capture for RPA Vision V3

Layer 0 (RawSession): real-time capture of user interactions
(mouse clicks, keystrokes) with precise timestamps and window context.

Produces Event objects compatible with RawSession.
"""

import logging
import threading
import time
from typing import Optional, Callable, List, Dict, Any
from datetime import datetime

logger = logging.getLogger(__name__)

try:
    from pynput import mouse, keyboard
    PYNPUT_AVAILABLE = True
except ImportError:
    mouse = None  # type: ignore
    keyboard = None  # type: ignore
    PYNPUT_AVAILABLE = False
    logger.warning("pynput not available — EventListener disabled")


class EventListener:
    """
    Keyboard/mouse event listener based on pynput.

    Captures user interactions in real time and forwards them
    through a callback. Compatible with the RawSession Event format.

    Example:
        >>> listener = EventListener()
        >>> listener.start(callback=on_event)
        >>> # ... the user interacts ...
        >>> events = listener.stop()
    """

    def __init__(self, capture_mouse_move: bool = False):
        """
        Args:
            capture_mouse_move: Capture mouse movements (high volume, disabled by default)
        """
        if not PYNPUT_AVAILABLE:
            raise ImportError(
                "pynput is required for EventListener. "
                "Install it with: pip install pynput"
            )

        self.capture_mouse_move = capture_mouse_move
        self._running = False
        self._start_time: Optional[float] = None
        self._events: List[Dict[str, Any]] = []
        self._callback: Optional[Callable[[Dict[str, Any]], None]] = None
        self._lock = threading.Lock()

        self._mouse_listener = None
        self._keyboard_listener = None

    def start(self, callback: Optional[Callable[[Dict[str, Any]], None]] = None) -> None:
        """
        Start capturing events.

        Args:
            callback: Function called for each captured event.
                Receives a dict in Event.to_dict() format.
        """
        if self._running:
            logger.warning("EventListener already running")
            return

        self._callback = callback
        self._events = []
        self._start_time = time.time()
        self._running = True

        # Start the listeners
        self._mouse_listener = mouse.Listener(
            on_click=self._on_click,
            on_scroll=self._on_scroll,
            on_move=self._on_move if self.capture_mouse_move else None,
        )
        self._keyboard_listener = keyboard.Listener(
            on_press=self._on_key_press,
            on_release=self._on_key_release,
        )

        self._mouse_listener.start()
        self._keyboard_listener.start()

        logger.info("EventListener started")

    def stop(self) -> List[Dict[str, Any]]:
        """
        Stop capturing and return the captured events.

        Returns:
            List of dicts in Event format
        """
        self._running = False

        if self._mouse_listener:
            self._mouse_listener.stop()
            self._mouse_listener = None
        if self._keyboard_listener:
            self._keyboard_listener.stop()
            self._keyboard_listener = None

        logger.info(f"EventListener stopped — {len(self._events)} events captured")

        with self._lock:
            return list(self._events)

    @property
    def is_running(self) -> bool:
        return self._running

    @property
    def event_count(self) -> int:
        with self._lock:
            return len(self._events)

    def _relative_time(self) -> float:
        """Time elapsed since the start of the capture."""
        if self._start_time is None:
            return 0.0
        return round(time.time() - self._start_time, 3)

    def _get_window_context(self) -> Dict[str, str]:
        """Get the active window context."""
        try:
            import subprocess
            # Use xdotool on Linux to get the active window
            result = subprocess.run(
                ["xdotool", "getactivewindow", "getwindowname"],
                capture_output=True, text=True, timeout=1
            )
            title = result.stdout.strip() if result.returncode == 0 else "Unknown"

            result2 = subprocess.run(
                ["xdotool", "getactivewindow", "getwindowpid"],
                capture_output=True, text=True, timeout=1
            )
            pid = result2.stdout.strip() if result2.returncode == 0 else ""

            # Try to get the process name
            app_name = "unknown"
            if pid:
                try:
                    result3 = subprocess.run(
                        ["ps", "-p", pid, "-o", "comm="],
                        capture_output=True, text=True, timeout=1
                    )
                    app_name = result3.stdout.strip() if result3.returncode == 0 else "unknown"
                except Exception:
                    pass

            return {"title": title, "app_name": app_name}
        except Exception:
            return {"title": "Unknown", "app_name": "unknown"}

    def _emit_event(self, event: Dict[str, Any]) -> None:
        """Record and emit an event."""
        with self._lock:
            self._events.append(event)

        if self._callback:
            try:
                self._callback(event)
            except Exception as e:
                logger.error(f"Event callback error: {e}")

    # === Mouse handlers ===

    def _on_click(self, x: int, y: int, button, pressed: bool) -> None:
        if not self._running or not pressed:
            return

        event = {
            "t": self._relative_time(),
            "type": "mouse_click",
            "button": button.name,
            "pos": [x, y],
            "window": self._get_window_context(),
            "screenshot_id": None,
        }
        self._emit_event(event)

    def _on_scroll(self, x: int, y: int, dx: int, dy: int) -> None:
        if not self._running:
            return

        event = {
            "t": self._relative_time(),
            "type": "mouse_scroll",
            "delta": dy * 120,
            "pos": [x, y],
            "window": self._get_window_context(),
            "screenshot_id": None,
        }
        self._emit_event(event)

    def _on_move(self, x: int, y: int) -> None:
        if not self._running:
            return

        event = {
            "t": self._relative_time(),
            "type": "mouse_move",
            "pos": [x, y],
            "window": self._get_window_context(),
            "screenshot_id": None,
        }
        self._emit_event(event)

    # === Keyboard handlers ===

    def _on_key_press(self, key) -> None:
        if not self._running:
            return

        key_name = self._key_to_string(key)

        event = {
            "t": self._relative_time(),
            "type": "key_press",
            "keys": [key_name],
            "window": self._get_window_context(),
            "screenshot_id": None,
        }
        self._emit_event(event)

    def _on_key_release(self, key) -> None:
        if not self._running:
            return

        key_name = self._key_to_string(key)

        event = {
            "t": self._relative_time(),
            "type": "key_release",
            "keys": [key_name],
            "window": self._get_window_context(),
            "screenshot_id": None,
        }
        self._emit_event(event)

    @staticmethod
    def _key_to_string(key) -> str:
        """Convert a pynput key to a readable string."""
        if hasattr(key, 'char') and key.char:
            return key.char
        if hasattr(key, 'name'):
            return key.name.upper()
        return str(key)
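A minimal consumer of the listener, as an illustrative sketch (assumes pynput is installed; the print callback is an example only):

    import time
    from core.capture.event_listener import EventListener

    events_seen = []

    def on_event(event):
        # Each event is a plain dict: t, type, pos/keys, window, screenshot_id
        events_seen.append(event)
        print(f"[{event['t']:7.3f}s] {event['type']} in {event['window']['title']}")

    listener = EventListener(capture_mouse_move=False)
    listener.start(callback=on_event)
    time.sleep(10)                 # record ten seconds of interaction
    all_events = listener.stop()   # same dicts, in capture order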
core/capture/session_recorder.py (new file)
@@ -0,0 +1,344 @@
"""
SessionRecorder - Recording of complete RPA sessions

Orchestrates EventListener + ScreenCapturer to produce a RawSession:
- Captures keyboard/mouse events continuously
- Takes a screenshot on every click (or periodically)
- Saves the screenshots to disk
- Produces a complete RawSession with linked events + screenshots

Usage:
    >>> recorder = SessionRecorder(output_dir="data/sessions")
    >>> recorder.start(workflow_name="login_workflow")
    >>> # ... the user performs their actions ...
    >>> session = recorder.stop()
    >>> print(f"{len(session.events)} events, {len(session.screenshots)} screenshots")
"""

import logging
import os
import platform
import threading
import time
from datetime import datetime
from pathlib import Path
from typing import Optional, Callable, Dict, Any, List

from core.models.raw_session import RawSession, Event, Screenshot, RawWindowContext

logger = logging.getLogger(__name__)


class SessionRecorder:
    """
    Complete RPA session recorder.

    Combines EventListener (keyboard/mouse) and ScreenCapturer (screenshots)
    to produce a RawSession usable by the GraphBuilder.
    """

    def __init__(
        self,
        output_dir: str = "data/training/sessions",
        screenshot_on_click: bool = True,
        screenshot_interval_ms: int = 0,
        capture_keyboard: bool = True,
    ):
        """
        Args:
            output_dir: Output directory for sessions
            screenshot_on_click: Take a screenshot on every click
            screenshot_interval_ms: Periodic capture interval (0 = disabled)
            capture_keyboard: Capture keystrokes
        """
        self.output_dir = Path(output_dir)
        self.screenshot_on_click = screenshot_on_click
        self.screenshot_interval_ms = screenshot_interval_ms
        self.capture_keyboard = capture_keyboard

        self._session: Optional[RawSession] = None
        self._session_dir: Optional[Path] = None
        self._screenshots_dir: Optional[Path] = None
        self._running = False
        self._screenshot_counter = 0
        self._lock = threading.Lock()

        # Components (lazy init)
        self._event_listener = None
        self._screen_capturer = None
        self._periodic_thread: Optional[threading.Thread] = None

        # Optional callbacks
        self._on_event: Optional[Callable[[Dict[str, Any]], None]] = None
        self._on_screenshot: Optional[Callable[[str], None]] = None

    def start(
        self,
        workflow_name: str = "",
        session_id: Optional[str] = None,
        on_event: Optional[Callable[[Dict[str, Any]], None]] = None,
        on_screenshot: Optional[Callable[[str], None]] = None,
    ) -> str:
        """
        Start recording a session.

        Args:
            workflow_name: Workflow name for context
            session_id: Session ID (generated if None)
            on_event: Callback invoked for each event
            on_screenshot: Callback invoked for each screenshot

        Returns:
            session_id of the started session
        """
        if self._running:
            logger.warning("SessionRecorder already running")
            return self._session.session_id if self._session else ""

        # Generate a session ID
        if session_id is None:
            session_id = f"session_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

        # Create directories (screenshots live directly under the session dir)
        self._session_dir = self.output_dir / session_id
        self._screenshots_dir = self._session_dir / "screenshots"
        self._screenshots_dir.mkdir(parents=True, exist_ok=True)

        # Initialize the session
        self._session = RawSession(
            session_id=session_id,
            agent_version="rpa_vision_v3",
            environment=self._get_environment(),
            user={"id": os.getenv("USER", "unknown")},
            context={"workflow": workflow_name, "tags": []},
            started_at=datetime.now(),
        )

        self._screenshot_counter = 0
        self._on_event = on_event
        self._on_screenshot = on_screenshot
        self._running = True

        # Start the event listener
        self._start_event_listener()

        # Start periodic capture if configured
        if self.screenshot_interval_ms > 0:
            self._start_periodic_capture()

        logger.info(
            f"SessionRecorder started: {session_id} "
            f"(screenshots_dir={self._screenshots_dir})"
        )
        return session_id

    def stop(self) -> RawSession:
        """
        Stop recording and return the complete session.

        Returns:
            RawSession with all events and screenshots
        """
        if not self._running:
            logger.warning("SessionRecorder not started")
            return self._session

        self._running = False

        # Stop the periodic capture
        if self._periodic_thread and self._periodic_thread.is_alive():
            self._periodic_thread.join(timeout=2)

        # Stop the event listener
        if self._event_listener:
            self._event_listener.stop()

        # Finalize the session
        self._session.ended_at = datetime.now()

        # Save the session JSON
        session_path = self._session_dir / f"{self._session.session_id}.json"
        self._session.save_to_file(session_path)

        logger.info(
            f"SessionRecorder stopped: {self._session.session_id} "
            f"({len(self._session.events)} events, "
            f"{len(self._session.screenshots)} screenshots) "
            f"→ {session_path}"
        )

        return self._session

    @property
    def is_running(self) -> bool:
        return self._running

    @property
    def event_count(self) -> int:
        return len(self._session.events) if self._session else 0

    @property
    def screenshot_count(self) -> int:
        return len(self._session.screenshots) if self._session else 0

    # =========================================================================
    # Event capture
    # =========================================================================

    def _start_event_listener(self) -> None:
        """Start the event listener."""
        try:
            from core.capture.event_listener import EventListener

            self._event_listener = EventListener(capture_mouse_move=False)
            self._event_listener.start(callback=self._on_raw_event)
            logger.info("EventListener started")
        except ImportError:
            logger.warning(
                "EventListener not available (pynput missing). "
                "Only periodic screenshots will be captured."
            )

    def _on_raw_event(self, raw_event: Dict[str, Any]) -> None:
        """Callback invoked by EventListener for each event."""
        if not self._running or not self._session:
            return

        # Convert to Event
        event = Event(
            t=raw_event.get("t", 0.0),
            type=raw_event.get("type", "unknown"),
            window=RawWindowContext(
                title=raw_event.get("window", {}).get("title", "Unknown"),
                app_name=raw_event.get("window", {}).get("app_name", "unknown"),
            ),
            screenshot_id=None,
            data={
                k: v
                for k, v in raw_event.items()
                if k not in ("t", "type", "window", "screenshot_id")
            },
        )

        # Screenshot on click
        if self.screenshot_on_click and event.type == "mouse_click":
            screenshot_id = self._take_screenshot()
            if screenshot_id:
                event.screenshot_id = screenshot_id

        with self._lock:
            self._session.add_event(event)

        # User callback
        if self._on_event:
            try:
                self._on_event(raw_event)
            except Exception as e:
                logger.warning(f"on_event callback error: {e}")

    # =========================================================================
    # Screenshot capture
    # =========================================================================

    def _take_screenshot(self) -> Optional[str]:
        """Take a screenshot and save it."""
        if not self._running or not self._session:
            return None

        try:
            self._ensure_screen_capturer()
            if self._screen_capturer is None:
                return None

            frame = self._screen_capturer.capture_frame()
            if frame is None:
                return None

            # Save
            self._screenshot_counter += 1
            screenshot_id = f"ss_{self._screenshot_counter:04d}"
            filename = f"screen_{self._screenshot_counter:04d}.png"
            filepath = self._screenshots_dir / filename

            self._screen_capturer.save_frame(frame, str(filepath))

            # Register in the session
            screenshot = Screenshot(
                screenshot_id=screenshot_id,
                relative_path=f"screenshots/{filename}",
                captured_at=datetime.now().isoformat(),
            )

            with self._lock:
                self._session.add_screenshot(screenshot)

            # User callback
            if self._on_screenshot:
                try:
                    self._on_screenshot(str(filepath))
                except Exception as e:
                    logger.warning(f"on_screenshot callback error: {e}")

            return screenshot_id

        except Exception as e:
            logger.warning(f"Screenshot capture error: {e}")
            return None

    def _ensure_screen_capturer(self) -> None:
        """Initialize the ScreenCapturer (lazy)."""
        if self._screen_capturer is not None:
            return

        try:
            from core.capture.screen_capturer import ScreenCapturer

            self._screen_capturer = ScreenCapturer(
                buffer_size=5,
                detect_changes=False,
            )
        except Exception as e:
            logger.warning(f"ScreenCapturer not available: {e}")

    def _start_periodic_capture(self) -> None:
        """Start periodic capture in a thread."""
        interval_s = self.screenshot_interval_ms / 1000.0

        def _periodic_loop():
            while self._running:
                self._take_screenshot()
                time.sleep(interval_s)

        self._periodic_thread = threading.Thread(
            target=_periodic_loop, daemon=True, name="periodic_capture"
        )
        self._periodic_thread.start()
        logger.info(
            f"Periodic capture started (interval={self.screenshot_interval_ms}ms)"
        )

    # =========================================================================
    # Helpers
    # =========================================================================

    def _get_environment(self) -> Dict[str, Any]:
        """Collect environment information."""
        env = {
            "os": platform.system().lower(),
            "os_version": platform.version(),
            "hostname": platform.node(),
            "screen": {},
        }

        # Screen resolution
        try:
            self._ensure_screen_capturer()
            if self._screen_capturer:
                w, h = self._screen_capturer.get_screen_resolution()
                env["screen"] = {
                    "primary_resolution": [w, h],
                }
        except Exception:
            env["screen"] = {"primary_resolution": [1920, 1080]}

        return env
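Driving the recorder end to end, as an illustrative sketch (the workflow name reuses the docstring's example; everything else is the API above):

    import time
    from core.capture.session_recorder import SessionRecorder

    recorder = SessionRecorder(
        output_dir="data/training/sessions",
        screenshot_on_click=True,
        screenshot_interval_ms=0,        # click-driven screenshots only
    )
    session_id = recorder.start(
        workflow_name="login_workflow",  # example name from the module docstring
        on_event=lambda e: print("event:", e["type"]),
        on_screenshot=lambda path: print("screenshot:", path),
    )
    time.sleep(30)                       # the user demonstrates the workflow
    session = recorder.stop()            # RawSession saved as <session_dir>/<session_id>.json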
@@ -69,9 +69,10 @@ class DetectionConfig:
     """Hybrid UI detection configuration"""
     # VLM
     # Recommended models:
     # - "qwen2.5vl:7b" (faster, best with format='json', recommended)
+    # - "qwen2.5vl:3b" (lightweight, fits in a 12GB GPU with partial split)
     # - "qwen2.5vl:7b" (better but 13GB memory, CPU-only on RTX 5070)
     # - "qwen3-vl:8b" (larger, supported but more JSON errors)
-    vlm_model: str = "qwen2.5vl:7b"
+    vlm_model: str = "qwen2.5vl:3b"
     vlm_endpoint: str = "http://localhost:11434"
     use_vlm_classification: bool = True  # Use the VLM for classification
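The practical effect of this change, assuming DetectionConfig is a dataclass (the field syntax suggests it) and with an import path that is only illustrative:

    from core.detection.config import DetectionConfig   # hypothetical import path

    cfg = DetectionConfig()
    assert cfg.vlm_model == "qwen2.5vl:3b"   # new default: fits a 12GB GPU

    # The larger model remains available where memory allows:
    cfg_big = DetectionConfig(vlm_model="qwen2.5vl:7b")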
@@ -451,6 +451,9 @@ class FAISSManager:

         return results

+    # Alias for compatibility (WorkflowPipeline, NodeMatcher)
+    search = search_similar
+
     def remove_embedding(self, faiss_id: int) -> bool:
         """
         Remove an embedding from the index
@@ -212,8 +212,8 @@ class StateEmbeddingBuilder:

         # Concatenate all detected texts
         texts = []
-        if hasattr(screen_state.perception, 'detected_texts'):
-            texts = screen_state.perception.detected_texts
+        if hasattr(screen_state.perception, 'detected_text'):
+            texts = screen_state.perception.detected_text

         combined_text = " ".join(texts) if texts else ""

@@ -664,12 +664,12 @@ class WorkflowSimulator:
         try:
             if check.kind == "text_present":
                 # Check text presence
-                detected_texts = getattr(screen_state.perception_level, 'detected_texts', []) if hasattr(screen_state, 'perception_level') else []
+                detected_texts = getattr(screen_state.perception, 'detected_text', []) if hasattr(screen_state, 'perception') else []
                 return any(check.value in text for text in detected_texts)

             elif check.kind == "text_absent":
                 # Check text absence
-                detected_texts = getattr(screen_state.perception_level, 'detected_texts', []) if hasattr(screen_state, 'perception_level') else []
+                detected_texts = getattr(screen_state.perception, 'detected_text', []) if hasattr(screen_state, 'perception') else []
                 return not any(check.value in text for text in detected_texts)

             elif check.kind == "element_present":
@@ -681,7 +681,7 @@ class WorkflowSimulator:

             elif check.kind == "window_title_contains":
                 # Check the window title
-                window_title = getattr(screen_state.raw_level, 'window_title', '') if hasattr(screen_state, 'raw_level') else ''
+                window_title = getattr(screen_state.window, 'window_title', '') if hasattr(screen_state, 'window') else ''
                 return check.value in window_title

             else:
@@ -509,13 +509,13 @@ class ErrorHandler:
             'workflow_edge': edge,
             'action': action,
             'details': {
-                'target_role': action.target.role if hasattr(action.target, 'role') else None,
-                'target_text': action.target.text_pattern if hasattr(action.target, 'text_pattern') else None
+                'target_role': action.target.by_role if hasattr(action.target, 'by_role') else None,
+                'target_text': action.target.by_text if hasattr(action.target, 'by_text') else None
             },
             'original_data': {
                 'target': {
-                    'role': action.target.role if hasattr(action.target, 'role') else None,
-                    'text_pattern': action.target.text_pattern if hasattr(action.target, 'text_pattern') else None,
+                    'by_role': action.target.by_role if hasattr(action.target, 'by_role') else None,
+                    'by_text': action.target.by_text if hasattr(action.target, 'by_text') else None,
                     'bbox': getattr(action.target, 'bbox', None)
                 }
             }
core/extraction/__init__.py (new file)
@@ -0,0 +1,29 @@
"""
Extraction of structured data from screenshots.

This module orchestrates the full cycle:
YAML schema -> navigation -> screenshot -> VLM/OCR -> validation -> SQLite -> CSV/Excel

Main classes:
- ExtractionSchema: field definitions and navigation rules
- ExtractionField: definition of an individual field
- FieldExtractor: extraction via VLM (Ollama) or OCR (docTR)
- DataStore: SQLite storage + CSV/Excel export
- IterationController: navigation loop control
- ExtractionEngine: main orchestrator
"""

from .schema import ExtractionField, ExtractionSchema
from .field_extractor import FieldExtractor
from .data_store import DataStore
from .iteration_controller import IterationController
from .extraction_engine import ExtractionEngine

__all__ = [
    "ExtractionField",
    "ExtractionSchema",
    "FieldExtractor",
    "DataStore",
    "IterationController",
    "ExtractionEngine",
]
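Building a schema in code might look like this (a sketch: the ExtractionField attribute names — name, field_type, description, required, validation_regex — are the ones read in field_extractor.py below, but the constructor signatures and sample values are assumptions):

    from core.extraction import ExtractionField, ExtractionSchema

    # Hypothetical constructor usage; attributes match those used in field_extractor.py.
    schema = ExtractionSchema(
        name="patient_record",
        fields=[
            ExtractionField(name="nom", field_type="string",
                            description="Last name shown in the record header",
                            required=True),
            ExtractionField(name="date_naissance", field_type="date",
                            description="Birth date, kept in the on-screen format",
                            validation_regex=r"^\d{2}/\d{2}/\d{4}$"),
        ],
    )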
core/extraction/data_store.py (new file)
@@ -0,0 +1,420 @@
"""
DataStore - SQLite storage for extracted data + CSV/Excel export

Each extraction session (an ExtractionSchema applied to a screen) creates
an entry in the `extractions` table. Individual records are stored in the
`records` table with their JSON data, the source screenshot path, and a
confidence score.
"""

import csv
import json
import logging
import sqlite3
import uuid
from datetime import datetime
from io import StringIO
from pathlib import Path
from typing import Any, Dict, List, Optional

from .schema import ExtractionSchema

logger = logging.getLogger(__name__)


class DataStore:
    """SQLite storage for extracted data, with CSV/Excel export."""

    def __init__(self, db_path: str = "data/extractions/store.db"):
        self.db_path = Path(db_path)
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        self._init_db()

    # ------------------------------------------------------------------
    # Initialization
    # ------------------------------------------------------------------

    def _init_db(self) -> None:
        """Create the tables if needed."""
        with self._connect() as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS extractions (
                    id TEXT PRIMARY KEY,
                    schema_name TEXT NOT NULL,
                    schema_json TEXT NOT NULL,
                    created_at TEXT NOT NULL,
                    updated_at TEXT NOT NULL,
                    status TEXT NOT NULL DEFAULT 'in_progress',
                    record_count INTEGER NOT NULL DEFAULT 0
                )
            """)
            conn.execute("""
                CREATE TABLE IF NOT EXISTS records (
                    id TEXT PRIMARY KEY,
                    extraction_id TEXT NOT NULL,
                    data_json TEXT NOT NULL,
                    screenshot_path TEXT,
                    confidence REAL NOT NULL DEFAULT 0.0,
                    errors_json TEXT,
                    created_at TEXT NOT NULL,
                    FOREIGN KEY (extraction_id) REFERENCES extractions(id)
                )
            """)
            conn.execute("""
                CREATE INDEX IF NOT EXISTS idx_records_extraction
                ON records(extraction_id)
            """)

    def _connect(self) -> sqlite3.Connection:
        """Open a SQLite connection."""
        conn = sqlite3.connect(str(self.db_path))
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        return conn

    # ------------------------------------------------------------------
    # Extractions (sessions)
    # ------------------------------------------------------------------

    def create_extraction(self, schema: ExtractionSchema) -> str:
        """
        Create a new extraction session.

        Args:
            schema: Extraction schema

        Returns:
            extraction_id (UUID)
        """
        extraction_id = str(uuid.uuid4())
        now = datetime.utcnow().isoformat()

        with self._connect() as conn:
            conn.execute(
                """
                INSERT INTO extractions (id, schema_name, schema_json, created_at, updated_at, status)
                VALUES (?, ?, ?, ?, ?, ?)
                """,
                (
                    extraction_id,
                    schema.name,
                    json.dumps(schema.to_dict(), ensure_ascii=False),
                    now,
                    now,
                    "in_progress",
                ),
            )

        logger.info(
            "Extraction created: %s (schema=%s)", extraction_id[:8], schema.name
        )
        return extraction_id

    def finish_extraction(self, extraction_id: str, status: str = "completed") -> None:
        """Mark an extraction as finished."""
        now = datetime.utcnow().isoformat()
        with self._connect() as conn:
            conn.execute(
                "UPDATE extractions SET status = ?, updated_at = ? WHERE id = ?",
                (status, now, extraction_id),
            )

    def get_extraction(self, extraction_id: str) -> Optional[Dict[str, Any]]:
        """Fetch the metadata of an extraction."""
        with self._connect() as conn:
            row = conn.execute(
                "SELECT * FROM extractions WHERE id = ?", (extraction_id,)
            ).fetchone()
            if row:
                return dict(row)
            return None

    def list_extractions(self, limit: int = 50) -> List[Dict[str, Any]]:
        """List recent extractions."""
        with self._connect() as conn:
            rows = conn.execute(
                "SELECT * FROM extractions ORDER BY created_at DESC LIMIT ?",
                (limit,),
            ).fetchall()
            return [dict(r) for r in rows]

    # ------------------------------------------------------------------
    # Records
    # ------------------------------------------------------------------

    def add_record(
        self,
        extraction_id: str,
        data: Dict[str, Any],
        screenshot_path: Optional[str] = None,
        confidence: float = 0.0,
        errors: Optional[List[str]] = None,
    ) -> str:
        """
        Add an extracted record.

        Args:
            extraction_id: ID of the extraction session
            data: Extracted data (dict)
            screenshot_path: Path of the source screenshot
            confidence: Confidence score [0, 1]
            errors: List of validation errors

        Returns:
            record_id (UUID)
        """
        record_id = str(uuid.uuid4())
        now = datetime.utcnow().isoformat()

        with self._connect() as conn:
            conn.execute(
                """
                INSERT INTO records (id, extraction_id, data_json, screenshot_path,
                                     confidence, errors_json, created_at)
                VALUES (?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    record_id,
                    extraction_id,
                    json.dumps(data, ensure_ascii=False),
                    screenshot_path,
                    confidence,
                    json.dumps(errors or [], ensure_ascii=False),
                    now,
                ),
            )
            # Update the counter
            conn.execute(
                """
                UPDATE extractions
                SET record_count = record_count + 1, updated_at = ?
                WHERE id = ?
                """,
                (now, extraction_id),
            )

        logger.debug(
            "Record added: %s (extraction=%s, confidence=%.2f)",
            record_id[:8],
            extraction_id[:8],
            confidence,
        )
        return record_id

    def get_records(self, extraction_id: str) -> List[Dict[str, Any]]:
        """
        Fetch all records of an extraction.

        Returns:
            List of dicts with keys: id, data, screenshot_path,
            confidence, errors, created_at
        """
        with self._connect() as conn:
            rows = conn.execute(
                """
                SELECT id, data_json, screenshot_path, confidence,
                       errors_json, created_at
                FROM records
                WHERE extraction_id = ?
                ORDER BY created_at ASC
                """,
                (extraction_id,),
            ).fetchall()

        results = []
        for row in rows:
            results.append({
                "id": row["id"],
                "data": json.loads(row["data_json"]),
                "screenshot_path": row["screenshot_path"],
                "confidence": row["confidence"],
                "errors": json.loads(row["errors_json"]) if row["errors_json"] else [],
                "created_at": row["created_at"],
            })
        return results

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------

    def export_csv(self, extraction_id: str, output_path: str) -> str:
        """
        Export the records to CSV.

        Args:
            extraction_id: Session ID
            output_path: Path of the output CSV file

        Returns:
            Path of the created file
        """
        records = self.get_records(extraction_id)
        if not records:
            raise ValueError(f"No records for extraction {extraction_id}")

        out = Path(output_path)
        out.parent.mkdir(parents=True, exist_ok=True)

        # Determine the columns across all records
        all_keys = self._collect_all_keys(records)

        with open(out, "w", newline="", encoding="utf-8-sig") as f:
            writer = csv.DictWriter(f, fieldnames=all_keys, extrasaction="ignore")
            writer.writeheader()
            for rec in records:
                writer.writerow(rec["data"])

        logger.info("CSV export: %s (%d rows)", output_path, len(records))
        return str(out)

    def export_excel(self, extraction_id: str, output_path: str) -> str:
        """
        Export the records to Excel (openpyxl).

        Args:
            extraction_id: Session ID
            output_path: Path of the output Excel file

        Returns:
            Path of the created file

        Raises:
            ImportError: If openpyxl is not installed
        """
        try:
            import openpyxl
        except ImportError:
            raise ImportError(
                "openpyxl is required for Excel export. "
                "Install it: pip install openpyxl"
            )

        records = self.get_records(extraction_id)
        if not records:
            raise ValueError(f"No records for extraction {extraction_id}")

        out = Path(output_path)
        out.parent.mkdir(parents=True, exist_ok=True)

        all_keys = self._collect_all_keys(records)

        wb = openpyxl.Workbook()
        ws = wb.active
        ws.title = "Extraction"

        # Headers
        for col_idx, key in enumerate(all_keys, start=1):
            cell = ws.cell(row=1, column=col_idx, value=key)
            cell.font = openpyxl.styles.Font(bold=True)

        # Data
        for row_idx, rec in enumerate(records, start=2):
            for col_idx, key in enumerate(all_keys, start=1):
                ws.cell(row=row_idx, column=col_idx, value=rec["data"].get(key, ""))

        # Adjust column widths
        for col_idx, key in enumerate(all_keys, start=1):
            max_len = max(
                len(str(key)),
                *(len(str(rec["data"].get(key, ""))) for rec in records),
            )
            ws.column_dimensions[openpyxl.utils.get_column_letter(col_idx)].width = min(max_len + 2, 50)

        wb.save(str(out))
        logger.info("Excel export: %s (%d rows)", output_path, len(records))
        return str(out)

    # ------------------------------------------------------------------
    # Statistics
    # ------------------------------------------------------------------

    def get_stats(self, extraction_id: str) -> Dict[str, Any]:
        """
        Statistics for an extraction.

        Returns:
            Dict with: record_count, avg_confidence, completeness,
            field_coverage, status, duration
        """
        extraction = self.get_extraction(extraction_id)
        if not extraction:
            return {"error": f"Extraction {extraction_id} not found"}

        records = self.get_records(extraction_id)

        if not records:
            return {
                "extraction_id": extraction_id,
                "schema_name": extraction["schema_name"],
                "status": extraction["status"],
                "record_count": 0,
                "avg_confidence": 0.0,
                "completeness": 0.0,
                "field_coverage": {},
            }

        # Average confidence
        confidences = [r["confidence"] for r in records]
        avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0

        # Per-field coverage: share of records with a non-empty value
        schema_data = json.loads(extraction["schema_json"])
        field_names = [f["name"] for f in schema_data.get("fields", [])]

        field_coverage = {}
        for fname in field_names:
            filled = sum(
                1 for r in records
                if r["data"].get(fname) is not None
                and str(r["data"][fname]).strip() != ""
            )
            field_coverage[fname] = filled / len(records) if records else 0.0

        # Overall completeness
        completeness = (
            sum(field_coverage.values()) / len(field_coverage)
            if field_coverage else 0.0
        )

        # Errors
        total_errors = sum(len(r.get("errors", [])) for r in records)

        return {
            "extraction_id": extraction_id,
            "schema_name": extraction["schema_name"],
            "status": extraction["status"],
            "record_count": len(records),
            "avg_confidence": round(avg_confidence, 3),
            "completeness": round(completeness, 3),
            "field_coverage": {k: round(v, 3) for k, v in field_coverage.items()},
            "total_errors": total_errors,
            "created_at": extraction["created_at"],
            "updated_at": extraction["updated_at"],
        }

    # ------------------------------------------------------------------
    # Cleanup
    # ------------------------------------------------------------------

    def delete_extraction(self, extraction_id: str) -> bool:
        """Delete an extraction and all of its records."""
        with self._connect() as conn:
            conn.execute("DELETE FROM records WHERE extraction_id = ?", (extraction_id,))
            result = conn.execute("DELETE FROM extractions WHERE id = ?", (extraction_id,))
            return result.rowcount > 0

    # ------------------------------------------------------------------
    # Internal utilities
    # ------------------------------------------------------------------

    @staticmethod
    def _collect_all_keys(records: List[Dict[str, Any]]) -> List[str]:
        """Collect all unique record keys, preserving order."""
        seen = set()
        keys = []
        for rec in records:
            for k in rec["data"].keys():
                if k not in seen:
                    seen.add(k)
                    keys.append(k)
        return keys
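Exercising the store end to end (illustrative; `schema` is an ExtractionSchema built elsewhere, and the sample values echo the prompt example in field_extractor.py):

    store = DataStore(db_path="data/extractions/store.db")
    extraction_id = store.create_extraction(schema)

    store.add_record(
        extraction_id,
        data={"nom": "DUPONT", "prenom": "Jean"},
        screenshot_path="data/extractions/screenshots/record_0001.png",
        confidence=0.9,
    )
    store.finish_extraction(extraction_id)

    print(store.get_stats(extraction_id)["completeness"])
    store.export_csv(extraction_id, "out/extraction.csv")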
core/extraction/extraction_engine.py (new file)
@@ -0,0 +1,312 @@
"""
ExtractionEngine - Main orchestrator of the data extraction engine

Orchestrates the full cycle:
navigate -> screenshot -> extract -> validate -> store -> next

Relies on FieldExtractor (VLM/OCR), DataStore (SQLite), and
IterationController (navigation) to perform automated data extraction
from user interfaces.
"""

import logging
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional

import requests

from .data_store import DataStore
from .field_extractor import FieldExtractor
from .iteration_controller import IterationController
from .schema import ExtractionSchema

logger = logging.getLogger(__name__)


class ExtractionEngine:
    """
    Main extraction engine.

    Orchestrates the cycle: navigate -> screenshot -> extract -> store -> next.

    Usage modes:
    1. Automatic: start_extraction() — full loop with navigation
    2. Manual: extract_current_screen() — one-off extraction of a screenshot
    """

    def __init__(
        self,
        schema: ExtractionSchema,
        store: Optional[DataStore] = None,
        field_extractor: Optional[FieldExtractor] = None,
        streaming_server_url: str = "http://localhost:5005",
        screenshot_dir: str = "data/extractions/screenshots",
    ):
        """
        Args:
            schema: Extraction schema describing the fields and navigation
            store: DataStore for storage (a default one is created if absent)
            field_extractor: Field extractor (a default one is created if absent)
            streaming_server_url: URL of the Agent V1 streaming server
            screenshot_dir: Directory in which to save screenshots
        """
        self.schema = schema
        self.store = store or DataStore()
        self.field_extractor = field_extractor or FieldExtractor()
        self.controller = IterationController(schema, streaming_server_url)
        self.streaming_server_url = streaming_server_url.rstrip("/")
        self.screenshot_dir = Path(screenshot_dir)
        self.screenshot_dir.mkdir(parents=True, exist_ok=True)

        # Internal state
        self._current_extraction_id: Optional[str] = None
        self._is_running = False
        self._should_stop = False
        self._progress_callback: Optional[Callable] = None

    # ------------------------------------------------------------------
    # Public API - Automatic extraction
    # ------------------------------------------------------------------

    def start_extraction(
        self,
        session_id: str,
        on_progress: Optional[Callable[[Dict[str, Any]], None]] = None,
    ) -> str:
        """
        Start an automatic extraction session.

        Loop:
        1. Create the extraction in the store
        2. For each record:
           a. Take a screenshot
           b. Extract the fields
           c. Validate
           d. Store
           e. Navigate to the next one
        3. Finalize and return the extraction_id

        Args:
            session_id: ID of the streaming session (for navigation)
            on_progress: Callback invoked for each record (optional)

        Returns:
            extraction_id
        """
        self._is_running = True
        self._should_stop = False
        self._progress_callback = on_progress

        # Create the extraction session
        extraction_id = self.store.create_extraction(self.schema)
        self._current_extraction_id = extraction_id

        logger.info(
            "Starting extraction %s (schema=%s, max=%d)",
            extraction_id[:8],
            self.schema.name,
            self.controller.max_records,
        )

        try:
            while self.controller.has_next() and not self._should_stop:
                idx = self.controller.current_index

                # 1. Screenshot
                screenshot_path = self._take_screenshot(session_id, idx)
                if screenshot_path is None:
                    logger.warning("Screenshot failed at index %d, continuing", idx)
                    # Navigate anyway so we do not get stuck
                    self.controller.navigate_to_next(session_id)
                    continue

                # 2. Extraction
                result = self.extract_current_screen(screenshot_path)

                # 3. Storage
                self.store.add_record(
                    extraction_id=extraction_id,
                    data=result["data"],
                    screenshot_path=screenshot_path,
                    confidence=result["confidence"],
                    errors=result.get("errors"),
                )

                # 4. Progress callback
                if self._progress_callback:
                    progress = self.get_progress()
                    progress["last_record"] = result["data"]
                    progress["last_confidence"] = result["confidence"]
                    self._progress_callback(progress)

                logger.info(
                    "Record %d/%d extracted (confidence=%.2f)",
                    idx + 1,
                    self.controller.max_records,
                    result["confidence"],
                )

                # 5. Navigation
                if not self.controller.navigate_to_next(session_id):
                    logger.info("End of navigation at index %d", idx)
                    break

            # Finalize
            status = "stopped" if self._should_stop else "completed"
            self.store.finish_extraction(extraction_id, status=status)

            logger.info(
                "Extraction %s finished: %s (%d records)",
                extraction_id[:8],
                status,
                self.controller.current_index,
            )

        except Exception as e:
            logger.error("Error during extraction: %s", e)
            self.store.finish_extraction(extraction_id, status="error")
            raise

        finally:
            self._is_running = False
            self._current_extraction_id = None

        return extraction_id

    def stop_extraction(self) -> None:
        """Request that the running extraction stop."""
        if self._is_running:
            logger.info("Stop requested for the running extraction")
            self._should_stop = True

    # ------------------------------------------------------------------
    # Public API - One-off extraction
    # ------------------------------------------------------------------

    def extract_current_screen(self, screenshot_path: str) -> Dict[str, Any]:
        """
        Extract the fields from the current screenshot, without navigation.

        Args:
            screenshot_path: Path to the screenshot

        Returns:
            Dict with 'data', 'confidence', 'errors', 'validation'
        """
        # Extraction
        result = self.field_extractor.extract_fields(screenshot_path, self.schema)

        # Validation against the schema
        validation = self.schema.validate_record(result["data"])
        result["validation"] = validation

        return result

    # ------------------------------------------------------------------
    # Public API - Progress
    # ------------------------------------------------------------------

    def get_progress(self) -> Dict[str, Any]:
        """Return the current extraction progress."""
        nav_progress = self.controller.progress
        stats = {}

        if self._current_extraction_id:
            stats = self.store.get_stats(self._current_extraction_id)

        return {
            "extraction_id": self._current_extraction_id,
            "is_running": self._is_running,
            "navigation": nav_progress,
            "stats": stats,
            "schema_name": self.schema.name,
        }

    # ------------------------------------------------------------------
    # Screenshot
    # ------------------------------------------------------------------

    def _take_screenshot(self, session_id: str, index: int) -> Optional[str]:
        """
        Take a screenshot via the streaming server.

        Tries to call the streaming server API to get the current
        screenshot. Returns None on failure.

        Args:
            session_id: ID of the streaming session
            index: Index of the current record

        Returns:
            Path of the saved screenshot, or None
        """
        try:
            response = requests.get(
                f"{self.streaming_server_url}/api/screenshot",
                params={"session_id": session_id},
                timeout=10,
            )

            if response.status_code == 200:
                # Save the screenshot
                timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
                filename = f"record_{index:04d}_{timestamp}.png"
                filepath = self.screenshot_dir / filename

                with open(filepath, "wb") as f:
                    f.write(response.content)

                return str(filepath)
            else:
                logger.warning(
                    "Screenshot failed: HTTP %d", response.status_code
                )
                return None

        except requests.exceptions.ConnectionError:
            logger.warning(
                "Streaming server unreachable for screenshot"
            )
            return None

        except Exception as e:
            logger.error("Screenshot error: %s", e)
            return None

    # ------------------------------------------------------------------
    # Utilities
    # ------------------------------------------------------------------

    def extract_from_file(self, screenshot_path: str) -> Dict[str, Any]:
        """
        Shortcut to extract from an existing file and store the result.

        Useful for offline reprocessing of screenshots.

        Args:
            screenshot_path: Path to an existing screenshot

        Returns:
            Dict with the extracted data and the record_id
        """
        if self._current_extraction_id is None:
            extraction_id = self.store.create_extraction(self.schema)
        else:
            extraction_id = self._current_extraction_id

        result = self.extract_current_screen(screenshot_path)

        record_id = self.store.add_record(
            extraction_id=extraction_id,
            data=result["data"],
            screenshot_path=screenshot_path,
            confidence=result["confidence"],
            errors=result.get("errors"),
        )

        result["record_id"] = record_id
        result["extraction_id"] = extraction_id
        return result
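Putting the engine to work offline (illustrative; reuses the hypothetical `schema` from earlier, and the session ID in the commented live-mode call is an example):

    from pathlib import Path

    engine = ExtractionEngine(schema)

    # Offline reprocessing of screenshots already on disk:
    for png in sorted(Path("data/extractions/screenshots").glob("*.png")):
        result = engine.extract_from_file(str(png))
        print(png.name, result["confidence"], result["errors"])

    # Live mode (full navigate/screenshot/extract loop against a streaming session):
    # engine.start_extraction(session_id="sess-123",
    #                         on_progress=lambda p: print(p["stats"]))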
core/extraction/field_extractor.py (new file)
@@ -0,0 +1,327 @@
|
||||
"""
|
||||
FieldExtractor - Extraction de champs structures depuis des screenshots
|
||||
|
||||
Utilise un VLM (Ollama) pour comprendre le contenu visuel et en extraire
|
||||
des donnees structurees selon un schema predefini.
|
||||
Fallback OCR via docTR si le VLM echoue.
|
||||
"""
|
||||
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from .schema import ExtractionField, ExtractionSchema
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Configuration Ollama (coherente avec le reste du projet)
|
||||
OLLAMA_DEFAULT_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
OLLAMA_DEFAULT_MODEL = os.environ.get("VLM_MODEL", "qwen3-vl:8b")
|
||||
|
||||
|
||||
class FieldExtractor:
|
||||
"""
|
||||
Extrait des champs structures depuis un screenshot.
|
||||
|
||||
Pipeline :
|
||||
1. VLM : envoyer screenshot + schema au VLM pour extraction structuree
|
||||
2. Validation : verifier les regex, types, champs requis
|
||||
3. (Optionnel) OCR fallback si VLM indisponible
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ollama_url: str = OLLAMA_DEFAULT_URL,
|
||||
ollama_model: str = OLLAMA_DEFAULT_MODEL,
|
||||
timeout: int = 60,
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
ollama_url: URL du serveur Ollama
|
||||
ollama_model: Modele VLM a utiliser
|
||||
timeout: Timeout en secondes pour les appels VLM
|
||||
"""
|
||||
self.ollama_url = ollama_url.rstrip("/")
|
||||
self.ollama_model = ollama_model
|
||||
self.timeout = timeout
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# API publique
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def extract_fields(
|
||||
self,
|
||||
screenshot_path: str,
|
||||
schema: ExtractionSchema,
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Extraire les champs definis par le schema depuis un screenshot.
|
||||
|
||||
Args:
|
||||
screenshot_path: Chemin vers l'image (PNG/JPEG)
|
||||
schema: Schema d'extraction
|
||||
|
||||
Returns:
|
||||
Dict avec les champs extraits + metadonnees
|
||||
{
|
||||
"data": {"nom": "DUPONT", "prenom": "Jean", ...},
|
||||
"confidence": 0.85,
|
||||
"errors": [],
|
||||
"raw_response": "..."
|
||||
}
|
||||
"""
|
||||
path = Path(screenshot_path)
|
||||
if not path.exists():
|
||||
return {
|
||||
"data": {},
|
||||
"confidence": 0.0,
|
||||
"errors": [f"Fichier introuvable : {screenshot_path}"],
|
||||
"raw_response": None,
|
||||
}
|
||||
|
||||
# Encoder l'image en base64
|
||||
image_b64 = self._encode_image(path)
|
||||
|
||||
# Extraction via VLM
|
||||
raw_data, raw_response = self._extract_via_vlm(image_b64, schema.fields)
|
||||
|
||||
if raw_data is None:
|
||||
logger.warning("VLM extraction echouee, tentative OCR fallback")
|
||||
raw_data = self._extract_via_ocr_fallback(path, schema.fields)
|
||||
raw_response = "(ocr fallback)"
|
||||
|
||||
# Validation et nettoyage
|
||||
validated = {}
|
||||
errors: List[str] = []
|
||||
valid_count = 0
|
||||
|
||||
for fld in schema.fields:
|
||||
value = raw_data.get(fld.name) if raw_data else None
|
||||
# Nettoyer
|
||||
if value is not None:
|
||||
value = str(value).strip()
|
||||
if value == "" or value.lower() in ("null", "none", "n/a"):
|
||||
value = None
|
||||
|
||||
validated[fld.name] = value
|
||||
|
||||
if not fld.validate_value(value):
|
||||
errors.append(
|
||||
f"Champ '{fld.name}' invalide ou manquant : {value!r}"
|
||||
)
|
||||
else:
|
||||
if value is not None and str(value).strip():
|
||||
valid_count += 1
|
||||
|
||||
total = len(schema.fields) if schema.fields else 1
|
||||
confidence = valid_count / total
|
||||
|
||||
return {
|
||||
"data": validated,
|
||||
"confidence": confidence,
|
||||
"errors": errors,
|
||||
"raw_response": raw_response,
|
||||
}
|
||||
|
||||
    # ------------------------------------------------------------------
    # VLM extraction
    # ------------------------------------------------------------------

    def _extract_via_vlm(
        self, image_b64: str, fields: List[ExtractionField]
    ) -> tuple:
        """
        Call the VLM (Ollama) for structured extraction.

        Returns:
            (data_dict | None, raw_response_text | None)
        """
        prompt = self._build_extraction_prompt(fields)

        try:
            # Disable thinking mode for Qwen3 models
            effective_prompt = prompt
            if "qwen" in self.ollama_model.lower():
                effective_prompt = f"/nothink {prompt}"

            payload = {
                "model": self.ollama_model,
                "prompt": effective_prompt,
                "images": [image_b64],
                "stream": False,
                "format": "json",
                "options": {
                    "temperature": 0.1,
                    "num_predict": 2000,
                },
            }

            response = requests.post(
                f"{self.ollama_url}/api/generate",
                json=payload,
                timeout=self.timeout,
            )

            if response.status_code != 200:
                logger.error(
                    "Ollama error %d: %s",
                    response.status_code,
                    response.text[:300],
                )
                return None, None

            result = response.json()
            raw_text = result.get("response", "").strip()
            logger.debug("Raw VLM response: %s", raw_text[:500])

            parsed = self._parse_vlm_response(raw_text)
            return parsed, raw_text

        except requests.exceptions.Timeout:
            logger.error("VLM timeout after %ds", self.timeout)
            return None, None

        except requests.exceptions.ConnectionError:
            logger.error("Ollama not reachable at %s", self.ollama_url)
            return None, None

        except Exception as e:
            logger.error("Unexpected VLM error: %s", e)
            return None, None

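    # For reference, a successful non-streaming /api/generate reply from
    # Ollama looks roughly like the sketch below; only the "response" field
    # is consumed here (model name and values illustrative):
    #
    #   {"model": "qwen2.5vl", "response": "{\"nom\": \"DUPONT\"}",
    #    "done": true, "total_duration": 1234567890, ...}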
    def _build_extraction_prompt(self, fields: List[ExtractionField]) -> str:
        """Build the structured extraction prompt for the VLM."""
        field_descriptions = []
        for f in fields:
            desc = f"- {f.name} ({f.field_type}): {f.description}"
            if f.required:
                desc += " [REQUIRED]"
            if f.validation_regex:
                desc += f" (format: {f.validation_regex})"
            field_descriptions.append(desc)

        fields_text = "\n".join(field_descriptions)

        return f"""Look at this screenshot and extract the following information.

FIELDS TO EXTRACT:
{fields_text}

INSTRUCTIONS:
1. Extract each field exactly as it appears on screen
2. If a field is not visible, use null
3. For dates, keep the format as displayed
4. For numbers, keep the comma format if present
5. Answer ONLY with valid JSON

RESPONSE FORMAT:
A JSON object whose keys are the field names listed above.
Example: {{"nom": "DUPONT", "prenom": "Jean", "date_naissance": "15/03/1965"}}

Now extract the data:"""

    def _parse_vlm_response(self, text: str) -> Optional[Dict[str, Any]]:
        """Parse the VLM's JSON response."""
        if not text:
            return None

        # Try a direct parse first
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass

        # Look for a JSON object anywhere in the response
        match = re.search(r"\{[\s\S]*\}", text)
        if match:
            try:
                return json.loads(match.group())
            except json.JSONDecodeError:
                pass

        # Look for a fenced ```json ... ``` block
        match = re.search(r"```(?:json)?\s*(\{[\s\S]*?\})\s*```", text)
        if match:
            try:
                return json.loads(match.group(1))
            except json.JSONDecodeError:
                pass

        logger.warning("Could not parse the VLM response as JSON")
        return None

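    # Fallback order sketch: for a reply wrapped in a markdown fence such as
    # '```json\n{"nom": "DUPONT"}\n```', json.loads fails on the fences, but
    # the greedy {...} regex already recovers the object, so the dedicated
    # fence pattern only matters when stray braces appear outside the block.
    #
    #   >>> engine._parse_vlm_response('```json\n{"nom": "DUPONT"}\n```')
    #   {'nom': 'DUPONT'}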
    # ------------------------------------------------------------------
    # OCR fallback
    # ------------------------------------------------------------------

    def _extract_via_ocr_fallback(
        self, image_path: Path, fields: List[ExtractionField]
    ) -> Optional[Dict[str, Any]]:
        """
        Fallback: extract raw text via OCR (docTR), then attempt a basic
        mapping onto the fields.

        This fallback is very basic; it returns the raw text without any
        intelligent mapping. The VLM remains the preferred method.
        """
        try:
            from PIL import Image as PILImage

            # Open once to check the image is readable (docTR loads from path)
            img = PILImage.open(str(image_path))

            # Try docTR
            try:
                from doctr.io import DocumentFile
                from doctr.models import ocr_predictor

                predictor = ocr_predictor(
                    det_arch="db_mobilenet_v3_large",
                    reco_arch="crnn_mobilenet_v3_large",
                    pretrained=True,
                )
                doc = DocumentFile.from_images([str(image_path)])
                result = predictor(doc)

                # Collect all recognized text, line by line
                all_text = []
                for page in result.pages:
                    for block in page.blocks:
                        for line in block.lines:
                            line_text = " ".join(w.value for w in line.words)
                            all_text.append(line_text)

                full_text = "\n".join(all_text)
                logger.info("OCR fallback: %d lines extracted", len(all_text))

                # Return the full text under a special key
                return {"_ocr_text": full_text}

            except ImportError:
                logger.warning("docTR not available for the OCR fallback")
                return None

        except Exception as e:
            logger.error("OCR fallback error: %s", e)
            return None

    # ------------------------------------------------------------------
    # Utilities
    # ------------------------------------------------------------------

    @staticmethod
    def _encode_image(path: Path) -> str:
        """Encode an image as base64."""
        with open(path, "rb") as f:
            return base64.b64encode(f.read()).decode("utf-8")

    def check_vlm_available(self) -> bool:
        """Check whether the Ollama VLM is reachable."""
        try:
            response = requests.get(
                f"{self.ollama_url}/api/tags", timeout=5
            )
            return response.status_code == 200
        except (requests.RequestException, ConnectionError, TimeoutError):
            return False
258
core/extraction/iteration_controller.py
Normal file
@@ -0,0 +1,258 @@
"""
|
||||
IterationController - Controle de navigation entre enregistrements
|
||||
|
||||
Gere la boucle de navigation : passage au record suivant, pagination,
|
||||
scroll, etc. Communique avec le streaming server (Agent V1) pour
|
||||
envoyer les actions de navigation sur la machine cible.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import requests
|
||||
|
||||
from .schema import ExtractionSchema
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class IterationController:
|
||||
"""
|
||||
Controle la navigation entre les enregistrements a extraire.
|
||||
|
||||
Types de navigation supportes :
|
||||
- list_detail : cliquer sur chaque element d'une liste
|
||||
- pagination : bouton suivant / page suivante
|
||||
- scroll : defilement vertical
|
||||
- manual : l'utilisateur navigue manuellement
|
||||
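
    Example (illustrative session id):
        >>> controller = IterationController(schema)
        >>> while controller.has_next():
        ...     controller.navigate_to_next(session_id="sess-001")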
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
schema: ExtractionSchema,
|
||||
streaming_server_url: str = "http://localhost:5005",
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
schema: Schema d'extraction (contient les regles de navigation)
|
||||
streaming_server_url: URL du streaming server Agent V1
|
||||
"""
|
||||
self.schema = schema
|
||||
self.server_url = streaming_server_url.rstrip("/")
|
||||
self.current_index = 0
|
||||
self.max_records = schema.navigation.get("max_records", 100)
|
||||
self.nav_type = schema.navigation.get("type", "manual")
|
||||
self.nav_action = schema.navigation.get("next_record", "click_next_in_list")
|
||||
self.nav_delay = schema.navigation.get("delay_ms", 1000)
|
||||
|
||||
# Etat interne
|
||||
self._started = False
|
||||
self._finished = False
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# API publique
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def has_next(self) -> bool:
|
||||
"""Retourne True s'il reste des enregistrements a traiter."""
|
||||
if self._finished:
|
||||
return False
|
||||
return self.current_index < self.max_records
|
||||
|
||||
def navigate_to_next(self, session_id: str) -> bool:
|
||||
"""
|
||||
Naviguer vers l'enregistrement suivant.
|
||||
|
||||
Envoie les actions de navigation au streaming server
|
||||
en fonction du type de navigation defini dans le schema.
|
||||
|
||||
Args:
|
||||
session_id: ID de la session de streaming
|
||||
|
||||
Returns:
|
||||
True si la navigation a reussi
|
||||
"""
|
||||
if not self.has_next():
|
||||
logger.info("Plus d'enregistrements a traiter (index=%d)", self.current_index)
|
||||
return False
|
||||
|
||||
success = False
|
||||
|
||||
if self.nav_type == "manual":
|
||||
# Mode manuel : on attend juste un delai
|
||||
logger.info(
|
||||
"Navigation manuelle : attente de %dms (index=%d)",
|
||||
self.nav_delay,
|
||||
self.current_index,
|
||||
)
|
||||
time.sleep(self.nav_delay / 1000)
|
||||
success = True
|
||||
|
||||
elif self.nav_type == "pagination":
|
||||
success = self._navigate_pagination(session_id)
|
||||
|
||||
elif self.nav_type == "list_detail":
|
||||
success = self._navigate_list_detail(session_id)
|
||||
|
||||
elif self.nav_type == "scroll":
|
||||
success = self._navigate_scroll(session_id)
|
||||
|
||||
else:
|
||||
logger.warning("Type de navigation inconnu : %s", self.nav_type)
|
||||
success = False
|
||||
|
||||
if success:
|
||||
self.current_index += 1
|
||||
logger.debug(
|
||||
"Navigation reussie -> index=%d/%d",
|
||||
self.current_index,
|
||||
self.max_records,
|
||||
)
|
||||
|
||||
return success
|
||||
|
||||
def navigate_to_record(self, session_id: str, index: int) -> bool:
|
||||
"""
|
||||
Naviguer vers un enregistrement specifique.
|
||||
|
||||
Args:
|
||||
session_id: ID de la session de streaming
|
||||
index: Index de l'enregistrement cible
|
||||
|
||||
Returns:
|
||||
True si la navigation a reussi
|
||||
"""
|
||||
if index < 0 or index >= self.max_records:
|
||||
logger.error("Index hors limites : %d (max=%d)", index, self.max_records)
|
||||
return False
|
||||
|
||||
# Naviguer pas a pas jusqu'a l'index cible
|
||||
steps = index - self.current_index
|
||||
if steps < 0:
|
||||
logger.warning(
|
||||
"Navigation arriere non supportee (current=%d, target=%d)",
|
||||
self.current_index,
|
||||
index,
|
||||
)
|
||||
return False
|
||||
|
||||
for _ in range(steps):
|
||||
if not self.navigate_to_next(session_id):
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
def reset(self) -> None:
|
||||
"""Reinitialiser le controleur."""
|
||||
self.current_index = 0
|
||||
self._started = False
|
||||
self._finished = False
|
||||
|
||||
def mark_finished(self) -> None:
|
||||
"""Marquer l'iteration comme terminee (ex: fin de liste detectee)."""
|
||||
self._finished = True
|
||||
logger.info("Iteration marquee comme terminee a l'index %d", self.current_index)
|
||||
|
||||
@property
|
||||
def progress(self) -> Dict[str, Any]:
|
||||
"""Retourne la progression actuelle."""
|
||||
return {
|
||||
"current_index": self.current_index,
|
||||
"max_records": self.max_records,
|
||||
"progress_pct": round(
|
||||
(self.current_index / self.max_records * 100)
|
||||
if self.max_records > 0 else 0,
|
||||
1,
|
||||
),
|
||||
"nav_type": self.nav_type,
|
||||
"finished": self._finished,
|
||||
}
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Navigation specifique
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _navigate_pagination(self, session_id: str) -> bool:
|
||||
"""Navigation par pagination (bouton suivant)."""
|
||||
action = {
|
||||
"type": "click",
|
||||
"target": self.nav_action,
|
||||
"description": "Cliquer sur le bouton suivant / page suivante",
|
||||
}
|
||||
return self._send_action(session_id, action)
|
||||
|
||||
def _navigate_list_detail(self, session_id: str) -> bool:
|
||||
"""Navigation dans une liste (cliquer sur l'element suivant)."""
|
||||
action = {
|
||||
"type": "click",
|
||||
"target": self.nav_action,
|
||||
"index": self.current_index,
|
||||
"description": f"Cliquer sur l'element {self.current_index + 1} de la liste",
|
||||
}
|
||||
return self._send_action(session_id, action)
|
||||
|
||||
def _navigate_scroll(self, session_id: str) -> bool:
|
||||
"""Navigation par defilement."""
|
||||
action = {
|
||||
"type": "scroll",
|
||||
"direction": "down",
|
||||
"amount": self.schema.navigation.get("scroll_amount", 300),
|
||||
"description": "Defiler vers le bas",
|
||||
}
|
||||
return self._send_action(session_id, action)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Communication avec le streaming server
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _send_action(self, session_id: str, action: Dict[str, Any]) -> bool:
|
||||
"""
|
||||
Envoyer une action de navigation au streaming server.
|
||||
|
||||
L'action est envoyee via l'API du streaming server (port 5005).
|
||||
Si le serveur n'est pas disponible, on simule un delai.
|
||||
|
||||
Args:
|
||||
session_id: ID de la session de streaming
|
||||
action: Description de l'action a executer
|
||||
|
||||
Returns:
|
||||
True si l'action a ete executee ou simulee
|
||||
"""
|
||||
try:
|
||||
payload = {
|
||||
"session_id": session_id,
|
||||
"action": action,
|
||||
}
|
||||
|
||||
response = requests.post(
|
||||
f"{self.server_url}/api/action",
|
||||
json=payload,
|
||||
timeout=10,
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
# Attendre le delai de navigation
|
||||
if self.nav_delay > 0:
|
||||
time.sleep(self.nav_delay / 1000)
|
||||
return True
|
||||
else:
|
||||
logger.warning(
|
||||
"Action de navigation echouee : HTTP %d", response.status_code
|
||||
)
|
||||
return False
|
||||
|
||||
except requests.exceptions.ConnectionError:
|
||||
logger.warning(
|
||||
"Streaming server non accessible a %s — simulation du delai",
|
||||
self.server_url,
|
||||
)
|
||||
# Simuler l'attente de navigation (mode degrade)
|
||||
if self.nav_delay > 0:
|
||||
time.sleep(self.nav_delay / 1000)
|
||||
return True
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Erreur envoi action de navigation : %s", e)
|
||||
return False
|
||||
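# For reference, the JSON that _send_action posts to the streaming server
# looks like this (session id and target values illustrative):
#
#   POST http://localhost:5005/api/action
#   {"session_id": "sess-001",
#    "action": {"type": "click", "target": "click_next_in_list",
#               "description": "Click the next button / next page"}}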
217
core/extraction/schema.py
Normal file
@@ -0,0 +1,217 @@
"""
|
||||
Schema d'extraction de donnees - Definition des champs et navigation
|
||||
|
||||
Permet de definir un schema YAML decrivant les champs a extraire
|
||||
depuis des captures d'ecran (DPI, formulaires, listes...).
|
||||
"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
@dataclass
|
||||
class ExtractionField:
|
||||
"""Definition d'un champ a extraire depuis un screenshot."""
|
||||
|
||||
name: str # Ex: "nom_patient", "date_naissance"
|
||||
description: str # Description pour le VLM
|
||||
field_type: str = "text" # "text", "date", "number", "boolean"
|
||||
required: bool = True
|
||||
validation_regex: Optional[str] = None # Regex de validation optionnelle
|
||||
|
||||
def validate_value(self, value: Optional[str]) -> bool:
|
||||
"""
|
||||
Valider une valeur extraite pour ce champ.
|
||||
|
||||
Returns:
|
||||
True si la valeur est valide
|
||||
"""
|
||||
# Champ requis mais absent
|
||||
if self.required and (value is None or str(value).strip() == ""):
|
||||
return False
|
||||
|
||||
# Pas de valeur et pas requis => OK
|
||||
if value is None or str(value).strip() == "":
|
||||
return True
|
||||
|
||||
value_str = str(value).strip()
|
||||
|
||||
# Validation par type
|
||||
if self.field_type == "number":
|
||||
try:
|
||||
float(value_str.replace(",", ".").replace(" ", ""))
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
elif self.field_type == "boolean":
|
||||
if value_str.lower() not in (
|
||||
"true", "false", "oui", "non", "1", "0", "vrai", "faux"
|
||||
):
|
||||
return False
|
||||
|
||||
elif self.field_type == "date":
|
||||
# Accepter les formats courants FR
|
||||
date_patterns = [
|
||||
r"\d{2}/\d{2}/\d{4}", # JJ/MM/AAAA
|
||||
r"\d{2}-\d{2}-\d{4}", # JJ-MM-AAAA
|
||||
r"\d{4}-\d{2}-\d{2}", # AAAA-MM-JJ (ISO)
|
||||
r"\d{2}\.\d{2}\.\d{4}", # JJ.MM.AAAA
|
||||
]
|
||||
if not any(re.fullmatch(p, value_str) for p in date_patterns):
|
||||
return False
|
||||
|
||||
# Validation regex custom
|
||||
if self.validation_regex:
|
||||
if not re.fullmatch(self.validation_regex, value_str):
|
||||
return False
|
||||
|
||||
return True
|
||||
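
    # Quick doctest-style sketch of the per-type rules above (field names
    # illustrative):
    #
    #   >>> ExtractionField("date_naissance", "Birth date", field_type="date").validate_value("15/03/1965")
    #   True
    #   >>> ExtractionField("age", "Age", field_type="number").validate_value("abc")
    #   False
    #   >>> ExtractionField("note", "Note", required=False).validate_value(None)
    #   True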


@dataclass
class ExtractionSchema:
    """
    Complete extraction schema: list of fields plus navigation rules.

    Can be loaded from / saved to YAML for reuse.
    """

    name: str  # e.g. "dossier_patient_DPI"
    description: str
    fields: List[ExtractionField] = field(default_factory=list)
    navigation: Dict[str, Any] = field(default_factory=dict)

    # --- YAML serialization ---

    @classmethod
    def from_yaml(cls, path: str) -> "ExtractionSchema":
        """
        Load a schema from a YAML file.

        Args:
            path: Path to the YAML file

        Returns:
            An ExtractionSchema instance
        """
        yaml_path = Path(path)
        if not yaml_path.exists():
            raise FileNotFoundError(f"YAML schema not found: {path}")

        with open(yaml_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        if not isinstance(data, dict):
            raise ValueError(
                f"The YAML file must contain a mapping, not {type(data).__name__}"
            )

        return cls._from_dict(data)
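
    # Sketch of a schema file accepted by from_yaml; the keys below are the
    # ones read by _from_dict and IterationController, the values are
    # illustrative:
    #
    #   name: dossier_patient_DPI
    #   description: Patient record extraction
    #   fields:
    #     - name: nom
    #       description: Patient family name
    #       type: text
    #       required: true
    #     - name: date_naissance
    #       description: Birth date
    #       type: date
    #       validation: "\\d{2}/\\d{2}/\\d{4}"
    #   navigation:
    #     type: pagination
    #     next_record: click_next_in_list
    #     delay_ms: 1000
    #     max_records: 100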

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ExtractionSchema":
        """Build a schema from a Python dictionary."""
        return cls._from_dict(data)

    @classmethod
    def _from_dict(cls, data: Dict[str, Any]) -> "ExtractionSchema":
        """Internal construction from a dict."""
        fields_raw = data.get("fields", [])
        fields = []
        for fd in fields_raw:
            fields.append(ExtractionField(
                name=fd["name"],
                description=fd.get("description", ""),
                field_type=fd.get("type", fd.get("field_type", "text")),
                required=fd.get("required", True),
                validation_regex=fd.get("validation", fd.get("validation_regex")),
            ))

        return cls(
            name=data.get("name", "unnamed"),
            description=data.get("description", ""),
            fields=fields,
            navigation=data.get("navigation", {}),
        )

    def to_yaml(self, path: str) -> None:
        """
        Save the schema to a YAML file.

        Args:
            path: Output path
        """
        yaml_path = Path(path)
        yaml_path.parent.mkdir(parents=True, exist_ok=True)

        data = self.to_dict()

        with open(yaml_path, "w", encoding="utf-8") as f:
            yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a serializable dictionary."""
        return {
            "name": self.name,
            "description": self.description,
            "fields": [
                {
                    "name": f.name,
                    "description": f.description,
                    "type": f.field_type,
                    "required": f.required,
                    **({"validation": f.validation_regex} if f.validation_regex else {}),
                }
                for f in self.fields
            ],
            "navigation": self.navigation,
        }

    # --- Utilities ---

    @property
    def required_fields(self) -> List[ExtractionField]:
        """Return the list of required fields."""
        return [f for f in self.fields if f.required]

    @property
    def field_names(self) -> List[str]:
        """Return the list of field names."""
        return [f.name for f in self.fields]

    def get_field(self, name: str) -> Optional[ExtractionField]:
        """Look up a field by name."""
        for f in self.fields:
            if f.name == name:
                return f
        return None

    def validate_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """
        Validate a full record against the schema.

        Returns:
            Dict with 'valid' (bool), 'errors' (list), 'completeness' (float)
        """
        errors = []
        valid_count = 0

        for fld in self.fields:
            value = record.get(fld.name)
            if fld.validate_value(value):
                if value is not None and str(value).strip():
                    valid_count += 1
            else:
                errors.append(f"Field '{fld.name}' invalid: {value!r}")

        total = len(self.fields) if self.fields else 1
        completeness = valid_count / total

        return {
            "valid": len(errors) == 0,
            "errors": errors,
            "completeness": completeness,
        }
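    # e.g. on a 4-field schema where 3 fields validate with non-empty values
    # and one required field is missing, validate_record returns:
    #   {"valid": False, "errors": ["Field 'nom' invalid: None"], "completeness": 0.75}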
@@ -24,8 +24,9 @@ Example:
"""

import logging
from typing import List, Dict, Optional, Tuple
from collections import defaultdict
import os
from typing import List, Dict, Optional, Tuple, Any
from collections import defaultdict, Counter
from datetime import datetime
from pathlib import Path

@@ -106,6 +107,7 @@ class GraphBuilder:
        self.clustering_eps = clustering_eps
        self.clustering_min_samples = clustering_min_samples
        self.enable_quality_validation = enable_quality_validation
        self._screen_analyzer = None  # ScreenAnalyzer (lazy import)

        logger.info(
            f"GraphBuilder initialized: "
@@ -119,39 +121,47 @@
        self,
        session: RawSession,
        workflow_name: Optional[str] = None,
        precomputed_states: Optional[List["ScreenState"]] = None,
    ) -> Workflow:
        """
        Build a complete Workflow from a RawSession.

        Process:
        1. Create ScreenStates from screenshots
        1. Create ScreenStates from screenshots (or use precomputed_states)
        2. Compute an embedding for each state
        3. Detect patterns via clustering
        4. Build nodes from clusters
        5. Build edges from transitions

        Args:
            session: Raw session to analyze
            workflow_name: Workflow name (generated if None)

            precomputed_states: Already analyzed ScreenStates (streaming).
                If provided, step 1 is skipped (no re-analysis via ScreenAnalyzer).

        Returns:
            Workflow built with nodes and edges

        Raises:
            ValueError: If the session is empty or invalid
        """
        if not session.screenshots:
            raise ValueError("Session has no screenshots")

        if not precomputed_states and not session.screenshots:
            raise ValueError("Session has no screenshots and no precomputed states")

        logger.info(
            f"Building workflow from session {session.session_id} "
            f"with {len(session.screenshots)} screenshots"
            f"with {len(precomputed_states or session.screenshots)} "
            f"{'precomputed states' if precomputed_states else 'screenshots'}"
        )

        # Step 1: Create ScreenStates
        screen_states = self._create_screen_states(session)
        logger.debug(f"Created {len(screen_states)} screen states")

        # Step 1: Create ScreenStates (or reuse the precomputed ones)
        if precomputed_states:
            screen_states = precomputed_states
            logger.debug(f"Using {len(screen_states)} precomputed screen states")
        else:
            screen_states = self._create_screen_states(session)
            logger.debug(f"Created {len(screen_states)} screen states")

        # Step 2: Compute embeddings
        embeddings = self._compute_embeddings(screen_states)
        logger.debug(f"Computed {len(embeddings)} embeddings")
@@ -315,16 +325,31 @@
            file_size_bytes=screenshot_path.stat().st_size if screenshot_path.exists() else 0
        )

        # Create PerceptionLevel (will be enriched by embedding_builder)
        # Create PerceptionLevel: enrich with OCR if the screenshot exists
        detected_text = []
        text_method = "none"

        if screenshot_path.exists():
            try:
                if self._screen_analyzer is None:
                    from core.pipeline.screen_analyzer import ScreenAnalyzer
                    self._screen_analyzer = ScreenAnalyzer(session_id=session.session_id)
                extracted = self._screen_analyzer._extract_text(str(screenshot_path))
                if extracted:
                    detected_text = extracted
                    text_method = self._screen_analyzer._get_ocr_method_name()
            except Exception as e:
                logger.debug(f"OCR failed for {screenshot_path}: {e}")

        perception = PerceptionLevel(
            embedding=EmbeddingRef(
                provider="openclip_ViT-B-32",
                vector_id=f"data/embeddings/screens/{session.session_id}_state_{i:04d}.npy",
                dimensions=512
            ),
            detected_text=[],  # Will be filled by VLM/OCR
            text_detection_method="pending",
            confidence_avg=0.0
            detected_text=detected_text,
            text_detection_method=text_method,
            confidence_avg=0.85 if detected_text else 0.0
        )

        # Create ContextLevel
@@ -504,8 +529,12 @@
        node = WorkflowNode(
            node_id=f"node_{cluster_id:03d}",
            name=f"State Pattern {cluster_id}",
            screen_template=template,
            observation_count=len(indices),
            description=f"Auto-detected pattern ({len(indices)} observations)",
            template=template,
            metadata={
                "observation_count": len(indices),
                "_prototype_vector": prototype.tolist(),
            },
        )

        nodes.append(node)
@@ -522,27 +551,172 @@
    ) -> ScreenTemplate:
        """
        Create a ScreenTemplate from a cluster of states.

        TODO: Implement smart extraction of:
        - window_title_pattern (regex from shared titles)
        - required_text_patterns (text present in every state)
        - required_ui_elements (shared UI elements)

        Extracts the constraints shared by all states of the cluster:
        - window_title_pattern: common window title
        - required_text_patterns: texts present in most states
        - required_ui_elements: recurring UI roles/types

        Args:
            states: States of the cluster
            prototype_embedding: Prototype embedding

        Returns:
            ScreenTemplate with constraints
            ScreenTemplate with extracted constraints
        """
        # For now, a basic template with only the embedding
        return ScreenTemplate(
            embedding_prototype=prototype_embedding.tolist(),
            similarity_threshold=0.85,
            window_title_pattern=None,  # TODO: extract
            required_text_patterns=[],  # TODO: extract
            required_ui_elements=[],  # TODO: extract
        # --- Extract the common window title ---
        window_title_pattern = self._extract_window_pattern(states)

        # --- Extract the recurring texts ---
        required_text_patterns = self._extract_common_texts(states)

        # --- Extract the recurring UI elements ---
        required_ui_elements = self._extract_common_ui_elements(states)

        # Build the constraint sub-objects
        window_constraint = WindowConstraint(
            title_pattern=window_title_pattern,
            title_contains=window_title_pattern,
        )

        text_constraint = TextConstraint(
            required_texts=required_text_patterns,
        )

        ui_roles = [
            e.get("role", "") for e in required_ui_elements if e.get("role")
        ]
        ui_constraint = UIConstraint(
            required_roles=ui_roles,
        )

        embedding_proto = EmbeddingPrototype(
            provider="openclip_ViT-B-32",
            vector_id="",  # The vector itself lives in node.metadata._prototype_vector
            min_cosine_similarity=0.85,
            sample_count=len(states),
        )

        return ScreenTemplate(
            window=window_constraint,
            text=text_constraint,
            ui=ui_constraint,
            embedding=embedding_proto,
        )

    def _extract_window_pattern(self, states: List[ScreenState]) -> Optional[str]:
        """Extract a window-title pattern shared by the cluster's states."""
        titles = [s.window.window_title for s in states if s.window.window_title]
        if not titles:
            return None

        # If every title is identical, return it directly
        if len(set(titles)) == 1:
            return titles[0]

        # Find the longest common prefix
        prefix = os.path.commonprefix(titles)
        if len(prefix) >= 5:
            return prefix.rstrip(" -–—|")

        # Fallback: the most frequent title (Counter comes from the module imports)
        most_common = Counter(titles).most_common(1)[0][0]
        return most_common

    def _extract_common_texts(
        self, states: List[ScreenState], min_presence_ratio: float = 0.6
    ) -> List[str]:
        """
        Extract the texts present in most states of the cluster.

        Args:
            states: States of the cluster
            min_presence_ratio: Minimum presence ratio (0.6 = 60% of the states)
        """
        if not states:
            return []

        # Count each normalized text once per state
        text_counts: Dict[str, int] = defaultdict(int)
        states_with_text = 0

        for state in states:
            if hasattr(state.perception, 'detected_text') and state.perception.detected_text:
                states_with_text += 1
                seen_in_state = set()
                for text in state.perception.detected_text:
                    normalized = text.strip().lower()
                    if len(normalized) >= 3 and normalized not in seen_in_state:
                        text_counts[normalized] += 1
                        seen_in_state.add(normalized)

        if states_with_text == 0:
            return []

        # Keep the texts present in at least min_presence_ratio of the states
        threshold = max(2, int(states_with_text * min_presence_ratio))
        common_texts = [
            text for text, count in text_counts.items()
            if count >= threshold
        ]

        # Cap at the 10 most frequent texts
        common_texts.sort(key=lambda t: text_counts[t], reverse=True)
        return common_texts[:10]
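
    # Worked example: with 5 states carrying OCR text and the default
    # min_presence_ratio of 0.6, threshold = max(2, int(5 * 0.6)) = 3, so a
    # normalized string must appear in at least 3 distinct states to become
    # a required text pattern.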

    def _extract_common_ui_elements(
        self, states: List[ScreenState], min_presence_ratio: float = 0.5
    ) -> List[Dict[str, Any]]:
        """
        Extract the recurring UI element types/roles in the cluster.

        Returns a list of UI constraints in the form:
            [{"type": "button", "role": "validate", "min_count": 1}, ...]
        """
        if not states:
            return []

        # Count each (type, role) pair once per state
        role_counts: Dict[str, int] = defaultdict(int)
        type_counts: Dict[str, int] = defaultdict(int)
        states_with_ui = 0

        for state in states:
            if state.ui_elements:
                states_with_ui += 1
                seen_roles = set()
                seen_types = set()
                for el in state.ui_elements:
                    el_type = getattr(el, 'type', 'unknown')
                    el_role = getattr(el, 'role', 'unknown')

                    if el_role != 'unknown' and el_role not in seen_roles:
                        role_counts[el_role] += 1
                        seen_roles.add(el_role)

                    if el_type != 'unknown' and el_type not in seen_types:
                        type_counts[el_type] += 1
                        seen_types.add(el_type)

        if states_with_ui == 0:
            return []

        threshold = max(2, int(states_with_ui * min_presence_ratio))

        constraints = []

        # Add the recurring roles
        for role, count in role_counts.items():
            if count >= threshold:
                constraints.append({
                    "role": role,
                    "min_count": 1,
                })

        # Cap at 8 constraints
        constraints.sort(key=lambda c: role_counts.get(c.get("role", ""), 0), reverse=True)
        return constraints[:8]

    def _build_edges(
        self,
@@ -633,9 +807,14 @@ class GraphBuilder:
        # Fetch the embeddings of the node prototypes
        node_prototypes = {}
        for node in nodes:
            if hasattr(node, 'template') and node.template:
                if hasattr(node.template, 'embedding_prototype'):
                    node_prototypes[node.node_id] = np.array(node.template.embedding_prototype)
            # Priority: in-memory vector (metadata), else load from disk
            proto_list = node.metadata.get("_prototype_vector")
            if proto_list is not None:
                node_prototypes[node.node_id] = np.array(proto_list, dtype=np.float32)
            elif node.template and node.template.embedding and node.template.embedding.vector_id:
                proto_path = Path(node.template.embedding.vector_id)
                if proto_path.exists():
                    node_prototypes[node.node_id] = np.load(proto_path)

        if not node_prototypes:
            logger.warning("No node prototypes available for mapping")
@@ -741,7 +920,7 @@
        action = Action(
            type=action_type,
            target=TargetSpec(
                role=target_role,
                by_role=target_role,
                selection_policy="first",
                fallback_strategy="visual_similarity"
            ),

@@ -133,10 +133,10 @@
        node: WorkflowNode
    ) -> bool:
        """Validate the node's constraints against the state."""
        template = node.screen_template

        if template.window_title_pattern:
            if not state.raw_level or not state.raw_level.window_title:
        template = node.template

        if template and template.window and template.window.title_pattern:
            if not state.window or not state.window.window_title:
                return False

        return True
@@ -179,13 +179,14 @@
        # Compute similarities with every node
        similarities = []
        for node in candidate_nodes:
            if node.screen_template.embedding_prototype_path:
            proto_path = node.template.embedding.vector_id if (node.template and node.template.embedding) else None
            if proto_path:
                try:
                    prototype = np.load(node.screen_template.embedding_prototype_path)
                    prototype = np.load(proto_path)
                    similarity = float(np.dot(state_vector, prototype))
                    similarities.append({
                        'node_id': node.node_id,
                        'node_label': node.label,
                        'node_label': node.name,
                        'similarity': similarity,
                        'threshold': self.similarity_threshold,
                        'matched': similarity >= self.similarity_threshold
@@ -204,9 +205,9 @@
            'timestamp': timestamp,
            'failed_match_id': failed_match_id,
            'state': {
                'window_title': state.raw_level.window_title if state.raw_level else None,
                'screenshot_path': str(state.raw_level.screenshot_path) if state.raw_level else None,
                'ui_elements_count': len(state.perception_level.ui_elements) if state.perception_level else 0
                'window_title': state.window.window_title if getattr(state, 'window', None) else None,
                'screenshot_path': str(state.raw.screenshot_path) if getattr(state, 'raw', None) else None,
                'ui_elements_count': len(state.ui_elements) if getattr(state, 'ui_elements', None) else 0
            },
            'matching_results': {
                'best_confidence': best_confidence,

@@ -303,7 +303,7 @@
        if not window_info:
            return 0.5  # Neutral score when no info is available

        template = getattr(node, 'screen_template', None)
        template = getattr(node, 'template', None)
        if not template:
            return 0.5

@@ -311,7 +311,7 @@

        # Title matching
        current_title = window_info.get('title', '')
        template_pattern = getattr(template, 'window_title_pattern', None)
        template_pattern = getattr(template.window, 'title_pattern', None) if getattr(template, 'window', None) else None

        if template_pattern and current_title:
            if self.config.use_regex_title_matching:
@@ -329,7 +329,7 @@

        # Process matching
        current_process = window_info.get('process_name', '')
        template_process = getattr(template, 'process_name', None)
        template_process = getattr(template.window, 'process_name', None) if getattr(template, 'window', None) else None

        if template_process and current_process:
            if current_process.lower() == template_process.lower():
@@ -367,12 +367,12 @@
        Returns:
            Confidence score 0.0-1.0
        """
        template = getattr(node, 'screen_template', None)
        template = getattr(node, 'template', None)
        if not template:
            return 0.5

        # Fetch the template's prototype embedding
        prototype = getattr(template, 'embedding_prototype', None)
        prototype = getattr(template.embedding, 'vector_id', None) if getattr(template, 'embedding', None) else None
        if prototype is None:
            return 0.5

@@ -445,7 +445,7 @@
        if not detected_elements:
            return 0.5

        template = getattr(node, 'screen_template', None)
        template = getattr(node, 'template', None)
        if not template:
            return 0.5

@@ -92,6 +92,41 @@ def get_execution_result():
    from .execution_result import WorkflowExecutionResult
    return WorkflowExecutionResult

# Lazy imports via module __getattr__ to avoid circular imports
_LAZY_IMPORTS = {
    "StateEmbedding": "core.models.state_embedding",
    "EmbeddingComponent": "core.models.state_embedding",
    "Workflow": "core.models.workflow_graph",
    "WorkflowNode": "core.models.workflow_graph",
    "WorkflowEdge": "core.models.workflow_graph",
    "ScreenTemplate": "core.models.workflow_graph",
    "Action": "core.models.workflow_graph",
    "TargetSpec": "core.models.workflow_graph",
    "ActionType": "core.models.workflow_graph",
    "EdgeConstraints": "core.models.workflow_graph",
    "PostConditions": "core.models.workflow_graph",
    "LearningState": "core.models.workflow_graph",
    "SelectionPolicy": "core.models.workflow_graph",
    "WindowConstraint": "core.models.workflow_graph",
    "TextConstraint": "core.models.workflow_graph",
    "UIConstraint": "core.models.workflow_graph",
    "EmbeddingPrototype": "core.models.workflow_graph",
    "EdgeStats": "core.models.workflow_graph",
    "SafetyRules": "core.models.workflow_graph",
    "WorkflowStats": "core.models.workflow_graph",
    "LearningConfig": "core.models.workflow_graph",
    "WorkflowExecutionResult": "core.models.execution_result",
    "PerformanceMetrics": "core.models.execution_result",
}

def __getattr__(name):
    if name in _LAZY_IMPORTS:
        import importlib
        module = importlib.import_module(_LAZY_IMPORTS[name])
        return getattr(module, name)
    raise AttributeError(f"module 'core.models' has no attribute {name!r}")
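
# With a PEP 562 module-level __getattr__ like the one above,
# `from core.models import Workflow` only imports core.models.workflow_graph
# the first time the name is touched, which is what breaks the import cycle.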


__all__ = [
    # Standardized base models (Task 4)
    "BBox",

@@ -45,6 +45,25 @@ class BBox(BaseModel):
            return int(v)
        raise ValueError("Dimensions must be numeric")

    def __iter__(self):
        """Allows unpacking: x, y, w, h = bbox"""
        return iter((self.x, self.y, self.width, self.height))

    def __getitem__(self, index):
        """Allows index access: bbox[0], bbox[1], etc."""
        return (self.x, self.y, self.width, self.height)[index]

    def __len__(self):
        return 4

    def __eq__(self, other):
        if isinstance(other, BBox):
            return (self.x == other.x and self.y == other.y and
                    self.width == other.width and self.height == other.height)
        if isinstance(other, (tuple, list)) and len(other) == 4:
            return (self.x, self.y, self.width, self.height) == tuple(other)
        return NotImplemented

    def to_tuple(self) -> Tuple[int, int, int, int]:
        """Conversion to a (x, y, w, h) tuple"""
        return (self.x, self.y, self.width, self.height)

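    # Quick sketch of the sequence protocol added above (keyword construction
    # as with any pydantic model; values illustrative):
    #
    #   >>> bbox = BBox(x=10, y=20, width=300, height=40)
    #   >>> x, y, w, h = bbox          # __iter__
    #   >>> bbox[2]                    # __getitem__
    #   300
    #   >>> bbox == (10, 20, 300, 40)  # __eq__ against a tuple
    #   True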

@@ -311,8 +311,8 @@ class ScreenTemplate:

        # Check text constraints
        if hasattr(screen_state, 'perception'):
            detected_texts = getattr(screen_state.perception, 'detected_texts', [])
            if not self.text.matches(detected_texts):
            detected_text = getattr(screen_state.perception, 'detected_text', [])
            if not self.text.matches(detected_text):
                return False, 0.0

        # Check UI constraints

@@ -3,5 +3,6 @@ Pipeline module - Orchestration du flux RPA Vision V3
"""

from .workflow_pipeline import WorkflowPipeline, create_pipeline
from .screen_analyzer import ScreenAnalyzer

__all__ = ["WorkflowPipeline", "create_pipeline"]
__all__ = ["WorkflowPipeline", "create_pipeline", "ScreenAnalyzer"]

343
core/pipeline/screen_analyzer.py
Normal file
@@ -0,0 +1,343 @@
"""
|
||||
ScreenAnalyzer - Construction complète d'un ScreenState depuis un screenshot
|
||||
|
||||
Orchestre les 4 niveaux du ScreenState :
|
||||
Niveau 1 (Raw) : métadonnées de l'image
|
||||
Niveau 2 (Perception): OCR + embedding global
|
||||
Niveau 3 (UI) : détection d'éléments UI
|
||||
Niveau 4 (Contexte) : fenêtre active, workflow en cours
|
||||
|
||||
Ce module comble le chaînon manquant entre la capture brute (Couche 0)
|
||||
et la construction d'embeddings (Couche 3).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional, Dict, Any, List
|
||||
|
||||
from PIL import Image
|
||||
|
||||
from core.models.screen_state import (
|
||||
ScreenState,
|
||||
RawLevel,
|
||||
PerceptionLevel,
|
||||
ContextLevel,
|
||||
WindowContext,
|
||||
EmbeddingRef,
|
||||
)
|
||||
from core.models.ui_element import UIElement
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ScreenAnalyzer:
|
||||
"""
|
||||
Construit un ScreenState complet (4 niveaux) depuis un screenshot.
|
||||
|
||||
Utilise le UIDetector pour la détection d'éléments et un OCR
|
||||
(docTR ou Tesseract) pour l'extraction de texte.
|
||||
|
||||
Example:
|
||||
>>> analyzer = ScreenAnalyzer()
|
||||
>>> state = analyzer.analyze("/path/to/screenshot.png")
|
||||
>>> print(state.perception.detected_text)
|
||||
>>> print(len(state.ui_elements))
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
ui_detector=None,
|
||||
ocr_engine: Optional[str] = None,
|
||||
session_id: str = "",
|
||||
):
|
||||
"""
|
||||
Args:
|
||||
ui_detector: Instance de UIDetector (créé si None)
|
||||
ocr_engine: Moteur OCR à utiliser ("doctr", "tesseract", None=auto)
|
||||
session_id: ID de la session en cours
|
||||
"""
|
||||
self._ui_detector = ui_detector
|
||||
self._ocr_engine_name = ocr_engine
|
||||
self._ocr = None
|
||||
self.session_id = session_id
|
||||
self._state_counter = 0
|
||||
|
||||
# Initialisation lazy pour éviter les imports lourds au démarrage
|
||||
self._ui_detector_initialized = ui_detector is not None
|
||||
self._ocr_initialized = False
|
||||
|
||||
# =========================================================================
|
||||
# API publique
|
||||
# =========================================================================
|
||||
|
||||
def analyze(
|
||||
self,
|
||||
screenshot_path: str,
|
||||
window_info: Optional[Dict[str, Any]] = None,
|
||||
context: Optional[Dict[str, Any]] = None,
|
||||
) -> ScreenState:
|
||||
"""
|
||||
Analyser un screenshot et construire un ScreenState complet.
|
||||
|
||||
Args:
|
||||
screenshot_path: Chemin vers le fichier image
|
||||
window_info: Infos fenêtre active {"title": ..., "app_name": ...}
|
||||
context: Contexte métier optionnel
|
||||
|
||||
Returns:
|
||||
ScreenState avec les 4 niveaux remplis
|
||||
"""
|
||||
screenshot_path = str(screenshot_path)
|
||||
self._state_counter += 1
|
||||
|
||||
state_id = f"{self.session_id}_state_{self._state_counter:04d}" if self.session_id else f"state_{self._state_counter:04d}"
|
||||
|
||||
# Niveau 1 : Raw
|
||||
raw = self._build_raw_level(screenshot_path)
|
||||
|
||||
# Niveau 2 : Perception (OCR)
|
||||
detected_text = self._extract_text(screenshot_path)
|
||||
perception = PerceptionLevel(
|
||||
embedding=EmbeddingRef(
|
||||
provider="openclip_ViT-B-32",
|
||||
vector_id=f"data/embeddings/screens/{state_id}.npy",
|
||||
dimensions=512,
|
||||
),
|
||||
detected_text=detected_text,
|
||||
text_detection_method=self._get_ocr_method_name(),
|
||||
confidence_avg=0.85 if detected_text else 0.0,
|
||||
)
|
||||
|
||||
# Niveau 3 : UI Elements
|
||||
ui_elements = self._detect_ui_elements(screenshot_path, window_info)
|
||||
|
||||
# Niveau 4 : Contexte
|
||||
window_ctx = self._build_window_context(window_info)
|
||||
context_level = self._build_context_level(context)
|
||||
|
||||
state = ScreenState(
|
||||
screen_state_id=state_id,
|
||||
timestamp=datetime.now(),
|
||||
session_id=self.session_id,
|
||||
window=window_ctx,
|
||||
raw=raw,
|
||||
perception=perception,
|
||||
context=context_level,
|
||||
metadata={
|
||||
"analyzer_version": "1.0",
|
||||
"ui_elements_count": len(ui_elements),
|
||||
"text_regions_count": len(detected_text),
|
||||
},
|
||||
ui_elements=ui_elements,
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"ScreenState {state_id} construit: "
|
||||
f"{len(ui_elements)} éléments UI, {len(detected_text)} textes détectés"
|
||||
)
|
||||
return state
|
||||
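
    # window_info sketch: these are the keys actually read by analyze() and
    # _build_window_context(); the values are illustrative:
    #
    #   {"title": "Dossier patient - DPI", "app_name": "dpi.exe",
    #    "screen_resolution": [1920, 1080], "workspace": "main"}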

    def analyze_image(
        self,
        image: Image.Image,
        save_dir: str = "data/screens",
        window_info: Optional[Dict[str, Any]] = None,
        context: Optional[Dict[str, Any]] = None,
    ) -> ScreenState:
        """
        Analyze a PIL Image (useful when the image is already in memory).

        Saves the image to disk, then calls analyze().
        """
        save_path = Path(save_dir)
        save_path.mkdir(parents=True, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
        filename = f"screen_{timestamp}.png"
        filepath = save_path / filename

        image.save(str(filepath))
        return self.analyze(str(filepath), window_info=window_info, context=context)

    # =========================================================================
    # Level 1: Raw
    # =========================================================================

    def _build_raw_level(self, screenshot_path: str) -> RawLevel:
        file_size = 0
        try:
            file_size = os.path.getsize(screenshot_path)
        except OSError:
            pass

        return RawLevel(
            screenshot_path=screenshot_path,
            capture_method="mss",
            file_size_bytes=file_size,
        )

    # =========================================================================
    # Level 2: Perception (OCR)
    # =========================================================================

    def _extract_text(self, screenshot_path: str) -> List[str]:
        """Extract text from a screenshot via OCR."""
        self._ensure_ocr()

        if self._ocr is None:
            return []

        try:
            return self._ocr(screenshot_path)
        except Exception as e:
            logger.warning(f"OCR failed: {e}")
            return []

    def _ensure_ocr(self) -> None:
        """Initialize the OCR engine (lazy)."""
        if self._ocr_initialized:
            return
        self._ocr_initialized = True

        engine = self._ocr_engine_name

        # Auto-detection: try docTR, then Tesseract
        if engine is None or engine == "doctr":
            try:
                self._ocr = self._create_doctr_ocr()
                logger.info("OCR initialized with docTR")
                return
            except Exception as e:
                if engine == "doctr":
                    logger.warning(f"docTR not available: {e}")
                    return

        if engine is None or engine == "tesseract":
            try:
                self._ocr = self._create_tesseract_ocr()
                logger.info("OCR initialized with Tesseract")
                return
            except Exception as e:
                logger.warning(f"Tesseract not available: {e}")

        logger.warning("No OCR engine available; detected_text will stay empty")

    def _create_doctr_ocr(self):
        """Create an OCR function backed by docTR."""
        from doctr.io import DocumentFile
        from doctr.models import ocr_predictor

        predictor = ocr_predictor(
            det_arch="db_resnet50", reco_arch="crnn_vgg16_bn", pretrained=True
        )

        def ocr_func(image_path: str) -> List[str]:
            doc = DocumentFile.from_images(image_path)
            result = predictor(doc)
            texts = []
            for page in result.pages:
                for block in page.blocks:
                    for line in block.lines:
                        line_text = " ".join(word.value for word in line.words)
                        if line_text.strip():
                            texts.append(line_text.strip())
            return texts

        return ocr_func

    def _create_tesseract_ocr(self):
        """Create an OCR function backed by Tesseract."""
        import pytesseract

        def ocr_func(image_path: str) -> List[str]:
            img = Image.open(image_path)
            raw_text = pytesseract.image_to_string(img, lang="fra+eng")
            lines = [line.strip() for line in raw_text.split("\n") if line.strip()]
            return lines

        return ocr_func

    def _get_ocr_method_name(self) -> str:
        if self._ocr is None:
            return "none"
        if self._ocr_engine_name:
            return self._ocr_engine_name
        return "doctr"

    # =========================================================================
    # Level 3: UI elements
    # =========================================================================

    def _detect_ui_elements(
        self,
        screenshot_path: str,
        window_info: Optional[Dict[str, Any]] = None,
    ) -> List[UIElement]:
        """Detect the UI elements in the screenshot."""
        self._ensure_ui_detector()

        if self._ui_detector is None:
            return []

        try:
            elements = self._ui_detector.detect(
                screenshot_path, window_context=window_info
            )
            return elements
        except Exception as e:
            logger.warning(f"UI detection failed: {e}")
            return []

    def _ensure_ui_detector(self) -> None:
        """Initialize the UIDetector (lazy)."""
        if self._ui_detector_initialized:
            return
        self._ui_detector_initialized = True

        try:
            from core.detection.ui_detector import UIDetector, DetectionConfig

            config = DetectionConfig(
                use_owl_detection=False,  # OWL disabled by default (heavy)
                use_vlm_classification=True,
                confidence_threshold=0.6,
            )
            self._ui_detector = UIDetector(config)
            logger.info("UIDetector initialized")
        except Exception as e:
            logger.warning(f"UIDetector not available: {e}")
            self._ui_detector = None

    # =========================================================================
    # Level 4: Context
    # =========================================================================

    def _build_window_context(
        self, window_info: Optional[Dict[str, Any]] = None
    ) -> WindowContext:
        if window_info:
            return WindowContext(
                app_name=window_info.get("app_name", "unknown"),
                window_title=window_info.get("title", "Unknown"),
                screen_resolution=window_info.get("screen_resolution", [1920, 1080]),
                workspace=window_info.get("workspace", "main"),
            )
        return WindowContext(
            app_name="unknown",
            window_title="Unknown",
            screen_resolution=[1920, 1080],
            workspace="main",
        )

    def _build_context_level(
        self, context: Optional[Dict[str, Any]] = None
    ) -> ContextLevel:
        if context:
            return ContextLevel(
                current_workflow_candidate=context.get("workflow_candidate"),
                workflow_step=context.get("workflow_step"),
                user_id=context.get("user_id", ""),
                tags=context.get("tags", []),
                business_variables=context.get("business_variables", {}),
            )
        return ContextLevel()
@@ -319,17 +319,25 @@ class WorkflowPipeline:
            np.ndarray or None if no vector was found
        """

        # v1: prototype stored directly as a list
        # v3: prototype stored in metadata (Phase 0, March 2026)
        meta = getattr(node, "metadata", {}) or {}
        proto_list = meta.get("_prototype_vector")
        if proto_list is not None and isinstance(proto_list, list):
            try:
                return np.array(proto_list, dtype=np.float32)
            except Exception as e:
                logger.debug(f"Failed to convert metadata prototype: {e}")

        # v1: prototype stored directly as a list on the template
        tpl = getattr(node, "template", None)
        if tpl is not None:
            proto_list = getattr(tpl, "embedding_prototype", None)
            if isinstance(proto_list, list):
                try:
                    v = np.array(proto_list, dtype=np.float32)
                    return v
                    return np.array(proto_list, dtype=np.float32)
                except Exception as e:
                    logger.debug(f"Failed to convert embedding_prototype list: {e}")

        # v2: prototype stored on disk via EmbeddingPrototype.vector_id
        if tpl is not None:
            emb = getattr(tpl, "embedding", None)
@@ -341,16 +349,6 @@
                except Exception as e:
                    logger.debug(f"Failed to load vector from {vector_id}: {e}")

        # fallback (legacy naming)
        st = getattr(node, "screen_template", None)
        if st is not None:
            p = getattr(st, "embedding_prototype_path", None)
            if p:
                try:
                    return np.load(p).astype(np.float32)
                except Exception as e:
                    logger.debug(f"Failed to load legacy vector from {p}: {e}")

        return None

    # =========================================================================
@@ -918,18 +916,6 @@
            "recovery_attempted": recovery_result.success,
            "recovery_message": recovery_result.message if recovery_result else None
        }
        self.error_handler.error_history.append(error_ctx)
        self.error_handler._log_error(error_ctx)

        return {
            "execution_id": execution_id,
            "workflow_id": workflow_id,
            "success": False,
            "step_type": "execution_error",
            "error": str(e),
            "execution_time_ms": total_time_ms,
            "correlation_id": execution_id
        }


# =============================================================================

@@ -210,7 +210,7 @@
        # 3. Check observations per node
        nodes = getattr(workflow, 'nodes', [])
        for node in nodes:
            obs_count = getattr(node, 'observation_count', 0)
            obs_count = (node.metadata.get('observation_count', 0) if getattr(node, 'metadata', None) else 0)
            if obs_count < self.config.min_observations_per_node:
                recommendations.append(
                    f"Node '{getattr(node, 'node_id', 'unknown')}' has only {obs_count} observations "
@@ -240,7 +240,7 @@
            len(outlier_indices) <= len(embeddings) * self.config.max_outlier_ratio and
            (validation_result is None or validation_result.is_valid) and
            all(
                getattr(node, 'observation_count', 0) >= self.config.min_observations_per_node
                (node.metadata.get('observation_count', 0) if getattr(node, 'metadata', None) else 0) >= self.config.min_observations_per_node
                for node in nodes
            )
        )