feat: unified chat, GestureCatalog, Copilot, Léa UI, data extraction, replay verification

Major rework of the Agent Chat system, plus several new modules:

- Unified chat: the dual Workflows / Agent Libre paths are removed; everything goes
  through /api/chat with three-level resolution (workflow → gesture → "montre-moi")
- GestureCatalog: 38 universal Windows keyboard shortcuts with semantic matching,
  automatic substitution in replays, and an /api/gestures endpoint
- Copilot mode: step-by-step workflow execution with human validation over WebSocket
  (approve/skip/abort) before each action
- Léa UI (agent_v0/lea_ui/): PyQt5 interface for Windows with a transparent overlay
  for visual feedback during replay
- Data Extraction (core/extraction/): visual data extraction engine
  (OCR + VLM → SQLite), with YAML schemas and CSV/Excel export
- ReplayVerifier (agent_v0/server_v1/): post-action verification by screenshot
  comparison, with retry logic (max 3 attempts)
- Hardened IntentParser: better regex fallback, GREETING type, improved patterns
- Dashboard: new gestures, streaming and extractions pages
- Tests: 63 GestureCatalog tests, 47 extraction tests, fixes to existing tests
- Deprecation: /api/agent/plan and /api/agent/execute now return HTTP 410;
  the hardcoded _plan_to_replay_actions code is removed

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
Dom
2026-03-15 10:02:09 +01:00
parent 74a1cb4e03
commit cf495dd82f
93 changed files with 12463 additions and 1080 deletions

core/extraction/__init__.py

@@ -0,0 +1,29 @@
"""
Module d'extraction de donnees structurees depuis des captures d'ecran.
Ce module orchestre le cycle complet :
schema YAML -> navigation -> screenshot -> VLM/OCR -> validation -> SQLite -> CSV/Excel
Classes principales :
- ExtractionSchema : definition des champs et regles de navigation
- ExtractionField : definition d'un champ individuel
- FieldExtractor : extraction via VLM (Ollama) ou OCR (docTR)
- DataStore : stockage SQLite + export CSV/Excel
- IterationController : controle de la boucle de navigation
- ExtractionEngine : orchestrateur principal
"""
from .schema import ExtractionField, ExtractionSchema
from .field_extractor import FieldExtractor
from .data_store import DataStore
from .iteration_controller import IterationController
from .extraction_engine import ExtractionEngine
__all__ = [
"ExtractionField",
"ExtractionSchema",
"FieldExtractor",
"DataStore",
"IterationController",
"ExtractionEngine",
]
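
For orientation, a minimal usage sketch of the API exported above (not part of the diff). The schema name, field names, screenshot path and navigation settings are illustrative assumptions, and the VLM step assumes a local Ollama instance.

from core.extraction import ExtractionEngine, ExtractionField, ExtractionSchema

# Hypothetical two-field schema; descriptions are the hints sent to the VLM.
schema = ExtractionSchema(
    name="exemple_fiche",
    description="Example record screen",
    fields=[
        ExtractionField(name="nom", description="Last name as displayed"),
        ExtractionField(name="date_naissance", description="Birth date",
                        field_type="date", required=False),
    ],
    navigation={"type": "manual", "max_records": 5, "delay_ms": 500},
)

engine = ExtractionEngine(schema)
# Offline reprocessing of an existing screenshot (no navigation involved).
result = engine.extract_from_file("data/extractions/screenshots/example.png")
print(result["data"], result["confidence"])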

core/extraction/data_store.py

@@ -0,0 +1,420 @@
"""
DataStore - Stockage SQLite des donnees extraites + export CSV/Excel
Chaque session d'extraction (ExtractionSchema applique a un ecran) cree
une entree dans la table `extractions`. Les enregistrements individuels
sont stockes dans la table `records` avec leurs donnees JSON, le chemin
du screenshot source et un score de confiance.
"""
import csv
import json
import logging
import sqlite3
import uuid
from datetime import datetime
from io import StringIO
from pathlib import Path
from typing import Any, Dict, List, Optional
from .schema import ExtractionSchema
logger = logging.getLogger(__name__)
class DataStore:
"""Stockage des donnees extraites dans SQLite avec export CSV/Excel."""
def __init__(self, db_path: str = "data/extractions/store.db"):
self.db_path = Path(db_path)
self.db_path.parent.mkdir(parents=True, exist_ok=True)
self._init_db()
# ------------------------------------------------------------------
# Initialisation
# ------------------------------------------------------------------
def _init_db(self) -> None:
"""Creer les tables si necessaire."""
with self._connect() as conn:
conn.execute("""
CREATE TABLE IF NOT EXISTS extractions (
id TEXT PRIMARY KEY,
schema_name TEXT NOT NULL,
schema_json TEXT NOT NULL,
created_at TEXT NOT NULL,
updated_at TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'in_progress',
record_count INTEGER NOT NULL DEFAULT 0
)
""")
conn.execute("""
CREATE TABLE IF NOT EXISTS records (
id TEXT PRIMARY KEY,
extraction_id TEXT NOT NULL,
data_json TEXT NOT NULL,
screenshot_path TEXT,
confidence REAL NOT NULL DEFAULT 0.0,
errors_json TEXT,
created_at TEXT NOT NULL,
FOREIGN KEY (extraction_id) REFERENCES extractions(id)
)
""")
conn.execute("""
CREATE INDEX IF NOT EXISTS idx_records_extraction
ON records(extraction_id)
""")
def _connect(self) -> sqlite3.Connection:
"""Ouvrir une connexion SQLite."""
conn = sqlite3.connect(str(self.db_path))
conn.row_factory = sqlite3.Row
conn.execute("PRAGMA journal_mode=WAL")
return conn
# ------------------------------------------------------------------
# Extractions (sessions)
# ------------------------------------------------------------------
def create_extraction(self, schema: ExtractionSchema) -> str:
"""
Creer une nouvelle session d'extraction.
Args:
schema: Schema d'extraction
Returns:
extraction_id (UUID)
"""
extraction_id = str(uuid.uuid4())
now = datetime.utcnow().isoformat()
with self._connect() as conn:
conn.execute(
"""
INSERT INTO extractions (id, schema_name, schema_json, created_at, updated_at, status)
VALUES (?, ?, ?, ?, ?, ?)
""",
(
extraction_id,
schema.name,
json.dumps(schema.to_dict(), ensure_ascii=False),
now,
now,
"in_progress",
),
)
logger.info(
"Extraction creee : %s (schema=%s)", extraction_id[:8], schema.name
)
return extraction_id
def finish_extraction(self, extraction_id: str, status: str = "completed") -> None:
"""Marquer une extraction comme terminee."""
now = datetime.utcnow().isoformat()
with self._connect() as conn:
conn.execute(
"UPDATE extractions SET status = ?, updated_at = ? WHERE id = ?",
(status, now, extraction_id),
)
def get_extraction(self, extraction_id: str) -> Optional[Dict[str, Any]]:
"""Recuperer les metadonnees d'une extraction."""
with self._connect() as conn:
row = conn.execute(
"SELECT * FROM extractions WHERE id = ?", (extraction_id,)
).fetchone()
if row:
return dict(row)
return None
def list_extractions(self, limit: int = 50) -> List[Dict[str, Any]]:
"""Lister les extractions recentes."""
with self._connect() as conn:
rows = conn.execute(
"SELECT * FROM extractions ORDER BY created_at DESC LIMIT ?",
(limit,),
).fetchall()
return [dict(r) for r in rows]
# ------------------------------------------------------------------
# Records (enregistrements)
# ------------------------------------------------------------------
def add_record(
self,
extraction_id: str,
data: Dict[str, Any],
screenshot_path: Optional[str] = None,
confidence: float = 0.0,
errors: Optional[List[str]] = None,
) -> str:
"""
Ajouter un enregistrement extrait.
Args:
extraction_id: ID de la session d'extraction
data: Donnees extraites (dict)
screenshot_path: Chemin du screenshot source
confidence: Score de confiance [0, 1]
errors: Liste d'erreurs de validation
Returns:
record_id (UUID)
"""
record_id = str(uuid.uuid4())
now = datetime.utcnow().isoformat()
with self._connect() as conn:
conn.execute(
"""
INSERT INTO records (id, extraction_id, data_json, screenshot_path,
confidence, errors_json, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?)
""",
(
record_id,
extraction_id,
json.dumps(data, ensure_ascii=False),
screenshot_path,
confidence,
json.dumps(errors or [], ensure_ascii=False),
now,
),
)
# Mettre a jour le compteur
conn.execute(
"""
UPDATE extractions
SET record_count = record_count + 1, updated_at = ?
WHERE id = ?
""",
(now, extraction_id),
)
logger.debug(
"Record ajoute : %s (extraction=%s, confiance=%.2f)",
record_id[:8],
extraction_id[:8],
confidence,
)
return record_id
def get_records(self, extraction_id: str) -> List[Dict[str, Any]]:
"""
Recuperer tous les enregistrements d'une extraction.
Returns:
Liste de dicts avec les cles : id, data, screenshot_path,
confidence, errors, created_at
"""
with self._connect() as conn:
rows = conn.execute(
"""
SELECT id, data_json, screenshot_path, confidence,
errors_json, created_at
FROM records
WHERE extraction_id = ?
ORDER BY created_at ASC
""",
(extraction_id,),
).fetchall()
results = []
for row in rows:
results.append({
"id": row["id"],
"data": json.loads(row["data_json"]),
"screenshot_path": row["screenshot_path"],
"confidence": row["confidence"],
"errors": json.loads(row["errors_json"]) if row["errors_json"] else [],
"created_at": row["created_at"],
})
return results
# ------------------------------------------------------------------
# Export
# ------------------------------------------------------------------
def export_csv(self, extraction_id: str, output_path: str) -> str:
"""
Exporter les enregistrements en CSV.
Args:
extraction_id: ID de la session
output_path: Chemin du fichier CSV de sortie
Returns:
Chemin du fichier cree
"""
records = self.get_records(extraction_id)
if not records:
raise ValueError(f"Aucun enregistrement pour l'extraction {extraction_id}")
out = Path(output_path)
out.parent.mkdir(parents=True, exist_ok=True)
# Determiner les colonnes depuis le premier record
all_keys = self._collect_all_keys(records)
with open(out, "w", newline="", encoding="utf-8-sig") as f:
writer = csv.DictWriter(f, fieldnames=all_keys, extrasaction="ignore")
writer.writeheader()
for rec in records:
writer.writerow(rec["data"])
logger.info("Export CSV : %s (%d lignes)", output_path, len(records))
return str(out)
def export_excel(self, extraction_id: str, output_path: str) -> str:
"""
Exporter les enregistrements en Excel (openpyxl).
Args:
extraction_id: ID de la session
output_path: Chemin du fichier Excel de sortie
Returns:
Chemin du fichier cree
Raises:
ImportError: Si openpyxl n'est pas installe
"""
try:
import openpyxl
except ImportError:
raise ImportError(
"openpyxl est requis pour l'export Excel. "
"Installez-le : pip install openpyxl"
)
records = self.get_records(extraction_id)
if not records:
raise ValueError(f"Aucun enregistrement pour l'extraction {extraction_id}")
out = Path(output_path)
out.parent.mkdir(parents=True, exist_ok=True)
all_keys = self._collect_all_keys(records)
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "Extraction"
# En-tetes
for col_idx, key in enumerate(all_keys, start=1):
cell = ws.cell(row=1, column=col_idx, value=key)
cell.font = openpyxl.styles.Font(bold=True)
# Donnees
for row_idx, rec in enumerate(records, start=2):
for col_idx, key in enumerate(all_keys, start=1):
ws.cell(row=row_idx, column=col_idx, value=rec["data"].get(key, ""))
# Ajuster la largeur des colonnes
for col_idx, key in enumerate(all_keys, start=1):
max_len = max(
len(str(key)),
*(len(str(rec["data"].get(key, ""))) for rec in records),
)
ws.column_dimensions[openpyxl.utils.get_column_letter(col_idx)].width = min(max_len + 2, 50)
wb.save(str(out))
logger.info("Export Excel : %s (%d lignes)", output_path, len(records))
return str(out)
# ------------------------------------------------------------------
# Statistiques
# ------------------------------------------------------------------
def get_stats(self, extraction_id: str) -> Dict[str, Any]:
"""
Statistiques d'une extraction.
Returns:
Dict avec : record_count, avg_confidence, completeness,
field_coverage, status, duration
"""
extraction = self.get_extraction(extraction_id)
if not extraction:
return {"error": f"Extraction {extraction_id} introuvable"}
records = self.get_records(extraction_id)
if not records:
return {
"extraction_id": extraction_id,
"schema_name": extraction["schema_name"],
"status": extraction["status"],
"record_count": 0,
"avg_confidence": 0.0,
"completeness": 0.0,
"field_coverage": {},
}
# Confiance moyenne
confidences = [r["confidence"] for r in records]
avg_confidence = sum(confidences) / len(confidences) if confidences else 0.0
# Couverture par champ : pourcentage de records ayant une valeur non-nulle
schema_data = json.loads(extraction["schema_json"])
field_names = [f["name"] for f in schema_data.get("fields", [])]
field_coverage = {}
for fname in field_names:
filled = sum(
1 for r in records
if r["data"].get(fname) is not None
and str(r["data"][fname]).strip() != ""
)
field_coverage[fname] = filled / len(records) if records else 0.0
# Completude globale
completeness = (
sum(field_coverage.values()) / len(field_coverage)
if field_coverage else 0.0
)
# Erreurs
total_errors = sum(len(r.get("errors", [])) for r in records)
return {
"extraction_id": extraction_id,
"schema_name": extraction["schema_name"],
"status": extraction["status"],
"record_count": len(records),
"avg_confidence": round(avg_confidence, 3),
"completeness": round(completeness, 3),
"field_coverage": {k: round(v, 3) for k, v in field_coverage.items()},
"total_errors": total_errors,
"created_at": extraction["created_at"],
"updated_at": extraction["updated_at"],
}
# ------------------------------------------------------------------
# Nettoyage
# ------------------------------------------------------------------
def delete_extraction(self, extraction_id: str) -> bool:
"""Supprimer une extraction et tous ses records."""
with self._connect() as conn:
conn.execute("DELETE FROM records WHERE extraction_id = ?", (extraction_id,))
result = conn.execute("DELETE FROM extractions WHERE id = ?", (extraction_id,))
return result.rowcount > 0
# ------------------------------------------------------------------
# Utilitaires internes
# ------------------------------------------------------------------
@staticmethod
def _collect_all_keys(records: List[Dict[str, Any]]) -> List[str]:
"""Collecter toutes les cles uniques des records, en preservant l'ordre."""
seen = set()
keys = []
for rec in records:
for k in rec["data"].keys():
if k not in seen:
seen.add(k)
keys.append(k)
return keys
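
A small sketch of using DataStore on its own, e.g. from a test or a one-off script (values are illustrative, not part of the diff):

from core.extraction import DataStore, ExtractionField, ExtractionSchema

store = DataStore(db_path="data/extractions/store.db")
schema = ExtractionSchema(name="demo", description="Demo schema",
                          fields=[ExtractionField(name="nom", description="Name")])

extraction_id = store.create_extraction(schema)
store.add_record(extraction_id, data={"nom": "DUPONT"}, confidence=0.9)
store.finish_extraction(extraction_id)

print(store.get_stats(extraction_id))            # record_count, avg_confidence, ...
store.export_csv(extraction_id, "data/extractions/demo.csv")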

core/extraction/extraction_engine.py

@@ -0,0 +1,312 @@
"""
ExtractionEngine - Orchestrateur principal du moteur d'extraction de donnees
Orchestre le cycle complet :
naviguer -> screenshot -> extraire -> valider -> stocker -> suivant
S'appuie sur FieldExtractor (VLM/OCR), DataStore (SQLite), et
IterationController (navigation) pour realiser l'extraction automatisee
de donnees depuis des interfaces utilisateur.
"""
import logging
import time
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
import requests
from .data_store import DataStore
from .field_extractor import FieldExtractor
from .iteration_controller import IterationController
from .schema import ExtractionSchema
logger = logging.getLogger(__name__)
class ExtractionEngine:
"""
Moteur d'extraction principal.
Orchestre le cycle : naviguer -> screenshot -> extraire -> stocker -> suivant.
Modes d'utilisation :
1. Automatique : start_extraction() — boucle complete avec navigation
2. Manuel : extract_current_screen() — extraction ponctuelle d'un screenshot
"""
def __init__(
self,
schema: ExtractionSchema,
store: Optional[DataStore] = None,
field_extractor: Optional[FieldExtractor] = None,
streaming_server_url: str = "http://localhost:5005",
screenshot_dir: str = "data/extractions/screenshots",
):
"""
Args:
schema: Schema d'extraction decrivant les champs et la navigation
store: DataStore pour le stockage (cree un par defaut si absent)
field_extractor: Extracteur de champs (cree un par defaut si absent)
streaming_server_url: URL du streaming server Agent V1
screenshot_dir: Repertoire pour sauvegarder les screenshots
"""
self.schema = schema
self.store = store or DataStore()
self.field_extractor = field_extractor or FieldExtractor()
self.controller = IterationController(schema, streaming_server_url)
self.streaming_server_url = streaming_server_url.rstrip("/")
self.screenshot_dir = Path(screenshot_dir)
self.screenshot_dir.mkdir(parents=True, exist_ok=True)
# Etat interne
self._current_extraction_id: Optional[str] = None
self._is_running = False
self._should_stop = False
self._progress_callback: Optional[Callable] = None
# ------------------------------------------------------------------
# API publique - Extraction automatique
# ------------------------------------------------------------------
def start_extraction(
self,
session_id: str,
on_progress: Optional[Callable[[Dict[str, Any]], None]] = None,
) -> str:
"""
Demarrer une session d'extraction automatique.
Boucle :
1. Creer l'extraction dans le store
2. Pour chaque enregistrement :
a. Prendre un screenshot
b. Extraire les champs
c. Valider
d. Stocker
e. Naviguer au suivant
3. Finaliser et retourner l'extraction_id
Args:
session_id: ID de la session de streaming (pour navigation)
on_progress: Callback appele a chaque record (optionnel)
Returns:
extraction_id
"""
self._is_running = True
self._should_stop = False
self._progress_callback = on_progress
# Creer la session d'extraction
extraction_id = self.store.create_extraction(self.schema)
self._current_extraction_id = extraction_id
logger.info(
"Demarrage extraction %s (schema=%s, max=%d)",
extraction_id[:8],
self.schema.name,
self.controller.max_records,
)
try:
while self.controller.has_next() and not self._should_stop:
idx = self.controller.current_index
# 1. Screenshot
screenshot_path = self._take_screenshot(session_id, idx)
if screenshot_path is None:
logger.warning("Screenshot echoue a l'index %d, on continue", idx)
# Naviguer quand meme pour ne pas rester bloque
self.controller.navigate_to_next(session_id)
continue
# 2. Extraction
result = self.extract_current_screen(screenshot_path)
# 3. Stockage
self.store.add_record(
extraction_id=extraction_id,
data=result["data"],
screenshot_path=screenshot_path,
confidence=result["confidence"],
errors=result.get("errors"),
)
# 4. Callback de progression
if self._progress_callback:
progress = self.get_progress()
progress["last_record"] = result["data"]
progress["last_confidence"] = result["confidence"]
self._progress_callback(progress)
logger.info(
"Record %d/%d extrait (confiance=%.2f)",
idx + 1,
self.controller.max_records,
result["confidence"],
)
# 5. Navigation
if not self.controller.navigate_to_next(session_id):
logger.info("Fin de navigation a l'index %d", idx)
break
# Finaliser
status = "stopped" if self._should_stop else "completed"
self.store.finish_extraction(extraction_id, status=status)
logger.info(
"Extraction %s terminee : %s (%d records)",
extraction_id[:8],
status,
self.controller.current_index,
)
except Exception as e:
logger.error("Erreur pendant l'extraction : %s", e)
self.store.finish_extraction(extraction_id, status="error")
raise
finally:
self._is_running = False
self._current_extraction_id = None
return extraction_id
def stop_extraction(self) -> None:
"""Demander l'arret de l'extraction en cours."""
if self._is_running:
logger.info("Arret demande pour l'extraction en cours")
self._should_stop = True
# ------------------------------------------------------------------
# API publique - Extraction ponctuelle
# ------------------------------------------------------------------
def extract_current_screen(self, screenshot_path: str) -> Dict[str, Any]:
"""
Extraire les champs du screenshot actuel sans navigation.
Args:
screenshot_path: Chemin vers le screenshot
Returns:
Dict avec 'data', 'confidence', 'errors', 'validation'
"""
# Extraction
result = self.field_extractor.extract_fields(screenshot_path, self.schema)
# Validation contre le schema
validation = self.schema.validate_record(result["data"])
result["validation"] = validation
return result
# ------------------------------------------------------------------
# API publique - Progression
# ------------------------------------------------------------------
def get_progress(self) -> Dict[str, Any]:
"""Retourne la progression actuelle de l'extraction."""
nav_progress = self.controller.progress
stats = {}
if self._current_extraction_id:
stats = self.store.get_stats(self._current_extraction_id)
return {
"extraction_id": self._current_extraction_id,
"is_running": self._is_running,
"navigation": nav_progress,
"stats": stats,
"schema_name": self.schema.name,
}
# ------------------------------------------------------------------
# Screenshot
# ------------------------------------------------------------------
def _take_screenshot(self, session_id: str, index: int) -> Optional[str]:
"""
Prendre un screenshot via le streaming server.
Essaie d'appeler l'API du streaming server pour obtenir
le screenshot courant. En cas d'echec, retourne None.
Args:
session_id: ID de la session de streaming
index: Index de l'enregistrement courant
Returns:
Chemin du screenshot sauvegarde, ou None
"""
try:
response = requests.get(
f"{self.streaming_server_url}/api/screenshot",
params={"session_id": session_id},
timeout=10,
)
if response.status_code == 200:
# Sauvegarder le screenshot
timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
filename = f"record_{index:04d}_{timestamp}.png"
filepath = self.screenshot_dir / filename
with open(filepath, "wb") as f:
f.write(response.content)
return str(filepath)
else:
logger.warning(
"Screenshot echoue : HTTP %d", response.status_code
)
return None
except requests.exceptions.ConnectionError:
logger.warning(
"Streaming server non accessible pour screenshot"
)
return None
except Exception as e:
logger.error("Erreur screenshot : %s", e)
return None
# ------------------------------------------------------------------
# Utilitaires
# ------------------------------------------------------------------
def extract_from_file(self, screenshot_path: str) -> Dict[str, Any]:
"""
Raccourci pour extraire depuis un fichier existant
et stocker le resultat.
Utile pour du retraitement offline de screenshots.
Args:
screenshot_path: Chemin vers un screenshot existant
Returns:
Dict avec les donnees extraites et le record_id
"""
if self._current_extraction_id is None:
extraction_id = self.store.create_extraction(self.schema)
else:
extraction_id = self._current_extraction_id
result = self.extract_current_screen(screenshot_path)
record_id = self.store.add_record(
extraction_id=extraction_id,
data=result["data"],
screenshot_path=screenshot_path,
confidence=result["confidence"],
errors=result.get("errors"),
)
result["record_id"] = record_id
result["extraction_id"] = extraction_id
return result
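
A sketch of an automatic run driven by start_extraction() with a progress callback. The YAML path and the streaming session id are assumptions; the callback receives the get_progress() dict plus the last_record / last_confidence fields added above.

from core.extraction import ExtractionEngine, ExtractionSchema

schema = ExtractionSchema.from_yaml("schemas/dossier_patient.yaml")  # hypothetical file

def on_progress(p):
    nav = p["navigation"]
    print(f"{nav['current_index']}/{nav['max_records']} "
          f"last_confidence={p.get('last_confidence')}")

engine = ExtractionEngine(schema, streaming_server_url="http://localhost:5005")
extraction_id = engine.start_extraction(session_id="SESSION-123", on_progress=on_progress)
print(engine.store.get_stats(extraction_id))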

core/extraction/field_extractor.py

@@ -0,0 +1,327 @@
"""
FieldExtractor - Extraction de champs structures depuis des screenshots
Utilise un VLM (Ollama) pour comprendre le contenu visuel et en extraire
des donnees structurees selon un schema predefini.
Fallback OCR via docTR si le VLM echoue.
"""
import base64
import json
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional
import requests
from .schema import ExtractionField, ExtractionSchema
logger = logging.getLogger(__name__)
# Configuration Ollama (coherente avec le reste du projet)
OLLAMA_DEFAULT_URL = os.environ.get("OLLAMA_URL", "http://localhost:11434")
OLLAMA_DEFAULT_MODEL = os.environ.get("VLM_MODEL", "qwen3-vl:8b")
class FieldExtractor:
"""
Extrait des champs structures depuis un screenshot.
Pipeline :
1. VLM : envoyer screenshot + schema au VLM pour extraction structuree
2. Validation : verifier les regex, types, champs requis
3. (Optionnel) OCR fallback si VLM indisponible
"""
def __init__(
self,
ollama_url: str = OLLAMA_DEFAULT_URL,
ollama_model: str = OLLAMA_DEFAULT_MODEL,
timeout: int = 60,
):
"""
Args:
ollama_url: URL du serveur Ollama
ollama_model: Modele VLM a utiliser
timeout: Timeout en secondes pour les appels VLM
"""
self.ollama_url = ollama_url.rstrip("/")
self.ollama_model = ollama_model
self.timeout = timeout
# ------------------------------------------------------------------
# API publique
# ------------------------------------------------------------------
def extract_fields(
self,
screenshot_path: str,
schema: ExtractionSchema,
) -> Dict[str, Any]:
"""
Extraire les champs definis par le schema depuis un screenshot.
Args:
screenshot_path: Chemin vers l'image (PNG/JPEG)
schema: Schema d'extraction
Returns:
Dict avec les champs extraits + metadonnees
{
"data": {"nom": "DUPONT", "prenom": "Jean", ...},
"confidence": 0.85,
"errors": [],
"raw_response": "..."
}
"""
path = Path(screenshot_path)
if not path.exists():
return {
"data": {},
"confidence": 0.0,
"errors": [f"Fichier introuvable : {screenshot_path}"],
"raw_response": None,
}
# Encoder l'image en base64
image_b64 = self._encode_image(path)
# Extraction via VLM
raw_data, raw_response = self._extract_via_vlm(image_b64, schema.fields)
if raw_data is None:
logger.warning("VLM extraction echouee, tentative OCR fallback")
raw_data = self._extract_via_ocr_fallback(path, schema.fields)
raw_response = "(ocr fallback)"
# Validation et nettoyage
validated = {}
errors: List[str] = []
valid_count = 0
for fld in schema.fields:
value = raw_data.get(fld.name) if raw_data else None
# Nettoyer
if value is not None:
value = str(value).strip()
if value == "" or value.lower() in ("null", "none", "n/a"):
value = None
validated[fld.name] = value
if not fld.validate_value(value):
errors.append(
f"Champ '{fld.name}' invalide ou manquant : {value!r}"
)
else:
if value is not None and str(value).strip():
valid_count += 1
total = len(schema.fields) if schema.fields else 1
confidence = valid_count / total
return {
"data": validated,
"confidence": confidence,
"errors": errors,
"raw_response": raw_response,
}
# ------------------------------------------------------------------
# Extraction VLM
# ------------------------------------------------------------------
def _extract_via_vlm(
self, image_b64: str, fields: List[ExtractionField]
) -> tuple:
"""
Appeler le VLM (Ollama) pour extraction structuree.
Returns:
(dict_donnees | None, raw_response_text | None)
"""
prompt = self._build_extraction_prompt(fields)
try:
# Desactiver le mode thinking pour Qwen3
effective_prompt = prompt
if "qwen" in self.ollama_model.lower():
effective_prompt = f"/nothink {prompt}"
payload = {
"model": self.ollama_model,
"prompt": effective_prompt,
"images": [image_b64],
"stream": False,
"format": "json",
"options": {
"temperature": 0.1,
"num_predict": 2000,
},
}
response = requests.post(
f"{self.ollama_url}/api/generate",
json=payload,
timeout=self.timeout,
)
if response.status_code != 200:
logger.error(
"Erreur Ollama %d : %s",
response.status_code,
response.text[:300],
)
return None, None
result = response.json()
raw_text = result.get("response", "").strip()
logger.debug("Reponse VLM brute : %s", raw_text[:500])
parsed = self._parse_vlm_response(raw_text)
return parsed, raw_text
except requests.exceptions.Timeout:
logger.error("Timeout VLM apres %ds", self.timeout)
return None, None
except requests.exceptions.ConnectionError:
logger.error("Ollama non accessible a %s", self.ollama_url)
return None, None
except Exception as e:
logger.error("Erreur VLM inattendue : %s", e)
return None, None
def _build_extraction_prompt(self, fields: List[ExtractionField]) -> str:
"""Construire le prompt d'extraction structure pour le VLM."""
field_descriptions = []
for f in fields:
desc = f"- {f.name} ({f.field_type}): {f.description}"
if f.required:
desc += " [OBLIGATOIRE]"
if f.validation_regex:
desc += f" (format: {f.validation_regex})"
field_descriptions.append(desc)
fields_text = "\n".join(field_descriptions)
return f"""Regarde cette capture d'ecran et extrais les informations suivantes.
CHAMPS A EXTRAIRE :
{fields_text}
INSTRUCTIONS :
1. Extrais chaque champ tel qu'il apparait a l'ecran
2. Si un champ n'est pas visible, mets null
3. Pour les dates, conserve le format tel qu'affiche
4. Pour les nombres, conserve le format avec virgule si present
5. Reponds UNIQUEMENT en JSON valide
FORMAT DE REPONSE :
Un objet JSON avec les cles correspondant aux noms de champs ci-dessus.
Exemple : {{"nom": "DUPONT", "prenom": "Jean", "date_naissance": "15/03/1965"}}
Extrais maintenant les donnees :"""
def _parse_vlm_response(self, text: str) -> Optional[Dict[str, Any]]:
"""Parser la reponse JSON du VLM."""
if not text:
return None
# Essayer le parse direct
try:
return json.loads(text)
except json.JSONDecodeError:
pass
# Chercher un objet JSON dans la reponse
match = re.search(r"\{[\s\S]*\}", text)
if match:
try:
return json.loads(match.group())
except json.JSONDecodeError:
pass
# Chercher entre balises ```json ... ```
match = re.search(r"```(?:json)?\s*(\{[\s\S]*?\})\s*```", text)
if match:
try:
return json.loads(match.group(1))
except json.JSONDecodeError:
pass
logger.warning("Impossible de parser la reponse VLM en JSON")
return None
# ------------------------------------------------------------------
# OCR Fallback
# ------------------------------------------------------------------
def _extract_via_ocr_fallback(
self, image_path: Path, fields: List[ExtractionField]
) -> Optional[Dict[str, Any]]:
"""
Fallback : extraire du texte brut via OCR (docTR) puis tenter
un mapping basique vers les champs.
Ce fallback est tres basique ; il fournit le texte brut
sans mapping intelligent. Le VLM reste la methode privilegiee.
"""
try:
from PIL import Image as PILImage
img = PILImage.open(str(image_path))
# Tenter docTR
try:
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
predictor = ocr_predictor(det_arch="db_mobilenet_v3_large", reco_arch="crnn_mobilenet_v3_large", pretrained=True)
doc = DocumentFile.from_images([str(image_path)])
result = predictor(doc)
# Extraire tout le texte
all_text = []
for page in result.pages:
for block in page.blocks:
for line in block.lines:
line_text = " ".join(w.value for w in line.words)
all_text.append(line_text)
full_text = "\n".join(all_text)
logger.info("OCR fallback : %d lignes extraites", len(all_text))
# Retourner le texte complet dans un champ special
return {"_ocr_text": full_text}
except ImportError:
logger.warning("docTR non disponible pour le fallback OCR")
return None
except Exception as e:
logger.error("Erreur OCR fallback : %s", e)
return None
# ------------------------------------------------------------------
# Utilitaires
# ------------------------------------------------------------------
@staticmethod
def _encode_image(path: Path) -> str:
"""Encoder une image en base64."""
with open(path, "rb") as f:
return base64.b64encode(f.read()).decode("utf-8")
def check_vlm_available(self) -> bool:
"""Verifier si le VLM Ollama est accessible."""
try:
response = requests.get(
f"{self.ollama_url}/api/tags", timeout=5
)
return response.status_code == 200
except (requests.RequestException, ConnectionError, TimeoutError):
return False
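
FieldExtractor can also be called directly against a single screenshot. A sketch (file name and regex are illustrative; the Ollama URL and model fall back to the OLLAMA_URL / VLM_MODEL environment variables as above):

from core.extraction import ExtractionField, ExtractionSchema, FieldExtractor

schema = ExtractionSchema(name="demo", description="Demo schema", fields=[
    ExtractionField(name="numero_dossier",
                    description="Record number shown in the header",
                    validation_regex=r"\d{6}"),
])

extractor = FieldExtractor(timeout=60)
if extractor.check_vlm_available():
    result = extractor.extract_fields("capture.png", schema)  # hypothetical screenshot
    print(result["data"], result["confidence"], result["errors"])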

core/extraction/iteration_controller.py

@@ -0,0 +1,258 @@
"""
IterationController - Controle de navigation entre enregistrements
Gere la boucle de navigation : passage au record suivant, pagination,
scroll, etc. Communique avec le streaming server (Agent V1) pour
envoyer les actions de navigation sur la machine cible.
"""
import logging
import time
from typing import Any, Dict, Optional
import requests
from .schema import ExtractionSchema
logger = logging.getLogger(__name__)
class IterationController:
"""
Controle la navigation entre les enregistrements a extraire.
Types de navigation supportes :
- list_detail : cliquer sur chaque element d'une liste
- pagination : bouton suivant / page suivante
- scroll : defilement vertical
- manual : l'utilisateur navigue manuellement
"""
def __init__(
self,
schema: ExtractionSchema,
streaming_server_url: str = "http://localhost:5005",
):
"""
Args:
schema: Schema d'extraction (contient les regles de navigation)
streaming_server_url: URL du streaming server Agent V1
"""
self.schema = schema
self.server_url = streaming_server_url.rstrip("/")
self.current_index = 0
self.max_records = schema.navigation.get("max_records", 100)
self.nav_type = schema.navigation.get("type", "manual")
self.nav_action = schema.navigation.get("next_record", "click_next_in_list")
self.nav_delay = schema.navigation.get("delay_ms", 1000)
# Etat interne
self._started = False
self._finished = False
# ------------------------------------------------------------------
# API publique
# ------------------------------------------------------------------
def has_next(self) -> bool:
"""Retourne True s'il reste des enregistrements a traiter."""
if self._finished:
return False
return self.current_index < self.max_records
def navigate_to_next(self, session_id: str) -> bool:
"""
Naviguer vers l'enregistrement suivant.
Envoie les actions de navigation au streaming server
en fonction du type de navigation defini dans le schema.
Args:
session_id: ID de la session de streaming
Returns:
True si la navigation a reussi
"""
if not self.has_next():
logger.info("Plus d'enregistrements a traiter (index=%d)", self.current_index)
return False
success = False
if self.nav_type == "manual":
# Mode manuel : on attend juste un delai
logger.info(
"Navigation manuelle : attente de %dms (index=%d)",
self.nav_delay,
self.current_index,
)
time.sleep(self.nav_delay / 1000)
success = True
elif self.nav_type == "pagination":
success = self._navigate_pagination(session_id)
elif self.nav_type == "list_detail":
success = self._navigate_list_detail(session_id)
elif self.nav_type == "scroll":
success = self._navigate_scroll(session_id)
else:
logger.warning("Type de navigation inconnu : %s", self.nav_type)
success = False
if success:
self.current_index += 1
logger.debug(
"Navigation reussie -> index=%d/%d",
self.current_index,
self.max_records,
)
return success
def navigate_to_record(self, session_id: str, index: int) -> bool:
"""
Naviguer vers un enregistrement specifique.
Args:
session_id: ID de la session de streaming
index: Index de l'enregistrement cible
Returns:
True si la navigation a reussi
"""
if index < 0 or index >= self.max_records:
logger.error("Index hors limites : %d (max=%d)", index, self.max_records)
return False
# Naviguer pas a pas jusqu'a l'index cible
steps = index - self.current_index
if steps < 0:
logger.warning(
"Navigation arriere non supportee (current=%d, target=%d)",
self.current_index,
index,
)
return False
for _ in range(steps):
if not self.navigate_to_next(session_id):
return False
return True
def reset(self) -> None:
"""Reinitialiser le controleur."""
self.current_index = 0
self._started = False
self._finished = False
def mark_finished(self) -> None:
"""Marquer l'iteration comme terminee (ex: fin de liste detectee)."""
self._finished = True
logger.info("Iteration marquee comme terminee a l'index %d", self.current_index)
@property
def progress(self) -> Dict[str, Any]:
"""Retourne la progression actuelle."""
return {
"current_index": self.current_index,
"max_records": self.max_records,
"progress_pct": round(
(self.current_index / self.max_records * 100)
if self.max_records > 0 else 0,
1,
),
"nav_type": self.nav_type,
"finished": self._finished,
}
# ------------------------------------------------------------------
# Navigation specifique
# ------------------------------------------------------------------
def _navigate_pagination(self, session_id: str) -> bool:
"""Navigation par pagination (bouton suivant)."""
action = {
"type": "click",
"target": self.nav_action,
"description": "Cliquer sur le bouton suivant / page suivante",
}
return self._send_action(session_id, action)
def _navigate_list_detail(self, session_id: str) -> bool:
"""Navigation dans une liste (cliquer sur l'element suivant)."""
action = {
"type": "click",
"target": self.nav_action,
"index": self.current_index,
"description": f"Cliquer sur l'element {self.current_index + 1} de la liste",
}
return self._send_action(session_id, action)
def _navigate_scroll(self, session_id: str) -> bool:
"""Navigation par defilement."""
action = {
"type": "scroll",
"direction": "down",
"amount": self.schema.navigation.get("scroll_amount", 300),
"description": "Defiler vers le bas",
}
return self._send_action(session_id, action)
# ------------------------------------------------------------------
# Communication avec le streaming server
# ------------------------------------------------------------------
def _send_action(self, session_id: str, action: Dict[str, Any]) -> bool:
"""
Envoyer une action de navigation au streaming server.
L'action est envoyee via l'API du streaming server (port 5005).
Si le serveur n'est pas disponible, on simule un delai.
Args:
session_id: ID de la session de streaming
action: Description de l'action a executer
Returns:
True si l'action a ete executee ou simulee
"""
try:
payload = {
"session_id": session_id,
"action": action,
}
response = requests.post(
f"{self.server_url}/api/action",
json=payload,
timeout=10,
)
if response.status_code == 200:
# Attendre le delai de navigation
if self.nav_delay > 0:
time.sleep(self.nav_delay / 1000)
return True
else:
logger.warning(
"Action de navigation echouee : HTTP %d", response.status_code
)
return False
except requests.exceptions.ConnectionError:
logger.warning(
"Streaming server non accessible a %s — simulation du delai",
self.server_url,
)
# Simuler l'attente de navigation (mode degrade)
if self.nav_delay > 0:
time.sleep(self.nav_delay / 1000)
return True
except Exception as e:
logger.error("Erreur envoi action de navigation : %s", e)
return False
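
The navigation block of a schema is what drives this controller. A sketch of the keys it reads (values are illustrative; only the keys shown above are interpreted):

from core.extraction import ExtractionSchema, IterationController

navigation = {
    "type": "pagination",                  # "list_detail" | "pagination" | "scroll" | "manual"
    "next_record": "click_next_in_list",   # target forwarded to the streaming server
    "max_records": 50,
    "delay_ms": 1000,
    "scroll_amount": 300,                  # only used when type == "scroll"
}

controller = IterationController(ExtractionSchema(name="demo", description="",
                                                  navigation=navigation))
print(controller.progress)                 # current_index, max_records, progress_pct, ...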

core/extraction/schema.py

@@ -0,0 +1,217 @@
"""
Schema d'extraction de donnees - Definition des champs et navigation
Permet de definir un schema YAML decrivant les champs a extraire
depuis des captures d'ecran (DPI, formulaires, listes...).
"""
import re
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
import yaml
@dataclass
class ExtractionField:
"""Definition d'un champ a extraire depuis un screenshot."""
name: str # Ex: "nom_patient", "date_naissance"
description: str # Description pour le VLM
field_type: str = "text" # "text", "date", "number", "boolean"
required: bool = True
validation_regex: Optional[str] = None # Regex de validation optionnelle
def validate_value(self, value: Optional[str]) -> bool:
"""
Valider une valeur extraite pour ce champ.
Returns:
True si la valeur est valide
"""
# Champ requis mais absent
if self.required and (value is None or str(value).strip() == ""):
return False
# Pas de valeur et pas requis => OK
if value is None or str(value).strip() == "":
return True
value_str = str(value).strip()
# Validation par type
if self.field_type == "number":
try:
float(value_str.replace(",", ".").replace(" ", ""))
except ValueError:
return False
elif self.field_type == "boolean":
if value_str.lower() not in (
"true", "false", "oui", "non", "1", "0", "vrai", "faux"
):
return False
elif self.field_type == "date":
# Accepter les formats courants FR
date_patterns = [
r"\d{2}/\d{2}/\d{4}", # JJ/MM/AAAA
r"\d{2}-\d{2}-\d{4}", # JJ-MM-AAAA
r"\d{4}-\d{2}-\d{2}", # AAAA-MM-JJ (ISO)
r"\d{2}\.\d{2}\.\d{4}", # JJ.MM.AAAA
]
if not any(re.fullmatch(p, value_str) for p in date_patterns):
return False
# Validation regex custom
if self.validation_regex:
if not re.fullmatch(self.validation_regex, value_str):
return False
return True
@dataclass
class ExtractionSchema:
"""
Schema complet d'extraction : liste de champs + regles de navigation.
Peut etre charge/sauvegarde en YAML pour reutilisation.
"""
name: str # Ex: "dossier_patient_DPI"
description: str
fields: List[ExtractionField] = field(default_factory=list)
navigation: Dict[str, Any] = field(default_factory=dict)
# --- Serialisation YAML ---
@classmethod
def from_yaml(cls, path: str) -> "ExtractionSchema":
"""
Charger un schema depuis un fichier YAML.
Args:
path: Chemin vers le fichier YAML
Returns:
Instance ExtractionSchema
"""
yaml_path = Path(path)
if not yaml_path.exists():
raise FileNotFoundError(f"Schema YAML non trouve : {path}")
with open(yaml_path, "r", encoding="utf-8") as f:
data = yaml.safe_load(f)
if not isinstance(data, dict):
raise ValueError(f"Le fichier YAML doit contenir un dictionnaire, pas {type(data).__name__}")
return cls._from_dict(data)
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "ExtractionSchema":
"""Construire un schema depuis un dictionnaire Python."""
return cls._from_dict(data)
@classmethod
def _from_dict(cls, data: Dict[str, Any]) -> "ExtractionSchema":
"""Construction interne depuis un dict."""
fields_raw = data.get("fields", [])
fields = []
for fd in fields_raw:
fields.append(ExtractionField(
name=fd["name"],
description=fd.get("description", ""),
field_type=fd.get("type", fd.get("field_type", "text")),
required=fd.get("required", True),
validation_regex=fd.get("validation", fd.get("validation_regex")),
))
return cls(
name=data.get("name", "unnamed"),
description=data.get("description", ""),
fields=fields,
navigation=data.get("navigation", {}),
)
def to_yaml(self, path: str) -> None:
"""
Sauvegarder le schema en fichier YAML.
Args:
path: Chemin de sortie
"""
yaml_path = Path(path)
yaml_path.parent.mkdir(parents=True, exist_ok=True)
data = self.to_dict()
with open(yaml_path, "w", encoding="utf-8") as f:
yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)
def to_dict(self) -> Dict[str, Any]:
"""Convertir en dictionnaire serialisable."""
return {
"name": self.name,
"description": self.description,
"fields": [
{
"name": f.name,
"description": f.description,
"type": f.field_type,
"required": f.required,
**({"validation": f.validation_regex} if f.validation_regex else {}),
}
for f in self.fields
],
"navigation": self.navigation,
}
# --- Utilitaires ---
@property
def required_fields(self) -> List[ExtractionField]:
"""Retourne la liste des champs obligatoires."""
return [f for f in self.fields if f.required]
@property
def field_names(self) -> List[str]:
"""Retourne la liste des noms de champs."""
return [f.name for f in self.fields]
def get_field(self, name: str) -> Optional[ExtractionField]:
"""Recuperer un champ par son nom."""
for f in self.fields:
if f.name == name:
return f
return None
def validate_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
"""
Valider un enregistrement complet contre le schema.
Returns:
Dict avec 'valid' (bool), 'errors' (list), 'completeness' (float)
"""
errors = []
valid_count = 0
for fld in self.fields:
value = record.get(fld.name)
if fld.validate_value(value):
if value is not None and str(value).strip():
valid_count += 1
else:
errors.append(f"Champ '{fld.name}' invalide: {value!r}")
total = len(self.fields) if self.fields else 1
completeness = valid_count / total
return {
"valid": len(errors) == 0,
"errors": errors,
"completeness": completeness,
}
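
To close, a sketch of the YAML shape that from_yaml() expects, loaded here through from_dict() so the example stays self-contained (field names, descriptions and navigation values are illustrative):

import yaml
from core.extraction import ExtractionSchema

yaml_text = """
name: dossier_patient_DPI
description: Patient record screen in the DPI
fields:
  - name: nom
    description: Last name shown at the top of the record
    type: text
    required: true
  - name: date_naissance
    description: Birth date (JJ/MM/AAAA)
    type: date
    required: false
navigation:
  type: list_detail
  next_record: click_next_in_list
  max_records: 100
  delay_ms: 1000
"""

schema = ExtractionSchema.from_dict(yaml.safe_load(yaml_text))
print(schema.field_names)
print(schema.validate_record({"nom": "DUPONT", "date_naissance": "15/03/1965"}))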