feat: chat unifié, GestureCatalog, Copilot, Léa UI, extraction données, vérification replay
Refonte majeure du système Agent Chat et ajout de nombreux modules : - Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat avec résolution en 3 niveaux (workflow → geste → "montre-moi") - GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique, substitution automatique dans les replays, et endpoint /api/gestures - Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket (approve/skip/abort) avant chaque action - Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent pour feedback visuel pendant le replay - Data Extraction (core/extraction/) : moteur d'extraction visuelle de données (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel - ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison de screenshots, avec logique de retry (max 3) - IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés - Dashboard : nouvelles pages gestures, streaming, extractions - Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants - Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410, suppression du code hardcodé _plan_to_replay_actions Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
217
core/extraction/schema.py
Normal file
217
core/extraction/schema.py
Normal file
@@ -0,0 +1,217 @@
|
||||
"""
|
||||
Schema d'extraction de donnees - Definition des champs et navigation
|
||||
|
||||
Permet de definir un schema YAML decrivant les champs a extraire
|
||||
depuis des captures d'ecran (DPI, formulaires, listes...).
|
||||
"""
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
@dataclass
class ExtractionField:
    """Definition of a single field to extract from a screenshot.

    Attributes:
        name: Field identifier, e.g. "nom_patient" or "date_naissance".
        description: Description of the field for the VLM.
        field_type: One of "text", "date", "number" or "boolean". Any other
            value gets no type-specific validation.
        required: Whether an empty or missing value is a validation failure.
        validation_regex: Optional pattern the whole value must match.
    """

    name: str  # e.g. "nom_patient", "date_naissance"
    description: str  # Description for the VLM
    field_type: str = "text"  # "text", "date", "number", "boolean"
    required: bool = True
    validation_regex: Optional[str] = None  # Optional validation regex

    # Unannotated class attributes are not dataclass fields.
    # Accepted boolean spellings (compared case-insensitively).
    _BOOLEAN_LITERALS = (
        "true", "false", "oui", "non", "1", "0", "vrai", "faux"
    )
    # Accepted date shapes, each paired with the strptime format used to
    # confirm that the digits form a real calendar date.
    _DATE_FORMATS = (
        (r"\d{2}/\d{2}/\d{4}", "%d/%m/%Y"),    # DD/MM/YYYY
        (r"\d{2}-\d{2}-\d{4}", "%d-%m-%Y"),    # DD-MM-YYYY
        (r"\d{4}-\d{2}-\d{2}", "%Y-%m-%d"),    # YYYY-MM-DD (ISO)
        (r"\d{2}\.\d{2}\.\d{4}", "%d.%m.%Y"),  # DD.MM.YYYY
    )
    # Characters treated as thousands separators inside numbers: ASCII space,
    # no-break space and narrow no-break space.
    _NUMBER_GROUP_SEPARATORS = (" ", "\u00a0", "\u202f")

    def validate_value(self, value: Optional[str]) -> bool:
        """Validate an extracted value for this field.

        The value is converted with ``str()`` and stripped before checking.
        An empty or missing value is valid only when the field is not
        required. Otherwise the value must pass the check for ``field_type``
        and, when ``validation_regex`` is set, match it entirely.

        Args:
            value: The extracted value, or None when nothing was extracted.

        Returns:
            True if the value is valid for this field.

        Raises:
            re.error: If ``validation_regex`` is not a valid pattern.
        """
        text = "" if value is None else str(value).strip()

        # Empty value: acceptable only for optional fields.
        if not text:
            return not self.required

        # Type-specific validation.
        if self.field_type == "number":
            if not self._is_finite_number(text):
                return False
        elif self.field_type == "boolean":
            if text.lower() not in self._BOOLEAN_LITERALS:
                return False
        elif self.field_type == "date":
            if not self._is_calendar_date(text):
                return False

        # Custom regex validation, applied on top of the type check.
        if self.validation_regex:
            if not re.fullmatch(self.validation_regex, text):
                return False

        return True

    @classmethod
    def _is_finite_number(cls, text: str) -> bool:
        """Return True if *text* parses as a finite number.

        A comma is accepted as the decimal separator and the characters in
        ``_NUMBER_GROUP_SEPARATORS`` are ignored.
        """
        normalized = text.replace(",", ".")
        for separator in cls._NUMBER_GROUP_SEPARATORS:
            normalized = normalized.replace(separator, "")
        try:
            parsed = float(normalized)
        except ValueError:
            return False
        # float() also accepts "nan" and "inf", which are not usable numbers.
        # NaN is the only float that differs from itself.
        return parsed == parsed and abs(parsed) != float("inf")

    @classmethod
    def _is_calendar_date(cls, text: str) -> bool:
        """Return True if *text* has an accepted shape and is a real date."""
        for pattern, date_format in cls._DATE_FORMATS:
            # The regex pins the exact digit layout; strptime alone would
            # also accept non-padded values such as "1/2/2024".
            if not re.fullmatch(pattern, text):
                continue
            try:
                datetime.strptime(text, date_format)
            except ValueError:
                return False
            return True
        return False
|
||||
|
||||
|
||||
@dataclass
class ExtractionSchema:
    """Complete extraction schema: a list of fields plus navigation rules.

    Can be loaded from and saved to YAML for reuse.

    Attributes:
        name: Schema identifier, e.g. "dossier_patient_DPI".
        description: Free-text description of the schema.
        fields: Definitions of the fields to extract.
        navigation: Navigation rules; stored and serialized as-is.
    """

    name: str  # e.g. "dossier_patient_DPI"
    description: str
    fields: List[ExtractionField] = field(default_factory=list)
    navigation: Dict[str, Any] = field(default_factory=dict)

    # --- YAML serialization ---

    @classmethod
    def from_yaml(cls, path: str) -> "ExtractionSchema":
        """Load a schema from a YAML file.

        Args:
            path: Path to the YAML file.

        Returns:
            The ExtractionSchema built from the file content.

        Raises:
            FileNotFoundError: If *path* does not exist.
            ValueError: If the YAML document is not a mapping (this includes
                an empty file, which loads as None).
            KeyError: If a field entry has no "name" key.
        """
        yaml_path = Path(path)
        if not yaml_path.exists():
            raise FileNotFoundError(f"Schema YAML non trouve : {path}")

        with open(yaml_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        if not isinstance(data, dict):
            raise ValueError(f"Le fichier YAML doit contenir un dictionnaire, pas {type(data).__name__}")

        return cls._from_dict(data)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ExtractionSchema":
        """Build a schema from a Python dictionary.

        Raises:
            KeyError: If a field entry has no "name" key.
        """
        return cls._from_dict(data)

    @classmethod
    def _from_dict(cls, data: Dict[str, Any]) -> "ExtractionSchema":
        """Internal construction from a dict.

        Each field entry accepts "type" or "field_type" for the field type
        and "validation" or "validation_regex" for the pattern; the short
        key wins when both are present.
        """
        # A YAML key written with no value ("fields:") loads as None rather
        # than being absent, so the .get() default alone is not enough.
        fields_raw = data.get("fields") or []
        navigation = data.get("navigation") or {}

        fields = [
            ExtractionField(
                name=fd["name"],
                description=fd.get("description", ""),
                field_type=fd.get("type", fd.get("field_type", "text")),
                required=fd.get("required", True),
                validation_regex=fd.get("validation", fd.get("validation_regex")),
            )
            for fd in fields_raw
        ]

        return cls(
            name=data.get("name", "unnamed"),
            description=data.get("description", ""),
            fields=fields,
            navigation=navigation,
        )

    def to_yaml(self, path: str) -> None:
        """Save the schema to a YAML file.

        Missing parent directories are created.

        Args:
            path: Output path.
        """
        yaml_path = Path(path)
        yaml_path.parent.mkdir(parents=True, exist_ok=True)

        data = self.to_dict()

        with open(yaml_path, "w", encoding="utf-8") as f:
            yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the schema to a serializable dictionary.

        The "validation" key is emitted only for fields that define a
        validation regex. ``navigation`` is returned by reference, not
        copied.
        """
        return {
            "name": self.name,
            "description": self.description,
            "fields": [
                {
                    "name": f.name,
                    "description": f.description,
                    "type": f.field_type,
                    "required": f.required,
                    **({"validation": f.validation_regex} if f.validation_regex else {}),
                }
                for f in self.fields
            ],
            "navigation": self.navigation,
        }

    # --- Utilities ---

    @property
    def required_fields(self) -> List[ExtractionField]:
        """Return the list of required fields."""
        return [f for f in self.fields if f.required]

    @property
    def field_names(self) -> List[str]:
        """Return the list of field names, in declaration order."""
        return [f.name for f in self.fields]

    def get_field(self, name: str) -> Optional[ExtractionField]:
        """Return the first field named *name*, or None if there is none."""
        for f in self.fields:
            if f.name == name:
                return f
        return None

    def validate_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Validate a complete record against the schema.

        Args:
            record: Mapping of field name to extracted value. Keys that do
                not correspond to a schema field are ignored.

        Returns:
            Dict with:
                'valid' (bool): True when no field failed validation.
                'errors' (list): One message per invalid field.
                'completeness' (float): Share of schema fields that hold a
                    valid, non-empty value; 0.0 for a schema with no fields.
        """
        errors = []
        valid_count = 0

        for fld in self.fields:
            value = record.get(fld.name)
            if fld.validate_value(value):
                # Optional fields left empty are valid but do not count
                # towards completeness.
                if value is not None and str(value).strip():
                    valid_count += 1
            else:
                errors.append(f"Champ '{fld.name}' invalide: {value!r}")

        # Avoid dividing by zero when the schema has no fields.
        total = len(self.fields) if self.fields else 1
        completeness = valid_count / total

        return {
            "valid": len(errors) == 0,
            "errors": errors,
            "completeness": completeness,
        }
|
||||
Reference in New Issue
Block a user