Refonte majeure du système Agent Chat et ajout de nombreux modules : - Chat unifié : suppression du dual Workflows/Agent Libre, tout passe par /api/chat avec résolution en 3 niveaux (workflow → geste → "montre-moi") - GestureCatalog : 38 raccourcis clavier universels Windows avec matching sémantique, substitution automatique dans les replays, et endpoint /api/gestures - Mode Copilot : exécution pas-à-pas des workflows avec validation humaine via WebSocket (approve/skip/abort) avant chaque action - Léa UI (agent_v0/lea_ui/) : interface PyQt5 pour Windows avec overlay transparent pour feedback visuel pendant le replay - Data Extraction (core/extraction/) : moteur d'extraction visuelle de données (OCR + VLM → SQLite), avec schémas YAML et export CSV/Excel - ReplayVerifier (agent_v0/server_v1/) : vérification post-action par comparaison de screenshots, avec logique de retry (max 3) - IntentParser durci : meilleur fallback regex, type GREETING, patterns améliorés - Dashboard : nouvelles pages gestures, streaming, extractions - Tests : 63 tests GestureCatalog, 47 tests extraction, corrections tests existants - Dépréciation : /api/agent/plan et /api/agent/execute retournent HTTP 410, suppression du code hardcodé _plan_to_replay_actions Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
218 lines
6.8 KiB
Python
218 lines
6.8 KiB
Python
"""
|
|
Data extraction schema - field definitions and navigation.

Defines a YAML schema describing the fields to extract
from screenshots (DPI, forms, lists...).
|
|
"""
|
|
|
|
import math
import re
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional

import yaml
|
|
|
|
|
|
@dataclass
class ExtractionField:
    """Definition of a single field to extract from a screenshot."""

    name: str  # e.g. "nom_patient", "date_naissance"
    description: str  # Description for the VLM
    field_type: str = "text"  # "text", "date", "number", "boolean"
    required: bool = True
    validation_regex: Optional[str] = None  # Optional validation regex

    def validate_value(self, value: Optional[str]) -> bool:
        """Validate an extracted value against this field definition.

        The checks run in order: presence (for required fields), the
        type-specific format, then the optional custom regex. Any field
        type other than "number", "boolean" or "date" gets no type check.

        Args:
            value: The extracted value. It is converted with ``str()`` and
                stripped before being checked, so non-string values are
                tolerated.

        Returns:
            True if the value is valid for this field, False otherwise.
            A missing or blank value is valid only when the field is not
            required.
        """
        # Missing/blank value: acceptable only for optional fields.
        if value is None or str(value).strip() == "":
            return not self.required

        value_str = str(value).strip()

        if self.field_type == "number":
            # Accept a decimal comma and whitespace used as a thousands
            # separator. str.split() with no argument also splits on
            # non-breaking spaces, not just the ASCII space.
            normalized = "".join(value_str.split()).replace(",", ".")
            try:
                parsed = float(normalized)
            except ValueError:
                return False
            # float() happily parses "nan", "inf" and overflowing literals
            # such as "1e999"; none of these is a usable number.
            if not math.isfinite(parsed):
                return False

        elif self.field_type == "boolean":
            accepted_literals = (
                "true", "false", "oui", "non", "1", "0", "vrai", "faux"
            )
            if value_str.lower() not in accepted_literals:
                return False

        elif self.field_type == "date":
            # Format check only (common French and ISO layouts); the
            # calendar validity of the date is not verified.
            date_patterns = [
                r"\d{2}/\d{2}/\d{4}",    # DD/MM/YYYY
                r"\d{2}-\d{2}-\d{4}",    # DD-MM-YYYY
                r"\d{4}-\d{2}-\d{2}",    # YYYY-MM-DD (ISO)
                r"\d{2}\.\d{2}\.\d{4}",  # DD.MM.YYYY
            ]
            if not any(re.fullmatch(p, value_str) for p in date_patterns):
                return False

        # Custom regex must match the whole stripped value.
        if self.validation_regex:
            if not re.fullmatch(self.validation_regex, value_str):
                return False

        return True
|
|
|
|
|
|
@dataclass
class ExtractionSchema:
    """Complete extraction schema: a list of fields plus navigation rules.

    Can be loaded from / saved to YAML for reuse.
    """

    name: str  # e.g. "dossier_patient_DPI"
    description: str
    fields: List[ExtractionField] = field(default_factory=list)
    navigation: Dict[str, Any] = field(default_factory=dict)

    # --- YAML serialization ---

    @classmethod
    def from_yaml(cls, path: str) -> "ExtractionSchema":
        """Load a schema from a YAML file.

        Args:
            path: Path to the YAML file.

        Returns:
            An ExtractionSchema instance.

        Raises:
            FileNotFoundError: If ``path`` does not exist.
            ValueError: If the YAML document is not a mapping.
        """
        yaml_path = Path(path)
        if not yaml_path.exists():
            raise FileNotFoundError(f"Schema YAML non trouve : {path}")

        with open(yaml_path, "r", encoding="utf-8") as f:
            data = yaml.safe_load(f)

        if not isinstance(data, dict):
            raise ValueError(f"Le fichier YAML doit contenir un dictionnaire, pas {type(data).__name__}")

        return cls._from_dict(data)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ExtractionSchema":
        """Build a schema from a Python dictionary."""
        return cls._from_dict(data)

    @classmethod
    def _from_dict(cls, data: Dict[str, Any]) -> "ExtractionSchema":
        """Internal constructor shared by ``from_yaml`` and ``from_dict``.

        Each field entry must provide "name". The field type is read from
        "type" (falling back to "field_type") and the regex from
        "validation" (falling back to "validation_regex").

        Raises:
            KeyError: If a field entry has no "name" key.
        """
        # A YAML key written without a value (e.g. "fields:") loads as
        # None instead of being absent, so dict.get() defaults alone are
        # not enough: treat None like a missing key.
        raw_fields = data.get("fields") or []
        parsed_fields = [
            ExtractionField(
                name=fd["name"],
                description=fd.get("description", ""),
                field_type=fd.get("type", fd.get("field_type", "text")),
                required=fd.get("required", True),
                validation_regex=fd.get("validation", fd.get("validation_regex")),
            )
            for fd in raw_fields
        ]

        schema_name = data.get("name")
        schema_description = data.get("description")

        return cls(
            name="unnamed" if schema_name is None else schema_name,
            description="" if schema_description is None else schema_description,
            fields=parsed_fields,
            navigation=data.get("navigation") or {},
        )

    def to_yaml(self, path: str) -> None:
        """Save the schema to a YAML file.

        Parent directories are created if they do not exist.

        Args:
            path: Output file path.
        """
        yaml_path = Path(path)
        yaml_path.parent.mkdir(parents=True, exist_ok=True)

        data = self.to_dict()

        with open(yaml_path, "w", encoding="utf-8") as f:
            # sort_keys=False keeps the key order produced by to_dict().
            yaml.dump(data, f, default_flow_style=False, allow_unicode=True, sort_keys=False)

    def to_dict(self) -> Dict[str, Any]:
        """Convert the schema to a serializable dictionary.

        The "validation" key is only emitted for fields that define a
        validation regex.
        """
        return {
            "name": self.name,
            "description": self.description,
            "fields": [
                {
                    "name": f.name,
                    "description": f.description,
                    "type": f.field_type,
                    "required": f.required,
                    **({"validation": f.validation_regex} if f.validation_regex else {}),
                }
                for f in self.fields
            ],
            "navigation": self.navigation,
        }

    # --- Utilities ---

    @property
    def required_fields(self) -> List[ExtractionField]:
        """Return the list of required fields."""
        return [f for f in self.fields if f.required]

    @property
    def field_names(self) -> List[str]:
        """Return the list of field names."""
        return [f.name for f in self.fields]

    def get_field(self, name: str) -> Optional[ExtractionField]:
        """Return the first field called ``name``, or None if absent."""
        for f in self.fields:
            if f.name == name:
                return f
        return None

    def validate_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """Validate a complete record against the schema.

        Args:
            record: Mapping of field name to extracted value; fields
                missing from the mapping are validated as None.

        Returns:
            Dict with 'valid' (bool), 'errors' (list of messages) and
            'completeness' (float): the share of schema fields that hold
            a valid, non-blank value.
        """
        errors = []
        valid_count = 0

        for fld in self.fields:
            value = record.get(fld.name)
            if fld.validate_value(value):
                # Optional fields validate when blank, but only filled-in
                # values count towards completeness.
                if value is not None and str(value).strip():
                    valid_count += 1
            else:
                errors.append(f"Champ '{fld.name}' invalide: {value!r}")

        # Avoid dividing by zero for a schema without fields.
        total = len(self.fields) if self.fields else 1
        completeness = valid_count / total

        return {
            "valid": len(errors) == 0,
            "errors": errors,
            "completeness": completeness,
        }
|