- Frontend v4 accessible sur réseau local (192.168.1.40) - Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard) - Ollama GPU fonctionnel - Self-healing interactif - Dashboard confiance Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1004 lines
35 KiB
Python
1004 lines
35 KiB
Python
"""
|
|
GraphBuilder - Construction Automatique de Workflow Graphs
|
|
|
|
Ce module implémente la construction automatique de graphes de workflows
|
|
en analysant les sessions enregistrées et en détectant les patterns répétés.
|
|
|
|
Architecture:
|
|
1. Création de ScreenStates depuis RawSession
|
|
2. Calcul de State Embeddings pour tous les états
|
|
3. Détection de patterns via clustering DBSCAN
|
|
4. Construction de WorkflowNodes depuis clusters
|
|
5. Construction de WorkflowEdges depuis transitions
|
|
|
|
Algorithme de Détection de Patterns:
|
|
- Utilise DBSCAN (Density-Based Spatial Clustering of Applications with Noise)
|
|
- Métrique: similarité cosinus entre embeddings
|
|
- Filtre les clusters avec moins de N répétitions
|
|
- Calcule un prototype (moyenne) pour chaque cluster
|
|
|
|
Example:
|
|
>>> builder = GraphBuilder(min_pattern_repetitions=3)
|
|
>>> workflow = builder.build_from_session(raw_session)
|
|
>>> print(f"Workflow with {len(workflow.nodes)} nodes")
|
|
"""
|
|
|
|
import logging
|
|
from typing import List, Dict, Optional, Tuple
|
|
from collections import defaultdict
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
import numpy as np
|
|
from sklearn.cluster import DBSCAN
|
|
|
|
from core.models.raw_session import RawSession, Event
|
|
from core.models.screen_state import (
|
|
ScreenState, WindowContext, RawLevel, PerceptionLevel,
|
|
ContextLevel, EmbeddingRef
|
|
)
|
|
from core.models.workflow_graph import (
|
|
Workflow,
|
|
WorkflowNode,
|
|
WorkflowEdge,
|
|
ScreenTemplate,
|
|
Action,
|
|
TargetSpec,
|
|
EdgeConstraints,
|
|
PostConditions,
|
|
WindowConstraint,
|
|
TextConstraint,
|
|
UIConstraint,
|
|
EmbeddingPrototype,
|
|
)
|
|
from core.persistence import StorageManager
|
|
from core.embedding.state_embedding_builder import StateEmbeddingBuilder
|
|
from core.embedding.faiss_manager import FAISSManager
|
|
from core.training.quality_validator import TrainingQualityValidator, QualityReport
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class GraphBuilder:
    """
    Builds workflow graphs from raw recorded sessions.

    Analyzes a RawSession and automatically constructs a Workflow with its
    nodes and edges by detecting repeated patterns.

    Attributes:
        embedding_builder: Builder used to compute State Embeddings.
        faiss_manager: Optional FAISS manager used for indexing.
        quality_validator: Validator used for training-quality checks.
        min_pattern_repetitions: Minimum number of repetitions for a pattern.
        clustering_eps: Maximum distance between points for DBSCAN.
        clustering_min_samples: Minimum number of samples per cluster.
        enable_quality_validation: Whether quality validation is performed.

    Example:
        >>> builder = GraphBuilder(min_pattern_repetitions=3)
        >>> workflow = builder.build_from_session(session, "Login Workflow")
    """

    def __init__(
        self,
        embedding_builder: Optional[StateEmbeddingBuilder] = None,
        faiss_manager: Optional[FAISSManager] = None,
        quality_validator: Optional[TrainingQualityValidator] = None,
        min_pattern_repetitions: int = 3,
        clustering_eps: float = 0.15,
        clustering_min_samples: int = 2,
        enable_quality_validation: bool = True,
    ):
        """
        Initialize the GraphBuilder.

        Args:
            embedding_builder: State Embedding builder (created when not given).
            faiss_manager: Optional FAISS manager for indexing.
            quality_validator: Quality validator (created when not given).
            min_pattern_repetitions: Minimum repetitions required for a pattern.
            clustering_eps: DBSCAN epsilon (max distance between points).
            clustering_min_samples: Minimum samples required to form a cluster.
            enable_quality_validation: Whether to run quality validation.
        """
        # Fall back to default collaborators when none are injected
        # (truthiness check kept intentionally, matching prior behavior).
        self.embedding_builder = (
            embedding_builder if embedding_builder else StateEmbeddingBuilder()
        )
        self.faiss_manager = faiss_manager
        self.quality_validator = (
            quality_validator if quality_validator else TrainingQualityValidator()
        )

        self.min_pattern_repetitions = min_pattern_repetitions
        self.clustering_eps = clustering_eps
        self.clustering_min_samples = clustering_min_samples
        self.enable_quality_validation = enable_quality_validation

        logger.info(
            f"GraphBuilder initialized: "
            f"min_repetitions={min_pattern_repetitions}, "
            f"eps={clustering_eps}, "
            f"min_samples={clustering_min_samples}, "
            f"quality_validation={enable_quality_validation}"
        )
|
|
|
    def build_from_session(
        self,
        session: RawSession,
        workflow_name: Optional[str] = None,
    ) -> Workflow:
        """
        Build a complete Workflow from a RawSession.

        Process:
            1. Create ScreenStates from the screenshots
            2. Compute an embedding for each state
            3. Detect patterns via clustering
            4. Build nodes from the clusters
            5. Build edges from the transitions
            6. Optionally validate quality and adjust learning_state

        Args:
            session: Raw session to analyze.
            workflow_name: Workflow name; also used as workflow_id when given
                (an id is generated from the session id otherwise).

        Returns:
            Workflow built with its nodes and edges.

        Raises:
            ValueError: If the session has no screenshots.
        """
        if not session.screenshots:
            raise ValueError("Session has no screenshots")

        logger.info(
            f"Building workflow from session {session.session_id} "
            f"with {len(session.screenshots)} screenshots"
        )

        # Step 1: create enriched ScreenStates from the screenshots
        screen_states = self._create_screen_states(session)
        logger.debug(f"Created {len(screen_states)} screen states")

        # Step 2: compute one embedding per screen state
        embeddings = self._compute_embeddings(screen_states)
        logger.debug(f"Computed {len(embeddings)} embeddings")

        # Step 3: detect repeated patterns via DBSCAN clustering
        clusters = self._detect_patterns(embeddings, screen_states)
        logger.info(f"Detected {len(clusters)} patterns")

        # Step 4: one workflow node per cluster
        nodes = self._build_nodes(clusters, screen_states, embeddings)
        logger.info(f"Built {len(nodes)} workflow nodes")

        # Step 5: edges from the observed state transitions
        edges = self._build_edges(nodes, screen_states, session)
        logger.info(f"Built {len(edges)} workflow edges")

        # Local import to avoid widening the module-level import surface
        from core.models.workflow_graph import WorkflowStats, SafetyRules, LearningConfig

        workflow = Workflow(
            workflow_id=workflow_name or f"workflow_{session.session_id}",
            name=workflow_name or "Unnamed Workflow",
            description="Auto-generated workflow",
            version=1,
            learning_state="OBSERVATION",
            created_at=datetime.now(),
            updated_at=datetime.now(),
            # First built node is used as the entry point (insertion order)
            entry_nodes=[nodes[0].node_id] if nodes else [],
            end_nodes=[],
            nodes=nodes,
            edges=edges,
            safety_rules=SafetyRules(),
            stats=WorkflowStats(),
            learning=LearningConfig()
        )

        # Step 6: quality validation (skipped when disabled or no states)
        quality_report = None
        if self.enable_quality_validation and screen_states:
            quality_report = self._validate_workflow_quality(
                workflow, screen_states, embeddings, clusters
            )

            # Store the report in the workflow metadata
            workflow.metadata = workflow.metadata or {}
            workflow.metadata['quality_report'] = quality_report.to_dict()

            # Adjust learning_state based on measured quality
            if quality_report.is_production_ready:
                workflow.learning_state = "AUTO_CANDIDATE"
                logger.info("Workflow qualité suffisante -> AUTO_CANDIDATE")
            else:
                workflow.learning_state = "OBSERVATION"
                logger.warning(
                    f"Qualité insuffisante ({quality_report.overall_score:.3f}), "
                    f"workflow reste en OBSERVATION"
                )

        logger.info(
            f"Workflow '{workflow.name}' built successfully: "
            f"{len(nodes)} nodes, {len(edges)} edges"
        )

        return workflow
|
|
|
|
def _validate_workflow_quality(
|
|
self,
|
|
workflow: Workflow,
|
|
screen_states: List[ScreenState],
|
|
embeddings: List[np.ndarray],
|
|
clusters: Dict[int, List[int]]
|
|
) -> QualityReport:
|
|
"""
|
|
Valider la qualité du workflow construit.
|
|
|
|
Args:
|
|
workflow: Workflow à valider
|
|
screen_states: États d'écran utilisés
|
|
embeddings: Embeddings calculés
|
|
clusters: Clusters détectés
|
|
|
|
Returns:
|
|
QualityReport avec métriques et recommandations
|
|
"""
|
|
logger.info(f"Validation qualité du workflow {workflow.workflow_id}")
|
|
|
|
# Préparer les données pour le validateur
|
|
embeddings_array = np.array(embeddings)
|
|
|
|
# Créer labels depuis les clusters
|
|
labels = np.full(len(embeddings), -1) # -1 = bruit
|
|
for cluster_id, indices in clusters.items():
|
|
for idx in indices:
|
|
labels[idx] = cluster_id
|
|
|
|
# Valider avec le TrainingQualityValidator
|
|
report = self.quality_validator.validate_workflow(
|
|
workflow=workflow,
|
|
observations=screen_states,
|
|
embeddings=embeddings_array,
|
|
labels=labels
|
|
)
|
|
|
|
logger.info(
|
|
f"Validation terminée: score={report.overall_score:.3f}, "
|
|
f"production_ready={report.is_production_ready}"
|
|
)
|
|
|
|
return report
|
|
|
|
    def _create_screen_states(self, session: RawSession) -> List[ScreenState]:
        """
        Create enriched ScreenStates from the session's screenshots.

        For each screenshot:
            1. Find the associated event to recover the window context
            2. Build the four ScreenState levels (raw/perception/context/window)
            3. Leave UI elements empty (filled later by a UI detector)

        Args:
            session: Raw session.

        Returns:
            List of enriched ScreenStates, one per screenshot, in order.
        """
        screen_states = []

        # Map screenshot_id -> event so each screenshot can recover its
        # triggering event (if any)
        screenshot_to_event = {}
        for event in session.events:
            if event.screenshot_id:
                screenshot_to_event[event.screenshot_id] = event

        for i, screenshot in enumerate(session.screenshots):
            # Event associated with this screenshot (may be None)
            event = screenshot_to_event.get(screenshot.screenshot_id)

            # Window context from the event; fall back to placeholders when
            # no event/window information is available
            if event and event.window:
                window = WindowContext(
                    app_name=event.window.app_name,
                    window_title=event.window.title,
                    screen_resolution=session.environment.get("screen", {}).get("primary_resolution", [1920, 1080]),
                    workspace="main"
                )
            else:
                window = WindowContext(
                    app_name="unknown",
                    window_title="Unknown",
                    screen_resolution=[1920, 1080],
                    workspace="main"
                )

            # Raw level: absolute path is intentionally
            # data/training/sessions/{session_id}/{session_id}/{relative_path}
            # (session id appears twice in the on-disk layout)
            screenshot_absolute_path = f"data/training/sessions/{session.session_id}/{session.session_id}/{screenshot.relative_path}"
            screenshot_path = Path(screenshot_absolute_path)
            raw = RawLevel(
                screenshot_path=str(screenshot_path),
                capture_method="mss",
                # 0 when the file is missing rather than raising
                file_size_bytes=screenshot_path.stat().st_size if screenshot_path.exists() else 0
            )

            # Perception level (text detection is deferred to a VLM/OCR pass)
            perception = PerceptionLevel(
                embedding=EmbeddingRef(
                    provider="openclip_ViT-B-32",
                    vector_id=f"data/embeddings/screens/{session.session_id}_state_{i:04d}.npy",
                    dimensions=512
                ),
                detected_text=[],  # filled later by VLM/OCR
                text_detection_method="pending",
                confidence_avg=0.0
            )

            # Context level; tags only kept when the session stores a list
            context = ContextLevel(
                current_workflow_candidate=None,
                workflow_step=i,
                user_id=session.user.get("id", "unknown"),
                tags=list(session.context.get("tags", [])) if isinstance(session.context.get("tags"), list) else [],
                business_variables={}
            )

            # Parse capture timestamp; ISO strings may use a trailing 'Z'
            if isinstance(screenshot.captured_at, str):
                timestamp = datetime.fromisoformat(screenshot.captured_at.replace('Z', '+00:00'))
            else:
                timestamp = screenshot.captured_at

            # Assemble the complete ScreenState. NOTE: event_time/event_type
            # are None when the screenshot has no associated event — callers
            # must handle the None case.
            state = ScreenState(
                screen_state_id=f"{session.session_id}_state_{i:04d}",
                timestamp=timestamp,
                session_id=session.session_id,
                window=window,
                raw=raw,
                perception=perception,
                context=context,
                metadata={
                    "screenshot_id": screenshot.screenshot_id,
                    "event_type": event.type if event else None,
                    "event_time": event.t if event else None
                },
                ui_elements=[]  # filled later by a UIDetector when available
            )

            screen_states.append(state)

        logger.info(f"Created {len(screen_states)} enriched screen states")
        return screen_states
|
|
|
|
def _compute_embeddings(
|
|
self, screen_states: List[ScreenState]
|
|
) -> List[np.ndarray]:
|
|
"""
|
|
Calculer State Embeddings pour tous les états.
|
|
|
|
Utilise StateEmbeddingBuilder pour générer des embeddings
|
|
multi-modaux (image + texte + UI). Ajoute optionnellement
|
|
les embeddings à l'index FAISS.
|
|
|
|
Args:
|
|
screen_states: Liste de ScreenStates
|
|
|
|
Returns:
|
|
Liste de vecteurs d'embeddings (numpy arrays)
|
|
"""
|
|
embeddings = []
|
|
|
|
for state in screen_states:
|
|
# Construire embedding
|
|
state_embedding = self.embedding_builder.build(state)
|
|
vector = state_embedding.get_vector()
|
|
embeddings.append(vector)
|
|
|
|
# Ajouter à FAISS si disponible
|
|
if self.faiss_manager:
|
|
self.faiss_manager.add_embedding(
|
|
state.screen_state_id,
|
|
vector,
|
|
{"state_id": state.screen_state_id},
|
|
)
|
|
|
|
return embeddings
|
|
|
|
def _detect_patterns(
|
|
self,
|
|
embeddings: List[np.ndarray],
|
|
screen_states: List[ScreenState],
|
|
) -> Dict[int, List[int]]:
|
|
"""
|
|
Détecter patterns répétés via clustering DBSCAN.
|
|
|
|
Algorithme:
|
|
1. Convertir embeddings en matrice numpy
|
|
2. Appliquer DBSCAN avec métrique cosinus
|
|
3. Grouper états par cluster
|
|
4. Filtrer clusters avec assez de répétitions
|
|
|
|
Args:
|
|
embeddings: Vecteurs d'embeddings
|
|
screen_states: ScreenStates correspondants
|
|
|
|
Returns:
|
|
Dictionnaire {cluster_id: [indices des états]}
|
|
|
|
Note:
|
|
Les états non assignés (bruit) ont label=-1 et sont ignorés
|
|
"""
|
|
if len(embeddings) < self.min_pattern_repetitions:
|
|
logger.warning(
|
|
f"Not enough states ({len(embeddings)}) for pattern detection "
|
|
f"(minimum: {self.min_pattern_repetitions})"
|
|
)
|
|
return {}
|
|
|
|
# Convertir en matrice numpy
|
|
X = np.array(embeddings)
|
|
|
|
# Clustering DBSCAN
|
|
clustering = DBSCAN(
|
|
eps=self.clustering_eps,
|
|
min_samples=self.clustering_min_samples,
|
|
metric="cosine",
|
|
)
|
|
labels = clustering.fit_predict(X)
|
|
|
|
# Grouper par cluster
|
|
clusters = defaultdict(list)
|
|
noise_count = 0
|
|
|
|
for idx, label in enumerate(labels):
|
|
if label == -1:
|
|
noise_count += 1
|
|
else:
|
|
clusters[label].append(idx)
|
|
|
|
# Filtrer clusters avec assez de répétitions
|
|
filtered_clusters = {
|
|
cluster_id: indices
|
|
for cluster_id, indices in clusters.items()
|
|
if len(indices) >= self.min_pattern_repetitions
|
|
}
|
|
|
|
logger.info(
|
|
f"Clustering results: {len(filtered_clusters)} patterns, "
|
|
f"{noise_count} noise points, "
|
|
f"{len(clusters) - len(filtered_clusters)} small clusters filtered"
|
|
)
|
|
|
|
return filtered_clusters
|
|
|
|
def _build_nodes(
|
|
self,
|
|
clusters: Dict[int, List[int]],
|
|
screen_states: List[ScreenState],
|
|
embeddings: List[np.ndarray],
|
|
) -> List[WorkflowNode]:
|
|
"""
|
|
Construire WorkflowNodes depuis les clusters détectés.
|
|
|
|
Pour chaque cluster:
|
|
1. Calculer embedding prototype (moyenne normalisée)
|
|
2. Extraire contraintes depuis états du cluster
|
|
3. Créer ScreenTemplate
|
|
4. Créer WorkflowNode
|
|
|
|
Args:
|
|
clusters: Clusters détectés {cluster_id: [indices]}
|
|
screen_states: ScreenStates
|
|
embeddings: Embeddings
|
|
|
|
Returns:
|
|
Liste de WorkflowNodes
|
|
"""
|
|
nodes = []
|
|
|
|
for cluster_id, indices in clusters.items():
|
|
# Calculer embedding prototype (moyenne)
|
|
cluster_embeddings = [embeddings[i] for i in indices]
|
|
prototype = np.mean(cluster_embeddings, axis=0)
|
|
prototype = prototype / np.linalg.norm(prototype) # Normaliser
|
|
|
|
# Extraire contraintes depuis les états du cluster
|
|
cluster_states = [screen_states[i] for i in indices]
|
|
template = self._create_screen_template(cluster_states, prototype, cluster_id)
|
|
|
|
# Créer node
|
|
node = WorkflowNode(
|
|
node_id=f"node_{cluster_id:03d}",
|
|
name=f"State Pattern {cluster_id}",
|
|
description=f"Cluster detected from {len(indices)} observations",
|
|
template=template
|
|
)
|
|
|
|
nodes.append(node)
|
|
logger.debug(
|
|
f"Created node {node.node_id} with {len(indices)} observations"
|
|
)
|
|
|
|
return nodes
|
|
|
|
def _create_screen_template(
|
|
self,
|
|
states: List[ScreenState],
|
|
prototype_embedding: np.ndarray,
|
|
cluster_id: int
|
|
) -> ScreenTemplate:
|
|
"""
|
|
Créer ScreenTemplate avec architecture progressive.
|
|
|
|
S'adapte automatiquement au niveau de richesse des données :
|
|
- Agent V0 : Embedding + app_name
|
|
- Systèmes 2/3 : Embedding + window + text + ui
|
|
|
|
Args:
|
|
states: États du cluster
|
|
prototype_embedding: Embedding prototype
|
|
cluster_id: ID du cluster (pour nommer le prototype)
|
|
|
|
Returns:
|
|
ScreenTemplate avec contraintes adaptées aux données disponibles
|
|
"""
|
|
# 1. Sauvegarder prototype embedding
|
|
prototype_dir = Path("data/training/prototypes")
|
|
prototype_dir.mkdir(parents=True, exist_ok=True)
|
|
prototype_path = prototype_dir / f"cluster_{cluster_id}.npy"
|
|
np.save(prototype_path, prototype_embedding)
|
|
|
|
logger.debug(f"Saved prototype for cluster {cluster_id}: {prototype_path}")
|
|
|
|
# 2. Créer EmbeddingPrototype
|
|
embedding_proto = EmbeddingPrototype(
|
|
provider="openclip_ViT-B-32",
|
|
vector_id=str(prototype_path),
|
|
min_cosine_similarity=0.85,
|
|
sample_count=len(states)
|
|
)
|
|
|
|
# 3. Extraire contraintes (dégradation gracieuse)
|
|
window = self._extract_window_constraint(states)
|
|
text = self._extract_text_constraint(states)
|
|
ui = self._extract_ui_constraint(states)
|
|
|
|
# 4. Créer ScreenTemplate
|
|
return ScreenTemplate(
|
|
window=window,
|
|
text=text,
|
|
ui=ui,
|
|
embedding=embedding_proto
|
|
)
|
|
|
|
def _extract_window_constraint(self, states: List[ScreenState]) -> WindowConstraint:
|
|
"""
|
|
Extraire contraintes de fenêtre depuis les états.
|
|
|
|
Agent V0 : Utilise app_name (fiable)
|
|
Systèmes 2/3 : app_name + window_title + process
|
|
|
|
Args:
|
|
states: États du cluster
|
|
|
|
Returns:
|
|
WindowConstraint avec données disponibles
|
|
"""
|
|
app_names = []
|
|
window_titles = []
|
|
|
|
for state in states:
|
|
if hasattr(state, 'window') and state.window:
|
|
app_name = getattr(state.window, 'app_name', None)
|
|
window_title = getattr(state.window, 'window_title', None)
|
|
|
|
if app_name:
|
|
app_names.append(app_name)
|
|
if window_title and window_title != "unknown_window":
|
|
window_titles.append(window_title)
|
|
|
|
# Trouver app_name le plus fréquent
|
|
common_app = None
|
|
if app_names:
|
|
from collections import Counter
|
|
most_common = Counter(app_names).most_common(1)
|
|
if most_common:
|
|
common_app = most_common[0][0]
|
|
|
|
# Trouver substring commun dans window_titles
|
|
title_contains = None
|
|
if window_titles and len(window_titles) >= 2:
|
|
title_contains = self._find_common_substring(window_titles)
|
|
elif len(window_titles) == 1:
|
|
title_contains = window_titles[0]
|
|
|
|
logger.debug(
|
|
f"Extracted window constraint: app={common_app}, "
|
|
f"title_contains={title_contains}"
|
|
)
|
|
|
|
return WindowConstraint(
|
|
title_contains=title_contains,
|
|
process_name=common_app
|
|
)
|
|
|
|
def _extract_text_constraint(self, states: List[ScreenState]) -> TextConstraint:
|
|
"""
|
|
Extraire contraintes de texte depuis les états.
|
|
|
|
Agent V0 : Vide (pas d'OCR)
|
|
Systèmes 2/3 : Textes requis/interdits depuis Qwen3-VL
|
|
|
|
Args:
|
|
states: États du cluster
|
|
|
|
Returns:
|
|
TextConstraint avec textes détectés (vide si agent_v0)
|
|
"""
|
|
all_texts_sets = []
|
|
|
|
for state in states:
|
|
if hasattr(state, 'perception') and state.perception:
|
|
detected = getattr(state.perception, 'detected_text', None)
|
|
if detected and isinstance(detected, list) and len(detected) > 0:
|
|
all_texts_sets.append(set(detected))
|
|
|
|
if not all_texts_sets:
|
|
# Pas de textes détectés (agent_v0)
|
|
logger.debug("No detected texts found (agent_v0)")
|
|
return TextConstraint(
|
|
required_texts=[],
|
|
forbidden_texts=[]
|
|
)
|
|
|
|
# Textes présents dans TOUS les états (requis)
|
|
required = set.intersection(*all_texts_sets)
|
|
|
|
logger.debug(f"Extracted {len(required)} required texts from cluster")
|
|
|
|
return TextConstraint(
|
|
required_texts=sorted(list(required))[:5], # Max 5 textes
|
|
forbidden_texts=[] # TODO: Analyser textes absents
|
|
)
|
|
|
|
def _extract_ui_constraint(self, states: List[ScreenState]) -> UIConstraint:
|
|
"""
|
|
Extraire contraintes UI depuis les états.
|
|
|
|
Agent V0 : Vide (pas d'analyse UI)
|
|
Systèmes 2/3 : Rôles/types requis depuis Qwen3-VL
|
|
|
|
Args:
|
|
states: États du cluster
|
|
|
|
Returns:
|
|
UIConstraint avec éléments UI (vide si agent_v0)
|
|
"""
|
|
all_roles = []
|
|
all_types = []
|
|
element_counts = []
|
|
|
|
for state in states:
|
|
ui_elements = getattr(state, 'ui_elements', None)
|
|
if ui_elements and isinstance(ui_elements, list) and len(ui_elements) > 0:
|
|
roles = [el.role for el in ui_elements if hasattr(el, 'role')]
|
|
types = [el.type for el in ui_elements if hasattr(el, 'type')]
|
|
all_roles.append(set(roles))
|
|
all_types.append(set(types))
|
|
element_counts.append(len(ui_elements))
|
|
|
|
if not all_roles:
|
|
# Pas d'éléments UI détectés (agent_v0)
|
|
logger.debug("No UI elements found (agent_v0)")
|
|
return UIConstraint(
|
|
required_roles=[],
|
|
required_types=[],
|
|
min_element_count=0
|
|
)
|
|
|
|
# Rôles/types présents dans TOUS les états
|
|
common_roles = set.intersection(*all_roles) if all_roles else set()
|
|
common_types = set.intersection(*all_types) if all_types else set()
|
|
|
|
logger.debug(
|
|
f"Extracted UI constraint: {len(common_roles)} roles, "
|
|
f"{len(common_types)} types"
|
|
)
|
|
|
|
return UIConstraint(
|
|
required_roles=sorted(list(common_roles))[:3],
|
|
required_types=sorted(list(common_types))[:3],
|
|
min_element_count=min(element_counts) if element_counts else 0
|
|
)
|
|
|
|
def _find_common_substring(self, strings: List[str]) -> Optional[str]:
|
|
"""
|
|
Trouver substring commune ou mots communs.
|
|
|
|
Args:
|
|
strings: Liste de strings
|
|
|
|
Returns:
|
|
Mot le plus fréquent ou None
|
|
"""
|
|
if not strings:
|
|
return None
|
|
|
|
if len(strings) == 1:
|
|
return strings[0]
|
|
|
|
from collections import Counter
|
|
|
|
# Séparer en mots
|
|
all_words = []
|
|
for s in strings:
|
|
# Nettoyer et séparer
|
|
words = s.replace('-', ' ').replace('_', ' ').split()
|
|
all_words.extend([w for w in words if len(w) > 2]) # Ignorer mots courts
|
|
|
|
if not all_words:
|
|
return None
|
|
|
|
# Compter occurrences
|
|
word_counts = Counter(all_words)
|
|
|
|
# Garder mots présents dans au moins 50% des strings
|
|
threshold = len(strings) / 2
|
|
common_words = [
|
|
word for word, count in word_counts.items()
|
|
if count >= threshold
|
|
]
|
|
|
|
if common_words:
|
|
# Retourner le mot le plus fréquent
|
|
return max(common_words, key=lambda w: word_counts[w])
|
|
|
|
return None
|
|
|
|
def _build_edges(
|
|
self,
|
|
nodes: List[WorkflowNode],
|
|
screen_states: List[ScreenState],
|
|
session: RawSession,
|
|
) -> List[WorkflowEdge]:
|
|
"""
|
|
Construire WorkflowEdges depuis les transitions observées.
|
|
|
|
Algorithme:
|
|
1. Mapper chaque ScreenState vers son node (via embedding similarity)
|
|
2. Identifier les transitions (state_i -> state_j où node change)
|
|
3. Extraire l'action depuis l'événement entre les deux états
|
|
4. Créer WorkflowEdge avec action et conditions
|
|
|
|
Args:
|
|
nodes: WorkflowNodes construits
|
|
screen_states: ScreenStates
|
|
session: Session brute (pour événements)
|
|
|
|
Returns:
|
|
Liste de WorkflowEdges
|
|
"""
|
|
if not nodes or len(screen_states) < 2:
|
|
logger.warning("Not enough data to build edges")
|
|
return []
|
|
|
|
edges = []
|
|
edge_counts = defaultdict(int) # Pour compter les occurrences de chaque transition
|
|
|
|
# Étape 1: Mapper chaque état vers son node
|
|
state_to_node = self._map_states_to_nodes(screen_states, nodes)
|
|
|
|
# Étape 2: Créer un mapping screenshot_id -> événement
|
|
screenshot_to_event = {}
|
|
for event in session.events:
|
|
if event.screenshot_id:
|
|
screenshot_to_event[event.screenshot_id] = event
|
|
|
|
# Étape 3: Parcourir les transitions
|
|
for i in range(len(screen_states) - 1):
|
|
current_state = screen_states[i]
|
|
next_state = screen_states[i + 1]
|
|
|
|
current_node_id = state_to_node.get(current_state.screen_state_id)
|
|
next_node_id = state_to_node.get(next_state.screen_state_id)
|
|
|
|
# Si les deux états sont dans des nodes différents, c'est une transition
|
|
if current_node_id and next_node_id and current_node_id != next_node_id:
|
|
# Trouver l'événement qui a causé la transition
|
|
event = self._find_transition_event(
|
|
current_state, next_state, session.events
|
|
)
|
|
|
|
# Créer l'edge
|
|
edge_key = f"{current_node_id}_to_{next_node_id}"
|
|
edge_counts[edge_key] += 1
|
|
|
|
# Ne créer l'edge qu'une fois, mais compter les occurrences
|
|
if edge_counts[edge_key] == 1:
|
|
edge = self._create_edge(
|
|
current_node_id, next_node_id, event, edge_key
|
|
)
|
|
edges.append(edge)
|
|
|
|
# Mettre à jour les stats des edges avec les comptages
|
|
for edge in edges:
|
|
edge_key = f"{edge.from_node}_to_{edge.to_node}"
|
|
edge.stats.execution_count = edge_counts[edge_key]
|
|
edge.stats.success_count = edge_counts[edge_key]
|
|
|
|
logger.info(f"Built {len(edges)} edges from {sum(edge_counts.values())} transitions")
|
|
return edges
|
|
|
|
def _map_states_to_nodes(
|
|
self,
|
|
screen_states: List[ScreenState],
|
|
nodes: List[WorkflowNode]
|
|
) -> Dict[str, str]:
|
|
"""
|
|
Mapper chaque ScreenState vers le node le plus proche.
|
|
|
|
Utilise la similarité d'embedding pour trouver le meilleur match.
|
|
"""
|
|
state_to_node = {}
|
|
|
|
# Récupérer les embeddings des prototypes de nodes
|
|
node_prototypes = {}
|
|
for node in nodes:
|
|
if hasattr(node, 'template') and node.template:
|
|
if hasattr(node.template, 'embedding_prototype'):
|
|
node_prototypes[node.node_id] = np.array(node.template.embedding_prototype)
|
|
|
|
if not node_prototypes:
|
|
logger.warning("No node prototypes available for mapping")
|
|
return state_to_node
|
|
|
|
# Pour chaque état, trouver le node le plus proche
|
|
for state in screen_states:
|
|
# Calculer embedding de l'état
|
|
try:
|
|
state_embedding = self.embedding_builder.build(state)
|
|
state_vector = state_embedding.get_vector()
|
|
|
|
# Trouver le node avec la meilleure similarité
|
|
best_node_id = None
|
|
best_similarity = -1
|
|
|
|
for node_id, prototype in node_prototypes.items():
|
|
similarity = np.dot(state_vector, prototype)
|
|
if similarity > best_similarity:
|
|
best_similarity = similarity
|
|
best_node_id = node_id
|
|
|
|
if best_node_id and best_similarity > 0.7: # Seuil minimum
|
|
state_to_node[state.screen_state_id] = best_node_id
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Failed to map state {state.screen_state_id}: {e}")
|
|
|
|
return state_to_node
|
|
|
|
def _find_transition_event(
|
|
self,
|
|
current_state: ScreenState,
|
|
next_state: ScreenState,
|
|
events: List[Event]
|
|
) -> Optional[Event]:
|
|
"""
|
|
Trouver l'événement qui a causé la transition entre deux états.
|
|
|
|
Cherche l'événement (clic, frappe) qui s'est produit entre les deux screenshots.
|
|
"""
|
|
current_time = current_state.metadata.get("event_time", 0)
|
|
next_time = next_state.metadata.get("event_time", float('inf'))
|
|
|
|
# Chercher les événements d'action entre les deux timestamps
|
|
action_events = []
|
|
for event in events:
|
|
if current_time <= event.t < next_time:
|
|
if event.type in ["mouse_click", "key_press", "text_input"]:
|
|
action_events.append(event)
|
|
|
|
# Retourner le dernier événement d'action (celui qui a probablement causé la transition)
|
|
if action_events:
|
|
return action_events[-1]
|
|
|
|
return None
|
|
|
|
def _create_edge(
|
|
self,
|
|
from_node: str,
|
|
to_node: str,
|
|
event: Optional[Event],
|
|
edge_id: str
|
|
) -> WorkflowEdge:
|
|
"""
|
|
Créer un WorkflowEdge depuis une transition observée.
|
|
"""
|
|
# Déterminer le type d'action
|
|
if event:
|
|
action_type = event.type
|
|
action_params = {}
|
|
|
|
if action_type == "mouse_click":
|
|
action_params = {
|
|
"button": event.data.get("button", "left"),
|
|
"position": event.data.get("pos", [0, 0]),
|
|
"wait_after_ms": 500
|
|
}
|
|
target_role = "unknown_element" # Sera affiné avec détection UI
|
|
|
|
elif action_type == "key_press":
|
|
action_params = {
|
|
"keys": event.data.get("keys", []),
|
|
"wait_after_ms": 200
|
|
}
|
|
target_role = "keyboard_input"
|
|
|
|
elif action_type == "text_input":
|
|
action_params = {
|
|
"text": event.data.get("text", ""),
|
|
"wait_after_ms": 300
|
|
}
|
|
target_role = "text_field"
|
|
else:
|
|
action_params = {}
|
|
target_role = "unknown"
|
|
else:
|
|
action_type = "unknown"
|
|
action_params = {}
|
|
target_role = "unknown"
|
|
|
|
# Créer l'action
|
|
action = Action(
|
|
type=action_type,
|
|
target=TargetSpec(
|
|
role=target_role,
|
|
selection_policy="first",
|
|
fallback_strategy="visual_similarity"
|
|
),
|
|
parameters=action_params
|
|
)
|
|
|
|
# Créer les contraintes
|
|
constraints = EdgeConstraints(
|
|
pre_conditions={},
|
|
required_confidence=0.8,
|
|
max_wait_time_ms=5000
|
|
)
|
|
|
|
# Créer les post-conditions
|
|
post_conditions = PostConditions(
|
|
expected_node=to_node,
|
|
window_change_expected=False,
|
|
new_ui_elements_expected=[],
|
|
timeout_ms=3000
|
|
)
|
|
|
|
# Créer l'edge
|
|
from core.models.workflow_graph import EdgeStats
|
|
|
|
return WorkflowEdge(
|
|
edge_id=edge_id,
|
|
from_node=from_node,
|
|
to_node=to_node,
|
|
action=action,
|
|
constraints=constraints,
|
|
post_conditions=post_conditions,
|
|
stats=EdgeStats(),
|
|
metadata={
|
|
"created_from_event": event.type if event else None,
|
|
"auto_generated": True
|
|
}
|
|
)
|
|
|
|
|
|
def main():
    """Manual-test entry point: configure logging and build a GraphBuilder."""
    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    logging.basicConfig(level=logging.INFO, format=log_format)

    graph_builder = GraphBuilder(min_pattern_repetitions=3)
    logger.info(f"GraphBuilder initialized: {graph_builder}")
    logger.info("Ready to build workflows from sessions")
|
|
|
|
|
|
# Allow running this module directly as a quick smoke test.
if __name__ == "__main__":
    main()
|