Files
rpa_vision_v3/graph_builder.py
Dom a27b74cf22 v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes d'exécution
- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 11:23:51 +01:00

1004 lines
35 KiB
Python

"""
GraphBuilder - Construction Automatique de Workflow Graphs
Ce module implémente la construction automatique de graphes de workflows
en analysant les sessions enregistrées et en détectant les patterns répétés.
Architecture:
1. Création de ScreenStates depuis RawSession
2. Calcul de State Embeddings pour tous les états
3. Détection de patterns via clustering DBSCAN
4. Construction de WorkflowNodes depuis clusters
5. Construction de WorkflowEdges depuis transitions
Algorithme de Détection de Patterns:
- Utilise DBSCAN (Density-Based Spatial Clustering of Applications with Noise)
- Métrique: similarité cosinus entre embeddings
- Filtre les clusters avec moins de N répétitions
- Calcule un prototype (moyenne) pour chaque cluster
Example:
>>> builder = GraphBuilder(min_pattern_repetitions=3)
>>> workflow = builder.build_from_session(raw_session)
>>> print(f"Workflow with {len(workflow.nodes)} nodes")
"""
import logging
from typing import List, Dict, Optional, Tuple
from collections import defaultdict
from datetime import datetime
from pathlib import Path
import numpy as np
from sklearn.cluster import DBSCAN
from core.models.raw_session import RawSession, Event
from core.models.screen_state import (
ScreenState, WindowContext, RawLevel, PerceptionLevel,
ContextLevel, EmbeddingRef
)
from core.models.workflow_graph import (
Workflow,
WorkflowNode,
WorkflowEdge,
ScreenTemplate,
Action,
TargetSpec,
EdgeConstraints,
PostConditions,
WindowConstraint,
TextConstraint,
UIConstraint,
EmbeddingPrototype,
)
from core.persistence import StorageManager
from core.embedding.state_embedding_builder import StateEmbeddingBuilder
from core.embedding.faiss_manager import FAISSManager
from core.training.quality_validator import TrainingQualityValidator, QualityReport
logger = logging.getLogger(__name__)
class GraphBuilder:
    """
    Build workflow graphs from raw recorded sessions.

    Analyzes a RawSession and automatically constructs a Workflow with
    its nodes and edges by detecting repeated patterns (DBSCAN over
    state embeddings).

    Attributes:
        embedding_builder: Builder used to compute State Embeddings
        faiss_manager: Optional FAISS manager for indexing
        quality_validator: Validator scoring the built workflow
        min_pattern_repetitions: Minimum repetitions for a pattern
        clustering_eps: Maximum distance between points for DBSCAN
        clustering_min_samples: Minimum samples per DBSCAN cluster

    Example:
        >>> builder = GraphBuilder(min_pattern_repetitions=3)
        >>> workflow = builder.build_from_session(session, "Login Workflow")
    """
def __init__(
    self,
    embedding_builder: Optional[StateEmbeddingBuilder] = None,
    faiss_manager: Optional[FAISSManager] = None,
    quality_validator: Optional[TrainingQualityValidator] = None,
    min_pattern_repetitions: int = 3,
    clustering_eps: float = 0.15,
    clustering_min_samples: int = 2,
    enable_quality_validation: bool = True,
):
    """
    Initialize the GraphBuilder.

    Args:
        embedding_builder: Builder for State Embeddings (a default one
            is created when not supplied)
        faiss_manager: Optional FAISS manager used for indexing
        quality_validator: Quality validator (default-created when not supplied)
        min_pattern_repetitions: Minimum repetitions for a pattern
        clustering_eps: Epsilon for DBSCAN (max distance between points)
        clustering_min_samples: Minimum samples for a DBSCAN cluster
        enable_quality_validation: Whether to run quality validation
    """
    # Collaborators: fall back to default instances when none injected.
    self.embedding_builder = embedding_builder or StateEmbeddingBuilder()
    self.quality_validator = quality_validator or TrainingQualityValidator()
    self.faiss_manager = faiss_manager
    # Pattern-detection / clustering knobs.
    self.min_pattern_repetitions = min_pattern_repetitions
    self.clustering_eps = clustering_eps
    self.clustering_min_samples = clustering_min_samples
    self.enable_quality_validation = enable_quality_validation
    logger.info(
        f"GraphBuilder initialized: "
        f"min_repetitions={min_pattern_repetitions}, "
        f"eps={clustering_eps}, "
        f"min_samples={clustering_min_samples}, "
        f"quality_validation={enable_quality_validation}"
    )
def build_from_session(
    self,
    session: RawSession,
    workflow_name: Optional[str] = None,
) -> Workflow:
    """
    Build a complete Workflow from a RawSession.

    Process:
    1. Create ScreenStates from screenshots
    2. Compute embeddings for each state
    3. Detect patterns via clustering
    4. Build nodes from clusters
    5. Build edges from transitions
    6. Optionally validate quality and gate the learning state

    Args:
        session: Raw session to analyze
        workflow_name: Workflow name (generated from session id when None)

    Returns:
        Built Workflow with nodes and edges

    Raises:
        ValueError: If the session has no screenshots
    """
    if not session.screenshots:
        raise ValueError("Session has no screenshots")
    logger.info(
        f"Building workflow from session {session.session_id} "
        f"with {len(session.screenshots)} screenshots"
    )
    # Step 1: create ScreenStates
    screen_states = self._create_screen_states(session)
    logger.debug(f"Created {len(screen_states)} screen states")
    # Step 2: compute embeddings
    embeddings = self._compute_embeddings(screen_states)
    logger.debug(f"Computed {len(embeddings)} embeddings")
    # Step 3: detect patterns
    clusters = self._detect_patterns(embeddings, screen_states)
    logger.info(f"Detected {len(clusters)} patterns")
    # Step 4: build nodes
    nodes = self._build_nodes(clusters, screen_states, embeddings)
    logger.info(f"Built {len(nodes)} workflow nodes")
    # Step 5: build edges
    edges = self._build_edges(nodes, screen_states, session)
    logger.info(f"Built {len(edges)} workflow edges")
    # Assemble the Workflow (local import keeps the module header lean).
    from core.models.workflow_graph import WorkflowStats, SafetyRules, LearningConfig
    workflow = Workflow(
        workflow_id=workflow_name or f"workflow_{session.session_id}",
        name=workflow_name or "Unnamed Workflow",
        description="Auto-generated workflow",
        version=1,
        learning_state="OBSERVATION",
        created_at=datetime.now(),
        updated_at=datetime.now(),
        # Heuristic: the first built node acts as the single entry point.
        entry_nodes=[nodes[0].node_id] if nodes else [],
        end_nodes=[],
        nodes=nodes,
        edges=edges,
        safety_rules=SafetyRules(),
        stats=WorkflowStats(),
        learning=LearningConfig()
    )
    # Step 6: quality validation (optional gate on learning_state)
    quality_report = None
    if self.enable_quality_validation and screen_states:
        quality_report = self._validate_workflow_quality(
            workflow, screen_states, embeddings, clusters
        )
        # Store the report in the workflow metadata.
        # NOTE(review): assumes Workflow exposes a mutable `metadata`
        # attribute — confirm against the model definition.
        workflow.metadata = workflow.metadata or {}
        workflow.metadata['quality_report'] = quality_report.to_dict()
        # Promote only when the validator deems the workflow production-ready.
        if quality_report.is_production_ready:
            workflow.learning_state = "AUTO_CANDIDATE"
            logger.info("Workflow qualité suffisante -> AUTO_CANDIDATE")
        else:
            workflow.learning_state = "OBSERVATION"
            logger.warning(
                f"Qualité insuffisante ({quality_report.overall_score:.3f}), "
                f"workflow reste en OBSERVATION"
            )
    logger.info(
        f"Workflow '{workflow.name}' built successfully: "
        f"{len(nodes)} nodes, {len(edges)} edges"
    )
    return workflow
def _validate_workflow_quality(
    self,
    workflow: Workflow,
    screen_states: List[ScreenState],
    embeddings: List[np.ndarray],
    clusters: Dict[int, List[int]]
) -> QualityReport:
    """
    Validate the quality of the built workflow.

    Args:
        workflow: Workflow to validate
        screen_states: Screen states that were used
        embeddings: Computed embeddings
        clusters: Detected clusters

    Returns:
        QualityReport with metrics and recommendations
    """
    logger.info(f"Validation qualité du workflow {workflow.workflow_id}")
    # Stack embeddings for the validator.
    embeddings_array = np.array(embeddings)
    # One label per observation; -1 marks noise/unclustered states.
    labels = np.full(len(embeddings), -1)
    for cluster_id, indices in clusters.items():
        labels[indices] = cluster_id
    # Delegate scoring to the TrainingQualityValidator.
    report = self.quality_validator.validate_workflow(
        workflow=workflow,
        observations=screen_states,
        embeddings=embeddings_array,
        labels=labels
    )
    logger.info(
        f"Validation terminée: score={report.overall_score:.3f}, "
        f"production_ready={report.is_production_ready}"
    )
    return report
def _create_screen_states(self, session: RawSession) -> List[ScreenState]:
    """
    Create enriched ScreenStates from the session's screenshots.

    For each screenshot:
    1. Find the associated event for the window context
    2. Create the 4 levels of the ScreenState (raw/perception/context/window)
    3. Optionally detect UI elements (left empty here)

    Args:
        session: Raw session

    Returns:
        List of enriched ScreenStates
    """
    screen_states = []
    # Map screenshot_id -> event so each screenshot can inherit the
    # triggering event's window context and timing metadata.
    screenshot_to_event = {}
    for event in session.events:
        if event.screenshot_id:
            screenshot_to_event[event.screenshot_id] = event
    for i, screenshot in enumerate(session.screenshots):
        # Find the associated event (None for unmatched screenshots).
        event = screenshot_to_event.get(screenshot.screenshot_id)
        # Build WindowContext from the event, with a neutral fallback.
        if event and event.window:
            window = WindowContext(
                app_name=event.window.app_name,
                window_title=event.window.title,
                screen_resolution=session.environment.get("screen", {}).get("primary_resolution", [1920, 1080]),
                workspace="main"
            )
        else:
            window = WindowContext(
                app_name="unknown",
                window_title="Unknown",
                screen_resolution=[1920, 1080],
                workspace="main"
            )
        # RawLevel: absolute path is
        # data/training/sessions/{session_id}/{session_id}/{relative_path}.
        # NOTE(review): session_id appears twice on purpose per the original
        # comment — confirm this matches the on-disk layout.
        screenshot_absolute_path = f"data/training/sessions/{session.session_id}/{session.session_id}/{screenshot.relative_path}"
        screenshot_path = Path(screenshot_absolute_path)
        raw = RawLevel(
            screenshot_path=str(screenshot_path),
            capture_method="mss",
            # 0 when the file is missing, so downstream code need not re-stat.
            file_size_bytes=screenshot_path.stat().st_size if screenshot_path.exists() else 0
        )
        # PerceptionLevel (enriched later by the embedding builder).
        perception = PerceptionLevel(
            embedding=EmbeddingRef(
                provider="openclip_ViT-B-32",
                vector_id=f"data/embeddings/screens/{session.session_id}_state_{i:04d}.npy",
                dimensions=512
            ),
            detected_text=[],  # filled later by VLM/OCR
            text_detection_method="pending",
            confidence_avg=0.0
        )
        # ContextLevel: positional step plus user/session tags.
        context = ContextLevel(
            current_workflow_candidate=None,
            workflow_step=i,
            user_id=session.user.get("id", "unknown"),
            tags=list(session.context.get("tags", [])) if isinstance(session.context.get("tags"), list) else [],
            business_variables={}
        )
        # Parse the timestamp (ISO strings may use a trailing 'Z' for UTC).
        if isinstance(screenshot.captured_at, str):
            timestamp = datetime.fromisoformat(screenshot.captured_at.replace('Z', '+00:00'))
        else:
            timestamp = screenshot.captured_at
        # Assemble the complete ScreenState.
        state = ScreenState(
            screen_state_id=f"{session.session_id}_state_{i:04d}",
            timestamp=timestamp,
            session_id=session.session_id,
            window=window,
            raw=raw,
            perception=perception,
            context=context,
            metadata={
                "screenshot_id": screenshot.screenshot_id,
                "event_type": event.type if event else None,
                "event_time": event.t if event else None
            },
            ui_elements=[]  # filled by UIDetector when available
        )
        screen_states.append(state)
    logger.info(f"Created {len(screen_states)} enriched screen states")
    return screen_states
def _compute_embeddings(
self, screen_states: List[ScreenState]
) -> List[np.ndarray]:
"""
Calculer State Embeddings pour tous les états.
Utilise StateEmbeddingBuilder pour générer des embeddings
multi-modaux (image + texte + UI). Ajoute optionnellement
les embeddings à l'index FAISS.
Args:
screen_states: Liste de ScreenStates
Returns:
Liste de vecteurs d'embeddings (numpy arrays)
"""
embeddings = []
for state in screen_states:
# Construire embedding
state_embedding = self.embedding_builder.build(state)
vector = state_embedding.get_vector()
embeddings.append(vector)
# Ajouter à FAISS si disponible
if self.faiss_manager:
self.faiss_manager.add_embedding(
state.screen_state_id,
vector,
{"state_id": state.screen_state_id},
)
return embeddings
def _detect_patterns(
    self,
    embeddings: List[np.ndarray],
    screen_states: List[ScreenState],
) -> Dict[int, List[int]]:
    """
    Detect repeated patterns via DBSCAN clustering.

    Embeddings are clustered with a cosine metric; noise points
    (label -1) are discarded and clusters with fewer than
    ``min_pattern_repetitions`` members are filtered out.

    Args:
        embeddings: Embedding vectors
        screen_states: Matching ScreenStates

    Returns:
        Mapping {cluster_id: [state indices]}
    """
    if len(embeddings) < self.min_pattern_repetitions:
        logger.warning(
            f"Not enough states ({len(embeddings)}) for pattern detection "
            f"(minimum: {self.min_pattern_repetitions})"
        )
        return {}
    # Cosine-distance DBSCAN over the stacked embedding matrix.
    model = DBSCAN(
        eps=self.clustering_eps,
        min_samples=self.clustering_min_samples,
        metric="cosine",
    )
    labels = model.fit_predict(np.array(embeddings))
    # Bucket state indices by cluster label; -1 marks noise.
    grouped = defaultdict(list)
    for position, label in enumerate(labels):
        grouped[label].append(position)
    noise_count = len(grouped.pop(-1, []))
    # Keep only clusters repeated often enough to count as a pattern.
    filtered_clusters = {
        cluster_id: indices
        for cluster_id, indices in grouped.items()
        if len(indices) >= self.min_pattern_repetitions
    }
    logger.info(
        f"Clustering results: {len(filtered_clusters)} patterns, "
        f"{noise_count} noise points, "
        f"{len(grouped) - len(filtered_clusters)} small clusters filtered"
    )
    return filtered_clusters
def _build_nodes(
    self,
    clusters: Dict[int, List[int]],
    screen_states: List[ScreenState],
    embeddings: List[np.ndarray],
) -> List[WorkflowNode]:
    """
    Build WorkflowNodes from the detected clusters.

    For each cluster the prototype embedding is the L2-normalized mean
    of the member embeddings; constraints are then extracted from the
    member states to form a ScreenTemplate.

    Args:
        clusters: Detected clusters {cluster_id: [indices]}
        screen_states: ScreenStates
        embeddings: Embeddings

    Returns:
        List of WorkflowNodes
    """
    nodes = []
    for cluster_id, member_indices in clusters.items():
        # Prototype = normalized centroid of the member embeddings.
        stacked = np.vstack([embeddings[i] for i in member_indices])
        prototype = stacked.mean(axis=0)
        prototype = prototype / np.linalg.norm(prototype)
        # Template derived from the member states' shared traits.
        member_states = [screen_states[i] for i in member_indices]
        template = self._create_screen_template(member_states, prototype, cluster_id)
        node = WorkflowNode(
            node_id=f"node_{cluster_id:03d}",
            name=f"State Pattern {cluster_id}",
            description=f"Cluster detected from {len(member_indices)} observations",
            template=template
        )
        nodes.append(node)
        logger.debug(
            f"Created node {node.node_id} with {len(member_indices)} observations"
        )
    return nodes
def _create_screen_template(
    self,
    states: List[ScreenState],
    prototype_embedding: np.ndarray,
    cluster_id: int
) -> ScreenTemplate:
    """
    Create a ScreenTemplate using the progressive architecture.

    Automatically adapts to the richness of the available data:
    - Agent V0: embedding + app_name
    - Systems 2/3: embedding + window + text + ui

    Args:
        states: States of the cluster
        prototype_embedding: Prototype embedding
        cluster_id: Cluster ID (used to name the prototype file)

    Returns:
        ScreenTemplate with constraints adapted to the available data
    """
    # 1. Persist the prototype embedding to disk.
    # NOTE(review): path is relative to the current working directory —
    # confirm callers always run from the project root.
    prototype_dir = Path("data/training/prototypes")
    prototype_dir.mkdir(parents=True, exist_ok=True)
    prototype_path = prototype_dir / f"cluster_{cluster_id}.npy"
    np.save(prototype_path, prototype_embedding)
    logger.debug(f"Saved prototype for cluster {cluster_id}: {prototype_path}")
    # 2. Reference the saved vector; 0.85 is the hard-coded match threshold.
    embedding_proto = EmbeddingPrototype(
        provider="openclip_ViT-B-32",
        vector_id=str(prototype_path),
        min_cosine_similarity=0.85,
        sample_count=len(states)
    )
    # 3. Extract constraints (graceful degradation when data is sparse).
    window = self._extract_window_constraint(states)
    text = self._extract_text_constraint(states)
    ui = self._extract_ui_constraint(states)
    # 4. Assemble the template.
    return ScreenTemplate(
        window=window,
        text=text,
        ui=ui,
        embedding=embedding_proto
    )
def _extract_window_constraint(self, states: List[ScreenState]) -> WindowConstraint:
    """
    Derive a window constraint from the cluster's states.

    Agent V0: relies on app_name (reliable).
    Systems 2/3: app_name + window_title + process.

    Args:
        states: States of the cluster

    Returns:
        WindowConstraint built from whatever data is available
    """
    from collections import Counter
    app_names = []
    window_titles = []
    for current in states:
        win = getattr(current, 'window', None)
        if not win:
            continue
        name = getattr(win, 'app_name', None)
        title = getattr(win, 'window_title', None)
        if name:
            app_names.append(name)
        # Placeholder titles carry no signal; skip them.
        if title and title != "unknown_window":
            window_titles.append(title)
    # Most frequent application name wins.
    common_app = Counter(app_names).most_common(1)[0][0] if app_names else None
    # Shared words across titles, or the single title verbatim.
    if len(window_titles) >= 2:
        title_contains = self._find_common_substring(window_titles)
    elif len(window_titles) == 1:
        title_contains = window_titles[0]
    else:
        title_contains = None
    logger.debug(
        f"Extracted window constraint: app={common_app}, "
        f"title_contains={title_contains}"
    )
    # NOTE(review): app_name is stored as process_name — confirm the two
    # are interchangeable for matching.
    return WindowConstraint(
        title_contains=title_contains,
        process_name=common_app
    )
def _extract_text_constraint(self, states: List[ScreenState]) -> TextConstraint:
    """
    Derive a text constraint from the cluster's states.

    Agent V0: empty (no OCR).
    Systems 2/3: required/forbidden texts from Qwen3-VL.

    Args:
        states: States of the cluster

    Returns:
        TextConstraint with detected texts (empty for agent_v0)
    """
    text_sets = []
    for current in states:
        perception = getattr(current, 'perception', None)
        detected = getattr(perception, 'detected_text', None) if perception else None
        if detected and isinstance(detected, list) and len(detected) > 0:
            text_sets.append(set(detected))
    if not text_sets:
        # No detected texts (agent_v0).
        logger.debug("No detected texts found (agent_v0)")
        return TextConstraint(
            required_texts=[],
            forbidden_texts=[]
        )
    # Texts present in EVERY state are required.
    required = set.intersection(*text_sets)
    logger.debug(f"Extracted {len(required)} required texts from cluster")
    return TextConstraint(
        required_texts=sorted(list(required))[:5],  # cap at 5 texts
        forbidden_texts=[]  # TODO: analyze absent texts
    )
def _extract_ui_constraint(self, states: List[ScreenState]) -> UIConstraint:
    """
    Derive a UI constraint from the cluster's states.

    Agent V0: empty (no UI analysis).
    Systems 2/3: required roles/types from Qwen3-VL.

    Args:
        states: States of the cluster

    Returns:
        UIConstraint with UI elements (empty for agent_v0)
    """
    role_sets = []
    type_sets = []
    element_counts = []
    for current in states:
        elements = getattr(current, 'ui_elements', None)
        if not (elements and isinstance(elements, list) and len(elements) > 0):
            continue
        role_sets.append({el.role for el in elements if hasattr(el, 'role')})
        type_sets.append({el.type for el in elements if hasattr(el, 'type')})
        element_counts.append(len(elements))
    if not role_sets:
        # No UI elements detected (agent_v0).
        logger.debug("No UI elements found (agent_v0)")
        return UIConstraint(
            required_roles=[],
            required_types=[],
            min_element_count=0
        )
    # Roles/types present in EVERY state.
    common_roles = set.intersection(*role_sets)
    common_types = set.intersection(*type_sets) if type_sets else set()
    logger.debug(
        f"Extracted UI constraint: {len(common_roles)} roles, "
        f"{len(common_types)} types"
    )
    return UIConstraint(
        required_roles=sorted(list(common_roles))[:3],
        required_types=sorted(list(common_types))[:3],
        min_element_count=min(element_counts) if element_counts else 0
    )
def _find_common_substring(self, strings: List[str]) -> Optional[str]:
"""
Trouver substring commune ou mots communs.
Args:
strings: Liste de strings
Returns:
Mot le plus fréquent ou None
"""
if not strings:
return None
if len(strings) == 1:
return strings[0]
from collections import Counter
# Séparer en mots
all_words = []
for s in strings:
# Nettoyer et séparer
words = s.replace('-', ' ').replace('_', ' ').split()
all_words.extend([w for w in words if len(w) > 2]) # Ignorer mots courts
if not all_words:
return None
# Compter occurrences
word_counts = Counter(all_words)
# Garder mots présents dans au moins 50% des strings
threshold = len(strings) / 2
common_words = [
word for word, count in word_counts.items()
if count >= threshold
]
if common_words:
# Retourner le mot le plus fréquent
return max(common_words, key=lambda w: word_counts[w])
return None
def _build_edges(
    self,
    nodes: List[WorkflowNode],
    screen_states: List[ScreenState],
    session: RawSession,
) -> List[WorkflowEdge]:
    """
    Build WorkflowEdges from the observed transitions.

    Algorithm:
    1. Map each ScreenState to its node (via embedding similarity)
    2. Identify transitions (state_i -> state_j where the node changes)
    3. Extract the action from the event between the two states
    4. Create a WorkflowEdge with action and conditions

    Args:
        nodes: Built WorkflowNodes
        screen_states: ScreenStates
        session: Raw session (for events)

    Returns:
        List of WorkflowEdges
    """
    if not nodes or len(screen_states) < 2:
        logger.warning("Not enough data to build edges")
        return []
    edges = []
    edge_counts = defaultdict(int)  # occurrences of each distinct transition
    # Step 1: map each state to its node.
    state_to_node = self._map_states_to_nodes(screen_states, nodes)
    # Step 2: map screenshot_id -> event.
    # NOTE(review): this mapping is built but never read in this method —
    # the transition lookup below uses session.events directly.
    screenshot_to_event = {}
    for event in session.events:
        if event.screenshot_id:
            screenshot_to_event[event.screenshot_id] = event
    # Step 3: walk consecutive state pairs looking for node changes.
    for i in range(len(screen_states) - 1):
        current_state = screen_states[i]
        next_state = screen_states[i + 1]
        current_node_id = state_to_node.get(current_state.screen_state_id)
        next_node_id = state_to_node.get(next_state.screen_state_id)
        # A transition exists when both states map to different nodes.
        if current_node_id and next_node_id and current_node_id != next_node_id:
            # Find the event that caused the transition.
            event = self._find_transition_event(
                current_state, next_state, session.events
            )
            edge_key = f"{current_node_id}_to_{next_node_id}"
            edge_counts[edge_key] += 1
            # Create each distinct edge only once; keep counting repeats.
            if edge_counts[edge_key] == 1:
                edge = self._create_edge(
                    current_node_id, next_node_id, event, edge_key
                )
                edges.append(edge)
    # Back-fill edge stats with the observed transition counts.
    # NOTE(review): assumes EdgeStats exposes mutable execution_count /
    # success_count attributes — confirm against the model.
    for edge in edges:
        edge_key = f"{edge.from_node}_to_{edge.to_node}"
        edge.stats.execution_count = edge_counts[edge_key]
        edge.stats.success_count = edge_counts[edge_key]
    logger.info(f"Built {len(edges)} edges from {sum(edge_counts.values())} transitions")
    return edges
def _map_states_to_nodes(
self,
screen_states: List[ScreenState],
nodes: List[WorkflowNode]
) -> Dict[str, str]:
"""
Mapper chaque ScreenState vers le node le plus proche.
Utilise la similarité d'embedding pour trouver le meilleur match.
"""
state_to_node = {}
# Récupérer les embeddings des prototypes de nodes
node_prototypes = {}
for node in nodes:
if hasattr(node, 'template') and node.template:
if hasattr(node.template, 'embedding_prototype'):
node_prototypes[node.node_id] = np.array(node.template.embedding_prototype)
if not node_prototypes:
logger.warning("No node prototypes available for mapping")
return state_to_node
# Pour chaque état, trouver le node le plus proche
for state in screen_states:
# Calculer embedding de l'état
try:
state_embedding = self.embedding_builder.build(state)
state_vector = state_embedding.get_vector()
# Trouver le node avec la meilleure similarité
best_node_id = None
best_similarity = -1
for node_id, prototype in node_prototypes.items():
similarity = np.dot(state_vector, prototype)
if similarity > best_similarity:
best_similarity = similarity
best_node_id = node_id
if best_node_id and best_similarity > 0.7: # Seuil minimum
state_to_node[state.screen_state_id] = best_node_id
except Exception as e:
logger.warning(f"Failed to map state {state.screen_state_id}: {e}")
return state_to_node
def _find_transition_event(
self,
current_state: ScreenState,
next_state: ScreenState,
events: List[Event]
) -> Optional[Event]:
"""
Trouver l'événement qui a causé la transition entre deux états.
Cherche l'événement (clic, frappe) qui s'est produit entre les deux screenshots.
"""
current_time = current_state.metadata.get("event_time", 0)
next_time = next_state.metadata.get("event_time", float('inf'))
# Chercher les événements d'action entre les deux timestamps
action_events = []
for event in events:
if current_time <= event.t < next_time:
if event.type in ["mouse_click", "key_press", "text_input"]:
action_events.append(event)
# Retourner le dernier événement d'action (celui qui a probablement causé la transition)
if action_events:
return action_events[-1]
return None
def _create_edge(
    self,
    from_node: str,
    to_node: str,
    event: Optional[Event],
    edge_id: str
) -> WorkflowEdge:
    """
    Create a WorkflowEdge from an observed transition.

    Args:
        from_node: Source node id
        to_node: Destination node id
        event: Event that (probably) caused the transition, if any
        edge_id: Identifier to assign to the edge

    Returns:
        WorkflowEdge with action, constraints and post-conditions
    """
    # Derive the action type and parameters from the triggering event.
    if event:
        action_type = event.type
        action_params = {}
        if action_type == "mouse_click":
            action_params = {
                "button": event.data.get("button", "left"),
                "position": event.data.get("pos", [0, 0]),
                "wait_after_ms": 500
            }
            target_role = "unknown_element"  # refined later with UI detection
        elif action_type == "key_press":
            action_params = {
                "keys": event.data.get("keys", []),
                "wait_after_ms": 200
            }
            target_role = "keyboard_input"
        elif action_type == "text_input":
            action_params = {
                "text": event.data.get("text", ""),
                "wait_after_ms": 300
            }
            target_role = "text_field"
        else:
            action_params = {}
            target_role = "unknown"
    else:
        action_type = "unknown"
        action_params = {}
        target_role = "unknown"
    # Build the action with a permissive target specification.
    action = Action(
        type=action_type,
        target=TargetSpec(
            role=target_role,
            selection_policy="first",
            fallback_strategy="visual_similarity"
        ),
        parameters=action_params
    )
    # Execution constraints (confidence gate + wait budget).
    constraints = EdgeConstraints(
        pre_conditions={},
        required_confidence=0.8,
        max_wait_time_ms=5000
    )
    # Post-conditions: execution is expected to land on `to_node`.
    post_conditions = PostConditions(
        expected_node=to_node,
        window_change_expected=False,
        new_ui_elements_expected=[],
        timeout_ms=3000
    )
    # Local import keeps the module-level import block lean.
    from core.models.workflow_graph import EdgeStats
    return WorkflowEdge(
        edge_id=edge_id,
        from_node=from_node,
        to_node=to_node,
        action=action,
        constraints=constraints,
        post_conditions=post_conditions,
        stats=EdgeStats(),
        metadata={
            "created_from_event": event.type if event else None,
            "auto_generated": True
        }
    )
def main():
    """Manual-test entry point: configure logging and build a GraphBuilder."""
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
    graph_builder = GraphBuilder(min_pattern_repetitions=3)
    logger.info(f"GraphBuilder initialized: {graph_builder}")
    logger.info("Ready to build workflows from sessions")


if __name__ == "__main__":
    main()