Files
Geniusia_v2/geniusia2/core/workflow_matcher.py
2026-03-05 00:20:25 +01:00

537 lines
17 KiB
Python

#!/usr/bin/env python3
"""
WorkflowMatcher - Compare les actions courantes avec les workflows connus
pour détecter les correspondances et suggérer l'auto-complétion.
"""
from typing import List, Dict, Any, Optional
from dataclasses import dataclass
import math
import numpy as np
@dataclass
class WorkflowMatch:
    """Match between the current session and one known workflow.

    Attributes:
        workflow_id: Identifier of the matched workflow.
        workflow_name: Display name of the matched workflow.
        confidence: Match score attributed to the workflow.
        matched_steps: Number of workflow steps considered matched.
        total_steps: Total number of steps in the workflow.
        remaining_steps: Workflow steps that have not been matched yet.
        current_step_index: Index of the next workflow step to perform.
    """

    workflow_id: str
    workflow_name: str
    confidence: float
    matched_steps: int
    total_steps: int
    remaining_steps: List[Dict[str, Any]]
    current_step_index: int

    @property
    def completion_percentage(self) -> float:
        """Return the share of matched steps as a percentage.

        Returns:
            ``matched_steps / total_steps`` scaled to 0-100, or ``0.0`` when
            the workflow has no steps (avoids a division by zero).
        """
        if self.total_steps != 0:
            fraction = self.matched_steps / self.total_steps
            return fraction * 100
        return 0.0
class WorkflowMatcher:
    """Compare the current session's actions with known workflows.

    Actions are compared index-by-index with the steps of each workflow
    (action ``i`` against step ``i``). Each pair gets a weighted similarity
    built from the action type, the screen position, the window name and,
    when embeddings and a FAISS index are available, a cosine similarity
    between the two embeddings.
    """

    # Raw action types collapsed onto one canonical name before comparison.
    _ACTION_TYPE_ALIASES = {
        "mouse_click": "click",
        "mouse_move": "move",
        "key_press": "type",
        "keyboard": "type",
    }

    # Per-criterion weights; each table sums to 1.0.
    _WEIGHTS_WITH_VISUAL = {
        "action_type": 0.3,
        "position": 0.2,
        "window": 0.2,
        "visual": 0.3,
    }
    _WEIGHTS_WITHOUT_VISUAL = {
        "action_type": 0.4,
        "position": 0.3,
        "window": 0.3,
    }

    # Minimum per-step similarity for a step to count as matched.
    _DEFAULT_STEP_MATCH_THRESHOLD = 0.7

    def __init__(self, logger, config: Dict[str, Any], faiss_index=None):
        """Initialise the workflow matcher.

        Args:
            logger: Object exposing ``log_action(dict)``; may be falsy to
                disable logging.
            config: Global configuration. Settings are read from its
                ``"workflow"`` section; a missing or ``None`` config or
                section falls back to the defaults.
            faiss_index: Optional FAISS index. Its presence enables the
                visual-similarity criterion.
        """
        self.logger = logger
        self.config = config
        self.faiss_index = faiss_index

        # Tolerate config=None and {"workflow": None} instead of crashing.
        workflow_config = (config or {}).get("workflow") or {}

        # Distance (in pixels) under which two positions are considered equal.
        self.position_tolerance = workflow_config.get("position_tolerance", 50)
        # Minimum confidence for find_best_match to return a workflow.
        self.min_confidence = workflow_config.get("min_confidence", 0.80)
        # NOTE(review): this value is stored but the scoring below uses the
        # fixed weight tables above - confirm whether it should drive them.
        self.visual_similarity_weight = workflow_config.get(
            "visual_similarity_weight", 0.3
        )
        # Per-step similarity needed for a step to count as matched.
        self.step_match_threshold = workflow_config.get(
            "step_match_threshold", self._DEFAULT_STEP_MATCH_THRESHOLD
        )

        self._log({
            "action": "workflow_matcher_initialized",
            "position_tolerance": self.position_tolerance,
            "min_confidence": self.min_confidence,
            "faiss_enabled": faiss_index is not None
        })

    def _log(self, payload: Dict[str, Any]) -> None:
        """Forward ``payload`` to the logger when one is configured."""
        if self.logger:
            self.logger.log_action(payload)

    def match_current_session(
        self,
        session_actions: List[Dict[str, Any]],
        workflows: List[Dict[str, Any]]
    ) -> List["WorkflowMatch"]:
        """Compare the current session with every known workflow.

        Args:
            session_actions: Actions of the current session.
            workflows: Known workflows; each is a dict with ``"steps"`` and
                optionally ``"workflow_id"`` and ``"name"``.

        Returns:
            Matches with a strictly positive score, sorted by decreasing
            confidence. Empty when either input is empty.
        """
        if not session_actions or not workflows:
            return []

        matches = []
        for workflow in workflows:
            steps = workflow.get("steps") or []
            # Compute each step similarity once and reuse it for both the
            # score and the matched-step count.
            similarities = self._step_similarities(session_actions, steps)
            match_score = self._score_from_similarities(similarities)
            if match_score <= 0:
                continue

            matched_steps = self._count_leading_matches(similarities)
            matches.append(WorkflowMatch(
                workflow_id=workflow.get("workflow_id", ""),
                workflow_name=workflow.get("name", "Workflow inconnu"),
                confidence=match_score,
                matched_steps=matched_steps,
                total_steps=len(steps),
                remaining_steps=steps[matched_steps:],
                current_step_index=matched_steps
            ))

        matches.sort(key=lambda m: m.confidence, reverse=True)

        if matches:
            self._log({
                "action": "workflows_matched",
                "num_matches": len(matches),
                "best_confidence": matches[0].confidence
            })
        return matches

    def calculate_match_score(
        self,
        actions: List[Dict[str, Any]],
        workflow_steps: List[Dict[str, Any]]
    ) -> float:
        """Score how well ``actions`` match the beginning of a workflow.

        Only the first ``min(len(actions), len(workflow_steps))`` pairs are
        compared. The score is the mean step similarity plus a small bonus
        that grows with the number of compared steps (capped at 0.1).

        Args:
            actions: Actions to compare.
            workflow_steps: Steps of the workflow.

        Returns:
            Score in [0, 1]; ``0.0`` when either list is empty or when no
            compared step has any similarity.
        """
        if not actions or not workflow_steps:
            return 0.0
        return self._score_from_similarities(
            self._step_similarities(actions, workflow_steps)
        )

    def _step_similarities(
        self,
        actions: List[Dict[str, Any]],
        workflow_steps: List[Dict[str, Any]]
    ) -> List[float]:
        """Return the similarity of each aligned (action, step) pair."""
        return [
            self._calculate_step_similarity(action, step)
            for action, step in zip(actions, workflow_steps)
        ]

    def _score_from_similarities(self, similarities: List[float]) -> float:
        """Turn per-step similarities into a workflow score in [0, 1]."""
        if not similarities:
            return 0.0

        total_score = 0.0
        for similarity in similarities:
            total_score += similarity
        avg_score = total_score / len(similarities)

        # Without this guard the bonus alone would give a positive score to
        # a workflow that does not match at all.
        if avg_score <= 0.0:
            return 0.0

        # The more steps were compared, the more reliable the score.
        sequence_bonus = min(0.1, len(similarities) * 0.02)
        return min(1.0, avg_score + sequence_bonus)

    def _normalize_action_type(self, action_type: str) -> str:
        """Map an action-type variant to its canonical name.

        Args:
            action_type: Raw action type.

        Returns:
            The canonical type, or ``action_type`` unchanged when it has no
            known alias.
        """
        return self._ACTION_TYPE_ALIASES.get(action_type, action_type)

    def _calculate_step_similarity(
        self,
        action: Dict[str, Any],
        step: Dict[str, Any]
    ) -> float:
        """Compute the similarity between one action and one workflow step.

        Args:
            action: Action to compare.
            step: Workflow step.

        Returns:
            Weighted similarity in [0, 1].
        """
        # The visual criterion needs a FAISS index and both embeddings.
        has_visual = (
            self.faiss_index is not None and
            action.get("embedding") is not None and
            step.get("embedding") is not None
        )
        weights = (
            self._WEIGHTS_WITH_VISUAL if has_visual
            else self._WEIGHTS_WITHOUT_VISUAL
        )

        score = 0.0

        # 1. Action type (after alias normalisation).
        action_type = self._normalize_action_type(action.get("action_type", ""))
        step_type = self._normalize_action_type(step.get("action_type", ""))
        if action_type == step_type:
            score += weights["action_type"]

        # 2. Position; a missing position defaults to the origin.
        action_pos = action.get("position", [0, 0])
        step_pos = step.get("position", [0, 0])
        if action_pos and step_pos:
            score += weights["position"] * self._calculate_position_similarity(
                action_pos,
                step_pos
            )

        # 3. Window name: full credit when equal, half when one name
        #    contains the other.
        action_window = action.get("window", "")
        step_window = step.get("window", "")
        if action_window and step_window:
            if action_window == step_window:
                score += weights["window"]
            elif action_window in step_window or step_window in action_window:
                score += weights["window"] * 0.5

        # 4. Visual similarity, when available.
        if has_visual:
            score += weights["visual"] * self._calculate_visual_similarity(
                action.get("embedding"),
                step.get("embedding")
            )

        return score

    def _calculate_visual_similarity(
        self,
        embedding1: np.ndarray,
        embedding2: np.ndarray
    ) -> float:
        """Compute the cosine similarity between two embeddings.

        Embeddings are flattened first, so ``(d,)`` and ``(1, d)`` inputs are
        both accepted.

        Args:
            embedding1: First embedding (array or sequence of numbers).
            embedding2: Second embedding (array or sequence of numbers).

        Returns:
            Similarity clamped to [0, 1]; ``0.0`` when an embedding is
            missing, the sizes differ, the result is not finite, or the
            computation fails.
        """
        if embedding1 is None or embedding2 is None:
            return 0.0

        # Best-effort: malformed embedding data must not abort matching, so
        # any failure is logged and scored as "no visual similarity".
        try:
            vec1 = np.asarray(embedding1).ravel()
            vec2 = np.asarray(embedding2).ravel()
            if vec1.shape != vec2.shape:
                self._log({
                    "action": "visual_similarity_error",
                    "error": f"embedding size mismatch: {vec1.shape} vs {vec2.shape}"
                })
                return 0.0

            # The epsilon keeps all-zero embeddings from dividing by zero.
            vec1_unit = vec1 / (np.linalg.norm(vec1) + 1e-8)
            vec2_unit = vec2 / (np.linalg.norm(vec2) + 1e-8)
            similarity = np.dot(vec1_unit, vec2_unit)

            # NaN would slip through min()/max() below and come out as 1.0.
            if not np.isfinite(similarity):
                self._log({
                    "action": "visual_similarity_error",
                    "error": "non-finite similarity"
                })
                return 0.0

            return float(max(0.0, min(1.0, similarity)))
        except Exception as e:
            self._log({
                "action": "visual_similarity_error",
                "error": str(e)
            })
            return 0.0

    def _calculate_position_similarity(
        self,
        pos1: List[int],
        pos2: List[int]
    ) -> float:
        """Compute the similarity between two positions, with tolerance.

        Args:
            pos1: First position ``[x, y]``.
            pos2: Second position ``[x, y]``.

        Returns:
            ``1.0`` within ``position_tolerance``, decreasing linearly to
            ``0.0`` at twice the tolerance; ``0.0`` when a position is empty
            or has fewer than two coordinates.
        """
        if not pos1 or not pos2 or len(pos1) < 2 or len(pos2) < 2:
            return 0.0

        distance = math.hypot(pos1[0] - pos2[0], pos1[1] - pos2[1])
        tolerance = self.position_tolerance

        if distance <= tolerance:
            return 1.0
        if distance >= tolerance * 2:
            return 0.0
        return 1.0 - ((distance - tolerance) / tolerance)

    def _count_leading_matches(self, similarities) -> int:
        """Count the leading similarities that reach the match threshold."""
        matched = 0
        for similarity in similarities:
            # Stop at the first non-match: only a consecutive prefix counts.
            if not similarity >= self.step_match_threshold:
                break
            matched += 1
        return matched

    def _count_matched_steps(
        self,
        actions: List[Dict[str, Any]],
        workflow_steps: List[Dict[str, Any]]
    ) -> int:
        """Count the steps matched consecutively from the start.

        A step is matched when its similarity is greater than or equal to
        ``step_match_threshold`` (0.7 by default).

        Args:
            actions: Actions of the session.
            workflow_steps: Steps of the workflow.

        Returns:
            Number of leading matched steps.
        """
        # Generator: similarities past the first non-match are not computed.
        return self._count_leading_matches(
            self._calculate_step_similarity(action, step)
            for action, step in zip(actions, workflow_steps)
        )

    def find_best_match(
        self,
        matches: List["WorkflowMatch"]
    ) -> Optional["WorkflowMatch"]:
        """Return the most confident match when it reaches the threshold.

        Args:
            matches: Candidate matches, in any order.

        Returns:
            The match with the highest confidence when that confidence is at
            least ``min_confidence``; ``None`` otherwise.
        """
        if not matches:
            return None

        # max() keeps the first of equal maxima, so a list already sorted by
        # decreasing confidence still yields matches[0].
        best_match = max(matches, key=lambda m: m.confidence)
        if best_match.confidence < self.min_confidence:
            return None

        if self.logger:
            self.logger.log_action({
                "action": "best_match_found",
                "workflow_id": best_match.workflow_id,
                "workflow_name": best_match.workflow_name,
                "confidence": best_match.confidence,
                "matched_steps": best_match.matched_steps,
                "total_steps": best_match.total_steps
            })
        return best_match

    def get_match_details(self, match: "WorkflowMatch") -> Dict[str, Any]:
        """Return the details of a match for display.

        Args:
            match: Match to describe.

        Returns:
            Dictionary with the match fields, the completion percentage and
            a preview of the next three remaining steps.
        """
        return {
            "workflow_id": match.workflow_id,
            "workflow_name": match.workflow_name,
            "confidence": match.confidence,
            "matched_steps": match.matched_steps,
            "total_steps": match.total_steps,
            "remaining_steps": match.remaining_steps,
            "completion_percentage": match.completion_percentage,
            "next_steps_preview": match.remaining_steps[:3]
        }
if __name__ == "__main__":
    # Basic smoke tests, executed only when the module is run as a script.
    print("Test du WorkflowMatcher")
    print("=" * 50)

    class MockLogger:
        """Stand-in logger that echoes every payload to stdout."""

        def log_action(self, data):
            print(f"[LOG] {data}")

    def _calc_step(kind, position):
        """Build an action/step dict located in the calculator window."""
        return {
            "action_type": kind,
            "position": position,
            "window": "Calculatrice"
        }

    logger = MockLogger()
    config = {"workflow": {"position_tolerance": 50, "min_confidence": 0.80}}
    matcher = WorkflowMatcher(logger, config)

    # Test 1: the session reproduces the first two steps exactly.
    print("\n1. Test match parfait:")
    session_actions = [
        _calc_step("click", [100, 100]),
        _calc_step("type", [0, 0]),
    ]
    workflow = {
        "workflow_id": "calc_001",
        "name": "Calcul simple",
        "steps": [
            _calc_step("click", [100, 100]),
            _calc_step("type", [0, 0]),
            _calc_step("click", [200, 200]),
        ]
    }
    matches = matcher.match_current_session(session_actions, [workflow])
    print(f" Nombre de matches: {len(matches)}")
    if matches:
        top = matches[0]
        print(f" Meilleur match: {top.workflow_name}")
        print(f" Confiance: {top.confidence:.2%}")
        print(f" Étapes matchées: {top.matched_steps}/{top.total_steps}")

    # Test 2: the first click is slightly offset but within tolerance.
    print("\n2. Test avec tolérance de position:")
    session_actions[0]["position"] = [120, 110]
    matches = matcher.match_current_session(session_actions, [workflow])
    if matches:
        print(f" Confiance avec décalage: {matches[0].confidence:.2%}")

    # Test 3: pick the best match from the results of test 2.
    print("\n3. Test find_best_match:")
    best = matcher.find_best_match(matches)
    if not best:
        print(" Aucun match au-dessus du seuil")
    else:
        print(f" Meilleur match trouvé: {best.workflow_name}")
        print(f" Confiance: {best.confidence:.2%}")
        details = matcher.get_match_details(best)
        print(f" Prochaines étapes: {len(details['next_steps_preview'])}")

    print("\n✓ Tests terminés!")