# Initial commit: geniusia2/core/workflow_matcher.py (new file, 536 lines; diff hunk @@ -0,0 +1,536 @@)
#!/usr/bin/env python3
|
||||
"""
|
||||
WorkflowMatcher - compares the current actions against the known workflows
to detect matches and suggest auto-completion.
|
||||
"""
|
||||
|
||||
from typing import List, Dict, Any, Optional
|
||||
from dataclasses import dataclass
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
|
||||
@dataclass
class WorkflowMatch:
    """Match between the current session and one known workflow."""

    workflow_id: str  # identifier of the matched workflow
    workflow_name: str  # display name of the matched workflow
    confidence: float  # match score assigned by the matcher
    matched_steps: int  # number of workflow steps already matched
    total_steps: int  # number of steps in the workflow
    remaining_steps: List[Dict[str, Any]]  # steps not matched yet
    current_step_index: int  # index of the next step in the workflow

    @property
    def completion_percentage(self) -> float:
        """Return the share of matched steps as a percentage.

        Returns:
            ``matched_steps / total_steps * 100``, or 0.0 when the workflow
            has no steps (avoids a division by zero).
        """
        if self.total_steps <= 0:
            return 0.0
        return (self.matched_steps / self.total_steps) * 100
|
||||
|
||||
|
||||
class WorkflowMatcher:
    """Match the current session's actions against known workflows.

    Actions and workflow steps are plain dicts. The keys read by this class
    are ``action_type``, ``position``, ``window`` and, optionally,
    ``embedding``. Comparison is positional: action ``i`` is compared with
    workflow step ``i``.
    """

    # Variants collapsed onto one canonical action type before comparison.
    _ACTION_TYPE_ALIASES = {
        "mouse_click": "click",
        "mouse_move": "move",
        "key_press": "type",
        "keyboard": "type",
    }

    # Per-criterion weights used when both sides carry an embedding.
    _WEIGHTS_WITH_VISUAL = {
        "action_type": 0.3,
        "position": 0.2,
        "window": 0.2,
        "visual": 0.3,
    }
    # Per-criterion weights used when no visual comparison is possible.
    _WEIGHTS_WITHOUT_VISUAL = {
        "action_type": 0.4,
        "position": 0.3,
        "window": 0.3,
    }

    # A step counts as matched when its similarity reaches this value.
    _STEP_MATCH_THRESHOLD = 0.7
    # Confidence bonus granted per compared step, and its upper bound.
    _SEQUENCE_BONUS_PER_STEP = 0.02
    _SEQUENCE_BONUS_CAP = 0.1

    def __init__(self, logger, config: Dict[str, Any], faiss_index=None):
        """Initialise the workflow matcher.

        Args:
            logger: Object exposing ``log_action(dict)``; a falsy value
                disables logging.
            config: Global configuration; settings are read from its
                ``"workflow"`` section.
            faiss_index: Optional index object. Only its presence is checked
                here: it enables embedding comparison between steps.
        """
        self.logger = logger
        self.config = config
        self.faiss_index = faiss_index

        # Tolerate a missing/None config or "workflow" section.
        workflow_config = (config or {}).get("workflow") or {}

        # Distance (default 50) under which two positions are identical.
        self.position_tolerance = workflow_config.get("position_tolerance", 50)
        # Minimum confidence (default 0.80) required by find_best_match.
        self.min_confidence = workflow_config.get("min_confidence", 0.80)
        # NOTE(review): this value is stored but the scoring below uses the
        # fixed weights in _WEIGHTS_WITH_VISUAL - confirm which is intended.
        self.visual_similarity_weight = workflow_config.get(
            "visual_similarity_weight", 0.3
        )

        if self.logger:
            self.logger.log_action({
                "action": "workflow_matcher_initialized",
                "position_tolerance": self.position_tolerance,
                "min_confidence": self.min_confidence,
                "faiss_enabled": faiss_index is not None
            })

    def match_current_session(
        self,
        session_actions: List[Dict[str, Any]],
        workflows: List[Dict[str, Any]]
    ) -> List["WorkflowMatch"]:
        """Compare the current session with every known workflow.

        Args:
            session_actions: Actions of the current session.
            workflows: Known workflows; ``steps``, ``workflow_id`` and
                ``name`` are read from each one.

        Returns:
            Matches with a positive score, sorted by decreasing confidence.
        """
        if not session_actions or not workflows:
            return []

        matches = []
        for workflow in workflows:
            steps = workflow.get("steps") or []
            match_score = self.calculate_match_score(session_actions, steps)
            if match_score <= 0:
                continue

            matched_steps = self._count_matched_steps(session_actions, steps)
            matches.append(
                WorkflowMatch(
                    workflow_id=workflow.get("workflow_id", ""),
                    workflow_name=workflow.get("name", "Workflow inconnu"),
                    confidence=match_score,
                    matched_steps=matched_steps,
                    total_steps=len(steps),
                    remaining_steps=steps[matched_steps:],
                    current_step_index=matched_steps,
                )
            )

        matches.sort(key=lambda m: m.confidence, reverse=True)

        if matches and self.logger:
            self.logger.log_action({
                "action": "workflows_matched",
                "num_matches": len(matches),
                "best_confidence": matches[0].confidence
            })

        return matches

    def calculate_match_score(
        self,
        actions: List[Dict[str, Any]],
        workflow_steps: List[Dict[str, Any]]
    ) -> float:
        """Score how well the actions match the start of a workflow.

        The first ``min(len(actions), len(workflow_steps))`` pairs are
        compared; the score is their average similarity plus a small bonus
        that grows with the number of compared steps.

        Args:
            actions: Actions to compare.
            workflow_steps: Steps of the workflow.

        Returns:
            Score in [0, 1]; 0.0 when either list is empty or when no
            compared pair has any similarity.
        """
        if not actions or not workflow_steps:
            return 0.0

        # zip stops at the shorter list: extra actions or steps are ignored.
        step_scores = [
            self._calculate_step_similarity(action, step)
            for action, step in zip(actions, workflow_steps)
        ]
        avg_score = sum(step_scores) / len(step_scores)

        # Without any similarity the bonus alone must not create a match,
        # otherwise every workflow would get a positive score.
        if avg_score <= 0.0:
            return 0.0

        sequence_bonus = min(
            self._SEQUENCE_BONUS_CAP,
            len(step_scores) * self._SEQUENCE_BONUS_PER_STEP,
        )
        return min(1.0, avg_score + sequence_bonus)

    def _normalize_action_type(self, action_type: str) -> str:
        """Map an action-type variant onto its canonical name.

        Args:
            action_type: Raw action type.

        Returns:
            The canonical type, or ``action_type`` unchanged when it has no
            known alias.
        """
        return self._ACTION_TYPE_ALIASES.get(action_type, action_type)

    def _calculate_step_similarity(
        self,
        action: Dict[str, Any],
        step: Dict[str, Any]
    ) -> float:
        """Compute the similarity between an action and a workflow step.

        Combines action type, position and window, plus embedding similarity
        when an index is configured and both sides carry an ``embedding``.

        Args:
            action: Action to compare.
            step: Workflow step.

        Returns:
            Similarity score in [0, 1].
        """
        action_embedding = action.get("embedding")
        step_embedding = step.get("embedding")
        has_visual = (
            self.faiss_index is not None
            and action_embedding is not None
            and step_embedding is not None
        )
        weights = (
            self._WEIGHTS_WITH_VISUAL if has_visual
            else self._WEIGHTS_WITHOUT_VISUAL
        )

        score = 0.0

        # 1. Action type (after alias normalisation).
        action_type = self._normalize_action_type(action.get("action_type", ""))
        step_type = self._normalize_action_type(step.get("action_type", ""))
        if action_type == step_type:
            score += weights["action_type"]

        # 2. Position; a missing position defaults to [0, 0].
        action_pos = action.get("position", [0, 0])
        step_pos = step.get("position", [0, 0])
        if action_pos and step_pos:
            score += weights["position"] * self._calculate_position_similarity(
                action_pos, step_pos
            )

        # 3. Window: full weight when equal, half when one contains the other.
        action_window = action.get("window", "")
        step_window = step.get("window", "")
        if action_window and step_window:
            if action_window == step_window:
                score += weights["window"]
            elif action_window in step_window or step_window in action_window:
                score += weights["window"] * 0.5

        # 4. Visual similarity, when available.
        if has_visual:
            score += weights["visual"] * self._calculate_visual_similarity(
                action_embedding, step_embedding
            )

        return score

    def _calculate_visual_similarity(
        self,
        embedding1: np.ndarray,
        embedding2: np.ndarray
    ) -> float:
        """Compute the cosine similarity between two embeddings.

        Args:
            embedding1: First embedding (array or sequence of numbers).
            embedding2: Second embedding (array or sequence of numbers).

        Returns:
            Similarity clamped to [0, 1]; 0.0 when an embedding is missing,
            the result is not finite, or the comparison fails.
        """
        if embedding1 is None or embedding2 is None:
            return 0.0

        try:
            # Flatten so that (1, d) and (d,) embeddings are comparable.
            vec1 = np.asarray(embedding1, dtype=float).ravel()
            vec2 = np.asarray(embedding2, dtype=float).ravel()

            # The epsilon keeps an all-zero embedding from dividing by zero.
            unit1 = vec1 / (np.linalg.norm(vec1) + 1e-8)
            unit2 = vec2 / (np.linalg.norm(vec2) + 1e-8)
            similarity = float(np.dot(unit1, unit2))
        except Exception as e:
            # Deliberate best-effort: a bad embedding must not abort matching.
            if self.logger:
                self.logger.log_action({
                    "action": "visual_similarity_error",
                    "error": str(e)
                })
            return 0.0

        # NaN/inf would slip through the clamp (min(1.0, nan) is 1.0).
        if not np.isfinite(similarity):
            return 0.0

        return max(0.0, min(1.0, similarity))

    def _calculate_position_similarity(
        self,
        pos1: List[int],
        pos2: List[int]
    ) -> float:
        """Compute the similarity between two positions, with tolerance.

        Args:
            pos1: Position 1 ``[x, y]``.
            pos2: Position 2 ``[x, y]``.

        Returns:
            1.0 within ``position_tolerance``, decreasing linearly to 0.0 at
            twice the tolerance; 0.0 when a position has fewer than two
            coordinates.
        """
        if not pos1 or not pos2 or len(pos1) < 2 or len(pos2) < 2:
            return 0.0

        tolerance = self.position_tolerance
        distance = math.hypot(pos1[0] - pos2[0], pos1[1] - pos2[1])

        if distance <= tolerance:
            return 1.0
        # Checked before dividing, so a zero tolerance never divides by zero.
        if distance >= tolerance * 2:
            return 0.0
        return 1.0 - ((distance - tolerance) / tolerance)

    def _count_matched_steps(
        self,
        actions: List[Dict[str, Any]],
        workflow_steps: List[Dict[str, Any]]
    ) -> int:
        """Count the leading steps that match consecutively.

        Args:
            actions: Actions of the session.
            workflow_steps: Steps of the workflow.

        Returns:
            Number of pairs, from the start, whose similarity is at least
            0.7; counting stops at the first pair below that value.
        """
        matched = 0
        for action, step in zip(actions, workflow_steps):
            similarity = self._calculate_step_similarity(action, step)
            if similarity < self._STEP_MATCH_THRESHOLD:
                break
            matched += 1
        return matched

    def find_best_match(
        self,
        matches: List["WorkflowMatch"]
    ) -> Optional["WorkflowMatch"]:
        """Return the highest-confidence match if it reaches the threshold.

        Args:
            matches: Candidate matches, in any order.

        Returns:
            The match with the highest confidence (the first one on ties)
            when that confidence is at least ``min_confidence``, else None.
        """
        if not matches:
            return None

        # max() instead of matches[0]: does not rely on a pre-sorted list.
        best_match = max(matches, key=lambda m: m.confidence)
        if best_match.confidence < self.min_confidence:
            return None

        if self.logger:
            self.logger.log_action({
                "action": "best_match_found",
                "workflow_id": best_match.workflow_id,
                "workflow_name": best_match.workflow_name,
                "confidence": best_match.confidence,
                "matched_steps": best_match.matched_steps,
                "total_steps": best_match.total_steps
            })
        return best_match

    def get_match_details(self, match: "WorkflowMatch") -> Dict[str, Any]:
        """Return the details of a match as a dict, for display.

        Args:
            match: Match to describe.

        Returns:
            Dict with the match fields, its completion percentage and a
            preview of the next three remaining steps.
        """
        return {
            "workflow_id": match.workflow_id,
            "workflow_name": match.workflow_name,
            "confidence": match.confidence,
            "matched_steps": match.matched_steps,
            "total_steps": match.total_steps,
            "remaining_steps": match.remaining_steps,
            "completion_percentage": match.completion_percentage,
            "next_steps_preview": match.remaining_steps[:3]
        }
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Basic smoke tests: exercise the matcher on a small hand-written
    # workflow and print the results.
    print("Test du WorkflowMatcher")
    print("=" * 50)

    class MockLogger:
        """Minimal logger that prints every logged action."""

        def log_action(self, data):
            print(f"[LOG] {data}")

    logger = MockLogger()
    config = {
        "workflow": {
            "position_tolerance": 50,
            "min_confidence": 0.80
        }
    }

    matcher = WorkflowMatcher(logger, config)

    # Test 1: the session reproduces the first two steps exactly.
    print("\n1. Test match parfait:")
    session_actions = [
        {
            "action_type": "click",
            "position": [100, 100],
            "window": "Calculatrice"
        },
        {
            "action_type": "type",
            "position": [0, 0],
            "window": "Calculatrice"
        }
    ]

    workflow = {
        "workflow_id": "calc_001",
        "name": "Calcul simple",
        "steps": [
            {
                "action_type": "click",
                "position": [100, 100],
                "window": "Calculatrice"
            },
            {
                "action_type": "type",
                "position": [0, 0],
                "window": "Calculatrice"
            },
            {
                "action_type": "click",
                "position": [200, 200],
                "window": "Calculatrice"
            }
        ]
    }

    matches = matcher.match_current_session(session_actions, [workflow])
    print(f"   Nombre de matches: {len(matches)}")
    if matches:
        print(f"   Meilleur match: {matches[0].workflow_name}")
        print(f"   Confiance: {matches[0].confidence:.2%}")
        print(f"   Étapes matchées: {matches[0].matched_steps}/{matches[0].total_steps}")

    # Test 2: same session with the first click slightly shifted, still
    # inside the configured position tolerance.
    print("\n2. Test avec tolérance de position:")
    session_actions[0]["position"] = [120, 110]

    matches = matcher.match_current_session(session_actions, [workflow])
    if matches:
        print(f"   Confiance avec décalage: {matches[0].confidence:.2%}")

    # Test 3: pick the best match among the results of test 2.
    print("\n3. Test find_best_match:")
    best = matcher.find_best_match(matches)
    if best:
        print(f"   Meilleur match trouvé: {best.workflow_name}")
        print(f"   Confiance: {best.confidence:.2%}")
        details = matcher.get_match_details(best)
        print(f"   Prochaines étapes: {len(details['next_steps_preview'])}")
    else:
        print("   Aucun match au-dessus du seuil")

    print("\n✓ Tests terminés!")
|
||||
Reference in New Issue
Block a user