Initial commit

2026-03-05 00:20:25 +01:00
commit dcd4de9945
1954 changed files with 669380 additions and 0 deletions
--- a/geniusia2/core/workflow_matcher.py
+++ b/geniusia2/core/workflow_matcher.py
@@ -0,0 +1,536 @@
+#!/usr/bin/env python3
+"""
+WorkflowMatcher - Compare les actions courantes avec les workflows connus
+pour détecter les correspondances et suggérer l'auto-complétion.
+"""
+
+from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
+import math
+import numpy as np
+
+
+@dataclass
+class WorkflowMatch:
+    """Result of comparing the current session with one known workflow."""
+
+    # Identifier and display name of the workflow that was compared.
+    workflow_id: str
+    workflow_name: str
+    # Match score assigned by the matcher.
+    confidence: float
+    # Number of steps counted as matched, and total steps in the workflow.
+    matched_steps: int
+    total_steps: int
+    # Workflow steps that have not been matched yet.
+    remaining_steps: List[Dict[str, Any]]
+    # Index of the step the session is currently at.
+    current_step_index: int
+
+    @property
+    def completion_percentage(self) -> float:
+        """Return the share of matched steps as a percentage (0.0 if empty)."""
+        step_count = self.total_steps
+        if step_count == 0:
+            # An empty workflow has nothing to complete; avoid dividing by 0.
+            return 0.0
+        ratio = self.matched_steps / step_count
+        return ratio * 100
+
+
+class WorkflowMatcher:
+    """
+    Match the actions of the current session against known workflows.
+
+    Session actions are compared index-by-index with the steps of each
+    workflow (action ``i`` against step ``i``). Per-step similarities are
+    averaged into a confidence score that callers can use to suggest
+    auto-completion of the remaining steps.
+    """
+
+    # A step counts as matched when its similarity reaches this value.
+    STEP_MATCH_THRESHOLD = 0.7
+
+    # Criterion weights used when no visual embedding can be compared.
+    _BASE_WEIGHTS = {"action_type": 0.4, "position": 0.3, "window": 0.3}
+
+    # Split of the non-visual criteria at the default visual weight; it is
+    # rescaled when ``visual_similarity_weight`` is configured differently.
+    _VISUAL_BASE_WEIGHTS = {"action_type": 0.3, "position": 0.2, "window": 0.2}
+    _DEFAULT_VISUAL_WEIGHT = 0.3
+
+    # Raw action-type variants mapped to their canonical name.
+    _ACTION_TYPE_ALIASES = {
+        "mouse_click": "click",
+        "mouse_move": "move",
+        "key_press": "type",
+        "keyboard": "type",
+    }
+
+    def __init__(self, logger, config: Dict[str, Any], faiss_index=None):
+        """
+        Initialise the workflow matcher.
+
+        Args:
+            logger: Object exposing ``log_action(dict)``; may be falsy to
+                disable logging.
+            config: Global configuration; settings are read from its
+                ``"workflow"`` section.
+            faiss_index: Optional index object. Its presence enables the
+                visual-similarity criterion; it is not queried here.
+        """
+        self.logger = logger
+        self.config = config
+        self.faiss_index = faiss_index
+
+        workflow_config = config.get("workflow", {})
+
+        # Distance (pixels) under which two positions are considered equal.
+        self.position_tolerance = workflow_config.get("position_tolerance", 50)
+        # Minimum confidence for find_best_match to return a match.
+        self.min_confidence = workflow_config.get("min_confidence", 0.80)
+        # Share of the step score given to visual similarity when both the
+        # action and the step carry an embedding.
+        self.visual_similarity_weight = workflow_config.get(
+            "visual_similarity_weight", self._DEFAULT_VISUAL_WEIGHT
+        )
+
+        if self.logger:
+            self.logger.log_action({
+                "action": "workflow_matcher_initialized",
+                "position_tolerance": self.position_tolerance,
+                "min_confidence": self.min_confidence,
+                "faiss_enabled": faiss_index is not None
+            })
+
+    def match_current_session(
+        self,
+        session_actions: List[Dict[str, Any]],
+        workflows: List[Dict[str, Any]]
+    ) -> List[WorkflowMatch]:
+        """
+        Compare the current session with every known workflow.
+
+        Args:
+            session_actions: Actions recorded in the current session.
+            workflows: Known workflows, each a dict with ``"workflow_id"``,
+                ``"name"`` and ``"steps"`` keys.
+
+        Returns:
+            Matches with a strictly positive score, sorted by decreasing
+            confidence. Empty when there is nothing to compare.
+        """
+        if not session_actions or not workflows:
+            return []
+
+        matches = []
+
+        for workflow in workflows:
+            # Read the steps once; a missing or None entry means "no steps".
+            steps = workflow.get("steps") or []
+
+            match_score = self.calculate_match_score(session_actions, steps)
+            if match_score <= 0:
+                continue
+
+            matched_steps = self._count_matched_steps(session_actions, steps)
+
+            matches.append(WorkflowMatch(
+                workflow_id=workflow.get("workflow_id", ""),
+                workflow_name=workflow.get("name", "Workflow inconnu"),
+                confidence=match_score,
+                matched_steps=matched_steps,
+                total_steps=len(steps),
+                remaining_steps=steps[matched_steps:],
+                current_step_index=matched_steps
+            ))
+
+        matches.sort(key=lambda m: m.confidence, reverse=True)
+
+        if matches and self.logger:
+            self.logger.log_action({
+                "action": "workflows_matched",
+                "num_matches": len(matches),
+                "best_confidence": matches[0].confidence
+            })
+
+        return matches
+
+    def calculate_match_score(
+        self,
+        actions: List[Dict[str, Any]],
+        workflow_steps: List[Dict[str, Any]]
+    ) -> float:
+        """
+        Compute the match score between actions and a workflow's steps.
+
+        Only the first ``min(len(actions), len(workflow_steps))`` pairs are
+        compared. The score is the mean step similarity plus a small bonus
+        (0.02 per compared pair, capped at 0.1) that rewards longer matched
+        prefixes.
+
+        Args:
+            actions: Actions to compare.
+            workflow_steps: Steps of the workflow.
+
+        Returns:
+            Score in [0, 1]; 0.0 when either list is empty or when no
+            compared pair has any similarity.
+        """
+        if not actions or not workflow_steps:
+            return 0.0
+
+        compare_length = min(len(actions), len(workflow_steps))
+
+        # zip stops at the shorter sequence, i.e. after compare_length pairs.
+        total_score = sum(
+            self._calculate_step_similarity(action, step)
+            for action, step in zip(actions, workflow_steps)
+        )
+        avg_score = total_score / compare_length
+
+        # Without any similarity the length bonus must not manufacture a
+        # positive score, otherwise unrelated workflows pass the "> 0" filter.
+        if avg_score <= 0.0:
+            return 0.0
+
+        sequence_bonus = min(0.1, compare_length * 0.02)
+        return min(1.0, avg_score + sequence_bonus)
+
+    def _normalize_action_type(self, action_type: str) -> str:
+        """
+        Map an action-type variant to its canonical name.
+
+        Args:
+            action_type: Raw action type.
+
+        Returns:
+            Canonical type, or ``action_type`` unchanged when it has no alias.
+        """
+        return self._ACTION_TYPE_ALIASES.get(action_type, action_type)
+
+    def _step_weights(self, has_visual: bool) -> Dict[str, float]:
+        """Return the criterion weights, honouring the configured visual weight."""
+        if not has_visual:
+            return self._BASE_WEIGHTS
+
+        visual = min(1.0, max(0.0, float(self.visual_similarity_weight)))
+        # Rescale the other criteria so all weights still sum to 1. At the
+        # default visual weight the scale is exactly 1.0.
+        scale = (1.0 - visual) / (1.0 - self._DEFAULT_VISUAL_WEIGHT)
+        weights = {
+            name: weight * scale
+            for name, weight in self._VISUAL_BASE_WEIGHTS.items()
+        }
+        weights["visual"] = visual
+        return weights
+
+    def _calculate_step_similarity(
+        self,
+        action: Dict[str, Any],
+        step: Dict[str, Any]
+    ) -> float:
+        """
+        Compute the similarity between one action and one workflow step.
+
+        Combines action-type equality, position proximity, window-name
+        match and, when an index is configured and both sides carry an
+        ``"embedding"``, visual similarity.
+
+        Args:
+            action: Action to compare.
+            step: Workflow step.
+
+        Returns:
+            Similarity score in [0, 1].
+        """
+        has_visual = (
+            self.faiss_index is not None and
+            action.get("embedding") is not None and
+            step.get("embedding") is not None
+        )
+        weights = self._step_weights(has_visual)
+
+        score = 0.0
+
+        # 1. Action type (after alias normalisation).
+        action_type = self._normalize_action_type(action.get("action_type", ""))
+        step_type = self._normalize_action_type(step.get("action_type", ""))
+        if action_type == step_type:
+            score += weights["action_type"]
+
+        # 2. Position; a missing position defaults to the origin.
+        action_pos = action.get("position", [0, 0])
+        step_pos = step.get("position", [0, 0])
+        if action_pos and step_pos:
+            score += weights["position"] * self._calculate_position_similarity(
+                action_pos,
+                step_pos
+            )
+
+        # 3. Window: full credit for equality, half for substring containment.
+        action_window = action.get("window", "")
+        step_window = step.get("window", "")
+        if action_window and step_window:
+            if action_window == step_window:
+                score += weights["window"]
+            elif action_window in step_window or step_window in action_window:
+                score += weights["window"] * 0.5
+
+        # 4. Visual similarity (only when embeddings are available).
+        if has_visual:
+            score += weights["visual"] * self._calculate_visual_similarity(
+                action.get("embedding"),
+                step.get("embedding")
+            )
+
+        return score
+
+    def _calculate_visual_similarity(
+        self,
+        embedding1: np.ndarray,
+        embedding2: np.ndarray
+    ) -> float:
+        """
+        Compute the cosine similarity between two embeddings.
+
+        Embeddings are flattened first, so ``(d,)`` and ``(1, d)`` inputs are
+        both accepted.
+
+        Args:
+            embedding1: First embedding (array-like).
+            embedding2: Second embedding (array-like).
+
+        Returns:
+            Cosine similarity clamped to [0, 1]; 0.0 when an embedding is
+            missing, the result is not finite, or the computation fails.
+        """
+        if embedding1 is None or embedding2 is None:
+            return 0.0
+
+        try:
+            vec1 = np.asarray(embedding1, dtype=float).ravel()
+            vec2 = np.asarray(embedding2, dtype=float).ravel()
+
+            # The epsilon keeps an all-zero embedding from dividing by zero.
+            unit1 = vec1 / (np.linalg.norm(vec1) + 1e-8)
+            unit2 = vec2 / (np.linalg.norm(vec2) + 1e-8)
+
+            similarity = float(np.dot(unit1, unit2))
+        except Exception as e:
+            # Deliberate best-effort: a bad embedding must not abort matching.
+            if self.logger:
+                self.logger.log_action({
+                    "action": "visual_similarity_error",
+                    "error": str(e)
+                })
+            return 0.0
+
+        # min()/max() would turn NaN into 1.0, so reject it explicitly.
+        if not math.isfinite(similarity):
+            return 0.0
+
+        return max(0.0, min(1.0, similarity))
+
+    def _calculate_position_similarity(
+        self,
+        pos1: List[int],
+        pos2: List[int]
+    ) -> float:
+        """
+        Compute the similarity between two positions, with tolerance.
+
+        Args:
+            pos1: Position 1 as ``[x, y]``.
+            pos2: Position 2 as ``[x, y]``.
+
+        Returns:
+            1.0 within ``position_tolerance``, decreasing linearly to 0.0 at
+            twice the tolerance; 0.0 when a position is missing or has fewer
+            than two coordinates.
+        """
+        if not pos1 or not pos2 or len(pos1) < 2 or len(pos2) < 2:
+            return 0.0
+
+        distance = math.hypot(pos1[0] - pos2[0], pos1[1] - pos2[1])
+        tolerance = self.position_tolerance
+
+        if distance <= tolerance:
+            return 1.0
+        if distance >= tolerance * 2:
+            return 0.0
+        return 1.0 - ((distance - tolerance) / tolerance)
+
+    def _count_matched_steps(
+        self,
+        actions: List[Dict[str, Any]],
+        workflow_steps: List[Dict[str, Any]]
+    ) -> int:
+        """
+        Count the steps matched consecutively from the start.
+
+        Args:
+            actions: Session actions.
+            workflow_steps: Steps of the workflow.
+
+        Returns:
+            Length of the leading run of pairs whose similarity is at least
+            ``STEP_MATCH_THRESHOLD``.
+        """
+        matched = 0
+
+        for action, step in zip(actions, workflow_steps):
+            similarity = self._calculate_step_similarity(action, step)
+            if similarity < self.STEP_MATCH_THRESHOLD:
+                # Stop at the first miss: only a consecutive prefix counts.
+                break
+            matched += 1
+
+        return matched
+
+    def find_best_match(
+        self,
+        matches: List[WorkflowMatch]
+    ) -> Optional[WorkflowMatch]:
+        """
+        Pick the best match from a list.
+
+        Args:
+            matches: Candidate matches, in any order.
+
+        Returns:
+            The match with the highest confidence if it reaches
+            ``min_confidence``, otherwise None.
+        """
+        if not matches:
+            return None
+
+        # max() keeps the first of equal candidates, so a list already sorted
+        # by decreasing confidence still yields its first element.
+        best_match = max(matches, key=lambda m: m.confidence)
+
+        if best_match.confidence < self.min_confidence:
+            return None
+
+        if self.logger:
+            self.logger.log_action({
+                "action": "best_match_found",
+                "workflow_id": best_match.workflow_id,
+                "workflow_name": best_match.workflow_name,
+                "confidence": best_match.confidence,
+                "matched_steps": best_match.matched_steps,
+                "total_steps": best_match.total_steps
+            })
+        return best_match
+
+    def get_match_details(self, match: WorkflowMatch) -> Dict[str, Any]:
+        """
+        Return the details of a match for display.
+
+        Args:
+            match: Match to describe.
+
+        Returns:
+            Dictionary with the match fields, the completion percentage and
+            a preview of the next three remaining steps.
+        """
+        return {
+            "workflow_id": match.workflow_id,
+            "workflow_name": match.workflow_name,
+            "confidence": match.confidence,
+            "matched_steps": match.matched_steps,
+            "total_steps": match.total_steps,
+            "remaining_steps": match.remaining_steps,
+            "completion_percentage": match.completion_percentage,
+            "next_steps_preview": match.remaining_steps[:3]
+        }
+
+
+if __name__ == "__main__":
+    # Manual smoke test: run the matcher on a tiny calculator workflow and
+    # print the results for visual inspection.
+
+    class MockLogger:
+        """Logger stand-in that prints every logged payload."""
+
+        def log_action(self, data):
+            print(f"[LOG] {data}")
+
+    def _calc_step(kind, x, y):
+        """Build an action/step dict located in the calculator window."""
+        return {
+            "action_type": kind,
+            "position": [x, y],
+            "window": "Calculatrice"
+        }
+
+    print("Test du WorkflowMatcher")
+    print("=" * 50)
+
+    demo_config = {"workflow": {"position_tolerance": 50, "min_confidence": 0.80}}
+    demo_matcher = WorkflowMatcher(MockLogger(), demo_config)
+
+    # Test 1: the session reproduces the first two workflow steps exactly.
+    print("\n1. Test match parfait:")
+    recorded = [_calc_step("click", 100, 100), _calc_step("type", 0, 0)]
+    known_workflow = {
+        "workflow_id": "calc_001",
+        "name": "Calcul simple",
+        "steps": [
+            _calc_step("click", 100, 100),
+            _calc_step("type", 0, 0),
+            _calc_step("click", 200, 200),
+        ]
+    }
+
+    found = demo_matcher.match_current_session(recorded, [known_workflow])
+    print(f"   Nombre de matches: {len(found)}")
+    if found:
+        top = found[0]
+        print(f"   Meilleur match: {top.workflow_name}")
+        print(f"   Confiance: {top.confidence:.2%}")
+        print(f"   Étapes matchées: {top.matched_steps}/{top.total_steps}")
+
+    # Test 2: shift the first click slightly, staying within the tolerance.
+    print("\n2. Test avec tolérance de position:")
+    recorded[0]["position"] = [120, 110]
+
+    found = demo_matcher.match_current_session(recorded, [known_workflow])
+    if found:
+        print(f"   Confiance avec décalage: {found[0].confidence:.2%}")
+
+    # Test 3: select the best match among the results of test 2.
+    print("\n3. Test find_best_match:")
+    winner = demo_matcher.find_best_match(found)
+    if not winner:
+        print("   Aucun match au-dessus du seuil")
+    else:
+        print(f"   Meilleur match trouvé: {winner.workflow_name}")
+        print(f"   Confiance: {winner.confidence:.2%}")
+        summary = demo_matcher.get_match_details(winner)
+        print(f"   Prochaines étapes: {len(summary['next_steps_preview'])}")
+
+    print("\n✓ Tests terminés!")