v1.0 - Stable version: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU working
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
analyze_failed_matches.py (executable file, 327 lines added)

@@ -0,0 +1,327 @@
#!/usr/bin/env python3
"""
Analyzer for matching failures, supporting continuous improvement of the system.

This script analyzes matching-failure reports and generates statistics
and recommendations for improving the workflow graph.
"""

import argparse
import json
import sys
from collections import Counter, defaultdict
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional


class FailedMatchAnalyzer:
    """Analyzer for matching failures."""

    def __init__(self, failed_matches_dir: str = "data/failed_matches"):
        self.failed_matches_dir = Path(failed_matches_dir)
        self.reports: List[Dict[str, Any]] = []

    def load_reports(self, last_n: Optional[int] = None, since_hours: Optional[int] = None):
        """
        Load the failure reports.

        Args:
            last_n: Load only the last N reports.
            since_hours: Load only the reports from the last X hours.
        """
        if not self.failed_matches_dir.exists():
            print(f"⚠️ No failures directory found: {self.failed_matches_dir}")
            return

        # List all failure directories, newest first
        match_dirs = sorted(
            [d for d in self.failed_matches_dir.iterdir() if d.is_dir()],
            key=lambda x: x.name,
            reverse=True
        )

        if not match_dirs:
            print("⚠️ No matching failures recorded")
            return

        # Filter by date if requested
        if since_hours:
            cutoff = datetime.now() - timedelta(hours=since_hours)
            match_dirs = [
                d for d in match_dirs
                if self._parse_timestamp(d.name) >= cutoff
            ]

        # Cap the number of reports if requested
        if last_n:
            match_dirs = match_dirs[:last_n]

        # Load the reports
        for match_dir in match_dirs:
            report_path = match_dir / "report.json"
            if report_path.exists():
                try:
                    with open(report_path, 'r') as f:
                        report = json.load(f)
                    report['_dir'] = match_dir
                    self.reports.append(report)
                except Exception as e:
                    print(f"⚠️ Error while loading {report_path}: {e}")

        print(f"✓ {len(self.reports)} reports loaded")
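
    # For reference, a minimal report.json compatible with the accesses in
    # this class might look like the sketch below. Field names are taken from
    # this file; the exact schema emitted by the capture pipeline, and the
    # example values, are assumptions.
    #
    # {
    #   "timestamp": "20251123_143052",
    #   "matching_results": {
    #     "best_confidence": 0.78,
    #     "threshold": 0.85,
    #     "similarities": [
    #       {"node_id": "login_screen", "node_label": "Login screen", "similarity": 0.78}
    #     ]
    #   },
    #   "suggestions": ["LOWER_THRESHOLD: consider lowering the node threshold"],
    #   "state": {"window_title": "MyApp - Login"}
    # }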

    def _parse_timestamp(self, dirname: str) -> datetime:
        """Parse the timestamp from the directory name."""
        try:
            # Format: failed_match_20251123_143052
            timestamp_str = dirname.replace("failed_match_", "")
            return datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S")
        except ValueError:
            return datetime.min

    def analyze(self) -> Dict[str, Any]:
        """Analyze all reports and generate statistics."""
        if not self.reports:
            return {}

        analysis = {
            'total_failures': len(self.reports),
            'date_range': self._get_date_range(),
            'confidence_stats': self._analyze_confidence(),
            'suggestions_summary': self._analyze_suggestions(),
            'problematic_nodes': self._identify_problematic_nodes(),
            'threshold_recommendations': self._recommend_thresholds(),
            'new_states_detected': self._count_new_states()
        }

        return analysis

    def _get_date_range(self) -> Dict[str, str]:
        """Get the date range covered by the reports."""
        timestamps = [
            datetime.strptime(r['timestamp'], "%Y%m%d_%H%M%S")
            for r in self.reports
        ]
        return {
            'first': min(timestamps).strftime("%Y-%m-%d %H:%M:%S"),
            'last': max(timestamps).strftime("%Y-%m-%d %H:%M:%S")
        }

    def _analyze_confidence(self) -> Dict[str, Any]:
        """Analyze the confidence levels."""
        confidences = [
            r['matching_results']['best_confidence']
            for r in self.reports
        ]

        return {
            'min': min(confidences),
            'max': max(confidences),
            'avg': sum(confidences) / len(confidences),
            'below_70': sum(1 for c in confidences if c < 0.70),
            'between_70_85': sum(1 for c in confidences if 0.70 <= c < 0.85),
            'above_85': sum(1 for c in confidences if c >= 0.85)
        }

    def _analyze_suggestions(self) -> Dict[str, int]:
        """Count the suggestion types."""
        suggestion_types = Counter()

        for report in self.reports:
            for suggestion in report.get('suggestions', []):
                # Extract the suggestion type (everything before the ':')
                suggestion_type = suggestion.split(':')[0]
                suggestion_types[suggestion_type] += 1

        return dict(suggestion_types)

    def _identify_problematic_nodes(self) -> List[Dict[str, Any]]:
        """Identify the nodes that cause the most confusion."""
        node_near_misses = defaultdict(list)

        for report in self.reports:
            similarities = report['matching_results'].get('similarities', [])
            if similarities:
                best = similarities[0]
                confidence = best['similarity']
                # Near miss: between 0.70 and the threshold
                if 0.70 <= confidence < report['matching_results']['threshold']:
                    node_near_misses[best['node_id']].append({
                        'confidence': confidence,
                        'label': best['node_label'],
                        'timestamp': report['timestamp']
                    })

        # Sort by number of near misses
        problematic = [
            {
                'node_id': node_id,
                'node_label': misses[0]['label'],
                'near_miss_count': len(misses),
                'avg_confidence': sum(m['confidence'] for m in misses) / len(misses)
            }
            for node_id, misses in node_near_misses.items()
        ]

        return sorted(problematic, key=lambda x: x['near_miss_count'], reverse=True)
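
    # Worked example (illustrative values): with a node threshold of 0.85, a
    # best match on node "login_screen" at similarity 0.78 falls in the
    # [0.70, 0.85) band and counts as a near miss for that node. Three such
    # reports would yield near_miss_count=3, with avg_confidence equal to the
    # mean of the three similarities.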

    def _recommend_thresholds(self) -> Dict[str, Any]:
        """Recommend threshold adjustments."""
        confidences = [
            r['matching_results']['best_confidence']
            for r in self.reports
        ]

        # Compute the 90th percentile of the confidences
        sorted_conf = sorted(confidences)
        p90_index = int(len(sorted_conf) * 0.9)
        p90 = sorted_conf[p90_index] if sorted_conf else 0.85

        current_threshold = self.reports[0]['matching_results']['threshold']

        recommendations = {
            'current_threshold': current_threshold,
            'p90_confidence': p90,
            'recommended_threshold': max(0.70, min(0.90, p90 - 0.02))
        }

        if p90 < current_threshold - 0.05:
            recommendations['action'] = "LOWER_THRESHOLD"
            recommendations['reason'] = f"90% of failures have a confidence < {p90:.3f}"
        elif p90 > current_threshold + 0.05:
            recommendations['action'] = "RAISE_THRESHOLD"
            recommendations['reason'] = "Many potential false positives"
        else:
            recommendations['action'] = "KEEP_CURRENT"
            recommendations['reason'] = "Threshold is appropriate"

        return recommendations
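
    # Worked example (illustrative values): for ten failure confidences
    # sorted as [0.62, ..., 0.79, 0.81], p90_index = int(10 * 0.9) = 9, so
    # p90 = 0.81 and the recommended threshold is
    # clamp(0.81 - 0.02, 0.70, 0.90) = 0.79. With a current threshold of
    # 0.85, p90 < 0.85 - 0.05 does not hold (0.81 >= 0.80), so the action
    # would be KEEP_CURRENT.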

    def _count_new_states(self) -> int:
        """Count the newly detected states (confidence < 0.70)."""
        return sum(
            1 for r in self.reports
            if r['matching_results']['best_confidence'] < 0.70
        )

    def print_report(self, analysis: Dict[str, Any]):
        """Print the analysis report."""
        print("\n" + "="*70)
        print("MATCHING FAILURES ANALYSIS REPORT")
        print("="*70)

        print("\n📊 General Statistics")
        print(f"  • Total failures: {analysis['total_failures']}")
        print(f"  • Period: {analysis['date_range']['first']} → {analysis['date_range']['last']}")

        print("\n📈 Confidence Levels")
        conf = analysis['confidence_stats']
        print(f"  • Minimum: {conf['min']:.3f}")
        print(f"  • Maximum: {conf['max']:.3f}")
        print(f"  • Average: {conf['avg']:.3f}")
        print(f"  • < 0.70 (new states): {conf['below_70']}")
        print(f"  • 0.70-0.85 (near misses): {conf['between_70_85']}")
        print(f"  • > 0.85 (false negatives): {conf['above_85']}")

        print("\n💡 Generated Suggestions")
        for suggestion_type, count in analysis['suggestions_summary'].items():
            print(f"  • {suggestion_type}: {count}")

        print("\n⚠️ Problematic Nodes (Top 5)")
        for i, node in enumerate(analysis['problematic_nodes'][:5], 1):
            print(f"  {i}. {node['node_label']} (ID: {node['node_id']})")
            print(f"     - Near misses: {node['near_miss_count']}")
            print(f"     - Average confidence: {node['avg_confidence']:.3f}")

        print("\n🎯 Threshold Recommendations")
        thresh = analysis['threshold_recommendations']
        print(f"  • Current threshold: {thresh['current_threshold']:.3f}")
        print(f"  • P90 of confidences: {thresh['p90_confidence']:.3f}")
        print(f"  • Recommended threshold: {thresh['recommended_threshold']:.3f}")
        print(f"  • Action: {thresh['action']}")
        print(f"  • Reason: {thresh['reason']}")

        print("\n🆕 Newly Detected States")
        print(f"  • {analysis['new_states_detected']} potentially new states")
        print("    (confidence < 0.70, require the creation of new nodes)")

        print("\n" + "="*70)

    def export_detailed_report(self, output_path: str = "failed_matches_analysis.json"):
        """Export a detailed report as JSON."""
        analysis = self.analyze()

        detailed_report = {
            'analysis': analysis,
            'individual_reports': [
                {
                    'timestamp': r['timestamp'],
                    'confidence': r['matching_results']['best_confidence'],
                    'suggestions': r['suggestions'],
                    'window_title': r['state']['window_title'],
                    'screenshot_path': str(r['_dir'] / "screenshot.png")
                }
                for r in self.reports
            ]
        }

        with open(output_path, 'w') as f:
            json.dump(detailed_report, f, indent=2)

        print(f"\n✓ Detailed report exported: {output_path}")


def main():
    parser = argparse.ArgumentParser(
        description="Analyze matching failures for continuous improvement"
    )
    parser.add_argument(
        '--last',
        type=int,
        help="Analyze the last N failures"
    )
    parser.add_argument(
        '--since-hours',
        type=int,
        help="Analyze the failures from the last X hours"
    )
    parser.add_argument(
        '--export',
        type=str,
        help="Export the detailed report as JSON"
    )
    parser.add_argument(
        '--dir',
        type=str,
        default="data/failed_matches",
        help="Directory containing the failures (default: data/failed_matches)"
    )

    args = parser.parse_args()

    # Create the analyzer
    analyzer = FailedMatchAnalyzer(failed_matches_dir=args.dir)

    # Load the reports
    analyzer.load_reports(last_n=args.last, since_hours=args.since_hours)

    if not analyzer.reports:
        print("\n❌ No reports to analyze")
        return 1

    # Analyze
    analysis = analyzer.analyze()

    # Print the report
    analyzer.print_report(analysis)

    # Export if requested
    if args.export:
        analyzer.export_detailed_report(args.export)

    return 0


if __name__ == '__main__':
    sys.exit(main())
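
# Example invocations (paths are illustrative; the data/failed_matches layout
# follows the --dir default above):
#   python3 analyze_failed_matches.py                    # analyze all recorded failures
#   python3 analyze_failed_matches.py --last 50          # only the 50 most recent
#   python3 analyze_failed_matches.py --since-hours 24 --export analysis.json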