#!/usr/bin/env python3
"""
Analyzer for matching failures, used for continuous improvement of the system.

This script analyzes matching-failure reports and generates statistics and
recommendations for improving the workflow graph.
"""

import argparse
import json
import sys
from collections import Counter, defaultdict
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional


class FailedMatchAnalyzer:
    """Analyzer for matching failures."""

    def __init__(self, failed_matches_dir: str = "data/failed_matches"):
        self.failed_matches_dir = Path(failed_matches_dir)
        self.reports: List[Dict[str, Any]] = []

    def load_reports(self, last_n: Optional[int] = None, since_hours: Optional[int] = None):
        """
        Load the failure reports.

        Args:
            last_n: Load only the N most recent reports
            since_hours: Load only reports from the last X hours
        """
        if not self.failed_matches_dir.exists():
            print(f"⚠️ No failure directory found: {self.failed_matches_dir}")
            return

        # List all failure directories, most recent first
        # (directory names embed the timestamp, so name order is date order)
        match_dirs = sorted(
            [d for d in self.failed_matches_dir.iterdir() if d.is_dir()],
            key=lambda x: x.name,
            reverse=True
        )

        if not match_dirs:
            print("⚠️ No matching failures recorded")
            return

        # Filter by date if requested
        if since_hours:
            cutoff = datetime.now() - timedelta(hours=since_hours)
            match_dirs = [
                d for d in match_dirs
                if self._parse_timestamp(d.name) >= cutoff
            ]

        # Cap the number of reports if requested
        if last_n:
            match_dirs = match_dirs[:last_n]

        # Load the reports
        for match_dir in match_dirs:
            report_path = match_dir / "report.json"
            if report_path.exists():
                try:
                    with open(report_path, 'r', encoding='utf-8') as f:
                        report = json.load(f)
                    report['_dir'] = match_dir
                    self.reports.append(report)
                except Exception as e:
                    print(f"⚠️ Error while loading {report_path}: {e}")

        print(f"✓ {len(self.reports)} reports loaded")
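    # The loader above assumes the on-disk layout produced by the matching
    # pipeline. A minimal sketch of one failure entry, with field names
    # inferred from how this script reads them (an assumption, not a spec):
    #
    #   data/failed_matches/failed_match_20251123_143052/
    #     ├── report.json
    #     └── screenshot.png
    #
    #   report.json (excerpt):
    #     {
    #       "timestamp": "20251123_143052",
    #       "state": {"window_title": "..."},
    #       "suggestions": ["CREATE_NODE: ..."],
    #       "matching_results": {
    #         "threshold": 0.85,
    #         "best_confidence": 0.78,
    #         "similarities": [
    #           {"node_id": "n42", "node_label": "...", "similarity": 0.78}
    #         ]
    #       }
    #     }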
    def _parse_timestamp(self, dirname: str) -> datetime:
        """Parse the timestamp from the directory name."""
        try:
            # Format: failed_match_20251123_143052
            timestamp_str = dirname.replace("failed_match_", "")
            return datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S")
        except ValueError:
            # Unparseable names sort before any cutoff and are filtered out
            return datetime.min

    def analyze(self) -> Dict[str, Any]:
        """Analyze all reports and generate statistics."""
        if not self.reports:
            return {}

        analysis = {
            'total_failures': len(self.reports),
            'date_range': self._get_date_range(),
            'confidence_stats': self._analyze_confidence(),
            'suggestions_summary': self._analyze_suggestions(),
            'problematic_nodes': self._identify_problematic_nodes(),
            'threshold_recommendations': self._recommend_thresholds(),
            'new_states_detected': self._count_new_states()
        }
        return analysis

    def _get_date_range(self) -> Dict[str, str]:
        """Get the date range covered by the reports."""
        timestamps = [
            datetime.strptime(r['timestamp'], "%Y%m%d_%H%M%S")
            for r in self.reports
        ]
        return {
            'first': min(timestamps).strftime("%Y-%m-%d %H:%M:%S"),
            'last': max(timestamps).strftime("%Y-%m-%d %H:%M:%S")
        }

    def _analyze_confidence(self) -> Dict[str, Any]:
        """Analyze the confidence levels."""
        confidences = [
            r['matching_results']['best_confidence']
            for r in self.reports
        ]
        return {
            'min': min(confidences),
            'max': max(confidences),
            'avg': sum(confidences) / len(confidences),
            'below_70': sum(1 for c in confidences if c < 0.70),
            'between_70_85': sum(1 for c in confidences if 0.70 <= c < 0.85),
            'above_85': sum(1 for c in confidences if c >= 0.85)
        }

    def _analyze_suggestions(self) -> Dict[str, int]:
        """Count the suggestion types."""
        suggestion_types = Counter()
        for report in self.reports:
            for suggestion in report.get('suggestions', []):
                # Extract the suggestion type (everything before the ':')
                suggestion_type = suggestion.split(':')[0]
                suggestion_types[suggestion_type] += 1
        return dict(suggestion_types)

    def _identify_problematic_nodes(self) -> List[Dict[str, Any]]:
        """Identify the nodes that cause the most confusion."""
        node_near_misses = defaultdict(list)

        for report in self.reports:
            similarities = report['matching_results'].get('similarities', [])
            if similarities:
                best = similarities[0]
                confidence = best['similarity']
                # Near miss: between 0.70 and the threshold
                if 0.70 <= confidence < report['matching_results']['threshold']:
                    node_near_misses[best['node_id']].append({
                        'confidence': confidence,
                        'label': best['node_label'],
                        'timestamp': report['timestamp']
                    })

        # Sort by number of near misses
        problematic = [
            {
                'node_id': node_id,
                'node_label': misses[0]['label'],
                'near_miss_count': len(misses),
                'avg_confidence': sum(m['confidence'] for m in misses) / len(misses)
            }
            for node_id, misses in node_near_misses.items()
        ]
        return sorted(problematic, key=lambda x: x['near_miss_count'], reverse=True)

    def _recommend_thresholds(self) -> Dict[str, Any]:
        """Recommend threshold adjustments."""
        confidences = [
            r['matching_results']['best_confidence']
            for r in self.reports
        ]

        # Compute the 90th percentile of the confidences
        sorted_conf = sorted(confidences)
        p90_index = int(len(sorted_conf) * 0.9)
        p90 = sorted_conf[p90_index] if sorted_conf else 0.85

        current_threshold = self.reports[0]['matching_results']['threshold']

        recommendations = {
            'current_threshold': current_threshold,
            'p90_confidence': p90,
            'recommended_threshold': max(0.70, min(0.90, p90 - 0.02))
        }

        if p90 < current_threshold - 0.05:
            recommendations['action'] = "LOWER_THRESHOLD"
            recommendations['reason'] = f"90% of failures have confidence < {p90:.3f}"
        elif p90 > current_threshold + 0.05:
            recommendations['action'] = "RAISE_THRESHOLD"
            recommendations['reason'] = "Many potential false positives"
        else:
            recommendations['action'] = "KEEP_CURRENT"
            recommendations['reason'] = "Threshold is appropriate"

        return recommendations

    def _count_new_states(self) -> int:
        """Count newly detected states (confidence < 0.70)."""
        return sum(
            1 for r in self.reports
            if r['matching_results']['best_confidence'] < 0.70
        )

    def print_report(self, analysis: Dict[str, Any]):
        """Print the analysis report."""
        print("\n" + "=" * 70)
        print("MATCHING FAILURE ANALYSIS REPORT")
        print("=" * 70)

        print("\n📊 General Statistics")
        print(f"  • Total failures: {analysis['total_failures']}")
        print(f"  • Period: {analysis['date_range']['first']} → {analysis['date_range']['last']}")

        print("\n📈 Confidence Levels")
        conf = analysis['confidence_stats']
        print(f"  • Minimum: {conf['min']:.3f}")
        print(f"  • Maximum: {conf['max']:.3f}")
        print(f"  • Average: {conf['avg']:.3f}")
        print(f"  • < 0.70 (new states): {conf['below_70']}")
        print(f"  • 0.70-0.85 (near misses): {conf['between_70_85']}")
        print(f"  • >= 0.85 (likely false negatives): {conf['above_85']}")

        print("\n💡 Generated Suggestions")
        for suggestion_type, count in analysis['suggestions_summary'].items():
            print(f"  • {suggestion_type}: {count}")

        print("\n⚠️ Problematic Nodes (Top 5)")
        for i, node in enumerate(analysis['problematic_nodes'][:5], 1):
            print(f"  {i}. {node['node_label']} (ID: {node['node_id']})")
            print(f"     - Near misses: {node['near_miss_count']}")
            print(f"     - Average confidence: {node['avg_confidence']:.3f}")

        print("\n🎯 Threshold Recommendations")
        thresh = analysis['threshold_recommendations']
        print(f"  • Current threshold: {thresh['current_threshold']:.3f}")
        print(f"  • P90 of confidences: {thresh['p90_confidence']:.3f}")
        print(f"  • Recommended threshold: {thresh['recommended_threshold']:.3f}")
        print(f"  • Action: {thresh['action']}")
        print(f"  • Reason: {thresh['reason']}")

        print("\n🆕 New States Detected")
        print(f"  • {analysis['new_states_detected']} potentially new states")
        print("    (confidence < 0.70; these require creating new nodes)")

        print("\n" + "=" * 70)

    def export_detailed_report(self, output_path: str = "failed_matches_analysis.json"):
        """Export a detailed report as JSON."""
        analysis = self.analyze()

        detailed_report = {
            'analysis': analysis,
            'individual_reports': [
                {
                    'timestamp': r['timestamp'],
                    'confidence': r['matching_results']['best_confidence'],
                    'suggestions': r['suggestions'],
                    'window_title': r['state']['window_title'],
                    'screenshot_path': str(r['_dir'] / "screenshot.png")
                }
                for r in self.reports
            ]
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(detailed_report, f, indent=2)

        print(f"\n✓ Detailed report exported: {output_path}")
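# Example invocations (a sketch; paths assume the default layout documented in
# load_reports above, and the script filename is hypothetical):
#
#   # Analyze the 50 most recent failures
#   python analyze_failed_matches.py --last 50
#
#   # Analyze the last 24 hours and export a detailed JSON report
#   python analyze_failed_matches.py --since-hours 24 --export analysis.json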
def main():
    parser = argparse.ArgumentParser(
        description="Analyze matching failures for continuous improvement"
    )
    parser.add_argument(
        '--last',
        type=int,
        help="Analyze the N most recent failures"
    )
    parser.add_argument(
        '--since-hours',
        type=int,
        help="Analyze failures from the last X hours"
    )
    parser.add_argument(
        '--export',
        type=str,
        help="Export the detailed report as JSON"
    )
    parser.add_argument(
        '--dir',
        type=str,
        default="data/failed_matches",
        help="Directory containing the failures (default: data/failed_matches)"
    )

    args = parser.parse_args()

    # Create the analyzer
    analyzer = FailedMatchAnalyzer(failed_matches_dir=args.dir)

    # Load the reports
    analyzer.load_reports(last_n=args.last, since_hours=args.since_hours)

    if not analyzer.reports:
        print("\n❌ No reports to analyze")
        return 1

    # Analyze
    analysis = analyzer.analyze()

    # Print the report
    analyzer.print_report(analysis)

    # Export if requested
    if args.export:
        analyzer.export_detailed_report(args.export)

    return 0


if __name__ == '__main__':
    sys.exit(main())