v1.0 - Stable version: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU working
- Interactive self-healing
- Confidence dashboard

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
analyze_failed_matches.py (executable file, 327 lines added)

@@ -0,0 +1,327 @@
#!/usr/bin/env python3
"""
Analyzer for matching failures, supporting continuous improvement of the system.

This script analyzes matching-failure reports and generates statistics
and recommendations for improving the workflow graph.
"""

import argparse
import json
import sys
from collections import Counter, defaultdict
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional


class FailedMatchAnalyzer:
    """Analyzer for matching failures."""

    def __init__(self, failed_matches_dir: str = "data/failed_matches"):
        self.failed_matches_dir = Path(failed_matches_dir)
        self.reports: List[Dict[str, Any]] = []

    def load_reports(self, last_n: Optional[int] = None, since_hours: Optional[int] = None):
        """
        Load the failure reports.

        Args:
            last_n: Load only the last N reports.
            since_hours: Load only the reports from the last X hours.
        """
        if not self.failed_matches_dir.exists():
            print(f"⚠️ No failures directory found: {self.failed_matches_dir}")
            return

        # List all failure directories, newest first
        match_dirs = sorted(
            [d for d in self.failed_matches_dir.iterdir() if d.is_dir()],
            key=lambda x: x.name,
            reverse=True
        )

        if not match_dirs:
            print("⚠️ No matching failures recorded")
            return

        # Filter by date if requested
        if since_hours:
            cutoff = datetime.now() - timedelta(hours=since_hours)
            match_dirs = [
                d for d in match_dirs
                if self._parse_timestamp(d.name) >= cutoff
            ]

        # Cap the number of reports if requested
        if last_n:
            match_dirs = match_dirs[:last_n]

        # Load the reports
        for match_dir in match_dirs:
            report_path = match_dir / "report.json"
            if report_path.exists():
                try:
                    with open(report_path, 'r') as f:
                        report = json.load(f)
                    report['_dir'] = match_dir
                    self.reports.append(report)
                except Exception as e:
                    print(f"⚠️ Error while loading {report_path}: {e}")

        print(f"✓ {len(self.reports)} reports loaded")
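
    # For reference, a minimal report.json compatible with the accesses in
    # this class might look like the sketch below. Field names are taken from
    # this file; the exact schema emitted by the capture pipeline, and the
    # example values, are assumptions.
    #
    # {
    #   "timestamp": "20251123_143052",
    #   "matching_results": {
    #     "best_confidence": 0.78,
    #     "threshold": 0.85,
    #     "similarities": [
    #       {"node_id": "login_screen", "node_label": "Login screen", "similarity": 0.78}
    #     ]
    #   },
    #   "suggestions": ["LOWER_THRESHOLD: consider lowering the node threshold"],
    #   "state": {"window_title": "MyApp - Login"}
    # }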

    def _parse_timestamp(self, dirname: str) -> datetime:
        """Parse the timestamp from the directory name."""
        try:
            # Format: failed_match_20251123_143052
            timestamp_str = dirname.replace("failed_match_", "")
            return datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S")
        except ValueError:
            return datetime.min

    def analyze(self) -> Dict[str, Any]:
        """Analyze all reports and generate statistics."""
        if not self.reports:
            return {}

        analysis = {
            'total_failures': len(self.reports),
            'date_range': self._get_date_range(),
            'confidence_stats': self._analyze_confidence(),
            'suggestions_summary': self._analyze_suggestions(),
            'problematic_nodes': self._identify_problematic_nodes(),
            'threshold_recommendations': self._recommend_thresholds(),
            'new_states_detected': self._count_new_states()
        }

        return analysis

    def _get_date_range(self) -> Dict[str, str]:
        """Get the date range covered by the reports."""
        timestamps = [
            datetime.strptime(r['timestamp'], "%Y%m%d_%H%M%S")
            for r in self.reports
        ]
        return {
            'first': min(timestamps).strftime("%Y-%m-%d %H:%M:%S"),
            'last': max(timestamps).strftime("%Y-%m-%d %H:%M:%S")
        }

    def _analyze_confidence(self) -> Dict[str, Any]:
        """Analyze the confidence levels."""
        confidences = [
            r['matching_results']['best_confidence']
            for r in self.reports
        ]

        return {
            'min': min(confidences),
            'max': max(confidences),
            'avg': sum(confidences) / len(confidences),
            'below_70': sum(1 for c in confidences if c < 0.70),
            'between_70_85': sum(1 for c in confidences if 0.70 <= c < 0.85),
            'above_85': sum(1 for c in confidences if c >= 0.85)
        }

    def _analyze_suggestions(self) -> Dict[str, int]:
        """Count the suggestion types."""
        suggestion_types = Counter()

        for report in self.reports:
            for suggestion in report.get('suggestions', []):
                # Extract the suggestion type (everything before the ':')
                suggestion_type = suggestion.split(':')[0]
                suggestion_types[suggestion_type] += 1

        return dict(suggestion_types)

    def _identify_problematic_nodes(self) -> List[Dict[str, Any]]:
        """Identify the nodes that cause the most confusion."""
        node_near_misses = defaultdict(list)

        for report in self.reports:
            similarities = report['matching_results'].get('similarities', [])
            if similarities:
                best = similarities[0]
                confidence = best['similarity']
                # Near miss: between 0.70 and the threshold
                if 0.70 <= confidence < report['matching_results']['threshold']:
                    node_near_misses[best['node_id']].append({
                        'confidence': confidence,
                        'label': best['node_label'],
                        'timestamp': report['timestamp']
                    })

        # Sort by number of near misses
        problematic = [
            {
                'node_id': node_id,
                'node_label': misses[0]['label'],
                'near_miss_count': len(misses),
                'avg_confidence': sum(m['confidence'] for m in misses) / len(misses)
            }
            for node_id, misses in node_near_misses.items()
        ]

        return sorted(problematic, key=lambda x: x['near_miss_count'], reverse=True)
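
    # Worked example (illustrative values): with a node threshold of 0.85, a
    # best match on node "login_screen" at similarity 0.78 falls in the
    # [0.70, 0.85) band and counts as a near miss for that node. Three such
    # reports would yield near_miss_count=3, with avg_confidence equal to the
    # mean of the three similarities.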

    def _recommend_thresholds(self) -> Dict[str, Any]:
        """Recommend threshold adjustments."""
        confidences = [
            r['matching_results']['best_confidence']
            for r in self.reports
        ]

        # Compute the 90th percentile of the confidences
        sorted_conf = sorted(confidences)
        p90_index = int(len(sorted_conf) * 0.9)
        p90 = sorted_conf[p90_index] if sorted_conf else 0.85

        current_threshold = self.reports[0]['matching_results']['threshold']

        recommendations = {
            'current_threshold': current_threshold,
            'p90_confidence': p90,
            'recommended_threshold': max(0.70, min(0.90, p90 - 0.02))
        }

        if p90 < current_threshold - 0.05:
            recommendations['action'] = "LOWER_THRESHOLD"
            recommendations['reason'] = f"90% of failures have a confidence < {p90:.3f}"
        elif p90 > current_threshold + 0.05:
            recommendations['action'] = "RAISE_THRESHOLD"
            recommendations['reason'] = "Many potential false positives"
        else:
            recommendations['action'] = "KEEP_CURRENT"
            recommendations['reason'] = "Threshold is appropriate"

        return recommendations
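
    # Worked example (illustrative values): for ten failure confidences
    # sorted as [0.62, ..., 0.79, 0.81], p90_index = int(10 * 0.9) = 9, so
    # p90 = 0.81 and the recommended threshold is
    # clamp(0.81 - 0.02, 0.70, 0.90) = 0.79. With a current threshold of
    # 0.85, p90 < 0.85 - 0.05 does not hold (0.81 >= 0.80), so the action
    # would be KEEP_CURRENT.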

    def _count_new_states(self) -> int:
        """Count the newly detected states (confidence < 0.70)."""
        return sum(
            1 for r in self.reports
            if r['matching_results']['best_confidence'] < 0.70
        )

    def print_report(self, analysis: Dict[str, Any]):
        """Print the analysis report."""
        print("\n" + "="*70)
        print("MATCHING FAILURES ANALYSIS REPORT")
        print("="*70)

        print("\n📊 General Statistics")
        print(f"  • Total failures: {analysis['total_failures']}")
        print(f"  • Period: {analysis['date_range']['first']} → {analysis['date_range']['last']}")

        print("\n📈 Confidence Levels")
        conf = analysis['confidence_stats']
        print(f"  • Minimum: {conf['min']:.3f}")
        print(f"  • Maximum: {conf['max']:.3f}")
        print(f"  • Average: {conf['avg']:.3f}")
        print(f"  • < 0.70 (new states): {conf['below_70']}")
        print(f"  • 0.70-0.85 (near misses): {conf['between_70_85']}")
        print(f"  • > 0.85 (false negatives): {conf['above_85']}")

        print("\n💡 Generated Suggestions")
        for suggestion_type, count in analysis['suggestions_summary'].items():
            print(f"  • {suggestion_type}: {count}")

        print("\n⚠️ Problematic Nodes (Top 5)")
        for i, node in enumerate(analysis['problematic_nodes'][:5], 1):
            print(f"  {i}. {node['node_label']} (ID: {node['node_id']})")
            print(f"     - Near misses: {node['near_miss_count']}")
            print(f"     - Average confidence: {node['avg_confidence']:.3f}")

        print("\n🎯 Threshold Recommendations")
        thresh = analysis['threshold_recommendations']
        print(f"  • Current threshold: {thresh['current_threshold']:.3f}")
        print(f"  • P90 of confidences: {thresh['p90_confidence']:.3f}")
        print(f"  • Recommended threshold: {thresh['recommended_threshold']:.3f}")
        print(f"  • Action: {thresh['action']}")
        print(f"  • Reason: {thresh['reason']}")

        print("\n🆕 Newly Detected States")
        print(f"  • {analysis['new_states_detected']} potentially new states")
        print("    (confidence < 0.70, require the creation of new nodes)")

        print("\n" + "="*70)

    def export_detailed_report(self, output_path: str = "failed_matches_analysis.json"):
        """Export a detailed report as JSON."""
        analysis = self.analyze()

        detailed_report = {
            'analysis': analysis,
            'individual_reports': [
                {
                    'timestamp': r['timestamp'],
                    'confidence': r['matching_results']['best_confidence'],
                    'suggestions': r['suggestions'],
                    'window_title': r['state']['window_title'],
                    'screenshot_path': str(r['_dir'] / "screenshot.png")
                }
                for r in self.reports
            ]
        }

        with open(output_path, 'w') as f:
            json.dump(detailed_report, f, indent=2)

        print(f"\n✓ Detailed report exported: {output_path}")


def main():
    parser = argparse.ArgumentParser(
        description="Analyze matching failures for continuous improvement"
    )
    parser.add_argument(
        '--last',
        type=int,
        help="Analyze the last N failures"
    )
    parser.add_argument(
        '--since-hours',
        type=int,
        help="Analyze the failures from the last X hours"
    )
    parser.add_argument(
        '--export',
        type=str,
        help="Export the detailed report as JSON"
    )
    parser.add_argument(
        '--dir',
        type=str,
        default="data/failed_matches",
        help="Directory containing the failures (default: data/failed_matches)"
    )

    args = parser.parse_args()

    # Create the analyzer
    analyzer = FailedMatchAnalyzer(failed_matches_dir=args.dir)

    # Load the reports
    analyzer.load_reports(last_n=args.last, since_hours=args.since_hours)

    if not analyzer.reports:
        print("\n❌ No reports to analyze")
        return 1

    # Analyze
    analysis = analyzer.analyze()

    # Print the report
    analyzer.print_report(analysis)

    # Export if requested
    if args.export:
        analyzer.export_detailed_report(args.export)

    return 0


if __name__ == '__main__':
    sys.exit(main())
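
# Example invocations (paths are illustrative; the data/failed_matches layout
# follows the --dir default above):
#   python3 analyze_failed_matches.py                    # analyze all recorded failures
#   python3 analyze_failed_matches.py --last 50          # only the 50 most recent
#   python3 analyze_failed_matches.py --since-hours 24 --export analysis.json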