#!/usr/bin/env python3
"""
Analyzer for matching failures, used for continuous improvement of the system.

This script analyzes failed-match reports and generates statistics and
recommendations for improving the workflow graph.
"""

import argparse
import json
import sys
from collections import Counter, defaultdict
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any, Dict, List, Optional


class FailedMatchAnalyzer:
    """Analyzer for matching failures."""

    def __init__(self, failed_matches_dir: str = "data/failed_matches"):
        self.failed_matches_dir = Path(failed_matches_dir)
        self.reports: List[Dict[str, Any]] = []

    def load_reports(self, last_n: Optional[int] = None, since_hours: Optional[int] = None):
        """
        Load the failure reports.

        Args:
            last_n: Load the N most recent reports
            since_hours: Load the reports from the last X hours
        """
        if not self.failed_matches_dir.exists():
            print(f"⚠️ No failures directory found: {self.failed_matches_dir}")
            return

        # List every failure directory, most recent first
        match_dirs = sorted(
            [d for d in self.failed_matches_dir.iterdir() if d.is_dir()],
            key=lambda x: x.name,
            reverse=True
        )

        if not match_dirs:
            print("⚠️ No matching failures recorded")
            return

        # Filter by date if requested
        if since_hours:
            cutoff = datetime.now() - timedelta(hours=since_hours)
            match_dirs = [
                d for d in match_dirs
                if self._parse_timestamp(d.name) >= cutoff
            ]

        # Cap the number of reports if requested
        if last_n:
            match_dirs = match_dirs[:last_n]

        # Load the reports
        for match_dir in match_dirs:
            report_path = match_dir / "report.json"
            if report_path.exists():
                try:
                    with open(report_path, 'r') as f:
                        report = json.load(f)
                    report['_dir'] = match_dir
                    self.reports.append(report)
                except Exception as e:
                    print(f"⚠️ Error while loading {report_path}: {e}")

        print(f"✓ {len(self.reports)} reports loaded")

    def _parse_timestamp(self, dirname: str) -> datetime:
        """Parse the timestamp from the directory name."""
        try:
            # Format: failed_match_20251123_143052
            timestamp_str = dirname.replace("failed_match_", "")
            return datetime.strptime(timestamp_str, "%Y%m%d_%H%M%S")
        except ValueError:
            # Malformed directory names sort before everything else
            return datetime.min

    def analyze(self) -> Dict[str, Any]:
        """Analyze all loaded reports and generate statistics."""
        if not self.reports:
            return {}

        analysis = {
            'total_failures': len(self.reports),
            'date_range': self._get_date_range(),
            'confidence_stats': self._analyze_confidence(),
            'suggestions_summary': self._analyze_suggestions(),
            'problematic_nodes': self._identify_problematic_nodes(),
            'threshold_recommendations': self._recommend_thresholds(),
            'new_states_detected': self._count_new_states()
        }

        return analysis

    def _get_date_range(self) -> Dict[str, str]:
        """Get the date range covered by the reports."""
        timestamps = [
            datetime.strptime(r['timestamp'], "%Y%m%d_%H%M%S")
            for r in self.reports
        ]
        return {
            'first': min(timestamps).strftime("%Y-%m-%d %H:%M:%S"),
            'last': max(timestamps).strftime("%Y-%m-%d %H:%M:%S")
        }

    def _analyze_confidence(self) -> Dict[str, Any]:
        """Analyze the confidence levels."""
        confidences = [
            r['matching_results']['best_confidence']
            for r in self.reports
        ]

        return {
            'min': min(confidences),
            'max': max(confidences),
            'avg': sum(confidences) / len(confidences),
            'below_70': sum(1 for c in confidences if c < 0.70),
            'between_70_85': sum(1 for c in confidences if 0.70 <= c < 0.85),
            'above_85': sum(1 for c in confidences if c >= 0.85)
        }

    def _analyze_suggestions(self) -> Dict[str, int]:
        """Count the suggestion types."""
        suggestion_types = Counter()

        for report in self.reports:
            for suggestion in report.get('suggestions', []):
                # Extract the suggestion type (the part before the ':')
                suggestion_type = suggestion.split(':')[0]
                suggestion_types[suggestion_type] += 1

        return dict(suggestion_types)

    def _identify_problematic_nodes(self) -> List[Dict[str, Any]]:
        """Identify the nodes that cause the most confusion."""
        node_near_misses = defaultdict(list)

        for report in self.reports:
            similarities = report['matching_results'].get('similarities', [])
            if similarities:
                best = similarities[0]
                confidence = best['similarity']
                # Near miss: between 0.70 and the threshold
                if 0.70 <= confidence < report['matching_results']['threshold']:
                    node_near_misses[best['node_id']].append({
                        'confidence': confidence,
                        'label': best['node_label'],
                        'timestamp': report['timestamp']
                    })

        # Sort by number of near misses
        problematic = [
            {
                'node_id': node_id,
                'node_label': misses[0]['label'],
                'near_miss_count': len(misses),
                'avg_confidence': sum(m['confidence'] for m in misses) / len(misses)
            }
            for node_id, misses in node_near_misses.items()
        ]

        return sorted(problematic, key=lambda x: x['near_miss_count'], reverse=True)

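    # _identify_problematic_nodes above returns entries shaped like this
    # (illustrative values; 'login_screen' is a made-up node id):
    #   [{'node_id': 'login_screen', 'node_label': 'Login Screen',
    #     'near_miss_count': 7, 'avg_confidence': 0.79}, ...]
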
    def _recommend_thresholds(self) -> Dict[str, Any]:
        """Recommend threshold adjustments."""
        confidences = [
            r['matching_results']['best_confidence']
            for r in self.reports
        ]

        # Compute the 90th percentile of the confidences
        sorted_conf = sorted(confidences)
        p90_index = int(len(sorted_conf) * 0.9)
        p90 = sorted_conf[p90_index] if sorted_conf else 0.85

        current_threshold = self.reports[0]['matching_results']['threshold']

        recommendations = {
            'current_threshold': current_threshold,
            'p90_confidence': p90,
            'recommended_threshold': max(0.70, min(0.90, p90 - 0.02))
        }

        if p90 < current_threshold - 0.05:
            recommendations['action'] = "LOWER_THRESHOLD"
            recommendations['reason'] = f"90% of failures have confidence < {p90:.3f}"
        elif p90 > current_threshold + 0.05:
            recommendations['action'] = "RAISE_THRESHOLD"
            recommendations['reason'] = "Many potential false positives"
        else:
            recommendations['action'] = "KEEP_CURRENT"
            recommendations['reason'] = "Current threshold is appropriate"

        return recommendations

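    # Worked example of the heuristic above (illustrative numbers, not real
    # data): with the 10 confidences [0.55, 0.60, 0.62, 0.65, 0.68, 0.70,
    # 0.72, 0.74, 0.76, 0.78], p90_index = int(10 * 0.9) = 9, so p90 = 0.78.
    # With a current threshold of 0.85, p90 < 0.85 - 0.05 = 0.80, so the
    # action is LOWER_THRESHOLD and recommended_threshold =
    # max(0.70, min(0.90, 0.78 - 0.02)) = 0.76.
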
    def _count_new_states(self) -> int:
        """Count newly detected states (confidence < 0.70)."""
        return sum(
            1 for r in self.reports
            if r['matching_results']['best_confidence'] < 0.70
        )

    def print_report(self, analysis: Dict[str, Any]):
        """Print the analysis report."""
        print("\n" + "="*70)
        print("MATCHING FAILURE ANALYSIS REPORT")
        print("="*70)

        print(f"\n📊 General Statistics")
        print(f"  • Total failures: {analysis['total_failures']}")
        print(f"  • Period: {analysis['date_range']['first']} → {analysis['date_range']['last']}")

        print(f"\n📈 Confidence Levels")
        conf = analysis['confidence_stats']
        print(f"  • Minimum: {conf['min']:.3f}")
        print(f"  • Maximum: {conf['max']:.3f}")
        print(f"  • Average: {conf['avg']:.3f}")
        print(f"  • < 0.70 (new states): {conf['below_70']}")
        print(f"  • 0.70-0.85 (near misses): {conf['between_70_85']}")
        print(f"  • > 0.85 (false negatives): {conf['above_85']}")

        print(f"\n💡 Generated Suggestions")
        for suggestion_type, count in analysis['suggestions_summary'].items():
            print(f"  • {suggestion_type}: {count}")

        print(f"\n⚠️ Problematic Nodes (Top 5)")
        for i, node in enumerate(analysis['problematic_nodes'][:5], 1):
            print(f"  {i}. {node['node_label']} (ID: {node['node_id']})")
            print(f"     - Near misses: {node['near_miss_count']}")
            print(f"     - Average confidence: {node['avg_confidence']:.3f}")

        print(f"\n🎯 Threshold Recommendations")
        thresh = analysis['threshold_recommendations']
        print(f"  • Current threshold: {thresh['current_threshold']:.3f}")
        print(f"  • P90 of confidences: {thresh['p90_confidence']:.3f}")
        print(f"  • Recommended threshold: {thresh['recommended_threshold']:.3f}")
        print(f"  • Action: {thresh['action']}")
        print(f"  • Reason: {thresh['reason']}")

        print(f"\n🆕 Newly Detected States")
        print(f"  • {analysis['new_states_detected']} potentially new states")
        print(f"    (confidence < 0.70; these require creating new nodes)")

        print("\n" + "="*70)

    def export_detailed_report(self, output_path: str = "failed_matches_analysis.json"):
        """Export a detailed report as JSON."""
        analysis = self.analyze()

        detailed_report = {
            'analysis': analysis,
            'individual_reports': [
                {
                    'timestamp': r['timestamp'],
                    'confidence': r['matching_results']['best_confidence'],
                    'suggestions': r['suggestions'],
                    'window_title': r['state']['window_title'],
                    'screenshot_path': str(r['_dir'] / "screenshot.png")
                }
                for r in self.reports
            ]
        }

        with open(output_path, 'w') as f:
            json.dump(detailed_report, f, indent=2)

        print(f"\n✓ Detailed report exported: {output_path}")

def main():
    parser = argparse.ArgumentParser(
        description="Analyze matching failures for continuous improvement"
    )
    parser.add_argument(
        '--last',
        type=int,
        help="Analyze the N most recent failures"
    )
    parser.add_argument(
        '--since-hours',
        type=int,
        help="Analyze the failures from the last X hours"
    )
    parser.add_argument(
        '--export',
        type=str,
        help="Export the detailed report as JSON"
    )
    parser.add_argument(
        '--dir',
        type=str,
        default="data/failed_matches",
        help="Directory containing the failures (default: data/failed_matches)"
    )

    args = parser.parse_args()

    # Create the analyzer
    analyzer = FailedMatchAnalyzer(failed_matches_dir=args.dir)

    # Load the reports
    analyzer.load_reports(last_n=args.last, since_hours=args.since_hours)

    if not analyzer.reports:
        print("\n❌ No reports to analyze")
        return 1

    # Analyze
    analysis = analyzer.analyze()

    # Print the report
    analyzer.print_report(analysis)

    # Export if requested
    if args.export:
        analyzer.export_detailed_report(args.export)

    return 0

if __name__ == '__main__':
    sys.exit(main())
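
# Example invocations (the filename 'analyze_failed_matches.py' is assumed,
# not taken from the repository; adjust to wherever this file lives):
#
#   python analyze_failed_matches.py --last 50
#   python analyze_failed_matches.py --since-hours 24 --export analysis.json
#   python analyze_failed_matches.py --dir /tmp/failed_matches --last 100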