Files
rpa_vision_v3/tests/unit/test_circuit_breaker.py
Dom a27b74cf22 v1.0 - Version stable: multi-PC, détection UI-DETR-1, 3 modes exécution
- Frontend v4 accessible sur réseau local (192.168.1.40)
- Ports ouverts: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU fonctionnel
- Self-healing interactif
- Dashboard confiance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-29 11:23:51 +01:00

770 lines
33 KiB
Python

"""
Tests unitaires pour CircuitBreaker - Fiche #22 Auto-Heal Hybride
Tests pour le mécanisme de circuit breaker avec fenêtres glissantes,
seuils de déclenchement et gestion des échecs.
Tests de fonctionnalité réelle sans simulation - utilise des données
et scénarios authentiques pour valider le comportement en production.
Auteur: Dom, Alice Kiro - 23 décembre 2024
"""
import pytest
import time
import tempfile
import json
from pathlib import Path
from datetime import datetime, timedelta
from core.system.circuit_breaker import CircuitBreaker
from core.system.models import SimpleFailureEvent
class TestCircuitBreaker:
    """Unit tests covering the core CircuitBreaker API."""

    def setup_method(self):
        """Create a fresh breaker and a scratch directory before each test."""
        self.policy = {
            'step_fail_streak_to_degraded': 3,
            'workflow_fail_window_s': 600,
            'workflow_fail_max_in_window': 10,
            'global_fail_max_in_window': 30,
            'success_reset_threshold': 2,
        }
        self.circuit_breaker = CircuitBreaker(self.policy)
        # Scratch directory used by the persistence helpers below.
        self.temp_dir = Path(tempfile.mkdtemp())

    def teardown_method(self):
        """Remove the scratch directory once the test has finished."""
        import shutil

        scratch_dir = self.temp_dir
        if scratch_dir.exists():
            shutil.rmtree(scratch_dir)

    def _create_realistic_failure_scenario(self, workflow_id: str, failure_pattern: list):
        """
        Replay a sequence of step outcomes against the breaker under test.

        Args:
            workflow_id: ID of the workflow the outcomes belong to.
            failure_pattern: List of (step_id, failure_type, should_succeed)
                tuples; a truthy ``should_succeed`` records a success,
                anything else records a failure of ``failure_type``.
        """
        breaker = self.circuit_breaker
        for step_id, failure_type, should_succeed in failure_pattern:
            if not should_succeed:
                breaker.record_failure(workflow_id, step_id, failure_type)
            else:
                breaker.record_success(workflow_id, step_id)
            # Tiny pause between events so they are spaced out in time.
            time.sleep(0.001)

    def _save_circuit_breaker_state(self, filename: str) -> Path:
        """Dump the breaker state as JSON into the scratch directory.

        Returns the path of the written file.
        """
        breaker = self.circuit_breaker
        status = breaker.get_status_summary()

        def serialize(events):
            # Each stored failure exposes to_dict() for JSON serialization.
            return [event.to_dict() for event in events]

        step_failures = {
            key: serialize(failures)
            for key, failures in breaker.step_consecutive_failures.items()
        }
        success_counts = dict(breaker.step_success_counts)
        workflow_failures = {
            wf_id: serialize(window.failures)
            for wf_id, window in breaker.workflow_windows.items()
        }
        global_failures = serialize(breaker.global_window.failures)

        # Status summary plus the raw failure data, for a complete snapshot.
        state_data = {
            'status': status,
            'step_consecutive_failures': step_failures,
            'step_success_counts': success_counts,
            'workflow_windows': workflow_failures,
            'global_failures': global_failures,
        }

        file_path = self.temp_dir / filename
        with open(file_path, 'w') as handle:
            json.dump(state_data, handle, indent=2)
        return file_path

    def _load_and_verify_circuit_breaker_state(self, file_path: Path):
        """Load a saved state file and check it against the live breaker.

        Returns the decoded JSON payload.
        """
        with open(file_path, 'r') as handle:
            state_data = json.load(handle)

        # The saved global statistics must match what the breaker reports now.
        live_stats = self.circuit_breaker.get_status_summary()['global_stats']
        saved_stats = state_data['status']['global_stats']
        for stat_name in ('global_failures_in_window', 'workflows_with_failures'):
            assert live_stats[stat_name] == saved_stats[stat_name]
        return state_data

    def test_initialization(self):
        """A new breaker keeps its policy and starts with empty trackers."""
        breaker = self.circuit_breaker
        assert breaker.policy == self.policy
        assert len(breaker.step_consecutive_failures) == 0
        assert len(breaker.workflow_windows) == 0
        assert breaker.global_window.window_duration_s == 600

    def test_record_failure(self):
        """One failure is visible at step, workflow and global level."""
        breaker = self.circuit_breaker
        workflow_id, step_id = "test_workflow", "step_1"
        failure_type = "TARGET_NOT_FOUND"

        breaker.record_failure(workflow_id, step_id, failure_type)

        # Step level
        step_key = f"{workflow_id}:{step_id}"
        streak = breaker.step_consecutive_failures[step_key]
        assert len(streak) == 1
        assert streak[0].failure_type == failure_type

        # Workflow level
        assert workflow_id in breaker.workflow_windows
        assert breaker.workflow_windows[workflow_id].get_failure_count() == 1

        # Global level
        assert breaker.global_window.get_failure_count() == 1

    def test_record_success(self):
        """The failure streak is cleared only after enough successes."""
        breaker = self.circuit_breaker
        workflow_id, step_id = "test_workflow", "step_1"
        step_key = f"{workflow_id}:{step_id}"

        def streak_length():
            # Look the streak up again on every call so a replaced list
            # is never observed through a stale reference.
            return len(breaker.step_consecutive_failures[step_key])

        # Start with a couple of failures.
        for _ in range(2):
            breaker.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND")
        assert streak_length() == 2

        # First success: below the reset threshold, streak is untouched.
        breaker.record_success(workflow_id, step_id)
        assert streak_length() == 2
        assert breaker.step_success_counts[step_key] == 1

        # Second success: threshold reached, streak is expected to reset.
        breaker.record_success(workflow_id, step_id)
        assert streak_length() == 0
        assert breaker.step_success_counts[step_key] == 2

    def test_should_trigger_degraded_with_realistic_scenario(self):
        """DEGRADED triggers after three failures on the same step."""
        breaker = self.circuit_breaker
        workflow_id = "customer_service_workflow"
        step_id = "ticket_creation"

        # Nothing recorded yet, so nothing should trigger.
        assert breaker.should_trigger_degraded(workflow_id, step_id) is False

        # Scenario: a UI that becomes unstable.
        failure_pattern = [
            (step_id, "TARGET_NOT_FOUND", False),      # first failure
            (step_id, "TARGET_NOT_FOUND", False),      # second failure
            ("other_step", "TIMEOUT", False),          # failure on another step
            (step_id, "POSTCONDITION_FAILED", False),  # third failure on step_id
        ]
        self._create_realistic_failure_scenario(workflow_id, failure_pattern)

        # Three failures on step_id: DEGRADED is expected.
        assert breaker.should_trigger_degraded(workflow_id, step_id) is True
        # The other step only failed once and must not be affected.
        assert breaker.should_trigger_degraded(workflow_id, "other_step") is False

    def test_should_trigger_quarantine(self):
        """QUARANTINED triggers on the 10th workflow failure in the window."""
        breaker = self.circuit_breaker
        workflow_id = "test_workflow"

        # No failures at all.
        assert breaker.should_trigger_quarantine(workflow_id) is False

        # Nine failures: one short of the threshold.
        for i in range(9):
            breaker.record_failure(workflow_id, f"step_{i}", "TARGET_NOT_FOUND")
        assert breaker.should_trigger_quarantine(workflow_id) is False

        # Tenth failure reaches the threshold.
        breaker.record_failure(workflow_id, "step_10", "POSTCONDITION_FAILED")
        assert breaker.should_trigger_quarantine(workflow_id) is True

    def test_should_trigger_global_pause(self):
        """Global PAUSE triggers on the 30th failure across all workflows."""
        breaker = self.circuit_breaker

        # No failures at all.
        assert breaker.should_trigger_global_pause() is False

        # Twenty-nine failures spread over five workflows: still below.
        for i in range(29):
            breaker.record_failure(f"workflow_{i % 5}", f"step_{i}", "TARGET_NOT_FOUND")
        assert breaker.should_trigger_global_pause() is False

        # Thirtieth failure reaches the threshold.
        breaker.record_failure("workflow_global", "step_final", "TIMEOUT")
        assert breaker.should_trigger_global_pause() is True

    def test_get_failure_counts(self):
        """get_failure_counts reports per-step, workflow and global totals."""
        breaker = self.circuit_breaker
        workflow_id = "test_workflow"

        # Failures spread over two different steps.
        recorded = (
            ("step_1", "TARGET_NOT_FOUND"),
            ("step_1", "TARGET_NOT_FOUND"),
            ("step_2", "POSTCONDITION_FAILED"),
        )
        for step_id, failure_type in recorded:
            breaker.record_failure(workflow_id, step_id, failure_type)

        counts = breaker.get_failure_counts(workflow_id)
        per_step = counts['step_consecutive']
        assert per_step['step_1'] == 2
        assert per_step['step_2'] == 1
        assert counts['workflow_window'] == 3
        assert counts['global_window'] == 3
        assert counts['window_duration_s'] == 600

    def test_get_step_failure_history(self):
        """Step history is chronological and honours the ``limit`` argument."""
        breaker = self.circuit_breaker
        workflow_id, step_id = "test_workflow", "step_1"

        realistic_failure_sequence = [
            "TARGET_NOT_FOUND",      # the UI changed
            "POSTCONDITION_FAILED",  # validation failed
            "TIMEOUT",               # slow response from the system
        ]
        for failure_type in realistic_failure_sequence:
            breaker.record_failure(workflow_id, step_id, failure_type)
            # Short pause so each failure gets a distinct timestamp.
            time.sleep(0.01)

        history = breaker.get_step_failure_history(workflow_id, step_id)
        assert len(history) == 3
        assert [entry.failure_type for entry in history] == realistic_failure_sequence

        # Timestamps must be in chronological order.
        timestamps = [entry.timestamp for entry in history]
        assert timestamps == sorted(timestamps)

        # With a limit, only the most recent entries are returned.
        limited_history = breaker.get_step_failure_history(workflow_id, step_id, limit=2)
        assert len(limited_history) == 2
        limited_types = [entry.failure_type for entry in limited_history]
        assert limited_types == realistic_failure_sequence[-2:]

    def test_get_workflow_failure_types(self):
        """Failure types are tallied per workflow."""
        breaker = self.circuit_breaker
        workflow_id = "test_workflow"

        recorded = (
            ("step_1", "TARGET_NOT_FOUND"),
            ("step_2", "TARGET_NOT_FOUND"),
            ("step_3", "POSTCONDITION_FAILED"),
            ("step_4", "TIMEOUT"),
        )
        for step_id, failure_type in recorded:
            breaker.record_failure(workflow_id, step_id, failure_type)

        failure_types = breaker.get_workflow_failure_types(workflow_id)
        assert failure_types["TARGET_NOT_FOUND"] == 2
        assert failure_types["POSTCONDITION_FAILED"] == 1
        assert failure_types["TIMEOUT"] == 1

    def test_cleanup_old_data_with_real_time_progression(self):
        """cleanup_old_data empties the windows once real time has elapsed."""
        workflow_id, step_id = "test_workflow", "step_1"

        # Dedicated breaker with a very short window to keep the test quick.
        short_window_policy = {
            'step_fail_streak_to_degraded': 3,
            'workflow_fail_window_s': 2,  # two seconds only
            'workflow_fail_max_in_window': 10,
            'global_fail_max_in_window': 30,
            'success_reset_threshold': 2,
        }
        cb = CircuitBreaker(short_window_policy)

        cb.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND")
        cb.record_failure(workflow_id, step_id, "POSTCONDITION_FAILED")

        # Both failures are tracked everywhere right after recording.
        step_key = f"{workflow_id}:{step_id}"
        assert len(cb.step_consecutive_failures[step_key]) == 2
        assert cb.workflow_windows[workflow_id].get_failure_count() == 2
        assert cb.global_window.get_failure_count() == 2

        # Wait past the 2-second window (3 seconds for a safety margin).
        time.sleep(3)
        cb.cleanup_old_data()

        # The sliding windows must now be empty.
        assert cb.workflow_windows[workflow_id].get_failure_count() == 0
        assert cb.global_window.get_failure_count() == 0
        # The consecutive-failure streak is deliberately not asserted here;
        # the original author notes it follows its own cleanup logic.

    def test_reset_step_failures(self):
        """reset_step_failures drops the streak and zeroes the success count."""
        breaker = self.circuit_breaker
        workflow_id, step_id = "test_workflow", "step_1"

        for _ in range(3):
            breaker.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND")

        step_key = f"{workflow_id}:{step_id}"
        assert len(breaker.step_consecutive_failures[step_key]) == 3

        breaker.reset_step_failures(workflow_id, step_id)

        assert step_key not in breaker.step_consecutive_failures
        assert breaker.step_success_counts[step_key] == 0

    def test_reset_workflow_failures(self):
        """reset_workflow_failures clears the window and every step streak."""
        breaker = self.circuit_breaker
        workflow_id = "test_workflow"

        # One failure on each of five steps.
        for i in range(5):
            breaker.record_failure(workflow_id, f"step_{i}", "TARGET_NOT_FOUND")

        assert workflow_id in breaker.workflow_windows
        assert breaker.workflow_windows[workflow_id].get_failure_count() == 5

        step_keys = [f"{workflow_id}:step_{i}" for i in range(5)]
        for step_key in step_keys:
            assert len(breaker.step_consecutive_failures[step_key]) == 1

        breaker.reset_workflow_failures(workflow_id)

        assert len(breaker.workflow_windows[workflow_id].failures) == 0
        for step_key in step_keys:
            assert step_key not in breaker.step_consecutive_failures

    def test_failure_event_integration(self):
        """Stored failures are SimpleFailureEvent objects with matching fields."""
        breaker = self.circuit_breaker
        workflow_id = "integration_test_workflow"
        step_id = "data_validation"

        # Build SimpleFailureEvent objects directly, as a caller would.
        failure_events = [
            SimpleFailureEvent(
                timestamp=datetime.now(),
                workflow_id=workflow_id,
                step_id=step_id,
                failure_type=kind,
            )
            for kind in ("VALIDATION_ERROR", "TARGET_NOT_FOUND", "TIMEOUT")
        ]

        # Feed them through the regular recording interface.
        for event in failure_events:
            breaker.record_failure(event.workflow_id, event.step_id, event.failure_type)

        step_key = f"{workflow_id}:{step_id}"
        stored_failures = breaker.step_consecutive_failures[step_key]
        assert len(stored_failures) == 3

        for stored_failure, source_event in zip(stored_failures, failure_events):
            assert isinstance(stored_failure, SimpleFailureEvent)
            assert stored_failure.workflow_id == workflow_id
            assert stored_failure.step_id == step_id
            assert stored_failure.failure_type == source_event.failure_type

        # Three failures on the same step reach the DEGRADED threshold.
        assert breaker.should_trigger_degraded(workflow_id, step_id) is True

    def test_state_persistence_and_recovery(self):
        """Breaker state written to disk can be read back intact."""
        breaker = self.circuit_breaker
        workflow_id = "persistent_workflow"

        # Mixed state with several failure types.
        realistic_failures = [
            ("login", "TARGET_NOT_FOUND"),
            ("form_fill", "VALIDATION_ERROR"),
            ("submit", "TIMEOUT"),
            ("login", "TARGET_NOT_FOUND"),  # second login failure
            ("confirmation", "POSTCONDITION_FAILED"),
        ]
        for step_id, failure_type in realistic_failures:
            breaker.record_failure(workflow_id, step_id, failure_type)

        # Plus one success on an unrelated step.
        breaker.record_success(workflow_id, "other_step")

        # Persist to a real file and read it back.
        state_file = self._save_circuit_breaker_state("circuit_breaker_state.json")
        assert state_file.exists()
        state_data = self._load_and_verify_circuit_breaker_state(state_file)

        # The snapshot must contain the recorded failures.
        saved_streaks = state_data['step_consecutive_failures']
        assert len(saved_streaks) > 0
        saved_workflow_ids = [
            failure['workflow_id']
            for failures in saved_streaks.values()
            for failure in failures
        ]
        assert workflow_id in saved_workflow_ids
        assert len(state_data['global_failures']) == len(realistic_failures)

        # Failure types must be preserved, in recording order.
        saved_failure_types = [
            failure['failure_type'] for failure in state_data['global_failures']
        ]
        expected_failure_types = [failure_type for _, failure_type in realistic_failures]
        assert saved_failure_types == expected_failure_types

    def test_get_status_summary(self):
        """The status summary exposes its sections, statistics and thresholds."""
        breaker = self.circuit_breaker

        recorded = (
            ("workflow_1", "step_1", "TARGET_NOT_FOUND"),
            ("workflow_1", "step_2", "POSTCONDITION_FAILED"),
            ("workflow_2", "step_1", "TARGET_NOT_FOUND"),
        )
        for workflow_id, step_id, failure_type in recorded:
            breaker.record_failure(workflow_id, step_id, failure_type)

        status = breaker.get_status_summary()

        # Top-level structure
        for section in ('timestamp', 'policy', 'global_stats', 'thresholds'):
            assert section in status

        # Statistics
        global_stats = status['global_stats']
        assert global_stats['global_failures_in_window'] == 3
        assert global_stats['workflows_with_failures'] == 2
        assert global_stats['steps_with_consecutive_failures'] == 3
        assert global_stats['global_failure_types']['TARGET_NOT_FOUND'] == 2
        assert global_stats['global_failure_types']['POSTCONDITION_FAILED'] == 1

        # Thresholds mirror the policy given in setup_method.
        thresholds = status['thresholds']
        expected_thresholds = {
            'step_consecutive_to_degraded': 3,
            'workflow_window_to_quarantine': 10,
            'global_window_to_pause': 30,
            'window_duration_s': 600,
        }
        for name, expected in expected_thresholds.items():
            assert thresholds[name] == expected
class TestCircuitBreakerRealWorldScenarios:
    """Scenario tests modelled on real-world CircuitBreaker usage."""

    def test_realistic_workflow_failure_progression(self):
        """Replay a billing workflow whose login step eventually degrades."""
        # Policy resembling a production configuration.
        production_policy = {
            'step_fail_streak_to_degraded': 3,
            'workflow_fail_window_s': 600,  # 10 minutes
            'workflow_fail_max_in_window': 10,
            'global_fail_max_in_window': 50,
            'success_reset_threshold': 2,
        }
        cb = CircuitBreaker(production_policy)
        workflow_id = "billing_workflow_v2"

        # Typical failures of a billing workflow.
        realistic_failures = [
            ("login_step", "TARGET_NOT_FOUND"),        # login button not found
            ("customer_search", "TIMEOUT"),            # customer search timed out
            ("invoice_form", "POSTCONDITION_FAILED"),  # form not validated
            ("login_step", "TARGET_NOT_FOUND"),        # login button again
            ("payment_step", "TARGET_NOT_FOUND"),      # payment button not found
            ("login_step", "TARGET_NOT_FOUND"),        # 3rd login failure -> DEGRADED
            ("customer_search", "TIMEOUT"),            # another timeout
            ("invoice_form", "VALIDATION_ERROR"),      # validation error
        ]

        for step_id, failure_type in realistic_failures:
            cb.record_failure(workflow_id, step_id, failure_type)
            # Only the login step is checked while the scenario unfolds.
            if step_id != "login_step":
                continue
            step_key = f"{workflow_id}:{step_id}"
            if len(cb.step_consecutive_failures[step_key]) >= 3:
                assert cb.should_trigger_degraded(workflow_id, step_id)

        # Final statistics
        counts = cb.get_failure_counts(workflow_id)
        assert counts['workflow_window'] == len(realistic_failures)
        per_step = counts['step_consecutive']
        assert per_step['login_step'] == 3
        assert per_step['customer_search'] == 2

        # Failure-type breakdown
        failure_types = cb.get_workflow_failure_types(workflow_id)
        assert failure_types["TARGET_NOT_FOUND"] == 4
        assert failure_types["TIMEOUT"] == 2
        assert failure_types["POSTCONDITION_FAILED"] == 1
        assert failure_types["VALIDATION_ERROR"] == 1

    def test_recovery_after_successful_executions(self):
        """Enough successes clear the streak so a new failure starts from one."""
        cb = CircuitBreaker({
            'step_fail_streak_to_degraded': 3,
            'workflow_fail_window_s': 600,
            'workflow_fail_max_in_window': 10,
            'global_fail_max_in_window': 30,
            'success_reset_threshold': 2,
        })
        workflow_id = "data_entry_workflow"
        step_id = "form_submission"

        # Two failures: one short of the DEGRADED threshold.
        for _ in range(2):
            cb.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND")
        assert not cb.should_trigger_degraded(workflow_id, step_id)

        # First success: still not degraded.
        cb.record_success(workflow_id, step_id)
        assert not cb.should_trigger_degraded(workflow_id, step_id)

        # Second success: the streak is expected to reset.
        cb.record_success(workflow_id, step_id)
        step_key = f"{workflow_id}:{step_id}"
        assert len(cb.step_consecutive_failures[step_key]) == 0

        # A fresh failure must not trigger DEGRADED straight away.
        cb.record_failure(workflow_id, step_id, "TIMEOUT")
        assert not cb.should_trigger_degraded(workflow_id, step_id)

    def test_concurrent_workflows_realistic_load(self):
        """Spread failures over several workflows and check the tallies."""
        cb = CircuitBreaker({
            'step_fail_streak_to_degraded': 3,
            'workflow_fail_window_s': 300,  # 5 minutes
            'workflow_fail_max_in_window': 8,
            'global_fail_max_in_window': 25,
            'success_reset_threshold': 2,
        })

        # Typical business workflows.
        workflows = [
            "invoice_processing",
            "customer_onboarding",
            "inventory_update",
            "report_generation",
            "email_automation",
        ]
        failure_types = ["TARGET_NOT_FOUND", "TIMEOUT", "POSTCONDITION_FAILED"]

        total_failures = 0
        failure_distribution = {}
        # 25 failures assigned round-robin: workflow, step and failure type
        # are all derived from the loop index.
        for i in range(25):
            workflow_id = workflows[i % len(workflows)]
            step_id = f"step_{i % 3 + 1}"  # three steps per workflow
            failure_type = failure_types[i % len(failure_types)]

            cb.record_failure(workflow_id, step_id, failure_type)
            total_failures += 1

            # Keep our own tally for later verification.
            failure_distribution[workflow_id] = failure_distribution.get(workflow_id, 0) + 1

            # Small delay between failures.
            time.sleep(0.001)

        # Global window saw every failure.
        assert cb.global_window.get_failure_count() == total_failures

        # Every workflow received failures and the tally adds up.
        assert len(failure_distribution) == len(workflows)
        assert sum(failure_distribution.values()) == total_failures

        # Per-workflow counts as reported by the breaker itself.
        workflow_failure_counts = {
            wf: (cb.workflow_windows[wf].get_failure_count() if wf in cb.workflow_windows else 0)
            for wf in workflows
        }
        max_failures = max(workflow_failure_counts.values())
        min_failures = min(workflow_failure_counts.values())

        # The distribution should be reasonably even.
        assert max_failures >= 4  # most affected workflow
        assert min_failures >= 3  # least affected workflow

        # All three failure types must show up globally.
        global_failure_types = cb.global_window.get_failure_types()
        assert len(global_failure_types) == 3
        for failure_type in failure_types:
            assert failure_type in global_failure_types
            assert global_failure_types[failure_type] > 0

    def test_mixed_success_failure_realistic_pattern(self):
        """Interleaved successes and failures leave only the latest streak."""
        cb = CircuitBreaker({
            'step_fail_streak_to_degraded': 4,  # more tolerant
            'workflow_fail_window_s': 600,
            'workflow_fail_max_in_window': 12,
            'global_fail_max_in_window': 40,
            'success_reset_threshold': 3,  # needs more successes
        })
        workflow_id = "document_processing"
        step_id = "ocr_extraction"

        # Intermittent failures mixed with successes.
        operations = [
            ("failure", "TARGET_NOT_FOUND"),
            ("success", None),
            ("failure", "TIMEOUT"),
            ("success", None),
            ("failure", "POSTCONDITION_FAILED"),
            ("failure", "TARGET_NOT_FOUND"),  # two failures in a row
            ("success", None),
            ("success", None),
            ("failure", "VALIDATION_ERROR"),
            ("success", None),
            ("success", None),
            ("success", None),                # 3 successes -> expected reset
            ("failure", "TARGET_NOT_FOUND"),  # new cycle
        ]
        for operation_type, failure_type in operations:
            if operation_type != "failure":
                cb.record_success(workflow_id, step_id)
                continue
            cb.record_failure(workflow_id, step_id, failure_type)

        # Only the last failure should remain in the streak after the reset.
        step_key = f"{workflow_id}:{step_id}"
        consecutive_failures = len(cb.step_consecutive_failures[step_key])
        assert consecutive_failures == 1
        assert not cb.should_trigger_degraded(workflow_id, step_id)

        # The workflow window still holds every failure recorded above.
        counts = cb.get_failure_counts(workflow_id)
        assert counts['workflow_window'] == 6  # total number of failures
class TestCircuitBreakerIntegration:
    """Integration-style tests for the CircuitBreaker."""

    def test_sliding_window_behavior_with_real_time(self):
        """Failures drop out of the sliding window once it has elapsed."""
        circuit_breaker = CircuitBreaker({
            'step_fail_streak_to_degraded': 3,
            'workflow_fail_window_s': 2,  # two seconds, to keep the test quick
            'workflow_fail_max_in_window': 5,
            'global_fail_max_in_window': 10,
        })
        workflow_id = "test_workflow"

        # Three fresh failures, each on a different step.
        for i in range(3):
            circuit_breaker.record_failure(workflow_id, f"step_{i}", "TARGET_NOT_FOUND")

        counts = circuit_breaker.get_failure_counts(workflow_id)
        assert counts['workflow_window'] == 3

        # Wait until the 2-second window has passed.
        time.sleep(3)

        # The failures should now have expired.
        counts_after = circuit_breaker.get_failure_counts(workflow_id)
        assert counts_after['workflow_window'] == 0

        # Expired failures must not trigger quarantine.
        assert circuit_breaker.should_trigger_quarantine(workflow_id) is False

    def test_multiple_workflows_isolation(self):
        """Failures of one workflow do not leak into another workflow."""
        circuit_breaker = CircuitBreaker({
            'step_fail_streak_to_degraded': 2,
            'workflow_fail_window_s': 600,
            'workflow_fail_max_in_window': 3,
            'global_fail_max_in_window': 10,
        })

        # Two failures for workflow_1 ...
        for _ in range(2):
            circuit_breaker.record_failure("workflow_1", "step_1", "TARGET_NOT_FOUND")
        # ... and a single one for workflow_2.
        circuit_breaker.record_failure("workflow_2", "step_1", "POSTCONDITION_FAILED")

        # Only workflow_1 reaches the DEGRADED streak of two.
        assert circuit_breaker.should_trigger_degraded("workflow_1", "step_1") is True
        assert circuit_breaker.should_trigger_degraded("workflow_2", "step_1") is False

        # Neither workflow reaches the quarantine threshold of three.
        for workflow_id in ("workflow_1", "workflow_2"):
            assert circuit_breaker.should_trigger_quarantine(workflow_id) is False

        # All failures still count towards the global window.
        assert circuit_breaker.global_window.get_failure_count() == 3

    def test_policy_configuration_impact(self):
        """The same failure triggers or not depending on the policy."""
        # Strict configuration: a single failure is enough for DEGRADED.
        strict_policy = {
            'step_fail_streak_to_degraded': 1,
            'workflow_fail_window_s': 600,
            'workflow_fail_max_in_window': 2,
            'global_fail_max_in_window': 5,
        }
        # Permissive configuration: five failures are needed.
        permissive_policy = {
            'step_fail_streak_to_degraded': 5,
            'workflow_fail_window_s': 600,
            'workflow_fail_max_in_window': 20,
            'global_fail_max_in_window': 100,
        }
        strict_cb = CircuitBreaker(strict_policy)
        permissive_cb = CircuitBreaker(permissive_policy)

        # Record the same failure on both breakers.
        workflow_id, step_id = "test_workflow", "step_1"
        strict_cb.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND")
        permissive_cb.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND")

        # Behaviour differs according to the policy.
        assert strict_cb.should_trigger_degraded(workflow_id, step_id) is True
        assert permissive_cb.should_trigger_degraded(workflow_id, step_id) is False
if __name__ == "__main__":
    # pytest.main() returns the session exit code; propagate it so that
    # running this file directly reports test failures to the shell/CI
    # instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__]))