""" Tests unitaires pour CircuitBreaker - Fiche #22 Auto-Heal Hybride Tests pour le mécanisme de circuit breaker avec fenêtres glissantes, seuils de déclenchement et gestion des échecs. Tests de fonctionnalité réelle sans simulation - utilise des données et scénarios authentiques pour valider le comportement en production. Auteur: Dom, Alice Kiro - 23 décembre 2024 """ import pytest import time import tempfile import json from pathlib import Path from datetime import datetime, timedelta from core.system.circuit_breaker import CircuitBreaker from core.system.models import SimpleFailureEvent class TestCircuitBreaker: """Tests pour la classe CircuitBreaker""" def setup_method(self): """Setup pour chaque test""" self.policy = { 'step_fail_streak_to_degraded': 3, 'workflow_fail_window_s': 600, 'workflow_fail_max_in_window': 10, 'global_fail_max_in_window': 30, 'success_reset_threshold': 2 } self.circuit_breaker = CircuitBreaker(self.policy) # Create temporary directory for persistence testing self.temp_dir = Path(tempfile.mkdtemp()) def teardown_method(self): """Cleanup après chaque test""" import shutil if self.temp_dir.exists(): shutil.rmtree(self.temp_dir) def _create_realistic_failure_scenario(self, workflow_id: str, failure_pattern: list): """ Helper pour créer des scénarios d'échec réalistes. Args: workflow_id: ID du workflow failure_pattern: Liste de tuples (step_id, failure_type, should_succeed) """ for step_id, failure_type, should_succeed in failure_pattern: if should_succeed: self.circuit_breaker.record_success(workflow_id, step_id) else: self.circuit_breaker.record_failure(workflow_id, step_id, failure_type) # Petite pause pour des timestamps réalistes time.sleep(0.001) def _save_circuit_breaker_state(self, filename: str) -> Path: """Save circuit breaker state to file for persistence testing""" file_path = self.temp_dir / filename status = self.circuit_breaker.get_status_summary() # Add failure data for complete state state_data = { 'status': status, 'step_consecutive_failures': { key: [failure.to_dict() for failure in failures] for key, failures in self.circuit_breaker.step_consecutive_failures.items() }, 'step_success_counts': dict(self.circuit_breaker.step_success_counts), 'workflow_windows': { wf_id: [failure.to_dict() for failure in window.failures] for wf_id, window in self.circuit_breaker.workflow_windows.items() }, 'global_failures': [failure.to_dict() for failure in self.circuit_breaker.global_window.failures] } with open(file_path, 'w') as f: json.dump(state_data, f, indent=2) return file_path def _load_and_verify_circuit_breaker_state(self, file_path: Path): """Load and verify circuit breaker state from file""" with open(file_path, 'r') as f: state_data = json.load(f) # Verify the saved state matches current state current_status = self.circuit_breaker.get_status_summary() saved_status = state_data['status'] assert current_status['global_stats']['global_failures_in_window'] == \ saved_status['global_stats']['global_failures_in_window'] assert current_status['global_stats']['workflows_with_failures'] == \ saved_status['global_stats']['workflows_with_failures'] return state_data def test_initialization(self): """Test d'initialisation du CircuitBreaker""" assert self.circuit_breaker.policy == self.policy assert len(self.circuit_breaker.step_consecutive_failures) == 0 assert len(self.circuit_breaker.workflow_windows) == 0 assert self.circuit_breaker.global_window.window_duration_s == 600 def test_record_failure(self): """Test d'enregistrement d'échec""" workflow_id = "test_workflow" step_id = "step_1" failure_type = "TARGET_NOT_FOUND" # Enregistrer un échec self.circuit_breaker.record_failure(workflow_id, step_id, failure_type) # Vérifier l'enregistrement au niveau step step_key = f"{workflow_id}:{step_id}" assert len(self.circuit_breaker.step_consecutive_failures[step_key]) == 1 assert self.circuit_breaker.step_consecutive_failures[step_key][0].failure_type == failure_type # Vérifier l'enregistrement au niveau workflow assert workflow_id in self.circuit_breaker.workflow_windows assert self.circuit_breaker.workflow_windows[workflow_id].get_failure_count() == 1 # Vérifier l'enregistrement au niveau global assert self.circuit_breaker.global_window.get_failure_count() == 1 def test_record_success(self): """Test d'enregistrement de succès""" workflow_id = "test_workflow" step_id = "step_1" # Enregistrer quelques échecs d'abord for i in range(2): self.circuit_breaker.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND") step_key = f"{workflow_id}:{step_id}" assert len(self.circuit_breaker.step_consecutive_failures[step_key]) == 2 # Enregistrer un succès (pas assez pour reset) self.circuit_breaker.record_success(workflow_id, step_id) assert len(self.circuit_breaker.step_consecutive_failures[step_key]) == 2 assert self.circuit_breaker.step_success_counts[step_key] == 1 # Enregistrer un autre succès (devrait reset) self.circuit_breaker.record_success(workflow_id, step_id) assert len(self.circuit_breaker.step_consecutive_failures[step_key]) == 0 assert self.circuit_breaker.step_success_counts[step_key] == 2 def test_should_trigger_degraded_with_realistic_scenario(self): """Test du déclenchement du mode DEGRADED avec scénario réaliste""" workflow_id = "customer_service_workflow" step_id = "ticket_creation" # Scénario réaliste : interface qui devient instable failure_pattern = [ (step_id, "TARGET_NOT_FOUND", False), # Premier échec (step_id, "TARGET_NOT_FOUND", False), # Deuxième échec ("other_step", "TIMEOUT", False), # Échec sur autre étape (step_id, "POSTCONDITION_FAILED", False), # Troisième échec sur step_id ] # Pas assez d'échecs au début assert self.circuit_breaker.should_trigger_degraded(workflow_id, step_id) is False # Appliquer le pattern d'échecs self._create_realistic_failure_scenario(workflow_id, failure_pattern) # Maintenant devrait déclencher DEGRADED (3 échecs consécutifs sur step_id) assert self.circuit_breaker.should_trigger_degraded(workflow_id, step_id) is True # Vérifier que l'autre étape n'est pas affectée assert self.circuit_breaker.should_trigger_degraded(workflow_id, "other_step") is False def test_should_trigger_quarantine(self): """Test du déclenchement du mode QUARANTINED""" workflow_id = "test_workflow" # Pas d'échecs assert self.circuit_breaker.should_trigger_quarantine(workflow_id) is False # Enregistrer 9 échecs (pas assez) for i in range(9): self.circuit_breaker.record_failure(workflow_id, f"step_{i}", "TARGET_NOT_FOUND") assert self.circuit_breaker.should_trigger_quarantine(workflow_id) is False # Enregistrer le 10ème échec (seuil atteint) self.circuit_breaker.record_failure(workflow_id, "step_10", "POSTCONDITION_FAILED") assert self.circuit_breaker.should_trigger_quarantine(workflow_id) is True def test_should_trigger_global_pause(self): """Test du déclenchement du PAUSE global""" # Pas assez d'échecs globaux assert self.circuit_breaker.should_trigger_global_pause() is False # Enregistrer 29 échecs globaux (pas assez) for i in range(29): self.circuit_breaker.record_failure(f"workflow_{i % 5}", f"step_{i}", "TARGET_NOT_FOUND") assert self.circuit_breaker.should_trigger_global_pause() is False # Enregistrer le 30ème échec (seuil atteint) self.circuit_breaker.record_failure("workflow_global", "step_final", "TIMEOUT") assert self.circuit_breaker.should_trigger_global_pause() is True def test_get_failure_counts(self): """Test de récupération des compteurs d'échecs""" workflow_id = "test_workflow" # Enregistrer des échecs sur différentes étapes self.circuit_breaker.record_failure(workflow_id, "step_1", "TARGET_NOT_FOUND") self.circuit_breaker.record_failure(workflow_id, "step_1", "TARGET_NOT_FOUND") self.circuit_breaker.record_failure(workflow_id, "step_2", "POSTCONDITION_FAILED") counts = self.circuit_breaker.get_failure_counts(workflow_id) assert counts['step_consecutive']['step_1'] == 2 assert counts['step_consecutive']['step_2'] == 1 assert counts['workflow_window'] == 3 assert counts['global_window'] == 3 assert counts['window_duration_s'] == 600 def test_get_step_failure_history(self): """Test de récupération de l'historique des échecs d'étape""" workflow_id = "test_workflow" step_id = "step_1" # Enregistrer plusieurs échecs avec des types réalistes realistic_failure_sequence = [ "TARGET_NOT_FOUND", # Interface a changé "POSTCONDITION_FAILED", # Validation échouée "TIMEOUT" # Réponse lente du système ] for failure_type in realistic_failure_sequence: self.circuit_breaker.record_failure(workflow_id, step_id, failure_type) # Petite pause pour avoir des timestamps différents time.sleep(0.01) history = self.circuit_breaker.get_step_failure_history(workflow_id, step_id) assert len(history) == 3 assert [f.failure_type for f in history] == realistic_failure_sequence # Vérifier que les timestamps sont dans l'ordre chronologique timestamps = [f.timestamp for f in history] assert timestamps == sorted(timestamps) # Test avec limite limited_history = self.circuit_breaker.get_step_failure_history(workflow_id, step_id, limit=2) assert len(limited_history) == 2 assert [f.failure_type for f in limited_history] == realistic_failure_sequence[-2:] def test_get_workflow_failure_types(self): """Test de récupération des types d'échecs par workflow""" workflow_id = "test_workflow" # Enregistrer différents types d'échecs self.circuit_breaker.record_failure(workflow_id, "step_1", "TARGET_NOT_FOUND") self.circuit_breaker.record_failure(workflow_id, "step_2", "TARGET_NOT_FOUND") self.circuit_breaker.record_failure(workflow_id, "step_3", "POSTCONDITION_FAILED") self.circuit_breaker.record_failure(workflow_id, "step_4", "TIMEOUT") failure_types = self.circuit_breaker.get_workflow_failure_types(workflow_id) assert failure_types["TARGET_NOT_FOUND"] == 2 assert failure_types["POSTCONDITION_FAILED"] == 1 assert failure_types["TIMEOUT"] == 1 def test_cleanup_old_data_with_real_time_progression(self): """Test du nettoyage des anciennes données avec progression temporelle réelle""" workflow_id = "test_workflow" step_id = "step_1" # Créer un circuit breaker avec une fenêtre très courte pour le test short_window_policy = { 'step_fail_streak_to_degraded': 3, 'workflow_fail_window_s': 2, # 2 secondes seulement 'workflow_fail_max_in_window': 10, 'global_fail_max_in_window': 30, 'success_reset_threshold': 2 } cb = CircuitBreaker(short_window_policy) # Enregistrer des échecs cb.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND") cb.record_failure(workflow_id, step_id, "POSTCONDITION_FAILED") # Vérifier que les échecs sont présents step_key = f"{workflow_id}:{step_id}" assert len(cb.step_consecutive_failures[step_key]) == 2 assert cb.workflow_windows[workflow_id].get_failure_count() == 2 assert cb.global_window.get_failure_count() == 2 # Attendre que la fenêtre expire (3 secondes pour être sûr) time.sleep(3) # Nettoyer les anciennes données cb.cleanup_old_data() # Vérifier que les échecs dans les fenêtres ont été nettoyés assert cb.workflow_windows[workflow_id].get_failure_count() == 0 assert cb.global_window.get_failure_count() == 0 # Les échecs consécutifs restent (ils ont leur propre logique de nettoyage) # mais ils seront nettoyés après 1 heure selon la logique réelle def test_reset_step_failures(self): """Test de réinitialisation manuelle des échecs d'étape""" workflow_id = "test_workflow" step_id = "step_1" # Enregistrer des échecs for i in range(3): self.circuit_breaker.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND") step_key = f"{workflow_id}:{step_id}" assert len(self.circuit_breaker.step_consecutive_failures[step_key]) == 3 # Réinitialiser self.circuit_breaker.reset_step_failures(workflow_id, step_id) # Vérifier la réinitialisation assert step_key not in self.circuit_breaker.step_consecutive_failures assert self.circuit_breaker.step_success_counts[step_key] == 0 def test_reset_workflow_failures(self): """Test de réinitialisation manuelle des échecs de workflow""" workflow_id = "test_workflow" # Enregistrer des échecs sur plusieurs étapes for i in range(5): self.circuit_breaker.record_failure(workflow_id, f"step_{i}", "TARGET_NOT_FOUND") # Vérifier les échecs assert workflow_id in self.circuit_breaker.workflow_windows assert self.circuit_breaker.workflow_windows[workflow_id].get_failure_count() == 5 # Vérifier les échecs consécutifs step_keys = [f"{workflow_id}:step_{i}" for i in range(5)] for step_key in step_keys: assert len(self.circuit_breaker.step_consecutive_failures[step_key]) == 1 # Réinitialiser self.circuit_breaker.reset_workflow_failures(workflow_id) # Vérifier la réinitialisation assert len(self.circuit_breaker.workflow_windows[workflow_id].failures) == 0 for step_key in step_keys: assert step_key not in self.circuit_breaker.step_consecutive_failures def test_failure_event_integration(self): """Test d'intégration avec de vrais objets SimpleFailureEvent""" workflow_id = "integration_test_workflow" step_id = "data_validation" # Créer des SimpleFailureEvent directement (comme le ferait le système réel) failure_events = [ SimpleFailureEvent( timestamp=datetime.now(), workflow_id=workflow_id, step_id=step_id, failure_type="VALIDATION_ERROR" ), SimpleFailureEvent( timestamp=datetime.now(), workflow_id=workflow_id, step_id=step_id, failure_type="TARGET_NOT_FOUND" ), SimpleFailureEvent( timestamp=datetime.now(), workflow_id=workflow_id, step_id=step_id, failure_type="TIMEOUT" ) ] # Enregistrer les échecs via l'interface normale for event in failure_events: self.circuit_breaker.record_failure( event.workflow_id, event.step_id, event.failure_type ) # Vérifier que les objets SimpleFailureEvent sont correctement stockés step_key = f"{workflow_id}:{step_id}" stored_failures = self.circuit_breaker.step_consecutive_failures[step_key] assert len(stored_failures) == 3 for i, stored_failure in enumerate(stored_failures): assert isinstance(stored_failure, SimpleFailureEvent) assert stored_failure.workflow_id == workflow_id assert stored_failure.step_id == step_id assert stored_failure.failure_type == failure_events[i].failure_type # Vérifier que le circuit breaker fonctionne avec ces objets réels assert self.circuit_breaker.should_trigger_degraded(workflow_id, step_id) is True def test_state_persistence_and_recovery(self): """Test de persistance et récupération d'état réel""" workflow_id = "persistent_workflow" # Créer un état complexe avec plusieurs types d'échecs realistic_failures = [ ("login", "TARGET_NOT_FOUND"), ("form_fill", "VALIDATION_ERROR"), ("submit", "TIMEOUT"), ("login", "TARGET_NOT_FOUND"), # 2ème échec login ("confirmation", "POSTCONDITION_FAILED"), ] for step_id, failure_type in realistic_failures: self.circuit_breaker.record_failure(workflow_id, step_id, failure_type) # Ajouter quelques succès self.circuit_breaker.record_success(workflow_id, "other_step") # Sauvegarder l'état dans un fichier réel state_file = self._save_circuit_breaker_state("circuit_breaker_state.json") # Vérifier que le fichier existe et contient les bonnes données assert state_file.exists() state_data = self._load_and_verify_circuit_breaker_state(state_file) # Vérifier que les données sauvegardées sont complètes assert len(state_data['step_consecutive_failures']) > 0 assert workflow_id in [failure['workflow_id'] for failures in state_data['step_consecutive_failures'].values() for failure in failures] assert len(state_data['global_failures']) == len(realistic_failures) # Vérifier que les types d'échecs sont préservés saved_failure_types = [failure['failure_type'] for failure in state_data['global_failures']] expected_failure_types = [failure_type for _, failure_type in realistic_failures] assert saved_failure_types == expected_failure_types def test_get_status_summary(self): """Test du résumé de statut""" # Enregistrer quelques échecs self.circuit_breaker.record_failure("workflow_1", "step_1", "TARGET_NOT_FOUND") self.circuit_breaker.record_failure("workflow_1", "step_2", "POSTCONDITION_FAILED") self.circuit_breaker.record_failure("workflow_2", "step_1", "TARGET_NOT_FOUND") status = self.circuit_breaker.get_status_summary() # Vérifier la structure du statut assert 'timestamp' in status assert 'policy' in status assert 'global_stats' in status assert 'thresholds' in status # Vérifier les statistiques global_stats = status['global_stats'] assert global_stats['global_failures_in_window'] == 3 assert global_stats['workflows_with_failures'] == 2 assert global_stats['steps_with_consecutive_failures'] == 3 assert global_stats['global_failure_types']['TARGET_NOT_FOUND'] == 2 assert global_stats['global_failure_types']['POSTCONDITION_FAILED'] == 1 # Vérifier les seuils thresholds = status['thresholds'] assert thresholds['step_consecutive_to_degraded'] == 3 assert thresholds['workflow_window_to_quarantine'] == 10 assert thresholds['global_window_to_pause'] == 30 assert thresholds['window_duration_s'] == 600 class TestCircuitBreakerRealWorldScenarios: """Tests de scénarios réels d'utilisation du CircuitBreaker""" def test_realistic_workflow_failure_progression(self): """Test d'une progression réaliste d'échecs de workflow""" # Configuration réaliste pour un environnement de production production_policy = { 'step_fail_streak_to_degraded': 3, 'workflow_fail_window_s': 600, # 10 minutes 'workflow_fail_max_in_window': 10, 'global_fail_max_in_window': 50, 'success_reset_threshold': 2 } cb = CircuitBreaker(production_policy) # Simuler un workflow de facturation avec différents types d'échecs workflow_id = "billing_workflow_v2" # Échecs typiques dans un workflow de facturation realistic_failures = [ ("login_step", "TARGET_NOT_FOUND"), # Bouton login non trouvé ("customer_search", "TIMEOUT"), # Recherche client timeout ("invoice_form", "POSTCONDITION_FAILED"), # Formulaire non validé ("login_step", "TARGET_NOT_FOUND"), # Encore le bouton login ("payment_step", "TARGET_NOT_FOUND"), # Bouton paiement non trouvé ("login_step", "TARGET_NOT_FOUND"), # 3ème échec login -> DEGRADED ("customer_search", "TIMEOUT"), # Autre timeout ("invoice_form", "VALIDATION_ERROR"), # Erreur de validation ] # Enregistrer les échecs de manière séquentielle for step_id, failure_type in realistic_failures: cb.record_failure(workflow_id, step_id, failure_type) # Vérifier l'état après chaque échec if step_id == "login_step": step_key = f"{workflow_id}:{step_id}" consecutive_count = len(cb.step_consecutive_failures[step_key]) if consecutive_count >= 3: assert cb.should_trigger_degraded(workflow_id, step_id) # Vérifier les statistiques finales counts = cb.get_failure_counts(workflow_id) assert counts['workflow_window'] == len(realistic_failures) assert counts['step_consecutive']['login_step'] == 3 assert counts['step_consecutive']['customer_search'] == 2 # Vérifier les types d'échecs failure_types = cb.get_workflow_failure_types(workflow_id) assert failure_types["TARGET_NOT_FOUND"] == 4 assert failure_types["TIMEOUT"] == 2 assert failure_types["POSTCONDITION_FAILED"] == 1 assert failure_types["VALIDATION_ERROR"] == 1 def test_recovery_after_successful_executions(self): """Test de récupération après des exécutions réussies""" cb = CircuitBreaker({ 'step_fail_streak_to_degraded': 3, 'workflow_fail_window_s': 600, 'workflow_fail_max_in_window': 10, 'global_fail_max_in_window': 30, 'success_reset_threshold': 2 }) workflow_id = "data_entry_workflow" step_id = "form_submission" # Créer une situation proche du seuil DEGRADED cb.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND") cb.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND") # Vérifier qu'on n'est pas encore en DEGRADED assert not cb.should_trigger_degraded(workflow_id, step_id) # Simuler des succès qui devraient réinitialiser les échecs cb.record_success(workflow_id, step_id) # Premier succès assert not cb.should_trigger_degraded(workflow_id, step_id) cb.record_success(workflow_id, step_id) # Deuxième succès -> reset # Vérifier que les échecs consécutifs ont été réinitialisés step_key = f"{workflow_id}:{step_id}" assert len(cb.step_consecutive_failures[step_key]) == 0 # Un nouvel échec ne devrait pas déclencher DEGRADED immédiatement cb.record_failure(workflow_id, step_id, "TIMEOUT") assert not cb.should_trigger_degraded(workflow_id, step_id) def test_concurrent_workflows_realistic_load(self): """Test de charge réaliste avec plusieurs workflows concurrents""" cb = CircuitBreaker({ 'step_fail_streak_to_degraded': 3, 'workflow_fail_window_s': 300, # 5 minutes 'workflow_fail_max_in_window': 8, 'global_fail_max_in_window': 25, 'success_reset_threshold': 2 }) # Simuler plusieurs workflows typiques d'une entreprise workflows = [ "invoice_processing", "customer_onboarding", "inventory_update", "report_generation", "email_automation" ] # Simuler une charge de travail réaliste avec timestamps réels total_failures = 0 failure_distribution = {} for i in range(25): # 25 opérations pour assurer qu'au moins un workflow atteigne le seuil workflow_id = workflows[i % len(workflows)] step_id = f"step_{i % 3 + 1}" # 3 étapes par workflow # Simuler différents types d'échecs avec des probabilités réalistes failure_types = ["TARGET_NOT_FOUND", "TIMEOUT", "POSTCONDITION_FAILED"] failure_type = failure_types[i % len(failure_types)] cb.record_failure(workflow_id, step_id, failure_type) total_failures += 1 # Track distribution for verification if workflow_id not in failure_distribution: failure_distribution[workflow_id] = 0 failure_distribution[workflow_id] += 1 # Add realistic timing between failures time.sleep(0.001) # Vérifier que le système global fonctionne correctement assert cb.global_window.get_failure_count() == total_failures # Vérifier la distribution des échecs assert len(failure_distribution) == len(workflows) assert sum(failure_distribution.values()) == total_failures # Vérifier que certains workflows ont atteint des seuils réalistes workflow_failure_counts = {} for wf in workflows: workflow_failure_counts[wf] = cb.workflow_windows[wf].get_failure_count() if wf in cb.workflow_windows else 0 # Avec 25 échecs sur 5 workflows, la distribution devrait être équitable max_failures = max(workflow_failure_counts.values()) min_failures = min(workflow_failure_counts.values()) # Vérifier qu'il y a une distribution raisonnable assert max_failures >= 4 # Au moins 4 échecs pour le workflow le plus touché assert min_failures >= 3 # Au moins 3 échecs pour le workflow le moins touché # Vérifier les types d'échecs globaux global_failure_types = cb.global_window.get_failure_types() assert len(global_failure_types) == 3 # Les 3 types utilisés for failure_type in ["TARGET_NOT_FOUND", "TIMEOUT", "POSTCONDITION_FAILED"]: assert failure_type in global_failure_types assert global_failure_types[failure_type] > 0 def test_mixed_success_failure_realistic_pattern(self): """Test avec un pattern réaliste de succès et échecs mélangés""" cb = CircuitBreaker({ 'step_fail_streak_to_degraded': 4, # Plus tolérant 'workflow_fail_window_s': 600, 'workflow_fail_max_in_window': 12, 'global_fail_max_in_window': 40, 'success_reset_threshold': 3 # Nécessite plus de succès }) workflow_id = "document_processing" step_id = "ocr_extraction" # Pattern réaliste : échecs intermittents avec succès operations = [ ("failure", "TARGET_NOT_FOUND"), ("success", None), ("failure", "TIMEOUT"), ("success", None), ("failure", "POSTCONDITION_FAILED"), ("failure", "TARGET_NOT_FOUND"), # 2 échecs consécutifs ("success", None), ("success", None), ("failure", "VALIDATION_ERROR"), ("success", None), ("success", None), ("success", None), # 3 succès -> devrait reset ("failure", "TARGET_NOT_FOUND"), # Nouveau cycle ] for operation_type, failure_type in operations: if operation_type == "failure": cb.record_failure(workflow_id, step_id, failure_type) else: cb.record_success(workflow_id, step_id) # Après ce pattern, les échecs consécutifs devraient être réinitialisés step_key = f"{workflow_id}:{step_id}" consecutive_failures = len(cb.step_consecutive_failures[step_key]) # Devrait avoir seulement le dernier échec après le reset assert consecutive_failures == 1 assert not cb.should_trigger_degraded(workflow_id, step_id) # Mais les échecs devraient toujours être dans la fenêtre du workflow counts = cb.get_failure_counts(workflow_id) assert counts['workflow_window'] == 6 # Total des échecs class TestCircuitBreakerIntegration: """Tests d'intégration du CircuitBreaker""" def test_sliding_window_behavior_with_real_time(self): """Test du comportement des fenêtres glissantes avec temps réel""" policy = { 'step_fail_streak_to_degraded': 3, 'workflow_fail_window_s': 2, # 2 secondes pour test rapide 'workflow_fail_max_in_window': 5, 'global_fail_max_in_window': 10 } circuit_breaker = CircuitBreaker(policy) workflow_id = "test_workflow" # Enregistrer des échecs récents for i in range(3): circuit_breaker.record_failure(workflow_id, f"step_{i}", "TARGET_NOT_FOUND") # Vérifier que les échecs sont comptés counts = circuit_breaker.get_failure_counts(workflow_id) assert counts['workflow_window'] == 3 # Attendre que la fenêtre expire time.sleep(3) # Les échecs devraient maintenant être expirés counts_after = circuit_breaker.get_failure_counts(workflow_id) assert counts_after['workflow_window'] == 0 # Ne devrait pas déclencher quarantine (échecs expirés) assert circuit_breaker.should_trigger_quarantine(workflow_id) is False def test_multiple_workflows_isolation(self): """Test de l'isolation entre workflows""" policy = { 'step_fail_streak_to_degraded': 2, 'workflow_fail_window_s': 600, 'workflow_fail_max_in_window': 3, 'global_fail_max_in_window': 10 } circuit_breaker = CircuitBreaker(policy) # Enregistrer des échecs pour workflow_1 for i in range(2): circuit_breaker.record_failure("workflow_1", "step_1", "TARGET_NOT_FOUND") # Enregistrer des échecs pour workflow_2 circuit_breaker.record_failure("workflow_2", "step_1", "POSTCONDITION_FAILED") # Vérifier l'isolation assert circuit_breaker.should_trigger_degraded("workflow_1", "step_1") is True assert circuit_breaker.should_trigger_degraded("workflow_2", "step_1") is False assert circuit_breaker.should_trigger_quarantine("workflow_1") is False assert circuit_breaker.should_trigger_quarantine("workflow_2") is False # Les échecs contribuent au global assert circuit_breaker.global_window.get_failure_count() == 3 def test_policy_configuration_impact(self): """Test de l'impact de la configuration des politiques""" # Configuration stricte strict_policy = { 'step_fail_streak_to_degraded': 1, 'workflow_fail_window_s': 600, 'workflow_fail_max_in_window': 2, 'global_fail_max_in_window': 5 } strict_cb = CircuitBreaker(strict_policy) # Configuration permissive permissive_policy = { 'step_fail_streak_to_degraded': 5, 'workflow_fail_window_s': 600, 'workflow_fail_max_in_window': 20, 'global_fail_max_in_window': 100 } permissive_cb = CircuitBreaker(permissive_policy) # Même échec sur les deux workflow_id = "test_workflow" step_id = "step_1" strict_cb.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND") permissive_cb.record_failure(workflow_id, step_id, "TARGET_NOT_FOUND") # Vérifier les différences de comportement assert strict_cb.should_trigger_degraded(workflow_id, step_id) is True assert permissive_cb.should_trigger_degraded(workflow_id, step_id) is False if __name__ == "__main__": pytest.main([__file__])