use crate::alerter::Alerter;
use crate::config::{Alert, ConfigManager, ProcessConfig};
use chrono::{DateTime, Duration, Local};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex, RwLock};
use std::time::Duration as StdDuration;
use sysinfo::{Disks, System};
use tokio::sync::Mutex as AsyncMutex;

/// Maps a measured value against its threshold: "critical" at or above the
/// threshold, "warning" from 80% of the threshold upward, "ok" otherwise.
/// A threshold of zero or below disables the check and always returns "ok".
pub fn eval_status(value: f64, threshold: f64) -> &'static str {
    if threshold <= 0.0 {
        return "ok";
    }
    let ratio = value / threshold;
    if ratio >= 1.0 {
        "critical"
    } else if ratio >= 0.80 {
        "warning"
    } else {
        "ok"
    }
}
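// Example: with a threshold of 90.0 the warning band starts at 72.0
// (0.80 * 90.0), so eval_status(71.9, 90.0) == "ok",
// eval_status(72.0, 90.0) == "warning", and eval_status(90.0, 90.0) == "critical".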
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CpuMetrics {
    pub percent: f64,
    pub cores: usize,
    pub threshold: f64,
    pub status: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RamMetrics {
    pub percent: f64,
    pub total_gb: f64,
    pub used_gb: f64,
    pub available_gb: f64,
    pub threshold: f64,
    pub status: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiskMetrics {
    pub drive: String,
    pub mountpoint: String,
    pub percent: f64,
    pub total_gb: f64,
    pub used_gb: f64,
    pub free_gb: f64,
    pub threshold: f64,
    pub status: String,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessMetrics {
    pub name: String,
    pub pattern: String,
    pub running: bool,
    pub enabled: bool,
    pub alert_on_down: bool,
    pub instance_count: usize,
    pub total_memory_mb: f64,
    pub total_cpu_percent: f64,
    pub memory_threshold_mb: f64,
    pub memory_status: String,
    pub pids: Vec<u32>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metrics {
    pub timestamp: String,
    pub hostname: String,
    pub os: String,
    pub cpu: CpuMetrics,
    pub ram: RamMetrics,
    pub disks: Vec<DiskMetrics>,
    pub processes: Vec<ProcessMetrics>,
    pub uptime: String,
    pub boot_time: String,
    pub monitoring_active: bool,
    pub last_check: String,
    pub next_check: String,
}

pub struct SystemMonitor {
    config_manager: Arc<AsyncMutex<ConfigManager>>,
    alerter: Arc<Alerter>,
    pub metrics: Arc<RwLock<Option<Metrics>>>,
    pub running: Arc<std::sync::atomic::AtomicBool>,
    last_alerts: Arc<Mutex<HashMap<String, DateTime<Local>>>>,
}

impl SystemMonitor {
    pub fn new(
        config_manager: Arc<AsyncMutex<ConfigManager>>,
        alerter: Arc<Alerter>,
    ) -> Self {
        SystemMonitor {
            config_manager,
            alerter,
            metrics: Arc::new(RwLock::new(None)),
            running: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            last_alerts: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    pub async fn collect(&self) -> Metrics {
        let config = {
            let cm = self.config_manager.lock().await;
            cm.config.clone()
        };

        let mut sys = System::new_all();
        // Two measurements are needed for an accurate CPU reading
        std::thread::sleep(StdDuration::from_millis(500));
        sys.refresh_all();

        let cpu_percent = sys.global_cpu_usage() as f64;
        let cpu_status = eval_status(cpu_percent, config.thresholds.cpu_percent).to_string();

        let ram_total = sys.total_memory() as f64;
        let ram_used = sys.used_memory() as f64;
        let ram_available = sys.available_memory() as f64;
        let ram_percent = if ram_total > 0.0 {
            ram_used / ram_total * 100.0
        } else {
            0.0
        };
        let ram_status = eval_status(ram_percent, config.thresholds.ram_percent).to_string();

        let mut disks = Vec::new();
        let disk_list = Disks::new_with_refreshed_list();
        // Skip pseudo filesystems that would otherwise show up as full disks
        let ignored_fs = ["squashfs", "tmpfs", "devtmpfs", "overlay", "iso9660"];
        for disk in &disk_list {
            let fs = disk.file_system().to_string_lossy().to_lowercase();
            if ignored_fs.iter().any(|&f| fs.contains(f)) {
                continue;
            }
            let total = disk.total_space() as f64;
            if total < 1_073_741_824.0 {
                continue; // skip volumes smaller than 1 GB
            }
            let available = disk.available_space() as f64;
            let used = total - available;
            let percent = (used / total * 1000.0).round() / 10.0;
            let status = eval_status(percent, config.thresholds.disk_percent).to_string();
            disks.push(DiskMetrics {
                drive: disk
                    .name()
                    .to_string_lossy()
                    .trim_end_matches('\\')
                    .to_string(),
                mountpoint: disk.mount_point().to_string_lossy().to_string(),
                percent,
                total_gb: (total / 1_073_741_824.0 * 10.0).round() / 10.0,
                used_gb: (used / 1_073_741_824.0 * 10.0).round() / 10.0,
                free_gb: (available / 1_073_741_824.0 * 10.0).round() / 10.0,
                threshold: config.thresholds.disk_percent,
                status,
            });
        }

        let processes = self.check_processes(&sys, &config.processes);

        let boot_time_unix = System::boot_time();
        let now_unix = Local::now().timestamp() as u64;
        let uptime_secs = now_unix.saturating_sub(boot_time_unix);
        let uptime = format!(
            "{}:{:02}:{:02}",
            uptime_secs / 3600,
            (uptime_secs % 3600) / 60,
            uptime_secs % 60
        );

        let now = Local::now();
        let interval = config.check_interval_minutes;

        Metrics {
            timestamp: now.to_rfc3339(),
            hostname: System::host_name().unwrap_or_else(|| "unknown".into()),
            os: format!(
                "{} {}",
                System::name().unwrap_or_default(),
                System::os_version().unwrap_or_default()
            ),
            cpu: CpuMetrics {
                percent: (cpu_percent * 10.0).round() / 10.0,
                cores: sys.cpus().len(),
                threshold: config.thresholds.cpu_percent,
                status: cpu_status,
            },
            ram: RamMetrics {
                percent: (ram_percent * 10.0).round() / 10.0,
                total_gb: (ram_total / 1_073_741_824.0 * 10.0).round() / 10.0,
                used_gb: (ram_used / 1_073_741_824.0 * 10.0).round() / 10.0,
                available_gb: (ram_available / 1_073_741_824.0 * 10.0).round() / 10.0,
                threshold: config.thresholds.ram_percent,
                status: ram_status,
            },
            disks,
            processes,
            uptime,
            boot_time: chrono::DateTime::from_timestamp(boot_time_unix as i64, 0)
                .map(|dt| dt.to_rfc3339())
                .unwrap_or_default(),
            monitoring_active: self.running.load(std::sync::atomic::Ordering::Relaxed),
            last_check: now.to_rfc3339(),
            next_check: (now + Duration::minutes(interval as i64)).to_rfc3339(),
        }
    }

    fn check_processes(
        &self,
        sys: &System,
        process_configs: &[ProcessConfig],
    ) -> Vec<ProcessMetrics> {
        let mut results = Vec::new();
        for pc in process_configs {
            let pattern = pc.pattern.to_lowercase();
            let mut found_pids: Vec<u32> = Vec::new();
            let mut total_mem: f64 = 0.0;
            let mut total_cpu: f64 = 0.0;

            if pc.enabled {
                // Match the pattern against both the process name and its command line
                for (pid, proc) in sys.processes() {
                    let name = proc.name().to_string_lossy().to_lowercase();
                    let cmd = proc
                        .cmd()
                        .iter()
                        .map(|s| s.to_string_lossy().to_lowercase())
                        .collect::<Vec<_>>()
                        .join(" ");
                    if name.contains(&pattern) || cmd.contains(&pattern) {
                        found_pids.push(pid.as_u32());
                        total_mem += proc.memory() as f64 / 1_048_576.0; // bytes -> MB
                        total_cpu += proc.cpu_usage() as f64;
                    }
                }
            }

            let mem_status = if pc.memory_threshold_mb > 0.0 && total_mem > 0.0 {
                eval_status(total_mem, pc.memory_threshold_mb).to_string()
            } else {
                "ok".to_string()
            };

            results.push(ProcessMetrics {
                name: pc.name.clone(),
                pattern: pc.pattern.clone(),
                running: !found_pids.is_empty(),
                enabled: pc.enabled,
                alert_on_down: pc.alert_on_down,
                instance_count: found_pids.len(),
                total_memory_mb: (total_mem * 10.0).round() / 10.0,
                total_cpu_percent: (total_cpu * 10.0).round() / 10.0,
                memory_threshold_mb: pc.memory_threshold_mb,
                memory_status: mem_status,
                pids: found_pids,
            });
        }
        results
    }
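    /// Compares collected metrics against their thresholds and raises an alert
    /// for every "critical" status. Alerts are deduplicated per key: a key that
    /// already fired within the last `alert_cooldown_minutes` is skipped. Each
    /// alert is persisted via the config manager and emailed over SMTP.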
    pub async fn check_and_alert(&self, metrics: &Metrics) {
        let cooldown = {
            let cm = self.config_manager.lock().await;
            cm.config.alert_cooldown_minutes
        };
        let hostname = metrics.hostname.clone();
        let mut to_alert: Vec<(String, String, f64, f64, String)> = Vec::new();

        {
            let mut last = self.last_alerts.lock().unwrap();
            let now = Local::now();
            // Queue an alert unless the same key fired within the cooldown window
            let mut maybe_alert = |key: String, msg: String, val: f64, thr: f64, typ: String| {
                let should = match last.get(&key) {
                    Some(t) => (now - *t) >= Duration::minutes(cooldown as i64),
                    None => true,
                };
                if should {
                    last.insert(key.clone(), now);
                    to_alert.push((key, msg, val, thr, typ));
                }
            };

            if metrics.cpu.status == "critical" {
                maybe_alert(
                    "cpu".into(),
                    format!(
                        "CPU at {}% (threshold: {}%)",
                        metrics.cpu.percent, metrics.cpu.threshold
                    ),
                    metrics.cpu.percent,
                    metrics.cpu.threshold,
                    "threshold".into(),
                );
            }
            if metrics.ram.status == "critical" {
                maybe_alert(
                    "ram".into(),
                    format!(
                        "RAM at {}% (threshold: {}%)",
                        metrics.ram.percent, metrics.ram.threshold
                    ),
                    metrics.ram.percent,
                    metrics.ram.threshold,
                    "threshold".into(),
                );
            }
            for disk in &metrics.disks {
                if disk.status == "critical" {
                    maybe_alert(
                        format!("disk_{}", disk.drive),
                        format!(
                            "Disk {} at {}% (threshold: {}%)",
                            disk.drive, disk.percent, disk.threshold
                        ),
                        disk.percent,
                        disk.threshold,
                        "threshold".into(),
                    );
                }
            }
            for proc in &metrics.processes {
                if !proc.enabled {
                    continue;
                }
                if proc.alert_on_down && !proc.running {
                    maybe_alert(
                        format!("process_down_{}", proc.name),
                        format!(
                            "Process '{}' not detected (pattern: {})",
                            proc.name, proc.pattern
                        ),
                        0.0,
                        0.0,
                        "process_down".into(),
                    );
                }
                if proc.memory_threshold_mb > 0.0 && proc.memory_status == "critical" {
                    maybe_alert(
                        format!("process_mem_{}", proc.name),
                        format!(
                            "Process '{}' is using {} MB (threshold: {} MB)",
                            proc.name, proc.total_memory_mb, proc.memory_threshold_mb
                        ),
                        proc.total_memory_mb,
                        proc.memory_threshold_mb,
                        "threshold".into(),
                    );
                }
            }
        }

        for (key, message, value, threshold, alert_type) in to_alert {
            let alert = Alert {
                timestamp: Local::now().to_rfc3339(),
                alert_type: alert_type.clone(),
                key,
                message: message.clone(),
                value,
                threshold,
                hostname: hostname.clone(),
            };
            {
                let cm = self.config_manager.lock().await;
                cm.save_alert(alert);
                let subject = format!("[ALERT] {} - {}", hostname, message);
                let body = format!(
                    "Monitoring alert\n{sep}\n\nServer: {host}\nDate: {date}\nType: {typ}\n\nMessage: {msg}\n\n{sep}\nSupervision - automated monitoring",
                    sep = "=".repeat(40),
                    host = hostname,
                    date = Local::now().to_rfc3339(),
                    typ = alert_type,
                    msg = message
                );
                self.alerter.send(&cm.config.smtp, &subject, &body).await;
            }
        }
    }

    pub async fn start(self: Arc<Self>) {
        self.running.store(true, std::sync::atomic::Ordering::Relaxed);
        let monitor = self.clone();
        tokio::spawn(async move {
            loop {
                if !monitor.running.load(std::sync::atomic::Ordering::Relaxed) {
                    break;
                }
                let metrics = monitor.collect().await;
                {
                    let mut m = monitor.metrics.write().unwrap();
                    *m = Some(metrics.clone());
                }
                monitor.check_and_alert(&metrics).await;
                let interval = {
                    let cm = monitor.config_manager.lock().await;
                    cm.config.check_interval_minutes
                };
                tokio::time::sleep(StdDuration::from_secs(interval * 60)).await;
            }
        });
    }

    pub fn stop(&self) {
        self.running.store(false, std::sync::atomic::Ordering::Relaxed);
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn eval_status_ok_below_80_percent() {
        assert_eq!(eval_status(70.0, 90.0), "ok");
    }

    #[test]
    fn eval_status_warning_at_80_percent_of_threshold() {
        assert_eq!(eval_status(72.0, 90.0), "warning"); // 72/90 = 0.8
    }

    #[test]
    fn eval_status_critical_at_threshold() {
        assert_eq!(eval_status(90.0, 90.0), "critical");
    }

    #[test]
    fn eval_status_critical_above_threshold() {
        assert_eq!(eval_status(95.0, 90.0), "critical");
    }

    #[test]
    fn eval_status_ok_with_zero_threshold() {
        assert_eq!(eval_status(50.0, 0.0), "ok");
    }
}
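// A few extra boundary checks, as a minimal sketch of the band edges implied
// by eval_status above (ratio < 0.80 is "ok", 0.80 up to but excluding 1.0 is
// "warning", >= 1.0 is "critical", non-positive thresholds disable the check).
#[cfg(test)]
mod boundary_tests {
    use super::*;

    #[test]
    fn eval_status_warning_just_below_threshold() {
        // 89.9 / 90.0 ≈ 0.999: still inside the warning band, not yet critical.
        assert_eq!(eval_status(89.9, 90.0), "warning");
    }

    #[test]
    fn eval_status_ok_just_below_warning_band() {
        // 71.9 / 90.0 ≈ 0.799: just under the 0.80 warning cutoff.
        assert_eq!(eval_status(71.9, 90.0), "ok");
    }

    #[test]
    fn eval_status_ok_with_negative_threshold() {
        // Negative thresholds behave like zero: the check is disabled.
        assert_eq!(eval_status(50.0, -5.0), "ok");
    }
}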