From b59edc65c2eabb0e080bd6ea200b35e8f57b8331 Mon Sep 17 00:00:00 2001 From: oussi Date: Tue, 7 Apr 2026 11:31:20 +0200 Subject: [PATCH] feat: monitor module sysinfo + evaluation seuils --- src/monitor.rs | 451 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 450 insertions(+), 1 deletion(-) diff --git a/src/monitor.rs b/src/monitor.rs index de1e2a9..e7c41b5 100644 --- a/src/monitor.rs +++ b/src/monitor.rs @@ -1 +1,450 @@ -// Monitor module — Task 4 +use crate::alerter::Alerter; +use crate::config::{Alert, ConfigManager, ProcessConfig}; +use chrono::{Duration, Local}; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::sync::{Arc, Mutex, RwLock}; +use std::time::Duration as StdDuration; +use sysinfo::{Disks, System}; +use tokio::sync::Mutex as AsyncMutex; + +pub fn eval_status(value: f64, threshold: f64) -> &'static str { + if threshold <= 0.0 { + return "ok"; + } + let ratio = value / threshold; + if ratio >= 1.0 { + "critical" + } else if ratio >= 0.80 { + "warning" + } else { + "ok" + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CpuMetrics { + pub percent: f64, + pub cores: usize, + pub threshold: f64, + pub status: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct RamMetrics { + pub percent: f64, + pub total_gb: f64, + pub used_gb: f64, + pub available_gb: f64, + pub threshold: f64, + pub status: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct DiskMetrics { + pub drive: String, + pub mountpoint: String, + pub percent: f64, + pub total_gb: f64, + pub used_gb: f64, + pub free_gb: f64, + pub threshold: f64, + pub status: String, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ProcessMetrics { + pub name: String, + pub pattern: String, + pub running: bool, + pub enabled: bool, + pub alert_on_down: bool, + pub instance_count: usize, + pub total_memory_mb: f64, + pub total_cpu_percent: f64, + pub memory_threshold_mb: f64, + pub memory_status: String, + pub pids: Vec, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Metrics { + pub timestamp: String, + pub hostname: String, + pub os: String, + pub cpu: CpuMetrics, + pub ram: RamMetrics, + pub disks: Vec, + pub processes: Vec, + pub uptime: String, + pub boot_time: String, + pub monitoring_active: bool, + pub last_check: String, + pub next_check: String, +} + +pub struct SystemMonitor { + config_manager: Arc>, + alerter: Arc, + pub metrics: Arc>>, + pub running: Arc, + last_alerts: Arc>>>, +} + +impl SystemMonitor { + pub fn new( + config_manager: Arc>, + alerter: Arc, + ) -> Self { + SystemMonitor { + config_manager, + alerter, + metrics: Arc::new(RwLock::new(None)), + running: Arc::new(std::sync::atomic::AtomicBool::new(false)), + last_alerts: Arc::new(Mutex::new(HashMap::new())), + } + } + + pub async fn collect(&self) -> Metrics { + let config = { + let cm = self.config_manager.lock().await; + cm.config.clone() + }; + + let mut sys = System::new_all(); + // Deux mesures pour CPU précis + std::thread::sleep(StdDuration::from_millis(500)); + sys.refresh_all(); + + let cpu_percent = sys.global_cpu_usage() as f64; + let cpu_status = eval_status(cpu_percent, config.thresholds.cpu_percent).to_string(); + + let ram_total = sys.total_memory() as f64; + let ram_used = sys.used_memory() as f64; + let ram_available = sys.available_memory() as f64; + let ram_percent = if ram_total > 0.0 { + ram_used / ram_total * 100.0 + } else { + 0.0 + }; + let ram_status = eval_status(ram_percent, config.thresholds.ram_percent).to_string(); + + let mut disks = Vec::new(); + let disk_list = Disks::new_with_refreshed_list(); + let ignored_fs = ["squashfs", "tmpfs", "devtmpfs", "overlay", "iso9660"]; + for disk in &disk_list { + let fs = disk.file_system().to_string_lossy().to_lowercase(); + if ignored_fs.iter().any(|&f| fs.contains(f)) { + continue; + } + let total = disk.total_space() as f64; + if total < 1_073_741_824.0 { + continue; // < 1 GB + } + let available = disk.available_space() as f64; + let used = total - available; + let percent = (used / total * 1000.0).round() / 10.0; + let status = eval_status(percent, config.thresholds.disk_percent).to_string(); + disks.push(DiskMetrics { + drive: disk + .name() + .to_string_lossy() + .trim_end_matches('\\') + .to_string(), + mountpoint: disk.mount_point().to_string_lossy().to_string(), + percent, + total_gb: (total / 1_073_741_824.0 * 10.0).round() / 10.0, + used_gb: (used / 1_073_741_824.0 * 10.0).round() / 10.0, + free_gb: (available / 1_073_741_824.0 * 10.0).round() / 10.0, + threshold: config.thresholds.disk_percent, + status, + }); + } + + let processes = self.check_processes(&sys, &config.processes); + + let boot_time_unix = System::boot_time(); + let now_unix = Local::now().timestamp() as u64; + let uptime_secs = now_unix.saturating_sub(boot_time_unix); + let uptime = format!( + "{}:{:02}:{:02}", + uptime_secs / 3600, + (uptime_secs % 3600) / 60, + uptime_secs % 60 + ); + + let now = Local::now(); + let interval = config.check_interval_minutes; + + Metrics { + timestamp: now.to_rfc3339(), + hostname: System::host_name().unwrap_or_else(|| "inconnu".into()), + os: format!( + "{} {}", + System::name().unwrap_or_default(), + System::os_version().unwrap_or_default() + ), + cpu: CpuMetrics { + percent: (cpu_percent * 10.0).round() / 10.0, + cores: sys.cpus().len(), + threshold: config.thresholds.cpu_percent, + status: cpu_status, + }, + ram: RamMetrics { + percent: (ram_percent * 10.0).round() / 10.0, + total_gb: (ram_total / 1_073_741_824.0 * 10.0).round() / 10.0, + used_gb: (ram_used / 1_073_741_824.0 * 10.0).round() / 10.0, + available_gb: (ram_available / 1_073_741_824.0 * 10.0).round() / 10.0, + threshold: config.thresholds.ram_percent, + status: ram_status, + }, + disks, + processes, + uptime, + boot_time: chrono::DateTime::from_timestamp(boot_time_unix as i64, 0) + .map(|dt: chrono::DateTime| dt.to_rfc3339()) + .unwrap_or_default(), + monitoring_active: self + .running + .load(std::sync::atomic::Ordering::Relaxed), + last_check: now.to_rfc3339(), + next_check: (now + Duration::minutes(interval as i64)).to_rfc3339(), + } + } + + fn check_processes( + &self, + sys: &System, + process_configs: &[ProcessConfig], + ) -> Vec { + let mut results = Vec::new(); + for pc in process_configs { + let pattern = pc.pattern.to_lowercase(); + let mut found_pids: Vec = Vec::new(); + let mut total_mem: f64 = 0.0; + let mut total_cpu: f64 = 0.0; + + if pc.enabled { + for (pid, proc) in sys.processes() { + let name = proc.name().to_string_lossy().to_lowercase(); + let cmd = proc + .cmd() + .iter() + .map(|s| s.to_string_lossy().to_lowercase()) + .collect::>() + .join(" "); + if name.contains(&pattern) || cmd.contains(&pattern) { + found_pids.push(pid.as_u32()); + total_mem += proc.memory() as f64 / 1_048_576.0; + total_cpu += proc.cpu_usage() as f64; + } + } + } + + let mem_status = if pc.memory_threshold_mb > 0.0 && total_mem > 0.0 { + eval_status(total_mem, pc.memory_threshold_mb).to_string() + } else { + "ok".to_string() + }; + + results.push(ProcessMetrics { + name: pc.name.clone(), + pattern: pc.pattern.clone(), + running: !found_pids.is_empty(), + enabled: pc.enabled, + alert_on_down: pc.alert_on_down, + instance_count: found_pids.len(), + total_memory_mb: (total_mem * 10.0).round() / 10.0, + total_cpu_percent: (total_cpu * 10.0).round() / 10.0, + memory_threshold_mb: pc.memory_threshold_mb, + memory_status: mem_status, + pids: found_pids, + }); + } + results + } + + pub async fn check_and_alert(&self, metrics: &Metrics) { + let cooldown = { + let cm = self.config_manager.lock().await; + cm.config.alert_cooldown_minutes + }; + let hostname = metrics.hostname.clone(); + + let mut to_alert: Vec<(String, String, f64, f64, String)> = Vec::new(); + + { + let mut last = self.last_alerts.lock().unwrap(); + let now = Local::now(); + + let mut maybe_alert = + |key: String, msg: String, val: f64, thr: f64, typ: String| { + let should = match last.get(&key) { + Some(t) => (now - *t) >= Duration::minutes(cooldown as i64), + None => true, + }; + if should { + last.insert(key.clone(), now); + to_alert.push((key, msg, val, thr, typ)); + } + }; + + if metrics.cpu.status == "critical" { + maybe_alert( + "cpu".into(), + format!( + "CPU a {}% (seuil: {}%)", + metrics.cpu.percent, metrics.cpu.threshold + ), + metrics.cpu.percent, + metrics.cpu.threshold, + "threshold".into(), + ); + } + if metrics.ram.status == "critical" { + maybe_alert( + "ram".into(), + format!( + "RAM a {}% (seuil: {}%)", + metrics.ram.percent, metrics.ram.threshold + ), + metrics.ram.percent, + metrics.ram.threshold, + "threshold".into(), + ); + } + for disk in &metrics.disks { + if disk.status == "critical" { + maybe_alert( + format!("disk_{}", disk.drive), + format!( + "Disque {} a {}% (seuil: {}%)", + disk.drive, disk.percent, disk.threshold + ), + disk.percent, + disk.threshold, + "threshold".into(), + ); + } + } + for proc in &metrics.processes { + if !proc.enabled { + continue; + } + if proc.alert_on_down && !proc.running { + maybe_alert( + format!("process_down_{}", proc.name), + format!( + "Processus '{}' non detecte (pattern: {})", + proc.name, proc.pattern + ), + 0.0, + 0.0, + "process_down".into(), + ); + } + if proc.memory_threshold_mb > 0.0 && proc.memory_status == "critical" { + maybe_alert( + format!("process_mem_{}", proc.name), + format!( + "Processus '{}' utilise {} Mo (seuil: {} Mo)", + proc.name, proc.total_memory_mb, proc.memory_threshold_mb + ), + proc.total_memory_mb, + proc.memory_threshold_mb, + "threshold".into(), + ); + } + } + } + + for (key, message, value, threshold, alert_type) in to_alert { + let alert = Alert { + timestamp: Local::now().to_rfc3339(), + alert_type: alert_type.clone(), + key, + message: message.clone(), + value, + threshold, + hostname: hostname.clone(), + }; + { + let cm = self.config_manager.lock().await; + cm.save_alert(alert); + let subject = format!("[ALERTE] {} - {}", hostname, message); + let body = format!( + "Alerte de supervision\n{sep}\n\nServeur : {host}\nDate : {date}\nType : {typ}\n\nMessage : {msg}\n\n{sep}\nSupervision - Monitoring automatique", + sep = "=".repeat(40), + host = hostname, + date = Local::now().to_rfc3339(), + typ = alert_type, + msg = message + ); + self.alerter.send(&cm.config.smtp, &subject, &body).await; + } + } + } + + pub async fn start(self: Arc) { + self.running + .store(true, std::sync::atomic::Ordering::Relaxed); + let monitor = self.clone(); + tokio::spawn(async move { + loop { + if !monitor + .running + .load(std::sync::atomic::Ordering::Relaxed) + { + break; + } + let metrics = monitor.collect().await; + { + let mut m = monitor.metrics.write().unwrap(); + *m = Some(metrics.clone()); + } + monitor.check_and_alert(&metrics).await; + + let interval = { + let cm = monitor.config_manager.lock().await; + cm.config.check_interval_minutes + }; + tokio::time::sleep(StdDuration::from_secs(interval * 60)).await; + } + }); + } + + pub fn stop(&self) { + self.running + .store(false, std::sync::atomic::Ordering::Relaxed); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn eval_status_ok_below_80_percent() { + assert_eq!(eval_status(70.0, 90.0), "ok"); + } + + #[test] + fn eval_status_warning_at_80_percent_of_threshold() { + assert_eq!(eval_status(72.0, 90.0), "warning"); // 72/90 = 0.8 + } + + #[test] + fn eval_status_critical_at_threshold() { + assert_eq!(eval_status(90.0, 90.0), "critical"); + } + + #[test] + fn eval_status_critical_above_threshold() { + assert_eq!(eval_status(95.0, 90.0), "critical"); + } + + #[test] + fn eval_status_ok_with_zero_threshold() { + assert_eq!(eval_status(50.0, 0.0), "ok"); + } +}