feat: monitor module sysinfo + evaluation seuils

This commit is contained in:
oussi
2026-04-07 11:31:20 +02:00
parent 0e3b1b1b4e
commit b59edc65c2

View File

@@ -1 +1,450 @@
// Monitor module — Task 4
use crate::alerter::Alerter;
use crate::config::{Alert, ConfigManager, ProcessConfig};
use chrono::{Duration, Local};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, Mutex, RwLock};
use std::time::Duration as StdDuration;
use sysinfo::{Disks, System};
use tokio::sync::Mutex as AsyncMutex;
/// Classify a measured value against its configured threshold.
///
/// Returns `"ok"` when the threshold is disabled (`<= 0`) or the value is
/// below 80% of it, `"warning"` from 80% of the threshold up to (but not
/// including) the threshold, and `"critical"` once the threshold is reached
/// or exceeded.
pub fn eval_status(value: f64, threshold: f64) -> &'static str {
    if threshold <= 0.0 {
        // A non-positive threshold means the check is disabled.
        return "ok";
    }
    match value / threshold {
        r if r >= 1.0 => "critical",
        r if r >= 0.80 => "warning",
        _ => "ok",
    }
}
/// Snapshot of global CPU usage for one collection cycle.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CpuMetrics {
    /// Global CPU usage in percent, rounded to one decimal place.
    pub percent: f64,
    /// Number of logical cores reported by sysinfo.
    pub cores: usize,
    /// Configured critical threshold, in percent.
    pub threshold: f64,
    /// `"ok"` | `"warning"` | `"critical"`, as computed by `eval_status`.
    pub status: String,
}
/// Snapshot of RAM usage for one collection cycle.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RamMetrics {
    /// Used / total memory, in percent, rounded to one decimal place.
    pub percent: f64,
    /// Total physical memory in GB (one decimal).
    pub total_gb: f64,
    /// Used memory in GB (one decimal).
    pub used_gb: f64,
    /// Available memory in GB (one decimal).
    pub available_gb: f64,
    /// Configured critical threshold, in percent.
    pub threshold: f64,
    /// `"ok"` | `"warning"` | `"critical"`, as computed by `eval_status`.
    pub status: String,
}
/// Snapshot of one physical disk/volume (pseudo-filesystems and volumes
/// smaller than 1 GB are filtered out at collection time).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiskMetrics {
    /// Device name with any trailing backslash stripped (Windows drives).
    pub drive: String,
    /// Mount point path.
    pub mountpoint: String,
    /// Used space in percent, rounded to one decimal place.
    pub percent: f64,
    /// Total capacity in GB (one decimal).
    pub total_gb: f64,
    /// Used space in GB (one decimal).
    pub used_gb: f64,
    /// Free space in GB (one decimal).
    pub free_gb: f64,
    /// Configured critical threshold, in percent.
    pub threshold: f64,
    /// `"ok"` | `"warning"` | `"critical"`, as computed by `eval_status`.
    pub status: String,
}
/// Aggregated state of all processes matching one configured pattern.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessMetrics {
    /// Display name from the process config.
    pub name: String,
    /// Case-insensitive substring matched against process name and cmdline.
    pub pattern: String,
    /// True when at least one matching process was found.
    pub running: bool,
    /// Whether this process check is enabled in the config.
    pub enabled: bool,
    /// Whether an alert should be raised when no instance is running.
    pub alert_on_down: bool,
    /// Number of matching process instances.
    pub instance_count: usize,
    /// Sum of resident memory across instances, in MB (one decimal).
    pub total_memory_mb: f64,
    /// Sum of CPU usage across instances, in percent (one decimal).
    pub total_cpu_percent: f64,
    /// Memory threshold in MB; <= 0 disables the memory check.
    pub memory_threshold_mb: f64,
    /// `"ok"` | `"warning"` | `"critical"` for the memory check.
    pub memory_status: String,
    /// PIDs of all matching instances.
    pub pids: Vec<u32>,
}
/// Full system snapshot produced by one `SystemMonitor::collect` call.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Metrics {
    /// Collection time, RFC 3339 (local timezone).
    pub timestamp: String,
    /// Host name, or "inconnu" when unavailable.
    pub hostname: String,
    /// OS name and version, space-separated.
    pub os: String,
    pub cpu: CpuMetrics,
    pub ram: RamMetrics,
    pub disks: Vec<DiskMetrics>,
    pub processes: Vec<ProcessMetrics>,
    /// Uptime formatted as `H:MM:SS` (hours not zero-padded).
    pub uptime: String,
    /// Boot time, RFC 3339 (UTC), empty string if conversion fails.
    pub boot_time: String,
    /// Whether the background monitoring loop is currently running.
    pub monitoring_active: bool,
    /// Same instant as `timestamp`, RFC 3339.
    pub last_check: String,
    /// `last_check` + configured interval, RFC 3339.
    pub next_check: String,
}
/// Periodic system monitor: collects metrics via sysinfo, evaluates them
/// against configured thresholds, and sends cooldown-limited alerts.
pub struct SystemMonitor {
    /// Shared configuration (thresholds, processes, SMTP, intervals).
    config_manager: Arc<AsyncMutex<ConfigManager>>,
    /// Alert sender used when a threshold is breached.
    alerter: Arc<Alerter>,
    /// Most recent snapshot; `None` until the first collection completes.
    pub metrics: Arc<RwLock<Option<Metrics>>>,
    /// Loop-control flag toggled by `start()` / `stop()`.
    pub running: Arc<std::sync::atomic::AtomicBool>,
    /// Timestamp of the last alert sent per alert key, for cooldown checks.
    last_alerts: Arc<Mutex<HashMap<String, chrono::DateTime<Local>>>>,
}
impl SystemMonitor {
    /// Build a monitor wired to the shared configuration and alert sender.
    /// The background loop is not started until `start()` is called.
    pub fn new(
        config_manager: Arc<AsyncMutex<ConfigManager>>,
        alerter: Arc<Alerter>,
    ) -> Self {
        SystemMonitor {
            config_manager,
            alerter,
            metrics: Arc::new(RwLock::new(None)),
            running: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            last_alerts: Arc::new(Mutex::new(HashMap::new())),
        }
    }

    /// Collect a full metrics snapshot: CPU, RAM, disks, watched processes,
    /// uptime and scheduling information.
    pub async fn collect(&self) -> Metrics {
        let config = {
            let cm = self.config_manager.lock().await;
            cm.config.clone()
        };
        let mut sys = System::new_all();
        // Two measurements are needed for an accurate CPU reading. Use the
        // async sleep: std::thread::sleep here would block the tokio worker
        // thread for 500 ms and stall every task scheduled on it.
        tokio::time::sleep(StdDuration::from_millis(500)).await;
        sys.refresh_all();
        let cpu_percent = sys.global_cpu_usage() as f64;
        let cpu_status = eval_status(cpu_percent, config.thresholds.cpu_percent).to_string();
        let ram_total = sys.total_memory() as f64;
        let ram_used = sys.used_memory() as f64;
        let ram_available = sys.available_memory() as f64;
        // Guard against a zero total (avoids NaN from 0/0).
        let ram_percent = if ram_total > 0.0 {
            ram_used / ram_total * 100.0
        } else {
            0.0
        };
        let ram_status = eval_status(ram_percent, config.thresholds.ram_percent).to_string();
        let mut disks = Vec::new();
        let disk_list = Disks::new_with_refreshed_list();
        // Skip pseudo/virtual filesystems and volumes under 1 GB.
        let ignored_fs = ["squashfs", "tmpfs", "devtmpfs", "overlay", "iso9660"];
        for disk in &disk_list {
            let fs = disk.file_system().to_string_lossy().to_lowercase();
            if ignored_fs.iter().any(|&f| fs.contains(f)) {
                continue;
            }
            let total = disk.total_space() as f64;
            if total < 1_073_741_824.0 {
                continue; // < 1 GB
            }
            let available = disk.available_space() as f64;
            let used = total - available;
            // Percent rounded to one decimal place.
            let percent = (used / total * 1000.0).round() / 10.0;
            let status = eval_status(percent, config.thresholds.disk_percent).to_string();
            disks.push(DiskMetrics {
                // Strip the trailing backslash of Windows drive names.
                drive: disk
                    .name()
                    .to_string_lossy()
                    .trim_end_matches('\\')
                    .to_string(),
                mountpoint: disk.mount_point().to_string_lossy().to_string(),
                percent,
                total_gb: (total / 1_073_741_824.0 * 10.0).round() / 10.0,
                used_gb: (used / 1_073_741_824.0 * 10.0).round() / 10.0,
                free_gb: (available / 1_073_741_824.0 * 10.0).round() / 10.0,
                threshold: config.thresholds.disk_percent,
                status,
            });
        }
        let processes = self.check_processes(&sys, &config.processes);
        let boot_time_unix = System::boot_time();
        let now_unix = Local::now().timestamp() as u64;
        // saturating_sub: never underflow if clocks disagree.
        let uptime_secs = now_unix.saturating_sub(boot_time_unix);
        let uptime = format!(
            "{}:{:02}:{:02}",
            uptime_secs / 3600,
            (uptime_secs % 3600) / 60,
            uptime_secs % 60
        );
        let now = Local::now();
        let interval = config.check_interval_minutes;
        Metrics {
            timestamp: now.to_rfc3339(),
            hostname: System::host_name().unwrap_or_else(|| "inconnu".into()),
            os: format!(
                "{} {}",
                System::name().unwrap_or_default(),
                System::os_version().unwrap_or_default()
            ),
            cpu: CpuMetrics {
                percent: (cpu_percent * 10.0).round() / 10.0,
                cores: sys.cpus().len(),
                threshold: config.thresholds.cpu_percent,
                status: cpu_status,
            },
            ram: RamMetrics {
                percent: (ram_percent * 10.0).round() / 10.0,
                total_gb: (ram_total / 1_073_741_824.0 * 10.0).round() / 10.0,
                used_gb: (ram_used / 1_073_741_824.0 * 10.0).round() / 10.0,
                available_gb: (ram_available / 1_073_741_824.0 * 10.0).round() / 10.0,
                threshold: config.thresholds.ram_percent,
                status: ram_status,
            },
            disks,
            processes,
            uptime,
            boot_time: chrono::DateTime::from_timestamp(boot_time_unix as i64, 0)
                .map(|dt: chrono::DateTime<chrono::Utc>| dt.to_rfc3339())
                .unwrap_or_default(),
            monitoring_active: self
                .running
                .load(std::sync::atomic::Ordering::Relaxed),
            last_check: now.to_rfc3339(),
            next_check: (now + Duration::minutes(interval as i64)).to_rfc3339(),
        }
    }

    /// Aggregate running state, memory and CPU usage for every configured
    /// process pattern (matched case-insensitively against the process name
    /// and its full command line). Disabled entries report zero instances.
    fn check_processes(
        &self,
        sys: &System,
        process_configs: &[ProcessConfig],
    ) -> Vec<ProcessMetrics> {
        let mut results = Vec::new();
        for pc in process_configs {
            let pattern = pc.pattern.to_lowercase();
            let mut found_pids: Vec<u32> = Vec::new();
            let mut total_mem: f64 = 0.0;
            let mut total_cpu: f64 = 0.0;
            if pc.enabled {
                for (pid, proc) in sys.processes() {
                    let name = proc.name().to_string_lossy().to_lowercase();
                    let cmd = proc
                        .cmd()
                        .iter()
                        .map(|s| s.to_string_lossy().to_lowercase())
                        .collect::<Vec<_>>()
                        .join(" ");
                    if name.contains(&pattern) || cmd.contains(&pattern) {
                        found_pids.push(pid.as_u32());
                        // sysinfo reports memory in bytes -> MB.
                        total_mem += proc.memory() as f64 / 1_048_576.0;
                        total_cpu += proc.cpu_usage() as f64;
                    }
                }
            }
            // Memory status only applies when a threshold is set and at least
            // one instance is consuming memory.
            let mem_status = if pc.memory_threshold_mb > 0.0 && total_mem > 0.0 {
                eval_status(total_mem, pc.memory_threshold_mb).to_string()
            } else {
                "ok".to_string()
            };
            results.push(ProcessMetrics {
                name: pc.name.clone(),
                pattern: pc.pattern.clone(),
                running: !found_pids.is_empty(),
                enabled: pc.enabled,
                alert_on_down: pc.alert_on_down,
                instance_count: found_pids.len(),
                total_memory_mb: (total_mem * 10.0).round() / 10.0,
                total_cpu_percent: (total_cpu * 10.0).round() / 10.0,
                memory_threshold_mb: pc.memory_threshold_mb,
                memory_status: mem_status,
                pids: found_pids,
            });
        }
        results
    }

    /// Walk a snapshot for critical conditions (CPU, RAM, disks, processes),
    /// apply the per-key alert cooldown, persist the alerts, and send one
    /// e-mail per alert.
    pub async fn check_and_alert(&self, metrics: &Metrics) {
        let cooldown = {
            let cm = self.config_manager.lock().await;
            cm.config.alert_cooldown_minutes
        };
        let hostname = metrics.hostname.clone();
        // (key, message, value, threshold, alert_type) tuples collected under
        // the sync mutex; sending happens afterwards so no lock is held
        // across an await point.
        let mut to_alert: Vec<(String, String, f64, f64, String)> = Vec::new();
        {
            let mut last = self.last_alerts.lock().unwrap();
            let now = Local::now();
            // Record the alert only if its cooldown window has elapsed.
            let mut maybe_alert =
                |key: String, msg: String, val: f64, thr: f64, typ: String| {
                    let should = match last.get(&key) {
                        Some(t) => (now - *t) >= Duration::minutes(cooldown as i64),
                        None => true,
                    };
                    if should {
                        last.insert(key.clone(), now);
                        to_alert.push((key, msg, val, thr, typ));
                    }
                };
            if metrics.cpu.status == "critical" {
                maybe_alert(
                    "cpu".into(),
                    format!(
                        "CPU a {}% (seuil: {}%)",
                        metrics.cpu.percent, metrics.cpu.threshold
                    ),
                    metrics.cpu.percent,
                    metrics.cpu.threshold,
                    "threshold".into(),
                );
            }
            if metrics.ram.status == "critical" {
                maybe_alert(
                    "ram".into(),
                    format!(
                        "RAM a {}% (seuil: {}%)",
                        metrics.ram.percent, metrics.ram.threshold
                    ),
                    metrics.ram.percent,
                    metrics.ram.threshold,
                    "threshold".into(),
                );
            }
            for disk in &metrics.disks {
                if disk.status == "critical" {
                    maybe_alert(
                        format!("disk_{}", disk.drive),
                        format!(
                            "Disque {} a {}% (seuil: {}%)",
                            disk.drive, disk.percent, disk.threshold
                        ),
                        disk.percent,
                        disk.threshold,
                        "threshold".into(),
                    );
                }
            }
            for proc in &metrics.processes {
                if !proc.enabled {
                    continue;
                }
                if proc.alert_on_down && !proc.running {
                    maybe_alert(
                        format!("process_down_{}", proc.name),
                        format!(
                            "Processus '{}' non detecte (pattern: {})",
                            proc.name, proc.pattern
                        ),
                        0.0,
                        0.0,
                        "process_down".into(),
                    );
                }
                if proc.memory_threshold_mb > 0.0 && proc.memory_status == "critical" {
                    maybe_alert(
                        format!("process_mem_{}", proc.name),
                        format!(
                            "Processus '{}' utilise {} Mo (seuil: {} Mo)",
                            proc.name, proc.total_memory_mb, proc.memory_threshold_mb
                        ),
                        proc.total_memory_mb,
                        proc.memory_threshold_mb,
                        "threshold".into(),
                    );
                }
            }
        }
        for (key, message, value, threshold, alert_type) in to_alert {
            let alert = Alert {
                timestamp: Local::now().to_rfc3339(),
                alert_type: alert_type.clone(),
                key,
                message: message.clone(),
                value,
                threshold,
                hostname: hostname.clone(),
            };
            // Persist the alert and copy the SMTP settings, then release the
            // config lock before the (potentially slow) network send so other
            // tasks are not blocked while the e-mail goes out.
            let smtp = {
                let cm = self.config_manager.lock().await;
                cm.save_alert(alert);
                cm.config.smtp.clone()
            };
            let subject = format!("[ALERTE] {} - {}", hostname, message);
            let body = format!(
                "Alerte de supervision\n{sep}\n\nServeur : {host}\nDate : {date}\nType : {typ}\n\nMessage : {msg}\n\n{sep}\nSupervision - Monitoring automatique",
                sep = "=".repeat(40),
                host = hostname,
                date = Local::now().to_rfc3339(),
                typ = alert_type,
                msg = message
            );
            self.alerter.send(&smtp, &subject, &body).await;
        }
    }

    /// Spawn the background collect/alert loop. The loop re-reads the check
    /// interval from the config on every iteration and exits once `stop()`
    /// clears the `running` flag.
    pub async fn start(self: Arc<Self>) {
        self.running
            .store(true, std::sync::atomic::Ordering::Relaxed);
        let monitor = self.clone();
        tokio::spawn(async move {
            loop {
                if !monitor
                    .running
                    .load(std::sync::atomic::Ordering::Relaxed)
                {
                    break;
                }
                let metrics = monitor.collect().await;
                {
                    // Short-lived write lock: publish the fresh snapshot.
                    let mut m = monitor.metrics.write().unwrap();
                    *m = Some(metrics.clone());
                }
                monitor.check_and_alert(&metrics).await;
                let interval = {
                    let cm = monitor.config_manager.lock().await;
                    cm.config.check_interval_minutes
                };
                tokio::time::sleep(StdDuration::from_secs(interval * 60)).await;
            }
        });
    }

    /// Request the background loop to stop after its current iteration.
    pub fn stop(&self) {
        self.running
            .store(false, std::sync::atomic::Ordering::Relaxed);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn zero_threshold_disables_check() {
        assert_eq!(eval_status(50.0, 0.0), "ok");
    }

    #[test]
    fn below_eighty_percent_of_threshold_is_ok() {
        assert_eq!(eval_status(70.0, 90.0), "ok");
    }

    #[test]
    fn exactly_eighty_percent_of_threshold_is_warning() {
        // 72 / 90 == 0.8, the lower warning boundary.
        assert_eq!(eval_status(72.0, 90.0), "warning");
    }

    #[test]
    fn reaching_the_threshold_is_critical() {
        assert_eq!(eval_status(90.0, 90.0), "critical");
    }

    #[test]
    fn exceeding_the_threshold_is_critical() {
        assert_eq!(eval_status(95.0, 90.0), "critical");
    }
}