v1.0 - Stable release: multi-PC, UI-DETR-1 detection, 3 execution modes

- Frontend v4 reachable on the local network (192.168.1.40)
- Open ports: 3002 (frontend), 5001 (backend), 5004 (dashboard)
- Ollama GPU working
- Interactive self-healing
- Confidence dashboard
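
Quick sanity check of the ports listed above (illustrative sketch only; host and ports are taken from this note and are not part of the commit):

import socket

HOST = "192.168.1.40"
for name, port in [("frontend", 3002), ("backend", 5001), ("dashboard", 5004)]:
    # connect_ex returns 0 when a TCP connection can be established
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(2)
        ok = s.connect_ex((HOST, port)) == 0
    print(f"{name:<9} {HOST}:{port} -> {'open' if ok else 'closed'}")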

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Dom
2026-01-29 11:23:51 +01:00
parent 21bfa3b337
commit a27b74cf22
1595 changed files with 412691 additions and 400 deletions


@@ -0,0 +1,9 @@
"""Storage components for analytics data."""
from .timeseries_store import TimeSeriesStore
from .archive_storage import ArchiveStorage
__all__ = [
'TimeSeriesStore',
'ArchiveStorage',
]


@@ -0,0 +1,393 @@
"""Archive storage for old metrics with compression."""
import logging
import gzip
import json
import os
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta
from pathlib import Path
from dataclasses import dataclass
logger = logging.getLogger(__name__)
@dataclass
class RetentionPolicy:
"""Retention policy configuration."""
metric_type: str
hot_retention_days: int # Keep in main storage
archive_retention_days: int # Keep in archive
compression_enabled: bool = True
def to_dict(self) -> Dict:
"""Convert to dictionary."""
return {
'metric_type': self.metric_type,
'hot_retention_days': self.hot_retention_days,
'archive_retention_days': self.archive_retention_days,
'compression_enabled': self.compression_enabled
}
@classmethod
def from_dict(cls, data: Dict) -> 'RetentionPolicy':
"""Create from dictionary."""
return cls(**data)
class ArchiveStorage:
"""Archive storage for old metrics."""
def __init__(self, archive_dir: str = "data/analytics/archive"):
"""
Initialize archive storage.
Args:
archive_dir: Directory for archived data
"""
self.archive_dir = Path(archive_dir)
self.archive_dir.mkdir(parents=True, exist_ok=True)
logger.info(f"ArchiveStorage initialized: {archive_dir}")
def archive_metrics(
self,
metrics: List[Dict[str, Any]],
metric_type: str,
archive_date: datetime,
compress: bool = True
) -> str:
"""
Archive metrics to compressed storage.
Args:
metrics: List of metrics to archive
metric_type: Type of metrics
archive_date: Date for archive file
compress: Whether to compress
Returns:
Path to archive file
"""
# Create archive filename
date_str = archive_date.strftime('%Y%m%d')
filename = f"{metric_type}_{date_str}.json"
if compress:
filename += ".gz"
filepath = self.archive_dir / filename
# Serialize metrics
data = {
'metric_type': metric_type,
'archive_date': archive_date.isoformat(),
'count': len(metrics),
'metrics': metrics
}
json_data = json.dumps(data, indent=2)
# Write to file (compressed or not)
if compress:
with gzip.open(filepath, 'wt', encoding='utf-8') as f:
f.write(json_data)
else:
with open(filepath, 'w', encoding='utf-8') as f:
f.write(json_data)
logger.info(f"Archived {len(metrics)} {metric_type} metrics to {filepath}")
return str(filepath)
def query_archive(
self,
metric_type: str,
start_date: datetime,
end_date: datetime,
filters: Optional[Dict[str, Any]] = None
) -> List[Dict[str, Any]]:
"""
Query archived metrics.
Args:
metric_type: Type of metrics
start_date: Start date
end_date: End date
filters: Optional filters
Returns:
List of matching metrics
"""
results = []
# Iterate through date range
current_date = start_date
while current_date <= end_date:
date_str = current_date.strftime('%Y%m%d')
# Try both compressed and uncompressed
for ext in ['.json.gz', '.json']:
filename = f"{metric_type}_{date_str}{ext}"
filepath = self.archive_dir / filename
if filepath.exists():
metrics = self._read_archive_file(filepath)
# Apply filters
if filters:
metrics = self._apply_filters(metrics, filters)
results.extend(metrics)
break
current_date += timedelta(days=1)
logger.debug(f"Query returned {len(results)} archived metrics")
return results
def _read_archive_file(self, filepath: Path) -> List[Dict[str, Any]]:
"""Read archive file (compressed or not)."""
try:
if filepath.suffix == '.gz':
with gzip.open(filepath, 'rt', encoding='utf-8') as f:
data = json.load(f)
else:
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
return data.get('metrics', [])
except Exception as e:
logger.error(f"Error reading archive {filepath}: {e}")
return []
def _apply_filters(
self,
metrics: List[Dict[str, Any]],
filters: Dict[str, Any]
) -> List[Dict[str, Any]]:
"""Apply filters to metrics."""
filtered = []
for metric in metrics:
match = True
for key, value in filters.items():
if metric.get(key) != value:
match = False
break
if match:
filtered.append(metric)
return filtered
def delete_archive(
self,
metric_type: str,
before_date: datetime
) -> int:
"""
Delete archived data before a date.
Args:
metric_type: Type of metrics
before_date: Delete archives before this date
Returns:
Number of files deleted
"""
deleted = 0
# Find matching archive files
pattern = f"{metric_type}_*.json*"
for filepath in self.archive_dir.glob(pattern):
# Extract date from filename
try:
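                # Path.stem only strips the final suffix, so for "<type>_<YYYYMMDD>.json.gz"
                # the date part still carries a trailing ".json" that is removed below.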
date_str = filepath.stem.split('_')[1]
if filepath.suffix == '.gz':
date_str = date_str.replace('.json', '')
file_date = datetime.strptime(date_str, '%Y%m%d')
if file_date < before_date:
filepath.unlink()
deleted += 1
logger.info(f"Deleted archive: {filepath}")
except Exception as e:
logger.error(f"Error processing {filepath}: {e}")
return deleted
def get_archive_stats(self) -> Dict[str, Any]:
"""
Get archive storage statistics.
Returns:
Dictionary with archive stats
"""
stats = {
'total_files': 0,
'total_size_bytes': 0,
'by_metric_type': {},
'oldest_archive': None,
'newest_archive': None
}
for filepath in self.archive_dir.glob('*.json*'):
stats['total_files'] += 1
stats['total_size_bytes'] += filepath.stat().st_size
# Extract metric type
metric_type = filepath.stem.split('_')[0]
if metric_type not in stats['by_metric_type']:
stats['by_metric_type'][metric_type] = {
'count': 0,
'size_bytes': 0
}
stats['by_metric_type'][metric_type]['count'] += 1
stats['by_metric_type'][metric_type]['size_bytes'] += filepath.stat().st_size
# Track oldest/newest
mtime = datetime.fromtimestamp(filepath.stat().st_mtime)
if stats['oldest_archive'] is None or mtime < stats['oldest_archive']:
stats['oldest_archive'] = mtime
if stats['newest_archive'] is None or mtime > stats['newest_archive']:
stats['newest_archive'] = mtime
# Convert to ISO format
if stats['oldest_archive']:
stats['oldest_archive'] = stats['oldest_archive'].isoformat()
if stats['newest_archive']:
stats['newest_archive'] = stats['newest_archive'].isoformat()
return stats
class RetentionPolicyEngine:
"""Engine for applying retention policies."""
def __init__(
self,
archive_storage: ArchiveStorage,
policies: Optional[List[RetentionPolicy]] = None
):
"""
Initialize retention policy engine.
Args:
archive_storage: Archive storage instance
policies: List of retention policies
"""
self.archive = archive_storage
self.policies = policies or self._default_policies()
self.policy_file = Path("data/analytics/retention_policies.json")
self._load_policies()
logger.info("RetentionPolicyEngine initialized")
def _default_policies(self) -> List[RetentionPolicy]:
"""Get default retention policies."""
return [
RetentionPolicy(
metric_type='execution',
hot_retention_days=30,
archive_retention_days=365
),
RetentionPolicy(
metric_type='step',
hot_retention_days=7,
archive_retention_days=90
),
RetentionPolicy(
metric_type='resource',
hot_retention_days=7,
archive_retention_days=30
)
]
def _load_policies(self) -> None:
"""Load policies from file."""
if self.policy_file.exists():
try:
with open(self.policy_file, 'r') as f:
data = json.load(f)
self.policies = [RetentionPolicy.from_dict(p) for p in data]
logger.info(f"Loaded {len(self.policies)} retention policies")
except Exception as e:
logger.error(f"Error loading policies: {e}")
def save_policies(self) -> None:
"""Save policies to file."""
self.policy_file.parent.mkdir(parents=True, exist_ok=True)
with open(self.policy_file, 'w') as f:
json.dump([p.to_dict() for p in self.policies], f, indent=2)
logger.info("Retention policies saved")
def add_policy(self, policy: RetentionPolicy) -> None:
"""Add or update a retention policy."""
# Remove existing policy for same metric type
self.policies = [p for p in self.policies if p.metric_type != policy.metric_type]
self.policies.append(policy)
self.save_policies()
logger.info(f"Added policy for {policy.metric_type}")
def get_policy(self, metric_type: str) -> Optional[RetentionPolicy]:
"""Get policy for a metric type."""
for policy in self.policies:
if policy.metric_type == metric_type:
return policy
return None
def apply_policies(
self,
store, # TimeSeriesStore
dry_run: bool = False
) -> Dict[str, Any]:
"""
Apply retention policies to storage.
Args:
store: TimeSeriesStore instance
dry_run: If True, don't actually delete data
Returns:
Dictionary with application results
"""
results = {
'archived': {},
'deleted': {},
'errors': []
}
now = datetime.now()
for policy in self.policies:
try:
                # Archive hot data older than the hot-retention window.
                # TimeSeriesStore.query_range takes a metric_types list and returns a
                # dict keyed by metric type, so extract this policy's list from it.
                hot_cutoff = now - timedelta(days=policy.hot_retention_days)
                range_results = store.query_range(
                    start_time=datetime.min,
                    end_time=hot_cutoff,
                    metric_types=[policy.metric_type]
                )
                metrics_to_archive = range_results.get(policy.metric_type, [])
if metrics_to_archive and not dry_run:
archive_path = self.archive.archive_metrics(
metrics=metrics_to_archive,
metric_type=policy.metric_type,
archive_date=hot_cutoff,
compress=policy.compression_enabled
)
results['archived'][policy.metric_type] = {
'count': len(metrics_to_archive),
'path': archive_path
}
# Delete old archived data
archive_cutoff = now - timedelta(days=policy.archive_retention_days)
if not dry_run:
deleted_count = self.archive.delete_archive(
metric_type=policy.metric_type,
before_date=archive_cutoff
)
results['deleted'][policy.metric_type] = deleted_count
except Exception as e:
error_msg = f"Error applying policy for {policy.metric_type}: {e}"
logger.error(error_msg)
results['errors'].append(error_msg)
return results
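
A minimal usage sketch for these classes (the directory, metric payload, and IDs below are illustrative placeholders, not part of the commit; adjust the import to the analytics storage package's real path):

from datetime import datetime, timedelta
from archive_storage import ArchiveStorage, RetentionPolicyEngine  # adjust to actual package path

archive = ArchiveStorage(archive_dir="data/analytics/archive")

# Archive a small batch of execution metrics for yesterday (gzip-compressed by default).
yesterday = datetime.now() - timedelta(days=1)
archive.archive_metrics(
    metrics=[{"execution_id": "exec-001", "workflow_id": "wf-demo", "status": "completed"}],
    metric_type="execution",
    archive_date=yesterday,
)

# Read the archived batch back, keeping only completed executions.
completed = archive.query_archive(
    metric_type="execution",
    start_date=yesterday,
    end_date=datetime.now(),
    filters={"status": "completed"},
)

# Apply the default retention policies against a TimeSeriesStore (dry run: nothing is deleted).
engine = RetentionPolicyEngine(archive_storage=archive)
# results = engine.apply_policies(store, dry_run=True)  # store: a TimeSeriesStore instance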


@@ -0,0 +1,374 @@
"""Time-series storage for analytics metrics."""
import sqlite3
import json
import logging
from pathlib import Path
from typing import List, Dict, Any, Optional, Tuple
from datetime import datetime
from contextlib import contextmanager
from ..collection.metrics_collector import ExecutionMetrics, StepMetrics
from ..collection.resource_collector import ResourceMetrics
logger = logging.getLogger(__name__)
class TimeSeriesStore:
"""Store for time-series metrics data using SQLite."""
# Database schema
SCHEMA = """
-- Execution metrics table
CREATE TABLE IF NOT EXISTS execution_metrics (
execution_id TEXT PRIMARY KEY,
workflow_id TEXT NOT NULL,
started_at TIMESTAMP NOT NULL,
completed_at TIMESTAMP,
duration_ms REAL,
status TEXT NOT NULL,
steps_total INTEGER DEFAULT 0,
steps_completed INTEGER DEFAULT 0,
steps_failed INTEGER DEFAULT 0,
error_message TEXT,
context JSON
);
CREATE INDEX IF NOT EXISTS idx_workflow_time
ON execution_metrics(workflow_id, started_at);
CREATE INDEX IF NOT EXISTS idx_status
ON execution_metrics(status);
CREATE INDEX IF NOT EXISTS idx_started_at
ON execution_metrics(started_at);
-- Step metrics table
CREATE TABLE IF NOT EXISTS step_metrics (
step_id TEXT PRIMARY KEY,
execution_id TEXT NOT NULL,
workflow_id TEXT NOT NULL,
node_id TEXT NOT NULL,
action_type TEXT NOT NULL,
target_element TEXT,
started_at TIMESTAMP NOT NULL,
completed_at TIMESTAMP NOT NULL,
duration_ms REAL NOT NULL,
status TEXT NOT NULL,
confidence_score REAL,
retry_count INTEGER DEFAULT 0,
error_details TEXT,
FOREIGN KEY (execution_id) REFERENCES execution_metrics(execution_id)
);
CREATE INDEX IF NOT EXISTS idx_execution
ON step_metrics(execution_id);
CREATE INDEX IF NOT EXISTS idx_workflow_action
ON step_metrics(workflow_id, action_type);
CREATE INDEX IF NOT EXISTS idx_step_time
ON step_metrics(started_at);
-- Resource metrics table
CREATE TABLE IF NOT EXISTS resource_metrics (
id INTEGER PRIMARY KEY AUTOINCREMENT,
timestamp TIMESTAMP NOT NULL,
workflow_id TEXT,
execution_id TEXT,
cpu_percent REAL NOT NULL,
memory_mb REAL NOT NULL,
gpu_utilization REAL DEFAULT 0.0,
gpu_memory_mb REAL DEFAULT 0.0,
disk_io_mb REAL DEFAULT 0.0
);
CREATE INDEX IF NOT EXISTS idx_resource_time
ON resource_metrics(timestamp);
CREATE INDEX IF NOT EXISTS idx_resource_workflow
ON resource_metrics(workflow_id, timestamp);
"""
def __init__(self, storage_path: Path):
"""
Initialize time-series store.
Args:
storage_path: Path to storage directory
"""
self.storage_path = Path(storage_path)
self.storage_path.mkdir(parents=True, exist_ok=True)
self.db_path = self.storage_path / 'timeseries.db'
# Initialize database
self._init_database()
logger.info(f"TimeSeriesStore initialized at {self.db_path}")
def _init_database(self) -> None:
"""Initialize database schema."""
with self._get_connection() as conn:
conn.executescript(self.SCHEMA)
conn.commit()
@contextmanager
def _get_connection(self):
"""Get database connection context manager."""
conn = sqlite3.connect(str(self.db_path))
conn.row_factory = sqlite3.Row
try:
yield conn
finally:
conn.close()
def write_metrics(
self,
metrics: List[Any] # Union[ExecutionMetrics, StepMetrics, ResourceMetrics]
) -> None:
"""
Write metrics to time-series storage.
Args:
metrics: List of metrics to write
"""
if not metrics:
return
with self._get_connection() as conn:
for metric in metrics:
if isinstance(metric, ExecutionMetrics):
self._write_execution_metric(conn, metric)
elif isinstance(metric, StepMetrics):
self._write_step_metric(conn, metric)
elif isinstance(metric, ResourceMetrics):
self._write_resource_metric(conn, metric)
conn.commit()
logger.debug(f"Wrote {len(metrics)} metrics to storage")
def _write_execution_metric(self, conn: sqlite3.Connection, metric: ExecutionMetrics) -> None:
"""Write execution metric."""
conn.execute("""
INSERT OR REPLACE INTO execution_metrics
(execution_id, workflow_id, started_at, completed_at, duration_ms,
status, steps_total, steps_completed, steps_failed, error_message, context)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
metric.execution_id,
metric.workflow_id,
metric.started_at.isoformat(),
metric.completed_at.isoformat() if metric.completed_at else None,
metric.duration_ms,
metric.status,
metric.steps_total,
metric.steps_completed,
metric.steps_failed,
metric.error_message,
json.dumps(metric.context)
))
def _write_step_metric(self, conn: sqlite3.Connection, metric: StepMetrics) -> None:
"""Write step metric."""
conn.execute("""
INSERT OR REPLACE INTO step_metrics
(step_id, execution_id, workflow_id, node_id, action_type, target_element,
started_at, completed_at, duration_ms, status, confidence_score,
retry_count, error_details)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", (
metric.step_id,
metric.execution_id,
metric.workflow_id,
metric.node_id,
metric.action_type,
metric.target_element,
metric.started_at.isoformat(),
metric.completed_at.isoformat(),
metric.duration_ms,
metric.status,
metric.confidence_score,
metric.retry_count,
metric.error_details
))
def _write_resource_metric(self, conn: sqlite3.Connection, metric: ResourceMetrics) -> None:
"""Write resource metric."""
conn.execute("""
INSERT INTO resource_metrics
(timestamp, workflow_id, execution_id, cpu_percent, memory_mb,
gpu_utilization, gpu_memory_mb, disk_io_mb)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", (
metric.timestamp.isoformat(),
metric.workflow_id,
metric.execution_id,
metric.cpu_percent,
metric.memory_mb,
metric.gpu_utilization,
metric.gpu_memory_mb,
metric.disk_io_mb
))
def query_range(
self,
start_time: datetime,
end_time: datetime,
workflow_id: Optional[str] = None,
metric_types: Optional[List[str]] = None
) -> Dict[str, List[Dict]]:
"""
Query metrics within a time range.
Args:
start_time: Start of time range
end_time: End of time range
workflow_id: Optional workflow ID filter
metric_types: Optional list of metric types ('execution', 'step', 'resource')
Returns:
Dictionary with metric type as key and list of metrics as value
"""
results = {}
metric_types = metric_types or ['execution', 'step', 'resource']
with self._get_connection() as conn:
if 'execution' in metric_types:
results['execution'] = self._query_execution_metrics(
conn, start_time, end_time, workflow_id
)
if 'step' in metric_types:
results['step'] = self._query_step_metrics(
conn, start_time, end_time, workflow_id
)
if 'resource' in metric_types:
results['resource'] = self._query_resource_metrics(
conn, start_time, end_time, workflow_id
)
return results
def _query_execution_metrics(
self,
conn: sqlite3.Connection,
start_time: datetime,
end_time: datetime,
workflow_id: Optional[str]
) -> List[Dict]:
"""Query execution metrics."""
query = """
SELECT * FROM execution_metrics
WHERE started_at >= ? AND started_at <= ?
"""
params = [start_time.isoformat(), end_time.isoformat()]
if workflow_id:
query += " AND workflow_id = ?"
params.append(workflow_id)
query += " ORDER BY started_at"
cursor = conn.execute(query, params)
return [dict(row) for row in cursor.fetchall()]
def _query_step_metrics(
self,
conn: sqlite3.Connection,
start_time: datetime,
end_time: datetime,
workflow_id: Optional[str]
) -> List[Dict]:
"""Query step metrics."""
query = """
SELECT * FROM step_metrics
WHERE started_at >= ? AND started_at <= ?
"""
params = [start_time.isoformat(), end_time.isoformat()]
if workflow_id:
query += " AND workflow_id = ?"
params.append(workflow_id)
query += " ORDER BY started_at"
cursor = conn.execute(query, params)
return [dict(row) for row in cursor.fetchall()]
def _query_resource_metrics(
self,
conn: sqlite3.Connection,
start_time: datetime,
end_time: datetime,
workflow_id: Optional[str]
) -> List[Dict]:
"""Query resource metrics."""
query = """
SELECT * FROM resource_metrics
WHERE timestamp >= ? AND timestamp <= ?
"""
params = [start_time.isoformat(), end_time.isoformat()]
if workflow_id:
query += " AND workflow_id = ?"
params.append(workflow_id)
query += " ORDER BY timestamp"
cursor = conn.execute(query, params)
return [dict(row) for row in cursor.fetchall()]
def aggregate(
self,
metric: str,
aggregation: str, # 'avg', 'sum', 'count', 'min', 'max'
group_by: List[str],
start_time: datetime,
end_time: datetime,
filters: Optional[Dict] = None
) -> List[Dict]:
"""
Aggregate metrics with grouping.
Args:
metric: Metric field to aggregate
aggregation: Aggregation function
group_by: Fields to group by
start_time: Start of time range
end_time: End of time range
filters: Optional filters
Returns:
List of aggregated results
"""
# Determine table based on metric
if metric in ['duration_ms', 'steps_total', 'steps_completed', 'steps_failed']:
table = 'execution_metrics'
time_field = 'started_at'
elif metric in ['confidence_score', 'retry_count']:
table = 'step_metrics'
time_field = 'started_at'
elif metric in ['cpu_percent', 'memory_mb', 'gpu_utilization']:
table = 'resource_metrics'
time_field = 'timestamp'
else:
raise ValueError(f"Unknown metric: {metric}")
# Build query
agg_func = aggregation.upper()
group_fields = ', '.join(group_by)
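        # Note: the aggregation function, group-by fields, and filter keys are interpolated
        # directly into the SQL text, so callers must pass trusted identifiers only.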
query = f"""
SELECT {group_fields}, {agg_func}({metric}) as value
FROM {table}
WHERE {time_field} >= ? AND {time_field} <= ?
"""
params = [start_time.isoformat(), end_time.isoformat()]
# Add filters
if filters:
for key, value in filters.items():
query += f" AND {key} = ?"
params.append(value)
query += f" GROUP BY {group_fields}"
with self._get_connection() as conn:
cursor = conn.execute(query, params)
return [dict(row) for row in cursor.fetchall()]
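
A short sketch of querying and aggregating against this store (storage path, workflow ID, and field values are illustrative; adjust the import to the analytics storage package's real path):

from datetime import datetime, timedelta
from pathlib import Path
from timeseries_store import TimeSeriesStore  # adjust to actual package path

store = TimeSeriesStore(storage_path=Path("data/analytics"))

# Last 24 hours of execution and step metrics for one workflow.
now = datetime.now()
recent = store.query_range(
    start_time=now - timedelta(hours=24),
    end_time=now,
    workflow_id="wf-demo",
    metric_types=["execution", "step"],
)

# Average completed-execution duration per workflow over the last week.
avg_duration = store.aggregate(
    metric="duration_ms",
    aggregation="avg",
    group_by=["workflow_id"],
    start_time=now - timedelta(days=7),
    end_time=now,
    filters={"status": "completed"},
)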