feat(security): eval()→AST parseur + pickle→JSON+HMAC signé

Vulnérabilité 1 — eval() dans DAG executor :
- Nouveau module safe_condition_evaluator.py
- Parseur AST avec whitelist (Constants, Names, Compare, BoolOp, BinOp)
- Rejet explicite Call/Lambda/Import/__dunder__/walrus/comprehensions
- Expression non sûre → logged ERROR + évaluée à False (pas de crash)
- 31 tests (12 valides, 17 malveillantes rejetées, 2 intégration)

Vulnérabilité 2 — 3× pickle.load() non sécurisés :
- Nouveau module signed_serializer.py (JSON+HMAC-SHA256)
- Format : RPA_SIGNED_V1\n + JSON(hmac + payload base64)
- Migration automatique transparente au premier chargement
- Fallback pickle avec WARNING (désactivable RPA_ALLOW_PICKLE_FALLBACK=0)
- Remplacement dans faiss_manager, visual_embedding_manager,
  visual_persistence_manager
- 13 tests

Clé signature : RPA_SIGNING_KEY (fallback TOKEN_SECRET_KEY puis hostname-derived).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Dom
2026-04-14 16:49:17 +02:00
parent 93ef93e563
commit 36737cfe9d
8 changed files with 1110 additions and 50 deletions

View File

@@ -26,11 +26,15 @@ from PIL import Image
import logging
import threading
from concurrent.futures import ThreadPoolExecutor
import pickle
import os
from core.models import BBox
from core.embedding.fusion_engine import FusionEngine
from core.security.signed_serializer import (
SignatureVerificationError,
load_signed,
save_signed,
)
logger = logging.getLogger(__name__)
@@ -521,42 +525,90 @@ class VisualEmbeddingManager:
logger.debug(f"Éviction de {num_to_remove} entrées du cache")
def _entry_to_dict(self, entry: "EmbeddingCacheEntry") -> Dict[str, Any]:
"""Convertit une entrée du cache en dict JSON-serialisable."""
return {
"embedding": entry.embedding, # numpy → encodé par signed_serializer
"signature": entry.signature,
"created_at": entry.created_at,
"access_count": entry.access_count,
"last_accessed": entry.last_accessed,
}
def _dict_to_entry(self, data: Any) -> Optional["EmbeddingCacheEntry"]:
    """Rebuild an EmbeddingCacheEntry from its dict (JSON) form.

    Objects that are already typed entries (legacy pickle fallback)
    pass through untouched. Anything that is neither an entry nor a
    dict, or a dict missing required keys, yields None.
    """
    if isinstance(data, EmbeddingCacheEntry):
        return data
    if not isinstance(data, dict):
        return None
    try:
        entry = EmbeddingCacheEntry(
            embedding=np.asarray(data["embedding"]),
            signature=data["signature"],
            created_at=data["created_at"],
            access_count=int(data.get("access_count", 0)),
            last_accessed=data.get("last_accessed"),
        )
    except (KeyError, TypeError, ValueError) as exc:
        # Skip (and log) malformed entries rather than failing the whole load.
        logger.warning(f"Entrée de cache invalide ignorée: {exc}")
        return None
    return entry
def _load_persistent_cache(self):
"""Charge le cache persistant depuis le disque"""
"""Charge le cache persistant depuis le disque (JSON signé HMAC,
fallback pickle legacy avec migration automatique)."""
if not self.cache_persistence_path or not os.path.exists(self.cache_persistence_path):
return
try:
with open(self.cache_persistence_path, 'rb') as f:
cached_data = pickle.load(f)
# Filtrer les entrées trop anciennes (plus de 24h)
cutoff_time = datetime.now() - timedelta(hours=24)
for signature, entry in cached_data.items():
if entry.created_at > cutoff_time:
self._embedding_cache[signature] = entry
logger.info(f"Cache persistant chargé: {len(self._embedding_cache)} entrées")
cached_data = load_signed(self.cache_persistence_path)
except SignatureVerificationError:
logger.error(
"Cache persistant %s altéré (HMAC invalide) — ignoré.",
self.cache_persistence_path,
)
return
except Exception as e:
logger.warning(f"Erreur lors du chargement du cache persistant: {e}")
return
if not isinstance(cached_data, dict):
logger.warning("Format de cache inattendu — ignoré.")
return
# Filtrer les entrées trop anciennes (plus de 24h)
cutoff_time = datetime.now() - timedelta(hours=24)
loaded = 0
for signature, raw in cached_data.items():
entry = self._dict_to_entry(raw)
if entry is None:
continue
if entry.created_at > cutoff_time:
self._embedding_cache[signature] = entry
loaded += 1
logger.info(f"Cache persistant chargé: {loaded} entrées")
def _save_persistent_cache(self):
"""Sauvegarde le cache sur disque"""
"""Sauvegarde le cache sur disque en JSON signé HMAC."""
if not self.cache_persistence_path:
return
try:
# Créer le répertoire si nécessaire
os.makedirs(os.path.dirname(self.cache_persistence_path), exist_ok=True)
with self._cache_lock:
with open(self.cache_persistence_path, 'wb') as f:
pickle.dump(dict(self._embedding_cache), f)
serializable = {
signature: self._entry_to_dict(entry)
for signature, entry in self._embedding_cache.items()
}
save_signed(self.cache_persistence_path, serializable)
logger.debug("Cache persistant sauvegardé")
except Exception as e:
logger.warning(f"Erreur lors de la sauvegarde du cache: {e}")

View File

@@ -14,8 +14,9 @@ import asyncio
import logging
import json
import base64
import pickle
import gzip
import pickle # noqa: S403 - usage legacy restreint au fallback de migration
import io
from typing import Dict, List, Optional, Any, Tuple
from dataclasses import dataclass, asdict
from datetime import datetime
@@ -24,6 +25,12 @@ import numpy as np
from core.visual.visual_target_manager import VisualTarget, VisualTargetManager
from core.visual.screenshot_validation_manager import ScreenshotValidationManager, ValidationResult
from core.security.signed_serializer import (
SignatureVerificationError,
UnsupportedFormatError,
dumps_signed,
loads_signed,
)
logger = logging.getLogger(__name__)
@@ -435,19 +442,19 @@ class VisualPersistenceManager:
return None
async def _serialize_workflow_data(self, workflow_data: VisualWorkflowData) -> bytes:
    """Serialise a workflow's data to HMAC-signed JSON bytes.

    NOTE(review): the flattened diff had kept the removed legacy
    ``return pickle.dumps(data_dict)`` before the new
    ``return dumps_signed(...)``, making the secure path unreachable
    dead code — the legacy return is removed here. A stray hunk header
    in the middle of the function is also dropped.
    """
    # Convert to a plain dict.
    data_dict = asdict(workflow_data)
    # Handle special types (datetime is not JSON-native).
    data_dict['created_at'] = workflow_data.created_at.isoformat()
    # Serialise visual targets.
    serialized_targets = {}
    for signature, target in workflow_data.visual_targets.items():
        serialized_targets[signature] = await self._serialize_visual_target(target)
    data_dict['visual_targets'] = serialized_targets
    # Serialise validation history.
    serialized_history = {}
    for signature, history in workflow_data.validation_history.items():
        serialized_history[signature] = [
            self._serialize_validation_result(result) for result in history
        ]
    data_dict['validation_history'] = serialized_history
    # HMAC-signed JSON (see core.security.signed_serializer).
    return dumps_signed(data_dict)
async def _deserialize_workflow_data(self, data: bytes) -> VisualWorkflowData:
"""Désérialise les données d'un workflow"""
# Désérialiser le dictionnaire
data_dict = pickle.loads(data)
"""Désérialise les données d'un workflow (JSON signé HMAC ;
fallback pickle legacy avec WARNING pour migrer les anciens fichiers)."""
try:
data_dict = loads_signed(data)
except SignatureVerificationError:
# Fichier altéré ou clé différente : on refuse sans fallback.
logger.error("Workflow visuel : signature HMAC invalide — refus.")
raise
except UnsupportedFormatError:
# Ancien format pickle : fallback explicite et bruyant.
import os
if os.getenv("RPA_ALLOW_PICKLE_FALLBACK", "1") == "0":
raise
logger.warning(
"Workflow visuel au format pickle legacy — lecture de compat, "
"ré-écrire en JSON signé dès que possible."
)
data_dict = pickle.loads(data) # noqa: S301 - fallback legacy
# Reconstruire les objets
workflow_data = VisualWorkflowData(
workflow_id=data_dict['workflow_id'],