feat: architecture multi-modèles LLM + quality engine + benchmark

- Multi-modèles : 4 rôles LLM (coding=gemma3:27b-cloud, cpam=gemma3:27b-cloud, validation=deepseek-v3.2:cloud, qc=gemma3:12b) avec get_model(role)
- Prompts externalisés : 7 templates dans src/prompts/templates.py
- Cache Ollama : modèle stocké par entrée (migration auto ancien format)
- call_ollama() : paramètre role= (priorité: model > role > global)
- Quality engine : veto_engine + decision_engine + rules_router (YAML)
- Benchmark qualité : scripts/benchmark_quality.py (A/B, métriques CIM-10)
- Fix biologie : valeurs qualitatives (troponine négative) non filtrées
- Fix CPAM : gemma3:27b-cloud au lieu de deepseek (JSON tronqué par thinking)
- CPAM max_tokens 4000→6000, viewer admin multi-modèles
- Benchmark 10 dossiers : 100% DAS valides, 10/10 CPAM, 243s/dossier

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-20 00:21:09 +01:00
parent 5c8c2817ec
commit 909e051cc9
39 changed files with 5092 additions and 574 deletions
--- a/src/medical/ollama_cache.py
+++ b/src/medical/ollama_cache.py
@@ -14,53 +14,79 @@ class OllamaCache:
    """Cache JSON persistant pour éviter les appels Ollama redondants.

    Clé = (texte_diagnostic_normalisé, type).
-    Le modèle Ollama est stocké dans les métadonnées : si le modèle change,
-    le cache est automatiquement invalidé.
+    Le modèle Ollama est stocké PAR ENTRÉE : si le modèle change pour un rôle,
+    seules les entrées de cet ancien modèle sont invalides.
+
+    Migration automatique depuis l'ancien format (model global) au chargement.
    """

-    def __init__(self, cache_path: Path, model: str):
+    def __init__(self, cache_path: Path, model: str | None = None):
        self._path = cache_path
-        self._model = model
+        self._default_model = model
        self._lock = threading.Lock()
        self._data: dict[str, dict] = {}
        self._dirty = False
        self._load()

    def _load(self) -> None:
-        """Charge le cache depuis le disque."""
+        """Charge le cache depuis le disque, avec migration automatique."""
        if not self._path.exists():
            logger.info("Cache Ollama : nouveau cache (%s)", self._path)
            return
        try:
            raw = json.loads(self._path.read_text(encoding="utf-8"))
-            if raw.get("model") != self._model:
-                logger.info(
-                    "Cache Ollama : modèle changé (%s → %s), cache invalidé",
-                    raw.get("model"), self._model,
-                )
-                return
-            self._data = raw.get("entries", {})
-            logger.info("Cache Ollama : %d entrées chargées", len(self._data))
        except (json.JSONDecodeError, KeyError) as e:
            logger.warning("Cache Ollama : fichier corrompu (%s), réinitialisé", e)
            self._data = {}
+            return
+
+        entries = raw.get("entries", {})
+
+        # Détection ancien format : {"model": "...", "entries": {k: result_dict_sans_model}}
+        global_model = raw.get("model")
+        if global_model and entries:
+            first_val = next(iter(entries.values()), None)
+            if isinstance(first_val, dict) and "model" not in first_val:
+                # Migration : ancien format → nouveau (modèle par entrée)
+                logger.info(
+                    "Cache Ollama : migration ancien format (model=%s) → modèle par entrée",
+                    global_model,
+                )
+                migrated: dict[str, dict] = {}
+                for k, v in entries.items():
+                    if isinstance(v, dict):
+                        migrated[k] = {"model": global_model, "result": v}
+                self._data = migrated
+                self._dirty = True
+                logger.info("Cache Ollama : %d entrées migrées", len(migrated))
+                return
+
+        self._data = entries
+        logger.info("Cache Ollama : %d entrées chargées", len(self._data))

    @staticmethod
    def _make_key(texte: str, diag_type: str) -> str:
        """Construit une clé normalisée."""
        return f"{diag_type}::{texte.strip().lower()}"

-    def get(self, texte: str, diag_type: str) -> dict | None:
-        """Récupère un résultat caché, ou None si absent."""
+    def get(self, texte: str, diag_type: str, model: str | None = None) -> dict | None:
+        """Récupère un résultat caché, ou None si absent ou modèle différent."""
        key = self._make_key(texte, diag_type)
+        use_model = model or self._default_model
        with self._lock:
-            return self._data.get(key)
+            entry = self._data.get(key)
+            if entry is None:
+                return None
+            if use_model and entry.get("model") != use_model:
+                return None
+            return entry.get("result")

-    def put(self, texte: str, diag_type: str, result: dict) -> None:
-        """Stocke un résultat dans le cache."""
+    def put(self, texte: str, diag_type: str, result: dict, model: str | None = None) -> None:
+        """Stocke un résultat dans le cache avec le modèle utilisé."""
        key = self._make_key(texte, diag_type)
+        use_model = model or self._default_model
        with self._lock:
-            self._data[key] = result
+            self._data[key] = {"model": use_model, "result": result}
            self._dirty = True

    def save(self) -> None:
@@ -69,10 +95,7 @@ class OllamaCache:
            if not self._dirty:
                return
            self._path.parent.mkdir(parents=True, exist_ok=True)
-            payload = {
-                "model": self._model,
-                "entries": self._data,
-            }
+            payload = {"entries": self._data}
            self._path.write_text(
                json.dumps(payload, ensure_ascii=False, indent=2),
                encoding="utf-8",