feat: architecture multi-modèles LLM + externalisation des prompts

- Ajout de OLLAMA_MODELS (coding/cpam/validation/qc) dans config.py avec get_model()
- Paramètre role= dans call_ollama() pour le dispatch par rôle
- Cache Ollama : modèle stocké par entrée (migration automatique de l'ancien format)
- 7 prompts externalisés dans src/prompts/templates.py (format str.format)
- Viewer : admin multi-modèles, endpoint PDF avec redaction, source texte
- Documentation des prompts dans docs/prompts.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 20:51:52 +01:00
parent 5c8c2817ec
commit 540e0cb400
17 changed files with 1221 additions and 353 deletions
--- a/src/medical/ollama_cache.py
+++ b/src/medical/ollama_cache.py
@@ -14,32 +14,58 @@ class OllamaCache:
    """Cache JSON persistant pour éviter les appels Ollama redondants.

    Clé = (texte_diagnostic_normalisé, type).
-    Le modèle Ollama est stocké dans les métadonnées : si le modèle change,
-    le cache est automatiquement invalidé.
+    Le modèle Ollama est stocké PAR ENTRÉE : un cache hit ne se produit
+    que si le modèle correspond à celui demandé.
+
+    Backward compat : si le fichier contient l'ancien format (modèle global),
+    les entrées sont migrées à la lecture avec le modèle global comme modèle
+    par entrée.
    """

-    def __init__(self, cache_path: Path, model: str):
+    def __init__(self, cache_path: Path, model: str | None = None):
        self._path = cache_path
-        self._model = model
+        self._default_model = model
        self._lock = threading.Lock()
        self._data: dict[str, dict] = {}
        self._dirty = False
        self._load()

    def _load(self) -> None:
-        """Charge le cache depuis le disque."""
+        """Charge le cache depuis le disque (avec migration ancien format)."""
        if not self._path.exists():
            logger.info("Cache Ollama : nouveau cache (%s)", self._path)
            return
        try:
            raw = json.loads(self._path.read_text(encoding="utf-8"))
-            if raw.get("model") != self._model:
-                logger.info(
-                    "Cache Ollama : modèle changé (%s → %s), cache invalidé",
-                    raw.get("model"), self._model,
-                )
+
+            # Détection ancien format : clé "model" globale + "entries" sans "model" par entrée
+            global_model = raw.get("model")
+            entries = raw.get("entries", {})
+
+            if not entries:
                return
-            self._data = raw.get("entries", {})
+
+            # Vérifier si c'est l'ancien format (entrées sans clé "model")
+            sample_entry = next(iter(entries.values()), None)
+            is_old_format = sample_entry is not None and "model" not in sample_entry
+
+            if is_old_format:
+                if global_model:
+                    # Migrer : injecter le modèle global dans chaque entrée
+                    logger.info(
+                        "Cache Ollama : migration ancien format → modèle par entrée (%s, %d entrées)",
+                        global_model, len(entries),
+                    )
+                    for key, value in entries.items():
+                        self._data[key] = {"model": global_model, "result": value}
+                    self._dirty = True  # Réécrire au prochain save()
+                else:
+                    logger.warning("Cache Ollama : ancien format sans modèle global, cache ignoré")
+                    return
+            else:
+                # Nouveau format : chaque entrée a déjà {"model": ..., "result": ...}
+                self._data = entries
+
            logger.info("Cache Ollama : %d entrées chargées", len(self._data))
        except (json.JSONDecodeError, KeyError) as e:
            logger.warning("Cache Ollama : fichier corrompu (%s), réinitialisé", e)
@@ -50,17 +76,24 @@ class OllamaCache:
        """Construit une clé normalisée."""
        return f"{diag_type}::{texte.strip().lower()}"

-    def get(self, texte: str, diag_type: str) -> dict | None:
-        """Récupère un résultat caché, ou None si absent."""
+    def get(self, texte: str, diag_type: str, model: str | None = None) -> dict | None:
+        """Récupère un résultat caché, ou None si absent ou modèle différent."""
        key = self._make_key(texte, diag_type)
+        use_model = model or self._default_model
        with self._lock:
-            return self._data.get(key)
+            entry = self._data.get(key)
+            if entry is None:
+                return None
+            if use_model and entry.get("model") != use_model:
+                return None
+            return entry.get("result")

-    def put(self, texte: str, diag_type: str, result: dict) -> None:
-        """Stocke un résultat dans le cache."""
+    def put(self, texte: str, diag_type: str, result: dict, model: str | None = None) -> None:
+        """Stocke un résultat dans le cache avec le modèle associé."""
        key = self._make_key(texte, diag_type)
+        use_model = model or self._default_model or "unknown"
        with self._lock:
-            self._data[key] = result
+            self._data[key] = {"model": use_model, "result": result}
            self._dirty = True

    def save(self) -> None:
@@ -69,10 +102,7 @@ class OllamaCache:
            if not self._dirty:
                return
            self._path.parent.mkdir(parents=True, exist_ok=True)
-            payload = {
-                "model": self._model,
-                "entries": self._data,
-            }
+            payload = {"entries": self._data}
            self._path.write_text(
                json.dumps(payload, ensure_ascii=False, indent=2),
                encoding="utf-8",