feat: cache Ollama + parallélisation ThreadPool + filtrage DAS renforcé + modules GHM/CPAM/export RUM

- Cache persistant JSON thread-safe pour les résultats Ollama (invalidation par modèle)
- Parallélisation des appels Ollama (ThreadPoolExecutor, 2 workers)
- 6 nouvelles règles de filtrage DAS parasites (doublons, ponctuation, OCR, labo, fragments)
- Client Ollama centralisé (mode JSON natif + retry)
- Module GHM (estimation CMD/sévérité)
- Module contrôle CPAM (parser + contre-argumentation RAG)
- Export RUM (format RSS)
- Viewer enrichi (détail dossier)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-12 13:44:34 +01:00
parent a00e5f1147
commit a58398f5d4
25 changed files with 2872 additions and 97 deletions
--- a/src/medical/ollama_cache.py
+++ b/src/medical/ollama_cache.py
@@ -0,0 +1,121 @@
+"""Persistent, thread-safe cache for Ollama results."""
+
+from __future__ import annotations
+
+import json
+import logging
+import threading
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+
+class OllamaCache:
+    """Persistent JSON cache used to avoid redundant Ollama calls.
+
+    Entries are keyed by the diagnostic type and the normalized text.
+    The Ollama model name is stored next to the entries in the file: a file
+    written for a different model is ignored when loading.
+    """
+
+    def __init__(self, cache_path: Path, model: str):
+        """Create the cache and load existing entries from ``cache_path``.
+
+        Args:
+            cache_path: JSON file backing the cache.
+            model: Ollama model name the entries are valid for.
+        """
+        self._path = cache_path
+        self._model = model
+        self._lock = threading.Lock()
+        self._data: dict[str, dict] = {}
+        self._dirty = False
+        self._load()
+
+    def _load(self) -> None:
+        """Load the cache from disk, starting empty on any problem.
+
+        A missing, unreadable or malformed file never raises; the cache is
+        best-effort, so the instance simply starts with no entries.
+        """
+        try:
+            raw = json.loads(self._path.read_text(encoding="utf-8"))
+        except FileNotFoundError:
+            logger.info("Cache Ollama : nouveau cache (%s)", self._path)
+            return
+        except OSError as e:
+            logger.warning("Cache Ollama : fichier illisible (%s), ignoré", e)
+            return
+        except ValueError as e:
+            # JSONDecodeError and UnicodeDecodeError are both ValueError.
+            logger.warning("Cache Ollama : fichier corrompu (%s), réinitialisé", e)
+            return
+        if not isinstance(raw, dict):
+            # Valid JSON but not an object (e.g. a list): nothing usable.
+            logger.warning(
+                "Cache Ollama : fichier corrompu (%s), réinitialisé",
+                "structure JSON inattendue",
+            )
+            return
+        if raw.get("model") != self._model:
+            logger.info(
+                "Cache Ollama : modèle changé (%s → %s), cache invalidé",
+                raw.get("model"), self._model,
+            )
+            return
+        entries = raw.get("entries", {})
+        if not isinstance(entries, dict):
+            logger.warning(
+                "Cache Ollama : fichier corrompu (%s), réinitialisé",
+                "entrées invalides",
+            )
+            return
+        self._data = entries
+        logger.info("Cache Ollama : %d entrées chargées", len(self._data))
+
+    @staticmethod
+    def _make_key(texte: str, diag_type: str) -> str:
+        """Build the normalized key: type, then stripped lowercase text."""
+        return f"{diag_type}::{texte.strip().lower()}"
+
+    def get(self, texte: str, diag_type: str) -> dict | None:
+        """Return the cached result for this text/type, or None if absent."""
+        key = self._make_key(texte, diag_type)
+        with self._lock:
+            return self._data.get(key)
+
+    def put(self, texte: str, diag_type: str, result: dict) -> None:
+        """Store a result in memory and mark the cache as needing a save."""
+        key = self._make_key(texte, diag_type)
+        with self._lock:
+            self._data[key] = result
+            self._dirty = True
+
+    def save(self) -> None:
+        """Persist the cache to disk if it changed since the last save.
+
+        The payload is written to a sibling ``.tmp`` file that then replaces
+        the target, so an interrupted write does not truncate the existing
+        cache file.
+        """
+        with self._lock:
+            if not self._dirty:
+                return
+            self._path.parent.mkdir(parents=True, exist_ok=True)
+            payload = {
+                "model": self._model,
+                "entries": self._data,
+            }
+            tmp_path = self._path.with_name(self._path.name + ".tmp")
+            tmp_path.write_text(
+                json.dumps(payload, ensure_ascii=False, indent=2),
+                encoding="utf-8",
+            )
+            tmp_path.replace(self._path)
+            self._dirty = False
+            logger.info("Cache Ollama : %d entrées sauvegardées", len(self._data))
+
+    def __len__(self) -> int:
+        """Return the number of entries currently held in memory."""
+        with self._lock:
+            return len(self._data)