feat: fallback Anthropic Haiku quand Ollama est indisponible

Quand Ollama refuse la connexion ou dépasse le délai d'attente (timeout), call_ollama() bascule automatiquement sur l'API Anthropic (Haiku par défaut). Le fallback est configurable via ANTHROPIC_API_KEY et ANTHROPIC_FALLBACK_MODEL dans .env.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 09:42:32 +01:00
parent 4333b45cda
commit 4c6c0d25bd
1 changed files with 56 additions and 8 deletions
--- a/src/medical/ollama_client.py
+++ b/src/medical/ollama_client.py
@@ -1,9 +1,10 @@
-"""Client Ollama partagé — appel LLM en mode JSON natif."""
+"""Client LLM partagé — Ollama (local) avec fallback Anthropic Haiku."""

 from __future__ import annotations

 import json
 import logging
+import os

 import requests

@@ -11,9 +12,56 @@ from ..config import OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT

 logger = logging.getLogger(__name__)

+# --- Anthropic fallback ---
+# Fallback model name, read from the environment once when this module is
+# imported; defaults to "claude-haiku-4-5-20251001" when
+# ANTHROPIC_FALLBACK_MODEL is unset.
+_ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_FALLBACK_MODEL", "claude-haiku-4-5-20251001")
+# Cached Anthropic client; stays None until _get_anthropic_client() succeeds.
+_anthropic_client = None
+
+
+def _get_anthropic_client():
+    """Return the shared Anthropic client, creating it on first use.
+
+    The client is built only when ANTHROPIC_API_KEY is set in the
+    environment, and is then cached in the module-level _anthropic_client.
+
+    Returns:
+        The cached or newly created client, or None when no API key is set
+        or when importing/constructing the client raises.
+    """
+    global _anthropic_client
+    if _anthropic_client is not None:
+        return _anthropic_client
+    api_key = os.environ.get("ANTHROPIC_API_KEY")
+    if not api_key:
+        return None
+    try:
+        # Imported inside the function so the anthropic package is only
+        # required when the fallback path is actually taken.
+        import anthropic
+        _anthropic_client = anthropic.Anthropic(api_key=api_key)
+        return _anthropic_client
+    except Exception as e:
+        # Nothing is cached on failure, so the next call tries again.
+        logger.warning("Anthropic SDK non disponible : %s", e)
+        return None
+
+
+def _call_anthropic(
+    prompt: str,
+    temperature: float = 0.1,
+    max_tokens: int = 2500,
+) -> dict | None:
+    """Send the prompt to the Anthropic API as a fallback and parse the reply.
+
+    Args:
+        prompt: Text sent as the single user message.
+        temperature: Sampling temperature forwarded to the API call.
+        max_tokens: Token limit forwarded to the API call.
+
+    Returns:
+        The result of parse_json_response() on the reply text, or None when
+        no client is available or when any exception is raised.
+    """
+    client = _get_anthropic_client()
+    if client is None:
+        return None
+    try:
+        response = client.messages.create(
+            model=_ANTHROPIC_MODEL,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            messages=[{"role": "user", "content": prompt}],
+        )
+        # Only the first content block of the response is read.
+        raw = response.content[0].text
+        result = parse_json_response(raw)
+        if result is not None:
+            logger.debug("Anthropic fallback OK (%s)", _ANTHROPIC_MODEL)
+        return result
+    except Exception as e:
+        # Any failure here is logged and reported to the caller as None.
+        logger.warning("Anthropic fallback erreur : %s", e)
+        return None
+

 def parse_json_response(raw: str) -> dict | None:
-    """Parse une réponse JSON d'Ollama, en gérant les blocs markdown."""
+    """Parse une réponse JSON, en gérant les blocs markdown."""
    text = raw.strip()
    if text.startswith("```"):
        first_nl = text.find("\n")
@@ -26,7 +74,7 @@ def parse_json_response(raw: str) -> dict | None:
    try:
        return json.loads(text)
    except json.JSONDecodeError:
-        logger.warning("Ollama : JSON invalide : %s", raw[:200])
+        logger.warning("LLM : JSON invalide : %s", raw[:200])
        return None


@@ -35,7 +83,7 @@ def call_ollama(
    temperature: float = 0.1,
    max_tokens: int = 2500,
 ) -> dict | None:
-    """Appelle Ollama en mode JSON natif avec retry.
+    """Appelle Ollama en mode JSON natif, avec fallback Anthropic si indisponible.

    Args:
        prompt: Le prompt à envoyer.
@@ -69,11 +117,11 @@ def call_ollama(
            if attempt == 0:
                logger.info("Ollama : retry après échec de parsing")
        except requests.ConnectionError:
-            logger.warning("Ollama non disponible (connexion refusée)")
-            return None
+            logger.info("Ollama indisponible → fallback Anthropic (%s)", _ANTHROPIC_MODEL)
+            return _call_anthropic(prompt, temperature, max_tokens)
        except requests.Timeout:
-            logger.warning("Ollama timeout après %ds", OLLAMA_TIMEOUT)
-            return None
+            logger.warning("Ollama timeout après %ds → fallback Anthropic", OLLAMA_TIMEOUT)
+            return _call_anthropic(prompt, temperature, max_tokens)
        except (requests.RequestException, json.JSONDecodeError) as e:
            logger.warning("Ollama erreur : %s", e)
            return None