feat: fallback Anthropic Haiku quand Ollama est indisponible
Quand Ollama refuse la connexion ou timeout, call_ollama() bascule automatiquement sur l'API Anthropic (Haiku par défaut). Configurable via ANTHROPIC_API_KEY et ANTHROPIC_FALLBACK_MODEL dans .env. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,9 +1,10 @@
|
|||||||
"""Client Ollama partagé — appel LLM en mode JSON natif."""
|
"""Client LLM partagé — Ollama (local) avec fallback Anthropic Haiku."""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
@@ -11,9 +12,56 @@ from ..config import OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# --- Fallback Anthropic ---
|
||||||
|
_ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_FALLBACK_MODEL", "claude-haiku-4-5-20251001")
|
||||||
|
_anthropic_client = None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_anthropic_client():
|
||||||
|
"""Lazy-init du client Anthropic (uniquement si clé API présente)."""
|
||||||
|
global _anthropic_client
|
||||||
|
if _anthropic_client is not None:
|
||||||
|
return _anthropic_client
|
||||||
|
api_key = os.environ.get("ANTHROPIC_API_KEY")
|
||||||
|
if not api_key:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
import anthropic
|
||||||
|
_anthropic_client = anthropic.Anthropic(api_key=api_key)
|
||||||
|
return _anthropic_client
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Anthropic SDK non disponible : %s", e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _call_anthropic(
    prompt: str,
    temperature: float = 0.1,
    max_tokens: int = 2500,
) -> dict | None:
    """Send *prompt* to the Anthropic API (fallback path).

    Args:
        prompt: The user prompt to send.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The parsed JSON payload, or ``None`` when no client is
        configured, the request fails, or the reply is not valid JSON.
    """
    anthropic_client = _get_anthropic_client()
    if anthropic_client is None:
        # No API key / SDK available: fallback cannot run.
        return None

    try:
        reply = anthropic_client.messages.create(
            model=_ANTHROPIC_MODEL,
            max_tokens=max_tokens,
            temperature=temperature,
            messages=[{"role": "user", "content": prompt}],
        )
        # Reply text lives in the first content block; parse it as JSON
        # (parse_json_response logs and returns None on invalid JSON).
        parsed = parse_json_response(reply.content[0].text)
        if parsed is not None:
            logger.debug("Anthropic fallback OK (%s)", _ANTHROPIC_MODEL)
        return parsed
    except Exception as exc:  # network/API errors — degrade gracefully
        logger.warning("Anthropic fallback erreur : %s", exc)
        return None
|
||||||
|
|
||||||
|
|
||||||
def parse_json_response(raw: str) -> dict | None:
|
def parse_json_response(raw: str) -> dict | None:
|
||||||
"""Parse une réponse JSON d'Ollama, en gérant les blocs markdown."""
|
"""Parse une réponse JSON, en gérant les blocs markdown."""
|
||||||
text = raw.strip()
|
text = raw.strip()
|
||||||
if text.startswith("```"):
|
if text.startswith("```"):
|
||||||
first_nl = text.find("\n")
|
first_nl = text.find("\n")
|
||||||
@@ -26,7 +74,7 @@ def parse_json_response(raw: str) -> dict | None:
|
|||||||
try:
|
try:
|
||||||
return json.loads(text)
|
return json.loads(text)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
logger.warning("Ollama : JSON invalide : %s", raw[:200])
|
logger.warning("LLM : JSON invalide : %s", raw[:200])
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -35,7 +83,7 @@ def call_ollama(
|
|||||||
temperature: float = 0.1,
|
temperature: float = 0.1,
|
||||||
max_tokens: int = 2500,
|
max_tokens: int = 2500,
|
||||||
) -> dict | None:
|
) -> dict | None:
|
||||||
"""Appelle Ollama en mode JSON natif avec retry.
|
"""Appelle Ollama en mode JSON natif, avec fallback Anthropic si indisponible.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
prompt: Le prompt à envoyer.
|
prompt: Le prompt à envoyer.
|
||||||
@@ -69,11 +117,11 @@ def call_ollama(
|
|||||||
if attempt == 0:
|
if attempt == 0:
|
||||||
logger.info("Ollama : retry après échec de parsing")
|
logger.info("Ollama : retry après échec de parsing")
|
||||||
except requests.ConnectionError:
|
except requests.ConnectionError:
|
||||||
logger.warning("Ollama non disponible (connexion refusée)")
|
logger.info("Ollama indisponible → fallback Anthropic (%s)", _ANTHROPIC_MODEL)
|
||||||
return None
|
return _call_anthropic(prompt, temperature, max_tokens)
|
||||||
except requests.Timeout:
|
except requests.Timeout:
|
||||||
logger.warning("Ollama timeout après %ds", OLLAMA_TIMEOUT)
|
logger.warning("Ollama timeout après %ds → fallback Anthropic", OLLAMA_TIMEOUT)
|
||||||
return None
|
return _call_anthropic(prompt, temperature, max_tokens)
|
||||||
except (requests.RequestException, json.JSONDecodeError) as e:
|
except (requests.RequestException, json.JSONDecodeError) as e:
|
||||||
logger.warning("Ollama erreur : %s", e)
|
logger.warning("Ollama erreur : %s", e)
|
||||||
return None
|
return None
|
||||||
|
|||||||
Reference in New Issue
Block a user