"""Client LLM partagé — Ollama (local) avec fallback Anthropic Haiku.""" from __future__ import annotations import json import logging import os import requests from ..config import OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT logger = logging.getLogger(__name__) # --- Fallback Anthropic --- _ANTHROPIC_MODEL = os.environ.get("ANTHROPIC_FALLBACK_MODEL", "claude-haiku-4-5-20251001") _anthropic_client = None def _get_anthropic_client(): """Lazy-init du client Anthropic (uniquement si clé API présente).""" global _anthropic_client if _anthropic_client is not None: return _anthropic_client api_key = os.environ.get("ANTHROPIC_API_KEY") if not api_key: return None try: import anthropic _anthropic_client = anthropic.Anthropic(api_key=api_key) return _anthropic_client except Exception as e: logger.warning("Anthropic SDK non disponible : %s", e) return None def _call_anthropic( prompt: str, temperature: float = 0.1, max_tokens: int = 2500, ) -> dict | None: """Appelle l'API Anthropic en fallback.""" client = _get_anthropic_client() if client is None: return None try: response = client.messages.create( model=_ANTHROPIC_MODEL, max_tokens=max_tokens, temperature=temperature, messages=[{"role": "user", "content": prompt}], ) raw = response.content[0].text result = parse_json_response(raw) if result is not None: logger.debug("Anthropic fallback OK (%s)", _ANTHROPIC_MODEL) return result except Exception as e: logger.warning("Anthropic fallback erreur : %s", e) return None def parse_json_response(raw: str) -> dict | None: """Parse une réponse JSON, en gérant les blocs markdown.""" text = raw.strip() if text.startswith("```"): first_nl = text.find("\n") if first_nl != -1: text = text[first_nl + 1:] if text.rstrip().endswith("```"): text = text.rstrip()[:-3] text = text.strip() try: return json.loads(text) except json.JSONDecodeError: logger.warning("LLM : JSON invalide : %s", raw[:200]) return None def call_ollama( prompt: str, temperature: float = 0.1, max_tokens: int = 2500, ) -> dict | None: """Appelle Ollama en mode JSON natif, avec fallback Anthropic si indisponible. Args: prompt: Le prompt à envoyer. temperature: Température de génération (défaut: 0.1). max_tokens: Nombre max de tokens (défaut: 2500). Returns: Le dict JSON parsé, ou None en cas d'erreur. """ for attempt in range(2): try: response = requests.post( f"{OLLAMA_URL}/api/generate", json={ "model": OLLAMA_MODEL, "prompt": prompt, "stream": False, "format": "json", "options": { "temperature": temperature, "num_predict": max_tokens, }, }, timeout=OLLAMA_TIMEOUT, ) response.raise_for_status() raw = response.json().get("response", "") result = parse_json_response(raw) if result is not None: return result if attempt == 0: logger.info("Ollama : retry après échec de parsing") except requests.ConnectionError: logger.info("Ollama indisponible → fallback Anthropic (%s)", _ANTHROPIC_MODEL) return _call_anthropic(prompt, temperature, max_tokens) except requests.Timeout: logger.warning("Ollama timeout après %ds → fallback Anthropic", OLLAMA_TIMEOUT) return _call_anthropic(prompt, temperature, max_tokens) except (requests.RequestException, json.JSONDecodeError) as e: logger.warning("Ollama erreur : %s", e) return None return None