Files
t2a/tests/test_ollama_cache.py
dom 540e0cb400 feat: architecture multi-modèles LLM + externalisation des prompts
- Ajout OLLAMA_MODELS (coding/cpam/validation/qc) dans config.py avec get_model()
- Paramètre role= dans call_ollama() pour dispatch par rôle
- Cache Ollama : modèle stocké par entrée (migration auto de l'ancien format)
- 7 prompts externalisés dans src/prompts/templates.py (format str.format)
- Viewer : admin multi-modèles, endpoint PDF avec redaction, source texte
- Documentation prompts dans docs/prompts.md

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-19 20:51:52 +01:00

187 lines
7.2 KiB
Python

"""Tests unitaires pour le cache Ollama persistant."""
import json
import threading
import pytest
from src.medical.ollama_cache import OllamaCache
class TestOllamaCache:
    """Unit tests for the persistent Ollama cache (per-entry model tagging)."""

    def test_get_miss(self, tmp_path):
        """Looking up a key that was never stored yields None."""
        c = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        assert c.get("HTA", "das") is None

    def test_put_and_get(self, tmp_path):
        """A stored result is returned unchanged on lookup."""
        c = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        payload = {"code": "I10", "confidence": "high", "justification": "HTA essentielle"}
        c.put("HTA", "das", payload)
        assert c.get("HTA", "das") == payload

    def test_key_normalization(self, tmp_path):
        """Keys are matched case-insensitively and with surrounding whitespace trimmed."""
        c = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        payload = {"code": "I10", "confidence": "high"}
        c.put(" HTA ", "das", payload)
        assert c.get("hta", "das") == payload

    def test_different_types_different_keys(self, tmp_path):
        """The same label stored under two types maps to two distinct entries."""
        c = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        c.put("Diabète", "dp", {"code": "E11.9"})
        c.put("Diabète", "das", {"code": "E11.8"})
        assert c.get("Diabète", "dp")["code"] == "E11.9"
        assert c.get("Diabète", "das")["code"] == "E11.8"

    def test_save_and_reload(self, tmp_path):
        """save() persists entries; a fresh instance reads them back."""
        cache_file = tmp_path / "cache.json"
        writer = OllamaCache(cache_file, "gemma3:12b")
        writer.put("HTA", "das", {"code": "I10"})
        writer.save()
        assert cache_file.exists()
        reader = OllamaCache(cache_file, "gemma3:12b")
        assert reader.get("HTA", "das") == {"code": "I10"}

    def test_save_no_write_if_clean(self, tmp_path):
        """save() on an untouched cache does not create the file."""
        cache_file = tmp_path / "cache.json"
        c = OllamaCache(cache_file, "gemma3:12b")
        c.save()
        assert not cache_file.exists()

    def test_model_per_entry_different_model_miss(self, tmp_path):
        """A get() with a model different from the one used at put() misses."""
        c = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        c.put("HTA", "das", {"code": "I10"})
        # Same cache object, but queried with a different model.
        assert c.get("HTA", "das", model="llama3:8b") is None

    def test_model_per_entry_same_model_hit(self, tmp_path):
        """A get() with the matching model returns the stored result."""
        c = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        c.put("HTA", "das", {"code": "I10"})
        assert c.get("HTA", "das", model="gemma3:12b") == {"code": "I10"}

    def test_model_per_entry_explicit_put_model(self, tmp_path):
        """put() with an explicit model= stores that model on the entry."""
        c = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        c.put("HTA", "das", {"code": "I10"}, model="llama3:8b")
        # The constructor's default model does not match...
        assert c.get("HTA", "das") is None
        # ...while the explicitly stored model does.
        assert c.get("HTA", "das", model="llama3:8b") == {"code": "I10"}

    def test_save_reload_different_model_miss(self, tmp_path):
        """After save/reload, entries keep the model they were stored with."""
        cache_file = tmp_path / "cache.json"
        writer = OllamaCache(cache_file, "gemma3:12b")
        writer.put("HTA", "das", {"code": "I10"})
        writer.save()
        reader = OllamaCache(cache_file, "llama3:8b")
        assert reader.get("HTA", "das") is None

    def test_save_reload_same_model_hit(self, tmp_path):
        """After save/reload with the same default model, the lookup hits."""
        cache_file = tmp_path / "cache.json"
        writer = OllamaCache(cache_file, "gemma3:12b")
        writer.put("HTA", "das", {"code": "I10"})
        writer.save()
        reader = OllamaCache(cache_file, "gemma3:12b")
        assert reader.get("HTA", "das") == {"code": "I10"}

    def test_corrupted_file(self, tmp_path):
        """An unparseable cache file is treated as an empty cache, not an error."""
        cache_file = tmp_path / "cache.json"
        cache_file.write_text("not valid json", encoding="utf-8")
        c = OllamaCache(cache_file, "gemma3:12b")
        assert len(c) == 0
        assert c.get("HTA", "das") is None

    def test_len(self, tmp_path):
        """len() tracks the number of stored entries."""
        c = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        assert len(c) == 0
        c.put("HTA", "das", {"code": "I10"})
        assert len(c) == 1
        c.put("Diabète", "dp", {"code": "E11.9"})
        assert len(c) == 2

    def test_thread_safety(self, tmp_path):
        """Concurrent writes from several threads neither raise nor lose entries."""
        c = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        failures = []

        def worker(idx):
            try:
                c.put(f"diag_{idx}", "das", {"code": f"X{idx:02d}"})
            except Exception as exc:
                # Collected rather than raised so the main thread can assert.
                failures.append(exc)

        workers = [threading.Thread(target=worker, args=(n,)) for n in range(20)]
        for t in workers:
            t.start()
        for t in workers:
            t.join()
        assert not failures
        assert len(c) == 20

    def test_json_format_new(self, tmp_path):
        """The on-disk format stores the model on each entry, not globally."""
        cache_file = tmp_path / "cache.json"
        c = OllamaCache(cache_file, "gemma3:12b")
        c.put("HTA", "das", {"code": "I10"})
        c.save()
        raw = json.loads(cache_file.read_text(encoding="utf-8"))
        assert "model" not in raw  # no global model key anymore
        assert "entries" in raw
        assert len(raw["entries"]) == 1
        entry = next(iter(raw["entries"].values()))
        assert entry["model"] == "gemma3:12b"
        assert entry["result"] == {"code": "I10"}

    def test_backward_compat_old_format_migration(self, tmp_path):
        """The legacy format (global model, bare entries) is migrated correctly."""
        cache_file = tmp_path / "cache.json"
        # Write a file in the legacy layout.
        legacy = {
            "model": "gemma3:12b",
            "entries": {
                "das::hta": {"code": "I10"},
                "dp::diabète type 2": {"code": "E11.9"},
            },
        }
        cache_file.write_text(json.dumps(legacy), encoding="utf-8")
        # Loading with the same model must migrate the entries.
        c = OllamaCache(cache_file, "gemma3:12b")
        assert len(c) == 2
        assert c.get("HTA", "das") == {"code": "I10"}
        assert c.get("diabète type 2", "dp") == {"code": "E11.9"}
        # Saving rewrites the file in the new per-entry format.
        c.save()
        raw = json.loads(cache_file.read_text(encoding="utf-8"))
        assert "model" not in raw  # global model key is gone
        entry = raw["entries"]["das::hta"]
        assert entry["model"] == "gemma3:12b"
        assert entry["result"] == {"code": "I10"}

    def test_backward_compat_old_format_wrong_model(self, tmp_path):
        """Migrated legacy entries keep the file's model, not the constructor's."""
        cache_file = tmp_path / "cache.json"
        legacy = {
            "model": "gemma3:12b",
            "entries": {
                "das::hta": {"code": "I10"},
            },
        }
        cache_file.write_text(json.dumps(legacy), encoding="utf-8")
        # Load with a different model: the entry retains its original model.
        c = OllamaCache(cache_file, "llama3:8b")
        assert c.get("HTA", "das") is None  # llama3:8b != gemma3:12b
        assert c.get("HTA", "das", model="gemma3:12b") == {"code": "I10"}