# NOTE(review): the lines below are pasted commit-message / file-viewer residue,
# not Python; preserved as comments so the module parses. Please move this text
# into the commit message or a CHANGELOG entry.
# - Multi-modèles : 4 rôles LLM (coding=gemma3:27b-cloud, cpam=gemma3:27b-cloud, validation=deepseek-v3.2:cloud, qc=gemma3:12b) avec get_model(role) - Prompts externalisés : 7 templates dans src/prompts/templates.py - Cache Ollama : modèle stocké par entrée (migration auto ancien format) - call_ollama() : paramètre role= (priorité: model > role > global) - Quality engine : veto_engine + decision_engine + rules_router (YAML) - Benchmark qualité : scripts/benchmark_quality.py (A/B, métriques CIM-10) - Fix biologie : valeurs qualitatives (troponine négative) non filtrées - Fix CPAM : gemma3:27b-cloud au lieu de deepseek (JSON tronqué par thinking) - CPAM max_tokens 4000→6000, viewer admin multi-modèles - Benchmark 10 dossiers : 100% DAS valides, 10/10 CPAM, 243s/dossier Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
# Viewer metadata (appeared twice in the paste): 169 lines, 6.4 KiB, Python
"""Tests unitaires pour le cache Ollama persistant."""
|
|
|
|
import json
|
|
import threading
|
|
|
|
import pytest
|
|
|
|
from src.medical.ollama_cache import OllamaCache
|
|
|
|
|
|
class TestOllamaCache:
    """Unit tests for the persistent Ollama cache (per-entry model tagging)."""

    def test_get_miss(self, tmp_path):
        """A lookup on an empty cache yields None."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        assert store.get("HTA", "das") is None

    def test_put_and_get(self, tmp_path):
        """A stored entry is returned verbatim."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        payload = {"code": "I10", "confidence": "high", "justification": "HTA essentielle"}
        store.put("HTA", "das", payload)
        assert store.get("HTA", "das") == payload

    def test_key_normalization(self, tmp_path):
        """Keys are whitespace-trimmed and case-insensitive."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        payload = {"code": "I10", "confidence": "high"}
        store.put(" HTA ", "das", payload)
        assert store.get("hta", "das") == payload

    def test_different_types_different_keys(self, tmp_path):
        """The same diagnosis under distinct types maps to distinct entries."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        store.put("Diabète", "dp", {"code": "E11.9"})
        store.put("Diabète", "das", {"code": "E11.8"})
        assert store.get("Diabète", "dp")["code"] == "E11.9"
        assert store.get("Diabète", "das")["code"] == "E11.8"

    def test_save_and_reload(self, tmp_path):
        """save() persists entries; a fresh instance reads them back."""
        target = tmp_path / "cache.json"
        writer = OllamaCache(target, "gemma3:12b")
        writer.put("HTA", "das", {"code": "I10"})
        writer.save()

        assert target.exists()

        reader = OllamaCache(target, "gemma3:12b")
        assert reader.get("HTA", "das") == {"code": "I10"}

    def test_save_no_write_if_clean(self, tmp_path):
        """save() on an untouched cache creates no file at all."""
        target = tmp_path / "cache.json"
        OllamaCache(target, "gemma3:12b").save()
        assert not target.exists()

    def test_model_change_returns_none(self, tmp_path):
        """Entries tagged with another model return None (no global invalidation)."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        store.put("HTA", "das", {"code": "I10"})
        # Same cache, different model -> miss
        assert store.get("HTA", "das", model="llama3:8b") is None
        # Original model -> hit
        assert store.get("HTA", "das") == {"code": "I10"}

    def test_corrupted_file(self, tmp_path):
        """An unparseable cache file is treated as empty rather than fatal."""
        target = tmp_path / "cache.json"
        target.write_text("not valid json", encoding="utf-8")

        store = OllamaCache(target, "gemma3:12b")
        assert len(store) == 0
        assert store.get("HTA", "das") is None

    def test_len(self, tmp_path):
        """len() tracks the number of stored entries."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        assert len(store) == 0
        store.put("HTA", "das", {"code": "I10"})
        assert len(store) == 1
        store.put("Diabète", "dp", {"code": "E11.9"})
        assert len(store) == 2

    def test_thread_safety(self, tmp_path):
        """Concurrent writes from several threads neither raise nor drop entries."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        failures = []

        def writer(i):
            try:
                store.put(f"diag_{i}", "das", {"code": f"X{i:02d}"})
            except Exception as exc:
                failures.append(exc)

        workers = [threading.Thread(target=writer, args=(n,)) for n in range(20)]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

        assert not failures
        assert len(store) == 20

    def test_json_format_new(self, tmp_path):
        """The current on-disk format stores the model PER ENTRY, not globally."""
        target = tmp_path / "cache.json"
        store = OllamaCache(target, "gemma3:12b")
        store.put("HTA", "das", {"code": "I10"})
        store.save()

        on_disk = json.loads(target.read_text(encoding="utf-8"))
        assert "entries" in on_disk
        assert "model" not in on_disk  # no global model field anymore
        # Each entry carries both the model tag and the result payload
        first_entry = next(iter(on_disk["entries"].values()))
        assert first_entry["model"] == "gemma3:12b"
        assert first_entry["result"] == {"code": "I10"}

    def test_migration_old_format(self, tmp_path):
        """The legacy format (single global model) is migrated transparently."""
        target = tmp_path / "cache.json"
        # Write a legacy-format cache file by hand
        legacy = {
            "model": "gemma3:12b",
            "entries": {
                "das::hta": {"code": "I10", "confidence": "high"},
            },
        }
        target.write_text(json.dumps(legacy), encoding="utf-8")

        store = OllamaCache(target, "gemma3:12b")
        # The migrated entry must be readable
        assert store.get("HTA", "das") == {"code": "I10", "confidence": "high"}
        assert len(store) == 1

        # Persist and confirm the new per-entry layout
        store.save()
        on_disk = json.loads(target.read_text(encoding="utf-8"))
        assert "model" not in on_disk
        migrated = on_disk["entries"]["das::hta"]
        assert migrated["model"] == "gemma3:12b"
        assert migrated["result"]["code"] == "I10"

    def test_migration_old_format_different_model(self, tmp_path):
        """Legacy entries keep their original model tag after migration."""
        target = tmp_path / "cache.json"
        legacy = {
            "model": "old-model",
            "entries": {
                "das::hta": {"code": "I10"},
            },
        }
        target.write_text(json.dumps(legacy), encoding="utf-8")

        # Load under a different default model
        store = OllamaCache(target, "new-model")
        # Entry is tagged "old-model" -> miss under "new-model"
        assert store.get("HTA", "das") is None
        # ...but still reachable under the legacy model
        assert store.get("HTA", "das", model="old-model") == {"code": "I10"}

    def test_put_with_explicit_model(self, tmp_path):
        """put() with an explicit model= stores that model tag."""
        store = OllamaCache(tmp_path / "cache.json", "default-model")
        store.put("HTA", "das", {"code": "I10"}, model="explicit-model")
        # get without model -> falls back to the default -> miss
        assert store.get("HTA", "das") is None
        # get with the matching model -> hit
        assert store.get("HTA", "das", model="explicit-model") == {"code": "I10"}

    def test_get_returns_none_if_model_mismatch(self, tmp_path):
        """get() yields None when the stored model differs from the requested one."""
        store = OllamaCache(tmp_path / "cache.json", "gemma3:12b")
        store.put("HTA", "das", {"code": "I10"})
        assert store.get("HTA", "das", model="llama3:8b") is None