# Aivanov_scan_ogc/tests/test_json_utils.py

"""Tests unitaires pour pipeline.json_utils."""
from __future__ import annotations

from pipeline.json_utils import (
    close_open_json,
    parse_json_output,
    patch_missing_commas,
    strip_fences,
    truncate_empty_loop,
)


class TestStripFences:
    """Expectations for ``strip_fences`` on fenced and unfenced input."""

    def test_fence_json(self):
        """A payload wrapped in a ``json``-tagged fence comes back bare."""
        fenced = '```json\n{"a": 1}\n```'
        stripped = strip_fences(fenced).strip()
        assert stripped == '{"a": 1}'

    def test_fence_simple(self):
        """A payload wrapped in an untagged fence comes back bare."""
        fenced = '```\n{"a": 1}\n```'
        stripped = strip_fences(fenced).strip()
        assert stripped == '{"a": 1}'

    def test_pas_de_fence(self):
        """Input without any fence is expected back unchanged (modulo strip)."""
        plain = '{"a": 1}'
        stripped = strip_fences(plain).strip()
        assert stripped == '{"a": 1}'


class TestPatchMissingCommas:
    """Expectations for ``patch_missing_commas``."""

    def test_objets_consecutifs(self):
        """Two objects on consecutive lines should end up comma-separated."""
        broken = '[\n{"a": 1}\n{"b": 2}\n]'
        # Only the presence of "}," is checked, not the full output.
        assert '},' in patch_missing_commas(broken)

    def test_deja_correct(self):
        """Already well-formed input must be returned untouched."""
        well_formed = '{"a": 1}'
        result = patch_missing_commas(well_formed)
        assert result == well_formed


class TestTruncateEmptyLoop:
    """Expectations for ``truncate_empty_loop``."""

    def test_moins_que_seuil(self):
        """Exactly ``max_consecutive`` empty objects must not be truncated."""
        payload = '[{"code":"","position":""},{"code":"","position":""}]'
        # Two empty objects with max_consecutive=2: nothing should be cut.
        result = truncate_empty_loop(payload, max_consecutive=2)
        assert result == payload

    def test_boucle_tronquée(self):
        """A run of ten empty objects is cut down to two occurrences."""
        empty_obj = '{"code":"","position":""}'
        # Unterminated array made of ten identical empty objects.
        payload = '[' + ','.join([empty_obj] * 10)
        result = truncate_empty_loop(payload, max_consecutive=2)
        # After truncation only two empty objects may remain.
        remaining = result.count('{"code":""')
        assert remaining == 2

    def test_pas_de_boucle(self):
        """Input with no empty-object run is returned as-is."""
        payload = '[{"code":"K650","position":"1"}]'
        result = truncate_empty_loop(payload)
        assert result == payload


class TestCloseOpenJson:
    """Expectations for ``close_open_json`` on complete and cut-off input."""

    def test_deja_ferme(self):
        """Balanced input is returned unchanged."""
        balanced = '{"a": [1, 2]}'
        result = close_open_json(balanced)
        assert result == balanced

    def test_accolade_manquante(self):
        """A missing closing brace is appended."""
        result = close_open_json('{"a": 1')
        assert result == '{"a": 1}'

    def test_crochet_manquant(self):
        """A missing bracket and the enclosing brace are both appended."""
        result = close_open_json('{"a": [1, 2')
        assert result == '{"a": [1, 2]}'

    def test_accolades_et_crochets_imbriqués(self):
        """Nested open braces and brackets are closed innermost-first."""
        result = close_open_json('{"a": {"b": [1, 2')
        assert result == '{"a": {"b": [1, 2]}}'

    def test_virgule_trainante_supprimée(self):
        """A trailing comma (and following space) is dropped before closing."""
        result = close_open_json('{"a": 1, ')
        assert result == '{"a": 1}'

    def test_accolade_dans_string_ignorée(self):
        """Braces inside a string literal must not count as open scopes."""
        truncated = '{"a": "{ ceci est une { accolade dans une string"'
        # Only the single missing outer brace is expected to be added.
        expected = truncated + '}'
        assert close_open_json(truncated) == expected


class TestParseJsonOutput:
    """Expectations for ``parse_json_output``, the top-level parsing entry point."""

    def test_json_valide(self):
        """Valid JSON is parsed into the matching Python object."""
        assert parse_json_output('{"a": 1}') == {"a": 1}

    def test_vide(self):
        """Empty string and ``None`` both yield ``None``."""
        assert parse_json_output("") is None
        assert parse_json_output(None) is None

    def test_fences_markdown(self):
        """Markdown code fences around the payload do not prevent parsing."""
        assert parse_json_output('```json\n{"a": 1}\n```') == {"a": 1}

    def test_virgule_manquante_recuperee(self):
        """A missing comma between consecutive objects is recovered."""
        raw = '[\n{"a": 1}\n{"b": 2}\n]'
        result = parse_json_output(raw)
        assert result == [{"a": 1}, {"b": 2}]

    def test_boucle_tronquée_fermée(self):
        """An unterminated run of empty objects is parsed and flagged."""
        objs = ['{"code":"","position":"","libelle":""}'] * 10
        raw = '{"das": [\n' + ',\n'.join(objs)  # deliberately left unclosed
        result = parse_json_output(raw)
        assert isinstance(result, dict)
        assert "das" in result
        # The result must carry the truncation marker. The number of empty
        # objects kept after truncation is not asserted here.
        assert result.get("_truncated_loop") is True

    def test_fallback_retourne_raw(self):
        """When nothing works, a dict with ``_raw`` + ``_parse_error`` is returned."""
        raw = "ceci n'est pas du JSON du tout !"
        result = parse_json_output(raw)
        # Check the type first so a None/non-dict return fails as a clear
        # assertion instead of an AttributeError on ``.get``.
        assert isinstance(result, dict)
        assert result.get("_raw") == raw
        assert "_parse_error" in result