"""Tests unitaires pour pipeline.json_utils.""" from __future__ import annotations from pipeline.json_utils import ( close_open_json, parse_json_output, patch_missing_commas, strip_fences, truncate_empty_loop, ) class TestStripFences: def test_fence_json(self): raw = '```json\n{"a": 1}\n```' assert strip_fences(raw).strip() == '{"a": 1}' def test_fence_simple(self): raw = '```\n{"a": 1}\n```' assert strip_fences(raw).strip() == '{"a": 1}' def test_pas_de_fence(self): raw = '{"a": 1}' assert strip_fences(raw).strip() == '{"a": 1}' class TestPatchMissingCommas: def test_objets_consecutifs(self): raw = '[\n{"a": 1}\n{"b": 2}\n]' patched = patch_missing_commas(raw) assert '},' in patched def test_deja_correct(self): raw = '{"a": 1}' assert patch_missing_commas(raw) == raw class TestTruncateEmptyLoop: def test_moins_que_seuil(self): raw = '[{"code":"","position":""},{"code":"","position":""}]' # 2 objets vides = seuil par défaut, rien à tronquer out = truncate_empty_loop(raw, max_consecutive=2) assert out == raw def test_boucle_tronquée(self): objs = ['{"code":"","position":""}'] * 10 raw = '[' + ','.join(objs) out = truncate_empty_loop(raw, max_consecutive=2) # Après troncature, ne doit contenir que 2 occurrences assert out.count('{"code":""') == 2 def test_pas_de_boucle(self): raw = '[{"code":"K650","position":"1"}]' assert truncate_empty_loop(raw) == raw class TestCloseOpenJson: def test_deja_ferme(self): raw = '{"a": [1, 2]}' assert close_open_json(raw) == raw def test_accolade_manquante(self): raw = '{"a": 1' closed = close_open_json(raw) assert closed == '{"a": 1}' def test_crochet_manquant(self): raw = '{"a": [1, 2' closed = close_open_json(raw) assert closed == '{"a": [1, 2]}' def test_accolades_et_crochets_imbriqués(self): raw = '{"a": {"b": [1, 2' closed = close_open_json(raw) assert closed == '{"a": {"b": [1, 2]}}' def test_virgule_trainante_supprimée(self): raw = '{"a": 1, ' closed = close_open_json(raw) assert closed == '{"a": 1}' def test_accolade_dans_string_ignorée(self): raw = '{"a": "{ ceci est une { accolade dans une string"' closed = close_open_json(raw) # On ajoute juste l'accolade finale manquante assert closed == raw + '}' class TestParseJsonOutput: def test_json_valide(self): assert parse_json_output('{"a": 1}') == {"a": 1} def test_vide(self): assert parse_json_output("") is None assert parse_json_output(None) is None def test_fences_markdown(self): assert parse_json_output('```json\n{"a": 1}\n```') == {"a": 1} def test_virgule_manquante_recuperee(self): raw = '[\n{"a": 1}\n{"b": 2}\n]' result = parse_json_output(raw) assert result == [{"a": 1}, {"b": 2}] def test_boucle_tronquée_fermée(self): objs = ['{"code":"","position":"","libelle":""}'] * 10 raw = '{"das": [\n' + ',\n'.join(objs) # non fermé result = parse_json_output(raw) assert isinstance(result, dict) assert "das" in result # Après troncature, 2 objets vides max, puis JSON refermé assert result.get("_truncated_loop") is True def test_fallback_retourne_raw(self): """Quand rien ne marche, on renvoie un dict avec _raw + _parse_error.""" raw = "ceci n'est pas du JSON du tout !" result = parse_json_output(raw) assert result.get("_raw") == raw assert "_parse_error" in result