Suite de tests unitaires pour tous les modules pipeline qui ne dépendent pas du VLM — utiles pour garantir la non-régression après refactor et servir de spec vivante de chaque fonction. Fichiers : - tests/test_json_utils.py (20 tests) : parse_json_output + toutes les stratégies de récupération (fences, virgules manquantes, boucles vides, fermeture JSON, fallback _raw/_parse_error) - tests/test_deskew.py (11 tests) : détection Hough + correction, image synthétique + fixtures cache réel - tests/test_checkboxes.py (17 tests) : parse_ghs_injustifie, dark_ratio, inner_frac, et ground truth visuel sur 17 dossiers (mapping hash→OGC résolu au runtime pour éviter les constantes fragiles) - tests/test_validation.py (18 tests) : _check_cim10/ccam/ghm/ghs, cross-checks GHM↔GHS, annotate sur JSON vide et complet, préservation de l'input (copie défensive) - tests/test_schema.py (8 tests) : clean_dossier retire les champs debug, préserve les champs métier, compacte la validation, ne modifie pas l'input - tests/test_zones_config.py (8 tests) : load/save round-trip, merge avec defaults, résilience JSON corrompu, get_zone. Total : 107 tests, 5.1 s d'exécution, tous passent. Aucune dépendance GPU : les tests s'exécutent en CI. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
120 lines
3.7 KiB
Python
120 lines
3.7 KiB
Python
"""Tests unitaires pour pipeline.json_utils."""
|
|
from __future__ import annotations
|
|
|
|
from pipeline.json_utils import (
|
|
close_open_json,
|
|
parse_json_output,
|
|
patch_missing_commas,
|
|
strip_fences,
|
|
truncate_empty_loop,
|
|
)
|
|
|
|
|
|
class TestStripFences:
    """Checks strip_fences against fenced and unfenced JSON payloads."""

    def test_fence_json(self):
        """A payload wrapped in a ```json fence reduces to the bare JSON text."""
        fenced = '```json\n{"a": 1}\n```'
        unfenced = strip_fences(fenced)
        assert unfenced.strip() == '{"a": 1}'

    def test_fence_simple(self):
        """A payload wrapped in a plain ``` fence reduces to the bare JSON text."""
        fenced = '```\n{"a": 1}\n```'
        unfenced = strip_fences(fenced)
        assert unfenced.strip() == '{"a": 1}'

    def test_pas_de_fence(self):
        """A payload without any fence comes back as the same JSON text."""
        plain = '{"a": 1}'
        unfenced = strip_fences(plain)
        assert unfenced.strip() == '{"a": 1}'
|
|
|
|
|
|
class TestPatchMissingCommas:
    """Checks patch_missing_commas on broken and well-formed input."""

    def test_objets_consecutifs(self):
        """Two objects on consecutive lines with no separator get a '},' in the output."""
        broken = '[\n{"a": 1}\n{"b": 2}\n]'
        assert '},' in patch_missing_commas(broken)

    def test_deja_correct(self):
        """Input that needs no patching is returned unchanged."""
        well_formed = '{"a": 1}'
        result = patch_missing_commas(well_formed)
        assert result == well_formed
|
|
|
|
|
|
class TestTruncateEmptyLoop:
    """Checks truncate_empty_loop on runs of empty objects."""

    def test_moins_que_seuil(self):
        """A run that does not exceed max_consecutive is left untouched."""
        payload = '[{"code":"","position":""},{"code":"","position":""}]'
        # Two empty objects with max_consecutive=2: nothing is expected to be cut.
        assert truncate_empty_loop(payload, max_consecutive=2) == payload

    def test_boucle_tronquée(self):
        """Ten repeated empty objects are cut down to max_consecutive occurrences."""
        empty_item = '{"code":"","position":""}'
        payload = '[' + ','.join([empty_item] * 10)
        truncated = truncate_empty_loop(payload, max_consecutive=2)
        # Only two occurrences of the empty-object prefix may remain.
        assert truncated.count('{"code":""') == 2

    def test_pas_de_boucle(self):
        """A single populated object is returned unchanged with default arguments."""
        payload = '[{"code":"K650","position":"1"}]'
        result = truncate_empty_loop(payload)
        assert result == payload
|
|
|
|
|
|
class TestCloseOpenJson:
    """Checks close_open_json on complete and truncated JSON text."""

    def test_deja_ferme(self):
        """Already balanced JSON is returned unchanged."""
        balanced = '{"a": [1, 2]}'
        result = close_open_json(balanced)
        assert result == balanced

    def test_accolade_manquante(self):
        """A missing closing brace is appended."""
        assert close_open_json('{"a": 1') == '{"a": 1}'

    def test_crochet_manquant(self):
        """A missing closing bracket and brace are appended in the right order."""
        assert close_open_json('{"a": [1, 2') == '{"a": [1, 2]}'

    def test_accolades_et_crochets_imbriqués(self):
        """Nested unclosed braces and brackets are all closed, innermost first."""
        assert close_open_json('{"a": {"b": [1, 2') == '{"a": {"b": [1, 2]}}'

    def test_virgule_trainante_supprimée(self):
        """A trailing comma (and the whitespace after it) is dropped before closing."""
        assert close_open_json('{"a": 1, ') == '{"a": 1}'

    def test_accolade_dans_string_ignorée(self):
        """Braces that appear inside a string value are not counted as open scopes."""
        truncated = '{"a": "{ ceci est une { accolade dans une string"'
        result = close_open_json(truncated)
        # Only the single outer closing brace is expected to be added.
        assert result == truncated + '}'
|
|
|
|
|
|
class TestParseJsonOutput:
    """Checks parse_json_output on valid, empty, repairable and hopeless input."""

    def test_json_valide(self):
        """Valid JSON text is parsed into the matching Python object."""
        parsed = parse_json_output('{"a": 1}')
        assert parsed == {"a": 1}

    def test_vide(self):
        """Both the empty string and None yield None."""
        for blank in ("", None):
            assert parse_json_output(blank) is None

    def test_fences_markdown(self):
        """JSON wrapped in a markdown ```json fence is still parsed."""
        parsed = parse_json_output('```json\n{"a": 1}\n```')
        assert parsed == {"a": 1}

    def test_virgule_manquante_recuperee(self):
        """A missing comma between two array items is recovered."""
        broken = '[\n{"a": 1}\n{"b": 2}\n]'
        assert parse_json_output(broken) == [{"a": 1}, {"b": 2}]

    def test_boucle_tronquée_fermée(self):
        """An unclosed run of empty objects parses to a dict flagged as truncated."""
        empty_item = '{"code":"","position":"","libelle":""}'
        # Deliberately left unclosed: neither the array nor the object is terminated.
        unclosed = '{"das": [\n' + ',\n'.join([empty_item] * 10)
        parsed = parse_json_output(unclosed)
        assert isinstance(parsed, dict)
        assert "das" in parsed
        # Original intent: at most 2 empty objects remain after truncation and the
        # JSON is then closed; only the truncation flag is asserted here.
        assert parsed.get("_truncated_loop") is True

    def test_fallback_retourne_raw(self):
        """When nothing works, a dict carrying _raw and _parse_error is returned."""
        garbage = "ceci n'est pas du JSON du tout !"
        parsed = parse_json_output(garbage)
        assert parsed.get("_raw") == garbage
        assert "_parse_error" in parsed
|