Suite de tests unitaires pour tous les modules pipeline qui ne dépendent pas du VLM — utiles pour garantir la non-régression après refactor et servir de spec vivante de chaque fonction. Fichiers : - tests/test_json_utils.py (20 tests) : parse_json_output + toutes les stratégies de récupération (fences, virgules manquantes, boucles vides, fermeture JSON, fallback _raw/_parse_error) - tests/test_deskew.py (11 tests) : détection Hough + correction, image synthétique + fixtures cache réel - tests/test_checkboxes.py (17 tests) : parse_ghs_injustifie, dark_ratio, inner_frac, et ground truth visuel sur 17 dossiers (mapping hash→OGC résolu au runtime pour éviter les constantes fragiles) - tests/test_validation.py (18 tests) : _check_cim10/ccam/ghm/ghs, cross-checks GHM↔GHS, annotate sur JSON vide et complet, préservation de l'input (copie défensive) - tests/test_schema.py (8 tests) : clean_dossier retire les champs debug, préserve les champs métier, compacte la validation, ne modifie pas l'input - tests/test_zones_config.py (8 tests) : load/save round-trip, merge avec defaults, résilience JSON corrompu, get_zone Total : 107 tests, 5.1 s d'exécution, tous passent. Aucune dépendance GPU, s'exécutent en CI. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
146 lines
5.0 KiB
Python
146 lines
5.0 KiB
Python
"""Tests unitaires pour pipeline.recueil (logique métier de la page recueil).
|
|
|
|
Les fonctions testées ici sont toutes pures (pas d'appel au VLM) :
|
|
- filter_cim10_codes
|
|
- classify_codes_dp_dr_das
|
|
- merge_codage_reco
|
|
- resolve_recueil_zones (juste lecture de config)
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from pipeline.recueil import (
|
|
classify_codes_dp_dr_das,
|
|
filter_cim10_codes,
|
|
merge_codage_reco,
|
|
resolve_recueil_zones,
|
|
)
|
|
|
|
|
|
class TestFilterCim10Codes:
    """Checks on ``filter_cim10_codes`` with well-formed and malformed entries."""

    def test_codes_valides_conservés(self):
        """Three entries go in, three come out, and the first keeps its code."""
        entries = [
            {"code": code, "position": position}
            for code, position in (("K650", "1"), ("T814", "2"), ("Z954 *", "3"))
        ]
        kept = filter_cim10_codes(entries)
        assert len(kept) == 3
        assert kept[0]["code"] == "K650"

    def test_ccam_rejeté(self):
        """A CCAM code (4 letters + 3 digits) must not pass the CIM-10 filter."""
        cim10_entry = {"code": "K650", "position": ""}
        ccam_entry = {"code": "EBFA012", "position": "1"}  # CCAM
        kept = filter_cim10_codes([cim10_entry, ccam_entry])
        assert len(kept) == 1
        assert kept[0]["code"] == "K650"

    def test_code_vide_rejeté(self):
        """An entry whose code is the empty string is dropped."""
        blank_entry = {"code": "", "position": ""}
        filled_entry = {"code": "K650", "position": ""}
        kept = filter_cim10_codes([blank_entry, filled_entry])
        assert len(kept) == 1

    def test_non_dict_ignoré(self):
        """Items that are not dicts (a bare string, ``None``) are skipped."""
        mixed_items = ["K650", None, {"code": "T814", "position": ""}]
        kept = filter_cim10_codes(mixed_items)
        assert len(kept) == 1

    def test_liste_vide(self):
        """Both an empty list and ``None`` yield an empty list."""
        for empty_input in ([], None):
            assert filter_cim10_codes(empty_input) == []
|
|
|
|
|
|
class TestClassifyCodesDpDrDas:
    """Checks on how ``classify_codes_dp_dr_das`` splits codes into DP, DR and DAS."""

    def test_cas_nominal(self):
        """1st without position = DP, 2nd without position = DR, then DAS with positions."""
        entries = [
            {"code": code, "position": position}
            for code, position in (
                ("K650", ""),
                ("T814", ""),
                ("Z954", "2"),
                ("R33", "3"),
            )
        ]
        principal, related, associated = classify_codes_dp_dr_das(entries)
        assert principal == "K650"
        assert related == "T814"
        assert [item["code"] for item in associated] == ["Z954", "R33"]

    def test_dr_vide_non_duplique_dp(self):
        """When Qwen duplicates the DP (because DR is visually empty),
        DR must be treated as empty, not as DR = DP."""
        dp_entry = {"code": "K650", "position": ""}
        dp_duplicate = {"code": "K650", "position": ""}  # duplicate
        das_entry = {"code": "T814", "position": "2"}
        principal, related, associated = classify_codes_dp_dr_das(
            [dp_entry, dp_duplicate, das_entry]
        )
        assert principal == "K650"
        assert related == ""  # deduplicated
        assert len(associated) == 1

    def test_seulement_dp(self):
        """A single position-less code gives a DP, an empty DR and no DAS."""
        lone_entry = {"code": "K650", "position": ""}
        principal, related, associated = classify_codes_dp_dr_das([lone_entry])
        assert principal == "K650"
        assert related == ""
        assert associated == []

    def test_tous_avec_positions(self):
        """If every code has a position, DP and DR are empty and all go to DAS."""
        entries = [
            {"code": code, "position": position}
            for code, position in (("K650", "1"), ("T814", "2"))
        ]
        principal, related, associated = classify_codes_dp_dr_das(entries)
        assert principal == ""
        assert related == ""
        assert len(associated) == 2

    def test_vide(self):
        """An empty input list gives empty DP, empty DR and an empty DAS list."""
        principal, related, associated = classify_codes_dp_dr_das([])
        assert principal == ""
        assert related == ""
        assert associated == []
|
|
|
|
|
|
class TestMergeCodageReco:
    """Checks on ``merge_codage_reco``, which updates the parsed dict in place."""

    def test_crop_prime_sur_passage_principal(self):
        """Values from the crop fill the fields the main pass left empty."""
        main_pass = {"codage_reco": {"dp": "", "dr": "", "das": []}}
        crop_result = {
            "dp": "K650",
            "dr": "T814",
            "das": [{"code": "Z954", "position": "2"}],
        }
        merge_codage_reco(main_pass, crop_result)
        merged = main_pass["codage_reco"]
        assert merged["dp"] == "K650"
        assert merged["dr"] == "T814"
        assert len(merged["das"]) == 1

    def test_crop_vide_garde_passage_principal(self):
        """If the crop has an empty field that the main pass had filled,
        nothing is degraded: the main-pass value is kept."""
        main_pass = {"codage_reco": {"dp": "K650", "dr": "", "das": []}}
        crop_result = {"dp": "", "dr": "", "das": []}
        merge_codage_reco(main_pass, crop_result)
        assert main_pass["codage_reco"]["dp"] == "K650"  # preserved

    def test_codage_reco_initialement_absent(self):
        """Merging into a dict with no ``codage_reco`` key creates it."""
        main_pass = {}
        crop_result = {"dp": "K650", "dr": "", "das": []}
        merge_codage_reco(main_pass, crop_result)
        assert main_pass["codage_reco"]["dp"] == "K650"

    def test_trace_crop_ajoutee(self):
        """The crop result is recorded under ``_crop_recodage`` -> ``result``."""
        main_pass = {"codage_reco": {"dp": "", "dr": "", "das": []}}
        crop_result = {"dp": "K650", "_elapsed_s": 1.5}
        merge_codage_reco(main_pass, crop_result)
        crop_trace = main_pass["_crop_recodage"]
        assert crop_trace["result"]["_elapsed_s"] == 1.5
|
|
|
|
|
|
class TestResolveRecueilZones:
    """Checks on the zones returned by ``resolve_recueil_zones``."""

    def test_fallback_constantes(self):
        """Without user config, the default zones are returned."""
        reco_zone, checkbox_zones = resolve_recueil_zones()

        # The reco zone is four float coordinates.
        assert len(reco_zone) == 4
        assert all(isinstance(coord, float) for coord in reco_zone)

        # Each checkbox zone has four entries as well.
        assert len(checkbox_zones.accord) == 4
        assert len(checkbox_zones.desaccord) == 4
|