Files
Aivanov_scan_ogc/tests/test_recueil.py
Dom 3a87751444 test: couvrir les modules purs du pipeline (96 nouveaux tests)
Suite de tests unitaires pour tous les modules pipeline qui ne dépendent
pas du VLM — utiles pour garantir la non-régression après refactor et
servir de spec vivante de chaque fonction.

Fichiers :
- tests/test_json_utils.py   (20 tests) : parse_json_output + toutes les
  stratégies de récupération (fences, virgules manquantes, boucles vides,
  fermeture JSON, fallback _raw/_parse_error)
- tests/test_deskew.py       (11 tests) : détection Hough + correction,
  image synthétique + fixtures cache réel
- tests/test_checkboxes.py   (17 tests) : parse_ghs_injustifie,
  dark_ratio, inner_frac, et ground truth visuel sur 17 dossiers
  (mapping hash→OGC résolu au runtime pour éviter les constantes fragiles)
- tests/test_validation.py   (18 tests) : _check_cim10/ccam/ghm/ghs,
  cross-checks GHM↔GHS, annotate sur JSON vide et complet,
  préservation de l'input (copie défensive)
- tests/test_schema.py       (8 tests)  : clean_dossier retire les champs
  debug, préserve les champs métier, compacte la validation, ne modifie
  pas l'input
- tests/test_zones_config.py (8 tests)  : load/save round-trip, merge
  avec defaults, résilience JSON corrompu, get_zone

Total : 107 tests, 5.1 s d'exécution, tous passent. Aucune dépendance
GPU, s'exécutent en CI.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-24 23:29:23 +02:00

146 lines
5.0 KiB
Python

"""Tests unitaires pour pipeline.recueil (logique métier de la page recueil).
Les fonctions testées ici sont toutes pures (pas d'appel au VLM) :
- filter_cim10_codes
- classify_codes_dp_dr_das
- merge_codage_reco
- resolve_recueil_zones (juste lecture de config)
"""
from __future__ import annotations
from pipeline.recueil import (
classify_codes_dp_dr_das,
filter_cim10_codes,
merge_codage_reco,
resolve_recueil_zones,
)
class TestFilterCim10Codes:
    """Checks which entries ``filter_cim10_codes`` keeps and which it drops."""

    def test_codes_valides_conservés(self):
        """Three coded entries (one with a trailing ``" *"``) are all kept."""
        raw_pairs = (("K650", "1"), ("T814", "2"), ("Z954 *", "3"))
        entries = [
            {"code": code, "position": position}
            for code, position in raw_pairs
        ]
        kept = filter_cim10_codes(entries)
        assert len(kept) == 3
        assert kept[0]["code"] == "K650"

    def test_ccam_rejeté(self):
        """A CCAM code (4 letters + 3 digits) must not pass the CIM-10 filter."""
        cim10_entry = {"code": "K650", "position": ""}
        ccam_entry = {"code": "EBFA012", "position": "1"}  # CCAM
        kept = filter_cim10_codes([cim10_entry, ccam_entry])
        assert len(kept) == 1
        assert kept[0]["code"] == "K650"

    def test_code_vide_rejeté(self):
        """An entry whose code is the empty string is dropped."""
        blank = {"code": "", "position": ""}
        filled = {"code": "K650", "position": ""}
        kept = filter_cim10_codes([blank, filled])
        # NOTE(review): only the count is checked here, not which entry
        # survived -- consider also asserting on the surviving code.
        assert len(kept) == 1

    def test_non_dict_ignoré(self):
        """Items that are not dicts (a bare string, ``None``) are skipped."""
        mixed = ["K650", None, {"code": "T814", "position": ""}]
        # NOTE(review): only the count is checked here as well.
        assert len(filter_cim10_codes(mixed)) == 1

    def test_liste_vide(self):
        """Both an empty list and ``None`` yield an empty list."""
        for nothing in ([], None):
            assert filter_cim10_codes(nothing) == []
class TestClassifyCodesDpDrDas:
    """Checks how ``classify_codes_dp_dr_das`` splits codes into DP, DR and DAS."""

    def test_cas_nominal(self):
        """First code without position = DP, second without position = DR,
        then the DAS entries carrying positions."""
        entries = [
            {"code": "K650", "position": ""},
            {"code": "T814", "position": ""},
            {"code": "Z954", "position": "2"},
            {"code": "R33", "position": "3"},
        ]
        dp_code, dr_code, das_items = classify_codes_dp_dr_das(entries)
        assert dp_code == "K650"
        assert dr_code == "T814"
        assert [item["code"] for item in das_items] == ["Z954", "R33"]

    def test_dr_vide_non_duplique_dp(self):
        """When Qwen duplicates the DP (because DR is visually empty),
        DR must be treated as empty rather than DR = DP."""
        dp_entry = {"code": "K650", "position": ""}
        dp_repeat = {"code": "K650", "position": ""}  # duplicate
        das_entry = {"code": "T814", "position": "2"}
        dp_code, dr_code, das_items = classify_codes_dp_dr_das(
            [dp_entry, dp_repeat, das_entry]
        )
        assert dp_code == "K650"
        assert dr_code == ""  # deduplicated
        assert len(das_items) == 1

    def test_seulement_dp(self):
        """A single position-less code fills DP only; DR and DAS stay empty."""
        only_dp = [{"code": "K650", "position": ""}]
        dp_code, dr_code, das_items = classify_codes_dp_dr_das(only_dp)
        assert dp_code == "K650"
        assert dr_code == ""
        assert das_items == []

    def test_tous_avec_positions(self):
        """If every code has a position, DP and DR are empty and all go to DAS."""
        positioned = [
            {"code": code, "position": position}
            for code, position in (("K650", "1"), ("T814", "2"))
        ]
        dp_code, dr_code, das_items = classify_codes_dp_dr_das(positioned)
        assert dp_code == ""
        assert dr_code == ""
        assert len(das_items) == 2

    def test_vide(self):
        """An empty input yields empty DP, empty DR and an empty DAS list."""
        dp_code, dr_code, das_items = classify_codes_dp_dr_das([])
        assert dp_code == ""
        assert dr_code == ""
        assert das_items == []
class TestMergeCodageReco:
    """Checks how ``merge_codage_reco`` folds a crop result into ``parsed``.

    Every test inspects the dict passed as first argument after the call,
    i.e. the function is exercised through its in-place effect.
    """

    def test_crop_prime_sur_passage_principal(self):
        """Crop values replace the empty fields of the main pass."""
        dossier = {"codage_reco": {"dp": "", "dr": "", "das": []}}
        crop = {
            "dp": "K650",
            "dr": "T814",
            "das": [{"code": "Z954", "position": "2"}],
        }
        merge_codage_reco(dossier, crop)
        merged = dossier["codage_reco"]
        assert merged["dp"] == "K650"
        assert merged["dr"] == "T814"
        assert len(merged["das"]) == 1

    def test_crop_vide_garde_passage_principal(self):
        """If the crop has an empty field that the main pass had filled,
        nothing is degraded: the main-pass value is kept."""
        dossier = {"codage_reco": {"dp": "K650", "dr": "", "das": []}}
        empty_crop = {"dp": "", "dr": "", "das": []}
        merge_codage_reco(dossier, empty_crop)
        assert dossier["codage_reco"]["dp"] == "K650"  # preserved

    def test_codage_reco_initialement_absent(self):
        """A missing ``codage_reco`` key is created from the crop result."""
        dossier = {}
        crop = {"dp": "K650", "dr": "", "das": []}
        merge_codage_reco(dossier, crop)
        assert dossier["codage_reco"]["dp"] == "K650"

    def test_trace_crop_ajoutee(self):
        """The crop result is recorded under ``_crop_recodage`` / ``result``."""
        dossier = {"codage_reco": {"dp": "", "dr": "", "das": []}}
        crop = {"dp": "K650", "_elapsed_s": 1.5}
        merge_codage_reco(dossier, crop)
        trace = dossier["_crop_recodage"]["result"]
        assert trace["_elapsed_s"] == 1.5
class TestResolveRecueilZones:
    """Checks the shape of what ``resolve_recueil_zones`` returns."""

    def test_fallback_constantes(self):
        """Without a user config, the default zones are returned."""
        # NOTE(review): nothing here visibly isolates the test from a user
        # config on disk -- confirm the defaults are really what is exercised.
        reco_zone, checkbox_zones = resolve_recueil_zones()

        # Four float coordinates
        assert len(reco_zone) == 4
        for coord in reco_zone:
            assert isinstance(coord, float)

        # Checkbox zones
        assert len(checkbox_zones.accord) == 4
        assert len(checkbox_zones.desaccord) == 4