test: couvrir les modules purs du pipeline (96 nouveaux tests)
Suite de tests unitaires pour tous les modules pipeline qui ne dépendent pas du VLM — utiles pour garantir la non-régression après refactor et servir de spec vivante de chaque fonction.

Fichiers :
- tests/test_json_utils.py (20 tests) : parse_json_output + toutes les stratégies de récupération (fences, virgules manquantes, boucles vides, fermeture JSON, fallback _raw/_parse_error)
- tests/test_deskew.py (11 tests) : détection Hough + correction, image synthétique + fixtures cache réel
- tests/test_checkboxes.py (17 tests) : parse_ghs_injustifie, dark_ratio, inner_frac, et ground truth visuel sur 17 dossiers (mapping hash→OGC résolu au runtime pour éviter les constantes fragiles)
- tests/test_validation.py (18 tests) : _check_cim10/ccam/ghm/ghs, cross-checks GHM↔GHS, annotate sur JSON vide et complet, préservation de l'input (copie défensive)
- tests/test_schema.py (8 tests) : clean_dossier retire les champs debug, préserve les champs métier, compacte la validation, ne modifie pas l'input
- tests/test_zones_config.py (8 tests) : load/save round-trip, merge avec defaults, résilience JSON corrompu, get_zone

Total : 107 tests, 5.1 s d'exécution, tous passent. Aucune dépendance GPU, s'exécutent en CI.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
146
tests/test_validation.py
Normal file
146
tests/test_validation.py
Normal file
@@ -0,0 +1,146 @@
|
||||
"""Tests unitaires pour pipeline.validation."""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from pipeline.validation import (
|
||||
_check_ccam,
|
||||
_check_cim10,
|
||||
_check_ghm,
|
||||
_check_ghs,
|
||||
_cross_check_ghm_ghs,
|
||||
annotate,
|
||||
validate_recueil,
|
||||
)
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Vérifications par type de code
|
||||
# ============================================================
|
||||
|
||||
class TestCheckCim10:
    """Checks on ``_check_cim10`` for valid, empty, suffixed and invalid codes."""

    def test_code_valide(self):
        """A code expected to be known is valid and carries ``libelle_ref``."""
        result = _check_cim10("K650")
        assert result["valid"] is True
        assert "libelle_ref" in result

    def test_code_vide(self):
        """Both the empty string and ``None`` give ``valid is None``."""
        for blank in ("", None):
            assert _check_cim10(blank)["valid"] is None

    def test_code_avec_suffixe_pmsi(self):
        """A code carrying a PMSI suffix is still expected to be valid."""
        # Per the original author's note, the "*" and "+N" suffixes are
        # handled by the normalisation step of the checker.
        result = _check_cim10("C795 *")
        assert result["valid"] is True

    def test_code_invalide_avec_suggestion(self):
        """A near-miss code is invalid and the close match is suggested."""
        # "K65O" ends with the letter O where "K650" ends with a zero.
        result = _check_cim10("K65O")
        assert result["valid"] is False
        assert result.get("suggestion") == "K650"

    def test_code_invalide_sans_suggestion(self):
        """A far-fetched code is invalid and is never suggested back."""
        result = _check_cim10("ZZZZ9999")
        assert result["valid"] is False
        # The suggestion may be missing; if present it must not echo the input.
        suggestion = result.get("suggestion")
        assert suggestion is None or suggestion != "ZZZZ9999"
|
||||
|
||||
|
||||
class TestCheckGhm:
    """Checks on ``_check_ghm`` for one accepted and one rejected code."""

    def test_ghm_valide(self):
        """An accepted GHM is valid and exposes a non-empty ``ghs_possibles`` list."""
        result = _check_ghm("11M122")
        assert result["valid"] is True
        candidates = result.get("ghs_possibles")
        assert isinstance(candidates, list)
        assert len(candidates) > 0

    def test_ghm_invalide(self):
        """A GHM expected to be unknown is reported as not valid."""
        result = _check_ghm("99Z999")
        assert result["valid"] is False
|
||||
|
||||
|
||||
class TestCheckGhs:
    """Checks on ``_check_ghs`` for one accepted and one rejected code."""

    def test_ghs_valide(self):
        """GHS "4323" is expected to be flagged valid."""
        verdict = _check_ghs("4323")
        assert verdict["valid"] is True

    def test_ghs_invalide(self):
        """GHS "99999" is expected to be flagged invalid."""
        verdict = _check_ghs("99999")
        assert verdict["valid"] is False
|
||||
|
||||
|
||||
class TestCheckCcam:
    """Checks on ``_check_ccam`` for one accepted and one rejected code."""

    def test_ccam_valide(self):
        """CCAM "EBFA012" is expected to be flagged valid."""
        verdict = _check_ccam("EBFA012")
        assert verdict["valid"] is True

    def test_ccam_invalide(self):
        """CCAM "XXXX000" is expected to be flagged invalid."""
        verdict = _check_ccam("XXXX000")
        assert verdict["valid"] is False
|
||||
|
||||
|
||||
# ============================================================
|
||||
# Cross-checks GHM ↔ GHS
|
||||
# ============================================================
|
||||
|
||||
class TestCrossCheckGhmGhs:
    """Checks on ``_cross_check_ghm_ghs`` for coherent, incoherent and unusable pairs."""

    def test_couple_coherent(self):
        """A GHM paired with one of its expected GHS is checked and coherent."""
        # The original author notes that 4323 is among the GHS of 11M122.
        outcome = _cross_check_ghm_ghs("11M122", "4323")
        assert outcome["checked"] is True
        assert outcome["coherent"] is True

    def test_couple_incoherent(self):
        """A GHM paired with an unrelated GHS is checked but not coherent."""
        outcome = _cross_check_ghm_ghs("11M122", "9999")
        assert outcome["checked"] is True
        assert outcome["coherent"] is False

    def test_ghm_manquant(self):
        """With an empty GHM the cross-check is not performed."""
        outcome = _cross_check_ghm_ghs("", "4323")
        assert outcome["checked"] is False

    def test_ghm_invalide(self):
        """With an invalid GHM the cross-check is skipped and the reason says so."""
        outcome = _cross_check_ghm_ghs("99Z999", "4323")
        assert outcome["checked"] is False
        reason = outcome["reason"].lower()
        assert "invalide" in reason
|
||||
|
||||
|
||||
# ============================================================
|
||||
# annotate (intégration)
|
||||
# ============================================================
|
||||
|
||||
class TestAnnotate:
    """Integration checks on ``annotate`` over empty and populated documents."""

    def test_annotate_json_vide(self):
        """An empty extraction passes through: ``fichier`` kept, extraction still empty."""
        out = annotate({"fichier": "TEST", "extraction": {}})
        assert "fichier" in out
        assert out["extraction"] == {}

    def test_annotate_recueil_complet(self):
        """A populated ``recueil`` gets a ``_validation`` section with per-code results."""
        raw = {
            "fichier": "TEST",
            "extraction": {
                "recueil": {
                    "codage_etab": {
                        "dp": "K650",
                        "dr": "",
                        "das": [
                            {"code": "T814", "position": "2"},
                        ],
                    },
                    "codage_reco": {"dp": "", "dr": "", "das": []},
                    "ghm_etab": "11M122",
                    "ghs_etab": "4323",
                    "ghm_reco": "",
                    "ghs_reco": "",
                },
            },
        }
        out = annotate(raw)
        v = out["extraction"]["recueil"]["_validation"]
        assert v["codage_etab"]["dp"]["valid"] is True
        assert v["ghm_etab"]["valid"] is True
        assert v["cross_checks"]["etab"]["coherent"] is True
        assert v["summary"]["valid"] >= 3

    def test_annotate_preserve_source(self):
        """Annotation must not modify its input (defensive copy)."""
        # Local import keeps this test self-contained; ``copy`` is stdlib.
        import copy

        raw = {
            "fichier": "T",
            "extraction": {"recueil": {"codage_etab": {"dp": "K650"}}},
        }
        # Snapshot taken before the call so that any in-place change shows up.
        snapshot = copy.deepcopy(raw)
        out = annotate(raw)
        assert "_validation" not in raw["extraction"]["recueil"]
        assert "_validation" in out["extraction"]["recueil"]
        # Checking only the ``_validation`` key would miss other mutations;
        # comparing against the snapshot covers the whole input structure.
        assert raw == snapshot
|
||||
Reference in New Issue
Block a user