diff --git a/README.md b/README.md new file mode 100644 index 0000000..7867c4e --- /dev/null +++ b/README.md @@ -0,0 +1,121 @@ +# T2A — Pipeline de codage PMSI automatise + +Pipeline d'extraction et de codage CIM-10/CCAM pour le PMSI hospitalier (MCO). +Transforme les comptes rendus d'hospitalisation (CRH) et fiches Trackare en dossiers structures, codes et valorises. + +## Architecture + +``` +input/ PDFs bruts (CRH, Trackare, anapath, bacterio) + | + v +[Extraction] pdfplumber / OCR / DOCX / images + | + v +[Anonymisation] CamemBERT NER + regex (PHI -> pseudonymes) + | + v +[Codage CIM-10] LLM local (Ollama) + RAG FAISS + regles ATIH + | diagnostic_extraction -> validation_pipeline + v +[Arbitrage DP] dp_selector (LLM) -> dp_finalizer (deterministe) + | Trackare vs CRH-only, traçabilite audit + v +[Qualite] veto_engine (contestabilite) + decision_engine + | completude (checklist documents) + severity (CMA) + v +[CPAM] cpam_parser + cpam_response (contre-argumentation LLM) + | guardian deterministe + validation adversariale + v +output/ JSON structures, rapports, export RUM + | + v +[Viewer Flask] Dashboard, detail dossier, synthese DIM, CPAM, validation +``` + +## Modules principaux + +| Module | Role | +|--------|------| +| `src/extraction/` | Parsers PDF, DOCX, images, OCR, classification documents | +| `src/anonymization/` | Anonymisation NER + regex, registre d'entites | +| `src/medical/` | CIM-10, CCAM, biologie, RAG FAISS, LLM Ollama, fusion multi-documents | +| `src/quality/` | Moteur de vetos deterministe, decisions, completude, routage regles | +| `src/control/` | Controles CPAM, contre-argumentation, validation adversariale | +| `src/viewer/` | Application Flask (dashboard, detail, DIM, admin, regles) | +| `config/` | 12 fichiers YAML de regles editables via l'interface web | + +## Moteur de regles + +Le pipeline utilise un **moteur de regles 100% deterministe** (pas de LLM) pour : +- **Vetos** : bloquer les codes sans preuve, negatifs, doublons, contradictions bio +- **Decisions** : downgrade, ecartement, promotion DP +- **Conflits** : exclusions mutuelles CIM-10, incompatibilites +- **Bio** : contradiction labo vs diagnostic code +- **Completude** : checklist documents manquants + +Toutes les regles sont dans `config/*.yaml` et editables via `/admin/rules`. + +## RAG (Retrieval-Augmented Generation) + +Index FAISS avec ~23 000 vecteurs issus de : +- CIM-10 FR 2026, Guide Methodologique MCO 2026, CCAM V4 +- 30 referentiels supplementaires (COCOA 2025, fascicules ATIH, etc.) +- Embeddings : `sentence-camembert-large` (francais medical) + +Separation en 3 index : `ref` (referentiels), `proc` (procedures), `bio` (biologie). + +## Installation + +```bash +# Prerequis : Python 3.11+, Ollama avec gemma3:27b +git clone && cd t2a_v2 +python -m venv .venv && source .venv/bin/activate +pip install -e ".[dev]" + +# Variables d'environnement (.env) +OLLAMA_URL=http://localhost:11434 +T2A_MODEL_CODING=gemma3:27b +T2A_MODEL_CPAM=mistral-small3.2:24b +# ANTHROPIC_API_KEY=sk-... 
(optionnel, fallback cloud) ``` ## Utilisation ```bash # Pipeline CLI : traiter des PDFs python -m src.main input/dossier/ # Reconstruire l'index RAG python -m src.main --rebuild-index # Viewer web (developpement) python -m src.viewer # Viewer web (production) gunicorn -c gunicorn.conf.py 'src.viewer:create_app()' ``` ## Tests ```bash pytest # 239+ tests, ~10s pytest -k test_viewer # Tests viewer uniquement pytest -k test_cpam # Tests CPAM ``` ## Structure des donnees Chaque dossier produit un JSON structure (`DossierMedical` Pydantic) contenant : - `diagnostic_principal` : code CIM-10, confiance, justification, source - `diagnostics_associes` : DAS avec decisions (KEEP/DOWNGRADE/REMOVE/RULED_OUT) - `actes_ccam` : actes codes - `veto_report` : score de contestabilite (0-10), issues detectees - `completude` : checklist, score, verdict - `ghm_estimation` : GHM, severite, valorisation estimee - `controles_cpam` : contre-argumentations generees ## Deploiement Service systemd inclus (`t2a-viewer.service`), config gunicorn (`gunicorn.conf.py`). Auth HTTP Basic configurable via `T2A_DEMO_USER` / `T2A_DEMO_PASS`. diff --git a/pyproject.toml b/pyproject.toml index 991a31d..3e06aae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,10 +5,58 @@ build-backend = "setuptools.build_meta" [project] name = "t2a" version = "2.0.0" -requires-python = ">=3.12" +description = "Pipeline de codage CIM-10/CCAM automatise pour le PMSI hospitalier" +readme = "README.md" +requires-python = ">=3.11" +authors = [ + { name = "Equipe T2A" }, +] +dependencies = [ + "pdfplumber>=0.10.0", + "transformers>=4.35.0,<6.0.0", + "torch>=2.1.0", + "protobuf>=3.20.0,<7.0.0", + "regex>=2023.0", + "pydantic>=2.5.0", + "sentencepiece>=0.1.99,<0.3.0", + "edsnlp[ml]>=0.17.0", + "faiss-cpu>=1.7.0", + "sentence-transformers>=2.2.0", + "requests>=2.28.0", + "flask>=3.0.0", + "flask-httpauth>=4.0.0", + "python-dotenv>=1.0.0", + "openpyxl>=3.0.0", + "pandas>=2.0.0", + "PyMuPDF>=1.24.0", + "python-docx>=1.0.0", + "PyYAML>=6.0", + "gunicorn>=22.0.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=7.4.0", + "ruff>=0.4.0", +] + +[project.scripts] +t2a = "src.main:main" + +[tool.setuptools.packages.find] +include = ["src*"] + +[tool.ruff] +target-version = "py311" +line-length = 120 + +[tool.ruff.lint] +select = ["E", "F", "W", "I"] +ignore = ["E501"] [tool.pytest.ini_options] testpaths = ["tests"] +python_files = ["test_*.py"] addopts = "--strict-markers -x -q" markers = ["integration: tests requiring Ollama"] diff --git a/src/control/cpam_response.py b/src/control/cpam_response.py index 7a6523e..62a85fa 100644 --- a/src/control/cpam_response.py +++ b/src/control/cpam_response.py @@ -37,19 +37,6 @@ from .cpam_validation import ( _guardian_deterministic, ) -# Backward compat — sera retiré dans un commit futur -from .cpam_rag import _search_rag_queries # noqa: F401 -from .cpam_context import ( # noqa: F401 - _get_code_label, - _get_cim10_definitions, - _BIO_INTERPRETATION, - _BIO_THRESHOLDS, - _assess_dossier_strength, - _build_bio_summary, - _build_bio_confrontation, - _check_das_bio_coherence, -) -from .cpam_validation import _CIM10_CODE_RE, _validate_adversarial as _validate_adversarial, _assess_quality_tier as _assess_quality_tier, _fuzzy_match_ref as _fuzzy_match_ref, _sanitize_unauthorized_codes as _sanitize_unauthorized_codes # noqa: F401 logger = logging.getLogger(__name__)
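Les ré-exports de compatibilité supprimés ci-dessus ne sont plus disponibles ; esquisse (hypothétique) de la migration attendue côté appelant, en important directement les modules découpés :

```python
# Esquisse de migration (exemple hypothetique ; les chemins de modules
# correspondent aux imports reellement supprimes ci-dessus).
# Avant : from src.control.cpam_response import _assess_dossier_strength
from src.control.cpam_context import _assess_dossier_strength
from src.control.cpam_validation import _validate_adversarial
```

diff --git a/src/medical/cim10_extractor.py b/src/medical/cim10_extractor.py index 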
f960aef..3789a34 100644 --- a/src/medical/cim10_extractor.py +++ b/src/medical/cim10_extractor.py @@ -54,12 +54,6 @@ from .validation_pipeline import ( _validate_justifications, ) -# Backward compat — sera retiré dans un commit futur -from .bio_normals import BIO_NORMALS, _is_abnormal # noqa: F401 -from .validation_pipeline import _is_dp_family_redundant # noqa: F401 -from .diagnostic_extraction import _lookup_cim10 # noqa: F401 -from .diagnostic_extraction import _DAS_PATTERNS # noqa: F401 -from .diagnostic_extraction import _detect_nutrition_has2021 # noqa: F401 def extract_medical_info( diff --git a/src/viewer/app.py b/src/viewer/app.py index c8f30a9..a78d99d 100644 --- a/src/viewer/app.py +++ b/src/viewer/app.py @@ -16,868 +16,40 @@ from markupsafe import Markup from werkzeug.utils import secure_filename -from collections import Counter - from ..config import ( ANONYMIZED_DIR, STRUCTURED_DIR, INPUT_DIR, REPORTS_DIR, - OLLAMA_URL, CCAM_DICT_PATH, DossierMedical, + OLLAMA_URL, DossierMedical, ALLOWED_EXTENSIONS, UPLOAD_MAX_SIZE_MB, - CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF, CIM10_DICT_PATH, CIM10_SUPPLEMENTS_PATH, ) from .. import config as cfg from ..control.cpam_context import _assess_dossier_strength from ..medical.bio_normals import BIO_NORMALS from .referentiels import ReferentielManager from .validation import ValidationManager +from .helpers import ( + compute_group_stats, + compute_dashboard_stats, + compute_dim_synthesis, + collect_cpam_controls, + get_builtin_referentiels, + get_faiss_index_info, + load_ccam_dict, + scan_dossiers, + load_dossier, + fetch_ollama_models, + format_dossier_name, + _get_system_status, + register_filters, +) +from .bp_rules import bp_rules logger = logging.getLogger(__name__) -# --------------------------------------------------------------------------- -# Helpers -# --------------------------------------------------------------------------- - -def compute_group_stats(items: list[dict]) -> dict: - """Calcule des statistiques agrégées pour un groupe de dossiers. 
- - Returns: - {das_count, alertes_count, actes_count, cma_count} - """ - das_count = 0 - alertes_count = 0 - actes_count = 0 - cma_count = 0 - - for item in items: - d = item["dossier"] - das_count += len(d.diagnostics_associes) - alertes_count += len(d.alertes_codage) - actes_count += len(d.actes_ccam) - for diag in d.diagnostics_associes: - if diag.est_cma: - cma_count += 1 - if d.diagnostic_principal and d.diagnostic_principal.est_cma: - cma_count += 1 - - return { - "das_count": das_count, - "alertes_count": alertes_count, - "actes_count": actes_count, - "cma_count": cma_count, - } - - -def compute_dashboard_stats(groups: dict[str, list[dict]]) -> dict: - """Calcule les statistiques globales du pipeline pour le dashboard.""" - total_dossiers = len(groups) - total_fichiers = 0 - total_das = 0 - total_actes = 0 - total_alertes = 0 - total_cma = 0 - total_cpam = 0 - dp_confidence: Counter = Counter() - dp_validity: Counter = Counter() - code_counter: Counter = Counter() - ghm_types: Counter = Counter() - severity_dist: Counter = Counter() - processing_times: list[float] = [] - - for items in groups.values(): - total_fichiers += len(items) - for item in items: - d = item["dossier"] - total_das += len(d.diagnostics_associes) - total_actes += len(d.actes_ccam) - total_alertes += len(d.alertes_codage) - total_cpam += len(d.controles_cpam) - - if d.processing_time_s is not None: - processing_times.append(d.processing_time_s) - - # DP confidence & validity - dp = d.diagnostic_principal - if dp: - conf = dp.cim10_confidence or "none" - dp_confidence[conf] += 1 - if dp.cim10_suggestion: - dp_validity["valide"] += 1 - code_counter[dp.cim10_suggestion] += 1 - else: - dp_validity["absent"] += 1 - else: - dp_confidence["none"] += 1 - dp_validity["absent"] += 1 - - # DAS codes + CMA - for das in d.diagnostics_associes: - if das.cim10_suggestion: - code_counter[das.cim10_suggestion] += 1 - if das.est_cma: - total_cma += 1 - if dp and dp.est_cma: - total_cma += 1 - - # GHM - ghm = d.ghm_estimation - if ghm: - if ghm.type_ghm: - ghm_types[ghm.type_ghm] += 1 - severity_dist[ghm.severite] += 1 - - top_codes = code_counter.most_common(15) - top_max = top_codes[0][1] if top_codes else 1 - - return { - "total_dossiers": total_dossiers, - "total_fichiers": total_fichiers, - "total_das": total_das, - "total_actes": total_actes, - "total_alertes": total_alertes, - "total_cma": total_cma, - "total_cpam": total_cpam, - "dp_confidence": dict(dp_confidence), - "dp_validity": dict(dp_validity), - "top_codes": top_codes, - "top_max": top_max, - "ghm_types": dict(ghm_types), - "severity_dist": dict(severity_dist), - "processing_time_total": sum(processing_times), - "processing_time_avg": sum(processing_times) / len(processing_times) if processing_times else 0, - } - - -def compute_dim_synthesis(groups: dict[str, list[dict]]) -> dict: - """Calcule les indicateurs de synthèse pour la vue médecin DIM.""" - # --- DP Arbitrage --- - dp_total = 0 - dp_confirmed = 0 - dp_review = 0 - dp_modified = 0 # finalizer a changé le DP - dp_conf_dist: Counter = Counter() # high/medium/low - dp_source_dist: Counter = Counter() # trackare/crh/override - - # --- DAS Qualité --- - das_total = 0 - das_kept = 0 - das_downgraded = 0 - das_removed = 0 - das_ruled_out = 0 - das_cma = 0 - das_no_code = 0 - - # --- Contestabilité (Veto) --- - veto_dist: Counter = Counter() # PASS/NEED_INFO/FAIL - veto_scores: list[int] = [] - top_vetos: Counter = Counter() - - # --- Complétude --- - completude_dist: Counter = Counter() # 
defendable/fragile/indefendable - completude_scores: list[int] = [] - - # --- CPAM --- - cpam_total = 0 - cpam_impact_total = 0 - cpam_by_priority: Counter = Counter() - cpam_by_status: Counter = Counter() - - # --- Alertes prioritaires --- - dossiers_review: list[dict] = [] - dossiers_fail: list[dict] = [] - dossiers_indefendable: list[dict] = [] - - for group_name, items in groups.items(): - for item in items: - d = item["dossier"] - dname = format_dossier_name(group_name) - dpath = item["path_rel"] - - # DP - dp_final = d.dp_final - dp_track = d.dp_trackare - if dp_final: - dp_total += 1 - dp_conf_dist[dp_final.confidence or "none"] += 1 - if dp_final.verdict == "CONFIRMED": - dp_confirmed += 1 - else: - dp_review += 1 - dossiers_review.append({"name": dname, "path": dpath, - "reason": dp_final.reason or "DP à valider", - "code": dp_final.chosen_code or "?"}) - # Modification DP - if dp_track and dp_final.chosen_code and dp_track.chosen_code: - if dp_final.chosen_code != dp_track.chosen_code: - dp_modified += 1 - # Source - flags = d.quality_flags or {} - if flags.get("trackare_only_mode"): - dp_source_dist["trackare"] += 1 - elif flags.get("crh_only_mode"): - dp_source_dist["crh"] += 1 - elif flags.get("override_trackare_by_crh_confirmed") or flags.get("trackare_symptom_overridden"): - dp_source_dist["override_crh"] += 1 - elif flags.get("trackare_confirmed_by_crh"): - dp_source_dist["confirmé"] += 1 - else: - dp_source_dist["autre"] += 1 - elif d.diagnostic_principal: - dp_total += 1 - dp_conf_dist[d.diagnostic_principal.cim10_confidence or "none"] += 1 - - # DAS - for das in d.diagnostics_associes: - das_total += 1 - dec = das.cim10_decision - if dec: - action = dec.action - if action == "KEEP": - das_kept += 1 - elif action == "DOWNGRADE": - das_downgraded += 1 - elif action == "REMOVE": - das_removed += 1 - elif action == "RULED_OUT": - das_ruled_out += 1 - else: - das_kept += 1 - else: - das_kept += 1 - if das.est_cma: - das_cma += 1 - if not das.cim10_final and not das.cim10_suggestion: - das_no_code += 1 - - # Veto - vr = d.veto_report - if vr: - veto_dist[vr.verdict] += 1 - veto_scores.append(vr.score_contestabilite) - for issue in (vr.issues or []): - top_vetos[issue.veto] += 1 - if vr.verdict == "FAIL": - dossiers_fail.append({"name": dname, "path": dpath, - "score": vr.score_contestabilite, - "issues": len(vr.issues or [])}) - - # Complétude - comp = d.completude - if comp: - completude_dist[comp.verdict_global] += 1 - completude_scores.append(comp.score_global) - if comp.verdict_global == "indefendable": - dossiers_indefendable.append({"name": dname, "path": dpath, - "score": comp.score_global, - "manquants": len(comp.documents_manquants or [])}) - - # CPAM - for ctrl in d.controles_cpam: - cpam_total += 1 - fi = ctrl.financial_impact - if fi: - cpam_impact_total += fi.impact_estime_euros or 0 - cpam_by_priority[fi.priorite or "normale"] += 1 - cpam_by_status[ctrl.validation_dim or "non_valide"] += 1 - - avg_veto = round(sum(veto_scores) / len(veto_scores)) if veto_scores else 0 - avg_completude = round(sum(completude_scores) / len(completude_scores)) if completude_scores else 0 - - return { - "dp": { - "total": dp_total, - "confirmed": dp_confirmed, - "review": dp_review, - "modified": dp_modified, - "confidence": dict(dp_conf_dist), - "source": dict(dp_source_dist), - }, - "das": { - "total": das_total, - "kept": das_kept, - "downgraded": das_downgraded, - "removed": das_removed, - "ruled_out": das_ruled_out, - "cma": das_cma, - "no_code": das_no_code, - 
"taux_modification": round((das_downgraded + das_removed + das_ruled_out) / das_total * 100, 1) if das_total else 0, - }, - "veto": { - "distribution": dict(veto_dist), - "avg_score": avg_veto, - "top_issues": top_vetos.most_common(10), - }, - "completude": { - "distribution": dict(completude_dist), - "avg_score": avg_completude, - }, - "cpam": { - "total": cpam_total, - "impact_total": cpam_impact_total, - "by_priority": dict(cpam_by_priority), - "by_status": dict(cpam_by_status), - }, - "alertes": { - "review": dossiers_review[:20], - "fail": dossiers_fail[:20], - "indefendable": dossiers_indefendable[:20], - }, - } - - -def _compute_jours_restants(ctrl) -> int | None: - """Calcule les jours restants avant la date limite de réponse.""" - if not ctrl.date_limite_reponse: - return None - from datetime import datetime - try: - limite = datetime.strptime(ctrl.date_limite_reponse, "%d/%m/%Y") - return (limite - datetime.now()).days - except (ValueError, TypeError): - return None - - -def collect_cpam_controls(groups: dict[str, list[dict]]) -> list[dict]: - """Collecte tous les contrôles CPAM de tous les dossiers, avec impact financier.""" - from ..medical.ghm import estimate_financial_impact - - _PRIORITE_ORDER = {"critique": 0, "haute": 1, "normale": 2, "faible": 3} - - controls = [] - for group_name, items in groups.items(): - for item in items: - d = item["dossier"] - dp_code = d.diagnostic_principal.cim10_suggestion if d.diagnostic_principal else None - for ctrl in d.controles_cpam: - # Calculer l'impact financier si absent - if ctrl.financial_impact is None and d.ghm_estimation: - ctrl.financial_impact = estimate_financial_impact(d.ghm_estimation) - controls.append({ - "group_name": group_name, - "filepath": item["path_rel"], - "ctrl": ctrl, - "dp_code": dp_code, - "jours_restants": _compute_jours_restants(ctrl), - }) - # Tri : 1) priorité financière, 2) désaccords (confirme) avant accords (retient), 3) OGC - controls.sort(key=lambda c: ( - _PRIORITE_ORDER.get( - c["ctrl"].financial_impact.priorite if c["ctrl"].financial_impact else "normale", - 2, - ), - 0 if "confirme" in (c["ctrl"].decision_ucr or "").lower() else 1, - c["ctrl"].numero_ogc, - )) - return controls - - -def get_builtin_referentiels() -> list[dict]: - """Retourne les infos sur les référentiels intégrés (PDFs + dicts).""" - from ..config import BASE_DIR - rag_index_dir = BASE_DIR / "data" / "rag_index" - - # Charger les chunks depuis TOUS les metadata (ref, proc, bio, legacy) - chunks_by_doc: dict[str, int] = {} - for meta_file in rag_index_dir.glob("metadata*.json"): - try: - meta = json.loads(meta_file.read_text(encoding="utf-8")) - for m in meta: - doc = m.get("document", "") - chunks_by_doc[doc] = chunks_by_doc.get(doc, 0) + 1 - except Exception: - pass - - refs = [] - # (nom, path, ext, doc_keys pour compter les chunks, edition, validité) - builtin_sources = [ - ("CIM-10 FR 2026", CIM10_PDF, ".pdf", ["cim10", "cim10_alpha"], - "11/12/2025", "2026 (provisoire)"), - ("Guide Méthodologique MCO 2026", GUIDE_METHODO_PDF, ".pdf", ["guide_methodo"], - "2025", "2026 (provisoire)"), - ("CCAM descriptive PMSI V4", CCAM_PDF, ".pdf", ["ccam"], - "2025", "V4 2025"), - ("Dictionnaire CIM-10", CIM10_DICT_PATH, ".json", [], - "", ""), - ("Suppléments CIM-10", CIM10_SUPPLEMENTS_PATH, ".json", [], - "", ""), - ("Dictionnaire CCAM", CCAM_DICT_PATH, ".json", [], - "", ""), - ] - for name, path, ext, doc_keys, edition, validite in builtin_sources: - size_mb = path.stat().st_size / (1024 * 1024) if path.exists() else 0 - mtime = "" - if 
path.exists(): - import datetime as _dt - mtime = _dt.datetime.fromtimestamp(path.stat().st_mtime).strftime("%d/%m/%Y") - chunks = sum(chunks_by_doc.get(k, 0) for k in doc_keys) - refs.append({ - "name": name, - "filename": path.name, - "extension": ext, - "size_mb": size_mb, - "chunks": chunks, - "exists": path.exists(), - "edition": edition, - "validite": validite, - "file_date": mtime, - }) - - # Référentiels supplémentaires indexés (ref:*.pdf dans les metadata) - from ..config import REFERENTIELS_DIR - pdfs_dir = REFERENTIELS_DIR / "pdfs" - for doc_name, count in sorted(chunks_by_doc.items()): - if doc_name.startswith("ref:") or doc_name.startswith("proc:"): - prefix, fname = doc_name.split(":", 1) - pdf_path = pdfs_dir / fname - size_mb = pdf_path.stat().st_size / (1024 * 1024) if pdf_path.exists() else 0 - mtime = "" - if pdf_path.exists(): - import datetime as _dt - mtime = _dt.datetime.fromtimestamp(pdf_path.stat().st_mtime).strftime("%d/%m/%Y") - refs.append({ - "name": fname.replace("_", " ").replace(".pdf", ""), - "filename": fname, - "extension": ".pdf", - "size_mb": size_mb, - "chunks": count, - "exists": pdf_path.exists(), - "edition": "", - "validite": "", - "file_date": mtime, - "category": prefix, - }) - - return refs - - -def get_faiss_index_info() -> dict: - """Retourne les informations détaillées sur les index FAISS.""" - from ..config import BASE_DIR - from ..medical.rag_index import check_faiss_ready - rag_dir = BASE_DIR / "data" / "rag_index" - - info = {"ok": False, "indexes": [], "total_vectors": 0, "last_build": ""} - - status = check_faiss_ready() - info["ok"] = status["ok"] - info["total_vectors"] = status["ref"] + status["proc"] + status["bio"] + status["legacy"] - - for kind, label in [("ref", "Référentiels CIM-10"), ("proc", "Procédures/Guides"), - ("bio", "Biologie"), ("all", "Legacy (combiné)")]: - idx_file = rag_dir / f"faiss_{kind}.index" if kind != "all" else rag_dir / "faiss.index" - meta_file = rag_dir / f"metadata_{kind}.json" if kind != "all" else rag_dir / "metadata.json" - count = status.get(kind, status.get("legacy", 0)) if kind == "all" else status.get(kind, 0) - mtime = "" - size_mb = 0 - if idx_file.exists(): - import datetime as _dt - mtime = _dt.datetime.fromtimestamp(idx_file.stat().st_mtime).strftime("%d/%m/%Y %H:%M") - size_mb = idx_file.stat().st_size / (1024 * 1024) - info["indexes"].append({ - "kind": kind, "label": label, - "vectors": count, "size_mb": round(size_mb, 1), - "last_build": mtime, "exists": idx_file.exists(), - }) - if mtime and (not info["last_build"] or mtime > info["last_build"]): - info["last_build"] = mtime - - return info - - -def load_ccam_dict() -> dict[str, dict]: - """Charge le dictionnaire CCAM pour les regroupements.""" - if CCAM_DICT_PATH.exists(): - try: - data = json.loads(CCAM_DICT_PATH.read_text(encoding="utf-8")) - return data - except Exception: - logger.warning("Impossible de charger le dictionnaire CCAM") - return {} - - -_scan_cache: dict[str, object] = {"data": None, "ts": 0.0} -_SCAN_TTL = 30 # secondes - - -def scan_dossiers() -> dict[str, list[dict]]: - """Scanne output/structured/ et retourne les fichiers groupés par sous-dossier. - - Résultat mis en cache pendant 30s pour éviter de re-scanner le FS à chaque requête. - - Returns: - {"racine": [{name, path_rel, dossier}, ...], "sous-dossier": [...]} - Chaque groupe contient aussi une clé "stats" avec les compteurs agrégés. 
- """ - now = time.monotonic() - if _scan_cache["data"] is not None and (now - _scan_cache["ts"]) < _SCAN_TTL: - return _scan_cache["data"] - - groups: dict[str, list[dict]] = {} - - for json_path in sorted(STRUCTURED_DIR.rglob("*.json")): - rel = json_path.relative_to(STRUCTURED_DIR) - parts = rel.parts - - if len(parts) == 1: - group_name = "racine" - else: - group_name = str(Path(*parts[:-1])) - - try: - data = json.loads(json_path.read_text(encoding="utf-8")) - dossier = DossierMedical.model_validate(data) - except Exception: - logger.warning("Impossible de charger %s", json_path) - continue - - groups.setdefault(group_name, []).append({ - "name": json_path.stem, - "path_rel": str(rel), - "dossier": dossier, - }) - - _scan_cache["data"] = groups - _scan_cache["ts"] = now - return groups - - -def load_dossier(path_rel: str) -> DossierMedical: - """Charge un JSON et le désérialise. Vérifie contre le path traversal.""" - safe_path = (STRUCTURED_DIR / path_rel).resolve() - if not safe_path.is_relative_to(STRUCTURED_DIR.resolve()): - abort(403) - if not safe_path.exists(): - abort(404) - - data = json.loads(safe_path.read_text(encoding="utf-8")) - return DossierMedical.model_validate(data) - - -def fetch_ollama_models() -> list[str]: - """Appelle GET {OLLAMA_URL}/api/tags pour lister les modèles disponibles.""" - try: - resp = requests.get(f"{cfg.OLLAMA_URL}/api/tags", timeout=5) - resp.raise_for_status() - models = resp.json().get("models", []) - return [m["name"] for m in models] - except Exception: - logger.warning("Impossible de contacter Ollama pour lister les modèles") - return [] - - -# --------------------------------------------------------------------------- -# Filtres Jinja2 -# --------------------------------------------------------------------------- - -_CONFIDENCE_COLORS = { - "high": ("#16a34a", "#dcfce7"), - "medium": ("#ca8a04", "#fef9c3"), - "low": ("#dc2626", "#fee2e2"), -} - -_CONFIDENCE_LABELS = { - "high": "Haute", - "medium": "Moyenne", - "low": "Basse", -} - - -def confidence_badge(value: str | None) -> Markup: - if not value: - return Markup("") - fg, bg = _CONFIDENCE_COLORS.get(value, ("#6b7280", "#f3f4f6")) - label = _CONFIDENCE_LABELS.get(value, value) - return Markup( - f'' - f'{label}' - ) - - -def confidence_label(value: str | None) -> str: - if not value: - return "" - return _CONFIDENCE_LABELS.get(value, value) - - -_SEVERITY_STYLES = { - "severe": ("Sévère", "#dc2626", "#fee2e2"), - "modere": ("Modéré", "#92400e", "#fef3c7"), - "leger": ("Léger", "#065f46", "#d1fae5"), -} - -_CMA_LEVEL_STYLES = { - 1: ("1", "#6b7280", "#f3f4f6"), # gris — pas CMA - 2: ("2", "#065f46", "#d1fae5"), # vert - 3: ("3", "#92400e", "#fef3c7"), # jaune/orange - 4: ("4", "#dc2626", "#fee2e2"), # rouge -} - - -def format_duration(seconds: float | None) -> str: - """Formate une durée en secondes vers un format lisible (ex: 2min 30s).""" - if seconds is None: - return "" - if seconds < 60: - return f"{seconds:.1f}s" - minutes = int(seconds // 60) - secs = int(seconds % 60) - if secs == 0: - return f"{minutes}min" - return f"{minutes}min {secs:02d}s" - - -def severity_badge(value: str | None) -> Markup: - if not value or value not in _SEVERITY_STYLES: - return Markup("") - label, fg, bg = _SEVERITY_STYLES[value] - return Markup( - f'' - f'{label}' - ) - - -def cma_level_badge(value: int | None) -> Markup: - """Badge CMA niveau 1-4 avec couleurs graduées.""" - if value is None or value < 1: - return Markup("") - level = min(value, 4) - label, fg, bg = _CMA_LEVEL_STYLES.get(level, 
- - def format_dossier_name(name: str) -> str: - """Retourne le nom complet du dossier (ex: 1_23096332).""" - if name == "racine": - return "Non classés" - return name - - - def format_doc_name(name: str) -> str: - """Transforme un nom de fichier JSON en nom lisible.""" - n = name.lower() - if "fusionne" in n: - return "Fusionné" - if n.startswith("cro") or n.startswith("crh"): - return name.split("_")[0].upper() - if "trackare" in n: - return "Trackare" - if "anapath" in n: - return "Anapath" - return name - - - def decision_badge(decision) -> Markup: - """Badge HTML pour une CodeDecision (action != KEEP).""" - if not decision: - return Markup("") - action = decision.get("action", "KEEP") if isinstance(decision, dict) else getattr(decision, "action", "KEEP") - if action == "KEEP": - return Markup("") - labels = { - "DOWNGRADE": ("Rétrogradé", "#fef3c7", "#92400e"), - "REMOVE": ("Supprimé", "#fee2e2", "#dc2626"), - "RULED_OUT": ("Écarté (Contradiction)", "#f1f5f9", "#64748b"), - "NEED_INFO": ("Preuve manquante", "#fff7ed", "#c2410c"), - "PROMOTE_DP": ("Promu en DP", "#dbeafe", "#1d4ed8"), - } - label, bg, fg = labels.get(action, (action, "#f1f5f9", "#64748b")) - return Markup(f'<span style="background:{bg};color:{fg}">{label}</span>') - - - def format_cpam_text(text: str | None) -> Markup: - """Convertit un texte CPAM (section) en HTML avec puces et paragraphes.""" - if not text: - return Markup("") - from markupsafe import escape - lines = str(text).split("\n") - html_parts: list[str] = [] - in_list = False - for line in lines: - stripped = line.strip() - if not stripped: - if in_list: - html_parts.append("</ul>") - in_list = False - html_parts.append("<br>") - continue - if stripped.startswith("- "): - if not in_list: - html_parts.append("<ul>") - in_list = True - html_parts.append(f"<li>{escape(stripped[2:])}</li>") - else: - if in_list: - html_parts.append("</ul>") - in_list = False - html_parts.append(f"<p>{escape(stripped)}</p>") - if in_list: - html_parts.append("</ul>") - return Markup("\n".join(html_parts))
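Exemple d'entrée/sortie du filtre ci-dessus (esquisse ; la sortie exacte dépend de la reconstruction des balises) :

```python
# Esquisse : conversion d'une section CPAM (texte brut) en HTML.
texte = "Contexte clinique documente\n- CRP a 180 mg/L\n- hemocultures positives"
print(format_cpam_text(texte))
# <p>Contexte clinique documente</p>
# <ul>
# <li>CRP a 180 mg/L</li>
# <li>hemocultures positives</li>
# </ul>
```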
") - if in_list: - html_parts.append("") - return Markup("\n".join(html_parts)) - - -# --------------------------------------------------------------------------- -# App factory -# --------------------------------------------------------------------------- - -def human_where(value: str | None) -> str: - """Rend une localisation technique lisible (ex: diagnostics_associes[0] -> DAS n°1).""" - if not value: - return "Global" - if value == "diagnostic_principal": - return "Diagnostic Principal" - if value == "diagnostics_associes": - return "Diagnostics Associés" - if value == "sejour": - return "Séjour" - - # Matching diagnostics_associes[i] - m = re.match(r"diagnostics_associes\[(\d+)\]", value) - if m: - return f"DAS n°{int(m.group(1)) + 1}" - - # Matching actes_ccam[i] - m = re.match(r"actes_ccam\[(\d+)\]", value) - if m: - return f"Acte n°{int(m.group(1)) + 1}" - - return value - - -def _date_to_iso(date_fr: str) -> str: - """Convertit JJ/MM/AAAA → YYYY-MM-DD pour les inputs HTML date.""" - try: - parts = date_fr.strip().split("/") - if len(parts) == 3: - return f"{parts[2]}-{parts[1]}-{parts[0]}" - except Exception: - pass - return "" - - -_status_cache: dict[str, object] = {"data": None, "ts": 0.0} -_STATUS_TTL = 120 # secondes - - -def _get_system_status() -> list[dict]: - """Détecte l'état des composants du pipeline T2A (cache 120s).""" - now = time.monotonic() - if _status_cache["data"] is not None and (now - _status_cache["ts"]) < _STATUS_TTL: - return _status_cache["data"] - - from ..config import OLLAMA_URL, OLLAMA_MODELS - - components = [] - - # 1. Moteur de règles (VetoEngine) - components.append({"name": "Moteur de règles (VetoEngine)", "status": True, "detail": "Actif"}) - - # 2. LLM Ollama - ollama_ok = False - ollama_detail = "Non disponible" - try: - r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=3) - if r.status_code == 200: - ollama_ok = True - models_info = ", ".join(f"{role}={model}" for role, model in OLLAMA_MODELS.items()) - ollama_detail = models_info - except Exception: - pass - components.append({"name": "LLM Ollama", "status": ollama_ok, "detail": ollama_detail}) - - # 3. Fallback Anthropic - api_key = os.environ.get("ANTHROPIC_API_KEY", "") - components.append({ - "name": "Fallback Anthropic (Haiku)", - "status": bool(api_key), - "detail": "Clé configurée" if api_key else "Clé absente", - }) - - # 4. Index FAISS (RAG) - try: - from ..medical.rag_index import check_faiss_ready - faiss_check = check_faiss_ready() - if faiss_check["ok"]: - total = faiss_check["ref"] + faiss_check["proc"] + faiss_check["bio"] + faiss_check["legacy"] - parts = [] - if faiss_check["ref"]: - parts.append(f"ref={faiss_check['ref']}") - if faiss_check["proc"]: - parts.append(f"proc={faiss_check['proc']}") - if faiss_check["bio"]: - parts.append(f"bio={faiss_check['bio']}") - detail = f"{total} vecteurs ({', '.join(parts)})" - else: - detail = "; ".join(faiss_check["errors"][:2]) - components.append({ - "name": "Index FAISS (RAG)", - "status": faiss_check["ok"], - "detail": detail, - }) - except Exception as e: - components.append({ - "name": "Index FAISS (RAG)", - "status": False, - "detail": f"Erreur vérification : {e}", - }) - - # 5. Extraction PDF - components.append({"name": "Extraction PDF (pdfplumber)", "status": True, "detail": "Actif"}) - - # 6. 
Anonymisation NER - ner_ok = False - try: - from transformers import AutoTokenizer - AutoTokenizer.from_pretrained("Jean-Baptiste/camembert-ner", local_files_only=True) - ner_ok = True - except Exception: - pass - components.append({ - "name": "Anonymisation NER (CamemBERT)", - "status": ner_ok, - "detail": "Modèle en cache" if ner_ok else "Modèle non trouvé", - }) - - # 7. Embeddings — vérifier le cache HuggingFace sans charger le modèle - emb_ok = False - try: - from huggingface_hub import try_to_load_from_cache - result = try_to_load_from_cache("dangvantuan/sentence-camembert-large", "config.json") - emb_ok = result is not None and isinstance(result, str) - except Exception: - pass - components.append({ - "name": "Embeddings (sentence-camembert-large)", - "status": emb_ok, - "detail": "Modèle en cache" if emb_ok else "Modèle non trouvé", - }) - - _status_cache["data"] = components - _status_cache["ts"] = now - return components - - -def _sort_qc_alerts(alerts: list[str]) -> list[str]: - """Trie les alertes QC : DP d'abord, puis critiques, puis le reste.""" - def _key(a: str) -> tuple[int, int]: - text = a.lower() - # DP en premier - dp = 0 if " dp " in text or text.startswith("dp ") or "diagnostic principal" in text else 1 - # Critiques ensuite - critical = 0 if any(k in text for k in ("high→low", "high → low", "à reconsidérer", "reconsider")) else 1 - return (dp, critical) - return sorted(alerts, key=_key) - - def create_app() -> Flask: app = Flask(__name__) - # --- Authentification HTTP Basic (optionnelle, activée via env) --- + # --- Authentification HTTP Basic (optionnelle) --- auth = HTTPBasicAuth() demo_user = os.environ.get("T2A_DEMO_USER", "") demo_pass = os.environ.get("T2A_DEMO_PASS", "") @@ -885,49 +57,42 @@ def create_app() -> Flask: @auth.verify_password def verify_password(username, password): if not demo_user: - return True # Auth désactivée si pas de user configuré - if username == demo_user and password == demo_pass: return True - return False + return username == demo_user and password == demo_pass @app.before_request def require_auth(): if demo_user: return auth.login_required(lambda: None)() - app.jinja_env.filters["confidence_badge"] = confidence_badge - app.jinja_env.filters["confidence_label"] = confidence_label - app.jinja_env.filters["severity_badge"] = severity_badge - app.jinja_env.filters["cma_level_badge"] = cma_level_badge - app.jinja_env.filters["format_duration"] = format_duration - app.jinja_env.filters["format_dossier_name"] = format_dossier_name - app.jinja_env.filters["format_doc_name"] = format_doc_name - app.jinja_env.filters["format_cpam_text"] = format_cpam_text - app.jinja_env.filters["decision_badge"] = decision_badge - app.jinja_env.filters["human_where"] = human_where - app.jinja_env.filters["date_to_iso"] = _date_to_iso - app.jinja_env.filters["sort_qc_alerts"] = _sort_qc_alerts + # --- Filtres Jinja2 --- + register_filters(app) + # --- Ressources partagees --- ccam_dict = load_ccam_dict() - # Vérification FAISS au démarrage du viewer + # FAISS check au demarrage try: from ..medical.rag_index import check_faiss_ready _faiss_status = check_faiss_ready() if _faiss_status["ok"]: total = _faiss_status["ref"] + _faiss_status["proc"] + _faiss_status["bio"] + _faiss_status["legacy"] - logger.info("FAISS OK : %d vecteurs chargés", total) + logger.info("FAISS OK : %d vecteurs charges", total) else: for err in _faiss_status["errors"]: logger.error("FAISS : %s", err) except Exception as e: - logger.error("Vérification FAISS échouée : %s", e) + 
logger.error("Verification FAISS echouee : %s", e) ref_manager = ReferentielManager() + val_manager = ValidationManager() + # --- Blueprints --- + app.register_blueprint(bp_rules) + + # --- Context processor --- @app.context_processor def inject_dossier_list(): - """Injecte la liste des dossiers pour l'autocomplétion sidebar.""" groups = scan_dossiers() dossier_list = [] for group_name, items in groups.items(): @@ -939,6 +104,10 @@ def create_app() -> Flask: dossier_list.append({"name": format_dossier_name(group_name), "path": rep["path_rel"]}) return {"dossier_list": dossier_list} + # =================================================================== + # Routes principales + # =================================================================== + @app.route("/") def index(): groups = scan_dossiers() @@ -949,7 +118,6 @@ def create_app() -> Flask: @app.route("/dossier/") def detail(filepath: str): dossier = load_dossier(filepath) - # Trouver les fichiers du même groupe pour la navigation groups = scan_dossiers() siblings = [] current_group = None @@ -957,17 +125,12 @@ def create_app() -> Flask: if len(rel_parts) > 1: current_group = str(Path(*rel_parts[:-1])) siblings = groups.get(current_group, []) - # Force probante (pour section CPAM) dossier_strength = _assess_dossier_strength(dossier) if dossier.controles_cpam else None return render_template( "detail.html", - dossier=dossier, - filepath=filepath, - ccam_dict=ccam_dict, - siblings=siblings, - current_group=current_group, - dossier_strength=dossier_strength, - groups=groups, + dossier=dossier, filepath=filepath, ccam_dict=ccam_dict, + siblings=siblings, current_group=current_group, + dossier_strength=dossier_strength, groups=groups, bio_normals=BIO_NORMALS, ) @@ -992,6 +155,10 @@ def create_app() -> Flask: dim = compute_dim_synthesis(groups) return render_template("dim.html", dim=dim) + # =================================================================== + # Routes CPAM + # =================================================================== + @app.route("/cpam") def cpam_list(): groups = scan_dossiers() @@ -1000,25 +167,20 @@ def create_app() -> Flask: @app.route("/api/cpam///versions") def cpam_versions(dossier_id: str, ogc: int): - """Retourne la liste des versions précédentes d'un argumentaire.""" - # dossier_id est le path relatif du JSON ; extraire le répertoire parent parts = Path(dossier_id).parts if len(parts) > 1: subdir = str(Path(*parts[:-1])) else: return jsonify({"versions": []}) - versions_dir = STRUCTURED_DIR / subdir / "_cpam_versions" if not versions_dir.is_dir(): return jsonify({"versions": []}) - versions = [] for f in sorted(versions_dir.glob(f"{ogc}_*.json"), reverse=True): try: data = json.loads(f.read_text(encoding="utf-8")) versions.append({ - "filename": f.name, - "version": data.get("version", 0), + "filename": f.name, "version": data.get("version", 0), "timestamp": data.get("timestamp", ""), "quality_tier": data.get("quality_tier"), "validation_dim": data.get("validation_dim"), @@ -1030,23 +192,18 @@ def create_app() -> Flask: @app.route("/api/cpam///deadline", methods=["POST"]) def cpam_deadline(dossier_id: str, ogc: int): - """Saisie manuelle de la date de notification pour un contrôle.""" from datetime import datetime as dt, timedelta - data = request.get_json(silent=True) or {} date_notif = data.get("date_notification", "").strip() if not date_notif: return jsonify({"error": "date_notification requis (JJ/MM/AAAA)"}), 400 - safe_path = (STRUCTURED_DIR / dossier_id).resolve() if not 
@app.route("/api/cpam/<path:dossier_id>/<int:ogc>/validate", methods=["POST"]) def cpam_validate(dossier_id: str, ogc: int): - """Valide ou rejette un argumentaire CPAM (workflow DIM).""" from datetime import datetime - data = request.get_json(silent=True) or {} statut = data.get("statut", "") if statut not in ("valide", "rejete", "en_revision", "non_valide"): return jsonify({"error": "Statut invalide"}), 400 - - # Charger le JSON du dossier safe_path = (STRUCTURED_DIR / dossier_id).resolve() if not safe_path.is_relative_to(STRUCTURED_DIR.resolve()): abort(403) if not safe_path.exists(): abort(404) - dossier_data = json.loads(safe_path.read_text(encoding="utf-8")) dossier = DossierMedical.model_validate(dossier_data) - - # Trouver le contrôle par OGC found = False for ctrl in dossier.controles_cpam: if ctrl.numero_ogc == ogc: @@ -1097,25 +242,20 @@ def create_app() -> Flask: ctrl.date_validation = datetime.now().strftime("%d/%m/%Y %H:%M") found = True break - if not found: - return jsonify({"error": f"OGC {ogc} non trouvé"}), 404 - - # Sauvegarder - safe_path.write_text( - dossier.model_dump_json(indent=2, exclude_none=True), - encoding="utf-8", - ) + return jsonify({"error": f"OGC {ogc} non trouve"}), 404 + safe_path.write_text(dossier.model_dump_json(indent=2, exclude_none=True), encoding="utf-8") return jsonify({"ok": True, "statut": statut}) + # =================================================================== + # Routes admin + # =================================================================== + @app.route("/admin/models", methods=["GET"]) def list_models(): models = fetch_ollama_models() - return jsonify({ - "models": models, - "current": cfg.OLLAMA_MODEL, - "roles": dict(cfg.OLLAMA_MODELS), - }) + return jsonify({"models": models, "current": cfg.OLLAMA_MODEL, + "roles": dict(cfg.OLLAMA_MODELS)}) @app.route("/admin/models", methods=["POST"]) def set_model(): @@ -1126,49 +266,35 @@ def create_app() -> Flask: role = data.get("role", "").strip() if role: if role not in cfg.OLLAMA_MODELS: - return jsonify({"error": f"Rôle inconnu : {role}"}), 400 + return jsonify({"error": f"Role inconnu : {role}"}), 400 cfg.OLLAMA_MODELS[role] = new_model - logger.info("Modèle Ollama pour rôle '%s' changé : %s", role, new_model) + logger.info("Modele Ollama pour role '%s' change : %s", role, new_model) return jsonify({"ok": True, "role": role, "model": new_model, "roles": dict(cfg.OLLAMA_MODELS)}) cfg.OLLAMA_MODEL = new_model - logger.info("Modèle Ollama global changé : %s", new_model) + logger.info("Modele Ollama global change : %s", new_model) return jsonify({"ok": True, "model": cfg.OLLAMA_MODEL}) @app.route("/admin/reprocess/<path:filepath>", methods=["POST"]) def reprocess(filepath: str): - """Relance le pipeline complet : process PDFs + fusion 
+ GHM + CPAM.""" from ..main import process_pdf, write_outputs from ..medical.ghm import estimate_ghm - dossier = load_dossier(filepath) - input_dir = INPUT_DIR - - # Collecter les PDFs sources (fusionné → source_files, simple → source_file) - source_names = [] - if dossier.source_files: - source_names = list(dossier.source_files) - elif dossier.source_file: - source_names = [dossier.source_file] - + source_names = list(dossier.source_files) if dossier.source_files else ( + [dossier.source_file] if dossier.source_file else []) if not source_names: return jsonify({"error": "Fichier source introuvable"}), 400 - - # Résoudre les chemins PDF dans input/ pdf_paths = [] missing = [] for name in source_names: found = None - # Essai 1 : nom exact - for p in input_dir.rglob(name): + for p in INPUT_DIR.rglob(name): if p.is_file(): found = p break - # Essai 2 : retirer le préfixe "{num}_{nip}_" ajouté par la réorg if not found: - import re stripped = re.sub(r"^\d+_\d+_", "", name) if stripped != name: - for p in input_dir.rglob(stripped): + for p in INPUT_DIR.rglob(stripped): if p.is_file(): found = p break @@ -1176,17 +302,10 @@ def create_app() -> Flask: pdf_paths.append(found) else: missing.append(name) - if not pdf_paths: return jsonify({"error": f"PDF sources introuvables : {', '.join(missing)}"}), 404 - try: - # Déterminer le subdir depuis le premier PDF trouvé - subdir = None - if pdf_paths[0].parent != input_dir: - subdir = pdf_paths[0].parent.name - - # 1. Traiter chaque PDF + subdir = pdf_paths[0].parent.name if pdf_paths[0].parent != INPUT_DIR else None group_dossiers = [] for pdf_path in pdf_paths: pdf_results = process_pdf(pdf_path) @@ -1196,25 +315,20 @@ def create_app() -> Flask: part_stem = f"{stem}_part{part_idx + 1}" if multi else stem write_outputs(part_stem, anonymized_text, new_dossier, report, subdir=subdir) group_dossiers.append(new_dossier) - - # 2. Fusion multi-PDF merged = None if len(group_dossiers) > 1 and subdir: try: from ..medical.fusion import merge_dossiers merged = merge_dossiers(group_dossiers) try: - ghm = estimate_ghm(merged) - merged.ghm_estimation = ghm + merged.ghm_estimation = estimate_ghm(merged) except Exception: - logger.warning("Erreur estimation GHM fusionné", exc_info=True) + logger.warning("Erreur estimation GHM fusionne", exc_info=True) except Exception: logger.exception("Erreur fusion groupe %s", subdir) - - # 3. Contrôle CPAM (auto-détection Excel) target = merged if merged else (group_dossiers[-1] if group_dossiers else None) if target and subdir: - cpam_dir = input_dir / "Control_cpam" + cpam_dir = INPUT_DIR / "Control_cpam" cpam_path = None if cpam_dir.is_dir(): xlsx_files = sorted(cpam_dir.glob("*.xlsx")) @@ -1228,8 +342,6 @@ def create_app() -> Flask: if cpam_data: controles = match_dossier_ogc(subdir, cpam_data) if controles: - logger.info("CPAM reprocess : %d contrôle(s) pour %s", - len(controles), subdir) for ctrl in controles: text, response_data, sources = generate_cpam_response(target, ctrl) ctrl.contre_argumentation = text @@ -1238,29 +350,21 @@ def create_app() -> Flask: target.controles_cpam = controles except Exception: logger.exception("Erreur CPAM reprocess pour %s", subdir) - - # 4. 
Écrire le dossier fusionné (après CPAM) if merged is not None and subdir: struct_dir = STRUCTURED_DIR / subdir struct_dir.mkdir(parents=True, exist_ok=True) merged_path = struct_dir / f"{subdir}_fusionne_cim10.json" merged_json = merged.model_dump_json(indent=2, exclude_none=True) merged_path.write_text(merged_json, encoding="utf-8") - logger.info("Dossier fusionné réécrit : %s", merged_path) - - # Sync vers le répertoire du viewer si différent viewer_dir = STRUCTURED_DIR / Path(filepath).parts[0] if viewer_dir.resolve() != struct_dir.resolve(): viewer_dir.mkdir(parents=True, exist_ok=True) - viewer_fusionne = viewer_dir / Path(filepath).name - viewer_fusionne.write_text(merged_json, encoding="utf-8") - logger.info("Fusionné copié vers viewer : %s", viewer_fusionne) - - msg = f"Traitement terminé ({len(group_dossiers)} dossier(s)" + (viewer_dir / Path(filepath).name).write_text(merged_json, encoding="utf-8") + msg = f"Traitement termine ({len(group_dossiers)} dossier(s)" if merged: - msg += ", fusionné" + msg += ", fusionne" if target and getattr(target, "controles_cpam", None): - msg += f", {len(target.controles_cpam)} contrôle(s) CPAM" + msg += f", {len(target.controles_cpam)} controle(s) CPAM" if missing: msg += f", {len(missing)} PDF(s) manquant(s)" msg += ")"
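Cette route de retraitement se déclenche aussi directement ; esquisse d'appel (hôte et chemin de dossier hypothétiques, réponse attendue d'après le `msg` construit ci-dessus) :

```python
# Esquisse (hypothetique) : relancer le pipeline complet pour un dossier.
import requests

r = requests.post(
    "http://localhost:5000/admin/reprocess/1_23096332/1_23096332_fusionne_cim10.json",
    timeout=600,  # le recodage LLM + fusion + CPAM peut etre long
)
print(r.json())  # attendu : {"ok": true, "message": "Traitement termine (...)"}
```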
{data.get('message', '')}"}) except Exception as e: - logger.exception("Erreur après upload + reprocess") - return jsonify({"ok": True, "message": f"PDF '{safe_name}' ajouté mais erreur retraitement : {e}"}) + logger.exception("Erreur apres upload + reprocess") + return jsonify({"ok": True, "message": f"PDF '{safe_name}' ajoute mais erreur retraitement : {e}"}) - # ------------------------------------------------------------------ - # API texte source anonymisé - # ------------------------------------------------------------------ + # =================================================================== + # API + # =================================================================== @app.route("/api/source-text/") def source_text(dossier_id: str): - """Retourne le contenu texte anonymisé de tous les fichiers d'un dossier.""" safe_dir = (ANONYMIZED_DIR / dossier_id).resolve() if not safe_dir.is_relative_to(ANONYMIZED_DIR.resolve()): abort(403) if not safe_dir.is_dir(): abort(404) - result = {} for txt_path in sorted(safe_dir.glob("*_anonymized.txt")): try: @@ -1328,47 +421,28 @@ def create_app() -> Flask: logger.warning("Impossible de lire %s", txt_path) return jsonify(result) - # ------------------------------------------------------------------ - # API PDF caviardé - # ------------------------------------------------------------------ - @app.route("/api/pdf//") def serve_redacted_pdf(dossier_id: str, filename: str): - """Sert un PDF avec les données personnelles caviardées (rectangles noirs). - - Query params optionnels : - - highlight : texte à surligner en jaune - - page : numéro de page (1-indexed) pour cibler le surlignage - """ from .pdf_redactor import load_entities_from_report, redact_pdf, highlight_text - - # Sécurité path traversal safe_dir = (INPUT_DIR / dossier_id).resolve() if not safe_dir.is_relative_to(INPUT_DIR.resolve()): abort(403) - pdf_path = safe_dir / filename if not pdf_path.exists() or pdf_path.suffix.lower() != ".pdf": abort(404) - - # Charger les entités depuis le rapport d'anonymisation stem = Path(filename).stem.replace(" ", "_") report_path = REPORTS_DIR / dossier_id / f"{stem}_report.json" entities = load_entities_from_report(report_path) if report_path.exists() else set() - pdf_bytes = redact_pdf(pdf_path, entities) - - # Surlignage optionnel highlight = request.args.get("highlight", "") page_num = request.args.get("page", type=int) if highlight: pdf_bytes = highlight_text(pdf_bytes, highlight, page_num) - return Response(pdf_bytes, mimetype="application/pdf") - # ------------------------------------------------------------------ - # Routes admin référentiels - # ------------------------------------------------------------------ + # =================================================================== + # Routes admin referentiels + # =================================================================== @app.route("/admin/referentiels") def admin_referentiels(): @@ -1382,15 +456,13 @@ def create_app() -> Flask: @app.route("/admin/referentiels/upload", methods=["POST"]) def upload_referentiel(): if "file" not in request.files: - return jsonify({"error": "Aucun fichier envoyé"}), 400 + return jsonify({"error": "Aucun fichier envoye"}), 400 f = request.files["file"] if not f.filename: return jsonify({"error": "Nom de fichier vide"}), 400 - filename = secure_filename(f.filename) try: - file_data = f.read() - ref = ref_manager.add_file(filename, file_data) + ref = ref_manager.add_file(filename, f.read()) return jsonify({"ok": True, "referentiel": ref}) except ValueError 
- # ------------------------------------------------------------------ - # Routes admin référentiels - # ------------------------------------------------------------------ + # =================================================================== + # Routes admin referentiels + # =================================================================== @app.route("/admin/referentiels") def admin_referentiels(): @@ -1382,15 +456,13 @@ def create_app() -> Flask: @app.route("/admin/referentiels/upload", methods=["POST"]) def upload_referentiel(): if "file" not in request.files: - return jsonify({"error": "Aucun fichier envoyé"}), 400 + return jsonify({"error": "Aucun fichier envoye"}), 400 f = request.files["file"] if not f.filename: return jsonify({"error": "Nom de fichier vide"}), 400 - filename = secure_filename(f.filename) try: - file_data = f.read() - ref = ref_manager.add_file(filename, file_data) + ref = ref_manager.add_file(filename, f.read()) return jsonify({"ok": True, "referentiel": ref}) except ValueError as e: return jsonify({"error": str(e)}), 400 @@ -1403,21 +475,20 @@ def create_app() -> Flask: except ValueError as e: return jsonify({"error": str(e)}), 404 except Exception as e: - logger.exception("Erreur lors de l'indexation du référentiel %s", ref_id) + logger.exception("Erreur lors de l'indexation du referentiel %s", ref_id) return jsonify({"error": str(e)}), 500 @app.route("/admin/referentiels/<ref_id>", methods=["DELETE"]) def delete_referentiel(ref_id: str): if ref_manager.remove(ref_id): return jsonify({"ok": True}) - return jsonify({"error": "Référentiel introuvable"}), 404 + return jsonify({"error": "Referentiel introuvable"}), 404 @app.route("/admin/referentiels/rebuild-index", methods=["POST"]) def rebuild_index(): try: from ..medical.rag_index import build_index build_index(force=True) - # Réindexer tous les référentiels actifs reindexed = 0 for ref in ref_manager.list_all(): if ref["status"] == "indexed": @@ -1428,23 +499,18 @@ def create_app() -> Flask: logger.exception("Erreur lors du rebuild de l'index") return jsonify({"error": str(e)}), 500 - # ------------------------------------------------------------------ + # =================================================================== # Routes validation DIM - # ------------------------------------------------------------------ - - val_manager = ValidationManager() @app.route("/validation") def validation_list(): groups = scan_dossiers() selection = val_manager.load_selection() annotations = {a["dossier_id"]: a for a in val_manager.list_annotations()} - - # Construire la liste enrichie items = [] for dossier_id in selection: annot = annotations.get(dossier_id, {}) - # Trouver les données pipeline parts = dossier_id.split("/") group_name = parts[0] if parts else "" group_items = groups.get(group_name, []) @@ -1455,11 +521,9 @@ def create_app() -> Flask: break if not pipeline and group_items: pipeline = group_items[0] - d = pipeline["dossier"] if pipeline else None items.append({ - "dossier_id": dossier_id, - "group_name": group_name, + "dossier_id": dossier_id, "group_name": group_name, "dp_code": d.diagnostic_principal.cim10_suggestion if d and d.diagnostic_principal else "", "dp_texte": d.diagnostic_principal.texte if d and d.diagnostic_principal else "", "dp_confidence": d.diagnostic_principal.cim10_confidence if d and d.diagnostic_principal else "", @@ -1469,29 +533,19 @@ def create_app() -> Flask: "validateur": annot.get("validateur", ""), "date_validation": annot.get("date_validation", ""), }) - total = len(items) valides = sum(1 for i in items if i["statut"] == "valide") en_cours = sum(1 for i in items if i["statut"] == "en_cours") - - return render_template( - "validation_list.html", - items=items, - total=total, - valides=valides, - en_cours=en_cours, - groups=groups, - ) + return render_template("validation_list.html", + items=items, total=total, valides=valides, + en_cours=en_cours, groups=groups) @app.route("/validation/<path:dossier_id>") def validation_detail(dossier_id: str): groups = scan_dossiers() - # Charger l'annotation annotation = val_manager.load_annotation(dossier_id) if not annotation: abort(404) - - # Charger les données pipeline parts = dossier_id.split("/") group_name = parts[0] if parts else "" group_items = groups.get(group_name, []) @@ -1502,25 +556,15 @@ def create_app() -> Flask: break if not pipeline and group_items: pipeline = group_items[0] - dossier = pipeline["dossier"] if pipeline else None - - # Navigation : dossier précédent / suivant 
selection = val_manager.load_selection() current_idx = selection.index(dossier_id) if dossier_id in selection else -1 prev_id = selection[current_idx - 1] if current_idx > 0 else None next_id = selection[current_idx + 1] if current_idx < len(selection) - 1 else None - - return render_template( - "validation_detail.html", - annotation=annotation, - dossier=dossier, - dossier_id=dossier_id, - group_name=group_name, - prev_id=prev_id, - next_id=next_id, - groups=groups, - ) + return render_template("validation_detail.html", + annotation=annotation, dossier=dossier, + dossier_id=dossier_id, group_name=group_name, + prev_id=prev_id, next_id=next_id, groups=groups) @app.route("/api/validation/save", methods=["POST"]) def api_validation_save(): @@ -1528,10 +572,9 @@ def create_app() -> Flask: if not data or "dossier_id" not in data: return jsonify({"error": "dossier_id requis"}), 400 dossier_id = data["dossier_id"] - # Vérifier que le dossier fait partie de la sélection selection = val_manager.load_selection() if selection and dossier_id not in selection: - return jsonify({"error": "Dossier non sélectionné pour validation"}), 403 + return jsonify({"error": "Dossier non selectionne pour validation"}), 403 try: val_manager.save_annotation(dossier_id, data) return jsonify({"ok": True}) @@ -1545,20 +588,15 @@ def create_app() -> Flask: q = request.args.get("q", "").strip() if len(q) < 2: return jsonify({"results": []}) - cim10 = load_dict() q_norm = normalize_text(q) q_upper = q.upper().strip() - results = [] - # Recherche par code exact d'abord for code, label in cim10.items(): if code.upper().startswith(q_upper): results.append({"code": code, "label": label}) if len(results) >= 20: break - - # Puis recherche par texte normalisé if len(results) < 20: for code, label in cim10.items(): if any(r["code"] == code for r in results): @@ -1567,7 +605,6 @@ def create_app() -> Flask: results.append({"code": code, "label": label}) if len(results) >= 20: break - return jsonify({"results": results}) @app.route("/validation/metrics") @@ -1575,11 +612,8 @@ def create_app() -> Flask: groups = scan_dossiers() metrics = val_manager.compute_metrics(groups) selection = val_manager.load_selection() - return render_template( - "validation_metrics.html", - metrics=metrics, - total_selection=len(selection), - groups=groups, - ) + return render_template("validation_metrics.html", + metrics=metrics, total_selection=len(selection), + groups=groups) return app diff --git a/src/viewer/bp_rules.py b/src/viewer/bp_rules.py new file mode 100644 index 0000000..adea8d7 --- /dev/null +++ b/src/viewer/bp_rules.py @@ -0,0 +1,105 @@ +"""Blueprint Flask pour la gestion des règles métier (CRUD YAML).""" + +from __future__ import annotations + +import logging + +from flask import Blueprint, render_template, request, jsonify + +from .rules_manager import ( + list_rule_files, + load_rule_file, + toggle_rule, + update_rule_field, + add_rule, + delete_rule, + _find_file, +) + +logger = logging.getLogger(__name__) + +bp_rules = Blueprint("rules", __name__) + + +@bp_rules.route("/admin/rules") +def admin_rules(): + """Page principale de gestion des règles.""" + files = list_rule_files() + # Pré-charger le contenu de chaque fichier + for f in files: + if f["exists"]: + f["data"] = load_rule_file(f["id"]) + return render_template("admin_rules.html", rule_files=files) + + +@bp_rules.route("/api/rules/<file_id>") +def api_get_rules(file_id: str): + """Retourne le contenu complet d'un fichier de règles.""" + try: + rf = _find_file(file_id) + data = load_rule_file(file_id) + return jsonify({"ok": True, "file_id": file_id, "label": rf["label"], "data": data}) + except ValueError as e: + return jsonify({"error": str(e)}), 404 + + +@bp_rules.route("/api/rules/<file_id>/toggle", methods=["POST"]) +def api_toggle_rule(file_id: str): + """Active/désactive une règle.""" + body = request.get_json(silent=True) or {} + rule_path = body.get("rule_path", "") + enabled = body.get("enabled", True) + if not rule_path: + return jsonify({"error": "rule_path requis"}), 400 + try: + data = toggle_rule(file_id, rule_path, enabled) + return jsonify({"ok": True, "data": data}) + except (ValueError, KeyError) as e: + return jsonify({"error": str(e)}), 400 + + +@bp_rules.route("/api/rules/<file_id>/update", methods=["POST"]) +def api_update_rule(file_id: str): + """Met à jour un champ d'une règle.""" + body = request.get_json(silent=True) or {} + rule_path = body.get("rule_path", "") + field = body.get("field", "") + value = body.get("value") + if not rule_path or not field: + return jsonify({"error": "rule_path et field requis"}), 400 + try: + data = update_rule_field(file_id, rule_path, field, value) + return jsonify({"ok": True, "data": data}) + except (ValueError, KeyError) as e: + return jsonify({"error": str(e)}), 400 + + +@bp_rules.route("/api/rules/<file_id>/add", methods=["POST"]) +def api_add_rule(file_id: str): + """Ajoute une nouvelle règle.""" + body = request.get_json(silent=True) or {} + parent_path = body.get("parent_path", "") + rule_id = body.get("rule_id", "").strip() + rule_data = body.get("rule_data", {}) + if not parent_path or not rule_id: + return jsonify({"error": "parent_path et rule_id requis"}), 400 + try: + data = add_rule(file_id, parent_path, rule_id, rule_data) + return jsonify({"ok": True, "data": data}) + except ValueError as e: + return jsonify({"error": str(e)}), 400 + + +@bp_rules.route("/api/rules/<file_id>/delete", methods=["POST"]) +def api_delete_rule(file_id: str): + """Supprime une règle.""" + body = request.get_json(silent=True) or {} + parent_path = body.get("parent_path", "") + rule_id = body.get("rule_id", "").strip() + if not parent_path or not rule_id: + return jsonify({"error": "parent_path et rule_id requis"}), 400 + try: + data = delete_rule(file_id, parent_path, rule_id) + return jsonify({"ok": True, "data": data}) + except ValueError as e: + return jsonify({"error": str(e)}), 400
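Le contrat JSON du blueprint ci-dessus, vu côté client ; esquisse d'utilisation (hôte, `file_id` et `rule_path` hypothétiques, le corps des requêtes suivant les champs attendus par les routes) :

```python
# Esquisse (hypothetique) : desactiver puis modifier une regle via l'API.
import requests

BASE = "http://localhost:5000"
# file_id = identifiant logique d'un des YAML de config/ ;
# rule_path = chemin de la regle dans ce YAML (valeurs illustratives).
r = requests.post(f"{BASE}/api/rules/vetos/toggle",
                  json={"rule_path": "vetos.code_sans_preuve", "enabled": False})
print(r.json())  # {"ok": true, "data": {...}} ou {"error": ...} en 400

r = requests.post(f"{BASE}/api/rules/vetos/update",
                  json={"rule_path": "vetos.code_sans_preuve",
                        "field": "severite", "value": "warning"})
print(r.json())
```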
diff --git a/src/viewer/helpers.py b/src/viewer/helpers.py
new file mode 100644
index 0000000..55e9622
--- /dev/null
+++ b/src/viewer/helpers.py
@@ -0,0 +1,702 @@
+"""Fonctions utilitaires et filtres Jinja2 pour le viewer T2A."""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+import time
+from collections import Counter
+from pathlib import Path
+
+from markupsafe import Markup
+
+from ..config import (
+    ANONYMIZED_DIR, STRUCTURED_DIR, CCAM_DICT_PATH, DossierMedical,
+    CIM10_PDF, GUIDE_METHODO_PDF, CCAM_PDF, CIM10_DICT_PATH, CIM10_SUPPLEMENTS_PATH,
+)
+
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Helpers — statistiques & données
+# ---------------------------------------------------------------------------
+
+def compute_group_stats(items: list[dict]) -> dict:
+    das_count = 0
+    alertes_count = 0
+    actes_count = 0
+    cma_count = 0
+    for item in items:
+        d = item["dossier"]
+        das_count += len(d.diagnostics_associes)
+        alertes_count += len(d.alertes_codage)
+        actes_count += len(d.actes_ccam)
+        for diag in d.diagnostics_associes:
+            if diag.est_cma:
cma_count += 1 + if d.diagnostic_principal and d.diagnostic_principal.est_cma: + cma_count += 1 + return {"das_count": das_count, "alertes_count": alertes_count, + "actes_count": actes_count, "cma_count": cma_count} + + +def compute_dashboard_stats(groups: dict[str, list[dict]]) -> dict: + total_dossiers = len(groups) + total_fichiers = 0 + total_das = 0 + total_actes = 0 + total_alertes = 0 + total_cma = 0 + total_cpam = 0 + dp_confidence: Counter = Counter() + dp_validity: Counter = Counter() + code_counter: Counter = Counter() + ghm_types: Counter = Counter() + severity_dist: Counter = Counter() + processing_times: list[float] = [] + + for items in groups.values(): + total_fichiers += len(items) + for item in items: + d = item["dossier"] + total_das += len(d.diagnostics_associes) + total_actes += len(d.actes_ccam) + total_alertes += len(d.alertes_codage) + total_cpam += len(d.controles_cpam) + if d.processing_time_s is not None: + processing_times.append(d.processing_time_s) + dp = d.diagnostic_principal + if dp: + dp_confidence[dp.cim10_confidence or "none"] += 1 + if dp.cim10_suggestion: + dp_validity["valide"] += 1 + code_counter[dp.cim10_suggestion] += 1 + else: + dp_validity["absent"] += 1 + else: + dp_confidence["none"] += 1 + dp_validity["absent"] += 1 + for das in d.diagnostics_associes: + if das.cim10_suggestion: + code_counter[das.cim10_suggestion] += 1 + if das.est_cma: + total_cma += 1 + if dp and dp.est_cma: + total_cma += 1 + ghm = d.ghm_estimation + if ghm: + if ghm.type_ghm: + ghm_types[ghm.type_ghm] += 1 + severity_dist[ghm.severite] += 1 + + top_codes = code_counter.most_common(15) + top_max = top_codes[0][1] if top_codes else 1 + + return { + "total_dossiers": total_dossiers, + "total_fichiers": total_fichiers, + "total_das": total_das, + "total_actes": total_actes, + "total_alertes": total_alertes, + "total_cma": total_cma, + "total_cpam": total_cpam, + "dp_confidence": dict(dp_confidence), + "dp_validity": dict(dp_validity), + "top_codes": top_codes, + "top_max": top_max, + "ghm_types": dict(ghm_types), + "severity_dist": dict(severity_dist), + "processing_time_total": sum(processing_times), + "processing_time_avg": sum(processing_times) / len(processing_times) if processing_times else 0, + } + + +def compute_dim_synthesis(groups: dict[str, list[dict]]) -> dict: + dp_total = 0 + dp_confirmed = 0 + dp_review = 0 + dp_modified = 0 + dp_conf_dist: Counter = Counter() + dp_source_dist: Counter = Counter() + das_total = 0 + das_kept = 0 + das_downgraded = 0 + das_removed = 0 + das_ruled_out = 0 + das_cma = 0 + das_no_code = 0 + veto_dist: Counter = Counter() + veto_scores: list[int] = [] + top_vetos: Counter = Counter() + completude_dist: Counter = Counter() + completude_scores: list[int] = [] + cpam_total = 0 + cpam_impact_total = 0 + cpam_by_priority: Counter = Counter() + cpam_by_status: Counter = Counter() + dossiers_review: list[dict] = [] + dossiers_fail: list[dict] = [] + dossiers_indefendable: list[dict] = [] + + for group_name, items in groups.items(): + for item in items: + d = item["dossier"] + dname = format_dossier_name(group_name) + dpath = item["path_rel"] + + dp_final = d.dp_final + dp_track = d.dp_trackare + if dp_final: + dp_total += 1 + dp_conf_dist[dp_final.confidence or "none"] += 1 + if dp_final.verdict == "CONFIRMED": + dp_confirmed += 1 + else: + dp_review += 1 + dossiers_review.append({"name": dname, "path": dpath, + "reason": dp_final.reason or "DP à valider", + "code": dp_final.chosen_code or "?"}) + if dp_track and dp_final.chosen_code and 
dp_track.chosen_code: + if dp_final.chosen_code != dp_track.chosen_code: + dp_modified += 1 + flags = d.quality_flags or {} + if flags.get("trackare_only_mode"): + dp_source_dist["trackare"] += 1 + elif flags.get("crh_only_mode"): + dp_source_dist["crh"] += 1 + elif flags.get("override_trackare_by_crh_confirmed") or flags.get("trackare_symptom_overridden"): + dp_source_dist["override_crh"] += 1 + elif flags.get("trackare_confirmed_by_crh"): + dp_source_dist["confirmé"] += 1 + else: + dp_source_dist["autre"] += 1 + elif d.diagnostic_principal: + dp_total += 1 + dp_conf_dist[d.diagnostic_principal.cim10_confidence or "none"] += 1 + + for das in d.diagnostics_associes: + das_total += 1 + dec = das.cim10_decision + if dec: + action = dec.action + if action == "KEEP": + das_kept += 1 + elif action == "DOWNGRADE": + das_downgraded += 1 + elif action == "REMOVE": + das_removed += 1 + elif action == "RULED_OUT": + das_ruled_out += 1 + else: + das_kept += 1 + else: + das_kept += 1 + if das.est_cma: + das_cma += 1 + if not das.cim10_final and not das.cim10_suggestion: + das_no_code += 1 + + vr = d.veto_report + if vr: + veto_dist[vr.verdict] += 1 + veto_scores.append(vr.score_contestabilite) + for issue in (vr.issues or []): + top_vetos[issue.veto] += 1 + if vr.verdict == "FAIL": + dossiers_fail.append({"name": dname, "path": dpath, + "score": vr.score_contestabilite, + "issues": len(vr.issues or [])}) + + comp = d.completude + if comp: + completude_dist[comp.verdict_global] += 1 + completude_scores.append(comp.score_global) + if comp.verdict_global == "indefendable": + dossiers_indefendable.append({"name": dname, "path": dpath, + "score": comp.score_global, + "manquants": len(comp.documents_manquants or [])}) + + for ctrl in d.controles_cpam: + cpam_total += 1 + fi = ctrl.financial_impact + if fi: + cpam_impact_total += fi.impact_estime_euros or 0 + cpam_by_priority[fi.priorite or "normale"] += 1 + cpam_by_status[ctrl.validation_dim or "non_valide"] += 1 + + avg_veto = round(sum(veto_scores) / len(veto_scores)) if veto_scores else 0 + avg_completude = round(sum(completude_scores) / len(completude_scores)) if completude_scores else 0 + + return { + "dp": {"total": dp_total, "confirmed": dp_confirmed, "review": dp_review, + "modified": dp_modified, "confidence": dict(dp_conf_dist), + "source": dict(dp_source_dist)}, + "das": {"total": das_total, "kept": das_kept, "downgraded": das_downgraded, + "removed": das_removed, "ruled_out": das_ruled_out, "cma": das_cma, + "no_code": das_no_code, + "taux_modification": round((das_downgraded + das_removed + das_ruled_out) / das_total * 100, 1) if das_total else 0}, + "veto": {"distribution": dict(veto_dist), "avg_score": avg_veto, + "top_issues": top_vetos.most_common(10)}, + "completude": {"distribution": dict(completude_dist), "avg_score": avg_completude}, + "cpam": {"total": cpam_total, "impact_total": cpam_impact_total, + "by_priority": dict(cpam_by_priority), "by_status": dict(cpam_by_status)}, + "alertes": {"review": dossiers_review[:20], "fail": dossiers_fail[:20], + "indefendable": dossiers_indefendable[:20]}, + } + + +def _compute_jours_restants(ctrl) -> int | None: + if not ctrl.date_limite_reponse: + return None + from datetime import datetime + try: + limite = datetime.strptime(ctrl.date_limite_reponse, "%d/%m/%Y") + return (limite - datetime.now()).days + except (ValueError, TypeError): + return None + + +def collect_cpam_controls(groups: dict[str, list[dict]]) -> list[dict]: + from ..medical.ghm import estimate_financial_impact + _PRIORITE_ORDER = 
{"critique": 0, "haute": 1, "normale": 2, "faible": 3} + controls = [] + for group_name, items in groups.items(): + for item in items: + d = item["dossier"] + dp_code = d.diagnostic_principal.cim10_suggestion if d.diagnostic_principal else None + for ctrl in d.controles_cpam: + if ctrl.financial_impact is None and d.ghm_estimation: + ctrl.financial_impact = estimate_financial_impact(d.ghm_estimation) + controls.append({ + "group_name": group_name, "filepath": item["path_rel"], + "ctrl": ctrl, "dp_code": dp_code, + "jours_restants": _compute_jours_restants(ctrl), + }) + controls.sort(key=lambda c: ( + _PRIORITE_ORDER.get( + c["ctrl"].financial_impact.priorite if c["ctrl"].financial_impact else "normale", 2), + 0 if "confirme" in (c["ctrl"].decision_ucr or "").lower() else 1, + c["ctrl"].numero_ogc, + )) + return controls + + +def get_builtin_referentiels() -> list[dict]: + from ..config import BASE_DIR, REFERENTIELS_DIR + import datetime as _dt + rag_index_dir = BASE_DIR / "data" / "rag_index" + + chunks_by_doc: dict[str, int] = {} + for meta_file in rag_index_dir.glob("metadata*.json"): + try: + meta = json.loads(meta_file.read_text(encoding="utf-8")) + for m in meta: + doc = m.get("document", "") + chunks_by_doc[doc] = chunks_by_doc.get(doc, 0) + 1 + except Exception: + pass + + refs = [] + builtin_sources = [ + ("CIM-10 FR 2026", CIM10_PDF, ".pdf", ["cim10", "cim10_alpha"], "11/12/2025", "2026 (provisoire)"), + ("Guide Méthodologique MCO 2026", GUIDE_METHODO_PDF, ".pdf", ["guide_methodo"], "2025", "2026 (provisoire)"), + ("CCAM descriptive PMSI V4", CCAM_PDF, ".pdf", ["ccam"], "2025", "V4 2025"), + ("Dictionnaire CIM-10", CIM10_DICT_PATH, ".json", [], "", ""), + ("Suppléments CIM-10", CIM10_SUPPLEMENTS_PATH, ".json", [], "", ""), + ("Dictionnaire CCAM", CCAM_DICT_PATH, ".json", [], "", ""), + ] + for name, path, ext, doc_keys, edition, validite in builtin_sources: + size_mb = path.stat().st_size / (1024 * 1024) if path.exists() else 0 + mtime = "" + if path.exists(): + mtime = _dt.datetime.fromtimestamp(path.stat().st_mtime).strftime("%d/%m/%Y") + chunks = sum(chunks_by_doc.get(k, 0) for k in doc_keys) + refs.append({"name": name, "filename": path.name, "extension": ext, + "size_mb": size_mb, "chunks": chunks, "exists": path.exists(), + "edition": edition, "validite": validite, "file_date": mtime}) + + pdfs_dir = REFERENTIELS_DIR / "pdfs" + for doc_name, count in sorted(chunks_by_doc.items()): + if doc_name.startswith("ref:") or doc_name.startswith("proc:"): + prefix, fname = doc_name.split(":", 1) + pdf_path = pdfs_dir / fname + size_mb = pdf_path.stat().st_size / (1024 * 1024) if pdf_path.exists() else 0 + mtime = "" + if pdf_path.exists(): + mtime = _dt.datetime.fromtimestamp(pdf_path.stat().st_mtime).strftime("%d/%m/%Y") + refs.append({"name": fname.replace("_", " ").replace(".pdf", ""), + "filename": fname, "extension": ".pdf", "size_mb": size_mb, + "chunks": count, "exists": pdf_path.exists(), "edition": "", + "validite": "", "file_date": mtime, "category": prefix}) + return refs + + +def get_faiss_index_info() -> dict: + from ..config import BASE_DIR + from ..medical.rag_index import check_faiss_ready + import datetime as _dt + rag_dir = BASE_DIR / "data" / "rag_index" + info = {"ok": False, "indexes": [], "total_vectors": 0, "last_build": ""} + status = check_faiss_ready() + info["ok"] = status["ok"] + info["total_vectors"] = status["ref"] + status["proc"] + status["bio"] + status["legacy"] + for kind, label in [("ref", "Référentiels CIM-10"), ("proc", "Procédures/Guides"), + 
("bio", "Biologie"), ("all", "Legacy (combiné)")]: + idx_file = rag_dir / f"faiss_{kind}.index" if kind != "all" else rag_dir / "faiss.index" + count = status.get(kind, status.get("legacy", 0)) if kind == "all" else status.get(kind, 0) + mtime = "" + size_mb = 0 + if idx_file.exists(): + mtime = _dt.datetime.fromtimestamp(idx_file.stat().st_mtime).strftime("%d/%m/%Y %H:%M") + size_mb = idx_file.stat().st_size / (1024 * 1024) + info["indexes"].append({"kind": kind, "label": label, "vectors": count, + "size_mb": round(size_mb, 1), "last_build": mtime, + "exists": idx_file.exists()}) + if mtime and (not info["last_build"] or mtime > info["last_build"]): + info["last_build"] = mtime + return info + + +def load_ccam_dict() -> dict[str, dict]: + if CCAM_DICT_PATH.exists(): + try: + return json.loads(CCAM_DICT_PATH.read_text(encoding="utf-8")) + except Exception: + logger.warning("Impossible de charger le dictionnaire CCAM") + return {} + + +_scan_cache: dict[str, object] = {"data": None, "ts": 0.0} +_SCAN_TTL = 30 + + +def scan_dossiers() -> dict[str, list[dict]]: + now = time.monotonic() + if _scan_cache["data"] is not None and (now - _scan_cache["ts"]) < _SCAN_TTL: + return _scan_cache["data"] + groups: dict[str, list[dict]] = {} + for json_path in sorted(STRUCTURED_DIR.rglob("*.json")): + rel = json_path.relative_to(STRUCTURED_DIR) + parts = rel.parts + group_name = "racine" if len(parts) == 1 else str(Path(*parts[:-1])) + try: + data = json.loads(json_path.read_text(encoding="utf-8")) + dossier = DossierMedical.model_validate(data) + except Exception: + logger.warning("Impossible de charger %s", json_path) + continue + groups.setdefault(group_name, []).append({ + "name": json_path.stem, "path_rel": str(rel), "dossier": dossier, + }) + _scan_cache["data"] = groups + _scan_cache["ts"] = now + return groups + + +def load_dossier(path_rel: str) -> DossierMedical: + from flask import abort + safe_path = (STRUCTURED_DIR / path_rel).resolve() + if not safe_path.is_relative_to(STRUCTURED_DIR.resolve()): + abort(403) + if not safe_path.exists(): + abort(404) + data = json.loads(safe_path.read_text(encoding="utf-8")) + return DossierMedical.model_validate(data) + + +def fetch_ollama_models() -> list[str]: + import requests + from .. 
import config as cfg
+    try:
+        resp = requests.get(f"{cfg.OLLAMA_URL}/api/tags", timeout=5)
+        resp.raise_for_status()
+        return [m["name"] for m in resp.json().get("models", [])]
+    except Exception:
+        logger.warning("Impossible de contacter Ollama pour lister les modèles")
+        return []
+
+
+# ---------------------------------------------------------------------------
+# Filtres Jinja2
+# ---------------------------------------------------------------------------
+
+_CONFIDENCE_COLORS = {
+    "high": ("#16a34a", "#dcfce7"),
+    "medium": ("#ca8a04", "#fef9c3"),
+    "low": ("#dc2626", "#fee2e2"),
+}
+_CONFIDENCE_LABELS = {"high": "Haute", "medium": "Moyenne", "low": "Basse"}
+
+
+_CONFIDENCE_TIPS = {
+    "high": "Confiance haute — le pipeline est très sûr de ce code CIM-10",
+    "medium": "Confiance moyenne — le code est probable mais mérite vérification",
+    "low": "Confiance basse — code incertain, relecture médicale recommandée",
+}
+
+
+def confidence_badge(value: str | None) -> Markup:
+    if not value:
+        return Markup("")
+    fg, bg = _CONFIDENCE_COLORS.get(value, ("#6b7280", "#f3f4f6"))
+    label = _CONFIDENCE_LABELS.get(value, value)
+    tip = _CONFIDENCE_TIPS.get(value, "Niveau de confiance du pipeline sur ce code")
+    return Markup(
+        f'<span class="badge" style="color:{fg};background:{bg}" title="{tip}">{label}</span>')
+
+
+def confidence_label(value: str | None) -> str:
+    if not value:
+        return ""
+    return _CONFIDENCE_LABELS.get(value, value)
+
+
+_SEVERITY_STYLES = {
+    "severe": ("Sévère", "#dc2626", "#fee2e2"),
+    "modere": ("Modéré", "#92400e", "#fef3c7"),
+    "leger": ("Léger", "#065f46", "#d1fae5"),
+}
+_CMA_LEVEL_STYLES = {
+    1: ("1", "#6b7280", "#f3f4f6"),
+    2: ("2", "#065f46", "#d1fae5"),
+    3: ("3", "#92400e", "#fef3c7"),
+    4: ("4", "#dc2626", "#fee2e2"),
+}
+
+
+def format_duration(seconds: float | None) -> str:
+    if seconds is None:
+        return ""
+    if seconds < 60:
+        return f"{seconds:.1f}s"
+    minutes = int(seconds // 60)
+    secs = int(seconds % 60)
+    if secs == 0:
+        return f"{minutes}min"
+    return f"{minutes}min {secs:02d}s"
+
+
+_SEVERITY_TIPS = {
+    "severe": "Impact clinique sévère — complication ou morbidité majeure augmentant significativement la valorisation T2A",
+    "modere": "Impact clinique modéré — complication ou morbidité d'importance intermédiaire",
+    "leger": "Impact clinique léger — séjour sans complication significative",
+}
+
+
+def severity_badge(value: str | None) -> Markup:
+    if not value or value not in _SEVERITY_STYLES:
+        return Markup("")
+    label, fg, bg = _SEVERITY_STYLES[value]
+    tip = _SEVERITY_TIPS.get(value, "")
+    return Markup(
+        f'<span class="badge" style="color:{fg};background:{bg}" title="{tip}">{label}</span>')
+
+
+def cma_level_badge(value: int | None) -> Markup:
+    if value is None or value < 1:
+        return Markup("")
+    level = min(value, 4)
+    label, fg, bg = _CMA_LEVEL_STYLES.get(level, _CMA_LEVEL_STYLES[1])
+    title = {
+        1: "Pas de CMA — ce diagnostic n'augmente pas la sévérité du GHM",
+        2: "CMA niveau 2 — comorbidité mineure augmentant légèrement la sévérité",
+        3: "CMA niveau 3 — comorbidité majeure augmentant significativement la sévérité",
+        4: "CMA niveau 4 — comorbidité très sévère (réanimation, décès, etc.)",
+    }.get(level, "")
+    return Markup(
+        f'<span class="badge" style="color:{fg};background:{bg}" title="{title}">CMA {label}</span>')
+
+
+def format_dossier_name(name: str) -> str:
+    if name == "racine":
+        return "Non classés"
+    return name
+
+
+def format_doc_name(name: str) -> str:
+    n = name.lower()
+    if "fusionne" in n:
+        return "Fusionné"
+    if n.startswith("cro") or n.startswith("crh"):
+        return name.split("_")[0].upper()
+    if "trackare" in n:
+        return "Trackare"
+    if "anapath" in n:
+        return "Anapath"
+    return name
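+
+
+# Usage côté template (esquisse) :
+#   {{ dp.cim10_confidence|confidence_badge }}
+# rend un badge coloré « Haute » / « Moyenne » / « Basse », avec une info-bulle explicative.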
+def decision_badge(decision) -> Markup:
+    if not decision:
+        return Markup("")
+    action = decision.get("action", "KEEP") if isinstance(decision, dict) else getattr(decision, "action", "KEEP")
+    if action == "KEEP":
+        return Markup("")
+    labels = {
+        "DOWNGRADE": ("Rétrogradé", "#fef3c7", "#92400e", "Le niveau de confiance de ce diagnostic a été abaissé par le moteur de règles"),
+        "REMOVE": ("Supprimé", "#fee2e2", "#dc2626", "Ce diagnostic a été retiré du codage car jugé non pertinent ou non étayé"),
+        "RULED_OUT": ("Écarté (Contradiction)", "#f1f5f9", "#64748b", "Ce diagnostic a été écarté car il contredit une règle ATIH (exclusion, doublon, etc.)"),
+        "NEED_INFO": ("Preuve manquante", "#fff7ed", "#c2410c", "Ce diagnostic nécessite des preuves cliniques supplémentaires pour être validé"),
+        "PROMOTE_DP": ("Promu en DP", "#dbeafe", "#1d4ed8", "Ce diagnostic a été promu en Diagnostic Principal car plus pertinent que le DP initial"),
+    }
+    info = labels.get(action, (action, "#f1f5f9", "#64748b", ""))
+    label, bg, fg = info[0], info[1], info[2]
+    tip = info[3] if len(info) > 3 else ""
+    return Markup(f'<span class="badge" style="color:{fg};background:{bg}" title="{tip}">{label}</span>')
+
+
+def format_cpam_text(text: str | None) -> Markup:
+    if not text:
+        return Markup("")
+    from markupsafe import escape
+    lines = str(text).split("\n")
+    html_parts: list[str] = []
+    in_list = False
+    for line in lines:
+        stripped = line.strip()
+        if not stripped:
+            if in_list:
+                html_parts.append("</ul>")
+                in_list = False
+            html_parts.append("<br>")
+            continue
+        if stripped.startswith("- "):
+            if not in_list:
+                html_parts.append("<ul>")
+                in_list = True
+            html_parts.append(f"<li>{escape(stripped[2:])}</li>")
+        else:
+            if in_list:
+                html_parts.append("</ul>")
+                in_list = False
+            html_parts.append(f"<p>{escape(stripped)}</p>")
+    if in_list:
+        html_parts.append("</ul>")
+    return Markup("\n".join(html_parts))
+
+
+def human_where(value: str | None) -> str:
+    if not value:
+        return "Global"
+    if value == "diagnostic_principal":
+        return "Diagnostic Principal"
+    if value == "diagnostics_associes":
+        return "Diagnostics Associés"
+    if value == "sejour":
+        return "Séjour"
+    m = re.match(r"diagnostics_associes\[(\d+)\]", value)
+    if m:
+        return f"DAS n°{int(m.group(1)) + 1}"
+    m = re.match(r"actes_ccam\[(\d+)\]", value)
+    if m:
+        return f"Acte n°{int(m.group(1)) + 1}"
+    return value
+
+
+def _date_to_iso(date_fr: str) -> str:
+    try:
+        parts = date_fr.strip().split("/")
+        if len(parts) == 3:
+            return f"{parts[2]}-{parts[1]}-{parts[0]}"
+    except Exception:
+        pass
+    return ""
+
+
+_status_cache: dict[str, object] = {"data": None, "ts": 0.0}
+_STATUS_TTL = 120
+
+
+def _get_system_status() -> list[dict]:
+    import os
+    import requests
+    now = time.monotonic()
+    if _status_cache["data"] is not None and (now - _status_cache["ts"]) < _STATUS_TTL:
+        return _status_cache["data"]
+    from ..config import OLLAMA_URL, OLLAMA_MODELS
+    components = []
+    components.append({"name": "Moteur de règles (VetoEngine)", "status": True, "detail": "Actif"})
+    ollama_ok = False
+    ollama_detail = "Non disponible"
+    try:
+        r = requests.get(f"{OLLAMA_URL}/api/tags", timeout=3)
+        if r.status_code == 200:
+            ollama_ok = True
+            ollama_detail = ", ".join(f"{role}={model}" for role, model in OLLAMA_MODELS.items())
+    except Exception:
+        pass
+    components.append({"name": "LLM Ollama", "status": ollama_ok, "detail": ollama_detail})
+    api_key = os.environ.get("ANTHROPIC_API_KEY", "")
+    components.append({"name": "Fallback Anthropic (Haiku)", "status": bool(api_key),
+                       "detail": "Clé configurée" if api_key else "Clé absente"})
+    try:
+        from ..medical.rag_index import check_faiss_ready
+        faiss_check = check_faiss_ready()
+        if faiss_check["ok"]:
+            total = faiss_check["ref"] + faiss_check["proc"] + faiss_check["bio"] + faiss_check["legacy"]
+            parts = []
+            if faiss_check["ref"]:
+                parts.append(f"ref={faiss_check['ref']}")
+            if faiss_check["proc"]:
+                parts.append(f"proc={faiss_check['proc']}")
+            if faiss_check["bio"]:
+                parts.append(f"bio={faiss_check['bio']}")
+            detail = f"{total} vecteurs ({', '.join(parts)})"
+        else:
+            detail = "; ".join(faiss_check["errors"][:2])
+        components.append({"name": "Index FAISS (RAG)", "status": faiss_check["ok"], "detail": detail})
+    except Exception as e:
+        components.append({"name": "Index FAISS (RAG)", "status": False,
+                           "detail": f"Erreur vérification : {e}"})
+    components.append({"name": "Extraction PDF (pdfplumber)", "status": True, "detail": "Actif"})
+    ner_ok = False
+    try:
+        from transformers import AutoTokenizer
+        AutoTokenizer.from_pretrained("Jean-Baptiste/camembert-ner", local_files_only=True)
+        ner_ok = True
+    except Exception:
+        pass
+    components.append({"name": "Anonymisation NER (CamemBERT)", "status": ner_ok,
+                       "detail": "Modèle en cache" if ner_ok else "Modèle non trouvé"})
+    emb_ok = False
+    try:
+        from huggingface_hub import try_to_load_from_cache
+        result = try_to_load_from_cache("dangvantuan/sentence-camembert-large", "config.json")
+        emb_ok = result is not None and isinstance(result, str)
+    except Exception:
+        pass
+    components.append({"name": "Embeddings (sentence-camembert-large)", "status": emb_ok,
+                       "detail": "Modèle en cache" if emb_ok else "Modèle non trouvé"})
+    _status_cache["data"] = components
+    _status_cache["ts"] = now
+    return components
+
+
+def _sort_qc_alerts(alerts: list[str]) -> list[str]:
def _key(a: str) -> tuple[int, int]: + text = a.lower() + dp = 0 if " dp " in text or text.startswith("dp ") or "diagnostic principal" in text else 1 + critical = 0 if any(k in text for k in ("high→low", "high → low", "à reconsidérer", "reconsider")) else 1 + return (dp, critical) + return sorted(alerts, key=_key) + + +def register_filters(app): + """Enregistre tous les filtres Jinja2 sur l'application Flask.""" + app.jinja_env.filters["confidence_badge"] = confidence_badge + app.jinja_env.filters["confidence_label"] = confidence_label + app.jinja_env.filters["severity_badge"] = severity_badge + app.jinja_env.filters["cma_level_badge"] = cma_level_badge + app.jinja_env.filters["format_duration"] = format_duration + app.jinja_env.filters["format_dossier_name"] = format_dossier_name + app.jinja_env.filters["format_doc_name"] = format_doc_name + app.jinja_env.filters["format_cpam_text"] = format_cpam_text + app.jinja_env.filters["decision_badge"] = decision_badge + app.jinja_env.filters["human_where"] = human_where + app.jinja_env.filters["date_to_iso"] = _date_to_iso + app.jinja_env.filters["sort_qc_alerts"] = _sort_qc_alerts diff --git a/src/viewer/rules_manager.py b/src/viewer/rules_manager.py new file mode 100644 index 0000000..e28dd79 --- /dev/null +++ b/src/viewer/rules_manager.py @@ -0,0 +1,218 @@ +"""Gestionnaire CRUD pour les fichiers de règles YAML.""" + +from __future__ import annotations + +import logging +from pathlib import Path + +import yaml + +from ..config import CONFIG_DIR, RULES_DIR + +logger = logging.getLogger(__name__) + +# Fichiers de règles gérables via l'UI +RULE_FILES: list[dict] = [ + { + "id": "base", + "path": RULES_DIR / "base.yaml", + "label": "Vetos & Decisions (socle)", + "description": "Packs de règles activables : vetos de contestabilité et décisions automatiques.", + "structure": "packs", + }, + { + "id": "bio_rules", + "path": CONFIG_DIR / "bio_rules.yaml", + "label": "Règles biologiques", + "description": "Contradiction bio → écartement automatique (ruled_out) ou alerte VETO-17.", + "structure": "flat_rules", + }, + { + "id": "diagnostic_conflicts", + "path": CONFIG_DIR / "diagnostic_conflicts.yaml", + "label": "Conflits diagnostiques", + "description": "Exclusions mutuelles et incompatibilités entre codes CIM-10.", + "structure": "conflicts", + }, + { + "id": "demographic_rules", + "path": CONFIG_DIR / "demographic_rules.yaml", + "label": "Règles démographiques", + "description": "Vérification cohérence âge/sexe avec les diagnostics codés.", + "structure": "generic", + }, + { + "id": "temporal_rules", + "path": CONFIG_DIR / "temporal_rules.yaml", + "label": "Règles temporelles", + "description": "Durée de séjour minimale/maximale pour certains diagnostics.", + "structure": "generic", + }, + { + "id": "parcours_rules", + "path": CONFIG_DIR / "parcours_rules.yaml", + "label": "Règles de parcours", + "description": "Vérification de la cohérence du parcours patient.", + "structure": "generic", + }, + { + "id": "procedure_diagnosis_rules", + "path": CONFIG_DIR / "procedure_diagnosis_rules.yaml", + "label": "Règles actes-diagnostics", + "description": "Cohérence entre actes CCAM et diagnostics CIM-10.", + "structure": "generic", + }, + { + "id": "completude_rules", + "path": CONFIG_DIR / "completude_rules.yaml", + "label": "Règles de complétude", + "description": "Vérification que le dossier contient les éléments requis pour le codage.", + "structure": "generic", + }, + { + "id": "router", + "path": RULES_DIR / "router.yaml", + "label": "Routeur de règles", + 
"description": "Activation conditionnelle de packs selon le contenu du dossier.", + "structure": "generic", + }, + { + "id": "enabled", + "path": RULES_DIR / "enabled.yaml", + "label": "Overlays actifs", + "description": "Sélection de spécialité, site, et overlays additionnels.", + "structure": "generic", + }, +] + + +def list_rule_files() -> list[dict]: + """Retourne la liste des fichiers de règles avec métadonnées.""" + result = [] + for rf in RULE_FILES: + path = rf["path"] + info = {**rf, "exists": path.exists(), "size": 0, "rules_count": 0} + if path.exists(): + info["size"] = path.stat().st_size + try: + data = yaml.safe_load(path.read_text(encoding="utf-8")) or {} + info["rules_count"] = _count_rules(data, rf["structure"]) + except Exception: + pass + result.append(info) + return result + + +def load_rule_file(file_id: str) -> dict: + """Charge un fichier de règles YAML complet.""" + rf = _find_file(file_id) + if not rf["path"].exists(): + return {} + return yaml.safe_load(rf["path"].read_text(encoding="utf-8")) or {} + + +def save_rule_file(file_id: str, data: dict) -> None: + """Sauvegarde un fichier de règles YAML.""" + rf = _find_file(file_id) + rf["path"].write_text( + yaml.dump(data, default_flow_style=False, allow_unicode=True, sort_keys=False), + encoding="utf-8", + ) + logger.info("Fichier de règles sauvegardé : %s", rf["path"]) + + +def toggle_rule(file_id: str, rule_path: str, enabled: bool) -> dict: + """Active/désactive une règle identifiée par son chemin dans le YAML. + + rule_path : chemin pointé séparé par des '.' (ex: 'packs.vetos_core.rules.VETO-02') + """ + data = load_rule_file(file_id) + _set_nested(data, rule_path + ".enabled", enabled) + save_rule_file(file_id, data) + return data + + +def update_rule_field(file_id: str, rule_path: str, field: str, value) -> dict: + """Met à jour un champ d'une règle.""" + data = load_rule_file(file_id) + _set_nested(data, rule_path + "." 
+ field, value)
+    save_rule_file(file_id, data)
+    return data
+
+
+def add_rule(file_id: str, parent_path: str, rule_id: str, rule_data: dict) -> dict:
+    """Ajoute une nouvelle règle sous parent_path."""
+    data = load_rule_file(file_id)
+    parent = _get_nested(data, parent_path)
+    if not isinstance(parent, dict):
+        raise ValueError(f"Chemin parent introuvable : {parent_path}")
+    if rule_id in parent:
+        raise ValueError(f"Règle '{rule_id}' existe déjà")
+    parent[rule_id] = rule_data
+    save_rule_file(file_id, data)
+    return data
+
+
+def delete_rule(file_id: str, parent_path: str, rule_id: str) -> dict:
+    """Supprime une règle."""
+    data = load_rule_file(file_id)
+    parent = _get_nested(data, parent_path)
+    if not isinstance(parent, dict) or rule_id not in parent:
+        raise ValueError(f"Règle '{rule_id}' introuvable dans '{parent_path}'")
+    del parent[rule_id]
+    save_rule_file(file_id, data)
+    return data
+
+
+# ---------------------------------------------------------------------------
+# Helpers internes
+# ---------------------------------------------------------------------------
+
+def _find_file(file_id: str) -> dict:
+    for rf in RULE_FILES:
+        if rf["id"] == file_id:
+            return rf
+    raise ValueError(f"Fichier de règles inconnu : {file_id}")
+
+
+def _count_rules(data: dict, structure: str) -> int:
+    if structure == "packs":
+        count = 0
+        for pack in (data.get("packs") or {}).values():
+            count += len(pack.get("rules") or {})
+        return count
+    if structure == "flat_rules":
+        return len(data.get("rules") or {})
+    if structure == "conflicts":
+        return len(data.get("mutual_exclusions") or []) + len(data.get("incompatibilities") or [])
+    # generic: count top-level keys that look like rule containers
+    count = 0
+    for v in data.values():
+        if isinstance(v, dict):
+            count += len(v)
+        elif isinstance(v, list):
+            count += len(v)
+    return count
+
+
+def _get_nested(data: dict, path: str):
+    """Accède à un noeud du YAML via un chemin pointé."""
+    parts = path.split(".")
+    current = data
+    for p in parts:
+        if isinstance(current, dict) and p in current:
+            current = current[p]
+        else:
+            return None
+    return current
+
+
+def _set_nested(data: dict, path: str, value) -> None:
+    """Définit une valeur dans un dict imbriqué via un chemin pointé."""
+    parts = path.split(".")
+    current = data
+    for p in parts[:-1]:
+        if p not in current or not isinstance(current[p], dict):
+            current[p] = {}
+        current = current[p]
+    current[parts[-1]] = value
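+
+
+# Esquisse de la structure 'packs' visée par les chemins pointés
+# (extrait hypothétique de rules/base.yaml) :
+#   packs:
+#     vetos_core:
+#       rules:
+#         VETO-02: {enabled: true, description: "...", force_severity: "HIGH"}
+# Pour désactiver cette règle :
+#   toggle_rule("base", "packs.vetos_core.rules.VETO-02", False)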
diff --git a/src/viewer/templates/admin_rules.html b/src/viewer/templates/admin_rules.html
new file mode 100644
index 0000000..f7eda0b
--- /dev/null
+++ b/src/viewer/templates/admin_rules.html
@@ -0,0 +1,379 @@
+{% extends "base.html" %}
+
+{% block title %}Moteur de regles{% endblock %}
+
+{% block sidebar %}
+<div class="nav-title">Admin</div>
+<a href="/admin/rules">Moteur de regles</a>
+<a href="/admin/referentiels">Referentiels RAG</a>
+<a href="/">Dashboard</a>
+<a href="/">Retour aux dossiers</a>
+{% endblock %}
+
+{% block content %}
+<a href="/">&larr; Dashboard</a>
+<h1>Moteur de regles metier</h1>
+<p>
+  Gerez les regles du pipeline T2A : activez/desactivez, modifiez les parametres, ajoutez ou supprimez des regles.
+  Les modifications sont appliquees immediatement (fichiers YAML).
+</p>
+
+{# ---- Cartes synthese ---- #}
+<div class="cards">
+  <div class="card">
+    <div class="card-label">Fichiers de regles</div>
+    <div class="card-value">{{ rule_files|length }}</div>
+  </div>
+  <div class="card">
+    <div class="card-label">Regles totales</div>
+    <div class="card-value">{{ rule_files|sum(attribute='rules_count') }}</div>
+  </div>
+  <div class="card">
+    <div class="card-label">Type</div>
+    <div class="card-value">YAML</div>
+  </div>
+  <div class="card">
+    <div class="card-label">Mode</div>
+    <div class="card-value">Strict</div>
+  </div>
+</div>
+
+{# ---- Tabs par fichier ---- #}
+<div class="tabs">
+  {% for rf in rule_files %}
+  <button class="tab" data-target="panel-{{ rf.id }}">{{ rf.label }}</button>
+  {% endfor %}
+</div>
+
+{# ---- Contenu par fichier ---- #}
+{% for rf in rule_files %}
+<div class="tab-panel" id="panel-{{ rf.id }}">
+  <div class="panel-header">
+    <div>
+      <h2>{{ rf.label }}</h2>
+      <p>{{ rf.description }}</p>
+    </div>
+    <div class="panel-meta">
+      {{ rf.rules_count }} regle(s)
+      {% if rf.exists %} &middot; {{ (rf.size / 1024)|round(1) }} Ko{% endif %}
+    </div>
+  </div>
+
+  {% if rf.exists and rf.data %}
+  {% if rf.structure == 'packs' %}
+  {# ---- Structure packs (base.yaml) ---- #}
+  {% for pack_name, pack in rf.data.get('packs', {}).items() %}
+  <h3>Pack : {{ pack_name }}</h3>
+  <table>
+    <thead>
+      <tr><th>ID</th><th>Description</th><th>Severite</th><th>Actif</th><th>Actions</th></tr>
+    </thead>
+    <tbody>
+      {% for rule_id, rule in (pack.get('rules') or {}).items() %}
+      <tr>
+        <td>{{ rule_id }}</td>
+        <td>{{ rule.get('description', '') }}</td>
+        <td>{% if rule.get('force_severity') %}{{ rule.force_severity }}{% endif %}</td>
+        <td>{# toggle actif/inactif #}</td>
+        <td>{# boutons edition / suppression #}</td>
+      </tr>
+      {% endfor %}
+    </tbody>
+  </table>
+  {# bouton d'ajout de regle #}
+  {% endfor %}
+
+  {% elif rf.structure == 'flat_rules' %}
+  {# ---- Structure rules plates (bio_rules.yaml) ---- #}
+  {% if rf.data.get('missing_evidence') %}
+  <p>
+    Preuve manquante :
+    veto={{ rf.data.missing_evidence.get('veto', '?') }},
+    severite={{ rf.data.missing_evidence.get('severity', '?') }},
+    penalite={{ rf.data.missing_evidence.get('score_penalty', '?') }}
+  </p>
+  {% endif %}
+  <table>
+    <thead>
+      <tr><th>Regle</th><th>Codes CIM-10</th><th>Analyte</th><th>Seuil</th><th>Actif</th><th>Actions</th></tr>
+    </thead>
+    <tbody>
+      {% for rule_id, rule in (rf.data.get('rules') or {}).items() %}
+      <tr>
+        <td>{{ rule_id }}</td>
+        <td>{{ (rule.get('codes') or [])|join(', ') }}</td>
+        <td>{{ rule.get('analyte', '') }}</td>
+        <td>
+          {{ rule.get('threshold_type', '') }}
+          {% if rule.get('message') %} ({{ rule.message }}){% endif %}
+        </td>
+        <td>{# toggle actif/inactif #}</td>
+        <td>{# boutons edition / suppression #}</td>
+      </tr>
+      {% endfor %}
+    </tbody>
+  </table>
+  {# bouton d'ajout de regle #}
+
+  {% elif rf.structure == 'conflicts' %}
+  {# ---- Conflits diagnostiques ---- #}
+  <h3>Exclusions mutuelles</h3>
+  <table>
+    <thead>
+      <tr><th>Nom</th><th>Codes</th><th>Message</th><th>Severite</th></tr>
+    </thead>
+    <tbody>
+      {% for excl in (rf.data.get('mutual_exclusions') or []) %}
+      <tr>
+        <td>{{ excl.get('name', '') }}</td>
+        <td>{{ (excl.get('codes') or [])|join(', ') }}</td>
+        <td>{{ excl.get('message', '') }}</td>
+        <td>{% set sev = excl.get('severity', 'MEDIUM') %}{{ sev }}</td>
+      </tr>
+      {% endfor %}
+    </tbody>
+  </table>
+
+  <h3>Incompatibilites</h3>
+  <table>
+    <thead>
+      <tr><th>Codes</th><th>Ref ATIH</th><th>Message</th><th>Severite</th></tr>
+    </thead>
+    <tbody>
+      {% for inc in (rf.data.get('incompatibilities') or []) %}
+      <tr>
+        <td>{{ (inc.get('pair') or [])|join(', ') }}</td>
+        <td>{{ inc.get('atih_ref', '') }}</td>
+        <td>{{ inc.get('message', '') }}</td>
+        <td>{% set sev = inc.get('severity', 'MEDIUM') %}{{ sev }}</td>
+      </tr>
+      {% endfor %}
+    </tbody>
+  </table>
+
+  {% else %}
+  {# ---- Structure generique (YAML brut) ---- #}
+  <pre>{{ rf.data|tojson(indent=2) }}</pre>
+  {% endif %}
+  {% else %}
+  <p class="warning">Fichier non trouve : {{ rf.path }}</p>
+  {% endif %}
+</div>
+{% endfor %}
+
+{# ---- Modal ajout regle ---- #}
+
+{# ---- Toast notifications ---- #}
+
+{% endblock %}
diff --git a/tests/test_cpam_response.py b/tests/test_cpam_response.py
index 4c3ea57..5bd3c64 100644
--- a/tests/test_cpam_response.py
+++ b/tests/test_cpam_response.py
@@ -19,28 +19,32 @@ from src.config import (
    Traitement,
)
from src.control.cpam_response import (
-    _assess_dossier_strength,
-    _build_bio_confrontation,
-    _build_bio_summary,
    _build_correction_prompt,
    _build_cpam_prompt,
    _build_tagged_context,
-    _BIO_THRESHOLDS,
-    _check_das_bio_coherence,
    _extraction_pass,
    _format_response,
-    _fuzzy_match_ref,
-    _get_cim10_definitions,
-    _get_code_label,
-    _sanitize_unauthorized_codes,
    _search_rag_for_control,
-    _validate_adversarial,
    _validate_codes_in_response,
    _validate_grounding,
    _validate_references,
-    _assess_quality_tier,
    generate_cpam_response,
)
+from src.control.cpam_context import (
+    _assess_dossier_strength,
+    _build_bio_confrontation,
+    _build_bio_summary,
+    _BIO_THRESHOLDS,
+    _check_das_bio_coherence,
+    _get_cim10_definitions,
+    _get_code_label,
+)
+from src.control.cpam_validation import (
+    _assess_quality_tier,
+    _fuzzy_match_ref,
+    _sanitize_unauthorized_codes,
+    _validate_adversarial,
+)


def _make_dossier() -> DossierMedical:
diff --git a/tests/test_das_llm.py b/tests/test_das_llm.py
index 9d7f0c8..772c7a7 100644
--- a/tests/test_das_llm.py
+++ b/tests/test_das_llm.py
@@ -176,8 +176,8 @@ class TestBioNormesInContext:
        assert "[N: min-max]" in prompt

    def test_bio_normals_exported(self):
-        """BIO_NORMALS est bien exporté depuis cim10_extractor."""
-        from src.medical.cim10_extractor import BIO_NORMALS
+        """BIO_NORMALS est bien exporté depuis bio_normals."""
+        from src.medical.bio_normals import BIO_NORMALS
        assert "Créatinine" in BIO_NORMALS
        assert BIO_NORMALS["Créatinine"] == (50, 120)
diff --git a/tests/test_medical.py b/tests/test_medical.py
index e43d70e..629c25b 100644
--- a/tests/test_medical.py
+++ b/tests/test_medical.py
@@ -3,12 +3,10 @@ import pytest

from src.config import DossierMedical, Diagnostic, Antecedent, Complication
-from src.medical.cim10_extractor import (
-    extract_medical_info,
-    _lookup_cim10,
-    _is_abnormal,
-    _is_valid_antecedent,
-)
+from src.medical.cim10_extractor import extract_medical_info, _is_valid_antecedent
+from src.medical.diagnostic_extraction import _lookup_cim10
+from src.medical.bio_normals import _is_abnormal
from src.medical.cim10_dict import normalize_text, load_dict, lookup, reset_cache
from src.extraction.document_classifier import classify, classify_with_confidence
diff --git a/tests/test_viewer.py b/tests/test_viewer.py
index 95b835b..1383970 100644
--- a/tests/test_viewer.py
+++ b/tests/test_viewer.py
@@ -6,7 +6,8 @@ import pytest
from pathlib import Path
from unittest.mock import patch

-from src.viewer.app import create_app, compute_group_stats, severity_badge, format_duration, format_cpam_text
+from src.viewer.app import create_app
+from src.viewer.helpers import compute_group_stats, severity_badge, format_duration, format_cpam_text
from src.config import DossierMedical, Diagnostic, ActeCCAM