From 037d255aa0875a012651601d04ba524356742512 Mon Sep 17 00:00:00 2001 From: dom Date: Tue, 10 Feb 2026 20:11:07 +0100 Subject: [PATCH] =?UTF-8?q?feat:=20ajout=20viewer=20Flask=20CIM-10=20avec?= =?UTF-8?q?=20config=20Ollama=20centralis=C3=A9e=20et=20chronom=C3=A9trage?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ajoute une interface web Flask pour visualiser les dossiers médicaux CIM-10, avec temps de traitement par PDF, sélecteur de modèle Ollama, et centralisation de la config Ollama dans src/config.py. Co-Authored-By: Claude Opus 4.6 --- requirements.txt | 1 + src/config.py | 8 + src/main.py | 4 + src/medical/rag_search.py | 9 +- src/viewer/__init__.py | 0 src/viewer/__main__.py | 20 +++ src/viewer/app.py | 150 ++++++++++++++++++ src/viewer/templates/base.html | 258 +++++++++++++++++++++++++++++++ src/viewer/templates/detail.html | 206 ++++++++++++++++++++++++ src/viewer/templates/index.html | 72 +++++++++ 10 files changed, 721 insertions(+), 7 deletions(-) create mode 100644 src/viewer/__init__.py create mode 100644 src/viewer/__main__.py create mode 100644 src/viewer/app.py create mode 100644 src/viewer/templates/base.html create mode 100644 src/viewer/templates/detail.html create mode 100644 src/viewer/templates/index.html diff --git a/requirements.txt b/requirements.txt index 89e9a4f..cf6fd69 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,4 @@ edsnlp[ml]>=0.17.0 faiss-cpu>=1.7.0 sentence-transformers>=2.2.0 requests>=2.28.0 +flask>=3.0.0 diff --git a/src/config.py b/src/config.py index 1a5c033..373d56b 100644 --- a/src/config.py +++ b/src/config.py @@ -28,6 +28,13 @@ NER_MODEL = "Jean-Baptiste/camembert-ner" NER_CONFIDENCE_THRESHOLD = 0.80 +# --- Configuration Ollama --- + +OLLAMA_URL = "http://localhost:11434" +OLLAMA_MODEL = "mistral-large-3:675b-cloud" +OLLAMA_TIMEOUT = 120 + + # --- Configuration RAG --- RAG_INDEX_DIR = BASE_DIR / "data" / "rag_index" @@ -103,6 +110,7 @@ class 
DossierMedical(BaseModel): biologie_cle: list[BiologieCle] = Field(default_factory=list) imagerie: list[Imagerie] = Field(default_factory=list) complications: list[str] = Field(default_factory=list) + processing_time_s: float | None = None # --- Rapport d'anonymisation --- diff --git a/src/main.py b/src/main.py index 284eb42..de5d349 100644 --- a/src/main.py +++ b/src/main.py @@ -6,6 +6,7 @@ import argparse import json import logging import sys +import time from pathlib import Path from .anonymization.anonymizer import Anonymizer @@ -29,6 +30,7 @@ _use_rag = True def process_pdf(pdf_path: Path) -> tuple[str, DossierMedical, AnonymizationReport]: """Traite un PDF : extraction → parsing → anonymisation → extraction CIM-10.""" + t0 = time.time() logger.info("Traitement de %s", pdf_path.name) # 1. Extraction texte @@ -67,8 +69,10 @@ def process_pdf(pdf_path: Path) -> tuple[str, DossierMedical, AnonymizationRepor dossier = extract_medical_info(parsed, anonymized_text, edsnlp_result, use_rag=_use_rag) dossier.source_file = pdf_path.name dossier.document_type = doc_type + dossier.processing_time_s = round(time.time() - t0, 2) logger.info(" DP : %s", dossier.diagnostic_principal) logger.info(" DAS : %d, Actes : %d", len(dossier.diagnostics_associes), len(dossier.actes_ccam)) + logger.info(" Temps de traitement : %.2fs", dossier.processing_time_s) return anonymized_text, dossier, report diff --git a/src/medical/rag_search.py b/src/medical/rag_search.py index ee30964..963f12c 100644 --- a/src/medical/rag_search.py +++ b/src/medical/rag_search.py @@ -8,15 +8,10 @@ from typing import Optional import requests -from ..config import Diagnostic, DossierMedical, RAGSource +from ..config import Diagnostic, DossierMedical, RAGSource, OLLAMA_URL, OLLAMA_MODEL, OLLAMA_TIMEOUT logger = logging.getLogger(__name__) -# Configuration Ollama -OLLAMA_URL = "http://localhost:11434/api/generate" -OLLAMA_MODEL = "mistral-small3.2:24b" -OLLAMA_TIMEOUT = 120 # secondes - # Singleton pour le modèle 
def main():
    """Run the CIM-10 viewer from the command line.

    Reads ``--host`` (default ``127.0.0.1``), ``--port`` (default ``5000``)
    and the ``--debug`` flag from the command line, builds the Flask
    application with ``create_app()`` and starts its built-in server with
    those settings.
    """
    cli = argparse.ArgumentParser(description="Viewer CIM-10 T2A")
    cli.add_argument("--host", default="127.0.0.1")
    cli.add_argument("--port", type=int, default=5000)
    cli.add_argument("--debug", action="store_true")
    options = cli.parse_args()

    # Collect the server settings once so the run() call stays short.
    server_settings = {
        "host": options.host,
        "port": options.port,
        "debug": options.debug,
    }
    create_app().run(**server_settings)


if __name__ == "__main__":
    main()
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def scan_dossiers() -> dict[str, list[dict]]:
    """Scan ``STRUCTURED_DIR`` recursively and group the dossiers by sub-folder.

    Every ``*.json`` file found is parsed and validated as a
    ``DossierMedical``. A file that cannot be read, decoded or validated is
    skipped so that one bad file does not break the whole listing; the
    warning carries the traceback so the cause can be diagnosed.

    Returns:
        A mapping from group name to a list of entries. Files located
        directly in ``STRUCTURED_DIR`` go to the ``"racine"`` group; the
        others are grouped under their parent path relative to
        ``STRUCTURED_DIR``. Each entry is a dict with the keys ``name``
        (file stem), ``path_rel`` (relative path as a string) and
        ``dossier`` (the validated model).
    """
    groups: dict[str, list[dict]] = {}

    for json_path in sorted(STRUCTURED_DIR.rglob("*.json")):
        rel = json_path.relative_to(STRUCTURED_DIR)
        group_name = "racine" if len(rel.parts) == 1 else str(rel.parent)

        try:
            data = json.loads(json_path.read_text(encoding="utf-8"))
            dossier = DossierMedical.model_validate(data)
        except Exception:
            # Deliberately best-effort: keep listing the other files, but
            # record why this one was rejected.
            logger.warning("Impossible de charger %s", json_path, exc_info=True)
            continue

        groups.setdefault(group_name, []).append({
            "name": json_path.stem,
            "path_rel": str(rel),
            "dossier": dossier,
        })

    return groups


def load_dossier(path_rel: str) -> DossierMedical:
    """Load one structured dossier from a path relative to ``STRUCTURED_DIR``.

    Args:
        path_rel: Path of the JSON file, relative to ``STRUCTURED_DIR``.
            It comes from the request URL and is treated as untrusted.

    Returns:
        The validated ``DossierMedical``.

    Raises:
        Aborts the request with 403 when the resolved path escapes
        ``STRUCTURED_DIR`` and with 404 when it is not a regular file.
        JSON decoding and model validation errors propagate unchanged.
    """
    root = STRUCTURED_DIR.resolve()
    safe_path = (root / path_rel).resolve()
    # Resolve first, then compare: this defeats "../" and symlink escapes.
    if not safe_path.is_relative_to(root):
        abort(403)
    # is_file() rather than exists(): a directory inside the root must yield
    # a 404, not an unhandled IsADirectoryError from read_text().
    if not safe_path.is_file():
        abort(404)

    data = json.loads(safe_path.read_text(encoding="utf-8"))
    return DossierMedical.model_validate(data)


def fetch_ollama_models() -> list[str]:
    """List the model names exposed by ``GET {cfg.OLLAMA_URL}/api/tags``.

    The URL is read from the ``cfg`` module at call time. The call is
    best-effort: on a network error, an HTTP error status or an undecodable
    body, a warning (with traceback) is logged and an empty list is
    returned.

    Returns:
        The ``name`` of every well-formed entry of the ``models`` array;
        entries without a string ``name`` are ignored instead of discarding
        the whole list.
    """
    try:
        resp = requests.get(f"{cfg.OLLAMA_URL}/api/tags", timeout=5)
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError):
        logger.warning(
            "Impossible de contacter Ollama pour lister les modèles",
            exc_info=True,
        )
        return []

    models = payload.get("models") if isinstance(payload, dict) else None
    if not isinstance(models, list):
        return []
    return [
        m["name"]
        for m in models
        if isinstance(m, dict) and isinstance(m.get("name"), str)
    ]
# ---------------------------------------------------------------------------
# Jinja2 filters
# ---------------------------------------------------------------------------

# Confidence level -> (foreground colour, background colour) of its badge.
_CONFIDENCE_COLORS = {
    "high": ("#16a34a", "#dcfce7"),
    "medium": ("#ca8a04", "#fef9c3"),
    "low": ("#dc2626", "#fee2e2"),
}

# Confidence level -> label displayed to the user.
_CONFIDENCE_LABELS = {
    "high": "Haute",
    "medium": "Moyenne",
    "low": "Basse",
}


def confidence_badge(value: str | None) -> Markup:
    """Render a confidence level as a coloured inline badge.

    Args:
        value: Confidence level (``"high"``, ``"medium"``, ``"low"``) or any
            other string; ``None`` and ``""`` render nothing.

    Returns:
        Safe markup for a ``<span>`` coloured after ``_CONFIDENCE_COLORS``
        (grey for unknown levels) and showing the label from
        ``_CONFIDENCE_LABELS`` (the raw value for unknown levels).
    """
    if not value:
        return Markup("")
    fg, bg = _CONFIDENCE_COLORS.get(value, ("#6b7280", "#f3f4f6"))
    label = _CONFIDENCE_LABELS.get(value, value)
    # Markup.format() escapes its arguments, so an unexpected value read
    # from a dossier JSON cannot inject HTML into the page.
    # NOTE(review): the original markup is not visible here; the colours are
    # applied inline - add the stylesheet class if the templates expect one.
    return Markup(
        '<span style="color:{fg};background:{bg}">{label}</span>'
    ).format(fg=fg, bg=bg, label=label)


def confidence_label(value: str | None) -> str:
    """Return the display label of a confidence level.

    Unknown levels are returned unchanged; ``None`` and ``""`` give ``""``.
    """
    if not value:
        return ""
    return _CONFIDENCE_LABELS.get(value, value)


# ---------------------------------------------------------------------------
# App factory
# ---------------------------------------------------------------------------

def create_app() -> Flask:
    """Build the viewer's Flask application.

    Registers the ``confidence_badge`` and ``confidence_label`` Jinja2
    filters and these routes:

    * ``GET /`` - list of dossiers grouped by sub-folder;
    * ``GET /dossier/<path:filepath>`` - detail page of one dossier;
    * ``GET /admin/models`` - models known to Ollama plus the current one;
    * ``POST /admin/models`` - change ``cfg.OLLAMA_MODEL`` from a JSON body
      of the form ``{"model": "<name>"}``.

    Returns:
        The configured ``Flask`` instance.
    """
    app = Flask(__name__)

    app.jinja_env.filters["confidence_badge"] = confidence_badge
    app.jinja_env.filters["confidence_label"] = confidence_label

    @app.route("/")
    def index():
        groups = scan_dossiers()
        return render_template("index.html", groups=groups)

    # The "path" converter is required: relative paths of dossiers stored in
    # sub-folders contain slashes, and the view needs the URL variable.
    @app.route("/dossier/<path:filepath>")
    def detail(filepath: str):
        dossier = load_dossier(filepath)
        return render_template("detail.html", dossier=dossier, filepath=filepath)

    @app.route("/admin/models", methods=["GET"])
    def list_models():
        models = fetch_ollama_models()
        return jsonify({"models": models, "current": cfg.OLLAMA_MODEL})

    @app.route("/admin/models", methods=["POST"])
    def set_model():
        data = request.get_json(silent=True)
        # The body may be any JSON value (list, null, number...) and "model"
        # may not be a string: answer 400 instead of failing on .get/.strip.
        new_model = data.get("model") if isinstance(data, dict) else None
        if not isinstance(new_model, str) or not new_model.strip():
            return jsonify({"error": "Champ 'model' requis"}), 400
        new_model = new_model.strip()
        # NOTE(review): this rebinds the attribute on the config module only;
        # a module that did `from ..config import OLLAMA_MODEL` keeps its own
        # binding - confirm that the callers read cfg.OLLAMA_MODEL.
        cfg.OLLAMA_MODEL = new_model
        logger.info("Modèle Ollama changé : %s", new_model)
        return jsonify({"ok": True, "model": cfg.OLLAMA_MODEL})

    return app
+ {% block content %}{% endblock %} +
+ + + + diff --git a/src/viewer/templates/detail.html b/src/viewer/templates/detail.html new file mode 100644 index 0000000..6ffc2a0 --- /dev/null +++ b/src/viewer/templates/detail.html @@ -0,0 +1,206 @@ +{% extends "base.html" %} +{% block title %}{{ dossier.source_file or filepath }}{% endblock %} + +{% block sidebar %} +
Navigation
+Retour à la liste +{% endblock %} + +{% block content %} +← Retour à la liste + +{# ---- En-tête ---- #} +
+

{{ dossier.source_file or filepath }}

+
+ {% if dossier.document_type %} +
+ + {{ dossier.document_type }} +
+ {% endif %} + {% if dossier.processing_time_s is not none %} +
+ + {{ dossier.processing_time_s }}s +
+ {% endif %} +
+
+ +{# ---- Séjour ---- #} +{% set s = dossier.sejour %} +{% if s.sexe or s.age or s.date_entree or s.date_sortie or s.duree_sejour is not none or s.imc or s.poids or s.taille %} +
+

Séjour

+
+ {% if s.sexe %}
{{ s.sexe }}
{% endif %} + {% if s.age is not none %}
{{ s.age }} ans
{% endif %} + {% if s.date_entree %}
{{ s.date_entree }}
{% endif %} + {% if s.date_sortie %}
{{ s.date_sortie }}
{% endif %} + {% if s.duree_sejour is not none %}
{{ s.duree_sejour }} jour(s)
{% endif %} + {% if s.mode_entree %}
{{ s.mode_entree }}
{% endif %} + {% if s.mode_sortie %}
{{ s.mode_sortie }}
{% endif %} + {% if s.poids %}
{{ s.poids }} kg
{% endif %} + {% if s.taille %}
{{ s.taille }} cm
{% endif %} + {% if s.imc %}
{{ s.imc }}
{% endif %} +
+
+{% endif %} + +{# ---- Diagnostic principal ---- #} +{% if dossier.diagnostic_principal %} +{% set dp = dossier.diagnostic_principal %} +
+

Diagnostic principal

+
{{ dp.texte }}
+ {% if dp.cim10_suggestion %} + {{ dp.cim10_suggestion }} + {{ dp.cim10_confidence | confidence_badge }} + {% endif %} + {% if dp.justification %} +
{{ dp.justification }}
+ {% endif %} + {% if dp.sources_rag %} +
+ Sources RAG ({{ dp.sources_rag|length }}) + {% for src in dp.sources_rag %} +
{{ src.document }}{% if src.code %} — {{ src.code }}{% endif %}{% if src.page %} [p.{{ src.page }}]{% endif %}
+{{ src.extrait or '' }}
+ {% endfor %} +
+ {% endif %} +
+{% endif %} + +{# ---- Diagnostics associés ---- #} +{% if dossier.diagnostics_associes %} +
+

Diagnostics associés ({{ dossier.diagnostics_associes|length }})

+ + + + {% for das in dossier.diagnostics_associes %} + + + + + + + {% if das.sources_rag %} + + + + {% endif %} + {% endfor %} + +
TexteCIM-10ConfianceJustification
{{ das.texte }}{% if das.cim10_suggestion %}{{ das.cim10_suggestion }}{% endif %}{{ das.cim10_confidence | confidence_badge }}{{ das.justification or '' }}
+
+ Sources RAG ({{ das.sources_rag|length }}) + {% for src in das.sources_rag %} +
{{ src.document }}{% if src.code %} — {{ src.code }}{% endif %}{% if src.page %} [p.{{ src.page }}]{% endif %}
+{{ src.extrait or '' }}
+ {% endfor %} +
+
+
+{% endif %} + +{# ---- Actes CCAM ---- #} +{% if dossier.actes_ccam %} +
+

Actes CCAM ({{ dossier.actes_ccam|length }})

+ + + + {% for a in dossier.actes_ccam %} + + + + + + {% endfor %} + +
TexteCode CCAMDate
{{ a.texte }}{% if a.code_ccam_suggestion %}{{ a.code_ccam_suggestion }}{% endif %}{{ a.date or '' }}
+
+{% endif %} + +{# ---- Biologie clé ---- #} +{% if dossier.biologie_cle %} +
+

Biologie clé ({{ dossier.biologie_cle|length }})

+ + + + {% for b in dossier.biologie_cle %} + + + + + + {% endfor %} + +
TestValeurAnomalie
{{ b.test }}{{ b.valeur or '' }}{% if b.anomalie %}Oui{% else %}—{% endif %}
+
+{% endif %} + +{# ---- Imagerie ---- #} +{% if dossier.imagerie %} +
+

Imagerie ({{ dossier.imagerie|length }})

+ {% for img in dossier.imagerie %} +
+ {{ img.type }} + {% if img.score %} — Score : {{ img.score }}{% endif %} + {% if img.conclusion %} +
{{ img.conclusion }}
+ {% endif %} +
+ {% endfor %} +
+{% endif %} + +{# ---- Traitements de sortie ---- #} +{% if dossier.traitements_sortie %} +
+

Traitements de sortie ({{ dossier.traitements_sortie|length }})

+ + + + {% for t in dossier.traitements_sortie %} + + + + + + {% endfor %} + +
MédicamentPosologieCode ATC
{{ t.medicament }}{{ t.posologie or '' }}{% if t.code_atc %}{{ t.code_atc }}{% endif %}
+
+{% endif %} + +{# ---- Antécédents ---- #} +{% if dossier.antecedents %} +
+

Antécédents ({{ dossier.antecedents|length }})

+
    + {% for a in dossier.antecedents %} +
  • {{ a }}
  • + {% endfor %} +
+
+{% endif %} + +{# ---- Complications ---- #} +{% if dossier.complications %} +
+

Complications ({{ dossier.complications|length }})

+
    + {% for c in dossier.complications %} +
  • {{ c }}
  • + {% endfor %} +
+
+{% endif %} + +{% endblock %} diff --git a/src/viewer/templates/index.html b/src/viewer/templates/index.html new file mode 100644 index 0000000..e7f3a2f --- /dev/null +++ b/src/viewer/templates/index.html @@ -0,0 +1,72 @@ +{% extends "base.html" %} +{% block title %}Accueil{% endblock %} + +{% block sidebar %} +{% for group_name, items in groups.items() %} +
{{ group_name }}
+ {% for item in items %} + {{ item.name }} + {% endfor %} +{% endfor %} +{% endblock %} + +{% block content %} +

Dossiers médicaux traités

+ +{% if not groups %} +
+

Aucun dossier trouvé dans output/structured/.

+

+ Lancez le pipeline avec python -m src.main pour générer des fichiers. +

+
+{% endif %} + +{% for group_name, items in groups.items() %} + +{% endfor %} +{% endblock %}