diff --git a/src/anonymization/ner_anonymizer.py b/src/anonymization/ner_anonymizer.py index ea42b27..0cf8793 100644 --- a/src/anonymization/ner_anonymizer.py +++ b/src/anonymization/ner_anonymizer.py @@ -22,8 +22,8 @@ def _get_pipeline() -> Pipeline: logger.info("Chargement du modèle NER %s...", NER_MODEL) from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline - tokenizer = AutoTokenizer.from_pretrained(NER_MODEL) - model = AutoModelForTokenClassification.from_pretrained(NER_MODEL) + tokenizer = AutoTokenizer.from_pretrained(NER_MODEL, local_files_only=True) + model = AutoModelForTokenClassification.from_pretrained(NER_MODEL, local_files_only=True) _pipeline = pipeline( "ner", model=model, diff --git a/src/main.py b/src/main.py index c1b3bb4..6c6bf0d 100644 --- a/src/main.py +++ b/src/main.py @@ -144,9 +144,9 @@ def main(input_path: str | None = None) -> None: ) parser.add_argument( "input", - nargs="?", - default=input_path or "input/", - help="Chemin vers un PDF ou un dossier de PDFs (défaut: input/)", + nargs="*", + default=[input_path or "input/"], + help="Chemin(s) vers des PDFs, dossiers patients, ou le dossier racine (défaut: input/)", ) parser.add_argument( "--no-ner", @@ -209,28 +209,34 @@ def main(input_path: str | None = None) -> None: if args.no_rag: _use_rag = False - input_p = Path(args.input) + input_paths = args.input # Collecte des groupes (pdfs, subdir) à traiter groups: list[tuple[list[Path], str | None]] = [] - if input_p.is_file(): - groups.append(([input_p], None)) - elif input_p.is_dir(): - # PDFs à la racine - root_pdfs = sorted(input_p.glob("*.pdf")) - if root_pdfs: - groups.append((root_pdfs, None)) + for p in input_paths: + input_p = Path(p) + if input_p.is_file(): + # Fichier unique → subdir = nom du dossier parent (si ce n'est pas input/) + subdir = input_p.parent.name if input_p.parent.name != "input" else None + groups.append(([input_p], subdir)) + elif input_p.is_dir(): + # Vérifier s'il y a des PDFs directement dans ce dossier + root_pdfs = sorted(input_p.glob("*.pdf")) + # Vérifier s'il y a des sous-dossiers avec PDFs + sub_dirs = [c for c in sorted(input_p.iterdir()) if c.is_dir() and list(c.glob("*.pdf"))] - # Sous-dossiers directs (un seul niveau) - for child in sorted(input_p.iterdir()): - if child.is_dir(): - sub_pdfs = sorted(child.glob("*.pdf")) - if sub_pdfs: + if sub_dirs: + # C'est un dossier racine (comme input/) → traiter chaque sous-dossier + for child in sub_dirs: + sub_pdfs = sorted(child.glob("*.pdf")) groups.append((sub_pdfs, child.name)) - else: - logger.error("Chemin introuvable : %s", input_p) - sys.exit(1) + elif root_pdfs: + # C'est un dossier patient directement → utiliser son nom comme subdir + groups.append((root_pdfs, input_p.name)) + else: + logger.error("Chemin introuvable : %s", input_p) + sys.exit(1) total = sum(len(pdfs) for pdfs, _ in groups) if total == 0: diff --git a/src/viewer/app.py b/src/viewer/app.py index 90660dd..270dbfc 100644 --- a/src/viewer/app.py +++ b/src/viewer/app.py @@ -4,6 +4,7 @@ from __future__ import annotations import json import logging +import re from pathlib import Path import requests @@ -185,6 +186,30 @@ def severity_badge(value: str | None) -> Markup: ) +def format_dossier_name(name: str) -> str: + """Transforme un nom de dossier en nom lisible (ex: 15_23096332 → Dossier 23096332).""" + if name == "racine": + return "Non classés" + m = re.match(r"^\d+_(\d+)$", name) + if m: + return f"Dossier {m.group(1)}" + return name + + +def format_doc_name(name: str) -> str: + """Transforme un nom de fichier JSON en nom lisible.""" + n = name.lower() + if "fusionne" in n: + return "Fusionné" + if n.startswith("cro") or n.startswith("crh"): + return name.split("_")[0].upper() + if "trackare" in n: + return "Trackare" + if "anapath" in n: + return "Anapath" + return name + + # --------------------------------------------------------------------------- # App factory # --------------------------------------------------------------------------- @@ -196,6 +221,8 @@ def create_app() -> Flask: app.jinja_env.filters["confidence_label"] = confidence_label app.jinja_env.filters["severity_badge"] = severity_badge app.jinja_env.filters["format_duration"] = format_duration + app.jinja_env.filters["format_dossier_name"] = format_dossier_name + app.jinja_env.filters["format_doc_name"] = format_doc_name ccam_dict = load_ccam_dict() diff --git a/src/viewer/templates/base.html b/src/viewer/templates/base.html index a79a52b..855ae8f 100644 --- a/src/viewer/templates/base.html +++ b/src/viewer/templates/base.html @@ -72,6 +72,10 @@ background: #1e293b; border-left-color: #3b82f6; } + .sidebar-nav a.sidebar-fusionne { + color: #60a5fa; + font-weight: 600; + } /* Admin section */ .sidebar-admin { diff --git a/src/viewer/templates/index.html b/src/viewer/templates/index.html index 0c951f7..16c1364 100644 --- a/src/viewer/templates/index.html +++ b/src/viewer/templates/index.html @@ -3,9 +3,13 @@ {% block sidebar %} {% for group_name, items in groups.items() %} -
{{ group_name }}
+
{{ group_name | format_dossier_name }}
{% for item in items %} - {{ item.name }} + {% if 'fusionne' in item.name %} + ★ Fusionné + {% else %} + {{ item.name | format_doc_name }} + {% endif %} {% endfor %} {% endfor %} {% endblock %} @@ -33,7 +37,7 @@ {% endfor %} {% set stats = group_stats.get(group_name, {}) %}

- {{ group_name }} + {{ group_name | format_dossier_name }} {{ items|length }} fichier(s){% if ns.count %} — total : {{ ns.total|format_duration }}{% endif %} @@ -60,7 +64,7 @@
- {{ item.name }} + {{ item.name | format_doc_name }}
{% if item.dossier.document_type %}