feat: output miroir de input, viewer lisible, mode 100% local
- CLI accepte plusieurs chemins en entrée (nargs="*")
- Un dossier patient passé directement utilise son nom comme subdir
- Filtres Jinja format_dossier_name (15_23096332 → Dossier 23096332) et format_doc_name (CRO_xxx_cim10 → CRO, Trackare, Fusionné)
- Sidebar : noms lisibles, fusionné mis en avant (★)
- NER CamemBERT en local_files_only (aucun appel réseau)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -22,8 +22,8 @@ def _get_pipeline() -> Pipeline:
|
|||||||
logger.info("Chargement du modèle NER %s...", NER_MODEL)
|
logger.info("Chargement du modèle NER %s...", NER_MODEL)
|
||||||
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
|
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(NER_MODEL)
|
tokenizer = AutoTokenizer.from_pretrained(NER_MODEL, local_files_only=True)
|
||||||
model = AutoModelForTokenClassification.from_pretrained(NER_MODEL)
|
model = AutoModelForTokenClassification.from_pretrained(NER_MODEL, local_files_only=True)
|
||||||
_pipeline = pipeline(
|
_pipeline = pipeline(
|
||||||
"ner",
|
"ner",
|
||||||
model=model,
|
model=model,
|
||||||
|
|||||||
30
src/main.py
30
src/main.py
@@ -144,9 +144,9 @@ def main(input_path: str | None = None) -> None:
|
|||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"input",
|
"input",
|
||||||
nargs="?",
|
nargs="*",
|
||||||
default=input_path or "input/",
|
default=[input_path or "input/"],
|
||||||
help="Chemin vers un PDF ou un dossier de PDFs (défaut: input/)",
|
help="Chemin(s) vers des PDFs, dossiers patients, ou le dossier racine (défaut: input/)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-ner",
|
"--no-ner",
|
||||||
@@ -209,25 +209,31 @@ def main(input_path: str | None = None) -> None:
|
|||||||
if args.no_rag:
|
if args.no_rag:
|
||||||
_use_rag = False
|
_use_rag = False
|
||||||
|
|
||||||
input_p = Path(args.input)
|
input_paths = args.input
|
||||||
|
|
||||||
# Collecte des groupes (pdfs, subdir) à traiter
|
# Collecte des groupes (pdfs, subdir) à traiter
|
||||||
groups: list[tuple[list[Path], str | None]] = []
|
groups: list[tuple[list[Path], str | None]] = []
|
||||||
|
|
||||||
|
for p in input_paths:
|
||||||
|
input_p = Path(p)
|
||||||
if input_p.is_file():
|
if input_p.is_file():
|
||||||
groups.append(([input_p], None))
|
# Fichier unique → subdir = nom du dossier parent (si ce n'est pas input/)
|
||||||
|
subdir = input_p.parent.name if input_p.parent.name != "input" else None
|
||||||
|
groups.append(([input_p], subdir))
|
||||||
elif input_p.is_dir():
|
elif input_p.is_dir():
|
||||||
# PDFs à la racine
|
# Vérifier s'il y a des PDFs directement dans ce dossier
|
||||||
root_pdfs = sorted(input_p.glob("*.pdf"))
|
root_pdfs = sorted(input_p.glob("*.pdf"))
|
||||||
if root_pdfs:
|
# Vérifier s'il y a des sous-dossiers avec PDFs
|
||||||
groups.append((root_pdfs, None))
|
sub_dirs = [c for c in sorted(input_p.iterdir()) if c.is_dir() and list(c.glob("*.pdf"))]
|
||||||
|
|
||||||
# Sous-dossiers directs (un seul niveau)
|
if sub_dirs:
|
||||||
for child in sorted(input_p.iterdir()):
|
# C'est un dossier racine (comme input/) → traiter chaque sous-dossier
|
||||||
if child.is_dir():
|
for child in sub_dirs:
|
||||||
sub_pdfs = sorted(child.glob("*.pdf"))
|
sub_pdfs = sorted(child.glob("*.pdf"))
|
||||||
if sub_pdfs:
|
|
||||||
groups.append((sub_pdfs, child.name))
|
groups.append((sub_pdfs, child.name))
|
||||||
|
elif root_pdfs:
|
||||||
|
# C'est un dossier patient directement → utiliser son nom comme subdir
|
||||||
|
groups.append((root_pdfs, input_p.name))
|
||||||
else:
|
else:
|
||||||
logger.error("Chemin introuvable : %s", input_p)
|
logger.error("Chemin introuvable : %s", input_p)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
@@ -185,6 +186,30 @@ def severity_badge(value: str | None) -> Markup:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def format_dossier_name(name: str) -> str:
    """Turn a raw folder name into a human-readable label.

    Args:
        name: Folder name to format, e.g. ``"15_23096332"``.

    Returns:
        ``"Non classés"`` for the special name ``"racine"``;
        ``"Dossier <id>"`` when the name has the exact shape
        ``<digits>_<digits>`` (the second run of digits is kept);
        otherwise ``name`` unchanged.
    """
    if name == "racine":
        return "Non classés"
    # fullmatch instead of match + "$": "$" would also accept a trailing
    # newline, so "15_1\n" must not be treated as a well-formed folder name.
    match = re.fullmatch(r"\d+_(\d+)", name)
    if match:
        return f"Dossier {match.group(1)}"
    return name
|
||||||
|
|
||||||
|
|
||||||
|
def format_doc_name(name: str) -> str:
    """Turn a JSON document name into a short human-readable label.

    Matching is case-insensitive and the rules are tried in this order:

    1. name contains ``"fusionne"``          -> ``"Fusionné"``
    2. name starts with ``"cro"`` or ``"crh"`` -> the text before the first
       ``"_"``, upper-cased (e.g. ``"CRO_xxx_cim10"`` -> ``"CRO"``)
    3. name contains ``"trackare"``          -> ``"Trackare"``
    4. name contains ``"anapath"``           -> ``"Anapath"``

    Args:
        name: Document name to format.

    Returns:
        The label selected by the first matching rule, or ``name`` unchanged
        when no rule applies.
    """
    lowered = name.lower()
    # The merged document wins over every other rule, so it is tested first.
    if "fusionne" in lowered:
        return "Fusionné"
    if lowered.startswith(("cro", "crh")):
        # Keep only the leading token before the first underscore.
        return name.partition("_")[0].upper()
    if "trackare" in lowered:
        return "Trackare"
    if "anapath" in lowered:
        return "Anapath"
    return name
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# App factory
|
# App factory
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -196,6 +221,8 @@ def create_app() -> Flask:
|
|||||||
app.jinja_env.filters["confidence_label"] = confidence_label
|
app.jinja_env.filters["confidence_label"] = confidence_label
|
||||||
app.jinja_env.filters["severity_badge"] = severity_badge
|
app.jinja_env.filters["severity_badge"] = severity_badge
|
||||||
app.jinja_env.filters["format_duration"] = format_duration
|
app.jinja_env.filters["format_duration"] = format_duration
|
||||||
|
app.jinja_env.filters["format_dossier_name"] = format_dossier_name
|
||||||
|
app.jinja_env.filters["format_doc_name"] = format_doc_name
|
||||||
|
|
||||||
ccam_dict = load_ccam_dict()
|
ccam_dict = load_ccam_dict()
|
||||||
|
|
||||||
|
|||||||
@@ -72,6 +72,10 @@
|
|||||||
background: #1e293b;
|
background: #1e293b;
|
||||||
border-left-color: #3b82f6;
|
border-left-color: #3b82f6;
|
||||||
}
|
}
|
||||||
|
.sidebar-nav a.sidebar-fusionne {
|
||||||
|
color: #60a5fa;
|
||||||
|
font-weight: 600;
|
||||||
|
}
|
||||||
|
|
||||||
/* Admin section */
|
/* Admin section */
|
||||||
.sidebar-admin {
|
.sidebar-admin {
|
||||||
|
|||||||
@@ -3,9 +3,13 @@
|
|||||||
|
|
||||||
{% block sidebar %}
|
{% block sidebar %}
|
||||||
{% for group_name, items in groups.items() %}
|
{% for group_name, items in groups.items() %}
|
||||||
<div class="group-title">{{ group_name }}</div>
|
<div class="group-title">{{ group_name | format_dossier_name }}</div>
|
||||||
{% for item in items %}
|
{% for item in items %}
|
||||||
<a href="/dossier/{{ item.path_rel }}">{{ item.name }}</a>
|
{% if 'fusionne' in item.name %}
|
||||||
|
<a href="/dossier/{{ item.path_rel }}" class="sidebar-fusionne">★ Fusionné</a>
|
||||||
|
{% else %}
|
||||||
|
<a href="/dossier/{{ item.path_rel }}">{{ item.name | format_doc_name }}</a>
|
||||||
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
@@ -33,7 +37,7 @@
|
|||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% set stats = group_stats.get(group_name, {}) %}
|
{% set stats = group_stats.get(group_name, {}) %}
|
||||||
<h3 style="display:flex;align-items:baseline;gap:0.75rem;flex-wrap:wrap;">
|
<h3 style="display:flex;align-items:baseline;gap:0.75rem;flex-wrap:wrap;">
|
||||||
{{ group_name }}
|
{{ group_name | format_dossier_name }}
|
||||||
<span style="font-size:0.75rem;font-weight:400;color:#64748b;">
|
<span style="font-size:0.75rem;font-weight:400;color:#64748b;">
|
||||||
{{ items|length }} fichier(s){% if ns.count %} — total : {{ ns.total|format_duration }}{% endif %}
|
{{ items|length }} fichier(s){% if ns.count %} — total : {{ ns.total|format_duration }}{% endif %}
|
||||||
</span>
|
</span>
|
||||||
@@ -60,7 +64,7 @@
|
|||||||
<a href="/dossier/{{ item.path_rel }}" style="text-decoration:none;color:inherit;">
|
<a href="/dossier/{{ item.path_rel }}" style="text-decoration:none;color:inherit;">
|
||||||
<div class="card" style="cursor:pointer;transition:box-shadow 0.15s;">
|
<div class="card" style="cursor:pointer;transition:box-shadow 0.15s;">
|
||||||
<div style="font-weight:600;font-size:0.9rem;margin-bottom:0.4rem;color:#0f172a;">
|
<div style="font-weight:600;font-size:0.9rem;margin-bottom:0.4rem;color:#0f172a;">
|
||||||
{{ item.name }}
|
{{ item.name | format_doc_name }}
|
||||||
</div>
|
</div>
|
||||||
<div style="display:flex;flex-wrap:wrap;gap:0.3rem;margin-bottom:0.4rem;">
|
<div style="display:flex;flex-wrap:wrap;gap:0.3rem;margin-bottom:0.4rem;">
|
||||||
{% if item.dossier.document_type %}
|
{% if item.dossier.document_type %}
|
||||||
|
|||||||
Reference in New Issue
Block a user