feat: pass LLM hybride pour DAS + interface admin référentiels RAG

Chantier 1 — Extraction DAS par LLM :
- Nouveau prompt expert DIM dans rag_search.py (extract_das_llm)
- Phase 4 dans cim10_extractor.py : détection DAS supplémentaires avant enrichissement RAG
- Cache persistant (clé hash du texte), validation CIM-10, déduplication
- Activé uniquement avec use_rag=True (--no-rag le désactive)

Chantier 2 — Admin référentiels :
- Config : REFERENTIELS_DIR, UPLOAD_MAX_SIZE_MB, ALLOWED_EXTENSIONS
- Chunking générique (PDF/CSV/Excel/TXT) + ajout incrémental FAISS dans rag_index.py
- ReferentielManager CRUD dans viewer/referentiels.py
- 5 routes Flask (listing, upload, indexation, suppression, rebuild)
- Template admin avec tableau interactif + lien sidebar

Fix : if cache → if cache is not None (OllamaCache vide évaluait à False)

410 tests passent (27 nouveaux, 0 régression).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-12 23:12:39 +01:00
parent bf92a0ce3e
commit f44216b95b
10 changed files with 1197 additions and 6 deletions

View File

@@ -11,8 +11,11 @@ import requests
from flask import Flask, abort, render_template, request, jsonify
from markupsafe import Markup
from ..config import STRUCTURED_DIR, OLLAMA_URL, CCAM_DICT_PATH, DossierMedical
from werkzeug.utils import secure_filename
from ..config import STRUCTURED_DIR, OLLAMA_URL, CCAM_DICT_PATH, DossierMedical, ALLOWED_EXTENSIONS, UPLOAD_MAX_SIZE_MB
from .. import config as cfg
from .referentiels import ReferentielManager
logger = logging.getLogger(__name__)
@@ -271,12 +274,12 @@ def create_app() -> Flask:
def reprocess(filepath: str):
"""Relance le traitement d'un dossier."""
from ..main import process_pdf, write_outputs
dossier = load_dossier(filepath)
source_file = dossier.source_file
if not source_file:
return jsonify({"error": "Fichier source introuvable"}), 400
# Chercher le PDF source dans input/
input_dir = Path(__file__).parent.parent.parent / "input"
pdf_path = None
@@ -284,10 +287,10 @@ def create_app() -> Flask:
if p.is_file():
pdf_path = p
break
if not pdf_path:
return jsonify({"error": f"PDF source '{source_file}' introuvable"}), 404
try:
anonymized_text, new_dossier, report = process_pdf(pdf_path)
stem = pdf_path.stem.replace(" ", "_")
@@ -300,4 +303,64 @@ def create_app() -> Flask:
logger.exception("Erreur lors du retraitement")
return jsonify({"error": str(e)}), 500
# ------------------------------------------------------------------
# Admin routes for the RAG referentiels
# ------------------------------------------------------------------
# One manager instance, created with its default directory, is captured by
# the admin route handlers that reference `ref_manager`.
ref_manager = ReferentielManager()
@app.route("/admin/referentiels")
def admin_referentiels():
    """Render the referentiels admin page.

    The template receives the full referentiel listing and the configured
    maximum upload size (in MB) for display.
    """
    context = {
        "referentiels": ref_manager.list_all(),
        "max_size": UPLOAD_MAX_SIZE_MB,
    }
    return render_template("admin_referentiels.html", **context)
@app.route("/admin/referentiels/upload", methods=["POST"])
def upload_referentiel():
    """Receive an uploaded referentiel file and register it.

    Expects a multipart form containing a ``file`` part.

    Returns:
        JSON ``{"ok": True, "referentiel": <metadata>}`` on success, or
        JSON ``{"error": <message>}`` with HTTP 400 when the request has no
        file, the filename is empty or unusable, or the manager rejects the
        file (``ValueError``).
    """
    if "file" not in request.files:
        return jsonify({"error": "Aucun fichier envoyé"}), 400

    upload = request.files["file"]
    if not upload.filename:
        return jsonify({"error": "Nom de fichier vide"}), 400

    filename = secure_filename(upload.filename)
    # secure_filename() may strip every character (for instance a name made
    # only of separators or non-ASCII symbols) and return an empty string.
    # Reject it here with an explicit message instead of letting it surface
    # later as a confusing "extension not allowed" error.
    if not filename:
        return jsonify({"error": "Nom de fichier invalide"}), 400

    try:
        ref = ref_manager.add_file(filename, upload.read())
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400
    return jsonify({"ok": True, "referentiel": ref})
@app.route("/admin/referentiels/<ref_id>/index", methods=["POST"])
def index_referentiel(ref_id: str):
    """Index one referentiel and report the number of chunks.

    Returns JSON ``{"ok": True, "chunks": n}`` on success, HTTP 404 with an
    error message when the manager raises ``ValueError``, and HTTP 500 (after
    logging the traceback) for any other exception.
    """
    try:
        payload = {"ok": True, "chunks": ref_manager.index_referentiel(ref_id)}
        return jsonify(payload)
    except ValueError as not_found:
        return jsonify({"error": str(not_found)}), 404
    except Exception as failure:
        logger.exception("Erreur lors de l'indexation du référentiel %s", ref_id)
        return jsonify({"error": str(failure)}), 500
@app.route("/admin/referentiels/<ref_id>", methods=["DELETE"])
def delete_referentiel(ref_id: str):
    """Delete a referentiel; respond 404 when the manager does not know it."""
    removed = ref_manager.remove(ref_id)
    if not removed:
        return jsonify({"error": "Référentiel introuvable"}), 404
    return jsonify({"ok": True})
@app.route("/admin/referentiels/rebuild-index", methods=["POST"])
def rebuild_index():
    """Force a rebuild of the index, then re-index the indexed referentiels.

    Returns JSON ``{"ok": True, "reindexed": n}`` where ``n`` is the number of
    referentiels whose status was ``"indexed"`` and that were indexed again.
    Any exception is logged and reported as HTTP 500.
    """
    try:
        from ..medical.rag_index import build_index

        build_index(force=True)

        # Only referentiels already marked as indexed are fed back in.
        reindexed = 0
        for entry in ref_manager.list_all():
            if entry["status"] != "indexed":
                continue
            ref_manager.index_referentiel(entry["id"])
            reindexed += 1
        return jsonify({"ok": True, "reindexed": reindexed})
    except Exception as failure:
        logger.exception("Erreur lors du rebuild de l'index")
        return jsonify({"error": str(failure)}), 500
return app

155
src/viewer/referentiels.py Normal file
View File

@@ -0,0 +1,155 @@
"""Gestionnaire de référentiels utilisateur pour le RAG."""
from __future__ import annotations
import json
import logging
import shutil
import uuid
from datetime import datetime
from pathlib import Path
from ..config import REFERENTIELS_DIR, ALLOWED_EXTENSIONS, UPLOAD_MAX_SIZE_MB
logger = logging.getLogger(__name__)
class ReferentielManager:
"""CRUD pour les fichiers de référentiels utilisateur.
Stocke les fichiers dans REFERENTIELS_DIR avec un index.json
pour les métadonnées.
"""
def __init__(self, referentiels_dir: Path | None = None):
self._dir = referentiels_dir or REFERENTIELS_DIR
self._dir.mkdir(parents=True, exist_ok=True)
self._index_path = self._dir / "index.json"
self._index: list[dict] = self._load_index()
def _load_index(self) -> list[dict]:
if self._index_path.exists():
try:
return json.loads(self._index_path.read_text(encoding="utf-8"))
except (json.JSONDecodeError, KeyError):
logger.warning("Index référentiels corrompu, réinitialisé")
return []
def _save_index(self) -> None:
self._index_path.write_text(
json.dumps(self._index, ensure_ascii=False, indent=2),
encoding="utf-8",
)
def list_all(self) -> list[dict]:
"""Retourne la liste de tous les référentiels."""
return list(self._index)
def get(self, ref_id: str) -> dict | None:
"""Retourne un référentiel par son ID."""
for ref in self._index:
if ref["id"] == ref_id:
return ref
return None
def add_file(self, filename: str, file_data: bytes) -> dict:
"""Ajoute un fichier de référentiel.
Args:
filename: Nom original du fichier.
file_data: Contenu binaire du fichier.
Returns:
Métadonnées du référentiel créé.
Raises:
ValueError: Extension non autorisée ou taille dépassée.
"""
ext = Path(filename).suffix.lower()
if ext not in ALLOWED_EXTENSIONS:
raise ValueError(f"Extension '{ext}' non autorisée. Extensions valides : {ALLOWED_EXTENSIONS}")
size_mb = len(file_data) / (1024 * 1024)
if size_mb > UPLOAD_MAX_SIZE_MB:
raise ValueError(f"Fichier trop volumineux ({size_mb:.1f} Mo > {UPLOAD_MAX_SIZE_MB} Mo)")
ref_id = uuid.uuid4().hex[:12]
safe_name = f"{ref_id}_{Path(filename).stem}{ext}"
file_path = self._dir / safe_name
file_path.write_bytes(file_data)
ref = {
"id": ref_id,
"filename": filename,
"stored_name": safe_name,
"extension": ext,
"size_bytes": len(file_data),
"date_added": datetime.now().isoformat(),
"status": "uploaded",
"chunks_count": 0,
}
self._index.append(ref)
self._save_index()
logger.info("Référentiel ajouté : %s (%s)", filename, ref_id)
return ref
def remove(self, ref_id: str) -> bool:
"""Supprime un référentiel (fichier + métadonnées).
Returns:
True si trouvé et supprimé, False sinon.
"""
ref = self.get(ref_id)
if not ref:
return False
file_path = self._dir / ref["stored_name"]
if file_path.exists():
file_path.unlink()
self._index = [r for r in self._index if r["id"] != ref_id]
self._save_index()
logger.info("Référentiel supprimé : %s (%s)", ref["filename"], ref_id)
return True
def index_referentiel(self, ref_id: str) -> int:
"""Indexe un référentiel dans FAISS.
Args:
ref_id: ID du référentiel à indexer.
Returns:
Nombre de chunks indexés.
Raises:
ValueError: Référentiel introuvable.
"""
ref = self.get(ref_id)
if not ref:
raise ValueError(f"Référentiel {ref_id} introuvable")
file_path = self._dir / ref["stored_name"]
if not file_path.exists():
raise ValueError(f"Fichier {ref['stored_name']} introuvable")
from ..medical.rag_index import chunk_user_file, add_chunks_to_index
doc_name = f"ref:{ref['filename']}"
chunks = chunk_user_file(file_path, doc_name)
if not chunks:
ref["status"] = "empty"
ref["chunks_count"] = 0
self._save_index()
return 0
count = add_chunks_to_index(chunks)
ref["status"] = "indexed"
ref["chunks_count"] = count
self._save_index()
logger.info("Référentiel indexé : %s%d chunks", ref["filename"], count)
return count

View File

@@ -0,0 +1,220 @@
{% extends "base.html" %}
{% block title %}Référentiels RAG{% endblock %}
{% block sidebar %}
<div class="group-title">Admin</div>
<a href="/admin/referentiels" style="color:#60a5fa;font-weight:600;border-left-color:#3b82f6;">Référentiels RAG</a>
<a href="/">Retour aux dossiers</a>
{% endblock %}
{% block content %}
<h2>Référentiels RAG</h2>
<p style="font-size:0.85rem;color:#64748b;margin-bottom:1.5rem;">
Ajoutez des documents de référence (PDF, CSV, Excel, TXT) pour enrichir la base de connaissances du RAG.
</p>
<!-- Zone upload -->
<div class="card" style="margin-bottom:1.5rem;">
<h3>Ajouter un référentiel</h3>
<form id="upload-form" style="display:flex;gap:0.75rem;align-items:end;flex-wrap:wrap;margin-top:0.75rem;">
<div>
<label style="display:block;font-size:0.7rem;color:#64748b;text-transform:uppercase;letter-spacing:0.05em;font-weight:600;margin-bottom:0.25rem;">Fichier</label>
<input type="file" id="file-input" name="file" accept=".pdf,.csv,.xlsx,.xls,.txt"
style="font-size:0.85rem;padding:0.35rem;">
</div>
<button type="submit" id="upload-btn"
style="padding:0.5rem 1.25rem;border-radius:6px;border:none;background:#3b82f6;color:#fff;font-size:0.85rem;font-weight:600;cursor:pointer;">
Uploader
</button>
<span id="upload-status" style="font-size:0.8rem;"></span>
</form>
<p style="font-size:0.7rem;color:#94a3b8;margin-top:0.5rem;">
Extensions : .pdf, .csv, .xlsx, .xls, .txt — Max {{ max_size }} Mo
</p>
</div>
<!-- Tableau référentiels -->
<div class="card">
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:0.75rem;">
<h3>Référentiels indexés</h3>
<button id="rebuild-btn"
style="padding:0.35rem 0.75rem;border-radius:6px;border:1px solid #e2e8f0;background:#fff;font-size:0.75rem;cursor:pointer;">
Rebuild complet
</button>
</div>
<table>
<thead>
<tr>
<th>Nom</th>
<th>Type</th>
<th>Taille</th>
<th>Date</th>
<th>Chunks</th>
<th>Statut</th>
<th>Actions</th>
</tr>
</thead>
<tbody id="ref-table">
{% for ref in referentiels %}
<tr id="row-{{ ref.id }}">
<td>{{ ref.filename }}</td>
<td><span class="badge" style="background:#f1f5f9;color:#334155;">{{ ref.extension }}</span></td>
<td>{{ "%.1f"|format(ref.size_bytes / 1024 / 1024) }} Mo</td>
<td style="font-size:0.8rem;">{{ ref.date_added[:10] }}</td>
<td>{{ ref.chunks_count }}</td>
<td>
{% if ref.status == 'indexed' %}
<span class="badge" style="background:#dcfce7;color:#16a34a;">Indexé</span>
{% elif ref.status == 'empty' %}
<span class="badge" style="background:#fef9c3;color:#ca8a04;">Vide</span>
{% else %}
<span class="badge" style="background:#f1f5f9;color:#64748b;">Uploadé</span>
{% endif %}
</td>
<td>
<button onclick="indexRef('{{ ref.id }}')" class="action-btn"
style="padding:2px 8px;border-radius:4px;border:1px solid #3b82f6;background:#eff6ff;color:#2563eb;font-size:0.75rem;cursor:pointer;margin-right:4px;">
Indexer
</button>
<button onclick="deleteRef('{{ ref.id }}')" class="action-btn"
style="padding:2px 8px;border-radius:4px;border:1px solid #fca5a5;background:#fef2f2;color:#dc2626;font-size:0.75rem;cursor:pointer;">
Supprimer
</button>
</td>
</tr>
{% endfor %}
{% if not referentiels %}
<tr id="empty-row">
<td colspan="7" style="text-align:center;color:#94a3b8;padding:2rem;">Aucun référentiel</td>
</tr>
{% endif %}
</tbody>
</table>
</div>
<div id="global-status" style="margin-top:1rem;font-size:0.8rem;"></div>
{% endblock %}
{% block scripts %}
<script>
(function() {
// Look up once the DOM nodes the script works with: the upload form and its
// controls, the two status areas, and the rebuild button.
const uploadForm = document.getElementById('upload-form');
const fileInput = document.getElementById('file-input');
const uploadBtn = document.getElementById('upload-btn');
const uploadStatus = document.getElementById('upload-status');
const globalStatus = document.getElementById('global-status');
const rebuildBtn = document.getElementById('rebuild-btn');
// Upload handler: posts the selected file as multipart form data, shows a
// spinner while the request runs, then reports the outcome next to the button
// and reloads the page after a successful upload.
uploadForm.addEventListener('submit', (evt) => {
  evt.preventDefault();

  const selected = fileInput.files[0];
  if (!selected) {
    uploadStatus.textContent = 'Sélectionnez un fichier';
    return;
  }

  // Put the button back in its idle state.
  const restoreButton = () => {
    uploadBtn.disabled = false;
    uploadBtn.textContent = 'Uploader';
  };
  // Show a coloured message in the upload status area.
  const report = (color, text) => {
    uploadStatus.style.color = color;
    uploadStatus.textContent = text;
  };

  const payload = new FormData();
  payload.append('file', selected);

  uploadBtn.disabled = true;
  uploadBtn.innerHTML = '<span class="spinner"></span>';
  uploadStatus.textContent = '';

  fetch('/admin/referentiels/upload', { method: 'POST', body: payload })
    .then((resp) => resp.json())
    .then((data) => {
      restoreButton();
      if (!data.ok) {
        report('#dc2626', data.error || 'Erreur');
        return;
      }
      report('#16a34a', 'Uploadé');
      setTimeout(() => location.reload(), 800);
    })
    .catch(() => {
      restoreButton();
      report('#dc2626', 'Erreur réseau');
    });
});
// Index one referentiel: POSTs to its /index endpoint, shows a spinner in the
// row's "Indexer" button meanwhile, then reports the result in the global
// status area and reloads the page on success.
window.indexRef = function(id) {
  // Resolve the button from the row itself rather than from the deprecated
  // implicit global `event`, which this function is not handed explicitly.
  // The "Indexer" button is the first .action-btn of the row.
  const row = document.getElementById('row-' + id);
  const btn = row ? row.querySelector('button.action-btn') : null;

  // Put the button back in its idle state after a failure.
  const restoreButton = () => {
    if (!btn) return;
    btn.disabled = false;
    btn.textContent = 'Indexer';
  };
  // Show a coloured message in the global status area.
  const report = (color, text) => {
    globalStatus.style.color = color;
    globalStatus.textContent = text;
  };

  if (btn) {
    btn.disabled = true;
    btn.innerHTML = '<span class="spinner" style="border-color:rgba(37,99,235,0.3);border-top-color:#2563eb;width:10px;height:10px;"></span>';
  }

  fetch('/admin/referentiels/' + id + '/index', { method: 'POST' })
    .then((resp) => resp.json())
    .then((data) => {
      if (data.ok) {
        report('#16a34a', data.chunks + ' chunks indexés');
        setTimeout(() => location.reload(), 800);
      } else {
        restoreButton();
        report('#dc2626', data.error || 'Erreur');
      }
    })
    .catch(() => {
      restoreButton();
      report('#dc2626', 'Erreur réseau');
    });
};
// Delete one referentiel after user confirmation: sends a DELETE request,
// removes the table row on success and reports the outcome in the global
// status area.
window.deleteRef = function(id) {
  const confirmed = confirm('Supprimer ce référentiel ?');
  if (!confirmed) return;

  // Show a coloured message in the global status area.
  const report = (color, text) => {
    globalStatus.style.color = color;
    globalStatus.textContent = text;
  };

  fetch('/admin/referentiels/' + id, { method: 'DELETE' })
    .then((resp) => resp.json())
    .then((data) => {
      if (!data.ok) {
        report('#dc2626', data.error || 'Erreur');
        return;
      }
      const row = document.getElementById('row-' + id);
      if (row) row.remove();
      report('#16a34a', 'Supprimé');
    })
    .catch(() => {
      report('#dc2626', 'Erreur réseau');
    });
};
// Full rebuild: after confirmation, POSTs to the rebuild endpoint with the
// button disabled and showing a spinner, then restores the button and reports
// the outcome in the global status area.
rebuildBtn.addEventListener('click', () => {
  const confirmed = confirm('Reconstruire l\'index FAISS complet ? Cela peut prendre plusieurs minutes.');
  if (!confirmed) return;

  // Put the button back in its idle state.
  const restoreButton = () => {
    rebuildBtn.disabled = false;
    rebuildBtn.textContent = 'Rebuild complet';
  };
  // Show a coloured message in the global status area.
  const report = (color, text) => {
    globalStatus.style.color = color;
    globalStatus.textContent = text;
  };

  rebuildBtn.disabled = true;
  rebuildBtn.innerHTML = '<span class="spinner" style="border-color:rgba(0,0,0,0.2);border-top-color:#333;width:10px;height:10px;"></span> Rebuild…';

  fetch('/admin/referentiels/rebuild-index', { method: 'POST' })
    .then((resp) => resp.json())
    .then((data) => {
      restoreButton();
      if (data.ok) {
        report('#16a34a', 'Index reconstruit (' + data.reindexed + ' référentiels réindexés)');
      } else {
        report('#dc2626', data.error || 'Erreur');
      }
    })
    .catch(() => {
      restoreButton();
      report('#dc2626', 'Erreur réseau');
    });
});
})();
</script>
{% endblock %}

View File

@@ -227,6 +227,12 @@
<nav class="sidebar-nav" id="sidebar-nav">
{% block sidebar %}{% endblock %}
</nav>
<div class="sidebar-admin" style="border-top:1px solid #1e293b;padding:0.5rem 1rem;">
<a href="/admin/referentiels" style="display:block;color:#94a3b8;text-decoration:none;font-size:0.8rem;padding:0.35rem 0;transition:color 0.15s;"
onmouseover="this.style.color='#e2e8f0'" onmouseout="this.style.color='#94a3b8'">
Référentiels RAG
</a>
</div>
<div class="sidebar-admin">
<label for="model-select">Modèle Ollama</label>
<select id="model-select"><option>Chargement…</option></select>