feat: architecture multi-modèles LLM + quality engine + benchmark

- Multi-modèles : 4 rôles LLM (coding=gemma3:27b-cloud, cpam=gemma3:27b-cloud,
  validation=deepseek-v3.2:cloud, qc=gemma3:12b) avec get_model(role)
- Prompts externalisés : 7 templates dans src/prompts/templates.py
- Cache Ollama : modèle stocké par entrée (migration auto ancien format)
- call_ollama() : paramètre role= (priorité: model > role > global)
- Quality engine : veto_engine + decision_engine + rules_router (YAML)
- Benchmark qualité : scripts/benchmark_quality.py (A/B, métriques CIM-10)
- Fix biologie : valeurs qualitatives (troponine négative) non filtrées
- Fix CPAM : gemma3:27b-cloud au lieu de deepseek (JSON tronqué par thinking)
- CPAM max_tokens 4000→6000, viewer admin multi-modèles
- Benchmark 10 dossiers : 100% DAS valides, 10/10 CPAM, 243s/dossier

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
dom
2026-02-20 00:21:09 +01:00
parent 5c8c2817ec
commit 909e051cc9
39 changed files with 5092 additions and 574 deletions

137
batch_50.sh Executable file
View File

@@ -0,0 +1,137 @@
#!/usr/bin/env bash
#
# batch_50.sh — run the extraction pipeline over N input dossiers, then print
# per-run quality statistics (verdicts, vetoes, HARD counts, downgrades).
#
# Usage: batch_50.sh [ROOT_INPUT] [N]
set -euo pipefail

# Positional arguments, with defaults.
ROOT_INPUT="${1:-input}"   # root directory holding one sub-directory per dossier
N="${2:-50}"               # how many dossiers to process

# Behaviour toggles, taken from the environment.
FORCE="${FORCE:-0}"              # FORCE=1 => reprocess even when the output JSON exists
CLEAN="${CLEAN:-0}"              # CLEAN=1 => wipe the dossier's outputs first (recommended with FORCE)
RANDOM_PICK="${RANDOM_PICK:-0}"  # RANDOM_PICK=1 => pick N dossiers at random instead of sorted order
MAX_PARALLEL="${OLLAMA_MAX_PARALLEL:-1}"

# Model selection, exported for the Python pipeline.
export OLLAMA_CODER_MODEL="${OLLAMA_CODER_MODEL:-gemma3:27b}"
export OLLAMA_VERIFIER_MODEL="${OLLAMA_VERIFIER_MODEL:-deepseek-v3.2:cloud}"
export OLLAMA_MAX_PARALLEL="$MAX_PARALLEL"

# Call the venv interpreter by path so the script works without activation.
PY="./.venv/bin/python"
if [[ ! -x "$PY" ]]; then
  echo "❌ Venv introuvable: $PY"
  echo " Active ton venv ou crée-le, puis relance."
  exit 1
fi
# One timestamped directory per run for artefacts, another for per-dossier logs.
RUN_ID="$(date +%Y%m%d_%H%M%S)"
RUN_DIR="output/batch_runs/$RUN_ID"
LOG_DIR="output/batch_logs/$RUN_ID"
mkdir -p "$RUN_DIR" "$LOG_DIR"

IDS_FILE="$RUN_DIR/ids.txt"      # one dossier id per line (for end-of-run stats)
FILES_FILE="$RUN_DIR/files.txt"  # one result-JSON path per line

# Run header: shown on the console and persisted in the summary file.
{
  echo "=== Batch Run: $RUN_ID ==="
  echo "ROOT_INPUT=$ROOT_INPUT N=$N FORCE=$FORCE CLEAN=$CLEAN RANDOM_PICK=$RANDOM_PICK"
  echo "CODER=$OLLAMA_CODER_MODEL VERIFIER=$OLLAMA_VERIFIER_MODEL OLLAMA_MAX_PARALLEL=$OLLAMA_MAX_PARALLEL"
  echo
} | tee "$RUN_DIR/summary.txt"
# A dossier is any direct sub-directory of ROOT_INPUT containing at least one PDF.
ALL_DIRS=()
while read -r d; do
  if compgen -G "$d/*.pdf" >/dev/null; then
    ALL_DIRS+=("$d")
  fi
done < <(find "$ROOT_INPUT" -mindepth 1 -maxdepth 1 -type d -print)

if (( ${#ALL_DIRS[@]} == 0 )); then
  echo "❌ Aucun dossier avec PDF trouvé dans: $ROOT_INPUT"
  exit 1
fi

# Keep at most N dossiers: random sample, or the first N in sorted order.
if [[ "$RANDOM_PICK" == "1" ]]; then
  mapfile -t DOSSIERS < <(printf "%s\n" "${ALL_DIRS[@]}" | shuf | head -n "$N")
else
  mapfile -t DOSSIERS < <(printf "%s\n" "${ALL_DIRS[@]}" | sort | head -n "$N")
fi
echo "→ Dossiers sélectionnés: ${#DOSSIERS[@]}" | tee -a "$RUN_DIR/summary.txt"
# Process each selected dossier.
# NOTE: a failure on one dossier must NOT abort the whole batch — under
# `set -e` + `pipefail`, an unguarded failing `python | tee` pipeline would
# terminate the script and skip all remaining dossiers.
for d in "${DOSSIERS[@]}"; do
  id="$(basename "$d")"
  out_json="output/structured/$id/${id}_fusionne_cim10.json"
  log="$LOG_DIR/${id}.log"
  # Record the id so the stats step only considers this run's dossiers.
  echo "$id" >> "$IDS_FILE"
  if [[ -f "$out_json" && "$FORCE" != "1" ]]; then
    echo "⏭️ SKIP $id (déjà traité)" | tee -a "$RUN_DIR/summary.txt"
    continue
  fi
  if [[ "$CLEAN" == "1" ]]; then
    rm -rf "output/structured/$id" "output/reports/$id" "output/anonymized/$id" 2>/dev/null || true
  fi
  echo "▶️ START $id" | tee -a "$RUN_DIR/summary.txt"
  # if-guard disables set -e for the pipeline; pipefail still reports the
  # python exit status, so a failure is logged and the loop continues.
  if ("$PY" -m src.main "$d") 2>&1 | tee "$log"; then
    echo "✅ DONE $id" | tee -a "$RUN_DIR/summary.txt"
  else
    echo "❌ FAIL $id (voir $log)" | tee -a "$RUN_DIR/summary.txt"
  fi
done
# Build the list of result JSONs that actually exist for this run's dossiers.
: > "$FILES_FILE"
while read -r id; do
  f="output/structured/$id/${id}_fusionne_cim10.json"
  # Explicit if, not `[[ ... ]] && echo`: if the LAST id has no JSON, the
  # &&-form makes the while loop exit non-zero and `set -e` silently kills
  # the script before any statistics are printed.
  if [[ -f "$f" ]]; then
    echo "$f" >> "$FILES_FILE"
  fi
done < "$IDS_FILE"

COUNT_FILES=$(wc -l < "$FILES_FILE" | tr -d ' ')
echo | tee -a "$RUN_DIR/summary.txt"
echo "→ JSON trouvés pour stats: $COUNT_FILES" | tee -a "$RUN_DIR/summary.txt"
if [[ "$COUNT_FILES" -eq 0 ]]; then
  echo "⚠️ Aucun JSON pour stats. Fin." | tee -a "$RUN_DIR/summary.txt"
  exit 0
fi
echo | tee -a "$RUN_DIR/summary.txt"
echo "=== STATS (sur ce run uniquement) ===" | tee -a "$RUN_DIR/summary.txt"
# 1) Verdict distribution over all result files.
#    xargs -d '\n': treat each line as one path, so paths containing spaces or
#    quotes are passed intact (default xargs word-splitting would break them).
#    xargs -r: do not invoke jq at all if the list is somehow empty (otherwise
#    jq would be run with no file and block reading stdin).
echo "--- Verdicts ---" | tee -a "$RUN_DIR/summary.txt"
xargs -r -d '\n' -a "$FILES_FILE" jq -r '(.veto_report.verdict // "NO_REPORT")' \
| sort | uniq -c | sort -nr | tee -a "$RUN_DIR/summary.txt"
# 2) Most frequent veto rules (top 20).
echo | tee -a "$RUN_DIR/summary.txt"
echo "--- Top VETOs ---" | tee -a "$RUN_DIR/summary.txt"
xargs -r -d '\n' -a "$FILES_FILE" jq -r '.veto_report.issues[]?.veto' \
| sort | uniq -c | sort -nr | head -n 20 | tee -a "$RUN_DIR/summary.txt"
# 3) Dossiers that triggered at least one HARD veto.
echo | tee -a "$RUN_DIR/summary.txt"
echo "--- Dossiers avec HARD ---" | tee -a "$RUN_DIR/summary.txt"
while read -r f; do
  id="$(basename "$f" _fusionne_cim10.json)"
  hard=$(jq '[.veto_report.issues[]? | select(.severity=="HARD")] | length' "$f")
  # Explicit if, not `[[ ... ]] && printf`: when the LAST file has no HARD
  # veto, the &&-form leaves this pipeline stage with status 1, and with
  # `pipefail` + `set -e` the script would abort before section 4.
  if [[ "$hard" -gt 0 ]]; then
    printf "%s\tHARD=%s\n" "$id" "$hard"
  fi
done < "$FILES_FILE" | sort -k2,2nr | tee -a "$RUN_DIR/summary.txt"
# 4) Downgrades: entries whose final CIM-10 differs from the LLM suggestion.
echo | tee -a "$RUN_DIR/summary.txt"
echo "--- Downgrades (TOP 30) ---" | tee -a "$RUN_DIR/summary.txt"
while read -r f; do
  id="$(basename "$f" _fusionne_cim10.json)"
  # Count 1 per diagnosis (principal + associated) where both codes exist and differ.
  dw=$(jq '
([
(.diagnostic_principal? | select(.cim10_final? and .cim10_suggestion? and .cim10_final != .cim10_suggestion) | 1),
(.diagnostics_associes[]? | select(.cim10_final? and .cim10_suggestion? and .cim10_final != .cim10_suggestion) | 1)
] | add) // 0
' "$f")
  # Explicit if, not `[[ ... ]] && printf`: the &&-form would leave this
  # pipeline stage non-zero when the last dossier has no downgrade, and
  # `set -e` + `pipefail` would abort the script before the footer.
  if [[ "$dw" -gt 0 ]]; then
    printf "%s\tDOWN=%s\n" "$id" "$dw"
  fi
done < "$FILES_FILE" | sort -k2,2nr | head -n 30 | tee -a "$RUN_DIR/summary.txt"
echo | tee -a "$RUN_DIR/summary.txt"
echo "✅ Stats écrites dans: $RUN_DIR/summary.txt"
echo "📁 Logs dossier par dossier: $LOG_DIR/"