Initial commit

2026-03-05 01:20:13 +01:00
commit 93e549c061
9 changed files with 1050 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,76 @@
+# === Python ===
+__pycache__/
+*.py[cod]
+*.pyo
+*.egg-info/
+*.egg
+dist/
+build/
+*.whl
+
+# === Virtual environments ===
+.venv/
+venv/
+venv_*/
+env/
+
+# === ML Models & Data ===
+*.pt
+*.pth
+*.onnx
+*.bin
+*.safetensors
+*.h5
+*.hdf5
+*.pkl
+*.pickle
+*.npy
+*.npz
+*.faiss
+models/
+*.tar.gz
+*.zip
+
+# === Documents & Media ===
+*.pdf
+*.docx
+*.xlsx
+*.csv
+*.png
+*.jpg
+*.jpeg
+*.gif
+*.mp3
+*.wav
+*.mp4
+
+# === IDE ===
+.idea/
+.vscode/
+*.swp
+*.swo
+*~
+
+# === OS ===
+.DS_Store
+Thumbs.db
+.~lock.*
+
+# === Secrets ===
+.env
+*.env
+credentials.json
+token.pickle
+
+# === Logs & Cache ===
+*.log
+logs/
+.pytest_cache/
+.mypy_cache/
+.ruff_cache/
+htmlcov/
+.coverage
+
+# === Backups ===
+*_backup_*
+backups/
--- a/full_auto_medical_scribe.py
+++ b/full_auto_medical_scribe.py
@@ -0,0 +1,53 @@
+import os
+import sys
+import subprocess
+import time
+
+def run_full_auto(audio_file, hf_token):
+    """
+    Run the complete pipeline: diarization + transcription, then medical summary.
+
+    Both steps are delegated to sibling scripts (``medical_diarizer.py`` and
+    ``medical_summarizer.py``) launched with the current interpreter.
+
+    Args:
+        audio_file: Path to the audio recording to process.
+        hf_token: Hugging Face token, handed to the diarizer through the
+            ``HF_TOKEN`` environment variable of the child process.
+
+    Returns:
+        None. Progress and output paths are printed to stdout; the function
+        returns early if the audio file or the transcript is missing.
+
+    Raises:
+        subprocess.CalledProcessError: If a child script exits with a
+            non-zero status (both are run with ``check=True``).
+    """
+    if not os.path.exists(audio_file):
+        print(f"Erreur : Le fichier {audio_file} n'existe pas.")
+        return
+
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    # Output files are named after the audio path with its extension removed.
+    base_name = audio_file.rsplit('.', 1)[0]
+
+    # Step 1: diarization and transcription (Pyannote + Whisper)
+    print("\n[STEP 1/2] DIARISATION & TRANSCRIPTION (Top Qualité)...")
+    env = os.environ.copy()
+    if hf_token:
+        # Environment values must be strings; keep the inherited value when
+        # no token is supplied instead of crashing on None.
+        env["HF_TOKEN"] = hf_token
+
+    diarizer_script = os.path.join(script_dir, "medical_diarizer.py")
+    subprocess.run([sys.executable, diarizer_script, audio_file], env=env, check=True)
+
+    # Output file expected from the diarizer
+    transcript_file = base_name + "_diarized.txt"
+
+    if not os.path.exists(transcript_file):
+        print("Erreur : La transcription avec diarisation a échoué.")
+        return
+
+    # Step 2: AI summary (Ollama or OpenAI)
+    print("\n[STEP 2/2] GÉNÉRATION DE LA SYNTHÈSE MÉDICALE...")
+    summarizer_script = os.path.join(script_dir, "medical_summarizer.py")
+    subprocess.run([sys.executable, summarizer_script, transcript_file], check=True)
+
+    # NOTE(review): assumes the summarizer writes "<transcript stem>_summary.md";
+    # the summarizer script is not visible here — confirm.
+    summary_file = base_name + "_diarized_summary.md"
+
+    print("\n" + "="*50)
+    print("PIPELINE MÉDICAL TERMINÉ AVEC SUCCÈS")
+    print(f"Transcription structurée : {transcript_file}")
+    print(f"Synthèse médicale finale : {summary_file}")
+    print("="*50)
+
+if __name__ == "__main__":
+    if len(sys.argv) < 2:
+        print("Usage: python full_auto_medical_scribe.py <votre_audio.wav>")
+        sys.exit(1)
+
+    # The Hugging Face token is a secret: it is read from the environment
+    # rather than stored in a file that ends up in version control.
+    # NOTE: any token that was previously committed here must be treated as
+    # leaked and revoked.
+    TOKEN = os.environ.get("HF_TOKEN")
+    if not TOKEN:
+        print("Erreur : la variable d'environnement HF_TOKEN n'est pas définie.")
+        sys.exit(1)
+    run_full_auto(sys.argv[1], TOKEN)
--- a/medical_diarizer.py
+++ b/medical_diarizer.py
@@ -0,0 +1,86 @@
+import os
+import sys
+import time
+import torch
+import gc
+from pyannote.audio import Pipeline
+from faster_whisper import WhisperModel
+import librosa
+
+def run_diarization_and_transcription(audio_file, hf_token):
+    """
+    Diarize and transcribe an audio file, reporting progress on stdout.
+
+    Lines of the form ``[STATUS] PROGRESS:<int>`` are printed and flushed
+    immediately so that a parent process reading this script's stdout through
+    a pipe receives them as they happen.
+
+    Args:
+        audio_file: Path to the audio file to process.
+        hf_token: Hugging Face token used to load the pyannote pipeline.
+
+    Returns:
+        The transcript as a single string with one
+        ``[HH:MM:SS] SPEAKER: text`` line per Whisper segment, or None if
+        transcription failed. When diarization fails, it is reported and
+        every segment is labelled ``VOIX``.
+    """
+    duration = librosa.get_duration(path=audio_file)
+    print("[STATUS] PROGRESS:1", flush=True)
+
+    # 1. DIARIZATION
+    print("[PHASE 1/3] Diarisation (Analyse des voix)...", flush=True)
+    try:
+        pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", token=hf_token)
+        pipeline.to(torch.device("cpu"))
+        raw_result = pipeline(audio_file)
+
+        # Some result objects wrap the annotation in an attribute; otherwise
+        # the result is used as the annotation itself.
+        if hasattr(raw_result, 'annotation'):
+            diarization = raw_result.annotation
+        else:
+            diarization = raw_result
+
+        del pipeline
+        gc.collect()
+    except Exception as e:
+        # Best effort: transcription still runs without speaker labels.
+        print(f"[ERROR] Diarisation : {e}", flush=True)
+        diarization = None
+
+    print("[STATUS] PROGRESS:30", flush=True)
+
+    # 2. TRANSCRIPTION
+    print("[PHASE 2/3] Transcription (Modèle Large-v3-Turbo)...", flush=True)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    compute_type = "int8_float16" if device == "cuda" else "int8"
+
+    try:
+        model = WhisperModel("large-v3-turbo", device=device, compute_type=compute_type, cpu_threads=16)
+        segments, _ = model.transcribe(audio_file, beam_size=5, language="fr", word_timestamps=True)
+
+        # The speaker turns do not change between segments: list them once
+        # instead of re-enumerating the annotation for every segment.
+        turns = list(diarization.itertracks(yield_label=True)) if diarization else []
+
+        whisper_segments = []
+        for segment in segments:
+            # Dynamic progress from 30% to 95%. The ratio is clamped so a
+            # segment ending past the measured duration cannot overshoot,
+            # and a zero duration cannot divide by zero.
+            ratio = min(segment.end / duration, 1.0) if duration > 0 else 0.0
+            pct = 30 + (65 * ratio)
+            print(f"[STATUS] PROGRESS:{int(pct)}", flush=True)
+
+            # Label the segment with the speaker whose turn overlaps it most.
+            speaker = "VOIX"
+            max_overlap = 0
+            for turn, _, speaker_id in turns:
+                overlap = min(segment.end, turn.end) - max(segment.start, turn.start)
+                if overlap > max_overlap:
+                    max_overlap = overlap
+                    speaker = speaker_id
+
+            timestamp = f"[{time.strftime('%H:%M:%S', time.gmtime(segment.start))}]"
+            line = f"{timestamp} {speaker}: {segment.text.strip()}"
+            print(line, flush=True)
+            whisper_segments.append(line)
+
+        del model
+        gc.collect()
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
+
+    except Exception as e:
+        print(f"[ERROR] Transcription : {e}", flush=True)
+        return None
+
+    print("[STATUS] PROGRESS:100", flush=True)
+    return "\n".join(whisper_segments)
+
+if __name__ == "__main__":
+    # CLI entry point: the token comes from the HF_TOKEN environment variable
+    # and the audio path from the first command-line argument.
+    hf_token = os.getenv("HF_TOKEN")
+    cli_args = sys.argv[1:]
+    if cli_args:
+        audio_file = cli_args[0]
+        result = run_diarization_and_transcription(audio_file, hf_token)
+        if result:
+            # The transcript is written next to the audio file, with the
+            # extension replaced by "_diarized.txt".
+            stem = audio_file.rsplit('.', 1)[0]
+            output_file = stem + "_diarized.txt"
+            with open(output_file, "w", encoding="utf-8") as out:
+                out.write(result)
+            print(f"\n[OK] Fini : {output_file}")
--- a/medical_scribe_gui.py
+++ b/medical_scribe_gui.py
@@ -0,0 +1,201 @@
+import os
+import sys
+import tkinter as tk
+from tkinter import filedialog, messagebox, ttk
+import threading
+import subprocess
+import time
+
+# Hugging Face token, read from the HF_TOKEN environment variable so that the
+# secret is never stored in the source tree (empty string when unset).
+# NOTE: any token that was previously committed here must be treated as leaked
+# and revoked.
+HF_TOKEN = os.environ.get("HF_TOKEN", "")
+
+class MedicalScribeGUI:
+    """Tkinter front-end: transcribe an audio file, then summarize it with Ollama.
+
+    The pipeline runs in a background thread. ``medical_diarizer.py`` is
+    launched as a child process and is expected to produce
+    ``<audio stem>_diarized.txt``; that transcript is then sent to the selected
+    Ollama model and the answer is saved as ``<audio stem>_summary.md``.
+    """
+
+    def __init__(self, root):
+        """Build the window on ``root`` and start loading the Ollama model list."""
+        self.root = root
+        self.root.title("Medical AI Scribe - v1.0")
+        self.root.geometry("850x750")
+        self.root.configure(bg="#f8f9fa")
+
+        # Tk variables bound to the widgets
+        self.audio_path = tk.StringVar()
+        self.selected_model = tk.StringVar()
+        self.status_var = tk.StringVar(value="Système prêt.")
+
+        self.setup_ui()
+        # Load the Ollama models asynchronously at start-up
+        threading.Thread(target=self.load_ollama_models, daemon=True).start()
+
+    def setup_ui(self):
+        """Create and lay out every widget of the main window."""
+        main_frame = tk.Frame(self.root, bg="#f8f9fa", padx=20, pady=20)
+        main_frame.pack(fill=tk.BOTH, expand=True)
+
+        # 1. File selection
+        tk.Label(main_frame, text="Fichier Audio :", bg="#f8f9fa", font=("Arial", 10, "bold")).pack(anchor="w")
+        file_frame = tk.Frame(main_frame, bg="#f8f9fa")
+        file_frame.pack(fill="x", pady=(5, 15))
+        tk.Entry(file_frame, textvariable=self.audio_path, font=("Arial", 10), width=80).pack(side="left", padx=(0, 10))
+        tk.Button(file_frame, text="Parcourir", command=self.browse_file, bg="#dee2e6").pack(side="left")
+
+        # 2. Model selection
+        tk.Label(main_frame, text="Modèle Ollama pour la synthèse :", bg="#f8f9fa", font=("Arial", 10, "bold")).pack(anchor="w")
+        # The combobox is editable so a model missing from the list can be typed in
+        self.model_combo = ttk.Combobox(main_frame, textvariable=self.selected_model, width=50, font=("Arial", 10))
+        self.model_combo['values'] = ["Chargement des modèles..."]
+        self.model_combo.set("gpt-oss:120b-cloud")
+        self.model_combo.pack(anchor="w", pady=(5, 15))
+
+        # 3. Prompt editor
+        tk.Label(main_frame, text="Prompt pour la synthèse médicale :", bg="#f8f9fa", font=("Arial", 10, "bold")).pack(anchor="w")
+        self.prompt_text = tk.Text(main_frame, height=10, font=("Arial", 10), padx=5, pady=5)
+        self.prompt_text.pack(fill="x", pady=(5, 15))
+
+        default_prompt = (
+            "Tu es un expert médical assistant. Tu dois analyser la transcription d'une réunion médicale.\n"
+            "Ta mission est de produire une synthèse structurée incluant :\n"
+            "1. Objet de la réunion / Motif de consultation.\n"
+            "2. Éléments clés de la discussion (Symptômes, antécédents, examens évoqués).\n"
+            "3. Décisions prises ou Diagnostic provisoire.\n"
+            "4. Plan d'action (Traitements, examens complémentaires, prochain RDV).\n\n"
+            "Règle : Terminologie médicale précise et style synthétique."
+        )
+        self.prompt_text.insert("1.0", default_prompt)
+
+        # 4. Launch button
+        self.run_btn = tk.Button(main_frame, text="LANCER LE PIPELINE COMPLET", 
+                                 bg="#28a745", fg="white", font=("Arial", 12, "bold"),
+                                 relief=tk.FLAT, pady=12, command=self.start_pipeline_thread)
+        self.run_btn.pack(fill="x", pady=10)
+
+        # 5. Log area
+        tk.Label(main_frame, text="Logs de traitement :", bg="#f8f9fa", font=("Arial", 10, "bold")).pack(anchor="w")
+        self.log_area = tk.Text(main_frame, height=12, bg="#1e1e1e", fg="#00ff00", font=("Consolas", 9), padx=10, pady=10)
+        self.log_area.pack(fill=tk.BOTH, expand=True, pady=5)
+
+        # Status bar
+        tk.Label(self.root, textvariable=self.status_var, bd=1, relief=tk.SUNKEN, anchor="w", bg="#e9ecef").pack(side=tk.BOTTOM, fill=tk.X)
+
+    def browse_file(self):
+        """Open a file dialog and store the chosen audio path, if any."""
+        filename = filedialog.askopenfilename(filetypes=[("Audio files", "*.wav *.mp3 *.m4a *.flac *.ogg")])
+        if filename:
+            self.audio_path.set(filename)
+
+    def load_ollama_models(self):
+        """Fill the model combobox from ``ollama.list()``, with a fixed fallback list.
+
+        Handles both response shapes seen in the code: an object exposing
+        ``.models`` or a plain dict, with entries that are objects exposing
+        ``.model`` or dicts keyed by ``model``/``name``.
+        """
+        fallback = ["gpt-oss:120b-cloud", "llama3.3:70b"]
+        try:
+            import ollama
+            print("[DEBUG] Connexion à Ollama...")
+            response = ollama.list()
+            models = []
+            if hasattr(response, 'models'):
+                models = response.models
+            elif isinstance(response, dict):
+                models = response.get('models', [])
+
+            model_names = []
+            for m in models:
+                if hasattr(m, 'model'):  # object-style entry
+                    model_names.append(m.model)
+                elif isinstance(m, dict):
+                    name = m.get('model') or m.get('name')
+                    if name:
+                        model_names.append(name)
+
+            if model_names:
+                self.model_combo['values'] = model_names
+                if "gpt-oss:120b-cloud" in model_names:
+                    self.selected_model.set("gpt-oss:120b-cloud")
+                elif "gpt-oss:latest" in model_names:
+                    self.selected_model.set("gpt-oss:latest")
+                print(f"[DEBUG] {len(model_names)} modèles trouvés.")
+            else:
+                self.model_combo['values'] = fallback
+        except Exception as e:
+            # Best effort: Ollama may be missing or unreachable.
+            print(f"[DEBUG] Erreur Ollama : {e}")
+            self.model_combo['values'] = fallback
+
+    def log(self, message):
+        """Append ``message`` to the log area and scroll to the end."""
+        self.log_area.insert(tk.END, message + "\n")
+        self.log_area.see(tk.END)
+        self.root.update_idletasks()
+
+    def start_pipeline_thread(self):
+        """Validate the input, lock the launch button and start the worker thread."""
+        if not self.audio_path.get():
+            messagebox.showwarning("Attention", "Veuillez d'abord sélectionner un fichier audio.")
+            return
+
+        self.run_btn.config(state="disabled", bg="#6c757d")
+        self.log_area.delete("1.0", tk.END)
+        self.status_var.set("Traitement en cours... (Transcription + Diarisation)")
+
+        threading.Thread(target=self.run_pipeline, daemon=True).start()
+
+    def run_pipeline(self):
+        """Worker: run diarization/transcription, then the Ollama summary.
+
+        Any failure is logged, shown in a message box and reflected in the
+        status bar; the launch button is re-enabled in every case.
+        """
+        audio_file = self.audio_path.get()
+        model_name = self.selected_model.get()
+        custom_prompt = self.prompt_text.get("1.0", tk.END).strip()
+
+        try:
+            # Step 1: diarization and transcription
+            self.log("--- ÉTAPE 1 : DIARISATION & TRANSCRIPTION WHISPER LARGE-V3 ---")
+            self.log(f"Fichier cible : {os.path.basename(audio_file)}")
+
+            script_dir = os.path.dirname(os.path.abspath(__file__))
+            env = os.environ.copy()
+            env["HF_TOKEN"] = HF_TOKEN
+
+            # "-u" makes the child's stdout unbuffered: when writing to a
+            # pipe Python block-buffers by default, so without it the log
+            # lines would only show up in bursts or at the very end.
+            # The context manager closes the pipe and waits for the child.
+            with subprocess.Popen(
+                [sys.executable, "-u", os.path.join(script_dir, "medical_diarizer.py"), audio_file],
+                env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
+            ) as p:
+                for line in p.stdout:
+                    self.log(line.strip())
+
+            # A crashed child must not be masked by a transcript left over
+            # from a previous run on the same audio file.
+            if p.returncode != 0:
+                raise Exception(f"Échec de la transcription : le script s'est terminé avec le code {p.returncode}.")
+
+            transcript_file = audio_file.rsplit('.', 1)[0] + "_diarized.txt"
+            if not os.path.exists(transcript_file):
+                raise Exception("Échec de la transcription : le fichier texte n'a pas été généré.")
+
+            # Step 2: AI summary
+            self.log(f"\n--- ÉTAPE 2 : GÉNÉRATION SYNTHÈSE AVEC {model_name} ---")
+            self.status_var.set(f"Analyse par l'IA ({model_name})...")
+
+            with open(transcript_file, "r", encoding="utf-8") as f:
+                content = f.read()
+
+            import ollama
+            response = ollama.chat(
+                model=model_name,
+                messages=[
+                    {"role": "system", "content": custom_prompt},
+                    {"role": "user", "content": f"Voici le transcript à analyser :\n\n{content}"}
+                ]
+            )
+
+            summary = response['message']['content']
+            summary_file = audio_file.rsplit('.', 1)[0] + "_summary.md"
+
+            with open(summary_file, "w", encoding="utf-8") as f:
+                f.write(summary)
+
+            self.log("\n" + "="*50)
+            self.log("PIPELINE MÉDICAL TERMINÉ !")
+            self.log(f"Transcript structuré : {os.path.basename(transcript_file)}")
+            self.log(f"Synthèse sauvegardée  : {os.path.basename(summary_file)}")
+            self.log("="*50)
+
+            self.status_var.set("Terminé avec succès.")
+            messagebox.showinfo("Succès", "Traitement terminé avec succès !")
+
+        except Exception as e:
+            self.log(f"\n[ERREUR] {str(e)}")
+            self.status_var.set("Erreur durant le traitement.")
+            messagebox.showerror("Erreur de Pipeline", str(e))
+
+        finally:
+            self.run_btn.config(state="normal", bg="#28a745")
+
+if __name__ == "__main__":
+    # Create the Tk root window, attach the GUI to it and enter Tk's event loop.
+    main_window = tk.Tk()
+    gui = MedicalScribeGUI(main_window)
+    main_window.mainloop()
--- a/medical_scribe_gui_v2.py
+++ b/medical_scribe_gui_v2.py
@@ -0,0 +1,224 @@
+import os
+import sys
+import tkinter as tk
+from tkinter import filedialog, messagebox, ttk
+import threading
+import subprocess
+import time
+import ollama
+
+# Hugging Face token, read from the HF_TOKEN environment variable so that the
+# secret is never stored in the source tree (empty string when unset).
+# NOTE: any token that was previously committed here must be treated as leaked
+# and revoked.
+HF_TOKEN = os.environ.get("HF_TOKEN", "")
+
+# Prompt dictionary: maps the template name shown in the template combobox to
+# the instruction text loaded into the prompt editor when it is selected.
+PROMPT_TEMPLATES = {
+    "Consultation Standard": """Tu es un expert médical assistant. Analyse cette consultation et produis une synthèse incluant :
+1. Motif de consultation.
+2. Symptômes et antécédents.
+3. Examen clinique réalisé.
+4. Diagnostic et Plan thérapeutique (ordonnance, conseils).""",
+
+    "Compte-rendu Opératoire": """Analyse cette réunion chirurgicale. Produis un compte-rendu incluant :
+1. Type d'intervention et indication.
+2. Déroulement technique étape par étape.
+3. Matériel utilisé et incidents éventuels.
+4. Suites opératoires immédiates prévues.""",
+
+    "Réunion d'Équipe (Staff)": """Synthétise cette réunion de service médical :
+1. Liste des patients discutés.
+2. Décisions collégiales prises pour chaque cas.
+3. Tâches assignées aux différents membres de l'équipe.""",
+
+    "Lettre au Confrère": """Rédige une lettre de liaison médicale professionnelle adressée à un confrère à partir de ce transcript.
+La lettre doit être formelle, concise et inclure tous les éléments clés de la consultation.""",
+
+    "Prompt Personnalisé": "Tapez vos propres instructions ici..."
+}
+
+class MedicalScribeGUIv2:
+    """Tkinter front-end (v2): transcription with live progress, then Ollama summary.
+
+    The worker thread launches ``medical_diarizer.py`` as a child process,
+    turns its ``[STATUS] PROGRESS:<n>`` lines into progress-bar updates, logs
+    every other line, then sends the resulting transcript to the selected
+    Ollama model and saves the answer as ``<audio stem>_summary_v2.md``.
+    """
+
+    def __init__(self, root):
+        """Build the window on ``root`` and start loading the Ollama model list."""
+        self.root = root
+        self.root.title("Medical AI Scribe v2.0 - Turbo Edition")
+        self.root.geometry("950x850")
+        self.root.configure(bg="#f4f7f6")
+
+        # Tk variables bound to the widgets
+        self.audio_path = tk.StringVar()
+        self.selected_model = tk.StringVar()
+        self.selected_template = tk.StringVar(value="Consultation Standard")
+        self.status_var = tk.StringVar(value="Prêt.")
+        self.progress_val = tk.DoubleVar(value=0)
+
+        # Child process of the running pipeline (None when idle)
+        self.current_process = None
+        # Set by the STOP button so the worker abandons the current run
+        self.stop_event = threading.Event()
+
+        self.setup_ui()
+        # Load the models in the background
+        threading.Thread(target=self.load_ollama_models, daemon=True).start()
+
+    def setup_ui(self):
+        """Create and lay out every widget of the main window."""
+        main_frame = tk.Frame(self.root, bg="#f4f7f6", padx=20, pady=20)
+        main_frame.pack(fill=tk.BOTH, expand=True)
+
+        # Header
+        header = tk.Label(main_frame, text="Medical AI Scribe v2.0", font=("Helvetica", 18, "bold"), bg="#f4f7f6", fg="#2c3e50")
+        header.pack(pady=(0, 20))
+
+        # 1. File
+        tk.Label(main_frame, text="Fichier Audio :", font=("Helvetica", 10, "bold"), bg="#f4f7f6").pack(anchor="w")
+        file_frame = tk.Frame(main_frame, bg="#f4f7f6")
+        file_frame.pack(fill="x", pady=(5, 15))
+        tk.Entry(file_frame, textvariable=self.audio_path, font=("Arial", 10), width=85).pack(side="left", padx=(0, 10))
+        tk.Button(file_frame, text="Parcourir", command=self.browse_file, bg="#3498db", fg="white", relief=tk.FLAT).pack(side="left")
+
+        # 2. Model & template
+        row2 = tk.Frame(main_frame, bg="#f4f7f6")
+        row2.pack(fill="x", pady=(5, 15))
+
+        col_model = tk.Frame(row2, bg="#f4f7f6")
+        col_model.pack(side="left", fill="x", expand=True)
+        tk.Label(col_model, text="Modèle Ollama :", font=("Helvetica", 10, "bold"), bg="#f4f7f6").pack(anchor="w")
+        self.model_combo = ttk.Combobox(col_model, textvariable=self.selected_model, width=35)
+        self.model_combo.pack(anchor="w", pady=5)
+        self.model_combo.set("gpt-oss:120b-cloud")
+
+        col_template = tk.Frame(row2, bg="#f4f7f6")
+        col_template.pack(side="right", fill="x", expand=True)
+        tk.Label(col_template, text="Dictionnaire de Prompts :", font=("Helvetica", 10, "bold"), bg="#f4f7f6").pack(anchor="w")
+        self.template_combo = ttk.Combobox(col_template, textvariable=self.selected_template, width=35, state="readonly")
+        self.template_combo['values'] = list(PROMPT_TEMPLATES.keys())
+        self.template_combo.pack(anchor="w", pady=5)
+        self.template_combo.bind("<<ComboboxSelected>>", self.on_template_change)
+
+        # 3. Prompt editor
+        tk.Label(main_frame, text="Instructions de Synthèse (Prompt) :", font=("Helvetica", 10, "bold"), bg="#f4f7f6").pack(anchor="w")
+        self.prompt_text = tk.Text(main_frame, height=8, font=("Arial", 10), padx=10, pady=10)
+        self.prompt_text.pack(fill="x", pady=(5, 15))
+        self.prompt_text.insert("1.0", PROMPT_TEMPLATES["Consultation Standard"])
+
+        # 4. Progress bar
+        tk.Label(main_frame, text="Progression :", font=("Helvetica", 9), bg="#f4f7f6").pack(anchor="w")
+        self.progress_bar = ttk.Progressbar(main_frame, variable=self.progress_val, maximum=100, mode='determinate')
+        self.progress_bar.pack(fill="x", pady=(5, 2))
+        self.status_label = tk.Label(main_frame, textvariable=self.status_var, bg="#f4f7f6", font=("Arial", 9, "italic"))
+        self.status_label.pack(pady=(0, 10))
+
+        # 5. Action buttons
+        btn_frame = tk.Frame(main_frame, bg="#f4f7f6")
+        btn_frame.pack(fill="x", pady=10)
+        self.run_btn = tk.Button(btn_frame, text="LANCER LE TRAITEMENT", bg="#27ae60", fg="white", 
+                                font=("Helvetica", 12, "bold"), relief=tk.FLAT, pady=10, command=self.start_pipeline)
+        self.run_btn.pack(side="left", fill="x", expand=True, padx=(0, 10))
+        self.stop_btn = tk.Button(btn_frame, text="STOP", bg="#e74c3c", fg="white", 
+                                 font=("Helvetica", 12, "bold"), relief=tk.FLAT, pady=10, state="disabled", width=15, command=self.stop_pipeline)
+        self.stop_btn.pack(side="right")
+
+        # 6. Logs
+        tk.Label(main_frame, text="Flux de Transcription (Live) :", font=("Helvetica", 10, "bold"), bg="#f4f7f6").pack(anchor="w")
+        self.log_area = tk.Text(main_frame, height=15, bg="#1e1e1e", fg="#00ff00", font=("Consolas", 9), padx=10, pady=10)
+        self.log_area.pack(fill=tk.BOTH, expand=True, pady=5)
+
+    def on_template_change(self, event):
+        """Replace the prompt editor content with the newly selected template."""
+        template = self.selected_template.get()
+        self.prompt_text.delete("1.0", tk.END)
+        self.prompt_text.insert("1.0", PROMPT_TEMPLATES[template])
+
+    def browse_file(self):
+        """Open a file dialog and store the chosen audio path, if any."""
+        fn = filedialog.askopenfilename(filetypes=[("Audio", "*.wav *.mp3 *.m4a *.flac")])
+        if fn:
+            self.audio_path.set(fn)
+
+    def load_ollama_models(self):
+        """Fill the model combobox from ``ollama.list()`` (best effort).
+
+        Accepts a response exposing ``.models`` or a dict with a ``models``
+        key; entries may be objects exposing ``.model`` or dicts.
+        """
+        try:
+            import ollama
+            resp = ollama.list()
+            if hasattr(resp, 'models'):
+                models = resp.models
+            elif isinstance(resp, dict):
+                models = resp.get('models', [])
+            else:
+                models = []
+            names = []
+            for m in models:
+                name = getattr(m, 'model', None) or (m.get('model') if isinstance(m, dict) else None)
+                if name:
+                    names.append(name)
+            if names:
+                self.model_combo['values'] = names
+                if "gpt-oss:120b-cloud" in names:
+                    self.selected_model.set("gpt-oss:120b-cloud")
+        except Exception as e:
+            # Ollama may be missing or unreachable: keep the default entry,
+            # but leave a trace instead of failing silently.
+            print(f"[DEBUG] Erreur Ollama : {e}")
+
+    def log(self, msg):
+        """Append ``msg`` to the log area and scroll to the end."""
+        self.log_area.insert(tk.END, msg + "\n")
+        self.log_area.see(tk.END)
+        self.root.update_idletasks()
+
+    def stop_pipeline(self):
+        """Terminate the running child process and tell the worker to give up."""
+        proc = self.current_process  # local copy: the worker may clear the attribute
+        if proc:
+            self.stop_event.set()
+            proc.terminate()
+            self.log("\n[STOP] Arrêt demandé.")
+            self.status_var.set("Interrompu.")
+            self.run_btn.config(state="normal", bg="#27ae60")
+            self.stop_btn.config(state="disabled")
+
+    def start_pipeline(self):
+        """Validate the input, reset the UI state and start the worker thread."""
+        if not self.audio_path.get():
+            messagebox.showwarning("Attention", "Veuillez d'abord sélectionner un fichier audio.")
+            return
+        self.stop_event.clear()
+        self.run_btn.config(state="disabled", bg="#95a5a6")
+        self.stop_btn.config(state="normal")
+        self.log_area.delete("1.0", tk.END)
+        self.progress_val.set(0)
+        threading.Thread(target=self.run_worker, daemon=True).start()
+
+    def run_worker(self):
+        """Worker: run the diarizer with live progress, then the Ollama summary.
+
+        Failures are logged and reflected in the status label. When STOP was
+        requested the run ends quietly without producing a summary. The
+        buttons are restored in every case.
+        """
+        audio = self.audio_path.get()
+        model_name = self.selected_model.get()
+        prompt = self.prompt_text.get("1.0", tk.END).strip()
+        proc = None
+
+        try:
+            self.log("--- INITIALISATION DU MOTEUR TURBO ---")
+            script_dir = os.path.dirname(os.path.abspath(__file__))
+            env = os.environ.copy()
+            env["HF_TOKEN"] = HF_TOKEN
+
+            # "-u" makes the child's stdout unbuffered: when writing to a
+            # pipe Python block-buffers by default, which would delay the
+            # progress markers and the live transcript.
+            proc = subprocess.Popen(
+                [sys.executable, "-u", os.path.join(script_dir, "medical_diarizer.py"), audio],
+                env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
+            )
+            self.current_process = proc
+
+            for line in proc.stdout:
+                line = line.strip()
+                if line.startswith("[STATUS] PROGRESS:"):
+                    try:
+                        val = int(line.split(":")[1])
+                    except ValueError:
+                        continue  # malformed progress marker: ignore it
+                    self.progress_val.set(val)
+                    self.status_var.set(f"Analyse en cours : {val}%")
+                else:
+                    self.log(line)
+
+            returncode = proc.wait()
+
+            # After STOP, do not go on to the summary (a transcript from an
+            # earlier run may exist) and do not report a spurious error.
+            if self.stop_event.is_set():
+                return
+            if returncode != 0:
+                raise Exception(f"Échec de la transcription (code {returncode}).")
+
+            transcript_file = audio.rsplit('.', 1)[0] + "_diarized.txt"
+            if not os.path.exists(transcript_file):
+                raise Exception("Échec de la transcription.")
+
+            # Summary
+            self.status_var.set("Génération du compte-rendu par l'IA...")
+            self.log("\n--- GÉNÉRATION DE LA SYNTHÈSE MÉDICALE ---")
+            import ollama
+            # The diarizer writes the transcript as UTF-8; read it the same
+            # way instead of relying on the platform default encoding.
+            with open(transcript_file, "r", encoding="utf-8") as f:
+                content = f.read()
+
+            resp = ollama.chat(model=model_name, messages=[
+                {"role": "system", "content": prompt},
+                {"role": "user", "content": content}
+            ])
+
+            if self.stop_event.is_set():
+                return
+
+            out = audio.rsplit('.', 1)[0] + "_summary_v2.md"
+            with open(out, "w", encoding="utf-8") as f:
+                f.write(resp['message']['content'])
+
+            self.log(f"\n[SUCCÈS] Synthèse enregistrée : {os.path.basename(out)}")
+            self.status_var.set("Traitement terminé.")
+            messagebox.showinfo("Succès", f"Compte-rendu généré :\n{os.path.basename(out)}")
+
+        except Exception as e:
+            self.log(f"\n[ERREUR] {e}")
+            self.status_var.set("Erreur de traitement.")
+        finally:
+            if proc is not None and proc.stdout:
+                proc.stdout.close()
+            self.current_process = None
+            self.run_btn.config(state="normal", bg="#27ae60")
+            self.stop_btn.config(state="disabled")
+
+if __name__ == "__main__":
+    # Create the Tk root window, attach the GUI to it and enter Tk's event loop.
+    main_window = tk.Tk()
+    MedicalScribeGUIv2(main_window)
+    main_window.mainloop()
--- a/medical_scribe_gui_v3.py
+++ b/medical_scribe_gui_v3.py
@@ -0,0 +1,235 @@
+import os
+import sys
+import tkinter as tk
+from tkinter import filedialog, messagebox, ttk
+import threading
+import subprocess
+import time
+import ollama
+from fpdf import FPDF
+
+# Hugging Face token, read from the HF_TOKEN environment variable so that the
+# secret is never stored in the source tree (empty string when unset).
+# NOTE: any token that was previously committed here must be treated as leaked
+# and revoked.
+HF_TOKEN = os.environ.get("HF_TOKEN", "")
+
+# Extended prompt dictionary: maps the document type shown in the template
+# combobox to the instruction text loaded into the prompt editor.
+PROMPT_TEMPLATES = {
+    "Contrôle T2A / Codage CIM-10": """Tu es un médecin DIM expert. Analyse cette réunion sur le codage CIM-10 et produis :
+1. Un tableau des phases (Avant/Pendant/Après contrôle).
+2. Une check-list des points d'attention (DP, DAS, Actes).
+3. Un modèle de fiche de synthèse pratique.
+Sois extrêmement rigoureux sur la terminologie PMSI/T2A.""",
+
+    "Consultation Standard": """Analyse cette consultation médicale et produis une synthèse :
+1. Motif et symptômes.
+2. Antécédents et Examen clinique.
+3. Diagnostic et Plan thérapeutique (ordonnance).""",
+
+    "Compte-rendu Opératoire": """Produis un compte-rendu chirurgical structuré :
+1. Indication et type d'intervention.
+2. Description technique détaillée.
+3. Matériel et incidents.
+4. Suites prévues.""",
+
+    "Lettre au Confrère": """Rédige une lettre de liaison formelle adressée à un confrère à partir de ce transcript.
+Format professionnel avec en-tête et conclusion standard.""",
+
+    "Prompt Personnalisé": "Tapez vos propres instructions ici..."
+}
+
+class PDF(FPDF):
+    """FPDF document whose page header is a centered report title."""
+
+    def header(self):
+        """Draw the title in bold 15 pt Arial, then leave a 5-unit vertical gap."""
+        title = 'Compte-rendu Médical IA'
+        self.set_font('Arial', 'B', 15)
+        # Positional arguments: width 0, height 10, text, border 0, ln 1, align 'C'.
+        self.cell(0, 10, title, 0, 1, 'C')
+        self.ln(5)
+
+class MedicalScribeGUIv3:
+    def __init__(self, root):
+        """Configure the window on ``root``, create the UI state and widgets,
+        then start loading the Ollama model list in a daemon thread."""
+        self.root = root
+        root.title("Medical AI Scribe v3.0 - Expert Edition")
+        root.geometry("1000x900")
+        root.configure(bg="#eceff1")
+
+        # Plain-Python state
+        self.last_summary_path = None
+        self.current_process = None
+
+        # Tk variables bound to the widgets
+        self.audio_path = tk.StringVar()
+        self.selected_model = tk.StringVar()
+        self.selected_template = tk.StringVar(value="Contrôle T2A / Codage CIM-10")
+        self.status_var = tk.StringVar(value="Prêt.")
+        self.progress_val = tk.DoubleVar(value=0)
+
+        self.setup_ui()
+
+        model_loader = threading.Thread(target=self.load_ollama_models, daemon=True)
+        model_loader.start()
+
+    def setup_ui(self):
+        """Create and lay out every widget: audio source, model/template
+        selection, prompt editor, progress display, action buttons and log area."""
+        bg = "#eceff1"
+        bold_10 = ("Arial", 10, "bold")
+        bold_12 = ("Arial", 12, "bold")
+
+        container = tk.Frame(self.root, bg=bg, padx=25, pady=25)
+        container.pack(fill=tk.BOTH, expand=True)
+
+        # Title
+        title = tk.Label(container, text="MEDICAL AI SCRIBE EXPERT", font=("Segoe UI", 20, "bold"), bg=bg, fg="#263238")
+        title.pack(pady=(0, 20))
+
+        # Audio source section
+        source_box = tk.LabelFrame(container, text=" 1. Source Audio ", font=bold_10, bg=bg, padx=10, pady=10)
+        source_box.pack(fill="x", pady=10)
+        path_entry = tk.Entry(source_box, textvariable=self.audio_path, font=("Arial", 10), width=90)
+        path_entry.pack(side="left", padx=5)
+        browse_btn = tk.Button(source_box, text="Parcourir", command=self.browse_file, bg="#607d8b", fg="white")
+        browse_btn.pack(side="left", padx=5)
+
+        # Configuration section
+        config_box = tk.LabelFrame(container, text=" 2. Intelligence & Format ", font=bold_10, bg=bg, padx=10, pady=10)
+        config_box.pack(fill="x", pady=10)
+
+        tk.Label(config_box, text="Modèle Ollama:", bg=bg).grid(row=0, column=0, sticky="w")
+        self.model_combo = ttk.Combobox(config_box, textvariable=self.selected_model, width=40)
+        self.model_combo.grid(row=0, column=1, padx=10, pady=5, sticky="w")
+        self.model_combo.set("gpt-oss:120b-cloud")
+
+        tk.Label(config_box, text="Type de document:", bg=bg).grid(row=1, column=0, sticky="w")
+        self.template_combo = ttk.Combobox(config_box, textvariable=self.selected_template, width=40, state="readonly")
+        self.template_combo['values'] = list(PROMPT_TEMPLATES.keys())
+        self.template_combo.grid(row=1, column=1, padx=10, pady=5, sticky="w")
+        self.template_combo.bind("<<ComboboxSelected>>", self.on_template_change)
+
+        # Prompt editor, pre-filled with the default template
+        prompt_label = tk.Label(container, text="Instructions de synthèse:", font=bold_10, bg=bg)
+        prompt_label.pack(anchor="w", pady=(10, 0))
+        self.prompt_text = tk.Text(container, height=6, font=("Arial", 10), padx=10, pady=10)
+        self.prompt_text.pack(fill="x", pady=5)
+        self.prompt_text.insert("1.0", PROMPT_TEMPLATES["Contrôle T2A / Codage CIM-10"])
+
+        # Progress display
+        progress_label = tk.Label(container, text="Progression du traitement:", font=("Arial", 9), bg=bg)
+        progress_label.pack(anchor="w", pady=(10, 0))
+        self.progress_bar = ttk.Progressbar(container, variable=self.progress_val, maximum=100, mode='determinate')
+        self.progress_bar.pack(fill="x", pady=5)
+        self.status_label = tk.Label(container, textvariable=self.status_var, font=bold_10, bg=bg, fg="#1565c0")
+        self.status_label.pack()
+
+        # Action buttons
+        actions = tk.Frame(container, bg=bg)
+        actions.pack(fill="x", pady=20)
+
+        self.run_btn = tk.Button(actions, text="LANCER LE PIPELINE", bg="#2e7d32", fg="white", font=bold_12,
+                                 width=25, pady=10, command=self.start_pipeline)
+        self.run_btn.pack(side="left", padx=5)
+
+        self.stop_btn = tk.Button(actions, text="STOP", bg="#c62828", fg="white", font=bold_12,
+                                  width=10, pady=10, state="disabled", command=self.stop_pipeline)
+        self.stop_btn.pack(side="left", padx=5)
+
+        self.pdf_btn = tk.Button(actions, text="EXPORTER PDF", bg="#f57c00", fg="white", font=bold_12,
+                                 width=15, pady=10, state="disabled", command=self.export_pdf)
+        self.pdf_btn.pack(side="right", padx=5)
+
+        # Log area
+        self.log_area = tk.Text(container, height=12, bg="#212121", fg="#76ff03", font=("Consolas", 9), padx=10, pady=10)
+        self.log_area.pack(fill=tk.BOTH, expand=True)
+
+    def on_template_change(self, event):
+        """Replace the prompt editor content with the selected template's text.
+
+        ``event`` is the Tk event passed by the combobox binding; it is unused.
+        """
+        chosen = self.selected_template.get()
+        editor = self.prompt_text
+        editor.delete("1.0", tk.END)
+        editor.insert("1.0", PROMPT_TEMPLATES[chosen])
+
+    def browse_file(self):
+        """Ask the user for an audio file and store the chosen path.
+
+        Nothing is changed when the dialog returns an empty value.
+        """
+        audio_types = [("Audio", "*.wav *.mp3 *.m4a *.flac")]
+        chosen = filedialog.askopenfilename(filetypes=audio_types)
+        if not chosen:
+            return
+        self.audio_path.set(chosen)
+
+    def load_ollama_models(self):
+        """Fill the model combobox with the model names reported by Ollama.
+
+        This is best-effort: when the listing fails for any reason the
+        combobox keeps its current values and a warning is written to stderr
+        instead of the failure being silently discarded.
+        """
+        try:
+            resp = ollama.list()
+            # The response is handled both as an object exposing `.models`
+            # and as a mapping with a 'models' key.
+            models = resp.models if hasattr(resp, 'models') else resp.get('models', [])
+            names = [m.model if hasattr(m, 'model') else m.get('model', 'Unknown') for m in models]
+            if names:
+                self.model_combo['values'] = names
+        except Exception as exc:
+            # Catch Exception (not a bare except) so KeyboardInterrupt and
+            # SystemExit still propagate, and leave a trace of what went wrong.
+            print(f"[AVERTISSEMENT] Impossible de lister les modèles Ollama : {exc}", file=sys.stderr)
+
+    def log(self, msg):
+        """Append ``msg`` (converted with ``str``) as a new line in the log area.
+
+        The view is scrolled to the end and pending idle tasks are flushed so
+        the new line is displayed right away.
+        """
+        console = self.log_area
+        console.insert(tk.END, f"{msg!s}\n")
+        console.see(tk.END)
+        self.root.update_idletasks()
+
+    def start_pipeline(self):
+        """Start the processing pipeline on a daemon thread.
+
+        Does nothing when no audio path is set. Otherwise the Run and PDF
+        buttons are disabled, the Stop button is enabled, the log area is
+        cleared and ``run_worker`` is launched in the background.
+        """
+        if not self.audio_path.get():
+            return
+        button_states = (
+            ("run_btn", "disabled"),
+            ("stop_btn", "normal"),
+            ("pdf_btn", "disabled"),
+        )
+        for attr_name, new_state in button_states:
+            getattr(self, attr_name).config(state=new_state)
+        self.log_area.delete("1.0", tk.END)
+        worker = threading.Thread(target=self.run_worker, daemon=True)
+        worker.start()
+
+    def run_worker(self):
+        """Run the diarization engine, then the Ollama summary.
+
+        Steps:
+          1. Launch ``medical_diarizer.py`` (located next to this file) on the
+             selected audio file with ``HF_TOKEN`` set in its environment and
+             stream its merged stdout/stderr. ``[STATUS] PROGRESS:<n>`` lines
+             update the progress variable and status text; every other line
+             is appended to the log area.
+          2. Fail if the engine exits with a non-zero code or if
+             ``<audio base>_diarized.txt`` does not exist.
+          3. Send the transcript to the selected Ollama model, using the
+             prompt editor's content as the system message.
+          4. Write the answer to ``<audio base>_final_summary.md`` and enable
+             the PDF export button.
+
+        Any exception is reported in the log area. The Run/Stop buttons are
+        restored in every case.
+
+        NOTE(review): ``start_pipeline`` runs this method on a background
+        thread, yet it touches Tk widgets and variables directly; confirm
+        this is safe with the Tcl/Tk build in use.
+        """
+        audio = self.audio_path.get()
+        model_name = self.selected_model.get()
+        prompt = self.prompt_text.get("1.0", tk.END).strip()
+        # Output files are named after the audio path minus its last ".ext".
+        base_path = audio.rsplit('.', 1)[0]
+
+        try:
+            self.log("--- DÉMARRAGE DU MOTEUR EXPERT ---")
+            script_dir = os.path.dirname(os.path.abspath(__file__))
+            env = os.environ.copy()
+            env["HF_TOKEN"] = HF_TOKEN
+
+            process = subprocess.Popen(
+                [sys.executable, os.path.join(script_dir, "medical_diarizer.py"), audio],
+                env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1
+            )
+            # Published on the instance so stop_pipeline can terminate it.
+            self.current_process = process
+
+            # The context manager closes the pipe once the engine stops writing.
+            with process.stdout as engine_output:
+                for line in engine_output:
+                    line = line.strip()
+                    if not line.startswith("[STATUS] PROGRESS:"):
+                        self.log(line)
+                        continue
+                    try:
+                        val = int(line.split(":", 1)[1])
+                    except ValueError:
+                        # Malformed progress line: ignore it.
+                        continue
+                    self.progress_val.set(val)
+                    self.status_var.set(f"Analyse en cours : {val}%")
+
+            # A non-zero exit code (crash, or termination by stop_pipeline)
+            # must abort here; otherwise a stale transcript left by an earlier
+            # run would be summarized as if it were fresh.
+            return_code = process.wait()
+            if return_code != 0:
+                raise Exception(f"Échec moteur (code de sortie {return_code}).")
+            transcript_file = base_path + "_diarized.txt"
+            if not os.path.exists(transcript_file): raise Exception("Échec moteur.")
+
+            self.status_var.set("Génération de la synthèse IA...")
+            self.log("\n--- GÉNÉRATION DE LA SYNTHÈSE MÉDICALE ---")
+            import ollama
+            # Explicit UTF-8 so accented text does not depend on the locale.
+            # Assumes the diarizer writes UTF-8 (TODO confirm); undecodable
+            # bytes are replaced rather than aborting the run.
+            with open(transcript_file, "r", encoding="utf-8", errors="replace") as f:
+                content = f.read()
+            resp = ollama.chat(model=model_name, messages=[
+                {"role": "system", "content": prompt}, {"role": "user", "content": content}
+            ])
+
+            self.last_summary_path = base_path + "_final_summary.md"
+            # export_pdf reads this file back as UTF-8, so write it as UTF-8.
+            with open(self.last_summary_path, "w", encoding="utf-8") as f:
+                f.write(resp['message']['content'])
+
+            self.log("\n[OK] Synthèse générée avec succès.")
+            self.status_var.set("Terminé. Prêt pour export PDF.")
+            self.pdf_btn.config(state="normal")
+            messagebox.showinfo("Succès", "Traitement terminé !")
+
+        except Exception as e:
+            self.log(f"\n[ERREUR] {e}")
+        finally:
+            # Drop the handle so a finished process is not kept around.
+            self.current_process = None
+            self.run_btn.config(state="normal")
+            self.stop_btn.config(state="disabled")
+
+    def stop_pipeline(self):
+        """Terminate the running engine subprocess, if there is one.
+
+        When a process handle is set, it is sent a terminate request, an
+        abort line is logged and the status text is reset. Otherwise nothing
+        happens.
+        """
+        process = self.current_process
+        if not process:
+            return
+        process.terminate()
+        self.log("\n[STOP] Abandon.")
+        self.status_var.set("Prêt.")
+
+    def export_pdf(self):
+        """Render the last generated Markdown summary to a PDF and open it.
+
+        Does nothing when no summary path is recorded or the file is missing.
+        The PDF is written next to the summary, with the ``.md`` extension
+        replaced by ``.pdf``, then handed to ``xdg-open``. Errors during
+        generation are shown in an error dialog.
+        """
+        if not self.last_summary_path or not os.path.exists(self.last_summary_path): return
+        try:
+            pdf = PDF()
+            pdf.add_page()
+            pdf.set_font("Arial", size=11)
+            with open(self.last_summary_path, 'r', encoding='utf-8') as f:
+                for line in f:
+                    # Characters outside Latin-1 are replaced by '?' before
+                    # being written to the PDF.
+                    clean_line = line.encode('latin-1', 'replace').decode('latin-1')
+                    pdf.multi_cell(0, 10, clean_line)
+
+            # Swap only the extension: str.replace(".md", ".pdf") would also
+            # rewrite any ".md" found in a directory or file name.
+            pdf_path = os.path.splitext(self.last_summary_path)[0] + ".pdf"
+            pdf.output(pdf_path)
+
+            # Argument list instead of a shell string: the path comes from a
+            # user-selected file and may contain quotes or shell metacharacters.
+            try:
+                subprocess.run(["xdg-open", pdf_path], check=False)
+                opened = True
+            except OSError:
+                # xdg-open is not available; the PDF itself was still written.
+                opened = False
+
+            if opened:
+                messagebox.showinfo("PDF", f"PDF généré et ouvert :\n{os.path.basename(pdf_path)}")
+            else:
+                messagebox.showinfo("PDF", f"PDF généré (ouverture automatique impossible) :\n{os.path.basename(pdf_path)}")
+        except Exception as e:
+            messagebox.showerror("Erreur PDF", str(e))
+
+if __name__ == "__main__":
+    # Script entry point: create the Tk root window, attach the GUI to it
+    # and hand control to the Tk event loop.
+    app_window = tk.Tk()
+    MedicalScribeGUIv3(app_window)
+    app_window.mainloop()
--- a/medical_summarizer.py
+++ b/medical_summarizer.py
@@ -0,0 +1,73 @@
+import os
+import sys
+import ollama
+from openai import OpenAI
+
+# Configuration
+OLLAMA_MODEL = "llama3:8b" # Ollama model used for the summary (described by the author as capable and lightweight)
+# OpenAI API key read from the environment. summarize_transcript only uses it
+# when called with use_openai=True and the key is non-empty.
+OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+
+# System prompt (in French) sent to the model: it asks for a structured
+# medical summary covering purpose, key discussion points, decisions or
+# provisional diagnosis, and action plan.
+PROMPT_SYSTEM = """
+Tu es un expert médical assistant. Tu dois analyser la transcription d'une réunion médicale ou d'une consultation.
+Ta mission est de produire une synthèse structurée incluant :
+1. Objet de la réunion / Motif de consultation.
+2. Éléments clés de la discussion (Symptômes, antécédents, examens évoqués).
+3. Décisions prises ou Diagnostic provisoire.
+4. Plan d'action (Traitements prescrits, examens complémentaires, prochain rendez-vous).
+
+Règle d'or : Sois précis, utilise la terminologie médicale correcte, et sois synthétique.
+"""
+
+def summarize_transcript(transcript, use_openai=False):
+    """Generate a medical summary of the given transcript.
+
+    Args:
+        transcript: Text of the transcript to summarize.
+        use_openai: When true AND ``OPENAI_API_KEY`` is set, the summary is
+            requested from OpenAI (model ``gpt-4o``). In every other case the
+            local Ollama model ``OLLAMA_MODEL`` is used.
+
+    Returns:
+        The summary text. If the Ollama call raises, the returned string is
+        an error message instead of a summary (no exception is propagated on
+        that path). Errors from the OpenAI path are not caught.
+    """
+    # Both backends receive the same messages. The user message needs "\n\n"
+    # escapes: a single-quoted f-string cannot span physical lines.
+    messages = [
+        {"role": "system", "content": PROMPT_SYSTEM},
+        {"role": "user", "content": f"Voici le transcript à analyser :\n\n{transcript}"},
+    ]
+
+    if use_openai and OPENAI_API_KEY:
+        print("Utilisation d'OpenAI pour la synthèse...")
+        client = OpenAI(api_key=OPENAI_API_KEY)
+        response = client.chat.completions.create(model="gpt-4o", messages=messages)
+        return response.choices[0].message.content
+
+    print(f"Utilisation d'Ollama ({OLLAMA_MODEL}) pour la synthèse...")
+    try:
+        response = ollama.chat(model=OLLAMA_MODEL, messages=messages)
+        return response['message']['content']
+    except Exception as e:
+        return f"Erreur avec Ollama : {e}. Assurez-vous qu'Ollama est lancé et que le modèle {OLLAMA_MODEL} est téléchargé."
+
+if __name__ == "__main__":
+    # Command-line entry point: summarize the transcript file given as the
+    # first argument and save the result next to it as "<name>_summary.md".
+    if len(sys.argv) < 2:
+        print("Usage: python medical_summarizer.py <transcript_file.txt>")
+    else:
+        file_path = sys.argv[1]
+        with open(file_path, "r", encoding="utf-8") as f:
+            content = f.read()
+
+        summary = summarize_transcript(content)
+
+        output_file = file_path.rsplit('.', 1)[0] + "_summary.md"
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(summary)
+
+        print(f"Synthèse sauvegardée dans : {output_file}")
+        # Escaped newlines: a quoted string literal cannot span several lines.
+        print("\n--- SYNTHÈSE MÉDICALE ---\n")
+        print(summary)
--- a/medical_transcriber.py
+++ b/medical_transcriber.py
@@ -0,0 +1,59 @@
+import os
+import sys
+import time
+from faster_whisper import WhisperModel
+
+def transcribe_audio(file_path, model_size="large-v3", device="cuda", compute_type="float16", language="fr"):
+    """Transcribe an audio file with faster-whisper and return timestamped text.
+
+    Args:
+        file_path: Path of the audio file to transcribe.
+        model_size: Whisper model name passed to ``WhisperModel``.
+        device: Device passed to ``WhisperModel`` (default ``"cuda"``).
+        compute_type: Compute type passed to ``WhisperModel``.
+        language: Language code forced for the transcription (default
+            ``"fr"``, the value that was previously hard-coded).
+
+    Returns:
+        The transcript as one string with one ``[HH:MM:SS] text`` line per
+        segment, or ``None`` when ``file_path`` does not exist.
+    """
+    if not os.path.exists(file_path):
+        print(f"Erreur : Le fichier {file_path} n'existe pas.")
+        return None
+
+    print(f"Chargement du modèle {model_size} sur {device}...")
+    model = WhisperModel(model_size, device=device, compute_type=compute_type)
+
+    print(f"Transcription en cours : {file_path}")
+    start_time = time.time()
+
+    # beam_size=5 widens the decoding search; the language is forced rather
+    # than auto-detected; the VAD filter skips silences of at least 500 ms
+    # (the original author notes this avoids hallucinated text).
+    segments, info = model.transcribe(
+        file_path,
+        beam_size=5,
+        language=language,
+        condition_on_previous_text=True,
+        vad_filter=True,
+        vad_parameters=dict(min_silence_duration_ms=500)
+    )
+
+    print(f"Langue détectée : {info.language} (probabilité: {info.language_probability:.2f})")
+
+    full_text = []
+    for segment in segments:
+        # Segment start offset, in seconds, rendered as [HH:MM:SS].
+        timestamp = f"[{time.strftime('%H:%M:%S', time.gmtime(segment.start))}]"
+        entry = f"{timestamp} {segment.text}"
+        print(entry)
+        full_text.append(entry)
+
+    duration = time.time() - start_time
+    # Escaped newlines: a quoted string literal cannot span several lines.
+    print(f"\nTranscription terminée en {duration:.2f} secondes.")
+
+    return "\n".join(full_text)
+
+if __name__ == "__main__":
+    # Command-line entry point: transcribe the audio file given as the first
+    # argument and save a non-empty result as "<name>_transcript.txt".
+    if len(sys.argv) >= 2:
+        audio_file = sys.argv[1]
+        transcript = transcribe_audio(audio_file)
+        if transcript:
+            base_name = audio_file.rsplit('.', 1)[0]
+            output_file = base_name + "_transcript.txt"
+            with open(output_file, "w", encoding="utf-8") as f:
+                f.write(transcript)
+            print(f"Transcription sauvegardée dans : {output_file}")
+    else:
+        print("Usage: python medical_transcriber.py <audio_file>")
--- a/run_pipeline.py
+++ b/run_pipeline.py
@@ -0,0 +1,43 @@
+import os
+import sys
+import subprocess
+import time
+
+def run_full_pipeline(audio_file):
+    """Run the complete pipeline: transcription, then summary.
+
+    Both steps run as subprocesses of the current interpreter, using
+    ``medical_transcriber.py`` and ``medical_summarizer.py`` from this
+    script's directory.
+
+    Args:
+        audio_file: Path of the audio file to process.
+
+    Returns:
+        None. Returns early, after printing an error, when the audio file
+        does not exist or when no transcript file is found after step 1.
+
+    Raises:
+        subprocess.CalledProcessError: If either step exits with a non-zero
+            status (``check=True``).
+    """
+    if not os.path.exists(audio_file):
+        print(f"Erreur : Le fichier {audio_file} n'existe pas.")
+        return
+
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+
+    # 1. Transcription
+    print("\n[STEP 1] TRANSCRIPTION (Whisper Large v3)...")
+    transcribe_cmd = [sys.executable, os.path.join(script_dir, "medical_transcriber.py"), audio_file]
+    subprocess.run(transcribe_cmd, check=True)
+
+    transcript_file = audio_file.rsplit('.', 1)[0] + "_transcript.txt"
+    if not os.path.exists(transcript_file):
+        print("Erreur : La transcription a échoué.")
+        return
+
+    # 2. Summary
+    print("\n[STEP 2] SYNTHÈSE MÉDICALE (Ollama/OpenAI)...")
+    summarize_cmd = [sys.executable, os.path.join(script_dir, "medical_summarizer.py"), transcript_file]
+    subprocess.run(summarize_cmd, check=True)
+
+    # medical_summarizer.py names its output after the file it receives (the
+    # transcript), so the summary path is derived from transcript_file, not
+    # from audio_file.
+    summary_file = transcript_file.rsplit('.', 1)[0] + "_summary.md"
+    print("\n[DONE] Pipeline terminé avec succès !")
+    print(f"Transcript : {transcript_file}")
+    print(f"Synthèse   : {summary_file}")
+
+if __name__ == "__main__":
+    # Command-line entry point: run the pipeline on the audio file given as
+    # the first argument, or print the usage line when it is missing.
+    if len(sys.argv) >= 2:
+        run_full_pipeline(sys.argv[1])
+    else:
+        print("Usage: python run_pipeline.py <audio_file.wav>")