Initial commit
This commit is contained in:
86
medical_diarizer.py
Normal file
86
medical_diarizer.py
Normal file
@@ -0,0 +1,86 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import torch
|
||||
import gc
|
||||
from pyannote.audio import Pipeline
|
||||
from faster_whisper import WhisperModel
|
||||
import librosa
|
||||
|
||||
def run_diarization_and_transcription(audio_file, hf_token):
|
||||
"""
|
||||
Version 3.0 : Feedback ultra-précis pour barre de progression.
|
||||
"""
|
||||
duration = librosa.get_duration(path=audio_file)
|
||||
print(f"[STATUS] PROGRESS:1")
|
||||
|
||||
# 1. DIARISATION
|
||||
print("[PHASE 1/3] Diarisation (Analyse des voix)...")
|
||||
try:
|
||||
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", token=hf_token)
|
||||
pipeline.to(torch.device("cpu"))
|
||||
raw_result = pipeline(audio_file)
|
||||
|
||||
if hasattr(raw_result, 'annotation'):
|
||||
diarization = raw_result.annotation
|
||||
else:
|
||||
diarization = raw_result
|
||||
|
||||
del pipeline
|
||||
gc.collect()
|
||||
except Exception as e:
|
||||
print(f"[ERROR] Diarisation : {e}")
|
||||
diarization = None
|
||||
|
||||
print(f"[STATUS] PROGRESS:30")
|
||||
|
||||
# 2. TRANSCRIPTION
|
||||
print("[PHASE 2/3] Transcription (Modèle Large-v3-Turbo)...")
|
||||
device = "cuda" if torch.cuda.is_available() else "cpu"
|
||||
compute_type = "int8_float16" if device == "cuda" else "int8"
|
||||
|
||||
try:
|
||||
model = WhisperModel("large-v3-turbo", device=device, compute_type=compute_type, cpu_threads=16)
|
||||
segments, _ = model.transcribe(audio_file, beam_size=5, language="fr", word_timestamps=True)
|
||||
|
||||
whisper_segments = []
|
||||
for segment in segments:
|
||||
# Progression dynamique : 30% à 95%
|
||||
pct = 30 + (65 * (segment.end / duration))
|
||||
print(f"[STATUS] PROGRESS:{int(pct)}")
|
||||
|
||||
speaker = "VOIX"
|
||||
if diarization:
|
||||
max_overlap = 0
|
||||
for turn, _, speaker_id in diarization.itertracks(yield_label=True):
|
||||
overlap = min(segment.end, turn.end) - max(segment.start, turn.start)
|
||||
if overlap > max_overlap:
|
||||
max_overlap = overlap
|
||||
speaker = speaker_id
|
||||
|
||||
timestamp = f"[{time.strftime('%H:%M:%S', time.gmtime(segment.start))}]"
|
||||
line = f"{timestamp} {speaker}: {segment.text.strip()}"
|
||||
print(line)
|
||||
whisper_segments.append(line)
|
||||
|
||||
del model
|
||||
gc.collect()
|
||||
if torch.cuda.is_available(): torch.cuda.empty_cache()
|
||||
|
||||
except Exception as e:
|
||||
print(f"[ERROR] Transcription : {e}")
|
||||
return None
|
||||
|
||||
print(f"[STATUS] PROGRESS:100")
|
||||
return "\n".join(whisper_segments)
|
||||
|
||||
if __name__ == "__main__":
|
||||
hf_token = os.getenv("HF_TOKEN")
|
||||
if len(sys.argv) > 1:
|
||||
audio_file = sys.argv[1]
|
||||
result = run_diarization_and_transcription(audio_file, hf_token)
|
||||
if result:
|
||||
output_file = audio_file.rsplit('.', 1)[0] + "_diarized.txt"
|
||||
with open(output_file, "w", encoding="utf-8") as f:
|
||||
f.write(result)
|
||||
print(f"\n[OK] Fini : {output_file}")
|
||||
Reference in New Issue
Block a user