#!/usr/bin/env python3
"""
Fine-tuning QLoRA de gemma3:12b sur RunPod (A100 40/80GB).
Dataset V2 : 15.9K train, 53% raisonnement structuré (vs 95% lookups V1).
Sources : referentiels, pipeline, cocoa, ccam, cim10, reasoning,
negative, discrimination, fascicule_reasoning, guide_metho.
Usage sur RunPod :
1. Créer un pod A100 80GB (template PyTorch 2.4+ / CUDA 12.x)
2. Uploader les fichiers (train_runpod.py, setup.sh, data/)
3. bash setup.sh
4. python train_runpod.py [--epochs 3] [--export-gguf]
"""
import argparse
import json
import os
from pathlib import Path

BASE = Path(__file__).resolve().parent
DATASETS = BASE / "data"
OUTPUT = BASE / "models"
OUTPUT.mkdir(parents=True, exist_ok=True)


def check_prerequisites():
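    """Check CUDA availability and the dataset files; suggest a batch size based on VRAM."""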
    import torch

    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available.")
    gpu_name = torch.cuda.get_device_name(0)
    vram_total = torch.cuda.get_device_properties(0).total_memory / 1024**3
    print(f"GPU: {gpu_name}")
    print(f"VRAM: {vram_total:.1f} GB")
    train_path = DATASETS / "pmsi_train.jsonl"
    eval_path = DATASETS / "pmsi_eval.jsonl"
    if not train_path.exists() or not eval_path.exists():
        raise FileNotFoundError("Dataset not found in data/")
    with open(train_path) as f:
        n_train = sum(1 for _ in f)
    with open(eval_path) as f:
        n_eval = sum(1 for _ in f)
    print(f"Dataset: {n_train} train + {n_eval} eval")
    # Scale the batch size to the available VRAM
    if vram_total >= 70:
        suggested_batch = 8
    elif vram_total >= 35:
        suggested_batch = 4
    else:
        suggested_batch = 2
    print(f"Suggested batch size: {suggested_batch}")
    return train_path, eval_path, suggested_batch


def load_model(model_name, max_seq_length, load_in_4bit=True):
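    """Load the base model and tokenizer via Unsloth, optionally quantized to 4-bit (QLoRA)."""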
    from unsloth import FastLanguageModel

    print(f"\nLoading {model_name} (4-bit={load_in_4bit})...")
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype=None,  # let Unsloth auto-detect (bf16 on Ampere+)
        load_in_4bit=load_in_4bit,
    )
    print(f" Model loaded: {model.config._name_or_path}")
    print(f" Parameters: {model.num_parameters() / 1e9:.1f}B")
    return model, tokenizer


def attach_lora(model, r=32, alpha=64, dropout=0.0):
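    """Attach LoRA adapters to the attention and MLP projection layers."""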
    from unsloth import FastLanguageModel

    print(f"\nLoRA (r={r}, alpha={alpha})...")
    model = FastLanguageModel.get_peft_model(
        model,
        r=r,
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
        lora_alpha=alpha,
        lora_dropout=dropout,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=42,
    )
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print(f" Trainable: {trainable / 1e6:.1f}M / {total / 1e9:.1f}B ({100 * trainable / total:.2f}%)")
    return model


def load_dataset(train_path, eval_path):
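    """Load the train/eval JSONL files into Hugging Face Dataset objects."""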
    from datasets import Dataset

    def load_jsonl(path):
        examples = []
        with open(path) as f:
            for line in f:
                examples.append(json.loads(line.strip()))
        return examples

    train_ds = Dataset.from_list(load_jsonl(train_path))
    eval_ds = Dataset.from_list(load_jsonl(eval_path))
    print(f"\nDataset: {len(train_ds)} train + {len(eval_ds)} eval")
    return train_ds, eval_ds


def format_chat(example, tokenizer):
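    """Render a chat-format example into a single text string via the tokenizer's chat template."""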
    text = tokenizer.apply_chat_template(
        example["messages"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": text}


def train(model, tokenizer, train_ds, eval_ds, args):
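    """Run SFT on the chat-formatted datasets and save the final LoRA adapter."""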
    from trl import SFTTrainer, SFTConfig

    print("\nTraining config:")
    print(f" Epochs: {args.epochs}")
    print(f" LR: {args.lr}")
    print(f" Batch: {args.batch} x grad_accum={args.grad_accum} = {args.batch * args.grad_accum}")
    print(f" Max seq length: {args.max_seq_length}")
    train_ds = train_ds.map(lambda x: format_chat(x, tokenizer), num_proc=4)
    eval_ds = eval_ds.map(lambda x: format_chat(x, tokenizer), num_proc=4)
    output_dir = OUTPUT / "pmsi-lora-checkpoints"
    # Optional wandb tracking
    report = "none"
    callbacks = []
    try:
        import wandb
        wandb.init(project="pmsi-coder", name=f"v2-runpod-{args.epochs}ep-seq{args.max_seq_length}")
        report = "wandb"
        print(" Tracking: wandb")
    except ImportError:
        print(" Tracking: none (pip install wandb to enable)")
    training_args = SFTConfig(
        output_dir=str(output_dir),
        num_train_epochs=args.epochs,
        per_device_train_batch_size=args.batch,
        per_device_eval_batch_size=args.batch,
        gradient_accumulation_steps=args.grad_accum,
        learning_rate=args.lr,
        weight_decay=0.01,
        warmup_ratio=0.05,
        lr_scheduler_type="cosine",
        logging_steps=10,
        eval_strategy="steps",
        eval_steps=500,
        save_strategy="steps",
        save_steps=500,
        save_total_limit=3,
        fp16=False,
        bf16=True,
        max_seq_length=args.max_seq_length,
        dataset_text_field="text",
        seed=42,
        report_to=report,
    )
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        args=training_args,
        callbacks=callbacks,
    )
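    # Rough estimate: one optimizer step per (batch * grad_accum) examples, per epoch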
    total_steps = len(train_ds) * args.epochs // (args.batch * args.grad_accum)
    print(f"\n Estimated steps: ~{total_steps}")
    print(" Starting...")
    if args.resume:
        trainer.train(resume_from_checkpoint=True)
    else:
        trainer.train()
    final_dir = OUTPUT / "pmsi-lora-final"
    model.save_pretrained(str(final_dir))
    tokenizer.save_pretrained(str(final_dir))
    print(f"\nLoRA saved: {final_dir}")
    return trainer, final_dir


def export_merged_hf(model, tokenizer):
"""Sauvegarder le modèle mergé en 16-bit (HF format) pour conversion GGUF ultérieure."""
print(f"\nExport modèle mergé (16-bit HF)...")
merged_dir = OUTPUT / "pmsi-merged-hf"
merged_dir.mkdir(parents=True, exist_ok=True)
model.save_pretrained_merged(
str(merged_dir),
tokenizer,
save_method="merged_16bit",
)
size_gb = sum(f.stat().st_size for f in merged_dir.glob("*.safetensors")) / 1024**3
print(f" Modèle mergé : {merged_dir} ({size_gb:.1f} Go)")
print(f"\n Pour convertir en GGUF :")
print(f" python llama.cpp/convert_hf_to_gguf.py {merged_dir} --outfile pmsi-v2-q8.gguf --outtype q8_0")
print(f" llama-quantize pmsi-v2-q8.gguf pmsi-v2-q4km.gguf Q4_K_M")
return merged_dir
def export_gguf(model, tokenizer, final_dir, quantization="q4_k_m"):
"""Export GGUF via Unsloth (peut échouer — fallback sur export_merged_hf)."""
print(f"\nExport GGUF ({quantization})...")
gguf_dir = OUTPUT / "pmsi-gguf"
gguf_dir.mkdir(parents=True, exist_ok=True)
try:
model.save_pretrained_gguf(
str(gguf_dir),
tokenizer,
quantization_method=quantization,
)
gguf_files = list(gguf_dir.glob("*.gguf"))
if gguf_files:
gguf_path = gguf_files[0]
size_gb = gguf_path.stat().st_size / 1024**3
print(f" GGUF : {gguf_path.name} ({size_gb:.1f} Go)")
modelfile_path = gguf_dir / "Modelfile"
with open(modelfile_path, "w") as f:
f.write(f"FROM {gguf_path.name}\n\n")
f.write("PARAMETER temperature 0.3\n")
f.write("PARAMETER top_p 0.9\n")
f.write("PARAMETER num_ctx 8192\n")
f.write('PARAMETER stop "<end_of_turn>"\n')
f.write('PARAMETER stop "<eos>"\n')
print(f" Modelfile créé")
except Exception as e:
print(f" GGUF export échoué : {e}")
print(f" Fallback : export HF mergé...")
export_merged_hf(model, tokenizer)
def main():
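    """Parse CLI args and run the pipeline: checks, load, LoRA, train, optional GGUF export."""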
    parser = argparse.ArgumentParser(description="QLoRA fine-tuning on RunPod")
    parser.add_argument("--model", default="unsloth/gemma-3-12b-it-bnb-4bit")
    parser.add_argument("--max-seq-length", type=int, default=2048)
    parser.add_argument("--lora-r", type=int, default=32)
    parser.add_argument("--lora-alpha", type=int, default=64)
    parser.add_argument("--lora-dropout", type=float, default=0.0)
    parser.add_argument("--epochs", type=int, default=3)
    parser.add_argument("--lr", type=float, default=2e-4)
    parser.add_argument("--batch", type=int, default=0, help="0=auto-detect")
    parser.add_argument("--grad-accum", type=int, default=4)
    parser.add_argument("--resume", action="store_true")
    parser.add_argument("--export-gguf", action="store_true")
    parser.add_argument("--gguf-quant", default="q4_k_m")
    args = parser.parse_args()

    train_path, eval_path, suggested_batch = check_prerequisites()
    if args.batch == 0:
        args.batch = suggested_batch
        print(f"Auto-detected batch size: {args.batch}")
    model, tokenizer = load_model(args.model, args.max_seq_length)
    model = attach_lora(model, r=args.lora_r, alpha=args.lora_alpha, dropout=args.lora_dropout)
    train_ds, eval_ds = load_dataset(train_path, eval_path)
    trainer, final_dir = train(model, tokenizer, train_ds, eval_ds, args)
    if args.export_gguf:
        export_gguf(model, tokenizer, final_dir, args.gguf_quant)
    print("\n" + "=" * 50)
    print("Fine-tuning complete!")
    print(f" LoRA: {final_dir}")
    if args.export_gguf:
        print(f" GGUF: {OUTPUT / 'pmsi-gguf'}")
    print("=" * 50)


if __name__ == "__main__":
    main()