chore: add .gitignore

This commit is contained in:
dom
2026-03-05 00:37:36 +01:00
parent 06100df236
commit e26be72f9c
449 changed files with 504051 additions and 57 deletions

31
data/datasets/stats.json Normal file
View File

@@ -0,0 +1,31 @@
{
"sources": {
"ccam_chatml.jsonl": 1500,
"cim10_chatml.jsonl": 1500,
"cocoa_chatml.jsonl": 2000,
"discrimination_chatml.jsonl": 799,
"fascicule_reasoning_chatml.jsonl": 453,
"guide_metho_chatml.jsonl": 364,
"negative_chatml.jsonl": 1000,
"pipeline_chatml.jsonl": 2795,
"reasoning_chatml.jsonl": 1359,
"referentiels_chatml.jsonl": 5336,
"synthetic_chatml.jsonl": 600
},
"total": 17706,
"train": {
"count": 15936,
"total_tokens_approx": 3739820,
"avg_tokens": 235,
"max_tokens": 1042,
"min_tokens": 41
},
"eval": {
"count": 1770,
"total_tokens_approx": 405094,
"avg_tokens": 229,
"max_tokens": 1030,
"min_tokens": 42
},
"eval_ratio": 0.1
}