{ "sources": { "ccam_chatml.jsonl": 1500, "cim10_chatml.jsonl": 1500, "cocoa_chatml.jsonl": 2000, "discrimination_chatml.jsonl": 799, "fascicule_reasoning_chatml.jsonl": 453, "guide_metho_chatml.jsonl": 364, "negative_chatml.jsonl": 1000, "pipeline_chatml.jsonl": 2795, "reasoning_chatml.jsonl": 1359, "referentiels_chatml.jsonl": 5336, "synthetic_chatml.jsonl": 600 }, "total": 17706, "train": { "count": 15936, "total_tokens_approx": 3739820, "avg_tokens": 235, "max_tokens": 1042, "min_tokens": 41 }, "eval": { "count": 1770, "total_tokens_approx": 405094, "avg_tokens": 229, "max_tokens": 1030, "min_tokens": 42 }, "eval_ratio": 0.1 }