31 lines
738 B
JSON
31 lines
738 B
JSON
{
|
|
"sources": {
|
|
"ccam_chatml.jsonl": 1500,
|
|
"cim10_chatml.jsonl": 1500,
|
|
"cocoa_chatml.jsonl": 2000,
|
|
"discrimination_chatml.jsonl": 799,
|
|
"fascicule_reasoning_chatml.jsonl": 453,
|
|
"guide_metho_chatml.jsonl": 364,
|
|
"negative_chatml.jsonl": 1000,
|
|
"pipeline_chatml.jsonl": 2795,
|
|
"reasoning_chatml.jsonl": 1359,
|
|
"referentiels_chatml.jsonl": 5336,
|
|
"synthetic_chatml.jsonl": 600
|
|
},
|
|
"total": 17706,
|
|
"train": {
|
|
"count": 15936,
|
|
"total_tokens_approx": 3739820,
|
|
"avg_tokens": 235,
|
|
"max_tokens": 1042,
|
|
"min_tokens": 41
|
|
},
|
|
"eval": {
|
|
"count": 1770,
|
|
"total_tokens_approx": 405094,
|
|
"avg_tokens": 229,
|
|
"max_tokens": 1030,
|
|
"min_tokens": 42
|
|
},
|
|
"eval_ratio": 0.1
|
|
} |