{ "best_metric": null, "best_model_checkpoint": null, "epoch": 36.0, "global_step": 14040, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5992906078421846, "eval_loss": 1.33469557762146, "eval_runtime": 5.8169, "eval_samples_per_second": 29.397, "eval_steps_per_second": 14.785, "step": 390 }, { "epoch": 1.28, "learning_rate": 9.644586894586896e-06, "loss": 1.5408, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.6796235478653008, "eval_loss": 1.0698518753051758, "eval_runtime": 5.8146, "eval_samples_per_second": 29.409, "eval_steps_per_second": 14.79, "step": 780 }, { "epoch": 2.56, "learning_rate": 9.28917378917379e-06, "loss": 1.1283, "step": 1000 }, { "epoch": 3.0, "eval_accuracy": 0.7372856151482106, "eval_loss": 0.8751375675201416, "eval_runtime": 5.8077, "eval_samples_per_second": 29.444, "eval_steps_per_second": 14.808, "step": 1170 }, { "epoch": 3.85, "learning_rate": 8.933048433048434e-06, "loss": 0.9078, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.7711176956331666, "eval_loss": 0.7534294128417969, "eval_runtime": 5.8229, "eval_samples_per_second": 29.367, "eval_steps_per_second": 14.769, "step": 1560 }, { "epoch": 5.0, "eval_accuracy": 0.802166420482521, "eval_loss": 0.6710610389709473, "eval_runtime": 5.6976, "eval_samples_per_second": 30.013, "eval_steps_per_second": 15.094, "step": 1950 }, { "epoch": 5.13, "learning_rate": 8.576923076923077e-06, "loss": 0.7705, "step": 2000 }, { "epoch": 6.0, "eval_accuracy": 0.8168786692759296, "eval_loss": 0.6077755689620972, "eval_runtime": 5.8114, "eval_samples_per_second": 29.425, "eval_steps_per_second": 14.798, "step": 2340 }, { "epoch": 6.41, "learning_rate": 8.220797720797722e-06, "loss": 0.6863, "step": 2500 }, { "epoch": 7.0, "eval_accuracy": 0.831802051431627, "eval_loss": 0.5667673945426941, "eval_runtime": 5.8128, "eval_samples_per_second": 29.418, "eval_steps_per_second": 14.795, "step": 2730 }, { "epoch": 7.69, "learning_rate": 7.864672364672366e-06, "loss": 0.6277, "step": 3000 }, { "epoch": 8.0, "eval_accuracy": 0.8386034912718204, "eval_loss": 0.546131432056427, "eval_runtime": 5.8073, "eval_samples_per_second": 29.446, "eval_steps_per_second": 14.809, "step": 3120 }, { "epoch": 8.97, "learning_rate": 7.508547008547009e-06, "loss": 0.5863, "step": 3500 }, { "epoch": 9.0, "eval_accuracy": 0.8513571943310113, "eval_loss": 0.5142761468887329, "eval_runtime": 5.8058, "eval_samples_per_second": 29.453, "eval_steps_per_second": 14.813, "step": 3510 }, { "epoch": 10.0, "eval_accuracy": 0.8522174818930449, "eval_loss": 0.49920225143432617, "eval_runtime": 5.8205, "eval_samples_per_second": 29.379, "eval_steps_per_second": 14.775, "step": 3900 }, { "epoch": 10.26, "learning_rate": 7.152421652421653e-06, "loss": 0.5564, "step": 4000 }, { "epoch": 11.0, "eval_accuracy": 0.8533209429717304, "eval_loss": 0.49400192499160767, "eval_runtime": 5.8142, "eval_samples_per_second": 29.411, "eval_steps_per_second": 14.791, "step": 4290 }, { "epoch": 11.54, "learning_rate": 6.796296296296296e-06, "loss": 0.5199, "step": 4500 }, { "epoch": 12.0, "eval_accuracy": 0.8632922665875019, "eval_loss": 0.4726846218109131, "eval_runtime": 5.8155, "eval_samples_per_second": 29.404, "eval_steps_per_second": 14.788, "step": 4680 }, { "epoch": 12.82, "learning_rate": 6.440170940170941e-06, "loss": 0.5025, "step": 5000 }, { "epoch": 13.0, "eval_accuracy": 0.8637540927527733, "eval_loss": 0.4585917592048645, "eval_runtime": 5.8153, "eval_samples_per_second": 29.405, "eval_steps_per_second": 14.789, "step": 5070 }, { "epoch": 14.0, "eval_accuracy": 0.8673048600883653, "eval_loss": 0.45485442876815796, "eval_runtime": 5.8178, "eval_samples_per_second": 29.393, "eval_steps_per_second": 14.782, "step": 5460 }, { "epoch": 14.1, "learning_rate": 6.084045584045585e-06, "loss": 0.4814, "step": 5500 }, { "epoch": 15.0, "eval_accuracy": 0.8698035411108416, "eval_loss": 0.44424179196357727, "eval_runtime": 5.7973, "eval_samples_per_second": 29.496, "eval_steps_per_second": 14.834, "step": 5850 }, { "epoch": 15.38, "learning_rate": 5.727920227920228e-06, "loss": 0.4746, "step": 6000 }, { "epoch": 16.0, "eval_accuracy": 0.8749573607523999, "eval_loss": 0.43056586384773254, "eval_runtime": 5.6925, "eval_samples_per_second": 30.04, "eval_steps_per_second": 15.108, "step": 6240 }, { "epoch": 16.67, "learning_rate": 5.371794871794872e-06, "loss": 0.4527, "step": 6500 }, { "epoch": 17.0, "eval_accuracy": 0.874227107665129, "eval_loss": 0.42905324697494507, "eval_runtime": 5.8566, "eval_samples_per_second": 29.198, "eval_steps_per_second": 14.684, "step": 6630 }, { "epoch": 17.95, "learning_rate": 5.016381766381767e-06, "loss": 0.4382, "step": 7000 }, { "epoch": 18.0, "eval_accuracy": 0.8751032706419789, "eval_loss": 0.4213222861289978, "eval_runtime": 5.8093, "eval_samples_per_second": 29.435, "eval_steps_per_second": 14.804, "step": 7020 }, { "epoch": 19.0, "eval_accuracy": 0.8751214299591995, "eval_loss": 0.41926833987236023, "eval_runtime": 5.8022, "eval_samples_per_second": 29.472, "eval_steps_per_second": 14.822, "step": 7410 }, { "epoch": 19.23, "learning_rate": 4.6602564102564106e-06, "loss": 0.4328, "step": 7500 }, { "epoch": 20.0, "eval_accuracy": 0.8759944995580002, "eval_loss": 0.41431769728660583, "eval_runtime": 5.8137, "eval_samples_per_second": 29.413, "eval_steps_per_second": 14.793, "step": 7800 }, { "epoch": 20.51, "learning_rate": 4.304131054131054e-06, "loss": 0.4191, "step": 8000 }, { "epoch": 21.0, "eval_accuracy": 0.8835775987576434, "eval_loss": 0.4071265459060669, "eval_runtime": 5.8164, "eval_samples_per_second": 29.4, "eval_steps_per_second": 14.786, "step": 8190 }, { "epoch": 21.79, "learning_rate": 3.948717948717949e-06, "loss": 0.4106, "step": 8500 }, { "epoch": 22.0, "eval_accuracy": 0.881939052795031, "eval_loss": 0.3980366587638855, "eval_runtime": 5.875, "eval_samples_per_second": 29.106, "eval_steps_per_second": 14.638, "step": 8580 }, { "epoch": 23.0, "eval_accuracy": 0.8821786026625207, "eval_loss": 0.39872363209724426, "eval_runtime": 5.8105, "eval_samples_per_second": 29.43, "eval_steps_per_second": 14.801, "step": 8970 }, { "epoch": 23.08, "learning_rate": 3.592592592592593e-06, "loss": 0.4037, "step": 9000 }, { "epoch": 24.0, "eval_accuracy": 0.8819395993222366, "eval_loss": 0.40265128016471863, "eval_runtime": 5.8012, "eval_samples_per_second": 29.477, "eval_steps_per_second": 14.824, "step": 9360 }, { "epoch": 24.36, "learning_rate": 3.2364672364672365e-06, "loss": 0.3893, "step": 9500 }, { "epoch": 25.0, "eval_accuracy": 0.8892501819946614, "eval_loss": 0.3867790400981903, "eval_runtime": 5.8209, "eval_samples_per_second": 29.377, "eval_steps_per_second": 14.774, "step": 9750 }, { "epoch": 25.64, "learning_rate": 2.8803418803418804e-06, "loss": 0.3991, "step": 10000 }, { "epoch": 26.0, "eval_accuracy": 0.8846003326484688, "eval_loss": 0.3882477581501007, "eval_runtime": 5.8184, "eval_samples_per_second": 29.389, "eval_steps_per_second": 14.781, "step": 10140 }, { "epoch": 26.92, "learning_rate": 2.5242165242165246e-06, "loss": 0.3786, "step": 10500 }, { "epoch": 27.0, "eval_accuracy": 0.8858714334822964, "eval_loss": 0.3939129710197449, "eval_runtime": 5.8222, "eval_samples_per_second": 29.37, "eval_steps_per_second": 14.771, "step": 10530 }, { "epoch": 28.0, "eval_accuracy": 0.8847989764283254, "eval_loss": 0.39587706327438354, "eval_runtime": 5.8063, "eval_samples_per_second": 29.451, "eval_steps_per_second": 14.812, "step": 10920 }, { "epoch": 28.21, "learning_rate": 2.168803418803419e-06, "loss": 0.38, "step": 11000 }, { "epoch": 29.0, "eval_accuracy": 0.8849687976020835, "eval_loss": 0.3949810862541199, "eval_runtime": 5.8138, "eval_samples_per_second": 29.413, "eval_steps_per_second": 14.792, "step": 11310 }, { "epoch": 29.49, "learning_rate": 1.8126780626780629e-06, "loss": 0.3764, "step": 11500 }, { "epoch": 30.0, "eval_accuracy": 0.8893153879792042, "eval_loss": 0.3783101439476013, "eval_runtime": 5.8039, "eval_samples_per_second": 29.463, "eval_steps_per_second": 14.818, "step": 11700 }, { "epoch": 30.77, "learning_rate": 1.4565527065527065e-06, "loss": 0.3708, "step": 12000 }, { "epoch": 31.0, "eval_accuracy": 0.8890792500856626, "eval_loss": 0.3798995912075043, "eval_runtime": 5.7021, "eval_samples_per_second": 29.989, "eval_steps_per_second": 15.082, "step": 12090 }, { "epoch": 32.0, "eval_accuracy": 0.8867172306495527, "eval_loss": 0.39150363206863403, "eval_runtime": 5.8791, "eval_samples_per_second": 29.086, "eval_steps_per_second": 14.628, "step": 12480 }, { "epoch": 32.05, "learning_rate": 1.1004273504273506e-06, "loss": 0.3656, "step": 12500 }, { "epoch": 33.0, "eval_accuracy": 0.8902530694061639, "eval_loss": 0.3780055344104767, "eval_runtime": 5.8784, "eval_samples_per_second": 29.09, "eval_steps_per_second": 14.63, "step": 12870 }, { "epoch": 33.33, "learning_rate": 7.443019943019944e-07, "loss": 0.3617, "step": 13000 }, { "epoch": 34.0, "eval_accuracy": 0.8873723487824038, "eval_loss": 0.38049712777137756, "eval_runtime": 5.8779, "eval_samples_per_second": 29.092, "eval_steps_per_second": 14.631, "step": 13260 }, { "epoch": 34.62, "learning_rate": 3.8817663817663825e-07, "loss": 0.361, "step": 13500 }, { "epoch": 35.0, "eval_accuracy": 0.8919778767559101, "eval_loss": 0.3775971233844757, "eval_runtime": 5.8063, "eval_samples_per_second": 29.451, "eval_steps_per_second": 14.812, "step": 13650 }, { "epoch": 35.9, "learning_rate": 3.205128205128205e-08, "loss": 0.3595, "step": 14000 }, { "epoch": 36.0, "eval_accuracy": 0.8888084202394747, "eval_loss": 0.3711872100830078, "eval_runtime": 5.8621, "eval_samples_per_second": 29.171, "eval_steps_per_second": 14.671, "step": 14040 }, { "epoch": 36.0, "step": 14040, "total_flos": 3.509780816886497e+17, "train_loss": 0.5379996796958467, "train_runtime": 13714.1823, "train_samples_per_second": 8.19, "train_steps_per_second": 1.024 } ], "max_steps": 14040, "num_train_epochs": 36, "total_flos": 3.509780816886497e+17, "trial_name": null, "trial_params": null }