{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.974958263772955, "eval_steps": 10, "global_step": 745, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 3e-05, "loss": 2.3217, "step": 10 }, { "epoch": 0.07, "eval_accuracy": 0.4901315789473684, "eval_loss": 0.7262606620788574, "eval_runtime": 25.3718, "eval_samples_per_second": 11.982, "eval_steps_per_second": 2.995, "step": 10 }, { "epoch": 0.13, "learning_rate": 3e-05, "loss": 0.56, "step": 20 }, { "epoch": 0.13, "eval_accuracy": 0.5526315789473685, "eval_loss": 0.6898001432418823, "eval_runtime": 25.3761, "eval_samples_per_second": 11.98, "eval_steps_per_second": 2.995, "step": 20 }, { "epoch": 0.2, "learning_rate": 3e-05, "loss": 0.5281, "step": 30 }, { "epoch": 0.2, "eval_accuracy": 0.5888157894736842, "eval_loss": 0.6465093493461609, "eval_runtime": 25.3947, "eval_samples_per_second": 11.971, "eval_steps_per_second": 2.993, "step": 30 }, { "epoch": 0.27, "learning_rate": 3e-05, "loss": 0.994, "step": 40 }, { "epoch": 0.27, "eval_accuracy": 0.5986842105263158, "eval_loss": 0.7351367473602295, "eval_runtime": 25.4621, "eval_samples_per_second": 11.939, "eval_steps_per_second": 2.985, "step": 40 }, { "epoch": 0.33, "learning_rate": 3e-05, "loss": 0.4785, "step": 50 }, { "epoch": 0.33, "eval_accuracy": 0.6118421052631579, "eval_loss": 0.6004362106323242, "eval_runtime": 25.4138, "eval_samples_per_second": 11.962, "eval_steps_per_second": 2.991, "step": 50 }, { "epoch": 0.4, "learning_rate": 3e-05, "loss": 0.4732, "step": 60 }, { "epoch": 0.4, "eval_accuracy": 0.6348684210526315, "eval_loss": 0.5782976746559143, "eval_runtime": 25.369, "eval_samples_per_second": 11.983, "eval_steps_per_second": 2.996, "step": 60 }, { "epoch": 0.47, "learning_rate": 3e-05, "loss": 0.4466, "step": 70 }, { "epoch": 0.47, "eval_accuracy": 0.6414473684210527, "eval_loss": 0.5713546872138977, "eval_runtime": 25.3858, "eval_samples_per_second": 11.975, "eval_steps_per_second": 2.994, "step": 70 }, { "epoch": 0.53, "learning_rate": 3e-05, "loss": 0.8737, "step": 80 }, { "epoch": 0.53, "eval_accuracy": 0.618421052631579, "eval_loss": 0.567269504070282, "eval_runtime": 25.3639, "eval_samples_per_second": 11.986, "eval_steps_per_second": 2.996, "step": 80 }, { "epoch": 0.6, "learning_rate": 3e-05, "loss": 0.4471, "step": 90 }, { "epoch": 0.6, "eval_accuracy": 0.6282894736842105, "eval_loss": 0.5630530118942261, "eval_runtime": 25.3754, "eval_samples_per_second": 11.98, "eval_steps_per_second": 2.995, "step": 90 }, { "epoch": 0.67, "learning_rate": 3e-05, "loss": 0.46, "step": 100 }, { "epoch": 0.67, "eval_accuracy": 0.6348684210526315, "eval_loss": 0.5503756999969482, "eval_runtime": 25.3686, "eval_samples_per_second": 11.983, "eval_steps_per_second": 2.996, "step": 100 }, { "epoch": 0.73, "learning_rate": 3e-05, "loss": 0.3294, "step": 110 }, { "epoch": 0.73, "eval_accuracy": 0.625, "eval_loss": 0.6009898781776428, "eval_runtime": 25.3647, "eval_samples_per_second": 11.985, "eval_steps_per_second": 2.996, "step": 110 }, { "epoch": 0.8, "learning_rate": 3e-05, "loss": 0.6526, "step": 120 }, { "epoch": 0.8, "eval_accuracy": 0.6282894736842105, "eval_loss": 0.5730816721916199, "eval_runtime": 25.3832, "eval_samples_per_second": 11.976, "eval_steps_per_second": 2.994, "step": 120 }, { "epoch": 0.87, "learning_rate": 3e-05, "loss": 0.3712, "step": 130 }, { "epoch": 0.87, "eval_accuracy": 0.6447368421052632, "eval_loss": 0.5378755331039429, "eval_runtime": 25.3825, "eval_samples_per_second": 11.977, "eval_steps_per_second": 2.994, "step": 130 }, { "epoch": 0.93, "learning_rate": 3e-05, "loss": 0.3341, "step": 140 }, { "epoch": 0.93, "eval_accuracy": 0.6282894736842105, "eval_loss": 0.5408769249916077, "eval_runtime": 25.3787, "eval_samples_per_second": 11.979, "eval_steps_per_second": 2.995, "step": 140 }, { "epoch": 1.0, "learning_rate": 3e-05, "loss": 0.552, "step": 150 }, { "epoch": 1.0, "eval_accuracy": 0.6381578947368421, "eval_loss": 0.5310616493225098, "eval_runtime": 25.4512, "eval_samples_per_second": 11.944, "eval_steps_per_second": 2.986, "step": 150 }, { "epoch": 1.07, "learning_rate": 3e-05, "loss": 0.4681, "step": 160 }, { "epoch": 1.07, "eval_accuracy": 0.6414473684210527, "eval_loss": 0.5371212959289551, "eval_runtime": 25.3633, "eval_samples_per_second": 11.986, "eval_steps_per_second": 2.996, "step": 160 }, { "epoch": 1.14, "learning_rate": 3e-05, "loss": 0.3119, "step": 170 }, { "epoch": 1.14, "eval_accuracy": 0.6282894736842105, "eval_loss": 0.6171860694885254, "eval_runtime": 25.3622, "eval_samples_per_second": 11.986, "eval_steps_per_second": 2.997, "step": 170 }, { "epoch": 1.2, "learning_rate": 3e-05, "loss": 0.3082, "step": 180 }, { "epoch": 1.2, "eval_accuracy": 0.6513157894736842, "eval_loss": 0.5360857844352722, "eval_runtime": 25.3868, "eval_samples_per_second": 11.975, "eval_steps_per_second": 2.994, "step": 180 }, { "epoch": 1.27, "learning_rate": 3e-05, "loss": 0.5217, "step": 190 }, { "epoch": 1.27, "eval_accuracy": 0.625, "eval_loss": 0.5467653870582581, "eval_runtime": 25.3846, "eval_samples_per_second": 11.976, "eval_steps_per_second": 2.994, "step": 190 }, { "epoch": 1.34, "learning_rate": 3e-05, "loss": 0.3888, "step": 200 }, { "epoch": 1.34, "eval_accuracy": 0.631578947368421, "eval_loss": 0.5891463756561279, "eval_runtime": 25.374, "eval_samples_per_second": 11.981, "eval_steps_per_second": 2.995, "step": 200 }, { "epoch": 1.4, "learning_rate": 3e-05, "loss": 0.2841, "step": 210 }, { "epoch": 1.4, "eval_accuracy": 0.6282894736842105, "eval_loss": 0.5428625345230103, "eval_runtime": 25.3734, "eval_samples_per_second": 11.981, "eval_steps_per_second": 2.995, "step": 210 }, { "epoch": 1.47, "learning_rate": 3e-05, "loss": 0.2728, "step": 220 }, { "epoch": 1.47, "eval_accuracy": 0.6381578947368421, "eval_loss": 0.5246651768684387, "eval_runtime": 25.3858, "eval_samples_per_second": 11.975, "eval_steps_per_second": 2.994, "step": 220 }, { "epoch": 1.54, "learning_rate": 3e-05, "loss": 0.5563, "step": 230 }, { "epoch": 1.54, "eval_accuracy": 0.6513157894736842, "eval_loss": 0.5003584027290344, "eval_runtime": 25.3763, "eval_samples_per_second": 11.98, "eval_steps_per_second": 2.995, "step": 230 }, { "epoch": 1.6, "learning_rate": 3e-05, "loss": 0.2862, "step": 240 }, { "epoch": 1.6, "eval_accuracy": 0.6546052631578947, "eval_loss": 0.4741169810295105, "eval_runtime": 25.3852, "eval_samples_per_second": 11.975, "eval_steps_per_second": 2.994, "step": 240 }, { "epoch": 1.67, "learning_rate": 3e-05, "loss": 0.2289, "step": 250 }, { "epoch": 1.67, "eval_accuracy": 0.6513157894736842, "eval_loss": 0.5441343188285828, "eval_runtime": 25.3897, "eval_samples_per_second": 11.973, "eval_steps_per_second": 2.993, "step": 250 }, { "epoch": 1.74, "learning_rate": 3e-05, "loss": 0.2481, "step": 260 }, { "epoch": 1.74, "eval_accuracy": 0.6513157894736842, "eval_loss": 0.5170696377754211, "eval_runtime": 25.3747, "eval_samples_per_second": 11.98, "eval_steps_per_second": 2.995, "step": 260 }, { "epoch": 1.8, "learning_rate": 3e-05, "loss": 0.329, "step": 270 }, { "epoch": 1.8, "eval_accuracy": 0.6546052631578947, "eval_loss": 0.5371391177177429, "eval_runtime": 25.3708, "eval_samples_per_second": 11.982, "eval_steps_per_second": 2.996, "step": 270 }, { "epoch": 1.87, "learning_rate": 3e-05, "loss": 0.1741, "step": 280 }, { "epoch": 1.87, "eval_accuracy": 0.6677631578947368, "eval_loss": 0.5411613583564758, "eval_runtime": 25.3792, "eval_samples_per_second": 11.978, "eval_steps_per_second": 2.995, "step": 280 }, { "epoch": 1.94, "learning_rate": 3e-05, "loss": 0.2888, "step": 290 }, { "epoch": 1.94, "eval_accuracy": 0.6710526315789473, "eval_loss": 0.5130823850631714, "eval_runtime": 25.3665, "eval_samples_per_second": 11.984, "eval_steps_per_second": 2.996, "step": 290 }, { "epoch": 2.0, "learning_rate": 3e-05, "loss": 0.4157, "step": 300 }, { "epoch": 2.0, "eval_accuracy": 0.6447368421052632, "eval_loss": 0.45547178387641907, "eval_runtime": 25.356, "eval_samples_per_second": 11.989, "eval_steps_per_second": 2.997, "step": 300 }, { "epoch": 2.07, "learning_rate": 3e-05, "loss": 0.1982, "step": 310 }, { "epoch": 2.07, "eval_accuracy": 0.6611842105263158, "eval_loss": 0.5670450329780579, "eval_runtime": 25.3778, "eval_samples_per_second": 11.979, "eval_steps_per_second": 2.995, "step": 310 }, { "epoch": 2.14, "learning_rate": 3e-05, "loss": 0.106, "step": 320 }, { "epoch": 2.14, "eval_accuracy": 0.6677631578947368, "eval_loss": 0.7942893505096436, "eval_runtime": 25.3984, "eval_samples_per_second": 11.969, "eval_steps_per_second": 2.992, "step": 320 }, { "epoch": 2.2, "learning_rate": 3e-05, "loss": 0.1718, "step": 330 }, { "epoch": 2.2, "eval_accuracy": 0.6644736842105263, "eval_loss": 0.7496399879455566, "eval_runtime": 25.3753, "eval_samples_per_second": 11.98, "eval_steps_per_second": 2.995, "step": 330 }, { "epoch": 2.27, "learning_rate": 3e-05, "loss": 0.214, "step": 340 }, { "epoch": 2.27, "eval_accuracy": 0.6842105263157895, "eval_loss": 0.626396656036377, "eval_runtime": 25.3727, "eval_samples_per_second": 11.981, "eval_steps_per_second": 2.995, "step": 340 }, { "epoch": 2.34, "learning_rate": 3e-05, "loss": 0.1571, "step": 350 }, { "epoch": 2.34, "eval_accuracy": 0.631578947368421, "eval_loss": 0.6138848066329956, "eval_runtime": 25.3708, "eval_samples_per_second": 11.982, "eval_steps_per_second": 2.996, "step": 350 }, { "epoch": 2.4, "learning_rate": 3e-05, "loss": 0.1432, "step": 360 }, { "epoch": 2.4, "eval_accuracy": 0.6842105263157895, "eval_loss": 0.6198970675468445, "eval_runtime": 25.3819, "eval_samples_per_second": 11.977, "eval_steps_per_second": 2.994, "step": 360 }, { "epoch": 2.47, "learning_rate": 3e-05, "loss": 0.1038, "step": 370 }, { "epoch": 2.47, "eval_accuracy": 0.6973684210526315, "eval_loss": 0.636822521686554, "eval_runtime": 25.4286, "eval_samples_per_second": 11.955, "eval_steps_per_second": 2.989, "step": 370 }, { "epoch": 2.54, "learning_rate": 3e-05, "loss": 0.1728, "step": 380 }, { "epoch": 2.54, "eval_accuracy": 0.6677631578947368, "eval_loss": 0.7889474630355835, "eval_runtime": 25.3888, "eval_samples_per_second": 11.974, "eval_steps_per_second": 2.993, "step": 380 }, { "epoch": 2.6, "learning_rate": 3e-05, "loss": 0.14, "step": 390 }, { "epoch": 2.6, "eval_accuracy": 0.6546052631578947, "eval_loss": 0.795179545879364, "eval_runtime": 25.3862, "eval_samples_per_second": 11.975, "eval_steps_per_second": 2.994, "step": 390 }, { "epoch": 2.67, "learning_rate": 3e-05, "loss": 0.1522, "step": 400 }, { "epoch": 2.67, "eval_accuracy": 0.6578947368421053, "eval_loss": 0.7745038866996765, "eval_runtime": 25.3703, "eval_samples_per_second": 11.983, "eval_steps_per_second": 2.996, "step": 400 }, { "epoch": 2.74, "learning_rate": 3e-05, "loss": 0.1345, "step": 410 }, { "epoch": 2.74, "eval_accuracy": 0.6513157894736842, "eval_loss": 0.7230806946754456, "eval_runtime": 25.3678, "eval_samples_per_second": 11.984, "eval_steps_per_second": 2.996, "step": 410 }, { "epoch": 2.8, "learning_rate": 3e-05, "loss": 0.1587, "step": 420 }, { "epoch": 2.8, "eval_accuracy": 0.6480263157894737, "eval_loss": 0.7153956890106201, "eval_runtime": 25.3506, "eval_samples_per_second": 11.992, "eval_steps_per_second": 2.998, "step": 420 }, { "epoch": 2.87, "learning_rate": 3e-05, "loss": 0.1391, "step": 430 }, { "epoch": 2.87, "eval_accuracy": 0.6513157894736842, "eval_loss": 0.6923157572746277, "eval_runtime": 25.3652, "eval_samples_per_second": 11.985, "eval_steps_per_second": 2.996, "step": 430 }, { "epoch": 2.94, "learning_rate": 3e-05, "loss": 0.129, "step": 440 }, { "epoch": 2.94, "eval_accuracy": 0.6710526315789473, "eval_loss": 0.6483842730522156, "eval_runtime": 25.3639, "eval_samples_per_second": 11.986, "eval_steps_per_second": 2.996, "step": 440 }, { "epoch": 3.01, "learning_rate": 3e-05, "loss": 0.2092, "step": 450 }, { "epoch": 3.01, "eval_accuracy": 0.6743421052631579, "eval_loss": 0.5821840167045593, "eval_runtime": 25.3814, "eval_samples_per_second": 11.977, "eval_steps_per_second": 2.994, "step": 450 }, { "epoch": 3.07, "learning_rate": 3e-05, "loss": 0.015, "step": 460 }, { "epoch": 3.07, "eval_accuracy": 0.6578947368421053, "eval_loss": 1.1217145919799805, "eval_runtime": 25.3698, "eval_samples_per_second": 11.983, "eval_steps_per_second": 2.996, "step": 460 }, { "epoch": 3.14, "learning_rate": 3e-05, "loss": 0.051, "step": 470 }, { "epoch": 3.14, "eval_accuracy": 0.6480263157894737, "eval_loss": 1.5789867639541626, "eval_runtime": 25.3704, "eval_samples_per_second": 11.982, "eval_steps_per_second": 2.996, "step": 470 }, { "epoch": 3.21, "learning_rate": 3e-05, "loss": 0.0999, "step": 480 }, { "epoch": 3.21, "eval_accuracy": 0.6677631578947368, "eval_loss": 1.5168237686157227, "eval_runtime": 25.3794, "eval_samples_per_second": 11.978, "eval_steps_per_second": 2.995, "step": 480 }, { "epoch": 3.27, "learning_rate": 3e-05, "loss": 0.1776, "step": 490 }, { "epoch": 3.27, "eval_accuracy": 0.6875, "eval_loss": 1.2342281341552734, "eval_runtime": 25.3944, "eval_samples_per_second": 11.971, "eval_steps_per_second": 2.993, "step": 490 }, { "epoch": 3.34, "learning_rate": 3e-05, "loss": 0.0612, "step": 500 }, { "epoch": 3.34, "eval_accuracy": 0.6973684210526315, "eval_loss": 1.0370548963546753, "eval_runtime": 25.385, "eval_samples_per_second": 11.976, "eval_steps_per_second": 2.994, "step": 500 }, { "epoch": 3.41, "learning_rate": 3e-05, "loss": 0.0858, "step": 510 }, { "epoch": 3.41, "eval_accuracy": 0.6776315789473685, "eval_loss": 1.0277096033096313, "eval_runtime": 25.386, "eval_samples_per_second": 11.975, "eval_steps_per_second": 2.994, "step": 510 }, { "epoch": 3.47, "learning_rate": 3e-05, "loss": 0.0316, "step": 520 }, { "epoch": 3.47, "eval_accuracy": 0.680921052631579, "eval_loss": 1.0386649370193481, "eval_runtime": 25.3706, "eval_samples_per_second": 11.982, "eval_steps_per_second": 2.996, "step": 520 }, { "epoch": 3.54, "learning_rate": 3e-05, "loss": 0.1899, "step": 530 }, { "epoch": 3.54, "eval_accuracy": 0.6907894736842105, "eval_loss": 0.8184946775436401, "eval_runtime": 25.3771, "eval_samples_per_second": 11.979, "eval_steps_per_second": 2.995, "step": 530 }, { "epoch": 3.61, "learning_rate": 3e-05, "loss": 0.1517, "step": 540 }, { "epoch": 3.61, "eval_accuracy": 0.6842105263157895, "eval_loss": 0.7053664922714233, "eval_runtime": 25.3758, "eval_samples_per_second": 11.98, "eval_steps_per_second": 2.995, "step": 540 }, { "epoch": 3.67, "learning_rate": 3e-05, "loss": 0.0324, "step": 550 }, { "epoch": 3.67, "eval_accuracy": 0.6842105263157895, "eval_loss": 0.8504552245140076, "eval_runtime": 25.3714, "eval_samples_per_second": 11.982, "eval_steps_per_second": 2.995, "step": 550 }, { "epoch": 3.74, "learning_rate": 3e-05, "loss": 0.0646, "step": 560 }, { "epoch": 3.74, "eval_accuracy": 0.6611842105263158, "eval_loss": 1.0056827068328857, "eval_runtime": 25.3614, "eval_samples_per_second": 11.987, "eval_steps_per_second": 2.997, "step": 560 }, { "epoch": 3.81, "learning_rate": 3e-05, "loss": 0.1038, "step": 570 }, { "epoch": 3.81, "eval_accuracy": 0.6644736842105263, "eval_loss": 1.0026819705963135, "eval_runtime": 25.376, "eval_samples_per_second": 11.98, "eval_steps_per_second": 2.995, "step": 570 }, { "epoch": 3.87, "learning_rate": 3e-05, "loss": 0.0844, "step": 580 }, { "epoch": 3.87, "eval_accuracy": 0.6513157894736842, "eval_loss": 0.9926165342330933, "eval_runtime": 25.3651, "eval_samples_per_second": 11.985, "eval_steps_per_second": 2.996, "step": 580 }, { "epoch": 3.94, "learning_rate": 3e-05, "loss": 0.0986, "step": 590 }, { "epoch": 3.94, "eval_accuracy": 0.6578947368421053, "eval_loss": 0.9245979189872742, "eval_runtime": 25.3574, "eval_samples_per_second": 11.989, "eval_steps_per_second": 2.997, "step": 590 }, { "epoch": 4.01, "learning_rate": 3e-05, "loss": 0.0627, "step": 600 }, { "epoch": 4.01, "eval_accuracy": 0.6546052631578947, "eval_loss": 0.8538947105407715, "eval_runtime": 25.3724, "eval_samples_per_second": 11.982, "eval_steps_per_second": 2.995, "step": 600 }, { "epoch": 4.07, "learning_rate": 3e-05, "loss": 0.0513, "step": 610 }, { "epoch": 4.07, "eval_accuracy": 0.6513157894736842, "eval_loss": 0.924721896648407, "eval_runtime": 25.4077, "eval_samples_per_second": 11.965, "eval_steps_per_second": 2.991, "step": 610 }, { "epoch": 4.14, "learning_rate": 3e-05, "loss": 0.0484, "step": 620 }, { "epoch": 4.14, "eval_accuracy": 0.6546052631578947, "eval_loss": 1.112806797027588, "eval_runtime": 25.3865, "eval_samples_per_second": 11.975, "eval_steps_per_second": 2.994, "step": 620 }, { "epoch": 4.21, "learning_rate": 3e-05, "loss": 0.0244, "step": 630 }, { "epoch": 4.21, "eval_accuracy": 0.6480263157894737, "eval_loss": 1.2701855897903442, "eval_runtime": 25.3744, "eval_samples_per_second": 11.981, "eval_steps_per_second": 2.995, "step": 630 }, { "epoch": 4.27, "learning_rate": 3e-05, "loss": 0.0672, "step": 640 }, { "epoch": 4.27, "eval_accuracy": 0.6414473684210527, "eval_loss": 1.716863751411438, "eval_runtime": 25.3891, "eval_samples_per_second": 11.974, "eval_steps_per_second": 2.993, "step": 640 }, { "epoch": 4.34, "learning_rate": 3e-05, "loss": 0.0824, "step": 650 }, { "epoch": 4.34, "eval_accuracy": 0.6414473684210527, "eval_loss": 1.662705659866333, "eval_runtime": 25.3803, "eval_samples_per_second": 11.978, "eval_steps_per_second": 2.994, "step": 650 }, { "epoch": 4.41, "learning_rate": 3e-05, "loss": 0.0068, "step": 660 }, { "epoch": 4.41, "eval_accuracy": 0.6348684210526315, "eval_loss": 1.342494010925293, "eval_runtime": 25.376, "eval_samples_per_second": 11.98, "eval_steps_per_second": 2.995, "step": 660 }, { "epoch": 4.47, "learning_rate": 3e-05, "loss": 0.044, "step": 670 }, { "epoch": 4.47, "eval_accuracy": 0.6611842105263158, "eval_loss": 1.2208458185195923, "eval_runtime": 25.3753, "eval_samples_per_second": 11.98, "eval_steps_per_second": 2.995, "step": 670 }, { "epoch": 4.54, "learning_rate": 3e-05, "loss": 0.0378, "step": 680 }, { "epoch": 4.54, "eval_accuracy": 0.6447368421052632, "eval_loss": 1.289115309715271, "eval_runtime": 25.3905, "eval_samples_per_second": 11.973, "eval_steps_per_second": 2.993, "step": 680 }, { "epoch": 4.61, "learning_rate": 3e-05, "loss": 0.0411, "step": 690 }, { "epoch": 4.61, "eval_accuracy": 0.6611842105263158, "eval_loss": 1.3528344631195068, "eval_runtime": 25.3678, "eval_samples_per_second": 11.984, "eval_steps_per_second": 2.996, "step": 690 }, { "epoch": 4.67, "learning_rate": 3e-05, "loss": 0.0215, "step": 700 }, { "epoch": 4.67, "eval_accuracy": 0.6677631578947368, "eval_loss": 1.2606314420700073, "eval_runtime": 25.3852, "eval_samples_per_second": 11.975, "eval_steps_per_second": 2.994, "step": 700 }, { "epoch": 4.74, "learning_rate": 3e-05, "loss": 0.0438, "step": 710 }, { "epoch": 4.74, "eval_accuracy": 0.6546052631578947, "eval_loss": 1.2514981031417847, "eval_runtime": 25.3679, "eval_samples_per_second": 11.984, "eval_steps_per_second": 2.996, "step": 710 }, { "epoch": 4.81, "learning_rate": 3e-05, "loss": 0.0936, "step": 720 }, { "epoch": 4.81, "eval_accuracy": 0.6644736842105263, "eval_loss": 1.0857858657836914, "eval_runtime": 25.3812, "eval_samples_per_second": 11.977, "eval_steps_per_second": 2.994, "step": 720 }, { "epoch": 4.87, "learning_rate": 3e-05, "loss": 0.0305, "step": 730 }, { "epoch": 4.87, "eval_accuracy": 0.6578947368421053, "eval_loss": 0.9838737845420837, "eval_runtime": 25.3673, "eval_samples_per_second": 11.984, "eval_steps_per_second": 2.996, "step": 730 }, { "epoch": 4.94, "learning_rate": 3e-05, "loss": 0.0282, "step": 740 }, { "epoch": 4.94, "eval_accuracy": 0.680921052631579, "eval_loss": 1.0233386754989624, "eval_runtime": 25.3685, "eval_samples_per_second": 11.983, "eval_steps_per_second": 2.996, "step": 740 }, { "epoch": 4.97, "step": 745, "total_flos": 2.959225892752589e+17, "train_loss": 0.25739487704614666, "train_runtime": 4901.8806, "train_samples_per_second": 2.444, "train_steps_per_second": 0.152 } ], "logging_steps": 10, "max_steps": 745, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 150, "total_flos": 2.959225892752589e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }