{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.774057048004438, "global_step": 180000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 4.892535079835384e-05, "loss": 3.2017, "step": 5000 }, { "epoch": 0.02, "eval_accuracy": 0.4179681193308354, "eval_loss": 2.969822883605957, "eval_runtime": 1929.4803, "eval_samples_per_second": 19.496, "eval_steps_per_second": 2.437, "step": 5000 }, { "epoch": 0.04, "learning_rate": 4.785070159670768e-05, "loss": 2.9116, "step": 10000 }, { "epoch": 0.04, "eval_accuracy": 0.4297956163670618, "eval_loss": 2.8731179237365723, "eval_runtime": 1919.4124, "eval_samples_per_second": 19.598, "eval_steps_per_second": 2.45, "step": 10000 }, { "epoch": 0.06, "learning_rate": 4.677605239506152e-05, "loss": 2.8412, "step": 15000 }, { "epoch": 0.06, "eval_accuracy": 0.4368075611910975, "eval_loss": 2.81874942779541, "eval_runtime": 2015.9454, "eval_samples_per_second": 18.66, "eval_steps_per_second": 2.333, "step": 15000 }, { "epoch": 0.09, "learning_rate": 4.5701403193415356e-05, "loss": 2.7937, "step": 20000 }, { "epoch": 0.09, "eval_accuracy": 0.44204520475458375, "eval_loss": 2.778646230697632, "eval_runtime": 1919.08, "eval_samples_per_second": 19.602, "eval_steps_per_second": 2.451, "step": 20000 }, { "epoch": 0.11, "learning_rate": 4.4626753991769196e-05, "loss": 2.7614, "step": 25000 }, { "epoch": 0.11, "eval_accuracy": 0.44610035327770187, "eval_loss": 2.7471842765808105, "eval_runtime": 1919.775, "eval_samples_per_second": 19.594, "eval_steps_per_second": 2.45, "step": 25000 }, { "epoch": 0.13, "learning_rate": 4.355188977427636e-05, "loss": 2.7295, "step": 30000 }, { "epoch": 0.13, "eval_accuracy": 0.4501440419543679, "eval_loss": 2.7196261882781982, "eval_runtime": 1928.9026, "eval_samples_per_second": 19.502, "eval_steps_per_second": 2.438, "step": 30000 }, { "epoch": 0.15, "learning_rate": 4.24772405726302e-05, "loss": 2.7008, "step": 35000 }, { "epoch": 0.15, "eval_accuracy": 0.45296033170252703, "eval_loss": 2.6961302757263184, "eval_runtime": 1926.8386, "eval_samples_per_second": 19.523, "eval_steps_per_second": 2.441, "step": 35000 }, { "epoch": 0.17, "learning_rate": 4.140237635513737e-05, "loss": 2.6827, "step": 40000 }, { "epoch": 0.17, "eval_accuracy": 0.4561378015092748, "eval_loss": 2.6738195419311523, "eval_runtime": 1920.5318, "eval_samples_per_second": 19.587, "eval_steps_per_second": 2.449, "step": 40000 }, { "epoch": 0.19, "learning_rate": 4.032772715349121e-05, "loss": 2.6545, "step": 45000 }, { "epoch": 0.19, "eval_accuracy": 0.4590067649734393, "eval_loss": 2.6545751094818115, "eval_runtime": 1933.7083, "eval_samples_per_second": 19.453, "eval_steps_per_second": 2.432, "step": 45000 }, { "epoch": 0.22, "learning_rate": 3.9252862935998384e-05, "loss": 2.6419, "step": 50000 }, { "epoch": 0.22, "eval_accuracy": 0.4616434651550895, "eval_loss": 2.635937452316284, "eval_runtime": 1931.385, "eval_samples_per_second": 19.477, "eval_steps_per_second": 2.435, "step": 50000 }, { "epoch": 0.24, "learning_rate": 3.8178213734352224e-05, "loss": 2.6208, "step": 55000 }, { "epoch": 0.24, "eval_accuracy": 0.4644954077588773, "eval_loss": 2.617478847503662, "eval_runtime": 1935.6188, "eval_samples_per_second": 19.434, "eval_steps_per_second": 2.43, "step": 55000 }, { "epoch": 0.26, "learning_rate": 3.7103564532706064e-05, "loss": 2.604, "step": 60000 }, { "epoch": 0.26, "eval_accuracy": 0.4666273030036154, "eval_loss": 2.6017534732818604, "eval_runtime": 1934.3532, "eval_samples_per_second": 19.447, "eval_steps_per_second": 2.431, "step": 60000 }, { "epoch": 0.28, "learning_rate": 3.60289153310599e-05, "loss": 2.5883, "step": 65000 }, { "epoch": 0.28, "eval_accuracy": 0.4690363654190064, "eval_loss": 2.5858962535858154, "eval_runtime": 1937.9051, "eval_samples_per_second": 19.411, "eval_steps_per_second": 2.427, "step": 65000 }, { "epoch": 0.3, "learning_rate": 3.495426612941374e-05, "loss": 2.5733, "step": 70000 }, { "epoch": 0.3, "eval_accuracy": 0.47110511457104925, "eval_loss": 2.5711781978607178, "eval_runtime": 1929.1817, "eval_samples_per_second": 19.499, "eval_steps_per_second": 2.438, "step": 70000 }, { "epoch": 0.32, "learning_rate": 3.387940191192091e-05, "loss": 2.5603, "step": 75000 }, { "epoch": 0.32, "eval_accuracy": 0.4735584312234197, "eval_loss": 2.5554990768432617, "eval_runtime": 1924.3705, "eval_samples_per_second": 19.548, "eval_steps_per_second": 2.444, "step": 75000 }, { "epoch": 0.34, "learning_rate": 3.280475271027475e-05, "loss": 2.5462, "step": 80000 }, { "epoch": 0.34, "eval_accuracy": 0.4755388538038284, "eval_loss": 2.541663646697998, "eval_runtime": 1932.1982, "eval_samples_per_second": 19.469, "eval_steps_per_second": 2.434, "step": 80000 }, { "epoch": 0.37, "learning_rate": 3.173010350862859e-05, "loss": 2.5267, "step": 85000 }, { "epoch": 0.37, "eval_accuracy": 0.47766700705788817, "eval_loss": 2.528374433517456, "eval_runtime": 1933.9592, "eval_samples_per_second": 19.451, "eval_steps_per_second": 2.432, "step": 85000 }, { "epoch": 0.39, "learning_rate": 3.0655454306982425e-05, "loss": 2.5153, "step": 90000 }, { "epoch": 0.39, "eval_accuracy": 0.47989050311610376, "eval_loss": 2.514660596847534, "eval_runtime": 1929.164, "eval_samples_per_second": 19.499, "eval_steps_per_second": 2.438, "step": 90000 }, { "epoch": 0.41, "learning_rate": 2.9580805105336262e-05, "loss": 2.5028, "step": 95000 }, { "epoch": 0.41, "eval_accuracy": 0.481903668114947, "eval_loss": 2.501615285873413, "eval_runtime": 1929.1634, "eval_samples_per_second": 19.499, "eval_steps_per_second": 2.438, "step": 95000 }, { "epoch": 0.43, "learning_rate": 2.8505940887843435e-05, "loss": 2.4881, "step": 100000 }, { "epoch": 0.43, "eval_accuracy": 0.4839338799602133, "eval_loss": 2.488723039627075, "eval_runtime": 1930.4001, "eval_samples_per_second": 19.487, "eval_steps_per_second": 2.436, "step": 100000 }, { "epoch": 0.45, "learning_rate": 2.7431291686197276e-05, "loss": 2.4758, "step": 105000 }, { "epoch": 0.45, "eval_accuracy": 0.48596689829848827, "eval_loss": 2.475806951522827, "eval_runtime": 1923.9804, "eval_samples_per_second": 19.552, "eval_steps_per_second": 2.444, "step": 105000 }, { "epoch": 0.47, "learning_rate": 2.6356642484551112e-05, "loss": 2.4632, "step": 110000 }, { "epoch": 0.47, "eval_accuracy": 0.4880078683669545, "eval_loss": 2.463679552078247, "eval_runtime": 1943.6457, "eval_samples_per_second": 19.354, "eval_steps_per_second": 2.42, "step": 110000 }, { "epoch": 0.49, "learning_rate": 2.5281993282904953e-05, "loss": 2.4543, "step": 115000 }, { "epoch": 0.49, "eval_accuracy": 0.4898118976645586, "eval_loss": 2.4515490531921387, "eval_runtime": 1940.3072, "eval_samples_per_second": 19.387, "eval_steps_per_second": 2.424, "step": 115000 }, { "epoch": 0.52, "learning_rate": 2.4206914049565453e-05, "loss": 2.4416, "step": 120000 }, { "epoch": 0.52, "eval_accuracy": 0.49199028194626443, "eval_loss": 2.4388370513916016, "eval_runtime": 1928.5903, "eval_samples_per_second": 19.505, "eval_steps_per_second": 2.439, "step": 120000 }, { "epoch": 0.54, "learning_rate": 2.313247986376596e-05, "loss": 2.4256, "step": 125000 }, { "epoch": 0.54, "eval_accuracy": 0.49386564294117247, "eval_loss": 2.4272964000701904, "eval_runtime": 1927.1096, "eval_samples_per_second": 19.52, "eval_steps_per_second": 2.44, "step": 125000 }, { "epoch": 0.56, "learning_rate": 2.20578306621198e-05, "loss": 2.4166, "step": 130000 }, { "epoch": 0.56, "eval_accuracy": 0.4959010260096677, "eval_loss": 2.4158496856689453, "eval_runtime": 1932.6805, "eval_samples_per_second": 19.464, "eval_steps_per_second": 2.433, "step": 130000 }, { "epoch": 0.58, "learning_rate": 2.0982751428780303e-05, "loss": 2.4052, "step": 135000 }, { "epoch": 0.58, "eval_accuracy": 0.4977953568184306, "eval_loss": 2.4038643836975098, "eval_runtime": 1925.4197, "eval_samples_per_second": 19.537, "eval_steps_per_second": 2.443, "step": 135000 }, { "epoch": 0.6, "learning_rate": 1.990831724298081e-05, "loss": 2.3936, "step": 140000 }, { "epoch": 0.6, "eval_accuracy": 0.49973316228278164, "eval_loss": 2.392191171646118, "eval_runtime": 1928.0691, "eval_samples_per_second": 19.51, "eval_steps_per_second": 2.439, "step": 140000 }, { "epoch": 0.62, "learning_rate": 1.8833668041334647e-05, "loss": 2.379, "step": 145000 }, { "epoch": 0.62, "eval_accuracy": 0.5019234221876816, "eval_loss": 2.38037371635437, "eval_runtime": 1923.4067, "eval_samples_per_second": 19.557, "eval_steps_per_second": 2.445, "step": 145000 }, { "epoch": 0.65, "learning_rate": 1.7759018839688484e-05, "loss": 2.3659, "step": 150000 }, { "epoch": 0.65, "eval_accuracy": 0.5037539832386363, "eval_loss": 2.3692569732666016, "eval_runtime": 1925.9734, "eval_samples_per_second": 19.531, "eval_steps_per_second": 2.442, "step": 150000 }, { "epoch": 0.67, "learning_rate": 1.6684154622195657e-05, "loss": 2.3588, "step": 155000 }, { "epoch": 0.67, "eval_accuracy": 0.5057738006653519, "eval_loss": 2.357806444168091, "eval_runtime": 1927.4777, "eval_samples_per_second": 19.516, "eval_steps_per_second": 2.44, "step": 155000 }, { "epoch": 0.69, "learning_rate": 1.5609505420549494e-05, "loss": 2.3529, "step": 160000 }, { "epoch": 0.69, "eval_accuracy": 0.5079495603563737, "eval_loss": 2.3464465141296387, "eval_runtime": 1926.6683, "eval_samples_per_second": 19.524, "eval_steps_per_second": 2.441, "step": 160000 }, { "epoch": 0.71, "learning_rate": 1.4535071234750002e-05, "loss": 2.3338, "step": 165000 }, { "epoch": 0.71, "eval_accuracy": 0.5098386939558613, "eval_loss": 2.335519313812256, "eval_runtime": 1925.6719, "eval_samples_per_second": 19.534, "eval_steps_per_second": 2.442, "step": 165000 }, { "epoch": 0.73, "learning_rate": 1.3460422033103839e-05, "loss": 2.3238, "step": 170000 }, { "epoch": 0.73, "eval_accuracy": 0.5118405290384843, "eval_loss": 2.3250160217285156, "eval_runtime": 1925.0126, "eval_samples_per_second": 19.541, "eval_steps_per_second": 2.443, "step": 170000 }, { "epoch": 0.75, "learning_rate": 1.2385557815611011e-05, "loss": 2.3139, "step": 175000 }, { "epoch": 0.75, "eval_accuracy": 0.5137200997729053, "eval_loss": 2.314518451690674, "eval_runtime": 1927.2965, "eval_samples_per_second": 19.518, "eval_steps_per_second": 2.44, "step": 175000 }, { "epoch": 0.77, "learning_rate": 1.1311123629811518e-05, "loss": 2.3035, "step": 180000 }, { "epoch": 0.77, "eval_accuracy": 0.5153991621734844, "eval_loss": 2.3050577640533447, "eval_runtime": 1927.482, "eval_samples_per_second": 19.516, "eval_steps_per_second": 2.44, "step": 180000 } ], "max_steps": 232541, "num_train_epochs": 1, "total_flos": 2.138588502294528e+19, "trial_name": null, "trial_params": null }