{ "best_metric": 0.561149001121521, "best_model_checkpoint": "/content/drive/MyDrive/c4_200m/weights/checkpoint-5000", "epoch": 0.969681298080031, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 1.8072148952676496e-05, "loss": 0.763, "step": 500 }, { "epoch": 0.1, "eval_gen_len": 17.3073, "eval_loss": 0.6263013482093811, "eval_rouge1": 71.3829, "eval_rouge2": 60.9575, "eval_rougeL": 70.617, "eval_rougeLsum": 70.6508, "eval_runtime": 4434.8322, "eval_samples_per_second": 12.402, "eval_steps_per_second": 0.775, "step": 500 }, { "epoch": 0.19, "learning_rate": 1.6132660977501943e-05, "loss": 0.6682, "step": 1000 }, { "epoch": 0.19, "eval_gen_len": 17.2827, "eval_loss": 0.5980147123336792, "eval_rouge1": 71.6839, "eval_rouge2": 61.5115, "eval_rougeL": 70.9369, "eval_rougeLsum": 70.9762, "eval_runtime": 4421.7138, "eval_samples_per_second": 12.439, "eval_steps_per_second": 0.778, "step": 1000 }, { "epoch": 0.29, "learning_rate": 1.4193173002327387e-05, "loss": 0.6547, "step": 1500 }, { "epoch": 0.29, "eval_gen_len": 17.2674, "eval_loss": 0.5852676630020142, "eval_rouge1": 71.8716, "eval_rouge2": 61.8264, "eval_rougeL": 71.1345, "eval_rougeLsum": 71.1741, "eval_runtime": 4432.6842, "eval_samples_per_second": 12.408, "eval_steps_per_second": 0.776, "step": 1500 }, { "epoch": 0.39, "learning_rate": 1.2257564003103182e-05, "loss": 0.6423, "step": 2000 }, { "epoch": 0.39, "eval_gen_len": 17.2636, "eval_loss": 0.5758475065231323, "eval_rouge1": 71.9319, "eval_rouge2": 61.9431, "eval_rougeL": 71.1965, "eval_rougeLsum": 71.2364, "eval_runtime": 4426.8507, "eval_samples_per_second": 12.424, "eval_steps_per_second": 0.777, "step": 2000 }, { "epoch": 0.48, "learning_rate": 1.0318076027928628e-05, "loss": 0.6319, "step": 2500 }, { "epoch": 0.48, "eval_gen_len": 17.2555, "eval_loss": 0.5706557631492615, "eval_rouge1": 72.004, "eval_rouge2": 62.0721, "eval_rougeL": 71.2739, "eval_rougeLsum": 71.3176, "eval_runtime": 4455.9486, "eval_samples_per_second": 12.343, "eval_steps_per_second": 0.772, "step": 2500 }, { "epoch": 0.58, "learning_rate": 8.378588052754074e-06, "loss": 0.6239, "step": 3000 }, { "epoch": 0.58, "eval_gen_len": 17.251, "eval_loss": 0.5678849220275879, "eval_rouge1": 72.0655, "eval_rouge2": 62.1749, "eval_rougeL": 71.3432, "eval_rougeLsum": 71.3854, "eval_runtime": 4483.3834, "eval_samples_per_second": 12.268, "eval_steps_per_second": 0.767, "step": 3000 }, { "epoch": 0.68, "learning_rate": 6.43910007757952e-06, "loss": 0.619, "step": 3500 }, { "epoch": 0.68, "eval_gen_len": 17.25, "eval_loss": 0.5634791851043701, "eval_rouge1": 72.0893, "eval_rouge2": 62.2244, "eval_rougeL": 71.3669, "eval_rougeLsum": 71.4087, "eval_runtime": 4466.0069, "eval_samples_per_second": 12.315, "eval_steps_per_second": 0.77, "step": 3500 }, { "epoch": 0.78, "learning_rate": 4.499612102404966e-06, "loss": 0.6248, "step": 4000 }, { "epoch": 0.78, "eval_gen_len": 17.246, "eval_loss": 0.5618749260902405, "eval_rouge1": 72.1096, "eval_rouge2": 62.261, "eval_rougeL": 71.3877, "eval_rougeLsum": 71.4304, "eval_runtime": 4436.9366, "eval_samples_per_second": 12.396, "eval_steps_per_second": 0.775, "step": 4000 }, { "epoch": 0.87, "learning_rate": 2.560124127230411e-06, "loss": 0.6159, "step": 4500 }, { "epoch": 0.87, "eval_gen_len": 17.2457, "eval_loss": 0.5612673163414001, "eval_rouge1": 72.1232, "eval_rouge2": 62.2834, "eval_rougeL": 71.4006, "eval_rougeLsum": 71.4433, "eval_runtime": 4436.0596, "eval_samples_per_second": 12.398, "eval_steps_per_second": 0.775, "step": 4500 }, { "epoch": 0.97, "learning_rate": 6.206361520558573e-07, "loss": 0.6118, "step": 5000 }, { "epoch": 0.97, "eval_gen_len": 17.2461, "eval_loss": 0.561149001121521, "eval_rouge1": 72.1272, "eval_rouge2": 62.2947, "eval_rougeL": 71.4073, "eval_rougeLsum": 71.4508, "eval_runtime": 4447.3157, "eval_samples_per_second": 12.367, "eval_steps_per_second": 0.773, "step": 5000 } ], "max_steps": 5156, "num_train_epochs": 1, "total_flos": 7.061289205579776e+16, "trial_name": null, "trial_params": null }