|
{ |
|
"best_metric": 0.561149001121521, |
|
"best_model_checkpoint": "/content/drive/MyDrive/c4_200m/weights/checkpoint-5000", |
|
"epoch": 0.969681298080031, |
|
"global_step": 5000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8072148952676496e-05, |
|
"loss": 0.763, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_gen_len": 17.3073, |
|
"eval_loss": 0.6263013482093811, |
|
"eval_rouge1": 71.3829, |
|
"eval_rouge2": 60.9575, |
|
"eval_rougeL": 70.617, |
|
"eval_rougeLsum": 70.6508, |
|
"eval_runtime": 4434.8322, |
|
"eval_samples_per_second": 12.402, |
|
"eval_steps_per_second": 0.775, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6132660977501943e-05, |
|
"loss": 0.6682, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_gen_len": 17.2827, |
|
"eval_loss": 0.5980147123336792, |
|
"eval_rouge1": 71.6839, |
|
"eval_rouge2": 61.5115, |
|
"eval_rougeL": 70.9369, |
|
"eval_rougeLsum": 70.9762, |
|
"eval_runtime": 4421.7138, |
|
"eval_samples_per_second": 12.439, |
|
"eval_steps_per_second": 0.778, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4193173002327387e-05, |
|
"loss": 0.6547, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_gen_len": 17.2674, |
|
"eval_loss": 0.5852676630020142, |
|
"eval_rouge1": 71.8716, |
|
"eval_rouge2": 61.8264, |
|
"eval_rougeL": 71.1345, |
|
"eval_rougeLsum": 71.1741, |
|
"eval_runtime": 4432.6842, |
|
"eval_samples_per_second": 12.408, |
|
"eval_steps_per_second": 0.776, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2257564003103182e-05, |
|
"loss": 0.6423, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_gen_len": 17.2636, |
|
"eval_loss": 0.5758475065231323, |
|
"eval_rouge1": 71.9319, |
|
"eval_rouge2": 61.9431, |
|
"eval_rougeL": 71.1965, |
|
"eval_rougeLsum": 71.2364, |
|
"eval_runtime": 4426.8507, |
|
"eval_samples_per_second": 12.424, |
|
"eval_steps_per_second": 0.777, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0318076027928628e-05, |
|
"loss": 0.6319, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_gen_len": 17.2555, |
|
"eval_loss": 0.5706557631492615, |
|
"eval_rouge1": 72.004, |
|
"eval_rouge2": 62.0721, |
|
"eval_rougeL": 71.2739, |
|
"eval_rougeLsum": 71.3176, |
|
"eval_runtime": 4455.9486, |
|
"eval_samples_per_second": 12.343, |
|
"eval_steps_per_second": 0.772, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.378588052754074e-06, |
|
"loss": 0.6239, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_gen_len": 17.251, |
|
"eval_loss": 0.5678849220275879, |
|
"eval_rouge1": 72.0655, |
|
"eval_rouge2": 62.1749, |
|
"eval_rougeL": 71.3432, |
|
"eval_rougeLsum": 71.3854, |
|
"eval_runtime": 4483.3834, |
|
"eval_samples_per_second": 12.268, |
|
"eval_steps_per_second": 0.767, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.43910007757952e-06, |
|
"loss": 0.619, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_gen_len": 17.25, |
|
"eval_loss": 0.5634791851043701, |
|
"eval_rouge1": 72.0893, |
|
"eval_rouge2": 62.2244, |
|
"eval_rougeL": 71.3669, |
|
"eval_rougeLsum": 71.4087, |
|
"eval_runtime": 4466.0069, |
|
"eval_samples_per_second": 12.315, |
|
"eval_steps_per_second": 0.77, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.499612102404966e-06, |
|
"loss": 0.6248, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_gen_len": 17.246, |
|
"eval_loss": 0.5618749260902405, |
|
"eval_rouge1": 72.1096, |
|
"eval_rouge2": 62.261, |
|
"eval_rougeL": 71.3877, |
|
"eval_rougeLsum": 71.4304, |
|
"eval_runtime": 4436.9366, |
|
"eval_samples_per_second": 12.396, |
|
"eval_steps_per_second": 0.775, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.560124127230411e-06, |
|
"loss": 0.6159, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_gen_len": 17.2457, |
|
"eval_loss": 0.5612673163414001, |
|
"eval_rouge1": 72.1232, |
|
"eval_rouge2": 62.2834, |
|
"eval_rougeL": 71.4006, |
|
"eval_rougeLsum": 71.4433, |
|
"eval_runtime": 4436.0596, |
|
"eval_samples_per_second": 12.398, |
|
"eval_steps_per_second": 0.775, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.206361520558573e-07, |
|
"loss": 0.6118, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_gen_len": 17.2461, |
|
"eval_loss": 0.561149001121521, |
|
"eval_rouge1": 72.1272, |
|
"eval_rouge2": 62.2947, |
|
"eval_rougeL": 71.4073, |
|
"eval_rougeLsum": 71.4508, |
|
"eval_runtime": 4447.3157, |
|
"eval_samples_per_second": 12.367, |
|
"eval_steps_per_second": 0.773, |
|
"step": 5000 |
|
} |
|
], |
|
"max_steps": 5156, |
|
"num_train_epochs": 1, |
|
"total_flos": 7.061289205579776e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|