{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.996712689020382, "eval_steps": 500, "global_step": 1900, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "high_lr": 0.001, "low_lr": 2e-05, "step": 0 }, { "epoch": 0, "high_lr": 0.001, "low_lr": 2e-05, "step": 0 }, { "epoch": 0, "high_lr": 0.001, "low_lr": 2e-05, "step": 0 }, { "epoch": 0, "high_lr": 0.001, "low_lr": 2e-05, "step": 0 }, { "epoch": 0, "high_lr": 0.001, "low_lr": 2e-05, "step": 0 }, { "epoch": 0, "high_lr": 0.001, "low_lr": 2e-05, "step": 0 }, { "epoch": 0, "high_lr": 0.001, "low_lr": 2e-05, "step": 0 }, { "epoch": 0, "high_lr": 0.001, "low_lr": 2e-05, "step": 0 }, { "epoch": 0.0026298487836949377, "grad_norm": 2.205960988998413, "learning_rate": 0.0009994736842105264, "loss": 2.5661, "step": 1 }, { "epoch": 0.0026298487836949377, "high_lr": 0.0009994736842105264, "low_lr": 1.9989473684210526e-05, "step": 1 }, { "epoch": 0.0026298487836949377, "high_lr": 0.0009994736842105264, "low_lr": 1.9989473684210526e-05, "step": 1 }, { "epoch": 0.0026298487836949377, "high_lr": 0.0009994736842105264, "low_lr": 1.9989473684210526e-05, "step": 1 }, { "epoch": 0.0026298487836949377, "high_lr": 0.0009994736842105264, "low_lr": 1.9989473684210526e-05, "step": 1 }, { "epoch": 0.0026298487836949377, "high_lr": 0.0009994736842105264, "low_lr": 1.9989473684210526e-05, "step": 1 }, { "epoch": 0.0026298487836949377, "high_lr": 0.0009994736842105264, "low_lr": 1.9989473684210526e-05, "step": 1 }, { "epoch": 0.0026298487836949377, "high_lr": 0.0009994736842105264, "low_lr": 1.9989473684210526e-05, "step": 1 }, { "epoch": 0.0026298487836949377, "high_lr": 0.0009994736842105264, "low_lr": 1.9989473684210526e-05, "step": 1 }, { "epoch": 0.005259697567389875, "grad_norm": 1.3347679376602173, "learning_rate": 0.0009989473684210526, "loss": 2.5835, "step": 2 }, { "epoch": 0.005259697567389875, "high_lr": 0.0009989473684210526, "low_lr": 1.9978947368421054e-05, "step": 2 }, { "epoch": 0.005259697567389875, "high_lr": 0.0009989473684210526, "low_lr": 1.9978947368421054e-05, "step": 2 }, { "epoch": 0.005259697567389875, "high_lr": 0.0009989473684210526, "low_lr": 1.9978947368421054e-05, "step": 2 }, { "epoch": 0.005259697567389875, "high_lr": 0.0009989473684210526, "low_lr": 1.9978947368421054e-05, "step": 2 }, { "epoch": 0.005259697567389875, "high_lr": 0.0009989473684210526, "low_lr": 1.9978947368421054e-05, "step": 2 }, { "epoch": 0.005259697567389875, "high_lr": 0.0009989473684210526, "low_lr": 1.9978947368421054e-05, "step": 2 }, { "epoch": 0.005259697567389875, "high_lr": 0.0009989473684210526, "low_lr": 1.9978947368421054e-05, "step": 2 }, { "epoch": 0.005259697567389875, "high_lr": 0.0009989473684210526, "low_lr": 1.9978947368421054e-05, "step": 2 }, { "epoch": 0.007889546351084813, "grad_norm": 1.0948487520217896, "learning_rate": 0.000998421052631579, "loss": 2.5211, "step": 3 }, { "epoch": 0.007889546351084813, "high_lr": 0.000998421052631579, "low_lr": 1.9968421052631582e-05, "step": 3 }, { "epoch": 0.007889546351084813, "high_lr": 0.000998421052631579, "low_lr": 1.9968421052631582e-05, "step": 3 }, { "epoch": 0.007889546351084813, "high_lr": 0.000998421052631579, "low_lr": 1.9968421052631582e-05, "step": 3 }, { "epoch": 0.007889546351084813, "high_lr": 0.000998421052631579, "low_lr": 1.9968421052631582e-05, "step": 3 }, { "epoch": 0.007889546351084813, "high_lr": 0.000998421052631579, "low_lr": 1.9968421052631582e-05, "step": 3 }, { "epoch": 0.007889546351084813, "high_lr": 0.000998421052631579, "low_lr": 1.9968421052631582e-05, "step": 3 }, { "epoch": 0.007889546351084813, "high_lr": 0.000998421052631579, "low_lr": 1.9968421052631582e-05, "step": 3 }, { "epoch": 0.007889546351084813, "high_lr": 0.000998421052631579, "low_lr": 1.9968421052631582e-05, "step": 3 }, { "epoch": 0.01051939513477975, "grad_norm": 0.8912683725357056, "learning_rate": 0.0009978947368421054, "loss": 2.4531, "step": 4 }, { "epoch": 0.01051939513477975, "high_lr": 0.0009978947368421054, "low_lr": 1.9957894736842107e-05, "step": 4 }, { "epoch": 0.01051939513477975, "high_lr": 0.0009978947368421054, "low_lr": 1.9957894736842107e-05, "step": 4 }, { "epoch": 0.01051939513477975, "high_lr": 0.0009978947368421054, "low_lr": 1.9957894736842107e-05, "step": 4 }, { "epoch": 0.01051939513477975, "high_lr": 0.0009978947368421054, "low_lr": 1.9957894736842107e-05, "step": 4 }, { "epoch": 0.01051939513477975, "high_lr": 0.0009978947368421054, "low_lr": 1.9957894736842107e-05, "step": 4 }, { "epoch": 0.01051939513477975, "high_lr": 0.0009978947368421054, "low_lr": 1.9957894736842107e-05, "step": 4 }, { "epoch": 0.01051939513477975, "high_lr": 0.0009978947368421054, "low_lr": 1.9957894736842107e-05, "step": 4 }, { "epoch": 0.01051939513477975, "high_lr": 0.0009978947368421054, "low_lr": 1.9957894736842107e-05, "step": 4 }, { "epoch": 0.013149243918474688, "grad_norm": 0.7557066082954407, "learning_rate": 0.0009973684210526316, "loss": 2.3694, "step": 5 }, { "epoch": 0.013149243918474688, "high_lr": 0.0009973684210526316, "low_lr": 1.994736842105263e-05, "step": 5 }, { "epoch": 0.013149243918474688, "high_lr": 0.0009973684210526316, "low_lr": 1.994736842105263e-05, "step": 5 }, { "epoch": 0.013149243918474688, "high_lr": 0.0009973684210526316, "low_lr": 1.994736842105263e-05, "step": 5 }, { "epoch": 0.013149243918474688, "high_lr": 0.0009973684210526316, "low_lr": 1.994736842105263e-05, "step": 5 }, { "epoch": 0.013149243918474688, "high_lr": 0.0009973684210526316, "low_lr": 1.994736842105263e-05, "step": 5 }, { "epoch": 0.013149243918474688, "high_lr": 0.0009973684210526316, "low_lr": 1.994736842105263e-05, "step": 5 }, { "epoch": 0.013149243918474688, "high_lr": 0.0009973684210526316, "low_lr": 1.994736842105263e-05, "step": 5 }, { "epoch": 0.013149243918474688, "high_lr": 0.0009973684210526316, "low_lr": 1.994736842105263e-05, "step": 5 }, { "epoch": 0.015779092702169626, "grad_norm": 0.6246759295463562, "learning_rate": 0.000996842105263158, "loss": 2.3495, "step": 6 }, { "epoch": 0.015779092702169626, "high_lr": 0.000996842105263158, "low_lr": 1.993684210526316e-05, "step": 6 }, { "epoch": 0.015779092702169626, "high_lr": 0.000996842105263158, "low_lr": 1.993684210526316e-05, "step": 6 }, { "epoch": 0.015779092702169626, "high_lr": 0.000996842105263158, "low_lr": 1.993684210526316e-05, "step": 6 }, { "epoch": 0.015779092702169626, "high_lr": 0.000996842105263158, "low_lr": 1.993684210526316e-05, "step": 6 }, { "epoch": 0.015779092702169626, "high_lr": 0.000996842105263158, "low_lr": 1.993684210526316e-05, "step": 6 }, { "epoch": 0.015779092702169626, "high_lr": 0.000996842105263158, "low_lr": 1.993684210526316e-05, "step": 6 }, { "epoch": 0.015779092702169626, "high_lr": 0.000996842105263158, "low_lr": 1.993684210526316e-05, "step": 6 }, { "epoch": 0.015779092702169626, "high_lr": 0.000996842105263158, "low_lr": 1.993684210526316e-05, "step": 6 }, { "epoch": 0.018408941485864562, "grad_norm": 0.5611429214477539, "learning_rate": 0.0009963157894736843, "loss": 2.368, "step": 7 }, { "epoch": 0.018408941485864562, "high_lr": 0.0009963157894736843, "low_lr": 1.9926315789473688e-05, "step": 7 }, { "epoch": 0.018408941485864562, "high_lr": 0.0009963157894736843, "low_lr": 1.9926315789473688e-05, "step": 7 }, { "epoch": 0.018408941485864562, "high_lr": 0.0009963157894736843, "low_lr": 1.9926315789473688e-05, "step": 7 }, { "epoch": 0.018408941485864562, "high_lr": 0.0009963157894736843, "low_lr": 1.9926315789473688e-05, "step": 7 }, { "epoch": 0.018408941485864562, "high_lr": 0.0009963157894736843, "low_lr": 1.9926315789473688e-05, "step": 7 }, { "epoch": 0.018408941485864562, "high_lr": 0.0009963157894736843, "low_lr": 1.9926315789473688e-05, "step": 7 }, { "epoch": 0.018408941485864562, "high_lr": 0.0009963157894736843, "low_lr": 1.9926315789473688e-05, "step": 7 }, { "epoch": 0.018408941485864562, "high_lr": 0.0009963157894736843, "low_lr": 1.9926315789473688e-05, "step": 7 }, { "epoch": 0.0210387902695595, "grad_norm": 0.5358776450157166, "learning_rate": 0.0009957894736842105, "loss": 2.326, "step": 8 }, { "epoch": 0.0210387902695595, "high_lr": 0.0009957894736842105, "low_lr": 1.9915789473684212e-05, "step": 8 }, { "epoch": 0.0210387902695595, "high_lr": 0.0009957894736842105, "low_lr": 1.9915789473684212e-05, "step": 8 }, { "epoch": 0.0210387902695595, "high_lr": 0.0009957894736842105, "low_lr": 1.9915789473684212e-05, "step": 8 }, { "epoch": 0.0210387902695595, "high_lr": 0.0009957894736842105, "low_lr": 1.9915789473684212e-05, "step": 8 }, { "epoch": 0.0210387902695595, "high_lr": 0.0009957894736842105, "low_lr": 1.9915789473684212e-05, "step": 8 }, { "epoch": 0.0210387902695595, "high_lr": 0.0009957894736842105, "low_lr": 1.9915789473684212e-05, "step": 8 }, { "epoch": 0.0210387902695595, "high_lr": 0.0009957894736842105, "low_lr": 1.9915789473684212e-05, "step": 8 }, { "epoch": 0.0210387902695595, "high_lr": 0.0009957894736842105, "low_lr": 1.9915789473684212e-05, "step": 8 }, { "epoch": 0.023668639053254437, "grad_norm": 0.46877679228782654, "learning_rate": 0.000995263157894737, "loss": 2.2725, "step": 9 }, { "epoch": 0.023668639053254437, "high_lr": 0.000995263157894737, "low_lr": 1.990526315789474e-05, "step": 9 }, { "epoch": 0.023668639053254437, "high_lr": 0.000995263157894737, "low_lr": 1.990526315789474e-05, "step": 9 }, { "epoch": 0.023668639053254437, "high_lr": 0.000995263157894737, "low_lr": 1.990526315789474e-05, "step": 9 }, { "epoch": 0.023668639053254437, "high_lr": 0.000995263157894737, "low_lr": 1.990526315789474e-05, "step": 9 }, { "epoch": 0.023668639053254437, "high_lr": 0.000995263157894737, "low_lr": 1.990526315789474e-05, "step": 9 }, { "epoch": 0.023668639053254437, "high_lr": 0.000995263157894737, "low_lr": 1.990526315789474e-05, "step": 9 }, { "epoch": 0.023668639053254437, "high_lr": 0.000995263157894737, "low_lr": 1.990526315789474e-05, "step": 9 }, { "epoch": 0.023668639053254437, "high_lr": 0.000995263157894737, "low_lr": 1.990526315789474e-05, "step": 9 }, { "epoch": 0.026298487836949377, "grad_norm": 0.4279167056083679, "learning_rate": 0.000994736842105263, "loss": 2.3037, "step": 10 }, { "epoch": 0.026298487836949377, "high_lr": 0.000994736842105263, "low_lr": 1.9894736842105265e-05, "step": 10 }, { "epoch": 0.026298487836949377, "high_lr": 0.000994736842105263, "low_lr": 1.9894736842105265e-05, "step": 10 }, { "epoch": 0.026298487836949377, "high_lr": 0.000994736842105263, "low_lr": 1.9894736842105265e-05, "step": 10 }, { "epoch": 0.026298487836949377, "high_lr": 0.000994736842105263, "low_lr": 1.9894736842105265e-05, "step": 10 }, { "epoch": 0.026298487836949377, "high_lr": 0.000994736842105263, "low_lr": 1.9894736842105265e-05, "step": 10 }, { "epoch": 0.026298487836949377, "high_lr": 0.000994736842105263, "low_lr": 1.9894736842105265e-05, "step": 10 }, { "epoch": 0.026298487836949377, "high_lr": 0.000994736842105263, "low_lr": 1.9894736842105265e-05, "step": 10 }, { "epoch": 0.026298487836949377, "high_lr": 0.000994736842105263, "low_lr": 1.9894736842105265e-05, "step": 10 }, { "epoch": 0.028928336620644313, "grad_norm": 0.4076891541481018, "learning_rate": 0.0009942105263157895, "loss": 2.2379, "step": 11 }, { "epoch": 0.028928336620644313, "high_lr": 0.0009942105263157895, "low_lr": 1.988421052631579e-05, "step": 11 }, { "epoch": 0.028928336620644313, "high_lr": 0.0009942105263157895, "low_lr": 1.988421052631579e-05, "step": 11 }, { "epoch": 0.028928336620644313, "high_lr": 0.0009942105263157895, "low_lr": 1.988421052631579e-05, "step": 11 }, { "epoch": 0.028928336620644313, "high_lr": 0.0009942105263157895, "low_lr": 1.988421052631579e-05, "step": 11 }, { "epoch": 0.028928336620644313, "high_lr": 0.0009942105263157895, "low_lr": 1.988421052631579e-05, "step": 11 }, { "epoch": 0.028928336620644313, "high_lr": 0.0009942105263157895, "low_lr": 1.988421052631579e-05, "step": 11 }, { "epoch": 0.028928336620644313, "high_lr": 0.0009942105263157895, "low_lr": 1.988421052631579e-05, "step": 11 }, { "epoch": 0.028928336620644313, "high_lr": 0.0009942105263157895, "low_lr": 1.988421052631579e-05, "step": 11 }, { "epoch": 0.03155818540433925, "grad_norm": 0.4081032872200012, "learning_rate": 0.0009936842105263159, "loss": 2.1953, "step": 12 }, { "epoch": 0.03155818540433925, "high_lr": 0.0009936842105263159, "low_lr": 1.9873684210526318e-05, "step": 12 }, { "epoch": 0.03155818540433925, "high_lr": 0.0009936842105263159, "low_lr": 1.9873684210526318e-05, "step": 12 }, { "epoch": 0.03155818540433925, "high_lr": 0.0009936842105263159, "low_lr": 1.9873684210526318e-05, "step": 12 }, { "epoch": 0.03155818540433925, "high_lr": 0.0009936842105263159, "low_lr": 1.9873684210526318e-05, "step": 12 }, { "epoch": 0.03155818540433925, "high_lr": 0.0009936842105263159, "low_lr": 1.9873684210526318e-05, "step": 12 }, { "epoch": 0.03155818540433925, "high_lr": 0.0009936842105263159, "low_lr": 1.9873684210526318e-05, "step": 12 }, { "epoch": 0.03155818540433925, "high_lr": 0.0009936842105263159, "low_lr": 1.9873684210526318e-05, "step": 12 }, { "epoch": 0.03155818540433925, "high_lr": 0.0009936842105263159, "low_lr": 1.9873684210526318e-05, "step": 12 }, { "epoch": 0.03418803418803419, "grad_norm": 0.5867348909378052, "learning_rate": 0.000993157894736842, "loss": 2.2433, "step": 13 }, { "epoch": 0.03418803418803419, "high_lr": 0.000993157894736842, "low_lr": 1.9863157894736846e-05, "step": 13 }, { "epoch": 0.03418803418803419, "high_lr": 0.000993157894736842, "low_lr": 1.9863157894736846e-05, "step": 13 }, { "epoch": 0.03418803418803419, "high_lr": 0.000993157894736842, "low_lr": 1.9863157894736846e-05, "step": 13 }, { "epoch": 0.03418803418803419, "high_lr": 0.000993157894736842, "low_lr": 1.9863157894736846e-05, "step": 13 }, { "epoch": 0.03418803418803419, "high_lr": 0.000993157894736842, "low_lr": 1.9863157894736846e-05, "step": 13 }, { "epoch": 0.03418803418803419, "high_lr": 0.000993157894736842, "low_lr": 1.9863157894736846e-05, "step": 13 }, { "epoch": 0.03418803418803419, "high_lr": 0.000993157894736842, "low_lr": 1.9863157894736846e-05, "step": 13 }, { "epoch": 0.03418803418803419, "high_lr": 0.000993157894736842, "low_lr": 1.9863157894736846e-05, "step": 13 }, { "epoch": 0.036817882971729124, "grad_norm": 0.453240305185318, "learning_rate": 0.0009926315789473685, "loss": 2.1978, "step": 14 }, { "epoch": 0.036817882971729124, "high_lr": 0.0009926315789473685, "low_lr": 1.985263157894737e-05, "step": 14 }, { "epoch": 0.036817882971729124, "high_lr": 0.0009926315789473685, "low_lr": 1.985263157894737e-05, "step": 14 }, { "epoch": 0.036817882971729124, "high_lr": 0.0009926315789473685, "low_lr": 1.985263157894737e-05, "step": 14 }, { "epoch": 0.036817882971729124, "high_lr": 0.0009926315789473685, "low_lr": 1.985263157894737e-05, "step": 14 }, { "epoch": 0.036817882971729124, "high_lr": 0.0009926315789473685, "low_lr": 1.985263157894737e-05, "step": 14 }, { "epoch": 0.036817882971729124, "high_lr": 0.0009926315789473685, "low_lr": 1.985263157894737e-05, "step": 14 }, { "epoch": 0.036817882971729124, "high_lr": 0.0009926315789473685, "low_lr": 1.985263157894737e-05, "step": 14 }, { "epoch": 0.036817882971729124, "high_lr": 0.0009926315789473685, "low_lr": 1.985263157894737e-05, "step": 14 }, { "epoch": 0.03944773175542406, "grad_norm": 0.4354722797870636, "learning_rate": 0.0009921052631578946, "loss": 2.2465, "step": 15 }, { "epoch": 0.03944773175542406, "high_lr": 0.0009921052631578946, "low_lr": 1.9842105263157895e-05, "step": 15 }, { "epoch": 0.03944773175542406, "high_lr": 0.0009921052631578946, "low_lr": 1.9842105263157895e-05, "step": 15 }, { "epoch": 0.03944773175542406, "high_lr": 0.0009921052631578946, "low_lr": 1.9842105263157895e-05, "step": 15 }, { "epoch": 0.03944773175542406, "high_lr": 0.0009921052631578946, "low_lr": 1.9842105263157895e-05, "step": 15 }, { "epoch": 0.03944773175542406, "high_lr": 0.0009921052631578946, "low_lr": 1.9842105263157895e-05, "step": 15 }, { "epoch": 0.03944773175542406, "high_lr": 0.0009921052631578946, "low_lr": 1.9842105263157895e-05, "step": 15 }, { "epoch": 0.03944773175542406, "high_lr": 0.0009921052631578946, "low_lr": 1.9842105263157895e-05, "step": 15 }, { "epoch": 0.03944773175542406, "high_lr": 0.0009921052631578946, "low_lr": 1.9842105263157895e-05, "step": 15 }, { "epoch": 0.042077580539119, "grad_norm": 0.4762808084487915, "learning_rate": 0.000991578947368421, "loss": 2.2044, "step": 16 }, { "epoch": 0.042077580539119, "high_lr": 0.000991578947368421, "low_lr": 1.9831578947368423e-05, "step": 16 }, { "epoch": 0.042077580539119, "high_lr": 0.000991578947368421, "low_lr": 1.9831578947368423e-05, "step": 16 }, { "epoch": 0.042077580539119, "high_lr": 0.000991578947368421, "low_lr": 1.9831578947368423e-05, "step": 16 }, { "epoch": 0.042077580539119, "high_lr": 0.000991578947368421, "low_lr": 1.9831578947368423e-05, "step": 16 }, { "epoch": 0.042077580539119, "high_lr": 0.000991578947368421, "low_lr": 1.9831578947368423e-05, "step": 16 }, { "epoch": 0.042077580539119, "high_lr": 0.000991578947368421, "low_lr": 1.9831578947368423e-05, "step": 16 }, { "epoch": 0.042077580539119, "high_lr": 0.000991578947368421, "low_lr": 1.9831578947368423e-05, "step": 16 }, { "epoch": 0.042077580539119, "high_lr": 0.000991578947368421, "low_lr": 1.9831578947368423e-05, "step": 16 }, { "epoch": 0.044707429322813935, "grad_norm": 0.5493490099906921, "learning_rate": 0.0009910526315789474, "loss": 2.1654, "step": 17 }, { "epoch": 0.044707429322813935, "high_lr": 0.0009910526315789474, "low_lr": 1.982105263157895e-05, "step": 17 }, { "epoch": 0.044707429322813935, "high_lr": 0.0009910526315789474, "low_lr": 1.982105263157895e-05, "step": 17 }, { "epoch": 0.044707429322813935, "high_lr": 0.0009910526315789474, "low_lr": 1.982105263157895e-05, "step": 17 }, { "epoch": 0.044707429322813935, "high_lr": 0.0009910526315789474, "low_lr": 1.982105263157895e-05, "step": 17 }, { "epoch": 0.044707429322813935, "high_lr": 0.0009910526315789474, "low_lr": 1.982105263157895e-05, "step": 17 }, { "epoch": 0.044707429322813935, "high_lr": 0.0009910526315789474, "low_lr": 1.982105263157895e-05, "step": 17 }, { "epoch": 0.044707429322813935, "high_lr": 0.0009910526315789474, "low_lr": 1.982105263157895e-05, "step": 17 }, { "epoch": 0.044707429322813935, "high_lr": 0.0009910526315789474, "low_lr": 1.982105263157895e-05, "step": 17 }, { "epoch": 0.047337278106508875, "grad_norm": 0.43686118721961975, "learning_rate": 0.0009905263157894738, "loss": 2.1749, "step": 18 }, { "epoch": 0.047337278106508875, "high_lr": 0.0009905263157894738, "low_lr": 1.9810526315789476e-05, "step": 18 }, { "epoch": 0.047337278106508875, "high_lr": 0.0009905263157894738, "low_lr": 1.9810526315789476e-05, "step": 18 }, { "epoch": 0.047337278106508875, "high_lr": 0.0009905263157894738, "low_lr": 1.9810526315789476e-05, "step": 18 }, { "epoch": 0.047337278106508875, "high_lr": 0.0009905263157894738, "low_lr": 1.9810526315789476e-05, "step": 18 }, { "epoch": 0.047337278106508875, "high_lr": 0.0009905263157894738, "low_lr": 1.9810526315789476e-05, "step": 18 }, { "epoch": 0.047337278106508875, "high_lr": 0.0009905263157894738, "low_lr": 1.9810526315789476e-05, "step": 18 }, { "epoch": 0.047337278106508875, "high_lr": 0.0009905263157894738, "low_lr": 1.9810526315789476e-05, "step": 18 }, { "epoch": 0.047337278106508875, "high_lr": 0.0009905263157894738, "low_lr": 1.9810526315789476e-05, "step": 18 }, { "epoch": 0.049967126890203814, "grad_norm": 0.5231380462646484, "learning_rate": 0.00099, "loss": 2.1947, "step": 19 }, { "epoch": 0.049967126890203814, "high_lr": 0.00099, "low_lr": 1.98e-05, "step": 19 }, { "epoch": 0.049967126890203814, "high_lr": 0.00099, "low_lr": 1.98e-05, "step": 19 }, { "epoch": 0.049967126890203814, "high_lr": 0.00099, "low_lr": 1.98e-05, "step": 19 }, { "epoch": 0.049967126890203814, "high_lr": 0.00099, "low_lr": 1.98e-05, "step": 19 }, { "epoch": 0.049967126890203814, "high_lr": 0.00099, "low_lr": 1.98e-05, "step": 19 }, { "epoch": 0.049967126890203814, "high_lr": 0.00099, "low_lr": 1.98e-05, "step": 19 }, { "epoch": 0.049967126890203814, "high_lr": 0.00099, "low_lr": 1.98e-05, "step": 19 }, { "epoch": 0.049967126890203814, "high_lr": 0.00099, "low_lr": 1.98e-05, "step": 19 }, { "epoch": 0.05259697567389875, "grad_norm": 0.5058587789535522, "learning_rate": 0.0009894736842105264, "loss": 2.1794, "step": 20 }, { "epoch": 0.05259697567389875, "high_lr": 0.0009894736842105264, "low_lr": 1.9789473684210528e-05, "step": 20 }, { "epoch": 0.05259697567389875, "high_lr": 0.0009894736842105264, "low_lr": 1.9789473684210528e-05, "step": 20 }, { "epoch": 0.05259697567389875, "high_lr": 0.0009894736842105264, "low_lr": 1.9789473684210528e-05, "step": 20 }, { "epoch": 0.05259697567389875, "high_lr": 0.0009894736842105264, "low_lr": 1.9789473684210528e-05, "step": 20 }, { "epoch": 0.05259697567389875, "high_lr": 0.0009894736842105264, "low_lr": 1.9789473684210528e-05, "step": 20 }, { "epoch": 0.05259697567389875, "high_lr": 0.0009894736842105264, "low_lr": 1.9789473684210528e-05, "step": 20 }, { "epoch": 0.05259697567389875, "high_lr": 0.0009894736842105264, "low_lr": 1.9789473684210528e-05, "step": 20 }, { "epoch": 0.05259697567389875, "high_lr": 0.0009894736842105264, "low_lr": 1.9789473684210528e-05, "step": 20 }, { "epoch": 0.055226824457593686, "grad_norm": 0.41516372561454773, "learning_rate": 0.0009889473684210528, "loss": 2.1126, "step": 21 }, { "epoch": 0.055226824457593686, "high_lr": 0.0009889473684210528, "low_lr": 1.9778947368421056e-05, "step": 21 }, { "epoch": 0.055226824457593686, "high_lr": 0.0009889473684210528, "low_lr": 1.9778947368421056e-05, "step": 21 }, { "epoch": 0.055226824457593686, "high_lr": 0.0009889473684210528, "low_lr": 1.9778947368421056e-05, "step": 21 }, { "epoch": 0.055226824457593686, "high_lr": 0.0009889473684210528, "low_lr": 1.9778947368421056e-05, "step": 21 }, { "epoch": 0.055226824457593686, "high_lr": 0.0009889473684210528, "low_lr": 1.9778947368421056e-05, "step": 21 }, { "epoch": 0.055226824457593686, "high_lr": 0.0009889473684210528, "low_lr": 1.9778947368421056e-05, "step": 21 }, { "epoch": 0.055226824457593686, "high_lr": 0.0009889473684210528, "low_lr": 1.9778947368421056e-05, "step": 21 }, { "epoch": 0.055226824457593686, "high_lr": 0.0009889473684210528, "low_lr": 1.9778947368421056e-05, "step": 21 }, { "epoch": 0.057856673241288625, "grad_norm": 0.4417484402656555, "learning_rate": 0.000988421052631579, "loss": 2.1126, "step": 22 }, { "epoch": 0.057856673241288625, "high_lr": 0.000988421052631579, "low_lr": 1.976842105263158e-05, "step": 22 }, { "epoch": 0.057856673241288625, "high_lr": 0.000988421052631579, "low_lr": 1.976842105263158e-05, "step": 22 }, { "epoch": 0.057856673241288625, "high_lr": 0.000988421052631579, "low_lr": 1.976842105263158e-05, "step": 22 }, { "epoch": 0.057856673241288625, "high_lr": 0.000988421052631579, "low_lr": 1.976842105263158e-05, "step": 22 }, { "epoch": 0.057856673241288625, "high_lr": 0.000988421052631579, "low_lr": 1.976842105263158e-05, "step": 22 }, { "epoch": 0.057856673241288625, "high_lr": 0.000988421052631579, "low_lr": 1.976842105263158e-05, "step": 22 }, { "epoch": 0.057856673241288625, "high_lr": 0.000988421052631579, "low_lr": 1.976842105263158e-05, "step": 22 }, { "epoch": 0.057856673241288625, "high_lr": 0.000988421052631579, "low_lr": 1.976842105263158e-05, "step": 22 }, { "epoch": 0.060486522024983565, "grad_norm": 0.4763607978820801, "learning_rate": 0.0009878947368421054, "loss": 2.1861, "step": 23 }, { "epoch": 0.060486522024983565, "high_lr": 0.0009878947368421054, "low_lr": 1.9757894736842105e-05, "step": 23 }, { "epoch": 0.060486522024983565, "high_lr": 0.0009878947368421054, "low_lr": 1.9757894736842105e-05, "step": 23 }, { "epoch": 0.060486522024983565, "high_lr": 0.0009878947368421054, "low_lr": 1.9757894736842105e-05, "step": 23 }, { "epoch": 0.060486522024983565, "high_lr": 0.0009878947368421054, "low_lr": 1.9757894736842105e-05, "step": 23 }, { "epoch": 0.060486522024983565, "high_lr": 0.0009878947368421054, "low_lr": 1.9757894736842105e-05, "step": 23 }, { "epoch": 0.060486522024983565, "high_lr": 0.0009878947368421054, "low_lr": 1.9757894736842105e-05, "step": 23 }, { "epoch": 0.060486522024983565, "high_lr": 0.0009878947368421054, "low_lr": 1.9757894736842105e-05, "step": 23 }, { "epoch": 0.060486522024983565, "high_lr": 0.0009878947368421054, "low_lr": 1.9757894736842105e-05, "step": 23 }, { "epoch": 0.0631163708086785, "grad_norm": 0.42307791113853455, "learning_rate": 0.0009873684210526315, "loss": 2.0869, "step": 24 }, { "epoch": 0.0631163708086785, "high_lr": 0.0009873684210526315, "low_lr": 1.9747368421052633e-05, "step": 24 }, { "epoch": 0.0631163708086785, "high_lr": 0.0009873684210526315, "low_lr": 1.9747368421052633e-05, "step": 24 }, { "epoch": 0.0631163708086785, "high_lr": 0.0009873684210526315, "low_lr": 1.9747368421052633e-05, "step": 24 }, { "epoch": 0.0631163708086785, "high_lr": 0.0009873684210526315, "low_lr": 1.9747368421052633e-05, "step": 24 }, { "epoch": 0.0631163708086785, "high_lr": 0.0009873684210526315, "low_lr": 1.9747368421052633e-05, "step": 24 }, { "epoch": 0.0631163708086785, "high_lr": 0.0009873684210526315, "low_lr": 1.9747368421052633e-05, "step": 24 }, { "epoch": 0.0631163708086785, "high_lr": 0.0009873684210526315, "low_lr": 1.9747368421052633e-05, "step": 24 }, { "epoch": 0.0631163708086785, "high_lr": 0.0009873684210526315, "low_lr": 1.9747368421052633e-05, "step": 24 }, { "epoch": 0.06574621959237344, "grad_norm": 0.439748615026474, "learning_rate": 0.000986842105263158, "loss": 2.0882, "step": 25 }, { "epoch": 0.06574621959237344, "high_lr": 0.000986842105263158, "low_lr": 1.9736842105263158e-05, "step": 25 }, { "epoch": 0.06574621959237344, "high_lr": 0.000986842105263158, "low_lr": 1.9736842105263158e-05, "step": 25 }, { "epoch": 0.06574621959237344, "high_lr": 0.000986842105263158, "low_lr": 1.9736842105263158e-05, "step": 25 }, { "epoch": 0.06574621959237344, "high_lr": 0.000986842105263158, "low_lr": 1.9736842105263158e-05, "step": 25 }, { "epoch": 0.06574621959237344, "high_lr": 0.000986842105263158, "low_lr": 1.9736842105263158e-05, "step": 25 }, { "epoch": 0.06574621959237344, "high_lr": 0.000986842105263158, "low_lr": 1.9736842105263158e-05, "step": 25 }, { "epoch": 0.06574621959237344, "high_lr": 0.000986842105263158, "low_lr": 1.9736842105263158e-05, "step": 25 }, { "epoch": 0.06574621959237344, "high_lr": 0.000986842105263158, "low_lr": 1.9736842105263158e-05, "step": 25 }, { "epoch": 0.06837606837606838, "grad_norm": 0.39283064007759094, "learning_rate": 0.0009863157894736843, "loss": 2.0292, "step": 26 }, { "epoch": 0.06837606837606838, "high_lr": 0.0009863157894736843, "low_lr": 1.9726315789473686e-05, "step": 26 }, { "epoch": 0.06837606837606838, "high_lr": 0.0009863157894736843, "low_lr": 1.9726315789473686e-05, "step": 26 }, { "epoch": 0.06837606837606838, "high_lr": 0.0009863157894736843, "low_lr": 1.9726315789473686e-05, "step": 26 }, { "epoch": 0.06837606837606838, "high_lr": 0.0009863157894736843, "low_lr": 1.9726315789473686e-05, "step": 26 }, { "epoch": 0.06837606837606838, "high_lr": 0.0009863157894736843, "low_lr": 1.9726315789473686e-05, "step": 26 }, { "epoch": 0.06837606837606838, "high_lr": 0.0009863157894736843, "low_lr": 1.9726315789473686e-05, "step": 26 }, { "epoch": 0.06837606837606838, "high_lr": 0.0009863157894736843, "low_lr": 1.9726315789473686e-05, "step": 26 }, { "epoch": 0.06837606837606838, "high_lr": 0.0009863157894736843, "low_lr": 1.9726315789473686e-05, "step": 26 }, { "epoch": 0.07100591715976332, "grad_norm": 0.4499070346355438, "learning_rate": 0.0009857894736842105, "loss": 2.1265, "step": 27 }, { "epoch": 0.07100591715976332, "high_lr": 0.0009857894736842105, "low_lr": 1.9715789473684214e-05, "step": 27 }, { "epoch": 0.07100591715976332, "high_lr": 0.0009857894736842105, "low_lr": 1.9715789473684214e-05, "step": 27 }, { "epoch": 0.07100591715976332, "high_lr": 0.0009857894736842105, "low_lr": 1.9715789473684214e-05, "step": 27 }, { "epoch": 0.07100591715976332, "high_lr": 0.0009857894736842105, "low_lr": 1.9715789473684214e-05, "step": 27 }, { "epoch": 0.07100591715976332, "high_lr": 0.0009857894736842105, "low_lr": 1.9715789473684214e-05, "step": 27 }, { "epoch": 0.07100591715976332, "high_lr": 0.0009857894736842105, "low_lr": 1.9715789473684214e-05, "step": 27 }, { "epoch": 0.07100591715976332, "high_lr": 0.0009857894736842105, "low_lr": 1.9715789473684214e-05, "step": 27 }, { "epoch": 0.07100591715976332, "high_lr": 0.0009857894736842105, "low_lr": 1.9715789473684214e-05, "step": 27 }, { "epoch": 0.07363576594345825, "grad_norm": 0.41874054074287415, "learning_rate": 0.000985263157894737, "loss": 1.9887, "step": 28 }, { "epoch": 0.07363576594345825, "high_lr": 0.000985263157894737, "low_lr": 1.970526315789474e-05, "step": 28 }, { "epoch": 0.07363576594345825, "high_lr": 0.000985263157894737, "low_lr": 1.970526315789474e-05, "step": 28 }, { "epoch": 0.07363576594345825, "high_lr": 0.000985263157894737, "low_lr": 1.970526315789474e-05, "step": 28 }, { "epoch": 0.07363576594345825, "high_lr": 0.000985263157894737, "low_lr": 1.970526315789474e-05, "step": 28 }, { "epoch": 0.07363576594345825, "high_lr": 0.000985263157894737, "low_lr": 1.970526315789474e-05, "step": 28 }, { "epoch": 0.07363576594345825, "high_lr": 0.000985263157894737, "low_lr": 1.970526315789474e-05, "step": 28 }, { "epoch": 0.07363576594345825, "high_lr": 0.000985263157894737, "low_lr": 1.970526315789474e-05, "step": 28 }, { "epoch": 0.07363576594345825, "high_lr": 0.000985263157894737, "low_lr": 1.970526315789474e-05, "step": 28 }, { "epoch": 0.0762656147271532, "grad_norm": 0.4721282124519348, "learning_rate": 0.000984736842105263, "loss": 2.1088, "step": 29 }, { "epoch": 0.0762656147271532, "high_lr": 0.000984736842105263, "low_lr": 1.9694736842105263e-05, "step": 29 }, { "epoch": 0.0762656147271532, "high_lr": 0.000984736842105263, "low_lr": 1.9694736842105263e-05, "step": 29 }, { "epoch": 0.0762656147271532, "high_lr": 0.000984736842105263, "low_lr": 1.9694736842105263e-05, "step": 29 }, { "epoch": 0.0762656147271532, "high_lr": 0.000984736842105263, "low_lr": 1.9694736842105263e-05, "step": 29 }, { "epoch": 0.0762656147271532, "high_lr": 0.000984736842105263, "low_lr": 1.9694736842105263e-05, "step": 29 }, { "epoch": 0.0762656147271532, "high_lr": 0.000984736842105263, "low_lr": 1.9694736842105263e-05, "step": 29 }, { "epoch": 0.0762656147271532, "high_lr": 0.000984736842105263, "low_lr": 1.9694736842105263e-05, "step": 29 }, { "epoch": 0.0762656147271532, "high_lr": 0.000984736842105263, "low_lr": 1.9694736842105263e-05, "step": 29 }, { "epoch": 0.07889546351084813, "grad_norm": 0.4673194885253906, "learning_rate": 0.0009842105263157895, "loss": 2.1235, "step": 30 }, { "epoch": 0.07889546351084813, "high_lr": 0.0009842105263157895, "low_lr": 1.968421052631579e-05, "step": 30 }, { "epoch": 0.07889546351084813, "high_lr": 0.0009842105263157895, "low_lr": 1.968421052631579e-05, "step": 30 }, { "epoch": 0.07889546351084813, "high_lr": 0.0009842105263157895, "low_lr": 1.968421052631579e-05, "step": 30 }, { "epoch": 0.07889546351084813, "high_lr": 0.0009842105263157895, "low_lr": 1.968421052631579e-05, "step": 30 }, { "epoch": 0.07889546351084813, "high_lr": 0.0009842105263157895, "low_lr": 1.968421052631579e-05, "step": 30 }, { "epoch": 0.07889546351084813, "high_lr": 0.0009842105263157895, "low_lr": 1.968421052631579e-05, "step": 30 }, { "epoch": 0.07889546351084813, "high_lr": 0.0009842105263157895, "low_lr": 1.968421052631579e-05, "step": 30 }, { "epoch": 0.07889546351084813, "high_lr": 0.0009842105263157895, "low_lr": 1.968421052631579e-05, "step": 30 }, { "epoch": 0.08152531229454306, "grad_norm": 0.43837472796440125, "learning_rate": 0.0009836842105263159, "loss": 2.1186, "step": 31 }, { "epoch": 0.08152531229454306, "high_lr": 0.0009836842105263159, "low_lr": 1.967368421052632e-05, "step": 31 }, { "epoch": 0.08152531229454306, "high_lr": 0.0009836842105263159, "low_lr": 1.967368421052632e-05, "step": 31 }, { "epoch": 0.08152531229454306, "high_lr": 0.0009836842105263159, "low_lr": 1.967368421052632e-05, "step": 31 }, { "epoch": 0.08152531229454306, "high_lr": 0.0009836842105263159, "low_lr": 1.967368421052632e-05, "step": 31 }, { "epoch": 0.08152531229454306, "high_lr": 0.0009836842105263159, "low_lr": 1.967368421052632e-05, "step": 31 }, { "epoch": 0.08152531229454306, "high_lr": 0.0009836842105263159, "low_lr": 1.967368421052632e-05, "step": 31 }, { "epoch": 0.08152531229454306, "high_lr": 0.0009836842105263159, "low_lr": 1.967368421052632e-05, "step": 31 }, { "epoch": 0.08152531229454306, "high_lr": 0.0009836842105263159, "low_lr": 1.967368421052632e-05, "step": 31 }, { "epoch": 0.084155161078238, "grad_norm": 0.42769670486450195, "learning_rate": 0.000983157894736842, "loss": 2.0903, "step": 32 }, { "epoch": 0.084155161078238, "high_lr": 0.000983157894736842, "low_lr": 1.9663157894736844e-05, "step": 32 }, { "epoch": 0.084155161078238, "high_lr": 0.000983157894736842, "low_lr": 1.9663157894736844e-05, "step": 32 }, { "epoch": 0.084155161078238, "high_lr": 0.000983157894736842, "low_lr": 1.9663157894736844e-05, "step": 32 }, { "epoch": 0.084155161078238, "high_lr": 0.000983157894736842, "low_lr": 1.9663157894736844e-05, "step": 32 }, { "epoch": 0.084155161078238, "high_lr": 0.000983157894736842, "low_lr": 1.9663157894736844e-05, "step": 32 }, { "epoch": 0.084155161078238, "high_lr": 0.000983157894736842, "low_lr": 1.9663157894736844e-05, "step": 32 }, { "epoch": 0.084155161078238, "high_lr": 0.000983157894736842, "low_lr": 1.9663157894736844e-05, "step": 32 }, { "epoch": 0.084155161078238, "high_lr": 0.000983157894736842, "low_lr": 1.9663157894736844e-05, "step": 32 }, { "epoch": 0.08678500986193294, "grad_norm": 0.4276221990585327, "learning_rate": 0.0009826315789473684, "loss": 2.0315, "step": 33 }, { "epoch": 0.08678500986193294, "high_lr": 0.0009826315789473684, "low_lr": 1.965263157894737e-05, "step": 33 }, { "epoch": 0.08678500986193294, "high_lr": 0.0009826315789473684, "low_lr": 1.965263157894737e-05, "step": 33 }, { "epoch": 0.08678500986193294, "high_lr": 0.0009826315789473684, "low_lr": 1.965263157894737e-05, "step": 33 }, { "epoch": 0.08678500986193294, "high_lr": 0.0009826315789473684, "low_lr": 1.965263157894737e-05, "step": 33 }, { "epoch": 0.08678500986193294, "high_lr": 0.0009826315789473684, "low_lr": 1.965263157894737e-05, "step": 33 }, { "epoch": 0.08678500986193294, "high_lr": 0.0009826315789473684, "low_lr": 1.965263157894737e-05, "step": 33 }, { "epoch": 0.08678500986193294, "high_lr": 0.0009826315789473684, "low_lr": 1.965263157894737e-05, "step": 33 }, { "epoch": 0.08678500986193294, "high_lr": 0.0009826315789473684, "low_lr": 1.965263157894737e-05, "step": 33 }, { "epoch": 0.08941485864562787, "grad_norm": 0.46253710985183716, "learning_rate": 0.0009821052631578948, "loss": 2.0703, "step": 34 }, { "epoch": 0.08941485864562787, "high_lr": 0.0009821052631578948, "low_lr": 1.9642105263157897e-05, "step": 34 }, { "epoch": 0.08941485864562787, "high_lr": 0.0009821052631578948, "low_lr": 1.9642105263157897e-05, "step": 34 }, { "epoch": 0.08941485864562787, "high_lr": 0.0009821052631578948, "low_lr": 1.9642105263157897e-05, "step": 34 }, { "epoch": 0.08941485864562787, "high_lr": 0.0009821052631578948, "low_lr": 1.9642105263157897e-05, "step": 34 }, { "epoch": 0.08941485864562787, "high_lr": 0.0009821052631578948, "low_lr": 1.9642105263157897e-05, "step": 34 }, { "epoch": 0.08941485864562787, "high_lr": 0.0009821052631578948, "low_lr": 1.9642105263157897e-05, "step": 34 }, { "epoch": 0.08941485864562787, "high_lr": 0.0009821052631578948, "low_lr": 1.9642105263157897e-05, "step": 34 }, { "epoch": 0.08941485864562787, "high_lr": 0.0009821052631578948, "low_lr": 1.9642105263157897e-05, "step": 34 }, { "epoch": 0.09204470742932282, "grad_norm": 0.4598131477832794, "learning_rate": 0.0009815789473684212, "loss": 2.1117, "step": 35 }, { "epoch": 0.09204470742932282, "high_lr": 0.0009815789473684212, "low_lr": 1.9631578947368425e-05, "step": 35 }, { "epoch": 0.09204470742932282, "high_lr": 0.0009815789473684212, "low_lr": 1.9631578947368425e-05, "step": 35 }, { "epoch": 0.09204470742932282, "high_lr": 0.0009815789473684212, "low_lr": 1.9631578947368425e-05, "step": 35 }, { "epoch": 0.09204470742932282, "high_lr": 0.0009815789473684212, "low_lr": 1.9631578947368425e-05, "step": 35 }, { "epoch": 0.09204470742932282, "high_lr": 0.0009815789473684212, "low_lr": 1.9631578947368425e-05, "step": 35 }, { "epoch": 0.09204470742932282, "high_lr": 0.0009815789473684212, "low_lr": 1.9631578947368425e-05, "step": 35 }, { "epoch": 0.09204470742932282, "high_lr": 0.0009815789473684212, "low_lr": 1.9631578947368425e-05, "step": 35 }, { "epoch": 0.09204470742932282, "high_lr": 0.0009815789473684212, "low_lr": 1.9631578947368425e-05, "step": 35 }, { "epoch": 0.09467455621301775, "grad_norm": 0.44061988592147827, "learning_rate": 0.0009810526315789474, "loss": 2.0431, "step": 36 }, { "epoch": 0.09467455621301775, "high_lr": 0.0009810526315789474, "low_lr": 1.962105263157895e-05, "step": 36 }, { "epoch": 0.09467455621301775, "high_lr": 0.0009810526315789474, "low_lr": 1.962105263157895e-05, "step": 36 }, { "epoch": 0.09467455621301775, "high_lr": 0.0009810526315789474, "low_lr": 1.962105263157895e-05, "step": 36 }, { "epoch": 0.09467455621301775, "high_lr": 0.0009810526315789474, "low_lr": 1.962105263157895e-05, "step": 36 }, { "epoch": 0.09467455621301775, "high_lr": 0.0009810526315789474, "low_lr": 1.962105263157895e-05, "step": 36 }, { "epoch": 0.09467455621301775, "high_lr": 0.0009810526315789474, "low_lr": 1.962105263157895e-05, "step": 36 }, { "epoch": 0.09467455621301775, "high_lr": 0.0009810526315789474, "low_lr": 1.962105263157895e-05, "step": 36 }, { "epoch": 0.09467455621301775, "high_lr": 0.0009810526315789474, "low_lr": 1.962105263157895e-05, "step": 36 }, { "epoch": 0.0973044049967127, "grad_norm": 0.46434134244918823, "learning_rate": 0.0009805263157894738, "loss": 2.0377, "step": 37 }, { "epoch": 0.0973044049967127, "high_lr": 0.0009805263157894738, "low_lr": 1.9610526315789474e-05, "step": 37 }, { "epoch": 0.0973044049967127, "high_lr": 0.0009805263157894738, "low_lr": 1.9610526315789474e-05, "step": 37 }, { "epoch": 0.0973044049967127, "high_lr": 0.0009805263157894738, "low_lr": 1.9610526315789474e-05, "step": 37 }, { "epoch": 0.0973044049967127, "high_lr": 0.0009805263157894738, "low_lr": 1.9610526315789474e-05, "step": 37 }, { "epoch": 0.0973044049967127, "high_lr": 0.0009805263157894738, "low_lr": 1.9610526315789474e-05, "step": 37 }, { "epoch": 0.0973044049967127, "high_lr": 0.0009805263157894738, "low_lr": 1.9610526315789474e-05, "step": 37 }, { "epoch": 0.0973044049967127, "high_lr": 0.0009805263157894738, "low_lr": 1.9610526315789474e-05, "step": 37 }, { "epoch": 0.0973044049967127, "high_lr": 0.0009805263157894738, "low_lr": 1.9610526315789474e-05, "step": 37 }, { "epoch": 0.09993425378040763, "grad_norm": 0.48765864968299866, "learning_rate": 0.00098, "loss": 2.1, "step": 38 }, { "epoch": 0.09993425378040763, "high_lr": 0.00098, "low_lr": 1.9600000000000002e-05, "step": 38 }, { "epoch": 0.09993425378040763, "high_lr": 0.00098, "low_lr": 1.9600000000000002e-05, "step": 38 }, { "epoch": 0.09993425378040763, "high_lr": 0.00098, "low_lr": 1.9600000000000002e-05, "step": 38 }, { "epoch": 0.09993425378040763, "high_lr": 0.00098, "low_lr": 1.9600000000000002e-05, "step": 38 }, { "epoch": 0.09993425378040763, "high_lr": 0.00098, "low_lr": 1.9600000000000002e-05, "step": 38 }, { "epoch": 0.09993425378040763, "high_lr": 0.00098, "low_lr": 1.9600000000000002e-05, "step": 38 }, { "epoch": 0.09993425378040763, "high_lr": 0.00098, "low_lr": 1.9600000000000002e-05, "step": 38 }, { "epoch": 0.09993425378040763, "high_lr": 0.00098, "low_lr": 1.9600000000000002e-05, "step": 38 }, { "epoch": 0.10256410256410256, "grad_norm": 0.4472310543060303, "learning_rate": 0.0009794736842105264, "loss": 2.0205, "step": 39 }, { "epoch": 0.10256410256410256, "high_lr": 0.0009794736842105264, "low_lr": 1.9589473684210527e-05, "step": 39 }, { "epoch": 0.10256410256410256, "high_lr": 0.0009794736842105264, "low_lr": 1.9589473684210527e-05, "step": 39 }, { "epoch": 0.10256410256410256, "high_lr": 0.0009794736842105264, "low_lr": 1.9589473684210527e-05, "step": 39 }, { "epoch": 0.10256410256410256, "high_lr": 0.0009794736842105264, "low_lr": 1.9589473684210527e-05, "step": 39 }, { "epoch": 0.10256410256410256, "high_lr": 0.0009794736842105264, "low_lr": 1.9589473684210527e-05, "step": 39 }, { "epoch": 0.10256410256410256, "high_lr": 0.0009794736842105264, "low_lr": 1.9589473684210527e-05, "step": 39 }, { "epoch": 0.10256410256410256, "high_lr": 0.0009794736842105264, "low_lr": 1.9589473684210527e-05, "step": 39 }, { "epoch": 0.10256410256410256, "high_lr": 0.0009794736842105264, "low_lr": 1.9589473684210527e-05, "step": 39 }, { "epoch": 0.1051939513477975, "grad_norm": 0.5191563367843628, "learning_rate": 0.0009789473684210528, "loss": 2.1507, "step": 40 }, { "epoch": 0.1051939513477975, "high_lr": 0.0009789473684210528, "low_lr": 1.9578947368421055e-05, "step": 40 }, { "epoch": 0.1051939513477975, "high_lr": 0.0009789473684210528, "low_lr": 1.9578947368421055e-05, "step": 40 }, { "epoch": 0.1051939513477975, "high_lr": 0.0009789473684210528, "low_lr": 1.9578947368421055e-05, "step": 40 }, { "epoch": 0.1051939513477975, "high_lr": 0.0009789473684210528, "low_lr": 1.9578947368421055e-05, "step": 40 }, { "epoch": 0.1051939513477975, "high_lr": 0.0009789473684210528, "low_lr": 1.9578947368421055e-05, "step": 40 }, { "epoch": 0.1051939513477975, "high_lr": 0.0009789473684210528, "low_lr": 1.9578947368421055e-05, "step": 40 }, { "epoch": 0.1051939513477975, "high_lr": 0.0009789473684210528, "low_lr": 1.9578947368421055e-05, "step": 40 }, { "epoch": 0.1051939513477975, "high_lr": 0.0009789473684210528, "low_lr": 1.9578947368421055e-05, "step": 40 }, { "epoch": 0.10782380013149244, "grad_norm": 0.4820740222930908, "learning_rate": 0.000978421052631579, "loss": 1.9987, "step": 41 }, { "epoch": 0.10782380013149244, "high_lr": 0.000978421052631579, "low_lr": 1.956842105263158e-05, "step": 41 }, { "epoch": 0.10782380013149244, "high_lr": 0.000978421052631579, "low_lr": 1.956842105263158e-05, "step": 41 }, { "epoch": 0.10782380013149244, "high_lr": 0.000978421052631579, "low_lr": 1.956842105263158e-05, "step": 41 }, { "epoch": 0.10782380013149244, "high_lr": 0.000978421052631579, "low_lr": 1.956842105263158e-05, "step": 41 }, { "epoch": 0.10782380013149244, "high_lr": 0.000978421052631579, "low_lr": 1.956842105263158e-05, "step": 41 }, { "epoch": 0.10782380013149244, "high_lr": 0.000978421052631579, "low_lr": 1.956842105263158e-05, "step": 41 }, { "epoch": 0.10782380013149244, "high_lr": 0.000978421052631579, "low_lr": 1.956842105263158e-05, "step": 41 }, { "epoch": 0.10782380013149244, "high_lr": 0.000978421052631579, "low_lr": 1.956842105263158e-05, "step": 41 }, { "epoch": 0.11045364891518737, "grad_norm": 0.49189651012420654, "learning_rate": 0.0009778947368421053, "loss": 2.1101, "step": 42 }, { "epoch": 0.11045364891518737, "high_lr": 0.0009778947368421053, "low_lr": 1.9557894736842107e-05, "step": 42 }, { "epoch": 0.11045364891518737, "high_lr": 0.0009778947368421053, "low_lr": 1.9557894736842107e-05, "step": 42 }, { "epoch": 0.11045364891518737, "high_lr": 0.0009778947368421053, "low_lr": 1.9557894736842107e-05, "step": 42 }, { "epoch": 0.11045364891518737, "high_lr": 0.0009778947368421053, "low_lr": 1.9557894736842107e-05, "step": 42 }, { "epoch": 0.11045364891518737, "high_lr": 0.0009778947368421053, "low_lr": 1.9557894736842107e-05, "step": 42 }, { "epoch": 0.11045364891518737, "high_lr": 0.0009778947368421053, "low_lr": 1.9557894736842107e-05, "step": 42 }, { "epoch": 0.11045364891518737, "high_lr": 0.0009778947368421053, "low_lr": 1.9557894736842107e-05, "step": 42 }, { "epoch": 0.11045364891518737, "high_lr": 0.0009778947368421053, "low_lr": 1.9557894736842107e-05, "step": 42 }, { "epoch": 0.11308349769888232, "grad_norm": 0.48595666885375977, "learning_rate": 0.0009773684210526315, "loss": 2.0509, "step": 43 }, { "epoch": 0.11308349769888232, "high_lr": 0.0009773684210526315, "low_lr": 1.9547368421052632e-05, "step": 43 }, { "epoch": 0.11308349769888232, "high_lr": 0.0009773684210526315, "low_lr": 1.9547368421052632e-05, "step": 43 }, { "epoch": 0.11308349769888232, "high_lr": 0.0009773684210526315, "low_lr": 1.9547368421052632e-05, "step": 43 }, { "epoch": 0.11308349769888232, "high_lr": 0.0009773684210526315, "low_lr": 1.9547368421052632e-05, "step": 43 }, { "epoch": 0.11308349769888232, "high_lr": 0.0009773684210526315, "low_lr": 1.9547368421052632e-05, "step": 43 }, { "epoch": 0.11308349769888232, "high_lr": 0.0009773684210526315, "low_lr": 1.9547368421052632e-05, "step": 43 }, { "epoch": 0.11308349769888232, "high_lr": 0.0009773684210526315, "low_lr": 1.9547368421052632e-05, "step": 43 }, { "epoch": 0.11308349769888232, "high_lr": 0.0009773684210526315, "low_lr": 1.9547368421052632e-05, "step": 43 }, { "epoch": 0.11571334648257725, "grad_norm": 0.45890429615974426, "learning_rate": 0.000976842105263158, "loss": 1.9858, "step": 44 }, { "epoch": 0.11571334648257725, "high_lr": 0.000976842105263158, "low_lr": 1.953684210526316e-05, "step": 44 }, { "epoch": 0.11571334648257725, "high_lr": 0.000976842105263158, "low_lr": 1.953684210526316e-05, "step": 44 }, { "epoch": 0.11571334648257725, "high_lr": 0.000976842105263158, "low_lr": 1.953684210526316e-05, "step": 44 }, { "epoch": 0.11571334648257725, "high_lr": 0.000976842105263158, "low_lr": 1.953684210526316e-05, "step": 44 }, { "epoch": 0.11571334648257725, "high_lr": 0.000976842105263158, "low_lr": 1.953684210526316e-05, "step": 44 }, { "epoch": 0.11571334648257725, "high_lr": 0.000976842105263158, "low_lr": 1.953684210526316e-05, "step": 44 }, { "epoch": 0.11571334648257725, "high_lr": 0.000976842105263158, "low_lr": 1.953684210526316e-05, "step": 44 }, { "epoch": 0.11571334648257725, "high_lr": 0.000976842105263158, "low_lr": 1.953684210526316e-05, "step": 44 }, { "epoch": 0.11834319526627218, "grad_norm": 0.500018298625946, "learning_rate": 0.0009763157894736843, "loss": 2.0669, "step": 45 }, { "epoch": 0.11834319526627218, "high_lr": 0.0009763157894736843, "low_lr": 1.9526315789473688e-05, "step": 45 }, { "epoch": 0.11834319526627218, "high_lr": 0.0009763157894736843, "low_lr": 1.9526315789473688e-05, "step": 45 }, { "epoch": 0.11834319526627218, "high_lr": 0.0009763157894736843, "low_lr": 1.9526315789473688e-05, "step": 45 }, { "epoch": 0.11834319526627218, "high_lr": 0.0009763157894736843, "low_lr": 1.9526315789473688e-05, "step": 45 }, { "epoch": 0.11834319526627218, "high_lr": 0.0009763157894736843, "low_lr": 1.9526315789473688e-05, "step": 45 }, { "epoch": 0.11834319526627218, "high_lr": 0.0009763157894736843, "low_lr": 1.9526315789473688e-05, "step": 45 }, { "epoch": 0.11834319526627218, "high_lr": 0.0009763157894736843, "low_lr": 1.9526315789473688e-05, "step": 45 }, { "epoch": 0.11834319526627218, "high_lr": 0.0009763157894736843, "low_lr": 1.9526315789473688e-05, "step": 45 }, { "epoch": 0.12097304404996713, "grad_norm": 0.4983457922935486, "learning_rate": 0.0009757894736842106, "loss": 2.0629, "step": 46 }, { "epoch": 0.12097304404996713, "high_lr": 0.0009757894736842106, "low_lr": 1.9515789473684213e-05, "step": 46 }, { "epoch": 0.12097304404996713, "high_lr": 0.0009757894736842106, "low_lr": 1.9515789473684213e-05, "step": 46 }, { "epoch": 0.12097304404996713, "high_lr": 0.0009757894736842106, "low_lr": 1.9515789473684213e-05, "step": 46 }, { "epoch": 0.12097304404996713, "high_lr": 0.0009757894736842106, "low_lr": 1.9515789473684213e-05, "step": 46 }, { "epoch": 0.12097304404996713, "high_lr": 0.0009757894736842106, "low_lr": 1.9515789473684213e-05, "step": 46 }, { "epoch": 0.12097304404996713, "high_lr": 0.0009757894736842106, "low_lr": 1.9515789473684213e-05, "step": 46 }, { "epoch": 0.12097304404996713, "high_lr": 0.0009757894736842106, "low_lr": 1.9515789473684213e-05, "step": 46 }, { "epoch": 0.12097304404996713, "high_lr": 0.0009757894736842106, "low_lr": 1.9515789473684213e-05, "step": 46 }, { "epoch": 0.12360289283366206, "grad_norm": 0.5502267479896545, "learning_rate": 0.0009752631578947369, "loss": 2.156, "step": 47 }, { "epoch": 0.12360289283366206, "high_lr": 0.0009752631578947369, "low_lr": 1.9505263157894737e-05, "step": 47 }, { "epoch": 0.12360289283366206, "high_lr": 0.0009752631578947369, "low_lr": 1.9505263157894737e-05, "step": 47 }, { "epoch": 0.12360289283366206, "high_lr": 0.0009752631578947369, "low_lr": 1.9505263157894737e-05, "step": 47 }, { "epoch": 0.12360289283366206, "high_lr": 0.0009752631578947369, "low_lr": 1.9505263157894737e-05, "step": 47 }, { "epoch": 0.12360289283366206, "high_lr": 0.0009752631578947369, "low_lr": 1.9505263157894737e-05, "step": 47 }, { "epoch": 0.12360289283366206, "high_lr": 0.0009752631578947369, "low_lr": 1.9505263157894737e-05, "step": 47 }, { "epoch": 0.12360289283366206, "high_lr": 0.0009752631578947369, "low_lr": 1.9505263157894737e-05, "step": 47 }, { "epoch": 0.12360289283366206, "high_lr": 0.0009752631578947369, "low_lr": 1.9505263157894737e-05, "step": 47 }, { "epoch": 0.126232741617357, "grad_norm": 0.5233122110366821, "learning_rate": 0.0009747368421052632, "loss": 2.0685, "step": 48 }, { "epoch": 0.126232741617357, "high_lr": 0.0009747368421052632, "low_lr": 1.9494736842105265e-05, "step": 48 }, { "epoch": 0.126232741617357, "high_lr": 0.0009747368421052632, "low_lr": 1.9494736842105265e-05, "step": 48 }, { "epoch": 0.126232741617357, "high_lr": 0.0009747368421052632, "low_lr": 1.9494736842105265e-05, "step": 48 }, { "epoch": 0.126232741617357, "high_lr": 0.0009747368421052632, "low_lr": 1.9494736842105265e-05, "step": 48 }, { "epoch": 0.126232741617357, "high_lr": 0.0009747368421052632, "low_lr": 1.9494736842105265e-05, "step": 48 }, { "epoch": 0.126232741617357, "high_lr": 0.0009747368421052632, "low_lr": 1.9494736842105265e-05, "step": 48 }, { "epoch": 0.126232741617357, "high_lr": 0.0009747368421052632, "low_lr": 1.9494736842105265e-05, "step": 48 }, { "epoch": 0.126232741617357, "high_lr": 0.0009747368421052632, "low_lr": 1.9494736842105265e-05, "step": 48 }, { "epoch": 0.12886259040105194, "grad_norm": 0.5220436453819275, "learning_rate": 0.0009742105263157895, "loss": 2.013, "step": 49 }, { "epoch": 0.12886259040105194, "high_lr": 0.0009742105263157895, "low_lr": 1.9484210526315793e-05, "step": 49 }, { "epoch": 0.12886259040105194, "high_lr": 0.0009742105263157895, "low_lr": 1.9484210526315793e-05, "step": 49 }, { "epoch": 0.12886259040105194, "high_lr": 0.0009742105263157895, "low_lr": 1.9484210526315793e-05, "step": 49 }, { "epoch": 0.12886259040105194, "high_lr": 0.0009742105263157895, "low_lr": 1.9484210526315793e-05, "step": 49 }, { "epoch": 0.12886259040105194, "high_lr": 0.0009742105263157895, "low_lr": 1.9484210526315793e-05, "step": 49 }, { "epoch": 0.12886259040105194, "high_lr": 0.0009742105263157895, "low_lr": 1.9484210526315793e-05, "step": 49 }, { "epoch": 0.12886259040105194, "high_lr": 0.0009742105263157895, "low_lr": 1.9484210526315793e-05, "step": 49 }, { "epoch": 0.12886259040105194, "high_lr": 0.0009742105263157895, "low_lr": 1.9484210526315793e-05, "step": 49 }, { "epoch": 0.13149243918474687, "grad_norm": 0.455016165971756, "learning_rate": 0.0009736842105263158, "loss": 2.0045, "step": 50 }, { "epoch": 0.13149243918474687, "high_lr": 0.0009736842105263158, "low_lr": 1.9473684210526318e-05, "step": 50 }, { "epoch": 0.13149243918474687, "high_lr": 0.0009736842105263158, "low_lr": 1.9473684210526318e-05, "step": 50 }, { "epoch": 0.13149243918474687, "high_lr": 0.0009736842105263158, "low_lr": 1.9473684210526318e-05, "step": 50 }, { "epoch": 0.13149243918474687, "high_lr": 0.0009736842105263158, "low_lr": 1.9473684210526318e-05, "step": 50 }, { "epoch": 0.13149243918474687, "high_lr": 0.0009736842105263158, "low_lr": 1.9473684210526318e-05, "step": 50 }, { "epoch": 0.13149243918474687, "high_lr": 0.0009736842105263158, "low_lr": 1.9473684210526318e-05, "step": 50 }, { "epoch": 0.13149243918474687, "high_lr": 0.0009736842105263158, "low_lr": 1.9473684210526318e-05, "step": 50 }, { "epoch": 0.13149243918474687, "high_lr": 0.0009736842105263158, "low_lr": 1.9473684210526318e-05, "step": 50 }, { "epoch": 0.1341222879684418, "grad_norm": 0.5332766175270081, "learning_rate": 0.0009731578947368421, "loss": 2.0508, "step": 51 }, { "epoch": 0.1341222879684418, "high_lr": 0.0009731578947368421, "low_lr": 1.9463157894736843e-05, "step": 51 }, { "epoch": 0.1341222879684418, "high_lr": 0.0009731578947368421, "low_lr": 1.9463157894736843e-05, "step": 51 }, { "epoch": 0.1341222879684418, "high_lr": 0.0009731578947368421, "low_lr": 1.9463157894736843e-05, "step": 51 }, { "epoch": 0.1341222879684418, "high_lr": 0.0009731578947368421, "low_lr": 1.9463157894736843e-05, "step": 51 }, { "epoch": 0.1341222879684418, "high_lr": 0.0009731578947368421, "low_lr": 1.9463157894736843e-05, "step": 51 }, { "epoch": 0.1341222879684418, "high_lr": 0.0009731578947368421, "low_lr": 1.9463157894736843e-05, "step": 51 }, { "epoch": 0.1341222879684418, "high_lr": 0.0009731578947368421, "low_lr": 1.9463157894736843e-05, "step": 51 }, { "epoch": 0.1341222879684418, "high_lr": 0.0009731578947368421, "low_lr": 1.9463157894736843e-05, "step": 51 }, { "epoch": 0.13675213675213677, "grad_norm": 0.5860726833343506, "learning_rate": 0.0009726315789473684, "loss": 2.064, "step": 52 }, { "epoch": 0.13675213675213677, "high_lr": 0.0009726315789473684, "low_lr": 1.945263157894737e-05, "step": 52 }, { "epoch": 0.13675213675213677, "high_lr": 0.0009726315789473684, "low_lr": 1.945263157894737e-05, "step": 52 }, { "epoch": 0.13675213675213677, "high_lr": 0.0009726315789473684, "low_lr": 1.945263157894737e-05, "step": 52 }, { "epoch": 0.13675213675213677, "high_lr": 0.0009726315789473684, "low_lr": 1.945263157894737e-05, "step": 52 }, { "epoch": 0.13675213675213677, "high_lr": 0.0009726315789473684, "low_lr": 1.945263157894737e-05, "step": 52 }, { "epoch": 0.13675213675213677, "high_lr": 0.0009726315789473684, "low_lr": 1.945263157894737e-05, "step": 52 }, { "epoch": 0.13675213675213677, "high_lr": 0.0009726315789473684, "low_lr": 1.945263157894737e-05, "step": 52 }, { "epoch": 0.13675213675213677, "high_lr": 0.0009726315789473684, "low_lr": 1.945263157894737e-05, "step": 52 }, { "epoch": 0.1393819855358317, "grad_norm": 0.5577614307403564, "learning_rate": 0.0009721052631578947, "loss": 2.0728, "step": 53 }, { "epoch": 0.1393819855358317, "high_lr": 0.0009721052631578947, "low_lr": 1.9442105263157895e-05, "step": 53 }, { "epoch": 0.1393819855358317, "high_lr": 0.0009721052631578947, "low_lr": 1.9442105263157895e-05, "step": 53 }, { "epoch": 0.1393819855358317, "high_lr": 0.0009721052631578947, "low_lr": 1.9442105263157895e-05, "step": 53 }, { "epoch": 0.1393819855358317, "high_lr": 0.0009721052631578947, "low_lr": 1.9442105263157895e-05, "step": 53 }, { "epoch": 0.1393819855358317, "high_lr": 0.0009721052631578947, "low_lr": 1.9442105263157895e-05, "step": 53 }, { "epoch": 0.1393819855358317, "high_lr": 0.0009721052631578947, "low_lr": 1.9442105263157895e-05, "step": 53 }, { "epoch": 0.1393819855358317, "high_lr": 0.0009721052631578947, "low_lr": 1.9442105263157895e-05, "step": 53 }, { "epoch": 0.1393819855358317, "high_lr": 0.0009721052631578947, "low_lr": 1.9442105263157895e-05, "step": 53 }, { "epoch": 0.14201183431952663, "grad_norm": 0.5507766604423523, "learning_rate": 0.0009715789473684211, "loss": 2.0143, "step": 54 }, { "epoch": 0.14201183431952663, "high_lr": 0.0009715789473684211, "low_lr": 1.9431578947368423e-05, "step": 54 }, { "epoch": 0.14201183431952663, "high_lr": 0.0009715789473684211, "low_lr": 1.9431578947368423e-05, "step": 54 }, { "epoch": 0.14201183431952663, "high_lr": 0.0009715789473684211, "low_lr": 1.9431578947368423e-05, "step": 54 }, { "epoch": 0.14201183431952663, "high_lr": 0.0009715789473684211, "low_lr": 1.9431578947368423e-05, "step": 54 }, { "epoch": 0.14201183431952663, "high_lr": 0.0009715789473684211, "low_lr": 1.9431578947368423e-05, "step": 54 }, { "epoch": 0.14201183431952663, "high_lr": 0.0009715789473684211, "low_lr": 1.9431578947368423e-05, "step": 54 }, { "epoch": 0.14201183431952663, "high_lr": 0.0009715789473684211, "low_lr": 1.9431578947368423e-05, "step": 54 }, { "epoch": 0.14201183431952663, "high_lr": 0.0009715789473684211, "low_lr": 1.9431578947368423e-05, "step": 54 }, { "epoch": 0.14464168310322156, "grad_norm": 0.5255084037780762, "learning_rate": 0.0009710526315789474, "loss": 2.0189, "step": 55 }, { "epoch": 0.14464168310322156, "high_lr": 0.0009710526315789474, "low_lr": 1.9421052631578948e-05, "step": 55 }, { "epoch": 0.14464168310322156, "high_lr": 0.0009710526315789474, "low_lr": 1.9421052631578948e-05, "step": 55 }, { "epoch": 0.14464168310322156, "high_lr": 0.0009710526315789474, "low_lr": 1.9421052631578948e-05, "step": 55 }, { "epoch": 0.14464168310322156, "high_lr": 0.0009710526315789474, "low_lr": 1.9421052631578948e-05, "step": 55 }, { "epoch": 0.14464168310322156, "high_lr": 0.0009710526315789474, "low_lr": 1.9421052631578948e-05, "step": 55 }, { "epoch": 0.14464168310322156, "high_lr": 0.0009710526315789474, "low_lr": 1.9421052631578948e-05, "step": 55 }, { "epoch": 0.14464168310322156, "high_lr": 0.0009710526315789474, "low_lr": 1.9421052631578948e-05, "step": 55 }, { "epoch": 0.14464168310322156, "high_lr": 0.0009710526315789474, "low_lr": 1.9421052631578948e-05, "step": 55 }, { "epoch": 0.1472715318869165, "grad_norm": 0.5672211647033691, "learning_rate": 0.0009705263157894737, "loss": 2.0786, "step": 56 }, { "epoch": 0.1472715318869165, "high_lr": 0.0009705263157894737, "low_lr": 1.9410526315789476e-05, "step": 56 }, { "epoch": 0.1472715318869165, "high_lr": 0.0009705263157894737, "low_lr": 1.9410526315789476e-05, "step": 56 }, { "epoch": 0.1472715318869165, "high_lr": 0.0009705263157894737, "low_lr": 1.9410526315789476e-05, "step": 56 }, { "epoch": 0.1472715318869165, "high_lr": 0.0009705263157894737, "low_lr": 1.9410526315789476e-05, "step": 56 }, { "epoch": 0.1472715318869165, "high_lr": 0.0009705263157894737, "low_lr": 1.9410526315789476e-05, "step": 56 }, { "epoch": 0.1472715318869165, "high_lr": 0.0009705263157894737, "low_lr": 1.9410526315789476e-05, "step": 56 }, { "epoch": 0.1472715318869165, "high_lr": 0.0009705263157894737, "low_lr": 1.9410526315789476e-05, "step": 56 }, { "epoch": 0.1472715318869165, "high_lr": 0.0009705263157894737, "low_lr": 1.9410526315789476e-05, "step": 56 }, { "epoch": 0.14990138067061143, "grad_norm": 0.5566748976707458, "learning_rate": 0.0009699999999999999, "loss": 2.0574, "step": 57 }, { "epoch": 0.14990138067061143, "high_lr": 0.0009699999999999999, "low_lr": 1.94e-05, "step": 57 }, { "epoch": 0.14990138067061143, "high_lr": 0.0009699999999999999, "low_lr": 1.94e-05, "step": 57 }, { "epoch": 0.14990138067061143, "high_lr": 0.0009699999999999999, "low_lr": 1.94e-05, "step": 57 }, { "epoch": 0.14990138067061143, "high_lr": 0.0009699999999999999, "low_lr": 1.94e-05, "step": 57 }, { "epoch": 0.14990138067061143, "high_lr": 0.0009699999999999999, "low_lr": 1.94e-05, "step": 57 }, { "epoch": 0.14990138067061143, "high_lr": 0.0009699999999999999, "low_lr": 1.94e-05, "step": 57 }, { "epoch": 0.14990138067061143, "high_lr": 0.0009699999999999999, "low_lr": 1.94e-05, "step": 57 }, { "epoch": 0.14990138067061143, "high_lr": 0.0009699999999999999, "low_lr": 1.94e-05, "step": 57 }, { "epoch": 0.1525312294543064, "grad_norm": 0.49547362327575684, "learning_rate": 0.0009694736842105263, "loss": 2.0502, "step": 58 }, { "epoch": 0.1525312294543064, "high_lr": 0.0009694736842105263, "low_lr": 1.9389473684210525e-05, "step": 58 }, { "epoch": 0.1525312294543064, "high_lr": 0.0009694736842105263, "low_lr": 1.9389473684210525e-05, "step": 58 }, { "epoch": 0.1525312294543064, "high_lr": 0.0009694736842105263, "low_lr": 1.9389473684210525e-05, "step": 58 }, { "epoch": 0.1525312294543064, "high_lr": 0.0009694736842105263, "low_lr": 1.9389473684210525e-05, "step": 58 }, { "epoch": 0.1525312294543064, "high_lr": 0.0009694736842105263, "low_lr": 1.9389473684210525e-05, "step": 58 }, { "epoch": 0.1525312294543064, "high_lr": 0.0009694736842105263, "low_lr": 1.9389473684210525e-05, "step": 58 }, { "epoch": 0.1525312294543064, "high_lr": 0.0009694736842105263, "low_lr": 1.9389473684210525e-05, "step": 58 }, { "epoch": 0.1525312294543064, "high_lr": 0.0009694736842105263, "low_lr": 1.9389473684210525e-05, "step": 58 }, { "epoch": 0.15516107823800132, "grad_norm": 0.6034396886825562, "learning_rate": 0.0009689473684210527, "loss": 2.0388, "step": 59 }, { "epoch": 0.15516107823800132, "high_lr": 0.0009689473684210527, "low_lr": 1.9378947368421053e-05, "step": 59 }, { "epoch": 0.15516107823800132, "high_lr": 0.0009689473684210527, "low_lr": 1.9378947368421053e-05, "step": 59 }, { "epoch": 0.15516107823800132, "high_lr": 0.0009689473684210527, "low_lr": 1.9378947368421053e-05, "step": 59 }, { "epoch": 0.15516107823800132, "high_lr": 0.0009689473684210527, "low_lr": 1.9378947368421053e-05, "step": 59 }, { "epoch": 0.15516107823800132, "high_lr": 0.0009689473684210527, "low_lr": 1.9378947368421053e-05, "step": 59 }, { "epoch": 0.15516107823800132, "high_lr": 0.0009689473684210527, "low_lr": 1.9378947368421053e-05, "step": 59 }, { "epoch": 0.15516107823800132, "high_lr": 0.0009689473684210527, "low_lr": 1.9378947368421053e-05, "step": 59 }, { "epoch": 0.15516107823800132, "high_lr": 0.0009689473684210527, "low_lr": 1.9378947368421053e-05, "step": 59 }, { "epoch": 0.15779092702169625, "grad_norm": 0.5364948511123657, "learning_rate": 0.000968421052631579, "loss": 1.96, "step": 60 }, { "epoch": 0.15779092702169625, "high_lr": 0.000968421052631579, "low_lr": 1.936842105263158e-05, "step": 60 }, { "epoch": 0.15779092702169625, "high_lr": 0.000968421052631579, "low_lr": 1.936842105263158e-05, "step": 60 }, { "epoch": 0.15779092702169625, "high_lr": 0.000968421052631579, "low_lr": 1.936842105263158e-05, "step": 60 }, { "epoch": 0.15779092702169625, "high_lr": 0.000968421052631579, "low_lr": 1.936842105263158e-05, "step": 60 }, { "epoch": 0.15779092702169625, "high_lr": 0.000968421052631579, "low_lr": 1.936842105263158e-05, "step": 60 }, { "epoch": 0.15779092702169625, "high_lr": 0.000968421052631579, "low_lr": 1.936842105263158e-05, "step": 60 }, { "epoch": 0.15779092702169625, "high_lr": 0.000968421052631579, "low_lr": 1.936842105263158e-05, "step": 60 }, { "epoch": 0.15779092702169625, "high_lr": 0.000968421052631579, "low_lr": 1.936842105263158e-05, "step": 60 }, { "epoch": 0.16042077580539119, "grad_norm": 0.5651670694351196, "learning_rate": 0.0009678947368421053, "loss": 1.9774, "step": 61 }, { "epoch": 0.16042077580539119, "high_lr": 0.0009678947368421053, "low_lr": 1.9357894736842106e-05, "step": 61 }, { "epoch": 0.16042077580539119, "high_lr": 0.0009678947368421053, "low_lr": 1.9357894736842106e-05, "step": 61 }, { "epoch": 0.16042077580539119, "high_lr": 0.0009678947368421053, "low_lr": 1.9357894736842106e-05, "step": 61 }, { "epoch": 0.16042077580539119, "high_lr": 0.0009678947368421053, "low_lr": 1.9357894736842106e-05, "step": 61 }, { "epoch": 0.16042077580539119, "high_lr": 0.0009678947368421053, "low_lr": 1.9357894736842106e-05, "step": 61 }, { "epoch": 0.16042077580539119, "high_lr": 0.0009678947368421053, "low_lr": 1.9357894736842106e-05, "step": 61 }, { "epoch": 0.16042077580539119, "high_lr": 0.0009678947368421053, "low_lr": 1.9357894736842106e-05, "step": 61 }, { "epoch": 0.16042077580539119, "high_lr": 0.0009678947368421053, "low_lr": 1.9357894736842106e-05, "step": 61 }, { "epoch": 0.16305062458908612, "grad_norm": 0.5697059631347656, "learning_rate": 0.0009673684210526316, "loss": 2.0256, "step": 62 }, { "epoch": 0.16305062458908612, "high_lr": 0.0009673684210526316, "low_lr": 1.9347368421052634e-05, "step": 62 }, { "epoch": 0.16305062458908612, "high_lr": 0.0009673684210526316, "low_lr": 1.9347368421052634e-05, "step": 62 }, { "epoch": 0.16305062458908612, "high_lr": 0.0009673684210526316, "low_lr": 1.9347368421052634e-05, "step": 62 }, { "epoch": 0.16305062458908612, "high_lr": 0.0009673684210526316, "low_lr": 1.9347368421052634e-05, "step": 62 }, { "epoch": 0.16305062458908612, "high_lr": 0.0009673684210526316, "low_lr": 1.9347368421052634e-05, "step": 62 }, { "epoch": 0.16305062458908612, "high_lr": 0.0009673684210526316, "low_lr": 1.9347368421052634e-05, "step": 62 }, { "epoch": 0.16305062458908612, "high_lr": 0.0009673684210526316, "low_lr": 1.9347368421052634e-05, "step": 62 }, { "epoch": 0.16305062458908612, "high_lr": 0.0009673684210526316, "low_lr": 1.9347368421052634e-05, "step": 62 }, { "epoch": 0.16568047337278108, "grad_norm": 0.6273848414421082, "learning_rate": 0.000966842105263158, "loss": 2.0194, "step": 63 }, { "epoch": 0.16568047337278108, "high_lr": 0.000966842105263158, "low_lr": 1.9336842105263162e-05, "step": 63 }, { "epoch": 0.16568047337278108, "high_lr": 0.000966842105263158, "low_lr": 1.9336842105263162e-05, "step": 63 }, { "epoch": 0.16568047337278108, "high_lr": 0.000966842105263158, "low_lr": 1.9336842105263162e-05, "step": 63 }, { "epoch": 0.16568047337278108, "high_lr": 0.000966842105263158, "low_lr": 1.9336842105263162e-05, "step": 63 }, { "epoch": 0.16568047337278108, "high_lr": 0.000966842105263158, "low_lr": 1.9336842105263162e-05, "step": 63 }, { "epoch": 0.16568047337278108, "high_lr": 0.000966842105263158, "low_lr": 1.9336842105263162e-05, "step": 63 }, { "epoch": 0.16568047337278108, "high_lr": 0.000966842105263158, "low_lr": 1.9336842105263162e-05, "step": 63 }, { "epoch": 0.16568047337278108, "high_lr": 0.000966842105263158, "low_lr": 1.9336842105263162e-05, "step": 63 }, { "epoch": 0.168310322156476, "grad_norm": 0.6345831155776978, "learning_rate": 0.0009663157894736843, "loss": 2.0486, "step": 64 }, { "epoch": 0.168310322156476, "high_lr": 0.0009663157894736843, "low_lr": 1.9326315789473687e-05, "step": 64 }, { "epoch": 0.168310322156476, "high_lr": 0.0009663157894736843, "low_lr": 1.9326315789473687e-05, "step": 64 }, { "epoch": 0.168310322156476, "high_lr": 0.0009663157894736843, "low_lr": 1.9326315789473687e-05, "step": 64 }, { "epoch": 0.168310322156476, "high_lr": 0.0009663157894736843, "low_lr": 1.9326315789473687e-05, "step": 64 }, { "epoch": 0.168310322156476, "high_lr": 0.0009663157894736843, "low_lr": 1.9326315789473687e-05, "step": 64 }, { "epoch": 0.168310322156476, "high_lr": 0.0009663157894736843, "low_lr": 1.9326315789473687e-05, "step": 64 }, { "epoch": 0.168310322156476, "high_lr": 0.0009663157894736843, "low_lr": 1.9326315789473687e-05, "step": 64 }, { "epoch": 0.168310322156476, "high_lr": 0.0009663157894736843, "low_lr": 1.9326315789473687e-05, "step": 64 }, { "epoch": 0.17094017094017094, "grad_norm": 0.5673817992210388, "learning_rate": 0.0009657894736842106, "loss": 2.0464, "step": 65 }, { "epoch": 0.17094017094017094, "high_lr": 0.0009657894736842106, "low_lr": 1.931578947368421e-05, "step": 65 }, { "epoch": 0.17094017094017094, "high_lr": 0.0009657894736842106, "low_lr": 1.931578947368421e-05, "step": 65 }, { "epoch": 0.17094017094017094, "high_lr": 0.0009657894736842106, "low_lr": 1.931578947368421e-05, "step": 65 }, { "epoch": 0.17094017094017094, "high_lr": 0.0009657894736842106, "low_lr": 1.931578947368421e-05, "step": 65 }, { "epoch": 0.17094017094017094, "high_lr": 0.0009657894736842106, "low_lr": 1.931578947368421e-05, "step": 65 }, { "epoch": 0.17094017094017094, "high_lr": 0.0009657894736842106, "low_lr": 1.931578947368421e-05, "step": 65 }, { "epoch": 0.17094017094017094, "high_lr": 0.0009657894736842106, "low_lr": 1.931578947368421e-05, "step": 65 }, { "epoch": 0.17094017094017094, "high_lr": 0.0009657894736842106, "low_lr": 1.931578947368421e-05, "step": 65 }, { "epoch": 0.17357001972386588, "grad_norm": 0.5592147707939148, "learning_rate": 0.0009652631578947368, "loss": 2.0164, "step": 66 }, { "epoch": 0.17357001972386588, "high_lr": 0.0009652631578947368, "low_lr": 1.930526315789474e-05, "step": 66 }, { "epoch": 0.17357001972386588, "high_lr": 0.0009652631578947368, "low_lr": 1.930526315789474e-05, "step": 66 }, { "epoch": 0.17357001972386588, "high_lr": 0.0009652631578947368, "low_lr": 1.930526315789474e-05, "step": 66 }, { "epoch": 0.17357001972386588, "high_lr": 0.0009652631578947368, "low_lr": 1.930526315789474e-05, "step": 66 }, { "epoch": 0.17357001972386588, "high_lr": 0.0009652631578947368, "low_lr": 1.930526315789474e-05, "step": 66 }, { "epoch": 0.17357001972386588, "high_lr": 0.0009652631578947368, "low_lr": 1.930526315789474e-05, "step": 66 }, { "epoch": 0.17357001972386588, "high_lr": 0.0009652631578947368, "low_lr": 1.930526315789474e-05, "step": 66 }, { "epoch": 0.17357001972386588, "high_lr": 0.0009652631578947368, "low_lr": 1.930526315789474e-05, "step": 66 }, { "epoch": 0.1761998685075608, "grad_norm": 0.5632216930389404, "learning_rate": 0.0009647368421052631, "loss": 1.9588, "step": 67 }, { "epoch": 0.1761998685075608, "high_lr": 0.0009647368421052631, "low_lr": 1.9294736842105264e-05, "step": 67 }, { "epoch": 0.1761998685075608, "high_lr": 0.0009647368421052631, "low_lr": 1.9294736842105264e-05, "step": 67 }, { "epoch": 0.1761998685075608, "high_lr": 0.0009647368421052631, "low_lr": 1.9294736842105264e-05, "step": 67 }, { "epoch": 0.1761998685075608, "high_lr": 0.0009647368421052631, "low_lr": 1.9294736842105264e-05, "step": 67 }, { "epoch": 0.1761998685075608, "high_lr": 0.0009647368421052631, "low_lr": 1.9294736842105264e-05, "step": 67 }, { "epoch": 0.1761998685075608, "high_lr": 0.0009647368421052631, "low_lr": 1.9294736842105264e-05, "step": 67 }, { "epoch": 0.1761998685075608, "high_lr": 0.0009647368421052631, "low_lr": 1.9294736842105264e-05, "step": 67 }, { "epoch": 0.1761998685075608, "high_lr": 0.0009647368421052631, "low_lr": 1.9294736842105264e-05, "step": 67 }, { "epoch": 0.17882971729125574, "grad_norm": 0.5781631469726562, "learning_rate": 0.0009642105263157895, "loss": 1.9856, "step": 68 }, { "epoch": 0.17882971729125574, "high_lr": 0.0009642105263157895, "low_lr": 1.9284210526315792e-05, "step": 68 }, { "epoch": 0.17882971729125574, "high_lr": 0.0009642105263157895, "low_lr": 1.9284210526315792e-05, "step": 68 }, { "epoch": 0.17882971729125574, "high_lr": 0.0009642105263157895, "low_lr": 1.9284210526315792e-05, "step": 68 }, { "epoch": 0.17882971729125574, "high_lr": 0.0009642105263157895, "low_lr": 1.9284210526315792e-05, "step": 68 }, { "epoch": 0.17882971729125574, "high_lr": 0.0009642105263157895, "low_lr": 1.9284210526315792e-05, "step": 68 }, { "epoch": 0.17882971729125574, "high_lr": 0.0009642105263157895, "low_lr": 1.9284210526315792e-05, "step": 68 }, { "epoch": 0.17882971729125574, "high_lr": 0.0009642105263157895, "low_lr": 1.9284210526315792e-05, "step": 68 }, { "epoch": 0.17882971729125574, "high_lr": 0.0009642105263157895, "low_lr": 1.9284210526315792e-05, "step": 68 }, { "epoch": 0.1814595660749507, "grad_norm": 0.509727418422699, "learning_rate": 0.0009636842105263158, "loss": 1.8842, "step": 69 }, { "epoch": 0.1814595660749507, "high_lr": 0.0009636842105263158, "low_lr": 1.9273684210526317e-05, "step": 69 }, { "epoch": 0.1814595660749507, "high_lr": 0.0009636842105263158, "low_lr": 1.9273684210526317e-05, "step": 69 }, { "epoch": 0.1814595660749507, "high_lr": 0.0009636842105263158, "low_lr": 1.9273684210526317e-05, "step": 69 }, { "epoch": 0.1814595660749507, "high_lr": 0.0009636842105263158, "low_lr": 1.9273684210526317e-05, "step": 69 }, { "epoch": 0.1814595660749507, "high_lr": 0.0009636842105263158, "low_lr": 1.9273684210526317e-05, "step": 69 }, { "epoch": 0.1814595660749507, "high_lr": 0.0009636842105263158, "low_lr": 1.9273684210526317e-05, "step": 69 }, { "epoch": 0.1814595660749507, "high_lr": 0.0009636842105263158, "low_lr": 1.9273684210526317e-05, "step": 69 }, { "epoch": 0.1814595660749507, "high_lr": 0.0009636842105263158, "low_lr": 1.9273684210526317e-05, "step": 69 }, { "epoch": 0.18408941485864563, "grad_norm": 1.9063596725463867, "learning_rate": 0.0009631578947368421, "loss": 2.0654, "step": 70 }, { "epoch": 0.18408941485864563, "high_lr": 0.0009631578947368421, "low_lr": 1.9263157894736845e-05, "step": 70 }, { "epoch": 0.18408941485864563, "high_lr": 0.0009631578947368421, "low_lr": 1.9263157894736845e-05, "step": 70 }, { "epoch": 0.18408941485864563, "high_lr": 0.0009631578947368421, "low_lr": 1.9263157894736845e-05, "step": 70 }, { "epoch": 0.18408941485864563, "high_lr": 0.0009631578947368421, "low_lr": 1.9263157894736845e-05, "step": 70 }, { "epoch": 0.18408941485864563, "high_lr": 0.0009631578947368421, "low_lr": 1.9263157894736845e-05, "step": 70 }, { "epoch": 0.18408941485864563, "high_lr": 0.0009631578947368421, "low_lr": 1.9263157894736845e-05, "step": 70 }, { "epoch": 0.18408941485864563, "high_lr": 0.0009631578947368421, "low_lr": 1.9263157894736845e-05, "step": 70 }, { "epoch": 0.18408941485864563, "high_lr": 0.0009631578947368421, "low_lr": 1.9263157894736845e-05, "step": 70 }, { "epoch": 0.18671926364234057, "grad_norm": 0.6156869530677795, "learning_rate": 0.0009626315789473684, "loss": 2.0458, "step": 71 }, { "epoch": 0.18671926364234057, "high_lr": 0.0009626315789473684, "low_lr": 1.925263157894737e-05, "step": 71 }, { "epoch": 0.18671926364234057, "high_lr": 0.0009626315789473684, "low_lr": 1.925263157894737e-05, "step": 71 }, { "epoch": 0.18671926364234057, "high_lr": 0.0009626315789473684, "low_lr": 1.925263157894737e-05, "step": 71 }, { "epoch": 0.18671926364234057, "high_lr": 0.0009626315789473684, "low_lr": 1.925263157894737e-05, "step": 71 }, { "epoch": 0.18671926364234057, "high_lr": 0.0009626315789473684, "low_lr": 1.925263157894737e-05, "step": 71 }, { "epoch": 0.18671926364234057, "high_lr": 0.0009626315789473684, "low_lr": 1.925263157894737e-05, "step": 71 }, { "epoch": 0.18671926364234057, "high_lr": 0.0009626315789473684, "low_lr": 1.925263157894737e-05, "step": 71 }, { "epoch": 0.18671926364234057, "high_lr": 0.0009626315789473684, "low_lr": 1.925263157894737e-05, "step": 71 }, { "epoch": 0.1893491124260355, "grad_norm": 0.590178906917572, "learning_rate": 0.0009621052631578947, "loss": 1.954, "step": 72 }, { "epoch": 0.1893491124260355, "high_lr": 0.0009621052631578947, "low_lr": 1.9242105263157894e-05, "step": 72 }, { "epoch": 0.1893491124260355, "high_lr": 0.0009621052631578947, "low_lr": 1.9242105263157894e-05, "step": 72 }, { "epoch": 0.1893491124260355, "high_lr": 0.0009621052631578947, "low_lr": 1.9242105263157894e-05, "step": 72 }, { "epoch": 0.1893491124260355, "high_lr": 0.0009621052631578947, "low_lr": 1.9242105263157894e-05, "step": 72 }, { "epoch": 0.1893491124260355, "high_lr": 0.0009621052631578947, "low_lr": 1.9242105263157894e-05, "step": 72 }, { "epoch": 0.1893491124260355, "high_lr": 0.0009621052631578947, "low_lr": 1.9242105263157894e-05, "step": 72 }, { "epoch": 0.1893491124260355, "high_lr": 0.0009621052631578947, "low_lr": 1.9242105263157894e-05, "step": 72 }, { "epoch": 0.1893491124260355, "high_lr": 0.0009621052631578947, "low_lr": 1.9242105263157894e-05, "step": 72 }, { "epoch": 0.19197896120973043, "grad_norm": 0.6381607055664062, "learning_rate": 0.0009615789473684211, "loss": 1.982, "step": 73 }, { "epoch": 0.19197896120973043, "high_lr": 0.0009615789473684211, "low_lr": 1.9231578947368422e-05, "step": 73 }, { "epoch": 0.19197896120973043, "high_lr": 0.0009615789473684211, "low_lr": 1.9231578947368422e-05, "step": 73 }, { "epoch": 0.19197896120973043, "high_lr": 0.0009615789473684211, "low_lr": 1.9231578947368422e-05, "step": 73 }, { "epoch": 0.19197896120973043, "high_lr": 0.0009615789473684211, "low_lr": 1.9231578947368422e-05, "step": 73 }, { "epoch": 0.19197896120973043, "high_lr": 0.0009615789473684211, "low_lr": 1.9231578947368422e-05, "step": 73 }, { "epoch": 0.19197896120973043, "high_lr": 0.0009615789473684211, "low_lr": 1.9231578947368422e-05, "step": 73 }, { "epoch": 0.19197896120973043, "high_lr": 0.0009615789473684211, "low_lr": 1.9231578947368422e-05, "step": 73 }, { "epoch": 0.19197896120973043, "high_lr": 0.0009615789473684211, "low_lr": 1.9231578947368422e-05, "step": 73 }, { "epoch": 0.1946088099934254, "grad_norm": 0.6437403559684753, "learning_rate": 0.0009610526315789475, "loss": 1.9463, "step": 74 }, { "epoch": 0.1946088099934254, "high_lr": 0.0009610526315789475, "low_lr": 1.922105263157895e-05, "step": 74 }, { "epoch": 0.1946088099934254, "high_lr": 0.0009610526315789475, "low_lr": 1.922105263157895e-05, "step": 74 }, { "epoch": 0.1946088099934254, "high_lr": 0.0009610526315789475, "low_lr": 1.922105263157895e-05, "step": 74 }, { "epoch": 0.1946088099934254, "high_lr": 0.0009610526315789475, "low_lr": 1.922105263157895e-05, "step": 74 }, { "epoch": 0.1946088099934254, "high_lr": 0.0009610526315789475, "low_lr": 1.922105263157895e-05, "step": 74 }, { "epoch": 0.1946088099934254, "high_lr": 0.0009610526315789475, "low_lr": 1.922105263157895e-05, "step": 74 }, { "epoch": 0.1946088099934254, "high_lr": 0.0009610526315789475, "low_lr": 1.922105263157895e-05, "step": 74 }, { "epoch": 0.1946088099934254, "high_lr": 0.0009610526315789475, "low_lr": 1.922105263157895e-05, "step": 74 }, { "epoch": 0.19723865877712032, "grad_norm": 0.6442136168479919, "learning_rate": 0.0009605263157894737, "loss": 2.0247, "step": 75 }, { "epoch": 0.19723865877712032, "high_lr": 0.0009605263157894737, "low_lr": 1.9210526315789474e-05, "step": 75 }, { "epoch": 0.19723865877712032, "high_lr": 0.0009605263157894737, "low_lr": 1.9210526315789474e-05, "step": 75 }, { "epoch": 0.19723865877712032, "high_lr": 0.0009605263157894737, "low_lr": 1.9210526315789474e-05, "step": 75 }, { "epoch": 0.19723865877712032, "high_lr": 0.0009605263157894737, "low_lr": 1.9210526315789474e-05, "step": 75 }, { "epoch": 0.19723865877712032, "high_lr": 0.0009605263157894737, "low_lr": 1.9210526315789474e-05, "step": 75 }, { "epoch": 0.19723865877712032, "high_lr": 0.0009605263157894737, "low_lr": 1.9210526315789474e-05, "step": 75 }, { "epoch": 0.19723865877712032, "high_lr": 0.0009605263157894737, "low_lr": 1.9210526315789474e-05, "step": 75 }, { "epoch": 0.19723865877712032, "high_lr": 0.0009605263157894737, "low_lr": 1.9210526315789474e-05, "step": 75 }, { "epoch": 0.19986850756081526, "grad_norm": 0.775407612323761, "learning_rate": 0.00096, "loss": 2.0624, "step": 76 }, { "epoch": 0.19986850756081526, "high_lr": 0.00096, "low_lr": 1.9200000000000003e-05, "step": 76 }, { "epoch": 0.19986850756081526, "high_lr": 0.00096, "low_lr": 1.9200000000000003e-05, "step": 76 }, { "epoch": 0.19986850756081526, "high_lr": 0.00096, "low_lr": 1.9200000000000003e-05, "step": 76 }, { "epoch": 0.19986850756081526, "high_lr": 0.00096, "low_lr": 1.9200000000000003e-05, "step": 76 }, { "epoch": 0.19986850756081526, "high_lr": 0.00096, "low_lr": 1.9200000000000003e-05, "step": 76 }, { "epoch": 0.19986850756081526, "high_lr": 0.00096, "low_lr": 1.9200000000000003e-05, "step": 76 }, { "epoch": 0.19986850756081526, "high_lr": 0.00096, "low_lr": 1.9200000000000003e-05, "step": 76 }, { "epoch": 0.19986850756081526, "high_lr": 0.00096, "low_lr": 1.9200000000000003e-05, "step": 76 }, { "epoch": 0.2024983563445102, "grad_norm": 0.6264069080352783, "learning_rate": 0.0009594736842105264, "loss": 1.9074, "step": 77 }, { "epoch": 0.2024983563445102, "high_lr": 0.0009594736842105264, "low_lr": 1.918947368421053e-05, "step": 77 }, { "epoch": 0.2024983563445102, "high_lr": 0.0009594736842105264, "low_lr": 1.918947368421053e-05, "step": 77 }, { "epoch": 0.2024983563445102, "high_lr": 0.0009594736842105264, "low_lr": 1.918947368421053e-05, "step": 77 }, { "epoch": 0.2024983563445102, "high_lr": 0.0009594736842105264, "low_lr": 1.918947368421053e-05, "step": 77 }, { "epoch": 0.2024983563445102, "high_lr": 0.0009594736842105264, "low_lr": 1.918947368421053e-05, "step": 77 }, { "epoch": 0.2024983563445102, "high_lr": 0.0009594736842105264, "low_lr": 1.918947368421053e-05, "step": 77 }, { "epoch": 0.2024983563445102, "high_lr": 0.0009594736842105264, "low_lr": 1.918947368421053e-05, "step": 77 }, { "epoch": 0.2024983563445102, "high_lr": 0.0009594736842105264, "low_lr": 1.918947368421053e-05, "step": 77 }, { "epoch": 0.20512820512820512, "grad_norm": 0.6018377542495728, "learning_rate": 0.0009589473684210527, "loss": 1.9637, "step": 78 }, { "epoch": 0.20512820512820512, "high_lr": 0.0009589473684210527, "low_lr": 1.9178947368421055e-05, "step": 78 }, { "epoch": 0.20512820512820512, "high_lr": 0.0009589473684210527, "low_lr": 1.9178947368421055e-05, "step": 78 }, { "epoch": 0.20512820512820512, "high_lr": 0.0009589473684210527, "low_lr": 1.9178947368421055e-05, "step": 78 }, { "epoch": 0.20512820512820512, "high_lr": 0.0009589473684210527, "low_lr": 1.9178947368421055e-05, "step": 78 }, { "epoch": 0.20512820512820512, "high_lr": 0.0009589473684210527, "low_lr": 1.9178947368421055e-05, "step": 78 }, { "epoch": 0.20512820512820512, "high_lr": 0.0009589473684210527, "low_lr": 1.9178947368421055e-05, "step": 78 }, { "epoch": 0.20512820512820512, "high_lr": 0.0009589473684210527, "low_lr": 1.9178947368421055e-05, "step": 78 }, { "epoch": 0.20512820512820512, "high_lr": 0.0009589473684210527, "low_lr": 1.9178947368421055e-05, "step": 78 }, { "epoch": 0.20775805391190005, "grad_norm": 0.5876686573028564, "learning_rate": 0.000958421052631579, "loss": 2.0186, "step": 79 }, { "epoch": 0.20775805391190005, "high_lr": 0.000958421052631579, "low_lr": 1.916842105263158e-05, "step": 79 }, { "epoch": 0.20775805391190005, "high_lr": 0.000958421052631579, "low_lr": 1.916842105263158e-05, "step": 79 }, { "epoch": 0.20775805391190005, "high_lr": 0.000958421052631579, "low_lr": 1.916842105263158e-05, "step": 79 }, { "epoch": 0.20775805391190005, "high_lr": 0.000958421052631579, "low_lr": 1.916842105263158e-05, "step": 79 }, { "epoch": 0.20775805391190005, "high_lr": 0.000958421052631579, "low_lr": 1.916842105263158e-05, "step": 79 }, { "epoch": 0.20775805391190005, "high_lr": 0.000958421052631579, "low_lr": 1.916842105263158e-05, "step": 79 }, { "epoch": 0.20775805391190005, "high_lr": 0.000958421052631579, "low_lr": 1.916842105263158e-05, "step": 79 }, { "epoch": 0.20775805391190005, "high_lr": 0.000958421052631579, "low_lr": 1.916842105263158e-05, "step": 79 }, { "epoch": 0.210387902695595, "grad_norm": 0.6686984300613403, "learning_rate": 0.0009578947368421053, "loss": 2.0095, "step": 80 }, { "epoch": 0.210387902695595, "high_lr": 0.0009578947368421053, "low_lr": 1.9157894736842108e-05, "step": 80 }, { "epoch": 0.210387902695595, "high_lr": 0.0009578947368421053, "low_lr": 1.9157894736842108e-05, "step": 80 }, { "epoch": 0.210387902695595, "high_lr": 0.0009578947368421053, "low_lr": 1.9157894736842108e-05, "step": 80 }, { "epoch": 0.210387902695595, "high_lr": 0.0009578947368421053, "low_lr": 1.9157894736842108e-05, "step": 80 }, { "epoch": 0.210387902695595, "high_lr": 0.0009578947368421053, "low_lr": 1.9157894736842108e-05, "step": 80 }, { "epoch": 0.210387902695595, "high_lr": 0.0009578947368421053, "low_lr": 1.9157894736842108e-05, "step": 80 }, { "epoch": 0.210387902695595, "high_lr": 0.0009578947368421053, "low_lr": 1.9157894736842108e-05, "step": 80 }, { "epoch": 0.210387902695595, "high_lr": 0.0009578947368421053, "low_lr": 1.9157894736842108e-05, "step": 80 }, { "epoch": 0.21301775147928995, "grad_norm": 0.5342804193496704, "learning_rate": 0.0009573684210526316, "loss": 1.9288, "step": 81 }, { "epoch": 0.21301775147928995, "high_lr": 0.0009573684210526316, "low_lr": 1.9147368421052632e-05, "step": 81 }, { "epoch": 0.21301775147928995, "high_lr": 0.0009573684210526316, "low_lr": 1.9147368421052632e-05, "step": 81 }, { "epoch": 0.21301775147928995, "high_lr": 0.0009573684210526316, "low_lr": 1.9147368421052632e-05, "step": 81 }, { "epoch": 0.21301775147928995, "high_lr": 0.0009573684210526316, "low_lr": 1.9147368421052632e-05, "step": 81 }, { "epoch": 0.21301775147928995, "high_lr": 0.0009573684210526316, "low_lr": 1.9147368421052632e-05, "step": 81 }, { "epoch": 0.21301775147928995, "high_lr": 0.0009573684210526316, "low_lr": 1.9147368421052632e-05, "step": 81 }, { "epoch": 0.21301775147928995, "high_lr": 0.0009573684210526316, "low_lr": 1.9147368421052632e-05, "step": 81 }, { "epoch": 0.21301775147928995, "high_lr": 0.0009573684210526316, "low_lr": 1.9147368421052632e-05, "step": 81 }, { "epoch": 0.21564760026298488, "grad_norm": 0.636827826499939, "learning_rate": 0.000956842105263158, "loss": 1.9725, "step": 82 }, { "epoch": 0.21564760026298488, "high_lr": 0.000956842105263158, "low_lr": 1.913684210526316e-05, "step": 82 }, { "epoch": 0.21564760026298488, "high_lr": 0.000956842105263158, "low_lr": 1.913684210526316e-05, "step": 82 }, { "epoch": 0.21564760026298488, "high_lr": 0.000956842105263158, "low_lr": 1.913684210526316e-05, "step": 82 }, { "epoch": 0.21564760026298488, "high_lr": 0.000956842105263158, "low_lr": 1.913684210526316e-05, "step": 82 }, { "epoch": 0.21564760026298488, "high_lr": 0.000956842105263158, "low_lr": 1.913684210526316e-05, "step": 82 }, { "epoch": 0.21564760026298488, "high_lr": 0.000956842105263158, "low_lr": 1.913684210526316e-05, "step": 82 }, { "epoch": 0.21564760026298488, "high_lr": 0.000956842105263158, "low_lr": 1.913684210526316e-05, "step": 82 }, { "epoch": 0.21564760026298488, "high_lr": 0.000956842105263158, "low_lr": 1.913684210526316e-05, "step": 82 }, { "epoch": 0.2182774490466798, "grad_norm": 0.6838151216506958, "learning_rate": 0.0009563157894736842, "loss": 2.0263, "step": 83 }, { "epoch": 0.2182774490466798, "high_lr": 0.0009563157894736842, "low_lr": 1.9126315789473685e-05, "step": 83 }, { "epoch": 0.2182774490466798, "high_lr": 0.0009563157894736842, "low_lr": 1.9126315789473685e-05, "step": 83 }, { "epoch": 0.2182774490466798, "high_lr": 0.0009563157894736842, "low_lr": 1.9126315789473685e-05, "step": 83 }, { "epoch": 0.2182774490466798, "high_lr": 0.0009563157894736842, "low_lr": 1.9126315789473685e-05, "step": 83 }, { "epoch": 0.2182774490466798, "high_lr": 0.0009563157894736842, "low_lr": 1.9126315789473685e-05, "step": 83 }, { "epoch": 0.2182774490466798, "high_lr": 0.0009563157894736842, "low_lr": 1.9126315789473685e-05, "step": 83 }, { "epoch": 0.2182774490466798, "high_lr": 0.0009563157894736842, "low_lr": 1.9126315789473685e-05, "step": 83 }, { "epoch": 0.2182774490466798, "high_lr": 0.0009563157894736842, "low_lr": 1.9126315789473685e-05, "step": 83 }, { "epoch": 0.22090729783037474, "grad_norm": 0.5947110056877136, "learning_rate": 0.0009557894736842105, "loss": 1.9359, "step": 84 }, { "epoch": 0.22090729783037474, "high_lr": 0.0009557894736842105, "low_lr": 1.9115789473684213e-05, "step": 84 }, { "epoch": 0.22090729783037474, "high_lr": 0.0009557894736842105, "low_lr": 1.9115789473684213e-05, "step": 84 }, { "epoch": 0.22090729783037474, "high_lr": 0.0009557894736842105, "low_lr": 1.9115789473684213e-05, "step": 84 }, { "epoch": 0.22090729783037474, "high_lr": 0.0009557894736842105, "low_lr": 1.9115789473684213e-05, "step": 84 }, { "epoch": 0.22090729783037474, "high_lr": 0.0009557894736842105, "low_lr": 1.9115789473684213e-05, "step": 84 }, { "epoch": 0.22090729783037474, "high_lr": 0.0009557894736842105, "low_lr": 1.9115789473684213e-05, "step": 84 }, { "epoch": 0.22090729783037474, "high_lr": 0.0009557894736842105, "low_lr": 1.9115789473684213e-05, "step": 84 }, { "epoch": 0.22090729783037474, "high_lr": 0.0009557894736842105, "low_lr": 1.9115789473684213e-05, "step": 84 }, { "epoch": 0.2235371466140697, "grad_norm": 0.6437410116195679, "learning_rate": 0.0009552631578947368, "loss": 1.9238, "step": 85 }, { "epoch": 0.2235371466140697, "high_lr": 0.0009552631578947368, "low_lr": 1.9105263157894738e-05, "step": 85 }, { "epoch": 0.2235371466140697, "high_lr": 0.0009552631578947368, "low_lr": 1.9105263157894738e-05, "step": 85 }, { "epoch": 0.2235371466140697, "high_lr": 0.0009552631578947368, "low_lr": 1.9105263157894738e-05, "step": 85 }, { "epoch": 0.2235371466140697, "high_lr": 0.0009552631578947368, "low_lr": 1.9105263157894738e-05, "step": 85 }, { "epoch": 0.2235371466140697, "high_lr": 0.0009552631578947368, "low_lr": 1.9105263157894738e-05, "step": 85 }, { "epoch": 0.2235371466140697, "high_lr": 0.0009552631578947368, "low_lr": 1.9105263157894738e-05, "step": 85 }, { "epoch": 0.2235371466140697, "high_lr": 0.0009552631578947368, "low_lr": 1.9105263157894738e-05, "step": 85 }, { "epoch": 0.2235371466140697, "high_lr": 0.0009552631578947368, "low_lr": 1.9105263157894738e-05, "step": 85 }, { "epoch": 0.22616699539776464, "grad_norm": 0.6488404273986816, "learning_rate": 0.0009547368421052631, "loss": 1.9571, "step": 86 }, { "epoch": 0.22616699539776464, "high_lr": 0.0009547368421052631, "low_lr": 1.9094736842105262e-05, "step": 86 }, { "epoch": 0.22616699539776464, "high_lr": 0.0009547368421052631, "low_lr": 1.9094736842105262e-05, "step": 86 }, { "epoch": 0.22616699539776464, "high_lr": 0.0009547368421052631, "low_lr": 1.9094736842105262e-05, "step": 86 }, { "epoch": 0.22616699539776464, "high_lr": 0.0009547368421052631, "low_lr": 1.9094736842105262e-05, "step": 86 }, { "epoch": 0.22616699539776464, "high_lr": 0.0009547368421052631, "low_lr": 1.9094736842105262e-05, "step": 86 }, { "epoch": 0.22616699539776464, "high_lr": 0.0009547368421052631, "low_lr": 1.9094736842105262e-05, "step": 86 }, { "epoch": 0.22616699539776464, "high_lr": 0.0009547368421052631, "low_lr": 1.9094736842105262e-05, "step": 86 }, { "epoch": 0.22616699539776464, "high_lr": 0.0009547368421052631, "low_lr": 1.9094736842105262e-05, "step": 86 }, { "epoch": 0.22879684418145957, "grad_norm": 0.6692899465560913, "learning_rate": 0.0009542105263157895, "loss": 2.0267, "step": 87 }, { "epoch": 0.22879684418145957, "high_lr": 0.0009542105263157895, "low_lr": 1.908421052631579e-05, "step": 87 }, { "epoch": 0.22879684418145957, "high_lr": 0.0009542105263157895, "low_lr": 1.908421052631579e-05, "step": 87 }, { "epoch": 0.22879684418145957, "high_lr": 0.0009542105263157895, "low_lr": 1.908421052631579e-05, "step": 87 }, { "epoch": 0.22879684418145957, "high_lr": 0.0009542105263157895, "low_lr": 1.908421052631579e-05, "step": 87 }, { "epoch": 0.22879684418145957, "high_lr": 0.0009542105263157895, "low_lr": 1.908421052631579e-05, "step": 87 }, { "epoch": 0.22879684418145957, "high_lr": 0.0009542105263157895, "low_lr": 1.908421052631579e-05, "step": 87 }, { "epoch": 0.22879684418145957, "high_lr": 0.0009542105263157895, "low_lr": 1.908421052631579e-05, "step": 87 }, { "epoch": 0.22879684418145957, "high_lr": 0.0009542105263157895, "low_lr": 1.908421052631579e-05, "step": 87 }, { "epoch": 0.2314266929651545, "grad_norm": 0.6169629693031311, "learning_rate": 0.0009536842105263158, "loss": 1.9545, "step": 88 }, { "epoch": 0.2314266929651545, "high_lr": 0.0009536842105263158, "low_lr": 1.907368421052632e-05, "step": 88 }, { "epoch": 0.2314266929651545, "high_lr": 0.0009536842105263158, "low_lr": 1.907368421052632e-05, "step": 88 }, { "epoch": 0.2314266929651545, "high_lr": 0.0009536842105263158, "low_lr": 1.907368421052632e-05, "step": 88 }, { "epoch": 0.2314266929651545, "high_lr": 0.0009536842105263158, "low_lr": 1.907368421052632e-05, "step": 88 }, { "epoch": 0.2314266929651545, "high_lr": 0.0009536842105263158, "low_lr": 1.907368421052632e-05, "step": 88 }, { "epoch": 0.2314266929651545, "high_lr": 0.0009536842105263158, "low_lr": 1.907368421052632e-05, "step": 88 }, { "epoch": 0.2314266929651545, "high_lr": 0.0009536842105263158, "low_lr": 1.907368421052632e-05, "step": 88 }, { "epoch": 0.2314266929651545, "high_lr": 0.0009536842105263158, "low_lr": 1.907368421052632e-05, "step": 88 }, { "epoch": 0.23405654174884943, "grad_norm": 0.587587296962738, "learning_rate": 0.0009531578947368421, "loss": 1.8974, "step": 89 }, { "epoch": 0.23405654174884943, "high_lr": 0.0009531578947368421, "low_lr": 1.9063157894736843e-05, "step": 89 }, { "epoch": 0.23405654174884943, "high_lr": 0.0009531578947368421, "low_lr": 1.9063157894736843e-05, "step": 89 }, { "epoch": 0.23405654174884943, "high_lr": 0.0009531578947368421, "low_lr": 1.9063157894736843e-05, "step": 89 }, { "epoch": 0.23405654174884943, "high_lr": 0.0009531578947368421, "low_lr": 1.9063157894736843e-05, "step": 89 }, { "epoch": 0.23405654174884943, "high_lr": 0.0009531578947368421, "low_lr": 1.9063157894736843e-05, "step": 89 }, { "epoch": 0.23405654174884943, "high_lr": 0.0009531578947368421, "low_lr": 1.9063157894736843e-05, "step": 89 }, { "epoch": 0.23405654174884943, "high_lr": 0.0009531578947368421, "low_lr": 1.9063157894736843e-05, "step": 89 }, { "epoch": 0.23405654174884943, "high_lr": 0.0009531578947368421, "low_lr": 1.9063157894736843e-05, "step": 89 }, { "epoch": 0.23668639053254437, "grad_norm": 0.6006247401237488, "learning_rate": 0.0009526315789473684, "loss": 1.9328, "step": 90 }, { "epoch": 0.23668639053254437, "high_lr": 0.0009526315789473684, "low_lr": 1.9052631578947368e-05, "step": 90 }, { "epoch": 0.23668639053254437, "high_lr": 0.0009526315789473684, "low_lr": 1.9052631578947368e-05, "step": 90 }, { "epoch": 0.23668639053254437, "high_lr": 0.0009526315789473684, "low_lr": 1.9052631578947368e-05, "step": 90 }, { "epoch": 0.23668639053254437, "high_lr": 0.0009526315789473684, "low_lr": 1.9052631578947368e-05, "step": 90 }, { "epoch": 0.23668639053254437, "high_lr": 0.0009526315789473684, "low_lr": 1.9052631578947368e-05, "step": 90 }, { "epoch": 0.23668639053254437, "high_lr": 0.0009526315789473684, "low_lr": 1.9052631578947368e-05, "step": 90 }, { "epoch": 0.23668639053254437, "high_lr": 0.0009526315789473684, "low_lr": 1.9052631578947368e-05, "step": 90 }, { "epoch": 0.23668639053254437, "high_lr": 0.0009526315789473684, "low_lr": 1.9052631578947368e-05, "step": 90 }, { "epoch": 0.23931623931623933, "grad_norm": 0.7251639366149902, "learning_rate": 0.0009521052631578949, "loss": 2.0029, "step": 91 }, { "epoch": 0.23931623931623933, "high_lr": 0.0009521052631578949, "low_lr": 1.9042105263157896e-05, "step": 91 }, { "epoch": 0.23931623931623933, "high_lr": 0.0009521052631578949, "low_lr": 1.9042105263157896e-05, "step": 91 }, { "epoch": 0.23931623931623933, "high_lr": 0.0009521052631578949, "low_lr": 1.9042105263157896e-05, "step": 91 }, { "epoch": 0.23931623931623933, "high_lr": 0.0009521052631578949, "low_lr": 1.9042105263157896e-05, "step": 91 }, { "epoch": 0.23931623931623933, "high_lr": 0.0009521052631578949, "low_lr": 1.9042105263157896e-05, "step": 91 }, { "epoch": 0.23931623931623933, "high_lr": 0.0009521052631578949, "low_lr": 1.9042105263157896e-05, "step": 91 }, { "epoch": 0.23931623931623933, "high_lr": 0.0009521052631578949, "low_lr": 1.9042105263157896e-05, "step": 91 }, { "epoch": 0.23931623931623933, "high_lr": 0.0009521052631578949, "low_lr": 1.9042105263157896e-05, "step": 91 }, { "epoch": 0.24194608809993426, "grad_norm": 0.7573750019073486, "learning_rate": 0.0009515789473684211, "loss": 2.0662, "step": 92 }, { "epoch": 0.24194608809993426, "high_lr": 0.0009515789473684211, "low_lr": 1.9031578947368424e-05, "step": 92 }, { "epoch": 0.24194608809993426, "high_lr": 0.0009515789473684211, "low_lr": 1.9031578947368424e-05, "step": 92 }, { "epoch": 0.24194608809993426, "high_lr": 0.0009515789473684211, "low_lr": 1.9031578947368424e-05, "step": 92 }, { "epoch": 0.24194608809993426, "high_lr": 0.0009515789473684211, "low_lr": 1.9031578947368424e-05, "step": 92 }, { "epoch": 0.24194608809993426, "high_lr": 0.0009515789473684211, "low_lr": 1.9031578947368424e-05, "step": 92 }, { "epoch": 0.24194608809993426, "high_lr": 0.0009515789473684211, "low_lr": 1.9031578947368424e-05, "step": 92 }, { "epoch": 0.24194608809993426, "high_lr": 0.0009515789473684211, "low_lr": 1.9031578947368424e-05, "step": 92 }, { "epoch": 0.24194608809993426, "high_lr": 0.0009515789473684211, "low_lr": 1.9031578947368424e-05, "step": 92 }, { "epoch": 0.2445759368836292, "grad_norm": 0.6473020315170288, "learning_rate": 0.0009510526315789474, "loss": 1.9681, "step": 93 }, { "epoch": 0.2445759368836292, "high_lr": 0.0009510526315789474, "low_lr": 1.902105263157895e-05, "step": 93 }, { "epoch": 0.2445759368836292, "high_lr": 0.0009510526315789474, "low_lr": 1.902105263157895e-05, "step": 93 }, { "epoch": 0.2445759368836292, "high_lr": 0.0009510526315789474, "low_lr": 1.902105263157895e-05, "step": 93 }, { "epoch": 0.2445759368836292, "high_lr": 0.0009510526315789474, "low_lr": 1.902105263157895e-05, "step": 93 }, { "epoch": 0.2445759368836292, "high_lr": 0.0009510526315789474, "low_lr": 1.902105263157895e-05, "step": 93 }, { "epoch": 0.2445759368836292, "high_lr": 0.0009510526315789474, "low_lr": 1.902105263157895e-05, "step": 93 }, { "epoch": 0.2445759368836292, "high_lr": 0.0009510526315789474, "low_lr": 1.902105263157895e-05, "step": 93 }, { "epoch": 0.2445759368836292, "high_lr": 0.0009510526315789474, "low_lr": 1.902105263157895e-05, "step": 93 }, { "epoch": 0.24720578566732412, "grad_norm": 0.6018515229225159, "learning_rate": 0.0009505263157894737, "loss": 1.9542, "step": 94 }, { "epoch": 0.24720578566732412, "high_lr": 0.0009505263157894737, "low_lr": 1.9010526315789476e-05, "step": 94 }, { "epoch": 0.24720578566732412, "high_lr": 0.0009505263157894737, "low_lr": 1.9010526315789476e-05, "step": 94 }, { "epoch": 0.24720578566732412, "high_lr": 0.0009505263157894737, "low_lr": 1.9010526315789476e-05, "step": 94 }, { "epoch": 0.24720578566732412, "high_lr": 0.0009505263157894737, "low_lr": 1.9010526315789476e-05, "step": 94 }, { "epoch": 0.24720578566732412, "high_lr": 0.0009505263157894737, "low_lr": 1.9010526315789476e-05, "step": 94 }, { "epoch": 0.24720578566732412, "high_lr": 0.0009505263157894737, "low_lr": 1.9010526315789476e-05, "step": 94 }, { "epoch": 0.24720578566732412, "high_lr": 0.0009505263157894737, "low_lr": 1.9010526315789476e-05, "step": 94 }, { "epoch": 0.24720578566732412, "high_lr": 0.0009505263157894737, "low_lr": 1.9010526315789476e-05, "step": 94 }, { "epoch": 0.24983563445101906, "grad_norm": 0.6318551898002625, "learning_rate": 0.00095, "loss": 1.9501, "step": 95 }, { "epoch": 0.24983563445101906, "high_lr": 0.00095, "low_lr": 1.9e-05, "step": 95 }, { "epoch": 0.24983563445101906, "high_lr": 0.00095, "low_lr": 1.9e-05, "step": 95 }, { "epoch": 0.24983563445101906, "high_lr": 0.00095, "low_lr": 1.9e-05, "step": 95 }, { "epoch": 0.24983563445101906, "high_lr": 0.00095, "low_lr": 1.9e-05, "step": 95 }, { "epoch": 0.24983563445101906, "high_lr": 0.00095, "low_lr": 1.9e-05, "step": 95 }, { "epoch": 0.24983563445101906, "high_lr": 0.00095, "low_lr": 1.9e-05, "step": 95 }, { "epoch": 0.24983563445101906, "high_lr": 0.00095, "low_lr": 1.9e-05, "step": 95 }, { "epoch": 0.24983563445101906, "high_lr": 0.00095, "low_lr": 1.9e-05, "step": 95 }, { "epoch": 0.252465483234714, "grad_norm": 0.6606828570365906, "learning_rate": 0.0009494736842105264, "loss": 1.8956, "step": 96 }, { "epoch": 0.252465483234714, "high_lr": 0.0009494736842105264, "low_lr": 1.898947368421053e-05, "step": 96 }, { "epoch": 0.252465483234714, "high_lr": 0.0009494736842105264, "low_lr": 1.898947368421053e-05, "step": 96 }, { "epoch": 0.252465483234714, "high_lr": 0.0009494736842105264, "low_lr": 1.898947368421053e-05, "step": 96 }, { "epoch": 0.252465483234714, "high_lr": 0.0009494736842105264, "low_lr": 1.898947368421053e-05, "step": 96 }, { "epoch": 0.252465483234714, "high_lr": 0.0009494736842105264, "low_lr": 1.898947368421053e-05, "step": 96 }, { "epoch": 0.252465483234714, "high_lr": 0.0009494736842105264, "low_lr": 1.898947368421053e-05, "step": 96 }, { "epoch": 0.252465483234714, "high_lr": 0.0009494736842105264, "low_lr": 1.898947368421053e-05, "step": 96 }, { "epoch": 0.252465483234714, "high_lr": 0.0009494736842105264, "low_lr": 1.898947368421053e-05, "step": 96 }, { "epoch": 0.25509533201840895, "grad_norm": 0.64029860496521, "learning_rate": 0.0009489473684210527, "loss": 1.9137, "step": 97 }, { "epoch": 0.25509533201840895, "high_lr": 0.0009489473684210527, "low_lr": 1.8978947368421054e-05, "step": 97 }, { "epoch": 0.25509533201840895, "high_lr": 0.0009489473684210527, "low_lr": 1.8978947368421054e-05, "step": 97 }, { "epoch": 0.25509533201840895, "high_lr": 0.0009489473684210527, "low_lr": 1.8978947368421054e-05, "step": 97 }, { "epoch": 0.25509533201840895, "high_lr": 0.0009489473684210527, "low_lr": 1.8978947368421054e-05, "step": 97 }, { "epoch": 0.25509533201840895, "high_lr": 0.0009489473684210527, "low_lr": 1.8978947368421054e-05, "step": 97 }, { "epoch": 0.25509533201840895, "high_lr": 0.0009489473684210527, "low_lr": 1.8978947368421054e-05, "step": 97 }, { "epoch": 0.25509533201840895, "high_lr": 0.0009489473684210527, "low_lr": 1.8978947368421054e-05, "step": 97 }, { "epoch": 0.25509533201840895, "high_lr": 0.0009489473684210527, "low_lr": 1.8978947368421054e-05, "step": 97 }, { "epoch": 0.2577251808021039, "grad_norm": 0.6775450110435486, "learning_rate": 0.000948421052631579, "loss": 1.9968, "step": 98 }, { "epoch": 0.2577251808021039, "high_lr": 0.000948421052631579, "low_lr": 1.8968421052631582e-05, "step": 98 }, { "epoch": 0.2577251808021039, "high_lr": 0.000948421052631579, "low_lr": 1.8968421052631582e-05, "step": 98 }, { "epoch": 0.2577251808021039, "high_lr": 0.000948421052631579, "low_lr": 1.8968421052631582e-05, "step": 98 }, { "epoch": 0.2577251808021039, "high_lr": 0.000948421052631579, "low_lr": 1.8968421052631582e-05, "step": 98 }, { "epoch": 0.2577251808021039, "high_lr": 0.000948421052631579, "low_lr": 1.8968421052631582e-05, "step": 98 }, { "epoch": 0.2577251808021039, "high_lr": 0.000948421052631579, "low_lr": 1.8968421052631582e-05, "step": 98 }, { "epoch": 0.2577251808021039, "high_lr": 0.000948421052631579, "low_lr": 1.8968421052631582e-05, "step": 98 }, { "epoch": 0.2577251808021039, "high_lr": 0.000948421052631579, "low_lr": 1.8968421052631582e-05, "step": 98 }, { "epoch": 0.2603550295857988, "grad_norm": 0.6305446028709412, "learning_rate": 0.0009478947368421053, "loss": 1.8879, "step": 99 }, { "epoch": 0.2603550295857988, "high_lr": 0.0009478947368421053, "low_lr": 1.8957894736842106e-05, "step": 99 }, { "epoch": 0.2603550295857988, "high_lr": 0.0009478947368421053, "low_lr": 1.8957894736842106e-05, "step": 99 }, { "epoch": 0.2603550295857988, "high_lr": 0.0009478947368421053, "low_lr": 1.8957894736842106e-05, "step": 99 }, { "epoch": 0.2603550295857988, "high_lr": 0.0009478947368421053, "low_lr": 1.8957894736842106e-05, "step": 99 }, { "epoch": 0.2603550295857988, "high_lr": 0.0009478947368421053, "low_lr": 1.8957894736842106e-05, "step": 99 }, { "epoch": 0.2603550295857988, "high_lr": 0.0009478947368421053, "low_lr": 1.8957894736842106e-05, "step": 99 }, { "epoch": 0.2603550295857988, "high_lr": 0.0009478947368421053, "low_lr": 1.8957894736842106e-05, "step": 99 }, { "epoch": 0.2603550295857988, "high_lr": 0.0009478947368421053, "low_lr": 1.8957894736842106e-05, "step": 99 }, { "epoch": 0.26298487836949375, "grad_norm": 0.6236740946769714, "learning_rate": 0.0009473684210526315, "loss": 1.9292, "step": 100 }, { "epoch": 0.26298487836949375, "high_lr": 0.0009473684210526315, "low_lr": 1.894736842105263e-05, "step": 100 }, { "epoch": 0.26298487836949375, "high_lr": 0.0009473684210526315, "low_lr": 1.894736842105263e-05, "step": 100 }, { "epoch": 0.26298487836949375, "high_lr": 0.0009473684210526315, "low_lr": 1.894736842105263e-05, "step": 100 }, { "epoch": 0.26298487836949375, "high_lr": 0.0009473684210526315, "low_lr": 1.894736842105263e-05, "step": 100 }, { "epoch": 0.26298487836949375, "high_lr": 0.0009473684210526315, "low_lr": 1.894736842105263e-05, "step": 100 }, { "epoch": 0.26298487836949375, "high_lr": 0.0009473684210526315, "low_lr": 1.894736842105263e-05, "step": 100 }, { "epoch": 0.26298487836949375, "high_lr": 0.0009473684210526315, "low_lr": 1.894736842105263e-05, "step": 100 }, { "epoch": 0.26298487836949375, "high_lr": 0.0009473684210526315, "low_lr": 1.894736842105263e-05, "step": 100 }, { "epoch": 0.2656147271531887, "grad_norm": 0.7075583934783936, "learning_rate": 0.0009468421052631579, "loss": 1.9358, "step": 101 }, { "epoch": 0.2656147271531887, "high_lr": 0.0009468421052631579, "low_lr": 1.893684210526316e-05, "step": 101 }, { "epoch": 0.2656147271531887, "high_lr": 0.0009468421052631579, "low_lr": 1.893684210526316e-05, "step": 101 }, { "epoch": 0.2656147271531887, "high_lr": 0.0009468421052631579, "low_lr": 1.893684210526316e-05, "step": 101 }, { "epoch": 0.2656147271531887, "high_lr": 0.0009468421052631579, "low_lr": 1.893684210526316e-05, "step": 101 }, { "epoch": 0.2656147271531887, "high_lr": 0.0009468421052631579, "low_lr": 1.893684210526316e-05, "step": 101 }, { "epoch": 0.2656147271531887, "high_lr": 0.0009468421052631579, "low_lr": 1.893684210526316e-05, "step": 101 }, { "epoch": 0.2656147271531887, "high_lr": 0.0009468421052631579, "low_lr": 1.893684210526316e-05, "step": 101 }, { "epoch": 0.2656147271531887, "high_lr": 0.0009468421052631579, "low_lr": 1.893684210526316e-05, "step": 101 }, { "epoch": 0.2682445759368836, "grad_norm": 0.6899865865707397, "learning_rate": 0.0009463157894736842, "loss": 1.9407, "step": 102 }, { "epoch": 0.2682445759368836, "high_lr": 0.0009463157894736842, "low_lr": 1.8926315789473687e-05, "step": 102 }, { "epoch": 0.2682445759368836, "high_lr": 0.0009463157894736842, "low_lr": 1.8926315789473687e-05, "step": 102 }, { "epoch": 0.2682445759368836, "high_lr": 0.0009463157894736842, "low_lr": 1.8926315789473687e-05, "step": 102 }, { "epoch": 0.2682445759368836, "high_lr": 0.0009463157894736842, "low_lr": 1.8926315789473687e-05, "step": 102 }, { "epoch": 0.2682445759368836, "high_lr": 0.0009463157894736842, "low_lr": 1.8926315789473687e-05, "step": 102 }, { "epoch": 0.2682445759368836, "high_lr": 0.0009463157894736842, "low_lr": 1.8926315789473687e-05, "step": 102 }, { "epoch": 0.2682445759368836, "high_lr": 0.0009463157894736842, "low_lr": 1.8926315789473687e-05, "step": 102 }, { "epoch": 0.2682445759368836, "high_lr": 0.0009463157894736842, "low_lr": 1.8926315789473687e-05, "step": 102 }, { "epoch": 0.27087442472057854, "grad_norm": 0.6408928632736206, "learning_rate": 0.0009457894736842105, "loss": 1.9906, "step": 103 }, { "epoch": 0.27087442472057854, "high_lr": 0.0009457894736842105, "low_lr": 1.891578947368421e-05, "step": 103 }, { "epoch": 0.27087442472057854, "high_lr": 0.0009457894736842105, "low_lr": 1.891578947368421e-05, "step": 103 }, { "epoch": 0.27087442472057854, "high_lr": 0.0009457894736842105, "low_lr": 1.891578947368421e-05, "step": 103 }, { "epoch": 0.27087442472057854, "high_lr": 0.0009457894736842105, "low_lr": 1.891578947368421e-05, "step": 103 }, { "epoch": 0.27087442472057854, "high_lr": 0.0009457894736842105, "low_lr": 1.891578947368421e-05, "step": 103 }, { "epoch": 0.27087442472057854, "high_lr": 0.0009457894736842105, "low_lr": 1.891578947368421e-05, "step": 103 }, { "epoch": 0.27087442472057854, "high_lr": 0.0009457894736842105, "low_lr": 1.891578947368421e-05, "step": 103 }, { "epoch": 0.27087442472057854, "high_lr": 0.0009457894736842105, "low_lr": 1.891578947368421e-05, "step": 103 }, { "epoch": 0.27350427350427353, "grad_norm": 0.6023521423339844, "learning_rate": 0.0009452631578947368, "loss": 1.8894, "step": 104 }, { "epoch": 0.27350427350427353, "high_lr": 0.0009452631578947368, "low_lr": 1.8905263157894736e-05, "step": 104 }, { "epoch": 0.27350427350427353, "high_lr": 0.0009452631578947368, "low_lr": 1.8905263157894736e-05, "step": 104 }, { "epoch": 0.27350427350427353, "high_lr": 0.0009452631578947368, "low_lr": 1.8905263157894736e-05, "step": 104 }, { "epoch": 0.27350427350427353, "high_lr": 0.0009452631578947368, "low_lr": 1.8905263157894736e-05, "step": 104 }, { "epoch": 0.27350427350427353, "high_lr": 0.0009452631578947368, "low_lr": 1.8905263157894736e-05, "step": 104 }, { "epoch": 0.27350427350427353, "high_lr": 0.0009452631578947368, "low_lr": 1.8905263157894736e-05, "step": 104 }, { "epoch": 0.27350427350427353, "high_lr": 0.0009452631578947368, "low_lr": 1.8905263157894736e-05, "step": 104 }, { "epoch": 0.27350427350427353, "high_lr": 0.0009452631578947368, "low_lr": 1.8905263157894736e-05, "step": 104 }, { "epoch": 0.27613412228796846, "grad_norm": 0.6638351082801819, "learning_rate": 0.0009447368421052632, "loss": 1.927, "step": 105 }, { "epoch": 0.27613412228796846, "high_lr": 0.0009447368421052632, "low_lr": 1.8894736842105264e-05, "step": 105 }, { "epoch": 0.27613412228796846, "high_lr": 0.0009447368421052632, "low_lr": 1.8894736842105264e-05, "step": 105 }, { "epoch": 0.27613412228796846, "high_lr": 0.0009447368421052632, "low_lr": 1.8894736842105264e-05, "step": 105 }, { "epoch": 0.27613412228796846, "high_lr": 0.0009447368421052632, "low_lr": 1.8894736842105264e-05, "step": 105 }, { "epoch": 0.27613412228796846, "high_lr": 0.0009447368421052632, "low_lr": 1.8894736842105264e-05, "step": 105 }, { "epoch": 0.27613412228796846, "high_lr": 0.0009447368421052632, "low_lr": 1.8894736842105264e-05, "step": 105 }, { "epoch": 0.27613412228796846, "high_lr": 0.0009447368421052632, "low_lr": 1.8894736842105264e-05, "step": 105 }, { "epoch": 0.27613412228796846, "high_lr": 0.0009447368421052632, "low_lr": 1.8894736842105264e-05, "step": 105 }, { "epoch": 0.2787639710716634, "grad_norm": 0.6832205653190613, "learning_rate": 0.0009442105263157895, "loss": 1.9289, "step": 106 }, { "epoch": 0.2787639710716634, "high_lr": 0.0009442105263157895, "low_lr": 1.8884210526315792e-05, "step": 106 }, { "epoch": 0.2787639710716634, "high_lr": 0.0009442105263157895, "low_lr": 1.8884210526315792e-05, "step": 106 }, { "epoch": 0.2787639710716634, "high_lr": 0.0009442105263157895, "low_lr": 1.8884210526315792e-05, "step": 106 }, { "epoch": 0.2787639710716634, "high_lr": 0.0009442105263157895, "low_lr": 1.8884210526315792e-05, "step": 106 }, { "epoch": 0.2787639710716634, "high_lr": 0.0009442105263157895, "low_lr": 1.8884210526315792e-05, "step": 106 }, { "epoch": 0.2787639710716634, "high_lr": 0.0009442105263157895, "low_lr": 1.8884210526315792e-05, "step": 106 }, { "epoch": 0.2787639710716634, "high_lr": 0.0009442105263157895, "low_lr": 1.8884210526315792e-05, "step": 106 }, { "epoch": 0.2787639710716634, "high_lr": 0.0009442105263157895, "low_lr": 1.8884210526315792e-05, "step": 106 }, { "epoch": 0.28139381985535833, "grad_norm": 0.6350919604301453, "learning_rate": 0.0009436842105263159, "loss": 1.9271, "step": 107 }, { "epoch": 0.28139381985535833, "high_lr": 0.0009436842105263159, "low_lr": 1.8873684210526317e-05, "step": 107 }, { "epoch": 0.28139381985535833, "high_lr": 0.0009436842105263159, "low_lr": 1.8873684210526317e-05, "step": 107 }, { "epoch": 0.28139381985535833, "high_lr": 0.0009436842105263159, "low_lr": 1.8873684210526317e-05, "step": 107 }, { "epoch": 0.28139381985535833, "high_lr": 0.0009436842105263159, "low_lr": 1.8873684210526317e-05, "step": 107 }, { "epoch": 0.28139381985535833, "high_lr": 0.0009436842105263159, "low_lr": 1.8873684210526317e-05, "step": 107 }, { "epoch": 0.28139381985535833, "high_lr": 0.0009436842105263159, "low_lr": 1.8873684210526317e-05, "step": 107 }, { "epoch": 0.28139381985535833, "high_lr": 0.0009436842105263159, "low_lr": 1.8873684210526317e-05, "step": 107 }, { "epoch": 0.28139381985535833, "high_lr": 0.0009436842105263159, "low_lr": 1.8873684210526317e-05, "step": 107 }, { "epoch": 0.28402366863905326, "grad_norm": 0.7083135843276978, "learning_rate": 0.0009431578947368421, "loss": 1.9643, "step": 108 }, { "epoch": 0.28402366863905326, "high_lr": 0.0009431578947368421, "low_lr": 1.886315789473684e-05, "step": 108 }, { "epoch": 0.28402366863905326, "high_lr": 0.0009431578947368421, "low_lr": 1.886315789473684e-05, "step": 108 }, { "epoch": 0.28402366863905326, "high_lr": 0.0009431578947368421, "low_lr": 1.886315789473684e-05, "step": 108 }, { "epoch": 0.28402366863905326, "high_lr": 0.0009431578947368421, "low_lr": 1.886315789473684e-05, "step": 108 }, { "epoch": 0.28402366863905326, "high_lr": 0.0009431578947368421, "low_lr": 1.886315789473684e-05, "step": 108 }, { "epoch": 0.28402366863905326, "high_lr": 0.0009431578947368421, "low_lr": 1.886315789473684e-05, "step": 108 }, { "epoch": 0.28402366863905326, "high_lr": 0.0009431578947368421, "low_lr": 1.886315789473684e-05, "step": 108 }, { "epoch": 0.28402366863905326, "high_lr": 0.0009431578947368421, "low_lr": 1.886315789473684e-05, "step": 108 }, { "epoch": 0.2866535174227482, "grad_norm": 0.7548526525497437, "learning_rate": 0.0009426315789473684, "loss": 1.9212, "step": 109 }, { "epoch": 0.2866535174227482, "high_lr": 0.0009426315789473684, "low_lr": 1.885263157894737e-05, "step": 109 }, { "epoch": 0.2866535174227482, "high_lr": 0.0009426315789473684, "low_lr": 1.885263157894737e-05, "step": 109 }, { "epoch": 0.2866535174227482, "high_lr": 0.0009426315789473684, "low_lr": 1.885263157894737e-05, "step": 109 }, { "epoch": 0.2866535174227482, "high_lr": 0.0009426315789473684, "low_lr": 1.885263157894737e-05, "step": 109 }, { "epoch": 0.2866535174227482, "high_lr": 0.0009426315789473684, "low_lr": 1.885263157894737e-05, "step": 109 }, { "epoch": 0.2866535174227482, "high_lr": 0.0009426315789473684, "low_lr": 1.885263157894737e-05, "step": 109 }, { "epoch": 0.2866535174227482, "high_lr": 0.0009426315789473684, "low_lr": 1.885263157894737e-05, "step": 109 }, { "epoch": 0.2866535174227482, "high_lr": 0.0009426315789473684, "low_lr": 1.885263157894737e-05, "step": 109 }, { "epoch": 0.2892833662064431, "grad_norm": 0.6176226139068604, "learning_rate": 0.0009421052631578948, "loss": 1.8167, "step": 110 }, { "epoch": 0.2892833662064431, "high_lr": 0.0009421052631578948, "low_lr": 1.8842105263157898e-05, "step": 110 }, { "epoch": 0.2892833662064431, "high_lr": 0.0009421052631578948, "low_lr": 1.8842105263157898e-05, "step": 110 }, { "epoch": 0.2892833662064431, "high_lr": 0.0009421052631578948, "low_lr": 1.8842105263157898e-05, "step": 110 }, { "epoch": 0.2892833662064431, "high_lr": 0.0009421052631578948, "low_lr": 1.8842105263157898e-05, "step": 110 }, { "epoch": 0.2892833662064431, "high_lr": 0.0009421052631578948, "low_lr": 1.8842105263157898e-05, "step": 110 }, { "epoch": 0.2892833662064431, "high_lr": 0.0009421052631578948, "low_lr": 1.8842105263157898e-05, "step": 110 }, { "epoch": 0.2892833662064431, "high_lr": 0.0009421052631578948, "low_lr": 1.8842105263157898e-05, "step": 110 }, { "epoch": 0.2892833662064431, "high_lr": 0.0009421052631578948, "low_lr": 1.8842105263157898e-05, "step": 110 }, { "epoch": 0.29191321499013806, "grad_norm": 0.6795015931129456, "learning_rate": 0.0009415789473684211, "loss": 1.9002, "step": 111 }, { "epoch": 0.29191321499013806, "high_lr": 0.0009415789473684211, "low_lr": 1.8831578947368422e-05, "step": 111 }, { "epoch": 0.29191321499013806, "high_lr": 0.0009415789473684211, "low_lr": 1.8831578947368422e-05, "step": 111 }, { "epoch": 0.29191321499013806, "high_lr": 0.0009415789473684211, "low_lr": 1.8831578947368422e-05, "step": 111 }, { "epoch": 0.29191321499013806, "high_lr": 0.0009415789473684211, "low_lr": 1.8831578947368422e-05, "step": 111 }, { "epoch": 0.29191321499013806, "high_lr": 0.0009415789473684211, "low_lr": 1.8831578947368422e-05, "step": 111 }, { "epoch": 0.29191321499013806, "high_lr": 0.0009415789473684211, "low_lr": 1.8831578947368422e-05, "step": 111 }, { "epoch": 0.29191321499013806, "high_lr": 0.0009415789473684211, "low_lr": 1.8831578947368422e-05, "step": 111 }, { "epoch": 0.29191321499013806, "high_lr": 0.0009415789473684211, "low_lr": 1.8831578947368422e-05, "step": 111 }, { "epoch": 0.294543063773833, "grad_norm": 0.7737109065055847, "learning_rate": 0.0009410526315789474, "loss": 2.0236, "step": 112 }, { "epoch": 0.294543063773833, "high_lr": 0.0009410526315789474, "low_lr": 1.882105263157895e-05, "step": 112 }, { "epoch": 0.294543063773833, "high_lr": 0.0009410526315789474, "low_lr": 1.882105263157895e-05, "step": 112 }, { "epoch": 0.294543063773833, "high_lr": 0.0009410526315789474, "low_lr": 1.882105263157895e-05, "step": 112 }, { "epoch": 0.294543063773833, "high_lr": 0.0009410526315789474, "low_lr": 1.882105263157895e-05, "step": 112 }, { "epoch": 0.294543063773833, "high_lr": 0.0009410526315789474, "low_lr": 1.882105263157895e-05, "step": 112 }, { "epoch": 0.294543063773833, "high_lr": 0.0009410526315789474, "low_lr": 1.882105263157895e-05, "step": 112 }, { "epoch": 0.294543063773833, "high_lr": 0.0009410526315789474, "low_lr": 1.882105263157895e-05, "step": 112 }, { "epoch": 0.294543063773833, "high_lr": 0.0009410526315789474, "low_lr": 1.882105263157895e-05, "step": 112 }, { "epoch": 0.2971729125575279, "grad_norm": 0.6610509753227234, "learning_rate": 0.0009405263157894737, "loss": 1.9563, "step": 113 }, { "epoch": 0.2971729125575279, "high_lr": 0.0009405263157894737, "low_lr": 1.8810526315789475e-05, "step": 113 }, { "epoch": 0.2971729125575279, "high_lr": 0.0009405263157894737, "low_lr": 1.8810526315789475e-05, "step": 113 }, { "epoch": 0.2971729125575279, "high_lr": 0.0009405263157894737, "low_lr": 1.8810526315789475e-05, "step": 113 }, { "epoch": 0.2971729125575279, "high_lr": 0.0009405263157894737, "low_lr": 1.8810526315789475e-05, "step": 113 }, { "epoch": 0.2971729125575279, "high_lr": 0.0009405263157894737, "low_lr": 1.8810526315789475e-05, "step": 113 }, { "epoch": 0.2971729125575279, "high_lr": 0.0009405263157894737, "low_lr": 1.8810526315789475e-05, "step": 113 }, { "epoch": 0.2971729125575279, "high_lr": 0.0009405263157894737, "low_lr": 1.8810526315789475e-05, "step": 113 }, { "epoch": 0.2971729125575279, "high_lr": 0.0009405263157894737, "low_lr": 1.8810526315789475e-05, "step": 113 }, { "epoch": 0.29980276134122286, "grad_norm": 0.9263393878936768, "learning_rate": 0.00094, "loss": 1.8574, "step": 114 }, { "epoch": 0.29980276134122286, "high_lr": 0.00094, "low_lr": 1.88e-05, "step": 114 }, { "epoch": 0.29980276134122286, "high_lr": 0.00094, "low_lr": 1.88e-05, "step": 114 }, { "epoch": 0.29980276134122286, "high_lr": 0.00094, "low_lr": 1.88e-05, "step": 114 }, { "epoch": 0.29980276134122286, "high_lr": 0.00094, "low_lr": 1.88e-05, "step": 114 }, { "epoch": 0.29980276134122286, "high_lr": 0.00094, "low_lr": 1.88e-05, "step": 114 }, { "epoch": 0.29980276134122286, "high_lr": 0.00094, "low_lr": 1.88e-05, "step": 114 }, { "epoch": 0.29980276134122286, "high_lr": 0.00094, "low_lr": 1.88e-05, "step": 114 }, { "epoch": 0.29980276134122286, "high_lr": 0.00094, "low_lr": 1.88e-05, "step": 114 }, { "epoch": 0.30243261012491784, "grad_norm": 0.7165038585662842, "learning_rate": 0.0009394736842105264, "loss": 1.9537, "step": 115 }, { "epoch": 0.30243261012491784, "high_lr": 0.0009394736842105264, "low_lr": 1.8789473684210528e-05, "step": 115 }, { "epoch": 0.30243261012491784, "high_lr": 0.0009394736842105264, "low_lr": 1.8789473684210528e-05, "step": 115 }, { "epoch": 0.30243261012491784, "high_lr": 0.0009394736842105264, "low_lr": 1.8789473684210528e-05, "step": 115 }, { "epoch": 0.30243261012491784, "high_lr": 0.0009394736842105264, "low_lr": 1.8789473684210528e-05, "step": 115 }, { "epoch": 0.30243261012491784, "high_lr": 0.0009394736842105264, "low_lr": 1.8789473684210528e-05, "step": 115 }, { "epoch": 0.30243261012491784, "high_lr": 0.0009394736842105264, "low_lr": 1.8789473684210528e-05, "step": 115 }, { "epoch": 0.30243261012491784, "high_lr": 0.0009394736842105264, "low_lr": 1.8789473684210528e-05, "step": 115 }, { "epoch": 0.30243261012491784, "high_lr": 0.0009394736842105264, "low_lr": 1.8789473684210528e-05, "step": 115 }, { "epoch": 0.3050624589086128, "grad_norm": 0.7112441658973694, "learning_rate": 0.0009389473684210527, "loss": 1.9437, "step": 116 }, { "epoch": 0.3050624589086128, "high_lr": 0.0009389473684210527, "low_lr": 1.8778947368421056e-05, "step": 116 }, { "epoch": 0.3050624589086128, "high_lr": 0.0009389473684210527, "low_lr": 1.8778947368421056e-05, "step": 116 }, { "epoch": 0.3050624589086128, "high_lr": 0.0009389473684210527, "low_lr": 1.8778947368421056e-05, "step": 116 }, { "epoch": 0.3050624589086128, "high_lr": 0.0009389473684210527, "low_lr": 1.8778947368421056e-05, "step": 116 }, { "epoch": 0.3050624589086128, "high_lr": 0.0009389473684210527, "low_lr": 1.8778947368421056e-05, "step": 116 }, { "epoch": 0.3050624589086128, "high_lr": 0.0009389473684210527, "low_lr": 1.8778947368421056e-05, "step": 116 }, { "epoch": 0.3050624589086128, "high_lr": 0.0009389473684210527, "low_lr": 1.8778947368421056e-05, "step": 116 }, { "epoch": 0.3050624589086128, "high_lr": 0.0009389473684210527, "low_lr": 1.8778947368421056e-05, "step": 116 }, { "epoch": 0.3076923076923077, "grad_norm": 0.6522067785263062, "learning_rate": 0.0009384210526315789, "loss": 1.905, "step": 117 }, { "epoch": 0.3076923076923077, "high_lr": 0.0009384210526315789, "low_lr": 1.876842105263158e-05, "step": 117 }, { "epoch": 0.3076923076923077, "high_lr": 0.0009384210526315789, "low_lr": 1.876842105263158e-05, "step": 117 }, { "epoch": 0.3076923076923077, "high_lr": 0.0009384210526315789, "low_lr": 1.876842105263158e-05, "step": 117 }, { "epoch": 0.3076923076923077, "high_lr": 0.0009384210526315789, "low_lr": 1.876842105263158e-05, "step": 117 }, { "epoch": 0.3076923076923077, "high_lr": 0.0009384210526315789, "low_lr": 1.876842105263158e-05, "step": 117 }, { "epoch": 0.3076923076923077, "high_lr": 0.0009384210526315789, "low_lr": 1.876842105263158e-05, "step": 117 }, { "epoch": 0.3076923076923077, "high_lr": 0.0009384210526315789, "low_lr": 1.876842105263158e-05, "step": 117 }, { "epoch": 0.3076923076923077, "high_lr": 0.0009384210526315789, "low_lr": 1.876842105263158e-05, "step": 117 }, { "epoch": 0.31032215647600264, "grad_norm": 0.6830266714096069, "learning_rate": 0.0009378947368421052, "loss": 1.9671, "step": 118 }, { "epoch": 0.31032215647600264, "high_lr": 0.0009378947368421052, "low_lr": 1.8757894736842105e-05, "step": 118 }, { "epoch": 0.31032215647600264, "high_lr": 0.0009378947368421052, "low_lr": 1.8757894736842105e-05, "step": 118 }, { "epoch": 0.31032215647600264, "high_lr": 0.0009378947368421052, "low_lr": 1.8757894736842105e-05, "step": 118 }, { "epoch": 0.31032215647600264, "high_lr": 0.0009378947368421052, "low_lr": 1.8757894736842105e-05, "step": 118 }, { "epoch": 0.31032215647600264, "high_lr": 0.0009378947368421052, "low_lr": 1.8757894736842105e-05, "step": 118 }, { "epoch": 0.31032215647600264, "high_lr": 0.0009378947368421052, "low_lr": 1.8757894736842105e-05, "step": 118 }, { "epoch": 0.31032215647600264, "high_lr": 0.0009378947368421052, "low_lr": 1.8757894736842105e-05, "step": 118 }, { "epoch": 0.31032215647600264, "high_lr": 0.0009378947368421052, "low_lr": 1.8757894736842105e-05, "step": 118 }, { "epoch": 0.3129520052596976, "grad_norm": 1.746518611907959, "learning_rate": 0.0009373684210526316, "loss": 1.9321, "step": 119 }, { "epoch": 0.3129520052596976, "high_lr": 0.0009373684210526316, "low_lr": 1.8747368421052633e-05, "step": 119 }, { "epoch": 0.3129520052596976, "high_lr": 0.0009373684210526316, "low_lr": 1.8747368421052633e-05, "step": 119 }, { "epoch": 0.3129520052596976, "high_lr": 0.0009373684210526316, "low_lr": 1.8747368421052633e-05, "step": 119 }, { "epoch": 0.3129520052596976, "high_lr": 0.0009373684210526316, "low_lr": 1.8747368421052633e-05, "step": 119 }, { "epoch": 0.3129520052596976, "high_lr": 0.0009373684210526316, "low_lr": 1.8747368421052633e-05, "step": 119 }, { "epoch": 0.3129520052596976, "high_lr": 0.0009373684210526316, "low_lr": 1.8747368421052633e-05, "step": 119 }, { "epoch": 0.3129520052596976, "high_lr": 0.0009373684210526316, "low_lr": 1.8747368421052633e-05, "step": 119 }, { "epoch": 0.3129520052596976, "high_lr": 0.0009373684210526316, "low_lr": 1.8747368421052633e-05, "step": 119 }, { "epoch": 0.3155818540433925, "grad_norm": 0.6991109848022461, "learning_rate": 0.0009368421052631579, "loss": 1.9557, "step": 120 }, { "epoch": 0.3155818540433925, "high_lr": 0.0009368421052631579, "low_lr": 1.873684210526316e-05, "step": 120 }, { "epoch": 0.3155818540433925, "high_lr": 0.0009368421052631579, "low_lr": 1.873684210526316e-05, "step": 120 }, { "epoch": 0.3155818540433925, "high_lr": 0.0009368421052631579, "low_lr": 1.873684210526316e-05, "step": 120 }, { "epoch": 0.3155818540433925, "high_lr": 0.0009368421052631579, "low_lr": 1.873684210526316e-05, "step": 120 }, { "epoch": 0.3155818540433925, "high_lr": 0.0009368421052631579, "low_lr": 1.873684210526316e-05, "step": 120 }, { "epoch": 0.3155818540433925, "high_lr": 0.0009368421052631579, "low_lr": 1.873684210526316e-05, "step": 120 }, { "epoch": 0.3155818540433925, "high_lr": 0.0009368421052631579, "low_lr": 1.873684210526316e-05, "step": 120 }, { "epoch": 0.3155818540433925, "high_lr": 0.0009368421052631579, "low_lr": 1.873684210526316e-05, "step": 120 }, { "epoch": 0.31821170282708744, "grad_norm": 0.6823937892913818, "learning_rate": 0.0009363157894736842, "loss": 1.9575, "step": 121 }, { "epoch": 0.31821170282708744, "high_lr": 0.0009363157894736842, "low_lr": 1.8726315789473686e-05, "step": 121 }, { "epoch": 0.31821170282708744, "high_lr": 0.0009363157894736842, "low_lr": 1.8726315789473686e-05, "step": 121 }, { "epoch": 0.31821170282708744, "high_lr": 0.0009363157894736842, "low_lr": 1.8726315789473686e-05, "step": 121 }, { "epoch": 0.31821170282708744, "high_lr": 0.0009363157894736842, "low_lr": 1.8726315789473686e-05, "step": 121 }, { "epoch": 0.31821170282708744, "high_lr": 0.0009363157894736842, "low_lr": 1.8726315789473686e-05, "step": 121 }, { "epoch": 0.31821170282708744, "high_lr": 0.0009363157894736842, "low_lr": 1.8726315789473686e-05, "step": 121 }, { "epoch": 0.31821170282708744, "high_lr": 0.0009363157894736842, "low_lr": 1.8726315789473686e-05, "step": 121 }, { "epoch": 0.31821170282708744, "high_lr": 0.0009363157894736842, "low_lr": 1.8726315789473686e-05, "step": 121 }, { "epoch": 0.32084155161078237, "grad_norm": 0.901719331741333, "learning_rate": 0.0009357894736842105, "loss": 1.9167, "step": 122 }, { "epoch": 0.32084155161078237, "high_lr": 0.0009357894736842105, "low_lr": 1.871578947368421e-05, "step": 122 }, { "epoch": 0.32084155161078237, "high_lr": 0.0009357894736842105, "low_lr": 1.871578947368421e-05, "step": 122 }, { "epoch": 0.32084155161078237, "high_lr": 0.0009357894736842105, "low_lr": 1.871578947368421e-05, "step": 122 }, { "epoch": 0.32084155161078237, "high_lr": 0.0009357894736842105, "low_lr": 1.871578947368421e-05, "step": 122 }, { "epoch": 0.32084155161078237, "high_lr": 0.0009357894736842105, "low_lr": 1.871578947368421e-05, "step": 122 }, { "epoch": 0.32084155161078237, "high_lr": 0.0009357894736842105, "low_lr": 1.871578947368421e-05, "step": 122 }, { "epoch": 0.32084155161078237, "high_lr": 0.0009357894736842105, "low_lr": 1.871578947368421e-05, "step": 122 }, { "epoch": 0.32084155161078237, "high_lr": 0.0009357894736842105, "low_lr": 1.871578947368421e-05, "step": 122 }, { "epoch": 0.3234714003944773, "grad_norm": 0.6262206435203552, "learning_rate": 0.0009352631578947368, "loss": 1.9214, "step": 123 }, { "epoch": 0.3234714003944773, "high_lr": 0.0009352631578947368, "low_lr": 1.8705263157894738e-05, "step": 123 }, { "epoch": 0.3234714003944773, "high_lr": 0.0009352631578947368, "low_lr": 1.8705263157894738e-05, "step": 123 }, { "epoch": 0.3234714003944773, "high_lr": 0.0009352631578947368, "low_lr": 1.8705263157894738e-05, "step": 123 }, { "epoch": 0.3234714003944773, "high_lr": 0.0009352631578947368, "low_lr": 1.8705263157894738e-05, "step": 123 }, { "epoch": 0.3234714003944773, "high_lr": 0.0009352631578947368, "low_lr": 1.8705263157894738e-05, "step": 123 }, { "epoch": 0.3234714003944773, "high_lr": 0.0009352631578947368, "low_lr": 1.8705263157894738e-05, "step": 123 }, { "epoch": 0.3234714003944773, "high_lr": 0.0009352631578947368, "low_lr": 1.8705263157894738e-05, "step": 123 }, { "epoch": 0.3234714003944773, "high_lr": 0.0009352631578947368, "low_lr": 1.8705263157894738e-05, "step": 123 }, { "epoch": 0.32610124917817224, "grad_norm": 0.800382673740387, "learning_rate": 0.0009347368421052633, "loss": 1.9136, "step": 124 }, { "epoch": 0.32610124917817224, "high_lr": 0.0009347368421052633, "low_lr": 1.8694736842105266e-05, "step": 124 }, { "epoch": 0.32610124917817224, "high_lr": 0.0009347368421052633, "low_lr": 1.8694736842105266e-05, "step": 124 }, { "epoch": 0.32610124917817224, "high_lr": 0.0009347368421052633, "low_lr": 1.8694736842105266e-05, "step": 124 }, { "epoch": 0.32610124917817224, "high_lr": 0.0009347368421052633, "low_lr": 1.8694736842105266e-05, "step": 124 }, { "epoch": 0.32610124917817224, "high_lr": 0.0009347368421052633, "low_lr": 1.8694736842105266e-05, "step": 124 }, { "epoch": 0.32610124917817224, "high_lr": 0.0009347368421052633, "low_lr": 1.8694736842105266e-05, "step": 124 }, { "epoch": 0.32610124917817224, "high_lr": 0.0009347368421052633, "low_lr": 1.8694736842105266e-05, "step": 124 }, { "epoch": 0.32610124917817224, "high_lr": 0.0009347368421052633, "low_lr": 1.8694736842105266e-05, "step": 124 }, { "epoch": 0.32873109796186717, "grad_norm": 0.6870277523994446, "learning_rate": 0.0009342105263157896, "loss": 1.8989, "step": 125 }, { "epoch": 0.32873109796186717, "high_lr": 0.0009342105263157896, "low_lr": 1.868421052631579e-05, "step": 125 }, { "epoch": 0.32873109796186717, "high_lr": 0.0009342105263157896, "low_lr": 1.868421052631579e-05, "step": 125 }, { "epoch": 0.32873109796186717, "high_lr": 0.0009342105263157896, "low_lr": 1.868421052631579e-05, "step": 125 }, { "epoch": 0.32873109796186717, "high_lr": 0.0009342105263157896, "low_lr": 1.868421052631579e-05, "step": 125 }, { "epoch": 0.32873109796186717, "high_lr": 0.0009342105263157896, "low_lr": 1.868421052631579e-05, "step": 125 }, { "epoch": 0.32873109796186717, "high_lr": 0.0009342105263157896, "low_lr": 1.868421052631579e-05, "step": 125 }, { "epoch": 0.32873109796186717, "high_lr": 0.0009342105263157896, "low_lr": 1.868421052631579e-05, "step": 125 }, { "epoch": 0.32873109796186717, "high_lr": 0.0009342105263157896, "low_lr": 1.868421052631579e-05, "step": 125 }, { "epoch": 0.33136094674556216, "grad_norm": 0.7083466053009033, "learning_rate": 0.0009336842105263158, "loss": 1.8249, "step": 126 }, { "epoch": 0.33136094674556216, "high_lr": 0.0009336842105263158, "low_lr": 1.8673684210526316e-05, "step": 126 }, { "epoch": 0.33136094674556216, "high_lr": 0.0009336842105263158, "low_lr": 1.8673684210526316e-05, "step": 126 }, { "epoch": 0.33136094674556216, "high_lr": 0.0009336842105263158, "low_lr": 1.8673684210526316e-05, "step": 126 }, { "epoch": 0.33136094674556216, "high_lr": 0.0009336842105263158, "low_lr": 1.8673684210526316e-05, "step": 126 }, { "epoch": 0.33136094674556216, "high_lr": 0.0009336842105263158, "low_lr": 1.8673684210526316e-05, "step": 126 }, { "epoch": 0.33136094674556216, "high_lr": 0.0009336842105263158, "low_lr": 1.8673684210526316e-05, "step": 126 }, { "epoch": 0.33136094674556216, "high_lr": 0.0009336842105263158, "low_lr": 1.8673684210526316e-05, "step": 126 }, { "epoch": 0.33136094674556216, "high_lr": 0.0009336842105263158, "low_lr": 1.8673684210526316e-05, "step": 126 }, { "epoch": 0.3339907955292571, "grad_norm": 0.6873717904090881, "learning_rate": 0.0009331578947368421, "loss": 1.9402, "step": 127 }, { "epoch": 0.3339907955292571, "high_lr": 0.0009331578947368421, "low_lr": 1.8663157894736844e-05, "step": 127 }, { "epoch": 0.3339907955292571, "high_lr": 0.0009331578947368421, "low_lr": 1.8663157894736844e-05, "step": 127 }, { "epoch": 0.3339907955292571, "high_lr": 0.0009331578947368421, "low_lr": 1.8663157894736844e-05, "step": 127 }, { "epoch": 0.3339907955292571, "high_lr": 0.0009331578947368421, "low_lr": 1.8663157894736844e-05, "step": 127 }, { "epoch": 0.3339907955292571, "high_lr": 0.0009331578947368421, "low_lr": 1.8663157894736844e-05, "step": 127 }, { "epoch": 0.3339907955292571, "high_lr": 0.0009331578947368421, "low_lr": 1.8663157894736844e-05, "step": 127 }, { "epoch": 0.3339907955292571, "high_lr": 0.0009331578947368421, "low_lr": 1.8663157894736844e-05, "step": 127 }, { "epoch": 0.3339907955292571, "high_lr": 0.0009331578947368421, "low_lr": 1.8663157894736844e-05, "step": 127 }, { "epoch": 0.336620644312952, "grad_norm": 0.6439316272735596, "learning_rate": 0.0009326315789473684, "loss": 1.8117, "step": 128 }, { "epoch": 0.336620644312952, "high_lr": 0.0009326315789473684, "low_lr": 1.8652631578947368e-05, "step": 128 }, { "epoch": 0.336620644312952, "high_lr": 0.0009326315789473684, "low_lr": 1.8652631578947368e-05, "step": 128 }, { "epoch": 0.336620644312952, "high_lr": 0.0009326315789473684, "low_lr": 1.8652631578947368e-05, "step": 128 }, { "epoch": 0.336620644312952, "high_lr": 0.0009326315789473684, "low_lr": 1.8652631578947368e-05, "step": 128 }, { "epoch": 0.336620644312952, "high_lr": 0.0009326315789473684, "low_lr": 1.8652631578947368e-05, "step": 128 }, { "epoch": 0.336620644312952, "high_lr": 0.0009326315789473684, "low_lr": 1.8652631578947368e-05, "step": 128 }, { "epoch": 0.336620644312952, "high_lr": 0.0009326315789473684, "low_lr": 1.8652631578947368e-05, "step": 128 }, { "epoch": 0.336620644312952, "high_lr": 0.0009326315789473684, "low_lr": 1.8652631578947368e-05, "step": 128 }, { "epoch": 0.33925049309664695, "grad_norm": 0.6715201139450073, "learning_rate": 0.0009321052631578948, "loss": 1.9089, "step": 129 }, { "epoch": 0.33925049309664695, "high_lr": 0.0009321052631578948, "low_lr": 1.8642105263157896e-05, "step": 129 }, { "epoch": 0.33925049309664695, "high_lr": 0.0009321052631578948, "low_lr": 1.8642105263157896e-05, "step": 129 }, { "epoch": 0.33925049309664695, "high_lr": 0.0009321052631578948, "low_lr": 1.8642105263157896e-05, "step": 129 }, { "epoch": 0.33925049309664695, "high_lr": 0.0009321052631578948, "low_lr": 1.8642105263157896e-05, "step": 129 }, { "epoch": 0.33925049309664695, "high_lr": 0.0009321052631578948, "low_lr": 1.8642105263157896e-05, "step": 129 }, { "epoch": 0.33925049309664695, "high_lr": 0.0009321052631578948, "low_lr": 1.8642105263157896e-05, "step": 129 }, { "epoch": 0.33925049309664695, "high_lr": 0.0009321052631578948, "low_lr": 1.8642105263157896e-05, "step": 129 }, { "epoch": 0.33925049309664695, "high_lr": 0.0009321052631578948, "low_lr": 1.8642105263157896e-05, "step": 129 }, { "epoch": 0.3418803418803419, "grad_norm": 0.7621733546257019, "learning_rate": 0.0009315789473684211, "loss": 1.8731, "step": 130 }, { "epoch": 0.3418803418803419, "high_lr": 0.0009315789473684211, "low_lr": 1.8631578947368424e-05, "step": 130 }, { "epoch": 0.3418803418803419, "high_lr": 0.0009315789473684211, "low_lr": 1.8631578947368424e-05, "step": 130 }, { "epoch": 0.3418803418803419, "high_lr": 0.0009315789473684211, "low_lr": 1.8631578947368424e-05, "step": 130 }, { "epoch": 0.3418803418803419, "high_lr": 0.0009315789473684211, "low_lr": 1.8631578947368424e-05, "step": 130 }, { "epoch": 0.3418803418803419, "high_lr": 0.0009315789473684211, "low_lr": 1.8631578947368424e-05, "step": 130 }, { "epoch": 0.3418803418803419, "high_lr": 0.0009315789473684211, "low_lr": 1.8631578947368424e-05, "step": 130 }, { "epoch": 0.3418803418803419, "high_lr": 0.0009315789473684211, "low_lr": 1.8631578947368424e-05, "step": 130 }, { "epoch": 0.3418803418803419, "high_lr": 0.0009315789473684211, "low_lr": 1.8631578947368424e-05, "step": 130 }, { "epoch": 0.3445101906640368, "grad_norm": 0.817223072052002, "learning_rate": 0.0009310526315789474, "loss": 1.8612, "step": 131 }, { "epoch": 0.3445101906640368, "high_lr": 0.0009310526315789474, "low_lr": 1.862105263157895e-05, "step": 131 }, { "epoch": 0.3445101906640368, "high_lr": 0.0009310526315789474, "low_lr": 1.862105263157895e-05, "step": 131 }, { "epoch": 0.3445101906640368, "high_lr": 0.0009310526315789474, "low_lr": 1.862105263157895e-05, "step": 131 }, { "epoch": 0.3445101906640368, "high_lr": 0.0009310526315789474, "low_lr": 1.862105263157895e-05, "step": 131 }, { "epoch": 0.3445101906640368, "high_lr": 0.0009310526315789474, "low_lr": 1.862105263157895e-05, "step": 131 }, { "epoch": 0.3445101906640368, "high_lr": 0.0009310526315789474, "low_lr": 1.862105263157895e-05, "step": 131 }, { "epoch": 0.3445101906640368, "high_lr": 0.0009310526315789474, "low_lr": 1.862105263157895e-05, "step": 131 }, { "epoch": 0.3445101906640368, "high_lr": 0.0009310526315789474, "low_lr": 1.862105263157895e-05, "step": 131 }, { "epoch": 0.34714003944773175, "grad_norm": 0.8939811587333679, "learning_rate": 0.0009305263157894737, "loss": 1.882, "step": 132 }, { "epoch": 0.34714003944773175, "high_lr": 0.0009305263157894737, "low_lr": 1.8610526315789473e-05, "step": 132 }, { "epoch": 0.34714003944773175, "high_lr": 0.0009305263157894737, "low_lr": 1.8610526315789473e-05, "step": 132 }, { "epoch": 0.34714003944773175, "high_lr": 0.0009305263157894737, "low_lr": 1.8610526315789473e-05, "step": 132 }, { "epoch": 0.34714003944773175, "high_lr": 0.0009305263157894737, "low_lr": 1.8610526315789473e-05, "step": 132 }, { "epoch": 0.34714003944773175, "high_lr": 0.0009305263157894737, "low_lr": 1.8610526315789473e-05, "step": 132 }, { "epoch": 0.34714003944773175, "high_lr": 0.0009305263157894737, "low_lr": 1.8610526315789473e-05, "step": 132 }, { "epoch": 0.34714003944773175, "high_lr": 0.0009305263157894737, "low_lr": 1.8610526315789473e-05, "step": 132 }, { "epoch": 0.34714003944773175, "high_lr": 0.0009305263157894737, "low_lr": 1.8610526315789473e-05, "step": 132 }, { "epoch": 0.3497698882314267, "grad_norm": 0.6691973209381104, "learning_rate": 0.00093, "loss": 1.8966, "step": 133 }, { "epoch": 0.3497698882314267, "high_lr": 0.00093, "low_lr": 1.86e-05, "step": 133 }, { "epoch": 0.3497698882314267, "high_lr": 0.00093, "low_lr": 1.86e-05, "step": 133 }, { "epoch": 0.3497698882314267, "high_lr": 0.00093, "low_lr": 1.86e-05, "step": 133 }, { "epoch": 0.3497698882314267, "high_lr": 0.00093, "low_lr": 1.86e-05, "step": 133 }, { "epoch": 0.3497698882314267, "high_lr": 0.00093, "low_lr": 1.86e-05, "step": 133 }, { "epoch": 0.3497698882314267, "high_lr": 0.00093, "low_lr": 1.86e-05, "step": 133 }, { "epoch": 0.3497698882314267, "high_lr": 0.00093, "low_lr": 1.86e-05, "step": 133 }, { "epoch": 0.3497698882314267, "high_lr": 0.00093, "low_lr": 1.86e-05, "step": 133 }, { "epoch": 0.3523997370151216, "grad_norm": 1.2141129970550537, "learning_rate": 0.0009294736842105263, "loss": 1.9498, "step": 134 }, { "epoch": 0.3523997370151216, "high_lr": 0.0009294736842105263, "low_lr": 1.858947368421053e-05, "step": 134 }, { "epoch": 0.3523997370151216, "high_lr": 0.0009294736842105263, "low_lr": 1.858947368421053e-05, "step": 134 }, { "epoch": 0.3523997370151216, "high_lr": 0.0009294736842105263, "low_lr": 1.858947368421053e-05, "step": 134 }, { "epoch": 0.3523997370151216, "high_lr": 0.0009294736842105263, "low_lr": 1.858947368421053e-05, "step": 134 }, { "epoch": 0.3523997370151216, "high_lr": 0.0009294736842105263, "low_lr": 1.858947368421053e-05, "step": 134 }, { "epoch": 0.3523997370151216, "high_lr": 0.0009294736842105263, "low_lr": 1.858947368421053e-05, "step": 134 }, { "epoch": 0.3523997370151216, "high_lr": 0.0009294736842105263, "low_lr": 1.858947368421053e-05, "step": 134 }, { "epoch": 0.3523997370151216, "high_lr": 0.0009294736842105263, "low_lr": 1.858947368421053e-05, "step": 134 }, { "epoch": 0.35502958579881655, "grad_norm": 0.7771977782249451, "learning_rate": 0.0009289473684210526, "loss": 2.0322, "step": 135 }, { "epoch": 0.35502958579881655, "high_lr": 0.0009289473684210526, "low_lr": 1.8578947368421054e-05, "step": 135 }, { "epoch": 0.35502958579881655, "high_lr": 0.0009289473684210526, "low_lr": 1.8578947368421054e-05, "step": 135 }, { "epoch": 0.35502958579881655, "high_lr": 0.0009289473684210526, "low_lr": 1.8578947368421054e-05, "step": 135 }, { "epoch": 0.35502958579881655, "high_lr": 0.0009289473684210526, "low_lr": 1.8578947368421054e-05, "step": 135 }, { "epoch": 0.35502958579881655, "high_lr": 0.0009289473684210526, "low_lr": 1.8578947368421054e-05, "step": 135 }, { "epoch": 0.35502958579881655, "high_lr": 0.0009289473684210526, "low_lr": 1.8578947368421054e-05, "step": 135 }, { "epoch": 0.35502958579881655, "high_lr": 0.0009289473684210526, "low_lr": 1.8578947368421054e-05, "step": 135 }, { "epoch": 0.35502958579881655, "high_lr": 0.0009289473684210526, "low_lr": 1.8578947368421054e-05, "step": 135 }, { "epoch": 0.3576594345825115, "grad_norm": 0.7535092830657959, "learning_rate": 0.0009284210526315789, "loss": 1.9483, "step": 136 }, { "epoch": 0.3576594345825115, "high_lr": 0.0009284210526315789, "low_lr": 1.856842105263158e-05, "step": 136 }, { "epoch": 0.3576594345825115, "high_lr": 0.0009284210526315789, "low_lr": 1.856842105263158e-05, "step": 136 }, { "epoch": 0.3576594345825115, "high_lr": 0.0009284210526315789, "low_lr": 1.856842105263158e-05, "step": 136 }, { "epoch": 0.3576594345825115, "high_lr": 0.0009284210526315789, "low_lr": 1.856842105263158e-05, "step": 136 }, { "epoch": 0.3576594345825115, "high_lr": 0.0009284210526315789, "low_lr": 1.856842105263158e-05, "step": 136 }, { "epoch": 0.3576594345825115, "high_lr": 0.0009284210526315789, "low_lr": 1.856842105263158e-05, "step": 136 }, { "epoch": 0.3576594345825115, "high_lr": 0.0009284210526315789, "low_lr": 1.856842105263158e-05, "step": 136 }, { "epoch": 0.3576594345825115, "high_lr": 0.0009284210526315789, "low_lr": 1.856842105263158e-05, "step": 136 }, { "epoch": 0.36028928336620647, "grad_norm": 0.6840701103210449, "learning_rate": 0.0009278947368421052, "loss": 1.8602, "step": 137 }, { "epoch": 0.36028928336620647, "high_lr": 0.0009278947368421052, "low_lr": 1.8557894736842107e-05, "step": 137 }, { "epoch": 0.36028928336620647, "high_lr": 0.0009278947368421052, "low_lr": 1.8557894736842107e-05, "step": 137 }, { "epoch": 0.36028928336620647, "high_lr": 0.0009278947368421052, "low_lr": 1.8557894736842107e-05, "step": 137 }, { "epoch": 0.36028928336620647, "high_lr": 0.0009278947368421052, "low_lr": 1.8557894736842107e-05, "step": 137 }, { "epoch": 0.36028928336620647, "high_lr": 0.0009278947368421052, "low_lr": 1.8557894736842107e-05, "step": 137 }, { "epoch": 0.36028928336620647, "high_lr": 0.0009278947368421052, "low_lr": 1.8557894736842107e-05, "step": 137 }, { "epoch": 0.36028928336620647, "high_lr": 0.0009278947368421052, "low_lr": 1.8557894736842107e-05, "step": 137 }, { "epoch": 0.36028928336620647, "high_lr": 0.0009278947368421052, "low_lr": 1.8557894736842107e-05, "step": 137 }, { "epoch": 0.3629191321499014, "grad_norm": 0.6990517973899841, "learning_rate": 0.0009273684210526316, "loss": 1.7916, "step": 138 }, { "epoch": 0.3629191321499014, "high_lr": 0.0009273684210526316, "low_lr": 1.8547368421052635e-05, "step": 138 }, { "epoch": 0.3629191321499014, "high_lr": 0.0009273684210526316, "low_lr": 1.8547368421052635e-05, "step": 138 }, { "epoch": 0.3629191321499014, "high_lr": 0.0009273684210526316, "low_lr": 1.8547368421052635e-05, "step": 138 }, { "epoch": 0.3629191321499014, "high_lr": 0.0009273684210526316, "low_lr": 1.8547368421052635e-05, "step": 138 }, { "epoch": 0.3629191321499014, "high_lr": 0.0009273684210526316, "low_lr": 1.8547368421052635e-05, "step": 138 }, { "epoch": 0.3629191321499014, "high_lr": 0.0009273684210526316, "low_lr": 1.8547368421052635e-05, "step": 138 }, { "epoch": 0.3629191321499014, "high_lr": 0.0009273684210526316, "low_lr": 1.8547368421052635e-05, "step": 138 }, { "epoch": 0.3629191321499014, "high_lr": 0.0009273684210526316, "low_lr": 1.8547368421052635e-05, "step": 138 }, { "epoch": 0.36554898093359633, "grad_norm": 0.7095828056335449, "learning_rate": 0.0009268421052631579, "loss": 1.9012, "step": 139 }, { "epoch": 0.36554898093359633, "high_lr": 0.0009268421052631579, "low_lr": 1.853684210526316e-05, "step": 139 }, { "epoch": 0.36554898093359633, "high_lr": 0.0009268421052631579, "low_lr": 1.853684210526316e-05, "step": 139 }, { "epoch": 0.36554898093359633, "high_lr": 0.0009268421052631579, "low_lr": 1.853684210526316e-05, "step": 139 }, { "epoch": 0.36554898093359633, "high_lr": 0.0009268421052631579, "low_lr": 1.853684210526316e-05, "step": 139 }, { "epoch": 0.36554898093359633, "high_lr": 0.0009268421052631579, "low_lr": 1.853684210526316e-05, "step": 139 }, { "epoch": 0.36554898093359633, "high_lr": 0.0009268421052631579, "low_lr": 1.853684210526316e-05, "step": 139 }, { "epoch": 0.36554898093359633, "high_lr": 0.0009268421052631579, "low_lr": 1.853684210526316e-05, "step": 139 }, { "epoch": 0.36554898093359633, "high_lr": 0.0009268421052631579, "low_lr": 1.853684210526316e-05, "step": 139 }, { "epoch": 0.36817882971729127, "grad_norm": 0.6847386360168457, "learning_rate": 0.0009263157894736843, "loss": 1.84, "step": 140 }, { "epoch": 0.36817882971729127, "high_lr": 0.0009263157894736843, "low_lr": 1.8526315789473684e-05, "step": 140 }, { "epoch": 0.36817882971729127, "high_lr": 0.0009263157894736843, "low_lr": 1.8526315789473684e-05, "step": 140 }, { "epoch": 0.36817882971729127, "high_lr": 0.0009263157894736843, "low_lr": 1.8526315789473684e-05, "step": 140 }, { "epoch": 0.36817882971729127, "high_lr": 0.0009263157894736843, "low_lr": 1.8526315789473684e-05, "step": 140 }, { "epoch": 0.36817882971729127, "high_lr": 0.0009263157894736843, "low_lr": 1.8526315789473684e-05, "step": 140 }, { "epoch": 0.36817882971729127, "high_lr": 0.0009263157894736843, "low_lr": 1.8526315789473684e-05, "step": 140 }, { "epoch": 0.36817882971729127, "high_lr": 0.0009263157894736843, "low_lr": 1.8526315789473684e-05, "step": 140 }, { "epoch": 0.36817882971729127, "high_lr": 0.0009263157894736843, "low_lr": 1.8526315789473684e-05, "step": 140 }, { "epoch": 0.3708086785009862, "grad_norm": 0.7900689840316772, "learning_rate": 0.0009257894736842106, "loss": 1.9082, "step": 141 }, { "epoch": 0.3708086785009862, "high_lr": 0.0009257894736842106, "low_lr": 1.8515789473684212e-05, "step": 141 }, { "epoch": 0.3708086785009862, "high_lr": 0.0009257894736842106, "low_lr": 1.8515789473684212e-05, "step": 141 }, { "epoch": 0.3708086785009862, "high_lr": 0.0009257894736842106, "low_lr": 1.8515789473684212e-05, "step": 141 }, { "epoch": 0.3708086785009862, "high_lr": 0.0009257894736842106, "low_lr": 1.8515789473684212e-05, "step": 141 }, { "epoch": 0.3708086785009862, "high_lr": 0.0009257894736842106, "low_lr": 1.8515789473684212e-05, "step": 141 }, { "epoch": 0.3708086785009862, "high_lr": 0.0009257894736842106, "low_lr": 1.8515789473684212e-05, "step": 141 }, { "epoch": 0.3708086785009862, "high_lr": 0.0009257894736842106, "low_lr": 1.8515789473684212e-05, "step": 141 }, { "epoch": 0.3708086785009862, "high_lr": 0.0009257894736842106, "low_lr": 1.8515789473684212e-05, "step": 141 }, { "epoch": 0.37343852728468113, "grad_norm": 0.65594482421875, "learning_rate": 0.0009252631578947368, "loss": 1.8793, "step": 142 }, { "epoch": 0.37343852728468113, "high_lr": 0.0009252631578947368, "low_lr": 1.8505263157894737e-05, "step": 142 }, { "epoch": 0.37343852728468113, "high_lr": 0.0009252631578947368, "low_lr": 1.8505263157894737e-05, "step": 142 }, { "epoch": 0.37343852728468113, "high_lr": 0.0009252631578947368, "low_lr": 1.8505263157894737e-05, "step": 142 }, { "epoch": 0.37343852728468113, "high_lr": 0.0009252631578947368, "low_lr": 1.8505263157894737e-05, "step": 142 }, { "epoch": 0.37343852728468113, "high_lr": 0.0009252631578947368, "low_lr": 1.8505263157894737e-05, "step": 142 }, { "epoch": 0.37343852728468113, "high_lr": 0.0009252631578947368, "low_lr": 1.8505263157894737e-05, "step": 142 }, { "epoch": 0.37343852728468113, "high_lr": 0.0009252631578947368, "low_lr": 1.8505263157894737e-05, "step": 142 }, { "epoch": 0.37343852728468113, "high_lr": 0.0009252631578947368, "low_lr": 1.8505263157894737e-05, "step": 142 }, { "epoch": 0.37606837606837606, "grad_norm": 0.7136049866676331, "learning_rate": 0.0009247368421052632, "loss": 1.9126, "step": 143 }, { "epoch": 0.37606837606837606, "high_lr": 0.0009247368421052632, "low_lr": 1.8494736842105265e-05, "step": 143 }, { "epoch": 0.37606837606837606, "high_lr": 0.0009247368421052632, "low_lr": 1.8494736842105265e-05, "step": 143 }, { "epoch": 0.37606837606837606, "high_lr": 0.0009247368421052632, "low_lr": 1.8494736842105265e-05, "step": 143 }, { "epoch": 0.37606837606837606, "high_lr": 0.0009247368421052632, "low_lr": 1.8494736842105265e-05, "step": 143 }, { "epoch": 0.37606837606837606, "high_lr": 0.0009247368421052632, "low_lr": 1.8494736842105265e-05, "step": 143 }, { "epoch": 0.37606837606837606, "high_lr": 0.0009247368421052632, "low_lr": 1.8494736842105265e-05, "step": 143 }, { "epoch": 0.37606837606837606, "high_lr": 0.0009247368421052632, "low_lr": 1.8494736842105265e-05, "step": 143 }, { "epoch": 0.37606837606837606, "high_lr": 0.0009247368421052632, "low_lr": 1.8494736842105265e-05, "step": 143 }, { "epoch": 0.378698224852071, "grad_norm": 0.7377094030380249, "learning_rate": 0.0009242105263157895, "loss": 1.8829, "step": 144 }, { "epoch": 0.378698224852071, "high_lr": 0.0009242105263157895, "low_lr": 1.8484210526315793e-05, "step": 144 }, { "epoch": 0.378698224852071, "high_lr": 0.0009242105263157895, "low_lr": 1.8484210526315793e-05, "step": 144 }, { "epoch": 0.378698224852071, "high_lr": 0.0009242105263157895, "low_lr": 1.8484210526315793e-05, "step": 144 }, { "epoch": 0.378698224852071, "high_lr": 0.0009242105263157895, "low_lr": 1.8484210526315793e-05, "step": 144 }, { "epoch": 0.378698224852071, "high_lr": 0.0009242105263157895, "low_lr": 1.8484210526315793e-05, "step": 144 }, { "epoch": 0.378698224852071, "high_lr": 0.0009242105263157895, "low_lr": 1.8484210526315793e-05, "step": 144 }, { "epoch": 0.378698224852071, "high_lr": 0.0009242105263157895, "low_lr": 1.8484210526315793e-05, "step": 144 }, { "epoch": 0.378698224852071, "high_lr": 0.0009242105263157895, "low_lr": 1.8484210526315793e-05, "step": 144 }, { "epoch": 0.38132807363576593, "grad_norm": 0.8433911800384521, "learning_rate": 0.0009236842105263158, "loss": 1.9184, "step": 145 }, { "epoch": 0.38132807363576593, "high_lr": 0.0009236842105263158, "low_lr": 1.8473684210526317e-05, "step": 145 }, { "epoch": 0.38132807363576593, "high_lr": 0.0009236842105263158, "low_lr": 1.8473684210526317e-05, "step": 145 }, { "epoch": 0.38132807363576593, "high_lr": 0.0009236842105263158, "low_lr": 1.8473684210526317e-05, "step": 145 }, { "epoch": 0.38132807363576593, "high_lr": 0.0009236842105263158, "low_lr": 1.8473684210526317e-05, "step": 145 }, { "epoch": 0.38132807363576593, "high_lr": 0.0009236842105263158, "low_lr": 1.8473684210526317e-05, "step": 145 }, { "epoch": 0.38132807363576593, "high_lr": 0.0009236842105263158, "low_lr": 1.8473684210526317e-05, "step": 145 }, { "epoch": 0.38132807363576593, "high_lr": 0.0009236842105263158, "low_lr": 1.8473684210526317e-05, "step": 145 }, { "epoch": 0.38132807363576593, "high_lr": 0.0009236842105263158, "low_lr": 1.8473684210526317e-05, "step": 145 }, { "epoch": 0.38395792241946086, "grad_norm": 0.7446521520614624, "learning_rate": 0.0009231578947368421, "loss": 1.9224, "step": 146 }, { "epoch": 0.38395792241946086, "high_lr": 0.0009231578947368421, "low_lr": 1.8463157894736842e-05, "step": 146 }, { "epoch": 0.38395792241946086, "high_lr": 0.0009231578947368421, "low_lr": 1.8463157894736842e-05, "step": 146 }, { "epoch": 0.38395792241946086, "high_lr": 0.0009231578947368421, "low_lr": 1.8463157894736842e-05, "step": 146 }, { "epoch": 0.38395792241946086, "high_lr": 0.0009231578947368421, "low_lr": 1.8463157894736842e-05, "step": 146 }, { "epoch": 0.38395792241946086, "high_lr": 0.0009231578947368421, "low_lr": 1.8463157894736842e-05, "step": 146 }, { "epoch": 0.38395792241946086, "high_lr": 0.0009231578947368421, "low_lr": 1.8463157894736842e-05, "step": 146 }, { "epoch": 0.38395792241946086, "high_lr": 0.0009231578947368421, "low_lr": 1.8463157894736842e-05, "step": 146 }, { "epoch": 0.38395792241946086, "high_lr": 0.0009231578947368421, "low_lr": 1.8463157894736842e-05, "step": 146 }, { "epoch": 0.3865877712031558, "grad_norm": 0.766181230545044, "learning_rate": 0.0009226315789473685, "loss": 1.9315, "step": 147 }, { "epoch": 0.3865877712031558, "high_lr": 0.0009226315789473685, "low_lr": 1.845263157894737e-05, "step": 147 }, { "epoch": 0.3865877712031558, "high_lr": 0.0009226315789473685, "low_lr": 1.845263157894737e-05, "step": 147 }, { "epoch": 0.3865877712031558, "high_lr": 0.0009226315789473685, "low_lr": 1.845263157894737e-05, "step": 147 }, { "epoch": 0.3865877712031558, "high_lr": 0.0009226315789473685, "low_lr": 1.845263157894737e-05, "step": 147 }, { "epoch": 0.3865877712031558, "high_lr": 0.0009226315789473685, "low_lr": 1.845263157894737e-05, "step": 147 }, { "epoch": 0.3865877712031558, "high_lr": 0.0009226315789473685, "low_lr": 1.845263157894737e-05, "step": 147 }, { "epoch": 0.3865877712031558, "high_lr": 0.0009226315789473685, "low_lr": 1.845263157894737e-05, "step": 147 }, { "epoch": 0.3865877712031558, "high_lr": 0.0009226315789473685, "low_lr": 1.845263157894737e-05, "step": 147 }, { "epoch": 0.3892176199868508, "grad_norm": 0.7001948952674866, "learning_rate": 0.0009221052631578948, "loss": 1.8632, "step": 148 }, { "epoch": 0.3892176199868508, "high_lr": 0.0009221052631578948, "low_lr": 1.8442105263157898e-05, "step": 148 }, { "epoch": 0.3892176199868508, "high_lr": 0.0009221052631578948, "low_lr": 1.8442105263157898e-05, "step": 148 }, { "epoch": 0.3892176199868508, "high_lr": 0.0009221052631578948, "low_lr": 1.8442105263157898e-05, "step": 148 }, { "epoch": 0.3892176199868508, "high_lr": 0.0009221052631578948, "low_lr": 1.8442105263157898e-05, "step": 148 }, { "epoch": 0.3892176199868508, "high_lr": 0.0009221052631578948, "low_lr": 1.8442105263157898e-05, "step": 148 }, { "epoch": 0.3892176199868508, "high_lr": 0.0009221052631578948, "low_lr": 1.8442105263157898e-05, "step": 148 }, { "epoch": 0.3892176199868508, "high_lr": 0.0009221052631578948, "low_lr": 1.8442105263157898e-05, "step": 148 }, { "epoch": 0.3892176199868508, "high_lr": 0.0009221052631578948, "low_lr": 1.8442105263157898e-05, "step": 148 }, { "epoch": 0.3918474687705457, "grad_norm": 0.6942868232727051, "learning_rate": 0.0009215789473684211, "loss": 1.9002, "step": 149 }, { "epoch": 0.3918474687705457, "high_lr": 0.0009215789473684211, "low_lr": 1.8431578947368423e-05, "step": 149 }, { "epoch": 0.3918474687705457, "high_lr": 0.0009215789473684211, "low_lr": 1.8431578947368423e-05, "step": 149 }, { "epoch": 0.3918474687705457, "high_lr": 0.0009215789473684211, "low_lr": 1.8431578947368423e-05, "step": 149 }, { "epoch": 0.3918474687705457, "high_lr": 0.0009215789473684211, "low_lr": 1.8431578947368423e-05, "step": 149 }, { "epoch": 0.3918474687705457, "high_lr": 0.0009215789473684211, "low_lr": 1.8431578947368423e-05, "step": 149 }, { "epoch": 0.3918474687705457, "high_lr": 0.0009215789473684211, "low_lr": 1.8431578947368423e-05, "step": 149 }, { "epoch": 0.3918474687705457, "high_lr": 0.0009215789473684211, "low_lr": 1.8431578947368423e-05, "step": 149 }, { "epoch": 0.3918474687705457, "high_lr": 0.0009215789473684211, "low_lr": 1.8431578947368423e-05, "step": 149 }, { "epoch": 0.39447731755424065, "grad_norm": 1.636558175086975, "learning_rate": 0.0009210526315789473, "loss": 1.9456, "step": 150 }, { "epoch": 0.39447731755424065, "high_lr": 0.0009210526315789473, "low_lr": 1.8421052631578947e-05, "step": 150 }, { "epoch": 0.39447731755424065, "high_lr": 0.0009210526315789473, "low_lr": 1.8421052631578947e-05, "step": 150 }, { "epoch": 0.39447731755424065, "high_lr": 0.0009210526315789473, "low_lr": 1.8421052631578947e-05, "step": 150 }, { "epoch": 0.39447731755424065, "high_lr": 0.0009210526315789473, "low_lr": 1.8421052631578947e-05, "step": 150 }, { "epoch": 0.39447731755424065, "high_lr": 0.0009210526315789473, "low_lr": 1.8421052631578947e-05, "step": 150 }, { "epoch": 0.39447731755424065, "high_lr": 0.0009210526315789473, "low_lr": 1.8421052631578947e-05, "step": 150 }, { "epoch": 0.39447731755424065, "high_lr": 0.0009210526315789473, "low_lr": 1.8421052631578947e-05, "step": 150 }, { "epoch": 0.39447731755424065, "high_lr": 0.0009210526315789473, "low_lr": 1.8421052631578947e-05, "step": 150 }, { "epoch": 0.3971071663379356, "grad_norm": 0.7493187785148621, "learning_rate": 0.0009205263157894736, "loss": 1.8301, "step": 151 }, { "epoch": 0.3971071663379356, "high_lr": 0.0009205263157894736, "low_lr": 1.8410526315789475e-05, "step": 151 }, { "epoch": 0.3971071663379356, "high_lr": 0.0009205263157894736, "low_lr": 1.8410526315789475e-05, "step": 151 }, { "epoch": 0.3971071663379356, "high_lr": 0.0009205263157894736, "low_lr": 1.8410526315789475e-05, "step": 151 }, { "epoch": 0.3971071663379356, "high_lr": 0.0009205263157894736, "low_lr": 1.8410526315789475e-05, "step": 151 }, { "epoch": 0.3971071663379356, "high_lr": 0.0009205263157894736, "low_lr": 1.8410526315789475e-05, "step": 151 }, { "epoch": 0.3971071663379356, "high_lr": 0.0009205263157894736, "low_lr": 1.8410526315789475e-05, "step": 151 }, { "epoch": 0.3971071663379356, "high_lr": 0.0009205263157894736, "low_lr": 1.8410526315789475e-05, "step": 151 }, { "epoch": 0.3971071663379356, "high_lr": 0.0009205263157894736, "low_lr": 1.8410526315789475e-05, "step": 151 }, { "epoch": 0.3997370151216305, "grad_norm": 0.7495818734169006, "learning_rate": 0.00092, "loss": 1.88, "step": 152 }, { "epoch": 0.3997370151216305, "high_lr": 0.00092, "low_lr": 1.8400000000000003e-05, "step": 152 }, { "epoch": 0.3997370151216305, "high_lr": 0.00092, "low_lr": 1.8400000000000003e-05, "step": 152 }, { "epoch": 0.3997370151216305, "high_lr": 0.00092, "low_lr": 1.8400000000000003e-05, "step": 152 }, { "epoch": 0.3997370151216305, "high_lr": 0.00092, "low_lr": 1.8400000000000003e-05, "step": 152 }, { "epoch": 0.3997370151216305, "high_lr": 0.00092, "low_lr": 1.8400000000000003e-05, "step": 152 }, { "epoch": 0.3997370151216305, "high_lr": 0.00092, "low_lr": 1.8400000000000003e-05, "step": 152 }, { "epoch": 0.3997370151216305, "high_lr": 0.00092, "low_lr": 1.8400000000000003e-05, "step": 152 }, { "epoch": 0.3997370151216305, "high_lr": 0.00092, "low_lr": 1.8400000000000003e-05, "step": 152 }, { "epoch": 0.40236686390532544, "grad_norm": 0.7916814684867859, "learning_rate": 0.0009194736842105263, "loss": 1.8471, "step": 153 }, { "epoch": 0.40236686390532544, "high_lr": 0.0009194736842105263, "low_lr": 1.8389473684210528e-05, "step": 153 }, { "epoch": 0.40236686390532544, "high_lr": 0.0009194736842105263, "low_lr": 1.8389473684210528e-05, "step": 153 }, { "epoch": 0.40236686390532544, "high_lr": 0.0009194736842105263, "low_lr": 1.8389473684210528e-05, "step": 153 }, { "epoch": 0.40236686390532544, "high_lr": 0.0009194736842105263, "low_lr": 1.8389473684210528e-05, "step": 153 }, { "epoch": 0.40236686390532544, "high_lr": 0.0009194736842105263, "low_lr": 1.8389473684210528e-05, "step": 153 }, { "epoch": 0.40236686390532544, "high_lr": 0.0009194736842105263, "low_lr": 1.8389473684210528e-05, "step": 153 }, { "epoch": 0.40236686390532544, "high_lr": 0.0009194736842105263, "low_lr": 1.8389473684210528e-05, "step": 153 }, { "epoch": 0.40236686390532544, "high_lr": 0.0009194736842105263, "low_lr": 1.8389473684210528e-05, "step": 153 }, { "epoch": 0.4049967126890204, "grad_norm": 0.7696760296821594, "learning_rate": 0.0009189473684210526, "loss": 1.9551, "step": 154 }, { "epoch": 0.4049967126890204, "high_lr": 0.0009189473684210526, "low_lr": 1.8378947368421053e-05, "step": 154 }, { "epoch": 0.4049967126890204, "high_lr": 0.0009189473684210526, "low_lr": 1.8378947368421053e-05, "step": 154 }, { "epoch": 0.4049967126890204, "high_lr": 0.0009189473684210526, "low_lr": 1.8378947368421053e-05, "step": 154 }, { "epoch": 0.4049967126890204, "high_lr": 0.0009189473684210526, "low_lr": 1.8378947368421053e-05, "step": 154 }, { "epoch": 0.4049967126890204, "high_lr": 0.0009189473684210526, "low_lr": 1.8378947368421053e-05, "step": 154 }, { "epoch": 0.4049967126890204, "high_lr": 0.0009189473684210526, "low_lr": 1.8378947368421053e-05, "step": 154 }, { "epoch": 0.4049967126890204, "high_lr": 0.0009189473684210526, "low_lr": 1.8378947368421053e-05, "step": 154 }, { "epoch": 0.4049967126890204, "high_lr": 0.0009189473684210526, "low_lr": 1.8378947368421053e-05, "step": 154 }, { "epoch": 0.4076265614727153, "grad_norm": 0.714638352394104, "learning_rate": 0.0009184210526315789, "loss": 1.8951, "step": 155 }, { "epoch": 0.4076265614727153, "high_lr": 0.0009184210526315789, "low_lr": 1.836842105263158e-05, "step": 155 }, { "epoch": 0.4076265614727153, "high_lr": 0.0009184210526315789, "low_lr": 1.836842105263158e-05, "step": 155 }, { "epoch": 0.4076265614727153, "high_lr": 0.0009184210526315789, "low_lr": 1.836842105263158e-05, "step": 155 }, { "epoch": 0.4076265614727153, "high_lr": 0.0009184210526315789, "low_lr": 1.836842105263158e-05, "step": 155 }, { "epoch": 0.4076265614727153, "high_lr": 0.0009184210526315789, "low_lr": 1.836842105263158e-05, "step": 155 }, { "epoch": 0.4076265614727153, "high_lr": 0.0009184210526315789, "low_lr": 1.836842105263158e-05, "step": 155 }, { "epoch": 0.4076265614727153, "high_lr": 0.0009184210526315789, "low_lr": 1.836842105263158e-05, "step": 155 }, { "epoch": 0.4076265614727153, "high_lr": 0.0009184210526315789, "low_lr": 1.836842105263158e-05, "step": 155 }, { "epoch": 0.41025641025641024, "grad_norm": 0.7288228869438171, "learning_rate": 0.0009178947368421053, "loss": 1.8497, "step": 156 }, { "epoch": 0.41025641025641024, "high_lr": 0.0009178947368421053, "low_lr": 1.8357894736842105e-05, "step": 156 }, { "epoch": 0.41025641025641024, "high_lr": 0.0009178947368421053, "low_lr": 1.8357894736842105e-05, "step": 156 }, { "epoch": 0.41025641025641024, "high_lr": 0.0009178947368421053, "low_lr": 1.8357894736842105e-05, "step": 156 }, { "epoch": 0.41025641025641024, "high_lr": 0.0009178947368421053, "low_lr": 1.8357894736842105e-05, "step": 156 }, { "epoch": 0.41025641025641024, "high_lr": 0.0009178947368421053, "low_lr": 1.8357894736842105e-05, "step": 156 }, { "epoch": 0.41025641025641024, "high_lr": 0.0009178947368421053, "low_lr": 1.8357894736842105e-05, "step": 156 }, { "epoch": 0.41025641025641024, "high_lr": 0.0009178947368421053, "low_lr": 1.8357894736842105e-05, "step": 156 }, { "epoch": 0.41025641025641024, "high_lr": 0.0009178947368421053, "low_lr": 1.8357894736842105e-05, "step": 156 }, { "epoch": 0.4128862590401052, "grad_norm": 0.7588838338851929, "learning_rate": 0.0009173684210526317, "loss": 1.8814, "step": 157 }, { "epoch": 0.4128862590401052, "high_lr": 0.0009173684210526317, "low_lr": 1.8347368421052633e-05, "step": 157 }, { "epoch": 0.4128862590401052, "high_lr": 0.0009173684210526317, "low_lr": 1.8347368421052633e-05, "step": 157 }, { "epoch": 0.4128862590401052, "high_lr": 0.0009173684210526317, "low_lr": 1.8347368421052633e-05, "step": 157 }, { "epoch": 0.4128862590401052, "high_lr": 0.0009173684210526317, "low_lr": 1.8347368421052633e-05, "step": 157 }, { "epoch": 0.4128862590401052, "high_lr": 0.0009173684210526317, "low_lr": 1.8347368421052633e-05, "step": 157 }, { "epoch": 0.4128862590401052, "high_lr": 0.0009173684210526317, "low_lr": 1.8347368421052633e-05, "step": 157 }, { "epoch": 0.4128862590401052, "high_lr": 0.0009173684210526317, "low_lr": 1.8347368421052633e-05, "step": 157 }, { "epoch": 0.4128862590401052, "high_lr": 0.0009173684210526317, "low_lr": 1.8347368421052633e-05, "step": 157 }, { "epoch": 0.4155161078238001, "grad_norm": 0.7453787326812744, "learning_rate": 0.000916842105263158, "loss": 1.8727, "step": 158 }, { "epoch": 0.4155161078238001, "high_lr": 0.000916842105263158, "low_lr": 1.8336842105263158e-05, "step": 158 }, { "epoch": 0.4155161078238001, "high_lr": 0.000916842105263158, "low_lr": 1.8336842105263158e-05, "step": 158 }, { "epoch": 0.4155161078238001, "high_lr": 0.000916842105263158, "low_lr": 1.8336842105263158e-05, "step": 158 }, { "epoch": 0.4155161078238001, "high_lr": 0.000916842105263158, "low_lr": 1.8336842105263158e-05, "step": 158 }, { "epoch": 0.4155161078238001, "high_lr": 0.000916842105263158, "low_lr": 1.8336842105263158e-05, "step": 158 }, { "epoch": 0.4155161078238001, "high_lr": 0.000916842105263158, "low_lr": 1.8336842105263158e-05, "step": 158 }, { "epoch": 0.4155161078238001, "high_lr": 0.000916842105263158, "low_lr": 1.8336842105263158e-05, "step": 158 }, { "epoch": 0.4155161078238001, "high_lr": 0.000916842105263158, "low_lr": 1.8336842105263158e-05, "step": 158 }, { "epoch": 0.4181459566074951, "grad_norm": 0.6738296151161194, "learning_rate": 0.0009163157894736842, "loss": 1.7921, "step": 159 }, { "epoch": 0.4181459566074951, "high_lr": 0.0009163157894736842, "low_lr": 1.8326315789473686e-05, "step": 159 }, { "epoch": 0.4181459566074951, "high_lr": 0.0009163157894736842, "low_lr": 1.8326315789473686e-05, "step": 159 }, { "epoch": 0.4181459566074951, "high_lr": 0.0009163157894736842, "low_lr": 1.8326315789473686e-05, "step": 159 }, { "epoch": 0.4181459566074951, "high_lr": 0.0009163157894736842, "low_lr": 1.8326315789473686e-05, "step": 159 }, { "epoch": 0.4181459566074951, "high_lr": 0.0009163157894736842, "low_lr": 1.8326315789473686e-05, "step": 159 }, { "epoch": 0.4181459566074951, "high_lr": 0.0009163157894736842, "low_lr": 1.8326315789473686e-05, "step": 159 }, { "epoch": 0.4181459566074951, "high_lr": 0.0009163157894736842, "low_lr": 1.8326315789473686e-05, "step": 159 }, { "epoch": 0.4181459566074951, "high_lr": 0.0009163157894736842, "low_lr": 1.8326315789473686e-05, "step": 159 }, { "epoch": 0.42077580539119, "grad_norm": 0.6959648132324219, "learning_rate": 0.0009157894736842105, "loss": 1.8321, "step": 160 }, { "epoch": 0.42077580539119, "high_lr": 0.0009157894736842105, "low_lr": 1.831578947368421e-05, "step": 160 }, { "epoch": 0.42077580539119, "high_lr": 0.0009157894736842105, "low_lr": 1.831578947368421e-05, "step": 160 }, { "epoch": 0.42077580539119, "high_lr": 0.0009157894736842105, "low_lr": 1.831578947368421e-05, "step": 160 }, { "epoch": 0.42077580539119, "high_lr": 0.0009157894736842105, "low_lr": 1.831578947368421e-05, "step": 160 }, { "epoch": 0.42077580539119, "high_lr": 0.0009157894736842105, "low_lr": 1.831578947368421e-05, "step": 160 }, { "epoch": 0.42077580539119, "high_lr": 0.0009157894736842105, "low_lr": 1.831578947368421e-05, "step": 160 }, { "epoch": 0.42077580539119, "high_lr": 0.0009157894736842105, "low_lr": 1.831578947368421e-05, "step": 160 }, { "epoch": 0.42077580539119, "high_lr": 0.0009157894736842105, "low_lr": 1.831578947368421e-05, "step": 160 }, { "epoch": 0.42340565417488496, "grad_norm": 0.7143363356590271, "learning_rate": 0.0009152631578947369, "loss": 1.8262, "step": 161 }, { "epoch": 0.42340565417488496, "high_lr": 0.0009152631578947369, "low_lr": 1.830526315789474e-05, "step": 161 }, { "epoch": 0.42340565417488496, "high_lr": 0.0009152631578947369, "low_lr": 1.830526315789474e-05, "step": 161 }, { "epoch": 0.42340565417488496, "high_lr": 0.0009152631578947369, "low_lr": 1.830526315789474e-05, "step": 161 }, { "epoch": 0.42340565417488496, "high_lr": 0.0009152631578947369, "low_lr": 1.830526315789474e-05, "step": 161 }, { "epoch": 0.42340565417488496, "high_lr": 0.0009152631578947369, "low_lr": 1.830526315789474e-05, "step": 161 }, { "epoch": 0.42340565417488496, "high_lr": 0.0009152631578947369, "low_lr": 1.830526315789474e-05, "step": 161 }, { "epoch": 0.42340565417488496, "high_lr": 0.0009152631578947369, "low_lr": 1.830526315789474e-05, "step": 161 }, { "epoch": 0.42340565417488496, "high_lr": 0.0009152631578947369, "low_lr": 1.830526315789474e-05, "step": 161 }, { "epoch": 0.4260355029585799, "grad_norm": 0.7190316319465637, "learning_rate": 0.0009147368421052632, "loss": 1.7782, "step": 162 }, { "epoch": 0.4260355029585799, "high_lr": 0.0009147368421052632, "low_lr": 1.8294736842105267e-05, "step": 162 }, { "epoch": 0.4260355029585799, "high_lr": 0.0009147368421052632, "low_lr": 1.8294736842105267e-05, "step": 162 }, { "epoch": 0.4260355029585799, "high_lr": 0.0009147368421052632, "low_lr": 1.8294736842105267e-05, "step": 162 }, { "epoch": 0.4260355029585799, "high_lr": 0.0009147368421052632, "low_lr": 1.8294736842105267e-05, "step": 162 }, { "epoch": 0.4260355029585799, "high_lr": 0.0009147368421052632, "low_lr": 1.8294736842105267e-05, "step": 162 }, { "epoch": 0.4260355029585799, "high_lr": 0.0009147368421052632, "low_lr": 1.8294736842105267e-05, "step": 162 }, { "epoch": 0.4260355029585799, "high_lr": 0.0009147368421052632, "low_lr": 1.8294736842105267e-05, "step": 162 }, { "epoch": 0.4260355029585799, "high_lr": 0.0009147368421052632, "low_lr": 1.8294736842105267e-05, "step": 162 }, { "epoch": 0.4286653517422748, "grad_norm": 0.7058752179145813, "learning_rate": 0.0009142105263157895, "loss": 1.8556, "step": 163 }, { "epoch": 0.4286653517422748, "high_lr": 0.0009142105263157895, "low_lr": 1.828421052631579e-05, "step": 163 }, { "epoch": 0.4286653517422748, "high_lr": 0.0009142105263157895, "low_lr": 1.828421052631579e-05, "step": 163 }, { "epoch": 0.4286653517422748, "high_lr": 0.0009142105263157895, "low_lr": 1.828421052631579e-05, "step": 163 }, { "epoch": 0.4286653517422748, "high_lr": 0.0009142105263157895, "low_lr": 1.828421052631579e-05, "step": 163 }, { "epoch": 0.4286653517422748, "high_lr": 0.0009142105263157895, "low_lr": 1.828421052631579e-05, "step": 163 }, { "epoch": 0.4286653517422748, "high_lr": 0.0009142105263157895, "low_lr": 1.828421052631579e-05, "step": 163 }, { "epoch": 0.4286653517422748, "high_lr": 0.0009142105263157895, "low_lr": 1.828421052631579e-05, "step": 163 }, { "epoch": 0.4286653517422748, "high_lr": 0.0009142105263157895, "low_lr": 1.828421052631579e-05, "step": 163 }, { "epoch": 0.43129520052596976, "grad_norm": 0.7939090132713318, "learning_rate": 0.0009136842105263158, "loss": 1.8973, "step": 164 }, { "epoch": 0.43129520052596976, "high_lr": 0.0009136842105263158, "low_lr": 1.8273684210526316e-05, "step": 164 }, { "epoch": 0.43129520052596976, "high_lr": 0.0009136842105263158, "low_lr": 1.8273684210526316e-05, "step": 164 }, { "epoch": 0.43129520052596976, "high_lr": 0.0009136842105263158, "low_lr": 1.8273684210526316e-05, "step": 164 }, { "epoch": 0.43129520052596976, "high_lr": 0.0009136842105263158, "low_lr": 1.8273684210526316e-05, "step": 164 }, { "epoch": 0.43129520052596976, "high_lr": 0.0009136842105263158, "low_lr": 1.8273684210526316e-05, "step": 164 }, { "epoch": 0.43129520052596976, "high_lr": 0.0009136842105263158, "low_lr": 1.8273684210526316e-05, "step": 164 }, { "epoch": 0.43129520052596976, "high_lr": 0.0009136842105263158, "low_lr": 1.8273684210526316e-05, "step": 164 }, { "epoch": 0.43129520052596976, "high_lr": 0.0009136842105263158, "low_lr": 1.8273684210526316e-05, "step": 164 }, { "epoch": 0.4339250493096647, "grad_norm": 0.719814121723175, "learning_rate": 0.0009131578947368421, "loss": 1.8264, "step": 165 }, { "epoch": 0.4339250493096647, "high_lr": 0.0009131578947368421, "low_lr": 1.8263157894736844e-05, "step": 165 }, { "epoch": 0.4339250493096647, "high_lr": 0.0009131578947368421, "low_lr": 1.8263157894736844e-05, "step": 165 }, { "epoch": 0.4339250493096647, "high_lr": 0.0009131578947368421, "low_lr": 1.8263157894736844e-05, "step": 165 }, { "epoch": 0.4339250493096647, "high_lr": 0.0009131578947368421, "low_lr": 1.8263157894736844e-05, "step": 165 }, { "epoch": 0.4339250493096647, "high_lr": 0.0009131578947368421, "low_lr": 1.8263157894736844e-05, "step": 165 }, { "epoch": 0.4339250493096647, "high_lr": 0.0009131578947368421, "low_lr": 1.8263157894736844e-05, "step": 165 }, { "epoch": 0.4339250493096647, "high_lr": 0.0009131578947368421, "low_lr": 1.8263157894736844e-05, "step": 165 }, { "epoch": 0.4339250493096647, "high_lr": 0.0009131578947368421, "low_lr": 1.8263157894736844e-05, "step": 165 }, { "epoch": 0.4365548980933596, "grad_norm": 0.7035220265388489, "learning_rate": 0.0009126315789473685, "loss": 1.805, "step": 166 }, { "epoch": 0.4365548980933596, "high_lr": 0.0009126315789473685, "low_lr": 1.8252631578947372e-05, "step": 166 }, { "epoch": 0.4365548980933596, "high_lr": 0.0009126315789473685, "low_lr": 1.8252631578947372e-05, "step": 166 }, { "epoch": 0.4365548980933596, "high_lr": 0.0009126315789473685, "low_lr": 1.8252631578947372e-05, "step": 166 }, { "epoch": 0.4365548980933596, "high_lr": 0.0009126315789473685, "low_lr": 1.8252631578947372e-05, "step": 166 }, { "epoch": 0.4365548980933596, "high_lr": 0.0009126315789473685, "low_lr": 1.8252631578947372e-05, "step": 166 }, { "epoch": 0.4365548980933596, "high_lr": 0.0009126315789473685, "low_lr": 1.8252631578947372e-05, "step": 166 }, { "epoch": 0.4365548980933596, "high_lr": 0.0009126315789473685, "low_lr": 1.8252631578947372e-05, "step": 166 }, { "epoch": 0.4365548980933596, "high_lr": 0.0009126315789473685, "low_lr": 1.8252631578947372e-05, "step": 166 }, { "epoch": 0.43918474687705455, "grad_norm": 0.7253255248069763, "learning_rate": 0.0009121052631578947, "loss": 1.809, "step": 167 }, { "epoch": 0.43918474687705455, "high_lr": 0.0009121052631578947, "low_lr": 1.8242105263157897e-05, "step": 167 }, { "epoch": 0.43918474687705455, "high_lr": 0.0009121052631578947, "low_lr": 1.8242105263157897e-05, "step": 167 }, { "epoch": 0.43918474687705455, "high_lr": 0.0009121052631578947, "low_lr": 1.8242105263157897e-05, "step": 167 }, { "epoch": 0.43918474687705455, "high_lr": 0.0009121052631578947, "low_lr": 1.8242105263157897e-05, "step": 167 }, { "epoch": 0.43918474687705455, "high_lr": 0.0009121052631578947, "low_lr": 1.8242105263157897e-05, "step": 167 }, { "epoch": 0.43918474687705455, "high_lr": 0.0009121052631578947, "low_lr": 1.8242105263157897e-05, "step": 167 }, { "epoch": 0.43918474687705455, "high_lr": 0.0009121052631578947, "low_lr": 1.8242105263157897e-05, "step": 167 }, { "epoch": 0.43918474687705455, "high_lr": 0.0009121052631578947, "low_lr": 1.8242105263157897e-05, "step": 167 }, { "epoch": 0.4418145956607495, "grad_norm": 0.7530324459075928, "learning_rate": 0.000911578947368421, "loss": 1.8508, "step": 168 }, { "epoch": 0.4418145956607495, "high_lr": 0.000911578947368421, "low_lr": 1.823157894736842e-05, "step": 168 }, { "epoch": 0.4418145956607495, "high_lr": 0.000911578947368421, "low_lr": 1.823157894736842e-05, "step": 168 }, { "epoch": 0.4418145956607495, "high_lr": 0.000911578947368421, "low_lr": 1.823157894736842e-05, "step": 168 }, { "epoch": 0.4418145956607495, "high_lr": 0.000911578947368421, "low_lr": 1.823157894736842e-05, "step": 168 }, { "epoch": 0.4418145956607495, "high_lr": 0.000911578947368421, "low_lr": 1.823157894736842e-05, "step": 168 }, { "epoch": 0.4418145956607495, "high_lr": 0.000911578947368421, "low_lr": 1.823157894736842e-05, "step": 168 }, { "epoch": 0.4418145956607495, "high_lr": 0.000911578947368421, "low_lr": 1.823157894736842e-05, "step": 168 }, { "epoch": 0.4418145956607495, "high_lr": 0.000911578947368421, "low_lr": 1.823157894736842e-05, "step": 168 }, { "epoch": 0.4444444444444444, "grad_norm": 0.7631351351737976, "learning_rate": 0.0009110526315789473, "loss": 1.8022, "step": 169 }, { "epoch": 0.4444444444444444, "high_lr": 0.0009110526315789473, "low_lr": 1.822105263157895e-05, "step": 169 }, { "epoch": 0.4444444444444444, "high_lr": 0.0009110526315789473, "low_lr": 1.822105263157895e-05, "step": 169 }, { "epoch": 0.4444444444444444, "high_lr": 0.0009110526315789473, "low_lr": 1.822105263157895e-05, "step": 169 }, { "epoch": 0.4444444444444444, "high_lr": 0.0009110526315789473, "low_lr": 1.822105263157895e-05, "step": 169 }, { "epoch": 0.4444444444444444, "high_lr": 0.0009110526315789473, "low_lr": 1.822105263157895e-05, "step": 169 }, { "epoch": 0.4444444444444444, "high_lr": 0.0009110526315789473, "low_lr": 1.822105263157895e-05, "step": 169 }, { "epoch": 0.4444444444444444, "high_lr": 0.0009110526315789473, "low_lr": 1.822105263157895e-05, "step": 169 }, { "epoch": 0.4444444444444444, "high_lr": 0.0009110526315789473, "low_lr": 1.822105263157895e-05, "step": 169 }, { "epoch": 0.4470742932281394, "grad_norm": 0.9896913766860962, "learning_rate": 0.0009105263157894737, "loss": 1.7911, "step": 170 }, { "epoch": 0.4470742932281394, "high_lr": 0.0009105263157894737, "low_lr": 1.8210526315789477e-05, "step": 170 }, { "epoch": 0.4470742932281394, "high_lr": 0.0009105263157894737, "low_lr": 1.8210526315789477e-05, "step": 170 }, { "epoch": 0.4470742932281394, "high_lr": 0.0009105263157894737, "low_lr": 1.8210526315789477e-05, "step": 170 }, { "epoch": 0.4470742932281394, "high_lr": 0.0009105263157894737, "low_lr": 1.8210526315789477e-05, "step": 170 }, { "epoch": 0.4470742932281394, "high_lr": 0.0009105263157894737, "low_lr": 1.8210526315789477e-05, "step": 170 }, { "epoch": 0.4470742932281394, "high_lr": 0.0009105263157894737, "low_lr": 1.8210526315789477e-05, "step": 170 }, { "epoch": 0.4470742932281394, "high_lr": 0.0009105263157894737, "low_lr": 1.8210526315789477e-05, "step": 170 }, { "epoch": 0.4470742932281394, "high_lr": 0.0009105263157894737, "low_lr": 1.8210526315789477e-05, "step": 170 }, { "epoch": 0.44970414201183434, "grad_norm": 0.7498970627784729, "learning_rate": 0.00091, "loss": 1.92, "step": 171 }, { "epoch": 0.44970414201183434, "high_lr": 0.00091, "low_lr": 1.8200000000000002e-05, "step": 171 }, { "epoch": 0.44970414201183434, "high_lr": 0.00091, "low_lr": 1.8200000000000002e-05, "step": 171 }, { "epoch": 0.44970414201183434, "high_lr": 0.00091, "low_lr": 1.8200000000000002e-05, "step": 171 }, { "epoch": 0.44970414201183434, "high_lr": 0.00091, "low_lr": 1.8200000000000002e-05, "step": 171 }, { "epoch": 0.44970414201183434, "high_lr": 0.00091, "low_lr": 1.8200000000000002e-05, "step": 171 }, { "epoch": 0.44970414201183434, "high_lr": 0.00091, "low_lr": 1.8200000000000002e-05, "step": 171 }, { "epoch": 0.44970414201183434, "high_lr": 0.00091, "low_lr": 1.8200000000000002e-05, "step": 171 }, { "epoch": 0.44970414201183434, "high_lr": 0.00091, "low_lr": 1.8200000000000002e-05, "step": 171 }, { "epoch": 0.4523339907955293, "grad_norm": 0.77386075258255, "learning_rate": 0.0009094736842105264, "loss": 1.7877, "step": 172 }, { "epoch": 0.4523339907955293, "high_lr": 0.0009094736842105264, "low_lr": 1.8189473684210527e-05, "step": 172 }, { "epoch": 0.4523339907955293, "high_lr": 0.0009094736842105264, "low_lr": 1.8189473684210527e-05, "step": 172 }, { "epoch": 0.4523339907955293, "high_lr": 0.0009094736842105264, "low_lr": 1.8189473684210527e-05, "step": 172 }, { "epoch": 0.4523339907955293, "high_lr": 0.0009094736842105264, "low_lr": 1.8189473684210527e-05, "step": 172 }, { "epoch": 0.4523339907955293, "high_lr": 0.0009094736842105264, "low_lr": 1.8189473684210527e-05, "step": 172 }, { "epoch": 0.4523339907955293, "high_lr": 0.0009094736842105264, "low_lr": 1.8189473684210527e-05, "step": 172 }, { "epoch": 0.4523339907955293, "high_lr": 0.0009094736842105264, "low_lr": 1.8189473684210527e-05, "step": 172 }, { "epoch": 0.4523339907955293, "high_lr": 0.0009094736842105264, "low_lr": 1.8189473684210527e-05, "step": 172 }, { "epoch": 0.4549638395792242, "grad_norm": 0.8331085443496704, "learning_rate": 0.0009089473684210527, "loss": 1.8145, "step": 173 }, { "epoch": 0.4549638395792242, "high_lr": 0.0009089473684210527, "low_lr": 1.8178947368421055e-05, "step": 173 }, { "epoch": 0.4549638395792242, "high_lr": 0.0009089473684210527, "low_lr": 1.8178947368421055e-05, "step": 173 }, { "epoch": 0.4549638395792242, "high_lr": 0.0009089473684210527, "low_lr": 1.8178947368421055e-05, "step": 173 }, { "epoch": 0.4549638395792242, "high_lr": 0.0009089473684210527, "low_lr": 1.8178947368421055e-05, "step": 173 }, { "epoch": 0.4549638395792242, "high_lr": 0.0009089473684210527, "low_lr": 1.8178947368421055e-05, "step": 173 }, { "epoch": 0.4549638395792242, "high_lr": 0.0009089473684210527, "low_lr": 1.8178947368421055e-05, "step": 173 }, { "epoch": 0.4549638395792242, "high_lr": 0.0009089473684210527, "low_lr": 1.8178947368421055e-05, "step": 173 }, { "epoch": 0.4549638395792242, "high_lr": 0.0009089473684210527, "low_lr": 1.8178947368421055e-05, "step": 173 }, { "epoch": 0.45759368836291914, "grad_norm": 0.8667138814926147, "learning_rate": 0.000908421052631579, "loss": 1.8783, "step": 174 }, { "epoch": 0.45759368836291914, "high_lr": 0.000908421052631579, "low_lr": 1.816842105263158e-05, "step": 174 }, { "epoch": 0.45759368836291914, "high_lr": 0.000908421052631579, "low_lr": 1.816842105263158e-05, "step": 174 }, { "epoch": 0.45759368836291914, "high_lr": 0.000908421052631579, "low_lr": 1.816842105263158e-05, "step": 174 }, { "epoch": 0.45759368836291914, "high_lr": 0.000908421052631579, "low_lr": 1.816842105263158e-05, "step": 174 }, { "epoch": 0.45759368836291914, "high_lr": 0.000908421052631579, "low_lr": 1.816842105263158e-05, "step": 174 }, { "epoch": 0.45759368836291914, "high_lr": 0.000908421052631579, "low_lr": 1.816842105263158e-05, "step": 174 }, { "epoch": 0.45759368836291914, "high_lr": 0.000908421052631579, "low_lr": 1.816842105263158e-05, "step": 174 }, { "epoch": 0.45759368836291914, "high_lr": 0.000908421052631579, "low_lr": 1.816842105263158e-05, "step": 174 }, { "epoch": 0.46022353714661407, "grad_norm": 0.7862030863761902, "learning_rate": 0.0009078947368421054, "loss": 1.9253, "step": 175 }, { "epoch": 0.46022353714661407, "high_lr": 0.0009078947368421054, "low_lr": 1.8157894736842107e-05, "step": 175 }, { "epoch": 0.46022353714661407, "high_lr": 0.0009078947368421054, "low_lr": 1.8157894736842107e-05, "step": 175 }, { "epoch": 0.46022353714661407, "high_lr": 0.0009078947368421054, "low_lr": 1.8157894736842107e-05, "step": 175 }, { "epoch": 0.46022353714661407, "high_lr": 0.0009078947368421054, "low_lr": 1.8157894736842107e-05, "step": 175 }, { "epoch": 0.46022353714661407, "high_lr": 0.0009078947368421054, "low_lr": 1.8157894736842107e-05, "step": 175 }, { "epoch": 0.46022353714661407, "high_lr": 0.0009078947368421054, "low_lr": 1.8157894736842107e-05, "step": 175 }, { "epoch": 0.46022353714661407, "high_lr": 0.0009078947368421054, "low_lr": 1.8157894736842107e-05, "step": 175 }, { "epoch": 0.46022353714661407, "high_lr": 0.0009078947368421054, "low_lr": 1.8157894736842107e-05, "step": 175 }, { "epoch": 0.462853385930309, "grad_norm": 0.7251467108726501, "learning_rate": 0.0009073684210526316, "loss": 1.7903, "step": 176 }, { "epoch": 0.462853385930309, "high_lr": 0.0009073684210526316, "low_lr": 1.8147368421052632e-05, "step": 176 }, { "epoch": 0.462853385930309, "high_lr": 0.0009073684210526316, "low_lr": 1.8147368421052632e-05, "step": 176 }, { "epoch": 0.462853385930309, "high_lr": 0.0009073684210526316, "low_lr": 1.8147368421052632e-05, "step": 176 }, { "epoch": 0.462853385930309, "high_lr": 0.0009073684210526316, "low_lr": 1.8147368421052632e-05, "step": 176 }, { "epoch": 0.462853385930309, "high_lr": 0.0009073684210526316, "low_lr": 1.8147368421052632e-05, "step": 176 }, { "epoch": 0.462853385930309, "high_lr": 0.0009073684210526316, "low_lr": 1.8147368421052632e-05, "step": 176 }, { "epoch": 0.462853385930309, "high_lr": 0.0009073684210526316, "low_lr": 1.8147368421052632e-05, "step": 176 }, { "epoch": 0.462853385930309, "high_lr": 0.0009073684210526316, "low_lr": 1.8147368421052632e-05, "step": 176 }, { "epoch": 0.46548323471400394, "grad_norm": 0.7861801385879517, "learning_rate": 0.0009068421052631579, "loss": 1.8458, "step": 177 }, { "epoch": 0.46548323471400394, "high_lr": 0.0009068421052631579, "low_lr": 1.813684210526316e-05, "step": 177 }, { "epoch": 0.46548323471400394, "high_lr": 0.0009068421052631579, "low_lr": 1.813684210526316e-05, "step": 177 }, { "epoch": 0.46548323471400394, "high_lr": 0.0009068421052631579, "low_lr": 1.813684210526316e-05, "step": 177 }, { "epoch": 0.46548323471400394, "high_lr": 0.0009068421052631579, "low_lr": 1.813684210526316e-05, "step": 177 }, { "epoch": 0.46548323471400394, "high_lr": 0.0009068421052631579, "low_lr": 1.813684210526316e-05, "step": 177 }, { "epoch": 0.46548323471400394, "high_lr": 0.0009068421052631579, "low_lr": 1.813684210526316e-05, "step": 177 }, { "epoch": 0.46548323471400394, "high_lr": 0.0009068421052631579, "low_lr": 1.813684210526316e-05, "step": 177 }, { "epoch": 0.46548323471400394, "high_lr": 0.0009068421052631579, "low_lr": 1.813684210526316e-05, "step": 177 }, { "epoch": 0.46811308349769887, "grad_norm": 0.8178443312644958, "learning_rate": 0.0009063157894736842, "loss": 1.7825, "step": 178 }, { "epoch": 0.46811308349769887, "high_lr": 0.0009063157894736842, "low_lr": 1.8126315789473685e-05, "step": 178 }, { "epoch": 0.46811308349769887, "high_lr": 0.0009063157894736842, "low_lr": 1.8126315789473685e-05, "step": 178 }, { "epoch": 0.46811308349769887, "high_lr": 0.0009063157894736842, "low_lr": 1.8126315789473685e-05, "step": 178 }, { "epoch": 0.46811308349769887, "high_lr": 0.0009063157894736842, "low_lr": 1.8126315789473685e-05, "step": 178 }, { "epoch": 0.46811308349769887, "high_lr": 0.0009063157894736842, "low_lr": 1.8126315789473685e-05, "step": 178 }, { "epoch": 0.46811308349769887, "high_lr": 0.0009063157894736842, "low_lr": 1.8126315789473685e-05, "step": 178 }, { "epoch": 0.46811308349769887, "high_lr": 0.0009063157894736842, "low_lr": 1.8126315789473685e-05, "step": 178 }, { "epoch": 0.46811308349769887, "high_lr": 0.0009063157894736842, "low_lr": 1.8126315789473685e-05, "step": 178 }, { "epoch": 0.4707429322813938, "grad_norm": 0.8175384402275085, "learning_rate": 0.0009057894736842105, "loss": 1.9254, "step": 179 }, { "epoch": 0.4707429322813938, "high_lr": 0.0009057894736842105, "low_lr": 1.8115789473684213e-05, "step": 179 }, { "epoch": 0.4707429322813938, "high_lr": 0.0009057894736842105, "low_lr": 1.8115789473684213e-05, "step": 179 }, { "epoch": 0.4707429322813938, "high_lr": 0.0009057894736842105, "low_lr": 1.8115789473684213e-05, "step": 179 }, { "epoch": 0.4707429322813938, "high_lr": 0.0009057894736842105, "low_lr": 1.8115789473684213e-05, "step": 179 }, { "epoch": 0.4707429322813938, "high_lr": 0.0009057894736842105, "low_lr": 1.8115789473684213e-05, "step": 179 }, { "epoch": 0.4707429322813938, "high_lr": 0.0009057894736842105, "low_lr": 1.8115789473684213e-05, "step": 179 }, { "epoch": 0.4707429322813938, "high_lr": 0.0009057894736842105, "low_lr": 1.8115789473684213e-05, "step": 179 }, { "epoch": 0.4707429322813938, "high_lr": 0.0009057894736842105, "low_lr": 1.8115789473684213e-05, "step": 179 }, { "epoch": 0.47337278106508873, "grad_norm": 0.8413812518119812, "learning_rate": 0.0009052631578947369, "loss": 1.8219, "step": 180 }, { "epoch": 0.47337278106508873, "high_lr": 0.0009052631578947369, "low_lr": 1.810526315789474e-05, "step": 180 }, { "epoch": 0.47337278106508873, "high_lr": 0.0009052631578947369, "low_lr": 1.810526315789474e-05, "step": 180 }, { "epoch": 0.47337278106508873, "high_lr": 0.0009052631578947369, "low_lr": 1.810526315789474e-05, "step": 180 }, { "epoch": 0.47337278106508873, "high_lr": 0.0009052631578947369, "low_lr": 1.810526315789474e-05, "step": 180 }, { "epoch": 0.47337278106508873, "high_lr": 0.0009052631578947369, "low_lr": 1.810526315789474e-05, "step": 180 }, { "epoch": 0.47337278106508873, "high_lr": 0.0009052631578947369, "low_lr": 1.810526315789474e-05, "step": 180 }, { "epoch": 0.47337278106508873, "high_lr": 0.0009052631578947369, "low_lr": 1.810526315789474e-05, "step": 180 }, { "epoch": 0.47337278106508873, "high_lr": 0.0009052631578947369, "low_lr": 1.810526315789474e-05, "step": 180 }, { "epoch": 0.4760026298487837, "grad_norm": 0.8568588495254517, "learning_rate": 0.0009047368421052632, "loss": 1.8912, "step": 181 }, { "epoch": 0.4760026298487837, "high_lr": 0.0009047368421052632, "low_lr": 1.8094736842105265e-05, "step": 181 }, { "epoch": 0.4760026298487837, "high_lr": 0.0009047368421052632, "low_lr": 1.8094736842105265e-05, "step": 181 }, { "epoch": 0.4760026298487837, "high_lr": 0.0009047368421052632, "low_lr": 1.8094736842105265e-05, "step": 181 }, { "epoch": 0.4760026298487837, "high_lr": 0.0009047368421052632, "low_lr": 1.8094736842105265e-05, "step": 181 }, { "epoch": 0.4760026298487837, "high_lr": 0.0009047368421052632, "low_lr": 1.8094736842105265e-05, "step": 181 }, { "epoch": 0.4760026298487837, "high_lr": 0.0009047368421052632, "low_lr": 1.8094736842105265e-05, "step": 181 }, { "epoch": 0.4760026298487837, "high_lr": 0.0009047368421052632, "low_lr": 1.8094736842105265e-05, "step": 181 }, { "epoch": 0.4760026298487837, "high_lr": 0.0009047368421052632, "low_lr": 1.8094736842105265e-05, "step": 181 }, { "epoch": 0.47863247863247865, "grad_norm": 0.7578849792480469, "learning_rate": 0.0009042105263157895, "loss": 1.845, "step": 182 }, { "epoch": 0.47863247863247865, "high_lr": 0.0009042105263157895, "low_lr": 1.808421052631579e-05, "step": 182 }, { "epoch": 0.47863247863247865, "high_lr": 0.0009042105263157895, "low_lr": 1.808421052631579e-05, "step": 182 }, { "epoch": 0.47863247863247865, "high_lr": 0.0009042105263157895, "low_lr": 1.808421052631579e-05, "step": 182 }, { "epoch": 0.47863247863247865, "high_lr": 0.0009042105263157895, "low_lr": 1.808421052631579e-05, "step": 182 }, { "epoch": 0.47863247863247865, "high_lr": 0.0009042105263157895, "low_lr": 1.808421052631579e-05, "step": 182 }, { "epoch": 0.47863247863247865, "high_lr": 0.0009042105263157895, "low_lr": 1.808421052631579e-05, "step": 182 }, { "epoch": 0.47863247863247865, "high_lr": 0.0009042105263157895, "low_lr": 1.808421052631579e-05, "step": 182 }, { "epoch": 0.47863247863247865, "high_lr": 0.0009042105263157895, "low_lr": 1.808421052631579e-05, "step": 182 }, { "epoch": 0.4812623274161736, "grad_norm": 0.9012306332588196, "learning_rate": 0.0009036842105263158, "loss": 1.847, "step": 183 }, { "epoch": 0.4812623274161736, "high_lr": 0.0009036842105263158, "low_lr": 1.8073684210526318e-05, "step": 183 }, { "epoch": 0.4812623274161736, "high_lr": 0.0009036842105263158, "low_lr": 1.8073684210526318e-05, "step": 183 }, { "epoch": 0.4812623274161736, "high_lr": 0.0009036842105263158, "low_lr": 1.8073684210526318e-05, "step": 183 }, { "epoch": 0.4812623274161736, "high_lr": 0.0009036842105263158, "low_lr": 1.8073684210526318e-05, "step": 183 }, { "epoch": 0.4812623274161736, "high_lr": 0.0009036842105263158, "low_lr": 1.8073684210526318e-05, "step": 183 }, { "epoch": 0.4812623274161736, "high_lr": 0.0009036842105263158, "low_lr": 1.8073684210526318e-05, "step": 183 }, { "epoch": 0.4812623274161736, "high_lr": 0.0009036842105263158, "low_lr": 1.8073684210526318e-05, "step": 183 }, { "epoch": 0.4812623274161736, "high_lr": 0.0009036842105263158, "low_lr": 1.8073684210526318e-05, "step": 183 }, { "epoch": 0.4838921761998685, "grad_norm": 0.794228732585907, "learning_rate": 0.0009031578947368422, "loss": 1.8191, "step": 184 }, { "epoch": 0.4838921761998685, "high_lr": 0.0009031578947368422, "low_lr": 1.8063157894736846e-05, "step": 184 }, { "epoch": 0.4838921761998685, "high_lr": 0.0009031578947368422, "low_lr": 1.8063157894736846e-05, "step": 184 }, { "epoch": 0.4838921761998685, "high_lr": 0.0009031578947368422, "low_lr": 1.8063157894736846e-05, "step": 184 }, { "epoch": 0.4838921761998685, "high_lr": 0.0009031578947368422, "low_lr": 1.8063157894736846e-05, "step": 184 }, { "epoch": 0.4838921761998685, "high_lr": 0.0009031578947368422, "low_lr": 1.8063157894736846e-05, "step": 184 }, { "epoch": 0.4838921761998685, "high_lr": 0.0009031578947368422, "low_lr": 1.8063157894736846e-05, "step": 184 }, { "epoch": 0.4838921761998685, "high_lr": 0.0009031578947368422, "low_lr": 1.8063157894736846e-05, "step": 184 }, { "epoch": 0.4838921761998685, "high_lr": 0.0009031578947368422, "low_lr": 1.8063157894736846e-05, "step": 184 }, { "epoch": 0.48652202498356345, "grad_norm": 0.7716225981712341, "learning_rate": 0.0009026315789473684, "loss": 1.8249, "step": 185 }, { "epoch": 0.48652202498356345, "high_lr": 0.0009026315789473684, "low_lr": 1.805263157894737e-05, "step": 185 }, { "epoch": 0.48652202498356345, "high_lr": 0.0009026315789473684, "low_lr": 1.805263157894737e-05, "step": 185 }, { "epoch": 0.48652202498356345, "high_lr": 0.0009026315789473684, "low_lr": 1.805263157894737e-05, "step": 185 }, { "epoch": 0.48652202498356345, "high_lr": 0.0009026315789473684, "low_lr": 1.805263157894737e-05, "step": 185 }, { "epoch": 0.48652202498356345, "high_lr": 0.0009026315789473684, "low_lr": 1.805263157894737e-05, "step": 185 }, { "epoch": 0.48652202498356345, "high_lr": 0.0009026315789473684, "low_lr": 1.805263157894737e-05, "step": 185 }, { "epoch": 0.48652202498356345, "high_lr": 0.0009026315789473684, "low_lr": 1.805263157894737e-05, "step": 185 }, { "epoch": 0.48652202498356345, "high_lr": 0.0009026315789473684, "low_lr": 1.805263157894737e-05, "step": 185 }, { "epoch": 0.4891518737672584, "grad_norm": 0.8229754567146301, "learning_rate": 0.0009021052631578947, "loss": 1.8453, "step": 186 }, { "epoch": 0.4891518737672584, "high_lr": 0.0009021052631578947, "low_lr": 1.8042105263157895e-05, "step": 186 }, { "epoch": 0.4891518737672584, "high_lr": 0.0009021052631578947, "low_lr": 1.8042105263157895e-05, "step": 186 }, { "epoch": 0.4891518737672584, "high_lr": 0.0009021052631578947, "low_lr": 1.8042105263157895e-05, "step": 186 }, { "epoch": 0.4891518737672584, "high_lr": 0.0009021052631578947, "low_lr": 1.8042105263157895e-05, "step": 186 }, { "epoch": 0.4891518737672584, "high_lr": 0.0009021052631578947, "low_lr": 1.8042105263157895e-05, "step": 186 }, { "epoch": 0.4891518737672584, "high_lr": 0.0009021052631578947, "low_lr": 1.8042105263157895e-05, "step": 186 }, { "epoch": 0.4891518737672584, "high_lr": 0.0009021052631578947, "low_lr": 1.8042105263157895e-05, "step": 186 }, { "epoch": 0.4891518737672584, "high_lr": 0.0009021052631578947, "low_lr": 1.8042105263157895e-05, "step": 186 }, { "epoch": 0.4917817225509533, "grad_norm": 0.8289306163787842, "learning_rate": 0.000901578947368421, "loss": 1.86, "step": 187 }, { "epoch": 0.4917817225509533, "high_lr": 0.000901578947368421, "low_lr": 1.8031578947368423e-05, "step": 187 }, { "epoch": 0.4917817225509533, "high_lr": 0.000901578947368421, "low_lr": 1.8031578947368423e-05, "step": 187 }, { "epoch": 0.4917817225509533, "high_lr": 0.000901578947368421, "low_lr": 1.8031578947368423e-05, "step": 187 }, { "epoch": 0.4917817225509533, "high_lr": 0.000901578947368421, "low_lr": 1.8031578947368423e-05, "step": 187 }, { "epoch": 0.4917817225509533, "high_lr": 0.000901578947368421, "low_lr": 1.8031578947368423e-05, "step": 187 }, { "epoch": 0.4917817225509533, "high_lr": 0.000901578947368421, "low_lr": 1.8031578947368423e-05, "step": 187 }, { "epoch": 0.4917817225509533, "high_lr": 0.000901578947368421, "low_lr": 1.8031578947368423e-05, "step": 187 }, { "epoch": 0.4917817225509533, "high_lr": 0.000901578947368421, "low_lr": 1.8031578947368423e-05, "step": 187 }, { "epoch": 0.49441157133464825, "grad_norm": 0.7908790707588196, "learning_rate": 0.0009010526315789473, "loss": 1.8061, "step": 188 }, { "epoch": 0.49441157133464825, "high_lr": 0.0009010526315789473, "low_lr": 1.8021052631578948e-05, "step": 188 }, { "epoch": 0.49441157133464825, "high_lr": 0.0009010526315789473, "low_lr": 1.8021052631578948e-05, "step": 188 }, { "epoch": 0.49441157133464825, "high_lr": 0.0009010526315789473, "low_lr": 1.8021052631578948e-05, "step": 188 }, { "epoch": 0.49441157133464825, "high_lr": 0.0009010526315789473, "low_lr": 1.8021052631578948e-05, "step": 188 }, { "epoch": 0.49441157133464825, "high_lr": 0.0009010526315789473, "low_lr": 1.8021052631578948e-05, "step": 188 }, { "epoch": 0.49441157133464825, "high_lr": 0.0009010526315789473, "low_lr": 1.8021052631578948e-05, "step": 188 }, { "epoch": 0.49441157133464825, "high_lr": 0.0009010526315789473, "low_lr": 1.8021052631578948e-05, "step": 188 }, { "epoch": 0.49441157133464825, "high_lr": 0.0009010526315789473, "low_lr": 1.8021052631578948e-05, "step": 188 }, { "epoch": 0.4970414201183432, "grad_norm": 0.9673170447349548, "learning_rate": 0.0009005263157894738, "loss": 1.8517, "step": 189 }, { "epoch": 0.4970414201183432, "high_lr": 0.0009005263157894738, "low_lr": 1.8010526315789476e-05, "step": 189 }, { "epoch": 0.4970414201183432, "high_lr": 0.0009005263157894738, "low_lr": 1.8010526315789476e-05, "step": 189 }, { "epoch": 0.4970414201183432, "high_lr": 0.0009005263157894738, "low_lr": 1.8010526315789476e-05, "step": 189 }, { "epoch": 0.4970414201183432, "high_lr": 0.0009005263157894738, "low_lr": 1.8010526315789476e-05, "step": 189 }, { "epoch": 0.4970414201183432, "high_lr": 0.0009005263157894738, "low_lr": 1.8010526315789476e-05, "step": 189 }, { "epoch": 0.4970414201183432, "high_lr": 0.0009005263157894738, "low_lr": 1.8010526315789476e-05, "step": 189 }, { "epoch": 0.4970414201183432, "high_lr": 0.0009005263157894738, "low_lr": 1.8010526315789476e-05, "step": 189 }, { "epoch": 0.4970414201183432, "high_lr": 0.0009005263157894738, "low_lr": 1.8010526315789476e-05, "step": 189 }, { "epoch": 0.4996712689020381, "grad_norm": 0.862890899181366, "learning_rate": 0.0009000000000000001, "loss": 1.8615, "step": 190 }, { "epoch": 0.4996712689020381, "high_lr": 0.0009000000000000001, "low_lr": 1.8e-05, "step": 190 }, { "epoch": 0.4996712689020381, "high_lr": 0.0009000000000000001, "low_lr": 1.8e-05, "step": 190 }, { "epoch": 0.4996712689020381, "high_lr": 0.0009000000000000001, "low_lr": 1.8e-05, "step": 190 }, { "epoch": 0.4996712689020381, "high_lr": 0.0009000000000000001, "low_lr": 1.8e-05, "step": 190 }, { "epoch": 0.4996712689020381, "high_lr": 0.0009000000000000001, "low_lr": 1.8e-05, "step": 190 }, { "epoch": 0.4996712689020381, "high_lr": 0.0009000000000000001, "low_lr": 1.8e-05, "step": 190 }, { "epoch": 0.4996712689020381, "high_lr": 0.0009000000000000001, "low_lr": 1.8e-05, "step": 190 }, { "epoch": 0.4996712689020381, "high_lr": 0.0009000000000000001, "low_lr": 1.8e-05, "step": 190 }, { "epoch": 0.502301117685733, "grad_norm": 0.8507909178733826, "learning_rate": 0.0008994736842105264, "loss": 1.8215, "step": 191 }, { "epoch": 0.502301117685733, "high_lr": 0.0008994736842105264, "low_lr": 1.798947368421053e-05, "step": 191 }, { "epoch": 0.502301117685733, "high_lr": 0.0008994736842105264, "low_lr": 1.798947368421053e-05, "step": 191 }, { "epoch": 0.502301117685733, "high_lr": 0.0008994736842105264, "low_lr": 1.798947368421053e-05, "step": 191 }, { "epoch": 0.502301117685733, "high_lr": 0.0008994736842105264, "low_lr": 1.798947368421053e-05, "step": 191 }, { "epoch": 0.502301117685733, "high_lr": 0.0008994736842105264, "low_lr": 1.798947368421053e-05, "step": 191 }, { "epoch": 0.502301117685733, "high_lr": 0.0008994736842105264, "low_lr": 1.798947368421053e-05, "step": 191 }, { "epoch": 0.502301117685733, "high_lr": 0.0008994736842105264, "low_lr": 1.798947368421053e-05, "step": 191 }, { "epoch": 0.502301117685733, "high_lr": 0.0008994736842105264, "low_lr": 1.798947368421053e-05, "step": 191 }, { "epoch": 0.504930966469428, "grad_norm": 0.7558906674385071, "learning_rate": 0.0008989473684210527, "loss": 1.7142, "step": 192 }, { "epoch": 0.504930966469428, "high_lr": 0.0008989473684210527, "low_lr": 1.7978947368421053e-05, "step": 192 }, { "epoch": 0.504930966469428, "high_lr": 0.0008989473684210527, "low_lr": 1.7978947368421053e-05, "step": 192 }, { "epoch": 0.504930966469428, "high_lr": 0.0008989473684210527, "low_lr": 1.7978947368421053e-05, "step": 192 }, { "epoch": 0.504930966469428, "high_lr": 0.0008989473684210527, "low_lr": 1.7978947368421053e-05, "step": 192 }, { "epoch": 0.504930966469428, "high_lr": 0.0008989473684210527, "low_lr": 1.7978947368421053e-05, "step": 192 }, { "epoch": 0.504930966469428, "high_lr": 0.0008989473684210527, "low_lr": 1.7978947368421053e-05, "step": 192 }, { "epoch": 0.504930966469428, "high_lr": 0.0008989473684210527, "low_lr": 1.7978947368421053e-05, "step": 192 }, { "epoch": 0.504930966469428, "high_lr": 0.0008989473684210527, "low_lr": 1.7978947368421053e-05, "step": 192 }, { "epoch": 0.5075608152531229, "grad_norm": 0.810209333896637, "learning_rate": 0.0008984210526315789, "loss": 1.8301, "step": 193 }, { "epoch": 0.5075608152531229, "high_lr": 0.0008984210526315789, "low_lr": 1.7968421052631578e-05, "step": 193 }, { "epoch": 0.5075608152531229, "high_lr": 0.0008984210526315789, "low_lr": 1.7968421052631578e-05, "step": 193 }, { "epoch": 0.5075608152531229, "high_lr": 0.0008984210526315789, "low_lr": 1.7968421052631578e-05, "step": 193 }, { "epoch": 0.5075608152531229, "high_lr": 0.0008984210526315789, "low_lr": 1.7968421052631578e-05, "step": 193 }, { "epoch": 0.5075608152531229, "high_lr": 0.0008984210526315789, "low_lr": 1.7968421052631578e-05, "step": 193 }, { "epoch": 0.5075608152531229, "high_lr": 0.0008984210526315789, "low_lr": 1.7968421052631578e-05, "step": 193 }, { "epoch": 0.5075608152531229, "high_lr": 0.0008984210526315789, "low_lr": 1.7968421052631578e-05, "step": 193 }, { "epoch": 0.5075608152531229, "high_lr": 0.0008984210526315789, "low_lr": 1.7968421052631578e-05, "step": 193 }, { "epoch": 0.5101906640368179, "grad_norm": 0.7663288712501526, "learning_rate": 0.0008978947368421053, "loss": 1.8105, "step": 194 }, { "epoch": 0.5101906640368179, "high_lr": 0.0008978947368421053, "low_lr": 1.795789473684211e-05, "step": 194 }, { "epoch": 0.5101906640368179, "high_lr": 0.0008978947368421053, "low_lr": 1.795789473684211e-05, "step": 194 }, { "epoch": 0.5101906640368179, "high_lr": 0.0008978947368421053, "low_lr": 1.795789473684211e-05, "step": 194 }, { "epoch": 0.5101906640368179, "high_lr": 0.0008978947368421053, "low_lr": 1.795789473684211e-05, "step": 194 }, { "epoch": 0.5101906640368179, "high_lr": 0.0008978947368421053, "low_lr": 1.795789473684211e-05, "step": 194 }, { "epoch": 0.5101906640368179, "high_lr": 0.0008978947368421053, "low_lr": 1.795789473684211e-05, "step": 194 }, { "epoch": 0.5101906640368179, "high_lr": 0.0008978947368421053, "low_lr": 1.795789473684211e-05, "step": 194 }, { "epoch": 0.5101906640368179, "high_lr": 0.0008978947368421053, "low_lr": 1.795789473684211e-05, "step": 194 }, { "epoch": 0.5128205128205128, "grad_norm": 0.8246523141860962, "learning_rate": 0.0008973684210526316, "loss": 1.8246, "step": 195 }, { "epoch": 0.5128205128205128, "high_lr": 0.0008973684210526316, "low_lr": 1.7947368421052634e-05, "step": 195 }, { "epoch": 0.5128205128205128, "high_lr": 0.0008973684210526316, "low_lr": 1.7947368421052634e-05, "step": 195 }, { "epoch": 0.5128205128205128, "high_lr": 0.0008973684210526316, "low_lr": 1.7947368421052634e-05, "step": 195 }, { "epoch": 0.5128205128205128, "high_lr": 0.0008973684210526316, "low_lr": 1.7947368421052634e-05, "step": 195 }, { "epoch": 0.5128205128205128, "high_lr": 0.0008973684210526316, "low_lr": 1.7947368421052634e-05, "step": 195 }, { "epoch": 0.5128205128205128, "high_lr": 0.0008973684210526316, "low_lr": 1.7947368421052634e-05, "step": 195 }, { "epoch": 0.5128205128205128, "high_lr": 0.0008973684210526316, "low_lr": 1.7947368421052634e-05, "step": 195 }, { "epoch": 0.5128205128205128, "high_lr": 0.0008973684210526316, "low_lr": 1.7947368421052634e-05, "step": 195 }, { "epoch": 0.5154503616042078, "grad_norm": 0.8492828607559204, "learning_rate": 0.0008968421052631579, "loss": 1.8492, "step": 196 }, { "epoch": 0.5154503616042078, "high_lr": 0.0008968421052631579, "low_lr": 1.793684210526316e-05, "step": 196 }, { "epoch": 0.5154503616042078, "high_lr": 0.0008968421052631579, "low_lr": 1.793684210526316e-05, "step": 196 }, { "epoch": 0.5154503616042078, "high_lr": 0.0008968421052631579, "low_lr": 1.793684210526316e-05, "step": 196 }, { "epoch": 0.5154503616042078, "high_lr": 0.0008968421052631579, "low_lr": 1.793684210526316e-05, "step": 196 }, { "epoch": 0.5154503616042078, "high_lr": 0.0008968421052631579, "low_lr": 1.793684210526316e-05, "step": 196 }, { "epoch": 0.5154503616042078, "high_lr": 0.0008968421052631579, "low_lr": 1.793684210526316e-05, "step": 196 }, { "epoch": 0.5154503616042078, "high_lr": 0.0008968421052631579, "low_lr": 1.793684210526316e-05, "step": 196 }, { "epoch": 0.5154503616042078, "high_lr": 0.0008968421052631579, "low_lr": 1.793684210526316e-05, "step": 196 }, { "epoch": 0.5180802103879028, "grad_norm": 0.7884014248847961, "learning_rate": 0.0008963157894736842, "loss": 1.8289, "step": 197 }, { "epoch": 0.5180802103879028, "high_lr": 0.0008963157894736842, "low_lr": 1.7926315789473686e-05, "step": 197 }, { "epoch": 0.5180802103879028, "high_lr": 0.0008963157894736842, "low_lr": 1.7926315789473686e-05, "step": 197 }, { "epoch": 0.5180802103879028, "high_lr": 0.0008963157894736842, "low_lr": 1.7926315789473686e-05, "step": 197 }, { "epoch": 0.5180802103879028, "high_lr": 0.0008963157894736842, "low_lr": 1.7926315789473686e-05, "step": 197 }, { "epoch": 0.5180802103879028, "high_lr": 0.0008963157894736842, "low_lr": 1.7926315789473686e-05, "step": 197 }, { "epoch": 0.5180802103879028, "high_lr": 0.0008963157894736842, "low_lr": 1.7926315789473686e-05, "step": 197 }, { "epoch": 0.5180802103879028, "high_lr": 0.0008963157894736842, "low_lr": 1.7926315789473686e-05, "step": 197 }, { "epoch": 0.5180802103879028, "high_lr": 0.0008963157894736842, "low_lr": 1.7926315789473686e-05, "step": 197 }, { "epoch": 0.5207100591715976, "grad_norm": 0.878328800201416, "learning_rate": 0.0008957894736842106, "loss": 1.7901, "step": 198 }, { "epoch": 0.5207100591715976, "high_lr": 0.0008957894736842106, "low_lr": 1.7915789473684214e-05, "step": 198 }, { "epoch": 0.5207100591715976, "high_lr": 0.0008957894736842106, "low_lr": 1.7915789473684214e-05, "step": 198 }, { "epoch": 0.5207100591715976, "high_lr": 0.0008957894736842106, "low_lr": 1.7915789473684214e-05, "step": 198 }, { "epoch": 0.5207100591715976, "high_lr": 0.0008957894736842106, "low_lr": 1.7915789473684214e-05, "step": 198 }, { "epoch": 0.5207100591715976, "high_lr": 0.0008957894736842106, "low_lr": 1.7915789473684214e-05, "step": 198 }, { "epoch": 0.5207100591715976, "high_lr": 0.0008957894736842106, "low_lr": 1.7915789473684214e-05, "step": 198 }, { "epoch": 0.5207100591715976, "high_lr": 0.0008957894736842106, "low_lr": 1.7915789473684214e-05, "step": 198 }, { "epoch": 0.5207100591715976, "high_lr": 0.0008957894736842106, "low_lr": 1.7915789473684214e-05, "step": 198 }, { "epoch": 0.5233399079552926, "grad_norm": 0.9260053038597107, "learning_rate": 0.0008952631578947369, "loss": 1.9373, "step": 199 }, { "epoch": 0.5233399079552926, "high_lr": 0.0008952631578947369, "low_lr": 1.790526315789474e-05, "step": 199 }, { "epoch": 0.5233399079552926, "high_lr": 0.0008952631578947369, "low_lr": 1.790526315789474e-05, "step": 199 }, { "epoch": 0.5233399079552926, "high_lr": 0.0008952631578947369, "low_lr": 1.790526315789474e-05, "step": 199 }, { "epoch": 0.5233399079552926, "high_lr": 0.0008952631578947369, "low_lr": 1.790526315789474e-05, "step": 199 }, { "epoch": 0.5233399079552926, "high_lr": 0.0008952631578947369, "low_lr": 1.790526315789474e-05, "step": 199 }, { "epoch": 0.5233399079552926, "high_lr": 0.0008952631578947369, "low_lr": 1.790526315789474e-05, "step": 199 }, { "epoch": 0.5233399079552926, "high_lr": 0.0008952631578947369, "low_lr": 1.790526315789474e-05, "step": 199 }, { "epoch": 0.5233399079552926, "high_lr": 0.0008952631578947369, "low_lr": 1.790526315789474e-05, "step": 199 }, { "epoch": 0.5259697567389875, "grad_norm": 0.9091192483901978, "learning_rate": 0.0008947368421052632, "loss": 1.8995, "step": 200 }, { "epoch": 0.5259697567389875, "high_lr": 0.0008947368421052632, "low_lr": 1.7894736842105264e-05, "step": 200 }, { "epoch": 0.5259697567389875, "high_lr": 0.0008947368421052632, "low_lr": 1.7894736842105264e-05, "step": 200 }, { "epoch": 0.5259697567389875, "high_lr": 0.0008947368421052632, "low_lr": 1.7894736842105264e-05, "step": 200 }, { "epoch": 0.5259697567389875, "high_lr": 0.0008947368421052632, "low_lr": 1.7894736842105264e-05, "step": 200 }, { "epoch": 0.5259697567389875, "high_lr": 0.0008947368421052632, "low_lr": 1.7894736842105264e-05, "step": 200 }, { "epoch": 0.5259697567389875, "high_lr": 0.0008947368421052632, "low_lr": 1.7894736842105264e-05, "step": 200 }, { "epoch": 0.5259697567389875, "high_lr": 0.0008947368421052632, "low_lr": 1.7894736842105264e-05, "step": 200 }, { "epoch": 0.5259697567389875, "high_lr": 0.0008947368421052632, "low_lr": 1.7894736842105264e-05, "step": 200 }, { "epoch": 0.5285996055226825, "grad_norm": 0.8205613493919373, "learning_rate": 0.0008942105263157894, "loss": 1.8548, "step": 201 }, { "epoch": 0.5285996055226825, "high_lr": 0.0008942105263157894, "low_lr": 1.7884210526315792e-05, "step": 201 }, { "epoch": 0.5285996055226825, "high_lr": 0.0008942105263157894, "low_lr": 1.7884210526315792e-05, "step": 201 }, { "epoch": 0.5285996055226825, "high_lr": 0.0008942105263157894, "low_lr": 1.7884210526315792e-05, "step": 201 }, { "epoch": 0.5285996055226825, "high_lr": 0.0008942105263157894, "low_lr": 1.7884210526315792e-05, "step": 201 }, { "epoch": 0.5285996055226825, "high_lr": 0.0008942105263157894, "low_lr": 1.7884210526315792e-05, "step": 201 }, { "epoch": 0.5285996055226825, "high_lr": 0.0008942105263157894, "low_lr": 1.7884210526315792e-05, "step": 201 }, { "epoch": 0.5285996055226825, "high_lr": 0.0008942105263157894, "low_lr": 1.7884210526315792e-05, "step": 201 }, { "epoch": 0.5285996055226825, "high_lr": 0.0008942105263157894, "low_lr": 1.7884210526315792e-05, "step": 201 }, { "epoch": 0.5312294543063774, "grad_norm": 0.8982312083244324, "learning_rate": 0.0008936842105263157, "loss": 1.8409, "step": 202 }, { "epoch": 0.5312294543063774, "high_lr": 0.0008936842105263157, "low_lr": 1.7873684210526316e-05, "step": 202 }, { "epoch": 0.5312294543063774, "high_lr": 0.0008936842105263157, "low_lr": 1.7873684210526316e-05, "step": 202 }, { "epoch": 0.5312294543063774, "high_lr": 0.0008936842105263157, "low_lr": 1.7873684210526316e-05, "step": 202 }, { "epoch": 0.5312294543063774, "high_lr": 0.0008936842105263157, "low_lr": 1.7873684210526316e-05, "step": 202 }, { "epoch": 0.5312294543063774, "high_lr": 0.0008936842105263157, "low_lr": 1.7873684210526316e-05, "step": 202 }, { "epoch": 0.5312294543063774, "high_lr": 0.0008936842105263157, "low_lr": 1.7873684210526316e-05, "step": 202 }, { "epoch": 0.5312294543063774, "high_lr": 0.0008936842105263157, "low_lr": 1.7873684210526316e-05, "step": 202 }, { "epoch": 0.5312294543063774, "high_lr": 0.0008936842105263157, "low_lr": 1.7873684210526316e-05, "step": 202 }, { "epoch": 0.5338593030900723, "grad_norm": 0.8637439012527466, "learning_rate": 0.0008931578947368421, "loss": 1.8147, "step": 203 }, { "epoch": 0.5338593030900723, "high_lr": 0.0008931578947368421, "low_lr": 1.7863157894736844e-05, "step": 203 }, { "epoch": 0.5338593030900723, "high_lr": 0.0008931578947368421, "low_lr": 1.7863157894736844e-05, "step": 203 }, { "epoch": 0.5338593030900723, "high_lr": 0.0008931578947368421, "low_lr": 1.7863157894736844e-05, "step": 203 }, { "epoch": 0.5338593030900723, "high_lr": 0.0008931578947368421, "low_lr": 1.7863157894736844e-05, "step": 203 }, { "epoch": 0.5338593030900723, "high_lr": 0.0008931578947368421, "low_lr": 1.7863157894736844e-05, "step": 203 }, { "epoch": 0.5338593030900723, "high_lr": 0.0008931578947368421, "low_lr": 1.7863157894736844e-05, "step": 203 }, { "epoch": 0.5338593030900723, "high_lr": 0.0008931578947368421, "low_lr": 1.7863157894736844e-05, "step": 203 }, { "epoch": 0.5338593030900723, "high_lr": 0.0008931578947368421, "low_lr": 1.7863157894736844e-05, "step": 203 }, { "epoch": 0.5364891518737672, "grad_norm": 0.8085411787033081, "learning_rate": 0.0008926315789473684, "loss": 1.7611, "step": 204 }, { "epoch": 0.5364891518737672, "high_lr": 0.0008926315789473684, "low_lr": 1.785263157894737e-05, "step": 204 }, { "epoch": 0.5364891518737672, "high_lr": 0.0008926315789473684, "low_lr": 1.785263157894737e-05, "step": 204 }, { "epoch": 0.5364891518737672, "high_lr": 0.0008926315789473684, "low_lr": 1.785263157894737e-05, "step": 204 }, { "epoch": 0.5364891518737672, "high_lr": 0.0008926315789473684, "low_lr": 1.785263157894737e-05, "step": 204 }, { "epoch": 0.5364891518737672, "high_lr": 0.0008926315789473684, "low_lr": 1.785263157894737e-05, "step": 204 }, { "epoch": 0.5364891518737672, "high_lr": 0.0008926315789473684, "low_lr": 1.785263157894737e-05, "step": 204 }, { "epoch": 0.5364891518737672, "high_lr": 0.0008926315789473684, "low_lr": 1.785263157894737e-05, "step": 204 }, { "epoch": 0.5364891518737672, "high_lr": 0.0008926315789473684, "low_lr": 1.785263157894737e-05, "step": 204 }, { "epoch": 0.5391190006574622, "grad_norm": 0.9032369256019592, "learning_rate": 0.0008921052631578948, "loss": 1.7992, "step": 205 }, { "epoch": 0.5391190006574622, "high_lr": 0.0008921052631578948, "low_lr": 1.7842105263157897e-05, "step": 205 }, { "epoch": 0.5391190006574622, "high_lr": 0.0008921052631578948, "low_lr": 1.7842105263157897e-05, "step": 205 }, { "epoch": 0.5391190006574622, "high_lr": 0.0008921052631578948, "low_lr": 1.7842105263157897e-05, "step": 205 }, { "epoch": 0.5391190006574622, "high_lr": 0.0008921052631578948, "low_lr": 1.7842105263157897e-05, "step": 205 }, { "epoch": 0.5391190006574622, "high_lr": 0.0008921052631578948, "low_lr": 1.7842105263157897e-05, "step": 205 }, { "epoch": 0.5391190006574622, "high_lr": 0.0008921052631578948, "low_lr": 1.7842105263157897e-05, "step": 205 }, { "epoch": 0.5391190006574622, "high_lr": 0.0008921052631578948, "low_lr": 1.7842105263157897e-05, "step": 205 }, { "epoch": 0.5391190006574622, "high_lr": 0.0008921052631578948, "low_lr": 1.7842105263157897e-05, "step": 205 }, { "epoch": 0.5417488494411571, "grad_norm": 0.9390583038330078, "learning_rate": 0.0008915789473684211, "loss": 1.8455, "step": 206 }, { "epoch": 0.5417488494411571, "high_lr": 0.0008915789473684211, "low_lr": 1.7831578947368422e-05, "step": 206 }, { "epoch": 0.5417488494411571, "high_lr": 0.0008915789473684211, "low_lr": 1.7831578947368422e-05, "step": 206 }, { "epoch": 0.5417488494411571, "high_lr": 0.0008915789473684211, "low_lr": 1.7831578947368422e-05, "step": 206 }, { "epoch": 0.5417488494411571, "high_lr": 0.0008915789473684211, "low_lr": 1.7831578947368422e-05, "step": 206 }, { "epoch": 0.5417488494411571, "high_lr": 0.0008915789473684211, "low_lr": 1.7831578947368422e-05, "step": 206 }, { "epoch": 0.5417488494411571, "high_lr": 0.0008915789473684211, "low_lr": 1.7831578947368422e-05, "step": 206 }, { "epoch": 0.5417488494411571, "high_lr": 0.0008915789473684211, "low_lr": 1.7831578947368422e-05, "step": 206 }, { "epoch": 0.5417488494411571, "high_lr": 0.0008915789473684211, "low_lr": 1.7831578947368422e-05, "step": 206 }, { "epoch": 0.5443786982248521, "grad_norm": 0.8497365117073059, "learning_rate": 0.0008910526315789474, "loss": 1.7379, "step": 207 }, { "epoch": 0.5443786982248521, "high_lr": 0.0008910526315789474, "low_lr": 1.7821052631578946e-05, "step": 207 }, { "epoch": 0.5443786982248521, "high_lr": 0.0008910526315789474, "low_lr": 1.7821052631578946e-05, "step": 207 }, { "epoch": 0.5443786982248521, "high_lr": 0.0008910526315789474, "low_lr": 1.7821052631578946e-05, "step": 207 }, { "epoch": 0.5443786982248521, "high_lr": 0.0008910526315789474, "low_lr": 1.7821052631578946e-05, "step": 207 }, { "epoch": 0.5443786982248521, "high_lr": 0.0008910526315789474, "low_lr": 1.7821052631578946e-05, "step": 207 }, { "epoch": 0.5443786982248521, "high_lr": 0.0008910526315789474, "low_lr": 1.7821052631578946e-05, "step": 207 }, { "epoch": 0.5443786982248521, "high_lr": 0.0008910526315789474, "low_lr": 1.7821052631578946e-05, "step": 207 }, { "epoch": 0.5443786982248521, "high_lr": 0.0008910526315789474, "low_lr": 1.7821052631578946e-05, "step": 207 }, { "epoch": 0.5470085470085471, "grad_norm": 0.9147151112556458, "learning_rate": 0.0008905263157894738, "loss": 1.7945, "step": 208 }, { "epoch": 0.5470085470085471, "high_lr": 0.0008905263157894738, "low_lr": 1.7810526315789474e-05, "step": 208 }, { "epoch": 0.5470085470085471, "high_lr": 0.0008905263157894738, "low_lr": 1.7810526315789474e-05, "step": 208 }, { "epoch": 0.5470085470085471, "high_lr": 0.0008905263157894738, "low_lr": 1.7810526315789474e-05, "step": 208 }, { "epoch": 0.5470085470085471, "high_lr": 0.0008905263157894738, "low_lr": 1.7810526315789474e-05, "step": 208 }, { "epoch": 0.5470085470085471, "high_lr": 0.0008905263157894738, "low_lr": 1.7810526315789474e-05, "step": 208 }, { "epoch": 0.5470085470085471, "high_lr": 0.0008905263157894738, "low_lr": 1.7810526315789474e-05, "step": 208 }, { "epoch": 0.5470085470085471, "high_lr": 0.0008905263157894738, "low_lr": 1.7810526315789474e-05, "step": 208 }, { "epoch": 0.5470085470085471, "high_lr": 0.0008905263157894738, "low_lr": 1.7810526315789474e-05, "step": 208 }, { "epoch": 0.5496383957922419, "grad_norm": 0.7853782176971436, "learning_rate": 0.0008900000000000001, "loss": 1.7118, "step": 209 }, { "epoch": 0.5496383957922419, "high_lr": 0.0008900000000000001, "low_lr": 1.7800000000000002e-05, "step": 209 }, { "epoch": 0.5496383957922419, "high_lr": 0.0008900000000000001, "low_lr": 1.7800000000000002e-05, "step": 209 }, { "epoch": 0.5496383957922419, "high_lr": 0.0008900000000000001, "low_lr": 1.7800000000000002e-05, "step": 209 }, { "epoch": 0.5496383957922419, "high_lr": 0.0008900000000000001, "low_lr": 1.7800000000000002e-05, "step": 209 }, { "epoch": 0.5496383957922419, "high_lr": 0.0008900000000000001, "low_lr": 1.7800000000000002e-05, "step": 209 }, { "epoch": 0.5496383957922419, "high_lr": 0.0008900000000000001, "low_lr": 1.7800000000000002e-05, "step": 209 }, { "epoch": 0.5496383957922419, "high_lr": 0.0008900000000000001, "low_lr": 1.7800000000000002e-05, "step": 209 }, { "epoch": 0.5496383957922419, "high_lr": 0.0008900000000000001, "low_lr": 1.7800000000000002e-05, "step": 209 }, { "epoch": 0.5522682445759369, "grad_norm": 0.7360106110572815, "learning_rate": 0.0008894736842105263, "loss": 1.7677, "step": 210 }, { "epoch": 0.5522682445759369, "high_lr": 0.0008894736842105263, "low_lr": 1.7789473684210527e-05, "step": 210 }, { "epoch": 0.5522682445759369, "high_lr": 0.0008894736842105263, "low_lr": 1.7789473684210527e-05, "step": 210 }, { "epoch": 0.5522682445759369, "high_lr": 0.0008894736842105263, "low_lr": 1.7789473684210527e-05, "step": 210 }, { "epoch": 0.5522682445759369, "high_lr": 0.0008894736842105263, "low_lr": 1.7789473684210527e-05, "step": 210 }, { "epoch": 0.5522682445759369, "high_lr": 0.0008894736842105263, "low_lr": 1.7789473684210527e-05, "step": 210 }, { "epoch": 0.5522682445759369, "high_lr": 0.0008894736842105263, "low_lr": 1.7789473684210527e-05, "step": 210 }, { "epoch": 0.5522682445759369, "high_lr": 0.0008894736842105263, "low_lr": 1.7789473684210527e-05, "step": 210 }, { "epoch": 0.5522682445759369, "high_lr": 0.0008894736842105263, "low_lr": 1.7789473684210527e-05, "step": 210 }, { "epoch": 0.5548980933596318, "grad_norm": 0.8855023980140686, "learning_rate": 0.0008889473684210526, "loss": 1.7806, "step": 211 }, { "epoch": 0.5548980933596318, "high_lr": 0.0008889473684210526, "low_lr": 1.7778947368421055e-05, "step": 211 }, { "epoch": 0.5548980933596318, "high_lr": 0.0008889473684210526, "low_lr": 1.7778947368421055e-05, "step": 211 }, { "epoch": 0.5548980933596318, "high_lr": 0.0008889473684210526, "low_lr": 1.7778947368421055e-05, "step": 211 }, { "epoch": 0.5548980933596318, "high_lr": 0.0008889473684210526, "low_lr": 1.7778947368421055e-05, "step": 211 }, { "epoch": 0.5548980933596318, "high_lr": 0.0008889473684210526, "low_lr": 1.7778947368421055e-05, "step": 211 }, { "epoch": 0.5548980933596318, "high_lr": 0.0008889473684210526, "low_lr": 1.7778947368421055e-05, "step": 211 }, { "epoch": 0.5548980933596318, "high_lr": 0.0008889473684210526, "low_lr": 1.7778947368421055e-05, "step": 211 }, { "epoch": 0.5548980933596318, "high_lr": 0.0008889473684210526, "low_lr": 1.7778947368421055e-05, "step": 211 }, { "epoch": 0.5575279421433268, "grad_norm": 0.8010016679763794, "learning_rate": 0.000888421052631579, "loss": 1.7559, "step": 212 }, { "epoch": 0.5575279421433268, "high_lr": 0.000888421052631579, "low_lr": 1.7768421052631583e-05, "step": 212 }, { "epoch": 0.5575279421433268, "high_lr": 0.000888421052631579, "low_lr": 1.7768421052631583e-05, "step": 212 }, { "epoch": 0.5575279421433268, "high_lr": 0.000888421052631579, "low_lr": 1.7768421052631583e-05, "step": 212 }, { "epoch": 0.5575279421433268, "high_lr": 0.000888421052631579, "low_lr": 1.7768421052631583e-05, "step": 212 }, { "epoch": 0.5575279421433268, "high_lr": 0.000888421052631579, "low_lr": 1.7768421052631583e-05, "step": 212 }, { "epoch": 0.5575279421433268, "high_lr": 0.000888421052631579, "low_lr": 1.7768421052631583e-05, "step": 212 }, { "epoch": 0.5575279421433268, "high_lr": 0.000888421052631579, "low_lr": 1.7768421052631583e-05, "step": 212 }, { "epoch": 0.5575279421433268, "high_lr": 0.000888421052631579, "low_lr": 1.7768421052631583e-05, "step": 212 }, { "epoch": 0.5601577909270217, "grad_norm": 0.9242972135543823, "learning_rate": 0.0008878947368421053, "loss": 1.7538, "step": 213 }, { "epoch": 0.5601577909270217, "high_lr": 0.0008878947368421053, "low_lr": 1.7757894736842108e-05, "step": 213 }, { "epoch": 0.5601577909270217, "high_lr": 0.0008878947368421053, "low_lr": 1.7757894736842108e-05, "step": 213 }, { "epoch": 0.5601577909270217, "high_lr": 0.0008878947368421053, "low_lr": 1.7757894736842108e-05, "step": 213 }, { "epoch": 0.5601577909270217, "high_lr": 0.0008878947368421053, "low_lr": 1.7757894736842108e-05, "step": 213 }, { "epoch": 0.5601577909270217, "high_lr": 0.0008878947368421053, "low_lr": 1.7757894736842108e-05, "step": 213 }, { "epoch": 0.5601577909270217, "high_lr": 0.0008878947368421053, "low_lr": 1.7757894736842108e-05, "step": 213 }, { "epoch": 0.5601577909270217, "high_lr": 0.0008878947368421053, "low_lr": 1.7757894736842108e-05, "step": 213 }, { "epoch": 0.5601577909270217, "high_lr": 0.0008878947368421053, "low_lr": 1.7757894736842108e-05, "step": 213 }, { "epoch": 0.5627876397107167, "grad_norm": 0.8491708636283875, "learning_rate": 0.0008873684210526316, "loss": 1.8032, "step": 214 }, { "epoch": 0.5627876397107167, "high_lr": 0.0008873684210526316, "low_lr": 1.7747368421052632e-05, "step": 214 }, { "epoch": 0.5627876397107167, "high_lr": 0.0008873684210526316, "low_lr": 1.7747368421052632e-05, "step": 214 }, { "epoch": 0.5627876397107167, "high_lr": 0.0008873684210526316, "low_lr": 1.7747368421052632e-05, "step": 214 }, { "epoch": 0.5627876397107167, "high_lr": 0.0008873684210526316, "low_lr": 1.7747368421052632e-05, "step": 214 }, { "epoch": 0.5627876397107167, "high_lr": 0.0008873684210526316, "low_lr": 1.7747368421052632e-05, "step": 214 }, { "epoch": 0.5627876397107167, "high_lr": 0.0008873684210526316, "low_lr": 1.7747368421052632e-05, "step": 214 }, { "epoch": 0.5627876397107167, "high_lr": 0.0008873684210526316, "low_lr": 1.7747368421052632e-05, "step": 214 }, { "epoch": 0.5627876397107167, "high_lr": 0.0008873684210526316, "low_lr": 1.7747368421052632e-05, "step": 214 }, { "epoch": 0.5654174884944115, "grad_norm": 0.8941650390625, "learning_rate": 0.0008868421052631579, "loss": 1.8055, "step": 215 }, { "epoch": 0.5654174884944115, "high_lr": 0.0008868421052631579, "low_lr": 1.773684210526316e-05, "step": 215 }, { "epoch": 0.5654174884944115, "high_lr": 0.0008868421052631579, "low_lr": 1.773684210526316e-05, "step": 215 }, { "epoch": 0.5654174884944115, "high_lr": 0.0008868421052631579, "low_lr": 1.773684210526316e-05, "step": 215 }, { "epoch": 0.5654174884944115, "high_lr": 0.0008868421052631579, "low_lr": 1.773684210526316e-05, "step": 215 }, { "epoch": 0.5654174884944115, "high_lr": 0.0008868421052631579, "low_lr": 1.773684210526316e-05, "step": 215 }, { "epoch": 0.5654174884944115, "high_lr": 0.0008868421052631579, "low_lr": 1.773684210526316e-05, "step": 215 }, { "epoch": 0.5654174884944115, "high_lr": 0.0008868421052631579, "low_lr": 1.773684210526316e-05, "step": 215 }, { "epoch": 0.5654174884944115, "high_lr": 0.0008868421052631579, "low_lr": 1.773684210526316e-05, "step": 215 }, { "epoch": 0.5680473372781065, "grad_norm": 0.8661569356918335, "learning_rate": 0.0008863157894736842, "loss": 1.7234, "step": 216 }, { "epoch": 0.5680473372781065, "high_lr": 0.0008863157894736842, "low_lr": 1.7726315789473685e-05, "step": 216 }, { "epoch": 0.5680473372781065, "high_lr": 0.0008863157894736842, "low_lr": 1.7726315789473685e-05, "step": 216 }, { "epoch": 0.5680473372781065, "high_lr": 0.0008863157894736842, "low_lr": 1.7726315789473685e-05, "step": 216 }, { "epoch": 0.5680473372781065, "high_lr": 0.0008863157894736842, "low_lr": 1.7726315789473685e-05, "step": 216 }, { "epoch": 0.5680473372781065, "high_lr": 0.0008863157894736842, "low_lr": 1.7726315789473685e-05, "step": 216 }, { "epoch": 0.5680473372781065, "high_lr": 0.0008863157894736842, "low_lr": 1.7726315789473685e-05, "step": 216 }, { "epoch": 0.5680473372781065, "high_lr": 0.0008863157894736842, "low_lr": 1.7726315789473685e-05, "step": 216 }, { "epoch": 0.5680473372781065, "high_lr": 0.0008863157894736842, "low_lr": 1.7726315789473685e-05, "step": 216 }, { "epoch": 0.5706771860618014, "grad_norm": 0.824845016002655, "learning_rate": 0.0008857894736842106, "loss": 1.7999, "step": 217 }, { "epoch": 0.5706771860618014, "high_lr": 0.0008857894736842106, "low_lr": 1.7715789473684213e-05, "step": 217 }, { "epoch": 0.5706771860618014, "high_lr": 0.0008857894736842106, "low_lr": 1.7715789473684213e-05, "step": 217 }, { "epoch": 0.5706771860618014, "high_lr": 0.0008857894736842106, "low_lr": 1.7715789473684213e-05, "step": 217 }, { "epoch": 0.5706771860618014, "high_lr": 0.0008857894736842106, "low_lr": 1.7715789473684213e-05, "step": 217 }, { "epoch": 0.5706771860618014, "high_lr": 0.0008857894736842106, "low_lr": 1.7715789473684213e-05, "step": 217 }, { "epoch": 0.5706771860618014, "high_lr": 0.0008857894736842106, "low_lr": 1.7715789473684213e-05, "step": 217 }, { "epoch": 0.5706771860618014, "high_lr": 0.0008857894736842106, "low_lr": 1.7715789473684213e-05, "step": 217 }, { "epoch": 0.5706771860618014, "high_lr": 0.0008857894736842106, "low_lr": 1.7715789473684213e-05, "step": 217 }, { "epoch": 0.5733070348454964, "grad_norm": 0.9351339340209961, "learning_rate": 0.0008852631578947368, "loss": 1.8139, "step": 218 }, { "epoch": 0.5733070348454964, "high_lr": 0.0008852631578947368, "low_lr": 1.7705263157894738e-05, "step": 218 }, { "epoch": 0.5733070348454964, "high_lr": 0.0008852631578947368, "low_lr": 1.7705263157894738e-05, "step": 218 }, { "epoch": 0.5733070348454964, "high_lr": 0.0008852631578947368, "low_lr": 1.7705263157894738e-05, "step": 218 }, { "epoch": 0.5733070348454964, "high_lr": 0.0008852631578947368, "low_lr": 1.7705263157894738e-05, "step": 218 }, { "epoch": 0.5733070348454964, "high_lr": 0.0008852631578947368, "low_lr": 1.7705263157894738e-05, "step": 218 }, { "epoch": 0.5733070348454964, "high_lr": 0.0008852631578947368, "low_lr": 1.7705263157894738e-05, "step": 218 }, { "epoch": 0.5733070348454964, "high_lr": 0.0008852631578947368, "low_lr": 1.7705263157894738e-05, "step": 218 }, { "epoch": 0.5733070348454964, "high_lr": 0.0008852631578947368, "low_lr": 1.7705263157894738e-05, "step": 218 }, { "epoch": 0.5759368836291914, "grad_norm": 0.8763374090194702, "learning_rate": 0.0008847368421052631, "loss": 1.7691, "step": 219 }, { "epoch": 0.5759368836291914, "high_lr": 0.0008847368421052631, "low_lr": 1.7694736842105266e-05, "step": 219 }, { "epoch": 0.5759368836291914, "high_lr": 0.0008847368421052631, "low_lr": 1.7694736842105266e-05, "step": 219 }, { "epoch": 0.5759368836291914, "high_lr": 0.0008847368421052631, "low_lr": 1.7694736842105266e-05, "step": 219 }, { "epoch": 0.5759368836291914, "high_lr": 0.0008847368421052631, "low_lr": 1.7694736842105266e-05, "step": 219 }, { "epoch": 0.5759368836291914, "high_lr": 0.0008847368421052631, "low_lr": 1.7694736842105266e-05, "step": 219 }, { "epoch": 0.5759368836291914, "high_lr": 0.0008847368421052631, "low_lr": 1.7694736842105266e-05, "step": 219 }, { "epoch": 0.5759368836291914, "high_lr": 0.0008847368421052631, "low_lr": 1.7694736842105266e-05, "step": 219 }, { "epoch": 0.5759368836291914, "high_lr": 0.0008847368421052631, "low_lr": 1.7694736842105266e-05, "step": 219 }, { "epoch": 0.5785667324128863, "grad_norm": 0.8397433757781982, "learning_rate": 0.0008842105263157894, "loss": 1.7958, "step": 220 }, { "epoch": 0.5785667324128863, "high_lr": 0.0008842105263157894, "low_lr": 1.768421052631579e-05, "step": 220 }, { "epoch": 0.5785667324128863, "high_lr": 0.0008842105263157894, "low_lr": 1.768421052631579e-05, "step": 220 }, { "epoch": 0.5785667324128863, "high_lr": 0.0008842105263157894, "low_lr": 1.768421052631579e-05, "step": 220 }, { "epoch": 0.5785667324128863, "high_lr": 0.0008842105263157894, "low_lr": 1.768421052631579e-05, "step": 220 }, { "epoch": 0.5785667324128863, "high_lr": 0.0008842105263157894, "low_lr": 1.768421052631579e-05, "step": 220 }, { "epoch": 0.5785667324128863, "high_lr": 0.0008842105263157894, "low_lr": 1.768421052631579e-05, "step": 220 }, { "epoch": 0.5785667324128863, "high_lr": 0.0008842105263157894, "low_lr": 1.768421052631579e-05, "step": 220 }, { "epoch": 0.5785667324128863, "high_lr": 0.0008842105263157894, "low_lr": 1.768421052631579e-05, "step": 220 }, { "epoch": 0.5811965811965812, "grad_norm": 0.8304501175880432, "learning_rate": 0.0008836842105263157, "loss": 1.706, "step": 221 }, { "epoch": 0.5811965811965812, "high_lr": 0.0008836842105263157, "low_lr": 1.7673684210526315e-05, "step": 221 }, { "epoch": 0.5811965811965812, "high_lr": 0.0008836842105263157, "low_lr": 1.7673684210526315e-05, "step": 221 }, { "epoch": 0.5811965811965812, "high_lr": 0.0008836842105263157, "low_lr": 1.7673684210526315e-05, "step": 221 }, { "epoch": 0.5811965811965812, "high_lr": 0.0008836842105263157, "low_lr": 1.7673684210526315e-05, "step": 221 }, { "epoch": 0.5811965811965812, "high_lr": 0.0008836842105263157, "low_lr": 1.7673684210526315e-05, "step": 221 }, { "epoch": 0.5811965811965812, "high_lr": 0.0008836842105263157, "low_lr": 1.7673684210526315e-05, "step": 221 }, { "epoch": 0.5811965811965812, "high_lr": 0.0008836842105263157, "low_lr": 1.7673684210526315e-05, "step": 221 }, { "epoch": 0.5811965811965812, "high_lr": 0.0008836842105263157, "low_lr": 1.7673684210526315e-05, "step": 221 }, { "epoch": 0.5838264299802761, "grad_norm": 0.8978874087333679, "learning_rate": 0.0008831578947368422, "loss": 1.6779, "step": 222 }, { "epoch": 0.5838264299802761, "high_lr": 0.0008831578947368422, "low_lr": 1.7663157894736843e-05, "step": 222 }, { "epoch": 0.5838264299802761, "high_lr": 0.0008831578947368422, "low_lr": 1.7663157894736843e-05, "step": 222 }, { "epoch": 0.5838264299802761, "high_lr": 0.0008831578947368422, "low_lr": 1.7663157894736843e-05, "step": 222 }, { "epoch": 0.5838264299802761, "high_lr": 0.0008831578947368422, "low_lr": 1.7663157894736843e-05, "step": 222 }, { "epoch": 0.5838264299802761, "high_lr": 0.0008831578947368422, "low_lr": 1.7663157894736843e-05, "step": 222 }, { "epoch": 0.5838264299802761, "high_lr": 0.0008831578947368422, "low_lr": 1.7663157894736843e-05, "step": 222 }, { "epoch": 0.5838264299802761, "high_lr": 0.0008831578947368422, "low_lr": 1.7663157894736843e-05, "step": 222 }, { "epoch": 0.5838264299802761, "high_lr": 0.0008831578947368422, "low_lr": 1.7663157894736843e-05, "step": 222 }, { "epoch": 0.5864562787639711, "grad_norm": 0.8363296389579773, "learning_rate": 0.0008826315789473685, "loss": 1.7498, "step": 223 }, { "epoch": 0.5864562787639711, "high_lr": 0.0008826315789473685, "low_lr": 1.765263157894737e-05, "step": 223 }, { "epoch": 0.5864562787639711, "high_lr": 0.0008826315789473685, "low_lr": 1.765263157894737e-05, "step": 223 }, { "epoch": 0.5864562787639711, "high_lr": 0.0008826315789473685, "low_lr": 1.765263157894737e-05, "step": 223 }, { "epoch": 0.5864562787639711, "high_lr": 0.0008826315789473685, "low_lr": 1.765263157894737e-05, "step": 223 }, { "epoch": 0.5864562787639711, "high_lr": 0.0008826315789473685, "low_lr": 1.765263157894737e-05, "step": 223 }, { "epoch": 0.5864562787639711, "high_lr": 0.0008826315789473685, "low_lr": 1.765263157894737e-05, "step": 223 }, { "epoch": 0.5864562787639711, "high_lr": 0.0008826315789473685, "low_lr": 1.765263157894737e-05, "step": 223 }, { "epoch": 0.5864562787639711, "high_lr": 0.0008826315789473685, "low_lr": 1.765263157894737e-05, "step": 223 }, { "epoch": 0.589086127547666, "grad_norm": 1.024382472038269, "learning_rate": 0.0008821052631578948, "loss": 1.8, "step": 224 }, { "epoch": 0.589086127547666, "high_lr": 0.0008821052631578948, "low_lr": 1.7642105263157896e-05, "step": 224 }, { "epoch": 0.589086127547666, "high_lr": 0.0008821052631578948, "low_lr": 1.7642105263157896e-05, "step": 224 }, { "epoch": 0.589086127547666, "high_lr": 0.0008821052631578948, "low_lr": 1.7642105263157896e-05, "step": 224 }, { "epoch": 0.589086127547666, "high_lr": 0.0008821052631578948, "low_lr": 1.7642105263157896e-05, "step": 224 }, { "epoch": 0.589086127547666, "high_lr": 0.0008821052631578948, "low_lr": 1.7642105263157896e-05, "step": 224 }, { "epoch": 0.589086127547666, "high_lr": 0.0008821052631578948, "low_lr": 1.7642105263157896e-05, "step": 224 }, { "epoch": 0.589086127547666, "high_lr": 0.0008821052631578948, "low_lr": 1.7642105263157896e-05, "step": 224 }, { "epoch": 0.589086127547666, "high_lr": 0.0008821052631578948, "low_lr": 1.7642105263157896e-05, "step": 224 }, { "epoch": 0.591715976331361, "grad_norm": 0.9602994918823242, "learning_rate": 0.0008815789473684211, "loss": 1.745, "step": 225 }, { "epoch": 0.591715976331361, "high_lr": 0.0008815789473684211, "low_lr": 1.763157894736842e-05, "step": 225 }, { "epoch": 0.591715976331361, "high_lr": 0.0008815789473684211, "low_lr": 1.763157894736842e-05, "step": 225 }, { "epoch": 0.591715976331361, "high_lr": 0.0008815789473684211, "low_lr": 1.763157894736842e-05, "step": 225 }, { "epoch": 0.591715976331361, "high_lr": 0.0008815789473684211, "low_lr": 1.763157894736842e-05, "step": 225 }, { "epoch": 0.591715976331361, "high_lr": 0.0008815789473684211, "low_lr": 1.763157894736842e-05, "step": 225 }, { "epoch": 0.591715976331361, "high_lr": 0.0008815789473684211, "low_lr": 1.763157894736842e-05, "step": 225 }, { "epoch": 0.591715976331361, "high_lr": 0.0008815789473684211, "low_lr": 1.763157894736842e-05, "step": 225 }, { "epoch": 0.591715976331361, "high_lr": 0.0008815789473684211, "low_lr": 1.763157894736842e-05, "step": 225 }, { "epoch": 0.5943458251150558, "grad_norm": 0.8573108315467834, "learning_rate": 0.0008810526315789475, "loss": 1.7609, "step": 226 }, { "epoch": 0.5943458251150558, "high_lr": 0.0008810526315789475, "low_lr": 1.7621052631578948e-05, "step": 226 }, { "epoch": 0.5943458251150558, "high_lr": 0.0008810526315789475, "low_lr": 1.7621052631578948e-05, "step": 226 }, { "epoch": 0.5943458251150558, "high_lr": 0.0008810526315789475, "low_lr": 1.7621052631578948e-05, "step": 226 }, { "epoch": 0.5943458251150558, "high_lr": 0.0008810526315789475, "low_lr": 1.7621052631578948e-05, "step": 226 }, { "epoch": 0.5943458251150558, "high_lr": 0.0008810526315789475, "low_lr": 1.7621052631578948e-05, "step": 226 }, { "epoch": 0.5943458251150558, "high_lr": 0.0008810526315789475, "low_lr": 1.7621052631578948e-05, "step": 226 }, { "epoch": 0.5943458251150558, "high_lr": 0.0008810526315789475, "low_lr": 1.7621052631578948e-05, "step": 226 }, { "epoch": 0.5943458251150558, "high_lr": 0.0008810526315789475, "low_lr": 1.7621052631578948e-05, "step": 226 }, { "epoch": 0.5969756738987508, "grad_norm": 0.8403958678245544, "learning_rate": 0.0008805263157894737, "loss": 1.7054, "step": 227 }, { "epoch": 0.5969756738987508, "high_lr": 0.0008805263157894737, "low_lr": 1.7610526315789476e-05, "step": 227 }, { "epoch": 0.5969756738987508, "high_lr": 0.0008805263157894737, "low_lr": 1.7610526315789476e-05, "step": 227 }, { "epoch": 0.5969756738987508, "high_lr": 0.0008805263157894737, "low_lr": 1.7610526315789476e-05, "step": 227 }, { "epoch": 0.5969756738987508, "high_lr": 0.0008805263157894737, "low_lr": 1.7610526315789476e-05, "step": 227 }, { "epoch": 0.5969756738987508, "high_lr": 0.0008805263157894737, "low_lr": 1.7610526315789476e-05, "step": 227 }, { "epoch": 0.5969756738987508, "high_lr": 0.0008805263157894737, "low_lr": 1.7610526315789476e-05, "step": 227 }, { "epoch": 0.5969756738987508, "high_lr": 0.0008805263157894737, "low_lr": 1.7610526315789476e-05, "step": 227 }, { "epoch": 0.5969756738987508, "high_lr": 0.0008805263157894737, "low_lr": 1.7610526315789476e-05, "step": 227 }, { "epoch": 0.5996055226824457, "grad_norm": 0.922599732875824, "learning_rate": 0.00088, "loss": 1.7903, "step": 228 }, { "epoch": 0.5996055226824457, "high_lr": 0.00088, "low_lr": 1.76e-05, "step": 228 }, { "epoch": 0.5996055226824457, "high_lr": 0.00088, "low_lr": 1.76e-05, "step": 228 }, { "epoch": 0.5996055226824457, "high_lr": 0.00088, "low_lr": 1.76e-05, "step": 228 }, { "epoch": 0.5996055226824457, "high_lr": 0.00088, "low_lr": 1.76e-05, "step": 228 }, { "epoch": 0.5996055226824457, "high_lr": 0.00088, "low_lr": 1.76e-05, "step": 228 }, { "epoch": 0.5996055226824457, "high_lr": 0.00088, "low_lr": 1.76e-05, "step": 228 }, { "epoch": 0.5996055226824457, "high_lr": 0.00088, "low_lr": 1.76e-05, "step": 228 }, { "epoch": 0.5996055226824457, "high_lr": 0.00088, "low_lr": 1.76e-05, "step": 228 }, { "epoch": 0.6022353714661407, "grad_norm": 0.8383209705352783, "learning_rate": 0.0008794736842105263, "loss": 1.7644, "step": 229 }, { "epoch": 0.6022353714661407, "high_lr": 0.0008794736842105263, "low_lr": 1.758947368421053e-05, "step": 229 }, { "epoch": 0.6022353714661407, "high_lr": 0.0008794736842105263, "low_lr": 1.758947368421053e-05, "step": 229 }, { "epoch": 0.6022353714661407, "high_lr": 0.0008794736842105263, "low_lr": 1.758947368421053e-05, "step": 229 }, { "epoch": 0.6022353714661407, "high_lr": 0.0008794736842105263, "low_lr": 1.758947368421053e-05, "step": 229 }, { "epoch": 0.6022353714661407, "high_lr": 0.0008794736842105263, "low_lr": 1.758947368421053e-05, "step": 229 }, { "epoch": 0.6022353714661407, "high_lr": 0.0008794736842105263, "low_lr": 1.758947368421053e-05, "step": 229 }, { "epoch": 0.6022353714661407, "high_lr": 0.0008794736842105263, "low_lr": 1.758947368421053e-05, "step": 229 }, { "epoch": 0.6022353714661407, "high_lr": 0.0008794736842105263, "low_lr": 1.758947368421053e-05, "step": 229 }, { "epoch": 0.6048652202498357, "grad_norm": 78.71210479736328, "learning_rate": 0.0008789473684210526, "loss": 1.8008, "step": 230 }, { "epoch": 0.6048652202498357, "high_lr": 0.0008789473684210526, "low_lr": 1.7578947368421054e-05, "step": 230 }, { "epoch": 0.6048652202498357, "high_lr": 0.0008789473684210526, "low_lr": 1.7578947368421054e-05, "step": 230 }, { "epoch": 0.6048652202498357, "high_lr": 0.0008789473684210526, "low_lr": 1.7578947368421054e-05, "step": 230 }, { "epoch": 0.6048652202498357, "high_lr": 0.0008789473684210526, "low_lr": 1.7578947368421054e-05, "step": 230 }, { "epoch": 0.6048652202498357, "high_lr": 0.0008789473684210526, "low_lr": 1.7578947368421054e-05, "step": 230 }, { "epoch": 0.6048652202498357, "high_lr": 0.0008789473684210526, "low_lr": 1.7578947368421054e-05, "step": 230 }, { "epoch": 0.6048652202498357, "high_lr": 0.0008789473684210526, "low_lr": 1.7578947368421054e-05, "step": 230 }, { "epoch": 0.6048652202498357, "high_lr": 0.0008789473684210526, "low_lr": 1.7578947368421054e-05, "step": 230 }, { "epoch": 0.6074950690335306, "grad_norm": 0.861912727355957, "learning_rate": 0.000878421052631579, "loss": 1.8102, "step": 231 }, { "epoch": 0.6074950690335306, "high_lr": 0.000878421052631579, "low_lr": 1.756842105263158e-05, "step": 231 }, { "epoch": 0.6074950690335306, "high_lr": 0.000878421052631579, "low_lr": 1.756842105263158e-05, "step": 231 }, { "epoch": 0.6074950690335306, "high_lr": 0.000878421052631579, "low_lr": 1.756842105263158e-05, "step": 231 }, { "epoch": 0.6074950690335306, "high_lr": 0.000878421052631579, "low_lr": 1.756842105263158e-05, "step": 231 }, { "epoch": 0.6074950690335306, "high_lr": 0.000878421052631579, "low_lr": 1.756842105263158e-05, "step": 231 }, { "epoch": 0.6074950690335306, "high_lr": 0.000878421052631579, "low_lr": 1.756842105263158e-05, "step": 231 }, { "epoch": 0.6074950690335306, "high_lr": 0.000878421052631579, "low_lr": 1.756842105263158e-05, "step": 231 }, { "epoch": 0.6074950690335306, "high_lr": 0.000878421052631579, "low_lr": 1.756842105263158e-05, "step": 231 }, { "epoch": 0.6101249178172256, "grad_norm": 0.8288989067077637, "learning_rate": 0.0008778947368421053, "loss": 1.6689, "step": 232 }, { "epoch": 0.6101249178172256, "high_lr": 0.0008778947368421053, "low_lr": 1.7557894736842106e-05, "step": 232 }, { "epoch": 0.6101249178172256, "high_lr": 0.0008778947368421053, "low_lr": 1.7557894736842106e-05, "step": 232 }, { "epoch": 0.6101249178172256, "high_lr": 0.0008778947368421053, "low_lr": 1.7557894736842106e-05, "step": 232 }, { "epoch": 0.6101249178172256, "high_lr": 0.0008778947368421053, "low_lr": 1.7557894736842106e-05, "step": 232 }, { "epoch": 0.6101249178172256, "high_lr": 0.0008778947368421053, "low_lr": 1.7557894736842106e-05, "step": 232 }, { "epoch": 0.6101249178172256, "high_lr": 0.0008778947368421053, "low_lr": 1.7557894736842106e-05, "step": 232 }, { "epoch": 0.6101249178172256, "high_lr": 0.0008778947368421053, "low_lr": 1.7557894736842106e-05, "step": 232 }, { "epoch": 0.6101249178172256, "high_lr": 0.0008778947368421053, "low_lr": 1.7557894736842106e-05, "step": 232 }, { "epoch": 0.6127547666009204, "grad_norm": 0.8340638279914856, "learning_rate": 0.0008773684210526316, "loss": 1.7501, "step": 233 }, { "epoch": 0.6127547666009204, "high_lr": 0.0008773684210526316, "low_lr": 1.7547368421052634e-05, "step": 233 }, { "epoch": 0.6127547666009204, "high_lr": 0.0008773684210526316, "low_lr": 1.7547368421052634e-05, "step": 233 }, { "epoch": 0.6127547666009204, "high_lr": 0.0008773684210526316, "low_lr": 1.7547368421052634e-05, "step": 233 }, { "epoch": 0.6127547666009204, "high_lr": 0.0008773684210526316, "low_lr": 1.7547368421052634e-05, "step": 233 }, { "epoch": 0.6127547666009204, "high_lr": 0.0008773684210526316, "low_lr": 1.7547368421052634e-05, "step": 233 }, { "epoch": 0.6127547666009204, "high_lr": 0.0008773684210526316, "low_lr": 1.7547368421052634e-05, "step": 233 }, { "epoch": 0.6127547666009204, "high_lr": 0.0008773684210526316, "low_lr": 1.7547368421052634e-05, "step": 233 }, { "epoch": 0.6127547666009204, "high_lr": 0.0008773684210526316, "low_lr": 1.7547368421052634e-05, "step": 233 }, { "epoch": 0.6153846153846154, "grad_norm": 0.7775981426239014, "learning_rate": 0.0008768421052631579, "loss": 1.6956, "step": 234 }, { "epoch": 0.6153846153846154, "high_lr": 0.0008768421052631579, "low_lr": 1.753684210526316e-05, "step": 234 }, { "epoch": 0.6153846153846154, "high_lr": 0.0008768421052631579, "low_lr": 1.753684210526316e-05, "step": 234 }, { "epoch": 0.6153846153846154, "high_lr": 0.0008768421052631579, "low_lr": 1.753684210526316e-05, "step": 234 }, { "epoch": 0.6153846153846154, "high_lr": 0.0008768421052631579, "low_lr": 1.753684210526316e-05, "step": 234 }, { "epoch": 0.6153846153846154, "high_lr": 0.0008768421052631579, "low_lr": 1.753684210526316e-05, "step": 234 }, { "epoch": 0.6153846153846154, "high_lr": 0.0008768421052631579, "low_lr": 1.753684210526316e-05, "step": 234 }, { "epoch": 0.6153846153846154, "high_lr": 0.0008768421052631579, "low_lr": 1.753684210526316e-05, "step": 234 }, { "epoch": 0.6153846153846154, "high_lr": 0.0008768421052631579, "low_lr": 1.753684210526316e-05, "step": 234 }, { "epoch": 0.6180144641683103, "grad_norm": 0.8985382914543152, "learning_rate": 0.0008763157894736841, "loss": 1.7198, "step": 235 }, { "epoch": 0.6180144641683103, "high_lr": 0.0008763157894736841, "low_lr": 1.7526315789473683e-05, "step": 235 }, { "epoch": 0.6180144641683103, "high_lr": 0.0008763157894736841, "low_lr": 1.7526315789473683e-05, "step": 235 }, { "epoch": 0.6180144641683103, "high_lr": 0.0008763157894736841, "low_lr": 1.7526315789473683e-05, "step": 235 }, { "epoch": 0.6180144641683103, "high_lr": 0.0008763157894736841, "low_lr": 1.7526315789473683e-05, "step": 235 }, { "epoch": 0.6180144641683103, "high_lr": 0.0008763157894736841, "low_lr": 1.7526315789473683e-05, "step": 235 }, { "epoch": 0.6180144641683103, "high_lr": 0.0008763157894736841, "low_lr": 1.7526315789473683e-05, "step": 235 }, { "epoch": 0.6180144641683103, "high_lr": 0.0008763157894736841, "low_lr": 1.7526315789473683e-05, "step": 235 }, { "epoch": 0.6180144641683103, "high_lr": 0.0008763157894736841, "low_lr": 1.7526315789473683e-05, "step": 235 }, { "epoch": 0.6206443129520053, "grad_norm": 0.8429329991340637, "learning_rate": 0.0008757894736842105, "loss": 1.6871, "step": 236 }, { "epoch": 0.6206443129520053, "high_lr": 0.0008757894736842105, "low_lr": 1.751578947368421e-05, "step": 236 }, { "epoch": 0.6206443129520053, "high_lr": 0.0008757894736842105, "low_lr": 1.751578947368421e-05, "step": 236 }, { "epoch": 0.6206443129520053, "high_lr": 0.0008757894736842105, "low_lr": 1.751578947368421e-05, "step": 236 }, { "epoch": 0.6206443129520053, "high_lr": 0.0008757894736842105, "low_lr": 1.751578947368421e-05, "step": 236 }, { "epoch": 0.6206443129520053, "high_lr": 0.0008757894736842105, "low_lr": 1.751578947368421e-05, "step": 236 }, { "epoch": 0.6206443129520053, "high_lr": 0.0008757894736842105, "low_lr": 1.751578947368421e-05, "step": 236 }, { "epoch": 0.6206443129520053, "high_lr": 0.0008757894736842105, "low_lr": 1.751578947368421e-05, "step": 236 }, { "epoch": 0.6206443129520053, "high_lr": 0.0008757894736842105, "low_lr": 1.751578947368421e-05, "step": 236 }, { "epoch": 0.6232741617357002, "grad_norm": 0.8710259795188904, "learning_rate": 0.0008752631578947368, "loss": 1.6992, "step": 237 }, { "epoch": 0.6232741617357002, "high_lr": 0.0008752631578947368, "low_lr": 1.750526315789474e-05, "step": 237 }, { "epoch": 0.6232741617357002, "high_lr": 0.0008752631578947368, "low_lr": 1.750526315789474e-05, "step": 237 }, { "epoch": 0.6232741617357002, "high_lr": 0.0008752631578947368, "low_lr": 1.750526315789474e-05, "step": 237 }, { "epoch": 0.6232741617357002, "high_lr": 0.0008752631578947368, "low_lr": 1.750526315789474e-05, "step": 237 }, { "epoch": 0.6232741617357002, "high_lr": 0.0008752631578947368, "low_lr": 1.750526315789474e-05, "step": 237 }, { "epoch": 0.6232741617357002, "high_lr": 0.0008752631578947368, "low_lr": 1.750526315789474e-05, "step": 237 }, { "epoch": 0.6232741617357002, "high_lr": 0.0008752631578947368, "low_lr": 1.750526315789474e-05, "step": 237 }, { "epoch": 0.6232741617357002, "high_lr": 0.0008752631578947368, "low_lr": 1.750526315789474e-05, "step": 237 }, { "epoch": 0.6259040105193951, "grad_norm": 0.9169574975967407, "learning_rate": 0.0008747368421052632, "loss": 1.7979, "step": 238 }, { "epoch": 0.6259040105193951, "high_lr": 0.0008747368421052632, "low_lr": 1.7494736842105264e-05, "step": 238 }, { "epoch": 0.6259040105193951, "high_lr": 0.0008747368421052632, "low_lr": 1.7494736842105264e-05, "step": 238 }, { "epoch": 0.6259040105193951, "high_lr": 0.0008747368421052632, "low_lr": 1.7494736842105264e-05, "step": 238 }, { "epoch": 0.6259040105193951, "high_lr": 0.0008747368421052632, "low_lr": 1.7494736842105264e-05, "step": 238 }, { "epoch": 0.6259040105193951, "high_lr": 0.0008747368421052632, "low_lr": 1.7494736842105264e-05, "step": 238 }, { "epoch": 0.6259040105193951, "high_lr": 0.0008747368421052632, "low_lr": 1.7494736842105264e-05, "step": 238 }, { "epoch": 0.6259040105193951, "high_lr": 0.0008747368421052632, "low_lr": 1.7494736842105264e-05, "step": 238 }, { "epoch": 0.6259040105193951, "high_lr": 0.0008747368421052632, "low_lr": 1.7494736842105264e-05, "step": 238 }, { "epoch": 0.62853385930309, "grad_norm": 2.7285892963409424, "learning_rate": 0.0008742105263157895, "loss": 1.8977, "step": 239 }, { "epoch": 0.62853385930309, "high_lr": 0.0008742105263157895, "low_lr": 1.748421052631579e-05, "step": 239 }, { "epoch": 0.62853385930309, "high_lr": 0.0008742105263157895, "low_lr": 1.748421052631579e-05, "step": 239 }, { "epoch": 0.62853385930309, "high_lr": 0.0008742105263157895, "low_lr": 1.748421052631579e-05, "step": 239 }, { "epoch": 0.62853385930309, "high_lr": 0.0008742105263157895, "low_lr": 1.748421052631579e-05, "step": 239 }, { "epoch": 0.62853385930309, "high_lr": 0.0008742105263157895, "low_lr": 1.748421052631579e-05, "step": 239 }, { "epoch": 0.62853385930309, "high_lr": 0.0008742105263157895, "low_lr": 1.748421052631579e-05, "step": 239 }, { "epoch": 0.62853385930309, "high_lr": 0.0008742105263157895, "low_lr": 1.748421052631579e-05, "step": 239 }, { "epoch": 0.62853385930309, "high_lr": 0.0008742105263157895, "low_lr": 1.748421052631579e-05, "step": 239 }, { "epoch": 0.631163708086785, "grad_norm": 0.8813413381576538, "learning_rate": 0.0008736842105263159, "loss": 1.676, "step": 240 }, { "epoch": 0.631163708086785, "high_lr": 0.0008736842105263159, "low_lr": 1.7473684210526317e-05, "step": 240 }, { "epoch": 0.631163708086785, "high_lr": 0.0008736842105263159, "low_lr": 1.7473684210526317e-05, "step": 240 }, { "epoch": 0.631163708086785, "high_lr": 0.0008736842105263159, "low_lr": 1.7473684210526317e-05, "step": 240 }, { "epoch": 0.631163708086785, "high_lr": 0.0008736842105263159, "low_lr": 1.7473684210526317e-05, "step": 240 }, { "epoch": 0.631163708086785, "high_lr": 0.0008736842105263159, "low_lr": 1.7473684210526317e-05, "step": 240 }, { "epoch": 0.631163708086785, "high_lr": 0.0008736842105263159, "low_lr": 1.7473684210526317e-05, "step": 240 }, { "epoch": 0.631163708086785, "high_lr": 0.0008736842105263159, "low_lr": 1.7473684210526317e-05, "step": 240 }, { "epoch": 0.631163708086785, "high_lr": 0.0008736842105263159, "low_lr": 1.7473684210526317e-05, "step": 240 }, { "epoch": 0.63379355687048, "grad_norm": 0.8603155612945557, "learning_rate": 0.0008731578947368422, "loss": 1.6664, "step": 241 }, { "epoch": 0.63379355687048, "high_lr": 0.0008731578947368422, "low_lr": 1.7463157894736845e-05, "step": 241 }, { "epoch": 0.63379355687048, "high_lr": 0.0008731578947368422, "low_lr": 1.7463157894736845e-05, "step": 241 }, { "epoch": 0.63379355687048, "high_lr": 0.0008731578947368422, "low_lr": 1.7463157894736845e-05, "step": 241 }, { "epoch": 0.63379355687048, "high_lr": 0.0008731578947368422, "low_lr": 1.7463157894736845e-05, "step": 241 }, { "epoch": 0.63379355687048, "high_lr": 0.0008731578947368422, "low_lr": 1.7463157894736845e-05, "step": 241 }, { "epoch": 0.63379355687048, "high_lr": 0.0008731578947368422, "low_lr": 1.7463157894736845e-05, "step": 241 }, { "epoch": 0.63379355687048, "high_lr": 0.0008731578947368422, "low_lr": 1.7463157894736845e-05, "step": 241 }, { "epoch": 0.63379355687048, "high_lr": 0.0008731578947368422, "low_lr": 1.7463157894736845e-05, "step": 241 }, { "epoch": 0.6364234056541749, "grad_norm": 0.935298502445221, "learning_rate": 0.0008726315789473685, "loss": 1.6882, "step": 242 }, { "epoch": 0.6364234056541749, "high_lr": 0.0008726315789473685, "low_lr": 1.745263157894737e-05, "step": 242 }, { "epoch": 0.6364234056541749, "high_lr": 0.0008726315789473685, "low_lr": 1.745263157894737e-05, "step": 242 }, { "epoch": 0.6364234056541749, "high_lr": 0.0008726315789473685, "low_lr": 1.745263157894737e-05, "step": 242 }, { "epoch": 0.6364234056541749, "high_lr": 0.0008726315789473685, "low_lr": 1.745263157894737e-05, "step": 242 }, { "epoch": 0.6364234056541749, "high_lr": 0.0008726315789473685, "low_lr": 1.745263157894737e-05, "step": 242 }, { "epoch": 0.6364234056541749, "high_lr": 0.0008726315789473685, "low_lr": 1.745263157894737e-05, "step": 242 }, { "epoch": 0.6364234056541749, "high_lr": 0.0008726315789473685, "low_lr": 1.745263157894737e-05, "step": 242 }, { "epoch": 0.6364234056541749, "high_lr": 0.0008726315789473685, "low_lr": 1.745263157894737e-05, "step": 242 }, { "epoch": 0.6390532544378699, "grad_norm": 0.9325158596038818, "learning_rate": 0.0008721052631578948, "loss": 1.6877, "step": 243 }, { "epoch": 0.6390532544378699, "high_lr": 0.0008721052631578948, "low_lr": 1.7442105263157894e-05, "step": 243 }, { "epoch": 0.6390532544378699, "high_lr": 0.0008721052631578948, "low_lr": 1.7442105263157894e-05, "step": 243 }, { "epoch": 0.6390532544378699, "high_lr": 0.0008721052631578948, "low_lr": 1.7442105263157894e-05, "step": 243 }, { "epoch": 0.6390532544378699, "high_lr": 0.0008721052631578948, "low_lr": 1.7442105263157894e-05, "step": 243 }, { "epoch": 0.6390532544378699, "high_lr": 0.0008721052631578948, "low_lr": 1.7442105263157894e-05, "step": 243 }, { "epoch": 0.6390532544378699, "high_lr": 0.0008721052631578948, "low_lr": 1.7442105263157894e-05, "step": 243 }, { "epoch": 0.6390532544378699, "high_lr": 0.0008721052631578948, "low_lr": 1.7442105263157894e-05, "step": 243 }, { "epoch": 0.6390532544378699, "high_lr": 0.0008721052631578948, "low_lr": 1.7442105263157894e-05, "step": 243 }, { "epoch": 0.6416831032215647, "grad_norm": 0.8132498860359192, "learning_rate": 0.000871578947368421, "loss": 1.7054, "step": 244 }, { "epoch": 0.6416831032215647, "high_lr": 0.000871578947368421, "low_lr": 1.7431578947368422e-05, "step": 244 }, { "epoch": 0.6416831032215647, "high_lr": 0.000871578947368421, "low_lr": 1.7431578947368422e-05, "step": 244 }, { "epoch": 0.6416831032215647, "high_lr": 0.000871578947368421, "low_lr": 1.7431578947368422e-05, "step": 244 }, { "epoch": 0.6416831032215647, "high_lr": 0.000871578947368421, "low_lr": 1.7431578947368422e-05, "step": 244 }, { "epoch": 0.6416831032215647, "high_lr": 0.000871578947368421, "low_lr": 1.7431578947368422e-05, "step": 244 }, { "epoch": 0.6416831032215647, "high_lr": 0.000871578947368421, "low_lr": 1.7431578947368422e-05, "step": 244 }, { "epoch": 0.6416831032215647, "high_lr": 0.000871578947368421, "low_lr": 1.7431578947368422e-05, "step": 244 }, { "epoch": 0.6416831032215647, "high_lr": 0.000871578947368421, "low_lr": 1.7431578947368422e-05, "step": 244 }, { "epoch": 0.6443129520052597, "grad_norm": 1.1216504573822021, "learning_rate": 0.0008710526315789474, "loss": 1.6937, "step": 245 }, { "epoch": 0.6443129520052597, "high_lr": 0.0008710526315789474, "low_lr": 1.742105263157895e-05, "step": 245 }, { "epoch": 0.6443129520052597, "high_lr": 0.0008710526315789474, "low_lr": 1.742105263157895e-05, "step": 245 }, { "epoch": 0.6443129520052597, "high_lr": 0.0008710526315789474, "low_lr": 1.742105263157895e-05, "step": 245 }, { "epoch": 0.6443129520052597, "high_lr": 0.0008710526315789474, "low_lr": 1.742105263157895e-05, "step": 245 }, { "epoch": 0.6443129520052597, "high_lr": 0.0008710526315789474, "low_lr": 1.742105263157895e-05, "step": 245 }, { "epoch": 0.6443129520052597, "high_lr": 0.0008710526315789474, "low_lr": 1.742105263157895e-05, "step": 245 }, { "epoch": 0.6443129520052597, "high_lr": 0.0008710526315789474, "low_lr": 1.742105263157895e-05, "step": 245 }, { "epoch": 0.6443129520052597, "high_lr": 0.0008710526315789474, "low_lr": 1.742105263157895e-05, "step": 245 }, { "epoch": 0.6469428007889546, "grad_norm": 0.8950498700141907, "learning_rate": 0.0008705263157894737, "loss": 1.6761, "step": 246 }, { "epoch": 0.6469428007889546, "high_lr": 0.0008705263157894737, "low_lr": 1.7410526315789475e-05, "step": 246 }, { "epoch": 0.6469428007889546, "high_lr": 0.0008705263157894737, "low_lr": 1.7410526315789475e-05, "step": 246 }, { "epoch": 0.6469428007889546, "high_lr": 0.0008705263157894737, "low_lr": 1.7410526315789475e-05, "step": 246 }, { "epoch": 0.6469428007889546, "high_lr": 0.0008705263157894737, "low_lr": 1.7410526315789475e-05, "step": 246 }, { "epoch": 0.6469428007889546, "high_lr": 0.0008705263157894737, "low_lr": 1.7410526315789475e-05, "step": 246 }, { "epoch": 0.6469428007889546, "high_lr": 0.0008705263157894737, "low_lr": 1.7410526315789475e-05, "step": 246 }, { "epoch": 0.6469428007889546, "high_lr": 0.0008705263157894737, "low_lr": 1.7410526315789475e-05, "step": 246 }, { "epoch": 0.6469428007889546, "high_lr": 0.0008705263157894737, "low_lr": 1.7410526315789475e-05, "step": 246 }, { "epoch": 0.6495726495726496, "grad_norm": 0.9288367033004761, "learning_rate": 0.00087, "loss": 1.634, "step": 247 }, { "epoch": 0.6495726495726496, "high_lr": 0.00087, "low_lr": 1.7400000000000003e-05, "step": 247 }, { "epoch": 0.6495726495726496, "high_lr": 0.00087, "low_lr": 1.7400000000000003e-05, "step": 247 }, { "epoch": 0.6495726495726496, "high_lr": 0.00087, "low_lr": 1.7400000000000003e-05, "step": 247 }, { "epoch": 0.6495726495726496, "high_lr": 0.00087, "low_lr": 1.7400000000000003e-05, "step": 247 }, { "epoch": 0.6495726495726496, "high_lr": 0.00087, "low_lr": 1.7400000000000003e-05, "step": 247 }, { "epoch": 0.6495726495726496, "high_lr": 0.00087, "low_lr": 1.7400000000000003e-05, "step": 247 }, { "epoch": 0.6495726495726496, "high_lr": 0.00087, "low_lr": 1.7400000000000003e-05, "step": 247 }, { "epoch": 0.6495726495726496, "high_lr": 0.00087, "low_lr": 1.7400000000000003e-05, "step": 247 }, { "epoch": 0.6522024983563445, "grad_norm": 0.8630435466766357, "learning_rate": 0.0008694736842105263, "loss": 1.695, "step": 248 }, { "epoch": 0.6522024983563445, "high_lr": 0.0008694736842105263, "low_lr": 1.7389473684210527e-05, "step": 248 }, { "epoch": 0.6522024983563445, "high_lr": 0.0008694736842105263, "low_lr": 1.7389473684210527e-05, "step": 248 }, { "epoch": 0.6522024983563445, "high_lr": 0.0008694736842105263, "low_lr": 1.7389473684210527e-05, "step": 248 }, { "epoch": 0.6522024983563445, "high_lr": 0.0008694736842105263, "low_lr": 1.7389473684210527e-05, "step": 248 }, { "epoch": 0.6522024983563445, "high_lr": 0.0008694736842105263, "low_lr": 1.7389473684210527e-05, "step": 248 }, { "epoch": 0.6522024983563445, "high_lr": 0.0008694736842105263, "low_lr": 1.7389473684210527e-05, "step": 248 }, { "epoch": 0.6522024983563445, "high_lr": 0.0008694736842105263, "low_lr": 1.7389473684210527e-05, "step": 248 }, { "epoch": 0.6522024983563445, "high_lr": 0.0008694736842105263, "low_lr": 1.7389473684210527e-05, "step": 248 }, { "epoch": 0.6548323471400395, "grad_norm": 0.9438216090202332, "learning_rate": 0.0008689473684210526, "loss": 1.7483, "step": 249 }, { "epoch": 0.6548323471400395, "high_lr": 0.0008689473684210526, "low_lr": 1.7378947368421052e-05, "step": 249 }, { "epoch": 0.6548323471400395, "high_lr": 0.0008689473684210526, "low_lr": 1.7378947368421052e-05, "step": 249 }, { "epoch": 0.6548323471400395, "high_lr": 0.0008689473684210526, "low_lr": 1.7378947368421052e-05, "step": 249 }, { "epoch": 0.6548323471400395, "high_lr": 0.0008689473684210526, "low_lr": 1.7378947368421052e-05, "step": 249 }, { "epoch": 0.6548323471400395, "high_lr": 0.0008689473684210526, "low_lr": 1.7378947368421052e-05, "step": 249 }, { "epoch": 0.6548323471400395, "high_lr": 0.0008689473684210526, "low_lr": 1.7378947368421052e-05, "step": 249 }, { "epoch": 0.6548323471400395, "high_lr": 0.0008689473684210526, "low_lr": 1.7378947368421052e-05, "step": 249 }, { "epoch": 0.6548323471400395, "high_lr": 0.0008689473684210526, "low_lr": 1.7378947368421052e-05, "step": 249 }, { "epoch": 0.6574621959237343, "grad_norm": 0.9484896063804626, "learning_rate": 0.000868421052631579, "loss": 1.7012, "step": 250 }, { "epoch": 0.6574621959237343, "high_lr": 0.000868421052631579, "low_lr": 1.736842105263158e-05, "step": 250 }, { "epoch": 0.6574621959237343, "high_lr": 0.000868421052631579, "low_lr": 1.736842105263158e-05, "step": 250 }, { "epoch": 0.6574621959237343, "high_lr": 0.000868421052631579, "low_lr": 1.736842105263158e-05, "step": 250 }, { "epoch": 0.6574621959237343, "high_lr": 0.000868421052631579, "low_lr": 1.736842105263158e-05, "step": 250 }, { "epoch": 0.6574621959237343, "high_lr": 0.000868421052631579, "low_lr": 1.736842105263158e-05, "step": 250 }, { "epoch": 0.6574621959237343, "high_lr": 0.000868421052631579, "low_lr": 1.736842105263158e-05, "step": 250 }, { "epoch": 0.6574621959237343, "high_lr": 0.000868421052631579, "low_lr": 1.736842105263158e-05, "step": 250 }, { "epoch": 0.6574621959237343, "high_lr": 0.000868421052631579, "low_lr": 1.736842105263158e-05, "step": 250 }, { "epoch": 0.6600920447074293, "grad_norm": 0.886074423789978, "learning_rate": 0.0008678947368421053, "loss": 1.6723, "step": 251 }, { "epoch": 0.6600920447074293, "high_lr": 0.0008678947368421053, "low_lr": 1.7357894736842108e-05, "step": 251 }, { "epoch": 0.6600920447074293, "high_lr": 0.0008678947368421053, "low_lr": 1.7357894736842108e-05, "step": 251 }, { "epoch": 0.6600920447074293, "high_lr": 0.0008678947368421053, "low_lr": 1.7357894736842108e-05, "step": 251 }, { "epoch": 0.6600920447074293, "high_lr": 0.0008678947368421053, "low_lr": 1.7357894736842108e-05, "step": 251 }, { "epoch": 0.6600920447074293, "high_lr": 0.0008678947368421053, "low_lr": 1.7357894736842108e-05, "step": 251 }, { "epoch": 0.6600920447074293, "high_lr": 0.0008678947368421053, "low_lr": 1.7357894736842108e-05, "step": 251 }, { "epoch": 0.6600920447074293, "high_lr": 0.0008678947368421053, "low_lr": 1.7357894736842108e-05, "step": 251 }, { "epoch": 0.6600920447074293, "high_lr": 0.0008678947368421053, "low_lr": 1.7357894736842108e-05, "step": 251 }, { "epoch": 0.6627218934911243, "grad_norm": 0.9116823673248291, "learning_rate": 0.0008673684210526315, "loss": 1.7146, "step": 252 }, { "epoch": 0.6627218934911243, "high_lr": 0.0008673684210526315, "low_lr": 1.7347368421052633e-05, "step": 252 }, { "epoch": 0.6627218934911243, "high_lr": 0.0008673684210526315, "low_lr": 1.7347368421052633e-05, "step": 252 }, { "epoch": 0.6627218934911243, "high_lr": 0.0008673684210526315, "low_lr": 1.7347368421052633e-05, "step": 252 }, { "epoch": 0.6627218934911243, "high_lr": 0.0008673684210526315, "low_lr": 1.7347368421052633e-05, "step": 252 }, { "epoch": 0.6627218934911243, "high_lr": 0.0008673684210526315, "low_lr": 1.7347368421052633e-05, "step": 252 }, { "epoch": 0.6627218934911243, "high_lr": 0.0008673684210526315, "low_lr": 1.7347368421052633e-05, "step": 252 }, { "epoch": 0.6627218934911243, "high_lr": 0.0008673684210526315, "low_lr": 1.7347368421052633e-05, "step": 252 }, { "epoch": 0.6627218934911243, "high_lr": 0.0008673684210526315, "low_lr": 1.7347368421052633e-05, "step": 252 }, { "epoch": 0.6653517422748192, "grad_norm": 0.8356298208236694, "learning_rate": 0.0008668421052631578, "loss": 1.658, "step": 253 }, { "epoch": 0.6653517422748192, "high_lr": 0.0008668421052631578, "low_lr": 1.7336842105263157e-05, "step": 253 }, { "epoch": 0.6653517422748192, "high_lr": 0.0008668421052631578, "low_lr": 1.7336842105263157e-05, "step": 253 }, { "epoch": 0.6653517422748192, "high_lr": 0.0008668421052631578, "low_lr": 1.7336842105263157e-05, "step": 253 }, { "epoch": 0.6653517422748192, "high_lr": 0.0008668421052631578, "low_lr": 1.7336842105263157e-05, "step": 253 }, { "epoch": 0.6653517422748192, "high_lr": 0.0008668421052631578, "low_lr": 1.7336842105263157e-05, "step": 253 }, { "epoch": 0.6653517422748192, "high_lr": 0.0008668421052631578, "low_lr": 1.7336842105263157e-05, "step": 253 }, { "epoch": 0.6653517422748192, "high_lr": 0.0008668421052631578, "low_lr": 1.7336842105263157e-05, "step": 253 }, { "epoch": 0.6653517422748192, "high_lr": 0.0008668421052631578, "low_lr": 1.7336842105263157e-05, "step": 253 }, { "epoch": 0.6679815910585142, "grad_norm": 0.9419047236442566, "learning_rate": 0.0008663157894736843, "loss": 1.7317, "step": 254 }, { "epoch": 0.6679815910585142, "high_lr": 0.0008663157894736843, "low_lr": 1.7326315789473685e-05, "step": 254 }, { "epoch": 0.6679815910585142, "high_lr": 0.0008663157894736843, "low_lr": 1.7326315789473685e-05, "step": 254 }, { "epoch": 0.6679815910585142, "high_lr": 0.0008663157894736843, "low_lr": 1.7326315789473685e-05, "step": 254 }, { "epoch": 0.6679815910585142, "high_lr": 0.0008663157894736843, "low_lr": 1.7326315789473685e-05, "step": 254 }, { "epoch": 0.6679815910585142, "high_lr": 0.0008663157894736843, "low_lr": 1.7326315789473685e-05, "step": 254 }, { "epoch": 0.6679815910585142, "high_lr": 0.0008663157894736843, "low_lr": 1.7326315789473685e-05, "step": 254 }, { "epoch": 0.6679815910585142, "high_lr": 0.0008663157894736843, "low_lr": 1.7326315789473685e-05, "step": 254 }, { "epoch": 0.6679815910585142, "high_lr": 0.0008663157894736843, "low_lr": 1.7326315789473685e-05, "step": 254 }, { "epoch": 0.6706114398422091, "grad_norm": 0.8268232941627502, "learning_rate": 0.0008657894736842106, "loss": 1.6354, "step": 255 }, { "epoch": 0.6706114398422091, "high_lr": 0.0008657894736842106, "low_lr": 1.7315789473684213e-05, "step": 255 }, { "epoch": 0.6706114398422091, "high_lr": 0.0008657894736842106, "low_lr": 1.7315789473684213e-05, "step": 255 }, { "epoch": 0.6706114398422091, "high_lr": 0.0008657894736842106, "low_lr": 1.7315789473684213e-05, "step": 255 }, { "epoch": 0.6706114398422091, "high_lr": 0.0008657894736842106, "low_lr": 1.7315789473684213e-05, "step": 255 }, { "epoch": 0.6706114398422091, "high_lr": 0.0008657894736842106, "low_lr": 1.7315789473684213e-05, "step": 255 }, { "epoch": 0.6706114398422091, "high_lr": 0.0008657894736842106, "low_lr": 1.7315789473684213e-05, "step": 255 }, { "epoch": 0.6706114398422091, "high_lr": 0.0008657894736842106, "low_lr": 1.7315789473684213e-05, "step": 255 }, { "epoch": 0.6706114398422091, "high_lr": 0.0008657894736842106, "low_lr": 1.7315789473684213e-05, "step": 255 }, { "epoch": 0.673241288625904, "grad_norm": 0.8773369193077087, "learning_rate": 0.0008652631578947369, "loss": 1.6979, "step": 256 }, { "epoch": 0.673241288625904, "high_lr": 0.0008652631578947369, "low_lr": 1.7305263157894738e-05, "step": 256 }, { "epoch": 0.673241288625904, "high_lr": 0.0008652631578947369, "low_lr": 1.7305263157894738e-05, "step": 256 }, { "epoch": 0.673241288625904, "high_lr": 0.0008652631578947369, "low_lr": 1.7305263157894738e-05, "step": 256 }, { "epoch": 0.673241288625904, "high_lr": 0.0008652631578947369, "low_lr": 1.7305263157894738e-05, "step": 256 }, { "epoch": 0.673241288625904, "high_lr": 0.0008652631578947369, "low_lr": 1.7305263157894738e-05, "step": 256 }, { "epoch": 0.673241288625904, "high_lr": 0.0008652631578947369, "low_lr": 1.7305263157894738e-05, "step": 256 }, { "epoch": 0.673241288625904, "high_lr": 0.0008652631578947369, "low_lr": 1.7305263157894738e-05, "step": 256 }, { "epoch": 0.673241288625904, "high_lr": 0.0008652631578947369, "low_lr": 1.7305263157894738e-05, "step": 256 }, { "epoch": 0.6758711374095989, "grad_norm": 1.0301709175109863, "learning_rate": 0.0008647368421052632, "loss": 1.6949, "step": 257 }, { "epoch": 0.6758711374095989, "high_lr": 0.0008647368421052632, "low_lr": 1.7294736842105263e-05, "step": 257 }, { "epoch": 0.6758711374095989, "high_lr": 0.0008647368421052632, "low_lr": 1.7294736842105263e-05, "step": 257 }, { "epoch": 0.6758711374095989, "high_lr": 0.0008647368421052632, "low_lr": 1.7294736842105263e-05, "step": 257 }, { "epoch": 0.6758711374095989, "high_lr": 0.0008647368421052632, "low_lr": 1.7294736842105263e-05, "step": 257 }, { "epoch": 0.6758711374095989, "high_lr": 0.0008647368421052632, "low_lr": 1.7294736842105263e-05, "step": 257 }, { "epoch": 0.6758711374095989, "high_lr": 0.0008647368421052632, "low_lr": 1.7294736842105263e-05, "step": 257 }, { "epoch": 0.6758711374095989, "high_lr": 0.0008647368421052632, "low_lr": 1.7294736842105263e-05, "step": 257 }, { "epoch": 0.6758711374095989, "high_lr": 0.0008647368421052632, "low_lr": 1.7294736842105263e-05, "step": 257 }, { "epoch": 0.6785009861932939, "grad_norm": 1.0185023546218872, "learning_rate": 0.0008642105263157895, "loss": 1.6434, "step": 258 }, { "epoch": 0.6785009861932939, "high_lr": 0.0008642105263157895, "low_lr": 1.728421052631579e-05, "step": 258 }, { "epoch": 0.6785009861932939, "high_lr": 0.0008642105263157895, "low_lr": 1.728421052631579e-05, "step": 258 }, { "epoch": 0.6785009861932939, "high_lr": 0.0008642105263157895, "low_lr": 1.728421052631579e-05, "step": 258 }, { "epoch": 0.6785009861932939, "high_lr": 0.0008642105263157895, "low_lr": 1.728421052631579e-05, "step": 258 }, { "epoch": 0.6785009861932939, "high_lr": 0.0008642105263157895, "low_lr": 1.728421052631579e-05, "step": 258 }, { "epoch": 0.6785009861932939, "high_lr": 0.0008642105263157895, "low_lr": 1.728421052631579e-05, "step": 258 }, { "epoch": 0.6785009861932939, "high_lr": 0.0008642105263157895, "low_lr": 1.728421052631579e-05, "step": 258 }, { "epoch": 0.6785009861932939, "high_lr": 0.0008642105263157895, "low_lr": 1.728421052631579e-05, "step": 258 }, { "epoch": 0.6811308349769888, "grad_norm": 0.9030142426490784, "learning_rate": 0.0008636842105263159, "loss": 1.6707, "step": 259 }, { "epoch": 0.6811308349769888, "high_lr": 0.0008636842105263159, "low_lr": 1.727368421052632e-05, "step": 259 }, { "epoch": 0.6811308349769888, "high_lr": 0.0008636842105263159, "low_lr": 1.727368421052632e-05, "step": 259 }, { "epoch": 0.6811308349769888, "high_lr": 0.0008636842105263159, "low_lr": 1.727368421052632e-05, "step": 259 }, { "epoch": 0.6811308349769888, "high_lr": 0.0008636842105263159, "low_lr": 1.727368421052632e-05, "step": 259 }, { "epoch": 0.6811308349769888, "high_lr": 0.0008636842105263159, "low_lr": 1.727368421052632e-05, "step": 259 }, { "epoch": 0.6811308349769888, "high_lr": 0.0008636842105263159, "low_lr": 1.727368421052632e-05, "step": 259 }, { "epoch": 0.6811308349769888, "high_lr": 0.0008636842105263159, "low_lr": 1.727368421052632e-05, "step": 259 }, { "epoch": 0.6811308349769888, "high_lr": 0.0008636842105263159, "low_lr": 1.727368421052632e-05, "step": 259 }, { "epoch": 0.6837606837606838, "grad_norm": 0.9696186184883118, "learning_rate": 0.0008631578947368422, "loss": 1.698, "step": 260 }, { "epoch": 0.6837606837606838, "high_lr": 0.0008631578947368422, "low_lr": 1.7263157894736843e-05, "step": 260 }, { "epoch": 0.6837606837606838, "high_lr": 0.0008631578947368422, "low_lr": 1.7263157894736843e-05, "step": 260 }, { "epoch": 0.6837606837606838, "high_lr": 0.0008631578947368422, "low_lr": 1.7263157894736843e-05, "step": 260 }, { "epoch": 0.6837606837606838, "high_lr": 0.0008631578947368422, "low_lr": 1.7263157894736843e-05, "step": 260 }, { "epoch": 0.6837606837606838, "high_lr": 0.0008631578947368422, "low_lr": 1.7263157894736843e-05, "step": 260 }, { "epoch": 0.6837606837606838, "high_lr": 0.0008631578947368422, "low_lr": 1.7263157894736843e-05, "step": 260 }, { "epoch": 0.6837606837606838, "high_lr": 0.0008631578947368422, "low_lr": 1.7263157894736843e-05, "step": 260 }, { "epoch": 0.6837606837606838, "high_lr": 0.0008631578947368422, "low_lr": 1.7263157894736843e-05, "step": 260 }, { "epoch": 0.6863905325443787, "grad_norm": 0.871801495552063, "learning_rate": 0.0008626315789473684, "loss": 1.7287, "step": 261 }, { "epoch": 0.6863905325443787, "high_lr": 0.0008626315789473684, "low_lr": 1.725263157894737e-05, "step": 261 }, { "epoch": 0.6863905325443787, "high_lr": 0.0008626315789473684, "low_lr": 1.725263157894737e-05, "step": 261 }, { "epoch": 0.6863905325443787, "high_lr": 0.0008626315789473684, "low_lr": 1.725263157894737e-05, "step": 261 }, { "epoch": 0.6863905325443787, "high_lr": 0.0008626315789473684, "low_lr": 1.725263157894737e-05, "step": 261 }, { "epoch": 0.6863905325443787, "high_lr": 0.0008626315789473684, "low_lr": 1.725263157894737e-05, "step": 261 }, { "epoch": 0.6863905325443787, "high_lr": 0.0008626315789473684, "low_lr": 1.725263157894737e-05, "step": 261 }, { "epoch": 0.6863905325443787, "high_lr": 0.0008626315789473684, "low_lr": 1.725263157894737e-05, "step": 261 }, { "epoch": 0.6863905325443787, "high_lr": 0.0008626315789473684, "low_lr": 1.725263157894737e-05, "step": 261 }, { "epoch": 0.6890203813280736, "grad_norm": 1.0416325330734253, "learning_rate": 0.0008621052631578947, "loss": 1.7148, "step": 262 }, { "epoch": 0.6890203813280736, "high_lr": 0.0008621052631578947, "low_lr": 1.7242105263157896e-05, "step": 262 }, { "epoch": 0.6890203813280736, "high_lr": 0.0008621052631578947, "low_lr": 1.7242105263157896e-05, "step": 262 }, { "epoch": 0.6890203813280736, "high_lr": 0.0008621052631578947, "low_lr": 1.7242105263157896e-05, "step": 262 }, { "epoch": 0.6890203813280736, "high_lr": 0.0008621052631578947, "low_lr": 1.7242105263157896e-05, "step": 262 }, { "epoch": 0.6890203813280736, "high_lr": 0.0008621052631578947, "low_lr": 1.7242105263157896e-05, "step": 262 }, { "epoch": 0.6890203813280736, "high_lr": 0.0008621052631578947, "low_lr": 1.7242105263157896e-05, "step": 262 }, { "epoch": 0.6890203813280736, "high_lr": 0.0008621052631578947, "low_lr": 1.7242105263157896e-05, "step": 262 }, { "epoch": 0.6890203813280736, "high_lr": 0.0008621052631578947, "low_lr": 1.7242105263157896e-05, "step": 262 }, { "epoch": 0.6916502301117686, "grad_norm": 0.8305730223655701, "learning_rate": 0.000861578947368421, "loss": 1.6686, "step": 263 }, { "epoch": 0.6916502301117686, "high_lr": 0.000861578947368421, "low_lr": 1.723157894736842e-05, "step": 263 }, { "epoch": 0.6916502301117686, "high_lr": 0.000861578947368421, "low_lr": 1.723157894736842e-05, "step": 263 }, { "epoch": 0.6916502301117686, "high_lr": 0.000861578947368421, "low_lr": 1.723157894736842e-05, "step": 263 }, { "epoch": 0.6916502301117686, "high_lr": 0.000861578947368421, "low_lr": 1.723157894736842e-05, "step": 263 }, { "epoch": 0.6916502301117686, "high_lr": 0.000861578947368421, "low_lr": 1.723157894736842e-05, "step": 263 }, { "epoch": 0.6916502301117686, "high_lr": 0.000861578947368421, "low_lr": 1.723157894736842e-05, "step": 263 }, { "epoch": 0.6916502301117686, "high_lr": 0.000861578947368421, "low_lr": 1.723157894736842e-05, "step": 263 }, { "epoch": 0.6916502301117686, "high_lr": 0.000861578947368421, "low_lr": 1.723157894736842e-05, "step": 263 }, { "epoch": 0.6942800788954635, "grad_norm": 1.1706870794296265, "learning_rate": 0.0008610526315789474, "loss": 1.7173, "step": 264 }, { "epoch": 0.6942800788954635, "high_lr": 0.0008610526315789474, "low_lr": 1.722105263157895e-05, "step": 264 }, { "epoch": 0.6942800788954635, "high_lr": 0.0008610526315789474, "low_lr": 1.722105263157895e-05, "step": 264 }, { "epoch": 0.6942800788954635, "high_lr": 0.0008610526315789474, "low_lr": 1.722105263157895e-05, "step": 264 }, { "epoch": 0.6942800788954635, "high_lr": 0.0008610526315789474, "low_lr": 1.722105263157895e-05, "step": 264 }, { "epoch": 0.6942800788954635, "high_lr": 0.0008610526315789474, "low_lr": 1.722105263157895e-05, "step": 264 }, { "epoch": 0.6942800788954635, "high_lr": 0.0008610526315789474, "low_lr": 1.722105263157895e-05, "step": 264 }, { "epoch": 0.6942800788954635, "high_lr": 0.0008610526315789474, "low_lr": 1.722105263157895e-05, "step": 264 }, { "epoch": 0.6942800788954635, "high_lr": 0.0008610526315789474, "low_lr": 1.722105263157895e-05, "step": 264 }, { "epoch": 0.6969099276791585, "grad_norm": 0.9050807952880859, "learning_rate": 0.0008605263157894737, "loss": 1.6596, "step": 265 }, { "epoch": 0.6969099276791585, "high_lr": 0.0008605263157894737, "low_lr": 1.7210526315789477e-05, "step": 265 }, { "epoch": 0.6969099276791585, "high_lr": 0.0008605263157894737, "low_lr": 1.7210526315789477e-05, "step": 265 }, { "epoch": 0.6969099276791585, "high_lr": 0.0008605263157894737, "low_lr": 1.7210526315789477e-05, "step": 265 }, { "epoch": 0.6969099276791585, "high_lr": 0.0008605263157894737, "low_lr": 1.7210526315789477e-05, "step": 265 }, { "epoch": 0.6969099276791585, "high_lr": 0.0008605263157894737, "low_lr": 1.7210526315789477e-05, "step": 265 }, { "epoch": 0.6969099276791585, "high_lr": 0.0008605263157894737, "low_lr": 1.7210526315789477e-05, "step": 265 }, { "epoch": 0.6969099276791585, "high_lr": 0.0008605263157894737, "low_lr": 1.7210526315789477e-05, "step": 265 }, { "epoch": 0.6969099276791585, "high_lr": 0.0008605263157894737, "low_lr": 1.7210526315789477e-05, "step": 265 }, { "epoch": 0.6995397764628534, "grad_norm": 0.9089176058769226, "learning_rate": 0.00086, "loss": 1.6438, "step": 266 }, { "epoch": 0.6995397764628534, "high_lr": 0.00086, "low_lr": 1.72e-05, "step": 266 }, { "epoch": 0.6995397764628534, "high_lr": 0.00086, "low_lr": 1.72e-05, "step": 266 }, { "epoch": 0.6995397764628534, "high_lr": 0.00086, "low_lr": 1.72e-05, "step": 266 }, { "epoch": 0.6995397764628534, "high_lr": 0.00086, "low_lr": 1.72e-05, "step": 266 }, { "epoch": 0.6995397764628534, "high_lr": 0.00086, "low_lr": 1.72e-05, "step": 266 }, { "epoch": 0.6995397764628534, "high_lr": 0.00086, "low_lr": 1.72e-05, "step": 266 }, { "epoch": 0.6995397764628534, "high_lr": 0.00086, "low_lr": 1.72e-05, "step": 266 }, { "epoch": 0.6995397764628534, "high_lr": 0.00086, "low_lr": 1.72e-05, "step": 266 }, { "epoch": 0.7021696252465484, "grad_norm": 1.1428871154785156, "learning_rate": 0.0008594736842105263, "loss": 1.6189, "step": 267 }, { "epoch": 0.7021696252465484, "high_lr": 0.0008594736842105263, "low_lr": 1.7189473684210526e-05, "step": 267 }, { "epoch": 0.7021696252465484, "high_lr": 0.0008594736842105263, "low_lr": 1.7189473684210526e-05, "step": 267 }, { "epoch": 0.7021696252465484, "high_lr": 0.0008594736842105263, "low_lr": 1.7189473684210526e-05, "step": 267 }, { "epoch": 0.7021696252465484, "high_lr": 0.0008594736842105263, "low_lr": 1.7189473684210526e-05, "step": 267 }, { "epoch": 0.7021696252465484, "high_lr": 0.0008594736842105263, "low_lr": 1.7189473684210526e-05, "step": 267 }, { "epoch": 0.7021696252465484, "high_lr": 0.0008594736842105263, "low_lr": 1.7189473684210526e-05, "step": 267 }, { "epoch": 0.7021696252465484, "high_lr": 0.0008594736842105263, "low_lr": 1.7189473684210526e-05, "step": 267 }, { "epoch": 0.7021696252465484, "high_lr": 0.0008594736842105263, "low_lr": 1.7189473684210526e-05, "step": 267 }, { "epoch": 0.7047994740302432, "grad_norm": 0.8949025273323059, "learning_rate": 0.0008589473684210527, "loss": 1.6549, "step": 268 }, { "epoch": 0.7047994740302432, "high_lr": 0.0008589473684210527, "low_lr": 1.7178947368421054e-05, "step": 268 }, { "epoch": 0.7047994740302432, "high_lr": 0.0008589473684210527, "low_lr": 1.7178947368421054e-05, "step": 268 }, { "epoch": 0.7047994740302432, "high_lr": 0.0008589473684210527, "low_lr": 1.7178947368421054e-05, "step": 268 }, { "epoch": 0.7047994740302432, "high_lr": 0.0008589473684210527, "low_lr": 1.7178947368421054e-05, "step": 268 }, { "epoch": 0.7047994740302432, "high_lr": 0.0008589473684210527, "low_lr": 1.7178947368421054e-05, "step": 268 }, { "epoch": 0.7047994740302432, "high_lr": 0.0008589473684210527, "low_lr": 1.7178947368421054e-05, "step": 268 }, { "epoch": 0.7047994740302432, "high_lr": 0.0008589473684210527, "low_lr": 1.7178947368421054e-05, "step": 268 }, { "epoch": 0.7047994740302432, "high_lr": 0.0008589473684210527, "low_lr": 1.7178947368421054e-05, "step": 268 }, { "epoch": 0.7074293228139382, "grad_norm": 0.8947936296463013, "learning_rate": 0.0008584210526315789, "loss": 1.6635, "step": 269 }, { "epoch": 0.7074293228139382, "high_lr": 0.0008584210526315789, "low_lr": 1.7168421052631582e-05, "step": 269 }, { "epoch": 0.7074293228139382, "high_lr": 0.0008584210526315789, "low_lr": 1.7168421052631582e-05, "step": 269 }, { "epoch": 0.7074293228139382, "high_lr": 0.0008584210526315789, "low_lr": 1.7168421052631582e-05, "step": 269 }, { "epoch": 0.7074293228139382, "high_lr": 0.0008584210526315789, "low_lr": 1.7168421052631582e-05, "step": 269 }, { "epoch": 0.7074293228139382, "high_lr": 0.0008584210526315789, "low_lr": 1.7168421052631582e-05, "step": 269 }, { "epoch": 0.7074293228139382, "high_lr": 0.0008584210526315789, "low_lr": 1.7168421052631582e-05, "step": 269 }, { "epoch": 0.7074293228139382, "high_lr": 0.0008584210526315789, "low_lr": 1.7168421052631582e-05, "step": 269 }, { "epoch": 0.7074293228139382, "high_lr": 0.0008584210526315789, "low_lr": 1.7168421052631582e-05, "step": 269 }, { "epoch": 0.7100591715976331, "grad_norm": 0.8887916803359985, "learning_rate": 0.0008578947368421052, "loss": 1.65, "step": 270 }, { "epoch": 0.7100591715976331, "high_lr": 0.0008578947368421052, "low_lr": 1.7157894736842107e-05, "step": 270 }, { "epoch": 0.7100591715976331, "high_lr": 0.0008578947368421052, "low_lr": 1.7157894736842107e-05, "step": 270 }, { "epoch": 0.7100591715976331, "high_lr": 0.0008578947368421052, "low_lr": 1.7157894736842107e-05, "step": 270 }, { "epoch": 0.7100591715976331, "high_lr": 0.0008578947368421052, "low_lr": 1.7157894736842107e-05, "step": 270 }, { "epoch": 0.7100591715976331, "high_lr": 0.0008578947368421052, "low_lr": 1.7157894736842107e-05, "step": 270 }, { "epoch": 0.7100591715976331, "high_lr": 0.0008578947368421052, "low_lr": 1.7157894736842107e-05, "step": 270 }, { "epoch": 0.7100591715976331, "high_lr": 0.0008578947368421052, "low_lr": 1.7157894736842107e-05, "step": 270 }, { "epoch": 0.7100591715976331, "high_lr": 0.0008578947368421052, "low_lr": 1.7157894736842107e-05, "step": 270 }, { "epoch": 0.7126890203813281, "grad_norm": 0.9368761777877808, "learning_rate": 0.0008573684210526316, "loss": 1.6606, "step": 271 }, { "epoch": 0.7126890203813281, "high_lr": 0.0008573684210526316, "low_lr": 1.714736842105263e-05, "step": 271 }, { "epoch": 0.7126890203813281, "high_lr": 0.0008573684210526316, "low_lr": 1.714736842105263e-05, "step": 271 }, { "epoch": 0.7126890203813281, "high_lr": 0.0008573684210526316, "low_lr": 1.714736842105263e-05, "step": 271 }, { "epoch": 0.7126890203813281, "high_lr": 0.0008573684210526316, "low_lr": 1.714736842105263e-05, "step": 271 }, { "epoch": 0.7126890203813281, "high_lr": 0.0008573684210526316, "low_lr": 1.714736842105263e-05, "step": 271 }, { "epoch": 0.7126890203813281, "high_lr": 0.0008573684210526316, "low_lr": 1.714736842105263e-05, "step": 271 }, { "epoch": 0.7126890203813281, "high_lr": 0.0008573684210526316, "low_lr": 1.714736842105263e-05, "step": 271 }, { "epoch": 0.7126890203813281, "high_lr": 0.0008573684210526316, "low_lr": 1.714736842105263e-05, "step": 271 }, { "epoch": 0.715318869165023, "grad_norm": 0.9301985502243042, "learning_rate": 0.0008568421052631579, "loss": 1.637, "step": 272 }, { "epoch": 0.715318869165023, "high_lr": 0.0008568421052631579, "low_lr": 1.713684210526316e-05, "step": 272 }, { "epoch": 0.715318869165023, "high_lr": 0.0008568421052631579, "low_lr": 1.713684210526316e-05, "step": 272 }, { "epoch": 0.715318869165023, "high_lr": 0.0008568421052631579, "low_lr": 1.713684210526316e-05, "step": 272 }, { "epoch": 0.715318869165023, "high_lr": 0.0008568421052631579, "low_lr": 1.713684210526316e-05, "step": 272 }, { "epoch": 0.715318869165023, "high_lr": 0.0008568421052631579, "low_lr": 1.713684210526316e-05, "step": 272 }, { "epoch": 0.715318869165023, "high_lr": 0.0008568421052631579, "low_lr": 1.713684210526316e-05, "step": 272 }, { "epoch": 0.715318869165023, "high_lr": 0.0008568421052631579, "low_lr": 1.713684210526316e-05, "step": 272 }, { "epoch": 0.715318869165023, "high_lr": 0.0008568421052631579, "low_lr": 1.713684210526316e-05, "step": 272 }, { "epoch": 0.717948717948718, "grad_norm": 0.9040118455886841, "learning_rate": 0.0008563157894736843, "loss": 1.624, "step": 273 }, { "epoch": 0.717948717948718, "high_lr": 0.0008563157894736843, "low_lr": 1.7126315789473687e-05, "step": 273 }, { "epoch": 0.717948717948718, "high_lr": 0.0008563157894736843, "low_lr": 1.7126315789473687e-05, "step": 273 }, { "epoch": 0.717948717948718, "high_lr": 0.0008563157894736843, "low_lr": 1.7126315789473687e-05, "step": 273 }, { "epoch": 0.717948717948718, "high_lr": 0.0008563157894736843, "low_lr": 1.7126315789473687e-05, "step": 273 }, { "epoch": 0.717948717948718, "high_lr": 0.0008563157894736843, "low_lr": 1.7126315789473687e-05, "step": 273 }, { "epoch": 0.717948717948718, "high_lr": 0.0008563157894736843, "low_lr": 1.7126315789473687e-05, "step": 273 }, { "epoch": 0.717948717948718, "high_lr": 0.0008563157894736843, "low_lr": 1.7126315789473687e-05, "step": 273 }, { "epoch": 0.717948717948718, "high_lr": 0.0008563157894736843, "low_lr": 1.7126315789473687e-05, "step": 273 }, { "epoch": 0.7205785667324129, "grad_norm": 0.901945173740387, "learning_rate": 0.0008557894736842106, "loss": 1.6794, "step": 274 }, { "epoch": 0.7205785667324129, "high_lr": 0.0008557894736842106, "low_lr": 1.7115789473684212e-05, "step": 274 }, { "epoch": 0.7205785667324129, "high_lr": 0.0008557894736842106, "low_lr": 1.7115789473684212e-05, "step": 274 }, { "epoch": 0.7205785667324129, "high_lr": 0.0008557894736842106, "low_lr": 1.7115789473684212e-05, "step": 274 }, { "epoch": 0.7205785667324129, "high_lr": 0.0008557894736842106, "low_lr": 1.7115789473684212e-05, "step": 274 }, { "epoch": 0.7205785667324129, "high_lr": 0.0008557894736842106, "low_lr": 1.7115789473684212e-05, "step": 274 }, { "epoch": 0.7205785667324129, "high_lr": 0.0008557894736842106, "low_lr": 1.7115789473684212e-05, "step": 274 }, { "epoch": 0.7205785667324129, "high_lr": 0.0008557894736842106, "low_lr": 1.7115789473684212e-05, "step": 274 }, { "epoch": 0.7205785667324129, "high_lr": 0.0008557894736842106, "low_lr": 1.7115789473684212e-05, "step": 274 }, { "epoch": 0.7232084155161078, "grad_norm": 0.8379615545272827, "learning_rate": 0.0008552631578947369, "loss": 1.6236, "step": 275 }, { "epoch": 0.7232084155161078, "high_lr": 0.0008552631578947369, "low_lr": 1.7105263157894737e-05, "step": 275 }, { "epoch": 0.7232084155161078, "high_lr": 0.0008552631578947369, "low_lr": 1.7105263157894737e-05, "step": 275 }, { "epoch": 0.7232084155161078, "high_lr": 0.0008552631578947369, "low_lr": 1.7105263157894737e-05, "step": 275 }, { "epoch": 0.7232084155161078, "high_lr": 0.0008552631578947369, "low_lr": 1.7105263157894737e-05, "step": 275 }, { "epoch": 0.7232084155161078, "high_lr": 0.0008552631578947369, "low_lr": 1.7105263157894737e-05, "step": 275 }, { "epoch": 0.7232084155161078, "high_lr": 0.0008552631578947369, "low_lr": 1.7105263157894737e-05, "step": 275 }, { "epoch": 0.7232084155161078, "high_lr": 0.0008552631578947369, "low_lr": 1.7105263157894737e-05, "step": 275 }, { "epoch": 0.7232084155161078, "high_lr": 0.0008552631578947369, "low_lr": 1.7105263157894737e-05, "step": 275 }, { "epoch": 0.7258382642998028, "grad_norm": 0.8739696145057678, "learning_rate": 0.0008547368421052632, "loss": 1.6327, "step": 276 }, { "epoch": 0.7258382642998028, "high_lr": 0.0008547368421052632, "low_lr": 1.7094736842105265e-05, "step": 276 }, { "epoch": 0.7258382642998028, "high_lr": 0.0008547368421052632, "low_lr": 1.7094736842105265e-05, "step": 276 }, { "epoch": 0.7258382642998028, "high_lr": 0.0008547368421052632, "low_lr": 1.7094736842105265e-05, "step": 276 }, { "epoch": 0.7258382642998028, "high_lr": 0.0008547368421052632, "low_lr": 1.7094736842105265e-05, "step": 276 }, { "epoch": 0.7258382642998028, "high_lr": 0.0008547368421052632, "low_lr": 1.7094736842105265e-05, "step": 276 }, { "epoch": 0.7258382642998028, "high_lr": 0.0008547368421052632, "low_lr": 1.7094736842105265e-05, "step": 276 }, { "epoch": 0.7258382642998028, "high_lr": 0.0008547368421052632, "low_lr": 1.7094736842105265e-05, "step": 276 }, { "epoch": 0.7258382642998028, "high_lr": 0.0008547368421052632, "low_lr": 1.7094736842105265e-05, "step": 276 }, { "epoch": 0.7284681130834977, "grad_norm": 0.8980576992034912, "learning_rate": 0.0008542105263157894, "loss": 1.6419, "step": 277 }, { "epoch": 0.7284681130834977, "high_lr": 0.0008542105263157894, "low_lr": 1.708421052631579e-05, "step": 277 }, { "epoch": 0.7284681130834977, "high_lr": 0.0008542105263157894, "low_lr": 1.708421052631579e-05, "step": 277 }, { "epoch": 0.7284681130834977, "high_lr": 0.0008542105263157894, "low_lr": 1.708421052631579e-05, "step": 277 }, { "epoch": 0.7284681130834977, "high_lr": 0.0008542105263157894, "low_lr": 1.708421052631579e-05, "step": 277 }, { "epoch": 0.7284681130834977, "high_lr": 0.0008542105263157894, "low_lr": 1.708421052631579e-05, "step": 277 }, { "epoch": 0.7284681130834977, "high_lr": 0.0008542105263157894, "low_lr": 1.708421052631579e-05, "step": 277 }, { "epoch": 0.7284681130834977, "high_lr": 0.0008542105263157894, "low_lr": 1.708421052631579e-05, "step": 277 }, { "epoch": 0.7284681130834977, "high_lr": 0.0008542105263157894, "low_lr": 1.708421052631579e-05, "step": 277 }, { "epoch": 0.7310979618671927, "grad_norm": 0.8740023374557495, "learning_rate": 0.0008536842105263158, "loss": 1.6539, "step": 278 }, { "epoch": 0.7310979618671927, "high_lr": 0.0008536842105263158, "low_lr": 1.7073684210526317e-05, "step": 278 }, { "epoch": 0.7310979618671927, "high_lr": 0.0008536842105263158, "low_lr": 1.7073684210526317e-05, "step": 278 }, { "epoch": 0.7310979618671927, "high_lr": 0.0008536842105263158, "low_lr": 1.7073684210526317e-05, "step": 278 }, { "epoch": 0.7310979618671927, "high_lr": 0.0008536842105263158, "low_lr": 1.7073684210526317e-05, "step": 278 }, { "epoch": 0.7310979618671927, "high_lr": 0.0008536842105263158, "low_lr": 1.7073684210526317e-05, "step": 278 }, { "epoch": 0.7310979618671927, "high_lr": 0.0008536842105263158, "low_lr": 1.7073684210526317e-05, "step": 278 }, { "epoch": 0.7310979618671927, "high_lr": 0.0008536842105263158, "low_lr": 1.7073684210526317e-05, "step": 278 }, { "epoch": 0.7310979618671927, "high_lr": 0.0008536842105263158, "low_lr": 1.7073684210526317e-05, "step": 278 }, { "epoch": 0.7337278106508875, "grad_norm": 0.8141024708747864, "learning_rate": 0.0008531578947368421, "loss": 1.6074, "step": 279 }, { "epoch": 0.7337278106508875, "high_lr": 0.0008531578947368421, "low_lr": 1.7063157894736845e-05, "step": 279 }, { "epoch": 0.7337278106508875, "high_lr": 0.0008531578947368421, "low_lr": 1.7063157894736845e-05, "step": 279 }, { "epoch": 0.7337278106508875, "high_lr": 0.0008531578947368421, "low_lr": 1.7063157894736845e-05, "step": 279 }, { "epoch": 0.7337278106508875, "high_lr": 0.0008531578947368421, "low_lr": 1.7063157894736845e-05, "step": 279 }, { "epoch": 0.7337278106508875, "high_lr": 0.0008531578947368421, "low_lr": 1.7063157894736845e-05, "step": 279 }, { "epoch": 0.7337278106508875, "high_lr": 0.0008531578947368421, "low_lr": 1.7063157894736845e-05, "step": 279 }, { "epoch": 0.7337278106508875, "high_lr": 0.0008531578947368421, "low_lr": 1.7063157894736845e-05, "step": 279 }, { "epoch": 0.7337278106508875, "high_lr": 0.0008531578947368421, "low_lr": 1.7063157894736845e-05, "step": 279 }, { "epoch": 0.7363576594345825, "grad_norm": 0.9393441677093506, "learning_rate": 0.0008526315789473684, "loss": 1.663, "step": 280 }, { "epoch": 0.7363576594345825, "high_lr": 0.0008526315789473684, "low_lr": 1.705263157894737e-05, "step": 280 }, { "epoch": 0.7363576594345825, "high_lr": 0.0008526315789473684, "low_lr": 1.705263157894737e-05, "step": 280 }, { "epoch": 0.7363576594345825, "high_lr": 0.0008526315789473684, "low_lr": 1.705263157894737e-05, "step": 280 }, { "epoch": 0.7363576594345825, "high_lr": 0.0008526315789473684, "low_lr": 1.705263157894737e-05, "step": 280 }, { "epoch": 0.7363576594345825, "high_lr": 0.0008526315789473684, "low_lr": 1.705263157894737e-05, "step": 280 }, { "epoch": 0.7363576594345825, "high_lr": 0.0008526315789473684, "low_lr": 1.705263157894737e-05, "step": 280 }, { "epoch": 0.7363576594345825, "high_lr": 0.0008526315789473684, "low_lr": 1.705263157894737e-05, "step": 280 }, { "epoch": 0.7363576594345825, "high_lr": 0.0008526315789473684, "low_lr": 1.705263157894737e-05, "step": 280 }, { "epoch": 0.7389875082182774, "grad_norm": 0.9309095144271851, "learning_rate": 0.0008521052631578947, "loss": 1.675, "step": 281 }, { "epoch": 0.7389875082182774, "high_lr": 0.0008521052631578947, "low_lr": 1.7042105263157895e-05, "step": 281 }, { "epoch": 0.7389875082182774, "high_lr": 0.0008521052631578947, "low_lr": 1.7042105263157895e-05, "step": 281 }, { "epoch": 0.7389875082182774, "high_lr": 0.0008521052631578947, "low_lr": 1.7042105263157895e-05, "step": 281 }, { "epoch": 0.7389875082182774, "high_lr": 0.0008521052631578947, "low_lr": 1.7042105263157895e-05, "step": 281 }, { "epoch": 0.7389875082182774, "high_lr": 0.0008521052631578947, "low_lr": 1.7042105263157895e-05, "step": 281 }, { "epoch": 0.7389875082182774, "high_lr": 0.0008521052631578947, "low_lr": 1.7042105263157895e-05, "step": 281 }, { "epoch": 0.7389875082182774, "high_lr": 0.0008521052631578947, "low_lr": 1.7042105263157895e-05, "step": 281 }, { "epoch": 0.7389875082182774, "high_lr": 0.0008521052631578947, "low_lr": 1.7042105263157895e-05, "step": 281 }, { "epoch": 0.7416173570019724, "grad_norm": 0.9190104603767395, "learning_rate": 0.0008515789473684211, "loss": 1.647, "step": 282 }, { "epoch": 0.7416173570019724, "high_lr": 0.0008515789473684211, "low_lr": 1.7031578947368423e-05, "step": 282 }, { "epoch": 0.7416173570019724, "high_lr": 0.0008515789473684211, "low_lr": 1.7031578947368423e-05, "step": 282 }, { "epoch": 0.7416173570019724, "high_lr": 0.0008515789473684211, "low_lr": 1.7031578947368423e-05, "step": 282 }, { "epoch": 0.7416173570019724, "high_lr": 0.0008515789473684211, "low_lr": 1.7031578947368423e-05, "step": 282 }, { "epoch": 0.7416173570019724, "high_lr": 0.0008515789473684211, "low_lr": 1.7031578947368423e-05, "step": 282 }, { "epoch": 0.7416173570019724, "high_lr": 0.0008515789473684211, "low_lr": 1.7031578947368423e-05, "step": 282 }, { "epoch": 0.7416173570019724, "high_lr": 0.0008515789473684211, "low_lr": 1.7031578947368423e-05, "step": 282 }, { "epoch": 0.7416173570019724, "high_lr": 0.0008515789473684211, "low_lr": 1.7031578947368423e-05, "step": 282 }, { "epoch": 0.7442472057856673, "grad_norm": 0.9044349789619446, "learning_rate": 0.0008510526315789474, "loss": 1.6526, "step": 283 }, { "epoch": 0.7442472057856673, "high_lr": 0.0008510526315789474, "low_lr": 1.702105263157895e-05, "step": 283 }, { "epoch": 0.7442472057856673, "high_lr": 0.0008510526315789474, "low_lr": 1.702105263157895e-05, "step": 283 }, { "epoch": 0.7442472057856673, "high_lr": 0.0008510526315789474, "low_lr": 1.702105263157895e-05, "step": 283 }, { "epoch": 0.7442472057856673, "high_lr": 0.0008510526315789474, "low_lr": 1.702105263157895e-05, "step": 283 }, { "epoch": 0.7442472057856673, "high_lr": 0.0008510526315789474, "low_lr": 1.702105263157895e-05, "step": 283 }, { "epoch": 0.7442472057856673, "high_lr": 0.0008510526315789474, "low_lr": 1.702105263157895e-05, "step": 283 }, { "epoch": 0.7442472057856673, "high_lr": 0.0008510526315789474, "low_lr": 1.702105263157895e-05, "step": 283 }, { "epoch": 0.7442472057856673, "high_lr": 0.0008510526315789474, "low_lr": 1.702105263157895e-05, "step": 283 }, { "epoch": 0.7468770545693623, "grad_norm": 0.8546414971351624, "learning_rate": 0.0008505263157894737, "loss": 1.6029, "step": 284 }, { "epoch": 0.7468770545693623, "high_lr": 0.0008505263157894737, "low_lr": 1.7010526315789475e-05, "step": 284 }, { "epoch": 0.7468770545693623, "high_lr": 0.0008505263157894737, "low_lr": 1.7010526315789475e-05, "step": 284 }, { "epoch": 0.7468770545693623, "high_lr": 0.0008505263157894737, "low_lr": 1.7010526315789475e-05, "step": 284 }, { "epoch": 0.7468770545693623, "high_lr": 0.0008505263157894737, "low_lr": 1.7010526315789475e-05, "step": 284 }, { "epoch": 0.7468770545693623, "high_lr": 0.0008505263157894737, "low_lr": 1.7010526315789475e-05, "step": 284 }, { "epoch": 0.7468770545693623, "high_lr": 0.0008505263157894737, "low_lr": 1.7010526315789475e-05, "step": 284 }, { "epoch": 0.7468770545693623, "high_lr": 0.0008505263157894737, "low_lr": 1.7010526315789475e-05, "step": 284 }, { "epoch": 0.7468770545693623, "high_lr": 0.0008505263157894737, "low_lr": 1.7010526315789475e-05, "step": 284 }, { "epoch": 0.7495069033530573, "grad_norm": 0.9218443036079407, "learning_rate": 0.00085, "loss": 1.6428, "step": 285 }, { "epoch": 0.7495069033530573, "high_lr": 0.00085, "low_lr": 1.7e-05, "step": 285 }, { "epoch": 0.7495069033530573, "high_lr": 0.00085, "low_lr": 1.7e-05, "step": 285 }, { "epoch": 0.7495069033530573, "high_lr": 0.00085, "low_lr": 1.7e-05, "step": 285 }, { "epoch": 0.7495069033530573, "high_lr": 0.00085, "low_lr": 1.7e-05, "step": 285 }, { "epoch": 0.7495069033530573, "high_lr": 0.00085, "low_lr": 1.7e-05, "step": 285 }, { "epoch": 0.7495069033530573, "high_lr": 0.00085, "low_lr": 1.7e-05, "step": 285 }, { "epoch": 0.7495069033530573, "high_lr": 0.00085, "low_lr": 1.7e-05, "step": 285 }, { "epoch": 0.7495069033530573, "high_lr": 0.00085, "low_lr": 1.7e-05, "step": 285 }, { "epoch": 0.7521367521367521, "grad_norm": 0.971239447593689, "learning_rate": 0.0008494736842105262, "loss": 1.6809, "step": 286 }, { "epoch": 0.7521367521367521, "high_lr": 0.0008494736842105262, "low_lr": 1.6989473684210528e-05, "step": 286 }, { "epoch": 0.7521367521367521, "high_lr": 0.0008494736842105262, "low_lr": 1.6989473684210528e-05, "step": 286 }, { "epoch": 0.7521367521367521, "high_lr": 0.0008494736842105262, "low_lr": 1.6989473684210528e-05, "step": 286 }, { "epoch": 0.7521367521367521, "high_lr": 0.0008494736842105262, "low_lr": 1.6989473684210528e-05, "step": 286 }, { "epoch": 0.7521367521367521, "high_lr": 0.0008494736842105262, "low_lr": 1.6989473684210528e-05, "step": 286 }, { "epoch": 0.7521367521367521, "high_lr": 0.0008494736842105262, "low_lr": 1.6989473684210528e-05, "step": 286 }, { "epoch": 0.7521367521367521, "high_lr": 0.0008494736842105262, "low_lr": 1.6989473684210528e-05, "step": 286 }, { "epoch": 0.7521367521367521, "high_lr": 0.0008494736842105262, "low_lr": 1.6989473684210528e-05, "step": 286 }, { "epoch": 0.7547666009204471, "grad_norm": 0.8431284427642822, "learning_rate": 0.0008489473684210527, "loss": 1.6044, "step": 287 }, { "epoch": 0.7547666009204471, "high_lr": 0.0008489473684210527, "low_lr": 1.6978947368421056e-05, "step": 287 }, { "epoch": 0.7547666009204471, "high_lr": 0.0008489473684210527, "low_lr": 1.6978947368421056e-05, "step": 287 }, { "epoch": 0.7547666009204471, "high_lr": 0.0008489473684210527, "low_lr": 1.6978947368421056e-05, "step": 287 }, { "epoch": 0.7547666009204471, "high_lr": 0.0008489473684210527, "low_lr": 1.6978947368421056e-05, "step": 287 }, { "epoch": 0.7547666009204471, "high_lr": 0.0008489473684210527, "low_lr": 1.6978947368421056e-05, "step": 287 }, { "epoch": 0.7547666009204471, "high_lr": 0.0008489473684210527, "low_lr": 1.6978947368421056e-05, "step": 287 }, { "epoch": 0.7547666009204471, "high_lr": 0.0008489473684210527, "low_lr": 1.6978947368421056e-05, "step": 287 }, { "epoch": 0.7547666009204471, "high_lr": 0.0008489473684210527, "low_lr": 1.6978947368421056e-05, "step": 287 }, { "epoch": 0.757396449704142, "grad_norm": 0.9022310972213745, "learning_rate": 0.000848421052631579, "loss": 1.6496, "step": 288 }, { "epoch": 0.757396449704142, "high_lr": 0.000848421052631579, "low_lr": 1.696842105263158e-05, "step": 288 }, { "epoch": 0.757396449704142, "high_lr": 0.000848421052631579, "low_lr": 1.696842105263158e-05, "step": 288 }, { "epoch": 0.757396449704142, "high_lr": 0.000848421052631579, "low_lr": 1.696842105263158e-05, "step": 288 }, { "epoch": 0.757396449704142, "high_lr": 0.000848421052631579, "low_lr": 1.696842105263158e-05, "step": 288 }, { "epoch": 0.757396449704142, "high_lr": 0.000848421052631579, "low_lr": 1.696842105263158e-05, "step": 288 }, { "epoch": 0.757396449704142, "high_lr": 0.000848421052631579, "low_lr": 1.696842105263158e-05, "step": 288 }, { "epoch": 0.757396449704142, "high_lr": 0.000848421052631579, "low_lr": 1.696842105263158e-05, "step": 288 }, { "epoch": 0.757396449704142, "high_lr": 0.000848421052631579, "low_lr": 1.696842105263158e-05, "step": 288 }, { "epoch": 0.760026298487837, "grad_norm": 0.9277114868164062, "learning_rate": 0.0008478947368421053, "loss": 1.6554, "step": 289 }, { "epoch": 0.760026298487837, "high_lr": 0.0008478947368421053, "low_lr": 1.6957894736842105e-05, "step": 289 }, { "epoch": 0.760026298487837, "high_lr": 0.0008478947368421053, "low_lr": 1.6957894736842105e-05, "step": 289 }, { "epoch": 0.760026298487837, "high_lr": 0.0008478947368421053, "low_lr": 1.6957894736842105e-05, "step": 289 }, { "epoch": 0.760026298487837, "high_lr": 0.0008478947368421053, "low_lr": 1.6957894736842105e-05, "step": 289 }, { "epoch": 0.760026298487837, "high_lr": 0.0008478947368421053, "low_lr": 1.6957894736842105e-05, "step": 289 }, { "epoch": 0.760026298487837, "high_lr": 0.0008478947368421053, "low_lr": 1.6957894736842105e-05, "step": 289 }, { "epoch": 0.760026298487837, "high_lr": 0.0008478947368421053, "low_lr": 1.6957894736842105e-05, "step": 289 }, { "epoch": 0.760026298487837, "high_lr": 0.0008478947368421053, "low_lr": 1.6957894736842105e-05, "step": 289 }, { "epoch": 0.7626561472715319, "grad_norm": 1.0391700267791748, "learning_rate": 0.0008473684210526316, "loss": 1.6826, "step": 290 }, { "epoch": 0.7626561472715319, "high_lr": 0.0008473684210526316, "low_lr": 1.6947368421052633e-05, "step": 290 }, { "epoch": 0.7626561472715319, "high_lr": 0.0008473684210526316, "low_lr": 1.6947368421052633e-05, "step": 290 }, { "epoch": 0.7626561472715319, "high_lr": 0.0008473684210526316, "low_lr": 1.6947368421052633e-05, "step": 290 }, { "epoch": 0.7626561472715319, "high_lr": 0.0008473684210526316, "low_lr": 1.6947368421052633e-05, "step": 290 }, { "epoch": 0.7626561472715319, "high_lr": 0.0008473684210526316, "low_lr": 1.6947368421052633e-05, "step": 290 }, { "epoch": 0.7626561472715319, "high_lr": 0.0008473684210526316, "low_lr": 1.6947368421052633e-05, "step": 290 }, { "epoch": 0.7626561472715319, "high_lr": 0.0008473684210526316, "low_lr": 1.6947368421052633e-05, "step": 290 }, { "epoch": 0.7626561472715319, "high_lr": 0.0008473684210526316, "low_lr": 1.6947368421052633e-05, "step": 290 }, { "epoch": 0.7652859960552268, "grad_norm": 0.8468443751335144, "learning_rate": 0.0008468421052631579, "loss": 1.6082, "step": 291 }, { "epoch": 0.7652859960552268, "high_lr": 0.0008468421052631579, "low_lr": 1.6936842105263158e-05, "step": 291 }, { "epoch": 0.7652859960552268, "high_lr": 0.0008468421052631579, "low_lr": 1.6936842105263158e-05, "step": 291 }, { "epoch": 0.7652859960552268, "high_lr": 0.0008468421052631579, "low_lr": 1.6936842105263158e-05, "step": 291 }, { "epoch": 0.7652859960552268, "high_lr": 0.0008468421052631579, "low_lr": 1.6936842105263158e-05, "step": 291 }, { "epoch": 0.7652859960552268, "high_lr": 0.0008468421052631579, "low_lr": 1.6936842105263158e-05, "step": 291 }, { "epoch": 0.7652859960552268, "high_lr": 0.0008468421052631579, "low_lr": 1.6936842105263158e-05, "step": 291 }, { "epoch": 0.7652859960552268, "high_lr": 0.0008468421052631579, "low_lr": 1.6936842105263158e-05, "step": 291 }, { "epoch": 0.7652859960552268, "high_lr": 0.0008468421052631579, "low_lr": 1.6936842105263158e-05, "step": 291 }, { "epoch": 0.7679158448389217, "grad_norm": 0.8624788522720337, "learning_rate": 0.0008463157894736843, "loss": 1.6122, "step": 292 }, { "epoch": 0.7679158448389217, "high_lr": 0.0008463157894736843, "low_lr": 1.6926315789473686e-05, "step": 292 }, { "epoch": 0.7679158448389217, "high_lr": 0.0008463157894736843, "low_lr": 1.6926315789473686e-05, "step": 292 }, { "epoch": 0.7679158448389217, "high_lr": 0.0008463157894736843, "low_lr": 1.6926315789473686e-05, "step": 292 }, { "epoch": 0.7679158448389217, "high_lr": 0.0008463157894736843, "low_lr": 1.6926315789473686e-05, "step": 292 }, { "epoch": 0.7679158448389217, "high_lr": 0.0008463157894736843, "low_lr": 1.6926315789473686e-05, "step": 292 }, { "epoch": 0.7679158448389217, "high_lr": 0.0008463157894736843, "low_lr": 1.6926315789473686e-05, "step": 292 }, { "epoch": 0.7679158448389217, "high_lr": 0.0008463157894736843, "low_lr": 1.6926315789473686e-05, "step": 292 }, { "epoch": 0.7679158448389217, "high_lr": 0.0008463157894736843, "low_lr": 1.6926315789473686e-05, "step": 292 }, { "epoch": 0.7705456936226167, "grad_norm": 0.940320611000061, "learning_rate": 0.0008457894736842106, "loss": 1.5794, "step": 293 }, { "epoch": 0.7705456936226167, "high_lr": 0.0008457894736842106, "low_lr": 1.691578947368421e-05, "step": 293 }, { "epoch": 0.7705456936226167, "high_lr": 0.0008457894736842106, "low_lr": 1.691578947368421e-05, "step": 293 }, { "epoch": 0.7705456936226167, "high_lr": 0.0008457894736842106, "low_lr": 1.691578947368421e-05, "step": 293 }, { "epoch": 0.7705456936226167, "high_lr": 0.0008457894736842106, "low_lr": 1.691578947368421e-05, "step": 293 }, { "epoch": 0.7705456936226167, "high_lr": 0.0008457894736842106, "low_lr": 1.691578947368421e-05, "step": 293 }, { "epoch": 0.7705456936226167, "high_lr": 0.0008457894736842106, "low_lr": 1.691578947368421e-05, "step": 293 }, { "epoch": 0.7705456936226167, "high_lr": 0.0008457894736842106, "low_lr": 1.691578947368421e-05, "step": 293 }, { "epoch": 0.7705456936226167, "high_lr": 0.0008457894736842106, "low_lr": 1.691578947368421e-05, "step": 293 }, { "epoch": 0.7731755424063116, "grad_norm": 0.9850048422813416, "learning_rate": 0.0008452631578947369, "loss": 1.677, "step": 294 }, { "epoch": 0.7731755424063116, "high_lr": 0.0008452631578947369, "low_lr": 1.690526315789474e-05, "step": 294 }, { "epoch": 0.7731755424063116, "high_lr": 0.0008452631578947369, "low_lr": 1.690526315789474e-05, "step": 294 }, { "epoch": 0.7731755424063116, "high_lr": 0.0008452631578947369, "low_lr": 1.690526315789474e-05, "step": 294 }, { "epoch": 0.7731755424063116, "high_lr": 0.0008452631578947369, "low_lr": 1.690526315789474e-05, "step": 294 }, { "epoch": 0.7731755424063116, "high_lr": 0.0008452631578947369, "low_lr": 1.690526315789474e-05, "step": 294 }, { "epoch": 0.7731755424063116, "high_lr": 0.0008452631578947369, "low_lr": 1.690526315789474e-05, "step": 294 }, { "epoch": 0.7731755424063116, "high_lr": 0.0008452631578947369, "low_lr": 1.690526315789474e-05, "step": 294 }, { "epoch": 0.7731755424063116, "high_lr": 0.0008452631578947369, "low_lr": 1.690526315789474e-05, "step": 294 }, { "epoch": 0.7758053911900066, "grad_norm": 0.8709503412246704, "learning_rate": 0.0008447368421052631, "loss": 1.5626, "step": 295 }, { "epoch": 0.7758053911900066, "high_lr": 0.0008447368421052631, "low_lr": 1.6894736842105263e-05, "step": 295 }, { "epoch": 0.7758053911900066, "high_lr": 0.0008447368421052631, "low_lr": 1.6894736842105263e-05, "step": 295 }, { "epoch": 0.7758053911900066, "high_lr": 0.0008447368421052631, "low_lr": 1.6894736842105263e-05, "step": 295 }, { "epoch": 0.7758053911900066, "high_lr": 0.0008447368421052631, "low_lr": 1.6894736842105263e-05, "step": 295 }, { "epoch": 0.7758053911900066, "high_lr": 0.0008447368421052631, "low_lr": 1.6894736842105263e-05, "step": 295 }, { "epoch": 0.7758053911900066, "high_lr": 0.0008447368421052631, "low_lr": 1.6894736842105263e-05, "step": 295 }, { "epoch": 0.7758053911900066, "high_lr": 0.0008447368421052631, "low_lr": 1.6894736842105263e-05, "step": 295 }, { "epoch": 0.7758053911900066, "high_lr": 0.0008447368421052631, "low_lr": 1.6894736842105263e-05, "step": 295 }, { "epoch": 0.7784352399737016, "grad_norm": 0.8435001969337463, "learning_rate": 0.0008442105263157895, "loss": 1.5553, "step": 296 }, { "epoch": 0.7784352399737016, "high_lr": 0.0008442105263157895, "low_lr": 1.688421052631579e-05, "step": 296 }, { "epoch": 0.7784352399737016, "high_lr": 0.0008442105263157895, "low_lr": 1.688421052631579e-05, "step": 296 }, { "epoch": 0.7784352399737016, "high_lr": 0.0008442105263157895, "low_lr": 1.688421052631579e-05, "step": 296 }, { "epoch": 0.7784352399737016, "high_lr": 0.0008442105263157895, "low_lr": 1.688421052631579e-05, "step": 296 }, { "epoch": 0.7784352399737016, "high_lr": 0.0008442105263157895, "low_lr": 1.688421052631579e-05, "step": 296 }, { "epoch": 0.7784352399737016, "high_lr": 0.0008442105263157895, "low_lr": 1.688421052631579e-05, "step": 296 }, { "epoch": 0.7784352399737016, "high_lr": 0.0008442105263157895, "low_lr": 1.688421052631579e-05, "step": 296 }, { "epoch": 0.7784352399737016, "high_lr": 0.0008442105263157895, "low_lr": 1.688421052631579e-05, "step": 296 }, { "epoch": 0.7810650887573964, "grad_norm": 0.8458621501922607, "learning_rate": 0.0008436842105263158, "loss": 1.6561, "step": 297 }, { "epoch": 0.7810650887573964, "high_lr": 0.0008436842105263158, "low_lr": 1.687368421052632e-05, "step": 297 }, { "epoch": 0.7810650887573964, "high_lr": 0.0008436842105263158, "low_lr": 1.687368421052632e-05, "step": 297 }, { "epoch": 0.7810650887573964, "high_lr": 0.0008436842105263158, "low_lr": 1.687368421052632e-05, "step": 297 }, { "epoch": 0.7810650887573964, "high_lr": 0.0008436842105263158, "low_lr": 1.687368421052632e-05, "step": 297 }, { "epoch": 0.7810650887573964, "high_lr": 0.0008436842105263158, "low_lr": 1.687368421052632e-05, "step": 297 }, { "epoch": 0.7810650887573964, "high_lr": 0.0008436842105263158, "low_lr": 1.687368421052632e-05, "step": 297 }, { "epoch": 0.7810650887573964, "high_lr": 0.0008436842105263158, "low_lr": 1.687368421052632e-05, "step": 297 }, { "epoch": 0.7810650887573964, "high_lr": 0.0008436842105263158, "low_lr": 1.687368421052632e-05, "step": 297 }, { "epoch": 0.7836949375410914, "grad_norm": 0.893551766872406, "learning_rate": 0.0008431578947368421, "loss": 1.6, "step": 298 }, { "epoch": 0.7836949375410914, "high_lr": 0.0008431578947368421, "low_lr": 1.6863157894736844e-05, "step": 298 }, { "epoch": 0.7836949375410914, "high_lr": 0.0008431578947368421, "low_lr": 1.6863157894736844e-05, "step": 298 }, { "epoch": 0.7836949375410914, "high_lr": 0.0008431578947368421, "low_lr": 1.6863157894736844e-05, "step": 298 }, { "epoch": 0.7836949375410914, "high_lr": 0.0008431578947368421, "low_lr": 1.6863157894736844e-05, "step": 298 }, { "epoch": 0.7836949375410914, "high_lr": 0.0008431578947368421, "low_lr": 1.6863157894736844e-05, "step": 298 }, { "epoch": 0.7836949375410914, "high_lr": 0.0008431578947368421, "low_lr": 1.6863157894736844e-05, "step": 298 }, { "epoch": 0.7836949375410914, "high_lr": 0.0008431578947368421, "low_lr": 1.6863157894736844e-05, "step": 298 }, { "epoch": 0.7836949375410914, "high_lr": 0.0008431578947368421, "low_lr": 1.6863157894736844e-05, "step": 298 }, { "epoch": 0.7863247863247863, "grad_norm": 0.9290257096290588, "learning_rate": 0.0008426315789473684, "loss": 1.6117, "step": 299 }, { "epoch": 0.7863247863247863, "high_lr": 0.0008426315789473684, "low_lr": 1.685263157894737e-05, "step": 299 }, { "epoch": 0.7863247863247863, "high_lr": 0.0008426315789473684, "low_lr": 1.685263157894737e-05, "step": 299 }, { "epoch": 0.7863247863247863, "high_lr": 0.0008426315789473684, "low_lr": 1.685263157894737e-05, "step": 299 }, { "epoch": 0.7863247863247863, "high_lr": 0.0008426315789473684, "low_lr": 1.685263157894737e-05, "step": 299 }, { "epoch": 0.7863247863247863, "high_lr": 0.0008426315789473684, "low_lr": 1.685263157894737e-05, "step": 299 }, { "epoch": 0.7863247863247863, "high_lr": 0.0008426315789473684, "low_lr": 1.685263157894737e-05, "step": 299 }, { "epoch": 0.7863247863247863, "high_lr": 0.0008426315789473684, "low_lr": 1.685263157894737e-05, "step": 299 }, { "epoch": 0.7863247863247863, "high_lr": 0.0008426315789473684, "low_lr": 1.685263157894737e-05, "step": 299 }, { "epoch": 0.7889546351084813, "grad_norm": 0.9197156429290771, "learning_rate": 0.0008421052631578947, "loss": 1.6593, "step": 300 }, { "epoch": 0.7889546351084813, "high_lr": 0.0008421052631578947, "low_lr": 1.6842105263157896e-05, "step": 300 }, { "epoch": 0.7889546351084813, "high_lr": 0.0008421052631578947, "low_lr": 1.6842105263157896e-05, "step": 300 }, { "epoch": 0.7889546351084813, "high_lr": 0.0008421052631578947, "low_lr": 1.6842105263157896e-05, "step": 300 }, { "epoch": 0.7889546351084813, "high_lr": 0.0008421052631578947, "low_lr": 1.6842105263157896e-05, "step": 300 }, { "epoch": 0.7889546351084813, "high_lr": 0.0008421052631578947, "low_lr": 1.6842105263157896e-05, "step": 300 }, { "epoch": 0.7889546351084813, "high_lr": 0.0008421052631578947, "low_lr": 1.6842105263157896e-05, "step": 300 }, { "epoch": 0.7889546351084813, "high_lr": 0.0008421052631578947, "low_lr": 1.6842105263157896e-05, "step": 300 }, { "epoch": 0.7889546351084813, "high_lr": 0.0008421052631578947, "low_lr": 1.6842105263157896e-05, "step": 300 }, { "epoch": 0.7915844838921762, "grad_norm": 1.0398555994033813, "learning_rate": 0.0008415789473684211, "loss": 1.6858, "step": 301 }, { "epoch": 0.7915844838921762, "high_lr": 0.0008415789473684211, "low_lr": 1.6831578947368424e-05, "step": 301 }, { "epoch": 0.7915844838921762, "high_lr": 0.0008415789473684211, "low_lr": 1.6831578947368424e-05, "step": 301 }, { "epoch": 0.7915844838921762, "high_lr": 0.0008415789473684211, "low_lr": 1.6831578947368424e-05, "step": 301 }, { "epoch": 0.7915844838921762, "high_lr": 0.0008415789473684211, "low_lr": 1.6831578947368424e-05, "step": 301 }, { "epoch": 0.7915844838921762, "high_lr": 0.0008415789473684211, "low_lr": 1.6831578947368424e-05, "step": 301 }, { "epoch": 0.7915844838921762, "high_lr": 0.0008415789473684211, "low_lr": 1.6831578947368424e-05, "step": 301 }, { "epoch": 0.7915844838921762, "high_lr": 0.0008415789473684211, "low_lr": 1.6831578947368424e-05, "step": 301 }, { "epoch": 0.7915844838921762, "high_lr": 0.0008415789473684211, "low_lr": 1.6831578947368424e-05, "step": 301 }, { "epoch": 0.7942143326758712, "grad_norm": 0.9343593716621399, "learning_rate": 0.0008410526315789474, "loss": 1.6419, "step": 302 }, { "epoch": 0.7942143326758712, "high_lr": 0.0008410526315789474, "low_lr": 1.682105263157895e-05, "step": 302 }, { "epoch": 0.7942143326758712, "high_lr": 0.0008410526315789474, "low_lr": 1.682105263157895e-05, "step": 302 }, { "epoch": 0.7942143326758712, "high_lr": 0.0008410526315789474, "low_lr": 1.682105263157895e-05, "step": 302 }, { "epoch": 0.7942143326758712, "high_lr": 0.0008410526315789474, "low_lr": 1.682105263157895e-05, "step": 302 }, { "epoch": 0.7942143326758712, "high_lr": 0.0008410526315789474, "low_lr": 1.682105263157895e-05, "step": 302 }, { "epoch": 0.7942143326758712, "high_lr": 0.0008410526315789474, "low_lr": 1.682105263157895e-05, "step": 302 }, { "epoch": 0.7942143326758712, "high_lr": 0.0008410526315789474, "low_lr": 1.682105263157895e-05, "step": 302 }, { "epoch": 0.7942143326758712, "high_lr": 0.0008410526315789474, "low_lr": 1.682105263157895e-05, "step": 302 }, { "epoch": 0.796844181459566, "grad_norm": 0.9294910430908203, "learning_rate": 0.0008405263157894736, "loss": 1.6051, "step": 303 }, { "epoch": 0.796844181459566, "high_lr": 0.0008405263157894736, "low_lr": 1.6810526315789474e-05, "step": 303 }, { "epoch": 0.796844181459566, "high_lr": 0.0008405263157894736, "low_lr": 1.6810526315789474e-05, "step": 303 }, { "epoch": 0.796844181459566, "high_lr": 0.0008405263157894736, "low_lr": 1.6810526315789474e-05, "step": 303 }, { "epoch": 0.796844181459566, "high_lr": 0.0008405263157894736, "low_lr": 1.6810526315789474e-05, "step": 303 }, { "epoch": 0.796844181459566, "high_lr": 0.0008405263157894736, "low_lr": 1.6810526315789474e-05, "step": 303 }, { "epoch": 0.796844181459566, "high_lr": 0.0008405263157894736, "low_lr": 1.6810526315789474e-05, "step": 303 }, { "epoch": 0.796844181459566, "high_lr": 0.0008405263157894736, "low_lr": 1.6810526315789474e-05, "step": 303 }, { "epoch": 0.796844181459566, "high_lr": 0.0008405263157894736, "low_lr": 1.6810526315789474e-05, "step": 303 }, { "epoch": 0.799474030243261, "grad_norm": 0.9193534851074219, "learning_rate": 0.00084, "loss": 1.6315, "step": 304 }, { "epoch": 0.799474030243261, "high_lr": 0.00084, "low_lr": 1.6800000000000002e-05, "step": 304 }, { "epoch": 0.799474030243261, "high_lr": 0.00084, "low_lr": 1.6800000000000002e-05, "step": 304 }, { "epoch": 0.799474030243261, "high_lr": 0.00084, "low_lr": 1.6800000000000002e-05, "step": 304 }, { "epoch": 0.799474030243261, "high_lr": 0.00084, "low_lr": 1.6800000000000002e-05, "step": 304 }, { "epoch": 0.799474030243261, "high_lr": 0.00084, "low_lr": 1.6800000000000002e-05, "step": 304 }, { "epoch": 0.799474030243261, "high_lr": 0.00084, "low_lr": 1.6800000000000002e-05, "step": 304 }, { "epoch": 0.799474030243261, "high_lr": 0.00084, "low_lr": 1.6800000000000002e-05, "step": 304 }, { "epoch": 0.799474030243261, "high_lr": 0.00084, "low_lr": 1.6800000000000002e-05, "step": 304 }, { "epoch": 0.8021038790269559, "grad_norm": 0.8686949014663696, "learning_rate": 0.0008394736842105263, "loss": 1.589, "step": 305 }, { "epoch": 0.8021038790269559, "high_lr": 0.0008394736842105263, "low_lr": 1.6789473684210526e-05, "step": 305 }, { "epoch": 0.8021038790269559, "high_lr": 0.0008394736842105263, "low_lr": 1.6789473684210526e-05, "step": 305 }, { "epoch": 0.8021038790269559, "high_lr": 0.0008394736842105263, "low_lr": 1.6789473684210526e-05, "step": 305 }, { "epoch": 0.8021038790269559, "high_lr": 0.0008394736842105263, "low_lr": 1.6789473684210526e-05, "step": 305 }, { "epoch": 0.8021038790269559, "high_lr": 0.0008394736842105263, "low_lr": 1.6789473684210526e-05, "step": 305 }, { "epoch": 0.8021038790269559, "high_lr": 0.0008394736842105263, "low_lr": 1.6789473684210526e-05, "step": 305 }, { "epoch": 0.8021038790269559, "high_lr": 0.0008394736842105263, "low_lr": 1.6789473684210526e-05, "step": 305 }, { "epoch": 0.8021038790269559, "high_lr": 0.0008394736842105263, "low_lr": 1.6789473684210526e-05, "step": 305 }, { "epoch": 0.8047337278106509, "grad_norm": 0.892025887966156, "learning_rate": 0.0008389473684210527, "loss": 1.5995, "step": 306 }, { "epoch": 0.8047337278106509, "high_lr": 0.0008389473684210527, "low_lr": 1.6778947368421054e-05, "step": 306 }, { "epoch": 0.8047337278106509, "high_lr": 0.0008389473684210527, "low_lr": 1.6778947368421054e-05, "step": 306 }, { "epoch": 0.8047337278106509, "high_lr": 0.0008389473684210527, "low_lr": 1.6778947368421054e-05, "step": 306 }, { "epoch": 0.8047337278106509, "high_lr": 0.0008389473684210527, "low_lr": 1.6778947368421054e-05, "step": 306 }, { "epoch": 0.8047337278106509, "high_lr": 0.0008389473684210527, "low_lr": 1.6778947368421054e-05, "step": 306 }, { "epoch": 0.8047337278106509, "high_lr": 0.0008389473684210527, "low_lr": 1.6778947368421054e-05, "step": 306 }, { "epoch": 0.8047337278106509, "high_lr": 0.0008389473684210527, "low_lr": 1.6778947368421054e-05, "step": 306 }, { "epoch": 0.8047337278106509, "high_lr": 0.0008389473684210527, "low_lr": 1.6778947368421054e-05, "step": 306 }, { "epoch": 0.8073635765943459, "grad_norm": 0.9276682734489441, "learning_rate": 0.000838421052631579, "loss": 1.6165, "step": 307 }, { "epoch": 0.8073635765943459, "high_lr": 0.000838421052631579, "low_lr": 1.676842105263158e-05, "step": 307 }, { "epoch": 0.8073635765943459, "high_lr": 0.000838421052631579, "low_lr": 1.676842105263158e-05, "step": 307 }, { "epoch": 0.8073635765943459, "high_lr": 0.000838421052631579, "low_lr": 1.676842105263158e-05, "step": 307 }, { "epoch": 0.8073635765943459, "high_lr": 0.000838421052631579, "low_lr": 1.676842105263158e-05, "step": 307 }, { "epoch": 0.8073635765943459, "high_lr": 0.000838421052631579, "low_lr": 1.676842105263158e-05, "step": 307 }, { "epoch": 0.8073635765943459, "high_lr": 0.000838421052631579, "low_lr": 1.676842105263158e-05, "step": 307 }, { "epoch": 0.8073635765943459, "high_lr": 0.000838421052631579, "low_lr": 1.676842105263158e-05, "step": 307 }, { "epoch": 0.8073635765943459, "high_lr": 0.000838421052631579, "low_lr": 1.676842105263158e-05, "step": 307 }, { "epoch": 0.8099934253780408, "grad_norm": 0.9737101197242737, "learning_rate": 0.0008378947368421053, "loss": 1.6376, "step": 308 }, { "epoch": 0.8099934253780408, "high_lr": 0.0008378947368421053, "low_lr": 1.6757894736842107e-05, "step": 308 }, { "epoch": 0.8099934253780408, "high_lr": 0.0008378947368421053, "low_lr": 1.6757894736842107e-05, "step": 308 }, { "epoch": 0.8099934253780408, "high_lr": 0.0008378947368421053, "low_lr": 1.6757894736842107e-05, "step": 308 }, { "epoch": 0.8099934253780408, "high_lr": 0.0008378947368421053, "low_lr": 1.6757894736842107e-05, "step": 308 }, { "epoch": 0.8099934253780408, "high_lr": 0.0008378947368421053, "low_lr": 1.6757894736842107e-05, "step": 308 }, { "epoch": 0.8099934253780408, "high_lr": 0.0008378947368421053, "low_lr": 1.6757894736842107e-05, "step": 308 }, { "epoch": 0.8099934253780408, "high_lr": 0.0008378947368421053, "low_lr": 1.6757894736842107e-05, "step": 308 }, { "epoch": 0.8099934253780408, "high_lr": 0.0008378947368421053, "low_lr": 1.6757894736842107e-05, "step": 308 }, { "epoch": 0.8126232741617357, "grad_norm": 0.915702223777771, "learning_rate": 0.0008373684210526316, "loss": 1.6523, "step": 309 }, { "epoch": 0.8126232741617357, "high_lr": 0.0008373684210526316, "low_lr": 1.6747368421052632e-05, "step": 309 }, { "epoch": 0.8126232741617357, "high_lr": 0.0008373684210526316, "low_lr": 1.6747368421052632e-05, "step": 309 }, { "epoch": 0.8126232741617357, "high_lr": 0.0008373684210526316, "low_lr": 1.6747368421052632e-05, "step": 309 }, { "epoch": 0.8126232741617357, "high_lr": 0.0008373684210526316, "low_lr": 1.6747368421052632e-05, "step": 309 }, { "epoch": 0.8126232741617357, "high_lr": 0.0008373684210526316, "low_lr": 1.6747368421052632e-05, "step": 309 }, { "epoch": 0.8126232741617357, "high_lr": 0.0008373684210526316, "low_lr": 1.6747368421052632e-05, "step": 309 }, { "epoch": 0.8126232741617357, "high_lr": 0.0008373684210526316, "low_lr": 1.6747368421052632e-05, "step": 309 }, { "epoch": 0.8126232741617357, "high_lr": 0.0008373684210526316, "low_lr": 1.6747368421052632e-05, "step": 309 }, { "epoch": 0.8152531229454306, "grad_norm": 0.9162072539329529, "learning_rate": 0.000836842105263158, "loss": 1.5835, "step": 310 }, { "epoch": 0.8152531229454306, "high_lr": 0.000836842105263158, "low_lr": 1.673684210526316e-05, "step": 310 }, { "epoch": 0.8152531229454306, "high_lr": 0.000836842105263158, "low_lr": 1.673684210526316e-05, "step": 310 }, { "epoch": 0.8152531229454306, "high_lr": 0.000836842105263158, "low_lr": 1.673684210526316e-05, "step": 310 }, { "epoch": 0.8152531229454306, "high_lr": 0.000836842105263158, "low_lr": 1.673684210526316e-05, "step": 310 }, { "epoch": 0.8152531229454306, "high_lr": 0.000836842105263158, "low_lr": 1.673684210526316e-05, "step": 310 }, { "epoch": 0.8152531229454306, "high_lr": 0.000836842105263158, "low_lr": 1.673684210526316e-05, "step": 310 }, { "epoch": 0.8152531229454306, "high_lr": 0.000836842105263158, "low_lr": 1.673684210526316e-05, "step": 310 }, { "epoch": 0.8152531229454306, "high_lr": 0.000836842105263158, "low_lr": 1.673684210526316e-05, "step": 310 }, { "epoch": 0.8178829717291256, "grad_norm": 0.8940391540527344, "learning_rate": 0.0008363157894736843, "loss": 1.6092, "step": 311 }, { "epoch": 0.8178829717291256, "high_lr": 0.0008363157894736843, "low_lr": 1.6726315789473684e-05, "step": 311 }, { "epoch": 0.8178829717291256, "high_lr": 0.0008363157894736843, "low_lr": 1.6726315789473684e-05, "step": 311 }, { "epoch": 0.8178829717291256, "high_lr": 0.0008363157894736843, "low_lr": 1.6726315789473684e-05, "step": 311 }, { "epoch": 0.8178829717291256, "high_lr": 0.0008363157894736843, "low_lr": 1.6726315789473684e-05, "step": 311 }, { "epoch": 0.8178829717291256, "high_lr": 0.0008363157894736843, "low_lr": 1.6726315789473684e-05, "step": 311 }, { "epoch": 0.8178829717291256, "high_lr": 0.0008363157894736843, "low_lr": 1.6726315789473684e-05, "step": 311 }, { "epoch": 0.8178829717291256, "high_lr": 0.0008363157894736843, "low_lr": 1.6726315789473684e-05, "step": 311 }, { "epoch": 0.8178829717291256, "high_lr": 0.0008363157894736843, "low_lr": 1.6726315789473684e-05, "step": 311 }, { "epoch": 0.8205128205128205, "grad_norm": 0.9650228023529053, "learning_rate": 0.0008357894736842105, "loss": 1.6253, "step": 312 }, { "epoch": 0.8205128205128205, "high_lr": 0.0008357894736842105, "low_lr": 1.6715789473684212e-05, "step": 312 }, { "epoch": 0.8205128205128205, "high_lr": 0.0008357894736842105, "low_lr": 1.6715789473684212e-05, "step": 312 }, { "epoch": 0.8205128205128205, "high_lr": 0.0008357894736842105, "low_lr": 1.6715789473684212e-05, "step": 312 }, { "epoch": 0.8205128205128205, "high_lr": 0.0008357894736842105, "low_lr": 1.6715789473684212e-05, "step": 312 }, { "epoch": 0.8205128205128205, "high_lr": 0.0008357894736842105, "low_lr": 1.6715789473684212e-05, "step": 312 }, { "epoch": 0.8205128205128205, "high_lr": 0.0008357894736842105, "low_lr": 1.6715789473684212e-05, "step": 312 }, { "epoch": 0.8205128205128205, "high_lr": 0.0008357894736842105, "low_lr": 1.6715789473684212e-05, "step": 312 }, { "epoch": 0.8205128205128205, "high_lr": 0.0008357894736842105, "low_lr": 1.6715789473684212e-05, "step": 312 }, { "epoch": 0.8231426692965155, "grad_norm": 0.874620795249939, "learning_rate": 0.0008352631578947368, "loss": 1.6371, "step": 313 }, { "epoch": 0.8231426692965155, "high_lr": 0.0008352631578947368, "low_lr": 1.6705263157894737e-05, "step": 313 }, { "epoch": 0.8231426692965155, "high_lr": 0.0008352631578947368, "low_lr": 1.6705263157894737e-05, "step": 313 }, { "epoch": 0.8231426692965155, "high_lr": 0.0008352631578947368, "low_lr": 1.6705263157894737e-05, "step": 313 }, { "epoch": 0.8231426692965155, "high_lr": 0.0008352631578947368, "low_lr": 1.6705263157894737e-05, "step": 313 }, { "epoch": 0.8231426692965155, "high_lr": 0.0008352631578947368, "low_lr": 1.6705263157894737e-05, "step": 313 }, { "epoch": 0.8231426692965155, "high_lr": 0.0008352631578947368, "low_lr": 1.6705263157894737e-05, "step": 313 }, { "epoch": 0.8231426692965155, "high_lr": 0.0008352631578947368, "low_lr": 1.6705263157894737e-05, "step": 313 }, { "epoch": 0.8231426692965155, "high_lr": 0.0008352631578947368, "low_lr": 1.6705263157894737e-05, "step": 313 }, { "epoch": 0.8257725180802103, "grad_norm": 0.9986231923103333, "learning_rate": 0.0008347368421052631, "loss": 1.6738, "step": 314 }, { "epoch": 0.8257725180802103, "high_lr": 0.0008347368421052631, "low_lr": 1.6694736842105265e-05, "step": 314 }, { "epoch": 0.8257725180802103, "high_lr": 0.0008347368421052631, "low_lr": 1.6694736842105265e-05, "step": 314 }, { "epoch": 0.8257725180802103, "high_lr": 0.0008347368421052631, "low_lr": 1.6694736842105265e-05, "step": 314 }, { "epoch": 0.8257725180802103, "high_lr": 0.0008347368421052631, "low_lr": 1.6694736842105265e-05, "step": 314 }, { "epoch": 0.8257725180802103, "high_lr": 0.0008347368421052631, "low_lr": 1.6694736842105265e-05, "step": 314 }, { "epoch": 0.8257725180802103, "high_lr": 0.0008347368421052631, "low_lr": 1.6694736842105265e-05, "step": 314 }, { "epoch": 0.8257725180802103, "high_lr": 0.0008347368421052631, "low_lr": 1.6694736842105265e-05, "step": 314 }, { "epoch": 0.8257725180802103, "high_lr": 0.0008347368421052631, "low_lr": 1.6694736842105265e-05, "step": 314 }, { "epoch": 0.8284023668639053, "grad_norm": 0.9026766419410706, "learning_rate": 0.0008342105263157895, "loss": 1.601, "step": 315 }, { "epoch": 0.8284023668639053, "high_lr": 0.0008342105263157895, "low_lr": 1.6684210526315793e-05, "step": 315 }, { "epoch": 0.8284023668639053, "high_lr": 0.0008342105263157895, "low_lr": 1.6684210526315793e-05, "step": 315 }, { "epoch": 0.8284023668639053, "high_lr": 0.0008342105263157895, "low_lr": 1.6684210526315793e-05, "step": 315 }, { "epoch": 0.8284023668639053, "high_lr": 0.0008342105263157895, "low_lr": 1.6684210526315793e-05, "step": 315 }, { "epoch": 0.8284023668639053, "high_lr": 0.0008342105263157895, "low_lr": 1.6684210526315793e-05, "step": 315 }, { "epoch": 0.8284023668639053, "high_lr": 0.0008342105263157895, "low_lr": 1.6684210526315793e-05, "step": 315 }, { "epoch": 0.8284023668639053, "high_lr": 0.0008342105263157895, "low_lr": 1.6684210526315793e-05, "step": 315 }, { "epoch": 0.8284023668639053, "high_lr": 0.0008342105263157895, "low_lr": 1.6684210526315793e-05, "step": 315 }, { "epoch": 0.8310322156476002, "grad_norm": 0.8999379277229309, "learning_rate": 0.0008336842105263158, "loss": 1.548, "step": 316 }, { "epoch": 0.8310322156476002, "high_lr": 0.0008336842105263158, "low_lr": 1.6673684210526318e-05, "step": 316 }, { "epoch": 0.8310322156476002, "high_lr": 0.0008336842105263158, "low_lr": 1.6673684210526318e-05, "step": 316 }, { "epoch": 0.8310322156476002, "high_lr": 0.0008336842105263158, "low_lr": 1.6673684210526318e-05, "step": 316 }, { "epoch": 0.8310322156476002, "high_lr": 0.0008336842105263158, "low_lr": 1.6673684210526318e-05, "step": 316 }, { "epoch": 0.8310322156476002, "high_lr": 0.0008336842105263158, "low_lr": 1.6673684210526318e-05, "step": 316 }, { "epoch": 0.8310322156476002, "high_lr": 0.0008336842105263158, "low_lr": 1.6673684210526318e-05, "step": 316 }, { "epoch": 0.8310322156476002, "high_lr": 0.0008336842105263158, "low_lr": 1.6673684210526318e-05, "step": 316 }, { "epoch": 0.8310322156476002, "high_lr": 0.0008336842105263158, "low_lr": 1.6673684210526318e-05, "step": 316 }, { "epoch": 0.8336620644312952, "grad_norm": 0.8758360147476196, "learning_rate": 0.0008331578947368421, "loss": 1.6161, "step": 317 }, { "epoch": 0.8336620644312952, "high_lr": 0.0008331578947368421, "low_lr": 1.6663157894736842e-05, "step": 317 }, { "epoch": 0.8336620644312952, "high_lr": 0.0008331578947368421, "low_lr": 1.6663157894736842e-05, "step": 317 }, { "epoch": 0.8336620644312952, "high_lr": 0.0008331578947368421, "low_lr": 1.6663157894736842e-05, "step": 317 }, { "epoch": 0.8336620644312952, "high_lr": 0.0008331578947368421, "low_lr": 1.6663157894736842e-05, "step": 317 }, { "epoch": 0.8336620644312952, "high_lr": 0.0008331578947368421, "low_lr": 1.6663157894736842e-05, "step": 317 }, { "epoch": 0.8336620644312952, "high_lr": 0.0008331578947368421, "low_lr": 1.6663157894736842e-05, "step": 317 }, { "epoch": 0.8336620644312952, "high_lr": 0.0008331578947368421, "low_lr": 1.6663157894736842e-05, "step": 317 }, { "epoch": 0.8336620644312952, "high_lr": 0.0008331578947368421, "low_lr": 1.6663157894736842e-05, "step": 317 }, { "epoch": 0.8362919132149902, "grad_norm": 0.9446306824684143, "learning_rate": 0.0008326315789473684, "loss": 1.631, "step": 318 }, { "epoch": 0.8362919132149902, "high_lr": 0.0008326315789473684, "low_lr": 1.665263157894737e-05, "step": 318 }, { "epoch": 0.8362919132149902, "high_lr": 0.0008326315789473684, "low_lr": 1.665263157894737e-05, "step": 318 }, { "epoch": 0.8362919132149902, "high_lr": 0.0008326315789473684, "low_lr": 1.665263157894737e-05, "step": 318 }, { "epoch": 0.8362919132149902, "high_lr": 0.0008326315789473684, "low_lr": 1.665263157894737e-05, "step": 318 }, { "epoch": 0.8362919132149902, "high_lr": 0.0008326315789473684, "low_lr": 1.665263157894737e-05, "step": 318 }, { "epoch": 0.8362919132149902, "high_lr": 0.0008326315789473684, "low_lr": 1.665263157894737e-05, "step": 318 }, { "epoch": 0.8362919132149902, "high_lr": 0.0008326315789473684, "low_lr": 1.665263157894737e-05, "step": 318 }, { "epoch": 0.8362919132149902, "high_lr": 0.0008326315789473684, "low_lr": 1.665263157894737e-05, "step": 318 }, { "epoch": 0.8389217619986851, "grad_norm": 0.9866498708724976, "learning_rate": 0.0008321052631578948, "loss": 1.615, "step": 319 }, { "epoch": 0.8389217619986851, "high_lr": 0.0008321052631578948, "low_lr": 1.66421052631579e-05, "step": 319 }, { "epoch": 0.8389217619986851, "high_lr": 0.0008321052631578948, "low_lr": 1.66421052631579e-05, "step": 319 }, { "epoch": 0.8389217619986851, "high_lr": 0.0008321052631578948, "low_lr": 1.66421052631579e-05, "step": 319 }, { "epoch": 0.8389217619986851, "high_lr": 0.0008321052631578948, "low_lr": 1.66421052631579e-05, "step": 319 }, { "epoch": 0.8389217619986851, "high_lr": 0.0008321052631578948, "low_lr": 1.66421052631579e-05, "step": 319 }, { "epoch": 0.8389217619986851, "high_lr": 0.0008321052631578948, "low_lr": 1.66421052631579e-05, "step": 319 }, { "epoch": 0.8389217619986851, "high_lr": 0.0008321052631578948, "low_lr": 1.66421052631579e-05, "step": 319 }, { "epoch": 0.8389217619986851, "high_lr": 0.0008321052631578948, "low_lr": 1.66421052631579e-05, "step": 319 }, { "epoch": 0.84155161078238, "grad_norm": 0.9884803891181946, "learning_rate": 0.0008315789473684212, "loss": 1.6674, "step": 320 }, { "epoch": 0.84155161078238, "high_lr": 0.0008315789473684212, "low_lr": 1.6631578947368423e-05, "step": 320 }, { "epoch": 0.84155161078238, "high_lr": 0.0008315789473684212, "low_lr": 1.6631578947368423e-05, "step": 320 }, { "epoch": 0.84155161078238, "high_lr": 0.0008315789473684212, "low_lr": 1.6631578947368423e-05, "step": 320 }, { "epoch": 0.84155161078238, "high_lr": 0.0008315789473684212, "low_lr": 1.6631578947368423e-05, "step": 320 }, { "epoch": 0.84155161078238, "high_lr": 0.0008315789473684212, "low_lr": 1.6631578947368423e-05, "step": 320 }, { "epoch": 0.84155161078238, "high_lr": 0.0008315789473684212, "low_lr": 1.6631578947368423e-05, "step": 320 }, { "epoch": 0.84155161078238, "high_lr": 0.0008315789473684212, "low_lr": 1.6631578947368423e-05, "step": 320 }, { "epoch": 0.84155161078238, "high_lr": 0.0008315789473684212, "low_lr": 1.6631578947368423e-05, "step": 320 }, { "epoch": 0.8441814595660749, "grad_norm": 0.8910170793533325, "learning_rate": 0.0008310526315789474, "loss": 1.5924, "step": 321 }, { "epoch": 0.8441814595660749, "high_lr": 0.0008310526315789474, "low_lr": 1.6621052631578948e-05, "step": 321 }, { "epoch": 0.8441814595660749, "high_lr": 0.0008310526315789474, "low_lr": 1.6621052631578948e-05, "step": 321 }, { "epoch": 0.8441814595660749, "high_lr": 0.0008310526315789474, "low_lr": 1.6621052631578948e-05, "step": 321 }, { "epoch": 0.8441814595660749, "high_lr": 0.0008310526315789474, "low_lr": 1.6621052631578948e-05, "step": 321 }, { "epoch": 0.8441814595660749, "high_lr": 0.0008310526315789474, "low_lr": 1.6621052631578948e-05, "step": 321 }, { "epoch": 0.8441814595660749, "high_lr": 0.0008310526315789474, "low_lr": 1.6621052631578948e-05, "step": 321 }, { "epoch": 0.8441814595660749, "high_lr": 0.0008310526315789474, "low_lr": 1.6621052631578948e-05, "step": 321 }, { "epoch": 0.8441814595660749, "high_lr": 0.0008310526315789474, "low_lr": 1.6621052631578948e-05, "step": 321 }, { "epoch": 0.8468113083497699, "grad_norm": 0.9594380855560303, "learning_rate": 0.0008305263157894737, "loss": 1.6081, "step": 322 }, { "epoch": 0.8468113083497699, "high_lr": 0.0008305263157894737, "low_lr": 1.6610526315789476e-05, "step": 322 }, { "epoch": 0.8468113083497699, "high_lr": 0.0008305263157894737, "low_lr": 1.6610526315789476e-05, "step": 322 }, { "epoch": 0.8468113083497699, "high_lr": 0.0008305263157894737, "low_lr": 1.6610526315789476e-05, "step": 322 }, { "epoch": 0.8468113083497699, "high_lr": 0.0008305263157894737, "low_lr": 1.6610526315789476e-05, "step": 322 }, { "epoch": 0.8468113083497699, "high_lr": 0.0008305263157894737, "low_lr": 1.6610526315789476e-05, "step": 322 }, { "epoch": 0.8468113083497699, "high_lr": 0.0008305263157894737, "low_lr": 1.6610526315789476e-05, "step": 322 }, { "epoch": 0.8468113083497699, "high_lr": 0.0008305263157894737, "low_lr": 1.6610526315789476e-05, "step": 322 }, { "epoch": 0.8468113083497699, "high_lr": 0.0008305263157894737, "low_lr": 1.6610526315789476e-05, "step": 322 }, { "epoch": 0.8494411571334648, "grad_norm": 0.9601569175720215, "learning_rate": 0.00083, "loss": 1.6509, "step": 323 }, { "epoch": 0.8494411571334648, "high_lr": 0.00083, "low_lr": 1.66e-05, "step": 323 }, { "epoch": 0.8494411571334648, "high_lr": 0.00083, "low_lr": 1.66e-05, "step": 323 }, { "epoch": 0.8494411571334648, "high_lr": 0.00083, "low_lr": 1.66e-05, "step": 323 }, { "epoch": 0.8494411571334648, "high_lr": 0.00083, "low_lr": 1.66e-05, "step": 323 }, { "epoch": 0.8494411571334648, "high_lr": 0.00083, "low_lr": 1.66e-05, "step": 323 }, { "epoch": 0.8494411571334648, "high_lr": 0.00083, "low_lr": 1.66e-05, "step": 323 }, { "epoch": 0.8494411571334648, "high_lr": 0.00083, "low_lr": 1.66e-05, "step": 323 }, { "epoch": 0.8494411571334648, "high_lr": 0.00083, "low_lr": 1.66e-05, "step": 323 }, { "epoch": 0.8520710059171598, "grad_norm": 0.9653990864753723, "learning_rate": 0.0008294736842105264, "loss": 1.6323, "step": 324 }, { "epoch": 0.8520710059171598, "high_lr": 0.0008294736842105264, "low_lr": 1.658947368421053e-05, "step": 324 }, { "epoch": 0.8520710059171598, "high_lr": 0.0008294736842105264, "low_lr": 1.658947368421053e-05, "step": 324 }, { "epoch": 0.8520710059171598, "high_lr": 0.0008294736842105264, "low_lr": 1.658947368421053e-05, "step": 324 }, { "epoch": 0.8520710059171598, "high_lr": 0.0008294736842105264, "low_lr": 1.658947368421053e-05, "step": 324 }, { "epoch": 0.8520710059171598, "high_lr": 0.0008294736842105264, "low_lr": 1.658947368421053e-05, "step": 324 }, { "epoch": 0.8520710059171598, "high_lr": 0.0008294736842105264, "low_lr": 1.658947368421053e-05, "step": 324 }, { "epoch": 0.8520710059171598, "high_lr": 0.0008294736842105264, "low_lr": 1.658947368421053e-05, "step": 324 }, { "epoch": 0.8520710059171598, "high_lr": 0.0008294736842105264, "low_lr": 1.658947368421053e-05, "step": 324 }, { "epoch": 0.8547008547008547, "grad_norm": 0.8928948640823364, "learning_rate": 0.0008289473684210527, "loss": 1.6013, "step": 325 }, { "epoch": 0.8547008547008547, "high_lr": 0.0008289473684210527, "low_lr": 1.6578947368421053e-05, "step": 325 }, { "epoch": 0.8547008547008547, "high_lr": 0.0008289473684210527, "low_lr": 1.6578947368421053e-05, "step": 325 }, { "epoch": 0.8547008547008547, "high_lr": 0.0008289473684210527, "low_lr": 1.6578947368421053e-05, "step": 325 }, { "epoch": 0.8547008547008547, "high_lr": 0.0008289473684210527, "low_lr": 1.6578947368421053e-05, "step": 325 }, { "epoch": 0.8547008547008547, "high_lr": 0.0008289473684210527, "low_lr": 1.6578947368421053e-05, "step": 325 }, { "epoch": 0.8547008547008547, "high_lr": 0.0008289473684210527, "low_lr": 1.6578947368421053e-05, "step": 325 }, { "epoch": 0.8547008547008547, "high_lr": 0.0008289473684210527, "low_lr": 1.6578947368421053e-05, "step": 325 }, { "epoch": 0.8547008547008547, "high_lr": 0.0008289473684210527, "low_lr": 1.6578947368421053e-05, "step": 325 }, { "epoch": 0.8573307034845496, "grad_norm": 0.9746809601783752, "learning_rate": 0.000828421052631579, "loss": 1.6171, "step": 326 }, { "epoch": 0.8573307034845496, "high_lr": 0.000828421052631579, "low_lr": 1.656842105263158e-05, "step": 326 }, { "epoch": 0.8573307034845496, "high_lr": 0.000828421052631579, "low_lr": 1.656842105263158e-05, "step": 326 }, { "epoch": 0.8573307034845496, "high_lr": 0.000828421052631579, "low_lr": 1.656842105263158e-05, "step": 326 }, { "epoch": 0.8573307034845496, "high_lr": 0.000828421052631579, "low_lr": 1.656842105263158e-05, "step": 326 }, { "epoch": 0.8573307034845496, "high_lr": 0.000828421052631579, "low_lr": 1.656842105263158e-05, "step": 326 }, { "epoch": 0.8573307034845496, "high_lr": 0.000828421052631579, "low_lr": 1.656842105263158e-05, "step": 326 }, { "epoch": 0.8573307034845496, "high_lr": 0.000828421052631579, "low_lr": 1.656842105263158e-05, "step": 326 }, { "epoch": 0.8573307034845496, "high_lr": 0.000828421052631579, "low_lr": 1.656842105263158e-05, "step": 326 }, { "epoch": 0.8599605522682445, "grad_norm": 0.9501665234565735, "learning_rate": 0.0008278947368421053, "loss": 1.6396, "step": 327 }, { "epoch": 0.8599605522682445, "high_lr": 0.0008278947368421053, "low_lr": 1.6557894736842106e-05, "step": 327 }, { "epoch": 0.8599605522682445, "high_lr": 0.0008278947368421053, "low_lr": 1.6557894736842106e-05, "step": 327 }, { "epoch": 0.8599605522682445, "high_lr": 0.0008278947368421053, "low_lr": 1.6557894736842106e-05, "step": 327 }, { "epoch": 0.8599605522682445, "high_lr": 0.0008278947368421053, "low_lr": 1.6557894736842106e-05, "step": 327 }, { "epoch": 0.8599605522682445, "high_lr": 0.0008278947368421053, "low_lr": 1.6557894736842106e-05, "step": 327 }, { "epoch": 0.8599605522682445, "high_lr": 0.0008278947368421053, "low_lr": 1.6557894736842106e-05, "step": 327 }, { "epoch": 0.8599605522682445, "high_lr": 0.0008278947368421053, "low_lr": 1.6557894736842106e-05, "step": 327 }, { "epoch": 0.8599605522682445, "high_lr": 0.0008278947368421053, "low_lr": 1.6557894736842106e-05, "step": 327 }, { "epoch": 0.8625904010519395, "grad_norm": 2.2945706844329834, "learning_rate": 0.0008273684210526315, "loss": 1.6043, "step": 328 }, { "epoch": 0.8625904010519395, "high_lr": 0.0008273684210526315, "low_lr": 1.6547368421052634e-05, "step": 328 }, { "epoch": 0.8625904010519395, "high_lr": 0.0008273684210526315, "low_lr": 1.6547368421052634e-05, "step": 328 }, { "epoch": 0.8625904010519395, "high_lr": 0.0008273684210526315, "low_lr": 1.6547368421052634e-05, "step": 328 }, { "epoch": 0.8625904010519395, "high_lr": 0.0008273684210526315, "low_lr": 1.6547368421052634e-05, "step": 328 }, { "epoch": 0.8625904010519395, "high_lr": 0.0008273684210526315, "low_lr": 1.6547368421052634e-05, "step": 328 }, { "epoch": 0.8625904010519395, "high_lr": 0.0008273684210526315, "low_lr": 1.6547368421052634e-05, "step": 328 }, { "epoch": 0.8625904010519395, "high_lr": 0.0008273684210526315, "low_lr": 1.6547368421052634e-05, "step": 328 }, { "epoch": 0.8625904010519395, "high_lr": 0.0008273684210526315, "low_lr": 1.6547368421052634e-05, "step": 328 }, { "epoch": 0.8652202498356345, "grad_norm": 0.9581923484802246, "learning_rate": 0.0008268421052631579, "loss": 1.615, "step": 329 }, { "epoch": 0.8652202498356345, "high_lr": 0.0008268421052631579, "low_lr": 1.653684210526316e-05, "step": 329 }, { "epoch": 0.8652202498356345, "high_lr": 0.0008268421052631579, "low_lr": 1.653684210526316e-05, "step": 329 }, { "epoch": 0.8652202498356345, "high_lr": 0.0008268421052631579, "low_lr": 1.653684210526316e-05, "step": 329 }, { "epoch": 0.8652202498356345, "high_lr": 0.0008268421052631579, "low_lr": 1.653684210526316e-05, "step": 329 }, { "epoch": 0.8652202498356345, "high_lr": 0.0008268421052631579, "low_lr": 1.653684210526316e-05, "step": 329 }, { "epoch": 0.8652202498356345, "high_lr": 0.0008268421052631579, "low_lr": 1.653684210526316e-05, "step": 329 }, { "epoch": 0.8652202498356345, "high_lr": 0.0008268421052631579, "low_lr": 1.653684210526316e-05, "step": 329 }, { "epoch": 0.8652202498356345, "high_lr": 0.0008268421052631579, "low_lr": 1.653684210526316e-05, "step": 329 }, { "epoch": 0.8678500986193294, "grad_norm": 0.8627300262451172, "learning_rate": 0.0008263157894736842, "loss": 1.602, "step": 330 }, { "epoch": 0.8678500986193294, "high_lr": 0.0008263157894736842, "low_lr": 1.6526315789473686e-05, "step": 330 }, { "epoch": 0.8678500986193294, "high_lr": 0.0008263157894736842, "low_lr": 1.6526315789473686e-05, "step": 330 }, { "epoch": 0.8678500986193294, "high_lr": 0.0008263157894736842, "low_lr": 1.6526315789473686e-05, "step": 330 }, { "epoch": 0.8678500986193294, "high_lr": 0.0008263157894736842, "low_lr": 1.6526315789473686e-05, "step": 330 }, { "epoch": 0.8678500986193294, "high_lr": 0.0008263157894736842, "low_lr": 1.6526315789473686e-05, "step": 330 }, { "epoch": 0.8678500986193294, "high_lr": 0.0008263157894736842, "low_lr": 1.6526315789473686e-05, "step": 330 }, { "epoch": 0.8678500986193294, "high_lr": 0.0008263157894736842, "low_lr": 1.6526315789473686e-05, "step": 330 }, { "epoch": 0.8678500986193294, "high_lr": 0.0008263157894736842, "low_lr": 1.6526315789473686e-05, "step": 330 }, { "epoch": 0.8704799474030244, "grad_norm": 0.9691030383110046, "learning_rate": 0.0008257894736842105, "loss": 1.6121, "step": 331 }, { "epoch": 0.8704799474030244, "high_lr": 0.0008257894736842105, "low_lr": 1.651578947368421e-05, "step": 331 }, { "epoch": 0.8704799474030244, "high_lr": 0.0008257894736842105, "low_lr": 1.651578947368421e-05, "step": 331 }, { "epoch": 0.8704799474030244, "high_lr": 0.0008257894736842105, "low_lr": 1.651578947368421e-05, "step": 331 }, { "epoch": 0.8704799474030244, "high_lr": 0.0008257894736842105, "low_lr": 1.651578947368421e-05, "step": 331 }, { "epoch": 0.8704799474030244, "high_lr": 0.0008257894736842105, "low_lr": 1.651578947368421e-05, "step": 331 }, { "epoch": 0.8704799474030244, "high_lr": 0.0008257894736842105, "low_lr": 1.651578947368421e-05, "step": 331 }, { "epoch": 0.8704799474030244, "high_lr": 0.0008257894736842105, "low_lr": 1.651578947368421e-05, "step": 331 }, { "epoch": 0.8704799474030244, "high_lr": 0.0008257894736842105, "low_lr": 1.651578947368421e-05, "step": 331 }, { "epoch": 0.8731097961867192, "grad_norm": 0.9407858848571777, "learning_rate": 0.0008252631578947368, "loss": 1.5723, "step": 332 }, { "epoch": 0.8731097961867192, "high_lr": 0.0008252631578947368, "low_lr": 1.650526315789474e-05, "step": 332 }, { "epoch": 0.8731097961867192, "high_lr": 0.0008252631578947368, "low_lr": 1.650526315789474e-05, "step": 332 }, { "epoch": 0.8731097961867192, "high_lr": 0.0008252631578947368, "low_lr": 1.650526315789474e-05, "step": 332 }, { "epoch": 0.8731097961867192, "high_lr": 0.0008252631578947368, "low_lr": 1.650526315789474e-05, "step": 332 }, { "epoch": 0.8731097961867192, "high_lr": 0.0008252631578947368, "low_lr": 1.650526315789474e-05, "step": 332 }, { "epoch": 0.8731097961867192, "high_lr": 0.0008252631578947368, "low_lr": 1.650526315789474e-05, "step": 332 }, { "epoch": 0.8731097961867192, "high_lr": 0.0008252631578947368, "low_lr": 1.650526315789474e-05, "step": 332 }, { "epoch": 0.8731097961867192, "high_lr": 0.0008252631578947368, "low_lr": 1.650526315789474e-05, "step": 332 }, { "epoch": 0.8757396449704142, "grad_norm": 0.8633793592453003, "learning_rate": 0.0008247368421052632, "loss": 1.5878, "step": 333 }, { "epoch": 0.8757396449704142, "high_lr": 0.0008247368421052632, "low_lr": 1.6494736842105267e-05, "step": 333 }, { "epoch": 0.8757396449704142, "high_lr": 0.0008247368421052632, "low_lr": 1.6494736842105267e-05, "step": 333 }, { "epoch": 0.8757396449704142, "high_lr": 0.0008247368421052632, "low_lr": 1.6494736842105267e-05, "step": 333 }, { "epoch": 0.8757396449704142, "high_lr": 0.0008247368421052632, "low_lr": 1.6494736842105267e-05, "step": 333 }, { "epoch": 0.8757396449704142, "high_lr": 0.0008247368421052632, "low_lr": 1.6494736842105267e-05, "step": 333 }, { "epoch": 0.8757396449704142, "high_lr": 0.0008247368421052632, "low_lr": 1.6494736842105267e-05, "step": 333 }, { "epoch": 0.8757396449704142, "high_lr": 0.0008247368421052632, "low_lr": 1.6494736842105267e-05, "step": 333 }, { "epoch": 0.8757396449704142, "high_lr": 0.0008247368421052632, "low_lr": 1.6494736842105267e-05, "step": 333 }, { "epoch": 0.8783694937541091, "grad_norm": 0.9088572859764099, "learning_rate": 0.0008242105263157895, "loss": 1.6378, "step": 334 }, { "epoch": 0.8783694937541091, "high_lr": 0.0008242105263157895, "low_lr": 1.648421052631579e-05, "step": 334 }, { "epoch": 0.8783694937541091, "high_lr": 0.0008242105263157895, "low_lr": 1.648421052631579e-05, "step": 334 }, { "epoch": 0.8783694937541091, "high_lr": 0.0008242105263157895, "low_lr": 1.648421052631579e-05, "step": 334 }, { "epoch": 0.8783694937541091, "high_lr": 0.0008242105263157895, "low_lr": 1.648421052631579e-05, "step": 334 }, { "epoch": 0.8783694937541091, "high_lr": 0.0008242105263157895, "low_lr": 1.648421052631579e-05, "step": 334 }, { "epoch": 0.8783694937541091, "high_lr": 0.0008242105263157895, "low_lr": 1.648421052631579e-05, "step": 334 }, { "epoch": 0.8783694937541091, "high_lr": 0.0008242105263157895, "low_lr": 1.648421052631579e-05, "step": 334 }, { "epoch": 0.8783694937541091, "high_lr": 0.0008242105263157895, "low_lr": 1.648421052631579e-05, "step": 334 }, { "epoch": 0.8809993425378041, "grad_norm": 0.8914270997047424, "learning_rate": 0.0008236842105263158, "loss": 1.596, "step": 335 }, { "epoch": 0.8809993425378041, "high_lr": 0.0008236842105263158, "low_lr": 1.6473684210526316e-05, "step": 335 }, { "epoch": 0.8809993425378041, "high_lr": 0.0008236842105263158, "low_lr": 1.6473684210526316e-05, "step": 335 }, { "epoch": 0.8809993425378041, "high_lr": 0.0008236842105263158, "low_lr": 1.6473684210526316e-05, "step": 335 }, { "epoch": 0.8809993425378041, "high_lr": 0.0008236842105263158, "low_lr": 1.6473684210526316e-05, "step": 335 }, { "epoch": 0.8809993425378041, "high_lr": 0.0008236842105263158, "low_lr": 1.6473684210526316e-05, "step": 335 }, { "epoch": 0.8809993425378041, "high_lr": 0.0008236842105263158, "low_lr": 1.6473684210526316e-05, "step": 335 }, { "epoch": 0.8809993425378041, "high_lr": 0.0008236842105263158, "low_lr": 1.6473684210526316e-05, "step": 335 }, { "epoch": 0.8809993425378041, "high_lr": 0.0008236842105263158, "low_lr": 1.6473684210526316e-05, "step": 335 }, { "epoch": 0.883629191321499, "grad_norm": 0.9653371572494507, "learning_rate": 0.0008231578947368422, "loss": 1.6209, "step": 336 }, { "epoch": 0.883629191321499, "high_lr": 0.0008231578947368422, "low_lr": 1.6463157894736844e-05, "step": 336 }, { "epoch": 0.883629191321499, "high_lr": 0.0008231578947368422, "low_lr": 1.6463157894736844e-05, "step": 336 }, { "epoch": 0.883629191321499, "high_lr": 0.0008231578947368422, "low_lr": 1.6463157894736844e-05, "step": 336 }, { "epoch": 0.883629191321499, "high_lr": 0.0008231578947368422, "low_lr": 1.6463157894736844e-05, "step": 336 }, { "epoch": 0.883629191321499, "high_lr": 0.0008231578947368422, "low_lr": 1.6463157894736844e-05, "step": 336 }, { "epoch": 0.883629191321499, "high_lr": 0.0008231578947368422, "low_lr": 1.6463157894736844e-05, "step": 336 }, { "epoch": 0.883629191321499, "high_lr": 0.0008231578947368422, "low_lr": 1.6463157894736844e-05, "step": 336 }, { "epoch": 0.883629191321499, "high_lr": 0.0008231578947368422, "low_lr": 1.6463157894736844e-05, "step": 336 }, { "epoch": 0.886259040105194, "grad_norm": 0.8828110098838806, "learning_rate": 0.0008226315789473684, "loss": 1.5831, "step": 337 }, { "epoch": 0.886259040105194, "high_lr": 0.0008226315789473684, "low_lr": 1.645263157894737e-05, "step": 337 }, { "epoch": 0.886259040105194, "high_lr": 0.0008226315789473684, "low_lr": 1.645263157894737e-05, "step": 337 }, { "epoch": 0.886259040105194, "high_lr": 0.0008226315789473684, "low_lr": 1.645263157894737e-05, "step": 337 }, { "epoch": 0.886259040105194, "high_lr": 0.0008226315789473684, "low_lr": 1.645263157894737e-05, "step": 337 }, { "epoch": 0.886259040105194, "high_lr": 0.0008226315789473684, "low_lr": 1.645263157894737e-05, "step": 337 }, { "epoch": 0.886259040105194, "high_lr": 0.0008226315789473684, "low_lr": 1.645263157894737e-05, "step": 337 }, { "epoch": 0.886259040105194, "high_lr": 0.0008226315789473684, "low_lr": 1.645263157894737e-05, "step": 337 }, { "epoch": 0.886259040105194, "high_lr": 0.0008226315789473684, "low_lr": 1.645263157894737e-05, "step": 337 }, { "epoch": 0.8888888888888888, "grad_norm": 0.9500367045402527, "learning_rate": 0.0008221052631578948, "loss": 1.5705, "step": 338 }, { "epoch": 0.8888888888888888, "high_lr": 0.0008221052631578948, "low_lr": 1.6442105263157897e-05, "step": 338 }, { "epoch": 0.8888888888888888, "high_lr": 0.0008221052631578948, "low_lr": 1.6442105263157897e-05, "step": 338 }, { "epoch": 0.8888888888888888, "high_lr": 0.0008221052631578948, "low_lr": 1.6442105263157897e-05, "step": 338 }, { "epoch": 0.8888888888888888, "high_lr": 0.0008221052631578948, "low_lr": 1.6442105263157897e-05, "step": 338 }, { "epoch": 0.8888888888888888, "high_lr": 0.0008221052631578948, "low_lr": 1.6442105263157897e-05, "step": 338 }, { "epoch": 0.8888888888888888, "high_lr": 0.0008221052631578948, "low_lr": 1.6442105263157897e-05, "step": 338 }, { "epoch": 0.8888888888888888, "high_lr": 0.0008221052631578948, "low_lr": 1.6442105263157897e-05, "step": 338 }, { "epoch": 0.8888888888888888, "high_lr": 0.0008221052631578948, "low_lr": 1.6442105263157897e-05, "step": 338 }, { "epoch": 0.8915187376725838, "grad_norm": 0.9348553419113159, "learning_rate": 0.0008215789473684211, "loss": 1.5876, "step": 339 }, { "epoch": 0.8915187376725838, "high_lr": 0.0008215789473684211, "low_lr": 1.643157894736842e-05, "step": 339 }, { "epoch": 0.8915187376725838, "high_lr": 0.0008215789473684211, "low_lr": 1.643157894736842e-05, "step": 339 }, { "epoch": 0.8915187376725838, "high_lr": 0.0008215789473684211, "low_lr": 1.643157894736842e-05, "step": 339 }, { "epoch": 0.8915187376725838, "high_lr": 0.0008215789473684211, "low_lr": 1.643157894736842e-05, "step": 339 }, { "epoch": 0.8915187376725838, "high_lr": 0.0008215789473684211, "low_lr": 1.643157894736842e-05, "step": 339 }, { "epoch": 0.8915187376725838, "high_lr": 0.0008215789473684211, "low_lr": 1.643157894736842e-05, "step": 339 }, { "epoch": 0.8915187376725838, "high_lr": 0.0008215789473684211, "low_lr": 1.643157894736842e-05, "step": 339 }, { "epoch": 0.8915187376725838, "high_lr": 0.0008215789473684211, "low_lr": 1.643157894736842e-05, "step": 339 }, { "epoch": 0.8941485864562788, "grad_norm": 0.9669507741928101, "learning_rate": 0.0008210526315789474, "loss": 1.5939, "step": 340 }, { "epoch": 0.8941485864562788, "high_lr": 0.0008210526315789474, "low_lr": 1.642105263157895e-05, "step": 340 }, { "epoch": 0.8941485864562788, "high_lr": 0.0008210526315789474, "low_lr": 1.642105263157895e-05, "step": 340 }, { "epoch": 0.8941485864562788, "high_lr": 0.0008210526315789474, "low_lr": 1.642105263157895e-05, "step": 340 }, { "epoch": 0.8941485864562788, "high_lr": 0.0008210526315789474, "low_lr": 1.642105263157895e-05, "step": 340 }, { "epoch": 0.8941485864562788, "high_lr": 0.0008210526315789474, "low_lr": 1.642105263157895e-05, "step": 340 }, { "epoch": 0.8941485864562788, "high_lr": 0.0008210526315789474, "low_lr": 1.642105263157895e-05, "step": 340 }, { "epoch": 0.8941485864562788, "high_lr": 0.0008210526315789474, "low_lr": 1.642105263157895e-05, "step": 340 }, { "epoch": 0.8941485864562788, "high_lr": 0.0008210526315789474, "low_lr": 1.642105263157895e-05, "step": 340 }, { "epoch": 0.8967784352399737, "grad_norm": 0.9118064641952515, "learning_rate": 0.0008205263157894737, "loss": 1.5395, "step": 341 }, { "epoch": 0.8967784352399737, "high_lr": 0.0008205263157894737, "low_lr": 1.6410526315789474e-05, "step": 341 }, { "epoch": 0.8967784352399737, "high_lr": 0.0008205263157894737, "low_lr": 1.6410526315789474e-05, "step": 341 }, { "epoch": 0.8967784352399737, "high_lr": 0.0008205263157894737, "low_lr": 1.6410526315789474e-05, "step": 341 }, { "epoch": 0.8967784352399737, "high_lr": 0.0008205263157894737, "low_lr": 1.6410526315789474e-05, "step": 341 }, { "epoch": 0.8967784352399737, "high_lr": 0.0008205263157894737, "low_lr": 1.6410526315789474e-05, "step": 341 }, { "epoch": 0.8967784352399737, "high_lr": 0.0008205263157894737, "low_lr": 1.6410526315789474e-05, "step": 341 }, { "epoch": 0.8967784352399737, "high_lr": 0.0008205263157894737, "low_lr": 1.6410526315789474e-05, "step": 341 }, { "epoch": 0.8967784352399737, "high_lr": 0.0008205263157894737, "low_lr": 1.6410526315789474e-05, "step": 341 }, { "epoch": 0.8994082840236687, "grad_norm": 0.9734652638435364, "learning_rate": 0.00082, "loss": 1.5986, "step": 342 }, { "epoch": 0.8994082840236687, "high_lr": 0.00082, "low_lr": 1.64e-05, "step": 342 }, { "epoch": 0.8994082840236687, "high_lr": 0.00082, "low_lr": 1.64e-05, "step": 342 }, { "epoch": 0.8994082840236687, "high_lr": 0.00082, "low_lr": 1.64e-05, "step": 342 }, { "epoch": 0.8994082840236687, "high_lr": 0.00082, "low_lr": 1.64e-05, "step": 342 }, { "epoch": 0.8994082840236687, "high_lr": 0.00082, "low_lr": 1.64e-05, "step": 342 }, { "epoch": 0.8994082840236687, "high_lr": 0.00082, "low_lr": 1.64e-05, "step": 342 }, { "epoch": 0.8994082840236687, "high_lr": 0.00082, "low_lr": 1.64e-05, "step": 342 }, { "epoch": 0.8994082840236687, "high_lr": 0.00082, "low_lr": 1.64e-05, "step": 342 }, { "epoch": 0.9020381328073636, "grad_norm": 0.9268157482147217, "learning_rate": 0.0008194736842105264, "loss": 1.5755, "step": 343 }, { "epoch": 0.9020381328073636, "high_lr": 0.0008194736842105264, "low_lr": 1.6389473684210527e-05, "step": 343 }, { "epoch": 0.9020381328073636, "high_lr": 0.0008194736842105264, "low_lr": 1.6389473684210527e-05, "step": 343 }, { "epoch": 0.9020381328073636, "high_lr": 0.0008194736842105264, "low_lr": 1.6389473684210527e-05, "step": 343 }, { "epoch": 0.9020381328073636, "high_lr": 0.0008194736842105264, "low_lr": 1.6389473684210527e-05, "step": 343 }, { "epoch": 0.9020381328073636, "high_lr": 0.0008194736842105264, "low_lr": 1.6389473684210527e-05, "step": 343 }, { "epoch": 0.9020381328073636, "high_lr": 0.0008194736842105264, "low_lr": 1.6389473684210527e-05, "step": 343 }, { "epoch": 0.9020381328073636, "high_lr": 0.0008194736842105264, "low_lr": 1.6389473684210527e-05, "step": 343 }, { "epoch": 0.9020381328073636, "high_lr": 0.0008194736842105264, "low_lr": 1.6389473684210527e-05, "step": 343 }, { "epoch": 0.9046679815910585, "grad_norm": 0.9329240918159485, "learning_rate": 0.0008189473684210527, "loss": 1.5928, "step": 344 }, { "epoch": 0.9046679815910585, "high_lr": 0.0008189473684210527, "low_lr": 1.6378947368421055e-05, "step": 344 }, { "epoch": 0.9046679815910585, "high_lr": 0.0008189473684210527, "low_lr": 1.6378947368421055e-05, "step": 344 }, { "epoch": 0.9046679815910585, "high_lr": 0.0008189473684210527, "low_lr": 1.6378947368421055e-05, "step": 344 }, { "epoch": 0.9046679815910585, "high_lr": 0.0008189473684210527, "low_lr": 1.6378947368421055e-05, "step": 344 }, { "epoch": 0.9046679815910585, "high_lr": 0.0008189473684210527, "low_lr": 1.6378947368421055e-05, "step": 344 }, { "epoch": 0.9046679815910585, "high_lr": 0.0008189473684210527, "low_lr": 1.6378947368421055e-05, "step": 344 }, { "epoch": 0.9046679815910585, "high_lr": 0.0008189473684210527, "low_lr": 1.6378947368421055e-05, "step": 344 }, { "epoch": 0.9046679815910585, "high_lr": 0.0008189473684210527, "low_lr": 1.6378947368421055e-05, "step": 344 }, { "epoch": 0.9072978303747534, "grad_norm": 0.9910773634910583, "learning_rate": 0.000818421052631579, "loss": 1.6063, "step": 345 }, { "epoch": 0.9072978303747534, "high_lr": 0.000818421052631579, "low_lr": 1.636842105263158e-05, "step": 345 }, { "epoch": 0.9072978303747534, "high_lr": 0.000818421052631579, "low_lr": 1.636842105263158e-05, "step": 345 }, { "epoch": 0.9072978303747534, "high_lr": 0.000818421052631579, "low_lr": 1.636842105263158e-05, "step": 345 }, { "epoch": 0.9072978303747534, "high_lr": 0.000818421052631579, "low_lr": 1.636842105263158e-05, "step": 345 }, { "epoch": 0.9072978303747534, "high_lr": 0.000818421052631579, "low_lr": 1.636842105263158e-05, "step": 345 }, { "epoch": 0.9072978303747534, "high_lr": 0.000818421052631579, "low_lr": 1.636842105263158e-05, "step": 345 }, { "epoch": 0.9072978303747534, "high_lr": 0.000818421052631579, "low_lr": 1.636842105263158e-05, "step": 345 }, { "epoch": 0.9072978303747534, "high_lr": 0.000818421052631579, "low_lr": 1.636842105263158e-05, "step": 345 }, { "epoch": 0.9099276791584484, "grad_norm": 0.9147838354110718, "learning_rate": 0.0008178947368421052, "loss": 1.5372, "step": 346 }, { "epoch": 0.9099276791584484, "high_lr": 0.0008178947368421052, "low_lr": 1.6357894736842108e-05, "step": 346 }, { "epoch": 0.9099276791584484, "high_lr": 0.0008178947368421052, "low_lr": 1.6357894736842108e-05, "step": 346 }, { "epoch": 0.9099276791584484, "high_lr": 0.0008178947368421052, "low_lr": 1.6357894736842108e-05, "step": 346 }, { "epoch": 0.9099276791584484, "high_lr": 0.0008178947368421052, "low_lr": 1.6357894736842108e-05, "step": 346 }, { "epoch": 0.9099276791584484, "high_lr": 0.0008178947368421052, "low_lr": 1.6357894736842108e-05, "step": 346 }, { "epoch": 0.9099276791584484, "high_lr": 0.0008178947368421052, "low_lr": 1.6357894736842108e-05, "step": 346 }, { "epoch": 0.9099276791584484, "high_lr": 0.0008178947368421052, "low_lr": 1.6357894736842108e-05, "step": 346 }, { "epoch": 0.9099276791584484, "high_lr": 0.0008178947368421052, "low_lr": 1.6357894736842108e-05, "step": 346 }, { "epoch": 0.9125575279421433, "grad_norm": 0.9866480231285095, "learning_rate": 0.0008173684210526316, "loss": 1.6184, "step": 347 }, { "epoch": 0.9125575279421433, "high_lr": 0.0008173684210526316, "low_lr": 1.6347368421052636e-05, "step": 347 }, { "epoch": 0.9125575279421433, "high_lr": 0.0008173684210526316, "low_lr": 1.6347368421052636e-05, "step": 347 }, { "epoch": 0.9125575279421433, "high_lr": 0.0008173684210526316, "low_lr": 1.6347368421052636e-05, "step": 347 }, { "epoch": 0.9125575279421433, "high_lr": 0.0008173684210526316, "low_lr": 1.6347368421052636e-05, "step": 347 }, { "epoch": 0.9125575279421433, "high_lr": 0.0008173684210526316, "low_lr": 1.6347368421052636e-05, "step": 347 }, { "epoch": 0.9125575279421433, "high_lr": 0.0008173684210526316, "low_lr": 1.6347368421052636e-05, "step": 347 }, { "epoch": 0.9125575279421433, "high_lr": 0.0008173684210526316, "low_lr": 1.6347368421052636e-05, "step": 347 }, { "epoch": 0.9125575279421433, "high_lr": 0.0008173684210526316, "low_lr": 1.6347368421052636e-05, "step": 347 }, { "epoch": 0.9151873767258383, "grad_norm": 0.9890763759613037, "learning_rate": 0.0008168421052631579, "loss": 1.6494, "step": 348 }, { "epoch": 0.9151873767258383, "high_lr": 0.0008168421052631579, "low_lr": 1.633684210526316e-05, "step": 348 }, { "epoch": 0.9151873767258383, "high_lr": 0.0008168421052631579, "low_lr": 1.633684210526316e-05, "step": 348 }, { "epoch": 0.9151873767258383, "high_lr": 0.0008168421052631579, "low_lr": 1.633684210526316e-05, "step": 348 }, { "epoch": 0.9151873767258383, "high_lr": 0.0008168421052631579, "low_lr": 1.633684210526316e-05, "step": 348 }, { "epoch": 0.9151873767258383, "high_lr": 0.0008168421052631579, "low_lr": 1.633684210526316e-05, "step": 348 }, { "epoch": 0.9151873767258383, "high_lr": 0.0008168421052631579, "low_lr": 1.633684210526316e-05, "step": 348 }, { "epoch": 0.9151873767258383, "high_lr": 0.0008168421052631579, "low_lr": 1.633684210526316e-05, "step": 348 }, { "epoch": 0.9151873767258383, "high_lr": 0.0008168421052631579, "low_lr": 1.633684210526316e-05, "step": 348 }, { "epoch": 0.9178172255095332, "grad_norm": 1.0507274866104126, "learning_rate": 0.0008163157894736842, "loss": 1.5947, "step": 349 }, { "epoch": 0.9178172255095332, "high_lr": 0.0008163157894736842, "low_lr": 1.6326315789473685e-05, "step": 349 }, { "epoch": 0.9178172255095332, "high_lr": 0.0008163157894736842, "low_lr": 1.6326315789473685e-05, "step": 349 }, { "epoch": 0.9178172255095332, "high_lr": 0.0008163157894736842, "low_lr": 1.6326315789473685e-05, "step": 349 }, { "epoch": 0.9178172255095332, "high_lr": 0.0008163157894736842, "low_lr": 1.6326315789473685e-05, "step": 349 }, { "epoch": 0.9178172255095332, "high_lr": 0.0008163157894736842, "low_lr": 1.6326315789473685e-05, "step": 349 }, { "epoch": 0.9178172255095332, "high_lr": 0.0008163157894736842, "low_lr": 1.6326315789473685e-05, "step": 349 }, { "epoch": 0.9178172255095332, "high_lr": 0.0008163157894736842, "low_lr": 1.6326315789473685e-05, "step": 349 }, { "epoch": 0.9178172255095332, "high_lr": 0.0008163157894736842, "low_lr": 1.6326315789473685e-05, "step": 349 }, { "epoch": 0.9204470742932281, "grad_norm": 0.9223301410675049, "learning_rate": 0.0008157894736842105, "loss": 1.5522, "step": 350 }, { "epoch": 0.9204470742932281, "high_lr": 0.0008157894736842105, "low_lr": 1.6315789473684213e-05, "step": 350 }, { "epoch": 0.9204470742932281, "high_lr": 0.0008157894736842105, "low_lr": 1.6315789473684213e-05, "step": 350 }, { "epoch": 0.9204470742932281, "high_lr": 0.0008157894736842105, "low_lr": 1.6315789473684213e-05, "step": 350 }, { "epoch": 0.9204470742932281, "high_lr": 0.0008157894736842105, "low_lr": 1.6315789473684213e-05, "step": 350 }, { "epoch": 0.9204470742932281, "high_lr": 0.0008157894736842105, "low_lr": 1.6315789473684213e-05, "step": 350 }, { "epoch": 0.9204470742932281, "high_lr": 0.0008157894736842105, "low_lr": 1.6315789473684213e-05, "step": 350 }, { "epoch": 0.9204470742932281, "high_lr": 0.0008157894736842105, "low_lr": 1.6315789473684213e-05, "step": 350 }, { "epoch": 0.9204470742932281, "high_lr": 0.0008157894736842105, "low_lr": 1.6315789473684213e-05, "step": 350 }, { "epoch": 0.9230769230769231, "grad_norm": 0.9552976489067078, "learning_rate": 0.0008152631578947368, "loss": 1.5934, "step": 351 }, { "epoch": 0.9230769230769231, "high_lr": 0.0008152631578947368, "low_lr": 1.6305263157894737e-05, "step": 351 }, { "epoch": 0.9230769230769231, "high_lr": 0.0008152631578947368, "low_lr": 1.6305263157894737e-05, "step": 351 }, { "epoch": 0.9230769230769231, "high_lr": 0.0008152631578947368, "low_lr": 1.6305263157894737e-05, "step": 351 }, { "epoch": 0.9230769230769231, "high_lr": 0.0008152631578947368, "low_lr": 1.6305263157894737e-05, "step": 351 }, { "epoch": 0.9230769230769231, "high_lr": 0.0008152631578947368, "low_lr": 1.6305263157894737e-05, "step": 351 }, { "epoch": 0.9230769230769231, "high_lr": 0.0008152631578947368, "low_lr": 1.6305263157894737e-05, "step": 351 }, { "epoch": 0.9230769230769231, "high_lr": 0.0008152631578947368, "low_lr": 1.6305263157894737e-05, "step": 351 }, { "epoch": 0.9230769230769231, "high_lr": 0.0008152631578947368, "low_lr": 1.6305263157894737e-05, "step": 351 }, { "epoch": 0.925706771860618, "grad_norm": 0.9349220991134644, "learning_rate": 0.0008147368421052633, "loss": 1.6361, "step": 352 }, { "epoch": 0.925706771860618, "high_lr": 0.0008147368421052633, "low_lr": 1.6294736842105265e-05, "step": 352 }, { "epoch": 0.925706771860618, "high_lr": 0.0008147368421052633, "low_lr": 1.6294736842105265e-05, "step": 352 }, { "epoch": 0.925706771860618, "high_lr": 0.0008147368421052633, "low_lr": 1.6294736842105265e-05, "step": 352 }, { "epoch": 0.925706771860618, "high_lr": 0.0008147368421052633, "low_lr": 1.6294736842105265e-05, "step": 352 }, { "epoch": 0.925706771860618, "high_lr": 0.0008147368421052633, "low_lr": 1.6294736842105265e-05, "step": 352 }, { "epoch": 0.925706771860618, "high_lr": 0.0008147368421052633, "low_lr": 1.6294736842105265e-05, "step": 352 }, { "epoch": 0.925706771860618, "high_lr": 0.0008147368421052633, "low_lr": 1.6294736842105265e-05, "step": 352 }, { "epoch": 0.925706771860618, "high_lr": 0.0008147368421052633, "low_lr": 1.6294736842105265e-05, "step": 352 }, { "epoch": 0.928336620644313, "grad_norm": 0.9589920043945312, "learning_rate": 0.0008142105263157896, "loss": 1.6333, "step": 353 }, { "epoch": 0.928336620644313, "high_lr": 0.0008142105263157896, "low_lr": 1.628421052631579e-05, "step": 353 }, { "epoch": 0.928336620644313, "high_lr": 0.0008142105263157896, "low_lr": 1.628421052631579e-05, "step": 353 }, { "epoch": 0.928336620644313, "high_lr": 0.0008142105263157896, "low_lr": 1.628421052631579e-05, "step": 353 }, { "epoch": 0.928336620644313, "high_lr": 0.0008142105263157896, "low_lr": 1.628421052631579e-05, "step": 353 }, { "epoch": 0.928336620644313, "high_lr": 0.0008142105263157896, "low_lr": 1.628421052631579e-05, "step": 353 }, { "epoch": 0.928336620644313, "high_lr": 0.0008142105263157896, "low_lr": 1.628421052631579e-05, "step": 353 }, { "epoch": 0.928336620644313, "high_lr": 0.0008142105263157896, "low_lr": 1.628421052631579e-05, "step": 353 }, { "epoch": 0.928336620644313, "high_lr": 0.0008142105263157896, "low_lr": 1.628421052631579e-05, "step": 353 }, { "epoch": 0.9309664694280079, "grad_norm": 0.9250889420509338, "learning_rate": 0.0008136842105263158, "loss": 1.6012, "step": 354 }, { "epoch": 0.9309664694280079, "high_lr": 0.0008136842105263158, "low_lr": 1.6273684210526318e-05, "step": 354 }, { "epoch": 0.9309664694280079, "high_lr": 0.0008136842105263158, "low_lr": 1.6273684210526318e-05, "step": 354 }, { "epoch": 0.9309664694280079, "high_lr": 0.0008136842105263158, "low_lr": 1.6273684210526318e-05, "step": 354 }, { "epoch": 0.9309664694280079, "high_lr": 0.0008136842105263158, "low_lr": 1.6273684210526318e-05, "step": 354 }, { "epoch": 0.9309664694280079, "high_lr": 0.0008136842105263158, "low_lr": 1.6273684210526318e-05, "step": 354 }, { "epoch": 0.9309664694280079, "high_lr": 0.0008136842105263158, "low_lr": 1.6273684210526318e-05, "step": 354 }, { "epoch": 0.9309664694280079, "high_lr": 0.0008136842105263158, "low_lr": 1.6273684210526318e-05, "step": 354 }, { "epoch": 0.9309664694280079, "high_lr": 0.0008136842105263158, "low_lr": 1.6273684210526318e-05, "step": 354 }, { "epoch": 0.9335963182117029, "grad_norm": 0.9966514706611633, "learning_rate": 0.0008131578947368421, "loss": 1.618, "step": 355 }, { "epoch": 0.9335963182117029, "high_lr": 0.0008131578947368421, "low_lr": 1.6263157894736843e-05, "step": 355 }, { "epoch": 0.9335963182117029, "high_lr": 0.0008131578947368421, "low_lr": 1.6263157894736843e-05, "step": 355 }, { "epoch": 0.9335963182117029, "high_lr": 0.0008131578947368421, "low_lr": 1.6263157894736843e-05, "step": 355 }, { "epoch": 0.9335963182117029, "high_lr": 0.0008131578947368421, "low_lr": 1.6263157894736843e-05, "step": 355 }, { "epoch": 0.9335963182117029, "high_lr": 0.0008131578947368421, "low_lr": 1.6263157894736843e-05, "step": 355 }, { "epoch": 0.9335963182117029, "high_lr": 0.0008131578947368421, "low_lr": 1.6263157894736843e-05, "step": 355 }, { "epoch": 0.9335963182117029, "high_lr": 0.0008131578947368421, "low_lr": 1.6263157894736843e-05, "step": 355 }, { "epoch": 0.9335963182117029, "high_lr": 0.0008131578947368421, "low_lr": 1.6263157894736843e-05, "step": 355 }, { "epoch": 0.9362261669953977, "grad_norm": 0.9778467416763306, "learning_rate": 0.0008126315789473684, "loss": 1.5727, "step": 356 }, { "epoch": 0.9362261669953977, "high_lr": 0.0008126315789473684, "low_lr": 1.6252631578947367e-05, "step": 356 }, { "epoch": 0.9362261669953977, "high_lr": 0.0008126315789473684, "low_lr": 1.6252631578947367e-05, "step": 356 }, { "epoch": 0.9362261669953977, "high_lr": 0.0008126315789473684, "low_lr": 1.6252631578947367e-05, "step": 356 }, { "epoch": 0.9362261669953977, "high_lr": 0.0008126315789473684, "low_lr": 1.6252631578947367e-05, "step": 356 }, { "epoch": 0.9362261669953977, "high_lr": 0.0008126315789473684, "low_lr": 1.6252631578947367e-05, "step": 356 }, { "epoch": 0.9362261669953977, "high_lr": 0.0008126315789473684, "low_lr": 1.6252631578947367e-05, "step": 356 }, { "epoch": 0.9362261669953977, "high_lr": 0.0008126315789473684, "low_lr": 1.6252631578947367e-05, "step": 356 }, { "epoch": 0.9362261669953977, "high_lr": 0.0008126315789473684, "low_lr": 1.6252631578947367e-05, "step": 356 }, { "epoch": 0.9388560157790927, "grad_norm": 0.9976954460144043, "learning_rate": 0.0008121052631578948, "loss": 1.6105, "step": 357 }, { "epoch": 0.9388560157790927, "high_lr": 0.0008121052631578948, "low_lr": 1.6242105263157895e-05, "step": 357 }, { "epoch": 0.9388560157790927, "high_lr": 0.0008121052631578948, "low_lr": 1.6242105263157895e-05, "step": 357 }, { "epoch": 0.9388560157790927, "high_lr": 0.0008121052631578948, "low_lr": 1.6242105263157895e-05, "step": 357 }, { "epoch": 0.9388560157790927, "high_lr": 0.0008121052631578948, "low_lr": 1.6242105263157895e-05, "step": 357 }, { "epoch": 0.9388560157790927, "high_lr": 0.0008121052631578948, "low_lr": 1.6242105263157895e-05, "step": 357 }, { "epoch": 0.9388560157790927, "high_lr": 0.0008121052631578948, "low_lr": 1.6242105263157895e-05, "step": 357 }, { "epoch": 0.9388560157790927, "high_lr": 0.0008121052631578948, "low_lr": 1.6242105263157895e-05, "step": 357 }, { "epoch": 0.9388560157790927, "high_lr": 0.0008121052631578948, "low_lr": 1.6242105263157895e-05, "step": 357 }, { "epoch": 0.9414858645627876, "grad_norm": 0.9831271767616272, "learning_rate": 0.0008115789473684211, "loss": 1.5965, "step": 358 }, { "epoch": 0.9414858645627876, "high_lr": 0.0008115789473684211, "low_lr": 1.6231578947368423e-05, "step": 358 }, { "epoch": 0.9414858645627876, "high_lr": 0.0008115789473684211, "low_lr": 1.6231578947368423e-05, "step": 358 }, { "epoch": 0.9414858645627876, "high_lr": 0.0008115789473684211, "low_lr": 1.6231578947368423e-05, "step": 358 }, { "epoch": 0.9414858645627876, "high_lr": 0.0008115789473684211, "low_lr": 1.6231578947368423e-05, "step": 358 }, { "epoch": 0.9414858645627876, "high_lr": 0.0008115789473684211, "low_lr": 1.6231578947368423e-05, "step": 358 }, { "epoch": 0.9414858645627876, "high_lr": 0.0008115789473684211, "low_lr": 1.6231578947368423e-05, "step": 358 }, { "epoch": 0.9414858645627876, "high_lr": 0.0008115789473684211, "low_lr": 1.6231578947368423e-05, "step": 358 }, { "epoch": 0.9414858645627876, "high_lr": 0.0008115789473684211, "low_lr": 1.6231578947368423e-05, "step": 358 }, { "epoch": 0.9441157133464826, "grad_norm": 0.9419095516204834, "learning_rate": 0.0008110526315789474, "loss": 1.6456, "step": 359 }, { "epoch": 0.9441157133464826, "high_lr": 0.0008110526315789474, "low_lr": 1.6221052631578948e-05, "step": 359 }, { "epoch": 0.9441157133464826, "high_lr": 0.0008110526315789474, "low_lr": 1.6221052631578948e-05, "step": 359 }, { "epoch": 0.9441157133464826, "high_lr": 0.0008110526315789474, "low_lr": 1.6221052631578948e-05, "step": 359 }, { "epoch": 0.9441157133464826, "high_lr": 0.0008110526315789474, "low_lr": 1.6221052631578948e-05, "step": 359 }, { "epoch": 0.9441157133464826, "high_lr": 0.0008110526315789474, "low_lr": 1.6221052631578948e-05, "step": 359 }, { "epoch": 0.9441157133464826, "high_lr": 0.0008110526315789474, "low_lr": 1.6221052631578948e-05, "step": 359 }, { "epoch": 0.9441157133464826, "high_lr": 0.0008110526315789474, "low_lr": 1.6221052631578948e-05, "step": 359 }, { "epoch": 0.9441157133464826, "high_lr": 0.0008110526315789474, "low_lr": 1.6221052631578948e-05, "step": 359 }, { "epoch": 0.9467455621301775, "grad_norm": 1.0125757455825806, "learning_rate": 0.0008105263157894737, "loss": 1.6393, "step": 360 }, { "epoch": 0.9467455621301775, "high_lr": 0.0008105263157894737, "low_lr": 1.6210526315789473e-05, "step": 360 }, { "epoch": 0.9467455621301775, "high_lr": 0.0008105263157894737, "low_lr": 1.6210526315789473e-05, "step": 360 }, { "epoch": 0.9467455621301775, "high_lr": 0.0008105263157894737, "low_lr": 1.6210526315789473e-05, "step": 360 }, { "epoch": 0.9467455621301775, "high_lr": 0.0008105263157894737, "low_lr": 1.6210526315789473e-05, "step": 360 }, { "epoch": 0.9467455621301775, "high_lr": 0.0008105263157894737, "low_lr": 1.6210526315789473e-05, "step": 360 }, { "epoch": 0.9467455621301775, "high_lr": 0.0008105263157894737, "low_lr": 1.6210526315789473e-05, "step": 360 }, { "epoch": 0.9467455621301775, "high_lr": 0.0008105263157894737, "low_lr": 1.6210526315789473e-05, "step": 360 }, { "epoch": 0.9467455621301775, "high_lr": 0.0008105263157894737, "low_lr": 1.6210526315789473e-05, "step": 360 }, { "epoch": 0.9493754109138725, "grad_norm": 0.9917994141578674, "learning_rate": 0.0008100000000000001, "loss": 1.6001, "step": 361 }, { "epoch": 0.9493754109138725, "high_lr": 0.0008100000000000001, "low_lr": 1.62e-05, "step": 361 }, { "epoch": 0.9493754109138725, "high_lr": 0.0008100000000000001, "low_lr": 1.62e-05, "step": 361 }, { "epoch": 0.9493754109138725, "high_lr": 0.0008100000000000001, "low_lr": 1.62e-05, "step": 361 }, { "epoch": 0.9493754109138725, "high_lr": 0.0008100000000000001, "low_lr": 1.62e-05, "step": 361 }, { "epoch": 0.9493754109138725, "high_lr": 0.0008100000000000001, "low_lr": 1.62e-05, "step": 361 }, { "epoch": 0.9493754109138725, "high_lr": 0.0008100000000000001, "low_lr": 1.62e-05, "step": 361 }, { "epoch": 0.9493754109138725, "high_lr": 0.0008100000000000001, "low_lr": 1.62e-05, "step": 361 }, { "epoch": 0.9493754109138725, "high_lr": 0.0008100000000000001, "low_lr": 1.62e-05, "step": 361 }, { "epoch": 0.9520052596975674, "grad_norm": 0.9051048159599304, "learning_rate": 0.0008094736842105264, "loss": 1.5905, "step": 362 }, { "epoch": 0.9520052596975674, "high_lr": 0.0008094736842105264, "low_lr": 1.618947368421053e-05, "step": 362 }, { "epoch": 0.9520052596975674, "high_lr": 0.0008094736842105264, "low_lr": 1.618947368421053e-05, "step": 362 }, { "epoch": 0.9520052596975674, "high_lr": 0.0008094736842105264, "low_lr": 1.618947368421053e-05, "step": 362 }, { "epoch": 0.9520052596975674, "high_lr": 0.0008094736842105264, "low_lr": 1.618947368421053e-05, "step": 362 }, { "epoch": 0.9520052596975674, "high_lr": 0.0008094736842105264, "low_lr": 1.618947368421053e-05, "step": 362 }, { "epoch": 0.9520052596975674, "high_lr": 0.0008094736842105264, "low_lr": 1.618947368421053e-05, "step": 362 }, { "epoch": 0.9520052596975674, "high_lr": 0.0008094736842105264, "low_lr": 1.618947368421053e-05, "step": 362 }, { "epoch": 0.9520052596975674, "high_lr": 0.0008094736842105264, "low_lr": 1.618947368421053e-05, "step": 362 }, { "epoch": 0.9546351084812623, "grad_norm": 0.9285058975219727, "learning_rate": 0.0008089473684210526, "loss": 1.5778, "step": 363 }, { "epoch": 0.9546351084812623, "high_lr": 0.0008089473684210526, "low_lr": 1.6178947368421053e-05, "step": 363 }, { "epoch": 0.9546351084812623, "high_lr": 0.0008089473684210526, "low_lr": 1.6178947368421053e-05, "step": 363 }, { "epoch": 0.9546351084812623, "high_lr": 0.0008089473684210526, "low_lr": 1.6178947368421053e-05, "step": 363 }, { "epoch": 0.9546351084812623, "high_lr": 0.0008089473684210526, "low_lr": 1.6178947368421053e-05, "step": 363 }, { "epoch": 0.9546351084812623, "high_lr": 0.0008089473684210526, "low_lr": 1.6178947368421053e-05, "step": 363 }, { "epoch": 0.9546351084812623, "high_lr": 0.0008089473684210526, "low_lr": 1.6178947368421053e-05, "step": 363 }, { "epoch": 0.9546351084812623, "high_lr": 0.0008089473684210526, "low_lr": 1.6178947368421053e-05, "step": 363 }, { "epoch": 0.9546351084812623, "high_lr": 0.0008089473684210526, "low_lr": 1.6178947368421053e-05, "step": 363 }, { "epoch": 0.9572649572649573, "grad_norm": 0.9660423994064331, "learning_rate": 0.0008084210526315789, "loss": 1.5676, "step": 364 }, { "epoch": 0.9572649572649573, "high_lr": 0.0008084210526315789, "low_lr": 1.616842105263158e-05, "step": 364 }, { "epoch": 0.9572649572649573, "high_lr": 0.0008084210526315789, "low_lr": 1.616842105263158e-05, "step": 364 }, { "epoch": 0.9572649572649573, "high_lr": 0.0008084210526315789, "low_lr": 1.616842105263158e-05, "step": 364 }, { "epoch": 0.9572649572649573, "high_lr": 0.0008084210526315789, "low_lr": 1.616842105263158e-05, "step": 364 }, { "epoch": 0.9572649572649573, "high_lr": 0.0008084210526315789, "low_lr": 1.616842105263158e-05, "step": 364 }, { "epoch": 0.9572649572649573, "high_lr": 0.0008084210526315789, "low_lr": 1.616842105263158e-05, "step": 364 }, { "epoch": 0.9572649572649573, "high_lr": 0.0008084210526315789, "low_lr": 1.616842105263158e-05, "step": 364 }, { "epoch": 0.9572649572649573, "high_lr": 0.0008084210526315789, "low_lr": 1.616842105263158e-05, "step": 364 }, { "epoch": 0.9598948060486522, "grad_norm": 0.9621961116790771, "learning_rate": 0.0008078947368421052, "loss": 1.5944, "step": 365 }, { "epoch": 0.9598948060486522, "high_lr": 0.0008078947368421052, "low_lr": 1.6157894736842106e-05, "step": 365 }, { "epoch": 0.9598948060486522, "high_lr": 0.0008078947368421052, "low_lr": 1.6157894736842106e-05, "step": 365 }, { "epoch": 0.9598948060486522, "high_lr": 0.0008078947368421052, "low_lr": 1.6157894736842106e-05, "step": 365 }, { "epoch": 0.9598948060486522, "high_lr": 0.0008078947368421052, "low_lr": 1.6157894736842106e-05, "step": 365 }, { "epoch": 0.9598948060486522, "high_lr": 0.0008078947368421052, "low_lr": 1.6157894736842106e-05, "step": 365 }, { "epoch": 0.9598948060486522, "high_lr": 0.0008078947368421052, "low_lr": 1.6157894736842106e-05, "step": 365 }, { "epoch": 0.9598948060486522, "high_lr": 0.0008078947368421052, "low_lr": 1.6157894736842106e-05, "step": 365 }, { "epoch": 0.9598948060486522, "high_lr": 0.0008078947368421052, "low_lr": 1.6157894736842106e-05, "step": 365 }, { "epoch": 0.9625246548323472, "grad_norm": 0.8977233171463013, "learning_rate": 0.0008073684210526316, "loss": 1.6, "step": 366 }, { "epoch": 0.9625246548323472, "high_lr": 0.0008073684210526316, "low_lr": 1.6147368421052634e-05, "step": 366 }, { "epoch": 0.9625246548323472, "high_lr": 0.0008073684210526316, "low_lr": 1.6147368421052634e-05, "step": 366 }, { "epoch": 0.9625246548323472, "high_lr": 0.0008073684210526316, "low_lr": 1.6147368421052634e-05, "step": 366 }, { "epoch": 0.9625246548323472, "high_lr": 0.0008073684210526316, "low_lr": 1.6147368421052634e-05, "step": 366 }, { "epoch": 0.9625246548323472, "high_lr": 0.0008073684210526316, "low_lr": 1.6147368421052634e-05, "step": 366 }, { "epoch": 0.9625246548323472, "high_lr": 0.0008073684210526316, "low_lr": 1.6147368421052634e-05, "step": 366 }, { "epoch": 0.9625246548323472, "high_lr": 0.0008073684210526316, "low_lr": 1.6147368421052634e-05, "step": 366 }, { "epoch": 0.9625246548323472, "high_lr": 0.0008073684210526316, "low_lr": 1.6147368421052634e-05, "step": 366 }, { "epoch": 0.965154503616042, "grad_norm": 0.9885714650154114, "learning_rate": 0.0008068421052631579, "loss": 1.6135, "step": 367 }, { "epoch": 0.965154503616042, "high_lr": 0.0008068421052631579, "low_lr": 1.613684210526316e-05, "step": 367 }, { "epoch": 0.965154503616042, "high_lr": 0.0008068421052631579, "low_lr": 1.613684210526316e-05, "step": 367 }, { "epoch": 0.965154503616042, "high_lr": 0.0008068421052631579, "low_lr": 1.613684210526316e-05, "step": 367 }, { "epoch": 0.965154503616042, "high_lr": 0.0008068421052631579, "low_lr": 1.613684210526316e-05, "step": 367 }, { "epoch": 0.965154503616042, "high_lr": 0.0008068421052631579, "low_lr": 1.613684210526316e-05, "step": 367 }, { "epoch": 0.965154503616042, "high_lr": 0.0008068421052631579, "low_lr": 1.613684210526316e-05, "step": 367 }, { "epoch": 0.965154503616042, "high_lr": 0.0008068421052631579, "low_lr": 1.613684210526316e-05, "step": 367 }, { "epoch": 0.965154503616042, "high_lr": 0.0008068421052631579, "low_lr": 1.613684210526316e-05, "step": 367 }, { "epoch": 0.967784352399737, "grad_norm": 1.0077567100524902, "learning_rate": 0.0008063157894736842, "loss": 1.6205, "step": 368 }, { "epoch": 0.967784352399737, "high_lr": 0.0008063157894736842, "low_lr": 1.6126315789473687e-05, "step": 368 }, { "epoch": 0.967784352399737, "high_lr": 0.0008063157894736842, "low_lr": 1.6126315789473687e-05, "step": 368 }, { "epoch": 0.967784352399737, "high_lr": 0.0008063157894736842, "low_lr": 1.6126315789473687e-05, "step": 368 }, { "epoch": 0.967784352399737, "high_lr": 0.0008063157894736842, "low_lr": 1.6126315789473687e-05, "step": 368 }, { "epoch": 0.967784352399737, "high_lr": 0.0008063157894736842, "low_lr": 1.6126315789473687e-05, "step": 368 }, { "epoch": 0.967784352399737, "high_lr": 0.0008063157894736842, "low_lr": 1.6126315789473687e-05, "step": 368 }, { "epoch": 0.967784352399737, "high_lr": 0.0008063157894736842, "low_lr": 1.6126315789473687e-05, "step": 368 }, { "epoch": 0.967784352399737, "high_lr": 0.0008063157894736842, "low_lr": 1.6126315789473687e-05, "step": 368 }, { "epoch": 0.9704142011834319, "grad_norm": 0.9423245787620544, "learning_rate": 0.0008057894736842106, "loss": 1.5613, "step": 369 }, { "epoch": 0.9704142011834319, "high_lr": 0.0008057894736842106, "low_lr": 1.611578947368421e-05, "step": 369 }, { "epoch": 0.9704142011834319, "high_lr": 0.0008057894736842106, "low_lr": 1.611578947368421e-05, "step": 369 }, { "epoch": 0.9704142011834319, "high_lr": 0.0008057894736842106, "low_lr": 1.611578947368421e-05, "step": 369 }, { "epoch": 0.9704142011834319, "high_lr": 0.0008057894736842106, "low_lr": 1.611578947368421e-05, "step": 369 }, { "epoch": 0.9704142011834319, "high_lr": 0.0008057894736842106, "low_lr": 1.611578947368421e-05, "step": 369 }, { "epoch": 0.9704142011834319, "high_lr": 0.0008057894736842106, "low_lr": 1.611578947368421e-05, "step": 369 }, { "epoch": 0.9704142011834319, "high_lr": 0.0008057894736842106, "low_lr": 1.611578947368421e-05, "step": 369 }, { "epoch": 0.9704142011834319, "high_lr": 0.0008057894736842106, "low_lr": 1.611578947368421e-05, "step": 369 }, { "epoch": 0.9730440499671269, "grad_norm": 0.967456579208374, "learning_rate": 0.0008052631578947369, "loss": 1.5729, "step": 370 }, { "epoch": 0.9730440499671269, "high_lr": 0.0008052631578947369, "low_lr": 1.6105263157894736e-05, "step": 370 }, { "epoch": 0.9730440499671269, "high_lr": 0.0008052631578947369, "low_lr": 1.6105263157894736e-05, "step": 370 }, { "epoch": 0.9730440499671269, "high_lr": 0.0008052631578947369, "low_lr": 1.6105263157894736e-05, "step": 370 }, { "epoch": 0.9730440499671269, "high_lr": 0.0008052631578947369, "low_lr": 1.6105263157894736e-05, "step": 370 }, { "epoch": 0.9730440499671269, "high_lr": 0.0008052631578947369, "low_lr": 1.6105263157894736e-05, "step": 370 }, { "epoch": 0.9730440499671269, "high_lr": 0.0008052631578947369, "low_lr": 1.6105263157894736e-05, "step": 370 }, { "epoch": 0.9730440499671269, "high_lr": 0.0008052631578947369, "low_lr": 1.6105263157894736e-05, "step": 370 }, { "epoch": 0.9730440499671269, "high_lr": 0.0008052631578947369, "low_lr": 1.6105263157894736e-05, "step": 370 }, { "epoch": 0.9756738987508218, "grad_norm": 0.9354105591773987, "learning_rate": 0.0008047368421052632, "loss": 1.6185, "step": 371 }, { "epoch": 0.9756738987508218, "high_lr": 0.0008047368421052632, "low_lr": 1.6094736842105264e-05, "step": 371 }, { "epoch": 0.9756738987508218, "high_lr": 0.0008047368421052632, "low_lr": 1.6094736842105264e-05, "step": 371 }, { "epoch": 0.9756738987508218, "high_lr": 0.0008047368421052632, "low_lr": 1.6094736842105264e-05, "step": 371 }, { "epoch": 0.9756738987508218, "high_lr": 0.0008047368421052632, "low_lr": 1.6094736842105264e-05, "step": 371 }, { "epoch": 0.9756738987508218, "high_lr": 0.0008047368421052632, "low_lr": 1.6094736842105264e-05, "step": 371 }, { "epoch": 0.9756738987508218, "high_lr": 0.0008047368421052632, "low_lr": 1.6094736842105264e-05, "step": 371 }, { "epoch": 0.9756738987508218, "high_lr": 0.0008047368421052632, "low_lr": 1.6094736842105264e-05, "step": 371 }, { "epoch": 0.9756738987508218, "high_lr": 0.0008047368421052632, "low_lr": 1.6094736842105264e-05, "step": 371 }, { "epoch": 0.9783037475345168, "grad_norm": 1.0104918479919434, "learning_rate": 0.0008042105263157895, "loss": 1.5653, "step": 372 }, { "epoch": 0.9783037475345168, "high_lr": 0.0008042105263157895, "low_lr": 1.6084210526315792e-05, "step": 372 }, { "epoch": 0.9783037475345168, "high_lr": 0.0008042105263157895, "low_lr": 1.6084210526315792e-05, "step": 372 }, { "epoch": 0.9783037475345168, "high_lr": 0.0008042105263157895, "low_lr": 1.6084210526315792e-05, "step": 372 }, { "epoch": 0.9783037475345168, "high_lr": 0.0008042105263157895, "low_lr": 1.6084210526315792e-05, "step": 372 }, { "epoch": 0.9783037475345168, "high_lr": 0.0008042105263157895, "low_lr": 1.6084210526315792e-05, "step": 372 }, { "epoch": 0.9783037475345168, "high_lr": 0.0008042105263157895, "low_lr": 1.6084210526315792e-05, "step": 372 }, { "epoch": 0.9783037475345168, "high_lr": 0.0008042105263157895, "low_lr": 1.6084210526315792e-05, "step": 372 }, { "epoch": 0.9783037475345168, "high_lr": 0.0008042105263157895, "low_lr": 1.6084210526315792e-05, "step": 372 }, { "epoch": 0.9809335963182118, "grad_norm": 0.9284670352935791, "learning_rate": 0.0008036842105263158, "loss": 1.5226, "step": 373 }, { "epoch": 0.9809335963182118, "high_lr": 0.0008036842105263158, "low_lr": 1.6073684210526317e-05, "step": 373 }, { "epoch": 0.9809335963182118, "high_lr": 0.0008036842105263158, "low_lr": 1.6073684210526317e-05, "step": 373 }, { "epoch": 0.9809335963182118, "high_lr": 0.0008036842105263158, "low_lr": 1.6073684210526317e-05, "step": 373 }, { "epoch": 0.9809335963182118, "high_lr": 0.0008036842105263158, "low_lr": 1.6073684210526317e-05, "step": 373 }, { "epoch": 0.9809335963182118, "high_lr": 0.0008036842105263158, "low_lr": 1.6073684210526317e-05, "step": 373 }, { "epoch": 0.9809335963182118, "high_lr": 0.0008036842105263158, "low_lr": 1.6073684210526317e-05, "step": 373 }, { "epoch": 0.9809335963182118, "high_lr": 0.0008036842105263158, "low_lr": 1.6073684210526317e-05, "step": 373 }, { "epoch": 0.9809335963182118, "high_lr": 0.0008036842105263158, "low_lr": 1.6073684210526317e-05, "step": 373 }, { "epoch": 0.9835634451019066, "grad_norm": 0.9576569199562073, "learning_rate": 0.0008031578947368421, "loss": 1.6275, "step": 374 }, { "epoch": 0.9835634451019066, "high_lr": 0.0008031578947368421, "low_lr": 1.606315789473684e-05, "step": 374 }, { "epoch": 0.9835634451019066, "high_lr": 0.0008031578947368421, "low_lr": 1.606315789473684e-05, "step": 374 }, { "epoch": 0.9835634451019066, "high_lr": 0.0008031578947368421, "low_lr": 1.606315789473684e-05, "step": 374 }, { "epoch": 0.9835634451019066, "high_lr": 0.0008031578947368421, "low_lr": 1.606315789473684e-05, "step": 374 }, { "epoch": 0.9835634451019066, "high_lr": 0.0008031578947368421, "low_lr": 1.606315789473684e-05, "step": 374 }, { "epoch": 0.9835634451019066, "high_lr": 0.0008031578947368421, "low_lr": 1.606315789473684e-05, "step": 374 }, { "epoch": 0.9835634451019066, "high_lr": 0.0008031578947368421, "low_lr": 1.606315789473684e-05, "step": 374 }, { "epoch": 0.9835634451019066, "high_lr": 0.0008031578947368421, "low_lr": 1.606315789473684e-05, "step": 374 }, { "epoch": 0.9861932938856016, "grad_norm": 0.9816845655441284, "learning_rate": 0.0008026315789473685, "loss": 1.5117, "step": 375 }, { "epoch": 0.9861932938856016, "high_lr": 0.0008026315789473685, "low_lr": 1.605263157894737e-05, "step": 375 }, { "epoch": 0.9861932938856016, "high_lr": 0.0008026315789473685, "low_lr": 1.605263157894737e-05, "step": 375 }, { "epoch": 0.9861932938856016, "high_lr": 0.0008026315789473685, "low_lr": 1.605263157894737e-05, "step": 375 }, { "epoch": 0.9861932938856016, "high_lr": 0.0008026315789473685, "low_lr": 1.605263157894737e-05, "step": 375 }, { "epoch": 0.9861932938856016, "high_lr": 0.0008026315789473685, "low_lr": 1.605263157894737e-05, "step": 375 }, { "epoch": 0.9861932938856016, "high_lr": 0.0008026315789473685, "low_lr": 1.605263157894737e-05, "step": 375 }, { "epoch": 0.9861932938856016, "high_lr": 0.0008026315789473685, "low_lr": 1.605263157894737e-05, "step": 375 }, { "epoch": 0.9861932938856016, "high_lr": 0.0008026315789473685, "low_lr": 1.605263157894737e-05, "step": 375 }, { "epoch": 0.9888231426692965, "grad_norm": 0.9952553510665894, "learning_rate": 0.0008021052631578948, "loss": 1.6148, "step": 376 }, { "epoch": 0.9888231426692965, "high_lr": 0.0008021052631578948, "low_lr": 1.6042105263157897e-05, "step": 376 }, { "epoch": 0.9888231426692965, "high_lr": 0.0008021052631578948, "low_lr": 1.6042105263157897e-05, "step": 376 }, { "epoch": 0.9888231426692965, "high_lr": 0.0008021052631578948, "low_lr": 1.6042105263157897e-05, "step": 376 }, { "epoch": 0.9888231426692965, "high_lr": 0.0008021052631578948, "low_lr": 1.6042105263157897e-05, "step": 376 }, { "epoch": 0.9888231426692965, "high_lr": 0.0008021052631578948, "low_lr": 1.6042105263157897e-05, "step": 376 }, { "epoch": 0.9888231426692965, "high_lr": 0.0008021052631578948, "low_lr": 1.6042105263157897e-05, "step": 376 }, { "epoch": 0.9888231426692965, "high_lr": 0.0008021052631578948, "low_lr": 1.6042105263157897e-05, "step": 376 }, { "epoch": 0.9888231426692965, "high_lr": 0.0008021052631578948, "low_lr": 1.6042105263157897e-05, "step": 376 }, { "epoch": 0.9914529914529915, "grad_norm": 1.0607998371124268, "learning_rate": 0.0008015789473684211, "loss": 1.6075, "step": 377 }, { "epoch": 0.9914529914529915, "high_lr": 0.0008015789473684211, "low_lr": 1.6031578947368422e-05, "step": 377 }, { "epoch": 0.9914529914529915, "high_lr": 0.0008015789473684211, "low_lr": 1.6031578947368422e-05, "step": 377 }, { "epoch": 0.9914529914529915, "high_lr": 0.0008015789473684211, "low_lr": 1.6031578947368422e-05, "step": 377 }, { "epoch": 0.9914529914529915, "high_lr": 0.0008015789473684211, "low_lr": 1.6031578947368422e-05, "step": 377 }, { "epoch": 0.9914529914529915, "high_lr": 0.0008015789473684211, "low_lr": 1.6031578947368422e-05, "step": 377 }, { "epoch": 0.9914529914529915, "high_lr": 0.0008015789473684211, "low_lr": 1.6031578947368422e-05, "step": 377 }, { "epoch": 0.9914529914529915, "high_lr": 0.0008015789473684211, "low_lr": 1.6031578947368422e-05, "step": 377 }, { "epoch": 0.9914529914529915, "high_lr": 0.0008015789473684211, "low_lr": 1.6031578947368422e-05, "step": 377 }, { "epoch": 0.9940828402366864, "grad_norm": 0.975928783416748, "learning_rate": 0.0008010526315789474, "loss": 1.5553, "step": 378 }, { "epoch": 0.9940828402366864, "high_lr": 0.0008010526315789474, "low_lr": 1.6021052631578947e-05, "step": 378 }, { "epoch": 0.9940828402366864, "high_lr": 0.0008010526315789474, "low_lr": 1.6021052631578947e-05, "step": 378 }, { "epoch": 0.9940828402366864, "high_lr": 0.0008010526315789474, "low_lr": 1.6021052631578947e-05, "step": 378 }, { "epoch": 0.9940828402366864, "high_lr": 0.0008010526315789474, "low_lr": 1.6021052631578947e-05, "step": 378 }, { "epoch": 0.9940828402366864, "high_lr": 0.0008010526315789474, "low_lr": 1.6021052631578947e-05, "step": 378 }, { "epoch": 0.9940828402366864, "high_lr": 0.0008010526315789474, "low_lr": 1.6021052631578947e-05, "step": 378 }, { "epoch": 0.9940828402366864, "high_lr": 0.0008010526315789474, "low_lr": 1.6021052631578947e-05, "step": 378 }, { "epoch": 0.9940828402366864, "high_lr": 0.0008010526315789474, "low_lr": 1.6021052631578947e-05, "step": 378 }, { "epoch": 0.9967126890203813, "grad_norm": 1.1215518712997437, "learning_rate": 0.0008005263157894736, "loss": 1.602, "step": 379 }, { "epoch": 0.9967126890203813, "high_lr": 0.0008005263157894736, "low_lr": 1.6010526315789475e-05, "step": 379 }, { "epoch": 0.9967126890203813, "high_lr": 0.0008005263157894736, "low_lr": 1.6010526315789475e-05, "step": 379 }, { "epoch": 0.9967126890203813, "high_lr": 0.0008005263157894736, "low_lr": 1.6010526315789475e-05, "step": 379 }, { "epoch": 0.9967126890203813, "high_lr": 0.0008005263157894736, "low_lr": 1.6010526315789475e-05, "step": 379 }, { "epoch": 0.9967126890203813, "high_lr": 0.0008005263157894736, "low_lr": 1.6010526315789475e-05, "step": 379 }, { "epoch": 0.9967126890203813, "high_lr": 0.0008005263157894736, "low_lr": 1.6010526315789475e-05, "step": 379 }, { "epoch": 0.9967126890203813, "high_lr": 0.0008005263157894736, "low_lr": 1.6010526315789475e-05, "step": 379 }, { "epoch": 0.9967126890203813, "high_lr": 0.0008005263157894736, "low_lr": 1.6010526315789475e-05, "step": 379 }, { "epoch": 0.9993425378040762, "grad_norm": 0.9771432280540466, "learning_rate": 0.0008, "loss": 1.5582, "step": 380 }, { "epoch": 0.9993425378040762, "high_lr": 0.0008, "low_lr": 1.6000000000000003e-05, "step": 380 }, { "epoch": 0.9993425378040762, "high_lr": 0.0008, "low_lr": 1.6000000000000003e-05, "step": 380 }, { "epoch": 0.9993425378040762, "high_lr": 0.0008, "low_lr": 1.6000000000000003e-05, "step": 380 }, { "epoch": 0.9993425378040762, "high_lr": 0.0008, "low_lr": 1.6000000000000003e-05, "step": 380 }, { "epoch": 0.9993425378040762, "high_lr": 0.0008, "low_lr": 1.6000000000000003e-05, "step": 380 }, { "epoch": 0.9993425378040762, "high_lr": 0.0008, "low_lr": 1.6000000000000003e-05, "step": 380 }, { "epoch": 0.9993425378040762, "high_lr": 0.0008, "low_lr": 1.6000000000000003e-05, "step": 380 }, { "epoch": 0.9993425378040762, "high_lr": 0.0008, "low_lr": 1.6000000000000003e-05, "step": 380 }, { "epoch": 1.0019723865877712, "grad_norm": 0.9735488891601562, "learning_rate": 0.0007994736842105263, "loss": 1.5239, "step": 381 }, { "epoch": 1.0019723865877712, "high_lr": 0.0007994736842105263, "low_lr": 1.5989473684210527e-05, "step": 381 }, { "epoch": 1.0019723865877712, "high_lr": 0.0007994736842105263, "low_lr": 1.5989473684210527e-05, "step": 381 }, { "epoch": 1.0019723865877712, "high_lr": 0.0007994736842105263, "low_lr": 1.5989473684210527e-05, "step": 381 }, { "epoch": 1.0019723865877712, "high_lr": 0.0007994736842105263, "low_lr": 1.5989473684210527e-05, "step": 381 }, { "epoch": 1.0019723865877712, "high_lr": 0.0007994736842105263, "low_lr": 1.5989473684210527e-05, "step": 381 }, { "epoch": 1.0019723865877712, "high_lr": 0.0007994736842105263, "low_lr": 1.5989473684210527e-05, "step": 381 }, { "epoch": 1.0019723865877712, "high_lr": 0.0007994736842105263, "low_lr": 1.5989473684210527e-05, "step": 381 }, { "epoch": 1.0019723865877712, "high_lr": 0.0007994736842105263, "low_lr": 1.5989473684210527e-05, "step": 381 }, { "epoch": 1.004602235371466, "grad_norm": 0.9668174982070923, "learning_rate": 0.0007989473684210526, "loss": 1.4952, "step": 382 }, { "epoch": 1.004602235371466, "high_lr": 0.0007989473684210526, "low_lr": 1.5978947368421055e-05, "step": 382 }, { "epoch": 1.004602235371466, "high_lr": 0.0007989473684210526, "low_lr": 1.5978947368421055e-05, "step": 382 }, { "epoch": 1.004602235371466, "high_lr": 0.0007989473684210526, "low_lr": 1.5978947368421055e-05, "step": 382 }, { "epoch": 1.004602235371466, "high_lr": 0.0007989473684210526, "low_lr": 1.5978947368421055e-05, "step": 382 }, { "epoch": 1.004602235371466, "high_lr": 0.0007989473684210526, "low_lr": 1.5978947368421055e-05, "step": 382 }, { "epoch": 1.004602235371466, "high_lr": 0.0007989473684210526, "low_lr": 1.5978947368421055e-05, "step": 382 }, { "epoch": 1.004602235371466, "high_lr": 0.0007989473684210526, "low_lr": 1.5978947368421055e-05, "step": 382 }, { "epoch": 1.004602235371466, "high_lr": 0.0007989473684210526, "low_lr": 1.5978947368421055e-05, "step": 382 }, { "epoch": 1.0072320841551612, "grad_norm": 0.9419265389442444, "learning_rate": 0.0007984210526315789, "loss": 1.5457, "step": 383 }, { "epoch": 1.0072320841551612, "high_lr": 0.0007984210526315789, "low_lr": 1.596842105263158e-05, "step": 383 }, { "epoch": 1.0072320841551612, "high_lr": 0.0007984210526315789, "low_lr": 1.596842105263158e-05, "step": 383 }, { "epoch": 1.0072320841551612, "high_lr": 0.0007984210526315789, "low_lr": 1.596842105263158e-05, "step": 383 }, { "epoch": 1.0072320841551612, "high_lr": 0.0007984210526315789, "low_lr": 1.596842105263158e-05, "step": 383 }, { "epoch": 1.0072320841551612, "high_lr": 0.0007984210526315789, "low_lr": 1.596842105263158e-05, "step": 383 }, { "epoch": 1.0072320841551612, "high_lr": 0.0007984210526315789, "low_lr": 1.596842105263158e-05, "step": 383 }, { "epoch": 1.0072320841551612, "high_lr": 0.0007984210526315789, "low_lr": 1.596842105263158e-05, "step": 383 }, { "epoch": 1.0072320841551612, "high_lr": 0.0007984210526315789, "low_lr": 1.596842105263158e-05, "step": 383 }, { "epoch": 1.009861932938856, "grad_norm": 1.009337067604065, "learning_rate": 0.0007978947368421052, "loss": 1.5266, "step": 384 }, { "epoch": 1.009861932938856, "high_lr": 0.0007978947368421052, "low_lr": 1.5957894736842105e-05, "step": 384 }, { "epoch": 1.009861932938856, "high_lr": 0.0007978947368421052, "low_lr": 1.5957894736842105e-05, "step": 384 }, { "epoch": 1.009861932938856, "high_lr": 0.0007978947368421052, "low_lr": 1.5957894736842105e-05, "step": 384 }, { "epoch": 1.009861932938856, "high_lr": 0.0007978947368421052, "low_lr": 1.5957894736842105e-05, "step": 384 }, { "epoch": 1.009861932938856, "high_lr": 0.0007978947368421052, "low_lr": 1.5957894736842105e-05, "step": 384 }, { "epoch": 1.009861932938856, "high_lr": 0.0007978947368421052, "low_lr": 1.5957894736842105e-05, "step": 384 }, { "epoch": 1.009861932938856, "high_lr": 0.0007978947368421052, "low_lr": 1.5957894736842105e-05, "step": 384 }, { "epoch": 1.009861932938856, "high_lr": 0.0007978947368421052, "low_lr": 1.5957894736842105e-05, "step": 384 }, { "epoch": 1.012491781722551, "grad_norm": 0.928406834602356, "learning_rate": 0.0007973684210526317, "loss": 1.4883, "step": 385 }, { "epoch": 1.012491781722551, "high_lr": 0.0007973684210526317, "low_lr": 1.5947368421052633e-05, "step": 385 }, { "epoch": 1.012491781722551, "high_lr": 0.0007973684210526317, "low_lr": 1.5947368421052633e-05, "step": 385 }, { "epoch": 1.012491781722551, "high_lr": 0.0007973684210526317, "low_lr": 1.5947368421052633e-05, "step": 385 }, { "epoch": 1.012491781722551, "high_lr": 0.0007973684210526317, "low_lr": 1.5947368421052633e-05, "step": 385 }, { "epoch": 1.012491781722551, "high_lr": 0.0007973684210526317, "low_lr": 1.5947368421052633e-05, "step": 385 }, { "epoch": 1.012491781722551, "high_lr": 0.0007973684210526317, "low_lr": 1.5947368421052633e-05, "step": 385 }, { "epoch": 1.012491781722551, "high_lr": 0.0007973684210526317, "low_lr": 1.5947368421052633e-05, "step": 385 }, { "epoch": 1.012491781722551, "high_lr": 0.0007973684210526317, "low_lr": 1.5947368421052633e-05, "step": 385 }, { "epoch": 1.0151216305062458, "grad_norm": 1.0293240547180176, "learning_rate": 0.000796842105263158, "loss": 1.4763, "step": 386 }, { "epoch": 1.0151216305062458, "high_lr": 0.000796842105263158, "low_lr": 1.593684210526316e-05, "step": 386 }, { "epoch": 1.0151216305062458, "high_lr": 0.000796842105263158, "low_lr": 1.593684210526316e-05, "step": 386 }, { "epoch": 1.0151216305062458, "high_lr": 0.000796842105263158, "low_lr": 1.593684210526316e-05, "step": 386 }, { "epoch": 1.0151216305062458, "high_lr": 0.000796842105263158, "low_lr": 1.593684210526316e-05, "step": 386 }, { "epoch": 1.0151216305062458, "high_lr": 0.000796842105263158, "low_lr": 1.593684210526316e-05, "step": 386 }, { "epoch": 1.0151216305062458, "high_lr": 0.000796842105263158, "low_lr": 1.593684210526316e-05, "step": 386 }, { "epoch": 1.0151216305062458, "high_lr": 0.000796842105263158, "low_lr": 1.593684210526316e-05, "step": 386 }, { "epoch": 1.0151216305062458, "high_lr": 0.000796842105263158, "low_lr": 1.593684210526316e-05, "step": 386 }, { "epoch": 1.017751479289941, "grad_norm": 0.953455924987793, "learning_rate": 0.0007963157894736843, "loss": 1.4951, "step": 387 }, { "epoch": 1.017751479289941, "high_lr": 0.0007963157894736843, "low_lr": 1.5926315789473685e-05, "step": 387 }, { "epoch": 1.017751479289941, "high_lr": 0.0007963157894736843, "low_lr": 1.5926315789473685e-05, "step": 387 }, { "epoch": 1.017751479289941, "high_lr": 0.0007963157894736843, "low_lr": 1.5926315789473685e-05, "step": 387 }, { "epoch": 1.017751479289941, "high_lr": 0.0007963157894736843, "low_lr": 1.5926315789473685e-05, "step": 387 }, { "epoch": 1.017751479289941, "high_lr": 0.0007963157894736843, "low_lr": 1.5926315789473685e-05, "step": 387 }, { "epoch": 1.017751479289941, "high_lr": 0.0007963157894736843, "low_lr": 1.5926315789473685e-05, "step": 387 }, { "epoch": 1.017751479289941, "high_lr": 0.0007963157894736843, "low_lr": 1.5926315789473685e-05, "step": 387 }, { "epoch": 1.017751479289941, "high_lr": 0.0007963157894736843, "low_lr": 1.5926315789473685e-05, "step": 387 }, { "epoch": 1.0203813280736358, "grad_norm": 1.0384786128997803, "learning_rate": 0.0007957894736842105, "loss": 1.5585, "step": 388 }, { "epoch": 1.0203813280736358, "high_lr": 0.0007957894736842105, "low_lr": 1.591578947368421e-05, "step": 388 }, { "epoch": 1.0203813280736358, "high_lr": 0.0007957894736842105, "low_lr": 1.591578947368421e-05, "step": 388 }, { "epoch": 1.0203813280736358, "high_lr": 0.0007957894736842105, "low_lr": 1.591578947368421e-05, "step": 388 }, { "epoch": 1.0203813280736358, "high_lr": 0.0007957894736842105, "low_lr": 1.591578947368421e-05, "step": 388 }, { "epoch": 1.0203813280736358, "high_lr": 0.0007957894736842105, "low_lr": 1.591578947368421e-05, "step": 388 }, { "epoch": 1.0203813280736358, "high_lr": 0.0007957894736842105, "low_lr": 1.591578947368421e-05, "step": 388 }, { "epoch": 1.0203813280736358, "high_lr": 0.0007957894736842105, "low_lr": 1.591578947368421e-05, "step": 388 }, { "epoch": 1.0203813280736358, "high_lr": 0.0007957894736842105, "low_lr": 1.591578947368421e-05, "step": 388 }, { "epoch": 1.0230111768573307, "grad_norm": 1.0553350448608398, "learning_rate": 0.0007952631578947369, "loss": 1.4933, "step": 389 }, { "epoch": 1.0230111768573307, "high_lr": 0.0007952631578947369, "low_lr": 1.5905263157894738e-05, "step": 389 }, { "epoch": 1.0230111768573307, "high_lr": 0.0007952631578947369, "low_lr": 1.5905263157894738e-05, "step": 389 }, { "epoch": 1.0230111768573307, "high_lr": 0.0007952631578947369, "low_lr": 1.5905263157894738e-05, "step": 389 }, { "epoch": 1.0230111768573307, "high_lr": 0.0007952631578947369, "low_lr": 1.5905263157894738e-05, "step": 389 }, { "epoch": 1.0230111768573307, "high_lr": 0.0007952631578947369, "low_lr": 1.5905263157894738e-05, "step": 389 }, { "epoch": 1.0230111768573307, "high_lr": 0.0007952631578947369, "low_lr": 1.5905263157894738e-05, "step": 389 }, { "epoch": 1.0230111768573307, "high_lr": 0.0007952631578947369, "low_lr": 1.5905263157894738e-05, "step": 389 }, { "epoch": 1.0230111768573307, "high_lr": 0.0007952631578947369, "low_lr": 1.5905263157894738e-05, "step": 389 }, { "epoch": 1.0256410256410255, "grad_norm": 0.9986532330513, "learning_rate": 0.0007947368421052632, "loss": 1.5027, "step": 390 }, { "epoch": 1.0256410256410255, "high_lr": 0.0007947368421052632, "low_lr": 1.5894736842105266e-05, "step": 390 }, { "epoch": 1.0256410256410255, "high_lr": 0.0007947368421052632, "low_lr": 1.5894736842105266e-05, "step": 390 }, { "epoch": 1.0256410256410255, "high_lr": 0.0007947368421052632, "low_lr": 1.5894736842105266e-05, "step": 390 }, { "epoch": 1.0256410256410255, "high_lr": 0.0007947368421052632, "low_lr": 1.5894736842105266e-05, "step": 390 }, { "epoch": 1.0256410256410255, "high_lr": 0.0007947368421052632, "low_lr": 1.5894736842105266e-05, "step": 390 }, { "epoch": 1.0256410256410255, "high_lr": 0.0007947368421052632, "low_lr": 1.5894736842105266e-05, "step": 390 }, { "epoch": 1.0256410256410255, "high_lr": 0.0007947368421052632, "low_lr": 1.5894736842105266e-05, "step": 390 }, { "epoch": 1.0256410256410255, "high_lr": 0.0007947368421052632, "low_lr": 1.5894736842105266e-05, "step": 390 }, { "epoch": 1.0282708744247206, "grad_norm": 0.9266156554222107, "learning_rate": 0.0007942105263157895, "loss": 1.4784, "step": 391 }, { "epoch": 1.0282708744247206, "high_lr": 0.0007942105263157895, "low_lr": 1.588421052631579e-05, "step": 391 }, { "epoch": 1.0282708744247206, "high_lr": 0.0007942105263157895, "low_lr": 1.588421052631579e-05, "step": 391 }, { "epoch": 1.0282708744247206, "high_lr": 0.0007942105263157895, "low_lr": 1.588421052631579e-05, "step": 391 }, { "epoch": 1.0282708744247206, "high_lr": 0.0007942105263157895, "low_lr": 1.588421052631579e-05, "step": 391 }, { "epoch": 1.0282708744247206, "high_lr": 0.0007942105263157895, "low_lr": 1.588421052631579e-05, "step": 391 }, { "epoch": 1.0282708744247206, "high_lr": 0.0007942105263157895, "low_lr": 1.588421052631579e-05, "step": 391 }, { "epoch": 1.0282708744247206, "high_lr": 0.0007942105263157895, "low_lr": 1.588421052631579e-05, "step": 391 }, { "epoch": 1.0282708744247206, "high_lr": 0.0007942105263157895, "low_lr": 1.588421052631579e-05, "step": 391 }, { "epoch": 1.0309007232084155, "grad_norm": 0.9740056991577148, "learning_rate": 0.0007936842105263158, "loss": 1.5323, "step": 392 }, { "epoch": 1.0309007232084155, "high_lr": 0.0007936842105263158, "low_lr": 1.5873684210526315e-05, "step": 392 }, { "epoch": 1.0309007232084155, "high_lr": 0.0007936842105263158, "low_lr": 1.5873684210526315e-05, "step": 392 }, { "epoch": 1.0309007232084155, "high_lr": 0.0007936842105263158, "low_lr": 1.5873684210526315e-05, "step": 392 }, { "epoch": 1.0309007232084155, "high_lr": 0.0007936842105263158, "low_lr": 1.5873684210526315e-05, "step": 392 }, { "epoch": 1.0309007232084155, "high_lr": 0.0007936842105263158, "low_lr": 1.5873684210526315e-05, "step": 392 }, { "epoch": 1.0309007232084155, "high_lr": 0.0007936842105263158, "low_lr": 1.5873684210526315e-05, "step": 392 }, { "epoch": 1.0309007232084155, "high_lr": 0.0007936842105263158, "low_lr": 1.5873684210526315e-05, "step": 392 }, { "epoch": 1.0309007232084155, "high_lr": 0.0007936842105263158, "low_lr": 1.5873684210526315e-05, "step": 392 }, { "epoch": 1.0335305719921104, "grad_norm": 1.048331379890442, "learning_rate": 0.0007931578947368421, "loss": 1.5524, "step": 393 }, { "epoch": 1.0335305719921104, "high_lr": 0.0007931578947368421, "low_lr": 1.5863157894736843e-05, "step": 393 }, { "epoch": 1.0335305719921104, "high_lr": 0.0007931578947368421, "low_lr": 1.5863157894736843e-05, "step": 393 }, { "epoch": 1.0335305719921104, "high_lr": 0.0007931578947368421, "low_lr": 1.5863157894736843e-05, "step": 393 }, { "epoch": 1.0335305719921104, "high_lr": 0.0007931578947368421, "low_lr": 1.5863157894736843e-05, "step": 393 }, { "epoch": 1.0335305719921104, "high_lr": 0.0007931578947368421, "low_lr": 1.5863157894736843e-05, "step": 393 }, { "epoch": 1.0335305719921104, "high_lr": 0.0007931578947368421, "low_lr": 1.5863157894736843e-05, "step": 393 }, { "epoch": 1.0335305719921104, "high_lr": 0.0007931578947368421, "low_lr": 1.5863157894736843e-05, "step": 393 }, { "epoch": 1.0335305719921104, "high_lr": 0.0007931578947368421, "low_lr": 1.5863157894736843e-05, "step": 393 }, { "epoch": 1.0361604207758055, "grad_norm": 0.9899749159812927, "learning_rate": 0.0007926315789473685, "loss": 1.5418, "step": 394 }, { "epoch": 1.0361604207758055, "high_lr": 0.0007926315789473685, "low_lr": 1.585263157894737e-05, "step": 394 }, { "epoch": 1.0361604207758055, "high_lr": 0.0007926315789473685, "low_lr": 1.585263157894737e-05, "step": 394 }, { "epoch": 1.0361604207758055, "high_lr": 0.0007926315789473685, "low_lr": 1.585263157894737e-05, "step": 394 }, { "epoch": 1.0361604207758055, "high_lr": 0.0007926315789473685, "low_lr": 1.585263157894737e-05, "step": 394 }, { "epoch": 1.0361604207758055, "high_lr": 0.0007926315789473685, "low_lr": 1.585263157894737e-05, "step": 394 }, { "epoch": 1.0361604207758055, "high_lr": 0.0007926315789473685, "low_lr": 1.585263157894737e-05, "step": 394 }, { "epoch": 1.0361604207758055, "high_lr": 0.0007926315789473685, "low_lr": 1.585263157894737e-05, "step": 394 }, { "epoch": 1.0361604207758055, "high_lr": 0.0007926315789473685, "low_lr": 1.585263157894737e-05, "step": 394 }, { "epoch": 1.0387902695595004, "grad_norm": 0.9434759616851807, "learning_rate": 0.0007921052631578948, "loss": 1.5092, "step": 395 }, { "epoch": 1.0387902695595004, "high_lr": 0.0007921052631578948, "low_lr": 1.5842105263157896e-05, "step": 395 }, { "epoch": 1.0387902695595004, "high_lr": 0.0007921052631578948, "low_lr": 1.5842105263157896e-05, "step": 395 }, { "epoch": 1.0387902695595004, "high_lr": 0.0007921052631578948, "low_lr": 1.5842105263157896e-05, "step": 395 }, { "epoch": 1.0387902695595004, "high_lr": 0.0007921052631578948, "low_lr": 1.5842105263157896e-05, "step": 395 }, { "epoch": 1.0387902695595004, "high_lr": 0.0007921052631578948, "low_lr": 1.5842105263157896e-05, "step": 395 }, { "epoch": 1.0387902695595004, "high_lr": 0.0007921052631578948, "low_lr": 1.5842105263157896e-05, "step": 395 }, { "epoch": 1.0387902695595004, "high_lr": 0.0007921052631578948, "low_lr": 1.5842105263157896e-05, "step": 395 }, { "epoch": 1.0387902695595004, "high_lr": 0.0007921052631578948, "low_lr": 1.5842105263157896e-05, "step": 395 }, { "epoch": 1.0414201183431953, "grad_norm": 0.9564380049705505, "learning_rate": 0.000791578947368421, "loss": 1.5142, "step": 396 }, { "epoch": 1.0414201183431953, "high_lr": 0.000791578947368421, "low_lr": 1.5831578947368424e-05, "step": 396 }, { "epoch": 1.0414201183431953, "high_lr": 0.000791578947368421, "low_lr": 1.5831578947368424e-05, "step": 396 }, { "epoch": 1.0414201183431953, "high_lr": 0.000791578947368421, "low_lr": 1.5831578947368424e-05, "step": 396 }, { "epoch": 1.0414201183431953, "high_lr": 0.000791578947368421, "low_lr": 1.5831578947368424e-05, "step": 396 }, { "epoch": 1.0414201183431953, "high_lr": 0.000791578947368421, "low_lr": 1.5831578947368424e-05, "step": 396 }, { "epoch": 1.0414201183431953, "high_lr": 0.000791578947368421, "low_lr": 1.5831578947368424e-05, "step": 396 }, { "epoch": 1.0414201183431953, "high_lr": 0.000791578947368421, "low_lr": 1.5831578947368424e-05, "step": 396 }, { "epoch": 1.0414201183431953, "high_lr": 0.000791578947368421, "low_lr": 1.5831578947368424e-05, "step": 396 }, { "epoch": 1.0440499671268901, "grad_norm": 0.9293102025985718, "learning_rate": 0.0007910526315789473, "loss": 1.5236, "step": 397 }, { "epoch": 1.0440499671268901, "high_lr": 0.0007910526315789473, "low_lr": 1.582105263157895e-05, "step": 397 }, { "epoch": 1.0440499671268901, "high_lr": 0.0007910526315789473, "low_lr": 1.582105263157895e-05, "step": 397 }, { "epoch": 1.0440499671268901, "high_lr": 0.0007910526315789473, "low_lr": 1.582105263157895e-05, "step": 397 }, { "epoch": 1.0440499671268901, "high_lr": 0.0007910526315789473, "low_lr": 1.582105263157895e-05, "step": 397 }, { "epoch": 1.0440499671268901, "high_lr": 0.0007910526315789473, "low_lr": 1.582105263157895e-05, "step": 397 }, { "epoch": 1.0440499671268901, "high_lr": 0.0007910526315789473, "low_lr": 1.582105263157895e-05, "step": 397 }, { "epoch": 1.0440499671268901, "high_lr": 0.0007910526315789473, "low_lr": 1.582105263157895e-05, "step": 397 }, { "epoch": 1.0440499671268901, "high_lr": 0.0007910526315789473, "low_lr": 1.582105263157895e-05, "step": 397 }, { "epoch": 1.0466798159105852, "grad_norm": 0.9483926296234131, "learning_rate": 0.0007905263157894736, "loss": 1.5735, "step": 398 }, { "epoch": 1.0466798159105852, "high_lr": 0.0007905263157894736, "low_lr": 1.5810526315789473e-05, "step": 398 }, { "epoch": 1.0466798159105852, "high_lr": 0.0007905263157894736, "low_lr": 1.5810526315789473e-05, "step": 398 }, { "epoch": 1.0466798159105852, "high_lr": 0.0007905263157894736, "low_lr": 1.5810526315789473e-05, "step": 398 }, { "epoch": 1.0466798159105852, "high_lr": 0.0007905263157894736, "low_lr": 1.5810526315789473e-05, "step": 398 }, { "epoch": 1.0466798159105852, "high_lr": 0.0007905263157894736, "low_lr": 1.5810526315789473e-05, "step": 398 }, { "epoch": 1.0466798159105852, "high_lr": 0.0007905263157894736, "low_lr": 1.5810526315789473e-05, "step": 398 }, { "epoch": 1.0466798159105852, "high_lr": 0.0007905263157894736, "low_lr": 1.5810526315789473e-05, "step": 398 }, { "epoch": 1.0466798159105852, "high_lr": 0.0007905263157894736, "low_lr": 1.5810526315789473e-05, "step": 398 }, { "epoch": 1.04930966469428, "grad_norm": 1.06614089012146, "learning_rate": 0.00079, "loss": 1.5154, "step": 399 }, { "epoch": 1.04930966469428, "high_lr": 0.00079, "low_lr": 1.58e-05, "step": 399 }, { "epoch": 1.04930966469428, "high_lr": 0.00079, "low_lr": 1.58e-05, "step": 399 }, { "epoch": 1.04930966469428, "high_lr": 0.00079, "low_lr": 1.58e-05, "step": 399 }, { "epoch": 1.04930966469428, "high_lr": 0.00079, "low_lr": 1.58e-05, "step": 399 }, { "epoch": 1.04930966469428, "high_lr": 0.00079, "low_lr": 1.58e-05, "step": 399 }, { "epoch": 1.04930966469428, "high_lr": 0.00079, "low_lr": 1.58e-05, "step": 399 }, { "epoch": 1.04930966469428, "high_lr": 0.00079, "low_lr": 1.58e-05, "step": 399 }, { "epoch": 1.04930966469428, "high_lr": 0.00079, "low_lr": 1.58e-05, "step": 399 }, { "epoch": 1.051939513477975, "grad_norm": 1.038985013961792, "learning_rate": 0.0007894736842105263, "loss": 1.5807, "step": 400 }, { "epoch": 1.051939513477975, "high_lr": 0.0007894736842105263, "low_lr": 1.578947368421053e-05, "step": 400 }, { "epoch": 1.051939513477975, "high_lr": 0.0007894736842105263, "low_lr": 1.578947368421053e-05, "step": 400 }, { "epoch": 1.051939513477975, "high_lr": 0.0007894736842105263, "low_lr": 1.578947368421053e-05, "step": 400 }, { "epoch": 1.051939513477975, "high_lr": 0.0007894736842105263, "low_lr": 1.578947368421053e-05, "step": 400 }, { "epoch": 1.051939513477975, "high_lr": 0.0007894736842105263, "low_lr": 1.578947368421053e-05, "step": 400 }, { "epoch": 1.051939513477975, "high_lr": 0.0007894736842105263, "low_lr": 1.578947368421053e-05, "step": 400 }, { "epoch": 1.051939513477975, "high_lr": 0.0007894736842105263, "low_lr": 1.578947368421053e-05, "step": 400 }, { "epoch": 1.051939513477975, "high_lr": 0.0007894736842105263, "low_lr": 1.578947368421053e-05, "step": 400 }, { "epoch": 1.0545693622616699, "grad_norm": 0.9950482249259949, "learning_rate": 0.0007889473684210526, "loss": 1.4867, "step": 401 }, { "epoch": 1.0545693622616699, "high_lr": 0.0007889473684210526, "low_lr": 1.5778947368421054e-05, "step": 401 }, { "epoch": 1.0545693622616699, "high_lr": 0.0007889473684210526, "low_lr": 1.5778947368421054e-05, "step": 401 }, { "epoch": 1.0545693622616699, "high_lr": 0.0007889473684210526, "low_lr": 1.5778947368421054e-05, "step": 401 }, { "epoch": 1.0545693622616699, "high_lr": 0.0007889473684210526, "low_lr": 1.5778947368421054e-05, "step": 401 }, { "epoch": 1.0545693622616699, "high_lr": 0.0007889473684210526, "low_lr": 1.5778947368421054e-05, "step": 401 }, { "epoch": 1.0545693622616699, "high_lr": 0.0007889473684210526, "low_lr": 1.5778947368421054e-05, "step": 401 }, { "epoch": 1.0545693622616699, "high_lr": 0.0007889473684210526, "low_lr": 1.5778947368421054e-05, "step": 401 }, { "epoch": 1.0545693622616699, "high_lr": 0.0007889473684210526, "low_lr": 1.5778947368421054e-05, "step": 401 }, { "epoch": 1.057199211045365, "grad_norm": 1.071161150932312, "learning_rate": 0.000788421052631579, "loss": 1.5655, "step": 402 }, { "epoch": 1.057199211045365, "high_lr": 0.000788421052631579, "low_lr": 1.576842105263158e-05, "step": 402 }, { "epoch": 1.057199211045365, "high_lr": 0.000788421052631579, "low_lr": 1.576842105263158e-05, "step": 402 }, { "epoch": 1.057199211045365, "high_lr": 0.000788421052631579, "low_lr": 1.576842105263158e-05, "step": 402 }, { "epoch": 1.057199211045365, "high_lr": 0.000788421052631579, "low_lr": 1.576842105263158e-05, "step": 402 }, { "epoch": 1.057199211045365, "high_lr": 0.000788421052631579, "low_lr": 1.576842105263158e-05, "step": 402 }, { "epoch": 1.057199211045365, "high_lr": 0.000788421052631579, "low_lr": 1.576842105263158e-05, "step": 402 }, { "epoch": 1.057199211045365, "high_lr": 0.000788421052631579, "low_lr": 1.576842105263158e-05, "step": 402 }, { "epoch": 1.057199211045365, "high_lr": 0.000788421052631579, "low_lr": 1.576842105263158e-05, "step": 402 }, { "epoch": 1.0598290598290598, "grad_norm": 1.0037591457366943, "learning_rate": 0.0007878947368421054, "loss": 1.4863, "step": 403 }, { "epoch": 1.0598290598290598, "high_lr": 0.0007878947368421054, "low_lr": 1.5757894736842107e-05, "step": 403 }, { "epoch": 1.0598290598290598, "high_lr": 0.0007878947368421054, "low_lr": 1.5757894736842107e-05, "step": 403 }, { "epoch": 1.0598290598290598, "high_lr": 0.0007878947368421054, "low_lr": 1.5757894736842107e-05, "step": 403 }, { "epoch": 1.0598290598290598, "high_lr": 0.0007878947368421054, "low_lr": 1.5757894736842107e-05, "step": 403 }, { "epoch": 1.0598290598290598, "high_lr": 0.0007878947368421054, "low_lr": 1.5757894736842107e-05, "step": 403 }, { "epoch": 1.0598290598290598, "high_lr": 0.0007878947368421054, "low_lr": 1.5757894736842107e-05, "step": 403 }, { "epoch": 1.0598290598290598, "high_lr": 0.0007878947368421054, "low_lr": 1.5757894736842107e-05, "step": 403 }, { "epoch": 1.0598290598290598, "high_lr": 0.0007878947368421054, "low_lr": 1.5757894736842107e-05, "step": 403 }, { "epoch": 1.0624589086127547, "grad_norm": 0.9963755011558533, "learning_rate": 0.0007873684210526317, "loss": 1.5674, "step": 404 }, { "epoch": 1.0624589086127547, "high_lr": 0.0007873684210526317, "low_lr": 1.5747368421052635e-05, "step": 404 }, { "epoch": 1.0624589086127547, "high_lr": 0.0007873684210526317, "low_lr": 1.5747368421052635e-05, "step": 404 }, { "epoch": 1.0624589086127547, "high_lr": 0.0007873684210526317, "low_lr": 1.5747368421052635e-05, "step": 404 }, { "epoch": 1.0624589086127547, "high_lr": 0.0007873684210526317, "low_lr": 1.5747368421052635e-05, "step": 404 }, { "epoch": 1.0624589086127547, "high_lr": 0.0007873684210526317, "low_lr": 1.5747368421052635e-05, "step": 404 }, { "epoch": 1.0624589086127547, "high_lr": 0.0007873684210526317, "low_lr": 1.5747368421052635e-05, "step": 404 }, { "epoch": 1.0624589086127547, "high_lr": 0.0007873684210526317, "low_lr": 1.5747368421052635e-05, "step": 404 }, { "epoch": 1.0624589086127547, "high_lr": 0.0007873684210526317, "low_lr": 1.5747368421052635e-05, "step": 404 }, { "epoch": 1.0650887573964498, "grad_norm": 0.9359927773475647, "learning_rate": 0.0007868421052631579, "loss": 1.4635, "step": 405 }, { "epoch": 1.0650887573964498, "high_lr": 0.0007868421052631579, "low_lr": 1.573684210526316e-05, "step": 405 }, { "epoch": 1.0650887573964498, "high_lr": 0.0007868421052631579, "low_lr": 1.573684210526316e-05, "step": 405 }, { "epoch": 1.0650887573964498, "high_lr": 0.0007868421052631579, "low_lr": 1.573684210526316e-05, "step": 405 }, { "epoch": 1.0650887573964498, "high_lr": 0.0007868421052631579, "low_lr": 1.573684210526316e-05, "step": 405 }, { "epoch": 1.0650887573964498, "high_lr": 0.0007868421052631579, "low_lr": 1.573684210526316e-05, "step": 405 }, { "epoch": 1.0650887573964498, "high_lr": 0.0007868421052631579, "low_lr": 1.573684210526316e-05, "step": 405 }, { "epoch": 1.0650887573964498, "high_lr": 0.0007868421052631579, "low_lr": 1.573684210526316e-05, "step": 405 }, { "epoch": 1.0650887573964498, "high_lr": 0.0007868421052631579, "low_lr": 1.573684210526316e-05, "step": 405 }, { "epoch": 1.0677186061801447, "grad_norm": 0.9304128289222717, "learning_rate": 0.0007863157894736842, "loss": 1.4867, "step": 406 }, { "epoch": 1.0677186061801447, "high_lr": 0.0007863157894736842, "low_lr": 1.5726315789473684e-05, "step": 406 }, { "epoch": 1.0677186061801447, "high_lr": 0.0007863157894736842, "low_lr": 1.5726315789473684e-05, "step": 406 }, { "epoch": 1.0677186061801447, "high_lr": 0.0007863157894736842, "low_lr": 1.5726315789473684e-05, "step": 406 }, { "epoch": 1.0677186061801447, "high_lr": 0.0007863157894736842, "low_lr": 1.5726315789473684e-05, "step": 406 }, { "epoch": 1.0677186061801447, "high_lr": 0.0007863157894736842, "low_lr": 1.5726315789473684e-05, "step": 406 }, { "epoch": 1.0677186061801447, "high_lr": 0.0007863157894736842, "low_lr": 1.5726315789473684e-05, "step": 406 }, { "epoch": 1.0677186061801447, "high_lr": 0.0007863157894736842, "low_lr": 1.5726315789473684e-05, "step": 406 }, { "epoch": 1.0677186061801447, "high_lr": 0.0007863157894736842, "low_lr": 1.5726315789473684e-05, "step": 406 }, { "epoch": 1.0703484549638396, "grad_norm": 0.9901525974273682, "learning_rate": 0.0007857894736842105, "loss": 1.5089, "step": 407 }, { "epoch": 1.0703484549638396, "high_lr": 0.0007857894736842105, "low_lr": 1.5715789473684212e-05, "step": 407 }, { "epoch": 1.0703484549638396, "high_lr": 0.0007857894736842105, "low_lr": 1.5715789473684212e-05, "step": 407 }, { "epoch": 1.0703484549638396, "high_lr": 0.0007857894736842105, "low_lr": 1.5715789473684212e-05, "step": 407 }, { "epoch": 1.0703484549638396, "high_lr": 0.0007857894736842105, "low_lr": 1.5715789473684212e-05, "step": 407 }, { "epoch": 1.0703484549638396, "high_lr": 0.0007857894736842105, "low_lr": 1.5715789473684212e-05, "step": 407 }, { "epoch": 1.0703484549638396, "high_lr": 0.0007857894736842105, "low_lr": 1.5715789473684212e-05, "step": 407 }, { "epoch": 1.0703484549638396, "high_lr": 0.0007857894736842105, "low_lr": 1.5715789473684212e-05, "step": 407 }, { "epoch": 1.0703484549638396, "high_lr": 0.0007857894736842105, "low_lr": 1.5715789473684212e-05, "step": 407 }, { "epoch": 1.0729783037475344, "grad_norm": 1.1033786535263062, "learning_rate": 0.0007852631578947369, "loss": 1.487, "step": 408 }, { "epoch": 1.0729783037475344, "high_lr": 0.0007852631578947369, "low_lr": 1.570526315789474e-05, "step": 408 }, { "epoch": 1.0729783037475344, "high_lr": 0.0007852631578947369, "low_lr": 1.570526315789474e-05, "step": 408 }, { "epoch": 1.0729783037475344, "high_lr": 0.0007852631578947369, "low_lr": 1.570526315789474e-05, "step": 408 }, { "epoch": 1.0729783037475344, "high_lr": 0.0007852631578947369, "low_lr": 1.570526315789474e-05, "step": 408 }, { "epoch": 1.0729783037475344, "high_lr": 0.0007852631578947369, "low_lr": 1.570526315789474e-05, "step": 408 }, { "epoch": 1.0729783037475344, "high_lr": 0.0007852631578947369, "low_lr": 1.570526315789474e-05, "step": 408 }, { "epoch": 1.0729783037475344, "high_lr": 0.0007852631578947369, "low_lr": 1.570526315789474e-05, "step": 408 }, { "epoch": 1.0729783037475344, "high_lr": 0.0007852631578947369, "low_lr": 1.570526315789474e-05, "step": 408 }, { "epoch": 1.0756081525312295, "grad_norm": 1.0218335390090942, "learning_rate": 0.0007847368421052632, "loss": 1.5137, "step": 409 }, { "epoch": 1.0756081525312295, "high_lr": 0.0007847368421052632, "low_lr": 1.5694736842105264e-05, "step": 409 }, { "epoch": 1.0756081525312295, "high_lr": 0.0007847368421052632, "low_lr": 1.5694736842105264e-05, "step": 409 }, { "epoch": 1.0756081525312295, "high_lr": 0.0007847368421052632, "low_lr": 1.5694736842105264e-05, "step": 409 }, { "epoch": 1.0756081525312295, "high_lr": 0.0007847368421052632, "low_lr": 1.5694736842105264e-05, "step": 409 }, { "epoch": 1.0756081525312295, "high_lr": 0.0007847368421052632, "low_lr": 1.5694736842105264e-05, "step": 409 }, { "epoch": 1.0756081525312295, "high_lr": 0.0007847368421052632, "low_lr": 1.5694736842105264e-05, "step": 409 }, { "epoch": 1.0756081525312295, "high_lr": 0.0007847368421052632, "low_lr": 1.5694736842105264e-05, "step": 409 }, { "epoch": 1.0756081525312295, "high_lr": 0.0007847368421052632, "low_lr": 1.5694736842105264e-05, "step": 409 }, { "epoch": 1.0782380013149244, "grad_norm": 0.9986996054649353, "learning_rate": 0.0007842105263157895, "loss": 1.5205, "step": 410 }, { "epoch": 1.0782380013149244, "high_lr": 0.0007842105263157895, "low_lr": 1.568421052631579e-05, "step": 410 }, { "epoch": 1.0782380013149244, "high_lr": 0.0007842105263157895, "low_lr": 1.568421052631579e-05, "step": 410 }, { "epoch": 1.0782380013149244, "high_lr": 0.0007842105263157895, "low_lr": 1.568421052631579e-05, "step": 410 }, { "epoch": 1.0782380013149244, "high_lr": 0.0007842105263157895, "low_lr": 1.568421052631579e-05, "step": 410 }, { "epoch": 1.0782380013149244, "high_lr": 0.0007842105263157895, "low_lr": 1.568421052631579e-05, "step": 410 }, { "epoch": 1.0782380013149244, "high_lr": 0.0007842105263157895, "low_lr": 1.568421052631579e-05, "step": 410 }, { "epoch": 1.0782380013149244, "high_lr": 0.0007842105263157895, "low_lr": 1.568421052631579e-05, "step": 410 }, { "epoch": 1.0782380013149244, "high_lr": 0.0007842105263157895, "low_lr": 1.568421052631579e-05, "step": 410 }, { "epoch": 1.0808678500986193, "grad_norm": 0.9301803112030029, "learning_rate": 0.0007836842105263158, "loss": 1.4571, "step": 411 }, { "epoch": 1.0808678500986193, "high_lr": 0.0007836842105263158, "low_lr": 1.5673684210526317e-05, "step": 411 }, { "epoch": 1.0808678500986193, "high_lr": 0.0007836842105263158, "low_lr": 1.5673684210526317e-05, "step": 411 }, { "epoch": 1.0808678500986193, "high_lr": 0.0007836842105263158, "low_lr": 1.5673684210526317e-05, "step": 411 }, { "epoch": 1.0808678500986193, "high_lr": 0.0007836842105263158, "low_lr": 1.5673684210526317e-05, "step": 411 }, { "epoch": 1.0808678500986193, "high_lr": 0.0007836842105263158, "low_lr": 1.5673684210526317e-05, "step": 411 }, { "epoch": 1.0808678500986193, "high_lr": 0.0007836842105263158, "low_lr": 1.5673684210526317e-05, "step": 411 }, { "epoch": 1.0808678500986193, "high_lr": 0.0007836842105263158, "low_lr": 1.5673684210526317e-05, "step": 411 }, { "epoch": 1.0808678500986193, "high_lr": 0.0007836842105263158, "low_lr": 1.5673684210526317e-05, "step": 411 }, { "epoch": 1.0834976988823142, "grad_norm": 0.975524365901947, "learning_rate": 0.000783157894736842, "loss": 1.5262, "step": 412 }, { "epoch": 1.0834976988823142, "high_lr": 0.000783157894736842, "low_lr": 1.5663157894736842e-05, "step": 412 }, { "epoch": 1.0834976988823142, "high_lr": 0.000783157894736842, "low_lr": 1.5663157894736842e-05, "step": 412 }, { "epoch": 1.0834976988823142, "high_lr": 0.000783157894736842, "low_lr": 1.5663157894736842e-05, "step": 412 }, { "epoch": 1.0834976988823142, "high_lr": 0.000783157894736842, "low_lr": 1.5663157894736842e-05, "step": 412 }, { "epoch": 1.0834976988823142, "high_lr": 0.000783157894736842, "low_lr": 1.5663157894736842e-05, "step": 412 }, { "epoch": 1.0834976988823142, "high_lr": 0.000783157894736842, "low_lr": 1.5663157894736842e-05, "step": 412 }, { "epoch": 1.0834976988823142, "high_lr": 0.000783157894736842, "low_lr": 1.5663157894736842e-05, "step": 412 }, { "epoch": 1.0834976988823142, "high_lr": 0.000783157894736842, "low_lr": 1.5663157894736842e-05, "step": 412 }, { "epoch": 1.0861275476660093, "grad_norm": 0.9938554167747498, "learning_rate": 0.0007826315789473684, "loss": 1.5412, "step": 413 }, { "epoch": 1.0861275476660093, "high_lr": 0.0007826315789473684, "low_lr": 1.565263157894737e-05, "step": 413 }, { "epoch": 1.0861275476660093, "high_lr": 0.0007826315789473684, "low_lr": 1.565263157894737e-05, "step": 413 }, { "epoch": 1.0861275476660093, "high_lr": 0.0007826315789473684, "low_lr": 1.565263157894737e-05, "step": 413 }, { "epoch": 1.0861275476660093, "high_lr": 0.0007826315789473684, "low_lr": 1.565263157894737e-05, "step": 413 }, { "epoch": 1.0861275476660093, "high_lr": 0.0007826315789473684, "low_lr": 1.565263157894737e-05, "step": 413 }, { "epoch": 1.0861275476660093, "high_lr": 0.0007826315789473684, "low_lr": 1.565263157894737e-05, "step": 413 }, { "epoch": 1.0861275476660093, "high_lr": 0.0007826315789473684, "low_lr": 1.565263157894737e-05, "step": 413 }, { "epoch": 1.0861275476660093, "high_lr": 0.0007826315789473684, "low_lr": 1.565263157894737e-05, "step": 413 }, { "epoch": 1.0887573964497042, "grad_norm": 0.9773283004760742, "learning_rate": 0.0007821052631578947, "loss": 1.5134, "step": 414 }, { "epoch": 1.0887573964497042, "high_lr": 0.0007821052631578947, "low_lr": 1.5642105263157898e-05, "step": 414 }, { "epoch": 1.0887573964497042, "high_lr": 0.0007821052631578947, "low_lr": 1.5642105263157898e-05, "step": 414 }, { "epoch": 1.0887573964497042, "high_lr": 0.0007821052631578947, "low_lr": 1.5642105263157898e-05, "step": 414 }, { "epoch": 1.0887573964497042, "high_lr": 0.0007821052631578947, "low_lr": 1.5642105263157898e-05, "step": 414 }, { "epoch": 1.0887573964497042, "high_lr": 0.0007821052631578947, "low_lr": 1.5642105263157898e-05, "step": 414 }, { "epoch": 1.0887573964497042, "high_lr": 0.0007821052631578947, "low_lr": 1.5642105263157898e-05, "step": 414 }, { "epoch": 1.0887573964497042, "high_lr": 0.0007821052631578947, "low_lr": 1.5642105263157898e-05, "step": 414 }, { "epoch": 1.0887573964497042, "high_lr": 0.0007821052631578947, "low_lr": 1.5642105263157898e-05, "step": 414 }, { "epoch": 1.091387245233399, "grad_norm": 1.018235206604004, "learning_rate": 0.000781578947368421, "loss": 1.5264, "step": 415 }, { "epoch": 1.091387245233399, "high_lr": 0.000781578947368421, "low_lr": 1.5631578947368422e-05, "step": 415 }, { "epoch": 1.091387245233399, "high_lr": 0.000781578947368421, "low_lr": 1.5631578947368422e-05, "step": 415 }, { "epoch": 1.091387245233399, "high_lr": 0.000781578947368421, "low_lr": 1.5631578947368422e-05, "step": 415 }, { "epoch": 1.091387245233399, "high_lr": 0.000781578947368421, "low_lr": 1.5631578947368422e-05, "step": 415 }, { "epoch": 1.091387245233399, "high_lr": 0.000781578947368421, "low_lr": 1.5631578947368422e-05, "step": 415 }, { "epoch": 1.091387245233399, "high_lr": 0.000781578947368421, "low_lr": 1.5631578947368422e-05, "step": 415 }, { "epoch": 1.091387245233399, "high_lr": 0.000781578947368421, "low_lr": 1.5631578947368422e-05, "step": 415 }, { "epoch": 1.091387245233399, "high_lr": 0.000781578947368421, "low_lr": 1.5631578947368422e-05, "step": 415 }, { "epoch": 1.0940170940170941, "grad_norm": 0.9918999671936035, "learning_rate": 0.0007810526315789473, "loss": 1.5063, "step": 416 }, { "epoch": 1.0940170940170941, "high_lr": 0.0007810526315789473, "low_lr": 1.5621052631578947e-05, "step": 416 }, { "epoch": 1.0940170940170941, "high_lr": 0.0007810526315789473, "low_lr": 1.5621052631578947e-05, "step": 416 }, { "epoch": 1.0940170940170941, "high_lr": 0.0007810526315789473, "low_lr": 1.5621052631578947e-05, "step": 416 }, { "epoch": 1.0940170940170941, "high_lr": 0.0007810526315789473, "low_lr": 1.5621052631578947e-05, "step": 416 }, { "epoch": 1.0940170940170941, "high_lr": 0.0007810526315789473, "low_lr": 1.5621052631578947e-05, "step": 416 }, { "epoch": 1.0940170940170941, "high_lr": 0.0007810526315789473, "low_lr": 1.5621052631578947e-05, "step": 416 }, { "epoch": 1.0940170940170941, "high_lr": 0.0007810526315789473, "low_lr": 1.5621052631578947e-05, "step": 416 }, { "epoch": 1.0940170940170941, "high_lr": 0.0007810526315789473, "low_lr": 1.5621052631578947e-05, "step": 416 }, { "epoch": 1.096646942800789, "grad_norm": 0.954351007938385, "learning_rate": 0.0007805263157894737, "loss": 1.5365, "step": 417 }, { "epoch": 1.096646942800789, "high_lr": 0.0007805263157894737, "low_lr": 1.5610526315789475e-05, "step": 417 }, { "epoch": 1.096646942800789, "high_lr": 0.0007805263157894737, "low_lr": 1.5610526315789475e-05, "step": 417 }, { "epoch": 1.096646942800789, "high_lr": 0.0007805263157894737, "low_lr": 1.5610526315789475e-05, "step": 417 }, { "epoch": 1.096646942800789, "high_lr": 0.0007805263157894737, "low_lr": 1.5610526315789475e-05, "step": 417 }, { "epoch": 1.096646942800789, "high_lr": 0.0007805263157894737, "low_lr": 1.5610526315789475e-05, "step": 417 }, { "epoch": 1.096646942800789, "high_lr": 0.0007805263157894737, "low_lr": 1.5610526315789475e-05, "step": 417 }, { "epoch": 1.096646942800789, "high_lr": 0.0007805263157894737, "low_lr": 1.5610526315789475e-05, "step": 417 }, { "epoch": 1.096646942800789, "high_lr": 0.0007805263157894737, "low_lr": 1.5610526315789475e-05, "step": 417 }, { "epoch": 1.0992767915844839, "grad_norm": 0.9554334282875061, "learning_rate": 0.0007800000000000001, "loss": 1.4601, "step": 418 }, { "epoch": 1.0992767915844839, "high_lr": 0.0007800000000000001, "low_lr": 1.5600000000000003e-05, "step": 418 }, { "epoch": 1.0992767915844839, "high_lr": 0.0007800000000000001, "low_lr": 1.5600000000000003e-05, "step": 418 }, { "epoch": 1.0992767915844839, "high_lr": 0.0007800000000000001, "low_lr": 1.5600000000000003e-05, "step": 418 }, { "epoch": 1.0992767915844839, "high_lr": 0.0007800000000000001, "low_lr": 1.5600000000000003e-05, "step": 418 }, { "epoch": 1.0992767915844839, "high_lr": 0.0007800000000000001, "low_lr": 1.5600000000000003e-05, "step": 418 }, { "epoch": 1.0992767915844839, "high_lr": 0.0007800000000000001, "low_lr": 1.5600000000000003e-05, "step": 418 }, { "epoch": 1.0992767915844839, "high_lr": 0.0007800000000000001, "low_lr": 1.5600000000000003e-05, "step": 418 }, { "epoch": 1.0992767915844839, "high_lr": 0.0007800000000000001, "low_lr": 1.5600000000000003e-05, "step": 418 }, { "epoch": 1.1019066403681788, "grad_norm": 0.938503086566925, "learning_rate": 0.0007794736842105264, "loss": 1.5218, "step": 419 }, { "epoch": 1.1019066403681788, "high_lr": 0.0007794736842105264, "low_lr": 1.5589473684210528e-05, "step": 419 }, { "epoch": 1.1019066403681788, "high_lr": 0.0007794736842105264, "low_lr": 1.5589473684210528e-05, "step": 419 }, { "epoch": 1.1019066403681788, "high_lr": 0.0007794736842105264, "low_lr": 1.5589473684210528e-05, "step": 419 }, { "epoch": 1.1019066403681788, "high_lr": 0.0007794736842105264, "low_lr": 1.5589473684210528e-05, "step": 419 }, { "epoch": 1.1019066403681788, "high_lr": 0.0007794736842105264, "low_lr": 1.5589473684210528e-05, "step": 419 }, { "epoch": 1.1019066403681788, "high_lr": 0.0007794736842105264, "low_lr": 1.5589473684210528e-05, "step": 419 }, { "epoch": 1.1019066403681788, "high_lr": 0.0007794736842105264, "low_lr": 1.5589473684210528e-05, "step": 419 }, { "epoch": 1.1019066403681788, "high_lr": 0.0007794736842105264, "low_lr": 1.5589473684210528e-05, "step": 419 }, { "epoch": 1.1045364891518739, "grad_norm": 1.0199605226516724, "learning_rate": 0.0007789473684210527, "loss": 1.5454, "step": 420 }, { "epoch": 1.1045364891518739, "high_lr": 0.0007789473684210527, "low_lr": 1.5578947368421052e-05, "step": 420 }, { "epoch": 1.1045364891518739, "high_lr": 0.0007789473684210527, "low_lr": 1.5578947368421052e-05, "step": 420 }, { "epoch": 1.1045364891518739, "high_lr": 0.0007789473684210527, "low_lr": 1.5578947368421052e-05, "step": 420 }, { "epoch": 1.1045364891518739, "high_lr": 0.0007789473684210527, "low_lr": 1.5578947368421052e-05, "step": 420 }, { "epoch": 1.1045364891518739, "high_lr": 0.0007789473684210527, "low_lr": 1.5578947368421052e-05, "step": 420 }, { "epoch": 1.1045364891518739, "high_lr": 0.0007789473684210527, "low_lr": 1.5578947368421052e-05, "step": 420 }, { "epoch": 1.1045364891518739, "high_lr": 0.0007789473684210527, "low_lr": 1.5578947368421052e-05, "step": 420 }, { "epoch": 1.1045364891518739, "high_lr": 0.0007789473684210527, "low_lr": 1.5578947368421052e-05, "step": 420 }, { "epoch": 1.1071663379355687, "grad_norm": 0.986962080001831, "learning_rate": 0.000778421052631579, "loss": 1.5153, "step": 421 }, { "epoch": 1.1071663379355687, "high_lr": 0.000778421052631579, "low_lr": 1.556842105263158e-05, "step": 421 }, { "epoch": 1.1071663379355687, "high_lr": 0.000778421052631579, "low_lr": 1.556842105263158e-05, "step": 421 }, { "epoch": 1.1071663379355687, "high_lr": 0.000778421052631579, "low_lr": 1.556842105263158e-05, "step": 421 }, { "epoch": 1.1071663379355687, "high_lr": 0.000778421052631579, "low_lr": 1.556842105263158e-05, "step": 421 }, { "epoch": 1.1071663379355687, "high_lr": 0.000778421052631579, "low_lr": 1.556842105263158e-05, "step": 421 }, { "epoch": 1.1071663379355687, "high_lr": 0.000778421052631579, "low_lr": 1.556842105263158e-05, "step": 421 }, { "epoch": 1.1071663379355687, "high_lr": 0.000778421052631579, "low_lr": 1.556842105263158e-05, "step": 421 }, { "epoch": 1.1071663379355687, "high_lr": 0.000778421052631579, "low_lr": 1.556842105263158e-05, "step": 421 }, { "epoch": 1.1097961867192636, "grad_norm": 0.9597830176353455, "learning_rate": 0.0007778947368421053, "loss": 1.444, "step": 422 }, { "epoch": 1.1097961867192636, "high_lr": 0.0007778947368421053, "low_lr": 1.555789473684211e-05, "step": 422 }, { "epoch": 1.1097961867192636, "high_lr": 0.0007778947368421053, "low_lr": 1.555789473684211e-05, "step": 422 }, { "epoch": 1.1097961867192636, "high_lr": 0.0007778947368421053, "low_lr": 1.555789473684211e-05, "step": 422 }, { "epoch": 1.1097961867192636, "high_lr": 0.0007778947368421053, "low_lr": 1.555789473684211e-05, "step": 422 }, { "epoch": 1.1097961867192636, "high_lr": 0.0007778947368421053, "low_lr": 1.555789473684211e-05, "step": 422 }, { "epoch": 1.1097961867192636, "high_lr": 0.0007778947368421053, "low_lr": 1.555789473684211e-05, "step": 422 }, { "epoch": 1.1097961867192636, "high_lr": 0.0007778947368421053, "low_lr": 1.555789473684211e-05, "step": 422 }, { "epoch": 1.1097961867192636, "high_lr": 0.0007778947368421053, "low_lr": 1.555789473684211e-05, "step": 422 }, { "epoch": 1.1124260355029585, "grad_norm": 0.9897134900093079, "learning_rate": 0.0007773684210526316, "loss": 1.4764, "step": 423 }, { "epoch": 1.1124260355029585, "high_lr": 0.0007773684210526316, "low_lr": 1.5547368421052633e-05, "step": 423 }, { "epoch": 1.1124260355029585, "high_lr": 0.0007773684210526316, "low_lr": 1.5547368421052633e-05, "step": 423 }, { "epoch": 1.1124260355029585, "high_lr": 0.0007773684210526316, "low_lr": 1.5547368421052633e-05, "step": 423 }, { "epoch": 1.1124260355029585, "high_lr": 0.0007773684210526316, "low_lr": 1.5547368421052633e-05, "step": 423 }, { "epoch": 1.1124260355029585, "high_lr": 0.0007773684210526316, "low_lr": 1.5547368421052633e-05, "step": 423 }, { "epoch": 1.1124260355029585, "high_lr": 0.0007773684210526316, "low_lr": 1.5547368421052633e-05, "step": 423 }, { "epoch": 1.1124260355029585, "high_lr": 0.0007773684210526316, "low_lr": 1.5547368421052633e-05, "step": 423 }, { "epoch": 1.1124260355029585, "high_lr": 0.0007773684210526316, "low_lr": 1.5547368421052633e-05, "step": 423 }, { "epoch": 1.1150558842866536, "grad_norm": 1.0836101770401, "learning_rate": 0.0007768421052631579, "loss": 1.5184, "step": 424 }, { "epoch": 1.1150558842866536, "high_lr": 0.0007768421052631579, "low_lr": 1.5536842105263158e-05, "step": 424 }, { "epoch": 1.1150558842866536, "high_lr": 0.0007768421052631579, "low_lr": 1.5536842105263158e-05, "step": 424 }, { "epoch": 1.1150558842866536, "high_lr": 0.0007768421052631579, "low_lr": 1.5536842105263158e-05, "step": 424 }, { "epoch": 1.1150558842866536, "high_lr": 0.0007768421052631579, "low_lr": 1.5536842105263158e-05, "step": 424 }, { "epoch": 1.1150558842866536, "high_lr": 0.0007768421052631579, "low_lr": 1.5536842105263158e-05, "step": 424 }, { "epoch": 1.1150558842866536, "high_lr": 0.0007768421052631579, "low_lr": 1.5536842105263158e-05, "step": 424 }, { "epoch": 1.1150558842866536, "high_lr": 0.0007768421052631579, "low_lr": 1.5536842105263158e-05, "step": 424 }, { "epoch": 1.1150558842866536, "high_lr": 0.0007768421052631579, "low_lr": 1.5536842105263158e-05, "step": 424 }, { "epoch": 1.1176857330703485, "grad_norm": 1.0581777095794678, "learning_rate": 0.0007763157894736842, "loss": 1.5481, "step": 425 }, { "epoch": 1.1176857330703485, "high_lr": 0.0007763157894736842, "low_lr": 1.5526315789473686e-05, "step": 425 }, { "epoch": 1.1176857330703485, "high_lr": 0.0007763157894736842, "low_lr": 1.5526315789473686e-05, "step": 425 }, { "epoch": 1.1176857330703485, "high_lr": 0.0007763157894736842, "low_lr": 1.5526315789473686e-05, "step": 425 }, { "epoch": 1.1176857330703485, "high_lr": 0.0007763157894736842, "low_lr": 1.5526315789473686e-05, "step": 425 }, { "epoch": 1.1176857330703485, "high_lr": 0.0007763157894736842, "low_lr": 1.5526315789473686e-05, "step": 425 }, { "epoch": 1.1176857330703485, "high_lr": 0.0007763157894736842, "low_lr": 1.5526315789473686e-05, "step": 425 }, { "epoch": 1.1176857330703485, "high_lr": 0.0007763157894736842, "low_lr": 1.5526315789473686e-05, "step": 425 }, { "epoch": 1.1176857330703485, "high_lr": 0.0007763157894736842, "low_lr": 1.5526315789473686e-05, "step": 425 }, { "epoch": 1.1203155818540433, "grad_norm": 1.011803150177002, "learning_rate": 0.0007757894736842105, "loss": 1.5031, "step": 426 }, { "epoch": 1.1203155818540433, "high_lr": 0.0007757894736842105, "low_lr": 1.551578947368421e-05, "step": 426 }, { "epoch": 1.1203155818540433, "high_lr": 0.0007757894736842105, "low_lr": 1.551578947368421e-05, "step": 426 }, { "epoch": 1.1203155818540433, "high_lr": 0.0007757894736842105, "low_lr": 1.551578947368421e-05, "step": 426 }, { "epoch": 1.1203155818540433, "high_lr": 0.0007757894736842105, "low_lr": 1.551578947368421e-05, "step": 426 }, { "epoch": 1.1203155818540433, "high_lr": 0.0007757894736842105, "low_lr": 1.551578947368421e-05, "step": 426 }, { "epoch": 1.1203155818540433, "high_lr": 0.0007757894736842105, "low_lr": 1.551578947368421e-05, "step": 426 }, { "epoch": 1.1203155818540433, "high_lr": 0.0007757894736842105, "low_lr": 1.551578947368421e-05, "step": 426 }, { "epoch": 1.1203155818540433, "high_lr": 0.0007757894736842105, "low_lr": 1.551578947368421e-05, "step": 426 }, { "epoch": 1.1229454306377384, "grad_norm": 0.9950527548789978, "learning_rate": 0.0007752631578947369, "loss": 1.526, "step": 427 }, { "epoch": 1.1229454306377384, "high_lr": 0.0007752631578947369, "low_lr": 1.550526315789474e-05, "step": 427 }, { "epoch": 1.1229454306377384, "high_lr": 0.0007752631578947369, "low_lr": 1.550526315789474e-05, "step": 427 }, { "epoch": 1.1229454306377384, "high_lr": 0.0007752631578947369, "low_lr": 1.550526315789474e-05, "step": 427 }, { "epoch": 1.1229454306377384, "high_lr": 0.0007752631578947369, "low_lr": 1.550526315789474e-05, "step": 427 }, { "epoch": 1.1229454306377384, "high_lr": 0.0007752631578947369, "low_lr": 1.550526315789474e-05, "step": 427 }, { "epoch": 1.1229454306377384, "high_lr": 0.0007752631578947369, "low_lr": 1.550526315789474e-05, "step": 427 }, { "epoch": 1.1229454306377384, "high_lr": 0.0007752631578947369, "low_lr": 1.550526315789474e-05, "step": 427 }, { "epoch": 1.1229454306377384, "high_lr": 0.0007752631578947369, "low_lr": 1.550526315789474e-05, "step": 427 }, { "epoch": 1.1255752794214333, "grad_norm": 1.0123891830444336, "learning_rate": 0.0007747368421052632, "loss": 1.5468, "step": 428 }, { "epoch": 1.1255752794214333, "high_lr": 0.0007747368421052632, "low_lr": 1.5494736842105263e-05, "step": 428 }, { "epoch": 1.1255752794214333, "high_lr": 0.0007747368421052632, "low_lr": 1.5494736842105263e-05, "step": 428 }, { "epoch": 1.1255752794214333, "high_lr": 0.0007747368421052632, "low_lr": 1.5494736842105263e-05, "step": 428 }, { "epoch": 1.1255752794214333, "high_lr": 0.0007747368421052632, "low_lr": 1.5494736842105263e-05, "step": 428 }, { "epoch": 1.1255752794214333, "high_lr": 0.0007747368421052632, "low_lr": 1.5494736842105263e-05, "step": 428 }, { "epoch": 1.1255752794214333, "high_lr": 0.0007747368421052632, "low_lr": 1.5494736842105263e-05, "step": 428 }, { "epoch": 1.1255752794214333, "high_lr": 0.0007747368421052632, "low_lr": 1.5494736842105263e-05, "step": 428 }, { "epoch": 1.1255752794214333, "high_lr": 0.0007747368421052632, "low_lr": 1.5494736842105263e-05, "step": 428 }, { "epoch": 1.1282051282051282, "grad_norm": 1.0693731307983398, "learning_rate": 0.0007742105263157895, "loss": 1.4981, "step": 429 }, { "epoch": 1.1282051282051282, "high_lr": 0.0007742105263157895, "low_lr": 1.548421052631579e-05, "step": 429 }, { "epoch": 1.1282051282051282, "high_lr": 0.0007742105263157895, "low_lr": 1.548421052631579e-05, "step": 429 }, { "epoch": 1.1282051282051282, "high_lr": 0.0007742105263157895, "low_lr": 1.548421052631579e-05, "step": 429 }, { "epoch": 1.1282051282051282, "high_lr": 0.0007742105263157895, "low_lr": 1.548421052631579e-05, "step": 429 }, { "epoch": 1.1282051282051282, "high_lr": 0.0007742105263157895, "low_lr": 1.548421052631579e-05, "step": 429 }, { "epoch": 1.1282051282051282, "high_lr": 0.0007742105263157895, "low_lr": 1.548421052631579e-05, "step": 429 }, { "epoch": 1.1282051282051282, "high_lr": 0.0007742105263157895, "low_lr": 1.548421052631579e-05, "step": 429 }, { "epoch": 1.1282051282051282, "high_lr": 0.0007742105263157895, "low_lr": 1.548421052631579e-05, "step": 429 }, { "epoch": 1.130834976988823, "grad_norm": 1.0787478685379028, "learning_rate": 0.0007736842105263157, "loss": 1.5177, "step": 430 }, { "epoch": 1.130834976988823, "high_lr": 0.0007736842105263157, "low_lr": 1.5473684210526316e-05, "step": 430 }, { "epoch": 1.130834976988823, "high_lr": 0.0007736842105263157, "low_lr": 1.5473684210526316e-05, "step": 430 }, { "epoch": 1.130834976988823, "high_lr": 0.0007736842105263157, "low_lr": 1.5473684210526316e-05, "step": 430 }, { "epoch": 1.130834976988823, "high_lr": 0.0007736842105263157, "low_lr": 1.5473684210526316e-05, "step": 430 }, { "epoch": 1.130834976988823, "high_lr": 0.0007736842105263157, "low_lr": 1.5473684210526316e-05, "step": 430 }, { "epoch": 1.130834976988823, "high_lr": 0.0007736842105263157, "low_lr": 1.5473684210526316e-05, "step": 430 }, { "epoch": 1.130834976988823, "high_lr": 0.0007736842105263157, "low_lr": 1.5473684210526316e-05, "step": 430 }, { "epoch": 1.130834976988823, "high_lr": 0.0007736842105263157, "low_lr": 1.5473684210526316e-05, "step": 430 }, { "epoch": 1.1334648257725182, "grad_norm": 1.0918725728988647, "learning_rate": 0.0007731578947368421, "loss": 1.5557, "step": 431 }, { "epoch": 1.1334648257725182, "high_lr": 0.0007731578947368421, "low_lr": 1.5463157894736844e-05, "step": 431 }, { "epoch": 1.1334648257725182, "high_lr": 0.0007731578947368421, "low_lr": 1.5463157894736844e-05, "step": 431 }, { "epoch": 1.1334648257725182, "high_lr": 0.0007731578947368421, "low_lr": 1.5463157894736844e-05, "step": 431 }, { "epoch": 1.1334648257725182, "high_lr": 0.0007731578947368421, "low_lr": 1.5463157894736844e-05, "step": 431 }, { "epoch": 1.1334648257725182, "high_lr": 0.0007731578947368421, "low_lr": 1.5463157894736844e-05, "step": 431 }, { "epoch": 1.1334648257725182, "high_lr": 0.0007731578947368421, "low_lr": 1.5463157894736844e-05, "step": 431 }, { "epoch": 1.1334648257725182, "high_lr": 0.0007731578947368421, "low_lr": 1.5463157894736844e-05, "step": 431 }, { "epoch": 1.1334648257725182, "high_lr": 0.0007731578947368421, "low_lr": 1.5463157894736844e-05, "step": 431 }, { "epoch": 1.136094674556213, "grad_norm": 1.0071529150009155, "learning_rate": 0.0007726315789473684, "loss": 1.5034, "step": 432 }, { "epoch": 1.136094674556213, "high_lr": 0.0007726315789473684, "low_lr": 1.545263157894737e-05, "step": 432 }, { "epoch": 1.136094674556213, "high_lr": 0.0007726315789473684, "low_lr": 1.545263157894737e-05, "step": 432 }, { "epoch": 1.136094674556213, "high_lr": 0.0007726315789473684, "low_lr": 1.545263157894737e-05, "step": 432 }, { "epoch": 1.136094674556213, "high_lr": 0.0007726315789473684, "low_lr": 1.545263157894737e-05, "step": 432 }, { "epoch": 1.136094674556213, "high_lr": 0.0007726315789473684, "low_lr": 1.545263157894737e-05, "step": 432 }, { "epoch": 1.136094674556213, "high_lr": 0.0007726315789473684, "low_lr": 1.545263157894737e-05, "step": 432 }, { "epoch": 1.136094674556213, "high_lr": 0.0007726315789473684, "low_lr": 1.545263157894737e-05, "step": 432 }, { "epoch": 1.136094674556213, "high_lr": 0.0007726315789473684, "low_lr": 1.545263157894737e-05, "step": 432 }, { "epoch": 1.138724523339908, "grad_norm": 1.2867735624313354, "learning_rate": 0.0007721052631578947, "loss": 1.5102, "step": 433 }, { "epoch": 1.138724523339908, "high_lr": 0.0007721052631578947, "low_lr": 1.5442105263157896e-05, "step": 433 }, { "epoch": 1.138724523339908, "high_lr": 0.0007721052631578947, "low_lr": 1.5442105263157896e-05, "step": 433 }, { "epoch": 1.138724523339908, "high_lr": 0.0007721052631578947, "low_lr": 1.5442105263157896e-05, "step": 433 }, { "epoch": 1.138724523339908, "high_lr": 0.0007721052631578947, "low_lr": 1.5442105263157896e-05, "step": 433 }, { "epoch": 1.138724523339908, "high_lr": 0.0007721052631578947, "low_lr": 1.5442105263157896e-05, "step": 433 }, { "epoch": 1.138724523339908, "high_lr": 0.0007721052631578947, "low_lr": 1.5442105263157896e-05, "step": 433 }, { "epoch": 1.138724523339908, "high_lr": 0.0007721052631578947, "low_lr": 1.5442105263157896e-05, "step": 433 }, { "epoch": 1.138724523339908, "high_lr": 0.0007721052631578947, "low_lr": 1.5442105263157896e-05, "step": 433 }, { "epoch": 1.1413543721236028, "grad_norm": 1.1004258394241333, "learning_rate": 0.000771578947368421, "loss": 1.4904, "step": 434 }, { "epoch": 1.1413543721236028, "high_lr": 0.000771578947368421, "low_lr": 1.543157894736842e-05, "step": 434 }, { "epoch": 1.1413543721236028, "high_lr": 0.000771578947368421, "low_lr": 1.543157894736842e-05, "step": 434 }, { "epoch": 1.1413543721236028, "high_lr": 0.000771578947368421, "low_lr": 1.543157894736842e-05, "step": 434 }, { "epoch": 1.1413543721236028, "high_lr": 0.000771578947368421, "low_lr": 1.543157894736842e-05, "step": 434 }, { "epoch": 1.1413543721236028, "high_lr": 0.000771578947368421, "low_lr": 1.543157894736842e-05, "step": 434 }, { "epoch": 1.1413543721236028, "high_lr": 0.000771578947368421, "low_lr": 1.543157894736842e-05, "step": 434 }, { "epoch": 1.1413543721236028, "high_lr": 0.000771578947368421, "low_lr": 1.543157894736842e-05, "step": 434 }, { "epoch": 1.1413543721236028, "high_lr": 0.000771578947368421, "low_lr": 1.543157894736842e-05, "step": 434 }, { "epoch": 1.143984220907298, "grad_norm": 1.057671070098877, "learning_rate": 0.0007710526315789474, "loss": 1.5217, "step": 435 }, { "epoch": 1.143984220907298, "high_lr": 0.0007710526315789474, "low_lr": 1.542105263157895e-05, "step": 435 }, { "epoch": 1.143984220907298, "high_lr": 0.0007710526315789474, "low_lr": 1.542105263157895e-05, "step": 435 }, { "epoch": 1.143984220907298, "high_lr": 0.0007710526315789474, "low_lr": 1.542105263157895e-05, "step": 435 }, { "epoch": 1.143984220907298, "high_lr": 0.0007710526315789474, "low_lr": 1.542105263157895e-05, "step": 435 }, { "epoch": 1.143984220907298, "high_lr": 0.0007710526315789474, "low_lr": 1.542105263157895e-05, "step": 435 }, { "epoch": 1.143984220907298, "high_lr": 0.0007710526315789474, "low_lr": 1.542105263157895e-05, "step": 435 }, { "epoch": 1.143984220907298, "high_lr": 0.0007710526315789474, "low_lr": 1.542105263157895e-05, "step": 435 }, { "epoch": 1.143984220907298, "high_lr": 0.0007710526315789474, "low_lr": 1.542105263157895e-05, "step": 435 }, { "epoch": 1.1466140696909928, "grad_norm": 1.0055783987045288, "learning_rate": 0.0007705263157894738, "loss": 1.5086, "step": 436 }, { "epoch": 1.1466140696909928, "high_lr": 0.0007705263157894738, "low_lr": 1.5410526315789477e-05, "step": 436 }, { "epoch": 1.1466140696909928, "high_lr": 0.0007705263157894738, "low_lr": 1.5410526315789477e-05, "step": 436 }, { "epoch": 1.1466140696909928, "high_lr": 0.0007705263157894738, "low_lr": 1.5410526315789477e-05, "step": 436 }, { "epoch": 1.1466140696909928, "high_lr": 0.0007705263157894738, "low_lr": 1.5410526315789477e-05, "step": 436 }, { "epoch": 1.1466140696909928, "high_lr": 0.0007705263157894738, "low_lr": 1.5410526315789477e-05, "step": 436 }, { "epoch": 1.1466140696909928, "high_lr": 0.0007705263157894738, "low_lr": 1.5410526315789477e-05, "step": 436 }, { "epoch": 1.1466140696909928, "high_lr": 0.0007705263157894738, "low_lr": 1.5410526315789477e-05, "step": 436 }, { "epoch": 1.1466140696909928, "high_lr": 0.0007705263157894738, "low_lr": 1.5410526315789477e-05, "step": 436 }, { "epoch": 1.1492439184746877, "grad_norm": 1.0507538318634033, "learning_rate": 0.0007700000000000001, "loss": 1.5008, "step": 437 }, { "epoch": 1.1492439184746877, "high_lr": 0.0007700000000000001, "low_lr": 1.54e-05, "step": 437 }, { "epoch": 1.1492439184746877, "high_lr": 0.0007700000000000001, "low_lr": 1.54e-05, "step": 437 }, { "epoch": 1.1492439184746877, "high_lr": 0.0007700000000000001, "low_lr": 1.54e-05, "step": 437 }, { "epoch": 1.1492439184746877, "high_lr": 0.0007700000000000001, "low_lr": 1.54e-05, "step": 437 }, { "epoch": 1.1492439184746877, "high_lr": 0.0007700000000000001, "low_lr": 1.54e-05, "step": 437 }, { "epoch": 1.1492439184746877, "high_lr": 0.0007700000000000001, "low_lr": 1.54e-05, "step": 437 }, { "epoch": 1.1492439184746877, "high_lr": 0.0007700000000000001, "low_lr": 1.54e-05, "step": 437 }, { "epoch": 1.1492439184746877, "high_lr": 0.0007700000000000001, "low_lr": 1.54e-05, "step": 437 }, { "epoch": 1.1518737672583828, "grad_norm": 0.9970671534538269, "learning_rate": 0.0007694736842105264, "loss": 1.477, "step": 438 }, { "epoch": 1.1518737672583828, "high_lr": 0.0007694736842105264, "low_lr": 1.5389473684210526e-05, "step": 438 }, { "epoch": 1.1518737672583828, "high_lr": 0.0007694736842105264, "low_lr": 1.5389473684210526e-05, "step": 438 }, { "epoch": 1.1518737672583828, "high_lr": 0.0007694736842105264, "low_lr": 1.5389473684210526e-05, "step": 438 }, { "epoch": 1.1518737672583828, "high_lr": 0.0007694736842105264, "low_lr": 1.5389473684210526e-05, "step": 438 }, { "epoch": 1.1518737672583828, "high_lr": 0.0007694736842105264, "low_lr": 1.5389473684210526e-05, "step": 438 }, { "epoch": 1.1518737672583828, "high_lr": 0.0007694736842105264, "low_lr": 1.5389473684210526e-05, "step": 438 }, { "epoch": 1.1518737672583828, "high_lr": 0.0007694736842105264, "low_lr": 1.5389473684210526e-05, "step": 438 }, { "epoch": 1.1518737672583828, "high_lr": 0.0007694736842105264, "low_lr": 1.5389473684210526e-05, "step": 438 }, { "epoch": 1.1545036160420776, "grad_norm": 1.0109833478927612, "learning_rate": 0.0007689473684210526, "loss": 1.5215, "step": 439 }, { "epoch": 1.1545036160420776, "high_lr": 0.0007689473684210526, "low_lr": 1.5378947368421054e-05, "step": 439 }, { "epoch": 1.1545036160420776, "high_lr": 0.0007689473684210526, "low_lr": 1.5378947368421054e-05, "step": 439 }, { "epoch": 1.1545036160420776, "high_lr": 0.0007689473684210526, "low_lr": 1.5378947368421054e-05, "step": 439 }, { "epoch": 1.1545036160420776, "high_lr": 0.0007689473684210526, "low_lr": 1.5378947368421054e-05, "step": 439 }, { "epoch": 1.1545036160420776, "high_lr": 0.0007689473684210526, "low_lr": 1.5378947368421054e-05, "step": 439 }, { "epoch": 1.1545036160420776, "high_lr": 0.0007689473684210526, "low_lr": 1.5378947368421054e-05, "step": 439 }, { "epoch": 1.1545036160420776, "high_lr": 0.0007689473684210526, "low_lr": 1.5378947368421054e-05, "step": 439 }, { "epoch": 1.1545036160420776, "high_lr": 0.0007689473684210526, "low_lr": 1.5378947368421054e-05, "step": 439 }, { "epoch": 1.1571334648257725, "grad_norm": 1.018898606300354, "learning_rate": 0.0007684210526315789, "loss": 1.5551, "step": 440 }, { "epoch": 1.1571334648257725, "high_lr": 0.0007684210526315789, "low_lr": 1.536842105263158e-05, "step": 440 }, { "epoch": 1.1571334648257725, "high_lr": 0.0007684210526315789, "low_lr": 1.536842105263158e-05, "step": 440 }, { "epoch": 1.1571334648257725, "high_lr": 0.0007684210526315789, "low_lr": 1.536842105263158e-05, "step": 440 }, { "epoch": 1.1571334648257725, "high_lr": 0.0007684210526315789, "low_lr": 1.536842105263158e-05, "step": 440 }, { "epoch": 1.1571334648257725, "high_lr": 0.0007684210526315789, "low_lr": 1.536842105263158e-05, "step": 440 }, { "epoch": 1.1571334648257725, "high_lr": 0.0007684210526315789, "low_lr": 1.536842105263158e-05, "step": 440 }, { "epoch": 1.1571334648257725, "high_lr": 0.0007684210526315789, "low_lr": 1.536842105263158e-05, "step": 440 }, { "epoch": 1.1571334648257725, "high_lr": 0.0007684210526315789, "low_lr": 1.536842105263158e-05, "step": 440 }, { "epoch": 1.1597633136094674, "grad_norm": 1.0163764953613281, "learning_rate": 0.0007678947368421053, "loss": 1.493, "step": 441 }, { "epoch": 1.1597633136094674, "high_lr": 0.0007678947368421053, "low_lr": 1.5357894736842107e-05, "step": 441 }, { "epoch": 1.1597633136094674, "high_lr": 0.0007678947368421053, "low_lr": 1.5357894736842107e-05, "step": 441 }, { "epoch": 1.1597633136094674, "high_lr": 0.0007678947368421053, "low_lr": 1.5357894736842107e-05, "step": 441 }, { "epoch": 1.1597633136094674, "high_lr": 0.0007678947368421053, "low_lr": 1.5357894736842107e-05, "step": 441 }, { "epoch": 1.1597633136094674, "high_lr": 0.0007678947368421053, "low_lr": 1.5357894736842107e-05, "step": 441 }, { "epoch": 1.1597633136094674, "high_lr": 0.0007678947368421053, "low_lr": 1.5357894736842107e-05, "step": 441 }, { "epoch": 1.1597633136094674, "high_lr": 0.0007678947368421053, "low_lr": 1.5357894736842107e-05, "step": 441 }, { "epoch": 1.1597633136094674, "high_lr": 0.0007678947368421053, "low_lr": 1.5357894736842107e-05, "step": 441 }, { "epoch": 1.1623931623931625, "grad_norm": 0.9848003387451172, "learning_rate": 0.0007673684210526316, "loss": 1.564, "step": 442 }, { "epoch": 1.1623931623931625, "high_lr": 0.0007673684210526316, "low_lr": 1.534736842105263e-05, "step": 442 }, { "epoch": 1.1623931623931625, "high_lr": 0.0007673684210526316, "low_lr": 1.534736842105263e-05, "step": 442 }, { "epoch": 1.1623931623931625, "high_lr": 0.0007673684210526316, "low_lr": 1.534736842105263e-05, "step": 442 }, { "epoch": 1.1623931623931625, "high_lr": 0.0007673684210526316, "low_lr": 1.534736842105263e-05, "step": 442 }, { "epoch": 1.1623931623931625, "high_lr": 0.0007673684210526316, "low_lr": 1.534736842105263e-05, "step": 442 }, { "epoch": 1.1623931623931625, "high_lr": 0.0007673684210526316, "low_lr": 1.534736842105263e-05, "step": 442 }, { "epoch": 1.1623931623931625, "high_lr": 0.0007673684210526316, "low_lr": 1.534736842105263e-05, "step": 442 }, { "epoch": 1.1623931623931625, "high_lr": 0.0007673684210526316, "low_lr": 1.534736842105263e-05, "step": 442 }, { "epoch": 1.1650230111768574, "grad_norm": 1.058992862701416, "learning_rate": 0.0007668421052631579, "loss": 1.4823, "step": 443 }, { "epoch": 1.1650230111768574, "high_lr": 0.0007668421052631579, "low_lr": 1.533684210526316e-05, "step": 443 }, { "epoch": 1.1650230111768574, "high_lr": 0.0007668421052631579, "low_lr": 1.533684210526316e-05, "step": 443 }, { "epoch": 1.1650230111768574, "high_lr": 0.0007668421052631579, "low_lr": 1.533684210526316e-05, "step": 443 }, { "epoch": 1.1650230111768574, "high_lr": 0.0007668421052631579, "low_lr": 1.533684210526316e-05, "step": 443 }, { "epoch": 1.1650230111768574, "high_lr": 0.0007668421052631579, "low_lr": 1.533684210526316e-05, "step": 443 }, { "epoch": 1.1650230111768574, "high_lr": 0.0007668421052631579, "low_lr": 1.533684210526316e-05, "step": 443 }, { "epoch": 1.1650230111768574, "high_lr": 0.0007668421052631579, "low_lr": 1.533684210526316e-05, "step": 443 }, { "epoch": 1.1650230111768574, "high_lr": 0.0007668421052631579, "low_lr": 1.533684210526316e-05, "step": 443 }, { "epoch": 1.1676528599605522, "grad_norm": 1.038759469985962, "learning_rate": 0.0007663157894736842, "loss": 1.521, "step": 444 }, { "epoch": 1.1676528599605522, "high_lr": 0.0007663157894736842, "low_lr": 1.5326315789473684e-05, "step": 444 }, { "epoch": 1.1676528599605522, "high_lr": 0.0007663157894736842, "low_lr": 1.5326315789473684e-05, "step": 444 }, { "epoch": 1.1676528599605522, "high_lr": 0.0007663157894736842, "low_lr": 1.5326315789473684e-05, "step": 444 }, { "epoch": 1.1676528599605522, "high_lr": 0.0007663157894736842, "low_lr": 1.5326315789473684e-05, "step": 444 }, { "epoch": 1.1676528599605522, "high_lr": 0.0007663157894736842, "low_lr": 1.5326315789473684e-05, "step": 444 }, { "epoch": 1.1676528599605522, "high_lr": 0.0007663157894736842, "low_lr": 1.5326315789473684e-05, "step": 444 }, { "epoch": 1.1676528599605522, "high_lr": 0.0007663157894736842, "low_lr": 1.5326315789473684e-05, "step": 444 }, { "epoch": 1.1676528599605522, "high_lr": 0.0007663157894736842, "low_lr": 1.5326315789473684e-05, "step": 444 }, { "epoch": 1.1702827087442471, "grad_norm": 1.034196138381958, "learning_rate": 0.0007657894736842106, "loss": 1.4978, "step": 445 }, { "epoch": 1.1702827087442471, "high_lr": 0.0007657894736842106, "low_lr": 1.5315789473684212e-05, "step": 445 }, { "epoch": 1.1702827087442471, "high_lr": 0.0007657894736842106, "low_lr": 1.5315789473684212e-05, "step": 445 }, { "epoch": 1.1702827087442471, "high_lr": 0.0007657894736842106, "low_lr": 1.5315789473684212e-05, "step": 445 }, { "epoch": 1.1702827087442471, "high_lr": 0.0007657894736842106, "low_lr": 1.5315789473684212e-05, "step": 445 }, { "epoch": 1.1702827087442471, "high_lr": 0.0007657894736842106, "low_lr": 1.5315789473684212e-05, "step": 445 }, { "epoch": 1.1702827087442471, "high_lr": 0.0007657894736842106, "low_lr": 1.5315789473684212e-05, "step": 445 }, { "epoch": 1.1702827087442471, "high_lr": 0.0007657894736842106, "low_lr": 1.5315789473684212e-05, "step": 445 }, { "epoch": 1.1702827087442471, "high_lr": 0.0007657894736842106, "low_lr": 1.5315789473684212e-05, "step": 445 }, { "epoch": 1.1729125575279422, "grad_norm": 1.1163936853408813, "learning_rate": 0.0007652631578947369, "loss": 1.5139, "step": 446 }, { "epoch": 1.1729125575279422, "high_lr": 0.0007652631578947369, "low_lr": 1.530526315789474e-05, "step": 446 }, { "epoch": 1.1729125575279422, "high_lr": 0.0007652631578947369, "low_lr": 1.530526315789474e-05, "step": 446 }, { "epoch": 1.1729125575279422, "high_lr": 0.0007652631578947369, "low_lr": 1.530526315789474e-05, "step": 446 }, { "epoch": 1.1729125575279422, "high_lr": 0.0007652631578947369, "low_lr": 1.530526315789474e-05, "step": 446 }, { "epoch": 1.1729125575279422, "high_lr": 0.0007652631578947369, "low_lr": 1.530526315789474e-05, "step": 446 }, { "epoch": 1.1729125575279422, "high_lr": 0.0007652631578947369, "low_lr": 1.530526315789474e-05, "step": 446 }, { "epoch": 1.1729125575279422, "high_lr": 0.0007652631578947369, "low_lr": 1.530526315789474e-05, "step": 446 }, { "epoch": 1.1729125575279422, "high_lr": 0.0007652631578947369, "low_lr": 1.530526315789474e-05, "step": 446 }, { "epoch": 1.175542406311637, "grad_norm": 1.0809240341186523, "learning_rate": 0.0007647368421052631, "loss": 1.5607, "step": 447 }, { "epoch": 1.175542406311637, "high_lr": 0.0007647368421052631, "low_lr": 1.5294736842105265e-05, "step": 447 }, { "epoch": 1.175542406311637, "high_lr": 0.0007647368421052631, "low_lr": 1.5294736842105265e-05, "step": 447 }, { "epoch": 1.175542406311637, "high_lr": 0.0007647368421052631, "low_lr": 1.5294736842105265e-05, "step": 447 }, { "epoch": 1.175542406311637, "high_lr": 0.0007647368421052631, "low_lr": 1.5294736842105265e-05, "step": 447 }, { "epoch": 1.175542406311637, "high_lr": 0.0007647368421052631, "low_lr": 1.5294736842105265e-05, "step": 447 }, { "epoch": 1.175542406311637, "high_lr": 0.0007647368421052631, "low_lr": 1.5294736842105265e-05, "step": 447 }, { "epoch": 1.175542406311637, "high_lr": 0.0007647368421052631, "low_lr": 1.5294736842105265e-05, "step": 447 }, { "epoch": 1.175542406311637, "high_lr": 0.0007647368421052631, "low_lr": 1.5294736842105265e-05, "step": 447 }, { "epoch": 1.178172255095332, "grad_norm": 1.0977026224136353, "learning_rate": 0.0007642105263157894, "loss": 1.5739, "step": 448 }, { "epoch": 1.178172255095332, "high_lr": 0.0007642105263157894, "low_lr": 1.528421052631579e-05, "step": 448 }, { "epoch": 1.178172255095332, "high_lr": 0.0007642105263157894, "low_lr": 1.528421052631579e-05, "step": 448 }, { "epoch": 1.178172255095332, "high_lr": 0.0007642105263157894, "low_lr": 1.528421052631579e-05, "step": 448 }, { "epoch": 1.178172255095332, "high_lr": 0.0007642105263157894, "low_lr": 1.528421052631579e-05, "step": 448 }, { "epoch": 1.178172255095332, "high_lr": 0.0007642105263157894, "low_lr": 1.528421052631579e-05, "step": 448 }, { "epoch": 1.178172255095332, "high_lr": 0.0007642105263157894, "low_lr": 1.528421052631579e-05, "step": 448 }, { "epoch": 1.178172255095332, "high_lr": 0.0007642105263157894, "low_lr": 1.528421052631579e-05, "step": 448 }, { "epoch": 1.178172255095332, "high_lr": 0.0007642105263157894, "low_lr": 1.528421052631579e-05, "step": 448 }, { "epoch": 1.180802103879027, "grad_norm": 1.0918418169021606, "learning_rate": 0.0007636842105263157, "loss": 1.5288, "step": 449 }, { "epoch": 1.180802103879027, "high_lr": 0.0007636842105263157, "low_lr": 1.5273684210526318e-05, "step": 449 }, { "epoch": 1.180802103879027, "high_lr": 0.0007636842105263157, "low_lr": 1.5273684210526318e-05, "step": 449 }, { "epoch": 1.180802103879027, "high_lr": 0.0007636842105263157, "low_lr": 1.5273684210526318e-05, "step": 449 }, { "epoch": 1.180802103879027, "high_lr": 0.0007636842105263157, "low_lr": 1.5273684210526318e-05, "step": 449 }, { "epoch": 1.180802103879027, "high_lr": 0.0007636842105263157, "low_lr": 1.5273684210526318e-05, "step": 449 }, { "epoch": 1.180802103879027, "high_lr": 0.0007636842105263157, "low_lr": 1.5273684210526318e-05, "step": 449 }, { "epoch": 1.180802103879027, "high_lr": 0.0007636842105263157, "low_lr": 1.5273684210526318e-05, "step": 449 }, { "epoch": 1.180802103879027, "high_lr": 0.0007636842105263157, "low_lr": 1.5273684210526318e-05, "step": 449 }, { "epoch": 1.183431952662722, "grad_norm": 1.018148422241211, "learning_rate": 0.0007631578947368421, "loss": 1.5004, "step": 450 }, { "epoch": 1.183431952662722, "high_lr": 0.0007631578947368421, "low_lr": 1.5263157894736846e-05, "step": 450 }, { "epoch": 1.183431952662722, "high_lr": 0.0007631578947368421, "low_lr": 1.5263157894736846e-05, "step": 450 }, { "epoch": 1.183431952662722, "high_lr": 0.0007631578947368421, "low_lr": 1.5263157894736846e-05, "step": 450 }, { "epoch": 1.183431952662722, "high_lr": 0.0007631578947368421, "low_lr": 1.5263157894736846e-05, "step": 450 }, { "epoch": 1.183431952662722, "high_lr": 0.0007631578947368421, "low_lr": 1.5263157894736846e-05, "step": 450 }, { "epoch": 1.183431952662722, "high_lr": 0.0007631578947368421, "low_lr": 1.5263157894736846e-05, "step": 450 }, { "epoch": 1.183431952662722, "high_lr": 0.0007631578947368421, "low_lr": 1.5263157894736846e-05, "step": 450 }, { "epoch": 1.183431952662722, "high_lr": 0.0007631578947368421, "low_lr": 1.5263157894736846e-05, "step": 450 }, { "epoch": 1.1860618014464168, "grad_norm": 1.0755535364151, "learning_rate": 0.0007626315789473685, "loss": 1.5135, "step": 451 }, { "epoch": 1.1860618014464168, "high_lr": 0.0007626315789473685, "low_lr": 1.525263157894737e-05, "step": 451 }, { "epoch": 1.1860618014464168, "high_lr": 0.0007626315789473685, "low_lr": 1.525263157894737e-05, "step": 451 }, { "epoch": 1.1860618014464168, "high_lr": 0.0007626315789473685, "low_lr": 1.525263157894737e-05, "step": 451 }, { "epoch": 1.1860618014464168, "high_lr": 0.0007626315789473685, "low_lr": 1.525263157894737e-05, "step": 451 }, { "epoch": 1.1860618014464168, "high_lr": 0.0007626315789473685, "low_lr": 1.525263157894737e-05, "step": 451 }, { "epoch": 1.1860618014464168, "high_lr": 0.0007626315789473685, "low_lr": 1.525263157894737e-05, "step": 451 }, { "epoch": 1.1860618014464168, "high_lr": 0.0007626315789473685, "low_lr": 1.525263157894737e-05, "step": 451 }, { "epoch": 1.1860618014464168, "high_lr": 0.0007626315789473685, "low_lr": 1.525263157894737e-05, "step": 451 }, { "epoch": 1.1886916502301117, "grad_norm": 1.0672171115875244, "learning_rate": 0.0007621052631578948, "loss": 1.456, "step": 452 }, { "epoch": 1.1886916502301117, "high_lr": 0.0007621052631578948, "low_lr": 1.5242105263157897e-05, "step": 452 }, { "epoch": 1.1886916502301117, "high_lr": 0.0007621052631578948, "low_lr": 1.5242105263157897e-05, "step": 452 }, { "epoch": 1.1886916502301117, "high_lr": 0.0007621052631578948, "low_lr": 1.5242105263157897e-05, "step": 452 }, { "epoch": 1.1886916502301117, "high_lr": 0.0007621052631578948, "low_lr": 1.5242105263157897e-05, "step": 452 }, { "epoch": 1.1886916502301117, "high_lr": 0.0007621052631578948, "low_lr": 1.5242105263157897e-05, "step": 452 }, { "epoch": 1.1886916502301117, "high_lr": 0.0007621052631578948, "low_lr": 1.5242105263157897e-05, "step": 452 }, { "epoch": 1.1886916502301117, "high_lr": 0.0007621052631578948, "low_lr": 1.5242105263157897e-05, "step": 452 }, { "epoch": 1.1886916502301117, "high_lr": 0.0007621052631578948, "low_lr": 1.5242105263157897e-05, "step": 452 }, { "epoch": 1.1913214990138068, "grad_norm": 1.021423101425171, "learning_rate": 0.0007615789473684211, "loss": 1.5174, "step": 453 }, { "epoch": 1.1913214990138068, "high_lr": 0.0007615789473684211, "low_lr": 1.5231578947368421e-05, "step": 453 }, { "epoch": 1.1913214990138068, "high_lr": 0.0007615789473684211, "low_lr": 1.5231578947368421e-05, "step": 453 }, { "epoch": 1.1913214990138068, "high_lr": 0.0007615789473684211, "low_lr": 1.5231578947368421e-05, "step": 453 }, { "epoch": 1.1913214990138068, "high_lr": 0.0007615789473684211, "low_lr": 1.5231578947368421e-05, "step": 453 }, { "epoch": 1.1913214990138068, "high_lr": 0.0007615789473684211, "low_lr": 1.5231578947368421e-05, "step": 453 }, { "epoch": 1.1913214990138068, "high_lr": 0.0007615789473684211, "low_lr": 1.5231578947368421e-05, "step": 453 }, { "epoch": 1.1913214990138068, "high_lr": 0.0007615789473684211, "low_lr": 1.5231578947368421e-05, "step": 453 }, { "epoch": 1.1913214990138068, "high_lr": 0.0007615789473684211, "low_lr": 1.5231578947368421e-05, "step": 453 }, { "epoch": 1.1939513477975017, "grad_norm": 0.9255045056343079, "learning_rate": 0.0007610526315789474, "loss": 1.5226, "step": 454 }, { "epoch": 1.1939513477975017, "high_lr": 0.0007610526315789474, "low_lr": 1.5221052631578948e-05, "step": 454 }, { "epoch": 1.1939513477975017, "high_lr": 0.0007610526315789474, "low_lr": 1.5221052631578948e-05, "step": 454 }, { "epoch": 1.1939513477975017, "high_lr": 0.0007610526315789474, "low_lr": 1.5221052631578948e-05, "step": 454 }, { "epoch": 1.1939513477975017, "high_lr": 0.0007610526315789474, "low_lr": 1.5221052631578948e-05, "step": 454 }, { "epoch": 1.1939513477975017, "high_lr": 0.0007610526315789474, "low_lr": 1.5221052631578948e-05, "step": 454 }, { "epoch": 1.1939513477975017, "high_lr": 0.0007610526315789474, "low_lr": 1.5221052631578948e-05, "step": 454 }, { "epoch": 1.1939513477975017, "high_lr": 0.0007610526315789474, "low_lr": 1.5221052631578948e-05, "step": 454 }, { "epoch": 1.1939513477975017, "high_lr": 0.0007610526315789474, "low_lr": 1.5221052631578948e-05, "step": 454 }, { "epoch": 1.1965811965811965, "grad_norm": 1.060662031173706, "learning_rate": 0.0007605263157894738, "loss": 1.4796, "step": 455 }, { "epoch": 1.1965811965811965, "high_lr": 0.0007605263157894738, "low_lr": 1.5210526315789476e-05, "step": 455 }, { "epoch": 1.1965811965811965, "high_lr": 0.0007605263157894738, "low_lr": 1.5210526315789476e-05, "step": 455 }, { "epoch": 1.1965811965811965, "high_lr": 0.0007605263157894738, "low_lr": 1.5210526315789476e-05, "step": 455 }, { "epoch": 1.1965811965811965, "high_lr": 0.0007605263157894738, "low_lr": 1.5210526315789476e-05, "step": 455 }, { "epoch": 1.1965811965811965, "high_lr": 0.0007605263157894738, "low_lr": 1.5210526315789476e-05, "step": 455 }, { "epoch": 1.1965811965811965, "high_lr": 0.0007605263157894738, "low_lr": 1.5210526315789476e-05, "step": 455 }, { "epoch": 1.1965811965811965, "high_lr": 0.0007605263157894738, "low_lr": 1.5210526315789476e-05, "step": 455 }, { "epoch": 1.1965811965811965, "high_lr": 0.0007605263157894738, "low_lr": 1.5210526315789476e-05, "step": 455 }, { "epoch": 1.1992110453648914, "grad_norm": 1.1802492141723633, "learning_rate": 0.00076, "loss": 1.5481, "step": 456 }, { "epoch": 1.1992110453648914, "high_lr": 0.00076, "low_lr": 1.5200000000000002e-05, "step": 456 }, { "epoch": 1.1992110453648914, "high_lr": 0.00076, "low_lr": 1.5200000000000002e-05, "step": 456 }, { "epoch": 1.1992110453648914, "high_lr": 0.00076, "low_lr": 1.5200000000000002e-05, "step": 456 }, { "epoch": 1.1992110453648914, "high_lr": 0.00076, "low_lr": 1.5200000000000002e-05, "step": 456 }, { "epoch": 1.1992110453648914, "high_lr": 0.00076, "low_lr": 1.5200000000000002e-05, "step": 456 }, { "epoch": 1.1992110453648914, "high_lr": 0.00076, "low_lr": 1.5200000000000002e-05, "step": 456 }, { "epoch": 1.1992110453648914, "high_lr": 0.00076, "low_lr": 1.5200000000000002e-05, "step": 456 }, { "epoch": 1.1992110453648914, "high_lr": 0.00076, "low_lr": 1.5200000000000002e-05, "step": 456 }, { "epoch": 1.2018408941485865, "grad_norm": 1.0207029581069946, "learning_rate": 0.0007594736842105263, "loss": 1.5057, "step": 457 }, { "epoch": 1.2018408941485865, "high_lr": 0.0007594736842105263, "low_lr": 1.5189473684210526e-05, "step": 457 }, { "epoch": 1.2018408941485865, "high_lr": 0.0007594736842105263, "low_lr": 1.5189473684210526e-05, "step": 457 }, { "epoch": 1.2018408941485865, "high_lr": 0.0007594736842105263, "low_lr": 1.5189473684210526e-05, "step": 457 }, { "epoch": 1.2018408941485865, "high_lr": 0.0007594736842105263, "low_lr": 1.5189473684210526e-05, "step": 457 }, { "epoch": 1.2018408941485865, "high_lr": 0.0007594736842105263, "low_lr": 1.5189473684210526e-05, "step": 457 }, { "epoch": 1.2018408941485865, "high_lr": 0.0007594736842105263, "low_lr": 1.5189473684210526e-05, "step": 457 }, { "epoch": 1.2018408941485865, "high_lr": 0.0007594736842105263, "low_lr": 1.5189473684210526e-05, "step": 457 }, { "epoch": 1.2018408941485865, "high_lr": 0.0007594736842105263, "low_lr": 1.5189473684210526e-05, "step": 457 }, { "epoch": 1.2044707429322814, "grad_norm": 1.0410336256027222, "learning_rate": 0.0007589473684210526, "loss": 1.5044, "step": 458 }, { "epoch": 1.2044707429322814, "high_lr": 0.0007589473684210526, "low_lr": 1.5178947368421053e-05, "step": 458 }, { "epoch": 1.2044707429322814, "high_lr": 0.0007589473684210526, "low_lr": 1.5178947368421053e-05, "step": 458 }, { "epoch": 1.2044707429322814, "high_lr": 0.0007589473684210526, "low_lr": 1.5178947368421053e-05, "step": 458 }, { "epoch": 1.2044707429322814, "high_lr": 0.0007589473684210526, "low_lr": 1.5178947368421053e-05, "step": 458 }, { "epoch": 1.2044707429322814, "high_lr": 0.0007589473684210526, "low_lr": 1.5178947368421053e-05, "step": 458 }, { "epoch": 1.2044707429322814, "high_lr": 0.0007589473684210526, "low_lr": 1.5178947368421053e-05, "step": 458 }, { "epoch": 1.2044707429322814, "high_lr": 0.0007589473684210526, "low_lr": 1.5178947368421053e-05, "step": 458 }, { "epoch": 1.2044707429322814, "high_lr": 0.0007589473684210526, "low_lr": 1.5178947368421053e-05, "step": 458 }, { "epoch": 1.2071005917159763, "grad_norm": 1.304905891418457, "learning_rate": 0.000758421052631579, "loss": 1.5078, "step": 459 }, { "epoch": 1.2071005917159763, "high_lr": 0.000758421052631579, "low_lr": 1.516842105263158e-05, "step": 459 }, { "epoch": 1.2071005917159763, "high_lr": 0.000758421052631579, "low_lr": 1.516842105263158e-05, "step": 459 }, { "epoch": 1.2071005917159763, "high_lr": 0.000758421052631579, "low_lr": 1.516842105263158e-05, "step": 459 }, { "epoch": 1.2071005917159763, "high_lr": 0.000758421052631579, "low_lr": 1.516842105263158e-05, "step": 459 }, { "epoch": 1.2071005917159763, "high_lr": 0.000758421052631579, "low_lr": 1.516842105263158e-05, "step": 459 }, { "epoch": 1.2071005917159763, "high_lr": 0.000758421052631579, "low_lr": 1.516842105263158e-05, "step": 459 }, { "epoch": 1.2071005917159763, "high_lr": 0.000758421052631579, "low_lr": 1.516842105263158e-05, "step": 459 }, { "epoch": 1.2071005917159763, "high_lr": 0.000758421052631579, "low_lr": 1.516842105263158e-05, "step": 459 }, { "epoch": 1.2097304404996714, "grad_norm": 1.040297508239746, "learning_rate": 0.0007578947368421053, "loss": 1.5058, "step": 460 }, { "epoch": 1.2097304404996714, "high_lr": 0.0007578947368421053, "low_lr": 1.5157894736842107e-05, "step": 460 }, { "epoch": 1.2097304404996714, "high_lr": 0.0007578947368421053, "low_lr": 1.5157894736842107e-05, "step": 460 }, { "epoch": 1.2097304404996714, "high_lr": 0.0007578947368421053, "low_lr": 1.5157894736842107e-05, "step": 460 }, { "epoch": 1.2097304404996714, "high_lr": 0.0007578947368421053, "low_lr": 1.5157894736842107e-05, "step": 460 }, { "epoch": 1.2097304404996714, "high_lr": 0.0007578947368421053, "low_lr": 1.5157894736842107e-05, "step": 460 }, { "epoch": 1.2097304404996714, "high_lr": 0.0007578947368421053, "low_lr": 1.5157894736842107e-05, "step": 460 }, { "epoch": 1.2097304404996714, "high_lr": 0.0007578947368421053, "low_lr": 1.5157894736842107e-05, "step": 460 }, { "epoch": 1.2097304404996714, "high_lr": 0.0007578947368421053, "low_lr": 1.5157894736842107e-05, "step": 460 }, { "epoch": 1.2123602892833663, "grad_norm": 1.012952446937561, "learning_rate": 0.0007573684210526316, "loss": 1.5206, "step": 461 }, { "epoch": 1.2123602892833663, "high_lr": 0.0007573684210526316, "low_lr": 1.5147368421052633e-05, "step": 461 }, { "epoch": 1.2123602892833663, "high_lr": 0.0007573684210526316, "low_lr": 1.5147368421052633e-05, "step": 461 }, { "epoch": 1.2123602892833663, "high_lr": 0.0007573684210526316, "low_lr": 1.5147368421052633e-05, "step": 461 }, { "epoch": 1.2123602892833663, "high_lr": 0.0007573684210526316, "low_lr": 1.5147368421052633e-05, "step": 461 }, { "epoch": 1.2123602892833663, "high_lr": 0.0007573684210526316, "low_lr": 1.5147368421052633e-05, "step": 461 }, { "epoch": 1.2123602892833663, "high_lr": 0.0007573684210526316, "low_lr": 1.5147368421052633e-05, "step": 461 }, { "epoch": 1.2123602892833663, "high_lr": 0.0007573684210526316, "low_lr": 1.5147368421052633e-05, "step": 461 }, { "epoch": 1.2123602892833663, "high_lr": 0.0007573684210526316, "low_lr": 1.5147368421052633e-05, "step": 461 }, { "epoch": 1.2149901380670611, "grad_norm": 1.0536714792251587, "learning_rate": 0.0007568421052631579, "loss": 1.4892, "step": 462 }, { "epoch": 1.2149901380670611, "high_lr": 0.0007568421052631579, "low_lr": 1.5136842105263158e-05, "step": 462 }, { "epoch": 1.2149901380670611, "high_lr": 0.0007568421052631579, "low_lr": 1.5136842105263158e-05, "step": 462 }, { "epoch": 1.2149901380670611, "high_lr": 0.0007568421052631579, "low_lr": 1.5136842105263158e-05, "step": 462 }, { "epoch": 1.2149901380670611, "high_lr": 0.0007568421052631579, "low_lr": 1.5136842105263158e-05, "step": 462 }, { "epoch": 1.2149901380670611, "high_lr": 0.0007568421052631579, "low_lr": 1.5136842105263158e-05, "step": 462 }, { "epoch": 1.2149901380670611, "high_lr": 0.0007568421052631579, "low_lr": 1.5136842105263158e-05, "step": 462 }, { "epoch": 1.2149901380670611, "high_lr": 0.0007568421052631579, "low_lr": 1.5136842105263158e-05, "step": 462 }, { "epoch": 1.2149901380670611, "high_lr": 0.0007568421052631579, "low_lr": 1.5136842105263158e-05, "step": 462 }, { "epoch": 1.217619986850756, "grad_norm": 1.0727607011795044, "learning_rate": 0.0007563157894736842, "loss": 1.5471, "step": 463 }, { "epoch": 1.217619986850756, "high_lr": 0.0007563157894736842, "low_lr": 1.5126315789473684e-05, "step": 463 }, { "epoch": 1.217619986850756, "high_lr": 0.0007563157894736842, "low_lr": 1.5126315789473684e-05, "step": 463 }, { "epoch": 1.217619986850756, "high_lr": 0.0007563157894736842, "low_lr": 1.5126315789473684e-05, "step": 463 }, { "epoch": 1.217619986850756, "high_lr": 0.0007563157894736842, "low_lr": 1.5126315789473684e-05, "step": 463 }, { "epoch": 1.217619986850756, "high_lr": 0.0007563157894736842, "low_lr": 1.5126315789473684e-05, "step": 463 }, { "epoch": 1.217619986850756, "high_lr": 0.0007563157894736842, "low_lr": 1.5126315789473684e-05, "step": 463 }, { "epoch": 1.217619986850756, "high_lr": 0.0007563157894736842, "low_lr": 1.5126315789473684e-05, "step": 463 }, { "epoch": 1.217619986850756, "high_lr": 0.0007563157894736842, "low_lr": 1.5126315789473684e-05, "step": 463 }, { "epoch": 1.220249835634451, "grad_norm": 0.9881057143211365, "learning_rate": 0.0007557894736842105, "loss": 1.4938, "step": 464 }, { "epoch": 1.220249835634451, "high_lr": 0.0007557894736842105, "low_lr": 1.5115789473684212e-05, "step": 464 }, { "epoch": 1.220249835634451, "high_lr": 0.0007557894736842105, "low_lr": 1.5115789473684212e-05, "step": 464 }, { "epoch": 1.220249835634451, "high_lr": 0.0007557894736842105, "low_lr": 1.5115789473684212e-05, "step": 464 }, { "epoch": 1.220249835634451, "high_lr": 0.0007557894736842105, "low_lr": 1.5115789473684212e-05, "step": 464 }, { "epoch": 1.220249835634451, "high_lr": 0.0007557894736842105, "low_lr": 1.5115789473684212e-05, "step": 464 }, { "epoch": 1.220249835634451, "high_lr": 0.0007557894736842105, "low_lr": 1.5115789473684212e-05, "step": 464 }, { "epoch": 1.220249835634451, "high_lr": 0.0007557894736842105, "low_lr": 1.5115789473684212e-05, "step": 464 }, { "epoch": 1.220249835634451, "high_lr": 0.0007557894736842105, "low_lr": 1.5115789473684212e-05, "step": 464 }, { "epoch": 1.222879684418146, "grad_norm": 1.0744034051895142, "learning_rate": 0.0007552631578947368, "loss": 1.4936, "step": 465 }, { "epoch": 1.222879684418146, "high_lr": 0.0007552631578947368, "low_lr": 1.5105263157894739e-05, "step": 465 }, { "epoch": 1.222879684418146, "high_lr": 0.0007552631578947368, "low_lr": 1.5105263157894739e-05, "step": 465 }, { "epoch": 1.222879684418146, "high_lr": 0.0007552631578947368, "low_lr": 1.5105263157894739e-05, "step": 465 }, { "epoch": 1.222879684418146, "high_lr": 0.0007552631578947368, "low_lr": 1.5105263157894739e-05, "step": 465 }, { "epoch": 1.222879684418146, "high_lr": 0.0007552631578947368, "low_lr": 1.5105263157894739e-05, "step": 465 }, { "epoch": 1.222879684418146, "high_lr": 0.0007552631578947368, "low_lr": 1.5105263157894739e-05, "step": 465 }, { "epoch": 1.222879684418146, "high_lr": 0.0007552631578947368, "low_lr": 1.5105263157894739e-05, "step": 465 }, { "epoch": 1.222879684418146, "high_lr": 0.0007552631578947368, "low_lr": 1.5105263157894739e-05, "step": 465 }, { "epoch": 1.2255095332018409, "grad_norm": 1.0122504234313965, "learning_rate": 0.0007547368421052631, "loss": 1.5304, "step": 466 }, { "epoch": 1.2255095332018409, "high_lr": 0.0007547368421052631, "low_lr": 1.5094736842105263e-05, "step": 466 }, { "epoch": 1.2255095332018409, "high_lr": 0.0007547368421052631, "low_lr": 1.5094736842105263e-05, "step": 466 }, { "epoch": 1.2255095332018409, "high_lr": 0.0007547368421052631, "low_lr": 1.5094736842105263e-05, "step": 466 }, { "epoch": 1.2255095332018409, "high_lr": 0.0007547368421052631, "low_lr": 1.5094736842105263e-05, "step": 466 }, { "epoch": 1.2255095332018409, "high_lr": 0.0007547368421052631, "low_lr": 1.5094736842105263e-05, "step": 466 }, { "epoch": 1.2255095332018409, "high_lr": 0.0007547368421052631, "low_lr": 1.5094736842105263e-05, "step": 466 }, { "epoch": 1.2255095332018409, "high_lr": 0.0007547368421052631, "low_lr": 1.5094736842105263e-05, "step": 466 }, { "epoch": 1.2255095332018409, "high_lr": 0.0007547368421052631, "low_lr": 1.5094736842105263e-05, "step": 466 }, { "epoch": 1.2281393819855357, "grad_norm": 0.9647414088249207, "learning_rate": 0.0007542105263157895, "loss": 1.4701, "step": 467 }, { "epoch": 1.2281393819855357, "high_lr": 0.0007542105263157895, "low_lr": 1.508421052631579e-05, "step": 467 }, { "epoch": 1.2281393819855357, "high_lr": 0.0007542105263157895, "low_lr": 1.508421052631579e-05, "step": 467 }, { "epoch": 1.2281393819855357, "high_lr": 0.0007542105263157895, "low_lr": 1.508421052631579e-05, "step": 467 }, { "epoch": 1.2281393819855357, "high_lr": 0.0007542105263157895, "low_lr": 1.508421052631579e-05, "step": 467 }, { "epoch": 1.2281393819855357, "high_lr": 0.0007542105263157895, "low_lr": 1.508421052631579e-05, "step": 467 }, { "epoch": 1.2281393819855357, "high_lr": 0.0007542105263157895, "low_lr": 1.508421052631579e-05, "step": 467 }, { "epoch": 1.2281393819855357, "high_lr": 0.0007542105263157895, "low_lr": 1.508421052631579e-05, "step": 467 }, { "epoch": 1.2281393819855357, "high_lr": 0.0007542105263157895, "low_lr": 1.508421052631579e-05, "step": 467 }, { "epoch": 1.2307692307692308, "grad_norm": 1.006118655204773, "learning_rate": 0.0007536842105263158, "loss": 1.5536, "step": 468 }, { "epoch": 1.2307692307692308, "high_lr": 0.0007536842105263158, "low_lr": 1.5073684210526316e-05, "step": 468 }, { "epoch": 1.2307692307692308, "high_lr": 0.0007536842105263158, "low_lr": 1.5073684210526316e-05, "step": 468 }, { "epoch": 1.2307692307692308, "high_lr": 0.0007536842105263158, "low_lr": 1.5073684210526316e-05, "step": 468 }, { "epoch": 1.2307692307692308, "high_lr": 0.0007536842105263158, "low_lr": 1.5073684210526316e-05, "step": 468 }, { "epoch": 1.2307692307692308, "high_lr": 0.0007536842105263158, "low_lr": 1.5073684210526316e-05, "step": 468 }, { "epoch": 1.2307692307692308, "high_lr": 0.0007536842105263158, "low_lr": 1.5073684210526316e-05, "step": 468 }, { "epoch": 1.2307692307692308, "high_lr": 0.0007536842105263158, "low_lr": 1.5073684210526316e-05, "step": 468 }, { "epoch": 1.2307692307692308, "high_lr": 0.0007536842105263158, "low_lr": 1.5073684210526316e-05, "step": 468 }, { "epoch": 1.2333990795529257, "grad_norm": 0.9264904856681824, "learning_rate": 0.0007531578947368422, "loss": 1.4641, "step": 469 }, { "epoch": 1.2333990795529257, "high_lr": 0.0007531578947368422, "low_lr": 1.5063157894736844e-05, "step": 469 }, { "epoch": 1.2333990795529257, "high_lr": 0.0007531578947368422, "low_lr": 1.5063157894736844e-05, "step": 469 }, { "epoch": 1.2333990795529257, "high_lr": 0.0007531578947368422, "low_lr": 1.5063157894736844e-05, "step": 469 }, { "epoch": 1.2333990795529257, "high_lr": 0.0007531578947368422, "low_lr": 1.5063157894736844e-05, "step": 469 }, { "epoch": 1.2333990795529257, "high_lr": 0.0007531578947368422, "low_lr": 1.5063157894736844e-05, "step": 469 }, { "epoch": 1.2333990795529257, "high_lr": 0.0007531578947368422, "low_lr": 1.5063157894736844e-05, "step": 469 }, { "epoch": 1.2333990795529257, "high_lr": 0.0007531578947368422, "low_lr": 1.5063157894736844e-05, "step": 469 }, { "epoch": 1.2333990795529257, "high_lr": 0.0007531578947368422, "low_lr": 1.5063157894736844e-05, "step": 469 }, { "epoch": 1.2360289283366206, "grad_norm": 0.9730166792869568, "learning_rate": 0.0007526315789473685, "loss": 1.5371, "step": 470 }, { "epoch": 1.2360289283366206, "high_lr": 0.0007526315789473685, "low_lr": 1.505263157894737e-05, "step": 470 }, { "epoch": 1.2360289283366206, "high_lr": 0.0007526315789473685, "low_lr": 1.505263157894737e-05, "step": 470 }, { "epoch": 1.2360289283366206, "high_lr": 0.0007526315789473685, "low_lr": 1.505263157894737e-05, "step": 470 }, { "epoch": 1.2360289283366206, "high_lr": 0.0007526315789473685, "low_lr": 1.505263157894737e-05, "step": 470 }, { "epoch": 1.2360289283366206, "high_lr": 0.0007526315789473685, "low_lr": 1.505263157894737e-05, "step": 470 }, { "epoch": 1.2360289283366206, "high_lr": 0.0007526315789473685, "low_lr": 1.505263157894737e-05, "step": 470 }, { "epoch": 1.2360289283366206, "high_lr": 0.0007526315789473685, "low_lr": 1.505263157894737e-05, "step": 470 }, { "epoch": 1.2360289283366206, "high_lr": 0.0007526315789473685, "low_lr": 1.505263157894737e-05, "step": 470 }, { "epoch": 1.2386587771203157, "grad_norm": 1.172485589981079, "learning_rate": 0.0007521052631578948, "loss": 1.5384, "step": 471 }, { "epoch": 1.2386587771203157, "high_lr": 0.0007521052631578948, "low_lr": 1.5042105263157895e-05, "step": 471 }, { "epoch": 1.2386587771203157, "high_lr": 0.0007521052631578948, "low_lr": 1.5042105263157895e-05, "step": 471 }, { "epoch": 1.2386587771203157, "high_lr": 0.0007521052631578948, "low_lr": 1.5042105263157895e-05, "step": 471 }, { "epoch": 1.2386587771203157, "high_lr": 0.0007521052631578948, "low_lr": 1.5042105263157895e-05, "step": 471 }, { "epoch": 1.2386587771203157, "high_lr": 0.0007521052631578948, "low_lr": 1.5042105263157895e-05, "step": 471 }, { "epoch": 1.2386587771203157, "high_lr": 0.0007521052631578948, "low_lr": 1.5042105263157895e-05, "step": 471 }, { "epoch": 1.2386587771203157, "high_lr": 0.0007521052631578948, "low_lr": 1.5042105263157895e-05, "step": 471 }, { "epoch": 1.2386587771203157, "high_lr": 0.0007521052631578948, "low_lr": 1.5042105263157895e-05, "step": 471 }, { "epoch": 1.2412886259040106, "grad_norm": 0.9785175323486328, "learning_rate": 0.000751578947368421, "loss": 1.5094, "step": 472 }, { "epoch": 1.2412886259040106, "high_lr": 0.000751578947368421, "low_lr": 1.5031578947368421e-05, "step": 472 }, { "epoch": 1.2412886259040106, "high_lr": 0.000751578947368421, "low_lr": 1.5031578947368421e-05, "step": 472 }, { "epoch": 1.2412886259040106, "high_lr": 0.000751578947368421, "low_lr": 1.5031578947368421e-05, "step": 472 }, { "epoch": 1.2412886259040106, "high_lr": 0.000751578947368421, "low_lr": 1.5031578947368421e-05, "step": 472 }, { "epoch": 1.2412886259040106, "high_lr": 0.000751578947368421, "low_lr": 1.5031578947368421e-05, "step": 472 }, { "epoch": 1.2412886259040106, "high_lr": 0.000751578947368421, "low_lr": 1.5031578947368421e-05, "step": 472 }, { "epoch": 1.2412886259040106, "high_lr": 0.000751578947368421, "low_lr": 1.5031578947368421e-05, "step": 472 }, { "epoch": 1.2412886259040106, "high_lr": 0.000751578947368421, "low_lr": 1.5031578947368421e-05, "step": 472 }, { "epoch": 1.2439184746877054, "grad_norm": 0.9950137138366699, "learning_rate": 0.0007510526315789474, "loss": 1.4456, "step": 473 }, { "epoch": 1.2439184746877054, "high_lr": 0.0007510526315789474, "low_lr": 1.502105263157895e-05, "step": 473 }, { "epoch": 1.2439184746877054, "high_lr": 0.0007510526315789474, "low_lr": 1.502105263157895e-05, "step": 473 }, { "epoch": 1.2439184746877054, "high_lr": 0.0007510526315789474, "low_lr": 1.502105263157895e-05, "step": 473 }, { "epoch": 1.2439184746877054, "high_lr": 0.0007510526315789474, "low_lr": 1.502105263157895e-05, "step": 473 }, { "epoch": 1.2439184746877054, "high_lr": 0.0007510526315789474, "low_lr": 1.502105263157895e-05, "step": 473 }, { "epoch": 1.2439184746877054, "high_lr": 0.0007510526315789474, "low_lr": 1.502105263157895e-05, "step": 473 }, { "epoch": 1.2439184746877054, "high_lr": 0.0007510526315789474, "low_lr": 1.502105263157895e-05, "step": 473 }, { "epoch": 1.2439184746877054, "high_lr": 0.0007510526315789474, "low_lr": 1.502105263157895e-05, "step": 473 }, { "epoch": 1.2465483234714003, "grad_norm": 1.0175728797912598, "learning_rate": 0.0007505263157894737, "loss": 1.4836, "step": 474 }, { "epoch": 1.2465483234714003, "high_lr": 0.0007505263157894737, "low_lr": 1.5010526315789476e-05, "step": 474 }, { "epoch": 1.2465483234714003, "high_lr": 0.0007505263157894737, "low_lr": 1.5010526315789476e-05, "step": 474 }, { "epoch": 1.2465483234714003, "high_lr": 0.0007505263157894737, "low_lr": 1.5010526315789476e-05, "step": 474 }, { "epoch": 1.2465483234714003, "high_lr": 0.0007505263157894737, "low_lr": 1.5010526315789476e-05, "step": 474 }, { "epoch": 1.2465483234714003, "high_lr": 0.0007505263157894737, "low_lr": 1.5010526315789476e-05, "step": 474 }, { "epoch": 1.2465483234714003, "high_lr": 0.0007505263157894737, "low_lr": 1.5010526315789476e-05, "step": 474 }, { "epoch": 1.2465483234714003, "high_lr": 0.0007505263157894737, "low_lr": 1.5010526315789476e-05, "step": 474 }, { "epoch": 1.2465483234714003, "high_lr": 0.0007505263157894737, "low_lr": 1.5010526315789476e-05, "step": 474 }, { "epoch": 1.2491781722550954, "grad_norm": 0.9616714119911194, "learning_rate": 0.00075, "loss": 1.4358, "step": 475 }, { "epoch": 1.2491781722550954, "high_lr": 0.00075, "low_lr": 1.5000000000000002e-05, "step": 475 }, { "epoch": 1.2491781722550954, "high_lr": 0.00075, "low_lr": 1.5000000000000002e-05, "step": 475 }, { "epoch": 1.2491781722550954, "high_lr": 0.00075, "low_lr": 1.5000000000000002e-05, "step": 475 }, { "epoch": 1.2491781722550954, "high_lr": 0.00075, "low_lr": 1.5000000000000002e-05, "step": 475 }, { "epoch": 1.2491781722550954, "high_lr": 0.00075, "low_lr": 1.5000000000000002e-05, "step": 475 }, { "epoch": 1.2491781722550954, "high_lr": 0.00075, "low_lr": 1.5000000000000002e-05, "step": 475 }, { "epoch": 1.2491781722550954, "high_lr": 0.00075, "low_lr": 1.5000000000000002e-05, "step": 475 }, { "epoch": 1.2491781722550954, "high_lr": 0.00075, "low_lr": 1.5000000000000002e-05, "step": 475 }, { "epoch": 1.2518080210387903, "grad_norm": 1.003388524055481, "learning_rate": 0.0007494736842105263, "loss": 1.4858, "step": 476 }, { "epoch": 1.2518080210387903, "high_lr": 0.0007494736842105263, "low_lr": 1.4989473684210527e-05, "step": 476 }, { "epoch": 1.2518080210387903, "high_lr": 0.0007494736842105263, "low_lr": 1.4989473684210527e-05, "step": 476 }, { "epoch": 1.2518080210387903, "high_lr": 0.0007494736842105263, "low_lr": 1.4989473684210527e-05, "step": 476 }, { "epoch": 1.2518080210387903, "high_lr": 0.0007494736842105263, "low_lr": 1.4989473684210527e-05, "step": 476 }, { "epoch": 1.2518080210387903, "high_lr": 0.0007494736842105263, "low_lr": 1.4989473684210527e-05, "step": 476 }, { "epoch": 1.2518080210387903, "high_lr": 0.0007494736842105263, "low_lr": 1.4989473684210527e-05, "step": 476 }, { "epoch": 1.2518080210387903, "high_lr": 0.0007494736842105263, "low_lr": 1.4989473684210527e-05, "step": 476 }, { "epoch": 1.2518080210387903, "high_lr": 0.0007494736842105263, "low_lr": 1.4989473684210527e-05, "step": 476 }, { "epoch": 1.2544378698224852, "grad_norm": 1.0780497789382935, "learning_rate": 0.0007489473684210526, "loss": 1.5253, "step": 477 }, { "epoch": 1.2544378698224852, "high_lr": 0.0007489473684210526, "low_lr": 1.4978947368421053e-05, "step": 477 }, { "epoch": 1.2544378698224852, "high_lr": 0.0007489473684210526, "low_lr": 1.4978947368421053e-05, "step": 477 }, { "epoch": 1.2544378698224852, "high_lr": 0.0007489473684210526, "low_lr": 1.4978947368421053e-05, "step": 477 }, { "epoch": 1.2544378698224852, "high_lr": 0.0007489473684210526, "low_lr": 1.4978947368421053e-05, "step": 477 }, { "epoch": 1.2544378698224852, "high_lr": 0.0007489473684210526, "low_lr": 1.4978947368421053e-05, "step": 477 }, { "epoch": 1.2544378698224852, "high_lr": 0.0007489473684210526, "low_lr": 1.4978947368421053e-05, "step": 477 }, { "epoch": 1.2544378698224852, "high_lr": 0.0007489473684210526, "low_lr": 1.4978947368421053e-05, "step": 477 }, { "epoch": 1.2544378698224852, "high_lr": 0.0007489473684210526, "low_lr": 1.4978947368421053e-05, "step": 477 }, { "epoch": 1.25706771860618, "grad_norm": 1.0622295141220093, "learning_rate": 0.000748421052631579, "loss": 1.5205, "step": 478 }, { "epoch": 1.25706771860618, "high_lr": 0.000748421052631579, "low_lr": 1.4968421052631581e-05, "step": 478 }, { "epoch": 1.25706771860618, "high_lr": 0.000748421052631579, "low_lr": 1.4968421052631581e-05, "step": 478 }, { "epoch": 1.25706771860618, "high_lr": 0.000748421052631579, "low_lr": 1.4968421052631581e-05, "step": 478 }, { "epoch": 1.25706771860618, "high_lr": 0.000748421052631579, "low_lr": 1.4968421052631581e-05, "step": 478 }, { "epoch": 1.25706771860618, "high_lr": 0.000748421052631579, "low_lr": 1.4968421052631581e-05, "step": 478 }, { "epoch": 1.25706771860618, "high_lr": 0.000748421052631579, "low_lr": 1.4968421052631581e-05, "step": 478 }, { "epoch": 1.25706771860618, "high_lr": 0.000748421052631579, "low_lr": 1.4968421052631581e-05, "step": 478 }, { "epoch": 1.25706771860618, "high_lr": 0.000748421052631579, "low_lr": 1.4968421052631581e-05, "step": 478 }, { "epoch": 1.2596975673898752, "grad_norm": 1.078023910522461, "learning_rate": 0.0007478947368421053, "loss": 1.5447, "step": 479 }, { "epoch": 1.2596975673898752, "high_lr": 0.0007478947368421053, "low_lr": 1.4957894736842107e-05, "step": 479 }, { "epoch": 1.2596975673898752, "high_lr": 0.0007478947368421053, "low_lr": 1.4957894736842107e-05, "step": 479 }, { "epoch": 1.2596975673898752, "high_lr": 0.0007478947368421053, "low_lr": 1.4957894736842107e-05, "step": 479 }, { "epoch": 1.2596975673898752, "high_lr": 0.0007478947368421053, "low_lr": 1.4957894736842107e-05, "step": 479 }, { "epoch": 1.2596975673898752, "high_lr": 0.0007478947368421053, "low_lr": 1.4957894736842107e-05, "step": 479 }, { "epoch": 1.2596975673898752, "high_lr": 0.0007478947368421053, "low_lr": 1.4957894736842107e-05, "step": 479 }, { "epoch": 1.2596975673898752, "high_lr": 0.0007478947368421053, "low_lr": 1.4957894736842107e-05, "step": 479 }, { "epoch": 1.2596975673898752, "high_lr": 0.0007478947368421053, "low_lr": 1.4957894736842107e-05, "step": 479 }, { "epoch": 1.26232741617357, "grad_norm": 1.0321089029312134, "learning_rate": 0.0007473684210526316, "loss": 1.4966, "step": 480 }, { "epoch": 1.26232741617357, "high_lr": 0.0007473684210526316, "low_lr": 1.4947368421052632e-05, "step": 480 }, { "epoch": 1.26232741617357, "high_lr": 0.0007473684210526316, "low_lr": 1.4947368421052632e-05, "step": 480 }, { "epoch": 1.26232741617357, "high_lr": 0.0007473684210526316, "low_lr": 1.4947368421052632e-05, "step": 480 }, { "epoch": 1.26232741617357, "high_lr": 0.0007473684210526316, "low_lr": 1.4947368421052632e-05, "step": 480 }, { "epoch": 1.26232741617357, "high_lr": 0.0007473684210526316, "low_lr": 1.4947368421052632e-05, "step": 480 }, { "epoch": 1.26232741617357, "high_lr": 0.0007473684210526316, "low_lr": 1.4947368421052632e-05, "step": 480 }, { "epoch": 1.26232741617357, "high_lr": 0.0007473684210526316, "low_lr": 1.4947368421052632e-05, "step": 480 }, { "epoch": 1.26232741617357, "high_lr": 0.0007473684210526316, "low_lr": 1.4947368421052632e-05, "step": 480 }, { "epoch": 1.264957264957265, "grad_norm": 1.0012013912200928, "learning_rate": 0.0007468421052631578, "loss": 1.4845, "step": 481 }, { "epoch": 1.264957264957265, "high_lr": 0.0007468421052631578, "low_lr": 1.4936842105263158e-05, "step": 481 }, { "epoch": 1.264957264957265, "high_lr": 0.0007468421052631578, "low_lr": 1.4936842105263158e-05, "step": 481 }, { "epoch": 1.264957264957265, "high_lr": 0.0007468421052631578, "low_lr": 1.4936842105263158e-05, "step": 481 }, { "epoch": 1.264957264957265, "high_lr": 0.0007468421052631578, "low_lr": 1.4936842105263158e-05, "step": 481 }, { "epoch": 1.264957264957265, "high_lr": 0.0007468421052631578, "low_lr": 1.4936842105263158e-05, "step": 481 }, { "epoch": 1.264957264957265, "high_lr": 0.0007468421052631578, "low_lr": 1.4936842105263158e-05, "step": 481 }, { "epoch": 1.264957264957265, "high_lr": 0.0007468421052631578, "low_lr": 1.4936842105263158e-05, "step": 481 }, { "epoch": 1.264957264957265, "high_lr": 0.0007468421052631578, "low_lr": 1.4936842105263158e-05, "step": 481 }, { "epoch": 1.26758711374096, "grad_norm": 1.076859712600708, "learning_rate": 0.0007463157894736842, "loss": 1.5144, "step": 482 }, { "epoch": 1.26758711374096, "high_lr": 0.0007463157894736842, "low_lr": 1.4926315789473686e-05, "step": 482 }, { "epoch": 1.26758711374096, "high_lr": 0.0007463157894736842, "low_lr": 1.4926315789473686e-05, "step": 482 }, { "epoch": 1.26758711374096, "high_lr": 0.0007463157894736842, "low_lr": 1.4926315789473686e-05, "step": 482 }, { "epoch": 1.26758711374096, "high_lr": 0.0007463157894736842, "low_lr": 1.4926315789473686e-05, "step": 482 }, { "epoch": 1.26758711374096, "high_lr": 0.0007463157894736842, "low_lr": 1.4926315789473686e-05, "step": 482 }, { "epoch": 1.26758711374096, "high_lr": 0.0007463157894736842, "low_lr": 1.4926315789473686e-05, "step": 482 }, { "epoch": 1.26758711374096, "high_lr": 0.0007463157894736842, "low_lr": 1.4926315789473686e-05, "step": 482 }, { "epoch": 1.26758711374096, "high_lr": 0.0007463157894736842, "low_lr": 1.4926315789473686e-05, "step": 482 }, { "epoch": 1.2702169625246549, "grad_norm": 1.1330339908599854, "learning_rate": 0.0007457894736842105, "loss": 1.524, "step": 483 }, { "epoch": 1.2702169625246549, "high_lr": 0.0007457894736842105, "low_lr": 1.4915789473684213e-05, "step": 483 }, { "epoch": 1.2702169625246549, "high_lr": 0.0007457894736842105, "low_lr": 1.4915789473684213e-05, "step": 483 }, { "epoch": 1.2702169625246549, "high_lr": 0.0007457894736842105, "low_lr": 1.4915789473684213e-05, "step": 483 }, { "epoch": 1.2702169625246549, "high_lr": 0.0007457894736842105, "low_lr": 1.4915789473684213e-05, "step": 483 }, { "epoch": 1.2702169625246549, "high_lr": 0.0007457894736842105, "low_lr": 1.4915789473684213e-05, "step": 483 }, { "epoch": 1.2702169625246549, "high_lr": 0.0007457894736842105, "low_lr": 1.4915789473684213e-05, "step": 483 }, { "epoch": 1.2702169625246549, "high_lr": 0.0007457894736842105, "low_lr": 1.4915789473684213e-05, "step": 483 }, { "epoch": 1.2702169625246549, "high_lr": 0.0007457894736842105, "low_lr": 1.4915789473684213e-05, "step": 483 }, { "epoch": 1.2728468113083498, "grad_norm": 1.0957008600234985, "learning_rate": 0.0007452631578947369, "loss": 1.4832, "step": 484 }, { "epoch": 1.2728468113083498, "high_lr": 0.0007452631578947369, "low_lr": 1.4905263157894739e-05, "step": 484 }, { "epoch": 1.2728468113083498, "high_lr": 0.0007452631578947369, "low_lr": 1.4905263157894739e-05, "step": 484 }, { "epoch": 1.2728468113083498, "high_lr": 0.0007452631578947369, "low_lr": 1.4905263157894739e-05, "step": 484 }, { "epoch": 1.2728468113083498, "high_lr": 0.0007452631578947369, "low_lr": 1.4905263157894739e-05, "step": 484 }, { "epoch": 1.2728468113083498, "high_lr": 0.0007452631578947369, "low_lr": 1.4905263157894739e-05, "step": 484 }, { "epoch": 1.2728468113083498, "high_lr": 0.0007452631578947369, "low_lr": 1.4905263157894739e-05, "step": 484 }, { "epoch": 1.2728468113083498, "high_lr": 0.0007452631578947369, "low_lr": 1.4905263157894739e-05, "step": 484 }, { "epoch": 1.2728468113083498, "high_lr": 0.0007452631578947369, "low_lr": 1.4905263157894739e-05, "step": 484 }, { "epoch": 1.2754766600920446, "grad_norm": 0.9506247043609619, "learning_rate": 0.0007447368421052632, "loss": 1.4619, "step": 485 }, { "epoch": 1.2754766600920446, "high_lr": 0.0007447368421052632, "low_lr": 1.4894736842105264e-05, "step": 485 }, { "epoch": 1.2754766600920446, "high_lr": 0.0007447368421052632, "low_lr": 1.4894736842105264e-05, "step": 485 }, { "epoch": 1.2754766600920446, "high_lr": 0.0007447368421052632, "low_lr": 1.4894736842105264e-05, "step": 485 }, { "epoch": 1.2754766600920446, "high_lr": 0.0007447368421052632, "low_lr": 1.4894736842105264e-05, "step": 485 }, { "epoch": 1.2754766600920446, "high_lr": 0.0007447368421052632, "low_lr": 1.4894736842105264e-05, "step": 485 }, { "epoch": 1.2754766600920446, "high_lr": 0.0007447368421052632, "low_lr": 1.4894736842105264e-05, "step": 485 }, { "epoch": 1.2754766600920446, "high_lr": 0.0007447368421052632, "low_lr": 1.4894736842105264e-05, "step": 485 }, { "epoch": 1.2754766600920446, "high_lr": 0.0007447368421052632, "low_lr": 1.4894736842105264e-05, "step": 485 }, { "epoch": 1.2781065088757395, "grad_norm": 1.2661606073379517, "learning_rate": 0.0007442105263157895, "loss": 1.5559, "step": 486 }, { "epoch": 1.2781065088757395, "high_lr": 0.0007442105263157895, "low_lr": 1.488421052631579e-05, "step": 486 }, { "epoch": 1.2781065088757395, "high_lr": 0.0007442105263157895, "low_lr": 1.488421052631579e-05, "step": 486 }, { "epoch": 1.2781065088757395, "high_lr": 0.0007442105263157895, "low_lr": 1.488421052631579e-05, "step": 486 }, { "epoch": 1.2781065088757395, "high_lr": 0.0007442105263157895, "low_lr": 1.488421052631579e-05, "step": 486 }, { "epoch": 1.2781065088757395, "high_lr": 0.0007442105263157895, "low_lr": 1.488421052631579e-05, "step": 486 }, { "epoch": 1.2781065088757395, "high_lr": 0.0007442105263157895, "low_lr": 1.488421052631579e-05, "step": 486 }, { "epoch": 1.2781065088757395, "high_lr": 0.0007442105263157895, "low_lr": 1.488421052631579e-05, "step": 486 }, { "epoch": 1.2781065088757395, "high_lr": 0.0007442105263157895, "low_lr": 1.488421052631579e-05, "step": 486 }, { "epoch": 1.2807363576594346, "grad_norm": 1.0828596353530884, "learning_rate": 0.0007436842105263159, "loss": 1.5071, "step": 487 }, { "epoch": 1.2807363576594346, "high_lr": 0.0007436842105263159, "low_lr": 1.4873684210526318e-05, "step": 487 }, { "epoch": 1.2807363576594346, "high_lr": 0.0007436842105263159, "low_lr": 1.4873684210526318e-05, "step": 487 }, { "epoch": 1.2807363576594346, "high_lr": 0.0007436842105263159, "low_lr": 1.4873684210526318e-05, "step": 487 }, { "epoch": 1.2807363576594346, "high_lr": 0.0007436842105263159, "low_lr": 1.4873684210526318e-05, "step": 487 }, { "epoch": 1.2807363576594346, "high_lr": 0.0007436842105263159, "low_lr": 1.4873684210526318e-05, "step": 487 }, { "epoch": 1.2807363576594346, "high_lr": 0.0007436842105263159, "low_lr": 1.4873684210526318e-05, "step": 487 }, { "epoch": 1.2807363576594346, "high_lr": 0.0007436842105263159, "low_lr": 1.4873684210526318e-05, "step": 487 }, { "epoch": 1.2807363576594346, "high_lr": 0.0007436842105263159, "low_lr": 1.4873684210526318e-05, "step": 487 }, { "epoch": 1.2833662064431295, "grad_norm": 1.1039183139801025, "learning_rate": 0.0007431578947368422, "loss": 1.5014, "step": 488 }, { "epoch": 1.2833662064431295, "high_lr": 0.0007431578947368422, "low_lr": 1.4863157894736844e-05, "step": 488 }, { "epoch": 1.2833662064431295, "high_lr": 0.0007431578947368422, "low_lr": 1.4863157894736844e-05, "step": 488 }, { "epoch": 1.2833662064431295, "high_lr": 0.0007431578947368422, "low_lr": 1.4863157894736844e-05, "step": 488 }, { "epoch": 1.2833662064431295, "high_lr": 0.0007431578947368422, "low_lr": 1.4863157894736844e-05, "step": 488 }, { "epoch": 1.2833662064431295, "high_lr": 0.0007431578947368422, "low_lr": 1.4863157894736844e-05, "step": 488 }, { "epoch": 1.2833662064431295, "high_lr": 0.0007431578947368422, "low_lr": 1.4863157894736844e-05, "step": 488 }, { "epoch": 1.2833662064431295, "high_lr": 0.0007431578947368422, "low_lr": 1.4863157894736844e-05, "step": 488 }, { "epoch": 1.2833662064431295, "high_lr": 0.0007431578947368422, "low_lr": 1.4863157894736844e-05, "step": 488 }, { "epoch": 1.2859960552268244, "grad_norm": 1.0724716186523438, "learning_rate": 0.0007426315789473685, "loss": 1.4949, "step": 489 }, { "epoch": 1.2859960552268244, "high_lr": 0.0007426315789473685, "low_lr": 1.4852631578947369e-05, "step": 489 }, { "epoch": 1.2859960552268244, "high_lr": 0.0007426315789473685, "low_lr": 1.4852631578947369e-05, "step": 489 }, { "epoch": 1.2859960552268244, "high_lr": 0.0007426315789473685, "low_lr": 1.4852631578947369e-05, "step": 489 }, { "epoch": 1.2859960552268244, "high_lr": 0.0007426315789473685, "low_lr": 1.4852631578947369e-05, "step": 489 }, { "epoch": 1.2859960552268244, "high_lr": 0.0007426315789473685, "low_lr": 1.4852631578947369e-05, "step": 489 }, { "epoch": 1.2859960552268244, "high_lr": 0.0007426315789473685, "low_lr": 1.4852631578947369e-05, "step": 489 }, { "epoch": 1.2859960552268244, "high_lr": 0.0007426315789473685, "low_lr": 1.4852631578947369e-05, "step": 489 }, { "epoch": 1.2859960552268244, "high_lr": 0.0007426315789473685, "low_lr": 1.4852631578947369e-05, "step": 489 }, { "epoch": 1.2886259040105195, "grad_norm": 1.0938204526901245, "learning_rate": 0.0007421052631578947, "loss": 1.565, "step": 490 }, { "epoch": 1.2886259040105195, "high_lr": 0.0007421052631578947, "low_lr": 1.4842105263157895e-05, "step": 490 }, { "epoch": 1.2886259040105195, "high_lr": 0.0007421052631578947, "low_lr": 1.4842105263157895e-05, "step": 490 }, { "epoch": 1.2886259040105195, "high_lr": 0.0007421052631578947, "low_lr": 1.4842105263157895e-05, "step": 490 }, { "epoch": 1.2886259040105195, "high_lr": 0.0007421052631578947, "low_lr": 1.4842105263157895e-05, "step": 490 }, { "epoch": 1.2886259040105195, "high_lr": 0.0007421052631578947, "low_lr": 1.4842105263157895e-05, "step": 490 }, { "epoch": 1.2886259040105195, "high_lr": 0.0007421052631578947, "low_lr": 1.4842105263157895e-05, "step": 490 }, { "epoch": 1.2886259040105195, "high_lr": 0.0007421052631578947, "low_lr": 1.4842105263157895e-05, "step": 490 }, { "epoch": 1.2886259040105195, "high_lr": 0.0007421052631578947, "low_lr": 1.4842105263157895e-05, "step": 490 }, { "epoch": 1.2912557527942143, "grad_norm": 1.025592565536499, "learning_rate": 0.000741578947368421, "loss": 1.4964, "step": 491 }, { "epoch": 1.2912557527942143, "high_lr": 0.000741578947368421, "low_lr": 1.4831578947368422e-05, "step": 491 }, { "epoch": 1.2912557527942143, "high_lr": 0.000741578947368421, "low_lr": 1.4831578947368422e-05, "step": 491 }, { "epoch": 1.2912557527942143, "high_lr": 0.000741578947368421, "low_lr": 1.4831578947368422e-05, "step": 491 }, { "epoch": 1.2912557527942143, "high_lr": 0.000741578947368421, "low_lr": 1.4831578947368422e-05, "step": 491 }, { "epoch": 1.2912557527942143, "high_lr": 0.000741578947368421, "low_lr": 1.4831578947368422e-05, "step": 491 }, { "epoch": 1.2912557527942143, "high_lr": 0.000741578947368421, "low_lr": 1.4831578947368422e-05, "step": 491 }, { "epoch": 1.2912557527942143, "high_lr": 0.000741578947368421, "low_lr": 1.4831578947368422e-05, "step": 491 }, { "epoch": 1.2912557527942143, "high_lr": 0.000741578947368421, "low_lr": 1.4831578947368422e-05, "step": 491 }, { "epoch": 1.2938856015779092, "grad_norm": 1.0463948249816895, "learning_rate": 0.0007410526315789474, "loss": 1.5096, "step": 492 }, { "epoch": 1.2938856015779092, "high_lr": 0.0007410526315789474, "low_lr": 1.482105263157895e-05, "step": 492 }, { "epoch": 1.2938856015779092, "high_lr": 0.0007410526315789474, "low_lr": 1.482105263157895e-05, "step": 492 }, { "epoch": 1.2938856015779092, "high_lr": 0.0007410526315789474, "low_lr": 1.482105263157895e-05, "step": 492 }, { "epoch": 1.2938856015779092, "high_lr": 0.0007410526315789474, "low_lr": 1.482105263157895e-05, "step": 492 }, { "epoch": 1.2938856015779092, "high_lr": 0.0007410526315789474, "low_lr": 1.482105263157895e-05, "step": 492 }, { "epoch": 1.2938856015779092, "high_lr": 0.0007410526315789474, "low_lr": 1.482105263157895e-05, "step": 492 }, { "epoch": 1.2938856015779092, "high_lr": 0.0007410526315789474, "low_lr": 1.482105263157895e-05, "step": 492 }, { "epoch": 1.2938856015779092, "high_lr": 0.0007410526315789474, "low_lr": 1.482105263157895e-05, "step": 492 }, { "epoch": 1.2965154503616043, "grad_norm": 1.0823513269424438, "learning_rate": 0.0007405263157894737, "loss": 1.5304, "step": 493 }, { "epoch": 1.2965154503616043, "high_lr": 0.0007405263157894737, "low_lr": 1.4810526315789476e-05, "step": 493 }, { "epoch": 1.2965154503616043, "high_lr": 0.0007405263157894737, "low_lr": 1.4810526315789476e-05, "step": 493 }, { "epoch": 1.2965154503616043, "high_lr": 0.0007405263157894737, "low_lr": 1.4810526315789476e-05, "step": 493 }, { "epoch": 1.2965154503616043, "high_lr": 0.0007405263157894737, "low_lr": 1.4810526315789476e-05, "step": 493 }, { "epoch": 1.2965154503616043, "high_lr": 0.0007405263157894737, "low_lr": 1.4810526315789476e-05, "step": 493 }, { "epoch": 1.2965154503616043, "high_lr": 0.0007405263157894737, "low_lr": 1.4810526315789476e-05, "step": 493 }, { "epoch": 1.2965154503616043, "high_lr": 0.0007405263157894737, "low_lr": 1.4810526315789476e-05, "step": 493 }, { "epoch": 1.2965154503616043, "high_lr": 0.0007405263157894737, "low_lr": 1.4810526315789476e-05, "step": 493 }, { "epoch": 1.2991452991452992, "grad_norm": 1.0551989078521729, "learning_rate": 0.00074, "loss": 1.5116, "step": 494 }, { "epoch": 1.2991452991452992, "high_lr": 0.00074, "low_lr": 1.48e-05, "step": 494 }, { "epoch": 1.2991452991452992, "high_lr": 0.00074, "low_lr": 1.48e-05, "step": 494 }, { "epoch": 1.2991452991452992, "high_lr": 0.00074, "low_lr": 1.48e-05, "step": 494 }, { "epoch": 1.2991452991452992, "high_lr": 0.00074, "low_lr": 1.48e-05, "step": 494 }, { "epoch": 1.2991452991452992, "high_lr": 0.00074, "low_lr": 1.48e-05, "step": 494 }, { "epoch": 1.2991452991452992, "high_lr": 0.00074, "low_lr": 1.48e-05, "step": 494 }, { "epoch": 1.2991452991452992, "high_lr": 0.00074, "low_lr": 1.48e-05, "step": 494 }, { "epoch": 1.2991452991452992, "high_lr": 0.00074, "low_lr": 1.48e-05, "step": 494 }, { "epoch": 1.301775147928994, "grad_norm": 1.1153234243392944, "learning_rate": 0.0007394736842105263, "loss": 1.5439, "step": 495 }, { "epoch": 1.301775147928994, "high_lr": 0.0007394736842105263, "low_lr": 1.4789473684210527e-05, "step": 495 }, { "epoch": 1.301775147928994, "high_lr": 0.0007394736842105263, "low_lr": 1.4789473684210527e-05, "step": 495 }, { "epoch": 1.301775147928994, "high_lr": 0.0007394736842105263, "low_lr": 1.4789473684210527e-05, "step": 495 }, { "epoch": 1.301775147928994, "high_lr": 0.0007394736842105263, "low_lr": 1.4789473684210527e-05, "step": 495 }, { "epoch": 1.301775147928994, "high_lr": 0.0007394736842105263, "low_lr": 1.4789473684210527e-05, "step": 495 }, { "epoch": 1.301775147928994, "high_lr": 0.0007394736842105263, "low_lr": 1.4789473684210527e-05, "step": 495 }, { "epoch": 1.301775147928994, "high_lr": 0.0007394736842105263, "low_lr": 1.4789473684210527e-05, "step": 495 }, { "epoch": 1.301775147928994, "high_lr": 0.0007394736842105263, "low_lr": 1.4789473684210527e-05, "step": 495 }, { "epoch": 1.304404996712689, "grad_norm": 0.9963828921318054, "learning_rate": 0.0007389473684210527, "loss": 1.4877, "step": 496 }, { "epoch": 1.304404996712689, "high_lr": 0.0007389473684210527, "low_lr": 1.4778947368421055e-05, "step": 496 }, { "epoch": 1.304404996712689, "high_lr": 0.0007389473684210527, "low_lr": 1.4778947368421055e-05, "step": 496 }, { "epoch": 1.304404996712689, "high_lr": 0.0007389473684210527, "low_lr": 1.4778947368421055e-05, "step": 496 }, { "epoch": 1.304404996712689, "high_lr": 0.0007389473684210527, "low_lr": 1.4778947368421055e-05, "step": 496 }, { "epoch": 1.304404996712689, "high_lr": 0.0007389473684210527, "low_lr": 1.4778947368421055e-05, "step": 496 }, { "epoch": 1.304404996712689, "high_lr": 0.0007389473684210527, "low_lr": 1.4778947368421055e-05, "step": 496 }, { "epoch": 1.304404996712689, "high_lr": 0.0007389473684210527, "low_lr": 1.4778947368421055e-05, "step": 496 }, { "epoch": 1.304404996712689, "high_lr": 0.0007389473684210527, "low_lr": 1.4778947368421055e-05, "step": 496 }, { "epoch": 1.3070348454963838, "grad_norm": 1.0606313943862915, "learning_rate": 0.000738421052631579, "loss": 1.485, "step": 497 }, { "epoch": 1.3070348454963838, "high_lr": 0.000738421052631579, "low_lr": 1.4768421052631581e-05, "step": 497 }, { "epoch": 1.3070348454963838, "high_lr": 0.000738421052631579, "low_lr": 1.4768421052631581e-05, "step": 497 }, { "epoch": 1.3070348454963838, "high_lr": 0.000738421052631579, "low_lr": 1.4768421052631581e-05, "step": 497 }, { "epoch": 1.3070348454963838, "high_lr": 0.000738421052631579, "low_lr": 1.4768421052631581e-05, "step": 497 }, { "epoch": 1.3070348454963838, "high_lr": 0.000738421052631579, "low_lr": 1.4768421052631581e-05, "step": 497 }, { "epoch": 1.3070348454963838, "high_lr": 0.000738421052631579, "low_lr": 1.4768421052631581e-05, "step": 497 }, { "epoch": 1.3070348454963838, "high_lr": 0.000738421052631579, "low_lr": 1.4768421052631581e-05, "step": 497 }, { "epoch": 1.3070348454963838, "high_lr": 0.000738421052631579, "low_lr": 1.4768421052631581e-05, "step": 497 }, { "epoch": 1.309664694280079, "grad_norm": 1.0575344562530518, "learning_rate": 0.0007378947368421052, "loss": 1.4944, "step": 498 }, { "epoch": 1.309664694280079, "high_lr": 0.0007378947368421052, "low_lr": 1.4757894736842106e-05, "step": 498 }, { "epoch": 1.309664694280079, "high_lr": 0.0007378947368421052, "low_lr": 1.4757894736842106e-05, "step": 498 }, { "epoch": 1.309664694280079, "high_lr": 0.0007378947368421052, "low_lr": 1.4757894736842106e-05, "step": 498 }, { "epoch": 1.309664694280079, "high_lr": 0.0007378947368421052, "low_lr": 1.4757894736842106e-05, "step": 498 }, { "epoch": 1.309664694280079, "high_lr": 0.0007378947368421052, "low_lr": 1.4757894736842106e-05, "step": 498 }, { "epoch": 1.309664694280079, "high_lr": 0.0007378947368421052, "low_lr": 1.4757894736842106e-05, "step": 498 }, { "epoch": 1.309664694280079, "high_lr": 0.0007378947368421052, "low_lr": 1.4757894736842106e-05, "step": 498 }, { "epoch": 1.309664694280079, "high_lr": 0.0007378947368421052, "low_lr": 1.4757894736842106e-05, "step": 498 }, { "epoch": 1.3122945430637738, "grad_norm": 1.0020501613616943, "learning_rate": 0.0007373684210526315, "loss": 1.4982, "step": 499 }, { "epoch": 1.3122945430637738, "high_lr": 0.0007373684210526315, "low_lr": 1.4747368421052632e-05, "step": 499 }, { "epoch": 1.3122945430637738, "high_lr": 0.0007373684210526315, "low_lr": 1.4747368421052632e-05, "step": 499 }, { "epoch": 1.3122945430637738, "high_lr": 0.0007373684210526315, "low_lr": 1.4747368421052632e-05, "step": 499 }, { "epoch": 1.3122945430637738, "high_lr": 0.0007373684210526315, "low_lr": 1.4747368421052632e-05, "step": 499 }, { "epoch": 1.3122945430637738, "high_lr": 0.0007373684210526315, "low_lr": 1.4747368421052632e-05, "step": 499 }, { "epoch": 1.3122945430637738, "high_lr": 0.0007373684210526315, "low_lr": 1.4747368421052632e-05, "step": 499 }, { "epoch": 1.3122945430637738, "high_lr": 0.0007373684210526315, "low_lr": 1.4747368421052632e-05, "step": 499 }, { "epoch": 1.3122945430637738, "high_lr": 0.0007373684210526315, "low_lr": 1.4747368421052632e-05, "step": 499 }, { "epoch": 1.3149243918474687, "grad_norm": 1.1564699411392212, "learning_rate": 0.0007368421052631579, "loss": 1.5925, "step": 500 }, { "epoch": 1.3149243918474687, "high_lr": 0.0007368421052631579, "low_lr": 1.4736842105263159e-05, "step": 500 }, { "epoch": 1.3149243918474687, "high_lr": 0.0007368421052631579, "low_lr": 1.4736842105263159e-05, "step": 500 }, { "epoch": 1.3149243918474687, "high_lr": 0.0007368421052631579, "low_lr": 1.4736842105263159e-05, "step": 500 }, { "epoch": 1.3149243918474687, "high_lr": 0.0007368421052631579, "low_lr": 1.4736842105263159e-05, "step": 500 }, { "epoch": 1.3149243918474687, "high_lr": 0.0007368421052631579, "low_lr": 1.4736842105263159e-05, "step": 500 }, { "epoch": 1.3149243918474687, "high_lr": 0.0007368421052631579, "low_lr": 1.4736842105263159e-05, "step": 500 }, { "epoch": 1.3149243918474687, "high_lr": 0.0007368421052631579, "low_lr": 1.4736842105263159e-05, "step": 500 }, { "epoch": 1.3149243918474687, "high_lr": 0.0007368421052631579, "low_lr": 1.4736842105263159e-05, "step": 500 }, { "epoch": 1.3175542406311638, "grad_norm": 1.0927271842956543, "learning_rate": 0.0007363157894736843, "loss": 1.5029, "step": 501 }, { "epoch": 1.3175542406311638, "high_lr": 0.0007363157894736843, "low_lr": 1.4726315789473687e-05, "step": 501 }, { "epoch": 1.3175542406311638, "high_lr": 0.0007363157894736843, "low_lr": 1.4726315789473687e-05, "step": 501 }, { "epoch": 1.3175542406311638, "high_lr": 0.0007363157894736843, "low_lr": 1.4726315789473687e-05, "step": 501 }, { "epoch": 1.3175542406311638, "high_lr": 0.0007363157894736843, "low_lr": 1.4726315789473687e-05, "step": 501 }, { "epoch": 1.3175542406311638, "high_lr": 0.0007363157894736843, "low_lr": 1.4726315789473687e-05, "step": 501 }, { "epoch": 1.3175542406311638, "high_lr": 0.0007363157894736843, "low_lr": 1.4726315789473687e-05, "step": 501 }, { "epoch": 1.3175542406311638, "high_lr": 0.0007363157894736843, "low_lr": 1.4726315789473687e-05, "step": 501 }, { "epoch": 1.3175542406311638, "high_lr": 0.0007363157894736843, "low_lr": 1.4726315789473687e-05, "step": 501 }, { "epoch": 1.3201840894148587, "grad_norm": 1.0777077674865723, "learning_rate": 0.0007357894736842106, "loss": 1.4987, "step": 502 }, { "epoch": 1.3201840894148587, "high_lr": 0.0007357894736842106, "low_lr": 1.4715789473684213e-05, "step": 502 }, { "epoch": 1.3201840894148587, "high_lr": 0.0007357894736842106, "low_lr": 1.4715789473684213e-05, "step": 502 }, { "epoch": 1.3201840894148587, "high_lr": 0.0007357894736842106, "low_lr": 1.4715789473684213e-05, "step": 502 }, { "epoch": 1.3201840894148587, "high_lr": 0.0007357894736842106, "low_lr": 1.4715789473684213e-05, "step": 502 }, { "epoch": 1.3201840894148587, "high_lr": 0.0007357894736842106, "low_lr": 1.4715789473684213e-05, "step": 502 }, { "epoch": 1.3201840894148587, "high_lr": 0.0007357894736842106, "low_lr": 1.4715789473684213e-05, "step": 502 }, { "epoch": 1.3201840894148587, "high_lr": 0.0007357894736842106, "low_lr": 1.4715789473684213e-05, "step": 502 }, { "epoch": 1.3201840894148587, "high_lr": 0.0007357894736842106, "low_lr": 1.4715789473684213e-05, "step": 502 }, { "epoch": 1.3228139381985535, "grad_norm": 1.0413988828659058, "learning_rate": 0.0007352631578947369, "loss": 1.475, "step": 503 }, { "epoch": 1.3228139381985535, "high_lr": 0.0007352631578947369, "low_lr": 1.4705263157894738e-05, "step": 503 }, { "epoch": 1.3228139381985535, "high_lr": 0.0007352631578947369, "low_lr": 1.4705263157894738e-05, "step": 503 }, { "epoch": 1.3228139381985535, "high_lr": 0.0007352631578947369, "low_lr": 1.4705263157894738e-05, "step": 503 }, { "epoch": 1.3228139381985535, "high_lr": 0.0007352631578947369, "low_lr": 1.4705263157894738e-05, "step": 503 }, { "epoch": 1.3228139381985535, "high_lr": 0.0007352631578947369, "low_lr": 1.4705263157894738e-05, "step": 503 }, { "epoch": 1.3228139381985535, "high_lr": 0.0007352631578947369, "low_lr": 1.4705263157894738e-05, "step": 503 }, { "epoch": 1.3228139381985535, "high_lr": 0.0007352631578947369, "low_lr": 1.4705263157894738e-05, "step": 503 }, { "epoch": 1.3228139381985535, "high_lr": 0.0007352631578947369, "low_lr": 1.4705263157894738e-05, "step": 503 }, { "epoch": 1.3254437869822486, "grad_norm": 1.0489373207092285, "learning_rate": 0.0007347368421052632, "loss": 1.535, "step": 504 }, { "epoch": 1.3254437869822486, "high_lr": 0.0007347368421052632, "low_lr": 1.4694736842105264e-05, "step": 504 }, { "epoch": 1.3254437869822486, "high_lr": 0.0007347368421052632, "low_lr": 1.4694736842105264e-05, "step": 504 }, { "epoch": 1.3254437869822486, "high_lr": 0.0007347368421052632, "low_lr": 1.4694736842105264e-05, "step": 504 }, { "epoch": 1.3254437869822486, "high_lr": 0.0007347368421052632, "low_lr": 1.4694736842105264e-05, "step": 504 }, { "epoch": 1.3254437869822486, "high_lr": 0.0007347368421052632, "low_lr": 1.4694736842105264e-05, "step": 504 }, { "epoch": 1.3254437869822486, "high_lr": 0.0007347368421052632, "low_lr": 1.4694736842105264e-05, "step": 504 }, { "epoch": 1.3254437869822486, "high_lr": 0.0007347368421052632, "low_lr": 1.4694736842105264e-05, "step": 504 }, { "epoch": 1.3254437869822486, "high_lr": 0.0007347368421052632, "low_lr": 1.4694736842105264e-05, "step": 504 }, { "epoch": 1.3280736357659435, "grad_norm": 1.0401055812835693, "learning_rate": 0.0007342105263157895, "loss": 1.4928, "step": 505 }, { "epoch": 1.3280736357659435, "high_lr": 0.0007342105263157895, "low_lr": 1.468421052631579e-05, "step": 505 }, { "epoch": 1.3280736357659435, "high_lr": 0.0007342105263157895, "low_lr": 1.468421052631579e-05, "step": 505 }, { "epoch": 1.3280736357659435, "high_lr": 0.0007342105263157895, "low_lr": 1.468421052631579e-05, "step": 505 }, { "epoch": 1.3280736357659435, "high_lr": 0.0007342105263157895, "low_lr": 1.468421052631579e-05, "step": 505 }, { "epoch": 1.3280736357659435, "high_lr": 0.0007342105263157895, "low_lr": 1.468421052631579e-05, "step": 505 }, { "epoch": 1.3280736357659435, "high_lr": 0.0007342105263157895, "low_lr": 1.468421052631579e-05, "step": 505 }, { "epoch": 1.3280736357659435, "high_lr": 0.0007342105263157895, "low_lr": 1.468421052631579e-05, "step": 505 }, { "epoch": 1.3280736357659435, "high_lr": 0.0007342105263157895, "low_lr": 1.468421052631579e-05, "step": 505 }, { "epoch": 1.3307034845496384, "grad_norm": 1.0362565517425537, "learning_rate": 0.0007336842105263159, "loss": 1.4942, "step": 506 }, { "epoch": 1.3307034845496384, "high_lr": 0.0007336842105263159, "low_lr": 1.4673684210526318e-05, "step": 506 }, { "epoch": 1.3307034845496384, "high_lr": 0.0007336842105263159, "low_lr": 1.4673684210526318e-05, "step": 506 }, { "epoch": 1.3307034845496384, "high_lr": 0.0007336842105263159, "low_lr": 1.4673684210526318e-05, "step": 506 }, { "epoch": 1.3307034845496384, "high_lr": 0.0007336842105263159, "low_lr": 1.4673684210526318e-05, "step": 506 }, { "epoch": 1.3307034845496384, "high_lr": 0.0007336842105263159, "low_lr": 1.4673684210526318e-05, "step": 506 }, { "epoch": 1.3307034845496384, "high_lr": 0.0007336842105263159, "low_lr": 1.4673684210526318e-05, "step": 506 }, { "epoch": 1.3307034845496384, "high_lr": 0.0007336842105263159, "low_lr": 1.4673684210526318e-05, "step": 506 }, { "epoch": 1.3307034845496384, "high_lr": 0.0007336842105263159, "low_lr": 1.4673684210526318e-05, "step": 506 }, { "epoch": 1.3333333333333333, "grad_norm": 0.9949714541435242, "learning_rate": 0.0007331578947368421, "loss": 1.4918, "step": 507 }, { "epoch": 1.3333333333333333, "high_lr": 0.0007331578947368421, "low_lr": 1.4663157894736843e-05, "step": 507 }, { "epoch": 1.3333333333333333, "high_lr": 0.0007331578947368421, "low_lr": 1.4663157894736843e-05, "step": 507 }, { "epoch": 1.3333333333333333, "high_lr": 0.0007331578947368421, "low_lr": 1.4663157894736843e-05, "step": 507 }, { "epoch": 1.3333333333333333, "high_lr": 0.0007331578947368421, "low_lr": 1.4663157894736843e-05, "step": 507 }, { "epoch": 1.3333333333333333, "high_lr": 0.0007331578947368421, "low_lr": 1.4663157894736843e-05, "step": 507 }, { "epoch": 1.3333333333333333, "high_lr": 0.0007331578947368421, "low_lr": 1.4663157894736843e-05, "step": 507 }, { "epoch": 1.3333333333333333, "high_lr": 0.0007331578947368421, "low_lr": 1.4663157894736843e-05, "step": 507 }, { "epoch": 1.3333333333333333, "high_lr": 0.0007331578947368421, "low_lr": 1.4663157894736843e-05, "step": 507 }, { "epoch": 1.3359631821170281, "grad_norm": 1.072759985923767, "learning_rate": 0.0007326315789473684, "loss": 1.5207, "step": 508 }, { "epoch": 1.3359631821170281, "high_lr": 0.0007326315789473684, "low_lr": 1.465263157894737e-05, "step": 508 }, { "epoch": 1.3359631821170281, "high_lr": 0.0007326315789473684, "low_lr": 1.465263157894737e-05, "step": 508 }, { "epoch": 1.3359631821170281, "high_lr": 0.0007326315789473684, "low_lr": 1.465263157894737e-05, "step": 508 }, { "epoch": 1.3359631821170281, "high_lr": 0.0007326315789473684, "low_lr": 1.465263157894737e-05, "step": 508 }, { "epoch": 1.3359631821170281, "high_lr": 0.0007326315789473684, "low_lr": 1.465263157894737e-05, "step": 508 }, { "epoch": 1.3359631821170281, "high_lr": 0.0007326315789473684, "low_lr": 1.465263157894737e-05, "step": 508 }, { "epoch": 1.3359631821170281, "high_lr": 0.0007326315789473684, "low_lr": 1.465263157894737e-05, "step": 508 }, { "epoch": 1.3359631821170281, "high_lr": 0.0007326315789473684, "low_lr": 1.465263157894737e-05, "step": 508 }, { "epoch": 1.3385930309007232, "grad_norm": 1.176019310951233, "learning_rate": 0.0007321052631578947, "loss": 1.5604, "step": 509 }, { "epoch": 1.3385930309007232, "high_lr": 0.0007321052631578947, "low_lr": 1.4642105263157896e-05, "step": 509 }, { "epoch": 1.3385930309007232, "high_lr": 0.0007321052631578947, "low_lr": 1.4642105263157896e-05, "step": 509 }, { "epoch": 1.3385930309007232, "high_lr": 0.0007321052631578947, "low_lr": 1.4642105263157896e-05, "step": 509 }, { "epoch": 1.3385930309007232, "high_lr": 0.0007321052631578947, "low_lr": 1.4642105263157896e-05, "step": 509 }, { "epoch": 1.3385930309007232, "high_lr": 0.0007321052631578947, "low_lr": 1.4642105263157896e-05, "step": 509 }, { "epoch": 1.3385930309007232, "high_lr": 0.0007321052631578947, "low_lr": 1.4642105263157896e-05, "step": 509 }, { "epoch": 1.3385930309007232, "high_lr": 0.0007321052631578947, "low_lr": 1.4642105263157896e-05, "step": 509 }, { "epoch": 1.3385930309007232, "high_lr": 0.0007321052631578947, "low_lr": 1.4642105263157896e-05, "step": 509 }, { "epoch": 1.3412228796844181, "grad_norm": 1.09358549118042, "learning_rate": 0.0007315789473684211, "loss": 1.5187, "step": 510 }, { "epoch": 1.3412228796844181, "high_lr": 0.0007315789473684211, "low_lr": 1.4631578947368424e-05, "step": 510 }, { "epoch": 1.3412228796844181, "high_lr": 0.0007315789473684211, "low_lr": 1.4631578947368424e-05, "step": 510 }, { "epoch": 1.3412228796844181, "high_lr": 0.0007315789473684211, "low_lr": 1.4631578947368424e-05, "step": 510 }, { "epoch": 1.3412228796844181, "high_lr": 0.0007315789473684211, "low_lr": 1.4631578947368424e-05, "step": 510 }, { "epoch": 1.3412228796844181, "high_lr": 0.0007315789473684211, "low_lr": 1.4631578947368424e-05, "step": 510 }, { "epoch": 1.3412228796844181, "high_lr": 0.0007315789473684211, "low_lr": 1.4631578947368424e-05, "step": 510 }, { "epoch": 1.3412228796844181, "high_lr": 0.0007315789473684211, "low_lr": 1.4631578947368424e-05, "step": 510 }, { "epoch": 1.3412228796844181, "high_lr": 0.0007315789473684211, "low_lr": 1.4631578947368424e-05, "step": 510 }, { "epoch": 1.343852728468113, "grad_norm": 1.1042243242263794, "learning_rate": 0.0007310526315789474, "loss": 1.5189, "step": 511 }, { "epoch": 1.343852728468113, "high_lr": 0.0007310526315789474, "low_lr": 1.462105263157895e-05, "step": 511 }, { "epoch": 1.343852728468113, "high_lr": 0.0007310526315789474, "low_lr": 1.462105263157895e-05, "step": 511 }, { "epoch": 1.343852728468113, "high_lr": 0.0007310526315789474, "low_lr": 1.462105263157895e-05, "step": 511 }, { "epoch": 1.343852728468113, "high_lr": 0.0007310526315789474, "low_lr": 1.462105263157895e-05, "step": 511 }, { "epoch": 1.343852728468113, "high_lr": 0.0007310526315789474, "low_lr": 1.462105263157895e-05, "step": 511 }, { "epoch": 1.343852728468113, "high_lr": 0.0007310526315789474, "low_lr": 1.462105263157895e-05, "step": 511 }, { "epoch": 1.343852728468113, "high_lr": 0.0007310526315789474, "low_lr": 1.462105263157895e-05, "step": 511 }, { "epoch": 1.343852728468113, "high_lr": 0.0007310526315789474, "low_lr": 1.462105263157895e-05, "step": 511 }, { "epoch": 1.346482577251808, "grad_norm": 1.0067287683486938, "learning_rate": 0.0007305263157894737, "loss": 1.562, "step": 512 }, { "epoch": 1.346482577251808, "high_lr": 0.0007305263157894737, "low_lr": 1.4610526315789474e-05, "step": 512 }, { "epoch": 1.346482577251808, "high_lr": 0.0007305263157894737, "low_lr": 1.4610526315789474e-05, "step": 512 }, { "epoch": 1.346482577251808, "high_lr": 0.0007305263157894737, "low_lr": 1.4610526315789474e-05, "step": 512 }, { "epoch": 1.346482577251808, "high_lr": 0.0007305263157894737, "low_lr": 1.4610526315789474e-05, "step": 512 }, { "epoch": 1.346482577251808, "high_lr": 0.0007305263157894737, "low_lr": 1.4610526315789474e-05, "step": 512 }, { "epoch": 1.346482577251808, "high_lr": 0.0007305263157894737, "low_lr": 1.4610526315789474e-05, "step": 512 }, { "epoch": 1.346482577251808, "high_lr": 0.0007305263157894737, "low_lr": 1.4610526315789474e-05, "step": 512 }, { "epoch": 1.346482577251808, "high_lr": 0.0007305263157894737, "low_lr": 1.4610526315789474e-05, "step": 512 }, { "epoch": 1.349112426035503, "grad_norm": 1.0002638101577759, "learning_rate": 0.00073, "loss": 1.4714, "step": 513 }, { "epoch": 1.349112426035503, "high_lr": 0.00073, "low_lr": 1.46e-05, "step": 513 }, { "epoch": 1.349112426035503, "high_lr": 0.00073, "low_lr": 1.46e-05, "step": 513 }, { "epoch": 1.349112426035503, "high_lr": 0.00073, "low_lr": 1.46e-05, "step": 513 }, { "epoch": 1.349112426035503, "high_lr": 0.00073, "low_lr": 1.46e-05, "step": 513 }, { "epoch": 1.349112426035503, "high_lr": 0.00073, "low_lr": 1.46e-05, "step": 513 }, { "epoch": 1.349112426035503, "high_lr": 0.00073, "low_lr": 1.46e-05, "step": 513 }, { "epoch": 1.349112426035503, "high_lr": 0.00073, "low_lr": 1.46e-05, "step": 513 }, { "epoch": 1.349112426035503, "high_lr": 0.00073, "low_lr": 1.46e-05, "step": 513 }, { "epoch": 1.3517422748191978, "grad_norm": 1.0247275829315186, "learning_rate": 0.0007294736842105262, "loss": 1.5009, "step": 514 }, { "epoch": 1.3517422748191978, "high_lr": 0.0007294736842105262, "low_lr": 1.4589473684210527e-05, "step": 514 }, { "epoch": 1.3517422748191978, "high_lr": 0.0007294736842105262, "low_lr": 1.4589473684210527e-05, "step": 514 }, { "epoch": 1.3517422748191978, "high_lr": 0.0007294736842105262, "low_lr": 1.4589473684210527e-05, "step": 514 }, { "epoch": 1.3517422748191978, "high_lr": 0.0007294736842105262, "low_lr": 1.4589473684210527e-05, "step": 514 }, { "epoch": 1.3517422748191978, "high_lr": 0.0007294736842105262, "low_lr": 1.4589473684210527e-05, "step": 514 }, { "epoch": 1.3517422748191978, "high_lr": 0.0007294736842105262, "low_lr": 1.4589473684210527e-05, "step": 514 }, { "epoch": 1.3517422748191978, "high_lr": 0.0007294736842105262, "low_lr": 1.4589473684210527e-05, "step": 514 }, { "epoch": 1.3517422748191978, "high_lr": 0.0007294736842105262, "low_lr": 1.4589473684210527e-05, "step": 514 }, { "epoch": 1.354372123602893, "grad_norm": 1.0016647577285767, "learning_rate": 0.0007289473684210526, "loss": 1.4438, "step": 515 }, { "epoch": 1.354372123602893, "high_lr": 0.0007289473684210526, "low_lr": 1.4578947368421055e-05, "step": 515 }, { "epoch": 1.354372123602893, "high_lr": 0.0007289473684210526, "low_lr": 1.4578947368421055e-05, "step": 515 }, { "epoch": 1.354372123602893, "high_lr": 0.0007289473684210526, "low_lr": 1.4578947368421055e-05, "step": 515 }, { "epoch": 1.354372123602893, "high_lr": 0.0007289473684210526, "low_lr": 1.4578947368421055e-05, "step": 515 }, { "epoch": 1.354372123602893, "high_lr": 0.0007289473684210526, "low_lr": 1.4578947368421055e-05, "step": 515 }, { "epoch": 1.354372123602893, "high_lr": 0.0007289473684210526, "low_lr": 1.4578947368421055e-05, "step": 515 }, { "epoch": 1.354372123602893, "high_lr": 0.0007289473684210526, "low_lr": 1.4578947368421055e-05, "step": 515 }, { "epoch": 1.354372123602893, "high_lr": 0.0007289473684210526, "low_lr": 1.4578947368421055e-05, "step": 515 }, { "epoch": 1.3570019723865878, "grad_norm": 1.0843017101287842, "learning_rate": 0.000728421052631579, "loss": 1.5361, "step": 516 }, { "epoch": 1.3570019723865878, "high_lr": 0.000728421052631579, "low_lr": 1.456842105263158e-05, "step": 516 }, { "epoch": 1.3570019723865878, "high_lr": 0.000728421052631579, "low_lr": 1.456842105263158e-05, "step": 516 }, { "epoch": 1.3570019723865878, "high_lr": 0.000728421052631579, "low_lr": 1.456842105263158e-05, "step": 516 }, { "epoch": 1.3570019723865878, "high_lr": 0.000728421052631579, "low_lr": 1.456842105263158e-05, "step": 516 }, { "epoch": 1.3570019723865878, "high_lr": 0.000728421052631579, "low_lr": 1.456842105263158e-05, "step": 516 }, { "epoch": 1.3570019723865878, "high_lr": 0.000728421052631579, "low_lr": 1.456842105263158e-05, "step": 516 }, { "epoch": 1.3570019723865878, "high_lr": 0.000728421052631579, "low_lr": 1.456842105263158e-05, "step": 516 }, { "epoch": 1.3570019723865878, "high_lr": 0.000728421052631579, "low_lr": 1.456842105263158e-05, "step": 516 }, { "epoch": 1.3596318211702827, "grad_norm": 1.0744608640670776, "learning_rate": 0.0007278947368421053, "loss": 1.4725, "step": 517 }, { "epoch": 1.3596318211702827, "high_lr": 0.0007278947368421053, "low_lr": 1.4557894736842106e-05, "step": 517 }, { "epoch": 1.3596318211702827, "high_lr": 0.0007278947368421053, "low_lr": 1.4557894736842106e-05, "step": 517 }, { "epoch": 1.3596318211702827, "high_lr": 0.0007278947368421053, "low_lr": 1.4557894736842106e-05, "step": 517 }, { "epoch": 1.3596318211702827, "high_lr": 0.0007278947368421053, "low_lr": 1.4557894736842106e-05, "step": 517 }, { "epoch": 1.3596318211702827, "high_lr": 0.0007278947368421053, "low_lr": 1.4557894736842106e-05, "step": 517 }, { "epoch": 1.3596318211702827, "high_lr": 0.0007278947368421053, "low_lr": 1.4557894736842106e-05, "step": 517 }, { "epoch": 1.3596318211702827, "high_lr": 0.0007278947368421053, "low_lr": 1.4557894736842106e-05, "step": 517 }, { "epoch": 1.3596318211702827, "high_lr": 0.0007278947368421053, "low_lr": 1.4557894736842106e-05, "step": 517 }, { "epoch": 1.3622616699539776, "grad_norm": 1.1196846961975098, "learning_rate": 0.0007273684210526316, "loss": 1.5027, "step": 518 }, { "epoch": 1.3622616699539776, "high_lr": 0.0007273684210526316, "low_lr": 1.4547368421052632e-05, "step": 518 }, { "epoch": 1.3622616699539776, "high_lr": 0.0007273684210526316, "low_lr": 1.4547368421052632e-05, "step": 518 }, { "epoch": 1.3622616699539776, "high_lr": 0.0007273684210526316, "low_lr": 1.4547368421052632e-05, "step": 518 }, { "epoch": 1.3622616699539776, "high_lr": 0.0007273684210526316, "low_lr": 1.4547368421052632e-05, "step": 518 }, { "epoch": 1.3622616699539776, "high_lr": 0.0007273684210526316, "low_lr": 1.4547368421052632e-05, "step": 518 }, { "epoch": 1.3622616699539776, "high_lr": 0.0007273684210526316, "low_lr": 1.4547368421052632e-05, "step": 518 }, { "epoch": 1.3622616699539776, "high_lr": 0.0007273684210526316, "low_lr": 1.4547368421052632e-05, "step": 518 }, { "epoch": 1.3622616699539776, "high_lr": 0.0007273684210526316, "low_lr": 1.4547368421052632e-05, "step": 518 }, { "epoch": 1.3648915187376724, "grad_norm": 1.1462395191192627, "learning_rate": 0.0007268421052631579, "loss": 1.5538, "step": 519 }, { "epoch": 1.3648915187376724, "high_lr": 0.0007268421052631579, "low_lr": 1.4536842105263159e-05, "step": 519 }, { "epoch": 1.3648915187376724, "high_lr": 0.0007268421052631579, "low_lr": 1.4536842105263159e-05, "step": 519 }, { "epoch": 1.3648915187376724, "high_lr": 0.0007268421052631579, "low_lr": 1.4536842105263159e-05, "step": 519 }, { "epoch": 1.3648915187376724, "high_lr": 0.0007268421052631579, "low_lr": 1.4536842105263159e-05, "step": 519 }, { "epoch": 1.3648915187376724, "high_lr": 0.0007268421052631579, "low_lr": 1.4536842105263159e-05, "step": 519 }, { "epoch": 1.3648915187376724, "high_lr": 0.0007268421052631579, "low_lr": 1.4536842105263159e-05, "step": 519 }, { "epoch": 1.3648915187376724, "high_lr": 0.0007268421052631579, "low_lr": 1.4536842105263159e-05, "step": 519 }, { "epoch": 1.3648915187376724, "high_lr": 0.0007268421052631579, "low_lr": 1.4536842105263159e-05, "step": 519 }, { "epoch": 1.3675213675213675, "grad_norm": 1.1063448190689087, "learning_rate": 0.0007263157894736843, "loss": 1.5369, "step": 520 }, { "epoch": 1.3675213675213675, "high_lr": 0.0007263157894736843, "low_lr": 1.4526315789473687e-05, "step": 520 }, { "epoch": 1.3675213675213675, "high_lr": 0.0007263157894736843, "low_lr": 1.4526315789473687e-05, "step": 520 }, { "epoch": 1.3675213675213675, "high_lr": 0.0007263157894736843, "low_lr": 1.4526315789473687e-05, "step": 520 }, { "epoch": 1.3675213675213675, "high_lr": 0.0007263157894736843, "low_lr": 1.4526315789473687e-05, "step": 520 }, { "epoch": 1.3675213675213675, "high_lr": 0.0007263157894736843, "low_lr": 1.4526315789473687e-05, "step": 520 }, { "epoch": 1.3675213675213675, "high_lr": 0.0007263157894736843, "low_lr": 1.4526315789473687e-05, "step": 520 }, { "epoch": 1.3675213675213675, "high_lr": 0.0007263157894736843, "low_lr": 1.4526315789473687e-05, "step": 520 }, { "epoch": 1.3675213675213675, "high_lr": 0.0007263157894736843, "low_lr": 1.4526315789473687e-05, "step": 520 }, { "epoch": 1.3701512163050624, "grad_norm": 1.0946933031082153, "learning_rate": 0.0007257894736842106, "loss": 1.5006, "step": 521 }, { "epoch": 1.3701512163050624, "high_lr": 0.0007257894736842106, "low_lr": 1.4515789473684211e-05, "step": 521 }, { "epoch": 1.3701512163050624, "high_lr": 0.0007257894736842106, "low_lr": 1.4515789473684211e-05, "step": 521 }, { "epoch": 1.3701512163050624, "high_lr": 0.0007257894736842106, "low_lr": 1.4515789473684211e-05, "step": 521 }, { "epoch": 1.3701512163050624, "high_lr": 0.0007257894736842106, "low_lr": 1.4515789473684211e-05, "step": 521 }, { "epoch": 1.3701512163050624, "high_lr": 0.0007257894736842106, "low_lr": 1.4515789473684211e-05, "step": 521 }, { "epoch": 1.3701512163050624, "high_lr": 0.0007257894736842106, "low_lr": 1.4515789473684211e-05, "step": 521 }, { "epoch": 1.3701512163050624, "high_lr": 0.0007257894736842106, "low_lr": 1.4515789473684211e-05, "step": 521 }, { "epoch": 1.3701512163050624, "high_lr": 0.0007257894736842106, "low_lr": 1.4515789473684211e-05, "step": 521 }, { "epoch": 1.3727810650887573, "grad_norm": 1.0498840808868408, "learning_rate": 0.0007252631578947369, "loss": 1.4724, "step": 522 }, { "epoch": 1.3727810650887573, "high_lr": 0.0007252631578947369, "low_lr": 1.4505263157894738e-05, "step": 522 }, { "epoch": 1.3727810650887573, "high_lr": 0.0007252631578947369, "low_lr": 1.4505263157894738e-05, "step": 522 }, { "epoch": 1.3727810650887573, "high_lr": 0.0007252631578947369, "low_lr": 1.4505263157894738e-05, "step": 522 }, { "epoch": 1.3727810650887573, "high_lr": 0.0007252631578947369, "low_lr": 1.4505263157894738e-05, "step": 522 }, { "epoch": 1.3727810650887573, "high_lr": 0.0007252631578947369, "low_lr": 1.4505263157894738e-05, "step": 522 }, { "epoch": 1.3727810650887573, "high_lr": 0.0007252631578947369, "low_lr": 1.4505263157894738e-05, "step": 522 }, { "epoch": 1.3727810650887573, "high_lr": 0.0007252631578947369, "low_lr": 1.4505263157894738e-05, "step": 522 }, { "epoch": 1.3727810650887573, "high_lr": 0.0007252631578947369, "low_lr": 1.4505263157894738e-05, "step": 522 }, { "epoch": 1.3754109138724524, "grad_norm": 1.0840269327163696, "learning_rate": 0.0007247368421052631, "loss": 1.4561, "step": 523 }, { "epoch": 1.3754109138724524, "high_lr": 0.0007247368421052631, "low_lr": 1.4494736842105264e-05, "step": 523 }, { "epoch": 1.3754109138724524, "high_lr": 0.0007247368421052631, "low_lr": 1.4494736842105264e-05, "step": 523 }, { "epoch": 1.3754109138724524, "high_lr": 0.0007247368421052631, "low_lr": 1.4494736842105264e-05, "step": 523 }, { "epoch": 1.3754109138724524, "high_lr": 0.0007247368421052631, "low_lr": 1.4494736842105264e-05, "step": 523 }, { "epoch": 1.3754109138724524, "high_lr": 0.0007247368421052631, "low_lr": 1.4494736842105264e-05, "step": 523 }, { "epoch": 1.3754109138724524, "high_lr": 0.0007247368421052631, "low_lr": 1.4494736842105264e-05, "step": 523 }, { "epoch": 1.3754109138724524, "high_lr": 0.0007247368421052631, "low_lr": 1.4494736842105264e-05, "step": 523 }, { "epoch": 1.3754109138724524, "high_lr": 0.0007247368421052631, "low_lr": 1.4494736842105264e-05, "step": 523 }, { "epoch": 1.3780407626561473, "grad_norm": 1.0193777084350586, "learning_rate": 0.0007242105263157895, "loss": 1.4748, "step": 524 }, { "epoch": 1.3780407626561473, "high_lr": 0.0007242105263157895, "low_lr": 1.4484210526315792e-05, "step": 524 }, { "epoch": 1.3780407626561473, "high_lr": 0.0007242105263157895, "low_lr": 1.4484210526315792e-05, "step": 524 }, { "epoch": 1.3780407626561473, "high_lr": 0.0007242105263157895, "low_lr": 1.4484210526315792e-05, "step": 524 }, { "epoch": 1.3780407626561473, "high_lr": 0.0007242105263157895, "low_lr": 1.4484210526315792e-05, "step": 524 }, { "epoch": 1.3780407626561473, "high_lr": 0.0007242105263157895, "low_lr": 1.4484210526315792e-05, "step": 524 }, { "epoch": 1.3780407626561473, "high_lr": 0.0007242105263157895, "low_lr": 1.4484210526315792e-05, "step": 524 }, { "epoch": 1.3780407626561473, "high_lr": 0.0007242105263157895, "low_lr": 1.4484210526315792e-05, "step": 524 }, { "epoch": 1.3780407626561473, "high_lr": 0.0007242105263157895, "low_lr": 1.4484210526315792e-05, "step": 524 }, { "epoch": 1.3806706114398422, "grad_norm": 1.0453554391860962, "learning_rate": 0.0007236842105263158, "loss": 1.4933, "step": 525 }, { "epoch": 1.3806706114398422, "high_lr": 0.0007236842105263158, "low_lr": 1.4473684210526317e-05, "step": 525 }, { "epoch": 1.3806706114398422, "high_lr": 0.0007236842105263158, "low_lr": 1.4473684210526317e-05, "step": 525 }, { "epoch": 1.3806706114398422, "high_lr": 0.0007236842105263158, "low_lr": 1.4473684210526317e-05, "step": 525 }, { "epoch": 1.3806706114398422, "high_lr": 0.0007236842105263158, "low_lr": 1.4473684210526317e-05, "step": 525 }, { "epoch": 1.3806706114398422, "high_lr": 0.0007236842105263158, "low_lr": 1.4473684210526317e-05, "step": 525 }, { "epoch": 1.3806706114398422, "high_lr": 0.0007236842105263158, "low_lr": 1.4473684210526317e-05, "step": 525 }, { "epoch": 1.3806706114398422, "high_lr": 0.0007236842105263158, "low_lr": 1.4473684210526317e-05, "step": 525 }, { "epoch": 1.3806706114398422, "high_lr": 0.0007236842105263158, "low_lr": 1.4473684210526317e-05, "step": 525 }, { "epoch": 1.3833004602235373, "grad_norm": 1.0979812145233154, "learning_rate": 0.0007231578947368421, "loss": 1.49, "step": 526 }, { "epoch": 1.3833004602235373, "high_lr": 0.0007231578947368421, "low_lr": 1.4463157894736843e-05, "step": 526 }, { "epoch": 1.3833004602235373, "high_lr": 0.0007231578947368421, "low_lr": 1.4463157894736843e-05, "step": 526 }, { "epoch": 1.3833004602235373, "high_lr": 0.0007231578947368421, "low_lr": 1.4463157894736843e-05, "step": 526 }, { "epoch": 1.3833004602235373, "high_lr": 0.0007231578947368421, "low_lr": 1.4463157894736843e-05, "step": 526 }, { "epoch": 1.3833004602235373, "high_lr": 0.0007231578947368421, "low_lr": 1.4463157894736843e-05, "step": 526 }, { "epoch": 1.3833004602235373, "high_lr": 0.0007231578947368421, "low_lr": 1.4463157894736843e-05, "step": 526 }, { "epoch": 1.3833004602235373, "high_lr": 0.0007231578947368421, "low_lr": 1.4463157894736843e-05, "step": 526 }, { "epoch": 1.3833004602235373, "high_lr": 0.0007231578947368421, "low_lr": 1.4463157894736843e-05, "step": 526 }, { "epoch": 1.3859303090072321, "grad_norm": 1.0743083953857422, "learning_rate": 0.0007226315789473684, "loss": 1.5036, "step": 527 }, { "epoch": 1.3859303090072321, "high_lr": 0.0007226315789473684, "low_lr": 1.445263157894737e-05, "step": 527 }, { "epoch": 1.3859303090072321, "high_lr": 0.0007226315789473684, "low_lr": 1.445263157894737e-05, "step": 527 }, { "epoch": 1.3859303090072321, "high_lr": 0.0007226315789473684, "low_lr": 1.445263157894737e-05, "step": 527 }, { "epoch": 1.3859303090072321, "high_lr": 0.0007226315789473684, "low_lr": 1.445263157894737e-05, "step": 527 }, { "epoch": 1.3859303090072321, "high_lr": 0.0007226315789473684, "low_lr": 1.445263157894737e-05, "step": 527 }, { "epoch": 1.3859303090072321, "high_lr": 0.0007226315789473684, "low_lr": 1.445263157894737e-05, "step": 527 }, { "epoch": 1.3859303090072321, "high_lr": 0.0007226315789473684, "low_lr": 1.445263157894737e-05, "step": 527 }, { "epoch": 1.3859303090072321, "high_lr": 0.0007226315789473684, "low_lr": 1.445263157894737e-05, "step": 527 }, { "epoch": 1.388560157790927, "grad_norm": 1.0622069835662842, "learning_rate": 0.0007221052631578947, "loss": 1.5221, "step": 528 }, { "epoch": 1.388560157790927, "high_lr": 0.0007221052631578947, "low_lr": 1.4442105263157896e-05, "step": 528 }, { "epoch": 1.388560157790927, "high_lr": 0.0007221052631578947, "low_lr": 1.4442105263157896e-05, "step": 528 }, { "epoch": 1.388560157790927, "high_lr": 0.0007221052631578947, "low_lr": 1.4442105263157896e-05, "step": 528 }, { "epoch": 1.388560157790927, "high_lr": 0.0007221052631578947, "low_lr": 1.4442105263157896e-05, "step": 528 }, { "epoch": 1.388560157790927, "high_lr": 0.0007221052631578947, "low_lr": 1.4442105263157896e-05, "step": 528 }, { "epoch": 1.388560157790927, "high_lr": 0.0007221052631578947, "low_lr": 1.4442105263157896e-05, "step": 528 }, { "epoch": 1.388560157790927, "high_lr": 0.0007221052631578947, "low_lr": 1.4442105263157896e-05, "step": 528 }, { "epoch": 1.388560157790927, "high_lr": 0.0007221052631578947, "low_lr": 1.4442105263157896e-05, "step": 528 }, { "epoch": 1.3911900065746219, "grad_norm": 1.0960756540298462, "learning_rate": 0.0007215789473684211, "loss": 1.5349, "step": 529 }, { "epoch": 1.3911900065746219, "high_lr": 0.0007215789473684211, "low_lr": 1.4431578947368424e-05, "step": 529 }, { "epoch": 1.3911900065746219, "high_lr": 0.0007215789473684211, "low_lr": 1.4431578947368424e-05, "step": 529 }, { "epoch": 1.3911900065746219, "high_lr": 0.0007215789473684211, "low_lr": 1.4431578947368424e-05, "step": 529 }, { "epoch": 1.3911900065746219, "high_lr": 0.0007215789473684211, "low_lr": 1.4431578947368424e-05, "step": 529 }, { "epoch": 1.3911900065746219, "high_lr": 0.0007215789473684211, "low_lr": 1.4431578947368424e-05, "step": 529 }, { "epoch": 1.3911900065746219, "high_lr": 0.0007215789473684211, "low_lr": 1.4431578947368424e-05, "step": 529 }, { "epoch": 1.3911900065746219, "high_lr": 0.0007215789473684211, "low_lr": 1.4431578947368424e-05, "step": 529 }, { "epoch": 1.3911900065746219, "high_lr": 0.0007215789473684211, "low_lr": 1.4431578947368424e-05, "step": 529 }, { "epoch": 1.3938198553583168, "grad_norm": 1.1283634901046753, "learning_rate": 0.0007210526315789474, "loss": 1.5013, "step": 530 }, { "epoch": 1.3938198553583168, "high_lr": 0.0007210526315789474, "low_lr": 1.4421052631578948e-05, "step": 530 }, { "epoch": 1.3938198553583168, "high_lr": 0.0007210526315789474, "low_lr": 1.4421052631578948e-05, "step": 530 }, { "epoch": 1.3938198553583168, "high_lr": 0.0007210526315789474, "low_lr": 1.4421052631578948e-05, "step": 530 }, { "epoch": 1.3938198553583168, "high_lr": 0.0007210526315789474, "low_lr": 1.4421052631578948e-05, "step": 530 }, { "epoch": 1.3938198553583168, "high_lr": 0.0007210526315789474, "low_lr": 1.4421052631578948e-05, "step": 530 }, { "epoch": 1.3938198553583168, "high_lr": 0.0007210526315789474, "low_lr": 1.4421052631578948e-05, "step": 530 }, { "epoch": 1.3938198553583168, "high_lr": 0.0007210526315789474, "low_lr": 1.4421052631578948e-05, "step": 530 }, { "epoch": 1.3938198553583168, "high_lr": 0.0007210526315789474, "low_lr": 1.4421052631578948e-05, "step": 530 }, { "epoch": 1.3964497041420119, "grad_norm": 1.0277425050735474, "learning_rate": 0.0007205263157894737, "loss": 1.5036, "step": 531 }, { "epoch": 1.3964497041420119, "high_lr": 0.0007205263157894737, "low_lr": 1.4410526315789475e-05, "step": 531 }, { "epoch": 1.3964497041420119, "high_lr": 0.0007205263157894737, "low_lr": 1.4410526315789475e-05, "step": 531 }, { "epoch": 1.3964497041420119, "high_lr": 0.0007205263157894737, "low_lr": 1.4410526315789475e-05, "step": 531 }, { "epoch": 1.3964497041420119, "high_lr": 0.0007205263157894737, "low_lr": 1.4410526315789475e-05, "step": 531 }, { "epoch": 1.3964497041420119, "high_lr": 0.0007205263157894737, "low_lr": 1.4410526315789475e-05, "step": 531 }, { "epoch": 1.3964497041420119, "high_lr": 0.0007205263157894737, "low_lr": 1.4410526315789475e-05, "step": 531 }, { "epoch": 1.3964497041420119, "high_lr": 0.0007205263157894737, "low_lr": 1.4410526315789475e-05, "step": 531 }, { "epoch": 1.3964497041420119, "high_lr": 0.0007205263157894737, "low_lr": 1.4410526315789475e-05, "step": 531 }, { "epoch": 1.3990795529257067, "grad_norm": 1.084513545036316, "learning_rate": 0.0007199999999999999, "loss": 1.4435, "step": 532 }, { "epoch": 1.3990795529257067, "high_lr": 0.0007199999999999999, "low_lr": 1.4400000000000001e-05, "step": 532 }, { "epoch": 1.3990795529257067, "high_lr": 0.0007199999999999999, "low_lr": 1.4400000000000001e-05, "step": 532 }, { "epoch": 1.3990795529257067, "high_lr": 0.0007199999999999999, "low_lr": 1.4400000000000001e-05, "step": 532 }, { "epoch": 1.3990795529257067, "high_lr": 0.0007199999999999999, "low_lr": 1.4400000000000001e-05, "step": 532 }, { "epoch": 1.3990795529257067, "high_lr": 0.0007199999999999999, "low_lr": 1.4400000000000001e-05, "step": 532 }, { "epoch": 1.3990795529257067, "high_lr": 0.0007199999999999999, "low_lr": 1.4400000000000001e-05, "step": 532 }, { "epoch": 1.3990795529257067, "high_lr": 0.0007199999999999999, "low_lr": 1.4400000000000001e-05, "step": 532 }, { "epoch": 1.3990795529257067, "high_lr": 0.0007199999999999999, "low_lr": 1.4400000000000001e-05, "step": 532 }, { "epoch": 1.4017094017094016, "grad_norm": 1.1466425657272339, "learning_rate": 0.0007194736842105263, "loss": 1.5007, "step": 533 }, { "epoch": 1.4017094017094016, "high_lr": 0.0007194736842105263, "low_lr": 1.4389473684210526e-05, "step": 533 }, { "epoch": 1.4017094017094016, "high_lr": 0.0007194736842105263, "low_lr": 1.4389473684210526e-05, "step": 533 }, { "epoch": 1.4017094017094016, "high_lr": 0.0007194736842105263, "low_lr": 1.4389473684210526e-05, "step": 533 }, { "epoch": 1.4017094017094016, "high_lr": 0.0007194736842105263, "low_lr": 1.4389473684210526e-05, "step": 533 }, { "epoch": 1.4017094017094016, "high_lr": 0.0007194736842105263, "low_lr": 1.4389473684210526e-05, "step": 533 }, { "epoch": 1.4017094017094016, "high_lr": 0.0007194736842105263, "low_lr": 1.4389473684210526e-05, "step": 533 }, { "epoch": 1.4017094017094016, "high_lr": 0.0007194736842105263, "low_lr": 1.4389473684210526e-05, "step": 533 }, { "epoch": 1.4017094017094016, "high_lr": 0.0007194736842105263, "low_lr": 1.4389473684210526e-05, "step": 533 }, { "epoch": 1.4043392504930967, "grad_norm": 1.099838376045227, "learning_rate": 0.0007189473684210527, "loss": 1.5459, "step": 534 }, { "epoch": 1.4043392504930967, "high_lr": 0.0007189473684210527, "low_lr": 1.4378947368421054e-05, "step": 534 }, { "epoch": 1.4043392504930967, "high_lr": 0.0007189473684210527, "low_lr": 1.4378947368421054e-05, "step": 534 }, { "epoch": 1.4043392504930967, "high_lr": 0.0007189473684210527, "low_lr": 1.4378947368421054e-05, "step": 534 }, { "epoch": 1.4043392504930967, "high_lr": 0.0007189473684210527, "low_lr": 1.4378947368421054e-05, "step": 534 }, { "epoch": 1.4043392504930967, "high_lr": 0.0007189473684210527, "low_lr": 1.4378947368421054e-05, "step": 534 }, { "epoch": 1.4043392504930967, "high_lr": 0.0007189473684210527, "low_lr": 1.4378947368421054e-05, "step": 534 }, { "epoch": 1.4043392504930967, "high_lr": 0.0007189473684210527, "low_lr": 1.4378947368421054e-05, "step": 534 }, { "epoch": 1.4043392504930967, "high_lr": 0.0007189473684210527, "low_lr": 1.4378947368421054e-05, "step": 534 }, { "epoch": 1.4069690992767916, "grad_norm": 1.1151689291000366, "learning_rate": 0.000718421052631579, "loss": 1.4911, "step": 535 }, { "epoch": 1.4069690992767916, "high_lr": 0.000718421052631579, "low_lr": 1.436842105263158e-05, "step": 535 }, { "epoch": 1.4069690992767916, "high_lr": 0.000718421052631579, "low_lr": 1.436842105263158e-05, "step": 535 }, { "epoch": 1.4069690992767916, "high_lr": 0.000718421052631579, "low_lr": 1.436842105263158e-05, "step": 535 }, { "epoch": 1.4069690992767916, "high_lr": 0.000718421052631579, "low_lr": 1.436842105263158e-05, "step": 535 }, { "epoch": 1.4069690992767916, "high_lr": 0.000718421052631579, "low_lr": 1.436842105263158e-05, "step": 535 }, { "epoch": 1.4069690992767916, "high_lr": 0.000718421052631579, "low_lr": 1.436842105263158e-05, "step": 535 }, { "epoch": 1.4069690992767916, "high_lr": 0.000718421052631579, "low_lr": 1.436842105263158e-05, "step": 535 }, { "epoch": 1.4069690992767916, "high_lr": 0.000718421052631579, "low_lr": 1.436842105263158e-05, "step": 535 }, { "epoch": 1.4095989480604865, "grad_norm": 1.112339973449707, "learning_rate": 0.0007178947368421053, "loss": 1.4461, "step": 536 }, { "epoch": 1.4095989480604865, "high_lr": 0.0007178947368421053, "low_lr": 1.4357894736842106e-05, "step": 536 }, { "epoch": 1.4095989480604865, "high_lr": 0.0007178947368421053, "low_lr": 1.4357894736842106e-05, "step": 536 }, { "epoch": 1.4095989480604865, "high_lr": 0.0007178947368421053, "low_lr": 1.4357894736842106e-05, "step": 536 }, { "epoch": 1.4095989480604865, "high_lr": 0.0007178947368421053, "low_lr": 1.4357894736842106e-05, "step": 536 }, { "epoch": 1.4095989480604865, "high_lr": 0.0007178947368421053, "low_lr": 1.4357894736842106e-05, "step": 536 }, { "epoch": 1.4095989480604865, "high_lr": 0.0007178947368421053, "low_lr": 1.4357894736842106e-05, "step": 536 }, { "epoch": 1.4095989480604865, "high_lr": 0.0007178947368421053, "low_lr": 1.4357894736842106e-05, "step": 536 }, { "epoch": 1.4095989480604865, "high_lr": 0.0007178947368421053, "low_lr": 1.4357894736842106e-05, "step": 536 }, { "epoch": 1.4122287968441816, "grad_norm": 1.1630061864852905, "learning_rate": 0.0007173684210526316, "loss": 1.4448, "step": 537 }, { "epoch": 1.4122287968441816, "high_lr": 0.0007173684210526316, "low_lr": 1.4347368421052633e-05, "step": 537 }, { "epoch": 1.4122287968441816, "high_lr": 0.0007173684210526316, "low_lr": 1.4347368421052633e-05, "step": 537 }, { "epoch": 1.4122287968441816, "high_lr": 0.0007173684210526316, "low_lr": 1.4347368421052633e-05, "step": 537 }, { "epoch": 1.4122287968441816, "high_lr": 0.0007173684210526316, "low_lr": 1.4347368421052633e-05, "step": 537 }, { "epoch": 1.4122287968441816, "high_lr": 0.0007173684210526316, "low_lr": 1.4347368421052633e-05, "step": 537 }, { "epoch": 1.4122287968441816, "high_lr": 0.0007173684210526316, "low_lr": 1.4347368421052633e-05, "step": 537 }, { "epoch": 1.4122287968441816, "high_lr": 0.0007173684210526316, "low_lr": 1.4347368421052633e-05, "step": 537 }, { "epoch": 1.4122287968441816, "high_lr": 0.0007173684210526316, "low_lr": 1.4347368421052633e-05, "step": 537 }, { "epoch": 1.4148586456278764, "grad_norm": 1.1844900846481323, "learning_rate": 0.000716842105263158, "loss": 1.4975, "step": 538 }, { "epoch": 1.4148586456278764, "high_lr": 0.000716842105263158, "low_lr": 1.433684210526316e-05, "step": 538 }, { "epoch": 1.4148586456278764, "high_lr": 0.000716842105263158, "low_lr": 1.433684210526316e-05, "step": 538 }, { "epoch": 1.4148586456278764, "high_lr": 0.000716842105263158, "low_lr": 1.433684210526316e-05, "step": 538 }, { "epoch": 1.4148586456278764, "high_lr": 0.000716842105263158, "low_lr": 1.433684210526316e-05, "step": 538 }, { "epoch": 1.4148586456278764, "high_lr": 0.000716842105263158, "low_lr": 1.433684210526316e-05, "step": 538 }, { "epoch": 1.4148586456278764, "high_lr": 0.000716842105263158, "low_lr": 1.433684210526316e-05, "step": 538 }, { "epoch": 1.4148586456278764, "high_lr": 0.000716842105263158, "low_lr": 1.433684210526316e-05, "step": 538 }, { "epoch": 1.4148586456278764, "high_lr": 0.000716842105263158, "low_lr": 1.433684210526316e-05, "step": 538 }, { "epoch": 1.4174884944115713, "grad_norm": 1.059999704360962, "learning_rate": 0.0007163157894736843, "loss": 1.5175, "step": 539 }, { "epoch": 1.4174884944115713, "high_lr": 0.0007163157894736843, "low_lr": 1.4326315789473685e-05, "step": 539 }, { "epoch": 1.4174884944115713, "high_lr": 0.0007163157894736843, "low_lr": 1.4326315789473685e-05, "step": 539 }, { "epoch": 1.4174884944115713, "high_lr": 0.0007163157894736843, "low_lr": 1.4326315789473685e-05, "step": 539 }, { "epoch": 1.4174884944115713, "high_lr": 0.0007163157894736843, "low_lr": 1.4326315789473685e-05, "step": 539 }, { "epoch": 1.4174884944115713, "high_lr": 0.0007163157894736843, "low_lr": 1.4326315789473685e-05, "step": 539 }, { "epoch": 1.4174884944115713, "high_lr": 0.0007163157894736843, "low_lr": 1.4326315789473685e-05, "step": 539 }, { "epoch": 1.4174884944115713, "high_lr": 0.0007163157894736843, "low_lr": 1.4326315789473685e-05, "step": 539 }, { "epoch": 1.4174884944115713, "high_lr": 0.0007163157894736843, "low_lr": 1.4326315789473685e-05, "step": 539 }, { "epoch": 1.4201183431952662, "grad_norm": 1.0168579816818237, "learning_rate": 0.0007157894736842105, "loss": 1.4733, "step": 540 }, { "epoch": 1.4201183431952662, "high_lr": 0.0007157894736842105, "low_lr": 1.4315789473684212e-05, "step": 540 }, { "epoch": 1.4201183431952662, "high_lr": 0.0007157894736842105, "low_lr": 1.4315789473684212e-05, "step": 540 }, { "epoch": 1.4201183431952662, "high_lr": 0.0007157894736842105, "low_lr": 1.4315789473684212e-05, "step": 540 }, { "epoch": 1.4201183431952662, "high_lr": 0.0007157894736842105, "low_lr": 1.4315789473684212e-05, "step": 540 }, { "epoch": 1.4201183431952662, "high_lr": 0.0007157894736842105, "low_lr": 1.4315789473684212e-05, "step": 540 }, { "epoch": 1.4201183431952662, "high_lr": 0.0007157894736842105, "low_lr": 1.4315789473684212e-05, "step": 540 }, { "epoch": 1.4201183431952662, "high_lr": 0.0007157894736842105, "low_lr": 1.4315789473684212e-05, "step": 540 }, { "epoch": 1.4201183431952662, "high_lr": 0.0007157894736842105, "low_lr": 1.4315789473684212e-05, "step": 540 }, { "epoch": 1.422748191978961, "grad_norm": 1.0919007062911987, "learning_rate": 0.0007152631578947368, "loss": 1.4975, "step": 541 }, { "epoch": 1.422748191978961, "high_lr": 0.0007152631578947368, "low_lr": 1.4305263157894738e-05, "step": 541 }, { "epoch": 1.422748191978961, "high_lr": 0.0007152631578947368, "low_lr": 1.4305263157894738e-05, "step": 541 }, { "epoch": 1.422748191978961, "high_lr": 0.0007152631578947368, "low_lr": 1.4305263157894738e-05, "step": 541 }, { "epoch": 1.422748191978961, "high_lr": 0.0007152631578947368, "low_lr": 1.4305263157894738e-05, "step": 541 }, { "epoch": 1.422748191978961, "high_lr": 0.0007152631578947368, "low_lr": 1.4305263157894738e-05, "step": 541 }, { "epoch": 1.422748191978961, "high_lr": 0.0007152631578947368, "low_lr": 1.4305263157894738e-05, "step": 541 }, { "epoch": 1.422748191978961, "high_lr": 0.0007152631578947368, "low_lr": 1.4305263157894738e-05, "step": 541 }, { "epoch": 1.422748191978961, "high_lr": 0.0007152631578947368, "low_lr": 1.4305263157894738e-05, "step": 541 }, { "epoch": 1.4253780407626562, "grad_norm": 1.0264736413955688, "learning_rate": 0.0007147368421052631, "loss": 1.4414, "step": 542 }, { "epoch": 1.4253780407626562, "high_lr": 0.0007147368421052631, "low_lr": 1.4294736842105263e-05, "step": 542 }, { "epoch": 1.4253780407626562, "high_lr": 0.0007147368421052631, "low_lr": 1.4294736842105263e-05, "step": 542 }, { "epoch": 1.4253780407626562, "high_lr": 0.0007147368421052631, "low_lr": 1.4294736842105263e-05, "step": 542 }, { "epoch": 1.4253780407626562, "high_lr": 0.0007147368421052631, "low_lr": 1.4294736842105263e-05, "step": 542 }, { "epoch": 1.4253780407626562, "high_lr": 0.0007147368421052631, "low_lr": 1.4294736842105263e-05, "step": 542 }, { "epoch": 1.4253780407626562, "high_lr": 0.0007147368421052631, "low_lr": 1.4294736842105263e-05, "step": 542 }, { "epoch": 1.4253780407626562, "high_lr": 0.0007147368421052631, "low_lr": 1.4294736842105263e-05, "step": 542 }, { "epoch": 1.4253780407626562, "high_lr": 0.0007147368421052631, "low_lr": 1.4294736842105263e-05, "step": 542 }, { "epoch": 1.428007889546351, "grad_norm": 1.1516650915145874, "learning_rate": 0.0007142105263157895, "loss": 1.5142, "step": 543 }, { "epoch": 1.428007889546351, "high_lr": 0.0007142105263157895, "low_lr": 1.4284210526315792e-05, "step": 543 }, { "epoch": 1.428007889546351, "high_lr": 0.0007142105263157895, "low_lr": 1.4284210526315792e-05, "step": 543 }, { "epoch": 1.428007889546351, "high_lr": 0.0007142105263157895, "low_lr": 1.4284210526315792e-05, "step": 543 }, { "epoch": 1.428007889546351, "high_lr": 0.0007142105263157895, "low_lr": 1.4284210526315792e-05, "step": 543 }, { "epoch": 1.428007889546351, "high_lr": 0.0007142105263157895, "low_lr": 1.4284210526315792e-05, "step": 543 }, { "epoch": 1.428007889546351, "high_lr": 0.0007142105263157895, "low_lr": 1.4284210526315792e-05, "step": 543 }, { "epoch": 1.428007889546351, "high_lr": 0.0007142105263157895, "low_lr": 1.4284210526315792e-05, "step": 543 }, { "epoch": 1.428007889546351, "high_lr": 0.0007142105263157895, "low_lr": 1.4284210526315792e-05, "step": 543 }, { "epoch": 1.430637738330046, "grad_norm": 1.1006218194961548, "learning_rate": 0.0007136842105263158, "loss": 1.5291, "step": 544 }, { "epoch": 1.430637738330046, "high_lr": 0.0007136842105263158, "low_lr": 1.4273684210526317e-05, "step": 544 }, { "epoch": 1.430637738330046, "high_lr": 0.0007136842105263158, "low_lr": 1.4273684210526317e-05, "step": 544 }, { "epoch": 1.430637738330046, "high_lr": 0.0007136842105263158, "low_lr": 1.4273684210526317e-05, "step": 544 }, { "epoch": 1.430637738330046, "high_lr": 0.0007136842105263158, "low_lr": 1.4273684210526317e-05, "step": 544 }, { "epoch": 1.430637738330046, "high_lr": 0.0007136842105263158, "low_lr": 1.4273684210526317e-05, "step": 544 }, { "epoch": 1.430637738330046, "high_lr": 0.0007136842105263158, "low_lr": 1.4273684210526317e-05, "step": 544 }, { "epoch": 1.430637738330046, "high_lr": 0.0007136842105263158, "low_lr": 1.4273684210526317e-05, "step": 544 }, { "epoch": 1.430637738330046, "high_lr": 0.0007136842105263158, "low_lr": 1.4273684210526317e-05, "step": 544 }, { "epoch": 1.433267587113741, "grad_norm": 0.9842214584350586, "learning_rate": 0.0007131578947368421, "loss": 1.4205, "step": 545 }, { "epoch": 1.433267587113741, "high_lr": 0.0007131578947368421, "low_lr": 1.4263157894736843e-05, "step": 545 }, { "epoch": 1.433267587113741, "high_lr": 0.0007131578947368421, "low_lr": 1.4263157894736843e-05, "step": 545 }, { "epoch": 1.433267587113741, "high_lr": 0.0007131578947368421, "low_lr": 1.4263157894736843e-05, "step": 545 }, { "epoch": 1.433267587113741, "high_lr": 0.0007131578947368421, "low_lr": 1.4263157894736843e-05, "step": 545 }, { "epoch": 1.433267587113741, "high_lr": 0.0007131578947368421, "low_lr": 1.4263157894736843e-05, "step": 545 }, { "epoch": 1.433267587113741, "high_lr": 0.0007131578947368421, "low_lr": 1.4263157894736843e-05, "step": 545 }, { "epoch": 1.433267587113741, "high_lr": 0.0007131578947368421, "low_lr": 1.4263157894736843e-05, "step": 545 }, { "epoch": 1.433267587113741, "high_lr": 0.0007131578947368421, "low_lr": 1.4263157894736843e-05, "step": 545 }, { "epoch": 1.435897435897436, "grad_norm": 1.1286381483078003, "learning_rate": 0.0007126315789473684, "loss": 1.506, "step": 546 }, { "epoch": 1.435897435897436, "high_lr": 0.0007126315789473684, "low_lr": 1.425263157894737e-05, "step": 546 }, { "epoch": 1.435897435897436, "high_lr": 0.0007126315789473684, "low_lr": 1.425263157894737e-05, "step": 546 }, { "epoch": 1.435897435897436, "high_lr": 0.0007126315789473684, "low_lr": 1.425263157894737e-05, "step": 546 }, { "epoch": 1.435897435897436, "high_lr": 0.0007126315789473684, "low_lr": 1.425263157894737e-05, "step": 546 }, { "epoch": 1.435897435897436, "high_lr": 0.0007126315789473684, "low_lr": 1.425263157894737e-05, "step": 546 }, { "epoch": 1.435897435897436, "high_lr": 0.0007126315789473684, "low_lr": 1.425263157894737e-05, "step": 546 }, { "epoch": 1.435897435897436, "high_lr": 0.0007126315789473684, "low_lr": 1.425263157894737e-05, "step": 546 }, { "epoch": 1.435897435897436, "high_lr": 0.0007126315789473684, "low_lr": 1.425263157894737e-05, "step": 546 }, { "epoch": 1.4385272846811308, "grad_norm": 1.1527904272079468, "learning_rate": 0.0007121052631578947, "loss": 1.4887, "step": 547 }, { "epoch": 1.4385272846811308, "high_lr": 0.0007121052631578947, "low_lr": 1.4242105263157894e-05, "step": 547 }, { "epoch": 1.4385272846811308, "high_lr": 0.0007121052631578947, "low_lr": 1.4242105263157894e-05, "step": 547 }, { "epoch": 1.4385272846811308, "high_lr": 0.0007121052631578947, "low_lr": 1.4242105263157894e-05, "step": 547 }, { "epoch": 1.4385272846811308, "high_lr": 0.0007121052631578947, "low_lr": 1.4242105263157894e-05, "step": 547 }, { "epoch": 1.4385272846811308, "high_lr": 0.0007121052631578947, "low_lr": 1.4242105263157894e-05, "step": 547 }, { "epoch": 1.4385272846811308, "high_lr": 0.0007121052631578947, "low_lr": 1.4242105263157894e-05, "step": 547 }, { "epoch": 1.4385272846811308, "high_lr": 0.0007121052631578947, "low_lr": 1.4242105263157894e-05, "step": 547 }, { "epoch": 1.4385272846811308, "high_lr": 0.0007121052631578947, "low_lr": 1.4242105263157894e-05, "step": 547 }, { "epoch": 1.4411571334648259, "grad_norm": 1.0327802896499634, "learning_rate": 0.000711578947368421, "loss": 1.4907, "step": 548 }, { "epoch": 1.4411571334648259, "high_lr": 0.000711578947368421, "low_lr": 1.4231578947368422e-05, "step": 548 }, { "epoch": 1.4411571334648259, "high_lr": 0.000711578947368421, "low_lr": 1.4231578947368422e-05, "step": 548 }, { "epoch": 1.4411571334648259, "high_lr": 0.000711578947368421, "low_lr": 1.4231578947368422e-05, "step": 548 }, { "epoch": 1.4411571334648259, "high_lr": 0.000711578947368421, "low_lr": 1.4231578947368422e-05, "step": 548 }, { "epoch": 1.4411571334648259, "high_lr": 0.000711578947368421, "low_lr": 1.4231578947368422e-05, "step": 548 }, { "epoch": 1.4411571334648259, "high_lr": 0.000711578947368421, "low_lr": 1.4231578947368422e-05, "step": 548 }, { "epoch": 1.4411571334648259, "high_lr": 0.000711578947368421, "low_lr": 1.4231578947368422e-05, "step": 548 }, { "epoch": 1.4411571334648259, "high_lr": 0.000711578947368421, "low_lr": 1.4231578947368422e-05, "step": 548 }, { "epoch": 1.4437869822485208, "grad_norm": 0.9996568560600281, "learning_rate": 0.0007110526315789474, "loss": 1.4501, "step": 549 }, { "epoch": 1.4437869822485208, "high_lr": 0.0007110526315789474, "low_lr": 1.4221052631578949e-05, "step": 549 }, { "epoch": 1.4437869822485208, "high_lr": 0.0007110526315789474, "low_lr": 1.4221052631578949e-05, "step": 549 }, { "epoch": 1.4437869822485208, "high_lr": 0.0007110526315789474, "low_lr": 1.4221052631578949e-05, "step": 549 }, { "epoch": 1.4437869822485208, "high_lr": 0.0007110526315789474, "low_lr": 1.4221052631578949e-05, "step": 549 }, { "epoch": 1.4437869822485208, "high_lr": 0.0007110526315789474, "low_lr": 1.4221052631578949e-05, "step": 549 }, { "epoch": 1.4437869822485208, "high_lr": 0.0007110526315789474, "low_lr": 1.4221052631578949e-05, "step": 549 }, { "epoch": 1.4437869822485208, "high_lr": 0.0007110526315789474, "low_lr": 1.4221052631578949e-05, "step": 549 }, { "epoch": 1.4437869822485208, "high_lr": 0.0007110526315789474, "low_lr": 1.4221052631578949e-05, "step": 549 }, { "epoch": 1.4464168310322156, "grad_norm": 1.054540991783142, "learning_rate": 0.0007105263157894737, "loss": 1.4928, "step": 550 }, { "epoch": 1.4464168310322156, "high_lr": 0.0007105263157894737, "low_lr": 1.4210526315789475e-05, "step": 550 }, { "epoch": 1.4464168310322156, "high_lr": 0.0007105263157894737, "low_lr": 1.4210526315789475e-05, "step": 550 }, { "epoch": 1.4464168310322156, "high_lr": 0.0007105263157894737, "low_lr": 1.4210526315789475e-05, "step": 550 }, { "epoch": 1.4464168310322156, "high_lr": 0.0007105263157894737, "low_lr": 1.4210526315789475e-05, "step": 550 }, { "epoch": 1.4464168310322156, "high_lr": 0.0007105263157894737, "low_lr": 1.4210526315789475e-05, "step": 550 }, { "epoch": 1.4464168310322156, "high_lr": 0.0007105263157894737, "low_lr": 1.4210526315789475e-05, "step": 550 }, { "epoch": 1.4464168310322156, "high_lr": 0.0007105263157894737, "low_lr": 1.4210526315789475e-05, "step": 550 }, { "epoch": 1.4464168310322156, "high_lr": 0.0007105263157894737, "low_lr": 1.4210526315789475e-05, "step": 550 }, { "epoch": 1.4490466798159105, "grad_norm": 1.1426942348480225, "learning_rate": 0.00071, "loss": 1.5159, "step": 551 }, { "epoch": 1.4490466798159105, "high_lr": 0.00071, "low_lr": 1.4200000000000001e-05, "step": 551 }, { "epoch": 1.4490466798159105, "high_lr": 0.00071, "low_lr": 1.4200000000000001e-05, "step": 551 }, { "epoch": 1.4490466798159105, "high_lr": 0.00071, "low_lr": 1.4200000000000001e-05, "step": 551 }, { "epoch": 1.4490466798159105, "high_lr": 0.00071, "low_lr": 1.4200000000000001e-05, "step": 551 }, { "epoch": 1.4490466798159105, "high_lr": 0.00071, "low_lr": 1.4200000000000001e-05, "step": 551 }, { "epoch": 1.4490466798159105, "high_lr": 0.00071, "low_lr": 1.4200000000000001e-05, "step": 551 }, { "epoch": 1.4490466798159105, "high_lr": 0.00071, "low_lr": 1.4200000000000001e-05, "step": 551 }, { "epoch": 1.4490466798159105, "high_lr": 0.00071, "low_lr": 1.4200000000000001e-05, "step": 551 }, { "epoch": 1.4516765285996054, "grad_norm": 1.1202912330627441, "learning_rate": 0.0007094736842105264, "loss": 1.494, "step": 552 }, { "epoch": 1.4516765285996054, "high_lr": 0.0007094736842105264, "low_lr": 1.418947368421053e-05, "step": 552 }, { "epoch": 1.4516765285996054, "high_lr": 0.0007094736842105264, "low_lr": 1.418947368421053e-05, "step": 552 }, { "epoch": 1.4516765285996054, "high_lr": 0.0007094736842105264, "low_lr": 1.418947368421053e-05, "step": 552 }, { "epoch": 1.4516765285996054, "high_lr": 0.0007094736842105264, "low_lr": 1.418947368421053e-05, "step": 552 }, { "epoch": 1.4516765285996054, "high_lr": 0.0007094736842105264, "low_lr": 1.418947368421053e-05, "step": 552 }, { "epoch": 1.4516765285996054, "high_lr": 0.0007094736842105264, "low_lr": 1.418947368421053e-05, "step": 552 }, { "epoch": 1.4516765285996054, "high_lr": 0.0007094736842105264, "low_lr": 1.418947368421053e-05, "step": 552 }, { "epoch": 1.4516765285996054, "high_lr": 0.0007094736842105264, "low_lr": 1.418947368421053e-05, "step": 552 }, { "epoch": 1.4543063773833005, "grad_norm": 1.1229134798049927, "learning_rate": 0.0007089473684210527, "loss": 1.4982, "step": 553 }, { "epoch": 1.4543063773833005, "high_lr": 0.0007089473684210527, "low_lr": 1.4178947368421054e-05, "step": 553 }, { "epoch": 1.4543063773833005, "high_lr": 0.0007089473684210527, "low_lr": 1.4178947368421054e-05, "step": 553 }, { "epoch": 1.4543063773833005, "high_lr": 0.0007089473684210527, "low_lr": 1.4178947368421054e-05, "step": 553 }, { "epoch": 1.4543063773833005, "high_lr": 0.0007089473684210527, "low_lr": 1.4178947368421054e-05, "step": 553 }, { "epoch": 1.4543063773833005, "high_lr": 0.0007089473684210527, "low_lr": 1.4178947368421054e-05, "step": 553 }, { "epoch": 1.4543063773833005, "high_lr": 0.0007089473684210527, "low_lr": 1.4178947368421054e-05, "step": 553 }, { "epoch": 1.4543063773833005, "high_lr": 0.0007089473684210527, "low_lr": 1.4178947368421054e-05, "step": 553 }, { "epoch": 1.4543063773833005, "high_lr": 0.0007089473684210527, "low_lr": 1.4178947368421054e-05, "step": 553 }, { "epoch": 1.4569362261669954, "grad_norm": 1.000651478767395, "learning_rate": 0.000708421052631579, "loss": 1.4656, "step": 554 }, { "epoch": 1.4569362261669954, "high_lr": 0.000708421052631579, "low_lr": 1.416842105263158e-05, "step": 554 }, { "epoch": 1.4569362261669954, "high_lr": 0.000708421052631579, "low_lr": 1.416842105263158e-05, "step": 554 }, { "epoch": 1.4569362261669954, "high_lr": 0.000708421052631579, "low_lr": 1.416842105263158e-05, "step": 554 }, { "epoch": 1.4569362261669954, "high_lr": 0.000708421052631579, "low_lr": 1.416842105263158e-05, "step": 554 }, { "epoch": 1.4569362261669954, "high_lr": 0.000708421052631579, "low_lr": 1.416842105263158e-05, "step": 554 }, { "epoch": 1.4569362261669954, "high_lr": 0.000708421052631579, "low_lr": 1.416842105263158e-05, "step": 554 }, { "epoch": 1.4569362261669954, "high_lr": 0.000708421052631579, "low_lr": 1.416842105263158e-05, "step": 554 }, { "epoch": 1.4569362261669954, "high_lr": 0.000708421052631579, "low_lr": 1.416842105263158e-05, "step": 554 }, { "epoch": 1.4595660749506902, "grad_norm": 1.1136142015457153, "learning_rate": 0.0007078947368421053, "loss": 1.5472, "step": 555 }, { "epoch": 1.4595660749506902, "high_lr": 0.0007078947368421053, "low_lr": 1.4157894736842107e-05, "step": 555 }, { "epoch": 1.4595660749506902, "high_lr": 0.0007078947368421053, "low_lr": 1.4157894736842107e-05, "step": 555 }, { "epoch": 1.4595660749506902, "high_lr": 0.0007078947368421053, "low_lr": 1.4157894736842107e-05, "step": 555 }, { "epoch": 1.4595660749506902, "high_lr": 0.0007078947368421053, "low_lr": 1.4157894736842107e-05, "step": 555 }, { "epoch": 1.4595660749506902, "high_lr": 0.0007078947368421053, "low_lr": 1.4157894736842107e-05, "step": 555 }, { "epoch": 1.4595660749506902, "high_lr": 0.0007078947368421053, "low_lr": 1.4157894736842107e-05, "step": 555 }, { "epoch": 1.4595660749506902, "high_lr": 0.0007078947368421053, "low_lr": 1.4157894736842107e-05, "step": 555 }, { "epoch": 1.4595660749506902, "high_lr": 0.0007078947368421053, "low_lr": 1.4157894736842107e-05, "step": 555 }, { "epoch": 1.4621959237343853, "grad_norm": 1.0530623197555542, "learning_rate": 0.0007073684210526316, "loss": 1.4984, "step": 556 }, { "epoch": 1.4621959237343853, "high_lr": 0.0007073684210526316, "low_lr": 1.4147368421052631e-05, "step": 556 }, { "epoch": 1.4621959237343853, "high_lr": 0.0007073684210526316, "low_lr": 1.4147368421052631e-05, "step": 556 }, { "epoch": 1.4621959237343853, "high_lr": 0.0007073684210526316, "low_lr": 1.4147368421052631e-05, "step": 556 }, { "epoch": 1.4621959237343853, "high_lr": 0.0007073684210526316, "low_lr": 1.4147368421052631e-05, "step": 556 }, { "epoch": 1.4621959237343853, "high_lr": 0.0007073684210526316, "low_lr": 1.4147368421052631e-05, "step": 556 }, { "epoch": 1.4621959237343853, "high_lr": 0.0007073684210526316, "low_lr": 1.4147368421052631e-05, "step": 556 }, { "epoch": 1.4621959237343853, "high_lr": 0.0007073684210526316, "low_lr": 1.4147368421052631e-05, "step": 556 }, { "epoch": 1.4621959237343853, "high_lr": 0.0007073684210526316, "low_lr": 1.4147368421052631e-05, "step": 556 }, { "epoch": 1.4648257725180802, "grad_norm": 1.0790746212005615, "learning_rate": 0.000706842105263158, "loss": 1.492, "step": 557 }, { "epoch": 1.4648257725180802, "high_lr": 0.000706842105263158, "low_lr": 1.413684210526316e-05, "step": 557 }, { "epoch": 1.4648257725180802, "high_lr": 0.000706842105263158, "low_lr": 1.413684210526316e-05, "step": 557 }, { "epoch": 1.4648257725180802, "high_lr": 0.000706842105263158, "low_lr": 1.413684210526316e-05, "step": 557 }, { "epoch": 1.4648257725180802, "high_lr": 0.000706842105263158, "low_lr": 1.413684210526316e-05, "step": 557 }, { "epoch": 1.4648257725180802, "high_lr": 0.000706842105263158, "low_lr": 1.413684210526316e-05, "step": 557 }, { "epoch": 1.4648257725180802, "high_lr": 0.000706842105263158, "low_lr": 1.413684210526316e-05, "step": 557 }, { "epoch": 1.4648257725180802, "high_lr": 0.000706842105263158, "low_lr": 1.413684210526316e-05, "step": 557 }, { "epoch": 1.4648257725180802, "high_lr": 0.000706842105263158, "low_lr": 1.413684210526316e-05, "step": 557 }, { "epoch": 1.467455621301775, "grad_norm": 1.2384121417999268, "learning_rate": 0.0007063157894736842, "loss": 1.4896, "step": 558 }, { "epoch": 1.467455621301775, "high_lr": 0.0007063157894736842, "low_lr": 1.4126315789473686e-05, "step": 558 }, { "epoch": 1.467455621301775, "high_lr": 0.0007063157894736842, "low_lr": 1.4126315789473686e-05, "step": 558 }, { "epoch": 1.467455621301775, "high_lr": 0.0007063157894736842, "low_lr": 1.4126315789473686e-05, "step": 558 }, { "epoch": 1.467455621301775, "high_lr": 0.0007063157894736842, "low_lr": 1.4126315789473686e-05, "step": 558 }, { "epoch": 1.467455621301775, "high_lr": 0.0007063157894736842, "low_lr": 1.4126315789473686e-05, "step": 558 }, { "epoch": 1.467455621301775, "high_lr": 0.0007063157894736842, "low_lr": 1.4126315789473686e-05, "step": 558 }, { "epoch": 1.467455621301775, "high_lr": 0.0007063157894736842, "low_lr": 1.4126315789473686e-05, "step": 558 }, { "epoch": 1.467455621301775, "high_lr": 0.0007063157894736842, "low_lr": 1.4126315789473686e-05, "step": 558 }, { "epoch": 1.4700854700854702, "grad_norm": 1.0954855680465698, "learning_rate": 0.0007057894736842105, "loss": 1.4779, "step": 559 }, { "epoch": 1.4700854700854702, "high_lr": 0.0007057894736842105, "low_lr": 1.4115789473684212e-05, "step": 559 }, { "epoch": 1.4700854700854702, "high_lr": 0.0007057894736842105, "low_lr": 1.4115789473684212e-05, "step": 559 }, { "epoch": 1.4700854700854702, "high_lr": 0.0007057894736842105, "low_lr": 1.4115789473684212e-05, "step": 559 }, { "epoch": 1.4700854700854702, "high_lr": 0.0007057894736842105, "low_lr": 1.4115789473684212e-05, "step": 559 }, { "epoch": 1.4700854700854702, "high_lr": 0.0007057894736842105, "low_lr": 1.4115789473684212e-05, "step": 559 }, { "epoch": 1.4700854700854702, "high_lr": 0.0007057894736842105, "low_lr": 1.4115789473684212e-05, "step": 559 }, { "epoch": 1.4700854700854702, "high_lr": 0.0007057894736842105, "low_lr": 1.4115789473684212e-05, "step": 559 }, { "epoch": 1.4700854700854702, "high_lr": 0.0007057894736842105, "low_lr": 1.4115789473684212e-05, "step": 559 }, { "epoch": 1.472715318869165, "grad_norm": 1.0818630456924438, "learning_rate": 0.0007052631578947368, "loss": 1.5016, "step": 560 }, { "epoch": 1.472715318869165, "high_lr": 0.0007052631578947368, "low_lr": 1.4105263157894738e-05, "step": 560 }, { "epoch": 1.472715318869165, "high_lr": 0.0007052631578947368, "low_lr": 1.4105263157894738e-05, "step": 560 }, { "epoch": 1.472715318869165, "high_lr": 0.0007052631578947368, "low_lr": 1.4105263157894738e-05, "step": 560 }, { "epoch": 1.472715318869165, "high_lr": 0.0007052631578947368, "low_lr": 1.4105263157894738e-05, "step": 560 }, { "epoch": 1.472715318869165, "high_lr": 0.0007052631578947368, "low_lr": 1.4105263157894738e-05, "step": 560 }, { "epoch": 1.472715318869165, "high_lr": 0.0007052631578947368, "low_lr": 1.4105263157894738e-05, "step": 560 }, { "epoch": 1.472715318869165, "high_lr": 0.0007052631578947368, "low_lr": 1.4105263157894738e-05, "step": 560 }, { "epoch": 1.472715318869165, "high_lr": 0.0007052631578947368, "low_lr": 1.4105263157894738e-05, "step": 560 }, { "epoch": 1.47534516765286, "grad_norm": 2.5597925186157227, "learning_rate": 0.0007047368421052631, "loss": 1.5734, "step": 561 }, { "epoch": 1.47534516765286, "high_lr": 0.0007047368421052631, "low_lr": 1.4094736842105263e-05, "step": 561 }, { "epoch": 1.47534516765286, "high_lr": 0.0007047368421052631, "low_lr": 1.4094736842105263e-05, "step": 561 }, { "epoch": 1.47534516765286, "high_lr": 0.0007047368421052631, "low_lr": 1.4094736842105263e-05, "step": 561 }, { "epoch": 1.47534516765286, "high_lr": 0.0007047368421052631, "low_lr": 1.4094736842105263e-05, "step": 561 }, { "epoch": 1.47534516765286, "high_lr": 0.0007047368421052631, "low_lr": 1.4094736842105263e-05, "step": 561 }, { "epoch": 1.47534516765286, "high_lr": 0.0007047368421052631, "low_lr": 1.4094736842105263e-05, "step": 561 }, { "epoch": 1.47534516765286, "high_lr": 0.0007047368421052631, "low_lr": 1.4094736842105263e-05, "step": 561 }, { "epoch": 1.47534516765286, "high_lr": 0.0007047368421052631, "low_lr": 1.4094736842105263e-05, "step": 561 }, { "epoch": 1.4779750164365548, "grad_norm": 1.0145095586776733, "learning_rate": 0.0007042105263157895, "loss": 1.5125, "step": 562 }, { "epoch": 1.4779750164365548, "high_lr": 0.0007042105263157895, "low_lr": 1.4084210526315791e-05, "step": 562 }, { "epoch": 1.4779750164365548, "high_lr": 0.0007042105263157895, "low_lr": 1.4084210526315791e-05, "step": 562 }, { "epoch": 1.4779750164365548, "high_lr": 0.0007042105263157895, "low_lr": 1.4084210526315791e-05, "step": 562 }, { "epoch": 1.4779750164365548, "high_lr": 0.0007042105263157895, "low_lr": 1.4084210526315791e-05, "step": 562 }, { "epoch": 1.4779750164365548, "high_lr": 0.0007042105263157895, "low_lr": 1.4084210526315791e-05, "step": 562 }, { "epoch": 1.4779750164365548, "high_lr": 0.0007042105263157895, "low_lr": 1.4084210526315791e-05, "step": 562 }, { "epoch": 1.4779750164365548, "high_lr": 0.0007042105263157895, "low_lr": 1.4084210526315791e-05, "step": 562 }, { "epoch": 1.4779750164365548, "high_lr": 0.0007042105263157895, "low_lr": 1.4084210526315791e-05, "step": 562 }, { "epoch": 1.4806048652202497, "grad_norm": 75.1147689819336, "learning_rate": 0.0007036842105263158, "loss": 1.4529, "step": 563 }, { "epoch": 1.4806048652202497, "high_lr": 0.0007036842105263158, "low_lr": 1.4073684210526317e-05, "step": 563 }, { "epoch": 1.4806048652202497, "high_lr": 0.0007036842105263158, "low_lr": 1.4073684210526317e-05, "step": 563 }, { "epoch": 1.4806048652202497, "high_lr": 0.0007036842105263158, "low_lr": 1.4073684210526317e-05, "step": 563 }, { "epoch": 1.4806048652202497, "high_lr": 0.0007036842105263158, "low_lr": 1.4073684210526317e-05, "step": 563 }, { "epoch": 1.4806048652202497, "high_lr": 0.0007036842105263158, "low_lr": 1.4073684210526317e-05, "step": 563 }, { "epoch": 1.4806048652202497, "high_lr": 0.0007036842105263158, "low_lr": 1.4073684210526317e-05, "step": 563 }, { "epoch": 1.4806048652202497, "high_lr": 0.0007036842105263158, "low_lr": 1.4073684210526317e-05, "step": 563 }, { "epoch": 1.4806048652202497, "high_lr": 0.0007036842105263158, "low_lr": 1.4073684210526317e-05, "step": 563 }, { "epoch": 1.4832347140039448, "grad_norm": 1.159050464630127, "learning_rate": 0.0007031578947368421, "loss": 1.5372, "step": 564 }, { "epoch": 1.4832347140039448, "high_lr": 0.0007031578947368421, "low_lr": 1.4063157894736844e-05, "step": 564 }, { "epoch": 1.4832347140039448, "high_lr": 0.0007031578947368421, "low_lr": 1.4063157894736844e-05, "step": 564 }, { "epoch": 1.4832347140039448, "high_lr": 0.0007031578947368421, "low_lr": 1.4063157894736844e-05, "step": 564 }, { "epoch": 1.4832347140039448, "high_lr": 0.0007031578947368421, "low_lr": 1.4063157894736844e-05, "step": 564 }, { "epoch": 1.4832347140039448, "high_lr": 0.0007031578947368421, "low_lr": 1.4063157894736844e-05, "step": 564 }, { "epoch": 1.4832347140039448, "high_lr": 0.0007031578947368421, "low_lr": 1.4063157894736844e-05, "step": 564 }, { "epoch": 1.4832347140039448, "high_lr": 0.0007031578947368421, "low_lr": 1.4063157894736844e-05, "step": 564 }, { "epoch": 1.4832347140039448, "high_lr": 0.0007031578947368421, "low_lr": 1.4063157894736844e-05, "step": 564 }, { "epoch": 1.4858645627876397, "grad_norm": 1.106245994567871, "learning_rate": 0.0007026315789473683, "loss": 1.51, "step": 565 }, { "epoch": 1.4858645627876397, "high_lr": 0.0007026315789473683, "low_lr": 1.4052631578947368e-05, "step": 565 }, { "epoch": 1.4858645627876397, "high_lr": 0.0007026315789473683, "low_lr": 1.4052631578947368e-05, "step": 565 }, { "epoch": 1.4858645627876397, "high_lr": 0.0007026315789473683, "low_lr": 1.4052631578947368e-05, "step": 565 }, { "epoch": 1.4858645627876397, "high_lr": 0.0007026315789473683, "low_lr": 1.4052631578947368e-05, "step": 565 }, { "epoch": 1.4858645627876397, "high_lr": 0.0007026315789473683, "low_lr": 1.4052631578947368e-05, "step": 565 }, { "epoch": 1.4858645627876397, "high_lr": 0.0007026315789473683, "low_lr": 1.4052631578947368e-05, "step": 565 }, { "epoch": 1.4858645627876397, "high_lr": 0.0007026315789473683, "low_lr": 1.4052631578947368e-05, "step": 565 }, { "epoch": 1.4858645627876397, "high_lr": 0.0007026315789473683, "low_lr": 1.4052631578947368e-05, "step": 565 }, { "epoch": 1.4884944115713346, "grad_norm": 1.1369872093200684, "learning_rate": 0.0007021052631578948, "loss": 1.4915, "step": 566 }, { "epoch": 1.4884944115713346, "high_lr": 0.0007021052631578948, "low_lr": 1.4042105263157896e-05, "step": 566 }, { "epoch": 1.4884944115713346, "high_lr": 0.0007021052631578948, "low_lr": 1.4042105263157896e-05, "step": 566 }, { "epoch": 1.4884944115713346, "high_lr": 0.0007021052631578948, "low_lr": 1.4042105263157896e-05, "step": 566 }, { "epoch": 1.4884944115713346, "high_lr": 0.0007021052631578948, "low_lr": 1.4042105263157896e-05, "step": 566 }, { "epoch": 1.4884944115713346, "high_lr": 0.0007021052631578948, "low_lr": 1.4042105263157896e-05, "step": 566 }, { "epoch": 1.4884944115713346, "high_lr": 0.0007021052631578948, "low_lr": 1.4042105263157896e-05, "step": 566 }, { "epoch": 1.4884944115713346, "high_lr": 0.0007021052631578948, "low_lr": 1.4042105263157896e-05, "step": 566 }, { "epoch": 1.4884944115713346, "high_lr": 0.0007021052631578948, "low_lr": 1.4042105263157896e-05, "step": 566 }, { "epoch": 1.4911242603550297, "grad_norm": 1.026293396949768, "learning_rate": 0.0007015789473684211, "loss": 1.476, "step": 567 }, { "epoch": 1.4911242603550297, "high_lr": 0.0007015789473684211, "low_lr": 1.4031578947368423e-05, "step": 567 }, { "epoch": 1.4911242603550297, "high_lr": 0.0007015789473684211, "low_lr": 1.4031578947368423e-05, "step": 567 }, { "epoch": 1.4911242603550297, "high_lr": 0.0007015789473684211, "low_lr": 1.4031578947368423e-05, "step": 567 }, { "epoch": 1.4911242603550297, "high_lr": 0.0007015789473684211, "low_lr": 1.4031578947368423e-05, "step": 567 }, { "epoch": 1.4911242603550297, "high_lr": 0.0007015789473684211, "low_lr": 1.4031578947368423e-05, "step": 567 }, { "epoch": 1.4911242603550297, "high_lr": 0.0007015789473684211, "low_lr": 1.4031578947368423e-05, "step": 567 }, { "epoch": 1.4911242603550297, "high_lr": 0.0007015789473684211, "low_lr": 1.4031578947368423e-05, "step": 567 }, { "epoch": 1.4911242603550297, "high_lr": 0.0007015789473684211, "low_lr": 1.4031578947368423e-05, "step": 567 }, { "epoch": 1.4937541091387245, "grad_norm": 1.069638729095459, "learning_rate": 0.0007010526315789474, "loss": 1.488, "step": 568 }, { "epoch": 1.4937541091387245, "high_lr": 0.0007010526315789474, "low_lr": 1.4021052631578949e-05, "step": 568 }, { "epoch": 1.4937541091387245, "high_lr": 0.0007010526315789474, "low_lr": 1.4021052631578949e-05, "step": 568 }, { "epoch": 1.4937541091387245, "high_lr": 0.0007010526315789474, "low_lr": 1.4021052631578949e-05, "step": 568 }, { "epoch": 1.4937541091387245, "high_lr": 0.0007010526315789474, "low_lr": 1.4021052631578949e-05, "step": 568 }, { "epoch": 1.4937541091387245, "high_lr": 0.0007010526315789474, "low_lr": 1.4021052631578949e-05, "step": 568 }, { "epoch": 1.4937541091387245, "high_lr": 0.0007010526315789474, "low_lr": 1.4021052631578949e-05, "step": 568 }, { "epoch": 1.4937541091387245, "high_lr": 0.0007010526315789474, "low_lr": 1.4021052631578949e-05, "step": 568 }, { "epoch": 1.4937541091387245, "high_lr": 0.0007010526315789474, "low_lr": 1.4021052631578949e-05, "step": 568 }, { "epoch": 1.4963839579224194, "grad_norm": 1.0903842449188232, "learning_rate": 0.0007005263157894737, "loss": 1.5264, "step": 569 }, { "epoch": 1.4963839579224194, "high_lr": 0.0007005263157894737, "low_lr": 1.4010526315789475e-05, "step": 569 }, { "epoch": 1.4963839579224194, "high_lr": 0.0007005263157894737, "low_lr": 1.4010526315789475e-05, "step": 569 }, { "epoch": 1.4963839579224194, "high_lr": 0.0007005263157894737, "low_lr": 1.4010526315789475e-05, "step": 569 }, { "epoch": 1.4963839579224194, "high_lr": 0.0007005263157894737, "low_lr": 1.4010526315789475e-05, "step": 569 }, { "epoch": 1.4963839579224194, "high_lr": 0.0007005263157894737, "low_lr": 1.4010526315789475e-05, "step": 569 }, { "epoch": 1.4963839579224194, "high_lr": 0.0007005263157894737, "low_lr": 1.4010526315789475e-05, "step": 569 }, { "epoch": 1.4963839579224194, "high_lr": 0.0007005263157894737, "low_lr": 1.4010526315789475e-05, "step": 569 }, { "epoch": 1.4963839579224194, "high_lr": 0.0007005263157894737, "low_lr": 1.4010526315789475e-05, "step": 569 }, { "epoch": 1.4990138067061145, "grad_norm": 1.1938930749893188, "learning_rate": 0.0007, "loss": 1.5603, "step": 570 }, { "epoch": 1.4990138067061145, "high_lr": 0.0007, "low_lr": 1.4e-05, "step": 570 }, { "epoch": 1.4990138067061145, "high_lr": 0.0007, "low_lr": 1.4e-05, "step": 570 }, { "epoch": 1.4990138067061145, "high_lr": 0.0007, "low_lr": 1.4e-05, "step": 570 }, { "epoch": 1.4990138067061145, "high_lr": 0.0007, "low_lr": 1.4e-05, "step": 570 }, { "epoch": 1.4990138067061145, "high_lr": 0.0007, "low_lr": 1.4e-05, "step": 570 }, { "epoch": 1.4990138067061145, "high_lr": 0.0007, "low_lr": 1.4e-05, "step": 570 }, { "epoch": 1.4990138067061145, "high_lr": 0.0007, "low_lr": 1.4e-05, "step": 570 }, { "epoch": 1.4990138067061145, "high_lr": 0.0007, "low_lr": 1.4e-05, "step": 570 }, { "epoch": 1.5016436554898094, "grad_norm": 1.3760454654693604, "learning_rate": 0.0006994736842105264, "loss": 1.5244, "step": 571 }, { "epoch": 1.5016436554898094, "high_lr": 0.0006994736842105264, "low_lr": 1.3989473684210528e-05, "step": 571 }, { "epoch": 1.5016436554898094, "high_lr": 0.0006994736842105264, "low_lr": 1.3989473684210528e-05, "step": 571 }, { "epoch": 1.5016436554898094, "high_lr": 0.0006994736842105264, "low_lr": 1.3989473684210528e-05, "step": 571 }, { "epoch": 1.5016436554898094, "high_lr": 0.0006994736842105264, "low_lr": 1.3989473684210528e-05, "step": 571 }, { "epoch": 1.5016436554898094, "high_lr": 0.0006994736842105264, "low_lr": 1.3989473684210528e-05, "step": 571 }, { "epoch": 1.5016436554898094, "high_lr": 0.0006994736842105264, "low_lr": 1.3989473684210528e-05, "step": 571 }, { "epoch": 1.5016436554898094, "high_lr": 0.0006994736842105264, "low_lr": 1.3989473684210528e-05, "step": 571 }, { "epoch": 1.5016436554898094, "high_lr": 0.0006994736842105264, "low_lr": 1.3989473684210528e-05, "step": 571 }, { "epoch": 1.5042735042735043, "grad_norm": 1.042982578277588, "learning_rate": 0.0006989473684210527, "loss": 1.441, "step": 572 }, { "epoch": 1.5042735042735043, "high_lr": 0.0006989473684210527, "low_lr": 1.3978947368421054e-05, "step": 572 }, { "epoch": 1.5042735042735043, "high_lr": 0.0006989473684210527, "low_lr": 1.3978947368421054e-05, "step": 572 }, { "epoch": 1.5042735042735043, "high_lr": 0.0006989473684210527, "low_lr": 1.3978947368421054e-05, "step": 572 }, { "epoch": 1.5042735042735043, "high_lr": 0.0006989473684210527, "low_lr": 1.3978947368421054e-05, "step": 572 }, { "epoch": 1.5042735042735043, "high_lr": 0.0006989473684210527, "low_lr": 1.3978947368421054e-05, "step": 572 }, { "epoch": 1.5042735042735043, "high_lr": 0.0006989473684210527, "low_lr": 1.3978947368421054e-05, "step": 572 }, { "epoch": 1.5042735042735043, "high_lr": 0.0006989473684210527, "low_lr": 1.3978947368421054e-05, "step": 572 }, { "epoch": 1.5042735042735043, "high_lr": 0.0006989473684210527, "low_lr": 1.3978947368421054e-05, "step": 572 }, { "epoch": 1.5069033530571994, "grad_norm": 1.1798406839370728, "learning_rate": 0.000698421052631579, "loss": 1.478, "step": 573 }, { "epoch": 1.5069033530571994, "high_lr": 0.000698421052631579, "low_lr": 1.396842105263158e-05, "step": 573 }, { "epoch": 1.5069033530571994, "high_lr": 0.000698421052631579, "low_lr": 1.396842105263158e-05, "step": 573 }, { "epoch": 1.5069033530571994, "high_lr": 0.000698421052631579, "low_lr": 1.396842105263158e-05, "step": 573 }, { "epoch": 1.5069033530571994, "high_lr": 0.000698421052631579, "low_lr": 1.396842105263158e-05, "step": 573 }, { "epoch": 1.5069033530571994, "high_lr": 0.000698421052631579, "low_lr": 1.396842105263158e-05, "step": 573 }, { "epoch": 1.5069033530571994, "high_lr": 0.000698421052631579, "low_lr": 1.396842105263158e-05, "step": 573 }, { "epoch": 1.5069033530571994, "high_lr": 0.000698421052631579, "low_lr": 1.396842105263158e-05, "step": 573 }, { "epoch": 1.5069033530571994, "high_lr": 0.000698421052631579, "low_lr": 1.396842105263158e-05, "step": 573 }, { "epoch": 1.509533201840894, "grad_norm": 1.2408303022384644, "learning_rate": 0.0006978947368421052, "loss": 1.4939, "step": 574 }, { "epoch": 1.509533201840894, "high_lr": 0.0006978947368421052, "low_lr": 1.3957894736842105e-05, "step": 574 }, { "epoch": 1.509533201840894, "high_lr": 0.0006978947368421052, "low_lr": 1.3957894736842105e-05, "step": 574 }, { "epoch": 1.509533201840894, "high_lr": 0.0006978947368421052, "low_lr": 1.3957894736842105e-05, "step": 574 }, { "epoch": 1.509533201840894, "high_lr": 0.0006978947368421052, "low_lr": 1.3957894736842105e-05, "step": 574 }, { "epoch": 1.509533201840894, "high_lr": 0.0006978947368421052, "low_lr": 1.3957894736842105e-05, "step": 574 }, { "epoch": 1.509533201840894, "high_lr": 0.0006978947368421052, "low_lr": 1.3957894736842105e-05, "step": 574 }, { "epoch": 1.509533201840894, "high_lr": 0.0006978947368421052, "low_lr": 1.3957894736842105e-05, "step": 574 }, { "epoch": 1.509533201840894, "high_lr": 0.0006978947368421052, "low_lr": 1.3957894736842105e-05, "step": 574 }, { "epoch": 1.5121630506245891, "grad_norm": 1.1613993644714355, "learning_rate": 0.0006973684210526315, "loss": 1.4693, "step": 575 }, { "epoch": 1.5121630506245891, "high_lr": 0.0006973684210526315, "low_lr": 1.3947368421052631e-05, "step": 575 }, { "epoch": 1.5121630506245891, "high_lr": 0.0006973684210526315, "low_lr": 1.3947368421052631e-05, "step": 575 }, { "epoch": 1.5121630506245891, "high_lr": 0.0006973684210526315, "low_lr": 1.3947368421052631e-05, "step": 575 }, { "epoch": 1.5121630506245891, "high_lr": 0.0006973684210526315, "low_lr": 1.3947368421052631e-05, "step": 575 }, { "epoch": 1.5121630506245891, "high_lr": 0.0006973684210526315, "low_lr": 1.3947368421052631e-05, "step": 575 }, { "epoch": 1.5121630506245891, "high_lr": 0.0006973684210526315, "low_lr": 1.3947368421052631e-05, "step": 575 }, { "epoch": 1.5121630506245891, "high_lr": 0.0006973684210526315, "low_lr": 1.3947368421052631e-05, "step": 575 }, { "epoch": 1.5121630506245891, "high_lr": 0.0006973684210526315, "low_lr": 1.3947368421052631e-05, "step": 575 }, { "epoch": 1.514792899408284, "grad_norm": 1.0958755016326904, "learning_rate": 0.0006968421052631579, "loss": 1.4779, "step": 576 }, { "epoch": 1.514792899408284, "high_lr": 0.0006968421052631579, "low_lr": 1.393684210526316e-05, "step": 576 }, { "epoch": 1.514792899408284, "high_lr": 0.0006968421052631579, "low_lr": 1.393684210526316e-05, "step": 576 }, { "epoch": 1.514792899408284, "high_lr": 0.0006968421052631579, "low_lr": 1.393684210526316e-05, "step": 576 }, { "epoch": 1.514792899408284, "high_lr": 0.0006968421052631579, "low_lr": 1.393684210526316e-05, "step": 576 }, { "epoch": 1.514792899408284, "high_lr": 0.0006968421052631579, "low_lr": 1.393684210526316e-05, "step": 576 }, { "epoch": 1.514792899408284, "high_lr": 0.0006968421052631579, "low_lr": 1.393684210526316e-05, "step": 576 }, { "epoch": 1.514792899408284, "high_lr": 0.0006968421052631579, "low_lr": 1.393684210526316e-05, "step": 576 }, { "epoch": 1.514792899408284, "high_lr": 0.0006968421052631579, "low_lr": 1.393684210526316e-05, "step": 576 }, { "epoch": 1.5174227481919789, "grad_norm": 1.1317672729492188, "learning_rate": 0.0006963157894736842, "loss": 1.472, "step": 577 }, { "epoch": 1.5174227481919789, "high_lr": 0.0006963157894736842, "low_lr": 1.3926315789473686e-05, "step": 577 }, { "epoch": 1.5174227481919789, "high_lr": 0.0006963157894736842, "low_lr": 1.3926315789473686e-05, "step": 577 }, { "epoch": 1.5174227481919789, "high_lr": 0.0006963157894736842, "low_lr": 1.3926315789473686e-05, "step": 577 }, { "epoch": 1.5174227481919789, "high_lr": 0.0006963157894736842, "low_lr": 1.3926315789473686e-05, "step": 577 }, { "epoch": 1.5174227481919789, "high_lr": 0.0006963157894736842, "low_lr": 1.3926315789473686e-05, "step": 577 }, { "epoch": 1.5174227481919789, "high_lr": 0.0006963157894736842, "low_lr": 1.3926315789473686e-05, "step": 577 }, { "epoch": 1.5174227481919789, "high_lr": 0.0006963157894736842, "low_lr": 1.3926315789473686e-05, "step": 577 }, { "epoch": 1.5174227481919789, "high_lr": 0.0006963157894736842, "low_lr": 1.3926315789473686e-05, "step": 577 }, { "epoch": 1.520052596975674, "grad_norm": 1.1417269706726074, "learning_rate": 0.0006957894736842105, "loss": 1.4846, "step": 578 }, { "epoch": 1.520052596975674, "high_lr": 0.0006957894736842105, "low_lr": 1.3915789473684212e-05, "step": 578 }, { "epoch": 1.520052596975674, "high_lr": 0.0006957894736842105, "low_lr": 1.3915789473684212e-05, "step": 578 }, { "epoch": 1.520052596975674, "high_lr": 0.0006957894736842105, "low_lr": 1.3915789473684212e-05, "step": 578 }, { "epoch": 1.520052596975674, "high_lr": 0.0006957894736842105, "low_lr": 1.3915789473684212e-05, "step": 578 }, { "epoch": 1.520052596975674, "high_lr": 0.0006957894736842105, "low_lr": 1.3915789473684212e-05, "step": 578 }, { "epoch": 1.520052596975674, "high_lr": 0.0006957894736842105, "low_lr": 1.3915789473684212e-05, "step": 578 }, { "epoch": 1.520052596975674, "high_lr": 0.0006957894736842105, "low_lr": 1.3915789473684212e-05, "step": 578 }, { "epoch": 1.520052596975674, "high_lr": 0.0006957894736842105, "low_lr": 1.3915789473684212e-05, "step": 578 }, { "epoch": 1.5226824457593688, "grad_norm": 4.384186267852783, "learning_rate": 0.0006952631578947368, "loss": 1.497, "step": 579 }, { "epoch": 1.5226824457593688, "high_lr": 0.0006952631578947368, "low_lr": 1.3905263157894737e-05, "step": 579 }, { "epoch": 1.5226824457593688, "high_lr": 0.0006952631578947368, "low_lr": 1.3905263157894737e-05, "step": 579 }, { "epoch": 1.5226824457593688, "high_lr": 0.0006952631578947368, "low_lr": 1.3905263157894737e-05, "step": 579 }, { "epoch": 1.5226824457593688, "high_lr": 0.0006952631578947368, "low_lr": 1.3905263157894737e-05, "step": 579 }, { "epoch": 1.5226824457593688, "high_lr": 0.0006952631578947368, "low_lr": 1.3905263157894737e-05, "step": 579 }, { "epoch": 1.5226824457593688, "high_lr": 0.0006952631578947368, "low_lr": 1.3905263157894737e-05, "step": 579 }, { "epoch": 1.5226824457593688, "high_lr": 0.0006952631578947368, "low_lr": 1.3905263157894737e-05, "step": 579 }, { "epoch": 1.5226824457593688, "high_lr": 0.0006952631578947368, "low_lr": 1.3905263157894737e-05, "step": 579 }, { "epoch": 1.5253122945430637, "grad_norm": 1.1421034336090088, "learning_rate": 0.0006947368421052632, "loss": 1.5128, "step": 580 }, { "epoch": 1.5253122945430637, "high_lr": 0.0006947368421052632, "low_lr": 1.3894736842105265e-05, "step": 580 }, { "epoch": 1.5253122945430637, "high_lr": 0.0006947368421052632, "low_lr": 1.3894736842105265e-05, "step": 580 }, { "epoch": 1.5253122945430637, "high_lr": 0.0006947368421052632, "low_lr": 1.3894736842105265e-05, "step": 580 }, { "epoch": 1.5253122945430637, "high_lr": 0.0006947368421052632, "low_lr": 1.3894736842105265e-05, "step": 580 }, { "epoch": 1.5253122945430637, "high_lr": 0.0006947368421052632, "low_lr": 1.3894736842105265e-05, "step": 580 }, { "epoch": 1.5253122945430637, "high_lr": 0.0006947368421052632, "low_lr": 1.3894736842105265e-05, "step": 580 }, { "epoch": 1.5253122945430637, "high_lr": 0.0006947368421052632, "low_lr": 1.3894736842105265e-05, "step": 580 }, { "epoch": 1.5253122945430637, "high_lr": 0.0006947368421052632, "low_lr": 1.3894736842105265e-05, "step": 580 }, { "epoch": 1.5279421433267588, "grad_norm": 1.2075952291488647, "learning_rate": 0.0006942105263157895, "loss": 1.5414, "step": 581 }, { "epoch": 1.5279421433267588, "high_lr": 0.0006942105263157895, "low_lr": 1.3884210526315791e-05, "step": 581 }, { "epoch": 1.5279421433267588, "high_lr": 0.0006942105263157895, "low_lr": 1.3884210526315791e-05, "step": 581 }, { "epoch": 1.5279421433267588, "high_lr": 0.0006942105263157895, "low_lr": 1.3884210526315791e-05, "step": 581 }, { "epoch": 1.5279421433267588, "high_lr": 0.0006942105263157895, "low_lr": 1.3884210526315791e-05, "step": 581 }, { "epoch": 1.5279421433267588, "high_lr": 0.0006942105263157895, "low_lr": 1.3884210526315791e-05, "step": 581 }, { "epoch": 1.5279421433267588, "high_lr": 0.0006942105263157895, "low_lr": 1.3884210526315791e-05, "step": 581 }, { "epoch": 1.5279421433267588, "high_lr": 0.0006942105263157895, "low_lr": 1.3884210526315791e-05, "step": 581 }, { "epoch": 1.5279421433267588, "high_lr": 0.0006942105263157895, "low_lr": 1.3884210526315791e-05, "step": 581 }, { "epoch": 1.5305719921104537, "grad_norm": 1.0938867330551147, "learning_rate": 0.0006936842105263159, "loss": 1.4475, "step": 582 }, { "epoch": 1.5305719921104537, "high_lr": 0.0006936842105263159, "low_lr": 1.3873684210526317e-05, "step": 582 }, { "epoch": 1.5305719921104537, "high_lr": 0.0006936842105263159, "low_lr": 1.3873684210526317e-05, "step": 582 }, { "epoch": 1.5305719921104537, "high_lr": 0.0006936842105263159, "low_lr": 1.3873684210526317e-05, "step": 582 }, { "epoch": 1.5305719921104537, "high_lr": 0.0006936842105263159, "low_lr": 1.3873684210526317e-05, "step": 582 }, { "epoch": 1.5305719921104537, "high_lr": 0.0006936842105263159, "low_lr": 1.3873684210526317e-05, "step": 582 }, { "epoch": 1.5305719921104537, "high_lr": 0.0006936842105263159, "low_lr": 1.3873684210526317e-05, "step": 582 }, { "epoch": 1.5305719921104537, "high_lr": 0.0006936842105263159, "low_lr": 1.3873684210526317e-05, "step": 582 }, { "epoch": 1.5305719921104537, "high_lr": 0.0006936842105263159, "low_lr": 1.3873684210526317e-05, "step": 582 }, { "epoch": 1.5332018408941486, "grad_norm": 1.0829936265945435, "learning_rate": 0.0006931578947368421, "loss": 1.436, "step": 583 }, { "epoch": 1.5332018408941486, "high_lr": 0.0006931578947368421, "low_lr": 1.3863157894736842e-05, "step": 583 }, { "epoch": 1.5332018408941486, "high_lr": 0.0006931578947368421, "low_lr": 1.3863157894736842e-05, "step": 583 }, { "epoch": 1.5332018408941486, "high_lr": 0.0006931578947368421, "low_lr": 1.3863157894736842e-05, "step": 583 }, { "epoch": 1.5332018408941486, "high_lr": 0.0006931578947368421, "low_lr": 1.3863157894736842e-05, "step": 583 }, { "epoch": 1.5332018408941486, "high_lr": 0.0006931578947368421, "low_lr": 1.3863157894736842e-05, "step": 583 }, { "epoch": 1.5332018408941486, "high_lr": 0.0006931578947368421, "low_lr": 1.3863157894736842e-05, "step": 583 }, { "epoch": 1.5332018408941486, "high_lr": 0.0006931578947368421, "low_lr": 1.3863157894736842e-05, "step": 583 }, { "epoch": 1.5332018408941486, "high_lr": 0.0006931578947368421, "low_lr": 1.3863157894736842e-05, "step": 583 }, { "epoch": 1.5358316896778437, "grad_norm": 1.1421469449996948, "learning_rate": 0.0006926315789473684, "loss": 1.4937, "step": 584 }, { "epoch": 1.5358316896778437, "high_lr": 0.0006926315789473684, "low_lr": 1.3852631578947368e-05, "step": 584 }, { "epoch": 1.5358316896778437, "high_lr": 0.0006926315789473684, "low_lr": 1.3852631578947368e-05, "step": 584 }, { "epoch": 1.5358316896778437, "high_lr": 0.0006926315789473684, "low_lr": 1.3852631578947368e-05, "step": 584 }, { "epoch": 1.5358316896778437, "high_lr": 0.0006926315789473684, "low_lr": 1.3852631578947368e-05, "step": 584 }, { "epoch": 1.5358316896778437, "high_lr": 0.0006926315789473684, "low_lr": 1.3852631578947368e-05, "step": 584 }, { "epoch": 1.5358316896778437, "high_lr": 0.0006926315789473684, "low_lr": 1.3852631578947368e-05, "step": 584 }, { "epoch": 1.5358316896778437, "high_lr": 0.0006926315789473684, "low_lr": 1.3852631578947368e-05, "step": 584 }, { "epoch": 1.5358316896778437, "high_lr": 0.0006926315789473684, "low_lr": 1.3852631578947368e-05, "step": 584 }, { "epoch": 1.5384615384615383, "grad_norm": 1.1205614805221558, "learning_rate": 0.0006921052631578948, "loss": 1.5554, "step": 585 }, { "epoch": 1.5384615384615383, "high_lr": 0.0006921052631578948, "low_lr": 1.3842105263157896e-05, "step": 585 }, { "epoch": 1.5384615384615383, "high_lr": 0.0006921052631578948, "low_lr": 1.3842105263157896e-05, "step": 585 }, { "epoch": 1.5384615384615383, "high_lr": 0.0006921052631578948, "low_lr": 1.3842105263157896e-05, "step": 585 }, { "epoch": 1.5384615384615383, "high_lr": 0.0006921052631578948, "low_lr": 1.3842105263157896e-05, "step": 585 }, { "epoch": 1.5384615384615383, "high_lr": 0.0006921052631578948, "low_lr": 1.3842105263157896e-05, "step": 585 }, { "epoch": 1.5384615384615383, "high_lr": 0.0006921052631578948, "low_lr": 1.3842105263157896e-05, "step": 585 }, { "epoch": 1.5384615384615383, "high_lr": 0.0006921052631578948, "low_lr": 1.3842105263157896e-05, "step": 585 }, { "epoch": 1.5384615384615383, "high_lr": 0.0006921052631578948, "low_lr": 1.3842105263157896e-05, "step": 585 }, { "epoch": 1.5410913872452334, "grad_norm": 1.1230958700180054, "learning_rate": 0.0006915789473684211, "loss": 1.4586, "step": 586 }, { "epoch": 1.5410913872452334, "high_lr": 0.0006915789473684211, "low_lr": 1.3831578947368423e-05, "step": 586 }, { "epoch": 1.5410913872452334, "high_lr": 0.0006915789473684211, "low_lr": 1.3831578947368423e-05, "step": 586 }, { "epoch": 1.5410913872452334, "high_lr": 0.0006915789473684211, "low_lr": 1.3831578947368423e-05, "step": 586 }, { "epoch": 1.5410913872452334, "high_lr": 0.0006915789473684211, "low_lr": 1.3831578947368423e-05, "step": 586 }, { "epoch": 1.5410913872452334, "high_lr": 0.0006915789473684211, "low_lr": 1.3831578947368423e-05, "step": 586 }, { "epoch": 1.5410913872452334, "high_lr": 0.0006915789473684211, "low_lr": 1.3831578947368423e-05, "step": 586 }, { "epoch": 1.5410913872452334, "high_lr": 0.0006915789473684211, "low_lr": 1.3831578947368423e-05, "step": 586 }, { "epoch": 1.5410913872452334, "high_lr": 0.0006915789473684211, "low_lr": 1.3831578947368423e-05, "step": 586 }, { "epoch": 1.5437212360289283, "grad_norm": 1.1113606691360474, "learning_rate": 0.0006910526315789474, "loss": 1.4812, "step": 587 }, { "epoch": 1.5437212360289283, "high_lr": 0.0006910526315789474, "low_lr": 1.3821052631578949e-05, "step": 587 }, { "epoch": 1.5437212360289283, "high_lr": 0.0006910526315789474, "low_lr": 1.3821052631578949e-05, "step": 587 }, { "epoch": 1.5437212360289283, "high_lr": 0.0006910526315789474, "low_lr": 1.3821052631578949e-05, "step": 587 }, { "epoch": 1.5437212360289283, "high_lr": 0.0006910526315789474, "low_lr": 1.3821052631578949e-05, "step": 587 }, { "epoch": 1.5437212360289283, "high_lr": 0.0006910526315789474, "low_lr": 1.3821052631578949e-05, "step": 587 }, { "epoch": 1.5437212360289283, "high_lr": 0.0006910526315789474, "low_lr": 1.3821052631578949e-05, "step": 587 }, { "epoch": 1.5437212360289283, "high_lr": 0.0006910526315789474, "low_lr": 1.3821052631578949e-05, "step": 587 }, { "epoch": 1.5437212360289283, "high_lr": 0.0006910526315789474, "low_lr": 1.3821052631578949e-05, "step": 587 }, { "epoch": 1.5463510848126232, "grad_norm": 1.0749051570892334, "learning_rate": 0.0006905263157894737, "loss": 1.4668, "step": 588 }, { "epoch": 1.5463510848126232, "high_lr": 0.0006905263157894737, "low_lr": 1.3810526315789474e-05, "step": 588 }, { "epoch": 1.5463510848126232, "high_lr": 0.0006905263157894737, "low_lr": 1.3810526315789474e-05, "step": 588 }, { "epoch": 1.5463510848126232, "high_lr": 0.0006905263157894737, "low_lr": 1.3810526315789474e-05, "step": 588 }, { "epoch": 1.5463510848126232, "high_lr": 0.0006905263157894737, "low_lr": 1.3810526315789474e-05, "step": 588 }, { "epoch": 1.5463510848126232, "high_lr": 0.0006905263157894737, "low_lr": 1.3810526315789474e-05, "step": 588 }, { "epoch": 1.5463510848126232, "high_lr": 0.0006905263157894737, "low_lr": 1.3810526315789474e-05, "step": 588 }, { "epoch": 1.5463510848126232, "high_lr": 0.0006905263157894737, "low_lr": 1.3810526315789474e-05, "step": 588 }, { "epoch": 1.5463510848126232, "high_lr": 0.0006905263157894737, "low_lr": 1.3810526315789474e-05, "step": 588 }, { "epoch": 1.5489809335963183, "grad_norm": 1.0763673782348633, "learning_rate": 0.00069, "loss": 1.5195, "step": 589 }, { "epoch": 1.5489809335963183, "high_lr": 0.00069, "low_lr": 1.38e-05, "step": 589 }, { "epoch": 1.5489809335963183, "high_lr": 0.00069, "low_lr": 1.38e-05, "step": 589 }, { "epoch": 1.5489809335963183, "high_lr": 0.00069, "low_lr": 1.38e-05, "step": 589 }, { "epoch": 1.5489809335963183, "high_lr": 0.00069, "low_lr": 1.38e-05, "step": 589 }, { "epoch": 1.5489809335963183, "high_lr": 0.00069, "low_lr": 1.38e-05, "step": 589 }, { "epoch": 1.5489809335963183, "high_lr": 0.00069, "low_lr": 1.38e-05, "step": 589 }, { "epoch": 1.5489809335963183, "high_lr": 0.00069, "low_lr": 1.38e-05, "step": 589 }, { "epoch": 1.5489809335963183, "high_lr": 0.00069, "low_lr": 1.38e-05, "step": 589 }, { "epoch": 1.5516107823800132, "grad_norm": 1.023079752922058, "learning_rate": 0.0006894736842105264, "loss": 1.4612, "step": 590 }, { "epoch": 1.5516107823800132, "high_lr": 0.0006894736842105264, "low_lr": 1.3789473684210528e-05, "step": 590 }, { "epoch": 1.5516107823800132, "high_lr": 0.0006894736842105264, "low_lr": 1.3789473684210528e-05, "step": 590 }, { "epoch": 1.5516107823800132, "high_lr": 0.0006894736842105264, "low_lr": 1.3789473684210528e-05, "step": 590 }, { "epoch": 1.5516107823800132, "high_lr": 0.0006894736842105264, "low_lr": 1.3789473684210528e-05, "step": 590 }, { "epoch": 1.5516107823800132, "high_lr": 0.0006894736842105264, "low_lr": 1.3789473684210528e-05, "step": 590 }, { "epoch": 1.5516107823800132, "high_lr": 0.0006894736842105264, "low_lr": 1.3789473684210528e-05, "step": 590 }, { "epoch": 1.5516107823800132, "high_lr": 0.0006894736842105264, "low_lr": 1.3789473684210528e-05, "step": 590 }, { "epoch": 1.5516107823800132, "high_lr": 0.0006894736842105264, "low_lr": 1.3789473684210528e-05, "step": 590 }, { "epoch": 1.554240631163708, "grad_norm": 1.1420434713363647, "learning_rate": 0.0006889473684210526, "loss": 1.46, "step": 591 }, { "epoch": 1.554240631163708, "high_lr": 0.0006889473684210526, "low_lr": 1.3778947368421054e-05, "step": 591 }, { "epoch": 1.554240631163708, "high_lr": 0.0006889473684210526, "low_lr": 1.3778947368421054e-05, "step": 591 }, { "epoch": 1.554240631163708, "high_lr": 0.0006889473684210526, "low_lr": 1.3778947368421054e-05, "step": 591 }, { "epoch": 1.554240631163708, "high_lr": 0.0006889473684210526, "low_lr": 1.3778947368421054e-05, "step": 591 }, { "epoch": 1.554240631163708, "high_lr": 0.0006889473684210526, "low_lr": 1.3778947368421054e-05, "step": 591 }, { "epoch": 1.554240631163708, "high_lr": 0.0006889473684210526, "low_lr": 1.3778947368421054e-05, "step": 591 }, { "epoch": 1.554240631163708, "high_lr": 0.0006889473684210526, "low_lr": 1.3778947368421054e-05, "step": 591 }, { "epoch": 1.554240631163708, "high_lr": 0.0006889473684210526, "low_lr": 1.3778947368421054e-05, "step": 591 }, { "epoch": 1.5568704799474031, "grad_norm": 1.221968412399292, "learning_rate": 0.0006884210526315789, "loss": 1.4592, "step": 592 }, { "epoch": 1.5568704799474031, "high_lr": 0.0006884210526315789, "low_lr": 1.3768421052631579e-05, "step": 592 }, { "epoch": 1.5568704799474031, "high_lr": 0.0006884210526315789, "low_lr": 1.3768421052631579e-05, "step": 592 }, { "epoch": 1.5568704799474031, "high_lr": 0.0006884210526315789, "low_lr": 1.3768421052631579e-05, "step": 592 }, { "epoch": 1.5568704799474031, "high_lr": 0.0006884210526315789, "low_lr": 1.3768421052631579e-05, "step": 592 }, { "epoch": 1.5568704799474031, "high_lr": 0.0006884210526315789, "low_lr": 1.3768421052631579e-05, "step": 592 }, { "epoch": 1.5568704799474031, "high_lr": 0.0006884210526315789, "low_lr": 1.3768421052631579e-05, "step": 592 }, { "epoch": 1.5568704799474031, "high_lr": 0.0006884210526315789, "low_lr": 1.3768421052631579e-05, "step": 592 }, { "epoch": 1.5568704799474031, "high_lr": 0.0006884210526315789, "low_lr": 1.3768421052631579e-05, "step": 592 }, { "epoch": 1.559500328731098, "grad_norm": 1.0707217454910278, "learning_rate": 0.0006878947368421052, "loss": 1.5016, "step": 593 }, { "epoch": 1.559500328731098, "high_lr": 0.0006878947368421052, "low_lr": 1.3757894736842105e-05, "step": 593 }, { "epoch": 1.559500328731098, "high_lr": 0.0006878947368421052, "low_lr": 1.3757894736842105e-05, "step": 593 }, { "epoch": 1.559500328731098, "high_lr": 0.0006878947368421052, "low_lr": 1.3757894736842105e-05, "step": 593 }, { "epoch": 1.559500328731098, "high_lr": 0.0006878947368421052, "low_lr": 1.3757894736842105e-05, "step": 593 }, { "epoch": 1.559500328731098, "high_lr": 0.0006878947368421052, "low_lr": 1.3757894736842105e-05, "step": 593 }, { "epoch": 1.559500328731098, "high_lr": 0.0006878947368421052, "low_lr": 1.3757894736842105e-05, "step": 593 }, { "epoch": 1.559500328731098, "high_lr": 0.0006878947368421052, "low_lr": 1.3757894736842105e-05, "step": 593 }, { "epoch": 1.559500328731098, "high_lr": 0.0006878947368421052, "low_lr": 1.3757894736842105e-05, "step": 593 }, { "epoch": 1.5621301775147929, "grad_norm": 1.0185551643371582, "learning_rate": 0.0006873684210526316, "loss": 1.4795, "step": 594 }, { "epoch": 1.5621301775147929, "high_lr": 0.0006873684210526316, "low_lr": 1.3747368421052633e-05, "step": 594 }, { "epoch": 1.5621301775147929, "high_lr": 0.0006873684210526316, "low_lr": 1.3747368421052633e-05, "step": 594 }, { "epoch": 1.5621301775147929, "high_lr": 0.0006873684210526316, "low_lr": 1.3747368421052633e-05, "step": 594 }, { "epoch": 1.5621301775147929, "high_lr": 0.0006873684210526316, "low_lr": 1.3747368421052633e-05, "step": 594 }, { "epoch": 1.5621301775147929, "high_lr": 0.0006873684210526316, "low_lr": 1.3747368421052633e-05, "step": 594 }, { "epoch": 1.5621301775147929, "high_lr": 0.0006873684210526316, "low_lr": 1.3747368421052633e-05, "step": 594 }, { "epoch": 1.5621301775147929, "high_lr": 0.0006873684210526316, "low_lr": 1.3747368421052633e-05, "step": 594 }, { "epoch": 1.5621301775147929, "high_lr": 0.0006873684210526316, "low_lr": 1.3747368421052633e-05, "step": 594 }, { "epoch": 1.564760026298488, "grad_norm": 1.0264899730682373, "learning_rate": 0.0006868421052631579, "loss": 1.488, "step": 595 }, { "epoch": 1.564760026298488, "high_lr": 0.0006868421052631579, "low_lr": 1.373684210526316e-05, "step": 595 }, { "epoch": 1.564760026298488, "high_lr": 0.0006868421052631579, "low_lr": 1.373684210526316e-05, "step": 595 }, { "epoch": 1.564760026298488, "high_lr": 0.0006868421052631579, "low_lr": 1.373684210526316e-05, "step": 595 }, { "epoch": 1.564760026298488, "high_lr": 0.0006868421052631579, "low_lr": 1.373684210526316e-05, "step": 595 }, { "epoch": 1.564760026298488, "high_lr": 0.0006868421052631579, "low_lr": 1.373684210526316e-05, "step": 595 }, { "epoch": 1.564760026298488, "high_lr": 0.0006868421052631579, "low_lr": 1.373684210526316e-05, "step": 595 }, { "epoch": 1.564760026298488, "high_lr": 0.0006868421052631579, "low_lr": 1.373684210526316e-05, "step": 595 }, { "epoch": 1.564760026298488, "high_lr": 0.0006868421052631579, "low_lr": 1.373684210526316e-05, "step": 595 }, { "epoch": 1.5673898750821826, "grad_norm": 1.1029831171035767, "learning_rate": 0.0006863157894736842, "loss": 1.5167, "step": 596 }, { "epoch": 1.5673898750821826, "high_lr": 0.0006863157894736842, "low_lr": 1.3726315789473686e-05, "step": 596 }, { "epoch": 1.5673898750821826, "high_lr": 0.0006863157894736842, "low_lr": 1.3726315789473686e-05, "step": 596 }, { "epoch": 1.5673898750821826, "high_lr": 0.0006863157894736842, "low_lr": 1.3726315789473686e-05, "step": 596 }, { "epoch": 1.5673898750821826, "high_lr": 0.0006863157894736842, "low_lr": 1.3726315789473686e-05, "step": 596 }, { "epoch": 1.5673898750821826, "high_lr": 0.0006863157894736842, "low_lr": 1.3726315789473686e-05, "step": 596 }, { "epoch": 1.5673898750821826, "high_lr": 0.0006863157894736842, "low_lr": 1.3726315789473686e-05, "step": 596 }, { "epoch": 1.5673898750821826, "high_lr": 0.0006863157894736842, "low_lr": 1.3726315789473686e-05, "step": 596 }, { "epoch": 1.5673898750821826, "high_lr": 0.0006863157894736842, "low_lr": 1.3726315789473686e-05, "step": 596 }, { "epoch": 1.5700197238658777, "grad_norm": 1.1172997951507568, "learning_rate": 0.0006857894736842105, "loss": 1.4726, "step": 597 }, { "epoch": 1.5700197238658777, "high_lr": 0.0006857894736842105, "low_lr": 1.371578947368421e-05, "step": 597 }, { "epoch": 1.5700197238658777, "high_lr": 0.0006857894736842105, "low_lr": 1.371578947368421e-05, "step": 597 }, { "epoch": 1.5700197238658777, "high_lr": 0.0006857894736842105, "low_lr": 1.371578947368421e-05, "step": 597 }, { "epoch": 1.5700197238658777, "high_lr": 0.0006857894736842105, "low_lr": 1.371578947368421e-05, "step": 597 }, { "epoch": 1.5700197238658777, "high_lr": 0.0006857894736842105, "low_lr": 1.371578947368421e-05, "step": 597 }, { "epoch": 1.5700197238658777, "high_lr": 0.0006857894736842105, "low_lr": 1.371578947368421e-05, "step": 597 }, { "epoch": 1.5700197238658777, "high_lr": 0.0006857894736842105, "low_lr": 1.371578947368421e-05, "step": 597 }, { "epoch": 1.5700197238658777, "high_lr": 0.0006857894736842105, "low_lr": 1.371578947368421e-05, "step": 597 }, { "epoch": 1.5726495726495726, "grad_norm": 1.0712045431137085, "learning_rate": 0.0006852631578947368, "loss": 1.4423, "step": 598 }, { "epoch": 1.5726495726495726, "high_lr": 0.0006852631578947368, "low_lr": 1.3705263157894737e-05, "step": 598 }, { "epoch": 1.5726495726495726, "high_lr": 0.0006852631578947368, "low_lr": 1.3705263157894737e-05, "step": 598 }, { "epoch": 1.5726495726495726, "high_lr": 0.0006852631578947368, "low_lr": 1.3705263157894737e-05, "step": 598 }, { "epoch": 1.5726495726495726, "high_lr": 0.0006852631578947368, "low_lr": 1.3705263157894737e-05, "step": 598 }, { "epoch": 1.5726495726495726, "high_lr": 0.0006852631578947368, "low_lr": 1.3705263157894737e-05, "step": 598 }, { "epoch": 1.5726495726495726, "high_lr": 0.0006852631578947368, "low_lr": 1.3705263157894737e-05, "step": 598 }, { "epoch": 1.5726495726495726, "high_lr": 0.0006852631578947368, "low_lr": 1.3705263157894737e-05, "step": 598 }, { "epoch": 1.5726495726495726, "high_lr": 0.0006852631578947368, "low_lr": 1.3705263157894737e-05, "step": 598 }, { "epoch": 1.5752794214332675, "grad_norm": 1.0438041687011719, "learning_rate": 0.0006847368421052633, "loss": 1.4476, "step": 599 }, { "epoch": 1.5752794214332675, "high_lr": 0.0006847368421052633, "low_lr": 1.3694736842105265e-05, "step": 599 }, { "epoch": 1.5752794214332675, "high_lr": 0.0006847368421052633, "low_lr": 1.3694736842105265e-05, "step": 599 }, { "epoch": 1.5752794214332675, "high_lr": 0.0006847368421052633, "low_lr": 1.3694736842105265e-05, "step": 599 }, { "epoch": 1.5752794214332675, "high_lr": 0.0006847368421052633, "low_lr": 1.3694736842105265e-05, "step": 599 }, { "epoch": 1.5752794214332675, "high_lr": 0.0006847368421052633, "low_lr": 1.3694736842105265e-05, "step": 599 }, { "epoch": 1.5752794214332675, "high_lr": 0.0006847368421052633, "low_lr": 1.3694736842105265e-05, "step": 599 }, { "epoch": 1.5752794214332675, "high_lr": 0.0006847368421052633, "low_lr": 1.3694736842105265e-05, "step": 599 }, { "epoch": 1.5752794214332675, "high_lr": 0.0006847368421052633, "low_lr": 1.3694736842105265e-05, "step": 599 }, { "epoch": 1.5779092702169626, "grad_norm": 1.0444962978363037, "learning_rate": 0.0006842105263157895, "loss": 1.4121, "step": 600 }, { "epoch": 1.5779092702169626, "high_lr": 0.0006842105263157895, "low_lr": 1.3684210526315791e-05, "step": 600 }, { "epoch": 1.5779092702169626, "high_lr": 0.0006842105263157895, "low_lr": 1.3684210526315791e-05, "step": 600 }, { "epoch": 1.5779092702169626, "high_lr": 0.0006842105263157895, "low_lr": 1.3684210526315791e-05, "step": 600 }, { "epoch": 1.5779092702169626, "high_lr": 0.0006842105263157895, "low_lr": 1.3684210526315791e-05, "step": 600 }, { "epoch": 1.5779092702169626, "high_lr": 0.0006842105263157895, "low_lr": 1.3684210526315791e-05, "step": 600 }, { "epoch": 1.5779092702169626, "high_lr": 0.0006842105263157895, "low_lr": 1.3684210526315791e-05, "step": 600 }, { "epoch": 1.5779092702169626, "high_lr": 0.0006842105263157895, "low_lr": 1.3684210526315791e-05, "step": 600 }, { "epoch": 1.5779092702169626, "high_lr": 0.0006842105263157895, "low_lr": 1.3684210526315791e-05, "step": 600 }, { "epoch": 1.5805391190006575, "grad_norm": 1.0622714757919312, "learning_rate": 0.0006836842105263158, "loss": 1.5067, "step": 601 }, { "epoch": 1.5805391190006575, "high_lr": 0.0006836842105263158, "low_lr": 1.3673684210526316e-05, "step": 601 }, { "epoch": 1.5805391190006575, "high_lr": 0.0006836842105263158, "low_lr": 1.3673684210526316e-05, "step": 601 }, { "epoch": 1.5805391190006575, "high_lr": 0.0006836842105263158, "low_lr": 1.3673684210526316e-05, "step": 601 }, { "epoch": 1.5805391190006575, "high_lr": 0.0006836842105263158, "low_lr": 1.3673684210526316e-05, "step": 601 }, { "epoch": 1.5805391190006575, "high_lr": 0.0006836842105263158, "low_lr": 1.3673684210526316e-05, "step": 601 }, { "epoch": 1.5805391190006575, "high_lr": 0.0006836842105263158, "low_lr": 1.3673684210526316e-05, "step": 601 }, { "epoch": 1.5805391190006575, "high_lr": 0.0006836842105263158, "low_lr": 1.3673684210526316e-05, "step": 601 }, { "epoch": 1.5805391190006575, "high_lr": 0.0006836842105263158, "low_lr": 1.3673684210526316e-05, "step": 601 }, { "epoch": 1.5831689677843523, "grad_norm": 1.0328023433685303, "learning_rate": 0.0006831578947368421, "loss": 1.5176, "step": 602 }, { "epoch": 1.5831689677843523, "high_lr": 0.0006831578947368421, "low_lr": 1.3663157894736842e-05, "step": 602 }, { "epoch": 1.5831689677843523, "high_lr": 0.0006831578947368421, "low_lr": 1.3663157894736842e-05, "step": 602 }, { "epoch": 1.5831689677843523, "high_lr": 0.0006831578947368421, "low_lr": 1.3663157894736842e-05, "step": 602 }, { "epoch": 1.5831689677843523, "high_lr": 0.0006831578947368421, "low_lr": 1.3663157894736842e-05, "step": 602 }, { "epoch": 1.5831689677843523, "high_lr": 0.0006831578947368421, "low_lr": 1.3663157894736842e-05, "step": 602 }, { "epoch": 1.5831689677843523, "high_lr": 0.0006831578947368421, "low_lr": 1.3663157894736842e-05, "step": 602 }, { "epoch": 1.5831689677843523, "high_lr": 0.0006831578947368421, "low_lr": 1.3663157894736842e-05, "step": 602 }, { "epoch": 1.5831689677843523, "high_lr": 0.0006831578947368421, "low_lr": 1.3663157894736842e-05, "step": 602 }, { "epoch": 1.5857988165680474, "grad_norm": 1.0628708600997925, "learning_rate": 0.0006826315789473684, "loss": 1.475, "step": 603 }, { "epoch": 1.5857988165680474, "high_lr": 0.0006826315789473684, "low_lr": 1.3652631578947369e-05, "step": 603 }, { "epoch": 1.5857988165680474, "high_lr": 0.0006826315789473684, "low_lr": 1.3652631578947369e-05, "step": 603 }, { "epoch": 1.5857988165680474, "high_lr": 0.0006826315789473684, "low_lr": 1.3652631578947369e-05, "step": 603 }, { "epoch": 1.5857988165680474, "high_lr": 0.0006826315789473684, "low_lr": 1.3652631578947369e-05, "step": 603 }, { "epoch": 1.5857988165680474, "high_lr": 0.0006826315789473684, "low_lr": 1.3652631578947369e-05, "step": 603 }, { "epoch": 1.5857988165680474, "high_lr": 0.0006826315789473684, "low_lr": 1.3652631578947369e-05, "step": 603 }, { "epoch": 1.5857988165680474, "high_lr": 0.0006826315789473684, "low_lr": 1.3652631578947369e-05, "step": 603 }, { "epoch": 1.5857988165680474, "high_lr": 0.0006826315789473684, "low_lr": 1.3652631578947369e-05, "step": 603 }, { "epoch": 1.5884286653517423, "grad_norm": 1.1042441129684448, "learning_rate": 0.0006821052631578948, "loss": 1.5409, "step": 604 }, { "epoch": 1.5884286653517423, "high_lr": 0.0006821052631578948, "low_lr": 1.3642105263157897e-05, "step": 604 }, { "epoch": 1.5884286653517423, "high_lr": 0.0006821052631578948, "low_lr": 1.3642105263157897e-05, "step": 604 }, { "epoch": 1.5884286653517423, "high_lr": 0.0006821052631578948, "low_lr": 1.3642105263157897e-05, "step": 604 }, { "epoch": 1.5884286653517423, "high_lr": 0.0006821052631578948, "low_lr": 1.3642105263157897e-05, "step": 604 }, { "epoch": 1.5884286653517423, "high_lr": 0.0006821052631578948, "low_lr": 1.3642105263157897e-05, "step": 604 }, { "epoch": 1.5884286653517423, "high_lr": 0.0006821052631578948, "low_lr": 1.3642105263157897e-05, "step": 604 }, { "epoch": 1.5884286653517423, "high_lr": 0.0006821052631578948, "low_lr": 1.3642105263157897e-05, "step": 604 }, { "epoch": 1.5884286653517423, "high_lr": 0.0006821052631578948, "low_lr": 1.3642105263157897e-05, "step": 604 }, { "epoch": 1.5910585141354372, "grad_norm": 1.1029032468795776, "learning_rate": 0.0006815789473684211, "loss": 1.4833, "step": 605 }, { "epoch": 1.5910585141354372, "high_lr": 0.0006815789473684211, "low_lr": 1.3631578947368423e-05, "step": 605 }, { "epoch": 1.5910585141354372, "high_lr": 0.0006815789473684211, "low_lr": 1.3631578947368423e-05, "step": 605 }, { "epoch": 1.5910585141354372, "high_lr": 0.0006815789473684211, "low_lr": 1.3631578947368423e-05, "step": 605 }, { "epoch": 1.5910585141354372, "high_lr": 0.0006815789473684211, "low_lr": 1.3631578947368423e-05, "step": 605 }, { "epoch": 1.5910585141354372, "high_lr": 0.0006815789473684211, "low_lr": 1.3631578947368423e-05, "step": 605 }, { "epoch": 1.5910585141354372, "high_lr": 0.0006815789473684211, "low_lr": 1.3631578947368423e-05, "step": 605 }, { "epoch": 1.5910585141354372, "high_lr": 0.0006815789473684211, "low_lr": 1.3631578947368423e-05, "step": 605 }, { "epoch": 1.5910585141354372, "high_lr": 0.0006815789473684211, "low_lr": 1.3631578947368423e-05, "step": 605 }, { "epoch": 1.5936883629191323, "grad_norm": 1.0896053314208984, "learning_rate": 0.0006810526315789474, "loss": 1.4316, "step": 606 }, { "epoch": 1.5936883629191323, "high_lr": 0.0006810526315789474, "low_lr": 1.3621052631578948e-05, "step": 606 }, { "epoch": 1.5936883629191323, "high_lr": 0.0006810526315789474, "low_lr": 1.3621052631578948e-05, "step": 606 }, { "epoch": 1.5936883629191323, "high_lr": 0.0006810526315789474, "low_lr": 1.3621052631578948e-05, "step": 606 }, { "epoch": 1.5936883629191323, "high_lr": 0.0006810526315789474, "low_lr": 1.3621052631578948e-05, "step": 606 }, { "epoch": 1.5936883629191323, "high_lr": 0.0006810526315789474, "low_lr": 1.3621052631578948e-05, "step": 606 }, { "epoch": 1.5936883629191323, "high_lr": 0.0006810526315789474, "low_lr": 1.3621052631578948e-05, "step": 606 }, { "epoch": 1.5936883629191323, "high_lr": 0.0006810526315789474, "low_lr": 1.3621052631578948e-05, "step": 606 }, { "epoch": 1.5936883629191323, "high_lr": 0.0006810526315789474, "low_lr": 1.3621052631578948e-05, "step": 606 }, { "epoch": 1.596318211702827, "grad_norm": 1.2059811353683472, "learning_rate": 0.0006805263157894737, "loss": 1.4984, "step": 607 }, { "epoch": 1.596318211702827, "high_lr": 0.0006805263157894737, "low_lr": 1.3610526315789474e-05, "step": 607 }, { "epoch": 1.596318211702827, "high_lr": 0.0006805263157894737, "low_lr": 1.3610526315789474e-05, "step": 607 }, { "epoch": 1.596318211702827, "high_lr": 0.0006805263157894737, "low_lr": 1.3610526315789474e-05, "step": 607 }, { "epoch": 1.596318211702827, "high_lr": 0.0006805263157894737, "low_lr": 1.3610526315789474e-05, "step": 607 }, { "epoch": 1.596318211702827, "high_lr": 0.0006805263157894737, "low_lr": 1.3610526315789474e-05, "step": 607 }, { "epoch": 1.596318211702827, "high_lr": 0.0006805263157894737, "low_lr": 1.3610526315789474e-05, "step": 607 }, { "epoch": 1.596318211702827, "high_lr": 0.0006805263157894737, "low_lr": 1.3610526315789474e-05, "step": 607 }, { "epoch": 1.596318211702827, "high_lr": 0.0006805263157894737, "low_lr": 1.3610526315789474e-05, "step": 607 }, { "epoch": 1.598948060486522, "grad_norm": 1.071141242980957, "learning_rate": 0.00068, "loss": 1.5504, "step": 608 }, { "epoch": 1.598948060486522, "high_lr": 0.00068, "low_lr": 1.3600000000000002e-05, "step": 608 }, { "epoch": 1.598948060486522, "high_lr": 0.00068, "low_lr": 1.3600000000000002e-05, "step": 608 }, { "epoch": 1.598948060486522, "high_lr": 0.00068, "low_lr": 1.3600000000000002e-05, "step": 608 }, { "epoch": 1.598948060486522, "high_lr": 0.00068, "low_lr": 1.3600000000000002e-05, "step": 608 }, { "epoch": 1.598948060486522, "high_lr": 0.00068, "low_lr": 1.3600000000000002e-05, "step": 608 }, { "epoch": 1.598948060486522, "high_lr": 0.00068, "low_lr": 1.3600000000000002e-05, "step": 608 }, { "epoch": 1.598948060486522, "high_lr": 0.00068, "low_lr": 1.3600000000000002e-05, "step": 608 }, { "epoch": 1.598948060486522, "high_lr": 0.00068, "low_lr": 1.3600000000000002e-05, "step": 608 }, { "epoch": 1.601577909270217, "grad_norm": 1.0634510517120361, "learning_rate": 0.0006794736842105263, "loss": 1.5281, "step": 609 }, { "epoch": 1.601577909270217, "high_lr": 0.0006794736842105263, "low_lr": 1.3589473684210528e-05, "step": 609 }, { "epoch": 1.601577909270217, "high_lr": 0.0006794736842105263, "low_lr": 1.3589473684210528e-05, "step": 609 }, { "epoch": 1.601577909270217, "high_lr": 0.0006794736842105263, "low_lr": 1.3589473684210528e-05, "step": 609 }, { "epoch": 1.601577909270217, "high_lr": 0.0006794736842105263, "low_lr": 1.3589473684210528e-05, "step": 609 }, { "epoch": 1.601577909270217, "high_lr": 0.0006794736842105263, "low_lr": 1.3589473684210528e-05, "step": 609 }, { "epoch": 1.601577909270217, "high_lr": 0.0006794736842105263, "low_lr": 1.3589473684210528e-05, "step": 609 }, { "epoch": 1.601577909270217, "high_lr": 0.0006794736842105263, "low_lr": 1.3589473684210528e-05, "step": 609 }, { "epoch": 1.601577909270217, "high_lr": 0.0006794736842105263, "low_lr": 1.3589473684210528e-05, "step": 609 }, { "epoch": 1.6042077580539118, "grad_norm": 1.0419907569885254, "learning_rate": 0.0006789473684210526, "loss": 1.4822, "step": 610 }, { "epoch": 1.6042077580539118, "high_lr": 0.0006789473684210526, "low_lr": 1.3578947368421055e-05, "step": 610 }, { "epoch": 1.6042077580539118, "high_lr": 0.0006789473684210526, "low_lr": 1.3578947368421055e-05, "step": 610 }, { "epoch": 1.6042077580539118, "high_lr": 0.0006789473684210526, "low_lr": 1.3578947368421055e-05, "step": 610 }, { "epoch": 1.6042077580539118, "high_lr": 0.0006789473684210526, "low_lr": 1.3578947368421055e-05, "step": 610 }, { "epoch": 1.6042077580539118, "high_lr": 0.0006789473684210526, "low_lr": 1.3578947368421055e-05, "step": 610 }, { "epoch": 1.6042077580539118, "high_lr": 0.0006789473684210526, "low_lr": 1.3578947368421055e-05, "step": 610 }, { "epoch": 1.6042077580539118, "high_lr": 0.0006789473684210526, "low_lr": 1.3578947368421055e-05, "step": 610 }, { "epoch": 1.6042077580539118, "high_lr": 0.0006789473684210526, "low_lr": 1.3578947368421055e-05, "step": 610 }, { "epoch": 1.606837606837607, "grad_norm": 1.1207650899887085, "learning_rate": 0.0006784210526315789, "loss": 1.5263, "step": 611 }, { "epoch": 1.606837606837607, "high_lr": 0.0006784210526315789, "low_lr": 1.356842105263158e-05, "step": 611 }, { "epoch": 1.606837606837607, "high_lr": 0.0006784210526315789, "low_lr": 1.356842105263158e-05, "step": 611 }, { "epoch": 1.606837606837607, "high_lr": 0.0006784210526315789, "low_lr": 1.356842105263158e-05, "step": 611 }, { "epoch": 1.606837606837607, "high_lr": 0.0006784210526315789, "low_lr": 1.356842105263158e-05, "step": 611 }, { "epoch": 1.606837606837607, "high_lr": 0.0006784210526315789, "low_lr": 1.356842105263158e-05, "step": 611 }, { "epoch": 1.606837606837607, "high_lr": 0.0006784210526315789, "low_lr": 1.356842105263158e-05, "step": 611 }, { "epoch": 1.606837606837607, "high_lr": 0.0006784210526315789, "low_lr": 1.356842105263158e-05, "step": 611 }, { "epoch": 1.606837606837607, "high_lr": 0.0006784210526315789, "low_lr": 1.356842105263158e-05, "step": 611 }, { "epoch": 1.6094674556213018, "grad_norm": 1.0400859117507935, "learning_rate": 0.0006778947368421052, "loss": 1.4753, "step": 612 }, { "epoch": 1.6094674556213018, "high_lr": 0.0006778947368421052, "low_lr": 1.3557894736842106e-05, "step": 612 }, { "epoch": 1.6094674556213018, "high_lr": 0.0006778947368421052, "low_lr": 1.3557894736842106e-05, "step": 612 }, { "epoch": 1.6094674556213018, "high_lr": 0.0006778947368421052, "low_lr": 1.3557894736842106e-05, "step": 612 }, { "epoch": 1.6094674556213018, "high_lr": 0.0006778947368421052, "low_lr": 1.3557894736842106e-05, "step": 612 }, { "epoch": 1.6094674556213018, "high_lr": 0.0006778947368421052, "low_lr": 1.3557894736842106e-05, "step": 612 }, { "epoch": 1.6094674556213018, "high_lr": 0.0006778947368421052, "low_lr": 1.3557894736842106e-05, "step": 612 }, { "epoch": 1.6094674556213018, "high_lr": 0.0006778947368421052, "low_lr": 1.3557894736842106e-05, "step": 612 }, { "epoch": 1.6094674556213018, "high_lr": 0.0006778947368421052, "low_lr": 1.3557894736842106e-05, "step": 612 }, { "epoch": 1.6120973044049967, "grad_norm": 1.067070484161377, "learning_rate": 0.0006773684210526316, "loss": 1.5082, "step": 613 }, { "epoch": 1.6120973044049967, "high_lr": 0.0006773684210526316, "low_lr": 1.3547368421052634e-05, "step": 613 }, { "epoch": 1.6120973044049967, "high_lr": 0.0006773684210526316, "low_lr": 1.3547368421052634e-05, "step": 613 }, { "epoch": 1.6120973044049967, "high_lr": 0.0006773684210526316, "low_lr": 1.3547368421052634e-05, "step": 613 }, { "epoch": 1.6120973044049967, "high_lr": 0.0006773684210526316, "low_lr": 1.3547368421052634e-05, "step": 613 }, { "epoch": 1.6120973044049967, "high_lr": 0.0006773684210526316, "low_lr": 1.3547368421052634e-05, "step": 613 }, { "epoch": 1.6120973044049967, "high_lr": 0.0006773684210526316, "low_lr": 1.3547368421052634e-05, "step": 613 }, { "epoch": 1.6120973044049967, "high_lr": 0.0006773684210526316, "low_lr": 1.3547368421052634e-05, "step": 613 }, { "epoch": 1.6120973044049967, "high_lr": 0.0006773684210526316, "low_lr": 1.3547368421052634e-05, "step": 613 }, { "epoch": 1.6147271531886918, "grad_norm": 1.1833747625350952, "learning_rate": 0.0006768421052631579, "loss": 1.511, "step": 614 }, { "epoch": 1.6147271531886918, "high_lr": 0.0006768421052631579, "low_lr": 1.353684210526316e-05, "step": 614 }, { "epoch": 1.6147271531886918, "high_lr": 0.0006768421052631579, "low_lr": 1.353684210526316e-05, "step": 614 }, { "epoch": 1.6147271531886918, "high_lr": 0.0006768421052631579, "low_lr": 1.353684210526316e-05, "step": 614 }, { "epoch": 1.6147271531886918, "high_lr": 0.0006768421052631579, "low_lr": 1.353684210526316e-05, "step": 614 }, { "epoch": 1.6147271531886918, "high_lr": 0.0006768421052631579, "low_lr": 1.353684210526316e-05, "step": 614 }, { "epoch": 1.6147271531886918, "high_lr": 0.0006768421052631579, "low_lr": 1.353684210526316e-05, "step": 614 }, { "epoch": 1.6147271531886918, "high_lr": 0.0006768421052631579, "low_lr": 1.353684210526316e-05, "step": 614 }, { "epoch": 1.6147271531886918, "high_lr": 0.0006768421052631579, "low_lr": 1.353684210526316e-05, "step": 614 }, { "epoch": 1.6173570019723866, "grad_norm": 1.060511589050293, "learning_rate": 0.0006763157894736843, "loss": 1.4908, "step": 615 }, { "epoch": 1.6173570019723866, "high_lr": 0.0006763157894736843, "low_lr": 1.3526315789473685e-05, "step": 615 }, { "epoch": 1.6173570019723866, "high_lr": 0.0006763157894736843, "low_lr": 1.3526315789473685e-05, "step": 615 }, { "epoch": 1.6173570019723866, "high_lr": 0.0006763157894736843, "low_lr": 1.3526315789473685e-05, "step": 615 }, { "epoch": 1.6173570019723866, "high_lr": 0.0006763157894736843, "low_lr": 1.3526315789473685e-05, "step": 615 }, { "epoch": 1.6173570019723866, "high_lr": 0.0006763157894736843, "low_lr": 1.3526315789473685e-05, "step": 615 }, { "epoch": 1.6173570019723866, "high_lr": 0.0006763157894736843, "low_lr": 1.3526315789473685e-05, "step": 615 }, { "epoch": 1.6173570019723866, "high_lr": 0.0006763157894736843, "low_lr": 1.3526315789473685e-05, "step": 615 }, { "epoch": 1.6173570019723866, "high_lr": 0.0006763157894736843, "low_lr": 1.3526315789473685e-05, "step": 615 }, { "epoch": 1.6199868507560815, "grad_norm": 1.0827206373214722, "learning_rate": 0.0006757894736842106, "loss": 1.4658, "step": 616 }, { "epoch": 1.6199868507560815, "high_lr": 0.0006757894736842106, "low_lr": 1.3515789473684211e-05, "step": 616 }, { "epoch": 1.6199868507560815, "high_lr": 0.0006757894736842106, "low_lr": 1.3515789473684211e-05, "step": 616 }, { "epoch": 1.6199868507560815, "high_lr": 0.0006757894736842106, "low_lr": 1.3515789473684211e-05, "step": 616 }, { "epoch": 1.6199868507560815, "high_lr": 0.0006757894736842106, "low_lr": 1.3515789473684211e-05, "step": 616 }, { "epoch": 1.6199868507560815, "high_lr": 0.0006757894736842106, "low_lr": 1.3515789473684211e-05, "step": 616 }, { "epoch": 1.6199868507560815, "high_lr": 0.0006757894736842106, "low_lr": 1.3515789473684211e-05, "step": 616 }, { "epoch": 1.6199868507560815, "high_lr": 0.0006757894736842106, "low_lr": 1.3515789473684211e-05, "step": 616 }, { "epoch": 1.6199868507560815, "high_lr": 0.0006757894736842106, "low_lr": 1.3515789473684211e-05, "step": 616 }, { "epoch": 1.6226166995397766, "grad_norm": 1.04438316822052, "learning_rate": 0.0006752631578947368, "loss": 1.4972, "step": 617 }, { "epoch": 1.6226166995397766, "high_lr": 0.0006752631578947368, "low_lr": 1.3505263157894737e-05, "step": 617 }, { "epoch": 1.6226166995397766, "high_lr": 0.0006752631578947368, "low_lr": 1.3505263157894737e-05, "step": 617 }, { "epoch": 1.6226166995397766, "high_lr": 0.0006752631578947368, "low_lr": 1.3505263157894737e-05, "step": 617 }, { "epoch": 1.6226166995397766, "high_lr": 0.0006752631578947368, "low_lr": 1.3505263157894737e-05, "step": 617 }, { "epoch": 1.6226166995397766, "high_lr": 0.0006752631578947368, "low_lr": 1.3505263157894737e-05, "step": 617 }, { "epoch": 1.6226166995397766, "high_lr": 0.0006752631578947368, "low_lr": 1.3505263157894737e-05, "step": 617 }, { "epoch": 1.6226166995397766, "high_lr": 0.0006752631578947368, "low_lr": 1.3505263157894737e-05, "step": 617 }, { "epoch": 1.6226166995397766, "high_lr": 0.0006752631578947368, "low_lr": 1.3505263157894737e-05, "step": 617 }, { "epoch": 1.6252465483234713, "grad_norm": 1.1282044649124146, "learning_rate": 0.0006747368421052632, "loss": 1.5409, "step": 618 }, { "epoch": 1.6252465483234713, "high_lr": 0.0006747368421052632, "low_lr": 1.3494736842105265e-05, "step": 618 }, { "epoch": 1.6252465483234713, "high_lr": 0.0006747368421052632, "low_lr": 1.3494736842105265e-05, "step": 618 }, { "epoch": 1.6252465483234713, "high_lr": 0.0006747368421052632, "low_lr": 1.3494736842105265e-05, "step": 618 }, { "epoch": 1.6252465483234713, "high_lr": 0.0006747368421052632, "low_lr": 1.3494736842105265e-05, "step": 618 }, { "epoch": 1.6252465483234713, "high_lr": 0.0006747368421052632, "low_lr": 1.3494736842105265e-05, "step": 618 }, { "epoch": 1.6252465483234713, "high_lr": 0.0006747368421052632, "low_lr": 1.3494736842105265e-05, "step": 618 }, { "epoch": 1.6252465483234713, "high_lr": 0.0006747368421052632, "low_lr": 1.3494736842105265e-05, "step": 618 }, { "epoch": 1.6252465483234713, "high_lr": 0.0006747368421052632, "low_lr": 1.3494736842105265e-05, "step": 618 }, { "epoch": 1.6278763971071664, "grad_norm": 1.096423864364624, "learning_rate": 0.0006742105263157895, "loss": 1.4574, "step": 619 }, { "epoch": 1.6278763971071664, "high_lr": 0.0006742105263157895, "low_lr": 1.3484210526315792e-05, "step": 619 }, { "epoch": 1.6278763971071664, "high_lr": 0.0006742105263157895, "low_lr": 1.3484210526315792e-05, "step": 619 }, { "epoch": 1.6278763971071664, "high_lr": 0.0006742105263157895, "low_lr": 1.3484210526315792e-05, "step": 619 }, { "epoch": 1.6278763971071664, "high_lr": 0.0006742105263157895, "low_lr": 1.3484210526315792e-05, "step": 619 }, { "epoch": 1.6278763971071664, "high_lr": 0.0006742105263157895, "low_lr": 1.3484210526315792e-05, "step": 619 }, { "epoch": 1.6278763971071664, "high_lr": 0.0006742105263157895, "low_lr": 1.3484210526315792e-05, "step": 619 }, { "epoch": 1.6278763971071664, "high_lr": 0.0006742105263157895, "low_lr": 1.3484210526315792e-05, "step": 619 }, { "epoch": 1.6278763971071664, "high_lr": 0.0006742105263157895, "low_lr": 1.3484210526315792e-05, "step": 619 }, { "epoch": 1.6305062458908612, "grad_norm": 1.0992679595947266, "learning_rate": 0.0006736842105263158, "loss": 1.4983, "step": 620 }, { "epoch": 1.6305062458908612, "high_lr": 0.0006736842105263158, "low_lr": 1.3473684210526316e-05, "step": 620 }, { "epoch": 1.6305062458908612, "high_lr": 0.0006736842105263158, "low_lr": 1.3473684210526316e-05, "step": 620 }, { "epoch": 1.6305062458908612, "high_lr": 0.0006736842105263158, "low_lr": 1.3473684210526316e-05, "step": 620 }, { "epoch": 1.6305062458908612, "high_lr": 0.0006736842105263158, "low_lr": 1.3473684210526316e-05, "step": 620 }, { "epoch": 1.6305062458908612, "high_lr": 0.0006736842105263158, "low_lr": 1.3473684210526316e-05, "step": 620 }, { "epoch": 1.6305062458908612, "high_lr": 0.0006736842105263158, "low_lr": 1.3473684210526316e-05, "step": 620 }, { "epoch": 1.6305062458908612, "high_lr": 0.0006736842105263158, "low_lr": 1.3473684210526316e-05, "step": 620 }, { "epoch": 1.6305062458908612, "high_lr": 0.0006736842105263158, "low_lr": 1.3473684210526316e-05, "step": 620 }, { "epoch": 1.6331360946745561, "grad_norm": 1.0997353792190552, "learning_rate": 0.0006731578947368421, "loss": 1.5037, "step": 621 }, { "epoch": 1.6331360946745561, "high_lr": 0.0006731578947368421, "low_lr": 1.3463157894736842e-05, "step": 621 }, { "epoch": 1.6331360946745561, "high_lr": 0.0006731578947368421, "low_lr": 1.3463157894736842e-05, "step": 621 }, { "epoch": 1.6331360946745561, "high_lr": 0.0006731578947368421, "low_lr": 1.3463157894736842e-05, "step": 621 }, { "epoch": 1.6331360946745561, "high_lr": 0.0006731578947368421, "low_lr": 1.3463157894736842e-05, "step": 621 }, { "epoch": 1.6331360946745561, "high_lr": 0.0006731578947368421, "low_lr": 1.3463157894736842e-05, "step": 621 }, { "epoch": 1.6331360946745561, "high_lr": 0.0006731578947368421, "low_lr": 1.3463157894736842e-05, "step": 621 }, { "epoch": 1.6331360946745561, "high_lr": 0.0006731578947368421, "low_lr": 1.3463157894736842e-05, "step": 621 }, { "epoch": 1.6331360946745561, "high_lr": 0.0006731578947368421, "low_lr": 1.3463157894736842e-05, "step": 621 }, { "epoch": 1.6357659434582512, "grad_norm": 1.1235257387161255, "learning_rate": 0.0006726315789473685, "loss": 1.5002, "step": 622 }, { "epoch": 1.6357659434582512, "high_lr": 0.0006726315789473685, "low_lr": 1.345263157894737e-05, "step": 622 }, { "epoch": 1.6357659434582512, "high_lr": 0.0006726315789473685, "low_lr": 1.345263157894737e-05, "step": 622 }, { "epoch": 1.6357659434582512, "high_lr": 0.0006726315789473685, "low_lr": 1.345263157894737e-05, "step": 622 }, { "epoch": 1.6357659434582512, "high_lr": 0.0006726315789473685, "low_lr": 1.345263157894737e-05, "step": 622 }, { "epoch": 1.6357659434582512, "high_lr": 0.0006726315789473685, "low_lr": 1.345263157894737e-05, "step": 622 }, { "epoch": 1.6357659434582512, "high_lr": 0.0006726315789473685, "low_lr": 1.345263157894737e-05, "step": 622 }, { "epoch": 1.6357659434582512, "high_lr": 0.0006726315789473685, "low_lr": 1.345263157894737e-05, "step": 622 }, { "epoch": 1.6357659434582512, "high_lr": 0.0006726315789473685, "low_lr": 1.345263157894737e-05, "step": 622 }, { "epoch": 1.638395792241946, "grad_norm": 1.065502643585205, "learning_rate": 0.0006721052631578948, "loss": 1.4911, "step": 623 }, { "epoch": 1.638395792241946, "high_lr": 0.0006721052631578948, "low_lr": 1.3442105263157897e-05, "step": 623 }, { "epoch": 1.638395792241946, "high_lr": 0.0006721052631578948, "low_lr": 1.3442105263157897e-05, "step": 623 }, { "epoch": 1.638395792241946, "high_lr": 0.0006721052631578948, "low_lr": 1.3442105263157897e-05, "step": 623 }, { "epoch": 1.638395792241946, "high_lr": 0.0006721052631578948, "low_lr": 1.3442105263157897e-05, "step": 623 }, { "epoch": 1.638395792241946, "high_lr": 0.0006721052631578948, "low_lr": 1.3442105263157897e-05, "step": 623 }, { "epoch": 1.638395792241946, "high_lr": 0.0006721052631578948, "low_lr": 1.3442105263157897e-05, "step": 623 }, { "epoch": 1.638395792241946, "high_lr": 0.0006721052631578948, "low_lr": 1.3442105263157897e-05, "step": 623 }, { "epoch": 1.638395792241946, "high_lr": 0.0006721052631578948, "low_lr": 1.3442105263157897e-05, "step": 623 }, { "epoch": 1.641025641025641, "grad_norm": 1.0841379165649414, "learning_rate": 0.0006715789473684211, "loss": 1.5082, "step": 624 }, { "epoch": 1.641025641025641, "high_lr": 0.0006715789473684211, "low_lr": 1.3431578947368421e-05, "step": 624 }, { "epoch": 1.641025641025641, "high_lr": 0.0006715789473684211, "low_lr": 1.3431578947368421e-05, "step": 624 }, { "epoch": 1.641025641025641, "high_lr": 0.0006715789473684211, "low_lr": 1.3431578947368421e-05, "step": 624 }, { "epoch": 1.641025641025641, "high_lr": 0.0006715789473684211, "low_lr": 1.3431578947368421e-05, "step": 624 }, { "epoch": 1.641025641025641, "high_lr": 0.0006715789473684211, "low_lr": 1.3431578947368421e-05, "step": 624 }, { "epoch": 1.641025641025641, "high_lr": 0.0006715789473684211, "low_lr": 1.3431578947368421e-05, "step": 624 }, { "epoch": 1.641025641025641, "high_lr": 0.0006715789473684211, "low_lr": 1.3431578947368421e-05, "step": 624 }, { "epoch": 1.641025641025641, "high_lr": 0.0006715789473684211, "low_lr": 1.3431578947368421e-05, "step": 624 }, { "epoch": 1.643655489809336, "grad_norm": 1.1010593175888062, "learning_rate": 0.0006710526315789473, "loss": 1.4974, "step": 625 }, { "epoch": 1.643655489809336, "high_lr": 0.0006710526315789473, "low_lr": 1.3421052631578948e-05, "step": 625 }, { "epoch": 1.643655489809336, "high_lr": 0.0006710526315789473, "low_lr": 1.3421052631578948e-05, "step": 625 }, { "epoch": 1.643655489809336, "high_lr": 0.0006710526315789473, "low_lr": 1.3421052631578948e-05, "step": 625 }, { "epoch": 1.643655489809336, "high_lr": 0.0006710526315789473, "low_lr": 1.3421052631578948e-05, "step": 625 }, { "epoch": 1.643655489809336, "high_lr": 0.0006710526315789473, "low_lr": 1.3421052631578948e-05, "step": 625 }, { "epoch": 1.643655489809336, "high_lr": 0.0006710526315789473, "low_lr": 1.3421052631578948e-05, "step": 625 }, { "epoch": 1.643655489809336, "high_lr": 0.0006710526315789473, "low_lr": 1.3421052631578948e-05, "step": 625 }, { "epoch": 1.643655489809336, "high_lr": 0.0006710526315789473, "low_lr": 1.3421052631578948e-05, "step": 625 }, { "epoch": 1.646285338593031, "grad_norm": 1.1630381345748901, "learning_rate": 0.0006705263157894736, "loss": 1.5256, "step": 626 }, { "epoch": 1.646285338593031, "high_lr": 0.0006705263157894736, "low_lr": 1.3410526315789474e-05, "step": 626 }, { "epoch": 1.646285338593031, "high_lr": 0.0006705263157894736, "low_lr": 1.3410526315789474e-05, "step": 626 }, { "epoch": 1.646285338593031, "high_lr": 0.0006705263157894736, "low_lr": 1.3410526315789474e-05, "step": 626 }, { "epoch": 1.646285338593031, "high_lr": 0.0006705263157894736, "low_lr": 1.3410526315789474e-05, "step": 626 }, { "epoch": 1.646285338593031, "high_lr": 0.0006705263157894736, "low_lr": 1.3410526315789474e-05, "step": 626 }, { "epoch": 1.646285338593031, "high_lr": 0.0006705263157894736, "low_lr": 1.3410526315789474e-05, "step": 626 }, { "epoch": 1.646285338593031, "high_lr": 0.0006705263157894736, "low_lr": 1.3410526315789474e-05, "step": 626 }, { "epoch": 1.646285338593031, "high_lr": 0.0006705263157894736, "low_lr": 1.3410526315789474e-05, "step": 626 }, { "epoch": 1.6489151873767258, "grad_norm": 1.060465693473816, "learning_rate": 0.00067, "loss": 1.4869, "step": 627 }, { "epoch": 1.6489151873767258, "high_lr": 0.00067, "low_lr": 1.3400000000000002e-05, "step": 627 }, { "epoch": 1.6489151873767258, "high_lr": 0.00067, "low_lr": 1.3400000000000002e-05, "step": 627 }, { "epoch": 1.6489151873767258, "high_lr": 0.00067, "low_lr": 1.3400000000000002e-05, "step": 627 }, { "epoch": 1.6489151873767258, "high_lr": 0.00067, "low_lr": 1.3400000000000002e-05, "step": 627 }, { "epoch": 1.6489151873767258, "high_lr": 0.00067, "low_lr": 1.3400000000000002e-05, "step": 627 }, { "epoch": 1.6489151873767258, "high_lr": 0.00067, "low_lr": 1.3400000000000002e-05, "step": 627 }, { "epoch": 1.6489151873767258, "high_lr": 0.00067, "low_lr": 1.3400000000000002e-05, "step": 627 }, { "epoch": 1.6489151873767258, "high_lr": 0.00067, "low_lr": 1.3400000000000002e-05, "step": 627 }, { "epoch": 1.651545036160421, "grad_norm": 1.1353005170822144, "learning_rate": 0.0006694736842105263, "loss": 1.5764, "step": 628 }, { "epoch": 1.651545036160421, "high_lr": 0.0006694736842105263, "low_lr": 1.3389473684210528e-05, "step": 628 }, { "epoch": 1.651545036160421, "high_lr": 0.0006694736842105263, "low_lr": 1.3389473684210528e-05, "step": 628 }, { "epoch": 1.651545036160421, "high_lr": 0.0006694736842105263, "low_lr": 1.3389473684210528e-05, "step": 628 }, { "epoch": 1.651545036160421, "high_lr": 0.0006694736842105263, "low_lr": 1.3389473684210528e-05, "step": 628 }, { "epoch": 1.651545036160421, "high_lr": 0.0006694736842105263, "low_lr": 1.3389473684210528e-05, "step": 628 }, { "epoch": 1.651545036160421, "high_lr": 0.0006694736842105263, "low_lr": 1.3389473684210528e-05, "step": 628 }, { "epoch": 1.651545036160421, "high_lr": 0.0006694736842105263, "low_lr": 1.3389473684210528e-05, "step": 628 }, { "epoch": 1.651545036160421, "high_lr": 0.0006694736842105263, "low_lr": 1.3389473684210528e-05, "step": 628 }, { "epoch": 1.6541748849441156, "grad_norm": 1.0829559564590454, "learning_rate": 0.0006689473684210526, "loss": 1.472, "step": 629 }, { "epoch": 1.6541748849441156, "high_lr": 0.0006689473684210526, "low_lr": 1.3378947368421053e-05, "step": 629 }, { "epoch": 1.6541748849441156, "high_lr": 0.0006689473684210526, "low_lr": 1.3378947368421053e-05, "step": 629 }, { "epoch": 1.6541748849441156, "high_lr": 0.0006689473684210526, "low_lr": 1.3378947368421053e-05, "step": 629 }, { "epoch": 1.6541748849441156, "high_lr": 0.0006689473684210526, "low_lr": 1.3378947368421053e-05, "step": 629 }, { "epoch": 1.6541748849441156, "high_lr": 0.0006689473684210526, "low_lr": 1.3378947368421053e-05, "step": 629 }, { "epoch": 1.6541748849441156, "high_lr": 0.0006689473684210526, "low_lr": 1.3378947368421053e-05, "step": 629 }, { "epoch": 1.6541748849441156, "high_lr": 0.0006689473684210526, "low_lr": 1.3378947368421053e-05, "step": 629 }, { "epoch": 1.6541748849441156, "high_lr": 0.0006689473684210526, "low_lr": 1.3378947368421053e-05, "step": 629 }, { "epoch": 1.6568047337278107, "grad_norm": 1.1032090187072754, "learning_rate": 0.0006684210526315789, "loss": 1.5168, "step": 630 }, { "epoch": 1.6568047337278107, "high_lr": 0.0006684210526315789, "low_lr": 1.336842105263158e-05, "step": 630 }, { "epoch": 1.6568047337278107, "high_lr": 0.0006684210526315789, "low_lr": 1.336842105263158e-05, "step": 630 }, { "epoch": 1.6568047337278107, "high_lr": 0.0006684210526315789, "low_lr": 1.336842105263158e-05, "step": 630 }, { "epoch": 1.6568047337278107, "high_lr": 0.0006684210526315789, "low_lr": 1.336842105263158e-05, "step": 630 }, { "epoch": 1.6568047337278107, "high_lr": 0.0006684210526315789, "low_lr": 1.336842105263158e-05, "step": 630 }, { "epoch": 1.6568047337278107, "high_lr": 0.0006684210526315789, "low_lr": 1.336842105263158e-05, "step": 630 }, { "epoch": 1.6568047337278107, "high_lr": 0.0006684210526315789, "low_lr": 1.336842105263158e-05, "step": 630 }, { "epoch": 1.6568047337278107, "high_lr": 0.0006684210526315789, "low_lr": 1.336842105263158e-05, "step": 630 }, { "epoch": 1.6594345825115056, "grad_norm": 1.1293106079101562, "learning_rate": 0.0006678947368421053, "loss": 1.4934, "step": 631 }, { "epoch": 1.6594345825115056, "high_lr": 0.0006678947368421053, "low_lr": 1.3357894736842106e-05, "step": 631 }, { "epoch": 1.6594345825115056, "high_lr": 0.0006678947368421053, "low_lr": 1.3357894736842106e-05, "step": 631 }, { "epoch": 1.6594345825115056, "high_lr": 0.0006678947368421053, "low_lr": 1.3357894736842106e-05, "step": 631 }, { "epoch": 1.6594345825115056, "high_lr": 0.0006678947368421053, "low_lr": 1.3357894736842106e-05, "step": 631 }, { "epoch": 1.6594345825115056, "high_lr": 0.0006678947368421053, "low_lr": 1.3357894736842106e-05, "step": 631 }, { "epoch": 1.6594345825115056, "high_lr": 0.0006678947368421053, "low_lr": 1.3357894736842106e-05, "step": 631 }, { "epoch": 1.6594345825115056, "high_lr": 0.0006678947368421053, "low_lr": 1.3357894736842106e-05, "step": 631 }, { "epoch": 1.6594345825115056, "high_lr": 0.0006678947368421053, "low_lr": 1.3357894736842106e-05, "step": 631 }, { "epoch": 1.6620644312952004, "grad_norm": 1.1231383085250854, "learning_rate": 0.0006673684210526317, "loss": 1.4535, "step": 632 }, { "epoch": 1.6620644312952004, "high_lr": 0.0006673684210526317, "low_lr": 1.3347368421052634e-05, "step": 632 }, { "epoch": 1.6620644312952004, "high_lr": 0.0006673684210526317, "low_lr": 1.3347368421052634e-05, "step": 632 }, { "epoch": 1.6620644312952004, "high_lr": 0.0006673684210526317, "low_lr": 1.3347368421052634e-05, "step": 632 }, { "epoch": 1.6620644312952004, "high_lr": 0.0006673684210526317, "low_lr": 1.3347368421052634e-05, "step": 632 }, { "epoch": 1.6620644312952004, "high_lr": 0.0006673684210526317, "low_lr": 1.3347368421052634e-05, "step": 632 }, { "epoch": 1.6620644312952004, "high_lr": 0.0006673684210526317, "low_lr": 1.3347368421052634e-05, "step": 632 }, { "epoch": 1.6620644312952004, "high_lr": 0.0006673684210526317, "low_lr": 1.3347368421052634e-05, "step": 632 }, { "epoch": 1.6620644312952004, "high_lr": 0.0006673684210526317, "low_lr": 1.3347368421052634e-05, "step": 632 }, { "epoch": 1.6646942800788955, "grad_norm": 1.0538183450698853, "learning_rate": 0.000666842105263158, "loss": 1.4255, "step": 633 }, { "epoch": 1.6646942800788955, "high_lr": 0.000666842105263158, "low_lr": 1.3336842105263158e-05, "step": 633 }, { "epoch": 1.6646942800788955, "high_lr": 0.000666842105263158, "low_lr": 1.3336842105263158e-05, "step": 633 }, { "epoch": 1.6646942800788955, "high_lr": 0.000666842105263158, "low_lr": 1.3336842105263158e-05, "step": 633 }, { "epoch": 1.6646942800788955, "high_lr": 0.000666842105263158, "low_lr": 1.3336842105263158e-05, "step": 633 }, { "epoch": 1.6646942800788955, "high_lr": 0.000666842105263158, "low_lr": 1.3336842105263158e-05, "step": 633 }, { "epoch": 1.6646942800788955, "high_lr": 0.000666842105263158, "low_lr": 1.3336842105263158e-05, "step": 633 }, { "epoch": 1.6646942800788955, "high_lr": 0.000666842105263158, "low_lr": 1.3336842105263158e-05, "step": 633 }, { "epoch": 1.6646942800788955, "high_lr": 0.000666842105263158, "low_lr": 1.3336842105263158e-05, "step": 633 }, { "epoch": 1.6673241288625904, "grad_norm": 1.0623528957366943, "learning_rate": 0.0006663157894736842, "loss": 1.4342, "step": 634 }, { "epoch": 1.6673241288625904, "high_lr": 0.0006663157894736842, "low_lr": 1.3326315789473685e-05, "step": 634 }, { "epoch": 1.6673241288625904, "high_lr": 0.0006663157894736842, "low_lr": 1.3326315789473685e-05, "step": 634 }, { "epoch": 1.6673241288625904, "high_lr": 0.0006663157894736842, "low_lr": 1.3326315789473685e-05, "step": 634 }, { "epoch": 1.6673241288625904, "high_lr": 0.0006663157894736842, "low_lr": 1.3326315789473685e-05, "step": 634 }, { "epoch": 1.6673241288625904, "high_lr": 0.0006663157894736842, "low_lr": 1.3326315789473685e-05, "step": 634 }, { "epoch": 1.6673241288625904, "high_lr": 0.0006663157894736842, "low_lr": 1.3326315789473685e-05, "step": 634 }, { "epoch": 1.6673241288625904, "high_lr": 0.0006663157894736842, "low_lr": 1.3326315789473685e-05, "step": 634 }, { "epoch": 1.6673241288625904, "high_lr": 0.0006663157894736842, "low_lr": 1.3326315789473685e-05, "step": 634 }, { "epoch": 1.6699539776462853, "grad_norm": 1.1783959865570068, "learning_rate": 0.0006657894736842105, "loss": 1.525, "step": 635 }, { "epoch": 1.6699539776462853, "high_lr": 0.0006657894736842105, "low_lr": 1.3315789473684211e-05, "step": 635 }, { "epoch": 1.6699539776462853, "high_lr": 0.0006657894736842105, "low_lr": 1.3315789473684211e-05, "step": 635 }, { "epoch": 1.6699539776462853, "high_lr": 0.0006657894736842105, "low_lr": 1.3315789473684211e-05, "step": 635 }, { "epoch": 1.6699539776462853, "high_lr": 0.0006657894736842105, "low_lr": 1.3315789473684211e-05, "step": 635 }, { "epoch": 1.6699539776462853, "high_lr": 0.0006657894736842105, "low_lr": 1.3315789473684211e-05, "step": 635 }, { "epoch": 1.6699539776462853, "high_lr": 0.0006657894736842105, "low_lr": 1.3315789473684211e-05, "step": 635 }, { "epoch": 1.6699539776462853, "high_lr": 0.0006657894736842105, "low_lr": 1.3315789473684211e-05, "step": 635 }, { "epoch": 1.6699539776462853, "high_lr": 0.0006657894736842105, "low_lr": 1.3315789473684211e-05, "step": 635 }, { "epoch": 1.6725838264299804, "grad_norm": 1.04253089427948, "learning_rate": 0.0006652631578947369, "loss": 1.4717, "step": 636 }, { "epoch": 1.6725838264299804, "high_lr": 0.0006652631578947369, "low_lr": 1.3305263157894739e-05, "step": 636 }, { "epoch": 1.6725838264299804, "high_lr": 0.0006652631578947369, "low_lr": 1.3305263157894739e-05, "step": 636 }, { "epoch": 1.6725838264299804, "high_lr": 0.0006652631578947369, "low_lr": 1.3305263157894739e-05, "step": 636 }, { "epoch": 1.6725838264299804, "high_lr": 0.0006652631578947369, "low_lr": 1.3305263157894739e-05, "step": 636 }, { "epoch": 1.6725838264299804, "high_lr": 0.0006652631578947369, "low_lr": 1.3305263157894739e-05, "step": 636 }, { "epoch": 1.6725838264299804, "high_lr": 0.0006652631578947369, "low_lr": 1.3305263157894739e-05, "step": 636 }, { "epoch": 1.6725838264299804, "high_lr": 0.0006652631578947369, "low_lr": 1.3305263157894739e-05, "step": 636 }, { "epoch": 1.6725838264299804, "high_lr": 0.0006652631578947369, "low_lr": 1.3305263157894739e-05, "step": 636 }, { "epoch": 1.6752136752136753, "grad_norm": 1.1287508010864258, "learning_rate": 0.0006647368421052632, "loss": 1.4899, "step": 637 }, { "epoch": 1.6752136752136753, "high_lr": 0.0006647368421052632, "low_lr": 1.3294736842105265e-05, "step": 637 }, { "epoch": 1.6752136752136753, "high_lr": 0.0006647368421052632, "low_lr": 1.3294736842105265e-05, "step": 637 }, { "epoch": 1.6752136752136753, "high_lr": 0.0006647368421052632, "low_lr": 1.3294736842105265e-05, "step": 637 }, { "epoch": 1.6752136752136753, "high_lr": 0.0006647368421052632, "low_lr": 1.3294736842105265e-05, "step": 637 }, { "epoch": 1.6752136752136753, "high_lr": 0.0006647368421052632, "low_lr": 1.3294736842105265e-05, "step": 637 }, { "epoch": 1.6752136752136753, "high_lr": 0.0006647368421052632, "low_lr": 1.3294736842105265e-05, "step": 637 }, { "epoch": 1.6752136752136753, "high_lr": 0.0006647368421052632, "low_lr": 1.3294736842105265e-05, "step": 637 }, { "epoch": 1.6752136752136753, "high_lr": 0.0006647368421052632, "low_lr": 1.3294736842105265e-05, "step": 637 }, { "epoch": 1.6778435239973701, "grad_norm": 1.3079376220703125, "learning_rate": 0.0006642105263157895, "loss": 1.5828, "step": 638 }, { "epoch": 1.6778435239973701, "high_lr": 0.0006642105263157895, "low_lr": 1.328421052631579e-05, "step": 638 }, { "epoch": 1.6778435239973701, "high_lr": 0.0006642105263157895, "low_lr": 1.328421052631579e-05, "step": 638 }, { "epoch": 1.6778435239973701, "high_lr": 0.0006642105263157895, "low_lr": 1.328421052631579e-05, "step": 638 }, { "epoch": 1.6778435239973701, "high_lr": 0.0006642105263157895, "low_lr": 1.328421052631579e-05, "step": 638 }, { "epoch": 1.6778435239973701, "high_lr": 0.0006642105263157895, "low_lr": 1.328421052631579e-05, "step": 638 }, { "epoch": 1.6778435239973701, "high_lr": 0.0006642105263157895, "low_lr": 1.328421052631579e-05, "step": 638 }, { "epoch": 1.6778435239973701, "high_lr": 0.0006642105263157895, "low_lr": 1.328421052631579e-05, "step": 638 }, { "epoch": 1.6778435239973701, "high_lr": 0.0006642105263157895, "low_lr": 1.328421052631579e-05, "step": 638 }, { "epoch": 1.6804733727810652, "grad_norm": 1.1009700298309326, "learning_rate": 0.0006636842105263158, "loss": 1.4915, "step": 639 }, { "epoch": 1.6804733727810652, "high_lr": 0.0006636842105263158, "low_lr": 1.3273684210526316e-05, "step": 639 }, { "epoch": 1.6804733727810652, "high_lr": 0.0006636842105263158, "low_lr": 1.3273684210526316e-05, "step": 639 }, { "epoch": 1.6804733727810652, "high_lr": 0.0006636842105263158, "low_lr": 1.3273684210526316e-05, "step": 639 }, { "epoch": 1.6804733727810652, "high_lr": 0.0006636842105263158, "low_lr": 1.3273684210526316e-05, "step": 639 }, { "epoch": 1.6804733727810652, "high_lr": 0.0006636842105263158, "low_lr": 1.3273684210526316e-05, "step": 639 }, { "epoch": 1.6804733727810652, "high_lr": 0.0006636842105263158, "low_lr": 1.3273684210526316e-05, "step": 639 }, { "epoch": 1.6804733727810652, "high_lr": 0.0006636842105263158, "low_lr": 1.3273684210526316e-05, "step": 639 }, { "epoch": 1.6804733727810652, "high_lr": 0.0006636842105263158, "low_lr": 1.3273684210526316e-05, "step": 639 }, { "epoch": 1.6831032215647599, "grad_norm": 1.0824031829833984, "learning_rate": 0.0006631578947368421, "loss": 1.4459, "step": 640 }, { "epoch": 1.6831032215647599, "high_lr": 0.0006631578947368421, "low_lr": 1.3263157894736843e-05, "step": 640 }, { "epoch": 1.6831032215647599, "high_lr": 0.0006631578947368421, "low_lr": 1.3263157894736843e-05, "step": 640 }, { "epoch": 1.6831032215647599, "high_lr": 0.0006631578947368421, "low_lr": 1.3263157894736843e-05, "step": 640 }, { "epoch": 1.6831032215647599, "high_lr": 0.0006631578947368421, "low_lr": 1.3263157894736843e-05, "step": 640 }, { "epoch": 1.6831032215647599, "high_lr": 0.0006631578947368421, "low_lr": 1.3263157894736843e-05, "step": 640 }, { "epoch": 1.6831032215647599, "high_lr": 0.0006631578947368421, "low_lr": 1.3263157894736843e-05, "step": 640 }, { "epoch": 1.6831032215647599, "high_lr": 0.0006631578947368421, "low_lr": 1.3263157894736843e-05, "step": 640 }, { "epoch": 1.6831032215647599, "high_lr": 0.0006631578947368421, "low_lr": 1.3263157894736843e-05, "step": 640 }, { "epoch": 1.685733070348455, "grad_norm": 1.137454628944397, "learning_rate": 0.0006626315789473685, "loss": 1.4475, "step": 641 }, { "epoch": 1.685733070348455, "high_lr": 0.0006626315789473685, "low_lr": 1.325263157894737e-05, "step": 641 }, { "epoch": 1.685733070348455, "high_lr": 0.0006626315789473685, "low_lr": 1.325263157894737e-05, "step": 641 }, { "epoch": 1.685733070348455, "high_lr": 0.0006626315789473685, "low_lr": 1.325263157894737e-05, "step": 641 }, { "epoch": 1.685733070348455, "high_lr": 0.0006626315789473685, "low_lr": 1.325263157894737e-05, "step": 641 }, { "epoch": 1.685733070348455, "high_lr": 0.0006626315789473685, "low_lr": 1.325263157894737e-05, "step": 641 }, { "epoch": 1.685733070348455, "high_lr": 0.0006626315789473685, "low_lr": 1.325263157894737e-05, "step": 641 }, { "epoch": 1.685733070348455, "high_lr": 0.0006626315789473685, "low_lr": 1.325263157894737e-05, "step": 641 }, { "epoch": 1.685733070348455, "high_lr": 0.0006626315789473685, "low_lr": 1.325263157894737e-05, "step": 641 }, { "epoch": 1.6883629191321499, "grad_norm": 1.089472770690918, "learning_rate": 0.0006621052631578947, "loss": 1.4748, "step": 642 }, { "epoch": 1.6883629191321499, "high_lr": 0.0006621052631578947, "low_lr": 1.3242105263157895e-05, "step": 642 }, { "epoch": 1.6883629191321499, "high_lr": 0.0006621052631578947, "low_lr": 1.3242105263157895e-05, "step": 642 }, { "epoch": 1.6883629191321499, "high_lr": 0.0006621052631578947, "low_lr": 1.3242105263157895e-05, "step": 642 }, { "epoch": 1.6883629191321499, "high_lr": 0.0006621052631578947, "low_lr": 1.3242105263157895e-05, "step": 642 }, { "epoch": 1.6883629191321499, "high_lr": 0.0006621052631578947, "low_lr": 1.3242105263157895e-05, "step": 642 }, { "epoch": 1.6883629191321499, "high_lr": 0.0006621052631578947, "low_lr": 1.3242105263157895e-05, "step": 642 }, { "epoch": 1.6883629191321499, "high_lr": 0.0006621052631578947, "low_lr": 1.3242105263157895e-05, "step": 642 }, { "epoch": 1.6883629191321499, "high_lr": 0.0006621052631578947, "low_lr": 1.3242105263157895e-05, "step": 642 }, { "epoch": 1.6909927679158447, "grad_norm": 1.1632318496704102, "learning_rate": 0.000661578947368421, "loss": 1.4587, "step": 643 }, { "epoch": 1.6909927679158447, "high_lr": 0.000661578947368421, "low_lr": 1.3231578947368422e-05, "step": 643 }, { "epoch": 1.6909927679158447, "high_lr": 0.000661578947368421, "low_lr": 1.3231578947368422e-05, "step": 643 }, { "epoch": 1.6909927679158447, "high_lr": 0.000661578947368421, "low_lr": 1.3231578947368422e-05, "step": 643 }, { "epoch": 1.6909927679158447, "high_lr": 0.000661578947368421, "low_lr": 1.3231578947368422e-05, "step": 643 }, { "epoch": 1.6909927679158447, "high_lr": 0.000661578947368421, "low_lr": 1.3231578947368422e-05, "step": 643 }, { "epoch": 1.6909927679158447, "high_lr": 0.000661578947368421, "low_lr": 1.3231578947368422e-05, "step": 643 }, { "epoch": 1.6909927679158447, "high_lr": 0.000661578947368421, "low_lr": 1.3231578947368422e-05, "step": 643 }, { "epoch": 1.6909927679158447, "high_lr": 0.000661578947368421, "low_lr": 1.3231578947368422e-05, "step": 643 }, { "epoch": 1.6936226166995398, "grad_norm": 1.1421369314193726, "learning_rate": 0.0006610526315789473, "loss": 1.4532, "step": 644 }, { "epoch": 1.6936226166995398, "high_lr": 0.0006610526315789473, "low_lr": 1.3221052631578948e-05, "step": 644 }, { "epoch": 1.6936226166995398, "high_lr": 0.0006610526315789473, "low_lr": 1.3221052631578948e-05, "step": 644 }, { "epoch": 1.6936226166995398, "high_lr": 0.0006610526315789473, "low_lr": 1.3221052631578948e-05, "step": 644 }, { "epoch": 1.6936226166995398, "high_lr": 0.0006610526315789473, "low_lr": 1.3221052631578948e-05, "step": 644 }, { "epoch": 1.6936226166995398, "high_lr": 0.0006610526315789473, "low_lr": 1.3221052631578948e-05, "step": 644 }, { "epoch": 1.6936226166995398, "high_lr": 0.0006610526315789473, "low_lr": 1.3221052631578948e-05, "step": 644 }, { "epoch": 1.6936226166995398, "high_lr": 0.0006610526315789473, "low_lr": 1.3221052631578948e-05, "step": 644 }, { "epoch": 1.6936226166995398, "high_lr": 0.0006610526315789473, "low_lr": 1.3221052631578948e-05, "step": 644 }, { "epoch": 1.6962524654832347, "grad_norm": 1.0684672594070435, "learning_rate": 0.0006605263157894737, "loss": 1.4379, "step": 645 }, { "epoch": 1.6962524654832347, "high_lr": 0.0006605263157894737, "low_lr": 1.3210526315789476e-05, "step": 645 }, { "epoch": 1.6962524654832347, "high_lr": 0.0006605263157894737, "low_lr": 1.3210526315789476e-05, "step": 645 }, { "epoch": 1.6962524654832347, "high_lr": 0.0006605263157894737, "low_lr": 1.3210526315789476e-05, "step": 645 }, { "epoch": 1.6962524654832347, "high_lr": 0.0006605263157894737, "low_lr": 1.3210526315789476e-05, "step": 645 }, { "epoch": 1.6962524654832347, "high_lr": 0.0006605263157894737, "low_lr": 1.3210526315789476e-05, "step": 645 }, { "epoch": 1.6962524654832347, "high_lr": 0.0006605263157894737, "low_lr": 1.3210526315789476e-05, "step": 645 }, { "epoch": 1.6962524654832347, "high_lr": 0.0006605263157894737, "low_lr": 1.3210526315789476e-05, "step": 645 }, { "epoch": 1.6962524654832347, "high_lr": 0.0006605263157894737, "low_lr": 1.3210526315789476e-05, "step": 645 }, { "epoch": 1.6988823142669296, "grad_norm": 1.0975732803344727, "learning_rate": 0.00066, "loss": 1.4593, "step": 646 }, { "epoch": 1.6988823142669296, "high_lr": 0.00066, "low_lr": 1.3200000000000002e-05, "step": 646 }, { "epoch": 1.6988823142669296, "high_lr": 0.00066, "low_lr": 1.3200000000000002e-05, "step": 646 }, { "epoch": 1.6988823142669296, "high_lr": 0.00066, "low_lr": 1.3200000000000002e-05, "step": 646 }, { "epoch": 1.6988823142669296, "high_lr": 0.00066, "low_lr": 1.3200000000000002e-05, "step": 646 }, { "epoch": 1.6988823142669296, "high_lr": 0.00066, "low_lr": 1.3200000000000002e-05, "step": 646 }, { "epoch": 1.6988823142669296, "high_lr": 0.00066, "low_lr": 1.3200000000000002e-05, "step": 646 }, { "epoch": 1.6988823142669296, "high_lr": 0.00066, "low_lr": 1.3200000000000002e-05, "step": 646 }, { "epoch": 1.6988823142669296, "high_lr": 0.00066, "low_lr": 1.3200000000000002e-05, "step": 646 }, { "epoch": 1.7015121630506247, "grad_norm": 1.161067247390747, "learning_rate": 0.0006594736842105264, "loss": 1.4718, "step": 647 }, { "epoch": 1.7015121630506247, "high_lr": 0.0006594736842105264, "low_lr": 1.3189473684210527e-05, "step": 647 }, { "epoch": 1.7015121630506247, "high_lr": 0.0006594736842105264, "low_lr": 1.3189473684210527e-05, "step": 647 }, { "epoch": 1.7015121630506247, "high_lr": 0.0006594736842105264, "low_lr": 1.3189473684210527e-05, "step": 647 }, { "epoch": 1.7015121630506247, "high_lr": 0.0006594736842105264, "low_lr": 1.3189473684210527e-05, "step": 647 }, { "epoch": 1.7015121630506247, "high_lr": 0.0006594736842105264, "low_lr": 1.3189473684210527e-05, "step": 647 }, { "epoch": 1.7015121630506247, "high_lr": 0.0006594736842105264, "low_lr": 1.3189473684210527e-05, "step": 647 }, { "epoch": 1.7015121630506247, "high_lr": 0.0006594736842105264, "low_lr": 1.3189473684210527e-05, "step": 647 }, { "epoch": 1.7015121630506247, "high_lr": 0.0006594736842105264, "low_lr": 1.3189473684210527e-05, "step": 647 }, { "epoch": 1.7041420118343196, "grad_norm": 1.2819602489471436, "learning_rate": 0.0006589473684210527, "loss": 1.4713, "step": 648 }, { "epoch": 1.7041420118343196, "high_lr": 0.0006589473684210527, "low_lr": 1.3178947368421053e-05, "step": 648 }, { "epoch": 1.7041420118343196, "high_lr": 0.0006589473684210527, "low_lr": 1.3178947368421053e-05, "step": 648 }, { "epoch": 1.7041420118343196, "high_lr": 0.0006589473684210527, "low_lr": 1.3178947368421053e-05, "step": 648 }, { "epoch": 1.7041420118343196, "high_lr": 0.0006589473684210527, "low_lr": 1.3178947368421053e-05, "step": 648 }, { "epoch": 1.7041420118343196, "high_lr": 0.0006589473684210527, "low_lr": 1.3178947368421053e-05, "step": 648 }, { "epoch": 1.7041420118343196, "high_lr": 0.0006589473684210527, "low_lr": 1.3178947368421053e-05, "step": 648 }, { "epoch": 1.7041420118343196, "high_lr": 0.0006589473684210527, "low_lr": 1.3178947368421053e-05, "step": 648 }, { "epoch": 1.7041420118343196, "high_lr": 0.0006589473684210527, "low_lr": 1.3178947368421053e-05, "step": 648 }, { "epoch": 1.7067718606180144, "grad_norm": 1.1141456365585327, "learning_rate": 0.000658421052631579, "loss": 1.4705, "step": 649 }, { "epoch": 1.7067718606180144, "high_lr": 0.000658421052631579, "low_lr": 1.316842105263158e-05, "step": 649 }, { "epoch": 1.7067718606180144, "high_lr": 0.000658421052631579, "low_lr": 1.316842105263158e-05, "step": 649 }, { "epoch": 1.7067718606180144, "high_lr": 0.000658421052631579, "low_lr": 1.316842105263158e-05, "step": 649 }, { "epoch": 1.7067718606180144, "high_lr": 0.000658421052631579, "low_lr": 1.316842105263158e-05, "step": 649 }, { "epoch": 1.7067718606180144, "high_lr": 0.000658421052631579, "low_lr": 1.316842105263158e-05, "step": 649 }, { "epoch": 1.7067718606180144, "high_lr": 0.000658421052631579, "low_lr": 1.316842105263158e-05, "step": 649 }, { "epoch": 1.7067718606180144, "high_lr": 0.000658421052631579, "low_lr": 1.316842105263158e-05, "step": 649 }, { "epoch": 1.7067718606180144, "high_lr": 0.000658421052631579, "low_lr": 1.316842105263158e-05, "step": 649 }, { "epoch": 1.7094017094017095, "grad_norm": 1.2047836780548096, "learning_rate": 0.0006578947368421054, "loss": 1.5427, "step": 650 }, { "epoch": 1.7094017094017095, "high_lr": 0.0006578947368421054, "low_lr": 1.3157894736842108e-05, "step": 650 }, { "epoch": 1.7094017094017095, "high_lr": 0.0006578947368421054, "low_lr": 1.3157894736842108e-05, "step": 650 }, { "epoch": 1.7094017094017095, "high_lr": 0.0006578947368421054, "low_lr": 1.3157894736842108e-05, "step": 650 }, { "epoch": 1.7094017094017095, "high_lr": 0.0006578947368421054, "low_lr": 1.3157894736842108e-05, "step": 650 }, { "epoch": 1.7094017094017095, "high_lr": 0.0006578947368421054, "low_lr": 1.3157894736842108e-05, "step": 650 }, { "epoch": 1.7094017094017095, "high_lr": 0.0006578947368421054, "low_lr": 1.3157894736842108e-05, "step": 650 }, { "epoch": 1.7094017094017095, "high_lr": 0.0006578947368421054, "low_lr": 1.3157894736842108e-05, "step": 650 }, { "epoch": 1.7094017094017095, "high_lr": 0.0006578947368421054, "low_lr": 1.3157894736842108e-05, "step": 650 }, { "epoch": 1.7120315581854042, "grad_norm": 1.0834522247314453, "learning_rate": 0.0006573684210526316, "loss": 1.4951, "step": 651 }, { "epoch": 1.7120315581854042, "high_lr": 0.0006573684210526316, "low_lr": 1.3147368421052632e-05, "step": 651 }, { "epoch": 1.7120315581854042, "high_lr": 0.0006573684210526316, "low_lr": 1.3147368421052632e-05, "step": 651 }, { "epoch": 1.7120315581854042, "high_lr": 0.0006573684210526316, "low_lr": 1.3147368421052632e-05, "step": 651 }, { "epoch": 1.7120315581854042, "high_lr": 0.0006573684210526316, "low_lr": 1.3147368421052632e-05, "step": 651 }, { "epoch": 1.7120315581854042, "high_lr": 0.0006573684210526316, "low_lr": 1.3147368421052632e-05, "step": 651 }, { "epoch": 1.7120315581854042, "high_lr": 0.0006573684210526316, "low_lr": 1.3147368421052632e-05, "step": 651 }, { "epoch": 1.7120315581854042, "high_lr": 0.0006573684210526316, "low_lr": 1.3147368421052632e-05, "step": 651 }, { "epoch": 1.7120315581854042, "high_lr": 0.0006573684210526316, "low_lr": 1.3147368421052632e-05, "step": 651 }, { "epoch": 1.7146614069690993, "grad_norm": 1.027203917503357, "learning_rate": 0.0006568421052631579, "loss": 1.4369, "step": 652 }, { "epoch": 1.7146614069690993, "high_lr": 0.0006568421052631579, "low_lr": 1.3136842105263159e-05, "step": 652 }, { "epoch": 1.7146614069690993, "high_lr": 0.0006568421052631579, "low_lr": 1.3136842105263159e-05, "step": 652 }, { "epoch": 1.7146614069690993, "high_lr": 0.0006568421052631579, "low_lr": 1.3136842105263159e-05, "step": 652 }, { "epoch": 1.7146614069690993, "high_lr": 0.0006568421052631579, "low_lr": 1.3136842105263159e-05, "step": 652 }, { "epoch": 1.7146614069690993, "high_lr": 0.0006568421052631579, "low_lr": 1.3136842105263159e-05, "step": 652 }, { "epoch": 1.7146614069690993, "high_lr": 0.0006568421052631579, "low_lr": 1.3136842105263159e-05, "step": 652 }, { "epoch": 1.7146614069690993, "high_lr": 0.0006568421052631579, "low_lr": 1.3136842105263159e-05, "step": 652 }, { "epoch": 1.7146614069690993, "high_lr": 0.0006568421052631579, "low_lr": 1.3136842105263159e-05, "step": 652 }, { "epoch": 1.7172912557527942, "grad_norm": 1.1530510187149048, "learning_rate": 0.0006563157894736842, "loss": 1.5094, "step": 653 }, { "epoch": 1.7172912557527942, "high_lr": 0.0006563157894736842, "low_lr": 1.3126315789473685e-05, "step": 653 }, { "epoch": 1.7172912557527942, "high_lr": 0.0006563157894736842, "low_lr": 1.3126315789473685e-05, "step": 653 }, { "epoch": 1.7172912557527942, "high_lr": 0.0006563157894736842, "low_lr": 1.3126315789473685e-05, "step": 653 }, { "epoch": 1.7172912557527942, "high_lr": 0.0006563157894736842, "low_lr": 1.3126315789473685e-05, "step": 653 }, { "epoch": 1.7172912557527942, "high_lr": 0.0006563157894736842, "low_lr": 1.3126315789473685e-05, "step": 653 }, { "epoch": 1.7172912557527942, "high_lr": 0.0006563157894736842, "low_lr": 1.3126315789473685e-05, "step": 653 }, { "epoch": 1.7172912557527942, "high_lr": 0.0006563157894736842, "low_lr": 1.3126315789473685e-05, "step": 653 }, { "epoch": 1.7172912557527942, "high_lr": 0.0006563157894736842, "low_lr": 1.3126315789473685e-05, "step": 653 }, { "epoch": 1.719921104536489, "grad_norm": 1.1710301637649536, "learning_rate": 0.0006557894736842105, "loss": 1.483, "step": 654 }, { "epoch": 1.719921104536489, "high_lr": 0.0006557894736842105, "low_lr": 1.3115789473684211e-05, "step": 654 }, { "epoch": 1.719921104536489, "high_lr": 0.0006557894736842105, "low_lr": 1.3115789473684211e-05, "step": 654 }, { "epoch": 1.719921104536489, "high_lr": 0.0006557894736842105, "low_lr": 1.3115789473684211e-05, "step": 654 }, { "epoch": 1.719921104536489, "high_lr": 0.0006557894736842105, "low_lr": 1.3115789473684211e-05, "step": 654 }, { "epoch": 1.719921104536489, "high_lr": 0.0006557894736842105, "low_lr": 1.3115789473684211e-05, "step": 654 }, { "epoch": 1.719921104536489, "high_lr": 0.0006557894736842105, "low_lr": 1.3115789473684211e-05, "step": 654 }, { "epoch": 1.719921104536489, "high_lr": 0.0006557894736842105, "low_lr": 1.3115789473684211e-05, "step": 654 }, { "epoch": 1.719921104536489, "high_lr": 0.0006557894736842105, "low_lr": 1.3115789473684211e-05, "step": 654 }, { "epoch": 1.7225509533201842, "grad_norm": 1.118013620376587, "learning_rate": 0.0006552631578947369, "loss": 1.4718, "step": 655 }, { "epoch": 1.7225509533201842, "high_lr": 0.0006552631578947369, "low_lr": 1.310526315789474e-05, "step": 655 }, { "epoch": 1.7225509533201842, "high_lr": 0.0006552631578947369, "low_lr": 1.310526315789474e-05, "step": 655 }, { "epoch": 1.7225509533201842, "high_lr": 0.0006552631578947369, "low_lr": 1.310526315789474e-05, "step": 655 }, { "epoch": 1.7225509533201842, "high_lr": 0.0006552631578947369, "low_lr": 1.310526315789474e-05, "step": 655 }, { "epoch": 1.7225509533201842, "high_lr": 0.0006552631578947369, "low_lr": 1.310526315789474e-05, "step": 655 }, { "epoch": 1.7225509533201842, "high_lr": 0.0006552631578947369, "low_lr": 1.310526315789474e-05, "step": 655 }, { "epoch": 1.7225509533201842, "high_lr": 0.0006552631578947369, "low_lr": 1.310526315789474e-05, "step": 655 }, { "epoch": 1.7225509533201842, "high_lr": 0.0006552631578947369, "low_lr": 1.310526315789474e-05, "step": 655 }, { "epoch": 1.725180802103879, "grad_norm": 1.1998544931411743, "learning_rate": 0.0006547368421052632, "loss": 1.4875, "step": 656 }, { "epoch": 1.725180802103879, "high_lr": 0.0006547368421052632, "low_lr": 1.3094736842105264e-05, "step": 656 }, { "epoch": 1.725180802103879, "high_lr": 0.0006547368421052632, "low_lr": 1.3094736842105264e-05, "step": 656 }, { "epoch": 1.725180802103879, "high_lr": 0.0006547368421052632, "low_lr": 1.3094736842105264e-05, "step": 656 }, { "epoch": 1.725180802103879, "high_lr": 0.0006547368421052632, "low_lr": 1.3094736842105264e-05, "step": 656 }, { "epoch": 1.725180802103879, "high_lr": 0.0006547368421052632, "low_lr": 1.3094736842105264e-05, "step": 656 }, { "epoch": 1.725180802103879, "high_lr": 0.0006547368421052632, "low_lr": 1.3094736842105264e-05, "step": 656 }, { "epoch": 1.725180802103879, "high_lr": 0.0006547368421052632, "low_lr": 1.3094736842105264e-05, "step": 656 }, { "epoch": 1.725180802103879, "high_lr": 0.0006547368421052632, "low_lr": 1.3094736842105264e-05, "step": 656 }, { "epoch": 1.727810650887574, "grad_norm": 1.1463004350662231, "learning_rate": 0.0006542105263157895, "loss": 1.5059, "step": 657 }, { "epoch": 1.727810650887574, "high_lr": 0.0006542105263157895, "low_lr": 1.308421052631579e-05, "step": 657 }, { "epoch": 1.727810650887574, "high_lr": 0.0006542105263157895, "low_lr": 1.308421052631579e-05, "step": 657 }, { "epoch": 1.727810650887574, "high_lr": 0.0006542105263157895, "low_lr": 1.308421052631579e-05, "step": 657 }, { "epoch": 1.727810650887574, "high_lr": 0.0006542105263157895, "low_lr": 1.308421052631579e-05, "step": 657 }, { "epoch": 1.727810650887574, "high_lr": 0.0006542105263157895, "low_lr": 1.308421052631579e-05, "step": 657 }, { "epoch": 1.727810650887574, "high_lr": 0.0006542105263157895, "low_lr": 1.308421052631579e-05, "step": 657 }, { "epoch": 1.727810650887574, "high_lr": 0.0006542105263157895, "low_lr": 1.308421052631579e-05, "step": 657 }, { "epoch": 1.727810650887574, "high_lr": 0.0006542105263157895, "low_lr": 1.308421052631579e-05, "step": 657 }, { "epoch": 1.730440499671269, "grad_norm": 1.1531513929367065, "learning_rate": 0.0006536842105263158, "loss": 1.4984, "step": 658 }, { "epoch": 1.730440499671269, "high_lr": 0.0006536842105263158, "low_lr": 1.3073684210526317e-05, "step": 658 }, { "epoch": 1.730440499671269, "high_lr": 0.0006536842105263158, "low_lr": 1.3073684210526317e-05, "step": 658 }, { "epoch": 1.730440499671269, "high_lr": 0.0006536842105263158, "low_lr": 1.3073684210526317e-05, "step": 658 }, { "epoch": 1.730440499671269, "high_lr": 0.0006536842105263158, "low_lr": 1.3073684210526317e-05, "step": 658 }, { "epoch": 1.730440499671269, "high_lr": 0.0006536842105263158, "low_lr": 1.3073684210526317e-05, "step": 658 }, { "epoch": 1.730440499671269, "high_lr": 0.0006536842105263158, "low_lr": 1.3073684210526317e-05, "step": 658 }, { "epoch": 1.730440499671269, "high_lr": 0.0006536842105263158, "low_lr": 1.3073684210526317e-05, "step": 658 }, { "epoch": 1.730440499671269, "high_lr": 0.0006536842105263158, "low_lr": 1.3073684210526317e-05, "step": 658 }, { "epoch": 1.7330703484549639, "grad_norm": 1.1394236087799072, "learning_rate": 0.0006531578947368421, "loss": 1.4976, "step": 659 }, { "epoch": 1.7330703484549639, "high_lr": 0.0006531578947368421, "low_lr": 1.3063157894736845e-05, "step": 659 }, { "epoch": 1.7330703484549639, "high_lr": 0.0006531578947368421, "low_lr": 1.3063157894736845e-05, "step": 659 }, { "epoch": 1.7330703484549639, "high_lr": 0.0006531578947368421, "low_lr": 1.3063157894736845e-05, "step": 659 }, { "epoch": 1.7330703484549639, "high_lr": 0.0006531578947368421, "low_lr": 1.3063157894736845e-05, "step": 659 }, { "epoch": 1.7330703484549639, "high_lr": 0.0006531578947368421, "low_lr": 1.3063157894736845e-05, "step": 659 }, { "epoch": 1.7330703484549639, "high_lr": 0.0006531578947368421, "low_lr": 1.3063157894736845e-05, "step": 659 }, { "epoch": 1.7330703484549639, "high_lr": 0.0006531578947368421, "low_lr": 1.3063157894736845e-05, "step": 659 }, { "epoch": 1.7330703484549639, "high_lr": 0.0006531578947368421, "low_lr": 1.3063157894736845e-05, "step": 659 }, { "epoch": 1.7357001972386588, "grad_norm": 1.1302404403686523, "learning_rate": 0.0006526315789473684, "loss": 1.4824, "step": 660 }, { "epoch": 1.7357001972386588, "high_lr": 0.0006526315789473684, "low_lr": 1.305263157894737e-05, "step": 660 }, { "epoch": 1.7357001972386588, "high_lr": 0.0006526315789473684, "low_lr": 1.305263157894737e-05, "step": 660 }, { "epoch": 1.7357001972386588, "high_lr": 0.0006526315789473684, "low_lr": 1.305263157894737e-05, "step": 660 }, { "epoch": 1.7357001972386588, "high_lr": 0.0006526315789473684, "low_lr": 1.305263157894737e-05, "step": 660 }, { "epoch": 1.7357001972386588, "high_lr": 0.0006526315789473684, "low_lr": 1.305263157894737e-05, "step": 660 }, { "epoch": 1.7357001972386588, "high_lr": 0.0006526315789473684, "low_lr": 1.305263157894737e-05, "step": 660 }, { "epoch": 1.7357001972386588, "high_lr": 0.0006526315789473684, "low_lr": 1.305263157894737e-05, "step": 660 }, { "epoch": 1.7357001972386588, "high_lr": 0.0006526315789473684, "low_lr": 1.305263157894737e-05, "step": 660 }, { "epoch": 1.7383300460223539, "grad_norm": 1.1688681840896606, "learning_rate": 0.0006521052631578947, "loss": 1.5363, "step": 661 }, { "epoch": 1.7383300460223539, "high_lr": 0.0006521052631578947, "low_lr": 1.3042105263157896e-05, "step": 661 }, { "epoch": 1.7383300460223539, "high_lr": 0.0006521052631578947, "low_lr": 1.3042105263157896e-05, "step": 661 }, { "epoch": 1.7383300460223539, "high_lr": 0.0006521052631578947, "low_lr": 1.3042105263157896e-05, "step": 661 }, { "epoch": 1.7383300460223539, "high_lr": 0.0006521052631578947, "low_lr": 1.3042105263157896e-05, "step": 661 }, { "epoch": 1.7383300460223539, "high_lr": 0.0006521052631578947, "low_lr": 1.3042105263157896e-05, "step": 661 }, { "epoch": 1.7383300460223539, "high_lr": 0.0006521052631578947, "low_lr": 1.3042105263157896e-05, "step": 661 }, { "epoch": 1.7383300460223539, "high_lr": 0.0006521052631578947, "low_lr": 1.3042105263157896e-05, "step": 661 }, { "epoch": 1.7383300460223539, "high_lr": 0.0006521052631578947, "low_lr": 1.3042105263157896e-05, "step": 661 }, { "epoch": 1.7409598948060485, "grad_norm": 1.0534868240356445, "learning_rate": 0.000651578947368421, "loss": 1.4678, "step": 662 }, { "epoch": 1.7409598948060485, "high_lr": 0.000651578947368421, "low_lr": 1.3031578947368422e-05, "step": 662 }, { "epoch": 1.7409598948060485, "high_lr": 0.000651578947368421, "low_lr": 1.3031578947368422e-05, "step": 662 }, { "epoch": 1.7409598948060485, "high_lr": 0.000651578947368421, "low_lr": 1.3031578947368422e-05, "step": 662 }, { "epoch": 1.7409598948060485, "high_lr": 0.000651578947368421, "low_lr": 1.3031578947368422e-05, "step": 662 }, { "epoch": 1.7409598948060485, "high_lr": 0.000651578947368421, "low_lr": 1.3031578947368422e-05, "step": 662 }, { "epoch": 1.7409598948060485, "high_lr": 0.000651578947368421, "low_lr": 1.3031578947368422e-05, "step": 662 }, { "epoch": 1.7409598948060485, "high_lr": 0.000651578947368421, "low_lr": 1.3031578947368422e-05, "step": 662 }, { "epoch": 1.7409598948060485, "high_lr": 0.000651578947368421, "low_lr": 1.3031578947368422e-05, "step": 662 }, { "epoch": 1.7435897435897436, "grad_norm": 1.075860619544983, "learning_rate": 0.0006510526315789473, "loss": 1.4319, "step": 663 }, { "epoch": 1.7435897435897436, "high_lr": 0.0006510526315789473, "low_lr": 1.3021052631578948e-05, "step": 663 }, { "epoch": 1.7435897435897436, "high_lr": 0.0006510526315789473, "low_lr": 1.3021052631578948e-05, "step": 663 }, { "epoch": 1.7435897435897436, "high_lr": 0.0006510526315789473, "low_lr": 1.3021052631578948e-05, "step": 663 }, { "epoch": 1.7435897435897436, "high_lr": 0.0006510526315789473, "low_lr": 1.3021052631578948e-05, "step": 663 }, { "epoch": 1.7435897435897436, "high_lr": 0.0006510526315789473, "low_lr": 1.3021052631578948e-05, "step": 663 }, { "epoch": 1.7435897435897436, "high_lr": 0.0006510526315789473, "low_lr": 1.3021052631578948e-05, "step": 663 }, { "epoch": 1.7435897435897436, "high_lr": 0.0006510526315789473, "low_lr": 1.3021052631578948e-05, "step": 663 }, { "epoch": 1.7435897435897436, "high_lr": 0.0006510526315789473, "low_lr": 1.3021052631578948e-05, "step": 663 }, { "epoch": 1.7462195923734385, "grad_norm": 1.1058990955352783, "learning_rate": 0.0006505263157894738, "loss": 1.4832, "step": 664 }, { "epoch": 1.7462195923734385, "high_lr": 0.0006505263157894738, "low_lr": 1.3010526315789476e-05, "step": 664 }, { "epoch": 1.7462195923734385, "high_lr": 0.0006505263157894738, "low_lr": 1.3010526315789476e-05, "step": 664 }, { "epoch": 1.7462195923734385, "high_lr": 0.0006505263157894738, "low_lr": 1.3010526315789476e-05, "step": 664 }, { "epoch": 1.7462195923734385, "high_lr": 0.0006505263157894738, "low_lr": 1.3010526315789476e-05, "step": 664 }, { "epoch": 1.7462195923734385, "high_lr": 0.0006505263157894738, "low_lr": 1.3010526315789476e-05, "step": 664 }, { "epoch": 1.7462195923734385, "high_lr": 0.0006505263157894738, "low_lr": 1.3010526315789476e-05, "step": 664 }, { "epoch": 1.7462195923734385, "high_lr": 0.0006505263157894738, "low_lr": 1.3010526315789476e-05, "step": 664 }, { "epoch": 1.7462195923734385, "high_lr": 0.0006505263157894738, "low_lr": 1.3010526315789476e-05, "step": 664 }, { "epoch": 1.7488494411571334, "grad_norm": 1.2384603023529053, "learning_rate": 0.0006500000000000001, "loss": 1.5347, "step": 665 }, { "epoch": 1.7488494411571334, "high_lr": 0.0006500000000000001, "low_lr": 1.3000000000000001e-05, "step": 665 }, { "epoch": 1.7488494411571334, "high_lr": 0.0006500000000000001, "low_lr": 1.3000000000000001e-05, "step": 665 }, { "epoch": 1.7488494411571334, "high_lr": 0.0006500000000000001, "low_lr": 1.3000000000000001e-05, "step": 665 }, { "epoch": 1.7488494411571334, "high_lr": 0.0006500000000000001, "low_lr": 1.3000000000000001e-05, "step": 665 }, { "epoch": 1.7488494411571334, "high_lr": 0.0006500000000000001, "low_lr": 1.3000000000000001e-05, "step": 665 }, { "epoch": 1.7488494411571334, "high_lr": 0.0006500000000000001, "low_lr": 1.3000000000000001e-05, "step": 665 }, { "epoch": 1.7488494411571334, "high_lr": 0.0006500000000000001, "low_lr": 1.3000000000000001e-05, "step": 665 }, { "epoch": 1.7488494411571334, "high_lr": 0.0006500000000000001, "low_lr": 1.3000000000000001e-05, "step": 665 }, { "epoch": 1.7514792899408285, "grad_norm": 1.1601611375808716, "learning_rate": 0.0006494736842105264, "loss": 1.4963, "step": 666 }, { "epoch": 1.7514792899408285, "high_lr": 0.0006494736842105264, "low_lr": 1.2989473684210527e-05, "step": 666 }, { "epoch": 1.7514792899408285, "high_lr": 0.0006494736842105264, "low_lr": 1.2989473684210527e-05, "step": 666 }, { "epoch": 1.7514792899408285, "high_lr": 0.0006494736842105264, "low_lr": 1.2989473684210527e-05, "step": 666 }, { "epoch": 1.7514792899408285, "high_lr": 0.0006494736842105264, "low_lr": 1.2989473684210527e-05, "step": 666 }, { "epoch": 1.7514792899408285, "high_lr": 0.0006494736842105264, "low_lr": 1.2989473684210527e-05, "step": 666 }, { "epoch": 1.7514792899408285, "high_lr": 0.0006494736842105264, "low_lr": 1.2989473684210527e-05, "step": 666 }, { "epoch": 1.7514792899408285, "high_lr": 0.0006494736842105264, "low_lr": 1.2989473684210527e-05, "step": 666 }, { "epoch": 1.7514792899408285, "high_lr": 0.0006494736842105264, "low_lr": 1.2989473684210527e-05, "step": 666 }, { "epoch": 1.7541091387245233, "grad_norm": 1.135069489479065, "learning_rate": 0.0006489473684210527, "loss": 1.4478, "step": 667 }, { "epoch": 1.7541091387245233, "high_lr": 0.0006489473684210527, "low_lr": 1.2978947368421054e-05, "step": 667 }, { "epoch": 1.7541091387245233, "high_lr": 0.0006489473684210527, "low_lr": 1.2978947368421054e-05, "step": 667 }, { "epoch": 1.7541091387245233, "high_lr": 0.0006489473684210527, "low_lr": 1.2978947368421054e-05, "step": 667 }, { "epoch": 1.7541091387245233, "high_lr": 0.0006489473684210527, "low_lr": 1.2978947368421054e-05, "step": 667 }, { "epoch": 1.7541091387245233, "high_lr": 0.0006489473684210527, "low_lr": 1.2978947368421054e-05, "step": 667 }, { "epoch": 1.7541091387245233, "high_lr": 0.0006489473684210527, "low_lr": 1.2978947368421054e-05, "step": 667 }, { "epoch": 1.7541091387245233, "high_lr": 0.0006489473684210527, "low_lr": 1.2978947368421054e-05, "step": 667 }, { "epoch": 1.7541091387245233, "high_lr": 0.0006489473684210527, "low_lr": 1.2978947368421054e-05, "step": 667 }, { "epoch": 1.7567389875082182, "grad_norm": 1.2301567792892456, "learning_rate": 0.0006484210526315789, "loss": 1.4792, "step": 668 }, { "epoch": 1.7567389875082182, "high_lr": 0.0006484210526315789, "low_lr": 1.2968421052631578e-05, "step": 668 }, { "epoch": 1.7567389875082182, "high_lr": 0.0006484210526315789, "low_lr": 1.2968421052631578e-05, "step": 668 }, { "epoch": 1.7567389875082182, "high_lr": 0.0006484210526315789, "low_lr": 1.2968421052631578e-05, "step": 668 }, { "epoch": 1.7567389875082182, "high_lr": 0.0006484210526315789, "low_lr": 1.2968421052631578e-05, "step": 668 }, { "epoch": 1.7567389875082182, "high_lr": 0.0006484210526315789, "low_lr": 1.2968421052631578e-05, "step": 668 }, { "epoch": 1.7567389875082182, "high_lr": 0.0006484210526315789, "low_lr": 1.2968421052631578e-05, "step": 668 }, { "epoch": 1.7567389875082182, "high_lr": 0.0006484210526315789, "low_lr": 1.2968421052631578e-05, "step": 668 }, { "epoch": 1.7567389875082182, "high_lr": 0.0006484210526315789, "low_lr": 1.2968421052631578e-05, "step": 668 }, { "epoch": 1.7593688362919133, "grad_norm": 1.132869839668274, "learning_rate": 0.0006478947368421053, "loss": 1.473, "step": 669 }, { "epoch": 1.7593688362919133, "high_lr": 0.0006478947368421053, "low_lr": 1.2957894736842108e-05, "step": 669 }, { "epoch": 1.7593688362919133, "high_lr": 0.0006478947368421053, "low_lr": 1.2957894736842108e-05, "step": 669 }, { "epoch": 1.7593688362919133, "high_lr": 0.0006478947368421053, "low_lr": 1.2957894736842108e-05, "step": 669 }, { "epoch": 1.7593688362919133, "high_lr": 0.0006478947368421053, "low_lr": 1.2957894736842108e-05, "step": 669 }, { "epoch": 1.7593688362919133, "high_lr": 0.0006478947368421053, "low_lr": 1.2957894736842108e-05, "step": 669 }, { "epoch": 1.7593688362919133, "high_lr": 0.0006478947368421053, "low_lr": 1.2957894736842108e-05, "step": 669 }, { "epoch": 1.7593688362919133, "high_lr": 0.0006478947368421053, "low_lr": 1.2957894736842108e-05, "step": 669 }, { "epoch": 1.7593688362919133, "high_lr": 0.0006478947368421053, "low_lr": 1.2957894736842108e-05, "step": 669 }, { "epoch": 1.7619986850756082, "grad_norm": 1.162306547164917, "learning_rate": 0.0006473684210526316, "loss": 1.5266, "step": 670 }, { "epoch": 1.7619986850756082, "high_lr": 0.0006473684210526316, "low_lr": 1.2947368421052633e-05, "step": 670 }, { "epoch": 1.7619986850756082, "high_lr": 0.0006473684210526316, "low_lr": 1.2947368421052633e-05, "step": 670 }, { "epoch": 1.7619986850756082, "high_lr": 0.0006473684210526316, "low_lr": 1.2947368421052633e-05, "step": 670 }, { "epoch": 1.7619986850756082, "high_lr": 0.0006473684210526316, "low_lr": 1.2947368421052633e-05, "step": 670 }, { "epoch": 1.7619986850756082, "high_lr": 0.0006473684210526316, "low_lr": 1.2947368421052633e-05, "step": 670 }, { "epoch": 1.7619986850756082, "high_lr": 0.0006473684210526316, "low_lr": 1.2947368421052633e-05, "step": 670 }, { "epoch": 1.7619986850756082, "high_lr": 0.0006473684210526316, "low_lr": 1.2947368421052633e-05, "step": 670 }, { "epoch": 1.7619986850756082, "high_lr": 0.0006473684210526316, "low_lr": 1.2947368421052633e-05, "step": 670 }, { "epoch": 1.764628533859303, "grad_norm": 1.0740419626235962, "learning_rate": 0.0006468421052631579, "loss": 1.49, "step": 671 }, { "epoch": 1.764628533859303, "high_lr": 0.0006468421052631579, "low_lr": 1.2936842105263159e-05, "step": 671 }, { "epoch": 1.764628533859303, "high_lr": 0.0006468421052631579, "low_lr": 1.2936842105263159e-05, "step": 671 }, { "epoch": 1.764628533859303, "high_lr": 0.0006468421052631579, "low_lr": 1.2936842105263159e-05, "step": 671 }, { "epoch": 1.764628533859303, "high_lr": 0.0006468421052631579, "low_lr": 1.2936842105263159e-05, "step": 671 }, { "epoch": 1.764628533859303, "high_lr": 0.0006468421052631579, "low_lr": 1.2936842105263159e-05, "step": 671 }, { "epoch": 1.764628533859303, "high_lr": 0.0006468421052631579, "low_lr": 1.2936842105263159e-05, "step": 671 }, { "epoch": 1.764628533859303, "high_lr": 0.0006468421052631579, "low_lr": 1.2936842105263159e-05, "step": 671 }, { "epoch": 1.764628533859303, "high_lr": 0.0006468421052631579, "low_lr": 1.2936842105263159e-05, "step": 671 }, { "epoch": 1.7672583826429982, "grad_norm": 1.0520868301391602, "learning_rate": 0.0006463157894736842, "loss": 1.4678, "step": 672 }, { "epoch": 1.7672583826429982, "high_lr": 0.0006463157894736842, "low_lr": 1.2926315789473685e-05, "step": 672 }, { "epoch": 1.7672583826429982, "high_lr": 0.0006463157894736842, "low_lr": 1.2926315789473685e-05, "step": 672 }, { "epoch": 1.7672583826429982, "high_lr": 0.0006463157894736842, "low_lr": 1.2926315789473685e-05, "step": 672 }, { "epoch": 1.7672583826429982, "high_lr": 0.0006463157894736842, "low_lr": 1.2926315789473685e-05, "step": 672 }, { "epoch": 1.7672583826429982, "high_lr": 0.0006463157894736842, "low_lr": 1.2926315789473685e-05, "step": 672 }, { "epoch": 1.7672583826429982, "high_lr": 0.0006463157894736842, "low_lr": 1.2926315789473685e-05, "step": 672 }, { "epoch": 1.7672583826429982, "high_lr": 0.0006463157894736842, "low_lr": 1.2926315789473685e-05, "step": 672 }, { "epoch": 1.7672583826429982, "high_lr": 0.0006463157894736842, "low_lr": 1.2926315789473685e-05, "step": 672 }, { "epoch": 1.7698882314266928, "grad_norm": 1.1029653549194336, "learning_rate": 0.0006457894736842106, "loss": 1.4892, "step": 673 }, { "epoch": 1.7698882314266928, "high_lr": 0.0006457894736842106, "low_lr": 1.2915789473684213e-05, "step": 673 }, { "epoch": 1.7698882314266928, "high_lr": 0.0006457894736842106, "low_lr": 1.2915789473684213e-05, "step": 673 }, { "epoch": 1.7698882314266928, "high_lr": 0.0006457894736842106, "low_lr": 1.2915789473684213e-05, "step": 673 }, { "epoch": 1.7698882314266928, "high_lr": 0.0006457894736842106, "low_lr": 1.2915789473684213e-05, "step": 673 }, { "epoch": 1.7698882314266928, "high_lr": 0.0006457894736842106, "low_lr": 1.2915789473684213e-05, "step": 673 }, { "epoch": 1.7698882314266928, "high_lr": 0.0006457894736842106, "low_lr": 1.2915789473684213e-05, "step": 673 }, { "epoch": 1.7698882314266928, "high_lr": 0.0006457894736842106, "low_lr": 1.2915789473684213e-05, "step": 673 }, { "epoch": 1.7698882314266928, "high_lr": 0.0006457894736842106, "low_lr": 1.2915789473684213e-05, "step": 673 }, { "epoch": 1.772518080210388, "grad_norm": 1.1500648260116577, "learning_rate": 0.0006452631578947369, "loss": 1.4969, "step": 674 }, { "epoch": 1.772518080210388, "high_lr": 0.0006452631578947369, "low_lr": 1.2905263157894738e-05, "step": 674 }, { "epoch": 1.772518080210388, "high_lr": 0.0006452631578947369, "low_lr": 1.2905263157894738e-05, "step": 674 }, { "epoch": 1.772518080210388, "high_lr": 0.0006452631578947369, "low_lr": 1.2905263157894738e-05, "step": 674 }, { "epoch": 1.772518080210388, "high_lr": 0.0006452631578947369, "low_lr": 1.2905263157894738e-05, "step": 674 }, { "epoch": 1.772518080210388, "high_lr": 0.0006452631578947369, "low_lr": 1.2905263157894738e-05, "step": 674 }, { "epoch": 1.772518080210388, "high_lr": 0.0006452631578947369, "low_lr": 1.2905263157894738e-05, "step": 674 }, { "epoch": 1.772518080210388, "high_lr": 0.0006452631578947369, "low_lr": 1.2905263157894738e-05, "step": 674 }, { "epoch": 1.772518080210388, "high_lr": 0.0006452631578947369, "low_lr": 1.2905263157894738e-05, "step": 674 }, { "epoch": 1.7751479289940828, "grad_norm": 1.1641913652420044, "learning_rate": 0.0006447368421052632, "loss": 1.4436, "step": 675 }, { "epoch": 1.7751479289940828, "high_lr": 0.0006447368421052632, "low_lr": 1.2894736842105264e-05, "step": 675 }, { "epoch": 1.7751479289940828, "high_lr": 0.0006447368421052632, "low_lr": 1.2894736842105264e-05, "step": 675 }, { "epoch": 1.7751479289940828, "high_lr": 0.0006447368421052632, "low_lr": 1.2894736842105264e-05, "step": 675 }, { "epoch": 1.7751479289940828, "high_lr": 0.0006447368421052632, "low_lr": 1.2894736842105264e-05, "step": 675 }, { "epoch": 1.7751479289940828, "high_lr": 0.0006447368421052632, "low_lr": 1.2894736842105264e-05, "step": 675 }, { "epoch": 1.7751479289940828, "high_lr": 0.0006447368421052632, "low_lr": 1.2894736842105264e-05, "step": 675 }, { "epoch": 1.7751479289940828, "high_lr": 0.0006447368421052632, "low_lr": 1.2894736842105264e-05, "step": 675 }, { "epoch": 1.7751479289940828, "high_lr": 0.0006447368421052632, "low_lr": 1.2894736842105264e-05, "step": 675 }, { "epoch": 1.7777777777777777, "grad_norm": 1.161435604095459, "learning_rate": 0.0006442105263157894, "loss": 1.5171, "step": 676 }, { "epoch": 1.7777777777777777, "high_lr": 0.0006442105263157894, "low_lr": 1.288421052631579e-05, "step": 676 }, { "epoch": 1.7777777777777777, "high_lr": 0.0006442105263157894, "low_lr": 1.288421052631579e-05, "step": 676 }, { "epoch": 1.7777777777777777, "high_lr": 0.0006442105263157894, "low_lr": 1.288421052631579e-05, "step": 676 }, { "epoch": 1.7777777777777777, "high_lr": 0.0006442105263157894, "low_lr": 1.288421052631579e-05, "step": 676 }, { "epoch": 1.7777777777777777, "high_lr": 0.0006442105263157894, "low_lr": 1.288421052631579e-05, "step": 676 }, { "epoch": 1.7777777777777777, "high_lr": 0.0006442105263157894, "low_lr": 1.288421052631579e-05, "step": 676 }, { "epoch": 1.7777777777777777, "high_lr": 0.0006442105263157894, "low_lr": 1.288421052631579e-05, "step": 676 }, { "epoch": 1.7777777777777777, "high_lr": 0.0006442105263157894, "low_lr": 1.288421052631579e-05, "step": 676 }, { "epoch": 1.7804076265614728, "grad_norm": 1.1899640560150146, "learning_rate": 0.0006436842105263157, "loss": 1.482, "step": 677 }, { "epoch": 1.7804076265614728, "high_lr": 0.0006436842105263157, "low_lr": 1.2873684210526317e-05, "step": 677 }, { "epoch": 1.7804076265614728, "high_lr": 0.0006436842105263157, "low_lr": 1.2873684210526317e-05, "step": 677 }, { "epoch": 1.7804076265614728, "high_lr": 0.0006436842105263157, "low_lr": 1.2873684210526317e-05, "step": 677 }, { "epoch": 1.7804076265614728, "high_lr": 0.0006436842105263157, "low_lr": 1.2873684210526317e-05, "step": 677 }, { "epoch": 1.7804076265614728, "high_lr": 0.0006436842105263157, "low_lr": 1.2873684210526317e-05, "step": 677 }, { "epoch": 1.7804076265614728, "high_lr": 0.0006436842105263157, "low_lr": 1.2873684210526317e-05, "step": 677 }, { "epoch": 1.7804076265614728, "high_lr": 0.0006436842105263157, "low_lr": 1.2873684210526317e-05, "step": 677 }, { "epoch": 1.7804076265614728, "high_lr": 0.0006436842105263157, "low_lr": 1.2873684210526317e-05, "step": 677 }, { "epoch": 1.7830374753451677, "grad_norm": 1.110012412071228, "learning_rate": 0.0006431578947368421, "loss": 1.4839, "step": 678 }, { "epoch": 1.7830374753451677, "high_lr": 0.0006431578947368421, "low_lr": 1.2863157894736845e-05, "step": 678 }, { "epoch": 1.7830374753451677, "high_lr": 0.0006431578947368421, "low_lr": 1.2863157894736845e-05, "step": 678 }, { "epoch": 1.7830374753451677, "high_lr": 0.0006431578947368421, "low_lr": 1.2863157894736845e-05, "step": 678 }, { "epoch": 1.7830374753451677, "high_lr": 0.0006431578947368421, "low_lr": 1.2863157894736845e-05, "step": 678 }, { "epoch": 1.7830374753451677, "high_lr": 0.0006431578947368421, "low_lr": 1.2863157894736845e-05, "step": 678 }, { "epoch": 1.7830374753451677, "high_lr": 0.0006431578947368421, "low_lr": 1.2863157894736845e-05, "step": 678 }, { "epoch": 1.7830374753451677, "high_lr": 0.0006431578947368421, "low_lr": 1.2863157894736845e-05, "step": 678 }, { "epoch": 1.7830374753451677, "high_lr": 0.0006431578947368421, "low_lr": 1.2863157894736845e-05, "step": 678 }, { "epoch": 1.7856673241288625, "grad_norm": 1.1355431079864502, "learning_rate": 0.0006426315789473684, "loss": 1.4858, "step": 679 }, { "epoch": 1.7856673241288625, "high_lr": 0.0006426315789473684, "low_lr": 1.285263157894737e-05, "step": 679 }, { "epoch": 1.7856673241288625, "high_lr": 0.0006426315789473684, "low_lr": 1.285263157894737e-05, "step": 679 }, { "epoch": 1.7856673241288625, "high_lr": 0.0006426315789473684, "low_lr": 1.285263157894737e-05, "step": 679 }, { "epoch": 1.7856673241288625, "high_lr": 0.0006426315789473684, "low_lr": 1.285263157894737e-05, "step": 679 }, { "epoch": 1.7856673241288625, "high_lr": 0.0006426315789473684, "low_lr": 1.285263157894737e-05, "step": 679 }, { "epoch": 1.7856673241288625, "high_lr": 0.0006426315789473684, "low_lr": 1.285263157894737e-05, "step": 679 }, { "epoch": 1.7856673241288625, "high_lr": 0.0006426315789473684, "low_lr": 1.285263157894737e-05, "step": 679 }, { "epoch": 1.7856673241288625, "high_lr": 0.0006426315789473684, "low_lr": 1.285263157894737e-05, "step": 679 }, { "epoch": 1.7882971729125576, "grad_norm": 1.0867681503295898, "learning_rate": 0.0006421052631578948, "loss": 1.4809, "step": 680 }, { "epoch": 1.7882971729125576, "high_lr": 0.0006421052631578948, "low_lr": 1.2842105263157896e-05, "step": 680 }, { "epoch": 1.7882971729125576, "high_lr": 0.0006421052631578948, "low_lr": 1.2842105263157896e-05, "step": 680 }, { "epoch": 1.7882971729125576, "high_lr": 0.0006421052631578948, "low_lr": 1.2842105263157896e-05, "step": 680 }, { "epoch": 1.7882971729125576, "high_lr": 0.0006421052631578948, "low_lr": 1.2842105263157896e-05, "step": 680 }, { "epoch": 1.7882971729125576, "high_lr": 0.0006421052631578948, "low_lr": 1.2842105263157896e-05, "step": 680 }, { "epoch": 1.7882971729125576, "high_lr": 0.0006421052631578948, "low_lr": 1.2842105263157896e-05, "step": 680 }, { "epoch": 1.7882971729125576, "high_lr": 0.0006421052631578948, "low_lr": 1.2842105263157896e-05, "step": 680 }, { "epoch": 1.7882971729125576, "high_lr": 0.0006421052631578948, "low_lr": 1.2842105263157896e-05, "step": 680 }, { "epoch": 1.7909270216962525, "grad_norm": 2.313262701034546, "learning_rate": 0.0006415789473684211, "loss": 1.4851, "step": 681 }, { "epoch": 1.7909270216962525, "high_lr": 0.0006415789473684211, "low_lr": 1.2831578947368422e-05, "step": 681 }, { "epoch": 1.7909270216962525, "high_lr": 0.0006415789473684211, "low_lr": 1.2831578947368422e-05, "step": 681 }, { "epoch": 1.7909270216962525, "high_lr": 0.0006415789473684211, "low_lr": 1.2831578947368422e-05, "step": 681 }, { "epoch": 1.7909270216962525, "high_lr": 0.0006415789473684211, "low_lr": 1.2831578947368422e-05, "step": 681 }, { "epoch": 1.7909270216962525, "high_lr": 0.0006415789473684211, "low_lr": 1.2831578947368422e-05, "step": 681 }, { "epoch": 1.7909270216962525, "high_lr": 0.0006415789473684211, "low_lr": 1.2831578947368422e-05, "step": 681 }, { "epoch": 1.7909270216962525, "high_lr": 0.0006415789473684211, "low_lr": 1.2831578947368422e-05, "step": 681 }, { "epoch": 1.7909270216962525, "high_lr": 0.0006415789473684211, "low_lr": 1.2831578947368422e-05, "step": 681 }, { "epoch": 1.7935568704799474, "grad_norm": 1.1034647226333618, "learning_rate": 0.0006410526315789474, "loss": 1.4758, "step": 682 }, { "epoch": 1.7935568704799474, "high_lr": 0.0006410526315789474, "low_lr": 1.2821052631578947e-05, "step": 682 }, { "epoch": 1.7935568704799474, "high_lr": 0.0006410526315789474, "low_lr": 1.2821052631578947e-05, "step": 682 }, { "epoch": 1.7935568704799474, "high_lr": 0.0006410526315789474, "low_lr": 1.2821052631578947e-05, "step": 682 }, { "epoch": 1.7935568704799474, "high_lr": 0.0006410526315789474, "low_lr": 1.2821052631578947e-05, "step": 682 }, { "epoch": 1.7935568704799474, "high_lr": 0.0006410526315789474, "low_lr": 1.2821052631578947e-05, "step": 682 }, { "epoch": 1.7935568704799474, "high_lr": 0.0006410526315789474, "low_lr": 1.2821052631578947e-05, "step": 682 }, { "epoch": 1.7935568704799474, "high_lr": 0.0006410526315789474, "low_lr": 1.2821052631578947e-05, "step": 682 }, { "epoch": 1.7935568704799474, "high_lr": 0.0006410526315789474, "low_lr": 1.2821052631578947e-05, "step": 682 }, { "epoch": 1.7961867192636425, "grad_norm": 1.1270182132720947, "learning_rate": 0.0006405263157894738, "loss": 1.4794, "step": 683 }, { "epoch": 1.7961867192636425, "high_lr": 0.0006405263157894738, "low_lr": 1.2810526315789475e-05, "step": 683 }, { "epoch": 1.7961867192636425, "high_lr": 0.0006405263157894738, "low_lr": 1.2810526315789475e-05, "step": 683 }, { "epoch": 1.7961867192636425, "high_lr": 0.0006405263157894738, "low_lr": 1.2810526315789475e-05, "step": 683 }, { "epoch": 1.7961867192636425, "high_lr": 0.0006405263157894738, "low_lr": 1.2810526315789475e-05, "step": 683 }, { "epoch": 1.7961867192636425, "high_lr": 0.0006405263157894738, "low_lr": 1.2810526315789475e-05, "step": 683 }, { "epoch": 1.7961867192636425, "high_lr": 0.0006405263157894738, "low_lr": 1.2810526315789475e-05, "step": 683 }, { "epoch": 1.7961867192636425, "high_lr": 0.0006405263157894738, "low_lr": 1.2810526315789475e-05, "step": 683 }, { "epoch": 1.7961867192636425, "high_lr": 0.0006405263157894738, "low_lr": 1.2810526315789475e-05, "step": 683 }, { "epoch": 1.7988165680473371, "grad_norm": 1.1152960062026978, "learning_rate": 0.00064, "loss": 1.4582, "step": 684 }, { "epoch": 1.7988165680473371, "high_lr": 0.00064, "low_lr": 1.2800000000000001e-05, "step": 684 }, { "epoch": 1.7988165680473371, "high_lr": 0.00064, "low_lr": 1.2800000000000001e-05, "step": 684 }, { "epoch": 1.7988165680473371, "high_lr": 0.00064, "low_lr": 1.2800000000000001e-05, "step": 684 }, { "epoch": 1.7988165680473371, "high_lr": 0.00064, "low_lr": 1.2800000000000001e-05, "step": 684 }, { "epoch": 1.7988165680473371, "high_lr": 0.00064, "low_lr": 1.2800000000000001e-05, "step": 684 }, { "epoch": 1.7988165680473371, "high_lr": 0.00064, "low_lr": 1.2800000000000001e-05, "step": 684 }, { "epoch": 1.7988165680473371, "high_lr": 0.00064, "low_lr": 1.2800000000000001e-05, "step": 684 }, { "epoch": 1.7988165680473371, "high_lr": 0.00064, "low_lr": 1.2800000000000001e-05, "step": 684 }, { "epoch": 1.8014464168310322, "grad_norm": 1.0992164611816406, "learning_rate": 0.0006394736842105263, "loss": 1.5235, "step": 685 }, { "epoch": 1.8014464168310322, "high_lr": 0.0006394736842105263, "low_lr": 1.2789473684210527e-05, "step": 685 }, { "epoch": 1.8014464168310322, "high_lr": 0.0006394736842105263, "low_lr": 1.2789473684210527e-05, "step": 685 }, { "epoch": 1.8014464168310322, "high_lr": 0.0006394736842105263, "low_lr": 1.2789473684210527e-05, "step": 685 }, { "epoch": 1.8014464168310322, "high_lr": 0.0006394736842105263, "low_lr": 1.2789473684210527e-05, "step": 685 }, { "epoch": 1.8014464168310322, "high_lr": 0.0006394736842105263, "low_lr": 1.2789473684210527e-05, "step": 685 }, { "epoch": 1.8014464168310322, "high_lr": 0.0006394736842105263, "low_lr": 1.2789473684210527e-05, "step": 685 }, { "epoch": 1.8014464168310322, "high_lr": 0.0006394736842105263, "low_lr": 1.2789473684210527e-05, "step": 685 }, { "epoch": 1.8014464168310322, "high_lr": 0.0006394736842105263, "low_lr": 1.2789473684210527e-05, "step": 685 }, { "epoch": 1.8040762656147271, "grad_norm": 1.1349226236343384, "learning_rate": 0.0006389473684210526, "loss": 1.4314, "step": 686 }, { "epoch": 1.8040762656147271, "high_lr": 0.0006389473684210526, "low_lr": 1.2778947368421054e-05, "step": 686 }, { "epoch": 1.8040762656147271, "high_lr": 0.0006389473684210526, "low_lr": 1.2778947368421054e-05, "step": 686 }, { "epoch": 1.8040762656147271, "high_lr": 0.0006389473684210526, "low_lr": 1.2778947368421054e-05, "step": 686 }, { "epoch": 1.8040762656147271, "high_lr": 0.0006389473684210526, "low_lr": 1.2778947368421054e-05, "step": 686 }, { "epoch": 1.8040762656147271, "high_lr": 0.0006389473684210526, "low_lr": 1.2778947368421054e-05, "step": 686 }, { "epoch": 1.8040762656147271, "high_lr": 0.0006389473684210526, "low_lr": 1.2778947368421054e-05, "step": 686 }, { "epoch": 1.8040762656147271, "high_lr": 0.0006389473684210526, "low_lr": 1.2778947368421054e-05, "step": 686 }, { "epoch": 1.8040762656147271, "high_lr": 0.0006389473684210526, "low_lr": 1.2778947368421054e-05, "step": 686 }, { "epoch": 1.806706114398422, "grad_norm": 1.1309823989868164, "learning_rate": 0.000638421052631579, "loss": 1.453, "step": 687 }, { "epoch": 1.806706114398422, "high_lr": 0.000638421052631579, "low_lr": 1.2768421052631582e-05, "step": 687 }, { "epoch": 1.806706114398422, "high_lr": 0.000638421052631579, "low_lr": 1.2768421052631582e-05, "step": 687 }, { "epoch": 1.806706114398422, "high_lr": 0.000638421052631579, "low_lr": 1.2768421052631582e-05, "step": 687 }, { "epoch": 1.806706114398422, "high_lr": 0.000638421052631579, "low_lr": 1.2768421052631582e-05, "step": 687 }, { "epoch": 1.806706114398422, "high_lr": 0.000638421052631579, "low_lr": 1.2768421052631582e-05, "step": 687 }, { "epoch": 1.806706114398422, "high_lr": 0.000638421052631579, "low_lr": 1.2768421052631582e-05, "step": 687 }, { "epoch": 1.806706114398422, "high_lr": 0.000638421052631579, "low_lr": 1.2768421052631582e-05, "step": 687 }, { "epoch": 1.806706114398422, "high_lr": 0.000638421052631579, "low_lr": 1.2768421052631582e-05, "step": 687 }, { "epoch": 1.809335963182117, "grad_norm": 1.1155540943145752, "learning_rate": 0.0006378947368421053, "loss": 1.4349, "step": 688 }, { "epoch": 1.809335963182117, "high_lr": 0.0006378947368421053, "low_lr": 1.2757894736842106e-05, "step": 688 }, { "epoch": 1.809335963182117, "high_lr": 0.0006378947368421053, "low_lr": 1.2757894736842106e-05, "step": 688 }, { "epoch": 1.809335963182117, "high_lr": 0.0006378947368421053, "low_lr": 1.2757894736842106e-05, "step": 688 }, { "epoch": 1.809335963182117, "high_lr": 0.0006378947368421053, "low_lr": 1.2757894736842106e-05, "step": 688 }, { "epoch": 1.809335963182117, "high_lr": 0.0006378947368421053, "low_lr": 1.2757894736842106e-05, "step": 688 }, { "epoch": 1.809335963182117, "high_lr": 0.0006378947368421053, "low_lr": 1.2757894736842106e-05, "step": 688 }, { "epoch": 1.809335963182117, "high_lr": 0.0006378947368421053, "low_lr": 1.2757894736842106e-05, "step": 688 }, { "epoch": 1.809335963182117, "high_lr": 0.0006378947368421053, "low_lr": 1.2757894736842106e-05, "step": 688 }, { "epoch": 1.811965811965812, "grad_norm": 1.0642731189727783, "learning_rate": 0.0006373684210526316, "loss": 1.4595, "step": 689 }, { "epoch": 1.811965811965812, "high_lr": 0.0006373684210526316, "low_lr": 1.2747368421052633e-05, "step": 689 }, { "epoch": 1.811965811965812, "high_lr": 0.0006373684210526316, "low_lr": 1.2747368421052633e-05, "step": 689 }, { "epoch": 1.811965811965812, "high_lr": 0.0006373684210526316, "low_lr": 1.2747368421052633e-05, "step": 689 }, { "epoch": 1.811965811965812, "high_lr": 0.0006373684210526316, "low_lr": 1.2747368421052633e-05, "step": 689 }, { "epoch": 1.811965811965812, "high_lr": 0.0006373684210526316, "low_lr": 1.2747368421052633e-05, "step": 689 }, { "epoch": 1.811965811965812, "high_lr": 0.0006373684210526316, "low_lr": 1.2747368421052633e-05, "step": 689 }, { "epoch": 1.811965811965812, "high_lr": 0.0006373684210526316, "low_lr": 1.2747368421052633e-05, "step": 689 }, { "epoch": 1.811965811965812, "high_lr": 0.0006373684210526316, "low_lr": 1.2747368421052633e-05, "step": 689 }, { "epoch": 1.8145956607495068, "grad_norm": 1.1135817766189575, "learning_rate": 0.0006368421052631579, "loss": 1.5065, "step": 690 }, { "epoch": 1.8145956607495068, "high_lr": 0.0006368421052631579, "low_lr": 1.2736842105263159e-05, "step": 690 }, { "epoch": 1.8145956607495068, "high_lr": 0.0006368421052631579, "low_lr": 1.2736842105263159e-05, "step": 690 }, { "epoch": 1.8145956607495068, "high_lr": 0.0006368421052631579, "low_lr": 1.2736842105263159e-05, "step": 690 }, { "epoch": 1.8145956607495068, "high_lr": 0.0006368421052631579, "low_lr": 1.2736842105263159e-05, "step": 690 }, { "epoch": 1.8145956607495068, "high_lr": 0.0006368421052631579, "low_lr": 1.2736842105263159e-05, "step": 690 }, { "epoch": 1.8145956607495068, "high_lr": 0.0006368421052631579, "low_lr": 1.2736842105263159e-05, "step": 690 }, { "epoch": 1.8145956607495068, "high_lr": 0.0006368421052631579, "low_lr": 1.2736842105263159e-05, "step": 690 }, { "epoch": 1.8145956607495068, "high_lr": 0.0006368421052631579, "low_lr": 1.2736842105263159e-05, "step": 690 }, { "epoch": 1.817225509533202, "grad_norm": 1.1606158018112183, "learning_rate": 0.0006363157894736842, "loss": 1.4447, "step": 691 }, { "epoch": 1.817225509533202, "high_lr": 0.0006363157894736842, "low_lr": 1.2726315789473684e-05, "step": 691 }, { "epoch": 1.817225509533202, "high_lr": 0.0006363157894736842, "low_lr": 1.2726315789473684e-05, "step": 691 }, { "epoch": 1.817225509533202, "high_lr": 0.0006363157894736842, "low_lr": 1.2726315789473684e-05, "step": 691 }, { "epoch": 1.817225509533202, "high_lr": 0.0006363157894736842, "low_lr": 1.2726315789473684e-05, "step": 691 }, { "epoch": 1.817225509533202, "high_lr": 0.0006363157894736842, "low_lr": 1.2726315789473684e-05, "step": 691 }, { "epoch": 1.817225509533202, "high_lr": 0.0006363157894736842, "low_lr": 1.2726315789473684e-05, "step": 691 }, { "epoch": 1.817225509533202, "high_lr": 0.0006363157894736842, "low_lr": 1.2726315789473684e-05, "step": 691 }, { "epoch": 1.817225509533202, "high_lr": 0.0006363157894736842, "low_lr": 1.2726315789473684e-05, "step": 691 }, { "epoch": 1.8198553583168968, "grad_norm": 1.2284473180770874, "learning_rate": 0.0006357894736842106, "loss": 1.5534, "step": 692 }, { "epoch": 1.8198553583168968, "high_lr": 0.0006357894736842106, "low_lr": 1.2715789473684212e-05, "step": 692 }, { "epoch": 1.8198553583168968, "high_lr": 0.0006357894736842106, "low_lr": 1.2715789473684212e-05, "step": 692 }, { "epoch": 1.8198553583168968, "high_lr": 0.0006357894736842106, "low_lr": 1.2715789473684212e-05, "step": 692 }, { "epoch": 1.8198553583168968, "high_lr": 0.0006357894736842106, "low_lr": 1.2715789473684212e-05, "step": 692 }, { "epoch": 1.8198553583168968, "high_lr": 0.0006357894736842106, "low_lr": 1.2715789473684212e-05, "step": 692 }, { "epoch": 1.8198553583168968, "high_lr": 0.0006357894736842106, "low_lr": 1.2715789473684212e-05, "step": 692 }, { "epoch": 1.8198553583168968, "high_lr": 0.0006357894736842106, "low_lr": 1.2715789473684212e-05, "step": 692 }, { "epoch": 1.8198553583168968, "high_lr": 0.0006357894736842106, "low_lr": 1.2715789473684212e-05, "step": 692 }, { "epoch": 1.8224852071005917, "grad_norm": 1.128949522972107, "learning_rate": 0.0006352631578947368, "loss": 1.471, "step": 693 }, { "epoch": 1.8224852071005917, "high_lr": 0.0006352631578947368, "low_lr": 1.2705263157894738e-05, "step": 693 }, { "epoch": 1.8224852071005917, "high_lr": 0.0006352631578947368, "low_lr": 1.2705263157894738e-05, "step": 693 }, { "epoch": 1.8224852071005917, "high_lr": 0.0006352631578947368, "low_lr": 1.2705263157894738e-05, "step": 693 }, { "epoch": 1.8224852071005917, "high_lr": 0.0006352631578947368, "low_lr": 1.2705263157894738e-05, "step": 693 }, { "epoch": 1.8224852071005917, "high_lr": 0.0006352631578947368, "low_lr": 1.2705263157894738e-05, "step": 693 }, { "epoch": 1.8224852071005917, "high_lr": 0.0006352631578947368, "low_lr": 1.2705263157894738e-05, "step": 693 }, { "epoch": 1.8224852071005917, "high_lr": 0.0006352631578947368, "low_lr": 1.2705263157894738e-05, "step": 693 }, { "epoch": 1.8224852071005917, "high_lr": 0.0006352631578947368, "low_lr": 1.2705263157894738e-05, "step": 693 }, { "epoch": 1.8251150558842868, "grad_norm": 1.2049524784088135, "learning_rate": 0.0006347368421052631, "loss": 1.4575, "step": 694 }, { "epoch": 1.8251150558842868, "high_lr": 0.0006347368421052631, "low_lr": 1.2694736842105264e-05, "step": 694 }, { "epoch": 1.8251150558842868, "high_lr": 0.0006347368421052631, "low_lr": 1.2694736842105264e-05, "step": 694 }, { "epoch": 1.8251150558842868, "high_lr": 0.0006347368421052631, "low_lr": 1.2694736842105264e-05, "step": 694 }, { "epoch": 1.8251150558842868, "high_lr": 0.0006347368421052631, "low_lr": 1.2694736842105264e-05, "step": 694 }, { "epoch": 1.8251150558842868, "high_lr": 0.0006347368421052631, "low_lr": 1.2694736842105264e-05, "step": 694 }, { "epoch": 1.8251150558842868, "high_lr": 0.0006347368421052631, "low_lr": 1.2694736842105264e-05, "step": 694 }, { "epoch": 1.8251150558842868, "high_lr": 0.0006347368421052631, "low_lr": 1.2694736842105264e-05, "step": 694 }, { "epoch": 1.8251150558842868, "high_lr": 0.0006347368421052631, "low_lr": 1.2694736842105264e-05, "step": 694 }, { "epoch": 1.8277449046679815, "grad_norm": 1.0460563898086548, "learning_rate": 0.0006342105263157894, "loss": 1.4718, "step": 695 }, { "epoch": 1.8277449046679815, "high_lr": 0.0006342105263157894, "low_lr": 1.268421052631579e-05, "step": 695 }, { "epoch": 1.8277449046679815, "high_lr": 0.0006342105263157894, "low_lr": 1.268421052631579e-05, "step": 695 }, { "epoch": 1.8277449046679815, "high_lr": 0.0006342105263157894, "low_lr": 1.268421052631579e-05, "step": 695 }, { "epoch": 1.8277449046679815, "high_lr": 0.0006342105263157894, "low_lr": 1.268421052631579e-05, "step": 695 }, { "epoch": 1.8277449046679815, "high_lr": 0.0006342105263157894, "low_lr": 1.268421052631579e-05, "step": 695 }, { "epoch": 1.8277449046679815, "high_lr": 0.0006342105263157894, "low_lr": 1.268421052631579e-05, "step": 695 }, { "epoch": 1.8277449046679815, "high_lr": 0.0006342105263157894, "low_lr": 1.268421052631579e-05, "step": 695 }, { "epoch": 1.8277449046679815, "high_lr": 0.0006342105263157894, "low_lr": 1.268421052631579e-05, "step": 695 }, { "epoch": 1.8303747534516766, "grad_norm": 1.1107012033462524, "learning_rate": 0.0006336842105263157, "loss": 1.409, "step": 696 }, { "epoch": 1.8303747534516766, "high_lr": 0.0006336842105263157, "low_lr": 1.2673684210526315e-05, "step": 696 }, { "epoch": 1.8303747534516766, "high_lr": 0.0006336842105263157, "low_lr": 1.2673684210526315e-05, "step": 696 }, { "epoch": 1.8303747534516766, "high_lr": 0.0006336842105263157, "low_lr": 1.2673684210526315e-05, "step": 696 }, { "epoch": 1.8303747534516766, "high_lr": 0.0006336842105263157, "low_lr": 1.2673684210526315e-05, "step": 696 }, { "epoch": 1.8303747534516766, "high_lr": 0.0006336842105263157, "low_lr": 1.2673684210526315e-05, "step": 696 }, { "epoch": 1.8303747534516766, "high_lr": 0.0006336842105263157, "low_lr": 1.2673684210526315e-05, "step": 696 }, { "epoch": 1.8303747534516766, "high_lr": 0.0006336842105263157, "low_lr": 1.2673684210526315e-05, "step": 696 }, { "epoch": 1.8303747534516766, "high_lr": 0.0006336842105263157, "low_lr": 1.2673684210526315e-05, "step": 696 }, { "epoch": 1.8330046022353714, "grad_norm": 1.090254783630371, "learning_rate": 0.0006331578947368422, "loss": 1.4258, "step": 697 }, { "epoch": 1.8330046022353714, "high_lr": 0.0006331578947368422, "low_lr": 1.2663157894736843e-05, "step": 697 }, { "epoch": 1.8330046022353714, "high_lr": 0.0006331578947368422, "low_lr": 1.2663157894736843e-05, "step": 697 }, { "epoch": 1.8330046022353714, "high_lr": 0.0006331578947368422, "low_lr": 1.2663157894736843e-05, "step": 697 }, { "epoch": 1.8330046022353714, "high_lr": 0.0006331578947368422, "low_lr": 1.2663157894736843e-05, "step": 697 }, { "epoch": 1.8330046022353714, "high_lr": 0.0006331578947368422, "low_lr": 1.2663157894736843e-05, "step": 697 }, { "epoch": 1.8330046022353714, "high_lr": 0.0006331578947368422, "low_lr": 1.2663157894736843e-05, "step": 697 }, { "epoch": 1.8330046022353714, "high_lr": 0.0006331578947368422, "low_lr": 1.2663157894736843e-05, "step": 697 }, { "epoch": 1.8330046022353714, "high_lr": 0.0006331578947368422, "low_lr": 1.2663157894736843e-05, "step": 697 }, { "epoch": 1.8356344510190663, "grad_norm": 1.116258978843689, "learning_rate": 0.0006326315789473685, "loss": 1.4292, "step": 698 }, { "epoch": 1.8356344510190663, "high_lr": 0.0006326315789473685, "low_lr": 1.265263157894737e-05, "step": 698 }, { "epoch": 1.8356344510190663, "high_lr": 0.0006326315789473685, "low_lr": 1.265263157894737e-05, "step": 698 }, { "epoch": 1.8356344510190663, "high_lr": 0.0006326315789473685, "low_lr": 1.265263157894737e-05, "step": 698 }, { "epoch": 1.8356344510190663, "high_lr": 0.0006326315789473685, "low_lr": 1.265263157894737e-05, "step": 698 }, { "epoch": 1.8356344510190663, "high_lr": 0.0006326315789473685, "low_lr": 1.265263157894737e-05, "step": 698 }, { "epoch": 1.8356344510190663, "high_lr": 0.0006326315789473685, "low_lr": 1.265263157894737e-05, "step": 698 }, { "epoch": 1.8356344510190663, "high_lr": 0.0006326315789473685, "low_lr": 1.265263157894737e-05, "step": 698 }, { "epoch": 1.8356344510190663, "high_lr": 0.0006326315789473685, "low_lr": 1.265263157894737e-05, "step": 698 }, { "epoch": 1.8382642998027614, "grad_norm": 1.164453387260437, "learning_rate": 0.0006321052631578948, "loss": 1.4696, "step": 699 }, { "epoch": 1.8382642998027614, "high_lr": 0.0006321052631578948, "low_lr": 1.2642105263157896e-05, "step": 699 }, { "epoch": 1.8382642998027614, "high_lr": 0.0006321052631578948, "low_lr": 1.2642105263157896e-05, "step": 699 }, { "epoch": 1.8382642998027614, "high_lr": 0.0006321052631578948, "low_lr": 1.2642105263157896e-05, "step": 699 }, { "epoch": 1.8382642998027614, "high_lr": 0.0006321052631578948, "low_lr": 1.2642105263157896e-05, "step": 699 }, { "epoch": 1.8382642998027614, "high_lr": 0.0006321052631578948, "low_lr": 1.2642105263157896e-05, "step": 699 }, { "epoch": 1.8382642998027614, "high_lr": 0.0006321052631578948, "low_lr": 1.2642105263157896e-05, "step": 699 }, { "epoch": 1.8382642998027614, "high_lr": 0.0006321052631578948, "low_lr": 1.2642105263157896e-05, "step": 699 }, { "epoch": 1.8382642998027614, "high_lr": 0.0006321052631578948, "low_lr": 1.2642105263157896e-05, "step": 699 }, { "epoch": 1.8408941485864563, "grad_norm": 1.1327639818191528, "learning_rate": 0.0006315789473684211, "loss": 1.4706, "step": 700 }, { "epoch": 1.8408941485864563, "high_lr": 0.0006315789473684211, "low_lr": 1.263157894736842e-05, "step": 700 }, { "epoch": 1.8408941485864563, "high_lr": 0.0006315789473684211, "low_lr": 1.263157894736842e-05, "step": 700 }, { "epoch": 1.8408941485864563, "high_lr": 0.0006315789473684211, "low_lr": 1.263157894736842e-05, "step": 700 }, { "epoch": 1.8408941485864563, "high_lr": 0.0006315789473684211, "low_lr": 1.263157894736842e-05, "step": 700 }, { "epoch": 1.8408941485864563, "high_lr": 0.0006315789473684211, "low_lr": 1.263157894736842e-05, "step": 700 }, { "epoch": 1.8408941485864563, "high_lr": 0.0006315789473684211, "low_lr": 1.263157894736842e-05, "step": 700 }, { "epoch": 1.8408941485864563, "high_lr": 0.0006315789473684211, "low_lr": 1.263157894736842e-05, "step": 700 }, { "epoch": 1.8408941485864563, "high_lr": 0.0006315789473684211, "low_lr": 1.263157894736842e-05, "step": 700 }, { "epoch": 1.8435239973701512, "grad_norm": 1.1588438749313354, "learning_rate": 0.0006310526315789475, "loss": 1.4673, "step": 701 }, { "epoch": 1.8435239973701512, "high_lr": 0.0006310526315789475, "low_lr": 1.2621052631578949e-05, "step": 701 }, { "epoch": 1.8435239973701512, "high_lr": 0.0006310526315789475, "low_lr": 1.2621052631578949e-05, "step": 701 }, { "epoch": 1.8435239973701512, "high_lr": 0.0006310526315789475, "low_lr": 1.2621052631578949e-05, "step": 701 }, { "epoch": 1.8435239973701512, "high_lr": 0.0006310526315789475, "low_lr": 1.2621052631578949e-05, "step": 701 }, { "epoch": 1.8435239973701512, "high_lr": 0.0006310526315789475, "low_lr": 1.2621052631578949e-05, "step": 701 }, { "epoch": 1.8435239973701512, "high_lr": 0.0006310526315789475, "low_lr": 1.2621052631578949e-05, "step": 701 }, { "epoch": 1.8435239973701512, "high_lr": 0.0006310526315789475, "low_lr": 1.2621052631578949e-05, "step": 701 }, { "epoch": 1.8435239973701512, "high_lr": 0.0006310526315789475, "low_lr": 1.2621052631578949e-05, "step": 701 }, { "epoch": 1.8461538461538463, "grad_norm": 1.1653773784637451, "learning_rate": 0.0006305263157894737, "loss": 1.4839, "step": 702 }, { "epoch": 1.8461538461538463, "high_lr": 0.0006305263157894737, "low_lr": 1.2610526315789475e-05, "step": 702 }, { "epoch": 1.8461538461538463, "high_lr": 0.0006305263157894737, "low_lr": 1.2610526315789475e-05, "step": 702 }, { "epoch": 1.8461538461538463, "high_lr": 0.0006305263157894737, "low_lr": 1.2610526315789475e-05, "step": 702 }, { "epoch": 1.8461538461538463, "high_lr": 0.0006305263157894737, "low_lr": 1.2610526315789475e-05, "step": 702 }, { "epoch": 1.8461538461538463, "high_lr": 0.0006305263157894737, "low_lr": 1.2610526315789475e-05, "step": 702 }, { "epoch": 1.8461538461538463, "high_lr": 0.0006305263157894737, "low_lr": 1.2610526315789475e-05, "step": 702 }, { "epoch": 1.8461538461538463, "high_lr": 0.0006305263157894737, "low_lr": 1.2610526315789475e-05, "step": 702 }, { "epoch": 1.8461538461538463, "high_lr": 0.0006305263157894737, "low_lr": 1.2610526315789475e-05, "step": 702 }, { "epoch": 1.8487836949375411, "grad_norm": 1.1697614192962646, "learning_rate": 0.00063, "loss": 1.4751, "step": 703 }, { "epoch": 1.8487836949375411, "high_lr": 0.00063, "low_lr": 1.2600000000000001e-05, "step": 703 }, { "epoch": 1.8487836949375411, "high_lr": 0.00063, "low_lr": 1.2600000000000001e-05, "step": 703 }, { "epoch": 1.8487836949375411, "high_lr": 0.00063, "low_lr": 1.2600000000000001e-05, "step": 703 }, { "epoch": 1.8487836949375411, "high_lr": 0.00063, "low_lr": 1.2600000000000001e-05, "step": 703 }, { "epoch": 1.8487836949375411, "high_lr": 0.00063, "low_lr": 1.2600000000000001e-05, "step": 703 }, { "epoch": 1.8487836949375411, "high_lr": 0.00063, "low_lr": 1.2600000000000001e-05, "step": 703 }, { "epoch": 1.8487836949375411, "high_lr": 0.00063, "low_lr": 1.2600000000000001e-05, "step": 703 }, { "epoch": 1.8487836949375411, "high_lr": 0.00063, "low_lr": 1.2600000000000001e-05, "step": 703 }, { "epoch": 1.851413543721236, "grad_norm": 1.1886961460113525, "learning_rate": 0.0006294736842105263, "loss": 1.5048, "step": 704 }, { "epoch": 1.851413543721236, "high_lr": 0.0006294736842105263, "low_lr": 1.2589473684210528e-05, "step": 704 }, { "epoch": 1.851413543721236, "high_lr": 0.0006294736842105263, "low_lr": 1.2589473684210528e-05, "step": 704 }, { "epoch": 1.851413543721236, "high_lr": 0.0006294736842105263, "low_lr": 1.2589473684210528e-05, "step": 704 }, { "epoch": 1.851413543721236, "high_lr": 0.0006294736842105263, "low_lr": 1.2589473684210528e-05, "step": 704 }, { "epoch": 1.851413543721236, "high_lr": 0.0006294736842105263, "low_lr": 1.2589473684210528e-05, "step": 704 }, { "epoch": 1.851413543721236, "high_lr": 0.0006294736842105263, "low_lr": 1.2589473684210528e-05, "step": 704 }, { "epoch": 1.851413543721236, "high_lr": 0.0006294736842105263, "low_lr": 1.2589473684210528e-05, "step": 704 }, { "epoch": 1.851413543721236, "high_lr": 0.0006294736842105263, "low_lr": 1.2589473684210528e-05, "step": 704 }, { "epoch": 1.854043392504931, "grad_norm": 1.1062883138656616, "learning_rate": 0.0006289473684210526, "loss": 1.4738, "step": 705 }, { "epoch": 1.854043392504931, "high_lr": 0.0006289473684210526, "low_lr": 1.2578947368421052e-05, "step": 705 }, { "epoch": 1.854043392504931, "high_lr": 0.0006289473684210526, "low_lr": 1.2578947368421052e-05, "step": 705 }, { "epoch": 1.854043392504931, "high_lr": 0.0006289473684210526, "low_lr": 1.2578947368421052e-05, "step": 705 }, { "epoch": 1.854043392504931, "high_lr": 0.0006289473684210526, "low_lr": 1.2578947368421052e-05, "step": 705 }, { "epoch": 1.854043392504931, "high_lr": 0.0006289473684210526, "low_lr": 1.2578947368421052e-05, "step": 705 }, { "epoch": 1.854043392504931, "high_lr": 0.0006289473684210526, "low_lr": 1.2578947368421052e-05, "step": 705 }, { "epoch": 1.854043392504931, "high_lr": 0.0006289473684210526, "low_lr": 1.2578947368421052e-05, "step": 705 }, { "epoch": 1.854043392504931, "high_lr": 0.0006289473684210526, "low_lr": 1.2578947368421052e-05, "step": 705 }, { "epoch": 1.8566732412886258, "grad_norm": 1.1155682802200317, "learning_rate": 0.000628421052631579, "loss": 1.481, "step": 706 }, { "epoch": 1.8566732412886258, "high_lr": 0.000628421052631579, "low_lr": 1.256842105263158e-05, "step": 706 }, { "epoch": 1.8566732412886258, "high_lr": 0.000628421052631579, "low_lr": 1.256842105263158e-05, "step": 706 }, { "epoch": 1.8566732412886258, "high_lr": 0.000628421052631579, "low_lr": 1.256842105263158e-05, "step": 706 }, { "epoch": 1.8566732412886258, "high_lr": 0.000628421052631579, "low_lr": 1.256842105263158e-05, "step": 706 }, { "epoch": 1.8566732412886258, "high_lr": 0.000628421052631579, "low_lr": 1.256842105263158e-05, "step": 706 }, { "epoch": 1.8566732412886258, "high_lr": 0.000628421052631579, "low_lr": 1.256842105263158e-05, "step": 706 }, { "epoch": 1.8566732412886258, "high_lr": 0.000628421052631579, "low_lr": 1.256842105263158e-05, "step": 706 }, { "epoch": 1.8566732412886258, "high_lr": 0.000628421052631579, "low_lr": 1.256842105263158e-05, "step": 706 }, { "epoch": 1.8593030900723209, "grad_norm": 1.0740604400634766, "learning_rate": 0.0006278947368421053, "loss": 1.4829, "step": 707 }, { "epoch": 1.8593030900723209, "high_lr": 0.0006278947368421053, "low_lr": 1.2557894736842107e-05, "step": 707 }, { "epoch": 1.8593030900723209, "high_lr": 0.0006278947368421053, "low_lr": 1.2557894736842107e-05, "step": 707 }, { "epoch": 1.8593030900723209, "high_lr": 0.0006278947368421053, "low_lr": 1.2557894736842107e-05, "step": 707 }, { "epoch": 1.8593030900723209, "high_lr": 0.0006278947368421053, "low_lr": 1.2557894736842107e-05, "step": 707 }, { "epoch": 1.8593030900723209, "high_lr": 0.0006278947368421053, "low_lr": 1.2557894736842107e-05, "step": 707 }, { "epoch": 1.8593030900723209, "high_lr": 0.0006278947368421053, "low_lr": 1.2557894736842107e-05, "step": 707 }, { "epoch": 1.8593030900723209, "high_lr": 0.0006278947368421053, "low_lr": 1.2557894736842107e-05, "step": 707 }, { "epoch": 1.8593030900723209, "high_lr": 0.0006278947368421053, "low_lr": 1.2557894736842107e-05, "step": 707 }, { "epoch": 1.8619329388560157, "grad_norm": 1.1394215822219849, "learning_rate": 0.0006273684210526316, "loss": 1.5019, "step": 708 }, { "epoch": 1.8619329388560157, "high_lr": 0.0006273684210526316, "low_lr": 1.2547368421052633e-05, "step": 708 }, { "epoch": 1.8619329388560157, "high_lr": 0.0006273684210526316, "low_lr": 1.2547368421052633e-05, "step": 708 }, { "epoch": 1.8619329388560157, "high_lr": 0.0006273684210526316, "low_lr": 1.2547368421052633e-05, "step": 708 }, { "epoch": 1.8619329388560157, "high_lr": 0.0006273684210526316, "low_lr": 1.2547368421052633e-05, "step": 708 }, { "epoch": 1.8619329388560157, "high_lr": 0.0006273684210526316, "low_lr": 1.2547368421052633e-05, "step": 708 }, { "epoch": 1.8619329388560157, "high_lr": 0.0006273684210526316, "low_lr": 1.2547368421052633e-05, "step": 708 }, { "epoch": 1.8619329388560157, "high_lr": 0.0006273684210526316, "low_lr": 1.2547368421052633e-05, "step": 708 }, { "epoch": 1.8619329388560157, "high_lr": 0.0006273684210526316, "low_lr": 1.2547368421052633e-05, "step": 708 }, { "epoch": 1.8645627876397106, "grad_norm": 1.1655924320220947, "learning_rate": 0.0006268421052631578, "loss": 1.4564, "step": 709 }, { "epoch": 1.8645627876397106, "high_lr": 0.0006268421052631578, "low_lr": 1.2536842105263158e-05, "step": 709 }, { "epoch": 1.8645627876397106, "high_lr": 0.0006268421052631578, "low_lr": 1.2536842105263158e-05, "step": 709 }, { "epoch": 1.8645627876397106, "high_lr": 0.0006268421052631578, "low_lr": 1.2536842105263158e-05, "step": 709 }, { "epoch": 1.8645627876397106, "high_lr": 0.0006268421052631578, "low_lr": 1.2536842105263158e-05, "step": 709 }, { "epoch": 1.8645627876397106, "high_lr": 0.0006268421052631578, "low_lr": 1.2536842105263158e-05, "step": 709 }, { "epoch": 1.8645627876397106, "high_lr": 0.0006268421052631578, "low_lr": 1.2536842105263158e-05, "step": 709 }, { "epoch": 1.8645627876397106, "high_lr": 0.0006268421052631578, "low_lr": 1.2536842105263158e-05, "step": 709 }, { "epoch": 1.8645627876397106, "high_lr": 0.0006268421052631578, "low_lr": 1.2536842105263158e-05, "step": 709 }, { "epoch": 1.8671926364234057, "grad_norm": 1.150449275970459, "learning_rate": 0.0006263157894736841, "loss": 1.4662, "step": 710 }, { "epoch": 1.8671926364234057, "high_lr": 0.0006263157894736841, "low_lr": 1.2526315789473684e-05, "step": 710 }, { "epoch": 1.8671926364234057, "high_lr": 0.0006263157894736841, "low_lr": 1.2526315789473684e-05, "step": 710 }, { "epoch": 1.8671926364234057, "high_lr": 0.0006263157894736841, "low_lr": 1.2526315789473684e-05, "step": 710 }, { "epoch": 1.8671926364234057, "high_lr": 0.0006263157894736841, "low_lr": 1.2526315789473684e-05, "step": 710 }, { "epoch": 1.8671926364234057, "high_lr": 0.0006263157894736841, "low_lr": 1.2526315789473684e-05, "step": 710 }, { "epoch": 1.8671926364234057, "high_lr": 0.0006263157894736841, "low_lr": 1.2526315789473684e-05, "step": 710 }, { "epoch": 1.8671926364234057, "high_lr": 0.0006263157894736841, "low_lr": 1.2526315789473684e-05, "step": 710 }, { "epoch": 1.8671926364234057, "high_lr": 0.0006263157894736841, "low_lr": 1.2526315789473684e-05, "step": 710 }, { "epoch": 1.8698224852071006, "grad_norm": 1.20257568359375, "learning_rate": 0.0006257894736842105, "loss": 1.4665, "step": 711 }, { "epoch": 1.8698224852071006, "high_lr": 0.0006257894736842105, "low_lr": 1.2515789473684212e-05, "step": 711 }, { "epoch": 1.8698224852071006, "high_lr": 0.0006257894736842105, "low_lr": 1.2515789473684212e-05, "step": 711 }, { "epoch": 1.8698224852071006, "high_lr": 0.0006257894736842105, "low_lr": 1.2515789473684212e-05, "step": 711 }, { "epoch": 1.8698224852071006, "high_lr": 0.0006257894736842105, "low_lr": 1.2515789473684212e-05, "step": 711 }, { "epoch": 1.8698224852071006, "high_lr": 0.0006257894736842105, "low_lr": 1.2515789473684212e-05, "step": 711 }, { "epoch": 1.8698224852071006, "high_lr": 0.0006257894736842105, "low_lr": 1.2515789473684212e-05, "step": 711 }, { "epoch": 1.8698224852071006, "high_lr": 0.0006257894736842105, "low_lr": 1.2515789473684212e-05, "step": 711 }, { "epoch": 1.8698224852071006, "high_lr": 0.0006257894736842105, "low_lr": 1.2515789473684212e-05, "step": 711 }, { "epoch": 1.8724523339907955, "grad_norm": 1.0963051319122314, "learning_rate": 0.0006252631578947368, "loss": 1.4644, "step": 712 }, { "epoch": 1.8724523339907955, "high_lr": 0.0006252631578947368, "low_lr": 1.2505263157894738e-05, "step": 712 }, { "epoch": 1.8724523339907955, "high_lr": 0.0006252631578947368, "low_lr": 1.2505263157894738e-05, "step": 712 }, { "epoch": 1.8724523339907955, "high_lr": 0.0006252631578947368, "low_lr": 1.2505263157894738e-05, "step": 712 }, { "epoch": 1.8724523339907955, "high_lr": 0.0006252631578947368, "low_lr": 1.2505263157894738e-05, "step": 712 }, { "epoch": 1.8724523339907955, "high_lr": 0.0006252631578947368, "low_lr": 1.2505263157894738e-05, "step": 712 }, { "epoch": 1.8724523339907955, "high_lr": 0.0006252631578947368, "low_lr": 1.2505263157894738e-05, "step": 712 }, { "epoch": 1.8724523339907955, "high_lr": 0.0006252631578947368, "low_lr": 1.2505263157894738e-05, "step": 712 }, { "epoch": 1.8724523339907955, "high_lr": 0.0006252631578947368, "low_lr": 1.2505263157894738e-05, "step": 712 }, { "epoch": 1.8750821827744906, "grad_norm": 1.2021636962890625, "learning_rate": 0.0006247368421052632, "loss": 1.4841, "step": 713 }, { "epoch": 1.8750821827744906, "high_lr": 0.0006247368421052632, "low_lr": 1.2494736842105265e-05, "step": 713 }, { "epoch": 1.8750821827744906, "high_lr": 0.0006247368421052632, "low_lr": 1.2494736842105265e-05, "step": 713 }, { "epoch": 1.8750821827744906, "high_lr": 0.0006247368421052632, "low_lr": 1.2494736842105265e-05, "step": 713 }, { "epoch": 1.8750821827744906, "high_lr": 0.0006247368421052632, "low_lr": 1.2494736842105265e-05, "step": 713 }, { "epoch": 1.8750821827744906, "high_lr": 0.0006247368421052632, "low_lr": 1.2494736842105265e-05, "step": 713 }, { "epoch": 1.8750821827744906, "high_lr": 0.0006247368421052632, "low_lr": 1.2494736842105265e-05, "step": 713 }, { "epoch": 1.8750821827744906, "high_lr": 0.0006247368421052632, "low_lr": 1.2494736842105265e-05, "step": 713 }, { "epoch": 1.8750821827744906, "high_lr": 0.0006247368421052632, "low_lr": 1.2494736842105265e-05, "step": 713 }, { "epoch": 1.8777120315581854, "grad_norm": 1.085993766784668, "learning_rate": 0.0006242105263157895, "loss": 1.4279, "step": 714 }, { "epoch": 1.8777120315581854, "high_lr": 0.0006242105263157895, "low_lr": 1.248421052631579e-05, "step": 714 }, { "epoch": 1.8777120315581854, "high_lr": 0.0006242105263157895, "low_lr": 1.248421052631579e-05, "step": 714 }, { "epoch": 1.8777120315581854, "high_lr": 0.0006242105263157895, "low_lr": 1.248421052631579e-05, "step": 714 }, { "epoch": 1.8777120315581854, "high_lr": 0.0006242105263157895, "low_lr": 1.248421052631579e-05, "step": 714 }, { "epoch": 1.8777120315581854, "high_lr": 0.0006242105263157895, "low_lr": 1.248421052631579e-05, "step": 714 }, { "epoch": 1.8777120315581854, "high_lr": 0.0006242105263157895, "low_lr": 1.248421052631579e-05, "step": 714 }, { "epoch": 1.8777120315581854, "high_lr": 0.0006242105263157895, "low_lr": 1.248421052631579e-05, "step": 714 }, { "epoch": 1.8777120315581854, "high_lr": 0.0006242105263157895, "low_lr": 1.248421052631579e-05, "step": 714 }, { "epoch": 1.8803418803418803, "grad_norm": 1.02846097946167, "learning_rate": 0.0006236842105263159, "loss": 1.4146, "step": 715 }, { "epoch": 1.8803418803418803, "high_lr": 0.0006236842105263159, "low_lr": 1.2473684210526317e-05, "step": 715 }, { "epoch": 1.8803418803418803, "high_lr": 0.0006236842105263159, "low_lr": 1.2473684210526317e-05, "step": 715 }, { "epoch": 1.8803418803418803, "high_lr": 0.0006236842105263159, "low_lr": 1.2473684210526317e-05, "step": 715 }, { "epoch": 1.8803418803418803, "high_lr": 0.0006236842105263159, "low_lr": 1.2473684210526317e-05, "step": 715 }, { "epoch": 1.8803418803418803, "high_lr": 0.0006236842105263159, "low_lr": 1.2473684210526317e-05, "step": 715 }, { "epoch": 1.8803418803418803, "high_lr": 0.0006236842105263159, "low_lr": 1.2473684210526317e-05, "step": 715 }, { "epoch": 1.8803418803418803, "high_lr": 0.0006236842105263159, "low_lr": 1.2473684210526317e-05, "step": 715 }, { "epoch": 1.8803418803418803, "high_lr": 0.0006236842105263159, "low_lr": 1.2473684210526317e-05, "step": 715 }, { "epoch": 1.8829717291255754, "grad_norm": 1.2024359703063965, "learning_rate": 0.0006231578947368422, "loss": 1.4347, "step": 716 }, { "epoch": 1.8829717291255754, "high_lr": 0.0006231578947368422, "low_lr": 1.2463157894736844e-05, "step": 716 }, { "epoch": 1.8829717291255754, "high_lr": 0.0006231578947368422, "low_lr": 1.2463157894736844e-05, "step": 716 }, { "epoch": 1.8829717291255754, "high_lr": 0.0006231578947368422, "low_lr": 1.2463157894736844e-05, "step": 716 }, { "epoch": 1.8829717291255754, "high_lr": 0.0006231578947368422, "low_lr": 1.2463157894736844e-05, "step": 716 }, { "epoch": 1.8829717291255754, "high_lr": 0.0006231578947368422, "low_lr": 1.2463157894736844e-05, "step": 716 }, { "epoch": 1.8829717291255754, "high_lr": 0.0006231578947368422, "low_lr": 1.2463157894736844e-05, "step": 716 }, { "epoch": 1.8829717291255754, "high_lr": 0.0006231578947368422, "low_lr": 1.2463157894736844e-05, "step": 716 }, { "epoch": 1.8829717291255754, "high_lr": 0.0006231578947368422, "low_lr": 1.2463157894736844e-05, "step": 716 }, { "epoch": 1.88560157790927, "grad_norm": 1.135720133781433, "learning_rate": 0.0006226315789473685, "loss": 1.4472, "step": 717 }, { "epoch": 1.88560157790927, "high_lr": 0.0006226315789473685, "low_lr": 1.245263157894737e-05, "step": 717 }, { "epoch": 1.88560157790927, "high_lr": 0.0006226315789473685, "low_lr": 1.245263157894737e-05, "step": 717 }, { "epoch": 1.88560157790927, "high_lr": 0.0006226315789473685, "low_lr": 1.245263157894737e-05, "step": 717 }, { "epoch": 1.88560157790927, "high_lr": 0.0006226315789473685, "low_lr": 1.245263157894737e-05, "step": 717 }, { "epoch": 1.88560157790927, "high_lr": 0.0006226315789473685, "low_lr": 1.245263157894737e-05, "step": 717 }, { "epoch": 1.88560157790927, "high_lr": 0.0006226315789473685, "low_lr": 1.245263157894737e-05, "step": 717 }, { "epoch": 1.88560157790927, "high_lr": 0.0006226315789473685, "low_lr": 1.245263157894737e-05, "step": 717 }, { "epoch": 1.88560157790927, "high_lr": 0.0006226315789473685, "low_lr": 1.245263157894737e-05, "step": 717 }, { "epoch": 1.8882314266929652, "grad_norm": 1.146132230758667, "learning_rate": 0.0006221052631578947, "loss": 1.4908, "step": 718 }, { "epoch": 1.8882314266929652, "high_lr": 0.0006221052631578947, "low_lr": 1.2442105263157895e-05, "step": 718 }, { "epoch": 1.8882314266929652, "high_lr": 0.0006221052631578947, "low_lr": 1.2442105263157895e-05, "step": 718 }, { "epoch": 1.8882314266929652, "high_lr": 0.0006221052631578947, "low_lr": 1.2442105263157895e-05, "step": 718 }, { "epoch": 1.8882314266929652, "high_lr": 0.0006221052631578947, "low_lr": 1.2442105263157895e-05, "step": 718 }, { "epoch": 1.8882314266929652, "high_lr": 0.0006221052631578947, "low_lr": 1.2442105263157895e-05, "step": 718 }, { "epoch": 1.8882314266929652, "high_lr": 0.0006221052631578947, "low_lr": 1.2442105263157895e-05, "step": 718 }, { "epoch": 1.8882314266929652, "high_lr": 0.0006221052631578947, "low_lr": 1.2442105263157895e-05, "step": 718 }, { "epoch": 1.8882314266929652, "high_lr": 0.0006221052631578947, "low_lr": 1.2442105263157895e-05, "step": 718 }, { "epoch": 1.89086127547666, "grad_norm": 1.113741159439087, "learning_rate": 0.000621578947368421, "loss": 1.476, "step": 719 }, { "epoch": 1.89086127547666, "high_lr": 0.000621578947368421, "low_lr": 1.2431578947368421e-05, "step": 719 }, { "epoch": 1.89086127547666, "high_lr": 0.000621578947368421, "low_lr": 1.2431578947368421e-05, "step": 719 }, { "epoch": 1.89086127547666, "high_lr": 0.000621578947368421, "low_lr": 1.2431578947368421e-05, "step": 719 }, { "epoch": 1.89086127547666, "high_lr": 0.000621578947368421, "low_lr": 1.2431578947368421e-05, "step": 719 }, { "epoch": 1.89086127547666, "high_lr": 0.000621578947368421, "low_lr": 1.2431578947368421e-05, "step": 719 }, { "epoch": 1.89086127547666, "high_lr": 0.000621578947368421, "low_lr": 1.2431578947368421e-05, "step": 719 }, { "epoch": 1.89086127547666, "high_lr": 0.000621578947368421, "low_lr": 1.2431578947368421e-05, "step": 719 }, { "epoch": 1.89086127547666, "high_lr": 0.000621578947368421, "low_lr": 1.2431578947368421e-05, "step": 719 }, { "epoch": 1.893491124260355, "grad_norm": 1.0785640478134155, "learning_rate": 0.0006210526315789474, "loss": 1.4843, "step": 720 }, { "epoch": 1.893491124260355, "high_lr": 0.0006210526315789474, "low_lr": 1.2421052631578949e-05, "step": 720 }, { "epoch": 1.893491124260355, "high_lr": 0.0006210526315789474, "low_lr": 1.2421052631578949e-05, "step": 720 }, { "epoch": 1.893491124260355, "high_lr": 0.0006210526315789474, "low_lr": 1.2421052631578949e-05, "step": 720 }, { "epoch": 1.893491124260355, "high_lr": 0.0006210526315789474, "low_lr": 1.2421052631578949e-05, "step": 720 }, { "epoch": 1.893491124260355, "high_lr": 0.0006210526315789474, "low_lr": 1.2421052631578949e-05, "step": 720 }, { "epoch": 1.893491124260355, "high_lr": 0.0006210526315789474, "low_lr": 1.2421052631578949e-05, "step": 720 }, { "epoch": 1.893491124260355, "high_lr": 0.0006210526315789474, "low_lr": 1.2421052631578949e-05, "step": 720 }, { "epoch": 1.893491124260355, "high_lr": 0.0006210526315789474, "low_lr": 1.2421052631578949e-05, "step": 720 }, { "epoch": 1.89612097304405, "grad_norm": 1.1490678787231445, "learning_rate": 0.0006205263157894737, "loss": 1.5026, "step": 721 }, { "epoch": 1.89612097304405, "high_lr": 0.0006205263157894737, "low_lr": 1.2410526315789475e-05, "step": 721 }, { "epoch": 1.89612097304405, "high_lr": 0.0006205263157894737, "low_lr": 1.2410526315789475e-05, "step": 721 }, { "epoch": 1.89612097304405, "high_lr": 0.0006205263157894737, "low_lr": 1.2410526315789475e-05, "step": 721 }, { "epoch": 1.89612097304405, "high_lr": 0.0006205263157894737, "low_lr": 1.2410526315789475e-05, "step": 721 }, { "epoch": 1.89612097304405, "high_lr": 0.0006205263157894737, "low_lr": 1.2410526315789475e-05, "step": 721 }, { "epoch": 1.89612097304405, "high_lr": 0.0006205263157894737, "low_lr": 1.2410526315789475e-05, "step": 721 }, { "epoch": 1.89612097304405, "high_lr": 0.0006205263157894737, "low_lr": 1.2410526315789475e-05, "step": 721 }, { "epoch": 1.89612097304405, "high_lr": 0.0006205263157894737, "low_lr": 1.2410526315789475e-05, "step": 721 }, { "epoch": 1.898750821827745, "grad_norm": 1.1461573839187622, "learning_rate": 0.00062, "loss": 1.4493, "step": 722 }, { "epoch": 1.898750821827745, "high_lr": 0.00062, "low_lr": 1.2400000000000002e-05, "step": 722 }, { "epoch": 1.898750821827745, "high_lr": 0.00062, "low_lr": 1.2400000000000002e-05, "step": 722 }, { "epoch": 1.898750821827745, "high_lr": 0.00062, "low_lr": 1.2400000000000002e-05, "step": 722 }, { "epoch": 1.898750821827745, "high_lr": 0.00062, "low_lr": 1.2400000000000002e-05, "step": 722 }, { "epoch": 1.898750821827745, "high_lr": 0.00062, "low_lr": 1.2400000000000002e-05, "step": 722 }, { "epoch": 1.898750821827745, "high_lr": 0.00062, "low_lr": 1.2400000000000002e-05, "step": 722 }, { "epoch": 1.898750821827745, "high_lr": 0.00062, "low_lr": 1.2400000000000002e-05, "step": 722 }, { "epoch": 1.898750821827745, "high_lr": 0.00062, "low_lr": 1.2400000000000002e-05, "step": 722 }, { "epoch": 1.9013806706114398, "grad_norm": 1.1096080541610718, "learning_rate": 0.0006194736842105263, "loss": 1.4309, "step": 723 }, { "epoch": 1.9013806706114398, "high_lr": 0.0006194736842105263, "low_lr": 1.2389473684210526e-05, "step": 723 }, { "epoch": 1.9013806706114398, "high_lr": 0.0006194736842105263, "low_lr": 1.2389473684210526e-05, "step": 723 }, { "epoch": 1.9013806706114398, "high_lr": 0.0006194736842105263, "low_lr": 1.2389473684210526e-05, "step": 723 }, { "epoch": 1.9013806706114398, "high_lr": 0.0006194736842105263, "low_lr": 1.2389473684210526e-05, "step": 723 }, { "epoch": 1.9013806706114398, "high_lr": 0.0006194736842105263, "low_lr": 1.2389473684210526e-05, "step": 723 }, { "epoch": 1.9013806706114398, "high_lr": 0.0006194736842105263, "low_lr": 1.2389473684210526e-05, "step": 723 }, { "epoch": 1.9013806706114398, "high_lr": 0.0006194736842105263, "low_lr": 1.2389473684210526e-05, "step": 723 }, { "epoch": 1.9013806706114398, "high_lr": 0.0006194736842105263, "low_lr": 1.2389473684210526e-05, "step": 723 }, { "epoch": 1.9040105193951349, "grad_norm": 1.2648789882659912, "learning_rate": 0.0006189473684210526, "loss": 1.5025, "step": 724 }, { "epoch": 1.9040105193951349, "high_lr": 0.0006189473684210526, "low_lr": 1.2378947368421053e-05, "step": 724 }, { "epoch": 1.9040105193951349, "high_lr": 0.0006189473684210526, "low_lr": 1.2378947368421053e-05, "step": 724 }, { "epoch": 1.9040105193951349, "high_lr": 0.0006189473684210526, "low_lr": 1.2378947368421053e-05, "step": 724 }, { "epoch": 1.9040105193951349, "high_lr": 0.0006189473684210526, "low_lr": 1.2378947368421053e-05, "step": 724 }, { "epoch": 1.9040105193951349, "high_lr": 0.0006189473684210526, "low_lr": 1.2378947368421053e-05, "step": 724 }, { "epoch": 1.9040105193951349, "high_lr": 0.0006189473684210526, "low_lr": 1.2378947368421053e-05, "step": 724 }, { "epoch": 1.9040105193951349, "high_lr": 0.0006189473684210526, "low_lr": 1.2378947368421053e-05, "step": 724 }, { "epoch": 1.9040105193951349, "high_lr": 0.0006189473684210526, "low_lr": 1.2378947368421053e-05, "step": 724 }, { "epoch": 1.9066403681788298, "grad_norm": 1.1586730480194092, "learning_rate": 0.000618421052631579, "loss": 1.4308, "step": 725 }, { "epoch": 1.9066403681788298, "high_lr": 0.000618421052631579, "low_lr": 1.236842105263158e-05, "step": 725 }, { "epoch": 1.9066403681788298, "high_lr": 0.000618421052631579, "low_lr": 1.236842105263158e-05, "step": 725 }, { "epoch": 1.9066403681788298, "high_lr": 0.000618421052631579, "low_lr": 1.236842105263158e-05, "step": 725 }, { "epoch": 1.9066403681788298, "high_lr": 0.000618421052631579, "low_lr": 1.236842105263158e-05, "step": 725 }, { "epoch": 1.9066403681788298, "high_lr": 0.000618421052631579, "low_lr": 1.236842105263158e-05, "step": 725 }, { "epoch": 1.9066403681788298, "high_lr": 0.000618421052631579, "low_lr": 1.236842105263158e-05, "step": 725 }, { "epoch": 1.9066403681788298, "high_lr": 0.000618421052631579, "low_lr": 1.236842105263158e-05, "step": 725 }, { "epoch": 1.9066403681788298, "high_lr": 0.000618421052631579, "low_lr": 1.236842105263158e-05, "step": 725 }, { "epoch": 1.9092702169625246, "grad_norm": 1.1156361103057861, "learning_rate": 0.0006178947368421053, "loss": 1.4735, "step": 726 }, { "epoch": 1.9092702169625246, "high_lr": 0.0006178947368421053, "low_lr": 1.2357894736842107e-05, "step": 726 }, { "epoch": 1.9092702169625246, "high_lr": 0.0006178947368421053, "low_lr": 1.2357894736842107e-05, "step": 726 }, { "epoch": 1.9092702169625246, "high_lr": 0.0006178947368421053, "low_lr": 1.2357894736842107e-05, "step": 726 }, { "epoch": 1.9092702169625246, "high_lr": 0.0006178947368421053, "low_lr": 1.2357894736842107e-05, "step": 726 }, { "epoch": 1.9092702169625246, "high_lr": 0.0006178947368421053, "low_lr": 1.2357894736842107e-05, "step": 726 }, { "epoch": 1.9092702169625246, "high_lr": 0.0006178947368421053, "low_lr": 1.2357894736842107e-05, "step": 726 }, { "epoch": 1.9092702169625246, "high_lr": 0.0006178947368421053, "low_lr": 1.2357894736842107e-05, "step": 726 }, { "epoch": 1.9092702169625246, "high_lr": 0.0006178947368421053, "low_lr": 1.2357894736842107e-05, "step": 726 }, { "epoch": 1.9119000657462197, "grad_norm": 1.0861486196517944, "learning_rate": 0.0006173684210526315, "loss": 1.4552, "step": 727 }, { "epoch": 1.9119000657462197, "high_lr": 0.0006173684210526315, "low_lr": 1.2347368421052631e-05, "step": 727 }, { "epoch": 1.9119000657462197, "high_lr": 0.0006173684210526315, "low_lr": 1.2347368421052631e-05, "step": 727 }, { "epoch": 1.9119000657462197, "high_lr": 0.0006173684210526315, "low_lr": 1.2347368421052631e-05, "step": 727 }, { "epoch": 1.9119000657462197, "high_lr": 0.0006173684210526315, "low_lr": 1.2347368421052631e-05, "step": 727 }, { "epoch": 1.9119000657462197, "high_lr": 0.0006173684210526315, "low_lr": 1.2347368421052631e-05, "step": 727 }, { "epoch": 1.9119000657462197, "high_lr": 0.0006173684210526315, "low_lr": 1.2347368421052631e-05, "step": 727 }, { "epoch": 1.9119000657462197, "high_lr": 0.0006173684210526315, "low_lr": 1.2347368421052631e-05, "step": 727 }, { "epoch": 1.9119000657462197, "high_lr": 0.0006173684210526315, "low_lr": 1.2347368421052631e-05, "step": 727 }, { "epoch": 1.9145299145299144, "grad_norm": 1.0762629508972168, "learning_rate": 0.0006168421052631578, "loss": 1.4672, "step": 728 }, { "epoch": 1.9145299145299144, "high_lr": 0.0006168421052631578, "low_lr": 1.2336842105263158e-05, "step": 728 }, { "epoch": 1.9145299145299144, "high_lr": 0.0006168421052631578, "low_lr": 1.2336842105263158e-05, "step": 728 }, { "epoch": 1.9145299145299144, "high_lr": 0.0006168421052631578, "low_lr": 1.2336842105263158e-05, "step": 728 }, { "epoch": 1.9145299145299144, "high_lr": 0.0006168421052631578, "low_lr": 1.2336842105263158e-05, "step": 728 }, { "epoch": 1.9145299145299144, "high_lr": 0.0006168421052631578, "low_lr": 1.2336842105263158e-05, "step": 728 }, { "epoch": 1.9145299145299144, "high_lr": 0.0006168421052631578, "low_lr": 1.2336842105263158e-05, "step": 728 }, { "epoch": 1.9145299145299144, "high_lr": 0.0006168421052631578, "low_lr": 1.2336842105263158e-05, "step": 728 }, { "epoch": 1.9145299145299144, "high_lr": 0.0006168421052631578, "low_lr": 1.2336842105263158e-05, "step": 728 }, { "epoch": 1.9171597633136095, "grad_norm": 1.1548411846160889, "learning_rate": 0.0006163157894736843, "loss": 1.4726, "step": 729 }, { "epoch": 1.9171597633136095, "high_lr": 0.0006163157894736843, "low_lr": 1.2326315789473686e-05, "step": 729 }, { "epoch": 1.9171597633136095, "high_lr": 0.0006163157894736843, "low_lr": 1.2326315789473686e-05, "step": 729 }, { "epoch": 1.9171597633136095, "high_lr": 0.0006163157894736843, "low_lr": 1.2326315789473686e-05, "step": 729 }, { "epoch": 1.9171597633136095, "high_lr": 0.0006163157894736843, "low_lr": 1.2326315789473686e-05, "step": 729 }, { "epoch": 1.9171597633136095, "high_lr": 0.0006163157894736843, "low_lr": 1.2326315789473686e-05, "step": 729 }, { "epoch": 1.9171597633136095, "high_lr": 0.0006163157894736843, "low_lr": 1.2326315789473686e-05, "step": 729 }, { "epoch": 1.9171597633136095, "high_lr": 0.0006163157894736843, "low_lr": 1.2326315789473686e-05, "step": 729 }, { "epoch": 1.9171597633136095, "high_lr": 0.0006163157894736843, "low_lr": 1.2326315789473686e-05, "step": 729 }, { "epoch": 1.9197896120973044, "grad_norm": 1.122642159461975, "learning_rate": 0.0006157894736842106, "loss": 1.4835, "step": 730 }, { "epoch": 1.9197896120973044, "high_lr": 0.0006157894736842106, "low_lr": 1.2315789473684212e-05, "step": 730 }, { "epoch": 1.9197896120973044, "high_lr": 0.0006157894736842106, "low_lr": 1.2315789473684212e-05, "step": 730 }, { "epoch": 1.9197896120973044, "high_lr": 0.0006157894736842106, "low_lr": 1.2315789473684212e-05, "step": 730 }, { "epoch": 1.9197896120973044, "high_lr": 0.0006157894736842106, "low_lr": 1.2315789473684212e-05, "step": 730 }, { "epoch": 1.9197896120973044, "high_lr": 0.0006157894736842106, "low_lr": 1.2315789473684212e-05, "step": 730 }, { "epoch": 1.9197896120973044, "high_lr": 0.0006157894736842106, "low_lr": 1.2315789473684212e-05, "step": 730 }, { "epoch": 1.9197896120973044, "high_lr": 0.0006157894736842106, "low_lr": 1.2315789473684212e-05, "step": 730 }, { "epoch": 1.9197896120973044, "high_lr": 0.0006157894736842106, "low_lr": 1.2315789473684212e-05, "step": 730 }, { "epoch": 1.9224194608809992, "grad_norm": 1.1182903051376343, "learning_rate": 0.0006152631578947369, "loss": 1.507, "step": 731 }, { "epoch": 1.9224194608809992, "high_lr": 0.0006152631578947369, "low_lr": 1.2305263157894739e-05, "step": 731 }, { "epoch": 1.9224194608809992, "high_lr": 0.0006152631578947369, "low_lr": 1.2305263157894739e-05, "step": 731 }, { "epoch": 1.9224194608809992, "high_lr": 0.0006152631578947369, "low_lr": 1.2305263157894739e-05, "step": 731 }, { "epoch": 1.9224194608809992, "high_lr": 0.0006152631578947369, "low_lr": 1.2305263157894739e-05, "step": 731 }, { "epoch": 1.9224194608809992, "high_lr": 0.0006152631578947369, "low_lr": 1.2305263157894739e-05, "step": 731 }, { "epoch": 1.9224194608809992, "high_lr": 0.0006152631578947369, "low_lr": 1.2305263157894739e-05, "step": 731 }, { "epoch": 1.9224194608809992, "high_lr": 0.0006152631578947369, "low_lr": 1.2305263157894739e-05, "step": 731 }, { "epoch": 1.9224194608809992, "high_lr": 0.0006152631578947369, "low_lr": 1.2305263157894739e-05, "step": 731 }, { "epoch": 1.9250493096646943, "grad_norm": 1.0955809354782104, "learning_rate": 0.0006147368421052632, "loss": 1.4574, "step": 732 }, { "epoch": 1.9250493096646943, "high_lr": 0.0006147368421052632, "low_lr": 1.2294736842105263e-05, "step": 732 }, { "epoch": 1.9250493096646943, "high_lr": 0.0006147368421052632, "low_lr": 1.2294736842105263e-05, "step": 732 }, { "epoch": 1.9250493096646943, "high_lr": 0.0006147368421052632, "low_lr": 1.2294736842105263e-05, "step": 732 }, { "epoch": 1.9250493096646943, "high_lr": 0.0006147368421052632, "low_lr": 1.2294736842105263e-05, "step": 732 }, { "epoch": 1.9250493096646943, "high_lr": 0.0006147368421052632, "low_lr": 1.2294736842105263e-05, "step": 732 }, { "epoch": 1.9250493096646943, "high_lr": 0.0006147368421052632, "low_lr": 1.2294736842105263e-05, "step": 732 }, { "epoch": 1.9250493096646943, "high_lr": 0.0006147368421052632, "low_lr": 1.2294736842105263e-05, "step": 732 }, { "epoch": 1.9250493096646943, "high_lr": 0.0006147368421052632, "low_lr": 1.2294736842105263e-05, "step": 732 }, { "epoch": 1.9276791584483892, "grad_norm": 1.0820459127426147, "learning_rate": 0.0006142105263157895, "loss": 1.4271, "step": 733 }, { "epoch": 1.9276791584483892, "high_lr": 0.0006142105263157895, "low_lr": 1.228421052631579e-05, "step": 733 }, { "epoch": 1.9276791584483892, "high_lr": 0.0006142105263157895, "low_lr": 1.228421052631579e-05, "step": 733 }, { "epoch": 1.9276791584483892, "high_lr": 0.0006142105263157895, "low_lr": 1.228421052631579e-05, "step": 733 }, { "epoch": 1.9276791584483892, "high_lr": 0.0006142105263157895, "low_lr": 1.228421052631579e-05, "step": 733 }, { "epoch": 1.9276791584483892, "high_lr": 0.0006142105263157895, "low_lr": 1.228421052631579e-05, "step": 733 }, { "epoch": 1.9276791584483892, "high_lr": 0.0006142105263157895, "low_lr": 1.228421052631579e-05, "step": 733 }, { "epoch": 1.9276791584483892, "high_lr": 0.0006142105263157895, "low_lr": 1.228421052631579e-05, "step": 733 }, { "epoch": 1.9276791584483892, "high_lr": 0.0006142105263157895, "low_lr": 1.228421052631579e-05, "step": 733 }, { "epoch": 1.930309007232084, "grad_norm": 1.2596343755722046, "learning_rate": 0.0006136842105263159, "loss": 1.525, "step": 734 }, { "epoch": 1.930309007232084, "high_lr": 0.0006136842105263159, "low_lr": 1.2273684210526317e-05, "step": 734 }, { "epoch": 1.930309007232084, "high_lr": 0.0006136842105263159, "low_lr": 1.2273684210526317e-05, "step": 734 }, { "epoch": 1.930309007232084, "high_lr": 0.0006136842105263159, "low_lr": 1.2273684210526317e-05, "step": 734 }, { "epoch": 1.930309007232084, "high_lr": 0.0006136842105263159, "low_lr": 1.2273684210526317e-05, "step": 734 }, { "epoch": 1.930309007232084, "high_lr": 0.0006136842105263159, "low_lr": 1.2273684210526317e-05, "step": 734 }, { "epoch": 1.930309007232084, "high_lr": 0.0006136842105263159, "low_lr": 1.2273684210526317e-05, "step": 734 }, { "epoch": 1.930309007232084, "high_lr": 0.0006136842105263159, "low_lr": 1.2273684210526317e-05, "step": 734 }, { "epoch": 1.930309007232084, "high_lr": 0.0006136842105263159, "low_lr": 1.2273684210526317e-05, "step": 734 }, { "epoch": 1.9329388560157792, "grad_norm": 1.1566976308822632, "learning_rate": 0.0006131578947368421, "loss": 1.5111, "step": 735 }, { "epoch": 1.9329388560157792, "high_lr": 0.0006131578947368421, "low_lr": 1.2263157894736844e-05, "step": 735 }, { "epoch": 1.9329388560157792, "high_lr": 0.0006131578947368421, "low_lr": 1.2263157894736844e-05, "step": 735 }, { "epoch": 1.9329388560157792, "high_lr": 0.0006131578947368421, "low_lr": 1.2263157894736844e-05, "step": 735 }, { "epoch": 1.9329388560157792, "high_lr": 0.0006131578947368421, "low_lr": 1.2263157894736844e-05, "step": 735 }, { "epoch": 1.9329388560157792, "high_lr": 0.0006131578947368421, "low_lr": 1.2263157894736844e-05, "step": 735 }, { "epoch": 1.9329388560157792, "high_lr": 0.0006131578947368421, "low_lr": 1.2263157894736844e-05, "step": 735 }, { "epoch": 1.9329388560157792, "high_lr": 0.0006131578947368421, "low_lr": 1.2263157894736844e-05, "step": 735 }, { "epoch": 1.9329388560157792, "high_lr": 0.0006131578947368421, "low_lr": 1.2263157894736844e-05, "step": 735 }, { "epoch": 1.935568704799474, "grad_norm": 1.1604498624801636, "learning_rate": 0.0006126315789473684, "loss": 1.5178, "step": 736 }, { "epoch": 1.935568704799474, "high_lr": 0.0006126315789473684, "low_lr": 1.225263157894737e-05, "step": 736 }, { "epoch": 1.935568704799474, "high_lr": 0.0006126315789473684, "low_lr": 1.225263157894737e-05, "step": 736 }, { "epoch": 1.935568704799474, "high_lr": 0.0006126315789473684, "low_lr": 1.225263157894737e-05, "step": 736 }, { "epoch": 1.935568704799474, "high_lr": 0.0006126315789473684, "low_lr": 1.225263157894737e-05, "step": 736 }, { "epoch": 1.935568704799474, "high_lr": 0.0006126315789473684, "low_lr": 1.225263157894737e-05, "step": 736 }, { "epoch": 1.935568704799474, "high_lr": 0.0006126315789473684, "low_lr": 1.225263157894737e-05, "step": 736 }, { "epoch": 1.935568704799474, "high_lr": 0.0006126315789473684, "low_lr": 1.225263157894737e-05, "step": 736 }, { "epoch": 1.935568704799474, "high_lr": 0.0006126315789473684, "low_lr": 1.225263157894737e-05, "step": 736 }, { "epoch": 1.938198553583169, "grad_norm": 1.1423522233963013, "learning_rate": 0.0006121052631578947, "loss": 1.4149, "step": 737 }, { "epoch": 1.938198553583169, "high_lr": 0.0006121052631578947, "low_lr": 1.2242105263157895e-05, "step": 737 }, { "epoch": 1.938198553583169, "high_lr": 0.0006121052631578947, "low_lr": 1.2242105263157895e-05, "step": 737 }, { "epoch": 1.938198553583169, "high_lr": 0.0006121052631578947, "low_lr": 1.2242105263157895e-05, "step": 737 }, { "epoch": 1.938198553583169, "high_lr": 0.0006121052631578947, "low_lr": 1.2242105263157895e-05, "step": 737 }, { "epoch": 1.938198553583169, "high_lr": 0.0006121052631578947, "low_lr": 1.2242105263157895e-05, "step": 737 }, { "epoch": 1.938198553583169, "high_lr": 0.0006121052631578947, "low_lr": 1.2242105263157895e-05, "step": 737 }, { "epoch": 1.938198553583169, "high_lr": 0.0006121052631578947, "low_lr": 1.2242105263157895e-05, "step": 737 }, { "epoch": 1.938198553583169, "high_lr": 0.0006121052631578947, "low_lr": 1.2242105263157895e-05, "step": 737 }, { "epoch": 1.940828402366864, "grad_norm": 1.1732314825057983, "learning_rate": 0.000611578947368421, "loss": 1.4408, "step": 738 }, { "epoch": 1.940828402366864, "high_lr": 0.000611578947368421, "low_lr": 1.2231578947368421e-05, "step": 738 }, { "epoch": 1.940828402366864, "high_lr": 0.000611578947368421, "low_lr": 1.2231578947368421e-05, "step": 738 }, { "epoch": 1.940828402366864, "high_lr": 0.000611578947368421, "low_lr": 1.2231578947368421e-05, "step": 738 }, { "epoch": 1.940828402366864, "high_lr": 0.000611578947368421, "low_lr": 1.2231578947368421e-05, "step": 738 }, { "epoch": 1.940828402366864, "high_lr": 0.000611578947368421, "low_lr": 1.2231578947368421e-05, "step": 738 }, { "epoch": 1.940828402366864, "high_lr": 0.000611578947368421, "low_lr": 1.2231578947368421e-05, "step": 738 }, { "epoch": 1.940828402366864, "high_lr": 0.000611578947368421, "low_lr": 1.2231578947368421e-05, "step": 738 }, { "epoch": 1.940828402366864, "high_lr": 0.000611578947368421, "low_lr": 1.2231578947368421e-05, "step": 738 }, { "epoch": 1.9434582511505587, "grad_norm": 1.0459725856781006, "learning_rate": 0.0006110526315789474, "loss": 1.4532, "step": 739 }, { "epoch": 1.9434582511505587, "high_lr": 0.0006110526315789474, "low_lr": 1.2221052631578949e-05, "step": 739 }, { "epoch": 1.9434582511505587, "high_lr": 0.0006110526315789474, "low_lr": 1.2221052631578949e-05, "step": 739 }, { "epoch": 1.9434582511505587, "high_lr": 0.0006110526315789474, "low_lr": 1.2221052631578949e-05, "step": 739 }, { "epoch": 1.9434582511505587, "high_lr": 0.0006110526315789474, "low_lr": 1.2221052631578949e-05, "step": 739 }, { "epoch": 1.9434582511505587, "high_lr": 0.0006110526315789474, "low_lr": 1.2221052631578949e-05, "step": 739 }, { "epoch": 1.9434582511505587, "high_lr": 0.0006110526315789474, "low_lr": 1.2221052631578949e-05, "step": 739 }, { "epoch": 1.9434582511505587, "high_lr": 0.0006110526315789474, "low_lr": 1.2221052631578949e-05, "step": 739 }, { "epoch": 1.9434582511505587, "high_lr": 0.0006110526315789474, "low_lr": 1.2221052631578949e-05, "step": 739 }, { "epoch": 1.9460880999342538, "grad_norm": 1.2245088815689087, "learning_rate": 0.0006105263157894737, "loss": 1.5121, "step": 740 }, { "epoch": 1.9460880999342538, "high_lr": 0.0006105263157894737, "low_lr": 1.2210526315789475e-05, "step": 740 }, { "epoch": 1.9460880999342538, "high_lr": 0.0006105263157894737, "low_lr": 1.2210526315789475e-05, "step": 740 }, { "epoch": 1.9460880999342538, "high_lr": 0.0006105263157894737, "low_lr": 1.2210526315789475e-05, "step": 740 }, { "epoch": 1.9460880999342538, "high_lr": 0.0006105263157894737, "low_lr": 1.2210526315789475e-05, "step": 740 }, { "epoch": 1.9460880999342538, "high_lr": 0.0006105263157894737, "low_lr": 1.2210526315789475e-05, "step": 740 }, { "epoch": 1.9460880999342538, "high_lr": 0.0006105263157894737, "low_lr": 1.2210526315789475e-05, "step": 740 }, { "epoch": 1.9460880999342538, "high_lr": 0.0006105263157894737, "low_lr": 1.2210526315789475e-05, "step": 740 }, { "epoch": 1.9460880999342538, "high_lr": 0.0006105263157894737, "low_lr": 1.2210526315789475e-05, "step": 740 }, { "epoch": 1.9487179487179487, "grad_norm": 1.1924901008605957, "learning_rate": 0.00061, "loss": 1.4692, "step": 741 }, { "epoch": 1.9487179487179487, "high_lr": 0.00061, "low_lr": 1.22e-05, "step": 741 }, { "epoch": 1.9487179487179487, "high_lr": 0.00061, "low_lr": 1.22e-05, "step": 741 }, { "epoch": 1.9487179487179487, "high_lr": 0.00061, "low_lr": 1.22e-05, "step": 741 }, { "epoch": 1.9487179487179487, "high_lr": 0.00061, "low_lr": 1.22e-05, "step": 741 }, { "epoch": 1.9487179487179487, "high_lr": 0.00061, "low_lr": 1.22e-05, "step": 741 }, { "epoch": 1.9487179487179487, "high_lr": 0.00061, "low_lr": 1.22e-05, "step": 741 }, { "epoch": 1.9487179487179487, "high_lr": 0.00061, "low_lr": 1.22e-05, "step": 741 }, { "epoch": 1.9487179487179487, "high_lr": 0.00061, "low_lr": 1.22e-05, "step": 741 }, { "epoch": 1.9513477975016436, "grad_norm": 1.0595648288726807, "learning_rate": 0.0006094736842105263, "loss": 1.5118, "step": 742 }, { "epoch": 1.9513477975016436, "high_lr": 0.0006094736842105263, "low_lr": 1.2189473684210526e-05, "step": 742 }, { "epoch": 1.9513477975016436, "high_lr": 0.0006094736842105263, "low_lr": 1.2189473684210526e-05, "step": 742 }, { "epoch": 1.9513477975016436, "high_lr": 0.0006094736842105263, "low_lr": 1.2189473684210526e-05, "step": 742 }, { "epoch": 1.9513477975016436, "high_lr": 0.0006094736842105263, "low_lr": 1.2189473684210526e-05, "step": 742 }, { "epoch": 1.9513477975016436, "high_lr": 0.0006094736842105263, "low_lr": 1.2189473684210526e-05, "step": 742 }, { "epoch": 1.9513477975016436, "high_lr": 0.0006094736842105263, "low_lr": 1.2189473684210526e-05, "step": 742 }, { "epoch": 1.9513477975016436, "high_lr": 0.0006094736842105263, "low_lr": 1.2189473684210526e-05, "step": 742 }, { "epoch": 1.9513477975016436, "high_lr": 0.0006094736842105263, "low_lr": 1.2189473684210526e-05, "step": 742 }, { "epoch": 1.9539776462853387, "grad_norm": 1.097933053970337, "learning_rate": 0.0006089473684210527, "loss": 1.4404, "step": 743 }, { "epoch": 1.9539776462853387, "high_lr": 0.0006089473684210527, "low_lr": 1.2178947368421054e-05, "step": 743 }, { "epoch": 1.9539776462853387, "high_lr": 0.0006089473684210527, "low_lr": 1.2178947368421054e-05, "step": 743 }, { "epoch": 1.9539776462853387, "high_lr": 0.0006089473684210527, "low_lr": 1.2178947368421054e-05, "step": 743 }, { "epoch": 1.9539776462853387, "high_lr": 0.0006089473684210527, "low_lr": 1.2178947368421054e-05, "step": 743 }, { "epoch": 1.9539776462853387, "high_lr": 0.0006089473684210527, "low_lr": 1.2178947368421054e-05, "step": 743 }, { "epoch": 1.9539776462853387, "high_lr": 0.0006089473684210527, "low_lr": 1.2178947368421054e-05, "step": 743 }, { "epoch": 1.9539776462853387, "high_lr": 0.0006089473684210527, "low_lr": 1.2178947368421054e-05, "step": 743 }, { "epoch": 1.9539776462853387, "high_lr": 0.0006089473684210527, "low_lr": 1.2178947368421054e-05, "step": 743 }, { "epoch": 1.9566074950690335, "grad_norm": 1.1369494199752808, "learning_rate": 0.0006084210526315789, "loss": 1.4706, "step": 744 }, { "epoch": 1.9566074950690335, "high_lr": 0.0006084210526315789, "low_lr": 1.216842105263158e-05, "step": 744 }, { "epoch": 1.9566074950690335, "high_lr": 0.0006084210526315789, "low_lr": 1.216842105263158e-05, "step": 744 }, { "epoch": 1.9566074950690335, "high_lr": 0.0006084210526315789, "low_lr": 1.216842105263158e-05, "step": 744 }, { "epoch": 1.9566074950690335, "high_lr": 0.0006084210526315789, "low_lr": 1.216842105263158e-05, "step": 744 }, { "epoch": 1.9566074950690335, "high_lr": 0.0006084210526315789, "low_lr": 1.216842105263158e-05, "step": 744 }, { "epoch": 1.9566074950690335, "high_lr": 0.0006084210526315789, "low_lr": 1.216842105263158e-05, "step": 744 }, { "epoch": 1.9566074950690335, "high_lr": 0.0006084210526315789, "low_lr": 1.216842105263158e-05, "step": 744 }, { "epoch": 1.9566074950690335, "high_lr": 0.0006084210526315789, "low_lr": 1.216842105263158e-05, "step": 744 }, { "epoch": 1.9592373438527284, "grad_norm": 1.1629635095596313, "learning_rate": 0.0006078947368421052, "loss": 1.4443, "step": 745 }, { "epoch": 1.9592373438527284, "high_lr": 0.0006078947368421052, "low_lr": 1.2157894736842107e-05, "step": 745 }, { "epoch": 1.9592373438527284, "high_lr": 0.0006078947368421052, "low_lr": 1.2157894736842107e-05, "step": 745 }, { "epoch": 1.9592373438527284, "high_lr": 0.0006078947368421052, "low_lr": 1.2157894736842107e-05, "step": 745 }, { "epoch": 1.9592373438527284, "high_lr": 0.0006078947368421052, "low_lr": 1.2157894736842107e-05, "step": 745 }, { "epoch": 1.9592373438527284, "high_lr": 0.0006078947368421052, "low_lr": 1.2157894736842107e-05, "step": 745 }, { "epoch": 1.9592373438527284, "high_lr": 0.0006078947368421052, "low_lr": 1.2157894736842107e-05, "step": 745 }, { "epoch": 1.9592373438527284, "high_lr": 0.0006078947368421052, "low_lr": 1.2157894736842107e-05, "step": 745 }, { "epoch": 1.9592373438527284, "high_lr": 0.0006078947368421052, "low_lr": 1.2157894736842107e-05, "step": 745 }, { "epoch": 1.9618671926364235, "grad_norm": 1.108062982559204, "learning_rate": 0.0006073684210526316, "loss": 1.4084, "step": 746 }, { "epoch": 1.9618671926364235, "high_lr": 0.0006073684210526316, "low_lr": 1.2147368421052632e-05, "step": 746 }, { "epoch": 1.9618671926364235, "high_lr": 0.0006073684210526316, "low_lr": 1.2147368421052632e-05, "step": 746 }, { "epoch": 1.9618671926364235, "high_lr": 0.0006073684210526316, "low_lr": 1.2147368421052632e-05, "step": 746 }, { "epoch": 1.9618671926364235, "high_lr": 0.0006073684210526316, "low_lr": 1.2147368421052632e-05, "step": 746 }, { "epoch": 1.9618671926364235, "high_lr": 0.0006073684210526316, "low_lr": 1.2147368421052632e-05, "step": 746 }, { "epoch": 1.9618671926364235, "high_lr": 0.0006073684210526316, "low_lr": 1.2147368421052632e-05, "step": 746 }, { "epoch": 1.9618671926364235, "high_lr": 0.0006073684210526316, "low_lr": 1.2147368421052632e-05, "step": 746 }, { "epoch": 1.9618671926364235, "high_lr": 0.0006073684210526316, "low_lr": 1.2147368421052632e-05, "step": 746 }, { "epoch": 1.9644970414201184, "grad_norm": 1.122900128364563, "learning_rate": 0.0006068421052631579, "loss": 1.4306, "step": 747 }, { "epoch": 1.9644970414201184, "high_lr": 0.0006068421052631579, "low_lr": 1.2136842105263158e-05, "step": 747 }, { "epoch": 1.9644970414201184, "high_lr": 0.0006068421052631579, "low_lr": 1.2136842105263158e-05, "step": 747 }, { "epoch": 1.9644970414201184, "high_lr": 0.0006068421052631579, "low_lr": 1.2136842105263158e-05, "step": 747 }, { "epoch": 1.9644970414201184, "high_lr": 0.0006068421052631579, "low_lr": 1.2136842105263158e-05, "step": 747 }, { "epoch": 1.9644970414201184, "high_lr": 0.0006068421052631579, "low_lr": 1.2136842105263158e-05, "step": 747 }, { "epoch": 1.9644970414201184, "high_lr": 0.0006068421052631579, "low_lr": 1.2136842105263158e-05, "step": 747 }, { "epoch": 1.9644970414201184, "high_lr": 0.0006068421052631579, "low_lr": 1.2136842105263158e-05, "step": 747 }, { "epoch": 1.9644970414201184, "high_lr": 0.0006068421052631579, "low_lr": 1.2136842105263158e-05, "step": 747 }, { "epoch": 1.9671268902038133, "grad_norm": 1.1524890661239624, "learning_rate": 0.0006063157894736843, "loss": 1.4589, "step": 748 }, { "epoch": 1.9671268902038133, "high_lr": 0.0006063157894736843, "low_lr": 1.2126315789473686e-05, "step": 748 }, { "epoch": 1.9671268902038133, "high_lr": 0.0006063157894736843, "low_lr": 1.2126315789473686e-05, "step": 748 }, { "epoch": 1.9671268902038133, "high_lr": 0.0006063157894736843, "low_lr": 1.2126315789473686e-05, "step": 748 }, { "epoch": 1.9671268902038133, "high_lr": 0.0006063157894736843, "low_lr": 1.2126315789473686e-05, "step": 748 }, { "epoch": 1.9671268902038133, "high_lr": 0.0006063157894736843, "low_lr": 1.2126315789473686e-05, "step": 748 }, { "epoch": 1.9671268902038133, "high_lr": 0.0006063157894736843, "low_lr": 1.2126315789473686e-05, "step": 748 }, { "epoch": 1.9671268902038133, "high_lr": 0.0006063157894736843, "low_lr": 1.2126315789473686e-05, "step": 748 }, { "epoch": 1.9671268902038133, "high_lr": 0.0006063157894736843, "low_lr": 1.2126315789473686e-05, "step": 748 }, { "epoch": 1.9697567389875084, "grad_norm": 1.1671377420425415, "learning_rate": 0.0006057894736842106, "loss": 1.4813, "step": 749 }, { "epoch": 1.9697567389875084, "high_lr": 0.0006057894736842106, "low_lr": 1.2115789473684212e-05, "step": 749 }, { "epoch": 1.9697567389875084, "high_lr": 0.0006057894736842106, "low_lr": 1.2115789473684212e-05, "step": 749 }, { "epoch": 1.9697567389875084, "high_lr": 0.0006057894736842106, "low_lr": 1.2115789473684212e-05, "step": 749 }, { "epoch": 1.9697567389875084, "high_lr": 0.0006057894736842106, "low_lr": 1.2115789473684212e-05, "step": 749 }, { "epoch": 1.9697567389875084, "high_lr": 0.0006057894736842106, "low_lr": 1.2115789473684212e-05, "step": 749 }, { "epoch": 1.9697567389875084, "high_lr": 0.0006057894736842106, "low_lr": 1.2115789473684212e-05, "step": 749 }, { "epoch": 1.9697567389875084, "high_lr": 0.0006057894736842106, "low_lr": 1.2115789473684212e-05, "step": 749 }, { "epoch": 1.9697567389875084, "high_lr": 0.0006057894736842106, "low_lr": 1.2115789473684212e-05, "step": 749 }, { "epoch": 1.972386587771203, "grad_norm": 1.1508365869522095, "learning_rate": 0.0006052631578947369, "loss": 1.5052, "step": 750 }, { "epoch": 1.972386587771203, "high_lr": 0.0006052631578947369, "low_lr": 1.2105263157894737e-05, "step": 750 }, { "epoch": 1.972386587771203, "high_lr": 0.0006052631578947369, "low_lr": 1.2105263157894737e-05, "step": 750 }, { "epoch": 1.972386587771203, "high_lr": 0.0006052631578947369, "low_lr": 1.2105263157894737e-05, "step": 750 }, { "epoch": 1.972386587771203, "high_lr": 0.0006052631578947369, "low_lr": 1.2105263157894737e-05, "step": 750 }, { "epoch": 1.972386587771203, "high_lr": 0.0006052631578947369, "low_lr": 1.2105263157894737e-05, "step": 750 }, { "epoch": 1.972386587771203, "high_lr": 0.0006052631578947369, "low_lr": 1.2105263157894737e-05, "step": 750 }, { "epoch": 1.972386587771203, "high_lr": 0.0006052631578947369, "low_lr": 1.2105263157894737e-05, "step": 750 }, { "epoch": 1.972386587771203, "high_lr": 0.0006052631578947369, "low_lr": 1.2105263157894737e-05, "step": 750 }, { "epoch": 1.9750164365548981, "grad_norm": 1.1085923910140991, "learning_rate": 0.0006047368421052632, "loss": 1.5286, "step": 751 }, { "epoch": 1.9750164365548981, "high_lr": 0.0006047368421052632, "low_lr": 1.2094736842105263e-05, "step": 751 }, { "epoch": 1.9750164365548981, "high_lr": 0.0006047368421052632, "low_lr": 1.2094736842105263e-05, "step": 751 }, { "epoch": 1.9750164365548981, "high_lr": 0.0006047368421052632, "low_lr": 1.2094736842105263e-05, "step": 751 }, { "epoch": 1.9750164365548981, "high_lr": 0.0006047368421052632, "low_lr": 1.2094736842105263e-05, "step": 751 }, { "epoch": 1.9750164365548981, "high_lr": 0.0006047368421052632, "low_lr": 1.2094736842105263e-05, "step": 751 }, { "epoch": 1.9750164365548981, "high_lr": 0.0006047368421052632, "low_lr": 1.2094736842105263e-05, "step": 751 }, { "epoch": 1.9750164365548981, "high_lr": 0.0006047368421052632, "low_lr": 1.2094736842105263e-05, "step": 751 }, { "epoch": 1.9750164365548981, "high_lr": 0.0006047368421052632, "low_lr": 1.2094736842105263e-05, "step": 751 }, { "epoch": 1.977646285338593, "grad_norm": 1.2111530303955078, "learning_rate": 0.0006042105263157894, "loss": 1.5144, "step": 752 }, { "epoch": 1.977646285338593, "high_lr": 0.0006042105263157894, "low_lr": 1.208421052631579e-05, "step": 752 }, { "epoch": 1.977646285338593, "high_lr": 0.0006042105263157894, "low_lr": 1.208421052631579e-05, "step": 752 }, { "epoch": 1.977646285338593, "high_lr": 0.0006042105263157894, "low_lr": 1.208421052631579e-05, "step": 752 }, { "epoch": 1.977646285338593, "high_lr": 0.0006042105263157894, "low_lr": 1.208421052631579e-05, "step": 752 }, { "epoch": 1.977646285338593, "high_lr": 0.0006042105263157894, "low_lr": 1.208421052631579e-05, "step": 752 }, { "epoch": 1.977646285338593, "high_lr": 0.0006042105263157894, "low_lr": 1.208421052631579e-05, "step": 752 }, { "epoch": 1.977646285338593, "high_lr": 0.0006042105263157894, "low_lr": 1.208421052631579e-05, "step": 752 }, { "epoch": 1.977646285338593, "high_lr": 0.0006042105263157894, "low_lr": 1.208421052631579e-05, "step": 752 }, { "epoch": 1.9802761341222879, "grad_norm": 1.09846830368042, "learning_rate": 0.0006036842105263158, "loss": 1.4373, "step": 753 }, { "epoch": 1.9802761341222879, "high_lr": 0.0006036842105263158, "low_lr": 1.2073684210526318e-05, "step": 753 }, { "epoch": 1.9802761341222879, "high_lr": 0.0006036842105263158, "low_lr": 1.2073684210526318e-05, "step": 753 }, { "epoch": 1.9802761341222879, "high_lr": 0.0006036842105263158, "low_lr": 1.2073684210526318e-05, "step": 753 }, { "epoch": 1.9802761341222879, "high_lr": 0.0006036842105263158, "low_lr": 1.2073684210526318e-05, "step": 753 }, { "epoch": 1.9802761341222879, "high_lr": 0.0006036842105263158, "low_lr": 1.2073684210526318e-05, "step": 753 }, { "epoch": 1.9802761341222879, "high_lr": 0.0006036842105263158, "low_lr": 1.2073684210526318e-05, "step": 753 }, { "epoch": 1.9802761341222879, "high_lr": 0.0006036842105263158, "low_lr": 1.2073684210526318e-05, "step": 753 }, { "epoch": 1.9802761341222879, "high_lr": 0.0006036842105263158, "low_lr": 1.2073684210526318e-05, "step": 753 }, { "epoch": 1.982905982905983, "grad_norm": 1.1521211862564087, "learning_rate": 0.0006031578947368421, "loss": 1.4419, "step": 754 }, { "epoch": 1.982905982905983, "high_lr": 0.0006031578947368421, "low_lr": 1.2063157894736844e-05, "step": 754 }, { "epoch": 1.982905982905983, "high_lr": 0.0006031578947368421, "low_lr": 1.2063157894736844e-05, "step": 754 }, { "epoch": 1.982905982905983, "high_lr": 0.0006031578947368421, "low_lr": 1.2063157894736844e-05, "step": 754 }, { "epoch": 1.982905982905983, "high_lr": 0.0006031578947368421, "low_lr": 1.2063157894736844e-05, "step": 754 }, { "epoch": 1.982905982905983, "high_lr": 0.0006031578947368421, "low_lr": 1.2063157894736844e-05, "step": 754 }, { "epoch": 1.982905982905983, "high_lr": 0.0006031578947368421, "low_lr": 1.2063157894736844e-05, "step": 754 }, { "epoch": 1.982905982905983, "high_lr": 0.0006031578947368421, "low_lr": 1.2063157894736844e-05, "step": 754 }, { "epoch": 1.982905982905983, "high_lr": 0.0006031578947368421, "low_lr": 1.2063157894736844e-05, "step": 754 }, { "epoch": 1.9855358316896778, "grad_norm": 1.1370145082473755, "learning_rate": 0.0006026315789473684, "loss": 1.4149, "step": 755 }, { "epoch": 1.9855358316896778, "high_lr": 0.0006026315789473684, "low_lr": 1.2052631578947369e-05, "step": 755 }, { "epoch": 1.9855358316896778, "high_lr": 0.0006026315789473684, "low_lr": 1.2052631578947369e-05, "step": 755 }, { "epoch": 1.9855358316896778, "high_lr": 0.0006026315789473684, "low_lr": 1.2052631578947369e-05, "step": 755 }, { "epoch": 1.9855358316896778, "high_lr": 0.0006026315789473684, "low_lr": 1.2052631578947369e-05, "step": 755 }, { "epoch": 1.9855358316896778, "high_lr": 0.0006026315789473684, "low_lr": 1.2052631578947369e-05, "step": 755 }, { "epoch": 1.9855358316896778, "high_lr": 0.0006026315789473684, "low_lr": 1.2052631578947369e-05, "step": 755 }, { "epoch": 1.9855358316896778, "high_lr": 0.0006026315789473684, "low_lr": 1.2052631578947369e-05, "step": 755 }, { "epoch": 1.9855358316896778, "high_lr": 0.0006026315789473684, "low_lr": 1.2052631578947369e-05, "step": 755 }, { "epoch": 1.9881656804733727, "grad_norm": 1.2295494079589844, "learning_rate": 0.0006021052631578947, "loss": 1.5146, "step": 756 }, { "epoch": 1.9881656804733727, "high_lr": 0.0006021052631578947, "low_lr": 1.2042105263157895e-05, "step": 756 }, { "epoch": 1.9881656804733727, "high_lr": 0.0006021052631578947, "low_lr": 1.2042105263157895e-05, "step": 756 }, { "epoch": 1.9881656804733727, "high_lr": 0.0006021052631578947, "low_lr": 1.2042105263157895e-05, "step": 756 }, { "epoch": 1.9881656804733727, "high_lr": 0.0006021052631578947, "low_lr": 1.2042105263157895e-05, "step": 756 }, { "epoch": 1.9881656804733727, "high_lr": 0.0006021052631578947, "low_lr": 1.2042105263157895e-05, "step": 756 }, { "epoch": 1.9881656804733727, "high_lr": 0.0006021052631578947, "low_lr": 1.2042105263157895e-05, "step": 756 }, { "epoch": 1.9881656804733727, "high_lr": 0.0006021052631578947, "low_lr": 1.2042105263157895e-05, "step": 756 }, { "epoch": 1.9881656804733727, "high_lr": 0.0006021052631578947, "low_lr": 1.2042105263157895e-05, "step": 756 }, { "epoch": 1.9907955292570678, "grad_norm": 1.0924862623214722, "learning_rate": 0.0006015789473684211, "loss": 1.4997, "step": 757 }, { "epoch": 1.9907955292570678, "high_lr": 0.0006015789473684211, "low_lr": 1.2031578947368423e-05, "step": 757 }, { "epoch": 1.9907955292570678, "high_lr": 0.0006015789473684211, "low_lr": 1.2031578947368423e-05, "step": 757 }, { "epoch": 1.9907955292570678, "high_lr": 0.0006015789473684211, "low_lr": 1.2031578947368423e-05, "step": 757 }, { "epoch": 1.9907955292570678, "high_lr": 0.0006015789473684211, "low_lr": 1.2031578947368423e-05, "step": 757 }, { "epoch": 1.9907955292570678, "high_lr": 0.0006015789473684211, "low_lr": 1.2031578947368423e-05, "step": 757 }, { "epoch": 1.9907955292570678, "high_lr": 0.0006015789473684211, "low_lr": 1.2031578947368423e-05, "step": 757 }, { "epoch": 1.9907955292570678, "high_lr": 0.0006015789473684211, "low_lr": 1.2031578947368423e-05, "step": 757 }, { "epoch": 1.9907955292570678, "high_lr": 0.0006015789473684211, "low_lr": 1.2031578947368423e-05, "step": 757 }, { "epoch": 1.9934253780407627, "grad_norm": 1.096147894859314, "learning_rate": 0.0006010526315789474, "loss": 1.4644, "step": 758 }, { "epoch": 1.9934253780407627, "high_lr": 0.0006010526315789474, "low_lr": 1.202105263157895e-05, "step": 758 }, { "epoch": 1.9934253780407627, "high_lr": 0.0006010526315789474, "low_lr": 1.202105263157895e-05, "step": 758 }, { "epoch": 1.9934253780407627, "high_lr": 0.0006010526315789474, "low_lr": 1.202105263157895e-05, "step": 758 }, { "epoch": 1.9934253780407627, "high_lr": 0.0006010526315789474, "low_lr": 1.202105263157895e-05, "step": 758 }, { "epoch": 1.9934253780407627, "high_lr": 0.0006010526315789474, "low_lr": 1.202105263157895e-05, "step": 758 }, { "epoch": 1.9934253780407627, "high_lr": 0.0006010526315789474, "low_lr": 1.202105263157895e-05, "step": 758 }, { "epoch": 1.9934253780407627, "high_lr": 0.0006010526315789474, "low_lr": 1.202105263157895e-05, "step": 758 }, { "epoch": 1.9934253780407627, "high_lr": 0.0006010526315789474, "low_lr": 1.202105263157895e-05, "step": 758 }, { "epoch": 1.9960552268244576, "grad_norm": 1.1285107135772705, "learning_rate": 0.0006005263157894737, "loss": 1.4483, "step": 759 }, { "epoch": 1.9960552268244576, "high_lr": 0.0006005263157894737, "low_lr": 1.2010526315789474e-05, "step": 759 }, { "epoch": 1.9960552268244576, "high_lr": 0.0006005263157894737, "low_lr": 1.2010526315789474e-05, "step": 759 }, { "epoch": 1.9960552268244576, "high_lr": 0.0006005263157894737, "low_lr": 1.2010526315789474e-05, "step": 759 }, { "epoch": 1.9960552268244576, "high_lr": 0.0006005263157894737, "low_lr": 1.2010526315789474e-05, "step": 759 }, { "epoch": 1.9960552268244576, "high_lr": 0.0006005263157894737, "low_lr": 1.2010526315789474e-05, "step": 759 }, { "epoch": 1.9960552268244576, "high_lr": 0.0006005263157894737, "low_lr": 1.2010526315789474e-05, "step": 759 }, { "epoch": 1.9960552268244576, "high_lr": 0.0006005263157894737, "low_lr": 1.2010526315789474e-05, "step": 759 }, { "epoch": 1.9960552268244576, "high_lr": 0.0006005263157894737, "low_lr": 1.2010526315789474e-05, "step": 759 }, { "epoch": 1.9986850756081527, "grad_norm": 2.830282211303711, "learning_rate": 0.0006, "loss": 1.5092, "step": 760 }, { "epoch": 1.9986850756081527, "high_lr": 0.0006, "low_lr": 1.2e-05, "step": 760 }, { "epoch": 1.9986850756081527, "high_lr": 0.0006, "low_lr": 1.2e-05, "step": 760 }, { "epoch": 1.9986850756081527, "high_lr": 0.0006, "low_lr": 1.2e-05, "step": 760 }, { "epoch": 1.9986850756081527, "high_lr": 0.0006, "low_lr": 1.2e-05, "step": 760 }, { "epoch": 1.9986850756081527, "high_lr": 0.0006, "low_lr": 1.2e-05, "step": 760 }, { "epoch": 1.9986850756081527, "high_lr": 0.0006, "low_lr": 1.2e-05, "step": 760 }, { "epoch": 1.9986850756081527, "high_lr": 0.0006, "low_lr": 1.2e-05, "step": 760 }, { "epoch": 1.9986850756081527, "high_lr": 0.0006, "low_lr": 1.2e-05, "step": 760 }, { "epoch": 2.0013149243918473, "grad_norm": 1.145472764968872, "learning_rate": 0.0005994736842105262, "loss": 1.4476, "step": 761 }, { "epoch": 2.0013149243918473, "high_lr": 0.0005994736842105262, "low_lr": 1.1989473684210527e-05, "step": 761 }, { "epoch": 2.0013149243918473, "high_lr": 0.0005994736842105262, "low_lr": 1.1989473684210527e-05, "step": 761 }, { "epoch": 2.0013149243918473, "high_lr": 0.0005994736842105262, "low_lr": 1.1989473684210527e-05, "step": 761 }, { "epoch": 2.0013149243918473, "high_lr": 0.0005994736842105262, "low_lr": 1.1989473684210527e-05, "step": 761 }, { "epoch": 2.0013149243918473, "high_lr": 0.0005994736842105262, "low_lr": 1.1989473684210527e-05, "step": 761 }, { "epoch": 2.0013149243918473, "high_lr": 0.0005994736842105262, "low_lr": 1.1989473684210527e-05, "step": 761 }, { "epoch": 2.0013149243918473, "high_lr": 0.0005994736842105262, "low_lr": 1.1989473684210527e-05, "step": 761 }, { "epoch": 2.0013149243918473, "high_lr": 0.0005994736842105262, "low_lr": 1.1989473684210527e-05, "step": 761 }, { "epoch": 2.0039447731755424, "grad_norm": 1.2734953165054321, "learning_rate": 0.0005989473684210527, "loss": 1.3967, "step": 762 }, { "epoch": 2.0039447731755424, "high_lr": 0.0005989473684210527, "low_lr": 1.1978947368421055e-05, "step": 762 }, { "epoch": 2.0039447731755424, "high_lr": 0.0005989473684210527, "low_lr": 1.1978947368421055e-05, "step": 762 }, { "epoch": 2.0039447731755424, "high_lr": 0.0005989473684210527, "low_lr": 1.1978947368421055e-05, "step": 762 }, { "epoch": 2.0039447731755424, "high_lr": 0.0005989473684210527, "low_lr": 1.1978947368421055e-05, "step": 762 }, { "epoch": 2.0039447731755424, "high_lr": 0.0005989473684210527, "low_lr": 1.1978947368421055e-05, "step": 762 }, { "epoch": 2.0039447731755424, "high_lr": 0.0005989473684210527, "low_lr": 1.1978947368421055e-05, "step": 762 }, { "epoch": 2.0039447731755424, "high_lr": 0.0005989473684210527, "low_lr": 1.1978947368421055e-05, "step": 762 }, { "epoch": 2.0039447731755424, "high_lr": 0.0005989473684210527, "low_lr": 1.1978947368421055e-05, "step": 762 }, { "epoch": 2.0065746219592375, "grad_norm": 1.0868024826049805, "learning_rate": 0.000598421052631579, "loss": 1.3929, "step": 763 }, { "epoch": 2.0065746219592375, "high_lr": 0.000598421052631579, "low_lr": 1.1968421052631581e-05, "step": 763 }, { "epoch": 2.0065746219592375, "high_lr": 0.000598421052631579, "low_lr": 1.1968421052631581e-05, "step": 763 }, { "epoch": 2.0065746219592375, "high_lr": 0.000598421052631579, "low_lr": 1.1968421052631581e-05, "step": 763 }, { "epoch": 2.0065746219592375, "high_lr": 0.000598421052631579, "low_lr": 1.1968421052631581e-05, "step": 763 }, { "epoch": 2.0065746219592375, "high_lr": 0.000598421052631579, "low_lr": 1.1968421052631581e-05, "step": 763 }, { "epoch": 2.0065746219592375, "high_lr": 0.000598421052631579, "low_lr": 1.1968421052631581e-05, "step": 763 }, { "epoch": 2.0065746219592375, "high_lr": 0.000598421052631579, "low_lr": 1.1968421052631581e-05, "step": 763 }, { "epoch": 2.0065746219592375, "high_lr": 0.000598421052631579, "low_lr": 1.1968421052631581e-05, "step": 763 }, { "epoch": 2.009204470742932, "grad_norm": 1.0661119222640991, "learning_rate": 0.0005978947368421053, "loss": 1.4308, "step": 764 }, { "epoch": 2.009204470742932, "high_lr": 0.0005978947368421053, "low_lr": 1.1957894736842106e-05, "step": 764 }, { "epoch": 2.009204470742932, "high_lr": 0.0005978947368421053, "low_lr": 1.1957894736842106e-05, "step": 764 }, { "epoch": 2.009204470742932, "high_lr": 0.0005978947368421053, "low_lr": 1.1957894736842106e-05, "step": 764 }, { "epoch": 2.009204470742932, "high_lr": 0.0005978947368421053, "low_lr": 1.1957894736842106e-05, "step": 764 }, { "epoch": 2.009204470742932, "high_lr": 0.0005978947368421053, "low_lr": 1.1957894736842106e-05, "step": 764 }, { "epoch": 2.009204470742932, "high_lr": 0.0005978947368421053, "low_lr": 1.1957894736842106e-05, "step": 764 }, { "epoch": 2.009204470742932, "high_lr": 0.0005978947368421053, "low_lr": 1.1957894736842106e-05, "step": 764 }, { "epoch": 2.009204470742932, "high_lr": 0.0005978947368421053, "low_lr": 1.1957894736842106e-05, "step": 764 }, { "epoch": 2.0118343195266273, "grad_norm": 1.0468875169754028, "learning_rate": 0.0005973684210526316, "loss": 1.3778, "step": 765 }, { "epoch": 2.0118343195266273, "high_lr": 0.0005973684210526316, "low_lr": 1.1947368421052632e-05, "step": 765 }, { "epoch": 2.0118343195266273, "high_lr": 0.0005973684210526316, "low_lr": 1.1947368421052632e-05, "step": 765 }, { "epoch": 2.0118343195266273, "high_lr": 0.0005973684210526316, "low_lr": 1.1947368421052632e-05, "step": 765 }, { "epoch": 2.0118343195266273, "high_lr": 0.0005973684210526316, "low_lr": 1.1947368421052632e-05, "step": 765 }, { "epoch": 2.0118343195266273, "high_lr": 0.0005973684210526316, "low_lr": 1.1947368421052632e-05, "step": 765 }, { "epoch": 2.0118343195266273, "high_lr": 0.0005973684210526316, "low_lr": 1.1947368421052632e-05, "step": 765 }, { "epoch": 2.0118343195266273, "high_lr": 0.0005973684210526316, "low_lr": 1.1947368421052632e-05, "step": 765 }, { "epoch": 2.0118343195266273, "high_lr": 0.0005973684210526316, "low_lr": 1.1947368421052632e-05, "step": 765 }, { "epoch": 2.0144641683103224, "grad_norm": 1.1116379499435425, "learning_rate": 0.0005968421052631579, "loss": 1.3936, "step": 766 }, { "epoch": 2.0144641683103224, "high_lr": 0.0005968421052631579, "low_lr": 1.1936842105263158e-05, "step": 766 }, { "epoch": 2.0144641683103224, "high_lr": 0.0005968421052631579, "low_lr": 1.1936842105263158e-05, "step": 766 }, { "epoch": 2.0144641683103224, "high_lr": 0.0005968421052631579, "low_lr": 1.1936842105263158e-05, "step": 766 }, { "epoch": 2.0144641683103224, "high_lr": 0.0005968421052631579, "low_lr": 1.1936842105263158e-05, "step": 766 }, { "epoch": 2.0144641683103224, "high_lr": 0.0005968421052631579, "low_lr": 1.1936842105263158e-05, "step": 766 }, { "epoch": 2.0144641683103224, "high_lr": 0.0005968421052631579, "low_lr": 1.1936842105263158e-05, "step": 766 }, { "epoch": 2.0144641683103224, "high_lr": 0.0005968421052631579, "low_lr": 1.1936842105263158e-05, "step": 766 }, { "epoch": 2.0144641683103224, "high_lr": 0.0005968421052631579, "low_lr": 1.1936842105263158e-05, "step": 766 }, { "epoch": 2.017094017094017, "grad_norm": 1.1133373975753784, "learning_rate": 0.0005963157894736843, "loss": 1.351, "step": 767 }, { "epoch": 2.017094017094017, "high_lr": 0.0005963157894736843, "low_lr": 1.1926315789473686e-05, "step": 767 }, { "epoch": 2.017094017094017, "high_lr": 0.0005963157894736843, "low_lr": 1.1926315789473686e-05, "step": 767 }, { "epoch": 2.017094017094017, "high_lr": 0.0005963157894736843, "low_lr": 1.1926315789473686e-05, "step": 767 }, { "epoch": 2.017094017094017, "high_lr": 0.0005963157894736843, "low_lr": 1.1926315789473686e-05, "step": 767 }, { "epoch": 2.017094017094017, "high_lr": 0.0005963157894736843, "low_lr": 1.1926315789473686e-05, "step": 767 }, { "epoch": 2.017094017094017, "high_lr": 0.0005963157894736843, "low_lr": 1.1926315789473686e-05, "step": 767 }, { "epoch": 2.017094017094017, "high_lr": 0.0005963157894736843, "low_lr": 1.1926315789473686e-05, "step": 767 }, { "epoch": 2.017094017094017, "high_lr": 0.0005963157894736843, "low_lr": 1.1926315789473686e-05, "step": 767 }, { "epoch": 2.019723865877712, "grad_norm": 1.1404736042022705, "learning_rate": 0.0005957894736842106, "loss": 1.3537, "step": 768 }, { "epoch": 2.019723865877712, "high_lr": 0.0005957894736842106, "low_lr": 1.1915789473684211e-05, "step": 768 }, { "epoch": 2.019723865877712, "high_lr": 0.0005957894736842106, "low_lr": 1.1915789473684211e-05, "step": 768 }, { "epoch": 2.019723865877712, "high_lr": 0.0005957894736842106, "low_lr": 1.1915789473684211e-05, "step": 768 }, { "epoch": 2.019723865877712, "high_lr": 0.0005957894736842106, "low_lr": 1.1915789473684211e-05, "step": 768 }, { "epoch": 2.019723865877712, "high_lr": 0.0005957894736842106, "low_lr": 1.1915789473684211e-05, "step": 768 }, { "epoch": 2.019723865877712, "high_lr": 0.0005957894736842106, "low_lr": 1.1915789473684211e-05, "step": 768 }, { "epoch": 2.019723865877712, "high_lr": 0.0005957894736842106, "low_lr": 1.1915789473684211e-05, "step": 768 }, { "epoch": 2.019723865877712, "high_lr": 0.0005957894736842106, "low_lr": 1.1915789473684211e-05, "step": 768 }, { "epoch": 2.022353714661407, "grad_norm": 1.2281948328018188, "learning_rate": 0.0005952631578947368, "loss": 1.4146, "step": 769 }, { "epoch": 2.022353714661407, "high_lr": 0.0005952631578947368, "low_lr": 1.1905263157894737e-05, "step": 769 }, { "epoch": 2.022353714661407, "high_lr": 0.0005952631578947368, "low_lr": 1.1905263157894737e-05, "step": 769 }, { "epoch": 2.022353714661407, "high_lr": 0.0005952631578947368, "low_lr": 1.1905263157894737e-05, "step": 769 }, { "epoch": 2.022353714661407, "high_lr": 0.0005952631578947368, "low_lr": 1.1905263157894737e-05, "step": 769 }, { "epoch": 2.022353714661407, "high_lr": 0.0005952631578947368, "low_lr": 1.1905263157894737e-05, "step": 769 }, { "epoch": 2.022353714661407, "high_lr": 0.0005952631578947368, "low_lr": 1.1905263157894737e-05, "step": 769 }, { "epoch": 2.022353714661407, "high_lr": 0.0005952631578947368, "low_lr": 1.1905263157894737e-05, "step": 769 }, { "epoch": 2.022353714661407, "high_lr": 0.0005952631578947368, "low_lr": 1.1905263157894737e-05, "step": 769 }, { "epoch": 2.024983563445102, "grad_norm": 1.2524970769882202, "learning_rate": 0.0005947368421052631, "loss": 1.4337, "step": 770 }, { "epoch": 2.024983563445102, "high_lr": 0.0005947368421052631, "low_lr": 1.1894736842105264e-05, "step": 770 }, { "epoch": 2.024983563445102, "high_lr": 0.0005947368421052631, "low_lr": 1.1894736842105264e-05, "step": 770 }, { "epoch": 2.024983563445102, "high_lr": 0.0005947368421052631, "low_lr": 1.1894736842105264e-05, "step": 770 }, { "epoch": 2.024983563445102, "high_lr": 0.0005947368421052631, "low_lr": 1.1894736842105264e-05, "step": 770 }, { "epoch": 2.024983563445102, "high_lr": 0.0005947368421052631, "low_lr": 1.1894736842105264e-05, "step": 770 }, { "epoch": 2.024983563445102, "high_lr": 0.0005947368421052631, "low_lr": 1.1894736842105264e-05, "step": 770 }, { "epoch": 2.024983563445102, "high_lr": 0.0005947368421052631, "low_lr": 1.1894736842105264e-05, "step": 770 }, { "epoch": 2.024983563445102, "high_lr": 0.0005947368421052631, "low_lr": 1.1894736842105264e-05, "step": 770 }, { "epoch": 2.027613412228797, "grad_norm": 1.2224129438400269, "learning_rate": 0.0005942105263157895, "loss": 1.3969, "step": 771 }, { "epoch": 2.027613412228797, "high_lr": 0.0005942105263157895, "low_lr": 1.1884210526315792e-05, "step": 771 }, { "epoch": 2.027613412228797, "high_lr": 0.0005942105263157895, "low_lr": 1.1884210526315792e-05, "step": 771 }, { "epoch": 2.027613412228797, "high_lr": 0.0005942105263157895, "low_lr": 1.1884210526315792e-05, "step": 771 }, { "epoch": 2.027613412228797, "high_lr": 0.0005942105263157895, "low_lr": 1.1884210526315792e-05, "step": 771 }, { "epoch": 2.027613412228797, "high_lr": 0.0005942105263157895, "low_lr": 1.1884210526315792e-05, "step": 771 }, { "epoch": 2.027613412228797, "high_lr": 0.0005942105263157895, "low_lr": 1.1884210526315792e-05, "step": 771 }, { "epoch": 2.027613412228797, "high_lr": 0.0005942105263157895, "low_lr": 1.1884210526315792e-05, "step": 771 }, { "epoch": 2.027613412228797, "high_lr": 0.0005942105263157895, "low_lr": 1.1884210526315792e-05, "step": 771 }, { "epoch": 2.0302432610124916, "grad_norm": 1.123277187347412, "learning_rate": 0.0005936842105263158, "loss": 1.3849, "step": 772 }, { "epoch": 2.0302432610124916, "high_lr": 0.0005936842105263158, "low_lr": 1.1873684210526318e-05, "step": 772 }, { "epoch": 2.0302432610124916, "high_lr": 0.0005936842105263158, "low_lr": 1.1873684210526318e-05, "step": 772 }, { "epoch": 2.0302432610124916, "high_lr": 0.0005936842105263158, "low_lr": 1.1873684210526318e-05, "step": 772 }, { "epoch": 2.0302432610124916, "high_lr": 0.0005936842105263158, "low_lr": 1.1873684210526318e-05, "step": 772 }, { "epoch": 2.0302432610124916, "high_lr": 0.0005936842105263158, "low_lr": 1.1873684210526318e-05, "step": 772 }, { "epoch": 2.0302432610124916, "high_lr": 0.0005936842105263158, "low_lr": 1.1873684210526318e-05, "step": 772 }, { "epoch": 2.0302432610124916, "high_lr": 0.0005936842105263158, "low_lr": 1.1873684210526318e-05, "step": 772 }, { "epoch": 2.0302432610124916, "high_lr": 0.0005936842105263158, "low_lr": 1.1873684210526318e-05, "step": 772 }, { "epoch": 2.0328731097961867, "grad_norm": 1.2327888011932373, "learning_rate": 0.0005931578947368421, "loss": 1.4064, "step": 773 }, { "epoch": 2.0328731097961867, "high_lr": 0.0005931578947368421, "low_lr": 1.1863157894736843e-05, "step": 773 }, { "epoch": 2.0328731097961867, "high_lr": 0.0005931578947368421, "low_lr": 1.1863157894736843e-05, "step": 773 }, { "epoch": 2.0328731097961867, "high_lr": 0.0005931578947368421, "low_lr": 1.1863157894736843e-05, "step": 773 }, { "epoch": 2.0328731097961867, "high_lr": 0.0005931578947368421, "low_lr": 1.1863157894736843e-05, "step": 773 }, { "epoch": 2.0328731097961867, "high_lr": 0.0005931578947368421, "low_lr": 1.1863157894736843e-05, "step": 773 }, { "epoch": 2.0328731097961867, "high_lr": 0.0005931578947368421, "low_lr": 1.1863157894736843e-05, "step": 773 }, { "epoch": 2.0328731097961867, "high_lr": 0.0005931578947368421, "low_lr": 1.1863157894736843e-05, "step": 773 }, { "epoch": 2.0328731097961867, "high_lr": 0.0005931578947368421, "low_lr": 1.1863157894736843e-05, "step": 773 }, { "epoch": 2.035502958579882, "grad_norm": 1.1804497241973877, "learning_rate": 0.0005926315789473684, "loss": 1.3843, "step": 774 }, { "epoch": 2.035502958579882, "high_lr": 0.0005926315789473684, "low_lr": 1.1852631578947369e-05, "step": 774 }, { "epoch": 2.035502958579882, "high_lr": 0.0005926315789473684, "low_lr": 1.1852631578947369e-05, "step": 774 }, { "epoch": 2.035502958579882, "high_lr": 0.0005926315789473684, "low_lr": 1.1852631578947369e-05, "step": 774 }, { "epoch": 2.035502958579882, "high_lr": 0.0005926315789473684, "low_lr": 1.1852631578947369e-05, "step": 774 }, { "epoch": 2.035502958579882, "high_lr": 0.0005926315789473684, "low_lr": 1.1852631578947369e-05, "step": 774 }, { "epoch": 2.035502958579882, "high_lr": 0.0005926315789473684, "low_lr": 1.1852631578947369e-05, "step": 774 }, { "epoch": 2.035502958579882, "high_lr": 0.0005926315789473684, "low_lr": 1.1852631578947369e-05, "step": 774 }, { "epoch": 2.035502958579882, "high_lr": 0.0005926315789473684, "low_lr": 1.1852631578947369e-05, "step": 774 }, { "epoch": 2.0381328073635765, "grad_norm": 1.1749237775802612, "learning_rate": 0.0005921052631578947, "loss": 1.4147, "step": 775 }, { "epoch": 2.0381328073635765, "high_lr": 0.0005921052631578947, "low_lr": 1.1842105263157895e-05, "step": 775 }, { "epoch": 2.0381328073635765, "high_lr": 0.0005921052631578947, "low_lr": 1.1842105263157895e-05, "step": 775 }, { "epoch": 2.0381328073635765, "high_lr": 0.0005921052631578947, "low_lr": 1.1842105263157895e-05, "step": 775 }, { "epoch": 2.0381328073635765, "high_lr": 0.0005921052631578947, "low_lr": 1.1842105263157895e-05, "step": 775 }, { "epoch": 2.0381328073635765, "high_lr": 0.0005921052631578947, "low_lr": 1.1842105263157895e-05, "step": 775 }, { "epoch": 2.0381328073635765, "high_lr": 0.0005921052631578947, "low_lr": 1.1842105263157895e-05, "step": 775 }, { "epoch": 2.0381328073635765, "high_lr": 0.0005921052631578947, "low_lr": 1.1842105263157895e-05, "step": 775 }, { "epoch": 2.0381328073635765, "high_lr": 0.0005921052631578947, "low_lr": 1.1842105263157895e-05, "step": 775 }, { "epoch": 2.0407626561472716, "grad_norm": 1.1598745584487915, "learning_rate": 0.0005915789473684211, "loss": 1.384, "step": 776 }, { "epoch": 2.0407626561472716, "high_lr": 0.0005915789473684211, "low_lr": 1.1831578947368423e-05, "step": 776 }, { "epoch": 2.0407626561472716, "high_lr": 0.0005915789473684211, "low_lr": 1.1831578947368423e-05, "step": 776 }, { "epoch": 2.0407626561472716, "high_lr": 0.0005915789473684211, "low_lr": 1.1831578947368423e-05, "step": 776 }, { "epoch": 2.0407626561472716, "high_lr": 0.0005915789473684211, "low_lr": 1.1831578947368423e-05, "step": 776 }, { "epoch": 2.0407626561472716, "high_lr": 0.0005915789473684211, "low_lr": 1.1831578947368423e-05, "step": 776 }, { "epoch": 2.0407626561472716, "high_lr": 0.0005915789473684211, "low_lr": 1.1831578947368423e-05, "step": 776 }, { "epoch": 2.0407626561472716, "high_lr": 0.0005915789473684211, "low_lr": 1.1831578947368423e-05, "step": 776 }, { "epoch": 2.0407626561472716, "high_lr": 0.0005915789473684211, "low_lr": 1.1831578947368423e-05, "step": 776 }, { "epoch": 2.0433925049309662, "grad_norm": 1.0803080797195435, "learning_rate": 0.0005910526315789473, "loss": 1.4139, "step": 777 }, { "epoch": 2.0433925049309662, "high_lr": 0.0005910526315789473, "low_lr": 1.1821052631578948e-05, "step": 777 }, { "epoch": 2.0433925049309662, "high_lr": 0.0005910526315789473, "low_lr": 1.1821052631578948e-05, "step": 777 }, { "epoch": 2.0433925049309662, "high_lr": 0.0005910526315789473, "low_lr": 1.1821052631578948e-05, "step": 777 }, { "epoch": 2.0433925049309662, "high_lr": 0.0005910526315789473, "low_lr": 1.1821052631578948e-05, "step": 777 }, { "epoch": 2.0433925049309662, "high_lr": 0.0005910526315789473, "low_lr": 1.1821052631578948e-05, "step": 777 }, { "epoch": 2.0433925049309662, "high_lr": 0.0005910526315789473, "low_lr": 1.1821052631578948e-05, "step": 777 }, { "epoch": 2.0433925049309662, "high_lr": 0.0005910526315789473, "low_lr": 1.1821052631578948e-05, "step": 777 }, { "epoch": 2.0433925049309662, "high_lr": 0.0005910526315789473, "low_lr": 1.1821052631578948e-05, "step": 777 }, { "epoch": 2.0460223537146613, "grad_norm": 1.0830882787704468, "learning_rate": 0.0005905263157894736, "loss": 1.3873, "step": 778 }, { "epoch": 2.0460223537146613, "high_lr": 0.0005905263157894736, "low_lr": 1.1810526315789474e-05, "step": 778 }, { "epoch": 2.0460223537146613, "high_lr": 0.0005905263157894736, "low_lr": 1.1810526315789474e-05, "step": 778 }, { "epoch": 2.0460223537146613, "high_lr": 0.0005905263157894736, "low_lr": 1.1810526315789474e-05, "step": 778 }, { "epoch": 2.0460223537146613, "high_lr": 0.0005905263157894736, "low_lr": 1.1810526315789474e-05, "step": 778 }, { "epoch": 2.0460223537146613, "high_lr": 0.0005905263157894736, "low_lr": 1.1810526315789474e-05, "step": 778 }, { "epoch": 2.0460223537146613, "high_lr": 0.0005905263157894736, "low_lr": 1.1810526315789474e-05, "step": 778 }, { "epoch": 2.0460223537146613, "high_lr": 0.0005905263157894736, "low_lr": 1.1810526315789474e-05, "step": 778 }, { "epoch": 2.0460223537146613, "high_lr": 0.0005905263157894736, "low_lr": 1.1810526315789474e-05, "step": 778 }, { "epoch": 2.0486522024983564, "grad_norm": 1.1197872161865234, "learning_rate": 0.00059, "loss": 1.4179, "step": 779 }, { "epoch": 2.0486522024983564, "high_lr": 0.00059, "low_lr": 1.18e-05, "step": 779 }, { "epoch": 2.0486522024983564, "high_lr": 0.00059, "low_lr": 1.18e-05, "step": 779 }, { "epoch": 2.0486522024983564, "high_lr": 0.00059, "low_lr": 1.18e-05, "step": 779 }, { "epoch": 2.0486522024983564, "high_lr": 0.00059, "low_lr": 1.18e-05, "step": 779 }, { "epoch": 2.0486522024983564, "high_lr": 0.00059, "low_lr": 1.18e-05, "step": 779 }, { "epoch": 2.0486522024983564, "high_lr": 0.00059, "low_lr": 1.18e-05, "step": 779 }, { "epoch": 2.0486522024983564, "high_lr": 0.00059, "low_lr": 1.18e-05, "step": 779 }, { "epoch": 2.0486522024983564, "high_lr": 0.00059, "low_lr": 1.18e-05, "step": 779 }, { "epoch": 2.051282051282051, "grad_norm": 1.1884441375732422, "learning_rate": 0.0005894736842105263, "loss": 1.402, "step": 780 }, { "epoch": 2.051282051282051, "high_lr": 0.0005894736842105263, "low_lr": 1.1789473684210527e-05, "step": 780 }, { "epoch": 2.051282051282051, "high_lr": 0.0005894736842105263, "low_lr": 1.1789473684210527e-05, "step": 780 }, { "epoch": 2.051282051282051, "high_lr": 0.0005894736842105263, "low_lr": 1.1789473684210527e-05, "step": 780 }, { "epoch": 2.051282051282051, "high_lr": 0.0005894736842105263, "low_lr": 1.1789473684210527e-05, "step": 780 }, { "epoch": 2.051282051282051, "high_lr": 0.0005894736842105263, "low_lr": 1.1789473684210527e-05, "step": 780 }, { "epoch": 2.051282051282051, "high_lr": 0.0005894736842105263, "low_lr": 1.1789473684210527e-05, "step": 780 }, { "epoch": 2.051282051282051, "high_lr": 0.0005894736842105263, "low_lr": 1.1789473684210527e-05, "step": 780 }, { "epoch": 2.051282051282051, "high_lr": 0.0005894736842105263, "low_lr": 1.1789473684210527e-05, "step": 780 }, { "epoch": 2.053911900065746, "grad_norm": 1.2280651330947876, "learning_rate": 0.0005889473684210527, "loss": 1.3927, "step": 781 }, { "epoch": 2.053911900065746, "high_lr": 0.0005889473684210527, "low_lr": 1.1778947368421055e-05, "step": 781 }, { "epoch": 2.053911900065746, "high_lr": 0.0005889473684210527, "low_lr": 1.1778947368421055e-05, "step": 781 }, { "epoch": 2.053911900065746, "high_lr": 0.0005889473684210527, "low_lr": 1.1778947368421055e-05, "step": 781 }, { "epoch": 2.053911900065746, "high_lr": 0.0005889473684210527, "low_lr": 1.1778947368421055e-05, "step": 781 }, { "epoch": 2.053911900065746, "high_lr": 0.0005889473684210527, "low_lr": 1.1778947368421055e-05, "step": 781 }, { "epoch": 2.053911900065746, "high_lr": 0.0005889473684210527, "low_lr": 1.1778947368421055e-05, "step": 781 }, { "epoch": 2.053911900065746, "high_lr": 0.0005889473684210527, "low_lr": 1.1778947368421055e-05, "step": 781 }, { "epoch": 2.053911900065746, "high_lr": 0.0005889473684210527, "low_lr": 1.1778947368421055e-05, "step": 781 }, { "epoch": 2.0565417488494413, "grad_norm": 1.2406994104385376, "learning_rate": 0.000588421052631579, "loss": 1.4305, "step": 782 }, { "epoch": 2.0565417488494413, "high_lr": 0.000588421052631579, "low_lr": 1.176842105263158e-05, "step": 782 }, { "epoch": 2.0565417488494413, "high_lr": 0.000588421052631579, "low_lr": 1.176842105263158e-05, "step": 782 }, { "epoch": 2.0565417488494413, "high_lr": 0.000588421052631579, "low_lr": 1.176842105263158e-05, "step": 782 }, { "epoch": 2.0565417488494413, "high_lr": 0.000588421052631579, "low_lr": 1.176842105263158e-05, "step": 782 }, { "epoch": 2.0565417488494413, "high_lr": 0.000588421052631579, "low_lr": 1.176842105263158e-05, "step": 782 }, { "epoch": 2.0565417488494413, "high_lr": 0.000588421052631579, "low_lr": 1.176842105263158e-05, "step": 782 }, { "epoch": 2.0565417488494413, "high_lr": 0.000588421052631579, "low_lr": 1.176842105263158e-05, "step": 782 }, { "epoch": 2.0565417488494413, "high_lr": 0.000588421052631579, "low_lr": 1.176842105263158e-05, "step": 782 }, { "epoch": 2.059171597633136, "grad_norm": 1.098323941230774, "learning_rate": 0.0005878947368421053, "loss": 1.3512, "step": 783 }, { "epoch": 2.059171597633136, "high_lr": 0.0005878947368421053, "low_lr": 1.1757894736842106e-05, "step": 783 }, { "epoch": 2.059171597633136, "high_lr": 0.0005878947368421053, "low_lr": 1.1757894736842106e-05, "step": 783 }, { "epoch": 2.059171597633136, "high_lr": 0.0005878947368421053, "low_lr": 1.1757894736842106e-05, "step": 783 }, { "epoch": 2.059171597633136, "high_lr": 0.0005878947368421053, "low_lr": 1.1757894736842106e-05, "step": 783 }, { "epoch": 2.059171597633136, "high_lr": 0.0005878947368421053, "low_lr": 1.1757894736842106e-05, "step": 783 }, { "epoch": 2.059171597633136, "high_lr": 0.0005878947368421053, "low_lr": 1.1757894736842106e-05, "step": 783 }, { "epoch": 2.059171597633136, "high_lr": 0.0005878947368421053, "low_lr": 1.1757894736842106e-05, "step": 783 }, { "epoch": 2.059171597633136, "high_lr": 0.0005878947368421053, "low_lr": 1.1757894736842106e-05, "step": 783 }, { "epoch": 2.061801446416831, "grad_norm": 1.1673874855041504, "learning_rate": 0.0005873684210526316, "loss": 1.3542, "step": 784 }, { "epoch": 2.061801446416831, "high_lr": 0.0005873684210526316, "low_lr": 1.1747368421052632e-05, "step": 784 }, { "epoch": 2.061801446416831, "high_lr": 0.0005873684210526316, "low_lr": 1.1747368421052632e-05, "step": 784 }, { "epoch": 2.061801446416831, "high_lr": 0.0005873684210526316, "low_lr": 1.1747368421052632e-05, "step": 784 }, { "epoch": 2.061801446416831, "high_lr": 0.0005873684210526316, "low_lr": 1.1747368421052632e-05, "step": 784 }, { "epoch": 2.061801446416831, "high_lr": 0.0005873684210526316, "low_lr": 1.1747368421052632e-05, "step": 784 }, { "epoch": 2.061801446416831, "high_lr": 0.0005873684210526316, "low_lr": 1.1747368421052632e-05, "step": 784 }, { "epoch": 2.061801446416831, "high_lr": 0.0005873684210526316, "low_lr": 1.1747368421052632e-05, "step": 784 }, { "epoch": 2.061801446416831, "high_lr": 0.0005873684210526316, "low_lr": 1.1747368421052632e-05, "step": 784 }, { "epoch": 2.064431295200526, "grad_norm": 1.362140417098999, "learning_rate": 0.000586842105263158, "loss": 1.3845, "step": 785 }, { "epoch": 2.064431295200526, "high_lr": 0.000586842105263158, "low_lr": 1.173684210526316e-05, "step": 785 }, { "epoch": 2.064431295200526, "high_lr": 0.000586842105263158, "low_lr": 1.173684210526316e-05, "step": 785 }, { "epoch": 2.064431295200526, "high_lr": 0.000586842105263158, "low_lr": 1.173684210526316e-05, "step": 785 }, { "epoch": 2.064431295200526, "high_lr": 0.000586842105263158, "low_lr": 1.173684210526316e-05, "step": 785 }, { "epoch": 2.064431295200526, "high_lr": 0.000586842105263158, "low_lr": 1.173684210526316e-05, "step": 785 }, { "epoch": 2.064431295200526, "high_lr": 0.000586842105263158, "low_lr": 1.173684210526316e-05, "step": 785 }, { "epoch": 2.064431295200526, "high_lr": 0.000586842105263158, "low_lr": 1.173684210526316e-05, "step": 785 }, { "epoch": 2.064431295200526, "high_lr": 0.000586842105263158, "low_lr": 1.173684210526316e-05, "step": 785 }, { "epoch": 2.067061143984221, "grad_norm": 1.187095284461975, "learning_rate": 0.0005863157894736842, "loss": 1.4345, "step": 786 }, { "epoch": 2.067061143984221, "high_lr": 0.0005863157894736842, "low_lr": 1.1726315789473685e-05, "step": 786 }, { "epoch": 2.067061143984221, "high_lr": 0.0005863157894736842, "low_lr": 1.1726315789473685e-05, "step": 786 }, { "epoch": 2.067061143984221, "high_lr": 0.0005863157894736842, "low_lr": 1.1726315789473685e-05, "step": 786 }, { "epoch": 2.067061143984221, "high_lr": 0.0005863157894736842, "low_lr": 1.1726315789473685e-05, "step": 786 }, { "epoch": 2.067061143984221, "high_lr": 0.0005863157894736842, "low_lr": 1.1726315789473685e-05, "step": 786 }, { "epoch": 2.067061143984221, "high_lr": 0.0005863157894736842, "low_lr": 1.1726315789473685e-05, "step": 786 }, { "epoch": 2.067061143984221, "high_lr": 0.0005863157894736842, "low_lr": 1.1726315789473685e-05, "step": 786 }, { "epoch": 2.067061143984221, "high_lr": 0.0005863157894736842, "low_lr": 1.1726315789473685e-05, "step": 786 }, { "epoch": 2.069690992767916, "grad_norm": 1.184488296508789, "learning_rate": 0.0005857894736842105, "loss": 1.4132, "step": 787 }, { "epoch": 2.069690992767916, "high_lr": 0.0005857894736842105, "low_lr": 1.1715789473684211e-05, "step": 787 }, { "epoch": 2.069690992767916, "high_lr": 0.0005857894736842105, "low_lr": 1.1715789473684211e-05, "step": 787 }, { "epoch": 2.069690992767916, "high_lr": 0.0005857894736842105, "low_lr": 1.1715789473684211e-05, "step": 787 }, { "epoch": 2.069690992767916, "high_lr": 0.0005857894736842105, "low_lr": 1.1715789473684211e-05, "step": 787 }, { "epoch": 2.069690992767916, "high_lr": 0.0005857894736842105, "low_lr": 1.1715789473684211e-05, "step": 787 }, { "epoch": 2.069690992767916, "high_lr": 0.0005857894736842105, "low_lr": 1.1715789473684211e-05, "step": 787 }, { "epoch": 2.069690992767916, "high_lr": 0.0005857894736842105, "low_lr": 1.1715789473684211e-05, "step": 787 }, { "epoch": 2.069690992767916, "high_lr": 0.0005857894736842105, "low_lr": 1.1715789473684211e-05, "step": 787 }, { "epoch": 2.072320841551611, "grad_norm": 1.1735727787017822, "learning_rate": 0.0005852631578947368, "loss": 1.3603, "step": 788 }, { "epoch": 2.072320841551611, "high_lr": 0.0005852631578947368, "low_lr": 1.1705263157894737e-05, "step": 788 }, { "epoch": 2.072320841551611, "high_lr": 0.0005852631578947368, "low_lr": 1.1705263157894737e-05, "step": 788 }, { "epoch": 2.072320841551611, "high_lr": 0.0005852631578947368, "low_lr": 1.1705263157894737e-05, "step": 788 }, { "epoch": 2.072320841551611, "high_lr": 0.0005852631578947368, "low_lr": 1.1705263157894737e-05, "step": 788 }, { "epoch": 2.072320841551611, "high_lr": 0.0005852631578947368, "low_lr": 1.1705263157894737e-05, "step": 788 }, { "epoch": 2.072320841551611, "high_lr": 0.0005852631578947368, "low_lr": 1.1705263157894737e-05, "step": 788 }, { "epoch": 2.072320841551611, "high_lr": 0.0005852631578947368, "low_lr": 1.1705263157894737e-05, "step": 788 }, { "epoch": 2.072320841551611, "high_lr": 0.0005852631578947368, "low_lr": 1.1705263157894737e-05, "step": 788 }, { "epoch": 2.0749506903353057, "grad_norm": 1.1374088525772095, "learning_rate": 0.0005847368421052631, "loss": 1.3822, "step": 789 }, { "epoch": 2.0749506903353057, "high_lr": 0.0005847368421052631, "low_lr": 1.1694736842105264e-05, "step": 789 }, { "epoch": 2.0749506903353057, "high_lr": 0.0005847368421052631, "low_lr": 1.1694736842105264e-05, "step": 789 }, { "epoch": 2.0749506903353057, "high_lr": 0.0005847368421052631, "low_lr": 1.1694736842105264e-05, "step": 789 }, { "epoch": 2.0749506903353057, "high_lr": 0.0005847368421052631, "low_lr": 1.1694736842105264e-05, "step": 789 }, { "epoch": 2.0749506903353057, "high_lr": 0.0005847368421052631, "low_lr": 1.1694736842105264e-05, "step": 789 }, { "epoch": 2.0749506903353057, "high_lr": 0.0005847368421052631, "low_lr": 1.1694736842105264e-05, "step": 789 }, { "epoch": 2.0749506903353057, "high_lr": 0.0005847368421052631, "low_lr": 1.1694736842105264e-05, "step": 789 }, { "epoch": 2.0749506903353057, "high_lr": 0.0005847368421052631, "low_lr": 1.1694736842105264e-05, "step": 789 }, { "epoch": 2.0775805391190008, "grad_norm": 1.120962142944336, "learning_rate": 0.0005842105263157895, "loss": 1.3845, "step": 790 }, { "epoch": 2.0775805391190008, "high_lr": 0.0005842105263157895, "low_lr": 1.1684210526315792e-05, "step": 790 }, { "epoch": 2.0775805391190008, "high_lr": 0.0005842105263157895, "low_lr": 1.1684210526315792e-05, "step": 790 }, { "epoch": 2.0775805391190008, "high_lr": 0.0005842105263157895, "low_lr": 1.1684210526315792e-05, "step": 790 }, { "epoch": 2.0775805391190008, "high_lr": 0.0005842105263157895, "low_lr": 1.1684210526315792e-05, "step": 790 }, { "epoch": 2.0775805391190008, "high_lr": 0.0005842105263157895, "low_lr": 1.1684210526315792e-05, "step": 790 }, { "epoch": 2.0775805391190008, "high_lr": 0.0005842105263157895, "low_lr": 1.1684210526315792e-05, "step": 790 }, { "epoch": 2.0775805391190008, "high_lr": 0.0005842105263157895, "low_lr": 1.1684210526315792e-05, "step": 790 }, { "epoch": 2.0775805391190008, "high_lr": 0.0005842105263157895, "low_lr": 1.1684210526315792e-05, "step": 790 }, { "epoch": 2.0802103879026954, "grad_norm": 1.1211026906967163, "learning_rate": 0.0005836842105263158, "loss": 1.3344, "step": 791 }, { "epoch": 2.0802103879026954, "high_lr": 0.0005836842105263158, "low_lr": 1.1673684210526316e-05, "step": 791 }, { "epoch": 2.0802103879026954, "high_lr": 0.0005836842105263158, "low_lr": 1.1673684210526316e-05, "step": 791 }, { "epoch": 2.0802103879026954, "high_lr": 0.0005836842105263158, "low_lr": 1.1673684210526316e-05, "step": 791 }, { "epoch": 2.0802103879026954, "high_lr": 0.0005836842105263158, "low_lr": 1.1673684210526316e-05, "step": 791 }, { "epoch": 2.0802103879026954, "high_lr": 0.0005836842105263158, "low_lr": 1.1673684210526316e-05, "step": 791 }, { "epoch": 2.0802103879026954, "high_lr": 0.0005836842105263158, "low_lr": 1.1673684210526316e-05, "step": 791 }, { "epoch": 2.0802103879026954, "high_lr": 0.0005836842105263158, "low_lr": 1.1673684210526316e-05, "step": 791 }, { "epoch": 2.0802103879026954, "high_lr": 0.0005836842105263158, "low_lr": 1.1673684210526316e-05, "step": 791 }, { "epoch": 2.0828402366863905, "grad_norm": 1.1464637517929077, "learning_rate": 0.0005831578947368421, "loss": 1.4136, "step": 792 }, { "epoch": 2.0828402366863905, "high_lr": 0.0005831578947368421, "low_lr": 1.1663157894736843e-05, "step": 792 }, { "epoch": 2.0828402366863905, "high_lr": 0.0005831578947368421, "low_lr": 1.1663157894736843e-05, "step": 792 }, { "epoch": 2.0828402366863905, "high_lr": 0.0005831578947368421, "low_lr": 1.1663157894736843e-05, "step": 792 }, { "epoch": 2.0828402366863905, "high_lr": 0.0005831578947368421, "low_lr": 1.1663157894736843e-05, "step": 792 }, { "epoch": 2.0828402366863905, "high_lr": 0.0005831578947368421, "low_lr": 1.1663157894736843e-05, "step": 792 }, { "epoch": 2.0828402366863905, "high_lr": 0.0005831578947368421, "low_lr": 1.1663157894736843e-05, "step": 792 }, { "epoch": 2.0828402366863905, "high_lr": 0.0005831578947368421, "low_lr": 1.1663157894736843e-05, "step": 792 }, { "epoch": 2.0828402366863905, "high_lr": 0.0005831578947368421, "low_lr": 1.1663157894736843e-05, "step": 792 }, { "epoch": 2.0854700854700856, "grad_norm": 1.169979214668274, "learning_rate": 0.0005826315789473684, "loss": 1.4088, "step": 793 }, { "epoch": 2.0854700854700856, "high_lr": 0.0005826315789473684, "low_lr": 1.1652631578947369e-05, "step": 793 }, { "epoch": 2.0854700854700856, "high_lr": 0.0005826315789473684, "low_lr": 1.1652631578947369e-05, "step": 793 }, { "epoch": 2.0854700854700856, "high_lr": 0.0005826315789473684, "low_lr": 1.1652631578947369e-05, "step": 793 }, { "epoch": 2.0854700854700856, "high_lr": 0.0005826315789473684, "low_lr": 1.1652631578947369e-05, "step": 793 }, { "epoch": 2.0854700854700856, "high_lr": 0.0005826315789473684, "low_lr": 1.1652631578947369e-05, "step": 793 }, { "epoch": 2.0854700854700856, "high_lr": 0.0005826315789473684, "low_lr": 1.1652631578947369e-05, "step": 793 }, { "epoch": 2.0854700854700856, "high_lr": 0.0005826315789473684, "low_lr": 1.1652631578947369e-05, "step": 793 }, { "epoch": 2.0854700854700856, "high_lr": 0.0005826315789473684, "low_lr": 1.1652631578947369e-05, "step": 793 }, { "epoch": 2.0880999342537803, "grad_norm": 1.22758150100708, "learning_rate": 0.0005821052631578948, "loss": 1.4294, "step": 794 }, { "epoch": 2.0880999342537803, "high_lr": 0.0005821052631578948, "low_lr": 1.1642105263157897e-05, "step": 794 }, { "epoch": 2.0880999342537803, "high_lr": 0.0005821052631578948, "low_lr": 1.1642105263157897e-05, "step": 794 }, { "epoch": 2.0880999342537803, "high_lr": 0.0005821052631578948, "low_lr": 1.1642105263157897e-05, "step": 794 }, { "epoch": 2.0880999342537803, "high_lr": 0.0005821052631578948, "low_lr": 1.1642105263157897e-05, "step": 794 }, { "epoch": 2.0880999342537803, "high_lr": 0.0005821052631578948, "low_lr": 1.1642105263157897e-05, "step": 794 }, { "epoch": 2.0880999342537803, "high_lr": 0.0005821052631578948, "low_lr": 1.1642105263157897e-05, "step": 794 }, { "epoch": 2.0880999342537803, "high_lr": 0.0005821052631578948, "low_lr": 1.1642105263157897e-05, "step": 794 }, { "epoch": 2.0880999342537803, "high_lr": 0.0005821052631578948, "low_lr": 1.1642105263157897e-05, "step": 794 }, { "epoch": 2.0907297830374754, "grad_norm": 1.3216019868850708, "learning_rate": 0.0005815789473684211, "loss": 1.4271, "step": 795 }, { "epoch": 2.0907297830374754, "high_lr": 0.0005815789473684211, "low_lr": 1.1631578947368423e-05, "step": 795 }, { "epoch": 2.0907297830374754, "high_lr": 0.0005815789473684211, "low_lr": 1.1631578947368423e-05, "step": 795 }, { "epoch": 2.0907297830374754, "high_lr": 0.0005815789473684211, "low_lr": 1.1631578947368423e-05, "step": 795 }, { "epoch": 2.0907297830374754, "high_lr": 0.0005815789473684211, "low_lr": 1.1631578947368423e-05, "step": 795 }, { "epoch": 2.0907297830374754, "high_lr": 0.0005815789473684211, "low_lr": 1.1631578947368423e-05, "step": 795 }, { "epoch": 2.0907297830374754, "high_lr": 0.0005815789473684211, "low_lr": 1.1631578947368423e-05, "step": 795 }, { "epoch": 2.0907297830374754, "high_lr": 0.0005815789473684211, "low_lr": 1.1631578947368423e-05, "step": 795 }, { "epoch": 2.0907297830374754, "high_lr": 0.0005815789473684211, "low_lr": 1.1631578947368423e-05, "step": 795 }, { "epoch": 2.0933596318211705, "grad_norm": 1.0885225534439087, "learning_rate": 0.0005810526315789474, "loss": 1.3856, "step": 796 }, { "epoch": 2.0933596318211705, "high_lr": 0.0005810526315789474, "low_lr": 1.1621052631578948e-05, "step": 796 }, { "epoch": 2.0933596318211705, "high_lr": 0.0005810526315789474, "low_lr": 1.1621052631578948e-05, "step": 796 }, { "epoch": 2.0933596318211705, "high_lr": 0.0005810526315789474, "low_lr": 1.1621052631578948e-05, "step": 796 }, { "epoch": 2.0933596318211705, "high_lr": 0.0005810526315789474, "low_lr": 1.1621052631578948e-05, "step": 796 }, { "epoch": 2.0933596318211705, "high_lr": 0.0005810526315789474, "low_lr": 1.1621052631578948e-05, "step": 796 }, { "epoch": 2.0933596318211705, "high_lr": 0.0005810526315789474, "low_lr": 1.1621052631578948e-05, "step": 796 }, { "epoch": 2.0933596318211705, "high_lr": 0.0005810526315789474, "low_lr": 1.1621052631578948e-05, "step": 796 }, { "epoch": 2.0933596318211705, "high_lr": 0.0005810526315789474, "low_lr": 1.1621052631578948e-05, "step": 796 }, { "epoch": 2.095989480604865, "grad_norm": 1.1253178119659424, "learning_rate": 0.0005805263157894737, "loss": 1.4179, "step": 797 }, { "epoch": 2.095989480604865, "high_lr": 0.0005805263157894737, "low_lr": 1.1610526315789474e-05, "step": 797 }, { "epoch": 2.095989480604865, "high_lr": 0.0005805263157894737, "low_lr": 1.1610526315789474e-05, "step": 797 }, { "epoch": 2.095989480604865, "high_lr": 0.0005805263157894737, "low_lr": 1.1610526315789474e-05, "step": 797 }, { "epoch": 2.095989480604865, "high_lr": 0.0005805263157894737, "low_lr": 1.1610526315789474e-05, "step": 797 }, { "epoch": 2.095989480604865, "high_lr": 0.0005805263157894737, "low_lr": 1.1610526315789474e-05, "step": 797 }, { "epoch": 2.095989480604865, "high_lr": 0.0005805263157894737, "low_lr": 1.1610526315789474e-05, "step": 797 }, { "epoch": 2.095989480604865, "high_lr": 0.0005805263157894737, "low_lr": 1.1610526315789474e-05, "step": 797 }, { "epoch": 2.095989480604865, "high_lr": 0.0005805263157894737, "low_lr": 1.1610526315789474e-05, "step": 797 }, { "epoch": 2.09861932938856, "grad_norm": 1.229320764541626, "learning_rate": 0.00058, "loss": 1.4089, "step": 798 }, { "epoch": 2.09861932938856, "high_lr": 0.00058, "low_lr": 1.16e-05, "step": 798 }, { "epoch": 2.09861932938856, "high_lr": 0.00058, "low_lr": 1.16e-05, "step": 798 }, { "epoch": 2.09861932938856, "high_lr": 0.00058, "low_lr": 1.16e-05, "step": 798 }, { "epoch": 2.09861932938856, "high_lr": 0.00058, "low_lr": 1.16e-05, "step": 798 }, { "epoch": 2.09861932938856, "high_lr": 0.00058, "low_lr": 1.16e-05, "step": 798 }, { "epoch": 2.09861932938856, "high_lr": 0.00058, "low_lr": 1.16e-05, "step": 798 }, { "epoch": 2.09861932938856, "high_lr": 0.00058, "low_lr": 1.16e-05, "step": 798 }, { "epoch": 2.09861932938856, "high_lr": 0.00058, "low_lr": 1.16e-05, "step": 798 }, { "epoch": 2.101249178172255, "grad_norm": 1.207044005393982, "learning_rate": 0.0005794736842105264, "loss": 1.4144, "step": 799 }, { "epoch": 2.101249178172255, "high_lr": 0.0005794736842105264, "low_lr": 1.1589473684210529e-05, "step": 799 }, { "epoch": 2.101249178172255, "high_lr": 0.0005794736842105264, "low_lr": 1.1589473684210529e-05, "step": 799 }, { "epoch": 2.101249178172255, "high_lr": 0.0005794736842105264, "low_lr": 1.1589473684210529e-05, "step": 799 }, { "epoch": 2.101249178172255, "high_lr": 0.0005794736842105264, "low_lr": 1.1589473684210529e-05, "step": 799 }, { "epoch": 2.101249178172255, "high_lr": 0.0005794736842105264, "low_lr": 1.1589473684210529e-05, "step": 799 }, { "epoch": 2.101249178172255, "high_lr": 0.0005794736842105264, "low_lr": 1.1589473684210529e-05, "step": 799 }, { "epoch": 2.101249178172255, "high_lr": 0.0005794736842105264, "low_lr": 1.1589473684210529e-05, "step": 799 }, { "epoch": 2.101249178172255, "high_lr": 0.0005794736842105264, "low_lr": 1.1589473684210529e-05, "step": 799 }, { "epoch": 2.10387902695595, "grad_norm": 1.188481092453003, "learning_rate": 0.0005789473684210527, "loss": 1.3837, "step": 800 }, { "epoch": 2.10387902695595, "high_lr": 0.0005789473684210527, "low_lr": 1.1578947368421053e-05, "step": 800 }, { "epoch": 2.10387902695595, "high_lr": 0.0005789473684210527, "low_lr": 1.1578947368421053e-05, "step": 800 }, { "epoch": 2.10387902695595, "high_lr": 0.0005789473684210527, "low_lr": 1.1578947368421053e-05, "step": 800 }, { "epoch": 2.10387902695595, "high_lr": 0.0005789473684210527, "low_lr": 1.1578947368421053e-05, "step": 800 }, { "epoch": 2.10387902695595, "high_lr": 0.0005789473684210527, "low_lr": 1.1578947368421053e-05, "step": 800 }, { "epoch": 2.10387902695595, "high_lr": 0.0005789473684210527, "low_lr": 1.1578947368421053e-05, "step": 800 }, { "epoch": 2.10387902695595, "high_lr": 0.0005789473684210527, "low_lr": 1.1578947368421053e-05, "step": 800 }, { "epoch": 2.10387902695595, "high_lr": 0.0005789473684210527, "low_lr": 1.1578947368421053e-05, "step": 800 }, { "epoch": 2.106508875739645, "grad_norm": 1.1202236413955688, "learning_rate": 0.000578421052631579, "loss": 1.3754, "step": 801 }, { "epoch": 2.106508875739645, "high_lr": 0.000578421052631579, "low_lr": 1.156842105263158e-05, "step": 801 }, { "epoch": 2.106508875739645, "high_lr": 0.000578421052631579, "low_lr": 1.156842105263158e-05, "step": 801 }, { "epoch": 2.106508875739645, "high_lr": 0.000578421052631579, "low_lr": 1.156842105263158e-05, "step": 801 }, { "epoch": 2.106508875739645, "high_lr": 0.000578421052631579, "low_lr": 1.156842105263158e-05, "step": 801 }, { "epoch": 2.106508875739645, "high_lr": 0.000578421052631579, "low_lr": 1.156842105263158e-05, "step": 801 }, { "epoch": 2.106508875739645, "high_lr": 0.000578421052631579, "low_lr": 1.156842105263158e-05, "step": 801 }, { "epoch": 2.106508875739645, "high_lr": 0.000578421052631579, "low_lr": 1.156842105263158e-05, "step": 801 }, { "epoch": 2.106508875739645, "high_lr": 0.000578421052631579, "low_lr": 1.156842105263158e-05, "step": 801 }, { "epoch": 2.1091387245233397, "grad_norm": 1.2411617040634155, "learning_rate": 0.0005778947368421053, "loss": 1.3105, "step": 802 }, { "epoch": 2.1091387245233397, "high_lr": 0.0005778947368421053, "low_lr": 1.1557894736842106e-05, "step": 802 }, { "epoch": 2.1091387245233397, "high_lr": 0.0005778947368421053, "low_lr": 1.1557894736842106e-05, "step": 802 }, { "epoch": 2.1091387245233397, "high_lr": 0.0005778947368421053, "low_lr": 1.1557894736842106e-05, "step": 802 }, { "epoch": 2.1091387245233397, "high_lr": 0.0005778947368421053, "low_lr": 1.1557894736842106e-05, "step": 802 }, { "epoch": 2.1091387245233397, "high_lr": 0.0005778947368421053, "low_lr": 1.1557894736842106e-05, "step": 802 }, { "epoch": 2.1091387245233397, "high_lr": 0.0005778947368421053, "low_lr": 1.1557894736842106e-05, "step": 802 }, { "epoch": 2.1091387245233397, "high_lr": 0.0005778947368421053, "low_lr": 1.1557894736842106e-05, "step": 802 }, { "epoch": 2.1091387245233397, "high_lr": 0.0005778947368421053, "low_lr": 1.1557894736842106e-05, "step": 802 }, { "epoch": 2.111768573307035, "grad_norm": 1.1825741529464722, "learning_rate": 0.0005773684210526315, "loss": 1.3767, "step": 803 }, { "epoch": 2.111768573307035, "high_lr": 0.0005773684210526315, "low_lr": 1.1547368421052632e-05, "step": 803 }, { "epoch": 2.111768573307035, "high_lr": 0.0005773684210526315, "low_lr": 1.1547368421052632e-05, "step": 803 }, { "epoch": 2.111768573307035, "high_lr": 0.0005773684210526315, "low_lr": 1.1547368421052632e-05, "step": 803 }, { "epoch": 2.111768573307035, "high_lr": 0.0005773684210526315, "low_lr": 1.1547368421052632e-05, "step": 803 }, { "epoch": 2.111768573307035, "high_lr": 0.0005773684210526315, "low_lr": 1.1547368421052632e-05, "step": 803 }, { "epoch": 2.111768573307035, "high_lr": 0.0005773684210526315, "low_lr": 1.1547368421052632e-05, "step": 803 }, { "epoch": 2.111768573307035, "high_lr": 0.0005773684210526315, "low_lr": 1.1547368421052632e-05, "step": 803 }, { "epoch": 2.111768573307035, "high_lr": 0.0005773684210526315, "low_lr": 1.1547368421052632e-05, "step": 803 }, { "epoch": 2.11439842209073, "grad_norm": 1.1901229619979858, "learning_rate": 0.0005768421052631579, "loss": 1.4043, "step": 804 }, { "epoch": 2.11439842209073, "high_lr": 0.0005768421052631579, "low_lr": 1.153684210526316e-05, "step": 804 }, { "epoch": 2.11439842209073, "high_lr": 0.0005768421052631579, "low_lr": 1.153684210526316e-05, "step": 804 }, { "epoch": 2.11439842209073, "high_lr": 0.0005768421052631579, "low_lr": 1.153684210526316e-05, "step": 804 }, { "epoch": 2.11439842209073, "high_lr": 0.0005768421052631579, "low_lr": 1.153684210526316e-05, "step": 804 }, { "epoch": 2.11439842209073, "high_lr": 0.0005768421052631579, "low_lr": 1.153684210526316e-05, "step": 804 }, { "epoch": 2.11439842209073, "high_lr": 0.0005768421052631579, "low_lr": 1.153684210526316e-05, "step": 804 }, { "epoch": 2.11439842209073, "high_lr": 0.0005768421052631579, "low_lr": 1.153684210526316e-05, "step": 804 }, { "epoch": 2.11439842209073, "high_lr": 0.0005768421052631579, "low_lr": 1.153684210526316e-05, "step": 804 }, { "epoch": 2.1170282708744246, "grad_norm": 1.2637526988983154, "learning_rate": 0.0005763157894736842, "loss": 1.4499, "step": 805 }, { "epoch": 2.1170282708744246, "high_lr": 0.0005763157894736842, "low_lr": 1.1526315789473685e-05, "step": 805 }, { "epoch": 2.1170282708744246, "high_lr": 0.0005763157894736842, "low_lr": 1.1526315789473685e-05, "step": 805 }, { "epoch": 2.1170282708744246, "high_lr": 0.0005763157894736842, "low_lr": 1.1526315789473685e-05, "step": 805 }, { "epoch": 2.1170282708744246, "high_lr": 0.0005763157894736842, "low_lr": 1.1526315789473685e-05, "step": 805 }, { "epoch": 2.1170282708744246, "high_lr": 0.0005763157894736842, "low_lr": 1.1526315789473685e-05, "step": 805 }, { "epoch": 2.1170282708744246, "high_lr": 0.0005763157894736842, "low_lr": 1.1526315789473685e-05, "step": 805 }, { "epoch": 2.1170282708744246, "high_lr": 0.0005763157894736842, "low_lr": 1.1526315789473685e-05, "step": 805 }, { "epoch": 2.1170282708744246, "high_lr": 0.0005763157894736842, "low_lr": 1.1526315789473685e-05, "step": 805 }, { "epoch": 2.1196581196581197, "grad_norm": 1.1765457391738892, "learning_rate": 0.0005757894736842105, "loss": 1.365, "step": 806 }, { "epoch": 2.1196581196581197, "high_lr": 0.0005757894736842105, "low_lr": 1.1515789473684211e-05, "step": 806 }, { "epoch": 2.1196581196581197, "high_lr": 0.0005757894736842105, "low_lr": 1.1515789473684211e-05, "step": 806 }, { "epoch": 2.1196581196581197, "high_lr": 0.0005757894736842105, "low_lr": 1.1515789473684211e-05, "step": 806 }, { "epoch": 2.1196581196581197, "high_lr": 0.0005757894736842105, "low_lr": 1.1515789473684211e-05, "step": 806 }, { "epoch": 2.1196581196581197, "high_lr": 0.0005757894736842105, "low_lr": 1.1515789473684211e-05, "step": 806 }, { "epoch": 2.1196581196581197, "high_lr": 0.0005757894736842105, "low_lr": 1.1515789473684211e-05, "step": 806 }, { "epoch": 2.1196581196581197, "high_lr": 0.0005757894736842105, "low_lr": 1.1515789473684211e-05, "step": 806 }, { "epoch": 2.1196581196581197, "high_lr": 0.0005757894736842105, "low_lr": 1.1515789473684211e-05, "step": 806 }, { "epoch": 2.1222879684418148, "grad_norm": 1.150899887084961, "learning_rate": 0.0005752631578947368, "loss": 1.3587, "step": 807 }, { "epoch": 2.1222879684418148, "high_lr": 0.0005752631578947368, "low_lr": 1.1505263157894738e-05, "step": 807 }, { "epoch": 2.1222879684418148, "high_lr": 0.0005752631578947368, "low_lr": 1.1505263157894738e-05, "step": 807 }, { "epoch": 2.1222879684418148, "high_lr": 0.0005752631578947368, "low_lr": 1.1505263157894738e-05, "step": 807 }, { "epoch": 2.1222879684418148, "high_lr": 0.0005752631578947368, "low_lr": 1.1505263157894738e-05, "step": 807 }, { "epoch": 2.1222879684418148, "high_lr": 0.0005752631578947368, "low_lr": 1.1505263157894738e-05, "step": 807 }, { "epoch": 2.1222879684418148, "high_lr": 0.0005752631578947368, "low_lr": 1.1505263157894738e-05, "step": 807 }, { "epoch": 2.1222879684418148, "high_lr": 0.0005752631578947368, "low_lr": 1.1505263157894738e-05, "step": 807 }, { "epoch": 2.1222879684418148, "high_lr": 0.0005752631578947368, "low_lr": 1.1505263157894738e-05, "step": 807 }, { "epoch": 2.1249178172255094, "grad_norm": 1.220388412475586, "learning_rate": 0.0005747368421052632, "loss": 1.3871, "step": 808 }, { "epoch": 2.1249178172255094, "high_lr": 0.0005747368421052632, "low_lr": 1.1494736842105266e-05, "step": 808 }, { "epoch": 2.1249178172255094, "high_lr": 0.0005747368421052632, "low_lr": 1.1494736842105266e-05, "step": 808 }, { "epoch": 2.1249178172255094, "high_lr": 0.0005747368421052632, "low_lr": 1.1494736842105266e-05, "step": 808 }, { "epoch": 2.1249178172255094, "high_lr": 0.0005747368421052632, "low_lr": 1.1494736842105266e-05, "step": 808 }, { "epoch": 2.1249178172255094, "high_lr": 0.0005747368421052632, "low_lr": 1.1494736842105266e-05, "step": 808 }, { "epoch": 2.1249178172255094, "high_lr": 0.0005747368421052632, "low_lr": 1.1494736842105266e-05, "step": 808 }, { "epoch": 2.1249178172255094, "high_lr": 0.0005747368421052632, "low_lr": 1.1494736842105266e-05, "step": 808 }, { "epoch": 2.1249178172255094, "high_lr": 0.0005747368421052632, "low_lr": 1.1494736842105266e-05, "step": 808 }, { "epoch": 2.1275476660092045, "grad_norm": 1.2202270030975342, "learning_rate": 0.0005742105263157895, "loss": 1.3888, "step": 809 }, { "epoch": 2.1275476660092045, "high_lr": 0.0005742105263157895, "low_lr": 1.148421052631579e-05, "step": 809 }, { "epoch": 2.1275476660092045, "high_lr": 0.0005742105263157895, "low_lr": 1.148421052631579e-05, "step": 809 }, { "epoch": 2.1275476660092045, "high_lr": 0.0005742105263157895, "low_lr": 1.148421052631579e-05, "step": 809 }, { "epoch": 2.1275476660092045, "high_lr": 0.0005742105263157895, "low_lr": 1.148421052631579e-05, "step": 809 }, { "epoch": 2.1275476660092045, "high_lr": 0.0005742105263157895, "low_lr": 1.148421052631579e-05, "step": 809 }, { "epoch": 2.1275476660092045, "high_lr": 0.0005742105263157895, "low_lr": 1.148421052631579e-05, "step": 809 }, { "epoch": 2.1275476660092045, "high_lr": 0.0005742105263157895, "low_lr": 1.148421052631579e-05, "step": 809 }, { "epoch": 2.1275476660092045, "high_lr": 0.0005742105263157895, "low_lr": 1.148421052631579e-05, "step": 809 }, { "epoch": 2.1301775147928996, "grad_norm": 1.2154358625411987, "learning_rate": 0.0005736842105263158, "loss": 1.3709, "step": 810 }, { "epoch": 2.1301775147928996, "high_lr": 0.0005736842105263158, "low_lr": 1.1473684210526317e-05, "step": 810 }, { "epoch": 2.1301775147928996, "high_lr": 0.0005736842105263158, "low_lr": 1.1473684210526317e-05, "step": 810 }, { "epoch": 2.1301775147928996, "high_lr": 0.0005736842105263158, "low_lr": 1.1473684210526317e-05, "step": 810 }, { "epoch": 2.1301775147928996, "high_lr": 0.0005736842105263158, "low_lr": 1.1473684210526317e-05, "step": 810 }, { "epoch": 2.1301775147928996, "high_lr": 0.0005736842105263158, "low_lr": 1.1473684210526317e-05, "step": 810 }, { "epoch": 2.1301775147928996, "high_lr": 0.0005736842105263158, "low_lr": 1.1473684210526317e-05, "step": 810 }, { "epoch": 2.1301775147928996, "high_lr": 0.0005736842105263158, "low_lr": 1.1473684210526317e-05, "step": 810 }, { "epoch": 2.1301775147928996, "high_lr": 0.0005736842105263158, "low_lr": 1.1473684210526317e-05, "step": 810 }, { "epoch": 2.1328073635765943, "grad_norm": 1.2578213214874268, "learning_rate": 0.0005731578947368422, "loss": 1.4057, "step": 811 }, { "epoch": 2.1328073635765943, "high_lr": 0.0005731578947368422, "low_lr": 1.1463157894736843e-05, "step": 811 }, { "epoch": 2.1328073635765943, "high_lr": 0.0005731578947368422, "low_lr": 1.1463157894736843e-05, "step": 811 }, { "epoch": 2.1328073635765943, "high_lr": 0.0005731578947368422, "low_lr": 1.1463157894736843e-05, "step": 811 }, { "epoch": 2.1328073635765943, "high_lr": 0.0005731578947368422, "low_lr": 1.1463157894736843e-05, "step": 811 }, { "epoch": 2.1328073635765943, "high_lr": 0.0005731578947368422, "low_lr": 1.1463157894736843e-05, "step": 811 }, { "epoch": 2.1328073635765943, "high_lr": 0.0005731578947368422, "low_lr": 1.1463157894736843e-05, "step": 811 }, { "epoch": 2.1328073635765943, "high_lr": 0.0005731578947368422, "low_lr": 1.1463157894736843e-05, "step": 811 }, { "epoch": 2.1328073635765943, "high_lr": 0.0005731578947368422, "low_lr": 1.1463157894736843e-05, "step": 811 }, { "epoch": 2.1354372123602894, "grad_norm": 1.2131134271621704, "learning_rate": 0.0005726315789473684, "loss": 1.3604, "step": 812 }, { "epoch": 2.1354372123602894, "high_lr": 0.0005726315789473684, "low_lr": 1.145263157894737e-05, "step": 812 }, { "epoch": 2.1354372123602894, "high_lr": 0.0005726315789473684, "low_lr": 1.145263157894737e-05, "step": 812 }, { "epoch": 2.1354372123602894, "high_lr": 0.0005726315789473684, "low_lr": 1.145263157894737e-05, "step": 812 }, { "epoch": 2.1354372123602894, "high_lr": 0.0005726315789473684, "low_lr": 1.145263157894737e-05, "step": 812 }, { "epoch": 2.1354372123602894, "high_lr": 0.0005726315789473684, "low_lr": 1.145263157894737e-05, "step": 812 }, { "epoch": 2.1354372123602894, "high_lr": 0.0005726315789473684, "low_lr": 1.145263157894737e-05, "step": 812 }, { "epoch": 2.1354372123602894, "high_lr": 0.0005726315789473684, "low_lr": 1.145263157894737e-05, "step": 812 }, { "epoch": 2.1354372123602894, "high_lr": 0.0005726315789473684, "low_lr": 1.145263157894737e-05, "step": 812 }, { "epoch": 2.138067061143984, "grad_norm": 1.1633799076080322, "learning_rate": 0.0005721052631578948, "loss": 1.3531, "step": 813 }, { "epoch": 2.138067061143984, "high_lr": 0.0005721052631578948, "low_lr": 1.1442105263157897e-05, "step": 813 }, { "epoch": 2.138067061143984, "high_lr": 0.0005721052631578948, "low_lr": 1.1442105263157897e-05, "step": 813 }, { "epoch": 2.138067061143984, "high_lr": 0.0005721052631578948, "low_lr": 1.1442105263157897e-05, "step": 813 }, { "epoch": 2.138067061143984, "high_lr": 0.0005721052631578948, "low_lr": 1.1442105263157897e-05, "step": 813 }, { "epoch": 2.138067061143984, "high_lr": 0.0005721052631578948, "low_lr": 1.1442105263157897e-05, "step": 813 }, { "epoch": 2.138067061143984, "high_lr": 0.0005721052631578948, "low_lr": 1.1442105263157897e-05, "step": 813 }, { "epoch": 2.138067061143984, "high_lr": 0.0005721052631578948, "low_lr": 1.1442105263157897e-05, "step": 813 }, { "epoch": 2.138067061143984, "high_lr": 0.0005721052631578948, "low_lr": 1.1442105263157897e-05, "step": 813 }, { "epoch": 2.140696909927679, "grad_norm": 1.1977272033691406, "learning_rate": 0.0005715789473684211, "loss": 1.3672, "step": 814 }, { "epoch": 2.140696909927679, "high_lr": 0.0005715789473684211, "low_lr": 1.1431578947368422e-05, "step": 814 }, { "epoch": 2.140696909927679, "high_lr": 0.0005715789473684211, "low_lr": 1.1431578947368422e-05, "step": 814 }, { "epoch": 2.140696909927679, "high_lr": 0.0005715789473684211, "low_lr": 1.1431578947368422e-05, "step": 814 }, { "epoch": 2.140696909927679, "high_lr": 0.0005715789473684211, "low_lr": 1.1431578947368422e-05, "step": 814 }, { "epoch": 2.140696909927679, "high_lr": 0.0005715789473684211, "low_lr": 1.1431578947368422e-05, "step": 814 }, { "epoch": 2.140696909927679, "high_lr": 0.0005715789473684211, "low_lr": 1.1431578947368422e-05, "step": 814 }, { "epoch": 2.140696909927679, "high_lr": 0.0005715789473684211, "low_lr": 1.1431578947368422e-05, "step": 814 }, { "epoch": 2.140696909927679, "high_lr": 0.0005715789473684211, "low_lr": 1.1431578947368422e-05, "step": 814 }, { "epoch": 2.1433267587113742, "grad_norm": 1.2198271751403809, "learning_rate": 0.0005710526315789474, "loss": 1.3856, "step": 815 }, { "epoch": 2.1433267587113742, "high_lr": 0.0005710526315789474, "low_lr": 1.1421052631578948e-05, "step": 815 }, { "epoch": 2.1433267587113742, "high_lr": 0.0005710526315789474, "low_lr": 1.1421052631578948e-05, "step": 815 }, { "epoch": 2.1433267587113742, "high_lr": 0.0005710526315789474, "low_lr": 1.1421052631578948e-05, "step": 815 }, { "epoch": 2.1433267587113742, "high_lr": 0.0005710526315789474, "low_lr": 1.1421052631578948e-05, "step": 815 }, { "epoch": 2.1433267587113742, "high_lr": 0.0005710526315789474, "low_lr": 1.1421052631578948e-05, "step": 815 }, { "epoch": 2.1433267587113742, "high_lr": 0.0005710526315789474, "low_lr": 1.1421052631578948e-05, "step": 815 }, { "epoch": 2.1433267587113742, "high_lr": 0.0005710526315789474, "low_lr": 1.1421052631578948e-05, "step": 815 }, { "epoch": 2.1433267587113742, "high_lr": 0.0005710526315789474, "low_lr": 1.1421052631578948e-05, "step": 815 }, { "epoch": 2.145956607495069, "grad_norm": 1.2254588603973389, "learning_rate": 0.0005705263157894737, "loss": 1.3596, "step": 816 }, { "epoch": 2.145956607495069, "high_lr": 0.0005705263157894737, "low_lr": 1.1410526315789475e-05, "step": 816 }, { "epoch": 2.145956607495069, "high_lr": 0.0005705263157894737, "low_lr": 1.1410526315789475e-05, "step": 816 }, { "epoch": 2.145956607495069, "high_lr": 0.0005705263157894737, "low_lr": 1.1410526315789475e-05, "step": 816 }, { "epoch": 2.145956607495069, "high_lr": 0.0005705263157894737, "low_lr": 1.1410526315789475e-05, "step": 816 }, { "epoch": 2.145956607495069, "high_lr": 0.0005705263157894737, "low_lr": 1.1410526315789475e-05, "step": 816 }, { "epoch": 2.145956607495069, "high_lr": 0.0005705263157894737, "low_lr": 1.1410526315789475e-05, "step": 816 }, { "epoch": 2.145956607495069, "high_lr": 0.0005705263157894737, "low_lr": 1.1410526315789475e-05, "step": 816 }, { "epoch": 2.145956607495069, "high_lr": 0.0005705263157894737, "low_lr": 1.1410526315789475e-05, "step": 816 }, { "epoch": 2.148586456278764, "grad_norm": 1.173850417137146, "learning_rate": 0.00057, "loss": 1.4394, "step": 817 }, { "epoch": 2.148586456278764, "high_lr": 0.00057, "low_lr": 1.14e-05, "step": 817 }, { "epoch": 2.148586456278764, "high_lr": 0.00057, "low_lr": 1.14e-05, "step": 817 }, { "epoch": 2.148586456278764, "high_lr": 0.00057, "low_lr": 1.14e-05, "step": 817 }, { "epoch": 2.148586456278764, "high_lr": 0.00057, "low_lr": 1.14e-05, "step": 817 }, { "epoch": 2.148586456278764, "high_lr": 0.00057, "low_lr": 1.14e-05, "step": 817 }, { "epoch": 2.148586456278764, "high_lr": 0.00057, "low_lr": 1.14e-05, "step": 817 }, { "epoch": 2.148586456278764, "high_lr": 0.00057, "low_lr": 1.14e-05, "step": 817 }, { "epoch": 2.148586456278764, "high_lr": 0.00057, "low_lr": 1.14e-05, "step": 817 }, { "epoch": 2.151216305062459, "grad_norm": 1.183383584022522, "learning_rate": 0.0005694736842105264, "loss": 1.3927, "step": 818 }, { "epoch": 2.151216305062459, "high_lr": 0.0005694736842105264, "low_lr": 1.1389473684210527e-05, "step": 818 }, { "epoch": 2.151216305062459, "high_lr": 0.0005694736842105264, "low_lr": 1.1389473684210527e-05, "step": 818 }, { "epoch": 2.151216305062459, "high_lr": 0.0005694736842105264, "low_lr": 1.1389473684210527e-05, "step": 818 }, { "epoch": 2.151216305062459, "high_lr": 0.0005694736842105264, "low_lr": 1.1389473684210527e-05, "step": 818 }, { "epoch": 2.151216305062459, "high_lr": 0.0005694736842105264, "low_lr": 1.1389473684210527e-05, "step": 818 }, { "epoch": 2.151216305062459, "high_lr": 0.0005694736842105264, "low_lr": 1.1389473684210527e-05, "step": 818 }, { "epoch": 2.151216305062459, "high_lr": 0.0005694736842105264, "low_lr": 1.1389473684210527e-05, "step": 818 }, { "epoch": 2.151216305062459, "high_lr": 0.0005694736842105264, "low_lr": 1.1389473684210527e-05, "step": 818 }, { "epoch": 2.1538461538461537, "grad_norm": 1.128944754600525, "learning_rate": 0.0005689473684210527, "loss": 1.3884, "step": 819 }, { "epoch": 2.1538461538461537, "high_lr": 0.0005689473684210527, "low_lr": 1.1378947368421054e-05, "step": 819 }, { "epoch": 2.1538461538461537, "high_lr": 0.0005689473684210527, "low_lr": 1.1378947368421054e-05, "step": 819 }, { "epoch": 2.1538461538461537, "high_lr": 0.0005689473684210527, "low_lr": 1.1378947368421054e-05, "step": 819 }, { "epoch": 2.1538461538461537, "high_lr": 0.0005689473684210527, "low_lr": 1.1378947368421054e-05, "step": 819 }, { "epoch": 2.1538461538461537, "high_lr": 0.0005689473684210527, "low_lr": 1.1378947368421054e-05, "step": 819 }, { "epoch": 2.1538461538461537, "high_lr": 0.0005689473684210527, "low_lr": 1.1378947368421054e-05, "step": 819 }, { "epoch": 2.1538461538461537, "high_lr": 0.0005689473684210527, "low_lr": 1.1378947368421054e-05, "step": 819 }, { "epoch": 2.1538461538461537, "high_lr": 0.0005689473684210527, "low_lr": 1.1378947368421054e-05, "step": 819 }, { "epoch": 2.156476002629849, "grad_norm": 1.2648850679397583, "learning_rate": 0.0005684210526315789, "loss": 1.3936, "step": 820 }, { "epoch": 2.156476002629849, "high_lr": 0.0005684210526315789, "low_lr": 1.136842105263158e-05, "step": 820 }, { "epoch": 2.156476002629849, "high_lr": 0.0005684210526315789, "low_lr": 1.136842105263158e-05, "step": 820 }, { "epoch": 2.156476002629849, "high_lr": 0.0005684210526315789, "low_lr": 1.136842105263158e-05, "step": 820 }, { "epoch": 2.156476002629849, "high_lr": 0.0005684210526315789, "low_lr": 1.136842105263158e-05, "step": 820 }, { "epoch": 2.156476002629849, "high_lr": 0.0005684210526315789, "low_lr": 1.136842105263158e-05, "step": 820 }, { "epoch": 2.156476002629849, "high_lr": 0.0005684210526315789, "low_lr": 1.136842105263158e-05, "step": 820 }, { "epoch": 2.156476002629849, "high_lr": 0.0005684210526315789, "low_lr": 1.136842105263158e-05, "step": 820 }, { "epoch": 2.156476002629849, "high_lr": 0.0005684210526315789, "low_lr": 1.136842105263158e-05, "step": 820 }, { "epoch": 2.1591058514135435, "grad_norm": 1.2596508264541626, "learning_rate": 0.0005678947368421052, "loss": 1.3974, "step": 821 }, { "epoch": 2.1591058514135435, "high_lr": 0.0005678947368421052, "low_lr": 1.1357894736842106e-05, "step": 821 }, { "epoch": 2.1591058514135435, "high_lr": 0.0005678947368421052, "low_lr": 1.1357894736842106e-05, "step": 821 }, { "epoch": 2.1591058514135435, "high_lr": 0.0005678947368421052, "low_lr": 1.1357894736842106e-05, "step": 821 }, { "epoch": 2.1591058514135435, "high_lr": 0.0005678947368421052, "low_lr": 1.1357894736842106e-05, "step": 821 }, { "epoch": 2.1591058514135435, "high_lr": 0.0005678947368421052, "low_lr": 1.1357894736842106e-05, "step": 821 }, { "epoch": 2.1591058514135435, "high_lr": 0.0005678947368421052, "low_lr": 1.1357894736842106e-05, "step": 821 }, { "epoch": 2.1591058514135435, "high_lr": 0.0005678947368421052, "low_lr": 1.1357894736842106e-05, "step": 821 }, { "epoch": 2.1591058514135435, "high_lr": 0.0005678947368421052, "low_lr": 1.1357894736842106e-05, "step": 821 }, { "epoch": 2.1617357001972386, "grad_norm": 1.180319905281067, "learning_rate": 0.0005673684210526316, "loss": 1.3733, "step": 822 }, { "epoch": 2.1617357001972386, "high_lr": 0.0005673684210526316, "low_lr": 1.1347368421052634e-05, "step": 822 }, { "epoch": 2.1617357001972386, "high_lr": 0.0005673684210526316, "low_lr": 1.1347368421052634e-05, "step": 822 }, { "epoch": 2.1617357001972386, "high_lr": 0.0005673684210526316, "low_lr": 1.1347368421052634e-05, "step": 822 }, { "epoch": 2.1617357001972386, "high_lr": 0.0005673684210526316, "low_lr": 1.1347368421052634e-05, "step": 822 }, { "epoch": 2.1617357001972386, "high_lr": 0.0005673684210526316, "low_lr": 1.1347368421052634e-05, "step": 822 }, { "epoch": 2.1617357001972386, "high_lr": 0.0005673684210526316, "low_lr": 1.1347368421052634e-05, "step": 822 }, { "epoch": 2.1617357001972386, "high_lr": 0.0005673684210526316, "low_lr": 1.1347368421052634e-05, "step": 822 }, { "epoch": 2.1617357001972386, "high_lr": 0.0005673684210526316, "low_lr": 1.1347368421052634e-05, "step": 822 }, { "epoch": 2.1643655489809337, "grad_norm": 1.2433300018310547, "learning_rate": 0.0005668421052631579, "loss": 1.4125, "step": 823 }, { "epoch": 2.1643655489809337, "high_lr": 0.0005668421052631579, "low_lr": 1.1336842105263159e-05, "step": 823 }, { "epoch": 2.1643655489809337, "high_lr": 0.0005668421052631579, "low_lr": 1.1336842105263159e-05, "step": 823 }, { "epoch": 2.1643655489809337, "high_lr": 0.0005668421052631579, "low_lr": 1.1336842105263159e-05, "step": 823 }, { "epoch": 2.1643655489809337, "high_lr": 0.0005668421052631579, "low_lr": 1.1336842105263159e-05, "step": 823 }, { "epoch": 2.1643655489809337, "high_lr": 0.0005668421052631579, "low_lr": 1.1336842105263159e-05, "step": 823 }, { "epoch": 2.1643655489809337, "high_lr": 0.0005668421052631579, "low_lr": 1.1336842105263159e-05, "step": 823 }, { "epoch": 2.1643655489809337, "high_lr": 0.0005668421052631579, "low_lr": 1.1336842105263159e-05, "step": 823 }, { "epoch": 2.1643655489809337, "high_lr": 0.0005668421052631579, "low_lr": 1.1336842105263159e-05, "step": 823 }, { "epoch": 2.1669953977646284, "grad_norm": 1.219747543334961, "learning_rate": 0.0005663157894736842, "loss": 1.4256, "step": 824 }, { "epoch": 2.1669953977646284, "high_lr": 0.0005663157894736842, "low_lr": 1.1326315789473685e-05, "step": 824 }, { "epoch": 2.1669953977646284, "high_lr": 0.0005663157894736842, "low_lr": 1.1326315789473685e-05, "step": 824 }, { "epoch": 2.1669953977646284, "high_lr": 0.0005663157894736842, "low_lr": 1.1326315789473685e-05, "step": 824 }, { "epoch": 2.1669953977646284, "high_lr": 0.0005663157894736842, "low_lr": 1.1326315789473685e-05, "step": 824 }, { "epoch": 2.1669953977646284, "high_lr": 0.0005663157894736842, "low_lr": 1.1326315789473685e-05, "step": 824 }, { "epoch": 2.1669953977646284, "high_lr": 0.0005663157894736842, "low_lr": 1.1326315789473685e-05, "step": 824 }, { "epoch": 2.1669953977646284, "high_lr": 0.0005663157894736842, "low_lr": 1.1326315789473685e-05, "step": 824 }, { "epoch": 2.1669953977646284, "high_lr": 0.0005663157894736842, "low_lr": 1.1326315789473685e-05, "step": 824 }, { "epoch": 2.1696252465483234, "grad_norm": 1.3756067752838135, "learning_rate": 0.0005657894736842105, "loss": 1.3732, "step": 825 }, { "epoch": 2.1696252465483234, "high_lr": 0.0005657894736842105, "low_lr": 1.1315789473684212e-05, "step": 825 }, { "epoch": 2.1696252465483234, "high_lr": 0.0005657894736842105, "low_lr": 1.1315789473684212e-05, "step": 825 }, { "epoch": 2.1696252465483234, "high_lr": 0.0005657894736842105, "low_lr": 1.1315789473684212e-05, "step": 825 }, { "epoch": 2.1696252465483234, "high_lr": 0.0005657894736842105, "low_lr": 1.1315789473684212e-05, "step": 825 }, { "epoch": 2.1696252465483234, "high_lr": 0.0005657894736842105, "low_lr": 1.1315789473684212e-05, "step": 825 }, { "epoch": 2.1696252465483234, "high_lr": 0.0005657894736842105, "low_lr": 1.1315789473684212e-05, "step": 825 }, { "epoch": 2.1696252465483234, "high_lr": 0.0005657894736842105, "low_lr": 1.1315789473684212e-05, "step": 825 }, { "epoch": 2.1696252465483234, "high_lr": 0.0005657894736842105, "low_lr": 1.1315789473684212e-05, "step": 825 }, { "epoch": 2.1722550953320185, "grad_norm": 1.3394635915756226, "learning_rate": 0.0005652631578947368, "loss": 1.427, "step": 826 }, { "epoch": 2.1722550953320185, "high_lr": 0.0005652631578947368, "low_lr": 1.1305263157894736e-05, "step": 826 }, { "epoch": 2.1722550953320185, "high_lr": 0.0005652631578947368, "low_lr": 1.1305263157894736e-05, "step": 826 }, { "epoch": 2.1722550953320185, "high_lr": 0.0005652631578947368, "low_lr": 1.1305263157894736e-05, "step": 826 }, { "epoch": 2.1722550953320185, "high_lr": 0.0005652631578947368, "low_lr": 1.1305263157894736e-05, "step": 826 }, { "epoch": 2.1722550953320185, "high_lr": 0.0005652631578947368, "low_lr": 1.1305263157894736e-05, "step": 826 }, { "epoch": 2.1722550953320185, "high_lr": 0.0005652631578947368, "low_lr": 1.1305263157894736e-05, "step": 826 }, { "epoch": 2.1722550953320185, "high_lr": 0.0005652631578947368, "low_lr": 1.1305263157894736e-05, "step": 826 }, { "epoch": 2.1722550953320185, "high_lr": 0.0005652631578947368, "low_lr": 1.1305263157894736e-05, "step": 826 }, { "epoch": 2.174884944115713, "grad_norm": 1.1506638526916504, "learning_rate": 0.0005647368421052633, "loss": 1.3673, "step": 827 }, { "epoch": 2.174884944115713, "high_lr": 0.0005647368421052633, "low_lr": 1.1294736842105264e-05, "step": 827 }, { "epoch": 2.174884944115713, "high_lr": 0.0005647368421052633, "low_lr": 1.1294736842105264e-05, "step": 827 }, { "epoch": 2.174884944115713, "high_lr": 0.0005647368421052633, "low_lr": 1.1294736842105264e-05, "step": 827 }, { "epoch": 2.174884944115713, "high_lr": 0.0005647368421052633, "low_lr": 1.1294736842105264e-05, "step": 827 }, { "epoch": 2.174884944115713, "high_lr": 0.0005647368421052633, "low_lr": 1.1294736842105264e-05, "step": 827 }, { "epoch": 2.174884944115713, "high_lr": 0.0005647368421052633, "low_lr": 1.1294736842105264e-05, "step": 827 }, { "epoch": 2.174884944115713, "high_lr": 0.0005647368421052633, "low_lr": 1.1294736842105264e-05, "step": 827 }, { "epoch": 2.174884944115713, "high_lr": 0.0005647368421052633, "low_lr": 1.1294736842105264e-05, "step": 827 }, { "epoch": 2.1775147928994083, "grad_norm": 1.342165231704712, "learning_rate": 0.0005642105263157896, "loss": 1.3631, "step": 828 }, { "epoch": 2.1775147928994083, "high_lr": 0.0005642105263157896, "low_lr": 1.128421052631579e-05, "step": 828 }, { "epoch": 2.1775147928994083, "high_lr": 0.0005642105263157896, "low_lr": 1.128421052631579e-05, "step": 828 }, { "epoch": 2.1775147928994083, "high_lr": 0.0005642105263157896, "low_lr": 1.128421052631579e-05, "step": 828 }, { "epoch": 2.1775147928994083, "high_lr": 0.0005642105263157896, "low_lr": 1.128421052631579e-05, "step": 828 }, { "epoch": 2.1775147928994083, "high_lr": 0.0005642105263157896, "low_lr": 1.128421052631579e-05, "step": 828 }, { "epoch": 2.1775147928994083, "high_lr": 0.0005642105263157896, "low_lr": 1.128421052631579e-05, "step": 828 }, { "epoch": 2.1775147928994083, "high_lr": 0.0005642105263157896, "low_lr": 1.128421052631579e-05, "step": 828 }, { "epoch": 2.1775147928994083, "high_lr": 0.0005642105263157896, "low_lr": 1.128421052631579e-05, "step": 828 }, { "epoch": 2.1801446416831034, "grad_norm": 1.2696932554244995, "learning_rate": 0.0005636842105263158, "loss": 1.3854, "step": 829 }, { "epoch": 2.1801446416831034, "high_lr": 0.0005636842105263158, "low_lr": 1.1273684210526317e-05, "step": 829 }, { "epoch": 2.1801446416831034, "high_lr": 0.0005636842105263158, "low_lr": 1.1273684210526317e-05, "step": 829 }, { "epoch": 2.1801446416831034, "high_lr": 0.0005636842105263158, "low_lr": 1.1273684210526317e-05, "step": 829 }, { "epoch": 2.1801446416831034, "high_lr": 0.0005636842105263158, "low_lr": 1.1273684210526317e-05, "step": 829 }, { "epoch": 2.1801446416831034, "high_lr": 0.0005636842105263158, "low_lr": 1.1273684210526317e-05, "step": 829 }, { "epoch": 2.1801446416831034, "high_lr": 0.0005636842105263158, "low_lr": 1.1273684210526317e-05, "step": 829 }, { "epoch": 2.1801446416831034, "high_lr": 0.0005636842105263158, "low_lr": 1.1273684210526317e-05, "step": 829 }, { "epoch": 2.1801446416831034, "high_lr": 0.0005636842105263158, "low_lr": 1.1273684210526317e-05, "step": 829 }, { "epoch": 2.182774490466798, "grad_norm": 2.098314046859741, "learning_rate": 0.0005631578947368421, "loss": 1.4307, "step": 830 }, { "epoch": 2.182774490466798, "high_lr": 0.0005631578947368421, "low_lr": 1.1263157894736843e-05, "step": 830 }, { "epoch": 2.182774490466798, "high_lr": 0.0005631578947368421, "low_lr": 1.1263157894736843e-05, "step": 830 }, { "epoch": 2.182774490466798, "high_lr": 0.0005631578947368421, "low_lr": 1.1263157894736843e-05, "step": 830 }, { "epoch": 2.182774490466798, "high_lr": 0.0005631578947368421, "low_lr": 1.1263157894736843e-05, "step": 830 }, { "epoch": 2.182774490466798, "high_lr": 0.0005631578947368421, "low_lr": 1.1263157894736843e-05, "step": 830 }, { "epoch": 2.182774490466798, "high_lr": 0.0005631578947368421, "low_lr": 1.1263157894736843e-05, "step": 830 }, { "epoch": 2.182774490466798, "high_lr": 0.0005631578947368421, "low_lr": 1.1263157894736843e-05, "step": 830 }, { "epoch": 2.182774490466798, "high_lr": 0.0005631578947368421, "low_lr": 1.1263157894736843e-05, "step": 830 }, { "epoch": 2.185404339250493, "grad_norm": 1.9224364757537842, "learning_rate": 0.0005626315789473684, "loss": 1.4026, "step": 831 }, { "epoch": 2.185404339250493, "high_lr": 0.0005626315789473684, "low_lr": 1.1252631578947368e-05, "step": 831 }, { "epoch": 2.185404339250493, "high_lr": 0.0005626315789473684, "low_lr": 1.1252631578947368e-05, "step": 831 }, { "epoch": 2.185404339250493, "high_lr": 0.0005626315789473684, "low_lr": 1.1252631578947368e-05, "step": 831 }, { "epoch": 2.185404339250493, "high_lr": 0.0005626315789473684, "low_lr": 1.1252631578947368e-05, "step": 831 }, { "epoch": 2.185404339250493, "high_lr": 0.0005626315789473684, "low_lr": 1.1252631578947368e-05, "step": 831 }, { "epoch": 2.185404339250493, "high_lr": 0.0005626315789473684, "low_lr": 1.1252631578947368e-05, "step": 831 }, { "epoch": 2.185404339250493, "high_lr": 0.0005626315789473684, "low_lr": 1.1252631578947368e-05, "step": 831 }, { "epoch": 2.185404339250493, "high_lr": 0.0005626315789473684, "low_lr": 1.1252631578947368e-05, "step": 831 }, { "epoch": 2.1880341880341883, "grad_norm": 1.2604079246520996, "learning_rate": 0.0005621052631578948, "loss": 1.371, "step": 832 }, { "epoch": 2.1880341880341883, "high_lr": 0.0005621052631578948, "low_lr": 1.1242105263157896e-05, "step": 832 }, { "epoch": 2.1880341880341883, "high_lr": 0.0005621052631578948, "low_lr": 1.1242105263157896e-05, "step": 832 }, { "epoch": 2.1880341880341883, "high_lr": 0.0005621052631578948, "low_lr": 1.1242105263157896e-05, "step": 832 }, { "epoch": 2.1880341880341883, "high_lr": 0.0005621052631578948, "low_lr": 1.1242105263157896e-05, "step": 832 }, { "epoch": 2.1880341880341883, "high_lr": 0.0005621052631578948, "low_lr": 1.1242105263157896e-05, "step": 832 }, { "epoch": 2.1880341880341883, "high_lr": 0.0005621052631578948, "low_lr": 1.1242105263157896e-05, "step": 832 }, { "epoch": 2.1880341880341883, "high_lr": 0.0005621052631578948, "low_lr": 1.1242105263157896e-05, "step": 832 }, { "epoch": 2.1880341880341883, "high_lr": 0.0005621052631578948, "low_lr": 1.1242105263157896e-05, "step": 832 }, { "epoch": 2.190664036817883, "grad_norm": 2.1818010807037354, "learning_rate": 0.0005615789473684211, "loss": 1.3593, "step": 833 }, { "epoch": 2.190664036817883, "high_lr": 0.0005615789473684211, "low_lr": 1.1231578947368422e-05, "step": 833 }, { "epoch": 2.190664036817883, "high_lr": 0.0005615789473684211, "low_lr": 1.1231578947368422e-05, "step": 833 }, { "epoch": 2.190664036817883, "high_lr": 0.0005615789473684211, "low_lr": 1.1231578947368422e-05, "step": 833 }, { "epoch": 2.190664036817883, "high_lr": 0.0005615789473684211, "low_lr": 1.1231578947368422e-05, "step": 833 }, { "epoch": 2.190664036817883, "high_lr": 0.0005615789473684211, "low_lr": 1.1231578947368422e-05, "step": 833 }, { "epoch": 2.190664036817883, "high_lr": 0.0005615789473684211, "low_lr": 1.1231578947368422e-05, "step": 833 }, { "epoch": 2.190664036817883, "high_lr": 0.0005615789473684211, "low_lr": 1.1231578947368422e-05, "step": 833 }, { "epoch": 2.190664036817883, "high_lr": 0.0005615789473684211, "low_lr": 1.1231578947368422e-05, "step": 833 }, { "epoch": 2.193293885601578, "grad_norm": 13.749748229980469, "learning_rate": 0.0005610526315789474, "loss": 1.4095, "step": 834 }, { "epoch": 2.193293885601578, "high_lr": 0.0005610526315789474, "low_lr": 1.1221052631578949e-05, "step": 834 }, { "epoch": 2.193293885601578, "high_lr": 0.0005610526315789474, "low_lr": 1.1221052631578949e-05, "step": 834 }, { "epoch": 2.193293885601578, "high_lr": 0.0005610526315789474, "low_lr": 1.1221052631578949e-05, "step": 834 }, { "epoch": 2.193293885601578, "high_lr": 0.0005610526315789474, "low_lr": 1.1221052631578949e-05, "step": 834 }, { "epoch": 2.193293885601578, "high_lr": 0.0005610526315789474, "low_lr": 1.1221052631578949e-05, "step": 834 }, { "epoch": 2.193293885601578, "high_lr": 0.0005610526315789474, "low_lr": 1.1221052631578949e-05, "step": 834 }, { "epoch": 2.193293885601578, "high_lr": 0.0005610526315789474, "low_lr": 1.1221052631578949e-05, "step": 834 }, { "epoch": 2.193293885601578, "high_lr": 0.0005610526315789474, "low_lr": 1.1221052631578949e-05, "step": 834 }, { "epoch": 2.1959237343852727, "grad_norm": 1.1653400659561157, "learning_rate": 0.0005605263157894737, "loss": 1.3679, "step": 835 }, { "epoch": 2.1959237343852727, "high_lr": 0.0005605263157894737, "low_lr": 1.1210526315789473e-05, "step": 835 }, { "epoch": 2.1959237343852727, "high_lr": 0.0005605263157894737, "low_lr": 1.1210526315789473e-05, "step": 835 }, { "epoch": 2.1959237343852727, "high_lr": 0.0005605263157894737, "low_lr": 1.1210526315789473e-05, "step": 835 }, { "epoch": 2.1959237343852727, "high_lr": 0.0005605263157894737, "low_lr": 1.1210526315789473e-05, "step": 835 }, { "epoch": 2.1959237343852727, "high_lr": 0.0005605263157894737, "low_lr": 1.1210526315789473e-05, "step": 835 }, { "epoch": 2.1959237343852727, "high_lr": 0.0005605263157894737, "low_lr": 1.1210526315789473e-05, "step": 835 }, { "epoch": 2.1959237343852727, "high_lr": 0.0005605263157894737, "low_lr": 1.1210526315789473e-05, "step": 835 }, { "epoch": 2.1959237343852727, "high_lr": 0.0005605263157894737, "low_lr": 1.1210526315789473e-05, "step": 835 }, { "epoch": 2.1985535831689678, "grad_norm": 1.3364615440368652, "learning_rate": 0.0005600000000000001, "loss": 1.3428, "step": 836 }, { "epoch": 2.1985535831689678, "high_lr": 0.0005600000000000001, "low_lr": 1.1200000000000001e-05, "step": 836 }, { "epoch": 2.1985535831689678, "high_lr": 0.0005600000000000001, "low_lr": 1.1200000000000001e-05, "step": 836 }, { "epoch": 2.1985535831689678, "high_lr": 0.0005600000000000001, "low_lr": 1.1200000000000001e-05, "step": 836 }, { "epoch": 2.1985535831689678, "high_lr": 0.0005600000000000001, "low_lr": 1.1200000000000001e-05, "step": 836 }, { "epoch": 2.1985535831689678, "high_lr": 0.0005600000000000001, "low_lr": 1.1200000000000001e-05, "step": 836 }, { "epoch": 2.1985535831689678, "high_lr": 0.0005600000000000001, "low_lr": 1.1200000000000001e-05, "step": 836 }, { "epoch": 2.1985535831689678, "high_lr": 0.0005600000000000001, "low_lr": 1.1200000000000001e-05, "step": 836 }, { "epoch": 2.1985535831689678, "high_lr": 0.0005600000000000001, "low_lr": 1.1200000000000001e-05, "step": 836 }, { "epoch": 2.201183431952663, "grad_norm": 1.1404310464859009, "learning_rate": 0.0005594736842105263, "loss": 1.3897, "step": 837 }, { "epoch": 2.201183431952663, "high_lr": 0.0005594736842105263, "low_lr": 1.1189473684210528e-05, "step": 837 }, { "epoch": 2.201183431952663, "high_lr": 0.0005594736842105263, "low_lr": 1.1189473684210528e-05, "step": 837 }, { "epoch": 2.201183431952663, "high_lr": 0.0005594736842105263, "low_lr": 1.1189473684210528e-05, "step": 837 }, { "epoch": 2.201183431952663, "high_lr": 0.0005594736842105263, "low_lr": 1.1189473684210528e-05, "step": 837 }, { "epoch": 2.201183431952663, "high_lr": 0.0005594736842105263, "low_lr": 1.1189473684210528e-05, "step": 837 }, { "epoch": 2.201183431952663, "high_lr": 0.0005594736842105263, "low_lr": 1.1189473684210528e-05, "step": 837 }, { "epoch": 2.201183431952663, "high_lr": 0.0005594736842105263, "low_lr": 1.1189473684210528e-05, "step": 837 }, { "epoch": 2.201183431952663, "high_lr": 0.0005594736842105263, "low_lr": 1.1189473684210528e-05, "step": 837 }, { "epoch": 2.2038132807363575, "grad_norm": 1.1752078533172607, "learning_rate": 0.0005589473684210526, "loss": 1.4003, "step": 838 }, { "epoch": 2.2038132807363575, "high_lr": 0.0005589473684210526, "low_lr": 1.1178947368421054e-05, "step": 838 }, { "epoch": 2.2038132807363575, "high_lr": 0.0005589473684210526, "low_lr": 1.1178947368421054e-05, "step": 838 }, { "epoch": 2.2038132807363575, "high_lr": 0.0005589473684210526, "low_lr": 1.1178947368421054e-05, "step": 838 }, { "epoch": 2.2038132807363575, "high_lr": 0.0005589473684210526, "low_lr": 1.1178947368421054e-05, "step": 838 }, { "epoch": 2.2038132807363575, "high_lr": 0.0005589473684210526, "low_lr": 1.1178947368421054e-05, "step": 838 }, { "epoch": 2.2038132807363575, "high_lr": 0.0005589473684210526, "low_lr": 1.1178947368421054e-05, "step": 838 }, { "epoch": 2.2038132807363575, "high_lr": 0.0005589473684210526, "low_lr": 1.1178947368421054e-05, "step": 838 }, { "epoch": 2.2038132807363575, "high_lr": 0.0005589473684210526, "low_lr": 1.1178947368421054e-05, "step": 838 }, { "epoch": 2.2064431295200526, "grad_norm": 1.1771602630615234, "learning_rate": 0.0005584210526315789, "loss": 1.3759, "step": 839 }, { "epoch": 2.2064431295200526, "high_lr": 0.0005584210526315789, "low_lr": 1.116842105263158e-05, "step": 839 }, { "epoch": 2.2064431295200526, "high_lr": 0.0005584210526315789, "low_lr": 1.116842105263158e-05, "step": 839 }, { "epoch": 2.2064431295200526, "high_lr": 0.0005584210526315789, "low_lr": 1.116842105263158e-05, "step": 839 }, { "epoch": 2.2064431295200526, "high_lr": 0.0005584210526315789, "low_lr": 1.116842105263158e-05, "step": 839 }, { "epoch": 2.2064431295200526, "high_lr": 0.0005584210526315789, "low_lr": 1.116842105263158e-05, "step": 839 }, { "epoch": 2.2064431295200526, "high_lr": 0.0005584210526315789, "low_lr": 1.116842105263158e-05, "step": 839 }, { "epoch": 2.2064431295200526, "high_lr": 0.0005584210526315789, "low_lr": 1.116842105263158e-05, "step": 839 }, { "epoch": 2.2064431295200526, "high_lr": 0.0005584210526315789, "low_lr": 1.116842105263158e-05, "step": 839 }, { "epoch": 2.2090729783037477, "grad_norm": 1.2733348608016968, "learning_rate": 0.0005578947368421052, "loss": 1.4378, "step": 840 }, { "epoch": 2.2090729783037477, "high_lr": 0.0005578947368421052, "low_lr": 1.1157894736842105e-05, "step": 840 }, { "epoch": 2.2090729783037477, "high_lr": 0.0005578947368421052, "low_lr": 1.1157894736842105e-05, "step": 840 }, { "epoch": 2.2090729783037477, "high_lr": 0.0005578947368421052, "low_lr": 1.1157894736842105e-05, "step": 840 }, { "epoch": 2.2090729783037477, "high_lr": 0.0005578947368421052, "low_lr": 1.1157894736842105e-05, "step": 840 }, { "epoch": 2.2090729783037477, "high_lr": 0.0005578947368421052, "low_lr": 1.1157894736842105e-05, "step": 840 }, { "epoch": 2.2090729783037477, "high_lr": 0.0005578947368421052, "low_lr": 1.1157894736842105e-05, "step": 840 }, { "epoch": 2.2090729783037477, "high_lr": 0.0005578947368421052, "low_lr": 1.1157894736842105e-05, "step": 840 }, { "epoch": 2.2090729783037477, "high_lr": 0.0005578947368421052, "low_lr": 1.1157894736842105e-05, "step": 840 }, { "epoch": 2.2117028270874424, "grad_norm": 1.1732800006866455, "learning_rate": 0.0005573684210526316, "loss": 1.3983, "step": 841 }, { "epoch": 2.2117028270874424, "high_lr": 0.0005573684210526316, "low_lr": 1.1147368421052633e-05, "step": 841 }, { "epoch": 2.2117028270874424, "high_lr": 0.0005573684210526316, "low_lr": 1.1147368421052633e-05, "step": 841 }, { "epoch": 2.2117028270874424, "high_lr": 0.0005573684210526316, "low_lr": 1.1147368421052633e-05, "step": 841 }, { "epoch": 2.2117028270874424, "high_lr": 0.0005573684210526316, "low_lr": 1.1147368421052633e-05, "step": 841 }, { "epoch": 2.2117028270874424, "high_lr": 0.0005573684210526316, "low_lr": 1.1147368421052633e-05, "step": 841 }, { "epoch": 2.2117028270874424, "high_lr": 0.0005573684210526316, "low_lr": 1.1147368421052633e-05, "step": 841 }, { "epoch": 2.2117028270874424, "high_lr": 0.0005573684210526316, "low_lr": 1.1147368421052633e-05, "step": 841 }, { "epoch": 2.2117028270874424, "high_lr": 0.0005573684210526316, "low_lr": 1.1147368421052633e-05, "step": 841 }, { "epoch": 2.2143326758711375, "grad_norm": 1.2102004289627075, "learning_rate": 0.0005568421052631579, "loss": 1.3733, "step": 842 }, { "epoch": 2.2143326758711375, "high_lr": 0.0005568421052631579, "low_lr": 1.1136842105263159e-05, "step": 842 }, { "epoch": 2.2143326758711375, "high_lr": 0.0005568421052631579, "low_lr": 1.1136842105263159e-05, "step": 842 }, { "epoch": 2.2143326758711375, "high_lr": 0.0005568421052631579, "low_lr": 1.1136842105263159e-05, "step": 842 }, { "epoch": 2.2143326758711375, "high_lr": 0.0005568421052631579, "low_lr": 1.1136842105263159e-05, "step": 842 }, { "epoch": 2.2143326758711375, "high_lr": 0.0005568421052631579, "low_lr": 1.1136842105263159e-05, "step": 842 }, { "epoch": 2.2143326758711375, "high_lr": 0.0005568421052631579, "low_lr": 1.1136842105263159e-05, "step": 842 }, { "epoch": 2.2143326758711375, "high_lr": 0.0005568421052631579, "low_lr": 1.1136842105263159e-05, "step": 842 }, { "epoch": 2.2143326758711375, "high_lr": 0.0005568421052631579, "low_lr": 1.1136842105263159e-05, "step": 842 }, { "epoch": 2.216962524654832, "grad_norm": 1.2097240686416626, "learning_rate": 0.0005563157894736842, "loss": 1.3866, "step": 843 }, { "epoch": 2.216962524654832, "high_lr": 0.0005563157894736842, "low_lr": 1.1126315789473685e-05, "step": 843 }, { "epoch": 2.216962524654832, "high_lr": 0.0005563157894736842, "low_lr": 1.1126315789473685e-05, "step": 843 }, { "epoch": 2.216962524654832, "high_lr": 0.0005563157894736842, "low_lr": 1.1126315789473685e-05, "step": 843 }, { "epoch": 2.216962524654832, "high_lr": 0.0005563157894736842, "low_lr": 1.1126315789473685e-05, "step": 843 }, { "epoch": 2.216962524654832, "high_lr": 0.0005563157894736842, "low_lr": 1.1126315789473685e-05, "step": 843 }, { "epoch": 2.216962524654832, "high_lr": 0.0005563157894736842, "low_lr": 1.1126315789473685e-05, "step": 843 }, { "epoch": 2.216962524654832, "high_lr": 0.0005563157894736842, "low_lr": 1.1126315789473685e-05, "step": 843 }, { "epoch": 2.216962524654832, "high_lr": 0.0005563157894736842, "low_lr": 1.1126315789473685e-05, "step": 843 }, { "epoch": 2.219592373438527, "grad_norm": 1.1703976392745972, "learning_rate": 0.0005557894736842106, "loss": 1.4377, "step": 844 }, { "epoch": 2.219592373438527, "high_lr": 0.0005557894736842106, "low_lr": 1.111578947368421e-05, "step": 844 }, { "epoch": 2.219592373438527, "high_lr": 0.0005557894736842106, "low_lr": 1.111578947368421e-05, "step": 844 }, { "epoch": 2.219592373438527, "high_lr": 0.0005557894736842106, "low_lr": 1.111578947368421e-05, "step": 844 }, { "epoch": 2.219592373438527, "high_lr": 0.0005557894736842106, "low_lr": 1.111578947368421e-05, "step": 844 }, { "epoch": 2.219592373438527, "high_lr": 0.0005557894736842106, "low_lr": 1.111578947368421e-05, "step": 844 }, { "epoch": 2.219592373438527, "high_lr": 0.0005557894736842106, "low_lr": 1.111578947368421e-05, "step": 844 }, { "epoch": 2.219592373438527, "high_lr": 0.0005557894736842106, "low_lr": 1.111578947368421e-05, "step": 844 }, { "epoch": 2.219592373438527, "high_lr": 0.0005557894736842106, "low_lr": 1.111578947368421e-05, "step": 844 }, { "epoch": 2.2222222222222223, "grad_norm": 1.1830466985702515, "learning_rate": 0.0005552631578947368, "loss": 1.3486, "step": 845 }, { "epoch": 2.2222222222222223, "high_lr": 0.0005552631578947368, "low_lr": 1.1105263157894736e-05, "step": 845 }, { "epoch": 2.2222222222222223, "high_lr": 0.0005552631578947368, "low_lr": 1.1105263157894736e-05, "step": 845 }, { "epoch": 2.2222222222222223, "high_lr": 0.0005552631578947368, "low_lr": 1.1105263157894736e-05, "step": 845 }, { "epoch": 2.2222222222222223, "high_lr": 0.0005552631578947368, "low_lr": 1.1105263157894736e-05, "step": 845 }, { "epoch": 2.2222222222222223, "high_lr": 0.0005552631578947368, "low_lr": 1.1105263157894736e-05, "step": 845 }, { "epoch": 2.2222222222222223, "high_lr": 0.0005552631578947368, "low_lr": 1.1105263157894736e-05, "step": 845 }, { "epoch": 2.2222222222222223, "high_lr": 0.0005552631578947368, "low_lr": 1.1105263157894736e-05, "step": 845 }, { "epoch": 2.2222222222222223, "high_lr": 0.0005552631578947368, "low_lr": 1.1105263157894736e-05, "step": 845 }, { "epoch": 2.224852071005917, "grad_norm": 1.2717379331588745, "learning_rate": 0.0005547368421052632, "loss": 1.4297, "step": 846 }, { "epoch": 2.224852071005917, "high_lr": 0.0005547368421052632, "low_lr": 1.1094736842105264e-05, "step": 846 }, { "epoch": 2.224852071005917, "high_lr": 0.0005547368421052632, "low_lr": 1.1094736842105264e-05, "step": 846 }, { "epoch": 2.224852071005917, "high_lr": 0.0005547368421052632, "low_lr": 1.1094736842105264e-05, "step": 846 }, { "epoch": 2.224852071005917, "high_lr": 0.0005547368421052632, "low_lr": 1.1094736842105264e-05, "step": 846 }, { "epoch": 2.224852071005917, "high_lr": 0.0005547368421052632, "low_lr": 1.1094736842105264e-05, "step": 846 }, { "epoch": 2.224852071005917, "high_lr": 0.0005547368421052632, "low_lr": 1.1094736842105264e-05, "step": 846 }, { "epoch": 2.224852071005917, "high_lr": 0.0005547368421052632, "low_lr": 1.1094736842105264e-05, "step": 846 }, { "epoch": 2.224852071005917, "high_lr": 0.0005547368421052632, "low_lr": 1.1094736842105264e-05, "step": 846 }, { "epoch": 2.227481919789612, "grad_norm": 1.3143019676208496, "learning_rate": 0.0005542105263157895, "loss": 1.4476, "step": 847 }, { "epoch": 2.227481919789612, "high_lr": 0.0005542105263157895, "low_lr": 1.108421052631579e-05, "step": 847 }, { "epoch": 2.227481919789612, "high_lr": 0.0005542105263157895, "low_lr": 1.108421052631579e-05, "step": 847 }, { "epoch": 2.227481919789612, "high_lr": 0.0005542105263157895, "low_lr": 1.108421052631579e-05, "step": 847 }, { "epoch": 2.227481919789612, "high_lr": 0.0005542105263157895, "low_lr": 1.108421052631579e-05, "step": 847 }, { "epoch": 2.227481919789612, "high_lr": 0.0005542105263157895, "low_lr": 1.108421052631579e-05, "step": 847 }, { "epoch": 2.227481919789612, "high_lr": 0.0005542105263157895, "low_lr": 1.108421052631579e-05, "step": 847 }, { "epoch": 2.227481919789612, "high_lr": 0.0005542105263157895, "low_lr": 1.108421052631579e-05, "step": 847 }, { "epoch": 2.227481919789612, "high_lr": 0.0005542105263157895, "low_lr": 1.108421052631579e-05, "step": 847 }, { "epoch": 2.230111768573307, "grad_norm": 1.2574163675308228, "learning_rate": 0.0005536842105263158, "loss": 1.3895, "step": 848 }, { "epoch": 2.230111768573307, "high_lr": 0.0005536842105263158, "low_lr": 1.1073684210526317e-05, "step": 848 }, { "epoch": 2.230111768573307, "high_lr": 0.0005536842105263158, "low_lr": 1.1073684210526317e-05, "step": 848 }, { "epoch": 2.230111768573307, "high_lr": 0.0005536842105263158, "low_lr": 1.1073684210526317e-05, "step": 848 }, { "epoch": 2.230111768573307, "high_lr": 0.0005536842105263158, "low_lr": 1.1073684210526317e-05, "step": 848 }, { "epoch": 2.230111768573307, "high_lr": 0.0005536842105263158, "low_lr": 1.1073684210526317e-05, "step": 848 }, { "epoch": 2.230111768573307, "high_lr": 0.0005536842105263158, "low_lr": 1.1073684210526317e-05, "step": 848 }, { "epoch": 2.230111768573307, "high_lr": 0.0005536842105263158, "low_lr": 1.1073684210526317e-05, "step": 848 }, { "epoch": 2.230111768573307, "high_lr": 0.0005536842105263158, "low_lr": 1.1073684210526317e-05, "step": 848 }, { "epoch": 2.232741617357002, "grad_norm": 1.2217366695404053, "learning_rate": 0.0005531578947368421, "loss": 1.3625, "step": 849 }, { "epoch": 2.232741617357002, "high_lr": 0.0005531578947368421, "low_lr": 1.1063157894736842e-05, "step": 849 }, { "epoch": 2.232741617357002, "high_lr": 0.0005531578947368421, "low_lr": 1.1063157894736842e-05, "step": 849 }, { "epoch": 2.232741617357002, "high_lr": 0.0005531578947368421, "low_lr": 1.1063157894736842e-05, "step": 849 }, { "epoch": 2.232741617357002, "high_lr": 0.0005531578947368421, "low_lr": 1.1063157894736842e-05, "step": 849 }, { "epoch": 2.232741617357002, "high_lr": 0.0005531578947368421, "low_lr": 1.1063157894736842e-05, "step": 849 }, { "epoch": 2.232741617357002, "high_lr": 0.0005531578947368421, "low_lr": 1.1063157894736842e-05, "step": 849 }, { "epoch": 2.232741617357002, "high_lr": 0.0005531578947368421, "low_lr": 1.1063157894736842e-05, "step": 849 }, { "epoch": 2.232741617357002, "high_lr": 0.0005531578947368421, "low_lr": 1.1063157894736842e-05, "step": 849 }, { "epoch": 2.235371466140697, "grad_norm": 1.2277804613113403, "learning_rate": 0.0005526315789473685, "loss": 1.4304, "step": 850 }, { "epoch": 2.235371466140697, "high_lr": 0.0005526315789473685, "low_lr": 1.105263157894737e-05, "step": 850 }, { "epoch": 2.235371466140697, "high_lr": 0.0005526315789473685, "low_lr": 1.105263157894737e-05, "step": 850 }, { "epoch": 2.235371466140697, "high_lr": 0.0005526315789473685, "low_lr": 1.105263157894737e-05, "step": 850 }, { "epoch": 2.235371466140697, "high_lr": 0.0005526315789473685, "low_lr": 1.105263157894737e-05, "step": 850 }, { "epoch": 2.235371466140697, "high_lr": 0.0005526315789473685, "low_lr": 1.105263157894737e-05, "step": 850 }, { "epoch": 2.235371466140697, "high_lr": 0.0005526315789473685, "low_lr": 1.105263157894737e-05, "step": 850 }, { "epoch": 2.235371466140697, "high_lr": 0.0005526315789473685, "low_lr": 1.105263157894737e-05, "step": 850 }, { "epoch": 2.235371466140697, "high_lr": 0.0005526315789473685, "low_lr": 1.105263157894737e-05, "step": 850 }, { "epoch": 2.238001314924392, "grad_norm": 1.2404119968414307, "learning_rate": 0.0005521052631578948, "loss": 1.3227, "step": 851 }, { "epoch": 2.238001314924392, "high_lr": 0.0005521052631578948, "low_lr": 1.1042105263157896e-05, "step": 851 }, { "epoch": 2.238001314924392, "high_lr": 0.0005521052631578948, "low_lr": 1.1042105263157896e-05, "step": 851 }, { "epoch": 2.238001314924392, "high_lr": 0.0005521052631578948, "low_lr": 1.1042105263157896e-05, "step": 851 }, { "epoch": 2.238001314924392, "high_lr": 0.0005521052631578948, "low_lr": 1.1042105263157896e-05, "step": 851 }, { "epoch": 2.238001314924392, "high_lr": 0.0005521052631578948, "low_lr": 1.1042105263157896e-05, "step": 851 }, { "epoch": 2.238001314924392, "high_lr": 0.0005521052631578948, "low_lr": 1.1042105263157896e-05, "step": 851 }, { "epoch": 2.238001314924392, "high_lr": 0.0005521052631578948, "low_lr": 1.1042105263157896e-05, "step": 851 }, { "epoch": 2.238001314924392, "high_lr": 0.0005521052631578948, "low_lr": 1.1042105263157896e-05, "step": 851 }, { "epoch": 2.2406311637080867, "grad_norm": 1.2292978763580322, "learning_rate": 0.0005515789473684211, "loss": 1.3879, "step": 852 }, { "epoch": 2.2406311637080867, "high_lr": 0.0005515789473684211, "low_lr": 1.1031578947368422e-05, "step": 852 }, { "epoch": 2.2406311637080867, "high_lr": 0.0005515789473684211, "low_lr": 1.1031578947368422e-05, "step": 852 }, { "epoch": 2.2406311637080867, "high_lr": 0.0005515789473684211, "low_lr": 1.1031578947368422e-05, "step": 852 }, { "epoch": 2.2406311637080867, "high_lr": 0.0005515789473684211, "low_lr": 1.1031578947368422e-05, "step": 852 }, { "epoch": 2.2406311637080867, "high_lr": 0.0005515789473684211, "low_lr": 1.1031578947368422e-05, "step": 852 }, { "epoch": 2.2406311637080867, "high_lr": 0.0005515789473684211, "low_lr": 1.1031578947368422e-05, "step": 852 }, { "epoch": 2.2406311637080867, "high_lr": 0.0005515789473684211, "low_lr": 1.1031578947368422e-05, "step": 852 }, { "epoch": 2.2406311637080867, "high_lr": 0.0005515789473684211, "low_lr": 1.1031578947368422e-05, "step": 852 }, { "epoch": 2.243261012491782, "grad_norm": 1.25013267993927, "learning_rate": 0.0005510526315789474, "loss": 1.4225, "step": 853 }, { "epoch": 2.243261012491782, "high_lr": 0.0005510526315789474, "low_lr": 1.1021052631578947e-05, "step": 853 }, { "epoch": 2.243261012491782, "high_lr": 0.0005510526315789474, "low_lr": 1.1021052631578947e-05, "step": 853 }, { "epoch": 2.243261012491782, "high_lr": 0.0005510526315789474, "low_lr": 1.1021052631578947e-05, "step": 853 }, { "epoch": 2.243261012491782, "high_lr": 0.0005510526315789474, "low_lr": 1.1021052631578947e-05, "step": 853 }, { "epoch": 2.243261012491782, "high_lr": 0.0005510526315789474, "low_lr": 1.1021052631578947e-05, "step": 853 }, { "epoch": 2.243261012491782, "high_lr": 0.0005510526315789474, "low_lr": 1.1021052631578947e-05, "step": 853 }, { "epoch": 2.243261012491782, "high_lr": 0.0005510526315789474, "low_lr": 1.1021052631578947e-05, "step": 853 }, { "epoch": 2.243261012491782, "high_lr": 0.0005510526315789474, "low_lr": 1.1021052631578947e-05, "step": 853 }, { "epoch": 2.245890861275477, "grad_norm": 1.2261604070663452, "learning_rate": 0.0005505263157894736, "loss": 1.3401, "step": 854 }, { "epoch": 2.245890861275477, "high_lr": 0.0005505263157894736, "low_lr": 1.1010526315789473e-05, "step": 854 }, { "epoch": 2.245890861275477, "high_lr": 0.0005505263157894736, "low_lr": 1.1010526315789473e-05, "step": 854 }, { "epoch": 2.245890861275477, "high_lr": 0.0005505263157894736, "low_lr": 1.1010526315789473e-05, "step": 854 }, { "epoch": 2.245890861275477, "high_lr": 0.0005505263157894736, "low_lr": 1.1010526315789473e-05, "step": 854 }, { "epoch": 2.245890861275477, "high_lr": 0.0005505263157894736, "low_lr": 1.1010526315789473e-05, "step": 854 }, { "epoch": 2.245890861275477, "high_lr": 0.0005505263157894736, "low_lr": 1.1010526315789473e-05, "step": 854 }, { "epoch": 2.245890861275477, "high_lr": 0.0005505263157894736, "low_lr": 1.1010526315789473e-05, "step": 854 }, { "epoch": 2.245890861275477, "high_lr": 0.0005505263157894736, "low_lr": 1.1010526315789473e-05, "step": 854 }, { "epoch": 2.2485207100591715, "grad_norm": 1.202841877937317, "learning_rate": 0.00055, "loss": 1.3989, "step": 855 }, { "epoch": 2.2485207100591715, "high_lr": 0.00055, "low_lr": 1.1000000000000001e-05, "step": 855 }, { "epoch": 2.2485207100591715, "high_lr": 0.00055, "low_lr": 1.1000000000000001e-05, "step": 855 }, { "epoch": 2.2485207100591715, "high_lr": 0.00055, "low_lr": 1.1000000000000001e-05, "step": 855 }, { "epoch": 2.2485207100591715, "high_lr": 0.00055, "low_lr": 1.1000000000000001e-05, "step": 855 }, { "epoch": 2.2485207100591715, "high_lr": 0.00055, "low_lr": 1.1000000000000001e-05, "step": 855 }, { "epoch": 2.2485207100591715, "high_lr": 0.00055, "low_lr": 1.1000000000000001e-05, "step": 855 }, { "epoch": 2.2485207100591715, "high_lr": 0.00055, "low_lr": 1.1000000000000001e-05, "step": 855 }, { "epoch": 2.2485207100591715, "high_lr": 0.00055, "low_lr": 1.1000000000000001e-05, "step": 855 }, { "epoch": 2.2511505588428666, "grad_norm": 1.2519607543945312, "learning_rate": 0.0005494736842105263, "loss": 1.3917, "step": 856 }, { "epoch": 2.2511505588428666, "high_lr": 0.0005494736842105263, "low_lr": 1.0989473684210528e-05, "step": 856 }, { "epoch": 2.2511505588428666, "high_lr": 0.0005494736842105263, "low_lr": 1.0989473684210528e-05, "step": 856 }, { "epoch": 2.2511505588428666, "high_lr": 0.0005494736842105263, "low_lr": 1.0989473684210528e-05, "step": 856 }, { "epoch": 2.2511505588428666, "high_lr": 0.0005494736842105263, "low_lr": 1.0989473684210528e-05, "step": 856 }, { "epoch": 2.2511505588428666, "high_lr": 0.0005494736842105263, "low_lr": 1.0989473684210528e-05, "step": 856 }, { "epoch": 2.2511505588428666, "high_lr": 0.0005494736842105263, "low_lr": 1.0989473684210528e-05, "step": 856 }, { "epoch": 2.2511505588428666, "high_lr": 0.0005494736842105263, "low_lr": 1.0989473684210528e-05, "step": 856 }, { "epoch": 2.2511505588428666, "high_lr": 0.0005494736842105263, "low_lr": 1.0989473684210528e-05, "step": 856 }, { "epoch": 2.2537804076265613, "grad_norm": 1.1744623184204102, "learning_rate": 0.0005489473684210526, "loss": 1.3736, "step": 857 }, { "epoch": 2.2537804076265613, "high_lr": 0.0005489473684210526, "low_lr": 1.0978947368421054e-05, "step": 857 }, { "epoch": 2.2537804076265613, "high_lr": 0.0005489473684210526, "low_lr": 1.0978947368421054e-05, "step": 857 }, { "epoch": 2.2537804076265613, "high_lr": 0.0005489473684210526, "low_lr": 1.0978947368421054e-05, "step": 857 }, { "epoch": 2.2537804076265613, "high_lr": 0.0005489473684210526, "low_lr": 1.0978947368421054e-05, "step": 857 }, { "epoch": 2.2537804076265613, "high_lr": 0.0005489473684210526, "low_lr": 1.0978947368421054e-05, "step": 857 }, { "epoch": 2.2537804076265613, "high_lr": 0.0005489473684210526, "low_lr": 1.0978947368421054e-05, "step": 857 }, { "epoch": 2.2537804076265613, "high_lr": 0.0005489473684210526, "low_lr": 1.0978947368421054e-05, "step": 857 }, { "epoch": 2.2537804076265613, "high_lr": 0.0005489473684210526, "low_lr": 1.0978947368421054e-05, "step": 857 }, { "epoch": 2.2564102564102564, "grad_norm": 1.2681621313095093, "learning_rate": 0.0005484210526315789, "loss": 1.3639, "step": 858 }, { "epoch": 2.2564102564102564, "high_lr": 0.0005484210526315789, "low_lr": 1.0968421052631579e-05, "step": 858 }, { "epoch": 2.2564102564102564, "high_lr": 0.0005484210526315789, "low_lr": 1.0968421052631579e-05, "step": 858 }, { "epoch": 2.2564102564102564, "high_lr": 0.0005484210526315789, "low_lr": 1.0968421052631579e-05, "step": 858 }, { "epoch": 2.2564102564102564, "high_lr": 0.0005484210526315789, "low_lr": 1.0968421052631579e-05, "step": 858 }, { "epoch": 2.2564102564102564, "high_lr": 0.0005484210526315789, "low_lr": 1.0968421052631579e-05, "step": 858 }, { "epoch": 2.2564102564102564, "high_lr": 0.0005484210526315789, "low_lr": 1.0968421052631579e-05, "step": 858 }, { "epoch": 2.2564102564102564, "high_lr": 0.0005484210526315789, "low_lr": 1.0968421052631579e-05, "step": 858 }, { "epoch": 2.2564102564102564, "high_lr": 0.0005484210526315789, "low_lr": 1.0968421052631579e-05, "step": 858 }, { "epoch": 2.2590401051939515, "grad_norm": 1.1985419988632202, "learning_rate": 0.0005478947368421052, "loss": 1.3894, "step": 859 }, { "epoch": 2.2590401051939515, "high_lr": 0.0005478947368421052, "low_lr": 1.0957894736842105e-05, "step": 859 }, { "epoch": 2.2590401051939515, "high_lr": 0.0005478947368421052, "low_lr": 1.0957894736842105e-05, "step": 859 }, { "epoch": 2.2590401051939515, "high_lr": 0.0005478947368421052, "low_lr": 1.0957894736842105e-05, "step": 859 }, { "epoch": 2.2590401051939515, "high_lr": 0.0005478947368421052, "low_lr": 1.0957894736842105e-05, "step": 859 }, { "epoch": 2.2590401051939515, "high_lr": 0.0005478947368421052, "low_lr": 1.0957894736842105e-05, "step": 859 }, { "epoch": 2.2590401051939515, "high_lr": 0.0005478947368421052, "low_lr": 1.0957894736842105e-05, "step": 859 }, { "epoch": 2.2590401051939515, "high_lr": 0.0005478947368421052, "low_lr": 1.0957894736842105e-05, "step": 859 }, { "epoch": 2.2590401051939515, "high_lr": 0.0005478947368421052, "low_lr": 1.0957894736842105e-05, "step": 859 }, { "epoch": 2.261669953977646, "grad_norm": 1.3177270889282227, "learning_rate": 0.0005473684210526317, "loss": 1.3522, "step": 860 }, { "epoch": 2.261669953977646, "high_lr": 0.0005473684210526317, "low_lr": 1.0947368421052633e-05, "step": 860 }, { "epoch": 2.261669953977646, "high_lr": 0.0005473684210526317, "low_lr": 1.0947368421052633e-05, "step": 860 }, { "epoch": 2.261669953977646, "high_lr": 0.0005473684210526317, "low_lr": 1.0947368421052633e-05, "step": 860 }, { "epoch": 2.261669953977646, "high_lr": 0.0005473684210526317, "low_lr": 1.0947368421052633e-05, "step": 860 }, { "epoch": 2.261669953977646, "high_lr": 0.0005473684210526317, "low_lr": 1.0947368421052633e-05, "step": 860 }, { "epoch": 2.261669953977646, "high_lr": 0.0005473684210526317, "low_lr": 1.0947368421052633e-05, "step": 860 }, { "epoch": 2.261669953977646, "high_lr": 0.0005473684210526317, "low_lr": 1.0947368421052633e-05, "step": 860 }, { "epoch": 2.261669953977646, "high_lr": 0.0005473684210526317, "low_lr": 1.0947368421052633e-05, "step": 860 }, { "epoch": 2.2642998027613412, "grad_norm": 1.23194420337677, "learning_rate": 0.000546842105263158, "loss": 1.3465, "step": 861 }, { "epoch": 2.2642998027613412, "high_lr": 0.000546842105263158, "low_lr": 1.093684210526316e-05, "step": 861 }, { "epoch": 2.2642998027613412, "high_lr": 0.000546842105263158, "low_lr": 1.093684210526316e-05, "step": 861 }, { "epoch": 2.2642998027613412, "high_lr": 0.000546842105263158, "low_lr": 1.093684210526316e-05, "step": 861 }, { "epoch": 2.2642998027613412, "high_lr": 0.000546842105263158, "low_lr": 1.093684210526316e-05, "step": 861 }, { "epoch": 2.2642998027613412, "high_lr": 0.000546842105263158, "low_lr": 1.093684210526316e-05, "step": 861 }, { "epoch": 2.2642998027613412, "high_lr": 0.000546842105263158, "low_lr": 1.093684210526316e-05, "step": 861 }, { "epoch": 2.2642998027613412, "high_lr": 0.000546842105263158, "low_lr": 1.093684210526316e-05, "step": 861 }, { "epoch": 2.2642998027613412, "high_lr": 0.000546842105263158, "low_lr": 1.093684210526316e-05, "step": 861 }, { "epoch": 2.2669296515450363, "grad_norm": 1.2162786722183228, "learning_rate": 0.0005463157894736843, "loss": 1.4018, "step": 862 }, { "epoch": 2.2669296515450363, "high_lr": 0.0005463157894736843, "low_lr": 1.0926315789473686e-05, "step": 862 }, { "epoch": 2.2669296515450363, "high_lr": 0.0005463157894736843, "low_lr": 1.0926315789473686e-05, "step": 862 }, { "epoch": 2.2669296515450363, "high_lr": 0.0005463157894736843, "low_lr": 1.0926315789473686e-05, "step": 862 }, { "epoch": 2.2669296515450363, "high_lr": 0.0005463157894736843, "low_lr": 1.0926315789473686e-05, "step": 862 }, { "epoch": 2.2669296515450363, "high_lr": 0.0005463157894736843, "low_lr": 1.0926315789473686e-05, "step": 862 }, { "epoch": 2.2669296515450363, "high_lr": 0.0005463157894736843, "low_lr": 1.0926315789473686e-05, "step": 862 }, { "epoch": 2.2669296515450363, "high_lr": 0.0005463157894736843, "low_lr": 1.0926315789473686e-05, "step": 862 }, { "epoch": 2.2669296515450363, "high_lr": 0.0005463157894736843, "low_lr": 1.0926315789473686e-05, "step": 862 }, { "epoch": 2.269559500328731, "grad_norm": 1.2616527080535889, "learning_rate": 0.0005457894736842105, "loss": 1.4306, "step": 863 }, { "epoch": 2.269559500328731, "high_lr": 0.0005457894736842105, "low_lr": 1.091578947368421e-05, "step": 863 }, { "epoch": 2.269559500328731, "high_lr": 0.0005457894736842105, "low_lr": 1.091578947368421e-05, "step": 863 }, { "epoch": 2.269559500328731, "high_lr": 0.0005457894736842105, "low_lr": 1.091578947368421e-05, "step": 863 }, { "epoch": 2.269559500328731, "high_lr": 0.0005457894736842105, "low_lr": 1.091578947368421e-05, "step": 863 }, { "epoch": 2.269559500328731, "high_lr": 0.0005457894736842105, "low_lr": 1.091578947368421e-05, "step": 863 }, { "epoch": 2.269559500328731, "high_lr": 0.0005457894736842105, "low_lr": 1.091578947368421e-05, "step": 863 }, { "epoch": 2.269559500328731, "high_lr": 0.0005457894736842105, "low_lr": 1.091578947368421e-05, "step": 863 }, { "epoch": 2.269559500328731, "high_lr": 0.0005457894736842105, "low_lr": 1.091578947368421e-05, "step": 863 }, { "epoch": 2.272189349112426, "grad_norm": 1.2469723224639893, "learning_rate": 0.0005452631578947369, "loss": 1.3668, "step": 864 }, { "epoch": 2.272189349112426, "high_lr": 0.0005452631578947369, "low_lr": 1.0905263157894738e-05, "step": 864 }, { "epoch": 2.272189349112426, "high_lr": 0.0005452631578947369, "low_lr": 1.0905263157894738e-05, "step": 864 }, { "epoch": 2.272189349112426, "high_lr": 0.0005452631578947369, "low_lr": 1.0905263157894738e-05, "step": 864 }, { "epoch": 2.272189349112426, "high_lr": 0.0005452631578947369, "low_lr": 1.0905263157894738e-05, "step": 864 }, { "epoch": 2.272189349112426, "high_lr": 0.0005452631578947369, "low_lr": 1.0905263157894738e-05, "step": 864 }, { "epoch": 2.272189349112426, "high_lr": 0.0005452631578947369, "low_lr": 1.0905263157894738e-05, "step": 864 }, { "epoch": 2.272189349112426, "high_lr": 0.0005452631578947369, "low_lr": 1.0905263157894738e-05, "step": 864 }, { "epoch": 2.272189349112426, "high_lr": 0.0005452631578947369, "low_lr": 1.0905263157894738e-05, "step": 864 }, { "epoch": 2.2748191978961207, "grad_norm": 1.2859033346176147, "learning_rate": 0.0005447368421052632, "loss": 1.3792, "step": 865 }, { "epoch": 2.2748191978961207, "high_lr": 0.0005447368421052632, "low_lr": 1.0894736842105265e-05, "step": 865 }, { "epoch": 2.2748191978961207, "high_lr": 0.0005447368421052632, "low_lr": 1.0894736842105265e-05, "step": 865 }, { "epoch": 2.2748191978961207, "high_lr": 0.0005447368421052632, "low_lr": 1.0894736842105265e-05, "step": 865 }, { "epoch": 2.2748191978961207, "high_lr": 0.0005447368421052632, "low_lr": 1.0894736842105265e-05, "step": 865 }, { "epoch": 2.2748191978961207, "high_lr": 0.0005447368421052632, "low_lr": 1.0894736842105265e-05, "step": 865 }, { "epoch": 2.2748191978961207, "high_lr": 0.0005447368421052632, "low_lr": 1.0894736842105265e-05, "step": 865 }, { "epoch": 2.2748191978961207, "high_lr": 0.0005447368421052632, "low_lr": 1.0894736842105265e-05, "step": 865 }, { "epoch": 2.2748191978961207, "high_lr": 0.0005447368421052632, "low_lr": 1.0894736842105265e-05, "step": 865 }, { "epoch": 2.277449046679816, "grad_norm": 1.2813301086425781, "learning_rate": 0.0005442105263157895, "loss": 1.3938, "step": 866 }, { "epoch": 2.277449046679816, "high_lr": 0.0005442105263157895, "low_lr": 1.0884210526315791e-05, "step": 866 }, { "epoch": 2.277449046679816, "high_lr": 0.0005442105263157895, "low_lr": 1.0884210526315791e-05, "step": 866 }, { "epoch": 2.277449046679816, "high_lr": 0.0005442105263157895, "low_lr": 1.0884210526315791e-05, "step": 866 }, { "epoch": 2.277449046679816, "high_lr": 0.0005442105263157895, "low_lr": 1.0884210526315791e-05, "step": 866 }, { "epoch": 2.277449046679816, "high_lr": 0.0005442105263157895, "low_lr": 1.0884210526315791e-05, "step": 866 }, { "epoch": 2.277449046679816, "high_lr": 0.0005442105263157895, "low_lr": 1.0884210526315791e-05, "step": 866 }, { "epoch": 2.277449046679816, "high_lr": 0.0005442105263157895, "low_lr": 1.0884210526315791e-05, "step": 866 }, { "epoch": 2.277449046679816, "high_lr": 0.0005442105263157895, "low_lr": 1.0884210526315791e-05, "step": 866 }, { "epoch": 2.280078895463511, "grad_norm": 1.2008345127105713, "learning_rate": 0.0005436842105263158, "loss": 1.417, "step": 867 }, { "epoch": 2.280078895463511, "high_lr": 0.0005436842105263158, "low_lr": 1.0873684210526316e-05, "step": 867 }, { "epoch": 2.280078895463511, "high_lr": 0.0005436842105263158, "low_lr": 1.0873684210526316e-05, "step": 867 }, { "epoch": 2.280078895463511, "high_lr": 0.0005436842105263158, "low_lr": 1.0873684210526316e-05, "step": 867 }, { "epoch": 2.280078895463511, "high_lr": 0.0005436842105263158, "low_lr": 1.0873684210526316e-05, "step": 867 }, { "epoch": 2.280078895463511, "high_lr": 0.0005436842105263158, "low_lr": 1.0873684210526316e-05, "step": 867 }, { "epoch": 2.280078895463511, "high_lr": 0.0005436842105263158, "low_lr": 1.0873684210526316e-05, "step": 867 }, { "epoch": 2.280078895463511, "high_lr": 0.0005436842105263158, "low_lr": 1.0873684210526316e-05, "step": 867 }, { "epoch": 2.280078895463511, "high_lr": 0.0005436842105263158, "low_lr": 1.0873684210526316e-05, "step": 867 }, { "epoch": 2.2827087442472056, "grad_norm": 1.2014013528823853, "learning_rate": 0.0005431578947368421, "loss": 1.3606, "step": 868 }, { "epoch": 2.2827087442472056, "high_lr": 0.0005431578947368421, "low_lr": 1.0863157894736842e-05, "step": 868 }, { "epoch": 2.2827087442472056, "high_lr": 0.0005431578947368421, "low_lr": 1.0863157894736842e-05, "step": 868 }, { "epoch": 2.2827087442472056, "high_lr": 0.0005431578947368421, "low_lr": 1.0863157894736842e-05, "step": 868 }, { "epoch": 2.2827087442472056, "high_lr": 0.0005431578947368421, "low_lr": 1.0863157894736842e-05, "step": 868 }, { "epoch": 2.2827087442472056, "high_lr": 0.0005431578947368421, "low_lr": 1.0863157894736842e-05, "step": 868 }, { "epoch": 2.2827087442472056, "high_lr": 0.0005431578947368421, "low_lr": 1.0863157894736842e-05, "step": 868 }, { "epoch": 2.2827087442472056, "high_lr": 0.0005431578947368421, "low_lr": 1.0863157894736842e-05, "step": 868 }, { "epoch": 2.2827087442472056, "high_lr": 0.0005431578947368421, "low_lr": 1.0863157894736842e-05, "step": 868 }, { "epoch": 2.2853385930309007, "grad_norm": 1.2660644054412842, "learning_rate": 0.0005426315789473685, "loss": 1.3771, "step": 869 }, { "epoch": 2.2853385930309007, "high_lr": 0.0005426315789473685, "low_lr": 1.085263157894737e-05, "step": 869 }, { "epoch": 2.2853385930309007, "high_lr": 0.0005426315789473685, "low_lr": 1.085263157894737e-05, "step": 869 }, { "epoch": 2.2853385930309007, "high_lr": 0.0005426315789473685, "low_lr": 1.085263157894737e-05, "step": 869 }, { "epoch": 2.2853385930309007, "high_lr": 0.0005426315789473685, "low_lr": 1.085263157894737e-05, "step": 869 }, { "epoch": 2.2853385930309007, "high_lr": 0.0005426315789473685, "low_lr": 1.085263157894737e-05, "step": 869 }, { "epoch": 2.2853385930309007, "high_lr": 0.0005426315789473685, "low_lr": 1.085263157894737e-05, "step": 869 }, { "epoch": 2.2853385930309007, "high_lr": 0.0005426315789473685, "low_lr": 1.085263157894737e-05, "step": 869 }, { "epoch": 2.2853385930309007, "high_lr": 0.0005426315789473685, "low_lr": 1.085263157894737e-05, "step": 869 }, { "epoch": 2.287968441814596, "grad_norm": 1.1866534948349, "learning_rate": 0.0005421052631578948, "loss": 1.4152, "step": 870 }, { "epoch": 2.287968441814596, "high_lr": 0.0005421052631578948, "low_lr": 1.0842105263157896e-05, "step": 870 }, { "epoch": 2.287968441814596, "high_lr": 0.0005421052631578948, "low_lr": 1.0842105263157896e-05, "step": 870 }, { "epoch": 2.287968441814596, "high_lr": 0.0005421052631578948, "low_lr": 1.0842105263157896e-05, "step": 870 }, { "epoch": 2.287968441814596, "high_lr": 0.0005421052631578948, "low_lr": 1.0842105263157896e-05, "step": 870 }, { "epoch": 2.287968441814596, "high_lr": 0.0005421052631578948, "low_lr": 1.0842105263157896e-05, "step": 870 }, { "epoch": 2.287968441814596, "high_lr": 0.0005421052631578948, "low_lr": 1.0842105263157896e-05, "step": 870 }, { "epoch": 2.287968441814596, "high_lr": 0.0005421052631578948, "low_lr": 1.0842105263157896e-05, "step": 870 }, { "epoch": 2.287968441814596, "high_lr": 0.0005421052631578948, "low_lr": 1.0842105263157896e-05, "step": 870 }, { "epoch": 2.2905982905982905, "grad_norm": 1.195812702178955, "learning_rate": 0.000541578947368421, "loss": 1.3509, "step": 871 }, { "epoch": 2.2905982905982905, "high_lr": 0.000541578947368421, "low_lr": 1.0831578947368423e-05, "step": 871 }, { "epoch": 2.2905982905982905, "high_lr": 0.000541578947368421, "low_lr": 1.0831578947368423e-05, "step": 871 }, { "epoch": 2.2905982905982905, "high_lr": 0.000541578947368421, "low_lr": 1.0831578947368423e-05, "step": 871 }, { "epoch": 2.2905982905982905, "high_lr": 0.000541578947368421, "low_lr": 1.0831578947368423e-05, "step": 871 }, { "epoch": 2.2905982905982905, "high_lr": 0.000541578947368421, "low_lr": 1.0831578947368423e-05, "step": 871 }, { "epoch": 2.2905982905982905, "high_lr": 0.000541578947368421, "low_lr": 1.0831578947368423e-05, "step": 871 }, { "epoch": 2.2905982905982905, "high_lr": 0.000541578947368421, "low_lr": 1.0831578947368423e-05, "step": 871 }, { "epoch": 2.2905982905982905, "high_lr": 0.000541578947368421, "low_lr": 1.0831578947368423e-05, "step": 871 }, { "epoch": 2.2932281393819856, "grad_norm": 1.1974976062774658, "learning_rate": 0.0005410526315789473, "loss": 1.3986, "step": 872 }, { "epoch": 2.2932281393819856, "high_lr": 0.0005410526315789473, "low_lr": 1.0821052631578947e-05, "step": 872 }, { "epoch": 2.2932281393819856, "high_lr": 0.0005410526315789473, "low_lr": 1.0821052631578947e-05, "step": 872 }, { "epoch": 2.2932281393819856, "high_lr": 0.0005410526315789473, "low_lr": 1.0821052631578947e-05, "step": 872 }, { "epoch": 2.2932281393819856, "high_lr": 0.0005410526315789473, "low_lr": 1.0821052631578947e-05, "step": 872 }, { "epoch": 2.2932281393819856, "high_lr": 0.0005410526315789473, "low_lr": 1.0821052631578947e-05, "step": 872 }, { "epoch": 2.2932281393819856, "high_lr": 0.0005410526315789473, "low_lr": 1.0821052631578947e-05, "step": 872 }, { "epoch": 2.2932281393819856, "high_lr": 0.0005410526315789473, "low_lr": 1.0821052631578947e-05, "step": 872 }, { "epoch": 2.2932281393819856, "high_lr": 0.0005410526315789473, "low_lr": 1.0821052631578947e-05, "step": 872 }, { "epoch": 2.2958579881656807, "grad_norm": 1.3242113590240479, "learning_rate": 0.0005405263157894736, "loss": 1.3586, "step": 873 }, { "epoch": 2.2958579881656807, "high_lr": 0.0005405263157894736, "low_lr": 1.0810526315789474e-05, "step": 873 }, { "epoch": 2.2958579881656807, "high_lr": 0.0005405263157894736, "low_lr": 1.0810526315789474e-05, "step": 873 }, { "epoch": 2.2958579881656807, "high_lr": 0.0005405263157894736, "low_lr": 1.0810526315789474e-05, "step": 873 }, { "epoch": 2.2958579881656807, "high_lr": 0.0005405263157894736, "low_lr": 1.0810526315789474e-05, "step": 873 }, { "epoch": 2.2958579881656807, "high_lr": 0.0005405263157894736, "low_lr": 1.0810526315789474e-05, "step": 873 }, { "epoch": 2.2958579881656807, "high_lr": 0.0005405263157894736, "low_lr": 1.0810526315789474e-05, "step": 873 }, { "epoch": 2.2958579881656807, "high_lr": 0.0005405263157894736, "low_lr": 1.0810526315789474e-05, "step": 873 }, { "epoch": 2.2958579881656807, "high_lr": 0.0005405263157894736, "low_lr": 1.0810526315789474e-05, "step": 873 }, { "epoch": 2.2984878369493753, "grad_norm": 1.312902569770813, "learning_rate": 0.00054, "loss": 1.3863, "step": 874 }, { "epoch": 2.2984878369493753, "high_lr": 0.00054, "low_lr": 1.0800000000000002e-05, "step": 874 }, { "epoch": 2.2984878369493753, "high_lr": 0.00054, "low_lr": 1.0800000000000002e-05, "step": 874 }, { "epoch": 2.2984878369493753, "high_lr": 0.00054, "low_lr": 1.0800000000000002e-05, "step": 874 }, { "epoch": 2.2984878369493753, "high_lr": 0.00054, "low_lr": 1.0800000000000002e-05, "step": 874 }, { "epoch": 2.2984878369493753, "high_lr": 0.00054, "low_lr": 1.0800000000000002e-05, "step": 874 }, { "epoch": 2.2984878369493753, "high_lr": 0.00054, "low_lr": 1.0800000000000002e-05, "step": 874 }, { "epoch": 2.2984878369493753, "high_lr": 0.00054, "low_lr": 1.0800000000000002e-05, "step": 874 }, { "epoch": 2.2984878369493753, "high_lr": 0.00054, "low_lr": 1.0800000000000002e-05, "step": 874 }, { "epoch": 2.3011176857330704, "grad_norm": 1.3001848459243774, "learning_rate": 0.0005394736842105263, "loss": 1.3902, "step": 875 }, { "epoch": 2.3011176857330704, "high_lr": 0.0005394736842105263, "low_lr": 1.0789473684210528e-05, "step": 875 }, { "epoch": 2.3011176857330704, "high_lr": 0.0005394736842105263, "low_lr": 1.0789473684210528e-05, "step": 875 }, { "epoch": 2.3011176857330704, "high_lr": 0.0005394736842105263, "low_lr": 1.0789473684210528e-05, "step": 875 }, { "epoch": 2.3011176857330704, "high_lr": 0.0005394736842105263, "low_lr": 1.0789473684210528e-05, "step": 875 }, { "epoch": 2.3011176857330704, "high_lr": 0.0005394736842105263, "low_lr": 1.0789473684210528e-05, "step": 875 }, { "epoch": 2.3011176857330704, "high_lr": 0.0005394736842105263, "low_lr": 1.0789473684210528e-05, "step": 875 }, { "epoch": 2.3011176857330704, "high_lr": 0.0005394736842105263, "low_lr": 1.0789473684210528e-05, "step": 875 }, { "epoch": 2.3011176857330704, "high_lr": 0.0005394736842105263, "low_lr": 1.0789473684210528e-05, "step": 875 }, { "epoch": 2.3037475345167655, "grad_norm": 1.207558274269104, "learning_rate": 0.0005389473684210526, "loss": 1.3501, "step": 876 }, { "epoch": 2.3037475345167655, "high_lr": 0.0005389473684210526, "low_lr": 1.0778947368421053e-05, "step": 876 }, { "epoch": 2.3037475345167655, "high_lr": 0.0005389473684210526, "low_lr": 1.0778947368421053e-05, "step": 876 }, { "epoch": 2.3037475345167655, "high_lr": 0.0005389473684210526, "low_lr": 1.0778947368421053e-05, "step": 876 }, { "epoch": 2.3037475345167655, "high_lr": 0.0005389473684210526, "low_lr": 1.0778947368421053e-05, "step": 876 }, { "epoch": 2.3037475345167655, "high_lr": 0.0005389473684210526, "low_lr": 1.0778947368421053e-05, "step": 876 }, { "epoch": 2.3037475345167655, "high_lr": 0.0005389473684210526, "low_lr": 1.0778947368421053e-05, "step": 876 }, { "epoch": 2.3037475345167655, "high_lr": 0.0005389473684210526, "low_lr": 1.0778947368421053e-05, "step": 876 }, { "epoch": 2.3037475345167655, "high_lr": 0.0005389473684210526, "low_lr": 1.0778947368421053e-05, "step": 876 }, { "epoch": 2.30637738330046, "grad_norm": 1.2861137390136719, "learning_rate": 0.000538421052631579, "loss": 1.3996, "step": 877 }, { "epoch": 2.30637738330046, "high_lr": 0.000538421052631579, "low_lr": 1.0768421052631579e-05, "step": 877 }, { "epoch": 2.30637738330046, "high_lr": 0.000538421052631579, "low_lr": 1.0768421052631579e-05, "step": 877 }, { "epoch": 2.30637738330046, "high_lr": 0.000538421052631579, "low_lr": 1.0768421052631579e-05, "step": 877 }, { "epoch": 2.30637738330046, "high_lr": 0.000538421052631579, "low_lr": 1.0768421052631579e-05, "step": 877 }, { "epoch": 2.30637738330046, "high_lr": 0.000538421052631579, "low_lr": 1.0768421052631579e-05, "step": 877 }, { "epoch": 2.30637738330046, "high_lr": 0.000538421052631579, "low_lr": 1.0768421052631579e-05, "step": 877 }, { "epoch": 2.30637738330046, "high_lr": 0.000538421052631579, "low_lr": 1.0768421052631579e-05, "step": 877 }, { "epoch": 2.30637738330046, "high_lr": 0.000538421052631579, "low_lr": 1.0768421052631579e-05, "step": 877 }, { "epoch": 2.3090072320841553, "grad_norm": 1.238488793373108, "learning_rate": 0.0005378947368421054, "loss": 1.3846, "step": 878 }, { "epoch": 2.3090072320841553, "high_lr": 0.0005378947368421054, "low_lr": 1.0757894736842107e-05, "step": 878 }, { "epoch": 2.3090072320841553, "high_lr": 0.0005378947368421054, "low_lr": 1.0757894736842107e-05, "step": 878 }, { "epoch": 2.3090072320841553, "high_lr": 0.0005378947368421054, "low_lr": 1.0757894736842107e-05, "step": 878 }, { "epoch": 2.3090072320841553, "high_lr": 0.0005378947368421054, "low_lr": 1.0757894736842107e-05, "step": 878 }, { "epoch": 2.3090072320841553, "high_lr": 0.0005378947368421054, "low_lr": 1.0757894736842107e-05, "step": 878 }, { "epoch": 2.3090072320841553, "high_lr": 0.0005378947368421054, "low_lr": 1.0757894736842107e-05, "step": 878 }, { "epoch": 2.3090072320841553, "high_lr": 0.0005378947368421054, "low_lr": 1.0757894736842107e-05, "step": 878 }, { "epoch": 2.3090072320841553, "high_lr": 0.0005378947368421054, "low_lr": 1.0757894736842107e-05, "step": 878 }, { "epoch": 2.31163708086785, "grad_norm": 1.239641785621643, "learning_rate": 0.0005373684210526317, "loss": 1.3896, "step": 879 }, { "epoch": 2.31163708086785, "high_lr": 0.0005373684210526317, "low_lr": 1.0747368421052633e-05, "step": 879 }, { "epoch": 2.31163708086785, "high_lr": 0.0005373684210526317, "low_lr": 1.0747368421052633e-05, "step": 879 }, { "epoch": 2.31163708086785, "high_lr": 0.0005373684210526317, "low_lr": 1.0747368421052633e-05, "step": 879 }, { "epoch": 2.31163708086785, "high_lr": 0.0005373684210526317, "low_lr": 1.0747368421052633e-05, "step": 879 }, { "epoch": 2.31163708086785, "high_lr": 0.0005373684210526317, "low_lr": 1.0747368421052633e-05, "step": 879 }, { "epoch": 2.31163708086785, "high_lr": 0.0005373684210526317, "low_lr": 1.0747368421052633e-05, "step": 879 }, { "epoch": 2.31163708086785, "high_lr": 0.0005373684210526317, "low_lr": 1.0747368421052633e-05, "step": 879 }, { "epoch": 2.31163708086785, "high_lr": 0.0005373684210526317, "low_lr": 1.0747368421052633e-05, "step": 879 }, { "epoch": 2.314266929651545, "grad_norm": 1.2383469343185425, "learning_rate": 0.0005368421052631579, "loss": 1.3641, "step": 880 }, { "epoch": 2.314266929651545, "high_lr": 0.0005368421052631579, "low_lr": 1.073684210526316e-05, "step": 880 }, { "epoch": 2.314266929651545, "high_lr": 0.0005368421052631579, "low_lr": 1.073684210526316e-05, "step": 880 }, { "epoch": 2.314266929651545, "high_lr": 0.0005368421052631579, "low_lr": 1.073684210526316e-05, "step": 880 }, { "epoch": 2.314266929651545, "high_lr": 0.0005368421052631579, "low_lr": 1.073684210526316e-05, "step": 880 }, { "epoch": 2.314266929651545, "high_lr": 0.0005368421052631579, "low_lr": 1.073684210526316e-05, "step": 880 }, { "epoch": 2.314266929651545, "high_lr": 0.0005368421052631579, "low_lr": 1.073684210526316e-05, "step": 880 }, { "epoch": 2.314266929651545, "high_lr": 0.0005368421052631579, "low_lr": 1.073684210526316e-05, "step": 880 }, { "epoch": 2.314266929651545, "high_lr": 0.0005368421052631579, "low_lr": 1.073684210526316e-05, "step": 880 }, { "epoch": 2.31689677843524, "grad_norm": 1.268680214881897, "learning_rate": 0.0005363157894736842, "loss": 1.368, "step": 881 }, { "epoch": 2.31689677843524, "high_lr": 0.0005363157894736842, "low_lr": 1.0726315789473684e-05, "step": 881 }, { "epoch": 2.31689677843524, "high_lr": 0.0005363157894736842, "low_lr": 1.0726315789473684e-05, "step": 881 }, { "epoch": 2.31689677843524, "high_lr": 0.0005363157894736842, "low_lr": 1.0726315789473684e-05, "step": 881 }, { "epoch": 2.31689677843524, "high_lr": 0.0005363157894736842, "low_lr": 1.0726315789473684e-05, "step": 881 }, { "epoch": 2.31689677843524, "high_lr": 0.0005363157894736842, "low_lr": 1.0726315789473684e-05, "step": 881 }, { "epoch": 2.31689677843524, "high_lr": 0.0005363157894736842, "low_lr": 1.0726315789473684e-05, "step": 881 }, { "epoch": 2.31689677843524, "high_lr": 0.0005363157894736842, "low_lr": 1.0726315789473684e-05, "step": 881 }, { "epoch": 2.31689677843524, "high_lr": 0.0005363157894736842, "low_lr": 1.0726315789473684e-05, "step": 881 }, { "epoch": 2.3195266272189348, "grad_norm": 1.2960704565048218, "learning_rate": 0.0005357894736842105, "loss": 1.4236, "step": 882 }, { "epoch": 2.3195266272189348, "high_lr": 0.0005357894736842105, "low_lr": 1.071578947368421e-05, "step": 882 }, { "epoch": 2.3195266272189348, "high_lr": 0.0005357894736842105, "low_lr": 1.071578947368421e-05, "step": 882 }, { "epoch": 2.3195266272189348, "high_lr": 0.0005357894736842105, "low_lr": 1.071578947368421e-05, "step": 882 }, { "epoch": 2.3195266272189348, "high_lr": 0.0005357894736842105, "low_lr": 1.071578947368421e-05, "step": 882 }, { "epoch": 2.3195266272189348, "high_lr": 0.0005357894736842105, "low_lr": 1.071578947368421e-05, "step": 882 }, { "epoch": 2.3195266272189348, "high_lr": 0.0005357894736842105, "low_lr": 1.071578947368421e-05, "step": 882 }, { "epoch": 2.3195266272189348, "high_lr": 0.0005357894736842105, "low_lr": 1.071578947368421e-05, "step": 882 }, { "epoch": 2.3195266272189348, "high_lr": 0.0005357894736842105, "low_lr": 1.071578947368421e-05, "step": 882 }, { "epoch": 2.32215647600263, "grad_norm": 1.2486226558685303, "learning_rate": 0.0005352631578947369, "loss": 1.3622, "step": 883 }, { "epoch": 2.32215647600263, "high_lr": 0.0005352631578947369, "low_lr": 1.0705263157894739e-05, "step": 883 }, { "epoch": 2.32215647600263, "high_lr": 0.0005352631578947369, "low_lr": 1.0705263157894739e-05, "step": 883 }, { "epoch": 2.32215647600263, "high_lr": 0.0005352631578947369, "low_lr": 1.0705263157894739e-05, "step": 883 }, { "epoch": 2.32215647600263, "high_lr": 0.0005352631578947369, "low_lr": 1.0705263157894739e-05, "step": 883 }, { "epoch": 2.32215647600263, "high_lr": 0.0005352631578947369, "low_lr": 1.0705263157894739e-05, "step": 883 }, { "epoch": 2.32215647600263, "high_lr": 0.0005352631578947369, "low_lr": 1.0705263157894739e-05, "step": 883 }, { "epoch": 2.32215647600263, "high_lr": 0.0005352631578947369, "low_lr": 1.0705263157894739e-05, "step": 883 }, { "epoch": 2.32215647600263, "high_lr": 0.0005352631578947369, "low_lr": 1.0705263157894739e-05, "step": 883 }, { "epoch": 2.324786324786325, "grad_norm": 1.1381765604019165, "learning_rate": 0.0005347368421052632, "loss": 1.3331, "step": 884 }, { "epoch": 2.324786324786325, "high_lr": 0.0005347368421052632, "low_lr": 1.0694736842105265e-05, "step": 884 }, { "epoch": 2.324786324786325, "high_lr": 0.0005347368421052632, "low_lr": 1.0694736842105265e-05, "step": 884 }, { "epoch": 2.324786324786325, "high_lr": 0.0005347368421052632, "low_lr": 1.0694736842105265e-05, "step": 884 }, { "epoch": 2.324786324786325, "high_lr": 0.0005347368421052632, "low_lr": 1.0694736842105265e-05, "step": 884 }, { "epoch": 2.324786324786325, "high_lr": 0.0005347368421052632, "low_lr": 1.0694736842105265e-05, "step": 884 }, { "epoch": 2.324786324786325, "high_lr": 0.0005347368421052632, "low_lr": 1.0694736842105265e-05, "step": 884 }, { "epoch": 2.324786324786325, "high_lr": 0.0005347368421052632, "low_lr": 1.0694736842105265e-05, "step": 884 }, { "epoch": 2.324786324786325, "high_lr": 0.0005347368421052632, "low_lr": 1.0694736842105265e-05, "step": 884 }, { "epoch": 2.3274161735700196, "grad_norm": 1.3162821531295776, "learning_rate": 0.0005342105263157895, "loss": 1.4184, "step": 885 }, { "epoch": 2.3274161735700196, "high_lr": 0.0005342105263157895, "low_lr": 1.068421052631579e-05, "step": 885 }, { "epoch": 2.3274161735700196, "high_lr": 0.0005342105263157895, "low_lr": 1.068421052631579e-05, "step": 885 }, { "epoch": 2.3274161735700196, "high_lr": 0.0005342105263157895, "low_lr": 1.068421052631579e-05, "step": 885 }, { "epoch": 2.3274161735700196, "high_lr": 0.0005342105263157895, "low_lr": 1.068421052631579e-05, "step": 885 }, { "epoch": 2.3274161735700196, "high_lr": 0.0005342105263157895, "low_lr": 1.068421052631579e-05, "step": 885 }, { "epoch": 2.3274161735700196, "high_lr": 0.0005342105263157895, "low_lr": 1.068421052631579e-05, "step": 885 }, { "epoch": 2.3274161735700196, "high_lr": 0.0005342105263157895, "low_lr": 1.068421052631579e-05, "step": 885 }, { "epoch": 2.3274161735700196, "high_lr": 0.0005342105263157895, "low_lr": 1.068421052631579e-05, "step": 885 }, { "epoch": 2.3300460223537147, "grad_norm": 1.3376330137252808, "learning_rate": 0.0005336842105263158, "loss": 1.419, "step": 886 }, { "epoch": 2.3300460223537147, "high_lr": 0.0005336842105263158, "low_lr": 1.0673684210526316e-05, "step": 886 }, { "epoch": 2.3300460223537147, "high_lr": 0.0005336842105263158, "low_lr": 1.0673684210526316e-05, "step": 886 }, { "epoch": 2.3300460223537147, "high_lr": 0.0005336842105263158, "low_lr": 1.0673684210526316e-05, "step": 886 }, { "epoch": 2.3300460223537147, "high_lr": 0.0005336842105263158, "low_lr": 1.0673684210526316e-05, "step": 886 }, { "epoch": 2.3300460223537147, "high_lr": 0.0005336842105263158, "low_lr": 1.0673684210526316e-05, "step": 886 }, { "epoch": 2.3300460223537147, "high_lr": 0.0005336842105263158, "low_lr": 1.0673684210526316e-05, "step": 886 }, { "epoch": 2.3300460223537147, "high_lr": 0.0005336842105263158, "low_lr": 1.0673684210526316e-05, "step": 886 }, { "epoch": 2.3300460223537147, "high_lr": 0.0005336842105263158, "low_lr": 1.0673684210526316e-05, "step": 886 }, { "epoch": 2.3326758711374094, "grad_norm": 1.2614970207214355, "learning_rate": 0.000533157894736842, "loss": 1.4318, "step": 887 }, { "epoch": 2.3326758711374094, "high_lr": 0.000533157894736842, "low_lr": 1.0663157894736842e-05, "step": 887 }, { "epoch": 2.3326758711374094, "high_lr": 0.000533157894736842, "low_lr": 1.0663157894736842e-05, "step": 887 }, { "epoch": 2.3326758711374094, "high_lr": 0.000533157894736842, "low_lr": 1.0663157894736842e-05, "step": 887 }, { "epoch": 2.3326758711374094, "high_lr": 0.000533157894736842, "low_lr": 1.0663157894736842e-05, "step": 887 }, { "epoch": 2.3326758711374094, "high_lr": 0.000533157894736842, "low_lr": 1.0663157894736842e-05, "step": 887 }, { "epoch": 2.3326758711374094, "high_lr": 0.000533157894736842, "low_lr": 1.0663157894736842e-05, "step": 887 }, { "epoch": 2.3326758711374094, "high_lr": 0.000533157894736842, "low_lr": 1.0663157894736842e-05, "step": 887 }, { "epoch": 2.3326758711374094, "high_lr": 0.000533157894736842, "low_lr": 1.0663157894736842e-05, "step": 887 }, { "epoch": 2.3353057199211045, "grad_norm": 1.267763614654541, "learning_rate": 0.0005326315789473684, "loss": 1.3359, "step": 888 }, { "epoch": 2.3353057199211045, "high_lr": 0.0005326315789473684, "low_lr": 1.065263157894737e-05, "step": 888 }, { "epoch": 2.3353057199211045, "high_lr": 0.0005326315789473684, "low_lr": 1.065263157894737e-05, "step": 888 }, { "epoch": 2.3353057199211045, "high_lr": 0.0005326315789473684, "low_lr": 1.065263157894737e-05, "step": 888 }, { "epoch": 2.3353057199211045, "high_lr": 0.0005326315789473684, "low_lr": 1.065263157894737e-05, "step": 888 }, { "epoch": 2.3353057199211045, "high_lr": 0.0005326315789473684, "low_lr": 1.065263157894737e-05, "step": 888 }, { "epoch": 2.3353057199211045, "high_lr": 0.0005326315789473684, "low_lr": 1.065263157894737e-05, "step": 888 }, { "epoch": 2.3353057199211045, "high_lr": 0.0005326315789473684, "low_lr": 1.065263157894737e-05, "step": 888 }, { "epoch": 2.3353057199211045, "high_lr": 0.0005326315789473684, "low_lr": 1.065263157894737e-05, "step": 888 }, { "epoch": 2.3379355687047996, "grad_norm": 1.2461472749710083, "learning_rate": 0.0005321052631578947, "loss": 1.4038, "step": 889 }, { "epoch": 2.3379355687047996, "high_lr": 0.0005321052631578947, "low_lr": 1.0642105263157897e-05, "step": 889 }, { "epoch": 2.3379355687047996, "high_lr": 0.0005321052631578947, "low_lr": 1.0642105263157897e-05, "step": 889 }, { "epoch": 2.3379355687047996, "high_lr": 0.0005321052631578947, "low_lr": 1.0642105263157897e-05, "step": 889 }, { "epoch": 2.3379355687047996, "high_lr": 0.0005321052631578947, "low_lr": 1.0642105263157897e-05, "step": 889 }, { "epoch": 2.3379355687047996, "high_lr": 0.0005321052631578947, "low_lr": 1.0642105263157897e-05, "step": 889 }, { "epoch": 2.3379355687047996, "high_lr": 0.0005321052631578947, "low_lr": 1.0642105263157897e-05, "step": 889 }, { "epoch": 2.3379355687047996, "high_lr": 0.0005321052631578947, "low_lr": 1.0642105263157897e-05, "step": 889 }, { "epoch": 2.3379355687047996, "high_lr": 0.0005321052631578947, "low_lr": 1.0642105263157897e-05, "step": 889 }, { "epoch": 2.3405654174884942, "grad_norm": 1.2365288734436035, "learning_rate": 0.000531578947368421, "loss": 1.3389, "step": 890 }, { "epoch": 2.3405654174884942, "high_lr": 0.000531578947368421, "low_lr": 1.0631578947368421e-05, "step": 890 }, { "epoch": 2.3405654174884942, "high_lr": 0.000531578947368421, "low_lr": 1.0631578947368421e-05, "step": 890 }, { "epoch": 2.3405654174884942, "high_lr": 0.000531578947368421, "low_lr": 1.0631578947368421e-05, "step": 890 }, { "epoch": 2.3405654174884942, "high_lr": 0.000531578947368421, "low_lr": 1.0631578947368421e-05, "step": 890 }, { "epoch": 2.3405654174884942, "high_lr": 0.000531578947368421, "low_lr": 1.0631578947368421e-05, "step": 890 }, { "epoch": 2.3405654174884942, "high_lr": 0.000531578947368421, "low_lr": 1.0631578947368421e-05, "step": 890 }, { "epoch": 2.3405654174884942, "high_lr": 0.000531578947368421, "low_lr": 1.0631578947368421e-05, "step": 890 }, { "epoch": 2.3405654174884942, "high_lr": 0.000531578947368421, "low_lr": 1.0631578947368421e-05, "step": 890 }, { "epoch": 2.3431952662721893, "grad_norm": 1.289572834968567, "learning_rate": 0.0005310526315789473, "loss": 1.4167, "step": 891 }, { "epoch": 2.3431952662721893, "high_lr": 0.0005310526315789473, "low_lr": 1.0621052631578948e-05, "step": 891 }, { "epoch": 2.3431952662721893, "high_lr": 0.0005310526315789473, "low_lr": 1.0621052631578948e-05, "step": 891 }, { "epoch": 2.3431952662721893, "high_lr": 0.0005310526315789473, "low_lr": 1.0621052631578948e-05, "step": 891 }, { "epoch": 2.3431952662721893, "high_lr": 0.0005310526315789473, "low_lr": 1.0621052631578948e-05, "step": 891 }, { "epoch": 2.3431952662721893, "high_lr": 0.0005310526315789473, "low_lr": 1.0621052631578948e-05, "step": 891 }, { "epoch": 2.3431952662721893, "high_lr": 0.0005310526315789473, "low_lr": 1.0621052631578948e-05, "step": 891 }, { "epoch": 2.3431952662721893, "high_lr": 0.0005310526315789473, "low_lr": 1.0621052631578948e-05, "step": 891 }, { "epoch": 2.3431952662721893, "high_lr": 0.0005310526315789473, "low_lr": 1.0621052631578948e-05, "step": 891 }, { "epoch": 2.3458251150558844, "grad_norm": 1.2580618858337402, "learning_rate": 0.0005305263157894737, "loss": 1.4169, "step": 892 }, { "epoch": 2.3458251150558844, "high_lr": 0.0005305263157894737, "low_lr": 1.0610526315789476e-05, "step": 892 }, { "epoch": 2.3458251150558844, "high_lr": 0.0005305263157894737, "low_lr": 1.0610526315789476e-05, "step": 892 }, { "epoch": 2.3458251150558844, "high_lr": 0.0005305263157894737, "low_lr": 1.0610526315789476e-05, "step": 892 }, { "epoch": 2.3458251150558844, "high_lr": 0.0005305263157894737, "low_lr": 1.0610526315789476e-05, "step": 892 }, { "epoch": 2.3458251150558844, "high_lr": 0.0005305263157894737, "low_lr": 1.0610526315789476e-05, "step": 892 }, { "epoch": 2.3458251150558844, "high_lr": 0.0005305263157894737, "low_lr": 1.0610526315789476e-05, "step": 892 }, { "epoch": 2.3458251150558844, "high_lr": 0.0005305263157894737, "low_lr": 1.0610526315789476e-05, "step": 892 }, { "epoch": 2.3458251150558844, "high_lr": 0.0005305263157894737, "low_lr": 1.0610526315789476e-05, "step": 892 }, { "epoch": 2.348454963839579, "grad_norm": 1.1528156995773315, "learning_rate": 0.0005300000000000001, "loss": 1.3834, "step": 893 }, { "epoch": 2.348454963839579, "high_lr": 0.0005300000000000001, "low_lr": 1.0600000000000002e-05, "step": 893 }, { "epoch": 2.348454963839579, "high_lr": 0.0005300000000000001, "low_lr": 1.0600000000000002e-05, "step": 893 }, { "epoch": 2.348454963839579, "high_lr": 0.0005300000000000001, "low_lr": 1.0600000000000002e-05, "step": 893 }, { "epoch": 2.348454963839579, "high_lr": 0.0005300000000000001, "low_lr": 1.0600000000000002e-05, "step": 893 }, { "epoch": 2.348454963839579, "high_lr": 0.0005300000000000001, "low_lr": 1.0600000000000002e-05, "step": 893 }, { "epoch": 2.348454963839579, "high_lr": 0.0005300000000000001, "low_lr": 1.0600000000000002e-05, "step": 893 }, { "epoch": 2.348454963839579, "high_lr": 0.0005300000000000001, "low_lr": 1.0600000000000002e-05, "step": 893 }, { "epoch": 2.348454963839579, "high_lr": 0.0005300000000000001, "low_lr": 1.0600000000000002e-05, "step": 893 }, { "epoch": 2.351084812623274, "grad_norm": 1.290389895439148, "learning_rate": 0.0005294736842105264, "loss": 1.4016, "step": 894 }, { "epoch": 2.351084812623274, "high_lr": 0.0005294736842105264, "low_lr": 1.0589473684210526e-05, "step": 894 }, { "epoch": 2.351084812623274, "high_lr": 0.0005294736842105264, "low_lr": 1.0589473684210526e-05, "step": 894 }, { "epoch": 2.351084812623274, "high_lr": 0.0005294736842105264, "low_lr": 1.0589473684210526e-05, "step": 894 }, { "epoch": 2.351084812623274, "high_lr": 0.0005294736842105264, "low_lr": 1.0589473684210526e-05, "step": 894 }, { "epoch": 2.351084812623274, "high_lr": 0.0005294736842105264, "low_lr": 1.0589473684210526e-05, "step": 894 }, { "epoch": 2.351084812623274, "high_lr": 0.0005294736842105264, "low_lr": 1.0589473684210526e-05, "step": 894 }, { "epoch": 2.351084812623274, "high_lr": 0.0005294736842105264, "low_lr": 1.0589473684210526e-05, "step": 894 }, { "epoch": 2.351084812623274, "high_lr": 0.0005294736842105264, "low_lr": 1.0589473684210526e-05, "step": 894 }, { "epoch": 2.3537146614069693, "grad_norm": 1.329153060913086, "learning_rate": 0.0005289473684210527, "loss": 1.4091, "step": 895 }, { "epoch": 2.3537146614069693, "high_lr": 0.0005289473684210527, "low_lr": 1.0578947368421053e-05, "step": 895 }, { "epoch": 2.3537146614069693, "high_lr": 0.0005289473684210527, "low_lr": 1.0578947368421053e-05, "step": 895 }, { "epoch": 2.3537146614069693, "high_lr": 0.0005289473684210527, "low_lr": 1.0578947368421053e-05, "step": 895 }, { "epoch": 2.3537146614069693, "high_lr": 0.0005289473684210527, "low_lr": 1.0578947368421053e-05, "step": 895 }, { "epoch": 2.3537146614069693, "high_lr": 0.0005289473684210527, "low_lr": 1.0578947368421053e-05, "step": 895 }, { "epoch": 2.3537146614069693, "high_lr": 0.0005289473684210527, "low_lr": 1.0578947368421053e-05, "step": 895 }, { "epoch": 2.3537146614069693, "high_lr": 0.0005289473684210527, "low_lr": 1.0578947368421053e-05, "step": 895 }, { "epoch": 2.3537146614069693, "high_lr": 0.0005289473684210527, "low_lr": 1.0578947368421053e-05, "step": 895 }, { "epoch": 2.356344510190664, "grad_norm": 1.2408227920532227, "learning_rate": 0.000528421052631579, "loss": 1.3911, "step": 896 }, { "epoch": 2.356344510190664, "high_lr": 0.000528421052631579, "low_lr": 1.0568421052631579e-05, "step": 896 }, { "epoch": 2.356344510190664, "high_lr": 0.000528421052631579, "low_lr": 1.0568421052631579e-05, "step": 896 }, { "epoch": 2.356344510190664, "high_lr": 0.000528421052631579, "low_lr": 1.0568421052631579e-05, "step": 896 }, { "epoch": 2.356344510190664, "high_lr": 0.000528421052631579, "low_lr": 1.0568421052631579e-05, "step": 896 }, { "epoch": 2.356344510190664, "high_lr": 0.000528421052631579, "low_lr": 1.0568421052631579e-05, "step": 896 }, { "epoch": 2.356344510190664, "high_lr": 0.000528421052631579, "low_lr": 1.0568421052631579e-05, "step": 896 }, { "epoch": 2.356344510190664, "high_lr": 0.000528421052631579, "low_lr": 1.0568421052631579e-05, "step": 896 }, { "epoch": 2.356344510190664, "high_lr": 0.000528421052631579, "low_lr": 1.0568421052631579e-05, "step": 896 }, { "epoch": 2.358974358974359, "grad_norm": 1.286737084388733, "learning_rate": 0.0005278947368421053, "loss": 1.4165, "step": 897 }, { "epoch": 2.358974358974359, "high_lr": 0.0005278947368421053, "low_lr": 1.0557894736842107e-05, "step": 897 }, { "epoch": 2.358974358974359, "high_lr": 0.0005278947368421053, "low_lr": 1.0557894736842107e-05, "step": 897 }, { "epoch": 2.358974358974359, "high_lr": 0.0005278947368421053, "low_lr": 1.0557894736842107e-05, "step": 897 }, { "epoch": 2.358974358974359, "high_lr": 0.0005278947368421053, "low_lr": 1.0557894736842107e-05, "step": 897 }, { "epoch": 2.358974358974359, "high_lr": 0.0005278947368421053, "low_lr": 1.0557894736842107e-05, "step": 897 }, { "epoch": 2.358974358974359, "high_lr": 0.0005278947368421053, "low_lr": 1.0557894736842107e-05, "step": 897 }, { "epoch": 2.358974358974359, "high_lr": 0.0005278947368421053, "low_lr": 1.0557894736842107e-05, "step": 897 }, { "epoch": 2.358974358974359, "high_lr": 0.0005278947368421053, "low_lr": 1.0557894736842107e-05, "step": 897 }, { "epoch": 2.361604207758054, "grad_norm": 1.2762736082077026, "learning_rate": 0.0005273684210526316, "loss": 1.403, "step": 898 }, { "epoch": 2.361604207758054, "high_lr": 0.0005273684210526316, "low_lr": 1.0547368421052633e-05, "step": 898 }, { "epoch": 2.361604207758054, "high_lr": 0.0005273684210526316, "low_lr": 1.0547368421052633e-05, "step": 898 }, { "epoch": 2.361604207758054, "high_lr": 0.0005273684210526316, "low_lr": 1.0547368421052633e-05, "step": 898 }, { "epoch": 2.361604207758054, "high_lr": 0.0005273684210526316, "low_lr": 1.0547368421052633e-05, "step": 898 }, { "epoch": 2.361604207758054, "high_lr": 0.0005273684210526316, "low_lr": 1.0547368421052633e-05, "step": 898 }, { "epoch": 2.361604207758054, "high_lr": 0.0005273684210526316, "low_lr": 1.0547368421052633e-05, "step": 898 }, { "epoch": 2.361604207758054, "high_lr": 0.0005273684210526316, "low_lr": 1.0547368421052633e-05, "step": 898 }, { "epoch": 2.361604207758054, "high_lr": 0.0005273684210526316, "low_lr": 1.0547368421052633e-05, "step": 898 }, { "epoch": 2.364234056541749, "grad_norm": 1.1602425575256348, "learning_rate": 0.0005268421052631579, "loss": 1.3799, "step": 899 }, { "epoch": 2.364234056541749, "high_lr": 0.0005268421052631579, "low_lr": 1.0536842105263158e-05, "step": 899 }, { "epoch": 2.364234056541749, "high_lr": 0.0005268421052631579, "low_lr": 1.0536842105263158e-05, "step": 899 }, { "epoch": 2.364234056541749, "high_lr": 0.0005268421052631579, "low_lr": 1.0536842105263158e-05, "step": 899 }, { "epoch": 2.364234056541749, "high_lr": 0.0005268421052631579, "low_lr": 1.0536842105263158e-05, "step": 899 }, { "epoch": 2.364234056541749, "high_lr": 0.0005268421052631579, "low_lr": 1.0536842105263158e-05, "step": 899 }, { "epoch": 2.364234056541749, "high_lr": 0.0005268421052631579, "low_lr": 1.0536842105263158e-05, "step": 899 }, { "epoch": 2.364234056541749, "high_lr": 0.0005268421052631579, "low_lr": 1.0536842105263158e-05, "step": 899 }, { "epoch": 2.364234056541749, "high_lr": 0.0005268421052631579, "low_lr": 1.0536842105263158e-05, "step": 899 }, { "epoch": 2.366863905325444, "grad_norm": 1.2419919967651367, "learning_rate": 0.0005263157894736842, "loss": 1.3534, "step": 900 }, { "epoch": 2.366863905325444, "high_lr": 0.0005263157894736842, "low_lr": 1.0526315789473684e-05, "step": 900 }, { "epoch": 2.366863905325444, "high_lr": 0.0005263157894736842, "low_lr": 1.0526315789473684e-05, "step": 900 }, { "epoch": 2.366863905325444, "high_lr": 0.0005263157894736842, "low_lr": 1.0526315789473684e-05, "step": 900 }, { "epoch": 2.366863905325444, "high_lr": 0.0005263157894736842, "low_lr": 1.0526315789473684e-05, "step": 900 }, { "epoch": 2.366863905325444, "high_lr": 0.0005263157894736842, "low_lr": 1.0526315789473684e-05, "step": 900 }, { "epoch": 2.366863905325444, "high_lr": 0.0005263157894736842, "low_lr": 1.0526315789473684e-05, "step": 900 }, { "epoch": 2.366863905325444, "high_lr": 0.0005263157894736842, "low_lr": 1.0526315789473684e-05, "step": 900 }, { "epoch": 2.366863905325444, "high_lr": 0.0005263157894736842, "low_lr": 1.0526315789473684e-05, "step": 900 }, { "epoch": 2.3694937541091385, "grad_norm": 1.212281346321106, "learning_rate": 0.0005257894736842105, "loss": 1.3761, "step": 901 }, { "epoch": 2.3694937541091385, "high_lr": 0.0005257894736842105, "low_lr": 1.051578947368421e-05, "step": 901 }, { "epoch": 2.3694937541091385, "high_lr": 0.0005257894736842105, "low_lr": 1.051578947368421e-05, "step": 901 }, { "epoch": 2.3694937541091385, "high_lr": 0.0005257894736842105, "low_lr": 1.051578947368421e-05, "step": 901 }, { "epoch": 2.3694937541091385, "high_lr": 0.0005257894736842105, "low_lr": 1.051578947368421e-05, "step": 901 }, { "epoch": 2.3694937541091385, "high_lr": 0.0005257894736842105, "low_lr": 1.051578947368421e-05, "step": 901 }, { "epoch": 2.3694937541091385, "high_lr": 0.0005257894736842105, "low_lr": 1.051578947368421e-05, "step": 901 }, { "epoch": 2.3694937541091385, "high_lr": 0.0005257894736842105, "low_lr": 1.051578947368421e-05, "step": 901 }, { "epoch": 2.3694937541091385, "high_lr": 0.0005257894736842105, "low_lr": 1.051578947368421e-05, "step": 901 }, { "epoch": 2.3721236028928336, "grad_norm": 1.226370930671692, "learning_rate": 0.0005252631578947369, "loss": 1.3435, "step": 902 }, { "epoch": 2.3721236028928336, "high_lr": 0.0005252631578947369, "low_lr": 1.0505263157894739e-05, "step": 902 }, { "epoch": 2.3721236028928336, "high_lr": 0.0005252631578947369, "low_lr": 1.0505263157894739e-05, "step": 902 }, { "epoch": 2.3721236028928336, "high_lr": 0.0005252631578947369, "low_lr": 1.0505263157894739e-05, "step": 902 }, { "epoch": 2.3721236028928336, "high_lr": 0.0005252631578947369, "low_lr": 1.0505263157894739e-05, "step": 902 }, { "epoch": 2.3721236028928336, "high_lr": 0.0005252631578947369, "low_lr": 1.0505263157894739e-05, "step": 902 }, { "epoch": 2.3721236028928336, "high_lr": 0.0005252631578947369, "low_lr": 1.0505263157894739e-05, "step": 902 }, { "epoch": 2.3721236028928336, "high_lr": 0.0005252631578947369, "low_lr": 1.0505263157894739e-05, "step": 902 }, { "epoch": 2.3721236028928336, "high_lr": 0.0005252631578947369, "low_lr": 1.0505263157894739e-05, "step": 902 }, { "epoch": 2.3747534516765287, "grad_norm": 1.2944639921188354, "learning_rate": 0.0005247368421052632, "loss": 1.4133, "step": 903 }, { "epoch": 2.3747534516765287, "high_lr": 0.0005247368421052632, "low_lr": 1.0494736842105263e-05, "step": 903 }, { "epoch": 2.3747534516765287, "high_lr": 0.0005247368421052632, "low_lr": 1.0494736842105263e-05, "step": 903 }, { "epoch": 2.3747534516765287, "high_lr": 0.0005247368421052632, "low_lr": 1.0494736842105263e-05, "step": 903 }, { "epoch": 2.3747534516765287, "high_lr": 0.0005247368421052632, "low_lr": 1.0494736842105263e-05, "step": 903 }, { "epoch": 2.3747534516765287, "high_lr": 0.0005247368421052632, "low_lr": 1.0494736842105263e-05, "step": 903 }, { "epoch": 2.3747534516765287, "high_lr": 0.0005247368421052632, "low_lr": 1.0494736842105263e-05, "step": 903 }, { "epoch": 2.3747534516765287, "high_lr": 0.0005247368421052632, "low_lr": 1.0494736842105263e-05, "step": 903 }, { "epoch": 2.3747534516765287, "high_lr": 0.0005247368421052632, "low_lr": 1.0494736842105263e-05, "step": 903 }, { "epoch": 2.3773833004602234, "grad_norm": 1.2294273376464844, "learning_rate": 0.0005242105263157895, "loss": 1.3538, "step": 904 }, { "epoch": 2.3773833004602234, "high_lr": 0.0005242105263157895, "low_lr": 1.048421052631579e-05, "step": 904 }, { "epoch": 2.3773833004602234, "high_lr": 0.0005242105263157895, "low_lr": 1.048421052631579e-05, "step": 904 }, { "epoch": 2.3773833004602234, "high_lr": 0.0005242105263157895, "low_lr": 1.048421052631579e-05, "step": 904 }, { "epoch": 2.3773833004602234, "high_lr": 0.0005242105263157895, "low_lr": 1.048421052631579e-05, "step": 904 }, { "epoch": 2.3773833004602234, "high_lr": 0.0005242105263157895, "low_lr": 1.048421052631579e-05, "step": 904 }, { "epoch": 2.3773833004602234, "high_lr": 0.0005242105263157895, "low_lr": 1.048421052631579e-05, "step": 904 }, { "epoch": 2.3773833004602234, "high_lr": 0.0005242105263157895, "low_lr": 1.048421052631579e-05, "step": 904 }, { "epoch": 2.3773833004602234, "high_lr": 0.0005242105263157895, "low_lr": 1.048421052631579e-05, "step": 904 }, { "epoch": 2.3800131492439185, "grad_norm": 1.2078107595443726, "learning_rate": 0.0005236842105263157, "loss": 1.3645, "step": 905 }, { "epoch": 2.3800131492439185, "high_lr": 0.0005236842105263157, "low_lr": 1.0473684210526316e-05, "step": 905 }, { "epoch": 2.3800131492439185, "high_lr": 0.0005236842105263157, "low_lr": 1.0473684210526316e-05, "step": 905 }, { "epoch": 2.3800131492439185, "high_lr": 0.0005236842105263157, "low_lr": 1.0473684210526316e-05, "step": 905 }, { "epoch": 2.3800131492439185, "high_lr": 0.0005236842105263157, "low_lr": 1.0473684210526316e-05, "step": 905 }, { "epoch": 2.3800131492439185, "high_lr": 0.0005236842105263157, "low_lr": 1.0473684210526316e-05, "step": 905 }, { "epoch": 2.3800131492439185, "high_lr": 0.0005236842105263157, "low_lr": 1.0473684210526316e-05, "step": 905 }, { "epoch": 2.3800131492439185, "high_lr": 0.0005236842105263157, "low_lr": 1.0473684210526316e-05, "step": 905 }, { "epoch": 2.3800131492439185, "high_lr": 0.0005236842105263157, "low_lr": 1.0473684210526316e-05, "step": 905 }, { "epoch": 2.3826429980276136, "grad_norm": 1.2253119945526123, "learning_rate": 0.0005231578947368421, "loss": 1.3828, "step": 906 }, { "epoch": 2.3826429980276136, "high_lr": 0.0005231578947368421, "low_lr": 1.0463157894736844e-05, "step": 906 }, { "epoch": 2.3826429980276136, "high_lr": 0.0005231578947368421, "low_lr": 1.0463157894736844e-05, "step": 906 }, { "epoch": 2.3826429980276136, "high_lr": 0.0005231578947368421, "low_lr": 1.0463157894736844e-05, "step": 906 }, { "epoch": 2.3826429980276136, "high_lr": 0.0005231578947368421, "low_lr": 1.0463157894736844e-05, "step": 906 }, { "epoch": 2.3826429980276136, "high_lr": 0.0005231578947368421, "low_lr": 1.0463157894736844e-05, "step": 906 }, { "epoch": 2.3826429980276136, "high_lr": 0.0005231578947368421, "low_lr": 1.0463157894736844e-05, "step": 906 }, { "epoch": 2.3826429980276136, "high_lr": 0.0005231578947368421, "low_lr": 1.0463157894736844e-05, "step": 906 }, { "epoch": 2.3826429980276136, "high_lr": 0.0005231578947368421, "low_lr": 1.0463157894736844e-05, "step": 906 }, { "epoch": 2.3852728468113082, "grad_norm": 1.2381949424743652, "learning_rate": 0.0005226315789473684, "loss": 1.4067, "step": 907 }, { "epoch": 2.3852728468113082, "high_lr": 0.0005226315789473684, "low_lr": 1.045263157894737e-05, "step": 907 }, { "epoch": 2.3852728468113082, "high_lr": 0.0005226315789473684, "low_lr": 1.045263157894737e-05, "step": 907 }, { "epoch": 2.3852728468113082, "high_lr": 0.0005226315789473684, "low_lr": 1.045263157894737e-05, "step": 907 }, { "epoch": 2.3852728468113082, "high_lr": 0.0005226315789473684, "low_lr": 1.045263157894737e-05, "step": 907 }, { "epoch": 2.3852728468113082, "high_lr": 0.0005226315789473684, "low_lr": 1.045263157894737e-05, "step": 907 }, { "epoch": 2.3852728468113082, "high_lr": 0.0005226315789473684, "low_lr": 1.045263157894737e-05, "step": 907 }, { "epoch": 2.3852728468113082, "high_lr": 0.0005226315789473684, "low_lr": 1.045263157894737e-05, "step": 907 }, { "epoch": 2.3852728468113082, "high_lr": 0.0005226315789473684, "low_lr": 1.045263157894737e-05, "step": 907 }, { "epoch": 2.3879026955950033, "grad_norm": 1.351272463798523, "learning_rate": 0.0005221052631578947, "loss": 1.4329, "step": 908 }, { "epoch": 2.3879026955950033, "high_lr": 0.0005221052631578947, "low_lr": 1.0442105263157895e-05, "step": 908 }, { "epoch": 2.3879026955950033, "high_lr": 0.0005221052631578947, "low_lr": 1.0442105263157895e-05, "step": 908 }, { "epoch": 2.3879026955950033, "high_lr": 0.0005221052631578947, "low_lr": 1.0442105263157895e-05, "step": 908 }, { "epoch": 2.3879026955950033, "high_lr": 0.0005221052631578947, "low_lr": 1.0442105263157895e-05, "step": 908 }, { "epoch": 2.3879026955950033, "high_lr": 0.0005221052631578947, "low_lr": 1.0442105263157895e-05, "step": 908 }, { "epoch": 2.3879026955950033, "high_lr": 0.0005221052631578947, "low_lr": 1.0442105263157895e-05, "step": 908 }, { "epoch": 2.3879026955950033, "high_lr": 0.0005221052631578947, "low_lr": 1.0442105263157895e-05, "step": 908 }, { "epoch": 2.3879026955950033, "high_lr": 0.0005221052631578947, "low_lr": 1.0442105263157895e-05, "step": 908 }, { "epoch": 2.390532544378698, "grad_norm": 1.2670356035232544, "learning_rate": 0.000521578947368421, "loss": 1.3633, "step": 909 }, { "epoch": 2.390532544378698, "high_lr": 0.000521578947368421, "low_lr": 1.0431578947368421e-05, "step": 909 }, { "epoch": 2.390532544378698, "high_lr": 0.000521578947368421, "low_lr": 1.0431578947368421e-05, "step": 909 }, { "epoch": 2.390532544378698, "high_lr": 0.000521578947368421, "low_lr": 1.0431578947368421e-05, "step": 909 }, { "epoch": 2.390532544378698, "high_lr": 0.000521578947368421, "low_lr": 1.0431578947368421e-05, "step": 909 }, { "epoch": 2.390532544378698, "high_lr": 0.000521578947368421, "low_lr": 1.0431578947368421e-05, "step": 909 }, { "epoch": 2.390532544378698, "high_lr": 0.000521578947368421, "low_lr": 1.0431578947368421e-05, "step": 909 }, { "epoch": 2.390532544378698, "high_lr": 0.000521578947368421, "low_lr": 1.0431578947368421e-05, "step": 909 }, { "epoch": 2.390532544378698, "high_lr": 0.000521578947368421, "low_lr": 1.0431578947368421e-05, "step": 909 }, { "epoch": 2.393162393162393, "grad_norm": 1.2720977067947388, "learning_rate": 0.0005210526315789474, "loss": 1.3497, "step": 910 }, { "epoch": 2.393162393162393, "high_lr": 0.0005210526315789474, "low_lr": 1.0421052631578948e-05, "step": 910 }, { "epoch": 2.393162393162393, "high_lr": 0.0005210526315789474, "low_lr": 1.0421052631578948e-05, "step": 910 }, { "epoch": 2.393162393162393, "high_lr": 0.0005210526315789474, "low_lr": 1.0421052631578948e-05, "step": 910 }, { "epoch": 2.393162393162393, "high_lr": 0.0005210526315789474, "low_lr": 1.0421052631578948e-05, "step": 910 }, { "epoch": 2.393162393162393, "high_lr": 0.0005210526315789474, "low_lr": 1.0421052631578948e-05, "step": 910 }, { "epoch": 2.393162393162393, "high_lr": 0.0005210526315789474, "low_lr": 1.0421052631578948e-05, "step": 910 }, { "epoch": 2.393162393162393, "high_lr": 0.0005210526315789474, "low_lr": 1.0421052631578948e-05, "step": 910 }, { "epoch": 2.393162393162393, "high_lr": 0.0005210526315789474, "low_lr": 1.0421052631578948e-05, "step": 910 }, { "epoch": 2.395792241946088, "grad_norm": 1.2801196575164795, "learning_rate": 0.0005205263157894738, "loss": 1.417, "step": 911 }, { "epoch": 2.395792241946088, "high_lr": 0.0005205263157894738, "low_lr": 1.0410526315789476e-05, "step": 911 }, { "epoch": 2.395792241946088, "high_lr": 0.0005205263157894738, "low_lr": 1.0410526315789476e-05, "step": 911 }, { "epoch": 2.395792241946088, "high_lr": 0.0005205263157894738, "low_lr": 1.0410526315789476e-05, "step": 911 }, { "epoch": 2.395792241946088, "high_lr": 0.0005205263157894738, "low_lr": 1.0410526315789476e-05, "step": 911 }, { "epoch": 2.395792241946088, "high_lr": 0.0005205263157894738, "low_lr": 1.0410526315789476e-05, "step": 911 }, { "epoch": 2.395792241946088, "high_lr": 0.0005205263157894738, "low_lr": 1.0410526315789476e-05, "step": 911 }, { "epoch": 2.395792241946088, "high_lr": 0.0005205263157894738, "low_lr": 1.0410526315789476e-05, "step": 911 }, { "epoch": 2.395792241946088, "high_lr": 0.0005205263157894738, "low_lr": 1.0410526315789476e-05, "step": 911 }, { "epoch": 2.398422090729783, "grad_norm": 1.3025861978530884, "learning_rate": 0.0005200000000000001, "loss": 1.4148, "step": 912 }, { "epoch": 2.398422090729783, "high_lr": 0.0005200000000000001, "low_lr": 1.04e-05, "step": 912 }, { "epoch": 2.398422090729783, "high_lr": 0.0005200000000000001, "low_lr": 1.04e-05, "step": 912 }, { "epoch": 2.398422090729783, "high_lr": 0.0005200000000000001, "low_lr": 1.04e-05, "step": 912 }, { "epoch": 2.398422090729783, "high_lr": 0.0005200000000000001, "low_lr": 1.04e-05, "step": 912 }, { "epoch": 2.398422090729783, "high_lr": 0.0005200000000000001, "low_lr": 1.04e-05, "step": 912 }, { "epoch": 2.398422090729783, "high_lr": 0.0005200000000000001, "low_lr": 1.04e-05, "step": 912 }, { "epoch": 2.398422090729783, "high_lr": 0.0005200000000000001, "low_lr": 1.04e-05, "step": 912 }, { "epoch": 2.398422090729783, "high_lr": 0.0005200000000000001, "low_lr": 1.04e-05, "step": 912 }, { "epoch": 2.401051939513478, "grad_norm": 1.2210677862167358, "learning_rate": 0.0005194736842105263, "loss": 1.34, "step": 913 }, { "epoch": 2.401051939513478, "high_lr": 0.0005194736842105263, "low_lr": 1.0389473684210527e-05, "step": 913 }, { "epoch": 2.401051939513478, "high_lr": 0.0005194736842105263, "low_lr": 1.0389473684210527e-05, "step": 913 }, { "epoch": 2.401051939513478, "high_lr": 0.0005194736842105263, "low_lr": 1.0389473684210527e-05, "step": 913 }, { "epoch": 2.401051939513478, "high_lr": 0.0005194736842105263, "low_lr": 1.0389473684210527e-05, "step": 913 }, { "epoch": 2.401051939513478, "high_lr": 0.0005194736842105263, "low_lr": 1.0389473684210527e-05, "step": 913 }, { "epoch": 2.401051939513478, "high_lr": 0.0005194736842105263, "low_lr": 1.0389473684210527e-05, "step": 913 }, { "epoch": 2.401051939513478, "high_lr": 0.0005194736842105263, "low_lr": 1.0389473684210527e-05, "step": 913 }, { "epoch": 2.401051939513478, "high_lr": 0.0005194736842105263, "low_lr": 1.0389473684210527e-05, "step": 913 }, { "epoch": 2.403681788297173, "grad_norm": 1.3378238677978516, "learning_rate": 0.0005189473684210526, "loss": 1.3899, "step": 914 }, { "epoch": 2.403681788297173, "high_lr": 0.0005189473684210526, "low_lr": 1.0378947368421053e-05, "step": 914 }, { "epoch": 2.403681788297173, "high_lr": 0.0005189473684210526, "low_lr": 1.0378947368421053e-05, "step": 914 }, { "epoch": 2.403681788297173, "high_lr": 0.0005189473684210526, "low_lr": 1.0378947368421053e-05, "step": 914 }, { "epoch": 2.403681788297173, "high_lr": 0.0005189473684210526, "low_lr": 1.0378947368421053e-05, "step": 914 }, { "epoch": 2.403681788297173, "high_lr": 0.0005189473684210526, "low_lr": 1.0378947368421053e-05, "step": 914 }, { "epoch": 2.403681788297173, "high_lr": 0.0005189473684210526, "low_lr": 1.0378947368421053e-05, "step": 914 }, { "epoch": 2.403681788297173, "high_lr": 0.0005189473684210526, "low_lr": 1.0378947368421053e-05, "step": 914 }, { "epoch": 2.403681788297173, "high_lr": 0.0005189473684210526, "low_lr": 1.0378947368421053e-05, "step": 914 }, { "epoch": 2.4063116370808677, "grad_norm": 1.3250514268875122, "learning_rate": 0.0005184210526315789, "loss": 1.4164, "step": 915 }, { "epoch": 2.4063116370808677, "high_lr": 0.0005184210526315789, "low_lr": 1.036842105263158e-05, "step": 915 }, { "epoch": 2.4063116370808677, "high_lr": 0.0005184210526315789, "low_lr": 1.036842105263158e-05, "step": 915 }, { "epoch": 2.4063116370808677, "high_lr": 0.0005184210526315789, "low_lr": 1.036842105263158e-05, "step": 915 }, { "epoch": 2.4063116370808677, "high_lr": 0.0005184210526315789, "low_lr": 1.036842105263158e-05, "step": 915 }, { "epoch": 2.4063116370808677, "high_lr": 0.0005184210526315789, "low_lr": 1.036842105263158e-05, "step": 915 }, { "epoch": 2.4063116370808677, "high_lr": 0.0005184210526315789, "low_lr": 1.036842105263158e-05, "step": 915 }, { "epoch": 2.4063116370808677, "high_lr": 0.0005184210526315789, "low_lr": 1.036842105263158e-05, "step": 915 }, { "epoch": 2.4063116370808677, "high_lr": 0.0005184210526315789, "low_lr": 1.036842105263158e-05, "step": 915 }, { "epoch": 2.408941485864563, "grad_norm": 1.3091120719909668, "learning_rate": 0.0005178947368421053, "loss": 1.3754, "step": 916 }, { "epoch": 2.408941485864563, "high_lr": 0.0005178947368421053, "low_lr": 1.0357894736842107e-05, "step": 916 }, { "epoch": 2.408941485864563, "high_lr": 0.0005178947368421053, "low_lr": 1.0357894736842107e-05, "step": 916 }, { "epoch": 2.408941485864563, "high_lr": 0.0005178947368421053, "low_lr": 1.0357894736842107e-05, "step": 916 }, { "epoch": 2.408941485864563, "high_lr": 0.0005178947368421053, "low_lr": 1.0357894736842107e-05, "step": 916 }, { "epoch": 2.408941485864563, "high_lr": 0.0005178947368421053, "low_lr": 1.0357894736842107e-05, "step": 916 }, { "epoch": 2.408941485864563, "high_lr": 0.0005178947368421053, "low_lr": 1.0357894736842107e-05, "step": 916 }, { "epoch": 2.408941485864563, "high_lr": 0.0005178947368421053, "low_lr": 1.0357894736842107e-05, "step": 916 }, { "epoch": 2.408941485864563, "high_lr": 0.0005178947368421053, "low_lr": 1.0357894736842107e-05, "step": 916 }, { "epoch": 2.411571334648258, "grad_norm": 1.2084747552871704, "learning_rate": 0.0005173684210526316, "loss": 1.3484, "step": 917 }, { "epoch": 2.411571334648258, "high_lr": 0.0005173684210526316, "low_lr": 1.0347368421052632e-05, "step": 917 }, { "epoch": 2.411571334648258, "high_lr": 0.0005173684210526316, "low_lr": 1.0347368421052632e-05, "step": 917 }, { "epoch": 2.411571334648258, "high_lr": 0.0005173684210526316, "low_lr": 1.0347368421052632e-05, "step": 917 }, { "epoch": 2.411571334648258, "high_lr": 0.0005173684210526316, "low_lr": 1.0347368421052632e-05, "step": 917 }, { "epoch": 2.411571334648258, "high_lr": 0.0005173684210526316, "low_lr": 1.0347368421052632e-05, "step": 917 }, { "epoch": 2.411571334648258, "high_lr": 0.0005173684210526316, "low_lr": 1.0347368421052632e-05, "step": 917 }, { "epoch": 2.411571334648258, "high_lr": 0.0005173684210526316, "low_lr": 1.0347368421052632e-05, "step": 917 }, { "epoch": 2.411571334648258, "high_lr": 0.0005173684210526316, "low_lr": 1.0347368421052632e-05, "step": 917 }, { "epoch": 2.4142011834319526, "grad_norm": 1.3047235012054443, "learning_rate": 0.0005168421052631579, "loss": 1.3769, "step": 918 }, { "epoch": 2.4142011834319526, "high_lr": 0.0005168421052631579, "low_lr": 1.0336842105263158e-05, "step": 918 }, { "epoch": 2.4142011834319526, "high_lr": 0.0005168421052631579, "low_lr": 1.0336842105263158e-05, "step": 918 }, { "epoch": 2.4142011834319526, "high_lr": 0.0005168421052631579, "low_lr": 1.0336842105263158e-05, "step": 918 }, { "epoch": 2.4142011834319526, "high_lr": 0.0005168421052631579, "low_lr": 1.0336842105263158e-05, "step": 918 }, { "epoch": 2.4142011834319526, "high_lr": 0.0005168421052631579, "low_lr": 1.0336842105263158e-05, "step": 918 }, { "epoch": 2.4142011834319526, "high_lr": 0.0005168421052631579, "low_lr": 1.0336842105263158e-05, "step": 918 }, { "epoch": 2.4142011834319526, "high_lr": 0.0005168421052631579, "low_lr": 1.0336842105263158e-05, "step": 918 }, { "epoch": 2.4142011834319526, "high_lr": 0.0005168421052631579, "low_lr": 1.0336842105263158e-05, "step": 918 }, { "epoch": 2.4168310322156477, "grad_norm": 1.2231842279434204, "learning_rate": 0.0005163157894736842, "loss": 1.3473, "step": 919 }, { "epoch": 2.4168310322156477, "high_lr": 0.0005163157894736842, "low_lr": 1.0326315789473685e-05, "step": 919 }, { "epoch": 2.4168310322156477, "high_lr": 0.0005163157894736842, "low_lr": 1.0326315789473685e-05, "step": 919 }, { "epoch": 2.4168310322156477, "high_lr": 0.0005163157894736842, "low_lr": 1.0326315789473685e-05, "step": 919 }, { "epoch": 2.4168310322156477, "high_lr": 0.0005163157894736842, "low_lr": 1.0326315789473685e-05, "step": 919 }, { "epoch": 2.4168310322156477, "high_lr": 0.0005163157894736842, "low_lr": 1.0326315789473685e-05, "step": 919 }, { "epoch": 2.4168310322156477, "high_lr": 0.0005163157894736842, "low_lr": 1.0326315789473685e-05, "step": 919 }, { "epoch": 2.4168310322156477, "high_lr": 0.0005163157894736842, "low_lr": 1.0326315789473685e-05, "step": 919 }, { "epoch": 2.4168310322156477, "high_lr": 0.0005163157894736842, "low_lr": 1.0326315789473685e-05, "step": 919 }, { "epoch": 2.4194608809993428, "grad_norm": 1.344902515411377, "learning_rate": 0.0005157894736842106, "loss": 1.4027, "step": 920 }, { "epoch": 2.4194608809993428, "high_lr": 0.0005157894736842106, "low_lr": 1.0315789473684213e-05, "step": 920 }, { "epoch": 2.4194608809993428, "high_lr": 0.0005157894736842106, "low_lr": 1.0315789473684213e-05, "step": 920 }, { "epoch": 2.4194608809993428, "high_lr": 0.0005157894736842106, "low_lr": 1.0315789473684213e-05, "step": 920 }, { "epoch": 2.4194608809993428, "high_lr": 0.0005157894736842106, "low_lr": 1.0315789473684213e-05, "step": 920 }, { "epoch": 2.4194608809993428, "high_lr": 0.0005157894736842106, "low_lr": 1.0315789473684213e-05, "step": 920 }, { "epoch": 2.4194608809993428, "high_lr": 0.0005157894736842106, "low_lr": 1.0315789473684213e-05, "step": 920 }, { "epoch": 2.4194608809993428, "high_lr": 0.0005157894736842106, "low_lr": 1.0315789473684213e-05, "step": 920 }, { "epoch": 2.4194608809993428, "high_lr": 0.0005157894736842106, "low_lr": 1.0315789473684213e-05, "step": 920 }, { "epoch": 2.4220907297830374, "grad_norm": 1.3281110525131226, "learning_rate": 0.0005152631578947369, "loss": 1.3709, "step": 921 }, { "epoch": 2.4220907297830374, "high_lr": 0.0005152631578947369, "low_lr": 1.0305263157894739e-05, "step": 921 }, { "epoch": 2.4220907297830374, "high_lr": 0.0005152631578947369, "low_lr": 1.0305263157894739e-05, "step": 921 }, { "epoch": 2.4220907297830374, "high_lr": 0.0005152631578947369, "low_lr": 1.0305263157894739e-05, "step": 921 }, { "epoch": 2.4220907297830374, "high_lr": 0.0005152631578947369, "low_lr": 1.0305263157894739e-05, "step": 921 }, { "epoch": 2.4220907297830374, "high_lr": 0.0005152631578947369, "low_lr": 1.0305263157894739e-05, "step": 921 }, { "epoch": 2.4220907297830374, "high_lr": 0.0005152631578947369, "low_lr": 1.0305263157894739e-05, "step": 921 }, { "epoch": 2.4220907297830374, "high_lr": 0.0005152631578947369, "low_lr": 1.0305263157894739e-05, "step": 921 }, { "epoch": 2.4220907297830374, "high_lr": 0.0005152631578947369, "low_lr": 1.0305263157894739e-05, "step": 921 }, { "epoch": 2.4247205785667325, "grad_norm": 1.271446943283081, "learning_rate": 0.0005147368421052631, "loss": 1.3206, "step": 922 }, { "epoch": 2.4247205785667325, "high_lr": 0.0005147368421052631, "low_lr": 1.0294736842105264e-05, "step": 922 }, { "epoch": 2.4247205785667325, "high_lr": 0.0005147368421052631, "low_lr": 1.0294736842105264e-05, "step": 922 }, { "epoch": 2.4247205785667325, "high_lr": 0.0005147368421052631, "low_lr": 1.0294736842105264e-05, "step": 922 }, { "epoch": 2.4247205785667325, "high_lr": 0.0005147368421052631, "low_lr": 1.0294736842105264e-05, "step": 922 }, { "epoch": 2.4247205785667325, "high_lr": 0.0005147368421052631, "low_lr": 1.0294736842105264e-05, "step": 922 }, { "epoch": 2.4247205785667325, "high_lr": 0.0005147368421052631, "low_lr": 1.0294736842105264e-05, "step": 922 }, { "epoch": 2.4247205785667325, "high_lr": 0.0005147368421052631, "low_lr": 1.0294736842105264e-05, "step": 922 }, { "epoch": 2.4247205785667325, "high_lr": 0.0005147368421052631, "low_lr": 1.0294736842105264e-05, "step": 922 }, { "epoch": 2.427350427350427, "grad_norm": 1.2340179681777954, "learning_rate": 0.0005142105263157894, "loss": 1.4087, "step": 923 }, { "epoch": 2.427350427350427, "high_lr": 0.0005142105263157894, "low_lr": 1.028421052631579e-05, "step": 923 }, { "epoch": 2.427350427350427, "high_lr": 0.0005142105263157894, "low_lr": 1.028421052631579e-05, "step": 923 }, { "epoch": 2.427350427350427, "high_lr": 0.0005142105263157894, "low_lr": 1.028421052631579e-05, "step": 923 }, { "epoch": 2.427350427350427, "high_lr": 0.0005142105263157894, "low_lr": 1.028421052631579e-05, "step": 923 }, { "epoch": 2.427350427350427, "high_lr": 0.0005142105263157894, "low_lr": 1.028421052631579e-05, "step": 923 }, { "epoch": 2.427350427350427, "high_lr": 0.0005142105263157894, "low_lr": 1.028421052631579e-05, "step": 923 }, { "epoch": 2.427350427350427, "high_lr": 0.0005142105263157894, "low_lr": 1.028421052631579e-05, "step": 923 }, { "epoch": 2.427350427350427, "high_lr": 0.0005142105263157894, "low_lr": 1.028421052631579e-05, "step": 923 }, { "epoch": 2.4299802761341223, "grad_norm": 1.2009501457214355, "learning_rate": 0.0005136842105263157, "loss": 1.362, "step": 924 }, { "epoch": 2.4299802761341223, "high_lr": 0.0005136842105263157, "low_lr": 1.0273684210526316e-05, "step": 924 }, { "epoch": 2.4299802761341223, "high_lr": 0.0005136842105263157, "low_lr": 1.0273684210526316e-05, "step": 924 }, { "epoch": 2.4299802761341223, "high_lr": 0.0005136842105263157, "low_lr": 1.0273684210526316e-05, "step": 924 }, { "epoch": 2.4299802761341223, "high_lr": 0.0005136842105263157, "low_lr": 1.0273684210526316e-05, "step": 924 }, { "epoch": 2.4299802761341223, "high_lr": 0.0005136842105263157, "low_lr": 1.0273684210526316e-05, "step": 924 }, { "epoch": 2.4299802761341223, "high_lr": 0.0005136842105263157, "low_lr": 1.0273684210526316e-05, "step": 924 }, { "epoch": 2.4299802761341223, "high_lr": 0.0005136842105263157, "low_lr": 1.0273684210526316e-05, "step": 924 }, { "epoch": 2.4299802761341223, "high_lr": 0.0005136842105263157, "low_lr": 1.0273684210526316e-05, "step": 924 }, { "epoch": 2.4326101249178174, "grad_norm": 1.222140908241272, "learning_rate": 0.0005131578947368421, "loss": 1.3997, "step": 925 }, { "epoch": 2.4326101249178174, "high_lr": 0.0005131578947368421, "low_lr": 1.0263157894736844e-05, "step": 925 }, { "epoch": 2.4326101249178174, "high_lr": 0.0005131578947368421, "low_lr": 1.0263157894736844e-05, "step": 925 }, { "epoch": 2.4326101249178174, "high_lr": 0.0005131578947368421, "low_lr": 1.0263157894736844e-05, "step": 925 }, { "epoch": 2.4326101249178174, "high_lr": 0.0005131578947368421, "low_lr": 1.0263157894736844e-05, "step": 925 }, { "epoch": 2.4326101249178174, "high_lr": 0.0005131578947368421, "low_lr": 1.0263157894736844e-05, "step": 925 }, { "epoch": 2.4326101249178174, "high_lr": 0.0005131578947368421, "low_lr": 1.0263157894736844e-05, "step": 925 }, { "epoch": 2.4326101249178174, "high_lr": 0.0005131578947368421, "low_lr": 1.0263157894736844e-05, "step": 925 }, { "epoch": 2.4326101249178174, "high_lr": 0.0005131578947368421, "low_lr": 1.0263157894736844e-05, "step": 925 }, { "epoch": 2.435239973701512, "grad_norm": 1.2842364311218262, "learning_rate": 0.0005126315789473685, "loss": 1.3655, "step": 926 }, { "epoch": 2.435239973701512, "high_lr": 0.0005126315789473685, "low_lr": 1.0252631578947369e-05, "step": 926 }, { "epoch": 2.435239973701512, "high_lr": 0.0005126315789473685, "low_lr": 1.0252631578947369e-05, "step": 926 }, { "epoch": 2.435239973701512, "high_lr": 0.0005126315789473685, "low_lr": 1.0252631578947369e-05, "step": 926 }, { "epoch": 2.435239973701512, "high_lr": 0.0005126315789473685, "low_lr": 1.0252631578947369e-05, "step": 926 }, { "epoch": 2.435239973701512, "high_lr": 0.0005126315789473685, "low_lr": 1.0252631578947369e-05, "step": 926 }, { "epoch": 2.435239973701512, "high_lr": 0.0005126315789473685, "low_lr": 1.0252631578947369e-05, "step": 926 }, { "epoch": 2.435239973701512, "high_lr": 0.0005126315789473685, "low_lr": 1.0252631578947369e-05, "step": 926 }, { "epoch": 2.435239973701512, "high_lr": 0.0005126315789473685, "low_lr": 1.0252631578947369e-05, "step": 926 }, { "epoch": 2.437869822485207, "grad_norm": 1.326341986656189, "learning_rate": 0.0005121052631578948, "loss": 1.4301, "step": 927 }, { "epoch": 2.437869822485207, "high_lr": 0.0005121052631578948, "low_lr": 1.0242105263157895e-05, "step": 927 }, { "epoch": 2.437869822485207, "high_lr": 0.0005121052631578948, "low_lr": 1.0242105263157895e-05, "step": 927 }, { "epoch": 2.437869822485207, "high_lr": 0.0005121052631578948, "low_lr": 1.0242105263157895e-05, "step": 927 }, { "epoch": 2.437869822485207, "high_lr": 0.0005121052631578948, "low_lr": 1.0242105263157895e-05, "step": 927 }, { "epoch": 2.437869822485207, "high_lr": 0.0005121052631578948, "low_lr": 1.0242105263157895e-05, "step": 927 }, { "epoch": 2.437869822485207, "high_lr": 0.0005121052631578948, "low_lr": 1.0242105263157895e-05, "step": 927 }, { "epoch": 2.437869822485207, "high_lr": 0.0005121052631578948, "low_lr": 1.0242105263157895e-05, "step": 927 }, { "epoch": 2.437869822485207, "high_lr": 0.0005121052631578948, "low_lr": 1.0242105263157895e-05, "step": 927 }, { "epoch": 2.440499671268902, "grad_norm": 1.2571117877960205, "learning_rate": 0.0005115789473684211, "loss": 1.4242, "step": 928 }, { "epoch": 2.440499671268902, "high_lr": 0.0005115789473684211, "low_lr": 1.0231578947368422e-05, "step": 928 }, { "epoch": 2.440499671268902, "high_lr": 0.0005115789473684211, "low_lr": 1.0231578947368422e-05, "step": 928 }, { "epoch": 2.440499671268902, "high_lr": 0.0005115789473684211, "low_lr": 1.0231578947368422e-05, "step": 928 }, { "epoch": 2.440499671268902, "high_lr": 0.0005115789473684211, "low_lr": 1.0231578947368422e-05, "step": 928 }, { "epoch": 2.440499671268902, "high_lr": 0.0005115789473684211, "low_lr": 1.0231578947368422e-05, "step": 928 }, { "epoch": 2.440499671268902, "high_lr": 0.0005115789473684211, "low_lr": 1.0231578947368422e-05, "step": 928 }, { "epoch": 2.440499671268902, "high_lr": 0.0005115789473684211, "low_lr": 1.0231578947368422e-05, "step": 928 }, { "epoch": 2.440499671268902, "high_lr": 0.0005115789473684211, "low_lr": 1.0231578947368422e-05, "step": 928 }, { "epoch": 2.443129520052597, "grad_norm": 1.199884057044983, "learning_rate": 0.0005110526315789474, "loss": 1.3604, "step": 929 }, { "epoch": 2.443129520052597, "high_lr": 0.0005110526315789474, "low_lr": 1.0221052631578948e-05, "step": 929 }, { "epoch": 2.443129520052597, "high_lr": 0.0005110526315789474, "low_lr": 1.0221052631578948e-05, "step": 929 }, { "epoch": 2.443129520052597, "high_lr": 0.0005110526315789474, "low_lr": 1.0221052631578948e-05, "step": 929 }, { "epoch": 2.443129520052597, "high_lr": 0.0005110526315789474, "low_lr": 1.0221052631578948e-05, "step": 929 }, { "epoch": 2.443129520052597, "high_lr": 0.0005110526315789474, "low_lr": 1.0221052631578948e-05, "step": 929 }, { "epoch": 2.443129520052597, "high_lr": 0.0005110526315789474, "low_lr": 1.0221052631578948e-05, "step": 929 }, { "epoch": 2.443129520052597, "high_lr": 0.0005110526315789474, "low_lr": 1.0221052631578948e-05, "step": 929 }, { "epoch": 2.443129520052597, "high_lr": 0.0005110526315789474, "low_lr": 1.0221052631578948e-05, "step": 929 }, { "epoch": 2.445759368836292, "grad_norm": 1.2833794355392456, "learning_rate": 0.0005105263157894738, "loss": 1.4088, "step": 930 }, { "epoch": 2.445759368836292, "high_lr": 0.0005105263157894738, "low_lr": 1.0210526315789476e-05, "step": 930 }, { "epoch": 2.445759368836292, "high_lr": 0.0005105263157894738, "low_lr": 1.0210526315789476e-05, "step": 930 }, { "epoch": 2.445759368836292, "high_lr": 0.0005105263157894738, "low_lr": 1.0210526315789476e-05, "step": 930 }, { "epoch": 2.445759368836292, "high_lr": 0.0005105263157894738, "low_lr": 1.0210526315789476e-05, "step": 930 }, { "epoch": 2.445759368836292, "high_lr": 0.0005105263157894738, "low_lr": 1.0210526315789476e-05, "step": 930 }, { "epoch": 2.445759368836292, "high_lr": 0.0005105263157894738, "low_lr": 1.0210526315789476e-05, "step": 930 }, { "epoch": 2.445759368836292, "high_lr": 0.0005105263157894738, "low_lr": 1.0210526315789476e-05, "step": 930 }, { "epoch": 2.445759368836292, "high_lr": 0.0005105263157894738, "low_lr": 1.0210526315789476e-05, "step": 930 }, { "epoch": 2.4483892176199866, "grad_norm": 1.1905300617218018, "learning_rate": 0.00051, "loss": 1.3732, "step": 931 }, { "epoch": 2.4483892176199866, "high_lr": 0.00051, "low_lr": 1.02e-05, "step": 931 }, { "epoch": 2.4483892176199866, "high_lr": 0.00051, "low_lr": 1.02e-05, "step": 931 }, { "epoch": 2.4483892176199866, "high_lr": 0.00051, "low_lr": 1.02e-05, "step": 931 }, { "epoch": 2.4483892176199866, "high_lr": 0.00051, "low_lr": 1.02e-05, "step": 931 }, { "epoch": 2.4483892176199866, "high_lr": 0.00051, "low_lr": 1.02e-05, "step": 931 }, { "epoch": 2.4483892176199866, "high_lr": 0.00051, "low_lr": 1.02e-05, "step": 931 }, { "epoch": 2.4483892176199866, "high_lr": 0.00051, "low_lr": 1.02e-05, "step": 931 }, { "epoch": 2.4483892176199866, "high_lr": 0.00051, "low_lr": 1.02e-05, "step": 931 }, { "epoch": 2.4510190664036817, "grad_norm": 1.2222079038619995, "learning_rate": 0.0005094736842105263, "loss": 1.3916, "step": 932 }, { "epoch": 2.4510190664036817, "high_lr": 0.0005094736842105263, "low_lr": 1.0189473684210527e-05, "step": 932 }, { "epoch": 2.4510190664036817, "high_lr": 0.0005094736842105263, "low_lr": 1.0189473684210527e-05, "step": 932 }, { "epoch": 2.4510190664036817, "high_lr": 0.0005094736842105263, "low_lr": 1.0189473684210527e-05, "step": 932 }, { "epoch": 2.4510190664036817, "high_lr": 0.0005094736842105263, "low_lr": 1.0189473684210527e-05, "step": 932 }, { "epoch": 2.4510190664036817, "high_lr": 0.0005094736842105263, "low_lr": 1.0189473684210527e-05, "step": 932 }, { "epoch": 2.4510190664036817, "high_lr": 0.0005094736842105263, "low_lr": 1.0189473684210527e-05, "step": 932 }, { "epoch": 2.4510190664036817, "high_lr": 0.0005094736842105263, "low_lr": 1.0189473684210527e-05, "step": 932 }, { "epoch": 2.4510190664036817, "high_lr": 0.0005094736842105263, "low_lr": 1.0189473684210527e-05, "step": 932 }, { "epoch": 2.453648915187377, "grad_norm": 1.1991325616836548, "learning_rate": 0.0005089473684210526, "loss": 1.3654, "step": 933 }, { "epoch": 2.453648915187377, "high_lr": 0.0005089473684210526, "low_lr": 1.0178947368421053e-05, "step": 933 }, { "epoch": 2.453648915187377, "high_lr": 0.0005089473684210526, "low_lr": 1.0178947368421053e-05, "step": 933 }, { "epoch": 2.453648915187377, "high_lr": 0.0005089473684210526, "low_lr": 1.0178947368421053e-05, "step": 933 }, { "epoch": 2.453648915187377, "high_lr": 0.0005089473684210526, "low_lr": 1.0178947368421053e-05, "step": 933 }, { "epoch": 2.453648915187377, "high_lr": 0.0005089473684210526, "low_lr": 1.0178947368421053e-05, "step": 933 }, { "epoch": 2.453648915187377, "high_lr": 0.0005089473684210526, "low_lr": 1.0178947368421053e-05, "step": 933 }, { "epoch": 2.453648915187377, "high_lr": 0.0005089473684210526, "low_lr": 1.0178947368421053e-05, "step": 933 }, { "epoch": 2.453648915187377, "high_lr": 0.0005089473684210526, "low_lr": 1.0178947368421053e-05, "step": 933 }, { "epoch": 2.4562787639710715, "grad_norm": 1.1947762966156006, "learning_rate": 0.000508421052631579, "loss": 1.4105, "step": 934 }, { "epoch": 2.4562787639710715, "high_lr": 0.000508421052631579, "low_lr": 1.0168421052631581e-05, "step": 934 }, { "epoch": 2.4562787639710715, "high_lr": 0.000508421052631579, "low_lr": 1.0168421052631581e-05, "step": 934 }, { "epoch": 2.4562787639710715, "high_lr": 0.000508421052631579, "low_lr": 1.0168421052631581e-05, "step": 934 }, { "epoch": 2.4562787639710715, "high_lr": 0.000508421052631579, "low_lr": 1.0168421052631581e-05, "step": 934 }, { "epoch": 2.4562787639710715, "high_lr": 0.000508421052631579, "low_lr": 1.0168421052631581e-05, "step": 934 }, { "epoch": 2.4562787639710715, "high_lr": 0.000508421052631579, "low_lr": 1.0168421052631581e-05, "step": 934 }, { "epoch": 2.4562787639710715, "high_lr": 0.000508421052631579, "low_lr": 1.0168421052631581e-05, "step": 934 }, { "epoch": 2.4562787639710715, "high_lr": 0.000508421052631579, "low_lr": 1.0168421052631581e-05, "step": 934 }, { "epoch": 2.4589086127547666, "grad_norm": 1.3332761526107788, "learning_rate": 0.0005078947368421053, "loss": 1.4219, "step": 935 }, { "epoch": 2.4589086127547666, "high_lr": 0.0005078947368421053, "low_lr": 1.0157894736842106e-05, "step": 935 }, { "epoch": 2.4589086127547666, "high_lr": 0.0005078947368421053, "low_lr": 1.0157894736842106e-05, "step": 935 }, { "epoch": 2.4589086127547666, "high_lr": 0.0005078947368421053, "low_lr": 1.0157894736842106e-05, "step": 935 }, { "epoch": 2.4589086127547666, "high_lr": 0.0005078947368421053, "low_lr": 1.0157894736842106e-05, "step": 935 }, { "epoch": 2.4589086127547666, "high_lr": 0.0005078947368421053, "low_lr": 1.0157894736842106e-05, "step": 935 }, { "epoch": 2.4589086127547666, "high_lr": 0.0005078947368421053, "low_lr": 1.0157894736842106e-05, "step": 935 }, { "epoch": 2.4589086127547666, "high_lr": 0.0005078947368421053, "low_lr": 1.0157894736842106e-05, "step": 935 }, { "epoch": 2.4589086127547666, "high_lr": 0.0005078947368421053, "low_lr": 1.0157894736842106e-05, "step": 935 }, { "epoch": 2.4615384615384617, "grad_norm": 1.2377543449401855, "learning_rate": 0.0005073684210526316, "loss": 1.3687, "step": 936 }, { "epoch": 2.4615384615384617, "high_lr": 0.0005073684210526316, "low_lr": 1.0147368421052632e-05, "step": 936 }, { "epoch": 2.4615384615384617, "high_lr": 0.0005073684210526316, "low_lr": 1.0147368421052632e-05, "step": 936 }, { "epoch": 2.4615384615384617, "high_lr": 0.0005073684210526316, "low_lr": 1.0147368421052632e-05, "step": 936 }, { "epoch": 2.4615384615384617, "high_lr": 0.0005073684210526316, "low_lr": 1.0147368421052632e-05, "step": 936 }, { "epoch": 2.4615384615384617, "high_lr": 0.0005073684210526316, "low_lr": 1.0147368421052632e-05, "step": 936 }, { "epoch": 2.4615384615384617, "high_lr": 0.0005073684210526316, "low_lr": 1.0147368421052632e-05, "step": 936 }, { "epoch": 2.4615384615384617, "high_lr": 0.0005073684210526316, "low_lr": 1.0147368421052632e-05, "step": 936 }, { "epoch": 2.4615384615384617, "high_lr": 0.0005073684210526316, "low_lr": 1.0147368421052632e-05, "step": 936 }, { "epoch": 2.4641683103221563, "grad_norm": 1.1500272750854492, "learning_rate": 0.0005068421052631579, "loss": 1.391, "step": 937 }, { "epoch": 2.4641683103221563, "high_lr": 0.0005068421052631579, "low_lr": 1.0136842105263159e-05, "step": 937 }, { "epoch": 2.4641683103221563, "high_lr": 0.0005068421052631579, "low_lr": 1.0136842105263159e-05, "step": 937 }, { "epoch": 2.4641683103221563, "high_lr": 0.0005068421052631579, "low_lr": 1.0136842105263159e-05, "step": 937 }, { "epoch": 2.4641683103221563, "high_lr": 0.0005068421052631579, "low_lr": 1.0136842105263159e-05, "step": 937 }, { "epoch": 2.4641683103221563, "high_lr": 0.0005068421052631579, "low_lr": 1.0136842105263159e-05, "step": 937 }, { "epoch": 2.4641683103221563, "high_lr": 0.0005068421052631579, "low_lr": 1.0136842105263159e-05, "step": 937 }, { "epoch": 2.4641683103221563, "high_lr": 0.0005068421052631579, "low_lr": 1.0136842105263159e-05, "step": 937 }, { "epoch": 2.4641683103221563, "high_lr": 0.0005068421052631579, "low_lr": 1.0136842105263159e-05, "step": 937 }, { "epoch": 2.4667981591058514, "grad_norm": 1.2336149215698242, "learning_rate": 0.0005063157894736841, "loss": 1.3741, "step": 938 }, { "epoch": 2.4667981591058514, "high_lr": 0.0005063157894736841, "low_lr": 1.0126315789473685e-05, "step": 938 }, { "epoch": 2.4667981591058514, "high_lr": 0.0005063157894736841, "low_lr": 1.0126315789473685e-05, "step": 938 }, { "epoch": 2.4667981591058514, "high_lr": 0.0005063157894736841, "low_lr": 1.0126315789473685e-05, "step": 938 }, { "epoch": 2.4667981591058514, "high_lr": 0.0005063157894736841, "low_lr": 1.0126315789473685e-05, "step": 938 }, { "epoch": 2.4667981591058514, "high_lr": 0.0005063157894736841, "low_lr": 1.0126315789473685e-05, "step": 938 }, { "epoch": 2.4667981591058514, "high_lr": 0.0005063157894736841, "low_lr": 1.0126315789473685e-05, "step": 938 }, { "epoch": 2.4667981591058514, "high_lr": 0.0005063157894736841, "low_lr": 1.0126315789473685e-05, "step": 938 }, { "epoch": 2.4667981591058514, "high_lr": 0.0005063157894736841, "low_lr": 1.0126315789473685e-05, "step": 938 }, { "epoch": 2.4694280078895465, "grad_norm": 1.2014271020889282, "learning_rate": 0.0005057894736842105, "loss": 1.3637, "step": 939 }, { "epoch": 2.4694280078895465, "high_lr": 0.0005057894736842105, "low_lr": 1.0115789473684213e-05, "step": 939 }, { "epoch": 2.4694280078895465, "high_lr": 0.0005057894736842105, "low_lr": 1.0115789473684213e-05, "step": 939 }, { "epoch": 2.4694280078895465, "high_lr": 0.0005057894736842105, "low_lr": 1.0115789473684213e-05, "step": 939 }, { "epoch": 2.4694280078895465, "high_lr": 0.0005057894736842105, "low_lr": 1.0115789473684213e-05, "step": 939 }, { "epoch": 2.4694280078895465, "high_lr": 0.0005057894736842105, "low_lr": 1.0115789473684213e-05, "step": 939 }, { "epoch": 2.4694280078895465, "high_lr": 0.0005057894736842105, "low_lr": 1.0115789473684213e-05, "step": 939 }, { "epoch": 2.4694280078895465, "high_lr": 0.0005057894736842105, "low_lr": 1.0115789473684213e-05, "step": 939 }, { "epoch": 2.4694280078895465, "high_lr": 0.0005057894736842105, "low_lr": 1.0115789473684213e-05, "step": 939 }, { "epoch": 2.472057856673241, "grad_norm": 1.2425811290740967, "learning_rate": 0.0005052631578947368, "loss": 1.404, "step": 940 }, { "epoch": 2.472057856673241, "high_lr": 0.0005052631578947368, "low_lr": 1.0105263157894738e-05, "step": 940 }, { "epoch": 2.472057856673241, "high_lr": 0.0005052631578947368, "low_lr": 1.0105263157894738e-05, "step": 940 }, { "epoch": 2.472057856673241, "high_lr": 0.0005052631578947368, "low_lr": 1.0105263157894738e-05, "step": 940 }, { "epoch": 2.472057856673241, "high_lr": 0.0005052631578947368, "low_lr": 1.0105263157894738e-05, "step": 940 }, { "epoch": 2.472057856673241, "high_lr": 0.0005052631578947368, "low_lr": 1.0105263157894738e-05, "step": 940 }, { "epoch": 2.472057856673241, "high_lr": 0.0005052631578947368, "low_lr": 1.0105263157894738e-05, "step": 940 }, { "epoch": 2.472057856673241, "high_lr": 0.0005052631578947368, "low_lr": 1.0105263157894738e-05, "step": 940 }, { "epoch": 2.472057856673241, "high_lr": 0.0005052631578947368, "low_lr": 1.0105263157894738e-05, "step": 940 }, { "epoch": 2.4746877054569363, "grad_norm": 1.2446467876434326, "learning_rate": 0.0005047368421052631, "loss": 1.3615, "step": 941 }, { "epoch": 2.4746877054569363, "high_lr": 0.0005047368421052631, "low_lr": 1.0094736842105264e-05, "step": 941 }, { "epoch": 2.4746877054569363, "high_lr": 0.0005047368421052631, "low_lr": 1.0094736842105264e-05, "step": 941 }, { "epoch": 2.4746877054569363, "high_lr": 0.0005047368421052631, "low_lr": 1.0094736842105264e-05, "step": 941 }, { "epoch": 2.4746877054569363, "high_lr": 0.0005047368421052631, "low_lr": 1.0094736842105264e-05, "step": 941 }, { "epoch": 2.4746877054569363, "high_lr": 0.0005047368421052631, "low_lr": 1.0094736842105264e-05, "step": 941 }, { "epoch": 2.4746877054569363, "high_lr": 0.0005047368421052631, "low_lr": 1.0094736842105264e-05, "step": 941 }, { "epoch": 2.4746877054569363, "high_lr": 0.0005047368421052631, "low_lr": 1.0094736842105264e-05, "step": 941 }, { "epoch": 2.4746877054569363, "high_lr": 0.0005047368421052631, "low_lr": 1.0094736842105264e-05, "step": 941 }, { "epoch": 2.4773175542406314, "grad_norm": 1.1776800155639648, "learning_rate": 0.0005042105263157895, "loss": 1.4181, "step": 942 }, { "epoch": 2.4773175542406314, "high_lr": 0.0005042105263157895, "low_lr": 1.008421052631579e-05, "step": 942 }, { "epoch": 2.4773175542406314, "high_lr": 0.0005042105263157895, "low_lr": 1.008421052631579e-05, "step": 942 }, { "epoch": 2.4773175542406314, "high_lr": 0.0005042105263157895, "low_lr": 1.008421052631579e-05, "step": 942 }, { "epoch": 2.4773175542406314, "high_lr": 0.0005042105263157895, "low_lr": 1.008421052631579e-05, "step": 942 }, { "epoch": 2.4773175542406314, "high_lr": 0.0005042105263157895, "low_lr": 1.008421052631579e-05, "step": 942 }, { "epoch": 2.4773175542406314, "high_lr": 0.0005042105263157895, "low_lr": 1.008421052631579e-05, "step": 942 }, { "epoch": 2.4773175542406314, "high_lr": 0.0005042105263157895, "low_lr": 1.008421052631579e-05, "step": 942 }, { "epoch": 2.4773175542406314, "high_lr": 0.0005042105263157895, "low_lr": 1.008421052631579e-05, "step": 942 }, { "epoch": 2.479947403024326, "grad_norm": 1.4032350778579712, "learning_rate": 0.0005036842105263158, "loss": 1.43, "step": 943 }, { "epoch": 2.479947403024326, "high_lr": 0.0005036842105263158, "low_lr": 1.0073684210526315e-05, "step": 943 }, { "epoch": 2.479947403024326, "high_lr": 0.0005036842105263158, "low_lr": 1.0073684210526315e-05, "step": 943 }, { "epoch": 2.479947403024326, "high_lr": 0.0005036842105263158, "low_lr": 1.0073684210526315e-05, "step": 943 }, { "epoch": 2.479947403024326, "high_lr": 0.0005036842105263158, "low_lr": 1.0073684210526315e-05, "step": 943 }, { "epoch": 2.479947403024326, "high_lr": 0.0005036842105263158, "low_lr": 1.0073684210526315e-05, "step": 943 }, { "epoch": 2.479947403024326, "high_lr": 0.0005036842105263158, "low_lr": 1.0073684210526315e-05, "step": 943 }, { "epoch": 2.479947403024326, "high_lr": 0.0005036842105263158, "low_lr": 1.0073684210526315e-05, "step": 943 }, { "epoch": 2.479947403024326, "high_lr": 0.0005036842105263158, "low_lr": 1.0073684210526315e-05, "step": 943 }, { "epoch": 2.482577251808021, "grad_norm": 1.2709810733795166, "learning_rate": 0.0005031578947368422, "loss": 1.4115, "step": 944 }, { "epoch": 2.482577251808021, "high_lr": 0.0005031578947368422, "low_lr": 1.0063157894736843e-05, "step": 944 }, { "epoch": 2.482577251808021, "high_lr": 0.0005031578947368422, "low_lr": 1.0063157894736843e-05, "step": 944 }, { "epoch": 2.482577251808021, "high_lr": 0.0005031578947368422, "low_lr": 1.0063157894736843e-05, "step": 944 }, { "epoch": 2.482577251808021, "high_lr": 0.0005031578947368422, "low_lr": 1.0063157894736843e-05, "step": 944 }, { "epoch": 2.482577251808021, "high_lr": 0.0005031578947368422, "low_lr": 1.0063157894736843e-05, "step": 944 }, { "epoch": 2.482577251808021, "high_lr": 0.0005031578947368422, "low_lr": 1.0063157894736843e-05, "step": 944 }, { "epoch": 2.482577251808021, "high_lr": 0.0005031578947368422, "low_lr": 1.0063157894736843e-05, "step": 944 }, { "epoch": 2.482577251808021, "high_lr": 0.0005031578947368422, "low_lr": 1.0063157894736843e-05, "step": 944 }, { "epoch": 2.485207100591716, "grad_norm": 1.2019845247268677, "learning_rate": 0.0005026315789473685, "loss": 1.3414, "step": 945 }, { "epoch": 2.485207100591716, "high_lr": 0.0005026315789473685, "low_lr": 1.005263157894737e-05, "step": 945 }, { "epoch": 2.485207100591716, "high_lr": 0.0005026315789473685, "low_lr": 1.005263157894737e-05, "step": 945 }, { "epoch": 2.485207100591716, "high_lr": 0.0005026315789473685, "low_lr": 1.005263157894737e-05, "step": 945 }, { "epoch": 2.485207100591716, "high_lr": 0.0005026315789473685, "low_lr": 1.005263157894737e-05, "step": 945 }, { "epoch": 2.485207100591716, "high_lr": 0.0005026315789473685, "low_lr": 1.005263157894737e-05, "step": 945 }, { "epoch": 2.485207100591716, "high_lr": 0.0005026315789473685, "low_lr": 1.005263157894737e-05, "step": 945 }, { "epoch": 2.485207100591716, "high_lr": 0.0005026315789473685, "low_lr": 1.005263157894737e-05, "step": 945 }, { "epoch": 2.485207100591716, "high_lr": 0.0005026315789473685, "low_lr": 1.005263157894737e-05, "step": 945 }, { "epoch": 2.487836949375411, "grad_norm": 1.287297010421753, "learning_rate": 0.0005021052631578948, "loss": 1.3787, "step": 946 }, { "epoch": 2.487836949375411, "high_lr": 0.0005021052631578948, "low_lr": 1.0042105263157896e-05, "step": 946 }, { "epoch": 2.487836949375411, "high_lr": 0.0005021052631578948, "low_lr": 1.0042105263157896e-05, "step": 946 }, { "epoch": 2.487836949375411, "high_lr": 0.0005021052631578948, "low_lr": 1.0042105263157896e-05, "step": 946 }, { "epoch": 2.487836949375411, "high_lr": 0.0005021052631578948, "low_lr": 1.0042105263157896e-05, "step": 946 }, { "epoch": 2.487836949375411, "high_lr": 0.0005021052631578948, "low_lr": 1.0042105263157896e-05, "step": 946 }, { "epoch": 2.487836949375411, "high_lr": 0.0005021052631578948, "low_lr": 1.0042105263157896e-05, "step": 946 }, { "epoch": 2.487836949375411, "high_lr": 0.0005021052631578948, "low_lr": 1.0042105263157896e-05, "step": 946 }, { "epoch": 2.487836949375411, "high_lr": 0.0005021052631578948, "low_lr": 1.0042105263157896e-05, "step": 946 }, { "epoch": 2.490466798159106, "grad_norm": 1.2814826965332031, "learning_rate": 0.000501578947368421, "loss": 1.4206, "step": 947 }, { "epoch": 2.490466798159106, "high_lr": 0.000501578947368421, "low_lr": 1.0031578947368422e-05, "step": 947 }, { "epoch": 2.490466798159106, "high_lr": 0.000501578947368421, "low_lr": 1.0031578947368422e-05, "step": 947 }, { "epoch": 2.490466798159106, "high_lr": 0.000501578947368421, "low_lr": 1.0031578947368422e-05, "step": 947 }, { "epoch": 2.490466798159106, "high_lr": 0.000501578947368421, "low_lr": 1.0031578947368422e-05, "step": 947 }, { "epoch": 2.490466798159106, "high_lr": 0.000501578947368421, "low_lr": 1.0031578947368422e-05, "step": 947 }, { "epoch": 2.490466798159106, "high_lr": 0.000501578947368421, "low_lr": 1.0031578947368422e-05, "step": 947 }, { "epoch": 2.490466798159106, "high_lr": 0.000501578947368421, "low_lr": 1.0031578947368422e-05, "step": 947 }, { "epoch": 2.490466798159106, "high_lr": 0.000501578947368421, "low_lr": 1.0031578947368422e-05, "step": 947 }, { "epoch": 2.4930966469428006, "grad_norm": 1.1957181692123413, "learning_rate": 0.0005010526315789474, "loss": 1.3637, "step": 948 }, { "epoch": 2.4930966469428006, "high_lr": 0.0005010526315789474, "low_lr": 1.002105263157895e-05, "step": 948 }, { "epoch": 2.4930966469428006, "high_lr": 0.0005010526315789474, "low_lr": 1.002105263157895e-05, "step": 948 }, { "epoch": 2.4930966469428006, "high_lr": 0.0005010526315789474, "low_lr": 1.002105263157895e-05, "step": 948 }, { "epoch": 2.4930966469428006, "high_lr": 0.0005010526315789474, "low_lr": 1.002105263157895e-05, "step": 948 }, { "epoch": 2.4930966469428006, "high_lr": 0.0005010526315789474, "low_lr": 1.002105263157895e-05, "step": 948 }, { "epoch": 2.4930966469428006, "high_lr": 0.0005010526315789474, "low_lr": 1.002105263157895e-05, "step": 948 }, { "epoch": 2.4930966469428006, "high_lr": 0.0005010526315789474, "low_lr": 1.002105263157895e-05, "step": 948 }, { "epoch": 2.4930966469428006, "high_lr": 0.0005010526315789474, "low_lr": 1.002105263157895e-05, "step": 948 }, { "epoch": 2.4957264957264957, "grad_norm": 1.253524899482727, "learning_rate": 0.0005005263157894737, "loss": 1.3531, "step": 949 }, { "epoch": 2.4957264957264957, "high_lr": 0.0005005263157894737, "low_lr": 1.0010526315789474e-05, "step": 949 }, { "epoch": 2.4957264957264957, "high_lr": 0.0005005263157894737, "low_lr": 1.0010526315789474e-05, "step": 949 }, { "epoch": 2.4957264957264957, "high_lr": 0.0005005263157894737, "low_lr": 1.0010526315789474e-05, "step": 949 }, { "epoch": 2.4957264957264957, "high_lr": 0.0005005263157894737, "low_lr": 1.0010526315789474e-05, "step": 949 }, { "epoch": 2.4957264957264957, "high_lr": 0.0005005263157894737, "low_lr": 1.0010526315789474e-05, "step": 949 }, { "epoch": 2.4957264957264957, "high_lr": 0.0005005263157894737, "low_lr": 1.0010526315789474e-05, "step": 949 }, { "epoch": 2.4957264957264957, "high_lr": 0.0005005263157894737, "low_lr": 1.0010526315789474e-05, "step": 949 }, { "epoch": 2.4957264957264957, "high_lr": 0.0005005263157894737, "low_lr": 1.0010526315789474e-05, "step": 949 }, { "epoch": 2.498356344510191, "grad_norm": 1.2310932874679565, "learning_rate": 0.0005, "loss": 1.3921, "step": 950 }, { "epoch": 2.498356344510191, "high_lr": 0.0005, "low_lr": 1e-05, "step": 950 }, { "epoch": 2.498356344510191, "high_lr": 0.0005, "low_lr": 1e-05, "step": 950 }, { "epoch": 2.498356344510191, "high_lr": 0.0005, "low_lr": 1e-05, "step": 950 }, { "epoch": 2.498356344510191, "high_lr": 0.0005, "low_lr": 1e-05, "step": 950 }, { "epoch": 2.498356344510191, "high_lr": 0.0005, "low_lr": 1e-05, "step": 950 }, { "epoch": 2.498356344510191, "high_lr": 0.0005, "low_lr": 1e-05, "step": 950 }, { "epoch": 2.498356344510191, "high_lr": 0.0005, "low_lr": 1e-05, "step": 950 }, { "epoch": 2.498356344510191, "high_lr": 0.0005, "low_lr": 1e-05, "step": 950 }, { "epoch": 2.5009861932938855, "grad_norm": 1.2856698036193848, "learning_rate": 0.0004994736842105263, "loss": 1.3888, "step": 951 }, { "epoch": 2.5009861932938855, "high_lr": 0.0004994736842105263, "low_lr": 9.989473684210527e-06, "step": 951 }, { "epoch": 2.5009861932938855, "high_lr": 0.0004994736842105263, "low_lr": 9.989473684210527e-06, "step": 951 }, { "epoch": 2.5009861932938855, "high_lr": 0.0004994736842105263, "low_lr": 9.989473684210527e-06, "step": 951 }, { "epoch": 2.5009861932938855, "high_lr": 0.0004994736842105263, "low_lr": 9.989473684210527e-06, "step": 951 }, { "epoch": 2.5009861932938855, "high_lr": 0.0004994736842105263, "low_lr": 9.989473684210527e-06, "step": 951 }, { "epoch": 2.5009861932938855, "high_lr": 0.0004994736842105263, "low_lr": 9.989473684210527e-06, "step": 951 }, { "epoch": 2.5009861932938855, "high_lr": 0.0004994736842105263, "low_lr": 9.989473684210527e-06, "step": 951 }, { "epoch": 2.5009861932938855, "high_lr": 0.0004994736842105263, "low_lr": 9.989473684210527e-06, "step": 951 }, { "epoch": 2.5036160420775806, "grad_norm": 1.3589057922363281, "learning_rate": 0.0004989473684210527, "loss": 1.3897, "step": 952 }, { "epoch": 2.5036160420775806, "high_lr": 0.0004989473684210527, "low_lr": 9.978947368421053e-06, "step": 952 }, { "epoch": 2.5036160420775806, "high_lr": 0.0004989473684210527, "low_lr": 9.978947368421053e-06, "step": 952 }, { "epoch": 2.5036160420775806, "high_lr": 0.0004989473684210527, "low_lr": 9.978947368421053e-06, "step": 952 }, { "epoch": 2.5036160420775806, "high_lr": 0.0004989473684210527, "low_lr": 9.978947368421053e-06, "step": 952 }, { "epoch": 2.5036160420775806, "high_lr": 0.0004989473684210527, "low_lr": 9.978947368421053e-06, "step": 952 }, { "epoch": 2.5036160420775806, "high_lr": 0.0004989473684210527, "low_lr": 9.978947368421053e-06, "step": 952 }, { "epoch": 2.5036160420775806, "high_lr": 0.0004989473684210527, "low_lr": 9.978947368421053e-06, "step": 952 }, { "epoch": 2.5036160420775806, "high_lr": 0.0004989473684210527, "low_lr": 9.978947368421053e-06, "step": 952 }, { "epoch": 2.5062458908612752, "grad_norm": 1.202459692955017, "learning_rate": 0.000498421052631579, "loss": 1.3616, "step": 953 }, { "epoch": 2.5062458908612752, "high_lr": 0.000498421052631579, "low_lr": 9.96842105263158e-06, "step": 953 }, { "epoch": 2.5062458908612752, "high_lr": 0.000498421052631579, "low_lr": 9.96842105263158e-06, "step": 953 }, { "epoch": 2.5062458908612752, "high_lr": 0.000498421052631579, "low_lr": 9.96842105263158e-06, "step": 953 }, { "epoch": 2.5062458908612752, "high_lr": 0.000498421052631579, "low_lr": 9.96842105263158e-06, "step": 953 }, { "epoch": 2.5062458908612752, "high_lr": 0.000498421052631579, "low_lr": 9.96842105263158e-06, "step": 953 }, { "epoch": 2.5062458908612752, "high_lr": 0.000498421052631579, "low_lr": 9.96842105263158e-06, "step": 953 }, { "epoch": 2.5062458908612752, "high_lr": 0.000498421052631579, "low_lr": 9.96842105263158e-06, "step": 953 }, { "epoch": 2.5062458908612752, "high_lr": 0.000498421052631579, "low_lr": 9.96842105263158e-06, "step": 953 }, { "epoch": 2.5088757396449703, "grad_norm": 1.286462426185608, "learning_rate": 0.0004978947368421053, "loss": 1.3871, "step": 954 }, { "epoch": 2.5088757396449703, "high_lr": 0.0004978947368421053, "low_lr": 9.957894736842106e-06, "step": 954 }, { "epoch": 2.5088757396449703, "high_lr": 0.0004978947368421053, "low_lr": 9.957894736842106e-06, "step": 954 }, { "epoch": 2.5088757396449703, "high_lr": 0.0004978947368421053, "low_lr": 9.957894736842106e-06, "step": 954 }, { "epoch": 2.5088757396449703, "high_lr": 0.0004978947368421053, "low_lr": 9.957894736842106e-06, "step": 954 }, { "epoch": 2.5088757396449703, "high_lr": 0.0004978947368421053, "low_lr": 9.957894736842106e-06, "step": 954 }, { "epoch": 2.5088757396449703, "high_lr": 0.0004978947368421053, "low_lr": 9.957894736842106e-06, "step": 954 }, { "epoch": 2.5088757396449703, "high_lr": 0.0004978947368421053, "low_lr": 9.957894736842106e-06, "step": 954 }, { "epoch": 2.5088757396449703, "high_lr": 0.0004978947368421053, "low_lr": 9.957894736842106e-06, "step": 954 }, { "epoch": 2.5115055884286654, "grad_norm": 1.2855150699615479, "learning_rate": 0.0004973684210526315, "loss": 1.3865, "step": 955 }, { "epoch": 2.5115055884286654, "high_lr": 0.0004973684210526315, "low_lr": 9.947368421052632e-06, "step": 955 }, { "epoch": 2.5115055884286654, "high_lr": 0.0004973684210526315, "low_lr": 9.947368421052632e-06, "step": 955 }, { "epoch": 2.5115055884286654, "high_lr": 0.0004973684210526315, "low_lr": 9.947368421052632e-06, "step": 955 }, { "epoch": 2.5115055884286654, "high_lr": 0.0004973684210526315, "low_lr": 9.947368421052632e-06, "step": 955 }, { "epoch": 2.5115055884286654, "high_lr": 0.0004973684210526315, "low_lr": 9.947368421052632e-06, "step": 955 }, { "epoch": 2.5115055884286654, "high_lr": 0.0004973684210526315, "low_lr": 9.947368421052632e-06, "step": 955 }, { "epoch": 2.5115055884286654, "high_lr": 0.0004973684210526315, "low_lr": 9.947368421052632e-06, "step": 955 }, { "epoch": 2.5115055884286654, "high_lr": 0.0004973684210526315, "low_lr": 9.947368421052632e-06, "step": 955 }, { "epoch": 2.51413543721236, "grad_norm": 1.3831669092178345, "learning_rate": 0.0004968421052631579, "loss": 1.3705, "step": 956 }, { "epoch": 2.51413543721236, "high_lr": 0.0004968421052631579, "low_lr": 9.936842105263159e-06, "step": 956 }, { "epoch": 2.51413543721236, "high_lr": 0.0004968421052631579, "low_lr": 9.936842105263159e-06, "step": 956 }, { "epoch": 2.51413543721236, "high_lr": 0.0004968421052631579, "low_lr": 9.936842105263159e-06, "step": 956 }, { "epoch": 2.51413543721236, "high_lr": 0.0004968421052631579, "low_lr": 9.936842105263159e-06, "step": 956 }, { "epoch": 2.51413543721236, "high_lr": 0.0004968421052631579, "low_lr": 9.936842105263159e-06, "step": 956 }, { "epoch": 2.51413543721236, "high_lr": 0.0004968421052631579, "low_lr": 9.936842105263159e-06, "step": 956 }, { "epoch": 2.51413543721236, "high_lr": 0.0004968421052631579, "low_lr": 9.936842105263159e-06, "step": 956 }, { "epoch": 2.51413543721236, "high_lr": 0.0004968421052631579, "low_lr": 9.936842105263159e-06, "step": 956 }, { "epoch": 2.516765285996055, "grad_norm": 1.3121248483657837, "learning_rate": 0.0004963157894736842, "loss": 1.4089, "step": 957 }, { "epoch": 2.516765285996055, "high_lr": 0.0004963157894736842, "low_lr": 9.926315789473685e-06, "step": 957 }, { "epoch": 2.516765285996055, "high_lr": 0.0004963157894736842, "low_lr": 9.926315789473685e-06, "step": 957 }, { "epoch": 2.516765285996055, "high_lr": 0.0004963157894736842, "low_lr": 9.926315789473685e-06, "step": 957 }, { "epoch": 2.516765285996055, "high_lr": 0.0004963157894736842, "low_lr": 9.926315789473685e-06, "step": 957 }, { "epoch": 2.516765285996055, "high_lr": 0.0004963157894736842, "low_lr": 9.926315789473685e-06, "step": 957 }, { "epoch": 2.516765285996055, "high_lr": 0.0004963157894736842, "low_lr": 9.926315789473685e-06, "step": 957 }, { "epoch": 2.516765285996055, "high_lr": 0.0004963157894736842, "low_lr": 9.926315789473685e-06, "step": 957 }, { "epoch": 2.516765285996055, "high_lr": 0.0004963157894736842, "low_lr": 9.926315789473685e-06, "step": 957 }, { "epoch": 2.5193951347797503, "grad_norm": 1.2612452507019043, "learning_rate": 0.0004957894736842105, "loss": 1.4095, "step": 958 }, { "epoch": 2.5193951347797503, "high_lr": 0.0004957894736842105, "low_lr": 9.915789473684211e-06, "step": 958 }, { "epoch": 2.5193951347797503, "high_lr": 0.0004957894736842105, "low_lr": 9.915789473684211e-06, "step": 958 }, { "epoch": 2.5193951347797503, "high_lr": 0.0004957894736842105, "low_lr": 9.915789473684211e-06, "step": 958 }, { "epoch": 2.5193951347797503, "high_lr": 0.0004957894736842105, "low_lr": 9.915789473684211e-06, "step": 958 }, { "epoch": 2.5193951347797503, "high_lr": 0.0004957894736842105, "low_lr": 9.915789473684211e-06, "step": 958 }, { "epoch": 2.5193951347797503, "high_lr": 0.0004957894736842105, "low_lr": 9.915789473684211e-06, "step": 958 }, { "epoch": 2.5193951347797503, "high_lr": 0.0004957894736842105, "low_lr": 9.915789473684211e-06, "step": 958 }, { "epoch": 2.5193951347797503, "high_lr": 0.0004957894736842105, "low_lr": 9.915789473684211e-06, "step": 958 }, { "epoch": 2.522024983563445, "grad_norm": 1.2511200904846191, "learning_rate": 0.0004952631578947369, "loss": 1.4177, "step": 959 }, { "epoch": 2.522024983563445, "high_lr": 0.0004952631578947369, "low_lr": 9.905263157894738e-06, "step": 959 }, { "epoch": 2.522024983563445, "high_lr": 0.0004952631578947369, "low_lr": 9.905263157894738e-06, "step": 959 }, { "epoch": 2.522024983563445, "high_lr": 0.0004952631578947369, "low_lr": 9.905263157894738e-06, "step": 959 }, { "epoch": 2.522024983563445, "high_lr": 0.0004952631578947369, "low_lr": 9.905263157894738e-06, "step": 959 }, { "epoch": 2.522024983563445, "high_lr": 0.0004952631578947369, "low_lr": 9.905263157894738e-06, "step": 959 }, { "epoch": 2.522024983563445, "high_lr": 0.0004952631578947369, "low_lr": 9.905263157894738e-06, "step": 959 }, { "epoch": 2.522024983563445, "high_lr": 0.0004952631578947369, "low_lr": 9.905263157894738e-06, "step": 959 }, { "epoch": 2.522024983563445, "high_lr": 0.0004952631578947369, "low_lr": 9.905263157894738e-06, "step": 959 }, { "epoch": 2.52465483234714, "grad_norm": 1.2834885120391846, "learning_rate": 0.0004947368421052632, "loss": 1.3563, "step": 960 }, { "epoch": 2.52465483234714, "high_lr": 0.0004947368421052632, "low_lr": 9.894736842105264e-06, "step": 960 }, { "epoch": 2.52465483234714, "high_lr": 0.0004947368421052632, "low_lr": 9.894736842105264e-06, "step": 960 }, { "epoch": 2.52465483234714, "high_lr": 0.0004947368421052632, "low_lr": 9.894736842105264e-06, "step": 960 }, { "epoch": 2.52465483234714, "high_lr": 0.0004947368421052632, "low_lr": 9.894736842105264e-06, "step": 960 }, { "epoch": 2.52465483234714, "high_lr": 0.0004947368421052632, "low_lr": 9.894736842105264e-06, "step": 960 }, { "epoch": 2.52465483234714, "high_lr": 0.0004947368421052632, "low_lr": 9.894736842105264e-06, "step": 960 }, { "epoch": 2.52465483234714, "high_lr": 0.0004947368421052632, "low_lr": 9.894736842105264e-06, "step": 960 }, { "epoch": 2.52465483234714, "high_lr": 0.0004947368421052632, "low_lr": 9.894736842105264e-06, "step": 960 }, { "epoch": 2.527284681130835, "grad_norm": 1.2725642919540405, "learning_rate": 0.0004942105263157895, "loss": 1.3973, "step": 961 }, { "epoch": 2.527284681130835, "high_lr": 0.0004942105263157895, "low_lr": 9.88421052631579e-06, "step": 961 }, { "epoch": 2.527284681130835, "high_lr": 0.0004942105263157895, "low_lr": 9.88421052631579e-06, "step": 961 }, { "epoch": 2.527284681130835, "high_lr": 0.0004942105263157895, "low_lr": 9.88421052631579e-06, "step": 961 }, { "epoch": 2.527284681130835, "high_lr": 0.0004942105263157895, "low_lr": 9.88421052631579e-06, "step": 961 }, { "epoch": 2.527284681130835, "high_lr": 0.0004942105263157895, "low_lr": 9.88421052631579e-06, "step": 961 }, { "epoch": 2.527284681130835, "high_lr": 0.0004942105263157895, "low_lr": 9.88421052631579e-06, "step": 961 }, { "epoch": 2.527284681130835, "high_lr": 0.0004942105263157895, "low_lr": 9.88421052631579e-06, "step": 961 }, { "epoch": 2.527284681130835, "high_lr": 0.0004942105263157895, "low_lr": 9.88421052631579e-06, "step": 961 }, { "epoch": 2.52991452991453, "grad_norm": 1.165604591369629, "learning_rate": 0.0004936842105263158, "loss": 1.367, "step": 962 }, { "epoch": 2.52991452991453, "high_lr": 0.0004936842105263158, "low_lr": 9.873684210526317e-06, "step": 962 }, { "epoch": 2.52991452991453, "high_lr": 0.0004936842105263158, "low_lr": 9.873684210526317e-06, "step": 962 }, { "epoch": 2.52991452991453, "high_lr": 0.0004936842105263158, "low_lr": 9.873684210526317e-06, "step": 962 }, { "epoch": 2.52991452991453, "high_lr": 0.0004936842105263158, "low_lr": 9.873684210526317e-06, "step": 962 }, { "epoch": 2.52991452991453, "high_lr": 0.0004936842105263158, "low_lr": 9.873684210526317e-06, "step": 962 }, { "epoch": 2.52991452991453, "high_lr": 0.0004936842105263158, "low_lr": 9.873684210526317e-06, "step": 962 }, { "epoch": 2.52991452991453, "high_lr": 0.0004936842105263158, "low_lr": 9.873684210526317e-06, "step": 962 }, { "epoch": 2.52991452991453, "high_lr": 0.0004936842105263158, "low_lr": 9.873684210526317e-06, "step": 962 }, { "epoch": 2.532544378698225, "grad_norm": 1.3213335275650024, "learning_rate": 0.0004931578947368422, "loss": 1.4425, "step": 963 }, { "epoch": 2.532544378698225, "high_lr": 0.0004931578947368422, "low_lr": 9.863157894736843e-06, "step": 963 }, { "epoch": 2.532544378698225, "high_lr": 0.0004931578947368422, "low_lr": 9.863157894736843e-06, "step": 963 }, { "epoch": 2.532544378698225, "high_lr": 0.0004931578947368422, "low_lr": 9.863157894736843e-06, "step": 963 }, { "epoch": 2.532544378698225, "high_lr": 0.0004931578947368422, "low_lr": 9.863157894736843e-06, "step": 963 }, { "epoch": 2.532544378698225, "high_lr": 0.0004931578947368422, "low_lr": 9.863157894736843e-06, "step": 963 }, { "epoch": 2.532544378698225, "high_lr": 0.0004931578947368422, "low_lr": 9.863157894736843e-06, "step": 963 }, { "epoch": 2.532544378698225, "high_lr": 0.0004931578947368422, "low_lr": 9.863157894736843e-06, "step": 963 }, { "epoch": 2.532544378698225, "high_lr": 0.0004931578947368422, "low_lr": 9.863157894736843e-06, "step": 963 }, { "epoch": 2.53517422748192, "grad_norm": 1.2542871236801147, "learning_rate": 0.0004926315789473684, "loss": 1.4138, "step": 964 }, { "epoch": 2.53517422748192, "high_lr": 0.0004926315789473684, "low_lr": 9.85263157894737e-06, "step": 964 }, { "epoch": 2.53517422748192, "high_lr": 0.0004926315789473684, "low_lr": 9.85263157894737e-06, "step": 964 }, { "epoch": 2.53517422748192, "high_lr": 0.0004926315789473684, "low_lr": 9.85263157894737e-06, "step": 964 }, { "epoch": 2.53517422748192, "high_lr": 0.0004926315789473684, "low_lr": 9.85263157894737e-06, "step": 964 }, { "epoch": 2.53517422748192, "high_lr": 0.0004926315789473684, "low_lr": 9.85263157894737e-06, "step": 964 }, { "epoch": 2.53517422748192, "high_lr": 0.0004926315789473684, "low_lr": 9.85263157894737e-06, "step": 964 }, { "epoch": 2.53517422748192, "high_lr": 0.0004926315789473684, "low_lr": 9.85263157894737e-06, "step": 964 }, { "epoch": 2.53517422748192, "high_lr": 0.0004926315789473684, "low_lr": 9.85263157894737e-06, "step": 964 }, { "epoch": 2.5378040762656147, "grad_norm": 1.2876955270767212, "learning_rate": 0.0004921052631578947, "loss": 1.415, "step": 965 }, { "epoch": 2.5378040762656147, "high_lr": 0.0004921052631578947, "low_lr": 9.842105263157896e-06, "step": 965 }, { "epoch": 2.5378040762656147, "high_lr": 0.0004921052631578947, "low_lr": 9.842105263157896e-06, "step": 965 }, { "epoch": 2.5378040762656147, "high_lr": 0.0004921052631578947, "low_lr": 9.842105263157896e-06, "step": 965 }, { "epoch": 2.5378040762656147, "high_lr": 0.0004921052631578947, "low_lr": 9.842105263157896e-06, "step": 965 }, { "epoch": 2.5378040762656147, "high_lr": 0.0004921052631578947, "low_lr": 9.842105263157896e-06, "step": 965 }, { "epoch": 2.5378040762656147, "high_lr": 0.0004921052631578947, "low_lr": 9.842105263157896e-06, "step": 965 }, { "epoch": 2.5378040762656147, "high_lr": 0.0004921052631578947, "low_lr": 9.842105263157896e-06, "step": 965 }, { "epoch": 2.5378040762656147, "high_lr": 0.0004921052631578947, "low_lr": 9.842105263157896e-06, "step": 965 }, { "epoch": 2.5404339250493098, "grad_norm": 1.2044575214385986, "learning_rate": 0.000491578947368421, "loss": 1.3757, "step": 966 }, { "epoch": 2.5404339250493098, "high_lr": 0.000491578947368421, "low_lr": 9.831578947368422e-06, "step": 966 }, { "epoch": 2.5404339250493098, "high_lr": 0.000491578947368421, "low_lr": 9.831578947368422e-06, "step": 966 }, { "epoch": 2.5404339250493098, "high_lr": 0.000491578947368421, "low_lr": 9.831578947368422e-06, "step": 966 }, { "epoch": 2.5404339250493098, "high_lr": 0.000491578947368421, "low_lr": 9.831578947368422e-06, "step": 966 }, { "epoch": 2.5404339250493098, "high_lr": 0.000491578947368421, "low_lr": 9.831578947368422e-06, "step": 966 }, { "epoch": 2.5404339250493098, "high_lr": 0.000491578947368421, "low_lr": 9.831578947368422e-06, "step": 966 }, { "epoch": 2.5404339250493098, "high_lr": 0.000491578947368421, "low_lr": 9.831578947368422e-06, "step": 966 }, { "epoch": 2.5404339250493098, "high_lr": 0.000491578947368421, "low_lr": 9.831578947368422e-06, "step": 966 }, { "epoch": 2.543063773833005, "grad_norm": 2.4558346271514893, "learning_rate": 0.0004910526315789474, "loss": 1.4181, "step": 967 }, { "epoch": 2.543063773833005, "high_lr": 0.0004910526315789474, "low_lr": 9.821052631578948e-06, "step": 967 }, { "epoch": 2.543063773833005, "high_lr": 0.0004910526315789474, "low_lr": 9.821052631578948e-06, "step": 967 }, { "epoch": 2.543063773833005, "high_lr": 0.0004910526315789474, "low_lr": 9.821052631578948e-06, "step": 967 }, { "epoch": 2.543063773833005, "high_lr": 0.0004910526315789474, "low_lr": 9.821052631578948e-06, "step": 967 }, { "epoch": 2.543063773833005, "high_lr": 0.0004910526315789474, "low_lr": 9.821052631578948e-06, "step": 967 }, { "epoch": 2.543063773833005, "high_lr": 0.0004910526315789474, "low_lr": 9.821052631578948e-06, "step": 967 }, { "epoch": 2.543063773833005, "high_lr": 0.0004910526315789474, "low_lr": 9.821052631578948e-06, "step": 967 }, { "epoch": 2.543063773833005, "high_lr": 0.0004910526315789474, "low_lr": 9.821052631578948e-06, "step": 967 }, { "epoch": 2.5456936226166995, "grad_norm": 1.3899211883544922, "learning_rate": 0.0004905263157894737, "loss": 1.4303, "step": 968 }, { "epoch": 2.5456936226166995, "high_lr": 0.0004905263157894737, "low_lr": 9.810526315789475e-06, "step": 968 }, { "epoch": 2.5456936226166995, "high_lr": 0.0004905263157894737, "low_lr": 9.810526315789475e-06, "step": 968 }, { "epoch": 2.5456936226166995, "high_lr": 0.0004905263157894737, "low_lr": 9.810526315789475e-06, "step": 968 }, { "epoch": 2.5456936226166995, "high_lr": 0.0004905263157894737, "low_lr": 9.810526315789475e-06, "step": 968 }, { "epoch": 2.5456936226166995, "high_lr": 0.0004905263157894737, "low_lr": 9.810526315789475e-06, "step": 968 }, { "epoch": 2.5456936226166995, "high_lr": 0.0004905263157894737, "low_lr": 9.810526315789475e-06, "step": 968 }, { "epoch": 2.5456936226166995, "high_lr": 0.0004905263157894737, "low_lr": 9.810526315789475e-06, "step": 968 }, { "epoch": 2.5456936226166995, "high_lr": 0.0004905263157894737, "low_lr": 9.810526315789475e-06, "step": 968 }, { "epoch": 2.5483234714003946, "grad_norm": 1.3217566013336182, "learning_rate": 0.00049, "loss": 1.3862, "step": 969 }, { "epoch": 2.5483234714003946, "high_lr": 0.00049, "low_lr": 9.800000000000001e-06, "step": 969 }, { "epoch": 2.5483234714003946, "high_lr": 0.00049, "low_lr": 9.800000000000001e-06, "step": 969 }, { "epoch": 2.5483234714003946, "high_lr": 0.00049, "low_lr": 9.800000000000001e-06, "step": 969 }, { "epoch": 2.5483234714003946, "high_lr": 0.00049, "low_lr": 9.800000000000001e-06, "step": 969 }, { "epoch": 2.5483234714003946, "high_lr": 0.00049, "low_lr": 9.800000000000001e-06, "step": 969 }, { "epoch": 2.5483234714003946, "high_lr": 0.00049, "low_lr": 9.800000000000001e-06, "step": 969 }, { "epoch": 2.5483234714003946, "high_lr": 0.00049, "low_lr": 9.800000000000001e-06, "step": 969 }, { "epoch": 2.5483234714003946, "high_lr": 0.00049, "low_lr": 9.800000000000001e-06, "step": 969 }, { "epoch": 2.5509533201840893, "grad_norm": 1.2447056770324707, "learning_rate": 0.0004894736842105264, "loss": 1.3551, "step": 970 }, { "epoch": 2.5509533201840893, "high_lr": 0.0004894736842105264, "low_lr": 9.789473684210527e-06, "step": 970 }, { "epoch": 2.5509533201840893, "high_lr": 0.0004894736842105264, "low_lr": 9.789473684210527e-06, "step": 970 }, { "epoch": 2.5509533201840893, "high_lr": 0.0004894736842105264, "low_lr": 9.789473684210527e-06, "step": 970 }, { "epoch": 2.5509533201840893, "high_lr": 0.0004894736842105264, "low_lr": 9.789473684210527e-06, "step": 970 }, { "epoch": 2.5509533201840893, "high_lr": 0.0004894736842105264, "low_lr": 9.789473684210527e-06, "step": 970 }, { "epoch": 2.5509533201840893, "high_lr": 0.0004894736842105264, "low_lr": 9.789473684210527e-06, "step": 970 }, { "epoch": 2.5509533201840893, "high_lr": 0.0004894736842105264, "low_lr": 9.789473684210527e-06, "step": 970 }, { "epoch": 2.5509533201840893, "high_lr": 0.0004894736842105264, "low_lr": 9.789473684210527e-06, "step": 970 }, { "epoch": 2.5535831689677844, "grad_norm": 1.3785122632980347, "learning_rate": 0.0004889473684210527, "loss": 1.4053, "step": 971 }, { "epoch": 2.5535831689677844, "high_lr": 0.0004889473684210527, "low_lr": 9.778947368421054e-06, "step": 971 }, { "epoch": 2.5535831689677844, "high_lr": 0.0004889473684210527, "low_lr": 9.778947368421054e-06, "step": 971 }, { "epoch": 2.5535831689677844, "high_lr": 0.0004889473684210527, "low_lr": 9.778947368421054e-06, "step": 971 }, { "epoch": 2.5535831689677844, "high_lr": 0.0004889473684210527, "low_lr": 9.778947368421054e-06, "step": 971 }, { "epoch": 2.5535831689677844, "high_lr": 0.0004889473684210527, "low_lr": 9.778947368421054e-06, "step": 971 }, { "epoch": 2.5535831689677844, "high_lr": 0.0004889473684210527, "low_lr": 9.778947368421054e-06, "step": 971 }, { "epoch": 2.5535831689677844, "high_lr": 0.0004889473684210527, "low_lr": 9.778947368421054e-06, "step": 971 }, { "epoch": 2.5535831689677844, "high_lr": 0.0004889473684210527, "low_lr": 9.778947368421054e-06, "step": 971 }, { "epoch": 2.556213017751479, "grad_norm": 1.2596253156661987, "learning_rate": 0.000488421052631579, "loss": 1.4237, "step": 972 }, { "epoch": 2.556213017751479, "high_lr": 0.000488421052631579, "low_lr": 9.76842105263158e-06, "step": 972 }, { "epoch": 2.556213017751479, "high_lr": 0.000488421052631579, "low_lr": 9.76842105263158e-06, "step": 972 }, { "epoch": 2.556213017751479, "high_lr": 0.000488421052631579, "low_lr": 9.76842105263158e-06, "step": 972 }, { "epoch": 2.556213017751479, "high_lr": 0.000488421052631579, "low_lr": 9.76842105263158e-06, "step": 972 }, { "epoch": 2.556213017751479, "high_lr": 0.000488421052631579, "low_lr": 9.76842105263158e-06, "step": 972 }, { "epoch": 2.556213017751479, "high_lr": 0.000488421052631579, "low_lr": 9.76842105263158e-06, "step": 972 }, { "epoch": 2.556213017751479, "high_lr": 0.000488421052631579, "low_lr": 9.76842105263158e-06, "step": 972 }, { "epoch": 2.556213017751479, "high_lr": 0.000488421052631579, "low_lr": 9.76842105263158e-06, "step": 972 }, { "epoch": 2.558842866535174, "grad_norm": 1.2338931560516357, "learning_rate": 0.0004878947368421053, "loss": 1.3836, "step": 973 }, { "epoch": 2.558842866535174, "high_lr": 0.0004878947368421053, "low_lr": 9.757894736842106e-06, "step": 973 }, { "epoch": 2.558842866535174, "high_lr": 0.0004878947368421053, "low_lr": 9.757894736842106e-06, "step": 973 }, { "epoch": 2.558842866535174, "high_lr": 0.0004878947368421053, "low_lr": 9.757894736842106e-06, "step": 973 }, { "epoch": 2.558842866535174, "high_lr": 0.0004878947368421053, "low_lr": 9.757894736842106e-06, "step": 973 }, { "epoch": 2.558842866535174, "high_lr": 0.0004878947368421053, "low_lr": 9.757894736842106e-06, "step": 973 }, { "epoch": 2.558842866535174, "high_lr": 0.0004878947368421053, "low_lr": 9.757894736842106e-06, "step": 973 }, { "epoch": 2.558842866535174, "high_lr": 0.0004878947368421053, "low_lr": 9.757894736842106e-06, "step": 973 }, { "epoch": 2.558842866535174, "high_lr": 0.0004878947368421053, "low_lr": 9.757894736842106e-06, "step": 973 }, { "epoch": 2.561472715318869, "grad_norm": 1.2043802738189697, "learning_rate": 0.0004873684210526316, "loss": 1.3826, "step": 974 }, { "epoch": 2.561472715318869, "high_lr": 0.0004873684210526316, "low_lr": 9.747368421052633e-06, "step": 974 }, { "epoch": 2.561472715318869, "high_lr": 0.0004873684210526316, "low_lr": 9.747368421052633e-06, "step": 974 }, { "epoch": 2.561472715318869, "high_lr": 0.0004873684210526316, "low_lr": 9.747368421052633e-06, "step": 974 }, { "epoch": 2.561472715318869, "high_lr": 0.0004873684210526316, "low_lr": 9.747368421052633e-06, "step": 974 }, { "epoch": 2.561472715318869, "high_lr": 0.0004873684210526316, "low_lr": 9.747368421052633e-06, "step": 974 }, { "epoch": 2.561472715318869, "high_lr": 0.0004873684210526316, "low_lr": 9.747368421052633e-06, "step": 974 }, { "epoch": 2.561472715318869, "high_lr": 0.0004873684210526316, "low_lr": 9.747368421052633e-06, "step": 974 }, { "epoch": 2.561472715318869, "high_lr": 0.0004873684210526316, "low_lr": 9.747368421052633e-06, "step": 974 }, { "epoch": 2.564102564102564, "grad_norm": 1.2998297214508057, "learning_rate": 0.0004868421052631579, "loss": 1.3963, "step": 975 }, { "epoch": 2.564102564102564, "high_lr": 0.0004868421052631579, "low_lr": 9.736842105263159e-06, "step": 975 }, { "epoch": 2.564102564102564, "high_lr": 0.0004868421052631579, "low_lr": 9.736842105263159e-06, "step": 975 }, { "epoch": 2.564102564102564, "high_lr": 0.0004868421052631579, "low_lr": 9.736842105263159e-06, "step": 975 }, { "epoch": 2.564102564102564, "high_lr": 0.0004868421052631579, "low_lr": 9.736842105263159e-06, "step": 975 }, { "epoch": 2.564102564102564, "high_lr": 0.0004868421052631579, "low_lr": 9.736842105263159e-06, "step": 975 }, { "epoch": 2.564102564102564, "high_lr": 0.0004868421052631579, "low_lr": 9.736842105263159e-06, "step": 975 }, { "epoch": 2.564102564102564, "high_lr": 0.0004868421052631579, "low_lr": 9.736842105263159e-06, "step": 975 }, { "epoch": 2.564102564102564, "high_lr": 0.0004868421052631579, "low_lr": 9.736842105263159e-06, "step": 975 }, { "epoch": 2.566732412886259, "grad_norm": 1.3420791625976562, "learning_rate": 0.0004863157894736842, "loss": 1.4045, "step": 976 }, { "epoch": 2.566732412886259, "high_lr": 0.0004863157894736842, "low_lr": 9.726315789473685e-06, "step": 976 }, { "epoch": 2.566732412886259, "high_lr": 0.0004863157894736842, "low_lr": 9.726315789473685e-06, "step": 976 }, { "epoch": 2.566732412886259, "high_lr": 0.0004863157894736842, "low_lr": 9.726315789473685e-06, "step": 976 }, { "epoch": 2.566732412886259, "high_lr": 0.0004863157894736842, "low_lr": 9.726315789473685e-06, "step": 976 }, { "epoch": 2.566732412886259, "high_lr": 0.0004863157894736842, "low_lr": 9.726315789473685e-06, "step": 976 }, { "epoch": 2.566732412886259, "high_lr": 0.0004863157894736842, "low_lr": 9.726315789473685e-06, "step": 976 }, { "epoch": 2.566732412886259, "high_lr": 0.0004863157894736842, "low_lr": 9.726315789473685e-06, "step": 976 }, { "epoch": 2.566732412886259, "high_lr": 0.0004863157894736842, "low_lr": 9.726315789473685e-06, "step": 976 }, { "epoch": 2.569362261669954, "grad_norm": 1.2570667266845703, "learning_rate": 0.00048578947368421054, "loss": 1.3937, "step": 977 }, { "epoch": 2.569362261669954, "high_lr": 0.00048578947368421054, "low_lr": 9.715789473684212e-06, "step": 977 }, { "epoch": 2.569362261669954, "high_lr": 0.00048578947368421054, "low_lr": 9.715789473684212e-06, "step": 977 }, { "epoch": 2.569362261669954, "high_lr": 0.00048578947368421054, "low_lr": 9.715789473684212e-06, "step": 977 }, { "epoch": 2.569362261669954, "high_lr": 0.00048578947368421054, "low_lr": 9.715789473684212e-06, "step": 977 }, { "epoch": 2.569362261669954, "high_lr": 0.00048578947368421054, "low_lr": 9.715789473684212e-06, "step": 977 }, { "epoch": 2.569362261669954, "high_lr": 0.00048578947368421054, "low_lr": 9.715789473684212e-06, "step": 977 }, { "epoch": 2.569362261669954, "high_lr": 0.00048578947368421054, "low_lr": 9.715789473684212e-06, "step": 977 }, { "epoch": 2.569362261669954, "high_lr": 0.00048578947368421054, "low_lr": 9.715789473684212e-06, "step": 977 }, { "epoch": 2.5719921104536487, "grad_norm": 1.1883705854415894, "learning_rate": 0.00048526315789473683, "loss": 1.3889, "step": 978 }, { "epoch": 2.5719921104536487, "high_lr": 0.00048526315789473683, "low_lr": 9.705263157894738e-06, "step": 978 }, { "epoch": 2.5719921104536487, "high_lr": 0.00048526315789473683, "low_lr": 9.705263157894738e-06, "step": 978 }, { "epoch": 2.5719921104536487, "high_lr": 0.00048526315789473683, "low_lr": 9.705263157894738e-06, "step": 978 }, { "epoch": 2.5719921104536487, "high_lr": 0.00048526315789473683, "low_lr": 9.705263157894738e-06, "step": 978 }, { "epoch": 2.5719921104536487, "high_lr": 0.00048526315789473683, "low_lr": 9.705263157894738e-06, "step": 978 }, { "epoch": 2.5719921104536487, "high_lr": 0.00048526315789473683, "low_lr": 9.705263157894738e-06, "step": 978 }, { "epoch": 2.5719921104536487, "high_lr": 0.00048526315789473683, "low_lr": 9.705263157894738e-06, "step": 978 }, { "epoch": 2.5719921104536487, "high_lr": 0.00048526315789473683, "low_lr": 9.705263157894738e-06, "step": 978 }, { "epoch": 2.574621959237344, "grad_norm": 1.3627675771713257, "learning_rate": 0.00048473684210526317, "loss": 1.3776, "step": 979 }, { "epoch": 2.574621959237344, "high_lr": 0.00048473684210526317, "low_lr": 9.694736842105263e-06, "step": 979 }, { "epoch": 2.574621959237344, "high_lr": 0.00048473684210526317, "low_lr": 9.694736842105263e-06, "step": 979 }, { "epoch": 2.574621959237344, "high_lr": 0.00048473684210526317, "low_lr": 9.694736842105263e-06, "step": 979 }, { "epoch": 2.574621959237344, "high_lr": 0.00048473684210526317, "low_lr": 9.694736842105263e-06, "step": 979 }, { "epoch": 2.574621959237344, "high_lr": 0.00048473684210526317, "low_lr": 9.694736842105263e-06, "step": 979 }, { "epoch": 2.574621959237344, "high_lr": 0.00048473684210526317, "low_lr": 9.694736842105263e-06, "step": 979 }, { "epoch": 2.574621959237344, "high_lr": 0.00048473684210526317, "low_lr": 9.694736842105263e-06, "step": 979 }, { "epoch": 2.574621959237344, "high_lr": 0.00048473684210526317, "low_lr": 9.694736842105263e-06, "step": 979 }, { "epoch": 2.577251808021039, "grad_norm": 1.2854207754135132, "learning_rate": 0.0004842105263157895, "loss": 1.4233, "step": 980 }, { "epoch": 2.577251808021039, "high_lr": 0.0004842105263157895, "low_lr": 9.68421052631579e-06, "step": 980 }, { "epoch": 2.577251808021039, "high_lr": 0.0004842105263157895, "low_lr": 9.68421052631579e-06, "step": 980 }, { "epoch": 2.577251808021039, "high_lr": 0.0004842105263157895, "low_lr": 9.68421052631579e-06, "step": 980 }, { "epoch": 2.577251808021039, "high_lr": 0.0004842105263157895, "low_lr": 9.68421052631579e-06, "step": 980 }, { "epoch": 2.577251808021039, "high_lr": 0.0004842105263157895, "low_lr": 9.68421052631579e-06, "step": 980 }, { "epoch": 2.577251808021039, "high_lr": 0.0004842105263157895, "low_lr": 9.68421052631579e-06, "step": 980 }, { "epoch": 2.577251808021039, "high_lr": 0.0004842105263157895, "low_lr": 9.68421052631579e-06, "step": 980 }, { "epoch": 2.577251808021039, "high_lr": 0.0004842105263157895, "low_lr": 9.68421052631579e-06, "step": 980 }, { "epoch": 2.5798816568047336, "grad_norm": 1.1889302730560303, "learning_rate": 0.0004836842105263158, "loss": 1.3547, "step": 981 }, { "epoch": 2.5798816568047336, "high_lr": 0.0004836842105263158, "low_lr": 9.673684210526317e-06, "step": 981 }, { "epoch": 2.5798816568047336, "high_lr": 0.0004836842105263158, "low_lr": 9.673684210526317e-06, "step": 981 }, { "epoch": 2.5798816568047336, "high_lr": 0.0004836842105263158, "low_lr": 9.673684210526317e-06, "step": 981 }, { "epoch": 2.5798816568047336, "high_lr": 0.0004836842105263158, "low_lr": 9.673684210526317e-06, "step": 981 }, { "epoch": 2.5798816568047336, "high_lr": 0.0004836842105263158, "low_lr": 9.673684210526317e-06, "step": 981 }, { "epoch": 2.5798816568047336, "high_lr": 0.0004836842105263158, "low_lr": 9.673684210526317e-06, "step": 981 }, { "epoch": 2.5798816568047336, "high_lr": 0.0004836842105263158, "low_lr": 9.673684210526317e-06, "step": 981 }, { "epoch": 2.5798816568047336, "high_lr": 0.0004836842105263158, "low_lr": 9.673684210526317e-06, "step": 981 }, { "epoch": 2.5825115055884287, "grad_norm": 1.2391724586486816, "learning_rate": 0.00048315789473684213, "loss": 1.3968, "step": 982 }, { "epoch": 2.5825115055884287, "high_lr": 0.00048315789473684213, "low_lr": 9.663157894736843e-06, "step": 982 }, { "epoch": 2.5825115055884287, "high_lr": 0.00048315789473684213, "low_lr": 9.663157894736843e-06, "step": 982 }, { "epoch": 2.5825115055884287, "high_lr": 0.00048315789473684213, "low_lr": 9.663157894736843e-06, "step": 982 }, { "epoch": 2.5825115055884287, "high_lr": 0.00048315789473684213, "low_lr": 9.663157894736843e-06, "step": 982 }, { "epoch": 2.5825115055884287, "high_lr": 0.00048315789473684213, "low_lr": 9.663157894736843e-06, "step": 982 }, { "epoch": 2.5825115055884287, "high_lr": 0.00048315789473684213, "low_lr": 9.663157894736843e-06, "step": 982 }, { "epoch": 2.5825115055884287, "high_lr": 0.00048315789473684213, "low_lr": 9.663157894736843e-06, "step": 982 }, { "epoch": 2.5825115055884287, "high_lr": 0.00048315789473684213, "low_lr": 9.663157894736843e-06, "step": 982 }, { "epoch": 2.585141354372124, "grad_norm": 1.2237834930419922, "learning_rate": 0.0004826315789473684, "loss": 1.3391, "step": 983 }, { "epoch": 2.585141354372124, "high_lr": 0.0004826315789473684, "low_lr": 9.65263157894737e-06, "step": 983 }, { "epoch": 2.585141354372124, "high_lr": 0.0004826315789473684, "low_lr": 9.65263157894737e-06, "step": 983 }, { "epoch": 2.585141354372124, "high_lr": 0.0004826315789473684, "low_lr": 9.65263157894737e-06, "step": 983 }, { "epoch": 2.585141354372124, "high_lr": 0.0004826315789473684, "low_lr": 9.65263157894737e-06, "step": 983 }, { "epoch": 2.585141354372124, "high_lr": 0.0004826315789473684, "low_lr": 9.65263157894737e-06, "step": 983 }, { "epoch": 2.585141354372124, "high_lr": 0.0004826315789473684, "low_lr": 9.65263157894737e-06, "step": 983 }, { "epoch": 2.585141354372124, "high_lr": 0.0004826315789473684, "low_lr": 9.65263157894737e-06, "step": 983 }, { "epoch": 2.585141354372124, "high_lr": 0.0004826315789473684, "low_lr": 9.65263157894737e-06, "step": 983 }, { "epoch": 2.5877712031558184, "grad_norm": 1.2656444311141968, "learning_rate": 0.00048210526315789476, "loss": 1.3969, "step": 984 }, { "epoch": 2.5877712031558184, "high_lr": 0.00048210526315789476, "low_lr": 9.642105263157896e-06, "step": 984 }, { "epoch": 2.5877712031558184, "high_lr": 0.00048210526315789476, "low_lr": 9.642105263157896e-06, "step": 984 }, { "epoch": 2.5877712031558184, "high_lr": 0.00048210526315789476, "low_lr": 9.642105263157896e-06, "step": 984 }, { "epoch": 2.5877712031558184, "high_lr": 0.00048210526315789476, "low_lr": 9.642105263157896e-06, "step": 984 }, { "epoch": 2.5877712031558184, "high_lr": 0.00048210526315789476, "low_lr": 9.642105263157896e-06, "step": 984 }, { "epoch": 2.5877712031558184, "high_lr": 0.00048210526315789476, "low_lr": 9.642105263157896e-06, "step": 984 }, { "epoch": 2.5877712031558184, "high_lr": 0.00048210526315789476, "low_lr": 9.642105263157896e-06, "step": 984 }, { "epoch": 2.5877712031558184, "high_lr": 0.00048210526315789476, "low_lr": 9.642105263157896e-06, "step": 984 }, { "epoch": 2.5904010519395135, "grad_norm": 1.304051399230957, "learning_rate": 0.00048157894736842105, "loss": 1.3388, "step": 985 }, { "epoch": 2.5904010519395135, "high_lr": 0.00048157894736842105, "low_lr": 9.631578947368422e-06, "step": 985 }, { "epoch": 2.5904010519395135, "high_lr": 0.00048157894736842105, "low_lr": 9.631578947368422e-06, "step": 985 }, { "epoch": 2.5904010519395135, "high_lr": 0.00048157894736842105, "low_lr": 9.631578947368422e-06, "step": 985 }, { "epoch": 2.5904010519395135, "high_lr": 0.00048157894736842105, "low_lr": 9.631578947368422e-06, "step": 985 }, { "epoch": 2.5904010519395135, "high_lr": 0.00048157894736842105, "low_lr": 9.631578947368422e-06, "step": 985 }, { "epoch": 2.5904010519395135, "high_lr": 0.00048157894736842105, "low_lr": 9.631578947368422e-06, "step": 985 }, { "epoch": 2.5904010519395135, "high_lr": 0.00048157894736842105, "low_lr": 9.631578947368422e-06, "step": 985 }, { "epoch": 2.5904010519395135, "high_lr": 0.00048157894736842105, "low_lr": 9.631578947368422e-06, "step": 985 }, { "epoch": 2.5930309007232086, "grad_norm": 1.3260159492492676, "learning_rate": 0.00048105263157894733, "loss": 1.3607, "step": 986 }, { "epoch": 2.5930309007232086, "high_lr": 0.00048105263157894733, "low_lr": 9.621052631578947e-06, "step": 986 }, { "epoch": 2.5930309007232086, "high_lr": 0.00048105263157894733, "low_lr": 9.621052631578947e-06, "step": 986 }, { "epoch": 2.5930309007232086, "high_lr": 0.00048105263157894733, "low_lr": 9.621052631578947e-06, "step": 986 }, { "epoch": 2.5930309007232086, "high_lr": 0.00048105263157894733, "low_lr": 9.621052631578947e-06, "step": 986 }, { "epoch": 2.5930309007232086, "high_lr": 0.00048105263157894733, "low_lr": 9.621052631578947e-06, "step": 986 }, { "epoch": 2.5930309007232086, "high_lr": 0.00048105263157894733, "low_lr": 9.621052631578947e-06, "step": 986 }, { "epoch": 2.5930309007232086, "high_lr": 0.00048105263157894733, "low_lr": 9.621052631578947e-06, "step": 986 }, { "epoch": 2.5930309007232086, "high_lr": 0.00048105263157894733, "low_lr": 9.621052631578947e-06, "step": 986 }, { "epoch": 2.5956607495069033, "grad_norm": 1.2157127857208252, "learning_rate": 0.0004805263157894737, "loss": 1.3737, "step": 987 }, { "epoch": 2.5956607495069033, "high_lr": 0.0004805263157894737, "low_lr": 9.610526315789475e-06, "step": 987 }, { "epoch": 2.5956607495069033, "high_lr": 0.0004805263157894737, "low_lr": 9.610526315789475e-06, "step": 987 }, { "epoch": 2.5956607495069033, "high_lr": 0.0004805263157894737, "low_lr": 9.610526315789475e-06, "step": 987 }, { "epoch": 2.5956607495069033, "high_lr": 0.0004805263157894737, "low_lr": 9.610526315789475e-06, "step": 987 }, { "epoch": 2.5956607495069033, "high_lr": 0.0004805263157894737, "low_lr": 9.610526315789475e-06, "step": 987 }, { "epoch": 2.5956607495069033, "high_lr": 0.0004805263157894737, "low_lr": 9.610526315789475e-06, "step": 987 }, { "epoch": 2.5956607495069033, "high_lr": 0.0004805263157894737, "low_lr": 9.610526315789475e-06, "step": 987 }, { "epoch": 2.5956607495069033, "high_lr": 0.0004805263157894737, "low_lr": 9.610526315789475e-06, "step": 987 }, { "epoch": 2.5982905982905984, "grad_norm": 1.289671778678894, "learning_rate": 0.00048, "loss": 1.3784, "step": 988 }, { "epoch": 2.5982905982905984, "high_lr": 0.00048, "low_lr": 9.600000000000001e-06, "step": 988 }, { "epoch": 2.5982905982905984, "high_lr": 0.00048, "low_lr": 9.600000000000001e-06, "step": 988 }, { "epoch": 2.5982905982905984, "high_lr": 0.00048, "low_lr": 9.600000000000001e-06, "step": 988 }, { "epoch": 2.5982905982905984, "high_lr": 0.00048, "low_lr": 9.600000000000001e-06, "step": 988 }, { "epoch": 2.5982905982905984, "high_lr": 0.00048, "low_lr": 9.600000000000001e-06, "step": 988 }, { "epoch": 2.5982905982905984, "high_lr": 0.00048, "low_lr": 9.600000000000001e-06, "step": 988 }, { "epoch": 2.5982905982905984, "high_lr": 0.00048, "low_lr": 9.600000000000001e-06, "step": 988 }, { "epoch": 2.5982905982905984, "high_lr": 0.00048, "low_lr": 9.600000000000001e-06, "step": 988 }, { "epoch": 2.6009204470742935, "grad_norm": 1.2512733936309814, "learning_rate": 0.00047947368421052635, "loss": 1.4151, "step": 989 }, { "epoch": 2.6009204470742935, "high_lr": 0.00047947368421052635, "low_lr": 9.589473684210528e-06, "step": 989 }, { "epoch": 2.6009204470742935, "high_lr": 0.00047947368421052635, "low_lr": 9.589473684210528e-06, "step": 989 }, { "epoch": 2.6009204470742935, "high_lr": 0.00047947368421052635, "low_lr": 9.589473684210528e-06, "step": 989 }, { "epoch": 2.6009204470742935, "high_lr": 0.00047947368421052635, "low_lr": 9.589473684210528e-06, "step": 989 }, { "epoch": 2.6009204470742935, "high_lr": 0.00047947368421052635, "low_lr": 9.589473684210528e-06, "step": 989 }, { "epoch": 2.6009204470742935, "high_lr": 0.00047947368421052635, "low_lr": 9.589473684210528e-06, "step": 989 }, { "epoch": 2.6009204470742935, "high_lr": 0.00047947368421052635, "low_lr": 9.589473684210528e-06, "step": 989 }, { "epoch": 2.6009204470742935, "high_lr": 0.00047947368421052635, "low_lr": 9.589473684210528e-06, "step": 989 }, { "epoch": 2.603550295857988, "grad_norm": 1.3273476362228394, "learning_rate": 0.00047894736842105264, "loss": 1.3562, "step": 990 }, { "epoch": 2.603550295857988, "high_lr": 0.00047894736842105264, "low_lr": 9.578947368421054e-06, "step": 990 }, { "epoch": 2.603550295857988, "high_lr": 0.00047894736842105264, "low_lr": 9.578947368421054e-06, "step": 990 }, { "epoch": 2.603550295857988, "high_lr": 0.00047894736842105264, "low_lr": 9.578947368421054e-06, "step": 990 }, { "epoch": 2.603550295857988, "high_lr": 0.00047894736842105264, "low_lr": 9.578947368421054e-06, "step": 990 }, { "epoch": 2.603550295857988, "high_lr": 0.00047894736842105264, "low_lr": 9.578947368421054e-06, "step": 990 }, { "epoch": 2.603550295857988, "high_lr": 0.00047894736842105264, "low_lr": 9.578947368421054e-06, "step": 990 }, { "epoch": 2.603550295857988, "high_lr": 0.00047894736842105264, "low_lr": 9.578947368421054e-06, "step": 990 }, { "epoch": 2.603550295857988, "high_lr": 0.00047894736842105264, "low_lr": 9.578947368421054e-06, "step": 990 }, { "epoch": 2.6061801446416832, "grad_norm": 1.1679435968399048, "learning_rate": 0.000478421052631579, "loss": 1.3634, "step": 991 }, { "epoch": 2.6061801446416832, "high_lr": 0.000478421052631579, "low_lr": 9.56842105263158e-06, "step": 991 }, { "epoch": 2.6061801446416832, "high_lr": 0.000478421052631579, "low_lr": 9.56842105263158e-06, "step": 991 }, { "epoch": 2.6061801446416832, "high_lr": 0.000478421052631579, "low_lr": 9.56842105263158e-06, "step": 991 }, { "epoch": 2.6061801446416832, "high_lr": 0.000478421052631579, "low_lr": 9.56842105263158e-06, "step": 991 }, { "epoch": 2.6061801446416832, "high_lr": 0.000478421052631579, "low_lr": 9.56842105263158e-06, "step": 991 }, { "epoch": 2.6061801446416832, "high_lr": 0.000478421052631579, "low_lr": 9.56842105263158e-06, "step": 991 }, { "epoch": 2.6061801446416832, "high_lr": 0.000478421052631579, "low_lr": 9.56842105263158e-06, "step": 991 }, { "epoch": 2.6061801446416832, "high_lr": 0.000478421052631579, "low_lr": 9.56842105263158e-06, "step": 991 }, { "epoch": 2.608809993425378, "grad_norm": 1.233843445777893, "learning_rate": 0.00047789473684210526, "loss": 1.3652, "step": 992 }, { "epoch": 2.608809993425378, "high_lr": 0.00047789473684210526, "low_lr": 9.557894736842107e-06, "step": 992 }, { "epoch": 2.608809993425378, "high_lr": 0.00047789473684210526, "low_lr": 9.557894736842107e-06, "step": 992 }, { "epoch": 2.608809993425378, "high_lr": 0.00047789473684210526, "low_lr": 9.557894736842107e-06, "step": 992 }, { "epoch": 2.608809993425378, "high_lr": 0.00047789473684210526, "low_lr": 9.557894736842107e-06, "step": 992 }, { "epoch": 2.608809993425378, "high_lr": 0.00047789473684210526, "low_lr": 9.557894736842107e-06, "step": 992 }, { "epoch": 2.608809993425378, "high_lr": 0.00047789473684210526, "low_lr": 9.557894736842107e-06, "step": 992 }, { "epoch": 2.608809993425378, "high_lr": 0.00047789473684210526, "low_lr": 9.557894736842107e-06, "step": 992 }, { "epoch": 2.608809993425378, "high_lr": 0.00047789473684210526, "low_lr": 9.557894736842107e-06, "step": 992 }, { "epoch": 2.611439842209073, "grad_norm": 1.2381819486618042, "learning_rate": 0.00047736842105263155, "loss": 1.3308, "step": 993 }, { "epoch": 2.611439842209073, "high_lr": 0.00047736842105263155, "low_lr": 9.547368421052631e-06, "step": 993 }, { "epoch": 2.611439842209073, "high_lr": 0.00047736842105263155, "low_lr": 9.547368421052631e-06, "step": 993 }, { "epoch": 2.611439842209073, "high_lr": 0.00047736842105263155, "low_lr": 9.547368421052631e-06, "step": 993 }, { "epoch": 2.611439842209073, "high_lr": 0.00047736842105263155, "low_lr": 9.547368421052631e-06, "step": 993 }, { "epoch": 2.611439842209073, "high_lr": 0.00047736842105263155, "low_lr": 9.547368421052631e-06, "step": 993 }, { "epoch": 2.611439842209073, "high_lr": 0.00047736842105263155, "low_lr": 9.547368421052631e-06, "step": 993 }, { "epoch": 2.611439842209073, "high_lr": 0.00047736842105263155, "low_lr": 9.547368421052631e-06, "step": 993 }, { "epoch": 2.611439842209073, "high_lr": 0.00047736842105263155, "low_lr": 9.547368421052631e-06, "step": 993 }, { "epoch": 2.6140696909927676, "grad_norm": 1.323737382888794, "learning_rate": 0.0004768421052631579, "loss": 1.3516, "step": 994 }, { "epoch": 2.6140696909927676, "high_lr": 0.0004768421052631579, "low_lr": 9.53684210526316e-06, "step": 994 }, { "epoch": 2.6140696909927676, "high_lr": 0.0004768421052631579, "low_lr": 9.53684210526316e-06, "step": 994 }, { "epoch": 2.6140696909927676, "high_lr": 0.0004768421052631579, "low_lr": 9.53684210526316e-06, "step": 994 }, { "epoch": 2.6140696909927676, "high_lr": 0.0004768421052631579, "low_lr": 9.53684210526316e-06, "step": 994 }, { "epoch": 2.6140696909927676, "high_lr": 0.0004768421052631579, "low_lr": 9.53684210526316e-06, "step": 994 }, { "epoch": 2.6140696909927676, "high_lr": 0.0004768421052631579, "low_lr": 9.53684210526316e-06, "step": 994 }, { "epoch": 2.6140696909927676, "high_lr": 0.0004768421052631579, "low_lr": 9.53684210526316e-06, "step": 994 }, { "epoch": 2.6140696909927676, "high_lr": 0.0004768421052631579, "low_lr": 9.53684210526316e-06, "step": 994 }, { "epoch": 2.6166995397764627, "grad_norm": 1.3120098114013672, "learning_rate": 0.0004763157894736842, "loss": 1.4448, "step": 995 }, { "epoch": 2.6166995397764627, "high_lr": 0.0004763157894736842, "low_lr": 9.526315789473684e-06, "step": 995 }, { "epoch": 2.6166995397764627, "high_lr": 0.0004763157894736842, "low_lr": 9.526315789473684e-06, "step": 995 }, { "epoch": 2.6166995397764627, "high_lr": 0.0004763157894736842, "low_lr": 9.526315789473684e-06, "step": 995 }, { "epoch": 2.6166995397764627, "high_lr": 0.0004763157894736842, "low_lr": 9.526315789473684e-06, "step": 995 }, { "epoch": 2.6166995397764627, "high_lr": 0.0004763157894736842, "low_lr": 9.526315789473684e-06, "step": 995 }, { "epoch": 2.6166995397764627, "high_lr": 0.0004763157894736842, "low_lr": 9.526315789473684e-06, "step": 995 }, { "epoch": 2.6166995397764627, "high_lr": 0.0004763157894736842, "low_lr": 9.526315789473684e-06, "step": 995 }, { "epoch": 2.6166995397764627, "high_lr": 0.0004763157894736842, "low_lr": 9.526315789473684e-06, "step": 995 }, { "epoch": 2.619329388560158, "grad_norm": 1.3577193021774292, "learning_rate": 0.00047578947368421057, "loss": 1.4299, "step": 996 }, { "epoch": 2.619329388560158, "high_lr": 0.00047578947368421057, "low_lr": 9.515789473684212e-06, "step": 996 }, { "epoch": 2.619329388560158, "high_lr": 0.00047578947368421057, "low_lr": 9.515789473684212e-06, "step": 996 }, { "epoch": 2.619329388560158, "high_lr": 0.00047578947368421057, "low_lr": 9.515789473684212e-06, "step": 996 }, { "epoch": 2.619329388560158, "high_lr": 0.00047578947368421057, "low_lr": 9.515789473684212e-06, "step": 996 }, { "epoch": 2.619329388560158, "high_lr": 0.00047578947368421057, "low_lr": 9.515789473684212e-06, "step": 996 }, { "epoch": 2.619329388560158, "high_lr": 0.00047578947368421057, "low_lr": 9.515789473684212e-06, "step": 996 }, { "epoch": 2.619329388560158, "high_lr": 0.00047578947368421057, "low_lr": 9.515789473684212e-06, "step": 996 }, { "epoch": 2.619329388560158, "high_lr": 0.00047578947368421057, "low_lr": 9.515789473684212e-06, "step": 996 }, { "epoch": 2.6219592373438525, "grad_norm": 1.3073041439056396, "learning_rate": 0.00047526315789473686, "loss": 1.3781, "step": 997 }, { "epoch": 2.6219592373438525, "high_lr": 0.00047526315789473686, "low_lr": 9.505263157894738e-06, "step": 997 }, { "epoch": 2.6219592373438525, "high_lr": 0.00047526315789473686, "low_lr": 9.505263157894738e-06, "step": 997 }, { "epoch": 2.6219592373438525, "high_lr": 0.00047526315789473686, "low_lr": 9.505263157894738e-06, "step": 997 }, { "epoch": 2.6219592373438525, "high_lr": 0.00047526315789473686, "low_lr": 9.505263157894738e-06, "step": 997 }, { "epoch": 2.6219592373438525, "high_lr": 0.00047526315789473686, "low_lr": 9.505263157894738e-06, "step": 997 }, { "epoch": 2.6219592373438525, "high_lr": 0.00047526315789473686, "low_lr": 9.505263157894738e-06, "step": 997 }, { "epoch": 2.6219592373438525, "high_lr": 0.00047526315789473686, "low_lr": 9.505263157894738e-06, "step": 997 }, { "epoch": 2.6219592373438525, "high_lr": 0.00047526315789473686, "low_lr": 9.505263157894738e-06, "step": 997 }, { "epoch": 2.6245890861275476, "grad_norm": 1.2592058181762695, "learning_rate": 0.0004747368421052632, "loss": 1.3993, "step": 998 }, { "epoch": 2.6245890861275476, "high_lr": 0.0004747368421052632, "low_lr": 9.494736842105265e-06, "step": 998 }, { "epoch": 2.6245890861275476, "high_lr": 0.0004747368421052632, "low_lr": 9.494736842105265e-06, "step": 998 }, { "epoch": 2.6245890861275476, "high_lr": 0.0004747368421052632, "low_lr": 9.494736842105265e-06, "step": 998 }, { "epoch": 2.6245890861275476, "high_lr": 0.0004747368421052632, "low_lr": 9.494736842105265e-06, "step": 998 }, { "epoch": 2.6245890861275476, "high_lr": 0.0004747368421052632, "low_lr": 9.494736842105265e-06, "step": 998 }, { "epoch": 2.6245890861275476, "high_lr": 0.0004747368421052632, "low_lr": 9.494736842105265e-06, "step": 998 }, { "epoch": 2.6245890861275476, "high_lr": 0.0004747368421052632, "low_lr": 9.494736842105265e-06, "step": 998 }, { "epoch": 2.6245890861275476, "high_lr": 0.0004747368421052632, "low_lr": 9.494736842105265e-06, "step": 998 }, { "epoch": 2.6272189349112427, "grad_norm": 1.1837654113769531, "learning_rate": 0.0004742105263157895, "loss": 1.3234, "step": 999 }, { "epoch": 2.6272189349112427, "high_lr": 0.0004742105263157895, "low_lr": 9.484210526315791e-06, "step": 999 }, { "epoch": 2.6272189349112427, "high_lr": 0.0004742105263157895, "low_lr": 9.484210526315791e-06, "step": 999 }, { "epoch": 2.6272189349112427, "high_lr": 0.0004742105263157895, "low_lr": 9.484210526315791e-06, "step": 999 }, { "epoch": 2.6272189349112427, "high_lr": 0.0004742105263157895, "low_lr": 9.484210526315791e-06, "step": 999 }, { "epoch": 2.6272189349112427, "high_lr": 0.0004742105263157895, "low_lr": 9.484210526315791e-06, "step": 999 }, { "epoch": 2.6272189349112427, "high_lr": 0.0004742105263157895, "low_lr": 9.484210526315791e-06, "step": 999 }, { "epoch": 2.6272189349112427, "high_lr": 0.0004742105263157895, "low_lr": 9.484210526315791e-06, "step": 999 }, { "epoch": 2.6272189349112427, "high_lr": 0.0004742105263157895, "low_lr": 9.484210526315791e-06, "step": 999 }, { "epoch": 2.6298487836949374, "grad_norm": 1.3210477828979492, "learning_rate": 0.00047368421052631577, "loss": 1.3892, "step": 1000 }, { "epoch": 2.6298487836949374, "high_lr": 0.00047368421052631577, "low_lr": 9.473684210526315e-06, "step": 1000 }, { "epoch": 2.6298487836949374, "high_lr": 0.00047368421052631577, "low_lr": 9.473684210526315e-06, "step": 1000 }, { "epoch": 2.6298487836949374, "high_lr": 0.00047368421052631577, "low_lr": 9.473684210526315e-06, "step": 1000 }, { "epoch": 2.6298487836949374, "high_lr": 0.00047368421052631577, "low_lr": 9.473684210526315e-06, "step": 1000 }, { "epoch": 2.6298487836949374, "high_lr": 0.00047368421052631577, "low_lr": 9.473684210526315e-06, "step": 1000 }, { "epoch": 2.6298487836949374, "high_lr": 0.00047368421052631577, "low_lr": 9.473684210526315e-06, "step": 1000 }, { "epoch": 2.6298487836949374, "high_lr": 0.00047368421052631577, "low_lr": 9.473684210526315e-06, "step": 1000 }, { "epoch": 2.6298487836949374, "high_lr": 0.00047368421052631577, "low_lr": 9.473684210526315e-06, "step": 1000 }, { "epoch": 2.6324786324786325, "grad_norm": 1.3772284984588623, "learning_rate": 0.0004731578947368421, "loss": 1.359, "step": 1001 }, { "epoch": 2.6324786324786325, "high_lr": 0.0004731578947368421, "low_lr": 9.463157894736844e-06, "step": 1001 }, { "epoch": 2.6324786324786325, "high_lr": 0.0004731578947368421, "low_lr": 9.463157894736844e-06, "step": 1001 }, { "epoch": 2.6324786324786325, "high_lr": 0.0004731578947368421, "low_lr": 9.463157894736844e-06, "step": 1001 }, { "epoch": 2.6324786324786325, "high_lr": 0.0004731578947368421, "low_lr": 9.463157894736844e-06, "step": 1001 }, { "epoch": 2.6324786324786325, "high_lr": 0.0004731578947368421, "low_lr": 9.463157894736844e-06, "step": 1001 }, { "epoch": 2.6324786324786325, "high_lr": 0.0004731578947368421, "low_lr": 9.463157894736844e-06, "step": 1001 }, { "epoch": 2.6324786324786325, "high_lr": 0.0004731578947368421, "low_lr": 9.463157894736844e-06, "step": 1001 }, { "epoch": 2.6324786324786325, "high_lr": 0.0004731578947368421, "low_lr": 9.463157894736844e-06, "step": 1001 }, { "epoch": 2.6351084812623276, "grad_norm": 1.3075475692749023, "learning_rate": 0.0004726315789473684, "loss": 1.3993, "step": 1002 }, { "epoch": 2.6351084812623276, "high_lr": 0.0004726315789473684, "low_lr": 9.452631578947368e-06, "step": 1002 }, { "epoch": 2.6351084812623276, "high_lr": 0.0004726315789473684, "low_lr": 9.452631578947368e-06, "step": 1002 }, { "epoch": 2.6351084812623276, "high_lr": 0.0004726315789473684, "low_lr": 9.452631578947368e-06, "step": 1002 }, { "epoch": 2.6351084812623276, "high_lr": 0.0004726315789473684, "low_lr": 9.452631578947368e-06, "step": 1002 }, { "epoch": 2.6351084812623276, "high_lr": 0.0004726315789473684, "low_lr": 9.452631578947368e-06, "step": 1002 }, { "epoch": 2.6351084812623276, "high_lr": 0.0004726315789473684, "low_lr": 9.452631578947368e-06, "step": 1002 }, { "epoch": 2.6351084812623276, "high_lr": 0.0004726315789473684, "low_lr": 9.452631578947368e-06, "step": 1002 }, { "epoch": 2.6351084812623276, "high_lr": 0.0004726315789473684, "low_lr": 9.452631578947368e-06, "step": 1002 }, { "epoch": 2.637738330046022, "grad_norm": 1.3833972215652466, "learning_rate": 0.00047210526315789473, "loss": 1.4675, "step": 1003 }, { "epoch": 2.637738330046022, "high_lr": 0.00047210526315789473, "low_lr": 9.442105263157896e-06, "step": 1003 }, { "epoch": 2.637738330046022, "high_lr": 0.00047210526315789473, "low_lr": 9.442105263157896e-06, "step": 1003 }, { "epoch": 2.637738330046022, "high_lr": 0.00047210526315789473, "low_lr": 9.442105263157896e-06, "step": 1003 }, { "epoch": 2.637738330046022, "high_lr": 0.00047210526315789473, "low_lr": 9.442105263157896e-06, "step": 1003 }, { "epoch": 2.637738330046022, "high_lr": 0.00047210526315789473, "low_lr": 9.442105263157896e-06, "step": 1003 }, { "epoch": 2.637738330046022, "high_lr": 0.00047210526315789473, "low_lr": 9.442105263157896e-06, "step": 1003 }, { "epoch": 2.637738330046022, "high_lr": 0.00047210526315789473, "low_lr": 9.442105263157896e-06, "step": 1003 }, { "epoch": 2.637738330046022, "high_lr": 0.00047210526315789473, "low_lr": 9.442105263157896e-06, "step": 1003 }, { "epoch": 2.6403681788297173, "grad_norm": 1.2069523334503174, "learning_rate": 0.0004715789473684211, "loss": 1.406, "step": 1004 }, { "epoch": 2.6403681788297173, "high_lr": 0.0004715789473684211, "low_lr": 9.43157894736842e-06, "step": 1004 }, { "epoch": 2.6403681788297173, "high_lr": 0.0004715789473684211, "low_lr": 9.43157894736842e-06, "step": 1004 }, { "epoch": 2.6403681788297173, "high_lr": 0.0004715789473684211, "low_lr": 9.43157894736842e-06, "step": 1004 }, { "epoch": 2.6403681788297173, "high_lr": 0.0004715789473684211, "low_lr": 9.43157894736842e-06, "step": 1004 }, { "epoch": 2.6403681788297173, "high_lr": 0.0004715789473684211, "low_lr": 9.43157894736842e-06, "step": 1004 }, { "epoch": 2.6403681788297173, "high_lr": 0.0004715789473684211, "low_lr": 9.43157894736842e-06, "step": 1004 }, { "epoch": 2.6403681788297173, "high_lr": 0.0004715789473684211, "low_lr": 9.43157894736842e-06, "step": 1004 }, { "epoch": 2.6403681788297173, "high_lr": 0.0004715789473684211, "low_lr": 9.43157894736842e-06, "step": 1004 }, { "epoch": 2.6429980276134124, "grad_norm": 1.2812342643737793, "learning_rate": 0.0004710526315789474, "loss": 1.3987, "step": 1005 }, { "epoch": 2.6429980276134124, "high_lr": 0.0004710526315789474, "low_lr": 9.421052631578949e-06, "step": 1005 }, { "epoch": 2.6429980276134124, "high_lr": 0.0004710526315789474, "low_lr": 9.421052631578949e-06, "step": 1005 }, { "epoch": 2.6429980276134124, "high_lr": 0.0004710526315789474, "low_lr": 9.421052631578949e-06, "step": 1005 }, { "epoch": 2.6429980276134124, "high_lr": 0.0004710526315789474, "low_lr": 9.421052631578949e-06, "step": 1005 }, { "epoch": 2.6429980276134124, "high_lr": 0.0004710526315789474, "low_lr": 9.421052631578949e-06, "step": 1005 }, { "epoch": 2.6429980276134124, "high_lr": 0.0004710526315789474, "low_lr": 9.421052631578949e-06, "step": 1005 }, { "epoch": 2.6429980276134124, "high_lr": 0.0004710526315789474, "low_lr": 9.421052631578949e-06, "step": 1005 }, { "epoch": 2.6429980276134124, "high_lr": 0.0004710526315789474, "low_lr": 9.421052631578949e-06, "step": 1005 }, { "epoch": 2.645627876397107, "grad_norm": 1.30702543258667, "learning_rate": 0.0004705263157894737, "loss": 1.371, "step": 1006 }, { "epoch": 2.645627876397107, "high_lr": 0.0004705263157894737, "low_lr": 9.410526315789475e-06, "step": 1006 }, { "epoch": 2.645627876397107, "high_lr": 0.0004705263157894737, "low_lr": 9.410526315789475e-06, "step": 1006 }, { "epoch": 2.645627876397107, "high_lr": 0.0004705263157894737, "low_lr": 9.410526315789475e-06, "step": 1006 }, { "epoch": 2.645627876397107, "high_lr": 0.0004705263157894737, "low_lr": 9.410526315789475e-06, "step": 1006 }, { "epoch": 2.645627876397107, "high_lr": 0.0004705263157894737, "low_lr": 9.410526315789475e-06, "step": 1006 }, { "epoch": 2.645627876397107, "high_lr": 0.0004705263157894737, "low_lr": 9.410526315789475e-06, "step": 1006 }, { "epoch": 2.645627876397107, "high_lr": 0.0004705263157894737, "low_lr": 9.410526315789475e-06, "step": 1006 }, { "epoch": 2.645627876397107, "high_lr": 0.0004705263157894737, "low_lr": 9.410526315789475e-06, "step": 1006 }, { "epoch": 2.648257725180802, "grad_norm": 1.2445564270019531, "learning_rate": 0.00047, "loss": 1.397, "step": 1007 }, { "epoch": 2.648257725180802, "high_lr": 0.00047, "low_lr": 9.4e-06, "step": 1007 }, { "epoch": 2.648257725180802, "high_lr": 0.00047, "low_lr": 9.4e-06, "step": 1007 }, { "epoch": 2.648257725180802, "high_lr": 0.00047, "low_lr": 9.4e-06, "step": 1007 }, { "epoch": 2.648257725180802, "high_lr": 0.00047, "low_lr": 9.4e-06, "step": 1007 }, { "epoch": 2.648257725180802, "high_lr": 0.00047, "low_lr": 9.4e-06, "step": 1007 }, { "epoch": 2.648257725180802, "high_lr": 0.00047, "low_lr": 9.4e-06, "step": 1007 }, { "epoch": 2.648257725180802, "high_lr": 0.00047, "low_lr": 9.4e-06, "step": 1007 }, { "epoch": 2.648257725180802, "high_lr": 0.00047, "low_lr": 9.4e-06, "step": 1007 }, { "epoch": 2.6508875739644973, "grad_norm": 1.2458575963974, "learning_rate": 0.0004694736842105263, "loss": 1.4151, "step": 1008 }, { "epoch": 2.6508875739644973, "high_lr": 0.0004694736842105263, "low_lr": 9.389473684210528e-06, "step": 1008 }, { "epoch": 2.6508875739644973, "high_lr": 0.0004694736842105263, "low_lr": 9.389473684210528e-06, "step": 1008 }, { "epoch": 2.6508875739644973, "high_lr": 0.0004694736842105263, "low_lr": 9.389473684210528e-06, "step": 1008 }, { "epoch": 2.6508875739644973, "high_lr": 0.0004694736842105263, "low_lr": 9.389473684210528e-06, "step": 1008 }, { "epoch": 2.6508875739644973, "high_lr": 0.0004694736842105263, "low_lr": 9.389473684210528e-06, "step": 1008 }, { "epoch": 2.6508875739644973, "high_lr": 0.0004694736842105263, "low_lr": 9.389473684210528e-06, "step": 1008 }, { "epoch": 2.6508875739644973, "high_lr": 0.0004694736842105263, "low_lr": 9.389473684210528e-06, "step": 1008 }, { "epoch": 2.6508875739644973, "high_lr": 0.0004694736842105263, "low_lr": 9.389473684210528e-06, "step": 1008 }, { "epoch": 2.653517422748192, "grad_norm": 1.2521098852157593, "learning_rate": 0.0004689473684210526, "loss": 1.3789, "step": 1009 }, { "epoch": 2.653517422748192, "high_lr": 0.0004689473684210526, "low_lr": 9.378947368421052e-06, "step": 1009 }, { "epoch": 2.653517422748192, "high_lr": 0.0004689473684210526, "low_lr": 9.378947368421052e-06, "step": 1009 }, { "epoch": 2.653517422748192, "high_lr": 0.0004689473684210526, "low_lr": 9.378947368421052e-06, "step": 1009 }, { "epoch": 2.653517422748192, "high_lr": 0.0004689473684210526, "low_lr": 9.378947368421052e-06, "step": 1009 }, { "epoch": 2.653517422748192, "high_lr": 0.0004689473684210526, "low_lr": 9.378947368421052e-06, "step": 1009 }, { "epoch": 2.653517422748192, "high_lr": 0.0004689473684210526, "low_lr": 9.378947368421052e-06, "step": 1009 }, { "epoch": 2.653517422748192, "high_lr": 0.0004689473684210526, "low_lr": 9.378947368421052e-06, "step": 1009 }, { "epoch": 2.653517422748192, "high_lr": 0.0004689473684210526, "low_lr": 9.378947368421052e-06, "step": 1009 }, { "epoch": 2.656147271531887, "grad_norm": 1.286059021949768, "learning_rate": 0.00046842105263157895, "loss": 1.4011, "step": 1010 }, { "epoch": 2.656147271531887, "high_lr": 0.00046842105263157895, "low_lr": 9.36842105263158e-06, "step": 1010 }, { "epoch": 2.656147271531887, "high_lr": 0.00046842105263157895, "low_lr": 9.36842105263158e-06, "step": 1010 }, { "epoch": 2.656147271531887, "high_lr": 0.00046842105263157895, "low_lr": 9.36842105263158e-06, "step": 1010 }, { "epoch": 2.656147271531887, "high_lr": 0.00046842105263157895, "low_lr": 9.36842105263158e-06, "step": 1010 }, { "epoch": 2.656147271531887, "high_lr": 0.00046842105263157895, "low_lr": 9.36842105263158e-06, "step": 1010 }, { "epoch": 2.656147271531887, "high_lr": 0.00046842105263157895, "low_lr": 9.36842105263158e-06, "step": 1010 }, { "epoch": 2.656147271531887, "high_lr": 0.00046842105263157895, "low_lr": 9.36842105263158e-06, "step": 1010 }, { "epoch": 2.656147271531887, "high_lr": 0.00046842105263157895, "low_lr": 9.36842105263158e-06, "step": 1010 }, { "epoch": 2.658777120315582, "grad_norm": 1.3176252841949463, "learning_rate": 0.00046789473684210524, "loss": 1.4245, "step": 1011 }, { "epoch": 2.658777120315582, "high_lr": 0.00046789473684210524, "low_lr": 9.357894736842105e-06, "step": 1011 }, { "epoch": 2.658777120315582, "high_lr": 0.00046789473684210524, "low_lr": 9.357894736842105e-06, "step": 1011 }, { "epoch": 2.658777120315582, "high_lr": 0.00046789473684210524, "low_lr": 9.357894736842105e-06, "step": 1011 }, { "epoch": 2.658777120315582, "high_lr": 0.00046789473684210524, "low_lr": 9.357894736842105e-06, "step": 1011 }, { "epoch": 2.658777120315582, "high_lr": 0.00046789473684210524, "low_lr": 9.357894736842105e-06, "step": 1011 }, { "epoch": 2.658777120315582, "high_lr": 0.00046789473684210524, "low_lr": 9.357894736842105e-06, "step": 1011 }, { "epoch": 2.658777120315582, "high_lr": 0.00046789473684210524, "low_lr": 9.357894736842105e-06, "step": 1011 }, { "epoch": 2.658777120315582, "high_lr": 0.00046789473684210524, "low_lr": 9.357894736842105e-06, "step": 1011 }, { "epoch": 2.6614069690992768, "grad_norm": 1.219322919845581, "learning_rate": 0.00046736842105263163, "loss": 1.4275, "step": 1012 }, { "epoch": 2.6614069690992768, "high_lr": 0.00046736842105263163, "low_lr": 9.347368421052633e-06, "step": 1012 }, { "epoch": 2.6614069690992768, "high_lr": 0.00046736842105263163, "low_lr": 9.347368421052633e-06, "step": 1012 }, { "epoch": 2.6614069690992768, "high_lr": 0.00046736842105263163, "low_lr": 9.347368421052633e-06, "step": 1012 }, { "epoch": 2.6614069690992768, "high_lr": 0.00046736842105263163, "low_lr": 9.347368421052633e-06, "step": 1012 }, { "epoch": 2.6614069690992768, "high_lr": 0.00046736842105263163, "low_lr": 9.347368421052633e-06, "step": 1012 }, { "epoch": 2.6614069690992768, "high_lr": 0.00046736842105263163, "low_lr": 9.347368421052633e-06, "step": 1012 }, { "epoch": 2.6614069690992768, "high_lr": 0.00046736842105263163, "low_lr": 9.347368421052633e-06, "step": 1012 }, { "epoch": 2.6614069690992768, "high_lr": 0.00046736842105263163, "low_lr": 9.347368421052633e-06, "step": 1012 }, { "epoch": 2.664036817882972, "grad_norm": 1.2309496402740479, "learning_rate": 0.0004668421052631579, "loss": 1.386, "step": 1013 }, { "epoch": 2.664036817882972, "high_lr": 0.0004668421052631579, "low_lr": 9.336842105263158e-06, "step": 1013 }, { "epoch": 2.664036817882972, "high_lr": 0.0004668421052631579, "low_lr": 9.336842105263158e-06, "step": 1013 }, { "epoch": 2.664036817882972, "high_lr": 0.0004668421052631579, "low_lr": 9.336842105263158e-06, "step": 1013 }, { "epoch": 2.664036817882972, "high_lr": 0.0004668421052631579, "low_lr": 9.336842105263158e-06, "step": 1013 }, { "epoch": 2.664036817882972, "high_lr": 0.0004668421052631579, "low_lr": 9.336842105263158e-06, "step": 1013 }, { "epoch": 2.664036817882972, "high_lr": 0.0004668421052631579, "low_lr": 9.336842105263158e-06, "step": 1013 }, { "epoch": 2.664036817882972, "high_lr": 0.0004668421052631579, "low_lr": 9.336842105263158e-06, "step": 1013 }, { "epoch": 2.664036817882972, "high_lr": 0.0004668421052631579, "low_lr": 9.336842105263158e-06, "step": 1013 }, { "epoch": 2.6666666666666665, "grad_norm": 1.352072834968567, "learning_rate": 0.0004663157894736842, "loss": 1.4204, "step": 1014 }, { "epoch": 2.6666666666666665, "high_lr": 0.0004663157894736842, "low_lr": 9.326315789473684e-06, "step": 1014 }, { "epoch": 2.6666666666666665, "high_lr": 0.0004663157894736842, "low_lr": 9.326315789473684e-06, "step": 1014 }, { "epoch": 2.6666666666666665, "high_lr": 0.0004663157894736842, "low_lr": 9.326315789473684e-06, "step": 1014 }, { "epoch": 2.6666666666666665, "high_lr": 0.0004663157894736842, "low_lr": 9.326315789473684e-06, "step": 1014 }, { "epoch": 2.6666666666666665, "high_lr": 0.0004663157894736842, "low_lr": 9.326315789473684e-06, "step": 1014 }, { "epoch": 2.6666666666666665, "high_lr": 0.0004663157894736842, "low_lr": 9.326315789473684e-06, "step": 1014 }, { "epoch": 2.6666666666666665, "high_lr": 0.0004663157894736842, "low_lr": 9.326315789473684e-06, "step": 1014 }, { "epoch": 2.6666666666666665, "high_lr": 0.0004663157894736842, "low_lr": 9.326315789473684e-06, "step": 1014 }, { "epoch": 2.6692965154503616, "grad_norm": 1.2742241621017456, "learning_rate": 0.00046578947368421054, "loss": 1.4226, "step": 1015 }, { "epoch": 2.6692965154503616, "high_lr": 0.00046578947368421054, "low_lr": 9.315789473684212e-06, "step": 1015 }, { "epoch": 2.6692965154503616, "high_lr": 0.00046578947368421054, "low_lr": 9.315789473684212e-06, "step": 1015 }, { "epoch": 2.6692965154503616, "high_lr": 0.00046578947368421054, "low_lr": 9.315789473684212e-06, "step": 1015 }, { "epoch": 2.6692965154503616, "high_lr": 0.00046578947368421054, "low_lr": 9.315789473684212e-06, "step": 1015 }, { "epoch": 2.6692965154503616, "high_lr": 0.00046578947368421054, "low_lr": 9.315789473684212e-06, "step": 1015 }, { "epoch": 2.6692965154503616, "high_lr": 0.00046578947368421054, "low_lr": 9.315789473684212e-06, "step": 1015 }, { "epoch": 2.6692965154503616, "high_lr": 0.00046578947368421054, "low_lr": 9.315789473684212e-06, "step": 1015 }, { "epoch": 2.6692965154503616, "high_lr": 0.00046578947368421054, "low_lr": 9.315789473684212e-06, "step": 1015 }, { "epoch": 2.6719263642340563, "grad_norm": 1.3469874858856201, "learning_rate": 0.00046526315789473683, "loss": 1.3989, "step": 1016 }, { "epoch": 2.6719263642340563, "high_lr": 0.00046526315789473683, "low_lr": 9.305263157894737e-06, "step": 1016 }, { "epoch": 2.6719263642340563, "high_lr": 0.00046526315789473683, "low_lr": 9.305263157894737e-06, "step": 1016 }, { "epoch": 2.6719263642340563, "high_lr": 0.00046526315789473683, "low_lr": 9.305263157894737e-06, "step": 1016 }, { "epoch": 2.6719263642340563, "high_lr": 0.00046526315789473683, "low_lr": 9.305263157894737e-06, "step": 1016 }, { "epoch": 2.6719263642340563, "high_lr": 0.00046526315789473683, "low_lr": 9.305263157894737e-06, "step": 1016 }, { "epoch": 2.6719263642340563, "high_lr": 0.00046526315789473683, "low_lr": 9.305263157894737e-06, "step": 1016 }, { "epoch": 2.6719263642340563, "high_lr": 0.00046526315789473683, "low_lr": 9.305263157894737e-06, "step": 1016 }, { "epoch": 2.6719263642340563, "high_lr": 0.00046526315789473683, "low_lr": 9.305263157894737e-06, "step": 1016 }, { "epoch": 2.6745562130177514, "grad_norm": 1.3517266511917114, "learning_rate": 0.00046473684210526317, "loss": 1.3909, "step": 1017 }, { "epoch": 2.6745562130177514, "high_lr": 0.00046473684210526317, "low_lr": 9.294736842105265e-06, "step": 1017 }, { "epoch": 2.6745562130177514, "high_lr": 0.00046473684210526317, "low_lr": 9.294736842105265e-06, "step": 1017 }, { "epoch": 2.6745562130177514, "high_lr": 0.00046473684210526317, "low_lr": 9.294736842105265e-06, "step": 1017 }, { "epoch": 2.6745562130177514, "high_lr": 0.00046473684210526317, "low_lr": 9.294736842105265e-06, "step": 1017 }, { "epoch": 2.6745562130177514, "high_lr": 0.00046473684210526317, "low_lr": 9.294736842105265e-06, "step": 1017 }, { "epoch": 2.6745562130177514, "high_lr": 0.00046473684210526317, "low_lr": 9.294736842105265e-06, "step": 1017 }, { "epoch": 2.6745562130177514, "high_lr": 0.00046473684210526317, "low_lr": 9.294736842105265e-06, "step": 1017 }, { "epoch": 2.6745562130177514, "high_lr": 0.00046473684210526317, "low_lr": 9.294736842105265e-06, "step": 1017 }, { "epoch": 2.6771860618014465, "grad_norm": 1.2885749340057373, "learning_rate": 0.00046421052631578946, "loss": 1.363, "step": 1018 }, { "epoch": 2.6771860618014465, "high_lr": 0.00046421052631578946, "low_lr": 9.28421052631579e-06, "step": 1018 }, { "epoch": 2.6771860618014465, "high_lr": 0.00046421052631578946, "low_lr": 9.28421052631579e-06, "step": 1018 }, { "epoch": 2.6771860618014465, "high_lr": 0.00046421052631578946, "low_lr": 9.28421052631579e-06, "step": 1018 }, { "epoch": 2.6771860618014465, "high_lr": 0.00046421052631578946, "low_lr": 9.28421052631579e-06, "step": 1018 }, { "epoch": 2.6771860618014465, "high_lr": 0.00046421052631578946, "low_lr": 9.28421052631579e-06, "step": 1018 }, { "epoch": 2.6771860618014465, "high_lr": 0.00046421052631578946, "low_lr": 9.28421052631579e-06, "step": 1018 }, { "epoch": 2.6771860618014465, "high_lr": 0.00046421052631578946, "low_lr": 9.28421052631579e-06, "step": 1018 }, { "epoch": 2.6771860618014465, "high_lr": 0.00046421052631578946, "low_lr": 9.28421052631579e-06, "step": 1018 }, { "epoch": 2.679815910585141, "grad_norm": 1.333804726600647, "learning_rate": 0.0004636842105263158, "loss": 1.416, "step": 1019 }, { "epoch": 2.679815910585141, "high_lr": 0.0004636842105263158, "low_lr": 9.273684210526317e-06, "step": 1019 }, { "epoch": 2.679815910585141, "high_lr": 0.0004636842105263158, "low_lr": 9.273684210526317e-06, "step": 1019 }, { "epoch": 2.679815910585141, "high_lr": 0.0004636842105263158, "low_lr": 9.273684210526317e-06, "step": 1019 }, { "epoch": 2.679815910585141, "high_lr": 0.0004636842105263158, "low_lr": 9.273684210526317e-06, "step": 1019 }, { "epoch": 2.679815910585141, "high_lr": 0.0004636842105263158, "low_lr": 9.273684210526317e-06, "step": 1019 }, { "epoch": 2.679815910585141, "high_lr": 0.0004636842105263158, "low_lr": 9.273684210526317e-06, "step": 1019 }, { "epoch": 2.679815910585141, "high_lr": 0.0004636842105263158, "low_lr": 9.273684210526317e-06, "step": 1019 }, { "epoch": 2.679815910585141, "high_lr": 0.0004636842105263158, "low_lr": 9.273684210526317e-06, "step": 1019 }, { "epoch": 2.6824457593688362, "grad_norm": 1.2797845602035522, "learning_rate": 0.00046315789473684214, "loss": 1.333, "step": 1020 }, { "epoch": 2.6824457593688362, "high_lr": 0.00046315789473684214, "low_lr": 9.263157894736842e-06, "step": 1020 }, { "epoch": 2.6824457593688362, "high_lr": 0.00046315789473684214, "low_lr": 9.263157894736842e-06, "step": 1020 }, { "epoch": 2.6824457593688362, "high_lr": 0.00046315789473684214, "low_lr": 9.263157894736842e-06, "step": 1020 }, { "epoch": 2.6824457593688362, "high_lr": 0.00046315789473684214, "low_lr": 9.263157894736842e-06, "step": 1020 }, { "epoch": 2.6824457593688362, "high_lr": 0.00046315789473684214, "low_lr": 9.263157894736842e-06, "step": 1020 }, { "epoch": 2.6824457593688362, "high_lr": 0.00046315789473684214, "low_lr": 9.263157894736842e-06, "step": 1020 }, { "epoch": 2.6824457593688362, "high_lr": 0.00046315789473684214, "low_lr": 9.263157894736842e-06, "step": 1020 }, { "epoch": 2.6824457593688362, "high_lr": 0.00046315789473684214, "low_lr": 9.263157894736842e-06, "step": 1020 }, { "epoch": 2.6850756081525313, "grad_norm": 1.2081880569458008, "learning_rate": 0.0004626315789473684, "loss": 1.3541, "step": 1021 }, { "epoch": 2.6850756081525313, "high_lr": 0.0004626315789473684, "low_lr": 9.252631578947368e-06, "step": 1021 }, { "epoch": 2.6850756081525313, "high_lr": 0.0004626315789473684, "low_lr": 9.252631578947368e-06, "step": 1021 }, { "epoch": 2.6850756081525313, "high_lr": 0.0004626315789473684, "low_lr": 9.252631578947368e-06, "step": 1021 }, { "epoch": 2.6850756081525313, "high_lr": 0.0004626315789473684, "low_lr": 9.252631578947368e-06, "step": 1021 }, { "epoch": 2.6850756081525313, "high_lr": 0.0004626315789473684, "low_lr": 9.252631578947368e-06, "step": 1021 }, { "epoch": 2.6850756081525313, "high_lr": 0.0004626315789473684, "low_lr": 9.252631578947368e-06, "step": 1021 }, { "epoch": 2.6850756081525313, "high_lr": 0.0004626315789473684, "low_lr": 9.252631578947368e-06, "step": 1021 }, { "epoch": 2.6850756081525313, "high_lr": 0.0004626315789473684, "low_lr": 9.252631578947368e-06, "step": 1021 }, { "epoch": 2.687705456936226, "grad_norm": 1.3071238994598389, "learning_rate": 0.00046210526315789476, "loss": 1.3664, "step": 1022 }, { "epoch": 2.687705456936226, "high_lr": 0.00046210526315789476, "low_lr": 9.242105263157896e-06, "step": 1022 }, { "epoch": 2.687705456936226, "high_lr": 0.00046210526315789476, "low_lr": 9.242105263157896e-06, "step": 1022 }, { "epoch": 2.687705456936226, "high_lr": 0.00046210526315789476, "low_lr": 9.242105263157896e-06, "step": 1022 }, { "epoch": 2.687705456936226, "high_lr": 0.00046210526315789476, "low_lr": 9.242105263157896e-06, "step": 1022 }, { "epoch": 2.687705456936226, "high_lr": 0.00046210526315789476, "low_lr": 9.242105263157896e-06, "step": 1022 }, { "epoch": 2.687705456936226, "high_lr": 0.00046210526315789476, "low_lr": 9.242105263157896e-06, "step": 1022 }, { "epoch": 2.687705456936226, "high_lr": 0.00046210526315789476, "low_lr": 9.242105263157896e-06, "step": 1022 }, { "epoch": 2.687705456936226, "high_lr": 0.00046210526315789476, "low_lr": 9.242105263157896e-06, "step": 1022 }, { "epoch": 2.690335305719921, "grad_norm": 1.2534948587417603, "learning_rate": 0.00046157894736842105, "loss": 1.3551, "step": 1023 }, { "epoch": 2.690335305719921, "high_lr": 0.00046157894736842105, "low_lr": 9.231578947368421e-06, "step": 1023 }, { "epoch": 2.690335305719921, "high_lr": 0.00046157894736842105, "low_lr": 9.231578947368421e-06, "step": 1023 }, { "epoch": 2.690335305719921, "high_lr": 0.00046157894736842105, "low_lr": 9.231578947368421e-06, "step": 1023 }, { "epoch": 2.690335305719921, "high_lr": 0.00046157894736842105, "low_lr": 9.231578947368421e-06, "step": 1023 }, { "epoch": 2.690335305719921, "high_lr": 0.00046157894736842105, "low_lr": 9.231578947368421e-06, "step": 1023 }, { "epoch": 2.690335305719921, "high_lr": 0.00046157894736842105, "low_lr": 9.231578947368421e-06, "step": 1023 }, { "epoch": 2.690335305719921, "high_lr": 0.00046157894736842105, "low_lr": 9.231578947368421e-06, "step": 1023 }, { "epoch": 2.690335305719921, "high_lr": 0.00046157894736842105, "low_lr": 9.231578947368421e-06, "step": 1023 }, { "epoch": 2.692965154503616, "grad_norm": 1.2943768501281738, "learning_rate": 0.0004610526315789474, "loss": 1.3696, "step": 1024 }, { "epoch": 2.692965154503616, "high_lr": 0.0004610526315789474, "low_lr": 9.221052631578949e-06, "step": 1024 }, { "epoch": 2.692965154503616, "high_lr": 0.0004610526315789474, "low_lr": 9.221052631578949e-06, "step": 1024 }, { "epoch": 2.692965154503616, "high_lr": 0.0004610526315789474, "low_lr": 9.221052631578949e-06, "step": 1024 }, { "epoch": 2.692965154503616, "high_lr": 0.0004610526315789474, "low_lr": 9.221052631578949e-06, "step": 1024 }, { "epoch": 2.692965154503616, "high_lr": 0.0004610526315789474, "low_lr": 9.221052631578949e-06, "step": 1024 }, { "epoch": 2.692965154503616, "high_lr": 0.0004610526315789474, "low_lr": 9.221052631578949e-06, "step": 1024 }, { "epoch": 2.692965154503616, "high_lr": 0.0004610526315789474, "low_lr": 9.221052631578949e-06, "step": 1024 }, { "epoch": 2.692965154503616, "high_lr": 0.0004610526315789474, "low_lr": 9.221052631578949e-06, "step": 1024 }, { "epoch": 2.695595003287311, "grad_norm": 1.3256723880767822, "learning_rate": 0.0004605263157894737, "loss": 1.4162, "step": 1025 }, { "epoch": 2.695595003287311, "high_lr": 0.0004605263157894737, "low_lr": 9.210526315789474e-06, "step": 1025 }, { "epoch": 2.695595003287311, "high_lr": 0.0004605263157894737, "low_lr": 9.210526315789474e-06, "step": 1025 }, { "epoch": 2.695595003287311, "high_lr": 0.0004605263157894737, "low_lr": 9.210526315789474e-06, "step": 1025 }, { "epoch": 2.695595003287311, "high_lr": 0.0004605263157894737, "low_lr": 9.210526315789474e-06, "step": 1025 }, { "epoch": 2.695595003287311, "high_lr": 0.0004605263157894737, "low_lr": 9.210526315789474e-06, "step": 1025 }, { "epoch": 2.695595003287311, "high_lr": 0.0004605263157894737, "low_lr": 9.210526315789474e-06, "step": 1025 }, { "epoch": 2.695595003287311, "high_lr": 0.0004605263157894737, "low_lr": 9.210526315789474e-06, "step": 1025 }, { "epoch": 2.695595003287311, "high_lr": 0.0004605263157894737, "low_lr": 9.210526315789474e-06, "step": 1025 }, { "epoch": 2.698224852071006, "grad_norm": 1.3400224447250366, "learning_rate": 0.00046, "loss": 1.3839, "step": 1026 }, { "epoch": 2.698224852071006, "high_lr": 0.00046, "low_lr": 9.200000000000002e-06, "step": 1026 }, { "epoch": 2.698224852071006, "high_lr": 0.00046, "low_lr": 9.200000000000002e-06, "step": 1026 }, { "epoch": 2.698224852071006, "high_lr": 0.00046, "low_lr": 9.200000000000002e-06, "step": 1026 }, { "epoch": 2.698224852071006, "high_lr": 0.00046, "low_lr": 9.200000000000002e-06, "step": 1026 }, { "epoch": 2.698224852071006, "high_lr": 0.00046, "low_lr": 9.200000000000002e-06, "step": 1026 }, { "epoch": 2.698224852071006, "high_lr": 0.00046, "low_lr": 9.200000000000002e-06, "step": 1026 }, { "epoch": 2.698224852071006, "high_lr": 0.00046, "low_lr": 9.200000000000002e-06, "step": 1026 }, { "epoch": 2.698224852071006, "high_lr": 0.00046, "low_lr": 9.200000000000002e-06, "step": 1026 }, { "epoch": 2.700854700854701, "grad_norm": 1.2152968645095825, "learning_rate": 0.0004594736842105263, "loss": 1.3392, "step": 1027 }, { "epoch": 2.700854700854701, "high_lr": 0.0004594736842105263, "low_lr": 9.189473684210526e-06, "step": 1027 }, { "epoch": 2.700854700854701, "high_lr": 0.0004594736842105263, "low_lr": 9.189473684210526e-06, "step": 1027 }, { "epoch": 2.700854700854701, "high_lr": 0.0004594736842105263, "low_lr": 9.189473684210526e-06, "step": 1027 }, { "epoch": 2.700854700854701, "high_lr": 0.0004594736842105263, "low_lr": 9.189473684210526e-06, "step": 1027 }, { "epoch": 2.700854700854701, "high_lr": 0.0004594736842105263, "low_lr": 9.189473684210526e-06, "step": 1027 }, { "epoch": 2.700854700854701, "high_lr": 0.0004594736842105263, "low_lr": 9.189473684210526e-06, "step": 1027 }, { "epoch": 2.700854700854701, "high_lr": 0.0004594736842105263, "low_lr": 9.189473684210526e-06, "step": 1027 }, { "epoch": 2.700854700854701, "high_lr": 0.0004594736842105263, "low_lr": 9.189473684210526e-06, "step": 1027 }, { "epoch": 2.7034845496383957, "grad_norm": 1.329406976699829, "learning_rate": 0.00045894736842105264, "loss": 1.3347, "step": 1028 }, { "epoch": 2.7034845496383957, "high_lr": 0.00045894736842105264, "low_lr": 9.178947368421053e-06, "step": 1028 }, { "epoch": 2.7034845496383957, "high_lr": 0.00045894736842105264, "low_lr": 9.178947368421053e-06, "step": 1028 }, { "epoch": 2.7034845496383957, "high_lr": 0.00045894736842105264, "low_lr": 9.178947368421053e-06, "step": 1028 }, { "epoch": 2.7034845496383957, "high_lr": 0.00045894736842105264, "low_lr": 9.178947368421053e-06, "step": 1028 }, { "epoch": 2.7034845496383957, "high_lr": 0.00045894736842105264, "low_lr": 9.178947368421053e-06, "step": 1028 }, { "epoch": 2.7034845496383957, "high_lr": 0.00045894736842105264, "low_lr": 9.178947368421053e-06, "step": 1028 }, { "epoch": 2.7034845496383957, "high_lr": 0.00045894736842105264, "low_lr": 9.178947368421053e-06, "step": 1028 }, { "epoch": 2.7034845496383957, "high_lr": 0.00045894736842105264, "low_lr": 9.178947368421053e-06, "step": 1028 }, { "epoch": 2.706114398422091, "grad_norm": 1.3266621828079224, "learning_rate": 0.000458421052631579, "loss": 1.4081, "step": 1029 }, { "epoch": 2.706114398422091, "high_lr": 0.000458421052631579, "low_lr": 9.168421052631579e-06, "step": 1029 }, { "epoch": 2.706114398422091, "high_lr": 0.000458421052631579, "low_lr": 9.168421052631579e-06, "step": 1029 }, { "epoch": 2.706114398422091, "high_lr": 0.000458421052631579, "low_lr": 9.168421052631579e-06, "step": 1029 }, { "epoch": 2.706114398422091, "high_lr": 0.000458421052631579, "low_lr": 9.168421052631579e-06, "step": 1029 }, { "epoch": 2.706114398422091, "high_lr": 0.000458421052631579, "low_lr": 9.168421052631579e-06, "step": 1029 }, { "epoch": 2.706114398422091, "high_lr": 0.000458421052631579, "low_lr": 9.168421052631579e-06, "step": 1029 }, { "epoch": 2.706114398422091, "high_lr": 0.000458421052631579, "low_lr": 9.168421052631579e-06, "step": 1029 }, { "epoch": 2.706114398422091, "high_lr": 0.000458421052631579, "low_lr": 9.168421052631579e-06, "step": 1029 }, { "epoch": 2.708744247205786, "grad_norm": 1.3029323816299438, "learning_rate": 0.00045789473684210527, "loss": 1.4039, "step": 1030 }, { "epoch": 2.708744247205786, "high_lr": 0.00045789473684210527, "low_lr": 9.157894736842105e-06, "step": 1030 }, { "epoch": 2.708744247205786, "high_lr": 0.00045789473684210527, "low_lr": 9.157894736842105e-06, "step": 1030 }, { "epoch": 2.708744247205786, "high_lr": 0.00045789473684210527, "low_lr": 9.157894736842105e-06, "step": 1030 }, { "epoch": 2.708744247205786, "high_lr": 0.00045789473684210527, "low_lr": 9.157894736842105e-06, "step": 1030 }, { "epoch": 2.708744247205786, "high_lr": 0.00045789473684210527, "low_lr": 9.157894736842105e-06, "step": 1030 }, { "epoch": 2.708744247205786, "high_lr": 0.00045789473684210527, "low_lr": 9.157894736842105e-06, "step": 1030 }, { "epoch": 2.708744247205786, "high_lr": 0.00045789473684210527, "low_lr": 9.157894736842105e-06, "step": 1030 }, { "epoch": 2.708744247205786, "high_lr": 0.00045789473684210527, "low_lr": 9.157894736842105e-06, "step": 1030 }, { "epoch": 2.7113740959894805, "grad_norm": 1.299635648727417, "learning_rate": 0.0004573684210526316, "loss": 1.4511, "step": 1031 }, { "epoch": 2.7113740959894805, "high_lr": 0.0004573684210526316, "low_lr": 9.147368421052633e-06, "step": 1031 }, { "epoch": 2.7113740959894805, "high_lr": 0.0004573684210526316, "low_lr": 9.147368421052633e-06, "step": 1031 }, { "epoch": 2.7113740959894805, "high_lr": 0.0004573684210526316, "low_lr": 9.147368421052633e-06, "step": 1031 }, { "epoch": 2.7113740959894805, "high_lr": 0.0004573684210526316, "low_lr": 9.147368421052633e-06, "step": 1031 }, { "epoch": 2.7113740959894805, "high_lr": 0.0004573684210526316, "low_lr": 9.147368421052633e-06, "step": 1031 }, { "epoch": 2.7113740959894805, "high_lr": 0.0004573684210526316, "low_lr": 9.147368421052633e-06, "step": 1031 }, { "epoch": 2.7113740959894805, "high_lr": 0.0004573684210526316, "low_lr": 9.147368421052633e-06, "step": 1031 }, { "epoch": 2.7113740959894805, "high_lr": 0.0004573684210526316, "low_lr": 9.147368421052633e-06, "step": 1031 }, { "epoch": 2.7140039447731756, "grad_norm": 1.3590953350067139, "learning_rate": 0.0004568421052631579, "loss": 1.3464, "step": 1032 }, { "epoch": 2.7140039447731756, "high_lr": 0.0004568421052631579, "low_lr": 9.136842105263158e-06, "step": 1032 }, { "epoch": 2.7140039447731756, "high_lr": 0.0004568421052631579, "low_lr": 9.136842105263158e-06, "step": 1032 }, { "epoch": 2.7140039447731756, "high_lr": 0.0004568421052631579, "low_lr": 9.136842105263158e-06, "step": 1032 }, { "epoch": 2.7140039447731756, "high_lr": 0.0004568421052631579, "low_lr": 9.136842105263158e-06, "step": 1032 }, { "epoch": 2.7140039447731756, "high_lr": 0.0004568421052631579, "low_lr": 9.136842105263158e-06, "step": 1032 }, { "epoch": 2.7140039447731756, "high_lr": 0.0004568421052631579, "low_lr": 9.136842105263158e-06, "step": 1032 }, { "epoch": 2.7140039447731756, "high_lr": 0.0004568421052631579, "low_lr": 9.136842105263158e-06, "step": 1032 }, { "epoch": 2.7140039447731756, "high_lr": 0.0004568421052631579, "low_lr": 9.136842105263158e-06, "step": 1032 }, { "epoch": 2.7166337935568707, "grad_norm": 1.2269901037216187, "learning_rate": 0.00045631578947368423, "loss": 1.3538, "step": 1033 }, { "epoch": 2.7166337935568707, "high_lr": 0.00045631578947368423, "low_lr": 9.126315789473686e-06, "step": 1033 }, { "epoch": 2.7166337935568707, "high_lr": 0.00045631578947368423, "low_lr": 9.126315789473686e-06, "step": 1033 }, { "epoch": 2.7166337935568707, "high_lr": 0.00045631578947368423, "low_lr": 9.126315789473686e-06, "step": 1033 }, { "epoch": 2.7166337935568707, "high_lr": 0.00045631578947368423, "low_lr": 9.126315789473686e-06, "step": 1033 }, { "epoch": 2.7166337935568707, "high_lr": 0.00045631578947368423, "low_lr": 9.126315789473686e-06, "step": 1033 }, { "epoch": 2.7166337935568707, "high_lr": 0.00045631578947368423, "low_lr": 9.126315789473686e-06, "step": 1033 }, { "epoch": 2.7166337935568707, "high_lr": 0.00045631578947368423, "low_lr": 9.126315789473686e-06, "step": 1033 }, { "epoch": 2.7166337935568707, "high_lr": 0.00045631578947368423, "low_lr": 9.126315789473686e-06, "step": 1033 }, { "epoch": 2.7192636423405654, "grad_norm": 1.3440412282943726, "learning_rate": 0.0004557894736842105, "loss": 1.3747, "step": 1034 }, { "epoch": 2.7192636423405654, "high_lr": 0.0004557894736842105, "low_lr": 9.11578947368421e-06, "step": 1034 }, { "epoch": 2.7192636423405654, "high_lr": 0.0004557894736842105, "low_lr": 9.11578947368421e-06, "step": 1034 }, { "epoch": 2.7192636423405654, "high_lr": 0.0004557894736842105, "low_lr": 9.11578947368421e-06, "step": 1034 }, { "epoch": 2.7192636423405654, "high_lr": 0.0004557894736842105, "low_lr": 9.11578947368421e-06, "step": 1034 }, { "epoch": 2.7192636423405654, "high_lr": 0.0004557894736842105, "low_lr": 9.11578947368421e-06, "step": 1034 }, { "epoch": 2.7192636423405654, "high_lr": 0.0004557894736842105, "low_lr": 9.11578947368421e-06, "step": 1034 }, { "epoch": 2.7192636423405654, "high_lr": 0.0004557894736842105, "low_lr": 9.11578947368421e-06, "step": 1034 }, { "epoch": 2.7192636423405654, "high_lr": 0.0004557894736842105, "low_lr": 9.11578947368421e-06, "step": 1034 }, { "epoch": 2.7218934911242605, "grad_norm": 1.2938976287841797, "learning_rate": 0.00045526315789473686, "loss": 1.3702, "step": 1035 }, { "epoch": 2.7218934911242605, "high_lr": 0.00045526315789473686, "low_lr": 9.105263157894739e-06, "step": 1035 }, { "epoch": 2.7218934911242605, "high_lr": 0.00045526315789473686, "low_lr": 9.105263157894739e-06, "step": 1035 }, { "epoch": 2.7218934911242605, "high_lr": 0.00045526315789473686, "low_lr": 9.105263157894739e-06, "step": 1035 }, { "epoch": 2.7218934911242605, "high_lr": 0.00045526315789473686, "low_lr": 9.105263157894739e-06, "step": 1035 }, { "epoch": 2.7218934911242605, "high_lr": 0.00045526315789473686, "low_lr": 9.105263157894739e-06, "step": 1035 }, { "epoch": 2.7218934911242605, "high_lr": 0.00045526315789473686, "low_lr": 9.105263157894739e-06, "step": 1035 }, { "epoch": 2.7218934911242605, "high_lr": 0.00045526315789473686, "low_lr": 9.105263157894739e-06, "step": 1035 }, { "epoch": 2.7218934911242605, "high_lr": 0.00045526315789473686, "low_lr": 9.105263157894739e-06, "step": 1035 }, { "epoch": 2.724523339907955, "grad_norm": 1.2927764654159546, "learning_rate": 0.0004547368421052632, "loss": 1.427, "step": 1036 }, { "epoch": 2.724523339907955, "high_lr": 0.0004547368421052632, "low_lr": 9.094736842105263e-06, "step": 1036 }, { "epoch": 2.724523339907955, "high_lr": 0.0004547368421052632, "low_lr": 9.094736842105263e-06, "step": 1036 }, { "epoch": 2.724523339907955, "high_lr": 0.0004547368421052632, "low_lr": 9.094736842105263e-06, "step": 1036 }, { "epoch": 2.724523339907955, "high_lr": 0.0004547368421052632, "low_lr": 9.094736842105263e-06, "step": 1036 }, { "epoch": 2.724523339907955, "high_lr": 0.0004547368421052632, "low_lr": 9.094736842105263e-06, "step": 1036 }, { "epoch": 2.724523339907955, "high_lr": 0.0004547368421052632, "low_lr": 9.094736842105263e-06, "step": 1036 }, { "epoch": 2.724523339907955, "high_lr": 0.0004547368421052632, "low_lr": 9.094736842105263e-06, "step": 1036 }, { "epoch": 2.724523339907955, "high_lr": 0.0004547368421052632, "low_lr": 9.094736842105263e-06, "step": 1036 }, { "epoch": 2.7271531886916502, "grad_norm": 1.2207523584365845, "learning_rate": 0.0004542105263157895, "loss": 1.3559, "step": 1037 }, { "epoch": 2.7271531886916502, "high_lr": 0.0004542105263157895, "low_lr": 9.08421052631579e-06, "step": 1037 }, { "epoch": 2.7271531886916502, "high_lr": 0.0004542105263157895, "low_lr": 9.08421052631579e-06, "step": 1037 }, { "epoch": 2.7271531886916502, "high_lr": 0.0004542105263157895, "low_lr": 9.08421052631579e-06, "step": 1037 }, { "epoch": 2.7271531886916502, "high_lr": 0.0004542105263157895, "low_lr": 9.08421052631579e-06, "step": 1037 }, { "epoch": 2.7271531886916502, "high_lr": 0.0004542105263157895, "low_lr": 9.08421052631579e-06, "step": 1037 }, { "epoch": 2.7271531886916502, "high_lr": 0.0004542105263157895, "low_lr": 9.08421052631579e-06, "step": 1037 }, { "epoch": 2.7271531886916502, "high_lr": 0.0004542105263157895, "low_lr": 9.08421052631579e-06, "step": 1037 }, { "epoch": 2.7271531886916502, "high_lr": 0.0004542105263157895, "low_lr": 9.08421052631579e-06, "step": 1037 }, { "epoch": 2.729783037475345, "grad_norm": 1.2172162532806396, "learning_rate": 0.0004536842105263158, "loss": 1.4333, "step": 1038 }, { "epoch": 2.729783037475345, "high_lr": 0.0004536842105263158, "low_lr": 9.073684210526316e-06, "step": 1038 }, { "epoch": 2.729783037475345, "high_lr": 0.0004536842105263158, "low_lr": 9.073684210526316e-06, "step": 1038 }, { "epoch": 2.729783037475345, "high_lr": 0.0004536842105263158, "low_lr": 9.073684210526316e-06, "step": 1038 }, { "epoch": 2.729783037475345, "high_lr": 0.0004536842105263158, "low_lr": 9.073684210526316e-06, "step": 1038 }, { "epoch": 2.729783037475345, "high_lr": 0.0004536842105263158, "low_lr": 9.073684210526316e-06, "step": 1038 }, { "epoch": 2.729783037475345, "high_lr": 0.0004536842105263158, "low_lr": 9.073684210526316e-06, "step": 1038 }, { "epoch": 2.729783037475345, "high_lr": 0.0004536842105263158, "low_lr": 9.073684210526316e-06, "step": 1038 }, { "epoch": 2.729783037475345, "high_lr": 0.0004536842105263158, "low_lr": 9.073684210526316e-06, "step": 1038 }, { "epoch": 2.73241288625904, "grad_norm": 1.2281653881072998, "learning_rate": 0.0004531578947368421, "loss": 1.3917, "step": 1039 }, { "epoch": 2.73241288625904, "high_lr": 0.0004531578947368421, "low_lr": 9.063157894736842e-06, "step": 1039 }, { "epoch": 2.73241288625904, "high_lr": 0.0004531578947368421, "low_lr": 9.063157894736842e-06, "step": 1039 }, { "epoch": 2.73241288625904, "high_lr": 0.0004531578947368421, "low_lr": 9.063157894736842e-06, "step": 1039 }, { "epoch": 2.73241288625904, "high_lr": 0.0004531578947368421, "low_lr": 9.063157894736842e-06, "step": 1039 }, { "epoch": 2.73241288625904, "high_lr": 0.0004531578947368421, "low_lr": 9.063157894736842e-06, "step": 1039 }, { "epoch": 2.73241288625904, "high_lr": 0.0004531578947368421, "low_lr": 9.063157894736842e-06, "step": 1039 }, { "epoch": 2.73241288625904, "high_lr": 0.0004531578947368421, "low_lr": 9.063157894736842e-06, "step": 1039 }, { "epoch": 2.73241288625904, "high_lr": 0.0004531578947368421, "low_lr": 9.063157894736842e-06, "step": 1039 }, { "epoch": 2.735042735042735, "grad_norm": 1.2144098281860352, "learning_rate": 0.00045263157894736845, "loss": 1.3998, "step": 1040 }, { "epoch": 2.735042735042735, "high_lr": 0.00045263157894736845, "low_lr": 9.05263157894737e-06, "step": 1040 }, { "epoch": 2.735042735042735, "high_lr": 0.00045263157894736845, "low_lr": 9.05263157894737e-06, "step": 1040 }, { "epoch": 2.735042735042735, "high_lr": 0.00045263157894736845, "low_lr": 9.05263157894737e-06, "step": 1040 }, { "epoch": 2.735042735042735, "high_lr": 0.00045263157894736845, "low_lr": 9.05263157894737e-06, "step": 1040 }, { "epoch": 2.735042735042735, "high_lr": 0.00045263157894736845, "low_lr": 9.05263157894737e-06, "step": 1040 }, { "epoch": 2.735042735042735, "high_lr": 0.00045263157894736845, "low_lr": 9.05263157894737e-06, "step": 1040 }, { "epoch": 2.735042735042735, "high_lr": 0.00045263157894736845, "low_lr": 9.05263157894737e-06, "step": 1040 }, { "epoch": 2.735042735042735, "high_lr": 0.00045263157894736845, "low_lr": 9.05263157894737e-06, "step": 1040 }, { "epoch": 2.7376725838264298, "grad_norm": 1.2607382535934448, "learning_rate": 0.00045210526315789474, "loss": 1.3706, "step": 1041 }, { "epoch": 2.7376725838264298, "high_lr": 0.00045210526315789474, "low_lr": 9.042105263157895e-06, "step": 1041 }, { "epoch": 2.7376725838264298, "high_lr": 0.00045210526315789474, "low_lr": 9.042105263157895e-06, "step": 1041 }, { "epoch": 2.7376725838264298, "high_lr": 0.00045210526315789474, "low_lr": 9.042105263157895e-06, "step": 1041 }, { "epoch": 2.7376725838264298, "high_lr": 0.00045210526315789474, "low_lr": 9.042105263157895e-06, "step": 1041 }, { "epoch": 2.7376725838264298, "high_lr": 0.00045210526315789474, "low_lr": 9.042105263157895e-06, "step": 1041 }, { "epoch": 2.7376725838264298, "high_lr": 0.00045210526315789474, "low_lr": 9.042105263157895e-06, "step": 1041 }, { "epoch": 2.7376725838264298, "high_lr": 0.00045210526315789474, "low_lr": 9.042105263157895e-06, "step": 1041 }, { "epoch": 2.7376725838264298, "high_lr": 0.00045210526315789474, "low_lr": 9.042105263157895e-06, "step": 1041 }, { "epoch": 2.740302432610125, "grad_norm": 1.1702170372009277, "learning_rate": 0.0004515789473684211, "loss": 1.3604, "step": 1042 }, { "epoch": 2.740302432610125, "high_lr": 0.0004515789473684211, "low_lr": 9.031578947368423e-06, "step": 1042 }, { "epoch": 2.740302432610125, "high_lr": 0.0004515789473684211, "low_lr": 9.031578947368423e-06, "step": 1042 }, { "epoch": 2.740302432610125, "high_lr": 0.0004515789473684211, "low_lr": 9.031578947368423e-06, "step": 1042 }, { "epoch": 2.740302432610125, "high_lr": 0.0004515789473684211, "low_lr": 9.031578947368423e-06, "step": 1042 }, { "epoch": 2.740302432610125, "high_lr": 0.0004515789473684211, "low_lr": 9.031578947368423e-06, "step": 1042 }, { "epoch": 2.740302432610125, "high_lr": 0.0004515789473684211, "low_lr": 9.031578947368423e-06, "step": 1042 }, { "epoch": 2.740302432610125, "high_lr": 0.0004515789473684211, "low_lr": 9.031578947368423e-06, "step": 1042 }, { "epoch": 2.740302432610125, "high_lr": 0.0004515789473684211, "low_lr": 9.031578947368423e-06, "step": 1042 }, { "epoch": 2.74293228139382, "grad_norm": 1.2794098854064941, "learning_rate": 0.00045105263157894736, "loss": 1.4444, "step": 1043 }, { "epoch": 2.74293228139382, "high_lr": 0.00045105263157894736, "low_lr": 9.021052631578948e-06, "step": 1043 }, { "epoch": 2.74293228139382, "high_lr": 0.00045105263157894736, "low_lr": 9.021052631578948e-06, "step": 1043 }, { "epoch": 2.74293228139382, "high_lr": 0.00045105263157894736, "low_lr": 9.021052631578948e-06, "step": 1043 }, { "epoch": 2.74293228139382, "high_lr": 0.00045105263157894736, "low_lr": 9.021052631578948e-06, "step": 1043 }, { "epoch": 2.74293228139382, "high_lr": 0.00045105263157894736, "low_lr": 9.021052631578948e-06, "step": 1043 }, { "epoch": 2.74293228139382, "high_lr": 0.00045105263157894736, "low_lr": 9.021052631578948e-06, "step": 1043 }, { "epoch": 2.74293228139382, "high_lr": 0.00045105263157894736, "low_lr": 9.021052631578948e-06, "step": 1043 }, { "epoch": 2.74293228139382, "high_lr": 0.00045105263157894736, "low_lr": 9.021052631578948e-06, "step": 1043 }, { "epoch": 2.7455621301775146, "grad_norm": 1.275855541229248, "learning_rate": 0.00045052631578947365, "loss": 1.4003, "step": 1044 }, { "epoch": 2.7455621301775146, "high_lr": 0.00045052631578947365, "low_lr": 9.010526315789474e-06, "step": 1044 }, { "epoch": 2.7455621301775146, "high_lr": 0.00045052631578947365, "low_lr": 9.010526315789474e-06, "step": 1044 }, { "epoch": 2.7455621301775146, "high_lr": 0.00045052631578947365, "low_lr": 9.010526315789474e-06, "step": 1044 }, { "epoch": 2.7455621301775146, "high_lr": 0.00045052631578947365, "low_lr": 9.010526315789474e-06, "step": 1044 }, { "epoch": 2.7455621301775146, "high_lr": 0.00045052631578947365, "low_lr": 9.010526315789474e-06, "step": 1044 }, { "epoch": 2.7455621301775146, "high_lr": 0.00045052631578947365, "low_lr": 9.010526315789474e-06, "step": 1044 }, { "epoch": 2.7455621301775146, "high_lr": 0.00045052631578947365, "low_lr": 9.010526315789474e-06, "step": 1044 }, { "epoch": 2.7455621301775146, "high_lr": 0.00045052631578947365, "low_lr": 9.010526315789474e-06, "step": 1044 }, { "epoch": 2.7481919789612097, "grad_norm": 1.3354053497314453, "learning_rate": 0.00045000000000000004, "loss": 1.3732, "step": 1045 }, { "epoch": 2.7481919789612097, "high_lr": 0.00045000000000000004, "low_lr": 9e-06, "step": 1045 }, { "epoch": 2.7481919789612097, "high_lr": 0.00045000000000000004, "low_lr": 9e-06, "step": 1045 }, { "epoch": 2.7481919789612097, "high_lr": 0.00045000000000000004, "low_lr": 9e-06, "step": 1045 }, { "epoch": 2.7481919789612097, "high_lr": 0.00045000000000000004, "low_lr": 9e-06, "step": 1045 }, { "epoch": 2.7481919789612097, "high_lr": 0.00045000000000000004, "low_lr": 9e-06, "step": 1045 }, { "epoch": 2.7481919789612097, "high_lr": 0.00045000000000000004, "low_lr": 9e-06, "step": 1045 }, { "epoch": 2.7481919789612097, "high_lr": 0.00045000000000000004, "low_lr": 9e-06, "step": 1045 }, { "epoch": 2.7481919789612097, "high_lr": 0.00045000000000000004, "low_lr": 9e-06, "step": 1045 }, { "epoch": 2.750821827744905, "grad_norm": 1.233538031578064, "learning_rate": 0.00044947368421052633, "loss": 1.365, "step": 1046 }, { "epoch": 2.750821827744905, "high_lr": 0.00044947368421052633, "low_lr": 8.989473684210527e-06, "step": 1046 }, { "epoch": 2.750821827744905, "high_lr": 0.00044947368421052633, "low_lr": 8.989473684210527e-06, "step": 1046 }, { "epoch": 2.750821827744905, "high_lr": 0.00044947368421052633, "low_lr": 8.989473684210527e-06, "step": 1046 }, { "epoch": 2.750821827744905, "high_lr": 0.00044947368421052633, "low_lr": 8.989473684210527e-06, "step": 1046 }, { "epoch": 2.750821827744905, "high_lr": 0.00044947368421052633, "low_lr": 8.989473684210527e-06, "step": 1046 }, { "epoch": 2.750821827744905, "high_lr": 0.00044947368421052633, "low_lr": 8.989473684210527e-06, "step": 1046 }, { "epoch": 2.750821827744905, "high_lr": 0.00044947368421052633, "low_lr": 8.989473684210527e-06, "step": 1046 }, { "epoch": 2.750821827744905, "high_lr": 0.00044947368421052633, "low_lr": 8.989473684210527e-06, "step": 1046 }, { "epoch": 2.7534516765285995, "grad_norm": 1.243747353553772, "learning_rate": 0.00044894736842105267, "loss": 1.3859, "step": 1047 }, { "epoch": 2.7534516765285995, "high_lr": 0.00044894736842105267, "low_lr": 8.978947368421055e-06, "step": 1047 }, { "epoch": 2.7534516765285995, "high_lr": 0.00044894736842105267, "low_lr": 8.978947368421055e-06, "step": 1047 }, { "epoch": 2.7534516765285995, "high_lr": 0.00044894736842105267, "low_lr": 8.978947368421055e-06, "step": 1047 }, { "epoch": 2.7534516765285995, "high_lr": 0.00044894736842105267, "low_lr": 8.978947368421055e-06, "step": 1047 }, { "epoch": 2.7534516765285995, "high_lr": 0.00044894736842105267, "low_lr": 8.978947368421055e-06, "step": 1047 }, { "epoch": 2.7534516765285995, "high_lr": 0.00044894736842105267, "low_lr": 8.978947368421055e-06, "step": 1047 }, { "epoch": 2.7534516765285995, "high_lr": 0.00044894736842105267, "low_lr": 8.978947368421055e-06, "step": 1047 }, { "epoch": 2.7534516765285995, "high_lr": 0.00044894736842105267, "low_lr": 8.978947368421055e-06, "step": 1047 }, { "epoch": 2.7560815253122946, "grad_norm": 1.3367723226547241, "learning_rate": 0.00044842105263157895, "loss": 1.3905, "step": 1048 }, { "epoch": 2.7560815253122946, "high_lr": 0.00044842105263157895, "low_lr": 8.96842105263158e-06, "step": 1048 }, { "epoch": 2.7560815253122946, "high_lr": 0.00044842105263157895, "low_lr": 8.96842105263158e-06, "step": 1048 }, { "epoch": 2.7560815253122946, "high_lr": 0.00044842105263157895, "low_lr": 8.96842105263158e-06, "step": 1048 }, { "epoch": 2.7560815253122946, "high_lr": 0.00044842105263157895, "low_lr": 8.96842105263158e-06, "step": 1048 }, { "epoch": 2.7560815253122946, "high_lr": 0.00044842105263157895, "low_lr": 8.96842105263158e-06, "step": 1048 }, { "epoch": 2.7560815253122946, "high_lr": 0.00044842105263157895, "low_lr": 8.96842105263158e-06, "step": 1048 }, { "epoch": 2.7560815253122946, "high_lr": 0.00044842105263157895, "low_lr": 8.96842105263158e-06, "step": 1048 }, { "epoch": 2.7560815253122946, "high_lr": 0.00044842105263157895, "low_lr": 8.96842105263158e-06, "step": 1048 }, { "epoch": 2.7587113740959897, "grad_norm": 1.2546968460083008, "learning_rate": 0.0004478947368421053, "loss": 1.4116, "step": 1049 }, { "epoch": 2.7587113740959897, "high_lr": 0.0004478947368421053, "low_lr": 8.957894736842107e-06, "step": 1049 }, { "epoch": 2.7587113740959897, "high_lr": 0.0004478947368421053, "low_lr": 8.957894736842107e-06, "step": 1049 }, { "epoch": 2.7587113740959897, "high_lr": 0.0004478947368421053, "low_lr": 8.957894736842107e-06, "step": 1049 }, { "epoch": 2.7587113740959897, "high_lr": 0.0004478947368421053, "low_lr": 8.957894736842107e-06, "step": 1049 }, { "epoch": 2.7587113740959897, "high_lr": 0.0004478947368421053, "low_lr": 8.957894736842107e-06, "step": 1049 }, { "epoch": 2.7587113740959897, "high_lr": 0.0004478947368421053, "low_lr": 8.957894736842107e-06, "step": 1049 }, { "epoch": 2.7587113740959897, "high_lr": 0.0004478947368421053, "low_lr": 8.957894736842107e-06, "step": 1049 }, { "epoch": 2.7587113740959897, "high_lr": 0.0004478947368421053, "low_lr": 8.957894736842107e-06, "step": 1049 }, { "epoch": 2.7613412228796843, "grad_norm": 1.2776412963867188, "learning_rate": 0.0004473684210526316, "loss": 1.3674, "step": 1050 }, { "epoch": 2.7613412228796843, "high_lr": 0.0004473684210526316, "low_lr": 8.947368421052632e-06, "step": 1050 }, { "epoch": 2.7613412228796843, "high_lr": 0.0004473684210526316, "low_lr": 8.947368421052632e-06, "step": 1050 }, { "epoch": 2.7613412228796843, "high_lr": 0.0004473684210526316, "low_lr": 8.947368421052632e-06, "step": 1050 }, { "epoch": 2.7613412228796843, "high_lr": 0.0004473684210526316, "low_lr": 8.947368421052632e-06, "step": 1050 }, { "epoch": 2.7613412228796843, "high_lr": 0.0004473684210526316, "low_lr": 8.947368421052632e-06, "step": 1050 }, { "epoch": 2.7613412228796843, "high_lr": 0.0004473684210526316, "low_lr": 8.947368421052632e-06, "step": 1050 }, { "epoch": 2.7613412228796843, "high_lr": 0.0004473684210526316, "low_lr": 8.947368421052632e-06, "step": 1050 }, { "epoch": 2.7613412228796843, "high_lr": 0.0004473684210526316, "low_lr": 8.947368421052632e-06, "step": 1050 }, { "epoch": 2.7639710716633794, "grad_norm": 1.3470097780227661, "learning_rate": 0.00044684210526315787, "loss": 1.3553, "step": 1051 }, { "epoch": 2.7639710716633794, "high_lr": 0.00044684210526315787, "low_lr": 8.936842105263158e-06, "step": 1051 }, { "epoch": 2.7639710716633794, "high_lr": 0.00044684210526315787, "low_lr": 8.936842105263158e-06, "step": 1051 }, { "epoch": 2.7639710716633794, "high_lr": 0.00044684210526315787, "low_lr": 8.936842105263158e-06, "step": 1051 }, { "epoch": 2.7639710716633794, "high_lr": 0.00044684210526315787, "low_lr": 8.936842105263158e-06, "step": 1051 }, { "epoch": 2.7639710716633794, "high_lr": 0.00044684210526315787, "low_lr": 8.936842105263158e-06, "step": 1051 }, { "epoch": 2.7639710716633794, "high_lr": 0.00044684210526315787, "low_lr": 8.936842105263158e-06, "step": 1051 }, { "epoch": 2.7639710716633794, "high_lr": 0.00044684210526315787, "low_lr": 8.936842105263158e-06, "step": 1051 }, { "epoch": 2.7639710716633794, "high_lr": 0.00044684210526315787, "low_lr": 8.936842105263158e-06, "step": 1051 }, { "epoch": 2.7666009204470745, "grad_norm": 1.3481202125549316, "learning_rate": 0.0004463157894736842, "loss": 1.4003, "step": 1052 }, { "epoch": 2.7666009204470745, "high_lr": 0.0004463157894736842, "low_lr": 8.926315789473685e-06, "step": 1052 }, { "epoch": 2.7666009204470745, "high_lr": 0.0004463157894736842, "low_lr": 8.926315789473685e-06, "step": 1052 }, { "epoch": 2.7666009204470745, "high_lr": 0.0004463157894736842, "low_lr": 8.926315789473685e-06, "step": 1052 }, { "epoch": 2.7666009204470745, "high_lr": 0.0004463157894736842, "low_lr": 8.926315789473685e-06, "step": 1052 }, { "epoch": 2.7666009204470745, "high_lr": 0.0004463157894736842, "low_lr": 8.926315789473685e-06, "step": 1052 }, { "epoch": 2.7666009204470745, "high_lr": 0.0004463157894736842, "low_lr": 8.926315789473685e-06, "step": 1052 }, { "epoch": 2.7666009204470745, "high_lr": 0.0004463157894736842, "low_lr": 8.926315789473685e-06, "step": 1052 }, { "epoch": 2.7666009204470745, "high_lr": 0.0004463157894736842, "low_lr": 8.926315789473685e-06, "step": 1052 }, { "epoch": 2.769230769230769, "grad_norm": 1.2733091115951538, "learning_rate": 0.00044578947368421055, "loss": 1.3668, "step": 1053 }, { "epoch": 2.769230769230769, "high_lr": 0.00044578947368421055, "low_lr": 8.915789473684211e-06, "step": 1053 }, { "epoch": 2.769230769230769, "high_lr": 0.00044578947368421055, "low_lr": 8.915789473684211e-06, "step": 1053 }, { "epoch": 2.769230769230769, "high_lr": 0.00044578947368421055, "low_lr": 8.915789473684211e-06, "step": 1053 }, { "epoch": 2.769230769230769, "high_lr": 0.00044578947368421055, "low_lr": 8.915789473684211e-06, "step": 1053 }, { "epoch": 2.769230769230769, "high_lr": 0.00044578947368421055, "low_lr": 8.915789473684211e-06, "step": 1053 }, { "epoch": 2.769230769230769, "high_lr": 0.00044578947368421055, "low_lr": 8.915789473684211e-06, "step": 1053 }, { "epoch": 2.769230769230769, "high_lr": 0.00044578947368421055, "low_lr": 8.915789473684211e-06, "step": 1053 }, { "epoch": 2.769230769230769, "high_lr": 0.00044578947368421055, "low_lr": 8.915789473684211e-06, "step": 1053 }, { "epoch": 2.7718606180144643, "grad_norm": 1.2438217401504517, "learning_rate": 0.0004452631578947369, "loss": 1.3692, "step": 1054 }, { "epoch": 2.7718606180144643, "high_lr": 0.0004452631578947369, "low_lr": 8.905263157894737e-06, "step": 1054 }, { "epoch": 2.7718606180144643, "high_lr": 0.0004452631578947369, "low_lr": 8.905263157894737e-06, "step": 1054 }, { "epoch": 2.7718606180144643, "high_lr": 0.0004452631578947369, "low_lr": 8.905263157894737e-06, "step": 1054 }, { "epoch": 2.7718606180144643, "high_lr": 0.0004452631578947369, "low_lr": 8.905263157894737e-06, "step": 1054 }, { "epoch": 2.7718606180144643, "high_lr": 0.0004452631578947369, "low_lr": 8.905263157894737e-06, "step": 1054 }, { "epoch": 2.7718606180144643, "high_lr": 0.0004452631578947369, "low_lr": 8.905263157894737e-06, "step": 1054 }, { "epoch": 2.7718606180144643, "high_lr": 0.0004452631578947369, "low_lr": 8.905263157894737e-06, "step": 1054 }, { "epoch": 2.7718606180144643, "high_lr": 0.0004452631578947369, "low_lr": 8.905263157894737e-06, "step": 1054 }, { "epoch": 2.7744904667981594, "grad_norm": 1.2441380023956299, "learning_rate": 0.00044473684210526317, "loss": 1.4018, "step": 1055 }, { "epoch": 2.7744904667981594, "high_lr": 0.00044473684210526317, "low_lr": 8.894736842105264e-06, "step": 1055 }, { "epoch": 2.7744904667981594, "high_lr": 0.00044473684210526317, "low_lr": 8.894736842105264e-06, "step": 1055 }, { "epoch": 2.7744904667981594, "high_lr": 0.00044473684210526317, "low_lr": 8.894736842105264e-06, "step": 1055 }, { "epoch": 2.7744904667981594, "high_lr": 0.00044473684210526317, "low_lr": 8.894736842105264e-06, "step": 1055 }, { "epoch": 2.7744904667981594, "high_lr": 0.00044473684210526317, "low_lr": 8.894736842105264e-06, "step": 1055 }, { "epoch": 2.7744904667981594, "high_lr": 0.00044473684210526317, "low_lr": 8.894736842105264e-06, "step": 1055 }, { "epoch": 2.7744904667981594, "high_lr": 0.00044473684210526317, "low_lr": 8.894736842105264e-06, "step": 1055 }, { "epoch": 2.7744904667981594, "high_lr": 0.00044473684210526317, "low_lr": 8.894736842105264e-06, "step": 1055 }, { "epoch": 2.777120315581854, "grad_norm": 1.2575567960739136, "learning_rate": 0.0004442105263157895, "loss": 1.4044, "step": 1056 }, { "epoch": 2.777120315581854, "high_lr": 0.0004442105263157895, "low_lr": 8.884210526315792e-06, "step": 1056 }, { "epoch": 2.777120315581854, "high_lr": 0.0004442105263157895, "low_lr": 8.884210526315792e-06, "step": 1056 }, { "epoch": 2.777120315581854, "high_lr": 0.0004442105263157895, "low_lr": 8.884210526315792e-06, "step": 1056 }, { "epoch": 2.777120315581854, "high_lr": 0.0004442105263157895, "low_lr": 8.884210526315792e-06, "step": 1056 }, { "epoch": 2.777120315581854, "high_lr": 0.0004442105263157895, "low_lr": 8.884210526315792e-06, "step": 1056 }, { "epoch": 2.777120315581854, "high_lr": 0.0004442105263157895, "low_lr": 8.884210526315792e-06, "step": 1056 }, { "epoch": 2.777120315581854, "high_lr": 0.0004442105263157895, "low_lr": 8.884210526315792e-06, "step": 1056 }, { "epoch": 2.777120315581854, "high_lr": 0.0004442105263157895, "low_lr": 8.884210526315792e-06, "step": 1056 }, { "epoch": 2.779750164365549, "grad_norm": 1.360580325126648, "learning_rate": 0.0004436842105263158, "loss": 1.4391, "step": 1057 }, { "epoch": 2.779750164365549, "high_lr": 0.0004436842105263158, "low_lr": 8.873684210526316e-06, "step": 1057 }, { "epoch": 2.779750164365549, "high_lr": 0.0004436842105263158, "low_lr": 8.873684210526316e-06, "step": 1057 }, { "epoch": 2.779750164365549, "high_lr": 0.0004436842105263158, "low_lr": 8.873684210526316e-06, "step": 1057 }, { "epoch": 2.779750164365549, "high_lr": 0.0004436842105263158, "low_lr": 8.873684210526316e-06, "step": 1057 }, { "epoch": 2.779750164365549, "high_lr": 0.0004436842105263158, "low_lr": 8.873684210526316e-06, "step": 1057 }, { "epoch": 2.779750164365549, "high_lr": 0.0004436842105263158, "low_lr": 8.873684210526316e-06, "step": 1057 }, { "epoch": 2.779750164365549, "high_lr": 0.0004436842105263158, "low_lr": 8.873684210526316e-06, "step": 1057 }, { "epoch": 2.779750164365549, "high_lr": 0.0004436842105263158, "low_lr": 8.873684210526316e-06, "step": 1057 }, { "epoch": 2.7823800131492438, "grad_norm": 1.2910478115081787, "learning_rate": 0.0004431578947368421, "loss": 1.3596, "step": 1058 }, { "epoch": 2.7823800131492438, "high_lr": 0.0004431578947368421, "low_lr": 8.863157894736842e-06, "step": 1058 }, { "epoch": 2.7823800131492438, "high_lr": 0.0004431578947368421, "low_lr": 8.863157894736842e-06, "step": 1058 }, { "epoch": 2.7823800131492438, "high_lr": 0.0004431578947368421, "low_lr": 8.863157894736842e-06, "step": 1058 }, { "epoch": 2.7823800131492438, "high_lr": 0.0004431578947368421, "low_lr": 8.863157894736842e-06, "step": 1058 }, { "epoch": 2.7823800131492438, "high_lr": 0.0004431578947368421, "low_lr": 8.863157894736842e-06, "step": 1058 }, { "epoch": 2.7823800131492438, "high_lr": 0.0004431578947368421, "low_lr": 8.863157894736842e-06, "step": 1058 }, { "epoch": 2.7823800131492438, "high_lr": 0.0004431578947368421, "low_lr": 8.863157894736842e-06, "step": 1058 }, { "epoch": 2.7823800131492438, "high_lr": 0.0004431578947368421, "low_lr": 8.863157894736842e-06, "step": 1058 }, { "epoch": 2.785009861932939, "grad_norm": 1.2490891218185425, "learning_rate": 0.0004426315789473684, "loss": 1.3702, "step": 1059 }, { "epoch": 2.785009861932939, "high_lr": 0.0004426315789473684, "low_lr": 8.852631578947369e-06, "step": 1059 }, { "epoch": 2.785009861932939, "high_lr": 0.0004426315789473684, "low_lr": 8.852631578947369e-06, "step": 1059 }, { "epoch": 2.785009861932939, "high_lr": 0.0004426315789473684, "low_lr": 8.852631578947369e-06, "step": 1059 }, { "epoch": 2.785009861932939, "high_lr": 0.0004426315789473684, "low_lr": 8.852631578947369e-06, "step": 1059 }, { "epoch": 2.785009861932939, "high_lr": 0.0004426315789473684, "low_lr": 8.852631578947369e-06, "step": 1059 }, { "epoch": 2.785009861932939, "high_lr": 0.0004426315789473684, "low_lr": 8.852631578947369e-06, "step": 1059 }, { "epoch": 2.785009861932939, "high_lr": 0.0004426315789473684, "low_lr": 8.852631578947369e-06, "step": 1059 }, { "epoch": 2.785009861932939, "high_lr": 0.0004426315789473684, "low_lr": 8.852631578947369e-06, "step": 1059 }, { "epoch": 2.7876397107166335, "grad_norm": 1.3067238330841064, "learning_rate": 0.0004421052631578947, "loss": 1.4366, "step": 1060 }, { "epoch": 2.7876397107166335, "high_lr": 0.0004421052631578947, "low_lr": 8.842105263157895e-06, "step": 1060 }, { "epoch": 2.7876397107166335, "high_lr": 0.0004421052631578947, "low_lr": 8.842105263157895e-06, "step": 1060 }, { "epoch": 2.7876397107166335, "high_lr": 0.0004421052631578947, "low_lr": 8.842105263157895e-06, "step": 1060 }, { "epoch": 2.7876397107166335, "high_lr": 0.0004421052631578947, "low_lr": 8.842105263157895e-06, "step": 1060 }, { "epoch": 2.7876397107166335, "high_lr": 0.0004421052631578947, "low_lr": 8.842105263157895e-06, "step": 1060 }, { "epoch": 2.7876397107166335, "high_lr": 0.0004421052631578947, "low_lr": 8.842105263157895e-06, "step": 1060 }, { "epoch": 2.7876397107166335, "high_lr": 0.0004421052631578947, "low_lr": 8.842105263157895e-06, "step": 1060 }, { "epoch": 2.7876397107166335, "high_lr": 0.0004421052631578947, "low_lr": 8.842105263157895e-06, "step": 1060 }, { "epoch": 2.7902695595003286, "grad_norm": 1.3038711547851562, "learning_rate": 0.0004415789473684211, "loss": 1.3697, "step": 1061 }, { "epoch": 2.7902695595003286, "high_lr": 0.0004415789473684211, "low_lr": 8.831578947368421e-06, "step": 1061 }, { "epoch": 2.7902695595003286, "high_lr": 0.0004415789473684211, "low_lr": 8.831578947368421e-06, "step": 1061 }, { "epoch": 2.7902695595003286, "high_lr": 0.0004415789473684211, "low_lr": 8.831578947368421e-06, "step": 1061 }, { "epoch": 2.7902695595003286, "high_lr": 0.0004415789473684211, "low_lr": 8.831578947368421e-06, "step": 1061 }, { "epoch": 2.7902695595003286, "high_lr": 0.0004415789473684211, "low_lr": 8.831578947368421e-06, "step": 1061 }, { "epoch": 2.7902695595003286, "high_lr": 0.0004415789473684211, "low_lr": 8.831578947368421e-06, "step": 1061 }, { "epoch": 2.7902695595003286, "high_lr": 0.0004415789473684211, "low_lr": 8.831578947368421e-06, "step": 1061 }, { "epoch": 2.7902695595003286, "high_lr": 0.0004415789473684211, "low_lr": 8.831578947368421e-06, "step": 1061 }, { "epoch": 2.7928994082840237, "grad_norm": 1.3181500434875488, "learning_rate": 0.0004410526315789474, "loss": 1.4253, "step": 1062 }, { "epoch": 2.7928994082840237, "high_lr": 0.0004410526315789474, "low_lr": 8.821052631578948e-06, "step": 1062 }, { "epoch": 2.7928994082840237, "high_lr": 0.0004410526315789474, "low_lr": 8.821052631578948e-06, "step": 1062 }, { "epoch": 2.7928994082840237, "high_lr": 0.0004410526315789474, "low_lr": 8.821052631578948e-06, "step": 1062 }, { "epoch": 2.7928994082840237, "high_lr": 0.0004410526315789474, "low_lr": 8.821052631578948e-06, "step": 1062 }, { "epoch": 2.7928994082840237, "high_lr": 0.0004410526315789474, "low_lr": 8.821052631578948e-06, "step": 1062 }, { "epoch": 2.7928994082840237, "high_lr": 0.0004410526315789474, "low_lr": 8.821052631578948e-06, "step": 1062 }, { "epoch": 2.7928994082840237, "high_lr": 0.0004410526315789474, "low_lr": 8.821052631578948e-06, "step": 1062 }, { "epoch": 2.7928994082840237, "high_lr": 0.0004410526315789474, "low_lr": 8.821052631578948e-06, "step": 1062 }, { "epoch": 2.7955292570677184, "grad_norm": 1.366966962814331, "learning_rate": 0.00044052631578947373, "loss": 1.4173, "step": 1063 }, { "epoch": 2.7955292570677184, "high_lr": 0.00044052631578947373, "low_lr": 8.810526315789474e-06, "step": 1063 }, { "epoch": 2.7955292570677184, "high_lr": 0.00044052631578947373, "low_lr": 8.810526315789474e-06, "step": 1063 }, { "epoch": 2.7955292570677184, "high_lr": 0.00044052631578947373, "low_lr": 8.810526315789474e-06, "step": 1063 }, { "epoch": 2.7955292570677184, "high_lr": 0.00044052631578947373, "low_lr": 8.810526315789474e-06, "step": 1063 }, { "epoch": 2.7955292570677184, "high_lr": 0.00044052631578947373, "low_lr": 8.810526315789474e-06, "step": 1063 }, { "epoch": 2.7955292570677184, "high_lr": 0.00044052631578947373, "low_lr": 8.810526315789474e-06, "step": 1063 }, { "epoch": 2.7955292570677184, "high_lr": 0.00044052631578947373, "low_lr": 8.810526315789474e-06, "step": 1063 }, { "epoch": 2.7955292570677184, "high_lr": 0.00044052631578947373, "low_lr": 8.810526315789474e-06, "step": 1063 }, { "epoch": 2.7981591058514135, "grad_norm": 1.249804139137268, "learning_rate": 0.00044, "loss": 1.4177, "step": 1064 }, { "epoch": 2.7981591058514135, "high_lr": 0.00044, "low_lr": 8.8e-06, "step": 1064 }, { "epoch": 2.7981591058514135, "high_lr": 0.00044, "low_lr": 8.8e-06, "step": 1064 }, { "epoch": 2.7981591058514135, "high_lr": 0.00044, "low_lr": 8.8e-06, "step": 1064 }, { "epoch": 2.7981591058514135, "high_lr": 0.00044, "low_lr": 8.8e-06, "step": 1064 }, { "epoch": 2.7981591058514135, "high_lr": 0.00044, "low_lr": 8.8e-06, "step": 1064 }, { "epoch": 2.7981591058514135, "high_lr": 0.00044, "low_lr": 8.8e-06, "step": 1064 }, { "epoch": 2.7981591058514135, "high_lr": 0.00044, "low_lr": 8.8e-06, "step": 1064 }, { "epoch": 2.7981591058514135, "high_lr": 0.00044, "low_lr": 8.8e-06, "step": 1064 }, { "epoch": 2.8007889546351086, "grad_norm": 1.2769664525985718, "learning_rate": 0.0004394736842105263, "loss": 1.3844, "step": 1065 }, { "epoch": 2.8007889546351086, "high_lr": 0.0004394736842105263, "low_lr": 8.789473684210527e-06, "step": 1065 }, { "epoch": 2.8007889546351086, "high_lr": 0.0004394736842105263, "low_lr": 8.789473684210527e-06, "step": 1065 }, { "epoch": 2.8007889546351086, "high_lr": 0.0004394736842105263, "low_lr": 8.789473684210527e-06, "step": 1065 }, { "epoch": 2.8007889546351086, "high_lr": 0.0004394736842105263, "low_lr": 8.789473684210527e-06, "step": 1065 }, { "epoch": 2.8007889546351086, "high_lr": 0.0004394736842105263, "low_lr": 8.789473684210527e-06, "step": 1065 }, { "epoch": 2.8007889546351086, "high_lr": 0.0004394736842105263, "low_lr": 8.789473684210527e-06, "step": 1065 }, { "epoch": 2.8007889546351086, "high_lr": 0.0004394736842105263, "low_lr": 8.789473684210527e-06, "step": 1065 }, { "epoch": 2.8007889546351086, "high_lr": 0.0004394736842105263, "low_lr": 8.789473684210527e-06, "step": 1065 }, { "epoch": 2.8034188034188032, "grad_norm": 1.2841724157333374, "learning_rate": 0.00043894736842105264, "loss": 1.3809, "step": 1066 }, { "epoch": 2.8034188034188032, "high_lr": 0.00043894736842105264, "low_lr": 8.778947368421053e-06, "step": 1066 }, { "epoch": 2.8034188034188032, "high_lr": 0.00043894736842105264, "low_lr": 8.778947368421053e-06, "step": 1066 }, { "epoch": 2.8034188034188032, "high_lr": 0.00043894736842105264, "low_lr": 8.778947368421053e-06, "step": 1066 }, { "epoch": 2.8034188034188032, "high_lr": 0.00043894736842105264, "low_lr": 8.778947368421053e-06, "step": 1066 }, { "epoch": 2.8034188034188032, "high_lr": 0.00043894736842105264, "low_lr": 8.778947368421053e-06, "step": 1066 }, { "epoch": 2.8034188034188032, "high_lr": 0.00043894736842105264, "low_lr": 8.778947368421053e-06, "step": 1066 }, { "epoch": 2.8034188034188032, "high_lr": 0.00043894736842105264, "low_lr": 8.778947368421053e-06, "step": 1066 }, { "epoch": 2.8034188034188032, "high_lr": 0.00043894736842105264, "low_lr": 8.778947368421053e-06, "step": 1066 }, { "epoch": 2.8060486522024983, "grad_norm": 1.3203274011611938, "learning_rate": 0.00043842105263157893, "loss": 1.3903, "step": 1067 }, { "epoch": 2.8060486522024983, "high_lr": 0.00043842105263157893, "low_lr": 8.76842105263158e-06, "step": 1067 }, { "epoch": 2.8060486522024983, "high_lr": 0.00043842105263157893, "low_lr": 8.76842105263158e-06, "step": 1067 }, { "epoch": 2.8060486522024983, "high_lr": 0.00043842105263157893, "low_lr": 8.76842105263158e-06, "step": 1067 }, { "epoch": 2.8060486522024983, "high_lr": 0.00043842105263157893, "low_lr": 8.76842105263158e-06, "step": 1067 }, { "epoch": 2.8060486522024983, "high_lr": 0.00043842105263157893, "low_lr": 8.76842105263158e-06, "step": 1067 }, { "epoch": 2.8060486522024983, "high_lr": 0.00043842105263157893, "low_lr": 8.76842105263158e-06, "step": 1067 }, { "epoch": 2.8060486522024983, "high_lr": 0.00043842105263157893, "low_lr": 8.76842105263158e-06, "step": 1067 }, { "epoch": 2.8060486522024983, "high_lr": 0.00043842105263157893, "low_lr": 8.76842105263158e-06, "step": 1067 }, { "epoch": 2.8086785009861934, "grad_norm": 1.3281534910202026, "learning_rate": 0.00043789473684210527, "loss": 1.3852, "step": 1068 }, { "epoch": 2.8086785009861934, "high_lr": 0.00043789473684210527, "low_lr": 8.757894736842106e-06, "step": 1068 }, { "epoch": 2.8086785009861934, "high_lr": 0.00043789473684210527, "low_lr": 8.757894736842106e-06, "step": 1068 }, { "epoch": 2.8086785009861934, "high_lr": 0.00043789473684210527, "low_lr": 8.757894736842106e-06, "step": 1068 }, { "epoch": 2.8086785009861934, "high_lr": 0.00043789473684210527, "low_lr": 8.757894736842106e-06, "step": 1068 }, { "epoch": 2.8086785009861934, "high_lr": 0.00043789473684210527, "low_lr": 8.757894736842106e-06, "step": 1068 }, { "epoch": 2.8086785009861934, "high_lr": 0.00043789473684210527, "low_lr": 8.757894736842106e-06, "step": 1068 }, { "epoch": 2.8086785009861934, "high_lr": 0.00043789473684210527, "low_lr": 8.757894736842106e-06, "step": 1068 }, { "epoch": 2.8086785009861934, "high_lr": 0.00043789473684210527, "low_lr": 8.757894736842106e-06, "step": 1068 }, { "epoch": 2.811308349769888, "grad_norm": 1.3264497518539429, "learning_rate": 0.0004373684210526316, "loss": 1.4132, "step": 1069 }, { "epoch": 2.811308349769888, "high_lr": 0.0004373684210526316, "low_lr": 8.747368421052632e-06, "step": 1069 }, { "epoch": 2.811308349769888, "high_lr": 0.0004373684210526316, "low_lr": 8.747368421052632e-06, "step": 1069 }, { "epoch": 2.811308349769888, "high_lr": 0.0004373684210526316, "low_lr": 8.747368421052632e-06, "step": 1069 }, { "epoch": 2.811308349769888, "high_lr": 0.0004373684210526316, "low_lr": 8.747368421052632e-06, "step": 1069 }, { "epoch": 2.811308349769888, "high_lr": 0.0004373684210526316, "low_lr": 8.747368421052632e-06, "step": 1069 }, { "epoch": 2.811308349769888, "high_lr": 0.0004373684210526316, "low_lr": 8.747368421052632e-06, "step": 1069 }, { "epoch": 2.811308349769888, "high_lr": 0.0004373684210526316, "low_lr": 8.747368421052632e-06, "step": 1069 }, { "epoch": 2.811308349769888, "high_lr": 0.0004373684210526316, "low_lr": 8.747368421052632e-06, "step": 1069 }, { "epoch": 2.813938198553583, "grad_norm": 1.288648247718811, "learning_rate": 0.00043684210526315795, "loss": 1.4039, "step": 1070 }, { "epoch": 2.813938198553583, "high_lr": 0.00043684210526315795, "low_lr": 8.736842105263158e-06, "step": 1070 }, { "epoch": 2.813938198553583, "high_lr": 0.00043684210526315795, "low_lr": 8.736842105263158e-06, "step": 1070 }, { "epoch": 2.813938198553583, "high_lr": 0.00043684210526315795, "low_lr": 8.736842105263158e-06, "step": 1070 }, { "epoch": 2.813938198553583, "high_lr": 0.00043684210526315795, "low_lr": 8.736842105263158e-06, "step": 1070 }, { "epoch": 2.813938198553583, "high_lr": 0.00043684210526315795, "low_lr": 8.736842105263158e-06, "step": 1070 }, { "epoch": 2.813938198553583, "high_lr": 0.00043684210526315795, "low_lr": 8.736842105263158e-06, "step": 1070 }, { "epoch": 2.813938198553583, "high_lr": 0.00043684210526315795, "low_lr": 8.736842105263158e-06, "step": 1070 }, { "epoch": 2.813938198553583, "high_lr": 0.00043684210526315795, "low_lr": 8.736842105263158e-06, "step": 1070 }, { "epoch": 2.8165680473372783, "grad_norm": 1.276823878288269, "learning_rate": 0.00043631578947368423, "loss": 1.377, "step": 1071 }, { "epoch": 2.8165680473372783, "high_lr": 0.00043631578947368423, "low_lr": 8.726315789473685e-06, "step": 1071 }, { "epoch": 2.8165680473372783, "high_lr": 0.00043631578947368423, "low_lr": 8.726315789473685e-06, "step": 1071 }, { "epoch": 2.8165680473372783, "high_lr": 0.00043631578947368423, "low_lr": 8.726315789473685e-06, "step": 1071 }, { "epoch": 2.8165680473372783, "high_lr": 0.00043631578947368423, "low_lr": 8.726315789473685e-06, "step": 1071 }, { "epoch": 2.8165680473372783, "high_lr": 0.00043631578947368423, "low_lr": 8.726315789473685e-06, "step": 1071 }, { "epoch": 2.8165680473372783, "high_lr": 0.00043631578947368423, "low_lr": 8.726315789473685e-06, "step": 1071 }, { "epoch": 2.8165680473372783, "high_lr": 0.00043631578947368423, "low_lr": 8.726315789473685e-06, "step": 1071 }, { "epoch": 2.8165680473372783, "high_lr": 0.00043631578947368423, "low_lr": 8.726315789473685e-06, "step": 1071 }, { "epoch": 2.819197896120973, "grad_norm": 1.3558621406555176, "learning_rate": 0.0004357894736842105, "loss": 1.399, "step": 1072 }, { "epoch": 2.819197896120973, "high_lr": 0.0004357894736842105, "low_lr": 8.715789473684211e-06, "step": 1072 }, { "epoch": 2.819197896120973, "high_lr": 0.0004357894736842105, "low_lr": 8.715789473684211e-06, "step": 1072 }, { "epoch": 2.819197896120973, "high_lr": 0.0004357894736842105, "low_lr": 8.715789473684211e-06, "step": 1072 }, { "epoch": 2.819197896120973, "high_lr": 0.0004357894736842105, "low_lr": 8.715789473684211e-06, "step": 1072 }, { "epoch": 2.819197896120973, "high_lr": 0.0004357894736842105, "low_lr": 8.715789473684211e-06, "step": 1072 }, { "epoch": 2.819197896120973, "high_lr": 0.0004357894736842105, "low_lr": 8.715789473684211e-06, "step": 1072 }, { "epoch": 2.819197896120973, "high_lr": 0.0004357894736842105, "low_lr": 8.715789473684211e-06, "step": 1072 }, { "epoch": 2.819197896120973, "high_lr": 0.0004357894736842105, "low_lr": 8.715789473684211e-06, "step": 1072 }, { "epoch": 2.821827744904668, "grad_norm": 1.3223730325698853, "learning_rate": 0.00043526315789473686, "loss": 1.3744, "step": 1073 }, { "epoch": 2.821827744904668, "high_lr": 0.00043526315789473686, "low_lr": 8.705263157894737e-06, "step": 1073 }, { "epoch": 2.821827744904668, "high_lr": 0.00043526315789473686, "low_lr": 8.705263157894737e-06, "step": 1073 }, { "epoch": 2.821827744904668, "high_lr": 0.00043526315789473686, "low_lr": 8.705263157894737e-06, "step": 1073 }, { "epoch": 2.821827744904668, "high_lr": 0.00043526315789473686, "low_lr": 8.705263157894737e-06, "step": 1073 }, { "epoch": 2.821827744904668, "high_lr": 0.00043526315789473686, "low_lr": 8.705263157894737e-06, "step": 1073 }, { "epoch": 2.821827744904668, "high_lr": 0.00043526315789473686, "low_lr": 8.705263157894737e-06, "step": 1073 }, { "epoch": 2.821827744904668, "high_lr": 0.00043526315789473686, "low_lr": 8.705263157894737e-06, "step": 1073 }, { "epoch": 2.821827744904668, "high_lr": 0.00043526315789473686, "low_lr": 8.705263157894737e-06, "step": 1073 }, { "epoch": 2.824457593688363, "grad_norm": 1.284300446510315, "learning_rate": 0.00043473684210526315, "loss": 1.3657, "step": 1074 }, { "epoch": 2.824457593688363, "high_lr": 0.00043473684210526315, "low_lr": 8.694736842105264e-06, "step": 1074 }, { "epoch": 2.824457593688363, "high_lr": 0.00043473684210526315, "low_lr": 8.694736842105264e-06, "step": 1074 }, { "epoch": 2.824457593688363, "high_lr": 0.00043473684210526315, "low_lr": 8.694736842105264e-06, "step": 1074 }, { "epoch": 2.824457593688363, "high_lr": 0.00043473684210526315, "low_lr": 8.694736842105264e-06, "step": 1074 }, { "epoch": 2.824457593688363, "high_lr": 0.00043473684210526315, "low_lr": 8.694736842105264e-06, "step": 1074 }, { "epoch": 2.824457593688363, "high_lr": 0.00043473684210526315, "low_lr": 8.694736842105264e-06, "step": 1074 }, { "epoch": 2.824457593688363, "high_lr": 0.00043473684210526315, "low_lr": 8.694736842105264e-06, "step": 1074 }, { "epoch": 2.824457593688363, "high_lr": 0.00043473684210526315, "low_lr": 8.694736842105264e-06, "step": 1074 }, { "epoch": 2.827087442472058, "grad_norm": 1.3425698280334473, "learning_rate": 0.0004342105263157895, "loss": 1.3967, "step": 1075 }, { "epoch": 2.827087442472058, "high_lr": 0.0004342105263157895, "low_lr": 8.68421052631579e-06, "step": 1075 }, { "epoch": 2.827087442472058, "high_lr": 0.0004342105263157895, "low_lr": 8.68421052631579e-06, "step": 1075 }, { "epoch": 2.827087442472058, "high_lr": 0.0004342105263157895, "low_lr": 8.68421052631579e-06, "step": 1075 }, { "epoch": 2.827087442472058, "high_lr": 0.0004342105263157895, "low_lr": 8.68421052631579e-06, "step": 1075 }, { "epoch": 2.827087442472058, "high_lr": 0.0004342105263157895, "low_lr": 8.68421052631579e-06, "step": 1075 }, { "epoch": 2.827087442472058, "high_lr": 0.0004342105263157895, "low_lr": 8.68421052631579e-06, "step": 1075 }, { "epoch": 2.827087442472058, "high_lr": 0.0004342105263157895, "low_lr": 8.68421052631579e-06, "step": 1075 }, { "epoch": 2.827087442472058, "high_lr": 0.0004342105263157895, "low_lr": 8.68421052631579e-06, "step": 1075 }, { "epoch": 2.829717291255753, "grad_norm": 1.2884697914123535, "learning_rate": 0.00043368421052631577, "loss": 1.367, "step": 1076 }, { "epoch": 2.829717291255753, "high_lr": 0.00043368421052631577, "low_lr": 8.673684210526316e-06, "step": 1076 }, { "epoch": 2.829717291255753, "high_lr": 0.00043368421052631577, "low_lr": 8.673684210526316e-06, "step": 1076 }, { "epoch": 2.829717291255753, "high_lr": 0.00043368421052631577, "low_lr": 8.673684210526316e-06, "step": 1076 }, { "epoch": 2.829717291255753, "high_lr": 0.00043368421052631577, "low_lr": 8.673684210526316e-06, "step": 1076 }, { "epoch": 2.829717291255753, "high_lr": 0.00043368421052631577, "low_lr": 8.673684210526316e-06, "step": 1076 }, { "epoch": 2.829717291255753, "high_lr": 0.00043368421052631577, "low_lr": 8.673684210526316e-06, "step": 1076 }, { "epoch": 2.829717291255753, "high_lr": 0.00043368421052631577, "low_lr": 8.673684210526316e-06, "step": 1076 }, { "epoch": 2.829717291255753, "high_lr": 0.00043368421052631577, "low_lr": 8.673684210526316e-06, "step": 1076 }, { "epoch": 2.832347140039448, "grad_norm": 1.3022724390029907, "learning_rate": 0.00043315789473684217, "loss": 1.3843, "step": 1077 }, { "epoch": 2.832347140039448, "high_lr": 0.00043315789473684217, "low_lr": 8.663157894736843e-06, "step": 1077 }, { "epoch": 2.832347140039448, "high_lr": 0.00043315789473684217, "low_lr": 8.663157894736843e-06, "step": 1077 }, { "epoch": 2.832347140039448, "high_lr": 0.00043315789473684217, "low_lr": 8.663157894736843e-06, "step": 1077 }, { "epoch": 2.832347140039448, "high_lr": 0.00043315789473684217, "low_lr": 8.663157894736843e-06, "step": 1077 }, { "epoch": 2.832347140039448, "high_lr": 0.00043315789473684217, "low_lr": 8.663157894736843e-06, "step": 1077 }, { "epoch": 2.832347140039448, "high_lr": 0.00043315789473684217, "low_lr": 8.663157894736843e-06, "step": 1077 }, { "epoch": 2.832347140039448, "high_lr": 0.00043315789473684217, "low_lr": 8.663157894736843e-06, "step": 1077 }, { "epoch": 2.832347140039448, "high_lr": 0.00043315789473684217, "low_lr": 8.663157894736843e-06, "step": 1077 }, { "epoch": 2.8349769888231426, "grad_norm": 1.2575839757919312, "learning_rate": 0.00043263157894736845, "loss": 1.3433, "step": 1078 }, { "epoch": 2.8349769888231426, "high_lr": 0.00043263157894736845, "low_lr": 8.652631578947369e-06, "step": 1078 }, { "epoch": 2.8349769888231426, "high_lr": 0.00043263157894736845, "low_lr": 8.652631578947369e-06, "step": 1078 }, { "epoch": 2.8349769888231426, "high_lr": 0.00043263157894736845, "low_lr": 8.652631578947369e-06, "step": 1078 }, { "epoch": 2.8349769888231426, "high_lr": 0.00043263157894736845, "low_lr": 8.652631578947369e-06, "step": 1078 }, { "epoch": 2.8349769888231426, "high_lr": 0.00043263157894736845, "low_lr": 8.652631578947369e-06, "step": 1078 }, { "epoch": 2.8349769888231426, "high_lr": 0.00043263157894736845, "low_lr": 8.652631578947369e-06, "step": 1078 }, { "epoch": 2.8349769888231426, "high_lr": 0.00043263157894736845, "low_lr": 8.652631578947369e-06, "step": 1078 }, { "epoch": 2.8349769888231426, "high_lr": 0.00043263157894736845, "low_lr": 8.652631578947369e-06, "step": 1078 }, { "epoch": 2.8376068376068377, "grad_norm": 1.3376774787902832, "learning_rate": 0.00043210526315789474, "loss": 1.3808, "step": 1079 }, { "epoch": 2.8376068376068377, "high_lr": 0.00043210526315789474, "low_lr": 8.642105263157895e-06, "step": 1079 }, { "epoch": 2.8376068376068377, "high_lr": 0.00043210526315789474, "low_lr": 8.642105263157895e-06, "step": 1079 }, { "epoch": 2.8376068376068377, "high_lr": 0.00043210526315789474, "low_lr": 8.642105263157895e-06, "step": 1079 }, { "epoch": 2.8376068376068377, "high_lr": 0.00043210526315789474, "low_lr": 8.642105263157895e-06, "step": 1079 }, { "epoch": 2.8376068376068377, "high_lr": 0.00043210526315789474, "low_lr": 8.642105263157895e-06, "step": 1079 }, { "epoch": 2.8376068376068377, "high_lr": 0.00043210526315789474, "low_lr": 8.642105263157895e-06, "step": 1079 }, { "epoch": 2.8376068376068377, "high_lr": 0.00043210526315789474, "low_lr": 8.642105263157895e-06, "step": 1079 }, { "epoch": 2.8376068376068377, "high_lr": 0.00043210526315789474, "low_lr": 8.642105263157895e-06, "step": 1079 }, { "epoch": 2.8402366863905324, "grad_norm": 1.318920612335205, "learning_rate": 0.0004315789473684211, "loss": 1.4487, "step": 1080 }, { "epoch": 2.8402366863905324, "high_lr": 0.0004315789473684211, "low_lr": 8.631578947368422e-06, "step": 1080 }, { "epoch": 2.8402366863905324, "high_lr": 0.0004315789473684211, "low_lr": 8.631578947368422e-06, "step": 1080 }, { "epoch": 2.8402366863905324, "high_lr": 0.0004315789473684211, "low_lr": 8.631578947368422e-06, "step": 1080 }, { "epoch": 2.8402366863905324, "high_lr": 0.0004315789473684211, "low_lr": 8.631578947368422e-06, "step": 1080 }, { "epoch": 2.8402366863905324, "high_lr": 0.0004315789473684211, "low_lr": 8.631578947368422e-06, "step": 1080 }, { "epoch": 2.8402366863905324, "high_lr": 0.0004315789473684211, "low_lr": 8.631578947368422e-06, "step": 1080 }, { "epoch": 2.8402366863905324, "high_lr": 0.0004315789473684211, "low_lr": 8.631578947368422e-06, "step": 1080 }, { "epoch": 2.8402366863905324, "high_lr": 0.0004315789473684211, "low_lr": 8.631578947368422e-06, "step": 1080 }, { "epoch": 2.8428665351742275, "grad_norm": 1.413286566734314, "learning_rate": 0.00043105263157894736, "loss": 1.3623, "step": 1081 }, { "epoch": 2.8428665351742275, "high_lr": 0.00043105263157894736, "low_lr": 8.621052631578948e-06, "step": 1081 }, { "epoch": 2.8428665351742275, "high_lr": 0.00043105263157894736, "low_lr": 8.621052631578948e-06, "step": 1081 }, { "epoch": 2.8428665351742275, "high_lr": 0.00043105263157894736, "low_lr": 8.621052631578948e-06, "step": 1081 }, { "epoch": 2.8428665351742275, "high_lr": 0.00043105263157894736, "low_lr": 8.621052631578948e-06, "step": 1081 }, { "epoch": 2.8428665351742275, "high_lr": 0.00043105263157894736, "low_lr": 8.621052631578948e-06, "step": 1081 }, { "epoch": 2.8428665351742275, "high_lr": 0.00043105263157894736, "low_lr": 8.621052631578948e-06, "step": 1081 }, { "epoch": 2.8428665351742275, "high_lr": 0.00043105263157894736, "low_lr": 8.621052631578948e-06, "step": 1081 }, { "epoch": 2.8428665351742275, "high_lr": 0.00043105263157894736, "low_lr": 8.621052631578948e-06, "step": 1081 }, { "epoch": 2.845496383957922, "grad_norm": 1.2876933813095093, "learning_rate": 0.0004305263157894737, "loss": 1.38, "step": 1082 }, { "epoch": 2.845496383957922, "high_lr": 0.0004305263157894737, "low_lr": 8.610526315789474e-06, "step": 1082 }, { "epoch": 2.845496383957922, "high_lr": 0.0004305263157894737, "low_lr": 8.610526315789474e-06, "step": 1082 }, { "epoch": 2.845496383957922, "high_lr": 0.0004305263157894737, "low_lr": 8.610526315789474e-06, "step": 1082 }, { "epoch": 2.845496383957922, "high_lr": 0.0004305263157894737, "low_lr": 8.610526315789474e-06, "step": 1082 }, { "epoch": 2.845496383957922, "high_lr": 0.0004305263157894737, "low_lr": 8.610526315789474e-06, "step": 1082 }, { "epoch": 2.845496383957922, "high_lr": 0.0004305263157894737, "low_lr": 8.610526315789474e-06, "step": 1082 }, { "epoch": 2.845496383957922, "high_lr": 0.0004305263157894737, "low_lr": 8.610526315789474e-06, "step": 1082 }, { "epoch": 2.845496383957922, "high_lr": 0.0004305263157894737, "low_lr": 8.610526315789474e-06, "step": 1082 }, { "epoch": 2.8481262327416172, "grad_norm": 1.250273585319519, "learning_rate": 0.00043, "loss": 1.3492, "step": 1083 }, { "epoch": 2.8481262327416172, "high_lr": 0.00043, "low_lr": 8.6e-06, "step": 1083 }, { "epoch": 2.8481262327416172, "high_lr": 0.00043, "low_lr": 8.6e-06, "step": 1083 }, { "epoch": 2.8481262327416172, "high_lr": 0.00043, "low_lr": 8.6e-06, "step": 1083 }, { "epoch": 2.8481262327416172, "high_lr": 0.00043, "low_lr": 8.6e-06, "step": 1083 }, { "epoch": 2.8481262327416172, "high_lr": 0.00043, "low_lr": 8.6e-06, "step": 1083 }, { "epoch": 2.8481262327416172, "high_lr": 0.00043, "low_lr": 8.6e-06, "step": 1083 }, { "epoch": 2.8481262327416172, "high_lr": 0.00043, "low_lr": 8.6e-06, "step": 1083 }, { "epoch": 2.8481262327416172, "high_lr": 0.00043, "low_lr": 8.6e-06, "step": 1083 }, { "epoch": 2.8507560815253123, "grad_norm": 1.2635754346847534, "learning_rate": 0.00042947368421052633, "loss": 1.3557, "step": 1084 }, { "epoch": 2.8507560815253123, "high_lr": 0.00042947368421052633, "low_lr": 8.589473684210527e-06, "step": 1084 }, { "epoch": 2.8507560815253123, "high_lr": 0.00042947368421052633, "low_lr": 8.589473684210527e-06, "step": 1084 }, { "epoch": 2.8507560815253123, "high_lr": 0.00042947368421052633, "low_lr": 8.589473684210527e-06, "step": 1084 }, { "epoch": 2.8507560815253123, "high_lr": 0.00042947368421052633, "low_lr": 8.589473684210527e-06, "step": 1084 }, { "epoch": 2.8507560815253123, "high_lr": 0.00042947368421052633, "low_lr": 8.589473684210527e-06, "step": 1084 }, { "epoch": 2.8507560815253123, "high_lr": 0.00042947368421052633, "low_lr": 8.589473684210527e-06, "step": 1084 }, { "epoch": 2.8507560815253123, "high_lr": 0.00042947368421052633, "low_lr": 8.589473684210527e-06, "step": 1084 }, { "epoch": 2.8507560815253123, "high_lr": 0.00042947368421052633, "low_lr": 8.589473684210527e-06, "step": 1084 }, { "epoch": 2.853385930309007, "grad_norm": 1.2985435724258423, "learning_rate": 0.0004289473684210526, "loss": 1.3558, "step": 1085 }, { "epoch": 2.853385930309007, "high_lr": 0.0004289473684210526, "low_lr": 8.578947368421053e-06, "step": 1085 }, { "epoch": 2.853385930309007, "high_lr": 0.0004289473684210526, "low_lr": 8.578947368421053e-06, "step": 1085 }, { "epoch": 2.853385930309007, "high_lr": 0.0004289473684210526, "low_lr": 8.578947368421053e-06, "step": 1085 }, { "epoch": 2.853385930309007, "high_lr": 0.0004289473684210526, "low_lr": 8.578947368421053e-06, "step": 1085 }, { "epoch": 2.853385930309007, "high_lr": 0.0004289473684210526, "low_lr": 8.578947368421053e-06, "step": 1085 }, { "epoch": 2.853385930309007, "high_lr": 0.0004289473684210526, "low_lr": 8.578947368421053e-06, "step": 1085 }, { "epoch": 2.853385930309007, "high_lr": 0.0004289473684210526, "low_lr": 8.578947368421053e-06, "step": 1085 }, { "epoch": 2.853385930309007, "high_lr": 0.0004289473684210526, "low_lr": 8.578947368421053e-06, "step": 1085 }, { "epoch": 2.856015779092702, "grad_norm": 1.3095905780792236, "learning_rate": 0.00042842105263157896, "loss": 1.4058, "step": 1086 }, { "epoch": 2.856015779092702, "high_lr": 0.00042842105263157896, "low_lr": 8.56842105263158e-06, "step": 1086 }, { "epoch": 2.856015779092702, "high_lr": 0.00042842105263157896, "low_lr": 8.56842105263158e-06, "step": 1086 }, { "epoch": 2.856015779092702, "high_lr": 0.00042842105263157896, "low_lr": 8.56842105263158e-06, "step": 1086 }, { "epoch": 2.856015779092702, "high_lr": 0.00042842105263157896, "low_lr": 8.56842105263158e-06, "step": 1086 }, { "epoch": 2.856015779092702, "high_lr": 0.00042842105263157896, "low_lr": 8.56842105263158e-06, "step": 1086 }, { "epoch": 2.856015779092702, "high_lr": 0.00042842105263157896, "low_lr": 8.56842105263158e-06, "step": 1086 }, { "epoch": 2.856015779092702, "high_lr": 0.00042842105263157896, "low_lr": 8.56842105263158e-06, "step": 1086 }, { "epoch": 2.856015779092702, "high_lr": 0.00042842105263157896, "low_lr": 8.56842105263158e-06, "step": 1086 }, { "epoch": 2.858645627876397, "grad_norm": 1.2998263835906982, "learning_rate": 0.0004278947368421053, "loss": 1.4075, "step": 1087 }, { "epoch": 2.858645627876397, "high_lr": 0.0004278947368421053, "low_lr": 8.557894736842106e-06, "step": 1087 }, { "epoch": 2.858645627876397, "high_lr": 0.0004278947368421053, "low_lr": 8.557894736842106e-06, "step": 1087 }, { "epoch": 2.858645627876397, "high_lr": 0.0004278947368421053, "low_lr": 8.557894736842106e-06, "step": 1087 }, { "epoch": 2.858645627876397, "high_lr": 0.0004278947368421053, "low_lr": 8.557894736842106e-06, "step": 1087 }, { "epoch": 2.858645627876397, "high_lr": 0.0004278947368421053, "low_lr": 8.557894736842106e-06, "step": 1087 }, { "epoch": 2.858645627876397, "high_lr": 0.0004278947368421053, "low_lr": 8.557894736842106e-06, "step": 1087 }, { "epoch": 2.858645627876397, "high_lr": 0.0004278947368421053, "low_lr": 8.557894736842106e-06, "step": 1087 }, { "epoch": 2.858645627876397, "high_lr": 0.0004278947368421053, "low_lr": 8.557894736842106e-06, "step": 1087 }, { "epoch": 2.861275476660092, "grad_norm": 1.369041919708252, "learning_rate": 0.0004273684210526316, "loss": 1.4008, "step": 1088 }, { "epoch": 2.861275476660092, "high_lr": 0.0004273684210526316, "low_lr": 8.547368421052632e-06, "step": 1088 }, { "epoch": 2.861275476660092, "high_lr": 0.0004273684210526316, "low_lr": 8.547368421052632e-06, "step": 1088 }, { "epoch": 2.861275476660092, "high_lr": 0.0004273684210526316, "low_lr": 8.547368421052632e-06, "step": 1088 }, { "epoch": 2.861275476660092, "high_lr": 0.0004273684210526316, "low_lr": 8.547368421052632e-06, "step": 1088 }, { "epoch": 2.861275476660092, "high_lr": 0.0004273684210526316, "low_lr": 8.547368421052632e-06, "step": 1088 }, { "epoch": 2.861275476660092, "high_lr": 0.0004273684210526316, "low_lr": 8.547368421052632e-06, "step": 1088 }, { "epoch": 2.861275476660092, "high_lr": 0.0004273684210526316, "low_lr": 8.547368421052632e-06, "step": 1088 }, { "epoch": 2.861275476660092, "high_lr": 0.0004273684210526316, "low_lr": 8.547368421052632e-06, "step": 1088 }, { "epoch": 2.863905325443787, "grad_norm": 1.3575427532196045, "learning_rate": 0.0004268421052631579, "loss": 1.441, "step": 1089 }, { "epoch": 2.863905325443787, "high_lr": 0.0004268421052631579, "low_lr": 8.536842105263159e-06, "step": 1089 }, { "epoch": 2.863905325443787, "high_lr": 0.0004268421052631579, "low_lr": 8.536842105263159e-06, "step": 1089 }, { "epoch": 2.863905325443787, "high_lr": 0.0004268421052631579, "low_lr": 8.536842105263159e-06, "step": 1089 }, { "epoch": 2.863905325443787, "high_lr": 0.0004268421052631579, "low_lr": 8.536842105263159e-06, "step": 1089 }, { "epoch": 2.863905325443787, "high_lr": 0.0004268421052631579, "low_lr": 8.536842105263159e-06, "step": 1089 }, { "epoch": 2.863905325443787, "high_lr": 0.0004268421052631579, "low_lr": 8.536842105263159e-06, "step": 1089 }, { "epoch": 2.863905325443787, "high_lr": 0.0004268421052631579, "low_lr": 8.536842105263159e-06, "step": 1089 }, { "epoch": 2.863905325443787, "high_lr": 0.0004268421052631579, "low_lr": 8.536842105263159e-06, "step": 1089 }, { "epoch": 2.866535174227482, "grad_norm": 1.2896759510040283, "learning_rate": 0.0004263157894736842, "loss": 1.3802, "step": 1090 }, { "epoch": 2.866535174227482, "high_lr": 0.0004263157894736842, "low_lr": 8.526315789473685e-06, "step": 1090 }, { "epoch": 2.866535174227482, "high_lr": 0.0004263157894736842, "low_lr": 8.526315789473685e-06, "step": 1090 }, { "epoch": 2.866535174227482, "high_lr": 0.0004263157894736842, "low_lr": 8.526315789473685e-06, "step": 1090 }, { "epoch": 2.866535174227482, "high_lr": 0.0004263157894736842, "low_lr": 8.526315789473685e-06, "step": 1090 }, { "epoch": 2.866535174227482, "high_lr": 0.0004263157894736842, "low_lr": 8.526315789473685e-06, "step": 1090 }, { "epoch": 2.866535174227482, "high_lr": 0.0004263157894736842, "low_lr": 8.526315789473685e-06, "step": 1090 }, { "epoch": 2.866535174227482, "high_lr": 0.0004263157894736842, "low_lr": 8.526315789473685e-06, "step": 1090 }, { "epoch": 2.866535174227482, "high_lr": 0.0004263157894736842, "low_lr": 8.526315789473685e-06, "step": 1090 }, { "epoch": 2.8691650230111767, "grad_norm": 1.2547643184661865, "learning_rate": 0.00042578947368421055, "loss": 1.3332, "step": 1091 }, { "epoch": 2.8691650230111767, "high_lr": 0.00042578947368421055, "low_lr": 8.515789473684211e-06, "step": 1091 }, { "epoch": 2.8691650230111767, "high_lr": 0.00042578947368421055, "low_lr": 8.515789473684211e-06, "step": 1091 }, { "epoch": 2.8691650230111767, "high_lr": 0.00042578947368421055, "low_lr": 8.515789473684211e-06, "step": 1091 }, { "epoch": 2.8691650230111767, "high_lr": 0.00042578947368421055, "low_lr": 8.515789473684211e-06, "step": 1091 }, { "epoch": 2.8691650230111767, "high_lr": 0.00042578947368421055, "low_lr": 8.515789473684211e-06, "step": 1091 }, { "epoch": 2.8691650230111767, "high_lr": 0.00042578947368421055, "low_lr": 8.515789473684211e-06, "step": 1091 }, { "epoch": 2.8691650230111767, "high_lr": 0.00042578947368421055, "low_lr": 8.515789473684211e-06, "step": 1091 }, { "epoch": 2.8691650230111767, "high_lr": 0.00042578947368421055, "low_lr": 8.515789473684211e-06, "step": 1091 }, { "epoch": 2.871794871794872, "grad_norm": 2.4219512939453125, "learning_rate": 0.00042526315789473683, "loss": 1.3427, "step": 1092 }, { "epoch": 2.871794871794872, "high_lr": 0.00042526315789473683, "low_lr": 8.505263157894738e-06, "step": 1092 }, { "epoch": 2.871794871794872, "high_lr": 0.00042526315789473683, "low_lr": 8.505263157894738e-06, "step": 1092 }, { "epoch": 2.871794871794872, "high_lr": 0.00042526315789473683, "low_lr": 8.505263157894738e-06, "step": 1092 }, { "epoch": 2.871794871794872, "high_lr": 0.00042526315789473683, "low_lr": 8.505263157894738e-06, "step": 1092 }, { "epoch": 2.871794871794872, "high_lr": 0.00042526315789473683, "low_lr": 8.505263157894738e-06, "step": 1092 }, { "epoch": 2.871794871794872, "high_lr": 0.00042526315789473683, "low_lr": 8.505263157894738e-06, "step": 1092 }, { "epoch": 2.871794871794872, "high_lr": 0.00042526315789473683, "low_lr": 8.505263157894738e-06, "step": 1092 }, { "epoch": 2.871794871794872, "high_lr": 0.00042526315789473683, "low_lr": 8.505263157894738e-06, "step": 1092 }, { "epoch": 2.874424720578567, "grad_norm": 1.207375168800354, "learning_rate": 0.0004247368421052631, "loss": 1.3563, "step": 1093 }, { "epoch": 2.874424720578567, "high_lr": 0.0004247368421052631, "low_lr": 8.494736842105264e-06, "step": 1093 }, { "epoch": 2.874424720578567, "high_lr": 0.0004247368421052631, "low_lr": 8.494736842105264e-06, "step": 1093 }, { "epoch": 2.874424720578567, "high_lr": 0.0004247368421052631, "low_lr": 8.494736842105264e-06, "step": 1093 }, { "epoch": 2.874424720578567, "high_lr": 0.0004247368421052631, "low_lr": 8.494736842105264e-06, "step": 1093 }, { "epoch": 2.874424720578567, "high_lr": 0.0004247368421052631, "low_lr": 8.494736842105264e-06, "step": 1093 }, { "epoch": 2.874424720578567, "high_lr": 0.0004247368421052631, "low_lr": 8.494736842105264e-06, "step": 1093 }, { "epoch": 2.874424720578567, "high_lr": 0.0004247368421052631, "low_lr": 8.494736842105264e-06, "step": 1093 }, { "epoch": 2.874424720578567, "high_lr": 0.0004247368421052631, "low_lr": 8.494736842105264e-06, "step": 1093 }, { "epoch": 2.8770545693622616, "grad_norm": 1.3039588928222656, "learning_rate": 0.0004242105263157895, "loss": 1.4024, "step": 1094 }, { "epoch": 2.8770545693622616, "high_lr": 0.0004242105263157895, "low_lr": 8.48421052631579e-06, "step": 1094 }, { "epoch": 2.8770545693622616, "high_lr": 0.0004242105263157895, "low_lr": 8.48421052631579e-06, "step": 1094 }, { "epoch": 2.8770545693622616, "high_lr": 0.0004242105263157895, "low_lr": 8.48421052631579e-06, "step": 1094 }, { "epoch": 2.8770545693622616, "high_lr": 0.0004242105263157895, "low_lr": 8.48421052631579e-06, "step": 1094 }, { "epoch": 2.8770545693622616, "high_lr": 0.0004242105263157895, "low_lr": 8.48421052631579e-06, "step": 1094 }, { "epoch": 2.8770545693622616, "high_lr": 0.0004242105263157895, "low_lr": 8.48421052631579e-06, "step": 1094 }, { "epoch": 2.8770545693622616, "high_lr": 0.0004242105263157895, "low_lr": 8.48421052631579e-06, "step": 1094 }, { "epoch": 2.8770545693622616, "high_lr": 0.0004242105263157895, "low_lr": 8.48421052631579e-06, "step": 1094 }, { "epoch": 2.8796844181459567, "grad_norm": 1.2961663007736206, "learning_rate": 0.0004236842105263158, "loss": 1.3692, "step": 1095 }, { "epoch": 2.8796844181459567, "high_lr": 0.0004236842105263158, "low_lr": 8.473684210526317e-06, "step": 1095 }, { "epoch": 2.8796844181459567, "high_lr": 0.0004236842105263158, "low_lr": 8.473684210526317e-06, "step": 1095 }, { "epoch": 2.8796844181459567, "high_lr": 0.0004236842105263158, "low_lr": 8.473684210526317e-06, "step": 1095 }, { "epoch": 2.8796844181459567, "high_lr": 0.0004236842105263158, "low_lr": 8.473684210526317e-06, "step": 1095 }, { "epoch": 2.8796844181459567, "high_lr": 0.0004236842105263158, "low_lr": 8.473684210526317e-06, "step": 1095 }, { "epoch": 2.8796844181459567, "high_lr": 0.0004236842105263158, "low_lr": 8.473684210526317e-06, "step": 1095 }, { "epoch": 2.8796844181459567, "high_lr": 0.0004236842105263158, "low_lr": 8.473684210526317e-06, "step": 1095 }, { "epoch": 2.8796844181459567, "high_lr": 0.0004236842105263158, "low_lr": 8.473684210526317e-06, "step": 1095 }, { "epoch": 2.8823142669296518, "grad_norm": 1.3432323932647705, "learning_rate": 0.00042315789473684214, "loss": 1.4235, "step": 1096 }, { "epoch": 2.8823142669296518, "high_lr": 0.00042315789473684214, "low_lr": 8.463157894736843e-06, "step": 1096 }, { "epoch": 2.8823142669296518, "high_lr": 0.00042315789473684214, "low_lr": 8.463157894736843e-06, "step": 1096 }, { "epoch": 2.8823142669296518, "high_lr": 0.00042315789473684214, "low_lr": 8.463157894736843e-06, "step": 1096 }, { "epoch": 2.8823142669296518, "high_lr": 0.00042315789473684214, "low_lr": 8.463157894736843e-06, "step": 1096 }, { "epoch": 2.8823142669296518, "high_lr": 0.00042315789473684214, "low_lr": 8.463157894736843e-06, "step": 1096 }, { "epoch": 2.8823142669296518, "high_lr": 0.00042315789473684214, "low_lr": 8.463157894736843e-06, "step": 1096 }, { "epoch": 2.8823142669296518, "high_lr": 0.00042315789473684214, "low_lr": 8.463157894736843e-06, "step": 1096 }, { "epoch": 2.8823142669296518, "high_lr": 0.00042315789473684214, "low_lr": 8.463157894736843e-06, "step": 1096 }, { "epoch": 2.8849441157133464, "grad_norm": 1.2235922813415527, "learning_rate": 0.0004226315789473684, "loss": 1.318, "step": 1097 }, { "epoch": 2.8849441157133464, "high_lr": 0.0004226315789473684, "low_lr": 8.45263157894737e-06, "step": 1097 }, { "epoch": 2.8849441157133464, "high_lr": 0.0004226315789473684, "low_lr": 8.45263157894737e-06, "step": 1097 }, { "epoch": 2.8849441157133464, "high_lr": 0.0004226315789473684, "low_lr": 8.45263157894737e-06, "step": 1097 }, { "epoch": 2.8849441157133464, "high_lr": 0.0004226315789473684, "low_lr": 8.45263157894737e-06, "step": 1097 }, { "epoch": 2.8849441157133464, "high_lr": 0.0004226315789473684, "low_lr": 8.45263157894737e-06, "step": 1097 }, { "epoch": 2.8849441157133464, "high_lr": 0.0004226315789473684, "low_lr": 8.45263157894737e-06, "step": 1097 }, { "epoch": 2.8849441157133464, "high_lr": 0.0004226315789473684, "low_lr": 8.45263157894737e-06, "step": 1097 }, { "epoch": 2.8849441157133464, "high_lr": 0.0004226315789473684, "low_lr": 8.45263157894737e-06, "step": 1097 }, { "epoch": 2.8875739644970415, "grad_norm": 1.2706127166748047, "learning_rate": 0.00042210526315789477, "loss": 1.4046, "step": 1098 }, { "epoch": 2.8875739644970415, "high_lr": 0.00042210526315789477, "low_lr": 8.442105263157896e-06, "step": 1098 }, { "epoch": 2.8875739644970415, "high_lr": 0.00042210526315789477, "low_lr": 8.442105263157896e-06, "step": 1098 }, { "epoch": 2.8875739644970415, "high_lr": 0.00042210526315789477, "low_lr": 8.442105263157896e-06, "step": 1098 }, { "epoch": 2.8875739644970415, "high_lr": 0.00042210526315789477, "low_lr": 8.442105263157896e-06, "step": 1098 }, { "epoch": 2.8875739644970415, "high_lr": 0.00042210526315789477, "low_lr": 8.442105263157896e-06, "step": 1098 }, { "epoch": 2.8875739644970415, "high_lr": 0.00042210526315789477, "low_lr": 8.442105263157896e-06, "step": 1098 }, { "epoch": 2.8875739644970415, "high_lr": 0.00042210526315789477, "low_lr": 8.442105263157896e-06, "step": 1098 }, { "epoch": 2.8875739644970415, "high_lr": 0.00042210526315789477, "low_lr": 8.442105263157896e-06, "step": 1098 }, { "epoch": 2.8902038132807366, "grad_norm": 1.2929545640945435, "learning_rate": 0.00042157894736842105, "loss": 1.3805, "step": 1099 }, { "epoch": 2.8902038132807366, "high_lr": 0.00042157894736842105, "low_lr": 8.431578947368422e-06, "step": 1099 }, { "epoch": 2.8902038132807366, "high_lr": 0.00042157894736842105, "low_lr": 8.431578947368422e-06, "step": 1099 }, { "epoch": 2.8902038132807366, "high_lr": 0.00042157894736842105, "low_lr": 8.431578947368422e-06, "step": 1099 }, { "epoch": 2.8902038132807366, "high_lr": 0.00042157894736842105, "low_lr": 8.431578947368422e-06, "step": 1099 }, { "epoch": 2.8902038132807366, "high_lr": 0.00042157894736842105, "low_lr": 8.431578947368422e-06, "step": 1099 }, { "epoch": 2.8902038132807366, "high_lr": 0.00042157894736842105, "low_lr": 8.431578947368422e-06, "step": 1099 }, { "epoch": 2.8902038132807366, "high_lr": 0.00042157894736842105, "low_lr": 8.431578947368422e-06, "step": 1099 }, { "epoch": 2.8902038132807366, "high_lr": 0.00042157894736842105, "low_lr": 8.431578947368422e-06, "step": 1099 }, { "epoch": 2.8928336620644313, "grad_norm": 1.4245407581329346, "learning_rate": 0.00042105263157894734, "loss": 1.4389, "step": 1100 }, { "epoch": 2.8928336620644313, "high_lr": 0.00042105263157894734, "low_lr": 8.421052631578948e-06, "step": 1100 }, { "epoch": 2.8928336620644313, "high_lr": 0.00042105263157894734, "low_lr": 8.421052631578948e-06, "step": 1100 }, { "epoch": 2.8928336620644313, "high_lr": 0.00042105263157894734, "low_lr": 8.421052631578948e-06, "step": 1100 }, { "epoch": 2.8928336620644313, "high_lr": 0.00042105263157894734, "low_lr": 8.421052631578948e-06, "step": 1100 }, { "epoch": 2.8928336620644313, "high_lr": 0.00042105263157894734, "low_lr": 8.421052631578948e-06, "step": 1100 }, { "epoch": 2.8928336620644313, "high_lr": 0.00042105263157894734, "low_lr": 8.421052631578948e-06, "step": 1100 }, { "epoch": 2.8928336620644313, "high_lr": 0.00042105263157894734, "low_lr": 8.421052631578948e-06, "step": 1100 }, { "epoch": 2.8928336620644313, "high_lr": 0.00042105263157894734, "low_lr": 8.421052631578948e-06, "step": 1100 }, { "epoch": 2.8954635108481264, "grad_norm": 1.3371567726135254, "learning_rate": 0.0004205263157894737, "loss": 1.3643, "step": 1101 }, { "epoch": 2.8954635108481264, "high_lr": 0.0004205263157894737, "low_lr": 8.410526315789475e-06, "step": 1101 }, { "epoch": 2.8954635108481264, "high_lr": 0.0004205263157894737, "low_lr": 8.410526315789475e-06, "step": 1101 }, { "epoch": 2.8954635108481264, "high_lr": 0.0004205263157894737, "low_lr": 8.410526315789475e-06, "step": 1101 }, { "epoch": 2.8954635108481264, "high_lr": 0.0004205263157894737, "low_lr": 8.410526315789475e-06, "step": 1101 }, { "epoch": 2.8954635108481264, "high_lr": 0.0004205263157894737, "low_lr": 8.410526315789475e-06, "step": 1101 }, { "epoch": 2.8954635108481264, "high_lr": 0.0004205263157894737, "low_lr": 8.410526315789475e-06, "step": 1101 }, { "epoch": 2.8954635108481264, "high_lr": 0.0004205263157894737, "low_lr": 8.410526315789475e-06, "step": 1101 }, { "epoch": 2.8954635108481264, "high_lr": 0.0004205263157894737, "low_lr": 8.410526315789475e-06, "step": 1101 }, { "epoch": 2.898093359631821, "grad_norm": 1.2589834928512573, "learning_rate": 0.00042, "loss": 1.4083, "step": 1102 }, { "epoch": 2.898093359631821, "high_lr": 0.00042, "low_lr": 8.400000000000001e-06, "step": 1102 }, { "epoch": 2.898093359631821, "high_lr": 0.00042, "low_lr": 8.400000000000001e-06, "step": 1102 }, { "epoch": 2.898093359631821, "high_lr": 0.00042, "low_lr": 8.400000000000001e-06, "step": 1102 }, { "epoch": 2.898093359631821, "high_lr": 0.00042, "low_lr": 8.400000000000001e-06, "step": 1102 }, { "epoch": 2.898093359631821, "high_lr": 0.00042, "low_lr": 8.400000000000001e-06, "step": 1102 }, { "epoch": 2.898093359631821, "high_lr": 0.00042, "low_lr": 8.400000000000001e-06, "step": 1102 }, { "epoch": 2.898093359631821, "high_lr": 0.00042, "low_lr": 8.400000000000001e-06, "step": 1102 }, { "epoch": 2.898093359631821, "high_lr": 0.00042, "low_lr": 8.400000000000001e-06, "step": 1102 }, { "epoch": 2.900723208415516, "grad_norm": 1.3375548124313354, "learning_rate": 0.00041947368421052636, "loss": 1.3482, "step": 1103 }, { "epoch": 2.900723208415516, "high_lr": 0.00041947368421052636, "low_lr": 8.389473684210527e-06, "step": 1103 }, { "epoch": 2.900723208415516, "high_lr": 0.00041947368421052636, "low_lr": 8.389473684210527e-06, "step": 1103 }, { "epoch": 2.900723208415516, "high_lr": 0.00041947368421052636, "low_lr": 8.389473684210527e-06, "step": 1103 }, { "epoch": 2.900723208415516, "high_lr": 0.00041947368421052636, "low_lr": 8.389473684210527e-06, "step": 1103 }, { "epoch": 2.900723208415516, "high_lr": 0.00041947368421052636, "low_lr": 8.389473684210527e-06, "step": 1103 }, { "epoch": 2.900723208415516, "high_lr": 0.00041947368421052636, "low_lr": 8.389473684210527e-06, "step": 1103 }, { "epoch": 2.900723208415516, "high_lr": 0.00041947368421052636, "low_lr": 8.389473684210527e-06, "step": 1103 }, { "epoch": 2.900723208415516, "high_lr": 0.00041947368421052636, "low_lr": 8.389473684210527e-06, "step": 1103 }, { "epoch": 2.9033530571992108, "grad_norm": 1.3510268926620483, "learning_rate": 0.00041894736842105264, "loss": 1.3335, "step": 1104 }, { "epoch": 2.9033530571992108, "high_lr": 0.00041894736842105264, "low_lr": 8.378947368421054e-06, "step": 1104 }, { "epoch": 2.9033530571992108, "high_lr": 0.00041894736842105264, "low_lr": 8.378947368421054e-06, "step": 1104 }, { "epoch": 2.9033530571992108, "high_lr": 0.00041894736842105264, "low_lr": 8.378947368421054e-06, "step": 1104 }, { "epoch": 2.9033530571992108, "high_lr": 0.00041894736842105264, "low_lr": 8.378947368421054e-06, "step": 1104 }, { "epoch": 2.9033530571992108, "high_lr": 0.00041894736842105264, "low_lr": 8.378947368421054e-06, "step": 1104 }, { "epoch": 2.9033530571992108, "high_lr": 0.00041894736842105264, "low_lr": 8.378947368421054e-06, "step": 1104 }, { "epoch": 2.9033530571992108, "high_lr": 0.00041894736842105264, "low_lr": 8.378947368421054e-06, "step": 1104 }, { "epoch": 2.9033530571992108, "high_lr": 0.00041894736842105264, "low_lr": 8.378947368421054e-06, "step": 1104 }, { "epoch": 2.905982905982906, "grad_norm": 1.38094961643219, "learning_rate": 0.000418421052631579, "loss": 1.3722, "step": 1105 }, { "epoch": 2.905982905982906, "high_lr": 0.000418421052631579, "low_lr": 8.36842105263158e-06, "step": 1105 }, { "epoch": 2.905982905982906, "high_lr": 0.000418421052631579, "low_lr": 8.36842105263158e-06, "step": 1105 }, { "epoch": 2.905982905982906, "high_lr": 0.000418421052631579, "low_lr": 8.36842105263158e-06, "step": 1105 }, { "epoch": 2.905982905982906, "high_lr": 0.000418421052631579, "low_lr": 8.36842105263158e-06, "step": 1105 }, { "epoch": 2.905982905982906, "high_lr": 0.000418421052631579, "low_lr": 8.36842105263158e-06, "step": 1105 }, { "epoch": 2.905982905982906, "high_lr": 0.000418421052631579, "low_lr": 8.36842105263158e-06, "step": 1105 }, { "epoch": 2.905982905982906, "high_lr": 0.000418421052631579, "low_lr": 8.36842105263158e-06, "step": 1105 }, { "epoch": 2.905982905982906, "high_lr": 0.000418421052631579, "low_lr": 8.36842105263158e-06, "step": 1105 }, { "epoch": 2.908612754766601, "grad_norm": 1.4025527238845825, "learning_rate": 0.00041789473684210527, "loss": 1.3813, "step": 1106 }, { "epoch": 2.908612754766601, "high_lr": 0.00041789473684210527, "low_lr": 8.357894736842106e-06, "step": 1106 }, { "epoch": 2.908612754766601, "high_lr": 0.00041789473684210527, "low_lr": 8.357894736842106e-06, "step": 1106 }, { "epoch": 2.908612754766601, "high_lr": 0.00041789473684210527, "low_lr": 8.357894736842106e-06, "step": 1106 }, { "epoch": 2.908612754766601, "high_lr": 0.00041789473684210527, "low_lr": 8.357894736842106e-06, "step": 1106 }, { "epoch": 2.908612754766601, "high_lr": 0.00041789473684210527, "low_lr": 8.357894736842106e-06, "step": 1106 }, { "epoch": 2.908612754766601, "high_lr": 0.00041789473684210527, "low_lr": 8.357894736842106e-06, "step": 1106 }, { "epoch": 2.908612754766601, "high_lr": 0.00041789473684210527, "low_lr": 8.357894736842106e-06, "step": 1106 }, { "epoch": 2.908612754766601, "high_lr": 0.00041789473684210527, "low_lr": 8.357894736842106e-06, "step": 1106 }, { "epoch": 2.9112426035502956, "grad_norm": 1.372842788696289, "learning_rate": 0.00041736842105263156, "loss": 1.3694, "step": 1107 }, { "epoch": 2.9112426035502956, "high_lr": 0.00041736842105263156, "low_lr": 8.347368421052633e-06, "step": 1107 }, { "epoch": 2.9112426035502956, "high_lr": 0.00041736842105263156, "low_lr": 8.347368421052633e-06, "step": 1107 }, { "epoch": 2.9112426035502956, "high_lr": 0.00041736842105263156, "low_lr": 8.347368421052633e-06, "step": 1107 }, { "epoch": 2.9112426035502956, "high_lr": 0.00041736842105263156, "low_lr": 8.347368421052633e-06, "step": 1107 }, { "epoch": 2.9112426035502956, "high_lr": 0.00041736842105263156, "low_lr": 8.347368421052633e-06, "step": 1107 }, { "epoch": 2.9112426035502956, "high_lr": 0.00041736842105263156, "low_lr": 8.347368421052633e-06, "step": 1107 }, { "epoch": 2.9112426035502956, "high_lr": 0.00041736842105263156, "low_lr": 8.347368421052633e-06, "step": 1107 }, { "epoch": 2.9112426035502956, "high_lr": 0.00041736842105263156, "low_lr": 8.347368421052633e-06, "step": 1107 }, { "epoch": 2.9138724523339907, "grad_norm": 1.254815936088562, "learning_rate": 0.0004168421052631579, "loss": 1.3346, "step": 1108 }, { "epoch": 2.9138724523339907, "high_lr": 0.0004168421052631579, "low_lr": 8.336842105263159e-06, "step": 1108 }, { "epoch": 2.9138724523339907, "high_lr": 0.0004168421052631579, "low_lr": 8.336842105263159e-06, "step": 1108 }, { "epoch": 2.9138724523339907, "high_lr": 0.0004168421052631579, "low_lr": 8.336842105263159e-06, "step": 1108 }, { "epoch": 2.9138724523339907, "high_lr": 0.0004168421052631579, "low_lr": 8.336842105263159e-06, "step": 1108 }, { "epoch": 2.9138724523339907, "high_lr": 0.0004168421052631579, "low_lr": 8.336842105263159e-06, "step": 1108 }, { "epoch": 2.9138724523339907, "high_lr": 0.0004168421052631579, "low_lr": 8.336842105263159e-06, "step": 1108 }, { "epoch": 2.9138724523339907, "high_lr": 0.0004168421052631579, "low_lr": 8.336842105263159e-06, "step": 1108 }, { "epoch": 2.9138724523339907, "high_lr": 0.0004168421052631579, "low_lr": 8.336842105263159e-06, "step": 1108 }, { "epoch": 2.916502301117686, "grad_norm": 1.2994201183319092, "learning_rate": 0.0004163157894736842, "loss": 1.4007, "step": 1109 }, { "epoch": 2.916502301117686, "high_lr": 0.0004163157894736842, "low_lr": 8.326315789473685e-06, "step": 1109 }, { "epoch": 2.916502301117686, "high_lr": 0.0004163157894736842, "low_lr": 8.326315789473685e-06, "step": 1109 }, { "epoch": 2.916502301117686, "high_lr": 0.0004163157894736842, "low_lr": 8.326315789473685e-06, "step": 1109 }, { "epoch": 2.916502301117686, "high_lr": 0.0004163157894736842, "low_lr": 8.326315789473685e-06, "step": 1109 }, { "epoch": 2.916502301117686, "high_lr": 0.0004163157894736842, "low_lr": 8.326315789473685e-06, "step": 1109 }, { "epoch": 2.916502301117686, "high_lr": 0.0004163157894736842, "low_lr": 8.326315789473685e-06, "step": 1109 }, { "epoch": 2.916502301117686, "high_lr": 0.0004163157894736842, "low_lr": 8.326315789473685e-06, "step": 1109 }, { "epoch": 2.916502301117686, "high_lr": 0.0004163157894736842, "low_lr": 8.326315789473685e-06, "step": 1109 }, { "epoch": 2.9191321499013805, "grad_norm": 1.2803548574447632, "learning_rate": 0.0004157894736842106, "loss": 1.3833, "step": 1110 }, { "epoch": 2.9191321499013805, "high_lr": 0.0004157894736842106, "low_lr": 8.315789473684212e-06, "step": 1110 }, { "epoch": 2.9191321499013805, "high_lr": 0.0004157894736842106, "low_lr": 8.315789473684212e-06, "step": 1110 }, { "epoch": 2.9191321499013805, "high_lr": 0.0004157894736842106, "low_lr": 8.315789473684212e-06, "step": 1110 }, { "epoch": 2.9191321499013805, "high_lr": 0.0004157894736842106, "low_lr": 8.315789473684212e-06, "step": 1110 }, { "epoch": 2.9191321499013805, "high_lr": 0.0004157894736842106, "low_lr": 8.315789473684212e-06, "step": 1110 }, { "epoch": 2.9191321499013805, "high_lr": 0.0004157894736842106, "low_lr": 8.315789473684212e-06, "step": 1110 }, { "epoch": 2.9191321499013805, "high_lr": 0.0004157894736842106, "low_lr": 8.315789473684212e-06, "step": 1110 }, { "epoch": 2.9191321499013805, "high_lr": 0.0004157894736842106, "low_lr": 8.315789473684212e-06, "step": 1110 }, { "epoch": 2.9217619986850756, "grad_norm": 1.354733943939209, "learning_rate": 0.00041526315789473686, "loss": 1.3278, "step": 1111 }, { "epoch": 2.9217619986850756, "high_lr": 0.00041526315789473686, "low_lr": 8.305263157894738e-06, "step": 1111 }, { "epoch": 2.9217619986850756, "high_lr": 0.00041526315789473686, "low_lr": 8.305263157894738e-06, "step": 1111 }, { "epoch": 2.9217619986850756, "high_lr": 0.00041526315789473686, "low_lr": 8.305263157894738e-06, "step": 1111 }, { "epoch": 2.9217619986850756, "high_lr": 0.00041526315789473686, "low_lr": 8.305263157894738e-06, "step": 1111 }, { "epoch": 2.9217619986850756, "high_lr": 0.00041526315789473686, "low_lr": 8.305263157894738e-06, "step": 1111 }, { "epoch": 2.9217619986850756, "high_lr": 0.00041526315789473686, "low_lr": 8.305263157894738e-06, "step": 1111 }, { "epoch": 2.9217619986850756, "high_lr": 0.00041526315789473686, "low_lr": 8.305263157894738e-06, "step": 1111 }, { "epoch": 2.9217619986850756, "high_lr": 0.00041526315789473686, "low_lr": 8.305263157894738e-06, "step": 1111 }, { "epoch": 2.9243918474687707, "grad_norm": 1.2924370765686035, "learning_rate": 0.0004147368421052632, "loss": 1.3748, "step": 1112 }, { "epoch": 2.9243918474687707, "high_lr": 0.0004147368421052632, "low_lr": 8.294736842105264e-06, "step": 1112 }, { "epoch": 2.9243918474687707, "high_lr": 0.0004147368421052632, "low_lr": 8.294736842105264e-06, "step": 1112 }, { "epoch": 2.9243918474687707, "high_lr": 0.0004147368421052632, "low_lr": 8.294736842105264e-06, "step": 1112 }, { "epoch": 2.9243918474687707, "high_lr": 0.0004147368421052632, "low_lr": 8.294736842105264e-06, "step": 1112 }, { "epoch": 2.9243918474687707, "high_lr": 0.0004147368421052632, "low_lr": 8.294736842105264e-06, "step": 1112 }, { "epoch": 2.9243918474687707, "high_lr": 0.0004147368421052632, "low_lr": 8.294736842105264e-06, "step": 1112 }, { "epoch": 2.9243918474687707, "high_lr": 0.0004147368421052632, "low_lr": 8.294736842105264e-06, "step": 1112 }, { "epoch": 2.9243918474687707, "high_lr": 0.0004147368421052632, "low_lr": 8.294736842105264e-06, "step": 1112 }, { "epoch": 2.9270216962524653, "grad_norm": 1.2990412712097168, "learning_rate": 0.0004142105263157895, "loss": 1.3719, "step": 1113 }, { "epoch": 2.9270216962524653, "high_lr": 0.0004142105263157895, "low_lr": 8.28421052631579e-06, "step": 1113 }, { "epoch": 2.9270216962524653, "high_lr": 0.0004142105263157895, "low_lr": 8.28421052631579e-06, "step": 1113 }, { "epoch": 2.9270216962524653, "high_lr": 0.0004142105263157895, "low_lr": 8.28421052631579e-06, "step": 1113 }, { "epoch": 2.9270216962524653, "high_lr": 0.0004142105263157895, "low_lr": 8.28421052631579e-06, "step": 1113 }, { "epoch": 2.9270216962524653, "high_lr": 0.0004142105263157895, "low_lr": 8.28421052631579e-06, "step": 1113 }, { "epoch": 2.9270216962524653, "high_lr": 0.0004142105263157895, "low_lr": 8.28421052631579e-06, "step": 1113 }, { "epoch": 2.9270216962524653, "high_lr": 0.0004142105263157895, "low_lr": 8.28421052631579e-06, "step": 1113 }, { "epoch": 2.9270216962524653, "high_lr": 0.0004142105263157895, "low_lr": 8.28421052631579e-06, "step": 1113 }, { "epoch": 2.9296515450361604, "grad_norm": 1.5746723413467407, "learning_rate": 0.0004136842105263158, "loss": 1.4748, "step": 1114 }, { "epoch": 2.9296515450361604, "high_lr": 0.0004136842105263158, "low_lr": 8.273684210526317e-06, "step": 1114 }, { "epoch": 2.9296515450361604, "high_lr": 0.0004136842105263158, "low_lr": 8.273684210526317e-06, "step": 1114 }, { "epoch": 2.9296515450361604, "high_lr": 0.0004136842105263158, "low_lr": 8.273684210526317e-06, "step": 1114 }, { "epoch": 2.9296515450361604, "high_lr": 0.0004136842105263158, "low_lr": 8.273684210526317e-06, "step": 1114 }, { "epoch": 2.9296515450361604, "high_lr": 0.0004136842105263158, "low_lr": 8.273684210526317e-06, "step": 1114 }, { "epoch": 2.9296515450361604, "high_lr": 0.0004136842105263158, "low_lr": 8.273684210526317e-06, "step": 1114 }, { "epoch": 2.9296515450361604, "high_lr": 0.0004136842105263158, "low_lr": 8.273684210526317e-06, "step": 1114 }, { "epoch": 2.9296515450361604, "high_lr": 0.0004136842105263158, "low_lr": 8.273684210526317e-06, "step": 1114 }, { "epoch": 2.9322813938198555, "grad_norm": 1.3033024072647095, "learning_rate": 0.0004131578947368421, "loss": 1.4211, "step": 1115 }, { "epoch": 2.9322813938198555, "high_lr": 0.0004131578947368421, "low_lr": 8.263157894736843e-06, "step": 1115 }, { "epoch": 2.9322813938198555, "high_lr": 0.0004131578947368421, "low_lr": 8.263157894736843e-06, "step": 1115 }, { "epoch": 2.9322813938198555, "high_lr": 0.0004131578947368421, "low_lr": 8.263157894736843e-06, "step": 1115 }, { "epoch": 2.9322813938198555, "high_lr": 0.0004131578947368421, "low_lr": 8.263157894736843e-06, "step": 1115 }, { "epoch": 2.9322813938198555, "high_lr": 0.0004131578947368421, "low_lr": 8.263157894736843e-06, "step": 1115 }, { "epoch": 2.9322813938198555, "high_lr": 0.0004131578947368421, "low_lr": 8.263157894736843e-06, "step": 1115 }, { "epoch": 2.9322813938198555, "high_lr": 0.0004131578947368421, "low_lr": 8.263157894736843e-06, "step": 1115 }, { "epoch": 2.9322813938198555, "high_lr": 0.0004131578947368421, "low_lr": 8.263157894736843e-06, "step": 1115 }, { "epoch": 2.93491124260355, "grad_norm": 1.3628966808319092, "learning_rate": 0.0004126315789473684, "loss": 1.3952, "step": 1116 }, { "epoch": 2.93491124260355, "high_lr": 0.0004126315789473684, "low_lr": 8.25263157894737e-06, "step": 1116 }, { "epoch": 2.93491124260355, "high_lr": 0.0004126315789473684, "low_lr": 8.25263157894737e-06, "step": 1116 }, { "epoch": 2.93491124260355, "high_lr": 0.0004126315789473684, "low_lr": 8.25263157894737e-06, "step": 1116 }, { "epoch": 2.93491124260355, "high_lr": 0.0004126315789473684, "low_lr": 8.25263157894737e-06, "step": 1116 }, { "epoch": 2.93491124260355, "high_lr": 0.0004126315789473684, "low_lr": 8.25263157894737e-06, "step": 1116 }, { "epoch": 2.93491124260355, "high_lr": 0.0004126315789473684, "low_lr": 8.25263157894737e-06, "step": 1116 }, { "epoch": 2.93491124260355, "high_lr": 0.0004126315789473684, "low_lr": 8.25263157894737e-06, "step": 1116 }, { "epoch": 2.93491124260355, "high_lr": 0.0004126315789473684, "low_lr": 8.25263157894737e-06, "step": 1116 }, { "epoch": 2.9375410913872453, "grad_norm": 1.1582694053649902, "learning_rate": 0.00041210526315789474, "loss": 1.3778, "step": 1117 }, { "epoch": 2.9375410913872453, "high_lr": 0.00041210526315789474, "low_lr": 8.242105263157896e-06, "step": 1117 }, { "epoch": 2.9375410913872453, "high_lr": 0.00041210526315789474, "low_lr": 8.242105263157896e-06, "step": 1117 }, { "epoch": 2.9375410913872453, "high_lr": 0.00041210526315789474, "low_lr": 8.242105263157896e-06, "step": 1117 }, { "epoch": 2.9375410913872453, "high_lr": 0.00041210526315789474, "low_lr": 8.242105263157896e-06, "step": 1117 }, { "epoch": 2.9375410913872453, "high_lr": 0.00041210526315789474, "low_lr": 8.242105263157896e-06, "step": 1117 }, { "epoch": 2.9375410913872453, "high_lr": 0.00041210526315789474, "low_lr": 8.242105263157896e-06, "step": 1117 }, { "epoch": 2.9375410913872453, "high_lr": 0.00041210526315789474, "low_lr": 8.242105263157896e-06, "step": 1117 }, { "epoch": 2.9375410913872453, "high_lr": 0.00041210526315789474, "low_lr": 8.242105263157896e-06, "step": 1117 }, { "epoch": 2.9401709401709404, "grad_norm": 1.3429607152938843, "learning_rate": 0.0004115789473684211, "loss": 1.3652, "step": 1118 }, { "epoch": 2.9401709401709404, "high_lr": 0.0004115789473684211, "low_lr": 8.231578947368422e-06, "step": 1118 }, { "epoch": 2.9401709401709404, "high_lr": 0.0004115789473684211, "low_lr": 8.231578947368422e-06, "step": 1118 }, { "epoch": 2.9401709401709404, "high_lr": 0.0004115789473684211, "low_lr": 8.231578947368422e-06, "step": 1118 }, { "epoch": 2.9401709401709404, "high_lr": 0.0004115789473684211, "low_lr": 8.231578947368422e-06, "step": 1118 }, { "epoch": 2.9401709401709404, "high_lr": 0.0004115789473684211, "low_lr": 8.231578947368422e-06, "step": 1118 }, { "epoch": 2.9401709401709404, "high_lr": 0.0004115789473684211, "low_lr": 8.231578947368422e-06, "step": 1118 }, { "epoch": 2.9401709401709404, "high_lr": 0.0004115789473684211, "low_lr": 8.231578947368422e-06, "step": 1118 }, { "epoch": 2.9401709401709404, "high_lr": 0.0004115789473684211, "low_lr": 8.231578947368422e-06, "step": 1118 }, { "epoch": 2.942800788954635, "grad_norm": 1.2951549291610718, "learning_rate": 0.0004110526315789474, "loss": 1.4009, "step": 1119 }, { "epoch": 2.942800788954635, "high_lr": 0.0004110526315789474, "low_lr": 8.221052631578948e-06, "step": 1119 }, { "epoch": 2.942800788954635, "high_lr": 0.0004110526315789474, "low_lr": 8.221052631578948e-06, "step": 1119 }, { "epoch": 2.942800788954635, "high_lr": 0.0004110526315789474, "low_lr": 8.221052631578948e-06, "step": 1119 }, { "epoch": 2.942800788954635, "high_lr": 0.0004110526315789474, "low_lr": 8.221052631578948e-06, "step": 1119 }, { "epoch": 2.942800788954635, "high_lr": 0.0004110526315789474, "low_lr": 8.221052631578948e-06, "step": 1119 }, { "epoch": 2.942800788954635, "high_lr": 0.0004110526315789474, "low_lr": 8.221052631578948e-06, "step": 1119 }, { "epoch": 2.942800788954635, "high_lr": 0.0004110526315789474, "low_lr": 8.221052631578948e-06, "step": 1119 }, { "epoch": 2.942800788954635, "high_lr": 0.0004110526315789474, "low_lr": 8.221052631578948e-06, "step": 1119 }, { "epoch": 2.94543063773833, "grad_norm": 1.3277417421340942, "learning_rate": 0.0004105263157894737, "loss": 1.3428, "step": 1120 }, { "epoch": 2.94543063773833, "high_lr": 0.0004105263157894737, "low_lr": 8.210526315789475e-06, "step": 1120 }, { "epoch": 2.94543063773833, "high_lr": 0.0004105263157894737, "low_lr": 8.210526315789475e-06, "step": 1120 }, { "epoch": 2.94543063773833, "high_lr": 0.0004105263157894737, "low_lr": 8.210526315789475e-06, "step": 1120 }, { "epoch": 2.94543063773833, "high_lr": 0.0004105263157894737, "low_lr": 8.210526315789475e-06, "step": 1120 }, { "epoch": 2.94543063773833, "high_lr": 0.0004105263157894737, "low_lr": 8.210526315789475e-06, "step": 1120 }, { "epoch": 2.94543063773833, "high_lr": 0.0004105263157894737, "low_lr": 8.210526315789475e-06, "step": 1120 }, { "epoch": 2.94543063773833, "high_lr": 0.0004105263157894737, "low_lr": 8.210526315789475e-06, "step": 1120 }, { "epoch": 2.94543063773833, "high_lr": 0.0004105263157894737, "low_lr": 8.210526315789475e-06, "step": 1120 }, { "epoch": 2.9480604865220252, "grad_norm": 1.3367265462875366, "learning_rate": 0.00041, "loss": 1.3685, "step": 1121 }, { "epoch": 2.9480604865220252, "high_lr": 0.00041, "low_lr": 8.2e-06, "step": 1121 }, { "epoch": 2.9480604865220252, "high_lr": 0.00041, "low_lr": 8.2e-06, "step": 1121 }, { "epoch": 2.9480604865220252, "high_lr": 0.00041, "low_lr": 8.2e-06, "step": 1121 }, { "epoch": 2.9480604865220252, "high_lr": 0.00041, "low_lr": 8.2e-06, "step": 1121 }, { "epoch": 2.9480604865220252, "high_lr": 0.00041, "low_lr": 8.2e-06, "step": 1121 }, { "epoch": 2.9480604865220252, "high_lr": 0.00041, "low_lr": 8.2e-06, "step": 1121 }, { "epoch": 2.9480604865220252, "high_lr": 0.00041, "low_lr": 8.2e-06, "step": 1121 }, { "epoch": 2.9480604865220252, "high_lr": 0.00041, "low_lr": 8.2e-06, "step": 1121 }, { "epoch": 2.95069033530572, "grad_norm": 1.2772372961044312, "learning_rate": 0.00040947368421052633, "loss": 1.3718, "step": 1122 }, { "epoch": 2.95069033530572, "high_lr": 0.00040947368421052633, "low_lr": 8.189473684210527e-06, "step": 1122 }, { "epoch": 2.95069033530572, "high_lr": 0.00040947368421052633, "low_lr": 8.189473684210527e-06, "step": 1122 }, { "epoch": 2.95069033530572, "high_lr": 0.00040947368421052633, "low_lr": 8.189473684210527e-06, "step": 1122 }, { "epoch": 2.95069033530572, "high_lr": 0.00040947368421052633, "low_lr": 8.189473684210527e-06, "step": 1122 }, { "epoch": 2.95069033530572, "high_lr": 0.00040947368421052633, "low_lr": 8.189473684210527e-06, "step": 1122 }, { "epoch": 2.95069033530572, "high_lr": 0.00040947368421052633, "low_lr": 8.189473684210527e-06, "step": 1122 }, { "epoch": 2.95069033530572, "high_lr": 0.00040947368421052633, "low_lr": 8.189473684210527e-06, "step": 1122 }, { "epoch": 2.95069033530572, "high_lr": 0.00040947368421052633, "low_lr": 8.189473684210527e-06, "step": 1122 }, { "epoch": 2.953320184089415, "grad_norm": 1.3111339807510376, "learning_rate": 0.0004089473684210526, "loss": 1.395, "step": 1123 }, { "epoch": 2.953320184089415, "high_lr": 0.0004089473684210526, "low_lr": 8.178947368421054e-06, "step": 1123 }, { "epoch": 2.953320184089415, "high_lr": 0.0004089473684210526, "low_lr": 8.178947368421054e-06, "step": 1123 }, { "epoch": 2.953320184089415, "high_lr": 0.0004089473684210526, "low_lr": 8.178947368421054e-06, "step": 1123 }, { "epoch": 2.953320184089415, "high_lr": 0.0004089473684210526, "low_lr": 8.178947368421054e-06, "step": 1123 }, { "epoch": 2.953320184089415, "high_lr": 0.0004089473684210526, "low_lr": 8.178947368421054e-06, "step": 1123 }, { "epoch": 2.953320184089415, "high_lr": 0.0004089473684210526, "low_lr": 8.178947368421054e-06, "step": 1123 }, { "epoch": 2.953320184089415, "high_lr": 0.0004089473684210526, "low_lr": 8.178947368421054e-06, "step": 1123 }, { "epoch": 2.953320184089415, "high_lr": 0.0004089473684210526, "low_lr": 8.178947368421054e-06, "step": 1123 }, { "epoch": 2.9559500328731096, "grad_norm": 1.2343921661376953, "learning_rate": 0.00040842105263157896, "loss": 1.3731, "step": 1124 }, { "epoch": 2.9559500328731096, "high_lr": 0.00040842105263157896, "low_lr": 8.16842105263158e-06, "step": 1124 }, { "epoch": 2.9559500328731096, "high_lr": 0.00040842105263157896, "low_lr": 8.16842105263158e-06, "step": 1124 }, { "epoch": 2.9559500328731096, "high_lr": 0.00040842105263157896, "low_lr": 8.16842105263158e-06, "step": 1124 }, { "epoch": 2.9559500328731096, "high_lr": 0.00040842105263157896, "low_lr": 8.16842105263158e-06, "step": 1124 }, { "epoch": 2.9559500328731096, "high_lr": 0.00040842105263157896, "low_lr": 8.16842105263158e-06, "step": 1124 }, { "epoch": 2.9559500328731096, "high_lr": 0.00040842105263157896, "low_lr": 8.16842105263158e-06, "step": 1124 }, { "epoch": 2.9559500328731096, "high_lr": 0.00040842105263157896, "low_lr": 8.16842105263158e-06, "step": 1124 }, { "epoch": 2.9559500328731096, "high_lr": 0.00040842105263157896, "low_lr": 8.16842105263158e-06, "step": 1124 }, { "epoch": 2.9585798816568047, "grad_norm": 1.253628134727478, "learning_rate": 0.00040789473684210524, "loss": 1.381, "step": 1125 }, { "epoch": 2.9585798816568047, "high_lr": 0.00040789473684210524, "low_lr": 8.157894736842106e-06, "step": 1125 }, { "epoch": 2.9585798816568047, "high_lr": 0.00040789473684210524, "low_lr": 8.157894736842106e-06, "step": 1125 }, { "epoch": 2.9585798816568047, "high_lr": 0.00040789473684210524, "low_lr": 8.157894736842106e-06, "step": 1125 }, { "epoch": 2.9585798816568047, "high_lr": 0.00040789473684210524, "low_lr": 8.157894736842106e-06, "step": 1125 }, { "epoch": 2.9585798816568047, "high_lr": 0.00040789473684210524, "low_lr": 8.157894736842106e-06, "step": 1125 }, { "epoch": 2.9585798816568047, "high_lr": 0.00040789473684210524, "low_lr": 8.157894736842106e-06, "step": 1125 }, { "epoch": 2.9585798816568047, "high_lr": 0.00040789473684210524, "low_lr": 8.157894736842106e-06, "step": 1125 }, { "epoch": 2.9585798816568047, "high_lr": 0.00040789473684210524, "low_lr": 8.157894736842106e-06, "step": 1125 }, { "epoch": 2.9612097304404994, "grad_norm": 1.2898083925247192, "learning_rate": 0.00040736842105263164, "loss": 1.4433, "step": 1126 }, { "epoch": 2.9612097304404994, "high_lr": 0.00040736842105263164, "low_lr": 8.147368421052633e-06, "step": 1126 }, { "epoch": 2.9612097304404994, "high_lr": 0.00040736842105263164, "low_lr": 8.147368421052633e-06, "step": 1126 }, { "epoch": 2.9612097304404994, "high_lr": 0.00040736842105263164, "low_lr": 8.147368421052633e-06, "step": 1126 }, { "epoch": 2.9612097304404994, "high_lr": 0.00040736842105263164, "low_lr": 8.147368421052633e-06, "step": 1126 }, { "epoch": 2.9612097304404994, "high_lr": 0.00040736842105263164, "low_lr": 8.147368421052633e-06, "step": 1126 }, { "epoch": 2.9612097304404994, "high_lr": 0.00040736842105263164, "low_lr": 8.147368421052633e-06, "step": 1126 }, { "epoch": 2.9612097304404994, "high_lr": 0.00040736842105263164, "low_lr": 8.147368421052633e-06, "step": 1126 }, { "epoch": 2.9612097304404994, "high_lr": 0.00040736842105263164, "low_lr": 8.147368421052633e-06, "step": 1126 }, { "epoch": 2.9638395792241945, "grad_norm": 1.3289254903793335, "learning_rate": 0.0004068421052631579, "loss": 1.3839, "step": 1127 }, { "epoch": 2.9638395792241945, "high_lr": 0.0004068421052631579, "low_lr": 8.136842105263159e-06, "step": 1127 }, { "epoch": 2.9638395792241945, "high_lr": 0.0004068421052631579, "low_lr": 8.136842105263159e-06, "step": 1127 }, { "epoch": 2.9638395792241945, "high_lr": 0.0004068421052631579, "low_lr": 8.136842105263159e-06, "step": 1127 }, { "epoch": 2.9638395792241945, "high_lr": 0.0004068421052631579, "low_lr": 8.136842105263159e-06, "step": 1127 }, { "epoch": 2.9638395792241945, "high_lr": 0.0004068421052631579, "low_lr": 8.136842105263159e-06, "step": 1127 }, { "epoch": 2.9638395792241945, "high_lr": 0.0004068421052631579, "low_lr": 8.136842105263159e-06, "step": 1127 }, { "epoch": 2.9638395792241945, "high_lr": 0.0004068421052631579, "low_lr": 8.136842105263159e-06, "step": 1127 }, { "epoch": 2.9638395792241945, "high_lr": 0.0004068421052631579, "low_lr": 8.136842105263159e-06, "step": 1127 }, { "epoch": 2.9664694280078896, "grad_norm": 1.3643008470535278, "learning_rate": 0.0004063157894736842, "loss": 1.4039, "step": 1128 }, { "epoch": 2.9664694280078896, "high_lr": 0.0004063157894736842, "low_lr": 8.126315789473684e-06, "step": 1128 }, { "epoch": 2.9664694280078896, "high_lr": 0.0004063157894736842, "low_lr": 8.126315789473684e-06, "step": 1128 }, { "epoch": 2.9664694280078896, "high_lr": 0.0004063157894736842, "low_lr": 8.126315789473684e-06, "step": 1128 }, { "epoch": 2.9664694280078896, "high_lr": 0.0004063157894736842, "low_lr": 8.126315789473684e-06, "step": 1128 }, { "epoch": 2.9664694280078896, "high_lr": 0.0004063157894736842, "low_lr": 8.126315789473684e-06, "step": 1128 }, { "epoch": 2.9664694280078896, "high_lr": 0.0004063157894736842, "low_lr": 8.126315789473684e-06, "step": 1128 }, { "epoch": 2.9664694280078896, "high_lr": 0.0004063157894736842, "low_lr": 8.126315789473684e-06, "step": 1128 }, { "epoch": 2.9664694280078896, "high_lr": 0.0004063157894736842, "low_lr": 8.126315789473684e-06, "step": 1128 }, { "epoch": 2.9690992767915843, "grad_norm": 1.2966452836990356, "learning_rate": 0.00040578947368421055, "loss": 1.3814, "step": 1129 }, { "epoch": 2.9690992767915843, "high_lr": 0.00040578947368421055, "low_lr": 8.115789473684212e-06, "step": 1129 }, { "epoch": 2.9690992767915843, "high_lr": 0.00040578947368421055, "low_lr": 8.115789473684212e-06, "step": 1129 }, { "epoch": 2.9690992767915843, "high_lr": 0.00040578947368421055, "low_lr": 8.115789473684212e-06, "step": 1129 }, { "epoch": 2.9690992767915843, "high_lr": 0.00040578947368421055, "low_lr": 8.115789473684212e-06, "step": 1129 }, { "epoch": 2.9690992767915843, "high_lr": 0.00040578947368421055, "low_lr": 8.115789473684212e-06, "step": 1129 }, { "epoch": 2.9690992767915843, "high_lr": 0.00040578947368421055, "low_lr": 8.115789473684212e-06, "step": 1129 }, { "epoch": 2.9690992767915843, "high_lr": 0.00040578947368421055, "low_lr": 8.115789473684212e-06, "step": 1129 }, { "epoch": 2.9690992767915843, "high_lr": 0.00040578947368421055, "low_lr": 8.115789473684212e-06, "step": 1129 }, { "epoch": 2.9717291255752794, "grad_norm": 1.4507331848144531, "learning_rate": 0.00040526315789473684, "loss": 1.3616, "step": 1130 }, { "epoch": 2.9717291255752794, "high_lr": 0.00040526315789473684, "low_lr": 8.105263157894736e-06, "step": 1130 }, { "epoch": 2.9717291255752794, "high_lr": 0.00040526315789473684, "low_lr": 8.105263157894736e-06, "step": 1130 }, { "epoch": 2.9717291255752794, "high_lr": 0.00040526315789473684, "low_lr": 8.105263157894736e-06, "step": 1130 }, { "epoch": 2.9717291255752794, "high_lr": 0.00040526315789473684, "low_lr": 8.105263157894736e-06, "step": 1130 }, { "epoch": 2.9717291255752794, "high_lr": 0.00040526315789473684, "low_lr": 8.105263157894736e-06, "step": 1130 }, { "epoch": 2.9717291255752794, "high_lr": 0.00040526315789473684, "low_lr": 8.105263157894736e-06, "step": 1130 }, { "epoch": 2.9717291255752794, "high_lr": 0.00040526315789473684, "low_lr": 8.105263157894736e-06, "step": 1130 }, { "epoch": 2.9717291255752794, "high_lr": 0.00040526315789473684, "low_lr": 8.105263157894736e-06, "step": 1130 }, { "epoch": 2.9743589743589745, "grad_norm": 1.286329984664917, "learning_rate": 0.0004047368421052632, "loss": 1.466, "step": 1131 }, { "epoch": 2.9743589743589745, "high_lr": 0.0004047368421052632, "low_lr": 8.094736842105264e-06, "step": 1131 }, { "epoch": 2.9743589743589745, "high_lr": 0.0004047368421052632, "low_lr": 8.094736842105264e-06, "step": 1131 }, { "epoch": 2.9743589743589745, "high_lr": 0.0004047368421052632, "low_lr": 8.094736842105264e-06, "step": 1131 }, { "epoch": 2.9743589743589745, "high_lr": 0.0004047368421052632, "low_lr": 8.094736842105264e-06, "step": 1131 }, { "epoch": 2.9743589743589745, "high_lr": 0.0004047368421052632, "low_lr": 8.094736842105264e-06, "step": 1131 }, { "epoch": 2.9743589743589745, "high_lr": 0.0004047368421052632, "low_lr": 8.094736842105264e-06, "step": 1131 }, { "epoch": 2.9743589743589745, "high_lr": 0.0004047368421052632, "low_lr": 8.094736842105264e-06, "step": 1131 }, { "epoch": 2.9743589743589745, "high_lr": 0.0004047368421052632, "low_lr": 8.094736842105264e-06, "step": 1131 }, { "epoch": 2.976988823142669, "grad_norm": 1.3008004426956177, "learning_rate": 0.00040421052631578946, "loss": 1.3937, "step": 1132 }, { "epoch": 2.976988823142669, "high_lr": 0.00040421052631578946, "low_lr": 8.08421052631579e-06, "step": 1132 }, { "epoch": 2.976988823142669, "high_lr": 0.00040421052631578946, "low_lr": 8.08421052631579e-06, "step": 1132 }, { "epoch": 2.976988823142669, "high_lr": 0.00040421052631578946, "low_lr": 8.08421052631579e-06, "step": 1132 }, { "epoch": 2.976988823142669, "high_lr": 0.00040421052631578946, "low_lr": 8.08421052631579e-06, "step": 1132 }, { "epoch": 2.976988823142669, "high_lr": 0.00040421052631578946, "low_lr": 8.08421052631579e-06, "step": 1132 }, { "epoch": 2.976988823142669, "high_lr": 0.00040421052631578946, "low_lr": 8.08421052631579e-06, "step": 1132 }, { "epoch": 2.976988823142669, "high_lr": 0.00040421052631578946, "low_lr": 8.08421052631579e-06, "step": 1132 }, { "epoch": 2.976988823142669, "high_lr": 0.00040421052631578946, "low_lr": 8.08421052631579e-06, "step": 1132 }, { "epoch": 2.979618671926364, "grad_norm": 1.2866188287734985, "learning_rate": 0.0004036842105263158, "loss": 1.4485, "step": 1133 }, { "epoch": 2.979618671926364, "high_lr": 0.0004036842105263158, "low_lr": 8.073684210526317e-06, "step": 1133 }, { "epoch": 2.979618671926364, "high_lr": 0.0004036842105263158, "low_lr": 8.073684210526317e-06, "step": 1133 }, { "epoch": 2.979618671926364, "high_lr": 0.0004036842105263158, "low_lr": 8.073684210526317e-06, "step": 1133 }, { "epoch": 2.979618671926364, "high_lr": 0.0004036842105263158, "low_lr": 8.073684210526317e-06, "step": 1133 }, { "epoch": 2.979618671926364, "high_lr": 0.0004036842105263158, "low_lr": 8.073684210526317e-06, "step": 1133 }, { "epoch": 2.979618671926364, "high_lr": 0.0004036842105263158, "low_lr": 8.073684210526317e-06, "step": 1133 }, { "epoch": 2.979618671926364, "high_lr": 0.0004036842105263158, "low_lr": 8.073684210526317e-06, "step": 1133 }, { "epoch": 2.979618671926364, "high_lr": 0.0004036842105263158, "low_lr": 8.073684210526317e-06, "step": 1133 }, { "epoch": 2.9822485207100593, "grad_norm": 1.3842809200286865, "learning_rate": 0.0004031578947368421, "loss": 1.4056, "step": 1134 }, { "epoch": 2.9822485207100593, "high_lr": 0.0004031578947368421, "low_lr": 8.063157894736843e-06, "step": 1134 }, { "epoch": 2.9822485207100593, "high_lr": 0.0004031578947368421, "low_lr": 8.063157894736843e-06, "step": 1134 }, { "epoch": 2.9822485207100593, "high_lr": 0.0004031578947368421, "low_lr": 8.063157894736843e-06, "step": 1134 }, { "epoch": 2.9822485207100593, "high_lr": 0.0004031578947368421, "low_lr": 8.063157894736843e-06, "step": 1134 }, { "epoch": 2.9822485207100593, "high_lr": 0.0004031578947368421, "low_lr": 8.063157894736843e-06, "step": 1134 }, { "epoch": 2.9822485207100593, "high_lr": 0.0004031578947368421, "low_lr": 8.063157894736843e-06, "step": 1134 }, { "epoch": 2.9822485207100593, "high_lr": 0.0004031578947368421, "low_lr": 8.063157894736843e-06, "step": 1134 }, { "epoch": 2.9822485207100593, "high_lr": 0.0004031578947368421, "low_lr": 8.063157894736843e-06, "step": 1134 }, { "epoch": 2.984878369493754, "grad_norm": 1.3081294298171997, "learning_rate": 0.00040263157894736843, "loss": 1.3381, "step": 1135 }, { "epoch": 2.984878369493754, "high_lr": 0.00040263157894736843, "low_lr": 8.052631578947368e-06, "step": 1135 }, { "epoch": 2.984878369493754, "high_lr": 0.00040263157894736843, "low_lr": 8.052631578947368e-06, "step": 1135 }, { "epoch": 2.984878369493754, "high_lr": 0.00040263157894736843, "low_lr": 8.052631578947368e-06, "step": 1135 }, { "epoch": 2.984878369493754, "high_lr": 0.00040263157894736843, "low_lr": 8.052631578947368e-06, "step": 1135 }, { "epoch": 2.984878369493754, "high_lr": 0.00040263157894736843, "low_lr": 8.052631578947368e-06, "step": 1135 }, { "epoch": 2.984878369493754, "high_lr": 0.00040263157894736843, "low_lr": 8.052631578947368e-06, "step": 1135 }, { "epoch": 2.984878369493754, "high_lr": 0.00040263157894736843, "low_lr": 8.052631578947368e-06, "step": 1135 }, { "epoch": 2.984878369493754, "high_lr": 0.00040263157894736843, "low_lr": 8.052631578947368e-06, "step": 1135 }, { "epoch": 2.987508218277449, "grad_norm": 1.2561933994293213, "learning_rate": 0.00040210526315789477, "loss": 1.371, "step": 1136 }, { "epoch": 2.987508218277449, "high_lr": 0.00040210526315789477, "low_lr": 8.042105263157896e-06, "step": 1136 }, { "epoch": 2.987508218277449, "high_lr": 0.00040210526315789477, "low_lr": 8.042105263157896e-06, "step": 1136 }, { "epoch": 2.987508218277449, "high_lr": 0.00040210526315789477, "low_lr": 8.042105263157896e-06, "step": 1136 }, { "epoch": 2.987508218277449, "high_lr": 0.00040210526315789477, "low_lr": 8.042105263157896e-06, "step": 1136 }, { "epoch": 2.987508218277449, "high_lr": 0.00040210526315789477, "low_lr": 8.042105263157896e-06, "step": 1136 }, { "epoch": 2.987508218277449, "high_lr": 0.00040210526315789477, "low_lr": 8.042105263157896e-06, "step": 1136 }, { "epoch": 2.987508218277449, "high_lr": 0.00040210526315789477, "low_lr": 8.042105263157896e-06, "step": 1136 }, { "epoch": 2.987508218277449, "high_lr": 0.00040210526315789477, "low_lr": 8.042105263157896e-06, "step": 1136 }, { "epoch": 2.990138067061144, "grad_norm": 1.2555829286575317, "learning_rate": 0.00040157894736842105, "loss": 1.3887, "step": 1137 }, { "epoch": 2.990138067061144, "high_lr": 0.00040157894736842105, "low_lr": 8.03157894736842e-06, "step": 1137 }, { "epoch": 2.990138067061144, "high_lr": 0.00040157894736842105, "low_lr": 8.03157894736842e-06, "step": 1137 }, { "epoch": 2.990138067061144, "high_lr": 0.00040157894736842105, "low_lr": 8.03157894736842e-06, "step": 1137 }, { "epoch": 2.990138067061144, "high_lr": 0.00040157894736842105, "low_lr": 8.03157894736842e-06, "step": 1137 }, { "epoch": 2.990138067061144, "high_lr": 0.00040157894736842105, "low_lr": 8.03157894736842e-06, "step": 1137 }, { "epoch": 2.990138067061144, "high_lr": 0.00040157894736842105, "low_lr": 8.03157894736842e-06, "step": 1137 }, { "epoch": 2.990138067061144, "high_lr": 0.00040157894736842105, "low_lr": 8.03157894736842e-06, "step": 1137 }, { "epoch": 2.990138067061144, "high_lr": 0.00040157894736842105, "low_lr": 8.03157894736842e-06, "step": 1137 }, { "epoch": 2.992767915844839, "grad_norm": 1.3083648681640625, "learning_rate": 0.0004010526315789474, "loss": 1.4056, "step": 1138 }, { "epoch": 2.992767915844839, "high_lr": 0.0004010526315789474, "low_lr": 8.021052631578949e-06, "step": 1138 }, { "epoch": 2.992767915844839, "high_lr": 0.0004010526315789474, "low_lr": 8.021052631578949e-06, "step": 1138 }, { "epoch": 2.992767915844839, "high_lr": 0.0004010526315789474, "low_lr": 8.021052631578949e-06, "step": 1138 }, { "epoch": 2.992767915844839, "high_lr": 0.0004010526315789474, "low_lr": 8.021052631578949e-06, "step": 1138 }, { "epoch": 2.992767915844839, "high_lr": 0.0004010526315789474, "low_lr": 8.021052631578949e-06, "step": 1138 }, { "epoch": 2.992767915844839, "high_lr": 0.0004010526315789474, "low_lr": 8.021052631578949e-06, "step": 1138 }, { "epoch": 2.992767915844839, "high_lr": 0.0004010526315789474, "low_lr": 8.021052631578949e-06, "step": 1138 }, { "epoch": 2.992767915844839, "high_lr": 0.0004010526315789474, "low_lr": 8.021052631578949e-06, "step": 1138 }, { "epoch": 2.995397764628534, "grad_norm": 1.28179132938385, "learning_rate": 0.0004005263157894737, "loss": 1.3428, "step": 1139 }, { "epoch": 2.995397764628534, "high_lr": 0.0004005263157894737, "low_lr": 8.010526315789473e-06, "step": 1139 }, { "epoch": 2.995397764628534, "high_lr": 0.0004005263157894737, "low_lr": 8.010526315789473e-06, "step": 1139 }, { "epoch": 2.995397764628534, "high_lr": 0.0004005263157894737, "low_lr": 8.010526315789473e-06, "step": 1139 }, { "epoch": 2.995397764628534, "high_lr": 0.0004005263157894737, "low_lr": 8.010526315789473e-06, "step": 1139 }, { "epoch": 2.995397764628534, "high_lr": 0.0004005263157894737, "low_lr": 8.010526315789473e-06, "step": 1139 }, { "epoch": 2.995397764628534, "high_lr": 0.0004005263157894737, "low_lr": 8.010526315789473e-06, "step": 1139 }, { "epoch": 2.995397764628534, "high_lr": 0.0004005263157894737, "low_lr": 8.010526315789473e-06, "step": 1139 }, { "epoch": 2.995397764628534, "high_lr": 0.0004005263157894737, "low_lr": 8.010526315789473e-06, "step": 1139 }, { "epoch": 2.998027613412229, "grad_norm": 1.225212574005127, "learning_rate": 0.0004, "loss": 1.38, "step": 1140 }, { "epoch": 2.998027613412229, "high_lr": 0.0004, "low_lr": 8.000000000000001e-06, "step": 1140 }, { "epoch": 2.998027613412229, "high_lr": 0.0004, "low_lr": 8.000000000000001e-06, "step": 1140 }, { "epoch": 2.998027613412229, "high_lr": 0.0004, "low_lr": 8.000000000000001e-06, "step": 1140 }, { "epoch": 2.998027613412229, "high_lr": 0.0004, "low_lr": 8.000000000000001e-06, "step": 1140 }, { "epoch": 2.998027613412229, "high_lr": 0.0004, "low_lr": 8.000000000000001e-06, "step": 1140 }, { "epoch": 2.998027613412229, "high_lr": 0.0004, "low_lr": 8.000000000000001e-06, "step": 1140 }, { "epoch": 2.998027613412229, "high_lr": 0.0004, "low_lr": 8.000000000000001e-06, "step": 1140 }, { "epoch": 2.998027613412229, "high_lr": 0.0004, "low_lr": 8.000000000000001e-06, "step": 1140 }, { "epoch": 3.0006574621959237, "grad_norm": 1.3067548274993896, "learning_rate": 0.0003994736842105263, "loss": 1.367, "step": 1141 }, { "epoch": 3.0006574621959237, "high_lr": 0.0003994736842105263, "low_lr": 7.989473684210528e-06, "step": 1141 }, { "epoch": 3.0006574621959237, "high_lr": 0.0003994736842105263, "low_lr": 7.989473684210528e-06, "step": 1141 }, { "epoch": 3.0006574621959237, "high_lr": 0.0003994736842105263, "low_lr": 7.989473684210528e-06, "step": 1141 }, { "epoch": 3.0006574621959237, "high_lr": 0.0003994736842105263, "low_lr": 7.989473684210528e-06, "step": 1141 }, { "epoch": 3.0006574621959237, "high_lr": 0.0003994736842105263, "low_lr": 7.989473684210528e-06, "step": 1141 }, { "epoch": 3.0006574621959237, "high_lr": 0.0003994736842105263, "low_lr": 7.989473684210528e-06, "step": 1141 }, { "epoch": 3.0006574621959237, "high_lr": 0.0003994736842105263, "low_lr": 7.989473684210528e-06, "step": 1141 }, { "epoch": 3.0006574621959237, "high_lr": 0.0003994736842105263, "low_lr": 7.989473684210528e-06, "step": 1141 }, { "epoch": 3.0032873109796188, "grad_norm": 1.2940045595169067, "learning_rate": 0.0003989473684210526, "loss": 1.3059, "step": 1142 }, { "epoch": 3.0032873109796188, "high_lr": 0.0003989473684210526, "low_lr": 7.978947368421052e-06, "step": 1142 }, { "epoch": 3.0032873109796188, "high_lr": 0.0003989473684210526, "low_lr": 7.978947368421052e-06, "step": 1142 }, { "epoch": 3.0032873109796188, "high_lr": 0.0003989473684210526, "low_lr": 7.978947368421052e-06, "step": 1142 }, { "epoch": 3.0032873109796188, "high_lr": 0.0003989473684210526, "low_lr": 7.978947368421052e-06, "step": 1142 }, { "epoch": 3.0032873109796188, "high_lr": 0.0003989473684210526, "low_lr": 7.978947368421052e-06, "step": 1142 }, { "epoch": 3.0032873109796188, "high_lr": 0.0003989473684210526, "low_lr": 7.978947368421052e-06, "step": 1142 }, { "epoch": 3.0032873109796188, "high_lr": 0.0003989473684210526, "low_lr": 7.978947368421052e-06, "step": 1142 }, { "epoch": 3.0032873109796188, "high_lr": 0.0003989473684210526, "low_lr": 7.978947368421052e-06, "step": 1142 }, { "epoch": 3.0059171597633134, "grad_norm": 1.3060542345046997, "learning_rate": 0.000398421052631579, "loss": 1.3071, "step": 1143 }, { "epoch": 3.0059171597633134, "high_lr": 0.000398421052631579, "low_lr": 7.96842105263158e-06, "step": 1143 }, { "epoch": 3.0059171597633134, "high_lr": 0.000398421052631579, "low_lr": 7.96842105263158e-06, "step": 1143 }, { "epoch": 3.0059171597633134, "high_lr": 0.000398421052631579, "low_lr": 7.96842105263158e-06, "step": 1143 }, { "epoch": 3.0059171597633134, "high_lr": 0.000398421052631579, "low_lr": 7.96842105263158e-06, "step": 1143 }, { "epoch": 3.0059171597633134, "high_lr": 0.000398421052631579, "low_lr": 7.96842105263158e-06, "step": 1143 }, { "epoch": 3.0059171597633134, "high_lr": 0.000398421052631579, "low_lr": 7.96842105263158e-06, "step": 1143 }, { "epoch": 3.0059171597633134, "high_lr": 0.000398421052631579, "low_lr": 7.96842105263158e-06, "step": 1143 }, { "epoch": 3.0059171597633134, "high_lr": 0.000398421052631579, "low_lr": 7.96842105263158e-06, "step": 1143 }, { "epoch": 3.0085470085470085, "grad_norm": 1.2439887523651123, "learning_rate": 0.00039789473684210527, "loss": 1.2963, "step": 1144 }, { "epoch": 3.0085470085470085, "high_lr": 0.00039789473684210527, "low_lr": 7.957894736842105e-06, "step": 1144 }, { "epoch": 3.0085470085470085, "high_lr": 0.00039789473684210527, "low_lr": 7.957894736842105e-06, "step": 1144 }, { "epoch": 3.0085470085470085, "high_lr": 0.00039789473684210527, "low_lr": 7.957894736842105e-06, "step": 1144 }, { "epoch": 3.0085470085470085, "high_lr": 0.00039789473684210527, "low_lr": 7.957894736842105e-06, "step": 1144 }, { "epoch": 3.0085470085470085, "high_lr": 0.00039789473684210527, "low_lr": 7.957894736842105e-06, "step": 1144 }, { "epoch": 3.0085470085470085, "high_lr": 0.00039789473684210527, "low_lr": 7.957894736842105e-06, "step": 1144 }, { "epoch": 3.0085470085470085, "high_lr": 0.00039789473684210527, "low_lr": 7.957894736842105e-06, "step": 1144 }, { "epoch": 3.0085470085470085, "high_lr": 0.00039789473684210527, "low_lr": 7.957894736842105e-06, "step": 1144 }, { "epoch": 3.0111768573307036, "grad_norm": 1.3645330667495728, "learning_rate": 0.0003973684210526316, "loss": 1.3169, "step": 1145 }, { "epoch": 3.0111768573307036, "high_lr": 0.0003973684210526316, "low_lr": 7.947368421052633e-06, "step": 1145 }, { "epoch": 3.0111768573307036, "high_lr": 0.0003973684210526316, "low_lr": 7.947368421052633e-06, "step": 1145 }, { "epoch": 3.0111768573307036, "high_lr": 0.0003973684210526316, "low_lr": 7.947368421052633e-06, "step": 1145 }, { "epoch": 3.0111768573307036, "high_lr": 0.0003973684210526316, "low_lr": 7.947368421052633e-06, "step": 1145 }, { "epoch": 3.0111768573307036, "high_lr": 0.0003973684210526316, "low_lr": 7.947368421052633e-06, "step": 1145 }, { "epoch": 3.0111768573307036, "high_lr": 0.0003973684210526316, "low_lr": 7.947368421052633e-06, "step": 1145 }, { "epoch": 3.0111768573307036, "high_lr": 0.0003973684210526316, "low_lr": 7.947368421052633e-06, "step": 1145 }, { "epoch": 3.0111768573307036, "high_lr": 0.0003973684210526316, "low_lr": 7.947368421052633e-06, "step": 1145 }, { "epoch": 3.0138067061143983, "grad_norm": 1.377105474472046, "learning_rate": 0.0003968421052631579, "loss": 1.2971, "step": 1146 }, { "epoch": 3.0138067061143983, "high_lr": 0.0003968421052631579, "low_lr": 7.936842105263158e-06, "step": 1146 }, { "epoch": 3.0138067061143983, "high_lr": 0.0003968421052631579, "low_lr": 7.936842105263158e-06, "step": 1146 }, { "epoch": 3.0138067061143983, "high_lr": 0.0003968421052631579, "low_lr": 7.936842105263158e-06, "step": 1146 }, { "epoch": 3.0138067061143983, "high_lr": 0.0003968421052631579, "low_lr": 7.936842105263158e-06, "step": 1146 }, { "epoch": 3.0138067061143983, "high_lr": 0.0003968421052631579, "low_lr": 7.936842105263158e-06, "step": 1146 }, { "epoch": 3.0138067061143983, "high_lr": 0.0003968421052631579, "low_lr": 7.936842105263158e-06, "step": 1146 }, { "epoch": 3.0138067061143983, "high_lr": 0.0003968421052631579, "low_lr": 7.936842105263158e-06, "step": 1146 }, { "epoch": 3.0138067061143983, "high_lr": 0.0003968421052631579, "low_lr": 7.936842105263158e-06, "step": 1146 }, { "epoch": 3.0164365548980934, "grad_norm": 1.2766972780227661, "learning_rate": 0.00039631578947368424, "loss": 1.3058, "step": 1147 }, { "epoch": 3.0164365548980934, "high_lr": 0.00039631578947368424, "low_lr": 7.926315789473686e-06, "step": 1147 }, { "epoch": 3.0164365548980934, "high_lr": 0.00039631578947368424, "low_lr": 7.926315789473686e-06, "step": 1147 }, { "epoch": 3.0164365548980934, "high_lr": 0.00039631578947368424, "low_lr": 7.926315789473686e-06, "step": 1147 }, { "epoch": 3.0164365548980934, "high_lr": 0.00039631578947368424, "low_lr": 7.926315789473686e-06, "step": 1147 }, { "epoch": 3.0164365548980934, "high_lr": 0.00039631578947368424, "low_lr": 7.926315789473686e-06, "step": 1147 }, { "epoch": 3.0164365548980934, "high_lr": 0.00039631578947368424, "low_lr": 7.926315789473686e-06, "step": 1147 }, { "epoch": 3.0164365548980934, "high_lr": 0.00039631578947368424, "low_lr": 7.926315789473686e-06, "step": 1147 }, { "epoch": 3.0164365548980934, "high_lr": 0.00039631578947368424, "low_lr": 7.926315789473686e-06, "step": 1147 }, { "epoch": 3.0190664036817885, "grad_norm": 1.2870296239852905, "learning_rate": 0.0003957894736842105, "loss": 1.3569, "step": 1148 }, { "epoch": 3.0190664036817885, "high_lr": 0.0003957894736842105, "low_lr": 7.915789473684212e-06, "step": 1148 }, { "epoch": 3.0190664036817885, "high_lr": 0.0003957894736842105, "low_lr": 7.915789473684212e-06, "step": 1148 }, { "epoch": 3.0190664036817885, "high_lr": 0.0003957894736842105, "low_lr": 7.915789473684212e-06, "step": 1148 }, { "epoch": 3.0190664036817885, "high_lr": 0.0003957894736842105, "low_lr": 7.915789473684212e-06, "step": 1148 }, { "epoch": 3.0190664036817885, "high_lr": 0.0003957894736842105, "low_lr": 7.915789473684212e-06, "step": 1148 }, { "epoch": 3.0190664036817885, "high_lr": 0.0003957894736842105, "low_lr": 7.915789473684212e-06, "step": 1148 }, { "epoch": 3.0190664036817885, "high_lr": 0.0003957894736842105, "low_lr": 7.915789473684212e-06, "step": 1148 }, { "epoch": 3.0190664036817885, "high_lr": 0.0003957894736842105, "low_lr": 7.915789473684212e-06, "step": 1148 }, { "epoch": 3.021696252465483, "grad_norm": 1.3098727464675903, "learning_rate": 0.0003952631578947368, "loss": 1.3159, "step": 1149 }, { "epoch": 3.021696252465483, "high_lr": 0.0003952631578947368, "low_lr": 7.905263157894737e-06, "step": 1149 }, { "epoch": 3.021696252465483, "high_lr": 0.0003952631578947368, "low_lr": 7.905263157894737e-06, "step": 1149 }, { "epoch": 3.021696252465483, "high_lr": 0.0003952631578947368, "low_lr": 7.905263157894737e-06, "step": 1149 }, { "epoch": 3.021696252465483, "high_lr": 0.0003952631578947368, "low_lr": 7.905263157894737e-06, "step": 1149 }, { "epoch": 3.021696252465483, "high_lr": 0.0003952631578947368, "low_lr": 7.905263157894737e-06, "step": 1149 }, { "epoch": 3.021696252465483, "high_lr": 0.0003952631578947368, "low_lr": 7.905263157894737e-06, "step": 1149 }, { "epoch": 3.021696252465483, "high_lr": 0.0003952631578947368, "low_lr": 7.905263157894737e-06, "step": 1149 }, { "epoch": 3.021696252465483, "high_lr": 0.0003952631578947368, "low_lr": 7.905263157894737e-06, "step": 1149 }, { "epoch": 3.0243261012491782, "grad_norm": 1.410931944847107, "learning_rate": 0.00039473684210526315, "loss": 1.3034, "step": 1150 }, { "epoch": 3.0243261012491782, "high_lr": 0.00039473684210526315, "low_lr": 7.894736842105265e-06, "step": 1150 }, { "epoch": 3.0243261012491782, "high_lr": 0.00039473684210526315, "low_lr": 7.894736842105265e-06, "step": 1150 }, { "epoch": 3.0243261012491782, "high_lr": 0.00039473684210526315, "low_lr": 7.894736842105265e-06, "step": 1150 }, { "epoch": 3.0243261012491782, "high_lr": 0.00039473684210526315, "low_lr": 7.894736842105265e-06, "step": 1150 }, { "epoch": 3.0243261012491782, "high_lr": 0.00039473684210526315, "low_lr": 7.894736842105265e-06, "step": 1150 }, { "epoch": 3.0243261012491782, "high_lr": 0.00039473684210526315, "low_lr": 7.894736842105265e-06, "step": 1150 }, { "epoch": 3.0243261012491782, "high_lr": 0.00039473684210526315, "low_lr": 7.894736842105265e-06, "step": 1150 }, { "epoch": 3.0243261012491782, "high_lr": 0.00039473684210526315, "low_lr": 7.894736842105265e-06, "step": 1150 }, { "epoch": 3.0269559500328733, "grad_norm": 1.3159284591674805, "learning_rate": 0.0003942105263157895, "loss": 1.2683, "step": 1151 }, { "epoch": 3.0269559500328733, "high_lr": 0.0003942105263157895, "low_lr": 7.88421052631579e-06, "step": 1151 }, { "epoch": 3.0269559500328733, "high_lr": 0.0003942105263157895, "low_lr": 7.88421052631579e-06, "step": 1151 }, { "epoch": 3.0269559500328733, "high_lr": 0.0003942105263157895, "low_lr": 7.88421052631579e-06, "step": 1151 }, { "epoch": 3.0269559500328733, "high_lr": 0.0003942105263157895, "low_lr": 7.88421052631579e-06, "step": 1151 }, { "epoch": 3.0269559500328733, "high_lr": 0.0003942105263157895, "low_lr": 7.88421052631579e-06, "step": 1151 }, { "epoch": 3.0269559500328733, "high_lr": 0.0003942105263157895, "low_lr": 7.88421052631579e-06, "step": 1151 }, { "epoch": 3.0269559500328733, "high_lr": 0.0003942105263157895, "low_lr": 7.88421052631579e-06, "step": 1151 }, { "epoch": 3.0269559500328733, "high_lr": 0.0003942105263157895, "low_lr": 7.88421052631579e-06, "step": 1151 }, { "epoch": 3.029585798816568, "grad_norm": 1.3127446174621582, "learning_rate": 0.00039368421052631583, "loss": 1.2996, "step": 1152 }, { "epoch": 3.029585798816568, "high_lr": 0.00039368421052631583, "low_lr": 7.873684210526317e-06, "step": 1152 }, { "epoch": 3.029585798816568, "high_lr": 0.00039368421052631583, "low_lr": 7.873684210526317e-06, "step": 1152 }, { "epoch": 3.029585798816568, "high_lr": 0.00039368421052631583, "low_lr": 7.873684210526317e-06, "step": 1152 }, { "epoch": 3.029585798816568, "high_lr": 0.00039368421052631583, "low_lr": 7.873684210526317e-06, "step": 1152 }, { "epoch": 3.029585798816568, "high_lr": 0.00039368421052631583, "low_lr": 7.873684210526317e-06, "step": 1152 }, { "epoch": 3.029585798816568, "high_lr": 0.00039368421052631583, "low_lr": 7.873684210526317e-06, "step": 1152 }, { "epoch": 3.029585798816568, "high_lr": 0.00039368421052631583, "low_lr": 7.873684210526317e-06, "step": 1152 }, { "epoch": 3.029585798816568, "high_lr": 0.00039368421052631583, "low_lr": 7.873684210526317e-06, "step": 1152 }, { "epoch": 3.032215647600263, "grad_norm": 1.3679771423339844, "learning_rate": 0.0003931578947368421, "loss": 1.3208, "step": 1153 }, { "epoch": 3.032215647600263, "high_lr": 0.0003931578947368421, "low_lr": 7.863157894736842e-06, "step": 1153 }, { "epoch": 3.032215647600263, "high_lr": 0.0003931578947368421, "low_lr": 7.863157894736842e-06, "step": 1153 }, { "epoch": 3.032215647600263, "high_lr": 0.0003931578947368421, "low_lr": 7.863157894736842e-06, "step": 1153 }, { "epoch": 3.032215647600263, "high_lr": 0.0003931578947368421, "low_lr": 7.863157894736842e-06, "step": 1153 }, { "epoch": 3.032215647600263, "high_lr": 0.0003931578947368421, "low_lr": 7.863157894736842e-06, "step": 1153 }, { "epoch": 3.032215647600263, "high_lr": 0.0003931578947368421, "low_lr": 7.863157894736842e-06, "step": 1153 }, { "epoch": 3.032215647600263, "high_lr": 0.0003931578947368421, "low_lr": 7.863157894736842e-06, "step": 1153 }, { "epoch": 3.032215647600263, "high_lr": 0.0003931578947368421, "low_lr": 7.863157894736842e-06, "step": 1153 }, { "epoch": 3.0348454963839577, "grad_norm": 1.3075698614120483, "learning_rate": 0.00039263157894736846, "loss": 1.3233, "step": 1154 }, { "epoch": 3.0348454963839577, "high_lr": 0.00039263157894736846, "low_lr": 7.85263157894737e-06, "step": 1154 }, { "epoch": 3.0348454963839577, "high_lr": 0.00039263157894736846, "low_lr": 7.85263157894737e-06, "step": 1154 }, { "epoch": 3.0348454963839577, "high_lr": 0.00039263157894736846, "low_lr": 7.85263157894737e-06, "step": 1154 }, { "epoch": 3.0348454963839577, "high_lr": 0.00039263157894736846, "low_lr": 7.85263157894737e-06, "step": 1154 }, { "epoch": 3.0348454963839577, "high_lr": 0.00039263157894736846, "low_lr": 7.85263157894737e-06, "step": 1154 }, { "epoch": 3.0348454963839577, "high_lr": 0.00039263157894736846, "low_lr": 7.85263157894737e-06, "step": 1154 }, { "epoch": 3.0348454963839577, "high_lr": 0.00039263157894736846, "low_lr": 7.85263157894737e-06, "step": 1154 }, { "epoch": 3.0348454963839577, "high_lr": 0.00039263157894736846, "low_lr": 7.85263157894737e-06, "step": 1154 }, { "epoch": 3.037475345167653, "grad_norm": 1.3870766162872314, "learning_rate": 0.00039210526315789474, "loss": 1.3746, "step": 1155 }, { "epoch": 3.037475345167653, "high_lr": 0.00039210526315789474, "low_lr": 7.842105263157895e-06, "step": 1155 }, { "epoch": 3.037475345167653, "high_lr": 0.00039210526315789474, "low_lr": 7.842105263157895e-06, "step": 1155 }, { "epoch": 3.037475345167653, "high_lr": 0.00039210526315789474, "low_lr": 7.842105263157895e-06, "step": 1155 }, { "epoch": 3.037475345167653, "high_lr": 0.00039210526315789474, "low_lr": 7.842105263157895e-06, "step": 1155 }, { "epoch": 3.037475345167653, "high_lr": 0.00039210526315789474, "low_lr": 7.842105263157895e-06, "step": 1155 }, { "epoch": 3.037475345167653, "high_lr": 0.00039210526315789474, "low_lr": 7.842105263157895e-06, "step": 1155 }, { "epoch": 3.037475345167653, "high_lr": 0.00039210526315789474, "low_lr": 7.842105263157895e-06, "step": 1155 }, { "epoch": 3.037475345167653, "high_lr": 0.00039210526315789474, "low_lr": 7.842105263157895e-06, "step": 1155 }, { "epoch": 3.040105193951348, "grad_norm": 1.500157117843628, "learning_rate": 0.000391578947368421, "loss": 1.2875, "step": 1156 }, { "epoch": 3.040105193951348, "high_lr": 0.000391578947368421, "low_lr": 7.831578947368421e-06, "step": 1156 }, { "epoch": 3.040105193951348, "high_lr": 0.000391578947368421, "low_lr": 7.831578947368421e-06, "step": 1156 }, { "epoch": 3.040105193951348, "high_lr": 0.000391578947368421, "low_lr": 7.831578947368421e-06, "step": 1156 }, { "epoch": 3.040105193951348, "high_lr": 0.000391578947368421, "low_lr": 7.831578947368421e-06, "step": 1156 }, { "epoch": 3.040105193951348, "high_lr": 0.000391578947368421, "low_lr": 7.831578947368421e-06, "step": 1156 }, { "epoch": 3.040105193951348, "high_lr": 0.000391578947368421, "low_lr": 7.831578947368421e-06, "step": 1156 }, { "epoch": 3.040105193951348, "high_lr": 0.000391578947368421, "low_lr": 7.831578947368421e-06, "step": 1156 }, { "epoch": 3.040105193951348, "high_lr": 0.000391578947368421, "low_lr": 7.831578947368421e-06, "step": 1156 }, { "epoch": 3.0427350427350426, "grad_norm": 1.3098334074020386, "learning_rate": 0.00039105263157894737, "loss": 1.2819, "step": 1157 }, { "epoch": 3.0427350427350426, "high_lr": 0.00039105263157894737, "low_lr": 7.821052631578949e-06, "step": 1157 }, { "epoch": 3.0427350427350426, "high_lr": 0.00039105263157894737, "low_lr": 7.821052631578949e-06, "step": 1157 }, { "epoch": 3.0427350427350426, "high_lr": 0.00039105263157894737, "low_lr": 7.821052631578949e-06, "step": 1157 }, { "epoch": 3.0427350427350426, "high_lr": 0.00039105263157894737, "low_lr": 7.821052631578949e-06, "step": 1157 }, { "epoch": 3.0427350427350426, "high_lr": 0.00039105263157894737, "low_lr": 7.821052631578949e-06, "step": 1157 }, { "epoch": 3.0427350427350426, "high_lr": 0.00039105263157894737, "low_lr": 7.821052631578949e-06, "step": 1157 }, { "epoch": 3.0427350427350426, "high_lr": 0.00039105263157894737, "low_lr": 7.821052631578949e-06, "step": 1157 }, { "epoch": 3.0427350427350426, "high_lr": 0.00039105263157894737, "low_lr": 7.821052631578949e-06, "step": 1157 }, { "epoch": 3.0453648915187377, "grad_norm": 1.370381474494934, "learning_rate": 0.00039052631578947365, "loss": 1.3062, "step": 1158 }, { "epoch": 3.0453648915187377, "high_lr": 0.00039052631578947365, "low_lr": 7.810526315789474e-06, "step": 1158 }, { "epoch": 3.0453648915187377, "high_lr": 0.00039052631578947365, "low_lr": 7.810526315789474e-06, "step": 1158 }, { "epoch": 3.0453648915187377, "high_lr": 0.00039052631578947365, "low_lr": 7.810526315789474e-06, "step": 1158 }, { "epoch": 3.0453648915187377, "high_lr": 0.00039052631578947365, "low_lr": 7.810526315789474e-06, "step": 1158 }, { "epoch": 3.0453648915187377, "high_lr": 0.00039052631578947365, "low_lr": 7.810526315789474e-06, "step": 1158 }, { "epoch": 3.0453648915187377, "high_lr": 0.00039052631578947365, "low_lr": 7.810526315789474e-06, "step": 1158 }, { "epoch": 3.0453648915187377, "high_lr": 0.00039052631578947365, "low_lr": 7.810526315789474e-06, "step": 1158 }, { "epoch": 3.0453648915187377, "high_lr": 0.00039052631578947365, "low_lr": 7.810526315789474e-06, "step": 1158 }, { "epoch": 3.047994740302433, "grad_norm": 1.270233154296875, "learning_rate": 0.00039000000000000005, "loss": 1.3884, "step": 1159 }, { "epoch": 3.047994740302433, "high_lr": 0.00039000000000000005, "low_lr": 7.800000000000002e-06, "step": 1159 }, { "epoch": 3.047994740302433, "high_lr": 0.00039000000000000005, "low_lr": 7.800000000000002e-06, "step": 1159 }, { "epoch": 3.047994740302433, "high_lr": 0.00039000000000000005, "low_lr": 7.800000000000002e-06, "step": 1159 }, { "epoch": 3.047994740302433, "high_lr": 0.00039000000000000005, "low_lr": 7.800000000000002e-06, "step": 1159 }, { "epoch": 3.047994740302433, "high_lr": 0.00039000000000000005, "low_lr": 7.800000000000002e-06, "step": 1159 }, { "epoch": 3.047994740302433, "high_lr": 0.00039000000000000005, "low_lr": 7.800000000000002e-06, "step": 1159 }, { "epoch": 3.047994740302433, "high_lr": 0.00039000000000000005, "low_lr": 7.800000000000002e-06, "step": 1159 }, { "epoch": 3.047994740302433, "high_lr": 0.00039000000000000005, "low_lr": 7.800000000000002e-06, "step": 1159 }, { "epoch": 3.0506245890861274, "grad_norm": 1.2626560926437378, "learning_rate": 0.00038947368421052633, "loss": 1.3011, "step": 1160 }, { "epoch": 3.0506245890861274, "high_lr": 0.00038947368421052633, "low_lr": 7.789473684210526e-06, "step": 1160 }, { "epoch": 3.0506245890861274, "high_lr": 0.00038947368421052633, "low_lr": 7.789473684210526e-06, "step": 1160 }, { "epoch": 3.0506245890861274, "high_lr": 0.00038947368421052633, "low_lr": 7.789473684210526e-06, "step": 1160 }, { "epoch": 3.0506245890861274, "high_lr": 0.00038947368421052633, "low_lr": 7.789473684210526e-06, "step": 1160 }, { "epoch": 3.0506245890861274, "high_lr": 0.00038947368421052633, "low_lr": 7.789473684210526e-06, "step": 1160 }, { "epoch": 3.0506245890861274, "high_lr": 0.00038947368421052633, "low_lr": 7.789473684210526e-06, "step": 1160 }, { "epoch": 3.0506245890861274, "high_lr": 0.00038947368421052633, "low_lr": 7.789473684210526e-06, "step": 1160 }, { "epoch": 3.0506245890861274, "high_lr": 0.00038947368421052633, "low_lr": 7.789473684210526e-06, "step": 1160 }, { "epoch": 3.0532544378698225, "grad_norm": 1.2719833850860596, "learning_rate": 0.0003889473684210527, "loss": 1.3133, "step": 1161 }, { "epoch": 3.0532544378698225, "high_lr": 0.0003889473684210527, "low_lr": 7.778947368421054e-06, "step": 1161 }, { "epoch": 3.0532544378698225, "high_lr": 0.0003889473684210527, "low_lr": 7.778947368421054e-06, "step": 1161 }, { "epoch": 3.0532544378698225, "high_lr": 0.0003889473684210527, "low_lr": 7.778947368421054e-06, "step": 1161 }, { "epoch": 3.0532544378698225, "high_lr": 0.0003889473684210527, "low_lr": 7.778947368421054e-06, "step": 1161 }, { "epoch": 3.0532544378698225, "high_lr": 0.0003889473684210527, "low_lr": 7.778947368421054e-06, "step": 1161 }, { "epoch": 3.0532544378698225, "high_lr": 0.0003889473684210527, "low_lr": 7.778947368421054e-06, "step": 1161 }, { "epoch": 3.0532544378698225, "high_lr": 0.0003889473684210527, "low_lr": 7.778947368421054e-06, "step": 1161 }, { "epoch": 3.0532544378698225, "high_lr": 0.0003889473684210527, "low_lr": 7.778947368421054e-06, "step": 1161 }, { "epoch": 3.0558842866535176, "grad_norm": 1.2456934452056885, "learning_rate": 0.00038842105263157896, "loss": 1.3058, "step": 1162 }, { "epoch": 3.0558842866535176, "high_lr": 0.00038842105263157896, "low_lr": 7.768421052631579e-06, "step": 1162 }, { "epoch": 3.0558842866535176, "high_lr": 0.00038842105263157896, "low_lr": 7.768421052631579e-06, "step": 1162 }, { "epoch": 3.0558842866535176, "high_lr": 0.00038842105263157896, "low_lr": 7.768421052631579e-06, "step": 1162 }, { "epoch": 3.0558842866535176, "high_lr": 0.00038842105263157896, "low_lr": 7.768421052631579e-06, "step": 1162 }, { "epoch": 3.0558842866535176, "high_lr": 0.00038842105263157896, "low_lr": 7.768421052631579e-06, "step": 1162 }, { "epoch": 3.0558842866535176, "high_lr": 0.00038842105263157896, "low_lr": 7.768421052631579e-06, "step": 1162 }, { "epoch": 3.0558842866535176, "high_lr": 0.00038842105263157896, "low_lr": 7.768421052631579e-06, "step": 1162 }, { "epoch": 3.0558842866535176, "high_lr": 0.00038842105263157896, "low_lr": 7.768421052631579e-06, "step": 1162 }, { "epoch": 3.0585141354372123, "grad_norm": 1.3651379346847534, "learning_rate": 0.00038789473684210524, "loss": 1.2902, "step": 1163 }, { "epoch": 3.0585141354372123, "high_lr": 0.00038789473684210524, "low_lr": 7.757894736842105e-06, "step": 1163 }, { "epoch": 3.0585141354372123, "high_lr": 0.00038789473684210524, "low_lr": 7.757894736842105e-06, "step": 1163 }, { "epoch": 3.0585141354372123, "high_lr": 0.00038789473684210524, "low_lr": 7.757894736842105e-06, "step": 1163 }, { "epoch": 3.0585141354372123, "high_lr": 0.00038789473684210524, "low_lr": 7.757894736842105e-06, "step": 1163 }, { "epoch": 3.0585141354372123, "high_lr": 0.00038789473684210524, "low_lr": 7.757894736842105e-06, "step": 1163 }, { "epoch": 3.0585141354372123, "high_lr": 0.00038789473684210524, "low_lr": 7.757894736842105e-06, "step": 1163 }, { "epoch": 3.0585141354372123, "high_lr": 0.00038789473684210524, "low_lr": 7.757894736842105e-06, "step": 1163 }, { "epoch": 3.0585141354372123, "high_lr": 0.00038789473684210524, "low_lr": 7.757894736842105e-06, "step": 1163 }, { "epoch": 3.0611439842209074, "grad_norm": 1.316124677658081, "learning_rate": 0.0003873684210526316, "loss": 1.2918, "step": 1164 }, { "epoch": 3.0611439842209074, "high_lr": 0.0003873684210526316, "low_lr": 7.747368421052631e-06, "step": 1164 }, { "epoch": 3.0611439842209074, "high_lr": 0.0003873684210526316, "low_lr": 7.747368421052631e-06, "step": 1164 }, { "epoch": 3.0611439842209074, "high_lr": 0.0003873684210526316, "low_lr": 7.747368421052631e-06, "step": 1164 }, { "epoch": 3.0611439842209074, "high_lr": 0.0003873684210526316, "low_lr": 7.747368421052631e-06, "step": 1164 }, { "epoch": 3.0611439842209074, "high_lr": 0.0003873684210526316, "low_lr": 7.747368421052631e-06, "step": 1164 }, { "epoch": 3.0611439842209074, "high_lr": 0.0003873684210526316, "low_lr": 7.747368421052631e-06, "step": 1164 }, { "epoch": 3.0611439842209074, "high_lr": 0.0003873684210526316, "low_lr": 7.747368421052631e-06, "step": 1164 }, { "epoch": 3.0611439842209074, "high_lr": 0.0003873684210526316, "low_lr": 7.747368421052631e-06, "step": 1164 }, { "epoch": 3.063773833004602, "grad_norm": 1.3913902044296265, "learning_rate": 0.00038684210526315787, "loss": 1.2928, "step": 1165 }, { "epoch": 3.063773833004602, "high_lr": 0.00038684210526315787, "low_lr": 7.736842105263158e-06, "step": 1165 }, { "epoch": 3.063773833004602, "high_lr": 0.00038684210526315787, "low_lr": 7.736842105263158e-06, "step": 1165 }, { "epoch": 3.063773833004602, "high_lr": 0.00038684210526315787, "low_lr": 7.736842105263158e-06, "step": 1165 }, { "epoch": 3.063773833004602, "high_lr": 0.00038684210526315787, "low_lr": 7.736842105263158e-06, "step": 1165 }, { "epoch": 3.063773833004602, "high_lr": 0.00038684210526315787, "low_lr": 7.736842105263158e-06, "step": 1165 }, { "epoch": 3.063773833004602, "high_lr": 0.00038684210526315787, "low_lr": 7.736842105263158e-06, "step": 1165 }, { "epoch": 3.063773833004602, "high_lr": 0.00038684210526315787, "low_lr": 7.736842105263158e-06, "step": 1165 }, { "epoch": 3.063773833004602, "high_lr": 0.00038684210526315787, "low_lr": 7.736842105263158e-06, "step": 1165 }, { "epoch": 3.066403681788297, "grad_norm": 1.357170581817627, "learning_rate": 0.0003863157894736842, "loss": 1.2819, "step": 1166 }, { "epoch": 3.066403681788297, "high_lr": 0.0003863157894736842, "low_lr": 7.726315789473686e-06, "step": 1166 }, { "epoch": 3.066403681788297, "high_lr": 0.0003863157894736842, "low_lr": 7.726315789473686e-06, "step": 1166 }, { "epoch": 3.066403681788297, "high_lr": 0.0003863157894736842, "low_lr": 7.726315789473686e-06, "step": 1166 }, { "epoch": 3.066403681788297, "high_lr": 0.0003863157894736842, "low_lr": 7.726315789473686e-06, "step": 1166 }, { "epoch": 3.066403681788297, "high_lr": 0.0003863157894736842, "low_lr": 7.726315789473686e-06, "step": 1166 }, { "epoch": 3.066403681788297, "high_lr": 0.0003863157894736842, "low_lr": 7.726315789473686e-06, "step": 1166 }, { "epoch": 3.066403681788297, "high_lr": 0.0003863157894736842, "low_lr": 7.726315789473686e-06, "step": 1166 }, { "epoch": 3.066403681788297, "high_lr": 0.0003863157894736842, "low_lr": 7.726315789473686e-06, "step": 1166 }, { "epoch": 3.0690335305719922, "grad_norm": 1.297806978225708, "learning_rate": 0.0003857894736842105, "loss": 1.2749, "step": 1167 }, { "epoch": 3.0690335305719922, "high_lr": 0.0003857894736842105, "low_lr": 7.71578947368421e-06, "step": 1167 }, { "epoch": 3.0690335305719922, "high_lr": 0.0003857894736842105, "low_lr": 7.71578947368421e-06, "step": 1167 }, { "epoch": 3.0690335305719922, "high_lr": 0.0003857894736842105, "low_lr": 7.71578947368421e-06, "step": 1167 }, { "epoch": 3.0690335305719922, "high_lr": 0.0003857894736842105, "low_lr": 7.71578947368421e-06, "step": 1167 }, { "epoch": 3.0690335305719922, "high_lr": 0.0003857894736842105, "low_lr": 7.71578947368421e-06, "step": 1167 }, { "epoch": 3.0690335305719922, "high_lr": 0.0003857894736842105, "low_lr": 7.71578947368421e-06, "step": 1167 }, { "epoch": 3.0690335305719922, "high_lr": 0.0003857894736842105, "low_lr": 7.71578947368421e-06, "step": 1167 }, { "epoch": 3.0690335305719922, "high_lr": 0.0003857894736842105, "low_lr": 7.71578947368421e-06, "step": 1167 }, { "epoch": 3.071663379355687, "grad_norm": 1.3861886262893677, "learning_rate": 0.0003852631578947369, "loss": 1.265, "step": 1168 }, { "epoch": 3.071663379355687, "high_lr": 0.0003852631578947369, "low_lr": 7.705263157894738e-06, "step": 1168 }, { "epoch": 3.071663379355687, "high_lr": 0.0003852631578947369, "low_lr": 7.705263157894738e-06, "step": 1168 }, { "epoch": 3.071663379355687, "high_lr": 0.0003852631578947369, "low_lr": 7.705263157894738e-06, "step": 1168 }, { "epoch": 3.071663379355687, "high_lr": 0.0003852631578947369, "low_lr": 7.705263157894738e-06, "step": 1168 }, { "epoch": 3.071663379355687, "high_lr": 0.0003852631578947369, "low_lr": 7.705263157894738e-06, "step": 1168 }, { "epoch": 3.071663379355687, "high_lr": 0.0003852631578947369, "low_lr": 7.705263157894738e-06, "step": 1168 }, { "epoch": 3.071663379355687, "high_lr": 0.0003852631578947369, "low_lr": 7.705263157894738e-06, "step": 1168 }, { "epoch": 3.071663379355687, "high_lr": 0.0003852631578947369, "low_lr": 7.705263157894738e-06, "step": 1168 }, { "epoch": 3.074293228139382, "grad_norm": 1.303112268447876, "learning_rate": 0.0003847368421052632, "loss": 1.2866, "step": 1169 }, { "epoch": 3.074293228139382, "high_lr": 0.0003847368421052632, "low_lr": 7.694736842105263e-06, "step": 1169 }, { "epoch": 3.074293228139382, "high_lr": 0.0003847368421052632, "low_lr": 7.694736842105263e-06, "step": 1169 }, { "epoch": 3.074293228139382, "high_lr": 0.0003847368421052632, "low_lr": 7.694736842105263e-06, "step": 1169 }, { "epoch": 3.074293228139382, "high_lr": 0.0003847368421052632, "low_lr": 7.694736842105263e-06, "step": 1169 }, { "epoch": 3.074293228139382, "high_lr": 0.0003847368421052632, "low_lr": 7.694736842105263e-06, "step": 1169 }, { "epoch": 3.074293228139382, "high_lr": 0.0003847368421052632, "low_lr": 7.694736842105263e-06, "step": 1169 }, { "epoch": 3.074293228139382, "high_lr": 0.0003847368421052632, "low_lr": 7.694736842105263e-06, "step": 1169 }, { "epoch": 3.074293228139382, "high_lr": 0.0003847368421052632, "low_lr": 7.694736842105263e-06, "step": 1169 }, { "epoch": 3.076923076923077, "grad_norm": 1.435673713684082, "learning_rate": 0.00038421052631578946, "loss": 1.3403, "step": 1170 }, { "epoch": 3.076923076923077, "high_lr": 0.00038421052631578946, "low_lr": 7.68421052631579e-06, "step": 1170 }, { "epoch": 3.076923076923077, "high_lr": 0.00038421052631578946, "low_lr": 7.68421052631579e-06, "step": 1170 }, { "epoch": 3.076923076923077, "high_lr": 0.00038421052631578946, "low_lr": 7.68421052631579e-06, "step": 1170 }, { "epoch": 3.076923076923077, "high_lr": 0.00038421052631578946, "low_lr": 7.68421052631579e-06, "step": 1170 }, { "epoch": 3.076923076923077, "high_lr": 0.00038421052631578946, "low_lr": 7.68421052631579e-06, "step": 1170 }, { "epoch": 3.076923076923077, "high_lr": 0.00038421052631578946, "low_lr": 7.68421052631579e-06, "step": 1170 }, { "epoch": 3.076923076923077, "high_lr": 0.00038421052631578946, "low_lr": 7.68421052631579e-06, "step": 1170 }, { "epoch": 3.076923076923077, "high_lr": 0.00038421052631578946, "low_lr": 7.68421052631579e-06, "step": 1170 }, { "epoch": 3.0795529257067717, "grad_norm": 1.348922848701477, "learning_rate": 0.0003836842105263158, "loss": 1.2902, "step": 1171 }, { "epoch": 3.0795529257067717, "high_lr": 0.0003836842105263158, "low_lr": 7.673684210526316e-06, "step": 1171 }, { "epoch": 3.0795529257067717, "high_lr": 0.0003836842105263158, "low_lr": 7.673684210526316e-06, "step": 1171 }, { "epoch": 3.0795529257067717, "high_lr": 0.0003836842105263158, "low_lr": 7.673684210526316e-06, "step": 1171 }, { "epoch": 3.0795529257067717, "high_lr": 0.0003836842105263158, "low_lr": 7.673684210526316e-06, "step": 1171 }, { "epoch": 3.0795529257067717, "high_lr": 0.0003836842105263158, "low_lr": 7.673684210526316e-06, "step": 1171 }, { "epoch": 3.0795529257067717, "high_lr": 0.0003836842105263158, "low_lr": 7.673684210526316e-06, "step": 1171 }, { "epoch": 3.0795529257067717, "high_lr": 0.0003836842105263158, "low_lr": 7.673684210526316e-06, "step": 1171 }, { "epoch": 3.0795529257067717, "high_lr": 0.0003836842105263158, "low_lr": 7.673684210526316e-06, "step": 1171 }, { "epoch": 3.082182774490467, "grad_norm": 1.350555658340454, "learning_rate": 0.0003831578947368421, "loss": 1.3351, "step": 1172 }, { "epoch": 3.082182774490467, "high_lr": 0.0003831578947368421, "low_lr": 7.663157894736842e-06, "step": 1172 }, { "epoch": 3.082182774490467, "high_lr": 0.0003831578947368421, "low_lr": 7.663157894736842e-06, "step": 1172 }, { "epoch": 3.082182774490467, "high_lr": 0.0003831578947368421, "low_lr": 7.663157894736842e-06, "step": 1172 }, { "epoch": 3.082182774490467, "high_lr": 0.0003831578947368421, "low_lr": 7.663157894736842e-06, "step": 1172 }, { "epoch": 3.082182774490467, "high_lr": 0.0003831578947368421, "low_lr": 7.663157894736842e-06, "step": 1172 }, { "epoch": 3.082182774490467, "high_lr": 0.0003831578947368421, "low_lr": 7.663157894736842e-06, "step": 1172 }, { "epoch": 3.082182774490467, "high_lr": 0.0003831578947368421, "low_lr": 7.663157894736842e-06, "step": 1172 }, { "epoch": 3.082182774490467, "high_lr": 0.0003831578947368421, "low_lr": 7.663157894736842e-06, "step": 1172 }, { "epoch": 3.084812623274162, "grad_norm": 1.331555724143982, "learning_rate": 0.00038263157894736843, "loss": 1.2961, "step": 1173 }, { "epoch": 3.084812623274162, "high_lr": 0.00038263157894736843, "low_lr": 7.65263157894737e-06, "step": 1173 }, { "epoch": 3.084812623274162, "high_lr": 0.00038263157894736843, "low_lr": 7.65263157894737e-06, "step": 1173 }, { "epoch": 3.084812623274162, "high_lr": 0.00038263157894736843, "low_lr": 7.65263157894737e-06, "step": 1173 }, { "epoch": 3.084812623274162, "high_lr": 0.00038263157894736843, "low_lr": 7.65263157894737e-06, "step": 1173 }, { "epoch": 3.084812623274162, "high_lr": 0.00038263157894736843, "low_lr": 7.65263157894737e-06, "step": 1173 }, { "epoch": 3.084812623274162, "high_lr": 0.00038263157894736843, "low_lr": 7.65263157894737e-06, "step": 1173 }, { "epoch": 3.084812623274162, "high_lr": 0.00038263157894736843, "low_lr": 7.65263157894737e-06, "step": 1173 }, { "epoch": 3.084812623274162, "high_lr": 0.00038263157894736843, "low_lr": 7.65263157894737e-06, "step": 1173 }, { "epoch": 3.0874424720578566, "grad_norm": 1.455815315246582, "learning_rate": 0.0003821052631578947, "loss": 1.2662, "step": 1174 }, { "epoch": 3.0874424720578566, "high_lr": 0.0003821052631578947, "low_lr": 7.642105263157895e-06, "step": 1174 }, { "epoch": 3.0874424720578566, "high_lr": 0.0003821052631578947, "low_lr": 7.642105263157895e-06, "step": 1174 }, { "epoch": 3.0874424720578566, "high_lr": 0.0003821052631578947, "low_lr": 7.642105263157895e-06, "step": 1174 }, { "epoch": 3.0874424720578566, "high_lr": 0.0003821052631578947, "low_lr": 7.642105263157895e-06, "step": 1174 }, { "epoch": 3.0874424720578566, "high_lr": 0.0003821052631578947, "low_lr": 7.642105263157895e-06, "step": 1174 }, { "epoch": 3.0874424720578566, "high_lr": 0.0003821052631578947, "low_lr": 7.642105263157895e-06, "step": 1174 }, { "epoch": 3.0874424720578566, "high_lr": 0.0003821052631578947, "low_lr": 7.642105263157895e-06, "step": 1174 }, { "epoch": 3.0874424720578566, "high_lr": 0.0003821052631578947, "low_lr": 7.642105263157895e-06, "step": 1174 }, { "epoch": 3.0900723208415517, "grad_norm": 1.3014329671859741, "learning_rate": 0.00038157894736842105, "loss": 1.337, "step": 1175 }, { "epoch": 3.0900723208415517, "high_lr": 0.00038157894736842105, "low_lr": 7.631578947368423e-06, "step": 1175 }, { "epoch": 3.0900723208415517, "high_lr": 0.00038157894736842105, "low_lr": 7.631578947368423e-06, "step": 1175 }, { "epoch": 3.0900723208415517, "high_lr": 0.00038157894736842105, "low_lr": 7.631578947368423e-06, "step": 1175 }, { "epoch": 3.0900723208415517, "high_lr": 0.00038157894736842105, "low_lr": 7.631578947368423e-06, "step": 1175 }, { "epoch": 3.0900723208415517, "high_lr": 0.00038157894736842105, "low_lr": 7.631578947368423e-06, "step": 1175 }, { "epoch": 3.0900723208415517, "high_lr": 0.00038157894736842105, "low_lr": 7.631578947368423e-06, "step": 1175 }, { "epoch": 3.0900723208415517, "high_lr": 0.00038157894736842105, "low_lr": 7.631578947368423e-06, "step": 1175 }, { "epoch": 3.0900723208415517, "high_lr": 0.00038157894736842105, "low_lr": 7.631578947368423e-06, "step": 1175 }, { "epoch": 3.0927021696252464, "grad_norm": 1.3321278095245361, "learning_rate": 0.0003810526315789474, "loss": 1.3101, "step": 1176 }, { "epoch": 3.0927021696252464, "high_lr": 0.0003810526315789474, "low_lr": 7.621052631578948e-06, "step": 1176 }, { "epoch": 3.0927021696252464, "high_lr": 0.0003810526315789474, "low_lr": 7.621052631578948e-06, "step": 1176 }, { "epoch": 3.0927021696252464, "high_lr": 0.0003810526315789474, "low_lr": 7.621052631578948e-06, "step": 1176 }, { "epoch": 3.0927021696252464, "high_lr": 0.0003810526315789474, "low_lr": 7.621052631578948e-06, "step": 1176 }, { "epoch": 3.0927021696252464, "high_lr": 0.0003810526315789474, "low_lr": 7.621052631578948e-06, "step": 1176 }, { "epoch": 3.0927021696252464, "high_lr": 0.0003810526315789474, "low_lr": 7.621052631578948e-06, "step": 1176 }, { "epoch": 3.0927021696252464, "high_lr": 0.0003810526315789474, "low_lr": 7.621052631578948e-06, "step": 1176 }, { "epoch": 3.0927021696252464, "high_lr": 0.0003810526315789474, "low_lr": 7.621052631578948e-06, "step": 1176 }, { "epoch": 3.0953320184089415, "grad_norm": 1.3796805143356323, "learning_rate": 0.0003805263157894737, "loss": 1.325, "step": 1177 }, { "epoch": 3.0953320184089415, "high_lr": 0.0003805263157894737, "low_lr": 7.610526315789474e-06, "step": 1177 }, { "epoch": 3.0953320184089415, "high_lr": 0.0003805263157894737, "low_lr": 7.610526315789474e-06, "step": 1177 }, { "epoch": 3.0953320184089415, "high_lr": 0.0003805263157894737, "low_lr": 7.610526315789474e-06, "step": 1177 }, { "epoch": 3.0953320184089415, "high_lr": 0.0003805263157894737, "low_lr": 7.610526315789474e-06, "step": 1177 }, { "epoch": 3.0953320184089415, "high_lr": 0.0003805263157894737, "low_lr": 7.610526315789474e-06, "step": 1177 }, { "epoch": 3.0953320184089415, "high_lr": 0.0003805263157894737, "low_lr": 7.610526315789474e-06, "step": 1177 }, { "epoch": 3.0953320184089415, "high_lr": 0.0003805263157894737, "low_lr": 7.610526315789474e-06, "step": 1177 }, { "epoch": 3.0953320184089415, "high_lr": 0.0003805263157894737, "low_lr": 7.610526315789474e-06, "step": 1177 }, { "epoch": 3.0979618671926366, "grad_norm": 1.3283296823501587, "learning_rate": 0.00038, "loss": 1.3269, "step": 1178 }, { "epoch": 3.0979618671926366, "high_lr": 0.00038, "low_lr": 7.600000000000001e-06, "step": 1178 }, { "epoch": 3.0979618671926366, "high_lr": 0.00038, "low_lr": 7.600000000000001e-06, "step": 1178 }, { "epoch": 3.0979618671926366, "high_lr": 0.00038, "low_lr": 7.600000000000001e-06, "step": 1178 }, { "epoch": 3.0979618671926366, "high_lr": 0.00038, "low_lr": 7.600000000000001e-06, "step": 1178 }, { "epoch": 3.0979618671926366, "high_lr": 0.00038, "low_lr": 7.600000000000001e-06, "step": 1178 }, { "epoch": 3.0979618671926366, "high_lr": 0.00038, "low_lr": 7.600000000000001e-06, "step": 1178 }, { "epoch": 3.0979618671926366, "high_lr": 0.00038, "low_lr": 7.600000000000001e-06, "step": 1178 }, { "epoch": 3.0979618671926366, "high_lr": 0.00038, "low_lr": 7.600000000000001e-06, "step": 1178 }, { "epoch": 3.100591715976331, "grad_norm": 1.359445571899414, "learning_rate": 0.0003794736842105263, "loss": 1.2569, "step": 1179 }, { "epoch": 3.100591715976331, "high_lr": 0.0003794736842105263, "low_lr": 7.589473684210526e-06, "step": 1179 }, { "epoch": 3.100591715976331, "high_lr": 0.0003794736842105263, "low_lr": 7.589473684210526e-06, "step": 1179 }, { "epoch": 3.100591715976331, "high_lr": 0.0003794736842105263, "low_lr": 7.589473684210526e-06, "step": 1179 }, { "epoch": 3.100591715976331, "high_lr": 0.0003794736842105263, "low_lr": 7.589473684210526e-06, "step": 1179 }, { "epoch": 3.100591715976331, "high_lr": 0.0003794736842105263, "low_lr": 7.589473684210526e-06, "step": 1179 }, { "epoch": 3.100591715976331, "high_lr": 0.0003794736842105263, "low_lr": 7.589473684210526e-06, "step": 1179 }, { "epoch": 3.100591715976331, "high_lr": 0.0003794736842105263, "low_lr": 7.589473684210526e-06, "step": 1179 }, { "epoch": 3.100591715976331, "high_lr": 0.0003794736842105263, "low_lr": 7.589473684210526e-06, "step": 1179 }, { "epoch": 3.1032215647600263, "grad_norm": 1.3396329879760742, "learning_rate": 0.00037894736842105265, "loss": 1.3353, "step": 1180 }, { "epoch": 3.1032215647600263, "high_lr": 0.00037894736842105265, "low_lr": 7.578947368421054e-06, "step": 1180 }, { "epoch": 3.1032215647600263, "high_lr": 0.00037894736842105265, "low_lr": 7.578947368421054e-06, "step": 1180 }, { "epoch": 3.1032215647600263, "high_lr": 0.00037894736842105265, "low_lr": 7.578947368421054e-06, "step": 1180 }, { "epoch": 3.1032215647600263, "high_lr": 0.00037894736842105265, "low_lr": 7.578947368421054e-06, "step": 1180 }, { "epoch": 3.1032215647600263, "high_lr": 0.00037894736842105265, "low_lr": 7.578947368421054e-06, "step": 1180 }, { "epoch": 3.1032215647600263, "high_lr": 0.00037894736842105265, "low_lr": 7.578947368421054e-06, "step": 1180 }, { "epoch": 3.1032215647600263, "high_lr": 0.00037894736842105265, "low_lr": 7.578947368421054e-06, "step": 1180 }, { "epoch": 3.1032215647600263, "high_lr": 0.00037894736842105265, "low_lr": 7.578947368421054e-06, "step": 1180 }, { "epoch": 3.1058514135437214, "grad_norm": 1.2817565202713013, "learning_rate": 0.00037842105263157893, "loss": 1.3289, "step": 1181 }, { "epoch": 3.1058514135437214, "high_lr": 0.00037842105263157893, "low_lr": 7.568421052631579e-06, "step": 1181 }, { "epoch": 3.1058514135437214, "high_lr": 0.00037842105263157893, "low_lr": 7.568421052631579e-06, "step": 1181 }, { "epoch": 3.1058514135437214, "high_lr": 0.00037842105263157893, "low_lr": 7.568421052631579e-06, "step": 1181 }, { "epoch": 3.1058514135437214, "high_lr": 0.00037842105263157893, "low_lr": 7.568421052631579e-06, "step": 1181 }, { "epoch": 3.1058514135437214, "high_lr": 0.00037842105263157893, "low_lr": 7.568421052631579e-06, "step": 1181 }, { "epoch": 3.1058514135437214, "high_lr": 0.00037842105263157893, "low_lr": 7.568421052631579e-06, "step": 1181 }, { "epoch": 3.1058514135437214, "high_lr": 0.00037842105263157893, "low_lr": 7.568421052631579e-06, "step": 1181 }, { "epoch": 3.1058514135437214, "high_lr": 0.00037842105263157893, "low_lr": 7.568421052631579e-06, "step": 1181 }, { "epoch": 3.108481262327416, "grad_norm": 1.3851240873336792, "learning_rate": 0.00037789473684210527, "loss": 1.337, "step": 1182 }, { "epoch": 3.108481262327416, "high_lr": 0.00037789473684210527, "low_lr": 7.557894736842106e-06, "step": 1182 }, { "epoch": 3.108481262327416, "high_lr": 0.00037789473684210527, "low_lr": 7.557894736842106e-06, "step": 1182 }, { "epoch": 3.108481262327416, "high_lr": 0.00037789473684210527, "low_lr": 7.557894736842106e-06, "step": 1182 }, { "epoch": 3.108481262327416, "high_lr": 0.00037789473684210527, "low_lr": 7.557894736842106e-06, "step": 1182 }, { "epoch": 3.108481262327416, "high_lr": 0.00037789473684210527, "low_lr": 7.557894736842106e-06, "step": 1182 }, { "epoch": 3.108481262327416, "high_lr": 0.00037789473684210527, "low_lr": 7.557894736842106e-06, "step": 1182 }, { "epoch": 3.108481262327416, "high_lr": 0.00037789473684210527, "low_lr": 7.557894736842106e-06, "step": 1182 }, { "epoch": 3.108481262327416, "high_lr": 0.00037789473684210527, "low_lr": 7.557894736842106e-06, "step": 1182 }, { "epoch": 3.111111111111111, "grad_norm": 1.2566713094711304, "learning_rate": 0.00037736842105263156, "loss": 1.2824, "step": 1183 }, { "epoch": 3.111111111111111, "high_lr": 0.00037736842105263156, "low_lr": 7.547368421052632e-06, "step": 1183 }, { "epoch": 3.111111111111111, "high_lr": 0.00037736842105263156, "low_lr": 7.547368421052632e-06, "step": 1183 }, { "epoch": 3.111111111111111, "high_lr": 0.00037736842105263156, "low_lr": 7.547368421052632e-06, "step": 1183 }, { "epoch": 3.111111111111111, "high_lr": 0.00037736842105263156, "low_lr": 7.547368421052632e-06, "step": 1183 }, { "epoch": 3.111111111111111, "high_lr": 0.00037736842105263156, "low_lr": 7.547368421052632e-06, "step": 1183 }, { "epoch": 3.111111111111111, "high_lr": 0.00037736842105263156, "low_lr": 7.547368421052632e-06, "step": 1183 }, { "epoch": 3.111111111111111, "high_lr": 0.00037736842105263156, "low_lr": 7.547368421052632e-06, "step": 1183 }, { "epoch": 3.111111111111111, "high_lr": 0.00037736842105263156, "low_lr": 7.547368421052632e-06, "step": 1183 }, { "epoch": 3.1137409598948063, "grad_norm": 1.3147213459014893, "learning_rate": 0.0003768421052631579, "loss": 1.2508, "step": 1184 }, { "epoch": 3.1137409598948063, "high_lr": 0.0003768421052631579, "low_lr": 7.536842105263158e-06, "step": 1184 }, { "epoch": 3.1137409598948063, "high_lr": 0.0003768421052631579, "low_lr": 7.536842105263158e-06, "step": 1184 }, { "epoch": 3.1137409598948063, "high_lr": 0.0003768421052631579, "low_lr": 7.536842105263158e-06, "step": 1184 }, { "epoch": 3.1137409598948063, "high_lr": 0.0003768421052631579, "low_lr": 7.536842105263158e-06, "step": 1184 }, { "epoch": 3.1137409598948063, "high_lr": 0.0003768421052631579, "low_lr": 7.536842105263158e-06, "step": 1184 }, { "epoch": 3.1137409598948063, "high_lr": 0.0003768421052631579, "low_lr": 7.536842105263158e-06, "step": 1184 }, { "epoch": 3.1137409598948063, "high_lr": 0.0003768421052631579, "low_lr": 7.536842105263158e-06, "step": 1184 }, { "epoch": 3.1137409598948063, "high_lr": 0.0003768421052631579, "low_lr": 7.536842105263158e-06, "step": 1184 }, { "epoch": 3.116370808678501, "grad_norm": 1.3407189846038818, "learning_rate": 0.00037631578947368424, "loss": 1.2757, "step": 1185 }, { "epoch": 3.116370808678501, "high_lr": 0.00037631578947368424, "low_lr": 7.526315789473685e-06, "step": 1185 }, { "epoch": 3.116370808678501, "high_lr": 0.00037631578947368424, "low_lr": 7.526315789473685e-06, "step": 1185 }, { "epoch": 3.116370808678501, "high_lr": 0.00037631578947368424, "low_lr": 7.526315789473685e-06, "step": 1185 }, { "epoch": 3.116370808678501, "high_lr": 0.00037631578947368424, "low_lr": 7.526315789473685e-06, "step": 1185 }, { "epoch": 3.116370808678501, "high_lr": 0.00037631578947368424, "low_lr": 7.526315789473685e-06, "step": 1185 }, { "epoch": 3.116370808678501, "high_lr": 0.00037631578947368424, "low_lr": 7.526315789473685e-06, "step": 1185 }, { "epoch": 3.116370808678501, "high_lr": 0.00037631578947368424, "low_lr": 7.526315789473685e-06, "step": 1185 }, { "epoch": 3.116370808678501, "high_lr": 0.00037631578947368424, "low_lr": 7.526315789473685e-06, "step": 1185 }, { "epoch": 3.119000657462196, "grad_norm": 1.3812859058380127, "learning_rate": 0.0003757894736842105, "loss": 1.2975, "step": 1186 }, { "epoch": 3.119000657462196, "high_lr": 0.0003757894736842105, "low_lr": 7.515789473684211e-06, "step": 1186 }, { "epoch": 3.119000657462196, "high_lr": 0.0003757894736842105, "low_lr": 7.515789473684211e-06, "step": 1186 }, { "epoch": 3.119000657462196, "high_lr": 0.0003757894736842105, "low_lr": 7.515789473684211e-06, "step": 1186 }, { "epoch": 3.119000657462196, "high_lr": 0.0003757894736842105, "low_lr": 7.515789473684211e-06, "step": 1186 }, { "epoch": 3.119000657462196, "high_lr": 0.0003757894736842105, "low_lr": 7.515789473684211e-06, "step": 1186 }, { "epoch": 3.119000657462196, "high_lr": 0.0003757894736842105, "low_lr": 7.515789473684211e-06, "step": 1186 }, { "epoch": 3.119000657462196, "high_lr": 0.0003757894736842105, "low_lr": 7.515789473684211e-06, "step": 1186 }, { "epoch": 3.119000657462196, "high_lr": 0.0003757894736842105, "low_lr": 7.515789473684211e-06, "step": 1186 }, { "epoch": 3.1216305062458907, "grad_norm": 1.363731861114502, "learning_rate": 0.00037526315789473686, "loss": 1.2942, "step": 1187 }, { "epoch": 3.1216305062458907, "high_lr": 0.00037526315789473686, "low_lr": 7.505263157894738e-06, "step": 1187 }, { "epoch": 3.1216305062458907, "high_lr": 0.00037526315789473686, "low_lr": 7.505263157894738e-06, "step": 1187 }, { "epoch": 3.1216305062458907, "high_lr": 0.00037526315789473686, "low_lr": 7.505263157894738e-06, "step": 1187 }, { "epoch": 3.1216305062458907, "high_lr": 0.00037526315789473686, "low_lr": 7.505263157894738e-06, "step": 1187 }, { "epoch": 3.1216305062458907, "high_lr": 0.00037526315789473686, "low_lr": 7.505263157894738e-06, "step": 1187 }, { "epoch": 3.1216305062458907, "high_lr": 0.00037526315789473686, "low_lr": 7.505263157894738e-06, "step": 1187 }, { "epoch": 3.1216305062458907, "high_lr": 0.00037526315789473686, "low_lr": 7.505263157894738e-06, "step": 1187 }, { "epoch": 3.1216305062458907, "high_lr": 0.00037526315789473686, "low_lr": 7.505263157894738e-06, "step": 1187 }, { "epoch": 3.1242603550295858, "grad_norm": 1.3423538208007812, "learning_rate": 0.00037473684210526315, "loss": 1.2962, "step": 1188 }, { "epoch": 3.1242603550295858, "high_lr": 0.00037473684210526315, "low_lr": 7.494736842105263e-06, "step": 1188 }, { "epoch": 3.1242603550295858, "high_lr": 0.00037473684210526315, "low_lr": 7.494736842105263e-06, "step": 1188 }, { "epoch": 3.1242603550295858, "high_lr": 0.00037473684210526315, "low_lr": 7.494736842105263e-06, "step": 1188 }, { "epoch": 3.1242603550295858, "high_lr": 0.00037473684210526315, "low_lr": 7.494736842105263e-06, "step": 1188 }, { "epoch": 3.1242603550295858, "high_lr": 0.00037473684210526315, "low_lr": 7.494736842105263e-06, "step": 1188 }, { "epoch": 3.1242603550295858, "high_lr": 0.00037473684210526315, "low_lr": 7.494736842105263e-06, "step": 1188 }, { "epoch": 3.1242603550295858, "high_lr": 0.00037473684210526315, "low_lr": 7.494736842105263e-06, "step": 1188 }, { "epoch": 3.1242603550295858, "high_lr": 0.00037473684210526315, "low_lr": 7.494736842105263e-06, "step": 1188 }, { "epoch": 3.126890203813281, "grad_norm": 1.337683081626892, "learning_rate": 0.0003742105263157895, "loss": 1.327, "step": 1189 }, { "epoch": 3.126890203813281, "high_lr": 0.0003742105263157895, "low_lr": 7.4842105263157905e-06, "step": 1189 }, { "epoch": 3.126890203813281, "high_lr": 0.0003742105263157895, "low_lr": 7.4842105263157905e-06, "step": 1189 }, { "epoch": 3.126890203813281, "high_lr": 0.0003742105263157895, "low_lr": 7.4842105263157905e-06, "step": 1189 }, { "epoch": 3.126890203813281, "high_lr": 0.0003742105263157895, "low_lr": 7.4842105263157905e-06, "step": 1189 }, { "epoch": 3.126890203813281, "high_lr": 0.0003742105263157895, "low_lr": 7.4842105263157905e-06, "step": 1189 }, { "epoch": 3.126890203813281, "high_lr": 0.0003742105263157895, "low_lr": 7.4842105263157905e-06, "step": 1189 }, { "epoch": 3.126890203813281, "high_lr": 0.0003742105263157895, "low_lr": 7.4842105263157905e-06, "step": 1189 }, { "epoch": 3.126890203813281, "high_lr": 0.0003742105263157895, "low_lr": 7.4842105263157905e-06, "step": 1189 }, { "epoch": 3.1295200525969755, "grad_norm": 1.4115649461746216, "learning_rate": 0.0003736842105263158, "loss": 1.2936, "step": 1190 }, { "epoch": 3.1295200525969755, "high_lr": 0.0003736842105263158, "low_lr": 7.473684210526316e-06, "step": 1190 }, { "epoch": 3.1295200525969755, "high_lr": 0.0003736842105263158, "low_lr": 7.473684210526316e-06, "step": 1190 }, { "epoch": 3.1295200525969755, "high_lr": 0.0003736842105263158, "low_lr": 7.473684210526316e-06, "step": 1190 }, { "epoch": 3.1295200525969755, "high_lr": 0.0003736842105263158, "low_lr": 7.473684210526316e-06, "step": 1190 }, { "epoch": 3.1295200525969755, "high_lr": 0.0003736842105263158, "low_lr": 7.473684210526316e-06, "step": 1190 }, { "epoch": 3.1295200525969755, "high_lr": 0.0003736842105263158, "low_lr": 7.473684210526316e-06, "step": 1190 }, { "epoch": 3.1295200525969755, "high_lr": 0.0003736842105263158, "low_lr": 7.473684210526316e-06, "step": 1190 }, { "epoch": 3.1295200525969755, "high_lr": 0.0003736842105263158, "low_lr": 7.473684210526316e-06, "step": 1190 }, { "epoch": 3.1321499013806706, "grad_norm": 1.3720990419387817, "learning_rate": 0.0003731578947368421, "loss": 1.2764, "step": 1191 }, { "epoch": 3.1321499013806706, "high_lr": 0.0003731578947368421, "low_lr": 7.463157894736843e-06, "step": 1191 }, { "epoch": 3.1321499013806706, "high_lr": 0.0003731578947368421, "low_lr": 7.463157894736843e-06, "step": 1191 }, { "epoch": 3.1321499013806706, "high_lr": 0.0003731578947368421, "low_lr": 7.463157894736843e-06, "step": 1191 }, { "epoch": 3.1321499013806706, "high_lr": 0.0003731578947368421, "low_lr": 7.463157894736843e-06, "step": 1191 }, { "epoch": 3.1321499013806706, "high_lr": 0.0003731578947368421, "low_lr": 7.463157894736843e-06, "step": 1191 }, { "epoch": 3.1321499013806706, "high_lr": 0.0003731578947368421, "low_lr": 7.463157894736843e-06, "step": 1191 }, { "epoch": 3.1321499013806706, "high_lr": 0.0003731578947368421, "low_lr": 7.463157894736843e-06, "step": 1191 }, { "epoch": 3.1321499013806706, "high_lr": 0.0003731578947368421, "low_lr": 7.463157894736843e-06, "step": 1191 }, { "epoch": 3.1347797501643657, "grad_norm": 1.5742392539978027, "learning_rate": 0.00037263157894736846, "loss": 1.3159, "step": 1192 }, { "epoch": 3.1347797501643657, "high_lr": 0.00037263157894736846, "low_lr": 7.4526315789473695e-06, "step": 1192 }, { "epoch": 3.1347797501643657, "high_lr": 0.00037263157894736846, "low_lr": 7.4526315789473695e-06, "step": 1192 }, { "epoch": 3.1347797501643657, "high_lr": 0.00037263157894736846, "low_lr": 7.4526315789473695e-06, "step": 1192 }, { "epoch": 3.1347797501643657, "high_lr": 0.00037263157894736846, "low_lr": 7.4526315789473695e-06, "step": 1192 }, { "epoch": 3.1347797501643657, "high_lr": 0.00037263157894736846, "low_lr": 7.4526315789473695e-06, "step": 1192 }, { "epoch": 3.1347797501643657, "high_lr": 0.00037263157894736846, "low_lr": 7.4526315789473695e-06, "step": 1192 }, { "epoch": 3.1347797501643657, "high_lr": 0.00037263157894736846, "low_lr": 7.4526315789473695e-06, "step": 1192 }, { "epoch": 3.1347797501643657, "high_lr": 0.00037263157894736846, "low_lr": 7.4526315789473695e-06, "step": 1192 }, { "epoch": 3.1374095989480604, "grad_norm": 1.5012247562408447, "learning_rate": 0.00037210526315789474, "loss": 1.28, "step": 1193 }, { "epoch": 3.1374095989480604, "high_lr": 0.00037210526315789474, "low_lr": 7.442105263157895e-06, "step": 1193 }, { "epoch": 3.1374095989480604, "high_lr": 0.00037210526315789474, "low_lr": 7.442105263157895e-06, "step": 1193 }, { "epoch": 3.1374095989480604, "high_lr": 0.00037210526315789474, "low_lr": 7.442105263157895e-06, "step": 1193 }, { "epoch": 3.1374095989480604, "high_lr": 0.00037210526315789474, "low_lr": 7.442105263157895e-06, "step": 1193 }, { "epoch": 3.1374095989480604, "high_lr": 0.00037210526315789474, "low_lr": 7.442105263157895e-06, "step": 1193 }, { "epoch": 3.1374095989480604, "high_lr": 0.00037210526315789474, "low_lr": 7.442105263157895e-06, "step": 1193 }, { "epoch": 3.1374095989480604, "high_lr": 0.00037210526315789474, "low_lr": 7.442105263157895e-06, "step": 1193 }, { "epoch": 3.1374095989480604, "high_lr": 0.00037210526315789474, "low_lr": 7.442105263157895e-06, "step": 1193 }, { "epoch": 3.1400394477317555, "grad_norm": 1.3024135828018188, "learning_rate": 0.0003715789473684211, "loss": 1.284, "step": 1194 }, { "epoch": 3.1400394477317555, "high_lr": 0.0003715789473684211, "low_lr": 7.431578947368422e-06, "step": 1194 }, { "epoch": 3.1400394477317555, "high_lr": 0.0003715789473684211, "low_lr": 7.431578947368422e-06, "step": 1194 }, { "epoch": 3.1400394477317555, "high_lr": 0.0003715789473684211, "low_lr": 7.431578947368422e-06, "step": 1194 }, { "epoch": 3.1400394477317555, "high_lr": 0.0003715789473684211, "low_lr": 7.431578947368422e-06, "step": 1194 }, { "epoch": 3.1400394477317555, "high_lr": 0.0003715789473684211, "low_lr": 7.431578947368422e-06, "step": 1194 }, { "epoch": 3.1400394477317555, "high_lr": 0.0003715789473684211, "low_lr": 7.431578947368422e-06, "step": 1194 }, { "epoch": 3.1400394477317555, "high_lr": 0.0003715789473684211, "low_lr": 7.431578947368422e-06, "step": 1194 }, { "epoch": 3.1400394477317555, "high_lr": 0.0003715789473684211, "low_lr": 7.431578947368422e-06, "step": 1194 }, { "epoch": 3.14266929651545, "grad_norm": 1.489599585533142, "learning_rate": 0.00037105263157894737, "loss": 1.2985, "step": 1195 }, { "epoch": 3.14266929651545, "high_lr": 0.00037105263157894737, "low_lr": 7.421052631578948e-06, "step": 1195 }, { "epoch": 3.14266929651545, "high_lr": 0.00037105263157894737, "low_lr": 7.421052631578948e-06, "step": 1195 }, { "epoch": 3.14266929651545, "high_lr": 0.00037105263157894737, "low_lr": 7.421052631578948e-06, "step": 1195 }, { "epoch": 3.14266929651545, "high_lr": 0.00037105263157894737, "low_lr": 7.421052631578948e-06, "step": 1195 }, { "epoch": 3.14266929651545, "high_lr": 0.00037105263157894737, "low_lr": 7.421052631578948e-06, "step": 1195 }, { "epoch": 3.14266929651545, "high_lr": 0.00037105263157894737, "low_lr": 7.421052631578948e-06, "step": 1195 }, { "epoch": 3.14266929651545, "high_lr": 0.00037105263157894737, "low_lr": 7.421052631578948e-06, "step": 1195 }, { "epoch": 3.14266929651545, "high_lr": 0.00037105263157894737, "low_lr": 7.421052631578948e-06, "step": 1195 }, { "epoch": 3.1452991452991452, "grad_norm": 1.2693541049957275, "learning_rate": 0.0003705263157894737, "loss": 1.2973, "step": 1196 }, { "epoch": 3.1452991452991452, "high_lr": 0.0003705263157894737, "low_lr": 7.410526315789475e-06, "step": 1196 }, { "epoch": 3.1452991452991452, "high_lr": 0.0003705263157894737, "low_lr": 7.410526315789475e-06, "step": 1196 }, { "epoch": 3.1452991452991452, "high_lr": 0.0003705263157894737, "low_lr": 7.410526315789475e-06, "step": 1196 }, { "epoch": 3.1452991452991452, "high_lr": 0.0003705263157894737, "low_lr": 7.410526315789475e-06, "step": 1196 }, { "epoch": 3.1452991452991452, "high_lr": 0.0003705263157894737, "low_lr": 7.410526315789475e-06, "step": 1196 }, { "epoch": 3.1452991452991452, "high_lr": 0.0003705263157894737, "low_lr": 7.410526315789475e-06, "step": 1196 }, { "epoch": 3.1452991452991452, "high_lr": 0.0003705263157894737, "low_lr": 7.410526315789475e-06, "step": 1196 }, { "epoch": 3.1452991452991452, "high_lr": 0.0003705263157894737, "low_lr": 7.410526315789475e-06, "step": 1196 }, { "epoch": 3.1479289940828403, "grad_norm": 1.2748855352401733, "learning_rate": 0.00037, "loss": 1.2723, "step": 1197 }, { "epoch": 3.1479289940828403, "high_lr": 0.00037, "low_lr": 7.4e-06, "step": 1197 }, { "epoch": 3.1479289940828403, "high_lr": 0.00037, "low_lr": 7.4e-06, "step": 1197 }, { "epoch": 3.1479289940828403, "high_lr": 0.00037, "low_lr": 7.4e-06, "step": 1197 }, { "epoch": 3.1479289940828403, "high_lr": 0.00037, "low_lr": 7.4e-06, "step": 1197 }, { "epoch": 3.1479289940828403, "high_lr": 0.00037, "low_lr": 7.4e-06, "step": 1197 }, { "epoch": 3.1479289940828403, "high_lr": 0.00037, "low_lr": 7.4e-06, "step": 1197 }, { "epoch": 3.1479289940828403, "high_lr": 0.00037, "low_lr": 7.4e-06, "step": 1197 }, { "epoch": 3.1479289940828403, "high_lr": 0.00037, "low_lr": 7.4e-06, "step": 1197 }, { "epoch": 3.150558842866535, "grad_norm": 1.4245465993881226, "learning_rate": 0.00036947368421052633, "loss": 1.3359, "step": 1198 }, { "epoch": 3.150558842866535, "high_lr": 0.00036947368421052633, "low_lr": 7.3894736842105275e-06, "step": 1198 }, { "epoch": 3.150558842866535, "high_lr": 0.00036947368421052633, "low_lr": 7.3894736842105275e-06, "step": 1198 }, { "epoch": 3.150558842866535, "high_lr": 0.00036947368421052633, "low_lr": 7.3894736842105275e-06, "step": 1198 }, { "epoch": 3.150558842866535, "high_lr": 0.00036947368421052633, "low_lr": 7.3894736842105275e-06, "step": 1198 }, { "epoch": 3.150558842866535, "high_lr": 0.00036947368421052633, "low_lr": 7.3894736842105275e-06, "step": 1198 }, { "epoch": 3.150558842866535, "high_lr": 0.00036947368421052633, "low_lr": 7.3894736842105275e-06, "step": 1198 }, { "epoch": 3.150558842866535, "high_lr": 0.00036947368421052633, "low_lr": 7.3894736842105275e-06, "step": 1198 }, { "epoch": 3.150558842866535, "high_lr": 0.00036947368421052633, "low_lr": 7.3894736842105275e-06, "step": 1198 }, { "epoch": 3.15318869165023, "grad_norm": 1.4283756017684937, "learning_rate": 0.0003689473684210526, "loss": 1.2856, "step": 1199 }, { "epoch": 3.15318869165023, "high_lr": 0.0003689473684210526, "low_lr": 7.378947368421053e-06, "step": 1199 }, { "epoch": 3.15318869165023, "high_lr": 0.0003689473684210526, "low_lr": 7.378947368421053e-06, "step": 1199 }, { "epoch": 3.15318869165023, "high_lr": 0.0003689473684210526, "low_lr": 7.378947368421053e-06, "step": 1199 }, { "epoch": 3.15318869165023, "high_lr": 0.0003689473684210526, "low_lr": 7.378947368421053e-06, "step": 1199 }, { "epoch": 3.15318869165023, "high_lr": 0.0003689473684210526, "low_lr": 7.378947368421053e-06, "step": 1199 }, { "epoch": 3.15318869165023, "high_lr": 0.0003689473684210526, "low_lr": 7.378947368421053e-06, "step": 1199 }, { "epoch": 3.15318869165023, "high_lr": 0.0003689473684210526, "low_lr": 7.378947368421053e-06, "step": 1199 }, { "epoch": 3.15318869165023, "high_lr": 0.0003689473684210526, "low_lr": 7.378947368421053e-06, "step": 1199 }, { "epoch": 3.155818540433925, "grad_norm": 1.4153192043304443, "learning_rate": 0.00036842105263157896, "loss": 1.2831, "step": 1200 }, { "epoch": 3.155818540433925, "high_lr": 0.00036842105263157896, "low_lr": 7.368421052631579e-06, "step": 1200 }, { "epoch": 3.155818540433925, "high_lr": 0.00036842105263157896, "low_lr": 7.368421052631579e-06, "step": 1200 }, { "epoch": 3.155818540433925, "high_lr": 0.00036842105263157896, "low_lr": 7.368421052631579e-06, "step": 1200 }, { "epoch": 3.155818540433925, "high_lr": 0.00036842105263157896, "low_lr": 7.368421052631579e-06, "step": 1200 }, { "epoch": 3.155818540433925, "high_lr": 0.00036842105263157896, "low_lr": 7.368421052631579e-06, "step": 1200 }, { "epoch": 3.155818540433925, "high_lr": 0.00036842105263157896, "low_lr": 7.368421052631579e-06, "step": 1200 }, { "epoch": 3.155818540433925, "high_lr": 0.00036842105263157896, "low_lr": 7.368421052631579e-06, "step": 1200 }, { "epoch": 3.155818540433925, "high_lr": 0.00036842105263157896, "low_lr": 7.368421052631579e-06, "step": 1200 }, { "epoch": 3.15844838921762, "grad_norm": 1.3777788877487183, "learning_rate": 0.0003678947368421053, "loss": 1.2857, "step": 1201 }, { "epoch": 3.15844838921762, "high_lr": 0.0003678947368421053, "low_lr": 7.3578947368421065e-06, "step": 1201 }, { "epoch": 3.15844838921762, "high_lr": 0.0003678947368421053, "low_lr": 7.3578947368421065e-06, "step": 1201 }, { "epoch": 3.15844838921762, "high_lr": 0.0003678947368421053, "low_lr": 7.3578947368421065e-06, "step": 1201 }, { "epoch": 3.15844838921762, "high_lr": 0.0003678947368421053, "low_lr": 7.3578947368421065e-06, "step": 1201 }, { "epoch": 3.15844838921762, "high_lr": 0.0003678947368421053, "low_lr": 7.3578947368421065e-06, "step": 1201 }, { "epoch": 3.15844838921762, "high_lr": 0.0003678947368421053, "low_lr": 7.3578947368421065e-06, "step": 1201 }, { "epoch": 3.15844838921762, "high_lr": 0.0003678947368421053, "low_lr": 7.3578947368421065e-06, "step": 1201 }, { "epoch": 3.15844838921762, "high_lr": 0.0003678947368421053, "low_lr": 7.3578947368421065e-06, "step": 1201 }, { "epoch": 3.161078238001315, "grad_norm": 1.4032799005508423, "learning_rate": 0.0003673684210526316, "loss": 1.3104, "step": 1202 }, { "epoch": 3.161078238001315, "high_lr": 0.0003673684210526316, "low_lr": 7.347368421052632e-06, "step": 1202 }, { "epoch": 3.161078238001315, "high_lr": 0.0003673684210526316, "low_lr": 7.347368421052632e-06, "step": 1202 }, { "epoch": 3.161078238001315, "high_lr": 0.0003673684210526316, "low_lr": 7.347368421052632e-06, "step": 1202 }, { "epoch": 3.161078238001315, "high_lr": 0.0003673684210526316, "low_lr": 7.347368421052632e-06, "step": 1202 }, { "epoch": 3.161078238001315, "high_lr": 0.0003673684210526316, "low_lr": 7.347368421052632e-06, "step": 1202 }, { "epoch": 3.161078238001315, "high_lr": 0.0003673684210526316, "low_lr": 7.347368421052632e-06, "step": 1202 }, { "epoch": 3.161078238001315, "high_lr": 0.0003673684210526316, "low_lr": 7.347368421052632e-06, "step": 1202 }, { "epoch": 3.161078238001315, "high_lr": 0.0003673684210526316, "low_lr": 7.347368421052632e-06, "step": 1202 }, { "epoch": 3.16370808678501, "grad_norm": 1.3944417238235474, "learning_rate": 0.0003668421052631579, "loss": 1.2586, "step": 1203 }, { "epoch": 3.16370808678501, "high_lr": 0.0003668421052631579, "low_lr": 7.336842105263159e-06, "step": 1203 }, { "epoch": 3.16370808678501, "high_lr": 0.0003668421052631579, "low_lr": 7.336842105263159e-06, "step": 1203 }, { "epoch": 3.16370808678501, "high_lr": 0.0003668421052631579, "low_lr": 7.336842105263159e-06, "step": 1203 }, { "epoch": 3.16370808678501, "high_lr": 0.0003668421052631579, "low_lr": 7.336842105263159e-06, "step": 1203 }, { "epoch": 3.16370808678501, "high_lr": 0.0003668421052631579, "low_lr": 7.336842105263159e-06, "step": 1203 }, { "epoch": 3.16370808678501, "high_lr": 0.0003668421052631579, "low_lr": 7.336842105263159e-06, "step": 1203 }, { "epoch": 3.16370808678501, "high_lr": 0.0003668421052631579, "low_lr": 7.336842105263159e-06, "step": 1203 }, { "epoch": 3.16370808678501, "high_lr": 0.0003668421052631579, "low_lr": 7.336842105263159e-06, "step": 1203 }, { "epoch": 3.1663379355687047, "grad_norm": 1.3695789575576782, "learning_rate": 0.0003663157894736842, "loss": 1.2655, "step": 1204 }, { "epoch": 3.1663379355687047, "high_lr": 0.0003663157894736842, "low_lr": 7.326315789473685e-06, "step": 1204 }, { "epoch": 3.1663379355687047, "high_lr": 0.0003663157894736842, "low_lr": 7.326315789473685e-06, "step": 1204 }, { "epoch": 3.1663379355687047, "high_lr": 0.0003663157894736842, "low_lr": 7.326315789473685e-06, "step": 1204 }, { "epoch": 3.1663379355687047, "high_lr": 0.0003663157894736842, "low_lr": 7.326315789473685e-06, "step": 1204 }, { "epoch": 3.1663379355687047, "high_lr": 0.0003663157894736842, "low_lr": 7.326315789473685e-06, "step": 1204 }, { "epoch": 3.1663379355687047, "high_lr": 0.0003663157894736842, "low_lr": 7.326315789473685e-06, "step": 1204 }, { "epoch": 3.1663379355687047, "high_lr": 0.0003663157894736842, "low_lr": 7.326315789473685e-06, "step": 1204 }, { "epoch": 3.1663379355687047, "high_lr": 0.0003663157894736842, "low_lr": 7.326315789473685e-06, "step": 1204 }, { "epoch": 3.1689677843524, "grad_norm": 1.3784259557724, "learning_rate": 0.00036578947368421055, "loss": 1.316, "step": 1205 }, { "epoch": 3.1689677843524, "high_lr": 0.00036578947368421055, "low_lr": 7.315789473684212e-06, "step": 1205 }, { "epoch": 3.1689677843524, "high_lr": 0.00036578947368421055, "low_lr": 7.315789473684212e-06, "step": 1205 }, { "epoch": 3.1689677843524, "high_lr": 0.00036578947368421055, "low_lr": 7.315789473684212e-06, "step": 1205 }, { "epoch": 3.1689677843524, "high_lr": 0.00036578947368421055, "low_lr": 7.315789473684212e-06, "step": 1205 }, { "epoch": 3.1689677843524, "high_lr": 0.00036578947368421055, "low_lr": 7.315789473684212e-06, "step": 1205 }, { "epoch": 3.1689677843524, "high_lr": 0.00036578947368421055, "low_lr": 7.315789473684212e-06, "step": 1205 }, { "epoch": 3.1689677843524, "high_lr": 0.00036578947368421055, "low_lr": 7.315789473684212e-06, "step": 1205 }, { "epoch": 3.1689677843524, "high_lr": 0.00036578947368421055, "low_lr": 7.315789473684212e-06, "step": 1205 }, { "epoch": 3.171597633136095, "grad_norm": 1.284883737564087, "learning_rate": 0.00036526315789473684, "loss": 1.3479, "step": 1206 }, { "epoch": 3.171597633136095, "high_lr": 0.00036526315789473684, "low_lr": 7.305263157894737e-06, "step": 1206 }, { "epoch": 3.171597633136095, "high_lr": 0.00036526315789473684, "low_lr": 7.305263157894737e-06, "step": 1206 }, { "epoch": 3.171597633136095, "high_lr": 0.00036526315789473684, "low_lr": 7.305263157894737e-06, "step": 1206 }, { "epoch": 3.171597633136095, "high_lr": 0.00036526315789473684, "low_lr": 7.305263157894737e-06, "step": 1206 }, { "epoch": 3.171597633136095, "high_lr": 0.00036526315789473684, "low_lr": 7.305263157894737e-06, "step": 1206 }, { "epoch": 3.171597633136095, "high_lr": 0.00036526315789473684, "low_lr": 7.305263157894737e-06, "step": 1206 }, { "epoch": 3.171597633136095, "high_lr": 0.00036526315789473684, "low_lr": 7.305263157894737e-06, "step": 1206 }, { "epoch": 3.171597633136095, "high_lr": 0.00036526315789473684, "low_lr": 7.305263157894737e-06, "step": 1206 }, { "epoch": 3.1742274819197895, "grad_norm": 1.364266037940979, "learning_rate": 0.0003647368421052631, "loss": 1.2961, "step": 1207 }, { "epoch": 3.1742274819197895, "high_lr": 0.0003647368421052631, "low_lr": 7.2947368421052636e-06, "step": 1207 }, { "epoch": 3.1742274819197895, "high_lr": 0.0003647368421052631, "low_lr": 7.2947368421052636e-06, "step": 1207 }, { "epoch": 3.1742274819197895, "high_lr": 0.0003647368421052631, "low_lr": 7.2947368421052636e-06, "step": 1207 }, { "epoch": 3.1742274819197895, "high_lr": 0.0003647368421052631, "low_lr": 7.2947368421052636e-06, "step": 1207 }, { "epoch": 3.1742274819197895, "high_lr": 0.0003647368421052631, "low_lr": 7.2947368421052636e-06, "step": 1207 }, { "epoch": 3.1742274819197895, "high_lr": 0.0003647368421052631, "low_lr": 7.2947368421052636e-06, "step": 1207 }, { "epoch": 3.1742274819197895, "high_lr": 0.0003647368421052631, "low_lr": 7.2947368421052636e-06, "step": 1207 }, { "epoch": 3.1742274819197895, "high_lr": 0.0003647368421052631, "low_lr": 7.2947368421052636e-06, "step": 1207 }, { "epoch": 3.1768573307034846, "grad_norm": 1.5872355699539185, "learning_rate": 0.0003642105263157895, "loss": 1.2343, "step": 1208 }, { "epoch": 3.1768573307034846, "high_lr": 0.0003642105263157895, "low_lr": 7.28421052631579e-06, "step": 1208 }, { "epoch": 3.1768573307034846, "high_lr": 0.0003642105263157895, "low_lr": 7.28421052631579e-06, "step": 1208 }, { "epoch": 3.1768573307034846, "high_lr": 0.0003642105263157895, "low_lr": 7.28421052631579e-06, "step": 1208 }, { "epoch": 3.1768573307034846, "high_lr": 0.0003642105263157895, "low_lr": 7.28421052631579e-06, "step": 1208 }, { "epoch": 3.1768573307034846, "high_lr": 0.0003642105263157895, "low_lr": 7.28421052631579e-06, "step": 1208 }, { "epoch": 3.1768573307034846, "high_lr": 0.0003642105263157895, "low_lr": 7.28421052631579e-06, "step": 1208 }, { "epoch": 3.1768573307034846, "high_lr": 0.0003642105263157895, "low_lr": 7.28421052631579e-06, "step": 1208 }, { "epoch": 3.1768573307034846, "high_lr": 0.0003642105263157895, "low_lr": 7.28421052631579e-06, "step": 1208 }, { "epoch": 3.1794871794871793, "grad_norm": 1.3258522748947144, "learning_rate": 0.0003636842105263158, "loss": 1.2877, "step": 1209 }, { "epoch": 3.1794871794871793, "high_lr": 0.0003636842105263158, "low_lr": 7.273684210526316e-06, "step": 1209 }, { "epoch": 3.1794871794871793, "high_lr": 0.0003636842105263158, "low_lr": 7.273684210526316e-06, "step": 1209 }, { "epoch": 3.1794871794871793, "high_lr": 0.0003636842105263158, "low_lr": 7.273684210526316e-06, "step": 1209 }, { "epoch": 3.1794871794871793, "high_lr": 0.0003636842105263158, "low_lr": 7.273684210526316e-06, "step": 1209 }, { "epoch": 3.1794871794871793, "high_lr": 0.0003636842105263158, "low_lr": 7.273684210526316e-06, "step": 1209 }, { "epoch": 3.1794871794871793, "high_lr": 0.0003636842105263158, "low_lr": 7.273684210526316e-06, "step": 1209 }, { "epoch": 3.1794871794871793, "high_lr": 0.0003636842105263158, "low_lr": 7.273684210526316e-06, "step": 1209 }, { "epoch": 3.1794871794871793, "high_lr": 0.0003636842105263158, "low_lr": 7.273684210526316e-06, "step": 1209 }, { "epoch": 3.1821170282708744, "grad_norm": 1.4260060787200928, "learning_rate": 0.00036315789473684214, "loss": 1.3165, "step": 1210 }, { "epoch": 3.1821170282708744, "high_lr": 0.00036315789473684214, "low_lr": 7.263157894736843e-06, "step": 1210 }, { "epoch": 3.1821170282708744, "high_lr": 0.00036315789473684214, "low_lr": 7.263157894736843e-06, "step": 1210 }, { "epoch": 3.1821170282708744, "high_lr": 0.00036315789473684214, "low_lr": 7.263157894736843e-06, "step": 1210 }, { "epoch": 3.1821170282708744, "high_lr": 0.00036315789473684214, "low_lr": 7.263157894736843e-06, "step": 1210 }, { "epoch": 3.1821170282708744, "high_lr": 0.00036315789473684214, "low_lr": 7.263157894736843e-06, "step": 1210 }, { "epoch": 3.1821170282708744, "high_lr": 0.00036315789473684214, "low_lr": 7.263157894736843e-06, "step": 1210 }, { "epoch": 3.1821170282708744, "high_lr": 0.00036315789473684214, "low_lr": 7.263157894736843e-06, "step": 1210 }, { "epoch": 3.1821170282708744, "high_lr": 0.00036315789473684214, "low_lr": 7.263157894736843e-06, "step": 1210 }, { "epoch": 3.1847468770545695, "grad_norm": 1.4013381004333496, "learning_rate": 0.00036263157894736843, "loss": 1.3305, "step": 1211 }, { "epoch": 3.1847468770545695, "high_lr": 0.00036263157894736843, "low_lr": 7.252631578947369e-06, "step": 1211 }, { "epoch": 3.1847468770545695, "high_lr": 0.00036263157894736843, "low_lr": 7.252631578947369e-06, "step": 1211 }, { "epoch": 3.1847468770545695, "high_lr": 0.00036263157894736843, "low_lr": 7.252631578947369e-06, "step": 1211 }, { "epoch": 3.1847468770545695, "high_lr": 0.00036263157894736843, "low_lr": 7.252631578947369e-06, "step": 1211 }, { "epoch": 3.1847468770545695, "high_lr": 0.00036263157894736843, "low_lr": 7.252631578947369e-06, "step": 1211 }, { "epoch": 3.1847468770545695, "high_lr": 0.00036263157894736843, "low_lr": 7.252631578947369e-06, "step": 1211 }, { "epoch": 3.1847468770545695, "high_lr": 0.00036263157894736843, "low_lr": 7.252631578947369e-06, "step": 1211 }, { "epoch": 3.1847468770545695, "high_lr": 0.00036263157894736843, "low_lr": 7.252631578947369e-06, "step": 1211 }, { "epoch": 3.187376725838264, "grad_norm": 1.4238418340682983, "learning_rate": 0.00036210526315789477, "loss": 1.3362, "step": 1212 }, { "epoch": 3.187376725838264, "high_lr": 0.00036210526315789477, "low_lr": 7.242105263157896e-06, "step": 1212 }, { "epoch": 3.187376725838264, "high_lr": 0.00036210526315789477, "low_lr": 7.242105263157896e-06, "step": 1212 }, { "epoch": 3.187376725838264, "high_lr": 0.00036210526315789477, "low_lr": 7.242105263157896e-06, "step": 1212 }, { "epoch": 3.187376725838264, "high_lr": 0.00036210526315789477, "low_lr": 7.242105263157896e-06, "step": 1212 }, { "epoch": 3.187376725838264, "high_lr": 0.00036210526315789477, "low_lr": 7.242105263157896e-06, "step": 1212 }, { "epoch": 3.187376725838264, "high_lr": 0.00036210526315789477, "low_lr": 7.242105263157896e-06, "step": 1212 }, { "epoch": 3.187376725838264, "high_lr": 0.00036210526315789477, "low_lr": 7.242105263157896e-06, "step": 1212 }, { "epoch": 3.187376725838264, "high_lr": 0.00036210526315789477, "low_lr": 7.242105263157896e-06, "step": 1212 }, { "epoch": 3.1900065746219592, "grad_norm": 1.3276677131652832, "learning_rate": 0.00036157894736842106, "loss": 1.2812, "step": 1213 }, { "epoch": 3.1900065746219592, "high_lr": 0.00036157894736842106, "low_lr": 7.2315789473684215e-06, "step": 1213 }, { "epoch": 3.1900065746219592, "high_lr": 0.00036157894736842106, "low_lr": 7.2315789473684215e-06, "step": 1213 }, { "epoch": 3.1900065746219592, "high_lr": 0.00036157894736842106, "low_lr": 7.2315789473684215e-06, "step": 1213 }, { "epoch": 3.1900065746219592, "high_lr": 0.00036157894736842106, "low_lr": 7.2315789473684215e-06, "step": 1213 }, { "epoch": 3.1900065746219592, "high_lr": 0.00036157894736842106, "low_lr": 7.2315789473684215e-06, "step": 1213 }, { "epoch": 3.1900065746219592, "high_lr": 0.00036157894736842106, "low_lr": 7.2315789473684215e-06, "step": 1213 }, { "epoch": 3.1900065746219592, "high_lr": 0.00036157894736842106, "low_lr": 7.2315789473684215e-06, "step": 1213 }, { "epoch": 3.1900065746219592, "high_lr": 0.00036157894736842106, "low_lr": 7.2315789473684215e-06, "step": 1213 }, { "epoch": 3.1926364234056543, "grad_norm": 1.320291519165039, "learning_rate": 0.00036105263157894734, "loss": 1.3383, "step": 1214 }, { "epoch": 3.1926364234056543, "high_lr": 0.00036105263157894734, "low_lr": 7.221052631578948e-06, "step": 1214 }, { "epoch": 3.1926364234056543, "high_lr": 0.00036105263157894734, "low_lr": 7.221052631578948e-06, "step": 1214 }, { "epoch": 3.1926364234056543, "high_lr": 0.00036105263157894734, "low_lr": 7.221052631578948e-06, "step": 1214 }, { "epoch": 3.1926364234056543, "high_lr": 0.00036105263157894734, "low_lr": 7.221052631578948e-06, "step": 1214 }, { "epoch": 3.1926364234056543, "high_lr": 0.00036105263157894734, "low_lr": 7.221052631578948e-06, "step": 1214 }, { "epoch": 3.1926364234056543, "high_lr": 0.00036105263157894734, "low_lr": 7.221052631578948e-06, "step": 1214 }, { "epoch": 3.1926364234056543, "high_lr": 0.00036105263157894734, "low_lr": 7.221052631578948e-06, "step": 1214 }, { "epoch": 3.1926364234056543, "high_lr": 0.00036105263157894734, "low_lr": 7.221052631578948e-06, "step": 1214 }, { "epoch": 3.195266272189349, "grad_norm": 1.5130553245544434, "learning_rate": 0.0003605263157894737, "loss": 1.3628, "step": 1215 }, { "epoch": 3.195266272189349, "high_lr": 0.0003605263157894737, "low_lr": 7.210526315789474e-06, "step": 1215 }, { "epoch": 3.195266272189349, "high_lr": 0.0003605263157894737, "low_lr": 7.210526315789474e-06, "step": 1215 }, { "epoch": 3.195266272189349, "high_lr": 0.0003605263157894737, "low_lr": 7.210526315789474e-06, "step": 1215 }, { "epoch": 3.195266272189349, "high_lr": 0.0003605263157894737, "low_lr": 7.210526315789474e-06, "step": 1215 }, { "epoch": 3.195266272189349, "high_lr": 0.0003605263157894737, "low_lr": 7.210526315789474e-06, "step": 1215 }, { "epoch": 3.195266272189349, "high_lr": 0.0003605263157894737, "low_lr": 7.210526315789474e-06, "step": 1215 }, { "epoch": 3.195266272189349, "high_lr": 0.0003605263157894737, "low_lr": 7.210526315789474e-06, "step": 1215 }, { "epoch": 3.195266272189349, "high_lr": 0.0003605263157894737, "low_lr": 7.210526315789474e-06, "step": 1215 }, { "epoch": 3.197896120973044, "grad_norm": 1.4092506170272827, "learning_rate": 0.00035999999999999997, "loss": 1.3085, "step": 1216 }, { "epoch": 3.197896120973044, "high_lr": 0.00035999999999999997, "low_lr": 7.2000000000000005e-06, "step": 1216 }, { "epoch": 3.197896120973044, "high_lr": 0.00035999999999999997, "low_lr": 7.2000000000000005e-06, "step": 1216 }, { "epoch": 3.197896120973044, "high_lr": 0.00035999999999999997, "low_lr": 7.2000000000000005e-06, "step": 1216 }, { "epoch": 3.197896120973044, "high_lr": 0.00035999999999999997, "low_lr": 7.2000000000000005e-06, "step": 1216 }, { "epoch": 3.197896120973044, "high_lr": 0.00035999999999999997, "low_lr": 7.2000000000000005e-06, "step": 1216 }, { "epoch": 3.197896120973044, "high_lr": 0.00035999999999999997, "low_lr": 7.2000000000000005e-06, "step": 1216 }, { "epoch": 3.197896120973044, "high_lr": 0.00035999999999999997, "low_lr": 7.2000000000000005e-06, "step": 1216 }, { "epoch": 3.197896120973044, "high_lr": 0.00035999999999999997, "low_lr": 7.2000000000000005e-06, "step": 1216 }, { "epoch": 3.2005259697567388, "grad_norm": 1.3080147504806519, "learning_rate": 0.00035947368421052636, "loss": 1.2738, "step": 1217 }, { "epoch": 3.2005259697567388, "high_lr": 0.00035947368421052636, "low_lr": 7.189473684210527e-06, "step": 1217 }, { "epoch": 3.2005259697567388, "high_lr": 0.00035947368421052636, "low_lr": 7.189473684210527e-06, "step": 1217 }, { "epoch": 3.2005259697567388, "high_lr": 0.00035947368421052636, "low_lr": 7.189473684210527e-06, "step": 1217 }, { "epoch": 3.2005259697567388, "high_lr": 0.00035947368421052636, "low_lr": 7.189473684210527e-06, "step": 1217 }, { "epoch": 3.2005259697567388, "high_lr": 0.00035947368421052636, "low_lr": 7.189473684210527e-06, "step": 1217 }, { "epoch": 3.2005259697567388, "high_lr": 0.00035947368421052636, "low_lr": 7.189473684210527e-06, "step": 1217 }, { "epoch": 3.2005259697567388, "high_lr": 0.00035947368421052636, "low_lr": 7.189473684210527e-06, "step": 1217 }, { "epoch": 3.2005259697567388, "high_lr": 0.00035947368421052636, "low_lr": 7.189473684210527e-06, "step": 1217 }, { "epoch": 3.203155818540434, "grad_norm": 1.2631381750106812, "learning_rate": 0.00035894736842105265, "loss": 1.3021, "step": 1218 }, { "epoch": 3.203155818540434, "high_lr": 0.00035894736842105265, "low_lr": 7.178947368421053e-06, "step": 1218 }, { "epoch": 3.203155818540434, "high_lr": 0.00035894736842105265, "low_lr": 7.178947368421053e-06, "step": 1218 }, { "epoch": 3.203155818540434, "high_lr": 0.00035894736842105265, "low_lr": 7.178947368421053e-06, "step": 1218 }, { "epoch": 3.203155818540434, "high_lr": 0.00035894736842105265, "low_lr": 7.178947368421053e-06, "step": 1218 }, { "epoch": 3.203155818540434, "high_lr": 0.00035894736842105265, "low_lr": 7.178947368421053e-06, "step": 1218 }, { "epoch": 3.203155818540434, "high_lr": 0.00035894736842105265, "low_lr": 7.178947368421053e-06, "step": 1218 }, { "epoch": 3.203155818540434, "high_lr": 0.00035894736842105265, "low_lr": 7.178947368421053e-06, "step": 1218 }, { "epoch": 3.203155818540434, "high_lr": 0.00035894736842105265, "low_lr": 7.178947368421053e-06, "step": 1218 }, { "epoch": 3.205785667324129, "grad_norm": 1.3651789426803589, "learning_rate": 0.000358421052631579, "loss": 1.323, "step": 1219 }, { "epoch": 3.205785667324129, "high_lr": 0.000358421052631579, "low_lr": 7.16842105263158e-06, "step": 1219 }, { "epoch": 3.205785667324129, "high_lr": 0.000358421052631579, "low_lr": 7.16842105263158e-06, "step": 1219 }, { "epoch": 3.205785667324129, "high_lr": 0.000358421052631579, "low_lr": 7.16842105263158e-06, "step": 1219 }, { "epoch": 3.205785667324129, "high_lr": 0.000358421052631579, "low_lr": 7.16842105263158e-06, "step": 1219 }, { "epoch": 3.205785667324129, "high_lr": 0.000358421052631579, "low_lr": 7.16842105263158e-06, "step": 1219 }, { "epoch": 3.205785667324129, "high_lr": 0.000358421052631579, "low_lr": 7.16842105263158e-06, "step": 1219 }, { "epoch": 3.205785667324129, "high_lr": 0.000358421052631579, "low_lr": 7.16842105263158e-06, "step": 1219 }, { "epoch": 3.205785667324129, "high_lr": 0.000358421052631579, "low_lr": 7.16842105263158e-06, "step": 1219 }, { "epoch": 3.2084155161078236, "grad_norm": 1.3125711679458618, "learning_rate": 0.0003578947368421053, "loss": 1.3265, "step": 1220 }, { "epoch": 3.2084155161078236, "high_lr": 0.0003578947368421053, "low_lr": 7.157894736842106e-06, "step": 1220 }, { "epoch": 3.2084155161078236, "high_lr": 0.0003578947368421053, "low_lr": 7.157894736842106e-06, "step": 1220 }, { "epoch": 3.2084155161078236, "high_lr": 0.0003578947368421053, "low_lr": 7.157894736842106e-06, "step": 1220 }, { "epoch": 3.2084155161078236, "high_lr": 0.0003578947368421053, "low_lr": 7.157894736842106e-06, "step": 1220 }, { "epoch": 3.2084155161078236, "high_lr": 0.0003578947368421053, "low_lr": 7.157894736842106e-06, "step": 1220 }, { "epoch": 3.2084155161078236, "high_lr": 0.0003578947368421053, "low_lr": 7.157894736842106e-06, "step": 1220 }, { "epoch": 3.2084155161078236, "high_lr": 0.0003578947368421053, "low_lr": 7.157894736842106e-06, "step": 1220 }, { "epoch": 3.2084155161078236, "high_lr": 0.0003578947368421053, "low_lr": 7.157894736842106e-06, "step": 1220 }, { "epoch": 3.2110453648915187, "grad_norm": 1.3207658529281616, "learning_rate": 0.00035736842105263156, "loss": 1.2898, "step": 1221 }, { "epoch": 3.2110453648915187, "high_lr": 0.00035736842105263156, "low_lr": 7.147368421052631e-06, "step": 1221 }, { "epoch": 3.2110453648915187, "high_lr": 0.00035736842105263156, "low_lr": 7.147368421052631e-06, "step": 1221 }, { "epoch": 3.2110453648915187, "high_lr": 0.00035736842105263156, "low_lr": 7.147368421052631e-06, "step": 1221 }, { "epoch": 3.2110453648915187, "high_lr": 0.00035736842105263156, "low_lr": 7.147368421052631e-06, "step": 1221 }, { "epoch": 3.2110453648915187, "high_lr": 0.00035736842105263156, "low_lr": 7.147368421052631e-06, "step": 1221 }, { "epoch": 3.2110453648915187, "high_lr": 0.00035736842105263156, "low_lr": 7.147368421052631e-06, "step": 1221 }, { "epoch": 3.2110453648915187, "high_lr": 0.00035736842105263156, "low_lr": 7.147368421052631e-06, "step": 1221 }, { "epoch": 3.2110453648915187, "high_lr": 0.00035736842105263156, "low_lr": 7.147368421052631e-06, "step": 1221 }, { "epoch": 3.213675213675214, "grad_norm": 1.4371613264083862, "learning_rate": 0.0003568421052631579, "loss": 1.3598, "step": 1222 }, { "epoch": 3.213675213675214, "high_lr": 0.0003568421052631579, "low_lr": 7.1368421052631585e-06, "step": 1222 }, { "epoch": 3.213675213675214, "high_lr": 0.0003568421052631579, "low_lr": 7.1368421052631585e-06, "step": 1222 }, { "epoch": 3.213675213675214, "high_lr": 0.0003568421052631579, "low_lr": 7.1368421052631585e-06, "step": 1222 }, { "epoch": 3.213675213675214, "high_lr": 0.0003568421052631579, "low_lr": 7.1368421052631585e-06, "step": 1222 }, { "epoch": 3.213675213675214, "high_lr": 0.0003568421052631579, "low_lr": 7.1368421052631585e-06, "step": 1222 }, { "epoch": 3.213675213675214, "high_lr": 0.0003568421052631579, "low_lr": 7.1368421052631585e-06, "step": 1222 }, { "epoch": 3.213675213675214, "high_lr": 0.0003568421052631579, "low_lr": 7.1368421052631585e-06, "step": 1222 }, { "epoch": 3.213675213675214, "high_lr": 0.0003568421052631579, "low_lr": 7.1368421052631585e-06, "step": 1222 }, { "epoch": 3.2163050624589085, "grad_norm": 1.6412293910980225, "learning_rate": 0.0003563157894736842, "loss": 1.2523, "step": 1223 }, { "epoch": 3.2163050624589085, "high_lr": 0.0003563157894736842, "low_lr": 7.126315789473685e-06, "step": 1223 }, { "epoch": 3.2163050624589085, "high_lr": 0.0003563157894736842, "low_lr": 7.126315789473685e-06, "step": 1223 }, { "epoch": 3.2163050624589085, "high_lr": 0.0003563157894736842, "low_lr": 7.126315789473685e-06, "step": 1223 }, { "epoch": 3.2163050624589085, "high_lr": 0.0003563157894736842, "low_lr": 7.126315789473685e-06, "step": 1223 }, { "epoch": 3.2163050624589085, "high_lr": 0.0003563157894736842, "low_lr": 7.126315789473685e-06, "step": 1223 }, { "epoch": 3.2163050624589085, "high_lr": 0.0003563157894736842, "low_lr": 7.126315789473685e-06, "step": 1223 }, { "epoch": 3.2163050624589085, "high_lr": 0.0003563157894736842, "low_lr": 7.126315789473685e-06, "step": 1223 }, { "epoch": 3.2163050624589085, "high_lr": 0.0003563157894736842, "low_lr": 7.126315789473685e-06, "step": 1223 }, { "epoch": 3.2189349112426036, "grad_norm": 1.2812743186950684, "learning_rate": 0.0003557894736842105, "loss": 1.3104, "step": 1224 }, { "epoch": 3.2189349112426036, "high_lr": 0.0003557894736842105, "low_lr": 7.115789473684211e-06, "step": 1224 }, { "epoch": 3.2189349112426036, "high_lr": 0.0003557894736842105, "low_lr": 7.115789473684211e-06, "step": 1224 }, { "epoch": 3.2189349112426036, "high_lr": 0.0003557894736842105, "low_lr": 7.115789473684211e-06, "step": 1224 }, { "epoch": 3.2189349112426036, "high_lr": 0.0003557894736842105, "low_lr": 7.115789473684211e-06, "step": 1224 }, { "epoch": 3.2189349112426036, "high_lr": 0.0003557894736842105, "low_lr": 7.115789473684211e-06, "step": 1224 }, { "epoch": 3.2189349112426036, "high_lr": 0.0003557894736842105, "low_lr": 7.115789473684211e-06, "step": 1224 }, { "epoch": 3.2189349112426036, "high_lr": 0.0003557894736842105, "low_lr": 7.115789473684211e-06, "step": 1224 }, { "epoch": 3.2189349112426036, "high_lr": 0.0003557894736842105, "low_lr": 7.115789473684211e-06, "step": 1224 }, { "epoch": 3.2215647600262987, "grad_norm": 1.2608232498168945, "learning_rate": 0.00035526315789473687, "loss": 1.2855, "step": 1225 }, { "epoch": 3.2215647600262987, "high_lr": 0.00035526315789473687, "low_lr": 7.1052631578947375e-06, "step": 1225 }, { "epoch": 3.2215647600262987, "high_lr": 0.00035526315789473687, "low_lr": 7.1052631578947375e-06, "step": 1225 }, { "epoch": 3.2215647600262987, "high_lr": 0.00035526315789473687, "low_lr": 7.1052631578947375e-06, "step": 1225 }, { "epoch": 3.2215647600262987, "high_lr": 0.00035526315789473687, "low_lr": 7.1052631578947375e-06, "step": 1225 }, { "epoch": 3.2215647600262987, "high_lr": 0.00035526315789473687, "low_lr": 7.1052631578947375e-06, "step": 1225 }, { "epoch": 3.2215647600262987, "high_lr": 0.00035526315789473687, "low_lr": 7.1052631578947375e-06, "step": 1225 }, { "epoch": 3.2215647600262987, "high_lr": 0.00035526315789473687, "low_lr": 7.1052631578947375e-06, "step": 1225 }, { "epoch": 3.2215647600262987, "high_lr": 0.00035526315789473687, "low_lr": 7.1052631578947375e-06, "step": 1225 }, { "epoch": 3.2241946088099933, "grad_norm": 1.4294136762619019, "learning_rate": 0.0003547368421052632, "loss": 1.288, "step": 1226 }, { "epoch": 3.2241946088099933, "high_lr": 0.0003547368421052632, "low_lr": 7.094736842105265e-06, "step": 1226 }, { "epoch": 3.2241946088099933, "high_lr": 0.0003547368421052632, "low_lr": 7.094736842105265e-06, "step": 1226 }, { "epoch": 3.2241946088099933, "high_lr": 0.0003547368421052632, "low_lr": 7.094736842105265e-06, "step": 1226 }, { "epoch": 3.2241946088099933, "high_lr": 0.0003547368421052632, "low_lr": 7.094736842105265e-06, "step": 1226 }, { "epoch": 3.2241946088099933, "high_lr": 0.0003547368421052632, "low_lr": 7.094736842105265e-06, "step": 1226 }, { "epoch": 3.2241946088099933, "high_lr": 0.0003547368421052632, "low_lr": 7.094736842105265e-06, "step": 1226 }, { "epoch": 3.2241946088099933, "high_lr": 0.0003547368421052632, "low_lr": 7.094736842105265e-06, "step": 1226 }, { "epoch": 3.2241946088099933, "high_lr": 0.0003547368421052632, "low_lr": 7.094736842105265e-06, "step": 1226 }, { "epoch": 3.2268244575936884, "grad_norm": 1.3366351127624512, "learning_rate": 0.0003542105263157895, "loss": 1.2917, "step": 1227 }, { "epoch": 3.2268244575936884, "high_lr": 0.0003542105263157895, "low_lr": 7.08421052631579e-06, "step": 1227 }, { "epoch": 3.2268244575936884, "high_lr": 0.0003542105263157895, "low_lr": 7.08421052631579e-06, "step": 1227 }, { "epoch": 3.2268244575936884, "high_lr": 0.0003542105263157895, "low_lr": 7.08421052631579e-06, "step": 1227 }, { "epoch": 3.2268244575936884, "high_lr": 0.0003542105263157895, "low_lr": 7.08421052631579e-06, "step": 1227 }, { "epoch": 3.2268244575936884, "high_lr": 0.0003542105263157895, "low_lr": 7.08421052631579e-06, "step": 1227 }, { "epoch": 3.2268244575936884, "high_lr": 0.0003542105263157895, "low_lr": 7.08421052631579e-06, "step": 1227 }, { "epoch": 3.2268244575936884, "high_lr": 0.0003542105263157895, "low_lr": 7.08421052631579e-06, "step": 1227 }, { "epoch": 3.2268244575936884, "high_lr": 0.0003542105263157895, "low_lr": 7.08421052631579e-06, "step": 1227 }, { "epoch": 3.2294543063773835, "grad_norm": 1.504764199256897, "learning_rate": 0.0003536842105263158, "loss": 1.2717, "step": 1228 }, { "epoch": 3.2294543063773835, "high_lr": 0.0003536842105263158, "low_lr": 7.073684210526316e-06, "step": 1228 }, { "epoch": 3.2294543063773835, "high_lr": 0.0003536842105263158, "low_lr": 7.073684210526316e-06, "step": 1228 }, { "epoch": 3.2294543063773835, "high_lr": 0.0003536842105263158, "low_lr": 7.073684210526316e-06, "step": 1228 }, { "epoch": 3.2294543063773835, "high_lr": 0.0003536842105263158, "low_lr": 7.073684210526316e-06, "step": 1228 }, { "epoch": 3.2294543063773835, "high_lr": 0.0003536842105263158, "low_lr": 7.073684210526316e-06, "step": 1228 }, { "epoch": 3.2294543063773835, "high_lr": 0.0003536842105263158, "low_lr": 7.073684210526316e-06, "step": 1228 }, { "epoch": 3.2294543063773835, "high_lr": 0.0003536842105263158, "low_lr": 7.073684210526316e-06, "step": 1228 }, { "epoch": 3.2294543063773835, "high_lr": 0.0003536842105263158, "low_lr": 7.073684210526316e-06, "step": 1228 }, { "epoch": 3.232084155161078, "grad_norm": 1.5057621002197266, "learning_rate": 0.0003531578947368421, "loss": 1.3052, "step": 1229 }, { "epoch": 3.232084155161078, "high_lr": 0.0003531578947368421, "low_lr": 7.063157894736843e-06, "step": 1229 }, { "epoch": 3.232084155161078, "high_lr": 0.0003531578947368421, "low_lr": 7.063157894736843e-06, "step": 1229 }, { "epoch": 3.232084155161078, "high_lr": 0.0003531578947368421, "low_lr": 7.063157894736843e-06, "step": 1229 }, { "epoch": 3.232084155161078, "high_lr": 0.0003531578947368421, "low_lr": 7.063157894736843e-06, "step": 1229 }, { "epoch": 3.232084155161078, "high_lr": 0.0003531578947368421, "low_lr": 7.063157894736843e-06, "step": 1229 }, { "epoch": 3.232084155161078, "high_lr": 0.0003531578947368421, "low_lr": 7.063157894736843e-06, "step": 1229 }, { "epoch": 3.232084155161078, "high_lr": 0.0003531578947368421, "low_lr": 7.063157894736843e-06, "step": 1229 }, { "epoch": 3.232084155161078, "high_lr": 0.0003531578947368421, "low_lr": 7.063157894736843e-06, "step": 1229 }, { "epoch": 3.2347140039447733, "grad_norm": 1.4025167226791382, "learning_rate": 0.0003526315789473684, "loss": 1.2541, "step": 1230 }, { "epoch": 3.2347140039447733, "high_lr": 0.0003526315789473684, "low_lr": 7.052631578947369e-06, "step": 1230 }, { "epoch": 3.2347140039447733, "high_lr": 0.0003526315789473684, "low_lr": 7.052631578947369e-06, "step": 1230 }, { "epoch": 3.2347140039447733, "high_lr": 0.0003526315789473684, "low_lr": 7.052631578947369e-06, "step": 1230 }, { "epoch": 3.2347140039447733, "high_lr": 0.0003526315789473684, "low_lr": 7.052631578947369e-06, "step": 1230 }, { "epoch": 3.2347140039447733, "high_lr": 0.0003526315789473684, "low_lr": 7.052631578947369e-06, "step": 1230 }, { "epoch": 3.2347140039447733, "high_lr": 0.0003526315789473684, "low_lr": 7.052631578947369e-06, "step": 1230 }, { "epoch": 3.2347140039447733, "high_lr": 0.0003526315789473684, "low_lr": 7.052631578947369e-06, "step": 1230 }, { "epoch": 3.2347140039447733, "high_lr": 0.0003526315789473684, "low_lr": 7.052631578947369e-06, "step": 1230 }, { "epoch": 3.237343852728468, "grad_norm": 1.487691879272461, "learning_rate": 0.00035210526315789474, "loss": 1.2767, "step": 1231 }, { "epoch": 3.237343852728468, "high_lr": 0.00035210526315789474, "low_lr": 7.0421052631578954e-06, "step": 1231 }, { "epoch": 3.237343852728468, "high_lr": 0.00035210526315789474, "low_lr": 7.0421052631578954e-06, "step": 1231 }, { "epoch": 3.237343852728468, "high_lr": 0.00035210526315789474, "low_lr": 7.0421052631578954e-06, "step": 1231 }, { "epoch": 3.237343852728468, "high_lr": 0.00035210526315789474, "low_lr": 7.0421052631578954e-06, "step": 1231 }, { "epoch": 3.237343852728468, "high_lr": 0.00035210526315789474, "low_lr": 7.0421052631578954e-06, "step": 1231 }, { "epoch": 3.237343852728468, "high_lr": 0.00035210526315789474, "low_lr": 7.0421052631578954e-06, "step": 1231 }, { "epoch": 3.237343852728468, "high_lr": 0.00035210526315789474, "low_lr": 7.0421052631578954e-06, "step": 1231 }, { "epoch": 3.237343852728468, "high_lr": 0.00035210526315789474, "low_lr": 7.0421052631578954e-06, "step": 1231 }, { "epoch": 3.239973701512163, "grad_norm": 1.3063784837722778, "learning_rate": 0.00035157894736842103, "loss": 1.2583, "step": 1232 }, { "epoch": 3.239973701512163, "high_lr": 0.00035157894736842103, "low_lr": 7.031578947368422e-06, "step": 1232 }, { "epoch": 3.239973701512163, "high_lr": 0.00035157894736842103, "low_lr": 7.031578947368422e-06, "step": 1232 }, { "epoch": 3.239973701512163, "high_lr": 0.00035157894736842103, "low_lr": 7.031578947368422e-06, "step": 1232 }, { "epoch": 3.239973701512163, "high_lr": 0.00035157894736842103, "low_lr": 7.031578947368422e-06, "step": 1232 }, { "epoch": 3.239973701512163, "high_lr": 0.00035157894736842103, "low_lr": 7.031578947368422e-06, "step": 1232 }, { "epoch": 3.239973701512163, "high_lr": 0.00035157894736842103, "low_lr": 7.031578947368422e-06, "step": 1232 }, { "epoch": 3.239973701512163, "high_lr": 0.00035157894736842103, "low_lr": 7.031578947368422e-06, "step": 1232 }, { "epoch": 3.239973701512163, "high_lr": 0.00035157894736842103, "low_lr": 7.031578947368422e-06, "step": 1232 }, { "epoch": 3.242603550295858, "grad_norm": 1.5102909803390503, "learning_rate": 0.0003510526315789474, "loss": 1.3413, "step": 1233 }, { "epoch": 3.242603550295858, "high_lr": 0.0003510526315789474, "low_lr": 7.021052631578948e-06, "step": 1233 }, { "epoch": 3.242603550295858, "high_lr": 0.0003510526315789474, "low_lr": 7.021052631578948e-06, "step": 1233 }, { "epoch": 3.242603550295858, "high_lr": 0.0003510526315789474, "low_lr": 7.021052631578948e-06, "step": 1233 }, { "epoch": 3.242603550295858, "high_lr": 0.0003510526315789474, "low_lr": 7.021052631578948e-06, "step": 1233 }, { "epoch": 3.242603550295858, "high_lr": 0.0003510526315789474, "low_lr": 7.021052631578948e-06, "step": 1233 }, { "epoch": 3.242603550295858, "high_lr": 0.0003510526315789474, "low_lr": 7.021052631578948e-06, "step": 1233 }, { "epoch": 3.242603550295858, "high_lr": 0.0003510526315789474, "low_lr": 7.021052631578948e-06, "step": 1233 }, { "epoch": 3.242603550295858, "high_lr": 0.0003510526315789474, "low_lr": 7.021052631578948e-06, "step": 1233 }, { "epoch": 3.2452333990795528, "grad_norm": 1.4315178394317627, "learning_rate": 0.0003505263157894737, "loss": 1.3391, "step": 1234 }, { "epoch": 3.2452333990795528, "high_lr": 0.0003505263157894737, "low_lr": 7.010526315789474e-06, "step": 1234 }, { "epoch": 3.2452333990795528, "high_lr": 0.0003505263157894737, "low_lr": 7.010526315789474e-06, "step": 1234 }, { "epoch": 3.2452333990795528, "high_lr": 0.0003505263157894737, "low_lr": 7.010526315789474e-06, "step": 1234 }, { "epoch": 3.2452333990795528, "high_lr": 0.0003505263157894737, "low_lr": 7.010526315789474e-06, "step": 1234 }, { "epoch": 3.2452333990795528, "high_lr": 0.0003505263157894737, "low_lr": 7.010526315789474e-06, "step": 1234 }, { "epoch": 3.2452333990795528, "high_lr": 0.0003505263157894737, "low_lr": 7.010526315789474e-06, "step": 1234 }, { "epoch": 3.2452333990795528, "high_lr": 0.0003505263157894737, "low_lr": 7.010526315789474e-06, "step": 1234 }, { "epoch": 3.2452333990795528, "high_lr": 0.0003505263157894737, "low_lr": 7.010526315789474e-06, "step": 1234 }, { "epoch": 3.247863247863248, "grad_norm": 1.4550777673721313, "learning_rate": 0.00035, "loss": 1.2911, "step": 1235 }, { "epoch": 3.247863247863248, "high_lr": 0.00035, "low_lr": 7e-06, "step": 1235 }, { "epoch": 3.247863247863248, "high_lr": 0.00035, "low_lr": 7e-06, "step": 1235 }, { "epoch": 3.247863247863248, "high_lr": 0.00035, "low_lr": 7e-06, "step": 1235 }, { "epoch": 3.247863247863248, "high_lr": 0.00035, "low_lr": 7e-06, "step": 1235 }, { "epoch": 3.247863247863248, "high_lr": 0.00035, "low_lr": 7e-06, "step": 1235 }, { "epoch": 3.247863247863248, "high_lr": 0.00035, "low_lr": 7e-06, "step": 1235 }, { "epoch": 3.247863247863248, "high_lr": 0.00035, "low_lr": 7e-06, "step": 1235 }, { "epoch": 3.247863247863248, "high_lr": 0.00035, "low_lr": 7e-06, "step": 1235 }, { "epoch": 3.250493096646943, "grad_norm": 1.4469130039215088, "learning_rate": 0.00034947368421052634, "loss": 1.3573, "step": 1236 }, { "epoch": 3.250493096646943, "high_lr": 0.00034947368421052634, "low_lr": 6.989473684210527e-06, "step": 1236 }, { "epoch": 3.250493096646943, "high_lr": 0.00034947368421052634, "low_lr": 6.989473684210527e-06, "step": 1236 }, { "epoch": 3.250493096646943, "high_lr": 0.00034947368421052634, "low_lr": 6.989473684210527e-06, "step": 1236 }, { "epoch": 3.250493096646943, "high_lr": 0.00034947368421052634, "low_lr": 6.989473684210527e-06, "step": 1236 }, { "epoch": 3.250493096646943, "high_lr": 0.00034947368421052634, "low_lr": 6.989473684210527e-06, "step": 1236 }, { "epoch": 3.250493096646943, "high_lr": 0.00034947368421052634, "low_lr": 6.989473684210527e-06, "step": 1236 }, { "epoch": 3.250493096646943, "high_lr": 0.00034947368421052634, "low_lr": 6.989473684210527e-06, "step": 1236 }, { "epoch": 3.250493096646943, "high_lr": 0.00034947368421052634, "low_lr": 6.989473684210527e-06, "step": 1236 }, { "epoch": 3.2531229454306376, "grad_norm": 1.4454048871994019, "learning_rate": 0.0003489473684210526, "loss": 1.3024, "step": 1237 }, { "epoch": 3.2531229454306376, "high_lr": 0.0003489473684210526, "low_lr": 6.9789473684210525e-06, "step": 1237 }, { "epoch": 3.2531229454306376, "high_lr": 0.0003489473684210526, "low_lr": 6.9789473684210525e-06, "step": 1237 }, { "epoch": 3.2531229454306376, "high_lr": 0.0003489473684210526, "low_lr": 6.9789473684210525e-06, "step": 1237 }, { "epoch": 3.2531229454306376, "high_lr": 0.0003489473684210526, "low_lr": 6.9789473684210525e-06, "step": 1237 }, { "epoch": 3.2531229454306376, "high_lr": 0.0003489473684210526, "low_lr": 6.9789473684210525e-06, "step": 1237 }, { "epoch": 3.2531229454306376, "high_lr": 0.0003489473684210526, "low_lr": 6.9789473684210525e-06, "step": 1237 }, { "epoch": 3.2531229454306376, "high_lr": 0.0003489473684210526, "low_lr": 6.9789473684210525e-06, "step": 1237 }, { "epoch": 3.2531229454306376, "high_lr": 0.0003489473684210526, "low_lr": 6.9789473684210525e-06, "step": 1237 }, { "epoch": 3.2557527942143327, "grad_norm": 1.3726730346679688, "learning_rate": 0.00034842105263157896, "loss": 1.3433, "step": 1238 }, { "epoch": 3.2557527942143327, "high_lr": 0.00034842105263157896, "low_lr": 6.96842105263158e-06, "step": 1238 }, { "epoch": 3.2557527942143327, "high_lr": 0.00034842105263157896, "low_lr": 6.96842105263158e-06, "step": 1238 }, { "epoch": 3.2557527942143327, "high_lr": 0.00034842105263157896, "low_lr": 6.96842105263158e-06, "step": 1238 }, { "epoch": 3.2557527942143327, "high_lr": 0.00034842105263157896, "low_lr": 6.96842105263158e-06, "step": 1238 }, { "epoch": 3.2557527942143327, "high_lr": 0.00034842105263157896, "low_lr": 6.96842105263158e-06, "step": 1238 }, { "epoch": 3.2557527942143327, "high_lr": 0.00034842105263157896, "low_lr": 6.96842105263158e-06, "step": 1238 }, { "epoch": 3.2557527942143327, "high_lr": 0.00034842105263157896, "low_lr": 6.96842105263158e-06, "step": 1238 }, { "epoch": 3.2557527942143327, "high_lr": 0.00034842105263157896, "low_lr": 6.96842105263158e-06, "step": 1238 }, { "epoch": 3.2583826429980274, "grad_norm": 1.4009768962860107, "learning_rate": 0.00034789473684210525, "loss": 1.3129, "step": 1239 }, { "epoch": 3.2583826429980274, "high_lr": 0.00034789473684210525, "low_lr": 6.957894736842106e-06, "step": 1239 }, { "epoch": 3.2583826429980274, "high_lr": 0.00034789473684210525, "low_lr": 6.957894736842106e-06, "step": 1239 }, { "epoch": 3.2583826429980274, "high_lr": 0.00034789473684210525, "low_lr": 6.957894736842106e-06, "step": 1239 }, { "epoch": 3.2583826429980274, "high_lr": 0.00034789473684210525, "low_lr": 6.957894736842106e-06, "step": 1239 }, { "epoch": 3.2583826429980274, "high_lr": 0.00034789473684210525, "low_lr": 6.957894736842106e-06, "step": 1239 }, { "epoch": 3.2583826429980274, "high_lr": 0.00034789473684210525, "low_lr": 6.957894736842106e-06, "step": 1239 }, { "epoch": 3.2583826429980274, "high_lr": 0.00034789473684210525, "low_lr": 6.957894736842106e-06, "step": 1239 }, { "epoch": 3.2583826429980274, "high_lr": 0.00034789473684210525, "low_lr": 6.957894736842106e-06, "step": 1239 }, { "epoch": 3.2610124917817225, "grad_norm": 1.4004924297332764, "learning_rate": 0.0003473684210526316, "loss": 1.2838, "step": 1240 }, { "epoch": 3.2610124917817225, "high_lr": 0.0003473684210526316, "low_lr": 6.947368421052632e-06, "step": 1240 }, { "epoch": 3.2610124917817225, "high_lr": 0.0003473684210526316, "low_lr": 6.947368421052632e-06, "step": 1240 }, { "epoch": 3.2610124917817225, "high_lr": 0.0003473684210526316, "low_lr": 6.947368421052632e-06, "step": 1240 }, { "epoch": 3.2610124917817225, "high_lr": 0.0003473684210526316, "low_lr": 6.947368421052632e-06, "step": 1240 }, { "epoch": 3.2610124917817225, "high_lr": 0.0003473684210526316, "low_lr": 6.947368421052632e-06, "step": 1240 }, { "epoch": 3.2610124917817225, "high_lr": 0.0003473684210526316, "low_lr": 6.947368421052632e-06, "step": 1240 }, { "epoch": 3.2610124917817225, "high_lr": 0.0003473684210526316, "low_lr": 6.947368421052632e-06, "step": 1240 }, { "epoch": 3.2610124917817225, "high_lr": 0.0003473684210526316, "low_lr": 6.947368421052632e-06, "step": 1240 }, { "epoch": 3.2636423405654176, "grad_norm": 1.3671832084655762, "learning_rate": 0.00034684210526315793, "loss": 1.2908, "step": 1241 }, { "epoch": 3.2636423405654176, "high_lr": 0.00034684210526315793, "low_lr": 6.936842105263159e-06, "step": 1241 }, { "epoch": 3.2636423405654176, "high_lr": 0.00034684210526315793, "low_lr": 6.936842105263159e-06, "step": 1241 }, { "epoch": 3.2636423405654176, "high_lr": 0.00034684210526315793, "low_lr": 6.936842105263159e-06, "step": 1241 }, { "epoch": 3.2636423405654176, "high_lr": 0.00034684210526315793, "low_lr": 6.936842105263159e-06, "step": 1241 }, { "epoch": 3.2636423405654176, "high_lr": 0.00034684210526315793, "low_lr": 6.936842105263159e-06, "step": 1241 }, { "epoch": 3.2636423405654176, "high_lr": 0.00034684210526315793, "low_lr": 6.936842105263159e-06, "step": 1241 }, { "epoch": 3.2636423405654176, "high_lr": 0.00034684210526315793, "low_lr": 6.936842105263159e-06, "step": 1241 }, { "epoch": 3.2636423405654176, "high_lr": 0.00034684210526315793, "low_lr": 6.936842105263159e-06, "step": 1241 }, { "epoch": 3.2662721893491122, "grad_norm": 1.3692514896392822, "learning_rate": 0.0003463157894736842, "loss": 1.3547, "step": 1242 }, { "epoch": 3.2662721893491122, "high_lr": 0.0003463157894736842, "low_lr": 6.926315789473684e-06, "step": 1242 }, { "epoch": 3.2662721893491122, "high_lr": 0.0003463157894736842, "low_lr": 6.926315789473684e-06, "step": 1242 }, { "epoch": 3.2662721893491122, "high_lr": 0.0003463157894736842, "low_lr": 6.926315789473684e-06, "step": 1242 }, { "epoch": 3.2662721893491122, "high_lr": 0.0003463157894736842, "low_lr": 6.926315789473684e-06, "step": 1242 }, { "epoch": 3.2662721893491122, "high_lr": 0.0003463157894736842, "low_lr": 6.926315789473684e-06, "step": 1242 }, { "epoch": 3.2662721893491122, "high_lr": 0.0003463157894736842, "low_lr": 6.926315789473684e-06, "step": 1242 }, { "epoch": 3.2662721893491122, "high_lr": 0.0003463157894736842, "low_lr": 6.926315789473684e-06, "step": 1242 }, { "epoch": 3.2662721893491122, "high_lr": 0.0003463157894736842, "low_lr": 6.926315789473684e-06, "step": 1242 }, { "epoch": 3.2689020381328073, "grad_norm": 1.3831573724746704, "learning_rate": 0.00034578947368421055, "loss": 1.3195, "step": 1243 }, { "epoch": 3.2689020381328073, "high_lr": 0.00034578947368421055, "low_lr": 6.915789473684211e-06, "step": 1243 }, { "epoch": 3.2689020381328073, "high_lr": 0.00034578947368421055, "low_lr": 6.915789473684211e-06, "step": 1243 }, { "epoch": 3.2689020381328073, "high_lr": 0.00034578947368421055, "low_lr": 6.915789473684211e-06, "step": 1243 }, { "epoch": 3.2689020381328073, "high_lr": 0.00034578947368421055, "low_lr": 6.915789473684211e-06, "step": 1243 }, { "epoch": 3.2689020381328073, "high_lr": 0.00034578947368421055, "low_lr": 6.915789473684211e-06, "step": 1243 }, { "epoch": 3.2689020381328073, "high_lr": 0.00034578947368421055, "low_lr": 6.915789473684211e-06, "step": 1243 }, { "epoch": 3.2689020381328073, "high_lr": 0.00034578947368421055, "low_lr": 6.915789473684211e-06, "step": 1243 }, { "epoch": 3.2689020381328073, "high_lr": 0.00034578947368421055, "low_lr": 6.915789473684211e-06, "step": 1243 }, { "epoch": 3.2715318869165024, "grad_norm": 1.3399865627288818, "learning_rate": 0.00034526315789473684, "loss": 1.3231, "step": 1244 }, { "epoch": 3.2715318869165024, "high_lr": 0.00034526315789473684, "low_lr": 6.905263157894737e-06, "step": 1244 }, { "epoch": 3.2715318869165024, "high_lr": 0.00034526315789473684, "low_lr": 6.905263157894737e-06, "step": 1244 }, { "epoch": 3.2715318869165024, "high_lr": 0.00034526315789473684, "low_lr": 6.905263157894737e-06, "step": 1244 }, { "epoch": 3.2715318869165024, "high_lr": 0.00034526315789473684, "low_lr": 6.905263157894737e-06, "step": 1244 }, { "epoch": 3.2715318869165024, "high_lr": 0.00034526315789473684, "low_lr": 6.905263157894737e-06, "step": 1244 }, { "epoch": 3.2715318869165024, "high_lr": 0.00034526315789473684, "low_lr": 6.905263157894737e-06, "step": 1244 }, { "epoch": 3.2715318869165024, "high_lr": 0.00034526315789473684, "low_lr": 6.905263157894737e-06, "step": 1244 }, { "epoch": 3.2715318869165024, "high_lr": 0.00034526315789473684, "low_lr": 6.905263157894737e-06, "step": 1244 }, { "epoch": 3.274161735700197, "grad_norm": 1.346484661102295, "learning_rate": 0.0003447368421052632, "loss": 1.2596, "step": 1245 }, { "epoch": 3.274161735700197, "high_lr": 0.0003447368421052632, "low_lr": 6.894736842105264e-06, "step": 1245 }, { "epoch": 3.274161735700197, "high_lr": 0.0003447368421052632, "low_lr": 6.894736842105264e-06, "step": 1245 }, { "epoch": 3.274161735700197, "high_lr": 0.0003447368421052632, "low_lr": 6.894736842105264e-06, "step": 1245 }, { "epoch": 3.274161735700197, "high_lr": 0.0003447368421052632, "low_lr": 6.894736842105264e-06, "step": 1245 }, { "epoch": 3.274161735700197, "high_lr": 0.0003447368421052632, "low_lr": 6.894736842105264e-06, "step": 1245 }, { "epoch": 3.274161735700197, "high_lr": 0.0003447368421052632, "low_lr": 6.894736842105264e-06, "step": 1245 }, { "epoch": 3.274161735700197, "high_lr": 0.0003447368421052632, "low_lr": 6.894736842105264e-06, "step": 1245 }, { "epoch": 3.274161735700197, "high_lr": 0.0003447368421052632, "low_lr": 6.894736842105264e-06, "step": 1245 }, { "epoch": 3.276791584483892, "grad_norm": 1.3539178371429443, "learning_rate": 0.00034421052631578947, "loss": 1.3023, "step": 1246 }, { "epoch": 3.276791584483892, "high_lr": 0.00034421052631578947, "low_lr": 6.8842105263157895e-06, "step": 1246 }, { "epoch": 3.276791584483892, "high_lr": 0.00034421052631578947, "low_lr": 6.8842105263157895e-06, "step": 1246 }, { "epoch": 3.276791584483892, "high_lr": 0.00034421052631578947, "low_lr": 6.8842105263157895e-06, "step": 1246 }, { "epoch": 3.276791584483892, "high_lr": 0.00034421052631578947, "low_lr": 6.8842105263157895e-06, "step": 1246 }, { "epoch": 3.276791584483892, "high_lr": 0.00034421052631578947, "low_lr": 6.8842105263157895e-06, "step": 1246 }, { "epoch": 3.276791584483892, "high_lr": 0.00034421052631578947, "low_lr": 6.8842105263157895e-06, "step": 1246 }, { "epoch": 3.276791584483892, "high_lr": 0.00034421052631578947, "low_lr": 6.8842105263157895e-06, "step": 1246 }, { "epoch": 3.276791584483892, "high_lr": 0.00034421052631578947, "low_lr": 6.8842105263157895e-06, "step": 1246 }, { "epoch": 3.2794214332675873, "grad_norm": 1.3168984651565552, "learning_rate": 0.0003436842105263158, "loss": 1.3064, "step": 1247 }, { "epoch": 3.2794214332675873, "high_lr": 0.0003436842105263158, "low_lr": 6.873684210526317e-06, "step": 1247 }, { "epoch": 3.2794214332675873, "high_lr": 0.0003436842105263158, "low_lr": 6.873684210526317e-06, "step": 1247 }, { "epoch": 3.2794214332675873, "high_lr": 0.0003436842105263158, "low_lr": 6.873684210526317e-06, "step": 1247 }, { "epoch": 3.2794214332675873, "high_lr": 0.0003436842105263158, "low_lr": 6.873684210526317e-06, "step": 1247 }, { "epoch": 3.2794214332675873, "high_lr": 0.0003436842105263158, "low_lr": 6.873684210526317e-06, "step": 1247 }, { "epoch": 3.2794214332675873, "high_lr": 0.0003436842105263158, "low_lr": 6.873684210526317e-06, "step": 1247 }, { "epoch": 3.2794214332675873, "high_lr": 0.0003436842105263158, "low_lr": 6.873684210526317e-06, "step": 1247 }, { "epoch": 3.2794214332675873, "high_lr": 0.0003436842105263158, "low_lr": 6.873684210526317e-06, "step": 1247 }, { "epoch": 3.282051282051282, "grad_norm": 1.372035264968872, "learning_rate": 0.0003431578947368421, "loss": 1.3217, "step": 1248 }, { "epoch": 3.282051282051282, "high_lr": 0.0003431578947368421, "low_lr": 6.863157894736843e-06, "step": 1248 }, { "epoch": 3.282051282051282, "high_lr": 0.0003431578947368421, "low_lr": 6.863157894736843e-06, "step": 1248 }, { "epoch": 3.282051282051282, "high_lr": 0.0003431578947368421, "low_lr": 6.863157894736843e-06, "step": 1248 }, { "epoch": 3.282051282051282, "high_lr": 0.0003431578947368421, "low_lr": 6.863157894736843e-06, "step": 1248 }, { "epoch": 3.282051282051282, "high_lr": 0.0003431578947368421, "low_lr": 6.863157894736843e-06, "step": 1248 }, { "epoch": 3.282051282051282, "high_lr": 0.0003431578947368421, "low_lr": 6.863157894736843e-06, "step": 1248 }, { "epoch": 3.282051282051282, "high_lr": 0.0003431578947368421, "low_lr": 6.863157894736843e-06, "step": 1248 }, { "epoch": 3.282051282051282, "high_lr": 0.0003431578947368421, "low_lr": 6.863157894736843e-06, "step": 1248 }, { "epoch": 3.284681130834977, "grad_norm": 1.3251991271972656, "learning_rate": 0.0003426315789473684, "loss": 1.3082, "step": 1249 }, { "epoch": 3.284681130834977, "high_lr": 0.0003426315789473684, "low_lr": 6.8526315789473685e-06, "step": 1249 }, { "epoch": 3.284681130834977, "high_lr": 0.0003426315789473684, "low_lr": 6.8526315789473685e-06, "step": 1249 }, { "epoch": 3.284681130834977, "high_lr": 0.0003426315789473684, "low_lr": 6.8526315789473685e-06, "step": 1249 }, { "epoch": 3.284681130834977, "high_lr": 0.0003426315789473684, "low_lr": 6.8526315789473685e-06, "step": 1249 }, { "epoch": 3.284681130834977, "high_lr": 0.0003426315789473684, "low_lr": 6.8526315789473685e-06, "step": 1249 }, { "epoch": 3.284681130834977, "high_lr": 0.0003426315789473684, "low_lr": 6.8526315789473685e-06, "step": 1249 }, { "epoch": 3.284681130834977, "high_lr": 0.0003426315789473684, "low_lr": 6.8526315789473685e-06, "step": 1249 }, { "epoch": 3.284681130834977, "high_lr": 0.0003426315789473684, "low_lr": 6.8526315789473685e-06, "step": 1249 }, { "epoch": 3.287310979618672, "grad_norm": 1.2727844715118408, "learning_rate": 0.00034210526315789477, "loss": 1.3121, "step": 1250 }, { "epoch": 3.287310979618672, "high_lr": 0.00034210526315789477, "low_lr": 6.842105263157896e-06, "step": 1250 }, { "epoch": 3.287310979618672, "high_lr": 0.00034210526315789477, "low_lr": 6.842105263157896e-06, "step": 1250 }, { "epoch": 3.287310979618672, "high_lr": 0.00034210526315789477, "low_lr": 6.842105263157896e-06, "step": 1250 }, { "epoch": 3.287310979618672, "high_lr": 0.00034210526315789477, "low_lr": 6.842105263157896e-06, "step": 1250 }, { "epoch": 3.287310979618672, "high_lr": 0.00034210526315789477, "low_lr": 6.842105263157896e-06, "step": 1250 }, { "epoch": 3.287310979618672, "high_lr": 0.00034210526315789477, "low_lr": 6.842105263157896e-06, "step": 1250 }, { "epoch": 3.287310979618672, "high_lr": 0.00034210526315789477, "low_lr": 6.842105263157896e-06, "step": 1250 }, { "epoch": 3.287310979618672, "high_lr": 0.00034210526315789477, "low_lr": 6.842105263157896e-06, "step": 1250 }, { "epoch": 3.289940828402367, "grad_norm": 1.456199049949646, "learning_rate": 0.00034157894736842106, "loss": 1.323, "step": 1251 }, { "epoch": 3.289940828402367, "high_lr": 0.00034157894736842106, "low_lr": 6.831578947368421e-06, "step": 1251 }, { "epoch": 3.289940828402367, "high_lr": 0.00034157894736842106, "low_lr": 6.831578947368421e-06, "step": 1251 }, { "epoch": 3.289940828402367, "high_lr": 0.00034157894736842106, "low_lr": 6.831578947368421e-06, "step": 1251 }, { "epoch": 3.289940828402367, "high_lr": 0.00034157894736842106, "low_lr": 6.831578947368421e-06, "step": 1251 }, { "epoch": 3.289940828402367, "high_lr": 0.00034157894736842106, "low_lr": 6.831578947368421e-06, "step": 1251 }, { "epoch": 3.289940828402367, "high_lr": 0.00034157894736842106, "low_lr": 6.831578947368421e-06, "step": 1251 }, { "epoch": 3.289940828402367, "high_lr": 0.00034157894736842106, "low_lr": 6.831578947368421e-06, "step": 1251 }, { "epoch": 3.289940828402367, "high_lr": 0.00034157894736842106, "low_lr": 6.831578947368421e-06, "step": 1251 }, { "epoch": 3.292570677186062, "grad_norm": 1.4075325727462769, "learning_rate": 0.0003410526315789474, "loss": 1.3247, "step": 1252 }, { "epoch": 3.292570677186062, "high_lr": 0.0003410526315789474, "low_lr": 6.821052631578948e-06, "step": 1252 }, { "epoch": 3.292570677186062, "high_lr": 0.0003410526315789474, "low_lr": 6.821052631578948e-06, "step": 1252 }, { "epoch": 3.292570677186062, "high_lr": 0.0003410526315789474, "low_lr": 6.821052631578948e-06, "step": 1252 }, { "epoch": 3.292570677186062, "high_lr": 0.0003410526315789474, "low_lr": 6.821052631578948e-06, "step": 1252 }, { "epoch": 3.292570677186062, "high_lr": 0.0003410526315789474, "low_lr": 6.821052631578948e-06, "step": 1252 }, { "epoch": 3.292570677186062, "high_lr": 0.0003410526315789474, "low_lr": 6.821052631578948e-06, "step": 1252 }, { "epoch": 3.292570677186062, "high_lr": 0.0003410526315789474, "low_lr": 6.821052631578948e-06, "step": 1252 }, { "epoch": 3.292570677186062, "high_lr": 0.0003410526315789474, "low_lr": 6.821052631578948e-06, "step": 1252 }, { "epoch": 3.295200525969757, "grad_norm": 1.3012107610702515, "learning_rate": 0.0003405263157894737, "loss": 1.3016, "step": 1253 }, { "epoch": 3.295200525969757, "high_lr": 0.0003405263157894737, "low_lr": 6.810526315789474e-06, "step": 1253 }, { "epoch": 3.295200525969757, "high_lr": 0.0003405263157894737, "low_lr": 6.810526315789474e-06, "step": 1253 }, { "epoch": 3.295200525969757, "high_lr": 0.0003405263157894737, "low_lr": 6.810526315789474e-06, "step": 1253 }, { "epoch": 3.295200525969757, "high_lr": 0.0003405263157894737, "low_lr": 6.810526315789474e-06, "step": 1253 }, { "epoch": 3.295200525969757, "high_lr": 0.0003405263157894737, "low_lr": 6.810526315789474e-06, "step": 1253 }, { "epoch": 3.295200525969757, "high_lr": 0.0003405263157894737, "low_lr": 6.810526315789474e-06, "step": 1253 }, { "epoch": 3.295200525969757, "high_lr": 0.0003405263157894737, "low_lr": 6.810526315789474e-06, "step": 1253 }, { "epoch": 3.295200525969757, "high_lr": 0.0003405263157894737, "low_lr": 6.810526315789474e-06, "step": 1253 }, { "epoch": 3.2978303747534516, "grad_norm": 1.4447226524353027, "learning_rate": 0.00034, "loss": 1.3979, "step": 1254 }, { "epoch": 3.2978303747534516, "high_lr": 0.00034, "low_lr": 6.800000000000001e-06, "step": 1254 }, { "epoch": 3.2978303747534516, "high_lr": 0.00034, "low_lr": 6.800000000000001e-06, "step": 1254 }, { "epoch": 3.2978303747534516, "high_lr": 0.00034, "low_lr": 6.800000000000001e-06, "step": 1254 }, { "epoch": 3.2978303747534516, "high_lr": 0.00034, "low_lr": 6.800000000000001e-06, "step": 1254 }, { "epoch": 3.2978303747534516, "high_lr": 0.00034, "low_lr": 6.800000000000001e-06, "step": 1254 }, { "epoch": 3.2978303747534516, "high_lr": 0.00034, "low_lr": 6.800000000000001e-06, "step": 1254 }, { "epoch": 3.2978303747534516, "high_lr": 0.00034, "low_lr": 6.800000000000001e-06, "step": 1254 }, { "epoch": 3.2978303747534516, "high_lr": 0.00034, "low_lr": 6.800000000000001e-06, "step": 1254 }, { "epoch": 3.3004602235371467, "grad_norm": 1.4133968353271484, "learning_rate": 0.0003394736842105263, "loss": 1.3364, "step": 1255 }, { "epoch": 3.3004602235371467, "high_lr": 0.0003394736842105263, "low_lr": 6.789473684210527e-06, "step": 1255 }, { "epoch": 3.3004602235371467, "high_lr": 0.0003394736842105263, "low_lr": 6.789473684210527e-06, "step": 1255 }, { "epoch": 3.3004602235371467, "high_lr": 0.0003394736842105263, "low_lr": 6.789473684210527e-06, "step": 1255 }, { "epoch": 3.3004602235371467, "high_lr": 0.0003394736842105263, "low_lr": 6.789473684210527e-06, "step": 1255 }, { "epoch": 3.3004602235371467, "high_lr": 0.0003394736842105263, "low_lr": 6.789473684210527e-06, "step": 1255 }, { "epoch": 3.3004602235371467, "high_lr": 0.0003394736842105263, "low_lr": 6.789473684210527e-06, "step": 1255 }, { "epoch": 3.3004602235371467, "high_lr": 0.0003394736842105263, "low_lr": 6.789473684210527e-06, "step": 1255 }, { "epoch": 3.3004602235371467, "high_lr": 0.0003394736842105263, "low_lr": 6.789473684210527e-06, "step": 1255 }, { "epoch": 3.3030900723208414, "grad_norm": 1.3877695798873901, "learning_rate": 0.0003389473684210526, "loss": 1.3226, "step": 1256 }, { "epoch": 3.3030900723208414, "high_lr": 0.0003389473684210526, "low_lr": 6.778947368421053e-06, "step": 1256 }, { "epoch": 3.3030900723208414, "high_lr": 0.0003389473684210526, "low_lr": 6.778947368421053e-06, "step": 1256 }, { "epoch": 3.3030900723208414, "high_lr": 0.0003389473684210526, "low_lr": 6.778947368421053e-06, "step": 1256 }, { "epoch": 3.3030900723208414, "high_lr": 0.0003389473684210526, "low_lr": 6.778947368421053e-06, "step": 1256 }, { "epoch": 3.3030900723208414, "high_lr": 0.0003389473684210526, "low_lr": 6.778947368421053e-06, "step": 1256 }, { "epoch": 3.3030900723208414, "high_lr": 0.0003389473684210526, "low_lr": 6.778947368421053e-06, "step": 1256 }, { "epoch": 3.3030900723208414, "high_lr": 0.0003389473684210526, "low_lr": 6.778947368421053e-06, "step": 1256 }, { "epoch": 3.3030900723208414, "high_lr": 0.0003389473684210526, "low_lr": 6.778947368421053e-06, "step": 1256 }, { "epoch": 3.3057199211045365, "grad_norm": 1.4301080703735352, "learning_rate": 0.00033842105263157894, "loss": 1.343, "step": 1257 }, { "epoch": 3.3057199211045365, "high_lr": 0.00033842105263157894, "low_lr": 6.76842105263158e-06, "step": 1257 }, { "epoch": 3.3057199211045365, "high_lr": 0.00033842105263157894, "low_lr": 6.76842105263158e-06, "step": 1257 }, { "epoch": 3.3057199211045365, "high_lr": 0.00033842105263157894, "low_lr": 6.76842105263158e-06, "step": 1257 }, { "epoch": 3.3057199211045365, "high_lr": 0.00033842105263157894, "low_lr": 6.76842105263158e-06, "step": 1257 }, { "epoch": 3.3057199211045365, "high_lr": 0.00033842105263157894, "low_lr": 6.76842105263158e-06, "step": 1257 }, { "epoch": 3.3057199211045365, "high_lr": 0.00033842105263157894, "low_lr": 6.76842105263158e-06, "step": 1257 }, { "epoch": 3.3057199211045365, "high_lr": 0.00033842105263157894, "low_lr": 6.76842105263158e-06, "step": 1257 }, { "epoch": 3.3057199211045365, "high_lr": 0.00033842105263157894, "low_lr": 6.76842105263158e-06, "step": 1257 }, { "epoch": 3.3083497698882316, "grad_norm": 1.403597116470337, "learning_rate": 0.0003378947368421053, "loss": 1.3917, "step": 1258 }, { "epoch": 3.3083497698882316, "high_lr": 0.0003378947368421053, "low_lr": 6.7578947368421054e-06, "step": 1258 }, { "epoch": 3.3083497698882316, "high_lr": 0.0003378947368421053, "low_lr": 6.7578947368421054e-06, "step": 1258 }, { "epoch": 3.3083497698882316, "high_lr": 0.0003378947368421053, "low_lr": 6.7578947368421054e-06, "step": 1258 }, { "epoch": 3.3083497698882316, "high_lr": 0.0003378947368421053, "low_lr": 6.7578947368421054e-06, "step": 1258 }, { "epoch": 3.3083497698882316, "high_lr": 0.0003378947368421053, "low_lr": 6.7578947368421054e-06, "step": 1258 }, { "epoch": 3.3083497698882316, "high_lr": 0.0003378947368421053, "low_lr": 6.7578947368421054e-06, "step": 1258 }, { "epoch": 3.3083497698882316, "high_lr": 0.0003378947368421053, "low_lr": 6.7578947368421054e-06, "step": 1258 }, { "epoch": 3.3083497698882316, "high_lr": 0.0003378947368421053, "low_lr": 6.7578947368421054e-06, "step": 1258 }, { "epoch": 3.3109796186719263, "grad_norm": 1.4304726123809814, "learning_rate": 0.0003373684210526316, "loss": 1.2391, "step": 1259 }, { "epoch": 3.3109796186719263, "high_lr": 0.0003373684210526316, "low_lr": 6.747368421052633e-06, "step": 1259 }, { "epoch": 3.3109796186719263, "high_lr": 0.0003373684210526316, "low_lr": 6.747368421052633e-06, "step": 1259 }, { "epoch": 3.3109796186719263, "high_lr": 0.0003373684210526316, "low_lr": 6.747368421052633e-06, "step": 1259 }, { "epoch": 3.3109796186719263, "high_lr": 0.0003373684210526316, "low_lr": 6.747368421052633e-06, "step": 1259 }, { "epoch": 3.3109796186719263, "high_lr": 0.0003373684210526316, "low_lr": 6.747368421052633e-06, "step": 1259 }, { "epoch": 3.3109796186719263, "high_lr": 0.0003373684210526316, "low_lr": 6.747368421052633e-06, "step": 1259 }, { "epoch": 3.3109796186719263, "high_lr": 0.0003373684210526316, "low_lr": 6.747368421052633e-06, "step": 1259 }, { "epoch": 3.3109796186719263, "high_lr": 0.0003373684210526316, "low_lr": 6.747368421052633e-06, "step": 1259 }, { "epoch": 3.3136094674556213, "grad_norm": 1.3591734170913696, "learning_rate": 0.0003368421052631579, "loss": 1.2765, "step": 1260 }, { "epoch": 3.3136094674556213, "high_lr": 0.0003368421052631579, "low_lr": 6.736842105263158e-06, "step": 1260 }, { "epoch": 3.3136094674556213, "high_lr": 0.0003368421052631579, "low_lr": 6.736842105263158e-06, "step": 1260 }, { "epoch": 3.3136094674556213, "high_lr": 0.0003368421052631579, "low_lr": 6.736842105263158e-06, "step": 1260 }, { "epoch": 3.3136094674556213, "high_lr": 0.0003368421052631579, "low_lr": 6.736842105263158e-06, "step": 1260 }, { "epoch": 3.3136094674556213, "high_lr": 0.0003368421052631579, "low_lr": 6.736842105263158e-06, "step": 1260 }, { "epoch": 3.3136094674556213, "high_lr": 0.0003368421052631579, "low_lr": 6.736842105263158e-06, "step": 1260 }, { "epoch": 3.3136094674556213, "high_lr": 0.0003368421052631579, "low_lr": 6.736842105263158e-06, "step": 1260 }, { "epoch": 3.3136094674556213, "high_lr": 0.0003368421052631579, "low_lr": 6.736842105263158e-06, "step": 1260 }, { "epoch": 3.316239316239316, "grad_norm": 1.3532341718673706, "learning_rate": 0.00033631578947368424, "loss": 1.3013, "step": 1261 }, { "epoch": 3.316239316239316, "high_lr": 0.00033631578947368424, "low_lr": 6.726315789473685e-06, "step": 1261 }, { "epoch": 3.316239316239316, "high_lr": 0.00033631578947368424, "low_lr": 6.726315789473685e-06, "step": 1261 }, { "epoch": 3.316239316239316, "high_lr": 0.00033631578947368424, "low_lr": 6.726315789473685e-06, "step": 1261 }, { "epoch": 3.316239316239316, "high_lr": 0.00033631578947368424, "low_lr": 6.726315789473685e-06, "step": 1261 }, { "epoch": 3.316239316239316, "high_lr": 0.00033631578947368424, "low_lr": 6.726315789473685e-06, "step": 1261 }, { "epoch": 3.316239316239316, "high_lr": 0.00033631578947368424, "low_lr": 6.726315789473685e-06, "step": 1261 }, { "epoch": 3.316239316239316, "high_lr": 0.00033631578947368424, "low_lr": 6.726315789473685e-06, "step": 1261 }, { "epoch": 3.316239316239316, "high_lr": 0.00033631578947368424, "low_lr": 6.726315789473685e-06, "step": 1261 }, { "epoch": 3.318869165023011, "grad_norm": 1.3452813625335693, "learning_rate": 0.00033578947368421053, "loss": 1.3099, "step": 1262 }, { "epoch": 3.318869165023011, "high_lr": 0.00033578947368421053, "low_lr": 6.715789473684211e-06, "step": 1262 }, { "epoch": 3.318869165023011, "high_lr": 0.00033578947368421053, "low_lr": 6.715789473684211e-06, "step": 1262 }, { "epoch": 3.318869165023011, "high_lr": 0.00033578947368421053, "low_lr": 6.715789473684211e-06, "step": 1262 }, { "epoch": 3.318869165023011, "high_lr": 0.00033578947368421053, "low_lr": 6.715789473684211e-06, "step": 1262 }, { "epoch": 3.318869165023011, "high_lr": 0.00033578947368421053, "low_lr": 6.715789473684211e-06, "step": 1262 }, { "epoch": 3.318869165023011, "high_lr": 0.00033578947368421053, "low_lr": 6.715789473684211e-06, "step": 1262 }, { "epoch": 3.318869165023011, "high_lr": 0.00033578947368421053, "low_lr": 6.715789473684211e-06, "step": 1262 }, { "epoch": 3.318869165023011, "high_lr": 0.00033578947368421053, "low_lr": 6.715789473684211e-06, "step": 1262 }, { "epoch": 3.321499013806706, "grad_norm": 1.4131159782409668, "learning_rate": 0.0003352631578947368, "loss": 1.3188, "step": 1263 }, { "epoch": 3.321499013806706, "high_lr": 0.0003352631578947368, "low_lr": 6.705263157894737e-06, "step": 1263 }, { "epoch": 3.321499013806706, "high_lr": 0.0003352631578947368, "low_lr": 6.705263157894737e-06, "step": 1263 }, { "epoch": 3.321499013806706, "high_lr": 0.0003352631578947368, "low_lr": 6.705263157894737e-06, "step": 1263 }, { "epoch": 3.321499013806706, "high_lr": 0.0003352631578947368, "low_lr": 6.705263157894737e-06, "step": 1263 }, { "epoch": 3.321499013806706, "high_lr": 0.0003352631578947368, "low_lr": 6.705263157894737e-06, "step": 1263 }, { "epoch": 3.321499013806706, "high_lr": 0.0003352631578947368, "low_lr": 6.705263157894737e-06, "step": 1263 }, { "epoch": 3.321499013806706, "high_lr": 0.0003352631578947368, "low_lr": 6.705263157894737e-06, "step": 1263 }, { "epoch": 3.321499013806706, "high_lr": 0.0003352631578947368, "low_lr": 6.705263157894737e-06, "step": 1263 }, { "epoch": 3.324128862590401, "grad_norm": 1.5348033905029297, "learning_rate": 0.00033473684210526315, "loss": 1.336, "step": 1264 }, { "epoch": 3.324128862590401, "high_lr": 0.00033473684210526315, "low_lr": 6.694736842105264e-06, "step": 1264 }, { "epoch": 3.324128862590401, "high_lr": 0.00033473684210526315, "low_lr": 6.694736842105264e-06, "step": 1264 }, { "epoch": 3.324128862590401, "high_lr": 0.00033473684210526315, "low_lr": 6.694736842105264e-06, "step": 1264 }, { "epoch": 3.324128862590401, "high_lr": 0.00033473684210526315, "low_lr": 6.694736842105264e-06, "step": 1264 }, { "epoch": 3.324128862590401, "high_lr": 0.00033473684210526315, "low_lr": 6.694736842105264e-06, "step": 1264 }, { "epoch": 3.324128862590401, "high_lr": 0.00033473684210526315, "low_lr": 6.694736842105264e-06, "step": 1264 }, { "epoch": 3.324128862590401, "high_lr": 0.00033473684210526315, "low_lr": 6.694736842105264e-06, "step": 1264 }, { "epoch": 3.324128862590401, "high_lr": 0.00033473684210526315, "low_lr": 6.694736842105264e-06, "step": 1264 }, { "epoch": 3.326758711374096, "grad_norm": 1.4381033182144165, "learning_rate": 0.00033421052631578944, "loss": 1.3093, "step": 1265 }, { "epoch": 3.326758711374096, "high_lr": 0.00033421052631578944, "low_lr": 6.68421052631579e-06, "step": 1265 }, { "epoch": 3.326758711374096, "high_lr": 0.00033421052631578944, "low_lr": 6.68421052631579e-06, "step": 1265 }, { "epoch": 3.326758711374096, "high_lr": 0.00033421052631578944, "low_lr": 6.68421052631579e-06, "step": 1265 }, { "epoch": 3.326758711374096, "high_lr": 0.00033421052631578944, "low_lr": 6.68421052631579e-06, "step": 1265 }, { "epoch": 3.326758711374096, "high_lr": 0.00033421052631578944, "low_lr": 6.68421052631579e-06, "step": 1265 }, { "epoch": 3.326758711374096, "high_lr": 0.00033421052631578944, "low_lr": 6.68421052631579e-06, "step": 1265 }, { "epoch": 3.326758711374096, "high_lr": 0.00033421052631578944, "low_lr": 6.68421052631579e-06, "step": 1265 }, { "epoch": 3.326758711374096, "high_lr": 0.00033421052631578944, "low_lr": 6.68421052631579e-06, "step": 1265 }, { "epoch": 3.329388560157791, "grad_norm": 1.3288220167160034, "learning_rate": 0.00033368421052631583, "loss": 1.3156, "step": 1266 }, { "epoch": 3.329388560157791, "high_lr": 0.00033368421052631583, "low_lr": 6.673684210526317e-06, "step": 1266 }, { "epoch": 3.329388560157791, "high_lr": 0.00033368421052631583, "low_lr": 6.673684210526317e-06, "step": 1266 }, { "epoch": 3.329388560157791, "high_lr": 0.00033368421052631583, "low_lr": 6.673684210526317e-06, "step": 1266 }, { "epoch": 3.329388560157791, "high_lr": 0.00033368421052631583, "low_lr": 6.673684210526317e-06, "step": 1266 }, { "epoch": 3.329388560157791, "high_lr": 0.00033368421052631583, "low_lr": 6.673684210526317e-06, "step": 1266 }, { "epoch": 3.329388560157791, "high_lr": 0.00033368421052631583, "low_lr": 6.673684210526317e-06, "step": 1266 }, { "epoch": 3.329388560157791, "high_lr": 0.00033368421052631583, "low_lr": 6.673684210526317e-06, "step": 1266 }, { "epoch": 3.329388560157791, "high_lr": 0.00033368421052631583, "low_lr": 6.673684210526317e-06, "step": 1266 }, { "epoch": 3.3320184089414857, "grad_norm": 1.386177897453308, "learning_rate": 0.0003331578947368421, "loss": 1.2822, "step": 1267 }, { "epoch": 3.3320184089414857, "high_lr": 0.0003331578947368421, "low_lr": 6.663157894736842e-06, "step": 1267 }, { "epoch": 3.3320184089414857, "high_lr": 0.0003331578947368421, "low_lr": 6.663157894736842e-06, "step": 1267 }, { "epoch": 3.3320184089414857, "high_lr": 0.0003331578947368421, "low_lr": 6.663157894736842e-06, "step": 1267 }, { "epoch": 3.3320184089414857, "high_lr": 0.0003331578947368421, "low_lr": 6.663157894736842e-06, "step": 1267 }, { "epoch": 3.3320184089414857, "high_lr": 0.0003331578947368421, "low_lr": 6.663157894736842e-06, "step": 1267 }, { "epoch": 3.3320184089414857, "high_lr": 0.0003331578947368421, "low_lr": 6.663157894736842e-06, "step": 1267 }, { "epoch": 3.3320184089414857, "high_lr": 0.0003331578947368421, "low_lr": 6.663157894736842e-06, "step": 1267 }, { "epoch": 3.3320184089414857, "high_lr": 0.0003331578947368421, "low_lr": 6.663157894736842e-06, "step": 1267 }, { "epoch": 3.334648257725181, "grad_norm": 1.3520636558532715, "learning_rate": 0.00033263157894736846, "loss": 1.3012, "step": 1268 }, { "epoch": 3.334648257725181, "high_lr": 0.00033263157894736846, "low_lr": 6.6526315789473695e-06, "step": 1268 }, { "epoch": 3.334648257725181, "high_lr": 0.00033263157894736846, "low_lr": 6.6526315789473695e-06, "step": 1268 }, { "epoch": 3.334648257725181, "high_lr": 0.00033263157894736846, "low_lr": 6.6526315789473695e-06, "step": 1268 }, { "epoch": 3.334648257725181, "high_lr": 0.00033263157894736846, "low_lr": 6.6526315789473695e-06, "step": 1268 }, { "epoch": 3.334648257725181, "high_lr": 0.00033263157894736846, "low_lr": 6.6526315789473695e-06, "step": 1268 }, { "epoch": 3.334648257725181, "high_lr": 0.00033263157894736846, "low_lr": 6.6526315789473695e-06, "step": 1268 }, { "epoch": 3.334648257725181, "high_lr": 0.00033263157894736846, "low_lr": 6.6526315789473695e-06, "step": 1268 }, { "epoch": 3.334648257725181, "high_lr": 0.00033263157894736846, "low_lr": 6.6526315789473695e-06, "step": 1268 }, { "epoch": 3.337278106508876, "grad_norm": 1.4403034448623657, "learning_rate": 0.00033210526315789475, "loss": 1.2646, "step": 1269 }, { "epoch": 3.337278106508876, "high_lr": 0.00033210526315789475, "low_lr": 6.642105263157895e-06, "step": 1269 }, { "epoch": 3.337278106508876, "high_lr": 0.00033210526315789475, "low_lr": 6.642105263157895e-06, "step": 1269 }, { "epoch": 3.337278106508876, "high_lr": 0.00033210526315789475, "low_lr": 6.642105263157895e-06, "step": 1269 }, { "epoch": 3.337278106508876, "high_lr": 0.00033210526315789475, "low_lr": 6.642105263157895e-06, "step": 1269 }, { "epoch": 3.337278106508876, "high_lr": 0.00033210526315789475, "low_lr": 6.642105263157895e-06, "step": 1269 }, { "epoch": 3.337278106508876, "high_lr": 0.00033210526315789475, "low_lr": 6.642105263157895e-06, "step": 1269 }, { "epoch": 3.337278106508876, "high_lr": 0.00033210526315789475, "low_lr": 6.642105263157895e-06, "step": 1269 }, { "epoch": 3.337278106508876, "high_lr": 0.00033210526315789475, "low_lr": 6.642105263157895e-06, "step": 1269 }, { "epoch": 3.3399079552925706, "grad_norm": 1.5093498229980469, "learning_rate": 0.00033157894736842103, "loss": 1.3276, "step": 1270 }, { "epoch": 3.3399079552925706, "high_lr": 0.00033157894736842103, "low_lr": 6.631578947368421e-06, "step": 1270 }, { "epoch": 3.3399079552925706, "high_lr": 0.00033157894736842103, "low_lr": 6.631578947368421e-06, "step": 1270 }, { "epoch": 3.3399079552925706, "high_lr": 0.00033157894736842103, "low_lr": 6.631578947368421e-06, "step": 1270 }, { "epoch": 3.3399079552925706, "high_lr": 0.00033157894736842103, "low_lr": 6.631578947368421e-06, "step": 1270 }, { "epoch": 3.3399079552925706, "high_lr": 0.00033157894736842103, "low_lr": 6.631578947368421e-06, "step": 1270 }, { "epoch": 3.3399079552925706, "high_lr": 0.00033157894736842103, "low_lr": 6.631578947368421e-06, "step": 1270 }, { "epoch": 3.3399079552925706, "high_lr": 0.00033157894736842103, "low_lr": 6.631578947368421e-06, "step": 1270 }, { "epoch": 3.3399079552925706, "high_lr": 0.00033157894736842103, "low_lr": 6.631578947368421e-06, "step": 1270 }, { "epoch": 3.3425378040762657, "grad_norm": 1.4945142269134521, "learning_rate": 0.00033105263157894737, "loss": 1.3251, "step": 1271 }, { "epoch": 3.3425378040762657, "high_lr": 0.00033105263157894737, "low_lr": 6.621052631578948e-06, "step": 1271 }, { "epoch": 3.3425378040762657, "high_lr": 0.00033105263157894737, "low_lr": 6.621052631578948e-06, "step": 1271 }, { "epoch": 3.3425378040762657, "high_lr": 0.00033105263157894737, "low_lr": 6.621052631578948e-06, "step": 1271 }, { "epoch": 3.3425378040762657, "high_lr": 0.00033105263157894737, "low_lr": 6.621052631578948e-06, "step": 1271 }, { "epoch": 3.3425378040762657, "high_lr": 0.00033105263157894737, "low_lr": 6.621052631578948e-06, "step": 1271 }, { "epoch": 3.3425378040762657, "high_lr": 0.00033105263157894737, "low_lr": 6.621052631578948e-06, "step": 1271 }, { "epoch": 3.3425378040762657, "high_lr": 0.00033105263157894737, "low_lr": 6.621052631578948e-06, "step": 1271 }, { "epoch": 3.3425378040762657, "high_lr": 0.00033105263157894737, "low_lr": 6.621052631578948e-06, "step": 1271 }, { "epoch": 3.3451676528599608, "grad_norm": 1.4792543649673462, "learning_rate": 0.00033052631578947366, "loss": 1.3416, "step": 1272 }, { "epoch": 3.3451676528599608, "high_lr": 0.00033052631578947366, "low_lr": 6.610526315789474e-06, "step": 1272 }, { "epoch": 3.3451676528599608, "high_lr": 0.00033052631578947366, "low_lr": 6.610526315789474e-06, "step": 1272 }, { "epoch": 3.3451676528599608, "high_lr": 0.00033052631578947366, "low_lr": 6.610526315789474e-06, "step": 1272 }, { "epoch": 3.3451676528599608, "high_lr": 0.00033052631578947366, "low_lr": 6.610526315789474e-06, "step": 1272 }, { "epoch": 3.3451676528599608, "high_lr": 0.00033052631578947366, "low_lr": 6.610526315789474e-06, "step": 1272 }, { "epoch": 3.3451676528599608, "high_lr": 0.00033052631578947366, "low_lr": 6.610526315789474e-06, "step": 1272 }, { "epoch": 3.3451676528599608, "high_lr": 0.00033052631578947366, "low_lr": 6.610526315789474e-06, "step": 1272 }, { "epoch": 3.3451676528599608, "high_lr": 0.00033052631578947366, "low_lr": 6.610526315789474e-06, "step": 1272 }, { "epoch": 3.3477975016436554, "grad_norm": 1.414167046546936, "learning_rate": 0.00033, "loss": 1.3377, "step": 1273 }, { "epoch": 3.3477975016436554, "high_lr": 0.00033, "low_lr": 6.600000000000001e-06, "step": 1273 }, { "epoch": 3.3477975016436554, "high_lr": 0.00033, "low_lr": 6.600000000000001e-06, "step": 1273 }, { "epoch": 3.3477975016436554, "high_lr": 0.00033, "low_lr": 6.600000000000001e-06, "step": 1273 }, { "epoch": 3.3477975016436554, "high_lr": 0.00033, "low_lr": 6.600000000000001e-06, "step": 1273 }, { "epoch": 3.3477975016436554, "high_lr": 0.00033, "low_lr": 6.600000000000001e-06, "step": 1273 }, { "epoch": 3.3477975016436554, "high_lr": 0.00033, "low_lr": 6.600000000000001e-06, "step": 1273 }, { "epoch": 3.3477975016436554, "high_lr": 0.00033, "low_lr": 6.600000000000001e-06, "step": 1273 }, { "epoch": 3.3477975016436554, "high_lr": 0.00033, "low_lr": 6.600000000000001e-06, "step": 1273 }, { "epoch": 3.3504273504273505, "grad_norm": 1.4472049474716187, "learning_rate": 0.00032947368421052634, "loss": 1.2771, "step": 1274 }, { "epoch": 3.3504273504273505, "high_lr": 0.00032947368421052634, "low_lr": 6.589473684210527e-06, "step": 1274 }, { "epoch": 3.3504273504273505, "high_lr": 0.00032947368421052634, "low_lr": 6.589473684210527e-06, "step": 1274 }, { "epoch": 3.3504273504273505, "high_lr": 0.00032947368421052634, "low_lr": 6.589473684210527e-06, "step": 1274 }, { "epoch": 3.3504273504273505, "high_lr": 0.00032947368421052634, "low_lr": 6.589473684210527e-06, "step": 1274 }, { "epoch": 3.3504273504273505, "high_lr": 0.00032947368421052634, "low_lr": 6.589473684210527e-06, "step": 1274 }, { "epoch": 3.3504273504273505, "high_lr": 0.00032947368421052634, "low_lr": 6.589473684210527e-06, "step": 1274 }, { "epoch": 3.3504273504273505, "high_lr": 0.00032947368421052634, "low_lr": 6.589473684210527e-06, "step": 1274 }, { "epoch": 3.3504273504273505, "high_lr": 0.00032947368421052634, "low_lr": 6.589473684210527e-06, "step": 1274 }, { "epoch": 3.3530571992110456, "grad_norm": 1.3168164491653442, "learning_rate": 0.0003289473684210527, "loss": 1.2955, "step": 1275 }, { "epoch": 3.3530571992110456, "high_lr": 0.0003289473684210527, "low_lr": 6.578947368421054e-06, "step": 1275 }, { "epoch": 3.3530571992110456, "high_lr": 0.0003289473684210527, "low_lr": 6.578947368421054e-06, "step": 1275 }, { "epoch": 3.3530571992110456, "high_lr": 0.0003289473684210527, "low_lr": 6.578947368421054e-06, "step": 1275 }, { "epoch": 3.3530571992110456, "high_lr": 0.0003289473684210527, "low_lr": 6.578947368421054e-06, "step": 1275 }, { "epoch": 3.3530571992110456, "high_lr": 0.0003289473684210527, "low_lr": 6.578947368421054e-06, "step": 1275 }, { "epoch": 3.3530571992110456, "high_lr": 0.0003289473684210527, "low_lr": 6.578947368421054e-06, "step": 1275 }, { "epoch": 3.3530571992110456, "high_lr": 0.0003289473684210527, "low_lr": 6.578947368421054e-06, "step": 1275 }, { "epoch": 3.3530571992110456, "high_lr": 0.0003289473684210527, "low_lr": 6.578947368421054e-06, "step": 1275 }, { "epoch": 3.3556870479947403, "grad_norm": 1.312169075012207, "learning_rate": 0.00032842105263157896, "loss": 1.3291, "step": 1276 }, { "epoch": 3.3556870479947403, "high_lr": 0.00032842105263157896, "low_lr": 6.568421052631579e-06, "step": 1276 }, { "epoch": 3.3556870479947403, "high_lr": 0.00032842105263157896, "low_lr": 6.568421052631579e-06, "step": 1276 }, { "epoch": 3.3556870479947403, "high_lr": 0.00032842105263157896, "low_lr": 6.568421052631579e-06, "step": 1276 }, { "epoch": 3.3556870479947403, "high_lr": 0.00032842105263157896, "low_lr": 6.568421052631579e-06, "step": 1276 }, { "epoch": 3.3556870479947403, "high_lr": 0.00032842105263157896, "low_lr": 6.568421052631579e-06, "step": 1276 }, { "epoch": 3.3556870479947403, "high_lr": 0.00032842105263157896, "low_lr": 6.568421052631579e-06, "step": 1276 }, { "epoch": 3.3556870479947403, "high_lr": 0.00032842105263157896, "low_lr": 6.568421052631579e-06, "step": 1276 }, { "epoch": 3.3556870479947403, "high_lr": 0.00032842105263157896, "low_lr": 6.568421052631579e-06, "step": 1276 }, { "epoch": 3.3583168967784354, "grad_norm": 1.3834537267684937, "learning_rate": 0.00032789473684210525, "loss": 1.2886, "step": 1277 }, { "epoch": 3.3583168967784354, "high_lr": 0.00032789473684210525, "low_lr": 6.557894736842106e-06, "step": 1277 }, { "epoch": 3.3583168967784354, "high_lr": 0.00032789473684210525, "low_lr": 6.557894736842106e-06, "step": 1277 }, { "epoch": 3.3583168967784354, "high_lr": 0.00032789473684210525, "low_lr": 6.557894736842106e-06, "step": 1277 }, { "epoch": 3.3583168967784354, "high_lr": 0.00032789473684210525, "low_lr": 6.557894736842106e-06, "step": 1277 }, { "epoch": 3.3583168967784354, "high_lr": 0.00032789473684210525, "low_lr": 6.557894736842106e-06, "step": 1277 }, { "epoch": 3.3583168967784354, "high_lr": 0.00032789473684210525, "low_lr": 6.557894736842106e-06, "step": 1277 }, { "epoch": 3.3583168967784354, "high_lr": 0.00032789473684210525, "low_lr": 6.557894736842106e-06, "step": 1277 }, { "epoch": 3.3583168967784354, "high_lr": 0.00032789473684210525, "low_lr": 6.557894736842106e-06, "step": 1277 }, { "epoch": 3.36094674556213, "grad_norm": 1.4049303531646729, "learning_rate": 0.0003273684210526316, "loss": 1.3054, "step": 1278 }, { "epoch": 3.36094674556213, "high_lr": 0.0003273684210526316, "low_lr": 6.547368421052632e-06, "step": 1278 }, { "epoch": 3.36094674556213, "high_lr": 0.0003273684210526316, "low_lr": 6.547368421052632e-06, "step": 1278 }, { "epoch": 3.36094674556213, "high_lr": 0.0003273684210526316, "low_lr": 6.547368421052632e-06, "step": 1278 }, { "epoch": 3.36094674556213, "high_lr": 0.0003273684210526316, "low_lr": 6.547368421052632e-06, "step": 1278 }, { "epoch": 3.36094674556213, "high_lr": 0.0003273684210526316, "low_lr": 6.547368421052632e-06, "step": 1278 }, { "epoch": 3.36094674556213, "high_lr": 0.0003273684210526316, "low_lr": 6.547368421052632e-06, "step": 1278 }, { "epoch": 3.36094674556213, "high_lr": 0.0003273684210526316, "low_lr": 6.547368421052632e-06, "step": 1278 }, { "epoch": 3.36094674556213, "high_lr": 0.0003273684210526316, "low_lr": 6.547368421052632e-06, "step": 1278 }, { "epoch": 3.363576594345825, "grad_norm": 1.3778067827224731, "learning_rate": 0.0003268421052631579, "loss": 1.3031, "step": 1279 }, { "epoch": 3.363576594345825, "high_lr": 0.0003268421052631579, "low_lr": 6.536842105263158e-06, "step": 1279 }, { "epoch": 3.363576594345825, "high_lr": 0.0003268421052631579, "low_lr": 6.536842105263158e-06, "step": 1279 }, { "epoch": 3.363576594345825, "high_lr": 0.0003268421052631579, "low_lr": 6.536842105263158e-06, "step": 1279 }, { "epoch": 3.363576594345825, "high_lr": 0.0003268421052631579, "low_lr": 6.536842105263158e-06, "step": 1279 }, { "epoch": 3.363576594345825, "high_lr": 0.0003268421052631579, "low_lr": 6.536842105263158e-06, "step": 1279 }, { "epoch": 3.363576594345825, "high_lr": 0.0003268421052631579, "low_lr": 6.536842105263158e-06, "step": 1279 }, { "epoch": 3.363576594345825, "high_lr": 0.0003268421052631579, "low_lr": 6.536842105263158e-06, "step": 1279 }, { "epoch": 3.363576594345825, "high_lr": 0.0003268421052631579, "low_lr": 6.536842105263158e-06, "step": 1279 }, { "epoch": 3.36620644312952, "grad_norm": 1.3682295083999634, "learning_rate": 0.0003263157894736842, "loss": 1.2862, "step": 1280 }, { "epoch": 3.36620644312952, "high_lr": 0.0003263157894736842, "low_lr": 6.526315789473685e-06, "step": 1280 }, { "epoch": 3.36620644312952, "high_lr": 0.0003263157894736842, "low_lr": 6.526315789473685e-06, "step": 1280 }, { "epoch": 3.36620644312952, "high_lr": 0.0003263157894736842, "low_lr": 6.526315789473685e-06, "step": 1280 }, { "epoch": 3.36620644312952, "high_lr": 0.0003263157894736842, "low_lr": 6.526315789473685e-06, "step": 1280 }, { "epoch": 3.36620644312952, "high_lr": 0.0003263157894736842, "low_lr": 6.526315789473685e-06, "step": 1280 }, { "epoch": 3.36620644312952, "high_lr": 0.0003263157894736842, "low_lr": 6.526315789473685e-06, "step": 1280 }, { "epoch": 3.36620644312952, "high_lr": 0.0003263157894736842, "low_lr": 6.526315789473685e-06, "step": 1280 }, { "epoch": 3.36620644312952, "high_lr": 0.0003263157894736842, "low_lr": 6.526315789473685e-06, "step": 1280 }, { "epoch": 3.368836291913215, "grad_norm": 1.3198716640472412, "learning_rate": 0.0003257894736842105, "loss": 1.2398, "step": 1281 }, { "epoch": 3.368836291913215, "high_lr": 0.0003257894736842105, "low_lr": 6.515789473684211e-06, "step": 1281 }, { "epoch": 3.368836291913215, "high_lr": 0.0003257894736842105, "low_lr": 6.515789473684211e-06, "step": 1281 }, { "epoch": 3.368836291913215, "high_lr": 0.0003257894736842105, "low_lr": 6.515789473684211e-06, "step": 1281 }, { "epoch": 3.368836291913215, "high_lr": 0.0003257894736842105, "low_lr": 6.515789473684211e-06, "step": 1281 }, { "epoch": 3.368836291913215, "high_lr": 0.0003257894736842105, "low_lr": 6.515789473684211e-06, "step": 1281 }, { "epoch": 3.368836291913215, "high_lr": 0.0003257894736842105, "low_lr": 6.515789473684211e-06, "step": 1281 }, { "epoch": 3.368836291913215, "high_lr": 0.0003257894736842105, "low_lr": 6.515789473684211e-06, "step": 1281 }, { "epoch": 3.368836291913215, "high_lr": 0.0003257894736842105, "low_lr": 6.515789473684211e-06, "step": 1281 }, { "epoch": 3.37146614069691, "grad_norm": 1.4166431427001953, "learning_rate": 0.0003252631578947369, "loss": 1.3183, "step": 1282 }, { "epoch": 3.37146614069691, "high_lr": 0.0003252631578947369, "low_lr": 6.505263157894738e-06, "step": 1282 }, { "epoch": 3.37146614069691, "high_lr": 0.0003252631578947369, "low_lr": 6.505263157894738e-06, "step": 1282 }, { "epoch": 3.37146614069691, "high_lr": 0.0003252631578947369, "low_lr": 6.505263157894738e-06, "step": 1282 }, { "epoch": 3.37146614069691, "high_lr": 0.0003252631578947369, "low_lr": 6.505263157894738e-06, "step": 1282 }, { "epoch": 3.37146614069691, "high_lr": 0.0003252631578947369, "low_lr": 6.505263157894738e-06, "step": 1282 }, { "epoch": 3.37146614069691, "high_lr": 0.0003252631578947369, "low_lr": 6.505263157894738e-06, "step": 1282 }, { "epoch": 3.37146614069691, "high_lr": 0.0003252631578947369, "low_lr": 6.505263157894738e-06, "step": 1282 }, { "epoch": 3.37146614069691, "high_lr": 0.0003252631578947369, "low_lr": 6.505263157894738e-06, "step": 1282 }, { "epoch": 3.3740959894806046, "grad_norm": 1.3787736892700195, "learning_rate": 0.0003247368421052632, "loss": 1.3056, "step": 1283 }, { "epoch": 3.3740959894806046, "high_lr": 0.0003247368421052632, "low_lr": 6.494736842105264e-06, "step": 1283 }, { "epoch": 3.3740959894806046, "high_lr": 0.0003247368421052632, "low_lr": 6.494736842105264e-06, "step": 1283 }, { "epoch": 3.3740959894806046, "high_lr": 0.0003247368421052632, "low_lr": 6.494736842105264e-06, "step": 1283 }, { "epoch": 3.3740959894806046, "high_lr": 0.0003247368421052632, "low_lr": 6.494736842105264e-06, "step": 1283 }, { "epoch": 3.3740959894806046, "high_lr": 0.0003247368421052632, "low_lr": 6.494736842105264e-06, "step": 1283 }, { "epoch": 3.3740959894806046, "high_lr": 0.0003247368421052632, "low_lr": 6.494736842105264e-06, "step": 1283 }, { "epoch": 3.3740959894806046, "high_lr": 0.0003247368421052632, "low_lr": 6.494736842105264e-06, "step": 1283 }, { "epoch": 3.3740959894806046, "high_lr": 0.0003247368421052632, "low_lr": 6.494736842105264e-06, "step": 1283 }, { "epoch": 3.3767258382642997, "grad_norm": 1.41636061668396, "learning_rate": 0.00032421052631578947, "loss": 1.3253, "step": 1284 }, { "epoch": 3.3767258382642997, "high_lr": 0.00032421052631578947, "low_lr": 6.484210526315789e-06, "step": 1284 }, { "epoch": 3.3767258382642997, "high_lr": 0.00032421052631578947, "low_lr": 6.484210526315789e-06, "step": 1284 }, { "epoch": 3.3767258382642997, "high_lr": 0.00032421052631578947, "low_lr": 6.484210526315789e-06, "step": 1284 }, { "epoch": 3.3767258382642997, "high_lr": 0.00032421052631578947, "low_lr": 6.484210526315789e-06, "step": 1284 }, { "epoch": 3.3767258382642997, "high_lr": 0.00032421052631578947, "low_lr": 6.484210526315789e-06, "step": 1284 }, { "epoch": 3.3767258382642997, "high_lr": 0.00032421052631578947, "low_lr": 6.484210526315789e-06, "step": 1284 }, { "epoch": 3.3767258382642997, "high_lr": 0.00032421052631578947, "low_lr": 6.484210526315789e-06, "step": 1284 }, { "epoch": 3.3767258382642997, "high_lr": 0.00032421052631578947, "low_lr": 6.484210526315789e-06, "step": 1284 }, { "epoch": 3.379355687047995, "grad_norm": 1.5021086931228638, "learning_rate": 0.0003236842105263158, "loss": 1.28, "step": 1285 }, { "epoch": 3.379355687047995, "high_lr": 0.0003236842105263158, "low_lr": 6.473684210526316e-06, "step": 1285 }, { "epoch": 3.379355687047995, "high_lr": 0.0003236842105263158, "low_lr": 6.473684210526316e-06, "step": 1285 }, { "epoch": 3.379355687047995, "high_lr": 0.0003236842105263158, "low_lr": 6.473684210526316e-06, "step": 1285 }, { "epoch": 3.379355687047995, "high_lr": 0.0003236842105263158, "low_lr": 6.473684210526316e-06, "step": 1285 }, { "epoch": 3.379355687047995, "high_lr": 0.0003236842105263158, "low_lr": 6.473684210526316e-06, "step": 1285 }, { "epoch": 3.379355687047995, "high_lr": 0.0003236842105263158, "low_lr": 6.473684210526316e-06, "step": 1285 }, { "epoch": 3.379355687047995, "high_lr": 0.0003236842105263158, "low_lr": 6.473684210526316e-06, "step": 1285 }, { "epoch": 3.379355687047995, "high_lr": 0.0003236842105263158, "low_lr": 6.473684210526316e-06, "step": 1285 }, { "epoch": 3.3819855358316895, "grad_norm": 1.497136116027832, "learning_rate": 0.0003231578947368421, "loss": 1.2785, "step": 1286 }, { "epoch": 3.3819855358316895, "high_lr": 0.0003231578947368421, "low_lr": 6.463157894736843e-06, "step": 1286 }, { "epoch": 3.3819855358316895, "high_lr": 0.0003231578947368421, "low_lr": 6.463157894736843e-06, "step": 1286 }, { "epoch": 3.3819855358316895, "high_lr": 0.0003231578947368421, "low_lr": 6.463157894736843e-06, "step": 1286 }, { "epoch": 3.3819855358316895, "high_lr": 0.0003231578947368421, "low_lr": 6.463157894736843e-06, "step": 1286 }, { "epoch": 3.3819855358316895, "high_lr": 0.0003231578947368421, "low_lr": 6.463157894736843e-06, "step": 1286 }, { "epoch": 3.3819855358316895, "high_lr": 0.0003231578947368421, "low_lr": 6.463157894736843e-06, "step": 1286 }, { "epoch": 3.3819855358316895, "high_lr": 0.0003231578947368421, "low_lr": 6.463157894736843e-06, "step": 1286 }, { "epoch": 3.3819855358316895, "high_lr": 0.0003231578947368421, "low_lr": 6.463157894736843e-06, "step": 1286 }, { "epoch": 3.3846153846153846, "grad_norm": 1.3111793994903564, "learning_rate": 0.00032263157894736843, "loss": 1.2936, "step": 1287 }, { "epoch": 3.3846153846153846, "high_lr": 0.00032263157894736843, "low_lr": 6.452631578947369e-06, "step": 1287 }, { "epoch": 3.3846153846153846, "high_lr": 0.00032263157894736843, "low_lr": 6.452631578947369e-06, "step": 1287 }, { "epoch": 3.3846153846153846, "high_lr": 0.00032263157894736843, "low_lr": 6.452631578947369e-06, "step": 1287 }, { "epoch": 3.3846153846153846, "high_lr": 0.00032263157894736843, "low_lr": 6.452631578947369e-06, "step": 1287 }, { "epoch": 3.3846153846153846, "high_lr": 0.00032263157894736843, "low_lr": 6.452631578947369e-06, "step": 1287 }, { "epoch": 3.3846153846153846, "high_lr": 0.00032263157894736843, "low_lr": 6.452631578947369e-06, "step": 1287 }, { "epoch": 3.3846153846153846, "high_lr": 0.00032263157894736843, "low_lr": 6.452631578947369e-06, "step": 1287 }, { "epoch": 3.3846153846153846, "high_lr": 0.00032263157894736843, "low_lr": 6.452631578947369e-06, "step": 1287 }, { "epoch": 3.3872452333990797, "grad_norm": 1.333734154701233, "learning_rate": 0.0003221052631578947, "loss": 1.286, "step": 1288 }, { "epoch": 3.3872452333990797, "high_lr": 0.0003221052631578947, "low_lr": 6.442105263157895e-06, "step": 1288 }, { "epoch": 3.3872452333990797, "high_lr": 0.0003221052631578947, "low_lr": 6.442105263157895e-06, "step": 1288 }, { "epoch": 3.3872452333990797, "high_lr": 0.0003221052631578947, "low_lr": 6.442105263157895e-06, "step": 1288 }, { "epoch": 3.3872452333990797, "high_lr": 0.0003221052631578947, "low_lr": 6.442105263157895e-06, "step": 1288 }, { "epoch": 3.3872452333990797, "high_lr": 0.0003221052631578947, "low_lr": 6.442105263157895e-06, "step": 1288 }, { "epoch": 3.3872452333990797, "high_lr": 0.0003221052631578947, "low_lr": 6.442105263157895e-06, "step": 1288 }, { "epoch": 3.3872452333990797, "high_lr": 0.0003221052631578947, "low_lr": 6.442105263157895e-06, "step": 1288 }, { "epoch": 3.3872452333990797, "high_lr": 0.0003221052631578947, "low_lr": 6.442105263157895e-06, "step": 1288 }, { "epoch": 3.3898750821827743, "grad_norm": 1.371147632598877, "learning_rate": 0.00032157894736842106, "loss": 1.2734, "step": 1289 }, { "epoch": 3.3898750821827743, "high_lr": 0.00032157894736842106, "low_lr": 6.431578947368422e-06, "step": 1289 }, { "epoch": 3.3898750821827743, "high_lr": 0.00032157894736842106, "low_lr": 6.431578947368422e-06, "step": 1289 }, { "epoch": 3.3898750821827743, "high_lr": 0.00032157894736842106, "low_lr": 6.431578947368422e-06, "step": 1289 }, { "epoch": 3.3898750821827743, "high_lr": 0.00032157894736842106, "low_lr": 6.431578947368422e-06, "step": 1289 }, { "epoch": 3.3898750821827743, "high_lr": 0.00032157894736842106, "low_lr": 6.431578947368422e-06, "step": 1289 }, { "epoch": 3.3898750821827743, "high_lr": 0.00032157894736842106, "low_lr": 6.431578947368422e-06, "step": 1289 }, { "epoch": 3.3898750821827743, "high_lr": 0.00032157894736842106, "low_lr": 6.431578947368422e-06, "step": 1289 }, { "epoch": 3.3898750821827743, "high_lr": 0.00032157894736842106, "low_lr": 6.431578947368422e-06, "step": 1289 }, { "epoch": 3.3925049309664694, "grad_norm": 1.4894404411315918, "learning_rate": 0.0003210526315789474, "loss": 1.3076, "step": 1290 }, { "epoch": 3.3925049309664694, "high_lr": 0.0003210526315789474, "low_lr": 6.421052631578948e-06, "step": 1290 }, { "epoch": 3.3925049309664694, "high_lr": 0.0003210526315789474, "low_lr": 6.421052631578948e-06, "step": 1290 }, { "epoch": 3.3925049309664694, "high_lr": 0.0003210526315789474, "low_lr": 6.421052631578948e-06, "step": 1290 }, { "epoch": 3.3925049309664694, "high_lr": 0.0003210526315789474, "low_lr": 6.421052631578948e-06, "step": 1290 }, { "epoch": 3.3925049309664694, "high_lr": 0.0003210526315789474, "low_lr": 6.421052631578948e-06, "step": 1290 }, { "epoch": 3.3925049309664694, "high_lr": 0.0003210526315789474, "low_lr": 6.421052631578948e-06, "step": 1290 }, { "epoch": 3.3925049309664694, "high_lr": 0.0003210526315789474, "low_lr": 6.421052631578948e-06, "step": 1290 }, { "epoch": 3.3925049309664694, "high_lr": 0.0003210526315789474, "low_lr": 6.421052631578948e-06, "step": 1290 }, { "epoch": 3.3951347797501645, "grad_norm": 1.3065286874771118, "learning_rate": 0.0003205263157894737, "loss": 1.2725, "step": 1291 }, { "epoch": 3.3951347797501645, "high_lr": 0.0003205263157894737, "low_lr": 6.410526315789473e-06, "step": 1291 }, { "epoch": 3.3951347797501645, "high_lr": 0.0003205263157894737, "low_lr": 6.410526315789473e-06, "step": 1291 }, { "epoch": 3.3951347797501645, "high_lr": 0.0003205263157894737, "low_lr": 6.410526315789473e-06, "step": 1291 }, { "epoch": 3.3951347797501645, "high_lr": 0.0003205263157894737, "low_lr": 6.410526315789473e-06, "step": 1291 }, { "epoch": 3.3951347797501645, "high_lr": 0.0003205263157894737, "low_lr": 6.410526315789473e-06, "step": 1291 }, { "epoch": 3.3951347797501645, "high_lr": 0.0003205263157894737, "low_lr": 6.410526315789473e-06, "step": 1291 }, { "epoch": 3.3951347797501645, "high_lr": 0.0003205263157894737, "low_lr": 6.410526315789473e-06, "step": 1291 }, { "epoch": 3.3951347797501645, "high_lr": 0.0003205263157894737, "low_lr": 6.410526315789473e-06, "step": 1291 }, { "epoch": 3.397764628533859, "grad_norm": 1.4746716022491455, "learning_rate": 0.00032, "loss": 1.2408, "step": 1292 }, { "epoch": 3.397764628533859, "high_lr": 0.00032, "low_lr": 6.4000000000000006e-06, "step": 1292 }, { "epoch": 3.397764628533859, "high_lr": 0.00032, "low_lr": 6.4000000000000006e-06, "step": 1292 }, { "epoch": 3.397764628533859, "high_lr": 0.00032, "low_lr": 6.4000000000000006e-06, "step": 1292 }, { "epoch": 3.397764628533859, "high_lr": 0.00032, "low_lr": 6.4000000000000006e-06, "step": 1292 }, { "epoch": 3.397764628533859, "high_lr": 0.00032, "low_lr": 6.4000000000000006e-06, "step": 1292 }, { "epoch": 3.397764628533859, "high_lr": 0.00032, "low_lr": 6.4000000000000006e-06, "step": 1292 }, { "epoch": 3.397764628533859, "high_lr": 0.00032, "low_lr": 6.4000000000000006e-06, "step": 1292 }, { "epoch": 3.397764628533859, "high_lr": 0.00032, "low_lr": 6.4000000000000006e-06, "step": 1292 }, { "epoch": 3.4003944773175543, "grad_norm": 1.4193333387374878, "learning_rate": 0.0003194736842105263, "loss": 1.3131, "step": 1293 }, { "epoch": 3.4003944773175543, "high_lr": 0.0003194736842105263, "low_lr": 6.389473684210527e-06, "step": 1293 }, { "epoch": 3.4003944773175543, "high_lr": 0.0003194736842105263, "low_lr": 6.389473684210527e-06, "step": 1293 }, { "epoch": 3.4003944773175543, "high_lr": 0.0003194736842105263, "low_lr": 6.389473684210527e-06, "step": 1293 }, { "epoch": 3.4003944773175543, "high_lr": 0.0003194736842105263, "low_lr": 6.389473684210527e-06, "step": 1293 }, { "epoch": 3.4003944773175543, "high_lr": 0.0003194736842105263, "low_lr": 6.389473684210527e-06, "step": 1293 }, { "epoch": 3.4003944773175543, "high_lr": 0.0003194736842105263, "low_lr": 6.389473684210527e-06, "step": 1293 }, { "epoch": 3.4003944773175543, "high_lr": 0.0003194736842105263, "low_lr": 6.389473684210527e-06, "step": 1293 }, { "epoch": 3.4003944773175543, "high_lr": 0.0003194736842105263, "low_lr": 6.389473684210527e-06, "step": 1293 }, { "epoch": 3.4030243261012494, "grad_norm": 1.4493755102157593, "learning_rate": 0.00031894736842105265, "loss": 1.3229, "step": 1294 }, { "epoch": 3.4030243261012494, "high_lr": 0.00031894736842105265, "low_lr": 6.378947368421053e-06, "step": 1294 }, { "epoch": 3.4030243261012494, "high_lr": 0.00031894736842105265, "low_lr": 6.378947368421053e-06, "step": 1294 }, { "epoch": 3.4030243261012494, "high_lr": 0.00031894736842105265, "low_lr": 6.378947368421053e-06, "step": 1294 }, { "epoch": 3.4030243261012494, "high_lr": 0.00031894736842105265, "low_lr": 6.378947368421053e-06, "step": 1294 }, { "epoch": 3.4030243261012494, "high_lr": 0.00031894736842105265, "low_lr": 6.378947368421053e-06, "step": 1294 }, { "epoch": 3.4030243261012494, "high_lr": 0.00031894736842105265, "low_lr": 6.378947368421053e-06, "step": 1294 }, { "epoch": 3.4030243261012494, "high_lr": 0.00031894736842105265, "low_lr": 6.378947368421053e-06, "step": 1294 }, { "epoch": 3.4030243261012494, "high_lr": 0.00031894736842105265, "low_lr": 6.378947368421053e-06, "step": 1294 }, { "epoch": 3.405654174884944, "grad_norm": 1.3478337526321411, "learning_rate": 0.00031842105263157894, "loss": 1.2642, "step": 1295 }, { "epoch": 3.405654174884944, "high_lr": 0.00031842105263157894, "low_lr": 6.3684210526315795e-06, "step": 1295 }, { "epoch": 3.405654174884944, "high_lr": 0.00031842105263157894, "low_lr": 6.3684210526315795e-06, "step": 1295 }, { "epoch": 3.405654174884944, "high_lr": 0.00031842105263157894, "low_lr": 6.3684210526315795e-06, "step": 1295 }, { "epoch": 3.405654174884944, "high_lr": 0.00031842105263157894, "low_lr": 6.3684210526315795e-06, "step": 1295 }, { "epoch": 3.405654174884944, "high_lr": 0.00031842105263157894, "low_lr": 6.3684210526315795e-06, "step": 1295 }, { "epoch": 3.405654174884944, "high_lr": 0.00031842105263157894, "low_lr": 6.3684210526315795e-06, "step": 1295 }, { "epoch": 3.405654174884944, "high_lr": 0.00031842105263157894, "low_lr": 6.3684210526315795e-06, "step": 1295 }, { "epoch": 3.405654174884944, "high_lr": 0.00031842105263157894, "low_lr": 6.3684210526315795e-06, "step": 1295 }, { "epoch": 3.408284023668639, "grad_norm": 1.4272356033325195, "learning_rate": 0.0003178947368421053, "loss": 1.3132, "step": 1296 }, { "epoch": 3.408284023668639, "high_lr": 0.0003178947368421053, "low_lr": 6.357894736842106e-06, "step": 1296 }, { "epoch": 3.408284023668639, "high_lr": 0.0003178947368421053, "low_lr": 6.357894736842106e-06, "step": 1296 }, { "epoch": 3.408284023668639, "high_lr": 0.0003178947368421053, "low_lr": 6.357894736842106e-06, "step": 1296 }, { "epoch": 3.408284023668639, "high_lr": 0.0003178947368421053, "low_lr": 6.357894736842106e-06, "step": 1296 }, { "epoch": 3.408284023668639, "high_lr": 0.0003178947368421053, "low_lr": 6.357894736842106e-06, "step": 1296 }, { "epoch": 3.408284023668639, "high_lr": 0.0003178947368421053, "low_lr": 6.357894736842106e-06, "step": 1296 }, { "epoch": 3.408284023668639, "high_lr": 0.0003178947368421053, "low_lr": 6.357894736842106e-06, "step": 1296 }, { "epoch": 3.408284023668639, "high_lr": 0.0003178947368421053, "low_lr": 6.357894736842106e-06, "step": 1296 }, { "epoch": 3.4109138724523342, "grad_norm": 1.3690342903137207, "learning_rate": 0.00031736842105263156, "loss": 1.3243, "step": 1297 }, { "epoch": 3.4109138724523342, "high_lr": 0.00031736842105263156, "low_lr": 6.347368421052632e-06, "step": 1297 }, { "epoch": 3.4109138724523342, "high_lr": 0.00031736842105263156, "low_lr": 6.347368421052632e-06, "step": 1297 }, { "epoch": 3.4109138724523342, "high_lr": 0.00031736842105263156, "low_lr": 6.347368421052632e-06, "step": 1297 }, { "epoch": 3.4109138724523342, "high_lr": 0.00031736842105263156, "low_lr": 6.347368421052632e-06, "step": 1297 }, { "epoch": 3.4109138724523342, "high_lr": 0.00031736842105263156, "low_lr": 6.347368421052632e-06, "step": 1297 }, { "epoch": 3.4109138724523342, "high_lr": 0.00031736842105263156, "low_lr": 6.347368421052632e-06, "step": 1297 }, { "epoch": 3.4109138724523342, "high_lr": 0.00031736842105263156, "low_lr": 6.347368421052632e-06, "step": 1297 }, { "epoch": 3.4109138724523342, "high_lr": 0.00031736842105263156, "low_lr": 6.347368421052632e-06, "step": 1297 }, { "epoch": 3.413543721236029, "grad_norm": 1.4308713674545288, "learning_rate": 0.00031684210526315785, "loss": 1.2948, "step": 1298 }, { "epoch": 3.413543721236029, "high_lr": 0.00031684210526315785, "low_lr": 6.336842105263158e-06, "step": 1298 }, { "epoch": 3.413543721236029, "high_lr": 0.00031684210526315785, "low_lr": 6.336842105263158e-06, "step": 1298 }, { "epoch": 3.413543721236029, "high_lr": 0.00031684210526315785, "low_lr": 6.336842105263158e-06, "step": 1298 }, { "epoch": 3.413543721236029, "high_lr": 0.00031684210526315785, "low_lr": 6.336842105263158e-06, "step": 1298 }, { "epoch": 3.413543721236029, "high_lr": 0.00031684210526315785, "low_lr": 6.336842105263158e-06, "step": 1298 }, { "epoch": 3.413543721236029, "high_lr": 0.00031684210526315785, "low_lr": 6.336842105263158e-06, "step": 1298 }, { "epoch": 3.413543721236029, "high_lr": 0.00031684210526315785, "low_lr": 6.336842105263158e-06, "step": 1298 }, { "epoch": 3.413543721236029, "high_lr": 0.00031684210526315785, "low_lr": 6.336842105263158e-06, "step": 1298 }, { "epoch": 3.416173570019724, "grad_norm": 1.416103482246399, "learning_rate": 0.00031631578947368424, "loss": 1.2928, "step": 1299 }, { "epoch": 3.416173570019724, "high_lr": 0.00031631578947368424, "low_lr": 6.326315789473685e-06, "step": 1299 }, { "epoch": 3.416173570019724, "high_lr": 0.00031631578947368424, "low_lr": 6.326315789473685e-06, "step": 1299 }, { "epoch": 3.416173570019724, "high_lr": 0.00031631578947368424, "low_lr": 6.326315789473685e-06, "step": 1299 }, { "epoch": 3.416173570019724, "high_lr": 0.00031631578947368424, "low_lr": 6.326315789473685e-06, "step": 1299 }, { "epoch": 3.416173570019724, "high_lr": 0.00031631578947368424, "low_lr": 6.326315789473685e-06, "step": 1299 }, { "epoch": 3.416173570019724, "high_lr": 0.00031631578947368424, "low_lr": 6.326315789473685e-06, "step": 1299 }, { "epoch": 3.416173570019724, "high_lr": 0.00031631578947368424, "low_lr": 6.326315789473685e-06, "step": 1299 }, { "epoch": 3.416173570019724, "high_lr": 0.00031631578947368424, "low_lr": 6.326315789473685e-06, "step": 1299 }, { "epoch": 3.4188034188034186, "grad_norm": 1.3614590167999268, "learning_rate": 0.00031578947368421053, "loss": 1.2941, "step": 1300 }, { "epoch": 3.4188034188034186, "high_lr": 0.00031578947368421053, "low_lr": 6.31578947368421e-06, "step": 1300 }, { "epoch": 3.4188034188034186, "high_lr": 0.00031578947368421053, "low_lr": 6.31578947368421e-06, "step": 1300 }, { "epoch": 3.4188034188034186, "high_lr": 0.00031578947368421053, "low_lr": 6.31578947368421e-06, "step": 1300 }, { "epoch": 3.4188034188034186, "high_lr": 0.00031578947368421053, "low_lr": 6.31578947368421e-06, "step": 1300 }, { "epoch": 3.4188034188034186, "high_lr": 0.00031578947368421053, "low_lr": 6.31578947368421e-06, "step": 1300 }, { "epoch": 3.4188034188034186, "high_lr": 0.00031578947368421053, "low_lr": 6.31578947368421e-06, "step": 1300 }, { "epoch": 3.4188034188034186, "high_lr": 0.00031578947368421053, "low_lr": 6.31578947368421e-06, "step": 1300 }, { "epoch": 3.4188034188034186, "high_lr": 0.00031578947368421053, "low_lr": 6.31578947368421e-06, "step": 1300 }, { "epoch": 3.4214332675871137, "grad_norm": 1.4130271673202515, "learning_rate": 0.00031526315789473687, "loss": 1.3067, "step": 1301 }, { "epoch": 3.4214332675871137, "high_lr": 0.00031526315789473687, "low_lr": 6.3052631578947375e-06, "step": 1301 }, { "epoch": 3.4214332675871137, "high_lr": 0.00031526315789473687, "low_lr": 6.3052631578947375e-06, "step": 1301 }, { "epoch": 3.4214332675871137, "high_lr": 0.00031526315789473687, "low_lr": 6.3052631578947375e-06, "step": 1301 }, { "epoch": 3.4214332675871137, "high_lr": 0.00031526315789473687, "low_lr": 6.3052631578947375e-06, "step": 1301 }, { "epoch": 3.4214332675871137, "high_lr": 0.00031526315789473687, "low_lr": 6.3052631578947375e-06, "step": 1301 }, { "epoch": 3.4214332675871137, "high_lr": 0.00031526315789473687, "low_lr": 6.3052631578947375e-06, "step": 1301 }, { "epoch": 3.4214332675871137, "high_lr": 0.00031526315789473687, "low_lr": 6.3052631578947375e-06, "step": 1301 }, { "epoch": 3.4214332675871137, "high_lr": 0.00031526315789473687, "low_lr": 6.3052631578947375e-06, "step": 1301 }, { "epoch": 3.424063116370809, "grad_norm": 1.4979448318481445, "learning_rate": 0.00031473684210526316, "loss": 1.3681, "step": 1302 }, { "epoch": 3.424063116370809, "high_lr": 0.00031473684210526316, "low_lr": 6.294736842105264e-06, "step": 1302 }, { "epoch": 3.424063116370809, "high_lr": 0.00031473684210526316, "low_lr": 6.294736842105264e-06, "step": 1302 }, { "epoch": 3.424063116370809, "high_lr": 0.00031473684210526316, "low_lr": 6.294736842105264e-06, "step": 1302 }, { "epoch": 3.424063116370809, "high_lr": 0.00031473684210526316, "low_lr": 6.294736842105264e-06, "step": 1302 }, { "epoch": 3.424063116370809, "high_lr": 0.00031473684210526316, "low_lr": 6.294736842105264e-06, "step": 1302 }, { "epoch": 3.424063116370809, "high_lr": 0.00031473684210526316, "low_lr": 6.294736842105264e-06, "step": 1302 }, { "epoch": 3.424063116370809, "high_lr": 0.00031473684210526316, "low_lr": 6.294736842105264e-06, "step": 1302 }, { "epoch": 3.424063116370809, "high_lr": 0.00031473684210526316, "low_lr": 6.294736842105264e-06, "step": 1302 }, { "epoch": 3.4266929651545035, "grad_norm": 1.4602686166763306, "learning_rate": 0.0003142105263157895, "loss": 1.2643, "step": 1303 }, { "epoch": 3.4266929651545035, "high_lr": 0.0003142105263157895, "low_lr": 6.28421052631579e-06, "step": 1303 }, { "epoch": 3.4266929651545035, "high_lr": 0.0003142105263157895, "low_lr": 6.28421052631579e-06, "step": 1303 }, { "epoch": 3.4266929651545035, "high_lr": 0.0003142105263157895, "low_lr": 6.28421052631579e-06, "step": 1303 }, { "epoch": 3.4266929651545035, "high_lr": 0.0003142105263157895, "low_lr": 6.28421052631579e-06, "step": 1303 }, { "epoch": 3.4266929651545035, "high_lr": 0.0003142105263157895, "low_lr": 6.28421052631579e-06, "step": 1303 }, { "epoch": 3.4266929651545035, "high_lr": 0.0003142105263157895, "low_lr": 6.28421052631579e-06, "step": 1303 }, { "epoch": 3.4266929651545035, "high_lr": 0.0003142105263157895, "low_lr": 6.28421052631579e-06, "step": 1303 }, { "epoch": 3.4266929651545035, "high_lr": 0.0003142105263157895, "low_lr": 6.28421052631579e-06, "step": 1303 }, { "epoch": 3.4293228139381986, "grad_norm": 1.4567514657974243, "learning_rate": 0.0003136842105263158, "loss": 1.2775, "step": 1304 }, { "epoch": 3.4293228139381986, "high_lr": 0.0003136842105263158, "low_lr": 6.2736842105263165e-06, "step": 1304 }, { "epoch": 3.4293228139381986, "high_lr": 0.0003136842105263158, "low_lr": 6.2736842105263165e-06, "step": 1304 }, { "epoch": 3.4293228139381986, "high_lr": 0.0003136842105263158, "low_lr": 6.2736842105263165e-06, "step": 1304 }, { "epoch": 3.4293228139381986, "high_lr": 0.0003136842105263158, "low_lr": 6.2736842105263165e-06, "step": 1304 }, { "epoch": 3.4293228139381986, "high_lr": 0.0003136842105263158, "low_lr": 6.2736842105263165e-06, "step": 1304 }, { "epoch": 3.4293228139381986, "high_lr": 0.0003136842105263158, "low_lr": 6.2736842105263165e-06, "step": 1304 }, { "epoch": 3.4293228139381986, "high_lr": 0.0003136842105263158, "low_lr": 6.2736842105263165e-06, "step": 1304 }, { "epoch": 3.4293228139381986, "high_lr": 0.0003136842105263158, "low_lr": 6.2736842105263165e-06, "step": 1304 }, { "epoch": 3.4319526627218933, "grad_norm": 1.3604228496551514, "learning_rate": 0.00031315789473684207, "loss": 1.3007, "step": 1305 }, { "epoch": 3.4319526627218933, "high_lr": 0.00031315789473684207, "low_lr": 6.263157894736842e-06, "step": 1305 }, { "epoch": 3.4319526627218933, "high_lr": 0.00031315789473684207, "low_lr": 6.263157894736842e-06, "step": 1305 }, { "epoch": 3.4319526627218933, "high_lr": 0.00031315789473684207, "low_lr": 6.263157894736842e-06, "step": 1305 }, { "epoch": 3.4319526627218933, "high_lr": 0.00031315789473684207, "low_lr": 6.263157894736842e-06, "step": 1305 }, { "epoch": 3.4319526627218933, "high_lr": 0.00031315789473684207, "low_lr": 6.263157894736842e-06, "step": 1305 }, { "epoch": 3.4319526627218933, "high_lr": 0.00031315789473684207, "low_lr": 6.263157894736842e-06, "step": 1305 }, { "epoch": 3.4319526627218933, "high_lr": 0.00031315789473684207, "low_lr": 6.263157894736842e-06, "step": 1305 }, { "epoch": 3.4319526627218933, "high_lr": 0.00031315789473684207, "low_lr": 6.263157894736842e-06, "step": 1305 }, { "epoch": 3.4345825115055884, "grad_norm": 1.4118196964263916, "learning_rate": 0.0003126315789473684, "loss": 1.2874, "step": 1306 }, { "epoch": 3.4345825115055884, "high_lr": 0.0003126315789473684, "low_lr": 6.252631578947369e-06, "step": 1306 }, { "epoch": 3.4345825115055884, "high_lr": 0.0003126315789473684, "low_lr": 6.252631578947369e-06, "step": 1306 }, { "epoch": 3.4345825115055884, "high_lr": 0.0003126315789473684, "low_lr": 6.252631578947369e-06, "step": 1306 }, { "epoch": 3.4345825115055884, "high_lr": 0.0003126315789473684, "low_lr": 6.252631578947369e-06, "step": 1306 }, { "epoch": 3.4345825115055884, "high_lr": 0.0003126315789473684, "low_lr": 6.252631578947369e-06, "step": 1306 }, { "epoch": 3.4345825115055884, "high_lr": 0.0003126315789473684, "low_lr": 6.252631578947369e-06, "step": 1306 }, { "epoch": 3.4345825115055884, "high_lr": 0.0003126315789473684, "low_lr": 6.252631578947369e-06, "step": 1306 }, { "epoch": 3.4345825115055884, "high_lr": 0.0003126315789473684, "low_lr": 6.252631578947369e-06, "step": 1306 }, { "epoch": 3.4372123602892835, "grad_norm": 1.3901150226593018, "learning_rate": 0.00031210526315789475, "loss": 1.2901, "step": 1307 }, { "epoch": 3.4372123602892835, "high_lr": 0.00031210526315789475, "low_lr": 6.242105263157895e-06, "step": 1307 }, { "epoch": 3.4372123602892835, "high_lr": 0.00031210526315789475, "low_lr": 6.242105263157895e-06, "step": 1307 }, { "epoch": 3.4372123602892835, "high_lr": 0.00031210526315789475, "low_lr": 6.242105263157895e-06, "step": 1307 }, { "epoch": 3.4372123602892835, "high_lr": 0.00031210526315789475, "low_lr": 6.242105263157895e-06, "step": 1307 }, { "epoch": 3.4372123602892835, "high_lr": 0.00031210526315789475, "low_lr": 6.242105263157895e-06, "step": 1307 }, { "epoch": 3.4372123602892835, "high_lr": 0.00031210526315789475, "low_lr": 6.242105263157895e-06, "step": 1307 }, { "epoch": 3.4372123602892835, "high_lr": 0.00031210526315789475, "low_lr": 6.242105263157895e-06, "step": 1307 }, { "epoch": 3.4372123602892835, "high_lr": 0.00031210526315789475, "low_lr": 6.242105263157895e-06, "step": 1307 }, { "epoch": 3.439842209072978, "grad_norm": 1.3946117162704468, "learning_rate": 0.0003115789473684211, "loss": 1.2785, "step": 1308 }, { "epoch": 3.439842209072978, "high_lr": 0.0003115789473684211, "low_lr": 6.231578947368422e-06, "step": 1308 }, { "epoch": 3.439842209072978, "high_lr": 0.0003115789473684211, "low_lr": 6.231578947368422e-06, "step": 1308 }, { "epoch": 3.439842209072978, "high_lr": 0.0003115789473684211, "low_lr": 6.231578947368422e-06, "step": 1308 }, { "epoch": 3.439842209072978, "high_lr": 0.0003115789473684211, "low_lr": 6.231578947368422e-06, "step": 1308 }, { "epoch": 3.439842209072978, "high_lr": 0.0003115789473684211, "low_lr": 6.231578947368422e-06, "step": 1308 }, { "epoch": 3.439842209072978, "high_lr": 0.0003115789473684211, "low_lr": 6.231578947368422e-06, "step": 1308 }, { "epoch": 3.439842209072978, "high_lr": 0.0003115789473684211, "low_lr": 6.231578947368422e-06, "step": 1308 }, { "epoch": 3.439842209072978, "high_lr": 0.0003115789473684211, "low_lr": 6.231578947368422e-06, "step": 1308 }, { "epoch": 3.442472057856673, "grad_norm": 1.4135258197784424, "learning_rate": 0.0003110526315789474, "loss": 1.301, "step": 1309 }, { "epoch": 3.442472057856673, "high_lr": 0.0003110526315789474, "low_lr": 6.221052631578947e-06, "step": 1309 }, { "epoch": 3.442472057856673, "high_lr": 0.0003110526315789474, "low_lr": 6.221052631578947e-06, "step": 1309 }, { "epoch": 3.442472057856673, "high_lr": 0.0003110526315789474, "low_lr": 6.221052631578947e-06, "step": 1309 }, { "epoch": 3.442472057856673, "high_lr": 0.0003110526315789474, "low_lr": 6.221052631578947e-06, "step": 1309 }, { "epoch": 3.442472057856673, "high_lr": 0.0003110526315789474, "low_lr": 6.221052631578947e-06, "step": 1309 }, { "epoch": 3.442472057856673, "high_lr": 0.0003110526315789474, "low_lr": 6.221052631578947e-06, "step": 1309 }, { "epoch": 3.442472057856673, "high_lr": 0.0003110526315789474, "low_lr": 6.221052631578947e-06, "step": 1309 }, { "epoch": 3.442472057856673, "high_lr": 0.0003110526315789474, "low_lr": 6.221052631578947e-06, "step": 1309 }, { "epoch": 3.4451019066403683, "grad_norm": 1.4388030767440796, "learning_rate": 0.0003105263157894737, "loss": 1.3084, "step": 1310 }, { "epoch": 3.4451019066403683, "high_lr": 0.0003105263157894737, "low_lr": 6.2105263157894745e-06, "step": 1310 }, { "epoch": 3.4451019066403683, "high_lr": 0.0003105263157894737, "low_lr": 6.2105263157894745e-06, "step": 1310 }, { "epoch": 3.4451019066403683, "high_lr": 0.0003105263157894737, "low_lr": 6.2105263157894745e-06, "step": 1310 }, { "epoch": 3.4451019066403683, "high_lr": 0.0003105263157894737, "low_lr": 6.2105263157894745e-06, "step": 1310 }, { "epoch": 3.4451019066403683, "high_lr": 0.0003105263157894737, "low_lr": 6.2105263157894745e-06, "step": 1310 }, { "epoch": 3.4451019066403683, "high_lr": 0.0003105263157894737, "low_lr": 6.2105263157894745e-06, "step": 1310 }, { "epoch": 3.4451019066403683, "high_lr": 0.0003105263157894737, "low_lr": 6.2105263157894745e-06, "step": 1310 }, { "epoch": 3.4451019066403683, "high_lr": 0.0003105263157894737, "low_lr": 6.2105263157894745e-06, "step": 1310 }, { "epoch": 3.447731755424063, "grad_norm": 1.385117769241333, "learning_rate": 0.00031, "loss": 1.3113, "step": 1311 }, { "epoch": 3.447731755424063, "high_lr": 0.00031, "low_lr": 6.200000000000001e-06, "step": 1311 }, { "epoch": 3.447731755424063, "high_lr": 0.00031, "low_lr": 6.200000000000001e-06, "step": 1311 }, { "epoch": 3.447731755424063, "high_lr": 0.00031, "low_lr": 6.200000000000001e-06, "step": 1311 }, { "epoch": 3.447731755424063, "high_lr": 0.00031, "low_lr": 6.200000000000001e-06, "step": 1311 }, { "epoch": 3.447731755424063, "high_lr": 0.00031, "low_lr": 6.200000000000001e-06, "step": 1311 }, { "epoch": 3.447731755424063, "high_lr": 0.00031, "low_lr": 6.200000000000001e-06, "step": 1311 }, { "epoch": 3.447731755424063, "high_lr": 0.00031, "low_lr": 6.200000000000001e-06, "step": 1311 }, { "epoch": 3.447731755424063, "high_lr": 0.00031, "low_lr": 6.200000000000001e-06, "step": 1311 }, { "epoch": 3.450361604207758, "grad_norm": 1.5202070474624634, "learning_rate": 0.0003094736842105263, "loss": 1.3932, "step": 1312 }, { "epoch": 3.450361604207758, "high_lr": 0.0003094736842105263, "low_lr": 6.189473684210526e-06, "step": 1312 }, { "epoch": 3.450361604207758, "high_lr": 0.0003094736842105263, "low_lr": 6.189473684210526e-06, "step": 1312 }, { "epoch": 3.450361604207758, "high_lr": 0.0003094736842105263, "low_lr": 6.189473684210526e-06, "step": 1312 }, { "epoch": 3.450361604207758, "high_lr": 0.0003094736842105263, "low_lr": 6.189473684210526e-06, "step": 1312 }, { "epoch": 3.450361604207758, "high_lr": 0.0003094736842105263, "low_lr": 6.189473684210526e-06, "step": 1312 }, { "epoch": 3.450361604207758, "high_lr": 0.0003094736842105263, "low_lr": 6.189473684210526e-06, "step": 1312 }, { "epoch": 3.450361604207758, "high_lr": 0.0003094736842105263, "low_lr": 6.189473684210526e-06, "step": 1312 }, { "epoch": 3.450361604207758, "high_lr": 0.0003094736842105263, "low_lr": 6.189473684210526e-06, "step": 1312 }, { "epoch": 3.452991452991453, "grad_norm": 1.3394801616668701, "learning_rate": 0.0003089473684210526, "loss": 1.3255, "step": 1313 }, { "epoch": 3.452991452991453, "high_lr": 0.0003089473684210526, "low_lr": 6.1789473684210534e-06, "step": 1313 }, { "epoch": 3.452991452991453, "high_lr": 0.0003089473684210526, "low_lr": 6.1789473684210534e-06, "step": 1313 }, { "epoch": 3.452991452991453, "high_lr": 0.0003089473684210526, "low_lr": 6.1789473684210534e-06, "step": 1313 }, { "epoch": 3.452991452991453, "high_lr": 0.0003089473684210526, "low_lr": 6.1789473684210534e-06, "step": 1313 }, { "epoch": 3.452991452991453, "high_lr": 0.0003089473684210526, "low_lr": 6.1789473684210534e-06, "step": 1313 }, { "epoch": 3.452991452991453, "high_lr": 0.0003089473684210526, "low_lr": 6.1789473684210534e-06, "step": 1313 }, { "epoch": 3.452991452991453, "high_lr": 0.0003089473684210526, "low_lr": 6.1789473684210534e-06, "step": 1313 }, { "epoch": 3.452991452991453, "high_lr": 0.0003089473684210526, "low_lr": 6.1789473684210534e-06, "step": 1313 }, { "epoch": 3.455621301775148, "grad_norm": 1.495471715927124, "learning_rate": 0.0003084210526315789, "loss": 1.3534, "step": 1314 }, { "epoch": 3.455621301775148, "high_lr": 0.0003084210526315789, "low_lr": 6.168421052631579e-06, "step": 1314 }, { "epoch": 3.455621301775148, "high_lr": 0.0003084210526315789, "low_lr": 6.168421052631579e-06, "step": 1314 }, { "epoch": 3.455621301775148, "high_lr": 0.0003084210526315789, "low_lr": 6.168421052631579e-06, "step": 1314 }, { "epoch": 3.455621301775148, "high_lr": 0.0003084210526315789, "low_lr": 6.168421052631579e-06, "step": 1314 }, { "epoch": 3.455621301775148, "high_lr": 0.0003084210526315789, "low_lr": 6.168421052631579e-06, "step": 1314 }, { "epoch": 3.455621301775148, "high_lr": 0.0003084210526315789, "low_lr": 6.168421052631579e-06, "step": 1314 }, { "epoch": 3.455621301775148, "high_lr": 0.0003084210526315789, "low_lr": 6.168421052631579e-06, "step": 1314 }, { "epoch": 3.455621301775148, "high_lr": 0.0003084210526315789, "low_lr": 6.168421052631579e-06, "step": 1314 }, { "epoch": 3.458251150558843, "grad_norm": 1.4218358993530273, "learning_rate": 0.0003078947368421053, "loss": 1.2885, "step": 1315 }, { "epoch": 3.458251150558843, "high_lr": 0.0003078947368421053, "low_lr": 6.157894736842106e-06, "step": 1315 }, { "epoch": 3.458251150558843, "high_lr": 0.0003078947368421053, "low_lr": 6.157894736842106e-06, "step": 1315 }, { "epoch": 3.458251150558843, "high_lr": 0.0003078947368421053, "low_lr": 6.157894736842106e-06, "step": 1315 }, { "epoch": 3.458251150558843, "high_lr": 0.0003078947368421053, "low_lr": 6.157894736842106e-06, "step": 1315 }, { "epoch": 3.458251150558843, "high_lr": 0.0003078947368421053, "low_lr": 6.157894736842106e-06, "step": 1315 }, { "epoch": 3.458251150558843, "high_lr": 0.0003078947368421053, "low_lr": 6.157894736842106e-06, "step": 1315 }, { "epoch": 3.458251150558843, "high_lr": 0.0003078947368421053, "low_lr": 6.157894736842106e-06, "step": 1315 }, { "epoch": 3.458251150558843, "high_lr": 0.0003078947368421053, "low_lr": 6.157894736842106e-06, "step": 1315 }, { "epoch": 3.460880999342538, "grad_norm": 1.413833498954773, "learning_rate": 0.0003073684210526316, "loss": 1.2894, "step": 1316 }, { "epoch": 3.460880999342538, "high_lr": 0.0003073684210526316, "low_lr": 6.1473684210526316e-06, "step": 1316 }, { "epoch": 3.460880999342538, "high_lr": 0.0003073684210526316, "low_lr": 6.1473684210526316e-06, "step": 1316 }, { "epoch": 3.460880999342538, "high_lr": 0.0003073684210526316, "low_lr": 6.1473684210526316e-06, "step": 1316 }, { "epoch": 3.460880999342538, "high_lr": 0.0003073684210526316, "low_lr": 6.1473684210526316e-06, "step": 1316 }, { "epoch": 3.460880999342538, "high_lr": 0.0003073684210526316, "low_lr": 6.1473684210526316e-06, "step": 1316 }, { "epoch": 3.460880999342538, "high_lr": 0.0003073684210526316, "low_lr": 6.1473684210526316e-06, "step": 1316 }, { "epoch": 3.460880999342538, "high_lr": 0.0003073684210526316, "low_lr": 6.1473684210526316e-06, "step": 1316 }, { "epoch": 3.460880999342538, "high_lr": 0.0003073684210526316, "low_lr": 6.1473684210526316e-06, "step": 1316 }, { "epoch": 3.4635108481262327, "grad_norm": 1.3772145509719849, "learning_rate": 0.00030684210526315793, "loss": 1.3812, "step": 1317 }, { "epoch": 3.4635108481262327, "high_lr": 0.00030684210526315793, "low_lr": 6.136842105263159e-06, "step": 1317 }, { "epoch": 3.4635108481262327, "high_lr": 0.00030684210526315793, "low_lr": 6.136842105263159e-06, "step": 1317 }, { "epoch": 3.4635108481262327, "high_lr": 0.00030684210526315793, "low_lr": 6.136842105263159e-06, "step": 1317 }, { "epoch": 3.4635108481262327, "high_lr": 0.00030684210526315793, "low_lr": 6.136842105263159e-06, "step": 1317 }, { "epoch": 3.4635108481262327, "high_lr": 0.00030684210526315793, "low_lr": 6.136842105263159e-06, "step": 1317 }, { "epoch": 3.4635108481262327, "high_lr": 0.00030684210526315793, "low_lr": 6.136842105263159e-06, "step": 1317 }, { "epoch": 3.4635108481262327, "high_lr": 0.00030684210526315793, "low_lr": 6.136842105263159e-06, "step": 1317 }, { "epoch": 3.4635108481262327, "high_lr": 0.00030684210526315793, "low_lr": 6.136842105263159e-06, "step": 1317 }, { "epoch": 3.4661406969099278, "grad_norm": 1.3490023612976074, "learning_rate": 0.0003063157894736842, "loss": 1.3206, "step": 1318 }, { "epoch": 3.4661406969099278, "high_lr": 0.0003063157894736842, "low_lr": 6.126315789473685e-06, "step": 1318 }, { "epoch": 3.4661406969099278, "high_lr": 0.0003063157894736842, "low_lr": 6.126315789473685e-06, "step": 1318 }, { "epoch": 3.4661406969099278, "high_lr": 0.0003063157894736842, "low_lr": 6.126315789473685e-06, "step": 1318 }, { "epoch": 3.4661406969099278, "high_lr": 0.0003063157894736842, "low_lr": 6.126315789473685e-06, "step": 1318 }, { "epoch": 3.4661406969099278, "high_lr": 0.0003063157894736842, "low_lr": 6.126315789473685e-06, "step": 1318 }, { "epoch": 3.4661406969099278, "high_lr": 0.0003063157894736842, "low_lr": 6.126315789473685e-06, "step": 1318 }, { "epoch": 3.4661406969099278, "high_lr": 0.0003063157894736842, "low_lr": 6.126315789473685e-06, "step": 1318 }, { "epoch": 3.4661406969099278, "high_lr": 0.0003063157894736842, "low_lr": 6.126315789473685e-06, "step": 1318 }, { "epoch": 3.468770545693623, "grad_norm": 1.4685592651367188, "learning_rate": 0.0003057894736842105, "loss": 1.2311, "step": 1319 }, { "epoch": 3.468770545693623, "high_lr": 0.0003057894736842105, "low_lr": 6.1157894736842106e-06, "step": 1319 }, { "epoch": 3.468770545693623, "high_lr": 0.0003057894736842105, "low_lr": 6.1157894736842106e-06, "step": 1319 }, { "epoch": 3.468770545693623, "high_lr": 0.0003057894736842105, "low_lr": 6.1157894736842106e-06, "step": 1319 }, { "epoch": 3.468770545693623, "high_lr": 0.0003057894736842105, "low_lr": 6.1157894736842106e-06, "step": 1319 }, { "epoch": 3.468770545693623, "high_lr": 0.0003057894736842105, "low_lr": 6.1157894736842106e-06, "step": 1319 }, { "epoch": 3.468770545693623, "high_lr": 0.0003057894736842105, "low_lr": 6.1157894736842106e-06, "step": 1319 }, { "epoch": 3.468770545693623, "high_lr": 0.0003057894736842105, "low_lr": 6.1157894736842106e-06, "step": 1319 }, { "epoch": 3.468770545693623, "high_lr": 0.0003057894736842105, "low_lr": 6.1157894736842106e-06, "step": 1319 }, { "epoch": 3.4714003944773175, "grad_norm": 1.5080946683883667, "learning_rate": 0.00030526315789473684, "loss": 1.286, "step": 1320 }, { "epoch": 3.4714003944773175, "high_lr": 0.00030526315789473684, "low_lr": 6.105263157894738e-06, "step": 1320 }, { "epoch": 3.4714003944773175, "high_lr": 0.00030526315789473684, "low_lr": 6.105263157894738e-06, "step": 1320 }, { "epoch": 3.4714003944773175, "high_lr": 0.00030526315789473684, "low_lr": 6.105263157894738e-06, "step": 1320 }, { "epoch": 3.4714003944773175, "high_lr": 0.00030526315789473684, "low_lr": 6.105263157894738e-06, "step": 1320 }, { "epoch": 3.4714003944773175, "high_lr": 0.00030526315789473684, "low_lr": 6.105263157894738e-06, "step": 1320 }, { "epoch": 3.4714003944773175, "high_lr": 0.00030526315789473684, "low_lr": 6.105263157894738e-06, "step": 1320 }, { "epoch": 3.4714003944773175, "high_lr": 0.00030526315789473684, "low_lr": 6.105263157894738e-06, "step": 1320 }, { "epoch": 3.4714003944773175, "high_lr": 0.00030526315789473684, "low_lr": 6.105263157894738e-06, "step": 1320 }, { "epoch": 3.4740302432610126, "grad_norm": 1.4560896158218384, "learning_rate": 0.00030473684210526313, "loss": 1.2817, "step": 1321 }, { "epoch": 3.4740302432610126, "high_lr": 0.00030473684210526313, "low_lr": 6.094736842105263e-06, "step": 1321 }, { "epoch": 3.4740302432610126, "high_lr": 0.00030473684210526313, "low_lr": 6.094736842105263e-06, "step": 1321 }, { "epoch": 3.4740302432610126, "high_lr": 0.00030473684210526313, "low_lr": 6.094736842105263e-06, "step": 1321 }, { "epoch": 3.4740302432610126, "high_lr": 0.00030473684210526313, "low_lr": 6.094736842105263e-06, "step": 1321 }, { "epoch": 3.4740302432610126, "high_lr": 0.00030473684210526313, "low_lr": 6.094736842105263e-06, "step": 1321 }, { "epoch": 3.4740302432610126, "high_lr": 0.00030473684210526313, "low_lr": 6.094736842105263e-06, "step": 1321 }, { "epoch": 3.4740302432610126, "high_lr": 0.00030473684210526313, "low_lr": 6.094736842105263e-06, "step": 1321 }, { "epoch": 3.4740302432610126, "high_lr": 0.00030473684210526313, "low_lr": 6.094736842105263e-06, "step": 1321 }, { "epoch": 3.4766600920447073, "grad_norm": 1.376052737236023, "learning_rate": 0.00030421052631578947, "loss": 1.3213, "step": 1322 }, { "epoch": 3.4766600920447073, "high_lr": 0.00030421052631578947, "low_lr": 6.08421052631579e-06, "step": 1322 }, { "epoch": 3.4766600920447073, "high_lr": 0.00030421052631578947, "low_lr": 6.08421052631579e-06, "step": 1322 }, { "epoch": 3.4766600920447073, "high_lr": 0.00030421052631578947, "low_lr": 6.08421052631579e-06, "step": 1322 }, { "epoch": 3.4766600920447073, "high_lr": 0.00030421052631578947, "low_lr": 6.08421052631579e-06, "step": 1322 }, { "epoch": 3.4766600920447073, "high_lr": 0.00030421052631578947, "low_lr": 6.08421052631579e-06, "step": 1322 }, { "epoch": 3.4766600920447073, "high_lr": 0.00030421052631578947, "low_lr": 6.08421052631579e-06, "step": 1322 }, { "epoch": 3.4766600920447073, "high_lr": 0.00030421052631578947, "low_lr": 6.08421052631579e-06, "step": 1322 }, { "epoch": 3.4766600920447073, "high_lr": 0.00030421052631578947, "low_lr": 6.08421052631579e-06, "step": 1322 }, { "epoch": 3.4792899408284024, "grad_norm": 1.3689539432525635, "learning_rate": 0.0003036842105263158, "loss": 1.3158, "step": 1323 }, { "epoch": 3.4792899408284024, "high_lr": 0.0003036842105263158, "low_lr": 6.073684210526316e-06, "step": 1323 }, { "epoch": 3.4792899408284024, "high_lr": 0.0003036842105263158, "low_lr": 6.073684210526316e-06, "step": 1323 }, { "epoch": 3.4792899408284024, "high_lr": 0.0003036842105263158, "low_lr": 6.073684210526316e-06, "step": 1323 }, { "epoch": 3.4792899408284024, "high_lr": 0.0003036842105263158, "low_lr": 6.073684210526316e-06, "step": 1323 }, { "epoch": 3.4792899408284024, "high_lr": 0.0003036842105263158, "low_lr": 6.073684210526316e-06, "step": 1323 }, { "epoch": 3.4792899408284024, "high_lr": 0.0003036842105263158, "low_lr": 6.073684210526316e-06, "step": 1323 }, { "epoch": 3.4792899408284024, "high_lr": 0.0003036842105263158, "low_lr": 6.073684210526316e-06, "step": 1323 }, { "epoch": 3.4792899408284024, "high_lr": 0.0003036842105263158, "low_lr": 6.073684210526316e-06, "step": 1323 }, { "epoch": 3.4819197896120975, "grad_norm": 1.4256808757781982, "learning_rate": 0.00030315789473684215, "loss": 1.3019, "step": 1324 }, { "epoch": 3.4819197896120975, "high_lr": 0.00030315789473684215, "low_lr": 6.063157894736843e-06, "step": 1324 }, { "epoch": 3.4819197896120975, "high_lr": 0.00030315789473684215, "low_lr": 6.063157894736843e-06, "step": 1324 }, { "epoch": 3.4819197896120975, "high_lr": 0.00030315789473684215, "low_lr": 6.063157894736843e-06, "step": 1324 }, { "epoch": 3.4819197896120975, "high_lr": 0.00030315789473684215, "low_lr": 6.063157894736843e-06, "step": 1324 }, { "epoch": 3.4819197896120975, "high_lr": 0.00030315789473684215, "low_lr": 6.063157894736843e-06, "step": 1324 }, { "epoch": 3.4819197896120975, "high_lr": 0.00030315789473684215, "low_lr": 6.063157894736843e-06, "step": 1324 }, { "epoch": 3.4819197896120975, "high_lr": 0.00030315789473684215, "low_lr": 6.063157894736843e-06, "step": 1324 }, { "epoch": 3.4819197896120975, "high_lr": 0.00030315789473684215, "low_lr": 6.063157894736843e-06, "step": 1324 }, { "epoch": 3.484549638395792, "grad_norm": 1.4182405471801758, "learning_rate": 0.00030263157894736844, "loss": 1.2553, "step": 1325 }, { "epoch": 3.484549638395792, "high_lr": 0.00030263157894736844, "low_lr": 6.0526315789473685e-06, "step": 1325 }, { "epoch": 3.484549638395792, "high_lr": 0.00030263157894736844, "low_lr": 6.0526315789473685e-06, "step": 1325 }, { "epoch": 3.484549638395792, "high_lr": 0.00030263157894736844, "low_lr": 6.0526315789473685e-06, "step": 1325 }, { "epoch": 3.484549638395792, "high_lr": 0.00030263157894736844, "low_lr": 6.0526315789473685e-06, "step": 1325 }, { "epoch": 3.484549638395792, "high_lr": 0.00030263157894736844, "low_lr": 6.0526315789473685e-06, "step": 1325 }, { "epoch": 3.484549638395792, "high_lr": 0.00030263157894736844, "low_lr": 6.0526315789473685e-06, "step": 1325 }, { "epoch": 3.484549638395792, "high_lr": 0.00030263157894736844, "low_lr": 6.0526315789473685e-06, "step": 1325 }, { "epoch": 3.484549638395792, "high_lr": 0.00030263157894736844, "low_lr": 6.0526315789473685e-06, "step": 1325 }, { "epoch": 3.4871794871794872, "grad_norm": 1.3478316068649292, "learning_rate": 0.0003021052631578947, "loss": 1.3268, "step": 1326 }, { "epoch": 3.4871794871794872, "high_lr": 0.0003021052631578947, "low_lr": 6.042105263157895e-06, "step": 1326 }, { "epoch": 3.4871794871794872, "high_lr": 0.0003021052631578947, "low_lr": 6.042105263157895e-06, "step": 1326 }, { "epoch": 3.4871794871794872, "high_lr": 0.0003021052631578947, "low_lr": 6.042105263157895e-06, "step": 1326 }, { "epoch": 3.4871794871794872, "high_lr": 0.0003021052631578947, "low_lr": 6.042105263157895e-06, "step": 1326 }, { "epoch": 3.4871794871794872, "high_lr": 0.0003021052631578947, "low_lr": 6.042105263157895e-06, "step": 1326 }, { "epoch": 3.4871794871794872, "high_lr": 0.0003021052631578947, "low_lr": 6.042105263157895e-06, "step": 1326 }, { "epoch": 3.4871794871794872, "high_lr": 0.0003021052631578947, "low_lr": 6.042105263157895e-06, "step": 1326 }, { "epoch": 3.4871794871794872, "high_lr": 0.0003021052631578947, "low_lr": 6.042105263157895e-06, "step": 1326 }, { "epoch": 3.489809335963182, "grad_norm": 1.4121602773666382, "learning_rate": 0.00030157894736842106, "loss": 1.2991, "step": 1327 }, { "epoch": 3.489809335963182, "high_lr": 0.00030157894736842106, "low_lr": 6.031578947368422e-06, "step": 1327 }, { "epoch": 3.489809335963182, "high_lr": 0.00030157894736842106, "low_lr": 6.031578947368422e-06, "step": 1327 }, { "epoch": 3.489809335963182, "high_lr": 0.00030157894736842106, "low_lr": 6.031578947368422e-06, "step": 1327 }, { "epoch": 3.489809335963182, "high_lr": 0.00030157894736842106, "low_lr": 6.031578947368422e-06, "step": 1327 }, { "epoch": 3.489809335963182, "high_lr": 0.00030157894736842106, "low_lr": 6.031578947368422e-06, "step": 1327 }, { "epoch": 3.489809335963182, "high_lr": 0.00030157894736842106, "low_lr": 6.031578947368422e-06, "step": 1327 }, { "epoch": 3.489809335963182, "high_lr": 0.00030157894736842106, "low_lr": 6.031578947368422e-06, "step": 1327 }, { "epoch": 3.489809335963182, "high_lr": 0.00030157894736842106, "low_lr": 6.031578947368422e-06, "step": 1327 }, { "epoch": 3.492439184746877, "grad_norm": 1.4898854494094849, "learning_rate": 0.00030105263157894735, "loss": 1.3184, "step": 1328 }, { "epoch": 3.492439184746877, "high_lr": 0.00030105263157894735, "low_lr": 6.0210526315789475e-06, "step": 1328 }, { "epoch": 3.492439184746877, "high_lr": 0.00030105263157894735, "low_lr": 6.0210526315789475e-06, "step": 1328 }, { "epoch": 3.492439184746877, "high_lr": 0.00030105263157894735, "low_lr": 6.0210526315789475e-06, "step": 1328 }, { "epoch": 3.492439184746877, "high_lr": 0.00030105263157894735, "low_lr": 6.0210526315789475e-06, "step": 1328 }, { "epoch": 3.492439184746877, "high_lr": 0.00030105263157894735, "low_lr": 6.0210526315789475e-06, "step": 1328 }, { "epoch": 3.492439184746877, "high_lr": 0.00030105263157894735, "low_lr": 6.0210526315789475e-06, "step": 1328 }, { "epoch": 3.492439184746877, "high_lr": 0.00030105263157894735, "low_lr": 6.0210526315789475e-06, "step": 1328 }, { "epoch": 3.492439184746877, "high_lr": 0.00030105263157894735, "low_lr": 6.0210526315789475e-06, "step": 1328 }, { "epoch": 3.495069033530572, "grad_norm": 1.3850432634353638, "learning_rate": 0.0003005263157894737, "loss": 1.3219, "step": 1329 }, { "epoch": 3.495069033530572, "high_lr": 0.0003005263157894737, "low_lr": 6.010526315789475e-06, "step": 1329 }, { "epoch": 3.495069033530572, "high_lr": 0.0003005263157894737, "low_lr": 6.010526315789475e-06, "step": 1329 }, { "epoch": 3.495069033530572, "high_lr": 0.0003005263157894737, "low_lr": 6.010526315789475e-06, "step": 1329 }, { "epoch": 3.495069033530572, "high_lr": 0.0003005263157894737, "low_lr": 6.010526315789475e-06, "step": 1329 }, { "epoch": 3.495069033530572, "high_lr": 0.0003005263157894737, "low_lr": 6.010526315789475e-06, "step": 1329 }, { "epoch": 3.495069033530572, "high_lr": 0.0003005263157894737, "low_lr": 6.010526315789475e-06, "step": 1329 }, { "epoch": 3.495069033530572, "high_lr": 0.0003005263157894737, "low_lr": 6.010526315789475e-06, "step": 1329 }, { "epoch": 3.495069033530572, "high_lr": 0.0003005263157894737, "low_lr": 6.010526315789475e-06, "step": 1329 }, { "epoch": 3.4976988823142667, "grad_norm": 1.4616377353668213, "learning_rate": 0.0003, "loss": 1.2914, "step": 1330 }, { "epoch": 3.4976988823142667, "high_lr": 0.0003, "low_lr": 6e-06, "step": 1330 }, { "epoch": 3.4976988823142667, "high_lr": 0.0003, "low_lr": 6e-06, "step": 1330 }, { "epoch": 3.4976988823142667, "high_lr": 0.0003, "low_lr": 6e-06, "step": 1330 }, { "epoch": 3.4976988823142667, "high_lr": 0.0003, "low_lr": 6e-06, "step": 1330 }, { "epoch": 3.4976988823142667, "high_lr": 0.0003, "low_lr": 6e-06, "step": 1330 }, { "epoch": 3.4976988823142667, "high_lr": 0.0003, "low_lr": 6e-06, "step": 1330 }, { "epoch": 3.4976988823142667, "high_lr": 0.0003, "low_lr": 6e-06, "step": 1330 }, { "epoch": 3.4976988823142667, "high_lr": 0.0003, "low_lr": 6e-06, "step": 1330 }, { "epoch": 3.500328731097962, "grad_norm": 1.3514699935913086, "learning_rate": 0.00029947368421052637, "loss": 1.2483, "step": 1331 }, { "epoch": 3.500328731097962, "high_lr": 0.00029947368421052637, "low_lr": 5.989473684210527e-06, "step": 1331 }, { "epoch": 3.500328731097962, "high_lr": 0.00029947368421052637, "low_lr": 5.989473684210527e-06, "step": 1331 }, { "epoch": 3.500328731097962, "high_lr": 0.00029947368421052637, "low_lr": 5.989473684210527e-06, "step": 1331 }, { "epoch": 3.500328731097962, "high_lr": 0.00029947368421052637, "low_lr": 5.989473684210527e-06, "step": 1331 }, { "epoch": 3.500328731097962, "high_lr": 0.00029947368421052637, "low_lr": 5.989473684210527e-06, "step": 1331 }, { "epoch": 3.500328731097962, "high_lr": 0.00029947368421052637, "low_lr": 5.989473684210527e-06, "step": 1331 }, { "epoch": 3.500328731097962, "high_lr": 0.00029947368421052637, "low_lr": 5.989473684210527e-06, "step": 1331 }, { "epoch": 3.500328731097962, "high_lr": 0.00029947368421052637, "low_lr": 5.989473684210527e-06, "step": 1331 }, { "epoch": 3.502958579881657, "grad_norm": 1.5310125350952148, "learning_rate": 0.00029894736842105265, "loss": 1.321, "step": 1332 }, { "epoch": 3.502958579881657, "high_lr": 0.00029894736842105265, "low_lr": 5.978947368421053e-06, "step": 1332 }, { "epoch": 3.502958579881657, "high_lr": 0.00029894736842105265, "low_lr": 5.978947368421053e-06, "step": 1332 }, { "epoch": 3.502958579881657, "high_lr": 0.00029894736842105265, "low_lr": 5.978947368421053e-06, "step": 1332 }, { "epoch": 3.502958579881657, "high_lr": 0.00029894736842105265, "low_lr": 5.978947368421053e-06, "step": 1332 }, { "epoch": 3.502958579881657, "high_lr": 0.00029894736842105265, "low_lr": 5.978947368421053e-06, "step": 1332 }, { "epoch": 3.502958579881657, "high_lr": 0.00029894736842105265, "low_lr": 5.978947368421053e-06, "step": 1332 }, { "epoch": 3.502958579881657, "high_lr": 0.00029894736842105265, "low_lr": 5.978947368421053e-06, "step": 1332 }, { "epoch": 3.502958579881657, "high_lr": 0.00029894736842105265, "low_lr": 5.978947368421053e-06, "step": 1332 }, { "epoch": 3.5055884286653516, "grad_norm": 1.3913053274154663, "learning_rate": 0.00029842105263157894, "loss": 1.2846, "step": 1333 }, { "epoch": 3.5055884286653516, "high_lr": 0.00029842105263157894, "low_lr": 5.968421052631579e-06, "step": 1333 }, { "epoch": 3.5055884286653516, "high_lr": 0.00029842105263157894, "low_lr": 5.968421052631579e-06, "step": 1333 }, { "epoch": 3.5055884286653516, "high_lr": 0.00029842105263157894, "low_lr": 5.968421052631579e-06, "step": 1333 }, { "epoch": 3.5055884286653516, "high_lr": 0.00029842105263157894, "low_lr": 5.968421052631579e-06, "step": 1333 }, { "epoch": 3.5055884286653516, "high_lr": 0.00029842105263157894, "low_lr": 5.968421052631579e-06, "step": 1333 }, { "epoch": 3.5055884286653516, "high_lr": 0.00029842105263157894, "low_lr": 5.968421052631579e-06, "step": 1333 }, { "epoch": 3.5055884286653516, "high_lr": 0.00029842105263157894, "low_lr": 5.968421052631579e-06, "step": 1333 }, { "epoch": 3.5055884286653516, "high_lr": 0.00029842105263157894, "low_lr": 5.968421052631579e-06, "step": 1333 }, { "epoch": 3.5082182774490467, "grad_norm": 1.4891670942306519, "learning_rate": 0.0002978947368421053, "loss": 1.294, "step": 1334 }, { "epoch": 3.5082182774490467, "high_lr": 0.0002978947368421053, "low_lr": 5.9578947368421055e-06, "step": 1334 }, { "epoch": 3.5082182774490467, "high_lr": 0.0002978947368421053, "low_lr": 5.9578947368421055e-06, "step": 1334 }, { "epoch": 3.5082182774490467, "high_lr": 0.0002978947368421053, "low_lr": 5.9578947368421055e-06, "step": 1334 }, { "epoch": 3.5082182774490467, "high_lr": 0.0002978947368421053, "low_lr": 5.9578947368421055e-06, "step": 1334 }, { "epoch": 3.5082182774490467, "high_lr": 0.0002978947368421053, "low_lr": 5.9578947368421055e-06, "step": 1334 }, { "epoch": 3.5082182774490467, "high_lr": 0.0002978947368421053, "low_lr": 5.9578947368421055e-06, "step": 1334 }, { "epoch": 3.5082182774490467, "high_lr": 0.0002978947368421053, "low_lr": 5.9578947368421055e-06, "step": 1334 }, { "epoch": 3.5082182774490467, "high_lr": 0.0002978947368421053, "low_lr": 5.9578947368421055e-06, "step": 1334 }, { "epoch": 3.510848126232742, "grad_norm": 1.3192452192306519, "learning_rate": 0.00029736842105263157, "loss": 1.3118, "step": 1335 }, { "epoch": 3.510848126232742, "high_lr": 0.00029736842105263157, "low_lr": 5.947368421052632e-06, "step": 1335 }, { "epoch": 3.510848126232742, "high_lr": 0.00029736842105263157, "low_lr": 5.947368421052632e-06, "step": 1335 }, { "epoch": 3.510848126232742, "high_lr": 0.00029736842105263157, "low_lr": 5.947368421052632e-06, "step": 1335 }, { "epoch": 3.510848126232742, "high_lr": 0.00029736842105263157, "low_lr": 5.947368421052632e-06, "step": 1335 }, { "epoch": 3.510848126232742, "high_lr": 0.00029736842105263157, "low_lr": 5.947368421052632e-06, "step": 1335 }, { "epoch": 3.510848126232742, "high_lr": 0.00029736842105263157, "low_lr": 5.947368421052632e-06, "step": 1335 }, { "epoch": 3.510848126232742, "high_lr": 0.00029736842105263157, "low_lr": 5.947368421052632e-06, "step": 1335 }, { "epoch": 3.510848126232742, "high_lr": 0.00029736842105263157, "low_lr": 5.947368421052632e-06, "step": 1335 }, { "epoch": 3.5134779750164364, "grad_norm": 1.4548825025558472, "learning_rate": 0.0002968421052631579, "loss": 1.2824, "step": 1336 }, { "epoch": 3.5134779750164364, "high_lr": 0.0002968421052631579, "low_lr": 5.936842105263159e-06, "step": 1336 }, { "epoch": 3.5134779750164364, "high_lr": 0.0002968421052631579, "low_lr": 5.936842105263159e-06, "step": 1336 }, { "epoch": 3.5134779750164364, "high_lr": 0.0002968421052631579, "low_lr": 5.936842105263159e-06, "step": 1336 }, { "epoch": 3.5134779750164364, "high_lr": 0.0002968421052631579, "low_lr": 5.936842105263159e-06, "step": 1336 }, { "epoch": 3.5134779750164364, "high_lr": 0.0002968421052631579, "low_lr": 5.936842105263159e-06, "step": 1336 }, { "epoch": 3.5134779750164364, "high_lr": 0.0002968421052631579, "low_lr": 5.936842105263159e-06, "step": 1336 }, { "epoch": 3.5134779750164364, "high_lr": 0.0002968421052631579, "low_lr": 5.936842105263159e-06, "step": 1336 }, { "epoch": 3.5134779750164364, "high_lr": 0.0002968421052631579, "low_lr": 5.936842105263159e-06, "step": 1336 }, { "epoch": 3.5161078238001315, "grad_norm": 1.349829912185669, "learning_rate": 0.0002963157894736842, "loss": 1.2968, "step": 1337 }, { "epoch": 3.5161078238001315, "high_lr": 0.0002963157894736842, "low_lr": 5.9263157894736844e-06, "step": 1337 }, { "epoch": 3.5161078238001315, "high_lr": 0.0002963157894736842, "low_lr": 5.9263157894736844e-06, "step": 1337 }, { "epoch": 3.5161078238001315, "high_lr": 0.0002963157894736842, "low_lr": 5.9263157894736844e-06, "step": 1337 }, { "epoch": 3.5161078238001315, "high_lr": 0.0002963157894736842, "low_lr": 5.9263157894736844e-06, "step": 1337 }, { "epoch": 3.5161078238001315, "high_lr": 0.0002963157894736842, "low_lr": 5.9263157894736844e-06, "step": 1337 }, { "epoch": 3.5161078238001315, "high_lr": 0.0002963157894736842, "low_lr": 5.9263157894736844e-06, "step": 1337 }, { "epoch": 3.5161078238001315, "high_lr": 0.0002963157894736842, "low_lr": 5.9263157894736844e-06, "step": 1337 }, { "epoch": 3.5161078238001315, "high_lr": 0.0002963157894736842, "low_lr": 5.9263157894736844e-06, "step": 1337 }, { "epoch": 3.5187376725838266, "grad_norm": 1.38246750831604, "learning_rate": 0.00029578947368421053, "loss": 1.2769, "step": 1338 }, { "epoch": 3.5187376725838266, "high_lr": 0.00029578947368421053, "low_lr": 5.915789473684212e-06, "step": 1338 }, { "epoch": 3.5187376725838266, "high_lr": 0.00029578947368421053, "low_lr": 5.915789473684212e-06, "step": 1338 }, { "epoch": 3.5187376725838266, "high_lr": 0.00029578947368421053, "low_lr": 5.915789473684212e-06, "step": 1338 }, { "epoch": 3.5187376725838266, "high_lr": 0.00029578947368421053, "low_lr": 5.915789473684212e-06, "step": 1338 }, { "epoch": 3.5187376725838266, "high_lr": 0.00029578947368421053, "low_lr": 5.915789473684212e-06, "step": 1338 }, { "epoch": 3.5187376725838266, "high_lr": 0.00029578947368421053, "low_lr": 5.915789473684212e-06, "step": 1338 }, { "epoch": 3.5187376725838266, "high_lr": 0.00029578947368421053, "low_lr": 5.915789473684212e-06, "step": 1338 }, { "epoch": 3.5187376725838266, "high_lr": 0.00029578947368421053, "low_lr": 5.915789473684212e-06, "step": 1338 }, { "epoch": 3.5213675213675213, "grad_norm": 1.3541733026504517, "learning_rate": 0.0002952631578947368, "loss": 1.3354, "step": 1339 }, { "epoch": 3.5213675213675213, "high_lr": 0.0002952631578947368, "low_lr": 5.905263157894737e-06, "step": 1339 }, { "epoch": 3.5213675213675213, "high_lr": 0.0002952631578947368, "low_lr": 5.905263157894737e-06, "step": 1339 }, { "epoch": 3.5213675213675213, "high_lr": 0.0002952631578947368, "low_lr": 5.905263157894737e-06, "step": 1339 }, { "epoch": 3.5213675213675213, "high_lr": 0.0002952631578947368, "low_lr": 5.905263157894737e-06, "step": 1339 }, { "epoch": 3.5213675213675213, "high_lr": 0.0002952631578947368, "low_lr": 5.905263157894737e-06, "step": 1339 }, { "epoch": 3.5213675213675213, "high_lr": 0.0002952631578947368, "low_lr": 5.905263157894737e-06, "step": 1339 }, { "epoch": 3.5213675213675213, "high_lr": 0.0002952631578947368, "low_lr": 5.905263157894737e-06, "step": 1339 }, { "epoch": 3.5213675213675213, "high_lr": 0.0002952631578947368, "low_lr": 5.905263157894737e-06, "step": 1339 }, { "epoch": 3.5239973701512164, "grad_norm": 1.4464415311813354, "learning_rate": 0.00029473684210526316, "loss": 1.3037, "step": 1340 }, { "epoch": 3.5239973701512164, "high_lr": 0.00029473684210526316, "low_lr": 5.8947368421052634e-06, "step": 1340 }, { "epoch": 3.5239973701512164, "high_lr": 0.00029473684210526316, "low_lr": 5.8947368421052634e-06, "step": 1340 }, { "epoch": 3.5239973701512164, "high_lr": 0.00029473684210526316, "low_lr": 5.8947368421052634e-06, "step": 1340 }, { "epoch": 3.5239973701512164, "high_lr": 0.00029473684210526316, "low_lr": 5.8947368421052634e-06, "step": 1340 }, { "epoch": 3.5239973701512164, "high_lr": 0.00029473684210526316, "low_lr": 5.8947368421052634e-06, "step": 1340 }, { "epoch": 3.5239973701512164, "high_lr": 0.00029473684210526316, "low_lr": 5.8947368421052634e-06, "step": 1340 }, { "epoch": 3.5239973701512164, "high_lr": 0.00029473684210526316, "low_lr": 5.8947368421052634e-06, "step": 1340 }, { "epoch": 3.5239973701512164, "high_lr": 0.00029473684210526316, "low_lr": 5.8947368421052634e-06, "step": 1340 }, { "epoch": 3.5266272189349115, "grad_norm": 1.4825074672698975, "learning_rate": 0.0002942105263157895, "loss": 1.2406, "step": 1341 }, { "epoch": 3.5266272189349115, "high_lr": 0.0002942105263157895, "low_lr": 5.88421052631579e-06, "step": 1341 }, { "epoch": 3.5266272189349115, "high_lr": 0.0002942105263157895, "low_lr": 5.88421052631579e-06, "step": 1341 }, { "epoch": 3.5266272189349115, "high_lr": 0.0002942105263157895, "low_lr": 5.88421052631579e-06, "step": 1341 }, { "epoch": 3.5266272189349115, "high_lr": 0.0002942105263157895, "low_lr": 5.88421052631579e-06, "step": 1341 }, { "epoch": 3.5266272189349115, "high_lr": 0.0002942105263157895, "low_lr": 5.88421052631579e-06, "step": 1341 }, { "epoch": 3.5266272189349115, "high_lr": 0.0002942105263157895, "low_lr": 5.88421052631579e-06, "step": 1341 }, { "epoch": 3.5266272189349115, "high_lr": 0.0002942105263157895, "low_lr": 5.88421052631579e-06, "step": 1341 }, { "epoch": 3.5266272189349115, "high_lr": 0.0002942105263157895, "low_lr": 5.88421052631579e-06, "step": 1341 }, { "epoch": 3.529257067718606, "grad_norm": 1.2963181734085083, "learning_rate": 0.0002936842105263158, "loss": 1.2894, "step": 1342 }, { "epoch": 3.529257067718606, "high_lr": 0.0002936842105263158, "low_lr": 5.873684210526316e-06, "step": 1342 }, { "epoch": 3.529257067718606, "high_lr": 0.0002936842105263158, "low_lr": 5.873684210526316e-06, "step": 1342 }, { "epoch": 3.529257067718606, "high_lr": 0.0002936842105263158, "low_lr": 5.873684210526316e-06, "step": 1342 }, { "epoch": 3.529257067718606, "high_lr": 0.0002936842105263158, "low_lr": 5.873684210526316e-06, "step": 1342 }, { "epoch": 3.529257067718606, "high_lr": 0.0002936842105263158, "low_lr": 5.873684210526316e-06, "step": 1342 }, { "epoch": 3.529257067718606, "high_lr": 0.0002936842105263158, "low_lr": 5.873684210526316e-06, "step": 1342 }, { "epoch": 3.529257067718606, "high_lr": 0.0002936842105263158, "low_lr": 5.873684210526316e-06, "step": 1342 }, { "epoch": 3.529257067718606, "high_lr": 0.0002936842105263158, "low_lr": 5.873684210526316e-06, "step": 1342 }, { "epoch": 3.5318869165023012, "grad_norm": 1.4152305126190186, "learning_rate": 0.0002931578947368421, "loss": 1.3201, "step": 1343 }, { "epoch": 3.5318869165023012, "high_lr": 0.0002931578947368421, "low_lr": 5.863157894736842e-06, "step": 1343 }, { "epoch": 3.5318869165023012, "high_lr": 0.0002931578947368421, "low_lr": 5.863157894736842e-06, "step": 1343 }, { "epoch": 3.5318869165023012, "high_lr": 0.0002931578947368421, "low_lr": 5.863157894736842e-06, "step": 1343 }, { "epoch": 3.5318869165023012, "high_lr": 0.0002931578947368421, "low_lr": 5.863157894736842e-06, "step": 1343 }, { "epoch": 3.5318869165023012, "high_lr": 0.0002931578947368421, "low_lr": 5.863157894736842e-06, "step": 1343 }, { "epoch": 3.5318869165023012, "high_lr": 0.0002931578947368421, "low_lr": 5.863157894736842e-06, "step": 1343 }, { "epoch": 3.5318869165023012, "high_lr": 0.0002931578947368421, "low_lr": 5.863157894736842e-06, "step": 1343 }, { "epoch": 3.5318869165023012, "high_lr": 0.0002931578947368421, "low_lr": 5.863157894736842e-06, "step": 1343 }, { "epoch": 3.534516765285996, "grad_norm": 1.319513201713562, "learning_rate": 0.0002926315789473684, "loss": 1.3224, "step": 1344 }, { "epoch": 3.534516765285996, "high_lr": 0.0002926315789473684, "low_lr": 5.852631578947369e-06, "step": 1344 }, { "epoch": 3.534516765285996, "high_lr": 0.0002926315789473684, "low_lr": 5.852631578947369e-06, "step": 1344 }, { "epoch": 3.534516765285996, "high_lr": 0.0002926315789473684, "low_lr": 5.852631578947369e-06, "step": 1344 }, { "epoch": 3.534516765285996, "high_lr": 0.0002926315789473684, "low_lr": 5.852631578947369e-06, "step": 1344 }, { "epoch": 3.534516765285996, "high_lr": 0.0002926315789473684, "low_lr": 5.852631578947369e-06, "step": 1344 }, { "epoch": 3.534516765285996, "high_lr": 0.0002926315789473684, "low_lr": 5.852631578947369e-06, "step": 1344 }, { "epoch": 3.534516765285996, "high_lr": 0.0002926315789473684, "low_lr": 5.852631578947369e-06, "step": 1344 }, { "epoch": 3.534516765285996, "high_lr": 0.0002926315789473684, "low_lr": 5.852631578947369e-06, "step": 1344 }, { "epoch": 3.537146614069691, "grad_norm": 1.3924484252929688, "learning_rate": 0.00029210526315789475, "loss": 1.2676, "step": 1345 }, { "epoch": 3.537146614069691, "high_lr": 0.00029210526315789475, "low_lr": 5.842105263157896e-06, "step": 1345 }, { "epoch": 3.537146614069691, "high_lr": 0.00029210526315789475, "low_lr": 5.842105263157896e-06, "step": 1345 }, { "epoch": 3.537146614069691, "high_lr": 0.00029210526315789475, "low_lr": 5.842105263157896e-06, "step": 1345 }, { "epoch": 3.537146614069691, "high_lr": 0.00029210526315789475, "low_lr": 5.842105263157896e-06, "step": 1345 }, { "epoch": 3.537146614069691, "high_lr": 0.00029210526315789475, "low_lr": 5.842105263157896e-06, "step": 1345 }, { "epoch": 3.537146614069691, "high_lr": 0.00029210526315789475, "low_lr": 5.842105263157896e-06, "step": 1345 }, { "epoch": 3.537146614069691, "high_lr": 0.00029210526315789475, "low_lr": 5.842105263157896e-06, "step": 1345 }, { "epoch": 3.537146614069691, "high_lr": 0.00029210526315789475, "low_lr": 5.842105263157896e-06, "step": 1345 }, { "epoch": 3.539776462853386, "grad_norm": 1.495673656463623, "learning_rate": 0.00029157894736842104, "loss": 1.3082, "step": 1346 }, { "epoch": 3.539776462853386, "high_lr": 0.00029157894736842104, "low_lr": 5.831578947368421e-06, "step": 1346 }, { "epoch": 3.539776462853386, "high_lr": 0.00029157894736842104, "low_lr": 5.831578947368421e-06, "step": 1346 }, { "epoch": 3.539776462853386, "high_lr": 0.00029157894736842104, "low_lr": 5.831578947368421e-06, "step": 1346 }, { "epoch": 3.539776462853386, "high_lr": 0.00029157894736842104, "low_lr": 5.831578947368421e-06, "step": 1346 }, { "epoch": 3.539776462853386, "high_lr": 0.00029157894736842104, "low_lr": 5.831578947368421e-06, "step": 1346 }, { "epoch": 3.539776462853386, "high_lr": 0.00029157894736842104, "low_lr": 5.831578947368421e-06, "step": 1346 }, { "epoch": 3.539776462853386, "high_lr": 0.00029157894736842104, "low_lr": 5.831578947368421e-06, "step": 1346 }, { "epoch": 3.539776462853386, "high_lr": 0.00029157894736842104, "low_lr": 5.831578947368421e-06, "step": 1346 }, { "epoch": 3.5424063116370808, "grad_norm": 1.3282442092895508, "learning_rate": 0.0002910526315789474, "loss": 1.3114, "step": 1347 }, { "epoch": 3.5424063116370808, "high_lr": 0.0002910526315789474, "low_lr": 5.8210526315789486e-06, "step": 1347 }, { "epoch": 3.5424063116370808, "high_lr": 0.0002910526315789474, "low_lr": 5.8210526315789486e-06, "step": 1347 }, { "epoch": 3.5424063116370808, "high_lr": 0.0002910526315789474, "low_lr": 5.8210526315789486e-06, "step": 1347 }, { "epoch": 3.5424063116370808, "high_lr": 0.0002910526315789474, "low_lr": 5.8210526315789486e-06, "step": 1347 }, { "epoch": 3.5424063116370808, "high_lr": 0.0002910526315789474, "low_lr": 5.8210526315789486e-06, "step": 1347 }, { "epoch": 3.5424063116370808, "high_lr": 0.0002910526315789474, "low_lr": 5.8210526315789486e-06, "step": 1347 }, { "epoch": 3.5424063116370808, "high_lr": 0.0002910526315789474, "low_lr": 5.8210526315789486e-06, "step": 1347 }, { "epoch": 3.5424063116370808, "high_lr": 0.0002910526315789474, "low_lr": 5.8210526315789486e-06, "step": 1347 }, { "epoch": 3.545036160420776, "grad_norm": 1.2868953943252563, "learning_rate": 0.0002905263157894737, "loss": 1.3005, "step": 1348 }, { "epoch": 3.545036160420776, "high_lr": 0.0002905263157894737, "low_lr": 5.810526315789474e-06, "step": 1348 }, { "epoch": 3.545036160420776, "high_lr": 0.0002905263157894737, "low_lr": 5.810526315789474e-06, "step": 1348 }, { "epoch": 3.545036160420776, "high_lr": 0.0002905263157894737, "low_lr": 5.810526315789474e-06, "step": 1348 }, { "epoch": 3.545036160420776, "high_lr": 0.0002905263157894737, "low_lr": 5.810526315789474e-06, "step": 1348 }, { "epoch": 3.545036160420776, "high_lr": 0.0002905263157894737, "low_lr": 5.810526315789474e-06, "step": 1348 }, { "epoch": 3.545036160420776, "high_lr": 0.0002905263157894737, "low_lr": 5.810526315789474e-06, "step": 1348 }, { "epoch": 3.545036160420776, "high_lr": 0.0002905263157894737, "low_lr": 5.810526315789474e-06, "step": 1348 }, { "epoch": 3.545036160420776, "high_lr": 0.0002905263157894737, "low_lr": 5.810526315789474e-06, "step": 1348 }, { "epoch": 3.5476660092044705, "grad_norm": 1.4387948513031006, "learning_rate": 0.00029, "loss": 1.2966, "step": 1349 }, { "epoch": 3.5476660092044705, "high_lr": 0.00029, "low_lr": 5.8e-06, "step": 1349 }, { "epoch": 3.5476660092044705, "high_lr": 0.00029, "low_lr": 5.8e-06, "step": 1349 }, { "epoch": 3.5476660092044705, "high_lr": 0.00029, "low_lr": 5.8e-06, "step": 1349 }, { "epoch": 3.5476660092044705, "high_lr": 0.00029, "low_lr": 5.8e-06, "step": 1349 }, { "epoch": 3.5476660092044705, "high_lr": 0.00029, "low_lr": 5.8e-06, "step": 1349 }, { "epoch": 3.5476660092044705, "high_lr": 0.00029, "low_lr": 5.8e-06, "step": 1349 }, { "epoch": 3.5476660092044705, "high_lr": 0.00029, "low_lr": 5.8e-06, "step": 1349 }, { "epoch": 3.5476660092044705, "high_lr": 0.00029, "low_lr": 5.8e-06, "step": 1349 }, { "epoch": 3.5502958579881656, "grad_norm": 1.4605005979537964, "learning_rate": 0.00028947368421052634, "loss": 1.3046, "step": 1350 }, { "epoch": 3.5502958579881656, "high_lr": 0.00028947368421052634, "low_lr": 5.789473684210527e-06, "step": 1350 }, { "epoch": 3.5502958579881656, "high_lr": 0.00028947368421052634, "low_lr": 5.789473684210527e-06, "step": 1350 }, { "epoch": 3.5502958579881656, "high_lr": 0.00028947368421052634, "low_lr": 5.789473684210527e-06, "step": 1350 }, { "epoch": 3.5502958579881656, "high_lr": 0.00028947368421052634, "low_lr": 5.789473684210527e-06, "step": 1350 }, { "epoch": 3.5502958579881656, "high_lr": 0.00028947368421052634, "low_lr": 5.789473684210527e-06, "step": 1350 }, { "epoch": 3.5502958579881656, "high_lr": 0.00028947368421052634, "low_lr": 5.789473684210527e-06, "step": 1350 }, { "epoch": 3.5502958579881656, "high_lr": 0.00028947368421052634, "low_lr": 5.789473684210527e-06, "step": 1350 }, { "epoch": 3.5502958579881656, "high_lr": 0.00028947368421052634, "low_lr": 5.789473684210527e-06, "step": 1350 }, { "epoch": 3.5529257067718607, "grad_norm": 1.4000284671783447, "learning_rate": 0.00028894736842105263, "loss": 1.3419, "step": 1351 }, { "epoch": 3.5529257067718607, "high_lr": 0.00028894736842105263, "low_lr": 5.778947368421053e-06, "step": 1351 }, { "epoch": 3.5529257067718607, "high_lr": 0.00028894736842105263, "low_lr": 5.778947368421053e-06, "step": 1351 }, { "epoch": 3.5529257067718607, "high_lr": 0.00028894736842105263, "low_lr": 5.778947368421053e-06, "step": 1351 }, { "epoch": 3.5529257067718607, "high_lr": 0.00028894736842105263, "low_lr": 5.778947368421053e-06, "step": 1351 }, { "epoch": 3.5529257067718607, "high_lr": 0.00028894736842105263, "low_lr": 5.778947368421053e-06, "step": 1351 }, { "epoch": 3.5529257067718607, "high_lr": 0.00028894736842105263, "low_lr": 5.778947368421053e-06, "step": 1351 }, { "epoch": 3.5529257067718607, "high_lr": 0.00028894736842105263, "low_lr": 5.778947368421053e-06, "step": 1351 }, { "epoch": 3.5529257067718607, "high_lr": 0.00028894736842105263, "low_lr": 5.778947368421053e-06, "step": 1351 }, { "epoch": 3.5555555555555554, "grad_norm": 1.4252368211746216, "learning_rate": 0.00028842105263157897, "loss": 1.3249, "step": 1352 }, { "epoch": 3.5555555555555554, "high_lr": 0.00028842105263157897, "low_lr": 5.76842105263158e-06, "step": 1352 }, { "epoch": 3.5555555555555554, "high_lr": 0.00028842105263157897, "low_lr": 5.76842105263158e-06, "step": 1352 }, { "epoch": 3.5555555555555554, "high_lr": 0.00028842105263157897, "low_lr": 5.76842105263158e-06, "step": 1352 }, { "epoch": 3.5555555555555554, "high_lr": 0.00028842105263157897, "low_lr": 5.76842105263158e-06, "step": 1352 }, { "epoch": 3.5555555555555554, "high_lr": 0.00028842105263157897, "low_lr": 5.76842105263158e-06, "step": 1352 }, { "epoch": 3.5555555555555554, "high_lr": 0.00028842105263157897, "low_lr": 5.76842105263158e-06, "step": 1352 }, { "epoch": 3.5555555555555554, "high_lr": 0.00028842105263157897, "low_lr": 5.76842105263158e-06, "step": 1352 }, { "epoch": 3.5555555555555554, "high_lr": 0.00028842105263157897, "low_lr": 5.76842105263158e-06, "step": 1352 }, { "epoch": 3.5581854043392505, "grad_norm": 1.4046462774276733, "learning_rate": 0.00028789473684210525, "loss": 1.2866, "step": 1353 }, { "epoch": 3.5581854043392505, "high_lr": 0.00028789473684210525, "low_lr": 5.757894736842106e-06, "step": 1353 }, { "epoch": 3.5581854043392505, "high_lr": 0.00028789473684210525, "low_lr": 5.757894736842106e-06, "step": 1353 }, { "epoch": 3.5581854043392505, "high_lr": 0.00028789473684210525, "low_lr": 5.757894736842106e-06, "step": 1353 }, { "epoch": 3.5581854043392505, "high_lr": 0.00028789473684210525, "low_lr": 5.757894736842106e-06, "step": 1353 }, { "epoch": 3.5581854043392505, "high_lr": 0.00028789473684210525, "low_lr": 5.757894736842106e-06, "step": 1353 }, { "epoch": 3.5581854043392505, "high_lr": 0.00028789473684210525, "low_lr": 5.757894736842106e-06, "step": 1353 }, { "epoch": 3.5581854043392505, "high_lr": 0.00028789473684210525, "low_lr": 5.757894736842106e-06, "step": 1353 }, { "epoch": 3.5581854043392505, "high_lr": 0.00028789473684210525, "low_lr": 5.757894736842106e-06, "step": 1353 }, { "epoch": 3.5608152531229456, "grad_norm": 1.5079935789108276, "learning_rate": 0.0002873684210526316, "loss": 1.2601, "step": 1354 }, { "epoch": 3.5608152531229456, "high_lr": 0.0002873684210526316, "low_lr": 5.747368421052633e-06, "step": 1354 }, { "epoch": 3.5608152531229456, "high_lr": 0.0002873684210526316, "low_lr": 5.747368421052633e-06, "step": 1354 }, { "epoch": 3.5608152531229456, "high_lr": 0.0002873684210526316, "low_lr": 5.747368421052633e-06, "step": 1354 }, { "epoch": 3.5608152531229456, "high_lr": 0.0002873684210526316, "low_lr": 5.747368421052633e-06, "step": 1354 }, { "epoch": 3.5608152531229456, "high_lr": 0.0002873684210526316, "low_lr": 5.747368421052633e-06, "step": 1354 }, { "epoch": 3.5608152531229456, "high_lr": 0.0002873684210526316, "low_lr": 5.747368421052633e-06, "step": 1354 }, { "epoch": 3.5608152531229456, "high_lr": 0.0002873684210526316, "low_lr": 5.747368421052633e-06, "step": 1354 }, { "epoch": 3.5608152531229456, "high_lr": 0.0002873684210526316, "low_lr": 5.747368421052633e-06, "step": 1354 }, { "epoch": 3.56344510190664, "grad_norm": 1.4072425365447998, "learning_rate": 0.0002868421052631579, "loss": 1.3042, "step": 1355 }, { "epoch": 3.56344510190664, "high_lr": 0.0002868421052631579, "low_lr": 5.736842105263158e-06, "step": 1355 }, { "epoch": 3.56344510190664, "high_lr": 0.0002868421052631579, "low_lr": 5.736842105263158e-06, "step": 1355 }, { "epoch": 3.56344510190664, "high_lr": 0.0002868421052631579, "low_lr": 5.736842105263158e-06, "step": 1355 }, { "epoch": 3.56344510190664, "high_lr": 0.0002868421052631579, "low_lr": 5.736842105263158e-06, "step": 1355 }, { "epoch": 3.56344510190664, "high_lr": 0.0002868421052631579, "low_lr": 5.736842105263158e-06, "step": 1355 }, { "epoch": 3.56344510190664, "high_lr": 0.0002868421052631579, "low_lr": 5.736842105263158e-06, "step": 1355 }, { "epoch": 3.56344510190664, "high_lr": 0.0002868421052631579, "low_lr": 5.736842105263158e-06, "step": 1355 }, { "epoch": 3.56344510190664, "high_lr": 0.0002868421052631579, "low_lr": 5.736842105263158e-06, "step": 1355 }, { "epoch": 3.5660749506903353, "grad_norm": 1.595568299293518, "learning_rate": 0.0002863157894736842, "loss": 1.3556, "step": 1356 }, { "epoch": 3.5660749506903353, "high_lr": 0.0002863157894736842, "low_lr": 5.726315789473685e-06, "step": 1356 }, { "epoch": 3.5660749506903353, "high_lr": 0.0002863157894736842, "low_lr": 5.726315789473685e-06, "step": 1356 }, { "epoch": 3.5660749506903353, "high_lr": 0.0002863157894736842, "low_lr": 5.726315789473685e-06, "step": 1356 }, { "epoch": 3.5660749506903353, "high_lr": 0.0002863157894736842, "low_lr": 5.726315789473685e-06, "step": 1356 }, { "epoch": 3.5660749506903353, "high_lr": 0.0002863157894736842, "low_lr": 5.726315789473685e-06, "step": 1356 }, { "epoch": 3.5660749506903353, "high_lr": 0.0002863157894736842, "low_lr": 5.726315789473685e-06, "step": 1356 }, { "epoch": 3.5660749506903353, "high_lr": 0.0002863157894736842, "low_lr": 5.726315789473685e-06, "step": 1356 }, { "epoch": 3.5660749506903353, "high_lr": 0.0002863157894736842, "low_lr": 5.726315789473685e-06, "step": 1356 }, { "epoch": 3.5687047994740304, "grad_norm": 1.3672691583633423, "learning_rate": 0.00028578947368421056, "loss": 1.2951, "step": 1357 }, { "epoch": 3.5687047994740304, "high_lr": 0.00028578947368421056, "low_lr": 5.715789473684211e-06, "step": 1357 }, { "epoch": 3.5687047994740304, "high_lr": 0.00028578947368421056, "low_lr": 5.715789473684211e-06, "step": 1357 }, { "epoch": 3.5687047994740304, "high_lr": 0.00028578947368421056, "low_lr": 5.715789473684211e-06, "step": 1357 }, { "epoch": 3.5687047994740304, "high_lr": 0.00028578947368421056, "low_lr": 5.715789473684211e-06, "step": 1357 }, { "epoch": 3.5687047994740304, "high_lr": 0.00028578947368421056, "low_lr": 5.715789473684211e-06, "step": 1357 }, { "epoch": 3.5687047994740304, "high_lr": 0.00028578947368421056, "low_lr": 5.715789473684211e-06, "step": 1357 }, { "epoch": 3.5687047994740304, "high_lr": 0.00028578947368421056, "low_lr": 5.715789473684211e-06, "step": 1357 }, { "epoch": 3.5687047994740304, "high_lr": 0.00028578947368421056, "low_lr": 5.715789473684211e-06, "step": 1357 }, { "epoch": 3.571334648257725, "grad_norm": 1.4218196868896484, "learning_rate": 0.00028526315789473685, "loss": 1.2929, "step": 1358 }, { "epoch": 3.571334648257725, "high_lr": 0.00028526315789473685, "low_lr": 5.705263157894737e-06, "step": 1358 }, { "epoch": 3.571334648257725, "high_lr": 0.00028526315789473685, "low_lr": 5.705263157894737e-06, "step": 1358 }, { "epoch": 3.571334648257725, "high_lr": 0.00028526315789473685, "low_lr": 5.705263157894737e-06, "step": 1358 }, { "epoch": 3.571334648257725, "high_lr": 0.00028526315789473685, "low_lr": 5.705263157894737e-06, "step": 1358 }, { "epoch": 3.571334648257725, "high_lr": 0.00028526315789473685, "low_lr": 5.705263157894737e-06, "step": 1358 }, { "epoch": 3.571334648257725, "high_lr": 0.00028526315789473685, "low_lr": 5.705263157894737e-06, "step": 1358 }, { "epoch": 3.571334648257725, "high_lr": 0.00028526315789473685, "low_lr": 5.705263157894737e-06, "step": 1358 }, { "epoch": 3.571334648257725, "high_lr": 0.00028526315789473685, "low_lr": 5.705263157894737e-06, "step": 1358 }, { "epoch": 3.57396449704142, "grad_norm": 1.3965482711791992, "learning_rate": 0.0002847368421052632, "loss": 1.2911, "step": 1359 }, { "epoch": 3.57396449704142, "high_lr": 0.0002847368421052632, "low_lr": 5.694736842105264e-06, "step": 1359 }, { "epoch": 3.57396449704142, "high_lr": 0.0002847368421052632, "low_lr": 5.694736842105264e-06, "step": 1359 }, { "epoch": 3.57396449704142, "high_lr": 0.0002847368421052632, "low_lr": 5.694736842105264e-06, "step": 1359 }, { "epoch": 3.57396449704142, "high_lr": 0.0002847368421052632, "low_lr": 5.694736842105264e-06, "step": 1359 }, { "epoch": 3.57396449704142, "high_lr": 0.0002847368421052632, "low_lr": 5.694736842105264e-06, "step": 1359 }, { "epoch": 3.57396449704142, "high_lr": 0.0002847368421052632, "low_lr": 5.694736842105264e-06, "step": 1359 }, { "epoch": 3.57396449704142, "high_lr": 0.0002847368421052632, "low_lr": 5.694736842105264e-06, "step": 1359 }, { "epoch": 3.57396449704142, "high_lr": 0.0002847368421052632, "low_lr": 5.694736842105264e-06, "step": 1359 }, { "epoch": 3.5765943458251153, "grad_norm": 1.6971560716629028, "learning_rate": 0.00028421052631578947, "loss": 1.3143, "step": 1360 }, { "epoch": 3.5765943458251153, "high_lr": 0.00028421052631578947, "low_lr": 5.68421052631579e-06, "step": 1360 }, { "epoch": 3.5765943458251153, "high_lr": 0.00028421052631578947, "low_lr": 5.68421052631579e-06, "step": 1360 }, { "epoch": 3.5765943458251153, "high_lr": 0.00028421052631578947, "low_lr": 5.68421052631579e-06, "step": 1360 }, { "epoch": 3.5765943458251153, "high_lr": 0.00028421052631578947, "low_lr": 5.68421052631579e-06, "step": 1360 }, { "epoch": 3.5765943458251153, "high_lr": 0.00028421052631578947, "low_lr": 5.68421052631579e-06, "step": 1360 }, { "epoch": 3.5765943458251153, "high_lr": 0.00028421052631578947, "low_lr": 5.68421052631579e-06, "step": 1360 }, { "epoch": 3.5765943458251153, "high_lr": 0.00028421052631578947, "low_lr": 5.68421052631579e-06, "step": 1360 }, { "epoch": 3.5765943458251153, "high_lr": 0.00028421052631578947, "low_lr": 5.68421052631579e-06, "step": 1360 }, { "epoch": 3.57922419460881, "grad_norm": 1.4533029794692993, "learning_rate": 0.0002836842105263158, "loss": 1.3407, "step": 1361 }, { "epoch": 3.57922419460881, "high_lr": 0.0002836842105263158, "low_lr": 5.673684210526317e-06, "step": 1361 }, { "epoch": 3.57922419460881, "high_lr": 0.0002836842105263158, "low_lr": 5.673684210526317e-06, "step": 1361 }, { "epoch": 3.57922419460881, "high_lr": 0.0002836842105263158, "low_lr": 5.673684210526317e-06, "step": 1361 }, { "epoch": 3.57922419460881, "high_lr": 0.0002836842105263158, "low_lr": 5.673684210526317e-06, "step": 1361 }, { "epoch": 3.57922419460881, "high_lr": 0.0002836842105263158, "low_lr": 5.673684210526317e-06, "step": 1361 }, { "epoch": 3.57922419460881, "high_lr": 0.0002836842105263158, "low_lr": 5.673684210526317e-06, "step": 1361 }, { "epoch": 3.57922419460881, "high_lr": 0.0002836842105263158, "low_lr": 5.673684210526317e-06, "step": 1361 }, { "epoch": 3.57922419460881, "high_lr": 0.0002836842105263158, "low_lr": 5.673684210526317e-06, "step": 1361 }, { "epoch": 3.581854043392505, "grad_norm": 1.512963891029358, "learning_rate": 0.0002831578947368421, "loss": 1.2996, "step": 1362 }, { "epoch": 3.581854043392505, "high_lr": 0.0002831578947368421, "low_lr": 5.663157894736843e-06, "step": 1362 }, { "epoch": 3.581854043392505, "high_lr": 0.0002831578947368421, "low_lr": 5.663157894736843e-06, "step": 1362 }, { "epoch": 3.581854043392505, "high_lr": 0.0002831578947368421, "low_lr": 5.663157894736843e-06, "step": 1362 }, { "epoch": 3.581854043392505, "high_lr": 0.0002831578947368421, "low_lr": 5.663157894736843e-06, "step": 1362 }, { "epoch": 3.581854043392505, "high_lr": 0.0002831578947368421, "low_lr": 5.663157894736843e-06, "step": 1362 }, { "epoch": 3.581854043392505, "high_lr": 0.0002831578947368421, "low_lr": 5.663157894736843e-06, "step": 1362 }, { "epoch": 3.581854043392505, "high_lr": 0.0002831578947368421, "low_lr": 5.663157894736843e-06, "step": 1362 }, { "epoch": 3.581854043392505, "high_lr": 0.0002831578947368421, "low_lr": 5.663157894736843e-06, "step": 1362 }, { "epoch": 3.5844838921762, "grad_norm": 1.5167478322982788, "learning_rate": 0.0002826315789473684, "loss": 1.278, "step": 1363 }, { "epoch": 3.5844838921762, "high_lr": 0.0002826315789473684, "low_lr": 5.652631578947368e-06, "step": 1363 }, { "epoch": 3.5844838921762, "high_lr": 0.0002826315789473684, "low_lr": 5.652631578947368e-06, "step": 1363 }, { "epoch": 3.5844838921762, "high_lr": 0.0002826315789473684, "low_lr": 5.652631578947368e-06, "step": 1363 }, { "epoch": 3.5844838921762, "high_lr": 0.0002826315789473684, "low_lr": 5.652631578947368e-06, "step": 1363 }, { "epoch": 3.5844838921762, "high_lr": 0.0002826315789473684, "low_lr": 5.652631578947368e-06, "step": 1363 }, { "epoch": 3.5844838921762, "high_lr": 0.0002826315789473684, "low_lr": 5.652631578947368e-06, "step": 1363 }, { "epoch": 3.5844838921762, "high_lr": 0.0002826315789473684, "low_lr": 5.652631578947368e-06, "step": 1363 }, { "epoch": 3.5844838921762, "high_lr": 0.0002826315789473684, "low_lr": 5.652631578947368e-06, "step": 1363 }, { "epoch": 3.5871137409598948, "grad_norm": 1.4397393465042114, "learning_rate": 0.0002821052631578948, "loss": 1.3288, "step": 1364 }, { "epoch": 3.5871137409598948, "high_lr": 0.0002821052631578948, "low_lr": 5.642105263157895e-06, "step": 1364 }, { "epoch": 3.5871137409598948, "high_lr": 0.0002821052631578948, "low_lr": 5.642105263157895e-06, "step": 1364 }, { "epoch": 3.5871137409598948, "high_lr": 0.0002821052631578948, "low_lr": 5.642105263157895e-06, "step": 1364 }, { "epoch": 3.5871137409598948, "high_lr": 0.0002821052631578948, "low_lr": 5.642105263157895e-06, "step": 1364 }, { "epoch": 3.5871137409598948, "high_lr": 0.0002821052631578948, "low_lr": 5.642105263157895e-06, "step": 1364 }, { "epoch": 3.5871137409598948, "high_lr": 0.0002821052631578948, "low_lr": 5.642105263157895e-06, "step": 1364 }, { "epoch": 3.5871137409598948, "high_lr": 0.0002821052631578948, "low_lr": 5.642105263157895e-06, "step": 1364 }, { "epoch": 3.5871137409598948, "high_lr": 0.0002821052631578948, "low_lr": 5.642105263157895e-06, "step": 1364 }, { "epoch": 3.58974358974359, "grad_norm": 1.3969866037368774, "learning_rate": 0.00028157894736842106, "loss": 1.3335, "step": 1365 }, { "epoch": 3.58974358974359, "high_lr": 0.00028157894736842106, "low_lr": 5.631578947368422e-06, "step": 1365 }, { "epoch": 3.58974358974359, "high_lr": 0.00028157894736842106, "low_lr": 5.631578947368422e-06, "step": 1365 }, { "epoch": 3.58974358974359, "high_lr": 0.00028157894736842106, "low_lr": 5.631578947368422e-06, "step": 1365 }, { "epoch": 3.58974358974359, "high_lr": 0.00028157894736842106, "low_lr": 5.631578947368422e-06, "step": 1365 }, { "epoch": 3.58974358974359, "high_lr": 0.00028157894736842106, "low_lr": 5.631578947368422e-06, "step": 1365 }, { "epoch": 3.58974358974359, "high_lr": 0.00028157894736842106, "low_lr": 5.631578947368422e-06, "step": 1365 }, { "epoch": 3.58974358974359, "high_lr": 0.00028157894736842106, "low_lr": 5.631578947368422e-06, "step": 1365 }, { "epoch": 3.58974358974359, "high_lr": 0.00028157894736842106, "low_lr": 5.631578947368422e-06, "step": 1365 }, { "epoch": 3.5923734385272845, "grad_norm": 1.3118953704833984, "learning_rate": 0.0002810526315789474, "loss": 1.3282, "step": 1366 }, { "epoch": 3.5923734385272845, "high_lr": 0.0002810526315789474, "low_lr": 5.621052631578948e-06, "step": 1366 }, { "epoch": 3.5923734385272845, "high_lr": 0.0002810526315789474, "low_lr": 5.621052631578948e-06, "step": 1366 }, { "epoch": 3.5923734385272845, "high_lr": 0.0002810526315789474, "low_lr": 5.621052631578948e-06, "step": 1366 }, { "epoch": 3.5923734385272845, "high_lr": 0.0002810526315789474, "low_lr": 5.621052631578948e-06, "step": 1366 }, { "epoch": 3.5923734385272845, "high_lr": 0.0002810526315789474, "low_lr": 5.621052631578948e-06, "step": 1366 }, { "epoch": 3.5923734385272845, "high_lr": 0.0002810526315789474, "low_lr": 5.621052631578948e-06, "step": 1366 }, { "epoch": 3.5923734385272845, "high_lr": 0.0002810526315789474, "low_lr": 5.621052631578948e-06, "step": 1366 }, { "epoch": 3.5923734385272845, "high_lr": 0.0002810526315789474, "low_lr": 5.621052631578948e-06, "step": 1366 }, { "epoch": 3.5950032873109796, "grad_norm": 1.4552891254425049, "learning_rate": 0.0002805263157894737, "loss": 1.2933, "step": 1367 }, { "epoch": 3.5950032873109796, "high_lr": 0.0002805263157894737, "low_lr": 5.610526315789474e-06, "step": 1367 }, { "epoch": 3.5950032873109796, "high_lr": 0.0002805263157894737, "low_lr": 5.610526315789474e-06, "step": 1367 }, { "epoch": 3.5950032873109796, "high_lr": 0.0002805263157894737, "low_lr": 5.610526315789474e-06, "step": 1367 }, { "epoch": 3.5950032873109796, "high_lr": 0.0002805263157894737, "low_lr": 5.610526315789474e-06, "step": 1367 }, { "epoch": 3.5950032873109796, "high_lr": 0.0002805263157894737, "low_lr": 5.610526315789474e-06, "step": 1367 }, { "epoch": 3.5950032873109796, "high_lr": 0.0002805263157894737, "low_lr": 5.610526315789474e-06, "step": 1367 }, { "epoch": 3.5950032873109796, "high_lr": 0.0002805263157894737, "low_lr": 5.610526315789474e-06, "step": 1367 }, { "epoch": 3.5950032873109796, "high_lr": 0.0002805263157894737, "low_lr": 5.610526315789474e-06, "step": 1367 }, { "epoch": 3.5976331360946747, "grad_norm": 1.4135433435440063, "learning_rate": 0.00028000000000000003, "loss": 1.3545, "step": 1368 }, { "epoch": 3.5976331360946747, "high_lr": 0.00028000000000000003, "low_lr": 5.600000000000001e-06, "step": 1368 }, { "epoch": 3.5976331360946747, "high_lr": 0.00028000000000000003, "low_lr": 5.600000000000001e-06, "step": 1368 }, { "epoch": 3.5976331360946747, "high_lr": 0.00028000000000000003, "low_lr": 5.600000000000001e-06, "step": 1368 }, { "epoch": 3.5976331360946747, "high_lr": 0.00028000000000000003, "low_lr": 5.600000000000001e-06, "step": 1368 }, { "epoch": 3.5976331360946747, "high_lr": 0.00028000000000000003, "low_lr": 5.600000000000001e-06, "step": 1368 }, { "epoch": 3.5976331360946747, "high_lr": 0.00028000000000000003, "low_lr": 5.600000000000001e-06, "step": 1368 }, { "epoch": 3.5976331360946747, "high_lr": 0.00028000000000000003, "low_lr": 5.600000000000001e-06, "step": 1368 }, { "epoch": 3.5976331360946747, "high_lr": 0.00028000000000000003, "low_lr": 5.600000000000001e-06, "step": 1368 }, { "epoch": 3.6002629848783694, "grad_norm": 1.3850128650665283, "learning_rate": 0.0002794736842105263, "loss": 1.2739, "step": 1369 }, { "epoch": 3.6002629848783694, "high_lr": 0.0002794736842105263, "low_lr": 5.589473684210527e-06, "step": 1369 }, { "epoch": 3.6002629848783694, "high_lr": 0.0002794736842105263, "low_lr": 5.589473684210527e-06, "step": 1369 }, { "epoch": 3.6002629848783694, "high_lr": 0.0002794736842105263, "low_lr": 5.589473684210527e-06, "step": 1369 }, { "epoch": 3.6002629848783694, "high_lr": 0.0002794736842105263, "low_lr": 5.589473684210527e-06, "step": 1369 }, { "epoch": 3.6002629848783694, "high_lr": 0.0002794736842105263, "low_lr": 5.589473684210527e-06, "step": 1369 }, { "epoch": 3.6002629848783694, "high_lr": 0.0002794736842105263, "low_lr": 5.589473684210527e-06, "step": 1369 }, { "epoch": 3.6002629848783694, "high_lr": 0.0002794736842105263, "low_lr": 5.589473684210527e-06, "step": 1369 }, { "epoch": 3.6002629848783694, "high_lr": 0.0002794736842105263, "low_lr": 5.589473684210527e-06, "step": 1369 }, { "epoch": 3.6028928336620645, "grad_norm": 1.3625056743621826, "learning_rate": 0.0002789473684210526, "loss": 1.285, "step": 1370 }, { "epoch": 3.6028928336620645, "high_lr": 0.0002789473684210526, "low_lr": 5.578947368421052e-06, "step": 1370 }, { "epoch": 3.6028928336620645, "high_lr": 0.0002789473684210526, "low_lr": 5.578947368421052e-06, "step": 1370 }, { "epoch": 3.6028928336620645, "high_lr": 0.0002789473684210526, "low_lr": 5.578947368421052e-06, "step": 1370 }, { "epoch": 3.6028928336620645, "high_lr": 0.0002789473684210526, "low_lr": 5.578947368421052e-06, "step": 1370 }, { "epoch": 3.6028928336620645, "high_lr": 0.0002789473684210526, "low_lr": 5.578947368421052e-06, "step": 1370 }, { "epoch": 3.6028928336620645, "high_lr": 0.0002789473684210526, "low_lr": 5.578947368421052e-06, "step": 1370 }, { "epoch": 3.6028928336620645, "high_lr": 0.0002789473684210526, "low_lr": 5.578947368421052e-06, "step": 1370 }, { "epoch": 3.6028928336620645, "high_lr": 0.0002789473684210526, "low_lr": 5.578947368421052e-06, "step": 1370 }, { "epoch": 3.605522682445759, "grad_norm": 1.468536615371704, "learning_rate": 0.00027842105263157894, "loss": 1.2537, "step": 1371 }, { "epoch": 3.605522682445759, "high_lr": 0.00027842105263157894, "low_lr": 5.5684210526315796e-06, "step": 1371 }, { "epoch": 3.605522682445759, "high_lr": 0.00027842105263157894, "low_lr": 5.5684210526315796e-06, "step": 1371 }, { "epoch": 3.605522682445759, "high_lr": 0.00027842105263157894, "low_lr": 5.5684210526315796e-06, "step": 1371 }, { "epoch": 3.605522682445759, "high_lr": 0.00027842105263157894, "low_lr": 5.5684210526315796e-06, "step": 1371 }, { "epoch": 3.605522682445759, "high_lr": 0.00027842105263157894, "low_lr": 5.5684210526315796e-06, "step": 1371 }, { "epoch": 3.605522682445759, "high_lr": 0.00027842105263157894, "low_lr": 5.5684210526315796e-06, "step": 1371 }, { "epoch": 3.605522682445759, "high_lr": 0.00027842105263157894, "low_lr": 5.5684210526315796e-06, "step": 1371 }, { "epoch": 3.605522682445759, "high_lr": 0.00027842105263157894, "low_lr": 5.5684210526315796e-06, "step": 1371 }, { "epoch": 3.6081525312294542, "grad_norm": 1.3496447801589966, "learning_rate": 0.0002778947368421053, "loss": 1.3043, "step": 1372 }, { "epoch": 3.6081525312294542, "high_lr": 0.0002778947368421053, "low_lr": 5.557894736842105e-06, "step": 1372 }, { "epoch": 3.6081525312294542, "high_lr": 0.0002778947368421053, "low_lr": 5.557894736842105e-06, "step": 1372 }, { "epoch": 3.6081525312294542, "high_lr": 0.0002778947368421053, "low_lr": 5.557894736842105e-06, "step": 1372 }, { "epoch": 3.6081525312294542, "high_lr": 0.0002778947368421053, "low_lr": 5.557894736842105e-06, "step": 1372 }, { "epoch": 3.6081525312294542, "high_lr": 0.0002778947368421053, "low_lr": 5.557894736842105e-06, "step": 1372 }, { "epoch": 3.6081525312294542, "high_lr": 0.0002778947368421053, "low_lr": 5.557894736842105e-06, "step": 1372 }, { "epoch": 3.6081525312294542, "high_lr": 0.0002778947368421053, "low_lr": 5.557894736842105e-06, "step": 1372 }, { "epoch": 3.6081525312294542, "high_lr": 0.0002778947368421053, "low_lr": 5.557894736842105e-06, "step": 1372 }, { "epoch": 3.6107823800131493, "grad_norm": 1.5024930238723755, "learning_rate": 0.0002773684210526316, "loss": 1.2904, "step": 1373 }, { "epoch": 3.6107823800131493, "high_lr": 0.0002773684210526316, "low_lr": 5.547368421052632e-06, "step": 1373 }, { "epoch": 3.6107823800131493, "high_lr": 0.0002773684210526316, "low_lr": 5.547368421052632e-06, "step": 1373 }, { "epoch": 3.6107823800131493, "high_lr": 0.0002773684210526316, "low_lr": 5.547368421052632e-06, "step": 1373 }, { "epoch": 3.6107823800131493, "high_lr": 0.0002773684210526316, "low_lr": 5.547368421052632e-06, "step": 1373 }, { "epoch": 3.6107823800131493, "high_lr": 0.0002773684210526316, "low_lr": 5.547368421052632e-06, "step": 1373 }, { "epoch": 3.6107823800131493, "high_lr": 0.0002773684210526316, "low_lr": 5.547368421052632e-06, "step": 1373 }, { "epoch": 3.6107823800131493, "high_lr": 0.0002773684210526316, "low_lr": 5.547368421052632e-06, "step": 1373 }, { "epoch": 3.6107823800131493, "high_lr": 0.0002773684210526316, "low_lr": 5.547368421052632e-06, "step": 1373 }, { "epoch": 3.613412228796844, "grad_norm": 1.3925727605819702, "learning_rate": 0.0002768421052631579, "loss": 1.3119, "step": 1374 }, { "epoch": 3.613412228796844, "high_lr": 0.0002768421052631579, "low_lr": 5.5368421052631586e-06, "step": 1374 }, { "epoch": 3.613412228796844, "high_lr": 0.0002768421052631579, "low_lr": 5.5368421052631586e-06, "step": 1374 }, { "epoch": 3.613412228796844, "high_lr": 0.0002768421052631579, "low_lr": 5.5368421052631586e-06, "step": 1374 }, { "epoch": 3.613412228796844, "high_lr": 0.0002768421052631579, "low_lr": 5.5368421052631586e-06, "step": 1374 }, { "epoch": 3.613412228796844, "high_lr": 0.0002768421052631579, "low_lr": 5.5368421052631586e-06, "step": 1374 }, { "epoch": 3.613412228796844, "high_lr": 0.0002768421052631579, "low_lr": 5.5368421052631586e-06, "step": 1374 }, { "epoch": 3.613412228796844, "high_lr": 0.0002768421052631579, "low_lr": 5.5368421052631586e-06, "step": 1374 }, { "epoch": 3.613412228796844, "high_lr": 0.0002768421052631579, "low_lr": 5.5368421052631586e-06, "step": 1374 }, { "epoch": 3.616042077580539, "grad_norm": 1.3758540153503418, "learning_rate": 0.00027631578947368425, "loss": 1.2542, "step": 1375 }, { "epoch": 3.616042077580539, "high_lr": 0.00027631578947368425, "low_lr": 5.526315789473685e-06, "step": 1375 }, { "epoch": 3.616042077580539, "high_lr": 0.00027631578947368425, "low_lr": 5.526315789473685e-06, "step": 1375 }, { "epoch": 3.616042077580539, "high_lr": 0.00027631578947368425, "low_lr": 5.526315789473685e-06, "step": 1375 }, { "epoch": 3.616042077580539, "high_lr": 0.00027631578947368425, "low_lr": 5.526315789473685e-06, "step": 1375 }, { "epoch": 3.616042077580539, "high_lr": 0.00027631578947368425, "low_lr": 5.526315789473685e-06, "step": 1375 }, { "epoch": 3.616042077580539, "high_lr": 0.00027631578947368425, "low_lr": 5.526315789473685e-06, "step": 1375 }, { "epoch": 3.616042077580539, "high_lr": 0.00027631578947368425, "low_lr": 5.526315789473685e-06, "step": 1375 }, { "epoch": 3.616042077580539, "high_lr": 0.00027631578947368425, "low_lr": 5.526315789473685e-06, "step": 1375 }, { "epoch": 3.618671926364234, "grad_norm": 1.4709190130233765, "learning_rate": 0.00027578947368421053, "loss": 1.2812, "step": 1376 }, { "epoch": 3.618671926364234, "high_lr": 0.00027578947368421053, "low_lr": 5.515789473684211e-06, "step": 1376 }, { "epoch": 3.618671926364234, "high_lr": 0.00027578947368421053, "low_lr": 5.515789473684211e-06, "step": 1376 }, { "epoch": 3.618671926364234, "high_lr": 0.00027578947368421053, "low_lr": 5.515789473684211e-06, "step": 1376 }, { "epoch": 3.618671926364234, "high_lr": 0.00027578947368421053, "low_lr": 5.515789473684211e-06, "step": 1376 }, { "epoch": 3.618671926364234, "high_lr": 0.00027578947368421053, "low_lr": 5.515789473684211e-06, "step": 1376 }, { "epoch": 3.618671926364234, "high_lr": 0.00027578947368421053, "low_lr": 5.515789473684211e-06, "step": 1376 }, { "epoch": 3.618671926364234, "high_lr": 0.00027578947368421053, "low_lr": 5.515789473684211e-06, "step": 1376 }, { "epoch": 3.618671926364234, "high_lr": 0.00027578947368421053, "low_lr": 5.515789473684211e-06, "step": 1376 }, { "epoch": 3.621301775147929, "grad_norm": 1.3651463985443115, "learning_rate": 0.0002752631578947368, "loss": 1.2765, "step": 1377 }, { "epoch": 3.621301775147929, "high_lr": 0.0002752631578947368, "low_lr": 5.505263157894737e-06, "step": 1377 }, { "epoch": 3.621301775147929, "high_lr": 0.0002752631578947368, "low_lr": 5.505263157894737e-06, "step": 1377 }, { "epoch": 3.621301775147929, "high_lr": 0.0002752631578947368, "low_lr": 5.505263157894737e-06, "step": 1377 }, { "epoch": 3.621301775147929, "high_lr": 0.0002752631578947368, "low_lr": 5.505263157894737e-06, "step": 1377 }, { "epoch": 3.621301775147929, "high_lr": 0.0002752631578947368, "low_lr": 5.505263157894737e-06, "step": 1377 }, { "epoch": 3.621301775147929, "high_lr": 0.0002752631578947368, "low_lr": 5.505263157894737e-06, "step": 1377 }, { "epoch": 3.621301775147929, "high_lr": 0.0002752631578947368, "low_lr": 5.505263157894737e-06, "step": 1377 }, { "epoch": 3.621301775147929, "high_lr": 0.0002752631578947368, "low_lr": 5.505263157894737e-06, "step": 1377 }, { "epoch": 3.623931623931624, "grad_norm": 1.4042037725448608, "learning_rate": 0.00027473684210526316, "loss": 1.3151, "step": 1378 }, { "epoch": 3.623931623931624, "high_lr": 0.00027473684210526316, "low_lr": 5.494736842105264e-06, "step": 1378 }, { "epoch": 3.623931623931624, "high_lr": 0.00027473684210526316, "low_lr": 5.494736842105264e-06, "step": 1378 }, { "epoch": 3.623931623931624, "high_lr": 0.00027473684210526316, "low_lr": 5.494736842105264e-06, "step": 1378 }, { "epoch": 3.623931623931624, "high_lr": 0.00027473684210526316, "low_lr": 5.494736842105264e-06, "step": 1378 }, { "epoch": 3.623931623931624, "high_lr": 0.00027473684210526316, "low_lr": 5.494736842105264e-06, "step": 1378 }, { "epoch": 3.623931623931624, "high_lr": 0.00027473684210526316, "low_lr": 5.494736842105264e-06, "step": 1378 }, { "epoch": 3.623931623931624, "high_lr": 0.00027473684210526316, "low_lr": 5.494736842105264e-06, "step": 1378 }, { "epoch": 3.623931623931624, "high_lr": 0.00027473684210526316, "low_lr": 5.494736842105264e-06, "step": 1378 }, { "epoch": 3.626561472715319, "grad_norm": 1.4488333463668823, "learning_rate": 0.00027421052631578945, "loss": 1.3332, "step": 1379 }, { "epoch": 3.626561472715319, "high_lr": 0.00027421052631578945, "low_lr": 5.484210526315789e-06, "step": 1379 }, { "epoch": 3.626561472715319, "high_lr": 0.00027421052631578945, "low_lr": 5.484210526315789e-06, "step": 1379 }, { "epoch": 3.626561472715319, "high_lr": 0.00027421052631578945, "low_lr": 5.484210526315789e-06, "step": 1379 }, { "epoch": 3.626561472715319, "high_lr": 0.00027421052631578945, "low_lr": 5.484210526315789e-06, "step": 1379 }, { "epoch": 3.626561472715319, "high_lr": 0.00027421052631578945, "low_lr": 5.484210526315789e-06, "step": 1379 }, { "epoch": 3.626561472715319, "high_lr": 0.00027421052631578945, "low_lr": 5.484210526315789e-06, "step": 1379 }, { "epoch": 3.626561472715319, "high_lr": 0.00027421052631578945, "low_lr": 5.484210526315789e-06, "step": 1379 }, { "epoch": 3.626561472715319, "high_lr": 0.00027421052631578945, "low_lr": 5.484210526315789e-06, "step": 1379 }, { "epoch": 3.6291913214990137, "grad_norm": 1.440686583518982, "learning_rate": 0.00027368421052631584, "loss": 1.2859, "step": 1380 }, { "epoch": 3.6291913214990137, "high_lr": 0.00027368421052631584, "low_lr": 5.4736842105263165e-06, "step": 1380 }, { "epoch": 3.6291913214990137, "high_lr": 0.00027368421052631584, "low_lr": 5.4736842105263165e-06, "step": 1380 }, { "epoch": 3.6291913214990137, "high_lr": 0.00027368421052631584, "low_lr": 5.4736842105263165e-06, "step": 1380 }, { "epoch": 3.6291913214990137, "high_lr": 0.00027368421052631584, "low_lr": 5.4736842105263165e-06, "step": 1380 }, { "epoch": 3.6291913214990137, "high_lr": 0.00027368421052631584, "low_lr": 5.4736842105263165e-06, "step": 1380 }, { "epoch": 3.6291913214990137, "high_lr": 0.00027368421052631584, "low_lr": 5.4736842105263165e-06, "step": 1380 }, { "epoch": 3.6291913214990137, "high_lr": 0.00027368421052631584, "low_lr": 5.4736842105263165e-06, "step": 1380 }, { "epoch": 3.6291913214990137, "high_lr": 0.00027368421052631584, "low_lr": 5.4736842105263165e-06, "step": 1380 }, { "epoch": 3.631821170282709, "grad_norm": 1.4867942333221436, "learning_rate": 0.0002731578947368421, "loss": 1.3299, "step": 1381 }, { "epoch": 3.631821170282709, "high_lr": 0.0002731578947368421, "low_lr": 5.463157894736843e-06, "step": 1381 }, { "epoch": 3.631821170282709, "high_lr": 0.0002731578947368421, "low_lr": 5.463157894736843e-06, "step": 1381 }, { "epoch": 3.631821170282709, "high_lr": 0.0002731578947368421, "low_lr": 5.463157894736843e-06, "step": 1381 }, { "epoch": 3.631821170282709, "high_lr": 0.0002731578947368421, "low_lr": 5.463157894736843e-06, "step": 1381 }, { "epoch": 3.631821170282709, "high_lr": 0.0002731578947368421, "low_lr": 5.463157894736843e-06, "step": 1381 }, { "epoch": 3.631821170282709, "high_lr": 0.0002731578947368421, "low_lr": 5.463157894736843e-06, "step": 1381 }, { "epoch": 3.631821170282709, "high_lr": 0.0002731578947368421, "low_lr": 5.463157894736843e-06, "step": 1381 }, { "epoch": 3.631821170282709, "high_lr": 0.0002731578947368421, "low_lr": 5.463157894736843e-06, "step": 1381 }, { "epoch": 3.634451019066404, "grad_norm": 1.3880106210708618, "learning_rate": 0.00027263157894736847, "loss": 1.262, "step": 1382 }, { "epoch": 3.634451019066404, "high_lr": 0.00027263157894736847, "low_lr": 5.452631578947369e-06, "step": 1382 }, { "epoch": 3.634451019066404, "high_lr": 0.00027263157894736847, "low_lr": 5.452631578947369e-06, "step": 1382 }, { "epoch": 3.634451019066404, "high_lr": 0.00027263157894736847, "low_lr": 5.452631578947369e-06, "step": 1382 }, { "epoch": 3.634451019066404, "high_lr": 0.00027263157894736847, "low_lr": 5.452631578947369e-06, "step": 1382 }, { "epoch": 3.634451019066404, "high_lr": 0.00027263157894736847, "low_lr": 5.452631578947369e-06, "step": 1382 }, { "epoch": 3.634451019066404, "high_lr": 0.00027263157894736847, "low_lr": 5.452631578947369e-06, "step": 1382 }, { "epoch": 3.634451019066404, "high_lr": 0.00027263157894736847, "low_lr": 5.452631578947369e-06, "step": 1382 }, { "epoch": 3.634451019066404, "high_lr": 0.00027263157894736847, "low_lr": 5.452631578947369e-06, "step": 1382 }, { "epoch": 3.6370808678500985, "grad_norm": 1.6879795789718628, "learning_rate": 0.00027210526315789475, "loss": 1.2888, "step": 1383 }, { "epoch": 3.6370808678500985, "high_lr": 0.00027210526315789475, "low_lr": 5.4421052631578955e-06, "step": 1383 }, { "epoch": 3.6370808678500985, "high_lr": 0.00027210526315789475, "low_lr": 5.4421052631578955e-06, "step": 1383 }, { "epoch": 3.6370808678500985, "high_lr": 0.00027210526315789475, "low_lr": 5.4421052631578955e-06, "step": 1383 }, { "epoch": 3.6370808678500985, "high_lr": 0.00027210526315789475, "low_lr": 5.4421052631578955e-06, "step": 1383 }, { "epoch": 3.6370808678500985, "high_lr": 0.00027210526315789475, "low_lr": 5.4421052631578955e-06, "step": 1383 }, { "epoch": 3.6370808678500985, "high_lr": 0.00027210526315789475, "low_lr": 5.4421052631578955e-06, "step": 1383 }, { "epoch": 3.6370808678500985, "high_lr": 0.00027210526315789475, "low_lr": 5.4421052631578955e-06, "step": 1383 }, { "epoch": 3.6370808678500985, "high_lr": 0.00027210526315789475, "low_lr": 5.4421052631578955e-06, "step": 1383 }, { "epoch": 3.6397107166337936, "grad_norm": 1.3893376588821411, "learning_rate": 0.00027157894736842104, "loss": 1.2899, "step": 1384 }, { "epoch": 3.6397107166337936, "high_lr": 0.00027157894736842104, "low_lr": 5.431578947368421e-06, "step": 1384 }, { "epoch": 3.6397107166337936, "high_lr": 0.00027157894736842104, "low_lr": 5.431578947368421e-06, "step": 1384 }, { "epoch": 3.6397107166337936, "high_lr": 0.00027157894736842104, "low_lr": 5.431578947368421e-06, "step": 1384 }, { "epoch": 3.6397107166337936, "high_lr": 0.00027157894736842104, "low_lr": 5.431578947368421e-06, "step": 1384 }, { "epoch": 3.6397107166337936, "high_lr": 0.00027157894736842104, "low_lr": 5.431578947368421e-06, "step": 1384 }, { "epoch": 3.6397107166337936, "high_lr": 0.00027157894736842104, "low_lr": 5.431578947368421e-06, "step": 1384 }, { "epoch": 3.6397107166337936, "high_lr": 0.00027157894736842104, "low_lr": 5.431578947368421e-06, "step": 1384 }, { "epoch": 3.6397107166337936, "high_lr": 0.00027157894736842104, "low_lr": 5.431578947368421e-06, "step": 1384 }, { "epoch": 3.6423405654174887, "grad_norm": 1.4377245903015137, "learning_rate": 0.0002710526315789474, "loss": 1.309, "step": 1385 }, { "epoch": 3.6423405654174887, "high_lr": 0.0002710526315789474, "low_lr": 5.421052631578948e-06, "step": 1385 }, { "epoch": 3.6423405654174887, "high_lr": 0.0002710526315789474, "low_lr": 5.421052631578948e-06, "step": 1385 }, { "epoch": 3.6423405654174887, "high_lr": 0.0002710526315789474, "low_lr": 5.421052631578948e-06, "step": 1385 }, { "epoch": 3.6423405654174887, "high_lr": 0.0002710526315789474, "low_lr": 5.421052631578948e-06, "step": 1385 }, { "epoch": 3.6423405654174887, "high_lr": 0.0002710526315789474, "low_lr": 5.421052631578948e-06, "step": 1385 }, { "epoch": 3.6423405654174887, "high_lr": 0.0002710526315789474, "low_lr": 5.421052631578948e-06, "step": 1385 }, { "epoch": 3.6423405654174887, "high_lr": 0.0002710526315789474, "low_lr": 5.421052631578948e-06, "step": 1385 }, { "epoch": 3.6423405654174887, "high_lr": 0.0002710526315789474, "low_lr": 5.421052631578948e-06, "step": 1385 }, { "epoch": 3.6449704142011834, "grad_norm": 1.39258873462677, "learning_rate": 0.00027052631578947366, "loss": 1.2727, "step": 1386 }, { "epoch": 3.6449704142011834, "high_lr": 0.00027052631578947366, "low_lr": 5.410526315789474e-06, "step": 1386 }, { "epoch": 3.6449704142011834, "high_lr": 0.00027052631578947366, "low_lr": 5.410526315789474e-06, "step": 1386 }, { "epoch": 3.6449704142011834, "high_lr": 0.00027052631578947366, "low_lr": 5.410526315789474e-06, "step": 1386 }, { "epoch": 3.6449704142011834, "high_lr": 0.00027052631578947366, "low_lr": 5.410526315789474e-06, "step": 1386 }, { "epoch": 3.6449704142011834, "high_lr": 0.00027052631578947366, "low_lr": 5.410526315789474e-06, "step": 1386 }, { "epoch": 3.6449704142011834, "high_lr": 0.00027052631578947366, "low_lr": 5.410526315789474e-06, "step": 1386 }, { "epoch": 3.6449704142011834, "high_lr": 0.00027052631578947366, "low_lr": 5.410526315789474e-06, "step": 1386 }, { "epoch": 3.6449704142011834, "high_lr": 0.00027052631578947366, "low_lr": 5.410526315789474e-06, "step": 1386 }, { "epoch": 3.6476002629848785, "grad_norm": 1.4606719017028809, "learning_rate": 0.00027, "loss": 1.3294, "step": 1387 }, { "epoch": 3.6476002629848785, "high_lr": 0.00027, "low_lr": 5.400000000000001e-06, "step": 1387 }, { "epoch": 3.6476002629848785, "high_lr": 0.00027, "low_lr": 5.400000000000001e-06, "step": 1387 }, { "epoch": 3.6476002629848785, "high_lr": 0.00027, "low_lr": 5.400000000000001e-06, "step": 1387 }, { "epoch": 3.6476002629848785, "high_lr": 0.00027, "low_lr": 5.400000000000001e-06, "step": 1387 }, { "epoch": 3.6476002629848785, "high_lr": 0.00027, "low_lr": 5.400000000000001e-06, "step": 1387 }, { "epoch": 3.6476002629848785, "high_lr": 0.00027, "low_lr": 5.400000000000001e-06, "step": 1387 }, { "epoch": 3.6476002629848785, "high_lr": 0.00027, "low_lr": 5.400000000000001e-06, "step": 1387 }, { "epoch": 3.6476002629848785, "high_lr": 0.00027, "low_lr": 5.400000000000001e-06, "step": 1387 }, { "epoch": 3.650230111768573, "grad_norm": 1.4539998769760132, "learning_rate": 0.0002694736842105263, "loss": 1.2799, "step": 1388 }, { "epoch": 3.650230111768573, "high_lr": 0.0002694736842105263, "low_lr": 5.389473684210526e-06, "step": 1388 }, { "epoch": 3.650230111768573, "high_lr": 0.0002694736842105263, "low_lr": 5.389473684210526e-06, "step": 1388 }, { "epoch": 3.650230111768573, "high_lr": 0.0002694736842105263, "low_lr": 5.389473684210526e-06, "step": 1388 }, { "epoch": 3.650230111768573, "high_lr": 0.0002694736842105263, "low_lr": 5.389473684210526e-06, "step": 1388 }, { "epoch": 3.650230111768573, "high_lr": 0.0002694736842105263, "low_lr": 5.389473684210526e-06, "step": 1388 }, { "epoch": 3.650230111768573, "high_lr": 0.0002694736842105263, "low_lr": 5.389473684210526e-06, "step": 1388 }, { "epoch": 3.650230111768573, "high_lr": 0.0002694736842105263, "low_lr": 5.389473684210526e-06, "step": 1388 }, { "epoch": 3.650230111768573, "high_lr": 0.0002694736842105263, "low_lr": 5.389473684210526e-06, "step": 1388 }, { "epoch": 3.6528599605522682, "grad_norm": 1.4604228734970093, "learning_rate": 0.0002689473684210527, "loss": 1.2791, "step": 1389 }, { "epoch": 3.6528599605522682, "high_lr": 0.0002689473684210527, "low_lr": 5.3789473684210535e-06, "step": 1389 }, { "epoch": 3.6528599605522682, "high_lr": 0.0002689473684210527, "low_lr": 5.3789473684210535e-06, "step": 1389 }, { "epoch": 3.6528599605522682, "high_lr": 0.0002689473684210527, "low_lr": 5.3789473684210535e-06, "step": 1389 }, { "epoch": 3.6528599605522682, "high_lr": 0.0002689473684210527, "low_lr": 5.3789473684210535e-06, "step": 1389 }, { "epoch": 3.6528599605522682, "high_lr": 0.0002689473684210527, "low_lr": 5.3789473684210535e-06, "step": 1389 }, { "epoch": 3.6528599605522682, "high_lr": 0.0002689473684210527, "low_lr": 5.3789473684210535e-06, "step": 1389 }, { "epoch": 3.6528599605522682, "high_lr": 0.0002689473684210527, "low_lr": 5.3789473684210535e-06, "step": 1389 }, { "epoch": 3.6528599605522682, "high_lr": 0.0002689473684210527, "low_lr": 5.3789473684210535e-06, "step": 1389 }, { "epoch": 3.6554898093359633, "grad_norm": 1.453295350074768, "learning_rate": 0.00026842105263157897, "loss": 1.298, "step": 1390 }, { "epoch": 3.6554898093359633, "high_lr": 0.00026842105263157897, "low_lr": 5.36842105263158e-06, "step": 1390 }, { "epoch": 3.6554898093359633, "high_lr": 0.00026842105263157897, "low_lr": 5.36842105263158e-06, "step": 1390 }, { "epoch": 3.6554898093359633, "high_lr": 0.00026842105263157897, "low_lr": 5.36842105263158e-06, "step": 1390 }, { "epoch": 3.6554898093359633, "high_lr": 0.00026842105263157897, "low_lr": 5.36842105263158e-06, "step": 1390 }, { "epoch": 3.6554898093359633, "high_lr": 0.00026842105263157897, "low_lr": 5.36842105263158e-06, "step": 1390 }, { "epoch": 3.6554898093359633, "high_lr": 0.00026842105263157897, "low_lr": 5.36842105263158e-06, "step": 1390 }, { "epoch": 3.6554898093359633, "high_lr": 0.00026842105263157897, "low_lr": 5.36842105263158e-06, "step": 1390 }, { "epoch": 3.6554898093359633, "high_lr": 0.00026842105263157897, "low_lr": 5.36842105263158e-06, "step": 1390 }, { "epoch": 3.658119658119658, "grad_norm": 1.3308731317520142, "learning_rate": 0.00026789473684210526, "loss": 1.3055, "step": 1391 }, { "epoch": 3.658119658119658, "high_lr": 0.00026789473684210526, "low_lr": 5.357894736842105e-06, "step": 1391 }, { "epoch": 3.658119658119658, "high_lr": 0.00026789473684210526, "low_lr": 5.357894736842105e-06, "step": 1391 }, { "epoch": 3.658119658119658, "high_lr": 0.00026789473684210526, "low_lr": 5.357894736842105e-06, "step": 1391 }, { "epoch": 3.658119658119658, "high_lr": 0.00026789473684210526, "low_lr": 5.357894736842105e-06, "step": 1391 }, { "epoch": 3.658119658119658, "high_lr": 0.00026789473684210526, "low_lr": 5.357894736842105e-06, "step": 1391 }, { "epoch": 3.658119658119658, "high_lr": 0.00026789473684210526, "low_lr": 5.357894736842105e-06, "step": 1391 }, { "epoch": 3.658119658119658, "high_lr": 0.00026789473684210526, "low_lr": 5.357894736842105e-06, "step": 1391 }, { "epoch": 3.658119658119658, "high_lr": 0.00026789473684210526, "low_lr": 5.357894736842105e-06, "step": 1391 }, { "epoch": 3.660749506903353, "grad_norm": 1.4058395624160767, "learning_rate": 0.0002673684210526316, "loss": 1.2897, "step": 1392 }, { "epoch": 3.660749506903353, "high_lr": 0.0002673684210526316, "low_lr": 5.3473684210526325e-06, "step": 1392 }, { "epoch": 3.660749506903353, "high_lr": 0.0002673684210526316, "low_lr": 5.3473684210526325e-06, "step": 1392 }, { "epoch": 3.660749506903353, "high_lr": 0.0002673684210526316, "low_lr": 5.3473684210526325e-06, "step": 1392 }, { "epoch": 3.660749506903353, "high_lr": 0.0002673684210526316, "low_lr": 5.3473684210526325e-06, "step": 1392 }, { "epoch": 3.660749506903353, "high_lr": 0.0002673684210526316, "low_lr": 5.3473684210526325e-06, "step": 1392 }, { "epoch": 3.660749506903353, "high_lr": 0.0002673684210526316, "low_lr": 5.3473684210526325e-06, "step": 1392 }, { "epoch": 3.660749506903353, "high_lr": 0.0002673684210526316, "low_lr": 5.3473684210526325e-06, "step": 1392 }, { "epoch": 3.660749506903353, "high_lr": 0.0002673684210526316, "low_lr": 5.3473684210526325e-06, "step": 1392 }, { "epoch": 3.6633793556870478, "grad_norm": 1.326690912246704, "learning_rate": 0.0002668421052631579, "loss": 1.3402, "step": 1393 }, { "epoch": 3.6633793556870478, "high_lr": 0.0002668421052631579, "low_lr": 5.336842105263158e-06, "step": 1393 }, { "epoch": 3.6633793556870478, "high_lr": 0.0002668421052631579, "low_lr": 5.336842105263158e-06, "step": 1393 }, { "epoch": 3.6633793556870478, "high_lr": 0.0002668421052631579, "low_lr": 5.336842105263158e-06, "step": 1393 }, { "epoch": 3.6633793556870478, "high_lr": 0.0002668421052631579, "low_lr": 5.336842105263158e-06, "step": 1393 }, { "epoch": 3.6633793556870478, "high_lr": 0.0002668421052631579, "low_lr": 5.336842105263158e-06, "step": 1393 }, { "epoch": 3.6633793556870478, "high_lr": 0.0002668421052631579, "low_lr": 5.336842105263158e-06, "step": 1393 }, { "epoch": 3.6633793556870478, "high_lr": 0.0002668421052631579, "low_lr": 5.336842105263158e-06, "step": 1393 }, { "epoch": 3.6633793556870478, "high_lr": 0.0002668421052631579, "low_lr": 5.336842105263158e-06, "step": 1393 }, { "epoch": 3.666009204470743, "grad_norm": 1.3362632989883423, "learning_rate": 0.0002663157894736842, "loss": 1.3058, "step": 1394 }, { "epoch": 3.666009204470743, "high_lr": 0.0002663157894736842, "low_lr": 5.326315789473685e-06, "step": 1394 }, { "epoch": 3.666009204470743, "high_lr": 0.0002663157894736842, "low_lr": 5.326315789473685e-06, "step": 1394 }, { "epoch": 3.666009204470743, "high_lr": 0.0002663157894736842, "low_lr": 5.326315789473685e-06, "step": 1394 }, { "epoch": 3.666009204470743, "high_lr": 0.0002663157894736842, "low_lr": 5.326315789473685e-06, "step": 1394 }, { "epoch": 3.666009204470743, "high_lr": 0.0002663157894736842, "low_lr": 5.326315789473685e-06, "step": 1394 }, { "epoch": 3.666009204470743, "high_lr": 0.0002663157894736842, "low_lr": 5.326315789473685e-06, "step": 1394 }, { "epoch": 3.666009204470743, "high_lr": 0.0002663157894736842, "low_lr": 5.326315789473685e-06, "step": 1394 }, { "epoch": 3.666009204470743, "high_lr": 0.0002663157894736842, "low_lr": 5.326315789473685e-06, "step": 1394 }, { "epoch": 3.668639053254438, "grad_norm": 1.5829945802688599, "learning_rate": 0.0002657894736842105, "loss": 1.3656, "step": 1395 }, { "epoch": 3.668639053254438, "high_lr": 0.0002657894736842105, "low_lr": 5.315789473684211e-06, "step": 1395 }, { "epoch": 3.668639053254438, "high_lr": 0.0002657894736842105, "low_lr": 5.315789473684211e-06, "step": 1395 }, { "epoch": 3.668639053254438, "high_lr": 0.0002657894736842105, "low_lr": 5.315789473684211e-06, "step": 1395 }, { "epoch": 3.668639053254438, "high_lr": 0.0002657894736842105, "low_lr": 5.315789473684211e-06, "step": 1395 }, { "epoch": 3.668639053254438, "high_lr": 0.0002657894736842105, "low_lr": 5.315789473684211e-06, "step": 1395 }, { "epoch": 3.668639053254438, "high_lr": 0.0002657894736842105, "low_lr": 5.315789473684211e-06, "step": 1395 }, { "epoch": 3.668639053254438, "high_lr": 0.0002657894736842105, "low_lr": 5.315789473684211e-06, "step": 1395 }, { "epoch": 3.668639053254438, "high_lr": 0.0002657894736842105, "low_lr": 5.315789473684211e-06, "step": 1395 }, { "epoch": 3.6712689020381326, "grad_norm": 1.4499706029891968, "learning_rate": 0.00026526315789473685, "loss": 1.2588, "step": 1396 }, { "epoch": 3.6712689020381326, "high_lr": 0.00026526315789473685, "low_lr": 5.305263157894738e-06, "step": 1396 }, { "epoch": 3.6712689020381326, "high_lr": 0.00026526315789473685, "low_lr": 5.305263157894738e-06, "step": 1396 }, { "epoch": 3.6712689020381326, "high_lr": 0.00026526315789473685, "low_lr": 5.305263157894738e-06, "step": 1396 }, { "epoch": 3.6712689020381326, "high_lr": 0.00026526315789473685, "low_lr": 5.305263157894738e-06, "step": 1396 }, { "epoch": 3.6712689020381326, "high_lr": 0.00026526315789473685, "low_lr": 5.305263157894738e-06, "step": 1396 }, { "epoch": 3.6712689020381326, "high_lr": 0.00026526315789473685, "low_lr": 5.305263157894738e-06, "step": 1396 }, { "epoch": 3.6712689020381326, "high_lr": 0.00026526315789473685, "low_lr": 5.305263157894738e-06, "step": 1396 }, { "epoch": 3.6712689020381326, "high_lr": 0.00026526315789473685, "low_lr": 5.305263157894738e-06, "step": 1396 }, { "epoch": 3.6738987508218277, "grad_norm": 1.3186489343643188, "learning_rate": 0.0002647368421052632, "loss": 1.2915, "step": 1397 }, { "epoch": 3.6738987508218277, "high_lr": 0.0002647368421052632, "low_lr": 5.294736842105263e-06, "step": 1397 }, { "epoch": 3.6738987508218277, "high_lr": 0.0002647368421052632, "low_lr": 5.294736842105263e-06, "step": 1397 }, { "epoch": 3.6738987508218277, "high_lr": 0.0002647368421052632, "low_lr": 5.294736842105263e-06, "step": 1397 }, { "epoch": 3.6738987508218277, "high_lr": 0.0002647368421052632, "low_lr": 5.294736842105263e-06, "step": 1397 }, { "epoch": 3.6738987508218277, "high_lr": 0.0002647368421052632, "low_lr": 5.294736842105263e-06, "step": 1397 }, { "epoch": 3.6738987508218277, "high_lr": 0.0002647368421052632, "low_lr": 5.294736842105263e-06, "step": 1397 }, { "epoch": 3.6738987508218277, "high_lr": 0.0002647368421052632, "low_lr": 5.294736842105263e-06, "step": 1397 }, { "epoch": 3.6738987508218277, "high_lr": 0.0002647368421052632, "low_lr": 5.294736842105263e-06, "step": 1397 }, { "epoch": 3.676528599605523, "grad_norm": 1.3520015478134155, "learning_rate": 0.0002642105263157895, "loss": 1.2928, "step": 1398 }, { "epoch": 3.676528599605523, "high_lr": 0.0002642105263157895, "low_lr": 5.2842105263157896e-06, "step": 1398 }, { "epoch": 3.676528599605523, "high_lr": 0.0002642105263157895, "low_lr": 5.2842105263157896e-06, "step": 1398 }, { "epoch": 3.676528599605523, "high_lr": 0.0002642105263157895, "low_lr": 5.2842105263157896e-06, "step": 1398 }, { "epoch": 3.676528599605523, "high_lr": 0.0002642105263157895, "low_lr": 5.2842105263157896e-06, "step": 1398 }, { "epoch": 3.676528599605523, "high_lr": 0.0002642105263157895, "low_lr": 5.2842105263157896e-06, "step": 1398 }, { "epoch": 3.676528599605523, "high_lr": 0.0002642105263157895, "low_lr": 5.2842105263157896e-06, "step": 1398 }, { "epoch": 3.676528599605523, "high_lr": 0.0002642105263157895, "low_lr": 5.2842105263157896e-06, "step": 1398 }, { "epoch": 3.676528599605523, "high_lr": 0.0002642105263157895, "low_lr": 5.2842105263157896e-06, "step": 1398 }, { "epoch": 3.6791584483892175, "grad_norm": 1.4090241193771362, "learning_rate": 0.0002636842105263158, "loss": 1.2711, "step": 1399 }, { "epoch": 3.6791584483892175, "high_lr": 0.0002636842105263158, "low_lr": 5.273684210526317e-06, "step": 1399 }, { "epoch": 3.6791584483892175, "high_lr": 0.0002636842105263158, "low_lr": 5.273684210526317e-06, "step": 1399 }, { "epoch": 3.6791584483892175, "high_lr": 0.0002636842105263158, "low_lr": 5.273684210526317e-06, "step": 1399 }, { "epoch": 3.6791584483892175, "high_lr": 0.0002636842105263158, "low_lr": 5.273684210526317e-06, "step": 1399 }, { "epoch": 3.6791584483892175, "high_lr": 0.0002636842105263158, "low_lr": 5.273684210526317e-06, "step": 1399 }, { "epoch": 3.6791584483892175, "high_lr": 0.0002636842105263158, "low_lr": 5.273684210526317e-06, "step": 1399 }, { "epoch": 3.6791584483892175, "high_lr": 0.0002636842105263158, "low_lr": 5.273684210526317e-06, "step": 1399 }, { "epoch": 3.6791584483892175, "high_lr": 0.0002636842105263158, "low_lr": 5.273684210526317e-06, "step": 1399 }, { "epoch": 3.6817882971729126, "grad_norm": 1.359285593032837, "learning_rate": 0.0002631578947368421, "loss": 1.3154, "step": 1400 }, { "epoch": 3.6817882971729126, "high_lr": 0.0002631578947368421, "low_lr": 5.263157894736842e-06, "step": 1400 }, { "epoch": 3.6817882971729126, "high_lr": 0.0002631578947368421, "low_lr": 5.263157894736842e-06, "step": 1400 }, { "epoch": 3.6817882971729126, "high_lr": 0.0002631578947368421, "low_lr": 5.263157894736842e-06, "step": 1400 }, { "epoch": 3.6817882971729126, "high_lr": 0.0002631578947368421, "low_lr": 5.263157894736842e-06, "step": 1400 }, { "epoch": 3.6817882971729126, "high_lr": 0.0002631578947368421, "low_lr": 5.263157894736842e-06, "step": 1400 }, { "epoch": 3.6817882971729126, "high_lr": 0.0002631578947368421, "low_lr": 5.263157894736842e-06, "step": 1400 }, { "epoch": 3.6817882971729126, "high_lr": 0.0002631578947368421, "low_lr": 5.263157894736842e-06, "step": 1400 }, { "epoch": 3.6817882971729126, "high_lr": 0.0002631578947368421, "low_lr": 5.263157894736842e-06, "step": 1400 }, { "epoch": 3.6844181459566077, "grad_norm": 1.3238935470581055, "learning_rate": 0.00026263157894736844, "loss": 1.2649, "step": 1401 }, { "epoch": 3.6844181459566077, "high_lr": 0.00026263157894736844, "low_lr": 5.252631578947369e-06, "step": 1401 }, { "epoch": 3.6844181459566077, "high_lr": 0.00026263157894736844, "low_lr": 5.252631578947369e-06, "step": 1401 }, { "epoch": 3.6844181459566077, "high_lr": 0.00026263157894736844, "low_lr": 5.252631578947369e-06, "step": 1401 }, { "epoch": 3.6844181459566077, "high_lr": 0.00026263157894736844, "low_lr": 5.252631578947369e-06, "step": 1401 }, { "epoch": 3.6844181459566077, "high_lr": 0.00026263157894736844, "low_lr": 5.252631578947369e-06, "step": 1401 }, { "epoch": 3.6844181459566077, "high_lr": 0.00026263157894736844, "low_lr": 5.252631578947369e-06, "step": 1401 }, { "epoch": 3.6844181459566077, "high_lr": 0.00026263157894736844, "low_lr": 5.252631578947369e-06, "step": 1401 }, { "epoch": 3.6844181459566077, "high_lr": 0.00026263157894736844, "low_lr": 5.252631578947369e-06, "step": 1401 }, { "epoch": 3.6870479947403023, "grad_norm": 1.4484986066818237, "learning_rate": 0.0002621052631578947, "loss": 1.3265, "step": 1402 }, { "epoch": 3.6870479947403023, "high_lr": 0.0002621052631578947, "low_lr": 5.242105263157895e-06, "step": 1402 }, { "epoch": 3.6870479947403023, "high_lr": 0.0002621052631578947, "low_lr": 5.242105263157895e-06, "step": 1402 }, { "epoch": 3.6870479947403023, "high_lr": 0.0002621052631578947, "low_lr": 5.242105263157895e-06, "step": 1402 }, { "epoch": 3.6870479947403023, "high_lr": 0.0002621052631578947, "low_lr": 5.242105263157895e-06, "step": 1402 }, { "epoch": 3.6870479947403023, "high_lr": 0.0002621052631578947, "low_lr": 5.242105263157895e-06, "step": 1402 }, { "epoch": 3.6870479947403023, "high_lr": 0.0002621052631578947, "low_lr": 5.242105263157895e-06, "step": 1402 }, { "epoch": 3.6870479947403023, "high_lr": 0.0002621052631578947, "low_lr": 5.242105263157895e-06, "step": 1402 }, { "epoch": 3.6870479947403023, "high_lr": 0.0002621052631578947, "low_lr": 5.242105263157895e-06, "step": 1402 }, { "epoch": 3.6896778435239974, "grad_norm": 1.3380155563354492, "learning_rate": 0.00026157894736842107, "loss": 1.2926, "step": 1403 }, { "epoch": 3.6896778435239974, "high_lr": 0.00026157894736842107, "low_lr": 5.231578947368422e-06, "step": 1403 }, { "epoch": 3.6896778435239974, "high_lr": 0.00026157894736842107, "low_lr": 5.231578947368422e-06, "step": 1403 }, { "epoch": 3.6896778435239974, "high_lr": 0.00026157894736842107, "low_lr": 5.231578947368422e-06, "step": 1403 }, { "epoch": 3.6896778435239974, "high_lr": 0.00026157894736842107, "low_lr": 5.231578947368422e-06, "step": 1403 }, { "epoch": 3.6896778435239974, "high_lr": 0.00026157894736842107, "low_lr": 5.231578947368422e-06, "step": 1403 }, { "epoch": 3.6896778435239974, "high_lr": 0.00026157894736842107, "low_lr": 5.231578947368422e-06, "step": 1403 }, { "epoch": 3.6896778435239974, "high_lr": 0.00026157894736842107, "low_lr": 5.231578947368422e-06, "step": 1403 }, { "epoch": 3.6896778435239974, "high_lr": 0.00026157894736842107, "low_lr": 5.231578947368422e-06, "step": 1403 }, { "epoch": 3.6923076923076925, "grad_norm": 1.4612758159637451, "learning_rate": 0.00026105263157894735, "loss": 1.3273, "step": 1404 }, { "epoch": 3.6923076923076925, "high_lr": 0.00026105263157894735, "low_lr": 5.2210526315789475e-06, "step": 1404 }, { "epoch": 3.6923076923076925, "high_lr": 0.00026105263157894735, "low_lr": 5.2210526315789475e-06, "step": 1404 }, { "epoch": 3.6923076923076925, "high_lr": 0.00026105263157894735, "low_lr": 5.2210526315789475e-06, "step": 1404 }, { "epoch": 3.6923076923076925, "high_lr": 0.00026105263157894735, "low_lr": 5.2210526315789475e-06, "step": 1404 }, { "epoch": 3.6923076923076925, "high_lr": 0.00026105263157894735, "low_lr": 5.2210526315789475e-06, "step": 1404 }, { "epoch": 3.6923076923076925, "high_lr": 0.00026105263157894735, "low_lr": 5.2210526315789475e-06, "step": 1404 }, { "epoch": 3.6923076923076925, "high_lr": 0.00026105263157894735, "low_lr": 5.2210526315789475e-06, "step": 1404 }, { "epoch": 3.6923076923076925, "high_lr": 0.00026105263157894735, "low_lr": 5.2210526315789475e-06, "step": 1404 }, { "epoch": 3.694937541091387, "grad_norm": 1.5359078645706177, "learning_rate": 0.0002605263157894737, "loss": 1.3441, "step": 1405 }, { "epoch": 3.694937541091387, "high_lr": 0.0002605263157894737, "low_lr": 5.210526315789474e-06, "step": 1405 }, { "epoch": 3.694937541091387, "high_lr": 0.0002605263157894737, "low_lr": 5.210526315789474e-06, "step": 1405 }, { "epoch": 3.694937541091387, "high_lr": 0.0002605263157894737, "low_lr": 5.210526315789474e-06, "step": 1405 }, { "epoch": 3.694937541091387, "high_lr": 0.0002605263157894737, "low_lr": 5.210526315789474e-06, "step": 1405 }, { "epoch": 3.694937541091387, "high_lr": 0.0002605263157894737, "low_lr": 5.210526315789474e-06, "step": 1405 }, { "epoch": 3.694937541091387, "high_lr": 0.0002605263157894737, "low_lr": 5.210526315789474e-06, "step": 1405 }, { "epoch": 3.694937541091387, "high_lr": 0.0002605263157894737, "low_lr": 5.210526315789474e-06, "step": 1405 }, { "epoch": 3.694937541091387, "high_lr": 0.0002605263157894737, "low_lr": 5.210526315789474e-06, "step": 1405 }, { "epoch": 3.6975673898750823, "grad_norm": 1.486899733543396, "learning_rate": 0.00026000000000000003, "loss": 1.2808, "step": 1406 }, { "epoch": 3.6975673898750823, "high_lr": 0.00026000000000000003, "low_lr": 5.2e-06, "step": 1406 }, { "epoch": 3.6975673898750823, "high_lr": 0.00026000000000000003, "low_lr": 5.2e-06, "step": 1406 }, { "epoch": 3.6975673898750823, "high_lr": 0.00026000000000000003, "low_lr": 5.2e-06, "step": 1406 }, { "epoch": 3.6975673898750823, "high_lr": 0.00026000000000000003, "low_lr": 5.2e-06, "step": 1406 }, { "epoch": 3.6975673898750823, "high_lr": 0.00026000000000000003, "low_lr": 5.2e-06, "step": 1406 }, { "epoch": 3.6975673898750823, "high_lr": 0.00026000000000000003, "low_lr": 5.2e-06, "step": 1406 }, { "epoch": 3.6975673898750823, "high_lr": 0.00026000000000000003, "low_lr": 5.2e-06, "step": 1406 }, { "epoch": 3.6975673898750823, "high_lr": 0.00026000000000000003, "low_lr": 5.2e-06, "step": 1406 }, { "epoch": 3.7001972386587774, "grad_norm": 1.377661943435669, "learning_rate": 0.0002594736842105263, "loss": 1.2963, "step": 1407 }, { "epoch": 3.7001972386587774, "high_lr": 0.0002594736842105263, "low_lr": 5.1894736842105265e-06, "step": 1407 }, { "epoch": 3.7001972386587774, "high_lr": 0.0002594736842105263, "low_lr": 5.1894736842105265e-06, "step": 1407 }, { "epoch": 3.7001972386587774, "high_lr": 0.0002594736842105263, "low_lr": 5.1894736842105265e-06, "step": 1407 }, { "epoch": 3.7001972386587774, "high_lr": 0.0002594736842105263, "low_lr": 5.1894736842105265e-06, "step": 1407 }, { "epoch": 3.7001972386587774, "high_lr": 0.0002594736842105263, "low_lr": 5.1894736842105265e-06, "step": 1407 }, { "epoch": 3.7001972386587774, "high_lr": 0.0002594736842105263, "low_lr": 5.1894736842105265e-06, "step": 1407 }, { "epoch": 3.7001972386587774, "high_lr": 0.0002594736842105263, "low_lr": 5.1894736842105265e-06, "step": 1407 }, { "epoch": 3.7001972386587774, "high_lr": 0.0002594736842105263, "low_lr": 5.1894736842105265e-06, "step": 1407 }, { "epoch": 3.702827087442472, "grad_norm": 1.764217495918274, "learning_rate": 0.00025894736842105266, "loss": 1.3115, "step": 1408 }, { "epoch": 3.702827087442472, "high_lr": 0.00025894736842105266, "low_lr": 5.178947368421054e-06, "step": 1408 }, { "epoch": 3.702827087442472, "high_lr": 0.00025894736842105266, "low_lr": 5.178947368421054e-06, "step": 1408 }, { "epoch": 3.702827087442472, "high_lr": 0.00025894736842105266, "low_lr": 5.178947368421054e-06, "step": 1408 }, { "epoch": 3.702827087442472, "high_lr": 0.00025894736842105266, "low_lr": 5.178947368421054e-06, "step": 1408 }, { "epoch": 3.702827087442472, "high_lr": 0.00025894736842105266, "low_lr": 5.178947368421054e-06, "step": 1408 }, { "epoch": 3.702827087442472, "high_lr": 0.00025894736842105266, "low_lr": 5.178947368421054e-06, "step": 1408 }, { "epoch": 3.702827087442472, "high_lr": 0.00025894736842105266, "low_lr": 5.178947368421054e-06, "step": 1408 }, { "epoch": 3.702827087442472, "high_lr": 0.00025894736842105266, "low_lr": 5.178947368421054e-06, "step": 1408 }, { "epoch": 3.705456936226167, "grad_norm": 1.3747307062149048, "learning_rate": 0.00025842105263157894, "loss": 1.3314, "step": 1409 }, { "epoch": 3.705456936226167, "high_lr": 0.00025842105263157894, "low_lr": 5.168421052631579e-06, "step": 1409 }, { "epoch": 3.705456936226167, "high_lr": 0.00025842105263157894, "low_lr": 5.168421052631579e-06, "step": 1409 }, { "epoch": 3.705456936226167, "high_lr": 0.00025842105263157894, "low_lr": 5.168421052631579e-06, "step": 1409 }, { "epoch": 3.705456936226167, "high_lr": 0.00025842105263157894, "low_lr": 5.168421052631579e-06, "step": 1409 }, { "epoch": 3.705456936226167, "high_lr": 0.00025842105263157894, "low_lr": 5.168421052631579e-06, "step": 1409 }, { "epoch": 3.705456936226167, "high_lr": 0.00025842105263157894, "low_lr": 5.168421052631579e-06, "step": 1409 }, { "epoch": 3.705456936226167, "high_lr": 0.00025842105263157894, "low_lr": 5.168421052631579e-06, "step": 1409 }, { "epoch": 3.705456936226167, "high_lr": 0.00025842105263157894, "low_lr": 5.168421052631579e-06, "step": 1409 }, { "epoch": 3.7080867850098618, "grad_norm": 1.390458583831787, "learning_rate": 0.0002578947368421053, "loss": 1.309, "step": 1410 }, { "epoch": 3.7080867850098618, "high_lr": 0.0002578947368421053, "low_lr": 5.157894736842106e-06, "step": 1410 }, { "epoch": 3.7080867850098618, "high_lr": 0.0002578947368421053, "low_lr": 5.157894736842106e-06, "step": 1410 }, { "epoch": 3.7080867850098618, "high_lr": 0.0002578947368421053, "low_lr": 5.157894736842106e-06, "step": 1410 }, { "epoch": 3.7080867850098618, "high_lr": 0.0002578947368421053, "low_lr": 5.157894736842106e-06, "step": 1410 }, { "epoch": 3.7080867850098618, "high_lr": 0.0002578947368421053, "low_lr": 5.157894736842106e-06, "step": 1410 }, { "epoch": 3.7080867850098618, "high_lr": 0.0002578947368421053, "low_lr": 5.157894736842106e-06, "step": 1410 }, { "epoch": 3.7080867850098618, "high_lr": 0.0002578947368421053, "low_lr": 5.157894736842106e-06, "step": 1410 }, { "epoch": 3.7080867850098618, "high_lr": 0.0002578947368421053, "low_lr": 5.157894736842106e-06, "step": 1410 }, { "epoch": 3.710716633793557, "grad_norm": 1.4675899744033813, "learning_rate": 0.00025736842105263157, "loss": 1.2697, "step": 1411 }, { "epoch": 3.710716633793557, "high_lr": 0.00025736842105263157, "low_lr": 5.147368421052632e-06, "step": 1411 }, { "epoch": 3.710716633793557, "high_lr": 0.00025736842105263157, "low_lr": 5.147368421052632e-06, "step": 1411 }, { "epoch": 3.710716633793557, "high_lr": 0.00025736842105263157, "low_lr": 5.147368421052632e-06, "step": 1411 }, { "epoch": 3.710716633793557, "high_lr": 0.00025736842105263157, "low_lr": 5.147368421052632e-06, "step": 1411 }, { "epoch": 3.710716633793557, "high_lr": 0.00025736842105263157, "low_lr": 5.147368421052632e-06, "step": 1411 }, { "epoch": 3.710716633793557, "high_lr": 0.00025736842105263157, "low_lr": 5.147368421052632e-06, "step": 1411 }, { "epoch": 3.710716633793557, "high_lr": 0.00025736842105263157, "low_lr": 5.147368421052632e-06, "step": 1411 }, { "epoch": 3.710716633793557, "high_lr": 0.00025736842105263157, "low_lr": 5.147368421052632e-06, "step": 1411 }, { "epoch": 3.713346482577252, "grad_norm": 1.3478926420211792, "learning_rate": 0.00025684210526315786, "loss": 1.3522, "step": 1412 }, { "epoch": 3.713346482577252, "high_lr": 0.00025684210526315786, "low_lr": 5.136842105263158e-06, "step": 1412 }, { "epoch": 3.713346482577252, "high_lr": 0.00025684210526315786, "low_lr": 5.136842105263158e-06, "step": 1412 }, { "epoch": 3.713346482577252, "high_lr": 0.00025684210526315786, "low_lr": 5.136842105263158e-06, "step": 1412 }, { "epoch": 3.713346482577252, "high_lr": 0.00025684210526315786, "low_lr": 5.136842105263158e-06, "step": 1412 }, { "epoch": 3.713346482577252, "high_lr": 0.00025684210526315786, "low_lr": 5.136842105263158e-06, "step": 1412 }, { "epoch": 3.713346482577252, "high_lr": 0.00025684210526315786, "low_lr": 5.136842105263158e-06, "step": 1412 }, { "epoch": 3.713346482577252, "high_lr": 0.00025684210526315786, "low_lr": 5.136842105263158e-06, "step": 1412 }, { "epoch": 3.713346482577252, "high_lr": 0.00025684210526315786, "low_lr": 5.136842105263158e-06, "step": 1412 }, { "epoch": 3.7159763313609466, "grad_norm": 1.4412983655929565, "learning_rate": 0.00025631578947368425, "loss": 1.2975, "step": 1413 }, { "epoch": 3.7159763313609466, "high_lr": 0.00025631578947368425, "low_lr": 5.1263157894736845e-06, "step": 1413 }, { "epoch": 3.7159763313609466, "high_lr": 0.00025631578947368425, "low_lr": 5.1263157894736845e-06, "step": 1413 }, { "epoch": 3.7159763313609466, "high_lr": 0.00025631578947368425, "low_lr": 5.1263157894736845e-06, "step": 1413 }, { "epoch": 3.7159763313609466, "high_lr": 0.00025631578947368425, "low_lr": 5.1263157894736845e-06, "step": 1413 }, { "epoch": 3.7159763313609466, "high_lr": 0.00025631578947368425, "low_lr": 5.1263157894736845e-06, "step": 1413 }, { "epoch": 3.7159763313609466, "high_lr": 0.00025631578947368425, "low_lr": 5.1263157894736845e-06, "step": 1413 }, { "epoch": 3.7159763313609466, "high_lr": 0.00025631578947368425, "low_lr": 5.1263157894736845e-06, "step": 1413 }, { "epoch": 3.7159763313609466, "high_lr": 0.00025631578947368425, "low_lr": 5.1263157894736845e-06, "step": 1413 }, { "epoch": 3.7186061801446417, "grad_norm": 1.5651241540908813, "learning_rate": 0.00025578947368421054, "loss": 1.2681, "step": 1414 }, { "epoch": 3.7186061801446417, "high_lr": 0.00025578947368421054, "low_lr": 5.115789473684211e-06, "step": 1414 }, { "epoch": 3.7186061801446417, "high_lr": 0.00025578947368421054, "low_lr": 5.115789473684211e-06, "step": 1414 }, { "epoch": 3.7186061801446417, "high_lr": 0.00025578947368421054, "low_lr": 5.115789473684211e-06, "step": 1414 }, { "epoch": 3.7186061801446417, "high_lr": 0.00025578947368421054, "low_lr": 5.115789473684211e-06, "step": 1414 }, { "epoch": 3.7186061801446417, "high_lr": 0.00025578947368421054, "low_lr": 5.115789473684211e-06, "step": 1414 }, { "epoch": 3.7186061801446417, "high_lr": 0.00025578947368421054, "low_lr": 5.115789473684211e-06, "step": 1414 }, { "epoch": 3.7186061801446417, "high_lr": 0.00025578947368421054, "low_lr": 5.115789473684211e-06, "step": 1414 }, { "epoch": 3.7186061801446417, "high_lr": 0.00025578947368421054, "low_lr": 5.115789473684211e-06, "step": 1414 }, { "epoch": 3.7212360289283364, "grad_norm": 1.3770606517791748, "learning_rate": 0.0002552631578947369, "loss": 1.2874, "step": 1415 }, { "epoch": 3.7212360289283364, "high_lr": 0.0002552631578947369, "low_lr": 5.105263157894738e-06, "step": 1415 }, { "epoch": 3.7212360289283364, "high_lr": 0.0002552631578947369, "low_lr": 5.105263157894738e-06, "step": 1415 }, { "epoch": 3.7212360289283364, "high_lr": 0.0002552631578947369, "low_lr": 5.105263157894738e-06, "step": 1415 }, { "epoch": 3.7212360289283364, "high_lr": 0.0002552631578947369, "low_lr": 5.105263157894738e-06, "step": 1415 }, { "epoch": 3.7212360289283364, "high_lr": 0.0002552631578947369, "low_lr": 5.105263157894738e-06, "step": 1415 }, { "epoch": 3.7212360289283364, "high_lr": 0.0002552631578947369, "low_lr": 5.105263157894738e-06, "step": 1415 }, { "epoch": 3.7212360289283364, "high_lr": 0.0002552631578947369, "low_lr": 5.105263157894738e-06, "step": 1415 }, { "epoch": 3.7212360289283364, "high_lr": 0.0002552631578947369, "low_lr": 5.105263157894738e-06, "step": 1415 }, { "epoch": 3.7238658777120315, "grad_norm": 1.4465447664260864, "learning_rate": 0.00025473684210526316, "loss": 1.2934, "step": 1416 }, { "epoch": 3.7238658777120315, "high_lr": 0.00025473684210526316, "low_lr": 5.0947368421052635e-06, "step": 1416 }, { "epoch": 3.7238658777120315, "high_lr": 0.00025473684210526316, "low_lr": 5.0947368421052635e-06, "step": 1416 }, { "epoch": 3.7238658777120315, "high_lr": 0.00025473684210526316, "low_lr": 5.0947368421052635e-06, "step": 1416 }, { "epoch": 3.7238658777120315, "high_lr": 0.00025473684210526316, "low_lr": 5.0947368421052635e-06, "step": 1416 }, { "epoch": 3.7238658777120315, "high_lr": 0.00025473684210526316, "low_lr": 5.0947368421052635e-06, "step": 1416 }, { "epoch": 3.7238658777120315, "high_lr": 0.00025473684210526316, "low_lr": 5.0947368421052635e-06, "step": 1416 }, { "epoch": 3.7238658777120315, "high_lr": 0.00025473684210526316, "low_lr": 5.0947368421052635e-06, "step": 1416 }, { "epoch": 3.7238658777120315, "high_lr": 0.00025473684210526316, "low_lr": 5.0947368421052635e-06, "step": 1416 }, { "epoch": 3.7264957264957266, "grad_norm": 1.2946622371673584, "learning_rate": 0.0002542105263157895, "loss": 1.312, "step": 1417 }, { "epoch": 3.7264957264957266, "high_lr": 0.0002542105263157895, "low_lr": 5.084210526315791e-06, "step": 1417 }, { "epoch": 3.7264957264957266, "high_lr": 0.0002542105263157895, "low_lr": 5.084210526315791e-06, "step": 1417 }, { "epoch": 3.7264957264957266, "high_lr": 0.0002542105263157895, "low_lr": 5.084210526315791e-06, "step": 1417 }, { "epoch": 3.7264957264957266, "high_lr": 0.0002542105263157895, "low_lr": 5.084210526315791e-06, "step": 1417 }, { "epoch": 3.7264957264957266, "high_lr": 0.0002542105263157895, "low_lr": 5.084210526315791e-06, "step": 1417 }, { "epoch": 3.7264957264957266, "high_lr": 0.0002542105263157895, "low_lr": 5.084210526315791e-06, "step": 1417 }, { "epoch": 3.7264957264957266, "high_lr": 0.0002542105263157895, "low_lr": 5.084210526315791e-06, "step": 1417 }, { "epoch": 3.7264957264957266, "high_lr": 0.0002542105263157895, "low_lr": 5.084210526315791e-06, "step": 1417 }, { "epoch": 3.7291255752794212, "grad_norm": 1.3798704147338867, "learning_rate": 0.0002536842105263158, "loss": 1.2907, "step": 1418 }, { "epoch": 3.7291255752794212, "high_lr": 0.0002536842105263158, "low_lr": 5.073684210526316e-06, "step": 1418 }, { "epoch": 3.7291255752794212, "high_lr": 0.0002536842105263158, "low_lr": 5.073684210526316e-06, "step": 1418 }, { "epoch": 3.7291255752794212, "high_lr": 0.0002536842105263158, "low_lr": 5.073684210526316e-06, "step": 1418 }, { "epoch": 3.7291255752794212, "high_lr": 0.0002536842105263158, "low_lr": 5.073684210526316e-06, "step": 1418 }, { "epoch": 3.7291255752794212, "high_lr": 0.0002536842105263158, "low_lr": 5.073684210526316e-06, "step": 1418 }, { "epoch": 3.7291255752794212, "high_lr": 0.0002536842105263158, "low_lr": 5.073684210526316e-06, "step": 1418 }, { "epoch": 3.7291255752794212, "high_lr": 0.0002536842105263158, "low_lr": 5.073684210526316e-06, "step": 1418 }, { "epoch": 3.7291255752794212, "high_lr": 0.0002536842105263158, "low_lr": 5.073684210526316e-06, "step": 1418 }, { "epoch": 3.7317554240631163, "grad_norm": 1.5236715078353882, "learning_rate": 0.0002531578947368421, "loss": 1.2864, "step": 1419 }, { "epoch": 3.7317554240631163, "high_lr": 0.0002531578947368421, "low_lr": 5.0631578947368424e-06, "step": 1419 }, { "epoch": 3.7317554240631163, "high_lr": 0.0002531578947368421, "low_lr": 5.0631578947368424e-06, "step": 1419 }, { "epoch": 3.7317554240631163, "high_lr": 0.0002531578947368421, "low_lr": 5.0631578947368424e-06, "step": 1419 }, { "epoch": 3.7317554240631163, "high_lr": 0.0002531578947368421, "low_lr": 5.0631578947368424e-06, "step": 1419 }, { "epoch": 3.7317554240631163, "high_lr": 0.0002531578947368421, "low_lr": 5.0631578947368424e-06, "step": 1419 }, { "epoch": 3.7317554240631163, "high_lr": 0.0002531578947368421, "low_lr": 5.0631578947368424e-06, "step": 1419 }, { "epoch": 3.7317554240631163, "high_lr": 0.0002531578947368421, "low_lr": 5.0631578947368424e-06, "step": 1419 }, { "epoch": 3.7317554240631163, "high_lr": 0.0002531578947368421, "low_lr": 5.0631578947368424e-06, "step": 1419 }, { "epoch": 3.7343852728468114, "grad_norm": 1.4545153379440308, "learning_rate": 0.0002526315789473684, "loss": 1.2923, "step": 1420 }, { "epoch": 3.7343852728468114, "high_lr": 0.0002526315789473684, "low_lr": 5.052631578947369e-06, "step": 1420 }, { "epoch": 3.7343852728468114, "high_lr": 0.0002526315789473684, "low_lr": 5.052631578947369e-06, "step": 1420 }, { "epoch": 3.7343852728468114, "high_lr": 0.0002526315789473684, "low_lr": 5.052631578947369e-06, "step": 1420 }, { "epoch": 3.7343852728468114, "high_lr": 0.0002526315789473684, "low_lr": 5.052631578947369e-06, "step": 1420 }, { "epoch": 3.7343852728468114, "high_lr": 0.0002526315789473684, "low_lr": 5.052631578947369e-06, "step": 1420 }, { "epoch": 3.7343852728468114, "high_lr": 0.0002526315789473684, "low_lr": 5.052631578947369e-06, "step": 1420 }, { "epoch": 3.7343852728468114, "high_lr": 0.0002526315789473684, "low_lr": 5.052631578947369e-06, "step": 1420 }, { "epoch": 3.7343852728468114, "high_lr": 0.0002526315789473684, "low_lr": 5.052631578947369e-06, "step": 1420 }, { "epoch": 3.737015121630506, "grad_norm": 1.4548542499542236, "learning_rate": 0.00025210526315789475, "loss": 1.292, "step": 1421 }, { "epoch": 3.737015121630506, "high_lr": 0.00025210526315789475, "low_lr": 5.042105263157895e-06, "step": 1421 }, { "epoch": 3.737015121630506, "high_lr": 0.00025210526315789475, "low_lr": 5.042105263157895e-06, "step": 1421 }, { "epoch": 3.737015121630506, "high_lr": 0.00025210526315789475, "low_lr": 5.042105263157895e-06, "step": 1421 }, { "epoch": 3.737015121630506, "high_lr": 0.00025210526315789475, "low_lr": 5.042105263157895e-06, "step": 1421 }, { "epoch": 3.737015121630506, "high_lr": 0.00025210526315789475, "low_lr": 5.042105263157895e-06, "step": 1421 }, { "epoch": 3.737015121630506, "high_lr": 0.00025210526315789475, "low_lr": 5.042105263157895e-06, "step": 1421 }, { "epoch": 3.737015121630506, "high_lr": 0.00025210526315789475, "low_lr": 5.042105263157895e-06, "step": 1421 }, { "epoch": 3.737015121630506, "high_lr": 0.00025210526315789475, "low_lr": 5.042105263157895e-06, "step": 1421 }, { "epoch": 3.739644970414201, "grad_norm": 1.5131704807281494, "learning_rate": 0.0002515789473684211, "loss": 1.295, "step": 1422 }, { "epoch": 3.739644970414201, "high_lr": 0.0002515789473684211, "low_lr": 5.0315789473684214e-06, "step": 1422 }, { "epoch": 3.739644970414201, "high_lr": 0.0002515789473684211, "low_lr": 5.0315789473684214e-06, "step": 1422 }, { "epoch": 3.739644970414201, "high_lr": 0.0002515789473684211, "low_lr": 5.0315789473684214e-06, "step": 1422 }, { "epoch": 3.739644970414201, "high_lr": 0.0002515789473684211, "low_lr": 5.0315789473684214e-06, "step": 1422 }, { "epoch": 3.739644970414201, "high_lr": 0.0002515789473684211, "low_lr": 5.0315789473684214e-06, "step": 1422 }, { "epoch": 3.739644970414201, "high_lr": 0.0002515789473684211, "low_lr": 5.0315789473684214e-06, "step": 1422 }, { "epoch": 3.739644970414201, "high_lr": 0.0002515789473684211, "low_lr": 5.0315789473684214e-06, "step": 1422 }, { "epoch": 3.739644970414201, "high_lr": 0.0002515789473684211, "low_lr": 5.0315789473684214e-06, "step": 1422 }, { "epoch": 3.7422748191978963, "grad_norm": 1.3713613748550415, "learning_rate": 0.0002510526315789474, "loss": 1.2988, "step": 1423 }, { "epoch": 3.7422748191978963, "high_lr": 0.0002510526315789474, "low_lr": 5.021052631578948e-06, "step": 1423 }, { "epoch": 3.7422748191978963, "high_lr": 0.0002510526315789474, "low_lr": 5.021052631578948e-06, "step": 1423 }, { "epoch": 3.7422748191978963, "high_lr": 0.0002510526315789474, "low_lr": 5.021052631578948e-06, "step": 1423 }, { "epoch": 3.7422748191978963, "high_lr": 0.0002510526315789474, "low_lr": 5.021052631578948e-06, "step": 1423 }, { "epoch": 3.7422748191978963, "high_lr": 0.0002510526315789474, "low_lr": 5.021052631578948e-06, "step": 1423 }, { "epoch": 3.7422748191978963, "high_lr": 0.0002510526315789474, "low_lr": 5.021052631578948e-06, "step": 1423 }, { "epoch": 3.7422748191978963, "high_lr": 0.0002510526315789474, "low_lr": 5.021052631578948e-06, "step": 1423 }, { "epoch": 3.7422748191978963, "high_lr": 0.0002510526315789474, "low_lr": 5.021052631578948e-06, "step": 1423 }, { "epoch": 3.744904667981591, "grad_norm": 1.3472787141799927, "learning_rate": 0.0002505263157894737, "loss": 1.3521, "step": 1424 }, { "epoch": 3.744904667981591, "high_lr": 0.0002505263157894737, "low_lr": 5.010526315789475e-06, "step": 1424 }, { "epoch": 3.744904667981591, "high_lr": 0.0002505263157894737, "low_lr": 5.010526315789475e-06, "step": 1424 }, { "epoch": 3.744904667981591, "high_lr": 0.0002505263157894737, "low_lr": 5.010526315789475e-06, "step": 1424 }, { "epoch": 3.744904667981591, "high_lr": 0.0002505263157894737, "low_lr": 5.010526315789475e-06, "step": 1424 }, { "epoch": 3.744904667981591, "high_lr": 0.0002505263157894737, "low_lr": 5.010526315789475e-06, "step": 1424 }, { "epoch": 3.744904667981591, "high_lr": 0.0002505263157894737, "low_lr": 5.010526315789475e-06, "step": 1424 }, { "epoch": 3.744904667981591, "high_lr": 0.0002505263157894737, "low_lr": 5.010526315789475e-06, "step": 1424 }, { "epoch": 3.744904667981591, "high_lr": 0.0002505263157894737, "low_lr": 5.010526315789475e-06, "step": 1424 }, { "epoch": 3.747534516765286, "grad_norm": 1.3773797750473022, "learning_rate": 0.00025, "loss": 1.3132, "step": 1425 }, { "epoch": 3.747534516765286, "high_lr": 0.00025, "low_lr": 5e-06, "step": 1425 }, { "epoch": 3.747534516765286, "high_lr": 0.00025, "low_lr": 5e-06, "step": 1425 }, { "epoch": 3.747534516765286, "high_lr": 0.00025, "low_lr": 5e-06, "step": 1425 }, { "epoch": 3.747534516765286, "high_lr": 0.00025, "low_lr": 5e-06, "step": 1425 }, { "epoch": 3.747534516765286, "high_lr": 0.00025, "low_lr": 5e-06, "step": 1425 }, { "epoch": 3.747534516765286, "high_lr": 0.00025, "low_lr": 5e-06, "step": 1425 }, { "epoch": 3.747534516765286, "high_lr": 0.00025, "low_lr": 5e-06, "step": 1425 }, { "epoch": 3.747534516765286, "high_lr": 0.00025, "low_lr": 5e-06, "step": 1425 }, { "epoch": 3.750164365548981, "grad_norm": 1.5149577856063843, "learning_rate": 0.00024947368421052635, "loss": 1.3398, "step": 1426 }, { "epoch": 3.750164365548981, "high_lr": 0.00024947368421052635, "low_lr": 4.989473684210527e-06, "step": 1426 }, { "epoch": 3.750164365548981, "high_lr": 0.00024947368421052635, "low_lr": 4.989473684210527e-06, "step": 1426 }, { "epoch": 3.750164365548981, "high_lr": 0.00024947368421052635, "low_lr": 4.989473684210527e-06, "step": 1426 }, { "epoch": 3.750164365548981, "high_lr": 0.00024947368421052635, "low_lr": 4.989473684210527e-06, "step": 1426 }, { "epoch": 3.750164365548981, "high_lr": 0.00024947368421052635, "low_lr": 4.989473684210527e-06, "step": 1426 }, { "epoch": 3.750164365548981, "high_lr": 0.00024947368421052635, "low_lr": 4.989473684210527e-06, "step": 1426 }, { "epoch": 3.750164365548981, "high_lr": 0.00024947368421052635, "low_lr": 4.989473684210527e-06, "step": 1426 }, { "epoch": 3.750164365548981, "high_lr": 0.00024947368421052635, "low_lr": 4.989473684210527e-06, "step": 1426 }, { "epoch": 3.752794214332676, "grad_norm": 1.5774260759353638, "learning_rate": 0.00024894736842105263, "loss": 1.2966, "step": 1427 }, { "epoch": 3.752794214332676, "high_lr": 0.00024894736842105263, "low_lr": 4.978947368421053e-06, "step": 1427 }, { "epoch": 3.752794214332676, "high_lr": 0.00024894736842105263, "low_lr": 4.978947368421053e-06, "step": 1427 }, { "epoch": 3.752794214332676, "high_lr": 0.00024894736842105263, "low_lr": 4.978947368421053e-06, "step": 1427 }, { "epoch": 3.752794214332676, "high_lr": 0.00024894736842105263, "low_lr": 4.978947368421053e-06, "step": 1427 }, { "epoch": 3.752794214332676, "high_lr": 0.00024894736842105263, "low_lr": 4.978947368421053e-06, "step": 1427 }, { "epoch": 3.752794214332676, "high_lr": 0.00024894736842105263, "low_lr": 4.978947368421053e-06, "step": 1427 }, { "epoch": 3.752794214332676, "high_lr": 0.00024894736842105263, "low_lr": 4.978947368421053e-06, "step": 1427 }, { "epoch": 3.752794214332676, "high_lr": 0.00024894736842105263, "low_lr": 4.978947368421053e-06, "step": 1427 }, { "epoch": 3.755424063116371, "grad_norm": 1.3350043296813965, "learning_rate": 0.00024842105263157897, "loss": 1.2871, "step": 1428 }, { "epoch": 3.755424063116371, "high_lr": 0.00024842105263157897, "low_lr": 4.968421052631579e-06, "step": 1428 }, { "epoch": 3.755424063116371, "high_lr": 0.00024842105263157897, "low_lr": 4.968421052631579e-06, "step": 1428 }, { "epoch": 3.755424063116371, "high_lr": 0.00024842105263157897, "low_lr": 4.968421052631579e-06, "step": 1428 }, { "epoch": 3.755424063116371, "high_lr": 0.00024842105263157897, "low_lr": 4.968421052631579e-06, "step": 1428 }, { "epoch": 3.755424063116371, "high_lr": 0.00024842105263157897, "low_lr": 4.968421052631579e-06, "step": 1428 }, { "epoch": 3.755424063116371, "high_lr": 0.00024842105263157897, "low_lr": 4.968421052631579e-06, "step": 1428 }, { "epoch": 3.755424063116371, "high_lr": 0.00024842105263157897, "low_lr": 4.968421052631579e-06, "step": 1428 }, { "epoch": 3.755424063116371, "high_lr": 0.00024842105263157897, "low_lr": 4.968421052631579e-06, "step": 1428 }, { "epoch": 3.758053911900066, "grad_norm": 1.4210309982299805, "learning_rate": 0.00024789473684210526, "loss": 1.2749, "step": 1429 }, { "epoch": 3.758053911900066, "high_lr": 0.00024789473684210526, "low_lr": 4.957894736842106e-06, "step": 1429 }, { "epoch": 3.758053911900066, "high_lr": 0.00024789473684210526, "low_lr": 4.957894736842106e-06, "step": 1429 }, { "epoch": 3.758053911900066, "high_lr": 0.00024789473684210526, "low_lr": 4.957894736842106e-06, "step": 1429 }, { "epoch": 3.758053911900066, "high_lr": 0.00024789473684210526, "low_lr": 4.957894736842106e-06, "step": 1429 }, { "epoch": 3.758053911900066, "high_lr": 0.00024789473684210526, "low_lr": 4.957894736842106e-06, "step": 1429 }, { "epoch": 3.758053911900066, "high_lr": 0.00024789473684210526, "low_lr": 4.957894736842106e-06, "step": 1429 }, { "epoch": 3.758053911900066, "high_lr": 0.00024789473684210526, "low_lr": 4.957894736842106e-06, "step": 1429 }, { "epoch": 3.758053911900066, "high_lr": 0.00024789473684210526, "low_lr": 4.957894736842106e-06, "step": 1429 }, { "epoch": 3.7606837606837606, "grad_norm": 1.4051158428192139, "learning_rate": 0.0002473684210526316, "loss": 1.299, "step": 1430 }, { "epoch": 3.7606837606837606, "high_lr": 0.0002473684210526316, "low_lr": 4.947368421052632e-06, "step": 1430 }, { "epoch": 3.7606837606837606, "high_lr": 0.0002473684210526316, "low_lr": 4.947368421052632e-06, "step": 1430 }, { "epoch": 3.7606837606837606, "high_lr": 0.0002473684210526316, "low_lr": 4.947368421052632e-06, "step": 1430 }, { "epoch": 3.7606837606837606, "high_lr": 0.0002473684210526316, "low_lr": 4.947368421052632e-06, "step": 1430 }, { "epoch": 3.7606837606837606, "high_lr": 0.0002473684210526316, "low_lr": 4.947368421052632e-06, "step": 1430 }, { "epoch": 3.7606837606837606, "high_lr": 0.0002473684210526316, "low_lr": 4.947368421052632e-06, "step": 1430 }, { "epoch": 3.7606837606837606, "high_lr": 0.0002473684210526316, "low_lr": 4.947368421052632e-06, "step": 1430 }, { "epoch": 3.7606837606837606, "high_lr": 0.0002473684210526316, "low_lr": 4.947368421052632e-06, "step": 1430 }, { "epoch": 3.7633136094674557, "grad_norm": 1.4473483562469482, "learning_rate": 0.0002468421052631579, "loss": 1.3305, "step": 1431 }, { "epoch": 3.7633136094674557, "high_lr": 0.0002468421052631579, "low_lr": 4.936842105263158e-06, "step": 1431 }, { "epoch": 3.7633136094674557, "high_lr": 0.0002468421052631579, "low_lr": 4.936842105263158e-06, "step": 1431 }, { "epoch": 3.7633136094674557, "high_lr": 0.0002468421052631579, "low_lr": 4.936842105263158e-06, "step": 1431 }, { "epoch": 3.7633136094674557, "high_lr": 0.0002468421052631579, "low_lr": 4.936842105263158e-06, "step": 1431 }, { "epoch": 3.7633136094674557, "high_lr": 0.0002468421052631579, "low_lr": 4.936842105263158e-06, "step": 1431 }, { "epoch": 3.7633136094674557, "high_lr": 0.0002468421052631579, "low_lr": 4.936842105263158e-06, "step": 1431 }, { "epoch": 3.7633136094674557, "high_lr": 0.0002468421052631579, "low_lr": 4.936842105263158e-06, "step": 1431 }, { "epoch": 3.7633136094674557, "high_lr": 0.0002468421052631579, "low_lr": 4.936842105263158e-06, "step": 1431 }, { "epoch": 3.7659434582511504, "grad_norm": 1.4937715530395508, "learning_rate": 0.0002463157894736842, "loss": 1.2965, "step": 1432 }, { "epoch": 3.7659434582511504, "high_lr": 0.0002463157894736842, "low_lr": 4.926315789473685e-06, "step": 1432 }, { "epoch": 3.7659434582511504, "high_lr": 0.0002463157894736842, "low_lr": 4.926315789473685e-06, "step": 1432 }, { "epoch": 3.7659434582511504, "high_lr": 0.0002463157894736842, "low_lr": 4.926315789473685e-06, "step": 1432 }, { "epoch": 3.7659434582511504, "high_lr": 0.0002463157894736842, "low_lr": 4.926315789473685e-06, "step": 1432 }, { "epoch": 3.7659434582511504, "high_lr": 0.0002463157894736842, "low_lr": 4.926315789473685e-06, "step": 1432 }, { "epoch": 3.7659434582511504, "high_lr": 0.0002463157894736842, "low_lr": 4.926315789473685e-06, "step": 1432 }, { "epoch": 3.7659434582511504, "high_lr": 0.0002463157894736842, "low_lr": 4.926315789473685e-06, "step": 1432 }, { "epoch": 3.7659434582511504, "high_lr": 0.0002463157894736842, "low_lr": 4.926315789473685e-06, "step": 1432 }, { "epoch": 3.7685733070348455, "grad_norm": 1.414452314376831, "learning_rate": 0.0002457894736842105, "loss": 1.3198, "step": 1433 }, { "epoch": 3.7685733070348455, "high_lr": 0.0002457894736842105, "low_lr": 4.915789473684211e-06, "step": 1433 }, { "epoch": 3.7685733070348455, "high_lr": 0.0002457894736842105, "low_lr": 4.915789473684211e-06, "step": 1433 }, { "epoch": 3.7685733070348455, "high_lr": 0.0002457894736842105, "low_lr": 4.915789473684211e-06, "step": 1433 }, { "epoch": 3.7685733070348455, "high_lr": 0.0002457894736842105, "low_lr": 4.915789473684211e-06, "step": 1433 }, { "epoch": 3.7685733070348455, "high_lr": 0.0002457894736842105, "low_lr": 4.915789473684211e-06, "step": 1433 }, { "epoch": 3.7685733070348455, "high_lr": 0.0002457894736842105, "low_lr": 4.915789473684211e-06, "step": 1433 }, { "epoch": 3.7685733070348455, "high_lr": 0.0002457894736842105, "low_lr": 4.915789473684211e-06, "step": 1433 }, { "epoch": 3.7685733070348455, "high_lr": 0.0002457894736842105, "low_lr": 4.915789473684211e-06, "step": 1433 }, { "epoch": 3.7712031558185406, "grad_norm": 1.4509018659591675, "learning_rate": 0.00024526315789473685, "loss": 1.3448, "step": 1434 }, { "epoch": 3.7712031558185406, "high_lr": 0.00024526315789473685, "low_lr": 4.905263157894737e-06, "step": 1434 }, { "epoch": 3.7712031558185406, "high_lr": 0.00024526315789473685, "low_lr": 4.905263157894737e-06, "step": 1434 }, { "epoch": 3.7712031558185406, "high_lr": 0.00024526315789473685, "low_lr": 4.905263157894737e-06, "step": 1434 }, { "epoch": 3.7712031558185406, "high_lr": 0.00024526315789473685, "low_lr": 4.905263157894737e-06, "step": 1434 }, { "epoch": 3.7712031558185406, "high_lr": 0.00024526315789473685, "low_lr": 4.905263157894737e-06, "step": 1434 }, { "epoch": 3.7712031558185406, "high_lr": 0.00024526315789473685, "low_lr": 4.905263157894737e-06, "step": 1434 }, { "epoch": 3.7712031558185406, "high_lr": 0.00024526315789473685, "low_lr": 4.905263157894737e-06, "step": 1434 }, { "epoch": 3.7712031558185406, "high_lr": 0.00024526315789473685, "low_lr": 4.905263157894737e-06, "step": 1434 }, { "epoch": 3.7738330046022353, "grad_norm": 1.3394984006881714, "learning_rate": 0.0002447368421052632, "loss": 1.3366, "step": 1435 }, { "epoch": 3.7738330046022353, "high_lr": 0.0002447368421052632, "low_lr": 4.894736842105264e-06, "step": 1435 }, { "epoch": 3.7738330046022353, "high_lr": 0.0002447368421052632, "low_lr": 4.894736842105264e-06, "step": 1435 }, { "epoch": 3.7738330046022353, "high_lr": 0.0002447368421052632, "low_lr": 4.894736842105264e-06, "step": 1435 }, { "epoch": 3.7738330046022353, "high_lr": 0.0002447368421052632, "low_lr": 4.894736842105264e-06, "step": 1435 }, { "epoch": 3.7738330046022353, "high_lr": 0.0002447368421052632, "low_lr": 4.894736842105264e-06, "step": 1435 }, { "epoch": 3.7738330046022353, "high_lr": 0.0002447368421052632, "low_lr": 4.894736842105264e-06, "step": 1435 }, { "epoch": 3.7738330046022353, "high_lr": 0.0002447368421052632, "low_lr": 4.894736842105264e-06, "step": 1435 }, { "epoch": 3.7738330046022353, "high_lr": 0.0002447368421052632, "low_lr": 4.894736842105264e-06, "step": 1435 }, { "epoch": 3.7764628533859304, "grad_norm": 1.3452506065368652, "learning_rate": 0.0002442105263157895, "loss": 1.2546, "step": 1436 }, { "epoch": 3.7764628533859304, "high_lr": 0.0002442105263157895, "low_lr": 4.88421052631579e-06, "step": 1436 }, { "epoch": 3.7764628533859304, "high_lr": 0.0002442105263157895, "low_lr": 4.88421052631579e-06, "step": 1436 }, { "epoch": 3.7764628533859304, "high_lr": 0.0002442105263157895, "low_lr": 4.88421052631579e-06, "step": 1436 }, { "epoch": 3.7764628533859304, "high_lr": 0.0002442105263157895, "low_lr": 4.88421052631579e-06, "step": 1436 }, { "epoch": 3.7764628533859304, "high_lr": 0.0002442105263157895, "low_lr": 4.88421052631579e-06, "step": 1436 }, { "epoch": 3.7764628533859304, "high_lr": 0.0002442105263157895, "low_lr": 4.88421052631579e-06, "step": 1436 }, { "epoch": 3.7764628533859304, "high_lr": 0.0002442105263157895, "low_lr": 4.88421052631579e-06, "step": 1436 }, { "epoch": 3.7764628533859304, "high_lr": 0.0002442105263157895, "low_lr": 4.88421052631579e-06, "step": 1436 }, { "epoch": 3.779092702169625, "grad_norm": 1.4009126424789429, "learning_rate": 0.0002436842105263158, "loss": 1.3374, "step": 1437 }, { "epoch": 3.779092702169625, "high_lr": 0.0002436842105263158, "low_lr": 4.873684210526316e-06, "step": 1437 }, { "epoch": 3.779092702169625, "high_lr": 0.0002436842105263158, "low_lr": 4.873684210526316e-06, "step": 1437 }, { "epoch": 3.779092702169625, "high_lr": 0.0002436842105263158, "low_lr": 4.873684210526316e-06, "step": 1437 }, { "epoch": 3.779092702169625, "high_lr": 0.0002436842105263158, "low_lr": 4.873684210526316e-06, "step": 1437 }, { "epoch": 3.779092702169625, "high_lr": 0.0002436842105263158, "low_lr": 4.873684210526316e-06, "step": 1437 }, { "epoch": 3.779092702169625, "high_lr": 0.0002436842105263158, "low_lr": 4.873684210526316e-06, "step": 1437 }, { "epoch": 3.779092702169625, "high_lr": 0.0002436842105263158, "low_lr": 4.873684210526316e-06, "step": 1437 }, { "epoch": 3.779092702169625, "high_lr": 0.0002436842105263158, "low_lr": 4.873684210526316e-06, "step": 1437 }, { "epoch": 3.78172255095332, "grad_norm": 1.3517147302627563, "learning_rate": 0.0002431578947368421, "loss": 1.3329, "step": 1438 }, { "epoch": 3.78172255095332, "high_lr": 0.0002431578947368421, "low_lr": 4.863157894736843e-06, "step": 1438 }, { "epoch": 3.78172255095332, "high_lr": 0.0002431578947368421, "low_lr": 4.863157894736843e-06, "step": 1438 }, { "epoch": 3.78172255095332, "high_lr": 0.0002431578947368421, "low_lr": 4.863157894736843e-06, "step": 1438 }, { "epoch": 3.78172255095332, "high_lr": 0.0002431578947368421, "low_lr": 4.863157894736843e-06, "step": 1438 }, { "epoch": 3.78172255095332, "high_lr": 0.0002431578947368421, "low_lr": 4.863157894736843e-06, "step": 1438 }, { "epoch": 3.78172255095332, "high_lr": 0.0002431578947368421, "low_lr": 4.863157894736843e-06, "step": 1438 }, { "epoch": 3.78172255095332, "high_lr": 0.0002431578947368421, "low_lr": 4.863157894736843e-06, "step": 1438 }, { "epoch": 3.78172255095332, "high_lr": 0.0002431578947368421, "low_lr": 4.863157894736843e-06, "step": 1438 }, { "epoch": 3.784352399737015, "grad_norm": 1.3775666952133179, "learning_rate": 0.00024263157894736841, "loss": 1.2801, "step": 1439 }, { "epoch": 3.784352399737015, "high_lr": 0.00024263157894736841, "low_lr": 4.852631578947369e-06, "step": 1439 }, { "epoch": 3.784352399737015, "high_lr": 0.00024263157894736841, "low_lr": 4.852631578947369e-06, "step": 1439 }, { "epoch": 3.784352399737015, "high_lr": 0.00024263157894736841, "low_lr": 4.852631578947369e-06, "step": 1439 }, { "epoch": 3.784352399737015, "high_lr": 0.00024263157894736841, "low_lr": 4.852631578947369e-06, "step": 1439 }, { "epoch": 3.784352399737015, "high_lr": 0.00024263157894736841, "low_lr": 4.852631578947369e-06, "step": 1439 }, { "epoch": 3.784352399737015, "high_lr": 0.00024263157894736841, "low_lr": 4.852631578947369e-06, "step": 1439 }, { "epoch": 3.784352399737015, "high_lr": 0.00024263157894736841, "low_lr": 4.852631578947369e-06, "step": 1439 }, { "epoch": 3.784352399737015, "high_lr": 0.00024263157894736841, "low_lr": 4.852631578947369e-06, "step": 1439 }, { "epoch": 3.78698224852071, "grad_norm": 1.4380093812942505, "learning_rate": 0.00024210526315789475, "loss": 1.2578, "step": 1440 }, { "epoch": 3.78698224852071, "high_lr": 0.00024210526315789475, "low_lr": 4.842105263157895e-06, "step": 1440 }, { "epoch": 3.78698224852071, "high_lr": 0.00024210526315789475, "low_lr": 4.842105263157895e-06, "step": 1440 }, { "epoch": 3.78698224852071, "high_lr": 0.00024210526315789475, "low_lr": 4.842105263157895e-06, "step": 1440 }, { "epoch": 3.78698224852071, "high_lr": 0.00024210526315789475, "low_lr": 4.842105263157895e-06, "step": 1440 }, { "epoch": 3.78698224852071, "high_lr": 0.00024210526315789475, "low_lr": 4.842105263157895e-06, "step": 1440 }, { "epoch": 3.78698224852071, "high_lr": 0.00024210526315789475, "low_lr": 4.842105263157895e-06, "step": 1440 }, { "epoch": 3.78698224852071, "high_lr": 0.00024210526315789475, "low_lr": 4.842105263157895e-06, "step": 1440 }, { "epoch": 3.78698224852071, "high_lr": 0.00024210526315789475, "low_lr": 4.842105263157895e-06, "step": 1440 }, { "epoch": 3.789612097304405, "grad_norm": 1.425616979598999, "learning_rate": 0.00024157894736842107, "loss": 1.3009, "step": 1441 }, { "epoch": 3.789612097304405, "high_lr": 0.00024157894736842107, "low_lr": 4.831578947368422e-06, "step": 1441 }, { "epoch": 3.789612097304405, "high_lr": 0.00024157894736842107, "low_lr": 4.831578947368422e-06, "step": 1441 }, { "epoch": 3.789612097304405, "high_lr": 0.00024157894736842107, "low_lr": 4.831578947368422e-06, "step": 1441 }, { "epoch": 3.789612097304405, "high_lr": 0.00024157894736842107, "low_lr": 4.831578947368422e-06, "step": 1441 }, { "epoch": 3.789612097304405, "high_lr": 0.00024157894736842107, "low_lr": 4.831578947368422e-06, "step": 1441 }, { "epoch": 3.789612097304405, "high_lr": 0.00024157894736842107, "low_lr": 4.831578947368422e-06, "step": 1441 }, { "epoch": 3.789612097304405, "high_lr": 0.00024157894736842107, "low_lr": 4.831578947368422e-06, "step": 1441 }, { "epoch": 3.789612097304405, "high_lr": 0.00024157894736842107, "low_lr": 4.831578947368422e-06, "step": 1441 }, { "epoch": 3.7922419460881, "grad_norm": 1.315299391746521, "learning_rate": 0.00024105263157894738, "loss": 1.2655, "step": 1442 }, { "epoch": 3.7922419460881, "high_lr": 0.00024105263157894738, "low_lr": 4.821052631578948e-06, "step": 1442 }, { "epoch": 3.7922419460881, "high_lr": 0.00024105263157894738, "low_lr": 4.821052631578948e-06, "step": 1442 }, { "epoch": 3.7922419460881, "high_lr": 0.00024105263157894738, "low_lr": 4.821052631578948e-06, "step": 1442 }, { "epoch": 3.7922419460881, "high_lr": 0.00024105263157894738, "low_lr": 4.821052631578948e-06, "step": 1442 }, { "epoch": 3.7922419460881, "high_lr": 0.00024105263157894738, "low_lr": 4.821052631578948e-06, "step": 1442 }, { "epoch": 3.7922419460881, "high_lr": 0.00024105263157894738, "low_lr": 4.821052631578948e-06, "step": 1442 }, { "epoch": 3.7922419460881, "high_lr": 0.00024105263157894738, "low_lr": 4.821052631578948e-06, "step": 1442 }, { "epoch": 3.7922419460881, "high_lr": 0.00024105263157894738, "low_lr": 4.821052631578948e-06, "step": 1442 }, { "epoch": 3.7948717948717947, "grad_norm": 1.506906509399414, "learning_rate": 0.00024052631578947367, "loss": 1.2971, "step": 1443 }, { "epoch": 3.7948717948717947, "high_lr": 0.00024052631578947367, "low_lr": 4.8105263157894735e-06, "step": 1443 }, { "epoch": 3.7948717948717947, "high_lr": 0.00024052631578947367, "low_lr": 4.8105263157894735e-06, "step": 1443 }, { "epoch": 3.7948717948717947, "high_lr": 0.00024052631578947367, "low_lr": 4.8105263157894735e-06, "step": 1443 }, { "epoch": 3.7948717948717947, "high_lr": 0.00024052631578947367, "low_lr": 4.8105263157894735e-06, "step": 1443 }, { "epoch": 3.7948717948717947, "high_lr": 0.00024052631578947367, "low_lr": 4.8105263157894735e-06, "step": 1443 }, { "epoch": 3.7948717948717947, "high_lr": 0.00024052631578947367, "low_lr": 4.8105263157894735e-06, "step": 1443 }, { "epoch": 3.7948717948717947, "high_lr": 0.00024052631578947367, "low_lr": 4.8105263157894735e-06, "step": 1443 }, { "epoch": 3.7948717948717947, "high_lr": 0.00024052631578947367, "low_lr": 4.8105263157894735e-06, "step": 1443 }, { "epoch": 3.79750164365549, "grad_norm": 1.4422025680541992, "learning_rate": 0.00024, "loss": 1.3258, "step": 1444 }, { "epoch": 3.79750164365549, "high_lr": 0.00024, "low_lr": 4.800000000000001e-06, "step": 1444 }, { "epoch": 3.79750164365549, "high_lr": 0.00024, "low_lr": 4.800000000000001e-06, "step": 1444 }, { "epoch": 3.79750164365549, "high_lr": 0.00024, "low_lr": 4.800000000000001e-06, "step": 1444 }, { "epoch": 3.79750164365549, "high_lr": 0.00024, "low_lr": 4.800000000000001e-06, "step": 1444 }, { "epoch": 3.79750164365549, "high_lr": 0.00024, "low_lr": 4.800000000000001e-06, "step": 1444 }, { "epoch": 3.79750164365549, "high_lr": 0.00024, "low_lr": 4.800000000000001e-06, "step": 1444 }, { "epoch": 3.79750164365549, "high_lr": 0.00024, "low_lr": 4.800000000000001e-06, "step": 1444 }, { "epoch": 3.79750164365549, "high_lr": 0.00024, "low_lr": 4.800000000000001e-06, "step": 1444 }, { "epoch": 3.800131492439185, "grad_norm": 1.4381208419799805, "learning_rate": 0.00023947368421052632, "loss": 1.3295, "step": 1445 }, { "epoch": 3.800131492439185, "high_lr": 0.00023947368421052632, "low_lr": 4.789473684210527e-06, "step": 1445 }, { "epoch": 3.800131492439185, "high_lr": 0.00023947368421052632, "low_lr": 4.789473684210527e-06, "step": 1445 }, { "epoch": 3.800131492439185, "high_lr": 0.00023947368421052632, "low_lr": 4.789473684210527e-06, "step": 1445 }, { "epoch": 3.800131492439185, "high_lr": 0.00023947368421052632, "low_lr": 4.789473684210527e-06, "step": 1445 }, { "epoch": 3.800131492439185, "high_lr": 0.00023947368421052632, "low_lr": 4.789473684210527e-06, "step": 1445 }, { "epoch": 3.800131492439185, "high_lr": 0.00023947368421052632, "low_lr": 4.789473684210527e-06, "step": 1445 }, { "epoch": 3.800131492439185, "high_lr": 0.00023947368421052632, "low_lr": 4.789473684210527e-06, "step": 1445 }, { "epoch": 3.800131492439185, "high_lr": 0.00023947368421052632, "low_lr": 4.789473684210527e-06, "step": 1445 }, { "epoch": 3.8027613412228796, "grad_norm": 1.4451509714126587, "learning_rate": 0.00023894736842105263, "loss": 1.2901, "step": 1446 }, { "epoch": 3.8027613412228796, "high_lr": 0.00023894736842105263, "low_lr": 4.778947368421053e-06, "step": 1446 }, { "epoch": 3.8027613412228796, "high_lr": 0.00023894736842105263, "low_lr": 4.778947368421053e-06, "step": 1446 }, { "epoch": 3.8027613412228796, "high_lr": 0.00023894736842105263, "low_lr": 4.778947368421053e-06, "step": 1446 }, { "epoch": 3.8027613412228796, "high_lr": 0.00023894736842105263, "low_lr": 4.778947368421053e-06, "step": 1446 }, { "epoch": 3.8027613412228796, "high_lr": 0.00023894736842105263, "low_lr": 4.778947368421053e-06, "step": 1446 }, { "epoch": 3.8027613412228796, "high_lr": 0.00023894736842105263, "low_lr": 4.778947368421053e-06, "step": 1446 }, { "epoch": 3.8027613412228796, "high_lr": 0.00023894736842105263, "low_lr": 4.778947368421053e-06, "step": 1446 }, { "epoch": 3.8027613412228796, "high_lr": 0.00023894736842105263, "low_lr": 4.778947368421053e-06, "step": 1446 }, { "epoch": 3.8053911900065747, "grad_norm": 1.4591439962387085, "learning_rate": 0.00023842105263157895, "loss": 1.2576, "step": 1447 }, { "epoch": 3.8053911900065747, "high_lr": 0.00023842105263157895, "low_lr": 4.76842105263158e-06, "step": 1447 }, { "epoch": 3.8053911900065747, "high_lr": 0.00023842105263157895, "low_lr": 4.76842105263158e-06, "step": 1447 }, { "epoch": 3.8053911900065747, "high_lr": 0.00023842105263157895, "low_lr": 4.76842105263158e-06, "step": 1447 }, { "epoch": 3.8053911900065747, "high_lr": 0.00023842105263157895, "low_lr": 4.76842105263158e-06, "step": 1447 }, { "epoch": 3.8053911900065747, "high_lr": 0.00023842105263157895, "low_lr": 4.76842105263158e-06, "step": 1447 }, { "epoch": 3.8053911900065747, "high_lr": 0.00023842105263157895, "low_lr": 4.76842105263158e-06, "step": 1447 }, { "epoch": 3.8053911900065747, "high_lr": 0.00023842105263157895, "low_lr": 4.76842105263158e-06, "step": 1447 }, { "epoch": 3.8053911900065747, "high_lr": 0.00023842105263157895, "low_lr": 4.76842105263158e-06, "step": 1447 }, { "epoch": 3.8080210387902698, "grad_norm": 1.5963643789291382, "learning_rate": 0.00023789473684210529, "loss": 1.3422, "step": 1448 }, { "epoch": 3.8080210387902698, "high_lr": 0.00023789473684210529, "low_lr": 4.757894736842106e-06, "step": 1448 }, { "epoch": 3.8080210387902698, "high_lr": 0.00023789473684210529, "low_lr": 4.757894736842106e-06, "step": 1448 }, { "epoch": 3.8080210387902698, "high_lr": 0.00023789473684210529, "low_lr": 4.757894736842106e-06, "step": 1448 }, { "epoch": 3.8080210387902698, "high_lr": 0.00023789473684210529, "low_lr": 4.757894736842106e-06, "step": 1448 }, { "epoch": 3.8080210387902698, "high_lr": 0.00023789473684210529, "low_lr": 4.757894736842106e-06, "step": 1448 }, { "epoch": 3.8080210387902698, "high_lr": 0.00023789473684210529, "low_lr": 4.757894736842106e-06, "step": 1448 }, { "epoch": 3.8080210387902698, "high_lr": 0.00023789473684210529, "low_lr": 4.757894736842106e-06, "step": 1448 }, { "epoch": 3.8080210387902698, "high_lr": 0.00023789473684210529, "low_lr": 4.757894736842106e-06, "step": 1448 }, { "epoch": 3.8106508875739644, "grad_norm": 1.3172365427017212, "learning_rate": 0.0002373684210526316, "loss": 1.2781, "step": 1449 }, { "epoch": 3.8106508875739644, "high_lr": 0.0002373684210526316, "low_lr": 4.747368421052632e-06, "step": 1449 }, { "epoch": 3.8106508875739644, "high_lr": 0.0002373684210526316, "low_lr": 4.747368421052632e-06, "step": 1449 }, { "epoch": 3.8106508875739644, "high_lr": 0.0002373684210526316, "low_lr": 4.747368421052632e-06, "step": 1449 }, { "epoch": 3.8106508875739644, "high_lr": 0.0002373684210526316, "low_lr": 4.747368421052632e-06, "step": 1449 }, { "epoch": 3.8106508875739644, "high_lr": 0.0002373684210526316, "low_lr": 4.747368421052632e-06, "step": 1449 }, { "epoch": 3.8106508875739644, "high_lr": 0.0002373684210526316, "low_lr": 4.747368421052632e-06, "step": 1449 }, { "epoch": 3.8106508875739644, "high_lr": 0.0002373684210526316, "low_lr": 4.747368421052632e-06, "step": 1449 }, { "epoch": 3.8106508875739644, "high_lr": 0.0002373684210526316, "low_lr": 4.747368421052632e-06, "step": 1449 }, { "epoch": 3.8132807363576595, "grad_norm": 1.4922595024108887, "learning_rate": 0.00023684210526315788, "loss": 1.2936, "step": 1450 }, { "epoch": 3.8132807363576595, "high_lr": 0.00023684210526315788, "low_lr": 4.736842105263158e-06, "step": 1450 }, { "epoch": 3.8132807363576595, "high_lr": 0.00023684210526315788, "low_lr": 4.736842105263158e-06, "step": 1450 }, { "epoch": 3.8132807363576595, "high_lr": 0.00023684210526315788, "low_lr": 4.736842105263158e-06, "step": 1450 }, { "epoch": 3.8132807363576595, "high_lr": 0.00023684210526315788, "low_lr": 4.736842105263158e-06, "step": 1450 }, { "epoch": 3.8132807363576595, "high_lr": 0.00023684210526315788, "low_lr": 4.736842105263158e-06, "step": 1450 }, { "epoch": 3.8132807363576595, "high_lr": 0.00023684210526315788, "low_lr": 4.736842105263158e-06, "step": 1450 }, { "epoch": 3.8132807363576595, "high_lr": 0.00023684210526315788, "low_lr": 4.736842105263158e-06, "step": 1450 }, { "epoch": 3.8132807363576595, "high_lr": 0.00023684210526315788, "low_lr": 4.736842105263158e-06, "step": 1450 }, { "epoch": 3.8159105851413546, "grad_norm": 1.4833645820617676, "learning_rate": 0.0002363157894736842, "loss": 1.2909, "step": 1451 }, { "epoch": 3.8159105851413546, "high_lr": 0.0002363157894736842, "low_lr": 4.726315789473684e-06, "step": 1451 }, { "epoch": 3.8159105851413546, "high_lr": 0.0002363157894736842, "low_lr": 4.726315789473684e-06, "step": 1451 }, { "epoch": 3.8159105851413546, "high_lr": 0.0002363157894736842, "low_lr": 4.726315789473684e-06, "step": 1451 }, { "epoch": 3.8159105851413546, "high_lr": 0.0002363157894736842, "low_lr": 4.726315789473684e-06, "step": 1451 }, { "epoch": 3.8159105851413546, "high_lr": 0.0002363157894736842, "low_lr": 4.726315789473684e-06, "step": 1451 }, { "epoch": 3.8159105851413546, "high_lr": 0.0002363157894736842, "low_lr": 4.726315789473684e-06, "step": 1451 }, { "epoch": 3.8159105851413546, "high_lr": 0.0002363157894736842, "low_lr": 4.726315789473684e-06, "step": 1451 }, { "epoch": 3.8159105851413546, "high_lr": 0.0002363157894736842, "low_lr": 4.726315789473684e-06, "step": 1451 }, { "epoch": 3.8185404339250493, "grad_norm": 1.4897242784500122, "learning_rate": 0.00023578947368421054, "loss": 1.2462, "step": 1452 }, { "epoch": 3.8185404339250493, "high_lr": 0.00023578947368421054, "low_lr": 4.71578947368421e-06, "step": 1452 }, { "epoch": 3.8185404339250493, "high_lr": 0.00023578947368421054, "low_lr": 4.71578947368421e-06, "step": 1452 }, { "epoch": 3.8185404339250493, "high_lr": 0.00023578947368421054, "low_lr": 4.71578947368421e-06, "step": 1452 }, { "epoch": 3.8185404339250493, "high_lr": 0.00023578947368421054, "low_lr": 4.71578947368421e-06, "step": 1452 }, { "epoch": 3.8185404339250493, "high_lr": 0.00023578947368421054, "low_lr": 4.71578947368421e-06, "step": 1452 }, { "epoch": 3.8185404339250493, "high_lr": 0.00023578947368421054, "low_lr": 4.71578947368421e-06, "step": 1452 }, { "epoch": 3.8185404339250493, "high_lr": 0.00023578947368421054, "low_lr": 4.71578947368421e-06, "step": 1452 }, { "epoch": 3.8185404339250493, "high_lr": 0.00023578947368421054, "low_lr": 4.71578947368421e-06, "step": 1452 }, { "epoch": 3.8211702827087444, "grad_norm": 1.5979965925216675, "learning_rate": 0.00023526315789473685, "loss": 1.3424, "step": 1453 }, { "epoch": 3.8211702827087444, "high_lr": 0.00023526315789473685, "low_lr": 4.705263157894738e-06, "step": 1453 }, { "epoch": 3.8211702827087444, "high_lr": 0.00023526315789473685, "low_lr": 4.705263157894738e-06, "step": 1453 }, { "epoch": 3.8211702827087444, "high_lr": 0.00023526315789473685, "low_lr": 4.705263157894738e-06, "step": 1453 }, { "epoch": 3.8211702827087444, "high_lr": 0.00023526315789473685, "low_lr": 4.705263157894738e-06, "step": 1453 }, { "epoch": 3.8211702827087444, "high_lr": 0.00023526315789473685, "low_lr": 4.705263157894738e-06, "step": 1453 }, { "epoch": 3.8211702827087444, "high_lr": 0.00023526315789473685, "low_lr": 4.705263157894738e-06, "step": 1453 }, { "epoch": 3.8211702827087444, "high_lr": 0.00023526315789473685, "low_lr": 4.705263157894738e-06, "step": 1453 }, { "epoch": 3.8211702827087444, "high_lr": 0.00023526315789473685, "low_lr": 4.705263157894738e-06, "step": 1453 }, { "epoch": 3.823800131492439, "grad_norm": 1.2779732942581177, "learning_rate": 0.00023473684210526316, "loss": 1.3107, "step": 1454 }, { "epoch": 3.823800131492439, "high_lr": 0.00023473684210526316, "low_lr": 4.694736842105264e-06, "step": 1454 }, { "epoch": 3.823800131492439, "high_lr": 0.00023473684210526316, "low_lr": 4.694736842105264e-06, "step": 1454 }, { "epoch": 3.823800131492439, "high_lr": 0.00023473684210526316, "low_lr": 4.694736842105264e-06, "step": 1454 }, { "epoch": 3.823800131492439, "high_lr": 0.00023473684210526316, "low_lr": 4.694736842105264e-06, "step": 1454 }, { "epoch": 3.823800131492439, "high_lr": 0.00023473684210526316, "low_lr": 4.694736842105264e-06, "step": 1454 }, { "epoch": 3.823800131492439, "high_lr": 0.00023473684210526316, "low_lr": 4.694736842105264e-06, "step": 1454 }, { "epoch": 3.823800131492439, "high_lr": 0.00023473684210526316, "low_lr": 4.694736842105264e-06, "step": 1454 }, { "epoch": 3.823800131492439, "high_lr": 0.00023473684210526316, "low_lr": 4.694736842105264e-06, "step": 1454 }, { "epoch": 3.826429980276134, "grad_norm": 1.4164676666259766, "learning_rate": 0.00023421052631578948, "loss": 1.3561, "step": 1455 }, { "epoch": 3.826429980276134, "high_lr": 0.00023421052631578948, "low_lr": 4.68421052631579e-06, "step": 1455 }, { "epoch": 3.826429980276134, "high_lr": 0.00023421052631578948, "low_lr": 4.68421052631579e-06, "step": 1455 }, { "epoch": 3.826429980276134, "high_lr": 0.00023421052631578948, "low_lr": 4.68421052631579e-06, "step": 1455 }, { "epoch": 3.826429980276134, "high_lr": 0.00023421052631578948, "low_lr": 4.68421052631579e-06, "step": 1455 }, { "epoch": 3.826429980276134, "high_lr": 0.00023421052631578948, "low_lr": 4.68421052631579e-06, "step": 1455 }, { "epoch": 3.826429980276134, "high_lr": 0.00023421052631578948, "low_lr": 4.68421052631579e-06, "step": 1455 }, { "epoch": 3.826429980276134, "high_lr": 0.00023421052631578948, "low_lr": 4.68421052631579e-06, "step": 1455 }, { "epoch": 3.826429980276134, "high_lr": 0.00023421052631578948, "low_lr": 4.68421052631579e-06, "step": 1455 }, { "epoch": 3.8290598290598292, "grad_norm": 1.6803197860717773, "learning_rate": 0.00023368421052631582, "loss": 1.3378, "step": 1456 }, { "epoch": 3.8290598290598292, "high_lr": 0.00023368421052631582, "low_lr": 4.6736842105263166e-06, "step": 1456 }, { "epoch": 3.8290598290598292, "high_lr": 0.00023368421052631582, "low_lr": 4.6736842105263166e-06, "step": 1456 }, { "epoch": 3.8290598290598292, "high_lr": 0.00023368421052631582, "low_lr": 4.6736842105263166e-06, "step": 1456 }, { "epoch": 3.8290598290598292, "high_lr": 0.00023368421052631582, "low_lr": 4.6736842105263166e-06, "step": 1456 }, { "epoch": 3.8290598290598292, "high_lr": 0.00023368421052631582, "low_lr": 4.6736842105263166e-06, "step": 1456 }, { "epoch": 3.8290598290598292, "high_lr": 0.00023368421052631582, "low_lr": 4.6736842105263166e-06, "step": 1456 }, { "epoch": 3.8290598290598292, "high_lr": 0.00023368421052631582, "low_lr": 4.6736842105263166e-06, "step": 1456 }, { "epoch": 3.8290598290598292, "high_lr": 0.00023368421052631582, "low_lr": 4.6736842105263166e-06, "step": 1456 }, { "epoch": 3.831689677843524, "grad_norm": 1.4625133275985718, "learning_rate": 0.0002331578947368421, "loss": 1.3359, "step": 1457 }, { "epoch": 3.831689677843524, "high_lr": 0.0002331578947368421, "low_lr": 4.663157894736842e-06, "step": 1457 }, { "epoch": 3.831689677843524, "high_lr": 0.0002331578947368421, "low_lr": 4.663157894736842e-06, "step": 1457 }, { "epoch": 3.831689677843524, "high_lr": 0.0002331578947368421, "low_lr": 4.663157894736842e-06, "step": 1457 }, { "epoch": 3.831689677843524, "high_lr": 0.0002331578947368421, "low_lr": 4.663157894736842e-06, "step": 1457 }, { "epoch": 3.831689677843524, "high_lr": 0.0002331578947368421, "low_lr": 4.663157894736842e-06, "step": 1457 }, { "epoch": 3.831689677843524, "high_lr": 0.0002331578947368421, "low_lr": 4.663157894736842e-06, "step": 1457 }, { "epoch": 3.831689677843524, "high_lr": 0.0002331578947368421, "low_lr": 4.663157894736842e-06, "step": 1457 }, { "epoch": 3.831689677843524, "high_lr": 0.0002331578947368421, "low_lr": 4.663157894736842e-06, "step": 1457 }, { "epoch": 3.834319526627219, "grad_norm": 1.5109957456588745, "learning_rate": 0.00023263157894736841, "loss": 1.2938, "step": 1458 }, { "epoch": 3.834319526627219, "high_lr": 0.00023263157894736841, "low_lr": 4.652631578947368e-06, "step": 1458 }, { "epoch": 3.834319526627219, "high_lr": 0.00023263157894736841, "low_lr": 4.652631578947368e-06, "step": 1458 }, { "epoch": 3.834319526627219, "high_lr": 0.00023263157894736841, "low_lr": 4.652631578947368e-06, "step": 1458 }, { "epoch": 3.834319526627219, "high_lr": 0.00023263157894736841, "low_lr": 4.652631578947368e-06, "step": 1458 }, { "epoch": 3.834319526627219, "high_lr": 0.00023263157894736841, "low_lr": 4.652631578947368e-06, "step": 1458 }, { "epoch": 3.834319526627219, "high_lr": 0.00023263157894736841, "low_lr": 4.652631578947368e-06, "step": 1458 }, { "epoch": 3.834319526627219, "high_lr": 0.00023263157894736841, "low_lr": 4.652631578947368e-06, "step": 1458 }, { "epoch": 3.834319526627219, "high_lr": 0.00023263157894736841, "low_lr": 4.652631578947368e-06, "step": 1458 }, { "epoch": 3.8369493754109136, "grad_norm": 1.3899002075195312, "learning_rate": 0.00023210526315789473, "loss": 1.3382, "step": 1459 }, { "epoch": 3.8369493754109136, "high_lr": 0.00023210526315789473, "low_lr": 4.642105263157895e-06, "step": 1459 }, { "epoch": 3.8369493754109136, "high_lr": 0.00023210526315789473, "low_lr": 4.642105263157895e-06, "step": 1459 }, { "epoch": 3.8369493754109136, "high_lr": 0.00023210526315789473, "low_lr": 4.642105263157895e-06, "step": 1459 }, { "epoch": 3.8369493754109136, "high_lr": 0.00023210526315789473, "low_lr": 4.642105263157895e-06, "step": 1459 }, { "epoch": 3.8369493754109136, "high_lr": 0.00023210526315789473, "low_lr": 4.642105263157895e-06, "step": 1459 }, { "epoch": 3.8369493754109136, "high_lr": 0.00023210526315789473, "low_lr": 4.642105263157895e-06, "step": 1459 }, { "epoch": 3.8369493754109136, "high_lr": 0.00023210526315789473, "low_lr": 4.642105263157895e-06, "step": 1459 }, { "epoch": 3.8369493754109136, "high_lr": 0.00023210526315789473, "low_lr": 4.642105263157895e-06, "step": 1459 }, { "epoch": 3.8395792241946087, "grad_norm": 1.5576353073120117, "learning_rate": 0.00023157894736842107, "loss": 1.3188, "step": 1460 }, { "epoch": 3.8395792241946087, "high_lr": 0.00023157894736842107, "low_lr": 4.631578947368421e-06, "step": 1460 }, { "epoch": 3.8395792241946087, "high_lr": 0.00023157894736842107, "low_lr": 4.631578947368421e-06, "step": 1460 }, { "epoch": 3.8395792241946087, "high_lr": 0.00023157894736842107, "low_lr": 4.631578947368421e-06, "step": 1460 }, { "epoch": 3.8395792241946087, "high_lr": 0.00023157894736842107, "low_lr": 4.631578947368421e-06, "step": 1460 }, { "epoch": 3.8395792241946087, "high_lr": 0.00023157894736842107, "low_lr": 4.631578947368421e-06, "step": 1460 }, { "epoch": 3.8395792241946087, "high_lr": 0.00023157894736842107, "low_lr": 4.631578947368421e-06, "step": 1460 }, { "epoch": 3.8395792241946087, "high_lr": 0.00023157894736842107, "low_lr": 4.631578947368421e-06, "step": 1460 }, { "epoch": 3.8395792241946087, "high_lr": 0.00023157894736842107, "low_lr": 4.631578947368421e-06, "step": 1460 }, { "epoch": 3.842209072978304, "grad_norm": 1.354019045829773, "learning_rate": 0.00023105263157894738, "loss": 1.311, "step": 1461 }, { "epoch": 3.842209072978304, "high_lr": 0.00023105263157894738, "low_lr": 4.621052631578948e-06, "step": 1461 }, { "epoch": 3.842209072978304, "high_lr": 0.00023105263157894738, "low_lr": 4.621052631578948e-06, "step": 1461 }, { "epoch": 3.842209072978304, "high_lr": 0.00023105263157894738, "low_lr": 4.621052631578948e-06, "step": 1461 }, { "epoch": 3.842209072978304, "high_lr": 0.00023105263157894738, "low_lr": 4.621052631578948e-06, "step": 1461 }, { "epoch": 3.842209072978304, "high_lr": 0.00023105263157894738, "low_lr": 4.621052631578948e-06, "step": 1461 }, { "epoch": 3.842209072978304, "high_lr": 0.00023105263157894738, "low_lr": 4.621052631578948e-06, "step": 1461 }, { "epoch": 3.842209072978304, "high_lr": 0.00023105263157894738, "low_lr": 4.621052631578948e-06, "step": 1461 }, { "epoch": 3.842209072978304, "high_lr": 0.00023105263157894738, "low_lr": 4.621052631578948e-06, "step": 1461 }, { "epoch": 3.8448389217619985, "grad_norm": 1.4877536296844482, "learning_rate": 0.0002305263157894737, "loss": 1.2894, "step": 1462 }, { "epoch": 3.8448389217619985, "high_lr": 0.0002305263157894737, "low_lr": 4.6105263157894745e-06, "step": 1462 }, { "epoch": 3.8448389217619985, "high_lr": 0.0002305263157894737, "low_lr": 4.6105263157894745e-06, "step": 1462 }, { "epoch": 3.8448389217619985, "high_lr": 0.0002305263157894737, "low_lr": 4.6105263157894745e-06, "step": 1462 }, { "epoch": 3.8448389217619985, "high_lr": 0.0002305263157894737, "low_lr": 4.6105263157894745e-06, "step": 1462 }, { "epoch": 3.8448389217619985, "high_lr": 0.0002305263157894737, "low_lr": 4.6105263157894745e-06, "step": 1462 }, { "epoch": 3.8448389217619985, "high_lr": 0.0002305263157894737, "low_lr": 4.6105263157894745e-06, "step": 1462 }, { "epoch": 3.8448389217619985, "high_lr": 0.0002305263157894737, "low_lr": 4.6105263157894745e-06, "step": 1462 }, { "epoch": 3.8448389217619985, "high_lr": 0.0002305263157894737, "low_lr": 4.6105263157894745e-06, "step": 1462 }, { "epoch": 3.8474687705456936, "grad_norm": 1.513124942779541, "learning_rate": 0.00023, "loss": 1.3518, "step": 1463 }, { "epoch": 3.8474687705456936, "high_lr": 0.00023, "low_lr": 4.600000000000001e-06, "step": 1463 }, { "epoch": 3.8474687705456936, "high_lr": 0.00023, "low_lr": 4.600000000000001e-06, "step": 1463 }, { "epoch": 3.8474687705456936, "high_lr": 0.00023, "low_lr": 4.600000000000001e-06, "step": 1463 }, { "epoch": 3.8474687705456936, "high_lr": 0.00023, "low_lr": 4.600000000000001e-06, "step": 1463 }, { "epoch": 3.8474687705456936, "high_lr": 0.00023, "low_lr": 4.600000000000001e-06, "step": 1463 }, { "epoch": 3.8474687705456936, "high_lr": 0.00023, "low_lr": 4.600000000000001e-06, "step": 1463 }, { "epoch": 3.8474687705456936, "high_lr": 0.00023, "low_lr": 4.600000000000001e-06, "step": 1463 }, { "epoch": 3.8474687705456936, "high_lr": 0.00023, "low_lr": 4.600000000000001e-06, "step": 1463 }, { "epoch": 3.8500986193293887, "grad_norm": 1.502281665802002, "learning_rate": 0.00022947368421052632, "loss": 1.2577, "step": 1464 }, { "epoch": 3.8500986193293887, "high_lr": 0.00022947368421052632, "low_lr": 4.589473684210526e-06, "step": 1464 }, { "epoch": 3.8500986193293887, "high_lr": 0.00022947368421052632, "low_lr": 4.589473684210526e-06, "step": 1464 }, { "epoch": 3.8500986193293887, "high_lr": 0.00022947368421052632, "low_lr": 4.589473684210526e-06, "step": 1464 }, { "epoch": 3.8500986193293887, "high_lr": 0.00022947368421052632, "low_lr": 4.589473684210526e-06, "step": 1464 }, { "epoch": 3.8500986193293887, "high_lr": 0.00022947368421052632, "low_lr": 4.589473684210526e-06, "step": 1464 }, { "epoch": 3.8500986193293887, "high_lr": 0.00022947368421052632, "low_lr": 4.589473684210526e-06, "step": 1464 }, { "epoch": 3.8500986193293887, "high_lr": 0.00022947368421052632, "low_lr": 4.589473684210526e-06, "step": 1464 }, { "epoch": 3.8500986193293887, "high_lr": 0.00022947368421052632, "low_lr": 4.589473684210526e-06, "step": 1464 }, { "epoch": 3.8527284681130833, "grad_norm": 1.6253715753555298, "learning_rate": 0.00022894736842105263, "loss": 1.2833, "step": 1465 }, { "epoch": 3.8527284681130833, "high_lr": 0.00022894736842105263, "low_lr": 4.578947368421053e-06, "step": 1465 }, { "epoch": 3.8527284681130833, "high_lr": 0.00022894736842105263, "low_lr": 4.578947368421053e-06, "step": 1465 }, { "epoch": 3.8527284681130833, "high_lr": 0.00022894736842105263, "low_lr": 4.578947368421053e-06, "step": 1465 }, { "epoch": 3.8527284681130833, "high_lr": 0.00022894736842105263, "low_lr": 4.578947368421053e-06, "step": 1465 }, { "epoch": 3.8527284681130833, "high_lr": 0.00022894736842105263, "low_lr": 4.578947368421053e-06, "step": 1465 }, { "epoch": 3.8527284681130833, "high_lr": 0.00022894736842105263, "low_lr": 4.578947368421053e-06, "step": 1465 }, { "epoch": 3.8527284681130833, "high_lr": 0.00022894736842105263, "low_lr": 4.578947368421053e-06, "step": 1465 }, { "epoch": 3.8527284681130833, "high_lr": 0.00022894736842105263, "low_lr": 4.578947368421053e-06, "step": 1465 }, { "epoch": 3.8553583168967784, "grad_norm": 1.4256298542022705, "learning_rate": 0.00022842105263157895, "loss": 1.3053, "step": 1466 }, { "epoch": 3.8553583168967784, "high_lr": 0.00022842105263157895, "low_lr": 4.568421052631579e-06, "step": 1466 }, { "epoch": 3.8553583168967784, "high_lr": 0.00022842105263157895, "low_lr": 4.568421052631579e-06, "step": 1466 }, { "epoch": 3.8553583168967784, "high_lr": 0.00022842105263157895, "low_lr": 4.568421052631579e-06, "step": 1466 }, { "epoch": 3.8553583168967784, "high_lr": 0.00022842105263157895, "low_lr": 4.568421052631579e-06, "step": 1466 }, { "epoch": 3.8553583168967784, "high_lr": 0.00022842105263157895, "low_lr": 4.568421052631579e-06, "step": 1466 }, { "epoch": 3.8553583168967784, "high_lr": 0.00022842105263157895, "low_lr": 4.568421052631579e-06, "step": 1466 }, { "epoch": 3.8553583168967784, "high_lr": 0.00022842105263157895, "low_lr": 4.568421052631579e-06, "step": 1466 }, { "epoch": 3.8553583168967784, "high_lr": 0.00022842105263157895, "low_lr": 4.568421052631579e-06, "step": 1466 }, { "epoch": 3.8579881656804735, "grad_norm": 1.4660565853118896, "learning_rate": 0.00022789473684210526, "loss": 1.2828, "step": 1467 }, { "epoch": 3.8579881656804735, "high_lr": 0.00022789473684210526, "low_lr": 4.557894736842105e-06, "step": 1467 }, { "epoch": 3.8579881656804735, "high_lr": 0.00022789473684210526, "low_lr": 4.557894736842105e-06, "step": 1467 }, { "epoch": 3.8579881656804735, "high_lr": 0.00022789473684210526, "low_lr": 4.557894736842105e-06, "step": 1467 }, { "epoch": 3.8579881656804735, "high_lr": 0.00022789473684210526, "low_lr": 4.557894736842105e-06, "step": 1467 }, { "epoch": 3.8579881656804735, "high_lr": 0.00022789473684210526, "low_lr": 4.557894736842105e-06, "step": 1467 }, { "epoch": 3.8579881656804735, "high_lr": 0.00022789473684210526, "low_lr": 4.557894736842105e-06, "step": 1467 }, { "epoch": 3.8579881656804735, "high_lr": 0.00022789473684210526, "low_lr": 4.557894736842105e-06, "step": 1467 }, { "epoch": 3.8579881656804735, "high_lr": 0.00022789473684210526, "low_lr": 4.557894736842105e-06, "step": 1467 }, { "epoch": 3.860618014464168, "grad_norm": 1.4297759532928467, "learning_rate": 0.0002273684210526316, "loss": 1.3214, "step": 1468 }, { "epoch": 3.860618014464168, "high_lr": 0.0002273684210526316, "low_lr": 4.547368421052632e-06, "step": 1468 }, { "epoch": 3.860618014464168, "high_lr": 0.0002273684210526316, "low_lr": 4.547368421052632e-06, "step": 1468 }, { "epoch": 3.860618014464168, "high_lr": 0.0002273684210526316, "low_lr": 4.547368421052632e-06, "step": 1468 }, { "epoch": 3.860618014464168, "high_lr": 0.0002273684210526316, "low_lr": 4.547368421052632e-06, "step": 1468 }, { "epoch": 3.860618014464168, "high_lr": 0.0002273684210526316, "low_lr": 4.547368421052632e-06, "step": 1468 }, { "epoch": 3.860618014464168, "high_lr": 0.0002273684210526316, "low_lr": 4.547368421052632e-06, "step": 1468 }, { "epoch": 3.860618014464168, "high_lr": 0.0002273684210526316, "low_lr": 4.547368421052632e-06, "step": 1468 }, { "epoch": 3.860618014464168, "high_lr": 0.0002273684210526316, "low_lr": 4.547368421052632e-06, "step": 1468 }, { "epoch": 3.8632478632478633, "grad_norm": 1.435152292251587, "learning_rate": 0.0002268421052631579, "loss": 1.2979, "step": 1469 }, { "epoch": 3.8632478632478633, "high_lr": 0.0002268421052631579, "low_lr": 4.536842105263158e-06, "step": 1469 }, { "epoch": 3.8632478632478633, "high_lr": 0.0002268421052631579, "low_lr": 4.536842105263158e-06, "step": 1469 }, { "epoch": 3.8632478632478633, "high_lr": 0.0002268421052631579, "low_lr": 4.536842105263158e-06, "step": 1469 }, { "epoch": 3.8632478632478633, "high_lr": 0.0002268421052631579, "low_lr": 4.536842105263158e-06, "step": 1469 }, { "epoch": 3.8632478632478633, "high_lr": 0.0002268421052631579, "low_lr": 4.536842105263158e-06, "step": 1469 }, { "epoch": 3.8632478632478633, "high_lr": 0.0002268421052631579, "low_lr": 4.536842105263158e-06, "step": 1469 }, { "epoch": 3.8632478632478633, "high_lr": 0.0002268421052631579, "low_lr": 4.536842105263158e-06, "step": 1469 }, { "epoch": 3.8632478632478633, "high_lr": 0.0002268421052631579, "low_lr": 4.536842105263158e-06, "step": 1469 }, { "epoch": 3.8658777120315584, "grad_norm": 1.4923878908157349, "learning_rate": 0.00022631578947368422, "loss": 1.3003, "step": 1470 }, { "epoch": 3.8658777120315584, "high_lr": 0.00022631578947368422, "low_lr": 4.526315789473685e-06, "step": 1470 }, { "epoch": 3.8658777120315584, "high_lr": 0.00022631578947368422, "low_lr": 4.526315789473685e-06, "step": 1470 }, { "epoch": 3.8658777120315584, "high_lr": 0.00022631578947368422, "low_lr": 4.526315789473685e-06, "step": 1470 }, { "epoch": 3.8658777120315584, "high_lr": 0.00022631578947368422, "low_lr": 4.526315789473685e-06, "step": 1470 }, { "epoch": 3.8658777120315584, "high_lr": 0.00022631578947368422, "low_lr": 4.526315789473685e-06, "step": 1470 }, { "epoch": 3.8658777120315584, "high_lr": 0.00022631578947368422, "low_lr": 4.526315789473685e-06, "step": 1470 }, { "epoch": 3.8658777120315584, "high_lr": 0.00022631578947368422, "low_lr": 4.526315789473685e-06, "step": 1470 }, { "epoch": 3.8658777120315584, "high_lr": 0.00022631578947368422, "low_lr": 4.526315789473685e-06, "step": 1470 }, { "epoch": 3.868507560815253, "grad_norm": 1.4546798467636108, "learning_rate": 0.00022578947368421054, "loss": 1.3611, "step": 1471 }, { "epoch": 3.868507560815253, "high_lr": 0.00022578947368421054, "low_lr": 4.5157894736842115e-06, "step": 1471 }, { "epoch": 3.868507560815253, "high_lr": 0.00022578947368421054, "low_lr": 4.5157894736842115e-06, "step": 1471 }, { "epoch": 3.868507560815253, "high_lr": 0.00022578947368421054, "low_lr": 4.5157894736842115e-06, "step": 1471 }, { "epoch": 3.868507560815253, "high_lr": 0.00022578947368421054, "low_lr": 4.5157894736842115e-06, "step": 1471 }, { "epoch": 3.868507560815253, "high_lr": 0.00022578947368421054, "low_lr": 4.5157894736842115e-06, "step": 1471 }, { "epoch": 3.868507560815253, "high_lr": 0.00022578947368421054, "low_lr": 4.5157894736842115e-06, "step": 1471 }, { "epoch": 3.868507560815253, "high_lr": 0.00022578947368421054, "low_lr": 4.5157894736842115e-06, "step": 1471 }, { "epoch": 3.868507560815253, "high_lr": 0.00022578947368421054, "low_lr": 4.5157894736842115e-06, "step": 1471 }, { "epoch": 3.871137409598948, "grad_norm": 1.4836859703063965, "learning_rate": 0.00022526315789473682, "loss": 1.311, "step": 1472 }, { "epoch": 3.871137409598948, "high_lr": 0.00022526315789473682, "low_lr": 4.505263157894737e-06, "step": 1472 }, { "epoch": 3.871137409598948, "high_lr": 0.00022526315789473682, "low_lr": 4.505263157894737e-06, "step": 1472 }, { "epoch": 3.871137409598948, "high_lr": 0.00022526315789473682, "low_lr": 4.505263157894737e-06, "step": 1472 }, { "epoch": 3.871137409598948, "high_lr": 0.00022526315789473682, "low_lr": 4.505263157894737e-06, "step": 1472 }, { "epoch": 3.871137409598948, "high_lr": 0.00022526315789473682, "low_lr": 4.505263157894737e-06, "step": 1472 }, { "epoch": 3.871137409598948, "high_lr": 0.00022526315789473682, "low_lr": 4.505263157894737e-06, "step": 1472 }, { "epoch": 3.871137409598948, "high_lr": 0.00022526315789473682, "low_lr": 4.505263157894737e-06, "step": 1472 }, { "epoch": 3.871137409598948, "high_lr": 0.00022526315789473682, "low_lr": 4.505263157894737e-06, "step": 1472 }, { "epoch": 3.8737672583826432, "grad_norm": 1.4043117761611938, "learning_rate": 0.00022473684210526316, "loss": 1.2994, "step": 1473 }, { "epoch": 3.8737672583826432, "high_lr": 0.00022473684210526316, "low_lr": 4.494736842105263e-06, "step": 1473 }, { "epoch": 3.8737672583826432, "high_lr": 0.00022473684210526316, "low_lr": 4.494736842105263e-06, "step": 1473 }, { "epoch": 3.8737672583826432, "high_lr": 0.00022473684210526316, "low_lr": 4.494736842105263e-06, "step": 1473 }, { "epoch": 3.8737672583826432, "high_lr": 0.00022473684210526316, "low_lr": 4.494736842105263e-06, "step": 1473 }, { "epoch": 3.8737672583826432, "high_lr": 0.00022473684210526316, "low_lr": 4.494736842105263e-06, "step": 1473 }, { "epoch": 3.8737672583826432, "high_lr": 0.00022473684210526316, "low_lr": 4.494736842105263e-06, "step": 1473 }, { "epoch": 3.8737672583826432, "high_lr": 0.00022473684210526316, "low_lr": 4.494736842105263e-06, "step": 1473 }, { "epoch": 3.8737672583826432, "high_lr": 0.00022473684210526316, "low_lr": 4.494736842105263e-06, "step": 1473 }, { "epoch": 3.876397107166338, "grad_norm": 1.354508876800537, "learning_rate": 0.00022421052631578948, "loss": 1.3001, "step": 1474 }, { "epoch": 3.876397107166338, "high_lr": 0.00022421052631578948, "low_lr": 4.48421052631579e-06, "step": 1474 }, { "epoch": 3.876397107166338, "high_lr": 0.00022421052631578948, "low_lr": 4.48421052631579e-06, "step": 1474 }, { "epoch": 3.876397107166338, "high_lr": 0.00022421052631578948, "low_lr": 4.48421052631579e-06, "step": 1474 }, { "epoch": 3.876397107166338, "high_lr": 0.00022421052631578948, "low_lr": 4.48421052631579e-06, "step": 1474 }, { "epoch": 3.876397107166338, "high_lr": 0.00022421052631578948, "low_lr": 4.48421052631579e-06, "step": 1474 }, { "epoch": 3.876397107166338, "high_lr": 0.00022421052631578948, "low_lr": 4.48421052631579e-06, "step": 1474 }, { "epoch": 3.876397107166338, "high_lr": 0.00022421052631578948, "low_lr": 4.48421052631579e-06, "step": 1474 }, { "epoch": 3.876397107166338, "high_lr": 0.00022421052631578948, "low_lr": 4.48421052631579e-06, "step": 1474 }, { "epoch": 3.879026955950033, "grad_norm": 1.5343304872512817, "learning_rate": 0.0002236842105263158, "loss": 1.3185, "step": 1475 }, { "epoch": 3.879026955950033, "high_lr": 0.0002236842105263158, "low_lr": 4.473684210526316e-06, "step": 1475 }, { "epoch": 3.879026955950033, "high_lr": 0.0002236842105263158, "low_lr": 4.473684210526316e-06, "step": 1475 }, { "epoch": 3.879026955950033, "high_lr": 0.0002236842105263158, "low_lr": 4.473684210526316e-06, "step": 1475 }, { "epoch": 3.879026955950033, "high_lr": 0.0002236842105263158, "low_lr": 4.473684210526316e-06, "step": 1475 }, { "epoch": 3.879026955950033, "high_lr": 0.0002236842105263158, "low_lr": 4.473684210526316e-06, "step": 1475 }, { "epoch": 3.879026955950033, "high_lr": 0.0002236842105263158, "low_lr": 4.473684210526316e-06, "step": 1475 }, { "epoch": 3.879026955950033, "high_lr": 0.0002236842105263158, "low_lr": 4.473684210526316e-06, "step": 1475 }, { "epoch": 3.879026955950033, "high_lr": 0.0002236842105263158, "low_lr": 4.473684210526316e-06, "step": 1475 }, { "epoch": 3.8816568047337277, "grad_norm": 1.3767120838165283, "learning_rate": 0.0002231578947368421, "loss": 1.2882, "step": 1476 }, { "epoch": 3.8816568047337277, "high_lr": 0.0002231578947368421, "low_lr": 4.463157894736842e-06, "step": 1476 }, { "epoch": 3.8816568047337277, "high_lr": 0.0002231578947368421, "low_lr": 4.463157894736842e-06, "step": 1476 }, { "epoch": 3.8816568047337277, "high_lr": 0.0002231578947368421, "low_lr": 4.463157894736842e-06, "step": 1476 }, { "epoch": 3.8816568047337277, "high_lr": 0.0002231578947368421, "low_lr": 4.463157894736842e-06, "step": 1476 }, { "epoch": 3.8816568047337277, "high_lr": 0.0002231578947368421, "low_lr": 4.463157894736842e-06, "step": 1476 }, { "epoch": 3.8816568047337277, "high_lr": 0.0002231578947368421, "low_lr": 4.463157894736842e-06, "step": 1476 }, { "epoch": 3.8816568047337277, "high_lr": 0.0002231578947368421, "low_lr": 4.463157894736842e-06, "step": 1476 }, { "epoch": 3.8816568047337277, "high_lr": 0.0002231578947368421, "low_lr": 4.463157894736842e-06, "step": 1476 }, { "epoch": 3.8842866535174227, "grad_norm": 1.4227728843688965, "learning_rate": 0.00022263157894736844, "loss": 1.3117, "step": 1477 }, { "epoch": 3.8842866535174227, "high_lr": 0.00022263157894736844, "low_lr": 4.452631578947369e-06, "step": 1477 }, { "epoch": 3.8842866535174227, "high_lr": 0.00022263157894736844, "low_lr": 4.452631578947369e-06, "step": 1477 }, { "epoch": 3.8842866535174227, "high_lr": 0.00022263157894736844, "low_lr": 4.452631578947369e-06, "step": 1477 }, { "epoch": 3.8842866535174227, "high_lr": 0.00022263157894736844, "low_lr": 4.452631578947369e-06, "step": 1477 }, { "epoch": 3.8842866535174227, "high_lr": 0.00022263157894736844, "low_lr": 4.452631578947369e-06, "step": 1477 }, { "epoch": 3.8842866535174227, "high_lr": 0.00022263157894736844, "low_lr": 4.452631578947369e-06, "step": 1477 }, { "epoch": 3.8842866535174227, "high_lr": 0.00022263157894736844, "low_lr": 4.452631578947369e-06, "step": 1477 }, { "epoch": 3.8842866535174227, "high_lr": 0.00022263157894736844, "low_lr": 4.452631578947369e-06, "step": 1477 }, { "epoch": 3.886916502301118, "grad_norm": 1.4930652379989624, "learning_rate": 0.00022210526315789476, "loss": 1.3118, "step": 1478 }, { "epoch": 3.886916502301118, "high_lr": 0.00022210526315789476, "low_lr": 4.442105263157896e-06, "step": 1478 }, { "epoch": 3.886916502301118, "high_lr": 0.00022210526315789476, "low_lr": 4.442105263157896e-06, "step": 1478 }, { "epoch": 3.886916502301118, "high_lr": 0.00022210526315789476, "low_lr": 4.442105263157896e-06, "step": 1478 }, { "epoch": 3.886916502301118, "high_lr": 0.00022210526315789476, "low_lr": 4.442105263157896e-06, "step": 1478 }, { "epoch": 3.886916502301118, "high_lr": 0.00022210526315789476, "low_lr": 4.442105263157896e-06, "step": 1478 }, { "epoch": 3.886916502301118, "high_lr": 0.00022210526315789476, "low_lr": 4.442105263157896e-06, "step": 1478 }, { "epoch": 3.886916502301118, "high_lr": 0.00022210526315789476, "low_lr": 4.442105263157896e-06, "step": 1478 }, { "epoch": 3.886916502301118, "high_lr": 0.00022210526315789476, "low_lr": 4.442105263157896e-06, "step": 1478 }, { "epoch": 3.8895463510848125, "grad_norm": 1.4825832843780518, "learning_rate": 0.00022157894736842104, "loss": 1.336, "step": 1479 }, { "epoch": 3.8895463510848125, "high_lr": 0.00022157894736842104, "low_lr": 4.431578947368421e-06, "step": 1479 }, { "epoch": 3.8895463510848125, "high_lr": 0.00022157894736842104, "low_lr": 4.431578947368421e-06, "step": 1479 }, { "epoch": 3.8895463510848125, "high_lr": 0.00022157894736842104, "low_lr": 4.431578947368421e-06, "step": 1479 }, { "epoch": 3.8895463510848125, "high_lr": 0.00022157894736842104, "low_lr": 4.431578947368421e-06, "step": 1479 }, { "epoch": 3.8895463510848125, "high_lr": 0.00022157894736842104, "low_lr": 4.431578947368421e-06, "step": 1479 }, { "epoch": 3.8895463510848125, "high_lr": 0.00022157894736842104, "low_lr": 4.431578947368421e-06, "step": 1479 }, { "epoch": 3.8895463510848125, "high_lr": 0.00022157894736842104, "low_lr": 4.431578947368421e-06, "step": 1479 }, { "epoch": 3.8895463510848125, "high_lr": 0.00022157894736842104, "low_lr": 4.431578947368421e-06, "step": 1479 }, { "epoch": 3.8921761998685076, "grad_norm": 1.572178602218628, "learning_rate": 0.00022105263157894735, "loss": 1.3435, "step": 1480 }, { "epoch": 3.8921761998685076, "high_lr": 0.00022105263157894735, "low_lr": 4.4210526315789476e-06, "step": 1480 }, { "epoch": 3.8921761998685076, "high_lr": 0.00022105263157894735, "low_lr": 4.4210526315789476e-06, "step": 1480 }, { "epoch": 3.8921761998685076, "high_lr": 0.00022105263157894735, "low_lr": 4.4210526315789476e-06, "step": 1480 }, { "epoch": 3.8921761998685076, "high_lr": 0.00022105263157894735, "low_lr": 4.4210526315789476e-06, "step": 1480 }, { "epoch": 3.8921761998685076, "high_lr": 0.00022105263157894735, "low_lr": 4.4210526315789476e-06, "step": 1480 }, { "epoch": 3.8921761998685076, "high_lr": 0.00022105263157894735, "low_lr": 4.4210526315789476e-06, "step": 1480 }, { "epoch": 3.8921761998685076, "high_lr": 0.00022105263157894735, "low_lr": 4.4210526315789476e-06, "step": 1480 }, { "epoch": 3.8921761998685076, "high_lr": 0.00022105263157894735, "low_lr": 4.4210526315789476e-06, "step": 1480 }, { "epoch": 3.8948060486522023, "grad_norm": 1.5475068092346191, "learning_rate": 0.0002205263157894737, "loss": 1.2861, "step": 1481 }, { "epoch": 3.8948060486522023, "high_lr": 0.0002205263157894737, "low_lr": 4.410526315789474e-06, "step": 1481 }, { "epoch": 3.8948060486522023, "high_lr": 0.0002205263157894737, "low_lr": 4.410526315789474e-06, "step": 1481 }, { "epoch": 3.8948060486522023, "high_lr": 0.0002205263157894737, "low_lr": 4.410526315789474e-06, "step": 1481 }, { "epoch": 3.8948060486522023, "high_lr": 0.0002205263157894737, "low_lr": 4.410526315789474e-06, "step": 1481 }, { "epoch": 3.8948060486522023, "high_lr": 0.0002205263157894737, "low_lr": 4.410526315789474e-06, "step": 1481 }, { "epoch": 3.8948060486522023, "high_lr": 0.0002205263157894737, "low_lr": 4.410526315789474e-06, "step": 1481 }, { "epoch": 3.8948060486522023, "high_lr": 0.0002205263157894737, "low_lr": 4.410526315789474e-06, "step": 1481 }, { "epoch": 3.8948060486522023, "high_lr": 0.0002205263157894737, "low_lr": 4.410526315789474e-06, "step": 1481 }, { "epoch": 3.8974358974358974, "grad_norm": 1.3821595907211304, "learning_rate": 0.00022, "loss": 1.3257, "step": 1482 }, { "epoch": 3.8974358974358974, "high_lr": 0.00022, "low_lr": 4.4e-06, "step": 1482 }, { "epoch": 3.8974358974358974, "high_lr": 0.00022, "low_lr": 4.4e-06, "step": 1482 }, { "epoch": 3.8974358974358974, "high_lr": 0.00022, "low_lr": 4.4e-06, "step": 1482 }, { "epoch": 3.8974358974358974, "high_lr": 0.00022, "low_lr": 4.4e-06, "step": 1482 }, { "epoch": 3.8974358974358974, "high_lr": 0.00022, "low_lr": 4.4e-06, "step": 1482 }, { "epoch": 3.8974358974358974, "high_lr": 0.00022, "low_lr": 4.4e-06, "step": 1482 }, { "epoch": 3.8974358974358974, "high_lr": 0.00022, "low_lr": 4.4e-06, "step": 1482 }, { "epoch": 3.8974358974358974, "high_lr": 0.00022, "low_lr": 4.4e-06, "step": 1482 }, { "epoch": 3.9000657462195925, "grad_norm": 1.5113525390625, "learning_rate": 0.00021947368421052632, "loss": 1.3, "step": 1483 }, { "epoch": 3.9000657462195925, "high_lr": 0.00021947368421052632, "low_lr": 4.3894736842105266e-06, "step": 1483 }, { "epoch": 3.9000657462195925, "high_lr": 0.00021947368421052632, "low_lr": 4.3894736842105266e-06, "step": 1483 }, { "epoch": 3.9000657462195925, "high_lr": 0.00021947368421052632, "low_lr": 4.3894736842105266e-06, "step": 1483 }, { "epoch": 3.9000657462195925, "high_lr": 0.00021947368421052632, "low_lr": 4.3894736842105266e-06, "step": 1483 }, { "epoch": 3.9000657462195925, "high_lr": 0.00021947368421052632, "low_lr": 4.3894736842105266e-06, "step": 1483 }, { "epoch": 3.9000657462195925, "high_lr": 0.00021947368421052632, "low_lr": 4.3894736842105266e-06, "step": 1483 }, { "epoch": 3.9000657462195925, "high_lr": 0.00021947368421052632, "low_lr": 4.3894736842105266e-06, "step": 1483 }, { "epoch": 3.9000657462195925, "high_lr": 0.00021947368421052632, "low_lr": 4.3894736842105266e-06, "step": 1483 }, { "epoch": 3.902695595003287, "grad_norm": 1.4746077060699463, "learning_rate": 0.00021894736842105263, "loss": 1.3474, "step": 1484 }, { "epoch": 3.902695595003287, "high_lr": 0.00021894736842105263, "low_lr": 4.378947368421053e-06, "step": 1484 }, { "epoch": 3.902695595003287, "high_lr": 0.00021894736842105263, "low_lr": 4.378947368421053e-06, "step": 1484 }, { "epoch": 3.902695595003287, "high_lr": 0.00021894736842105263, "low_lr": 4.378947368421053e-06, "step": 1484 }, { "epoch": 3.902695595003287, "high_lr": 0.00021894736842105263, "low_lr": 4.378947368421053e-06, "step": 1484 }, { "epoch": 3.902695595003287, "high_lr": 0.00021894736842105263, "low_lr": 4.378947368421053e-06, "step": 1484 }, { "epoch": 3.902695595003287, "high_lr": 0.00021894736842105263, "low_lr": 4.378947368421053e-06, "step": 1484 }, { "epoch": 3.902695595003287, "high_lr": 0.00021894736842105263, "low_lr": 4.378947368421053e-06, "step": 1484 }, { "epoch": 3.902695595003287, "high_lr": 0.00021894736842105263, "low_lr": 4.378947368421053e-06, "step": 1484 }, { "epoch": 3.905325443786982, "grad_norm": 1.3812576532363892, "learning_rate": 0.00021842105263157897, "loss": 1.2668, "step": 1485 }, { "epoch": 3.905325443786982, "high_lr": 0.00021842105263157897, "low_lr": 4.368421052631579e-06, "step": 1485 }, { "epoch": 3.905325443786982, "high_lr": 0.00021842105263157897, "low_lr": 4.368421052631579e-06, "step": 1485 }, { "epoch": 3.905325443786982, "high_lr": 0.00021842105263157897, "low_lr": 4.368421052631579e-06, "step": 1485 }, { "epoch": 3.905325443786982, "high_lr": 0.00021842105263157897, "low_lr": 4.368421052631579e-06, "step": 1485 }, { "epoch": 3.905325443786982, "high_lr": 0.00021842105263157897, "low_lr": 4.368421052631579e-06, "step": 1485 }, { "epoch": 3.905325443786982, "high_lr": 0.00021842105263157897, "low_lr": 4.368421052631579e-06, "step": 1485 }, { "epoch": 3.905325443786982, "high_lr": 0.00021842105263157897, "low_lr": 4.368421052631579e-06, "step": 1485 }, { "epoch": 3.905325443786982, "high_lr": 0.00021842105263157897, "low_lr": 4.368421052631579e-06, "step": 1485 }, { "epoch": 3.9079552925706773, "grad_norm": 1.5691782236099243, "learning_rate": 0.00021789473684210526, "loss": 1.3194, "step": 1486 }, { "epoch": 3.9079552925706773, "high_lr": 0.00021789473684210526, "low_lr": 4.3578947368421055e-06, "step": 1486 }, { "epoch": 3.9079552925706773, "high_lr": 0.00021789473684210526, "low_lr": 4.3578947368421055e-06, "step": 1486 }, { "epoch": 3.9079552925706773, "high_lr": 0.00021789473684210526, "low_lr": 4.3578947368421055e-06, "step": 1486 }, { "epoch": 3.9079552925706773, "high_lr": 0.00021789473684210526, "low_lr": 4.3578947368421055e-06, "step": 1486 }, { "epoch": 3.9079552925706773, "high_lr": 0.00021789473684210526, "low_lr": 4.3578947368421055e-06, "step": 1486 }, { "epoch": 3.9079552925706773, "high_lr": 0.00021789473684210526, "low_lr": 4.3578947368421055e-06, "step": 1486 }, { "epoch": 3.9079552925706773, "high_lr": 0.00021789473684210526, "low_lr": 4.3578947368421055e-06, "step": 1486 }, { "epoch": 3.9079552925706773, "high_lr": 0.00021789473684210526, "low_lr": 4.3578947368421055e-06, "step": 1486 }, { "epoch": 3.910585141354372, "grad_norm": 1.395737886428833, "learning_rate": 0.00021736842105263157, "loss": 1.3049, "step": 1487 }, { "epoch": 3.910585141354372, "high_lr": 0.00021736842105263157, "low_lr": 4.347368421052632e-06, "step": 1487 }, { "epoch": 3.910585141354372, "high_lr": 0.00021736842105263157, "low_lr": 4.347368421052632e-06, "step": 1487 }, { "epoch": 3.910585141354372, "high_lr": 0.00021736842105263157, "low_lr": 4.347368421052632e-06, "step": 1487 }, { "epoch": 3.910585141354372, "high_lr": 0.00021736842105263157, "low_lr": 4.347368421052632e-06, "step": 1487 }, { "epoch": 3.910585141354372, "high_lr": 0.00021736842105263157, "low_lr": 4.347368421052632e-06, "step": 1487 }, { "epoch": 3.910585141354372, "high_lr": 0.00021736842105263157, "low_lr": 4.347368421052632e-06, "step": 1487 }, { "epoch": 3.910585141354372, "high_lr": 0.00021736842105263157, "low_lr": 4.347368421052632e-06, "step": 1487 }, { "epoch": 3.910585141354372, "high_lr": 0.00021736842105263157, "low_lr": 4.347368421052632e-06, "step": 1487 }, { "epoch": 3.913214990138067, "grad_norm": 1.4675577878952026, "learning_rate": 0.00021684210526315789, "loss": 1.3194, "step": 1488 }, { "epoch": 3.913214990138067, "high_lr": 0.00021684210526315789, "low_lr": 4.336842105263158e-06, "step": 1488 }, { "epoch": 3.913214990138067, "high_lr": 0.00021684210526315789, "low_lr": 4.336842105263158e-06, "step": 1488 }, { "epoch": 3.913214990138067, "high_lr": 0.00021684210526315789, "low_lr": 4.336842105263158e-06, "step": 1488 }, { "epoch": 3.913214990138067, "high_lr": 0.00021684210526315789, "low_lr": 4.336842105263158e-06, "step": 1488 }, { "epoch": 3.913214990138067, "high_lr": 0.00021684210526315789, "low_lr": 4.336842105263158e-06, "step": 1488 }, { "epoch": 3.913214990138067, "high_lr": 0.00021684210526315789, "low_lr": 4.336842105263158e-06, "step": 1488 }, { "epoch": 3.913214990138067, "high_lr": 0.00021684210526315789, "low_lr": 4.336842105263158e-06, "step": 1488 }, { "epoch": 3.913214990138067, "high_lr": 0.00021684210526315789, "low_lr": 4.336842105263158e-06, "step": 1488 }, { "epoch": 3.915844838921762, "grad_norm": 1.4874144792556763, "learning_rate": 0.00021631578947368423, "loss": 1.2999, "step": 1489 }, { "epoch": 3.915844838921762, "high_lr": 0.00021631578947368423, "low_lr": 4.3263157894736845e-06, "step": 1489 }, { "epoch": 3.915844838921762, "high_lr": 0.00021631578947368423, "low_lr": 4.3263157894736845e-06, "step": 1489 }, { "epoch": 3.915844838921762, "high_lr": 0.00021631578947368423, "low_lr": 4.3263157894736845e-06, "step": 1489 }, { "epoch": 3.915844838921762, "high_lr": 0.00021631578947368423, "low_lr": 4.3263157894736845e-06, "step": 1489 }, { "epoch": 3.915844838921762, "high_lr": 0.00021631578947368423, "low_lr": 4.3263157894736845e-06, "step": 1489 }, { "epoch": 3.915844838921762, "high_lr": 0.00021631578947368423, "low_lr": 4.3263157894736845e-06, "step": 1489 }, { "epoch": 3.915844838921762, "high_lr": 0.00021631578947368423, "low_lr": 4.3263157894736845e-06, "step": 1489 }, { "epoch": 3.915844838921762, "high_lr": 0.00021631578947368423, "low_lr": 4.3263157894736845e-06, "step": 1489 }, { "epoch": 3.918474687705457, "grad_norm": 1.4818798303604126, "learning_rate": 0.00021578947368421054, "loss": 1.3418, "step": 1490 }, { "epoch": 3.918474687705457, "high_lr": 0.00021578947368421054, "low_lr": 4.315789473684211e-06, "step": 1490 }, { "epoch": 3.918474687705457, "high_lr": 0.00021578947368421054, "low_lr": 4.315789473684211e-06, "step": 1490 }, { "epoch": 3.918474687705457, "high_lr": 0.00021578947368421054, "low_lr": 4.315789473684211e-06, "step": 1490 }, { "epoch": 3.918474687705457, "high_lr": 0.00021578947368421054, "low_lr": 4.315789473684211e-06, "step": 1490 }, { "epoch": 3.918474687705457, "high_lr": 0.00021578947368421054, "low_lr": 4.315789473684211e-06, "step": 1490 }, { "epoch": 3.918474687705457, "high_lr": 0.00021578947368421054, "low_lr": 4.315789473684211e-06, "step": 1490 }, { "epoch": 3.918474687705457, "high_lr": 0.00021578947368421054, "low_lr": 4.315789473684211e-06, "step": 1490 }, { "epoch": 3.918474687705457, "high_lr": 0.00021578947368421054, "low_lr": 4.315789473684211e-06, "step": 1490 }, { "epoch": 3.921104536489152, "grad_norm": 1.362735629081726, "learning_rate": 0.00021526315789473685, "loss": 1.2931, "step": 1491 }, { "epoch": 3.921104536489152, "high_lr": 0.00021526315789473685, "low_lr": 4.305263157894737e-06, "step": 1491 }, { "epoch": 3.921104536489152, "high_lr": 0.00021526315789473685, "low_lr": 4.305263157894737e-06, "step": 1491 }, { "epoch": 3.921104536489152, "high_lr": 0.00021526315789473685, "low_lr": 4.305263157894737e-06, "step": 1491 }, { "epoch": 3.921104536489152, "high_lr": 0.00021526315789473685, "low_lr": 4.305263157894737e-06, "step": 1491 }, { "epoch": 3.921104536489152, "high_lr": 0.00021526315789473685, "low_lr": 4.305263157894737e-06, "step": 1491 }, { "epoch": 3.921104536489152, "high_lr": 0.00021526315789473685, "low_lr": 4.305263157894737e-06, "step": 1491 }, { "epoch": 3.921104536489152, "high_lr": 0.00021526315789473685, "low_lr": 4.305263157894737e-06, "step": 1491 }, { "epoch": 3.921104536489152, "high_lr": 0.00021526315789473685, "low_lr": 4.305263157894737e-06, "step": 1491 }, { "epoch": 3.923734385272847, "grad_norm": 1.3892029523849487, "learning_rate": 0.00021473684210526316, "loss": 1.2784, "step": 1492 }, { "epoch": 3.923734385272847, "high_lr": 0.00021473684210526316, "low_lr": 4.2947368421052635e-06, "step": 1492 }, { "epoch": 3.923734385272847, "high_lr": 0.00021473684210526316, "low_lr": 4.2947368421052635e-06, "step": 1492 }, { "epoch": 3.923734385272847, "high_lr": 0.00021473684210526316, "low_lr": 4.2947368421052635e-06, "step": 1492 }, { "epoch": 3.923734385272847, "high_lr": 0.00021473684210526316, "low_lr": 4.2947368421052635e-06, "step": 1492 }, { "epoch": 3.923734385272847, "high_lr": 0.00021473684210526316, "low_lr": 4.2947368421052635e-06, "step": 1492 }, { "epoch": 3.923734385272847, "high_lr": 0.00021473684210526316, "low_lr": 4.2947368421052635e-06, "step": 1492 }, { "epoch": 3.923734385272847, "high_lr": 0.00021473684210526316, "low_lr": 4.2947368421052635e-06, "step": 1492 }, { "epoch": 3.923734385272847, "high_lr": 0.00021473684210526316, "low_lr": 4.2947368421052635e-06, "step": 1492 }, { "epoch": 3.9263642340565417, "grad_norm": 1.3729498386383057, "learning_rate": 0.00021421052631578948, "loss": 1.2919, "step": 1493 }, { "epoch": 3.9263642340565417, "high_lr": 0.00021421052631578948, "low_lr": 4.28421052631579e-06, "step": 1493 }, { "epoch": 3.9263642340565417, "high_lr": 0.00021421052631578948, "low_lr": 4.28421052631579e-06, "step": 1493 }, { "epoch": 3.9263642340565417, "high_lr": 0.00021421052631578948, "low_lr": 4.28421052631579e-06, "step": 1493 }, { "epoch": 3.9263642340565417, "high_lr": 0.00021421052631578948, "low_lr": 4.28421052631579e-06, "step": 1493 }, { "epoch": 3.9263642340565417, "high_lr": 0.00021421052631578948, "low_lr": 4.28421052631579e-06, "step": 1493 }, { "epoch": 3.9263642340565417, "high_lr": 0.00021421052631578948, "low_lr": 4.28421052631579e-06, "step": 1493 }, { "epoch": 3.9263642340565417, "high_lr": 0.00021421052631578948, "low_lr": 4.28421052631579e-06, "step": 1493 }, { "epoch": 3.9263642340565417, "high_lr": 0.00021421052631578948, "low_lr": 4.28421052631579e-06, "step": 1493 }, { "epoch": 3.9289940828402368, "grad_norm": 1.4095979928970337, "learning_rate": 0.0002136842105263158, "loss": 1.3006, "step": 1494 }, { "epoch": 3.9289940828402368, "high_lr": 0.0002136842105263158, "low_lr": 4.273684210526316e-06, "step": 1494 }, { "epoch": 3.9289940828402368, "high_lr": 0.0002136842105263158, "low_lr": 4.273684210526316e-06, "step": 1494 }, { "epoch": 3.9289940828402368, "high_lr": 0.0002136842105263158, "low_lr": 4.273684210526316e-06, "step": 1494 }, { "epoch": 3.9289940828402368, "high_lr": 0.0002136842105263158, "low_lr": 4.273684210526316e-06, "step": 1494 }, { "epoch": 3.9289940828402368, "high_lr": 0.0002136842105263158, "low_lr": 4.273684210526316e-06, "step": 1494 }, { "epoch": 3.9289940828402368, "high_lr": 0.0002136842105263158, "low_lr": 4.273684210526316e-06, "step": 1494 }, { "epoch": 3.9289940828402368, "high_lr": 0.0002136842105263158, "low_lr": 4.273684210526316e-06, "step": 1494 }, { "epoch": 3.9289940828402368, "high_lr": 0.0002136842105263158, "low_lr": 4.273684210526316e-06, "step": 1494 }, { "epoch": 3.931623931623932, "grad_norm": 1.605855107307434, "learning_rate": 0.0002131578947368421, "loss": 1.2677, "step": 1495 }, { "epoch": 3.931623931623932, "high_lr": 0.0002131578947368421, "low_lr": 4.2631578947368425e-06, "step": 1495 }, { "epoch": 3.931623931623932, "high_lr": 0.0002131578947368421, "low_lr": 4.2631578947368425e-06, "step": 1495 }, { "epoch": 3.931623931623932, "high_lr": 0.0002131578947368421, "low_lr": 4.2631578947368425e-06, "step": 1495 }, { "epoch": 3.931623931623932, "high_lr": 0.0002131578947368421, "low_lr": 4.2631578947368425e-06, "step": 1495 }, { "epoch": 3.931623931623932, "high_lr": 0.0002131578947368421, "low_lr": 4.2631578947368425e-06, "step": 1495 }, { "epoch": 3.931623931623932, "high_lr": 0.0002131578947368421, "low_lr": 4.2631578947368425e-06, "step": 1495 }, { "epoch": 3.931623931623932, "high_lr": 0.0002131578947368421, "low_lr": 4.2631578947368425e-06, "step": 1495 }, { "epoch": 3.931623931623932, "high_lr": 0.0002131578947368421, "low_lr": 4.2631578947368425e-06, "step": 1495 }, { "epoch": 3.9342537804076265, "grad_norm": 1.4077523946762085, "learning_rate": 0.00021263157894736842, "loss": 1.2999, "step": 1496 }, { "epoch": 3.9342537804076265, "high_lr": 0.00021263157894736842, "low_lr": 4.252631578947369e-06, "step": 1496 }, { "epoch": 3.9342537804076265, "high_lr": 0.00021263157894736842, "low_lr": 4.252631578947369e-06, "step": 1496 }, { "epoch": 3.9342537804076265, "high_lr": 0.00021263157894736842, "low_lr": 4.252631578947369e-06, "step": 1496 }, { "epoch": 3.9342537804076265, "high_lr": 0.00021263157894736842, "low_lr": 4.252631578947369e-06, "step": 1496 }, { "epoch": 3.9342537804076265, "high_lr": 0.00021263157894736842, "low_lr": 4.252631578947369e-06, "step": 1496 }, { "epoch": 3.9342537804076265, "high_lr": 0.00021263157894736842, "low_lr": 4.252631578947369e-06, "step": 1496 }, { "epoch": 3.9342537804076265, "high_lr": 0.00021263157894736842, "low_lr": 4.252631578947369e-06, "step": 1496 }, { "epoch": 3.9342537804076265, "high_lr": 0.00021263157894736842, "low_lr": 4.252631578947369e-06, "step": 1496 }, { "epoch": 3.9368836291913216, "grad_norm": 1.4664191007614136, "learning_rate": 0.00021210526315789476, "loss": 1.3139, "step": 1497 }, { "epoch": 3.9368836291913216, "high_lr": 0.00021210526315789476, "low_lr": 4.242105263157895e-06, "step": 1497 }, { "epoch": 3.9368836291913216, "high_lr": 0.00021210526315789476, "low_lr": 4.242105263157895e-06, "step": 1497 }, { "epoch": 3.9368836291913216, "high_lr": 0.00021210526315789476, "low_lr": 4.242105263157895e-06, "step": 1497 }, { "epoch": 3.9368836291913216, "high_lr": 0.00021210526315789476, "low_lr": 4.242105263157895e-06, "step": 1497 }, { "epoch": 3.9368836291913216, "high_lr": 0.00021210526315789476, "low_lr": 4.242105263157895e-06, "step": 1497 }, { "epoch": 3.9368836291913216, "high_lr": 0.00021210526315789476, "low_lr": 4.242105263157895e-06, "step": 1497 }, { "epoch": 3.9368836291913216, "high_lr": 0.00021210526315789476, "low_lr": 4.242105263157895e-06, "step": 1497 }, { "epoch": 3.9368836291913216, "high_lr": 0.00021210526315789476, "low_lr": 4.242105263157895e-06, "step": 1497 }, { "epoch": 3.9395134779750163, "grad_norm": 1.480134129524231, "learning_rate": 0.00021157894736842107, "loss": 1.2808, "step": 1498 }, { "epoch": 3.9395134779750163, "high_lr": 0.00021157894736842107, "low_lr": 4.2315789473684215e-06, "step": 1498 }, { "epoch": 3.9395134779750163, "high_lr": 0.00021157894736842107, "low_lr": 4.2315789473684215e-06, "step": 1498 }, { "epoch": 3.9395134779750163, "high_lr": 0.00021157894736842107, "low_lr": 4.2315789473684215e-06, "step": 1498 }, { "epoch": 3.9395134779750163, "high_lr": 0.00021157894736842107, "low_lr": 4.2315789473684215e-06, "step": 1498 }, { "epoch": 3.9395134779750163, "high_lr": 0.00021157894736842107, "low_lr": 4.2315789473684215e-06, "step": 1498 }, { "epoch": 3.9395134779750163, "high_lr": 0.00021157894736842107, "low_lr": 4.2315789473684215e-06, "step": 1498 }, { "epoch": 3.9395134779750163, "high_lr": 0.00021157894736842107, "low_lr": 4.2315789473684215e-06, "step": 1498 }, { "epoch": 3.9395134779750163, "high_lr": 0.00021157894736842107, "low_lr": 4.2315789473684215e-06, "step": 1498 }, { "epoch": 3.9421433267587114, "grad_norm": 1.5324522256851196, "learning_rate": 0.00021105263157894738, "loss": 1.3113, "step": 1499 }, { "epoch": 3.9421433267587114, "high_lr": 0.00021105263157894738, "low_lr": 4.221052631578948e-06, "step": 1499 }, { "epoch": 3.9421433267587114, "high_lr": 0.00021105263157894738, "low_lr": 4.221052631578948e-06, "step": 1499 }, { "epoch": 3.9421433267587114, "high_lr": 0.00021105263157894738, "low_lr": 4.221052631578948e-06, "step": 1499 }, { "epoch": 3.9421433267587114, "high_lr": 0.00021105263157894738, "low_lr": 4.221052631578948e-06, "step": 1499 }, { "epoch": 3.9421433267587114, "high_lr": 0.00021105263157894738, "low_lr": 4.221052631578948e-06, "step": 1499 }, { "epoch": 3.9421433267587114, "high_lr": 0.00021105263157894738, "low_lr": 4.221052631578948e-06, "step": 1499 }, { "epoch": 3.9421433267587114, "high_lr": 0.00021105263157894738, "low_lr": 4.221052631578948e-06, "step": 1499 }, { "epoch": 3.9421433267587114, "high_lr": 0.00021105263157894738, "low_lr": 4.221052631578948e-06, "step": 1499 }, { "epoch": 3.9447731755424065, "grad_norm": 1.5184235572814941, "learning_rate": 0.00021052631578947367, "loss": 1.2854, "step": 1500 }, { "epoch": 3.9447731755424065, "high_lr": 0.00021052631578947367, "low_lr": 4.210526315789474e-06, "step": 1500 }, { "epoch": 3.9447731755424065, "high_lr": 0.00021052631578947367, "low_lr": 4.210526315789474e-06, "step": 1500 }, { "epoch": 3.9447731755424065, "high_lr": 0.00021052631578947367, "low_lr": 4.210526315789474e-06, "step": 1500 }, { "epoch": 3.9447731755424065, "high_lr": 0.00021052631578947367, "low_lr": 4.210526315789474e-06, "step": 1500 }, { "epoch": 3.9447731755424065, "high_lr": 0.00021052631578947367, "low_lr": 4.210526315789474e-06, "step": 1500 }, { "epoch": 3.9447731755424065, "high_lr": 0.00021052631578947367, "low_lr": 4.210526315789474e-06, "step": 1500 }, { "epoch": 3.9447731755424065, "high_lr": 0.00021052631578947367, "low_lr": 4.210526315789474e-06, "step": 1500 }, { "epoch": 3.9447731755424065, "high_lr": 0.00021052631578947367, "low_lr": 4.210526315789474e-06, "step": 1500 }, { "epoch": 3.947403024326101, "grad_norm": 1.3334966897964478, "learning_rate": 0.00021, "loss": 1.3304, "step": 1501 }, { "epoch": 3.947403024326101, "high_lr": 0.00021, "low_lr": 4.2000000000000004e-06, "step": 1501 }, { "epoch": 3.947403024326101, "high_lr": 0.00021, "low_lr": 4.2000000000000004e-06, "step": 1501 }, { "epoch": 3.947403024326101, "high_lr": 0.00021, "low_lr": 4.2000000000000004e-06, "step": 1501 }, { "epoch": 3.947403024326101, "high_lr": 0.00021, "low_lr": 4.2000000000000004e-06, "step": 1501 }, { "epoch": 3.947403024326101, "high_lr": 0.00021, "low_lr": 4.2000000000000004e-06, "step": 1501 }, { "epoch": 3.947403024326101, "high_lr": 0.00021, "low_lr": 4.2000000000000004e-06, "step": 1501 }, { "epoch": 3.947403024326101, "high_lr": 0.00021, "low_lr": 4.2000000000000004e-06, "step": 1501 }, { "epoch": 3.947403024326101, "high_lr": 0.00021, "low_lr": 4.2000000000000004e-06, "step": 1501 }, { "epoch": 3.9500328731097962, "grad_norm": 1.3624662160873413, "learning_rate": 0.00020947368421052632, "loss": 1.2916, "step": 1502 }, { "epoch": 3.9500328731097962, "high_lr": 0.00020947368421052632, "low_lr": 4.189473684210527e-06, "step": 1502 }, { "epoch": 3.9500328731097962, "high_lr": 0.00020947368421052632, "low_lr": 4.189473684210527e-06, "step": 1502 }, { "epoch": 3.9500328731097962, "high_lr": 0.00020947368421052632, "low_lr": 4.189473684210527e-06, "step": 1502 }, { "epoch": 3.9500328731097962, "high_lr": 0.00020947368421052632, "low_lr": 4.189473684210527e-06, "step": 1502 }, { "epoch": 3.9500328731097962, "high_lr": 0.00020947368421052632, "low_lr": 4.189473684210527e-06, "step": 1502 }, { "epoch": 3.9500328731097962, "high_lr": 0.00020947368421052632, "low_lr": 4.189473684210527e-06, "step": 1502 }, { "epoch": 3.9500328731097962, "high_lr": 0.00020947368421052632, "low_lr": 4.189473684210527e-06, "step": 1502 }, { "epoch": 3.9500328731097962, "high_lr": 0.00020947368421052632, "low_lr": 4.189473684210527e-06, "step": 1502 }, { "epoch": 3.952662721893491, "grad_norm": 1.3479913473129272, "learning_rate": 0.00020894736842105263, "loss": 1.2774, "step": 1503 }, { "epoch": 3.952662721893491, "high_lr": 0.00020894736842105263, "low_lr": 4.178947368421053e-06, "step": 1503 }, { "epoch": 3.952662721893491, "high_lr": 0.00020894736842105263, "low_lr": 4.178947368421053e-06, "step": 1503 }, { "epoch": 3.952662721893491, "high_lr": 0.00020894736842105263, "low_lr": 4.178947368421053e-06, "step": 1503 }, { "epoch": 3.952662721893491, "high_lr": 0.00020894736842105263, "low_lr": 4.178947368421053e-06, "step": 1503 }, { "epoch": 3.952662721893491, "high_lr": 0.00020894736842105263, "low_lr": 4.178947368421053e-06, "step": 1503 }, { "epoch": 3.952662721893491, "high_lr": 0.00020894736842105263, "low_lr": 4.178947368421053e-06, "step": 1503 }, { "epoch": 3.952662721893491, "high_lr": 0.00020894736842105263, "low_lr": 4.178947368421053e-06, "step": 1503 }, { "epoch": 3.952662721893491, "high_lr": 0.00020894736842105263, "low_lr": 4.178947368421053e-06, "step": 1503 }, { "epoch": 3.955292570677186, "grad_norm": 1.4971421957015991, "learning_rate": 0.00020842105263157895, "loss": 1.3091, "step": 1504 }, { "epoch": 3.955292570677186, "high_lr": 0.00020842105263157895, "low_lr": 4.1684210526315794e-06, "step": 1504 }, { "epoch": 3.955292570677186, "high_lr": 0.00020842105263157895, "low_lr": 4.1684210526315794e-06, "step": 1504 }, { "epoch": 3.955292570677186, "high_lr": 0.00020842105263157895, "low_lr": 4.1684210526315794e-06, "step": 1504 }, { "epoch": 3.955292570677186, "high_lr": 0.00020842105263157895, "low_lr": 4.1684210526315794e-06, "step": 1504 }, { "epoch": 3.955292570677186, "high_lr": 0.00020842105263157895, "low_lr": 4.1684210526315794e-06, "step": 1504 }, { "epoch": 3.955292570677186, "high_lr": 0.00020842105263157895, "low_lr": 4.1684210526315794e-06, "step": 1504 }, { "epoch": 3.955292570677186, "high_lr": 0.00020842105263157895, "low_lr": 4.1684210526315794e-06, "step": 1504 }, { "epoch": 3.955292570677186, "high_lr": 0.00020842105263157895, "low_lr": 4.1684210526315794e-06, "step": 1504 }, { "epoch": 3.957922419460881, "grad_norm": 1.4270907640457153, "learning_rate": 0.0002078947368421053, "loss": 1.2585, "step": 1505 }, { "epoch": 3.957922419460881, "high_lr": 0.0002078947368421053, "low_lr": 4.157894736842106e-06, "step": 1505 }, { "epoch": 3.957922419460881, "high_lr": 0.0002078947368421053, "low_lr": 4.157894736842106e-06, "step": 1505 }, { "epoch": 3.957922419460881, "high_lr": 0.0002078947368421053, "low_lr": 4.157894736842106e-06, "step": 1505 }, { "epoch": 3.957922419460881, "high_lr": 0.0002078947368421053, "low_lr": 4.157894736842106e-06, "step": 1505 }, { "epoch": 3.957922419460881, "high_lr": 0.0002078947368421053, "low_lr": 4.157894736842106e-06, "step": 1505 }, { "epoch": 3.957922419460881, "high_lr": 0.0002078947368421053, "low_lr": 4.157894736842106e-06, "step": 1505 }, { "epoch": 3.957922419460881, "high_lr": 0.0002078947368421053, "low_lr": 4.157894736842106e-06, "step": 1505 }, { "epoch": 3.957922419460881, "high_lr": 0.0002078947368421053, "low_lr": 4.157894736842106e-06, "step": 1505 }, { "epoch": 3.9605522682445757, "grad_norm": 1.5448126792907715, "learning_rate": 0.0002073684210526316, "loss": 1.2704, "step": 1506 }, { "epoch": 3.9605522682445757, "high_lr": 0.0002073684210526316, "low_lr": 4.147368421052632e-06, "step": 1506 }, { "epoch": 3.9605522682445757, "high_lr": 0.0002073684210526316, "low_lr": 4.147368421052632e-06, "step": 1506 }, { "epoch": 3.9605522682445757, "high_lr": 0.0002073684210526316, "low_lr": 4.147368421052632e-06, "step": 1506 }, { "epoch": 3.9605522682445757, "high_lr": 0.0002073684210526316, "low_lr": 4.147368421052632e-06, "step": 1506 }, { "epoch": 3.9605522682445757, "high_lr": 0.0002073684210526316, "low_lr": 4.147368421052632e-06, "step": 1506 }, { "epoch": 3.9605522682445757, "high_lr": 0.0002073684210526316, "low_lr": 4.147368421052632e-06, "step": 1506 }, { "epoch": 3.9605522682445757, "high_lr": 0.0002073684210526316, "low_lr": 4.147368421052632e-06, "step": 1506 }, { "epoch": 3.9605522682445757, "high_lr": 0.0002073684210526316, "low_lr": 4.147368421052632e-06, "step": 1506 }, { "epoch": 3.963182117028271, "grad_norm": 1.743727684020996, "learning_rate": 0.0002068421052631579, "loss": 1.3103, "step": 1507 }, { "epoch": 3.963182117028271, "high_lr": 0.0002068421052631579, "low_lr": 4.136842105263158e-06, "step": 1507 }, { "epoch": 3.963182117028271, "high_lr": 0.0002068421052631579, "low_lr": 4.136842105263158e-06, "step": 1507 }, { "epoch": 3.963182117028271, "high_lr": 0.0002068421052631579, "low_lr": 4.136842105263158e-06, "step": 1507 }, { "epoch": 3.963182117028271, "high_lr": 0.0002068421052631579, "low_lr": 4.136842105263158e-06, "step": 1507 }, { "epoch": 3.963182117028271, "high_lr": 0.0002068421052631579, "low_lr": 4.136842105263158e-06, "step": 1507 }, { "epoch": 3.963182117028271, "high_lr": 0.0002068421052631579, "low_lr": 4.136842105263158e-06, "step": 1507 }, { "epoch": 3.963182117028271, "high_lr": 0.0002068421052631579, "low_lr": 4.136842105263158e-06, "step": 1507 }, { "epoch": 3.963182117028271, "high_lr": 0.0002068421052631579, "low_lr": 4.136842105263158e-06, "step": 1507 }, { "epoch": 3.965811965811966, "grad_norm": 1.3927522897720337, "learning_rate": 0.0002063157894736842, "loss": 1.3128, "step": 1508 }, { "epoch": 3.965811965811966, "high_lr": 0.0002063157894736842, "low_lr": 4.126315789473685e-06, "step": 1508 }, { "epoch": 3.965811965811966, "high_lr": 0.0002063157894736842, "low_lr": 4.126315789473685e-06, "step": 1508 }, { "epoch": 3.965811965811966, "high_lr": 0.0002063157894736842, "low_lr": 4.126315789473685e-06, "step": 1508 }, { "epoch": 3.965811965811966, "high_lr": 0.0002063157894736842, "low_lr": 4.126315789473685e-06, "step": 1508 }, { "epoch": 3.965811965811966, "high_lr": 0.0002063157894736842, "low_lr": 4.126315789473685e-06, "step": 1508 }, { "epoch": 3.965811965811966, "high_lr": 0.0002063157894736842, "low_lr": 4.126315789473685e-06, "step": 1508 }, { "epoch": 3.965811965811966, "high_lr": 0.0002063157894736842, "low_lr": 4.126315789473685e-06, "step": 1508 }, { "epoch": 3.965811965811966, "high_lr": 0.0002063157894736842, "low_lr": 4.126315789473685e-06, "step": 1508 }, { "epoch": 3.9684418145956606, "grad_norm": 1.514550805091858, "learning_rate": 0.00020578947368421054, "loss": 1.2983, "step": 1509 }, { "epoch": 3.9684418145956606, "high_lr": 0.00020578947368421054, "low_lr": 4.115789473684211e-06, "step": 1509 }, { "epoch": 3.9684418145956606, "high_lr": 0.00020578947368421054, "low_lr": 4.115789473684211e-06, "step": 1509 }, { "epoch": 3.9684418145956606, "high_lr": 0.00020578947368421054, "low_lr": 4.115789473684211e-06, "step": 1509 }, { "epoch": 3.9684418145956606, "high_lr": 0.00020578947368421054, "low_lr": 4.115789473684211e-06, "step": 1509 }, { "epoch": 3.9684418145956606, "high_lr": 0.00020578947368421054, "low_lr": 4.115789473684211e-06, "step": 1509 }, { "epoch": 3.9684418145956606, "high_lr": 0.00020578947368421054, "low_lr": 4.115789473684211e-06, "step": 1509 }, { "epoch": 3.9684418145956606, "high_lr": 0.00020578947368421054, "low_lr": 4.115789473684211e-06, "step": 1509 }, { "epoch": 3.9684418145956606, "high_lr": 0.00020578947368421054, "low_lr": 4.115789473684211e-06, "step": 1509 }, { "epoch": 3.9710716633793557, "grad_norm": 1.374853253364563, "learning_rate": 0.00020526315789473685, "loss": 1.2859, "step": 1510 }, { "epoch": 3.9710716633793557, "high_lr": 0.00020526315789473685, "low_lr": 4.105263157894737e-06, "step": 1510 }, { "epoch": 3.9710716633793557, "high_lr": 0.00020526315789473685, "low_lr": 4.105263157894737e-06, "step": 1510 }, { "epoch": 3.9710716633793557, "high_lr": 0.00020526315789473685, "low_lr": 4.105263157894737e-06, "step": 1510 }, { "epoch": 3.9710716633793557, "high_lr": 0.00020526315789473685, "low_lr": 4.105263157894737e-06, "step": 1510 }, { "epoch": 3.9710716633793557, "high_lr": 0.00020526315789473685, "low_lr": 4.105263157894737e-06, "step": 1510 }, { "epoch": 3.9710716633793557, "high_lr": 0.00020526315789473685, "low_lr": 4.105263157894737e-06, "step": 1510 }, { "epoch": 3.9710716633793557, "high_lr": 0.00020526315789473685, "low_lr": 4.105263157894737e-06, "step": 1510 }, { "epoch": 3.9710716633793557, "high_lr": 0.00020526315789473685, "low_lr": 4.105263157894737e-06, "step": 1510 }, { "epoch": 3.973701512163051, "grad_norm": 1.4939712285995483, "learning_rate": 0.00020473684210526317, "loss": 1.2691, "step": 1511 }, { "epoch": 3.973701512163051, "high_lr": 0.00020473684210526317, "low_lr": 4.094736842105264e-06, "step": 1511 }, { "epoch": 3.973701512163051, "high_lr": 0.00020473684210526317, "low_lr": 4.094736842105264e-06, "step": 1511 }, { "epoch": 3.973701512163051, "high_lr": 0.00020473684210526317, "low_lr": 4.094736842105264e-06, "step": 1511 }, { "epoch": 3.973701512163051, "high_lr": 0.00020473684210526317, "low_lr": 4.094736842105264e-06, "step": 1511 }, { "epoch": 3.973701512163051, "high_lr": 0.00020473684210526317, "low_lr": 4.094736842105264e-06, "step": 1511 }, { "epoch": 3.973701512163051, "high_lr": 0.00020473684210526317, "low_lr": 4.094736842105264e-06, "step": 1511 }, { "epoch": 3.973701512163051, "high_lr": 0.00020473684210526317, "low_lr": 4.094736842105264e-06, "step": 1511 }, { "epoch": 3.973701512163051, "high_lr": 0.00020473684210526317, "low_lr": 4.094736842105264e-06, "step": 1511 }, { "epoch": 3.9763313609467454, "grad_norm": 1.342739224433899, "learning_rate": 0.00020421052631578948, "loss": 1.2671, "step": 1512 }, { "epoch": 3.9763313609467454, "high_lr": 0.00020421052631578948, "low_lr": 4.08421052631579e-06, "step": 1512 }, { "epoch": 3.9763313609467454, "high_lr": 0.00020421052631578948, "low_lr": 4.08421052631579e-06, "step": 1512 }, { "epoch": 3.9763313609467454, "high_lr": 0.00020421052631578948, "low_lr": 4.08421052631579e-06, "step": 1512 }, { "epoch": 3.9763313609467454, "high_lr": 0.00020421052631578948, "low_lr": 4.08421052631579e-06, "step": 1512 }, { "epoch": 3.9763313609467454, "high_lr": 0.00020421052631578948, "low_lr": 4.08421052631579e-06, "step": 1512 }, { "epoch": 3.9763313609467454, "high_lr": 0.00020421052631578948, "low_lr": 4.08421052631579e-06, "step": 1512 }, { "epoch": 3.9763313609467454, "high_lr": 0.00020421052631578948, "low_lr": 4.08421052631579e-06, "step": 1512 }, { "epoch": 3.9763313609467454, "high_lr": 0.00020421052631578948, "low_lr": 4.08421052631579e-06, "step": 1512 }, { "epoch": 3.9789612097304405, "grad_norm": 1.4210036993026733, "learning_rate": 0.00020368421052631582, "loss": 1.2986, "step": 1513 }, { "epoch": 3.9789612097304405, "high_lr": 0.00020368421052631582, "low_lr": 4.073684210526316e-06, "step": 1513 }, { "epoch": 3.9789612097304405, "high_lr": 0.00020368421052631582, "low_lr": 4.073684210526316e-06, "step": 1513 }, { "epoch": 3.9789612097304405, "high_lr": 0.00020368421052631582, "low_lr": 4.073684210526316e-06, "step": 1513 }, { "epoch": 3.9789612097304405, "high_lr": 0.00020368421052631582, "low_lr": 4.073684210526316e-06, "step": 1513 }, { "epoch": 3.9789612097304405, "high_lr": 0.00020368421052631582, "low_lr": 4.073684210526316e-06, "step": 1513 }, { "epoch": 3.9789612097304405, "high_lr": 0.00020368421052631582, "low_lr": 4.073684210526316e-06, "step": 1513 }, { "epoch": 3.9789612097304405, "high_lr": 0.00020368421052631582, "low_lr": 4.073684210526316e-06, "step": 1513 }, { "epoch": 3.9789612097304405, "high_lr": 0.00020368421052631582, "low_lr": 4.073684210526316e-06, "step": 1513 }, { "epoch": 3.9815910585141356, "grad_norm": 1.3917219638824463, "learning_rate": 0.0002031578947368421, "loss": 1.2982, "step": 1514 }, { "epoch": 3.9815910585141356, "high_lr": 0.0002031578947368421, "low_lr": 4.063157894736842e-06, "step": 1514 }, { "epoch": 3.9815910585141356, "high_lr": 0.0002031578947368421, "low_lr": 4.063157894736842e-06, "step": 1514 }, { "epoch": 3.9815910585141356, "high_lr": 0.0002031578947368421, "low_lr": 4.063157894736842e-06, "step": 1514 }, { "epoch": 3.9815910585141356, "high_lr": 0.0002031578947368421, "low_lr": 4.063157894736842e-06, "step": 1514 }, { "epoch": 3.9815910585141356, "high_lr": 0.0002031578947368421, "low_lr": 4.063157894736842e-06, "step": 1514 }, { "epoch": 3.9815910585141356, "high_lr": 0.0002031578947368421, "low_lr": 4.063157894736842e-06, "step": 1514 }, { "epoch": 3.9815910585141356, "high_lr": 0.0002031578947368421, "low_lr": 4.063157894736842e-06, "step": 1514 }, { "epoch": 3.9815910585141356, "high_lr": 0.0002031578947368421, "low_lr": 4.063157894736842e-06, "step": 1514 }, { "epoch": 3.9842209072978303, "grad_norm": 1.4617544412612915, "learning_rate": 0.00020263157894736842, "loss": 1.2933, "step": 1515 }, { "epoch": 3.9842209072978303, "high_lr": 0.00020263157894736842, "low_lr": 4.052631578947368e-06, "step": 1515 }, { "epoch": 3.9842209072978303, "high_lr": 0.00020263157894736842, "low_lr": 4.052631578947368e-06, "step": 1515 }, { "epoch": 3.9842209072978303, "high_lr": 0.00020263157894736842, "low_lr": 4.052631578947368e-06, "step": 1515 }, { "epoch": 3.9842209072978303, "high_lr": 0.00020263157894736842, "low_lr": 4.052631578947368e-06, "step": 1515 }, { "epoch": 3.9842209072978303, "high_lr": 0.00020263157894736842, "low_lr": 4.052631578947368e-06, "step": 1515 }, { "epoch": 3.9842209072978303, "high_lr": 0.00020263157894736842, "low_lr": 4.052631578947368e-06, "step": 1515 }, { "epoch": 3.9842209072978303, "high_lr": 0.00020263157894736842, "low_lr": 4.052631578947368e-06, "step": 1515 }, { "epoch": 3.9842209072978303, "high_lr": 0.00020263157894736842, "low_lr": 4.052631578947368e-06, "step": 1515 }, { "epoch": 3.9868507560815254, "grad_norm": 1.5116146802902222, "learning_rate": 0.00020210526315789473, "loss": 1.2887, "step": 1516 }, { "epoch": 3.9868507560815254, "high_lr": 0.00020210526315789473, "low_lr": 4.042105263157895e-06, "step": 1516 }, { "epoch": 3.9868507560815254, "high_lr": 0.00020210526315789473, "low_lr": 4.042105263157895e-06, "step": 1516 }, { "epoch": 3.9868507560815254, "high_lr": 0.00020210526315789473, "low_lr": 4.042105263157895e-06, "step": 1516 }, { "epoch": 3.9868507560815254, "high_lr": 0.00020210526315789473, "low_lr": 4.042105263157895e-06, "step": 1516 }, { "epoch": 3.9868507560815254, "high_lr": 0.00020210526315789473, "low_lr": 4.042105263157895e-06, "step": 1516 }, { "epoch": 3.9868507560815254, "high_lr": 0.00020210526315789473, "low_lr": 4.042105263157895e-06, "step": 1516 }, { "epoch": 3.9868507560815254, "high_lr": 0.00020210526315789473, "low_lr": 4.042105263157895e-06, "step": 1516 }, { "epoch": 3.9868507560815254, "high_lr": 0.00020210526315789473, "low_lr": 4.042105263157895e-06, "step": 1516 }, { "epoch": 3.9894806048652205, "grad_norm": 1.4623873233795166, "learning_rate": 0.00020157894736842104, "loss": 1.3125, "step": 1517 }, { "epoch": 3.9894806048652205, "high_lr": 0.00020157894736842104, "low_lr": 4.031578947368422e-06, "step": 1517 }, { "epoch": 3.9894806048652205, "high_lr": 0.00020157894736842104, "low_lr": 4.031578947368422e-06, "step": 1517 }, { "epoch": 3.9894806048652205, "high_lr": 0.00020157894736842104, "low_lr": 4.031578947368422e-06, "step": 1517 }, { "epoch": 3.9894806048652205, "high_lr": 0.00020157894736842104, "low_lr": 4.031578947368422e-06, "step": 1517 }, { "epoch": 3.9894806048652205, "high_lr": 0.00020157894736842104, "low_lr": 4.031578947368422e-06, "step": 1517 }, { "epoch": 3.9894806048652205, "high_lr": 0.00020157894736842104, "low_lr": 4.031578947368422e-06, "step": 1517 }, { "epoch": 3.9894806048652205, "high_lr": 0.00020157894736842104, "low_lr": 4.031578947368422e-06, "step": 1517 }, { "epoch": 3.9894806048652205, "high_lr": 0.00020157894736842104, "low_lr": 4.031578947368422e-06, "step": 1517 }, { "epoch": 3.992110453648915, "grad_norm": 1.4037643671035767, "learning_rate": 0.00020105263157894738, "loss": 1.3055, "step": 1518 }, { "epoch": 3.992110453648915, "high_lr": 0.00020105263157894738, "low_lr": 4.021052631578948e-06, "step": 1518 }, { "epoch": 3.992110453648915, "high_lr": 0.00020105263157894738, "low_lr": 4.021052631578948e-06, "step": 1518 }, { "epoch": 3.992110453648915, "high_lr": 0.00020105263157894738, "low_lr": 4.021052631578948e-06, "step": 1518 }, { "epoch": 3.992110453648915, "high_lr": 0.00020105263157894738, "low_lr": 4.021052631578948e-06, "step": 1518 }, { "epoch": 3.992110453648915, "high_lr": 0.00020105263157894738, "low_lr": 4.021052631578948e-06, "step": 1518 }, { "epoch": 3.992110453648915, "high_lr": 0.00020105263157894738, "low_lr": 4.021052631578948e-06, "step": 1518 }, { "epoch": 3.992110453648915, "high_lr": 0.00020105263157894738, "low_lr": 4.021052631578948e-06, "step": 1518 }, { "epoch": 3.992110453648915, "high_lr": 0.00020105263157894738, "low_lr": 4.021052631578948e-06, "step": 1518 }, { "epoch": 3.9947403024326102, "grad_norm": 1.492454171180725, "learning_rate": 0.0002005263157894737, "loss": 1.3008, "step": 1519 }, { "epoch": 3.9947403024326102, "high_lr": 0.0002005263157894737, "low_lr": 4.010526315789474e-06, "step": 1519 }, { "epoch": 3.9947403024326102, "high_lr": 0.0002005263157894737, "low_lr": 4.010526315789474e-06, "step": 1519 }, { "epoch": 3.9947403024326102, "high_lr": 0.0002005263157894737, "low_lr": 4.010526315789474e-06, "step": 1519 }, { "epoch": 3.9947403024326102, "high_lr": 0.0002005263157894737, "low_lr": 4.010526315789474e-06, "step": 1519 }, { "epoch": 3.9947403024326102, "high_lr": 0.0002005263157894737, "low_lr": 4.010526315789474e-06, "step": 1519 }, { "epoch": 3.9947403024326102, "high_lr": 0.0002005263157894737, "low_lr": 4.010526315789474e-06, "step": 1519 }, { "epoch": 3.9947403024326102, "high_lr": 0.0002005263157894737, "low_lr": 4.010526315789474e-06, "step": 1519 }, { "epoch": 3.9947403024326102, "high_lr": 0.0002005263157894737, "low_lr": 4.010526315789474e-06, "step": 1519 }, { "epoch": 3.997370151216305, "grad_norm": 1.3126296997070312, "learning_rate": 0.0002, "loss": 1.283, "step": 1520 }, { "epoch": 3.997370151216305, "high_lr": 0.0002, "low_lr": 4.000000000000001e-06, "step": 1520 }, { "epoch": 3.997370151216305, "high_lr": 0.0002, "low_lr": 4.000000000000001e-06, "step": 1520 }, { "epoch": 3.997370151216305, "high_lr": 0.0002, "low_lr": 4.000000000000001e-06, "step": 1520 }, { "epoch": 3.997370151216305, "high_lr": 0.0002, "low_lr": 4.000000000000001e-06, "step": 1520 }, { "epoch": 3.997370151216305, "high_lr": 0.0002, "low_lr": 4.000000000000001e-06, "step": 1520 }, { "epoch": 3.997370151216305, "high_lr": 0.0002, "low_lr": 4.000000000000001e-06, "step": 1520 }, { "epoch": 3.997370151216305, "high_lr": 0.0002, "low_lr": 4.000000000000001e-06, "step": 1520 }, { "epoch": 3.997370151216305, "high_lr": 0.0002, "low_lr": 4.000000000000001e-06, "step": 1520 }, { "epoch": 4.0, "grad_norm": 1.4551256895065308, "learning_rate": 0.0001994736842105263, "loss": 1.2644, "step": 1521 }, { "epoch": 4.0, "high_lr": 0.0001994736842105263, "low_lr": 3.989473684210526e-06, "step": 1521 }, { "epoch": 4.0, "high_lr": 0.0001994736842105263, "low_lr": 3.989473684210526e-06, "step": 1521 }, { "epoch": 4.0, "high_lr": 0.0001994736842105263, "low_lr": 3.989473684210526e-06, "step": 1521 }, { "epoch": 4.0, "high_lr": 0.0001994736842105263, "low_lr": 3.989473684210526e-06, "step": 1521 }, { "epoch": 4.0, "high_lr": 0.0001994736842105263, "low_lr": 3.989473684210526e-06, "step": 1521 }, { "epoch": 4.0, "high_lr": 0.0001994736842105263, "low_lr": 3.989473684210526e-06, "step": 1521 }, { "epoch": 4.0, "high_lr": 0.0001994736842105263, "low_lr": 3.989473684210526e-06, "step": 1521 }, { "epoch": 4.0, "high_lr": 0.0001994736842105263, "low_lr": 3.989473684210526e-06, "step": 1521 }, { "epoch": 4.002629848783695, "grad_norm": 1.3417943716049194, "learning_rate": 0.00019894736842105264, "loss": 1.2327, "step": 1522 }, { "epoch": 4.002629848783695, "high_lr": 0.00019894736842105264, "low_lr": 3.9789473684210525e-06, "step": 1522 }, { "epoch": 4.002629848783695, "high_lr": 0.00019894736842105264, "low_lr": 3.9789473684210525e-06, "step": 1522 }, { "epoch": 4.002629848783695, "high_lr": 0.00019894736842105264, "low_lr": 3.9789473684210525e-06, "step": 1522 }, { "epoch": 4.002629848783695, "high_lr": 0.00019894736842105264, "low_lr": 3.9789473684210525e-06, "step": 1522 }, { "epoch": 4.002629848783695, "high_lr": 0.00019894736842105264, "low_lr": 3.9789473684210525e-06, "step": 1522 }, { "epoch": 4.002629848783695, "high_lr": 0.00019894736842105264, "low_lr": 3.9789473684210525e-06, "step": 1522 }, { "epoch": 4.002629848783695, "high_lr": 0.00019894736842105264, "low_lr": 3.9789473684210525e-06, "step": 1522 }, { "epoch": 4.002629848783695, "high_lr": 0.00019894736842105264, "low_lr": 3.9789473684210525e-06, "step": 1522 }, { "epoch": 4.00525969756739, "grad_norm": 1.7795209884643555, "learning_rate": 0.00019842105263157895, "loss": 1.2555, "step": 1523 }, { "epoch": 4.00525969756739, "high_lr": 0.00019842105263157895, "low_lr": 3.968421052631579e-06, "step": 1523 }, { "epoch": 4.00525969756739, "high_lr": 0.00019842105263157895, "low_lr": 3.968421052631579e-06, "step": 1523 }, { "epoch": 4.00525969756739, "high_lr": 0.00019842105263157895, "low_lr": 3.968421052631579e-06, "step": 1523 }, { "epoch": 4.00525969756739, "high_lr": 0.00019842105263157895, "low_lr": 3.968421052631579e-06, "step": 1523 }, { "epoch": 4.00525969756739, "high_lr": 0.00019842105263157895, "low_lr": 3.968421052631579e-06, "step": 1523 }, { "epoch": 4.00525969756739, "high_lr": 0.00019842105263157895, "low_lr": 3.968421052631579e-06, "step": 1523 }, { "epoch": 4.00525969756739, "high_lr": 0.00019842105263157895, "low_lr": 3.968421052631579e-06, "step": 1523 }, { "epoch": 4.00525969756739, "high_lr": 0.00019842105263157895, "low_lr": 3.968421052631579e-06, "step": 1523 }, { "epoch": 4.007889546351085, "grad_norm": 1.3884600400924683, "learning_rate": 0.00019789473684210526, "loss": 1.2375, "step": 1524 }, { "epoch": 4.007889546351085, "high_lr": 0.00019789473684210526, "low_lr": 3.957894736842106e-06, "step": 1524 }, { "epoch": 4.007889546351085, "high_lr": 0.00019789473684210526, "low_lr": 3.957894736842106e-06, "step": 1524 }, { "epoch": 4.007889546351085, "high_lr": 0.00019789473684210526, "low_lr": 3.957894736842106e-06, "step": 1524 }, { "epoch": 4.007889546351085, "high_lr": 0.00019789473684210526, "low_lr": 3.957894736842106e-06, "step": 1524 }, { "epoch": 4.007889546351085, "high_lr": 0.00019789473684210526, "low_lr": 3.957894736842106e-06, "step": 1524 }, { "epoch": 4.007889546351085, "high_lr": 0.00019789473684210526, "low_lr": 3.957894736842106e-06, "step": 1524 }, { "epoch": 4.007889546351085, "high_lr": 0.00019789473684210526, "low_lr": 3.957894736842106e-06, "step": 1524 }, { "epoch": 4.007889546351085, "high_lr": 0.00019789473684210526, "low_lr": 3.957894736842106e-06, "step": 1524 }, { "epoch": 4.0105193951347795, "grad_norm": 1.4155784845352173, "learning_rate": 0.00019736842105263157, "loss": 1.2417, "step": 1525 }, { "epoch": 4.0105193951347795, "high_lr": 0.00019736842105263157, "low_lr": 3.947368421052632e-06, "step": 1525 }, { "epoch": 4.0105193951347795, "high_lr": 0.00019736842105263157, "low_lr": 3.947368421052632e-06, "step": 1525 }, { "epoch": 4.0105193951347795, "high_lr": 0.00019736842105263157, "low_lr": 3.947368421052632e-06, "step": 1525 }, { "epoch": 4.0105193951347795, "high_lr": 0.00019736842105263157, "low_lr": 3.947368421052632e-06, "step": 1525 }, { "epoch": 4.0105193951347795, "high_lr": 0.00019736842105263157, "low_lr": 3.947368421052632e-06, "step": 1525 }, { "epoch": 4.0105193951347795, "high_lr": 0.00019736842105263157, "low_lr": 3.947368421052632e-06, "step": 1525 }, { "epoch": 4.0105193951347795, "high_lr": 0.00019736842105263157, "low_lr": 3.947368421052632e-06, "step": 1525 }, { "epoch": 4.0105193951347795, "high_lr": 0.00019736842105263157, "low_lr": 3.947368421052632e-06, "step": 1525 }, { "epoch": 4.013149243918475, "grad_norm": 1.4495775699615479, "learning_rate": 0.00019684210526315791, "loss": 1.2783, "step": 1526 }, { "epoch": 4.013149243918475, "high_lr": 0.00019684210526315791, "low_lr": 3.936842105263159e-06, "step": 1526 }, { "epoch": 4.013149243918475, "high_lr": 0.00019684210526315791, "low_lr": 3.936842105263159e-06, "step": 1526 }, { "epoch": 4.013149243918475, "high_lr": 0.00019684210526315791, "low_lr": 3.936842105263159e-06, "step": 1526 }, { "epoch": 4.013149243918475, "high_lr": 0.00019684210526315791, "low_lr": 3.936842105263159e-06, "step": 1526 }, { "epoch": 4.013149243918475, "high_lr": 0.00019684210526315791, "low_lr": 3.936842105263159e-06, "step": 1526 }, { "epoch": 4.013149243918475, "high_lr": 0.00019684210526315791, "low_lr": 3.936842105263159e-06, "step": 1526 }, { "epoch": 4.013149243918475, "high_lr": 0.00019684210526315791, "low_lr": 3.936842105263159e-06, "step": 1526 }, { "epoch": 4.013149243918475, "high_lr": 0.00019684210526315791, "low_lr": 3.936842105263159e-06, "step": 1526 }, { "epoch": 4.01577909270217, "grad_norm": 1.4377909898757935, "learning_rate": 0.00019631578947368423, "loss": 1.2653, "step": 1527 }, { "epoch": 4.01577909270217, "high_lr": 0.00019631578947368423, "low_lr": 3.926315789473685e-06, "step": 1527 }, { "epoch": 4.01577909270217, "high_lr": 0.00019631578947368423, "low_lr": 3.926315789473685e-06, "step": 1527 }, { "epoch": 4.01577909270217, "high_lr": 0.00019631578947368423, "low_lr": 3.926315789473685e-06, "step": 1527 }, { "epoch": 4.01577909270217, "high_lr": 0.00019631578947368423, "low_lr": 3.926315789473685e-06, "step": 1527 }, { "epoch": 4.01577909270217, "high_lr": 0.00019631578947368423, "low_lr": 3.926315789473685e-06, "step": 1527 }, { "epoch": 4.01577909270217, "high_lr": 0.00019631578947368423, "low_lr": 3.926315789473685e-06, "step": 1527 }, { "epoch": 4.01577909270217, "high_lr": 0.00019631578947368423, "low_lr": 3.926315789473685e-06, "step": 1527 }, { "epoch": 4.01577909270217, "high_lr": 0.00019631578947368423, "low_lr": 3.926315789473685e-06, "step": 1527 }, { "epoch": 4.018408941485864, "grad_norm": 1.4182953834533691, "learning_rate": 0.0001957894736842105, "loss": 1.2415, "step": 1528 }, { "epoch": 4.018408941485864, "high_lr": 0.0001957894736842105, "low_lr": 3.9157894736842104e-06, "step": 1528 }, { "epoch": 4.018408941485864, "high_lr": 0.0001957894736842105, "low_lr": 3.9157894736842104e-06, "step": 1528 }, { "epoch": 4.018408941485864, "high_lr": 0.0001957894736842105, "low_lr": 3.9157894736842104e-06, "step": 1528 }, { "epoch": 4.018408941485864, "high_lr": 0.0001957894736842105, "low_lr": 3.9157894736842104e-06, "step": 1528 }, { "epoch": 4.018408941485864, "high_lr": 0.0001957894736842105, "low_lr": 3.9157894736842104e-06, "step": 1528 }, { "epoch": 4.018408941485864, "high_lr": 0.0001957894736842105, "low_lr": 3.9157894736842104e-06, "step": 1528 }, { "epoch": 4.018408941485864, "high_lr": 0.0001957894736842105, "low_lr": 3.9157894736842104e-06, "step": 1528 }, { "epoch": 4.018408941485864, "high_lr": 0.0001957894736842105, "low_lr": 3.9157894736842104e-06, "step": 1528 }, { "epoch": 4.02103879026956, "grad_norm": 1.4995777606964111, "learning_rate": 0.00019526315789473683, "loss": 1.2168, "step": 1529 }, { "epoch": 4.02103879026956, "high_lr": 0.00019526315789473683, "low_lr": 3.905263157894737e-06, "step": 1529 }, { "epoch": 4.02103879026956, "high_lr": 0.00019526315789473683, "low_lr": 3.905263157894737e-06, "step": 1529 }, { "epoch": 4.02103879026956, "high_lr": 0.00019526315789473683, "low_lr": 3.905263157894737e-06, "step": 1529 }, { "epoch": 4.02103879026956, "high_lr": 0.00019526315789473683, "low_lr": 3.905263157894737e-06, "step": 1529 }, { "epoch": 4.02103879026956, "high_lr": 0.00019526315789473683, "low_lr": 3.905263157894737e-06, "step": 1529 }, { "epoch": 4.02103879026956, "high_lr": 0.00019526315789473683, "low_lr": 3.905263157894737e-06, "step": 1529 }, { "epoch": 4.02103879026956, "high_lr": 0.00019526315789473683, "low_lr": 3.905263157894737e-06, "step": 1529 }, { "epoch": 4.02103879026956, "high_lr": 0.00019526315789473683, "low_lr": 3.905263157894737e-06, "step": 1529 }, { "epoch": 4.023668639053255, "grad_norm": 1.4290416240692139, "learning_rate": 0.00019473684210526317, "loss": 1.2391, "step": 1530 }, { "epoch": 4.023668639053255, "high_lr": 0.00019473684210526317, "low_lr": 3.894736842105263e-06, "step": 1530 }, { "epoch": 4.023668639053255, "high_lr": 0.00019473684210526317, "low_lr": 3.894736842105263e-06, "step": 1530 }, { "epoch": 4.023668639053255, "high_lr": 0.00019473684210526317, "low_lr": 3.894736842105263e-06, "step": 1530 }, { "epoch": 4.023668639053255, "high_lr": 0.00019473684210526317, "low_lr": 3.894736842105263e-06, "step": 1530 }, { "epoch": 4.023668639053255, "high_lr": 0.00019473684210526317, "low_lr": 3.894736842105263e-06, "step": 1530 }, { "epoch": 4.023668639053255, "high_lr": 0.00019473684210526317, "low_lr": 3.894736842105263e-06, "step": 1530 }, { "epoch": 4.023668639053255, "high_lr": 0.00019473684210526317, "low_lr": 3.894736842105263e-06, "step": 1530 }, { "epoch": 4.023668639053255, "high_lr": 0.00019473684210526317, "low_lr": 3.894736842105263e-06, "step": 1530 }, { "epoch": 4.026298487836949, "grad_norm": 1.347391963005066, "learning_rate": 0.00019421052631578948, "loss": 1.239, "step": 1531 }, { "epoch": 4.026298487836949, "high_lr": 0.00019421052631578948, "low_lr": 3.884210526315789e-06, "step": 1531 }, { "epoch": 4.026298487836949, "high_lr": 0.00019421052631578948, "low_lr": 3.884210526315789e-06, "step": 1531 }, { "epoch": 4.026298487836949, "high_lr": 0.00019421052631578948, "low_lr": 3.884210526315789e-06, "step": 1531 }, { "epoch": 4.026298487836949, "high_lr": 0.00019421052631578948, "low_lr": 3.884210526315789e-06, "step": 1531 }, { "epoch": 4.026298487836949, "high_lr": 0.00019421052631578948, "low_lr": 3.884210526315789e-06, "step": 1531 }, { "epoch": 4.026298487836949, "high_lr": 0.00019421052631578948, "low_lr": 3.884210526315789e-06, "step": 1531 }, { "epoch": 4.026298487836949, "high_lr": 0.00019421052631578948, "low_lr": 3.884210526315789e-06, "step": 1531 }, { "epoch": 4.026298487836949, "high_lr": 0.00019421052631578948, "low_lr": 3.884210526315789e-06, "step": 1531 }, { "epoch": 4.028928336620645, "grad_norm": 1.490256667137146, "learning_rate": 0.0001936842105263158, "loss": 1.187, "step": 1532 }, { "epoch": 4.028928336620645, "high_lr": 0.0001936842105263158, "low_lr": 3.873684210526316e-06, "step": 1532 }, { "epoch": 4.028928336620645, "high_lr": 0.0001936842105263158, "low_lr": 3.873684210526316e-06, "step": 1532 }, { "epoch": 4.028928336620645, "high_lr": 0.0001936842105263158, "low_lr": 3.873684210526316e-06, "step": 1532 }, { "epoch": 4.028928336620645, "high_lr": 0.0001936842105263158, "low_lr": 3.873684210526316e-06, "step": 1532 }, { "epoch": 4.028928336620645, "high_lr": 0.0001936842105263158, "low_lr": 3.873684210526316e-06, "step": 1532 }, { "epoch": 4.028928336620645, "high_lr": 0.0001936842105263158, "low_lr": 3.873684210526316e-06, "step": 1532 }, { "epoch": 4.028928336620645, "high_lr": 0.0001936842105263158, "low_lr": 3.873684210526316e-06, "step": 1532 }, { "epoch": 4.028928336620645, "high_lr": 0.0001936842105263158, "low_lr": 3.873684210526316e-06, "step": 1532 }, { "epoch": 4.031558185404339, "grad_norm": 1.5121945142745972, "learning_rate": 0.0001931578947368421, "loss": 1.1633, "step": 1533 }, { "epoch": 4.031558185404339, "high_lr": 0.0001931578947368421, "low_lr": 3.863157894736843e-06, "step": 1533 }, { "epoch": 4.031558185404339, "high_lr": 0.0001931578947368421, "low_lr": 3.863157894736843e-06, "step": 1533 }, { "epoch": 4.031558185404339, "high_lr": 0.0001931578947368421, "low_lr": 3.863157894736843e-06, "step": 1533 }, { "epoch": 4.031558185404339, "high_lr": 0.0001931578947368421, "low_lr": 3.863157894736843e-06, "step": 1533 }, { "epoch": 4.031558185404339, "high_lr": 0.0001931578947368421, "low_lr": 3.863157894736843e-06, "step": 1533 }, { "epoch": 4.031558185404339, "high_lr": 0.0001931578947368421, "low_lr": 3.863157894736843e-06, "step": 1533 }, { "epoch": 4.031558185404339, "high_lr": 0.0001931578947368421, "low_lr": 3.863157894736843e-06, "step": 1533 }, { "epoch": 4.031558185404339, "high_lr": 0.0001931578947368421, "low_lr": 3.863157894736843e-06, "step": 1533 }, { "epoch": 4.034188034188034, "grad_norm": 1.5313997268676758, "learning_rate": 0.00019263157894736845, "loss": 1.2624, "step": 1534 }, { "epoch": 4.034188034188034, "high_lr": 0.00019263157894736845, "low_lr": 3.852631578947369e-06, "step": 1534 }, { "epoch": 4.034188034188034, "high_lr": 0.00019263157894736845, "low_lr": 3.852631578947369e-06, "step": 1534 }, { "epoch": 4.034188034188034, "high_lr": 0.00019263157894736845, "low_lr": 3.852631578947369e-06, "step": 1534 }, { "epoch": 4.034188034188034, "high_lr": 0.00019263157894736845, "low_lr": 3.852631578947369e-06, "step": 1534 }, { "epoch": 4.034188034188034, "high_lr": 0.00019263157894736845, "low_lr": 3.852631578947369e-06, "step": 1534 }, { "epoch": 4.034188034188034, "high_lr": 0.00019263157894736845, "low_lr": 3.852631578947369e-06, "step": 1534 }, { "epoch": 4.034188034188034, "high_lr": 0.00019263157894736845, "low_lr": 3.852631578947369e-06, "step": 1534 }, { "epoch": 4.034188034188034, "high_lr": 0.00019263157894736845, "low_lr": 3.852631578947369e-06, "step": 1534 }, { "epoch": 4.036817882971729, "grad_norm": 1.4823217391967773, "learning_rate": 0.00019210526315789473, "loss": 1.2216, "step": 1535 }, { "epoch": 4.036817882971729, "high_lr": 0.00019210526315789473, "low_lr": 3.842105263157895e-06, "step": 1535 }, { "epoch": 4.036817882971729, "high_lr": 0.00019210526315789473, "low_lr": 3.842105263157895e-06, "step": 1535 }, { "epoch": 4.036817882971729, "high_lr": 0.00019210526315789473, "low_lr": 3.842105263157895e-06, "step": 1535 }, { "epoch": 4.036817882971729, "high_lr": 0.00019210526315789473, "low_lr": 3.842105263157895e-06, "step": 1535 }, { "epoch": 4.036817882971729, "high_lr": 0.00019210526315789473, "low_lr": 3.842105263157895e-06, "step": 1535 }, { "epoch": 4.036817882971729, "high_lr": 0.00019210526315789473, "low_lr": 3.842105263157895e-06, "step": 1535 }, { "epoch": 4.036817882971729, "high_lr": 0.00019210526315789473, "low_lr": 3.842105263157895e-06, "step": 1535 }, { "epoch": 4.036817882971729, "high_lr": 0.00019210526315789473, "low_lr": 3.842105263157895e-06, "step": 1535 }, { "epoch": 4.039447731755424, "grad_norm": 1.5323439836502075, "learning_rate": 0.00019157894736842104, "loss": 1.2006, "step": 1536 }, { "epoch": 4.039447731755424, "high_lr": 0.00019157894736842104, "low_lr": 3.831578947368421e-06, "step": 1536 }, { "epoch": 4.039447731755424, "high_lr": 0.00019157894736842104, "low_lr": 3.831578947368421e-06, "step": 1536 }, { "epoch": 4.039447731755424, "high_lr": 0.00019157894736842104, "low_lr": 3.831578947368421e-06, "step": 1536 }, { "epoch": 4.039447731755424, "high_lr": 0.00019157894736842104, "low_lr": 3.831578947368421e-06, "step": 1536 }, { "epoch": 4.039447731755424, "high_lr": 0.00019157894736842104, "low_lr": 3.831578947368421e-06, "step": 1536 }, { "epoch": 4.039447731755424, "high_lr": 0.00019157894736842104, "low_lr": 3.831578947368421e-06, "step": 1536 }, { "epoch": 4.039447731755424, "high_lr": 0.00019157894736842104, "low_lr": 3.831578947368421e-06, "step": 1536 }, { "epoch": 4.039447731755424, "high_lr": 0.00019157894736842104, "low_lr": 3.831578947368421e-06, "step": 1536 }, { "epoch": 4.042077580539119, "grad_norm": 1.4558566808700562, "learning_rate": 0.00019105263157894736, "loss": 1.1886, "step": 1537 }, { "epoch": 4.042077580539119, "high_lr": 0.00019105263157894736, "low_lr": 3.821052631578947e-06, "step": 1537 }, { "epoch": 4.042077580539119, "high_lr": 0.00019105263157894736, "low_lr": 3.821052631578947e-06, "step": 1537 }, { "epoch": 4.042077580539119, "high_lr": 0.00019105263157894736, "low_lr": 3.821052631578947e-06, "step": 1537 }, { "epoch": 4.042077580539119, "high_lr": 0.00019105263157894736, "low_lr": 3.821052631578947e-06, "step": 1537 }, { "epoch": 4.042077580539119, "high_lr": 0.00019105263157894736, "low_lr": 3.821052631578947e-06, "step": 1537 }, { "epoch": 4.042077580539119, "high_lr": 0.00019105263157894736, "low_lr": 3.821052631578947e-06, "step": 1537 }, { "epoch": 4.042077580539119, "high_lr": 0.00019105263157894736, "low_lr": 3.821052631578947e-06, "step": 1537 }, { "epoch": 4.042077580539119, "high_lr": 0.00019105263157894736, "low_lr": 3.821052631578947e-06, "step": 1537 }, { "epoch": 4.044707429322814, "grad_norm": 1.3596470355987549, "learning_rate": 0.0001905263157894737, "loss": 1.2352, "step": 1538 }, { "epoch": 4.044707429322814, "high_lr": 0.0001905263157894737, "low_lr": 3.810526315789474e-06, "step": 1538 }, { "epoch": 4.044707429322814, "high_lr": 0.0001905263157894737, "low_lr": 3.810526315789474e-06, "step": 1538 }, { "epoch": 4.044707429322814, "high_lr": 0.0001905263157894737, "low_lr": 3.810526315789474e-06, "step": 1538 }, { "epoch": 4.044707429322814, "high_lr": 0.0001905263157894737, "low_lr": 3.810526315789474e-06, "step": 1538 }, { "epoch": 4.044707429322814, "high_lr": 0.0001905263157894737, "low_lr": 3.810526315789474e-06, "step": 1538 }, { "epoch": 4.044707429322814, "high_lr": 0.0001905263157894737, "low_lr": 3.810526315789474e-06, "step": 1538 }, { "epoch": 4.044707429322814, "high_lr": 0.0001905263157894737, "low_lr": 3.810526315789474e-06, "step": 1538 }, { "epoch": 4.044707429322814, "high_lr": 0.0001905263157894737, "low_lr": 3.810526315789474e-06, "step": 1538 }, { "epoch": 4.047337278106509, "grad_norm": 1.5192526578903198, "learning_rate": 0.00019, "loss": 1.2235, "step": 1539 }, { "epoch": 4.047337278106509, "high_lr": 0.00019, "low_lr": 3.8000000000000005e-06, "step": 1539 }, { "epoch": 4.047337278106509, "high_lr": 0.00019, "low_lr": 3.8000000000000005e-06, "step": 1539 }, { "epoch": 4.047337278106509, "high_lr": 0.00019, "low_lr": 3.8000000000000005e-06, "step": 1539 }, { "epoch": 4.047337278106509, "high_lr": 0.00019, "low_lr": 3.8000000000000005e-06, "step": 1539 }, { "epoch": 4.047337278106509, "high_lr": 0.00019, "low_lr": 3.8000000000000005e-06, "step": 1539 }, { "epoch": 4.047337278106509, "high_lr": 0.00019, "low_lr": 3.8000000000000005e-06, "step": 1539 }, { "epoch": 4.047337278106509, "high_lr": 0.00019, "low_lr": 3.8000000000000005e-06, "step": 1539 }, { "epoch": 4.047337278106509, "high_lr": 0.00019, "low_lr": 3.8000000000000005e-06, "step": 1539 }, { "epoch": 4.049967126890204, "grad_norm": 1.5695946216583252, "learning_rate": 0.00018947368421052632, "loss": 1.2062, "step": 1540 }, { "epoch": 4.049967126890204, "high_lr": 0.00018947368421052632, "low_lr": 3.789473684210527e-06, "step": 1540 }, { "epoch": 4.049967126890204, "high_lr": 0.00018947368421052632, "low_lr": 3.789473684210527e-06, "step": 1540 }, { "epoch": 4.049967126890204, "high_lr": 0.00018947368421052632, "low_lr": 3.789473684210527e-06, "step": 1540 }, { "epoch": 4.049967126890204, "high_lr": 0.00018947368421052632, "low_lr": 3.789473684210527e-06, "step": 1540 }, { "epoch": 4.049967126890204, "high_lr": 0.00018947368421052632, "low_lr": 3.789473684210527e-06, "step": 1540 }, { "epoch": 4.049967126890204, "high_lr": 0.00018947368421052632, "low_lr": 3.789473684210527e-06, "step": 1540 }, { "epoch": 4.049967126890204, "high_lr": 0.00018947368421052632, "low_lr": 3.789473684210527e-06, "step": 1540 }, { "epoch": 4.049967126890204, "high_lr": 0.00018947368421052632, "low_lr": 3.789473684210527e-06, "step": 1540 }, { "epoch": 4.052596975673898, "grad_norm": 1.7779476642608643, "learning_rate": 0.00018894736842105264, "loss": 1.2194, "step": 1541 }, { "epoch": 4.052596975673898, "high_lr": 0.00018894736842105264, "low_lr": 3.778947368421053e-06, "step": 1541 }, { "epoch": 4.052596975673898, "high_lr": 0.00018894736842105264, "low_lr": 3.778947368421053e-06, "step": 1541 }, { "epoch": 4.052596975673898, "high_lr": 0.00018894736842105264, "low_lr": 3.778947368421053e-06, "step": 1541 }, { "epoch": 4.052596975673898, "high_lr": 0.00018894736842105264, "low_lr": 3.778947368421053e-06, "step": 1541 }, { "epoch": 4.052596975673898, "high_lr": 0.00018894736842105264, "low_lr": 3.778947368421053e-06, "step": 1541 }, { "epoch": 4.052596975673898, "high_lr": 0.00018894736842105264, "low_lr": 3.778947368421053e-06, "step": 1541 }, { "epoch": 4.052596975673898, "high_lr": 0.00018894736842105264, "low_lr": 3.778947368421053e-06, "step": 1541 }, { "epoch": 4.052596975673898, "high_lr": 0.00018894736842105264, "low_lr": 3.778947368421053e-06, "step": 1541 }, { "epoch": 4.055226824457594, "grad_norm": 1.5357064008712769, "learning_rate": 0.00018842105263157895, "loss": 1.2305, "step": 1542 }, { "epoch": 4.055226824457594, "high_lr": 0.00018842105263157895, "low_lr": 3.768421052631579e-06, "step": 1542 }, { "epoch": 4.055226824457594, "high_lr": 0.00018842105263157895, "low_lr": 3.768421052631579e-06, "step": 1542 }, { "epoch": 4.055226824457594, "high_lr": 0.00018842105263157895, "low_lr": 3.768421052631579e-06, "step": 1542 }, { "epoch": 4.055226824457594, "high_lr": 0.00018842105263157895, "low_lr": 3.768421052631579e-06, "step": 1542 }, { "epoch": 4.055226824457594, "high_lr": 0.00018842105263157895, "low_lr": 3.768421052631579e-06, "step": 1542 }, { "epoch": 4.055226824457594, "high_lr": 0.00018842105263157895, "low_lr": 3.768421052631579e-06, "step": 1542 }, { "epoch": 4.055226824457594, "high_lr": 0.00018842105263157895, "low_lr": 3.768421052631579e-06, "step": 1542 }, { "epoch": 4.055226824457594, "high_lr": 0.00018842105263157895, "low_lr": 3.768421052631579e-06, "step": 1542 }, { "epoch": 4.057856673241289, "grad_norm": 1.4252111911773682, "learning_rate": 0.00018789473684210526, "loss": 1.2283, "step": 1543 }, { "epoch": 4.057856673241289, "high_lr": 0.00018789473684210526, "low_lr": 3.7578947368421053e-06, "step": 1543 }, { "epoch": 4.057856673241289, "high_lr": 0.00018789473684210526, "low_lr": 3.7578947368421053e-06, "step": 1543 }, { "epoch": 4.057856673241289, "high_lr": 0.00018789473684210526, "low_lr": 3.7578947368421053e-06, "step": 1543 }, { "epoch": 4.057856673241289, "high_lr": 0.00018789473684210526, "low_lr": 3.7578947368421053e-06, "step": 1543 }, { "epoch": 4.057856673241289, "high_lr": 0.00018789473684210526, "low_lr": 3.7578947368421053e-06, "step": 1543 }, { "epoch": 4.057856673241289, "high_lr": 0.00018789473684210526, "low_lr": 3.7578947368421053e-06, "step": 1543 }, { "epoch": 4.057856673241289, "high_lr": 0.00018789473684210526, "low_lr": 3.7578947368421053e-06, "step": 1543 }, { "epoch": 4.057856673241289, "high_lr": 0.00018789473684210526, "low_lr": 3.7578947368421053e-06, "step": 1543 }, { "epoch": 4.060486522024983, "grad_norm": 1.531919240951538, "learning_rate": 0.00018736842105263158, "loss": 1.2027, "step": 1544 }, { "epoch": 4.060486522024983, "high_lr": 0.00018736842105263158, "low_lr": 3.7473684210526317e-06, "step": 1544 }, { "epoch": 4.060486522024983, "high_lr": 0.00018736842105263158, "low_lr": 3.7473684210526317e-06, "step": 1544 }, { "epoch": 4.060486522024983, "high_lr": 0.00018736842105263158, "low_lr": 3.7473684210526317e-06, "step": 1544 }, { "epoch": 4.060486522024983, "high_lr": 0.00018736842105263158, "low_lr": 3.7473684210526317e-06, "step": 1544 }, { "epoch": 4.060486522024983, "high_lr": 0.00018736842105263158, "low_lr": 3.7473684210526317e-06, "step": 1544 }, { "epoch": 4.060486522024983, "high_lr": 0.00018736842105263158, "low_lr": 3.7473684210526317e-06, "step": 1544 }, { "epoch": 4.060486522024983, "high_lr": 0.00018736842105263158, "low_lr": 3.7473684210526317e-06, "step": 1544 }, { "epoch": 4.060486522024983, "high_lr": 0.00018736842105263158, "low_lr": 3.7473684210526317e-06, "step": 1544 }, { "epoch": 4.063116370808679, "grad_norm": 1.4464205503463745, "learning_rate": 0.0001868421052631579, "loss": 1.184, "step": 1545 }, { "epoch": 4.063116370808679, "high_lr": 0.0001868421052631579, "low_lr": 3.736842105263158e-06, "step": 1545 }, { "epoch": 4.063116370808679, "high_lr": 0.0001868421052631579, "low_lr": 3.736842105263158e-06, "step": 1545 }, { "epoch": 4.063116370808679, "high_lr": 0.0001868421052631579, "low_lr": 3.736842105263158e-06, "step": 1545 }, { "epoch": 4.063116370808679, "high_lr": 0.0001868421052631579, "low_lr": 3.736842105263158e-06, "step": 1545 }, { "epoch": 4.063116370808679, "high_lr": 0.0001868421052631579, "low_lr": 3.736842105263158e-06, "step": 1545 }, { "epoch": 4.063116370808679, "high_lr": 0.0001868421052631579, "low_lr": 3.736842105263158e-06, "step": 1545 }, { "epoch": 4.063116370808679, "high_lr": 0.0001868421052631579, "low_lr": 3.736842105263158e-06, "step": 1545 }, { "epoch": 4.063116370808679, "high_lr": 0.0001868421052631579, "low_lr": 3.736842105263158e-06, "step": 1545 }, { "epoch": 4.0657462195923735, "grad_norm": 1.3648449182510376, "learning_rate": 0.00018631578947368423, "loss": 1.2097, "step": 1546 }, { "epoch": 4.0657462195923735, "high_lr": 0.00018631578947368423, "low_lr": 3.7263157894736848e-06, "step": 1546 }, { "epoch": 4.0657462195923735, "high_lr": 0.00018631578947368423, "low_lr": 3.7263157894736848e-06, "step": 1546 }, { "epoch": 4.0657462195923735, "high_lr": 0.00018631578947368423, "low_lr": 3.7263157894736848e-06, "step": 1546 }, { "epoch": 4.0657462195923735, "high_lr": 0.00018631578947368423, "low_lr": 3.7263157894736848e-06, "step": 1546 }, { "epoch": 4.0657462195923735, "high_lr": 0.00018631578947368423, "low_lr": 3.7263157894736848e-06, "step": 1546 }, { "epoch": 4.0657462195923735, "high_lr": 0.00018631578947368423, "low_lr": 3.7263157894736848e-06, "step": 1546 }, { "epoch": 4.0657462195923735, "high_lr": 0.00018631578947368423, "low_lr": 3.7263157894736848e-06, "step": 1546 }, { "epoch": 4.0657462195923735, "high_lr": 0.00018631578947368423, "low_lr": 3.7263157894736848e-06, "step": 1546 }, { "epoch": 4.068376068376068, "grad_norm": 1.4777864217758179, "learning_rate": 0.00018578947368421054, "loss": 1.2012, "step": 1547 }, { "epoch": 4.068376068376068, "high_lr": 0.00018578947368421054, "low_lr": 3.715789473684211e-06, "step": 1547 }, { "epoch": 4.068376068376068, "high_lr": 0.00018578947368421054, "low_lr": 3.715789473684211e-06, "step": 1547 }, { "epoch": 4.068376068376068, "high_lr": 0.00018578947368421054, "low_lr": 3.715789473684211e-06, "step": 1547 }, { "epoch": 4.068376068376068, "high_lr": 0.00018578947368421054, "low_lr": 3.715789473684211e-06, "step": 1547 }, { "epoch": 4.068376068376068, "high_lr": 0.00018578947368421054, "low_lr": 3.715789473684211e-06, "step": 1547 }, { "epoch": 4.068376068376068, "high_lr": 0.00018578947368421054, "low_lr": 3.715789473684211e-06, "step": 1547 }, { "epoch": 4.068376068376068, "high_lr": 0.00018578947368421054, "low_lr": 3.715789473684211e-06, "step": 1547 }, { "epoch": 4.068376068376068, "high_lr": 0.00018578947368421054, "low_lr": 3.715789473684211e-06, "step": 1547 }, { "epoch": 4.071005917159764, "grad_norm": 1.5571383237838745, "learning_rate": 0.00018526315789473685, "loss": 1.2002, "step": 1548 }, { "epoch": 4.071005917159764, "high_lr": 0.00018526315789473685, "low_lr": 3.7052631578947374e-06, "step": 1548 }, { "epoch": 4.071005917159764, "high_lr": 0.00018526315789473685, "low_lr": 3.7052631578947374e-06, "step": 1548 }, { "epoch": 4.071005917159764, "high_lr": 0.00018526315789473685, "low_lr": 3.7052631578947374e-06, "step": 1548 }, { "epoch": 4.071005917159764, "high_lr": 0.00018526315789473685, "low_lr": 3.7052631578947374e-06, "step": 1548 }, { "epoch": 4.071005917159764, "high_lr": 0.00018526315789473685, "low_lr": 3.7052631578947374e-06, "step": 1548 }, { "epoch": 4.071005917159764, "high_lr": 0.00018526315789473685, "low_lr": 3.7052631578947374e-06, "step": 1548 }, { "epoch": 4.071005917159764, "high_lr": 0.00018526315789473685, "low_lr": 3.7052631578947374e-06, "step": 1548 }, { "epoch": 4.071005917159764, "high_lr": 0.00018526315789473685, "low_lr": 3.7052631578947374e-06, "step": 1548 }, { "epoch": 4.073635765943458, "grad_norm": 1.5273700952529907, "learning_rate": 0.00018473684210526317, "loss": 1.1797, "step": 1549 }, { "epoch": 4.073635765943458, "high_lr": 0.00018473684210526317, "low_lr": 3.6947368421052637e-06, "step": 1549 }, { "epoch": 4.073635765943458, "high_lr": 0.00018473684210526317, "low_lr": 3.6947368421052637e-06, "step": 1549 }, { "epoch": 4.073635765943458, "high_lr": 0.00018473684210526317, "low_lr": 3.6947368421052637e-06, "step": 1549 }, { "epoch": 4.073635765943458, "high_lr": 0.00018473684210526317, "low_lr": 3.6947368421052637e-06, "step": 1549 }, { "epoch": 4.073635765943458, "high_lr": 0.00018473684210526317, "low_lr": 3.6947368421052637e-06, "step": 1549 }, { "epoch": 4.073635765943458, "high_lr": 0.00018473684210526317, "low_lr": 3.6947368421052637e-06, "step": 1549 }, { "epoch": 4.073635765943458, "high_lr": 0.00018473684210526317, "low_lr": 3.6947368421052637e-06, "step": 1549 }, { "epoch": 4.073635765943458, "high_lr": 0.00018473684210526317, "low_lr": 3.6947368421052637e-06, "step": 1549 }, { "epoch": 4.076265614727153, "grad_norm": 1.395696759223938, "learning_rate": 0.00018421052631578948, "loss": 1.2308, "step": 1550 }, { "epoch": 4.076265614727153, "high_lr": 0.00018421052631578948, "low_lr": 3.6842105263157896e-06, "step": 1550 }, { "epoch": 4.076265614727153, "high_lr": 0.00018421052631578948, "low_lr": 3.6842105263157896e-06, "step": 1550 }, { "epoch": 4.076265614727153, "high_lr": 0.00018421052631578948, "low_lr": 3.6842105263157896e-06, "step": 1550 }, { "epoch": 4.076265614727153, "high_lr": 0.00018421052631578948, "low_lr": 3.6842105263157896e-06, "step": 1550 }, { "epoch": 4.076265614727153, "high_lr": 0.00018421052631578948, "low_lr": 3.6842105263157896e-06, "step": 1550 }, { "epoch": 4.076265614727153, "high_lr": 0.00018421052631578948, "low_lr": 3.6842105263157896e-06, "step": 1550 }, { "epoch": 4.076265614727153, "high_lr": 0.00018421052631578948, "low_lr": 3.6842105263157896e-06, "step": 1550 }, { "epoch": 4.076265614727153, "high_lr": 0.00018421052631578948, "low_lr": 3.6842105263157896e-06, "step": 1550 }, { "epoch": 4.0788954635108485, "grad_norm": 1.3241750001907349, "learning_rate": 0.0001836842105263158, "loss": 1.2319, "step": 1551 }, { "epoch": 4.0788954635108485, "high_lr": 0.0001836842105263158, "low_lr": 3.673684210526316e-06, "step": 1551 }, { "epoch": 4.0788954635108485, "high_lr": 0.0001836842105263158, "low_lr": 3.673684210526316e-06, "step": 1551 }, { "epoch": 4.0788954635108485, "high_lr": 0.0001836842105263158, "low_lr": 3.673684210526316e-06, "step": 1551 }, { "epoch": 4.0788954635108485, "high_lr": 0.0001836842105263158, "low_lr": 3.673684210526316e-06, "step": 1551 }, { "epoch": 4.0788954635108485, "high_lr": 0.0001836842105263158, "low_lr": 3.673684210526316e-06, "step": 1551 }, { "epoch": 4.0788954635108485, "high_lr": 0.0001836842105263158, "low_lr": 3.673684210526316e-06, "step": 1551 }, { "epoch": 4.0788954635108485, "high_lr": 0.0001836842105263158, "low_lr": 3.673684210526316e-06, "step": 1551 }, { "epoch": 4.0788954635108485, "high_lr": 0.0001836842105263158, "low_lr": 3.673684210526316e-06, "step": 1551 }, { "epoch": 4.081525312294543, "grad_norm": 1.4620137214660645, "learning_rate": 0.0001831578947368421, "loss": 1.2283, "step": 1552 }, { "epoch": 4.081525312294543, "high_lr": 0.0001831578947368421, "low_lr": 3.6631578947368423e-06, "step": 1552 }, { "epoch": 4.081525312294543, "high_lr": 0.0001831578947368421, "low_lr": 3.6631578947368423e-06, "step": 1552 }, { "epoch": 4.081525312294543, "high_lr": 0.0001831578947368421, "low_lr": 3.6631578947368423e-06, "step": 1552 }, { "epoch": 4.081525312294543, "high_lr": 0.0001831578947368421, "low_lr": 3.6631578947368423e-06, "step": 1552 }, { "epoch": 4.081525312294543, "high_lr": 0.0001831578947368421, "low_lr": 3.6631578947368423e-06, "step": 1552 }, { "epoch": 4.081525312294543, "high_lr": 0.0001831578947368421, "low_lr": 3.6631578947368423e-06, "step": 1552 }, { "epoch": 4.081525312294543, "high_lr": 0.0001831578947368421, "low_lr": 3.6631578947368423e-06, "step": 1552 }, { "epoch": 4.081525312294543, "high_lr": 0.0001831578947368421, "low_lr": 3.6631578947368423e-06, "step": 1552 }, { "epoch": 4.084155161078238, "grad_norm": 1.4948234558105469, "learning_rate": 0.00018263157894736842, "loss": 1.2602, "step": 1553 }, { "epoch": 4.084155161078238, "high_lr": 0.00018263157894736842, "low_lr": 3.6526315789473686e-06, "step": 1553 }, { "epoch": 4.084155161078238, "high_lr": 0.00018263157894736842, "low_lr": 3.6526315789473686e-06, "step": 1553 }, { "epoch": 4.084155161078238, "high_lr": 0.00018263157894736842, "low_lr": 3.6526315789473686e-06, "step": 1553 }, { "epoch": 4.084155161078238, "high_lr": 0.00018263157894736842, "low_lr": 3.6526315789473686e-06, "step": 1553 }, { "epoch": 4.084155161078238, "high_lr": 0.00018263157894736842, "low_lr": 3.6526315789473686e-06, "step": 1553 }, { "epoch": 4.084155161078238, "high_lr": 0.00018263157894736842, "low_lr": 3.6526315789473686e-06, "step": 1553 }, { "epoch": 4.084155161078238, "high_lr": 0.00018263157894736842, "low_lr": 3.6526315789473686e-06, "step": 1553 }, { "epoch": 4.084155161078238, "high_lr": 0.00018263157894736842, "low_lr": 3.6526315789473686e-06, "step": 1553 }, { "epoch": 4.0867850098619325, "grad_norm": 1.5336121320724487, "learning_rate": 0.00018210526315789476, "loss": 1.1819, "step": 1554 }, { "epoch": 4.0867850098619325, "high_lr": 0.00018210526315789476, "low_lr": 3.642105263157895e-06, "step": 1554 }, { "epoch": 4.0867850098619325, "high_lr": 0.00018210526315789476, "low_lr": 3.642105263157895e-06, "step": 1554 }, { "epoch": 4.0867850098619325, "high_lr": 0.00018210526315789476, "low_lr": 3.642105263157895e-06, "step": 1554 }, { "epoch": 4.0867850098619325, "high_lr": 0.00018210526315789476, "low_lr": 3.642105263157895e-06, "step": 1554 }, { "epoch": 4.0867850098619325, "high_lr": 0.00018210526315789476, "low_lr": 3.642105263157895e-06, "step": 1554 }, { "epoch": 4.0867850098619325, "high_lr": 0.00018210526315789476, "low_lr": 3.642105263157895e-06, "step": 1554 }, { "epoch": 4.0867850098619325, "high_lr": 0.00018210526315789476, "low_lr": 3.642105263157895e-06, "step": 1554 }, { "epoch": 4.0867850098619325, "high_lr": 0.00018210526315789476, "low_lr": 3.642105263157895e-06, "step": 1554 }, { "epoch": 4.089414858645628, "grad_norm": 1.4120430946350098, "learning_rate": 0.00018157894736842107, "loss": 1.2153, "step": 1555 }, { "epoch": 4.089414858645628, "high_lr": 0.00018157894736842107, "low_lr": 3.6315789473684217e-06, "step": 1555 }, { "epoch": 4.089414858645628, "high_lr": 0.00018157894736842107, "low_lr": 3.6315789473684217e-06, "step": 1555 }, { "epoch": 4.089414858645628, "high_lr": 0.00018157894736842107, "low_lr": 3.6315789473684217e-06, "step": 1555 }, { "epoch": 4.089414858645628, "high_lr": 0.00018157894736842107, "low_lr": 3.6315789473684217e-06, "step": 1555 }, { "epoch": 4.089414858645628, "high_lr": 0.00018157894736842107, "low_lr": 3.6315789473684217e-06, "step": 1555 }, { "epoch": 4.089414858645628, "high_lr": 0.00018157894736842107, "low_lr": 3.6315789473684217e-06, "step": 1555 }, { "epoch": 4.089414858645628, "high_lr": 0.00018157894736842107, "low_lr": 3.6315789473684217e-06, "step": 1555 }, { "epoch": 4.089414858645628, "high_lr": 0.00018157894736842107, "low_lr": 3.6315789473684217e-06, "step": 1555 }, { "epoch": 4.092044707429323, "grad_norm": 1.483199954032898, "learning_rate": 0.00018105263157894739, "loss": 1.2308, "step": 1556 }, { "epoch": 4.092044707429323, "high_lr": 0.00018105263157894739, "low_lr": 3.621052631578948e-06, "step": 1556 }, { "epoch": 4.092044707429323, "high_lr": 0.00018105263157894739, "low_lr": 3.621052631578948e-06, "step": 1556 }, { "epoch": 4.092044707429323, "high_lr": 0.00018105263157894739, "low_lr": 3.621052631578948e-06, "step": 1556 }, { "epoch": 4.092044707429323, "high_lr": 0.00018105263157894739, "low_lr": 3.621052631578948e-06, "step": 1556 }, { "epoch": 4.092044707429323, "high_lr": 0.00018105263157894739, "low_lr": 3.621052631578948e-06, "step": 1556 }, { "epoch": 4.092044707429323, "high_lr": 0.00018105263157894739, "low_lr": 3.621052631578948e-06, "step": 1556 }, { "epoch": 4.092044707429323, "high_lr": 0.00018105263157894739, "low_lr": 3.621052631578948e-06, "step": 1556 }, { "epoch": 4.092044707429323, "high_lr": 0.00018105263157894739, "low_lr": 3.621052631578948e-06, "step": 1556 }, { "epoch": 4.094674556213017, "grad_norm": 1.3914930820465088, "learning_rate": 0.00018052631578947367, "loss": 1.1732, "step": 1557 }, { "epoch": 4.094674556213017, "high_lr": 0.00018052631578947367, "low_lr": 3.610526315789474e-06, "step": 1557 }, { "epoch": 4.094674556213017, "high_lr": 0.00018052631578947367, "low_lr": 3.610526315789474e-06, "step": 1557 }, { "epoch": 4.094674556213017, "high_lr": 0.00018052631578947367, "low_lr": 3.610526315789474e-06, "step": 1557 }, { "epoch": 4.094674556213017, "high_lr": 0.00018052631578947367, "low_lr": 3.610526315789474e-06, "step": 1557 }, { "epoch": 4.094674556213017, "high_lr": 0.00018052631578947367, "low_lr": 3.610526315789474e-06, "step": 1557 }, { "epoch": 4.094674556213017, "high_lr": 0.00018052631578947367, "low_lr": 3.610526315789474e-06, "step": 1557 }, { "epoch": 4.094674556213017, "high_lr": 0.00018052631578947367, "low_lr": 3.610526315789474e-06, "step": 1557 }, { "epoch": 4.094674556213017, "high_lr": 0.00018052631578947367, "low_lr": 3.610526315789474e-06, "step": 1557 }, { "epoch": 4.097304404996713, "grad_norm": 1.4019222259521484, "learning_rate": 0.00017999999999999998, "loss": 1.225, "step": 1558 }, { "epoch": 4.097304404996713, "high_lr": 0.00017999999999999998, "low_lr": 3.6000000000000003e-06, "step": 1558 }, { "epoch": 4.097304404996713, "high_lr": 0.00017999999999999998, "low_lr": 3.6000000000000003e-06, "step": 1558 }, { "epoch": 4.097304404996713, "high_lr": 0.00017999999999999998, "low_lr": 3.6000000000000003e-06, "step": 1558 }, { "epoch": 4.097304404996713, "high_lr": 0.00017999999999999998, "low_lr": 3.6000000000000003e-06, "step": 1558 }, { "epoch": 4.097304404996713, "high_lr": 0.00017999999999999998, "low_lr": 3.6000000000000003e-06, "step": 1558 }, { "epoch": 4.097304404996713, "high_lr": 0.00017999999999999998, "low_lr": 3.6000000000000003e-06, "step": 1558 }, { "epoch": 4.097304404996713, "high_lr": 0.00017999999999999998, "low_lr": 3.6000000000000003e-06, "step": 1558 }, { "epoch": 4.097304404996713, "high_lr": 0.00017999999999999998, "low_lr": 3.6000000000000003e-06, "step": 1558 }, { "epoch": 4.0999342537804075, "grad_norm": 1.376409888267517, "learning_rate": 0.00017947368421052632, "loss": 1.2313, "step": 1559 }, { "epoch": 4.0999342537804075, "high_lr": 0.00017947368421052632, "low_lr": 3.5894736842105266e-06, "step": 1559 }, { "epoch": 4.0999342537804075, "high_lr": 0.00017947368421052632, "low_lr": 3.5894736842105266e-06, "step": 1559 }, { "epoch": 4.0999342537804075, "high_lr": 0.00017947368421052632, "low_lr": 3.5894736842105266e-06, "step": 1559 }, { "epoch": 4.0999342537804075, "high_lr": 0.00017947368421052632, "low_lr": 3.5894736842105266e-06, "step": 1559 }, { "epoch": 4.0999342537804075, "high_lr": 0.00017947368421052632, "low_lr": 3.5894736842105266e-06, "step": 1559 }, { "epoch": 4.0999342537804075, "high_lr": 0.00017947368421052632, "low_lr": 3.5894736842105266e-06, "step": 1559 }, { "epoch": 4.0999342537804075, "high_lr": 0.00017947368421052632, "low_lr": 3.5894736842105266e-06, "step": 1559 }, { "epoch": 4.0999342537804075, "high_lr": 0.00017947368421052632, "low_lr": 3.5894736842105266e-06, "step": 1559 }, { "epoch": 4.102564102564102, "grad_norm": 1.4729450941085815, "learning_rate": 0.00017894736842105264, "loss": 1.2115, "step": 1560 }, { "epoch": 4.102564102564102, "high_lr": 0.00017894736842105264, "low_lr": 3.578947368421053e-06, "step": 1560 }, { "epoch": 4.102564102564102, "high_lr": 0.00017894736842105264, "low_lr": 3.578947368421053e-06, "step": 1560 }, { "epoch": 4.102564102564102, "high_lr": 0.00017894736842105264, "low_lr": 3.578947368421053e-06, "step": 1560 }, { "epoch": 4.102564102564102, "high_lr": 0.00017894736842105264, "low_lr": 3.578947368421053e-06, "step": 1560 }, { "epoch": 4.102564102564102, "high_lr": 0.00017894736842105264, "low_lr": 3.578947368421053e-06, "step": 1560 }, { "epoch": 4.102564102564102, "high_lr": 0.00017894736842105264, "low_lr": 3.578947368421053e-06, "step": 1560 }, { "epoch": 4.102564102564102, "high_lr": 0.00017894736842105264, "low_lr": 3.578947368421053e-06, "step": 1560 }, { "epoch": 4.102564102564102, "high_lr": 0.00017894736842105264, "low_lr": 3.578947368421053e-06, "step": 1560 }, { "epoch": 4.105193951347798, "grad_norm": 1.512831449508667, "learning_rate": 0.00017842105263157895, "loss": 1.2355, "step": 1561 }, { "epoch": 4.105193951347798, "high_lr": 0.00017842105263157895, "low_lr": 3.5684210526315792e-06, "step": 1561 }, { "epoch": 4.105193951347798, "high_lr": 0.00017842105263157895, "low_lr": 3.5684210526315792e-06, "step": 1561 }, { "epoch": 4.105193951347798, "high_lr": 0.00017842105263157895, "low_lr": 3.5684210526315792e-06, "step": 1561 }, { "epoch": 4.105193951347798, "high_lr": 0.00017842105263157895, "low_lr": 3.5684210526315792e-06, "step": 1561 }, { "epoch": 4.105193951347798, "high_lr": 0.00017842105263157895, "low_lr": 3.5684210526315792e-06, "step": 1561 }, { "epoch": 4.105193951347798, "high_lr": 0.00017842105263157895, "low_lr": 3.5684210526315792e-06, "step": 1561 }, { "epoch": 4.105193951347798, "high_lr": 0.00017842105263157895, "low_lr": 3.5684210526315792e-06, "step": 1561 }, { "epoch": 4.105193951347798, "high_lr": 0.00017842105263157895, "low_lr": 3.5684210526315792e-06, "step": 1561 }, { "epoch": 4.107823800131492, "grad_norm": 1.4762496948242188, "learning_rate": 0.00017789473684210526, "loss": 1.2217, "step": 1562 }, { "epoch": 4.107823800131492, "high_lr": 0.00017789473684210526, "low_lr": 3.5578947368421056e-06, "step": 1562 }, { "epoch": 4.107823800131492, "high_lr": 0.00017789473684210526, "low_lr": 3.5578947368421056e-06, "step": 1562 }, { "epoch": 4.107823800131492, "high_lr": 0.00017789473684210526, "low_lr": 3.5578947368421056e-06, "step": 1562 }, { "epoch": 4.107823800131492, "high_lr": 0.00017789473684210526, "low_lr": 3.5578947368421056e-06, "step": 1562 }, { "epoch": 4.107823800131492, "high_lr": 0.00017789473684210526, "low_lr": 3.5578947368421056e-06, "step": 1562 }, { "epoch": 4.107823800131492, "high_lr": 0.00017789473684210526, "low_lr": 3.5578947368421056e-06, "step": 1562 }, { "epoch": 4.107823800131492, "high_lr": 0.00017789473684210526, "low_lr": 3.5578947368421056e-06, "step": 1562 }, { "epoch": 4.107823800131492, "high_lr": 0.00017789473684210526, "low_lr": 3.5578947368421056e-06, "step": 1562 }, { "epoch": 4.110453648915187, "grad_norm": 1.7933307886123657, "learning_rate": 0.0001773684210526316, "loss": 1.2195, "step": 1563 }, { "epoch": 4.110453648915187, "high_lr": 0.0001773684210526316, "low_lr": 3.5473684210526323e-06, "step": 1563 }, { "epoch": 4.110453648915187, "high_lr": 0.0001773684210526316, "low_lr": 3.5473684210526323e-06, "step": 1563 }, { "epoch": 4.110453648915187, "high_lr": 0.0001773684210526316, "low_lr": 3.5473684210526323e-06, "step": 1563 }, { "epoch": 4.110453648915187, "high_lr": 0.0001773684210526316, "low_lr": 3.5473684210526323e-06, "step": 1563 }, { "epoch": 4.110453648915187, "high_lr": 0.0001773684210526316, "low_lr": 3.5473684210526323e-06, "step": 1563 }, { "epoch": 4.110453648915187, "high_lr": 0.0001773684210526316, "low_lr": 3.5473684210526323e-06, "step": 1563 }, { "epoch": 4.110453648915187, "high_lr": 0.0001773684210526316, "low_lr": 3.5473684210526323e-06, "step": 1563 }, { "epoch": 4.110453648915187, "high_lr": 0.0001773684210526316, "low_lr": 3.5473684210526323e-06, "step": 1563 }, { "epoch": 4.113083497698883, "grad_norm": 1.4589201211929321, "learning_rate": 0.0001768421052631579, "loss": 1.259, "step": 1564 }, { "epoch": 4.113083497698883, "high_lr": 0.0001768421052631579, "low_lr": 3.536842105263158e-06, "step": 1564 }, { "epoch": 4.113083497698883, "high_lr": 0.0001768421052631579, "low_lr": 3.536842105263158e-06, "step": 1564 }, { "epoch": 4.113083497698883, "high_lr": 0.0001768421052631579, "low_lr": 3.536842105263158e-06, "step": 1564 }, { "epoch": 4.113083497698883, "high_lr": 0.0001768421052631579, "low_lr": 3.536842105263158e-06, "step": 1564 }, { "epoch": 4.113083497698883, "high_lr": 0.0001768421052631579, "low_lr": 3.536842105263158e-06, "step": 1564 }, { "epoch": 4.113083497698883, "high_lr": 0.0001768421052631579, "low_lr": 3.536842105263158e-06, "step": 1564 }, { "epoch": 4.113083497698883, "high_lr": 0.0001768421052631579, "low_lr": 3.536842105263158e-06, "step": 1564 }, { "epoch": 4.113083497698883, "high_lr": 0.0001768421052631579, "low_lr": 3.536842105263158e-06, "step": 1564 }, { "epoch": 4.115713346482577, "grad_norm": 1.5560483932495117, "learning_rate": 0.0001763157894736842, "loss": 1.2098, "step": 1565 }, { "epoch": 4.115713346482577, "high_lr": 0.0001763157894736842, "low_lr": 3.5263157894736846e-06, "step": 1565 }, { "epoch": 4.115713346482577, "high_lr": 0.0001763157894736842, "low_lr": 3.5263157894736846e-06, "step": 1565 }, { "epoch": 4.115713346482577, "high_lr": 0.0001763157894736842, "low_lr": 3.5263157894736846e-06, "step": 1565 }, { "epoch": 4.115713346482577, "high_lr": 0.0001763157894736842, "low_lr": 3.5263157894736846e-06, "step": 1565 }, { "epoch": 4.115713346482577, "high_lr": 0.0001763157894736842, "low_lr": 3.5263157894736846e-06, "step": 1565 }, { "epoch": 4.115713346482577, "high_lr": 0.0001763157894736842, "low_lr": 3.5263157894736846e-06, "step": 1565 }, { "epoch": 4.115713346482577, "high_lr": 0.0001763157894736842, "low_lr": 3.5263157894736846e-06, "step": 1565 }, { "epoch": 4.115713346482577, "high_lr": 0.0001763157894736842, "low_lr": 3.5263157894736846e-06, "step": 1565 }, { "epoch": 4.118343195266272, "grad_norm": 1.5046355724334717, "learning_rate": 0.00017578947368421052, "loss": 1.1998, "step": 1566 }, { "epoch": 4.118343195266272, "high_lr": 0.00017578947368421052, "low_lr": 3.515789473684211e-06, "step": 1566 }, { "epoch": 4.118343195266272, "high_lr": 0.00017578947368421052, "low_lr": 3.515789473684211e-06, "step": 1566 }, { "epoch": 4.118343195266272, "high_lr": 0.00017578947368421052, "low_lr": 3.515789473684211e-06, "step": 1566 }, { "epoch": 4.118343195266272, "high_lr": 0.00017578947368421052, "low_lr": 3.515789473684211e-06, "step": 1566 }, { "epoch": 4.118343195266272, "high_lr": 0.00017578947368421052, "low_lr": 3.515789473684211e-06, "step": 1566 }, { "epoch": 4.118343195266272, "high_lr": 0.00017578947368421052, "low_lr": 3.515789473684211e-06, "step": 1566 }, { "epoch": 4.118343195266272, "high_lr": 0.00017578947368421052, "low_lr": 3.515789473684211e-06, "step": 1566 }, { "epoch": 4.118343195266272, "high_lr": 0.00017578947368421052, "low_lr": 3.515789473684211e-06, "step": 1566 }, { "epoch": 4.1209730440499674, "grad_norm": 1.4738900661468506, "learning_rate": 0.00017526315789473686, "loss": 1.2065, "step": 1567 }, { "epoch": 4.1209730440499674, "high_lr": 0.00017526315789473686, "low_lr": 3.505263157894737e-06, "step": 1567 }, { "epoch": 4.1209730440499674, "high_lr": 0.00017526315789473686, "low_lr": 3.505263157894737e-06, "step": 1567 }, { "epoch": 4.1209730440499674, "high_lr": 0.00017526315789473686, "low_lr": 3.505263157894737e-06, "step": 1567 }, { "epoch": 4.1209730440499674, "high_lr": 0.00017526315789473686, "low_lr": 3.505263157894737e-06, "step": 1567 }, { "epoch": 4.1209730440499674, "high_lr": 0.00017526315789473686, "low_lr": 3.505263157894737e-06, "step": 1567 }, { "epoch": 4.1209730440499674, "high_lr": 0.00017526315789473686, "low_lr": 3.505263157894737e-06, "step": 1567 }, { "epoch": 4.1209730440499674, "high_lr": 0.00017526315789473686, "low_lr": 3.505263157894737e-06, "step": 1567 }, { "epoch": 4.1209730440499674, "high_lr": 0.00017526315789473686, "low_lr": 3.505263157894737e-06, "step": 1567 }, { "epoch": 4.123602892833662, "grad_norm": 1.3990614414215088, "learning_rate": 0.00017473684210526317, "loss": 1.1996, "step": 1568 }, { "epoch": 4.123602892833662, "high_lr": 0.00017473684210526317, "low_lr": 3.4947368421052635e-06, "step": 1568 }, { "epoch": 4.123602892833662, "high_lr": 0.00017473684210526317, "low_lr": 3.4947368421052635e-06, "step": 1568 }, { "epoch": 4.123602892833662, "high_lr": 0.00017473684210526317, "low_lr": 3.4947368421052635e-06, "step": 1568 }, { "epoch": 4.123602892833662, "high_lr": 0.00017473684210526317, "low_lr": 3.4947368421052635e-06, "step": 1568 }, { "epoch": 4.123602892833662, "high_lr": 0.00017473684210526317, "low_lr": 3.4947368421052635e-06, "step": 1568 }, { "epoch": 4.123602892833662, "high_lr": 0.00017473684210526317, "low_lr": 3.4947368421052635e-06, "step": 1568 }, { "epoch": 4.123602892833662, "high_lr": 0.00017473684210526317, "low_lr": 3.4947368421052635e-06, "step": 1568 }, { "epoch": 4.123602892833662, "high_lr": 0.00017473684210526317, "low_lr": 3.4947368421052635e-06, "step": 1568 }, { "epoch": 4.126232741617357, "grad_norm": 1.5344288349151611, "learning_rate": 0.00017421052631578948, "loss": 1.2541, "step": 1569 }, { "epoch": 4.126232741617357, "high_lr": 0.00017421052631578948, "low_lr": 3.48421052631579e-06, "step": 1569 }, { "epoch": 4.126232741617357, "high_lr": 0.00017421052631578948, "low_lr": 3.48421052631579e-06, "step": 1569 }, { "epoch": 4.126232741617357, "high_lr": 0.00017421052631578948, "low_lr": 3.48421052631579e-06, "step": 1569 }, { "epoch": 4.126232741617357, "high_lr": 0.00017421052631578948, "low_lr": 3.48421052631579e-06, "step": 1569 }, { "epoch": 4.126232741617357, "high_lr": 0.00017421052631578948, "low_lr": 3.48421052631579e-06, "step": 1569 }, { "epoch": 4.126232741617357, "high_lr": 0.00017421052631578948, "low_lr": 3.48421052631579e-06, "step": 1569 }, { "epoch": 4.126232741617357, "high_lr": 0.00017421052631578948, "low_lr": 3.48421052631579e-06, "step": 1569 }, { "epoch": 4.126232741617357, "high_lr": 0.00017421052631578948, "low_lr": 3.48421052631579e-06, "step": 1569 }, { "epoch": 4.128862590401052, "grad_norm": 1.587054967880249, "learning_rate": 0.0001736842105263158, "loss": 1.2023, "step": 1570 }, { "epoch": 4.128862590401052, "high_lr": 0.0001736842105263158, "low_lr": 3.473684210526316e-06, "step": 1570 }, { "epoch": 4.128862590401052, "high_lr": 0.0001736842105263158, "low_lr": 3.473684210526316e-06, "step": 1570 }, { "epoch": 4.128862590401052, "high_lr": 0.0001736842105263158, "low_lr": 3.473684210526316e-06, "step": 1570 }, { "epoch": 4.128862590401052, "high_lr": 0.0001736842105263158, "low_lr": 3.473684210526316e-06, "step": 1570 }, { "epoch": 4.128862590401052, "high_lr": 0.0001736842105263158, "low_lr": 3.473684210526316e-06, "step": 1570 }, { "epoch": 4.128862590401052, "high_lr": 0.0001736842105263158, "low_lr": 3.473684210526316e-06, "step": 1570 }, { "epoch": 4.128862590401052, "high_lr": 0.0001736842105263158, "low_lr": 3.473684210526316e-06, "step": 1570 }, { "epoch": 4.128862590401052, "high_lr": 0.0001736842105263158, "low_lr": 3.473684210526316e-06, "step": 1570 }, { "epoch": 4.131492439184747, "grad_norm": 1.4721533060073853, "learning_rate": 0.0001731578947368421, "loss": 1.2065, "step": 1571 }, { "epoch": 4.131492439184747, "high_lr": 0.0001731578947368421, "low_lr": 3.463157894736842e-06, "step": 1571 }, { "epoch": 4.131492439184747, "high_lr": 0.0001731578947368421, "low_lr": 3.463157894736842e-06, "step": 1571 }, { "epoch": 4.131492439184747, "high_lr": 0.0001731578947368421, "low_lr": 3.463157894736842e-06, "step": 1571 }, { "epoch": 4.131492439184747, "high_lr": 0.0001731578947368421, "low_lr": 3.463157894736842e-06, "step": 1571 }, { "epoch": 4.131492439184747, "high_lr": 0.0001731578947368421, "low_lr": 3.463157894736842e-06, "step": 1571 }, { "epoch": 4.131492439184747, "high_lr": 0.0001731578947368421, "low_lr": 3.463157894736842e-06, "step": 1571 }, { "epoch": 4.131492439184747, "high_lr": 0.0001731578947368421, "low_lr": 3.463157894736842e-06, "step": 1571 }, { "epoch": 4.131492439184747, "high_lr": 0.0001731578947368421, "low_lr": 3.463157894736842e-06, "step": 1571 }, { "epoch": 4.134122287968442, "grad_norm": 1.6678563356399536, "learning_rate": 0.00017263157894736842, "loss": 1.1925, "step": 1572 }, { "epoch": 4.134122287968442, "high_lr": 0.00017263157894736842, "low_lr": 3.4526315789473684e-06, "step": 1572 }, { "epoch": 4.134122287968442, "high_lr": 0.00017263157894736842, "low_lr": 3.4526315789473684e-06, "step": 1572 }, { "epoch": 4.134122287968442, "high_lr": 0.00017263157894736842, "low_lr": 3.4526315789473684e-06, "step": 1572 }, { "epoch": 4.134122287968442, "high_lr": 0.00017263157894736842, "low_lr": 3.4526315789473684e-06, "step": 1572 }, { "epoch": 4.134122287968442, "high_lr": 0.00017263157894736842, "low_lr": 3.4526315789473684e-06, "step": 1572 }, { "epoch": 4.134122287968442, "high_lr": 0.00017263157894736842, "low_lr": 3.4526315789473684e-06, "step": 1572 }, { "epoch": 4.134122287968442, "high_lr": 0.00017263157894736842, "low_lr": 3.4526315789473684e-06, "step": 1572 }, { "epoch": 4.134122287968442, "high_lr": 0.00017263157894736842, "low_lr": 3.4526315789473684e-06, "step": 1572 }, { "epoch": 4.136752136752137, "grad_norm": 1.4178546667099, "learning_rate": 0.00017210526315789473, "loss": 1.2316, "step": 1573 }, { "epoch": 4.136752136752137, "high_lr": 0.00017210526315789473, "low_lr": 3.4421052631578947e-06, "step": 1573 }, { "epoch": 4.136752136752137, "high_lr": 0.00017210526315789473, "low_lr": 3.4421052631578947e-06, "step": 1573 }, { "epoch": 4.136752136752137, "high_lr": 0.00017210526315789473, "low_lr": 3.4421052631578947e-06, "step": 1573 }, { "epoch": 4.136752136752137, "high_lr": 0.00017210526315789473, "low_lr": 3.4421052631578947e-06, "step": 1573 }, { "epoch": 4.136752136752137, "high_lr": 0.00017210526315789473, "low_lr": 3.4421052631578947e-06, "step": 1573 }, { "epoch": 4.136752136752137, "high_lr": 0.00017210526315789473, "low_lr": 3.4421052631578947e-06, "step": 1573 }, { "epoch": 4.136752136752137, "high_lr": 0.00017210526315789473, "low_lr": 3.4421052631578947e-06, "step": 1573 }, { "epoch": 4.136752136752137, "high_lr": 0.00017210526315789473, "low_lr": 3.4421052631578947e-06, "step": 1573 }, { "epoch": 4.139381985535832, "grad_norm": 1.584417462348938, "learning_rate": 0.00017157894736842105, "loss": 1.1981, "step": 1574 }, { "epoch": 4.139381985535832, "high_lr": 0.00017157894736842105, "low_lr": 3.4315789473684215e-06, "step": 1574 }, { "epoch": 4.139381985535832, "high_lr": 0.00017157894736842105, "low_lr": 3.4315789473684215e-06, "step": 1574 }, { "epoch": 4.139381985535832, "high_lr": 0.00017157894736842105, "low_lr": 3.4315789473684215e-06, "step": 1574 }, { "epoch": 4.139381985535832, "high_lr": 0.00017157894736842105, "low_lr": 3.4315789473684215e-06, "step": 1574 }, { "epoch": 4.139381985535832, "high_lr": 0.00017157894736842105, "low_lr": 3.4315789473684215e-06, "step": 1574 }, { "epoch": 4.139381985535832, "high_lr": 0.00017157894736842105, "low_lr": 3.4315789473684215e-06, "step": 1574 }, { "epoch": 4.139381985535832, "high_lr": 0.00017157894736842105, "low_lr": 3.4315789473684215e-06, "step": 1574 }, { "epoch": 4.139381985535832, "high_lr": 0.00017157894736842105, "low_lr": 3.4315789473684215e-06, "step": 1574 }, { "epoch": 4.1420118343195265, "grad_norm": 1.5301527976989746, "learning_rate": 0.00017105263157894739, "loss": 1.2721, "step": 1575 }, { "epoch": 4.1420118343195265, "high_lr": 0.00017105263157894739, "low_lr": 3.421052631578948e-06, "step": 1575 }, { "epoch": 4.1420118343195265, "high_lr": 0.00017105263157894739, "low_lr": 3.421052631578948e-06, "step": 1575 }, { "epoch": 4.1420118343195265, "high_lr": 0.00017105263157894739, "low_lr": 3.421052631578948e-06, "step": 1575 }, { "epoch": 4.1420118343195265, "high_lr": 0.00017105263157894739, "low_lr": 3.421052631578948e-06, "step": 1575 }, { "epoch": 4.1420118343195265, "high_lr": 0.00017105263157894739, "low_lr": 3.421052631578948e-06, "step": 1575 }, { "epoch": 4.1420118343195265, "high_lr": 0.00017105263157894739, "low_lr": 3.421052631578948e-06, "step": 1575 }, { "epoch": 4.1420118343195265, "high_lr": 0.00017105263157894739, "low_lr": 3.421052631578948e-06, "step": 1575 }, { "epoch": 4.1420118343195265, "high_lr": 0.00017105263157894739, "low_lr": 3.421052631578948e-06, "step": 1575 }, { "epoch": 4.144641683103222, "grad_norm": 1.5100499391555786, "learning_rate": 0.0001705263157894737, "loss": 1.2152, "step": 1576 }, { "epoch": 4.144641683103222, "high_lr": 0.0001705263157894737, "low_lr": 3.410526315789474e-06, "step": 1576 }, { "epoch": 4.144641683103222, "high_lr": 0.0001705263157894737, "low_lr": 3.410526315789474e-06, "step": 1576 }, { "epoch": 4.144641683103222, "high_lr": 0.0001705263157894737, "low_lr": 3.410526315789474e-06, "step": 1576 }, { "epoch": 4.144641683103222, "high_lr": 0.0001705263157894737, "low_lr": 3.410526315789474e-06, "step": 1576 }, { "epoch": 4.144641683103222, "high_lr": 0.0001705263157894737, "low_lr": 3.410526315789474e-06, "step": 1576 }, { "epoch": 4.144641683103222, "high_lr": 0.0001705263157894737, "low_lr": 3.410526315789474e-06, "step": 1576 }, { "epoch": 4.144641683103222, "high_lr": 0.0001705263157894737, "low_lr": 3.410526315789474e-06, "step": 1576 }, { "epoch": 4.144641683103222, "high_lr": 0.0001705263157894737, "low_lr": 3.410526315789474e-06, "step": 1576 }, { "epoch": 4.147271531886917, "grad_norm": 1.4661401510238647, "learning_rate": 0.00017, "loss": 1.1896, "step": 1577 }, { "epoch": 4.147271531886917, "high_lr": 0.00017, "low_lr": 3.4000000000000005e-06, "step": 1577 }, { "epoch": 4.147271531886917, "high_lr": 0.00017, "low_lr": 3.4000000000000005e-06, "step": 1577 }, { "epoch": 4.147271531886917, "high_lr": 0.00017, "low_lr": 3.4000000000000005e-06, "step": 1577 }, { "epoch": 4.147271531886917, "high_lr": 0.00017, "low_lr": 3.4000000000000005e-06, "step": 1577 }, { "epoch": 4.147271531886917, "high_lr": 0.00017, "low_lr": 3.4000000000000005e-06, "step": 1577 }, { "epoch": 4.147271531886917, "high_lr": 0.00017, "low_lr": 3.4000000000000005e-06, "step": 1577 }, { "epoch": 4.147271531886917, "high_lr": 0.00017, "low_lr": 3.4000000000000005e-06, "step": 1577 }, { "epoch": 4.147271531886917, "high_lr": 0.00017, "low_lr": 3.4000000000000005e-06, "step": 1577 }, { "epoch": 4.149901380670611, "grad_norm": 1.496145248413086, "learning_rate": 0.0001694736842105263, "loss": 1.1908, "step": 1578 }, { "epoch": 4.149901380670611, "high_lr": 0.0001694736842105263, "low_lr": 3.3894736842105264e-06, "step": 1578 }, { "epoch": 4.149901380670611, "high_lr": 0.0001694736842105263, "low_lr": 3.3894736842105264e-06, "step": 1578 }, { "epoch": 4.149901380670611, "high_lr": 0.0001694736842105263, "low_lr": 3.3894736842105264e-06, "step": 1578 }, { "epoch": 4.149901380670611, "high_lr": 0.0001694736842105263, "low_lr": 3.3894736842105264e-06, "step": 1578 }, { "epoch": 4.149901380670611, "high_lr": 0.0001694736842105263, "low_lr": 3.3894736842105264e-06, "step": 1578 }, { "epoch": 4.149901380670611, "high_lr": 0.0001694736842105263, "low_lr": 3.3894736842105264e-06, "step": 1578 }, { "epoch": 4.149901380670611, "high_lr": 0.0001694736842105263, "low_lr": 3.3894736842105264e-06, "step": 1578 }, { "epoch": 4.149901380670611, "high_lr": 0.0001694736842105263, "low_lr": 3.3894736842105264e-06, "step": 1578 }, { "epoch": 4.152531229454306, "grad_norm": 1.5556325912475586, "learning_rate": 0.00016894736842105264, "loss": 1.2083, "step": 1579 }, { "epoch": 4.152531229454306, "high_lr": 0.00016894736842105264, "low_lr": 3.3789473684210527e-06, "step": 1579 }, { "epoch": 4.152531229454306, "high_lr": 0.00016894736842105264, "low_lr": 3.3789473684210527e-06, "step": 1579 }, { "epoch": 4.152531229454306, "high_lr": 0.00016894736842105264, "low_lr": 3.3789473684210527e-06, "step": 1579 }, { "epoch": 4.152531229454306, "high_lr": 0.00016894736842105264, "low_lr": 3.3789473684210527e-06, "step": 1579 }, { "epoch": 4.152531229454306, "high_lr": 0.00016894736842105264, "low_lr": 3.3789473684210527e-06, "step": 1579 }, { "epoch": 4.152531229454306, "high_lr": 0.00016894736842105264, "low_lr": 3.3789473684210527e-06, "step": 1579 }, { "epoch": 4.152531229454306, "high_lr": 0.00016894736842105264, "low_lr": 3.3789473684210527e-06, "step": 1579 }, { "epoch": 4.152531229454306, "high_lr": 0.00016894736842105264, "low_lr": 3.3789473684210527e-06, "step": 1579 }, { "epoch": 4.1551610782380015, "grad_norm": 1.4800539016723633, "learning_rate": 0.00016842105263157895, "loss": 1.2144, "step": 1580 }, { "epoch": 4.1551610782380015, "high_lr": 0.00016842105263157895, "low_lr": 3.368421052631579e-06, "step": 1580 }, { "epoch": 4.1551610782380015, "high_lr": 0.00016842105263157895, "low_lr": 3.368421052631579e-06, "step": 1580 }, { "epoch": 4.1551610782380015, "high_lr": 0.00016842105263157895, "low_lr": 3.368421052631579e-06, "step": 1580 }, { "epoch": 4.1551610782380015, "high_lr": 0.00016842105263157895, "low_lr": 3.368421052631579e-06, "step": 1580 }, { "epoch": 4.1551610782380015, "high_lr": 0.00016842105263157895, "low_lr": 3.368421052631579e-06, "step": 1580 }, { "epoch": 4.1551610782380015, "high_lr": 0.00016842105263157895, "low_lr": 3.368421052631579e-06, "step": 1580 }, { "epoch": 4.1551610782380015, "high_lr": 0.00016842105263157895, "low_lr": 3.368421052631579e-06, "step": 1580 }, { "epoch": 4.1551610782380015, "high_lr": 0.00016842105263157895, "low_lr": 3.368421052631579e-06, "step": 1580 }, { "epoch": 4.157790927021696, "grad_norm": 1.4237494468688965, "learning_rate": 0.00016789473684210526, "loss": 1.1848, "step": 1581 }, { "epoch": 4.157790927021696, "high_lr": 0.00016789473684210526, "low_lr": 3.3578947368421054e-06, "step": 1581 }, { "epoch": 4.157790927021696, "high_lr": 0.00016789473684210526, "low_lr": 3.3578947368421054e-06, "step": 1581 }, { "epoch": 4.157790927021696, "high_lr": 0.00016789473684210526, "low_lr": 3.3578947368421054e-06, "step": 1581 }, { "epoch": 4.157790927021696, "high_lr": 0.00016789473684210526, "low_lr": 3.3578947368421054e-06, "step": 1581 }, { "epoch": 4.157790927021696, "high_lr": 0.00016789473684210526, "low_lr": 3.3578947368421054e-06, "step": 1581 }, { "epoch": 4.157790927021696, "high_lr": 0.00016789473684210526, "low_lr": 3.3578947368421054e-06, "step": 1581 }, { "epoch": 4.157790927021696, "high_lr": 0.00016789473684210526, "low_lr": 3.3578947368421054e-06, "step": 1581 }, { "epoch": 4.157790927021696, "high_lr": 0.00016789473684210526, "low_lr": 3.3578947368421054e-06, "step": 1581 }, { "epoch": 4.160420775805391, "grad_norm": 1.644124150276184, "learning_rate": 0.00016736842105263158, "loss": 1.1958, "step": 1582 }, { "epoch": 4.160420775805391, "high_lr": 0.00016736842105263158, "low_lr": 3.347368421052632e-06, "step": 1582 }, { "epoch": 4.160420775805391, "high_lr": 0.00016736842105263158, "low_lr": 3.347368421052632e-06, "step": 1582 }, { "epoch": 4.160420775805391, "high_lr": 0.00016736842105263158, "low_lr": 3.347368421052632e-06, "step": 1582 }, { "epoch": 4.160420775805391, "high_lr": 0.00016736842105263158, "low_lr": 3.347368421052632e-06, "step": 1582 }, { "epoch": 4.160420775805391, "high_lr": 0.00016736842105263158, "low_lr": 3.347368421052632e-06, "step": 1582 }, { "epoch": 4.160420775805391, "high_lr": 0.00016736842105263158, "low_lr": 3.347368421052632e-06, "step": 1582 }, { "epoch": 4.160420775805391, "high_lr": 0.00016736842105263158, "low_lr": 3.347368421052632e-06, "step": 1582 }, { "epoch": 4.160420775805391, "high_lr": 0.00016736842105263158, "low_lr": 3.347368421052632e-06, "step": 1582 }, { "epoch": 4.163050624589086, "grad_norm": 1.4499338865280151, "learning_rate": 0.00016684210526315792, "loss": 1.2298, "step": 1583 }, { "epoch": 4.163050624589086, "high_lr": 0.00016684210526315792, "low_lr": 3.3368421052631584e-06, "step": 1583 }, { "epoch": 4.163050624589086, "high_lr": 0.00016684210526315792, "low_lr": 3.3368421052631584e-06, "step": 1583 }, { "epoch": 4.163050624589086, "high_lr": 0.00016684210526315792, "low_lr": 3.3368421052631584e-06, "step": 1583 }, { "epoch": 4.163050624589086, "high_lr": 0.00016684210526315792, "low_lr": 3.3368421052631584e-06, "step": 1583 }, { "epoch": 4.163050624589086, "high_lr": 0.00016684210526315792, "low_lr": 3.3368421052631584e-06, "step": 1583 }, { "epoch": 4.163050624589086, "high_lr": 0.00016684210526315792, "low_lr": 3.3368421052631584e-06, "step": 1583 }, { "epoch": 4.163050624589086, "high_lr": 0.00016684210526315792, "low_lr": 3.3368421052631584e-06, "step": 1583 }, { "epoch": 4.163050624589086, "high_lr": 0.00016684210526315792, "low_lr": 3.3368421052631584e-06, "step": 1583 }, { "epoch": 4.165680473372781, "grad_norm": 1.4814369678497314, "learning_rate": 0.00016631578947368423, "loss": 1.2453, "step": 1584 }, { "epoch": 4.165680473372781, "high_lr": 0.00016631578947368423, "low_lr": 3.3263157894736848e-06, "step": 1584 }, { "epoch": 4.165680473372781, "high_lr": 0.00016631578947368423, "low_lr": 3.3263157894736848e-06, "step": 1584 }, { "epoch": 4.165680473372781, "high_lr": 0.00016631578947368423, "low_lr": 3.3263157894736848e-06, "step": 1584 }, { "epoch": 4.165680473372781, "high_lr": 0.00016631578947368423, "low_lr": 3.3263157894736848e-06, "step": 1584 }, { "epoch": 4.165680473372781, "high_lr": 0.00016631578947368423, "low_lr": 3.3263157894736848e-06, "step": 1584 }, { "epoch": 4.165680473372781, "high_lr": 0.00016631578947368423, "low_lr": 3.3263157894736848e-06, "step": 1584 }, { "epoch": 4.165680473372781, "high_lr": 0.00016631578947368423, "low_lr": 3.3263157894736848e-06, "step": 1584 }, { "epoch": 4.165680473372781, "high_lr": 0.00016631578947368423, "low_lr": 3.3263157894736848e-06, "step": 1584 }, { "epoch": 4.168310322156476, "grad_norm": 1.5331965684890747, "learning_rate": 0.00016578947368421052, "loss": 1.2199, "step": 1585 }, { "epoch": 4.168310322156476, "high_lr": 0.00016578947368421052, "low_lr": 3.3157894736842107e-06, "step": 1585 }, { "epoch": 4.168310322156476, "high_lr": 0.00016578947368421052, "low_lr": 3.3157894736842107e-06, "step": 1585 }, { "epoch": 4.168310322156476, "high_lr": 0.00016578947368421052, "low_lr": 3.3157894736842107e-06, "step": 1585 }, { "epoch": 4.168310322156476, "high_lr": 0.00016578947368421052, "low_lr": 3.3157894736842107e-06, "step": 1585 }, { "epoch": 4.168310322156476, "high_lr": 0.00016578947368421052, "low_lr": 3.3157894736842107e-06, "step": 1585 }, { "epoch": 4.168310322156476, "high_lr": 0.00016578947368421052, "low_lr": 3.3157894736842107e-06, "step": 1585 }, { "epoch": 4.168310322156476, "high_lr": 0.00016578947368421052, "low_lr": 3.3157894736842107e-06, "step": 1585 }, { "epoch": 4.168310322156476, "high_lr": 0.00016578947368421052, "low_lr": 3.3157894736842107e-06, "step": 1585 }, { "epoch": 4.170940170940171, "grad_norm": 1.4219096899032593, "learning_rate": 0.00016526315789473683, "loss": 1.202, "step": 1586 }, { "epoch": 4.170940170940171, "high_lr": 0.00016526315789473683, "low_lr": 3.305263157894737e-06, "step": 1586 }, { "epoch": 4.170940170940171, "high_lr": 0.00016526315789473683, "low_lr": 3.305263157894737e-06, "step": 1586 }, { "epoch": 4.170940170940171, "high_lr": 0.00016526315789473683, "low_lr": 3.305263157894737e-06, "step": 1586 }, { "epoch": 4.170940170940171, "high_lr": 0.00016526315789473683, "low_lr": 3.305263157894737e-06, "step": 1586 }, { "epoch": 4.170940170940171, "high_lr": 0.00016526315789473683, "low_lr": 3.305263157894737e-06, "step": 1586 }, { "epoch": 4.170940170940171, "high_lr": 0.00016526315789473683, "low_lr": 3.305263157894737e-06, "step": 1586 }, { "epoch": 4.170940170940171, "high_lr": 0.00016526315789473683, "low_lr": 3.305263157894737e-06, "step": 1586 }, { "epoch": 4.170940170940171, "high_lr": 0.00016526315789473683, "low_lr": 3.305263157894737e-06, "step": 1586 }, { "epoch": 4.173570019723866, "grad_norm": 1.4696342945098877, "learning_rate": 0.00016473684210526317, "loss": 1.2507, "step": 1587 }, { "epoch": 4.173570019723866, "high_lr": 0.00016473684210526317, "low_lr": 3.2947368421052633e-06, "step": 1587 }, { "epoch": 4.173570019723866, "high_lr": 0.00016473684210526317, "low_lr": 3.2947368421052633e-06, "step": 1587 }, { "epoch": 4.173570019723866, "high_lr": 0.00016473684210526317, "low_lr": 3.2947368421052633e-06, "step": 1587 }, { "epoch": 4.173570019723866, "high_lr": 0.00016473684210526317, "low_lr": 3.2947368421052633e-06, "step": 1587 }, { "epoch": 4.173570019723866, "high_lr": 0.00016473684210526317, "low_lr": 3.2947368421052633e-06, "step": 1587 }, { "epoch": 4.173570019723866, "high_lr": 0.00016473684210526317, "low_lr": 3.2947368421052633e-06, "step": 1587 }, { "epoch": 4.173570019723866, "high_lr": 0.00016473684210526317, "low_lr": 3.2947368421052633e-06, "step": 1587 }, { "epoch": 4.173570019723866, "high_lr": 0.00016473684210526317, "low_lr": 3.2947368421052633e-06, "step": 1587 }, { "epoch": 4.1761998685075605, "grad_norm": 1.5482137203216553, "learning_rate": 0.00016421052631578948, "loss": 1.2084, "step": 1588 }, { "epoch": 4.1761998685075605, "high_lr": 0.00016421052631578948, "low_lr": 3.2842105263157897e-06, "step": 1588 }, { "epoch": 4.1761998685075605, "high_lr": 0.00016421052631578948, "low_lr": 3.2842105263157897e-06, "step": 1588 }, { "epoch": 4.1761998685075605, "high_lr": 0.00016421052631578948, "low_lr": 3.2842105263157897e-06, "step": 1588 }, { "epoch": 4.1761998685075605, "high_lr": 0.00016421052631578948, "low_lr": 3.2842105263157897e-06, "step": 1588 }, { "epoch": 4.1761998685075605, "high_lr": 0.00016421052631578948, "low_lr": 3.2842105263157897e-06, "step": 1588 }, { "epoch": 4.1761998685075605, "high_lr": 0.00016421052631578948, "low_lr": 3.2842105263157897e-06, "step": 1588 }, { "epoch": 4.1761998685075605, "high_lr": 0.00016421052631578948, "low_lr": 3.2842105263157897e-06, "step": 1588 }, { "epoch": 4.1761998685075605, "high_lr": 0.00016421052631578948, "low_lr": 3.2842105263157897e-06, "step": 1588 }, { "epoch": 4.178829717291256, "grad_norm": 1.423588514328003, "learning_rate": 0.0001636842105263158, "loss": 1.2125, "step": 1589 }, { "epoch": 4.178829717291256, "high_lr": 0.0001636842105263158, "low_lr": 3.273684210526316e-06, "step": 1589 }, { "epoch": 4.178829717291256, "high_lr": 0.0001636842105263158, "low_lr": 3.273684210526316e-06, "step": 1589 }, { "epoch": 4.178829717291256, "high_lr": 0.0001636842105263158, "low_lr": 3.273684210526316e-06, "step": 1589 }, { "epoch": 4.178829717291256, "high_lr": 0.0001636842105263158, "low_lr": 3.273684210526316e-06, "step": 1589 }, { "epoch": 4.178829717291256, "high_lr": 0.0001636842105263158, "low_lr": 3.273684210526316e-06, "step": 1589 }, { "epoch": 4.178829717291256, "high_lr": 0.0001636842105263158, "low_lr": 3.273684210526316e-06, "step": 1589 }, { "epoch": 4.178829717291256, "high_lr": 0.0001636842105263158, "low_lr": 3.273684210526316e-06, "step": 1589 }, { "epoch": 4.178829717291256, "high_lr": 0.0001636842105263158, "low_lr": 3.273684210526316e-06, "step": 1589 }, { "epoch": 4.181459566074951, "grad_norm": 1.5778827667236328, "learning_rate": 0.0001631578947368421, "loss": 1.2128, "step": 1590 }, { "epoch": 4.181459566074951, "high_lr": 0.0001631578947368421, "low_lr": 3.2631578947368423e-06, "step": 1590 }, { "epoch": 4.181459566074951, "high_lr": 0.0001631578947368421, "low_lr": 3.2631578947368423e-06, "step": 1590 }, { "epoch": 4.181459566074951, "high_lr": 0.0001631578947368421, "low_lr": 3.2631578947368423e-06, "step": 1590 }, { "epoch": 4.181459566074951, "high_lr": 0.0001631578947368421, "low_lr": 3.2631578947368423e-06, "step": 1590 }, { "epoch": 4.181459566074951, "high_lr": 0.0001631578947368421, "low_lr": 3.2631578947368423e-06, "step": 1590 }, { "epoch": 4.181459566074951, "high_lr": 0.0001631578947368421, "low_lr": 3.2631578947368423e-06, "step": 1590 }, { "epoch": 4.181459566074951, "high_lr": 0.0001631578947368421, "low_lr": 3.2631578947368423e-06, "step": 1590 }, { "epoch": 4.181459566074951, "high_lr": 0.0001631578947368421, "low_lr": 3.2631578947368423e-06, "step": 1590 }, { "epoch": 4.184089414858645, "grad_norm": 1.484566330909729, "learning_rate": 0.00016263157894736845, "loss": 1.2444, "step": 1591 }, { "epoch": 4.184089414858645, "high_lr": 0.00016263157894736845, "low_lr": 3.252631578947369e-06, "step": 1591 }, { "epoch": 4.184089414858645, "high_lr": 0.00016263157894736845, "low_lr": 3.252631578947369e-06, "step": 1591 }, { "epoch": 4.184089414858645, "high_lr": 0.00016263157894736845, "low_lr": 3.252631578947369e-06, "step": 1591 }, { "epoch": 4.184089414858645, "high_lr": 0.00016263157894736845, "low_lr": 3.252631578947369e-06, "step": 1591 }, { "epoch": 4.184089414858645, "high_lr": 0.00016263157894736845, "low_lr": 3.252631578947369e-06, "step": 1591 }, { "epoch": 4.184089414858645, "high_lr": 0.00016263157894736845, "low_lr": 3.252631578947369e-06, "step": 1591 }, { "epoch": 4.184089414858645, "high_lr": 0.00016263157894736845, "low_lr": 3.252631578947369e-06, "step": 1591 }, { "epoch": 4.184089414858645, "high_lr": 0.00016263157894736845, "low_lr": 3.252631578947369e-06, "step": 1591 }, { "epoch": 4.186719263642341, "grad_norm": 1.489429235458374, "learning_rate": 0.00016210526315789473, "loss": 1.2286, "step": 1592 }, { "epoch": 4.186719263642341, "high_lr": 0.00016210526315789473, "low_lr": 3.2421052631578945e-06, "step": 1592 }, { "epoch": 4.186719263642341, "high_lr": 0.00016210526315789473, "low_lr": 3.2421052631578945e-06, "step": 1592 }, { "epoch": 4.186719263642341, "high_lr": 0.00016210526315789473, "low_lr": 3.2421052631578945e-06, "step": 1592 }, { "epoch": 4.186719263642341, "high_lr": 0.00016210526315789473, "low_lr": 3.2421052631578945e-06, "step": 1592 }, { "epoch": 4.186719263642341, "high_lr": 0.00016210526315789473, "low_lr": 3.2421052631578945e-06, "step": 1592 }, { "epoch": 4.186719263642341, "high_lr": 0.00016210526315789473, "low_lr": 3.2421052631578945e-06, "step": 1592 }, { "epoch": 4.186719263642341, "high_lr": 0.00016210526315789473, "low_lr": 3.2421052631578945e-06, "step": 1592 }, { "epoch": 4.186719263642341, "high_lr": 0.00016210526315789473, "low_lr": 3.2421052631578945e-06, "step": 1592 }, { "epoch": 4.189349112426036, "grad_norm": 1.4498363733291626, "learning_rate": 0.00016157894736842105, "loss": 1.27, "step": 1593 }, { "epoch": 4.189349112426036, "high_lr": 0.00016157894736842105, "low_lr": 3.2315789473684213e-06, "step": 1593 }, { "epoch": 4.189349112426036, "high_lr": 0.00016157894736842105, "low_lr": 3.2315789473684213e-06, "step": 1593 }, { "epoch": 4.189349112426036, "high_lr": 0.00016157894736842105, "low_lr": 3.2315789473684213e-06, "step": 1593 }, { "epoch": 4.189349112426036, "high_lr": 0.00016157894736842105, "low_lr": 3.2315789473684213e-06, "step": 1593 }, { "epoch": 4.189349112426036, "high_lr": 0.00016157894736842105, "low_lr": 3.2315789473684213e-06, "step": 1593 }, { "epoch": 4.189349112426036, "high_lr": 0.00016157894736842105, "low_lr": 3.2315789473684213e-06, "step": 1593 }, { "epoch": 4.189349112426036, "high_lr": 0.00016157894736842105, "low_lr": 3.2315789473684213e-06, "step": 1593 }, { "epoch": 4.189349112426036, "high_lr": 0.00016157894736842105, "low_lr": 3.2315789473684213e-06, "step": 1593 }, { "epoch": 4.19197896120973, "grad_norm": 1.443981409072876, "learning_rate": 0.00016105263157894736, "loss": 1.216, "step": 1594 }, { "epoch": 4.19197896120973, "high_lr": 0.00016105263157894736, "low_lr": 3.2210526315789476e-06, "step": 1594 }, { "epoch": 4.19197896120973, "high_lr": 0.00016105263157894736, "low_lr": 3.2210526315789476e-06, "step": 1594 }, { "epoch": 4.19197896120973, "high_lr": 0.00016105263157894736, "low_lr": 3.2210526315789476e-06, "step": 1594 }, { "epoch": 4.19197896120973, "high_lr": 0.00016105263157894736, "low_lr": 3.2210526315789476e-06, "step": 1594 }, { "epoch": 4.19197896120973, "high_lr": 0.00016105263157894736, "low_lr": 3.2210526315789476e-06, "step": 1594 }, { "epoch": 4.19197896120973, "high_lr": 0.00016105263157894736, "low_lr": 3.2210526315789476e-06, "step": 1594 }, { "epoch": 4.19197896120973, "high_lr": 0.00016105263157894736, "low_lr": 3.2210526315789476e-06, "step": 1594 }, { "epoch": 4.19197896120973, "high_lr": 0.00016105263157894736, "low_lr": 3.2210526315789476e-06, "step": 1594 }, { "epoch": 4.194608809993426, "grad_norm": 1.510570764541626, "learning_rate": 0.0001605263157894737, "loss": 1.2031, "step": 1595 }, { "epoch": 4.194608809993426, "high_lr": 0.0001605263157894737, "low_lr": 3.210526315789474e-06, "step": 1595 }, { "epoch": 4.194608809993426, "high_lr": 0.0001605263157894737, "low_lr": 3.210526315789474e-06, "step": 1595 }, { "epoch": 4.194608809993426, "high_lr": 0.0001605263157894737, "low_lr": 3.210526315789474e-06, "step": 1595 }, { "epoch": 4.194608809993426, "high_lr": 0.0001605263157894737, "low_lr": 3.210526315789474e-06, "step": 1595 }, { "epoch": 4.194608809993426, "high_lr": 0.0001605263157894737, "low_lr": 3.210526315789474e-06, "step": 1595 }, { "epoch": 4.194608809993426, "high_lr": 0.0001605263157894737, "low_lr": 3.210526315789474e-06, "step": 1595 }, { "epoch": 4.194608809993426, "high_lr": 0.0001605263157894737, "low_lr": 3.210526315789474e-06, "step": 1595 }, { "epoch": 4.194608809993426, "high_lr": 0.0001605263157894737, "low_lr": 3.210526315789474e-06, "step": 1595 }, { "epoch": 4.19723865877712, "grad_norm": 1.4223371744155884, "learning_rate": 0.00016, "loss": 1.2385, "step": 1596 }, { "epoch": 4.19723865877712, "high_lr": 0.00016, "low_lr": 3.2000000000000003e-06, "step": 1596 }, { "epoch": 4.19723865877712, "high_lr": 0.00016, "low_lr": 3.2000000000000003e-06, "step": 1596 }, { "epoch": 4.19723865877712, "high_lr": 0.00016, "low_lr": 3.2000000000000003e-06, "step": 1596 }, { "epoch": 4.19723865877712, "high_lr": 0.00016, "low_lr": 3.2000000000000003e-06, "step": 1596 }, { "epoch": 4.19723865877712, "high_lr": 0.00016, "low_lr": 3.2000000000000003e-06, "step": 1596 }, { "epoch": 4.19723865877712, "high_lr": 0.00016, "low_lr": 3.2000000000000003e-06, "step": 1596 }, { "epoch": 4.19723865877712, "high_lr": 0.00016, "low_lr": 3.2000000000000003e-06, "step": 1596 }, { "epoch": 4.19723865877712, "high_lr": 0.00016, "low_lr": 3.2000000000000003e-06, "step": 1596 }, { "epoch": 4.199868507560815, "grad_norm": 1.393445372581482, "learning_rate": 0.00015947368421052633, "loss": 1.1783, "step": 1597 }, { "epoch": 4.199868507560815, "high_lr": 0.00015947368421052633, "low_lr": 3.1894736842105266e-06, "step": 1597 }, { "epoch": 4.199868507560815, "high_lr": 0.00015947368421052633, "low_lr": 3.1894736842105266e-06, "step": 1597 }, { "epoch": 4.199868507560815, "high_lr": 0.00015947368421052633, "low_lr": 3.1894736842105266e-06, "step": 1597 }, { "epoch": 4.199868507560815, "high_lr": 0.00015947368421052633, "low_lr": 3.1894736842105266e-06, "step": 1597 }, { "epoch": 4.199868507560815, "high_lr": 0.00015947368421052633, "low_lr": 3.1894736842105266e-06, "step": 1597 }, { "epoch": 4.199868507560815, "high_lr": 0.00015947368421052633, "low_lr": 3.1894736842105266e-06, "step": 1597 }, { "epoch": 4.199868507560815, "high_lr": 0.00015947368421052633, "low_lr": 3.1894736842105266e-06, "step": 1597 }, { "epoch": 4.199868507560815, "high_lr": 0.00015947368421052633, "low_lr": 3.1894736842105266e-06, "step": 1597 }, { "epoch": 4.20249835634451, "grad_norm": 1.7282485961914062, "learning_rate": 0.00015894736842105264, "loss": 1.2489, "step": 1598 }, { "epoch": 4.20249835634451, "high_lr": 0.00015894736842105264, "low_lr": 3.178947368421053e-06, "step": 1598 }, { "epoch": 4.20249835634451, "high_lr": 0.00015894736842105264, "low_lr": 3.178947368421053e-06, "step": 1598 }, { "epoch": 4.20249835634451, "high_lr": 0.00015894736842105264, "low_lr": 3.178947368421053e-06, "step": 1598 }, { "epoch": 4.20249835634451, "high_lr": 0.00015894736842105264, "low_lr": 3.178947368421053e-06, "step": 1598 }, { "epoch": 4.20249835634451, "high_lr": 0.00015894736842105264, "low_lr": 3.178947368421053e-06, "step": 1598 }, { "epoch": 4.20249835634451, "high_lr": 0.00015894736842105264, "low_lr": 3.178947368421053e-06, "step": 1598 }, { "epoch": 4.20249835634451, "high_lr": 0.00015894736842105264, "low_lr": 3.178947368421053e-06, "step": 1598 }, { "epoch": 4.20249835634451, "high_lr": 0.00015894736842105264, "low_lr": 3.178947368421053e-06, "step": 1598 }, { "epoch": 4.205128205128205, "grad_norm": 1.4703502655029297, "learning_rate": 0.00015842105263157892, "loss": 1.1709, "step": 1599 }, { "epoch": 4.205128205128205, "high_lr": 0.00015842105263157892, "low_lr": 3.168421052631579e-06, "step": 1599 }, { "epoch": 4.205128205128205, "high_lr": 0.00015842105263157892, "low_lr": 3.168421052631579e-06, "step": 1599 }, { "epoch": 4.205128205128205, "high_lr": 0.00015842105263157892, "low_lr": 3.168421052631579e-06, "step": 1599 }, { "epoch": 4.205128205128205, "high_lr": 0.00015842105263157892, "low_lr": 3.168421052631579e-06, "step": 1599 }, { "epoch": 4.205128205128205, "high_lr": 0.00015842105263157892, "low_lr": 3.168421052631579e-06, "step": 1599 }, { "epoch": 4.205128205128205, "high_lr": 0.00015842105263157892, "low_lr": 3.168421052631579e-06, "step": 1599 }, { "epoch": 4.205128205128205, "high_lr": 0.00015842105263157892, "low_lr": 3.168421052631579e-06, "step": 1599 }, { "epoch": 4.205128205128205, "high_lr": 0.00015842105263157892, "low_lr": 3.168421052631579e-06, "step": 1599 }, { "epoch": 4.2077580539119, "grad_norm": 1.5567339658737183, "learning_rate": 0.00015789473684210527, "loss": 1.2299, "step": 1600 }, { "epoch": 4.2077580539119, "high_lr": 0.00015789473684210527, "low_lr": 3.157894736842105e-06, "step": 1600 }, { "epoch": 4.2077580539119, "high_lr": 0.00015789473684210527, "low_lr": 3.157894736842105e-06, "step": 1600 }, { "epoch": 4.2077580539119, "high_lr": 0.00015789473684210527, "low_lr": 3.157894736842105e-06, "step": 1600 }, { "epoch": 4.2077580539119, "high_lr": 0.00015789473684210527, "low_lr": 3.157894736842105e-06, "step": 1600 }, { "epoch": 4.2077580539119, "high_lr": 0.00015789473684210527, "low_lr": 3.157894736842105e-06, "step": 1600 }, { "epoch": 4.2077580539119, "high_lr": 0.00015789473684210527, "low_lr": 3.157894736842105e-06, "step": 1600 }, { "epoch": 4.2077580539119, "high_lr": 0.00015789473684210527, "low_lr": 3.157894736842105e-06, "step": 1600 }, { "epoch": 4.2077580539119, "high_lr": 0.00015789473684210527, "low_lr": 3.157894736842105e-06, "step": 1600 }, { "epoch": 4.210387902695595, "grad_norm": 1.6771581172943115, "learning_rate": 0.00015736842105263158, "loss": 1.2032, "step": 1601 }, { "epoch": 4.210387902695595, "high_lr": 0.00015736842105263158, "low_lr": 3.147368421052632e-06, "step": 1601 }, { "epoch": 4.210387902695595, "high_lr": 0.00015736842105263158, "low_lr": 3.147368421052632e-06, "step": 1601 }, { "epoch": 4.210387902695595, "high_lr": 0.00015736842105263158, "low_lr": 3.147368421052632e-06, "step": 1601 }, { "epoch": 4.210387902695595, "high_lr": 0.00015736842105263158, "low_lr": 3.147368421052632e-06, "step": 1601 }, { "epoch": 4.210387902695595, "high_lr": 0.00015736842105263158, "low_lr": 3.147368421052632e-06, "step": 1601 }, { "epoch": 4.210387902695595, "high_lr": 0.00015736842105263158, "low_lr": 3.147368421052632e-06, "step": 1601 }, { "epoch": 4.210387902695595, "high_lr": 0.00015736842105263158, "low_lr": 3.147368421052632e-06, "step": 1601 }, { "epoch": 4.210387902695595, "high_lr": 0.00015736842105263158, "low_lr": 3.147368421052632e-06, "step": 1601 }, { "epoch": 4.21301775147929, "grad_norm": 1.4878536462783813, "learning_rate": 0.0001568421052631579, "loss": 1.1945, "step": 1602 }, { "epoch": 4.21301775147929, "high_lr": 0.0001568421052631579, "low_lr": 3.1368421052631582e-06, "step": 1602 }, { "epoch": 4.21301775147929, "high_lr": 0.0001568421052631579, "low_lr": 3.1368421052631582e-06, "step": 1602 }, { "epoch": 4.21301775147929, "high_lr": 0.0001568421052631579, "low_lr": 3.1368421052631582e-06, "step": 1602 }, { "epoch": 4.21301775147929, "high_lr": 0.0001568421052631579, "low_lr": 3.1368421052631582e-06, "step": 1602 }, { "epoch": 4.21301775147929, "high_lr": 0.0001568421052631579, "low_lr": 3.1368421052631582e-06, "step": 1602 }, { "epoch": 4.21301775147929, "high_lr": 0.0001568421052631579, "low_lr": 3.1368421052631582e-06, "step": 1602 }, { "epoch": 4.21301775147929, "high_lr": 0.0001568421052631579, "low_lr": 3.1368421052631582e-06, "step": 1602 }, { "epoch": 4.21301775147929, "high_lr": 0.0001568421052631579, "low_lr": 3.1368421052631582e-06, "step": 1602 }, { "epoch": 4.215647600262985, "grad_norm": 1.6874127388000488, "learning_rate": 0.0001563157894736842, "loss": 1.1673, "step": 1603 }, { "epoch": 4.215647600262985, "high_lr": 0.0001563157894736842, "low_lr": 3.1263157894736846e-06, "step": 1603 }, { "epoch": 4.215647600262985, "high_lr": 0.0001563157894736842, "low_lr": 3.1263157894736846e-06, "step": 1603 }, { "epoch": 4.215647600262985, "high_lr": 0.0001563157894736842, "low_lr": 3.1263157894736846e-06, "step": 1603 }, { "epoch": 4.215647600262985, "high_lr": 0.0001563157894736842, "low_lr": 3.1263157894736846e-06, "step": 1603 }, { "epoch": 4.215647600262985, "high_lr": 0.0001563157894736842, "low_lr": 3.1263157894736846e-06, "step": 1603 }, { "epoch": 4.215647600262985, "high_lr": 0.0001563157894736842, "low_lr": 3.1263157894736846e-06, "step": 1603 }, { "epoch": 4.215647600262985, "high_lr": 0.0001563157894736842, "low_lr": 3.1263157894736846e-06, "step": 1603 }, { "epoch": 4.215647600262985, "high_lr": 0.0001563157894736842, "low_lr": 3.1263157894736846e-06, "step": 1603 }, { "epoch": 4.2182774490466795, "grad_norm": 1.5085018873214722, "learning_rate": 0.00015578947368421054, "loss": 1.2319, "step": 1604 }, { "epoch": 4.2182774490466795, "high_lr": 0.00015578947368421054, "low_lr": 3.115789473684211e-06, "step": 1604 }, { "epoch": 4.2182774490466795, "high_lr": 0.00015578947368421054, "low_lr": 3.115789473684211e-06, "step": 1604 }, { "epoch": 4.2182774490466795, "high_lr": 0.00015578947368421054, "low_lr": 3.115789473684211e-06, "step": 1604 }, { "epoch": 4.2182774490466795, "high_lr": 0.00015578947368421054, "low_lr": 3.115789473684211e-06, "step": 1604 }, { "epoch": 4.2182774490466795, "high_lr": 0.00015578947368421054, "low_lr": 3.115789473684211e-06, "step": 1604 }, { "epoch": 4.2182774490466795, "high_lr": 0.00015578947368421054, "low_lr": 3.115789473684211e-06, "step": 1604 }, { "epoch": 4.2182774490466795, "high_lr": 0.00015578947368421054, "low_lr": 3.115789473684211e-06, "step": 1604 }, { "epoch": 4.2182774490466795, "high_lr": 0.00015578947368421054, "low_lr": 3.115789473684211e-06, "step": 1604 }, { "epoch": 4.220907297830375, "grad_norm": 1.5692294836044312, "learning_rate": 0.00015526315789473686, "loss": 1.1995, "step": 1605 }, { "epoch": 4.220907297830375, "high_lr": 0.00015526315789473686, "low_lr": 3.1052631578947372e-06, "step": 1605 }, { "epoch": 4.220907297830375, "high_lr": 0.00015526315789473686, "low_lr": 3.1052631578947372e-06, "step": 1605 }, { "epoch": 4.220907297830375, "high_lr": 0.00015526315789473686, "low_lr": 3.1052631578947372e-06, "step": 1605 }, { "epoch": 4.220907297830375, "high_lr": 0.00015526315789473686, "low_lr": 3.1052631578947372e-06, "step": 1605 }, { "epoch": 4.220907297830375, "high_lr": 0.00015526315789473686, "low_lr": 3.1052631578947372e-06, "step": 1605 }, { "epoch": 4.220907297830375, "high_lr": 0.00015526315789473686, "low_lr": 3.1052631578947372e-06, "step": 1605 }, { "epoch": 4.220907297830375, "high_lr": 0.00015526315789473686, "low_lr": 3.1052631578947372e-06, "step": 1605 }, { "epoch": 4.220907297830375, "high_lr": 0.00015526315789473686, "low_lr": 3.1052631578947372e-06, "step": 1605 }, { "epoch": 4.22353714661407, "grad_norm": 1.49501633644104, "learning_rate": 0.00015473684210526314, "loss": 1.2792, "step": 1606 }, { "epoch": 4.22353714661407, "high_lr": 0.00015473684210526314, "low_lr": 3.094736842105263e-06, "step": 1606 }, { "epoch": 4.22353714661407, "high_lr": 0.00015473684210526314, "low_lr": 3.094736842105263e-06, "step": 1606 }, { "epoch": 4.22353714661407, "high_lr": 0.00015473684210526314, "low_lr": 3.094736842105263e-06, "step": 1606 }, { "epoch": 4.22353714661407, "high_lr": 0.00015473684210526314, "low_lr": 3.094736842105263e-06, "step": 1606 }, { "epoch": 4.22353714661407, "high_lr": 0.00015473684210526314, "low_lr": 3.094736842105263e-06, "step": 1606 }, { "epoch": 4.22353714661407, "high_lr": 0.00015473684210526314, "low_lr": 3.094736842105263e-06, "step": 1606 }, { "epoch": 4.22353714661407, "high_lr": 0.00015473684210526314, "low_lr": 3.094736842105263e-06, "step": 1606 }, { "epoch": 4.22353714661407, "high_lr": 0.00015473684210526314, "low_lr": 3.094736842105263e-06, "step": 1606 }, { "epoch": 4.226166995397764, "grad_norm": 1.5470064878463745, "learning_rate": 0.00015421052631578946, "loss": 1.2105, "step": 1607 }, { "epoch": 4.226166995397764, "high_lr": 0.00015421052631578946, "low_lr": 3.0842105263157895e-06, "step": 1607 }, { "epoch": 4.226166995397764, "high_lr": 0.00015421052631578946, "low_lr": 3.0842105263157895e-06, "step": 1607 }, { "epoch": 4.226166995397764, "high_lr": 0.00015421052631578946, "low_lr": 3.0842105263157895e-06, "step": 1607 }, { "epoch": 4.226166995397764, "high_lr": 0.00015421052631578946, "low_lr": 3.0842105263157895e-06, "step": 1607 }, { "epoch": 4.226166995397764, "high_lr": 0.00015421052631578946, "low_lr": 3.0842105263157895e-06, "step": 1607 }, { "epoch": 4.226166995397764, "high_lr": 0.00015421052631578946, "low_lr": 3.0842105263157895e-06, "step": 1607 }, { "epoch": 4.226166995397764, "high_lr": 0.00015421052631578946, "low_lr": 3.0842105263157895e-06, "step": 1607 }, { "epoch": 4.226166995397764, "high_lr": 0.00015421052631578946, "low_lr": 3.0842105263157895e-06, "step": 1607 }, { "epoch": 4.22879684418146, "grad_norm": 1.4905816316604614, "learning_rate": 0.0001536842105263158, "loss": 1.266, "step": 1608 }, { "epoch": 4.22879684418146, "high_lr": 0.0001536842105263158, "low_lr": 3.0736842105263158e-06, "step": 1608 }, { "epoch": 4.22879684418146, "high_lr": 0.0001536842105263158, "low_lr": 3.0736842105263158e-06, "step": 1608 }, { "epoch": 4.22879684418146, "high_lr": 0.0001536842105263158, "low_lr": 3.0736842105263158e-06, "step": 1608 }, { "epoch": 4.22879684418146, "high_lr": 0.0001536842105263158, "low_lr": 3.0736842105263158e-06, "step": 1608 }, { "epoch": 4.22879684418146, "high_lr": 0.0001536842105263158, "low_lr": 3.0736842105263158e-06, "step": 1608 }, { "epoch": 4.22879684418146, "high_lr": 0.0001536842105263158, "low_lr": 3.0736842105263158e-06, "step": 1608 }, { "epoch": 4.22879684418146, "high_lr": 0.0001536842105263158, "low_lr": 3.0736842105263158e-06, "step": 1608 }, { "epoch": 4.22879684418146, "high_lr": 0.0001536842105263158, "low_lr": 3.0736842105263158e-06, "step": 1608 }, { "epoch": 4.2314266929651545, "grad_norm": 1.675032615661621, "learning_rate": 0.0001531578947368421, "loss": 1.2316, "step": 1609 }, { "epoch": 4.2314266929651545, "high_lr": 0.0001531578947368421, "low_lr": 3.0631578947368425e-06, "step": 1609 }, { "epoch": 4.2314266929651545, "high_lr": 0.0001531578947368421, "low_lr": 3.0631578947368425e-06, "step": 1609 }, { "epoch": 4.2314266929651545, "high_lr": 0.0001531578947368421, "low_lr": 3.0631578947368425e-06, "step": 1609 }, { "epoch": 4.2314266929651545, "high_lr": 0.0001531578947368421, "low_lr": 3.0631578947368425e-06, "step": 1609 }, { "epoch": 4.2314266929651545, "high_lr": 0.0001531578947368421, "low_lr": 3.0631578947368425e-06, "step": 1609 }, { "epoch": 4.2314266929651545, "high_lr": 0.0001531578947368421, "low_lr": 3.0631578947368425e-06, "step": 1609 }, { "epoch": 4.2314266929651545, "high_lr": 0.0001531578947368421, "low_lr": 3.0631578947368425e-06, "step": 1609 }, { "epoch": 4.2314266929651545, "high_lr": 0.0001531578947368421, "low_lr": 3.0631578947368425e-06, "step": 1609 }, { "epoch": 4.234056541748849, "grad_norm": 1.5411441326141357, "learning_rate": 0.00015263157894736842, "loss": 1.229, "step": 1610 }, { "epoch": 4.234056541748849, "high_lr": 0.00015263157894736842, "low_lr": 3.052631578947369e-06, "step": 1610 }, { "epoch": 4.234056541748849, "high_lr": 0.00015263157894736842, "low_lr": 3.052631578947369e-06, "step": 1610 }, { "epoch": 4.234056541748849, "high_lr": 0.00015263157894736842, "low_lr": 3.052631578947369e-06, "step": 1610 }, { "epoch": 4.234056541748849, "high_lr": 0.00015263157894736842, "low_lr": 3.052631578947369e-06, "step": 1610 }, { "epoch": 4.234056541748849, "high_lr": 0.00015263157894736842, "low_lr": 3.052631578947369e-06, "step": 1610 }, { "epoch": 4.234056541748849, "high_lr": 0.00015263157894736842, "low_lr": 3.052631578947369e-06, "step": 1610 }, { "epoch": 4.234056541748849, "high_lr": 0.00015263157894736842, "low_lr": 3.052631578947369e-06, "step": 1610 }, { "epoch": 4.234056541748849, "high_lr": 0.00015263157894736842, "low_lr": 3.052631578947369e-06, "step": 1610 }, { "epoch": 4.236686390532545, "grad_norm": 1.406537413597107, "learning_rate": 0.00015210526315789473, "loss": 1.2178, "step": 1611 }, { "epoch": 4.236686390532545, "high_lr": 0.00015210526315789473, "low_lr": 3.042105263157895e-06, "step": 1611 }, { "epoch": 4.236686390532545, "high_lr": 0.00015210526315789473, "low_lr": 3.042105263157895e-06, "step": 1611 }, { "epoch": 4.236686390532545, "high_lr": 0.00015210526315789473, "low_lr": 3.042105263157895e-06, "step": 1611 }, { "epoch": 4.236686390532545, "high_lr": 0.00015210526315789473, "low_lr": 3.042105263157895e-06, "step": 1611 }, { "epoch": 4.236686390532545, "high_lr": 0.00015210526315789473, "low_lr": 3.042105263157895e-06, "step": 1611 }, { "epoch": 4.236686390532545, "high_lr": 0.00015210526315789473, "low_lr": 3.042105263157895e-06, "step": 1611 }, { "epoch": 4.236686390532545, "high_lr": 0.00015210526315789473, "low_lr": 3.042105263157895e-06, "step": 1611 }, { "epoch": 4.236686390532545, "high_lr": 0.00015210526315789473, "low_lr": 3.042105263157895e-06, "step": 1611 }, { "epoch": 4.239316239316239, "grad_norm": 1.6038017272949219, "learning_rate": 0.00015157894736842108, "loss": 1.2459, "step": 1612 }, { "epoch": 4.239316239316239, "high_lr": 0.00015157894736842108, "low_lr": 3.0315789473684215e-06, "step": 1612 }, { "epoch": 4.239316239316239, "high_lr": 0.00015157894736842108, "low_lr": 3.0315789473684215e-06, "step": 1612 }, { "epoch": 4.239316239316239, "high_lr": 0.00015157894736842108, "low_lr": 3.0315789473684215e-06, "step": 1612 }, { "epoch": 4.239316239316239, "high_lr": 0.00015157894736842108, "low_lr": 3.0315789473684215e-06, "step": 1612 }, { "epoch": 4.239316239316239, "high_lr": 0.00015157894736842108, "low_lr": 3.0315789473684215e-06, "step": 1612 }, { "epoch": 4.239316239316239, "high_lr": 0.00015157894736842108, "low_lr": 3.0315789473684215e-06, "step": 1612 }, { "epoch": 4.239316239316239, "high_lr": 0.00015157894736842108, "low_lr": 3.0315789473684215e-06, "step": 1612 }, { "epoch": 4.239316239316239, "high_lr": 0.00015157894736842108, "low_lr": 3.0315789473684215e-06, "step": 1612 }, { "epoch": 4.241946088099934, "grad_norm": 1.4717649221420288, "learning_rate": 0.00015105263157894736, "loss": 1.2447, "step": 1613 }, { "epoch": 4.241946088099934, "high_lr": 0.00015105263157894736, "low_lr": 3.0210526315789474e-06, "step": 1613 }, { "epoch": 4.241946088099934, "high_lr": 0.00015105263157894736, "low_lr": 3.0210526315789474e-06, "step": 1613 }, { "epoch": 4.241946088099934, "high_lr": 0.00015105263157894736, "low_lr": 3.0210526315789474e-06, "step": 1613 }, { "epoch": 4.241946088099934, "high_lr": 0.00015105263157894736, "low_lr": 3.0210526315789474e-06, "step": 1613 }, { "epoch": 4.241946088099934, "high_lr": 0.00015105263157894736, "low_lr": 3.0210526315789474e-06, "step": 1613 }, { "epoch": 4.241946088099934, "high_lr": 0.00015105263157894736, "low_lr": 3.0210526315789474e-06, "step": 1613 }, { "epoch": 4.241946088099934, "high_lr": 0.00015105263157894736, "low_lr": 3.0210526315789474e-06, "step": 1613 }, { "epoch": 4.241946088099934, "high_lr": 0.00015105263157894736, "low_lr": 3.0210526315789474e-06, "step": 1613 }, { "epoch": 4.2445759368836296, "grad_norm": 1.6775355339050293, "learning_rate": 0.00015052631578947367, "loss": 1.2372, "step": 1614 }, { "epoch": 4.2445759368836296, "high_lr": 0.00015052631578947367, "low_lr": 3.0105263157894737e-06, "step": 1614 }, { "epoch": 4.2445759368836296, "high_lr": 0.00015052631578947367, "low_lr": 3.0105263157894737e-06, "step": 1614 }, { "epoch": 4.2445759368836296, "high_lr": 0.00015052631578947367, "low_lr": 3.0105263157894737e-06, "step": 1614 }, { "epoch": 4.2445759368836296, "high_lr": 0.00015052631578947367, "low_lr": 3.0105263157894737e-06, "step": 1614 }, { "epoch": 4.2445759368836296, "high_lr": 0.00015052631578947367, "low_lr": 3.0105263157894737e-06, "step": 1614 }, { "epoch": 4.2445759368836296, "high_lr": 0.00015052631578947367, "low_lr": 3.0105263157894737e-06, "step": 1614 }, { "epoch": 4.2445759368836296, "high_lr": 0.00015052631578947367, "low_lr": 3.0105263157894737e-06, "step": 1614 }, { "epoch": 4.2445759368836296, "high_lr": 0.00015052631578947367, "low_lr": 3.0105263157894737e-06, "step": 1614 }, { "epoch": 4.247205785667324, "grad_norm": 1.6113718748092651, "learning_rate": 0.00015, "loss": 1.2275, "step": 1615 }, { "epoch": 4.247205785667324, "high_lr": 0.00015, "low_lr": 3e-06, "step": 1615 }, { "epoch": 4.247205785667324, "high_lr": 0.00015, "low_lr": 3e-06, "step": 1615 }, { "epoch": 4.247205785667324, "high_lr": 0.00015, "low_lr": 3e-06, "step": 1615 }, { "epoch": 4.247205785667324, "high_lr": 0.00015, "low_lr": 3e-06, "step": 1615 }, { "epoch": 4.247205785667324, "high_lr": 0.00015, "low_lr": 3e-06, "step": 1615 }, { "epoch": 4.247205785667324, "high_lr": 0.00015, "low_lr": 3e-06, "step": 1615 }, { "epoch": 4.247205785667324, "high_lr": 0.00015, "low_lr": 3e-06, "step": 1615 }, { "epoch": 4.247205785667324, "high_lr": 0.00015, "low_lr": 3e-06, "step": 1615 }, { "epoch": 4.249835634451019, "grad_norm": 1.4303596019744873, "learning_rate": 0.00014947368421052633, "loss": 1.173, "step": 1616 }, { "epoch": 4.249835634451019, "high_lr": 0.00014947368421052633, "low_lr": 2.9894736842105264e-06, "step": 1616 }, { "epoch": 4.249835634451019, "high_lr": 0.00014947368421052633, "low_lr": 2.9894736842105264e-06, "step": 1616 }, { "epoch": 4.249835634451019, "high_lr": 0.00014947368421052633, "low_lr": 2.9894736842105264e-06, "step": 1616 }, { "epoch": 4.249835634451019, "high_lr": 0.00014947368421052633, "low_lr": 2.9894736842105264e-06, "step": 1616 }, { "epoch": 4.249835634451019, "high_lr": 0.00014947368421052633, "low_lr": 2.9894736842105264e-06, "step": 1616 }, { "epoch": 4.249835634451019, "high_lr": 0.00014947368421052633, "low_lr": 2.9894736842105264e-06, "step": 1616 }, { "epoch": 4.249835634451019, "high_lr": 0.00014947368421052633, "low_lr": 2.9894736842105264e-06, "step": 1616 }, { "epoch": 4.249835634451019, "high_lr": 0.00014947368421052633, "low_lr": 2.9894736842105264e-06, "step": 1616 }, { "epoch": 4.252465483234714, "grad_norm": 1.557103157043457, "learning_rate": 0.00014894736842105264, "loss": 1.2103, "step": 1617 }, { "epoch": 4.252465483234714, "high_lr": 0.00014894736842105264, "low_lr": 2.9789473684210527e-06, "step": 1617 }, { "epoch": 4.252465483234714, "high_lr": 0.00014894736842105264, "low_lr": 2.9789473684210527e-06, "step": 1617 }, { "epoch": 4.252465483234714, "high_lr": 0.00014894736842105264, "low_lr": 2.9789473684210527e-06, "step": 1617 }, { "epoch": 4.252465483234714, "high_lr": 0.00014894736842105264, "low_lr": 2.9789473684210527e-06, "step": 1617 }, { "epoch": 4.252465483234714, "high_lr": 0.00014894736842105264, "low_lr": 2.9789473684210527e-06, "step": 1617 }, { "epoch": 4.252465483234714, "high_lr": 0.00014894736842105264, "low_lr": 2.9789473684210527e-06, "step": 1617 }, { "epoch": 4.252465483234714, "high_lr": 0.00014894736842105264, "low_lr": 2.9789473684210527e-06, "step": 1617 }, { "epoch": 4.252465483234714, "high_lr": 0.00014894736842105264, "low_lr": 2.9789473684210527e-06, "step": 1617 }, { "epoch": 4.255095332018409, "grad_norm": 1.4532480239868164, "learning_rate": 0.00014842105263157895, "loss": 1.239, "step": 1618 }, { "epoch": 4.255095332018409, "high_lr": 0.00014842105263157895, "low_lr": 2.9684210526315795e-06, "step": 1618 }, { "epoch": 4.255095332018409, "high_lr": 0.00014842105263157895, "low_lr": 2.9684210526315795e-06, "step": 1618 }, { "epoch": 4.255095332018409, "high_lr": 0.00014842105263157895, "low_lr": 2.9684210526315795e-06, "step": 1618 }, { "epoch": 4.255095332018409, "high_lr": 0.00014842105263157895, "low_lr": 2.9684210526315795e-06, "step": 1618 }, { "epoch": 4.255095332018409, "high_lr": 0.00014842105263157895, "low_lr": 2.9684210526315795e-06, "step": 1618 }, { "epoch": 4.255095332018409, "high_lr": 0.00014842105263157895, "low_lr": 2.9684210526315795e-06, "step": 1618 }, { "epoch": 4.255095332018409, "high_lr": 0.00014842105263157895, "low_lr": 2.9684210526315795e-06, "step": 1618 }, { "epoch": 4.255095332018409, "high_lr": 0.00014842105263157895, "low_lr": 2.9684210526315795e-06, "step": 1618 }, { "epoch": 4.257725180802104, "grad_norm": 1.565211296081543, "learning_rate": 0.00014789473684210527, "loss": 1.2053, "step": 1619 }, { "epoch": 4.257725180802104, "high_lr": 0.00014789473684210527, "low_lr": 2.957894736842106e-06, "step": 1619 }, { "epoch": 4.257725180802104, "high_lr": 0.00014789473684210527, "low_lr": 2.957894736842106e-06, "step": 1619 }, { "epoch": 4.257725180802104, "high_lr": 0.00014789473684210527, "low_lr": 2.957894736842106e-06, "step": 1619 }, { "epoch": 4.257725180802104, "high_lr": 0.00014789473684210527, "low_lr": 2.957894736842106e-06, "step": 1619 }, { "epoch": 4.257725180802104, "high_lr": 0.00014789473684210527, "low_lr": 2.957894736842106e-06, "step": 1619 }, { "epoch": 4.257725180802104, "high_lr": 0.00014789473684210527, "low_lr": 2.957894736842106e-06, "step": 1619 }, { "epoch": 4.257725180802104, "high_lr": 0.00014789473684210527, "low_lr": 2.957894736842106e-06, "step": 1619 }, { "epoch": 4.257725180802104, "high_lr": 0.00014789473684210527, "low_lr": 2.957894736842106e-06, "step": 1619 }, { "epoch": 4.260355029585799, "grad_norm": 1.51058030128479, "learning_rate": 0.00014736842105263158, "loss": 1.2283, "step": 1620 }, { "epoch": 4.260355029585799, "high_lr": 0.00014736842105263158, "low_lr": 2.9473684210526317e-06, "step": 1620 }, { "epoch": 4.260355029585799, "high_lr": 0.00014736842105263158, "low_lr": 2.9473684210526317e-06, "step": 1620 }, { "epoch": 4.260355029585799, "high_lr": 0.00014736842105263158, "low_lr": 2.9473684210526317e-06, "step": 1620 }, { "epoch": 4.260355029585799, "high_lr": 0.00014736842105263158, "low_lr": 2.9473684210526317e-06, "step": 1620 }, { "epoch": 4.260355029585799, "high_lr": 0.00014736842105263158, "low_lr": 2.9473684210526317e-06, "step": 1620 }, { "epoch": 4.260355029585799, "high_lr": 0.00014736842105263158, "low_lr": 2.9473684210526317e-06, "step": 1620 }, { "epoch": 4.260355029585799, "high_lr": 0.00014736842105263158, "low_lr": 2.9473684210526317e-06, "step": 1620 }, { "epoch": 4.260355029585799, "high_lr": 0.00014736842105263158, "low_lr": 2.9473684210526317e-06, "step": 1620 }, { "epoch": 4.262984878369494, "grad_norm": 1.89518141746521, "learning_rate": 0.0001468421052631579, "loss": 1.1705, "step": 1621 }, { "epoch": 4.262984878369494, "high_lr": 0.0001468421052631579, "low_lr": 2.936842105263158e-06, "step": 1621 }, { "epoch": 4.262984878369494, "high_lr": 0.0001468421052631579, "low_lr": 2.936842105263158e-06, "step": 1621 }, { "epoch": 4.262984878369494, "high_lr": 0.0001468421052631579, "low_lr": 2.936842105263158e-06, "step": 1621 }, { "epoch": 4.262984878369494, "high_lr": 0.0001468421052631579, "low_lr": 2.936842105263158e-06, "step": 1621 }, { "epoch": 4.262984878369494, "high_lr": 0.0001468421052631579, "low_lr": 2.936842105263158e-06, "step": 1621 }, { "epoch": 4.262984878369494, "high_lr": 0.0001468421052631579, "low_lr": 2.936842105263158e-06, "step": 1621 }, { "epoch": 4.262984878369494, "high_lr": 0.0001468421052631579, "low_lr": 2.936842105263158e-06, "step": 1621 }, { "epoch": 4.262984878369494, "high_lr": 0.0001468421052631579, "low_lr": 2.936842105263158e-06, "step": 1621 }, { "epoch": 4.265614727153189, "grad_norm": 1.4794954061508179, "learning_rate": 0.0001463157894736842, "loss": 1.174, "step": 1622 }, { "epoch": 4.265614727153189, "high_lr": 0.0001463157894736842, "low_lr": 2.9263157894736844e-06, "step": 1622 }, { "epoch": 4.265614727153189, "high_lr": 0.0001463157894736842, "low_lr": 2.9263157894736844e-06, "step": 1622 }, { "epoch": 4.265614727153189, "high_lr": 0.0001463157894736842, "low_lr": 2.9263157894736844e-06, "step": 1622 }, { "epoch": 4.265614727153189, "high_lr": 0.0001463157894736842, "low_lr": 2.9263157894736844e-06, "step": 1622 }, { "epoch": 4.265614727153189, "high_lr": 0.0001463157894736842, "low_lr": 2.9263157894736844e-06, "step": 1622 }, { "epoch": 4.265614727153189, "high_lr": 0.0001463157894736842, "low_lr": 2.9263157894736844e-06, "step": 1622 }, { "epoch": 4.265614727153189, "high_lr": 0.0001463157894736842, "low_lr": 2.9263157894736844e-06, "step": 1622 }, { "epoch": 4.265614727153189, "high_lr": 0.0001463157894736842, "low_lr": 2.9263157894736844e-06, "step": 1622 }, { "epoch": 4.268244575936883, "grad_norm": 1.5923118591308594, "learning_rate": 0.00014578947368421052, "loss": 1.2263, "step": 1623 }, { "epoch": 4.268244575936883, "high_lr": 0.00014578947368421052, "low_lr": 2.9157894736842107e-06, "step": 1623 }, { "epoch": 4.268244575936883, "high_lr": 0.00014578947368421052, "low_lr": 2.9157894736842107e-06, "step": 1623 }, { "epoch": 4.268244575936883, "high_lr": 0.00014578947368421052, "low_lr": 2.9157894736842107e-06, "step": 1623 }, { "epoch": 4.268244575936883, "high_lr": 0.00014578947368421052, "low_lr": 2.9157894736842107e-06, "step": 1623 }, { "epoch": 4.268244575936883, "high_lr": 0.00014578947368421052, "low_lr": 2.9157894736842107e-06, "step": 1623 }, { "epoch": 4.268244575936883, "high_lr": 0.00014578947368421052, "low_lr": 2.9157894736842107e-06, "step": 1623 }, { "epoch": 4.268244575936883, "high_lr": 0.00014578947368421052, "low_lr": 2.9157894736842107e-06, "step": 1623 }, { "epoch": 4.268244575936883, "high_lr": 0.00014578947368421052, "low_lr": 2.9157894736842107e-06, "step": 1623 }, { "epoch": 4.270874424720579, "grad_norm": 1.5842251777648926, "learning_rate": 0.00014526315789473686, "loss": 1.1803, "step": 1624 }, { "epoch": 4.270874424720579, "high_lr": 0.00014526315789473686, "low_lr": 2.905263157894737e-06, "step": 1624 }, { "epoch": 4.270874424720579, "high_lr": 0.00014526315789473686, "low_lr": 2.905263157894737e-06, "step": 1624 }, { "epoch": 4.270874424720579, "high_lr": 0.00014526315789473686, "low_lr": 2.905263157894737e-06, "step": 1624 }, { "epoch": 4.270874424720579, "high_lr": 0.00014526315789473686, "low_lr": 2.905263157894737e-06, "step": 1624 }, { "epoch": 4.270874424720579, "high_lr": 0.00014526315789473686, "low_lr": 2.905263157894737e-06, "step": 1624 }, { "epoch": 4.270874424720579, "high_lr": 0.00014526315789473686, "low_lr": 2.905263157894737e-06, "step": 1624 }, { "epoch": 4.270874424720579, "high_lr": 0.00014526315789473686, "low_lr": 2.905263157894737e-06, "step": 1624 }, { "epoch": 4.270874424720579, "high_lr": 0.00014526315789473686, "low_lr": 2.905263157894737e-06, "step": 1624 }, { "epoch": 4.273504273504273, "grad_norm": 1.6386295557022095, "learning_rate": 0.00014473684210526317, "loss": 1.2707, "step": 1625 }, { "epoch": 4.273504273504273, "high_lr": 0.00014473684210526317, "low_lr": 2.8947368421052634e-06, "step": 1625 }, { "epoch": 4.273504273504273, "high_lr": 0.00014473684210526317, "low_lr": 2.8947368421052634e-06, "step": 1625 }, { "epoch": 4.273504273504273, "high_lr": 0.00014473684210526317, "low_lr": 2.8947368421052634e-06, "step": 1625 }, { "epoch": 4.273504273504273, "high_lr": 0.00014473684210526317, "low_lr": 2.8947368421052634e-06, "step": 1625 }, { "epoch": 4.273504273504273, "high_lr": 0.00014473684210526317, "low_lr": 2.8947368421052634e-06, "step": 1625 }, { "epoch": 4.273504273504273, "high_lr": 0.00014473684210526317, "low_lr": 2.8947368421052634e-06, "step": 1625 }, { "epoch": 4.273504273504273, "high_lr": 0.00014473684210526317, "low_lr": 2.8947368421052634e-06, "step": 1625 }, { "epoch": 4.273504273504273, "high_lr": 0.00014473684210526317, "low_lr": 2.8947368421052634e-06, "step": 1625 }, { "epoch": 4.276134122287968, "grad_norm": 1.6175718307495117, "learning_rate": 0.00014421052631578948, "loss": 1.169, "step": 1626 }, { "epoch": 4.276134122287968, "high_lr": 0.00014421052631578948, "low_lr": 2.88421052631579e-06, "step": 1626 }, { "epoch": 4.276134122287968, "high_lr": 0.00014421052631578948, "low_lr": 2.88421052631579e-06, "step": 1626 }, { "epoch": 4.276134122287968, "high_lr": 0.00014421052631578948, "low_lr": 2.88421052631579e-06, "step": 1626 }, { "epoch": 4.276134122287968, "high_lr": 0.00014421052631578948, "low_lr": 2.88421052631579e-06, "step": 1626 }, { "epoch": 4.276134122287968, "high_lr": 0.00014421052631578948, "low_lr": 2.88421052631579e-06, "step": 1626 }, { "epoch": 4.276134122287968, "high_lr": 0.00014421052631578948, "low_lr": 2.88421052631579e-06, "step": 1626 }, { "epoch": 4.276134122287968, "high_lr": 0.00014421052631578948, "low_lr": 2.88421052631579e-06, "step": 1626 }, { "epoch": 4.276134122287968, "high_lr": 0.00014421052631578948, "low_lr": 2.88421052631579e-06, "step": 1626 }, { "epoch": 4.278763971071664, "grad_norm": 1.539567232131958, "learning_rate": 0.0001436842105263158, "loss": 1.224, "step": 1627 }, { "epoch": 4.278763971071664, "high_lr": 0.0001436842105263158, "low_lr": 2.8736842105263164e-06, "step": 1627 }, { "epoch": 4.278763971071664, "high_lr": 0.0001436842105263158, "low_lr": 2.8736842105263164e-06, "step": 1627 }, { "epoch": 4.278763971071664, "high_lr": 0.0001436842105263158, "low_lr": 2.8736842105263164e-06, "step": 1627 }, { "epoch": 4.278763971071664, "high_lr": 0.0001436842105263158, "low_lr": 2.8736842105263164e-06, "step": 1627 }, { "epoch": 4.278763971071664, "high_lr": 0.0001436842105263158, "low_lr": 2.8736842105263164e-06, "step": 1627 }, { "epoch": 4.278763971071664, "high_lr": 0.0001436842105263158, "low_lr": 2.8736842105263164e-06, "step": 1627 }, { "epoch": 4.278763971071664, "high_lr": 0.0001436842105263158, "low_lr": 2.8736842105263164e-06, "step": 1627 }, { "epoch": 4.278763971071664, "high_lr": 0.0001436842105263158, "low_lr": 2.8736842105263164e-06, "step": 1627 }, { "epoch": 4.281393819855358, "grad_norm": 1.699150562286377, "learning_rate": 0.0001431578947368421, "loss": 1.2255, "step": 1628 }, { "epoch": 4.281393819855358, "high_lr": 0.0001431578947368421, "low_lr": 2.8631578947368423e-06, "step": 1628 }, { "epoch": 4.281393819855358, "high_lr": 0.0001431578947368421, "low_lr": 2.8631578947368423e-06, "step": 1628 }, { "epoch": 4.281393819855358, "high_lr": 0.0001431578947368421, "low_lr": 2.8631578947368423e-06, "step": 1628 }, { "epoch": 4.281393819855358, "high_lr": 0.0001431578947368421, "low_lr": 2.8631578947368423e-06, "step": 1628 }, { "epoch": 4.281393819855358, "high_lr": 0.0001431578947368421, "low_lr": 2.8631578947368423e-06, "step": 1628 }, { "epoch": 4.281393819855358, "high_lr": 0.0001431578947368421, "low_lr": 2.8631578947368423e-06, "step": 1628 }, { "epoch": 4.281393819855358, "high_lr": 0.0001431578947368421, "low_lr": 2.8631578947368423e-06, "step": 1628 }, { "epoch": 4.281393819855358, "high_lr": 0.0001431578947368421, "low_lr": 2.8631578947368423e-06, "step": 1628 }, { "epoch": 4.284023668639053, "grad_norm": 2.0216362476348877, "learning_rate": 0.00014263157894736842, "loss": 1.1561, "step": 1629 }, { "epoch": 4.284023668639053, "high_lr": 0.00014263157894736842, "low_lr": 2.8526315789473687e-06, "step": 1629 }, { "epoch": 4.284023668639053, "high_lr": 0.00014263157894736842, "low_lr": 2.8526315789473687e-06, "step": 1629 }, { "epoch": 4.284023668639053, "high_lr": 0.00014263157894736842, "low_lr": 2.8526315789473687e-06, "step": 1629 }, { "epoch": 4.284023668639053, "high_lr": 0.00014263157894736842, "low_lr": 2.8526315789473687e-06, "step": 1629 }, { "epoch": 4.284023668639053, "high_lr": 0.00014263157894736842, "low_lr": 2.8526315789473687e-06, "step": 1629 }, { "epoch": 4.284023668639053, "high_lr": 0.00014263157894736842, "low_lr": 2.8526315789473687e-06, "step": 1629 }, { "epoch": 4.284023668639053, "high_lr": 0.00014263157894736842, "low_lr": 2.8526315789473687e-06, "step": 1629 }, { "epoch": 4.284023668639053, "high_lr": 0.00014263157894736842, "low_lr": 2.8526315789473687e-06, "step": 1629 }, { "epoch": 4.2866535174227485, "grad_norm": 1.4835139513015747, "learning_rate": 0.00014210526315789474, "loss": 1.2143, "step": 1630 }, { "epoch": 4.2866535174227485, "high_lr": 0.00014210526315789474, "low_lr": 2.842105263157895e-06, "step": 1630 }, { "epoch": 4.2866535174227485, "high_lr": 0.00014210526315789474, "low_lr": 2.842105263157895e-06, "step": 1630 }, { "epoch": 4.2866535174227485, "high_lr": 0.00014210526315789474, "low_lr": 2.842105263157895e-06, "step": 1630 }, { "epoch": 4.2866535174227485, "high_lr": 0.00014210526315789474, "low_lr": 2.842105263157895e-06, "step": 1630 }, { "epoch": 4.2866535174227485, "high_lr": 0.00014210526315789474, "low_lr": 2.842105263157895e-06, "step": 1630 }, { "epoch": 4.2866535174227485, "high_lr": 0.00014210526315789474, "low_lr": 2.842105263157895e-06, "step": 1630 }, { "epoch": 4.2866535174227485, "high_lr": 0.00014210526315789474, "low_lr": 2.842105263157895e-06, "step": 1630 }, { "epoch": 4.2866535174227485, "high_lr": 0.00014210526315789474, "low_lr": 2.842105263157895e-06, "step": 1630 }, { "epoch": 4.289283366206443, "grad_norm": 1.440755844116211, "learning_rate": 0.00014157894736842105, "loss": 1.223, "step": 1631 }, { "epoch": 4.289283366206443, "high_lr": 0.00014157894736842105, "low_lr": 2.8315789473684213e-06, "step": 1631 }, { "epoch": 4.289283366206443, "high_lr": 0.00014157894736842105, "low_lr": 2.8315789473684213e-06, "step": 1631 }, { "epoch": 4.289283366206443, "high_lr": 0.00014157894736842105, "low_lr": 2.8315789473684213e-06, "step": 1631 }, { "epoch": 4.289283366206443, "high_lr": 0.00014157894736842105, "low_lr": 2.8315789473684213e-06, "step": 1631 }, { "epoch": 4.289283366206443, "high_lr": 0.00014157894736842105, "low_lr": 2.8315789473684213e-06, "step": 1631 }, { "epoch": 4.289283366206443, "high_lr": 0.00014157894736842105, "low_lr": 2.8315789473684213e-06, "step": 1631 }, { "epoch": 4.289283366206443, "high_lr": 0.00014157894736842105, "low_lr": 2.8315789473684213e-06, "step": 1631 }, { "epoch": 4.289283366206443, "high_lr": 0.00014157894736842105, "low_lr": 2.8315789473684213e-06, "step": 1631 }, { "epoch": 4.291913214990138, "grad_norm": 1.5793685913085938, "learning_rate": 0.0001410526315789474, "loss": 1.2077, "step": 1632 }, { "epoch": 4.291913214990138, "high_lr": 0.0001410526315789474, "low_lr": 2.8210526315789476e-06, "step": 1632 }, { "epoch": 4.291913214990138, "high_lr": 0.0001410526315789474, "low_lr": 2.8210526315789476e-06, "step": 1632 }, { "epoch": 4.291913214990138, "high_lr": 0.0001410526315789474, "low_lr": 2.8210526315789476e-06, "step": 1632 }, { "epoch": 4.291913214990138, "high_lr": 0.0001410526315789474, "low_lr": 2.8210526315789476e-06, "step": 1632 }, { "epoch": 4.291913214990138, "high_lr": 0.0001410526315789474, "low_lr": 2.8210526315789476e-06, "step": 1632 }, { "epoch": 4.291913214990138, "high_lr": 0.0001410526315789474, "low_lr": 2.8210526315789476e-06, "step": 1632 }, { "epoch": 4.291913214990138, "high_lr": 0.0001410526315789474, "low_lr": 2.8210526315789476e-06, "step": 1632 }, { "epoch": 4.291913214990138, "high_lr": 0.0001410526315789474, "low_lr": 2.8210526315789476e-06, "step": 1632 }, { "epoch": 4.294543063773833, "grad_norm": 1.4733338356018066, "learning_rate": 0.0001405263157894737, "loss": 1.1838, "step": 1633 }, { "epoch": 4.294543063773833, "high_lr": 0.0001405263157894737, "low_lr": 2.810526315789474e-06, "step": 1633 }, { "epoch": 4.294543063773833, "high_lr": 0.0001405263157894737, "low_lr": 2.810526315789474e-06, "step": 1633 }, { "epoch": 4.294543063773833, "high_lr": 0.0001405263157894737, "low_lr": 2.810526315789474e-06, "step": 1633 }, { "epoch": 4.294543063773833, "high_lr": 0.0001405263157894737, "low_lr": 2.810526315789474e-06, "step": 1633 }, { "epoch": 4.294543063773833, "high_lr": 0.0001405263157894737, "low_lr": 2.810526315789474e-06, "step": 1633 }, { "epoch": 4.294543063773833, "high_lr": 0.0001405263157894737, "low_lr": 2.810526315789474e-06, "step": 1633 }, { "epoch": 4.294543063773833, "high_lr": 0.0001405263157894737, "low_lr": 2.810526315789474e-06, "step": 1633 }, { "epoch": 4.294543063773833, "high_lr": 0.0001405263157894737, "low_lr": 2.810526315789474e-06, "step": 1633 }, { "epoch": 4.297172912557528, "grad_norm": 1.656105875968933, "learning_rate": 0.00014000000000000001, "loss": 1.1924, "step": 1634 }, { "epoch": 4.297172912557528, "high_lr": 0.00014000000000000001, "low_lr": 2.8000000000000003e-06, "step": 1634 }, { "epoch": 4.297172912557528, "high_lr": 0.00014000000000000001, "low_lr": 2.8000000000000003e-06, "step": 1634 }, { "epoch": 4.297172912557528, "high_lr": 0.00014000000000000001, "low_lr": 2.8000000000000003e-06, "step": 1634 }, { "epoch": 4.297172912557528, "high_lr": 0.00014000000000000001, "low_lr": 2.8000000000000003e-06, "step": 1634 }, { "epoch": 4.297172912557528, "high_lr": 0.00014000000000000001, "low_lr": 2.8000000000000003e-06, "step": 1634 }, { "epoch": 4.297172912557528, "high_lr": 0.00014000000000000001, "low_lr": 2.8000000000000003e-06, "step": 1634 }, { "epoch": 4.297172912557528, "high_lr": 0.00014000000000000001, "low_lr": 2.8000000000000003e-06, "step": 1634 }, { "epoch": 4.297172912557528, "high_lr": 0.00014000000000000001, "low_lr": 2.8000000000000003e-06, "step": 1634 }, { "epoch": 4.299802761341223, "grad_norm": 1.5349594354629517, "learning_rate": 0.0001394736842105263, "loss": 1.2158, "step": 1635 }, { "epoch": 4.299802761341223, "high_lr": 0.0001394736842105263, "low_lr": 2.789473684210526e-06, "step": 1635 }, { "epoch": 4.299802761341223, "high_lr": 0.0001394736842105263, "low_lr": 2.789473684210526e-06, "step": 1635 }, { "epoch": 4.299802761341223, "high_lr": 0.0001394736842105263, "low_lr": 2.789473684210526e-06, "step": 1635 }, { "epoch": 4.299802761341223, "high_lr": 0.0001394736842105263, "low_lr": 2.789473684210526e-06, "step": 1635 }, { "epoch": 4.299802761341223, "high_lr": 0.0001394736842105263, "low_lr": 2.789473684210526e-06, "step": 1635 }, { "epoch": 4.299802761341223, "high_lr": 0.0001394736842105263, "low_lr": 2.789473684210526e-06, "step": 1635 }, { "epoch": 4.299802761341223, "high_lr": 0.0001394736842105263, "low_lr": 2.789473684210526e-06, "step": 1635 }, { "epoch": 4.299802761341223, "high_lr": 0.0001394736842105263, "low_lr": 2.789473684210526e-06, "step": 1635 }, { "epoch": 4.302432610124918, "grad_norm": 1.5845298767089844, "learning_rate": 0.00013894736842105264, "loss": 1.2199, "step": 1636 }, { "epoch": 4.302432610124918, "high_lr": 0.00013894736842105264, "low_lr": 2.7789473684210525e-06, "step": 1636 }, { "epoch": 4.302432610124918, "high_lr": 0.00013894736842105264, "low_lr": 2.7789473684210525e-06, "step": 1636 }, { "epoch": 4.302432610124918, "high_lr": 0.00013894736842105264, "low_lr": 2.7789473684210525e-06, "step": 1636 }, { "epoch": 4.302432610124918, "high_lr": 0.00013894736842105264, "low_lr": 2.7789473684210525e-06, "step": 1636 }, { "epoch": 4.302432610124918, "high_lr": 0.00013894736842105264, "low_lr": 2.7789473684210525e-06, "step": 1636 }, { "epoch": 4.302432610124918, "high_lr": 0.00013894736842105264, "low_lr": 2.7789473684210525e-06, "step": 1636 }, { "epoch": 4.302432610124918, "high_lr": 0.00013894736842105264, "low_lr": 2.7789473684210525e-06, "step": 1636 }, { "epoch": 4.302432610124918, "high_lr": 0.00013894736842105264, "low_lr": 2.7789473684210525e-06, "step": 1636 }, { "epoch": 4.305062458908613, "grad_norm": 1.5544193983078003, "learning_rate": 0.00013842105263157895, "loss": 1.1931, "step": 1637 }, { "epoch": 4.305062458908613, "high_lr": 0.00013842105263157895, "low_lr": 2.7684210526315793e-06, "step": 1637 }, { "epoch": 4.305062458908613, "high_lr": 0.00013842105263157895, "low_lr": 2.7684210526315793e-06, "step": 1637 }, { "epoch": 4.305062458908613, "high_lr": 0.00013842105263157895, "low_lr": 2.7684210526315793e-06, "step": 1637 }, { "epoch": 4.305062458908613, "high_lr": 0.00013842105263157895, "low_lr": 2.7684210526315793e-06, "step": 1637 }, { "epoch": 4.305062458908613, "high_lr": 0.00013842105263157895, "low_lr": 2.7684210526315793e-06, "step": 1637 }, { "epoch": 4.305062458908613, "high_lr": 0.00013842105263157895, "low_lr": 2.7684210526315793e-06, "step": 1637 }, { "epoch": 4.305062458908613, "high_lr": 0.00013842105263157895, "low_lr": 2.7684210526315793e-06, "step": 1637 }, { "epoch": 4.305062458908613, "high_lr": 0.00013842105263157895, "low_lr": 2.7684210526315793e-06, "step": 1637 }, { "epoch": 4.3076923076923075, "grad_norm": 1.5499837398529053, "learning_rate": 0.00013789473684210527, "loss": 1.1978, "step": 1638 }, { "epoch": 4.3076923076923075, "high_lr": 0.00013789473684210527, "low_lr": 2.7578947368421056e-06, "step": 1638 }, { "epoch": 4.3076923076923075, "high_lr": 0.00013789473684210527, "low_lr": 2.7578947368421056e-06, "step": 1638 }, { "epoch": 4.3076923076923075, "high_lr": 0.00013789473684210527, "low_lr": 2.7578947368421056e-06, "step": 1638 }, { "epoch": 4.3076923076923075, "high_lr": 0.00013789473684210527, "low_lr": 2.7578947368421056e-06, "step": 1638 }, { "epoch": 4.3076923076923075, "high_lr": 0.00013789473684210527, "low_lr": 2.7578947368421056e-06, "step": 1638 }, { "epoch": 4.3076923076923075, "high_lr": 0.00013789473684210527, "low_lr": 2.7578947368421056e-06, "step": 1638 }, { "epoch": 4.3076923076923075, "high_lr": 0.00013789473684210527, "low_lr": 2.7578947368421056e-06, "step": 1638 }, { "epoch": 4.3076923076923075, "high_lr": 0.00013789473684210527, "low_lr": 2.7578947368421056e-06, "step": 1638 }, { "epoch": 4.310322156476003, "grad_norm": 1.5697908401489258, "learning_rate": 0.00013736842105263158, "loss": 1.2205, "step": 1639 }, { "epoch": 4.310322156476003, "high_lr": 0.00013736842105263158, "low_lr": 2.747368421052632e-06, "step": 1639 }, { "epoch": 4.310322156476003, "high_lr": 0.00013736842105263158, "low_lr": 2.747368421052632e-06, "step": 1639 }, { "epoch": 4.310322156476003, "high_lr": 0.00013736842105263158, "low_lr": 2.747368421052632e-06, "step": 1639 }, { "epoch": 4.310322156476003, "high_lr": 0.00013736842105263158, "low_lr": 2.747368421052632e-06, "step": 1639 }, { "epoch": 4.310322156476003, "high_lr": 0.00013736842105263158, "low_lr": 2.747368421052632e-06, "step": 1639 }, { "epoch": 4.310322156476003, "high_lr": 0.00013736842105263158, "low_lr": 2.747368421052632e-06, "step": 1639 }, { "epoch": 4.310322156476003, "high_lr": 0.00013736842105263158, "low_lr": 2.747368421052632e-06, "step": 1639 }, { "epoch": 4.310322156476003, "high_lr": 0.00013736842105263158, "low_lr": 2.747368421052632e-06, "step": 1639 }, { "epoch": 4.312952005259698, "grad_norm": 1.465449571609497, "learning_rate": 0.00013684210526315792, "loss": 1.1993, "step": 1640 }, { "epoch": 4.312952005259698, "high_lr": 0.00013684210526315792, "low_lr": 2.7368421052631583e-06, "step": 1640 }, { "epoch": 4.312952005259698, "high_lr": 0.00013684210526315792, "low_lr": 2.7368421052631583e-06, "step": 1640 }, { "epoch": 4.312952005259698, "high_lr": 0.00013684210526315792, "low_lr": 2.7368421052631583e-06, "step": 1640 }, { "epoch": 4.312952005259698, "high_lr": 0.00013684210526315792, "low_lr": 2.7368421052631583e-06, "step": 1640 }, { "epoch": 4.312952005259698, "high_lr": 0.00013684210526315792, "low_lr": 2.7368421052631583e-06, "step": 1640 }, { "epoch": 4.312952005259698, "high_lr": 0.00013684210526315792, "low_lr": 2.7368421052631583e-06, "step": 1640 }, { "epoch": 4.312952005259698, "high_lr": 0.00013684210526315792, "low_lr": 2.7368421052631583e-06, "step": 1640 }, { "epoch": 4.312952005259698, "high_lr": 0.00013684210526315792, "low_lr": 2.7368421052631583e-06, "step": 1640 }, { "epoch": 4.315581854043392, "grad_norm": 1.5767720937728882, "learning_rate": 0.00013631578947368423, "loss": 1.1841, "step": 1641 }, { "epoch": 4.315581854043392, "high_lr": 0.00013631578947368423, "low_lr": 2.7263157894736846e-06, "step": 1641 }, { "epoch": 4.315581854043392, "high_lr": 0.00013631578947368423, "low_lr": 2.7263157894736846e-06, "step": 1641 }, { "epoch": 4.315581854043392, "high_lr": 0.00013631578947368423, "low_lr": 2.7263157894736846e-06, "step": 1641 }, { "epoch": 4.315581854043392, "high_lr": 0.00013631578947368423, "low_lr": 2.7263157894736846e-06, "step": 1641 }, { "epoch": 4.315581854043392, "high_lr": 0.00013631578947368423, "low_lr": 2.7263157894736846e-06, "step": 1641 }, { "epoch": 4.315581854043392, "high_lr": 0.00013631578947368423, "low_lr": 2.7263157894736846e-06, "step": 1641 }, { "epoch": 4.315581854043392, "high_lr": 0.00013631578947368423, "low_lr": 2.7263157894736846e-06, "step": 1641 }, { "epoch": 4.315581854043392, "high_lr": 0.00013631578947368423, "low_lr": 2.7263157894736846e-06, "step": 1641 }, { "epoch": 4.318211702827087, "grad_norm": 1.4431906938552856, "learning_rate": 0.00013578947368421052, "loss": 1.1995, "step": 1642 }, { "epoch": 4.318211702827087, "high_lr": 0.00013578947368421052, "low_lr": 2.7157894736842105e-06, "step": 1642 }, { "epoch": 4.318211702827087, "high_lr": 0.00013578947368421052, "low_lr": 2.7157894736842105e-06, "step": 1642 }, { "epoch": 4.318211702827087, "high_lr": 0.00013578947368421052, "low_lr": 2.7157894736842105e-06, "step": 1642 }, { "epoch": 4.318211702827087, "high_lr": 0.00013578947368421052, "low_lr": 2.7157894736842105e-06, "step": 1642 }, { "epoch": 4.318211702827087, "high_lr": 0.00013578947368421052, "low_lr": 2.7157894736842105e-06, "step": 1642 }, { "epoch": 4.318211702827087, "high_lr": 0.00013578947368421052, "low_lr": 2.7157894736842105e-06, "step": 1642 }, { "epoch": 4.318211702827087, "high_lr": 0.00013578947368421052, "low_lr": 2.7157894736842105e-06, "step": 1642 }, { "epoch": 4.318211702827087, "high_lr": 0.00013578947368421052, "low_lr": 2.7157894736842105e-06, "step": 1642 }, { "epoch": 4.3208415516107825, "grad_norm": 1.5884404182434082, "learning_rate": 0.00013526315789473683, "loss": 1.2289, "step": 1643 }, { "epoch": 4.3208415516107825, "high_lr": 0.00013526315789473683, "low_lr": 2.705263157894737e-06, "step": 1643 }, { "epoch": 4.3208415516107825, "high_lr": 0.00013526315789473683, "low_lr": 2.705263157894737e-06, "step": 1643 }, { "epoch": 4.3208415516107825, "high_lr": 0.00013526315789473683, "low_lr": 2.705263157894737e-06, "step": 1643 }, { "epoch": 4.3208415516107825, "high_lr": 0.00013526315789473683, "low_lr": 2.705263157894737e-06, "step": 1643 }, { "epoch": 4.3208415516107825, "high_lr": 0.00013526315789473683, "low_lr": 2.705263157894737e-06, "step": 1643 }, { "epoch": 4.3208415516107825, "high_lr": 0.00013526315789473683, "low_lr": 2.705263157894737e-06, "step": 1643 }, { "epoch": 4.3208415516107825, "high_lr": 0.00013526315789473683, "low_lr": 2.705263157894737e-06, "step": 1643 }, { "epoch": 4.3208415516107825, "high_lr": 0.00013526315789473683, "low_lr": 2.705263157894737e-06, "step": 1643 }, { "epoch": 4.323471400394477, "grad_norm": 1.6966110467910767, "learning_rate": 0.00013473684210526314, "loss": 1.1941, "step": 1644 }, { "epoch": 4.323471400394477, "high_lr": 0.00013473684210526314, "low_lr": 2.694736842105263e-06, "step": 1644 }, { "epoch": 4.323471400394477, "high_lr": 0.00013473684210526314, "low_lr": 2.694736842105263e-06, "step": 1644 }, { "epoch": 4.323471400394477, "high_lr": 0.00013473684210526314, "low_lr": 2.694736842105263e-06, "step": 1644 }, { "epoch": 4.323471400394477, "high_lr": 0.00013473684210526314, "low_lr": 2.694736842105263e-06, "step": 1644 }, { "epoch": 4.323471400394477, "high_lr": 0.00013473684210526314, "low_lr": 2.694736842105263e-06, "step": 1644 }, { "epoch": 4.323471400394477, "high_lr": 0.00013473684210526314, "low_lr": 2.694736842105263e-06, "step": 1644 }, { "epoch": 4.323471400394477, "high_lr": 0.00013473684210526314, "low_lr": 2.694736842105263e-06, "step": 1644 }, { "epoch": 4.323471400394477, "high_lr": 0.00013473684210526314, "low_lr": 2.694736842105263e-06, "step": 1644 }, { "epoch": 4.326101249178172, "grad_norm": 1.3939405679702759, "learning_rate": 0.00013421052631578948, "loss": 1.217, "step": 1645 }, { "epoch": 4.326101249178172, "high_lr": 0.00013421052631578948, "low_lr": 2.68421052631579e-06, "step": 1645 }, { "epoch": 4.326101249178172, "high_lr": 0.00013421052631578948, "low_lr": 2.68421052631579e-06, "step": 1645 }, { "epoch": 4.326101249178172, "high_lr": 0.00013421052631578948, "low_lr": 2.68421052631579e-06, "step": 1645 }, { "epoch": 4.326101249178172, "high_lr": 0.00013421052631578948, "low_lr": 2.68421052631579e-06, "step": 1645 }, { "epoch": 4.326101249178172, "high_lr": 0.00013421052631578948, "low_lr": 2.68421052631579e-06, "step": 1645 }, { "epoch": 4.326101249178172, "high_lr": 0.00013421052631578948, "low_lr": 2.68421052631579e-06, "step": 1645 }, { "epoch": 4.326101249178172, "high_lr": 0.00013421052631578948, "low_lr": 2.68421052631579e-06, "step": 1645 }, { "epoch": 4.326101249178172, "high_lr": 0.00013421052631578948, "low_lr": 2.68421052631579e-06, "step": 1645 }, { "epoch": 4.328731097961867, "grad_norm": 1.5275332927703857, "learning_rate": 0.0001336842105263158, "loss": 1.2183, "step": 1646 }, { "epoch": 4.328731097961867, "high_lr": 0.0001336842105263158, "low_lr": 2.6736842105263162e-06, "step": 1646 }, { "epoch": 4.328731097961867, "high_lr": 0.0001336842105263158, "low_lr": 2.6736842105263162e-06, "step": 1646 }, { "epoch": 4.328731097961867, "high_lr": 0.0001336842105263158, "low_lr": 2.6736842105263162e-06, "step": 1646 }, { "epoch": 4.328731097961867, "high_lr": 0.0001336842105263158, "low_lr": 2.6736842105263162e-06, "step": 1646 }, { "epoch": 4.328731097961867, "high_lr": 0.0001336842105263158, "low_lr": 2.6736842105263162e-06, "step": 1646 }, { "epoch": 4.328731097961867, "high_lr": 0.0001336842105263158, "low_lr": 2.6736842105263162e-06, "step": 1646 }, { "epoch": 4.328731097961867, "high_lr": 0.0001336842105263158, "low_lr": 2.6736842105263162e-06, "step": 1646 }, { "epoch": 4.328731097961867, "high_lr": 0.0001336842105263158, "low_lr": 2.6736842105263162e-06, "step": 1646 }, { "epoch": 4.331360946745562, "grad_norm": 1.4518120288848877, "learning_rate": 0.0001331578947368421, "loss": 1.1845, "step": 1647 }, { "epoch": 4.331360946745562, "high_lr": 0.0001331578947368421, "low_lr": 2.6631578947368426e-06, "step": 1647 }, { "epoch": 4.331360946745562, "high_lr": 0.0001331578947368421, "low_lr": 2.6631578947368426e-06, "step": 1647 }, { "epoch": 4.331360946745562, "high_lr": 0.0001331578947368421, "low_lr": 2.6631578947368426e-06, "step": 1647 }, { "epoch": 4.331360946745562, "high_lr": 0.0001331578947368421, "low_lr": 2.6631578947368426e-06, "step": 1647 }, { "epoch": 4.331360946745562, "high_lr": 0.0001331578947368421, "low_lr": 2.6631578947368426e-06, "step": 1647 }, { "epoch": 4.331360946745562, "high_lr": 0.0001331578947368421, "low_lr": 2.6631578947368426e-06, "step": 1647 }, { "epoch": 4.331360946745562, "high_lr": 0.0001331578947368421, "low_lr": 2.6631578947368426e-06, "step": 1647 }, { "epoch": 4.331360946745562, "high_lr": 0.0001331578947368421, "low_lr": 2.6631578947368426e-06, "step": 1647 }, { "epoch": 4.333990795529257, "grad_norm": 1.505735993385315, "learning_rate": 0.00013263157894736842, "loss": 1.2697, "step": 1648 }, { "epoch": 4.333990795529257, "high_lr": 0.00013263157894736842, "low_lr": 2.652631578947369e-06, "step": 1648 }, { "epoch": 4.333990795529257, "high_lr": 0.00013263157894736842, "low_lr": 2.652631578947369e-06, "step": 1648 }, { "epoch": 4.333990795529257, "high_lr": 0.00013263157894736842, "low_lr": 2.652631578947369e-06, "step": 1648 }, { "epoch": 4.333990795529257, "high_lr": 0.00013263157894736842, "low_lr": 2.652631578947369e-06, "step": 1648 }, { "epoch": 4.333990795529257, "high_lr": 0.00013263157894736842, "low_lr": 2.652631578947369e-06, "step": 1648 }, { "epoch": 4.333990795529257, "high_lr": 0.00013263157894736842, "low_lr": 2.652631578947369e-06, "step": 1648 }, { "epoch": 4.333990795529257, "high_lr": 0.00013263157894736842, "low_lr": 2.652631578947369e-06, "step": 1648 }, { "epoch": 4.333990795529257, "high_lr": 0.00013263157894736842, "low_lr": 2.652631578947369e-06, "step": 1648 }, { "epoch": 4.336620644312952, "grad_norm": 1.4569470882415771, "learning_rate": 0.00013210526315789474, "loss": 1.2057, "step": 1649 }, { "epoch": 4.336620644312952, "high_lr": 0.00013210526315789474, "low_lr": 2.6421052631578948e-06, "step": 1649 }, { "epoch": 4.336620644312952, "high_lr": 0.00013210526315789474, "low_lr": 2.6421052631578948e-06, "step": 1649 }, { "epoch": 4.336620644312952, "high_lr": 0.00013210526315789474, "low_lr": 2.6421052631578948e-06, "step": 1649 }, { "epoch": 4.336620644312952, "high_lr": 0.00013210526315789474, "low_lr": 2.6421052631578948e-06, "step": 1649 }, { "epoch": 4.336620644312952, "high_lr": 0.00013210526315789474, "low_lr": 2.6421052631578948e-06, "step": 1649 }, { "epoch": 4.336620644312952, "high_lr": 0.00013210526315789474, "low_lr": 2.6421052631578948e-06, "step": 1649 }, { "epoch": 4.336620644312952, "high_lr": 0.00013210526315789474, "low_lr": 2.6421052631578948e-06, "step": 1649 }, { "epoch": 4.336620644312952, "high_lr": 0.00013210526315789474, "low_lr": 2.6421052631578948e-06, "step": 1649 }, { "epoch": 4.339250493096647, "grad_norm": 1.4787817001342773, "learning_rate": 0.00013157894736842105, "loss": 1.1898, "step": 1650 }, { "epoch": 4.339250493096647, "high_lr": 0.00013157894736842105, "low_lr": 2.631578947368421e-06, "step": 1650 }, { "epoch": 4.339250493096647, "high_lr": 0.00013157894736842105, "low_lr": 2.631578947368421e-06, "step": 1650 }, { "epoch": 4.339250493096647, "high_lr": 0.00013157894736842105, "low_lr": 2.631578947368421e-06, "step": 1650 }, { "epoch": 4.339250493096647, "high_lr": 0.00013157894736842105, "low_lr": 2.631578947368421e-06, "step": 1650 }, { "epoch": 4.339250493096647, "high_lr": 0.00013157894736842105, "low_lr": 2.631578947368421e-06, "step": 1650 }, { "epoch": 4.339250493096647, "high_lr": 0.00013157894736842105, "low_lr": 2.631578947368421e-06, "step": 1650 }, { "epoch": 4.339250493096647, "high_lr": 0.00013157894736842105, "low_lr": 2.631578947368421e-06, "step": 1650 }, { "epoch": 4.339250493096647, "high_lr": 0.00013157894736842105, "low_lr": 2.631578947368421e-06, "step": 1650 }, { "epoch": 4.3418803418803416, "grad_norm": 1.423708200454712, "learning_rate": 0.00013105263157894736, "loss": 1.2488, "step": 1651 }, { "epoch": 4.3418803418803416, "high_lr": 0.00013105263157894736, "low_lr": 2.6210526315789474e-06, "step": 1651 }, { "epoch": 4.3418803418803416, "high_lr": 0.00013105263157894736, "low_lr": 2.6210526315789474e-06, "step": 1651 }, { "epoch": 4.3418803418803416, "high_lr": 0.00013105263157894736, "low_lr": 2.6210526315789474e-06, "step": 1651 }, { "epoch": 4.3418803418803416, "high_lr": 0.00013105263157894736, "low_lr": 2.6210526315789474e-06, "step": 1651 }, { "epoch": 4.3418803418803416, "high_lr": 0.00013105263157894736, "low_lr": 2.6210526315789474e-06, "step": 1651 }, { "epoch": 4.3418803418803416, "high_lr": 0.00013105263157894736, "low_lr": 2.6210526315789474e-06, "step": 1651 }, { "epoch": 4.3418803418803416, "high_lr": 0.00013105263157894736, "low_lr": 2.6210526315789474e-06, "step": 1651 }, { "epoch": 4.3418803418803416, "high_lr": 0.00013105263157894736, "low_lr": 2.6210526315789474e-06, "step": 1651 }, { "epoch": 4.344510190664037, "grad_norm": 1.5851572751998901, "learning_rate": 0.00013052631578947368, "loss": 1.2376, "step": 1652 }, { "epoch": 4.344510190664037, "high_lr": 0.00013052631578947368, "low_lr": 2.6105263157894738e-06, "step": 1652 }, { "epoch": 4.344510190664037, "high_lr": 0.00013052631578947368, "low_lr": 2.6105263157894738e-06, "step": 1652 }, { "epoch": 4.344510190664037, "high_lr": 0.00013052631578947368, "low_lr": 2.6105263157894738e-06, "step": 1652 }, { "epoch": 4.344510190664037, "high_lr": 0.00013052631578947368, "low_lr": 2.6105263157894738e-06, "step": 1652 }, { "epoch": 4.344510190664037, "high_lr": 0.00013052631578947368, "low_lr": 2.6105263157894738e-06, "step": 1652 }, { "epoch": 4.344510190664037, "high_lr": 0.00013052631578947368, "low_lr": 2.6105263157894738e-06, "step": 1652 }, { "epoch": 4.344510190664037, "high_lr": 0.00013052631578947368, "low_lr": 2.6105263157894738e-06, "step": 1652 }, { "epoch": 4.344510190664037, "high_lr": 0.00013052631578947368, "low_lr": 2.6105263157894738e-06, "step": 1652 }, { "epoch": 4.347140039447732, "grad_norm": 1.508453607559204, "learning_rate": 0.00013000000000000002, "loss": 1.2073, "step": 1653 }, { "epoch": 4.347140039447732, "high_lr": 0.00013000000000000002, "low_lr": 2.6e-06, "step": 1653 }, { "epoch": 4.347140039447732, "high_lr": 0.00013000000000000002, "low_lr": 2.6e-06, "step": 1653 }, { "epoch": 4.347140039447732, "high_lr": 0.00013000000000000002, "low_lr": 2.6e-06, "step": 1653 }, { "epoch": 4.347140039447732, "high_lr": 0.00013000000000000002, "low_lr": 2.6e-06, "step": 1653 }, { "epoch": 4.347140039447732, "high_lr": 0.00013000000000000002, "low_lr": 2.6e-06, "step": 1653 }, { "epoch": 4.347140039447732, "high_lr": 0.00013000000000000002, "low_lr": 2.6e-06, "step": 1653 }, { "epoch": 4.347140039447732, "high_lr": 0.00013000000000000002, "low_lr": 2.6e-06, "step": 1653 }, { "epoch": 4.347140039447732, "high_lr": 0.00013000000000000002, "low_lr": 2.6e-06, "step": 1653 }, { "epoch": 4.349769888231426, "grad_norm": 1.5777734518051147, "learning_rate": 0.00012947368421052633, "loss": 1.2179, "step": 1654 }, { "epoch": 4.349769888231426, "high_lr": 0.00012947368421052633, "low_lr": 2.589473684210527e-06, "step": 1654 }, { "epoch": 4.349769888231426, "high_lr": 0.00012947368421052633, "low_lr": 2.589473684210527e-06, "step": 1654 }, { "epoch": 4.349769888231426, "high_lr": 0.00012947368421052633, "low_lr": 2.589473684210527e-06, "step": 1654 }, { "epoch": 4.349769888231426, "high_lr": 0.00012947368421052633, "low_lr": 2.589473684210527e-06, "step": 1654 }, { "epoch": 4.349769888231426, "high_lr": 0.00012947368421052633, "low_lr": 2.589473684210527e-06, "step": 1654 }, { "epoch": 4.349769888231426, "high_lr": 0.00012947368421052633, "low_lr": 2.589473684210527e-06, "step": 1654 }, { "epoch": 4.349769888231426, "high_lr": 0.00012947368421052633, "low_lr": 2.589473684210527e-06, "step": 1654 }, { "epoch": 4.349769888231426, "high_lr": 0.00012947368421052633, "low_lr": 2.589473684210527e-06, "step": 1654 }, { "epoch": 4.352399737015122, "grad_norm": 1.4973317384719849, "learning_rate": 0.00012894736842105264, "loss": 1.2087, "step": 1655 }, { "epoch": 4.352399737015122, "high_lr": 0.00012894736842105264, "low_lr": 2.578947368421053e-06, "step": 1655 }, { "epoch": 4.352399737015122, "high_lr": 0.00012894736842105264, "low_lr": 2.578947368421053e-06, "step": 1655 }, { "epoch": 4.352399737015122, "high_lr": 0.00012894736842105264, "low_lr": 2.578947368421053e-06, "step": 1655 }, { "epoch": 4.352399737015122, "high_lr": 0.00012894736842105264, "low_lr": 2.578947368421053e-06, "step": 1655 }, { "epoch": 4.352399737015122, "high_lr": 0.00012894736842105264, "low_lr": 2.578947368421053e-06, "step": 1655 }, { "epoch": 4.352399737015122, "high_lr": 0.00012894736842105264, "low_lr": 2.578947368421053e-06, "step": 1655 }, { "epoch": 4.352399737015122, "high_lr": 0.00012894736842105264, "low_lr": 2.578947368421053e-06, "step": 1655 }, { "epoch": 4.352399737015122, "high_lr": 0.00012894736842105264, "low_lr": 2.578947368421053e-06, "step": 1655 }, { "epoch": 4.355029585798817, "grad_norm": 1.794551968574524, "learning_rate": 0.00012842105263157893, "loss": 1.1636, "step": 1656 }, { "epoch": 4.355029585798817, "high_lr": 0.00012842105263157893, "low_lr": 2.568421052631579e-06, "step": 1656 }, { "epoch": 4.355029585798817, "high_lr": 0.00012842105263157893, "low_lr": 2.568421052631579e-06, "step": 1656 }, { "epoch": 4.355029585798817, "high_lr": 0.00012842105263157893, "low_lr": 2.568421052631579e-06, "step": 1656 }, { "epoch": 4.355029585798817, "high_lr": 0.00012842105263157893, "low_lr": 2.568421052631579e-06, "step": 1656 }, { "epoch": 4.355029585798817, "high_lr": 0.00012842105263157893, "low_lr": 2.568421052631579e-06, "step": 1656 }, { "epoch": 4.355029585798817, "high_lr": 0.00012842105263157893, "low_lr": 2.568421052631579e-06, "step": 1656 }, { "epoch": 4.355029585798817, "high_lr": 0.00012842105263157893, "low_lr": 2.568421052631579e-06, "step": 1656 }, { "epoch": 4.355029585798817, "high_lr": 0.00012842105263157893, "low_lr": 2.568421052631579e-06, "step": 1656 }, { "epoch": 4.357659434582511, "grad_norm": 1.5746132135391235, "learning_rate": 0.00012789473684210527, "loss": 1.1974, "step": 1657 }, { "epoch": 4.357659434582511, "high_lr": 0.00012789473684210527, "low_lr": 2.5578947368421054e-06, "step": 1657 }, { "epoch": 4.357659434582511, "high_lr": 0.00012789473684210527, "low_lr": 2.5578947368421054e-06, "step": 1657 }, { "epoch": 4.357659434582511, "high_lr": 0.00012789473684210527, "low_lr": 2.5578947368421054e-06, "step": 1657 }, { "epoch": 4.357659434582511, "high_lr": 0.00012789473684210527, "low_lr": 2.5578947368421054e-06, "step": 1657 }, { "epoch": 4.357659434582511, "high_lr": 0.00012789473684210527, "low_lr": 2.5578947368421054e-06, "step": 1657 }, { "epoch": 4.357659434582511, "high_lr": 0.00012789473684210527, "low_lr": 2.5578947368421054e-06, "step": 1657 }, { "epoch": 4.357659434582511, "high_lr": 0.00012789473684210527, "low_lr": 2.5578947368421054e-06, "step": 1657 }, { "epoch": 4.357659434582511, "high_lr": 0.00012789473684210527, "low_lr": 2.5578947368421054e-06, "step": 1657 }, { "epoch": 4.360289283366207, "grad_norm": 1.554789662361145, "learning_rate": 0.00012736842105263158, "loss": 1.299, "step": 1658 }, { "epoch": 4.360289283366207, "high_lr": 0.00012736842105263158, "low_lr": 2.5473684210526317e-06, "step": 1658 }, { "epoch": 4.360289283366207, "high_lr": 0.00012736842105263158, "low_lr": 2.5473684210526317e-06, "step": 1658 }, { "epoch": 4.360289283366207, "high_lr": 0.00012736842105263158, "low_lr": 2.5473684210526317e-06, "step": 1658 }, { "epoch": 4.360289283366207, "high_lr": 0.00012736842105263158, "low_lr": 2.5473684210526317e-06, "step": 1658 }, { "epoch": 4.360289283366207, "high_lr": 0.00012736842105263158, "low_lr": 2.5473684210526317e-06, "step": 1658 }, { "epoch": 4.360289283366207, "high_lr": 0.00012736842105263158, "low_lr": 2.5473684210526317e-06, "step": 1658 }, { "epoch": 4.360289283366207, "high_lr": 0.00012736842105263158, "low_lr": 2.5473684210526317e-06, "step": 1658 }, { "epoch": 4.360289283366207, "high_lr": 0.00012736842105263158, "low_lr": 2.5473684210526317e-06, "step": 1658 }, { "epoch": 4.3629191321499015, "grad_norm": 1.7036932706832886, "learning_rate": 0.0001268421052631579, "loss": 1.1825, "step": 1659 }, { "epoch": 4.3629191321499015, "high_lr": 0.0001268421052631579, "low_lr": 2.536842105263158e-06, "step": 1659 }, { "epoch": 4.3629191321499015, "high_lr": 0.0001268421052631579, "low_lr": 2.536842105263158e-06, "step": 1659 }, { "epoch": 4.3629191321499015, "high_lr": 0.0001268421052631579, "low_lr": 2.536842105263158e-06, "step": 1659 }, { "epoch": 4.3629191321499015, "high_lr": 0.0001268421052631579, "low_lr": 2.536842105263158e-06, "step": 1659 }, { "epoch": 4.3629191321499015, "high_lr": 0.0001268421052631579, "low_lr": 2.536842105263158e-06, "step": 1659 }, { "epoch": 4.3629191321499015, "high_lr": 0.0001268421052631579, "low_lr": 2.536842105263158e-06, "step": 1659 }, { "epoch": 4.3629191321499015, "high_lr": 0.0001268421052631579, "low_lr": 2.536842105263158e-06, "step": 1659 }, { "epoch": 4.3629191321499015, "high_lr": 0.0001268421052631579, "low_lr": 2.536842105263158e-06, "step": 1659 }, { "epoch": 4.365548980933596, "grad_norm": 1.8712507486343384, "learning_rate": 0.0001263157894736842, "loss": 1.2871, "step": 1660 }, { "epoch": 4.365548980933596, "high_lr": 0.0001263157894736842, "low_lr": 2.5263157894736844e-06, "step": 1660 }, { "epoch": 4.365548980933596, "high_lr": 0.0001263157894736842, "low_lr": 2.5263157894736844e-06, "step": 1660 }, { "epoch": 4.365548980933596, "high_lr": 0.0001263157894736842, "low_lr": 2.5263157894736844e-06, "step": 1660 }, { "epoch": 4.365548980933596, "high_lr": 0.0001263157894736842, "low_lr": 2.5263157894736844e-06, "step": 1660 }, { "epoch": 4.365548980933596, "high_lr": 0.0001263157894736842, "low_lr": 2.5263157894736844e-06, "step": 1660 }, { "epoch": 4.365548980933596, "high_lr": 0.0001263157894736842, "low_lr": 2.5263157894736844e-06, "step": 1660 }, { "epoch": 4.365548980933596, "high_lr": 0.0001263157894736842, "low_lr": 2.5263157894736844e-06, "step": 1660 }, { "epoch": 4.365548980933596, "high_lr": 0.0001263157894736842, "low_lr": 2.5263157894736844e-06, "step": 1660 }, { "epoch": 4.368178829717292, "grad_norm": 1.3986327648162842, "learning_rate": 0.00012578947368421055, "loss": 1.16, "step": 1661 }, { "epoch": 4.368178829717292, "high_lr": 0.00012578947368421055, "low_lr": 2.5157894736842107e-06, "step": 1661 }, { "epoch": 4.368178829717292, "high_lr": 0.00012578947368421055, "low_lr": 2.5157894736842107e-06, "step": 1661 }, { "epoch": 4.368178829717292, "high_lr": 0.00012578947368421055, "low_lr": 2.5157894736842107e-06, "step": 1661 }, { "epoch": 4.368178829717292, "high_lr": 0.00012578947368421055, "low_lr": 2.5157894736842107e-06, "step": 1661 }, { "epoch": 4.368178829717292, "high_lr": 0.00012578947368421055, "low_lr": 2.5157894736842107e-06, "step": 1661 }, { "epoch": 4.368178829717292, "high_lr": 0.00012578947368421055, "low_lr": 2.5157894736842107e-06, "step": 1661 }, { "epoch": 4.368178829717292, "high_lr": 0.00012578947368421055, "low_lr": 2.5157894736842107e-06, "step": 1661 }, { "epoch": 4.368178829717292, "high_lr": 0.00012578947368421055, "low_lr": 2.5157894736842107e-06, "step": 1661 }, { "epoch": 4.370808678500986, "grad_norm": 1.5190725326538086, "learning_rate": 0.00012526315789473686, "loss": 1.233, "step": 1662 }, { "epoch": 4.370808678500986, "high_lr": 0.00012526315789473686, "low_lr": 2.5052631578947375e-06, "step": 1662 }, { "epoch": 4.370808678500986, "high_lr": 0.00012526315789473686, "low_lr": 2.5052631578947375e-06, "step": 1662 }, { "epoch": 4.370808678500986, "high_lr": 0.00012526315789473686, "low_lr": 2.5052631578947375e-06, "step": 1662 }, { "epoch": 4.370808678500986, "high_lr": 0.00012526315789473686, "low_lr": 2.5052631578947375e-06, "step": 1662 }, { "epoch": 4.370808678500986, "high_lr": 0.00012526315789473686, "low_lr": 2.5052631578947375e-06, "step": 1662 }, { "epoch": 4.370808678500986, "high_lr": 0.00012526315789473686, "low_lr": 2.5052631578947375e-06, "step": 1662 }, { "epoch": 4.370808678500986, "high_lr": 0.00012526315789473686, "low_lr": 2.5052631578947375e-06, "step": 1662 }, { "epoch": 4.370808678500986, "high_lr": 0.00012526315789473686, "low_lr": 2.5052631578947375e-06, "step": 1662 }, { "epoch": 4.373438527284681, "grad_norm": 1.5399186611175537, "learning_rate": 0.00012473684210526317, "loss": 1.238, "step": 1663 }, { "epoch": 4.373438527284681, "high_lr": 0.00012473684210526317, "low_lr": 2.4947368421052634e-06, "step": 1663 }, { "epoch": 4.373438527284681, "high_lr": 0.00012473684210526317, "low_lr": 2.4947368421052634e-06, "step": 1663 }, { "epoch": 4.373438527284681, "high_lr": 0.00012473684210526317, "low_lr": 2.4947368421052634e-06, "step": 1663 }, { "epoch": 4.373438527284681, "high_lr": 0.00012473684210526317, "low_lr": 2.4947368421052634e-06, "step": 1663 }, { "epoch": 4.373438527284681, "high_lr": 0.00012473684210526317, "low_lr": 2.4947368421052634e-06, "step": 1663 }, { "epoch": 4.373438527284681, "high_lr": 0.00012473684210526317, "low_lr": 2.4947368421052634e-06, "step": 1663 }, { "epoch": 4.373438527284681, "high_lr": 0.00012473684210526317, "low_lr": 2.4947368421052634e-06, "step": 1663 }, { "epoch": 4.373438527284681, "high_lr": 0.00012473684210526317, "low_lr": 2.4947368421052634e-06, "step": 1663 }, { "epoch": 4.3760683760683765, "grad_norm": 1.5317223072052002, "learning_rate": 0.00012421052631578949, "loss": 1.2542, "step": 1664 }, { "epoch": 4.3760683760683765, "high_lr": 0.00012421052631578949, "low_lr": 2.4842105263157897e-06, "step": 1664 }, { "epoch": 4.3760683760683765, "high_lr": 0.00012421052631578949, "low_lr": 2.4842105263157897e-06, "step": 1664 }, { "epoch": 4.3760683760683765, "high_lr": 0.00012421052631578949, "low_lr": 2.4842105263157897e-06, "step": 1664 }, { "epoch": 4.3760683760683765, "high_lr": 0.00012421052631578949, "low_lr": 2.4842105263157897e-06, "step": 1664 }, { "epoch": 4.3760683760683765, "high_lr": 0.00012421052631578949, "low_lr": 2.4842105263157897e-06, "step": 1664 }, { "epoch": 4.3760683760683765, "high_lr": 0.00012421052631578949, "low_lr": 2.4842105263157897e-06, "step": 1664 }, { "epoch": 4.3760683760683765, "high_lr": 0.00012421052631578949, "low_lr": 2.4842105263157897e-06, "step": 1664 }, { "epoch": 4.3760683760683765, "high_lr": 0.00012421052631578949, "low_lr": 2.4842105263157897e-06, "step": 1664 }, { "epoch": 4.378698224852071, "grad_norm": 1.6379618644714355, "learning_rate": 0.0001236842105263158, "loss": 1.2087, "step": 1665 }, { "epoch": 4.378698224852071, "high_lr": 0.0001236842105263158, "low_lr": 2.473684210526316e-06, "step": 1665 }, { "epoch": 4.378698224852071, "high_lr": 0.0001236842105263158, "low_lr": 2.473684210526316e-06, "step": 1665 }, { "epoch": 4.378698224852071, "high_lr": 0.0001236842105263158, "low_lr": 2.473684210526316e-06, "step": 1665 }, { "epoch": 4.378698224852071, "high_lr": 0.0001236842105263158, "low_lr": 2.473684210526316e-06, "step": 1665 }, { "epoch": 4.378698224852071, "high_lr": 0.0001236842105263158, "low_lr": 2.473684210526316e-06, "step": 1665 }, { "epoch": 4.378698224852071, "high_lr": 0.0001236842105263158, "low_lr": 2.473684210526316e-06, "step": 1665 }, { "epoch": 4.378698224852071, "high_lr": 0.0001236842105263158, "low_lr": 2.473684210526316e-06, "step": 1665 }, { "epoch": 4.378698224852071, "high_lr": 0.0001236842105263158, "low_lr": 2.473684210526316e-06, "step": 1665 }, { "epoch": 4.381328073635766, "grad_norm": 1.547407627105713, "learning_rate": 0.0001231578947368421, "loss": 1.2358, "step": 1666 }, { "epoch": 4.381328073635766, "high_lr": 0.0001231578947368421, "low_lr": 2.4631578947368424e-06, "step": 1666 }, { "epoch": 4.381328073635766, "high_lr": 0.0001231578947368421, "low_lr": 2.4631578947368424e-06, "step": 1666 }, { "epoch": 4.381328073635766, "high_lr": 0.0001231578947368421, "low_lr": 2.4631578947368424e-06, "step": 1666 }, { "epoch": 4.381328073635766, "high_lr": 0.0001231578947368421, "low_lr": 2.4631578947368424e-06, "step": 1666 }, { "epoch": 4.381328073635766, "high_lr": 0.0001231578947368421, "low_lr": 2.4631578947368424e-06, "step": 1666 }, { "epoch": 4.381328073635766, "high_lr": 0.0001231578947368421, "low_lr": 2.4631578947368424e-06, "step": 1666 }, { "epoch": 4.381328073635766, "high_lr": 0.0001231578947368421, "low_lr": 2.4631578947368424e-06, "step": 1666 }, { "epoch": 4.381328073635766, "high_lr": 0.0001231578947368421, "low_lr": 2.4631578947368424e-06, "step": 1666 }, { "epoch": 4.3839579224194605, "grad_norm": 1.5736956596374512, "learning_rate": 0.00012263157894736842, "loss": 1.2619, "step": 1667 }, { "epoch": 4.3839579224194605, "high_lr": 0.00012263157894736842, "low_lr": 2.4526315789473687e-06, "step": 1667 }, { "epoch": 4.3839579224194605, "high_lr": 0.00012263157894736842, "low_lr": 2.4526315789473687e-06, "step": 1667 }, { "epoch": 4.3839579224194605, "high_lr": 0.00012263157894736842, "low_lr": 2.4526315789473687e-06, "step": 1667 }, { "epoch": 4.3839579224194605, "high_lr": 0.00012263157894736842, "low_lr": 2.4526315789473687e-06, "step": 1667 }, { "epoch": 4.3839579224194605, "high_lr": 0.00012263157894736842, "low_lr": 2.4526315789473687e-06, "step": 1667 }, { "epoch": 4.3839579224194605, "high_lr": 0.00012263157894736842, "low_lr": 2.4526315789473687e-06, "step": 1667 }, { "epoch": 4.3839579224194605, "high_lr": 0.00012263157894736842, "low_lr": 2.4526315789473687e-06, "step": 1667 }, { "epoch": 4.3839579224194605, "high_lr": 0.00012263157894736842, "low_lr": 2.4526315789473687e-06, "step": 1667 }, { "epoch": 4.386587771203156, "grad_norm": 1.7092012166976929, "learning_rate": 0.00012210526315789474, "loss": 1.217, "step": 1668 }, { "epoch": 4.386587771203156, "high_lr": 0.00012210526315789474, "low_lr": 2.442105263157895e-06, "step": 1668 }, { "epoch": 4.386587771203156, "high_lr": 0.00012210526315789474, "low_lr": 2.442105263157895e-06, "step": 1668 }, { "epoch": 4.386587771203156, "high_lr": 0.00012210526315789474, "low_lr": 2.442105263157895e-06, "step": 1668 }, { "epoch": 4.386587771203156, "high_lr": 0.00012210526315789474, "low_lr": 2.442105263157895e-06, "step": 1668 }, { "epoch": 4.386587771203156, "high_lr": 0.00012210526315789474, "low_lr": 2.442105263157895e-06, "step": 1668 }, { "epoch": 4.386587771203156, "high_lr": 0.00012210526315789474, "low_lr": 2.442105263157895e-06, "step": 1668 }, { "epoch": 4.386587771203156, "high_lr": 0.00012210526315789474, "low_lr": 2.442105263157895e-06, "step": 1668 }, { "epoch": 4.386587771203156, "high_lr": 0.00012210526315789474, "low_lr": 2.442105263157895e-06, "step": 1668 }, { "epoch": 4.389217619986851, "grad_norm": 1.5518778562545776, "learning_rate": 0.00012157894736842105, "loss": 1.2391, "step": 1669 }, { "epoch": 4.389217619986851, "high_lr": 0.00012157894736842105, "low_lr": 2.4315789473684213e-06, "step": 1669 }, { "epoch": 4.389217619986851, "high_lr": 0.00012157894736842105, "low_lr": 2.4315789473684213e-06, "step": 1669 }, { "epoch": 4.389217619986851, "high_lr": 0.00012157894736842105, "low_lr": 2.4315789473684213e-06, "step": 1669 }, { "epoch": 4.389217619986851, "high_lr": 0.00012157894736842105, "low_lr": 2.4315789473684213e-06, "step": 1669 }, { "epoch": 4.389217619986851, "high_lr": 0.00012157894736842105, "low_lr": 2.4315789473684213e-06, "step": 1669 }, { "epoch": 4.389217619986851, "high_lr": 0.00012157894736842105, "low_lr": 2.4315789473684213e-06, "step": 1669 }, { "epoch": 4.389217619986851, "high_lr": 0.00012157894736842105, "low_lr": 2.4315789473684213e-06, "step": 1669 }, { "epoch": 4.389217619986851, "high_lr": 0.00012157894736842105, "low_lr": 2.4315789473684213e-06, "step": 1669 }, { "epoch": 4.391847468770545, "grad_norm": 1.568029522895813, "learning_rate": 0.00012105263157894738, "loss": 1.2401, "step": 1670 }, { "epoch": 4.391847468770545, "high_lr": 0.00012105263157894738, "low_lr": 2.4210526315789477e-06, "step": 1670 }, { "epoch": 4.391847468770545, "high_lr": 0.00012105263157894738, "low_lr": 2.4210526315789477e-06, "step": 1670 }, { "epoch": 4.391847468770545, "high_lr": 0.00012105263157894738, "low_lr": 2.4210526315789477e-06, "step": 1670 }, { "epoch": 4.391847468770545, "high_lr": 0.00012105263157894738, "low_lr": 2.4210526315789477e-06, "step": 1670 }, { "epoch": 4.391847468770545, "high_lr": 0.00012105263157894738, "low_lr": 2.4210526315789477e-06, "step": 1670 }, { "epoch": 4.391847468770545, "high_lr": 0.00012105263157894738, "low_lr": 2.4210526315789477e-06, "step": 1670 }, { "epoch": 4.391847468770545, "high_lr": 0.00012105263157894738, "low_lr": 2.4210526315789477e-06, "step": 1670 }, { "epoch": 4.391847468770545, "high_lr": 0.00012105263157894738, "low_lr": 2.4210526315789477e-06, "step": 1670 }, { "epoch": 4.394477317554241, "grad_norm": 1.6637319326400757, "learning_rate": 0.00012052631578947369, "loss": 1.2115, "step": 1671 }, { "epoch": 4.394477317554241, "high_lr": 0.00012052631578947369, "low_lr": 2.410526315789474e-06, "step": 1671 }, { "epoch": 4.394477317554241, "high_lr": 0.00012052631578947369, "low_lr": 2.410526315789474e-06, "step": 1671 }, { "epoch": 4.394477317554241, "high_lr": 0.00012052631578947369, "low_lr": 2.410526315789474e-06, "step": 1671 }, { "epoch": 4.394477317554241, "high_lr": 0.00012052631578947369, "low_lr": 2.410526315789474e-06, "step": 1671 }, { "epoch": 4.394477317554241, "high_lr": 0.00012052631578947369, "low_lr": 2.410526315789474e-06, "step": 1671 }, { "epoch": 4.394477317554241, "high_lr": 0.00012052631578947369, "low_lr": 2.410526315789474e-06, "step": 1671 }, { "epoch": 4.394477317554241, "high_lr": 0.00012052631578947369, "low_lr": 2.410526315789474e-06, "step": 1671 }, { "epoch": 4.394477317554241, "high_lr": 0.00012052631578947369, "low_lr": 2.410526315789474e-06, "step": 1671 }, { "epoch": 4.3971071663379355, "grad_norm": 1.4761321544647217, "learning_rate": 0.00012, "loss": 1.1913, "step": 1672 }, { "epoch": 4.3971071663379355, "high_lr": 0.00012, "low_lr": 2.4000000000000003e-06, "step": 1672 }, { "epoch": 4.3971071663379355, "high_lr": 0.00012, "low_lr": 2.4000000000000003e-06, "step": 1672 }, { "epoch": 4.3971071663379355, "high_lr": 0.00012, "low_lr": 2.4000000000000003e-06, "step": 1672 }, { "epoch": 4.3971071663379355, "high_lr": 0.00012, "low_lr": 2.4000000000000003e-06, "step": 1672 }, { "epoch": 4.3971071663379355, "high_lr": 0.00012, "low_lr": 2.4000000000000003e-06, "step": 1672 }, { "epoch": 4.3971071663379355, "high_lr": 0.00012, "low_lr": 2.4000000000000003e-06, "step": 1672 }, { "epoch": 4.3971071663379355, "high_lr": 0.00012, "low_lr": 2.4000000000000003e-06, "step": 1672 }, { "epoch": 4.3971071663379355, "high_lr": 0.00012, "low_lr": 2.4000000000000003e-06, "step": 1672 }, { "epoch": 4.39973701512163, "grad_norm": 1.4216514825820923, "learning_rate": 0.00011947368421052632, "loss": 1.2146, "step": 1673 }, { "epoch": 4.39973701512163, "high_lr": 0.00011947368421052632, "low_lr": 2.3894736842105266e-06, "step": 1673 }, { "epoch": 4.39973701512163, "high_lr": 0.00011947368421052632, "low_lr": 2.3894736842105266e-06, "step": 1673 }, { "epoch": 4.39973701512163, "high_lr": 0.00011947368421052632, "low_lr": 2.3894736842105266e-06, "step": 1673 }, { "epoch": 4.39973701512163, "high_lr": 0.00011947368421052632, "low_lr": 2.3894736842105266e-06, "step": 1673 }, { "epoch": 4.39973701512163, "high_lr": 0.00011947368421052632, "low_lr": 2.3894736842105266e-06, "step": 1673 }, { "epoch": 4.39973701512163, "high_lr": 0.00011947368421052632, "low_lr": 2.3894736842105266e-06, "step": 1673 }, { "epoch": 4.39973701512163, "high_lr": 0.00011947368421052632, "low_lr": 2.3894736842105266e-06, "step": 1673 }, { "epoch": 4.39973701512163, "high_lr": 0.00011947368421052632, "low_lr": 2.3894736842105266e-06, "step": 1673 }, { "epoch": 4.402366863905326, "grad_norm": 1.601261854171753, "learning_rate": 0.00011894736842105264, "loss": 1.2112, "step": 1674 }, { "epoch": 4.402366863905326, "high_lr": 0.00011894736842105264, "low_lr": 2.378947368421053e-06, "step": 1674 }, { "epoch": 4.402366863905326, "high_lr": 0.00011894736842105264, "low_lr": 2.378947368421053e-06, "step": 1674 }, { "epoch": 4.402366863905326, "high_lr": 0.00011894736842105264, "low_lr": 2.378947368421053e-06, "step": 1674 }, { "epoch": 4.402366863905326, "high_lr": 0.00011894736842105264, "low_lr": 2.378947368421053e-06, "step": 1674 }, { "epoch": 4.402366863905326, "high_lr": 0.00011894736842105264, "low_lr": 2.378947368421053e-06, "step": 1674 }, { "epoch": 4.402366863905326, "high_lr": 0.00011894736842105264, "low_lr": 2.378947368421053e-06, "step": 1674 }, { "epoch": 4.402366863905326, "high_lr": 0.00011894736842105264, "low_lr": 2.378947368421053e-06, "step": 1674 }, { "epoch": 4.402366863905326, "high_lr": 0.00011894736842105264, "low_lr": 2.378947368421053e-06, "step": 1674 }, { "epoch": 4.40499671268902, "grad_norm": 1.5565073490142822, "learning_rate": 0.00011842105263157894, "loss": 1.2682, "step": 1675 }, { "epoch": 4.40499671268902, "high_lr": 0.00011842105263157894, "low_lr": 2.368421052631579e-06, "step": 1675 }, { "epoch": 4.40499671268902, "high_lr": 0.00011842105263157894, "low_lr": 2.368421052631579e-06, "step": 1675 }, { "epoch": 4.40499671268902, "high_lr": 0.00011842105263157894, "low_lr": 2.368421052631579e-06, "step": 1675 }, { "epoch": 4.40499671268902, "high_lr": 0.00011842105263157894, "low_lr": 2.368421052631579e-06, "step": 1675 }, { "epoch": 4.40499671268902, "high_lr": 0.00011842105263157894, "low_lr": 2.368421052631579e-06, "step": 1675 }, { "epoch": 4.40499671268902, "high_lr": 0.00011842105263157894, "low_lr": 2.368421052631579e-06, "step": 1675 }, { "epoch": 4.40499671268902, "high_lr": 0.00011842105263157894, "low_lr": 2.368421052631579e-06, "step": 1675 }, { "epoch": 4.40499671268902, "high_lr": 0.00011842105263157894, "low_lr": 2.368421052631579e-06, "step": 1675 }, { "epoch": 4.407626561472715, "grad_norm": 1.542503833770752, "learning_rate": 0.00011789473684210527, "loss": 1.1684, "step": 1676 }, { "epoch": 4.407626561472715, "high_lr": 0.00011789473684210527, "low_lr": 2.357894736842105e-06, "step": 1676 }, { "epoch": 4.407626561472715, "high_lr": 0.00011789473684210527, "low_lr": 2.357894736842105e-06, "step": 1676 }, { "epoch": 4.407626561472715, "high_lr": 0.00011789473684210527, "low_lr": 2.357894736842105e-06, "step": 1676 }, { "epoch": 4.407626561472715, "high_lr": 0.00011789473684210527, "low_lr": 2.357894736842105e-06, "step": 1676 }, { "epoch": 4.407626561472715, "high_lr": 0.00011789473684210527, "low_lr": 2.357894736842105e-06, "step": 1676 }, { "epoch": 4.407626561472715, "high_lr": 0.00011789473684210527, "low_lr": 2.357894736842105e-06, "step": 1676 }, { "epoch": 4.407626561472715, "high_lr": 0.00011789473684210527, "low_lr": 2.357894736842105e-06, "step": 1676 }, { "epoch": 4.407626561472715, "high_lr": 0.00011789473684210527, "low_lr": 2.357894736842105e-06, "step": 1676 }, { "epoch": 4.410256410256411, "grad_norm": 1.492455005645752, "learning_rate": 0.00011736842105263158, "loss": 1.2266, "step": 1677 }, { "epoch": 4.410256410256411, "high_lr": 0.00011736842105263158, "low_lr": 2.347368421052632e-06, "step": 1677 }, { "epoch": 4.410256410256411, "high_lr": 0.00011736842105263158, "low_lr": 2.347368421052632e-06, "step": 1677 }, { "epoch": 4.410256410256411, "high_lr": 0.00011736842105263158, "low_lr": 2.347368421052632e-06, "step": 1677 }, { "epoch": 4.410256410256411, "high_lr": 0.00011736842105263158, "low_lr": 2.347368421052632e-06, "step": 1677 }, { "epoch": 4.410256410256411, "high_lr": 0.00011736842105263158, "low_lr": 2.347368421052632e-06, "step": 1677 }, { "epoch": 4.410256410256411, "high_lr": 0.00011736842105263158, "low_lr": 2.347368421052632e-06, "step": 1677 }, { "epoch": 4.410256410256411, "high_lr": 0.00011736842105263158, "low_lr": 2.347368421052632e-06, "step": 1677 }, { "epoch": 4.410256410256411, "high_lr": 0.00011736842105263158, "low_lr": 2.347368421052632e-06, "step": 1677 }, { "epoch": 4.412886259040105, "grad_norm": 1.5621352195739746, "learning_rate": 0.00011684210526315791, "loss": 1.142, "step": 1678 }, { "epoch": 4.412886259040105, "high_lr": 0.00011684210526315791, "low_lr": 2.3368421052631583e-06, "step": 1678 }, { "epoch": 4.412886259040105, "high_lr": 0.00011684210526315791, "low_lr": 2.3368421052631583e-06, "step": 1678 }, { "epoch": 4.412886259040105, "high_lr": 0.00011684210526315791, "low_lr": 2.3368421052631583e-06, "step": 1678 }, { "epoch": 4.412886259040105, "high_lr": 0.00011684210526315791, "low_lr": 2.3368421052631583e-06, "step": 1678 }, { "epoch": 4.412886259040105, "high_lr": 0.00011684210526315791, "low_lr": 2.3368421052631583e-06, "step": 1678 }, { "epoch": 4.412886259040105, "high_lr": 0.00011684210526315791, "low_lr": 2.3368421052631583e-06, "step": 1678 }, { "epoch": 4.412886259040105, "high_lr": 0.00011684210526315791, "low_lr": 2.3368421052631583e-06, "step": 1678 }, { "epoch": 4.412886259040105, "high_lr": 0.00011684210526315791, "low_lr": 2.3368421052631583e-06, "step": 1678 }, { "epoch": 4.4155161078238, "grad_norm": 1.5134774446487427, "learning_rate": 0.00011631578947368421, "loss": 1.1955, "step": 1679 }, { "epoch": 4.4155161078238, "high_lr": 0.00011631578947368421, "low_lr": 2.326315789473684e-06, "step": 1679 }, { "epoch": 4.4155161078238, "high_lr": 0.00011631578947368421, "low_lr": 2.326315789473684e-06, "step": 1679 }, { "epoch": 4.4155161078238, "high_lr": 0.00011631578947368421, "low_lr": 2.326315789473684e-06, "step": 1679 }, { "epoch": 4.4155161078238, "high_lr": 0.00011631578947368421, "low_lr": 2.326315789473684e-06, "step": 1679 }, { "epoch": 4.4155161078238, "high_lr": 0.00011631578947368421, "low_lr": 2.326315789473684e-06, "step": 1679 }, { "epoch": 4.4155161078238, "high_lr": 0.00011631578947368421, "low_lr": 2.326315789473684e-06, "step": 1679 }, { "epoch": 4.4155161078238, "high_lr": 0.00011631578947368421, "low_lr": 2.326315789473684e-06, "step": 1679 }, { "epoch": 4.4155161078238, "high_lr": 0.00011631578947368421, "low_lr": 2.326315789473684e-06, "step": 1679 }, { "epoch": 4.418145956607495, "grad_norm": 1.425864815711975, "learning_rate": 0.00011578947368421053, "loss": 1.2632, "step": 1680 }, { "epoch": 4.418145956607495, "high_lr": 0.00011578947368421053, "low_lr": 2.3157894736842105e-06, "step": 1680 }, { "epoch": 4.418145956607495, "high_lr": 0.00011578947368421053, "low_lr": 2.3157894736842105e-06, "step": 1680 }, { "epoch": 4.418145956607495, "high_lr": 0.00011578947368421053, "low_lr": 2.3157894736842105e-06, "step": 1680 }, { "epoch": 4.418145956607495, "high_lr": 0.00011578947368421053, "low_lr": 2.3157894736842105e-06, "step": 1680 }, { "epoch": 4.418145956607495, "high_lr": 0.00011578947368421053, "low_lr": 2.3157894736842105e-06, "step": 1680 }, { "epoch": 4.418145956607495, "high_lr": 0.00011578947368421053, "low_lr": 2.3157894736842105e-06, "step": 1680 }, { "epoch": 4.418145956607495, "high_lr": 0.00011578947368421053, "low_lr": 2.3157894736842105e-06, "step": 1680 }, { "epoch": 4.418145956607495, "high_lr": 0.00011578947368421053, "low_lr": 2.3157894736842105e-06, "step": 1680 }, { "epoch": 4.42077580539119, "grad_norm": 1.4130666255950928, "learning_rate": 0.00011526315789473685, "loss": 1.191, "step": 1681 }, { "epoch": 4.42077580539119, "high_lr": 0.00011526315789473685, "low_lr": 2.3052631578947373e-06, "step": 1681 }, { "epoch": 4.42077580539119, "high_lr": 0.00011526315789473685, "low_lr": 2.3052631578947373e-06, "step": 1681 }, { "epoch": 4.42077580539119, "high_lr": 0.00011526315789473685, "low_lr": 2.3052631578947373e-06, "step": 1681 }, { "epoch": 4.42077580539119, "high_lr": 0.00011526315789473685, "low_lr": 2.3052631578947373e-06, "step": 1681 }, { "epoch": 4.42077580539119, "high_lr": 0.00011526315789473685, "low_lr": 2.3052631578947373e-06, "step": 1681 }, { "epoch": 4.42077580539119, "high_lr": 0.00011526315789473685, "low_lr": 2.3052631578947373e-06, "step": 1681 }, { "epoch": 4.42077580539119, "high_lr": 0.00011526315789473685, "low_lr": 2.3052631578947373e-06, "step": 1681 }, { "epoch": 4.42077580539119, "high_lr": 0.00011526315789473685, "low_lr": 2.3052631578947373e-06, "step": 1681 }, { "epoch": 4.423405654174885, "grad_norm": 1.4954359531402588, "learning_rate": 0.00011473684210526316, "loss": 1.1924, "step": 1682 }, { "epoch": 4.423405654174885, "high_lr": 0.00011473684210526316, "low_lr": 2.294736842105263e-06, "step": 1682 }, { "epoch": 4.423405654174885, "high_lr": 0.00011473684210526316, "low_lr": 2.294736842105263e-06, "step": 1682 }, { "epoch": 4.423405654174885, "high_lr": 0.00011473684210526316, "low_lr": 2.294736842105263e-06, "step": 1682 }, { "epoch": 4.423405654174885, "high_lr": 0.00011473684210526316, "low_lr": 2.294736842105263e-06, "step": 1682 }, { "epoch": 4.423405654174885, "high_lr": 0.00011473684210526316, "low_lr": 2.294736842105263e-06, "step": 1682 }, { "epoch": 4.423405654174885, "high_lr": 0.00011473684210526316, "low_lr": 2.294736842105263e-06, "step": 1682 }, { "epoch": 4.423405654174885, "high_lr": 0.00011473684210526316, "low_lr": 2.294736842105263e-06, "step": 1682 }, { "epoch": 4.423405654174885, "high_lr": 0.00011473684210526316, "low_lr": 2.294736842105263e-06, "step": 1682 }, { "epoch": 4.42603550295858, "grad_norm": 1.8377904891967773, "learning_rate": 0.00011421052631578947, "loss": 1.1947, "step": 1683 }, { "epoch": 4.42603550295858, "high_lr": 0.00011421052631578947, "low_lr": 2.2842105263157895e-06, "step": 1683 }, { "epoch": 4.42603550295858, "high_lr": 0.00011421052631578947, "low_lr": 2.2842105263157895e-06, "step": 1683 }, { "epoch": 4.42603550295858, "high_lr": 0.00011421052631578947, "low_lr": 2.2842105263157895e-06, "step": 1683 }, { "epoch": 4.42603550295858, "high_lr": 0.00011421052631578947, "low_lr": 2.2842105263157895e-06, "step": 1683 }, { "epoch": 4.42603550295858, "high_lr": 0.00011421052631578947, "low_lr": 2.2842105263157895e-06, "step": 1683 }, { "epoch": 4.42603550295858, "high_lr": 0.00011421052631578947, "low_lr": 2.2842105263157895e-06, "step": 1683 }, { "epoch": 4.42603550295858, "high_lr": 0.00011421052631578947, "low_lr": 2.2842105263157895e-06, "step": 1683 }, { "epoch": 4.42603550295858, "high_lr": 0.00011421052631578947, "low_lr": 2.2842105263157895e-06, "step": 1683 }, { "epoch": 4.428665351742275, "grad_norm": 1.5843218564987183, "learning_rate": 0.0001136842105263158, "loss": 1.2083, "step": 1684 }, { "epoch": 4.428665351742275, "high_lr": 0.0001136842105263158, "low_lr": 2.273684210526316e-06, "step": 1684 }, { "epoch": 4.428665351742275, "high_lr": 0.0001136842105263158, "low_lr": 2.273684210526316e-06, "step": 1684 }, { "epoch": 4.428665351742275, "high_lr": 0.0001136842105263158, "low_lr": 2.273684210526316e-06, "step": 1684 }, { "epoch": 4.428665351742275, "high_lr": 0.0001136842105263158, "low_lr": 2.273684210526316e-06, "step": 1684 }, { "epoch": 4.428665351742275, "high_lr": 0.0001136842105263158, "low_lr": 2.273684210526316e-06, "step": 1684 }, { "epoch": 4.428665351742275, "high_lr": 0.0001136842105263158, "low_lr": 2.273684210526316e-06, "step": 1684 }, { "epoch": 4.428665351742275, "high_lr": 0.0001136842105263158, "low_lr": 2.273684210526316e-06, "step": 1684 }, { "epoch": 4.428665351742275, "high_lr": 0.0001136842105263158, "low_lr": 2.273684210526316e-06, "step": 1684 }, { "epoch": 4.43129520052597, "grad_norm": 1.5587294101715088, "learning_rate": 0.00011315789473684211, "loss": 1.2195, "step": 1685 }, { "epoch": 4.43129520052597, "high_lr": 0.00011315789473684211, "low_lr": 2.2631578947368426e-06, "step": 1685 }, { "epoch": 4.43129520052597, "high_lr": 0.00011315789473684211, "low_lr": 2.2631578947368426e-06, "step": 1685 }, { "epoch": 4.43129520052597, "high_lr": 0.00011315789473684211, "low_lr": 2.2631578947368426e-06, "step": 1685 }, { "epoch": 4.43129520052597, "high_lr": 0.00011315789473684211, "low_lr": 2.2631578947368426e-06, "step": 1685 }, { "epoch": 4.43129520052597, "high_lr": 0.00011315789473684211, "low_lr": 2.2631578947368426e-06, "step": 1685 }, { "epoch": 4.43129520052597, "high_lr": 0.00011315789473684211, "low_lr": 2.2631578947368426e-06, "step": 1685 }, { "epoch": 4.43129520052597, "high_lr": 0.00011315789473684211, "low_lr": 2.2631578947368426e-06, "step": 1685 }, { "epoch": 4.43129520052597, "high_lr": 0.00011315789473684211, "low_lr": 2.2631578947368426e-06, "step": 1685 }, { "epoch": 4.433925049309664, "grad_norm": 1.4924415349960327, "learning_rate": 0.00011263157894736841, "loss": 1.2152, "step": 1686 }, { "epoch": 4.433925049309664, "high_lr": 0.00011263157894736841, "low_lr": 2.2526315789473685e-06, "step": 1686 }, { "epoch": 4.433925049309664, "high_lr": 0.00011263157894736841, "low_lr": 2.2526315789473685e-06, "step": 1686 }, { "epoch": 4.433925049309664, "high_lr": 0.00011263157894736841, "low_lr": 2.2526315789473685e-06, "step": 1686 }, { "epoch": 4.433925049309664, "high_lr": 0.00011263157894736841, "low_lr": 2.2526315789473685e-06, "step": 1686 }, { "epoch": 4.433925049309664, "high_lr": 0.00011263157894736841, "low_lr": 2.2526315789473685e-06, "step": 1686 }, { "epoch": 4.433925049309664, "high_lr": 0.00011263157894736841, "low_lr": 2.2526315789473685e-06, "step": 1686 }, { "epoch": 4.433925049309664, "high_lr": 0.00011263157894736841, "low_lr": 2.2526315789473685e-06, "step": 1686 }, { "epoch": 4.433925049309664, "high_lr": 0.00011263157894736841, "low_lr": 2.2526315789473685e-06, "step": 1686 }, { "epoch": 4.43655489809336, "grad_norm": 1.5917601585388184, "learning_rate": 0.00011210526315789474, "loss": 1.2778, "step": 1687 }, { "epoch": 4.43655489809336, "high_lr": 0.00011210526315789474, "low_lr": 2.242105263157895e-06, "step": 1687 }, { "epoch": 4.43655489809336, "high_lr": 0.00011210526315789474, "low_lr": 2.242105263157895e-06, "step": 1687 }, { "epoch": 4.43655489809336, "high_lr": 0.00011210526315789474, "low_lr": 2.242105263157895e-06, "step": 1687 }, { "epoch": 4.43655489809336, "high_lr": 0.00011210526315789474, "low_lr": 2.242105263157895e-06, "step": 1687 }, { "epoch": 4.43655489809336, "high_lr": 0.00011210526315789474, "low_lr": 2.242105263157895e-06, "step": 1687 }, { "epoch": 4.43655489809336, "high_lr": 0.00011210526315789474, "low_lr": 2.242105263157895e-06, "step": 1687 }, { "epoch": 4.43655489809336, "high_lr": 0.00011210526315789474, "low_lr": 2.242105263157895e-06, "step": 1687 }, { "epoch": 4.43655489809336, "high_lr": 0.00011210526315789474, "low_lr": 2.242105263157895e-06, "step": 1687 }, { "epoch": 4.439184746877054, "grad_norm": 1.445594310760498, "learning_rate": 0.00011157894736842105, "loss": 1.2154, "step": 1688 }, { "epoch": 4.439184746877054, "high_lr": 0.00011157894736842105, "low_lr": 2.231578947368421e-06, "step": 1688 }, { "epoch": 4.439184746877054, "high_lr": 0.00011157894736842105, "low_lr": 2.231578947368421e-06, "step": 1688 }, { "epoch": 4.439184746877054, "high_lr": 0.00011157894736842105, "low_lr": 2.231578947368421e-06, "step": 1688 }, { "epoch": 4.439184746877054, "high_lr": 0.00011157894736842105, "low_lr": 2.231578947368421e-06, "step": 1688 }, { "epoch": 4.439184746877054, "high_lr": 0.00011157894736842105, "low_lr": 2.231578947368421e-06, "step": 1688 }, { "epoch": 4.439184746877054, "high_lr": 0.00011157894736842105, "low_lr": 2.231578947368421e-06, "step": 1688 }, { "epoch": 4.439184746877054, "high_lr": 0.00011157894736842105, "low_lr": 2.231578947368421e-06, "step": 1688 }, { "epoch": 4.439184746877054, "high_lr": 0.00011157894736842105, "low_lr": 2.231578947368421e-06, "step": 1688 }, { "epoch": 4.441814595660749, "grad_norm": 1.5913069248199463, "learning_rate": 0.00011105263157894738, "loss": 1.2254, "step": 1689 }, { "epoch": 4.441814595660749, "high_lr": 0.00011105263157894738, "low_lr": 2.221052631578948e-06, "step": 1689 }, { "epoch": 4.441814595660749, "high_lr": 0.00011105263157894738, "low_lr": 2.221052631578948e-06, "step": 1689 }, { "epoch": 4.441814595660749, "high_lr": 0.00011105263157894738, "low_lr": 2.221052631578948e-06, "step": 1689 }, { "epoch": 4.441814595660749, "high_lr": 0.00011105263157894738, "low_lr": 2.221052631578948e-06, "step": 1689 }, { "epoch": 4.441814595660749, "high_lr": 0.00011105263157894738, "low_lr": 2.221052631578948e-06, "step": 1689 }, { "epoch": 4.441814595660749, "high_lr": 0.00011105263157894738, "low_lr": 2.221052631578948e-06, "step": 1689 }, { "epoch": 4.441814595660749, "high_lr": 0.00011105263157894738, "low_lr": 2.221052631578948e-06, "step": 1689 }, { "epoch": 4.441814595660749, "high_lr": 0.00011105263157894738, "low_lr": 2.221052631578948e-06, "step": 1689 }, { "epoch": 4.444444444444445, "grad_norm": 1.5536595582962036, "learning_rate": 0.00011052631578947368, "loss": 1.244, "step": 1690 }, { "epoch": 4.444444444444445, "high_lr": 0.00011052631578947368, "low_lr": 2.2105263157894738e-06, "step": 1690 }, { "epoch": 4.444444444444445, "high_lr": 0.00011052631578947368, "low_lr": 2.2105263157894738e-06, "step": 1690 }, { "epoch": 4.444444444444445, "high_lr": 0.00011052631578947368, "low_lr": 2.2105263157894738e-06, "step": 1690 }, { "epoch": 4.444444444444445, "high_lr": 0.00011052631578947368, "low_lr": 2.2105263157894738e-06, "step": 1690 }, { "epoch": 4.444444444444445, "high_lr": 0.00011052631578947368, "low_lr": 2.2105263157894738e-06, "step": 1690 }, { "epoch": 4.444444444444445, "high_lr": 0.00011052631578947368, "low_lr": 2.2105263157894738e-06, "step": 1690 }, { "epoch": 4.444444444444445, "high_lr": 0.00011052631578947368, "low_lr": 2.2105263157894738e-06, "step": 1690 }, { "epoch": 4.444444444444445, "high_lr": 0.00011052631578947368, "low_lr": 2.2105263157894738e-06, "step": 1690 }, { "epoch": 4.447074293228139, "grad_norm": 1.6489031314849854, "learning_rate": 0.00011, "loss": 1.1711, "step": 1691 }, { "epoch": 4.447074293228139, "high_lr": 0.00011, "low_lr": 2.2e-06, "step": 1691 }, { "epoch": 4.447074293228139, "high_lr": 0.00011, "low_lr": 2.2e-06, "step": 1691 }, { "epoch": 4.447074293228139, "high_lr": 0.00011, "low_lr": 2.2e-06, "step": 1691 }, { "epoch": 4.447074293228139, "high_lr": 0.00011, "low_lr": 2.2e-06, "step": 1691 }, { "epoch": 4.447074293228139, "high_lr": 0.00011, "low_lr": 2.2e-06, "step": 1691 }, { "epoch": 4.447074293228139, "high_lr": 0.00011, "low_lr": 2.2e-06, "step": 1691 }, { "epoch": 4.447074293228139, "high_lr": 0.00011, "low_lr": 2.2e-06, "step": 1691 }, { "epoch": 4.447074293228139, "high_lr": 0.00011, "low_lr": 2.2e-06, "step": 1691 }, { "epoch": 4.449704142011834, "grad_norm": 1.5201053619384766, "learning_rate": 0.00010947368421052632, "loss": 1.2052, "step": 1692 }, { "epoch": 4.449704142011834, "high_lr": 0.00010947368421052632, "low_lr": 2.1894736842105264e-06, "step": 1692 }, { "epoch": 4.449704142011834, "high_lr": 0.00010947368421052632, "low_lr": 2.1894736842105264e-06, "step": 1692 }, { "epoch": 4.449704142011834, "high_lr": 0.00010947368421052632, "low_lr": 2.1894736842105264e-06, "step": 1692 }, { "epoch": 4.449704142011834, "high_lr": 0.00010947368421052632, "low_lr": 2.1894736842105264e-06, "step": 1692 }, { "epoch": 4.449704142011834, "high_lr": 0.00010947368421052632, "low_lr": 2.1894736842105264e-06, "step": 1692 }, { "epoch": 4.449704142011834, "high_lr": 0.00010947368421052632, "low_lr": 2.1894736842105264e-06, "step": 1692 }, { "epoch": 4.449704142011834, "high_lr": 0.00010947368421052632, "low_lr": 2.1894736842105264e-06, "step": 1692 }, { "epoch": 4.449704142011834, "high_lr": 0.00010947368421052632, "low_lr": 2.1894736842105264e-06, "step": 1692 }, { "epoch": 4.4523339907955295, "grad_norm": 1.6563962697982788, "learning_rate": 0.00010894736842105263, "loss": 1.2061, "step": 1693 }, { "epoch": 4.4523339907955295, "high_lr": 0.00010894736842105263, "low_lr": 2.1789473684210528e-06, "step": 1693 }, { "epoch": 4.4523339907955295, "high_lr": 0.00010894736842105263, "low_lr": 2.1789473684210528e-06, "step": 1693 }, { "epoch": 4.4523339907955295, "high_lr": 0.00010894736842105263, "low_lr": 2.1789473684210528e-06, "step": 1693 }, { "epoch": 4.4523339907955295, "high_lr": 0.00010894736842105263, "low_lr": 2.1789473684210528e-06, "step": 1693 }, { "epoch": 4.4523339907955295, "high_lr": 0.00010894736842105263, "low_lr": 2.1789473684210528e-06, "step": 1693 }, { "epoch": 4.4523339907955295, "high_lr": 0.00010894736842105263, "low_lr": 2.1789473684210528e-06, "step": 1693 }, { "epoch": 4.4523339907955295, "high_lr": 0.00010894736842105263, "low_lr": 2.1789473684210528e-06, "step": 1693 }, { "epoch": 4.4523339907955295, "high_lr": 0.00010894736842105263, "low_lr": 2.1789473684210528e-06, "step": 1693 }, { "epoch": 4.454963839579224, "grad_norm": 1.60569167137146, "learning_rate": 0.00010842105263157894, "loss": 1.197, "step": 1694 }, { "epoch": 4.454963839579224, "high_lr": 0.00010842105263157894, "low_lr": 2.168421052631579e-06, "step": 1694 }, { "epoch": 4.454963839579224, "high_lr": 0.00010842105263157894, "low_lr": 2.168421052631579e-06, "step": 1694 }, { "epoch": 4.454963839579224, "high_lr": 0.00010842105263157894, "low_lr": 2.168421052631579e-06, "step": 1694 }, { "epoch": 4.454963839579224, "high_lr": 0.00010842105263157894, "low_lr": 2.168421052631579e-06, "step": 1694 }, { "epoch": 4.454963839579224, "high_lr": 0.00010842105263157894, "low_lr": 2.168421052631579e-06, "step": 1694 }, { "epoch": 4.454963839579224, "high_lr": 0.00010842105263157894, "low_lr": 2.168421052631579e-06, "step": 1694 }, { "epoch": 4.454963839579224, "high_lr": 0.00010842105263157894, "low_lr": 2.168421052631579e-06, "step": 1694 }, { "epoch": 4.454963839579224, "high_lr": 0.00010842105263157894, "low_lr": 2.168421052631579e-06, "step": 1694 }, { "epoch": 4.457593688362919, "grad_norm": 1.5314149856567383, "learning_rate": 0.00010789473684210527, "loss": 1.1758, "step": 1695 }, { "epoch": 4.457593688362919, "high_lr": 0.00010789473684210527, "low_lr": 2.1578947368421054e-06, "step": 1695 }, { "epoch": 4.457593688362919, "high_lr": 0.00010789473684210527, "low_lr": 2.1578947368421054e-06, "step": 1695 }, { "epoch": 4.457593688362919, "high_lr": 0.00010789473684210527, "low_lr": 2.1578947368421054e-06, "step": 1695 }, { "epoch": 4.457593688362919, "high_lr": 0.00010789473684210527, "low_lr": 2.1578947368421054e-06, "step": 1695 }, { "epoch": 4.457593688362919, "high_lr": 0.00010789473684210527, "low_lr": 2.1578947368421054e-06, "step": 1695 }, { "epoch": 4.457593688362919, "high_lr": 0.00010789473684210527, "low_lr": 2.1578947368421054e-06, "step": 1695 }, { "epoch": 4.457593688362919, "high_lr": 0.00010789473684210527, "low_lr": 2.1578947368421054e-06, "step": 1695 }, { "epoch": 4.457593688362919, "high_lr": 0.00010789473684210527, "low_lr": 2.1578947368421054e-06, "step": 1695 }, { "epoch": 4.460223537146614, "grad_norm": 1.638298511505127, "learning_rate": 0.00010736842105263158, "loss": 1.2319, "step": 1696 }, { "epoch": 4.460223537146614, "high_lr": 0.00010736842105263158, "low_lr": 2.1473684210526317e-06, "step": 1696 }, { "epoch": 4.460223537146614, "high_lr": 0.00010736842105263158, "low_lr": 2.1473684210526317e-06, "step": 1696 }, { "epoch": 4.460223537146614, "high_lr": 0.00010736842105263158, "low_lr": 2.1473684210526317e-06, "step": 1696 }, { "epoch": 4.460223537146614, "high_lr": 0.00010736842105263158, "low_lr": 2.1473684210526317e-06, "step": 1696 }, { "epoch": 4.460223537146614, "high_lr": 0.00010736842105263158, "low_lr": 2.1473684210526317e-06, "step": 1696 }, { "epoch": 4.460223537146614, "high_lr": 0.00010736842105263158, "low_lr": 2.1473684210526317e-06, "step": 1696 }, { "epoch": 4.460223537146614, "high_lr": 0.00010736842105263158, "low_lr": 2.1473684210526317e-06, "step": 1696 }, { "epoch": 4.460223537146614, "high_lr": 0.00010736842105263158, "low_lr": 2.1473684210526317e-06, "step": 1696 }, { "epoch": 4.462853385930309, "grad_norm": 1.6212403774261475, "learning_rate": 0.0001068421052631579, "loss": 1.1893, "step": 1697 }, { "epoch": 4.462853385930309, "high_lr": 0.0001068421052631579, "low_lr": 2.136842105263158e-06, "step": 1697 }, { "epoch": 4.462853385930309, "high_lr": 0.0001068421052631579, "low_lr": 2.136842105263158e-06, "step": 1697 }, { "epoch": 4.462853385930309, "high_lr": 0.0001068421052631579, "low_lr": 2.136842105263158e-06, "step": 1697 }, { "epoch": 4.462853385930309, "high_lr": 0.0001068421052631579, "low_lr": 2.136842105263158e-06, "step": 1697 }, { "epoch": 4.462853385930309, "high_lr": 0.0001068421052631579, "low_lr": 2.136842105263158e-06, "step": 1697 }, { "epoch": 4.462853385930309, "high_lr": 0.0001068421052631579, "low_lr": 2.136842105263158e-06, "step": 1697 }, { "epoch": 4.462853385930309, "high_lr": 0.0001068421052631579, "low_lr": 2.136842105263158e-06, "step": 1697 }, { "epoch": 4.462853385930309, "high_lr": 0.0001068421052631579, "low_lr": 2.136842105263158e-06, "step": 1697 }, { "epoch": 4.465483234714004, "grad_norm": 1.431408166885376, "learning_rate": 0.00010631578947368421, "loss": 1.2464, "step": 1698 }, { "epoch": 4.465483234714004, "high_lr": 0.00010631578947368421, "low_lr": 2.1263157894736844e-06, "step": 1698 }, { "epoch": 4.465483234714004, "high_lr": 0.00010631578947368421, "low_lr": 2.1263157894736844e-06, "step": 1698 }, { "epoch": 4.465483234714004, "high_lr": 0.00010631578947368421, "low_lr": 2.1263157894736844e-06, "step": 1698 }, { "epoch": 4.465483234714004, "high_lr": 0.00010631578947368421, "low_lr": 2.1263157894736844e-06, "step": 1698 }, { "epoch": 4.465483234714004, "high_lr": 0.00010631578947368421, "low_lr": 2.1263157894736844e-06, "step": 1698 }, { "epoch": 4.465483234714004, "high_lr": 0.00010631578947368421, "low_lr": 2.1263157894736844e-06, "step": 1698 }, { "epoch": 4.465483234714004, "high_lr": 0.00010631578947368421, "low_lr": 2.1263157894736844e-06, "step": 1698 }, { "epoch": 4.465483234714004, "high_lr": 0.00010631578947368421, "low_lr": 2.1263157894736844e-06, "step": 1698 }, { "epoch": 4.468113083497699, "grad_norm": 1.4881477355957031, "learning_rate": 0.00010578947368421053, "loss": 1.2306, "step": 1699 }, { "epoch": 4.468113083497699, "high_lr": 0.00010578947368421053, "low_lr": 2.1157894736842107e-06, "step": 1699 }, { "epoch": 4.468113083497699, "high_lr": 0.00010578947368421053, "low_lr": 2.1157894736842107e-06, "step": 1699 }, { "epoch": 4.468113083497699, "high_lr": 0.00010578947368421053, "low_lr": 2.1157894736842107e-06, "step": 1699 }, { "epoch": 4.468113083497699, "high_lr": 0.00010578947368421053, "low_lr": 2.1157894736842107e-06, "step": 1699 }, { "epoch": 4.468113083497699, "high_lr": 0.00010578947368421053, "low_lr": 2.1157894736842107e-06, "step": 1699 }, { "epoch": 4.468113083497699, "high_lr": 0.00010578947368421053, "low_lr": 2.1157894736842107e-06, "step": 1699 }, { "epoch": 4.468113083497699, "high_lr": 0.00010578947368421053, "low_lr": 2.1157894736842107e-06, "step": 1699 }, { "epoch": 4.468113083497699, "high_lr": 0.00010578947368421053, "low_lr": 2.1157894736842107e-06, "step": 1699 }, { "epoch": 4.470742932281394, "grad_norm": 1.4463176727294922, "learning_rate": 0.00010526315789473683, "loss": 1.2367, "step": 1700 }, { "epoch": 4.470742932281394, "high_lr": 0.00010526315789473683, "low_lr": 2.105263157894737e-06, "step": 1700 }, { "epoch": 4.470742932281394, "high_lr": 0.00010526315789473683, "low_lr": 2.105263157894737e-06, "step": 1700 }, { "epoch": 4.470742932281394, "high_lr": 0.00010526315789473683, "low_lr": 2.105263157894737e-06, "step": 1700 }, { "epoch": 4.470742932281394, "high_lr": 0.00010526315789473683, "low_lr": 2.105263157894737e-06, "step": 1700 }, { "epoch": 4.470742932281394, "high_lr": 0.00010526315789473683, "low_lr": 2.105263157894737e-06, "step": 1700 }, { "epoch": 4.470742932281394, "high_lr": 0.00010526315789473683, "low_lr": 2.105263157894737e-06, "step": 1700 }, { "epoch": 4.470742932281394, "high_lr": 0.00010526315789473683, "low_lr": 2.105263157894737e-06, "step": 1700 }, { "epoch": 4.470742932281394, "high_lr": 0.00010526315789473683, "low_lr": 2.105263157894737e-06, "step": 1700 }, { "epoch": 4.4733727810650885, "grad_norm": 1.4434162378311157, "learning_rate": 0.00010473684210526316, "loss": 1.2676, "step": 1701 }, { "epoch": 4.4733727810650885, "high_lr": 0.00010473684210526316, "low_lr": 2.0947368421052634e-06, "step": 1701 }, { "epoch": 4.4733727810650885, "high_lr": 0.00010473684210526316, "low_lr": 2.0947368421052634e-06, "step": 1701 }, { "epoch": 4.4733727810650885, "high_lr": 0.00010473684210526316, "low_lr": 2.0947368421052634e-06, "step": 1701 }, { "epoch": 4.4733727810650885, "high_lr": 0.00010473684210526316, "low_lr": 2.0947368421052634e-06, "step": 1701 }, { "epoch": 4.4733727810650885, "high_lr": 0.00010473684210526316, "low_lr": 2.0947368421052634e-06, "step": 1701 }, { "epoch": 4.4733727810650885, "high_lr": 0.00010473684210526316, "low_lr": 2.0947368421052634e-06, "step": 1701 }, { "epoch": 4.4733727810650885, "high_lr": 0.00010473684210526316, "low_lr": 2.0947368421052634e-06, "step": 1701 }, { "epoch": 4.4733727810650885, "high_lr": 0.00010473684210526316, "low_lr": 2.0947368421052634e-06, "step": 1701 }, { "epoch": 4.476002629848784, "grad_norm": 1.4597495794296265, "learning_rate": 0.00010421052631578947, "loss": 1.208, "step": 1702 }, { "epoch": 4.476002629848784, "high_lr": 0.00010421052631578947, "low_lr": 2.0842105263157897e-06, "step": 1702 }, { "epoch": 4.476002629848784, "high_lr": 0.00010421052631578947, "low_lr": 2.0842105263157897e-06, "step": 1702 }, { "epoch": 4.476002629848784, "high_lr": 0.00010421052631578947, "low_lr": 2.0842105263157897e-06, "step": 1702 }, { "epoch": 4.476002629848784, "high_lr": 0.00010421052631578947, "low_lr": 2.0842105263157897e-06, "step": 1702 }, { "epoch": 4.476002629848784, "high_lr": 0.00010421052631578947, "low_lr": 2.0842105263157897e-06, "step": 1702 }, { "epoch": 4.476002629848784, "high_lr": 0.00010421052631578947, "low_lr": 2.0842105263157897e-06, "step": 1702 }, { "epoch": 4.476002629848784, "high_lr": 0.00010421052631578947, "low_lr": 2.0842105263157897e-06, "step": 1702 }, { "epoch": 4.476002629848784, "high_lr": 0.00010421052631578947, "low_lr": 2.0842105263157897e-06, "step": 1702 }, { "epoch": 4.478632478632479, "grad_norm": 1.516308069229126, "learning_rate": 0.0001036842105263158, "loss": 1.2793, "step": 1703 }, { "epoch": 4.478632478632479, "high_lr": 0.0001036842105263158, "low_lr": 2.073684210526316e-06, "step": 1703 }, { "epoch": 4.478632478632479, "high_lr": 0.0001036842105263158, "low_lr": 2.073684210526316e-06, "step": 1703 }, { "epoch": 4.478632478632479, "high_lr": 0.0001036842105263158, "low_lr": 2.073684210526316e-06, "step": 1703 }, { "epoch": 4.478632478632479, "high_lr": 0.0001036842105263158, "low_lr": 2.073684210526316e-06, "step": 1703 }, { "epoch": 4.478632478632479, "high_lr": 0.0001036842105263158, "low_lr": 2.073684210526316e-06, "step": 1703 }, { "epoch": 4.478632478632479, "high_lr": 0.0001036842105263158, "low_lr": 2.073684210526316e-06, "step": 1703 }, { "epoch": 4.478632478632479, "high_lr": 0.0001036842105263158, "low_lr": 2.073684210526316e-06, "step": 1703 }, { "epoch": 4.478632478632479, "high_lr": 0.0001036842105263158, "low_lr": 2.073684210526316e-06, "step": 1703 }, { "epoch": 4.481262327416173, "grad_norm": 1.5422941446304321, "learning_rate": 0.0001031578947368421, "loss": 1.1922, "step": 1704 }, { "epoch": 4.481262327416173, "high_lr": 0.0001031578947368421, "low_lr": 2.0631578947368424e-06, "step": 1704 }, { "epoch": 4.481262327416173, "high_lr": 0.0001031578947368421, "low_lr": 2.0631578947368424e-06, "step": 1704 }, { "epoch": 4.481262327416173, "high_lr": 0.0001031578947368421, "low_lr": 2.0631578947368424e-06, "step": 1704 }, { "epoch": 4.481262327416173, "high_lr": 0.0001031578947368421, "low_lr": 2.0631578947368424e-06, "step": 1704 }, { "epoch": 4.481262327416173, "high_lr": 0.0001031578947368421, "low_lr": 2.0631578947368424e-06, "step": 1704 }, { "epoch": 4.481262327416173, "high_lr": 0.0001031578947368421, "low_lr": 2.0631578947368424e-06, "step": 1704 }, { "epoch": 4.481262327416173, "high_lr": 0.0001031578947368421, "low_lr": 2.0631578947368424e-06, "step": 1704 }, { "epoch": 4.481262327416173, "high_lr": 0.0001031578947368421, "low_lr": 2.0631578947368424e-06, "step": 1704 }, { "epoch": 4.483892176199869, "grad_norm": 1.4864860773086548, "learning_rate": 0.00010263157894736843, "loss": 1.2205, "step": 1705 }, { "epoch": 4.483892176199869, "high_lr": 0.00010263157894736843, "low_lr": 2.0526315789473687e-06, "step": 1705 }, { "epoch": 4.483892176199869, "high_lr": 0.00010263157894736843, "low_lr": 2.0526315789473687e-06, "step": 1705 }, { "epoch": 4.483892176199869, "high_lr": 0.00010263157894736843, "low_lr": 2.0526315789473687e-06, "step": 1705 }, { "epoch": 4.483892176199869, "high_lr": 0.00010263157894736843, "low_lr": 2.0526315789473687e-06, "step": 1705 }, { "epoch": 4.483892176199869, "high_lr": 0.00010263157894736843, "low_lr": 2.0526315789473687e-06, "step": 1705 }, { "epoch": 4.483892176199869, "high_lr": 0.00010263157894736843, "low_lr": 2.0526315789473687e-06, "step": 1705 }, { "epoch": 4.483892176199869, "high_lr": 0.00010263157894736843, "low_lr": 2.0526315789473687e-06, "step": 1705 }, { "epoch": 4.483892176199869, "high_lr": 0.00010263157894736843, "low_lr": 2.0526315789473687e-06, "step": 1705 }, { "epoch": 4.486522024983564, "grad_norm": 1.792134165763855, "learning_rate": 0.00010210526315789474, "loss": 1.2321, "step": 1706 }, { "epoch": 4.486522024983564, "high_lr": 0.00010210526315789474, "low_lr": 2.042105263157895e-06, "step": 1706 }, { "epoch": 4.486522024983564, "high_lr": 0.00010210526315789474, "low_lr": 2.042105263157895e-06, "step": 1706 }, { "epoch": 4.486522024983564, "high_lr": 0.00010210526315789474, "low_lr": 2.042105263157895e-06, "step": 1706 }, { "epoch": 4.486522024983564, "high_lr": 0.00010210526315789474, "low_lr": 2.042105263157895e-06, "step": 1706 }, { "epoch": 4.486522024983564, "high_lr": 0.00010210526315789474, "low_lr": 2.042105263157895e-06, "step": 1706 }, { "epoch": 4.486522024983564, "high_lr": 0.00010210526315789474, "low_lr": 2.042105263157895e-06, "step": 1706 }, { "epoch": 4.486522024983564, "high_lr": 0.00010210526315789474, "low_lr": 2.042105263157895e-06, "step": 1706 }, { "epoch": 4.486522024983564, "high_lr": 0.00010210526315789474, "low_lr": 2.042105263157895e-06, "step": 1706 }, { "epoch": 4.489151873767258, "grad_norm": 1.527948021888733, "learning_rate": 0.00010157894736842105, "loss": 1.2312, "step": 1707 }, { "epoch": 4.489151873767258, "high_lr": 0.00010157894736842105, "low_lr": 2.031578947368421e-06, "step": 1707 }, { "epoch": 4.489151873767258, "high_lr": 0.00010157894736842105, "low_lr": 2.031578947368421e-06, "step": 1707 }, { "epoch": 4.489151873767258, "high_lr": 0.00010157894736842105, "low_lr": 2.031578947368421e-06, "step": 1707 }, { "epoch": 4.489151873767258, "high_lr": 0.00010157894736842105, "low_lr": 2.031578947368421e-06, "step": 1707 }, { "epoch": 4.489151873767258, "high_lr": 0.00010157894736842105, "low_lr": 2.031578947368421e-06, "step": 1707 }, { "epoch": 4.489151873767258, "high_lr": 0.00010157894736842105, "low_lr": 2.031578947368421e-06, "step": 1707 }, { "epoch": 4.489151873767258, "high_lr": 0.00010157894736842105, "low_lr": 2.031578947368421e-06, "step": 1707 }, { "epoch": 4.489151873767258, "high_lr": 0.00010157894736842105, "low_lr": 2.031578947368421e-06, "step": 1707 }, { "epoch": 4.491781722550954, "grad_norm": 1.4565075635910034, "learning_rate": 0.00010105263157894737, "loss": 1.2345, "step": 1708 }, { "epoch": 4.491781722550954, "high_lr": 0.00010105263157894737, "low_lr": 2.0210526315789477e-06, "step": 1708 }, { "epoch": 4.491781722550954, "high_lr": 0.00010105263157894737, "low_lr": 2.0210526315789477e-06, "step": 1708 }, { "epoch": 4.491781722550954, "high_lr": 0.00010105263157894737, "low_lr": 2.0210526315789477e-06, "step": 1708 }, { "epoch": 4.491781722550954, "high_lr": 0.00010105263157894737, "low_lr": 2.0210526315789477e-06, "step": 1708 }, { "epoch": 4.491781722550954, "high_lr": 0.00010105263157894737, "low_lr": 2.0210526315789477e-06, "step": 1708 }, { "epoch": 4.491781722550954, "high_lr": 0.00010105263157894737, "low_lr": 2.0210526315789477e-06, "step": 1708 }, { "epoch": 4.491781722550954, "high_lr": 0.00010105263157894737, "low_lr": 2.0210526315789477e-06, "step": 1708 }, { "epoch": 4.491781722550954, "high_lr": 0.00010105263157894737, "low_lr": 2.0210526315789477e-06, "step": 1708 }, { "epoch": 4.494411571334648, "grad_norm": 1.5402123928070068, "learning_rate": 0.00010052631578947369, "loss": 1.1942, "step": 1709 }, { "epoch": 4.494411571334648, "high_lr": 0.00010052631578947369, "low_lr": 2.010526315789474e-06, "step": 1709 }, { "epoch": 4.494411571334648, "high_lr": 0.00010052631578947369, "low_lr": 2.010526315789474e-06, "step": 1709 }, { "epoch": 4.494411571334648, "high_lr": 0.00010052631578947369, "low_lr": 2.010526315789474e-06, "step": 1709 }, { "epoch": 4.494411571334648, "high_lr": 0.00010052631578947369, "low_lr": 2.010526315789474e-06, "step": 1709 }, { "epoch": 4.494411571334648, "high_lr": 0.00010052631578947369, "low_lr": 2.010526315789474e-06, "step": 1709 }, { "epoch": 4.494411571334648, "high_lr": 0.00010052631578947369, "low_lr": 2.010526315789474e-06, "step": 1709 }, { "epoch": 4.494411571334648, "high_lr": 0.00010052631578947369, "low_lr": 2.010526315789474e-06, "step": 1709 }, { "epoch": 4.494411571334648, "high_lr": 0.00010052631578947369, "low_lr": 2.010526315789474e-06, "step": 1709 }, { "epoch": 4.497041420118343, "grad_norm": 1.6114143133163452, "learning_rate": 0.0001, "loss": 1.2542, "step": 1710 }, { "epoch": 4.497041420118343, "high_lr": 0.0001, "low_lr": 2.0000000000000003e-06, "step": 1710 }, { "epoch": 4.497041420118343, "high_lr": 0.0001, "low_lr": 2.0000000000000003e-06, "step": 1710 }, { "epoch": 4.497041420118343, "high_lr": 0.0001, "low_lr": 2.0000000000000003e-06, "step": 1710 }, { "epoch": 4.497041420118343, "high_lr": 0.0001, "low_lr": 2.0000000000000003e-06, "step": 1710 }, { "epoch": 4.497041420118343, "high_lr": 0.0001, "low_lr": 2.0000000000000003e-06, "step": 1710 }, { "epoch": 4.497041420118343, "high_lr": 0.0001, "low_lr": 2.0000000000000003e-06, "step": 1710 }, { "epoch": 4.497041420118343, "high_lr": 0.0001, "low_lr": 2.0000000000000003e-06, "step": 1710 }, { "epoch": 4.497041420118343, "high_lr": 0.0001, "low_lr": 2.0000000000000003e-06, "step": 1710 }, { "epoch": 4.499671268902038, "grad_norm": 1.480592131614685, "learning_rate": 9.947368421052632e-05, "loss": 1.2448, "step": 1711 }, { "epoch": 4.499671268902038, "high_lr": 9.947368421052632e-05, "low_lr": 1.9894736842105262e-06, "step": 1711 }, { "epoch": 4.499671268902038, "high_lr": 9.947368421052632e-05, "low_lr": 1.9894736842105262e-06, "step": 1711 }, { "epoch": 4.499671268902038, "high_lr": 9.947368421052632e-05, "low_lr": 1.9894736842105262e-06, "step": 1711 }, { "epoch": 4.499671268902038, "high_lr": 9.947368421052632e-05, "low_lr": 1.9894736842105262e-06, "step": 1711 }, { "epoch": 4.499671268902038, "high_lr": 9.947368421052632e-05, "low_lr": 1.9894736842105262e-06, "step": 1711 }, { "epoch": 4.499671268902038, "high_lr": 9.947368421052632e-05, "low_lr": 1.9894736842105262e-06, "step": 1711 }, { "epoch": 4.499671268902038, "high_lr": 9.947368421052632e-05, "low_lr": 1.9894736842105262e-06, "step": 1711 }, { "epoch": 4.499671268902038, "high_lr": 9.947368421052632e-05, "low_lr": 1.9894736842105262e-06, "step": 1711 }, { "epoch": 4.502301117685733, "grad_norm": 1.4475226402282715, "learning_rate": 9.894736842105263e-05, "loss": 1.2349, "step": 1712 }, { "epoch": 4.502301117685733, "high_lr": 9.894736842105263e-05, "low_lr": 1.978947368421053e-06, "step": 1712 }, { "epoch": 4.502301117685733, "high_lr": 9.894736842105263e-05, "low_lr": 1.978947368421053e-06, "step": 1712 }, { "epoch": 4.502301117685733, "high_lr": 9.894736842105263e-05, "low_lr": 1.978947368421053e-06, "step": 1712 }, { "epoch": 4.502301117685733, "high_lr": 9.894736842105263e-05, "low_lr": 1.978947368421053e-06, "step": 1712 }, { "epoch": 4.502301117685733, "high_lr": 9.894736842105263e-05, "low_lr": 1.978947368421053e-06, "step": 1712 }, { "epoch": 4.502301117685733, "high_lr": 9.894736842105263e-05, "low_lr": 1.978947368421053e-06, "step": 1712 }, { "epoch": 4.502301117685733, "high_lr": 9.894736842105263e-05, "low_lr": 1.978947368421053e-06, "step": 1712 }, { "epoch": 4.502301117685733, "high_lr": 9.894736842105263e-05, "low_lr": 1.978947368421053e-06, "step": 1712 }, { "epoch": 4.504930966469428, "grad_norm": 1.7226275205612183, "learning_rate": 9.842105263157896e-05, "loss": 1.1949, "step": 1713 }, { "epoch": 4.504930966469428, "high_lr": 9.842105263157896e-05, "low_lr": 1.9684210526315793e-06, "step": 1713 }, { "epoch": 4.504930966469428, "high_lr": 9.842105263157896e-05, "low_lr": 1.9684210526315793e-06, "step": 1713 }, { "epoch": 4.504930966469428, "high_lr": 9.842105263157896e-05, "low_lr": 1.9684210526315793e-06, "step": 1713 }, { "epoch": 4.504930966469428, "high_lr": 9.842105263157896e-05, "low_lr": 1.9684210526315793e-06, "step": 1713 }, { "epoch": 4.504930966469428, "high_lr": 9.842105263157896e-05, "low_lr": 1.9684210526315793e-06, "step": 1713 }, { "epoch": 4.504930966469428, "high_lr": 9.842105263157896e-05, "low_lr": 1.9684210526315793e-06, "step": 1713 }, { "epoch": 4.504930966469428, "high_lr": 9.842105263157896e-05, "low_lr": 1.9684210526315793e-06, "step": 1713 }, { "epoch": 4.504930966469428, "high_lr": 9.842105263157896e-05, "low_lr": 1.9684210526315793e-06, "step": 1713 }, { "epoch": 4.507560815253123, "grad_norm": 1.5617784261703491, "learning_rate": 9.789473684210526e-05, "loss": 1.2295, "step": 1714 }, { "epoch": 4.507560815253123, "high_lr": 9.789473684210526e-05, "low_lr": 1.9578947368421052e-06, "step": 1714 }, { "epoch": 4.507560815253123, "high_lr": 9.789473684210526e-05, "low_lr": 1.9578947368421052e-06, "step": 1714 }, { "epoch": 4.507560815253123, "high_lr": 9.789473684210526e-05, "low_lr": 1.9578947368421052e-06, "step": 1714 }, { "epoch": 4.507560815253123, "high_lr": 9.789473684210526e-05, "low_lr": 1.9578947368421052e-06, "step": 1714 }, { "epoch": 4.507560815253123, "high_lr": 9.789473684210526e-05, "low_lr": 1.9578947368421052e-06, "step": 1714 }, { "epoch": 4.507560815253123, "high_lr": 9.789473684210526e-05, "low_lr": 1.9578947368421052e-06, "step": 1714 }, { "epoch": 4.507560815253123, "high_lr": 9.789473684210526e-05, "low_lr": 1.9578947368421052e-06, "step": 1714 }, { "epoch": 4.507560815253123, "high_lr": 9.789473684210526e-05, "low_lr": 1.9578947368421052e-06, "step": 1714 }, { "epoch": 4.510190664036818, "grad_norm": 1.5321367979049683, "learning_rate": 9.736842105263158e-05, "loss": 1.2088, "step": 1715 }, { "epoch": 4.510190664036818, "high_lr": 9.736842105263158e-05, "low_lr": 1.9473684210526315e-06, "step": 1715 }, { "epoch": 4.510190664036818, "high_lr": 9.736842105263158e-05, "low_lr": 1.9473684210526315e-06, "step": 1715 }, { "epoch": 4.510190664036818, "high_lr": 9.736842105263158e-05, "low_lr": 1.9473684210526315e-06, "step": 1715 }, { "epoch": 4.510190664036818, "high_lr": 9.736842105263158e-05, "low_lr": 1.9473684210526315e-06, "step": 1715 }, { "epoch": 4.510190664036818, "high_lr": 9.736842105263158e-05, "low_lr": 1.9473684210526315e-06, "step": 1715 }, { "epoch": 4.510190664036818, "high_lr": 9.736842105263158e-05, "low_lr": 1.9473684210526315e-06, "step": 1715 }, { "epoch": 4.510190664036818, "high_lr": 9.736842105263158e-05, "low_lr": 1.9473684210526315e-06, "step": 1715 }, { "epoch": 4.510190664036818, "high_lr": 9.736842105263158e-05, "low_lr": 1.9473684210526315e-06, "step": 1715 }, { "epoch": 4.512820512820513, "grad_norm": 1.6108227968215942, "learning_rate": 9.68421052631579e-05, "loss": 1.2579, "step": 1716 }, { "epoch": 4.512820512820513, "high_lr": 9.68421052631579e-05, "low_lr": 1.936842105263158e-06, "step": 1716 }, { "epoch": 4.512820512820513, "high_lr": 9.68421052631579e-05, "low_lr": 1.936842105263158e-06, "step": 1716 }, { "epoch": 4.512820512820513, "high_lr": 9.68421052631579e-05, "low_lr": 1.936842105263158e-06, "step": 1716 }, { "epoch": 4.512820512820513, "high_lr": 9.68421052631579e-05, "low_lr": 1.936842105263158e-06, "step": 1716 }, { "epoch": 4.512820512820513, "high_lr": 9.68421052631579e-05, "low_lr": 1.936842105263158e-06, "step": 1716 }, { "epoch": 4.512820512820513, "high_lr": 9.68421052631579e-05, "low_lr": 1.936842105263158e-06, "step": 1716 }, { "epoch": 4.512820512820513, "high_lr": 9.68421052631579e-05, "low_lr": 1.936842105263158e-06, "step": 1716 }, { "epoch": 4.512820512820513, "high_lr": 9.68421052631579e-05, "low_lr": 1.936842105263158e-06, "step": 1716 }, { "epoch": 4.515450361604207, "grad_norm": 1.5909062623977661, "learning_rate": 9.631578947368422e-05, "loss": 1.1905, "step": 1717 }, { "epoch": 4.515450361604207, "high_lr": 9.631578947368422e-05, "low_lr": 1.9263157894736846e-06, "step": 1717 }, { "epoch": 4.515450361604207, "high_lr": 9.631578947368422e-05, "low_lr": 1.9263157894736846e-06, "step": 1717 }, { "epoch": 4.515450361604207, "high_lr": 9.631578947368422e-05, "low_lr": 1.9263157894736846e-06, "step": 1717 }, { "epoch": 4.515450361604207, "high_lr": 9.631578947368422e-05, "low_lr": 1.9263157894736846e-06, "step": 1717 }, { "epoch": 4.515450361604207, "high_lr": 9.631578947368422e-05, "low_lr": 1.9263157894736846e-06, "step": 1717 }, { "epoch": 4.515450361604207, "high_lr": 9.631578947368422e-05, "low_lr": 1.9263157894736846e-06, "step": 1717 }, { "epoch": 4.515450361604207, "high_lr": 9.631578947368422e-05, "low_lr": 1.9263157894736846e-06, "step": 1717 }, { "epoch": 4.515450361604207, "high_lr": 9.631578947368422e-05, "low_lr": 1.9263157894736846e-06, "step": 1717 }, { "epoch": 4.518080210387903, "grad_norm": 2.0398621559143066, "learning_rate": 9.578947368421052e-05, "loss": 1.1962, "step": 1718 }, { "epoch": 4.518080210387903, "high_lr": 9.578947368421052e-05, "low_lr": 1.9157894736842105e-06, "step": 1718 }, { "epoch": 4.518080210387903, "high_lr": 9.578947368421052e-05, "low_lr": 1.9157894736842105e-06, "step": 1718 }, { "epoch": 4.518080210387903, "high_lr": 9.578947368421052e-05, "low_lr": 1.9157894736842105e-06, "step": 1718 }, { "epoch": 4.518080210387903, "high_lr": 9.578947368421052e-05, "low_lr": 1.9157894736842105e-06, "step": 1718 }, { "epoch": 4.518080210387903, "high_lr": 9.578947368421052e-05, "low_lr": 1.9157894736842105e-06, "step": 1718 }, { "epoch": 4.518080210387903, "high_lr": 9.578947368421052e-05, "low_lr": 1.9157894736842105e-06, "step": 1718 }, { "epoch": 4.518080210387903, "high_lr": 9.578947368421052e-05, "low_lr": 1.9157894736842105e-06, "step": 1718 }, { "epoch": 4.518080210387903, "high_lr": 9.578947368421052e-05, "low_lr": 1.9157894736842105e-06, "step": 1718 }, { "epoch": 4.520710059171598, "grad_norm": 1.517663836479187, "learning_rate": 9.526315789473685e-05, "loss": 1.2209, "step": 1719 }, { "epoch": 4.520710059171598, "high_lr": 9.526315789473685e-05, "low_lr": 1.905263157894737e-06, "step": 1719 }, { "epoch": 4.520710059171598, "high_lr": 9.526315789473685e-05, "low_lr": 1.905263157894737e-06, "step": 1719 }, { "epoch": 4.520710059171598, "high_lr": 9.526315789473685e-05, "low_lr": 1.905263157894737e-06, "step": 1719 }, { "epoch": 4.520710059171598, "high_lr": 9.526315789473685e-05, "low_lr": 1.905263157894737e-06, "step": 1719 }, { "epoch": 4.520710059171598, "high_lr": 9.526315789473685e-05, "low_lr": 1.905263157894737e-06, "step": 1719 }, { "epoch": 4.520710059171598, "high_lr": 9.526315789473685e-05, "low_lr": 1.905263157894737e-06, "step": 1719 }, { "epoch": 4.520710059171598, "high_lr": 9.526315789473685e-05, "low_lr": 1.905263157894737e-06, "step": 1719 }, { "epoch": 4.520710059171598, "high_lr": 9.526315789473685e-05, "low_lr": 1.905263157894737e-06, "step": 1719 }, { "epoch": 4.523339907955292, "grad_norm": 1.4186933040618896, "learning_rate": 9.473684210526316e-05, "loss": 1.2476, "step": 1720 }, { "epoch": 4.523339907955292, "high_lr": 9.473684210526316e-05, "low_lr": 1.8947368421052634e-06, "step": 1720 }, { "epoch": 4.523339907955292, "high_lr": 9.473684210526316e-05, "low_lr": 1.8947368421052634e-06, "step": 1720 }, { "epoch": 4.523339907955292, "high_lr": 9.473684210526316e-05, "low_lr": 1.8947368421052634e-06, "step": 1720 }, { "epoch": 4.523339907955292, "high_lr": 9.473684210526316e-05, "low_lr": 1.8947368421052634e-06, "step": 1720 }, { "epoch": 4.523339907955292, "high_lr": 9.473684210526316e-05, "low_lr": 1.8947368421052634e-06, "step": 1720 }, { "epoch": 4.523339907955292, "high_lr": 9.473684210526316e-05, "low_lr": 1.8947368421052634e-06, "step": 1720 }, { "epoch": 4.523339907955292, "high_lr": 9.473684210526316e-05, "low_lr": 1.8947368421052634e-06, "step": 1720 }, { "epoch": 4.523339907955292, "high_lr": 9.473684210526316e-05, "low_lr": 1.8947368421052634e-06, "step": 1720 }, { "epoch": 4.525969756738988, "grad_norm": 1.722344160079956, "learning_rate": 9.421052631578947e-05, "loss": 1.2545, "step": 1721 }, { "epoch": 4.525969756738988, "high_lr": 9.421052631578947e-05, "low_lr": 1.8842105263157895e-06, "step": 1721 }, { "epoch": 4.525969756738988, "high_lr": 9.421052631578947e-05, "low_lr": 1.8842105263157895e-06, "step": 1721 }, { "epoch": 4.525969756738988, "high_lr": 9.421052631578947e-05, "low_lr": 1.8842105263157895e-06, "step": 1721 }, { "epoch": 4.525969756738988, "high_lr": 9.421052631578947e-05, "low_lr": 1.8842105263157895e-06, "step": 1721 }, { "epoch": 4.525969756738988, "high_lr": 9.421052631578947e-05, "low_lr": 1.8842105263157895e-06, "step": 1721 }, { "epoch": 4.525969756738988, "high_lr": 9.421052631578947e-05, "low_lr": 1.8842105263157895e-06, "step": 1721 }, { "epoch": 4.525969756738988, "high_lr": 9.421052631578947e-05, "low_lr": 1.8842105263157895e-06, "step": 1721 }, { "epoch": 4.525969756738988, "high_lr": 9.421052631578947e-05, "low_lr": 1.8842105263157895e-06, "step": 1721 }, { "epoch": 4.5285996055226825, "grad_norm": 1.625738263130188, "learning_rate": 9.368421052631579e-05, "loss": 1.2292, "step": 1722 }, { "epoch": 4.5285996055226825, "high_lr": 9.368421052631579e-05, "low_lr": 1.8736842105263158e-06, "step": 1722 }, { "epoch": 4.5285996055226825, "high_lr": 9.368421052631579e-05, "low_lr": 1.8736842105263158e-06, "step": 1722 }, { "epoch": 4.5285996055226825, "high_lr": 9.368421052631579e-05, "low_lr": 1.8736842105263158e-06, "step": 1722 }, { "epoch": 4.5285996055226825, "high_lr": 9.368421052631579e-05, "low_lr": 1.8736842105263158e-06, "step": 1722 }, { "epoch": 4.5285996055226825, "high_lr": 9.368421052631579e-05, "low_lr": 1.8736842105263158e-06, "step": 1722 }, { "epoch": 4.5285996055226825, "high_lr": 9.368421052631579e-05, "low_lr": 1.8736842105263158e-06, "step": 1722 }, { "epoch": 4.5285996055226825, "high_lr": 9.368421052631579e-05, "low_lr": 1.8736842105263158e-06, "step": 1722 }, { "epoch": 4.5285996055226825, "high_lr": 9.368421052631579e-05, "low_lr": 1.8736842105263158e-06, "step": 1722 }, { "epoch": 4.531229454306377, "grad_norm": 1.4990150928497314, "learning_rate": 9.315789473684211e-05, "loss": 1.1858, "step": 1723 }, { "epoch": 4.531229454306377, "high_lr": 9.315789473684211e-05, "low_lr": 1.8631578947368424e-06, "step": 1723 }, { "epoch": 4.531229454306377, "high_lr": 9.315789473684211e-05, "low_lr": 1.8631578947368424e-06, "step": 1723 }, { "epoch": 4.531229454306377, "high_lr": 9.315789473684211e-05, "low_lr": 1.8631578947368424e-06, "step": 1723 }, { "epoch": 4.531229454306377, "high_lr": 9.315789473684211e-05, "low_lr": 1.8631578947368424e-06, "step": 1723 }, { "epoch": 4.531229454306377, "high_lr": 9.315789473684211e-05, "low_lr": 1.8631578947368424e-06, "step": 1723 }, { "epoch": 4.531229454306377, "high_lr": 9.315789473684211e-05, "low_lr": 1.8631578947368424e-06, "step": 1723 }, { "epoch": 4.531229454306377, "high_lr": 9.315789473684211e-05, "low_lr": 1.8631578947368424e-06, "step": 1723 }, { "epoch": 4.531229454306377, "high_lr": 9.315789473684211e-05, "low_lr": 1.8631578947368424e-06, "step": 1723 }, { "epoch": 4.533859303090073, "grad_norm": 1.6220035552978516, "learning_rate": 9.263157894736843e-05, "loss": 1.2138, "step": 1724 }, { "epoch": 4.533859303090073, "high_lr": 9.263157894736843e-05, "low_lr": 1.8526315789473687e-06, "step": 1724 }, { "epoch": 4.533859303090073, "high_lr": 9.263157894736843e-05, "low_lr": 1.8526315789473687e-06, "step": 1724 }, { "epoch": 4.533859303090073, "high_lr": 9.263157894736843e-05, "low_lr": 1.8526315789473687e-06, "step": 1724 }, { "epoch": 4.533859303090073, "high_lr": 9.263157894736843e-05, "low_lr": 1.8526315789473687e-06, "step": 1724 }, { "epoch": 4.533859303090073, "high_lr": 9.263157894736843e-05, "low_lr": 1.8526315789473687e-06, "step": 1724 }, { "epoch": 4.533859303090073, "high_lr": 9.263157894736843e-05, "low_lr": 1.8526315789473687e-06, "step": 1724 }, { "epoch": 4.533859303090073, "high_lr": 9.263157894736843e-05, "low_lr": 1.8526315789473687e-06, "step": 1724 }, { "epoch": 4.533859303090073, "high_lr": 9.263157894736843e-05, "low_lr": 1.8526315789473687e-06, "step": 1724 }, { "epoch": 4.536489151873767, "grad_norm": 1.5870214700698853, "learning_rate": 9.210526315789474e-05, "loss": 1.235, "step": 1725 }, { "epoch": 4.536489151873767, "high_lr": 9.210526315789474e-05, "low_lr": 1.8421052631578948e-06, "step": 1725 }, { "epoch": 4.536489151873767, "high_lr": 9.210526315789474e-05, "low_lr": 1.8421052631578948e-06, "step": 1725 }, { "epoch": 4.536489151873767, "high_lr": 9.210526315789474e-05, "low_lr": 1.8421052631578948e-06, "step": 1725 }, { "epoch": 4.536489151873767, "high_lr": 9.210526315789474e-05, "low_lr": 1.8421052631578948e-06, "step": 1725 }, { "epoch": 4.536489151873767, "high_lr": 9.210526315789474e-05, "low_lr": 1.8421052631578948e-06, "step": 1725 }, { "epoch": 4.536489151873767, "high_lr": 9.210526315789474e-05, "low_lr": 1.8421052631578948e-06, "step": 1725 }, { "epoch": 4.536489151873767, "high_lr": 9.210526315789474e-05, "low_lr": 1.8421052631578948e-06, "step": 1725 }, { "epoch": 4.536489151873767, "high_lr": 9.210526315789474e-05, "low_lr": 1.8421052631578948e-06, "step": 1725 }, { "epoch": 4.539119000657462, "grad_norm": 1.57758629322052, "learning_rate": 9.157894736842105e-05, "loss": 1.2318, "step": 1726 }, { "epoch": 4.539119000657462, "high_lr": 9.157894736842105e-05, "low_lr": 1.8315789473684211e-06, "step": 1726 }, { "epoch": 4.539119000657462, "high_lr": 9.157894736842105e-05, "low_lr": 1.8315789473684211e-06, "step": 1726 }, { "epoch": 4.539119000657462, "high_lr": 9.157894736842105e-05, "low_lr": 1.8315789473684211e-06, "step": 1726 }, { "epoch": 4.539119000657462, "high_lr": 9.157894736842105e-05, "low_lr": 1.8315789473684211e-06, "step": 1726 }, { "epoch": 4.539119000657462, "high_lr": 9.157894736842105e-05, "low_lr": 1.8315789473684211e-06, "step": 1726 }, { "epoch": 4.539119000657462, "high_lr": 9.157894736842105e-05, "low_lr": 1.8315789473684211e-06, "step": 1726 }, { "epoch": 4.539119000657462, "high_lr": 9.157894736842105e-05, "low_lr": 1.8315789473684211e-06, "step": 1726 }, { "epoch": 4.539119000657462, "high_lr": 9.157894736842105e-05, "low_lr": 1.8315789473684211e-06, "step": 1726 }, { "epoch": 4.5417488494411575, "grad_norm": 1.504319667816162, "learning_rate": 9.105263157894738e-05, "loss": 1.1902, "step": 1727 }, { "epoch": 4.5417488494411575, "high_lr": 9.105263157894738e-05, "low_lr": 1.8210526315789475e-06, "step": 1727 }, { "epoch": 4.5417488494411575, "high_lr": 9.105263157894738e-05, "low_lr": 1.8210526315789475e-06, "step": 1727 }, { "epoch": 4.5417488494411575, "high_lr": 9.105263157894738e-05, "low_lr": 1.8210526315789475e-06, "step": 1727 }, { "epoch": 4.5417488494411575, "high_lr": 9.105263157894738e-05, "low_lr": 1.8210526315789475e-06, "step": 1727 }, { "epoch": 4.5417488494411575, "high_lr": 9.105263157894738e-05, "low_lr": 1.8210526315789475e-06, "step": 1727 }, { "epoch": 4.5417488494411575, "high_lr": 9.105263157894738e-05, "low_lr": 1.8210526315789475e-06, "step": 1727 }, { "epoch": 4.5417488494411575, "high_lr": 9.105263157894738e-05, "low_lr": 1.8210526315789475e-06, "step": 1727 }, { "epoch": 4.5417488494411575, "high_lr": 9.105263157894738e-05, "low_lr": 1.8210526315789475e-06, "step": 1727 }, { "epoch": 4.544378698224852, "grad_norm": 1.4558043479919434, "learning_rate": 9.052631578947369e-05, "loss": 1.2492, "step": 1728 }, { "epoch": 4.544378698224852, "high_lr": 9.052631578947369e-05, "low_lr": 1.810526315789474e-06, "step": 1728 }, { "epoch": 4.544378698224852, "high_lr": 9.052631578947369e-05, "low_lr": 1.810526315789474e-06, "step": 1728 }, { "epoch": 4.544378698224852, "high_lr": 9.052631578947369e-05, "low_lr": 1.810526315789474e-06, "step": 1728 }, { "epoch": 4.544378698224852, "high_lr": 9.052631578947369e-05, "low_lr": 1.810526315789474e-06, "step": 1728 }, { "epoch": 4.544378698224852, "high_lr": 9.052631578947369e-05, "low_lr": 1.810526315789474e-06, "step": 1728 }, { "epoch": 4.544378698224852, "high_lr": 9.052631578947369e-05, "low_lr": 1.810526315789474e-06, "step": 1728 }, { "epoch": 4.544378698224852, "high_lr": 9.052631578947369e-05, "low_lr": 1.810526315789474e-06, "step": 1728 }, { "epoch": 4.544378698224852, "high_lr": 9.052631578947369e-05, "low_lr": 1.810526315789474e-06, "step": 1728 }, { "epoch": 4.547008547008547, "grad_norm": 1.7187362909317017, "learning_rate": 8.999999999999999e-05, "loss": 1.2288, "step": 1729 }, { "epoch": 4.547008547008547, "high_lr": 8.999999999999999e-05, "low_lr": 1.8000000000000001e-06, "step": 1729 }, { "epoch": 4.547008547008547, "high_lr": 8.999999999999999e-05, "low_lr": 1.8000000000000001e-06, "step": 1729 }, { "epoch": 4.547008547008547, "high_lr": 8.999999999999999e-05, "low_lr": 1.8000000000000001e-06, "step": 1729 }, { "epoch": 4.547008547008547, "high_lr": 8.999999999999999e-05, "low_lr": 1.8000000000000001e-06, "step": 1729 }, { "epoch": 4.547008547008547, "high_lr": 8.999999999999999e-05, "low_lr": 1.8000000000000001e-06, "step": 1729 }, { "epoch": 4.547008547008547, "high_lr": 8.999999999999999e-05, "low_lr": 1.8000000000000001e-06, "step": 1729 }, { "epoch": 4.547008547008547, "high_lr": 8.999999999999999e-05, "low_lr": 1.8000000000000001e-06, "step": 1729 }, { "epoch": 4.547008547008547, "high_lr": 8.999999999999999e-05, "low_lr": 1.8000000000000001e-06, "step": 1729 }, { "epoch": 4.5496383957922415, "grad_norm": 1.537452220916748, "learning_rate": 8.947368421052632e-05, "loss": 1.2804, "step": 1730 }, { "epoch": 4.5496383957922415, "high_lr": 8.947368421052632e-05, "low_lr": 1.7894736842105265e-06, "step": 1730 }, { "epoch": 4.5496383957922415, "high_lr": 8.947368421052632e-05, "low_lr": 1.7894736842105265e-06, "step": 1730 }, { "epoch": 4.5496383957922415, "high_lr": 8.947368421052632e-05, "low_lr": 1.7894736842105265e-06, "step": 1730 }, { "epoch": 4.5496383957922415, "high_lr": 8.947368421052632e-05, "low_lr": 1.7894736842105265e-06, "step": 1730 }, { "epoch": 4.5496383957922415, "high_lr": 8.947368421052632e-05, "low_lr": 1.7894736842105265e-06, "step": 1730 }, { "epoch": 4.5496383957922415, "high_lr": 8.947368421052632e-05, "low_lr": 1.7894736842105265e-06, "step": 1730 }, { "epoch": 4.5496383957922415, "high_lr": 8.947368421052632e-05, "low_lr": 1.7894736842105265e-06, "step": 1730 }, { "epoch": 4.5496383957922415, "high_lr": 8.947368421052632e-05, "low_lr": 1.7894736842105265e-06, "step": 1730 }, { "epoch": 4.552268244575937, "grad_norm": 1.5931189060211182, "learning_rate": 8.894736842105263e-05, "loss": 1.205, "step": 1731 }, { "epoch": 4.552268244575937, "high_lr": 8.894736842105263e-05, "low_lr": 1.7789473684210528e-06, "step": 1731 }, { "epoch": 4.552268244575937, "high_lr": 8.894736842105263e-05, "low_lr": 1.7789473684210528e-06, "step": 1731 }, { "epoch": 4.552268244575937, "high_lr": 8.894736842105263e-05, "low_lr": 1.7789473684210528e-06, "step": 1731 }, { "epoch": 4.552268244575937, "high_lr": 8.894736842105263e-05, "low_lr": 1.7789473684210528e-06, "step": 1731 }, { "epoch": 4.552268244575937, "high_lr": 8.894736842105263e-05, "low_lr": 1.7789473684210528e-06, "step": 1731 }, { "epoch": 4.552268244575937, "high_lr": 8.894736842105263e-05, "low_lr": 1.7789473684210528e-06, "step": 1731 }, { "epoch": 4.552268244575937, "high_lr": 8.894736842105263e-05, "low_lr": 1.7789473684210528e-06, "step": 1731 }, { "epoch": 4.552268244575937, "high_lr": 8.894736842105263e-05, "low_lr": 1.7789473684210528e-06, "step": 1731 }, { "epoch": 4.554898093359632, "grad_norm": 1.483323574066162, "learning_rate": 8.842105263157894e-05, "loss": 1.1979, "step": 1732 }, { "epoch": 4.554898093359632, "high_lr": 8.842105263157894e-05, "low_lr": 1.768421052631579e-06, "step": 1732 }, { "epoch": 4.554898093359632, "high_lr": 8.842105263157894e-05, "low_lr": 1.768421052631579e-06, "step": 1732 }, { "epoch": 4.554898093359632, "high_lr": 8.842105263157894e-05, "low_lr": 1.768421052631579e-06, "step": 1732 }, { "epoch": 4.554898093359632, "high_lr": 8.842105263157894e-05, "low_lr": 1.768421052631579e-06, "step": 1732 }, { "epoch": 4.554898093359632, "high_lr": 8.842105263157894e-05, "low_lr": 1.768421052631579e-06, "step": 1732 }, { "epoch": 4.554898093359632, "high_lr": 8.842105263157894e-05, "low_lr": 1.768421052631579e-06, "step": 1732 }, { "epoch": 4.554898093359632, "high_lr": 8.842105263157894e-05, "low_lr": 1.768421052631579e-06, "step": 1732 }, { "epoch": 4.554898093359632, "high_lr": 8.842105263157894e-05, "low_lr": 1.768421052631579e-06, "step": 1732 }, { "epoch": 4.557527942143327, "grad_norm": 1.5738469362258911, "learning_rate": 8.789473684210526e-05, "loss": 1.2481, "step": 1733 }, { "epoch": 4.557527942143327, "high_lr": 8.789473684210526e-05, "low_lr": 1.7578947368421054e-06, "step": 1733 }, { "epoch": 4.557527942143327, "high_lr": 8.789473684210526e-05, "low_lr": 1.7578947368421054e-06, "step": 1733 }, { "epoch": 4.557527942143327, "high_lr": 8.789473684210526e-05, "low_lr": 1.7578947368421054e-06, "step": 1733 }, { "epoch": 4.557527942143327, "high_lr": 8.789473684210526e-05, "low_lr": 1.7578947368421054e-06, "step": 1733 }, { "epoch": 4.557527942143327, "high_lr": 8.789473684210526e-05, "low_lr": 1.7578947368421054e-06, "step": 1733 }, { "epoch": 4.557527942143327, "high_lr": 8.789473684210526e-05, "low_lr": 1.7578947368421054e-06, "step": 1733 }, { "epoch": 4.557527942143327, "high_lr": 8.789473684210526e-05, "low_lr": 1.7578947368421054e-06, "step": 1733 }, { "epoch": 4.557527942143327, "high_lr": 8.789473684210526e-05, "low_lr": 1.7578947368421054e-06, "step": 1733 }, { "epoch": 4.560157790927022, "grad_norm": 1.4959828853607178, "learning_rate": 8.736842105263158e-05, "loss": 1.1967, "step": 1734 }, { "epoch": 4.560157790927022, "high_lr": 8.736842105263158e-05, "low_lr": 1.7473684210526318e-06, "step": 1734 }, { "epoch": 4.560157790927022, "high_lr": 8.736842105263158e-05, "low_lr": 1.7473684210526318e-06, "step": 1734 }, { "epoch": 4.560157790927022, "high_lr": 8.736842105263158e-05, "low_lr": 1.7473684210526318e-06, "step": 1734 }, { "epoch": 4.560157790927022, "high_lr": 8.736842105263158e-05, "low_lr": 1.7473684210526318e-06, "step": 1734 }, { "epoch": 4.560157790927022, "high_lr": 8.736842105263158e-05, "low_lr": 1.7473684210526318e-06, "step": 1734 }, { "epoch": 4.560157790927022, "high_lr": 8.736842105263158e-05, "low_lr": 1.7473684210526318e-06, "step": 1734 }, { "epoch": 4.560157790927022, "high_lr": 8.736842105263158e-05, "low_lr": 1.7473684210526318e-06, "step": 1734 }, { "epoch": 4.560157790927022, "high_lr": 8.736842105263158e-05, "low_lr": 1.7473684210526318e-06, "step": 1734 }, { "epoch": 4.5627876397107165, "grad_norm": 1.5765563249588013, "learning_rate": 8.68421052631579e-05, "loss": 1.2323, "step": 1735 }, { "epoch": 4.5627876397107165, "high_lr": 8.68421052631579e-05, "low_lr": 1.736842105263158e-06, "step": 1735 }, { "epoch": 4.5627876397107165, "high_lr": 8.68421052631579e-05, "low_lr": 1.736842105263158e-06, "step": 1735 }, { "epoch": 4.5627876397107165, "high_lr": 8.68421052631579e-05, "low_lr": 1.736842105263158e-06, "step": 1735 }, { "epoch": 4.5627876397107165, "high_lr": 8.68421052631579e-05, "low_lr": 1.736842105263158e-06, "step": 1735 }, { "epoch": 4.5627876397107165, "high_lr": 8.68421052631579e-05, "low_lr": 1.736842105263158e-06, "step": 1735 }, { "epoch": 4.5627876397107165, "high_lr": 8.68421052631579e-05, "low_lr": 1.736842105263158e-06, "step": 1735 }, { "epoch": 4.5627876397107165, "high_lr": 8.68421052631579e-05, "low_lr": 1.736842105263158e-06, "step": 1735 }, { "epoch": 4.5627876397107165, "high_lr": 8.68421052631579e-05, "low_lr": 1.736842105263158e-06, "step": 1735 }, { "epoch": 4.565417488494411, "grad_norm": 1.4519524574279785, "learning_rate": 8.631578947368421e-05, "loss": 1.1848, "step": 1736 }, { "epoch": 4.565417488494411, "high_lr": 8.631578947368421e-05, "low_lr": 1.7263157894736842e-06, "step": 1736 }, { "epoch": 4.565417488494411, "high_lr": 8.631578947368421e-05, "low_lr": 1.7263157894736842e-06, "step": 1736 }, { "epoch": 4.565417488494411, "high_lr": 8.631578947368421e-05, "low_lr": 1.7263157894736842e-06, "step": 1736 }, { "epoch": 4.565417488494411, "high_lr": 8.631578947368421e-05, "low_lr": 1.7263157894736842e-06, "step": 1736 }, { "epoch": 4.565417488494411, "high_lr": 8.631578947368421e-05, "low_lr": 1.7263157894736842e-06, "step": 1736 }, { "epoch": 4.565417488494411, "high_lr": 8.631578947368421e-05, "low_lr": 1.7263157894736842e-06, "step": 1736 }, { "epoch": 4.565417488494411, "high_lr": 8.631578947368421e-05, "low_lr": 1.7263157894736842e-06, "step": 1736 }, { "epoch": 4.565417488494411, "high_lr": 8.631578947368421e-05, "low_lr": 1.7263157894736842e-06, "step": 1736 }, { "epoch": 4.568047337278107, "grad_norm": 1.6031697988510132, "learning_rate": 8.578947368421052e-05, "loss": 1.2147, "step": 1737 }, { "epoch": 4.568047337278107, "high_lr": 8.578947368421052e-05, "low_lr": 1.7157894736842107e-06, "step": 1737 }, { "epoch": 4.568047337278107, "high_lr": 8.578947368421052e-05, "low_lr": 1.7157894736842107e-06, "step": 1737 }, { "epoch": 4.568047337278107, "high_lr": 8.578947368421052e-05, "low_lr": 1.7157894736842107e-06, "step": 1737 }, { "epoch": 4.568047337278107, "high_lr": 8.578947368421052e-05, "low_lr": 1.7157894736842107e-06, "step": 1737 }, { "epoch": 4.568047337278107, "high_lr": 8.578947368421052e-05, "low_lr": 1.7157894736842107e-06, "step": 1737 }, { "epoch": 4.568047337278107, "high_lr": 8.578947368421052e-05, "low_lr": 1.7157894736842107e-06, "step": 1737 }, { "epoch": 4.568047337278107, "high_lr": 8.578947368421052e-05, "low_lr": 1.7157894736842107e-06, "step": 1737 }, { "epoch": 4.568047337278107, "high_lr": 8.578947368421052e-05, "low_lr": 1.7157894736842107e-06, "step": 1737 }, { "epoch": 4.570677186061801, "grad_norm": 1.5287643671035767, "learning_rate": 8.526315789473685e-05, "loss": 1.2137, "step": 1738 }, { "epoch": 4.570677186061801, "high_lr": 8.526315789473685e-05, "low_lr": 1.705263157894737e-06, "step": 1738 }, { "epoch": 4.570677186061801, "high_lr": 8.526315789473685e-05, "low_lr": 1.705263157894737e-06, "step": 1738 }, { "epoch": 4.570677186061801, "high_lr": 8.526315789473685e-05, "low_lr": 1.705263157894737e-06, "step": 1738 }, { "epoch": 4.570677186061801, "high_lr": 8.526315789473685e-05, "low_lr": 1.705263157894737e-06, "step": 1738 }, { "epoch": 4.570677186061801, "high_lr": 8.526315789473685e-05, "low_lr": 1.705263157894737e-06, "step": 1738 }, { "epoch": 4.570677186061801, "high_lr": 8.526315789473685e-05, "low_lr": 1.705263157894737e-06, "step": 1738 }, { "epoch": 4.570677186061801, "high_lr": 8.526315789473685e-05, "low_lr": 1.705263157894737e-06, "step": 1738 }, { "epoch": 4.570677186061801, "high_lr": 8.526315789473685e-05, "low_lr": 1.705263157894737e-06, "step": 1738 }, { "epoch": 4.573307034845496, "grad_norm": 1.4393537044525146, "learning_rate": 8.473684210526315e-05, "loss": 1.1929, "step": 1739 }, { "epoch": 4.573307034845496, "high_lr": 8.473684210526315e-05, "low_lr": 1.6947368421052632e-06, "step": 1739 }, { "epoch": 4.573307034845496, "high_lr": 8.473684210526315e-05, "low_lr": 1.6947368421052632e-06, "step": 1739 }, { "epoch": 4.573307034845496, "high_lr": 8.473684210526315e-05, "low_lr": 1.6947368421052632e-06, "step": 1739 }, { "epoch": 4.573307034845496, "high_lr": 8.473684210526315e-05, "low_lr": 1.6947368421052632e-06, "step": 1739 }, { "epoch": 4.573307034845496, "high_lr": 8.473684210526315e-05, "low_lr": 1.6947368421052632e-06, "step": 1739 }, { "epoch": 4.573307034845496, "high_lr": 8.473684210526315e-05, "low_lr": 1.6947368421052632e-06, "step": 1739 }, { "epoch": 4.573307034845496, "high_lr": 8.473684210526315e-05, "low_lr": 1.6947368421052632e-06, "step": 1739 }, { "epoch": 4.573307034845496, "high_lr": 8.473684210526315e-05, "low_lr": 1.6947368421052632e-06, "step": 1739 }, { "epoch": 4.575936883629192, "grad_norm": 1.4716784954071045, "learning_rate": 8.421052631578948e-05, "loss": 1.3061, "step": 1740 }, { "epoch": 4.575936883629192, "high_lr": 8.421052631578948e-05, "low_lr": 1.6842105263157895e-06, "step": 1740 }, { "epoch": 4.575936883629192, "high_lr": 8.421052631578948e-05, "low_lr": 1.6842105263157895e-06, "step": 1740 }, { "epoch": 4.575936883629192, "high_lr": 8.421052631578948e-05, "low_lr": 1.6842105263157895e-06, "step": 1740 }, { "epoch": 4.575936883629192, "high_lr": 8.421052631578948e-05, "low_lr": 1.6842105263157895e-06, "step": 1740 }, { "epoch": 4.575936883629192, "high_lr": 8.421052631578948e-05, "low_lr": 1.6842105263157895e-06, "step": 1740 }, { "epoch": 4.575936883629192, "high_lr": 8.421052631578948e-05, "low_lr": 1.6842105263157895e-06, "step": 1740 }, { "epoch": 4.575936883629192, "high_lr": 8.421052631578948e-05, "low_lr": 1.6842105263157895e-06, "step": 1740 }, { "epoch": 4.575936883629192, "high_lr": 8.421052631578948e-05, "low_lr": 1.6842105263157895e-06, "step": 1740 }, { "epoch": 4.578566732412886, "grad_norm": 1.615149736404419, "learning_rate": 8.368421052631579e-05, "loss": 1.214, "step": 1741 }, { "epoch": 4.578566732412886, "high_lr": 8.368421052631579e-05, "low_lr": 1.673684210526316e-06, "step": 1741 }, { "epoch": 4.578566732412886, "high_lr": 8.368421052631579e-05, "low_lr": 1.673684210526316e-06, "step": 1741 }, { "epoch": 4.578566732412886, "high_lr": 8.368421052631579e-05, "low_lr": 1.673684210526316e-06, "step": 1741 }, { "epoch": 4.578566732412886, "high_lr": 8.368421052631579e-05, "low_lr": 1.673684210526316e-06, "step": 1741 }, { "epoch": 4.578566732412886, "high_lr": 8.368421052631579e-05, "low_lr": 1.673684210526316e-06, "step": 1741 }, { "epoch": 4.578566732412886, "high_lr": 8.368421052631579e-05, "low_lr": 1.673684210526316e-06, "step": 1741 }, { "epoch": 4.578566732412886, "high_lr": 8.368421052631579e-05, "low_lr": 1.673684210526316e-06, "step": 1741 }, { "epoch": 4.578566732412886, "high_lr": 8.368421052631579e-05, "low_lr": 1.673684210526316e-06, "step": 1741 }, { "epoch": 4.581196581196581, "grad_norm": 1.5729044675827026, "learning_rate": 8.315789473684212e-05, "loss": 1.2583, "step": 1742 }, { "epoch": 4.581196581196581, "high_lr": 8.315789473684212e-05, "low_lr": 1.6631578947368424e-06, "step": 1742 }, { "epoch": 4.581196581196581, "high_lr": 8.315789473684212e-05, "low_lr": 1.6631578947368424e-06, "step": 1742 }, { "epoch": 4.581196581196581, "high_lr": 8.315789473684212e-05, "low_lr": 1.6631578947368424e-06, "step": 1742 }, { "epoch": 4.581196581196581, "high_lr": 8.315789473684212e-05, "low_lr": 1.6631578947368424e-06, "step": 1742 }, { "epoch": 4.581196581196581, "high_lr": 8.315789473684212e-05, "low_lr": 1.6631578947368424e-06, "step": 1742 }, { "epoch": 4.581196581196581, "high_lr": 8.315789473684212e-05, "low_lr": 1.6631578947368424e-06, "step": 1742 }, { "epoch": 4.581196581196581, "high_lr": 8.315789473684212e-05, "low_lr": 1.6631578947368424e-06, "step": 1742 }, { "epoch": 4.581196581196581, "high_lr": 8.315789473684212e-05, "low_lr": 1.6631578947368424e-06, "step": 1742 }, { "epoch": 4.5838264299802765, "grad_norm": 1.6791571378707886, "learning_rate": 8.263157894736841e-05, "loss": 1.2709, "step": 1743 }, { "epoch": 4.5838264299802765, "high_lr": 8.263157894736841e-05, "low_lr": 1.6526315789473685e-06, "step": 1743 }, { "epoch": 4.5838264299802765, "high_lr": 8.263157894736841e-05, "low_lr": 1.6526315789473685e-06, "step": 1743 }, { "epoch": 4.5838264299802765, "high_lr": 8.263157894736841e-05, "low_lr": 1.6526315789473685e-06, "step": 1743 }, { "epoch": 4.5838264299802765, "high_lr": 8.263157894736841e-05, "low_lr": 1.6526315789473685e-06, "step": 1743 }, { "epoch": 4.5838264299802765, "high_lr": 8.263157894736841e-05, "low_lr": 1.6526315789473685e-06, "step": 1743 }, { "epoch": 4.5838264299802765, "high_lr": 8.263157894736841e-05, "low_lr": 1.6526315789473685e-06, "step": 1743 }, { "epoch": 4.5838264299802765, "high_lr": 8.263157894736841e-05, "low_lr": 1.6526315789473685e-06, "step": 1743 }, { "epoch": 4.5838264299802765, "high_lr": 8.263157894736841e-05, "low_lr": 1.6526315789473685e-06, "step": 1743 }, { "epoch": 4.586456278763971, "grad_norm": 1.678160309791565, "learning_rate": 8.210526315789474e-05, "loss": 1.2268, "step": 1744 }, { "epoch": 4.586456278763971, "high_lr": 8.210526315789474e-05, "low_lr": 1.6421052631578948e-06, "step": 1744 }, { "epoch": 4.586456278763971, "high_lr": 8.210526315789474e-05, "low_lr": 1.6421052631578948e-06, "step": 1744 }, { "epoch": 4.586456278763971, "high_lr": 8.210526315789474e-05, "low_lr": 1.6421052631578948e-06, "step": 1744 }, { "epoch": 4.586456278763971, "high_lr": 8.210526315789474e-05, "low_lr": 1.6421052631578948e-06, "step": 1744 }, { "epoch": 4.586456278763971, "high_lr": 8.210526315789474e-05, "low_lr": 1.6421052631578948e-06, "step": 1744 }, { "epoch": 4.586456278763971, "high_lr": 8.210526315789474e-05, "low_lr": 1.6421052631578948e-06, "step": 1744 }, { "epoch": 4.586456278763971, "high_lr": 8.210526315789474e-05, "low_lr": 1.6421052631578948e-06, "step": 1744 }, { "epoch": 4.586456278763971, "high_lr": 8.210526315789474e-05, "low_lr": 1.6421052631578948e-06, "step": 1744 }, { "epoch": 4.589086127547666, "grad_norm": 1.5540153980255127, "learning_rate": 8.157894736842105e-05, "loss": 1.202, "step": 1745 }, { "epoch": 4.589086127547666, "high_lr": 8.157894736842105e-05, "low_lr": 1.6315789473684212e-06, "step": 1745 }, { "epoch": 4.589086127547666, "high_lr": 8.157894736842105e-05, "low_lr": 1.6315789473684212e-06, "step": 1745 }, { "epoch": 4.589086127547666, "high_lr": 8.157894736842105e-05, "low_lr": 1.6315789473684212e-06, "step": 1745 }, { "epoch": 4.589086127547666, "high_lr": 8.157894736842105e-05, "low_lr": 1.6315789473684212e-06, "step": 1745 }, { "epoch": 4.589086127547666, "high_lr": 8.157894736842105e-05, "low_lr": 1.6315789473684212e-06, "step": 1745 }, { "epoch": 4.589086127547666, "high_lr": 8.157894736842105e-05, "low_lr": 1.6315789473684212e-06, "step": 1745 }, { "epoch": 4.589086127547666, "high_lr": 8.157894736842105e-05, "low_lr": 1.6315789473684212e-06, "step": 1745 }, { "epoch": 4.589086127547666, "high_lr": 8.157894736842105e-05, "low_lr": 1.6315789473684212e-06, "step": 1745 }, { "epoch": 4.591715976331361, "grad_norm": 1.7042880058288574, "learning_rate": 8.105263157894737e-05, "loss": 1.177, "step": 1746 }, { "epoch": 4.591715976331361, "high_lr": 8.105263157894737e-05, "low_lr": 1.6210526315789473e-06, "step": 1746 }, { "epoch": 4.591715976331361, "high_lr": 8.105263157894737e-05, "low_lr": 1.6210526315789473e-06, "step": 1746 }, { "epoch": 4.591715976331361, "high_lr": 8.105263157894737e-05, "low_lr": 1.6210526315789473e-06, "step": 1746 }, { "epoch": 4.591715976331361, "high_lr": 8.105263157894737e-05, "low_lr": 1.6210526315789473e-06, "step": 1746 }, { "epoch": 4.591715976331361, "high_lr": 8.105263157894737e-05, "low_lr": 1.6210526315789473e-06, "step": 1746 }, { "epoch": 4.591715976331361, "high_lr": 8.105263157894737e-05, "low_lr": 1.6210526315789473e-06, "step": 1746 }, { "epoch": 4.591715976331361, "high_lr": 8.105263157894737e-05, "low_lr": 1.6210526315789473e-06, "step": 1746 }, { "epoch": 4.591715976331361, "high_lr": 8.105263157894737e-05, "low_lr": 1.6210526315789473e-06, "step": 1746 }, { "epoch": 4.594345825115056, "grad_norm": 1.5718997716903687, "learning_rate": 8.052631578947368e-05, "loss": 1.1972, "step": 1747 }, { "epoch": 4.594345825115056, "high_lr": 8.052631578947368e-05, "low_lr": 1.6105263157894738e-06, "step": 1747 }, { "epoch": 4.594345825115056, "high_lr": 8.052631578947368e-05, "low_lr": 1.6105263157894738e-06, "step": 1747 }, { "epoch": 4.594345825115056, "high_lr": 8.052631578947368e-05, "low_lr": 1.6105263157894738e-06, "step": 1747 }, { "epoch": 4.594345825115056, "high_lr": 8.052631578947368e-05, "low_lr": 1.6105263157894738e-06, "step": 1747 }, { "epoch": 4.594345825115056, "high_lr": 8.052631578947368e-05, "low_lr": 1.6105263157894738e-06, "step": 1747 }, { "epoch": 4.594345825115056, "high_lr": 8.052631578947368e-05, "low_lr": 1.6105263157894738e-06, "step": 1747 }, { "epoch": 4.594345825115056, "high_lr": 8.052631578947368e-05, "low_lr": 1.6105263157894738e-06, "step": 1747 }, { "epoch": 4.594345825115056, "high_lr": 8.052631578947368e-05, "low_lr": 1.6105263157894738e-06, "step": 1747 }, { "epoch": 4.596975673898751, "grad_norm": 1.4358165264129639, "learning_rate": 8e-05, "loss": 1.2242, "step": 1748 }, { "epoch": 4.596975673898751, "high_lr": 8e-05, "low_lr": 1.6000000000000001e-06, "step": 1748 }, { "epoch": 4.596975673898751, "high_lr": 8e-05, "low_lr": 1.6000000000000001e-06, "step": 1748 }, { "epoch": 4.596975673898751, "high_lr": 8e-05, "low_lr": 1.6000000000000001e-06, "step": 1748 }, { "epoch": 4.596975673898751, "high_lr": 8e-05, "low_lr": 1.6000000000000001e-06, "step": 1748 }, { "epoch": 4.596975673898751, "high_lr": 8e-05, "low_lr": 1.6000000000000001e-06, "step": 1748 }, { "epoch": 4.596975673898751, "high_lr": 8e-05, "low_lr": 1.6000000000000001e-06, "step": 1748 }, { "epoch": 4.596975673898751, "high_lr": 8e-05, "low_lr": 1.6000000000000001e-06, "step": 1748 }, { "epoch": 4.596975673898751, "high_lr": 8e-05, "low_lr": 1.6000000000000001e-06, "step": 1748 }, { "epoch": 4.599605522682445, "grad_norm": 1.642826795578003, "learning_rate": 7.947368421052632e-05, "loss": 1.153, "step": 1749 }, { "epoch": 4.599605522682445, "high_lr": 7.947368421052632e-05, "low_lr": 1.5894736842105265e-06, "step": 1749 }, { "epoch": 4.599605522682445, "high_lr": 7.947368421052632e-05, "low_lr": 1.5894736842105265e-06, "step": 1749 }, { "epoch": 4.599605522682445, "high_lr": 7.947368421052632e-05, "low_lr": 1.5894736842105265e-06, "step": 1749 }, { "epoch": 4.599605522682445, "high_lr": 7.947368421052632e-05, "low_lr": 1.5894736842105265e-06, "step": 1749 }, { "epoch": 4.599605522682445, "high_lr": 7.947368421052632e-05, "low_lr": 1.5894736842105265e-06, "step": 1749 }, { "epoch": 4.599605522682445, "high_lr": 7.947368421052632e-05, "low_lr": 1.5894736842105265e-06, "step": 1749 }, { "epoch": 4.599605522682445, "high_lr": 7.947368421052632e-05, "low_lr": 1.5894736842105265e-06, "step": 1749 }, { "epoch": 4.599605522682445, "high_lr": 7.947368421052632e-05, "low_lr": 1.5894736842105265e-06, "step": 1749 }, { "epoch": 4.602235371466141, "grad_norm": 1.6457551717758179, "learning_rate": 7.894736842105263e-05, "loss": 1.1835, "step": 1750 }, { "epoch": 4.602235371466141, "high_lr": 7.894736842105263e-05, "low_lr": 1.5789473684210526e-06, "step": 1750 }, { "epoch": 4.602235371466141, "high_lr": 7.894736842105263e-05, "low_lr": 1.5789473684210526e-06, "step": 1750 }, { "epoch": 4.602235371466141, "high_lr": 7.894736842105263e-05, "low_lr": 1.5789473684210526e-06, "step": 1750 }, { "epoch": 4.602235371466141, "high_lr": 7.894736842105263e-05, "low_lr": 1.5789473684210526e-06, "step": 1750 }, { "epoch": 4.602235371466141, "high_lr": 7.894736842105263e-05, "low_lr": 1.5789473684210526e-06, "step": 1750 }, { "epoch": 4.602235371466141, "high_lr": 7.894736842105263e-05, "low_lr": 1.5789473684210526e-06, "step": 1750 }, { "epoch": 4.602235371466141, "high_lr": 7.894736842105263e-05, "low_lr": 1.5789473684210526e-06, "step": 1750 }, { "epoch": 4.602235371466141, "high_lr": 7.894736842105263e-05, "low_lr": 1.5789473684210526e-06, "step": 1750 }, { "epoch": 4.6048652202498355, "grad_norm": 1.6383179426193237, "learning_rate": 7.842105263157895e-05, "loss": 1.2388, "step": 1751 }, { "epoch": 4.6048652202498355, "high_lr": 7.842105263157895e-05, "low_lr": 1.5684210526315791e-06, "step": 1751 }, { "epoch": 4.6048652202498355, "high_lr": 7.842105263157895e-05, "low_lr": 1.5684210526315791e-06, "step": 1751 }, { "epoch": 4.6048652202498355, "high_lr": 7.842105263157895e-05, "low_lr": 1.5684210526315791e-06, "step": 1751 }, { "epoch": 4.6048652202498355, "high_lr": 7.842105263157895e-05, "low_lr": 1.5684210526315791e-06, "step": 1751 }, { "epoch": 4.6048652202498355, "high_lr": 7.842105263157895e-05, "low_lr": 1.5684210526315791e-06, "step": 1751 }, { "epoch": 4.6048652202498355, "high_lr": 7.842105263157895e-05, "low_lr": 1.5684210526315791e-06, "step": 1751 }, { "epoch": 4.6048652202498355, "high_lr": 7.842105263157895e-05, "low_lr": 1.5684210526315791e-06, "step": 1751 }, { "epoch": 4.6048652202498355, "high_lr": 7.842105263157895e-05, "low_lr": 1.5684210526315791e-06, "step": 1751 }, { "epoch": 4.607495069033531, "grad_norm": 1.5649884939193726, "learning_rate": 7.789473684210527e-05, "loss": 1.1955, "step": 1752 }, { "epoch": 4.607495069033531, "high_lr": 7.789473684210527e-05, "low_lr": 1.5578947368421054e-06, "step": 1752 }, { "epoch": 4.607495069033531, "high_lr": 7.789473684210527e-05, "low_lr": 1.5578947368421054e-06, "step": 1752 }, { "epoch": 4.607495069033531, "high_lr": 7.789473684210527e-05, "low_lr": 1.5578947368421054e-06, "step": 1752 }, { "epoch": 4.607495069033531, "high_lr": 7.789473684210527e-05, "low_lr": 1.5578947368421054e-06, "step": 1752 }, { "epoch": 4.607495069033531, "high_lr": 7.789473684210527e-05, "low_lr": 1.5578947368421054e-06, "step": 1752 }, { "epoch": 4.607495069033531, "high_lr": 7.789473684210527e-05, "low_lr": 1.5578947368421054e-06, "step": 1752 }, { "epoch": 4.607495069033531, "high_lr": 7.789473684210527e-05, "low_lr": 1.5578947368421054e-06, "step": 1752 }, { "epoch": 4.607495069033531, "high_lr": 7.789473684210527e-05, "low_lr": 1.5578947368421054e-06, "step": 1752 }, { "epoch": 4.610124917817226, "grad_norm": 1.5859493017196655, "learning_rate": 7.736842105263157e-05, "loss": 1.1874, "step": 1753 }, { "epoch": 4.610124917817226, "high_lr": 7.736842105263157e-05, "low_lr": 1.5473684210526316e-06, "step": 1753 }, { "epoch": 4.610124917817226, "high_lr": 7.736842105263157e-05, "low_lr": 1.5473684210526316e-06, "step": 1753 }, { "epoch": 4.610124917817226, "high_lr": 7.736842105263157e-05, "low_lr": 1.5473684210526316e-06, "step": 1753 }, { "epoch": 4.610124917817226, "high_lr": 7.736842105263157e-05, "low_lr": 1.5473684210526316e-06, "step": 1753 }, { "epoch": 4.610124917817226, "high_lr": 7.736842105263157e-05, "low_lr": 1.5473684210526316e-06, "step": 1753 }, { "epoch": 4.610124917817226, "high_lr": 7.736842105263157e-05, "low_lr": 1.5473684210526316e-06, "step": 1753 }, { "epoch": 4.610124917817226, "high_lr": 7.736842105263157e-05, "low_lr": 1.5473684210526316e-06, "step": 1753 }, { "epoch": 4.610124917817226, "high_lr": 7.736842105263157e-05, "low_lr": 1.5473684210526316e-06, "step": 1753 }, { "epoch": 4.61275476660092, "grad_norm": 1.630597472190857, "learning_rate": 7.68421052631579e-05, "loss": 1.193, "step": 1754 }, { "epoch": 4.61275476660092, "high_lr": 7.68421052631579e-05, "low_lr": 1.5368421052631579e-06, "step": 1754 }, { "epoch": 4.61275476660092, "high_lr": 7.68421052631579e-05, "low_lr": 1.5368421052631579e-06, "step": 1754 }, { "epoch": 4.61275476660092, "high_lr": 7.68421052631579e-05, "low_lr": 1.5368421052631579e-06, "step": 1754 }, { "epoch": 4.61275476660092, "high_lr": 7.68421052631579e-05, "low_lr": 1.5368421052631579e-06, "step": 1754 }, { "epoch": 4.61275476660092, "high_lr": 7.68421052631579e-05, "low_lr": 1.5368421052631579e-06, "step": 1754 }, { "epoch": 4.61275476660092, "high_lr": 7.68421052631579e-05, "low_lr": 1.5368421052631579e-06, "step": 1754 }, { "epoch": 4.61275476660092, "high_lr": 7.68421052631579e-05, "low_lr": 1.5368421052631579e-06, "step": 1754 }, { "epoch": 4.61275476660092, "high_lr": 7.68421052631579e-05, "low_lr": 1.5368421052631579e-06, "step": 1754 }, { "epoch": 4.615384615384615, "grad_norm": 1.5932203531265259, "learning_rate": 7.631578947368421e-05, "loss": 1.2025, "step": 1755 }, { "epoch": 4.615384615384615, "high_lr": 7.631578947368421e-05, "low_lr": 1.5263157894736844e-06, "step": 1755 }, { "epoch": 4.615384615384615, "high_lr": 7.631578947368421e-05, "low_lr": 1.5263157894736844e-06, "step": 1755 }, { "epoch": 4.615384615384615, "high_lr": 7.631578947368421e-05, "low_lr": 1.5263157894736844e-06, "step": 1755 }, { "epoch": 4.615384615384615, "high_lr": 7.631578947368421e-05, "low_lr": 1.5263157894736844e-06, "step": 1755 }, { "epoch": 4.615384615384615, "high_lr": 7.631578947368421e-05, "low_lr": 1.5263157894736844e-06, "step": 1755 }, { "epoch": 4.615384615384615, "high_lr": 7.631578947368421e-05, "low_lr": 1.5263157894736844e-06, "step": 1755 }, { "epoch": 4.615384615384615, "high_lr": 7.631578947368421e-05, "low_lr": 1.5263157894736844e-06, "step": 1755 }, { "epoch": 4.615384615384615, "high_lr": 7.631578947368421e-05, "low_lr": 1.5263157894736844e-06, "step": 1755 }, { "epoch": 4.6180144641683105, "grad_norm": 1.6301535367965698, "learning_rate": 7.578947368421054e-05, "loss": 1.2028, "step": 1756 }, { "epoch": 4.6180144641683105, "high_lr": 7.578947368421054e-05, "low_lr": 1.5157894736842108e-06, "step": 1756 }, { "epoch": 4.6180144641683105, "high_lr": 7.578947368421054e-05, "low_lr": 1.5157894736842108e-06, "step": 1756 }, { "epoch": 4.6180144641683105, "high_lr": 7.578947368421054e-05, "low_lr": 1.5157894736842108e-06, "step": 1756 }, { "epoch": 4.6180144641683105, "high_lr": 7.578947368421054e-05, "low_lr": 1.5157894736842108e-06, "step": 1756 }, { "epoch": 4.6180144641683105, "high_lr": 7.578947368421054e-05, "low_lr": 1.5157894736842108e-06, "step": 1756 }, { "epoch": 4.6180144641683105, "high_lr": 7.578947368421054e-05, "low_lr": 1.5157894736842108e-06, "step": 1756 }, { "epoch": 4.6180144641683105, "high_lr": 7.578947368421054e-05, "low_lr": 1.5157894736842108e-06, "step": 1756 }, { "epoch": 4.6180144641683105, "high_lr": 7.578947368421054e-05, "low_lr": 1.5157894736842108e-06, "step": 1756 }, { "epoch": 4.620644312952005, "grad_norm": 1.7253530025482178, "learning_rate": 7.526315789473684e-05, "loss": 1.2403, "step": 1757 }, { "epoch": 4.620644312952005, "high_lr": 7.526315789473684e-05, "low_lr": 1.5052631578947369e-06, "step": 1757 }, { "epoch": 4.620644312952005, "high_lr": 7.526315789473684e-05, "low_lr": 1.5052631578947369e-06, "step": 1757 }, { "epoch": 4.620644312952005, "high_lr": 7.526315789473684e-05, "low_lr": 1.5052631578947369e-06, "step": 1757 }, { "epoch": 4.620644312952005, "high_lr": 7.526315789473684e-05, "low_lr": 1.5052631578947369e-06, "step": 1757 }, { "epoch": 4.620644312952005, "high_lr": 7.526315789473684e-05, "low_lr": 1.5052631578947369e-06, "step": 1757 }, { "epoch": 4.620644312952005, "high_lr": 7.526315789473684e-05, "low_lr": 1.5052631578947369e-06, "step": 1757 }, { "epoch": 4.620644312952005, "high_lr": 7.526315789473684e-05, "low_lr": 1.5052631578947369e-06, "step": 1757 }, { "epoch": 4.620644312952005, "high_lr": 7.526315789473684e-05, "low_lr": 1.5052631578947369e-06, "step": 1757 }, { "epoch": 4.6232741617357, "grad_norm": 1.6097984313964844, "learning_rate": 7.473684210526316e-05, "loss": 1.2019, "step": 1758 }, { "epoch": 4.6232741617357, "high_lr": 7.473684210526316e-05, "low_lr": 1.4947368421052632e-06, "step": 1758 }, { "epoch": 4.6232741617357, "high_lr": 7.473684210526316e-05, "low_lr": 1.4947368421052632e-06, "step": 1758 }, { "epoch": 4.6232741617357, "high_lr": 7.473684210526316e-05, "low_lr": 1.4947368421052632e-06, "step": 1758 }, { "epoch": 4.6232741617357, "high_lr": 7.473684210526316e-05, "low_lr": 1.4947368421052632e-06, "step": 1758 }, { "epoch": 4.6232741617357, "high_lr": 7.473684210526316e-05, "low_lr": 1.4947368421052632e-06, "step": 1758 }, { "epoch": 4.6232741617357, "high_lr": 7.473684210526316e-05, "low_lr": 1.4947368421052632e-06, "step": 1758 }, { "epoch": 4.6232741617357, "high_lr": 7.473684210526316e-05, "low_lr": 1.4947368421052632e-06, "step": 1758 }, { "epoch": 4.6232741617357, "high_lr": 7.473684210526316e-05, "low_lr": 1.4947368421052632e-06, "step": 1758 }, { "epoch": 4.625904010519395, "grad_norm": 1.4939899444580078, "learning_rate": 7.421052631578948e-05, "loss": 1.1483, "step": 1759 }, { "epoch": 4.625904010519395, "high_lr": 7.421052631578948e-05, "low_lr": 1.4842105263157897e-06, "step": 1759 }, { "epoch": 4.625904010519395, "high_lr": 7.421052631578948e-05, "low_lr": 1.4842105263157897e-06, "step": 1759 }, { "epoch": 4.625904010519395, "high_lr": 7.421052631578948e-05, "low_lr": 1.4842105263157897e-06, "step": 1759 }, { "epoch": 4.625904010519395, "high_lr": 7.421052631578948e-05, "low_lr": 1.4842105263157897e-06, "step": 1759 }, { "epoch": 4.625904010519395, "high_lr": 7.421052631578948e-05, "low_lr": 1.4842105263157897e-06, "step": 1759 }, { "epoch": 4.625904010519395, "high_lr": 7.421052631578948e-05, "low_lr": 1.4842105263157897e-06, "step": 1759 }, { "epoch": 4.625904010519395, "high_lr": 7.421052631578948e-05, "low_lr": 1.4842105263157897e-06, "step": 1759 }, { "epoch": 4.625904010519395, "high_lr": 7.421052631578948e-05, "low_lr": 1.4842105263157897e-06, "step": 1759 }, { "epoch": 4.62853385930309, "grad_norm": 1.5386459827423096, "learning_rate": 7.368421052631579e-05, "loss": 1.266, "step": 1760 }, { "epoch": 4.62853385930309, "high_lr": 7.368421052631579e-05, "low_lr": 1.4736842105263159e-06, "step": 1760 }, { "epoch": 4.62853385930309, "high_lr": 7.368421052631579e-05, "low_lr": 1.4736842105263159e-06, "step": 1760 }, { "epoch": 4.62853385930309, "high_lr": 7.368421052631579e-05, "low_lr": 1.4736842105263159e-06, "step": 1760 }, { "epoch": 4.62853385930309, "high_lr": 7.368421052631579e-05, "low_lr": 1.4736842105263159e-06, "step": 1760 }, { "epoch": 4.62853385930309, "high_lr": 7.368421052631579e-05, "low_lr": 1.4736842105263159e-06, "step": 1760 }, { "epoch": 4.62853385930309, "high_lr": 7.368421052631579e-05, "low_lr": 1.4736842105263159e-06, "step": 1760 }, { "epoch": 4.62853385930309, "high_lr": 7.368421052631579e-05, "low_lr": 1.4736842105263159e-06, "step": 1760 }, { "epoch": 4.62853385930309, "high_lr": 7.368421052631579e-05, "low_lr": 1.4736842105263159e-06, "step": 1760 }, { "epoch": 4.631163708086785, "grad_norm": 1.3842270374298096, "learning_rate": 7.31578947368421e-05, "loss": 1.2, "step": 1761 }, { "epoch": 4.631163708086785, "high_lr": 7.31578947368421e-05, "low_lr": 1.4631578947368422e-06, "step": 1761 }, { "epoch": 4.631163708086785, "high_lr": 7.31578947368421e-05, "low_lr": 1.4631578947368422e-06, "step": 1761 }, { "epoch": 4.631163708086785, "high_lr": 7.31578947368421e-05, "low_lr": 1.4631578947368422e-06, "step": 1761 }, { "epoch": 4.631163708086785, "high_lr": 7.31578947368421e-05, "low_lr": 1.4631578947368422e-06, "step": 1761 }, { "epoch": 4.631163708086785, "high_lr": 7.31578947368421e-05, "low_lr": 1.4631578947368422e-06, "step": 1761 }, { "epoch": 4.631163708086785, "high_lr": 7.31578947368421e-05, "low_lr": 1.4631578947368422e-06, "step": 1761 }, { "epoch": 4.631163708086785, "high_lr": 7.31578947368421e-05, "low_lr": 1.4631578947368422e-06, "step": 1761 }, { "epoch": 4.631163708086785, "high_lr": 7.31578947368421e-05, "low_lr": 1.4631578947368422e-06, "step": 1761 }, { "epoch": 4.63379355687048, "grad_norm": 1.5356643199920654, "learning_rate": 7.263157894736843e-05, "loss": 1.2668, "step": 1762 }, { "epoch": 4.63379355687048, "high_lr": 7.263157894736843e-05, "low_lr": 1.4526315789473685e-06, "step": 1762 }, { "epoch": 4.63379355687048, "high_lr": 7.263157894736843e-05, "low_lr": 1.4526315789473685e-06, "step": 1762 }, { "epoch": 4.63379355687048, "high_lr": 7.263157894736843e-05, "low_lr": 1.4526315789473685e-06, "step": 1762 }, { "epoch": 4.63379355687048, "high_lr": 7.263157894736843e-05, "low_lr": 1.4526315789473685e-06, "step": 1762 }, { "epoch": 4.63379355687048, "high_lr": 7.263157894736843e-05, "low_lr": 1.4526315789473685e-06, "step": 1762 }, { "epoch": 4.63379355687048, "high_lr": 7.263157894736843e-05, "low_lr": 1.4526315789473685e-06, "step": 1762 }, { "epoch": 4.63379355687048, "high_lr": 7.263157894736843e-05, "low_lr": 1.4526315789473685e-06, "step": 1762 }, { "epoch": 4.63379355687048, "high_lr": 7.263157894736843e-05, "low_lr": 1.4526315789473685e-06, "step": 1762 }, { "epoch": 4.636423405654175, "grad_norm": 1.7029571533203125, "learning_rate": 7.210526315789474e-05, "loss": 1.2446, "step": 1763 }, { "epoch": 4.636423405654175, "high_lr": 7.210526315789474e-05, "low_lr": 1.442105263157895e-06, "step": 1763 }, { "epoch": 4.636423405654175, "high_lr": 7.210526315789474e-05, "low_lr": 1.442105263157895e-06, "step": 1763 }, { "epoch": 4.636423405654175, "high_lr": 7.210526315789474e-05, "low_lr": 1.442105263157895e-06, "step": 1763 }, { "epoch": 4.636423405654175, "high_lr": 7.210526315789474e-05, "low_lr": 1.442105263157895e-06, "step": 1763 }, { "epoch": 4.636423405654175, "high_lr": 7.210526315789474e-05, "low_lr": 1.442105263157895e-06, "step": 1763 }, { "epoch": 4.636423405654175, "high_lr": 7.210526315789474e-05, "low_lr": 1.442105263157895e-06, "step": 1763 }, { "epoch": 4.636423405654175, "high_lr": 7.210526315789474e-05, "low_lr": 1.442105263157895e-06, "step": 1763 }, { "epoch": 4.636423405654175, "high_lr": 7.210526315789474e-05, "low_lr": 1.442105263157895e-06, "step": 1763 }, { "epoch": 4.6390532544378695, "grad_norm": 1.6327396631240845, "learning_rate": 7.157894736842105e-05, "loss": 1.1984, "step": 1764 }, { "epoch": 4.6390532544378695, "high_lr": 7.157894736842105e-05, "low_lr": 1.4315789473684212e-06, "step": 1764 }, { "epoch": 4.6390532544378695, "high_lr": 7.157894736842105e-05, "low_lr": 1.4315789473684212e-06, "step": 1764 }, { "epoch": 4.6390532544378695, "high_lr": 7.157894736842105e-05, "low_lr": 1.4315789473684212e-06, "step": 1764 }, { "epoch": 4.6390532544378695, "high_lr": 7.157894736842105e-05, "low_lr": 1.4315789473684212e-06, "step": 1764 }, { "epoch": 4.6390532544378695, "high_lr": 7.157894736842105e-05, "low_lr": 1.4315789473684212e-06, "step": 1764 }, { "epoch": 4.6390532544378695, "high_lr": 7.157894736842105e-05, "low_lr": 1.4315789473684212e-06, "step": 1764 }, { "epoch": 4.6390532544378695, "high_lr": 7.157894736842105e-05, "low_lr": 1.4315789473684212e-06, "step": 1764 }, { "epoch": 4.6390532544378695, "high_lr": 7.157894736842105e-05, "low_lr": 1.4315789473684212e-06, "step": 1764 }, { "epoch": 4.641683103221565, "grad_norm": 1.6883735656738281, "learning_rate": 7.105263157894737e-05, "loss": 1.2464, "step": 1765 }, { "epoch": 4.641683103221565, "high_lr": 7.105263157894737e-05, "low_lr": 1.4210526315789475e-06, "step": 1765 }, { "epoch": 4.641683103221565, "high_lr": 7.105263157894737e-05, "low_lr": 1.4210526315789475e-06, "step": 1765 }, { "epoch": 4.641683103221565, "high_lr": 7.105263157894737e-05, "low_lr": 1.4210526315789475e-06, "step": 1765 }, { "epoch": 4.641683103221565, "high_lr": 7.105263157894737e-05, "low_lr": 1.4210526315789475e-06, "step": 1765 }, { "epoch": 4.641683103221565, "high_lr": 7.105263157894737e-05, "low_lr": 1.4210526315789475e-06, "step": 1765 }, { "epoch": 4.641683103221565, "high_lr": 7.105263157894737e-05, "low_lr": 1.4210526315789475e-06, "step": 1765 }, { "epoch": 4.641683103221565, "high_lr": 7.105263157894737e-05, "low_lr": 1.4210526315789475e-06, "step": 1765 }, { "epoch": 4.641683103221565, "high_lr": 7.105263157894737e-05, "low_lr": 1.4210526315789475e-06, "step": 1765 }, { "epoch": 4.64431295200526, "grad_norm": 1.5838812589645386, "learning_rate": 7.05263157894737e-05, "loss": 1.2236, "step": 1766 }, { "epoch": 4.64431295200526, "high_lr": 7.05263157894737e-05, "low_lr": 1.4105263157894738e-06, "step": 1766 }, { "epoch": 4.64431295200526, "high_lr": 7.05263157894737e-05, "low_lr": 1.4105263157894738e-06, "step": 1766 }, { "epoch": 4.64431295200526, "high_lr": 7.05263157894737e-05, "low_lr": 1.4105263157894738e-06, "step": 1766 }, { "epoch": 4.64431295200526, "high_lr": 7.05263157894737e-05, "low_lr": 1.4105263157894738e-06, "step": 1766 }, { "epoch": 4.64431295200526, "high_lr": 7.05263157894737e-05, "low_lr": 1.4105263157894738e-06, "step": 1766 }, { "epoch": 4.64431295200526, "high_lr": 7.05263157894737e-05, "low_lr": 1.4105263157894738e-06, "step": 1766 }, { "epoch": 4.64431295200526, "high_lr": 7.05263157894737e-05, "low_lr": 1.4105263157894738e-06, "step": 1766 }, { "epoch": 4.64431295200526, "high_lr": 7.05263157894737e-05, "low_lr": 1.4105263157894738e-06, "step": 1766 }, { "epoch": 4.646942800788954, "grad_norm": 1.6791688203811646, "learning_rate": 7.000000000000001e-05, "loss": 1.1848, "step": 1767 }, { "epoch": 4.646942800788954, "high_lr": 7.000000000000001e-05, "low_lr": 1.4000000000000001e-06, "step": 1767 }, { "epoch": 4.646942800788954, "high_lr": 7.000000000000001e-05, "low_lr": 1.4000000000000001e-06, "step": 1767 }, { "epoch": 4.646942800788954, "high_lr": 7.000000000000001e-05, "low_lr": 1.4000000000000001e-06, "step": 1767 }, { "epoch": 4.646942800788954, "high_lr": 7.000000000000001e-05, "low_lr": 1.4000000000000001e-06, "step": 1767 }, { "epoch": 4.646942800788954, "high_lr": 7.000000000000001e-05, "low_lr": 1.4000000000000001e-06, "step": 1767 }, { "epoch": 4.646942800788954, "high_lr": 7.000000000000001e-05, "low_lr": 1.4000000000000001e-06, "step": 1767 }, { "epoch": 4.646942800788954, "high_lr": 7.000000000000001e-05, "low_lr": 1.4000000000000001e-06, "step": 1767 }, { "epoch": 4.646942800788954, "high_lr": 7.000000000000001e-05, "low_lr": 1.4000000000000001e-06, "step": 1767 }, { "epoch": 4.64957264957265, "grad_norm": 1.6748144626617432, "learning_rate": 6.947368421052632e-05, "loss": 1.1814, "step": 1768 }, { "epoch": 4.64957264957265, "high_lr": 6.947368421052632e-05, "low_lr": 1.3894736842105263e-06, "step": 1768 }, { "epoch": 4.64957264957265, "high_lr": 6.947368421052632e-05, "low_lr": 1.3894736842105263e-06, "step": 1768 }, { "epoch": 4.64957264957265, "high_lr": 6.947368421052632e-05, "low_lr": 1.3894736842105263e-06, "step": 1768 }, { "epoch": 4.64957264957265, "high_lr": 6.947368421052632e-05, "low_lr": 1.3894736842105263e-06, "step": 1768 }, { "epoch": 4.64957264957265, "high_lr": 6.947368421052632e-05, "low_lr": 1.3894736842105263e-06, "step": 1768 }, { "epoch": 4.64957264957265, "high_lr": 6.947368421052632e-05, "low_lr": 1.3894736842105263e-06, "step": 1768 }, { "epoch": 4.64957264957265, "high_lr": 6.947368421052632e-05, "low_lr": 1.3894736842105263e-06, "step": 1768 }, { "epoch": 4.64957264957265, "high_lr": 6.947368421052632e-05, "low_lr": 1.3894736842105263e-06, "step": 1768 }, { "epoch": 4.652202498356345, "grad_norm": 1.42503821849823, "learning_rate": 6.894736842105263e-05, "loss": 1.2064, "step": 1769 }, { "epoch": 4.652202498356345, "high_lr": 6.894736842105263e-05, "low_lr": 1.3789473684210528e-06, "step": 1769 }, { "epoch": 4.652202498356345, "high_lr": 6.894736842105263e-05, "low_lr": 1.3789473684210528e-06, "step": 1769 }, { "epoch": 4.652202498356345, "high_lr": 6.894736842105263e-05, "low_lr": 1.3789473684210528e-06, "step": 1769 }, { "epoch": 4.652202498356345, "high_lr": 6.894736842105263e-05, "low_lr": 1.3789473684210528e-06, "step": 1769 }, { "epoch": 4.652202498356345, "high_lr": 6.894736842105263e-05, "low_lr": 1.3789473684210528e-06, "step": 1769 }, { "epoch": 4.652202498356345, "high_lr": 6.894736842105263e-05, "low_lr": 1.3789473684210528e-06, "step": 1769 }, { "epoch": 4.652202498356345, "high_lr": 6.894736842105263e-05, "low_lr": 1.3789473684210528e-06, "step": 1769 }, { "epoch": 4.652202498356345, "high_lr": 6.894736842105263e-05, "low_lr": 1.3789473684210528e-06, "step": 1769 }, { "epoch": 4.654832347140039, "grad_norm": 1.7519937753677368, "learning_rate": 6.842105263157896e-05, "loss": 1.2512, "step": 1770 }, { "epoch": 4.654832347140039, "high_lr": 6.842105263157896e-05, "low_lr": 1.3684210526315791e-06, "step": 1770 }, { "epoch": 4.654832347140039, "high_lr": 6.842105263157896e-05, "low_lr": 1.3684210526315791e-06, "step": 1770 }, { "epoch": 4.654832347140039, "high_lr": 6.842105263157896e-05, "low_lr": 1.3684210526315791e-06, "step": 1770 }, { "epoch": 4.654832347140039, "high_lr": 6.842105263157896e-05, "low_lr": 1.3684210526315791e-06, "step": 1770 }, { "epoch": 4.654832347140039, "high_lr": 6.842105263157896e-05, "low_lr": 1.3684210526315791e-06, "step": 1770 }, { "epoch": 4.654832347140039, "high_lr": 6.842105263157896e-05, "low_lr": 1.3684210526315791e-06, "step": 1770 }, { "epoch": 4.654832347140039, "high_lr": 6.842105263157896e-05, "low_lr": 1.3684210526315791e-06, "step": 1770 }, { "epoch": 4.654832347140039, "high_lr": 6.842105263157896e-05, "low_lr": 1.3684210526315791e-06, "step": 1770 }, { "epoch": 4.657462195923735, "grad_norm": 1.4318557977676392, "learning_rate": 6.789473684210526e-05, "loss": 1.2424, "step": 1771 }, { "epoch": 4.657462195923735, "high_lr": 6.789473684210526e-05, "low_lr": 1.3578947368421052e-06, "step": 1771 }, { "epoch": 4.657462195923735, "high_lr": 6.789473684210526e-05, "low_lr": 1.3578947368421052e-06, "step": 1771 }, { "epoch": 4.657462195923735, "high_lr": 6.789473684210526e-05, "low_lr": 1.3578947368421052e-06, "step": 1771 }, { "epoch": 4.657462195923735, "high_lr": 6.789473684210526e-05, "low_lr": 1.3578947368421052e-06, "step": 1771 }, { "epoch": 4.657462195923735, "high_lr": 6.789473684210526e-05, "low_lr": 1.3578947368421052e-06, "step": 1771 }, { "epoch": 4.657462195923735, "high_lr": 6.789473684210526e-05, "low_lr": 1.3578947368421052e-06, "step": 1771 }, { "epoch": 4.657462195923735, "high_lr": 6.789473684210526e-05, "low_lr": 1.3578947368421052e-06, "step": 1771 }, { "epoch": 4.657462195923735, "high_lr": 6.789473684210526e-05, "low_lr": 1.3578947368421052e-06, "step": 1771 }, { "epoch": 4.660092044707429, "grad_norm": 1.539652705192566, "learning_rate": 6.736842105263157e-05, "loss": 1.2336, "step": 1772 }, { "epoch": 4.660092044707429, "high_lr": 6.736842105263157e-05, "low_lr": 1.3473684210526316e-06, "step": 1772 }, { "epoch": 4.660092044707429, "high_lr": 6.736842105263157e-05, "low_lr": 1.3473684210526316e-06, "step": 1772 }, { "epoch": 4.660092044707429, "high_lr": 6.736842105263157e-05, "low_lr": 1.3473684210526316e-06, "step": 1772 }, { "epoch": 4.660092044707429, "high_lr": 6.736842105263157e-05, "low_lr": 1.3473684210526316e-06, "step": 1772 }, { "epoch": 4.660092044707429, "high_lr": 6.736842105263157e-05, "low_lr": 1.3473684210526316e-06, "step": 1772 }, { "epoch": 4.660092044707429, "high_lr": 6.736842105263157e-05, "low_lr": 1.3473684210526316e-06, "step": 1772 }, { "epoch": 4.660092044707429, "high_lr": 6.736842105263157e-05, "low_lr": 1.3473684210526316e-06, "step": 1772 }, { "epoch": 4.660092044707429, "high_lr": 6.736842105263157e-05, "low_lr": 1.3473684210526316e-06, "step": 1772 }, { "epoch": 4.662721893491124, "grad_norm": 1.5103962421417236, "learning_rate": 6.68421052631579e-05, "loss": 1.2183, "step": 1773 }, { "epoch": 4.662721893491124, "high_lr": 6.68421052631579e-05, "low_lr": 1.3368421052631581e-06, "step": 1773 }, { "epoch": 4.662721893491124, "high_lr": 6.68421052631579e-05, "low_lr": 1.3368421052631581e-06, "step": 1773 }, { "epoch": 4.662721893491124, "high_lr": 6.68421052631579e-05, "low_lr": 1.3368421052631581e-06, "step": 1773 }, { "epoch": 4.662721893491124, "high_lr": 6.68421052631579e-05, "low_lr": 1.3368421052631581e-06, "step": 1773 }, { "epoch": 4.662721893491124, "high_lr": 6.68421052631579e-05, "low_lr": 1.3368421052631581e-06, "step": 1773 }, { "epoch": 4.662721893491124, "high_lr": 6.68421052631579e-05, "low_lr": 1.3368421052631581e-06, "step": 1773 }, { "epoch": 4.662721893491124, "high_lr": 6.68421052631579e-05, "low_lr": 1.3368421052631581e-06, "step": 1773 }, { "epoch": 4.662721893491124, "high_lr": 6.68421052631579e-05, "low_lr": 1.3368421052631581e-06, "step": 1773 }, { "epoch": 4.665351742274819, "grad_norm": 1.577190637588501, "learning_rate": 6.631578947368421e-05, "loss": 1.2445, "step": 1774 }, { "epoch": 4.665351742274819, "high_lr": 6.631578947368421e-05, "low_lr": 1.3263157894736844e-06, "step": 1774 }, { "epoch": 4.665351742274819, "high_lr": 6.631578947368421e-05, "low_lr": 1.3263157894736844e-06, "step": 1774 }, { "epoch": 4.665351742274819, "high_lr": 6.631578947368421e-05, "low_lr": 1.3263157894736844e-06, "step": 1774 }, { "epoch": 4.665351742274819, "high_lr": 6.631578947368421e-05, "low_lr": 1.3263157894736844e-06, "step": 1774 }, { "epoch": 4.665351742274819, "high_lr": 6.631578947368421e-05, "low_lr": 1.3263157894736844e-06, "step": 1774 }, { "epoch": 4.665351742274819, "high_lr": 6.631578947368421e-05, "low_lr": 1.3263157894736844e-06, "step": 1774 }, { "epoch": 4.665351742274819, "high_lr": 6.631578947368421e-05, "low_lr": 1.3263157894736844e-06, "step": 1774 }, { "epoch": 4.665351742274819, "high_lr": 6.631578947368421e-05, "low_lr": 1.3263157894736844e-06, "step": 1774 }, { "epoch": 4.667981591058514, "grad_norm": 1.4346929788589478, "learning_rate": 6.578947368421052e-05, "loss": 1.1832, "step": 1775 }, { "epoch": 4.667981591058514, "high_lr": 6.578947368421052e-05, "low_lr": 1.3157894736842106e-06, "step": 1775 }, { "epoch": 4.667981591058514, "high_lr": 6.578947368421052e-05, "low_lr": 1.3157894736842106e-06, "step": 1775 }, { "epoch": 4.667981591058514, "high_lr": 6.578947368421052e-05, "low_lr": 1.3157894736842106e-06, "step": 1775 }, { "epoch": 4.667981591058514, "high_lr": 6.578947368421052e-05, "low_lr": 1.3157894736842106e-06, "step": 1775 }, { "epoch": 4.667981591058514, "high_lr": 6.578947368421052e-05, "low_lr": 1.3157894736842106e-06, "step": 1775 }, { "epoch": 4.667981591058514, "high_lr": 6.578947368421052e-05, "low_lr": 1.3157894736842106e-06, "step": 1775 }, { "epoch": 4.667981591058514, "high_lr": 6.578947368421052e-05, "low_lr": 1.3157894736842106e-06, "step": 1775 }, { "epoch": 4.667981591058514, "high_lr": 6.578947368421052e-05, "low_lr": 1.3157894736842106e-06, "step": 1775 }, { "epoch": 4.670611439842209, "grad_norm": 1.6455739736557007, "learning_rate": 6.526315789473684e-05, "loss": 1.1984, "step": 1776 }, { "epoch": 4.670611439842209, "high_lr": 6.526315789473684e-05, "low_lr": 1.3052631578947369e-06, "step": 1776 }, { "epoch": 4.670611439842209, "high_lr": 6.526315789473684e-05, "low_lr": 1.3052631578947369e-06, "step": 1776 }, { "epoch": 4.670611439842209, "high_lr": 6.526315789473684e-05, "low_lr": 1.3052631578947369e-06, "step": 1776 }, { "epoch": 4.670611439842209, "high_lr": 6.526315789473684e-05, "low_lr": 1.3052631578947369e-06, "step": 1776 }, { "epoch": 4.670611439842209, "high_lr": 6.526315789473684e-05, "low_lr": 1.3052631578947369e-06, "step": 1776 }, { "epoch": 4.670611439842209, "high_lr": 6.526315789473684e-05, "low_lr": 1.3052631578947369e-06, "step": 1776 }, { "epoch": 4.670611439842209, "high_lr": 6.526315789473684e-05, "low_lr": 1.3052631578947369e-06, "step": 1776 }, { "epoch": 4.670611439842209, "high_lr": 6.526315789473684e-05, "low_lr": 1.3052631578947369e-06, "step": 1776 }, { "epoch": 4.6732412886259045, "grad_norm": 1.6151623725891113, "learning_rate": 6.473684210526316e-05, "loss": 1.2542, "step": 1777 }, { "epoch": 4.6732412886259045, "high_lr": 6.473684210526316e-05, "low_lr": 1.2947368421052634e-06, "step": 1777 }, { "epoch": 4.6732412886259045, "high_lr": 6.473684210526316e-05, "low_lr": 1.2947368421052634e-06, "step": 1777 }, { "epoch": 4.6732412886259045, "high_lr": 6.473684210526316e-05, "low_lr": 1.2947368421052634e-06, "step": 1777 }, { "epoch": 4.6732412886259045, "high_lr": 6.473684210526316e-05, "low_lr": 1.2947368421052634e-06, "step": 1777 }, { "epoch": 4.6732412886259045, "high_lr": 6.473684210526316e-05, "low_lr": 1.2947368421052634e-06, "step": 1777 }, { "epoch": 4.6732412886259045, "high_lr": 6.473684210526316e-05, "low_lr": 1.2947368421052634e-06, "step": 1777 }, { "epoch": 4.6732412886259045, "high_lr": 6.473684210526316e-05, "low_lr": 1.2947368421052634e-06, "step": 1777 }, { "epoch": 4.6732412886259045, "high_lr": 6.473684210526316e-05, "low_lr": 1.2947368421052634e-06, "step": 1777 }, { "epoch": 4.675871137409599, "grad_norm": 1.5017870664596558, "learning_rate": 6.421052631578946e-05, "loss": 1.2247, "step": 1778 }, { "epoch": 4.675871137409599, "high_lr": 6.421052631578946e-05, "low_lr": 1.2842105263157895e-06, "step": 1778 }, { "epoch": 4.675871137409599, "high_lr": 6.421052631578946e-05, "low_lr": 1.2842105263157895e-06, "step": 1778 }, { "epoch": 4.675871137409599, "high_lr": 6.421052631578946e-05, "low_lr": 1.2842105263157895e-06, "step": 1778 }, { "epoch": 4.675871137409599, "high_lr": 6.421052631578946e-05, "low_lr": 1.2842105263157895e-06, "step": 1778 }, { "epoch": 4.675871137409599, "high_lr": 6.421052631578946e-05, "low_lr": 1.2842105263157895e-06, "step": 1778 }, { "epoch": 4.675871137409599, "high_lr": 6.421052631578946e-05, "low_lr": 1.2842105263157895e-06, "step": 1778 }, { "epoch": 4.675871137409599, "high_lr": 6.421052631578946e-05, "low_lr": 1.2842105263157895e-06, "step": 1778 }, { "epoch": 4.675871137409599, "high_lr": 6.421052631578946e-05, "low_lr": 1.2842105263157895e-06, "step": 1778 }, { "epoch": 4.678500986193294, "grad_norm": 1.5715364217758179, "learning_rate": 6.368421052631579e-05, "loss": 1.208, "step": 1779 }, { "epoch": 4.678500986193294, "high_lr": 6.368421052631579e-05, "low_lr": 1.2736842105263159e-06, "step": 1779 }, { "epoch": 4.678500986193294, "high_lr": 6.368421052631579e-05, "low_lr": 1.2736842105263159e-06, "step": 1779 }, { "epoch": 4.678500986193294, "high_lr": 6.368421052631579e-05, "low_lr": 1.2736842105263159e-06, "step": 1779 }, { "epoch": 4.678500986193294, "high_lr": 6.368421052631579e-05, "low_lr": 1.2736842105263159e-06, "step": 1779 }, { "epoch": 4.678500986193294, "high_lr": 6.368421052631579e-05, "low_lr": 1.2736842105263159e-06, "step": 1779 }, { "epoch": 4.678500986193294, "high_lr": 6.368421052631579e-05, "low_lr": 1.2736842105263159e-06, "step": 1779 }, { "epoch": 4.678500986193294, "high_lr": 6.368421052631579e-05, "low_lr": 1.2736842105263159e-06, "step": 1779 }, { "epoch": 4.678500986193294, "high_lr": 6.368421052631579e-05, "low_lr": 1.2736842105263159e-06, "step": 1779 }, { "epoch": 4.6811308349769885, "grad_norm": 1.5146335363388062, "learning_rate": 6.31578947368421e-05, "loss": 1.2493, "step": 1780 }, { "epoch": 4.6811308349769885, "high_lr": 6.31578947368421e-05, "low_lr": 1.2631578947368422e-06, "step": 1780 }, { "epoch": 4.6811308349769885, "high_lr": 6.31578947368421e-05, "low_lr": 1.2631578947368422e-06, "step": 1780 }, { "epoch": 4.6811308349769885, "high_lr": 6.31578947368421e-05, "low_lr": 1.2631578947368422e-06, "step": 1780 }, { "epoch": 4.6811308349769885, "high_lr": 6.31578947368421e-05, "low_lr": 1.2631578947368422e-06, "step": 1780 }, { "epoch": 4.6811308349769885, "high_lr": 6.31578947368421e-05, "low_lr": 1.2631578947368422e-06, "step": 1780 }, { "epoch": 4.6811308349769885, "high_lr": 6.31578947368421e-05, "low_lr": 1.2631578947368422e-06, "step": 1780 }, { "epoch": 4.6811308349769885, "high_lr": 6.31578947368421e-05, "low_lr": 1.2631578947368422e-06, "step": 1780 }, { "epoch": 4.6811308349769885, "high_lr": 6.31578947368421e-05, "low_lr": 1.2631578947368422e-06, "step": 1780 }, { "epoch": 4.683760683760684, "grad_norm": 1.4685746431350708, "learning_rate": 6.263157894736843e-05, "loss": 1.2214, "step": 1781 }, { "epoch": 4.683760683760684, "high_lr": 6.263157894736843e-05, "low_lr": 1.2526315789473687e-06, "step": 1781 }, { "epoch": 4.683760683760684, "high_lr": 6.263157894736843e-05, "low_lr": 1.2526315789473687e-06, "step": 1781 }, { "epoch": 4.683760683760684, "high_lr": 6.263157894736843e-05, "low_lr": 1.2526315789473687e-06, "step": 1781 }, { "epoch": 4.683760683760684, "high_lr": 6.263157894736843e-05, "low_lr": 1.2526315789473687e-06, "step": 1781 }, { "epoch": 4.683760683760684, "high_lr": 6.263157894736843e-05, "low_lr": 1.2526315789473687e-06, "step": 1781 }, { "epoch": 4.683760683760684, "high_lr": 6.263157894736843e-05, "low_lr": 1.2526315789473687e-06, "step": 1781 }, { "epoch": 4.683760683760684, "high_lr": 6.263157894736843e-05, "low_lr": 1.2526315789473687e-06, "step": 1781 }, { "epoch": 4.683760683760684, "high_lr": 6.263157894736843e-05, "low_lr": 1.2526315789473687e-06, "step": 1781 }, { "epoch": 4.686390532544379, "grad_norm": 1.6440409421920776, "learning_rate": 6.210526315789474e-05, "loss": 1.1739, "step": 1782 }, { "epoch": 4.686390532544379, "high_lr": 6.210526315789474e-05, "low_lr": 1.2421052631578948e-06, "step": 1782 }, { "epoch": 4.686390532544379, "high_lr": 6.210526315789474e-05, "low_lr": 1.2421052631578948e-06, "step": 1782 }, { "epoch": 4.686390532544379, "high_lr": 6.210526315789474e-05, "low_lr": 1.2421052631578948e-06, "step": 1782 }, { "epoch": 4.686390532544379, "high_lr": 6.210526315789474e-05, "low_lr": 1.2421052631578948e-06, "step": 1782 }, { "epoch": 4.686390532544379, "high_lr": 6.210526315789474e-05, "low_lr": 1.2421052631578948e-06, "step": 1782 }, { "epoch": 4.686390532544379, "high_lr": 6.210526315789474e-05, "low_lr": 1.2421052631578948e-06, "step": 1782 }, { "epoch": 4.686390532544379, "high_lr": 6.210526315789474e-05, "low_lr": 1.2421052631578948e-06, "step": 1782 }, { "epoch": 4.686390532544379, "high_lr": 6.210526315789474e-05, "low_lr": 1.2421052631578948e-06, "step": 1782 }, { "epoch": 4.689020381328073, "grad_norm": 1.6437931060791016, "learning_rate": 6.157894736842106e-05, "loss": 1.2407, "step": 1783 }, { "epoch": 4.689020381328073, "high_lr": 6.157894736842106e-05, "low_lr": 1.2315789473684212e-06, "step": 1783 }, { "epoch": 4.689020381328073, "high_lr": 6.157894736842106e-05, "low_lr": 1.2315789473684212e-06, "step": 1783 }, { "epoch": 4.689020381328073, "high_lr": 6.157894736842106e-05, "low_lr": 1.2315789473684212e-06, "step": 1783 }, { "epoch": 4.689020381328073, "high_lr": 6.157894736842106e-05, "low_lr": 1.2315789473684212e-06, "step": 1783 }, { "epoch": 4.689020381328073, "high_lr": 6.157894736842106e-05, "low_lr": 1.2315789473684212e-06, "step": 1783 }, { "epoch": 4.689020381328073, "high_lr": 6.157894736842106e-05, "low_lr": 1.2315789473684212e-06, "step": 1783 }, { "epoch": 4.689020381328073, "high_lr": 6.157894736842106e-05, "low_lr": 1.2315789473684212e-06, "step": 1783 }, { "epoch": 4.689020381328073, "high_lr": 6.157894736842106e-05, "low_lr": 1.2315789473684212e-06, "step": 1783 }, { "epoch": 4.691650230111769, "grad_norm": 1.6867625713348389, "learning_rate": 6.105263157894737e-05, "loss": 1.1678, "step": 1784 }, { "epoch": 4.691650230111769, "high_lr": 6.105263157894737e-05, "low_lr": 1.2210526315789475e-06, "step": 1784 }, { "epoch": 4.691650230111769, "high_lr": 6.105263157894737e-05, "low_lr": 1.2210526315789475e-06, "step": 1784 }, { "epoch": 4.691650230111769, "high_lr": 6.105263157894737e-05, "low_lr": 1.2210526315789475e-06, "step": 1784 }, { "epoch": 4.691650230111769, "high_lr": 6.105263157894737e-05, "low_lr": 1.2210526315789475e-06, "step": 1784 }, { "epoch": 4.691650230111769, "high_lr": 6.105263157894737e-05, "low_lr": 1.2210526315789475e-06, "step": 1784 }, { "epoch": 4.691650230111769, "high_lr": 6.105263157894737e-05, "low_lr": 1.2210526315789475e-06, "step": 1784 }, { "epoch": 4.691650230111769, "high_lr": 6.105263157894737e-05, "low_lr": 1.2210526315789475e-06, "step": 1784 }, { "epoch": 4.691650230111769, "high_lr": 6.105263157894737e-05, "low_lr": 1.2210526315789475e-06, "step": 1784 }, { "epoch": 4.6942800788954635, "grad_norm": 1.5684926509857178, "learning_rate": 6.052631578947369e-05, "loss": 1.2521, "step": 1785 }, { "epoch": 4.6942800788954635, "high_lr": 6.052631578947369e-05, "low_lr": 1.2105263157894738e-06, "step": 1785 }, { "epoch": 4.6942800788954635, "high_lr": 6.052631578947369e-05, "low_lr": 1.2105263157894738e-06, "step": 1785 }, { "epoch": 4.6942800788954635, "high_lr": 6.052631578947369e-05, "low_lr": 1.2105263157894738e-06, "step": 1785 }, { "epoch": 4.6942800788954635, "high_lr": 6.052631578947369e-05, "low_lr": 1.2105263157894738e-06, "step": 1785 }, { "epoch": 4.6942800788954635, "high_lr": 6.052631578947369e-05, "low_lr": 1.2105263157894738e-06, "step": 1785 }, { "epoch": 4.6942800788954635, "high_lr": 6.052631578947369e-05, "low_lr": 1.2105263157894738e-06, "step": 1785 }, { "epoch": 4.6942800788954635, "high_lr": 6.052631578947369e-05, "low_lr": 1.2105263157894738e-06, "step": 1785 }, { "epoch": 4.6942800788954635, "high_lr": 6.052631578947369e-05, "low_lr": 1.2105263157894738e-06, "step": 1785 }, { "epoch": 4.696909927679158, "grad_norm": 1.5172704458236694, "learning_rate": 6e-05, "loss": 1.2188, "step": 1786 }, { "epoch": 4.696909927679158, "high_lr": 6e-05, "low_lr": 1.2000000000000002e-06, "step": 1786 }, { "epoch": 4.696909927679158, "high_lr": 6e-05, "low_lr": 1.2000000000000002e-06, "step": 1786 }, { "epoch": 4.696909927679158, "high_lr": 6e-05, "low_lr": 1.2000000000000002e-06, "step": 1786 }, { "epoch": 4.696909927679158, "high_lr": 6e-05, "low_lr": 1.2000000000000002e-06, "step": 1786 }, { "epoch": 4.696909927679158, "high_lr": 6e-05, "low_lr": 1.2000000000000002e-06, "step": 1786 }, { "epoch": 4.696909927679158, "high_lr": 6e-05, "low_lr": 1.2000000000000002e-06, "step": 1786 }, { "epoch": 4.696909927679158, "high_lr": 6e-05, "low_lr": 1.2000000000000002e-06, "step": 1786 }, { "epoch": 4.696909927679158, "high_lr": 6e-05, "low_lr": 1.2000000000000002e-06, "step": 1786 }, { "epoch": 4.699539776462854, "grad_norm": 1.485851764678955, "learning_rate": 5.947368421052632e-05, "loss": 1.1884, "step": 1787 }, { "epoch": 4.699539776462854, "high_lr": 5.947368421052632e-05, "low_lr": 1.1894736842105265e-06, "step": 1787 }, { "epoch": 4.699539776462854, "high_lr": 5.947368421052632e-05, "low_lr": 1.1894736842105265e-06, "step": 1787 }, { "epoch": 4.699539776462854, "high_lr": 5.947368421052632e-05, "low_lr": 1.1894736842105265e-06, "step": 1787 }, { "epoch": 4.699539776462854, "high_lr": 5.947368421052632e-05, "low_lr": 1.1894736842105265e-06, "step": 1787 }, { "epoch": 4.699539776462854, "high_lr": 5.947368421052632e-05, "low_lr": 1.1894736842105265e-06, "step": 1787 }, { "epoch": 4.699539776462854, "high_lr": 5.947368421052632e-05, "low_lr": 1.1894736842105265e-06, "step": 1787 }, { "epoch": 4.699539776462854, "high_lr": 5.947368421052632e-05, "low_lr": 1.1894736842105265e-06, "step": 1787 }, { "epoch": 4.699539776462854, "high_lr": 5.947368421052632e-05, "low_lr": 1.1894736842105265e-06, "step": 1787 }, { "epoch": 4.702169625246548, "grad_norm": 1.542825698852539, "learning_rate": 5.8947368421052634e-05, "loss": 1.1931, "step": 1788 }, { "epoch": 4.702169625246548, "high_lr": 5.8947368421052634e-05, "low_lr": 1.1789473684210526e-06, "step": 1788 }, { "epoch": 4.702169625246548, "high_lr": 5.8947368421052634e-05, "low_lr": 1.1789473684210526e-06, "step": 1788 }, { "epoch": 4.702169625246548, "high_lr": 5.8947368421052634e-05, "low_lr": 1.1789473684210526e-06, "step": 1788 }, { "epoch": 4.702169625246548, "high_lr": 5.8947368421052634e-05, "low_lr": 1.1789473684210526e-06, "step": 1788 }, { "epoch": 4.702169625246548, "high_lr": 5.8947368421052634e-05, "low_lr": 1.1789473684210526e-06, "step": 1788 }, { "epoch": 4.702169625246548, "high_lr": 5.8947368421052634e-05, "low_lr": 1.1789473684210526e-06, "step": 1788 }, { "epoch": 4.702169625246548, "high_lr": 5.8947368421052634e-05, "low_lr": 1.1789473684210526e-06, "step": 1788 }, { "epoch": 4.702169625246548, "high_lr": 5.8947368421052634e-05, "low_lr": 1.1789473684210526e-06, "step": 1788 }, { "epoch": 4.704799474030243, "grad_norm": 1.4580672979354858, "learning_rate": 5.8421052631578954e-05, "loss": 1.2192, "step": 1789 }, { "epoch": 4.704799474030243, "high_lr": 5.8421052631578954e-05, "low_lr": 1.1684210526315791e-06, "step": 1789 }, { "epoch": 4.704799474030243, "high_lr": 5.8421052631578954e-05, "low_lr": 1.1684210526315791e-06, "step": 1789 }, { "epoch": 4.704799474030243, "high_lr": 5.8421052631578954e-05, "low_lr": 1.1684210526315791e-06, "step": 1789 }, { "epoch": 4.704799474030243, "high_lr": 5.8421052631578954e-05, "low_lr": 1.1684210526315791e-06, "step": 1789 }, { "epoch": 4.704799474030243, "high_lr": 5.8421052631578954e-05, "low_lr": 1.1684210526315791e-06, "step": 1789 }, { "epoch": 4.704799474030243, "high_lr": 5.8421052631578954e-05, "low_lr": 1.1684210526315791e-06, "step": 1789 }, { "epoch": 4.704799474030243, "high_lr": 5.8421052631578954e-05, "low_lr": 1.1684210526315791e-06, "step": 1789 }, { "epoch": 4.704799474030243, "high_lr": 5.8421052631578954e-05, "low_lr": 1.1684210526315791e-06, "step": 1789 }, { "epoch": 4.7074293228139386, "grad_norm": 1.5951370000839233, "learning_rate": 5.789473684210527e-05, "loss": 1.2283, "step": 1790 }, { "epoch": 4.7074293228139386, "high_lr": 5.789473684210527e-05, "low_lr": 1.1578947368421053e-06, "step": 1790 }, { "epoch": 4.7074293228139386, "high_lr": 5.789473684210527e-05, "low_lr": 1.1578947368421053e-06, "step": 1790 }, { "epoch": 4.7074293228139386, "high_lr": 5.789473684210527e-05, "low_lr": 1.1578947368421053e-06, "step": 1790 }, { "epoch": 4.7074293228139386, "high_lr": 5.789473684210527e-05, "low_lr": 1.1578947368421053e-06, "step": 1790 }, { "epoch": 4.7074293228139386, "high_lr": 5.789473684210527e-05, "low_lr": 1.1578947368421053e-06, "step": 1790 }, { "epoch": 4.7074293228139386, "high_lr": 5.789473684210527e-05, "low_lr": 1.1578947368421053e-06, "step": 1790 }, { "epoch": 4.7074293228139386, "high_lr": 5.789473684210527e-05, "low_lr": 1.1578947368421053e-06, "step": 1790 }, { "epoch": 4.7074293228139386, "high_lr": 5.789473684210527e-05, "low_lr": 1.1578947368421053e-06, "step": 1790 }, { "epoch": 4.710059171597633, "grad_norm": 1.511790156364441, "learning_rate": 5.736842105263158e-05, "loss": 1.2441, "step": 1791 }, { "epoch": 4.710059171597633, "high_lr": 5.736842105263158e-05, "low_lr": 1.1473684210526316e-06, "step": 1791 }, { "epoch": 4.710059171597633, "high_lr": 5.736842105263158e-05, "low_lr": 1.1473684210526316e-06, "step": 1791 }, { "epoch": 4.710059171597633, "high_lr": 5.736842105263158e-05, "low_lr": 1.1473684210526316e-06, "step": 1791 }, { "epoch": 4.710059171597633, "high_lr": 5.736842105263158e-05, "low_lr": 1.1473684210526316e-06, "step": 1791 }, { "epoch": 4.710059171597633, "high_lr": 5.736842105263158e-05, "low_lr": 1.1473684210526316e-06, "step": 1791 }, { "epoch": 4.710059171597633, "high_lr": 5.736842105263158e-05, "low_lr": 1.1473684210526316e-06, "step": 1791 }, { "epoch": 4.710059171597633, "high_lr": 5.736842105263158e-05, "low_lr": 1.1473684210526316e-06, "step": 1791 }, { "epoch": 4.710059171597633, "high_lr": 5.736842105263158e-05, "low_lr": 1.1473684210526316e-06, "step": 1791 }, { "epoch": 4.712689020381328, "grad_norm": 1.5266692638397217, "learning_rate": 5.68421052631579e-05, "loss": 1.2198, "step": 1792 }, { "epoch": 4.712689020381328, "high_lr": 5.68421052631579e-05, "low_lr": 1.136842105263158e-06, "step": 1792 }, { "epoch": 4.712689020381328, "high_lr": 5.68421052631579e-05, "low_lr": 1.136842105263158e-06, "step": 1792 }, { "epoch": 4.712689020381328, "high_lr": 5.68421052631579e-05, "low_lr": 1.136842105263158e-06, "step": 1792 }, { "epoch": 4.712689020381328, "high_lr": 5.68421052631579e-05, "low_lr": 1.136842105263158e-06, "step": 1792 }, { "epoch": 4.712689020381328, "high_lr": 5.68421052631579e-05, "low_lr": 1.136842105263158e-06, "step": 1792 }, { "epoch": 4.712689020381328, "high_lr": 5.68421052631579e-05, "low_lr": 1.136842105263158e-06, "step": 1792 }, { "epoch": 4.712689020381328, "high_lr": 5.68421052631579e-05, "low_lr": 1.136842105263158e-06, "step": 1792 }, { "epoch": 4.712689020381328, "high_lr": 5.68421052631579e-05, "low_lr": 1.136842105263158e-06, "step": 1792 }, { "epoch": 4.7153188691650225, "grad_norm": 1.5269373655319214, "learning_rate": 5.6315789473684206e-05, "loss": 1.2131, "step": 1793 }, { "epoch": 4.7153188691650225, "high_lr": 5.6315789473684206e-05, "low_lr": 1.1263157894736842e-06, "step": 1793 }, { "epoch": 4.7153188691650225, "high_lr": 5.6315789473684206e-05, "low_lr": 1.1263157894736842e-06, "step": 1793 }, { "epoch": 4.7153188691650225, "high_lr": 5.6315789473684206e-05, "low_lr": 1.1263157894736842e-06, "step": 1793 }, { "epoch": 4.7153188691650225, "high_lr": 5.6315789473684206e-05, "low_lr": 1.1263157894736842e-06, "step": 1793 }, { "epoch": 4.7153188691650225, "high_lr": 5.6315789473684206e-05, "low_lr": 1.1263157894736842e-06, "step": 1793 }, { "epoch": 4.7153188691650225, "high_lr": 5.6315789473684206e-05, "low_lr": 1.1263157894736842e-06, "step": 1793 }, { "epoch": 4.7153188691650225, "high_lr": 5.6315789473684206e-05, "low_lr": 1.1263157894736842e-06, "step": 1793 }, { "epoch": 4.7153188691650225, "high_lr": 5.6315789473684206e-05, "low_lr": 1.1263157894736842e-06, "step": 1793 }, { "epoch": 4.717948717948718, "grad_norm": 1.5177338123321533, "learning_rate": 5.5789473684210526e-05, "loss": 1.2279, "step": 1794 }, { "epoch": 4.717948717948718, "high_lr": 5.5789473684210526e-05, "low_lr": 1.1157894736842106e-06, "step": 1794 }, { "epoch": 4.717948717948718, "high_lr": 5.5789473684210526e-05, "low_lr": 1.1157894736842106e-06, "step": 1794 }, { "epoch": 4.717948717948718, "high_lr": 5.5789473684210526e-05, "low_lr": 1.1157894736842106e-06, "step": 1794 }, { "epoch": 4.717948717948718, "high_lr": 5.5789473684210526e-05, "low_lr": 1.1157894736842106e-06, "step": 1794 }, { "epoch": 4.717948717948718, "high_lr": 5.5789473684210526e-05, "low_lr": 1.1157894736842106e-06, "step": 1794 }, { "epoch": 4.717948717948718, "high_lr": 5.5789473684210526e-05, "low_lr": 1.1157894736842106e-06, "step": 1794 }, { "epoch": 4.717948717948718, "high_lr": 5.5789473684210526e-05, "low_lr": 1.1157894736842106e-06, "step": 1794 }, { "epoch": 4.717948717948718, "high_lr": 5.5789473684210526e-05, "low_lr": 1.1157894736842106e-06, "step": 1794 }, { "epoch": 4.720578566732413, "grad_norm": 1.61833655834198, "learning_rate": 5.526315789473684e-05, "loss": 1.1762, "step": 1795 }, { "epoch": 4.720578566732413, "high_lr": 5.526315789473684e-05, "low_lr": 1.1052631578947369e-06, "step": 1795 }, { "epoch": 4.720578566732413, "high_lr": 5.526315789473684e-05, "low_lr": 1.1052631578947369e-06, "step": 1795 }, { "epoch": 4.720578566732413, "high_lr": 5.526315789473684e-05, "low_lr": 1.1052631578947369e-06, "step": 1795 }, { "epoch": 4.720578566732413, "high_lr": 5.526315789473684e-05, "low_lr": 1.1052631578947369e-06, "step": 1795 }, { "epoch": 4.720578566732413, "high_lr": 5.526315789473684e-05, "low_lr": 1.1052631578947369e-06, "step": 1795 }, { "epoch": 4.720578566732413, "high_lr": 5.526315789473684e-05, "low_lr": 1.1052631578947369e-06, "step": 1795 }, { "epoch": 4.720578566732413, "high_lr": 5.526315789473684e-05, "low_lr": 1.1052631578947369e-06, "step": 1795 }, { "epoch": 4.720578566732413, "high_lr": 5.526315789473684e-05, "low_lr": 1.1052631578947369e-06, "step": 1795 }, { "epoch": 4.723208415516108, "grad_norm": 1.5420902967453003, "learning_rate": 5.473684210526316e-05, "loss": 1.1901, "step": 1796 }, { "epoch": 4.723208415516108, "high_lr": 5.473684210526316e-05, "low_lr": 1.0947368421052632e-06, "step": 1796 }, { "epoch": 4.723208415516108, "high_lr": 5.473684210526316e-05, "low_lr": 1.0947368421052632e-06, "step": 1796 }, { "epoch": 4.723208415516108, "high_lr": 5.473684210526316e-05, "low_lr": 1.0947368421052632e-06, "step": 1796 }, { "epoch": 4.723208415516108, "high_lr": 5.473684210526316e-05, "low_lr": 1.0947368421052632e-06, "step": 1796 }, { "epoch": 4.723208415516108, "high_lr": 5.473684210526316e-05, "low_lr": 1.0947368421052632e-06, "step": 1796 }, { "epoch": 4.723208415516108, "high_lr": 5.473684210526316e-05, "low_lr": 1.0947368421052632e-06, "step": 1796 }, { "epoch": 4.723208415516108, "high_lr": 5.473684210526316e-05, "low_lr": 1.0947368421052632e-06, "step": 1796 }, { "epoch": 4.723208415516108, "high_lr": 5.473684210526316e-05, "low_lr": 1.0947368421052632e-06, "step": 1796 }, { "epoch": 4.725838264299803, "grad_norm": 1.4761364459991455, "learning_rate": 5.421052631578947e-05, "loss": 1.1768, "step": 1797 }, { "epoch": 4.725838264299803, "high_lr": 5.421052631578947e-05, "low_lr": 1.0842105263157895e-06, "step": 1797 }, { "epoch": 4.725838264299803, "high_lr": 5.421052631578947e-05, "low_lr": 1.0842105263157895e-06, "step": 1797 }, { "epoch": 4.725838264299803, "high_lr": 5.421052631578947e-05, "low_lr": 1.0842105263157895e-06, "step": 1797 }, { "epoch": 4.725838264299803, "high_lr": 5.421052631578947e-05, "low_lr": 1.0842105263157895e-06, "step": 1797 }, { "epoch": 4.725838264299803, "high_lr": 5.421052631578947e-05, "low_lr": 1.0842105263157895e-06, "step": 1797 }, { "epoch": 4.725838264299803, "high_lr": 5.421052631578947e-05, "low_lr": 1.0842105263157895e-06, "step": 1797 }, { "epoch": 4.725838264299803, "high_lr": 5.421052631578947e-05, "low_lr": 1.0842105263157895e-06, "step": 1797 }, { "epoch": 4.725838264299803, "high_lr": 5.421052631578947e-05, "low_lr": 1.0842105263157895e-06, "step": 1797 }, { "epoch": 4.728468113083498, "grad_norm": 1.5077128410339355, "learning_rate": 5.368421052631579e-05, "loss": 1.2006, "step": 1798 }, { "epoch": 4.728468113083498, "high_lr": 5.368421052631579e-05, "low_lr": 1.0736842105263159e-06, "step": 1798 }, { "epoch": 4.728468113083498, "high_lr": 5.368421052631579e-05, "low_lr": 1.0736842105263159e-06, "step": 1798 }, { "epoch": 4.728468113083498, "high_lr": 5.368421052631579e-05, "low_lr": 1.0736842105263159e-06, "step": 1798 }, { "epoch": 4.728468113083498, "high_lr": 5.368421052631579e-05, "low_lr": 1.0736842105263159e-06, "step": 1798 }, { "epoch": 4.728468113083498, "high_lr": 5.368421052631579e-05, "low_lr": 1.0736842105263159e-06, "step": 1798 }, { "epoch": 4.728468113083498, "high_lr": 5.368421052631579e-05, "low_lr": 1.0736842105263159e-06, "step": 1798 }, { "epoch": 4.728468113083498, "high_lr": 5.368421052631579e-05, "low_lr": 1.0736842105263159e-06, "step": 1798 }, { "epoch": 4.728468113083498, "high_lr": 5.368421052631579e-05, "low_lr": 1.0736842105263159e-06, "step": 1798 }, { "epoch": 4.731097961867192, "grad_norm": 1.6717760562896729, "learning_rate": 5.3157894736842104e-05, "loss": 1.2318, "step": 1799 }, { "epoch": 4.731097961867192, "high_lr": 5.3157894736842104e-05, "low_lr": 1.0631578947368422e-06, "step": 1799 }, { "epoch": 4.731097961867192, "high_lr": 5.3157894736842104e-05, "low_lr": 1.0631578947368422e-06, "step": 1799 }, { "epoch": 4.731097961867192, "high_lr": 5.3157894736842104e-05, "low_lr": 1.0631578947368422e-06, "step": 1799 }, { "epoch": 4.731097961867192, "high_lr": 5.3157894736842104e-05, "low_lr": 1.0631578947368422e-06, "step": 1799 }, { "epoch": 4.731097961867192, "high_lr": 5.3157894736842104e-05, "low_lr": 1.0631578947368422e-06, "step": 1799 }, { "epoch": 4.731097961867192, "high_lr": 5.3157894736842104e-05, "low_lr": 1.0631578947368422e-06, "step": 1799 }, { "epoch": 4.731097961867192, "high_lr": 5.3157894736842104e-05, "low_lr": 1.0631578947368422e-06, "step": 1799 }, { "epoch": 4.731097961867192, "high_lr": 5.3157894736842104e-05, "low_lr": 1.0631578947368422e-06, "step": 1799 }, { "epoch": 4.733727810650888, "grad_norm": 1.5752482414245605, "learning_rate": 5.263157894736842e-05, "loss": 1.1888, "step": 1800 }, { "epoch": 4.733727810650888, "high_lr": 5.263157894736842e-05, "low_lr": 1.0526315789473685e-06, "step": 1800 }, { "epoch": 4.733727810650888, "high_lr": 5.263157894736842e-05, "low_lr": 1.0526315789473685e-06, "step": 1800 }, { "epoch": 4.733727810650888, "high_lr": 5.263157894736842e-05, "low_lr": 1.0526315789473685e-06, "step": 1800 }, { "epoch": 4.733727810650888, "high_lr": 5.263157894736842e-05, "low_lr": 1.0526315789473685e-06, "step": 1800 }, { "epoch": 4.733727810650888, "high_lr": 5.263157894736842e-05, "low_lr": 1.0526315789473685e-06, "step": 1800 }, { "epoch": 4.733727810650888, "high_lr": 5.263157894736842e-05, "low_lr": 1.0526315789473685e-06, "step": 1800 }, { "epoch": 4.733727810650888, "high_lr": 5.263157894736842e-05, "low_lr": 1.0526315789473685e-06, "step": 1800 }, { "epoch": 4.733727810650888, "high_lr": 5.263157894736842e-05, "low_lr": 1.0526315789473685e-06, "step": 1800 }, { "epoch": 4.736357659434582, "grad_norm": 1.6021584272384644, "learning_rate": 5.210526315789474e-05, "loss": 1.2797, "step": 1801 }, { "epoch": 4.736357659434582, "high_lr": 5.210526315789474e-05, "low_lr": 1.0421052631578949e-06, "step": 1801 }, { "epoch": 4.736357659434582, "high_lr": 5.210526315789474e-05, "low_lr": 1.0421052631578949e-06, "step": 1801 }, { "epoch": 4.736357659434582, "high_lr": 5.210526315789474e-05, "low_lr": 1.0421052631578949e-06, "step": 1801 }, { "epoch": 4.736357659434582, "high_lr": 5.210526315789474e-05, "low_lr": 1.0421052631578949e-06, "step": 1801 }, { "epoch": 4.736357659434582, "high_lr": 5.210526315789474e-05, "low_lr": 1.0421052631578949e-06, "step": 1801 }, { "epoch": 4.736357659434582, "high_lr": 5.210526315789474e-05, "low_lr": 1.0421052631578949e-06, "step": 1801 }, { "epoch": 4.736357659434582, "high_lr": 5.210526315789474e-05, "low_lr": 1.0421052631578949e-06, "step": 1801 }, { "epoch": 4.736357659434582, "high_lr": 5.210526315789474e-05, "low_lr": 1.0421052631578949e-06, "step": 1801 }, { "epoch": 4.738987508218277, "grad_norm": 1.5199358463287354, "learning_rate": 5.157894736842105e-05, "loss": 1.1962, "step": 1802 }, { "epoch": 4.738987508218277, "high_lr": 5.157894736842105e-05, "low_lr": 1.0315789473684212e-06, "step": 1802 }, { "epoch": 4.738987508218277, "high_lr": 5.157894736842105e-05, "low_lr": 1.0315789473684212e-06, "step": 1802 }, { "epoch": 4.738987508218277, "high_lr": 5.157894736842105e-05, "low_lr": 1.0315789473684212e-06, "step": 1802 }, { "epoch": 4.738987508218277, "high_lr": 5.157894736842105e-05, "low_lr": 1.0315789473684212e-06, "step": 1802 }, { "epoch": 4.738987508218277, "high_lr": 5.157894736842105e-05, "low_lr": 1.0315789473684212e-06, "step": 1802 }, { "epoch": 4.738987508218277, "high_lr": 5.157894736842105e-05, "low_lr": 1.0315789473684212e-06, "step": 1802 }, { "epoch": 4.738987508218277, "high_lr": 5.157894736842105e-05, "low_lr": 1.0315789473684212e-06, "step": 1802 }, { "epoch": 4.738987508218277, "high_lr": 5.157894736842105e-05, "low_lr": 1.0315789473684212e-06, "step": 1802 }, { "epoch": 4.741617357001973, "grad_norm": 1.5833852291107178, "learning_rate": 5.105263157894737e-05, "loss": 1.2322, "step": 1803 }, { "epoch": 4.741617357001973, "high_lr": 5.105263157894737e-05, "low_lr": 1.0210526315789475e-06, "step": 1803 }, { "epoch": 4.741617357001973, "high_lr": 5.105263157894737e-05, "low_lr": 1.0210526315789475e-06, "step": 1803 }, { "epoch": 4.741617357001973, "high_lr": 5.105263157894737e-05, "low_lr": 1.0210526315789475e-06, "step": 1803 }, { "epoch": 4.741617357001973, "high_lr": 5.105263157894737e-05, "low_lr": 1.0210526315789475e-06, "step": 1803 }, { "epoch": 4.741617357001973, "high_lr": 5.105263157894737e-05, "low_lr": 1.0210526315789475e-06, "step": 1803 }, { "epoch": 4.741617357001973, "high_lr": 5.105263157894737e-05, "low_lr": 1.0210526315789475e-06, "step": 1803 }, { "epoch": 4.741617357001973, "high_lr": 5.105263157894737e-05, "low_lr": 1.0210526315789475e-06, "step": 1803 }, { "epoch": 4.741617357001973, "high_lr": 5.105263157894737e-05, "low_lr": 1.0210526315789475e-06, "step": 1803 }, { "epoch": 4.744247205785667, "grad_norm": 1.4354584217071533, "learning_rate": 5.052631578947368e-05, "loss": 1.2384, "step": 1804 }, { "epoch": 4.744247205785667, "high_lr": 5.052631578947368e-05, "low_lr": 1.0105263157894738e-06, "step": 1804 }, { "epoch": 4.744247205785667, "high_lr": 5.052631578947368e-05, "low_lr": 1.0105263157894738e-06, "step": 1804 }, { "epoch": 4.744247205785667, "high_lr": 5.052631578947368e-05, "low_lr": 1.0105263157894738e-06, "step": 1804 }, { "epoch": 4.744247205785667, "high_lr": 5.052631578947368e-05, "low_lr": 1.0105263157894738e-06, "step": 1804 }, { "epoch": 4.744247205785667, "high_lr": 5.052631578947368e-05, "low_lr": 1.0105263157894738e-06, "step": 1804 }, { "epoch": 4.744247205785667, "high_lr": 5.052631578947368e-05, "low_lr": 1.0105263157894738e-06, "step": 1804 }, { "epoch": 4.744247205785667, "high_lr": 5.052631578947368e-05, "low_lr": 1.0105263157894738e-06, "step": 1804 }, { "epoch": 4.744247205785667, "high_lr": 5.052631578947368e-05, "low_lr": 1.0105263157894738e-06, "step": 1804 }, { "epoch": 4.746877054569362, "grad_norm": 1.5235344171524048, "learning_rate": 5e-05, "loss": 1.2031, "step": 1805 }, { "epoch": 4.746877054569362, "high_lr": 5e-05, "low_lr": 1.0000000000000002e-06, "step": 1805 }, { "epoch": 4.746877054569362, "high_lr": 5e-05, "low_lr": 1.0000000000000002e-06, "step": 1805 }, { "epoch": 4.746877054569362, "high_lr": 5e-05, "low_lr": 1.0000000000000002e-06, "step": 1805 }, { "epoch": 4.746877054569362, "high_lr": 5e-05, "low_lr": 1.0000000000000002e-06, "step": 1805 }, { "epoch": 4.746877054569362, "high_lr": 5e-05, "low_lr": 1.0000000000000002e-06, "step": 1805 }, { "epoch": 4.746877054569362, "high_lr": 5e-05, "low_lr": 1.0000000000000002e-06, "step": 1805 }, { "epoch": 4.746877054569362, "high_lr": 5e-05, "low_lr": 1.0000000000000002e-06, "step": 1805 }, { "epoch": 4.746877054569362, "high_lr": 5e-05, "low_lr": 1.0000000000000002e-06, "step": 1805 }, { "epoch": 4.7495069033530575, "grad_norm": 1.5912785530090332, "learning_rate": 4.9473684210526315e-05, "loss": 1.1858, "step": 1806 }, { "epoch": 4.7495069033530575, "high_lr": 4.9473684210526315e-05, "low_lr": 9.894736842105265e-07, "step": 1806 }, { "epoch": 4.7495069033530575, "high_lr": 4.9473684210526315e-05, "low_lr": 9.894736842105265e-07, "step": 1806 }, { "epoch": 4.7495069033530575, "high_lr": 4.9473684210526315e-05, "low_lr": 9.894736842105265e-07, "step": 1806 }, { "epoch": 4.7495069033530575, "high_lr": 4.9473684210526315e-05, "low_lr": 9.894736842105265e-07, "step": 1806 }, { "epoch": 4.7495069033530575, "high_lr": 4.9473684210526315e-05, "low_lr": 9.894736842105265e-07, "step": 1806 }, { "epoch": 4.7495069033530575, "high_lr": 4.9473684210526315e-05, "low_lr": 9.894736842105265e-07, "step": 1806 }, { "epoch": 4.7495069033530575, "high_lr": 4.9473684210526315e-05, "low_lr": 9.894736842105265e-07, "step": 1806 }, { "epoch": 4.7495069033530575, "high_lr": 4.9473684210526315e-05, "low_lr": 9.894736842105265e-07, "step": 1806 }, { "epoch": 4.752136752136752, "grad_norm": 1.6209546327590942, "learning_rate": 4.894736842105263e-05, "loss": 1.2086, "step": 1807 }, { "epoch": 4.752136752136752, "high_lr": 4.894736842105263e-05, "low_lr": 9.789473684210526e-07, "step": 1807 }, { "epoch": 4.752136752136752, "high_lr": 4.894736842105263e-05, "low_lr": 9.789473684210526e-07, "step": 1807 }, { "epoch": 4.752136752136752, "high_lr": 4.894736842105263e-05, "low_lr": 9.789473684210526e-07, "step": 1807 }, { "epoch": 4.752136752136752, "high_lr": 4.894736842105263e-05, "low_lr": 9.789473684210526e-07, "step": 1807 }, { "epoch": 4.752136752136752, "high_lr": 4.894736842105263e-05, "low_lr": 9.789473684210526e-07, "step": 1807 }, { "epoch": 4.752136752136752, "high_lr": 4.894736842105263e-05, "low_lr": 9.789473684210526e-07, "step": 1807 }, { "epoch": 4.752136752136752, "high_lr": 4.894736842105263e-05, "low_lr": 9.789473684210526e-07, "step": 1807 }, { "epoch": 4.752136752136752, "high_lr": 4.894736842105263e-05, "low_lr": 9.789473684210526e-07, "step": 1807 }, { "epoch": 4.754766600920447, "grad_norm": 1.6664385795593262, "learning_rate": 4.842105263157895e-05, "loss": 1.2079, "step": 1808 }, { "epoch": 4.754766600920447, "high_lr": 4.842105263157895e-05, "low_lr": 9.68421052631579e-07, "step": 1808 }, { "epoch": 4.754766600920447, "high_lr": 4.842105263157895e-05, "low_lr": 9.68421052631579e-07, "step": 1808 }, { "epoch": 4.754766600920447, "high_lr": 4.842105263157895e-05, "low_lr": 9.68421052631579e-07, "step": 1808 }, { "epoch": 4.754766600920447, "high_lr": 4.842105263157895e-05, "low_lr": 9.68421052631579e-07, "step": 1808 }, { "epoch": 4.754766600920447, "high_lr": 4.842105263157895e-05, "low_lr": 9.68421052631579e-07, "step": 1808 }, { "epoch": 4.754766600920447, "high_lr": 4.842105263157895e-05, "low_lr": 9.68421052631579e-07, "step": 1808 }, { "epoch": 4.754766600920447, "high_lr": 4.842105263157895e-05, "low_lr": 9.68421052631579e-07, "step": 1808 }, { "epoch": 4.754766600920447, "high_lr": 4.842105263157895e-05, "low_lr": 9.68421052631579e-07, "step": 1808 }, { "epoch": 4.757396449704142, "grad_norm": 1.6431585550308228, "learning_rate": 4.789473684210526e-05, "loss": 1.2445, "step": 1809 }, { "epoch": 4.757396449704142, "high_lr": 4.789473684210526e-05, "low_lr": 9.578947368421053e-07, "step": 1809 }, { "epoch": 4.757396449704142, "high_lr": 4.789473684210526e-05, "low_lr": 9.578947368421053e-07, "step": 1809 }, { "epoch": 4.757396449704142, "high_lr": 4.789473684210526e-05, "low_lr": 9.578947368421053e-07, "step": 1809 }, { "epoch": 4.757396449704142, "high_lr": 4.789473684210526e-05, "low_lr": 9.578947368421053e-07, "step": 1809 }, { "epoch": 4.757396449704142, "high_lr": 4.789473684210526e-05, "low_lr": 9.578947368421053e-07, "step": 1809 }, { "epoch": 4.757396449704142, "high_lr": 4.789473684210526e-05, "low_lr": 9.578947368421053e-07, "step": 1809 }, { "epoch": 4.757396449704142, "high_lr": 4.789473684210526e-05, "low_lr": 9.578947368421053e-07, "step": 1809 }, { "epoch": 4.757396449704142, "high_lr": 4.789473684210526e-05, "low_lr": 9.578947368421053e-07, "step": 1809 }, { "epoch": 4.760026298487837, "grad_norm": 1.4404851198196411, "learning_rate": 4.736842105263158e-05, "loss": 1.2352, "step": 1810 }, { "epoch": 4.760026298487837, "high_lr": 4.736842105263158e-05, "low_lr": 9.473684210526317e-07, "step": 1810 }, { "epoch": 4.760026298487837, "high_lr": 4.736842105263158e-05, "low_lr": 9.473684210526317e-07, "step": 1810 }, { "epoch": 4.760026298487837, "high_lr": 4.736842105263158e-05, "low_lr": 9.473684210526317e-07, "step": 1810 }, { "epoch": 4.760026298487837, "high_lr": 4.736842105263158e-05, "low_lr": 9.473684210526317e-07, "step": 1810 }, { "epoch": 4.760026298487837, "high_lr": 4.736842105263158e-05, "low_lr": 9.473684210526317e-07, "step": 1810 }, { "epoch": 4.760026298487837, "high_lr": 4.736842105263158e-05, "low_lr": 9.473684210526317e-07, "step": 1810 }, { "epoch": 4.760026298487837, "high_lr": 4.736842105263158e-05, "low_lr": 9.473684210526317e-07, "step": 1810 }, { "epoch": 4.760026298487837, "high_lr": 4.736842105263158e-05, "low_lr": 9.473684210526317e-07, "step": 1810 }, { "epoch": 4.762656147271532, "grad_norm": 1.6453670263290405, "learning_rate": 4.6842105263157894e-05, "loss": 1.234, "step": 1811 }, { "epoch": 4.762656147271532, "high_lr": 4.6842105263157894e-05, "low_lr": 9.368421052631579e-07, "step": 1811 }, { "epoch": 4.762656147271532, "high_lr": 4.6842105263157894e-05, "low_lr": 9.368421052631579e-07, "step": 1811 }, { "epoch": 4.762656147271532, "high_lr": 4.6842105263157894e-05, "low_lr": 9.368421052631579e-07, "step": 1811 }, { "epoch": 4.762656147271532, "high_lr": 4.6842105263157894e-05, "low_lr": 9.368421052631579e-07, "step": 1811 }, { "epoch": 4.762656147271532, "high_lr": 4.6842105263157894e-05, "low_lr": 9.368421052631579e-07, "step": 1811 }, { "epoch": 4.762656147271532, "high_lr": 4.6842105263157894e-05, "low_lr": 9.368421052631579e-07, "step": 1811 }, { "epoch": 4.762656147271532, "high_lr": 4.6842105263157894e-05, "low_lr": 9.368421052631579e-07, "step": 1811 }, { "epoch": 4.762656147271532, "high_lr": 4.6842105263157894e-05, "low_lr": 9.368421052631579e-07, "step": 1811 }, { "epoch": 4.765285996055227, "grad_norm": 1.5535515546798706, "learning_rate": 4.6315789473684214e-05, "loss": 1.1985, "step": 1812 }, { "epoch": 4.765285996055227, "high_lr": 4.6315789473684214e-05, "low_lr": 9.263157894736844e-07, "step": 1812 }, { "epoch": 4.765285996055227, "high_lr": 4.6315789473684214e-05, "low_lr": 9.263157894736844e-07, "step": 1812 }, { "epoch": 4.765285996055227, "high_lr": 4.6315789473684214e-05, "low_lr": 9.263157894736844e-07, "step": 1812 }, { "epoch": 4.765285996055227, "high_lr": 4.6315789473684214e-05, "low_lr": 9.263157894736844e-07, "step": 1812 }, { "epoch": 4.765285996055227, "high_lr": 4.6315789473684214e-05, "low_lr": 9.263157894736844e-07, "step": 1812 }, { "epoch": 4.765285996055227, "high_lr": 4.6315789473684214e-05, "low_lr": 9.263157894736844e-07, "step": 1812 }, { "epoch": 4.765285996055227, "high_lr": 4.6315789473684214e-05, "low_lr": 9.263157894736844e-07, "step": 1812 }, { "epoch": 4.765285996055227, "high_lr": 4.6315789473684214e-05, "low_lr": 9.263157894736844e-07, "step": 1812 }, { "epoch": 4.767915844838922, "grad_norm": 1.5614182949066162, "learning_rate": 4.5789473684210527e-05, "loss": 1.2091, "step": 1813 }, { "epoch": 4.767915844838922, "high_lr": 4.5789473684210527e-05, "low_lr": 9.157894736842106e-07, "step": 1813 }, { "epoch": 4.767915844838922, "high_lr": 4.5789473684210527e-05, "low_lr": 9.157894736842106e-07, "step": 1813 }, { "epoch": 4.767915844838922, "high_lr": 4.5789473684210527e-05, "low_lr": 9.157894736842106e-07, "step": 1813 }, { "epoch": 4.767915844838922, "high_lr": 4.5789473684210527e-05, "low_lr": 9.157894736842106e-07, "step": 1813 }, { "epoch": 4.767915844838922, "high_lr": 4.5789473684210527e-05, "low_lr": 9.157894736842106e-07, "step": 1813 }, { "epoch": 4.767915844838922, "high_lr": 4.5789473684210527e-05, "low_lr": 9.157894736842106e-07, "step": 1813 }, { "epoch": 4.767915844838922, "high_lr": 4.5789473684210527e-05, "low_lr": 9.157894736842106e-07, "step": 1813 }, { "epoch": 4.767915844838922, "high_lr": 4.5789473684210527e-05, "low_lr": 9.157894736842106e-07, "step": 1813 }, { "epoch": 4.7705456936226165, "grad_norm": 1.6125268936157227, "learning_rate": 4.5263157894736846e-05, "loss": 1.1937, "step": 1814 }, { "epoch": 4.7705456936226165, "high_lr": 4.5263157894736846e-05, "low_lr": 9.05263157894737e-07, "step": 1814 }, { "epoch": 4.7705456936226165, "high_lr": 4.5263157894736846e-05, "low_lr": 9.05263157894737e-07, "step": 1814 }, { "epoch": 4.7705456936226165, "high_lr": 4.5263157894736846e-05, "low_lr": 9.05263157894737e-07, "step": 1814 }, { "epoch": 4.7705456936226165, "high_lr": 4.5263157894736846e-05, "low_lr": 9.05263157894737e-07, "step": 1814 }, { "epoch": 4.7705456936226165, "high_lr": 4.5263157894736846e-05, "low_lr": 9.05263157894737e-07, "step": 1814 }, { "epoch": 4.7705456936226165, "high_lr": 4.5263157894736846e-05, "low_lr": 9.05263157894737e-07, "step": 1814 }, { "epoch": 4.7705456936226165, "high_lr": 4.5263157894736846e-05, "low_lr": 9.05263157894737e-07, "step": 1814 }, { "epoch": 4.7705456936226165, "high_lr": 4.5263157894736846e-05, "low_lr": 9.05263157894737e-07, "step": 1814 }, { "epoch": 4.773175542406312, "grad_norm": 1.580861210823059, "learning_rate": 4.473684210526316e-05, "loss": 1.2034, "step": 1815 }, { "epoch": 4.773175542406312, "high_lr": 4.473684210526316e-05, "low_lr": 8.947368421052632e-07, "step": 1815 }, { "epoch": 4.773175542406312, "high_lr": 4.473684210526316e-05, "low_lr": 8.947368421052632e-07, "step": 1815 }, { "epoch": 4.773175542406312, "high_lr": 4.473684210526316e-05, "low_lr": 8.947368421052632e-07, "step": 1815 }, { "epoch": 4.773175542406312, "high_lr": 4.473684210526316e-05, "low_lr": 8.947368421052632e-07, "step": 1815 }, { "epoch": 4.773175542406312, "high_lr": 4.473684210526316e-05, "low_lr": 8.947368421052632e-07, "step": 1815 }, { "epoch": 4.773175542406312, "high_lr": 4.473684210526316e-05, "low_lr": 8.947368421052632e-07, "step": 1815 }, { "epoch": 4.773175542406312, "high_lr": 4.473684210526316e-05, "low_lr": 8.947368421052632e-07, "step": 1815 }, { "epoch": 4.773175542406312, "high_lr": 4.473684210526316e-05, "low_lr": 8.947368421052632e-07, "step": 1815 }, { "epoch": 4.775805391190007, "grad_norm": 1.6488875150680542, "learning_rate": 4.421052631578947e-05, "loss": 1.2161, "step": 1816 }, { "epoch": 4.775805391190007, "high_lr": 4.421052631578947e-05, "low_lr": 8.842105263157895e-07, "step": 1816 }, { "epoch": 4.775805391190007, "high_lr": 4.421052631578947e-05, "low_lr": 8.842105263157895e-07, "step": 1816 }, { "epoch": 4.775805391190007, "high_lr": 4.421052631578947e-05, "low_lr": 8.842105263157895e-07, "step": 1816 }, { "epoch": 4.775805391190007, "high_lr": 4.421052631578947e-05, "low_lr": 8.842105263157895e-07, "step": 1816 }, { "epoch": 4.775805391190007, "high_lr": 4.421052631578947e-05, "low_lr": 8.842105263157895e-07, "step": 1816 }, { "epoch": 4.775805391190007, "high_lr": 4.421052631578947e-05, "low_lr": 8.842105263157895e-07, "step": 1816 }, { "epoch": 4.775805391190007, "high_lr": 4.421052631578947e-05, "low_lr": 8.842105263157895e-07, "step": 1816 }, { "epoch": 4.775805391190007, "high_lr": 4.421052631578947e-05, "low_lr": 8.842105263157895e-07, "step": 1816 }, { "epoch": 4.778435239973701, "grad_norm": 1.513824224472046, "learning_rate": 4.368421052631579e-05, "loss": 1.1955, "step": 1817 }, { "epoch": 4.778435239973701, "high_lr": 4.368421052631579e-05, "low_lr": 8.736842105263159e-07, "step": 1817 }, { "epoch": 4.778435239973701, "high_lr": 4.368421052631579e-05, "low_lr": 8.736842105263159e-07, "step": 1817 }, { "epoch": 4.778435239973701, "high_lr": 4.368421052631579e-05, "low_lr": 8.736842105263159e-07, "step": 1817 }, { "epoch": 4.778435239973701, "high_lr": 4.368421052631579e-05, "low_lr": 8.736842105263159e-07, "step": 1817 }, { "epoch": 4.778435239973701, "high_lr": 4.368421052631579e-05, "low_lr": 8.736842105263159e-07, "step": 1817 }, { "epoch": 4.778435239973701, "high_lr": 4.368421052631579e-05, "low_lr": 8.736842105263159e-07, "step": 1817 }, { "epoch": 4.778435239973701, "high_lr": 4.368421052631579e-05, "low_lr": 8.736842105263159e-07, "step": 1817 }, { "epoch": 4.778435239973701, "high_lr": 4.368421052631579e-05, "low_lr": 8.736842105263159e-07, "step": 1817 }, { "epoch": 4.781065088757396, "grad_norm": 1.5169581174850464, "learning_rate": 4.3157894736842105e-05, "loss": 1.1814, "step": 1818 }, { "epoch": 4.781065088757396, "high_lr": 4.3157894736842105e-05, "low_lr": 8.631578947368421e-07, "step": 1818 }, { "epoch": 4.781065088757396, "high_lr": 4.3157894736842105e-05, "low_lr": 8.631578947368421e-07, "step": 1818 }, { "epoch": 4.781065088757396, "high_lr": 4.3157894736842105e-05, "low_lr": 8.631578947368421e-07, "step": 1818 }, { "epoch": 4.781065088757396, "high_lr": 4.3157894736842105e-05, "low_lr": 8.631578947368421e-07, "step": 1818 }, { "epoch": 4.781065088757396, "high_lr": 4.3157894736842105e-05, "low_lr": 8.631578947368421e-07, "step": 1818 }, { "epoch": 4.781065088757396, "high_lr": 4.3157894736842105e-05, "low_lr": 8.631578947368421e-07, "step": 1818 }, { "epoch": 4.781065088757396, "high_lr": 4.3157894736842105e-05, "low_lr": 8.631578947368421e-07, "step": 1818 }, { "epoch": 4.781065088757396, "high_lr": 4.3157894736842105e-05, "low_lr": 8.631578947368421e-07, "step": 1818 }, { "epoch": 4.7836949375410915, "grad_norm": 1.5888378620147705, "learning_rate": 4.2631578947368425e-05, "loss": 1.2298, "step": 1819 }, { "epoch": 4.7836949375410915, "high_lr": 4.2631578947368425e-05, "low_lr": 8.526315789473685e-07, "step": 1819 }, { "epoch": 4.7836949375410915, "high_lr": 4.2631578947368425e-05, "low_lr": 8.526315789473685e-07, "step": 1819 }, { "epoch": 4.7836949375410915, "high_lr": 4.2631578947368425e-05, "low_lr": 8.526315789473685e-07, "step": 1819 }, { "epoch": 4.7836949375410915, "high_lr": 4.2631578947368425e-05, "low_lr": 8.526315789473685e-07, "step": 1819 }, { "epoch": 4.7836949375410915, "high_lr": 4.2631578947368425e-05, "low_lr": 8.526315789473685e-07, "step": 1819 }, { "epoch": 4.7836949375410915, "high_lr": 4.2631578947368425e-05, "low_lr": 8.526315789473685e-07, "step": 1819 }, { "epoch": 4.7836949375410915, "high_lr": 4.2631578947368425e-05, "low_lr": 8.526315789473685e-07, "step": 1819 }, { "epoch": 4.7836949375410915, "high_lr": 4.2631578947368425e-05, "low_lr": 8.526315789473685e-07, "step": 1819 }, { "epoch": 4.786324786324786, "grad_norm": 1.5194802284240723, "learning_rate": 4.210526315789474e-05, "loss": 1.2103, "step": 1820 }, { "epoch": 4.786324786324786, "high_lr": 4.210526315789474e-05, "low_lr": 8.421052631578948e-07, "step": 1820 }, { "epoch": 4.786324786324786, "high_lr": 4.210526315789474e-05, "low_lr": 8.421052631578948e-07, "step": 1820 }, { "epoch": 4.786324786324786, "high_lr": 4.210526315789474e-05, "low_lr": 8.421052631578948e-07, "step": 1820 }, { "epoch": 4.786324786324786, "high_lr": 4.210526315789474e-05, "low_lr": 8.421052631578948e-07, "step": 1820 }, { "epoch": 4.786324786324786, "high_lr": 4.210526315789474e-05, "low_lr": 8.421052631578948e-07, "step": 1820 }, { "epoch": 4.786324786324786, "high_lr": 4.210526315789474e-05, "low_lr": 8.421052631578948e-07, "step": 1820 }, { "epoch": 4.786324786324786, "high_lr": 4.210526315789474e-05, "low_lr": 8.421052631578948e-07, "step": 1820 }, { "epoch": 4.786324786324786, "high_lr": 4.210526315789474e-05, "low_lr": 8.421052631578948e-07, "step": 1820 }, { "epoch": 4.788954635108482, "grad_norm": 1.589921236038208, "learning_rate": 4.157894736842106e-05, "loss": 1.2039, "step": 1821 }, { "epoch": 4.788954635108482, "high_lr": 4.157894736842106e-05, "low_lr": 8.315789473684212e-07, "step": 1821 }, { "epoch": 4.788954635108482, "high_lr": 4.157894736842106e-05, "low_lr": 8.315789473684212e-07, "step": 1821 }, { "epoch": 4.788954635108482, "high_lr": 4.157894736842106e-05, "low_lr": 8.315789473684212e-07, "step": 1821 }, { "epoch": 4.788954635108482, "high_lr": 4.157894736842106e-05, "low_lr": 8.315789473684212e-07, "step": 1821 }, { "epoch": 4.788954635108482, "high_lr": 4.157894736842106e-05, "low_lr": 8.315789473684212e-07, "step": 1821 }, { "epoch": 4.788954635108482, "high_lr": 4.157894736842106e-05, "low_lr": 8.315789473684212e-07, "step": 1821 }, { "epoch": 4.788954635108482, "high_lr": 4.157894736842106e-05, "low_lr": 8.315789473684212e-07, "step": 1821 }, { "epoch": 4.788954635108482, "high_lr": 4.157894736842106e-05, "low_lr": 8.315789473684212e-07, "step": 1821 }, { "epoch": 4.791584483892176, "grad_norm": 1.4875950813293457, "learning_rate": 4.105263157894737e-05, "loss": 1.2248, "step": 1822 }, { "epoch": 4.791584483892176, "high_lr": 4.105263157894737e-05, "low_lr": 8.210526315789474e-07, "step": 1822 }, { "epoch": 4.791584483892176, "high_lr": 4.105263157894737e-05, "low_lr": 8.210526315789474e-07, "step": 1822 }, { "epoch": 4.791584483892176, "high_lr": 4.105263157894737e-05, "low_lr": 8.210526315789474e-07, "step": 1822 }, { "epoch": 4.791584483892176, "high_lr": 4.105263157894737e-05, "low_lr": 8.210526315789474e-07, "step": 1822 }, { "epoch": 4.791584483892176, "high_lr": 4.105263157894737e-05, "low_lr": 8.210526315789474e-07, "step": 1822 }, { "epoch": 4.791584483892176, "high_lr": 4.105263157894737e-05, "low_lr": 8.210526315789474e-07, "step": 1822 }, { "epoch": 4.791584483892176, "high_lr": 4.105263157894737e-05, "low_lr": 8.210526315789474e-07, "step": 1822 }, { "epoch": 4.791584483892176, "high_lr": 4.105263157894737e-05, "low_lr": 8.210526315789474e-07, "step": 1822 }, { "epoch": 4.794214332675871, "grad_norm": 1.4205827713012695, "learning_rate": 4.0526315789473684e-05, "loss": 1.2522, "step": 1823 }, { "epoch": 4.794214332675871, "high_lr": 4.0526315789473684e-05, "low_lr": 8.105263157894736e-07, "step": 1823 }, { "epoch": 4.794214332675871, "high_lr": 4.0526315789473684e-05, "low_lr": 8.105263157894736e-07, "step": 1823 }, { "epoch": 4.794214332675871, "high_lr": 4.0526315789473684e-05, "low_lr": 8.105263157894736e-07, "step": 1823 }, { "epoch": 4.794214332675871, "high_lr": 4.0526315789473684e-05, "low_lr": 8.105263157894736e-07, "step": 1823 }, { "epoch": 4.794214332675871, "high_lr": 4.0526315789473684e-05, "low_lr": 8.105263157894736e-07, "step": 1823 }, { "epoch": 4.794214332675871, "high_lr": 4.0526315789473684e-05, "low_lr": 8.105263157894736e-07, "step": 1823 }, { "epoch": 4.794214332675871, "high_lr": 4.0526315789473684e-05, "low_lr": 8.105263157894736e-07, "step": 1823 }, { "epoch": 4.794214332675871, "high_lr": 4.0526315789473684e-05, "low_lr": 8.105263157894736e-07, "step": 1823 }, { "epoch": 4.796844181459566, "grad_norm": 1.5499900579452515, "learning_rate": 4e-05, "loss": 1.2627, "step": 1824 }, { "epoch": 4.796844181459566, "high_lr": 4e-05, "low_lr": 8.000000000000001e-07, "step": 1824 }, { "epoch": 4.796844181459566, "high_lr": 4e-05, "low_lr": 8.000000000000001e-07, "step": 1824 }, { "epoch": 4.796844181459566, "high_lr": 4e-05, "low_lr": 8.000000000000001e-07, "step": 1824 }, { "epoch": 4.796844181459566, "high_lr": 4e-05, "low_lr": 8.000000000000001e-07, "step": 1824 }, { "epoch": 4.796844181459566, "high_lr": 4e-05, "low_lr": 8.000000000000001e-07, "step": 1824 }, { "epoch": 4.796844181459566, "high_lr": 4e-05, "low_lr": 8.000000000000001e-07, "step": 1824 }, { "epoch": 4.796844181459566, "high_lr": 4e-05, "low_lr": 8.000000000000001e-07, "step": 1824 }, { "epoch": 4.796844181459566, "high_lr": 4e-05, "low_lr": 8.000000000000001e-07, "step": 1824 }, { "epoch": 4.799474030243261, "grad_norm": 1.6648582220077515, "learning_rate": 3.9473684210526316e-05, "loss": 1.2229, "step": 1825 }, { "epoch": 4.799474030243261, "high_lr": 3.9473684210526316e-05, "low_lr": 7.894736842105263e-07, "step": 1825 }, { "epoch": 4.799474030243261, "high_lr": 3.9473684210526316e-05, "low_lr": 7.894736842105263e-07, "step": 1825 }, { "epoch": 4.799474030243261, "high_lr": 3.9473684210526316e-05, "low_lr": 7.894736842105263e-07, "step": 1825 }, { "epoch": 4.799474030243261, "high_lr": 3.9473684210526316e-05, "low_lr": 7.894736842105263e-07, "step": 1825 }, { "epoch": 4.799474030243261, "high_lr": 3.9473684210526316e-05, "low_lr": 7.894736842105263e-07, "step": 1825 }, { "epoch": 4.799474030243261, "high_lr": 3.9473684210526316e-05, "low_lr": 7.894736842105263e-07, "step": 1825 }, { "epoch": 4.799474030243261, "high_lr": 3.9473684210526316e-05, "low_lr": 7.894736842105263e-07, "step": 1825 }, { "epoch": 4.799474030243261, "high_lr": 3.9473684210526316e-05, "low_lr": 7.894736842105263e-07, "step": 1825 }, { "epoch": 4.802103879026956, "grad_norm": 1.5830285549163818, "learning_rate": 3.8947368421052636e-05, "loss": 1.2279, "step": 1826 }, { "epoch": 4.802103879026956, "high_lr": 3.8947368421052636e-05, "low_lr": 7.789473684210527e-07, "step": 1826 }, { "epoch": 4.802103879026956, "high_lr": 3.8947368421052636e-05, "low_lr": 7.789473684210527e-07, "step": 1826 }, { "epoch": 4.802103879026956, "high_lr": 3.8947368421052636e-05, "low_lr": 7.789473684210527e-07, "step": 1826 }, { "epoch": 4.802103879026956, "high_lr": 3.8947368421052636e-05, "low_lr": 7.789473684210527e-07, "step": 1826 }, { "epoch": 4.802103879026956, "high_lr": 3.8947368421052636e-05, "low_lr": 7.789473684210527e-07, "step": 1826 }, { "epoch": 4.802103879026956, "high_lr": 3.8947368421052636e-05, "low_lr": 7.789473684210527e-07, "step": 1826 }, { "epoch": 4.802103879026956, "high_lr": 3.8947368421052636e-05, "low_lr": 7.789473684210527e-07, "step": 1826 }, { "epoch": 4.802103879026956, "high_lr": 3.8947368421052636e-05, "low_lr": 7.789473684210527e-07, "step": 1826 }, { "epoch": 4.804733727810651, "grad_norm": 1.554327130317688, "learning_rate": 3.842105263157895e-05, "loss": 1.2297, "step": 1827 }, { "epoch": 4.804733727810651, "high_lr": 3.842105263157895e-05, "low_lr": 7.684210526315789e-07, "step": 1827 }, { "epoch": 4.804733727810651, "high_lr": 3.842105263157895e-05, "low_lr": 7.684210526315789e-07, "step": 1827 }, { "epoch": 4.804733727810651, "high_lr": 3.842105263157895e-05, "low_lr": 7.684210526315789e-07, "step": 1827 }, { "epoch": 4.804733727810651, "high_lr": 3.842105263157895e-05, "low_lr": 7.684210526315789e-07, "step": 1827 }, { "epoch": 4.804733727810651, "high_lr": 3.842105263157895e-05, "low_lr": 7.684210526315789e-07, "step": 1827 }, { "epoch": 4.804733727810651, "high_lr": 3.842105263157895e-05, "low_lr": 7.684210526315789e-07, "step": 1827 }, { "epoch": 4.804733727810651, "high_lr": 3.842105263157895e-05, "low_lr": 7.684210526315789e-07, "step": 1827 }, { "epoch": 4.804733727810651, "high_lr": 3.842105263157895e-05, "low_lr": 7.684210526315789e-07, "step": 1827 }, { "epoch": 4.807363576594346, "grad_norm": 1.5508402585983276, "learning_rate": 3.789473684210527e-05, "loss": 1.216, "step": 1828 }, { "epoch": 4.807363576594346, "high_lr": 3.789473684210527e-05, "low_lr": 7.578947368421054e-07, "step": 1828 }, { "epoch": 4.807363576594346, "high_lr": 3.789473684210527e-05, "low_lr": 7.578947368421054e-07, "step": 1828 }, { "epoch": 4.807363576594346, "high_lr": 3.789473684210527e-05, "low_lr": 7.578947368421054e-07, "step": 1828 }, { "epoch": 4.807363576594346, "high_lr": 3.789473684210527e-05, "low_lr": 7.578947368421054e-07, "step": 1828 }, { "epoch": 4.807363576594346, "high_lr": 3.789473684210527e-05, "low_lr": 7.578947368421054e-07, "step": 1828 }, { "epoch": 4.807363576594346, "high_lr": 3.789473684210527e-05, "low_lr": 7.578947368421054e-07, "step": 1828 }, { "epoch": 4.807363576594346, "high_lr": 3.789473684210527e-05, "low_lr": 7.578947368421054e-07, "step": 1828 }, { "epoch": 4.807363576594346, "high_lr": 3.789473684210527e-05, "low_lr": 7.578947368421054e-07, "step": 1828 }, { "epoch": 4.809993425378041, "grad_norm": 1.5729447603225708, "learning_rate": 3.736842105263158e-05, "loss": 1.2267, "step": 1829 }, { "epoch": 4.809993425378041, "high_lr": 3.736842105263158e-05, "low_lr": 7.473684210526316e-07, "step": 1829 }, { "epoch": 4.809993425378041, "high_lr": 3.736842105263158e-05, "low_lr": 7.473684210526316e-07, "step": 1829 }, { "epoch": 4.809993425378041, "high_lr": 3.736842105263158e-05, "low_lr": 7.473684210526316e-07, "step": 1829 }, { "epoch": 4.809993425378041, "high_lr": 3.736842105263158e-05, "low_lr": 7.473684210526316e-07, "step": 1829 }, { "epoch": 4.809993425378041, "high_lr": 3.736842105263158e-05, "low_lr": 7.473684210526316e-07, "step": 1829 }, { "epoch": 4.809993425378041, "high_lr": 3.736842105263158e-05, "low_lr": 7.473684210526316e-07, "step": 1829 }, { "epoch": 4.809993425378041, "high_lr": 3.736842105263158e-05, "low_lr": 7.473684210526316e-07, "step": 1829 }, { "epoch": 4.809993425378041, "high_lr": 3.736842105263158e-05, "low_lr": 7.473684210526316e-07, "step": 1829 }, { "epoch": 4.812623274161735, "grad_norm": 1.4417654275894165, "learning_rate": 3.6842105263157895e-05, "loss": 1.2055, "step": 1830 }, { "epoch": 4.812623274161735, "high_lr": 3.6842105263157895e-05, "low_lr": 7.368421052631579e-07, "step": 1830 }, { "epoch": 4.812623274161735, "high_lr": 3.6842105263157895e-05, "low_lr": 7.368421052631579e-07, "step": 1830 }, { "epoch": 4.812623274161735, "high_lr": 3.6842105263157895e-05, "low_lr": 7.368421052631579e-07, "step": 1830 }, { "epoch": 4.812623274161735, "high_lr": 3.6842105263157895e-05, "low_lr": 7.368421052631579e-07, "step": 1830 }, { "epoch": 4.812623274161735, "high_lr": 3.6842105263157895e-05, "low_lr": 7.368421052631579e-07, "step": 1830 }, { "epoch": 4.812623274161735, "high_lr": 3.6842105263157895e-05, "low_lr": 7.368421052631579e-07, "step": 1830 }, { "epoch": 4.812623274161735, "high_lr": 3.6842105263157895e-05, "low_lr": 7.368421052631579e-07, "step": 1830 }, { "epoch": 4.812623274161735, "high_lr": 3.6842105263157895e-05, "low_lr": 7.368421052631579e-07, "step": 1830 }, { "epoch": 4.815253122945431, "grad_norm": 1.4560467004776, "learning_rate": 3.6315789473684214e-05, "loss": 1.2136, "step": 1831 }, { "epoch": 4.815253122945431, "high_lr": 3.6315789473684214e-05, "low_lr": 7.263157894736843e-07, "step": 1831 }, { "epoch": 4.815253122945431, "high_lr": 3.6315789473684214e-05, "low_lr": 7.263157894736843e-07, "step": 1831 }, { "epoch": 4.815253122945431, "high_lr": 3.6315789473684214e-05, "low_lr": 7.263157894736843e-07, "step": 1831 }, { "epoch": 4.815253122945431, "high_lr": 3.6315789473684214e-05, "low_lr": 7.263157894736843e-07, "step": 1831 }, { "epoch": 4.815253122945431, "high_lr": 3.6315789473684214e-05, "low_lr": 7.263157894736843e-07, "step": 1831 }, { "epoch": 4.815253122945431, "high_lr": 3.6315789473684214e-05, "low_lr": 7.263157894736843e-07, "step": 1831 }, { "epoch": 4.815253122945431, "high_lr": 3.6315789473684214e-05, "low_lr": 7.263157894736843e-07, "step": 1831 }, { "epoch": 4.815253122945431, "high_lr": 3.6315789473684214e-05, "low_lr": 7.263157894736843e-07, "step": 1831 }, { "epoch": 4.817882971729126, "grad_norm": 1.5613391399383545, "learning_rate": 3.578947368421053e-05, "loss": 1.2168, "step": 1832 }, { "epoch": 4.817882971729126, "high_lr": 3.578947368421053e-05, "low_lr": 7.157894736842106e-07, "step": 1832 }, { "epoch": 4.817882971729126, "high_lr": 3.578947368421053e-05, "low_lr": 7.157894736842106e-07, "step": 1832 }, { "epoch": 4.817882971729126, "high_lr": 3.578947368421053e-05, "low_lr": 7.157894736842106e-07, "step": 1832 }, { "epoch": 4.817882971729126, "high_lr": 3.578947368421053e-05, "low_lr": 7.157894736842106e-07, "step": 1832 }, { "epoch": 4.817882971729126, "high_lr": 3.578947368421053e-05, "low_lr": 7.157894736842106e-07, "step": 1832 }, { "epoch": 4.817882971729126, "high_lr": 3.578947368421053e-05, "low_lr": 7.157894736842106e-07, "step": 1832 }, { "epoch": 4.817882971729126, "high_lr": 3.578947368421053e-05, "low_lr": 7.157894736842106e-07, "step": 1832 }, { "epoch": 4.817882971729126, "high_lr": 3.578947368421053e-05, "low_lr": 7.157894736842106e-07, "step": 1832 }, { "epoch": 4.82051282051282, "grad_norm": 1.5630950927734375, "learning_rate": 3.526315789473685e-05, "loss": 1.2381, "step": 1833 }, { "epoch": 4.82051282051282, "high_lr": 3.526315789473685e-05, "low_lr": 7.052631578947369e-07, "step": 1833 }, { "epoch": 4.82051282051282, "high_lr": 3.526315789473685e-05, "low_lr": 7.052631578947369e-07, "step": 1833 }, { "epoch": 4.82051282051282, "high_lr": 3.526315789473685e-05, "low_lr": 7.052631578947369e-07, "step": 1833 }, { "epoch": 4.82051282051282, "high_lr": 3.526315789473685e-05, "low_lr": 7.052631578947369e-07, "step": 1833 }, { "epoch": 4.82051282051282, "high_lr": 3.526315789473685e-05, "low_lr": 7.052631578947369e-07, "step": 1833 }, { "epoch": 4.82051282051282, "high_lr": 3.526315789473685e-05, "low_lr": 7.052631578947369e-07, "step": 1833 }, { "epoch": 4.82051282051282, "high_lr": 3.526315789473685e-05, "low_lr": 7.052631578947369e-07, "step": 1833 }, { "epoch": 4.82051282051282, "high_lr": 3.526315789473685e-05, "low_lr": 7.052631578947369e-07, "step": 1833 }, { "epoch": 4.823142669296516, "grad_norm": 1.5562907457351685, "learning_rate": 3.473684210526316e-05, "loss": 1.2246, "step": 1834 }, { "epoch": 4.823142669296516, "high_lr": 3.473684210526316e-05, "low_lr": 6.947368421052631e-07, "step": 1834 }, { "epoch": 4.823142669296516, "high_lr": 3.473684210526316e-05, "low_lr": 6.947368421052631e-07, "step": 1834 }, { "epoch": 4.823142669296516, "high_lr": 3.473684210526316e-05, "low_lr": 6.947368421052631e-07, "step": 1834 }, { "epoch": 4.823142669296516, "high_lr": 3.473684210526316e-05, "low_lr": 6.947368421052631e-07, "step": 1834 }, { "epoch": 4.823142669296516, "high_lr": 3.473684210526316e-05, "low_lr": 6.947368421052631e-07, "step": 1834 }, { "epoch": 4.823142669296516, "high_lr": 3.473684210526316e-05, "low_lr": 6.947368421052631e-07, "step": 1834 }, { "epoch": 4.823142669296516, "high_lr": 3.473684210526316e-05, "low_lr": 6.947368421052631e-07, "step": 1834 }, { "epoch": 4.823142669296516, "high_lr": 3.473684210526316e-05, "low_lr": 6.947368421052631e-07, "step": 1834 }, { "epoch": 4.8257725180802105, "grad_norm": 1.4658030271530151, "learning_rate": 3.421052631578948e-05, "loss": 1.2116, "step": 1835 }, { "epoch": 4.8257725180802105, "high_lr": 3.421052631578948e-05, "low_lr": 6.842105263157896e-07, "step": 1835 }, { "epoch": 4.8257725180802105, "high_lr": 3.421052631578948e-05, "low_lr": 6.842105263157896e-07, "step": 1835 }, { "epoch": 4.8257725180802105, "high_lr": 3.421052631578948e-05, "low_lr": 6.842105263157896e-07, "step": 1835 }, { "epoch": 4.8257725180802105, "high_lr": 3.421052631578948e-05, "low_lr": 6.842105263157896e-07, "step": 1835 }, { "epoch": 4.8257725180802105, "high_lr": 3.421052631578948e-05, "low_lr": 6.842105263157896e-07, "step": 1835 }, { "epoch": 4.8257725180802105, "high_lr": 3.421052631578948e-05, "low_lr": 6.842105263157896e-07, "step": 1835 }, { "epoch": 4.8257725180802105, "high_lr": 3.421052631578948e-05, "low_lr": 6.842105263157896e-07, "step": 1835 }, { "epoch": 4.8257725180802105, "high_lr": 3.421052631578948e-05, "low_lr": 6.842105263157896e-07, "step": 1835 }, { "epoch": 4.828402366863905, "grad_norm": 1.5255767107009888, "learning_rate": 3.3684210526315786e-05, "loss": 1.2127, "step": 1836 }, { "epoch": 4.828402366863905, "high_lr": 3.3684210526315786e-05, "low_lr": 6.736842105263158e-07, "step": 1836 }, { "epoch": 4.828402366863905, "high_lr": 3.3684210526315786e-05, "low_lr": 6.736842105263158e-07, "step": 1836 }, { "epoch": 4.828402366863905, "high_lr": 3.3684210526315786e-05, "low_lr": 6.736842105263158e-07, "step": 1836 }, { "epoch": 4.828402366863905, "high_lr": 3.3684210526315786e-05, "low_lr": 6.736842105263158e-07, "step": 1836 }, { "epoch": 4.828402366863905, "high_lr": 3.3684210526315786e-05, "low_lr": 6.736842105263158e-07, "step": 1836 }, { "epoch": 4.828402366863905, "high_lr": 3.3684210526315786e-05, "low_lr": 6.736842105263158e-07, "step": 1836 }, { "epoch": 4.828402366863905, "high_lr": 3.3684210526315786e-05, "low_lr": 6.736842105263158e-07, "step": 1836 }, { "epoch": 4.828402366863905, "high_lr": 3.3684210526315786e-05, "low_lr": 6.736842105263158e-07, "step": 1836 }, { "epoch": 4.8310322156476, "grad_norm": 1.6164422035217285, "learning_rate": 3.3157894736842106e-05, "loss": 1.1702, "step": 1837 }, { "epoch": 4.8310322156476, "high_lr": 3.3157894736842106e-05, "low_lr": 6.631578947368422e-07, "step": 1837 }, { "epoch": 4.8310322156476, "high_lr": 3.3157894736842106e-05, "low_lr": 6.631578947368422e-07, "step": 1837 }, { "epoch": 4.8310322156476, "high_lr": 3.3157894736842106e-05, "low_lr": 6.631578947368422e-07, "step": 1837 }, { "epoch": 4.8310322156476, "high_lr": 3.3157894736842106e-05, "low_lr": 6.631578947368422e-07, "step": 1837 }, { "epoch": 4.8310322156476, "high_lr": 3.3157894736842106e-05, "low_lr": 6.631578947368422e-07, "step": 1837 }, { "epoch": 4.8310322156476, "high_lr": 3.3157894736842106e-05, "low_lr": 6.631578947368422e-07, "step": 1837 }, { "epoch": 4.8310322156476, "high_lr": 3.3157894736842106e-05, "low_lr": 6.631578947368422e-07, "step": 1837 }, { "epoch": 4.8310322156476, "high_lr": 3.3157894736842106e-05, "low_lr": 6.631578947368422e-07, "step": 1837 }, { "epoch": 4.833662064431295, "grad_norm": 1.6228440999984741, "learning_rate": 3.263157894736842e-05, "loss": 1.2445, "step": 1838 }, { "epoch": 4.833662064431295, "high_lr": 3.263157894736842e-05, "low_lr": 6.526315789473684e-07, "step": 1838 }, { "epoch": 4.833662064431295, "high_lr": 3.263157894736842e-05, "low_lr": 6.526315789473684e-07, "step": 1838 }, { "epoch": 4.833662064431295, "high_lr": 3.263157894736842e-05, "low_lr": 6.526315789473684e-07, "step": 1838 }, { "epoch": 4.833662064431295, "high_lr": 3.263157894736842e-05, "low_lr": 6.526315789473684e-07, "step": 1838 }, { "epoch": 4.833662064431295, "high_lr": 3.263157894736842e-05, "low_lr": 6.526315789473684e-07, "step": 1838 }, { "epoch": 4.833662064431295, "high_lr": 3.263157894736842e-05, "low_lr": 6.526315789473684e-07, "step": 1838 }, { "epoch": 4.833662064431295, "high_lr": 3.263157894736842e-05, "low_lr": 6.526315789473684e-07, "step": 1838 }, { "epoch": 4.833662064431295, "high_lr": 3.263157894736842e-05, "low_lr": 6.526315789473684e-07, "step": 1838 }, { "epoch": 4.83629191321499, "grad_norm": 1.4909409284591675, "learning_rate": 3.210526315789473e-05, "loss": 1.2221, "step": 1839 }, { "epoch": 4.83629191321499, "high_lr": 3.210526315789473e-05, "low_lr": 6.421052631578948e-07, "step": 1839 }, { "epoch": 4.83629191321499, "high_lr": 3.210526315789473e-05, "low_lr": 6.421052631578948e-07, "step": 1839 }, { "epoch": 4.83629191321499, "high_lr": 3.210526315789473e-05, "low_lr": 6.421052631578948e-07, "step": 1839 }, { "epoch": 4.83629191321499, "high_lr": 3.210526315789473e-05, "low_lr": 6.421052631578948e-07, "step": 1839 }, { "epoch": 4.83629191321499, "high_lr": 3.210526315789473e-05, "low_lr": 6.421052631578948e-07, "step": 1839 }, { "epoch": 4.83629191321499, "high_lr": 3.210526315789473e-05, "low_lr": 6.421052631578948e-07, "step": 1839 }, { "epoch": 4.83629191321499, "high_lr": 3.210526315789473e-05, "low_lr": 6.421052631578948e-07, "step": 1839 }, { "epoch": 4.83629191321499, "high_lr": 3.210526315789473e-05, "low_lr": 6.421052631578948e-07, "step": 1839 }, { "epoch": 4.8389217619986855, "grad_norm": 1.4390934705734253, "learning_rate": 3.157894736842105e-05, "loss": 1.2059, "step": 1840 }, { "epoch": 4.8389217619986855, "high_lr": 3.157894736842105e-05, "low_lr": 6.315789473684211e-07, "step": 1840 }, { "epoch": 4.8389217619986855, "high_lr": 3.157894736842105e-05, "low_lr": 6.315789473684211e-07, "step": 1840 }, { "epoch": 4.8389217619986855, "high_lr": 3.157894736842105e-05, "low_lr": 6.315789473684211e-07, "step": 1840 }, { "epoch": 4.8389217619986855, "high_lr": 3.157894736842105e-05, "low_lr": 6.315789473684211e-07, "step": 1840 }, { "epoch": 4.8389217619986855, "high_lr": 3.157894736842105e-05, "low_lr": 6.315789473684211e-07, "step": 1840 }, { "epoch": 4.8389217619986855, "high_lr": 3.157894736842105e-05, "low_lr": 6.315789473684211e-07, "step": 1840 }, { "epoch": 4.8389217619986855, "high_lr": 3.157894736842105e-05, "low_lr": 6.315789473684211e-07, "step": 1840 }, { "epoch": 4.8389217619986855, "high_lr": 3.157894736842105e-05, "low_lr": 6.315789473684211e-07, "step": 1840 }, { "epoch": 4.84155161078238, "grad_norm": 1.493750810623169, "learning_rate": 3.105263157894737e-05, "loss": 1.2165, "step": 1841 }, { "epoch": 4.84155161078238, "high_lr": 3.105263157894737e-05, "low_lr": 6.210526315789474e-07, "step": 1841 }, { "epoch": 4.84155161078238, "high_lr": 3.105263157894737e-05, "low_lr": 6.210526315789474e-07, "step": 1841 }, { "epoch": 4.84155161078238, "high_lr": 3.105263157894737e-05, "low_lr": 6.210526315789474e-07, "step": 1841 }, { "epoch": 4.84155161078238, "high_lr": 3.105263157894737e-05, "low_lr": 6.210526315789474e-07, "step": 1841 }, { "epoch": 4.84155161078238, "high_lr": 3.105263157894737e-05, "low_lr": 6.210526315789474e-07, "step": 1841 }, { "epoch": 4.84155161078238, "high_lr": 3.105263157894737e-05, "low_lr": 6.210526315789474e-07, "step": 1841 }, { "epoch": 4.84155161078238, "high_lr": 3.105263157894737e-05, "low_lr": 6.210526315789474e-07, "step": 1841 }, { "epoch": 4.84155161078238, "high_lr": 3.105263157894737e-05, "low_lr": 6.210526315789474e-07, "step": 1841 }, { "epoch": 4.844181459566075, "grad_norm": 1.6403719186782837, "learning_rate": 3.0526315789473684e-05, "loss": 1.2407, "step": 1842 }, { "epoch": 4.844181459566075, "high_lr": 3.0526315789473684e-05, "low_lr": 6.105263157894738e-07, "step": 1842 }, { "epoch": 4.844181459566075, "high_lr": 3.0526315789473684e-05, "low_lr": 6.105263157894738e-07, "step": 1842 }, { "epoch": 4.844181459566075, "high_lr": 3.0526315789473684e-05, "low_lr": 6.105263157894738e-07, "step": 1842 }, { "epoch": 4.844181459566075, "high_lr": 3.0526315789473684e-05, "low_lr": 6.105263157894738e-07, "step": 1842 }, { "epoch": 4.844181459566075, "high_lr": 3.0526315789473684e-05, "low_lr": 6.105263157894738e-07, "step": 1842 }, { "epoch": 4.844181459566075, "high_lr": 3.0526315789473684e-05, "low_lr": 6.105263157894738e-07, "step": 1842 }, { "epoch": 4.844181459566075, "high_lr": 3.0526315789473684e-05, "low_lr": 6.105263157894738e-07, "step": 1842 }, { "epoch": 4.844181459566075, "high_lr": 3.0526315789473684e-05, "low_lr": 6.105263157894738e-07, "step": 1842 }, { "epoch": 4.8468113083497695, "grad_norm": 1.5683202743530273, "learning_rate": 3e-05, "loss": 1.194, "step": 1843 }, { "epoch": 4.8468113083497695, "high_lr": 3e-05, "low_lr": 6.000000000000001e-07, "step": 1843 }, { "epoch": 4.8468113083497695, "high_lr": 3e-05, "low_lr": 6.000000000000001e-07, "step": 1843 }, { "epoch": 4.8468113083497695, "high_lr": 3e-05, "low_lr": 6.000000000000001e-07, "step": 1843 }, { "epoch": 4.8468113083497695, "high_lr": 3e-05, "low_lr": 6.000000000000001e-07, "step": 1843 }, { "epoch": 4.8468113083497695, "high_lr": 3e-05, "low_lr": 6.000000000000001e-07, "step": 1843 }, { "epoch": 4.8468113083497695, "high_lr": 3e-05, "low_lr": 6.000000000000001e-07, "step": 1843 }, { "epoch": 4.8468113083497695, "high_lr": 3e-05, "low_lr": 6.000000000000001e-07, "step": 1843 }, { "epoch": 4.8468113083497695, "high_lr": 3e-05, "low_lr": 6.000000000000001e-07, "step": 1843 }, { "epoch": 4.849441157133465, "grad_norm": 1.7279189825057983, "learning_rate": 2.9473684210526317e-05, "loss": 1.2562, "step": 1844 }, { "epoch": 4.849441157133465, "high_lr": 2.9473684210526317e-05, "low_lr": 5.894736842105263e-07, "step": 1844 }, { "epoch": 4.849441157133465, "high_lr": 2.9473684210526317e-05, "low_lr": 5.894736842105263e-07, "step": 1844 }, { "epoch": 4.849441157133465, "high_lr": 2.9473684210526317e-05, "low_lr": 5.894736842105263e-07, "step": 1844 }, { "epoch": 4.849441157133465, "high_lr": 2.9473684210526317e-05, "low_lr": 5.894736842105263e-07, "step": 1844 }, { "epoch": 4.849441157133465, "high_lr": 2.9473684210526317e-05, "low_lr": 5.894736842105263e-07, "step": 1844 }, { "epoch": 4.849441157133465, "high_lr": 2.9473684210526317e-05, "low_lr": 5.894736842105263e-07, "step": 1844 }, { "epoch": 4.849441157133465, "high_lr": 2.9473684210526317e-05, "low_lr": 5.894736842105263e-07, "step": 1844 }, { "epoch": 4.849441157133465, "high_lr": 2.9473684210526317e-05, "low_lr": 5.894736842105263e-07, "step": 1844 }, { "epoch": 4.85207100591716, "grad_norm": 1.4668720960617065, "learning_rate": 2.8947368421052634e-05, "loss": 1.2107, "step": 1845 }, { "epoch": 4.85207100591716, "high_lr": 2.8947368421052634e-05, "low_lr": 5.789473684210526e-07, "step": 1845 }, { "epoch": 4.85207100591716, "high_lr": 2.8947368421052634e-05, "low_lr": 5.789473684210526e-07, "step": 1845 }, { "epoch": 4.85207100591716, "high_lr": 2.8947368421052634e-05, "low_lr": 5.789473684210526e-07, "step": 1845 }, { "epoch": 4.85207100591716, "high_lr": 2.8947368421052634e-05, "low_lr": 5.789473684210526e-07, "step": 1845 }, { "epoch": 4.85207100591716, "high_lr": 2.8947368421052634e-05, "low_lr": 5.789473684210526e-07, "step": 1845 }, { "epoch": 4.85207100591716, "high_lr": 2.8947368421052634e-05, "low_lr": 5.789473684210526e-07, "step": 1845 }, { "epoch": 4.85207100591716, "high_lr": 2.8947368421052634e-05, "low_lr": 5.789473684210526e-07, "step": 1845 }, { "epoch": 4.85207100591716, "high_lr": 2.8947368421052634e-05, "low_lr": 5.789473684210526e-07, "step": 1845 }, { "epoch": 4.854700854700854, "grad_norm": 1.5402225255966187, "learning_rate": 2.842105263157895e-05, "loss": 1.2733, "step": 1846 }, { "epoch": 4.854700854700854, "high_lr": 2.842105263157895e-05, "low_lr": 5.68421052631579e-07, "step": 1846 }, { "epoch": 4.854700854700854, "high_lr": 2.842105263157895e-05, "low_lr": 5.68421052631579e-07, "step": 1846 }, { "epoch": 4.854700854700854, "high_lr": 2.842105263157895e-05, "low_lr": 5.68421052631579e-07, "step": 1846 }, { "epoch": 4.854700854700854, "high_lr": 2.842105263157895e-05, "low_lr": 5.68421052631579e-07, "step": 1846 }, { "epoch": 4.854700854700854, "high_lr": 2.842105263157895e-05, "low_lr": 5.68421052631579e-07, "step": 1846 }, { "epoch": 4.854700854700854, "high_lr": 2.842105263157895e-05, "low_lr": 5.68421052631579e-07, "step": 1846 }, { "epoch": 4.854700854700854, "high_lr": 2.842105263157895e-05, "low_lr": 5.68421052631579e-07, "step": 1846 }, { "epoch": 4.854700854700854, "high_lr": 2.842105263157895e-05, "low_lr": 5.68421052631579e-07, "step": 1846 }, { "epoch": 4.85733070348455, "grad_norm": 1.7148109674453735, "learning_rate": 2.7894736842105263e-05, "loss": 1.1944, "step": 1847 }, { "epoch": 4.85733070348455, "high_lr": 2.7894736842105263e-05, "low_lr": 5.578947368421053e-07, "step": 1847 }, { "epoch": 4.85733070348455, "high_lr": 2.7894736842105263e-05, "low_lr": 5.578947368421053e-07, "step": 1847 }, { "epoch": 4.85733070348455, "high_lr": 2.7894736842105263e-05, "low_lr": 5.578947368421053e-07, "step": 1847 }, { "epoch": 4.85733070348455, "high_lr": 2.7894736842105263e-05, "low_lr": 5.578947368421053e-07, "step": 1847 }, { "epoch": 4.85733070348455, "high_lr": 2.7894736842105263e-05, "low_lr": 5.578947368421053e-07, "step": 1847 }, { "epoch": 4.85733070348455, "high_lr": 2.7894736842105263e-05, "low_lr": 5.578947368421053e-07, "step": 1847 }, { "epoch": 4.85733070348455, "high_lr": 2.7894736842105263e-05, "low_lr": 5.578947368421053e-07, "step": 1847 }, { "epoch": 4.85733070348455, "high_lr": 2.7894736842105263e-05, "low_lr": 5.578947368421053e-07, "step": 1847 }, { "epoch": 4.8599605522682445, "grad_norm": 1.5167216062545776, "learning_rate": 2.736842105263158e-05, "loss": 1.2431, "step": 1848 }, { "epoch": 4.8599605522682445, "high_lr": 2.736842105263158e-05, "low_lr": 5.473684210526316e-07, "step": 1848 }, { "epoch": 4.8599605522682445, "high_lr": 2.736842105263158e-05, "low_lr": 5.473684210526316e-07, "step": 1848 }, { "epoch": 4.8599605522682445, "high_lr": 2.736842105263158e-05, "low_lr": 5.473684210526316e-07, "step": 1848 }, { "epoch": 4.8599605522682445, "high_lr": 2.736842105263158e-05, "low_lr": 5.473684210526316e-07, "step": 1848 }, { "epoch": 4.8599605522682445, "high_lr": 2.736842105263158e-05, "low_lr": 5.473684210526316e-07, "step": 1848 }, { "epoch": 4.8599605522682445, "high_lr": 2.736842105263158e-05, "low_lr": 5.473684210526316e-07, "step": 1848 }, { "epoch": 4.8599605522682445, "high_lr": 2.736842105263158e-05, "low_lr": 5.473684210526316e-07, "step": 1848 }, { "epoch": 4.8599605522682445, "high_lr": 2.736842105263158e-05, "low_lr": 5.473684210526316e-07, "step": 1848 }, { "epoch": 4.862590401051939, "grad_norm": 1.5432519912719727, "learning_rate": 2.6842105263157896e-05, "loss": 1.1531, "step": 1849 }, { "epoch": 4.862590401051939, "high_lr": 2.6842105263157896e-05, "low_lr": 5.368421052631579e-07, "step": 1849 }, { "epoch": 4.862590401051939, "high_lr": 2.6842105263157896e-05, "low_lr": 5.368421052631579e-07, "step": 1849 }, { "epoch": 4.862590401051939, "high_lr": 2.6842105263157896e-05, "low_lr": 5.368421052631579e-07, "step": 1849 }, { "epoch": 4.862590401051939, "high_lr": 2.6842105263157896e-05, "low_lr": 5.368421052631579e-07, "step": 1849 }, { "epoch": 4.862590401051939, "high_lr": 2.6842105263157896e-05, "low_lr": 5.368421052631579e-07, "step": 1849 }, { "epoch": 4.862590401051939, "high_lr": 2.6842105263157896e-05, "low_lr": 5.368421052631579e-07, "step": 1849 }, { "epoch": 4.862590401051939, "high_lr": 2.6842105263157896e-05, "low_lr": 5.368421052631579e-07, "step": 1849 }, { "epoch": 4.862590401051939, "high_lr": 2.6842105263157896e-05, "low_lr": 5.368421052631579e-07, "step": 1849 }, { "epoch": 4.865220249835635, "grad_norm": 1.5639528036117554, "learning_rate": 2.631578947368421e-05, "loss": 1.1994, "step": 1850 }, { "epoch": 4.865220249835635, "high_lr": 2.631578947368421e-05, "low_lr": 5.263157894736843e-07, "step": 1850 }, { "epoch": 4.865220249835635, "high_lr": 2.631578947368421e-05, "low_lr": 5.263157894736843e-07, "step": 1850 }, { "epoch": 4.865220249835635, "high_lr": 2.631578947368421e-05, "low_lr": 5.263157894736843e-07, "step": 1850 }, { "epoch": 4.865220249835635, "high_lr": 2.631578947368421e-05, "low_lr": 5.263157894736843e-07, "step": 1850 }, { "epoch": 4.865220249835635, "high_lr": 2.631578947368421e-05, "low_lr": 5.263157894736843e-07, "step": 1850 }, { "epoch": 4.865220249835635, "high_lr": 2.631578947368421e-05, "low_lr": 5.263157894736843e-07, "step": 1850 }, { "epoch": 4.865220249835635, "high_lr": 2.631578947368421e-05, "low_lr": 5.263157894736843e-07, "step": 1850 }, { "epoch": 4.865220249835635, "high_lr": 2.631578947368421e-05, "low_lr": 5.263157894736843e-07, "step": 1850 }, { "epoch": 4.867850098619329, "grad_norm": 1.597714900970459, "learning_rate": 2.5789473684210525e-05, "loss": 1.1889, "step": 1851 }, { "epoch": 4.867850098619329, "high_lr": 2.5789473684210525e-05, "low_lr": 5.157894736842106e-07, "step": 1851 }, { "epoch": 4.867850098619329, "high_lr": 2.5789473684210525e-05, "low_lr": 5.157894736842106e-07, "step": 1851 }, { "epoch": 4.867850098619329, "high_lr": 2.5789473684210525e-05, "low_lr": 5.157894736842106e-07, "step": 1851 }, { "epoch": 4.867850098619329, "high_lr": 2.5789473684210525e-05, "low_lr": 5.157894736842106e-07, "step": 1851 }, { "epoch": 4.867850098619329, "high_lr": 2.5789473684210525e-05, "low_lr": 5.157894736842106e-07, "step": 1851 }, { "epoch": 4.867850098619329, "high_lr": 2.5789473684210525e-05, "low_lr": 5.157894736842106e-07, "step": 1851 }, { "epoch": 4.867850098619329, "high_lr": 2.5789473684210525e-05, "low_lr": 5.157894736842106e-07, "step": 1851 }, { "epoch": 4.867850098619329, "high_lr": 2.5789473684210525e-05, "low_lr": 5.157894736842106e-07, "step": 1851 }, { "epoch": 4.870479947403024, "grad_norm": 1.6463160514831543, "learning_rate": 2.526315789473684e-05, "loss": 1.2115, "step": 1852 }, { "epoch": 4.870479947403024, "high_lr": 2.526315789473684e-05, "low_lr": 5.052631578947369e-07, "step": 1852 }, { "epoch": 4.870479947403024, "high_lr": 2.526315789473684e-05, "low_lr": 5.052631578947369e-07, "step": 1852 }, { "epoch": 4.870479947403024, "high_lr": 2.526315789473684e-05, "low_lr": 5.052631578947369e-07, "step": 1852 }, { "epoch": 4.870479947403024, "high_lr": 2.526315789473684e-05, "low_lr": 5.052631578947369e-07, "step": 1852 }, { "epoch": 4.870479947403024, "high_lr": 2.526315789473684e-05, "low_lr": 5.052631578947369e-07, "step": 1852 }, { "epoch": 4.870479947403024, "high_lr": 2.526315789473684e-05, "low_lr": 5.052631578947369e-07, "step": 1852 }, { "epoch": 4.870479947403024, "high_lr": 2.526315789473684e-05, "low_lr": 5.052631578947369e-07, "step": 1852 }, { "epoch": 4.870479947403024, "high_lr": 2.526315789473684e-05, "low_lr": 5.052631578947369e-07, "step": 1852 }, { "epoch": 4.87310979618672, "grad_norm": 1.67196524143219, "learning_rate": 2.4736842105263158e-05, "loss": 1.2499, "step": 1853 }, { "epoch": 4.87310979618672, "high_lr": 2.4736842105263158e-05, "low_lr": 4.947368421052632e-07, "step": 1853 }, { "epoch": 4.87310979618672, "high_lr": 2.4736842105263158e-05, "low_lr": 4.947368421052632e-07, "step": 1853 }, { "epoch": 4.87310979618672, "high_lr": 2.4736842105263158e-05, "low_lr": 4.947368421052632e-07, "step": 1853 }, { "epoch": 4.87310979618672, "high_lr": 2.4736842105263158e-05, "low_lr": 4.947368421052632e-07, "step": 1853 }, { "epoch": 4.87310979618672, "high_lr": 2.4736842105263158e-05, "low_lr": 4.947368421052632e-07, "step": 1853 }, { "epoch": 4.87310979618672, "high_lr": 2.4736842105263158e-05, "low_lr": 4.947368421052632e-07, "step": 1853 }, { "epoch": 4.87310979618672, "high_lr": 2.4736842105263158e-05, "low_lr": 4.947368421052632e-07, "step": 1853 }, { "epoch": 4.87310979618672, "high_lr": 2.4736842105263158e-05, "low_lr": 4.947368421052632e-07, "step": 1853 }, { "epoch": 4.875739644970414, "grad_norm": 1.541540265083313, "learning_rate": 2.4210526315789474e-05, "loss": 1.1752, "step": 1854 }, { "epoch": 4.875739644970414, "high_lr": 2.4210526315789474e-05, "low_lr": 4.842105263157895e-07, "step": 1854 }, { "epoch": 4.875739644970414, "high_lr": 2.4210526315789474e-05, "low_lr": 4.842105263157895e-07, "step": 1854 }, { "epoch": 4.875739644970414, "high_lr": 2.4210526315789474e-05, "low_lr": 4.842105263157895e-07, "step": 1854 }, { "epoch": 4.875739644970414, "high_lr": 2.4210526315789474e-05, "low_lr": 4.842105263157895e-07, "step": 1854 }, { "epoch": 4.875739644970414, "high_lr": 2.4210526315789474e-05, "low_lr": 4.842105263157895e-07, "step": 1854 }, { "epoch": 4.875739644970414, "high_lr": 2.4210526315789474e-05, "low_lr": 4.842105263157895e-07, "step": 1854 }, { "epoch": 4.875739644970414, "high_lr": 2.4210526315789474e-05, "low_lr": 4.842105263157895e-07, "step": 1854 }, { "epoch": 4.875739644970414, "high_lr": 2.4210526315789474e-05, "low_lr": 4.842105263157895e-07, "step": 1854 }, { "epoch": 4.878369493754109, "grad_norm": 1.6568161249160767, "learning_rate": 2.368421052631579e-05, "loss": 1.1952, "step": 1855 }, { "epoch": 4.878369493754109, "high_lr": 2.368421052631579e-05, "low_lr": 4.7368421052631585e-07, "step": 1855 }, { "epoch": 4.878369493754109, "high_lr": 2.368421052631579e-05, "low_lr": 4.7368421052631585e-07, "step": 1855 }, { "epoch": 4.878369493754109, "high_lr": 2.368421052631579e-05, "low_lr": 4.7368421052631585e-07, "step": 1855 }, { "epoch": 4.878369493754109, "high_lr": 2.368421052631579e-05, "low_lr": 4.7368421052631585e-07, "step": 1855 }, { "epoch": 4.878369493754109, "high_lr": 2.368421052631579e-05, "low_lr": 4.7368421052631585e-07, "step": 1855 }, { "epoch": 4.878369493754109, "high_lr": 2.368421052631579e-05, "low_lr": 4.7368421052631585e-07, "step": 1855 }, { "epoch": 4.878369493754109, "high_lr": 2.368421052631579e-05, "low_lr": 4.7368421052631585e-07, "step": 1855 }, { "epoch": 4.878369493754109, "high_lr": 2.368421052631579e-05, "low_lr": 4.7368421052631585e-07, "step": 1855 }, { "epoch": 4.880999342537804, "grad_norm": 1.5552313327789307, "learning_rate": 2.3157894736842107e-05, "loss": 1.19, "step": 1856 }, { "epoch": 4.880999342537804, "high_lr": 2.3157894736842107e-05, "low_lr": 4.631578947368422e-07, "step": 1856 }, { "epoch": 4.880999342537804, "high_lr": 2.3157894736842107e-05, "low_lr": 4.631578947368422e-07, "step": 1856 }, { "epoch": 4.880999342537804, "high_lr": 2.3157894736842107e-05, "low_lr": 4.631578947368422e-07, "step": 1856 }, { "epoch": 4.880999342537804, "high_lr": 2.3157894736842107e-05, "low_lr": 4.631578947368422e-07, "step": 1856 }, { "epoch": 4.880999342537804, "high_lr": 2.3157894736842107e-05, "low_lr": 4.631578947368422e-07, "step": 1856 }, { "epoch": 4.880999342537804, "high_lr": 2.3157894736842107e-05, "low_lr": 4.631578947368422e-07, "step": 1856 }, { "epoch": 4.880999342537804, "high_lr": 2.3157894736842107e-05, "low_lr": 4.631578947368422e-07, "step": 1856 }, { "epoch": 4.880999342537804, "high_lr": 2.3157894736842107e-05, "low_lr": 4.631578947368422e-07, "step": 1856 }, { "epoch": 4.883629191321499, "grad_norm": 1.3749786615371704, "learning_rate": 2.2631578947368423e-05, "loss": 1.1782, "step": 1857 }, { "epoch": 4.883629191321499, "high_lr": 2.2631578947368423e-05, "low_lr": 4.526315789473685e-07, "step": 1857 }, { "epoch": 4.883629191321499, "high_lr": 2.2631578947368423e-05, "low_lr": 4.526315789473685e-07, "step": 1857 }, { "epoch": 4.883629191321499, "high_lr": 2.2631578947368423e-05, "low_lr": 4.526315789473685e-07, "step": 1857 }, { "epoch": 4.883629191321499, "high_lr": 2.2631578947368423e-05, "low_lr": 4.526315789473685e-07, "step": 1857 }, { "epoch": 4.883629191321499, "high_lr": 2.2631578947368423e-05, "low_lr": 4.526315789473685e-07, "step": 1857 }, { "epoch": 4.883629191321499, "high_lr": 2.2631578947368423e-05, "low_lr": 4.526315789473685e-07, "step": 1857 }, { "epoch": 4.883629191321499, "high_lr": 2.2631578947368423e-05, "low_lr": 4.526315789473685e-07, "step": 1857 }, { "epoch": 4.883629191321499, "high_lr": 2.2631578947368423e-05, "low_lr": 4.526315789473685e-07, "step": 1857 }, { "epoch": 4.886259040105194, "grad_norm": 1.627353310585022, "learning_rate": 2.2105263157894736e-05, "loss": 1.2503, "step": 1858 }, { "epoch": 4.886259040105194, "high_lr": 2.2105263157894736e-05, "low_lr": 4.421052631578947e-07, "step": 1858 }, { "epoch": 4.886259040105194, "high_lr": 2.2105263157894736e-05, "low_lr": 4.421052631578947e-07, "step": 1858 }, { "epoch": 4.886259040105194, "high_lr": 2.2105263157894736e-05, "low_lr": 4.421052631578947e-07, "step": 1858 }, { "epoch": 4.886259040105194, "high_lr": 2.2105263157894736e-05, "low_lr": 4.421052631578947e-07, "step": 1858 }, { "epoch": 4.886259040105194, "high_lr": 2.2105263157894736e-05, "low_lr": 4.421052631578947e-07, "step": 1858 }, { "epoch": 4.886259040105194, "high_lr": 2.2105263157894736e-05, "low_lr": 4.421052631578947e-07, "step": 1858 }, { "epoch": 4.886259040105194, "high_lr": 2.2105263157894736e-05, "low_lr": 4.421052631578947e-07, "step": 1858 }, { "epoch": 4.886259040105194, "high_lr": 2.2105263157894736e-05, "low_lr": 4.421052631578947e-07, "step": 1858 }, { "epoch": 4.888888888888889, "grad_norm": 1.7379108667373657, "learning_rate": 2.1578947368421053e-05, "loss": 1.2567, "step": 1859 }, { "epoch": 4.888888888888889, "high_lr": 2.1578947368421053e-05, "low_lr": 4.3157894736842105e-07, "step": 1859 }, { "epoch": 4.888888888888889, "high_lr": 2.1578947368421053e-05, "low_lr": 4.3157894736842105e-07, "step": 1859 }, { "epoch": 4.888888888888889, "high_lr": 2.1578947368421053e-05, "low_lr": 4.3157894736842105e-07, "step": 1859 }, { "epoch": 4.888888888888889, "high_lr": 2.1578947368421053e-05, "low_lr": 4.3157894736842105e-07, "step": 1859 }, { "epoch": 4.888888888888889, "high_lr": 2.1578947368421053e-05, "low_lr": 4.3157894736842105e-07, "step": 1859 }, { "epoch": 4.888888888888889, "high_lr": 2.1578947368421053e-05, "low_lr": 4.3157894736842105e-07, "step": 1859 }, { "epoch": 4.888888888888889, "high_lr": 2.1578947368421053e-05, "low_lr": 4.3157894736842105e-07, "step": 1859 }, { "epoch": 4.888888888888889, "high_lr": 2.1578947368421053e-05, "low_lr": 4.3157894736842105e-07, "step": 1859 }, { "epoch": 4.891518737672584, "grad_norm": 1.6780471801757812, "learning_rate": 2.105263157894737e-05, "loss": 1.2238, "step": 1860 }, { "epoch": 4.891518737672584, "high_lr": 2.105263157894737e-05, "low_lr": 4.210526315789474e-07, "step": 1860 }, { "epoch": 4.891518737672584, "high_lr": 2.105263157894737e-05, "low_lr": 4.210526315789474e-07, "step": 1860 }, { "epoch": 4.891518737672584, "high_lr": 2.105263157894737e-05, "low_lr": 4.210526315789474e-07, "step": 1860 }, { "epoch": 4.891518737672584, "high_lr": 2.105263157894737e-05, "low_lr": 4.210526315789474e-07, "step": 1860 }, { "epoch": 4.891518737672584, "high_lr": 2.105263157894737e-05, "low_lr": 4.210526315789474e-07, "step": 1860 }, { "epoch": 4.891518737672584, "high_lr": 2.105263157894737e-05, "low_lr": 4.210526315789474e-07, "step": 1860 }, { "epoch": 4.891518737672584, "high_lr": 2.105263157894737e-05, "low_lr": 4.210526315789474e-07, "step": 1860 }, { "epoch": 4.891518737672584, "high_lr": 2.105263157894737e-05, "low_lr": 4.210526315789474e-07, "step": 1860 }, { "epoch": 4.894148586456279, "grad_norm": 1.7077248096466064, "learning_rate": 2.0526315789473685e-05, "loss": 1.2527, "step": 1861 }, { "epoch": 4.894148586456279, "high_lr": 2.0526315789473685e-05, "low_lr": 4.105263157894737e-07, "step": 1861 }, { "epoch": 4.894148586456279, "high_lr": 2.0526315789473685e-05, "low_lr": 4.105263157894737e-07, "step": 1861 }, { "epoch": 4.894148586456279, "high_lr": 2.0526315789473685e-05, "low_lr": 4.105263157894737e-07, "step": 1861 }, { "epoch": 4.894148586456279, "high_lr": 2.0526315789473685e-05, "low_lr": 4.105263157894737e-07, "step": 1861 }, { "epoch": 4.894148586456279, "high_lr": 2.0526315789473685e-05, "low_lr": 4.105263157894737e-07, "step": 1861 }, { "epoch": 4.894148586456279, "high_lr": 2.0526315789473685e-05, "low_lr": 4.105263157894737e-07, "step": 1861 }, { "epoch": 4.894148586456279, "high_lr": 2.0526315789473685e-05, "low_lr": 4.105263157894737e-07, "step": 1861 }, { "epoch": 4.894148586456279, "high_lr": 2.0526315789473685e-05, "low_lr": 4.105263157894737e-07, "step": 1861 }, { "epoch": 4.896778435239973, "grad_norm": 1.4945363998413086, "learning_rate": 2e-05, "loss": 1.2072, "step": 1862 }, { "epoch": 4.896778435239973, "high_lr": 2e-05, "low_lr": 4.0000000000000003e-07, "step": 1862 }, { "epoch": 4.896778435239973, "high_lr": 2e-05, "low_lr": 4.0000000000000003e-07, "step": 1862 }, { "epoch": 4.896778435239973, "high_lr": 2e-05, "low_lr": 4.0000000000000003e-07, "step": 1862 }, { "epoch": 4.896778435239973, "high_lr": 2e-05, "low_lr": 4.0000000000000003e-07, "step": 1862 }, { "epoch": 4.896778435239973, "high_lr": 2e-05, "low_lr": 4.0000000000000003e-07, "step": 1862 }, { "epoch": 4.896778435239973, "high_lr": 2e-05, "low_lr": 4.0000000000000003e-07, "step": 1862 }, { "epoch": 4.896778435239973, "high_lr": 2e-05, "low_lr": 4.0000000000000003e-07, "step": 1862 }, { "epoch": 4.896778435239973, "high_lr": 2e-05, "low_lr": 4.0000000000000003e-07, "step": 1862 }, { "epoch": 4.899408284023669, "grad_norm": 1.5202914476394653, "learning_rate": 1.9473684210526318e-05, "loss": 1.2008, "step": 1863 }, { "epoch": 4.899408284023669, "high_lr": 1.9473684210526318e-05, "low_lr": 3.8947368421052636e-07, "step": 1863 }, { "epoch": 4.899408284023669, "high_lr": 1.9473684210526318e-05, "low_lr": 3.8947368421052636e-07, "step": 1863 }, { "epoch": 4.899408284023669, "high_lr": 1.9473684210526318e-05, "low_lr": 3.8947368421052636e-07, "step": 1863 }, { "epoch": 4.899408284023669, "high_lr": 1.9473684210526318e-05, "low_lr": 3.8947368421052636e-07, "step": 1863 }, { "epoch": 4.899408284023669, "high_lr": 1.9473684210526318e-05, "low_lr": 3.8947368421052636e-07, "step": 1863 }, { "epoch": 4.899408284023669, "high_lr": 1.9473684210526318e-05, "low_lr": 3.8947368421052636e-07, "step": 1863 }, { "epoch": 4.899408284023669, "high_lr": 1.9473684210526318e-05, "low_lr": 3.8947368421052636e-07, "step": 1863 }, { "epoch": 4.899408284023669, "high_lr": 1.9473684210526318e-05, "low_lr": 3.8947368421052636e-07, "step": 1863 }, { "epoch": 4.9020381328073634, "grad_norm": 1.537229299545288, "learning_rate": 1.8947368421052634e-05, "loss": 1.2101, "step": 1864 }, { "epoch": 4.9020381328073634, "high_lr": 1.8947368421052634e-05, "low_lr": 3.789473684210527e-07, "step": 1864 }, { "epoch": 4.9020381328073634, "high_lr": 1.8947368421052634e-05, "low_lr": 3.789473684210527e-07, "step": 1864 }, { "epoch": 4.9020381328073634, "high_lr": 1.8947368421052634e-05, "low_lr": 3.789473684210527e-07, "step": 1864 }, { "epoch": 4.9020381328073634, "high_lr": 1.8947368421052634e-05, "low_lr": 3.789473684210527e-07, "step": 1864 }, { "epoch": 4.9020381328073634, "high_lr": 1.8947368421052634e-05, "low_lr": 3.789473684210527e-07, "step": 1864 }, { "epoch": 4.9020381328073634, "high_lr": 1.8947368421052634e-05, "low_lr": 3.789473684210527e-07, "step": 1864 }, { "epoch": 4.9020381328073634, "high_lr": 1.8947368421052634e-05, "low_lr": 3.789473684210527e-07, "step": 1864 }, { "epoch": 4.9020381328073634, "high_lr": 1.8947368421052634e-05, "low_lr": 3.789473684210527e-07, "step": 1864 }, { "epoch": 4.904667981591059, "grad_norm": 1.3996493816375732, "learning_rate": 1.8421052631578947e-05, "loss": 1.1862, "step": 1865 }, { "epoch": 4.904667981591059, "high_lr": 1.8421052631578947e-05, "low_lr": 3.6842105263157896e-07, "step": 1865 }, { "epoch": 4.904667981591059, "high_lr": 1.8421052631578947e-05, "low_lr": 3.6842105263157896e-07, "step": 1865 }, { "epoch": 4.904667981591059, "high_lr": 1.8421052631578947e-05, "low_lr": 3.6842105263157896e-07, "step": 1865 }, { "epoch": 4.904667981591059, "high_lr": 1.8421052631578947e-05, "low_lr": 3.6842105263157896e-07, "step": 1865 }, { "epoch": 4.904667981591059, "high_lr": 1.8421052631578947e-05, "low_lr": 3.6842105263157896e-07, "step": 1865 }, { "epoch": 4.904667981591059, "high_lr": 1.8421052631578947e-05, "low_lr": 3.6842105263157896e-07, "step": 1865 }, { "epoch": 4.904667981591059, "high_lr": 1.8421052631578947e-05, "low_lr": 3.6842105263157896e-07, "step": 1865 }, { "epoch": 4.904667981591059, "high_lr": 1.8421052631578947e-05, "low_lr": 3.6842105263157896e-07, "step": 1865 }, { "epoch": 4.907297830374754, "grad_norm": 1.4659632444381714, "learning_rate": 1.7894736842105264e-05, "loss": 1.1872, "step": 1866 }, { "epoch": 4.907297830374754, "high_lr": 1.7894736842105264e-05, "low_lr": 3.578947368421053e-07, "step": 1866 }, { "epoch": 4.907297830374754, "high_lr": 1.7894736842105264e-05, "low_lr": 3.578947368421053e-07, "step": 1866 }, { "epoch": 4.907297830374754, "high_lr": 1.7894736842105264e-05, "low_lr": 3.578947368421053e-07, "step": 1866 }, { "epoch": 4.907297830374754, "high_lr": 1.7894736842105264e-05, "low_lr": 3.578947368421053e-07, "step": 1866 }, { "epoch": 4.907297830374754, "high_lr": 1.7894736842105264e-05, "low_lr": 3.578947368421053e-07, "step": 1866 }, { "epoch": 4.907297830374754, "high_lr": 1.7894736842105264e-05, "low_lr": 3.578947368421053e-07, "step": 1866 }, { "epoch": 4.907297830374754, "high_lr": 1.7894736842105264e-05, "low_lr": 3.578947368421053e-07, "step": 1866 }, { "epoch": 4.907297830374754, "high_lr": 1.7894736842105264e-05, "low_lr": 3.578947368421053e-07, "step": 1866 }, { "epoch": 4.909927679158448, "grad_norm": 1.7892240285873413, "learning_rate": 1.736842105263158e-05, "loss": 1.1989, "step": 1867 }, { "epoch": 4.909927679158448, "high_lr": 1.736842105263158e-05, "low_lr": 3.4736842105263157e-07, "step": 1867 }, { "epoch": 4.909927679158448, "high_lr": 1.736842105263158e-05, "low_lr": 3.4736842105263157e-07, "step": 1867 }, { "epoch": 4.909927679158448, "high_lr": 1.736842105263158e-05, "low_lr": 3.4736842105263157e-07, "step": 1867 }, { "epoch": 4.909927679158448, "high_lr": 1.736842105263158e-05, "low_lr": 3.4736842105263157e-07, "step": 1867 }, { "epoch": 4.909927679158448, "high_lr": 1.736842105263158e-05, "low_lr": 3.4736842105263157e-07, "step": 1867 }, { "epoch": 4.909927679158448, "high_lr": 1.736842105263158e-05, "low_lr": 3.4736842105263157e-07, "step": 1867 }, { "epoch": 4.909927679158448, "high_lr": 1.736842105263158e-05, "low_lr": 3.4736842105263157e-07, "step": 1867 }, { "epoch": 4.909927679158448, "high_lr": 1.736842105263158e-05, "low_lr": 3.4736842105263157e-07, "step": 1867 }, { "epoch": 4.912557527942143, "grad_norm": 1.5503710508346558, "learning_rate": 1.6842105263157893e-05, "loss": 1.1529, "step": 1868 }, { "epoch": 4.912557527942143, "high_lr": 1.6842105263157893e-05, "low_lr": 3.368421052631579e-07, "step": 1868 }, { "epoch": 4.912557527942143, "high_lr": 1.6842105263157893e-05, "low_lr": 3.368421052631579e-07, "step": 1868 }, { "epoch": 4.912557527942143, "high_lr": 1.6842105263157893e-05, "low_lr": 3.368421052631579e-07, "step": 1868 }, { "epoch": 4.912557527942143, "high_lr": 1.6842105263157893e-05, "low_lr": 3.368421052631579e-07, "step": 1868 }, { "epoch": 4.912557527942143, "high_lr": 1.6842105263157893e-05, "low_lr": 3.368421052631579e-07, "step": 1868 }, { "epoch": 4.912557527942143, "high_lr": 1.6842105263157893e-05, "low_lr": 3.368421052631579e-07, "step": 1868 }, { "epoch": 4.912557527942143, "high_lr": 1.6842105263157893e-05, "low_lr": 3.368421052631579e-07, "step": 1868 }, { "epoch": 4.912557527942143, "high_lr": 1.6842105263157893e-05, "low_lr": 3.368421052631579e-07, "step": 1868 }, { "epoch": 4.9151873767258385, "grad_norm": 1.544754981994629, "learning_rate": 1.631578947368421e-05, "loss": 1.1871, "step": 1869 }, { "epoch": 4.9151873767258385, "high_lr": 1.631578947368421e-05, "low_lr": 3.263157894736842e-07, "step": 1869 }, { "epoch": 4.9151873767258385, "high_lr": 1.631578947368421e-05, "low_lr": 3.263157894736842e-07, "step": 1869 }, { "epoch": 4.9151873767258385, "high_lr": 1.631578947368421e-05, "low_lr": 3.263157894736842e-07, "step": 1869 }, { "epoch": 4.9151873767258385, "high_lr": 1.631578947368421e-05, "low_lr": 3.263157894736842e-07, "step": 1869 }, { "epoch": 4.9151873767258385, "high_lr": 1.631578947368421e-05, "low_lr": 3.263157894736842e-07, "step": 1869 }, { "epoch": 4.9151873767258385, "high_lr": 1.631578947368421e-05, "low_lr": 3.263157894736842e-07, "step": 1869 }, { "epoch": 4.9151873767258385, "high_lr": 1.631578947368421e-05, "low_lr": 3.263157894736842e-07, "step": 1869 }, { "epoch": 4.9151873767258385, "high_lr": 1.631578947368421e-05, "low_lr": 3.263157894736842e-07, "step": 1869 }, { "epoch": 4.917817225509533, "grad_norm": 1.4125280380249023, "learning_rate": 1.5789473684210526e-05, "loss": 1.2186, "step": 1870 }, { "epoch": 4.917817225509533, "high_lr": 1.5789473684210526e-05, "low_lr": 3.1578947368421055e-07, "step": 1870 }, { "epoch": 4.917817225509533, "high_lr": 1.5789473684210526e-05, "low_lr": 3.1578947368421055e-07, "step": 1870 }, { "epoch": 4.917817225509533, "high_lr": 1.5789473684210526e-05, "low_lr": 3.1578947368421055e-07, "step": 1870 }, { "epoch": 4.917817225509533, "high_lr": 1.5789473684210526e-05, "low_lr": 3.1578947368421055e-07, "step": 1870 }, { "epoch": 4.917817225509533, "high_lr": 1.5789473684210526e-05, "low_lr": 3.1578947368421055e-07, "step": 1870 }, { "epoch": 4.917817225509533, "high_lr": 1.5789473684210526e-05, "low_lr": 3.1578947368421055e-07, "step": 1870 }, { "epoch": 4.917817225509533, "high_lr": 1.5789473684210526e-05, "low_lr": 3.1578947368421055e-07, "step": 1870 }, { "epoch": 4.917817225509533, "high_lr": 1.5789473684210526e-05, "low_lr": 3.1578947368421055e-07, "step": 1870 }, { "epoch": 4.920447074293228, "grad_norm": 1.5392786264419556, "learning_rate": 1.5263157894736842e-05, "loss": 1.2449, "step": 1871 }, { "epoch": 4.920447074293228, "high_lr": 1.5263157894736842e-05, "low_lr": 3.052631578947369e-07, "step": 1871 }, { "epoch": 4.920447074293228, "high_lr": 1.5263157894736842e-05, "low_lr": 3.052631578947369e-07, "step": 1871 }, { "epoch": 4.920447074293228, "high_lr": 1.5263157894736842e-05, "low_lr": 3.052631578947369e-07, "step": 1871 }, { "epoch": 4.920447074293228, "high_lr": 1.5263157894736842e-05, "low_lr": 3.052631578947369e-07, "step": 1871 }, { "epoch": 4.920447074293228, "high_lr": 1.5263157894736842e-05, "low_lr": 3.052631578947369e-07, "step": 1871 }, { "epoch": 4.920447074293228, "high_lr": 1.5263157894736842e-05, "low_lr": 3.052631578947369e-07, "step": 1871 }, { "epoch": 4.920447074293228, "high_lr": 1.5263157894736842e-05, "low_lr": 3.052631578947369e-07, "step": 1871 }, { "epoch": 4.920447074293228, "high_lr": 1.5263157894736842e-05, "low_lr": 3.052631578947369e-07, "step": 1871 }, { "epoch": 4.923076923076923, "grad_norm": 1.5249948501586914, "learning_rate": 1.4736842105263159e-05, "loss": 1.1833, "step": 1872 }, { "epoch": 4.923076923076923, "high_lr": 1.4736842105263159e-05, "low_lr": 2.9473684210526315e-07, "step": 1872 }, { "epoch": 4.923076923076923, "high_lr": 1.4736842105263159e-05, "low_lr": 2.9473684210526315e-07, "step": 1872 }, { "epoch": 4.923076923076923, "high_lr": 1.4736842105263159e-05, "low_lr": 2.9473684210526315e-07, "step": 1872 }, { "epoch": 4.923076923076923, "high_lr": 1.4736842105263159e-05, "low_lr": 2.9473684210526315e-07, "step": 1872 }, { "epoch": 4.923076923076923, "high_lr": 1.4736842105263159e-05, "low_lr": 2.9473684210526315e-07, "step": 1872 }, { "epoch": 4.923076923076923, "high_lr": 1.4736842105263159e-05, "low_lr": 2.9473684210526315e-07, "step": 1872 }, { "epoch": 4.923076923076923, "high_lr": 1.4736842105263159e-05, "low_lr": 2.9473684210526315e-07, "step": 1872 }, { "epoch": 4.923076923076923, "high_lr": 1.4736842105263159e-05, "low_lr": 2.9473684210526315e-07, "step": 1872 }, { "epoch": 4.925706771860618, "grad_norm": 1.487542986869812, "learning_rate": 1.4210526315789475e-05, "loss": 1.2305, "step": 1873 }, { "epoch": 4.925706771860618, "high_lr": 1.4210526315789475e-05, "low_lr": 2.842105263157895e-07, "step": 1873 }, { "epoch": 4.925706771860618, "high_lr": 1.4210526315789475e-05, "low_lr": 2.842105263157895e-07, "step": 1873 }, { "epoch": 4.925706771860618, "high_lr": 1.4210526315789475e-05, "low_lr": 2.842105263157895e-07, "step": 1873 }, { "epoch": 4.925706771860618, "high_lr": 1.4210526315789475e-05, "low_lr": 2.842105263157895e-07, "step": 1873 }, { "epoch": 4.925706771860618, "high_lr": 1.4210526315789475e-05, "low_lr": 2.842105263157895e-07, "step": 1873 }, { "epoch": 4.925706771860618, "high_lr": 1.4210526315789475e-05, "low_lr": 2.842105263157895e-07, "step": 1873 }, { "epoch": 4.925706771860618, "high_lr": 1.4210526315789475e-05, "low_lr": 2.842105263157895e-07, "step": 1873 }, { "epoch": 4.925706771860618, "high_lr": 1.4210526315789475e-05, "low_lr": 2.842105263157895e-07, "step": 1873 }, { "epoch": 4.928336620644313, "grad_norm": 1.5068485736846924, "learning_rate": 1.368421052631579e-05, "loss": 1.1939, "step": 1874 }, { "epoch": 4.928336620644313, "high_lr": 1.368421052631579e-05, "low_lr": 2.736842105263158e-07, "step": 1874 }, { "epoch": 4.928336620644313, "high_lr": 1.368421052631579e-05, "low_lr": 2.736842105263158e-07, "step": 1874 }, { "epoch": 4.928336620644313, "high_lr": 1.368421052631579e-05, "low_lr": 2.736842105263158e-07, "step": 1874 }, { "epoch": 4.928336620644313, "high_lr": 1.368421052631579e-05, "low_lr": 2.736842105263158e-07, "step": 1874 }, { "epoch": 4.928336620644313, "high_lr": 1.368421052631579e-05, "low_lr": 2.736842105263158e-07, "step": 1874 }, { "epoch": 4.928336620644313, "high_lr": 1.368421052631579e-05, "low_lr": 2.736842105263158e-07, "step": 1874 }, { "epoch": 4.928336620644313, "high_lr": 1.368421052631579e-05, "low_lr": 2.736842105263158e-07, "step": 1874 }, { "epoch": 4.928336620644313, "high_lr": 1.368421052631579e-05, "low_lr": 2.736842105263158e-07, "step": 1874 }, { "epoch": 4.930966469428008, "grad_norm": 1.566163182258606, "learning_rate": 1.3157894736842104e-05, "loss": 1.2882, "step": 1875 }, { "epoch": 4.930966469428008, "high_lr": 1.3157894736842104e-05, "low_lr": 2.6315789473684213e-07, "step": 1875 }, { "epoch": 4.930966469428008, "high_lr": 1.3157894736842104e-05, "low_lr": 2.6315789473684213e-07, "step": 1875 }, { "epoch": 4.930966469428008, "high_lr": 1.3157894736842104e-05, "low_lr": 2.6315789473684213e-07, "step": 1875 }, { "epoch": 4.930966469428008, "high_lr": 1.3157894736842104e-05, "low_lr": 2.6315789473684213e-07, "step": 1875 }, { "epoch": 4.930966469428008, "high_lr": 1.3157894736842104e-05, "low_lr": 2.6315789473684213e-07, "step": 1875 }, { "epoch": 4.930966469428008, "high_lr": 1.3157894736842104e-05, "low_lr": 2.6315789473684213e-07, "step": 1875 }, { "epoch": 4.930966469428008, "high_lr": 1.3157894736842104e-05, "low_lr": 2.6315789473684213e-07, "step": 1875 }, { "epoch": 4.930966469428008, "high_lr": 1.3157894736842104e-05, "low_lr": 2.6315789473684213e-07, "step": 1875 }, { "epoch": 4.933596318211703, "grad_norm": 1.42124342918396, "learning_rate": 1.263157894736842e-05, "loss": 1.2149, "step": 1876 }, { "epoch": 4.933596318211703, "high_lr": 1.263157894736842e-05, "low_lr": 2.5263157894736846e-07, "step": 1876 }, { "epoch": 4.933596318211703, "high_lr": 1.263157894736842e-05, "low_lr": 2.5263157894736846e-07, "step": 1876 }, { "epoch": 4.933596318211703, "high_lr": 1.263157894736842e-05, "low_lr": 2.5263157894736846e-07, "step": 1876 }, { "epoch": 4.933596318211703, "high_lr": 1.263157894736842e-05, "low_lr": 2.5263157894736846e-07, "step": 1876 }, { "epoch": 4.933596318211703, "high_lr": 1.263157894736842e-05, "low_lr": 2.5263157894736846e-07, "step": 1876 }, { "epoch": 4.933596318211703, "high_lr": 1.263157894736842e-05, "low_lr": 2.5263157894736846e-07, "step": 1876 }, { "epoch": 4.933596318211703, "high_lr": 1.263157894736842e-05, "low_lr": 2.5263157894736846e-07, "step": 1876 }, { "epoch": 4.933596318211703, "high_lr": 1.263157894736842e-05, "low_lr": 2.5263157894736846e-07, "step": 1876 }, { "epoch": 4.9362261669953975, "grad_norm": 1.578900933265686, "learning_rate": 1.2105263157894737e-05, "loss": 1.1708, "step": 1877 }, { "epoch": 4.9362261669953975, "high_lr": 1.2105263157894737e-05, "low_lr": 2.4210526315789473e-07, "step": 1877 }, { "epoch": 4.9362261669953975, "high_lr": 1.2105263157894737e-05, "low_lr": 2.4210526315789473e-07, "step": 1877 }, { "epoch": 4.9362261669953975, "high_lr": 1.2105263157894737e-05, "low_lr": 2.4210526315789473e-07, "step": 1877 }, { "epoch": 4.9362261669953975, "high_lr": 1.2105263157894737e-05, "low_lr": 2.4210526315789473e-07, "step": 1877 }, { "epoch": 4.9362261669953975, "high_lr": 1.2105263157894737e-05, "low_lr": 2.4210526315789473e-07, "step": 1877 }, { "epoch": 4.9362261669953975, "high_lr": 1.2105263157894737e-05, "low_lr": 2.4210526315789473e-07, "step": 1877 }, { "epoch": 4.9362261669953975, "high_lr": 1.2105263157894737e-05, "low_lr": 2.4210526315789473e-07, "step": 1877 }, { "epoch": 4.9362261669953975, "high_lr": 1.2105263157894737e-05, "low_lr": 2.4210526315789473e-07, "step": 1877 }, { "epoch": 4.938856015779093, "grad_norm": 1.6655032634735107, "learning_rate": 1.1578947368421053e-05, "loss": 1.1479, "step": 1878 }, { "epoch": 4.938856015779093, "high_lr": 1.1578947368421053e-05, "low_lr": 2.315789473684211e-07, "step": 1878 }, { "epoch": 4.938856015779093, "high_lr": 1.1578947368421053e-05, "low_lr": 2.315789473684211e-07, "step": 1878 }, { "epoch": 4.938856015779093, "high_lr": 1.1578947368421053e-05, "low_lr": 2.315789473684211e-07, "step": 1878 }, { "epoch": 4.938856015779093, "high_lr": 1.1578947368421053e-05, "low_lr": 2.315789473684211e-07, "step": 1878 }, { "epoch": 4.938856015779093, "high_lr": 1.1578947368421053e-05, "low_lr": 2.315789473684211e-07, "step": 1878 }, { "epoch": 4.938856015779093, "high_lr": 1.1578947368421053e-05, "low_lr": 2.315789473684211e-07, "step": 1878 }, { "epoch": 4.938856015779093, "high_lr": 1.1578947368421053e-05, "low_lr": 2.315789473684211e-07, "step": 1878 }, { "epoch": 4.938856015779093, "high_lr": 1.1578947368421053e-05, "low_lr": 2.315789473684211e-07, "step": 1878 }, { "epoch": 4.941485864562788, "grad_norm": 1.6482006311416626, "learning_rate": 1.1052631578947368e-05, "loss": 1.1975, "step": 1879 }, { "epoch": 4.941485864562788, "high_lr": 1.1052631578947368e-05, "low_lr": 2.2105263157894736e-07, "step": 1879 }, { "epoch": 4.941485864562788, "high_lr": 1.1052631578947368e-05, "low_lr": 2.2105263157894736e-07, "step": 1879 }, { "epoch": 4.941485864562788, "high_lr": 1.1052631578947368e-05, "low_lr": 2.2105263157894736e-07, "step": 1879 }, { "epoch": 4.941485864562788, "high_lr": 1.1052631578947368e-05, "low_lr": 2.2105263157894736e-07, "step": 1879 }, { "epoch": 4.941485864562788, "high_lr": 1.1052631578947368e-05, "low_lr": 2.2105263157894736e-07, "step": 1879 }, { "epoch": 4.941485864562788, "high_lr": 1.1052631578947368e-05, "low_lr": 2.2105263157894736e-07, "step": 1879 }, { "epoch": 4.941485864562788, "high_lr": 1.1052631578947368e-05, "low_lr": 2.2105263157894736e-07, "step": 1879 }, { "epoch": 4.941485864562788, "high_lr": 1.1052631578947368e-05, "low_lr": 2.2105263157894736e-07, "step": 1879 }, { "epoch": 4.944115713346482, "grad_norm": 1.5597196817398071, "learning_rate": 1.0526315789473684e-05, "loss": 1.2371, "step": 1880 }, { "epoch": 4.944115713346482, "high_lr": 1.0526315789473684e-05, "low_lr": 2.105263157894737e-07, "step": 1880 }, { "epoch": 4.944115713346482, "high_lr": 1.0526315789473684e-05, "low_lr": 2.105263157894737e-07, "step": 1880 }, { "epoch": 4.944115713346482, "high_lr": 1.0526315789473684e-05, "low_lr": 2.105263157894737e-07, "step": 1880 }, { "epoch": 4.944115713346482, "high_lr": 1.0526315789473684e-05, "low_lr": 2.105263157894737e-07, "step": 1880 }, { "epoch": 4.944115713346482, "high_lr": 1.0526315789473684e-05, "low_lr": 2.105263157894737e-07, "step": 1880 }, { "epoch": 4.944115713346482, "high_lr": 1.0526315789473684e-05, "low_lr": 2.105263157894737e-07, "step": 1880 }, { "epoch": 4.944115713346482, "high_lr": 1.0526315789473684e-05, "low_lr": 2.105263157894737e-07, "step": 1880 }, { "epoch": 4.944115713346482, "high_lr": 1.0526315789473684e-05, "low_lr": 2.105263157894737e-07, "step": 1880 }, { "epoch": 4.946745562130177, "grad_norm": 1.6737630367279053, "learning_rate": 1e-05, "loss": 1.2194, "step": 1881 }, { "epoch": 4.946745562130177, "high_lr": 1e-05, "low_lr": 2.0000000000000002e-07, "step": 1881 }, { "epoch": 4.946745562130177, "high_lr": 1e-05, "low_lr": 2.0000000000000002e-07, "step": 1881 }, { "epoch": 4.946745562130177, "high_lr": 1e-05, "low_lr": 2.0000000000000002e-07, "step": 1881 }, { "epoch": 4.946745562130177, "high_lr": 1e-05, "low_lr": 2.0000000000000002e-07, "step": 1881 }, { "epoch": 4.946745562130177, "high_lr": 1e-05, "low_lr": 2.0000000000000002e-07, "step": 1881 }, { "epoch": 4.946745562130177, "high_lr": 1e-05, "low_lr": 2.0000000000000002e-07, "step": 1881 }, { "epoch": 4.946745562130177, "high_lr": 1e-05, "low_lr": 2.0000000000000002e-07, "step": 1881 }, { "epoch": 4.946745562130177, "high_lr": 1e-05, "low_lr": 2.0000000000000002e-07, "step": 1881 }, { "epoch": 4.949375410913873, "grad_norm": 1.546088695526123, "learning_rate": 9.473684210526317e-06, "loss": 1.2373, "step": 1882 }, { "epoch": 4.949375410913873, "high_lr": 9.473684210526317e-06, "low_lr": 1.8947368421052634e-07, "step": 1882 }, { "epoch": 4.949375410913873, "high_lr": 9.473684210526317e-06, "low_lr": 1.8947368421052634e-07, "step": 1882 }, { "epoch": 4.949375410913873, "high_lr": 9.473684210526317e-06, "low_lr": 1.8947368421052634e-07, "step": 1882 }, { "epoch": 4.949375410913873, "high_lr": 9.473684210526317e-06, "low_lr": 1.8947368421052634e-07, "step": 1882 }, { "epoch": 4.949375410913873, "high_lr": 9.473684210526317e-06, "low_lr": 1.8947368421052634e-07, "step": 1882 }, { "epoch": 4.949375410913873, "high_lr": 9.473684210526317e-06, "low_lr": 1.8947368421052634e-07, "step": 1882 }, { "epoch": 4.949375410913873, "high_lr": 9.473684210526317e-06, "low_lr": 1.8947368421052634e-07, "step": 1882 }, { "epoch": 4.949375410913873, "high_lr": 9.473684210526317e-06, "low_lr": 1.8947368421052634e-07, "step": 1882 }, { "epoch": 4.952005259697567, "grad_norm": 1.5193564891815186, "learning_rate": 8.947368421052632e-06, "loss": 1.2045, "step": 1883 }, { "epoch": 4.952005259697567, "high_lr": 8.947368421052632e-06, "low_lr": 1.7894736842105265e-07, "step": 1883 }, { "epoch": 4.952005259697567, "high_lr": 8.947368421052632e-06, "low_lr": 1.7894736842105265e-07, "step": 1883 }, { "epoch": 4.952005259697567, "high_lr": 8.947368421052632e-06, "low_lr": 1.7894736842105265e-07, "step": 1883 }, { "epoch": 4.952005259697567, "high_lr": 8.947368421052632e-06, "low_lr": 1.7894736842105265e-07, "step": 1883 }, { "epoch": 4.952005259697567, "high_lr": 8.947368421052632e-06, "low_lr": 1.7894736842105265e-07, "step": 1883 }, { "epoch": 4.952005259697567, "high_lr": 8.947368421052632e-06, "low_lr": 1.7894736842105265e-07, "step": 1883 }, { "epoch": 4.952005259697567, "high_lr": 8.947368421052632e-06, "low_lr": 1.7894736842105265e-07, "step": 1883 }, { "epoch": 4.952005259697567, "high_lr": 8.947368421052632e-06, "low_lr": 1.7894736842105265e-07, "step": 1883 }, { "epoch": 4.954635108481263, "grad_norm": 1.4926979541778564, "learning_rate": 8.421052631578947e-06, "loss": 1.1982, "step": 1884 }, { "epoch": 4.954635108481263, "high_lr": 8.421052631578947e-06, "low_lr": 1.6842105263157895e-07, "step": 1884 }, { "epoch": 4.954635108481263, "high_lr": 8.421052631578947e-06, "low_lr": 1.6842105263157895e-07, "step": 1884 }, { "epoch": 4.954635108481263, "high_lr": 8.421052631578947e-06, "low_lr": 1.6842105263157895e-07, "step": 1884 }, { "epoch": 4.954635108481263, "high_lr": 8.421052631578947e-06, "low_lr": 1.6842105263157895e-07, "step": 1884 }, { "epoch": 4.954635108481263, "high_lr": 8.421052631578947e-06, "low_lr": 1.6842105263157895e-07, "step": 1884 }, { "epoch": 4.954635108481263, "high_lr": 8.421052631578947e-06, "low_lr": 1.6842105263157895e-07, "step": 1884 }, { "epoch": 4.954635108481263, "high_lr": 8.421052631578947e-06, "low_lr": 1.6842105263157895e-07, "step": 1884 }, { "epoch": 4.954635108481263, "high_lr": 8.421052631578947e-06, "low_lr": 1.6842105263157895e-07, "step": 1884 }, { "epoch": 4.957264957264957, "grad_norm": 1.6040585041046143, "learning_rate": 7.894736842105263e-06, "loss": 1.1909, "step": 1885 }, { "epoch": 4.957264957264957, "high_lr": 7.894736842105263e-06, "low_lr": 1.5789473684210527e-07, "step": 1885 }, { "epoch": 4.957264957264957, "high_lr": 7.894736842105263e-06, "low_lr": 1.5789473684210527e-07, "step": 1885 }, { "epoch": 4.957264957264957, "high_lr": 7.894736842105263e-06, "low_lr": 1.5789473684210527e-07, "step": 1885 }, { "epoch": 4.957264957264957, "high_lr": 7.894736842105263e-06, "low_lr": 1.5789473684210527e-07, "step": 1885 }, { "epoch": 4.957264957264957, "high_lr": 7.894736842105263e-06, "low_lr": 1.5789473684210527e-07, "step": 1885 }, { "epoch": 4.957264957264957, "high_lr": 7.894736842105263e-06, "low_lr": 1.5789473684210527e-07, "step": 1885 }, { "epoch": 4.957264957264957, "high_lr": 7.894736842105263e-06, "low_lr": 1.5789473684210527e-07, "step": 1885 }, { "epoch": 4.957264957264957, "high_lr": 7.894736842105263e-06, "low_lr": 1.5789473684210527e-07, "step": 1885 }, { "epoch": 4.959894806048652, "grad_norm": 1.4907753467559814, "learning_rate": 7.368421052631579e-06, "loss": 1.1852, "step": 1886 }, { "epoch": 4.959894806048652, "high_lr": 7.368421052631579e-06, "low_lr": 1.4736842105263158e-07, "step": 1886 }, { "epoch": 4.959894806048652, "high_lr": 7.368421052631579e-06, "low_lr": 1.4736842105263158e-07, "step": 1886 }, { "epoch": 4.959894806048652, "high_lr": 7.368421052631579e-06, "low_lr": 1.4736842105263158e-07, "step": 1886 }, { "epoch": 4.959894806048652, "high_lr": 7.368421052631579e-06, "low_lr": 1.4736842105263158e-07, "step": 1886 }, { "epoch": 4.959894806048652, "high_lr": 7.368421052631579e-06, "low_lr": 1.4736842105263158e-07, "step": 1886 }, { "epoch": 4.959894806048652, "high_lr": 7.368421052631579e-06, "low_lr": 1.4736842105263158e-07, "step": 1886 }, { "epoch": 4.959894806048652, "high_lr": 7.368421052631579e-06, "low_lr": 1.4736842105263158e-07, "step": 1886 }, { "epoch": 4.959894806048652, "high_lr": 7.368421052631579e-06, "low_lr": 1.4736842105263158e-07, "step": 1886 }, { "epoch": 4.962524654832347, "grad_norm": 1.4674580097198486, "learning_rate": 6.842105263157895e-06, "loss": 1.2096, "step": 1887 }, { "epoch": 4.962524654832347, "high_lr": 6.842105263157895e-06, "low_lr": 1.368421052631579e-07, "step": 1887 }, { "epoch": 4.962524654832347, "high_lr": 6.842105263157895e-06, "low_lr": 1.368421052631579e-07, "step": 1887 }, { "epoch": 4.962524654832347, "high_lr": 6.842105263157895e-06, "low_lr": 1.368421052631579e-07, "step": 1887 }, { "epoch": 4.962524654832347, "high_lr": 6.842105263157895e-06, "low_lr": 1.368421052631579e-07, "step": 1887 }, { "epoch": 4.962524654832347, "high_lr": 6.842105263157895e-06, "low_lr": 1.368421052631579e-07, "step": 1887 }, { "epoch": 4.962524654832347, "high_lr": 6.842105263157895e-06, "low_lr": 1.368421052631579e-07, "step": 1887 }, { "epoch": 4.962524654832347, "high_lr": 6.842105263157895e-06, "low_lr": 1.368421052631579e-07, "step": 1887 }, { "epoch": 4.962524654832347, "high_lr": 6.842105263157895e-06, "low_lr": 1.368421052631579e-07, "step": 1887 }, { "epoch": 4.965154503616042, "grad_norm": 1.4111533164978027, "learning_rate": 6.31578947368421e-06, "loss": 1.195, "step": 1888 }, { "epoch": 4.965154503616042, "high_lr": 6.31578947368421e-06, "low_lr": 1.2631578947368423e-07, "step": 1888 }, { "epoch": 4.965154503616042, "high_lr": 6.31578947368421e-06, "low_lr": 1.2631578947368423e-07, "step": 1888 }, { "epoch": 4.965154503616042, "high_lr": 6.31578947368421e-06, "low_lr": 1.2631578947368423e-07, "step": 1888 }, { "epoch": 4.965154503616042, "high_lr": 6.31578947368421e-06, "low_lr": 1.2631578947368423e-07, "step": 1888 }, { "epoch": 4.965154503616042, "high_lr": 6.31578947368421e-06, "low_lr": 1.2631578947368423e-07, "step": 1888 }, { "epoch": 4.965154503616042, "high_lr": 6.31578947368421e-06, "low_lr": 1.2631578947368423e-07, "step": 1888 }, { "epoch": 4.965154503616042, "high_lr": 6.31578947368421e-06, "low_lr": 1.2631578947368423e-07, "step": 1888 }, { "epoch": 4.965154503616042, "high_lr": 6.31578947368421e-06, "low_lr": 1.2631578947368423e-07, "step": 1888 }, { "epoch": 4.967784352399737, "grad_norm": 1.5410196781158447, "learning_rate": 5.789473684210527e-06, "loss": 1.2292, "step": 1889 }, { "epoch": 4.967784352399737, "high_lr": 5.789473684210527e-06, "low_lr": 1.1578947368421054e-07, "step": 1889 }, { "epoch": 4.967784352399737, "high_lr": 5.789473684210527e-06, "low_lr": 1.1578947368421054e-07, "step": 1889 }, { "epoch": 4.967784352399737, "high_lr": 5.789473684210527e-06, "low_lr": 1.1578947368421054e-07, "step": 1889 }, { "epoch": 4.967784352399737, "high_lr": 5.789473684210527e-06, "low_lr": 1.1578947368421054e-07, "step": 1889 }, { "epoch": 4.967784352399737, "high_lr": 5.789473684210527e-06, "low_lr": 1.1578947368421054e-07, "step": 1889 }, { "epoch": 4.967784352399737, "high_lr": 5.789473684210527e-06, "low_lr": 1.1578947368421054e-07, "step": 1889 }, { "epoch": 4.967784352399737, "high_lr": 5.789473684210527e-06, "low_lr": 1.1578947368421054e-07, "step": 1889 }, { "epoch": 4.967784352399737, "high_lr": 5.789473684210527e-06, "low_lr": 1.1578947368421054e-07, "step": 1889 }, { "epoch": 4.970414201183432, "grad_norm": 1.4448254108428955, "learning_rate": 5.263157894736842e-06, "loss": 1.2125, "step": 1890 }, { "epoch": 4.970414201183432, "high_lr": 5.263157894736842e-06, "low_lr": 1.0526315789473685e-07, "step": 1890 }, { "epoch": 4.970414201183432, "high_lr": 5.263157894736842e-06, "low_lr": 1.0526315789473685e-07, "step": 1890 }, { "epoch": 4.970414201183432, "high_lr": 5.263157894736842e-06, "low_lr": 1.0526315789473685e-07, "step": 1890 }, { "epoch": 4.970414201183432, "high_lr": 5.263157894736842e-06, "low_lr": 1.0526315789473685e-07, "step": 1890 }, { "epoch": 4.970414201183432, "high_lr": 5.263157894736842e-06, "low_lr": 1.0526315789473685e-07, "step": 1890 }, { "epoch": 4.970414201183432, "high_lr": 5.263157894736842e-06, "low_lr": 1.0526315789473685e-07, "step": 1890 }, { "epoch": 4.970414201183432, "high_lr": 5.263157894736842e-06, "low_lr": 1.0526315789473685e-07, "step": 1890 }, { "epoch": 4.970414201183432, "high_lr": 5.263157894736842e-06, "low_lr": 1.0526315789473685e-07, "step": 1890 }, { "epoch": 4.973044049967127, "grad_norm": 1.7245209217071533, "learning_rate": 4.736842105263159e-06, "loss": 1.2209, "step": 1891 }, { "epoch": 4.973044049967127, "high_lr": 4.736842105263159e-06, "low_lr": 9.473684210526317e-08, "step": 1891 }, { "epoch": 4.973044049967127, "high_lr": 4.736842105263159e-06, "low_lr": 9.473684210526317e-08, "step": 1891 }, { "epoch": 4.973044049967127, "high_lr": 4.736842105263159e-06, "low_lr": 9.473684210526317e-08, "step": 1891 }, { "epoch": 4.973044049967127, "high_lr": 4.736842105263159e-06, "low_lr": 9.473684210526317e-08, "step": 1891 }, { "epoch": 4.973044049967127, "high_lr": 4.736842105263159e-06, "low_lr": 9.473684210526317e-08, "step": 1891 }, { "epoch": 4.973044049967127, "high_lr": 4.736842105263159e-06, "low_lr": 9.473684210526317e-08, "step": 1891 }, { "epoch": 4.973044049967127, "high_lr": 4.736842105263159e-06, "low_lr": 9.473684210526317e-08, "step": 1891 }, { "epoch": 4.973044049967127, "high_lr": 4.736842105263159e-06, "low_lr": 9.473684210526317e-08, "step": 1891 }, { "epoch": 4.975673898750822, "grad_norm": 1.7153047323226929, "learning_rate": 4.210526315789473e-06, "loss": 1.2268, "step": 1892 }, { "epoch": 4.975673898750822, "high_lr": 4.210526315789473e-06, "low_lr": 8.421052631578947e-08, "step": 1892 }, { "epoch": 4.975673898750822, "high_lr": 4.210526315789473e-06, "low_lr": 8.421052631578947e-08, "step": 1892 }, { "epoch": 4.975673898750822, "high_lr": 4.210526315789473e-06, "low_lr": 8.421052631578947e-08, "step": 1892 }, { "epoch": 4.975673898750822, "high_lr": 4.210526315789473e-06, "low_lr": 8.421052631578947e-08, "step": 1892 }, { "epoch": 4.975673898750822, "high_lr": 4.210526315789473e-06, "low_lr": 8.421052631578947e-08, "step": 1892 }, { "epoch": 4.975673898750822, "high_lr": 4.210526315789473e-06, "low_lr": 8.421052631578947e-08, "step": 1892 }, { "epoch": 4.975673898750822, "high_lr": 4.210526315789473e-06, "low_lr": 8.421052631578947e-08, "step": 1892 }, { "epoch": 4.975673898750822, "high_lr": 4.210526315789473e-06, "low_lr": 8.421052631578947e-08, "step": 1892 }, { "epoch": 4.978303747534516, "grad_norm": 1.6368732452392578, "learning_rate": 3.6842105263157896e-06, "loss": 1.192, "step": 1893 }, { "epoch": 4.978303747534516, "high_lr": 3.6842105263157896e-06, "low_lr": 7.368421052631579e-08, "step": 1893 }, { "epoch": 4.978303747534516, "high_lr": 3.6842105263157896e-06, "low_lr": 7.368421052631579e-08, "step": 1893 }, { "epoch": 4.978303747534516, "high_lr": 3.6842105263157896e-06, "low_lr": 7.368421052631579e-08, "step": 1893 }, { "epoch": 4.978303747534516, "high_lr": 3.6842105263157896e-06, "low_lr": 7.368421052631579e-08, "step": 1893 }, { "epoch": 4.978303747534516, "high_lr": 3.6842105263157896e-06, "low_lr": 7.368421052631579e-08, "step": 1893 }, { "epoch": 4.978303747534516, "high_lr": 3.6842105263157896e-06, "low_lr": 7.368421052631579e-08, "step": 1893 }, { "epoch": 4.978303747534516, "high_lr": 3.6842105263157896e-06, "low_lr": 7.368421052631579e-08, "step": 1893 }, { "epoch": 4.978303747534516, "high_lr": 3.6842105263157896e-06, "low_lr": 7.368421052631579e-08, "step": 1893 }, { "epoch": 4.980933596318212, "grad_norm": 1.6589494943618774, "learning_rate": 3.157894736842105e-06, "loss": 1.1895, "step": 1894 }, { "epoch": 4.980933596318212, "high_lr": 3.157894736842105e-06, "low_lr": 6.315789473684211e-08, "step": 1894 }, { "epoch": 4.980933596318212, "high_lr": 3.157894736842105e-06, "low_lr": 6.315789473684211e-08, "step": 1894 }, { "epoch": 4.980933596318212, "high_lr": 3.157894736842105e-06, "low_lr": 6.315789473684211e-08, "step": 1894 }, { "epoch": 4.980933596318212, "high_lr": 3.157894736842105e-06, "low_lr": 6.315789473684211e-08, "step": 1894 }, { "epoch": 4.980933596318212, "high_lr": 3.157894736842105e-06, "low_lr": 6.315789473684211e-08, "step": 1894 }, { "epoch": 4.980933596318212, "high_lr": 3.157894736842105e-06, "low_lr": 6.315789473684211e-08, "step": 1894 }, { "epoch": 4.980933596318212, "high_lr": 3.157894736842105e-06, "low_lr": 6.315789473684211e-08, "step": 1894 }, { "epoch": 4.980933596318212, "high_lr": 3.157894736842105e-06, "low_lr": 6.315789473684211e-08, "step": 1894 }, { "epoch": 4.983563445101907, "grad_norm": 1.4983834028244019, "learning_rate": 2.631578947368421e-06, "loss": 1.2616, "step": 1895 }, { "epoch": 4.983563445101907, "high_lr": 2.631578947368421e-06, "low_lr": 5.263157894736842e-08, "step": 1895 }, { "epoch": 4.983563445101907, "high_lr": 2.631578947368421e-06, "low_lr": 5.263157894736842e-08, "step": 1895 }, { "epoch": 4.983563445101907, "high_lr": 2.631578947368421e-06, "low_lr": 5.263157894736842e-08, "step": 1895 }, { "epoch": 4.983563445101907, "high_lr": 2.631578947368421e-06, "low_lr": 5.263157894736842e-08, "step": 1895 }, { "epoch": 4.983563445101907, "high_lr": 2.631578947368421e-06, "low_lr": 5.263157894736842e-08, "step": 1895 }, { "epoch": 4.983563445101907, "high_lr": 2.631578947368421e-06, "low_lr": 5.263157894736842e-08, "step": 1895 }, { "epoch": 4.983563445101907, "high_lr": 2.631578947368421e-06, "low_lr": 5.263157894736842e-08, "step": 1895 }, { "epoch": 4.983563445101907, "high_lr": 2.631578947368421e-06, "low_lr": 5.263157894736842e-08, "step": 1895 }, { "epoch": 4.986193293885601, "grad_norm": 1.6577376127243042, "learning_rate": 2.1052631578947366e-06, "loss": 1.2307, "step": 1896 }, { "epoch": 4.986193293885601, "high_lr": 2.1052631578947366e-06, "low_lr": 4.2105263157894737e-08, "step": 1896 }, { "epoch": 4.986193293885601, "high_lr": 2.1052631578947366e-06, "low_lr": 4.2105263157894737e-08, "step": 1896 }, { "epoch": 4.986193293885601, "high_lr": 2.1052631578947366e-06, "low_lr": 4.2105263157894737e-08, "step": 1896 }, { "epoch": 4.986193293885601, "high_lr": 2.1052631578947366e-06, "low_lr": 4.2105263157894737e-08, "step": 1896 }, { "epoch": 4.986193293885601, "high_lr": 2.1052631578947366e-06, "low_lr": 4.2105263157894737e-08, "step": 1896 }, { "epoch": 4.986193293885601, "high_lr": 2.1052631578947366e-06, "low_lr": 4.2105263157894737e-08, "step": 1896 }, { "epoch": 4.986193293885601, "high_lr": 2.1052631578947366e-06, "low_lr": 4.2105263157894737e-08, "step": 1896 }, { "epoch": 4.986193293885601, "high_lr": 2.1052631578947366e-06, "low_lr": 4.2105263157894737e-08, "step": 1896 }, { "epoch": 4.988823142669297, "grad_norm": 1.618255853652954, "learning_rate": 1.5789473684210526e-06, "loss": 1.2291, "step": 1897 }, { "epoch": 4.988823142669297, "high_lr": 1.5789473684210526e-06, "low_lr": 3.157894736842106e-08, "step": 1897 }, { "epoch": 4.988823142669297, "high_lr": 1.5789473684210526e-06, "low_lr": 3.157894736842106e-08, "step": 1897 }, { "epoch": 4.988823142669297, "high_lr": 1.5789473684210526e-06, "low_lr": 3.157894736842106e-08, "step": 1897 }, { "epoch": 4.988823142669297, "high_lr": 1.5789473684210526e-06, "low_lr": 3.157894736842106e-08, "step": 1897 }, { "epoch": 4.988823142669297, "high_lr": 1.5789473684210526e-06, "low_lr": 3.157894736842106e-08, "step": 1897 }, { "epoch": 4.988823142669297, "high_lr": 1.5789473684210526e-06, "low_lr": 3.157894736842106e-08, "step": 1897 }, { "epoch": 4.988823142669297, "high_lr": 1.5789473684210526e-06, "low_lr": 3.157894736842106e-08, "step": 1897 }, { "epoch": 4.988823142669297, "high_lr": 1.5789473684210526e-06, "low_lr": 3.157894736842106e-08, "step": 1897 }, { "epoch": 4.9914529914529915, "grad_norm": 1.6011543273925781, "learning_rate": 1.0526315789473683e-06, "loss": 1.1987, "step": 1898 }, { "epoch": 4.9914529914529915, "high_lr": 1.0526315789473683e-06, "low_lr": 2.1052631578947368e-08, "step": 1898 }, { "epoch": 4.9914529914529915, "high_lr": 1.0526315789473683e-06, "low_lr": 2.1052631578947368e-08, "step": 1898 }, { "epoch": 4.9914529914529915, "high_lr": 1.0526315789473683e-06, "low_lr": 2.1052631578947368e-08, "step": 1898 }, { "epoch": 4.9914529914529915, "high_lr": 1.0526315789473683e-06, "low_lr": 2.1052631578947368e-08, "step": 1898 }, { "epoch": 4.9914529914529915, "high_lr": 1.0526315789473683e-06, "low_lr": 2.1052631578947368e-08, "step": 1898 }, { "epoch": 4.9914529914529915, "high_lr": 1.0526315789473683e-06, "low_lr": 2.1052631578947368e-08, "step": 1898 }, { "epoch": 4.9914529914529915, "high_lr": 1.0526315789473683e-06, "low_lr": 2.1052631578947368e-08, "step": 1898 }, { "epoch": 4.9914529914529915, "high_lr": 1.0526315789473683e-06, "low_lr": 2.1052631578947368e-08, "step": 1898 }, { "epoch": 4.994082840236686, "grad_norm": 1.5550873279571533, "learning_rate": 5.263157894736842e-07, "loss": 1.251, "step": 1899 }, { "epoch": 4.994082840236686, "high_lr": 5.263157894736842e-07, "low_lr": 1.0526315789473684e-08, "step": 1899 }, { "epoch": 4.994082840236686, "high_lr": 5.263157894736842e-07, "low_lr": 1.0526315789473684e-08, "step": 1899 }, { "epoch": 4.994082840236686, "high_lr": 5.263157894736842e-07, "low_lr": 1.0526315789473684e-08, "step": 1899 }, { "epoch": 4.994082840236686, "high_lr": 5.263157894736842e-07, "low_lr": 1.0526315789473684e-08, "step": 1899 }, { "epoch": 4.994082840236686, "high_lr": 5.263157894736842e-07, "low_lr": 1.0526315789473684e-08, "step": 1899 }, { "epoch": 4.994082840236686, "high_lr": 5.263157894736842e-07, "low_lr": 1.0526315789473684e-08, "step": 1899 }, { "epoch": 4.994082840236686, "high_lr": 5.263157894736842e-07, "low_lr": 1.0526315789473684e-08, "step": 1899 }, { "epoch": 4.994082840236686, "high_lr": 5.263157894736842e-07, "low_lr": 1.0526315789473684e-08, "step": 1899 }, { "epoch": 4.996712689020382, "grad_norm": 1.5914562940597534, "learning_rate": 0.0, "loss": 1.2178, "step": 1900 }, { "epoch": 4.996712689020382, "step": 1900, "total_flos": 1.1194761013985542e+19, "train_loss": 1.445524227619171, "train_runtime": 18692.4736, "train_samples_per_second": 6.509, "train_steps_per_second": 0.102 } ], "logging_steps": 1.0, "max_steps": 1900, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1194761013985542e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }