diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1dd7cec1482b2bd1555c904970aaa136a44023a3 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,36 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "/home/models/llava_model/llava-qwen-2-7b-clip_original/", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 64, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "multi_modal_projector" + ], + "peft_type": "LORA", + "r": 32, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "o_proj", + "down_proj", + "gate_proj", + "k_proj", + "v_proj", + "up_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d398b85e935b8c6d160f9ff4cfcdb35f67b141b1 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9818e4835cef3b133c30dcdc81884f4815ab626ba82fa1a10886c1dceebcd08c +size 204045488 diff --git a/images/llava-qwen2-lora-01.JPG b/images/llava-qwen2-lora-01.JPG new file mode 100644 index 0000000000000000000000000000000000000000..70bf15f94a08dee71739fa83176caa6e9c67cb1f Binary files /dev/null and b/images/llava-qwen2-lora-01.JPG differ diff --git a/images/llava-qwen2-lora-02.JPG b/images/llava-qwen2-lora-02.JPG new file mode 100644 index 0000000000000000000000000000000000000000..e2ef0c09c96c99a0c247886c9b0b1911138e2ca4 Binary files /dev/null and b/images/llava-qwen2-lora-02.JPG differ diff --git a/images/llava-qwen2-lora-03.JPG b/images/llava-qwen2-lora-03.JPG new file mode 100644 index 0000000000000000000000000000000000000000..d4c42e29c27dd4d57872214478fce1c2bd5ce228 Binary files /dev/null and b/images/llava-qwen2-lora-03.JPG differ diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f863b17b0c1acefe5533c905be8432bf7e1cb95d --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,104542 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.996712689020382, + "eval_steps": 500, + "global_step": 1900, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0, + "high_lr": 0.001, + "low_lr": 2e-05, + "step": 0 + }, + { + "epoch": 0, + "high_lr": 0.001, + "low_lr": 2e-05, + "step": 0 + }, + { + "epoch": 0, + "high_lr": 0.001, + "low_lr": 2e-05, + "step": 0 + }, + { + "epoch": 0, + "high_lr": 0.001, + "low_lr": 2e-05, + "step": 0 + }, + { + "epoch": 0, + "high_lr": 0.001, + "low_lr": 2e-05, + "step": 0 + }, + { + "epoch": 0, + "high_lr": 0.001, + "low_lr": 2e-05, + "step": 0 + }, + { + "epoch": 0, + "high_lr": 0.001, + "low_lr": 2e-05, + "step": 0 + }, + { + "epoch": 0, + "high_lr": 0.001, + "low_lr": 2e-05, + "step": 0 + }, + { + "epoch": 0.0026298487836949377, + "grad_norm": 2.205960988998413, + "learning_rate": 0.0009994736842105264, + "loss": 2.5661, + "step": 1 + }, + { + "epoch": 0.0026298487836949377, + "high_lr": 0.0009994736842105264, + "low_lr": 1.9989473684210526e-05, + "step": 1 + }, + { + "epoch": 0.0026298487836949377, + "high_lr": 0.0009994736842105264, + "low_lr": 1.9989473684210526e-05, + "step": 1 + }, + { + "epoch": 0.0026298487836949377, + "high_lr": 0.0009994736842105264, + "low_lr": 1.9989473684210526e-05, + "step": 1 + }, + { + "epoch": 0.0026298487836949377, + "high_lr": 0.0009994736842105264, + "low_lr": 1.9989473684210526e-05, + "step": 1 + }, + { + "epoch": 0.0026298487836949377, + "high_lr": 0.0009994736842105264, + "low_lr": 1.9989473684210526e-05, + "step": 1 + }, + { + "epoch": 0.0026298487836949377, + "high_lr": 0.0009994736842105264, + "low_lr": 1.9989473684210526e-05, + "step": 1 + }, + { + "epoch": 0.0026298487836949377, + "high_lr": 0.0009994736842105264, + "low_lr": 1.9989473684210526e-05, + "step": 1 + }, + { + "epoch": 0.0026298487836949377, + "high_lr": 0.0009994736842105264, + "low_lr": 1.9989473684210526e-05, + "step": 1 + }, + { + "epoch": 0.005259697567389875, + "grad_norm": 1.3347679376602173, + "learning_rate": 0.0009989473684210526, + "loss": 2.5835, + "step": 2 + }, + { + "epoch": 0.005259697567389875, + "high_lr": 0.0009989473684210526, + "low_lr": 1.9978947368421054e-05, + "step": 2 + }, + { + "epoch": 0.005259697567389875, + "high_lr": 0.0009989473684210526, + "low_lr": 1.9978947368421054e-05, + "step": 2 + }, + { + "epoch": 0.005259697567389875, + "high_lr": 0.0009989473684210526, + "low_lr": 1.9978947368421054e-05, + "step": 2 + }, + { + "epoch": 0.005259697567389875, + "high_lr": 0.0009989473684210526, + "low_lr": 1.9978947368421054e-05, + "step": 2 + }, + { + "epoch": 0.005259697567389875, + "high_lr": 0.0009989473684210526, + "low_lr": 1.9978947368421054e-05, + "step": 2 + }, + { + "epoch": 0.005259697567389875, + "high_lr": 0.0009989473684210526, + "low_lr": 1.9978947368421054e-05, + "step": 2 + }, + { + "epoch": 0.005259697567389875, + "high_lr": 0.0009989473684210526, + "low_lr": 1.9978947368421054e-05, + "step": 2 + }, + { + "epoch": 0.005259697567389875, + "high_lr": 0.0009989473684210526, + "low_lr": 1.9978947368421054e-05, + "step": 2 + }, + { + "epoch": 0.007889546351084813, + "grad_norm": 1.0948487520217896, + "learning_rate": 0.000998421052631579, + "loss": 2.5211, + "step": 3 + }, + { + "epoch": 0.007889546351084813, + "high_lr": 0.000998421052631579, + "low_lr": 1.9968421052631582e-05, + "step": 3 + }, + { + "epoch": 0.007889546351084813, + "high_lr": 0.000998421052631579, + "low_lr": 1.9968421052631582e-05, + "step": 3 + }, + { + "epoch": 0.007889546351084813, + "high_lr": 0.000998421052631579, + "low_lr": 1.9968421052631582e-05, + "step": 3 + }, + { + "epoch": 0.007889546351084813, + "high_lr": 0.000998421052631579, + "low_lr": 1.9968421052631582e-05, + "step": 3 + }, + { + "epoch": 0.007889546351084813, + "high_lr": 0.000998421052631579, + "low_lr": 1.9968421052631582e-05, + "step": 3 + }, + { + "epoch": 0.007889546351084813, + "high_lr": 0.000998421052631579, + "low_lr": 1.9968421052631582e-05, + "step": 3 + }, + { + "epoch": 0.007889546351084813, + "high_lr": 0.000998421052631579, + "low_lr": 1.9968421052631582e-05, + "step": 3 + }, + { + "epoch": 0.007889546351084813, + "high_lr": 0.000998421052631579, + "low_lr": 1.9968421052631582e-05, + "step": 3 + }, + { + "epoch": 0.01051939513477975, + "grad_norm": 0.8912683725357056, + "learning_rate": 0.0009978947368421054, + "loss": 2.4531, + "step": 4 + }, + { + "epoch": 0.01051939513477975, + "high_lr": 0.0009978947368421054, + "low_lr": 1.9957894736842107e-05, + "step": 4 + }, + { + "epoch": 0.01051939513477975, + "high_lr": 0.0009978947368421054, + "low_lr": 1.9957894736842107e-05, + "step": 4 + }, + { + "epoch": 0.01051939513477975, + "high_lr": 0.0009978947368421054, + "low_lr": 1.9957894736842107e-05, + "step": 4 + }, + { + "epoch": 0.01051939513477975, + "high_lr": 0.0009978947368421054, + "low_lr": 1.9957894736842107e-05, + "step": 4 + }, + { + "epoch": 0.01051939513477975, + "high_lr": 0.0009978947368421054, + "low_lr": 1.9957894736842107e-05, + "step": 4 + }, + { + "epoch": 0.01051939513477975, + "high_lr": 0.0009978947368421054, + "low_lr": 1.9957894736842107e-05, + "step": 4 + }, + { + "epoch": 0.01051939513477975, + "high_lr": 0.0009978947368421054, + "low_lr": 1.9957894736842107e-05, + "step": 4 + }, + { + "epoch": 0.01051939513477975, + "high_lr": 0.0009978947368421054, + "low_lr": 1.9957894736842107e-05, + "step": 4 + }, + { + "epoch": 0.013149243918474688, + "grad_norm": 0.7557066082954407, + "learning_rate": 0.0009973684210526316, + "loss": 2.3694, + "step": 5 + }, + { + "epoch": 0.013149243918474688, + "high_lr": 0.0009973684210526316, + "low_lr": 1.994736842105263e-05, + "step": 5 + }, + { + "epoch": 0.013149243918474688, + "high_lr": 0.0009973684210526316, + "low_lr": 1.994736842105263e-05, + "step": 5 + }, + { + "epoch": 0.013149243918474688, + "high_lr": 0.0009973684210526316, + "low_lr": 1.994736842105263e-05, + "step": 5 + }, + { + "epoch": 0.013149243918474688, + "high_lr": 0.0009973684210526316, + "low_lr": 1.994736842105263e-05, + "step": 5 + }, + { + "epoch": 0.013149243918474688, + "high_lr": 0.0009973684210526316, + "low_lr": 1.994736842105263e-05, + "step": 5 + }, + { + "epoch": 0.013149243918474688, + "high_lr": 0.0009973684210526316, + "low_lr": 1.994736842105263e-05, + "step": 5 + }, + { + "epoch": 0.013149243918474688, + "high_lr": 0.0009973684210526316, + "low_lr": 1.994736842105263e-05, + "step": 5 + }, + { + "epoch": 0.013149243918474688, + "high_lr": 0.0009973684210526316, + "low_lr": 1.994736842105263e-05, + "step": 5 + }, + { + "epoch": 0.015779092702169626, + "grad_norm": 0.6246759295463562, + "learning_rate": 0.000996842105263158, + "loss": 2.3495, + "step": 6 + }, + { + "epoch": 0.015779092702169626, + "high_lr": 0.000996842105263158, + "low_lr": 1.993684210526316e-05, + "step": 6 + }, + { + "epoch": 0.015779092702169626, + "high_lr": 0.000996842105263158, + "low_lr": 1.993684210526316e-05, + "step": 6 + }, + { + "epoch": 0.015779092702169626, + "high_lr": 0.000996842105263158, + "low_lr": 1.993684210526316e-05, + "step": 6 + }, + { + "epoch": 0.015779092702169626, + "high_lr": 0.000996842105263158, + "low_lr": 1.993684210526316e-05, + "step": 6 + }, + { + "epoch": 0.015779092702169626, + "high_lr": 0.000996842105263158, + "low_lr": 1.993684210526316e-05, + "step": 6 + }, + { + "epoch": 0.015779092702169626, + "high_lr": 0.000996842105263158, + "low_lr": 1.993684210526316e-05, + "step": 6 + }, + { + "epoch": 0.015779092702169626, + "high_lr": 0.000996842105263158, + "low_lr": 1.993684210526316e-05, + "step": 6 + }, + { + "epoch": 0.015779092702169626, + "high_lr": 0.000996842105263158, + "low_lr": 1.993684210526316e-05, + "step": 6 + }, + { + "epoch": 0.018408941485864562, + "grad_norm": 0.5611429214477539, + "learning_rate": 0.0009963157894736843, + "loss": 2.368, + "step": 7 + }, + { + "epoch": 0.018408941485864562, + "high_lr": 0.0009963157894736843, + "low_lr": 1.9926315789473688e-05, + "step": 7 + }, + { + "epoch": 0.018408941485864562, + "high_lr": 0.0009963157894736843, + "low_lr": 1.9926315789473688e-05, + "step": 7 + }, + { + "epoch": 0.018408941485864562, + "high_lr": 0.0009963157894736843, + "low_lr": 1.9926315789473688e-05, + "step": 7 + }, + { + "epoch": 0.018408941485864562, + "high_lr": 0.0009963157894736843, + "low_lr": 1.9926315789473688e-05, + "step": 7 + }, + { + "epoch": 0.018408941485864562, + "high_lr": 0.0009963157894736843, + "low_lr": 1.9926315789473688e-05, + "step": 7 + }, + { + "epoch": 0.018408941485864562, + "high_lr": 0.0009963157894736843, + "low_lr": 1.9926315789473688e-05, + "step": 7 + }, + { + "epoch": 0.018408941485864562, + "high_lr": 0.0009963157894736843, + "low_lr": 1.9926315789473688e-05, + "step": 7 + }, + { + "epoch": 0.018408941485864562, + "high_lr": 0.0009963157894736843, + "low_lr": 1.9926315789473688e-05, + "step": 7 + }, + { + "epoch": 0.0210387902695595, + "grad_norm": 0.5358776450157166, + "learning_rate": 0.0009957894736842105, + "loss": 2.326, + "step": 8 + }, + { + "epoch": 0.0210387902695595, + "high_lr": 0.0009957894736842105, + "low_lr": 1.9915789473684212e-05, + "step": 8 + }, + { + "epoch": 0.0210387902695595, + "high_lr": 0.0009957894736842105, + "low_lr": 1.9915789473684212e-05, + "step": 8 + }, + { + "epoch": 0.0210387902695595, + "high_lr": 0.0009957894736842105, + "low_lr": 1.9915789473684212e-05, + "step": 8 + }, + { + "epoch": 0.0210387902695595, + "high_lr": 0.0009957894736842105, + "low_lr": 1.9915789473684212e-05, + "step": 8 + }, + { + "epoch": 0.0210387902695595, + "high_lr": 0.0009957894736842105, + "low_lr": 1.9915789473684212e-05, + "step": 8 + }, + { + "epoch": 0.0210387902695595, + "high_lr": 0.0009957894736842105, + "low_lr": 1.9915789473684212e-05, + "step": 8 + }, + { + "epoch": 0.0210387902695595, + "high_lr": 0.0009957894736842105, + "low_lr": 1.9915789473684212e-05, + "step": 8 + }, + { + "epoch": 0.0210387902695595, + "high_lr": 0.0009957894736842105, + "low_lr": 1.9915789473684212e-05, + "step": 8 + }, + { + "epoch": 0.023668639053254437, + "grad_norm": 0.46877679228782654, + "learning_rate": 0.000995263157894737, + "loss": 2.2725, + "step": 9 + }, + { + "epoch": 0.023668639053254437, + "high_lr": 0.000995263157894737, + "low_lr": 1.990526315789474e-05, + "step": 9 + }, + { + "epoch": 0.023668639053254437, + "high_lr": 0.000995263157894737, + "low_lr": 1.990526315789474e-05, + "step": 9 + }, + { + "epoch": 0.023668639053254437, + "high_lr": 0.000995263157894737, + "low_lr": 1.990526315789474e-05, + "step": 9 + }, + { + "epoch": 0.023668639053254437, + "high_lr": 0.000995263157894737, + "low_lr": 1.990526315789474e-05, + "step": 9 + }, + { + "epoch": 0.023668639053254437, + "high_lr": 0.000995263157894737, + "low_lr": 1.990526315789474e-05, + "step": 9 + }, + { + "epoch": 0.023668639053254437, + "high_lr": 0.000995263157894737, + "low_lr": 1.990526315789474e-05, + "step": 9 + }, + { + "epoch": 0.023668639053254437, + "high_lr": 0.000995263157894737, + "low_lr": 1.990526315789474e-05, + "step": 9 + }, + { + "epoch": 0.023668639053254437, + "high_lr": 0.000995263157894737, + "low_lr": 1.990526315789474e-05, + "step": 9 + }, + { + "epoch": 0.026298487836949377, + "grad_norm": 0.4279167056083679, + "learning_rate": 0.000994736842105263, + "loss": 2.3037, + "step": 10 + }, + { + "epoch": 0.026298487836949377, + "high_lr": 0.000994736842105263, + "low_lr": 1.9894736842105265e-05, + "step": 10 + }, + { + "epoch": 0.026298487836949377, + "high_lr": 0.000994736842105263, + "low_lr": 1.9894736842105265e-05, + "step": 10 + }, + { + "epoch": 0.026298487836949377, + "high_lr": 0.000994736842105263, + "low_lr": 1.9894736842105265e-05, + "step": 10 + }, + { + "epoch": 0.026298487836949377, + "high_lr": 0.000994736842105263, + "low_lr": 1.9894736842105265e-05, + "step": 10 + }, + { + "epoch": 0.026298487836949377, + "high_lr": 0.000994736842105263, + "low_lr": 1.9894736842105265e-05, + "step": 10 + }, + { + "epoch": 0.026298487836949377, + "high_lr": 0.000994736842105263, + "low_lr": 1.9894736842105265e-05, + "step": 10 + }, + { + "epoch": 0.026298487836949377, + "high_lr": 0.000994736842105263, + "low_lr": 1.9894736842105265e-05, + "step": 10 + }, + { + "epoch": 0.026298487836949377, + "high_lr": 0.000994736842105263, + "low_lr": 1.9894736842105265e-05, + "step": 10 + }, + { + "epoch": 0.028928336620644313, + "grad_norm": 0.4076891541481018, + "learning_rate": 0.0009942105263157895, + "loss": 2.2379, + "step": 11 + }, + { + "epoch": 0.028928336620644313, + "high_lr": 0.0009942105263157895, + "low_lr": 1.988421052631579e-05, + "step": 11 + }, + { + "epoch": 0.028928336620644313, + "high_lr": 0.0009942105263157895, + "low_lr": 1.988421052631579e-05, + "step": 11 + }, + { + "epoch": 0.028928336620644313, + "high_lr": 0.0009942105263157895, + "low_lr": 1.988421052631579e-05, + "step": 11 + }, + { + "epoch": 0.028928336620644313, + "high_lr": 0.0009942105263157895, + "low_lr": 1.988421052631579e-05, + "step": 11 + }, + { + "epoch": 0.028928336620644313, + "high_lr": 0.0009942105263157895, + "low_lr": 1.988421052631579e-05, + "step": 11 + }, + { + "epoch": 0.028928336620644313, + "high_lr": 0.0009942105263157895, + "low_lr": 1.988421052631579e-05, + "step": 11 + }, + { + "epoch": 0.028928336620644313, + "high_lr": 0.0009942105263157895, + "low_lr": 1.988421052631579e-05, + "step": 11 + }, + { + "epoch": 0.028928336620644313, + "high_lr": 0.0009942105263157895, + "low_lr": 1.988421052631579e-05, + "step": 11 + }, + { + "epoch": 0.03155818540433925, + "grad_norm": 0.4081032872200012, + "learning_rate": 0.0009936842105263159, + "loss": 2.1953, + "step": 12 + }, + { + "epoch": 0.03155818540433925, + "high_lr": 0.0009936842105263159, + "low_lr": 1.9873684210526318e-05, + "step": 12 + }, + { + "epoch": 0.03155818540433925, + "high_lr": 0.0009936842105263159, + "low_lr": 1.9873684210526318e-05, + "step": 12 + }, + { + "epoch": 0.03155818540433925, + "high_lr": 0.0009936842105263159, + "low_lr": 1.9873684210526318e-05, + "step": 12 + }, + { + "epoch": 0.03155818540433925, + "high_lr": 0.0009936842105263159, + "low_lr": 1.9873684210526318e-05, + "step": 12 + }, + { + "epoch": 0.03155818540433925, + "high_lr": 0.0009936842105263159, + "low_lr": 1.9873684210526318e-05, + "step": 12 + }, + { + "epoch": 0.03155818540433925, + "high_lr": 0.0009936842105263159, + "low_lr": 1.9873684210526318e-05, + "step": 12 + }, + { + "epoch": 0.03155818540433925, + "high_lr": 0.0009936842105263159, + "low_lr": 1.9873684210526318e-05, + "step": 12 + }, + { + "epoch": 0.03155818540433925, + "high_lr": 0.0009936842105263159, + "low_lr": 1.9873684210526318e-05, + "step": 12 + }, + { + "epoch": 0.03418803418803419, + "grad_norm": 0.5867348909378052, + "learning_rate": 0.000993157894736842, + "loss": 2.2433, + "step": 13 + }, + { + "epoch": 0.03418803418803419, + "high_lr": 0.000993157894736842, + "low_lr": 1.9863157894736846e-05, + "step": 13 + }, + { + "epoch": 0.03418803418803419, + "high_lr": 0.000993157894736842, + "low_lr": 1.9863157894736846e-05, + "step": 13 + }, + { + "epoch": 0.03418803418803419, + "high_lr": 0.000993157894736842, + "low_lr": 1.9863157894736846e-05, + "step": 13 + }, + { + "epoch": 0.03418803418803419, + "high_lr": 0.000993157894736842, + "low_lr": 1.9863157894736846e-05, + "step": 13 + }, + { + "epoch": 0.03418803418803419, + "high_lr": 0.000993157894736842, + "low_lr": 1.9863157894736846e-05, + "step": 13 + }, + { + "epoch": 0.03418803418803419, + "high_lr": 0.000993157894736842, + "low_lr": 1.9863157894736846e-05, + "step": 13 + }, + { + "epoch": 0.03418803418803419, + "high_lr": 0.000993157894736842, + "low_lr": 1.9863157894736846e-05, + "step": 13 + }, + { + "epoch": 0.03418803418803419, + "high_lr": 0.000993157894736842, + "low_lr": 1.9863157894736846e-05, + "step": 13 + }, + { + "epoch": 0.036817882971729124, + "grad_norm": 0.453240305185318, + "learning_rate": 0.0009926315789473685, + "loss": 2.1978, + "step": 14 + }, + { + "epoch": 0.036817882971729124, + "high_lr": 0.0009926315789473685, + "low_lr": 1.985263157894737e-05, + "step": 14 + }, + { + "epoch": 0.036817882971729124, + "high_lr": 0.0009926315789473685, + "low_lr": 1.985263157894737e-05, + "step": 14 + }, + { + "epoch": 0.036817882971729124, + "high_lr": 0.0009926315789473685, + "low_lr": 1.985263157894737e-05, + "step": 14 + }, + { + "epoch": 0.036817882971729124, + "high_lr": 0.0009926315789473685, + "low_lr": 1.985263157894737e-05, + "step": 14 + }, + { + "epoch": 0.036817882971729124, + "high_lr": 0.0009926315789473685, + "low_lr": 1.985263157894737e-05, + "step": 14 + }, + { + "epoch": 0.036817882971729124, + "high_lr": 0.0009926315789473685, + "low_lr": 1.985263157894737e-05, + "step": 14 + }, + { + "epoch": 0.036817882971729124, + "high_lr": 0.0009926315789473685, + "low_lr": 1.985263157894737e-05, + "step": 14 + }, + { + "epoch": 0.036817882971729124, + "high_lr": 0.0009926315789473685, + "low_lr": 1.985263157894737e-05, + "step": 14 + }, + { + "epoch": 0.03944773175542406, + "grad_norm": 0.4354722797870636, + "learning_rate": 0.0009921052631578946, + "loss": 2.2465, + "step": 15 + }, + { + "epoch": 0.03944773175542406, + "high_lr": 0.0009921052631578946, + "low_lr": 1.9842105263157895e-05, + "step": 15 + }, + { + "epoch": 0.03944773175542406, + "high_lr": 0.0009921052631578946, + "low_lr": 1.9842105263157895e-05, + "step": 15 + }, + { + "epoch": 0.03944773175542406, + "high_lr": 0.0009921052631578946, + "low_lr": 1.9842105263157895e-05, + "step": 15 + }, + { + "epoch": 0.03944773175542406, + "high_lr": 0.0009921052631578946, + "low_lr": 1.9842105263157895e-05, + "step": 15 + }, + { + "epoch": 0.03944773175542406, + "high_lr": 0.0009921052631578946, + "low_lr": 1.9842105263157895e-05, + "step": 15 + }, + { + "epoch": 0.03944773175542406, + "high_lr": 0.0009921052631578946, + "low_lr": 1.9842105263157895e-05, + "step": 15 + }, + { + "epoch": 0.03944773175542406, + "high_lr": 0.0009921052631578946, + "low_lr": 1.9842105263157895e-05, + "step": 15 + }, + { + "epoch": 0.03944773175542406, + "high_lr": 0.0009921052631578946, + "low_lr": 1.9842105263157895e-05, + "step": 15 + }, + { + "epoch": 0.042077580539119, + "grad_norm": 0.4762808084487915, + "learning_rate": 0.000991578947368421, + "loss": 2.2044, + "step": 16 + }, + { + "epoch": 0.042077580539119, + "high_lr": 0.000991578947368421, + "low_lr": 1.9831578947368423e-05, + "step": 16 + }, + { + "epoch": 0.042077580539119, + "high_lr": 0.000991578947368421, + "low_lr": 1.9831578947368423e-05, + "step": 16 + }, + { + "epoch": 0.042077580539119, + "high_lr": 0.000991578947368421, + "low_lr": 1.9831578947368423e-05, + "step": 16 + }, + { + "epoch": 0.042077580539119, + "high_lr": 0.000991578947368421, + "low_lr": 1.9831578947368423e-05, + "step": 16 + }, + { + "epoch": 0.042077580539119, + "high_lr": 0.000991578947368421, + "low_lr": 1.9831578947368423e-05, + "step": 16 + }, + { + "epoch": 0.042077580539119, + "high_lr": 0.000991578947368421, + "low_lr": 1.9831578947368423e-05, + "step": 16 + }, + { + "epoch": 0.042077580539119, + "high_lr": 0.000991578947368421, + "low_lr": 1.9831578947368423e-05, + "step": 16 + }, + { + "epoch": 0.042077580539119, + "high_lr": 0.000991578947368421, + "low_lr": 1.9831578947368423e-05, + "step": 16 + }, + { + "epoch": 0.044707429322813935, + "grad_norm": 0.5493490099906921, + "learning_rate": 0.0009910526315789474, + "loss": 2.1654, + "step": 17 + }, + { + "epoch": 0.044707429322813935, + "high_lr": 0.0009910526315789474, + "low_lr": 1.982105263157895e-05, + "step": 17 + }, + { + "epoch": 0.044707429322813935, + "high_lr": 0.0009910526315789474, + "low_lr": 1.982105263157895e-05, + "step": 17 + }, + { + "epoch": 0.044707429322813935, + "high_lr": 0.0009910526315789474, + "low_lr": 1.982105263157895e-05, + "step": 17 + }, + { + "epoch": 0.044707429322813935, + "high_lr": 0.0009910526315789474, + "low_lr": 1.982105263157895e-05, + "step": 17 + }, + { + "epoch": 0.044707429322813935, + "high_lr": 0.0009910526315789474, + "low_lr": 1.982105263157895e-05, + "step": 17 + }, + { + "epoch": 0.044707429322813935, + "high_lr": 0.0009910526315789474, + "low_lr": 1.982105263157895e-05, + "step": 17 + }, + { + "epoch": 0.044707429322813935, + "high_lr": 0.0009910526315789474, + "low_lr": 1.982105263157895e-05, + "step": 17 + }, + { + "epoch": 0.044707429322813935, + "high_lr": 0.0009910526315789474, + "low_lr": 1.982105263157895e-05, + "step": 17 + }, + { + "epoch": 0.047337278106508875, + "grad_norm": 0.43686118721961975, + "learning_rate": 0.0009905263157894738, + "loss": 2.1749, + "step": 18 + }, + { + "epoch": 0.047337278106508875, + "high_lr": 0.0009905263157894738, + "low_lr": 1.9810526315789476e-05, + "step": 18 + }, + { + "epoch": 0.047337278106508875, + "high_lr": 0.0009905263157894738, + "low_lr": 1.9810526315789476e-05, + "step": 18 + }, + { + "epoch": 0.047337278106508875, + "high_lr": 0.0009905263157894738, + "low_lr": 1.9810526315789476e-05, + "step": 18 + }, + { + "epoch": 0.047337278106508875, + "high_lr": 0.0009905263157894738, + "low_lr": 1.9810526315789476e-05, + "step": 18 + }, + { + "epoch": 0.047337278106508875, + "high_lr": 0.0009905263157894738, + "low_lr": 1.9810526315789476e-05, + "step": 18 + }, + { + "epoch": 0.047337278106508875, + "high_lr": 0.0009905263157894738, + "low_lr": 1.9810526315789476e-05, + "step": 18 + }, + { + "epoch": 0.047337278106508875, + "high_lr": 0.0009905263157894738, + "low_lr": 1.9810526315789476e-05, + "step": 18 + }, + { + "epoch": 0.047337278106508875, + "high_lr": 0.0009905263157894738, + "low_lr": 1.9810526315789476e-05, + "step": 18 + }, + { + "epoch": 0.049967126890203814, + "grad_norm": 0.5231380462646484, + "learning_rate": 0.00099, + "loss": 2.1947, + "step": 19 + }, + { + "epoch": 0.049967126890203814, + "high_lr": 0.00099, + "low_lr": 1.98e-05, + "step": 19 + }, + { + "epoch": 0.049967126890203814, + "high_lr": 0.00099, + "low_lr": 1.98e-05, + "step": 19 + }, + { + "epoch": 0.049967126890203814, + "high_lr": 0.00099, + "low_lr": 1.98e-05, + "step": 19 + }, + { + "epoch": 0.049967126890203814, + "high_lr": 0.00099, + "low_lr": 1.98e-05, + "step": 19 + }, + { + "epoch": 0.049967126890203814, + "high_lr": 0.00099, + "low_lr": 1.98e-05, + "step": 19 + }, + { + "epoch": 0.049967126890203814, + "high_lr": 0.00099, + "low_lr": 1.98e-05, + "step": 19 + }, + { + "epoch": 0.049967126890203814, + "high_lr": 0.00099, + "low_lr": 1.98e-05, + "step": 19 + }, + { + "epoch": 0.049967126890203814, + "high_lr": 0.00099, + "low_lr": 1.98e-05, + "step": 19 + }, + { + "epoch": 0.05259697567389875, + "grad_norm": 0.5058587789535522, + "learning_rate": 0.0009894736842105264, + "loss": 2.1794, + "step": 20 + }, + { + "epoch": 0.05259697567389875, + "high_lr": 0.0009894736842105264, + "low_lr": 1.9789473684210528e-05, + "step": 20 + }, + { + "epoch": 0.05259697567389875, + "high_lr": 0.0009894736842105264, + "low_lr": 1.9789473684210528e-05, + "step": 20 + }, + { + "epoch": 0.05259697567389875, + "high_lr": 0.0009894736842105264, + "low_lr": 1.9789473684210528e-05, + "step": 20 + }, + { + "epoch": 0.05259697567389875, + "high_lr": 0.0009894736842105264, + "low_lr": 1.9789473684210528e-05, + "step": 20 + }, + { + "epoch": 0.05259697567389875, + "high_lr": 0.0009894736842105264, + "low_lr": 1.9789473684210528e-05, + "step": 20 + }, + { + "epoch": 0.05259697567389875, + "high_lr": 0.0009894736842105264, + "low_lr": 1.9789473684210528e-05, + "step": 20 + }, + { + "epoch": 0.05259697567389875, + "high_lr": 0.0009894736842105264, + "low_lr": 1.9789473684210528e-05, + "step": 20 + }, + { + "epoch": 0.05259697567389875, + "high_lr": 0.0009894736842105264, + "low_lr": 1.9789473684210528e-05, + "step": 20 + }, + { + "epoch": 0.055226824457593686, + "grad_norm": 0.41516372561454773, + "learning_rate": 0.0009889473684210528, + "loss": 2.1126, + "step": 21 + }, + { + "epoch": 0.055226824457593686, + "high_lr": 0.0009889473684210528, + "low_lr": 1.9778947368421056e-05, + "step": 21 + }, + { + "epoch": 0.055226824457593686, + "high_lr": 0.0009889473684210528, + "low_lr": 1.9778947368421056e-05, + "step": 21 + }, + { + "epoch": 0.055226824457593686, + "high_lr": 0.0009889473684210528, + "low_lr": 1.9778947368421056e-05, + "step": 21 + }, + { + "epoch": 0.055226824457593686, + "high_lr": 0.0009889473684210528, + "low_lr": 1.9778947368421056e-05, + "step": 21 + }, + { + "epoch": 0.055226824457593686, + "high_lr": 0.0009889473684210528, + "low_lr": 1.9778947368421056e-05, + "step": 21 + }, + { + "epoch": 0.055226824457593686, + "high_lr": 0.0009889473684210528, + "low_lr": 1.9778947368421056e-05, + "step": 21 + }, + { + "epoch": 0.055226824457593686, + "high_lr": 0.0009889473684210528, + "low_lr": 1.9778947368421056e-05, + "step": 21 + }, + { + "epoch": 0.055226824457593686, + "high_lr": 0.0009889473684210528, + "low_lr": 1.9778947368421056e-05, + "step": 21 + }, + { + "epoch": 0.057856673241288625, + "grad_norm": 0.4417484402656555, + "learning_rate": 0.000988421052631579, + "loss": 2.1126, + "step": 22 + }, + { + "epoch": 0.057856673241288625, + "high_lr": 0.000988421052631579, + "low_lr": 1.976842105263158e-05, + "step": 22 + }, + { + "epoch": 0.057856673241288625, + "high_lr": 0.000988421052631579, + "low_lr": 1.976842105263158e-05, + "step": 22 + }, + { + "epoch": 0.057856673241288625, + "high_lr": 0.000988421052631579, + "low_lr": 1.976842105263158e-05, + "step": 22 + }, + { + "epoch": 0.057856673241288625, + "high_lr": 0.000988421052631579, + "low_lr": 1.976842105263158e-05, + "step": 22 + }, + { + "epoch": 0.057856673241288625, + "high_lr": 0.000988421052631579, + "low_lr": 1.976842105263158e-05, + "step": 22 + }, + { + "epoch": 0.057856673241288625, + "high_lr": 0.000988421052631579, + "low_lr": 1.976842105263158e-05, + "step": 22 + }, + { + "epoch": 0.057856673241288625, + "high_lr": 0.000988421052631579, + "low_lr": 1.976842105263158e-05, + "step": 22 + }, + { + "epoch": 0.057856673241288625, + "high_lr": 0.000988421052631579, + "low_lr": 1.976842105263158e-05, + "step": 22 + }, + { + "epoch": 0.060486522024983565, + "grad_norm": 0.4763607978820801, + "learning_rate": 0.0009878947368421054, + "loss": 2.1861, + "step": 23 + }, + { + "epoch": 0.060486522024983565, + "high_lr": 0.0009878947368421054, + "low_lr": 1.9757894736842105e-05, + "step": 23 + }, + { + "epoch": 0.060486522024983565, + "high_lr": 0.0009878947368421054, + "low_lr": 1.9757894736842105e-05, + "step": 23 + }, + { + "epoch": 0.060486522024983565, + "high_lr": 0.0009878947368421054, + "low_lr": 1.9757894736842105e-05, + "step": 23 + }, + { + "epoch": 0.060486522024983565, + "high_lr": 0.0009878947368421054, + "low_lr": 1.9757894736842105e-05, + "step": 23 + }, + { + "epoch": 0.060486522024983565, + "high_lr": 0.0009878947368421054, + "low_lr": 1.9757894736842105e-05, + "step": 23 + }, + { + "epoch": 0.060486522024983565, + "high_lr": 0.0009878947368421054, + "low_lr": 1.9757894736842105e-05, + "step": 23 + }, + { + "epoch": 0.060486522024983565, + "high_lr": 0.0009878947368421054, + "low_lr": 1.9757894736842105e-05, + "step": 23 + }, + { + "epoch": 0.060486522024983565, + "high_lr": 0.0009878947368421054, + "low_lr": 1.9757894736842105e-05, + "step": 23 + }, + { + "epoch": 0.0631163708086785, + "grad_norm": 0.42307791113853455, + "learning_rate": 0.0009873684210526315, + "loss": 2.0869, + "step": 24 + }, + { + "epoch": 0.0631163708086785, + "high_lr": 0.0009873684210526315, + "low_lr": 1.9747368421052633e-05, + "step": 24 + }, + { + "epoch": 0.0631163708086785, + "high_lr": 0.0009873684210526315, + "low_lr": 1.9747368421052633e-05, + "step": 24 + }, + { + "epoch": 0.0631163708086785, + "high_lr": 0.0009873684210526315, + "low_lr": 1.9747368421052633e-05, + "step": 24 + }, + { + "epoch": 0.0631163708086785, + "high_lr": 0.0009873684210526315, + "low_lr": 1.9747368421052633e-05, + "step": 24 + }, + { + "epoch": 0.0631163708086785, + "high_lr": 0.0009873684210526315, + "low_lr": 1.9747368421052633e-05, + "step": 24 + }, + { + "epoch": 0.0631163708086785, + "high_lr": 0.0009873684210526315, + "low_lr": 1.9747368421052633e-05, + "step": 24 + }, + { + "epoch": 0.0631163708086785, + "high_lr": 0.0009873684210526315, + "low_lr": 1.9747368421052633e-05, + "step": 24 + }, + { + "epoch": 0.0631163708086785, + "high_lr": 0.0009873684210526315, + "low_lr": 1.9747368421052633e-05, + "step": 24 + }, + { + "epoch": 0.06574621959237344, + "grad_norm": 0.439748615026474, + "learning_rate": 0.000986842105263158, + "loss": 2.0882, + "step": 25 + }, + { + "epoch": 0.06574621959237344, + "high_lr": 0.000986842105263158, + "low_lr": 1.9736842105263158e-05, + "step": 25 + }, + { + "epoch": 0.06574621959237344, + "high_lr": 0.000986842105263158, + "low_lr": 1.9736842105263158e-05, + "step": 25 + }, + { + "epoch": 0.06574621959237344, + "high_lr": 0.000986842105263158, + "low_lr": 1.9736842105263158e-05, + "step": 25 + }, + { + "epoch": 0.06574621959237344, + "high_lr": 0.000986842105263158, + "low_lr": 1.9736842105263158e-05, + "step": 25 + }, + { + "epoch": 0.06574621959237344, + "high_lr": 0.000986842105263158, + "low_lr": 1.9736842105263158e-05, + "step": 25 + }, + { + "epoch": 0.06574621959237344, + "high_lr": 0.000986842105263158, + "low_lr": 1.9736842105263158e-05, + "step": 25 + }, + { + "epoch": 0.06574621959237344, + "high_lr": 0.000986842105263158, + "low_lr": 1.9736842105263158e-05, + "step": 25 + }, + { + "epoch": 0.06574621959237344, + "high_lr": 0.000986842105263158, + "low_lr": 1.9736842105263158e-05, + "step": 25 + }, + { + "epoch": 0.06837606837606838, + "grad_norm": 0.39283064007759094, + "learning_rate": 0.0009863157894736843, + "loss": 2.0292, + "step": 26 + }, + { + "epoch": 0.06837606837606838, + "high_lr": 0.0009863157894736843, + "low_lr": 1.9726315789473686e-05, + "step": 26 + }, + { + "epoch": 0.06837606837606838, + "high_lr": 0.0009863157894736843, + "low_lr": 1.9726315789473686e-05, + "step": 26 + }, + { + "epoch": 0.06837606837606838, + "high_lr": 0.0009863157894736843, + "low_lr": 1.9726315789473686e-05, + "step": 26 + }, + { + "epoch": 0.06837606837606838, + "high_lr": 0.0009863157894736843, + "low_lr": 1.9726315789473686e-05, + "step": 26 + }, + { + "epoch": 0.06837606837606838, + "high_lr": 0.0009863157894736843, + "low_lr": 1.9726315789473686e-05, + "step": 26 + }, + { + "epoch": 0.06837606837606838, + "high_lr": 0.0009863157894736843, + "low_lr": 1.9726315789473686e-05, + "step": 26 + }, + { + "epoch": 0.06837606837606838, + "high_lr": 0.0009863157894736843, + "low_lr": 1.9726315789473686e-05, + "step": 26 + }, + { + "epoch": 0.06837606837606838, + "high_lr": 0.0009863157894736843, + "low_lr": 1.9726315789473686e-05, + "step": 26 + }, + { + "epoch": 0.07100591715976332, + "grad_norm": 0.4499070346355438, + "learning_rate": 0.0009857894736842105, + "loss": 2.1265, + "step": 27 + }, + { + "epoch": 0.07100591715976332, + "high_lr": 0.0009857894736842105, + "low_lr": 1.9715789473684214e-05, + "step": 27 + }, + { + "epoch": 0.07100591715976332, + "high_lr": 0.0009857894736842105, + "low_lr": 1.9715789473684214e-05, + "step": 27 + }, + { + "epoch": 0.07100591715976332, + "high_lr": 0.0009857894736842105, + "low_lr": 1.9715789473684214e-05, + "step": 27 + }, + { + "epoch": 0.07100591715976332, + "high_lr": 0.0009857894736842105, + "low_lr": 1.9715789473684214e-05, + "step": 27 + }, + { + "epoch": 0.07100591715976332, + "high_lr": 0.0009857894736842105, + "low_lr": 1.9715789473684214e-05, + "step": 27 + }, + { + "epoch": 0.07100591715976332, + "high_lr": 0.0009857894736842105, + "low_lr": 1.9715789473684214e-05, + "step": 27 + }, + { + "epoch": 0.07100591715976332, + "high_lr": 0.0009857894736842105, + "low_lr": 1.9715789473684214e-05, + "step": 27 + }, + { + "epoch": 0.07100591715976332, + "high_lr": 0.0009857894736842105, + "low_lr": 1.9715789473684214e-05, + "step": 27 + }, + { + "epoch": 0.07363576594345825, + "grad_norm": 0.41874054074287415, + "learning_rate": 0.000985263157894737, + "loss": 1.9887, + "step": 28 + }, + { + "epoch": 0.07363576594345825, + "high_lr": 0.000985263157894737, + "low_lr": 1.970526315789474e-05, + "step": 28 + }, + { + "epoch": 0.07363576594345825, + "high_lr": 0.000985263157894737, + "low_lr": 1.970526315789474e-05, + "step": 28 + }, + { + "epoch": 0.07363576594345825, + "high_lr": 0.000985263157894737, + "low_lr": 1.970526315789474e-05, + "step": 28 + }, + { + "epoch": 0.07363576594345825, + "high_lr": 0.000985263157894737, + "low_lr": 1.970526315789474e-05, + "step": 28 + }, + { + "epoch": 0.07363576594345825, + "high_lr": 0.000985263157894737, + "low_lr": 1.970526315789474e-05, + "step": 28 + }, + { + "epoch": 0.07363576594345825, + "high_lr": 0.000985263157894737, + "low_lr": 1.970526315789474e-05, + "step": 28 + }, + { + "epoch": 0.07363576594345825, + "high_lr": 0.000985263157894737, + "low_lr": 1.970526315789474e-05, + "step": 28 + }, + { + "epoch": 0.07363576594345825, + "high_lr": 0.000985263157894737, + "low_lr": 1.970526315789474e-05, + "step": 28 + }, + { + "epoch": 0.0762656147271532, + "grad_norm": 0.4721282124519348, + "learning_rate": 0.000984736842105263, + "loss": 2.1088, + "step": 29 + }, + { + "epoch": 0.0762656147271532, + "high_lr": 0.000984736842105263, + "low_lr": 1.9694736842105263e-05, + "step": 29 + }, + { + "epoch": 0.0762656147271532, + "high_lr": 0.000984736842105263, + "low_lr": 1.9694736842105263e-05, + "step": 29 + }, + { + "epoch": 0.0762656147271532, + "high_lr": 0.000984736842105263, + "low_lr": 1.9694736842105263e-05, + "step": 29 + }, + { + "epoch": 0.0762656147271532, + "high_lr": 0.000984736842105263, + "low_lr": 1.9694736842105263e-05, + "step": 29 + }, + { + "epoch": 0.0762656147271532, + "high_lr": 0.000984736842105263, + "low_lr": 1.9694736842105263e-05, + "step": 29 + }, + { + "epoch": 0.0762656147271532, + "high_lr": 0.000984736842105263, + "low_lr": 1.9694736842105263e-05, + "step": 29 + }, + { + "epoch": 0.0762656147271532, + "high_lr": 0.000984736842105263, + "low_lr": 1.9694736842105263e-05, + "step": 29 + }, + { + "epoch": 0.0762656147271532, + "high_lr": 0.000984736842105263, + "low_lr": 1.9694736842105263e-05, + "step": 29 + }, + { + "epoch": 0.07889546351084813, + "grad_norm": 0.4673194885253906, + "learning_rate": 0.0009842105263157895, + "loss": 2.1235, + "step": 30 + }, + { + "epoch": 0.07889546351084813, + "high_lr": 0.0009842105263157895, + "low_lr": 1.968421052631579e-05, + "step": 30 + }, + { + "epoch": 0.07889546351084813, + "high_lr": 0.0009842105263157895, + "low_lr": 1.968421052631579e-05, + "step": 30 + }, + { + "epoch": 0.07889546351084813, + "high_lr": 0.0009842105263157895, + "low_lr": 1.968421052631579e-05, + "step": 30 + }, + { + "epoch": 0.07889546351084813, + "high_lr": 0.0009842105263157895, + "low_lr": 1.968421052631579e-05, + "step": 30 + }, + { + "epoch": 0.07889546351084813, + "high_lr": 0.0009842105263157895, + "low_lr": 1.968421052631579e-05, + "step": 30 + }, + { + "epoch": 0.07889546351084813, + "high_lr": 0.0009842105263157895, + "low_lr": 1.968421052631579e-05, + "step": 30 + }, + { + "epoch": 0.07889546351084813, + "high_lr": 0.0009842105263157895, + "low_lr": 1.968421052631579e-05, + "step": 30 + }, + { + "epoch": 0.07889546351084813, + "high_lr": 0.0009842105263157895, + "low_lr": 1.968421052631579e-05, + "step": 30 + }, + { + "epoch": 0.08152531229454306, + "grad_norm": 0.43837472796440125, + "learning_rate": 0.0009836842105263159, + "loss": 2.1186, + "step": 31 + }, + { + "epoch": 0.08152531229454306, + "high_lr": 0.0009836842105263159, + "low_lr": 1.967368421052632e-05, + "step": 31 + }, + { + "epoch": 0.08152531229454306, + "high_lr": 0.0009836842105263159, + "low_lr": 1.967368421052632e-05, + "step": 31 + }, + { + "epoch": 0.08152531229454306, + "high_lr": 0.0009836842105263159, + "low_lr": 1.967368421052632e-05, + "step": 31 + }, + { + "epoch": 0.08152531229454306, + "high_lr": 0.0009836842105263159, + "low_lr": 1.967368421052632e-05, + "step": 31 + }, + { + "epoch": 0.08152531229454306, + "high_lr": 0.0009836842105263159, + "low_lr": 1.967368421052632e-05, + "step": 31 + }, + { + "epoch": 0.08152531229454306, + "high_lr": 0.0009836842105263159, + "low_lr": 1.967368421052632e-05, + "step": 31 + }, + { + "epoch": 0.08152531229454306, + "high_lr": 0.0009836842105263159, + "low_lr": 1.967368421052632e-05, + "step": 31 + }, + { + "epoch": 0.08152531229454306, + "high_lr": 0.0009836842105263159, + "low_lr": 1.967368421052632e-05, + "step": 31 + }, + { + "epoch": 0.084155161078238, + "grad_norm": 0.42769670486450195, + "learning_rate": 0.000983157894736842, + "loss": 2.0903, + "step": 32 + }, + { + "epoch": 0.084155161078238, + "high_lr": 0.000983157894736842, + "low_lr": 1.9663157894736844e-05, + "step": 32 + }, + { + "epoch": 0.084155161078238, + "high_lr": 0.000983157894736842, + "low_lr": 1.9663157894736844e-05, + "step": 32 + }, + { + "epoch": 0.084155161078238, + "high_lr": 0.000983157894736842, + "low_lr": 1.9663157894736844e-05, + "step": 32 + }, + { + "epoch": 0.084155161078238, + "high_lr": 0.000983157894736842, + "low_lr": 1.9663157894736844e-05, + "step": 32 + }, + { + "epoch": 0.084155161078238, + "high_lr": 0.000983157894736842, + "low_lr": 1.9663157894736844e-05, + "step": 32 + }, + { + "epoch": 0.084155161078238, + "high_lr": 0.000983157894736842, + "low_lr": 1.9663157894736844e-05, + "step": 32 + }, + { + "epoch": 0.084155161078238, + "high_lr": 0.000983157894736842, + "low_lr": 1.9663157894736844e-05, + "step": 32 + }, + { + "epoch": 0.084155161078238, + "high_lr": 0.000983157894736842, + "low_lr": 1.9663157894736844e-05, + "step": 32 + }, + { + "epoch": 0.08678500986193294, + "grad_norm": 0.4276221990585327, + "learning_rate": 0.0009826315789473684, + "loss": 2.0315, + "step": 33 + }, + { + "epoch": 0.08678500986193294, + "high_lr": 0.0009826315789473684, + "low_lr": 1.965263157894737e-05, + "step": 33 + }, + { + "epoch": 0.08678500986193294, + "high_lr": 0.0009826315789473684, + "low_lr": 1.965263157894737e-05, + "step": 33 + }, + { + "epoch": 0.08678500986193294, + "high_lr": 0.0009826315789473684, + "low_lr": 1.965263157894737e-05, + "step": 33 + }, + { + "epoch": 0.08678500986193294, + "high_lr": 0.0009826315789473684, + "low_lr": 1.965263157894737e-05, + "step": 33 + }, + { + "epoch": 0.08678500986193294, + "high_lr": 0.0009826315789473684, + "low_lr": 1.965263157894737e-05, + "step": 33 + }, + { + "epoch": 0.08678500986193294, + "high_lr": 0.0009826315789473684, + "low_lr": 1.965263157894737e-05, + "step": 33 + }, + { + "epoch": 0.08678500986193294, + "high_lr": 0.0009826315789473684, + "low_lr": 1.965263157894737e-05, + "step": 33 + }, + { + "epoch": 0.08678500986193294, + "high_lr": 0.0009826315789473684, + "low_lr": 1.965263157894737e-05, + "step": 33 + }, + { + "epoch": 0.08941485864562787, + "grad_norm": 0.46253710985183716, + "learning_rate": 0.0009821052631578948, + "loss": 2.0703, + "step": 34 + }, + { + "epoch": 0.08941485864562787, + "high_lr": 0.0009821052631578948, + "low_lr": 1.9642105263157897e-05, + "step": 34 + }, + { + "epoch": 0.08941485864562787, + "high_lr": 0.0009821052631578948, + "low_lr": 1.9642105263157897e-05, + "step": 34 + }, + { + "epoch": 0.08941485864562787, + "high_lr": 0.0009821052631578948, + "low_lr": 1.9642105263157897e-05, + "step": 34 + }, + { + "epoch": 0.08941485864562787, + "high_lr": 0.0009821052631578948, + "low_lr": 1.9642105263157897e-05, + "step": 34 + }, + { + "epoch": 0.08941485864562787, + "high_lr": 0.0009821052631578948, + "low_lr": 1.9642105263157897e-05, + "step": 34 + }, + { + "epoch": 0.08941485864562787, + "high_lr": 0.0009821052631578948, + "low_lr": 1.9642105263157897e-05, + "step": 34 + }, + { + "epoch": 0.08941485864562787, + "high_lr": 0.0009821052631578948, + "low_lr": 1.9642105263157897e-05, + "step": 34 + }, + { + "epoch": 0.08941485864562787, + "high_lr": 0.0009821052631578948, + "low_lr": 1.9642105263157897e-05, + "step": 34 + }, + { + "epoch": 0.09204470742932282, + "grad_norm": 0.4598131477832794, + "learning_rate": 0.0009815789473684212, + "loss": 2.1117, + "step": 35 + }, + { + "epoch": 0.09204470742932282, + "high_lr": 0.0009815789473684212, + "low_lr": 1.9631578947368425e-05, + "step": 35 + }, + { + "epoch": 0.09204470742932282, + "high_lr": 0.0009815789473684212, + "low_lr": 1.9631578947368425e-05, + "step": 35 + }, + { + "epoch": 0.09204470742932282, + "high_lr": 0.0009815789473684212, + "low_lr": 1.9631578947368425e-05, + "step": 35 + }, + { + "epoch": 0.09204470742932282, + "high_lr": 0.0009815789473684212, + "low_lr": 1.9631578947368425e-05, + "step": 35 + }, + { + "epoch": 0.09204470742932282, + "high_lr": 0.0009815789473684212, + "low_lr": 1.9631578947368425e-05, + "step": 35 + }, + { + "epoch": 0.09204470742932282, + "high_lr": 0.0009815789473684212, + "low_lr": 1.9631578947368425e-05, + "step": 35 + }, + { + "epoch": 0.09204470742932282, + "high_lr": 0.0009815789473684212, + "low_lr": 1.9631578947368425e-05, + "step": 35 + }, + { + "epoch": 0.09204470742932282, + "high_lr": 0.0009815789473684212, + "low_lr": 1.9631578947368425e-05, + "step": 35 + }, + { + "epoch": 0.09467455621301775, + "grad_norm": 0.44061988592147827, + "learning_rate": 0.0009810526315789474, + "loss": 2.0431, + "step": 36 + }, + { + "epoch": 0.09467455621301775, + "high_lr": 0.0009810526315789474, + "low_lr": 1.962105263157895e-05, + "step": 36 + }, + { + "epoch": 0.09467455621301775, + "high_lr": 0.0009810526315789474, + "low_lr": 1.962105263157895e-05, + "step": 36 + }, + { + "epoch": 0.09467455621301775, + "high_lr": 0.0009810526315789474, + "low_lr": 1.962105263157895e-05, + "step": 36 + }, + { + "epoch": 0.09467455621301775, + "high_lr": 0.0009810526315789474, + "low_lr": 1.962105263157895e-05, + "step": 36 + }, + { + "epoch": 0.09467455621301775, + "high_lr": 0.0009810526315789474, + "low_lr": 1.962105263157895e-05, + "step": 36 + }, + { + "epoch": 0.09467455621301775, + "high_lr": 0.0009810526315789474, + "low_lr": 1.962105263157895e-05, + "step": 36 + }, + { + "epoch": 0.09467455621301775, + "high_lr": 0.0009810526315789474, + "low_lr": 1.962105263157895e-05, + "step": 36 + }, + { + "epoch": 0.09467455621301775, + "high_lr": 0.0009810526315789474, + "low_lr": 1.962105263157895e-05, + "step": 36 + }, + { + "epoch": 0.0973044049967127, + "grad_norm": 0.46434134244918823, + "learning_rate": 0.0009805263157894738, + "loss": 2.0377, + "step": 37 + }, + { + "epoch": 0.0973044049967127, + "high_lr": 0.0009805263157894738, + "low_lr": 1.9610526315789474e-05, + "step": 37 + }, + { + "epoch": 0.0973044049967127, + "high_lr": 0.0009805263157894738, + "low_lr": 1.9610526315789474e-05, + "step": 37 + }, + { + "epoch": 0.0973044049967127, + "high_lr": 0.0009805263157894738, + "low_lr": 1.9610526315789474e-05, + "step": 37 + }, + { + "epoch": 0.0973044049967127, + "high_lr": 0.0009805263157894738, + "low_lr": 1.9610526315789474e-05, + "step": 37 + }, + { + "epoch": 0.0973044049967127, + "high_lr": 0.0009805263157894738, + "low_lr": 1.9610526315789474e-05, + "step": 37 + }, + { + "epoch": 0.0973044049967127, + "high_lr": 0.0009805263157894738, + "low_lr": 1.9610526315789474e-05, + "step": 37 + }, + { + "epoch": 0.0973044049967127, + "high_lr": 0.0009805263157894738, + "low_lr": 1.9610526315789474e-05, + "step": 37 + }, + { + "epoch": 0.0973044049967127, + "high_lr": 0.0009805263157894738, + "low_lr": 1.9610526315789474e-05, + "step": 37 + }, + { + "epoch": 0.09993425378040763, + "grad_norm": 0.48765864968299866, + "learning_rate": 0.00098, + "loss": 2.1, + "step": 38 + }, + { + "epoch": 0.09993425378040763, + "high_lr": 0.00098, + "low_lr": 1.9600000000000002e-05, + "step": 38 + }, + { + "epoch": 0.09993425378040763, + "high_lr": 0.00098, + "low_lr": 1.9600000000000002e-05, + "step": 38 + }, + { + "epoch": 0.09993425378040763, + "high_lr": 0.00098, + "low_lr": 1.9600000000000002e-05, + "step": 38 + }, + { + "epoch": 0.09993425378040763, + "high_lr": 0.00098, + "low_lr": 1.9600000000000002e-05, + "step": 38 + }, + { + "epoch": 0.09993425378040763, + "high_lr": 0.00098, + "low_lr": 1.9600000000000002e-05, + "step": 38 + }, + { + "epoch": 0.09993425378040763, + "high_lr": 0.00098, + "low_lr": 1.9600000000000002e-05, + "step": 38 + }, + { + "epoch": 0.09993425378040763, + "high_lr": 0.00098, + "low_lr": 1.9600000000000002e-05, + "step": 38 + }, + { + "epoch": 0.09993425378040763, + "high_lr": 0.00098, + "low_lr": 1.9600000000000002e-05, + "step": 38 + }, + { + "epoch": 0.10256410256410256, + "grad_norm": 0.4472310543060303, + "learning_rate": 0.0009794736842105264, + "loss": 2.0205, + "step": 39 + }, + { + "epoch": 0.10256410256410256, + "high_lr": 0.0009794736842105264, + "low_lr": 1.9589473684210527e-05, + "step": 39 + }, + { + "epoch": 0.10256410256410256, + "high_lr": 0.0009794736842105264, + "low_lr": 1.9589473684210527e-05, + "step": 39 + }, + { + "epoch": 0.10256410256410256, + "high_lr": 0.0009794736842105264, + "low_lr": 1.9589473684210527e-05, + "step": 39 + }, + { + "epoch": 0.10256410256410256, + "high_lr": 0.0009794736842105264, + "low_lr": 1.9589473684210527e-05, + "step": 39 + }, + { + "epoch": 0.10256410256410256, + "high_lr": 0.0009794736842105264, + "low_lr": 1.9589473684210527e-05, + "step": 39 + }, + { + "epoch": 0.10256410256410256, + "high_lr": 0.0009794736842105264, + "low_lr": 1.9589473684210527e-05, + "step": 39 + }, + { + "epoch": 0.10256410256410256, + "high_lr": 0.0009794736842105264, + "low_lr": 1.9589473684210527e-05, + "step": 39 + }, + { + "epoch": 0.10256410256410256, + "high_lr": 0.0009794736842105264, + "low_lr": 1.9589473684210527e-05, + "step": 39 + }, + { + "epoch": 0.1051939513477975, + "grad_norm": 0.5191563367843628, + "learning_rate": 0.0009789473684210528, + "loss": 2.1507, + "step": 40 + }, + { + "epoch": 0.1051939513477975, + "high_lr": 0.0009789473684210528, + "low_lr": 1.9578947368421055e-05, + "step": 40 + }, + { + "epoch": 0.1051939513477975, + "high_lr": 0.0009789473684210528, + "low_lr": 1.9578947368421055e-05, + "step": 40 + }, + { + "epoch": 0.1051939513477975, + "high_lr": 0.0009789473684210528, + "low_lr": 1.9578947368421055e-05, + "step": 40 + }, + { + "epoch": 0.1051939513477975, + "high_lr": 0.0009789473684210528, + "low_lr": 1.9578947368421055e-05, + "step": 40 + }, + { + "epoch": 0.1051939513477975, + "high_lr": 0.0009789473684210528, + "low_lr": 1.9578947368421055e-05, + "step": 40 + }, + { + "epoch": 0.1051939513477975, + "high_lr": 0.0009789473684210528, + "low_lr": 1.9578947368421055e-05, + "step": 40 + }, + { + "epoch": 0.1051939513477975, + "high_lr": 0.0009789473684210528, + "low_lr": 1.9578947368421055e-05, + "step": 40 + }, + { + "epoch": 0.1051939513477975, + "high_lr": 0.0009789473684210528, + "low_lr": 1.9578947368421055e-05, + "step": 40 + }, + { + "epoch": 0.10782380013149244, + "grad_norm": 0.4820740222930908, + "learning_rate": 0.000978421052631579, + "loss": 1.9987, + "step": 41 + }, + { + "epoch": 0.10782380013149244, + "high_lr": 0.000978421052631579, + "low_lr": 1.956842105263158e-05, + "step": 41 + }, + { + "epoch": 0.10782380013149244, + "high_lr": 0.000978421052631579, + "low_lr": 1.956842105263158e-05, + "step": 41 + }, + { + "epoch": 0.10782380013149244, + "high_lr": 0.000978421052631579, + "low_lr": 1.956842105263158e-05, + "step": 41 + }, + { + "epoch": 0.10782380013149244, + "high_lr": 0.000978421052631579, + "low_lr": 1.956842105263158e-05, + "step": 41 + }, + { + "epoch": 0.10782380013149244, + "high_lr": 0.000978421052631579, + "low_lr": 1.956842105263158e-05, + "step": 41 + }, + { + "epoch": 0.10782380013149244, + "high_lr": 0.000978421052631579, + "low_lr": 1.956842105263158e-05, + "step": 41 + }, + { + "epoch": 0.10782380013149244, + "high_lr": 0.000978421052631579, + "low_lr": 1.956842105263158e-05, + "step": 41 + }, + { + "epoch": 0.10782380013149244, + "high_lr": 0.000978421052631579, + "low_lr": 1.956842105263158e-05, + "step": 41 + }, + { + "epoch": 0.11045364891518737, + "grad_norm": 0.49189651012420654, + "learning_rate": 0.0009778947368421053, + "loss": 2.1101, + "step": 42 + }, + { + "epoch": 0.11045364891518737, + "high_lr": 0.0009778947368421053, + "low_lr": 1.9557894736842107e-05, + "step": 42 + }, + { + "epoch": 0.11045364891518737, + "high_lr": 0.0009778947368421053, + "low_lr": 1.9557894736842107e-05, + "step": 42 + }, + { + "epoch": 0.11045364891518737, + "high_lr": 0.0009778947368421053, + "low_lr": 1.9557894736842107e-05, + "step": 42 + }, + { + "epoch": 0.11045364891518737, + "high_lr": 0.0009778947368421053, + "low_lr": 1.9557894736842107e-05, + "step": 42 + }, + { + "epoch": 0.11045364891518737, + "high_lr": 0.0009778947368421053, + "low_lr": 1.9557894736842107e-05, + "step": 42 + }, + { + "epoch": 0.11045364891518737, + "high_lr": 0.0009778947368421053, + "low_lr": 1.9557894736842107e-05, + "step": 42 + }, + { + "epoch": 0.11045364891518737, + "high_lr": 0.0009778947368421053, + "low_lr": 1.9557894736842107e-05, + "step": 42 + }, + { + "epoch": 0.11045364891518737, + "high_lr": 0.0009778947368421053, + "low_lr": 1.9557894736842107e-05, + "step": 42 + }, + { + "epoch": 0.11308349769888232, + "grad_norm": 0.48595666885375977, + "learning_rate": 0.0009773684210526315, + "loss": 2.0509, + "step": 43 + }, + { + "epoch": 0.11308349769888232, + "high_lr": 0.0009773684210526315, + "low_lr": 1.9547368421052632e-05, + "step": 43 + }, + { + "epoch": 0.11308349769888232, + "high_lr": 0.0009773684210526315, + "low_lr": 1.9547368421052632e-05, + "step": 43 + }, + { + "epoch": 0.11308349769888232, + "high_lr": 0.0009773684210526315, + "low_lr": 1.9547368421052632e-05, + "step": 43 + }, + { + "epoch": 0.11308349769888232, + "high_lr": 0.0009773684210526315, + "low_lr": 1.9547368421052632e-05, + "step": 43 + }, + { + "epoch": 0.11308349769888232, + "high_lr": 0.0009773684210526315, + "low_lr": 1.9547368421052632e-05, + "step": 43 + }, + { + "epoch": 0.11308349769888232, + "high_lr": 0.0009773684210526315, + "low_lr": 1.9547368421052632e-05, + "step": 43 + }, + { + "epoch": 0.11308349769888232, + "high_lr": 0.0009773684210526315, + "low_lr": 1.9547368421052632e-05, + "step": 43 + }, + { + "epoch": 0.11308349769888232, + "high_lr": 0.0009773684210526315, + "low_lr": 1.9547368421052632e-05, + "step": 43 + }, + { + "epoch": 0.11571334648257725, + "grad_norm": 0.45890429615974426, + "learning_rate": 0.000976842105263158, + "loss": 1.9858, + "step": 44 + }, + { + "epoch": 0.11571334648257725, + "high_lr": 0.000976842105263158, + "low_lr": 1.953684210526316e-05, + "step": 44 + }, + { + "epoch": 0.11571334648257725, + "high_lr": 0.000976842105263158, + "low_lr": 1.953684210526316e-05, + "step": 44 + }, + { + "epoch": 0.11571334648257725, + "high_lr": 0.000976842105263158, + "low_lr": 1.953684210526316e-05, + "step": 44 + }, + { + "epoch": 0.11571334648257725, + "high_lr": 0.000976842105263158, + "low_lr": 1.953684210526316e-05, + "step": 44 + }, + { + "epoch": 0.11571334648257725, + "high_lr": 0.000976842105263158, + "low_lr": 1.953684210526316e-05, + "step": 44 + }, + { + "epoch": 0.11571334648257725, + "high_lr": 0.000976842105263158, + "low_lr": 1.953684210526316e-05, + "step": 44 + }, + { + "epoch": 0.11571334648257725, + "high_lr": 0.000976842105263158, + "low_lr": 1.953684210526316e-05, + "step": 44 + }, + { + "epoch": 0.11571334648257725, + "high_lr": 0.000976842105263158, + "low_lr": 1.953684210526316e-05, + "step": 44 + }, + { + "epoch": 0.11834319526627218, + "grad_norm": 0.500018298625946, + "learning_rate": 0.0009763157894736843, + "loss": 2.0669, + "step": 45 + }, + { + "epoch": 0.11834319526627218, + "high_lr": 0.0009763157894736843, + "low_lr": 1.9526315789473688e-05, + "step": 45 + }, + { + "epoch": 0.11834319526627218, + "high_lr": 0.0009763157894736843, + "low_lr": 1.9526315789473688e-05, + "step": 45 + }, + { + "epoch": 0.11834319526627218, + "high_lr": 0.0009763157894736843, + "low_lr": 1.9526315789473688e-05, + "step": 45 + }, + { + "epoch": 0.11834319526627218, + "high_lr": 0.0009763157894736843, + "low_lr": 1.9526315789473688e-05, + "step": 45 + }, + { + "epoch": 0.11834319526627218, + "high_lr": 0.0009763157894736843, + "low_lr": 1.9526315789473688e-05, + "step": 45 + }, + { + "epoch": 0.11834319526627218, + "high_lr": 0.0009763157894736843, + "low_lr": 1.9526315789473688e-05, + "step": 45 + }, + { + "epoch": 0.11834319526627218, + "high_lr": 0.0009763157894736843, + "low_lr": 1.9526315789473688e-05, + "step": 45 + }, + { + "epoch": 0.11834319526627218, + "high_lr": 0.0009763157894736843, + "low_lr": 1.9526315789473688e-05, + "step": 45 + }, + { + "epoch": 0.12097304404996713, + "grad_norm": 0.4983457922935486, + "learning_rate": 0.0009757894736842106, + "loss": 2.0629, + "step": 46 + }, + { + "epoch": 0.12097304404996713, + "high_lr": 0.0009757894736842106, + "low_lr": 1.9515789473684213e-05, + "step": 46 + }, + { + "epoch": 0.12097304404996713, + "high_lr": 0.0009757894736842106, + "low_lr": 1.9515789473684213e-05, + "step": 46 + }, + { + "epoch": 0.12097304404996713, + "high_lr": 0.0009757894736842106, + "low_lr": 1.9515789473684213e-05, + "step": 46 + }, + { + "epoch": 0.12097304404996713, + "high_lr": 0.0009757894736842106, + "low_lr": 1.9515789473684213e-05, + "step": 46 + }, + { + "epoch": 0.12097304404996713, + "high_lr": 0.0009757894736842106, + "low_lr": 1.9515789473684213e-05, + "step": 46 + }, + { + "epoch": 0.12097304404996713, + "high_lr": 0.0009757894736842106, + "low_lr": 1.9515789473684213e-05, + "step": 46 + }, + { + "epoch": 0.12097304404996713, + "high_lr": 0.0009757894736842106, + "low_lr": 1.9515789473684213e-05, + "step": 46 + }, + { + "epoch": 0.12097304404996713, + "high_lr": 0.0009757894736842106, + "low_lr": 1.9515789473684213e-05, + "step": 46 + }, + { + "epoch": 0.12360289283366206, + "grad_norm": 0.5502267479896545, + "learning_rate": 0.0009752631578947369, + "loss": 2.156, + "step": 47 + }, + { + "epoch": 0.12360289283366206, + "high_lr": 0.0009752631578947369, + "low_lr": 1.9505263157894737e-05, + "step": 47 + }, + { + "epoch": 0.12360289283366206, + "high_lr": 0.0009752631578947369, + "low_lr": 1.9505263157894737e-05, + "step": 47 + }, + { + "epoch": 0.12360289283366206, + "high_lr": 0.0009752631578947369, + "low_lr": 1.9505263157894737e-05, + "step": 47 + }, + { + "epoch": 0.12360289283366206, + "high_lr": 0.0009752631578947369, + "low_lr": 1.9505263157894737e-05, + "step": 47 + }, + { + "epoch": 0.12360289283366206, + "high_lr": 0.0009752631578947369, + "low_lr": 1.9505263157894737e-05, + "step": 47 + }, + { + "epoch": 0.12360289283366206, + "high_lr": 0.0009752631578947369, + "low_lr": 1.9505263157894737e-05, + "step": 47 + }, + { + "epoch": 0.12360289283366206, + "high_lr": 0.0009752631578947369, + "low_lr": 1.9505263157894737e-05, + "step": 47 + }, + { + "epoch": 0.12360289283366206, + "high_lr": 0.0009752631578947369, + "low_lr": 1.9505263157894737e-05, + "step": 47 + }, + { + "epoch": 0.126232741617357, + "grad_norm": 0.5233122110366821, + "learning_rate": 0.0009747368421052632, + "loss": 2.0685, + "step": 48 + }, + { + "epoch": 0.126232741617357, + "high_lr": 0.0009747368421052632, + "low_lr": 1.9494736842105265e-05, + "step": 48 + }, + { + "epoch": 0.126232741617357, + "high_lr": 0.0009747368421052632, + "low_lr": 1.9494736842105265e-05, + "step": 48 + }, + { + "epoch": 0.126232741617357, + "high_lr": 0.0009747368421052632, + "low_lr": 1.9494736842105265e-05, + "step": 48 + }, + { + "epoch": 0.126232741617357, + "high_lr": 0.0009747368421052632, + "low_lr": 1.9494736842105265e-05, + "step": 48 + }, + { + "epoch": 0.126232741617357, + "high_lr": 0.0009747368421052632, + "low_lr": 1.9494736842105265e-05, + "step": 48 + }, + { + "epoch": 0.126232741617357, + "high_lr": 0.0009747368421052632, + "low_lr": 1.9494736842105265e-05, + "step": 48 + }, + { + "epoch": 0.126232741617357, + "high_lr": 0.0009747368421052632, + "low_lr": 1.9494736842105265e-05, + "step": 48 + }, + { + "epoch": 0.126232741617357, + "high_lr": 0.0009747368421052632, + "low_lr": 1.9494736842105265e-05, + "step": 48 + }, + { + "epoch": 0.12886259040105194, + "grad_norm": 0.5220436453819275, + "learning_rate": 0.0009742105263157895, + "loss": 2.013, + "step": 49 + }, + { + "epoch": 0.12886259040105194, + "high_lr": 0.0009742105263157895, + "low_lr": 1.9484210526315793e-05, + "step": 49 + }, + { + "epoch": 0.12886259040105194, + "high_lr": 0.0009742105263157895, + "low_lr": 1.9484210526315793e-05, + "step": 49 + }, + { + "epoch": 0.12886259040105194, + "high_lr": 0.0009742105263157895, + "low_lr": 1.9484210526315793e-05, + "step": 49 + }, + { + "epoch": 0.12886259040105194, + "high_lr": 0.0009742105263157895, + "low_lr": 1.9484210526315793e-05, + "step": 49 + }, + { + "epoch": 0.12886259040105194, + "high_lr": 0.0009742105263157895, + "low_lr": 1.9484210526315793e-05, + "step": 49 + }, + { + "epoch": 0.12886259040105194, + "high_lr": 0.0009742105263157895, + "low_lr": 1.9484210526315793e-05, + "step": 49 + }, + { + "epoch": 0.12886259040105194, + "high_lr": 0.0009742105263157895, + "low_lr": 1.9484210526315793e-05, + "step": 49 + }, + { + "epoch": 0.12886259040105194, + "high_lr": 0.0009742105263157895, + "low_lr": 1.9484210526315793e-05, + "step": 49 + }, + { + "epoch": 0.13149243918474687, + "grad_norm": 0.455016165971756, + "learning_rate": 0.0009736842105263158, + "loss": 2.0045, + "step": 50 + }, + { + "epoch": 0.13149243918474687, + "high_lr": 0.0009736842105263158, + "low_lr": 1.9473684210526318e-05, + "step": 50 + }, + { + "epoch": 0.13149243918474687, + "high_lr": 0.0009736842105263158, + "low_lr": 1.9473684210526318e-05, + "step": 50 + }, + { + "epoch": 0.13149243918474687, + "high_lr": 0.0009736842105263158, + "low_lr": 1.9473684210526318e-05, + "step": 50 + }, + { + "epoch": 0.13149243918474687, + "high_lr": 0.0009736842105263158, + "low_lr": 1.9473684210526318e-05, + "step": 50 + }, + { + "epoch": 0.13149243918474687, + "high_lr": 0.0009736842105263158, + "low_lr": 1.9473684210526318e-05, + "step": 50 + }, + { + "epoch": 0.13149243918474687, + "high_lr": 0.0009736842105263158, + "low_lr": 1.9473684210526318e-05, + "step": 50 + }, + { + "epoch": 0.13149243918474687, + "high_lr": 0.0009736842105263158, + "low_lr": 1.9473684210526318e-05, + "step": 50 + }, + { + "epoch": 0.13149243918474687, + "high_lr": 0.0009736842105263158, + "low_lr": 1.9473684210526318e-05, + "step": 50 + }, + { + "epoch": 0.1341222879684418, + "grad_norm": 0.5332766175270081, + "learning_rate": 0.0009731578947368421, + "loss": 2.0508, + "step": 51 + }, + { + "epoch": 0.1341222879684418, + "high_lr": 0.0009731578947368421, + "low_lr": 1.9463157894736843e-05, + "step": 51 + }, + { + "epoch": 0.1341222879684418, + "high_lr": 0.0009731578947368421, + "low_lr": 1.9463157894736843e-05, + "step": 51 + }, + { + "epoch": 0.1341222879684418, + "high_lr": 0.0009731578947368421, + "low_lr": 1.9463157894736843e-05, + "step": 51 + }, + { + "epoch": 0.1341222879684418, + "high_lr": 0.0009731578947368421, + "low_lr": 1.9463157894736843e-05, + "step": 51 + }, + { + "epoch": 0.1341222879684418, + "high_lr": 0.0009731578947368421, + "low_lr": 1.9463157894736843e-05, + "step": 51 + }, + { + "epoch": 0.1341222879684418, + "high_lr": 0.0009731578947368421, + "low_lr": 1.9463157894736843e-05, + "step": 51 + }, + { + "epoch": 0.1341222879684418, + "high_lr": 0.0009731578947368421, + "low_lr": 1.9463157894736843e-05, + "step": 51 + }, + { + "epoch": 0.1341222879684418, + "high_lr": 0.0009731578947368421, + "low_lr": 1.9463157894736843e-05, + "step": 51 + }, + { + "epoch": 0.13675213675213677, + "grad_norm": 0.5860726833343506, + "learning_rate": 0.0009726315789473684, + "loss": 2.064, + "step": 52 + }, + { + "epoch": 0.13675213675213677, + "high_lr": 0.0009726315789473684, + "low_lr": 1.945263157894737e-05, + "step": 52 + }, + { + "epoch": 0.13675213675213677, + "high_lr": 0.0009726315789473684, + "low_lr": 1.945263157894737e-05, + "step": 52 + }, + { + "epoch": 0.13675213675213677, + "high_lr": 0.0009726315789473684, + "low_lr": 1.945263157894737e-05, + "step": 52 + }, + { + "epoch": 0.13675213675213677, + "high_lr": 0.0009726315789473684, + "low_lr": 1.945263157894737e-05, + "step": 52 + }, + { + "epoch": 0.13675213675213677, + "high_lr": 0.0009726315789473684, + "low_lr": 1.945263157894737e-05, + "step": 52 + }, + { + "epoch": 0.13675213675213677, + "high_lr": 0.0009726315789473684, + "low_lr": 1.945263157894737e-05, + "step": 52 + }, + { + "epoch": 0.13675213675213677, + "high_lr": 0.0009726315789473684, + "low_lr": 1.945263157894737e-05, + "step": 52 + }, + { + "epoch": 0.13675213675213677, + "high_lr": 0.0009726315789473684, + "low_lr": 1.945263157894737e-05, + "step": 52 + }, + { + "epoch": 0.1393819855358317, + "grad_norm": 0.5577614307403564, + "learning_rate": 0.0009721052631578947, + "loss": 2.0728, + "step": 53 + }, + { + "epoch": 0.1393819855358317, + "high_lr": 0.0009721052631578947, + "low_lr": 1.9442105263157895e-05, + "step": 53 + }, + { + "epoch": 0.1393819855358317, + "high_lr": 0.0009721052631578947, + "low_lr": 1.9442105263157895e-05, + "step": 53 + }, + { + "epoch": 0.1393819855358317, + "high_lr": 0.0009721052631578947, + "low_lr": 1.9442105263157895e-05, + "step": 53 + }, + { + "epoch": 0.1393819855358317, + "high_lr": 0.0009721052631578947, + "low_lr": 1.9442105263157895e-05, + "step": 53 + }, + { + "epoch": 0.1393819855358317, + "high_lr": 0.0009721052631578947, + "low_lr": 1.9442105263157895e-05, + "step": 53 + }, + { + "epoch": 0.1393819855358317, + "high_lr": 0.0009721052631578947, + "low_lr": 1.9442105263157895e-05, + "step": 53 + }, + { + "epoch": 0.1393819855358317, + "high_lr": 0.0009721052631578947, + "low_lr": 1.9442105263157895e-05, + "step": 53 + }, + { + "epoch": 0.1393819855358317, + "high_lr": 0.0009721052631578947, + "low_lr": 1.9442105263157895e-05, + "step": 53 + }, + { + "epoch": 0.14201183431952663, + "grad_norm": 0.5507766604423523, + "learning_rate": 0.0009715789473684211, + "loss": 2.0143, + "step": 54 + }, + { + "epoch": 0.14201183431952663, + "high_lr": 0.0009715789473684211, + "low_lr": 1.9431578947368423e-05, + "step": 54 + }, + { + "epoch": 0.14201183431952663, + "high_lr": 0.0009715789473684211, + "low_lr": 1.9431578947368423e-05, + "step": 54 + }, + { + "epoch": 0.14201183431952663, + "high_lr": 0.0009715789473684211, + "low_lr": 1.9431578947368423e-05, + "step": 54 + }, + { + "epoch": 0.14201183431952663, + "high_lr": 0.0009715789473684211, + "low_lr": 1.9431578947368423e-05, + "step": 54 + }, + { + "epoch": 0.14201183431952663, + "high_lr": 0.0009715789473684211, + "low_lr": 1.9431578947368423e-05, + "step": 54 + }, + { + "epoch": 0.14201183431952663, + "high_lr": 0.0009715789473684211, + "low_lr": 1.9431578947368423e-05, + "step": 54 + }, + { + "epoch": 0.14201183431952663, + "high_lr": 0.0009715789473684211, + "low_lr": 1.9431578947368423e-05, + "step": 54 + }, + { + "epoch": 0.14201183431952663, + "high_lr": 0.0009715789473684211, + "low_lr": 1.9431578947368423e-05, + "step": 54 + }, + { + "epoch": 0.14464168310322156, + "grad_norm": 0.5255084037780762, + "learning_rate": 0.0009710526315789474, + "loss": 2.0189, + "step": 55 + }, + { + "epoch": 0.14464168310322156, + "high_lr": 0.0009710526315789474, + "low_lr": 1.9421052631578948e-05, + "step": 55 + }, + { + "epoch": 0.14464168310322156, + "high_lr": 0.0009710526315789474, + "low_lr": 1.9421052631578948e-05, + "step": 55 + }, + { + "epoch": 0.14464168310322156, + "high_lr": 0.0009710526315789474, + "low_lr": 1.9421052631578948e-05, + "step": 55 + }, + { + "epoch": 0.14464168310322156, + "high_lr": 0.0009710526315789474, + "low_lr": 1.9421052631578948e-05, + "step": 55 + }, + { + "epoch": 0.14464168310322156, + "high_lr": 0.0009710526315789474, + "low_lr": 1.9421052631578948e-05, + "step": 55 + }, + { + "epoch": 0.14464168310322156, + "high_lr": 0.0009710526315789474, + "low_lr": 1.9421052631578948e-05, + "step": 55 + }, + { + "epoch": 0.14464168310322156, + "high_lr": 0.0009710526315789474, + "low_lr": 1.9421052631578948e-05, + "step": 55 + }, + { + "epoch": 0.14464168310322156, + "high_lr": 0.0009710526315789474, + "low_lr": 1.9421052631578948e-05, + "step": 55 + }, + { + "epoch": 0.1472715318869165, + "grad_norm": 0.5672211647033691, + "learning_rate": 0.0009705263157894737, + "loss": 2.0786, + "step": 56 + }, + { + "epoch": 0.1472715318869165, + "high_lr": 0.0009705263157894737, + "low_lr": 1.9410526315789476e-05, + "step": 56 + }, + { + "epoch": 0.1472715318869165, + "high_lr": 0.0009705263157894737, + "low_lr": 1.9410526315789476e-05, + "step": 56 + }, + { + "epoch": 0.1472715318869165, + "high_lr": 0.0009705263157894737, + "low_lr": 1.9410526315789476e-05, + "step": 56 + }, + { + "epoch": 0.1472715318869165, + "high_lr": 0.0009705263157894737, + "low_lr": 1.9410526315789476e-05, + "step": 56 + }, + { + "epoch": 0.1472715318869165, + "high_lr": 0.0009705263157894737, + "low_lr": 1.9410526315789476e-05, + "step": 56 + }, + { + "epoch": 0.1472715318869165, + "high_lr": 0.0009705263157894737, + "low_lr": 1.9410526315789476e-05, + "step": 56 + }, + { + "epoch": 0.1472715318869165, + "high_lr": 0.0009705263157894737, + "low_lr": 1.9410526315789476e-05, + "step": 56 + }, + { + "epoch": 0.1472715318869165, + "high_lr": 0.0009705263157894737, + "low_lr": 1.9410526315789476e-05, + "step": 56 + }, + { + "epoch": 0.14990138067061143, + "grad_norm": 0.5566748976707458, + "learning_rate": 0.0009699999999999999, + "loss": 2.0574, + "step": 57 + }, + { + "epoch": 0.14990138067061143, + "high_lr": 0.0009699999999999999, + "low_lr": 1.94e-05, + "step": 57 + }, + { + "epoch": 0.14990138067061143, + "high_lr": 0.0009699999999999999, + "low_lr": 1.94e-05, + "step": 57 + }, + { + "epoch": 0.14990138067061143, + "high_lr": 0.0009699999999999999, + "low_lr": 1.94e-05, + "step": 57 + }, + { + "epoch": 0.14990138067061143, + "high_lr": 0.0009699999999999999, + "low_lr": 1.94e-05, + "step": 57 + }, + { + "epoch": 0.14990138067061143, + "high_lr": 0.0009699999999999999, + "low_lr": 1.94e-05, + "step": 57 + }, + { + "epoch": 0.14990138067061143, + "high_lr": 0.0009699999999999999, + "low_lr": 1.94e-05, + "step": 57 + }, + { + "epoch": 0.14990138067061143, + "high_lr": 0.0009699999999999999, + "low_lr": 1.94e-05, + "step": 57 + }, + { + "epoch": 0.14990138067061143, + "high_lr": 0.0009699999999999999, + "low_lr": 1.94e-05, + "step": 57 + }, + { + "epoch": 0.1525312294543064, + "grad_norm": 0.49547362327575684, + "learning_rate": 0.0009694736842105263, + "loss": 2.0502, + "step": 58 + }, + { + "epoch": 0.1525312294543064, + "high_lr": 0.0009694736842105263, + "low_lr": 1.9389473684210525e-05, + "step": 58 + }, + { + "epoch": 0.1525312294543064, + "high_lr": 0.0009694736842105263, + "low_lr": 1.9389473684210525e-05, + "step": 58 + }, + { + "epoch": 0.1525312294543064, + "high_lr": 0.0009694736842105263, + "low_lr": 1.9389473684210525e-05, + "step": 58 + }, + { + "epoch": 0.1525312294543064, + "high_lr": 0.0009694736842105263, + "low_lr": 1.9389473684210525e-05, + "step": 58 + }, + { + "epoch": 0.1525312294543064, + "high_lr": 0.0009694736842105263, + "low_lr": 1.9389473684210525e-05, + "step": 58 + }, + { + "epoch": 0.1525312294543064, + "high_lr": 0.0009694736842105263, + "low_lr": 1.9389473684210525e-05, + "step": 58 + }, + { + "epoch": 0.1525312294543064, + "high_lr": 0.0009694736842105263, + "low_lr": 1.9389473684210525e-05, + "step": 58 + }, + { + "epoch": 0.1525312294543064, + "high_lr": 0.0009694736842105263, + "low_lr": 1.9389473684210525e-05, + "step": 58 + }, + { + "epoch": 0.15516107823800132, + "grad_norm": 0.6034396886825562, + "learning_rate": 0.0009689473684210527, + "loss": 2.0388, + "step": 59 + }, + { + "epoch": 0.15516107823800132, + "high_lr": 0.0009689473684210527, + "low_lr": 1.9378947368421053e-05, + "step": 59 + }, + { + "epoch": 0.15516107823800132, + "high_lr": 0.0009689473684210527, + "low_lr": 1.9378947368421053e-05, + "step": 59 + }, + { + "epoch": 0.15516107823800132, + "high_lr": 0.0009689473684210527, + "low_lr": 1.9378947368421053e-05, + "step": 59 + }, + { + "epoch": 0.15516107823800132, + "high_lr": 0.0009689473684210527, + "low_lr": 1.9378947368421053e-05, + "step": 59 + }, + { + "epoch": 0.15516107823800132, + "high_lr": 0.0009689473684210527, + "low_lr": 1.9378947368421053e-05, + "step": 59 + }, + { + "epoch": 0.15516107823800132, + "high_lr": 0.0009689473684210527, + "low_lr": 1.9378947368421053e-05, + "step": 59 + }, + { + "epoch": 0.15516107823800132, + "high_lr": 0.0009689473684210527, + "low_lr": 1.9378947368421053e-05, + "step": 59 + }, + { + "epoch": 0.15516107823800132, + "high_lr": 0.0009689473684210527, + "low_lr": 1.9378947368421053e-05, + "step": 59 + }, + { + "epoch": 0.15779092702169625, + "grad_norm": 0.5364948511123657, + "learning_rate": 0.000968421052631579, + "loss": 1.96, + "step": 60 + }, + { + "epoch": 0.15779092702169625, + "high_lr": 0.000968421052631579, + "low_lr": 1.936842105263158e-05, + "step": 60 + }, + { + "epoch": 0.15779092702169625, + "high_lr": 0.000968421052631579, + "low_lr": 1.936842105263158e-05, + "step": 60 + }, + { + "epoch": 0.15779092702169625, + "high_lr": 0.000968421052631579, + "low_lr": 1.936842105263158e-05, + "step": 60 + }, + { + "epoch": 0.15779092702169625, + "high_lr": 0.000968421052631579, + "low_lr": 1.936842105263158e-05, + "step": 60 + }, + { + "epoch": 0.15779092702169625, + "high_lr": 0.000968421052631579, + "low_lr": 1.936842105263158e-05, + "step": 60 + }, + { + "epoch": 0.15779092702169625, + "high_lr": 0.000968421052631579, + "low_lr": 1.936842105263158e-05, + "step": 60 + }, + { + "epoch": 0.15779092702169625, + "high_lr": 0.000968421052631579, + "low_lr": 1.936842105263158e-05, + "step": 60 + }, + { + "epoch": 0.15779092702169625, + "high_lr": 0.000968421052631579, + "low_lr": 1.936842105263158e-05, + "step": 60 + }, + { + "epoch": 0.16042077580539119, + "grad_norm": 0.5651670694351196, + "learning_rate": 0.0009678947368421053, + "loss": 1.9774, + "step": 61 + }, + { + "epoch": 0.16042077580539119, + "high_lr": 0.0009678947368421053, + "low_lr": 1.9357894736842106e-05, + "step": 61 + }, + { + "epoch": 0.16042077580539119, + "high_lr": 0.0009678947368421053, + "low_lr": 1.9357894736842106e-05, + "step": 61 + }, + { + "epoch": 0.16042077580539119, + "high_lr": 0.0009678947368421053, + "low_lr": 1.9357894736842106e-05, + "step": 61 + }, + { + "epoch": 0.16042077580539119, + "high_lr": 0.0009678947368421053, + "low_lr": 1.9357894736842106e-05, + "step": 61 + }, + { + "epoch": 0.16042077580539119, + "high_lr": 0.0009678947368421053, + "low_lr": 1.9357894736842106e-05, + "step": 61 + }, + { + "epoch": 0.16042077580539119, + "high_lr": 0.0009678947368421053, + "low_lr": 1.9357894736842106e-05, + "step": 61 + }, + { + "epoch": 0.16042077580539119, + "high_lr": 0.0009678947368421053, + "low_lr": 1.9357894736842106e-05, + "step": 61 + }, + { + "epoch": 0.16042077580539119, + "high_lr": 0.0009678947368421053, + "low_lr": 1.9357894736842106e-05, + "step": 61 + }, + { + "epoch": 0.16305062458908612, + "grad_norm": 0.5697059631347656, + "learning_rate": 0.0009673684210526316, + "loss": 2.0256, + "step": 62 + }, + { + "epoch": 0.16305062458908612, + "high_lr": 0.0009673684210526316, + "low_lr": 1.9347368421052634e-05, + "step": 62 + }, + { + "epoch": 0.16305062458908612, + "high_lr": 0.0009673684210526316, + "low_lr": 1.9347368421052634e-05, + "step": 62 + }, + { + "epoch": 0.16305062458908612, + "high_lr": 0.0009673684210526316, + "low_lr": 1.9347368421052634e-05, + "step": 62 + }, + { + "epoch": 0.16305062458908612, + "high_lr": 0.0009673684210526316, + "low_lr": 1.9347368421052634e-05, + "step": 62 + }, + { + "epoch": 0.16305062458908612, + "high_lr": 0.0009673684210526316, + "low_lr": 1.9347368421052634e-05, + "step": 62 + }, + { + "epoch": 0.16305062458908612, + "high_lr": 0.0009673684210526316, + "low_lr": 1.9347368421052634e-05, + "step": 62 + }, + { + "epoch": 0.16305062458908612, + "high_lr": 0.0009673684210526316, + "low_lr": 1.9347368421052634e-05, + "step": 62 + }, + { + "epoch": 0.16305062458908612, + "high_lr": 0.0009673684210526316, + "low_lr": 1.9347368421052634e-05, + "step": 62 + }, + { + "epoch": 0.16568047337278108, + "grad_norm": 0.6273848414421082, + "learning_rate": 0.000966842105263158, + "loss": 2.0194, + "step": 63 + }, + { + "epoch": 0.16568047337278108, + "high_lr": 0.000966842105263158, + "low_lr": 1.9336842105263162e-05, + "step": 63 + }, + { + "epoch": 0.16568047337278108, + "high_lr": 0.000966842105263158, + "low_lr": 1.9336842105263162e-05, + "step": 63 + }, + { + "epoch": 0.16568047337278108, + "high_lr": 0.000966842105263158, + "low_lr": 1.9336842105263162e-05, + "step": 63 + }, + { + "epoch": 0.16568047337278108, + "high_lr": 0.000966842105263158, + "low_lr": 1.9336842105263162e-05, + "step": 63 + }, + { + "epoch": 0.16568047337278108, + "high_lr": 0.000966842105263158, + "low_lr": 1.9336842105263162e-05, + "step": 63 + }, + { + "epoch": 0.16568047337278108, + "high_lr": 0.000966842105263158, + "low_lr": 1.9336842105263162e-05, + "step": 63 + }, + { + "epoch": 0.16568047337278108, + "high_lr": 0.000966842105263158, + "low_lr": 1.9336842105263162e-05, + "step": 63 + }, + { + "epoch": 0.16568047337278108, + "high_lr": 0.000966842105263158, + "low_lr": 1.9336842105263162e-05, + "step": 63 + }, + { + "epoch": 0.168310322156476, + "grad_norm": 0.6345831155776978, + "learning_rate": 0.0009663157894736843, + "loss": 2.0486, + "step": 64 + }, + { + "epoch": 0.168310322156476, + "high_lr": 0.0009663157894736843, + "low_lr": 1.9326315789473687e-05, + "step": 64 + }, + { + "epoch": 0.168310322156476, + "high_lr": 0.0009663157894736843, + "low_lr": 1.9326315789473687e-05, + "step": 64 + }, + { + "epoch": 0.168310322156476, + "high_lr": 0.0009663157894736843, + "low_lr": 1.9326315789473687e-05, + "step": 64 + }, + { + "epoch": 0.168310322156476, + "high_lr": 0.0009663157894736843, + "low_lr": 1.9326315789473687e-05, + "step": 64 + }, + { + "epoch": 0.168310322156476, + "high_lr": 0.0009663157894736843, + "low_lr": 1.9326315789473687e-05, + "step": 64 + }, + { + "epoch": 0.168310322156476, + "high_lr": 0.0009663157894736843, + "low_lr": 1.9326315789473687e-05, + "step": 64 + }, + { + "epoch": 0.168310322156476, + "high_lr": 0.0009663157894736843, + "low_lr": 1.9326315789473687e-05, + "step": 64 + }, + { + "epoch": 0.168310322156476, + "high_lr": 0.0009663157894736843, + "low_lr": 1.9326315789473687e-05, + "step": 64 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 0.5673817992210388, + "learning_rate": 0.0009657894736842106, + "loss": 2.0464, + "step": 65 + }, + { + "epoch": 0.17094017094017094, + "high_lr": 0.0009657894736842106, + "low_lr": 1.931578947368421e-05, + "step": 65 + }, + { + "epoch": 0.17094017094017094, + "high_lr": 0.0009657894736842106, + "low_lr": 1.931578947368421e-05, + "step": 65 + }, + { + "epoch": 0.17094017094017094, + "high_lr": 0.0009657894736842106, + "low_lr": 1.931578947368421e-05, + "step": 65 + }, + { + "epoch": 0.17094017094017094, + "high_lr": 0.0009657894736842106, + "low_lr": 1.931578947368421e-05, + "step": 65 + }, + { + "epoch": 0.17094017094017094, + "high_lr": 0.0009657894736842106, + "low_lr": 1.931578947368421e-05, + "step": 65 + }, + { + "epoch": 0.17094017094017094, + "high_lr": 0.0009657894736842106, + "low_lr": 1.931578947368421e-05, + "step": 65 + }, + { + "epoch": 0.17094017094017094, + "high_lr": 0.0009657894736842106, + "low_lr": 1.931578947368421e-05, + "step": 65 + }, + { + "epoch": 0.17094017094017094, + "high_lr": 0.0009657894736842106, + "low_lr": 1.931578947368421e-05, + "step": 65 + }, + { + "epoch": 0.17357001972386588, + "grad_norm": 0.5592147707939148, + "learning_rate": 0.0009652631578947368, + "loss": 2.0164, + "step": 66 + }, + { + "epoch": 0.17357001972386588, + "high_lr": 0.0009652631578947368, + "low_lr": 1.930526315789474e-05, + "step": 66 + }, + { + "epoch": 0.17357001972386588, + "high_lr": 0.0009652631578947368, + "low_lr": 1.930526315789474e-05, + "step": 66 + }, + { + "epoch": 0.17357001972386588, + "high_lr": 0.0009652631578947368, + "low_lr": 1.930526315789474e-05, + "step": 66 + }, + { + "epoch": 0.17357001972386588, + "high_lr": 0.0009652631578947368, + "low_lr": 1.930526315789474e-05, + "step": 66 + }, + { + "epoch": 0.17357001972386588, + "high_lr": 0.0009652631578947368, + "low_lr": 1.930526315789474e-05, + "step": 66 + }, + { + "epoch": 0.17357001972386588, + "high_lr": 0.0009652631578947368, + "low_lr": 1.930526315789474e-05, + "step": 66 + }, + { + "epoch": 0.17357001972386588, + "high_lr": 0.0009652631578947368, + "low_lr": 1.930526315789474e-05, + "step": 66 + }, + { + "epoch": 0.17357001972386588, + "high_lr": 0.0009652631578947368, + "low_lr": 1.930526315789474e-05, + "step": 66 + }, + { + "epoch": 0.1761998685075608, + "grad_norm": 0.5632216930389404, + "learning_rate": 0.0009647368421052631, + "loss": 1.9588, + "step": 67 + }, + { + "epoch": 0.1761998685075608, + "high_lr": 0.0009647368421052631, + "low_lr": 1.9294736842105264e-05, + "step": 67 + }, + { + "epoch": 0.1761998685075608, + "high_lr": 0.0009647368421052631, + "low_lr": 1.9294736842105264e-05, + "step": 67 + }, + { + "epoch": 0.1761998685075608, + "high_lr": 0.0009647368421052631, + "low_lr": 1.9294736842105264e-05, + "step": 67 + }, + { + "epoch": 0.1761998685075608, + "high_lr": 0.0009647368421052631, + "low_lr": 1.9294736842105264e-05, + "step": 67 + }, + { + "epoch": 0.1761998685075608, + "high_lr": 0.0009647368421052631, + "low_lr": 1.9294736842105264e-05, + "step": 67 + }, + { + "epoch": 0.1761998685075608, + "high_lr": 0.0009647368421052631, + "low_lr": 1.9294736842105264e-05, + "step": 67 + }, + { + "epoch": 0.1761998685075608, + "high_lr": 0.0009647368421052631, + "low_lr": 1.9294736842105264e-05, + "step": 67 + }, + { + "epoch": 0.1761998685075608, + "high_lr": 0.0009647368421052631, + "low_lr": 1.9294736842105264e-05, + "step": 67 + }, + { + "epoch": 0.17882971729125574, + "grad_norm": 0.5781631469726562, + "learning_rate": 0.0009642105263157895, + "loss": 1.9856, + "step": 68 + }, + { + "epoch": 0.17882971729125574, + "high_lr": 0.0009642105263157895, + "low_lr": 1.9284210526315792e-05, + "step": 68 + }, + { + "epoch": 0.17882971729125574, + "high_lr": 0.0009642105263157895, + "low_lr": 1.9284210526315792e-05, + "step": 68 + }, + { + "epoch": 0.17882971729125574, + "high_lr": 0.0009642105263157895, + "low_lr": 1.9284210526315792e-05, + "step": 68 + }, + { + "epoch": 0.17882971729125574, + "high_lr": 0.0009642105263157895, + "low_lr": 1.9284210526315792e-05, + "step": 68 + }, + { + "epoch": 0.17882971729125574, + "high_lr": 0.0009642105263157895, + "low_lr": 1.9284210526315792e-05, + "step": 68 + }, + { + "epoch": 0.17882971729125574, + "high_lr": 0.0009642105263157895, + "low_lr": 1.9284210526315792e-05, + "step": 68 + }, + { + "epoch": 0.17882971729125574, + "high_lr": 0.0009642105263157895, + "low_lr": 1.9284210526315792e-05, + "step": 68 + }, + { + "epoch": 0.17882971729125574, + "high_lr": 0.0009642105263157895, + "low_lr": 1.9284210526315792e-05, + "step": 68 + }, + { + "epoch": 0.1814595660749507, + "grad_norm": 0.509727418422699, + "learning_rate": 0.0009636842105263158, + "loss": 1.8842, + "step": 69 + }, + { + "epoch": 0.1814595660749507, + "high_lr": 0.0009636842105263158, + "low_lr": 1.9273684210526317e-05, + "step": 69 + }, + { + "epoch": 0.1814595660749507, + "high_lr": 0.0009636842105263158, + "low_lr": 1.9273684210526317e-05, + "step": 69 + }, + { + "epoch": 0.1814595660749507, + "high_lr": 0.0009636842105263158, + "low_lr": 1.9273684210526317e-05, + "step": 69 + }, + { + "epoch": 0.1814595660749507, + "high_lr": 0.0009636842105263158, + "low_lr": 1.9273684210526317e-05, + "step": 69 + }, + { + "epoch": 0.1814595660749507, + "high_lr": 0.0009636842105263158, + "low_lr": 1.9273684210526317e-05, + "step": 69 + }, + { + "epoch": 0.1814595660749507, + "high_lr": 0.0009636842105263158, + "low_lr": 1.9273684210526317e-05, + "step": 69 + }, + { + "epoch": 0.1814595660749507, + "high_lr": 0.0009636842105263158, + "low_lr": 1.9273684210526317e-05, + "step": 69 + }, + { + "epoch": 0.1814595660749507, + "high_lr": 0.0009636842105263158, + "low_lr": 1.9273684210526317e-05, + "step": 69 + }, + { + "epoch": 0.18408941485864563, + "grad_norm": 1.9063596725463867, + "learning_rate": 0.0009631578947368421, + "loss": 2.0654, + "step": 70 + }, + { + "epoch": 0.18408941485864563, + "high_lr": 0.0009631578947368421, + "low_lr": 1.9263157894736845e-05, + "step": 70 + }, + { + "epoch": 0.18408941485864563, + "high_lr": 0.0009631578947368421, + "low_lr": 1.9263157894736845e-05, + "step": 70 + }, + { + "epoch": 0.18408941485864563, + "high_lr": 0.0009631578947368421, + "low_lr": 1.9263157894736845e-05, + "step": 70 + }, + { + "epoch": 0.18408941485864563, + "high_lr": 0.0009631578947368421, + "low_lr": 1.9263157894736845e-05, + "step": 70 + }, + { + "epoch": 0.18408941485864563, + "high_lr": 0.0009631578947368421, + "low_lr": 1.9263157894736845e-05, + "step": 70 + }, + { + "epoch": 0.18408941485864563, + "high_lr": 0.0009631578947368421, + "low_lr": 1.9263157894736845e-05, + "step": 70 + }, + { + "epoch": 0.18408941485864563, + "high_lr": 0.0009631578947368421, + "low_lr": 1.9263157894736845e-05, + "step": 70 + }, + { + "epoch": 0.18408941485864563, + "high_lr": 0.0009631578947368421, + "low_lr": 1.9263157894736845e-05, + "step": 70 + }, + { + "epoch": 0.18671926364234057, + "grad_norm": 0.6156869530677795, + "learning_rate": 0.0009626315789473684, + "loss": 2.0458, + "step": 71 + }, + { + "epoch": 0.18671926364234057, + "high_lr": 0.0009626315789473684, + "low_lr": 1.925263157894737e-05, + "step": 71 + }, + { + "epoch": 0.18671926364234057, + "high_lr": 0.0009626315789473684, + "low_lr": 1.925263157894737e-05, + "step": 71 + }, + { + "epoch": 0.18671926364234057, + "high_lr": 0.0009626315789473684, + "low_lr": 1.925263157894737e-05, + "step": 71 + }, + { + "epoch": 0.18671926364234057, + "high_lr": 0.0009626315789473684, + "low_lr": 1.925263157894737e-05, + "step": 71 + }, + { + "epoch": 0.18671926364234057, + "high_lr": 0.0009626315789473684, + "low_lr": 1.925263157894737e-05, + "step": 71 + }, + { + "epoch": 0.18671926364234057, + "high_lr": 0.0009626315789473684, + "low_lr": 1.925263157894737e-05, + "step": 71 + }, + { + "epoch": 0.18671926364234057, + "high_lr": 0.0009626315789473684, + "low_lr": 1.925263157894737e-05, + "step": 71 + }, + { + "epoch": 0.18671926364234057, + "high_lr": 0.0009626315789473684, + "low_lr": 1.925263157894737e-05, + "step": 71 + }, + { + "epoch": 0.1893491124260355, + "grad_norm": 0.590178906917572, + "learning_rate": 0.0009621052631578947, + "loss": 1.954, + "step": 72 + }, + { + "epoch": 0.1893491124260355, + "high_lr": 0.0009621052631578947, + "low_lr": 1.9242105263157894e-05, + "step": 72 + }, + { + "epoch": 0.1893491124260355, + "high_lr": 0.0009621052631578947, + "low_lr": 1.9242105263157894e-05, + "step": 72 + }, + { + "epoch": 0.1893491124260355, + "high_lr": 0.0009621052631578947, + "low_lr": 1.9242105263157894e-05, + "step": 72 + }, + { + "epoch": 0.1893491124260355, + "high_lr": 0.0009621052631578947, + "low_lr": 1.9242105263157894e-05, + "step": 72 + }, + { + "epoch": 0.1893491124260355, + "high_lr": 0.0009621052631578947, + "low_lr": 1.9242105263157894e-05, + "step": 72 + }, + { + "epoch": 0.1893491124260355, + "high_lr": 0.0009621052631578947, + "low_lr": 1.9242105263157894e-05, + "step": 72 + }, + { + "epoch": 0.1893491124260355, + "high_lr": 0.0009621052631578947, + "low_lr": 1.9242105263157894e-05, + "step": 72 + }, + { + "epoch": 0.1893491124260355, + "high_lr": 0.0009621052631578947, + "low_lr": 1.9242105263157894e-05, + "step": 72 + }, + { + "epoch": 0.19197896120973043, + "grad_norm": 0.6381607055664062, + "learning_rate": 0.0009615789473684211, + "loss": 1.982, + "step": 73 + }, + { + "epoch": 0.19197896120973043, + "high_lr": 0.0009615789473684211, + "low_lr": 1.9231578947368422e-05, + "step": 73 + }, + { + "epoch": 0.19197896120973043, + "high_lr": 0.0009615789473684211, + "low_lr": 1.9231578947368422e-05, + "step": 73 + }, + { + "epoch": 0.19197896120973043, + "high_lr": 0.0009615789473684211, + "low_lr": 1.9231578947368422e-05, + "step": 73 + }, + { + "epoch": 0.19197896120973043, + "high_lr": 0.0009615789473684211, + "low_lr": 1.9231578947368422e-05, + "step": 73 + }, + { + "epoch": 0.19197896120973043, + "high_lr": 0.0009615789473684211, + "low_lr": 1.9231578947368422e-05, + "step": 73 + }, + { + "epoch": 0.19197896120973043, + "high_lr": 0.0009615789473684211, + "low_lr": 1.9231578947368422e-05, + "step": 73 + }, + { + "epoch": 0.19197896120973043, + "high_lr": 0.0009615789473684211, + "low_lr": 1.9231578947368422e-05, + "step": 73 + }, + { + "epoch": 0.19197896120973043, + "high_lr": 0.0009615789473684211, + "low_lr": 1.9231578947368422e-05, + "step": 73 + }, + { + "epoch": 0.1946088099934254, + "grad_norm": 0.6437403559684753, + "learning_rate": 0.0009610526315789475, + "loss": 1.9463, + "step": 74 + }, + { + "epoch": 0.1946088099934254, + "high_lr": 0.0009610526315789475, + "low_lr": 1.922105263157895e-05, + "step": 74 + }, + { + "epoch": 0.1946088099934254, + "high_lr": 0.0009610526315789475, + "low_lr": 1.922105263157895e-05, + "step": 74 + }, + { + "epoch": 0.1946088099934254, + "high_lr": 0.0009610526315789475, + "low_lr": 1.922105263157895e-05, + "step": 74 + }, + { + "epoch": 0.1946088099934254, + "high_lr": 0.0009610526315789475, + "low_lr": 1.922105263157895e-05, + "step": 74 + }, + { + "epoch": 0.1946088099934254, + "high_lr": 0.0009610526315789475, + "low_lr": 1.922105263157895e-05, + "step": 74 + }, + { + "epoch": 0.1946088099934254, + "high_lr": 0.0009610526315789475, + "low_lr": 1.922105263157895e-05, + "step": 74 + }, + { + "epoch": 0.1946088099934254, + "high_lr": 0.0009610526315789475, + "low_lr": 1.922105263157895e-05, + "step": 74 + }, + { + "epoch": 0.1946088099934254, + "high_lr": 0.0009610526315789475, + "low_lr": 1.922105263157895e-05, + "step": 74 + }, + { + "epoch": 0.19723865877712032, + "grad_norm": 0.6442136168479919, + "learning_rate": 0.0009605263157894737, + "loss": 2.0247, + "step": 75 + }, + { + "epoch": 0.19723865877712032, + "high_lr": 0.0009605263157894737, + "low_lr": 1.9210526315789474e-05, + "step": 75 + }, + { + "epoch": 0.19723865877712032, + "high_lr": 0.0009605263157894737, + "low_lr": 1.9210526315789474e-05, + "step": 75 + }, + { + "epoch": 0.19723865877712032, + "high_lr": 0.0009605263157894737, + "low_lr": 1.9210526315789474e-05, + "step": 75 + }, + { + "epoch": 0.19723865877712032, + "high_lr": 0.0009605263157894737, + "low_lr": 1.9210526315789474e-05, + "step": 75 + }, + { + "epoch": 0.19723865877712032, + "high_lr": 0.0009605263157894737, + "low_lr": 1.9210526315789474e-05, + "step": 75 + }, + { + "epoch": 0.19723865877712032, + "high_lr": 0.0009605263157894737, + "low_lr": 1.9210526315789474e-05, + "step": 75 + }, + { + "epoch": 0.19723865877712032, + "high_lr": 0.0009605263157894737, + "low_lr": 1.9210526315789474e-05, + "step": 75 + }, + { + "epoch": 0.19723865877712032, + "high_lr": 0.0009605263157894737, + "low_lr": 1.9210526315789474e-05, + "step": 75 + }, + { + "epoch": 0.19986850756081526, + "grad_norm": 0.775407612323761, + "learning_rate": 0.00096, + "loss": 2.0624, + "step": 76 + }, + { + "epoch": 0.19986850756081526, + "high_lr": 0.00096, + "low_lr": 1.9200000000000003e-05, + "step": 76 + }, + { + "epoch": 0.19986850756081526, + "high_lr": 0.00096, + "low_lr": 1.9200000000000003e-05, + "step": 76 + }, + { + "epoch": 0.19986850756081526, + "high_lr": 0.00096, + "low_lr": 1.9200000000000003e-05, + "step": 76 + }, + { + "epoch": 0.19986850756081526, + "high_lr": 0.00096, + "low_lr": 1.9200000000000003e-05, + "step": 76 + }, + { + "epoch": 0.19986850756081526, + "high_lr": 0.00096, + "low_lr": 1.9200000000000003e-05, + "step": 76 + }, + { + "epoch": 0.19986850756081526, + "high_lr": 0.00096, + "low_lr": 1.9200000000000003e-05, + "step": 76 + }, + { + "epoch": 0.19986850756081526, + "high_lr": 0.00096, + "low_lr": 1.9200000000000003e-05, + "step": 76 + }, + { + "epoch": 0.19986850756081526, + "high_lr": 0.00096, + "low_lr": 1.9200000000000003e-05, + "step": 76 + }, + { + "epoch": 0.2024983563445102, + "grad_norm": 0.6264069080352783, + "learning_rate": 0.0009594736842105264, + "loss": 1.9074, + "step": 77 + }, + { + "epoch": 0.2024983563445102, + "high_lr": 0.0009594736842105264, + "low_lr": 1.918947368421053e-05, + "step": 77 + }, + { + "epoch": 0.2024983563445102, + "high_lr": 0.0009594736842105264, + "low_lr": 1.918947368421053e-05, + "step": 77 + }, + { + "epoch": 0.2024983563445102, + "high_lr": 0.0009594736842105264, + "low_lr": 1.918947368421053e-05, + "step": 77 + }, + { + "epoch": 0.2024983563445102, + "high_lr": 0.0009594736842105264, + "low_lr": 1.918947368421053e-05, + "step": 77 + }, + { + "epoch": 0.2024983563445102, + "high_lr": 0.0009594736842105264, + "low_lr": 1.918947368421053e-05, + "step": 77 + }, + { + "epoch": 0.2024983563445102, + "high_lr": 0.0009594736842105264, + "low_lr": 1.918947368421053e-05, + "step": 77 + }, + { + "epoch": 0.2024983563445102, + "high_lr": 0.0009594736842105264, + "low_lr": 1.918947368421053e-05, + "step": 77 + }, + { + "epoch": 0.2024983563445102, + "high_lr": 0.0009594736842105264, + "low_lr": 1.918947368421053e-05, + "step": 77 + }, + { + "epoch": 0.20512820512820512, + "grad_norm": 0.6018377542495728, + "learning_rate": 0.0009589473684210527, + "loss": 1.9637, + "step": 78 + }, + { + "epoch": 0.20512820512820512, + "high_lr": 0.0009589473684210527, + "low_lr": 1.9178947368421055e-05, + "step": 78 + }, + { + "epoch": 0.20512820512820512, + "high_lr": 0.0009589473684210527, + "low_lr": 1.9178947368421055e-05, + "step": 78 + }, + { + "epoch": 0.20512820512820512, + "high_lr": 0.0009589473684210527, + "low_lr": 1.9178947368421055e-05, + "step": 78 + }, + { + "epoch": 0.20512820512820512, + "high_lr": 0.0009589473684210527, + "low_lr": 1.9178947368421055e-05, + "step": 78 + }, + { + "epoch": 0.20512820512820512, + "high_lr": 0.0009589473684210527, + "low_lr": 1.9178947368421055e-05, + "step": 78 + }, + { + "epoch": 0.20512820512820512, + "high_lr": 0.0009589473684210527, + "low_lr": 1.9178947368421055e-05, + "step": 78 + }, + { + "epoch": 0.20512820512820512, + "high_lr": 0.0009589473684210527, + "low_lr": 1.9178947368421055e-05, + "step": 78 + }, + { + "epoch": 0.20512820512820512, + "high_lr": 0.0009589473684210527, + "low_lr": 1.9178947368421055e-05, + "step": 78 + }, + { + "epoch": 0.20775805391190005, + "grad_norm": 0.5876686573028564, + "learning_rate": 0.000958421052631579, + "loss": 2.0186, + "step": 79 + }, + { + "epoch": 0.20775805391190005, + "high_lr": 0.000958421052631579, + "low_lr": 1.916842105263158e-05, + "step": 79 + }, + { + "epoch": 0.20775805391190005, + "high_lr": 0.000958421052631579, + "low_lr": 1.916842105263158e-05, + "step": 79 + }, + { + "epoch": 0.20775805391190005, + "high_lr": 0.000958421052631579, + "low_lr": 1.916842105263158e-05, + "step": 79 + }, + { + "epoch": 0.20775805391190005, + "high_lr": 0.000958421052631579, + "low_lr": 1.916842105263158e-05, + "step": 79 + }, + { + "epoch": 0.20775805391190005, + "high_lr": 0.000958421052631579, + "low_lr": 1.916842105263158e-05, + "step": 79 + }, + { + "epoch": 0.20775805391190005, + "high_lr": 0.000958421052631579, + "low_lr": 1.916842105263158e-05, + "step": 79 + }, + { + "epoch": 0.20775805391190005, + "high_lr": 0.000958421052631579, + "low_lr": 1.916842105263158e-05, + "step": 79 + }, + { + "epoch": 0.20775805391190005, + "high_lr": 0.000958421052631579, + "low_lr": 1.916842105263158e-05, + "step": 79 + }, + { + "epoch": 0.210387902695595, + "grad_norm": 0.6686984300613403, + "learning_rate": 0.0009578947368421053, + "loss": 2.0095, + "step": 80 + }, + { + "epoch": 0.210387902695595, + "high_lr": 0.0009578947368421053, + "low_lr": 1.9157894736842108e-05, + "step": 80 + }, + { + "epoch": 0.210387902695595, + "high_lr": 0.0009578947368421053, + "low_lr": 1.9157894736842108e-05, + "step": 80 + }, + { + "epoch": 0.210387902695595, + "high_lr": 0.0009578947368421053, + "low_lr": 1.9157894736842108e-05, + "step": 80 + }, + { + "epoch": 0.210387902695595, + "high_lr": 0.0009578947368421053, + "low_lr": 1.9157894736842108e-05, + "step": 80 + }, + { + "epoch": 0.210387902695595, + "high_lr": 0.0009578947368421053, + "low_lr": 1.9157894736842108e-05, + "step": 80 + }, + { + "epoch": 0.210387902695595, + "high_lr": 0.0009578947368421053, + "low_lr": 1.9157894736842108e-05, + "step": 80 + }, + { + "epoch": 0.210387902695595, + "high_lr": 0.0009578947368421053, + "low_lr": 1.9157894736842108e-05, + "step": 80 + }, + { + "epoch": 0.210387902695595, + "high_lr": 0.0009578947368421053, + "low_lr": 1.9157894736842108e-05, + "step": 80 + }, + { + "epoch": 0.21301775147928995, + "grad_norm": 0.5342804193496704, + "learning_rate": 0.0009573684210526316, + "loss": 1.9288, + "step": 81 + }, + { + "epoch": 0.21301775147928995, + "high_lr": 0.0009573684210526316, + "low_lr": 1.9147368421052632e-05, + "step": 81 + }, + { + "epoch": 0.21301775147928995, + "high_lr": 0.0009573684210526316, + "low_lr": 1.9147368421052632e-05, + "step": 81 + }, + { + "epoch": 0.21301775147928995, + "high_lr": 0.0009573684210526316, + "low_lr": 1.9147368421052632e-05, + "step": 81 + }, + { + "epoch": 0.21301775147928995, + "high_lr": 0.0009573684210526316, + "low_lr": 1.9147368421052632e-05, + "step": 81 + }, + { + "epoch": 0.21301775147928995, + "high_lr": 0.0009573684210526316, + "low_lr": 1.9147368421052632e-05, + "step": 81 + }, + { + "epoch": 0.21301775147928995, + "high_lr": 0.0009573684210526316, + "low_lr": 1.9147368421052632e-05, + "step": 81 + }, + { + "epoch": 0.21301775147928995, + "high_lr": 0.0009573684210526316, + "low_lr": 1.9147368421052632e-05, + "step": 81 + }, + { + "epoch": 0.21301775147928995, + "high_lr": 0.0009573684210526316, + "low_lr": 1.9147368421052632e-05, + "step": 81 + }, + { + "epoch": 0.21564760026298488, + "grad_norm": 0.636827826499939, + "learning_rate": 0.000956842105263158, + "loss": 1.9725, + "step": 82 + }, + { + "epoch": 0.21564760026298488, + "high_lr": 0.000956842105263158, + "low_lr": 1.913684210526316e-05, + "step": 82 + }, + { + "epoch": 0.21564760026298488, + "high_lr": 0.000956842105263158, + "low_lr": 1.913684210526316e-05, + "step": 82 + }, + { + "epoch": 0.21564760026298488, + "high_lr": 0.000956842105263158, + "low_lr": 1.913684210526316e-05, + "step": 82 + }, + { + "epoch": 0.21564760026298488, + "high_lr": 0.000956842105263158, + "low_lr": 1.913684210526316e-05, + "step": 82 + }, + { + "epoch": 0.21564760026298488, + "high_lr": 0.000956842105263158, + "low_lr": 1.913684210526316e-05, + "step": 82 + }, + { + "epoch": 0.21564760026298488, + "high_lr": 0.000956842105263158, + "low_lr": 1.913684210526316e-05, + "step": 82 + }, + { + "epoch": 0.21564760026298488, + "high_lr": 0.000956842105263158, + "low_lr": 1.913684210526316e-05, + "step": 82 + }, + { + "epoch": 0.21564760026298488, + "high_lr": 0.000956842105263158, + "low_lr": 1.913684210526316e-05, + "step": 82 + }, + { + "epoch": 0.2182774490466798, + "grad_norm": 0.6838151216506958, + "learning_rate": 0.0009563157894736842, + "loss": 2.0263, + "step": 83 + }, + { + "epoch": 0.2182774490466798, + "high_lr": 0.0009563157894736842, + "low_lr": 1.9126315789473685e-05, + "step": 83 + }, + { + "epoch": 0.2182774490466798, + "high_lr": 0.0009563157894736842, + "low_lr": 1.9126315789473685e-05, + "step": 83 + }, + { + "epoch": 0.2182774490466798, + "high_lr": 0.0009563157894736842, + "low_lr": 1.9126315789473685e-05, + "step": 83 + }, + { + "epoch": 0.2182774490466798, + "high_lr": 0.0009563157894736842, + "low_lr": 1.9126315789473685e-05, + "step": 83 + }, + { + "epoch": 0.2182774490466798, + "high_lr": 0.0009563157894736842, + "low_lr": 1.9126315789473685e-05, + "step": 83 + }, + { + "epoch": 0.2182774490466798, + "high_lr": 0.0009563157894736842, + "low_lr": 1.9126315789473685e-05, + "step": 83 + }, + { + "epoch": 0.2182774490466798, + "high_lr": 0.0009563157894736842, + "low_lr": 1.9126315789473685e-05, + "step": 83 + }, + { + "epoch": 0.2182774490466798, + "high_lr": 0.0009563157894736842, + "low_lr": 1.9126315789473685e-05, + "step": 83 + }, + { + "epoch": 0.22090729783037474, + "grad_norm": 0.5947110056877136, + "learning_rate": 0.0009557894736842105, + "loss": 1.9359, + "step": 84 + }, + { + "epoch": 0.22090729783037474, + "high_lr": 0.0009557894736842105, + "low_lr": 1.9115789473684213e-05, + "step": 84 + }, + { + "epoch": 0.22090729783037474, + "high_lr": 0.0009557894736842105, + "low_lr": 1.9115789473684213e-05, + "step": 84 + }, + { + "epoch": 0.22090729783037474, + "high_lr": 0.0009557894736842105, + "low_lr": 1.9115789473684213e-05, + "step": 84 + }, + { + "epoch": 0.22090729783037474, + "high_lr": 0.0009557894736842105, + "low_lr": 1.9115789473684213e-05, + "step": 84 + }, + { + "epoch": 0.22090729783037474, + "high_lr": 0.0009557894736842105, + "low_lr": 1.9115789473684213e-05, + "step": 84 + }, + { + "epoch": 0.22090729783037474, + "high_lr": 0.0009557894736842105, + "low_lr": 1.9115789473684213e-05, + "step": 84 + }, + { + "epoch": 0.22090729783037474, + "high_lr": 0.0009557894736842105, + "low_lr": 1.9115789473684213e-05, + "step": 84 + }, + { + "epoch": 0.22090729783037474, + "high_lr": 0.0009557894736842105, + "low_lr": 1.9115789473684213e-05, + "step": 84 + }, + { + "epoch": 0.2235371466140697, + "grad_norm": 0.6437410116195679, + "learning_rate": 0.0009552631578947368, + "loss": 1.9238, + "step": 85 + }, + { + "epoch": 0.2235371466140697, + "high_lr": 0.0009552631578947368, + "low_lr": 1.9105263157894738e-05, + "step": 85 + }, + { + "epoch": 0.2235371466140697, + "high_lr": 0.0009552631578947368, + "low_lr": 1.9105263157894738e-05, + "step": 85 + }, + { + "epoch": 0.2235371466140697, + "high_lr": 0.0009552631578947368, + "low_lr": 1.9105263157894738e-05, + "step": 85 + }, + { + "epoch": 0.2235371466140697, + "high_lr": 0.0009552631578947368, + "low_lr": 1.9105263157894738e-05, + "step": 85 + }, + { + "epoch": 0.2235371466140697, + "high_lr": 0.0009552631578947368, + "low_lr": 1.9105263157894738e-05, + "step": 85 + }, + { + "epoch": 0.2235371466140697, + "high_lr": 0.0009552631578947368, + "low_lr": 1.9105263157894738e-05, + "step": 85 + }, + { + "epoch": 0.2235371466140697, + "high_lr": 0.0009552631578947368, + "low_lr": 1.9105263157894738e-05, + "step": 85 + }, + { + "epoch": 0.2235371466140697, + "high_lr": 0.0009552631578947368, + "low_lr": 1.9105263157894738e-05, + "step": 85 + }, + { + "epoch": 0.22616699539776464, + "grad_norm": 0.6488404273986816, + "learning_rate": 0.0009547368421052631, + "loss": 1.9571, + "step": 86 + }, + { + "epoch": 0.22616699539776464, + "high_lr": 0.0009547368421052631, + "low_lr": 1.9094736842105262e-05, + "step": 86 + }, + { + "epoch": 0.22616699539776464, + "high_lr": 0.0009547368421052631, + "low_lr": 1.9094736842105262e-05, + "step": 86 + }, + { + "epoch": 0.22616699539776464, + "high_lr": 0.0009547368421052631, + "low_lr": 1.9094736842105262e-05, + "step": 86 + }, + { + "epoch": 0.22616699539776464, + "high_lr": 0.0009547368421052631, + "low_lr": 1.9094736842105262e-05, + "step": 86 + }, + { + "epoch": 0.22616699539776464, + "high_lr": 0.0009547368421052631, + "low_lr": 1.9094736842105262e-05, + "step": 86 + }, + { + "epoch": 0.22616699539776464, + "high_lr": 0.0009547368421052631, + "low_lr": 1.9094736842105262e-05, + "step": 86 + }, + { + "epoch": 0.22616699539776464, + "high_lr": 0.0009547368421052631, + "low_lr": 1.9094736842105262e-05, + "step": 86 + }, + { + "epoch": 0.22616699539776464, + "high_lr": 0.0009547368421052631, + "low_lr": 1.9094736842105262e-05, + "step": 86 + }, + { + "epoch": 0.22879684418145957, + "grad_norm": 0.6692899465560913, + "learning_rate": 0.0009542105263157895, + "loss": 2.0267, + "step": 87 + }, + { + "epoch": 0.22879684418145957, + "high_lr": 0.0009542105263157895, + "low_lr": 1.908421052631579e-05, + "step": 87 + }, + { + "epoch": 0.22879684418145957, + "high_lr": 0.0009542105263157895, + "low_lr": 1.908421052631579e-05, + "step": 87 + }, + { + "epoch": 0.22879684418145957, + "high_lr": 0.0009542105263157895, + "low_lr": 1.908421052631579e-05, + "step": 87 + }, + { + "epoch": 0.22879684418145957, + "high_lr": 0.0009542105263157895, + "low_lr": 1.908421052631579e-05, + "step": 87 + }, + { + "epoch": 0.22879684418145957, + "high_lr": 0.0009542105263157895, + "low_lr": 1.908421052631579e-05, + "step": 87 + }, + { + "epoch": 0.22879684418145957, + "high_lr": 0.0009542105263157895, + "low_lr": 1.908421052631579e-05, + "step": 87 + }, + { + "epoch": 0.22879684418145957, + "high_lr": 0.0009542105263157895, + "low_lr": 1.908421052631579e-05, + "step": 87 + }, + { + "epoch": 0.22879684418145957, + "high_lr": 0.0009542105263157895, + "low_lr": 1.908421052631579e-05, + "step": 87 + }, + { + "epoch": 0.2314266929651545, + "grad_norm": 0.6169629693031311, + "learning_rate": 0.0009536842105263158, + "loss": 1.9545, + "step": 88 + }, + { + "epoch": 0.2314266929651545, + "high_lr": 0.0009536842105263158, + "low_lr": 1.907368421052632e-05, + "step": 88 + }, + { + "epoch": 0.2314266929651545, + "high_lr": 0.0009536842105263158, + "low_lr": 1.907368421052632e-05, + "step": 88 + }, + { + "epoch": 0.2314266929651545, + "high_lr": 0.0009536842105263158, + "low_lr": 1.907368421052632e-05, + "step": 88 + }, + { + "epoch": 0.2314266929651545, + "high_lr": 0.0009536842105263158, + "low_lr": 1.907368421052632e-05, + "step": 88 + }, + { + "epoch": 0.2314266929651545, + "high_lr": 0.0009536842105263158, + "low_lr": 1.907368421052632e-05, + "step": 88 + }, + { + "epoch": 0.2314266929651545, + "high_lr": 0.0009536842105263158, + "low_lr": 1.907368421052632e-05, + "step": 88 + }, + { + "epoch": 0.2314266929651545, + "high_lr": 0.0009536842105263158, + "low_lr": 1.907368421052632e-05, + "step": 88 + }, + { + "epoch": 0.2314266929651545, + "high_lr": 0.0009536842105263158, + "low_lr": 1.907368421052632e-05, + "step": 88 + }, + { + "epoch": 0.23405654174884943, + "grad_norm": 0.587587296962738, + "learning_rate": 0.0009531578947368421, + "loss": 1.8974, + "step": 89 + }, + { + "epoch": 0.23405654174884943, + "high_lr": 0.0009531578947368421, + "low_lr": 1.9063157894736843e-05, + "step": 89 + }, + { + "epoch": 0.23405654174884943, + "high_lr": 0.0009531578947368421, + "low_lr": 1.9063157894736843e-05, + "step": 89 + }, + { + "epoch": 0.23405654174884943, + "high_lr": 0.0009531578947368421, + "low_lr": 1.9063157894736843e-05, + "step": 89 + }, + { + "epoch": 0.23405654174884943, + "high_lr": 0.0009531578947368421, + "low_lr": 1.9063157894736843e-05, + "step": 89 + }, + { + "epoch": 0.23405654174884943, + "high_lr": 0.0009531578947368421, + "low_lr": 1.9063157894736843e-05, + "step": 89 + }, + { + "epoch": 0.23405654174884943, + "high_lr": 0.0009531578947368421, + "low_lr": 1.9063157894736843e-05, + "step": 89 + }, + { + "epoch": 0.23405654174884943, + "high_lr": 0.0009531578947368421, + "low_lr": 1.9063157894736843e-05, + "step": 89 + }, + { + "epoch": 0.23405654174884943, + "high_lr": 0.0009531578947368421, + "low_lr": 1.9063157894736843e-05, + "step": 89 + }, + { + "epoch": 0.23668639053254437, + "grad_norm": 0.6006247401237488, + "learning_rate": 0.0009526315789473684, + "loss": 1.9328, + "step": 90 + }, + { + "epoch": 0.23668639053254437, + "high_lr": 0.0009526315789473684, + "low_lr": 1.9052631578947368e-05, + "step": 90 + }, + { + "epoch": 0.23668639053254437, + "high_lr": 0.0009526315789473684, + "low_lr": 1.9052631578947368e-05, + "step": 90 + }, + { + "epoch": 0.23668639053254437, + "high_lr": 0.0009526315789473684, + "low_lr": 1.9052631578947368e-05, + "step": 90 + }, + { + "epoch": 0.23668639053254437, + "high_lr": 0.0009526315789473684, + "low_lr": 1.9052631578947368e-05, + "step": 90 + }, + { + "epoch": 0.23668639053254437, + "high_lr": 0.0009526315789473684, + "low_lr": 1.9052631578947368e-05, + "step": 90 + }, + { + "epoch": 0.23668639053254437, + "high_lr": 0.0009526315789473684, + "low_lr": 1.9052631578947368e-05, + "step": 90 + }, + { + "epoch": 0.23668639053254437, + "high_lr": 0.0009526315789473684, + "low_lr": 1.9052631578947368e-05, + "step": 90 + }, + { + "epoch": 0.23668639053254437, + "high_lr": 0.0009526315789473684, + "low_lr": 1.9052631578947368e-05, + "step": 90 + }, + { + "epoch": 0.23931623931623933, + "grad_norm": 0.7251639366149902, + "learning_rate": 0.0009521052631578949, + "loss": 2.0029, + "step": 91 + }, + { + "epoch": 0.23931623931623933, + "high_lr": 0.0009521052631578949, + "low_lr": 1.9042105263157896e-05, + "step": 91 + }, + { + "epoch": 0.23931623931623933, + "high_lr": 0.0009521052631578949, + "low_lr": 1.9042105263157896e-05, + "step": 91 + }, + { + "epoch": 0.23931623931623933, + "high_lr": 0.0009521052631578949, + "low_lr": 1.9042105263157896e-05, + "step": 91 + }, + { + "epoch": 0.23931623931623933, + "high_lr": 0.0009521052631578949, + "low_lr": 1.9042105263157896e-05, + "step": 91 + }, + { + "epoch": 0.23931623931623933, + "high_lr": 0.0009521052631578949, + "low_lr": 1.9042105263157896e-05, + "step": 91 + }, + { + "epoch": 0.23931623931623933, + "high_lr": 0.0009521052631578949, + "low_lr": 1.9042105263157896e-05, + "step": 91 + }, + { + "epoch": 0.23931623931623933, + "high_lr": 0.0009521052631578949, + "low_lr": 1.9042105263157896e-05, + "step": 91 + }, + { + "epoch": 0.23931623931623933, + "high_lr": 0.0009521052631578949, + "low_lr": 1.9042105263157896e-05, + "step": 91 + }, + { + "epoch": 0.24194608809993426, + "grad_norm": 0.7573750019073486, + "learning_rate": 0.0009515789473684211, + "loss": 2.0662, + "step": 92 + }, + { + "epoch": 0.24194608809993426, + "high_lr": 0.0009515789473684211, + "low_lr": 1.9031578947368424e-05, + "step": 92 + }, + { + "epoch": 0.24194608809993426, + "high_lr": 0.0009515789473684211, + "low_lr": 1.9031578947368424e-05, + "step": 92 + }, + { + "epoch": 0.24194608809993426, + "high_lr": 0.0009515789473684211, + "low_lr": 1.9031578947368424e-05, + "step": 92 + }, + { + "epoch": 0.24194608809993426, + "high_lr": 0.0009515789473684211, + "low_lr": 1.9031578947368424e-05, + "step": 92 + }, + { + "epoch": 0.24194608809993426, + "high_lr": 0.0009515789473684211, + "low_lr": 1.9031578947368424e-05, + "step": 92 + }, + { + "epoch": 0.24194608809993426, + "high_lr": 0.0009515789473684211, + "low_lr": 1.9031578947368424e-05, + "step": 92 + }, + { + "epoch": 0.24194608809993426, + "high_lr": 0.0009515789473684211, + "low_lr": 1.9031578947368424e-05, + "step": 92 + }, + { + "epoch": 0.24194608809993426, + "high_lr": 0.0009515789473684211, + "low_lr": 1.9031578947368424e-05, + "step": 92 + }, + { + "epoch": 0.2445759368836292, + "grad_norm": 0.6473020315170288, + "learning_rate": 0.0009510526315789474, + "loss": 1.9681, + "step": 93 + }, + { + "epoch": 0.2445759368836292, + "high_lr": 0.0009510526315789474, + "low_lr": 1.902105263157895e-05, + "step": 93 + }, + { + "epoch": 0.2445759368836292, + "high_lr": 0.0009510526315789474, + "low_lr": 1.902105263157895e-05, + "step": 93 + }, + { + "epoch": 0.2445759368836292, + "high_lr": 0.0009510526315789474, + "low_lr": 1.902105263157895e-05, + "step": 93 + }, + { + "epoch": 0.2445759368836292, + "high_lr": 0.0009510526315789474, + "low_lr": 1.902105263157895e-05, + "step": 93 + }, + { + "epoch": 0.2445759368836292, + "high_lr": 0.0009510526315789474, + "low_lr": 1.902105263157895e-05, + "step": 93 + }, + { + "epoch": 0.2445759368836292, + "high_lr": 0.0009510526315789474, + "low_lr": 1.902105263157895e-05, + "step": 93 + }, + { + "epoch": 0.2445759368836292, + "high_lr": 0.0009510526315789474, + "low_lr": 1.902105263157895e-05, + "step": 93 + }, + { + "epoch": 0.2445759368836292, + "high_lr": 0.0009510526315789474, + "low_lr": 1.902105263157895e-05, + "step": 93 + }, + { + "epoch": 0.24720578566732412, + "grad_norm": 0.6018515229225159, + "learning_rate": 0.0009505263157894737, + "loss": 1.9542, + "step": 94 + }, + { + "epoch": 0.24720578566732412, + "high_lr": 0.0009505263157894737, + "low_lr": 1.9010526315789476e-05, + "step": 94 + }, + { + "epoch": 0.24720578566732412, + "high_lr": 0.0009505263157894737, + "low_lr": 1.9010526315789476e-05, + "step": 94 + }, + { + "epoch": 0.24720578566732412, + "high_lr": 0.0009505263157894737, + "low_lr": 1.9010526315789476e-05, + "step": 94 + }, + { + "epoch": 0.24720578566732412, + "high_lr": 0.0009505263157894737, + "low_lr": 1.9010526315789476e-05, + "step": 94 + }, + { + "epoch": 0.24720578566732412, + "high_lr": 0.0009505263157894737, + "low_lr": 1.9010526315789476e-05, + "step": 94 + }, + { + "epoch": 0.24720578566732412, + "high_lr": 0.0009505263157894737, + "low_lr": 1.9010526315789476e-05, + "step": 94 + }, + { + "epoch": 0.24720578566732412, + "high_lr": 0.0009505263157894737, + "low_lr": 1.9010526315789476e-05, + "step": 94 + }, + { + "epoch": 0.24720578566732412, + "high_lr": 0.0009505263157894737, + "low_lr": 1.9010526315789476e-05, + "step": 94 + }, + { + "epoch": 0.24983563445101906, + "grad_norm": 0.6318551898002625, + "learning_rate": 0.00095, + "loss": 1.9501, + "step": 95 + }, + { + "epoch": 0.24983563445101906, + "high_lr": 0.00095, + "low_lr": 1.9e-05, + "step": 95 + }, + { + "epoch": 0.24983563445101906, + "high_lr": 0.00095, + "low_lr": 1.9e-05, + "step": 95 + }, + { + "epoch": 0.24983563445101906, + "high_lr": 0.00095, + "low_lr": 1.9e-05, + "step": 95 + }, + { + "epoch": 0.24983563445101906, + "high_lr": 0.00095, + "low_lr": 1.9e-05, + "step": 95 + }, + { + "epoch": 0.24983563445101906, + "high_lr": 0.00095, + "low_lr": 1.9e-05, + "step": 95 + }, + { + "epoch": 0.24983563445101906, + "high_lr": 0.00095, + "low_lr": 1.9e-05, + "step": 95 + }, + { + "epoch": 0.24983563445101906, + "high_lr": 0.00095, + "low_lr": 1.9e-05, + "step": 95 + }, + { + "epoch": 0.24983563445101906, + "high_lr": 0.00095, + "low_lr": 1.9e-05, + "step": 95 + }, + { + "epoch": 0.252465483234714, + "grad_norm": 0.6606828570365906, + "learning_rate": 0.0009494736842105264, + "loss": 1.8956, + "step": 96 + }, + { + "epoch": 0.252465483234714, + "high_lr": 0.0009494736842105264, + "low_lr": 1.898947368421053e-05, + "step": 96 + }, + { + "epoch": 0.252465483234714, + "high_lr": 0.0009494736842105264, + "low_lr": 1.898947368421053e-05, + "step": 96 + }, + { + "epoch": 0.252465483234714, + "high_lr": 0.0009494736842105264, + "low_lr": 1.898947368421053e-05, + "step": 96 + }, + { + "epoch": 0.252465483234714, + "high_lr": 0.0009494736842105264, + "low_lr": 1.898947368421053e-05, + "step": 96 + }, + { + "epoch": 0.252465483234714, + "high_lr": 0.0009494736842105264, + "low_lr": 1.898947368421053e-05, + "step": 96 + }, + { + "epoch": 0.252465483234714, + "high_lr": 0.0009494736842105264, + "low_lr": 1.898947368421053e-05, + "step": 96 + }, + { + "epoch": 0.252465483234714, + "high_lr": 0.0009494736842105264, + "low_lr": 1.898947368421053e-05, + "step": 96 + }, + { + "epoch": 0.252465483234714, + "high_lr": 0.0009494736842105264, + "low_lr": 1.898947368421053e-05, + "step": 96 + }, + { + "epoch": 0.25509533201840895, + "grad_norm": 0.64029860496521, + "learning_rate": 0.0009489473684210527, + "loss": 1.9137, + "step": 97 + }, + { + "epoch": 0.25509533201840895, + "high_lr": 0.0009489473684210527, + "low_lr": 1.8978947368421054e-05, + "step": 97 + }, + { + "epoch": 0.25509533201840895, + "high_lr": 0.0009489473684210527, + "low_lr": 1.8978947368421054e-05, + "step": 97 + }, + { + "epoch": 0.25509533201840895, + "high_lr": 0.0009489473684210527, + "low_lr": 1.8978947368421054e-05, + "step": 97 + }, + { + "epoch": 0.25509533201840895, + "high_lr": 0.0009489473684210527, + "low_lr": 1.8978947368421054e-05, + "step": 97 + }, + { + "epoch": 0.25509533201840895, + "high_lr": 0.0009489473684210527, + "low_lr": 1.8978947368421054e-05, + "step": 97 + }, + { + "epoch": 0.25509533201840895, + "high_lr": 0.0009489473684210527, + "low_lr": 1.8978947368421054e-05, + "step": 97 + }, + { + "epoch": 0.25509533201840895, + "high_lr": 0.0009489473684210527, + "low_lr": 1.8978947368421054e-05, + "step": 97 + }, + { + "epoch": 0.25509533201840895, + "high_lr": 0.0009489473684210527, + "low_lr": 1.8978947368421054e-05, + "step": 97 + }, + { + "epoch": 0.2577251808021039, + "grad_norm": 0.6775450110435486, + "learning_rate": 0.000948421052631579, + "loss": 1.9968, + "step": 98 + }, + { + "epoch": 0.2577251808021039, + "high_lr": 0.000948421052631579, + "low_lr": 1.8968421052631582e-05, + "step": 98 + }, + { + "epoch": 0.2577251808021039, + "high_lr": 0.000948421052631579, + "low_lr": 1.8968421052631582e-05, + "step": 98 + }, + { + "epoch": 0.2577251808021039, + "high_lr": 0.000948421052631579, + "low_lr": 1.8968421052631582e-05, + "step": 98 + }, + { + "epoch": 0.2577251808021039, + "high_lr": 0.000948421052631579, + "low_lr": 1.8968421052631582e-05, + "step": 98 + }, + { + "epoch": 0.2577251808021039, + "high_lr": 0.000948421052631579, + "low_lr": 1.8968421052631582e-05, + "step": 98 + }, + { + "epoch": 0.2577251808021039, + "high_lr": 0.000948421052631579, + "low_lr": 1.8968421052631582e-05, + "step": 98 + }, + { + "epoch": 0.2577251808021039, + "high_lr": 0.000948421052631579, + "low_lr": 1.8968421052631582e-05, + "step": 98 + }, + { + "epoch": 0.2577251808021039, + "high_lr": 0.000948421052631579, + "low_lr": 1.8968421052631582e-05, + "step": 98 + }, + { + "epoch": 0.2603550295857988, + "grad_norm": 0.6305446028709412, + "learning_rate": 0.0009478947368421053, + "loss": 1.8879, + "step": 99 + }, + { + "epoch": 0.2603550295857988, + "high_lr": 0.0009478947368421053, + "low_lr": 1.8957894736842106e-05, + "step": 99 + }, + { + "epoch": 0.2603550295857988, + "high_lr": 0.0009478947368421053, + "low_lr": 1.8957894736842106e-05, + "step": 99 + }, + { + "epoch": 0.2603550295857988, + "high_lr": 0.0009478947368421053, + "low_lr": 1.8957894736842106e-05, + "step": 99 + }, + { + "epoch": 0.2603550295857988, + "high_lr": 0.0009478947368421053, + "low_lr": 1.8957894736842106e-05, + "step": 99 + }, + { + "epoch": 0.2603550295857988, + "high_lr": 0.0009478947368421053, + "low_lr": 1.8957894736842106e-05, + "step": 99 + }, + { + "epoch": 0.2603550295857988, + "high_lr": 0.0009478947368421053, + "low_lr": 1.8957894736842106e-05, + "step": 99 + }, + { + "epoch": 0.2603550295857988, + "high_lr": 0.0009478947368421053, + "low_lr": 1.8957894736842106e-05, + "step": 99 + }, + { + "epoch": 0.2603550295857988, + "high_lr": 0.0009478947368421053, + "low_lr": 1.8957894736842106e-05, + "step": 99 + }, + { + "epoch": 0.26298487836949375, + "grad_norm": 0.6236740946769714, + "learning_rate": 0.0009473684210526315, + "loss": 1.9292, + "step": 100 + }, + { + "epoch": 0.26298487836949375, + "high_lr": 0.0009473684210526315, + "low_lr": 1.894736842105263e-05, + "step": 100 + }, + { + "epoch": 0.26298487836949375, + "high_lr": 0.0009473684210526315, + "low_lr": 1.894736842105263e-05, + "step": 100 + }, + { + "epoch": 0.26298487836949375, + "high_lr": 0.0009473684210526315, + "low_lr": 1.894736842105263e-05, + "step": 100 + }, + { + "epoch": 0.26298487836949375, + "high_lr": 0.0009473684210526315, + "low_lr": 1.894736842105263e-05, + "step": 100 + }, + { + "epoch": 0.26298487836949375, + "high_lr": 0.0009473684210526315, + "low_lr": 1.894736842105263e-05, + "step": 100 + }, + { + "epoch": 0.26298487836949375, + "high_lr": 0.0009473684210526315, + "low_lr": 1.894736842105263e-05, + "step": 100 + }, + { + "epoch": 0.26298487836949375, + "high_lr": 0.0009473684210526315, + "low_lr": 1.894736842105263e-05, + "step": 100 + }, + { + "epoch": 0.26298487836949375, + "high_lr": 0.0009473684210526315, + "low_lr": 1.894736842105263e-05, + "step": 100 + }, + { + "epoch": 0.2656147271531887, + "grad_norm": 0.7075583934783936, + "learning_rate": 0.0009468421052631579, + "loss": 1.9358, + "step": 101 + }, + { + "epoch": 0.2656147271531887, + "high_lr": 0.0009468421052631579, + "low_lr": 1.893684210526316e-05, + "step": 101 + }, + { + "epoch": 0.2656147271531887, + "high_lr": 0.0009468421052631579, + "low_lr": 1.893684210526316e-05, + "step": 101 + }, + { + "epoch": 0.2656147271531887, + "high_lr": 0.0009468421052631579, + "low_lr": 1.893684210526316e-05, + "step": 101 + }, + { + "epoch": 0.2656147271531887, + "high_lr": 0.0009468421052631579, + "low_lr": 1.893684210526316e-05, + "step": 101 + }, + { + "epoch": 0.2656147271531887, + "high_lr": 0.0009468421052631579, + "low_lr": 1.893684210526316e-05, + "step": 101 + }, + { + "epoch": 0.2656147271531887, + "high_lr": 0.0009468421052631579, + "low_lr": 1.893684210526316e-05, + "step": 101 + }, + { + "epoch": 0.2656147271531887, + "high_lr": 0.0009468421052631579, + "low_lr": 1.893684210526316e-05, + "step": 101 + }, + { + "epoch": 0.2656147271531887, + "high_lr": 0.0009468421052631579, + "low_lr": 1.893684210526316e-05, + "step": 101 + }, + { + "epoch": 0.2682445759368836, + "grad_norm": 0.6899865865707397, + "learning_rate": 0.0009463157894736842, + "loss": 1.9407, + "step": 102 + }, + { + "epoch": 0.2682445759368836, + "high_lr": 0.0009463157894736842, + "low_lr": 1.8926315789473687e-05, + "step": 102 + }, + { + "epoch": 0.2682445759368836, + "high_lr": 0.0009463157894736842, + "low_lr": 1.8926315789473687e-05, + "step": 102 + }, + { + "epoch": 0.2682445759368836, + "high_lr": 0.0009463157894736842, + "low_lr": 1.8926315789473687e-05, + "step": 102 + }, + { + "epoch": 0.2682445759368836, + "high_lr": 0.0009463157894736842, + "low_lr": 1.8926315789473687e-05, + "step": 102 + }, + { + "epoch": 0.2682445759368836, + "high_lr": 0.0009463157894736842, + "low_lr": 1.8926315789473687e-05, + "step": 102 + }, + { + "epoch": 0.2682445759368836, + "high_lr": 0.0009463157894736842, + "low_lr": 1.8926315789473687e-05, + "step": 102 + }, + { + "epoch": 0.2682445759368836, + "high_lr": 0.0009463157894736842, + "low_lr": 1.8926315789473687e-05, + "step": 102 + }, + { + "epoch": 0.2682445759368836, + "high_lr": 0.0009463157894736842, + "low_lr": 1.8926315789473687e-05, + "step": 102 + }, + { + "epoch": 0.27087442472057854, + "grad_norm": 0.6408928632736206, + "learning_rate": 0.0009457894736842105, + "loss": 1.9906, + "step": 103 + }, + { + "epoch": 0.27087442472057854, + "high_lr": 0.0009457894736842105, + "low_lr": 1.891578947368421e-05, + "step": 103 + }, + { + "epoch": 0.27087442472057854, + "high_lr": 0.0009457894736842105, + "low_lr": 1.891578947368421e-05, + "step": 103 + }, + { + "epoch": 0.27087442472057854, + "high_lr": 0.0009457894736842105, + "low_lr": 1.891578947368421e-05, + "step": 103 + }, + { + "epoch": 0.27087442472057854, + "high_lr": 0.0009457894736842105, + "low_lr": 1.891578947368421e-05, + "step": 103 + }, + { + "epoch": 0.27087442472057854, + "high_lr": 0.0009457894736842105, + "low_lr": 1.891578947368421e-05, + "step": 103 + }, + { + "epoch": 0.27087442472057854, + "high_lr": 0.0009457894736842105, + "low_lr": 1.891578947368421e-05, + "step": 103 + }, + { + "epoch": 0.27087442472057854, + "high_lr": 0.0009457894736842105, + "low_lr": 1.891578947368421e-05, + "step": 103 + }, + { + "epoch": 0.27087442472057854, + "high_lr": 0.0009457894736842105, + "low_lr": 1.891578947368421e-05, + "step": 103 + }, + { + "epoch": 0.27350427350427353, + "grad_norm": 0.6023521423339844, + "learning_rate": 0.0009452631578947368, + "loss": 1.8894, + "step": 104 + }, + { + "epoch": 0.27350427350427353, + "high_lr": 0.0009452631578947368, + "low_lr": 1.8905263157894736e-05, + "step": 104 + }, + { + "epoch": 0.27350427350427353, + "high_lr": 0.0009452631578947368, + "low_lr": 1.8905263157894736e-05, + "step": 104 + }, + { + "epoch": 0.27350427350427353, + "high_lr": 0.0009452631578947368, + "low_lr": 1.8905263157894736e-05, + "step": 104 + }, + { + "epoch": 0.27350427350427353, + "high_lr": 0.0009452631578947368, + "low_lr": 1.8905263157894736e-05, + "step": 104 + }, + { + "epoch": 0.27350427350427353, + "high_lr": 0.0009452631578947368, + "low_lr": 1.8905263157894736e-05, + "step": 104 + }, + { + "epoch": 0.27350427350427353, + "high_lr": 0.0009452631578947368, + "low_lr": 1.8905263157894736e-05, + "step": 104 + }, + { + "epoch": 0.27350427350427353, + "high_lr": 0.0009452631578947368, + "low_lr": 1.8905263157894736e-05, + "step": 104 + }, + { + "epoch": 0.27350427350427353, + "high_lr": 0.0009452631578947368, + "low_lr": 1.8905263157894736e-05, + "step": 104 + }, + { + "epoch": 0.27613412228796846, + "grad_norm": 0.6638351082801819, + "learning_rate": 0.0009447368421052632, + "loss": 1.927, + "step": 105 + }, + { + "epoch": 0.27613412228796846, + "high_lr": 0.0009447368421052632, + "low_lr": 1.8894736842105264e-05, + "step": 105 + }, + { + "epoch": 0.27613412228796846, + "high_lr": 0.0009447368421052632, + "low_lr": 1.8894736842105264e-05, + "step": 105 + }, + { + "epoch": 0.27613412228796846, + "high_lr": 0.0009447368421052632, + "low_lr": 1.8894736842105264e-05, + "step": 105 + }, + { + "epoch": 0.27613412228796846, + "high_lr": 0.0009447368421052632, + "low_lr": 1.8894736842105264e-05, + "step": 105 + }, + { + "epoch": 0.27613412228796846, + "high_lr": 0.0009447368421052632, + "low_lr": 1.8894736842105264e-05, + "step": 105 + }, + { + "epoch": 0.27613412228796846, + "high_lr": 0.0009447368421052632, + "low_lr": 1.8894736842105264e-05, + "step": 105 + }, + { + "epoch": 0.27613412228796846, + "high_lr": 0.0009447368421052632, + "low_lr": 1.8894736842105264e-05, + "step": 105 + }, + { + "epoch": 0.27613412228796846, + "high_lr": 0.0009447368421052632, + "low_lr": 1.8894736842105264e-05, + "step": 105 + }, + { + "epoch": 0.2787639710716634, + "grad_norm": 0.6832205653190613, + "learning_rate": 0.0009442105263157895, + "loss": 1.9289, + "step": 106 + }, + { + "epoch": 0.2787639710716634, + "high_lr": 0.0009442105263157895, + "low_lr": 1.8884210526315792e-05, + "step": 106 + }, + { + "epoch": 0.2787639710716634, + "high_lr": 0.0009442105263157895, + "low_lr": 1.8884210526315792e-05, + "step": 106 + }, + { + "epoch": 0.2787639710716634, + "high_lr": 0.0009442105263157895, + "low_lr": 1.8884210526315792e-05, + "step": 106 + }, + { + "epoch": 0.2787639710716634, + "high_lr": 0.0009442105263157895, + "low_lr": 1.8884210526315792e-05, + "step": 106 + }, + { + "epoch": 0.2787639710716634, + "high_lr": 0.0009442105263157895, + "low_lr": 1.8884210526315792e-05, + "step": 106 + }, + { + "epoch": 0.2787639710716634, + "high_lr": 0.0009442105263157895, + "low_lr": 1.8884210526315792e-05, + "step": 106 + }, + { + "epoch": 0.2787639710716634, + "high_lr": 0.0009442105263157895, + "low_lr": 1.8884210526315792e-05, + "step": 106 + }, + { + "epoch": 0.2787639710716634, + "high_lr": 0.0009442105263157895, + "low_lr": 1.8884210526315792e-05, + "step": 106 + }, + { + "epoch": 0.28139381985535833, + "grad_norm": 0.6350919604301453, + "learning_rate": 0.0009436842105263159, + "loss": 1.9271, + "step": 107 + }, + { + "epoch": 0.28139381985535833, + "high_lr": 0.0009436842105263159, + "low_lr": 1.8873684210526317e-05, + "step": 107 + }, + { + "epoch": 0.28139381985535833, + "high_lr": 0.0009436842105263159, + "low_lr": 1.8873684210526317e-05, + "step": 107 + }, + { + "epoch": 0.28139381985535833, + "high_lr": 0.0009436842105263159, + "low_lr": 1.8873684210526317e-05, + "step": 107 + }, + { + "epoch": 0.28139381985535833, + "high_lr": 0.0009436842105263159, + "low_lr": 1.8873684210526317e-05, + "step": 107 + }, + { + "epoch": 0.28139381985535833, + "high_lr": 0.0009436842105263159, + "low_lr": 1.8873684210526317e-05, + "step": 107 + }, + { + "epoch": 0.28139381985535833, + "high_lr": 0.0009436842105263159, + "low_lr": 1.8873684210526317e-05, + "step": 107 + }, + { + "epoch": 0.28139381985535833, + "high_lr": 0.0009436842105263159, + "low_lr": 1.8873684210526317e-05, + "step": 107 + }, + { + "epoch": 0.28139381985535833, + "high_lr": 0.0009436842105263159, + "low_lr": 1.8873684210526317e-05, + "step": 107 + }, + { + "epoch": 0.28402366863905326, + "grad_norm": 0.7083135843276978, + "learning_rate": 0.0009431578947368421, + "loss": 1.9643, + "step": 108 + }, + { + "epoch": 0.28402366863905326, + "high_lr": 0.0009431578947368421, + "low_lr": 1.886315789473684e-05, + "step": 108 + }, + { + "epoch": 0.28402366863905326, + "high_lr": 0.0009431578947368421, + "low_lr": 1.886315789473684e-05, + "step": 108 + }, + { + "epoch": 0.28402366863905326, + "high_lr": 0.0009431578947368421, + "low_lr": 1.886315789473684e-05, + "step": 108 + }, + { + "epoch": 0.28402366863905326, + "high_lr": 0.0009431578947368421, + "low_lr": 1.886315789473684e-05, + "step": 108 + }, + { + "epoch": 0.28402366863905326, + "high_lr": 0.0009431578947368421, + "low_lr": 1.886315789473684e-05, + "step": 108 + }, + { + "epoch": 0.28402366863905326, + "high_lr": 0.0009431578947368421, + "low_lr": 1.886315789473684e-05, + "step": 108 + }, + { + "epoch": 0.28402366863905326, + "high_lr": 0.0009431578947368421, + "low_lr": 1.886315789473684e-05, + "step": 108 + }, + { + "epoch": 0.28402366863905326, + "high_lr": 0.0009431578947368421, + "low_lr": 1.886315789473684e-05, + "step": 108 + }, + { + "epoch": 0.2866535174227482, + "grad_norm": 0.7548526525497437, + "learning_rate": 0.0009426315789473684, + "loss": 1.9212, + "step": 109 + }, + { + "epoch": 0.2866535174227482, + "high_lr": 0.0009426315789473684, + "low_lr": 1.885263157894737e-05, + "step": 109 + }, + { + "epoch": 0.2866535174227482, + "high_lr": 0.0009426315789473684, + "low_lr": 1.885263157894737e-05, + "step": 109 + }, + { + "epoch": 0.2866535174227482, + "high_lr": 0.0009426315789473684, + "low_lr": 1.885263157894737e-05, + "step": 109 + }, + { + "epoch": 0.2866535174227482, + "high_lr": 0.0009426315789473684, + "low_lr": 1.885263157894737e-05, + "step": 109 + }, + { + "epoch": 0.2866535174227482, + "high_lr": 0.0009426315789473684, + "low_lr": 1.885263157894737e-05, + "step": 109 + }, + { + "epoch": 0.2866535174227482, + "high_lr": 0.0009426315789473684, + "low_lr": 1.885263157894737e-05, + "step": 109 + }, + { + "epoch": 0.2866535174227482, + "high_lr": 0.0009426315789473684, + "low_lr": 1.885263157894737e-05, + "step": 109 + }, + { + "epoch": 0.2866535174227482, + "high_lr": 0.0009426315789473684, + "low_lr": 1.885263157894737e-05, + "step": 109 + }, + { + "epoch": 0.2892833662064431, + "grad_norm": 0.6176226139068604, + "learning_rate": 0.0009421052631578948, + "loss": 1.8167, + "step": 110 + }, + { + "epoch": 0.2892833662064431, + "high_lr": 0.0009421052631578948, + "low_lr": 1.8842105263157898e-05, + "step": 110 + }, + { + "epoch": 0.2892833662064431, + "high_lr": 0.0009421052631578948, + "low_lr": 1.8842105263157898e-05, + "step": 110 + }, + { + "epoch": 0.2892833662064431, + "high_lr": 0.0009421052631578948, + "low_lr": 1.8842105263157898e-05, + "step": 110 + }, + { + "epoch": 0.2892833662064431, + "high_lr": 0.0009421052631578948, + "low_lr": 1.8842105263157898e-05, + "step": 110 + }, + { + "epoch": 0.2892833662064431, + "high_lr": 0.0009421052631578948, + "low_lr": 1.8842105263157898e-05, + "step": 110 + }, + { + "epoch": 0.2892833662064431, + "high_lr": 0.0009421052631578948, + "low_lr": 1.8842105263157898e-05, + "step": 110 + }, + { + "epoch": 0.2892833662064431, + "high_lr": 0.0009421052631578948, + "low_lr": 1.8842105263157898e-05, + "step": 110 + }, + { + "epoch": 0.2892833662064431, + "high_lr": 0.0009421052631578948, + "low_lr": 1.8842105263157898e-05, + "step": 110 + }, + { + "epoch": 0.29191321499013806, + "grad_norm": 0.6795015931129456, + "learning_rate": 0.0009415789473684211, + "loss": 1.9002, + "step": 111 + }, + { + "epoch": 0.29191321499013806, + "high_lr": 0.0009415789473684211, + "low_lr": 1.8831578947368422e-05, + "step": 111 + }, + { + "epoch": 0.29191321499013806, + "high_lr": 0.0009415789473684211, + "low_lr": 1.8831578947368422e-05, + "step": 111 + }, + { + "epoch": 0.29191321499013806, + "high_lr": 0.0009415789473684211, + "low_lr": 1.8831578947368422e-05, + "step": 111 + }, + { + "epoch": 0.29191321499013806, + "high_lr": 0.0009415789473684211, + "low_lr": 1.8831578947368422e-05, + "step": 111 + }, + { + "epoch": 0.29191321499013806, + "high_lr": 0.0009415789473684211, + "low_lr": 1.8831578947368422e-05, + "step": 111 + }, + { + "epoch": 0.29191321499013806, + "high_lr": 0.0009415789473684211, + "low_lr": 1.8831578947368422e-05, + "step": 111 + }, + { + "epoch": 0.29191321499013806, + "high_lr": 0.0009415789473684211, + "low_lr": 1.8831578947368422e-05, + "step": 111 + }, + { + "epoch": 0.29191321499013806, + "high_lr": 0.0009415789473684211, + "low_lr": 1.8831578947368422e-05, + "step": 111 + }, + { + "epoch": 0.294543063773833, + "grad_norm": 0.7737109065055847, + "learning_rate": 0.0009410526315789474, + "loss": 2.0236, + "step": 112 + }, + { + "epoch": 0.294543063773833, + "high_lr": 0.0009410526315789474, + "low_lr": 1.882105263157895e-05, + "step": 112 + }, + { + "epoch": 0.294543063773833, + "high_lr": 0.0009410526315789474, + "low_lr": 1.882105263157895e-05, + "step": 112 + }, + { + "epoch": 0.294543063773833, + "high_lr": 0.0009410526315789474, + "low_lr": 1.882105263157895e-05, + "step": 112 + }, + { + "epoch": 0.294543063773833, + "high_lr": 0.0009410526315789474, + "low_lr": 1.882105263157895e-05, + "step": 112 + }, + { + "epoch": 0.294543063773833, + "high_lr": 0.0009410526315789474, + "low_lr": 1.882105263157895e-05, + "step": 112 + }, + { + "epoch": 0.294543063773833, + "high_lr": 0.0009410526315789474, + "low_lr": 1.882105263157895e-05, + "step": 112 + }, + { + "epoch": 0.294543063773833, + "high_lr": 0.0009410526315789474, + "low_lr": 1.882105263157895e-05, + "step": 112 + }, + { + "epoch": 0.294543063773833, + "high_lr": 0.0009410526315789474, + "low_lr": 1.882105263157895e-05, + "step": 112 + }, + { + "epoch": 0.2971729125575279, + "grad_norm": 0.6610509753227234, + "learning_rate": 0.0009405263157894737, + "loss": 1.9563, + "step": 113 + }, + { + "epoch": 0.2971729125575279, + "high_lr": 0.0009405263157894737, + "low_lr": 1.8810526315789475e-05, + "step": 113 + }, + { + "epoch": 0.2971729125575279, + "high_lr": 0.0009405263157894737, + "low_lr": 1.8810526315789475e-05, + "step": 113 + }, + { + "epoch": 0.2971729125575279, + "high_lr": 0.0009405263157894737, + "low_lr": 1.8810526315789475e-05, + "step": 113 + }, + { + "epoch": 0.2971729125575279, + "high_lr": 0.0009405263157894737, + "low_lr": 1.8810526315789475e-05, + "step": 113 + }, + { + "epoch": 0.2971729125575279, + "high_lr": 0.0009405263157894737, + "low_lr": 1.8810526315789475e-05, + "step": 113 + }, + { + "epoch": 0.2971729125575279, + "high_lr": 0.0009405263157894737, + "low_lr": 1.8810526315789475e-05, + "step": 113 + }, + { + "epoch": 0.2971729125575279, + "high_lr": 0.0009405263157894737, + "low_lr": 1.8810526315789475e-05, + "step": 113 + }, + { + "epoch": 0.2971729125575279, + "high_lr": 0.0009405263157894737, + "low_lr": 1.8810526315789475e-05, + "step": 113 + }, + { + "epoch": 0.29980276134122286, + "grad_norm": 0.9263393878936768, + "learning_rate": 0.00094, + "loss": 1.8574, + "step": 114 + }, + { + "epoch": 0.29980276134122286, + "high_lr": 0.00094, + "low_lr": 1.88e-05, + "step": 114 + }, + { + "epoch": 0.29980276134122286, + "high_lr": 0.00094, + "low_lr": 1.88e-05, + "step": 114 + }, + { + "epoch": 0.29980276134122286, + "high_lr": 0.00094, + "low_lr": 1.88e-05, + "step": 114 + }, + { + "epoch": 0.29980276134122286, + "high_lr": 0.00094, + "low_lr": 1.88e-05, + "step": 114 + }, + { + "epoch": 0.29980276134122286, + "high_lr": 0.00094, + "low_lr": 1.88e-05, + "step": 114 + }, + { + "epoch": 0.29980276134122286, + "high_lr": 0.00094, + "low_lr": 1.88e-05, + "step": 114 + }, + { + "epoch": 0.29980276134122286, + "high_lr": 0.00094, + "low_lr": 1.88e-05, + "step": 114 + }, + { + "epoch": 0.29980276134122286, + "high_lr": 0.00094, + "low_lr": 1.88e-05, + "step": 114 + }, + { + "epoch": 0.30243261012491784, + "grad_norm": 0.7165038585662842, + "learning_rate": 0.0009394736842105264, + "loss": 1.9537, + "step": 115 + }, + { + "epoch": 0.30243261012491784, + "high_lr": 0.0009394736842105264, + "low_lr": 1.8789473684210528e-05, + "step": 115 + }, + { + "epoch": 0.30243261012491784, + "high_lr": 0.0009394736842105264, + "low_lr": 1.8789473684210528e-05, + "step": 115 + }, + { + "epoch": 0.30243261012491784, + "high_lr": 0.0009394736842105264, + "low_lr": 1.8789473684210528e-05, + "step": 115 + }, + { + "epoch": 0.30243261012491784, + "high_lr": 0.0009394736842105264, + "low_lr": 1.8789473684210528e-05, + "step": 115 + }, + { + "epoch": 0.30243261012491784, + "high_lr": 0.0009394736842105264, + "low_lr": 1.8789473684210528e-05, + "step": 115 + }, + { + "epoch": 0.30243261012491784, + "high_lr": 0.0009394736842105264, + "low_lr": 1.8789473684210528e-05, + "step": 115 + }, + { + "epoch": 0.30243261012491784, + "high_lr": 0.0009394736842105264, + "low_lr": 1.8789473684210528e-05, + "step": 115 + }, + { + "epoch": 0.30243261012491784, + "high_lr": 0.0009394736842105264, + "low_lr": 1.8789473684210528e-05, + "step": 115 + }, + { + "epoch": 0.3050624589086128, + "grad_norm": 0.7112441658973694, + "learning_rate": 0.0009389473684210527, + "loss": 1.9437, + "step": 116 + }, + { + "epoch": 0.3050624589086128, + "high_lr": 0.0009389473684210527, + "low_lr": 1.8778947368421056e-05, + "step": 116 + }, + { + "epoch": 0.3050624589086128, + "high_lr": 0.0009389473684210527, + "low_lr": 1.8778947368421056e-05, + "step": 116 + }, + { + "epoch": 0.3050624589086128, + "high_lr": 0.0009389473684210527, + "low_lr": 1.8778947368421056e-05, + "step": 116 + }, + { + "epoch": 0.3050624589086128, + "high_lr": 0.0009389473684210527, + "low_lr": 1.8778947368421056e-05, + "step": 116 + }, + { + "epoch": 0.3050624589086128, + "high_lr": 0.0009389473684210527, + "low_lr": 1.8778947368421056e-05, + "step": 116 + }, + { + "epoch": 0.3050624589086128, + "high_lr": 0.0009389473684210527, + "low_lr": 1.8778947368421056e-05, + "step": 116 + }, + { + "epoch": 0.3050624589086128, + "high_lr": 0.0009389473684210527, + "low_lr": 1.8778947368421056e-05, + "step": 116 + }, + { + "epoch": 0.3050624589086128, + "high_lr": 0.0009389473684210527, + "low_lr": 1.8778947368421056e-05, + "step": 116 + }, + { + "epoch": 0.3076923076923077, + "grad_norm": 0.6522067785263062, + "learning_rate": 0.0009384210526315789, + "loss": 1.905, + "step": 117 + }, + { + "epoch": 0.3076923076923077, + "high_lr": 0.0009384210526315789, + "low_lr": 1.876842105263158e-05, + "step": 117 + }, + { + "epoch": 0.3076923076923077, + "high_lr": 0.0009384210526315789, + "low_lr": 1.876842105263158e-05, + "step": 117 + }, + { + "epoch": 0.3076923076923077, + "high_lr": 0.0009384210526315789, + "low_lr": 1.876842105263158e-05, + "step": 117 + }, + { + "epoch": 0.3076923076923077, + "high_lr": 0.0009384210526315789, + "low_lr": 1.876842105263158e-05, + "step": 117 + }, + { + "epoch": 0.3076923076923077, + "high_lr": 0.0009384210526315789, + "low_lr": 1.876842105263158e-05, + "step": 117 + }, + { + "epoch": 0.3076923076923077, + "high_lr": 0.0009384210526315789, + "low_lr": 1.876842105263158e-05, + "step": 117 + }, + { + "epoch": 0.3076923076923077, + "high_lr": 0.0009384210526315789, + "low_lr": 1.876842105263158e-05, + "step": 117 + }, + { + "epoch": 0.3076923076923077, + "high_lr": 0.0009384210526315789, + "low_lr": 1.876842105263158e-05, + "step": 117 + }, + { + "epoch": 0.31032215647600264, + "grad_norm": 0.6830266714096069, + "learning_rate": 0.0009378947368421052, + "loss": 1.9671, + "step": 118 + }, + { + "epoch": 0.31032215647600264, + "high_lr": 0.0009378947368421052, + "low_lr": 1.8757894736842105e-05, + "step": 118 + }, + { + "epoch": 0.31032215647600264, + "high_lr": 0.0009378947368421052, + "low_lr": 1.8757894736842105e-05, + "step": 118 + }, + { + "epoch": 0.31032215647600264, + "high_lr": 0.0009378947368421052, + "low_lr": 1.8757894736842105e-05, + "step": 118 + }, + { + "epoch": 0.31032215647600264, + "high_lr": 0.0009378947368421052, + "low_lr": 1.8757894736842105e-05, + "step": 118 + }, + { + "epoch": 0.31032215647600264, + "high_lr": 0.0009378947368421052, + "low_lr": 1.8757894736842105e-05, + "step": 118 + }, + { + "epoch": 0.31032215647600264, + "high_lr": 0.0009378947368421052, + "low_lr": 1.8757894736842105e-05, + "step": 118 + }, + { + "epoch": 0.31032215647600264, + "high_lr": 0.0009378947368421052, + "low_lr": 1.8757894736842105e-05, + "step": 118 + }, + { + "epoch": 0.31032215647600264, + "high_lr": 0.0009378947368421052, + "low_lr": 1.8757894736842105e-05, + "step": 118 + }, + { + "epoch": 0.3129520052596976, + "grad_norm": 1.746518611907959, + "learning_rate": 0.0009373684210526316, + "loss": 1.9321, + "step": 119 + }, + { + "epoch": 0.3129520052596976, + "high_lr": 0.0009373684210526316, + "low_lr": 1.8747368421052633e-05, + "step": 119 + }, + { + "epoch": 0.3129520052596976, + "high_lr": 0.0009373684210526316, + "low_lr": 1.8747368421052633e-05, + "step": 119 + }, + { + "epoch": 0.3129520052596976, + "high_lr": 0.0009373684210526316, + "low_lr": 1.8747368421052633e-05, + "step": 119 + }, + { + "epoch": 0.3129520052596976, + "high_lr": 0.0009373684210526316, + "low_lr": 1.8747368421052633e-05, + "step": 119 + }, + { + "epoch": 0.3129520052596976, + "high_lr": 0.0009373684210526316, + "low_lr": 1.8747368421052633e-05, + "step": 119 + }, + { + "epoch": 0.3129520052596976, + "high_lr": 0.0009373684210526316, + "low_lr": 1.8747368421052633e-05, + "step": 119 + }, + { + "epoch": 0.3129520052596976, + "high_lr": 0.0009373684210526316, + "low_lr": 1.8747368421052633e-05, + "step": 119 + }, + { + "epoch": 0.3129520052596976, + "high_lr": 0.0009373684210526316, + "low_lr": 1.8747368421052633e-05, + "step": 119 + }, + { + "epoch": 0.3155818540433925, + "grad_norm": 0.6991109848022461, + "learning_rate": 0.0009368421052631579, + "loss": 1.9557, + "step": 120 + }, + { + "epoch": 0.3155818540433925, + "high_lr": 0.0009368421052631579, + "low_lr": 1.873684210526316e-05, + "step": 120 + }, + { + "epoch": 0.3155818540433925, + "high_lr": 0.0009368421052631579, + "low_lr": 1.873684210526316e-05, + "step": 120 + }, + { + "epoch": 0.3155818540433925, + "high_lr": 0.0009368421052631579, + "low_lr": 1.873684210526316e-05, + "step": 120 + }, + { + "epoch": 0.3155818540433925, + "high_lr": 0.0009368421052631579, + "low_lr": 1.873684210526316e-05, + "step": 120 + }, + { + "epoch": 0.3155818540433925, + "high_lr": 0.0009368421052631579, + "low_lr": 1.873684210526316e-05, + "step": 120 + }, + { + "epoch": 0.3155818540433925, + "high_lr": 0.0009368421052631579, + "low_lr": 1.873684210526316e-05, + "step": 120 + }, + { + "epoch": 0.3155818540433925, + "high_lr": 0.0009368421052631579, + "low_lr": 1.873684210526316e-05, + "step": 120 + }, + { + "epoch": 0.3155818540433925, + "high_lr": 0.0009368421052631579, + "low_lr": 1.873684210526316e-05, + "step": 120 + }, + { + "epoch": 0.31821170282708744, + "grad_norm": 0.6823937892913818, + "learning_rate": 0.0009363157894736842, + "loss": 1.9575, + "step": 121 + }, + { + "epoch": 0.31821170282708744, + "high_lr": 0.0009363157894736842, + "low_lr": 1.8726315789473686e-05, + "step": 121 + }, + { + "epoch": 0.31821170282708744, + "high_lr": 0.0009363157894736842, + "low_lr": 1.8726315789473686e-05, + "step": 121 + }, + { + "epoch": 0.31821170282708744, + "high_lr": 0.0009363157894736842, + "low_lr": 1.8726315789473686e-05, + "step": 121 + }, + { + "epoch": 0.31821170282708744, + "high_lr": 0.0009363157894736842, + "low_lr": 1.8726315789473686e-05, + "step": 121 + }, + { + "epoch": 0.31821170282708744, + "high_lr": 0.0009363157894736842, + "low_lr": 1.8726315789473686e-05, + "step": 121 + }, + { + "epoch": 0.31821170282708744, + "high_lr": 0.0009363157894736842, + "low_lr": 1.8726315789473686e-05, + "step": 121 + }, + { + "epoch": 0.31821170282708744, + "high_lr": 0.0009363157894736842, + "low_lr": 1.8726315789473686e-05, + "step": 121 + }, + { + "epoch": 0.31821170282708744, + "high_lr": 0.0009363157894736842, + "low_lr": 1.8726315789473686e-05, + "step": 121 + }, + { + "epoch": 0.32084155161078237, + "grad_norm": 0.901719331741333, + "learning_rate": 0.0009357894736842105, + "loss": 1.9167, + "step": 122 + }, + { + "epoch": 0.32084155161078237, + "high_lr": 0.0009357894736842105, + "low_lr": 1.871578947368421e-05, + "step": 122 + }, + { + "epoch": 0.32084155161078237, + "high_lr": 0.0009357894736842105, + "low_lr": 1.871578947368421e-05, + "step": 122 + }, + { + "epoch": 0.32084155161078237, + "high_lr": 0.0009357894736842105, + "low_lr": 1.871578947368421e-05, + "step": 122 + }, + { + "epoch": 0.32084155161078237, + "high_lr": 0.0009357894736842105, + "low_lr": 1.871578947368421e-05, + "step": 122 + }, + { + "epoch": 0.32084155161078237, + "high_lr": 0.0009357894736842105, + "low_lr": 1.871578947368421e-05, + "step": 122 + }, + { + "epoch": 0.32084155161078237, + "high_lr": 0.0009357894736842105, + "low_lr": 1.871578947368421e-05, + "step": 122 + }, + { + "epoch": 0.32084155161078237, + "high_lr": 0.0009357894736842105, + "low_lr": 1.871578947368421e-05, + "step": 122 + }, + { + "epoch": 0.32084155161078237, + "high_lr": 0.0009357894736842105, + "low_lr": 1.871578947368421e-05, + "step": 122 + }, + { + "epoch": 0.3234714003944773, + "grad_norm": 0.6262206435203552, + "learning_rate": 0.0009352631578947368, + "loss": 1.9214, + "step": 123 + }, + { + "epoch": 0.3234714003944773, + "high_lr": 0.0009352631578947368, + "low_lr": 1.8705263157894738e-05, + "step": 123 + }, + { + "epoch": 0.3234714003944773, + "high_lr": 0.0009352631578947368, + "low_lr": 1.8705263157894738e-05, + "step": 123 + }, + { + "epoch": 0.3234714003944773, + "high_lr": 0.0009352631578947368, + "low_lr": 1.8705263157894738e-05, + "step": 123 + }, + { + "epoch": 0.3234714003944773, + "high_lr": 0.0009352631578947368, + "low_lr": 1.8705263157894738e-05, + "step": 123 + }, + { + "epoch": 0.3234714003944773, + "high_lr": 0.0009352631578947368, + "low_lr": 1.8705263157894738e-05, + "step": 123 + }, + { + "epoch": 0.3234714003944773, + "high_lr": 0.0009352631578947368, + "low_lr": 1.8705263157894738e-05, + "step": 123 + }, + { + "epoch": 0.3234714003944773, + "high_lr": 0.0009352631578947368, + "low_lr": 1.8705263157894738e-05, + "step": 123 + }, + { + "epoch": 0.3234714003944773, + "high_lr": 0.0009352631578947368, + "low_lr": 1.8705263157894738e-05, + "step": 123 + }, + { + "epoch": 0.32610124917817224, + "grad_norm": 0.800382673740387, + "learning_rate": 0.0009347368421052633, + "loss": 1.9136, + "step": 124 + }, + { + "epoch": 0.32610124917817224, + "high_lr": 0.0009347368421052633, + "low_lr": 1.8694736842105266e-05, + "step": 124 + }, + { + "epoch": 0.32610124917817224, + "high_lr": 0.0009347368421052633, + "low_lr": 1.8694736842105266e-05, + "step": 124 + }, + { + "epoch": 0.32610124917817224, + "high_lr": 0.0009347368421052633, + "low_lr": 1.8694736842105266e-05, + "step": 124 + }, + { + "epoch": 0.32610124917817224, + "high_lr": 0.0009347368421052633, + "low_lr": 1.8694736842105266e-05, + "step": 124 + }, + { + "epoch": 0.32610124917817224, + "high_lr": 0.0009347368421052633, + "low_lr": 1.8694736842105266e-05, + "step": 124 + }, + { + "epoch": 0.32610124917817224, + "high_lr": 0.0009347368421052633, + "low_lr": 1.8694736842105266e-05, + "step": 124 + }, + { + "epoch": 0.32610124917817224, + "high_lr": 0.0009347368421052633, + "low_lr": 1.8694736842105266e-05, + "step": 124 + }, + { + "epoch": 0.32610124917817224, + "high_lr": 0.0009347368421052633, + "low_lr": 1.8694736842105266e-05, + "step": 124 + }, + { + "epoch": 0.32873109796186717, + "grad_norm": 0.6870277523994446, + "learning_rate": 0.0009342105263157896, + "loss": 1.8989, + "step": 125 + }, + { + "epoch": 0.32873109796186717, + "high_lr": 0.0009342105263157896, + "low_lr": 1.868421052631579e-05, + "step": 125 + }, + { + "epoch": 0.32873109796186717, + "high_lr": 0.0009342105263157896, + "low_lr": 1.868421052631579e-05, + "step": 125 + }, + { + "epoch": 0.32873109796186717, + "high_lr": 0.0009342105263157896, + "low_lr": 1.868421052631579e-05, + "step": 125 + }, + { + "epoch": 0.32873109796186717, + "high_lr": 0.0009342105263157896, + "low_lr": 1.868421052631579e-05, + "step": 125 + }, + { + "epoch": 0.32873109796186717, + "high_lr": 0.0009342105263157896, + "low_lr": 1.868421052631579e-05, + "step": 125 + }, + { + "epoch": 0.32873109796186717, + "high_lr": 0.0009342105263157896, + "low_lr": 1.868421052631579e-05, + "step": 125 + }, + { + "epoch": 0.32873109796186717, + "high_lr": 0.0009342105263157896, + "low_lr": 1.868421052631579e-05, + "step": 125 + }, + { + "epoch": 0.32873109796186717, + "high_lr": 0.0009342105263157896, + "low_lr": 1.868421052631579e-05, + "step": 125 + }, + { + "epoch": 0.33136094674556216, + "grad_norm": 0.7083466053009033, + "learning_rate": 0.0009336842105263158, + "loss": 1.8249, + "step": 126 + }, + { + "epoch": 0.33136094674556216, + "high_lr": 0.0009336842105263158, + "low_lr": 1.8673684210526316e-05, + "step": 126 + }, + { + "epoch": 0.33136094674556216, + "high_lr": 0.0009336842105263158, + "low_lr": 1.8673684210526316e-05, + "step": 126 + }, + { + "epoch": 0.33136094674556216, + "high_lr": 0.0009336842105263158, + "low_lr": 1.8673684210526316e-05, + "step": 126 + }, + { + "epoch": 0.33136094674556216, + "high_lr": 0.0009336842105263158, + "low_lr": 1.8673684210526316e-05, + "step": 126 + }, + { + "epoch": 0.33136094674556216, + "high_lr": 0.0009336842105263158, + "low_lr": 1.8673684210526316e-05, + "step": 126 + }, + { + "epoch": 0.33136094674556216, + "high_lr": 0.0009336842105263158, + "low_lr": 1.8673684210526316e-05, + "step": 126 + }, + { + "epoch": 0.33136094674556216, + "high_lr": 0.0009336842105263158, + "low_lr": 1.8673684210526316e-05, + "step": 126 + }, + { + "epoch": 0.33136094674556216, + "high_lr": 0.0009336842105263158, + "low_lr": 1.8673684210526316e-05, + "step": 126 + }, + { + "epoch": 0.3339907955292571, + "grad_norm": 0.6873717904090881, + "learning_rate": 0.0009331578947368421, + "loss": 1.9402, + "step": 127 + }, + { + "epoch": 0.3339907955292571, + "high_lr": 0.0009331578947368421, + "low_lr": 1.8663157894736844e-05, + "step": 127 + }, + { + "epoch": 0.3339907955292571, + "high_lr": 0.0009331578947368421, + "low_lr": 1.8663157894736844e-05, + "step": 127 + }, + { + "epoch": 0.3339907955292571, + "high_lr": 0.0009331578947368421, + "low_lr": 1.8663157894736844e-05, + "step": 127 + }, + { + "epoch": 0.3339907955292571, + "high_lr": 0.0009331578947368421, + "low_lr": 1.8663157894736844e-05, + "step": 127 + }, + { + "epoch": 0.3339907955292571, + "high_lr": 0.0009331578947368421, + "low_lr": 1.8663157894736844e-05, + "step": 127 + }, + { + "epoch": 0.3339907955292571, + "high_lr": 0.0009331578947368421, + "low_lr": 1.8663157894736844e-05, + "step": 127 + }, + { + "epoch": 0.3339907955292571, + "high_lr": 0.0009331578947368421, + "low_lr": 1.8663157894736844e-05, + "step": 127 + }, + { + "epoch": 0.3339907955292571, + "high_lr": 0.0009331578947368421, + "low_lr": 1.8663157894736844e-05, + "step": 127 + }, + { + "epoch": 0.336620644312952, + "grad_norm": 0.6439316272735596, + "learning_rate": 0.0009326315789473684, + "loss": 1.8117, + "step": 128 + }, + { + "epoch": 0.336620644312952, + "high_lr": 0.0009326315789473684, + "low_lr": 1.8652631578947368e-05, + "step": 128 + }, + { + "epoch": 0.336620644312952, + "high_lr": 0.0009326315789473684, + "low_lr": 1.8652631578947368e-05, + "step": 128 + }, + { + "epoch": 0.336620644312952, + "high_lr": 0.0009326315789473684, + "low_lr": 1.8652631578947368e-05, + "step": 128 + }, + { + "epoch": 0.336620644312952, + "high_lr": 0.0009326315789473684, + "low_lr": 1.8652631578947368e-05, + "step": 128 + }, + { + "epoch": 0.336620644312952, + "high_lr": 0.0009326315789473684, + "low_lr": 1.8652631578947368e-05, + "step": 128 + }, + { + "epoch": 0.336620644312952, + "high_lr": 0.0009326315789473684, + "low_lr": 1.8652631578947368e-05, + "step": 128 + }, + { + "epoch": 0.336620644312952, + "high_lr": 0.0009326315789473684, + "low_lr": 1.8652631578947368e-05, + "step": 128 + }, + { + "epoch": 0.336620644312952, + "high_lr": 0.0009326315789473684, + "low_lr": 1.8652631578947368e-05, + "step": 128 + }, + { + "epoch": 0.33925049309664695, + "grad_norm": 0.6715201139450073, + "learning_rate": 0.0009321052631578948, + "loss": 1.9089, + "step": 129 + }, + { + "epoch": 0.33925049309664695, + "high_lr": 0.0009321052631578948, + "low_lr": 1.8642105263157896e-05, + "step": 129 + }, + { + "epoch": 0.33925049309664695, + "high_lr": 0.0009321052631578948, + "low_lr": 1.8642105263157896e-05, + "step": 129 + }, + { + "epoch": 0.33925049309664695, + "high_lr": 0.0009321052631578948, + "low_lr": 1.8642105263157896e-05, + "step": 129 + }, + { + "epoch": 0.33925049309664695, + "high_lr": 0.0009321052631578948, + "low_lr": 1.8642105263157896e-05, + "step": 129 + }, + { + "epoch": 0.33925049309664695, + "high_lr": 0.0009321052631578948, + "low_lr": 1.8642105263157896e-05, + "step": 129 + }, + { + "epoch": 0.33925049309664695, + "high_lr": 0.0009321052631578948, + "low_lr": 1.8642105263157896e-05, + "step": 129 + }, + { + "epoch": 0.33925049309664695, + "high_lr": 0.0009321052631578948, + "low_lr": 1.8642105263157896e-05, + "step": 129 + }, + { + "epoch": 0.33925049309664695, + "high_lr": 0.0009321052631578948, + "low_lr": 1.8642105263157896e-05, + "step": 129 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.7621733546257019, + "learning_rate": 0.0009315789473684211, + "loss": 1.8731, + "step": 130 + }, + { + "epoch": 0.3418803418803419, + "high_lr": 0.0009315789473684211, + "low_lr": 1.8631578947368424e-05, + "step": 130 + }, + { + "epoch": 0.3418803418803419, + "high_lr": 0.0009315789473684211, + "low_lr": 1.8631578947368424e-05, + "step": 130 + }, + { + "epoch": 0.3418803418803419, + "high_lr": 0.0009315789473684211, + "low_lr": 1.8631578947368424e-05, + "step": 130 + }, + { + "epoch": 0.3418803418803419, + "high_lr": 0.0009315789473684211, + "low_lr": 1.8631578947368424e-05, + "step": 130 + }, + { + "epoch": 0.3418803418803419, + "high_lr": 0.0009315789473684211, + "low_lr": 1.8631578947368424e-05, + "step": 130 + }, + { + "epoch": 0.3418803418803419, + "high_lr": 0.0009315789473684211, + "low_lr": 1.8631578947368424e-05, + "step": 130 + }, + { + "epoch": 0.3418803418803419, + "high_lr": 0.0009315789473684211, + "low_lr": 1.8631578947368424e-05, + "step": 130 + }, + { + "epoch": 0.3418803418803419, + "high_lr": 0.0009315789473684211, + "low_lr": 1.8631578947368424e-05, + "step": 130 + }, + { + "epoch": 0.3445101906640368, + "grad_norm": 0.817223072052002, + "learning_rate": 0.0009310526315789474, + "loss": 1.8612, + "step": 131 + }, + { + "epoch": 0.3445101906640368, + "high_lr": 0.0009310526315789474, + "low_lr": 1.862105263157895e-05, + "step": 131 + }, + { + "epoch": 0.3445101906640368, + "high_lr": 0.0009310526315789474, + "low_lr": 1.862105263157895e-05, + "step": 131 + }, + { + "epoch": 0.3445101906640368, + "high_lr": 0.0009310526315789474, + "low_lr": 1.862105263157895e-05, + "step": 131 + }, + { + "epoch": 0.3445101906640368, + "high_lr": 0.0009310526315789474, + "low_lr": 1.862105263157895e-05, + "step": 131 + }, + { + "epoch": 0.3445101906640368, + "high_lr": 0.0009310526315789474, + "low_lr": 1.862105263157895e-05, + "step": 131 + }, + { + "epoch": 0.3445101906640368, + "high_lr": 0.0009310526315789474, + "low_lr": 1.862105263157895e-05, + "step": 131 + }, + { + "epoch": 0.3445101906640368, + "high_lr": 0.0009310526315789474, + "low_lr": 1.862105263157895e-05, + "step": 131 + }, + { + "epoch": 0.3445101906640368, + "high_lr": 0.0009310526315789474, + "low_lr": 1.862105263157895e-05, + "step": 131 + }, + { + "epoch": 0.34714003944773175, + "grad_norm": 0.8939811587333679, + "learning_rate": 0.0009305263157894737, + "loss": 1.882, + "step": 132 + }, + { + "epoch": 0.34714003944773175, + "high_lr": 0.0009305263157894737, + "low_lr": 1.8610526315789473e-05, + "step": 132 + }, + { + "epoch": 0.34714003944773175, + "high_lr": 0.0009305263157894737, + "low_lr": 1.8610526315789473e-05, + "step": 132 + }, + { + "epoch": 0.34714003944773175, + "high_lr": 0.0009305263157894737, + "low_lr": 1.8610526315789473e-05, + "step": 132 + }, + { + "epoch": 0.34714003944773175, + "high_lr": 0.0009305263157894737, + "low_lr": 1.8610526315789473e-05, + "step": 132 + }, + { + "epoch": 0.34714003944773175, + "high_lr": 0.0009305263157894737, + "low_lr": 1.8610526315789473e-05, + "step": 132 + }, + { + "epoch": 0.34714003944773175, + "high_lr": 0.0009305263157894737, + "low_lr": 1.8610526315789473e-05, + "step": 132 + }, + { + "epoch": 0.34714003944773175, + "high_lr": 0.0009305263157894737, + "low_lr": 1.8610526315789473e-05, + "step": 132 + }, + { + "epoch": 0.34714003944773175, + "high_lr": 0.0009305263157894737, + "low_lr": 1.8610526315789473e-05, + "step": 132 + }, + { + "epoch": 0.3497698882314267, + "grad_norm": 0.6691973209381104, + "learning_rate": 0.00093, + "loss": 1.8966, + "step": 133 + }, + { + "epoch": 0.3497698882314267, + "high_lr": 0.00093, + "low_lr": 1.86e-05, + "step": 133 + }, + { + "epoch": 0.3497698882314267, + "high_lr": 0.00093, + "low_lr": 1.86e-05, + "step": 133 + }, + { + "epoch": 0.3497698882314267, + "high_lr": 0.00093, + "low_lr": 1.86e-05, + "step": 133 + }, + { + "epoch": 0.3497698882314267, + "high_lr": 0.00093, + "low_lr": 1.86e-05, + "step": 133 + }, + { + "epoch": 0.3497698882314267, + "high_lr": 0.00093, + "low_lr": 1.86e-05, + "step": 133 + }, + { + "epoch": 0.3497698882314267, + "high_lr": 0.00093, + "low_lr": 1.86e-05, + "step": 133 + }, + { + "epoch": 0.3497698882314267, + "high_lr": 0.00093, + "low_lr": 1.86e-05, + "step": 133 + }, + { + "epoch": 0.3497698882314267, + "high_lr": 0.00093, + "low_lr": 1.86e-05, + "step": 133 + }, + { + "epoch": 0.3523997370151216, + "grad_norm": 1.2141129970550537, + "learning_rate": 0.0009294736842105263, + "loss": 1.9498, + "step": 134 + }, + { + "epoch": 0.3523997370151216, + "high_lr": 0.0009294736842105263, + "low_lr": 1.858947368421053e-05, + "step": 134 + }, + { + "epoch": 0.3523997370151216, + "high_lr": 0.0009294736842105263, + "low_lr": 1.858947368421053e-05, + "step": 134 + }, + { + "epoch": 0.3523997370151216, + "high_lr": 0.0009294736842105263, + "low_lr": 1.858947368421053e-05, + "step": 134 + }, + { + "epoch": 0.3523997370151216, + "high_lr": 0.0009294736842105263, + "low_lr": 1.858947368421053e-05, + "step": 134 + }, + { + "epoch": 0.3523997370151216, + "high_lr": 0.0009294736842105263, + "low_lr": 1.858947368421053e-05, + "step": 134 + }, + { + "epoch": 0.3523997370151216, + "high_lr": 0.0009294736842105263, + "low_lr": 1.858947368421053e-05, + "step": 134 + }, + { + "epoch": 0.3523997370151216, + "high_lr": 0.0009294736842105263, + "low_lr": 1.858947368421053e-05, + "step": 134 + }, + { + "epoch": 0.3523997370151216, + "high_lr": 0.0009294736842105263, + "low_lr": 1.858947368421053e-05, + "step": 134 + }, + { + "epoch": 0.35502958579881655, + "grad_norm": 0.7771977782249451, + "learning_rate": 0.0009289473684210526, + "loss": 2.0322, + "step": 135 + }, + { + "epoch": 0.35502958579881655, + "high_lr": 0.0009289473684210526, + "low_lr": 1.8578947368421054e-05, + "step": 135 + }, + { + "epoch": 0.35502958579881655, + "high_lr": 0.0009289473684210526, + "low_lr": 1.8578947368421054e-05, + "step": 135 + }, + { + "epoch": 0.35502958579881655, + "high_lr": 0.0009289473684210526, + "low_lr": 1.8578947368421054e-05, + "step": 135 + }, + { + "epoch": 0.35502958579881655, + "high_lr": 0.0009289473684210526, + "low_lr": 1.8578947368421054e-05, + "step": 135 + }, + { + "epoch": 0.35502958579881655, + "high_lr": 0.0009289473684210526, + "low_lr": 1.8578947368421054e-05, + "step": 135 + }, + { + "epoch": 0.35502958579881655, + "high_lr": 0.0009289473684210526, + "low_lr": 1.8578947368421054e-05, + "step": 135 + }, + { + "epoch": 0.35502958579881655, + "high_lr": 0.0009289473684210526, + "low_lr": 1.8578947368421054e-05, + "step": 135 + }, + { + "epoch": 0.35502958579881655, + "high_lr": 0.0009289473684210526, + "low_lr": 1.8578947368421054e-05, + "step": 135 + }, + { + "epoch": 0.3576594345825115, + "grad_norm": 0.7535092830657959, + "learning_rate": 0.0009284210526315789, + "loss": 1.9483, + "step": 136 + }, + { + "epoch": 0.3576594345825115, + "high_lr": 0.0009284210526315789, + "low_lr": 1.856842105263158e-05, + "step": 136 + }, + { + "epoch": 0.3576594345825115, + "high_lr": 0.0009284210526315789, + "low_lr": 1.856842105263158e-05, + "step": 136 + }, + { + "epoch": 0.3576594345825115, + "high_lr": 0.0009284210526315789, + "low_lr": 1.856842105263158e-05, + "step": 136 + }, + { + "epoch": 0.3576594345825115, + "high_lr": 0.0009284210526315789, + "low_lr": 1.856842105263158e-05, + "step": 136 + }, + { + "epoch": 0.3576594345825115, + "high_lr": 0.0009284210526315789, + "low_lr": 1.856842105263158e-05, + "step": 136 + }, + { + "epoch": 0.3576594345825115, + "high_lr": 0.0009284210526315789, + "low_lr": 1.856842105263158e-05, + "step": 136 + }, + { + "epoch": 0.3576594345825115, + "high_lr": 0.0009284210526315789, + "low_lr": 1.856842105263158e-05, + "step": 136 + }, + { + "epoch": 0.3576594345825115, + "high_lr": 0.0009284210526315789, + "low_lr": 1.856842105263158e-05, + "step": 136 + }, + { + "epoch": 0.36028928336620647, + "grad_norm": 0.6840701103210449, + "learning_rate": 0.0009278947368421052, + "loss": 1.8602, + "step": 137 + }, + { + "epoch": 0.36028928336620647, + "high_lr": 0.0009278947368421052, + "low_lr": 1.8557894736842107e-05, + "step": 137 + }, + { + "epoch": 0.36028928336620647, + "high_lr": 0.0009278947368421052, + "low_lr": 1.8557894736842107e-05, + "step": 137 + }, + { + "epoch": 0.36028928336620647, + "high_lr": 0.0009278947368421052, + "low_lr": 1.8557894736842107e-05, + "step": 137 + }, + { + "epoch": 0.36028928336620647, + "high_lr": 0.0009278947368421052, + "low_lr": 1.8557894736842107e-05, + "step": 137 + }, + { + "epoch": 0.36028928336620647, + "high_lr": 0.0009278947368421052, + "low_lr": 1.8557894736842107e-05, + "step": 137 + }, + { + "epoch": 0.36028928336620647, + "high_lr": 0.0009278947368421052, + "low_lr": 1.8557894736842107e-05, + "step": 137 + }, + { + "epoch": 0.36028928336620647, + "high_lr": 0.0009278947368421052, + "low_lr": 1.8557894736842107e-05, + "step": 137 + }, + { + "epoch": 0.36028928336620647, + "high_lr": 0.0009278947368421052, + "low_lr": 1.8557894736842107e-05, + "step": 137 + }, + { + "epoch": 0.3629191321499014, + "grad_norm": 0.6990517973899841, + "learning_rate": 0.0009273684210526316, + "loss": 1.7916, + "step": 138 + }, + { + "epoch": 0.3629191321499014, + "high_lr": 0.0009273684210526316, + "low_lr": 1.8547368421052635e-05, + "step": 138 + }, + { + "epoch": 0.3629191321499014, + "high_lr": 0.0009273684210526316, + "low_lr": 1.8547368421052635e-05, + "step": 138 + }, + { + "epoch": 0.3629191321499014, + "high_lr": 0.0009273684210526316, + "low_lr": 1.8547368421052635e-05, + "step": 138 + }, + { + "epoch": 0.3629191321499014, + "high_lr": 0.0009273684210526316, + "low_lr": 1.8547368421052635e-05, + "step": 138 + }, + { + "epoch": 0.3629191321499014, + "high_lr": 0.0009273684210526316, + "low_lr": 1.8547368421052635e-05, + "step": 138 + }, + { + "epoch": 0.3629191321499014, + "high_lr": 0.0009273684210526316, + "low_lr": 1.8547368421052635e-05, + "step": 138 + }, + { + "epoch": 0.3629191321499014, + "high_lr": 0.0009273684210526316, + "low_lr": 1.8547368421052635e-05, + "step": 138 + }, + { + "epoch": 0.3629191321499014, + "high_lr": 0.0009273684210526316, + "low_lr": 1.8547368421052635e-05, + "step": 138 + }, + { + "epoch": 0.36554898093359633, + "grad_norm": 0.7095828056335449, + "learning_rate": 0.0009268421052631579, + "loss": 1.9012, + "step": 139 + }, + { + "epoch": 0.36554898093359633, + "high_lr": 0.0009268421052631579, + "low_lr": 1.853684210526316e-05, + "step": 139 + }, + { + "epoch": 0.36554898093359633, + "high_lr": 0.0009268421052631579, + "low_lr": 1.853684210526316e-05, + "step": 139 + }, + { + "epoch": 0.36554898093359633, + "high_lr": 0.0009268421052631579, + "low_lr": 1.853684210526316e-05, + "step": 139 + }, + { + "epoch": 0.36554898093359633, + "high_lr": 0.0009268421052631579, + "low_lr": 1.853684210526316e-05, + "step": 139 + }, + { + "epoch": 0.36554898093359633, + "high_lr": 0.0009268421052631579, + "low_lr": 1.853684210526316e-05, + "step": 139 + }, + { + "epoch": 0.36554898093359633, + "high_lr": 0.0009268421052631579, + "low_lr": 1.853684210526316e-05, + "step": 139 + }, + { + "epoch": 0.36554898093359633, + "high_lr": 0.0009268421052631579, + "low_lr": 1.853684210526316e-05, + "step": 139 + }, + { + "epoch": 0.36554898093359633, + "high_lr": 0.0009268421052631579, + "low_lr": 1.853684210526316e-05, + "step": 139 + }, + { + "epoch": 0.36817882971729127, + "grad_norm": 0.6847386360168457, + "learning_rate": 0.0009263157894736843, + "loss": 1.84, + "step": 140 + }, + { + "epoch": 0.36817882971729127, + "high_lr": 0.0009263157894736843, + "low_lr": 1.8526315789473684e-05, + "step": 140 + }, + { + "epoch": 0.36817882971729127, + "high_lr": 0.0009263157894736843, + "low_lr": 1.8526315789473684e-05, + "step": 140 + }, + { + "epoch": 0.36817882971729127, + "high_lr": 0.0009263157894736843, + "low_lr": 1.8526315789473684e-05, + "step": 140 + }, + { + "epoch": 0.36817882971729127, + "high_lr": 0.0009263157894736843, + "low_lr": 1.8526315789473684e-05, + "step": 140 + }, + { + "epoch": 0.36817882971729127, + "high_lr": 0.0009263157894736843, + "low_lr": 1.8526315789473684e-05, + "step": 140 + }, + { + "epoch": 0.36817882971729127, + "high_lr": 0.0009263157894736843, + "low_lr": 1.8526315789473684e-05, + "step": 140 + }, + { + "epoch": 0.36817882971729127, + "high_lr": 0.0009263157894736843, + "low_lr": 1.8526315789473684e-05, + "step": 140 + }, + { + "epoch": 0.36817882971729127, + "high_lr": 0.0009263157894736843, + "low_lr": 1.8526315789473684e-05, + "step": 140 + }, + { + "epoch": 0.3708086785009862, + "grad_norm": 0.7900689840316772, + "learning_rate": 0.0009257894736842106, + "loss": 1.9082, + "step": 141 + }, + { + "epoch": 0.3708086785009862, + "high_lr": 0.0009257894736842106, + "low_lr": 1.8515789473684212e-05, + "step": 141 + }, + { + "epoch": 0.3708086785009862, + "high_lr": 0.0009257894736842106, + "low_lr": 1.8515789473684212e-05, + "step": 141 + }, + { + "epoch": 0.3708086785009862, + "high_lr": 0.0009257894736842106, + "low_lr": 1.8515789473684212e-05, + "step": 141 + }, + { + "epoch": 0.3708086785009862, + "high_lr": 0.0009257894736842106, + "low_lr": 1.8515789473684212e-05, + "step": 141 + }, + { + "epoch": 0.3708086785009862, + "high_lr": 0.0009257894736842106, + "low_lr": 1.8515789473684212e-05, + "step": 141 + }, + { + "epoch": 0.3708086785009862, + "high_lr": 0.0009257894736842106, + "low_lr": 1.8515789473684212e-05, + "step": 141 + }, + { + "epoch": 0.3708086785009862, + "high_lr": 0.0009257894736842106, + "low_lr": 1.8515789473684212e-05, + "step": 141 + }, + { + "epoch": 0.3708086785009862, + "high_lr": 0.0009257894736842106, + "low_lr": 1.8515789473684212e-05, + "step": 141 + }, + { + "epoch": 0.37343852728468113, + "grad_norm": 0.65594482421875, + "learning_rate": 0.0009252631578947368, + "loss": 1.8793, + "step": 142 + }, + { + "epoch": 0.37343852728468113, + "high_lr": 0.0009252631578947368, + "low_lr": 1.8505263157894737e-05, + "step": 142 + }, + { + "epoch": 0.37343852728468113, + "high_lr": 0.0009252631578947368, + "low_lr": 1.8505263157894737e-05, + "step": 142 + }, + { + "epoch": 0.37343852728468113, + "high_lr": 0.0009252631578947368, + "low_lr": 1.8505263157894737e-05, + "step": 142 + }, + { + "epoch": 0.37343852728468113, + "high_lr": 0.0009252631578947368, + "low_lr": 1.8505263157894737e-05, + "step": 142 + }, + { + "epoch": 0.37343852728468113, + "high_lr": 0.0009252631578947368, + "low_lr": 1.8505263157894737e-05, + "step": 142 + }, + { + "epoch": 0.37343852728468113, + "high_lr": 0.0009252631578947368, + "low_lr": 1.8505263157894737e-05, + "step": 142 + }, + { + "epoch": 0.37343852728468113, + "high_lr": 0.0009252631578947368, + "low_lr": 1.8505263157894737e-05, + "step": 142 + }, + { + "epoch": 0.37343852728468113, + "high_lr": 0.0009252631578947368, + "low_lr": 1.8505263157894737e-05, + "step": 142 + }, + { + "epoch": 0.37606837606837606, + "grad_norm": 0.7136049866676331, + "learning_rate": 0.0009247368421052632, + "loss": 1.9126, + "step": 143 + }, + { + "epoch": 0.37606837606837606, + "high_lr": 0.0009247368421052632, + "low_lr": 1.8494736842105265e-05, + "step": 143 + }, + { + "epoch": 0.37606837606837606, + "high_lr": 0.0009247368421052632, + "low_lr": 1.8494736842105265e-05, + "step": 143 + }, + { + "epoch": 0.37606837606837606, + "high_lr": 0.0009247368421052632, + "low_lr": 1.8494736842105265e-05, + "step": 143 + }, + { + "epoch": 0.37606837606837606, + "high_lr": 0.0009247368421052632, + "low_lr": 1.8494736842105265e-05, + "step": 143 + }, + { + "epoch": 0.37606837606837606, + "high_lr": 0.0009247368421052632, + "low_lr": 1.8494736842105265e-05, + "step": 143 + }, + { + "epoch": 0.37606837606837606, + "high_lr": 0.0009247368421052632, + "low_lr": 1.8494736842105265e-05, + "step": 143 + }, + { + "epoch": 0.37606837606837606, + "high_lr": 0.0009247368421052632, + "low_lr": 1.8494736842105265e-05, + "step": 143 + }, + { + "epoch": 0.37606837606837606, + "high_lr": 0.0009247368421052632, + "low_lr": 1.8494736842105265e-05, + "step": 143 + }, + { + "epoch": 0.378698224852071, + "grad_norm": 0.7377094030380249, + "learning_rate": 0.0009242105263157895, + "loss": 1.8829, + "step": 144 + }, + { + "epoch": 0.378698224852071, + "high_lr": 0.0009242105263157895, + "low_lr": 1.8484210526315793e-05, + "step": 144 + }, + { + "epoch": 0.378698224852071, + "high_lr": 0.0009242105263157895, + "low_lr": 1.8484210526315793e-05, + "step": 144 + }, + { + "epoch": 0.378698224852071, + "high_lr": 0.0009242105263157895, + "low_lr": 1.8484210526315793e-05, + "step": 144 + }, + { + "epoch": 0.378698224852071, + "high_lr": 0.0009242105263157895, + "low_lr": 1.8484210526315793e-05, + "step": 144 + }, + { + "epoch": 0.378698224852071, + "high_lr": 0.0009242105263157895, + "low_lr": 1.8484210526315793e-05, + "step": 144 + }, + { + "epoch": 0.378698224852071, + "high_lr": 0.0009242105263157895, + "low_lr": 1.8484210526315793e-05, + "step": 144 + }, + { + "epoch": 0.378698224852071, + "high_lr": 0.0009242105263157895, + "low_lr": 1.8484210526315793e-05, + "step": 144 + }, + { + "epoch": 0.378698224852071, + "high_lr": 0.0009242105263157895, + "low_lr": 1.8484210526315793e-05, + "step": 144 + }, + { + "epoch": 0.38132807363576593, + "grad_norm": 0.8433911800384521, + "learning_rate": 0.0009236842105263158, + "loss": 1.9184, + "step": 145 + }, + { + "epoch": 0.38132807363576593, + "high_lr": 0.0009236842105263158, + "low_lr": 1.8473684210526317e-05, + "step": 145 + }, + { + "epoch": 0.38132807363576593, + "high_lr": 0.0009236842105263158, + "low_lr": 1.8473684210526317e-05, + "step": 145 + }, + { + "epoch": 0.38132807363576593, + "high_lr": 0.0009236842105263158, + "low_lr": 1.8473684210526317e-05, + "step": 145 + }, + { + "epoch": 0.38132807363576593, + "high_lr": 0.0009236842105263158, + "low_lr": 1.8473684210526317e-05, + "step": 145 + }, + { + "epoch": 0.38132807363576593, + "high_lr": 0.0009236842105263158, + "low_lr": 1.8473684210526317e-05, + "step": 145 + }, + { + "epoch": 0.38132807363576593, + "high_lr": 0.0009236842105263158, + "low_lr": 1.8473684210526317e-05, + "step": 145 + }, + { + "epoch": 0.38132807363576593, + "high_lr": 0.0009236842105263158, + "low_lr": 1.8473684210526317e-05, + "step": 145 + }, + { + "epoch": 0.38132807363576593, + "high_lr": 0.0009236842105263158, + "low_lr": 1.8473684210526317e-05, + "step": 145 + }, + { + "epoch": 0.38395792241946086, + "grad_norm": 0.7446521520614624, + "learning_rate": 0.0009231578947368421, + "loss": 1.9224, + "step": 146 + }, + { + "epoch": 0.38395792241946086, + "high_lr": 0.0009231578947368421, + "low_lr": 1.8463157894736842e-05, + "step": 146 + }, + { + "epoch": 0.38395792241946086, + "high_lr": 0.0009231578947368421, + "low_lr": 1.8463157894736842e-05, + "step": 146 + }, + { + "epoch": 0.38395792241946086, + "high_lr": 0.0009231578947368421, + "low_lr": 1.8463157894736842e-05, + "step": 146 + }, + { + "epoch": 0.38395792241946086, + "high_lr": 0.0009231578947368421, + "low_lr": 1.8463157894736842e-05, + "step": 146 + }, + { + "epoch": 0.38395792241946086, + "high_lr": 0.0009231578947368421, + "low_lr": 1.8463157894736842e-05, + "step": 146 + }, + { + "epoch": 0.38395792241946086, + "high_lr": 0.0009231578947368421, + "low_lr": 1.8463157894736842e-05, + "step": 146 + }, + { + "epoch": 0.38395792241946086, + "high_lr": 0.0009231578947368421, + "low_lr": 1.8463157894736842e-05, + "step": 146 + }, + { + "epoch": 0.38395792241946086, + "high_lr": 0.0009231578947368421, + "low_lr": 1.8463157894736842e-05, + "step": 146 + }, + { + "epoch": 0.3865877712031558, + "grad_norm": 0.766181230545044, + "learning_rate": 0.0009226315789473685, + "loss": 1.9315, + "step": 147 + }, + { + "epoch": 0.3865877712031558, + "high_lr": 0.0009226315789473685, + "low_lr": 1.845263157894737e-05, + "step": 147 + }, + { + "epoch": 0.3865877712031558, + "high_lr": 0.0009226315789473685, + "low_lr": 1.845263157894737e-05, + "step": 147 + }, + { + "epoch": 0.3865877712031558, + "high_lr": 0.0009226315789473685, + "low_lr": 1.845263157894737e-05, + "step": 147 + }, + { + "epoch": 0.3865877712031558, + "high_lr": 0.0009226315789473685, + "low_lr": 1.845263157894737e-05, + "step": 147 + }, + { + "epoch": 0.3865877712031558, + "high_lr": 0.0009226315789473685, + "low_lr": 1.845263157894737e-05, + "step": 147 + }, + { + "epoch": 0.3865877712031558, + "high_lr": 0.0009226315789473685, + "low_lr": 1.845263157894737e-05, + "step": 147 + }, + { + "epoch": 0.3865877712031558, + "high_lr": 0.0009226315789473685, + "low_lr": 1.845263157894737e-05, + "step": 147 + }, + { + "epoch": 0.3865877712031558, + "high_lr": 0.0009226315789473685, + "low_lr": 1.845263157894737e-05, + "step": 147 + }, + { + "epoch": 0.3892176199868508, + "grad_norm": 0.7001948952674866, + "learning_rate": 0.0009221052631578948, + "loss": 1.8632, + "step": 148 + }, + { + "epoch": 0.3892176199868508, + "high_lr": 0.0009221052631578948, + "low_lr": 1.8442105263157898e-05, + "step": 148 + }, + { + "epoch": 0.3892176199868508, + "high_lr": 0.0009221052631578948, + "low_lr": 1.8442105263157898e-05, + "step": 148 + }, + { + "epoch": 0.3892176199868508, + "high_lr": 0.0009221052631578948, + "low_lr": 1.8442105263157898e-05, + "step": 148 + }, + { + "epoch": 0.3892176199868508, + "high_lr": 0.0009221052631578948, + "low_lr": 1.8442105263157898e-05, + "step": 148 + }, + { + "epoch": 0.3892176199868508, + "high_lr": 0.0009221052631578948, + "low_lr": 1.8442105263157898e-05, + "step": 148 + }, + { + "epoch": 0.3892176199868508, + "high_lr": 0.0009221052631578948, + "low_lr": 1.8442105263157898e-05, + "step": 148 + }, + { + "epoch": 0.3892176199868508, + "high_lr": 0.0009221052631578948, + "low_lr": 1.8442105263157898e-05, + "step": 148 + }, + { + "epoch": 0.3892176199868508, + "high_lr": 0.0009221052631578948, + "low_lr": 1.8442105263157898e-05, + "step": 148 + }, + { + "epoch": 0.3918474687705457, + "grad_norm": 0.6942868232727051, + "learning_rate": 0.0009215789473684211, + "loss": 1.9002, + "step": 149 + }, + { + "epoch": 0.3918474687705457, + "high_lr": 0.0009215789473684211, + "low_lr": 1.8431578947368423e-05, + "step": 149 + }, + { + "epoch": 0.3918474687705457, + "high_lr": 0.0009215789473684211, + "low_lr": 1.8431578947368423e-05, + "step": 149 + }, + { + "epoch": 0.3918474687705457, + "high_lr": 0.0009215789473684211, + "low_lr": 1.8431578947368423e-05, + "step": 149 + }, + { + "epoch": 0.3918474687705457, + "high_lr": 0.0009215789473684211, + "low_lr": 1.8431578947368423e-05, + "step": 149 + }, + { + "epoch": 0.3918474687705457, + "high_lr": 0.0009215789473684211, + "low_lr": 1.8431578947368423e-05, + "step": 149 + }, + { + "epoch": 0.3918474687705457, + "high_lr": 0.0009215789473684211, + "low_lr": 1.8431578947368423e-05, + "step": 149 + }, + { + "epoch": 0.3918474687705457, + "high_lr": 0.0009215789473684211, + "low_lr": 1.8431578947368423e-05, + "step": 149 + }, + { + "epoch": 0.3918474687705457, + "high_lr": 0.0009215789473684211, + "low_lr": 1.8431578947368423e-05, + "step": 149 + }, + { + "epoch": 0.39447731755424065, + "grad_norm": 1.636558175086975, + "learning_rate": 0.0009210526315789473, + "loss": 1.9456, + "step": 150 + }, + { + "epoch": 0.39447731755424065, + "high_lr": 0.0009210526315789473, + "low_lr": 1.8421052631578947e-05, + "step": 150 + }, + { + "epoch": 0.39447731755424065, + "high_lr": 0.0009210526315789473, + "low_lr": 1.8421052631578947e-05, + "step": 150 + }, + { + "epoch": 0.39447731755424065, + "high_lr": 0.0009210526315789473, + "low_lr": 1.8421052631578947e-05, + "step": 150 + }, + { + "epoch": 0.39447731755424065, + "high_lr": 0.0009210526315789473, + "low_lr": 1.8421052631578947e-05, + "step": 150 + }, + { + "epoch": 0.39447731755424065, + "high_lr": 0.0009210526315789473, + "low_lr": 1.8421052631578947e-05, + "step": 150 + }, + { + "epoch": 0.39447731755424065, + "high_lr": 0.0009210526315789473, + "low_lr": 1.8421052631578947e-05, + "step": 150 + }, + { + "epoch": 0.39447731755424065, + "high_lr": 0.0009210526315789473, + "low_lr": 1.8421052631578947e-05, + "step": 150 + }, + { + "epoch": 0.39447731755424065, + "high_lr": 0.0009210526315789473, + "low_lr": 1.8421052631578947e-05, + "step": 150 + }, + { + "epoch": 0.3971071663379356, + "grad_norm": 0.7493187785148621, + "learning_rate": 0.0009205263157894736, + "loss": 1.8301, + "step": 151 + }, + { + "epoch": 0.3971071663379356, + "high_lr": 0.0009205263157894736, + "low_lr": 1.8410526315789475e-05, + "step": 151 + }, + { + "epoch": 0.3971071663379356, + "high_lr": 0.0009205263157894736, + "low_lr": 1.8410526315789475e-05, + "step": 151 + }, + { + "epoch": 0.3971071663379356, + "high_lr": 0.0009205263157894736, + "low_lr": 1.8410526315789475e-05, + "step": 151 + }, + { + "epoch": 0.3971071663379356, + "high_lr": 0.0009205263157894736, + "low_lr": 1.8410526315789475e-05, + "step": 151 + }, + { + "epoch": 0.3971071663379356, + "high_lr": 0.0009205263157894736, + "low_lr": 1.8410526315789475e-05, + "step": 151 + }, + { + "epoch": 0.3971071663379356, + "high_lr": 0.0009205263157894736, + "low_lr": 1.8410526315789475e-05, + "step": 151 + }, + { + "epoch": 0.3971071663379356, + "high_lr": 0.0009205263157894736, + "low_lr": 1.8410526315789475e-05, + "step": 151 + }, + { + "epoch": 0.3971071663379356, + "high_lr": 0.0009205263157894736, + "low_lr": 1.8410526315789475e-05, + "step": 151 + }, + { + "epoch": 0.3997370151216305, + "grad_norm": 0.7495818734169006, + "learning_rate": 0.00092, + "loss": 1.88, + "step": 152 + }, + { + "epoch": 0.3997370151216305, + "high_lr": 0.00092, + "low_lr": 1.8400000000000003e-05, + "step": 152 + }, + { + "epoch": 0.3997370151216305, + "high_lr": 0.00092, + "low_lr": 1.8400000000000003e-05, + "step": 152 + }, + { + "epoch": 0.3997370151216305, + "high_lr": 0.00092, + "low_lr": 1.8400000000000003e-05, + "step": 152 + }, + { + "epoch": 0.3997370151216305, + "high_lr": 0.00092, + "low_lr": 1.8400000000000003e-05, + "step": 152 + }, + { + "epoch": 0.3997370151216305, + "high_lr": 0.00092, + "low_lr": 1.8400000000000003e-05, + "step": 152 + }, + { + "epoch": 0.3997370151216305, + "high_lr": 0.00092, + "low_lr": 1.8400000000000003e-05, + "step": 152 + }, + { + "epoch": 0.3997370151216305, + "high_lr": 0.00092, + "low_lr": 1.8400000000000003e-05, + "step": 152 + }, + { + "epoch": 0.3997370151216305, + "high_lr": 0.00092, + "low_lr": 1.8400000000000003e-05, + "step": 152 + }, + { + "epoch": 0.40236686390532544, + "grad_norm": 0.7916814684867859, + "learning_rate": 0.0009194736842105263, + "loss": 1.8471, + "step": 153 + }, + { + "epoch": 0.40236686390532544, + "high_lr": 0.0009194736842105263, + "low_lr": 1.8389473684210528e-05, + "step": 153 + }, + { + "epoch": 0.40236686390532544, + "high_lr": 0.0009194736842105263, + "low_lr": 1.8389473684210528e-05, + "step": 153 + }, + { + "epoch": 0.40236686390532544, + "high_lr": 0.0009194736842105263, + "low_lr": 1.8389473684210528e-05, + "step": 153 + }, + { + "epoch": 0.40236686390532544, + "high_lr": 0.0009194736842105263, + "low_lr": 1.8389473684210528e-05, + "step": 153 + }, + { + "epoch": 0.40236686390532544, + "high_lr": 0.0009194736842105263, + "low_lr": 1.8389473684210528e-05, + "step": 153 + }, + { + "epoch": 0.40236686390532544, + "high_lr": 0.0009194736842105263, + "low_lr": 1.8389473684210528e-05, + "step": 153 + }, + { + "epoch": 0.40236686390532544, + "high_lr": 0.0009194736842105263, + "low_lr": 1.8389473684210528e-05, + "step": 153 + }, + { + "epoch": 0.40236686390532544, + "high_lr": 0.0009194736842105263, + "low_lr": 1.8389473684210528e-05, + "step": 153 + }, + { + "epoch": 0.4049967126890204, + "grad_norm": 0.7696760296821594, + "learning_rate": 0.0009189473684210526, + "loss": 1.9551, + "step": 154 + }, + { + "epoch": 0.4049967126890204, + "high_lr": 0.0009189473684210526, + "low_lr": 1.8378947368421053e-05, + "step": 154 + }, + { + "epoch": 0.4049967126890204, + "high_lr": 0.0009189473684210526, + "low_lr": 1.8378947368421053e-05, + "step": 154 + }, + { + "epoch": 0.4049967126890204, + "high_lr": 0.0009189473684210526, + "low_lr": 1.8378947368421053e-05, + "step": 154 + }, + { + "epoch": 0.4049967126890204, + "high_lr": 0.0009189473684210526, + "low_lr": 1.8378947368421053e-05, + "step": 154 + }, + { + "epoch": 0.4049967126890204, + "high_lr": 0.0009189473684210526, + "low_lr": 1.8378947368421053e-05, + "step": 154 + }, + { + "epoch": 0.4049967126890204, + "high_lr": 0.0009189473684210526, + "low_lr": 1.8378947368421053e-05, + "step": 154 + }, + { + "epoch": 0.4049967126890204, + "high_lr": 0.0009189473684210526, + "low_lr": 1.8378947368421053e-05, + "step": 154 + }, + { + "epoch": 0.4049967126890204, + "high_lr": 0.0009189473684210526, + "low_lr": 1.8378947368421053e-05, + "step": 154 + }, + { + "epoch": 0.4076265614727153, + "grad_norm": 0.714638352394104, + "learning_rate": 0.0009184210526315789, + "loss": 1.8951, + "step": 155 + }, + { + "epoch": 0.4076265614727153, + "high_lr": 0.0009184210526315789, + "low_lr": 1.836842105263158e-05, + "step": 155 + }, + { + "epoch": 0.4076265614727153, + "high_lr": 0.0009184210526315789, + "low_lr": 1.836842105263158e-05, + "step": 155 + }, + { + "epoch": 0.4076265614727153, + "high_lr": 0.0009184210526315789, + "low_lr": 1.836842105263158e-05, + "step": 155 + }, + { + "epoch": 0.4076265614727153, + "high_lr": 0.0009184210526315789, + "low_lr": 1.836842105263158e-05, + "step": 155 + }, + { + "epoch": 0.4076265614727153, + "high_lr": 0.0009184210526315789, + "low_lr": 1.836842105263158e-05, + "step": 155 + }, + { + "epoch": 0.4076265614727153, + "high_lr": 0.0009184210526315789, + "low_lr": 1.836842105263158e-05, + "step": 155 + }, + { + "epoch": 0.4076265614727153, + "high_lr": 0.0009184210526315789, + "low_lr": 1.836842105263158e-05, + "step": 155 + }, + { + "epoch": 0.4076265614727153, + "high_lr": 0.0009184210526315789, + "low_lr": 1.836842105263158e-05, + "step": 155 + }, + { + "epoch": 0.41025641025641024, + "grad_norm": 0.7288228869438171, + "learning_rate": 0.0009178947368421053, + "loss": 1.8497, + "step": 156 + }, + { + "epoch": 0.41025641025641024, + "high_lr": 0.0009178947368421053, + "low_lr": 1.8357894736842105e-05, + "step": 156 + }, + { + "epoch": 0.41025641025641024, + "high_lr": 0.0009178947368421053, + "low_lr": 1.8357894736842105e-05, + "step": 156 + }, + { + "epoch": 0.41025641025641024, + "high_lr": 0.0009178947368421053, + "low_lr": 1.8357894736842105e-05, + "step": 156 + }, + { + "epoch": 0.41025641025641024, + "high_lr": 0.0009178947368421053, + "low_lr": 1.8357894736842105e-05, + "step": 156 + }, + { + "epoch": 0.41025641025641024, + "high_lr": 0.0009178947368421053, + "low_lr": 1.8357894736842105e-05, + "step": 156 + }, + { + "epoch": 0.41025641025641024, + "high_lr": 0.0009178947368421053, + "low_lr": 1.8357894736842105e-05, + "step": 156 + }, + { + "epoch": 0.41025641025641024, + "high_lr": 0.0009178947368421053, + "low_lr": 1.8357894736842105e-05, + "step": 156 + }, + { + "epoch": 0.41025641025641024, + "high_lr": 0.0009178947368421053, + "low_lr": 1.8357894736842105e-05, + "step": 156 + }, + { + "epoch": 0.4128862590401052, + "grad_norm": 0.7588838338851929, + "learning_rate": 0.0009173684210526317, + "loss": 1.8814, + "step": 157 + }, + { + "epoch": 0.4128862590401052, + "high_lr": 0.0009173684210526317, + "low_lr": 1.8347368421052633e-05, + "step": 157 + }, + { + "epoch": 0.4128862590401052, + "high_lr": 0.0009173684210526317, + "low_lr": 1.8347368421052633e-05, + "step": 157 + }, + { + "epoch": 0.4128862590401052, + "high_lr": 0.0009173684210526317, + "low_lr": 1.8347368421052633e-05, + "step": 157 + }, + { + "epoch": 0.4128862590401052, + "high_lr": 0.0009173684210526317, + "low_lr": 1.8347368421052633e-05, + "step": 157 + }, + { + "epoch": 0.4128862590401052, + "high_lr": 0.0009173684210526317, + "low_lr": 1.8347368421052633e-05, + "step": 157 + }, + { + "epoch": 0.4128862590401052, + "high_lr": 0.0009173684210526317, + "low_lr": 1.8347368421052633e-05, + "step": 157 + }, + { + "epoch": 0.4128862590401052, + "high_lr": 0.0009173684210526317, + "low_lr": 1.8347368421052633e-05, + "step": 157 + }, + { + "epoch": 0.4128862590401052, + "high_lr": 0.0009173684210526317, + "low_lr": 1.8347368421052633e-05, + "step": 157 + }, + { + "epoch": 0.4155161078238001, + "grad_norm": 0.7453787326812744, + "learning_rate": 0.000916842105263158, + "loss": 1.8727, + "step": 158 + }, + { + "epoch": 0.4155161078238001, + "high_lr": 0.000916842105263158, + "low_lr": 1.8336842105263158e-05, + "step": 158 + }, + { + "epoch": 0.4155161078238001, + "high_lr": 0.000916842105263158, + "low_lr": 1.8336842105263158e-05, + "step": 158 + }, + { + "epoch": 0.4155161078238001, + "high_lr": 0.000916842105263158, + "low_lr": 1.8336842105263158e-05, + "step": 158 + }, + { + "epoch": 0.4155161078238001, + "high_lr": 0.000916842105263158, + "low_lr": 1.8336842105263158e-05, + "step": 158 + }, + { + "epoch": 0.4155161078238001, + "high_lr": 0.000916842105263158, + "low_lr": 1.8336842105263158e-05, + "step": 158 + }, + { + "epoch": 0.4155161078238001, + "high_lr": 0.000916842105263158, + "low_lr": 1.8336842105263158e-05, + "step": 158 + }, + { + "epoch": 0.4155161078238001, + "high_lr": 0.000916842105263158, + "low_lr": 1.8336842105263158e-05, + "step": 158 + }, + { + "epoch": 0.4155161078238001, + "high_lr": 0.000916842105263158, + "low_lr": 1.8336842105263158e-05, + "step": 158 + }, + { + "epoch": 0.4181459566074951, + "grad_norm": 0.6738296151161194, + "learning_rate": 0.0009163157894736842, + "loss": 1.7921, + "step": 159 + }, + { + "epoch": 0.4181459566074951, + "high_lr": 0.0009163157894736842, + "low_lr": 1.8326315789473686e-05, + "step": 159 + }, + { + "epoch": 0.4181459566074951, + "high_lr": 0.0009163157894736842, + "low_lr": 1.8326315789473686e-05, + "step": 159 + }, + { + "epoch": 0.4181459566074951, + "high_lr": 0.0009163157894736842, + "low_lr": 1.8326315789473686e-05, + "step": 159 + }, + { + "epoch": 0.4181459566074951, + "high_lr": 0.0009163157894736842, + "low_lr": 1.8326315789473686e-05, + "step": 159 + }, + { + "epoch": 0.4181459566074951, + "high_lr": 0.0009163157894736842, + "low_lr": 1.8326315789473686e-05, + "step": 159 + }, + { + "epoch": 0.4181459566074951, + "high_lr": 0.0009163157894736842, + "low_lr": 1.8326315789473686e-05, + "step": 159 + }, + { + "epoch": 0.4181459566074951, + "high_lr": 0.0009163157894736842, + "low_lr": 1.8326315789473686e-05, + "step": 159 + }, + { + "epoch": 0.4181459566074951, + "high_lr": 0.0009163157894736842, + "low_lr": 1.8326315789473686e-05, + "step": 159 + }, + { + "epoch": 0.42077580539119, + "grad_norm": 0.6959648132324219, + "learning_rate": 0.0009157894736842105, + "loss": 1.8321, + "step": 160 + }, + { + "epoch": 0.42077580539119, + "high_lr": 0.0009157894736842105, + "low_lr": 1.831578947368421e-05, + "step": 160 + }, + { + "epoch": 0.42077580539119, + "high_lr": 0.0009157894736842105, + "low_lr": 1.831578947368421e-05, + "step": 160 + }, + { + "epoch": 0.42077580539119, + "high_lr": 0.0009157894736842105, + "low_lr": 1.831578947368421e-05, + "step": 160 + }, + { + "epoch": 0.42077580539119, + "high_lr": 0.0009157894736842105, + "low_lr": 1.831578947368421e-05, + "step": 160 + }, + { + "epoch": 0.42077580539119, + "high_lr": 0.0009157894736842105, + "low_lr": 1.831578947368421e-05, + "step": 160 + }, + { + "epoch": 0.42077580539119, + "high_lr": 0.0009157894736842105, + "low_lr": 1.831578947368421e-05, + "step": 160 + }, + { + "epoch": 0.42077580539119, + "high_lr": 0.0009157894736842105, + "low_lr": 1.831578947368421e-05, + "step": 160 + }, + { + "epoch": 0.42077580539119, + "high_lr": 0.0009157894736842105, + "low_lr": 1.831578947368421e-05, + "step": 160 + }, + { + "epoch": 0.42340565417488496, + "grad_norm": 0.7143363356590271, + "learning_rate": 0.0009152631578947369, + "loss": 1.8262, + "step": 161 + }, + { + "epoch": 0.42340565417488496, + "high_lr": 0.0009152631578947369, + "low_lr": 1.830526315789474e-05, + "step": 161 + }, + { + "epoch": 0.42340565417488496, + "high_lr": 0.0009152631578947369, + "low_lr": 1.830526315789474e-05, + "step": 161 + }, + { + "epoch": 0.42340565417488496, + "high_lr": 0.0009152631578947369, + "low_lr": 1.830526315789474e-05, + "step": 161 + }, + { + "epoch": 0.42340565417488496, + "high_lr": 0.0009152631578947369, + "low_lr": 1.830526315789474e-05, + "step": 161 + }, + { + "epoch": 0.42340565417488496, + "high_lr": 0.0009152631578947369, + "low_lr": 1.830526315789474e-05, + "step": 161 + }, + { + "epoch": 0.42340565417488496, + "high_lr": 0.0009152631578947369, + "low_lr": 1.830526315789474e-05, + "step": 161 + }, + { + "epoch": 0.42340565417488496, + "high_lr": 0.0009152631578947369, + "low_lr": 1.830526315789474e-05, + "step": 161 + }, + { + "epoch": 0.42340565417488496, + "high_lr": 0.0009152631578947369, + "low_lr": 1.830526315789474e-05, + "step": 161 + }, + { + "epoch": 0.4260355029585799, + "grad_norm": 0.7190316319465637, + "learning_rate": 0.0009147368421052632, + "loss": 1.7782, + "step": 162 + }, + { + "epoch": 0.4260355029585799, + "high_lr": 0.0009147368421052632, + "low_lr": 1.8294736842105267e-05, + "step": 162 + }, + { + "epoch": 0.4260355029585799, + "high_lr": 0.0009147368421052632, + "low_lr": 1.8294736842105267e-05, + "step": 162 + }, + { + "epoch": 0.4260355029585799, + "high_lr": 0.0009147368421052632, + "low_lr": 1.8294736842105267e-05, + "step": 162 + }, + { + "epoch": 0.4260355029585799, + "high_lr": 0.0009147368421052632, + "low_lr": 1.8294736842105267e-05, + "step": 162 + }, + { + "epoch": 0.4260355029585799, + "high_lr": 0.0009147368421052632, + "low_lr": 1.8294736842105267e-05, + "step": 162 + }, + { + "epoch": 0.4260355029585799, + "high_lr": 0.0009147368421052632, + "low_lr": 1.8294736842105267e-05, + "step": 162 + }, + { + "epoch": 0.4260355029585799, + "high_lr": 0.0009147368421052632, + "low_lr": 1.8294736842105267e-05, + "step": 162 + }, + { + "epoch": 0.4260355029585799, + "high_lr": 0.0009147368421052632, + "low_lr": 1.8294736842105267e-05, + "step": 162 + }, + { + "epoch": 0.4286653517422748, + "grad_norm": 0.7058752179145813, + "learning_rate": 0.0009142105263157895, + "loss": 1.8556, + "step": 163 + }, + { + "epoch": 0.4286653517422748, + "high_lr": 0.0009142105263157895, + "low_lr": 1.828421052631579e-05, + "step": 163 + }, + { + "epoch": 0.4286653517422748, + "high_lr": 0.0009142105263157895, + "low_lr": 1.828421052631579e-05, + "step": 163 + }, + { + "epoch": 0.4286653517422748, + "high_lr": 0.0009142105263157895, + "low_lr": 1.828421052631579e-05, + "step": 163 + }, + { + "epoch": 0.4286653517422748, + "high_lr": 0.0009142105263157895, + "low_lr": 1.828421052631579e-05, + "step": 163 + }, + { + "epoch": 0.4286653517422748, + "high_lr": 0.0009142105263157895, + "low_lr": 1.828421052631579e-05, + "step": 163 + }, + { + "epoch": 0.4286653517422748, + "high_lr": 0.0009142105263157895, + "low_lr": 1.828421052631579e-05, + "step": 163 + }, + { + "epoch": 0.4286653517422748, + "high_lr": 0.0009142105263157895, + "low_lr": 1.828421052631579e-05, + "step": 163 + }, + { + "epoch": 0.4286653517422748, + "high_lr": 0.0009142105263157895, + "low_lr": 1.828421052631579e-05, + "step": 163 + }, + { + "epoch": 0.43129520052596976, + "grad_norm": 0.7939090132713318, + "learning_rate": 0.0009136842105263158, + "loss": 1.8973, + "step": 164 + }, + { + "epoch": 0.43129520052596976, + "high_lr": 0.0009136842105263158, + "low_lr": 1.8273684210526316e-05, + "step": 164 + }, + { + "epoch": 0.43129520052596976, + "high_lr": 0.0009136842105263158, + "low_lr": 1.8273684210526316e-05, + "step": 164 + }, + { + "epoch": 0.43129520052596976, + "high_lr": 0.0009136842105263158, + "low_lr": 1.8273684210526316e-05, + "step": 164 + }, + { + "epoch": 0.43129520052596976, + "high_lr": 0.0009136842105263158, + "low_lr": 1.8273684210526316e-05, + "step": 164 + }, + { + "epoch": 0.43129520052596976, + "high_lr": 0.0009136842105263158, + "low_lr": 1.8273684210526316e-05, + "step": 164 + }, + { + "epoch": 0.43129520052596976, + "high_lr": 0.0009136842105263158, + "low_lr": 1.8273684210526316e-05, + "step": 164 + }, + { + "epoch": 0.43129520052596976, + "high_lr": 0.0009136842105263158, + "low_lr": 1.8273684210526316e-05, + "step": 164 + }, + { + "epoch": 0.43129520052596976, + "high_lr": 0.0009136842105263158, + "low_lr": 1.8273684210526316e-05, + "step": 164 + }, + { + "epoch": 0.4339250493096647, + "grad_norm": 0.719814121723175, + "learning_rate": 0.0009131578947368421, + "loss": 1.8264, + "step": 165 + }, + { + "epoch": 0.4339250493096647, + "high_lr": 0.0009131578947368421, + "low_lr": 1.8263157894736844e-05, + "step": 165 + }, + { + "epoch": 0.4339250493096647, + "high_lr": 0.0009131578947368421, + "low_lr": 1.8263157894736844e-05, + "step": 165 + }, + { + "epoch": 0.4339250493096647, + "high_lr": 0.0009131578947368421, + "low_lr": 1.8263157894736844e-05, + "step": 165 + }, + { + "epoch": 0.4339250493096647, + "high_lr": 0.0009131578947368421, + "low_lr": 1.8263157894736844e-05, + "step": 165 + }, + { + "epoch": 0.4339250493096647, + "high_lr": 0.0009131578947368421, + "low_lr": 1.8263157894736844e-05, + "step": 165 + }, + { + "epoch": 0.4339250493096647, + "high_lr": 0.0009131578947368421, + "low_lr": 1.8263157894736844e-05, + "step": 165 + }, + { + "epoch": 0.4339250493096647, + "high_lr": 0.0009131578947368421, + "low_lr": 1.8263157894736844e-05, + "step": 165 + }, + { + "epoch": 0.4339250493096647, + "high_lr": 0.0009131578947368421, + "low_lr": 1.8263157894736844e-05, + "step": 165 + }, + { + "epoch": 0.4365548980933596, + "grad_norm": 0.7035220265388489, + "learning_rate": 0.0009126315789473685, + "loss": 1.805, + "step": 166 + }, + { + "epoch": 0.4365548980933596, + "high_lr": 0.0009126315789473685, + "low_lr": 1.8252631578947372e-05, + "step": 166 + }, + { + "epoch": 0.4365548980933596, + "high_lr": 0.0009126315789473685, + "low_lr": 1.8252631578947372e-05, + "step": 166 + }, + { + "epoch": 0.4365548980933596, + "high_lr": 0.0009126315789473685, + "low_lr": 1.8252631578947372e-05, + "step": 166 + }, + { + "epoch": 0.4365548980933596, + "high_lr": 0.0009126315789473685, + "low_lr": 1.8252631578947372e-05, + "step": 166 + }, + { + "epoch": 0.4365548980933596, + "high_lr": 0.0009126315789473685, + "low_lr": 1.8252631578947372e-05, + "step": 166 + }, + { + "epoch": 0.4365548980933596, + "high_lr": 0.0009126315789473685, + "low_lr": 1.8252631578947372e-05, + "step": 166 + }, + { + "epoch": 0.4365548980933596, + "high_lr": 0.0009126315789473685, + "low_lr": 1.8252631578947372e-05, + "step": 166 + }, + { + "epoch": 0.4365548980933596, + "high_lr": 0.0009126315789473685, + "low_lr": 1.8252631578947372e-05, + "step": 166 + }, + { + "epoch": 0.43918474687705455, + "grad_norm": 0.7253255248069763, + "learning_rate": 0.0009121052631578947, + "loss": 1.809, + "step": 167 + }, + { + "epoch": 0.43918474687705455, + "high_lr": 0.0009121052631578947, + "low_lr": 1.8242105263157897e-05, + "step": 167 + }, + { + "epoch": 0.43918474687705455, + "high_lr": 0.0009121052631578947, + "low_lr": 1.8242105263157897e-05, + "step": 167 + }, + { + "epoch": 0.43918474687705455, + "high_lr": 0.0009121052631578947, + "low_lr": 1.8242105263157897e-05, + "step": 167 + }, + { + "epoch": 0.43918474687705455, + "high_lr": 0.0009121052631578947, + "low_lr": 1.8242105263157897e-05, + "step": 167 + }, + { + "epoch": 0.43918474687705455, + "high_lr": 0.0009121052631578947, + "low_lr": 1.8242105263157897e-05, + "step": 167 + }, + { + "epoch": 0.43918474687705455, + "high_lr": 0.0009121052631578947, + "low_lr": 1.8242105263157897e-05, + "step": 167 + }, + { + "epoch": 0.43918474687705455, + "high_lr": 0.0009121052631578947, + "low_lr": 1.8242105263157897e-05, + "step": 167 + }, + { + "epoch": 0.43918474687705455, + "high_lr": 0.0009121052631578947, + "low_lr": 1.8242105263157897e-05, + "step": 167 + }, + { + "epoch": 0.4418145956607495, + "grad_norm": 0.7530324459075928, + "learning_rate": 0.000911578947368421, + "loss": 1.8508, + "step": 168 + }, + { + "epoch": 0.4418145956607495, + "high_lr": 0.000911578947368421, + "low_lr": 1.823157894736842e-05, + "step": 168 + }, + { + "epoch": 0.4418145956607495, + "high_lr": 0.000911578947368421, + "low_lr": 1.823157894736842e-05, + "step": 168 + }, + { + "epoch": 0.4418145956607495, + "high_lr": 0.000911578947368421, + "low_lr": 1.823157894736842e-05, + "step": 168 + }, + { + "epoch": 0.4418145956607495, + "high_lr": 0.000911578947368421, + "low_lr": 1.823157894736842e-05, + "step": 168 + }, + { + "epoch": 0.4418145956607495, + "high_lr": 0.000911578947368421, + "low_lr": 1.823157894736842e-05, + "step": 168 + }, + { + "epoch": 0.4418145956607495, + "high_lr": 0.000911578947368421, + "low_lr": 1.823157894736842e-05, + "step": 168 + }, + { + "epoch": 0.4418145956607495, + "high_lr": 0.000911578947368421, + "low_lr": 1.823157894736842e-05, + "step": 168 + }, + { + "epoch": 0.4418145956607495, + "high_lr": 0.000911578947368421, + "low_lr": 1.823157894736842e-05, + "step": 168 + }, + { + "epoch": 0.4444444444444444, + "grad_norm": 0.7631351351737976, + "learning_rate": 0.0009110526315789473, + "loss": 1.8022, + "step": 169 + }, + { + "epoch": 0.4444444444444444, + "high_lr": 0.0009110526315789473, + "low_lr": 1.822105263157895e-05, + "step": 169 + }, + { + "epoch": 0.4444444444444444, + "high_lr": 0.0009110526315789473, + "low_lr": 1.822105263157895e-05, + "step": 169 + }, + { + "epoch": 0.4444444444444444, + "high_lr": 0.0009110526315789473, + "low_lr": 1.822105263157895e-05, + "step": 169 + }, + { + "epoch": 0.4444444444444444, + "high_lr": 0.0009110526315789473, + "low_lr": 1.822105263157895e-05, + "step": 169 + }, + { + "epoch": 0.4444444444444444, + "high_lr": 0.0009110526315789473, + "low_lr": 1.822105263157895e-05, + "step": 169 + }, + { + "epoch": 0.4444444444444444, + "high_lr": 0.0009110526315789473, + "low_lr": 1.822105263157895e-05, + "step": 169 + }, + { + "epoch": 0.4444444444444444, + "high_lr": 0.0009110526315789473, + "low_lr": 1.822105263157895e-05, + "step": 169 + }, + { + "epoch": 0.4444444444444444, + "high_lr": 0.0009110526315789473, + "low_lr": 1.822105263157895e-05, + "step": 169 + }, + { + "epoch": 0.4470742932281394, + "grad_norm": 0.9896913766860962, + "learning_rate": 0.0009105263157894737, + "loss": 1.7911, + "step": 170 + }, + { + "epoch": 0.4470742932281394, + "high_lr": 0.0009105263157894737, + "low_lr": 1.8210526315789477e-05, + "step": 170 + }, + { + "epoch": 0.4470742932281394, + "high_lr": 0.0009105263157894737, + "low_lr": 1.8210526315789477e-05, + "step": 170 + }, + { + "epoch": 0.4470742932281394, + "high_lr": 0.0009105263157894737, + "low_lr": 1.8210526315789477e-05, + "step": 170 + }, + { + "epoch": 0.4470742932281394, + "high_lr": 0.0009105263157894737, + "low_lr": 1.8210526315789477e-05, + "step": 170 + }, + { + "epoch": 0.4470742932281394, + "high_lr": 0.0009105263157894737, + "low_lr": 1.8210526315789477e-05, + "step": 170 + }, + { + "epoch": 0.4470742932281394, + "high_lr": 0.0009105263157894737, + "low_lr": 1.8210526315789477e-05, + "step": 170 + }, + { + "epoch": 0.4470742932281394, + "high_lr": 0.0009105263157894737, + "low_lr": 1.8210526315789477e-05, + "step": 170 + }, + { + "epoch": 0.4470742932281394, + "high_lr": 0.0009105263157894737, + "low_lr": 1.8210526315789477e-05, + "step": 170 + }, + { + "epoch": 0.44970414201183434, + "grad_norm": 0.7498970627784729, + "learning_rate": 0.00091, + "loss": 1.92, + "step": 171 + }, + { + "epoch": 0.44970414201183434, + "high_lr": 0.00091, + "low_lr": 1.8200000000000002e-05, + "step": 171 + }, + { + "epoch": 0.44970414201183434, + "high_lr": 0.00091, + "low_lr": 1.8200000000000002e-05, + "step": 171 + }, + { + "epoch": 0.44970414201183434, + "high_lr": 0.00091, + "low_lr": 1.8200000000000002e-05, + "step": 171 + }, + { + "epoch": 0.44970414201183434, + "high_lr": 0.00091, + "low_lr": 1.8200000000000002e-05, + "step": 171 + }, + { + "epoch": 0.44970414201183434, + "high_lr": 0.00091, + "low_lr": 1.8200000000000002e-05, + "step": 171 + }, + { + "epoch": 0.44970414201183434, + "high_lr": 0.00091, + "low_lr": 1.8200000000000002e-05, + "step": 171 + }, + { + "epoch": 0.44970414201183434, + "high_lr": 0.00091, + "low_lr": 1.8200000000000002e-05, + "step": 171 + }, + { + "epoch": 0.44970414201183434, + "high_lr": 0.00091, + "low_lr": 1.8200000000000002e-05, + "step": 171 + }, + { + "epoch": 0.4523339907955293, + "grad_norm": 0.77386075258255, + "learning_rate": 0.0009094736842105264, + "loss": 1.7877, + "step": 172 + }, + { + "epoch": 0.4523339907955293, + "high_lr": 0.0009094736842105264, + "low_lr": 1.8189473684210527e-05, + "step": 172 + }, + { + "epoch": 0.4523339907955293, + "high_lr": 0.0009094736842105264, + "low_lr": 1.8189473684210527e-05, + "step": 172 + }, + { + "epoch": 0.4523339907955293, + "high_lr": 0.0009094736842105264, + "low_lr": 1.8189473684210527e-05, + "step": 172 + }, + { + "epoch": 0.4523339907955293, + "high_lr": 0.0009094736842105264, + "low_lr": 1.8189473684210527e-05, + "step": 172 + }, + { + "epoch": 0.4523339907955293, + "high_lr": 0.0009094736842105264, + "low_lr": 1.8189473684210527e-05, + "step": 172 + }, + { + "epoch": 0.4523339907955293, + "high_lr": 0.0009094736842105264, + "low_lr": 1.8189473684210527e-05, + "step": 172 + }, + { + "epoch": 0.4523339907955293, + "high_lr": 0.0009094736842105264, + "low_lr": 1.8189473684210527e-05, + "step": 172 + }, + { + "epoch": 0.4523339907955293, + "high_lr": 0.0009094736842105264, + "low_lr": 1.8189473684210527e-05, + "step": 172 + }, + { + "epoch": 0.4549638395792242, + "grad_norm": 0.8331085443496704, + "learning_rate": 0.0009089473684210527, + "loss": 1.8145, + "step": 173 + }, + { + "epoch": 0.4549638395792242, + "high_lr": 0.0009089473684210527, + "low_lr": 1.8178947368421055e-05, + "step": 173 + }, + { + "epoch": 0.4549638395792242, + "high_lr": 0.0009089473684210527, + "low_lr": 1.8178947368421055e-05, + "step": 173 + }, + { + "epoch": 0.4549638395792242, + "high_lr": 0.0009089473684210527, + "low_lr": 1.8178947368421055e-05, + "step": 173 + }, + { + "epoch": 0.4549638395792242, + "high_lr": 0.0009089473684210527, + "low_lr": 1.8178947368421055e-05, + "step": 173 + }, + { + "epoch": 0.4549638395792242, + "high_lr": 0.0009089473684210527, + "low_lr": 1.8178947368421055e-05, + "step": 173 + }, + { + "epoch": 0.4549638395792242, + "high_lr": 0.0009089473684210527, + "low_lr": 1.8178947368421055e-05, + "step": 173 + }, + { + "epoch": 0.4549638395792242, + "high_lr": 0.0009089473684210527, + "low_lr": 1.8178947368421055e-05, + "step": 173 + }, + { + "epoch": 0.4549638395792242, + "high_lr": 0.0009089473684210527, + "low_lr": 1.8178947368421055e-05, + "step": 173 + }, + { + "epoch": 0.45759368836291914, + "grad_norm": 0.8667138814926147, + "learning_rate": 0.000908421052631579, + "loss": 1.8783, + "step": 174 + }, + { + "epoch": 0.45759368836291914, + "high_lr": 0.000908421052631579, + "low_lr": 1.816842105263158e-05, + "step": 174 + }, + { + "epoch": 0.45759368836291914, + "high_lr": 0.000908421052631579, + "low_lr": 1.816842105263158e-05, + "step": 174 + }, + { + "epoch": 0.45759368836291914, + "high_lr": 0.000908421052631579, + "low_lr": 1.816842105263158e-05, + "step": 174 + }, + { + "epoch": 0.45759368836291914, + "high_lr": 0.000908421052631579, + "low_lr": 1.816842105263158e-05, + "step": 174 + }, + { + "epoch": 0.45759368836291914, + "high_lr": 0.000908421052631579, + "low_lr": 1.816842105263158e-05, + "step": 174 + }, + { + "epoch": 0.45759368836291914, + "high_lr": 0.000908421052631579, + "low_lr": 1.816842105263158e-05, + "step": 174 + }, + { + "epoch": 0.45759368836291914, + "high_lr": 0.000908421052631579, + "low_lr": 1.816842105263158e-05, + "step": 174 + }, + { + "epoch": 0.45759368836291914, + "high_lr": 0.000908421052631579, + "low_lr": 1.816842105263158e-05, + "step": 174 + }, + { + "epoch": 0.46022353714661407, + "grad_norm": 0.7862030863761902, + "learning_rate": 0.0009078947368421054, + "loss": 1.9253, + "step": 175 + }, + { + "epoch": 0.46022353714661407, + "high_lr": 0.0009078947368421054, + "low_lr": 1.8157894736842107e-05, + "step": 175 + }, + { + "epoch": 0.46022353714661407, + "high_lr": 0.0009078947368421054, + "low_lr": 1.8157894736842107e-05, + "step": 175 + }, + { + "epoch": 0.46022353714661407, + "high_lr": 0.0009078947368421054, + "low_lr": 1.8157894736842107e-05, + "step": 175 + }, + { + "epoch": 0.46022353714661407, + "high_lr": 0.0009078947368421054, + "low_lr": 1.8157894736842107e-05, + "step": 175 + }, + { + "epoch": 0.46022353714661407, + "high_lr": 0.0009078947368421054, + "low_lr": 1.8157894736842107e-05, + "step": 175 + }, + { + "epoch": 0.46022353714661407, + "high_lr": 0.0009078947368421054, + "low_lr": 1.8157894736842107e-05, + "step": 175 + }, + { + "epoch": 0.46022353714661407, + "high_lr": 0.0009078947368421054, + "low_lr": 1.8157894736842107e-05, + "step": 175 + }, + { + "epoch": 0.46022353714661407, + "high_lr": 0.0009078947368421054, + "low_lr": 1.8157894736842107e-05, + "step": 175 + }, + { + "epoch": 0.462853385930309, + "grad_norm": 0.7251467108726501, + "learning_rate": 0.0009073684210526316, + "loss": 1.7903, + "step": 176 + }, + { + "epoch": 0.462853385930309, + "high_lr": 0.0009073684210526316, + "low_lr": 1.8147368421052632e-05, + "step": 176 + }, + { + "epoch": 0.462853385930309, + "high_lr": 0.0009073684210526316, + "low_lr": 1.8147368421052632e-05, + "step": 176 + }, + { + "epoch": 0.462853385930309, + "high_lr": 0.0009073684210526316, + "low_lr": 1.8147368421052632e-05, + "step": 176 + }, + { + "epoch": 0.462853385930309, + "high_lr": 0.0009073684210526316, + "low_lr": 1.8147368421052632e-05, + "step": 176 + }, + { + "epoch": 0.462853385930309, + "high_lr": 0.0009073684210526316, + "low_lr": 1.8147368421052632e-05, + "step": 176 + }, + { + "epoch": 0.462853385930309, + "high_lr": 0.0009073684210526316, + "low_lr": 1.8147368421052632e-05, + "step": 176 + }, + { + "epoch": 0.462853385930309, + "high_lr": 0.0009073684210526316, + "low_lr": 1.8147368421052632e-05, + "step": 176 + }, + { + "epoch": 0.462853385930309, + "high_lr": 0.0009073684210526316, + "low_lr": 1.8147368421052632e-05, + "step": 176 + }, + { + "epoch": 0.46548323471400394, + "grad_norm": 0.7861801385879517, + "learning_rate": 0.0009068421052631579, + "loss": 1.8458, + "step": 177 + }, + { + "epoch": 0.46548323471400394, + "high_lr": 0.0009068421052631579, + "low_lr": 1.813684210526316e-05, + "step": 177 + }, + { + "epoch": 0.46548323471400394, + "high_lr": 0.0009068421052631579, + "low_lr": 1.813684210526316e-05, + "step": 177 + }, + { + "epoch": 0.46548323471400394, + "high_lr": 0.0009068421052631579, + "low_lr": 1.813684210526316e-05, + "step": 177 + }, + { + "epoch": 0.46548323471400394, + "high_lr": 0.0009068421052631579, + "low_lr": 1.813684210526316e-05, + "step": 177 + }, + { + "epoch": 0.46548323471400394, + "high_lr": 0.0009068421052631579, + "low_lr": 1.813684210526316e-05, + "step": 177 + }, + { + "epoch": 0.46548323471400394, + "high_lr": 0.0009068421052631579, + "low_lr": 1.813684210526316e-05, + "step": 177 + }, + { + "epoch": 0.46548323471400394, + "high_lr": 0.0009068421052631579, + "low_lr": 1.813684210526316e-05, + "step": 177 + }, + { + "epoch": 0.46548323471400394, + "high_lr": 0.0009068421052631579, + "low_lr": 1.813684210526316e-05, + "step": 177 + }, + { + "epoch": 0.46811308349769887, + "grad_norm": 0.8178443312644958, + "learning_rate": 0.0009063157894736842, + "loss": 1.7825, + "step": 178 + }, + { + "epoch": 0.46811308349769887, + "high_lr": 0.0009063157894736842, + "low_lr": 1.8126315789473685e-05, + "step": 178 + }, + { + "epoch": 0.46811308349769887, + "high_lr": 0.0009063157894736842, + "low_lr": 1.8126315789473685e-05, + "step": 178 + }, + { + "epoch": 0.46811308349769887, + "high_lr": 0.0009063157894736842, + "low_lr": 1.8126315789473685e-05, + "step": 178 + }, + { + "epoch": 0.46811308349769887, + "high_lr": 0.0009063157894736842, + "low_lr": 1.8126315789473685e-05, + "step": 178 + }, + { + "epoch": 0.46811308349769887, + "high_lr": 0.0009063157894736842, + "low_lr": 1.8126315789473685e-05, + "step": 178 + }, + { + "epoch": 0.46811308349769887, + "high_lr": 0.0009063157894736842, + "low_lr": 1.8126315789473685e-05, + "step": 178 + }, + { + "epoch": 0.46811308349769887, + "high_lr": 0.0009063157894736842, + "low_lr": 1.8126315789473685e-05, + "step": 178 + }, + { + "epoch": 0.46811308349769887, + "high_lr": 0.0009063157894736842, + "low_lr": 1.8126315789473685e-05, + "step": 178 + }, + { + "epoch": 0.4707429322813938, + "grad_norm": 0.8175384402275085, + "learning_rate": 0.0009057894736842105, + "loss": 1.9254, + "step": 179 + }, + { + "epoch": 0.4707429322813938, + "high_lr": 0.0009057894736842105, + "low_lr": 1.8115789473684213e-05, + "step": 179 + }, + { + "epoch": 0.4707429322813938, + "high_lr": 0.0009057894736842105, + "low_lr": 1.8115789473684213e-05, + "step": 179 + }, + { + "epoch": 0.4707429322813938, + "high_lr": 0.0009057894736842105, + "low_lr": 1.8115789473684213e-05, + "step": 179 + }, + { + "epoch": 0.4707429322813938, + "high_lr": 0.0009057894736842105, + "low_lr": 1.8115789473684213e-05, + "step": 179 + }, + { + "epoch": 0.4707429322813938, + "high_lr": 0.0009057894736842105, + "low_lr": 1.8115789473684213e-05, + "step": 179 + }, + { + "epoch": 0.4707429322813938, + "high_lr": 0.0009057894736842105, + "low_lr": 1.8115789473684213e-05, + "step": 179 + }, + { + "epoch": 0.4707429322813938, + "high_lr": 0.0009057894736842105, + "low_lr": 1.8115789473684213e-05, + "step": 179 + }, + { + "epoch": 0.4707429322813938, + "high_lr": 0.0009057894736842105, + "low_lr": 1.8115789473684213e-05, + "step": 179 + }, + { + "epoch": 0.47337278106508873, + "grad_norm": 0.8413812518119812, + "learning_rate": 0.0009052631578947369, + "loss": 1.8219, + "step": 180 + }, + { + "epoch": 0.47337278106508873, + "high_lr": 0.0009052631578947369, + "low_lr": 1.810526315789474e-05, + "step": 180 + }, + { + "epoch": 0.47337278106508873, + "high_lr": 0.0009052631578947369, + "low_lr": 1.810526315789474e-05, + "step": 180 + }, + { + "epoch": 0.47337278106508873, + "high_lr": 0.0009052631578947369, + "low_lr": 1.810526315789474e-05, + "step": 180 + }, + { + "epoch": 0.47337278106508873, + "high_lr": 0.0009052631578947369, + "low_lr": 1.810526315789474e-05, + "step": 180 + }, + { + "epoch": 0.47337278106508873, + "high_lr": 0.0009052631578947369, + "low_lr": 1.810526315789474e-05, + "step": 180 + }, + { + "epoch": 0.47337278106508873, + "high_lr": 0.0009052631578947369, + "low_lr": 1.810526315789474e-05, + "step": 180 + }, + { + "epoch": 0.47337278106508873, + "high_lr": 0.0009052631578947369, + "low_lr": 1.810526315789474e-05, + "step": 180 + }, + { + "epoch": 0.47337278106508873, + "high_lr": 0.0009052631578947369, + "low_lr": 1.810526315789474e-05, + "step": 180 + }, + { + "epoch": 0.4760026298487837, + "grad_norm": 0.8568588495254517, + "learning_rate": 0.0009047368421052632, + "loss": 1.8912, + "step": 181 + }, + { + "epoch": 0.4760026298487837, + "high_lr": 0.0009047368421052632, + "low_lr": 1.8094736842105265e-05, + "step": 181 + }, + { + "epoch": 0.4760026298487837, + "high_lr": 0.0009047368421052632, + "low_lr": 1.8094736842105265e-05, + "step": 181 + }, + { + "epoch": 0.4760026298487837, + "high_lr": 0.0009047368421052632, + "low_lr": 1.8094736842105265e-05, + "step": 181 + }, + { + "epoch": 0.4760026298487837, + "high_lr": 0.0009047368421052632, + "low_lr": 1.8094736842105265e-05, + "step": 181 + }, + { + "epoch": 0.4760026298487837, + "high_lr": 0.0009047368421052632, + "low_lr": 1.8094736842105265e-05, + "step": 181 + }, + { + "epoch": 0.4760026298487837, + "high_lr": 0.0009047368421052632, + "low_lr": 1.8094736842105265e-05, + "step": 181 + }, + { + "epoch": 0.4760026298487837, + "high_lr": 0.0009047368421052632, + "low_lr": 1.8094736842105265e-05, + "step": 181 + }, + { + "epoch": 0.4760026298487837, + "high_lr": 0.0009047368421052632, + "low_lr": 1.8094736842105265e-05, + "step": 181 + }, + { + "epoch": 0.47863247863247865, + "grad_norm": 0.7578849792480469, + "learning_rate": 0.0009042105263157895, + "loss": 1.845, + "step": 182 + }, + { + "epoch": 0.47863247863247865, + "high_lr": 0.0009042105263157895, + "low_lr": 1.808421052631579e-05, + "step": 182 + }, + { + "epoch": 0.47863247863247865, + "high_lr": 0.0009042105263157895, + "low_lr": 1.808421052631579e-05, + "step": 182 + }, + { + "epoch": 0.47863247863247865, + "high_lr": 0.0009042105263157895, + "low_lr": 1.808421052631579e-05, + "step": 182 + }, + { + "epoch": 0.47863247863247865, + "high_lr": 0.0009042105263157895, + "low_lr": 1.808421052631579e-05, + "step": 182 + }, + { + "epoch": 0.47863247863247865, + "high_lr": 0.0009042105263157895, + "low_lr": 1.808421052631579e-05, + "step": 182 + }, + { + "epoch": 0.47863247863247865, + "high_lr": 0.0009042105263157895, + "low_lr": 1.808421052631579e-05, + "step": 182 + }, + { + "epoch": 0.47863247863247865, + "high_lr": 0.0009042105263157895, + "low_lr": 1.808421052631579e-05, + "step": 182 + }, + { + "epoch": 0.47863247863247865, + "high_lr": 0.0009042105263157895, + "low_lr": 1.808421052631579e-05, + "step": 182 + }, + { + "epoch": 0.4812623274161736, + "grad_norm": 0.9012306332588196, + "learning_rate": 0.0009036842105263158, + "loss": 1.847, + "step": 183 + }, + { + "epoch": 0.4812623274161736, + "high_lr": 0.0009036842105263158, + "low_lr": 1.8073684210526318e-05, + "step": 183 + }, + { + "epoch": 0.4812623274161736, + "high_lr": 0.0009036842105263158, + "low_lr": 1.8073684210526318e-05, + "step": 183 + }, + { + "epoch": 0.4812623274161736, + "high_lr": 0.0009036842105263158, + "low_lr": 1.8073684210526318e-05, + "step": 183 + }, + { + "epoch": 0.4812623274161736, + "high_lr": 0.0009036842105263158, + "low_lr": 1.8073684210526318e-05, + "step": 183 + }, + { + "epoch": 0.4812623274161736, + "high_lr": 0.0009036842105263158, + "low_lr": 1.8073684210526318e-05, + "step": 183 + }, + { + "epoch": 0.4812623274161736, + "high_lr": 0.0009036842105263158, + "low_lr": 1.8073684210526318e-05, + "step": 183 + }, + { + "epoch": 0.4812623274161736, + "high_lr": 0.0009036842105263158, + "low_lr": 1.8073684210526318e-05, + "step": 183 + }, + { + "epoch": 0.4812623274161736, + "high_lr": 0.0009036842105263158, + "low_lr": 1.8073684210526318e-05, + "step": 183 + }, + { + "epoch": 0.4838921761998685, + "grad_norm": 0.794228732585907, + "learning_rate": 0.0009031578947368422, + "loss": 1.8191, + "step": 184 + }, + { + "epoch": 0.4838921761998685, + "high_lr": 0.0009031578947368422, + "low_lr": 1.8063157894736846e-05, + "step": 184 + }, + { + "epoch": 0.4838921761998685, + "high_lr": 0.0009031578947368422, + "low_lr": 1.8063157894736846e-05, + "step": 184 + }, + { + "epoch": 0.4838921761998685, + "high_lr": 0.0009031578947368422, + "low_lr": 1.8063157894736846e-05, + "step": 184 + }, + { + "epoch": 0.4838921761998685, + "high_lr": 0.0009031578947368422, + "low_lr": 1.8063157894736846e-05, + "step": 184 + }, + { + "epoch": 0.4838921761998685, + "high_lr": 0.0009031578947368422, + "low_lr": 1.8063157894736846e-05, + "step": 184 + }, + { + "epoch": 0.4838921761998685, + "high_lr": 0.0009031578947368422, + "low_lr": 1.8063157894736846e-05, + "step": 184 + }, + { + "epoch": 0.4838921761998685, + "high_lr": 0.0009031578947368422, + "low_lr": 1.8063157894736846e-05, + "step": 184 + }, + { + "epoch": 0.4838921761998685, + "high_lr": 0.0009031578947368422, + "low_lr": 1.8063157894736846e-05, + "step": 184 + }, + { + "epoch": 0.48652202498356345, + "grad_norm": 0.7716225981712341, + "learning_rate": 0.0009026315789473684, + "loss": 1.8249, + "step": 185 + }, + { + "epoch": 0.48652202498356345, + "high_lr": 0.0009026315789473684, + "low_lr": 1.805263157894737e-05, + "step": 185 + }, + { + "epoch": 0.48652202498356345, + "high_lr": 0.0009026315789473684, + "low_lr": 1.805263157894737e-05, + "step": 185 + }, + { + "epoch": 0.48652202498356345, + "high_lr": 0.0009026315789473684, + "low_lr": 1.805263157894737e-05, + "step": 185 + }, + { + "epoch": 0.48652202498356345, + "high_lr": 0.0009026315789473684, + "low_lr": 1.805263157894737e-05, + "step": 185 + }, + { + "epoch": 0.48652202498356345, + "high_lr": 0.0009026315789473684, + "low_lr": 1.805263157894737e-05, + "step": 185 + }, + { + "epoch": 0.48652202498356345, + "high_lr": 0.0009026315789473684, + "low_lr": 1.805263157894737e-05, + "step": 185 + }, + { + "epoch": 0.48652202498356345, + "high_lr": 0.0009026315789473684, + "low_lr": 1.805263157894737e-05, + "step": 185 + }, + { + "epoch": 0.48652202498356345, + "high_lr": 0.0009026315789473684, + "low_lr": 1.805263157894737e-05, + "step": 185 + }, + { + "epoch": 0.4891518737672584, + "grad_norm": 0.8229754567146301, + "learning_rate": 0.0009021052631578947, + "loss": 1.8453, + "step": 186 + }, + { + "epoch": 0.4891518737672584, + "high_lr": 0.0009021052631578947, + "low_lr": 1.8042105263157895e-05, + "step": 186 + }, + { + "epoch": 0.4891518737672584, + "high_lr": 0.0009021052631578947, + "low_lr": 1.8042105263157895e-05, + "step": 186 + }, + { + "epoch": 0.4891518737672584, + "high_lr": 0.0009021052631578947, + "low_lr": 1.8042105263157895e-05, + "step": 186 + }, + { + "epoch": 0.4891518737672584, + "high_lr": 0.0009021052631578947, + "low_lr": 1.8042105263157895e-05, + "step": 186 + }, + { + "epoch": 0.4891518737672584, + "high_lr": 0.0009021052631578947, + "low_lr": 1.8042105263157895e-05, + "step": 186 + }, + { + "epoch": 0.4891518737672584, + "high_lr": 0.0009021052631578947, + "low_lr": 1.8042105263157895e-05, + "step": 186 + }, + { + "epoch": 0.4891518737672584, + "high_lr": 0.0009021052631578947, + "low_lr": 1.8042105263157895e-05, + "step": 186 + }, + { + "epoch": 0.4891518737672584, + "high_lr": 0.0009021052631578947, + "low_lr": 1.8042105263157895e-05, + "step": 186 + }, + { + "epoch": 0.4917817225509533, + "grad_norm": 0.8289306163787842, + "learning_rate": 0.000901578947368421, + "loss": 1.86, + "step": 187 + }, + { + "epoch": 0.4917817225509533, + "high_lr": 0.000901578947368421, + "low_lr": 1.8031578947368423e-05, + "step": 187 + }, + { + "epoch": 0.4917817225509533, + "high_lr": 0.000901578947368421, + "low_lr": 1.8031578947368423e-05, + "step": 187 + }, + { + "epoch": 0.4917817225509533, + "high_lr": 0.000901578947368421, + "low_lr": 1.8031578947368423e-05, + "step": 187 + }, + { + "epoch": 0.4917817225509533, + "high_lr": 0.000901578947368421, + "low_lr": 1.8031578947368423e-05, + "step": 187 + }, + { + "epoch": 0.4917817225509533, + "high_lr": 0.000901578947368421, + "low_lr": 1.8031578947368423e-05, + "step": 187 + }, + { + "epoch": 0.4917817225509533, + "high_lr": 0.000901578947368421, + "low_lr": 1.8031578947368423e-05, + "step": 187 + }, + { + "epoch": 0.4917817225509533, + "high_lr": 0.000901578947368421, + "low_lr": 1.8031578947368423e-05, + "step": 187 + }, + { + "epoch": 0.4917817225509533, + "high_lr": 0.000901578947368421, + "low_lr": 1.8031578947368423e-05, + "step": 187 + }, + { + "epoch": 0.49441157133464825, + "grad_norm": 0.7908790707588196, + "learning_rate": 0.0009010526315789473, + "loss": 1.8061, + "step": 188 + }, + { + "epoch": 0.49441157133464825, + "high_lr": 0.0009010526315789473, + "low_lr": 1.8021052631578948e-05, + "step": 188 + }, + { + "epoch": 0.49441157133464825, + "high_lr": 0.0009010526315789473, + "low_lr": 1.8021052631578948e-05, + "step": 188 + }, + { + "epoch": 0.49441157133464825, + "high_lr": 0.0009010526315789473, + "low_lr": 1.8021052631578948e-05, + "step": 188 + }, + { + "epoch": 0.49441157133464825, + "high_lr": 0.0009010526315789473, + "low_lr": 1.8021052631578948e-05, + "step": 188 + }, + { + "epoch": 0.49441157133464825, + "high_lr": 0.0009010526315789473, + "low_lr": 1.8021052631578948e-05, + "step": 188 + }, + { + "epoch": 0.49441157133464825, + "high_lr": 0.0009010526315789473, + "low_lr": 1.8021052631578948e-05, + "step": 188 + }, + { + "epoch": 0.49441157133464825, + "high_lr": 0.0009010526315789473, + "low_lr": 1.8021052631578948e-05, + "step": 188 + }, + { + "epoch": 0.49441157133464825, + "high_lr": 0.0009010526315789473, + "low_lr": 1.8021052631578948e-05, + "step": 188 + }, + { + "epoch": 0.4970414201183432, + "grad_norm": 0.9673170447349548, + "learning_rate": 0.0009005263157894738, + "loss": 1.8517, + "step": 189 + }, + { + "epoch": 0.4970414201183432, + "high_lr": 0.0009005263157894738, + "low_lr": 1.8010526315789476e-05, + "step": 189 + }, + { + "epoch": 0.4970414201183432, + "high_lr": 0.0009005263157894738, + "low_lr": 1.8010526315789476e-05, + "step": 189 + }, + { + "epoch": 0.4970414201183432, + "high_lr": 0.0009005263157894738, + "low_lr": 1.8010526315789476e-05, + "step": 189 + }, + { + "epoch": 0.4970414201183432, + "high_lr": 0.0009005263157894738, + "low_lr": 1.8010526315789476e-05, + "step": 189 + }, + { + "epoch": 0.4970414201183432, + "high_lr": 0.0009005263157894738, + "low_lr": 1.8010526315789476e-05, + "step": 189 + }, + { + "epoch": 0.4970414201183432, + "high_lr": 0.0009005263157894738, + "low_lr": 1.8010526315789476e-05, + "step": 189 + }, + { + "epoch": 0.4970414201183432, + "high_lr": 0.0009005263157894738, + "low_lr": 1.8010526315789476e-05, + "step": 189 + }, + { + "epoch": 0.4970414201183432, + "high_lr": 0.0009005263157894738, + "low_lr": 1.8010526315789476e-05, + "step": 189 + }, + { + "epoch": 0.4996712689020381, + "grad_norm": 0.862890899181366, + "learning_rate": 0.0009000000000000001, + "loss": 1.8615, + "step": 190 + }, + { + "epoch": 0.4996712689020381, + "high_lr": 0.0009000000000000001, + "low_lr": 1.8e-05, + "step": 190 + }, + { + "epoch": 0.4996712689020381, + "high_lr": 0.0009000000000000001, + "low_lr": 1.8e-05, + "step": 190 + }, + { + "epoch": 0.4996712689020381, + "high_lr": 0.0009000000000000001, + "low_lr": 1.8e-05, + "step": 190 + }, + { + "epoch": 0.4996712689020381, + "high_lr": 0.0009000000000000001, + "low_lr": 1.8e-05, + "step": 190 + }, + { + "epoch": 0.4996712689020381, + "high_lr": 0.0009000000000000001, + "low_lr": 1.8e-05, + "step": 190 + }, + { + "epoch": 0.4996712689020381, + "high_lr": 0.0009000000000000001, + "low_lr": 1.8e-05, + "step": 190 + }, + { + "epoch": 0.4996712689020381, + "high_lr": 0.0009000000000000001, + "low_lr": 1.8e-05, + "step": 190 + }, + { + "epoch": 0.4996712689020381, + "high_lr": 0.0009000000000000001, + "low_lr": 1.8e-05, + "step": 190 + }, + { + "epoch": 0.502301117685733, + "grad_norm": 0.8507909178733826, + "learning_rate": 0.0008994736842105264, + "loss": 1.8215, + "step": 191 + }, + { + "epoch": 0.502301117685733, + "high_lr": 0.0008994736842105264, + "low_lr": 1.798947368421053e-05, + "step": 191 + }, + { + "epoch": 0.502301117685733, + "high_lr": 0.0008994736842105264, + "low_lr": 1.798947368421053e-05, + "step": 191 + }, + { + "epoch": 0.502301117685733, + "high_lr": 0.0008994736842105264, + "low_lr": 1.798947368421053e-05, + "step": 191 + }, + { + "epoch": 0.502301117685733, + "high_lr": 0.0008994736842105264, + "low_lr": 1.798947368421053e-05, + "step": 191 + }, + { + "epoch": 0.502301117685733, + "high_lr": 0.0008994736842105264, + "low_lr": 1.798947368421053e-05, + "step": 191 + }, + { + "epoch": 0.502301117685733, + "high_lr": 0.0008994736842105264, + "low_lr": 1.798947368421053e-05, + "step": 191 + }, + { + "epoch": 0.502301117685733, + "high_lr": 0.0008994736842105264, + "low_lr": 1.798947368421053e-05, + "step": 191 + }, + { + "epoch": 0.502301117685733, + "high_lr": 0.0008994736842105264, + "low_lr": 1.798947368421053e-05, + "step": 191 + }, + { + "epoch": 0.504930966469428, + "grad_norm": 0.7558906674385071, + "learning_rate": 0.0008989473684210527, + "loss": 1.7142, + "step": 192 + }, + { + "epoch": 0.504930966469428, + "high_lr": 0.0008989473684210527, + "low_lr": 1.7978947368421053e-05, + "step": 192 + }, + { + "epoch": 0.504930966469428, + "high_lr": 0.0008989473684210527, + "low_lr": 1.7978947368421053e-05, + "step": 192 + }, + { + "epoch": 0.504930966469428, + "high_lr": 0.0008989473684210527, + "low_lr": 1.7978947368421053e-05, + "step": 192 + }, + { + "epoch": 0.504930966469428, + "high_lr": 0.0008989473684210527, + "low_lr": 1.7978947368421053e-05, + "step": 192 + }, + { + "epoch": 0.504930966469428, + "high_lr": 0.0008989473684210527, + "low_lr": 1.7978947368421053e-05, + "step": 192 + }, + { + "epoch": 0.504930966469428, + "high_lr": 0.0008989473684210527, + "low_lr": 1.7978947368421053e-05, + "step": 192 + }, + { + "epoch": 0.504930966469428, + "high_lr": 0.0008989473684210527, + "low_lr": 1.7978947368421053e-05, + "step": 192 + }, + { + "epoch": 0.504930966469428, + "high_lr": 0.0008989473684210527, + "low_lr": 1.7978947368421053e-05, + "step": 192 + }, + { + "epoch": 0.5075608152531229, + "grad_norm": 0.810209333896637, + "learning_rate": 0.0008984210526315789, + "loss": 1.8301, + "step": 193 + }, + { + "epoch": 0.5075608152531229, + "high_lr": 0.0008984210526315789, + "low_lr": 1.7968421052631578e-05, + "step": 193 + }, + { + "epoch": 0.5075608152531229, + "high_lr": 0.0008984210526315789, + "low_lr": 1.7968421052631578e-05, + "step": 193 + }, + { + "epoch": 0.5075608152531229, + "high_lr": 0.0008984210526315789, + "low_lr": 1.7968421052631578e-05, + "step": 193 + }, + { + "epoch": 0.5075608152531229, + "high_lr": 0.0008984210526315789, + "low_lr": 1.7968421052631578e-05, + "step": 193 + }, + { + "epoch": 0.5075608152531229, + "high_lr": 0.0008984210526315789, + "low_lr": 1.7968421052631578e-05, + "step": 193 + }, + { + "epoch": 0.5075608152531229, + "high_lr": 0.0008984210526315789, + "low_lr": 1.7968421052631578e-05, + "step": 193 + }, + { + "epoch": 0.5075608152531229, + "high_lr": 0.0008984210526315789, + "low_lr": 1.7968421052631578e-05, + "step": 193 + }, + { + "epoch": 0.5075608152531229, + "high_lr": 0.0008984210526315789, + "low_lr": 1.7968421052631578e-05, + "step": 193 + }, + { + "epoch": 0.5101906640368179, + "grad_norm": 0.7663288712501526, + "learning_rate": 0.0008978947368421053, + "loss": 1.8105, + "step": 194 + }, + { + "epoch": 0.5101906640368179, + "high_lr": 0.0008978947368421053, + "low_lr": 1.795789473684211e-05, + "step": 194 + }, + { + "epoch": 0.5101906640368179, + "high_lr": 0.0008978947368421053, + "low_lr": 1.795789473684211e-05, + "step": 194 + }, + { + "epoch": 0.5101906640368179, + "high_lr": 0.0008978947368421053, + "low_lr": 1.795789473684211e-05, + "step": 194 + }, + { + "epoch": 0.5101906640368179, + "high_lr": 0.0008978947368421053, + "low_lr": 1.795789473684211e-05, + "step": 194 + }, + { + "epoch": 0.5101906640368179, + "high_lr": 0.0008978947368421053, + "low_lr": 1.795789473684211e-05, + "step": 194 + }, + { + "epoch": 0.5101906640368179, + "high_lr": 0.0008978947368421053, + "low_lr": 1.795789473684211e-05, + "step": 194 + }, + { + "epoch": 0.5101906640368179, + "high_lr": 0.0008978947368421053, + "low_lr": 1.795789473684211e-05, + "step": 194 + }, + { + "epoch": 0.5101906640368179, + "high_lr": 0.0008978947368421053, + "low_lr": 1.795789473684211e-05, + "step": 194 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.8246523141860962, + "learning_rate": 0.0008973684210526316, + "loss": 1.8246, + "step": 195 + }, + { + "epoch": 0.5128205128205128, + "high_lr": 0.0008973684210526316, + "low_lr": 1.7947368421052634e-05, + "step": 195 + }, + { + "epoch": 0.5128205128205128, + "high_lr": 0.0008973684210526316, + "low_lr": 1.7947368421052634e-05, + "step": 195 + }, + { + "epoch": 0.5128205128205128, + "high_lr": 0.0008973684210526316, + "low_lr": 1.7947368421052634e-05, + "step": 195 + }, + { + "epoch": 0.5128205128205128, + "high_lr": 0.0008973684210526316, + "low_lr": 1.7947368421052634e-05, + "step": 195 + }, + { + "epoch": 0.5128205128205128, + "high_lr": 0.0008973684210526316, + "low_lr": 1.7947368421052634e-05, + "step": 195 + }, + { + "epoch": 0.5128205128205128, + "high_lr": 0.0008973684210526316, + "low_lr": 1.7947368421052634e-05, + "step": 195 + }, + { + "epoch": 0.5128205128205128, + "high_lr": 0.0008973684210526316, + "low_lr": 1.7947368421052634e-05, + "step": 195 + }, + { + "epoch": 0.5128205128205128, + "high_lr": 0.0008973684210526316, + "low_lr": 1.7947368421052634e-05, + "step": 195 + }, + { + "epoch": 0.5154503616042078, + "grad_norm": 0.8492828607559204, + "learning_rate": 0.0008968421052631579, + "loss": 1.8492, + "step": 196 + }, + { + "epoch": 0.5154503616042078, + "high_lr": 0.0008968421052631579, + "low_lr": 1.793684210526316e-05, + "step": 196 + }, + { + "epoch": 0.5154503616042078, + "high_lr": 0.0008968421052631579, + "low_lr": 1.793684210526316e-05, + "step": 196 + }, + { + "epoch": 0.5154503616042078, + "high_lr": 0.0008968421052631579, + "low_lr": 1.793684210526316e-05, + "step": 196 + }, + { + "epoch": 0.5154503616042078, + "high_lr": 0.0008968421052631579, + "low_lr": 1.793684210526316e-05, + "step": 196 + }, + { + "epoch": 0.5154503616042078, + "high_lr": 0.0008968421052631579, + "low_lr": 1.793684210526316e-05, + "step": 196 + }, + { + "epoch": 0.5154503616042078, + "high_lr": 0.0008968421052631579, + "low_lr": 1.793684210526316e-05, + "step": 196 + }, + { + "epoch": 0.5154503616042078, + "high_lr": 0.0008968421052631579, + "low_lr": 1.793684210526316e-05, + "step": 196 + }, + { + "epoch": 0.5154503616042078, + "high_lr": 0.0008968421052631579, + "low_lr": 1.793684210526316e-05, + "step": 196 + }, + { + "epoch": 0.5180802103879028, + "grad_norm": 0.7884014248847961, + "learning_rate": 0.0008963157894736842, + "loss": 1.8289, + "step": 197 + }, + { + "epoch": 0.5180802103879028, + "high_lr": 0.0008963157894736842, + "low_lr": 1.7926315789473686e-05, + "step": 197 + }, + { + "epoch": 0.5180802103879028, + "high_lr": 0.0008963157894736842, + "low_lr": 1.7926315789473686e-05, + "step": 197 + }, + { + "epoch": 0.5180802103879028, + "high_lr": 0.0008963157894736842, + "low_lr": 1.7926315789473686e-05, + "step": 197 + }, + { + "epoch": 0.5180802103879028, + "high_lr": 0.0008963157894736842, + "low_lr": 1.7926315789473686e-05, + "step": 197 + }, + { + "epoch": 0.5180802103879028, + "high_lr": 0.0008963157894736842, + "low_lr": 1.7926315789473686e-05, + "step": 197 + }, + { + "epoch": 0.5180802103879028, + "high_lr": 0.0008963157894736842, + "low_lr": 1.7926315789473686e-05, + "step": 197 + }, + { + "epoch": 0.5180802103879028, + "high_lr": 0.0008963157894736842, + "low_lr": 1.7926315789473686e-05, + "step": 197 + }, + { + "epoch": 0.5180802103879028, + "high_lr": 0.0008963157894736842, + "low_lr": 1.7926315789473686e-05, + "step": 197 + }, + { + "epoch": 0.5207100591715976, + "grad_norm": 0.878328800201416, + "learning_rate": 0.0008957894736842106, + "loss": 1.7901, + "step": 198 + }, + { + "epoch": 0.5207100591715976, + "high_lr": 0.0008957894736842106, + "low_lr": 1.7915789473684214e-05, + "step": 198 + }, + { + "epoch": 0.5207100591715976, + "high_lr": 0.0008957894736842106, + "low_lr": 1.7915789473684214e-05, + "step": 198 + }, + { + "epoch": 0.5207100591715976, + "high_lr": 0.0008957894736842106, + "low_lr": 1.7915789473684214e-05, + "step": 198 + }, + { + "epoch": 0.5207100591715976, + "high_lr": 0.0008957894736842106, + "low_lr": 1.7915789473684214e-05, + "step": 198 + }, + { + "epoch": 0.5207100591715976, + "high_lr": 0.0008957894736842106, + "low_lr": 1.7915789473684214e-05, + "step": 198 + }, + { + "epoch": 0.5207100591715976, + "high_lr": 0.0008957894736842106, + "low_lr": 1.7915789473684214e-05, + "step": 198 + }, + { + "epoch": 0.5207100591715976, + "high_lr": 0.0008957894736842106, + "low_lr": 1.7915789473684214e-05, + "step": 198 + }, + { + "epoch": 0.5207100591715976, + "high_lr": 0.0008957894736842106, + "low_lr": 1.7915789473684214e-05, + "step": 198 + }, + { + "epoch": 0.5233399079552926, + "grad_norm": 0.9260053038597107, + "learning_rate": 0.0008952631578947369, + "loss": 1.9373, + "step": 199 + }, + { + "epoch": 0.5233399079552926, + "high_lr": 0.0008952631578947369, + "low_lr": 1.790526315789474e-05, + "step": 199 + }, + { + "epoch": 0.5233399079552926, + "high_lr": 0.0008952631578947369, + "low_lr": 1.790526315789474e-05, + "step": 199 + }, + { + "epoch": 0.5233399079552926, + "high_lr": 0.0008952631578947369, + "low_lr": 1.790526315789474e-05, + "step": 199 + }, + { + "epoch": 0.5233399079552926, + "high_lr": 0.0008952631578947369, + "low_lr": 1.790526315789474e-05, + "step": 199 + }, + { + "epoch": 0.5233399079552926, + "high_lr": 0.0008952631578947369, + "low_lr": 1.790526315789474e-05, + "step": 199 + }, + { + "epoch": 0.5233399079552926, + "high_lr": 0.0008952631578947369, + "low_lr": 1.790526315789474e-05, + "step": 199 + }, + { + "epoch": 0.5233399079552926, + "high_lr": 0.0008952631578947369, + "low_lr": 1.790526315789474e-05, + "step": 199 + }, + { + "epoch": 0.5233399079552926, + "high_lr": 0.0008952631578947369, + "low_lr": 1.790526315789474e-05, + "step": 199 + }, + { + "epoch": 0.5259697567389875, + "grad_norm": 0.9091192483901978, + "learning_rate": 0.0008947368421052632, + "loss": 1.8995, + "step": 200 + }, + { + "epoch": 0.5259697567389875, + "high_lr": 0.0008947368421052632, + "low_lr": 1.7894736842105264e-05, + "step": 200 + }, + { + "epoch": 0.5259697567389875, + "high_lr": 0.0008947368421052632, + "low_lr": 1.7894736842105264e-05, + "step": 200 + }, + { + "epoch": 0.5259697567389875, + "high_lr": 0.0008947368421052632, + "low_lr": 1.7894736842105264e-05, + "step": 200 + }, + { + "epoch": 0.5259697567389875, + "high_lr": 0.0008947368421052632, + "low_lr": 1.7894736842105264e-05, + "step": 200 + }, + { + "epoch": 0.5259697567389875, + "high_lr": 0.0008947368421052632, + "low_lr": 1.7894736842105264e-05, + "step": 200 + }, + { + "epoch": 0.5259697567389875, + "high_lr": 0.0008947368421052632, + "low_lr": 1.7894736842105264e-05, + "step": 200 + }, + { + "epoch": 0.5259697567389875, + "high_lr": 0.0008947368421052632, + "low_lr": 1.7894736842105264e-05, + "step": 200 + }, + { + "epoch": 0.5259697567389875, + "high_lr": 0.0008947368421052632, + "low_lr": 1.7894736842105264e-05, + "step": 200 + }, + { + "epoch": 0.5285996055226825, + "grad_norm": 0.8205613493919373, + "learning_rate": 0.0008942105263157894, + "loss": 1.8548, + "step": 201 + }, + { + "epoch": 0.5285996055226825, + "high_lr": 0.0008942105263157894, + "low_lr": 1.7884210526315792e-05, + "step": 201 + }, + { + "epoch": 0.5285996055226825, + "high_lr": 0.0008942105263157894, + "low_lr": 1.7884210526315792e-05, + "step": 201 + }, + { + "epoch": 0.5285996055226825, + "high_lr": 0.0008942105263157894, + "low_lr": 1.7884210526315792e-05, + "step": 201 + }, + { + "epoch": 0.5285996055226825, + "high_lr": 0.0008942105263157894, + "low_lr": 1.7884210526315792e-05, + "step": 201 + }, + { + "epoch": 0.5285996055226825, + "high_lr": 0.0008942105263157894, + "low_lr": 1.7884210526315792e-05, + "step": 201 + }, + { + "epoch": 0.5285996055226825, + "high_lr": 0.0008942105263157894, + "low_lr": 1.7884210526315792e-05, + "step": 201 + }, + { + "epoch": 0.5285996055226825, + "high_lr": 0.0008942105263157894, + "low_lr": 1.7884210526315792e-05, + "step": 201 + }, + { + "epoch": 0.5285996055226825, + "high_lr": 0.0008942105263157894, + "low_lr": 1.7884210526315792e-05, + "step": 201 + }, + { + "epoch": 0.5312294543063774, + "grad_norm": 0.8982312083244324, + "learning_rate": 0.0008936842105263157, + "loss": 1.8409, + "step": 202 + }, + { + "epoch": 0.5312294543063774, + "high_lr": 0.0008936842105263157, + "low_lr": 1.7873684210526316e-05, + "step": 202 + }, + { + "epoch": 0.5312294543063774, + "high_lr": 0.0008936842105263157, + "low_lr": 1.7873684210526316e-05, + "step": 202 + }, + { + "epoch": 0.5312294543063774, + "high_lr": 0.0008936842105263157, + "low_lr": 1.7873684210526316e-05, + "step": 202 + }, + { + "epoch": 0.5312294543063774, + "high_lr": 0.0008936842105263157, + "low_lr": 1.7873684210526316e-05, + "step": 202 + }, + { + "epoch": 0.5312294543063774, + "high_lr": 0.0008936842105263157, + "low_lr": 1.7873684210526316e-05, + "step": 202 + }, + { + "epoch": 0.5312294543063774, + "high_lr": 0.0008936842105263157, + "low_lr": 1.7873684210526316e-05, + "step": 202 + }, + { + "epoch": 0.5312294543063774, + "high_lr": 0.0008936842105263157, + "low_lr": 1.7873684210526316e-05, + "step": 202 + }, + { + "epoch": 0.5312294543063774, + "high_lr": 0.0008936842105263157, + "low_lr": 1.7873684210526316e-05, + "step": 202 + }, + { + "epoch": 0.5338593030900723, + "grad_norm": 0.8637439012527466, + "learning_rate": 0.0008931578947368421, + "loss": 1.8147, + "step": 203 + }, + { + "epoch": 0.5338593030900723, + "high_lr": 0.0008931578947368421, + "low_lr": 1.7863157894736844e-05, + "step": 203 + }, + { + "epoch": 0.5338593030900723, + "high_lr": 0.0008931578947368421, + "low_lr": 1.7863157894736844e-05, + "step": 203 + }, + { + "epoch": 0.5338593030900723, + "high_lr": 0.0008931578947368421, + "low_lr": 1.7863157894736844e-05, + "step": 203 + }, + { + "epoch": 0.5338593030900723, + "high_lr": 0.0008931578947368421, + "low_lr": 1.7863157894736844e-05, + "step": 203 + }, + { + "epoch": 0.5338593030900723, + "high_lr": 0.0008931578947368421, + "low_lr": 1.7863157894736844e-05, + "step": 203 + }, + { + "epoch": 0.5338593030900723, + "high_lr": 0.0008931578947368421, + "low_lr": 1.7863157894736844e-05, + "step": 203 + }, + { + "epoch": 0.5338593030900723, + "high_lr": 0.0008931578947368421, + "low_lr": 1.7863157894736844e-05, + "step": 203 + }, + { + "epoch": 0.5338593030900723, + "high_lr": 0.0008931578947368421, + "low_lr": 1.7863157894736844e-05, + "step": 203 + }, + { + "epoch": 0.5364891518737672, + "grad_norm": 0.8085411787033081, + "learning_rate": 0.0008926315789473684, + "loss": 1.7611, + "step": 204 + }, + { + "epoch": 0.5364891518737672, + "high_lr": 0.0008926315789473684, + "low_lr": 1.785263157894737e-05, + "step": 204 + }, + { + "epoch": 0.5364891518737672, + "high_lr": 0.0008926315789473684, + "low_lr": 1.785263157894737e-05, + "step": 204 + }, + { + "epoch": 0.5364891518737672, + "high_lr": 0.0008926315789473684, + "low_lr": 1.785263157894737e-05, + "step": 204 + }, + { + "epoch": 0.5364891518737672, + "high_lr": 0.0008926315789473684, + "low_lr": 1.785263157894737e-05, + "step": 204 + }, + { + "epoch": 0.5364891518737672, + "high_lr": 0.0008926315789473684, + "low_lr": 1.785263157894737e-05, + "step": 204 + }, + { + "epoch": 0.5364891518737672, + "high_lr": 0.0008926315789473684, + "low_lr": 1.785263157894737e-05, + "step": 204 + }, + { + "epoch": 0.5364891518737672, + "high_lr": 0.0008926315789473684, + "low_lr": 1.785263157894737e-05, + "step": 204 + }, + { + "epoch": 0.5364891518737672, + "high_lr": 0.0008926315789473684, + "low_lr": 1.785263157894737e-05, + "step": 204 + }, + { + "epoch": 0.5391190006574622, + "grad_norm": 0.9032369256019592, + "learning_rate": 0.0008921052631578948, + "loss": 1.7992, + "step": 205 + }, + { + "epoch": 0.5391190006574622, + "high_lr": 0.0008921052631578948, + "low_lr": 1.7842105263157897e-05, + "step": 205 + }, + { + "epoch": 0.5391190006574622, + "high_lr": 0.0008921052631578948, + "low_lr": 1.7842105263157897e-05, + "step": 205 + }, + { + "epoch": 0.5391190006574622, + "high_lr": 0.0008921052631578948, + "low_lr": 1.7842105263157897e-05, + "step": 205 + }, + { + "epoch": 0.5391190006574622, + "high_lr": 0.0008921052631578948, + "low_lr": 1.7842105263157897e-05, + "step": 205 + }, + { + "epoch": 0.5391190006574622, + "high_lr": 0.0008921052631578948, + "low_lr": 1.7842105263157897e-05, + "step": 205 + }, + { + "epoch": 0.5391190006574622, + "high_lr": 0.0008921052631578948, + "low_lr": 1.7842105263157897e-05, + "step": 205 + }, + { + "epoch": 0.5391190006574622, + "high_lr": 0.0008921052631578948, + "low_lr": 1.7842105263157897e-05, + "step": 205 + }, + { + "epoch": 0.5391190006574622, + "high_lr": 0.0008921052631578948, + "low_lr": 1.7842105263157897e-05, + "step": 205 + }, + { + "epoch": 0.5417488494411571, + "grad_norm": 0.9390583038330078, + "learning_rate": 0.0008915789473684211, + "loss": 1.8455, + "step": 206 + }, + { + "epoch": 0.5417488494411571, + "high_lr": 0.0008915789473684211, + "low_lr": 1.7831578947368422e-05, + "step": 206 + }, + { + "epoch": 0.5417488494411571, + "high_lr": 0.0008915789473684211, + "low_lr": 1.7831578947368422e-05, + "step": 206 + }, + { + "epoch": 0.5417488494411571, + "high_lr": 0.0008915789473684211, + "low_lr": 1.7831578947368422e-05, + "step": 206 + }, + { + "epoch": 0.5417488494411571, + "high_lr": 0.0008915789473684211, + "low_lr": 1.7831578947368422e-05, + "step": 206 + }, + { + "epoch": 0.5417488494411571, + "high_lr": 0.0008915789473684211, + "low_lr": 1.7831578947368422e-05, + "step": 206 + }, + { + "epoch": 0.5417488494411571, + "high_lr": 0.0008915789473684211, + "low_lr": 1.7831578947368422e-05, + "step": 206 + }, + { + "epoch": 0.5417488494411571, + "high_lr": 0.0008915789473684211, + "low_lr": 1.7831578947368422e-05, + "step": 206 + }, + { + "epoch": 0.5417488494411571, + "high_lr": 0.0008915789473684211, + "low_lr": 1.7831578947368422e-05, + "step": 206 + }, + { + "epoch": 0.5443786982248521, + "grad_norm": 0.8497365117073059, + "learning_rate": 0.0008910526315789474, + "loss": 1.7379, + "step": 207 + }, + { + "epoch": 0.5443786982248521, + "high_lr": 0.0008910526315789474, + "low_lr": 1.7821052631578946e-05, + "step": 207 + }, + { + "epoch": 0.5443786982248521, + "high_lr": 0.0008910526315789474, + "low_lr": 1.7821052631578946e-05, + "step": 207 + }, + { + "epoch": 0.5443786982248521, + "high_lr": 0.0008910526315789474, + "low_lr": 1.7821052631578946e-05, + "step": 207 + }, + { + "epoch": 0.5443786982248521, + "high_lr": 0.0008910526315789474, + "low_lr": 1.7821052631578946e-05, + "step": 207 + }, + { + "epoch": 0.5443786982248521, + "high_lr": 0.0008910526315789474, + "low_lr": 1.7821052631578946e-05, + "step": 207 + }, + { + "epoch": 0.5443786982248521, + "high_lr": 0.0008910526315789474, + "low_lr": 1.7821052631578946e-05, + "step": 207 + }, + { + "epoch": 0.5443786982248521, + "high_lr": 0.0008910526315789474, + "low_lr": 1.7821052631578946e-05, + "step": 207 + }, + { + "epoch": 0.5443786982248521, + "high_lr": 0.0008910526315789474, + "low_lr": 1.7821052631578946e-05, + "step": 207 + }, + { + "epoch": 0.5470085470085471, + "grad_norm": 0.9147151112556458, + "learning_rate": 0.0008905263157894738, + "loss": 1.7945, + "step": 208 + }, + { + "epoch": 0.5470085470085471, + "high_lr": 0.0008905263157894738, + "low_lr": 1.7810526315789474e-05, + "step": 208 + }, + { + "epoch": 0.5470085470085471, + "high_lr": 0.0008905263157894738, + "low_lr": 1.7810526315789474e-05, + "step": 208 + }, + { + "epoch": 0.5470085470085471, + "high_lr": 0.0008905263157894738, + "low_lr": 1.7810526315789474e-05, + "step": 208 + }, + { + "epoch": 0.5470085470085471, + "high_lr": 0.0008905263157894738, + "low_lr": 1.7810526315789474e-05, + "step": 208 + }, + { + "epoch": 0.5470085470085471, + "high_lr": 0.0008905263157894738, + "low_lr": 1.7810526315789474e-05, + "step": 208 + }, + { + "epoch": 0.5470085470085471, + "high_lr": 0.0008905263157894738, + "low_lr": 1.7810526315789474e-05, + "step": 208 + }, + { + "epoch": 0.5470085470085471, + "high_lr": 0.0008905263157894738, + "low_lr": 1.7810526315789474e-05, + "step": 208 + }, + { + "epoch": 0.5470085470085471, + "high_lr": 0.0008905263157894738, + "low_lr": 1.7810526315789474e-05, + "step": 208 + }, + { + "epoch": 0.5496383957922419, + "grad_norm": 0.7853782176971436, + "learning_rate": 0.0008900000000000001, + "loss": 1.7118, + "step": 209 + }, + { + "epoch": 0.5496383957922419, + "high_lr": 0.0008900000000000001, + "low_lr": 1.7800000000000002e-05, + "step": 209 + }, + { + "epoch": 0.5496383957922419, + "high_lr": 0.0008900000000000001, + "low_lr": 1.7800000000000002e-05, + "step": 209 + }, + { + "epoch": 0.5496383957922419, + "high_lr": 0.0008900000000000001, + "low_lr": 1.7800000000000002e-05, + "step": 209 + }, + { + "epoch": 0.5496383957922419, + "high_lr": 0.0008900000000000001, + "low_lr": 1.7800000000000002e-05, + "step": 209 + }, + { + "epoch": 0.5496383957922419, + "high_lr": 0.0008900000000000001, + "low_lr": 1.7800000000000002e-05, + "step": 209 + }, + { + "epoch": 0.5496383957922419, + "high_lr": 0.0008900000000000001, + "low_lr": 1.7800000000000002e-05, + "step": 209 + }, + { + "epoch": 0.5496383957922419, + "high_lr": 0.0008900000000000001, + "low_lr": 1.7800000000000002e-05, + "step": 209 + }, + { + "epoch": 0.5496383957922419, + "high_lr": 0.0008900000000000001, + "low_lr": 1.7800000000000002e-05, + "step": 209 + }, + { + "epoch": 0.5522682445759369, + "grad_norm": 0.7360106110572815, + "learning_rate": 0.0008894736842105263, + "loss": 1.7677, + "step": 210 + }, + { + "epoch": 0.5522682445759369, + "high_lr": 0.0008894736842105263, + "low_lr": 1.7789473684210527e-05, + "step": 210 + }, + { + "epoch": 0.5522682445759369, + "high_lr": 0.0008894736842105263, + "low_lr": 1.7789473684210527e-05, + "step": 210 + }, + { + "epoch": 0.5522682445759369, + "high_lr": 0.0008894736842105263, + "low_lr": 1.7789473684210527e-05, + "step": 210 + }, + { + "epoch": 0.5522682445759369, + "high_lr": 0.0008894736842105263, + "low_lr": 1.7789473684210527e-05, + "step": 210 + }, + { + "epoch": 0.5522682445759369, + "high_lr": 0.0008894736842105263, + "low_lr": 1.7789473684210527e-05, + "step": 210 + }, + { + "epoch": 0.5522682445759369, + "high_lr": 0.0008894736842105263, + "low_lr": 1.7789473684210527e-05, + "step": 210 + }, + { + "epoch": 0.5522682445759369, + "high_lr": 0.0008894736842105263, + "low_lr": 1.7789473684210527e-05, + "step": 210 + }, + { + "epoch": 0.5522682445759369, + "high_lr": 0.0008894736842105263, + "low_lr": 1.7789473684210527e-05, + "step": 210 + }, + { + "epoch": 0.5548980933596318, + "grad_norm": 0.8855023980140686, + "learning_rate": 0.0008889473684210526, + "loss": 1.7806, + "step": 211 + }, + { + "epoch": 0.5548980933596318, + "high_lr": 0.0008889473684210526, + "low_lr": 1.7778947368421055e-05, + "step": 211 + }, + { + "epoch": 0.5548980933596318, + "high_lr": 0.0008889473684210526, + "low_lr": 1.7778947368421055e-05, + "step": 211 + }, + { + "epoch": 0.5548980933596318, + "high_lr": 0.0008889473684210526, + "low_lr": 1.7778947368421055e-05, + "step": 211 + }, + { + "epoch": 0.5548980933596318, + "high_lr": 0.0008889473684210526, + "low_lr": 1.7778947368421055e-05, + "step": 211 + }, + { + "epoch": 0.5548980933596318, + "high_lr": 0.0008889473684210526, + "low_lr": 1.7778947368421055e-05, + "step": 211 + }, + { + "epoch": 0.5548980933596318, + "high_lr": 0.0008889473684210526, + "low_lr": 1.7778947368421055e-05, + "step": 211 + }, + { + "epoch": 0.5548980933596318, + "high_lr": 0.0008889473684210526, + "low_lr": 1.7778947368421055e-05, + "step": 211 + }, + { + "epoch": 0.5548980933596318, + "high_lr": 0.0008889473684210526, + "low_lr": 1.7778947368421055e-05, + "step": 211 + }, + { + "epoch": 0.5575279421433268, + "grad_norm": 0.8010016679763794, + "learning_rate": 0.000888421052631579, + "loss": 1.7559, + "step": 212 + }, + { + "epoch": 0.5575279421433268, + "high_lr": 0.000888421052631579, + "low_lr": 1.7768421052631583e-05, + "step": 212 + }, + { + "epoch": 0.5575279421433268, + "high_lr": 0.000888421052631579, + "low_lr": 1.7768421052631583e-05, + "step": 212 + }, + { + "epoch": 0.5575279421433268, + "high_lr": 0.000888421052631579, + "low_lr": 1.7768421052631583e-05, + "step": 212 + }, + { + "epoch": 0.5575279421433268, + "high_lr": 0.000888421052631579, + "low_lr": 1.7768421052631583e-05, + "step": 212 + }, + { + "epoch": 0.5575279421433268, + "high_lr": 0.000888421052631579, + "low_lr": 1.7768421052631583e-05, + "step": 212 + }, + { + "epoch": 0.5575279421433268, + "high_lr": 0.000888421052631579, + "low_lr": 1.7768421052631583e-05, + "step": 212 + }, + { + "epoch": 0.5575279421433268, + "high_lr": 0.000888421052631579, + "low_lr": 1.7768421052631583e-05, + "step": 212 + }, + { + "epoch": 0.5575279421433268, + "high_lr": 0.000888421052631579, + "low_lr": 1.7768421052631583e-05, + "step": 212 + }, + { + "epoch": 0.5601577909270217, + "grad_norm": 0.9242972135543823, + "learning_rate": 0.0008878947368421053, + "loss": 1.7538, + "step": 213 + }, + { + "epoch": 0.5601577909270217, + "high_lr": 0.0008878947368421053, + "low_lr": 1.7757894736842108e-05, + "step": 213 + }, + { + "epoch": 0.5601577909270217, + "high_lr": 0.0008878947368421053, + "low_lr": 1.7757894736842108e-05, + "step": 213 + }, + { + "epoch": 0.5601577909270217, + "high_lr": 0.0008878947368421053, + "low_lr": 1.7757894736842108e-05, + "step": 213 + }, + { + "epoch": 0.5601577909270217, + "high_lr": 0.0008878947368421053, + "low_lr": 1.7757894736842108e-05, + "step": 213 + }, + { + "epoch": 0.5601577909270217, + "high_lr": 0.0008878947368421053, + "low_lr": 1.7757894736842108e-05, + "step": 213 + }, + { + "epoch": 0.5601577909270217, + "high_lr": 0.0008878947368421053, + "low_lr": 1.7757894736842108e-05, + "step": 213 + }, + { + "epoch": 0.5601577909270217, + "high_lr": 0.0008878947368421053, + "low_lr": 1.7757894736842108e-05, + "step": 213 + }, + { + "epoch": 0.5601577909270217, + "high_lr": 0.0008878947368421053, + "low_lr": 1.7757894736842108e-05, + "step": 213 + }, + { + "epoch": 0.5627876397107167, + "grad_norm": 0.8491708636283875, + "learning_rate": 0.0008873684210526316, + "loss": 1.8032, + "step": 214 + }, + { + "epoch": 0.5627876397107167, + "high_lr": 0.0008873684210526316, + "low_lr": 1.7747368421052632e-05, + "step": 214 + }, + { + "epoch": 0.5627876397107167, + "high_lr": 0.0008873684210526316, + "low_lr": 1.7747368421052632e-05, + "step": 214 + }, + { + "epoch": 0.5627876397107167, + "high_lr": 0.0008873684210526316, + "low_lr": 1.7747368421052632e-05, + "step": 214 + }, + { + "epoch": 0.5627876397107167, + "high_lr": 0.0008873684210526316, + "low_lr": 1.7747368421052632e-05, + "step": 214 + }, + { + "epoch": 0.5627876397107167, + "high_lr": 0.0008873684210526316, + "low_lr": 1.7747368421052632e-05, + "step": 214 + }, + { + "epoch": 0.5627876397107167, + "high_lr": 0.0008873684210526316, + "low_lr": 1.7747368421052632e-05, + "step": 214 + }, + { + "epoch": 0.5627876397107167, + "high_lr": 0.0008873684210526316, + "low_lr": 1.7747368421052632e-05, + "step": 214 + }, + { + "epoch": 0.5627876397107167, + "high_lr": 0.0008873684210526316, + "low_lr": 1.7747368421052632e-05, + "step": 214 + }, + { + "epoch": 0.5654174884944115, + "grad_norm": 0.8941650390625, + "learning_rate": 0.0008868421052631579, + "loss": 1.8055, + "step": 215 + }, + { + "epoch": 0.5654174884944115, + "high_lr": 0.0008868421052631579, + "low_lr": 1.773684210526316e-05, + "step": 215 + }, + { + "epoch": 0.5654174884944115, + "high_lr": 0.0008868421052631579, + "low_lr": 1.773684210526316e-05, + "step": 215 + }, + { + "epoch": 0.5654174884944115, + "high_lr": 0.0008868421052631579, + "low_lr": 1.773684210526316e-05, + "step": 215 + }, + { + "epoch": 0.5654174884944115, + "high_lr": 0.0008868421052631579, + "low_lr": 1.773684210526316e-05, + "step": 215 + }, + { + "epoch": 0.5654174884944115, + "high_lr": 0.0008868421052631579, + "low_lr": 1.773684210526316e-05, + "step": 215 + }, + { + "epoch": 0.5654174884944115, + "high_lr": 0.0008868421052631579, + "low_lr": 1.773684210526316e-05, + "step": 215 + }, + { + "epoch": 0.5654174884944115, + "high_lr": 0.0008868421052631579, + "low_lr": 1.773684210526316e-05, + "step": 215 + }, + { + "epoch": 0.5654174884944115, + "high_lr": 0.0008868421052631579, + "low_lr": 1.773684210526316e-05, + "step": 215 + }, + { + "epoch": 0.5680473372781065, + "grad_norm": 0.8661569356918335, + "learning_rate": 0.0008863157894736842, + "loss": 1.7234, + "step": 216 + }, + { + "epoch": 0.5680473372781065, + "high_lr": 0.0008863157894736842, + "low_lr": 1.7726315789473685e-05, + "step": 216 + }, + { + "epoch": 0.5680473372781065, + "high_lr": 0.0008863157894736842, + "low_lr": 1.7726315789473685e-05, + "step": 216 + }, + { + "epoch": 0.5680473372781065, + "high_lr": 0.0008863157894736842, + "low_lr": 1.7726315789473685e-05, + "step": 216 + }, + { + "epoch": 0.5680473372781065, + "high_lr": 0.0008863157894736842, + "low_lr": 1.7726315789473685e-05, + "step": 216 + }, + { + "epoch": 0.5680473372781065, + "high_lr": 0.0008863157894736842, + "low_lr": 1.7726315789473685e-05, + "step": 216 + }, + { + "epoch": 0.5680473372781065, + "high_lr": 0.0008863157894736842, + "low_lr": 1.7726315789473685e-05, + "step": 216 + }, + { + "epoch": 0.5680473372781065, + "high_lr": 0.0008863157894736842, + "low_lr": 1.7726315789473685e-05, + "step": 216 + }, + { + "epoch": 0.5680473372781065, + "high_lr": 0.0008863157894736842, + "low_lr": 1.7726315789473685e-05, + "step": 216 + }, + { + "epoch": 0.5706771860618014, + "grad_norm": 0.824845016002655, + "learning_rate": 0.0008857894736842106, + "loss": 1.7999, + "step": 217 + }, + { + "epoch": 0.5706771860618014, + "high_lr": 0.0008857894736842106, + "low_lr": 1.7715789473684213e-05, + "step": 217 + }, + { + "epoch": 0.5706771860618014, + "high_lr": 0.0008857894736842106, + "low_lr": 1.7715789473684213e-05, + "step": 217 + }, + { + "epoch": 0.5706771860618014, + "high_lr": 0.0008857894736842106, + "low_lr": 1.7715789473684213e-05, + "step": 217 + }, + { + "epoch": 0.5706771860618014, + "high_lr": 0.0008857894736842106, + "low_lr": 1.7715789473684213e-05, + "step": 217 + }, + { + "epoch": 0.5706771860618014, + "high_lr": 0.0008857894736842106, + "low_lr": 1.7715789473684213e-05, + "step": 217 + }, + { + "epoch": 0.5706771860618014, + "high_lr": 0.0008857894736842106, + "low_lr": 1.7715789473684213e-05, + "step": 217 + }, + { + "epoch": 0.5706771860618014, + "high_lr": 0.0008857894736842106, + "low_lr": 1.7715789473684213e-05, + "step": 217 + }, + { + "epoch": 0.5706771860618014, + "high_lr": 0.0008857894736842106, + "low_lr": 1.7715789473684213e-05, + "step": 217 + }, + { + "epoch": 0.5733070348454964, + "grad_norm": 0.9351339340209961, + "learning_rate": 0.0008852631578947368, + "loss": 1.8139, + "step": 218 + }, + { + "epoch": 0.5733070348454964, + "high_lr": 0.0008852631578947368, + "low_lr": 1.7705263157894738e-05, + "step": 218 + }, + { + "epoch": 0.5733070348454964, + "high_lr": 0.0008852631578947368, + "low_lr": 1.7705263157894738e-05, + "step": 218 + }, + { + "epoch": 0.5733070348454964, + "high_lr": 0.0008852631578947368, + "low_lr": 1.7705263157894738e-05, + "step": 218 + }, + { + "epoch": 0.5733070348454964, + "high_lr": 0.0008852631578947368, + "low_lr": 1.7705263157894738e-05, + "step": 218 + }, + { + "epoch": 0.5733070348454964, + "high_lr": 0.0008852631578947368, + "low_lr": 1.7705263157894738e-05, + "step": 218 + }, + { + "epoch": 0.5733070348454964, + "high_lr": 0.0008852631578947368, + "low_lr": 1.7705263157894738e-05, + "step": 218 + }, + { + "epoch": 0.5733070348454964, + "high_lr": 0.0008852631578947368, + "low_lr": 1.7705263157894738e-05, + "step": 218 + }, + { + "epoch": 0.5733070348454964, + "high_lr": 0.0008852631578947368, + "low_lr": 1.7705263157894738e-05, + "step": 218 + }, + { + "epoch": 0.5759368836291914, + "grad_norm": 0.8763374090194702, + "learning_rate": 0.0008847368421052631, + "loss": 1.7691, + "step": 219 + }, + { + "epoch": 0.5759368836291914, + "high_lr": 0.0008847368421052631, + "low_lr": 1.7694736842105266e-05, + "step": 219 + }, + { + "epoch": 0.5759368836291914, + "high_lr": 0.0008847368421052631, + "low_lr": 1.7694736842105266e-05, + "step": 219 + }, + { + "epoch": 0.5759368836291914, + "high_lr": 0.0008847368421052631, + "low_lr": 1.7694736842105266e-05, + "step": 219 + }, + { + "epoch": 0.5759368836291914, + "high_lr": 0.0008847368421052631, + "low_lr": 1.7694736842105266e-05, + "step": 219 + }, + { + "epoch": 0.5759368836291914, + "high_lr": 0.0008847368421052631, + "low_lr": 1.7694736842105266e-05, + "step": 219 + }, + { + "epoch": 0.5759368836291914, + "high_lr": 0.0008847368421052631, + "low_lr": 1.7694736842105266e-05, + "step": 219 + }, + { + "epoch": 0.5759368836291914, + "high_lr": 0.0008847368421052631, + "low_lr": 1.7694736842105266e-05, + "step": 219 + }, + { + "epoch": 0.5759368836291914, + "high_lr": 0.0008847368421052631, + "low_lr": 1.7694736842105266e-05, + "step": 219 + }, + { + "epoch": 0.5785667324128863, + "grad_norm": 0.8397433757781982, + "learning_rate": 0.0008842105263157894, + "loss": 1.7958, + "step": 220 + }, + { + "epoch": 0.5785667324128863, + "high_lr": 0.0008842105263157894, + "low_lr": 1.768421052631579e-05, + "step": 220 + }, + { + "epoch": 0.5785667324128863, + "high_lr": 0.0008842105263157894, + "low_lr": 1.768421052631579e-05, + "step": 220 + }, + { + "epoch": 0.5785667324128863, + "high_lr": 0.0008842105263157894, + "low_lr": 1.768421052631579e-05, + "step": 220 + }, + { + "epoch": 0.5785667324128863, + "high_lr": 0.0008842105263157894, + "low_lr": 1.768421052631579e-05, + "step": 220 + }, + { + "epoch": 0.5785667324128863, + "high_lr": 0.0008842105263157894, + "low_lr": 1.768421052631579e-05, + "step": 220 + }, + { + "epoch": 0.5785667324128863, + "high_lr": 0.0008842105263157894, + "low_lr": 1.768421052631579e-05, + "step": 220 + }, + { + "epoch": 0.5785667324128863, + "high_lr": 0.0008842105263157894, + "low_lr": 1.768421052631579e-05, + "step": 220 + }, + { + "epoch": 0.5785667324128863, + "high_lr": 0.0008842105263157894, + "low_lr": 1.768421052631579e-05, + "step": 220 + }, + { + "epoch": 0.5811965811965812, + "grad_norm": 0.8304501175880432, + "learning_rate": 0.0008836842105263157, + "loss": 1.706, + "step": 221 + }, + { + "epoch": 0.5811965811965812, + "high_lr": 0.0008836842105263157, + "low_lr": 1.7673684210526315e-05, + "step": 221 + }, + { + "epoch": 0.5811965811965812, + "high_lr": 0.0008836842105263157, + "low_lr": 1.7673684210526315e-05, + "step": 221 + }, + { + "epoch": 0.5811965811965812, + "high_lr": 0.0008836842105263157, + "low_lr": 1.7673684210526315e-05, + "step": 221 + }, + { + "epoch": 0.5811965811965812, + "high_lr": 0.0008836842105263157, + "low_lr": 1.7673684210526315e-05, + "step": 221 + }, + { + "epoch": 0.5811965811965812, + "high_lr": 0.0008836842105263157, + "low_lr": 1.7673684210526315e-05, + "step": 221 + }, + { + "epoch": 0.5811965811965812, + "high_lr": 0.0008836842105263157, + "low_lr": 1.7673684210526315e-05, + "step": 221 + }, + { + "epoch": 0.5811965811965812, + "high_lr": 0.0008836842105263157, + "low_lr": 1.7673684210526315e-05, + "step": 221 + }, + { + "epoch": 0.5811965811965812, + "high_lr": 0.0008836842105263157, + "low_lr": 1.7673684210526315e-05, + "step": 221 + }, + { + "epoch": 0.5838264299802761, + "grad_norm": 0.8978874087333679, + "learning_rate": 0.0008831578947368422, + "loss": 1.6779, + "step": 222 + }, + { + "epoch": 0.5838264299802761, + "high_lr": 0.0008831578947368422, + "low_lr": 1.7663157894736843e-05, + "step": 222 + }, + { + "epoch": 0.5838264299802761, + "high_lr": 0.0008831578947368422, + "low_lr": 1.7663157894736843e-05, + "step": 222 + }, + { + "epoch": 0.5838264299802761, + "high_lr": 0.0008831578947368422, + "low_lr": 1.7663157894736843e-05, + "step": 222 + }, + { + "epoch": 0.5838264299802761, + "high_lr": 0.0008831578947368422, + "low_lr": 1.7663157894736843e-05, + "step": 222 + }, + { + "epoch": 0.5838264299802761, + "high_lr": 0.0008831578947368422, + "low_lr": 1.7663157894736843e-05, + "step": 222 + }, + { + "epoch": 0.5838264299802761, + "high_lr": 0.0008831578947368422, + "low_lr": 1.7663157894736843e-05, + "step": 222 + }, + { + "epoch": 0.5838264299802761, + "high_lr": 0.0008831578947368422, + "low_lr": 1.7663157894736843e-05, + "step": 222 + }, + { + "epoch": 0.5838264299802761, + "high_lr": 0.0008831578947368422, + "low_lr": 1.7663157894736843e-05, + "step": 222 + }, + { + "epoch": 0.5864562787639711, + "grad_norm": 0.8363296389579773, + "learning_rate": 0.0008826315789473685, + "loss": 1.7498, + "step": 223 + }, + { + "epoch": 0.5864562787639711, + "high_lr": 0.0008826315789473685, + "low_lr": 1.765263157894737e-05, + "step": 223 + }, + { + "epoch": 0.5864562787639711, + "high_lr": 0.0008826315789473685, + "low_lr": 1.765263157894737e-05, + "step": 223 + }, + { + "epoch": 0.5864562787639711, + "high_lr": 0.0008826315789473685, + "low_lr": 1.765263157894737e-05, + "step": 223 + }, + { + "epoch": 0.5864562787639711, + "high_lr": 0.0008826315789473685, + "low_lr": 1.765263157894737e-05, + "step": 223 + }, + { + "epoch": 0.5864562787639711, + "high_lr": 0.0008826315789473685, + "low_lr": 1.765263157894737e-05, + "step": 223 + }, + { + "epoch": 0.5864562787639711, + "high_lr": 0.0008826315789473685, + "low_lr": 1.765263157894737e-05, + "step": 223 + }, + { + "epoch": 0.5864562787639711, + "high_lr": 0.0008826315789473685, + "low_lr": 1.765263157894737e-05, + "step": 223 + }, + { + "epoch": 0.5864562787639711, + "high_lr": 0.0008826315789473685, + "low_lr": 1.765263157894737e-05, + "step": 223 + }, + { + "epoch": 0.589086127547666, + "grad_norm": 1.024382472038269, + "learning_rate": 0.0008821052631578948, + "loss": 1.8, + "step": 224 + }, + { + "epoch": 0.589086127547666, + "high_lr": 0.0008821052631578948, + "low_lr": 1.7642105263157896e-05, + "step": 224 + }, + { + "epoch": 0.589086127547666, + "high_lr": 0.0008821052631578948, + "low_lr": 1.7642105263157896e-05, + "step": 224 + }, + { + "epoch": 0.589086127547666, + "high_lr": 0.0008821052631578948, + "low_lr": 1.7642105263157896e-05, + "step": 224 + }, + { + "epoch": 0.589086127547666, + "high_lr": 0.0008821052631578948, + "low_lr": 1.7642105263157896e-05, + "step": 224 + }, + { + "epoch": 0.589086127547666, + "high_lr": 0.0008821052631578948, + "low_lr": 1.7642105263157896e-05, + "step": 224 + }, + { + "epoch": 0.589086127547666, + "high_lr": 0.0008821052631578948, + "low_lr": 1.7642105263157896e-05, + "step": 224 + }, + { + "epoch": 0.589086127547666, + "high_lr": 0.0008821052631578948, + "low_lr": 1.7642105263157896e-05, + "step": 224 + }, + { + "epoch": 0.589086127547666, + "high_lr": 0.0008821052631578948, + "low_lr": 1.7642105263157896e-05, + "step": 224 + }, + { + "epoch": 0.591715976331361, + "grad_norm": 0.9602994918823242, + "learning_rate": 0.0008815789473684211, + "loss": 1.745, + "step": 225 + }, + { + "epoch": 0.591715976331361, + "high_lr": 0.0008815789473684211, + "low_lr": 1.763157894736842e-05, + "step": 225 + }, + { + "epoch": 0.591715976331361, + "high_lr": 0.0008815789473684211, + "low_lr": 1.763157894736842e-05, + "step": 225 + }, + { + "epoch": 0.591715976331361, + "high_lr": 0.0008815789473684211, + "low_lr": 1.763157894736842e-05, + "step": 225 + }, + { + "epoch": 0.591715976331361, + "high_lr": 0.0008815789473684211, + "low_lr": 1.763157894736842e-05, + "step": 225 + }, + { + "epoch": 0.591715976331361, + "high_lr": 0.0008815789473684211, + "low_lr": 1.763157894736842e-05, + "step": 225 + }, + { + "epoch": 0.591715976331361, + "high_lr": 0.0008815789473684211, + "low_lr": 1.763157894736842e-05, + "step": 225 + }, + { + "epoch": 0.591715976331361, + "high_lr": 0.0008815789473684211, + "low_lr": 1.763157894736842e-05, + "step": 225 + }, + { + "epoch": 0.591715976331361, + "high_lr": 0.0008815789473684211, + "low_lr": 1.763157894736842e-05, + "step": 225 + }, + { + "epoch": 0.5943458251150558, + "grad_norm": 0.8573108315467834, + "learning_rate": 0.0008810526315789475, + "loss": 1.7609, + "step": 226 + }, + { + "epoch": 0.5943458251150558, + "high_lr": 0.0008810526315789475, + "low_lr": 1.7621052631578948e-05, + "step": 226 + }, + { + "epoch": 0.5943458251150558, + "high_lr": 0.0008810526315789475, + "low_lr": 1.7621052631578948e-05, + "step": 226 + }, + { + "epoch": 0.5943458251150558, + "high_lr": 0.0008810526315789475, + "low_lr": 1.7621052631578948e-05, + "step": 226 + }, + { + "epoch": 0.5943458251150558, + "high_lr": 0.0008810526315789475, + "low_lr": 1.7621052631578948e-05, + "step": 226 + }, + { + "epoch": 0.5943458251150558, + "high_lr": 0.0008810526315789475, + "low_lr": 1.7621052631578948e-05, + "step": 226 + }, + { + "epoch": 0.5943458251150558, + "high_lr": 0.0008810526315789475, + "low_lr": 1.7621052631578948e-05, + "step": 226 + }, + { + "epoch": 0.5943458251150558, + "high_lr": 0.0008810526315789475, + "low_lr": 1.7621052631578948e-05, + "step": 226 + }, + { + "epoch": 0.5943458251150558, + "high_lr": 0.0008810526315789475, + "low_lr": 1.7621052631578948e-05, + "step": 226 + }, + { + "epoch": 0.5969756738987508, + "grad_norm": 0.8403958678245544, + "learning_rate": 0.0008805263157894737, + "loss": 1.7054, + "step": 227 + }, + { + "epoch": 0.5969756738987508, + "high_lr": 0.0008805263157894737, + "low_lr": 1.7610526315789476e-05, + "step": 227 + }, + { + "epoch": 0.5969756738987508, + "high_lr": 0.0008805263157894737, + "low_lr": 1.7610526315789476e-05, + "step": 227 + }, + { + "epoch": 0.5969756738987508, + "high_lr": 0.0008805263157894737, + "low_lr": 1.7610526315789476e-05, + "step": 227 + }, + { + "epoch": 0.5969756738987508, + "high_lr": 0.0008805263157894737, + "low_lr": 1.7610526315789476e-05, + "step": 227 + }, + { + "epoch": 0.5969756738987508, + "high_lr": 0.0008805263157894737, + "low_lr": 1.7610526315789476e-05, + "step": 227 + }, + { + "epoch": 0.5969756738987508, + "high_lr": 0.0008805263157894737, + "low_lr": 1.7610526315789476e-05, + "step": 227 + }, + { + "epoch": 0.5969756738987508, + "high_lr": 0.0008805263157894737, + "low_lr": 1.7610526315789476e-05, + "step": 227 + }, + { + "epoch": 0.5969756738987508, + "high_lr": 0.0008805263157894737, + "low_lr": 1.7610526315789476e-05, + "step": 227 + }, + { + "epoch": 0.5996055226824457, + "grad_norm": 0.922599732875824, + "learning_rate": 0.00088, + "loss": 1.7903, + "step": 228 + }, + { + "epoch": 0.5996055226824457, + "high_lr": 0.00088, + "low_lr": 1.76e-05, + "step": 228 + }, + { + "epoch": 0.5996055226824457, + "high_lr": 0.00088, + "low_lr": 1.76e-05, + "step": 228 + }, + { + "epoch": 0.5996055226824457, + "high_lr": 0.00088, + "low_lr": 1.76e-05, + "step": 228 + }, + { + "epoch": 0.5996055226824457, + "high_lr": 0.00088, + "low_lr": 1.76e-05, + "step": 228 + }, + { + "epoch": 0.5996055226824457, + "high_lr": 0.00088, + "low_lr": 1.76e-05, + "step": 228 + }, + { + "epoch": 0.5996055226824457, + "high_lr": 0.00088, + "low_lr": 1.76e-05, + "step": 228 + }, + { + "epoch": 0.5996055226824457, + "high_lr": 0.00088, + "low_lr": 1.76e-05, + "step": 228 + }, + { + "epoch": 0.5996055226824457, + "high_lr": 0.00088, + "low_lr": 1.76e-05, + "step": 228 + }, + { + "epoch": 0.6022353714661407, + "grad_norm": 0.8383209705352783, + "learning_rate": 0.0008794736842105263, + "loss": 1.7644, + "step": 229 + }, + { + "epoch": 0.6022353714661407, + "high_lr": 0.0008794736842105263, + "low_lr": 1.758947368421053e-05, + "step": 229 + }, + { + "epoch": 0.6022353714661407, + "high_lr": 0.0008794736842105263, + "low_lr": 1.758947368421053e-05, + "step": 229 + }, + { + "epoch": 0.6022353714661407, + "high_lr": 0.0008794736842105263, + "low_lr": 1.758947368421053e-05, + "step": 229 + }, + { + "epoch": 0.6022353714661407, + "high_lr": 0.0008794736842105263, + "low_lr": 1.758947368421053e-05, + "step": 229 + }, + { + "epoch": 0.6022353714661407, + "high_lr": 0.0008794736842105263, + "low_lr": 1.758947368421053e-05, + "step": 229 + }, + { + "epoch": 0.6022353714661407, + "high_lr": 0.0008794736842105263, + "low_lr": 1.758947368421053e-05, + "step": 229 + }, + { + "epoch": 0.6022353714661407, + "high_lr": 0.0008794736842105263, + "low_lr": 1.758947368421053e-05, + "step": 229 + }, + { + "epoch": 0.6022353714661407, + "high_lr": 0.0008794736842105263, + "low_lr": 1.758947368421053e-05, + "step": 229 + }, + { + "epoch": 0.6048652202498357, + "grad_norm": 78.71210479736328, + "learning_rate": 0.0008789473684210526, + "loss": 1.8008, + "step": 230 + }, + { + "epoch": 0.6048652202498357, + "high_lr": 0.0008789473684210526, + "low_lr": 1.7578947368421054e-05, + "step": 230 + }, + { + "epoch": 0.6048652202498357, + "high_lr": 0.0008789473684210526, + "low_lr": 1.7578947368421054e-05, + "step": 230 + }, + { + "epoch": 0.6048652202498357, + "high_lr": 0.0008789473684210526, + "low_lr": 1.7578947368421054e-05, + "step": 230 + }, + { + "epoch": 0.6048652202498357, + "high_lr": 0.0008789473684210526, + "low_lr": 1.7578947368421054e-05, + "step": 230 + }, + { + "epoch": 0.6048652202498357, + "high_lr": 0.0008789473684210526, + "low_lr": 1.7578947368421054e-05, + "step": 230 + }, + { + "epoch": 0.6048652202498357, + "high_lr": 0.0008789473684210526, + "low_lr": 1.7578947368421054e-05, + "step": 230 + }, + { + "epoch": 0.6048652202498357, + "high_lr": 0.0008789473684210526, + "low_lr": 1.7578947368421054e-05, + "step": 230 + }, + { + "epoch": 0.6048652202498357, + "high_lr": 0.0008789473684210526, + "low_lr": 1.7578947368421054e-05, + "step": 230 + }, + { + "epoch": 0.6074950690335306, + "grad_norm": 0.861912727355957, + "learning_rate": 0.000878421052631579, + "loss": 1.8102, + "step": 231 + }, + { + "epoch": 0.6074950690335306, + "high_lr": 0.000878421052631579, + "low_lr": 1.756842105263158e-05, + "step": 231 + }, + { + "epoch": 0.6074950690335306, + "high_lr": 0.000878421052631579, + "low_lr": 1.756842105263158e-05, + "step": 231 + }, + { + "epoch": 0.6074950690335306, + "high_lr": 0.000878421052631579, + "low_lr": 1.756842105263158e-05, + "step": 231 + }, + { + "epoch": 0.6074950690335306, + "high_lr": 0.000878421052631579, + "low_lr": 1.756842105263158e-05, + "step": 231 + }, + { + "epoch": 0.6074950690335306, + "high_lr": 0.000878421052631579, + "low_lr": 1.756842105263158e-05, + "step": 231 + }, + { + "epoch": 0.6074950690335306, + "high_lr": 0.000878421052631579, + "low_lr": 1.756842105263158e-05, + "step": 231 + }, + { + "epoch": 0.6074950690335306, + "high_lr": 0.000878421052631579, + "low_lr": 1.756842105263158e-05, + "step": 231 + }, + { + "epoch": 0.6074950690335306, + "high_lr": 0.000878421052631579, + "low_lr": 1.756842105263158e-05, + "step": 231 + }, + { + "epoch": 0.6101249178172256, + "grad_norm": 0.8288989067077637, + "learning_rate": 0.0008778947368421053, + "loss": 1.6689, + "step": 232 + }, + { + "epoch": 0.6101249178172256, + "high_lr": 0.0008778947368421053, + "low_lr": 1.7557894736842106e-05, + "step": 232 + }, + { + "epoch": 0.6101249178172256, + "high_lr": 0.0008778947368421053, + "low_lr": 1.7557894736842106e-05, + "step": 232 + }, + { + "epoch": 0.6101249178172256, + "high_lr": 0.0008778947368421053, + "low_lr": 1.7557894736842106e-05, + "step": 232 + }, + { + "epoch": 0.6101249178172256, + "high_lr": 0.0008778947368421053, + "low_lr": 1.7557894736842106e-05, + "step": 232 + }, + { + "epoch": 0.6101249178172256, + "high_lr": 0.0008778947368421053, + "low_lr": 1.7557894736842106e-05, + "step": 232 + }, + { + "epoch": 0.6101249178172256, + "high_lr": 0.0008778947368421053, + "low_lr": 1.7557894736842106e-05, + "step": 232 + }, + { + "epoch": 0.6101249178172256, + "high_lr": 0.0008778947368421053, + "low_lr": 1.7557894736842106e-05, + "step": 232 + }, + { + "epoch": 0.6101249178172256, + "high_lr": 0.0008778947368421053, + "low_lr": 1.7557894736842106e-05, + "step": 232 + }, + { + "epoch": 0.6127547666009204, + "grad_norm": 0.8340638279914856, + "learning_rate": 0.0008773684210526316, + "loss": 1.7501, + "step": 233 + }, + { + "epoch": 0.6127547666009204, + "high_lr": 0.0008773684210526316, + "low_lr": 1.7547368421052634e-05, + "step": 233 + }, + { + "epoch": 0.6127547666009204, + "high_lr": 0.0008773684210526316, + "low_lr": 1.7547368421052634e-05, + "step": 233 + }, + { + "epoch": 0.6127547666009204, + "high_lr": 0.0008773684210526316, + "low_lr": 1.7547368421052634e-05, + "step": 233 + }, + { + "epoch": 0.6127547666009204, + "high_lr": 0.0008773684210526316, + "low_lr": 1.7547368421052634e-05, + "step": 233 + }, + { + "epoch": 0.6127547666009204, + "high_lr": 0.0008773684210526316, + "low_lr": 1.7547368421052634e-05, + "step": 233 + }, + { + "epoch": 0.6127547666009204, + "high_lr": 0.0008773684210526316, + "low_lr": 1.7547368421052634e-05, + "step": 233 + }, + { + "epoch": 0.6127547666009204, + "high_lr": 0.0008773684210526316, + "low_lr": 1.7547368421052634e-05, + "step": 233 + }, + { + "epoch": 0.6127547666009204, + "high_lr": 0.0008773684210526316, + "low_lr": 1.7547368421052634e-05, + "step": 233 + }, + { + "epoch": 0.6153846153846154, + "grad_norm": 0.7775981426239014, + "learning_rate": 0.0008768421052631579, + "loss": 1.6956, + "step": 234 + }, + { + "epoch": 0.6153846153846154, + "high_lr": 0.0008768421052631579, + "low_lr": 1.753684210526316e-05, + "step": 234 + }, + { + "epoch": 0.6153846153846154, + "high_lr": 0.0008768421052631579, + "low_lr": 1.753684210526316e-05, + "step": 234 + }, + { + "epoch": 0.6153846153846154, + "high_lr": 0.0008768421052631579, + "low_lr": 1.753684210526316e-05, + "step": 234 + }, + { + "epoch": 0.6153846153846154, + "high_lr": 0.0008768421052631579, + "low_lr": 1.753684210526316e-05, + "step": 234 + }, + { + "epoch": 0.6153846153846154, + "high_lr": 0.0008768421052631579, + "low_lr": 1.753684210526316e-05, + "step": 234 + }, + { + "epoch": 0.6153846153846154, + "high_lr": 0.0008768421052631579, + "low_lr": 1.753684210526316e-05, + "step": 234 + }, + { + "epoch": 0.6153846153846154, + "high_lr": 0.0008768421052631579, + "low_lr": 1.753684210526316e-05, + "step": 234 + }, + { + "epoch": 0.6153846153846154, + "high_lr": 0.0008768421052631579, + "low_lr": 1.753684210526316e-05, + "step": 234 + }, + { + "epoch": 0.6180144641683103, + "grad_norm": 0.8985382914543152, + "learning_rate": 0.0008763157894736841, + "loss": 1.7198, + "step": 235 + }, + { + "epoch": 0.6180144641683103, + "high_lr": 0.0008763157894736841, + "low_lr": 1.7526315789473683e-05, + "step": 235 + }, + { + "epoch": 0.6180144641683103, + "high_lr": 0.0008763157894736841, + "low_lr": 1.7526315789473683e-05, + "step": 235 + }, + { + "epoch": 0.6180144641683103, + "high_lr": 0.0008763157894736841, + "low_lr": 1.7526315789473683e-05, + "step": 235 + }, + { + "epoch": 0.6180144641683103, + "high_lr": 0.0008763157894736841, + "low_lr": 1.7526315789473683e-05, + "step": 235 + }, + { + "epoch": 0.6180144641683103, + "high_lr": 0.0008763157894736841, + "low_lr": 1.7526315789473683e-05, + "step": 235 + }, + { + "epoch": 0.6180144641683103, + "high_lr": 0.0008763157894736841, + "low_lr": 1.7526315789473683e-05, + "step": 235 + }, + { + "epoch": 0.6180144641683103, + "high_lr": 0.0008763157894736841, + "low_lr": 1.7526315789473683e-05, + "step": 235 + }, + { + "epoch": 0.6180144641683103, + "high_lr": 0.0008763157894736841, + "low_lr": 1.7526315789473683e-05, + "step": 235 + }, + { + "epoch": 0.6206443129520053, + "grad_norm": 0.8429329991340637, + "learning_rate": 0.0008757894736842105, + "loss": 1.6871, + "step": 236 + }, + { + "epoch": 0.6206443129520053, + "high_lr": 0.0008757894736842105, + "low_lr": 1.751578947368421e-05, + "step": 236 + }, + { + "epoch": 0.6206443129520053, + "high_lr": 0.0008757894736842105, + "low_lr": 1.751578947368421e-05, + "step": 236 + }, + { + "epoch": 0.6206443129520053, + "high_lr": 0.0008757894736842105, + "low_lr": 1.751578947368421e-05, + "step": 236 + }, + { + "epoch": 0.6206443129520053, + "high_lr": 0.0008757894736842105, + "low_lr": 1.751578947368421e-05, + "step": 236 + }, + { + "epoch": 0.6206443129520053, + "high_lr": 0.0008757894736842105, + "low_lr": 1.751578947368421e-05, + "step": 236 + }, + { + "epoch": 0.6206443129520053, + "high_lr": 0.0008757894736842105, + "low_lr": 1.751578947368421e-05, + "step": 236 + }, + { + "epoch": 0.6206443129520053, + "high_lr": 0.0008757894736842105, + "low_lr": 1.751578947368421e-05, + "step": 236 + }, + { + "epoch": 0.6206443129520053, + "high_lr": 0.0008757894736842105, + "low_lr": 1.751578947368421e-05, + "step": 236 + }, + { + "epoch": 0.6232741617357002, + "grad_norm": 0.8710259795188904, + "learning_rate": 0.0008752631578947368, + "loss": 1.6992, + "step": 237 + }, + { + "epoch": 0.6232741617357002, + "high_lr": 0.0008752631578947368, + "low_lr": 1.750526315789474e-05, + "step": 237 + }, + { + "epoch": 0.6232741617357002, + "high_lr": 0.0008752631578947368, + "low_lr": 1.750526315789474e-05, + "step": 237 + }, + { + "epoch": 0.6232741617357002, + "high_lr": 0.0008752631578947368, + "low_lr": 1.750526315789474e-05, + "step": 237 + }, + { + "epoch": 0.6232741617357002, + "high_lr": 0.0008752631578947368, + "low_lr": 1.750526315789474e-05, + "step": 237 + }, + { + "epoch": 0.6232741617357002, + "high_lr": 0.0008752631578947368, + "low_lr": 1.750526315789474e-05, + "step": 237 + }, + { + "epoch": 0.6232741617357002, + "high_lr": 0.0008752631578947368, + "low_lr": 1.750526315789474e-05, + "step": 237 + }, + { + "epoch": 0.6232741617357002, + "high_lr": 0.0008752631578947368, + "low_lr": 1.750526315789474e-05, + "step": 237 + }, + { + "epoch": 0.6232741617357002, + "high_lr": 0.0008752631578947368, + "low_lr": 1.750526315789474e-05, + "step": 237 + }, + { + "epoch": 0.6259040105193951, + "grad_norm": 0.9169574975967407, + "learning_rate": 0.0008747368421052632, + "loss": 1.7979, + "step": 238 + }, + { + "epoch": 0.6259040105193951, + "high_lr": 0.0008747368421052632, + "low_lr": 1.7494736842105264e-05, + "step": 238 + }, + { + "epoch": 0.6259040105193951, + "high_lr": 0.0008747368421052632, + "low_lr": 1.7494736842105264e-05, + "step": 238 + }, + { + "epoch": 0.6259040105193951, + "high_lr": 0.0008747368421052632, + "low_lr": 1.7494736842105264e-05, + "step": 238 + }, + { + "epoch": 0.6259040105193951, + "high_lr": 0.0008747368421052632, + "low_lr": 1.7494736842105264e-05, + "step": 238 + }, + { + "epoch": 0.6259040105193951, + "high_lr": 0.0008747368421052632, + "low_lr": 1.7494736842105264e-05, + "step": 238 + }, + { + "epoch": 0.6259040105193951, + "high_lr": 0.0008747368421052632, + "low_lr": 1.7494736842105264e-05, + "step": 238 + }, + { + "epoch": 0.6259040105193951, + "high_lr": 0.0008747368421052632, + "low_lr": 1.7494736842105264e-05, + "step": 238 + }, + { + "epoch": 0.6259040105193951, + "high_lr": 0.0008747368421052632, + "low_lr": 1.7494736842105264e-05, + "step": 238 + }, + { + "epoch": 0.62853385930309, + "grad_norm": 2.7285892963409424, + "learning_rate": 0.0008742105263157895, + "loss": 1.8977, + "step": 239 + }, + { + "epoch": 0.62853385930309, + "high_lr": 0.0008742105263157895, + "low_lr": 1.748421052631579e-05, + "step": 239 + }, + { + "epoch": 0.62853385930309, + "high_lr": 0.0008742105263157895, + "low_lr": 1.748421052631579e-05, + "step": 239 + }, + { + "epoch": 0.62853385930309, + "high_lr": 0.0008742105263157895, + "low_lr": 1.748421052631579e-05, + "step": 239 + }, + { + "epoch": 0.62853385930309, + "high_lr": 0.0008742105263157895, + "low_lr": 1.748421052631579e-05, + "step": 239 + }, + { + "epoch": 0.62853385930309, + "high_lr": 0.0008742105263157895, + "low_lr": 1.748421052631579e-05, + "step": 239 + }, + { + "epoch": 0.62853385930309, + "high_lr": 0.0008742105263157895, + "low_lr": 1.748421052631579e-05, + "step": 239 + }, + { + "epoch": 0.62853385930309, + "high_lr": 0.0008742105263157895, + "low_lr": 1.748421052631579e-05, + "step": 239 + }, + { + "epoch": 0.62853385930309, + "high_lr": 0.0008742105263157895, + "low_lr": 1.748421052631579e-05, + "step": 239 + }, + { + "epoch": 0.631163708086785, + "grad_norm": 0.8813413381576538, + "learning_rate": 0.0008736842105263159, + "loss": 1.676, + "step": 240 + }, + { + "epoch": 0.631163708086785, + "high_lr": 0.0008736842105263159, + "low_lr": 1.7473684210526317e-05, + "step": 240 + }, + { + "epoch": 0.631163708086785, + "high_lr": 0.0008736842105263159, + "low_lr": 1.7473684210526317e-05, + "step": 240 + }, + { + "epoch": 0.631163708086785, + "high_lr": 0.0008736842105263159, + "low_lr": 1.7473684210526317e-05, + "step": 240 + }, + { + "epoch": 0.631163708086785, + "high_lr": 0.0008736842105263159, + "low_lr": 1.7473684210526317e-05, + "step": 240 + }, + { + "epoch": 0.631163708086785, + "high_lr": 0.0008736842105263159, + "low_lr": 1.7473684210526317e-05, + "step": 240 + }, + { + "epoch": 0.631163708086785, + "high_lr": 0.0008736842105263159, + "low_lr": 1.7473684210526317e-05, + "step": 240 + }, + { + "epoch": 0.631163708086785, + "high_lr": 0.0008736842105263159, + "low_lr": 1.7473684210526317e-05, + "step": 240 + }, + { + "epoch": 0.631163708086785, + "high_lr": 0.0008736842105263159, + "low_lr": 1.7473684210526317e-05, + "step": 240 + }, + { + "epoch": 0.63379355687048, + "grad_norm": 0.8603155612945557, + "learning_rate": 0.0008731578947368422, + "loss": 1.6664, + "step": 241 + }, + { + "epoch": 0.63379355687048, + "high_lr": 0.0008731578947368422, + "low_lr": 1.7463157894736845e-05, + "step": 241 + }, + { + "epoch": 0.63379355687048, + "high_lr": 0.0008731578947368422, + "low_lr": 1.7463157894736845e-05, + "step": 241 + }, + { + "epoch": 0.63379355687048, + "high_lr": 0.0008731578947368422, + "low_lr": 1.7463157894736845e-05, + "step": 241 + }, + { + "epoch": 0.63379355687048, + "high_lr": 0.0008731578947368422, + "low_lr": 1.7463157894736845e-05, + "step": 241 + }, + { + "epoch": 0.63379355687048, + "high_lr": 0.0008731578947368422, + "low_lr": 1.7463157894736845e-05, + "step": 241 + }, + { + "epoch": 0.63379355687048, + "high_lr": 0.0008731578947368422, + "low_lr": 1.7463157894736845e-05, + "step": 241 + }, + { + "epoch": 0.63379355687048, + "high_lr": 0.0008731578947368422, + "low_lr": 1.7463157894736845e-05, + "step": 241 + }, + { + "epoch": 0.63379355687048, + "high_lr": 0.0008731578947368422, + "low_lr": 1.7463157894736845e-05, + "step": 241 + }, + { + "epoch": 0.6364234056541749, + "grad_norm": 0.935298502445221, + "learning_rate": 0.0008726315789473685, + "loss": 1.6882, + "step": 242 + }, + { + "epoch": 0.6364234056541749, + "high_lr": 0.0008726315789473685, + "low_lr": 1.745263157894737e-05, + "step": 242 + }, + { + "epoch": 0.6364234056541749, + "high_lr": 0.0008726315789473685, + "low_lr": 1.745263157894737e-05, + "step": 242 + }, + { + "epoch": 0.6364234056541749, + "high_lr": 0.0008726315789473685, + "low_lr": 1.745263157894737e-05, + "step": 242 + }, + { + "epoch": 0.6364234056541749, + "high_lr": 0.0008726315789473685, + "low_lr": 1.745263157894737e-05, + "step": 242 + }, + { + "epoch": 0.6364234056541749, + "high_lr": 0.0008726315789473685, + "low_lr": 1.745263157894737e-05, + "step": 242 + }, + { + "epoch": 0.6364234056541749, + "high_lr": 0.0008726315789473685, + "low_lr": 1.745263157894737e-05, + "step": 242 + }, + { + "epoch": 0.6364234056541749, + "high_lr": 0.0008726315789473685, + "low_lr": 1.745263157894737e-05, + "step": 242 + }, + { + "epoch": 0.6364234056541749, + "high_lr": 0.0008726315789473685, + "low_lr": 1.745263157894737e-05, + "step": 242 + }, + { + "epoch": 0.6390532544378699, + "grad_norm": 0.9325158596038818, + "learning_rate": 0.0008721052631578948, + "loss": 1.6877, + "step": 243 + }, + { + "epoch": 0.6390532544378699, + "high_lr": 0.0008721052631578948, + "low_lr": 1.7442105263157894e-05, + "step": 243 + }, + { + "epoch": 0.6390532544378699, + "high_lr": 0.0008721052631578948, + "low_lr": 1.7442105263157894e-05, + "step": 243 + }, + { + "epoch": 0.6390532544378699, + "high_lr": 0.0008721052631578948, + "low_lr": 1.7442105263157894e-05, + "step": 243 + }, + { + "epoch": 0.6390532544378699, + "high_lr": 0.0008721052631578948, + "low_lr": 1.7442105263157894e-05, + "step": 243 + }, + { + "epoch": 0.6390532544378699, + "high_lr": 0.0008721052631578948, + "low_lr": 1.7442105263157894e-05, + "step": 243 + }, + { + "epoch": 0.6390532544378699, + "high_lr": 0.0008721052631578948, + "low_lr": 1.7442105263157894e-05, + "step": 243 + }, + { + "epoch": 0.6390532544378699, + "high_lr": 0.0008721052631578948, + "low_lr": 1.7442105263157894e-05, + "step": 243 + }, + { + "epoch": 0.6390532544378699, + "high_lr": 0.0008721052631578948, + "low_lr": 1.7442105263157894e-05, + "step": 243 + }, + { + "epoch": 0.6416831032215647, + "grad_norm": 0.8132498860359192, + "learning_rate": 0.000871578947368421, + "loss": 1.7054, + "step": 244 + }, + { + "epoch": 0.6416831032215647, + "high_lr": 0.000871578947368421, + "low_lr": 1.7431578947368422e-05, + "step": 244 + }, + { + "epoch": 0.6416831032215647, + "high_lr": 0.000871578947368421, + "low_lr": 1.7431578947368422e-05, + "step": 244 + }, + { + "epoch": 0.6416831032215647, + "high_lr": 0.000871578947368421, + "low_lr": 1.7431578947368422e-05, + "step": 244 + }, + { + "epoch": 0.6416831032215647, + "high_lr": 0.000871578947368421, + "low_lr": 1.7431578947368422e-05, + "step": 244 + }, + { + "epoch": 0.6416831032215647, + "high_lr": 0.000871578947368421, + "low_lr": 1.7431578947368422e-05, + "step": 244 + }, + { + "epoch": 0.6416831032215647, + "high_lr": 0.000871578947368421, + "low_lr": 1.7431578947368422e-05, + "step": 244 + }, + { + "epoch": 0.6416831032215647, + "high_lr": 0.000871578947368421, + "low_lr": 1.7431578947368422e-05, + "step": 244 + }, + { + "epoch": 0.6416831032215647, + "high_lr": 0.000871578947368421, + "low_lr": 1.7431578947368422e-05, + "step": 244 + }, + { + "epoch": 0.6443129520052597, + "grad_norm": 1.1216504573822021, + "learning_rate": 0.0008710526315789474, + "loss": 1.6937, + "step": 245 + }, + { + "epoch": 0.6443129520052597, + "high_lr": 0.0008710526315789474, + "low_lr": 1.742105263157895e-05, + "step": 245 + }, + { + "epoch": 0.6443129520052597, + "high_lr": 0.0008710526315789474, + "low_lr": 1.742105263157895e-05, + "step": 245 + }, + { + "epoch": 0.6443129520052597, + "high_lr": 0.0008710526315789474, + "low_lr": 1.742105263157895e-05, + "step": 245 + }, + { + "epoch": 0.6443129520052597, + "high_lr": 0.0008710526315789474, + "low_lr": 1.742105263157895e-05, + "step": 245 + }, + { + "epoch": 0.6443129520052597, + "high_lr": 0.0008710526315789474, + "low_lr": 1.742105263157895e-05, + "step": 245 + }, + { + "epoch": 0.6443129520052597, + "high_lr": 0.0008710526315789474, + "low_lr": 1.742105263157895e-05, + "step": 245 + }, + { + "epoch": 0.6443129520052597, + "high_lr": 0.0008710526315789474, + "low_lr": 1.742105263157895e-05, + "step": 245 + }, + { + "epoch": 0.6443129520052597, + "high_lr": 0.0008710526315789474, + "low_lr": 1.742105263157895e-05, + "step": 245 + }, + { + "epoch": 0.6469428007889546, + "grad_norm": 0.8950498700141907, + "learning_rate": 0.0008705263157894737, + "loss": 1.6761, + "step": 246 + }, + { + "epoch": 0.6469428007889546, + "high_lr": 0.0008705263157894737, + "low_lr": 1.7410526315789475e-05, + "step": 246 + }, + { + "epoch": 0.6469428007889546, + "high_lr": 0.0008705263157894737, + "low_lr": 1.7410526315789475e-05, + "step": 246 + }, + { + "epoch": 0.6469428007889546, + "high_lr": 0.0008705263157894737, + "low_lr": 1.7410526315789475e-05, + "step": 246 + }, + { + "epoch": 0.6469428007889546, + "high_lr": 0.0008705263157894737, + "low_lr": 1.7410526315789475e-05, + "step": 246 + }, + { + "epoch": 0.6469428007889546, + "high_lr": 0.0008705263157894737, + "low_lr": 1.7410526315789475e-05, + "step": 246 + }, + { + "epoch": 0.6469428007889546, + "high_lr": 0.0008705263157894737, + "low_lr": 1.7410526315789475e-05, + "step": 246 + }, + { + "epoch": 0.6469428007889546, + "high_lr": 0.0008705263157894737, + "low_lr": 1.7410526315789475e-05, + "step": 246 + }, + { + "epoch": 0.6469428007889546, + "high_lr": 0.0008705263157894737, + "low_lr": 1.7410526315789475e-05, + "step": 246 + }, + { + "epoch": 0.6495726495726496, + "grad_norm": 0.9288367033004761, + "learning_rate": 0.00087, + "loss": 1.634, + "step": 247 + }, + { + "epoch": 0.6495726495726496, + "high_lr": 0.00087, + "low_lr": 1.7400000000000003e-05, + "step": 247 + }, + { + "epoch": 0.6495726495726496, + "high_lr": 0.00087, + "low_lr": 1.7400000000000003e-05, + "step": 247 + }, + { + "epoch": 0.6495726495726496, + "high_lr": 0.00087, + "low_lr": 1.7400000000000003e-05, + "step": 247 + }, + { + "epoch": 0.6495726495726496, + "high_lr": 0.00087, + "low_lr": 1.7400000000000003e-05, + "step": 247 + }, + { + "epoch": 0.6495726495726496, + "high_lr": 0.00087, + "low_lr": 1.7400000000000003e-05, + "step": 247 + }, + { + "epoch": 0.6495726495726496, + "high_lr": 0.00087, + "low_lr": 1.7400000000000003e-05, + "step": 247 + }, + { + "epoch": 0.6495726495726496, + "high_lr": 0.00087, + "low_lr": 1.7400000000000003e-05, + "step": 247 + }, + { + "epoch": 0.6495726495726496, + "high_lr": 0.00087, + "low_lr": 1.7400000000000003e-05, + "step": 247 + }, + { + "epoch": 0.6522024983563445, + "grad_norm": 0.8630435466766357, + "learning_rate": 0.0008694736842105263, + "loss": 1.695, + "step": 248 + }, + { + "epoch": 0.6522024983563445, + "high_lr": 0.0008694736842105263, + "low_lr": 1.7389473684210527e-05, + "step": 248 + }, + { + "epoch": 0.6522024983563445, + "high_lr": 0.0008694736842105263, + "low_lr": 1.7389473684210527e-05, + "step": 248 + }, + { + "epoch": 0.6522024983563445, + "high_lr": 0.0008694736842105263, + "low_lr": 1.7389473684210527e-05, + "step": 248 + }, + { + "epoch": 0.6522024983563445, + "high_lr": 0.0008694736842105263, + "low_lr": 1.7389473684210527e-05, + "step": 248 + }, + { + "epoch": 0.6522024983563445, + "high_lr": 0.0008694736842105263, + "low_lr": 1.7389473684210527e-05, + "step": 248 + }, + { + "epoch": 0.6522024983563445, + "high_lr": 0.0008694736842105263, + "low_lr": 1.7389473684210527e-05, + "step": 248 + }, + { + "epoch": 0.6522024983563445, + "high_lr": 0.0008694736842105263, + "low_lr": 1.7389473684210527e-05, + "step": 248 + }, + { + "epoch": 0.6522024983563445, + "high_lr": 0.0008694736842105263, + "low_lr": 1.7389473684210527e-05, + "step": 248 + }, + { + "epoch": 0.6548323471400395, + "grad_norm": 0.9438216090202332, + "learning_rate": 0.0008689473684210526, + "loss": 1.7483, + "step": 249 + }, + { + "epoch": 0.6548323471400395, + "high_lr": 0.0008689473684210526, + "low_lr": 1.7378947368421052e-05, + "step": 249 + }, + { + "epoch": 0.6548323471400395, + "high_lr": 0.0008689473684210526, + "low_lr": 1.7378947368421052e-05, + "step": 249 + }, + { + "epoch": 0.6548323471400395, + "high_lr": 0.0008689473684210526, + "low_lr": 1.7378947368421052e-05, + "step": 249 + }, + { + "epoch": 0.6548323471400395, + "high_lr": 0.0008689473684210526, + "low_lr": 1.7378947368421052e-05, + "step": 249 + }, + { + "epoch": 0.6548323471400395, + "high_lr": 0.0008689473684210526, + "low_lr": 1.7378947368421052e-05, + "step": 249 + }, + { + "epoch": 0.6548323471400395, + "high_lr": 0.0008689473684210526, + "low_lr": 1.7378947368421052e-05, + "step": 249 + }, + { + "epoch": 0.6548323471400395, + "high_lr": 0.0008689473684210526, + "low_lr": 1.7378947368421052e-05, + "step": 249 + }, + { + "epoch": 0.6548323471400395, + "high_lr": 0.0008689473684210526, + "low_lr": 1.7378947368421052e-05, + "step": 249 + }, + { + "epoch": 0.6574621959237343, + "grad_norm": 0.9484896063804626, + "learning_rate": 0.000868421052631579, + "loss": 1.7012, + "step": 250 + }, + { + "epoch": 0.6574621959237343, + "high_lr": 0.000868421052631579, + "low_lr": 1.736842105263158e-05, + "step": 250 + }, + { + "epoch": 0.6574621959237343, + "high_lr": 0.000868421052631579, + "low_lr": 1.736842105263158e-05, + "step": 250 + }, + { + "epoch": 0.6574621959237343, + "high_lr": 0.000868421052631579, + "low_lr": 1.736842105263158e-05, + "step": 250 + }, + { + "epoch": 0.6574621959237343, + "high_lr": 0.000868421052631579, + "low_lr": 1.736842105263158e-05, + "step": 250 + }, + { + "epoch": 0.6574621959237343, + "high_lr": 0.000868421052631579, + "low_lr": 1.736842105263158e-05, + "step": 250 + }, + { + "epoch": 0.6574621959237343, + "high_lr": 0.000868421052631579, + "low_lr": 1.736842105263158e-05, + "step": 250 + }, + { + "epoch": 0.6574621959237343, + "high_lr": 0.000868421052631579, + "low_lr": 1.736842105263158e-05, + "step": 250 + }, + { + "epoch": 0.6574621959237343, + "high_lr": 0.000868421052631579, + "low_lr": 1.736842105263158e-05, + "step": 250 + }, + { + "epoch": 0.6600920447074293, + "grad_norm": 0.886074423789978, + "learning_rate": 0.0008678947368421053, + "loss": 1.6723, + "step": 251 + }, + { + "epoch": 0.6600920447074293, + "high_lr": 0.0008678947368421053, + "low_lr": 1.7357894736842108e-05, + "step": 251 + }, + { + "epoch": 0.6600920447074293, + "high_lr": 0.0008678947368421053, + "low_lr": 1.7357894736842108e-05, + "step": 251 + }, + { + "epoch": 0.6600920447074293, + "high_lr": 0.0008678947368421053, + "low_lr": 1.7357894736842108e-05, + "step": 251 + }, + { + "epoch": 0.6600920447074293, + "high_lr": 0.0008678947368421053, + "low_lr": 1.7357894736842108e-05, + "step": 251 + }, + { + "epoch": 0.6600920447074293, + "high_lr": 0.0008678947368421053, + "low_lr": 1.7357894736842108e-05, + "step": 251 + }, + { + "epoch": 0.6600920447074293, + "high_lr": 0.0008678947368421053, + "low_lr": 1.7357894736842108e-05, + "step": 251 + }, + { + "epoch": 0.6600920447074293, + "high_lr": 0.0008678947368421053, + "low_lr": 1.7357894736842108e-05, + "step": 251 + }, + { + "epoch": 0.6600920447074293, + "high_lr": 0.0008678947368421053, + "low_lr": 1.7357894736842108e-05, + "step": 251 + }, + { + "epoch": 0.6627218934911243, + "grad_norm": 0.9116823673248291, + "learning_rate": 0.0008673684210526315, + "loss": 1.7146, + "step": 252 + }, + { + "epoch": 0.6627218934911243, + "high_lr": 0.0008673684210526315, + "low_lr": 1.7347368421052633e-05, + "step": 252 + }, + { + "epoch": 0.6627218934911243, + "high_lr": 0.0008673684210526315, + "low_lr": 1.7347368421052633e-05, + "step": 252 + }, + { + "epoch": 0.6627218934911243, + "high_lr": 0.0008673684210526315, + "low_lr": 1.7347368421052633e-05, + "step": 252 + }, + { + "epoch": 0.6627218934911243, + "high_lr": 0.0008673684210526315, + "low_lr": 1.7347368421052633e-05, + "step": 252 + }, + { + "epoch": 0.6627218934911243, + "high_lr": 0.0008673684210526315, + "low_lr": 1.7347368421052633e-05, + "step": 252 + }, + { + "epoch": 0.6627218934911243, + "high_lr": 0.0008673684210526315, + "low_lr": 1.7347368421052633e-05, + "step": 252 + }, + { + "epoch": 0.6627218934911243, + "high_lr": 0.0008673684210526315, + "low_lr": 1.7347368421052633e-05, + "step": 252 + }, + { + "epoch": 0.6627218934911243, + "high_lr": 0.0008673684210526315, + "low_lr": 1.7347368421052633e-05, + "step": 252 + }, + { + "epoch": 0.6653517422748192, + "grad_norm": 0.8356298208236694, + "learning_rate": 0.0008668421052631578, + "loss": 1.658, + "step": 253 + }, + { + "epoch": 0.6653517422748192, + "high_lr": 0.0008668421052631578, + "low_lr": 1.7336842105263157e-05, + "step": 253 + }, + { + "epoch": 0.6653517422748192, + "high_lr": 0.0008668421052631578, + "low_lr": 1.7336842105263157e-05, + "step": 253 + }, + { + "epoch": 0.6653517422748192, + "high_lr": 0.0008668421052631578, + "low_lr": 1.7336842105263157e-05, + "step": 253 + }, + { + "epoch": 0.6653517422748192, + "high_lr": 0.0008668421052631578, + "low_lr": 1.7336842105263157e-05, + "step": 253 + }, + { + "epoch": 0.6653517422748192, + "high_lr": 0.0008668421052631578, + "low_lr": 1.7336842105263157e-05, + "step": 253 + }, + { + "epoch": 0.6653517422748192, + "high_lr": 0.0008668421052631578, + "low_lr": 1.7336842105263157e-05, + "step": 253 + }, + { + "epoch": 0.6653517422748192, + "high_lr": 0.0008668421052631578, + "low_lr": 1.7336842105263157e-05, + "step": 253 + }, + { + "epoch": 0.6653517422748192, + "high_lr": 0.0008668421052631578, + "low_lr": 1.7336842105263157e-05, + "step": 253 + }, + { + "epoch": 0.6679815910585142, + "grad_norm": 0.9419047236442566, + "learning_rate": 0.0008663157894736843, + "loss": 1.7317, + "step": 254 + }, + { + "epoch": 0.6679815910585142, + "high_lr": 0.0008663157894736843, + "low_lr": 1.7326315789473685e-05, + "step": 254 + }, + { + "epoch": 0.6679815910585142, + "high_lr": 0.0008663157894736843, + "low_lr": 1.7326315789473685e-05, + "step": 254 + }, + { + "epoch": 0.6679815910585142, + "high_lr": 0.0008663157894736843, + "low_lr": 1.7326315789473685e-05, + "step": 254 + }, + { + "epoch": 0.6679815910585142, + "high_lr": 0.0008663157894736843, + "low_lr": 1.7326315789473685e-05, + "step": 254 + }, + { + "epoch": 0.6679815910585142, + "high_lr": 0.0008663157894736843, + "low_lr": 1.7326315789473685e-05, + "step": 254 + }, + { + "epoch": 0.6679815910585142, + "high_lr": 0.0008663157894736843, + "low_lr": 1.7326315789473685e-05, + "step": 254 + }, + { + "epoch": 0.6679815910585142, + "high_lr": 0.0008663157894736843, + "low_lr": 1.7326315789473685e-05, + "step": 254 + }, + { + "epoch": 0.6679815910585142, + "high_lr": 0.0008663157894736843, + "low_lr": 1.7326315789473685e-05, + "step": 254 + }, + { + "epoch": 0.6706114398422091, + "grad_norm": 0.8268232941627502, + "learning_rate": 0.0008657894736842106, + "loss": 1.6354, + "step": 255 + }, + { + "epoch": 0.6706114398422091, + "high_lr": 0.0008657894736842106, + "low_lr": 1.7315789473684213e-05, + "step": 255 + }, + { + "epoch": 0.6706114398422091, + "high_lr": 0.0008657894736842106, + "low_lr": 1.7315789473684213e-05, + "step": 255 + }, + { + "epoch": 0.6706114398422091, + "high_lr": 0.0008657894736842106, + "low_lr": 1.7315789473684213e-05, + "step": 255 + }, + { + "epoch": 0.6706114398422091, + "high_lr": 0.0008657894736842106, + "low_lr": 1.7315789473684213e-05, + "step": 255 + }, + { + "epoch": 0.6706114398422091, + "high_lr": 0.0008657894736842106, + "low_lr": 1.7315789473684213e-05, + "step": 255 + }, + { + "epoch": 0.6706114398422091, + "high_lr": 0.0008657894736842106, + "low_lr": 1.7315789473684213e-05, + "step": 255 + }, + { + "epoch": 0.6706114398422091, + "high_lr": 0.0008657894736842106, + "low_lr": 1.7315789473684213e-05, + "step": 255 + }, + { + "epoch": 0.6706114398422091, + "high_lr": 0.0008657894736842106, + "low_lr": 1.7315789473684213e-05, + "step": 255 + }, + { + "epoch": 0.673241288625904, + "grad_norm": 0.8773369193077087, + "learning_rate": 0.0008652631578947369, + "loss": 1.6979, + "step": 256 + }, + { + "epoch": 0.673241288625904, + "high_lr": 0.0008652631578947369, + "low_lr": 1.7305263157894738e-05, + "step": 256 + }, + { + "epoch": 0.673241288625904, + "high_lr": 0.0008652631578947369, + "low_lr": 1.7305263157894738e-05, + "step": 256 + }, + { + "epoch": 0.673241288625904, + "high_lr": 0.0008652631578947369, + "low_lr": 1.7305263157894738e-05, + "step": 256 + }, + { + "epoch": 0.673241288625904, + "high_lr": 0.0008652631578947369, + "low_lr": 1.7305263157894738e-05, + "step": 256 + }, + { + "epoch": 0.673241288625904, + "high_lr": 0.0008652631578947369, + "low_lr": 1.7305263157894738e-05, + "step": 256 + }, + { + "epoch": 0.673241288625904, + "high_lr": 0.0008652631578947369, + "low_lr": 1.7305263157894738e-05, + "step": 256 + }, + { + "epoch": 0.673241288625904, + "high_lr": 0.0008652631578947369, + "low_lr": 1.7305263157894738e-05, + "step": 256 + }, + { + "epoch": 0.673241288625904, + "high_lr": 0.0008652631578947369, + "low_lr": 1.7305263157894738e-05, + "step": 256 + }, + { + "epoch": 0.6758711374095989, + "grad_norm": 1.0301709175109863, + "learning_rate": 0.0008647368421052632, + "loss": 1.6949, + "step": 257 + }, + { + "epoch": 0.6758711374095989, + "high_lr": 0.0008647368421052632, + "low_lr": 1.7294736842105263e-05, + "step": 257 + }, + { + "epoch": 0.6758711374095989, + "high_lr": 0.0008647368421052632, + "low_lr": 1.7294736842105263e-05, + "step": 257 + }, + { + "epoch": 0.6758711374095989, + "high_lr": 0.0008647368421052632, + "low_lr": 1.7294736842105263e-05, + "step": 257 + }, + { + "epoch": 0.6758711374095989, + "high_lr": 0.0008647368421052632, + "low_lr": 1.7294736842105263e-05, + "step": 257 + }, + { + "epoch": 0.6758711374095989, + "high_lr": 0.0008647368421052632, + "low_lr": 1.7294736842105263e-05, + "step": 257 + }, + { + "epoch": 0.6758711374095989, + "high_lr": 0.0008647368421052632, + "low_lr": 1.7294736842105263e-05, + "step": 257 + }, + { + "epoch": 0.6758711374095989, + "high_lr": 0.0008647368421052632, + "low_lr": 1.7294736842105263e-05, + "step": 257 + }, + { + "epoch": 0.6758711374095989, + "high_lr": 0.0008647368421052632, + "low_lr": 1.7294736842105263e-05, + "step": 257 + }, + { + "epoch": 0.6785009861932939, + "grad_norm": 1.0185023546218872, + "learning_rate": 0.0008642105263157895, + "loss": 1.6434, + "step": 258 + }, + { + "epoch": 0.6785009861932939, + "high_lr": 0.0008642105263157895, + "low_lr": 1.728421052631579e-05, + "step": 258 + }, + { + "epoch": 0.6785009861932939, + "high_lr": 0.0008642105263157895, + "low_lr": 1.728421052631579e-05, + "step": 258 + }, + { + "epoch": 0.6785009861932939, + "high_lr": 0.0008642105263157895, + "low_lr": 1.728421052631579e-05, + "step": 258 + }, + { + "epoch": 0.6785009861932939, + "high_lr": 0.0008642105263157895, + "low_lr": 1.728421052631579e-05, + "step": 258 + }, + { + "epoch": 0.6785009861932939, + "high_lr": 0.0008642105263157895, + "low_lr": 1.728421052631579e-05, + "step": 258 + }, + { + "epoch": 0.6785009861932939, + "high_lr": 0.0008642105263157895, + "low_lr": 1.728421052631579e-05, + "step": 258 + }, + { + "epoch": 0.6785009861932939, + "high_lr": 0.0008642105263157895, + "low_lr": 1.728421052631579e-05, + "step": 258 + }, + { + "epoch": 0.6785009861932939, + "high_lr": 0.0008642105263157895, + "low_lr": 1.728421052631579e-05, + "step": 258 + }, + { + "epoch": 0.6811308349769888, + "grad_norm": 0.9030142426490784, + "learning_rate": 0.0008636842105263159, + "loss": 1.6707, + "step": 259 + }, + { + "epoch": 0.6811308349769888, + "high_lr": 0.0008636842105263159, + "low_lr": 1.727368421052632e-05, + "step": 259 + }, + { + "epoch": 0.6811308349769888, + "high_lr": 0.0008636842105263159, + "low_lr": 1.727368421052632e-05, + "step": 259 + }, + { + "epoch": 0.6811308349769888, + "high_lr": 0.0008636842105263159, + "low_lr": 1.727368421052632e-05, + "step": 259 + }, + { + "epoch": 0.6811308349769888, + "high_lr": 0.0008636842105263159, + "low_lr": 1.727368421052632e-05, + "step": 259 + }, + { + "epoch": 0.6811308349769888, + "high_lr": 0.0008636842105263159, + "low_lr": 1.727368421052632e-05, + "step": 259 + }, + { + "epoch": 0.6811308349769888, + "high_lr": 0.0008636842105263159, + "low_lr": 1.727368421052632e-05, + "step": 259 + }, + { + "epoch": 0.6811308349769888, + "high_lr": 0.0008636842105263159, + "low_lr": 1.727368421052632e-05, + "step": 259 + }, + { + "epoch": 0.6811308349769888, + "high_lr": 0.0008636842105263159, + "low_lr": 1.727368421052632e-05, + "step": 259 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.9696186184883118, + "learning_rate": 0.0008631578947368422, + "loss": 1.698, + "step": 260 + }, + { + "epoch": 0.6837606837606838, + "high_lr": 0.0008631578947368422, + "low_lr": 1.7263157894736843e-05, + "step": 260 + }, + { + "epoch": 0.6837606837606838, + "high_lr": 0.0008631578947368422, + "low_lr": 1.7263157894736843e-05, + "step": 260 + }, + { + "epoch": 0.6837606837606838, + "high_lr": 0.0008631578947368422, + "low_lr": 1.7263157894736843e-05, + "step": 260 + }, + { + "epoch": 0.6837606837606838, + "high_lr": 0.0008631578947368422, + "low_lr": 1.7263157894736843e-05, + "step": 260 + }, + { + "epoch": 0.6837606837606838, + "high_lr": 0.0008631578947368422, + "low_lr": 1.7263157894736843e-05, + "step": 260 + }, + { + "epoch": 0.6837606837606838, + "high_lr": 0.0008631578947368422, + "low_lr": 1.7263157894736843e-05, + "step": 260 + }, + { + "epoch": 0.6837606837606838, + "high_lr": 0.0008631578947368422, + "low_lr": 1.7263157894736843e-05, + "step": 260 + }, + { + "epoch": 0.6837606837606838, + "high_lr": 0.0008631578947368422, + "low_lr": 1.7263157894736843e-05, + "step": 260 + }, + { + "epoch": 0.6863905325443787, + "grad_norm": 0.871801495552063, + "learning_rate": 0.0008626315789473684, + "loss": 1.7287, + "step": 261 + }, + { + "epoch": 0.6863905325443787, + "high_lr": 0.0008626315789473684, + "low_lr": 1.725263157894737e-05, + "step": 261 + }, + { + "epoch": 0.6863905325443787, + "high_lr": 0.0008626315789473684, + "low_lr": 1.725263157894737e-05, + "step": 261 + }, + { + "epoch": 0.6863905325443787, + "high_lr": 0.0008626315789473684, + "low_lr": 1.725263157894737e-05, + "step": 261 + }, + { + "epoch": 0.6863905325443787, + "high_lr": 0.0008626315789473684, + "low_lr": 1.725263157894737e-05, + "step": 261 + }, + { + "epoch": 0.6863905325443787, + "high_lr": 0.0008626315789473684, + "low_lr": 1.725263157894737e-05, + "step": 261 + }, + { + "epoch": 0.6863905325443787, + "high_lr": 0.0008626315789473684, + "low_lr": 1.725263157894737e-05, + "step": 261 + }, + { + "epoch": 0.6863905325443787, + "high_lr": 0.0008626315789473684, + "low_lr": 1.725263157894737e-05, + "step": 261 + }, + { + "epoch": 0.6863905325443787, + "high_lr": 0.0008626315789473684, + "low_lr": 1.725263157894737e-05, + "step": 261 + }, + { + "epoch": 0.6890203813280736, + "grad_norm": 1.0416325330734253, + "learning_rate": 0.0008621052631578947, + "loss": 1.7148, + "step": 262 + }, + { + "epoch": 0.6890203813280736, + "high_lr": 0.0008621052631578947, + "low_lr": 1.7242105263157896e-05, + "step": 262 + }, + { + "epoch": 0.6890203813280736, + "high_lr": 0.0008621052631578947, + "low_lr": 1.7242105263157896e-05, + "step": 262 + }, + { + "epoch": 0.6890203813280736, + "high_lr": 0.0008621052631578947, + "low_lr": 1.7242105263157896e-05, + "step": 262 + }, + { + "epoch": 0.6890203813280736, + "high_lr": 0.0008621052631578947, + "low_lr": 1.7242105263157896e-05, + "step": 262 + }, + { + "epoch": 0.6890203813280736, + "high_lr": 0.0008621052631578947, + "low_lr": 1.7242105263157896e-05, + "step": 262 + }, + { + "epoch": 0.6890203813280736, + "high_lr": 0.0008621052631578947, + "low_lr": 1.7242105263157896e-05, + "step": 262 + }, + { + "epoch": 0.6890203813280736, + "high_lr": 0.0008621052631578947, + "low_lr": 1.7242105263157896e-05, + "step": 262 + }, + { + "epoch": 0.6890203813280736, + "high_lr": 0.0008621052631578947, + "low_lr": 1.7242105263157896e-05, + "step": 262 + }, + { + "epoch": 0.6916502301117686, + "grad_norm": 0.8305730223655701, + "learning_rate": 0.000861578947368421, + "loss": 1.6686, + "step": 263 + }, + { + "epoch": 0.6916502301117686, + "high_lr": 0.000861578947368421, + "low_lr": 1.723157894736842e-05, + "step": 263 + }, + { + "epoch": 0.6916502301117686, + "high_lr": 0.000861578947368421, + "low_lr": 1.723157894736842e-05, + "step": 263 + }, + { + "epoch": 0.6916502301117686, + "high_lr": 0.000861578947368421, + "low_lr": 1.723157894736842e-05, + "step": 263 + }, + { + "epoch": 0.6916502301117686, + "high_lr": 0.000861578947368421, + "low_lr": 1.723157894736842e-05, + "step": 263 + }, + { + "epoch": 0.6916502301117686, + "high_lr": 0.000861578947368421, + "low_lr": 1.723157894736842e-05, + "step": 263 + }, + { + "epoch": 0.6916502301117686, + "high_lr": 0.000861578947368421, + "low_lr": 1.723157894736842e-05, + "step": 263 + }, + { + "epoch": 0.6916502301117686, + "high_lr": 0.000861578947368421, + "low_lr": 1.723157894736842e-05, + "step": 263 + }, + { + "epoch": 0.6916502301117686, + "high_lr": 0.000861578947368421, + "low_lr": 1.723157894736842e-05, + "step": 263 + }, + { + "epoch": 0.6942800788954635, + "grad_norm": 1.1706870794296265, + "learning_rate": 0.0008610526315789474, + "loss": 1.7173, + "step": 264 + }, + { + "epoch": 0.6942800788954635, + "high_lr": 0.0008610526315789474, + "low_lr": 1.722105263157895e-05, + "step": 264 + }, + { + "epoch": 0.6942800788954635, + "high_lr": 0.0008610526315789474, + "low_lr": 1.722105263157895e-05, + "step": 264 + }, + { + "epoch": 0.6942800788954635, + "high_lr": 0.0008610526315789474, + "low_lr": 1.722105263157895e-05, + "step": 264 + }, + { + "epoch": 0.6942800788954635, + "high_lr": 0.0008610526315789474, + "low_lr": 1.722105263157895e-05, + "step": 264 + }, + { + "epoch": 0.6942800788954635, + "high_lr": 0.0008610526315789474, + "low_lr": 1.722105263157895e-05, + "step": 264 + }, + { + "epoch": 0.6942800788954635, + "high_lr": 0.0008610526315789474, + "low_lr": 1.722105263157895e-05, + "step": 264 + }, + { + "epoch": 0.6942800788954635, + "high_lr": 0.0008610526315789474, + "low_lr": 1.722105263157895e-05, + "step": 264 + }, + { + "epoch": 0.6942800788954635, + "high_lr": 0.0008610526315789474, + "low_lr": 1.722105263157895e-05, + "step": 264 + }, + { + "epoch": 0.6969099276791585, + "grad_norm": 0.9050807952880859, + "learning_rate": 0.0008605263157894737, + "loss": 1.6596, + "step": 265 + }, + { + "epoch": 0.6969099276791585, + "high_lr": 0.0008605263157894737, + "low_lr": 1.7210526315789477e-05, + "step": 265 + }, + { + "epoch": 0.6969099276791585, + "high_lr": 0.0008605263157894737, + "low_lr": 1.7210526315789477e-05, + "step": 265 + }, + { + "epoch": 0.6969099276791585, + "high_lr": 0.0008605263157894737, + "low_lr": 1.7210526315789477e-05, + "step": 265 + }, + { + "epoch": 0.6969099276791585, + "high_lr": 0.0008605263157894737, + "low_lr": 1.7210526315789477e-05, + "step": 265 + }, + { + "epoch": 0.6969099276791585, + "high_lr": 0.0008605263157894737, + "low_lr": 1.7210526315789477e-05, + "step": 265 + }, + { + "epoch": 0.6969099276791585, + "high_lr": 0.0008605263157894737, + "low_lr": 1.7210526315789477e-05, + "step": 265 + }, + { + "epoch": 0.6969099276791585, + "high_lr": 0.0008605263157894737, + "low_lr": 1.7210526315789477e-05, + "step": 265 + }, + { + "epoch": 0.6969099276791585, + "high_lr": 0.0008605263157894737, + "low_lr": 1.7210526315789477e-05, + "step": 265 + }, + { + "epoch": 0.6995397764628534, + "grad_norm": 0.9089176058769226, + "learning_rate": 0.00086, + "loss": 1.6438, + "step": 266 + }, + { + "epoch": 0.6995397764628534, + "high_lr": 0.00086, + "low_lr": 1.72e-05, + "step": 266 + }, + { + "epoch": 0.6995397764628534, + "high_lr": 0.00086, + "low_lr": 1.72e-05, + "step": 266 + }, + { + "epoch": 0.6995397764628534, + "high_lr": 0.00086, + "low_lr": 1.72e-05, + "step": 266 + }, + { + "epoch": 0.6995397764628534, + "high_lr": 0.00086, + "low_lr": 1.72e-05, + "step": 266 + }, + { + "epoch": 0.6995397764628534, + "high_lr": 0.00086, + "low_lr": 1.72e-05, + "step": 266 + }, + { + "epoch": 0.6995397764628534, + "high_lr": 0.00086, + "low_lr": 1.72e-05, + "step": 266 + }, + { + "epoch": 0.6995397764628534, + "high_lr": 0.00086, + "low_lr": 1.72e-05, + "step": 266 + }, + { + "epoch": 0.6995397764628534, + "high_lr": 0.00086, + "low_lr": 1.72e-05, + "step": 266 + }, + { + "epoch": 0.7021696252465484, + "grad_norm": 1.1428871154785156, + "learning_rate": 0.0008594736842105263, + "loss": 1.6189, + "step": 267 + }, + { + "epoch": 0.7021696252465484, + "high_lr": 0.0008594736842105263, + "low_lr": 1.7189473684210526e-05, + "step": 267 + }, + { + "epoch": 0.7021696252465484, + "high_lr": 0.0008594736842105263, + "low_lr": 1.7189473684210526e-05, + "step": 267 + }, + { + "epoch": 0.7021696252465484, + "high_lr": 0.0008594736842105263, + "low_lr": 1.7189473684210526e-05, + "step": 267 + }, + { + "epoch": 0.7021696252465484, + "high_lr": 0.0008594736842105263, + "low_lr": 1.7189473684210526e-05, + "step": 267 + }, + { + "epoch": 0.7021696252465484, + "high_lr": 0.0008594736842105263, + "low_lr": 1.7189473684210526e-05, + "step": 267 + }, + { + "epoch": 0.7021696252465484, + "high_lr": 0.0008594736842105263, + "low_lr": 1.7189473684210526e-05, + "step": 267 + }, + { + "epoch": 0.7021696252465484, + "high_lr": 0.0008594736842105263, + "low_lr": 1.7189473684210526e-05, + "step": 267 + }, + { + "epoch": 0.7021696252465484, + "high_lr": 0.0008594736842105263, + "low_lr": 1.7189473684210526e-05, + "step": 267 + }, + { + "epoch": 0.7047994740302432, + "grad_norm": 0.8949025273323059, + "learning_rate": 0.0008589473684210527, + "loss": 1.6549, + "step": 268 + }, + { + "epoch": 0.7047994740302432, + "high_lr": 0.0008589473684210527, + "low_lr": 1.7178947368421054e-05, + "step": 268 + }, + { + "epoch": 0.7047994740302432, + "high_lr": 0.0008589473684210527, + "low_lr": 1.7178947368421054e-05, + "step": 268 + }, + { + "epoch": 0.7047994740302432, + "high_lr": 0.0008589473684210527, + "low_lr": 1.7178947368421054e-05, + "step": 268 + }, + { + "epoch": 0.7047994740302432, + "high_lr": 0.0008589473684210527, + "low_lr": 1.7178947368421054e-05, + "step": 268 + }, + { + "epoch": 0.7047994740302432, + "high_lr": 0.0008589473684210527, + "low_lr": 1.7178947368421054e-05, + "step": 268 + }, + { + "epoch": 0.7047994740302432, + "high_lr": 0.0008589473684210527, + "low_lr": 1.7178947368421054e-05, + "step": 268 + }, + { + "epoch": 0.7047994740302432, + "high_lr": 0.0008589473684210527, + "low_lr": 1.7178947368421054e-05, + "step": 268 + }, + { + "epoch": 0.7047994740302432, + "high_lr": 0.0008589473684210527, + "low_lr": 1.7178947368421054e-05, + "step": 268 + }, + { + "epoch": 0.7074293228139382, + "grad_norm": 0.8947936296463013, + "learning_rate": 0.0008584210526315789, + "loss": 1.6635, + "step": 269 + }, + { + "epoch": 0.7074293228139382, + "high_lr": 0.0008584210526315789, + "low_lr": 1.7168421052631582e-05, + "step": 269 + }, + { + "epoch": 0.7074293228139382, + "high_lr": 0.0008584210526315789, + "low_lr": 1.7168421052631582e-05, + "step": 269 + }, + { + "epoch": 0.7074293228139382, + "high_lr": 0.0008584210526315789, + "low_lr": 1.7168421052631582e-05, + "step": 269 + }, + { + "epoch": 0.7074293228139382, + "high_lr": 0.0008584210526315789, + "low_lr": 1.7168421052631582e-05, + "step": 269 + }, + { + "epoch": 0.7074293228139382, + "high_lr": 0.0008584210526315789, + "low_lr": 1.7168421052631582e-05, + "step": 269 + }, + { + "epoch": 0.7074293228139382, + "high_lr": 0.0008584210526315789, + "low_lr": 1.7168421052631582e-05, + "step": 269 + }, + { + "epoch": 0.7074293228139382, + "high_lr": 0.0008584210526315789, + "low_lr": 1.7168421052631582e-05, + "step": 269 + }, + { + "epoch": 0.7074293228139382, + "high_lr": 0.0008584210526315789, + "low_lr": 1.7168421052631582e-05, + "step": 269 + }, + { + "epoch": 0.7100591715976331, + "grad_norm": 0.8887916803359985, + "learning_rate": 0.0008578947368421052, + "loss": 1.65, + "step": 270 + }, + { + "epoch": 0.7100591715976331, + "high_lr": 0.0008578947368421052, + "low_lr": 1.7157894736842107e-05, + "step": 270 + }, + { + "epoch": 0.7100591715976331, + "high_lr": 0.0008578947368421052, + "low_lr": 1.7157894736842107e-05, + "step": 270 + }, + { + "epoch": 0.7100591715976331, + "high_lr": 0.0008578947368421052, + "low_lr": 1.7157894736842107e-05, + "step": 270 + }, + { + "epoch": 0.7100591715976331, + "high_lr": 0.0008578947368421052, + "low_lr": 1.7157894736842107e-05, + "step": 270 + }, + { + "epoch": 0.7100591715976331, + "high_lr": 0.0008578947368421052, + "low_lr": 1.7157894736842107e-05, + "step": 270 + }, + { + "epoch": 0.7100591715976331, + "high_lr": 0.0008578947368421052, + "low_lr": 1.7157894736842107e-05, + "step": 270 + }, + { + "epoch": 0.7100591715976331, + "high_lr": 0.0008578947368421052, + "low_lr": 1.7157894736842107e-05, + "step": 270 + }, + { + "epoch": 0.7100591715976331, + "high_lr": 0.0008578947368421052, + "low_lr": 1.7157894736842107e-05, + "step": 270 + }, + { + "epoch": 0.7126890203813281, + "grad_norm": 0.9368761777877808, + "learning_rate": 0.0008573684210526316, + "loss": 1.6606, + "step": 271 + }, + { + "epoch": 0.7126890203813281, + "high_lr": 0.0008573684210526316, + "low_lr": 1.714736842105263e-05, + "step": 271 + }, + { + "epoch": 0.7126890203813281, + "high_lr": 0.0008573684210526316, + "low_lr": 1.714736842105263e-05, + "step": 271 + }, + { + "epoch": 0.7126890203813281, + "high_lr": 0.0008573684210526316, + "low_lr": 1.714736842105263e-05, + "step": 271 + }, + { + "epoch": 0.7126890203813281, + "high_lr": 0.0008573684210526316, + "low_lr": 1.714736842105263e-05, + "step": 271 + }, + { + "epoch": 0.7126890203813281, + "high_lr": 0.0008573684210526316, + "low_lr": 1.714736842105263e-05, + "step": 271 + }, + { + "epoch": 0.7126890203813281, + "high_lr": 0.0008573684210526316, + "low_lr": 1.714736842105263e-05, + "step": 271 + }, + { + "epoch": 0.7126890203813281, + "high_lr": 0.0008573684210526316, + "low_lr": 1.714736842105263e-05, + "step": 271 + }, + { + "epoch": 0.7126890203813281, + "high_lr": 0.0008573684210526316, + "low_lr": 1.714736842105263e-05, + "step": 271 + }, + { + "epoch": 0.715318869165023, + "grad_norm": 0.9301985502243042, + "learning_rate": 0.0008568421052631579, + "loss": 1.637, + "step": 272 + }, + { + "epoch": 0.715318869165023, + "high_lr": 0.0008568421052631579, + "low_lr": 1.713684210526316e-05, + "step": 272 + }, + { + "epoch": 0.715318869165023, + "high_lr": 0.0008568421052631579, + "low_lr": 1.713684210526316e-05, + "step": 272 + }, + { + "epoch": 0.715318869165023, + "high_lr": 0.0008568421052631579, + "low_lr": 1.713684210526316e-05, + "step": 272 + }, + { + "epoch": 0.715318869165023, + "high_lr": 0.0008568421052631579, + "low_lr": 1.713684210526316e-05, + "step": 272 + }, + { + "epoch": 0.715318869165023, + "high_lr": 0.0008568421052631579, + "low_lr": 1.713684210526316e-05, + "step": 272 + }, + { + "epoch": 0.715318869165023, + "high_lr": 0.0008568421052631579, + "low_lr": 1.713684210526316e-05, + "step": 272 + }, + { + "epoch": 0.715318869165023, + "high_lr": 0.0008568421052631579, + "low_lr": 1.713684210526316e-05, + "step": 272 + }, + { + "epoch": 0.715318869165023, + "high_lr": 0.0008568421052631579, + "low_lr": 1.713684210526316e-05, + "step": 272 + }, + { + "epoch": 0.717948717948718, + "grad_norm": 0.9040118455886841, + "learning_rate": 0.0008563157894736843, + "loss": 1.624, + "step": 273 + }, + { + "epoch": 0.717948717948718, + "high_lr": 0.0008563157894736843, + "low_lr": 1.7126315789473687e-05, + "step": 273 + }, + { + "epoch": 0.717948717948718, + "high_lr": 0.0008563157894736843, + "low_lr": 1.7126315789473687e-05, + "step": 273 + }, + { + "epoch": 0.717948717948718, + "high_lr": 0.0008563157894736843, + "low_lr": 1.7126315789473687e-05, + "step": 273 + }, + { + "epoch": 0.717948717948718, + "high_lr": 0.0008563157894736843, + "low_lr": 1.7126315789473687e-05, + "step": 273 + }, + { + "epoch": 0.717948717948718, + "high_lr": 0.0008563157894736843, + "low_lr": 1.7126315789473687e-05, + "step": 273 + }, + { + "epoch": 0.717948717948718, + "high_lr": 0.0008563157894736843, + "low_lr": 1.7126315789473687e-05, + "step": 273 + }, + { + "epoch": 0.717948717948718, + "high_lr": 0.0008563157894736843, + "low_lr": 1.7126315789473687e-05, + "step": 273 + }, + { + "epoch": 0.717948717948718, + "high_lr": 0.0008563157894736843, + "low_lr": 1.7126315789473687e-05, + "step": 273 + }, + { + "epoch": 0.7205785667324129, + "grad_norm": 0.901945173740387, + "learning_rate": 0.0008557894736842106, + "loss": 1.6794, + "step": 274 + }, + { + "epoch": 0.7205785667324129, + "high_lr": 0.0008557894736842106, + "low_lr": 1.7115789473684212e-05, + "step": 274 + }, + { + "epoch": 0.7205785667324129, + "high_lr": 0.0008557894736842106, + "low_lr": 1.7115789473684212e-05, + "step": 274 + }, + { + "epoch": 0.7205785667324129, + "high_lr": 0.0008557894736842106, + "low_lr": 1.7115789473684212e-05, + "step": 274 + }, + { + "epoch": 0.7205785667324129, + "high_lr": 0.0008557894736842106, + "low_lr": 1.7115789473684212e-05, + "step": 274 + }, + { + "epoch": 0.7205785667324129, + "high_lr": 0.0008557894736842106, + "low_lr": 1.7115789473684212e-05, + "step": 274 + }, + { + "epoch": 0.7205785667324129, + "high_lr": 0.0008557894736842106, + "low_lr": 1.7115789473684212e-05, + "step": 274 + }, + { + "epoch": 0.7205785667324129, + "high_lr": 0.0008557894736842106, + "low_lr": 1.7115789473684212e-05, + "step": 274 + }, + { + "epoch": 0.7205785667324129, + "high_lr": 0.0008557894736842106, + "low_lr": 1.7115789473684212e-05, + "step": 274 + }, + { + "epoch": 0.7232084155161078, + "grad_norm": 0.8379615545272827, + "learning_rate": 0.0008552631578947369, + "loss": 1.6236, + "step": 275 + }, + { + "epoch": 0.7232084155161078, + "high_lr": 0.0008552631578947369, + "low_lr": 1.7105263157894737e-05, + "step": 275 + }, + { + "epoch": 0.7232084155161078, + "high_lr": 0.0008552631578947369, + "low_lr": 1.7105263157894737e-05, + "step": 275 + }, + { + "epoch": 0.7232084155161078, + "high_lr": 0.0008552631578947369, + "low_lr": 1.7105263157894737e-05, + "step": 275 + }, + { + "epoch": 0.7232084155161078, + "high_lr": 0.0008552631578947369, + "low_lr": 1.7105263157894737e-05, + "step": 275 + }, + { + "epoch": 0.7232084155161078, + "high_lr": 0.0008552631578947369, + "low_lr": 1.7105263157894737e-05, + "step": 275 + }, + { + "epoch": 0.7232084155161078, + "high_lr": 0.0008552631578947369, + "low_lr": 1.7105263157894737e-05, + "step": 275 + }, + { + "epoch": 0.7232084155161078, + "high_lr": 0.0008552631578947369, + "low_lr": 1.7105263157894737e-05, + "step": 275 + }, + { + "epoch": 0.7232084155161078, + "high_lr": 0.0008552631578947369, + "low_lr": 1.7105263157894737e-05, + "step": 275 + }, + { + "epoch": 0.7258382642998028, + "grad_norm": 0.8739696145057678, + "learning_rate": 0.0008547368421052632, + "loss": 1.6327, + "step": 276 + }, + { + "epoch": 0.7258382642998028, + "high_lr": 0.0008547368421052632, + "low_lr": 1.7094736842105265e-05, + "step": 276 + }, + { + "epoch": 0.7258382642998028, + "high_lr": 0.0008547368421052632, + "low_lr": 1.7094736842105265e-05, + "step": 276 + }, + { + "epoch": 0.7258382642998028, + "high_lr": 0.0008547368421052632, + "low_lr": 1.7094736842105265e-05, + "step": 276 + }, + { + "epoch": 0.7258382642998028, + "high_lr": 0.0008547368421052632, + "low_lr": 1.7094736842105265e-05, + "step": 276 + }, + { + "epoch": 0.7258382642998028, + "high_lr": 0.0008547368421052632, + "low_lr": 1.7094736842105265e-05, + "step": 276 + }, + { + "epoch": 0.7258382642998028, + "high_lr": 0.0008547368421052632, + "low_lr": 1.7094736842105265e-05, + "step": 276 + }, + { + "epoch": 0.7258382642998028, + "high_lr": 0.0008547368421052632, + "low_lr": 1.7094736842105265e-05, + "step": 276 + }, + { + "epoch": 0.7258382642998028, + "high_lr": 0.0008547368421052632, + "low_lr": 1.7094736842105265e-05, + "step": 276 + }, + { + "epoch": 0.7284681130834977, + "grad_norm": 0.8980576992034912, + "learning_rate": 0.0008542105263157894, + "loss": 1.6419, + "step": 277 + }, + { + "epoch": 0.7284681130834977, + "high_lr": 0.0008542105263157894, + "low_lr": 1.708421052631579e-05, + "step": 277 + }, + { + "epoch": 0.7284681130834977, + "high_lr": 0.0008542105263157894, + "low_lr": 1.708421052631579e-05, + "step": 277 + }, + { + "epoch": 0.7284681130834977, + "high_lr": 0.0008542105263157894, + "low_lr": 1.708421052631579e-05, + "step": 277 + }, + { + "epoch": 0.7284681130834977, + "high_lr": 0.0008542105263157894, + "low_lr": 1.708421052631579e-05, + "step": 277 + }, + { + "epoch": 0.7284681130834977, + "high_lr": 0.0008542105263157894, + "low_lr": 1.708421052631579e-05, + "step": 277 + }, + { + "epoch": 0.7284681130834977, + "high_lr": 0.0008542105263157894, + "low_lr": 1.708421052631579e-05, + "step": 277 + }, + { + "epoch": 0.7284681130834977, + "high_lr": 0.0008542105263157894, + "low_lr": 1.708421052631579e-05, + "step": 277 + }, + { + "epoch": 0.7284681130834977, + "high_lr": 0.0008542105263157894, + "low_lr": 1.708421052631579e-05, + "step": 277 + }, + { + "epoch": 0.7310979618671927, + "grad_norm": 0.8740023374557495, + "learning_rate": 0.0008536842105263158, + "loss": 1.6539, + "step": 278 + }, + { + "epoch": 0.7310979618671927, + "high_lr": 0.0008536842105263158, + "low_lr": 1.7073684210526317e-05, + "step": 278 + }, + { + "epoch": 0.7310979618671927, + "high_lr": 0.0008536842105263158, + "low_lr": 1.7073684210526317e-05, + "step": 278 + }, + { + "epoch": 0.7310979618671927, + "high_lr": 0.0008536842105263158, + "low_lr": 1.7073684210526317e-05, + "step": 278 + }, + { + "epoch": 0.7310979618671927, + "high_lr": 0.0008536842105263158, + "low_lr": 1.7073684210526317e-05, + "step": 278 + }, + { + "epoch": 0.7310979618671927, + "high_lr": 0.0008536842105263158, + "low_lr": 1.7073684210526317e-05, + "step": 278 + }, + { + "epoch": 0.7310979618671927, + "high_lr": 0.0008536842105263158, + "low_lr": 1.7073684210526317e-05, + "step": 278 + }, + { + "epoch": 0.7310979618671927, + "high_lr": 0.0008536842105263158, + "low_lr": 1.7073684210526317e-05, + "step": 278 + }, + { + "epoch": 0.7310979618671927, + "high_lr": 0.0008536842105263158, + "low_lr": 1.7073684210526317e-05, + "step": 278 + }, + { + "epoch": 0.7337278106508875, + "grad_norm": 0.8141024708747864, + "learning_rate": 0.0008531578947368421, + "loss": 1.6074, + "step": 279 + }, + { + "epoch": 0.7337278106508875, + "high_lr": 0.0008531578947368421, + "low_lr": 1.7063157894736845e-05, + "step": 279 + }, + { + "epoch": 0.7337278106508875, + "high_lr": 0.0008531578947368421, + "low_lr": 1.7063157894736845e-05, + "step": 279 + }, + { + "epoch": 0.7337278106508875, + "high_lr": 0.0008531578947368421, + "low_lr": 1.7063157894736845e-05, + "step": 279 + }, + { + "epoch": 0.7337278106508875, + "high_lr": 0.0008531578947368421, + "low_lr": 1.7063157894736845e-05, + "step": 279 + }, + { + "epoch": 0.7337278106508875, + "high_lr": 0.0008531578947368421, + "low_lr": 1.7063157894736845e-05, + "step": 279 + }, + { + "epoch": 0.7337278106508875, + "high_lr": 0.0008531578947368421, + "low_lr": 1.7063157894736845e-05, + "step": 279 + }, + { + "epoch": 0.7337278106508875, + "high_lr": 0.0008531578947368421, + "low_lr": 1.7063157894736845e-05, + "step": 279 + }, + { + "epoch": 0.7337278106508875, + "high_lr": 0.0008531578947368421, + "low_lr": 1.7063157894736845e-05, + "step": 279 + }, + { + "epoch": 0.7363576594345825, + "grad_norm": 0.9393441677093506, + "learning_rate": 0.0008526315789473684, + "loss": 1.663, + "step": 280 + }, + { + "epoch": 0.7363576594345825, + "high_lr": 0.0008526315789473684, + "low_lr": 1.705263157894737e-05, + "step": 280 + }, + { + "epoch": 0.7363576594345825, + "high_lr": 0.0008526315789473684, + "low_lr": 1.705263157894737e-05, + "step": 280 + }, + { + "epoch": 0.7363576594345825, + "high_lr": 0.0008526315789473684, + "low_lr": 1.705263157894737e-05, + "step": 280 + }, + { + "epoch": 0.7363576594345825, + "high_lr": 0.0008526315789473684, + "low_lr": 1.705263157894737e-05, + "step": 280 + }, + { + "epoch": 0.7363576594345825, + "high_lr": 0.0008526315789473684, + "low_lr": 1.705263157894737e-05, + "step": 280 + }, + { + "epoch": 0.7363576594345825, + "high_lr": 0.0008526315789473684, + "low_lr": 1.705263157894737e-05, + "step": 280 + }, + { + "epoch": 0.7363576594345825, + "high_lr": 0.0008526315789473684, + "low_lr": 1.705263157894737e-05, + "step": 280 + }, + { + "epoch": 0.7363576594345825, + "high_lr": 0.0008526315789473684, + "low_lr": 1.705263157894737e-05, + "step": 280 + }, + { + "epoch": 0.7389875082182774, + "grad_norm": 0.9309095144271851, + "learning_rate": 0.0008521052631578947, + "loss": 1.675, + "step": 281 + }, + { + "epoch": 0.7389875082182774, + "high_lr": 0.0008521052631578947, + "low_lr": 1.7042105263157895e-05, + "step": 281 + }, + { + "epoch": 0.7389875082182774, + "high_lr": 0.0008521052631578947, + "low_lr": 1.7042105263157895e-05, + "step": 281 + }, + { + "epoch": 0.7389875082182774, + "high_lr": 0.0008521052631578947, + "low_lr": 1.7042105263157895e-05, + "step": 281 + }, + { + "epoch": 0.7389875082182774, + "high_lr": 0.0008521052631578947, + "low_lr": 1.7042105263157895e-05, + "step": 281 + }, + { + "epoch": 0.7389875082182774, + "high_lr": 0.0008521052631578947, + "low_lr": 1.7042105263157895e-05, + "step": 281 + }, + { + "epoch": 0.7389875082182774, + "high_lr": 0.0008521052631578947, + "low_lr": 1.7042105263157895e-05, + "step": 281 + }, + { + "epoch": 0.7389875082182774, + "high_lr": 0.0008521052631578947, + "low_lr": 1.7042105263157895e-05, + "step": 281 + }, + { + "epoch": 0.7389875082182774, + "high_lr": 0.0008521052631578947, + "low_lr": 1.7042105263157895e-05, + "step": 281 + }, + { + "epoch": 0.7416173570019724, + "grad_norm": 0.9190104603767395, + "learning_rate": 0.0008515789473684211, + "loss": 1.647, + "step": 282 + }, + { + "epoch": 0.7416173570019724, + "high_lr": 0.0008515789473684211, + "low_lr": 1.7031578947368423e-05, + "step": 282 + }, + { + "epoch": 0.7416173570019724, + "high_lr": 0.0008515789473684211, + "low_lr": 1.7031578947368423e-05, + "step": 282 + }, + { + "epoch": 0.7416173570019724, + "high_lr": 0.0008515789473684211, + "low_lr": 1.7031578947368423e-05, + "step": 282 + }, + { + "epoch": 0.7416173570019724, + "high_lr": 0.0008515789473684211, + "low_lr": 1.7031578947368423e-05, + "step": 282 + }, + { + "epoch": 0.7416173570019724, + "high_lr": 0.0008515789473684211, + "low_lr": 1.7031578947368423e-05, + "step": 282 + }, + { + "epoch": 0.7416173570019724, + "high_lr": 0.0008515789473684211, + "low_lr": 1.7031578947368423e-05, + "step": 282 + }, + { + "epoch": 0.7416173570019724, + "high_lr": 0.0008515789473684211, + "low_lr": 1.7031578947368423e-05, + "step": 282 + }, + { + "epoch": 0.7416173570019724, + "high_lr": 0.0008515789473684211, + "low_lr": 1.7031578947368423e-05, + "step": 282 + }, + { + "epoch": 0.7442472057856673, + "grad_norm": 0.9044349789619446, + "learning_rate": 0.0008510526315789474, + "loss": 1.6526, + "step": 283 + }, + { + "epoch": 0.7442472057856673, + "high_lr": 0.0008510526315789474, + "low_lr": 1.702105263157895e-05, + "step": 283 + }, + { + "epoch": 0.7442472057856673, + "high_lr": 0.0008510526315789474, + "low_lr": 1.702105263157895e-05, + "step": 283 + }, + { + "epoch": 0.7442472057856673, + "high_lr": 0.0008510526315789474, + "low_lr": 1.702105263157895e-05, + "step": 283 + }, + { + "epoch": 0.7442472057856673, + "high_lr": 0.0008510526315789474, + "low_lr": 1.702105263157895e-05, + "step": 283 + }, + { + "epoch": 0.7442472057856673, + "high_lr": 0.0008510526315789474, + "low_lr": 1.702105263157895e-05, + "step": 283 + }, + { + "epoch": 0.7442472057856673, + "high_lr": 0.0008510526315789474, + "low_lr": 1.702105263157895e-05, + "step": 283 + }, + { + "epoch": 0.7442472057856673, + "high_lr": 0.0008510526315789474, + "low_lr": 1.702105263157895e-05, + "step": 283 + }, + { + "epoch": 0.7442472057856673, + "high_lr": 0.0008510526315789474, + "low_lr": 1.702105263157895e-05, + "step": 283 + }, + { + "epoch": 0.7468770545693623, + "grad_norm": 0.8546414971351624, + "learning_rate": 0.0008505263157894737, + "loss": 1.6029, + "step": 284 + }, + { + "epoch": 0.7468770545693623, + "high_lr": 0.0008505263157894737, + "low_lr": 1.7010526315789475e-05, + "step": 284 + }, + { + "epoch": 0.7468770545693623, + "high_lr": 0.0008505263157894737, + "low_lr": 1.7010526315789475e-05, + "step": 284 + }, + { + "epoch": 0.7468770545693623, + "high_lr": 0.0008505263157894737, + "low_lr": 1.7010526315789475e-05, + "step": 284 + }, + { + "epoch": 0.7468770545693623, + "high_lr": 0.0008505263157894737, + "low_lr": 1.7010526315789475e-05, + "step": 284 + }, + { + "epoch": 0.7468770545693623, + "high_lr": 0.0008505263157894737, + "low_lr": 1.7010526315789475e-05, + "step": 284 + }, + { + "epoch": 0.7468770545693623, + "high_lr": 0.0008505263157894737, + "low_lr": 1.7010526315789475e-05, + "step": 284 + }, + { + "epoch": 0.7468770545693623, + "high_lr": 0.0008505263157894737, + "low_lr": 1.7010526315789475e-05, + "step": 284 + }, + { + "epoch": 0.7468770545693623, + "high_lr": 0.0008505263157894737, + "low_lr": 1.7010526315789475e-05, + "step": 284 + }, + { + "epoch": 0.7495069033530573, + "grad_norm": 0.9218443036079407, + "learning_rate": 0.00085, + "loss": 1.6428, + "step": 285 + }, + { + "epoch": 0.7495069033530573, + "high_lr": 0.00085, + "low_lr": 1.7e-05, + "step": 285 + }, + { + "epoch": 0.7495069033530573, + "high_lr": 0.00085, + "low_lr": 1.7e-05, + "step": 285 + }, + { + "epoch": 0.7495069033530573, + "high_lr": 0.00085, + "low_lr": 1.7e-05, + "step": 285 + }, + { + "epoch": 0.7495069033530573, + "high_lr": 0.00085, + "low_lr": 1.7e-05, + "step": 285 + }, + { + "epoch": 0.7495069033530573, + "high_lr": 0.00085, + "low_lr": 1.7e-05, + "step": 285 + }, + { + "epoch": 0.7495069033530573, + "high_lr": 0.00085, + "low_lr": 1.7e-05, + "step": 285 + }, + { + "epoch": 0.7495069033530573, + "high_lr": 0.00085, + "low_lr": 1.7e-05, + "step": 285 + }, + { + "epoch": 0.7495069033530573, + "high_lr": 0.00085, + "low_lr": 1.7e-05, + "step": 285 + }, + { + "epoch": 0.7521367521367521, + "grad_norm": 0.971239447593689, + "learning_rate": 0.0008494736842105262, + "loss": 1.6809, + "step": 286 + }, + { + "epoch": 0.7521367521367521, + "high_lr": 0.0008494736842105262, + "low_lr": 1.6989473684210528e-05, + "step": 286 + }, + { + "epoch": 0.7521367521367521, + "high_lr": 0.0008494736842105262, + "low_lr": 1.6989473684210528e-05, + "step": 286 + }, + { + "epoch": 0.7521367521367521, + "high_lr": 0.0008494736842105262, + "low_lr": 1.6989473684210528e-05, + "step": 286 + }, + { + "epoch": 0.7521367521367521, + "high_lr": 0.0008494736842105262, + "low_lr": 1.6989473684210528e-05, + "step": 286 + }, + { + "epoch": 0.7521367521367521, + "high_lr": 0.0008494736842105262, + "low_lr": 1.6989473684210528e-05, + "step": 286 + }, + { + "epoch": 0.7521367521367521, + "high_lr": 0.0008494736842105262, + "low_lr": 1.6989473684210528e-05, + "step": 286 + }, + { + "epoch": 0.7521367521367521, + "high_lr": 0.0008494736842105262, + "low_lr": 1.6989473684210528e-05, + "step": 286 + }, + { + "epoch": 0.7521367521367521, + "high_lr": 0.0008494736842105262, + "low_lr": 1.6989473684210528e-05, + "step": 286 + }, + { + "epoch": 0.7547666009204471, + "grad_norm": 0.8431284427642822, + "learning_rate": 0.0008489473684210527, + "loss": 1.6044, + "step": 287 + }, + { + "epoch": 0.7547666009204471, + "high_lr": 0.0008489473684210527, + "low_lr": 1.6978947368421056e-05, + "step": 287 + }, + { + "epoch": 0.7547666009204471, + "high_lr": 0.0008489473684210527, + "low_lr": 1.6978947368421056e-05, + "step": 287 + }, + { + "epoch": 0.7547666009204471, + "high_lr": 0.0008489473684210527, + "low_lr": 1.6978947368421056e-05, + "step": 287 + }, + { + "epoch": 0.7547666009204471, + "high_lr": 0.0008489473684210527, + "low_lr": 1.6978947368421056e-05, + "step": 287 + }, + { + "epoch": 0.7547666009204471, + "high_lr": 0.0008489473684210527, + "low_lr": 1.6978947368421056e-05, + "step": 287 + }, + { + "epoch": 0.7547666009204471, + "high_lr": 0.0008489473684210527, + "low_lr": 1.6978947368421056e-05, + "step": 287 + }, + { + "epoch": 0.7547666009204471, + "high_lr": 0.0008489473684210527, + "low_lr": 1.6978947368421056e-05, + "step": 287 + }, + { + "epoch": 0.7547666009204471, + "high_lr": 0.0008489473684210527, + "low_lr": 1.6978947368421056e-05, + "step": 287 + }, + { + "epoch": 0.757396449704142, + "grad_norm": 0.9022310972213745, + "learning_rate": 0.000848421052631579, + "loss": 1.6496, + "step": 288 + }, + { + "epoch": 0.757396449704142, + "high_lr": 0.000848421052631579, + "low_lr": 1.696842105263158e-05, + "step": 288 + }, + { + "epoch": 0.757396449704142, + "high_lr": 0.000848421052631579, + "low_lr": 1.696842105263158e-05, + "step": 288 + }, + { + "epoch": 0.757396449704142, + "high_lr": 0.000848421052631579, + "low_lr": 1.696842105263158e-05, + "step": 288 + }, + { + "epoch": 0.757396449704142, + "high_lr": 0.000848421052631579, + "low_lr": 1.696842105263158e-05, + "step": 288 + }, + { + "epoch": 0.757396449704142, + "high_lr": 0.000848421052631579, + "low_lr": 1.696842105263158e-05, + "step": 288 + }, + { + "epoch": 0.757396449704142, + "high_lr": 0.000848421052631579, + "low_lr": 1.696842105263158e-05, + "step": 288 + }, + { + "epoch": 0.757396449704142, + "high_lr": 0.000848421052631579, + "low_lr": 1.696842105263158e-05, + "step": 288 + }, + { + "epoch": 0.757396449704142, + "high_lr": 0.000848421052631579, + "low_lr": 1.696842105263158e-05, + "step": 288 + }, + { + "epoch": 0.760026298487837, + "grad_norm": 0.9277114868164062, + "learning_rate": 0.0008478947368421053, + "loss": 1.6554, + "step": 289 + }, + { + "epoch": 0.760026298487837, + "high_lr": 0.0008478947368421053, + "low_lr": 1.6957894736842105e-05, + "step": 289 + }, + { + "epoch": 0.760026298487837, + "high_lr": 0.0008478947368421053, + "low_lr": 1.6957894736842105e-05, + "step": 289 + }, + { + "epoch": 0.760026298487837, + "high_lr": 0.0008478947368421053, + "low_lr": 1.6957894736842105e-05, + "step": 289 + }, + { + "epoch": 0.760026298487837, + "high_lr": 0.0008478947368421053, + "low_lr": 1.6957894736842105e-05, + "step": 289 + }, + { + "epoch": 0.760026298487837, + "high_lr": 0.0008478947368421053, + "low_lr": 1.6957894736842105e-05, + "step": 289 + }, + { + "epoch": 0.760026298487837, + "high_lr": 0.0008478947368421053, + "low_lr": 1.6957894736842105e-05, + "step": 289 + }, + { + "epoch": 0.760026298487837, + "high_lr": 0.0008478947368421053, + "low_lr": 1.6957894736842105e-05, + "step": 289 + }, + { + "epoch": 0.760026298487837, + "high_lr": 0.0008478947368421053, + "low_lr": 1.6957894736842105e-05, + "step": 289 + }, + { + "epoch": 0.7626561472715319, + "grad_norm": 1.0391700267791748, + "learning_rate": 0.0008473684210526316, + "loss": 1.6826, + "step": 290 + }, + { + "epoch": 0.7626561472715319, + "high_lr": 0.0008473684210526316, + "low_lr": 1.6947368421052633e-05, + "step": 290 + }, + { + "epoch": 0.7626561472715319, + "high_lr": 0.0008473684210526316, + "low_lr": 1.6947368421052633e-05, + "step": 290 + }, + { + "epoch": 0.7626561472715319, + "high_lr": 0.0008473684210526316, + "low_lr": 1.6947368421052633e-05, + "step": 290 + }, + { + "epoch": 0.7626561472715319, + "high_lr": 0.0008473684210526316, + "low_lr": 1.6947368421052633e-05, + "step": 290 + }, + { + "epoch": 0.7626561472715319, + "high_lr": 0.0008473684210526316, + "low_lr": 1.6947368421052633e-05, + "step": 290 + }, + { + "epoch": 0.7626561472715319, + "high_lr": 0.0008473684210526316, + "low_lr": 1.6947368421052633e-05, + "step": 290 + }, + { + "epoch": 0.7626561472715319, + "high_lr": 0.0008473684210526316, + "low_lr": 1.6947368421052633e-05, + "step": 290 + }, + { + "epoch": 0.7626561472715319, + "high_lr": 0.0008473684210526316, + "low_lr": 1.6947368421052633e-05, + "step": 290 + }, + { + "epoch": 0.7652859960552268, + "grad_norm": 0.8468443751335144, + "learning_rate": 0.0008468421052631579, + "loss": 1.6082, + "step": 291 + }, + { + "epoch": 0.7652859960552268, + "high_lr": 0.0008468421052631579, + "low_lr": 1.6936842105263158e-05, + "step": 291 + }, + { + "epoch": 0.7652859960552268, + "high_lr": 0.0008468421052631579, + "low_lr": 1.6936842105263158e-05, + "step": 291 + }, + { + "epoch": 0.7652859960552268, + "high_lr": 0.0008468421052631579, + "low_lr": 1.6936842105263158e-05, + "step": 291 + }, + { + "epoch": 0.7652859960552268, + "high_lr": 0.0008468421052631579, + "low_lr": 1.6936842105263158e-05, + "step": 291 + }, + { + "epoch": 0.7652859960552268, + "high_lr": 0.0008468421052631579, + "low_lr": 1.6936842105263158e-05, + "step": 291 + }, + { + "epoch": 0.7652859960552268, + "high_lr": 0.0008468421052631579, + "low_lr": 1.6936842105263158e-05, + "step": 291 + }, + { + "epoch": 0.7652859960552268, + "high_lr": 0.0008468421052631579, + "low_lr": 1.6936842105263158e-05, + "step": 291 + }, + { + "epoch": 0.7652859960552268, + "high_lr": 0.0008468421052631579, + "low_lr": 1.6936842105263158e-05, + "step": 291 + }, + { + "epoch": 0.7679158448389217, + "grad_norm": 0.8624788522720337, + "learning_rate": 0.0008463157894736843, + "loss": 1.6122, + "step": 292 + }, + { + "epoch": 0.7679158448389217, + "high_lr": 0.0008463157894736843, + "low_lr": 1.6926315789473686e-05, + "step": 292 + }, + { + "epoch": 0.7679158448389217, + "high_lr": 0.0008463157894736843, + "low_lr": 1.6926315789473686e-05, + "step": 292 + }, + { + "epoch": 0.7679158448389217, + "high_lr": 0.0008463157894736843, + "low_lr": 1.6926315789473686e-05, + "step": 292 + }, + { + "epoch": 0.7679158448389217, + "high_lr": 0.0008463157894736843, + "low_lr": 1.6926315789473686e-05, + "step": 292 + }, + { + "epoch": 0.7679158448389217, + "high_lr": 0.0008463157894736843, + "low_lr": 1.6926315789473686e-05, + "step": 292 + }, + { + "epoch": 0.7679158448389217, + "high_lr": 0.0008463157894736843, + "low_lr": 1.6926315789473686e-05, + "step": 292 + }, + { + "epoch": 0.7679158448389217, + "high_lr": 0.0008463157894736843, + "low_lr": 1.6926315789473686e-05, + "step": 292 + }, + { + "epoch": 0.7679158448389217, + "high_lr": 0.0008463157894736843, + "low_lr": 1.6926315789473686e-05, + "step": 292 + }, + { + "epoch": 0.7705456936226167, + "grad_norm": 0.940320611000061, + "learning_rate": 0.0008457894736842106, + "loss": 1.5794, + "step": 293 + }, + { + "epoch": 0.7705456936226167, + "high_lr": 0.0008457894736842106, + "low_lr": 1.691578947368421e-05, + "step": 293 + }, + { + "epoch": 0.7705456936226167, + "high_lr": 0.0008457894736842106, + "low_lr": 1.691578947368421e-05, + "step": 293 + }, + { + "epoch": 0.7705456936226167, + "high_lr": 0.0008457894736842106, + "low_lr": 1.691578947368421e-05, + "step": 293 + }, + { + "epoch": 0.7705456936226167, + "high_lr": 0.0008457894736842106, + "low_lr": 1.691578947368421e-05, + "step": 293 + }, + { + "epoch": 0.7705456936226167, + "high_lr": 0.0008457894736842106, + "low_lr": 1.691578947368421e-05, + "step": 293 + }, + { + "epoch": 0.7705456936226167, + "high_lr": 0.0008457894736842106, + "low_lr": 1.691578947368421e-05, + "step": 293 + }, + { + "epoch": 0.7705456936226167, + "high_lr": 0.0008457894736842106, + "low_lr": 1.691578947368421e-05, + "step": 293 + }, + { + "epoch": 0.7705456936226167, + "high_lr": 0.0008457894736842106, + "low_lr": 1.691578947368421e-05, + "step": 293 + }, + { + "epoch": 0.7731755424063116, + "grad_norm": 0.9850048422813416, + "learning_rate": 0.0008452631578947369, + "loss": 1.677, + "step": 294 + }, + { + "epoch": 0.7731755424063116, + "high_lr": 0.0008452631578947369, + "low_lr": 1.690526315789474e-05, + "step": 294 + }, + { + "epoch": 0.7731755424063116, + "high_lr": 0.0008452631578947369, + "low_lr": 1.690526315789474e-05, + "step": 294 + }, + { + "epoch": 0.7731755424063116, + "high_lr": 0.0008452631578947369, + "low_lr": 1.690526315789474e-05, + "step": 294 + }, + { + "epoch": 0.7731755424063116, + "high_lr": 0.0008452631578947369, + "low_lr": 1.690526315789474e-05, + "step": 294 + }, + { + "epoch": 0.7731755424063116, + "high_lr": 0.0008452631578947369, + "low_lr": 1.690526315789474e-05, + "step": 294 + }, + { + "epoch": 0.7731755424063116, + "high_lr": 0.0008452631578947369, + "low_lr": 1.690526315789474e-05, + "step": 294 + }, + { + "epoch": 0.7731755424063116, + "high_lr": 0.0008452631578947369, + "low_lr": 1.690526315789474e-05, + "step": 294 + }, + { + "epoch": 0.7731755424063116, + "high_lr": 0.0008452631578947369, + "low_lr": 1.690526315789474e-05, + "step": 294 + }, + { + "epoch": 0.7758053911900066, + "grad_norm": 0.8709503412246704, + "learning_rate": 0.0008447368421052631, + "loss": 1.5626, + "step": 295 + }, + { + "epoch": 0.7758053911900066, + "high_lr": 0.0008447368421052631, + "low_lr": 1.6894736842105263e-05, + "step": 295 + }, + { + "epoch": 0.7758053911900066, + "high_lr": 0.0008447368421052631, + "low_lr": 1.6894736842105263e-05, + "step": 295 + }, + { + "epoch": 0.7758053911900066, + "high_lr": 0.0008447368421052631, + "low_lr": 1.6894736842105263e-05, + "step": 295 + }, + { + "epoch": 0.7758053911900066, + "high_lr": 0.0008447368421052631, + "low_lr": 1.6894736842105263e-05, + "step": 295 + }, + { + "epoch": 0.7758053911900066, + "high_lr": 0.0008447368421052631, + "low_lr": 1.6894736842105263e-05, + "step": 295 + }, + { + "epoch": 0.7758053911900066, + "high_lr": 0.0008447368421052631, + "low_lr": 1.6894736842105263e-05, + "step": 295 + }, + { + "epoch": 0.7758053911900066, + "high_lr": 0.0008447368421052631, + "low_lr": 1.6894736842105263e-05, + "step": 295 + }, + { + "epoch": 0.7758053911900066, + "high_lr": 0.0008447368421052631, + "low_lr": 1.6894736842105263e-05, + "step": 295 + }, + { + "epoch": 0.7784352399737016, + "grad_norm": 0.8435001969337463, + "learning_rate": 0.0008442105263157895, + "loss": 1.5553, + "step": 296 + }, + { + "epoch": 0.7784352399737016, + "high_lr": 0.0008442105263157895, + "low_lr": 1.688421052631579e-05, + "step": 296 + }, + { + "epoch": 0.7784352399737016, + "high_lr": 0.0008442105263157895, + "low_lr": 1.688421052631579e-05, + "step": 296 + }, + { + "epoch": 0.7784352399737016, + "high_lr": 0.0008442105263157895, + "low_lr": 1.688421052631579e-05, + "step": 296 + }, + { + "epoch": 0.7784352399737016, + "high_lr": 0.0008442105263157895, + "low_lr": 1.688421052631579e-05, + "step": 296 + }, + { + "epoch": 0.7784352399737016, + "high_lr": 0.0008442105263157895, + "low_lr": 1.688421052631579e-05, + "step": 296 + }, + { + "epoch": 0.7784352399737016, + "high_lr": 0.0008442105263157895, + "low_lr": 1.688421052631579e-05, + "step": 296 + }, + { + "epoch": 0.7784352399737016, + "high_lr": 0.0008442105263157895, + "low_lr": 1.688421052631579e-05, + "step": 296 + }, + { + "epoch": 0.7784352399737016, + "high_lr": 0.0008442105263157895, + "low_lr": 1.688421052631579e-05, + "step": 296 + }, + { + "epoch": 0.7810650887573964, + "grad_norm": 0.8458621501922607, + "learning_rate": 0.0008436842105263158, + "loss": 1.6561, + "step": 297 + }, + { + "epoch": 0.7810650887573964, + "high_lr": 0.0008436842105263158, + "low_lr": 1.687368421052632e-05, + "step": 297 + }, + { + "epoch": 0.7810650887573964, + "high_lr": 0.0008436842105263158, + "low_lr": 1.687368421052632e-05, + "step": 297 + }, + { + "epoch": 0.7810650887573964, + "high_lr": 0.0008436842105263158, + "low_lr": 1.687368421052632e-05, + "step": 297 + }, + { + "epoch": 0.7810650887573964, + "high_lr": 0.0008436842105263158, + "low_lr": 1.687368421052632e-05, + "step": 297 + }, + { + "epoch": 0.7810650887573964, + "high_lr": 0.0008436842105263158, + "low_lr": 1.687368421052632e-05, + "step": 297 + }, + { + "epoch": 0.7810650887573964, + "high_lr": 0.0008436842105263158, + "low_lr": 1.687368421052632e-05, + "step": 297 + }, + { + "epoch": 0.7810650887573964, + "high_lr": 0.0008436842105263158, + "low_lr": 1.687368421052632e-05, + "step": 297 + }, + { + "epoch": 0.7810650887573964, + "high_lr": 0.0008436842105263158, + "low_lr": 1.687368421052632e-05, + "step": 297 + }, + { + "epoch": 0.7836949375410914, + "grad_norm": 0.893551766872406, + "learning_rate": 0.0008431578947368421, + "loss": 1.6, + "step": 298 + }, + { + "epoch": 0.7836949375410914, + "high_lr": 0.0008431578947368421, + "low_lr": 1.6863157894736844e-05, + "step": 298 + }, + { + "epoch": 0.7836949375410914, + "high_lr": 0.0008431578947368421, + "low_lr": 1.6863157894736844e-05, + "step": 298 + }, + { + "epoch": 0.7836949375410914, + "high_lr": 0.0008431578947368421, + "low_lr": 1.6863157894736844e-05, + "step": 298 + }, + { + "epoch": 0.7836949375410914, + "high_lr": 0.0008431578947368421, + "low_lr": 1.6863157894736844e-05, + "step": 298 + }, + { + "epoch": 0.7836949375410914, + "high_lr": 0.0008431578947368421, + "low_lr": 1.6863157894736844e-05, + "step": 298 + }, + { + "epoch": 0.7836949375410914, + "high_lr": 0.0008431578947368421, + "low_lr": 1.6863157894736844e-05, + "step": 298 + }, + { + "epoch": 0.7836949375410914, + "high_lr": 0.0008431578947368421, + "low_lr": 1.6863157894736844e-05, + "step": 298 + }, + { + "epoch": 0.7836949375410914, + "high_lr": 0.0008431578947368421, + "low_lr": 1.6863157894736844e-05, + "step": 298 + }, + { + "epoch": 0.7863247863247863, + "grad_norm": 0.9290257096290588, + "learning_rate": 0.0008426315789473684, + "loss": 1.6117, + "step": 299 + }, + { + "epoch": 0.7863247863247863, + "high_lr": 0.0008426315789473684, + "low_lr": 1.685263157894737e-05, + "step": 299 + }, + { + "epoch": 0.7863247863247863, + "high_lr": 0.0008426315789473684, + "low_lr": 1.685263157894737e-05, + "step": 299 + }, + { + "epoch": 0.7863247863247863, + "high_lr": 0.0008426315789473684, + "low_lr": 1.685263157894737e-05, + "step": 299 + }, + { + "epoch": 0.7863247863247863, + "high_lr": 0.0008426315789473684, + "low_lr": 1.685263157894737e-05, + "step": 299 + }, + { + "epoch": 0.7863247863247863, + "high_lr": 0.0008426315789473684, + "low_lr": 1.685263157894737e-05, + "step": 299 + }, + { + "epoch": 0.7863247863247863, + "high_lr": 0.0008426315789473684, + "low_lr": 1.685263157894737e-05, + "step": 299 + }, + { + "epoch": 0.7863247863247863, + "high_lr": 0.0008426315789473684, + "low_lr": 1.685263157894737e-05, + "step": 299 + }, + { + "epoch": 0.7863247863247863, + "high_lr": 0.0008426315789473684, + "low_lr": 1.685263157894737e-05, + "step": 299 + }, + { + "epoch": 0.7889546351084813, + "grad_norm": 0.9197156429290771, + "learning_rate": 0.0008421052631578947, + "loss": 1.6593, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "high_lr": 0.0008421052631578947, + "low_lr": 1.6842105263157896e-05, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "high_lr": 0.0008421052631578947, + "low_lr": 1.6842105263157896e-05, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "high_lr": 0.0008421052631578947, + "low_lr": 1.6842105263157896e-05, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "high_lr": 0.0008421052631578947, + "low_lr": 1.6842105263157896e-05, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "high_lr": 0.0008421052631578947, + "low_lr": 1.6842105263157896e-05, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "high_lr": 0.0008421052631578947, + "low_lr": 1.6842105263157896e-05, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "high_lr": 0.0008421052631578947, + "low_lr": 1.6842105263157896e-05, + "step": 300 + }, + { + "epoch": 0.7889546351084813, + "high_lr": 0.0008421052631578947, + "low_lr": 1.6842105263157896e-05, + "step": 300 + }, + { + "epoch": 0.7915844838921762, + "grad_norm": 1.0398555994033813, + "learning_rate": 0.0008415789473684211, + "loss": 1.6858, + "step": 301 + }, + { + "epoch": 0.7915844838921762, + "high_lr": 0.0008415789473684211, + "low_lr": 1.6831578947368424e-05, + "step": 301 + }, + { + "epoch": 0.7915844838921762, + "high_lr": 0.0008415789473684211, + "low_lr": 1.6831578947368424e-05, + "step": 301 + }, + { + "epoch": 0.7915844838921762, + "high_lr": 0.0008415789473684211, + "low_lr": 1.6831578947368424e-05, + "step": 301 + }, + { + "epoch": 0.7915844838921762, + "high_lr": 0.0008415789473684211, + "low_lr": 1.6831578947368424e-05, + "step": 301 + }, + { + "epoch": 0.7915844838921762, + "high_lr": 0.0008415789473684211, + "low_lr": 1.6831578947368424e-05, + "step": 301 + }, + { + "epoch": 0.7915844838921762, + "high_lr": 0.0008415789473684211, + "low_lr": 1.6831578947368424e-05, + "step": 301 + }, + { + "epoch": 0.7915844838921762, + "high_lr": 0.0008415789473684211, + "low_lr": 1.6831578947368424e-05, + "step": 301 + }, + { + "epoch": 0.7915844838921762, + "high_lr": 0.0008415789473684211, + "low_lr": 1.6831578947368424e-05, + "step": 301 + }, + { + "epoch": 0.7942143326758712, + "grad_norm": 0.9343593716621399, + "learning_rate": 0.0008410526315789474, + "loss": 1.6419, + "step": 302 + }, + { + "epoch": 0.7942143326758712, + "high_lr": 0.0008410526315789474, + "low_lr": 1.682105263157895e-05, + "step": 302 + }, + { + "epoch": 0.7942143326758712, + "high_lr": 0.0008410526315789474, + "low_lr": 1.682105263157895e-05, + "step": 302 + }, + { + "epoch": 0.7942143326758712, + "high_lr": 0.0008410526315789474, + "low_lr": 1.682105263157895e-05, + "step": 302 + }, + { + "epoch": 0.7942143326758712, + "high_lr": 0.0008410526315789474, + "low_lr": 1.682105263157895e-05, + "step": 302 + }, + { + "epoch": 0.7942143326758712, + "high_lr": 0.0008410526315789474, + "low_lr": 1.682105263157895e-05, + "step": 302 + }, + { + "epoch": 0.7942143326758712, + "high_lr": 0.0008410526315789474, + "low_lr": 1.682105263157895e-05, + "step": 302 + }, + { + "epoch": 0.7942143326758712, + "high_lr": 0.0008410526315789474, + "low_lr": 1.682105263157895e-05, + "step": 302 + }, + { + "epoch": 0.7942143326758712, + "high_lr": 0.0008410526315789474, + "low_lr": 1.682105263157895e-05, + "step": 302 + }, + { + "epoch": 0.796844181459566, + "grad_norm": 0.9294910430908203, + "learning_rate": 0.0008405263157894736, + "loss": 1.6051, + "step": 303 + }, + { + "epoch": 0.796844181459566, + "high_lr": 0.0008405263157894736, + "low_lr": 1.6810526315789474e-05, + "step": 303 + }, + { + "epoch": 0.796844181459566, + "high_lr": 0.0008405263157894736, + "low_lr": 1.6810526315789474e-05, + "step": 303 + }, + { + "epoch": 0.796844181459566, + "high_lr": 0.0008405263157894736, + "low_lr": 1.6810526315789474e-05, + "step": 303 + }, + { + "epoch": 0.796844181459566, + "high_lr": 0.0008405263157894736, + "low_lr": 1.6810526315789474e-05, + "step": 303 + }, + { + "epoch": 0.796844181459566, + "high_lr": 0.0008405263157894736, + "low_lr": 1.6810526315789474e-05, + "step": 303 + }, + { + "epoch": 0.796844181459566, + "high_lr": 0.0008405263157894736, + "low_lr": 1.6810526315789474e-05, + "step": 303 + }, + { + "epoch": 0.796844181459566, + "high_lr": 0.0008405263157894736, + "low_lr": 1.6810526315789474e-05, + "step": 303 + }, + { + "epoch": 0.796844181459566, + "high_lr": 0.0008405263157894736, + "low_lr": 1.6810526315789474e-05, + "step": 303 + }, + { + "epoch": 0.799474030243261, + "grad_norm": 0.9193534851074219, + "learning_rate": 0.00084, + "loss": 1.6315, + "step": 304 + }, + { + "epoch": 0.799474030243261, + "high_lr": 0.00084, + "low_lr": 1.6800000000000002e-05, + "step": 304 + }, + { + "epoch": 0.799474030243261, + "high_lr": 0.00084, + "low_lr": 1.6800000000000002e-05, + "step": 304 + }, + { + "epoch": 0.799474030243261, + "high_lr": 0.00084, + "low_lr": 1.6800000000000002e-05, + "step": 304 + }, + { + "epoch": 0.799474030243261, + "high_lr": 0.00084, + "low_lr": 1.6800000000000002e-05, + "step": 304 + }, + { + "epoch": 0.799474030243261, + "high_lr": 0.00084, + "low_lr": 1.6800000000000002e-05, + "step": 304 + }, + { + "epoch": 0.799474030243261, + "high_lr": 0.00084, + "low_lr": 1.6800000000000002e-05, + "step": 304 + }, + { + "epoch": 0.799474030243261, + "high_lr": 0.00084, + "low_lr": 1.6800000000000002e-05, + "step": 304 + }, + { + "epoch": 0.799474030243261, + "high_lr": 0.00084, + "low_lr": 1.6800000000000002e-05, + "step": 304 + }, + { + "epoch": 0.8021038790269559, + "grad_norm": 0.8686949014663696, + "learning_rate": 0.0008394736842105263, + "loss": 1.589, + "step": 305 + }, + { + "epoch": 0.8021038790269559, + "high_lr": 0.0008394736842105263, + "low_lr": 1.6789473684210526e-05, + "step": 305 + }, + { + "epoch": 0.8021038790269559, + "high_lr": 0.0008394736842105263, + "low_lr": 1.6789473684210526e-05, + "step": 305 + }, + { + "epoch": 0.8021038790269559, + "high_lr": 0.0008394736842105263, + "low_lr": 1.6789473684210526e-05, + "step": 305 + }, + { + "epoch": 0.8021038790269559, + "high_lr": 0.0008394736842105263, + "low_lr": 1.6789473684210526e-05, + "step": 305 + }, + { + "epoch": 0.8021038790269559, + "high_lr": 0.0008394736842105263, + "low_lr": 1.6789473684210526e-05, + "step": 305 + }, + { + "epoch": 0.8021038790269559, + "high_lr": 0.0008394736842105263, + "low_lr": 1.6789473684210526e-05, + "step": 305 + }, + { + "epoch": 0.8021038790269559, + "high_lr": 0.0008394736842105263, + "low_lr": 1.6789473684210526e-05, + "step": 305 + }, + { + "epoch": 0.8021038790269559, + "high_lr": 0.0008394736842105263, + "low_lr": 1.6789473684210526e-05, + "step": 305 + }, + { + "epoch": 0.8047337278106509, + "grad_norm": 0.892025887966156, + "learning_rate": 0.0008389473684210527, + "loss": 1.5995, + "step": 306 + }, + { + "epoch": 0.8047337278106509, + "high_lr": 0.0008389473684210527, + "low_lr": 1.6778947368421054e-05, + "step": 306 + }, + { + "epoch": 0.8047337278106509, + "high_lr": 0.0008389473684210527, + "low_lr": 1.6778947368421054e-05, + "step": 306 + }, + { + "epoch": 0.8047337278106509, + "high_lr": 0.0008389473684210527, + "low_lr": 1.6778947368421054e-05, + "step": 306 + }, + { + "epoch": 0.8047337278106509, + "high_lr": 0.0008389473684210527, + "low_lr": 1.6778947368421054e-05, + "step": 306 + }, + { + "epoch": 0.8047337278106509, + "high_lr": 0.0008389473684210527, + "low_lr": 1.6778947368421054e-05, + "step": 306 + }, + { + "epoch": 0.8047337278106509, + "high_lr": 0.0008389473684210527, + "low_lr": 1.6778947368421054e-05, + "step": 306 + }, + { + "epoch": 0.8047337278106509, + "high_lr": 0.0008389473684210527, + "low_lr": 1.6778947368421054e-05, + "step": 306 + }, + { + "epoch": 0.8047337278106509, + "high_lr": 0.0008389473684210527, + "low_lr": 1.6778947368421054e-05, + "step": 306 + }, + { + "epoch": 0.8073635765943459, + "grad_norm": 0.9276682734489441, + "learning_rate": 0.000838421052631579, + "loss": 1.6165, + "step": 307 + }, + { + "epoch": 0.8073635765943459, + "high_lr": 0.000838421052631579, + "low_lr": 1.676842105263158e-05, + "step": 307 + }, + { + "epoch": 0.8073635765943459, + "high_lr": 0.000838421052631579, + "low_lr": 1.676842105263158e-05, + "step": 307 + }, + { + "epoch": 0.8073635765943459, + "high_lr": 0.000838421052631579, + "low_lr": 1.676842105263158e-05, + "step": 307 + }, + { + "epoch": 0.8073635765943459, + "high_lr": 0.000838421052631579, + "low_lr": 1.676842105263158e-05, + "step": 307 + }, + { + "epoch": 0.8073635765943459, + "high_lr": 0.000838421052631579, + "low_lr": 1.676842105263158e-05, + "step": 307 + }, + { + "epoch": 0.8073635765943459, + "high_lr": 0.000838421052631579, + "low_lr": 1.676842105263158e-05, + "step": 307 + }, + { + "epoch": 0.8073635765943459, + "high_lr": 0.000838421052631579, + "low_lr": 1.676842105263158e-05, + "step": 307 + }, + { + "epoch": 0.8073635765943459, + "high_lr": 0.000838421052631579, + "low_lr": 1.676842105263158e-05, + "step": 307 + }, + { + "epoch": 0.8099934253780408, + "grad_norm": 0.9737101197242737, + "learning_rate": 0.0008378947368421053, + "loss": 1.6376, + "step": 308 + }, + { + "epoch": 0.8099934253780408, + "high_lr": 0.0008378947368421053, + "low_lr": 1.6757894736842107e-05, + "step": 308 + }, + { + "epoch": 0.8099934253780408, + "high_lr": 0.0008378947368421053, + "low_lr": 1.6757894736842107e-05, + "step": 308 + }, + { + "epoch": 0.8099934253780408, + "high_lr": 0.0008378947368421053, + "low_lr": 1.6757894736842107e-05, + "step": 308 + }, + { + "epoch": 0.8099934253780408, + "high_lr": 0.0008378947368421053, + "low_lr": 1.6757894736842107e-05, + "step": 308 + }, + { + "epoch": 0.8099934253780408, + "high_lr": 0.0008378947368421053, + "low_lr": 1.6757894736842107e-05, + "step": 308 + }, + { + "epoch": 0.8099934253780408, + "high_lr": 0.0008378947368421053, + "low_lr": 1.6757894736842107e-05, + "step": 308 + }, + { + "epoch": 0.8099934253780408, + "high_lr": 0.0008378947368421053, + "low_lr": 1.6757894736842107e-05, + "step": 308 + }, + { + "epoch": 0.8099934253780408, + "high_lr": 0.0008378947368421053, + "low_lr": 1.6757894736842107e-05, + "step": 308 + }, + { + "epoch": 0.8126232741617357, + "grad_norm": 0.915702223777771, + "learning_rate": 0.0008373684210526316, + "loss": 1.6523, + "step": 309 + }, + { + "epoch": 0.8126232741617357, + "high_lr": 0.0008373684210526316, + "low_lr": 1.6747368421052632e-05, + "step": 309 + }, + { + "epoch": 0.8126232741617357, + "high_lr": 0.0008373684210526316, + "low_lr": 1.6747368421052632e-05, + "step": 309 + }, + { + "epoch": 0.8126232741617357, + "high_lr": 0.0008373684210526316, + "low_lr": 1.6747368421052632e-05, + "step": 309 + }, + { + "epoch": 0.8126232741617357, + "high_lr": 0.0008373684210526316, + "low_lr": 1.6747368421052632e-05, + "step": 309 + }, + { + "epoch": 0.8126232741617357, + "high_lr": 0.0008373684210526316, + "low_lr": 1.6747368421052632e-05, + "step": 309 + }, + { + "epoch": 0.8126232741617357, + "high_lr": 0.0008373684210526316, + "low_lr": 1.6747368421052632e-05, + "step": 309 + }, + { + "epoch": 0.8126232741617357, + "high_lr": 0.0008373684210526316, + "low_lr": 1.6747368421052632e-05, + "step": 309 + }, + { + "epoch": 0.8126232741617357, + "high_lr": 0.0008373684210526316, + "low_lr": 1.6747368421052632e-05, + "step": 309 + }, + { + "epoch": 0.8152531229454306, + "grad_norm": 0.9162072539329529, + "learning_rate": 0.000836842105263158, + "loss": 1.5835, + "step": 310 + }, + { + "epoch": 0.8152531229454306, + "high_lr": 0.000836842105263158, + "low_lr": 1.673684210526316e-05, + "step": 310 + }, + { + "epoch": 0.8152531229454306, + "high_lr": 0.000836842105263158, + "low_lr": 1.673684210526316e-05, + "step": 310 + }, + { + "epoch": 0.8152531229454306, + "high_lr": 0.000836842105263158, + "low_lr": 1.673684210526316e-05, + "step": 310 + }, + { + "epoch": 0.8152531229454306, + "high_lr": 0.000836842105263158, + "low_lr": 1.673684210526316e-05, + "step": 310 + }, + { + "epoch": 0.8152531229454306, + "high_lr": 0.000836842105263158, + "low_lr": 1.673684210526316e-05, + "step": 310 + }, + { + "epoch": 0.8152531229454306, + "high_lr": 0.000836842105263158, + "low_lr": 1.673684210526316e-05, + "step": 310 + }, + { + "epoch": 0.8152531229454306, + "high_lr": 0.000836842105263158, + "low_lr": 1.673684210526316e-05, + "step": 310 + }, + { + "epoch": 0.8152531229454306, + "high_lr": 0.000836842105263158, + "low_lr": 1.673684210526316e-05, + "step": 310 + }, + { + "epoch": 0.8178829717291256, + "grad_norm": 0.8940391540527344, + "learning_rate": 0.0008363157894736843, + "loss": 1.6092, + "step": 311 + }, + { + "epoch": 0.8178829717291256, + "high_lr": 0.0008363157894736843, + "low_lr": 1.6726315789473684e-05, + "step": 311 + }, + { + "epoch": 0.8178829717291256, + "high_lr": 0.0008363157894736843, + "low_lr": 1.6726315789473684e-05, + "step": 311 + }, + { + "epoch": 0.8178829717291256, + "high_lr": 0.0008363157894736843, + "low_lr": 1.6726315789473684e-05, + "step": 311 + }, + { + "epoch": 0.8178829717291256, + "high_lr": 0.0008363157894736843, + "low_lr": 1.6726315789473684e-05, + "step": 311 + }, + { + "epoch": 0.8178829717291256, + "high_lr": 0.0008363157894736843, + "low_lr": 1.6726315789473684e-05, + "step": 311 + }, + { + "epoch": 0.8178829717291256, + "high_lr": 0.0008363157894736843, + "low_lr": 1.6726315789473684e-05, + "step": 311 + }, + { + "epoch": 0.8178829717291256, + "high_lr": 0.0008363157894736843, + "low_lr": 1.6726315789473684e-05, + "step": 311 + }, + { + "epoch": 0.8178829717291256, + "high_lr": 0.0008363157894736843, + "low_lr": 1.6726315789473684e-05, + "step": 311 + }, + { + "epoch": 0.8205128205128205, + "grad_norm": 0.9650228023529053, + "learning_rate": 0.0008357894736842105, + "loss": 1.6253, + "step": 312 + }, + { + "epoch": 0.8205128205128205, + "high_lr": 0.0008357894736842105, + "low_lr": 1.6715789473684212e-05, + "step": 312 + }, + { + "epoch": 0.8205128205128205, + "high_lr": 0.0008357894736842105, + "low_lr": 1.6715789473684212e-05, + "step": 312 + }, + { + "epoch": 0.8205128205128205, + "high_lr": 0.0008357894736842105, + "low_lr": 1.6715789473684212e-05, + "step": 312 + }, + { + "epoch": 0.8205128205128205, + "high_lr": 0.0008357894736842105, + "low_lr": 1.6715789473684212e-05, + "step": 312 + }, + { + "epoch": 0.8205128205128205, + "high_lr": 0.0008357894736842105, + "low_lr": 1.6715789473684212e-05, + "step": 312 + }, + { + "epoch": 0.8205128205128205, + "high_lr": 0.0008357894736842105, + "low_lr": 1.6715789473684212e-05, + "step": 312 + }, + { + "epoch": 0.8205128205128205, + "high_lr": 0.0008357894736842105, + "low_lr": 1.6715789473684212e-05, + "step": 312 + }, + { + "epoch": 0.8205128205128205, + "high_lr": 0.0008357894736842105, + "low_lr": 1.6715789473684212e-05, + "step": 312 + }, + { + "epoch": 0.8231426692965155, + "grad_norm": 0.874620795249939, + "learning_rate": 0.0008352631578947368, + "loss": 1.6371, + "step": 313 + }, + { + "epoch": 0.8231426692965155, + "high_lr": 0.0008352631578947368, + "low_lr": 1.6705263157894737e-05, + "step": 313 + }, + { + "epoch": 0.8231426692965155, + "high_lr": 0.0008352631578947368, + "low_lr": 1.6705263157894737e-05, + "step": 313 + }, + { + "epoch": 0.8231426692965155, + "high_lr": 0.0008352631578947368, + "low_lr": 1.6705263157894737e-05, + "step": 313 + }, + { + "epoch": 0.8231426692965155, + "high_lr": 0.0008352631578947368, + "low_lr": 1.6705263157894737e-05, + "step": 313 + }, + { + "epoch": 0.8231426692965155, + "high_lr": 0.0008352631578947368, + "low_lr": 1.6705263157894737e-05, + "step": 313 + }, + { + "epoch": 0.8231426692965155, + "high_lr": 0.0008352631578947368, + "low_lr": 1.6705263157894737e-05, + "step": 313 + }, + { + "epoch": 0.8231426692965155, + "high_lr": 0.0008352631578947368, + "low_lr": 1.6705263157894737e-05, + "step": 313 + }, + { + "epoch": 0.8231426692965155, + "high_lr": 0.0008352631578947368, + "low_lr": 1.6705263157894737e-05, + "step": 313 + }, + { + "epoch": 0.8257725180802103, + "grad_norm": 0.9986231923103333, + "learning_rate": 0.0008347368421052631, + "loss": 1.6738, + "step": 314 + }, + { + "epoch": 0.8257725180802103, + "high_lr": 0.0008347368421052631, + "low_lr": 1.6694736842105265e-05, + "step": 314 + }, + { + "epoch": 0.8257725180802103, + "high_lr": 0.0008347368421052631, + "low_lr": 1.6694736842105265e-05, + "step": 314 + }, + { + "epoch": 0.8257725180802103, + "high_lr": 0.0008347368421052631, + "low_lr": 1.6694736842105265e-05, + "step": 314 + }, + { + "epoch": 0.8257725180802103, + "high_lr": 0.0008347368421052631, + "low_lr": 1.6694736842105265e-05, + "step": 314 + }, + { + "epoch": 0.8257725180802103, + "high_lr": 0.0008347368421052631, + "low_lr": 1.6694736842105265e-05, + "step": 314 + }, + { + "epoch": 0.8257725180802103, + "high_lr": 0.0008347368421052631, + "low_lr": 1.6694736842105265e-05, + "step": 314 + }, + { + "epoch": 0.8257725180802103, + "high_lr": 0.0008347368421052631, + "low_lr": 1.6694736842105265e-05, + "step": 314 + }, + { + "epoch": 0.8257725180802103, + "high_lr": 0.0008347368421052631, + "low_lr": 1.6694736842105265e-05, + "step": 314 + }, + { + "epoch": 0.8284023668639053, + "grad_norm": 0.9026766419410706, + "learning_rate": 0.0008342105263157895, + "loss": 1.601, + "step": 315 + }, + { + "epoch": 0.8284023668639053, + "high_lr": 0.0008342105263157895, + "low_lr": 1.6684210526315793e-05, + "step": 315 + }, + { + "epoch": 0.8284023668639053, + "high_lr": 0.0008342105263157895, + "low_lr": 1.6684210526315793e-05, + "step": 315 + }, + { + "epoch": 0.8284023668639053, + "high_lr": 0.0008342105263157895, + "low_lr": 1.6684210526315793e-05, + "step": 315 + }, + { + "epoch": 0.8284023668639053, + "high_lr": 0.0008342105263157895, + "low_lr": 1.6684210526315793e-05, + "step": 315 + }, + { + "epoch": 0.8284023668639053, + "high_lr": 0.0008342105263157895, + "low_lr": 1.6684210526315793e-05, + "step": 315 + }, + { + "epoch": 0.8284023668639053, + "high_lr": 0.0008342105263157895, + "low_lr": 1.6684210526315793e-05, + "step": 315 + }, + { + "epoch": 0.8284023668639053, + "high_lr": 0.0008342105263157895, + "low_lr": 1.6684210526315793e-05, + "step": 315 + }, + { + "epoch": 0.8284023668639053, + "high_lr": 0.0008342105263157895, + "low_lr": 1.6684210526315793e-05, + "step": 315 + }, + { + "epoch": 0.8310322156476002, + "grad_norm": 0.8999379277229309, + "learning_rate": 0.0008336842105263158, + "loss": 1.548, + "step": 316 + }, + { + "epoch": 0.8310322156476002, + "high_lr": 0.0008336842105263158, + "low_lr": 1.6673684210526318e-05, + "step": 316 + }, + { + "epoch": 0.8310322156476002, + "high_lr": 0.0008336842105263158, + "low_lr": 1.6673684210526318e-05, + "step": 316 + }, + { + "epoch": 0.8310322156476002, + "high_lr": 0.0008336842105263158, + "low_lr": 1.6673684210526318e-05, + "step": 316 + }, + { + "epoch": 0.8310322156476002, + "high_lr": 0.0008336842105263158, + "low_lr": 1.6673684210526318e-05, + "step": 316 + }, + { + "epoch": 0.8310322156476002, + "high_lr": 0.0008336842105263158, + "low_lr": 1.6673684210526318e-05, + "step": 316 + }, + { + "epoch": 0.8310322156476002, + "high_lr": 0.0008336842105263158, + "low_lr": 1.6673684210526318e-05, + "step": 316 + }, + { + "epoch": 0.8310322156476002, + "high_lr": 0.0008336842105263158, + "low_lr": 1.6673684210526318e-05, + "step": 316 + }, + { + "epoch": 0.8310322156476002, + "high_lr": 0.0008336842105263158, + "low_lr": 1.6673684210526318e-05, + "step": 316 + }, + { + "epoch": 0.8336620644312952, + "grad_norm": 0.8758360147476196, + "learning_rate": 0.0008331578947368421, + "loss": 1.6161, + "step": 317 + }, + { + "epoch": 0.8336620644312952, + "high_lr": 0.0008331578947368421, + "low_lr": 1.6663157894736842e-05, + "step": 317 + }, + { + "epoch": 0.8336620644312952, + "high_lr": 0.0008331578947368421, + "low_lr": 1.6663157894736842e-05, + "step": 317 + }, + { + "epoch": 0.8336620644312952, + "high_lr": 0.0008331578947368421, + "low_lr": 1.6663157894736842e-05, + "step": 317 + }, + { + "epoch": 0.8336620644312952, + "high_lr": 0.0008331578947368421, + "low_lr": 1.6663157894736842e-05, + "step": 317 + }, + { + "epoch": 0.8336620644312952, + "high_lr": 0.0008331578947368421, + "low_lr": 1.6663157894736842e-05, + "step": 317 + }, + { + "epoch": 0.8336620644312952, + "high_lr": 0.0008331578947368421, + "low_lr": 1.6663157894736842e-05, + "step": 317 + }, + { + "epoch": 0.8336620644312952, + "high_lr": 0.0008331578947368421, + "low_lr": 1.6663157894736842e-05, + "step": 317 + }, + { + "epoch": 0.8336620644312952, + "high_lr": 0.0008331578947368421, + "low_lr": 1.6663157894736842e-05, + "step": 317 + }, + { + "epoch": 0.8362919132149902, + "grad_norm": 0.9446306824684143, + "learning_rate": 0.0008326315789473684, + "loss": 1.631, + "step": 318 + }, + { + "epoch": 0.8362919132149902, + "high_lr": 0.0008326315789473684, + "low_lr": 1.665263157894737e-05, + "step": 318 + }, + { + "epoch": 0.8362919132149902, + "high_lr": 0.0008326315789473684, + "low_lr": 1.665263157894737e-05, + "step": 318 + }, + { + "epoch": 0.8362919132149902, + "high_lr": 0.0008326315789473684, + "low_lr": 1.665263157894737e-05, + "step": 318 + }, + { + "epoch": 0.8362919132149902, + "high_lr": 0.0008326315789473684, + "low_lr": 1.665263157894737e-05, + "step": 318 + }, + { + "epoch": 0.8362919132149902, + "high_lr": 0.0008326315789473684, + "low_lr": 1.665263157894737e-05, + "step": 318 + }, + { + "epoch": 0.8362919132149902, + "high_lr": 0.0008326315789473684, + "low_lr": 1.665263157894737e-05, + "step": 318 + }, + { + "epoch": 0.8362919132149902, + "high_lr": 0.0008326315789473684, + "low_lr": 1.665263157894737e-05, + "step": 318 + }, + { + "epoch": 0.8362919132149902, + "high_lr": 0.0008326315789473684, + "low_lr": 1.665263157894737e-05, + "step": 318 + }, + { + "epoch": 0.8389217619986851, + "grad_norm": 0.9866498708724976, + "learning_rate": 0.0008321052631578948, + "loss": 1.615, + "step": 319 + }, + { + "epoch": 0.8389217619986851, + "high_lr": 0.0008321052631578948, + "low_lr": 1.66421052631579e-05, + "step": 319 + }, + { + "epoch": 0.8389217619986851, + "high_lr": 0.0008321052631578948, + "low_lr": 1.66421052631579e-05, + "step": 319 + }, + { + "epoch": 0.8389217619986851, + "high_lr": 0.0008321052631578948, + "low_lr": 1.66421052631579e-05, + "step": 319 + }, + { + "epoch": 0.8389217619986851, + "high_lr": 0.0008321052631578948, + "low_lr": 1.66421052631579e-05, + "step": 319 + }, + { + "epoch": 0.8389217619986851, + "high_lr": 0.0008321052631578948, + "low_lr": 1.66421052631579e-05, + "step": 319 + }, + { + "epoch": 0.8389217619986851, + "high_lr": 0.0008321052631578948, + "low_lr": 1.66421052631579e-05, + "step": 319 + }, + { + "epoch": 0.8389217619986851, + "high_lr": 0.0008321052631578948, + "low_lr": 1.66421052631579e-05, + "step": 319 + }, + { + "epoch": 0.8389217619986851, + "high_lr": 0.0008321052631578948, + "low_lr": 1.66421052631579e-05, + "step": 319 + }, + { + "epoch": 0.84155161078238, + "grad_norm": 0.9884803891181946, + "learning_rate": 0.0008315789473684212, + "loss": 1.6674, + "step": 320 + }, + { + "epoch": 0.84155161078238, + "high_lr": 0.0008315789473684212, + "low_lr": 1.6631578947368423e-05, + "step": 320 + }, + { + "epoch": 0.84155161078238, + "high_lr": 0.0008315789473684212, + "low_lr": 1.6631578947368423e-05, + "step": 320 + }, + { + "epoch": 0.84155161078238, + "high_lr": 0.0008315789473684212, + "low_lr": 1.6631578947368423e-05, + "step": 320 + }, + { + "epoch": 0.84155161078238, + "high_lr": 0.0008315789473684212, + "low_lr": 1.6631578947368423e-05, + "step": 320 + }, + { + "epoch": 0.84155161078238, + "high_lr": 0.0008315789473684212, + "low_lr": 1.6631578947368423e-05, + "step": 320 + }, + { + "epoch": 0.84155161078238, + "high_lr": 0.0008315789473684212, + "low_lr": 1.6631578947368423e-05, + "step": 320 + }, + { + "epoch": 0.84155161078238, + "high_lr": 0.0008315789473684212, + "low_lr": 1.6631578947368423e-05, + "step": 320 + }, + { + "epoch": 0.84155161078238, + "high_lr": 0.0008315789473684212, + "low_lr": 1.6631578947368423e-05, + "step": 320 + }, + { + "epoch": 0.8441814595660749, + "grad_norm": 0.8910170793533325, + "learning_rate": 0.0008310526315789474, + "loss": 1.5924, + "step": 321 + }, + { + "epoch": 0.8441814595660749, + "high_lr": 0.0008310526315789474, + "low_lr": 1.6621052631578948e-05, + "step": 321 + }, + { + "epoch": 0.8441814595660749, + "high_lr": 0.0008310526315789474, + "low_lr": 1.6621052631578948e-05, + "step": 321 + }, + { + "epoch": 0.8441814595660749, + "high_lr": 0.0008310526315789474, + "low_lr": 1.6621052631578948e-05, + "step": 321 + }, + { + "epoch": 0.8441814595660749, + "high_lr": 0.0008310526315789474, + "low_lr": 1.6621052631578948e-05, + "step": 321 + }, + { + "epoch": 0.8441814595660749, + "high_lr": 0.0008310526315789474, + "low_lr": 1.6621052631578948e-05, + "step": 321 + }, + { + "epoch": 0.8441814595660749, + "high_lr": 0.0008310526315789474, + "low_lr": 1.6621052631578948e-05, + "step": 321 + }, + { + "epoch": 0.8441814595660749, + "high_lr": 0.0008310526315789474, + "low_lr": 1.6621052631578948e-05, + "step": 321 + }, + { + "epoch": 0.8441814595660749, + "high_lr": 0.0008310526315789474, + "low_lr": 1.6621052631578948e-05, + "step": 321 + }, + { + "epoch": 0.8468113083497699, + "grad_norm": 0.9594380855560303, + "learning_rate": 0.0008305263157894737, + "loss": 1.6081, + "step": 322 + }, + { + "epoch": 0.8468113083497699, + "high_lr": 0.0008305263157894737, + "low_lr": 1.6610526315789476e-05, + "step": 322 + }, + { + "epoch": 0.8468113083497699, + "high_lr": 0.0008305263157894737, + "low_lr": 1.6610526315789476e-05, + "step": 322 + }, + { + "epoch": 0.8468113083497699, + "high_lr": 0.0008305263157894737, + "low_lr": 1.6610526315789476e-05, + "step": 322 + }, + { + "epoch": 0.8468113083497699, + "high_lr": 0.0008305263157894737, + "low_lr": 1.6610526315789476e-05, + "step": 322 + }, + { + "epoch": 0.8468113083497699, + "high_lr": 0.0008305263157894737, + "low_lr": 1.6610526315789476e-05, + "step": 322 + }, + { + "epoch": 0.8468113083497699, + "high_lr": 0.0008305263157894737, + "low_lr": 1.6610526315789476e-05, + "step": 322 + }, + { + "epoch": 0.8468113083497699, + "high_lr": 0.0008305263157894737, + "low_lr": 1.6610526315789476e-05, + "step": 322 + }, + { + "epoch": 0.8468113083497699, + "high_lr": 0.0008305263157894737, + "low_lr": 1.6610526315789476e-05, + "step": 322 + }, + { + "epoch": 0.8494411571334648, + "grad_norm": 0.9601569175720215, + "learning_rate": 0.00083, + "loss": 1.6509, + "step": 323 + }, + { + "epoch": 0.8494411571334648, + "high_lr": 0.00083, + "low_lr": 1.66e-05, + "step": 323 + }, + { + "epoch": 0.8494411571334648, + "high_lr": 0.00083, + "low_lr": 1.66e-05, + "step": 323 + }, + { + "epoch": 0.8494411571334648, + "high_lr": 0.00083, + "low_lr": 1.66e-05, + "step": 323 + }, + { + "epoch": 0.8494411571334648, + "high_lr": 0.00083, + "low_lr": 1.66e-05, + "step": 323 + }, + { + "epoch": 0.8494411571334648, + "high_lr": 0.00083, + "low_lr": 1.66e-05, + "step": 323 + }, + { + "epoch": 0.8494411571334648, + "high_lr": 0.00083, + "low_lr": 1.66e-05, + "step": 323 + }, + { + "epoch": 0.8494411571334648, + "high_lr": 0.00083, + "low_lr": 1.66e-05, + "step": 323 + }, + { + "epoch": 0.8494411571334648, + "high_lr": 0.00083, + "low_lr": 1.66e-05, + "step": 323 + }, + { + "epoch": 0.8520710059171598, + "grad_norm": 0.9653990864753723, + "learning_rate": 0.0008294736842105264, + "loss": 1.6323, + "step": 324 + }, + { + "epoch": 0.8520710059171598, + "high_lr": 0.0008294736842105264, + "low_lr": 1.658947368421053e-05, + "step": 324 + }, + { + "epoch": 0.8520710059171598, + "high_lr": 0.0008294736842105264, + "low_lr": 1.658947368421053e-05, + "step": 324 + }, + { + "epoch": 0.8520710059171598, + "high_lr": 0.0008294736842105264, + "low_lr": 1.658947368421053e-05, + "step": 324 + }, + { + "epoch": 0.8520710059171598, + "high_lr": 0.0008294736842105264, + "low_lr": 1.658947368421053e-05, + "step": 324 + }, + { + "epoch": 0.8520710059171598, + "high_lr": 0.0008294736842105264, + "low_lr": 1.658947368421053e-05, + "step": 324 + }, + { + "epoch": 0.8520710059171598, + "high_lr": 0.0008294736842105264, + "low_lr": 1.658947368421053e-05, + "step": 324 + }, + { + "epoch": 0.8520710059171598, + "high_lr": 0.0008294736842105264, + "low_lr": 1.658947368421053e-05, + "step": 324 + }, + { + "epoch": 0.8520710059171598, + "high_lr": 0.0008294736842105264, + "low_lr": 1.658947368421053e-05, + "step": 324 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.8928948640823364, + "learning_rate": 0.0008289473684210527, + "loss": 1.6013, + "step": 325 + }, + { + "epoch": 0.8547008547008547, + "high_lr": 0.0008289473684210527, + "low_lr": 1.6578947368421053e-05, + "step": 325 + }, + { + "epoch": 0.8547008547008547, + "high_lr": 0.0008289473684210527, + "low_lr": 1.6578947368421053e-05, + "step": 325 + }, + { + "epoch": 0.8547008547008547, + "high_lr": 0.0008289473684210527, + "low_lr": 1.6578947368421053e-05, + "step": 325 + }, + { + "epoch": 0.8547008547008547, + "high_lr": 0.0008289473684210527, + "low_lr": 1.6578947368421053e-05, + "step": 325 + }, + { + "epoch": 0.8547008547008547, + "high_lr": 0.0008289473684210527, + "low_lr": 1.6578947368421053e-05, + "step": 325 + }, + { + "epoch": 0.8547008547008547, + "high_lr": 0.0008289473684210527, + "low_lr": 1.6578947368421053e-05, + "step": 325 + }, + { + "epoch": 0.8547008547008547, + "high_lr": 0.0008289473684210527, + "low_lr": 1.6578947368421053e-05, + "step": 325 + }, + { + "epoch": 0.8547008547008547, + "high_lr": 0.0008289473684210527, + "low_lr": 1.6578947368421053e-05, + "step": 325 + }, + { + "epoch": 0.8573307034845496, + "grad_norm": 0.9746809601783752, + "learning_rate": 0.000828421052631579, + "loss": 1.6171, + "step": 326 + }, + { + "epoch": 0.8573307034845496, + "high_lr": 0.000828421052631579, + "low_lr": 1.656842105263158e-05, + "step": 326 + }, + { + "epoch": 0.8573307034845496, + "high_lr": 0.000828421052631579, + "low_lr": 1.656842105263158e-05, + "step": 326 + }, + { + "epoch": 0.8573307034845496, + "high_lr": 0.000828421052631579, + "low_lr": 1.656842105263158e-05, + "step": 326 + }, + { + "epoch": 0.8573307034845496, + "high_lr": 0.000828421052631579, + "low_lr": 1.656842105263158e-05, + "step": 326 + }, + { + "epoch": 0.8573307034845496, + "high_lr": 0.000828421052631579, + "low_lr": 1.656842105263158e-05, + "step": 326 + }, + { + "epoch": 0.8573307034845496, + "high_lr": 0.000828421052631579, + "low_lr": 1.656842105263158e-05, + "step": 326 + }, + { + "epoch": 0.8573307034845496, + "high_lr": 0.000828421052631579, + "low_lr": 1.656842105263158e-05, + "step": 326 + }, + { + "epoch": 0.8573307034845496, + "high_lr": 0.000828421052631579, + "low_lr": 1.656842105263158e-05, + "step": 326 + }, + { + "epoch": 0.8599605522682445, + "grad_norm": 0.9501665234565735, + "learning_rate": 0.0008278947368421053, + "loss": 1.6396, + "step": 327 + }, + { + "epoch": 0.8599605522682445, + "high_lr": 0.0008278947368421053, + "low_lr": 1.6557894736842106e-05, + "step": 327 + }, + { + "epoch": 0.8599605522682445, + "high_lr": 0.0008278947368421053, + "low_lr": 1.6557894736842106e-05, + "step": 327 + }, + { + "epoch": 0.8599605522682445, + "high_lr": 0.0008278947368421053, + "low_lr": 1.6557894736842106e-05, + "step": 327 + }, + { + "epoch": 0.8599605522682445, + "high_lr": 0.0008278947368421053, + "low_lr": 1.6557894736842106e-05, + "step": 327 + }, + { + "epoch": 0.8599605522682445, + "high_lr": 0.0008278947368421053, + "low_lr": 1.6557894736842106e-05, + "step": 327 + }, + { + "epoch": 0.8599605522682445, + "high_lr": 0.0008278947368421053, + "low_lr": 1.6557894736842106e-05, + "step": 327 + }, + { + "epoch": 0.8599605522682445, + "high_lr": 0.0008278947368421053, + "low_lr": 1.6557894736842106e-05, + "step": 327 + }, + { + "epoch": 0.8599605522682445, + "high_lr": 0.0008278947368421053, + "low_lr": 1.6557894736842106e-05, + "step": 327 + }, + { + "epoch": 0.8625904010519395, + "grad_norm": 2.2945706844329834, + "learning_rate": 0.0008273684210526315, + "loss": 1.6043, + "step": 328 + }, + { + "epoch": 0.8625904010519395, + "high_lr": 0.0008273684210526315, + "low_lr": 1.6547368421052634e-05, + "step": 328 + }, + { + "epoch": 0.8625904010519395, + "high_lr": 0.0008273684210526315, + "low_lr": 1.6547368421052634e-05, + "step": 328 + }, + { + "epoch": 0.8625904010519395, + "high_lr": 0.0008273684210526315, + "low_lr": 1.6547368421052634e-05, + "step": 328 + }, + { + "epoch": 0.8625904010519395, + "high_lr": 0.0008273684210526315, + "low_lr": 1.6547368421052634e-05, + "step": 328 + }, + { + "epoch": 0.8625904010519395, + "high_lr": 0.0008273684210526315, + "low_lr": 1.6547368421052634e-05, + "step": 328 + }, + { + "epoch": 0.8625904010519395, + "high_lr": 0.0008273684210526315, + "low_lr": 1.6547368421052634e-05, + "step": 328 + }, + { + "epoch": 0.8625904010519395, + "high_lr": 0.0008273684210526315, + "low_lr": 1.6547368421052634e-05, + "step": 328 + }, + { + "epoch": 0.8625904010519395, + "high_lr": 0.0008273684210526315, + "low_lr": 1.6547368421052634e-05, + "step": 328 + }, + { + "epoch": 0.8652202498356345, + "grad_norm": 0.9581923484802246, + "learning_rate": 0.0008268421052631579, + "loss": 1.615, + "step": 329 + }, + { + "epoch": 0.8652202498356345, + "high_lr": 0.0008268421052631579, + "low_lr": 1.653684210526316e-05, + "step": 329 + }, + { + "epoch": 0.8652202498356345, + "high_lr": 0.0008268421052631579, + "low_lr": 1.653684210526316e-05, + "step": 329 + }, + { + "epoch": 0.8652202498356345, + "high_lr": 0.0008268421052631579, + "low_lr": 1.653684210526316e-05, + "step": 329 + }, + { + "epoch": 0.8652202498356345, + "high_lr": 0.0008268421052631579, + "low_lr": 1.653684210526316e-05, + "step": 329 + }, + { + "epoch": 0.8652202498356345, + "high_lr": 0.0008268421052631579, + "low_lr": 1.653684210526316e-05, + "step": 329 + }, + { + "epoch": 0.8652202498356345, + "high_lr": 0.0008268421052631579, + "low_lr": 1.653684210526316e-05, + "step": 329 + }, + { + "epoch": 0.8652202498356345, + "high_lr": 0.0008268421052631579, + "low_lr": 1.653684210526316e-05, + "step": 329 + }, + { + "epoch": 0.8652202498356345, + "high_lr": 0.0008268421052631579, + "low_lr": 1.653684210526316e-05, + "step": 329 + }, + { + "epoch": 0.8678500986193294, + "grad_norm": 0.8627300262451172, + "learning_rate": 0.0008263157894736842, + "loss": 1.602, + "step": 330 + }, + { + "epoch": 0.8678500986193294, + "high_lr": 0.0008263157894736842, + "low_lr": 1.6526315789473686e-05, + "step": 330 + }, + { + "epoch": 0.8678500986193294, + "high_lr": 0.0008263157894736842, + "low_lr": 1.6526315789473686e-05, + "step": 330 + }, + { + "epoch": 0.8678500986193294, + "high_lr": 0.0008263157894736842, + "low_lr": 1.6526315789473686e-05, + "step": 330 + }, + { + "epoch": 0.8678500986193294, + "high_lr": 0.0008263157894736842, + "low_lr": 1.6526315789473686e-05, + "step": 330 + }, + { + "epoch": 0.8678500986193294, + "high_lr": 0.0008263157894736842, + "low_lr": 1.6526315789473686e-05, + "step": 330 + }, + { + "epoch": 0.8678500986193294, + "high_lr": 0.0008263157894736842, + "low_lr": 1.6526315789473686e-05, + "step": 330 + }, + { + "epoch": 0.8678500986193294, + "high_lr": 0.0008263157894736842, + "low_lr": 1.6526315789473686e-05, + "step": 330 + }, + { + "epoch": 0.8678500986193294, + "high_lr": 0.0008263157894736842, + "low_lr": 1.6526315789473686e-05, + "step": 330 + }, + { + "epoch": 0.8704799474030244, + "grad_norm": 0.9691030383110046, + "learning_rate": 0.0008257894736842105, + "loss": 1.6121, + "step": 331 + }, + { + "epoch": 0.8704799474030244, + "high_lr": 0.0008257894736842105, + "low_lr": 1.651578947368421e-05, + "step": 331 + }, + { + "epoch": 0.8704799474030244, + "high_lr": 0.0008257894736842105, + "low_lr": 1.651578947368421e-05, + "step": 331 + }, + { + "epoch": 0.8704799474030244, + "high_lr": 0.0008257894736842105, + "low_lr": 1.651578947368421e-05, + "step": 331 + }, + { + "epoch": 0.8704799474030244, + "high_lr": 0.0008257894736842105, + "low_lr": 1.651578947368421e-05, + "step": 331 + }, + { + "epoch": 0.8704799474030244, + "high_lr": 0.0008257894736842105, + "low_lr": 1.651578947368421e-05, + "step": 331 + }, + { + "epoch": 0.8704799474030244, + "high_lr": 0.0008257894736842105, + "low_lr": 1.651578947368421e-05, + "step": 331 + }, + { + "epoch": 0.8704799474030244, + "high_lr": 0.0008257894736842105, + "low_lr": 1.651578947368421e-05, + "step": 331 + }, + { + "epoch": 0.8704799474030244, + "high_lr": 0.0008257894736842105, + "low_lr": 1.651578947368421e-05, + "step": 331 + }, + { + "epoch": 0.8731097961867192, + "grad_norm": 0.9407858848571777, + "learning_rate": 0.0008252631578947368, + "loss": 1.5723, + "step": 332 + }, + { + "epoch": 0.8731097961867192, + "high_lr": 0.0008252631578947368, + "low_lr": 1.650526315789474e-05, + "step": 332 + }, + { + "epoch": 0.8731097961867192, + "high_lr": 0.0008252631578947368, + "low_lr": 1.650526315789474e-05, + "step": 332 + }, + { + "epoch": 0.8731097961867192, + "high_lr": 0.0008252631578947368, + "low_lr": 1.650526315789474e-05, + "step": 332 + }, + { + "epoch": 0.8731097961867192, + "high_lr": 0.0008252631578947368, + "low_lr": 1.650526315789474e-05, + "step": 332 + }, + { + "epoch": 0.8731097961867192, + "high_lr": 0.0008252631578947368, + "low_lr": 1.650526315789474e-05, + "step": 332 + }, + { + "epoch": 0.8731097961867192, + "high_lr": 0.0008252631578947368, + "low_lr": 1.650526315789474e-05, + "step": 332 + }, + { + "epoch": 0.8731097961867192, + "high_lr": 0.0008252631578947368, + "low_lr": 1.650526315789474e-05, + "step": 332 + }, + { + "epoch": 0.8731097961867192, + "high_lr": 0.0008252631578947368, + "low_lr": 1.650526315789474e-05, + "step": 332 + }, + { + "epoch": 0.8757396449704142, + "grad_norm": 0.8633793592453003, + "learning_rate": 0.0008247368421052632, + "loss": 1.5878, + "step": 333 + }, + { + "epoch": 0.8757396449704142, + "high_lr": 0.0008247368421052632, + "low_lr": 1.6494736842105267e-05, + "step": 333 + }, + { + "epoch": 0.8757396449704142, + "high_lr": 0.0008247368421052632, + "low_lr": 1.6494736842105267e-05, + "step": 333 + }, + { + "epoch": 0.8757396449704142, + "high_lr": 0.0008247368421052632, + "low_lr": 1.6494736842105267e-05, + "step": 333 + }, + { + "epoch": 0.8757396449704142, + "high_lr": 0.0008247368421052632, + "low_lr": 1.6494736842105267e-05, + "step": 333 + }, + { + "epoch": 0.8757396449704142, + "high_lr": 0.0008247368421052632, + "low_lr": 1.6494736842105267e-05, + "step": 333 + }, + { + "epoch": 0.8757396449704142, + "high_lr": 0.0008247368421052632, + "low_lr": 1.6494736842105267e-05, + "step": 333 + }, + { + "epoch": 0.8757396449704142, + "high_lr": 0.0008247368421052632, + "low_lr": 1.6494736842105267e-05, + "step": 333 + }, + { + "epoch": 0.8757396449704142, + "high_lr": 0.0008247368421052632, + "low_lr": 1.6494736842105267e-05, + "step": 333 + }, + { + "epoch": 0.8783694937541091, + "grad_norm": 0.9088572859764099, + "learning_rate": 0.0008242105263157895, + "loss": 1.6378, + "step": 334 + }, + { + "epoch": 0.8783694937541091, + "high_lr": 0.0008242105263157895, + "low_lr": 1.648421052631579e-05, + "step": 334 + }, + { + "epoch": 0.8783694937541091, + "high_lr": 0.0008242105263157895, + "low_lr": 1.648421052631579e-05, + "step": 334 + }, + { + "epoch": 0.8783694937541091, + "high_lr": 0.0008242105263157895, + "low_lr": 1.648421052631579e-05, + "step": 334 + }, + { + "epoch": 0.8783694937541091, + "high_lr": 0.0008242105263157895, + "low_lr": 1.648421052631579e-05, + "step": 334 + }, + { + "epoch": 0.8783694937541091, + "high_lr": 0.0008242105263157895, + "low_lr": 1.648421052631579e-05, + "step": 334 + }, + { + "epoch": 0.8783694937541091, + "high_lr": 0.0008242105263157895, + "low_lr": 1.648421052631579e-05, + "step": 334 + }, + { + "epoch": 0.8783694937541091, + "high_lr": 0.0008242105263157895, + "low_lr": 1.648421052631579e-05, + "step": 334 + }, + { + "epoch": 0.8783694937541091, + "high_lr": 0.0008242105263157895, + "low_lr": 1.648421052631579e-05, + "step": 334 + }, + { + "epoch": 0.8809993425378041, + "grad_norm": 0.8914270997047424, + "learning_rate": 0.0008236842105263158, + "loss": 1.596, + "step": 335 + }, + { + "epoch": 0.8809993425378041, + "high_lr": 0.0008236842105263158, + "low_lr": 1.6473684210526316e-05, + "step": 335 + }, + { + "epoch": 0.8809993425378041, + "high_lr": 0.0008236842105263158, + "low_lr": 1.6473684210526316e-05, + "step": 335 + }, + { + "epoch": 0.8809993425378041, + "high_lr": 0.0008236842105263158, + "low_lr": 1.6473684210526316e-05, + "step": 335 + }, + { + "epoch": 0.8809993425378041, + "high_lr": 0.0008236842105263158, + "low_lr": 1.6473684210526316e-05, + "step": 335 + }, + { + "epoch": 0.8809993425378041, + "high_lr": 0.0008236842105263158, + "low_lr": 1.6473684210526316e-05, + "step": 335 + }, + { + "epoch": 0.8809993425378041, + "high_lr": 0.0008236842105263158, + "low_lr": 1.6473684210526316e-05, + "step": 335 + }, + { + "epoch": 0.8809993425378041, + "high_lr": 0.0008236842105263158, + "low_lr": 1.6473684210526316e-05, + "step": 335 + }, + { + "epoch": 0.8809993425378041, + "high_lr": 0.0008236842105263158, + "low_lr": 1.6473684210526316e-05, + "step": 335 + }, + { + "epoch": 0.883629191321499, + "grad_norm": 0.9653371572494507, + "learning_rate": 0.0008231578947368422, + "loss": 1.6209, + "step": 336 + }, + { + "epoch": 0.883629191321499, + "high_lr": 0.0008231578947368422, + "low_lr": 1.6463157894736844e-05, + "step": 336 + }, + { + "epoch": 0.883629191321499, + "high_lr": 0.0008231578947368422, + "low_lr": 1.6463157894736844e-05, + "step": 336 + }, + { + "epoch": 0.883629191321499, + "high_lr": 0.0008231578947368422, + "low_lr": 1.6463157894736844e-05, + "step": 336 + }, + { + "epoch": 0.883629191321499, + "high_lr": 0.0008231578947368422, + "low_lr": 1.6463157894736844e-05, + "step": 336 + }, + { + "epoch": 0.883629191321499, + "high_lr": 0.0008231578947368422, + "low_lr": 1.6463157894736844e-05, + "step": 336 + }, + { + "epoch": 0.883629191321499, + "high_lr": 0.0008231578947368422, + "low_lr": 1.6463157894736844e-05, + "step": 336 + }, + { + "epoch": 0.883629191321499, + "high_lr": 0.0008231578947368422, + "low_lr": 1.6463157894736844e-05, + "step": 336 + }, + { + "epoch": 0.883629191321499, + "high_lr": 0.0008231578947368422, + "low_lr": 1.6463157894736844e-05, + "step": 336 + }, + { + "epoch": 0.886259040105194, + "grad_norm": 0.8828110098838806, + "learning_rate": 0.0008226315789473684, + "loss": 1.5831, + "step": 337 + }, + { + "epoch": 0.886259040105194, + "high_lr": 0.0008226315789473684, + "low_lr": 1.645263157894737e-05, + "step": 337 + }, + { + "epoch": 0.886259040105194, + "high_lr": 0.0008226315789473684, + "low_lr": 1.645263157894737e-05, + "step": 337 + }, + { + "epoch": 0.886259040105194, + "high_lr": 0.0008226315789473684, + "low_lr": 1.645263157894737e-05, + "step": 337 + }, + { + "epoch": 0.886259040105194, + "high_lr": 0.0008226315789473684, + "low_lr": 1.645263157894737e-05, + "step": 337 + }, + { + "epoch": 0.886259040105194, + "high_lr": 0.0008226315789473684, + "low_lr": 1.645263157894737e-05, + "step": 337 + }, + { + "epoch": 0.886259040105194, + "high_lr": 0.0008226315789473684, + "low_lr": 1.645263157894737e-05, + "step": 337 + }, + { + "epoch": 0.886259040105194, + "high_lr": 0.0008226315789473684, + "low_lr": 1.645263157894737e-05, + "step": 337 + }, + { + "epoch": 0.886259040105194, + "high_lr": 0.0008226315789473684, + "low_lr": 1.645263157894737e-05, + "step": 337 + }, + { + "epoch": 0.8888888888888888, + "grad_norm": 0.9500367045402527, + "learning_rate": 0.0008221052631578948, + "loss": 1.5705, + "step": 338 + }, + { + "epoch": 0.8888888888888888, + "high_lr": 0.0008221052631578948, + "low_lr": 1.6442105263157897e-05, + "step": 338 + }, + { + "epoch": 0.8888888888888888, + "high_lr": 0.0008221052631578948, + "low_lr": 1.6442105263157897e-05, + "step": 338 + }, + { + "epoch": 0.8888888888888888, + "high_lr": 0.0008221052631578948, + "low_lr": 1.6442105263157897e-05, + "step": 338 + }, + { + "epoch": 0.8888888888888888, + "high_lr": 0.0008221052631578948, + "low_lr": 1.6442105263157897e-05, + "step": 338 + }, + { + "epoch": 0.8888888888888888, + "high_lr": 0.0008221052631578948, + "low_lr": 1.6442105263157897e-05, + "step": 338 + }, + { + "epoch": 0.8888888888888888, + "high_lr": 0.0008221052631578948, + "low_lr": 1.6442105263157897e-05, + "step": 338 + }, + { + "epoch": 0.8888888888888888, + "high_lr": 0.0008221052631578948, + "low_lr": 1.6442105263157897e-05, + "step": 338 + }, + { + "epoch": 0.8888888888888888, + "high_lr": 0.0008221052631578948, + "low_lr": 1.6442105263157897e-05, + "step": 338 + }, + { + "epoch": 0.8915187376725838, + "grad_norm": 0.9348553419113159, + "learning_rate": 0.0008215789473684211, + "loss": 1.5876, + "step": 339 + }, + { + "epoch": 0.8915187376725838, + "high_lr": 0.0008215789473684211, + "low_lr": 1.643157894736842e-05, + "step": 339 + }, + { + "epoch": 0.8915187376725838, + "high_lr": 0.0008215789473684211, + "low_lr": 1.643157894736842e-05, + "step": 339 + }, + { + "epoch": 0.8915187376725838, + "high_lr": 0.0008215789473684211, + "low_lr": 1.643157894736842e-05, + "step": 339 + }, + { + "epoch": 0.8915187376725838, + "high_lr": 0.0008215789473684211, + "low_lr": 1.643157894736842e-05, + "step": 339 + }, + { + "epoch": 0.8915187376725838, + "high_lr": 0.0008215789473684211, + "low_lr": 1.643157894736842e-05, + "step": 339 + }, + { + "epoch": 0.8915187376725838, + "high_lr": 0.0008215789473684211, + "low_lr": 1.643157894736842e-05, + "step": 339 + }, + { + "epoch": 0.8915187376725838, + "high_lr": 0.0008215789473684211, + "low_lr": 1.643157894736842e-05, + "step": 339 + }, + { + "epoch": 0.8915187376725838, + "high_lr": 0.0008215789473684211, + "low_lr": 1.643157894736842e-05, + "step": 339 + }, + { + "epoch": 0.8941485864562788, + "grad_norm": 0.9669507741928101, + "learning_rate": 0.0008210526315789474, + "loss": 1.5939, + "step": 340 + }, + { + "epoch": 0.8941485864562788, + "high_lr": 0.0008210526315789474, + "low_lr": 1.642105263157895e-05, + "step": 340 + }, + { + "epoch": 0.8941485864562788, + "high_lr": 0.0008210526315789474, + "low_lr": 1.642105263157895e-05, + "step": 340 + }, + { + "epoch": 0.8941485864562788, + "high_lr": 0.0008210526315789474, + "low_lr": 1.642105263157895e-05, + "step": 340 + }, + { + "epoch": 0.8941485864562788, + "high_lr": 0.0008210526315789474, + "low_lr": 1.642105263157895e-05, + "step": 340 + }, + { + "epoch": 0.8941485864562788, + "high_lr": 0.0008210526315789474, + "low_lr": 1.642105263157895e-05, + "step": 340 + }, + { + "epoch": 0.8941485864562788, + "high_lr": 0.0008210526315789474, + "low_lr": 1.642105263157895e-05, + "step": 340 + }, + { + "epoch": 0.8941485864562788, + "high_lr": 0.0008210526315789474, + "low_lr": 1.642105263157895e-05, + "step": 340 + }, + { + "epoch": 0.8941485864562788, + "high_lr": 0.0008210526315789474, + "low_lr": 1.642105263157895e-05, + "step": 340 + }, + { + "epoch": 0.8967784352399737, + "grad_norm": 0.9118064641952515, + "learning_rate": 0.0008205263157894737, + "loss": 1.5395, + "step": 341 + }, + { + "epoch": 0.8967784352399737, + "high_lr": 0.0008205263157894737, + "low_lr": 1.6410526315789474e-05, + "step": 341 + }, + { + "epoch": 0.8967784352399737, + "high_lr": 0.0008205263157894737, + "low_lr": 1.6410526315789474e-05, + "step": 341 + }, + { + "epoch": 0.8967784352399737, + "high_lr": 0.0008205263157894737, + "low_lr": 1.6410526315789474e-05, + "step": 341 + }, + { + "epoch": 0.8967784352399737, + "high_lr": 0.0008205263157894737, + "low_lr": 1.6410526315789474e-05, + "step": 341 + }, + { + "epoch": 0.8967784352399737, + "high_lr": 0.0008205263157894737, + "low_lr": 1.6410526315789474e-05, + "step": 341 + }, + { + "epoch": 0.8967784352399737, + "high_lr": 0.0008205263157894737, + "low_lr": 1.6410526315789474e-05, + "step": 341 + }, + { + "epoch": 0.8967784352399737, + "high_lr": 0.0008205263157894737, + "low_lr": 1.6410526315789474e-05, + "step": 341 + }, + { + "epoch": 0.8967784352399737, + "high_lr": 0.0008205263157894737, + "low_lr": 1.6410526315789474e-05, + "step": 341 + }, + { + "epoch": 0.8994082840236687, + "grad_norm": 0.9734652638435364, + "learning_rate": 0.00082, + "loss": 1.5986, + "step": 342 + }, + { + "epoch": 0.8994082840236687, + "high_lr": 0.00082, + "low_lr": 1.64e-05, + "step": 342 + }, + { + "epoch": 0.8994082840236687, + "high_lr": 0.00082, + "low_lr": 1.64e-05, + "step": 342 + }, + { + "epoch": 0.8994082840236687, + "high_lr": 0.00082, + "low_lr": 1.64e-05, + "step": 342 + }, + { + "epoch": 0.8994082840236687, + "high_lr": 0.00082, + "low_lr": 1.64e-05, + "step": 342 + }, + { + "epoch": 0.8994082840236687, + "high_lr": 0.00082, + "low_lr": 1.64e-05, + "step": 342 + }, + { + "epoch": 0.8994082840236687, + "high_lr": 0.00082, + "low_lr": 1.64e-05, + "step": 342 + }, + { + "epoch": 0.8994082840236687, + "high_lr": 0.00082, + "low_lr": 1.64e-05, + "step": 342 + }, + { + "epoch": 0.8994082840236687, + "high_lr": 0.00082, + "low_lr": 1.64e-05, + "step": 342 + }, + { + "epoch": 0.9020381328073636, + "grad_norm": 0.9268157482147217, + "learning_rate": 0.0008194736842105264, + "loss": 1.5755, + "step": 343 + }, + { + "epoch": 0.9020381328073636, + "high_lr": 0.0008194736842105264, + "low_lr": 1.6389473684210527e-05, + "step": 343 + }, + { + "epoch": 0.9020381328073636, + "high_lr": 0.0008194736842105264, + "low_lr": 1.6389473684210527e-05, + "step": 343 + }, + { + "epoch": 0.9020381328073636, + "high_lr": 0.0008194736842105264, + "low_lr": 1.6389473684210527e-05, + "step": 343 + }, + { + "epoch": 0.9020381328073636, + "high_lr": 0.0008194736842105264, + "low_lr": 1.6389473684210527e-05, + "step": 343 + }, + { + "epoch": 0.9020381328073636, + "high_lr": 0.0008194736842105264, + "low_lr": 1.6389473684210527e-05, + "step": 343 + }, + { + "epoch": 0.9020381328073636, + "high_lr": 0.0008194736842105264, + "low_lr": 1.6389473684210527e-05, + "step": 343 + }, + { + "epoch": 0.9020381328073636, + "high_lr": 0.0008194736842105264, + "low_lr": 1.6389473684210527e-05, + "step": 343 + }, + { + "epoch": 0.9020381328073636, + "high_lr": 0.0008194736842105264, + "low_lr": 1.6389473684210527e-05, + "step": 343 + }, + { + "epoch": 0.9046679815910585, + "grad_norm": 0.9329240918159485, + "learning_rate": 0.0008189473684210527, + "loss": 1.5928, + "step": 344 + }, + { + "epoch": 0.9046679815910585, + "high_lr": 0.0008189473684210527, + "low_lr": 1.6378947368421055e-05, + "step": 344 + }, + { + "epoch": 0.9046679815910585, + "high_lr": 0.0008189473684210527, + "low_lr": 1.6378947368421055e-05, + "step": 344 + }, + { + "epoch": 0.9046679815910585, + "high_lr": 0.0008189473684210527, + "low_lr": 1.6378947368421055e-05, + "step": 344 + }, + { + "epoch": 0.9046679815910585, + "high_lr": 0.0008189473684210527, + "low_lr": 1.6378947368421055e-05, + "step": 344 + }, + { + "epoch": 0.9046679815910585, + "high_lr": 0.0008189473684210527, + "low_lr": 1.6378947368421055e-05, + "step": 344 + }, + { + "epoch": 0.9046679815910585, + "high_lr": 0.0008189473684210527, + "low_lr": 1.6378947368421055e-05, + "step": 344 + }, + { + "epoch": 0.9046679815910585, + "high_lr": 0.0008189473684210527, + "low_lr": 1.6378947368421055e-05, + "step": 344 + }, + { + "epoch": 0.9046679815910585, + "high_lr": 0.0008189473684210527, + "low_lr": 1.6378947368421055e-05, + "step": 344 + }, + { + "epoch": 0.9072978303747534, + "grad_norm": 0.9910773634910583, + "learning_rate": 0.000818421052631579, + "loss": 1.6063, + "step": 345 + }, + { + "epoch": 0.9072978303747534, + "high_lr": 0.000818421052631579, + "low_lr": 1.636842105263158e-05, + "step": 345 + }, + { + "epoch": 0.9072978303747534, + "high_lr": 0.000818421052631579, + "low_lr": 1.636842105263158e-05, + "step": 345 + }, + { + "epoch": 0.9072978303747534, + "high_lr": 0.000818421052631579, + "low_lr": 1.636842105263158e-05, + "step": 345 + }, + { + "epoch": 0.9072978303747534, + "high_lr": 0.000818421052631579, + "low_lr": 1.636842105263158e-05, + "step": 345 + }, + { + "epoch": 0.9072978303747534, + "high_lr": 0.000818421052631579, + "low_lr": 1.636842105263158e-05, + "step": 345 + }, + { + "epoch": 0.9072978303747534, + "high_lr": 0.000818421052631579, + "low_lr": 1.636842105263158e-05, + "step": 345 + }, + { + "epoch": 0.9072978303747534, + "high_lr": 0.000818421052631579, + "low_lr": 1.636842105263158e-05, + "step": 345 + }, + { + "epoch": 0.9072978303747534, + "high_lr": 0.000818421052631579, + "low_lr": 1.636842105263158e-05, + "step": 345 + }, + { + "epoch": 0.9099276791584484, + "grad_norm": 0.9147838354110718, + "learning_rate": 0.0008178947368421052, + "loss": 1.5372, + "step": 346 + }, + { + "epoch": 0.9099276791584484, + "high_lr": 0.0008178947368421052, + "low_lr": 1.6357894736842108e-05, + "step": 346 + }, + { + "epoch": 0.9099276791584484, + "high_lr": 0.0008178947368421052, + "low_lr": 1.6357894736842108e-05, + "step": 346 + }, + { + "epoch": 0.9099276791584484, + "high_lr": 0.0008178947368421052, + "low_lr": 1.6357894736842108e-05, + "step": 346 + }, + { + "epoch": 0.9099276791584484, + "high_lr": 0.0008178947368421052, + "low_lr": 1.6357894736842108e-05, + "step": 346 + }, + { + "epoch": 0.9099276791584484, + "high_lr": 0.0008178947368421052, + "low_lr": 1.6357894736842108e-05, + "step": 346 + }, + { + "epoch": 0.9099276791584484, + "high_lr": 0.0008178947368421052, + "low_lr": 1.6357894736842108e-05, + "step": 346 + }, + { + "epoch": 0.9099276791584484, + "high_lr": 0.0008178947368421052, + "low_lr": 1.6357894736842108e-05, + "step": 346 + }, + { + "epoch": 0.9099276791584484, + "high_lr": 0.0008178947368421052, + "low_lr": 1.6357894736842108e-05, + "step": 346 + }, + { + "epoch": 0.9125575279421433, + "grad_norm": 0.9866480231285095, + "learning_rate": 0.0008173684210526316, + "loss": 1.6184, + "step": 347 + }, + { + "epoch": 0.9125575279421433, + "high_lr": 0.0008173684210526316, + "low_lr": 1.6347368421052636e-05, + "step": 347 + }, + { + "epoch": 0.9125575279421433, + "high_lr": 0.0008173684210526316, + "low_lr": 1.6347368421052636e-05, + "step": 347 + }, + { + "epoch": 0.9125575279421433, + "high_lr": 0.0008173684210526316, + "low_lr": 1.6347368421052636e-05, + "step": 347 + }, + { + "epoch": 0.9125575279421433, + "high_lr": 0.0008173684210526316, + "low_lr": 1.6347368421052636e-05, + "step": 347 + }, + { + "epoch": 0.9125575279421433, + "high_lr": 0.0008173684210526316, + "low_lr": 1.6347368421052636e-05, + "step": 347 + }, + { + "epoch": 0.9125575279421433, + "high_lr": 0.0008173684210526316, + "low_lr": 1.6347368421052636e-05, + "step": 347 + }, + { + "epoch": 0.9125575279421433, + "high_lr": 0.0008173684210526316, + "low_lr": 1.6347368421052636e-05, + "step": 347 + }, + { + "epoch": 0.9125575279421433, + "high_lr": 0.0008173684210526316, + "low_lr": 1.6347368421052636e-05, + "step": 347 + }, + { + "epoch": 0.9151873767258383, + "grad_norm": 0.9890763759613037, + "learning_rate": 0.0008168421052631579, + "loss": 1.6494, + "step": 348 + }, + { + "epoch": 0.9151873767258383, + "high_lr": 0.0008168421052631579, + "low_lr": 1.633684210526316e-05, + "step": 348 + }, + { + "epoch": 0.9151873767258383, + "high_lr": 0.0008168421052631579, + "low_lr": 1.633684210526316e-05, + "step": 348 + }, + { + "epoch": 0.9151873767258383, + "high_lr": 0.0008168421052631579, + "low_lr": 1.633684210526316e-05, + "step": 348 + }, + { + "epoch": 0.9151873767258383, + "high_lr": 0.0008168421052631579, + "low_lr": 1.633684210526316e-05, + "step": 348 + }, + { + "epoch": 0.9151873767258383, + "high_lr": 0.0008168421052631579, + "low_lr": 1.633684210526316e-05, + "step": 348 + }, + { + "epoch": 0.9151873767258383, + "high_lr": 0.0008168421052631579, + "low_lr": 1.633684210526316e-05, + "step": 348 + }, + { + "epoch": 0.9151873767258383, + "high_lr": 0.0008168421052631579, + "low_lr": 1.633684210526316e-05, + "step": 348 + }, + { + "epoch": 0.9151873767258383, + "high_lr": 0.0008168421052631579, + "low_lr": 1.633684210526316e-05, + "step": 348 + }, + { + "epoch": 0.9178172255095332, + "grad_norm": 1.0507274866104126, + "learning_rate": 0.0008163157894736842, + "loss": 1.5947, + "step": 349 + }, + { + "epoch": 0.9178172255095332, + "high_lr": 0.0008163157894736842, + "low_lr": 1.6326315789473685e-05, + "step": 349 + }, + { + "epoch": 0.9178172255095332, + "high_lr": 0.0008163157894736842, + "low_lr": 1.6326315789473685e-05, + "step": 349 + }, + { + "epoch": 0.9178172255095332, + "high_lr": 0.0008163157894736842, + "low_lr": 1.6326315789473685e-05, + "step": 349 + }, + { + "epoch": 0.9178172255095332, + "high_lr": 0.0008163157894736842, + "low_lr": 1.6326315789473685e-05, + "step": 349 + }, + { + "epoch": 0.9178172255095332, + "high_lr": 0.0008163157894736842, + "low_lr": 1.6326315789473685e-05, + "step": 349 + }, + { + "epoch": 0.9178172255095332, + "high_lr": 0.0008163157894736842, + "low_lr": 1.6326315789473685e-05, + "step": 349 + }, + { + "epoch": 0.9178172255095332, + "high_lr": 0.0008163157894736842, + "low_lr": 1.6326315789473685e-05, + "step": 349 + }, + { + "epoch": 0.9178172255095332, + "high_lr": 0.0008163157894736842, + "low_lr": 1.6326315789473685e-05, + "step": 349 + }, + { + "epoch": 0.9204470742932281, + "grad_norm": 0.9223301410675049, + "learning_rate": 0.0008157894736842105, + "loss": 1.5522, + "step": 350 + }, + { + "epoch": 0.9204470742932281, + "high_lr": 0.0008157894736842105, + "low_lr": 1.6315789473684213e-05, + "step": 350 + }, + { + "epoch": 0.9204470742932281, + "high_lr": 0.0008157894736842105, + "low_lr": 1.6315789473684213e-05, + "step": 350 + }, + { + "epoch": 0.9204470742932281, + "high_lr": 0.0008157894736842105, + "low_lr": 1.6315789473684213e-05, + "step": 350 + }, + { + "epoch": 0.9204470742932281, + "high_lr": 0.0008157894736842105, + "low_lr": 1.6315789473684213e-05, + "step": 350 + }, + { + "epoch": 0.9204470742932281, + "high_lr": 0.0008157894736842105, + "low_lr": 1.6315789473684213e-05, + "step": 350 + }, + { + "epoch": 0.9204470742932281, + "high_lr": 0.0008157894736842105, + "low_lr": 1.6315789473684213e-05, + "step": 350 + }, + { + "epoch": 0.9204470742932281, + "high_lr": 0.0008157894736842105, + "low_lr": 1.6315789473684213e-05, + "step": 350 + }, + { + "epoch": 0.9204470742932281, + "high_lr": 0.0008157894736842105, + "low_lr": 1.6315789473684213e-05, + "step": 350 + }, + { + "epoch": 0.9230769230769231, + "grad_norm": 0.9552976489067078, + "learning_rate": 0.0008152631578947368, + "loss": 1.5934, + "step": 351 + }, + { + "epoch": 0.9230769230769231, + "high_lr": 0.0008152631578947368, + "low_lr": 1.6305263157894737e-05, + "step": 351 + }, + { + "epoch": 0.9230769230769231, + "high_lr": 0.0008152631578947368, + "low_lr": 1.6305263157894737e-05, + "step": 351 + }, + { + "epoch": 0.9230769230769231, + "high_lr": 0.0008152631578947368, + "low_lr": 1.6305263157894737e-05, + "step": 351 + }, + { + "epoch": 0.9230769230769231, + "high_lr": 0.0008152631578947368, + "low_lr": 1.6305263157894737e-05, + "step": 351 + }, + { + "epoch": 0.9230769230769231, + "high_lr": 0.0008152631578947368, + "low_lr": 1.6305263157894737e-05, + "step": 351 + }, + { + "epoch": 0.9230769230769231, + "high_lr": 0.0008152631578947368, + "low_lr": 1.6305263157894737e-05, + "step": 351 + }, + { + "epoch": 0.9230769230769231, + "high_lr": 0.0008152631578947368, + "low_lr": 1.6305263157894737e-05, + "step": 351 + }, + { + "epoch": 0.9230769230769231, + "high_lr": 0.0008152631578947368, + "low_lr": 1.6305263157894737e-05, + "step": 351 + }, + { + "epoch": 0.925706771860618, + "grad_norm": 0.9349220991134644, + "learning_rate": 0.0008147368421052633, + "loss": 1.6361, + "step": 352 + }, + { + "epoch": 0.925706771860618, + "high_lr": 0.0008147368421052633, + "low_lr": 1.6294736842105265e-05, + "step": 352 + }, + { + "epoch": 0.925706771860618, + "high_lr": 0.0008147368421052633, + "low_lr": 1.6294736842105265e-05, + "step": 352 + }, + { + "epoch": 0.925706771860618, + "high_lr": 0.0008147368421052633, + "low_lr": 1.6294736842105265e-05, + "step": 352 + }, + { + "epoch": 0.925706771860618, + "high_lr": 0.0008147368421052633, + "low_lr": 1.6294736842105265e-05, + "step": 352 + }, + { + "epoch": 0.925706771860618, + "high_lr": 0.0008147368421052633, + "low_lr": 1.6294736842105265e-05, + "step": 352 + }, + { + "epoch": 0.925706771860618, + "high_lr": 0.0008147368421052633, + "low_lr": 1.6294736842105265e-05, + "step": 352 + }, + { + "epoch": 0.925706771860618, + "high_lr": 0.0008147368421052633, + "low_lr": 1.6294736842105265e-05, + "step": 352 + }, + { + "epoch": 0.925706771860618, + "high_lr": 0.0008147368421052633, + "low_lr": 1.6294736842105265e-05, + "step": 352 + }, + { + "epoch": 0.928336620644313, + "grad_norm": 0.9589920043945312, + "learning_rate": 0.0008142105263157896, + "loss": 1.6333, + "step": 353 + }, + { + "epoch": 0.928336620644313, + "high_lr": 0.0008142105263157896, + "low_lr": 1.628421052631579e-05, + "step": 353 + }, + { + "epoch": 0.928336620644313, + "high_lr": 0.0008142105263157896, + "low_lr": 1.628421052631579e-05, + "step": 353 + }, + { + "epoch": 0.928336620644313, + "high_lr": 0.0008142105263157896, + "low_lr": 1.628421052631579e-05, + "step": 353 + }, + { + "epoch": 0.928336620644313, + "high_lr": 0.0008142105263157896, + "low_lr": 1.628421052631579e-05, + "step": 353 + }, + { + "epoch": 0.928336620644313, + "high_lr": 0.0008142105263157896, + "low_lr": 1.628421052631579e-05, + "step": 353 + }, + { + "epoch": 0.928336620644313, + "high_lr": 0.0008142105263157896, + "low_lr": 1.628421052631579e-05, + "step": 353 + }, + { + "epoch": 0.928336620644313, + "high_lr": 0.0008142105263157896, + "low_lr": 1.628421052631579e-05, + "step": 353 + }, + { + "epoch": 0.928336620644313, + "high_lr": 0.0008142105263157896, + "low_lr": 1.628421052631579e-05, + "step": 353 + }, + { + "epoch": 0.9309664694280079, + "grad_norm": 0.9250889420509338, + "learning_rate": 0.0008136842105263158, + "loss": 1.6012, + "step": 354 + }, + { + "epoch": 0.9309664694280079, + "high_lr": 0.0008136842105263158, + "low_lr": 1.6273684210526318e-05, + "step": 354 + }, + { + "epoch": 0.9309664694280079, + "high_lr": 0.0008136842105263158, + "low_lr": 1.6273684210526318e-05, + "step": 354 + }, + { + "epoch": 0.9309664694280079, + "high_lr": 0.0008136842105263158, + "low_lr": 1.6273684210526318e-05, + "step": 354 + }, + { + "epoch": 0.9309664694280079, + "high_lr": 0.0008136842105263158, + "low_lr": 1.6273684210526318e-05, + "step": 354 + }, + { + "epoch": 0.9309664694280079, + "high_lr": 0.0008136842105263158, + "low_lr": 1.6273684210526318e-05, + "step": 354 + }, + { + "epoch": 0.9309664694280079, + "high_lr": 0.0008136842105263158, + "low_lr": 1.6273684210526318e-05, + "step": 354 + }, + { + "epoch": 0.9309664694280079, + "high_lr": 0.0008136842105263158, + "low_lr": 1.6273684210526318e-05, + "step": 354 + }, + { + "epoch": 0.9309664694280079, + "high_lr": 0.0008136842105263158, + "low_lr": 1.6273684210526318e-05, + "step": 354 + }, + { + "epoch": 0.9335963182117029, + "grad_norm": 0.9966514706611633, + "learning_rate": 0.0008131578947368421, + "loss": 1.618, + "step": 355 + }, + { + "epoch": 0.9335963182117029, + "high_lr": 0.0008131578947368421, + "low_lr": 1.6263157894736843e-05, + "step": 355 + }, + { + "epoch": 0.9335963182117029, + "high_lr": 0.0008131578947368421, + "low_lr": 1.6263157894736843e-05, + "step": 355 + }, + { + "epoch": 0.9335963182117029, + "high_lr": 0.0008131578947368421, + "low_lr": 1.6263157894736843e-05, + "step": 355 + }, + { + "epoch": 0.9335963182117029, + "high_lr": 0.0008131578947368421, + "low_lr": 1.6263157894736843e-05, + "step": 355 + }, + { + "epoch": 0.9335963182117029, + "high_lr": 0.0008131578947368421, + "low_lr": 1.6263157894736843e-05, + "step": 355 + }, + { + "epoch": 0.9335963182117029, + "high_lr": 0.0008131578947368421, + "low_lr": 1.6263157894736843e-05, + "step": 355 + }, + { + "epoch": 0.9335963182117029, + "high_lr": 0.0008131578947368421, + "low_lr": 1.6263157894736843e-05, + "step": 355 + }, + { + "epoch": 0.9335963182117029, + "high_lr": 0.0008131578947368421, + "low_lr": 1.6263157894736843e-05, + "step": 355 + }, + { + "epoch": 0.9362261669953977, + "grad_norm": 0.9778467416763306, + "learning_rate": 0.0008126315789473684, + "loss": 1.5727, + "step": 356 + }, + { + "epoch": 0.9362261669953977, + "high_lr": 0.0008126315789473684, + "low_lr": 1.6252631578947367e-05, + "step": 356 + }, + { + "epoch": 0.9362261669953977, + "high_lr": 0.0008126315789473684, + "low_lr": 1.6252631578947367e-05, + "step": 356 + }, + { + "epoch": 0.9362261669953977, + "high_lr": 0.0008126315789473684, + "low_lr": 1.6252631578947367e-05, + "step": 356 + }, + { + "epoch": 0.9362261669953977, + "high_lr": 0.0008126315789473684, + "low_lr": 1.6252631578947367e-05, + "step": 356 + }, + { + "epoch": 0.9362261669953977, + "high_lr": 0.0008126315789473684, + "low_lr": 1.6252631578947367e-05, + "step": 356 + }, + { + "epoch": 0.9362261669953977, + "high_lr": 0.0008126315789473684, + "low_lr": 1.6252631578947367e-05, + "step": 356 + }, + { + "epoch": 0.9362261669953977, + "high_lr": 0.0008126315789473684, + "low_lr": 1.6252631578947367e-05, + "step": 356 + }, + { + "epoch": 0.9362261669953977, + "high_lr": 0.0008126315789473684, + "low_lr": 1.6252631578947367e-05, + "step": 356 + }, + { + "epoch": 0.9388560157790927, + "grad_norm": 0.9976954460144043, + "learning_rate": 0.0008121052631578948, + "loss": 1.6105, + "step": 357 + }, + { + "epoch": 0.9388560157790927, + "high_lr": 0.0008121052631578948, + "low_lr": 1.6242105263157895e-05, + "step": 357 + }, + { + "epoch": 0.9388560157790927, + "high_lr": 0.0008121052631578948, + "low_lr": 1.6242105263157895e-05, + "step": 357 + }, + { + "epoch": 0.9388560157790927, + "high_lr": 0.0008121052631578948, + "low_lr": 1.6242105263157895e-05, + "step": 357 + }, + { + "epoch": 0.9388560157790927, + "high_lr": 0.0008121052631578948, + "low_lr": 1.6242105263157895e-05, + "step": 357 + }, + { + "epoch": 0.9388560157790927, + "high_lr": 0.0008121052631578948, + "low_lr": 1.6242105263157895e-05, + "step": 357 + }, + { + "epoch": 0.9388560157790927, + "high_lr": 0.0008121052631578948, + "low_lr": 1.6242105263157895e-05, + "step": 357 + }, + { + "epoch": 0.9388560157790927, + "high_lr": 0.0008121052631578948, + "low_lr": 1.6242105263157895e-05, + "step": 357 + }, + { + "epoch": 0.9388560157790927, + "high_lr": 0.0008121052631578948, + "low_lr": 1.6242105263157895e-05, + "step": 357 + }, + { + "epoch": 0.9414858645627876, + "grad_norm": 0.9831271767616272, + "learning_rate": 0.0008115789473684211, + "loss": 1.5965, + "step": 358 + }, + { + "epoch": 0.9414858645627876, + "high_lr": 0.0008115789473684211, + "low_lr": 1.6231578947368423e-05, + "step": 358 + }, + { + "epoch": 0.9414858645627876, + "high_lr": 0.0008115789473684211, + "low_lr": 1.6231578947368423e-05, + "step": 358 + }, + { + "epoch": 0.9414858645627876, + "high_lr": 0.0008115789473684211, + "low_lr": 1.6231578947368423e-05, + "step": 358 + }, + { + "epoch": 0.9414858645627876, + "high_lr": 0.0008115789473684211, + "low_lr": 1.6231578947368423e-05, + "step": 358 + }, + { + "epoch": 0.9414858645627876, + "high_lr": 0.0008115789473684211, + "low_lr": 1.6231578947368423e-05, + "step": 358 + }, + { + "epoch": 0.9414858645627876, + "high_lr": 0.0008115789473684211, + "low_lr": 1.6231578947368423e-05, + "step": 358 + }, + { + "epoch": 0.9414858645627876, + "high_lr": 0.0008115789473684211, + "low_lr": 1.6231578947368423e-05, + "step": 358 + }, + { + "epoch": 0.9414858645627876, + "high_lr": 0.0008115789473684211, + "low_lr": 1.6231578947368423e-05, + "step": 358 + }, + { + "epoch": 0.9441157133464826, + "grad_norm": 0.9419095516204834, + "learning_rate": 0.0008110526315789474, + "loss": 1.6456, + "step": 359 + }, + { + "epoch": 0.9441157133464826, + "high_lr": 0.0008110526315789474, + "low_lr": 1.6221052631578948e-05, + "step": 359 + }, + { + "epoch": 0.9441157133464826, + "high_lr": 0.0008110526315789474, + "low_lr": 1.6221052631578948e-05, + "step": 359 + }, + { + "epoch": 0.9441157133464826, + "high_lr": 0.0008110526315789474, + "low_lr": 1.6221052631578948e-05, + "step": 359 + }, + { + "epoch": 0.9441157133464826, + "high_lr": 0.0008110526315789474, + "low_lr": 1.6221052631578948e-05, + "step": 359 + }, + { + "epoch": 0.9441157133464826, + "high_lr": 0.0008110526315789474, + "low_lr": 1.6221052631578948e-05, + "step": 359 + }, + { + "epoch": 0.9441157133464826, + "high_lr": 0.0008110526315789474, + "low_lr": 1.6221052631578948e-05, + "step": 359 + }, + { + "epoch": 0.9441157133464826, + "high_lr": 0.0008110526315789474, + "low_lr": 1.6221052631578948e-05, + "step": 359 + }, + { + "epoch": 0.9441157133464826, + "high_lr": 0.0008110526315789474, + "low_lr": 1.6221052631578948e-05, + "step": 359 + }, + { + "epoch": 0.9467455621301775, + "grad_norm": 1.0125757455825806, + "learning_rate": 0.0008105263157894737, + "loss": 1.6393, + "step": 360 + }, + { + "epoch": 0.9467455621301775, + "high_lr": 0.0008105263157894737, + "low_lr": 1.6210526315789473e-05, + "step": 360 + }, + { + "epoch": 0.9467455621301775, + "high_lr": 0.0008105263157894737, + "low_lr": 1.6210526315789473e-05, + "step": 360 + }, + { + "epoch": 0.9467455621301775, + "high_lr": 0.0008105263157894737, + "low_lr": 1.6210526315789473e-05, + "step": 360 + }, + { + "epoch": 0.9467455621301775, + "high_lr": 0.0008105263157894737, + "low_lr": 1.6210526315789473e-05, + "step": 360 + }, + { + "epoch": 0.9467455621301775, + "high_lr": 0.0008105263157894737, + "low_lr": 1.6210526315789473e-05, + "step": 360 + }, + { + "epoch": 0.9467455621301775, + "high_lr": 0.0008105263157894737, + "low_lr": 1.6210526315789473e-05, + "step": 360 + }, + { + "epoch": 0.9467455621301775, + "high_lr": 0.0008105263157894737, + "low_lr": 1.6210526315789473e-05, + "step": 360 + }, + { + "epoch": 0.9467455621301775, + "high_lr": 0.0008105263157894737, + "low_lr": 1.6210526315789473e-05, + "step": 360 + }, + { + "epoch": 0.9493754109138725, + "grad_norm": 0.9917994141578674, + "learning_rate": 0.0008100000000000001, + "loss": 1.6001, + "step": 361 + }, + { + "epoch": 0.9493754109138725, + "high_lr": 0.0008100000000000001, + "low_lr": 1.62e-05, + "step": 361 + }, + { + "epoch": 0.9493754109138725, + "high_lr": 0.0008100000000000001, + "low_lr": 1.62e-05, + "step": 361 + }, + { + "epoch": 0.9493754109138725, + "high_lr": 0.0008100000000000001, + "low_lr": 1.62e-05, + "step": 361 + }, + { + "epoch": 0.9493754109138725, + "high_lr": 0.0008100000000000001, + "low_lr": 1.62e-05, + "step": 361 + }, + { + "epoch": 0.9493754109138725, + "high_lr": 0.0008100000000000001, + "low_lr": 1.62e-05, + "step": 361 + }, + { + "epoch": 0.9493754109138725, + "high_lr": 0.0008100000000000001, + "low_lr": 1.62e-05, + "step": 361 + }, + { + "epoch": 0.9493754109138725, + "high_lr": 0.0008100000000000001, + "low_lr": 1.62e-05, + "step": 361 + }, + { + "epoch": 0.9493754109138725, + "high_lr": 0.0008100000000000001, + "low_lr": 1.62e-05, + "step": 361 + }, + { + "epoch": 0.9520052596975674, + "grad_norm": 0.9051048159599304, + "learning_rate": 0.0008094736842105264, + "loss": 1.5905, + "step": 362 + }, + { + "epoch": 0.9520052596975674, + "high_lr": 0.0008094736842105264, + "low_lr": 1.618947368421053e-05, + "step": 362 + }, + { + "epoch": 0.9520052596975674, + "high_lr": 0.0008094736842105264, + "low_lr": 1.618947368421053e-05, + "step": 362 + }, + { + "epoch": 0.9520052596975674, + "high_lr": 0.0008094736842105264, + "low_lr": 1.618947368421053e-05, + "step": 362 + }, + { + "epoch": 0.9520052596975674, + "high_lr": 0.0008094736842105264, + "low_lr": 1.618947368421053e-05, + "step": 362 + }, + { + "epoch": 0.9520052596975674, + "high_lr": 0.0008094736842105264, + "low_lr": 1.618947368421053e-05, + "step": 362 + }, + { + "epoch": 0.9520052596975674, + "high_lr": 0.0008094736842105264, + "low_lr": 1.618947368421053e-05, + "step": 362 + }, + { + "epoch": 0.9520052596975674, + "high_lr": 0.0008094736842105264, + "low_lr": 1.618947368421053e-05, + "step": 362 + }, + { + "epoch": 0.9520052596975674, + "high_lr": 0.0008094736842105264, + "low_lr": 1.618947368421053e-05, + "step": 362 + }, + { + "epoch": 0.9546351084812623, + "grad_norm": 0.9285058975219727, + "learning_rate": 0.0008089473684210526, + "loss": 1.5778, + "step": 363 + }, + { + "epoch": 0.9546351084812623, + "high_lr": 0.0008089473684210526, + "low_lr": 1.6178947368421053e-05, + "step": 363 + }, + { + "epoch": 0.9546351084812623, + "high_lr": 0.0008089473684210526, + "low_lr": 1.6178947368421053e-05, + "step": 363 + }, + { + "epoch": 0.9546351084812623, + "high_lr": 0.0008089473684210526, + "low_lr": 1.6178947368421053e-05, + "step": 363 + }, + { + "epoch": 0.9546351084812623, + "high_lr": 0.0008089473684210526, + "low_lr": 1.6178947368421053e-05, + "step": 363 + }, + { + "epoch": 0.9546351084812623, + "high_lr": 0.0008089473684210526, + "low_lr": 1.6178947368421053e-05, + "step": 363 + }, + { + "epoch": 0.9546351084812623, + "high_lr": 0.0008089473684210526, + "low_lr": 1.6178947368421053e-05, + "step": 363 + }, + { + "epoch": 0.9546351084812623, + "high_lr": 0.0008089473684210526, + "low_lr": 1.6178947368421053e-05, + "step": 363 + }, + { + "epoch": 0.9546351084812623, + "high_lr": 0.0008089473684210526, + "low_lr": 1.6178947368421053e-05, + "step": 363 + }, + { + "epoch": 0.9572649572649573, + "grad_norm": 0.9660423994064331, + "learning_rate": 0.0008084210526315789, + "loss": 1.5676, + "step": 364 + }, + { + "epoch": 0.9572649572649573, + "high_lr": 0.0008084210526315789, + "low_lr": 1.616842105263158e-05, + "step": 364 + }, + { + "epoch": 0.9572649572649573, + "high_lr": 0.0008084210526315789, + "low_lr": 1.616842105263158e-05, + "step": 364 + }, + { + "epoch": 0.9572649572649573, + "high_lr": 0.0008084210526315789, + "low_lr": 1.616842105263158e-05, + "step": 364 + }, + { + "epoch": 0.9572649572649573, + "high_lr": 0.0008084210526315789, + "low_lr": 1.616842105263158e-05, + "step": 364 + }, + { + "epoch": 0.9572649572649573, + "high_lr": 0.0008084210526315789, + "low_lr": 1.616842105263158e-05, + "step": 364 + }, + { + "epoch": 0.9572649572649573, + "high_lr": 0.0008084210526315789, + "low_lr": 1.616842105263158e-05, + "step": 364 + }, + { + "epoch": 0.9572649572649573, + "high_lr": 0.0008084210526315789, + "low_lr": 1.616842105263158e-05, + "step": 364 + }, + { + "epoch": 0.9572649572649573, + "high_lr": 0.0008084210526315789, + "low_lr": 1.616842105263158e-05, + "step": 364 + }, + { + "epoch": 0.9598948060486522, + "grad_norm": 0.9621961116790771, + "learning_rate": 0.0008078947368421052, + "loss": 1.5944, + "step": 365 + }, + { + "epoch": 0.9598948060486522, + "high_lr": 0.0008078947368421052, + "low_lr": 1.6157894736842106e-05, + "step": 365 + }, + { + "epoch": 0.9598948060486522, + "high_lr": 0.0008078947368421052, + "low_lr": 1.6157894736842106e-05, + "step": 365 + }, + { + "epoch": 0.9598948060486522, + "high_lr": 0.0008078947368421052, + "low_lr": 1.6157894736842106e-05, + "step": 365 + }, + { + "epoch": 0.9598948060486522, + "high_lr": 0.0008078947368421052, + "low_lr": 1.6157894736842106e-05, + "step": 365 + }, + { + "epoch": 0.9598948060486522, + "high_lr": 0.0008078947368421052, + "low_lr": 1.6157894736842106e-05, + "step": 365 + }, + { + "epoch": 0.9598948060486522, + "high_lr": 0.0008078947368421052, + "low_lr": 1.6157894736842106e-05, + "step": 365 + }, + { + "epoch": 0.9598948060486522, + "high_lr": 0.0008078947368421052, + "low_lr": 1.6157894736842106e-05, + "step": 365 + }, + { + "epoch": 0.9598948060486522, + "high_lr": 0.0008078947368421052, + "low_lr": 1.6157894736842106e-05, + "step": 365 + }, + { + "epoch": 0.9625246548323472, + "grad_norm": 0.8977233171463013, + "learning_rate": 0.0008073684210526316, + "loss": 1.6, + "step": 366 + }, + { + "epoch": 0.9625246548323472, + "high_lr": 0.0008073684210526316, + "low_lr": 1.6147368421052634e-05, + "step": 366 + }, + { + "epoch": 0.9625246548323472, + "high_lr": 0.0008073684210526316, + "low_lr": 1.6147368421052634e-05, + "step": 366 + }, + { + "epoch": 0.9625246548323472, + "high_lr": 0.0008073684210526316, + "low_lr": 1.6147368421052634e-05, + "step": 366 + }, + { + "epoch": 0.9625246548323472, + "high_lr": 0.0008073684210526316, + "low_lr": 1.6147368421052634e-05, + "step": 366 + }, + { + "epoch": 0.9625246548323472, + "high_lr": 0.0008073684210526316, + "low_lr": 1.6147368421052634e-05, + "step": 366 + }, + { + "epoch": 0.9625246548323472, + "high_lr": 0.0008073684210526316, + "low_lr": 1.6147368421052634e-05, + "step": 366 + }, + { + "epoch": 0.9625246548323472, + "high_lr": 0.0008073684210526316, + "low_lr": 1.6147368421052634e-05, + "step": 366 + }, + { + "epoch": 0.9625246548323472, + "high_lr": 0.0008073684210526316, + "low_lr": 1.6147368421052634e-05, + "step": 366 + }, + { + "epoch": 0.965154503616042, + "grad_norm": 0.9885714650154114, + "learning_rate": 0.0008068421052631579, + "loss": 1.6135, + "step": 367 + }, + { + "epoch": 0.965154503616042, + "high_lr": 0.0008068421052631579, + "low_lr": 1.613684210526316e-05, + "step": 367 + }, + { + "epoch": 0.965154503616042, + "high_lr": 0.0008068421052631579, + "low_lr": 1.613684210526316e-05, + "step": 367 + }, + { + "epoch": 0.965154503616042, + "high_lr": 0.0008068421052631579, + "low_lr": 1.613684210526316e-05, + "step": 367 + }, + { + "epoch": 0.965154503616042, + "high_lr": 0.0008068421052631579, + "low_lr": 1.613684210526316e-05, + "step": 367 + }, + { + "epoch": 0.965154503616042, + "high_lr": 0.0008068421052631579, + "low_lr": 1.613684210526316e-05, + "step": 367 + }, + { + "epoch": 0.965154503616042, + "high_lr": 0.0008068421052631579, + "low_lr": 1.613684210526316e-05, + "step": 367 + }, + { + "epoch": 0.965154503616042, + "high_lr": 0.0008068421052631579, + "low_lr": 1.613684210526316e-05, + "step": 367 + }, + { + "epoch": 0.965154503616042, + "high_lr": 0.0008068421052631579, + "low_lr": 1.613684210526316e-05, + "step": 367 + }, + { + "epoch": 0.967784352399737, + "grad_norm": 1.0077567100524902, + "learning_rate": 0.0008063157894736842, + "loss": 1.6205, + "step": 368 + }, + { + "epoch": 0.967784352399737, + "high_lr": 0.0008063157894736842, + "low_lr": 1.6126315789473687e-05, + "step": 368 + }, + { + "epoch": 0.967784352399737, + "high_lr": 0.0008063157894736842, + "low_lr": 1.6126315789473687e-05, + "step": 368 + }, + { + "epoch": 0.967784352399737, + "high_lr": 0.0008063157894736842, + "low_lr": 1.6126315789473687e-05, + "step": 368 + }, + { + "epoch": 0.967784352399737, + "high_lr": 0.0008063157894736842, + "low_lr": 1.6126315789473687e-05, + "step": 368 + }, + { + "epoch": 0.967784352399737, + "high_lr": 0.0008063157894736842, + "low_lr": 1.6126315789473687e-05, + "step": 368 + }, + { + "epoch": 0.967784352399737, + "high_lr": 0.0008063157894736842, + "low_lr": 1.6126315789473687e-05, + "step": 368 + }, + { + "epoch": 0.967784352399737, + "high_lr": 0.0008063157894736842, + "low_lr": 1.6126315789473687e-05, + "step": 368 + }, + { + "epoch": 0.967784352399737, + "high_lr": 0.0008063157894736842, + "low_lr": 1.6126315789473687e-05, + "step": 368 + }, + { + "epoch": 0.9704142011834319, + "grad_norm": 0.9423245787620544, + "learning_rate": 0.0008057894736842106, + "loss": 1.5613, + "step": 369 + }, + { + "epoch": 0.9704142011834319, + "high_lr": 0.0008057894736842106, + "low_lr": 1.611578947368421e-05, + "step": 369 + }, + { + "epoch": 0.9704142011834319, + "high_lr": 0.0008057894736842106, + "low_lr": 1.611578947368421e-05, + "step": 369 + }, + { + "epoch": 0.9704142011834319, + "high_lr": 0.0008057894736842106, + "low_lr": 1.611578947368421e-05, + "step": 369 + }, + { + "epoch": 0.9704142011834319, + "high_lr": 0.0008057894736842106, + "low_lr": 1.611578947368421e-05, + "step": 369 + }, + { + "epoch": 0.9704142011834319, + "high_lr": 0.0008057894736842106, + "low_lr": 1.611578947368421e-05, + "step": 369 + }, + { + "epoch": 0.9704142011834319, + "high_lr": 0.0008057894736842106, + "low_lr": 1.611578947368421e-05, + "step": 369 + }, + { + "epoch": 0.9704142011834319, + "high_lr": 0.0008057894736842106, + "low_lr": 1.611578947368421e-05, + "step": 369 + }, + { + "epoch": 0.9704142011834319, + "high_lr": 0.0008057894736842106, + "low_lr": 1.611578947368421e-05, + "step": 369 + }, + { + "epoch": 0.9730440499671269, + "grad_norm": 0.967456579208374, + "learning_rate": 0.0008052631578947369, + "loss": 1.5729, + "step": 370 + }, + { + "epoch": 0.9730440499671269, + "high_lr": 0.0008052631578947369, + "low_lr": 1.6105263157894736e-05, + "step": 370 + }, + { + "epoch": 0.9730440499671269, + "high_lr": 0.0008052631578947369, + "low_lr": 1.6105263157894736e-05, + "step": 370 + }, + { + "epoch": 0.9730440499671269, + "high_lr": 0.0008052631578947369, + "low_lr": 1.6105263157894736e-05, + "step": 370 + }, + { + "epoch": 0.9730440499671269, + "high_lr": 0.0008052631578947369, + "low_lr": 1.6105263157894736e-05, + "step": 370 + }, + { + "epoch": 0.9730440499671269, + "high_lr": 0.0008052631578947369, + "low_lr": 1.6105263157894736e-05, + "step": 370 + }, + { + "epoch": 0.9730440499671269, + "high_lr": 0.0008052631578947369, + "low_lr": 1.6105263157894736e-05, + "step": 370 + }, + { + "epoch": 0.9730440499671269, + "high_lr": 0.0008052631578947369, + "low_lr": 1.6105263157894736e-05, + "step": 370 + }, + { + "epoch": 0.9730440499671269, + "high_lr": 0.0008052631578947369, + "low_lr": 1.6105263157894736e-05, + "step": 370 + }, + { + "epoch": 0.9756738987508218, + "grad_norm": 0.9354105591773987, + "learning_rate": 0.0008047368421052632, + "loss": 1.6185, + "step": 371 + }, + { + "epoch": 0.9756738987508218, + "high_lr": 0.0008047368421052632, + "low_lr": 1.6094736842105264e-05, + "step": 371 + }, + { + "epoch": 0.9756738987508218, + "high_lr": 0.0008047368421052632, + "low_lr": 1.6094736842105264e-05, + "step": 371 + }, + { + "epoch": 0.9756738987508218, + "high_lr": 0.0008047368421052632, + "low_lr": 1.6094736842105264e-05, + "step": 371 + }, + { + "epoch": 0.9756738987508218, + "high_lr": 0.0008047368421052632, + "low_lr": 1.6094736842105264e-05, + "step": 371 + }, + { + "epoch": 0.9756738987508218, + "high_lr": 0.0008047368421052632, + "low_lr": 1.6094736842105264e-05, + "step": 371 + }, + { + "epoch": 0.9756738987508218, + "high_lr": 0.0008047368421052632, + "low_lr": 1.6094736842105264e-05, + "step": 371 + }, + { + "epoch": 0.9756738987508218, + "high_lr": 0.0008047368421052632, + "low_lr": 1.6094736842105264e-05, + "step": 371 + }, + { + "epoch": 0.9756738987508218, + "high_lr": 0.0008047368421052632, + "low_lr": 1.6094736842105264e-05, + "step": 371 + }, + { + "epoch": 0.9783037475345168, + "grad_norm": 1.0104918479919434, + "learning_rate": 0.0008042105263157895, + "loss": 1.5653, + "step": 372 + }, + { + "epoch": 0.9783037475345168, + "high_lr": 0.0008042105263157895, + "low_lr": 1.6084210526315792e-05, + "step": 372 + }, + { + "epoch": 0.9783037475345168, + "high_lr": 0.0008042105263157895, + "low_lr": 1.6084210526315792e-05, + "step": 372 + }, + { + "epoch": 0.9783037475345168, + "high_lr": 0.0008042105263157895, + "low_lr": 1.6084210526315792e-05, + "step": 372 + }, + { + "epoch": 0.9783037475345168, + "high_lr": 0.0008042105263157895, + "low_lr": 1.6084210526315792e-05, + "step": 372 + }, + { + "epoch": 0.9783037475345168, + "high_lr": 0.0008042105263157895, + "low_lr": 1.6084210526315792e-05, + "step": 372 + }, + { + "epoch": 0.9783037475345168, + "high_lr": 0.0008042105263157895, + "low_lr": 1.6084210526315792e-05, + "step": 372 + }, + { + "epoch": 0.9783037475345168, + "high_lr": 0.0008042105263157895, + "low_lr": 1.6084210526315792e-05, + "step": 372 + }, + { + "epoch": 0.9783037475345168, + "high_lr": 0.0008042105263157895, + "low_lr": 1.6084210526315792e-05, + "step": 372 + }, + { + "epoch": 0.9809335963182118, + "grad_norm": 0.9284670352935791, + "learning_rate": 0.0008036842105263158, + "loss": 1.5226, + "step": 373 + }, + { + "epoch": 0.9809335963182118, + "high_lr": 0.0008036842105263158, + "low_lr": 1.6073684210526317e-05, + "step": 373 + }, + { + "epoch": 0.9809335963182118, + "high_lr": 0.0008036842105263158, + "low_lr": 1.6073684210526317e-05, + "step": 373 + }, + { + "epoch": 0.9809335963182118, + "high_lr": 0.0008036842105263158, + "low_lr": 1.6073684210526317e-05, + "step": 373 + }, + { + "epoch": 0.9809335963182118, + "high_lr": 0.0008036842105263158, + "low_lr": 1.6073684210526317e-05, + "step": 373 + }, + { + "epoch": 0.9809335963182118, + "high_lr": 0.0008036842105263158, + "low_lr": 1.6073684210526317e-05, + "step": 373 + }, + { + "epoch": 0.9809335963182118, + "high_lr": 0.0008036842105263158, + "low_lr": 1.6073684210526317e-05, + "step": 373 + }, + { + "epoch": 0.9809335963182118, + "high_lr": 0.0008036842105263158, + "low_lr": 1.6073684210526317e-05, + "step": 373 + }, + { + "epoch": 0.9809335963182118, + "high_lr": 0.0008036842105263158, + "low_lr": 1.6073684210526317e-05, + "step": 373 + }, + { + "epoch": 0.9835634451019066, + "grad_norm": 0.9576569199562073, + "learning_rate": 0.0008031578947368421, + "loss": 1.6275, + "step": 374 + }, + { + "epoch": 0.9835634451019066, + "high_lr": 0.0008031578947368421, + "low_lr": 1.606315789473684e-05, + "step": 374 + }, + { + "epoch": 0.9835634451019066, + "high_lr": 0.0008031578947368421, + "low_lr": 1.606315789473684e-05, + "step": 374 + }, + { + "epoch": 0.9835634451019066, + "high_lr": 0.0008031578947368421, + "low_lr": 1.606315789473684e-05, + "step": 374 + }, + { + "epoch": 0.9835634451019066, + "high_lr": 0.0008031578947368421, + "low_lr": 1.606315789473684e-05, + "step": 374 + }, + { + "epoch": 0.9835634451019066, + "high_lr": 0.0008031578947368421, + "low_lr": 1.606315789473684e-05, + "step": 374 + }, + { + "epoch": 0.9835634451019066, + "high_lr": 0.0008031578947368421, + "low_lr": 1.606315789473684e-05, + "step": 374 + }, + { + "epoch": 0.9835634451019066, + "high_lr": 0.0008031578947368421, + "low_lr": 1.606315789473684e-05, + "step": 374 + }, + { + "epoch": 0.9835634451019066, + "high_lr": 0.0008031578947368421, + "low_lr": 1.606315789473684e-05, + "step": 374 + }, + { + "epoch": 0.9861932938856016, + "grad_norm": 0.9816845655441284, + "learning_rate": 0.0008026315789473685, + "loss": 1.5117, + "step": 375 + }, + { + "epoch": 0.9861932938856016, + "high_lr": 0.0008026315789473685, + "low_lr": 1.605263157894737e-05, + "step": 375 + }, + { + "epoch": 0.9861932938856016, + "high_lr": 0.0008026315789473685, + "low_lr": 1.605263157894737e-05, + "step": 375 + }, + { + "epoch": 0.9861932938856016, + "high_lr": 0.0008026315789473685, + "low_lr": 1.605263157894737e-05, + "step": 375 + }, + { + "epoch": 0.9861932938856016, + "high_lr": 0.0008026315789473685, + "low_lr": 1.605263157894737e-05, + "step": 375 + }, + { + "epoch": 0.9861932938856016, + "high_lr": 0.0008026315789473685, + "low_lr": 1.605263157894737e-05, + "step": 375 + }, + { + "epoch": 0.9861932938856016, + "high_lr": 0.0008026315789473685, + "low_lr": 1.605263157894737e-05, + "step": 375 + }, + { + "epoch": 0.9861932938856016, + "high_lr": 0.0008026315789473685, + "low_lr": 1.605263157894737e-05, + "step": 375 + }, + { + "epoch": 0.9861932938856016, + "high_lr": 0.0008026315789473685, + "low_lr": 1.605263157894737e-05, + "step": 375 + }, + { + "epoch": 0.9888231426692965, + "grad_norm": 0.9952553510665894, + "learning_rate": 0.0008021052631578948, + "loss": 1.6148, + "step": 376 + }, + { + "epoch": 0.9888231426692965, + "high_lr": 0.0008021052631578948, + "low_lr": 1.6042105263157897e-05, + "step": 376 + }, + { + "epoch": 0.9888231426692965, + "high_lr": 0.0008021052631578948, + "low_lr": 1.6042105263157897e-05, + "step": 376 + }, + { + "epoch": 0.9888231426692965, + "high_lr": 0.0008021052631578948, + "low_lr": 1.6042105263157897e-05, + "step": 376 + }, + { + "epoch": 0.9888231426692965, + "high_lr": 0.0008021052631578948, + "low_lr": 1.6042105263157897e-05, + "step": 376 + }, + { + "epoch": 0.9888231426692965, + "high_lr": 0.0008021052631578948, + "low_lr": 1.6042105263157897e-05, + "step": 376 + }, + { + "epoch": 0.9888231426692965, + "high_lr": 0.0008021052631578948, + "low_lr": 1.6042105263157897e-05, + "step": 376 + }, + { + "epoch": 0.9888231426692965, + "high_lr": 0.0008021052631578948, + "low_lr": 1.6042105263157897e-05, + "step": 376 + }, + { + "epoch": 0.9888231426692965, + "high_lr": 0.0008021052631578948, + "low_lr": 1.6042105263157897e-05, + "step": 376 + }, + { + "epoch": 0.9914529914529915, + "grad_norm": 1.0607998371124268, + "learning_rate": 0.0008015789473684211, + "loss": 1.6075, + "step": 377 + }, + { + "epoch": 0.9914529914529915, + "high_lr": 0.0008015789473684211, + "low_lr": 1.6031578947368422e-05, + "step": 377 + }, + { + "epoch": 0.9914529914529915, + "high_lr": 0.0008015789473684211, + "low_lr": 1.6031578947368422e-05, + "step": 377 + }, + { + "epoch": 0.9914529914529915, + "high_lr": 0.0008015789473684211, + "low_lr": 1.6031578947368422e-05, + "step": 377 + }, + { + "epoch": 0.9914529914529915, + "high_lr": 0.0008015789473684211, + "low_lr": 1.6031578947368422e-05, + "step": 377 + }, + { + "epoch": 0.9914529914529915, + "high_lr": 0.0008015789473684211, + "low_lr": 1.6031578947368422e-05, + "step": 377 + }, + { + "epoch": 0.9914529914529915, + "high_lr": 0.0008015789473684211, + "low_lr": 1.6031578947368422e-05, + "step": 377 + }, + { + "epoch": 0.9914529914529915, + "high_lr": 0.0008015789473684211, + "low_lr": 1.6031578947368422e-05, + "step": 377 + }, + { + "epoch": 0.9914529914529915, + "high_lr": 0.0008015789473684211, + "low_lr": 1.6031578947368422e-05, + "step": 377 + }, + { + "epoch": 0.9940828402366864, + "grad_norm": 0.975928783416748, + "learning_rate": 0.0008010526315789474, + "loss": 1.5553, + "step": 378 + }, + { + "epoch": 0.9940828402366864, + "high_lr": 0.0008010526315789474, + "low_lr": 1.6021052631578947e-05, + "step": 378 + }, + { + "epoch": 0.9940828402366864, + "high_lr": 0.0008010526315789474, + "low_lr": 1.6021052631578947e-05, + "step": 378 + }, + { + "epoch": 0.9940828402366864, + "high_lr": 0.0008010526315789474, + "low_lr": 1.6021052631578947e-05, + "step": 378 + }, + { + "epoch": 0.9940828402366864, + "high_lr": 0.0008010526315789474, + "low_lr": 1.6021052631578947e-05, + "step": 378 + }, + { + "epoch": 0.9940828402366864, + "high_lr": 0.0008010526315789474, + "low_lr": 1.6021052631578947e-05, + "step": 378 + }, + { + "epoch": 0.9940828402366864, + "high_lr": 0.0008010526315789474, + "low_lr": 1.6021052631578947e-05, + "step": 378 + }, + { + "epoch": 0.9940828402366864, + "high_lr": 0.0008010526315789474, + "low_lr": 1.6021052631578947e-05, + "step": 378 + }, + { + "epoch": 0.9940828402366864, + "high_lr": 0.0008010526315789474, + "low_lr": 1.6021052631578947e-05, + "step": 378 + }, + { + "epoch": 0.9967126890203813, + "grad_norm": 1.1215518712997437, + "learning_rate": 0.0008005263157894736, + "loss": 1.602, + "step": 379 + }, + { + "epoch": 0.9967126890203813, + "high_lr": 0.0008005263157894736, + "low_lr": 1.6010526315789475e-05, + "step": 379 + }, + { + "epoch": 0.9967126890203813, + "high_lr": 0.0008005263157894736, + "low_lr": 1.6010526315789475e-05, + "step": 379 + }, + { + "epoch": 0.9967126890203813, + "high_lr": 0.0008005263157894736, + "low_lr": 1.6010526315789475e-05, + "step": 379 + }, + { + "epoch": 0.9967126890203813, + "high_lr": 0.0008005263157894736, + "low_lr": 1.6010526315789475e-05, + "step": 379 + }, + { + "epoch": 0.9967126890203813, + "high_lr": 0.0008005263157894736, + "low_lr": 1.6010526315789475e-05, + "step": 379 + }, + { + "epoch": 0.9967126890203813, + "high_lr": 0.0008005263157894736, + "low_lr": 1.6010526315789475e-05, + "step": 379 + }, + { + "epoch": 0.9967126890203813, + "high_lr": 0.0008005263157894736, + "low_lr": 1.6010526315789475e-05, + "step": 379 + }, + { + "epoch": 0.9967126890203813, + "high_lr": 0.0008005263157894736, + "low_lr": 1.6010526315789475e-05, + "step": 379 + }, + { + "epoch": 0.9993425378040762, + "grad_norm": 0.9771432280540466, + "learning_rate": 0.0008, + "loss": 1.5582, + "step": 380 + }, + { + "epoch": 0.9993425378040762, + "high_lr": 0.0008, + "low_lr": 1.6000000000000003e-05, + "step": 380 + }, + { + "epoch": 0.9993425378040762, + "high_lr": 0.0008, + "low_lr": 1.6000000000000003e-05, + "step": 380 + }, + { + "epoch": 0.9993425378040762, + "high_lr": 0.0008, + "low_lr": 1.6000000000000003e-05, + "step": 380 + }, + { + "epoch": 0.9993425378040762, + "high_lr": 0.0008, + "low_lr": 1.6000000000000003e-05, + "step": 380 + }, + { + "epoch": 0.9993425378040762, + "high_lr": 0.0008, + "low_lr": 1.6000000000000003e-05, + "step": 380 + }, + { + "epoch": 0.9993425378040762, + "high_lr": 0.0008, + "low_lr": 1.6000000000000003e-05, + "step": 380 + }, + { + "epoch": 0.9993425378040762, + "high_lr": 0.0008, + "low_lr": 1.6000000000000003e-05, + "step": 380 + }, + { + "epoch": 0.9993425378040762, + "high_lr": 0.0008, + "low_lr": 1.6000000000000003e-05, + "step": 380 + }, + { + "epoch": 1.0019723865877712, + "grad_norm": 0.9735488891601562, + "learning_rate": 0.0007994736842105263, + "loss": 1.5239, + "step": 381 + }, + { + "epoch": 1.0019723865877712, + "high_lr": 0.0007994736842105263, + "low_lr": 1.5989473684210527e-05, + "step": 381 + }, + { + "epoch": 1.0019723865877712, + "high_lr": 0.0007994736842105263, + "low_lr": 1.5989473684210527e-05, + "step": 381 + }, + { + "epoch": 1.0019723865877712, + "high_lr": 0.0007994736842105263, + "low_lr": 1.5989473684210527e-05, + "step": 381 + }, + { + "epoch": 1.0019723865877712, + "high_lr": 0.0007994736842105263, + "low_lr": 1.5989473684210527e-05, + "step": 381 + }, + { + "epoch": 1.0019723865877712, + "high_lr": 0.0007994736842105263, + "low_lr": 1.5989473684210527e-05, + "step": 381 + }, + { + "epoch": 1.0019723865877712, + "high_lr": 0.0007994736842105263, + "low_lr": 1.5989473684210527e-05, + "step": 381 + }, + { + "epoch": 1.0019723865877712, + "high_lr": 0.0007994736842105263, + "low_lr": 1.5989473684210527e-05, + "step": 381 + }, + { + "epoch": 1.0019723865877712, + "high_lr": 0.0007994736842105263, + "low_lr": 1.5989473684210527e-05, + "step": 381 + }, + { + "epoch": 1.004602235371466, + "grad_norm": 0.9668174982070923, + "learning_rate": 0.0007989473684210526, + "loss": 1.4952, + "step": 382 + }, + { + "epoch": 1.004602235371466, + "high_lr": 0.0007989473684210526, + "low_lr": 1.5978947368421055e-05, + "step": 382 + }, + { + "epoch": 1.004602235371466, + "high_lr": 0.0007989473684210526, + "low_lr": 1.5978947368421055e-05, + "step": 382 + }, + { + "epoch": 1.004602235371466, + "high_lr": 0.0007989473684210526, + "low_lr": 1.5978947368421055e-05, + "step": 382 + }, + { + "epoch": 1.004602235371466, + "high_lr": 0.0007989473684210526, + "low_lr": 1.5978947368421055e-05, + "step": 382 + }, + { + "epoch": 1.004602235371466, + "high_lr": 0.0007989473684210526, + "low_lr": 1.5978947368421055e-05, + "step": 382 + }, + { + "epoch": 1.004602235371466, + "high_lr": 0.0007989473684210526, + "low_lr": 1.5978947368421055e-05, + "step": 382 + }, + { + "epoch": 1.004602235371466, + "high_lr": 0.0007989473684210526, + "low_lr": 1.5978947368421055e-05, + "step": 382 + }, + { + "epoch": 1.004602235371466, + "high_lr": 0.0007989473684210526, + "low_lr": 1.5978947368421055e-05, + "step": 382 + }, + { + "epoch": 1.0072320841551612, + "grad_norm": 0.9419265389442444, + "learning_rate": 0.0007984210526315789, + "loss": 1.5457, + "step": 383 + }, + { + "epoch": 1.0072320841551612, + "high_lr": 0.0007984210526315789, + "low_lr": 1.596842105263158e-05, + "step": 383 + }, + { + "epoch": 1.0072320841551612, + "high_lr": 0.0007984210526315789, + "low_lr": 1.596842105263158e-05, + "step": 383 + }, + { + "epoch": 1.0072320841551612, + "high_lr": 0.0007984210526315789, + "low_lr": 1.596842105263158e-05, + "step": 383 + }, + { + "epoch": 1.0072320841551612, + "high_lr": 0.0007984210526315789, + "low_lr": 1.596842105263158e-05, + "step": 383 + }, + { + "epoch": 1.0072320841551612, + "high_lr": 0.0007984210526315789, + "low_lr": 1.596842105263158e-05, + "step": 383 + }, + { + "epoch": 1.0072320841551612, + "high_lr": 0.0007984210526315789, + "low_lr": 1.596842105263158e-05, + "step": 383 + }, + { + "epoch": 1.0072320841551612, + "high_lr": 0.0007984210526315789, + "low_lr": 1.596842105263158e-05, + "step": 383 + }, + { + "epoch": 1.0072320841551612, + "high_lr": 0.0007984210526315789, + "low_lr": 1.596842105263158e-05, + "step": 383 + }, + { + "epoch": 1.009861932938856, + "grad_norm": 1.009337067604065, + "learning_rate": 0.0007978947368421052, + "loss": 1.5266, + "step": 384 + }, + { + "epoch": 1.009861932938856, + "high_lr": 0.0007978947368421052, + "low_lr": 1.5957894736842105e-05, + "step": 384 + }, + { + "epoch": 1.009861932938856, + "high_lr": 0.0007978947368421052, + "low_lr": 1.5957894736842105e-05, + "step": 384 + }, + { + "epoch": 1.009861932938856, + "high_lr": 0.0007978947368421052, + "low_lr": 1.5957894736842105e-05, + "step": 384 + }, + { + "epoch": 1.009861932938856, + "high_lr": 0.0007978947368421052, + "low_lr": 1.5957894736842105e-05, + "step": 384 + }, + { + "epoch": 1.009861932938856, + "high_lr": 0.0007978947368421052, + "low_lr": 1.5957894736842105e-05, + "step": 384 + }, + { + "epoch": 1.009861932938856, + "high_lr": 0.0007978947368421052, + "low_lr": 1.5957894736842105e-05, + "step": 384 + }, + { + "epoch": 1.009861932938856, + "high_lr": 0.0007978947368421052, + "low_lr": 1.5957894736842105e-05, + "step": 384 + }, + { + "epoch": 1.009861932938856, + "high_lr": 0.0007978947368421052, + "low_lr": 1.5957894736842105e-05, + "step": 384 + }, + { + "epoch": 1.012491781722551, + "grad_norm": 0.928406834602356, + "learning_rate": 0.0007973684210526317, + "loss": 1.4883, + "step": 385 + }, + { + "epoch": 1.012491781722551, + "high_lr": 0.0007973684210526317, + "low_lr": 1.5947368421052633e-05, + "step": 385 + }, + { + "epoch": 1.012491781722551, + "high_lr": 0.0007973684210526317, + "low_lr": 1.5947368421052633e-05, + "step": 385 + }, + { + "epoch": 1.012491781722551, + "high_lr": 0.0007973684210526317, + "low_lr": 1.5947368421052633e-05, + "step": 385 + }, + { + "epoch": 1.012491781722551, + "high_lr": 0.0007973684210526317, + "low_lr": 1.5947368421052633e-05, + "step": 385 + }, + { + "epoch": 1.012491781722551, + "high_lr": 0.0007973684210526317, + "low_lr": 1.5947368421052633e-05, + "step": 385 + }, + { + "epoch": 1.012491781722551, + "high_lr": 0.0007973684210526317, + "low_lr": 1.5947368421052633e-05, + "step": 385 + }, + { + "epoch": 1.012491781722551, + "high_lr": 0.0007973684210526317, + "low_lr": 1.5947368421052633e-05, + "step": 385 + }, + { + "epoch": 1.012491781722551, + "high_lr": 0.0007973684210526317, + "low_lr": 1.5947368421052633e-05, + "step": 385 + }, + { + "epoch": 1.0151216305062458, + "grad_norm": 1.0293240547180176, + "learning_rate": 0.000796842105263158, + "loss": 1.4763, + "step": 386 + }, + { + "epoch": 1.0151216305062458, + "high_lr": 0.000796842105263158, + "low_lr": 1.593684210526316e-05, + "step": 386 + }, + { + "epoch": 1.0151216305062458, + "high_lr": 0.000796842105263158, + "low_lr": 1.593684210526316e-05, + "step": 386 + }, + { + "epoch": 1.0151216305062458, + "high_lr": 0.000796842105263158, + "low_lr": 1.593684210526316e-05, + "step": 386 + }, + { + "epoch": 1.0151216305062458, + "high_lr": 0.000796842105263158, + "low_lr": 1.593684210526316e-05, + "step": 386 + }, + { + "epoch": 1.0151216305062458, + "high_lr": 0.000796842105263158, + "low_lr": 1.593684210526316e-05, + "step": 386 + }, + { + "epoch": 1.0151216305062458, + "high_lr": 0.000796842105263158, + "low_lr": 1.593684210526316e-05, + "step": 386 + }, + { + "epoch": 1.0151216305062458, + "high_lr": 0.000796842105263158, + "low_lr": 1.593684210526316e-05, + "step": 386 + }, + { + "epoch": 1.0151216305062458, + "high_lr": 0.000796842105263158, + "low_lr": 1.593684210526316e-05, + "step": 386 + }, + { + "epoch": 1.017751479289941, + "grad_norm": 0.953455924987793, + "learning_rate": 0.0007963157894736843, + "loss": 1.4951, + "step": 387 + }, + { + "epoch": 1.017751479289941, + "high_lr": 0.0007963157894736843, + "low_lr": 1.5926315789473685e-05, + "step": 387 + }, + { + "epoch": 1.017751479289941, + "high_lr": 0.0007963157894736843, + "low_lr": 1.5926315789473685e-05, + "step": 387 + }, + { + "epoch": 1.017751479289941, + "high_lr": 0.0007963157894736843, + "low_lr": 1.5926315789473685e-05, + "step": 387 + }, + { + "epoch": 1.017751479289941, + "high_lr": 0.0007963157894736843, + "low_lr": 1.5926315789473685e-05, + "step": 387 + }, + { + "epoch": 1.017751479289941, + "high_lr": 0.0007963157894736843, + "low_lr": 1.5926315789473685e-05, + "step": 387 + }, + { + "epoch": 1.017751479289941, + "high_lr": 0.0007963157894736843, + "low_lr": 1.5926315789473685e-05, + "step": 387 + }, + { + "epoch": 1.017751479289941, + "high_lr": 0.0007963157894736843, + "low_lr": 1.5926315789473685e-05, + "step": 387 + }, + { + "epoch": 1.017751479289941, + "high_lr": 0.0007963157894736843, + "low_lr": 1.5926315789473685e-05, + "step": 387 + }, + { + "epoch": 1.0203813280736358, + "grad_norm": 1.0384786128997803, + "learning_rate": 0.0007957894736842105, + "loss": 1.5585, + "step": 388 + }, + { + "epoch": 1.0203813280736358, + "high_lr": 0.0007957894736842105, + "low_lr": 1.591578947368421e-05, + "step": 388 + }, + { + "epoch": 1.0203813280736358, + "high_lr": 0.0007957894736842105, + "low_lr": 1.591578947368421e-05, + "step": 388 + }, + { + "epoch": 1.0203813280736358, + "high_lr": 0.0007957894736842105, + "low_lr": 1.591578947368421e-05, + "step": 388 + }, + { + "epoch": 1.0203813280736358, + "high_lr": 0.0007957894736842105, + "low_lr": 1.591578947368421e-05, + "step": 388 + }, + { + "epoch": 1.0203813280736358, + "high_lr": 0.0007957894736842105, + "low_lr": 1.591578947368421e-05, + "step": 388 + }, + { + "epoch": 1.0203813280736358, + "high_lr": 0.0007957894736842105, + "low_lr": 1.591578947368421e-05, + "step": 388 + }, + { + "epoch": 1.0203813280736358, + "high_lr": 0.0007957894736842105, + "low_lr": 1.591578947368421e-05, + "step": 388 + }, + { + "epoch": 1.0203813280736358, + "high_lr": 0.0007957894736842105, + "low_lr": 1.591578947368421e-05, + "step": 388 + }, + { + "epoch": 1.0230111768573307, + "grad_norm": 1.0553350448608398, + "learning_rate": 0.0007952631578947369, + "loss": 1.4933, + "step": 389 + }, + { + "epoch": 1.0230111768573307, + "high_lr": 0.0007952631578947369, + "low_lr": 1.5905263157894738e-05, + "step": 389 + }, + { + "epoch": 1.0230111768573307, + "high_lr": 0.0007952631578947369, + "low_lr": 1.5905263157894738e-05, + "step": 389 + }, + { + "epoch": 1.0230111768573307, + "high_lr": 0.0007952631578947369, + "low_lr": 1.5905263157894738e-05, + "step": 389 + }, + { + "epoch": 1.0230111768573307, + "high_lr": 0.0007952631578947369, + "low_lr": 1.5905263157894738e-05, + "step": 389 + }, + { + "epoch": 1.0230111768573307, + "high_lr": 0.0007952631578947369, + "low_lr": 1.5905263157894738e-05, + "step": 389 + }, + { + "epoch": 1.0230111768573307, + "high_lr": 0.0007952631578947369, + "low_lr": 1.5905263157894738e-05, + "step": 389 + }, + { + "epoch": 1.0230111768573307, + "high_lr": 0.0007952631578947369, + "low_lr": 1.5905263157894738e-05, + "step": 389 + }, + { + "epoch": 1.0230111768573307, + "high_lr": 0.0007952631578947369, + "low_lr": 1.5905263157894738e-05, + "step": 389 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.9986532330513, + "learning_rate": 0.0007947368421052632, + "loss": 1.5027, + "step": 390 + }, + { + "epoch": 1.0256410256410255, + "high_lr": 0.0007947368421052632, + "low_lr": 1.5894736842105266e-05, + "step": 390 + }, + { + "epoch": 1.0256410256410255, + "high_lr": 0.0007947368421052632, + "low_lr": 1.5894736842105266e-05, + "step": 390 + }, + { + "epoch": 1.0256410256410255, + "high_lr": 0.0007947368421052632, + "low_lr": 1.5894736842105266e-05, + "step": 390 + }, + { + "epoch": 1.0256410256410255, + "high_lr": 0.0007947368421052632, + "low_lr": 1.5894736842105266e-05, + "step": 390 + }, + { + "epoch": 1.0256410256410255, + "high_lr": 0.0007947368421052632, + "low_lr": 1.5894736842105266e-05, + "step": 390 + }, + { + "epoch": 1.0256410256410255, + "high_lr": 0.0007947368421052632, + "low_lr": 1.5894736842105266e-05, + "step": 390 + }, + { + "epoch": 1.0256410256410255, + "high_lr": 0.0007947368421052632, + "low_lr": 1.5894736842105266e-05, + "step": 390 + }, + { + "epoch": 1.0256410256410255, + "high_lr": 0.0007947368421052632, + "low_lr": 1.5894736842105266e-05, + "step": 390 + }, + { + "epoch": 1.0282708744247206, + "grad_norm": 0.9266156554222107, + "learning_rate": 0.0007942105263157895, + "loss": 1.4784, + "step": 391 + }, + { + "epoch": 1.0282708744247206, + "high_lr": 0.0007942105263157895, + "low_lr": 1.588421052631579e-05, + "step": 391 + }, + { + "epoch": 1.0282708744247206, + "high_lr": 0.0007942105263157895, + "low_lr": 1.588421052631579e-05, + "step": 391 + }, + { + "epoch": 1.0282708744247206, + "high_lr": 0.0007942105263157895, + "low_lr": 1.588421052631579e-05, + "step": 391 + }, + { + "epoch": 1.0282708744247206, + "high_lr": 0.0007942105263157895, + "low_lr": 1.588421052631579e-05, + "step": 391 + }, + { + "epoch": 1.0282708744247206, + "high_lr": 0.0007942105263157895, + "low_lr": 1.588421052631579e-05, + "step": 391 + }, + { + "epoch": 1.0282708744247206, + "high_lr": 0.0007942105263157895, + "low_lr": 1.588421052631579e-05, + "step": 391 + }, + { + "epoch": 1.0282708744247206, + "high_lr": 0.0007942105263157895, + "low_lr": 1.588421052631579e-05, + "step": 391 + }, + { + "epoch": 1.0282708744247206, + "high_lr": 0.0007942105263157895, + "low_lr": 1.588421052631579e-05, + "step": 391 + }, + { + "epoch": 1.0309007232084155, + "grad_norm": 0.9740056991577148, + "learning_rate": 0.0007936842105263158, + "loss": 1.5323, + "step": 392 + }, + { + "epoch": 1.0309007232084155, + "high_lr": 0.0007936842105263158, + "low_lr": 1.5873684210526315e-05, + "step": 392 + }, + { + "epoch": 1.0309007232084155, + "high_lr": 0.0007936842105263158, + "low_lr": 1.5873684210526315e-05, + "step": 392 + }, + { + "epoch": 1.0309007232084155, + "high_lr": 0.0007936842105263158, + "low_lr": 1.5873684210526315e-05, + "step": 392 + }, + { + "epoch": 1.0309007232084155, + "high_lr": 0.0007936842105263158, + "low_lr": 1.5873684210526315e-05, + "step": 392 + }, + { + "epoch": 1.0309007232084155, + "high_lr": 0.0007936842105263158, + "low_lr": 1.5873684210526315e-05, + "step": 392 + }, + { + "epoch": 1.0309007232084155, + "high_lr": 0.0007936842105263158, + "low_lr": 1.5873684210526315e-05, + "step": 392 + }, + { + "epoch": 1.0309007232084155, + "high_lr": 0.0007936842105263158, + "low_lr": 1.5873684210526315e-05, + "step": 392 + }, + { + "epoch": 1.0309007232084155, + "high_lr": 0.0007936842105263158, + "low_lr": 1.5873684210526315e-05, + "step": 392 + }, + { + "epoch": 1.0335305719921104, + "grad_norm": 1.048331379890442, + "learning_rate": 0.0007931578947368421, + "loss": 1.5524, + "step": 393 + }, + { + "epoch": 1.0335305719921104, + "high_lr": 0.0007931578947368421, + "low_lr": 1.5863157894736843e-05, + "step": 393 + }, + { + "epoch": 1.0335305719921104, + "high_lr": 0.0007931578947368421, + "low_lr": 1.5863157894736843e-05, + "step": 393 + }, + { + "epoch": 1.0335305719921104, + "high_lr": 0.0007931578947368421, + "low_lr": 1.5863157894736843e-05, + "step": 393 + }, + { + "epoch": 1.0335305719921104, + "high_lr": 0.0007931578947368421, + "low_lr": 1.5863157894736843e-05, + "step": 393 + }, + { + "epoch": 1.0335305719921104, + "high_lr": 0.0007931578947368421, + "low_lr": 1.5863157894736843e-05, + "step": 393 + }, + { + "epoch": 1.0335305719921104, + "high_lr": 0.0007931578947368421, + "low_lr": 1.5863157894736843e-05, + "step": 393 + }, + { + "epoch": 1.0335305719921104, + "high_lr": 0.0007931578947368421, + "low_lr": 1.5863157894736843e-05, + "step": 393 + }, + { + "epoch": 1.0335305719921104, + "high_lr": 0.0007931578947368421, + "low_lr": 1.5863157894736843e-05, + "step": 393 + }, + { + "epoch": 1.0361604207758055, + "grad_norm": 0.9899749159812927, + "learning_rate": 0.0007926315789473685, + "loss": 1.5418, + "step": 394 + }, + { + "epoch": 1.0361604207758055, + "high_lr": 0.0007926315789473685, + "low_lr": 1.585263157894737e-05, + "step": 394 + }, + { + "epoch": 1.0361604207758055, + "high_lr": 0.0007926315789473685, + "low_lr": 1.585263157894737e-05, + "step": 394 + }, + { + "epoch": 1.0361604207758055, + "high_lr": 0.0007926315789473685, + "low_lr": 1.585263157894737e-05, + "step": 394 + }, + { + "epoch": 1.0361604207758055, + "high_lr": 0.0007926315789473685, + "low_lr": 1.585263157894737e-05, + "step": 394 + }, + { + "epoch": 1.0361604207758055, + "high_lr": 0.0007926315789473685, + "low_lr": 1.585263157894737e-05, + "step": 394 + }, + { + "epoch": 1.0361604207758055, + "high_lr": 0.0007926315789473685, + "low_lr": 1.585263157894737e-05, + "step": 394 + }, + { + "epoch": 1.0361604207758055, + "high_lr": 0.0007926315789473685, + "low_lr": 1.585263157894737e-05, + "step": 394 + }, + { + "epoch": 1.0361604207758055, + "high_lr": 0.0007926315789473685, + "low_lr": 1.585263157894737e-05, + "step": 394 + }, + { + "epoch": 1.0387902695595004, + "grad_norm": 0.9434759616851807, + "learning_rate": 0.0007921052631578948, + "loss": 1.5092, + "step": 395 + }, + { + "epoch": 1.0387902695595004, + "high_lr": 0.0007921052631578948, + "low_lr": 1.5842105263157896e-05, + "step": 395 + }, + { + "epoch": 1.0387902695595004, + "high_lr": 0.0007921052631578948, + "low_lr": 1.5842105263157896e-05, + "step": 395 + }, + { + "epoch": 1.0387902695595004, + "high_lr": 0.0007921052631578948, + "low_lr": 1.5842105263157896e-05, + "step": 395 + }, + { + "epoch": 1.0387902695595004, + "high_lr": 0.0007921052631578948, + "low_lr": 1.5842105263157896e-05, + "step": 395 + }, + { + "epoch": 1.0387902695595004, + "high_lr": 0.0007921052631578948, + "low_lr": 1.5842105263157896e-05, + "step": 395 + }, + { + "epoch": 1.0387902695595004, + "high_lr": 0.0007921052631578948, + "low_lr": 1.5842105263157896e-05, + "step": 395 + }, + { + "epoch": 1.0387902695595004, + "high_lr": 0.0007921052631578948, + "low_lr": 1.5842105263157896e-05, + "step": 395 + }, + { + "epoch": 1.0387902695595004, + "high_lr": 0.0007921052631578948, + "low_lr": 1.5842105263157896e-05, + "step": 395 + }, + { + "epoch": 1.0414201183431953, + "grad_norm": 0.9564380049705505, + "learning_rate": 0.000791578947368421, + "loss": 1.5142, + "step": 396 + }, + { + "epoch": 1.0414201183431953, + "high_lr": 0.000791578947368421, + "low_lr": 1.5831578947368424e-05, + "step": 396 + }, + { + "epoch": 1.0414201183431953, + "high_lr": 0.000791578947368421, + "low_lr": 1.5831578947368424e-05, + "step": 396 + }, + { + "epoch": 1.0414201183431953, + "high_lr": 0.000791578947368421, + "low_lr": 1.5831578947368424e-05, + "step": 396 + }, + { + "epoch": 1.0414201183431953, + "high_lr": 0.000791578947368421, + "low_lr": 1.5831578947368424e-05, + "step": 396 + }, + { + "epoch": 1.0414201183431953, + "high_lr": 0.000791578947368421, + "low_lr": 1.5831578947368424e-05, + "step": 396 + }, + { + "epoch": 1.0414201183431953, + "high_lr": 0.000791578947368421, + "low_lr": 1.5831578947368424e-05, + "step": 396 + }, + { + "epoch": 1.0414201183431953, + "high_lr": 0.000791578947368421, + "low_lr": 1.5831578947368424e-05, + "step": 396 + }, + { + "epoch": 1.0414201183431953, + "high_lr": 0.000791578947368421, + "low_lr": 1.5831578947368424e-05, + "step": 396 + }, + { + "epoch": 1.0440499671268901, + "grad_norm": 0.9293102025985718, + "learning_rate": 0.0007910526315789473, + "loss": 1.5236, + "step": 397 + }, + { + "epoch": 1.0440499671268901, + "high_lr": 0.0007910526315789473, + "low_lr": 1.582105263157895e-05, + "step": 397 + }, + { + "epoch": 1.0440499671268901, + "high_lr": 0.0007910526315789473, + "low_lr": 1.582105263157895e-05, + "step": 397 + }, + { + "epoch": 1.0440499671268901, + "high_lr": 0.0007910526315789473, + "low_lr": 1.582105263157895e-05, + "step": 397 + }, + { + "epoch": 1.0440499671268901, + "high_lr": 0.0007910526315789473, + "low_lr": 1.582105263157895e-05, + "step": 397 + }, + { + "epoch": 1.0440499671268901, + "high_lr": 0.0007910526315789473, + "low_lr": 1.582105263157895e-05, + "step": 397 + }, + { + "epoch": 1.0440499671268901, + "high_lr": 0.0007910526315789473, + "low_lr": 1.582105263157895e-05, + "step": 397 + }, + { + "epoch": 1.0440499671268901, + "high_lr": 0.0007910526315789473, + "low_lr": 1.582105263157895e-05, + "step": 397 + }, + { + "epoch": 1.0440499671268901, + "high_lr": 0.0007910526315789473, + "low_lr": 1.582105263157895e-05, + "step": 397 + }, + { + "epoch": 1.0466798159105852, + "grad_norm": 0.9483926296234131, + "learning_rate": 0.0007905263157894736, + "loss": 1.5735, + "step": 398 + }, + { + "epoch": 1.0466798159105852, + "high_lr": 0.0007905263157894736, + "low_lr": 1.5810526315789473e-05, + "step": 398 + }, + { + "epoch": 1.0466798159105852, + "high_lr": 0.0007905263157894736, + "low_lr": 1.5810526315789473e-05, + "step": 398 + }, + { + "epoch": 1.0466798159105852, + "high_lr": 0.0007905263157894736, + "low_lr": 1.5810526315789473e-05, + "step": 398 + }, + { + "epoch": 1.0466798159105852, + "high_lr": 0.0007905263157894736, + "low_lr": 1.5810526315789473e-05, + "step": 398 + }, + { + "epoch": 1.0466798159105852, + "high_lr": 0.0007905263157894736, + "low_lr": 1.5810526315789473e-05, + "step": 398 + }, + { + "epoch": 1.0466798159105852, + "high_lr": 0.0007905263157894736, + "low_lr": 1.5810526315789473e-05, + "step": 398 + }, + { + "epoch": 1.0466798159105852, + "high_lr": 0.0007905263157894736, + "low_lr": 1.5810526315789473e-05, + "step": 398 + }, + { + "epoch": 1.0466798159105852, + "high_lr": 0.0007905263157894736, + "low_lr": 1.5810526315789473e-05, + "step": 398 + }, + { + "epoch": 1.04930966469428, + "grad_norm": 1.06614089012146, + "learning_rate": 0.00079, + "loss": 1.5154, + "step": 399 + }, + { + "epoch": 1.04930966469428, + "high_lr": 0.00079, + "low_lr": 1.58e-05, + "step": 399 + }, + { + "epoch": 1.04930966469428, + "high_lr": 0.00079, + "low_lr": 1.58e-05, + "step": 399 + }, + { + "epoch": 1.04930966469428, + "high_lr": 0.00079, + "low_lr": 1.58e-05, + "step": 399 + }, + { + "epoch": 1.04930966469428, + "high_lr": 0.00079, + "low_lr": 1.58e-05, + "step": 399 + }, + { + "epoch": 1.04930966469428, + "high_lr": 0.00079, + "low_lr": 1.58e-05, + "step": 399 + }, + { + "epoch": 1.04930966469428, + "high_lr": 0.00079, + "low_lr": 1.58e-05, + "step": 399 + }, + { + "epoch": 1.04930966469428, + "high_lr": 0.00079, + "low_lr": 1.58e-05, + "step": 399 + }, + { + "epoch": 1.04930966469428, + "high_lr": 0.00079, + "low_lr": 1.58e-05, + "step": 399 + }, + { + "epoch": 1.051939513477975, + "grad_norm": 1.038985013961792, + "learning_rate": 0.0007894736842105263, + "loss": 1.5807, + "step": 400 + }, + { + "epoch": 1.051939513477975, + "high_lr": 0.0007894736842105263, + "low_lr": 1.578947368421053e-05, + "step": 400 + }, + { + "epoch": 1.051939513477975, + "high_lr": 0.0007894736842105263, + "low_lr": 1.578947368421053e-05, + "step": 400 + }, + { + "epoch": 1.051939513477975, + "high_lr": 0.0007894736842105263, + "low_lr": 1.578947368421053e-05, + "step": 400 + }, + { + "epoch": 1.051939513477975, + "high_lr": 0.0007894736842105263, + "low_lr": 1.578947368421053e-05, + "step": 400 + }, + { + "epoch": 1.051939513477975, + "high_lr": 0.0007894736842105263, + "low_lr": 1.578947368421053e-05, + "step": 400 + }, + { + "epoch": 1.051939513477975, + "high_lr": 0.0007894736842105263, + "low_lr": 1.578947368421053e-05, + "step": 400 + }, + { + "epoch": 1.051939513477975, + "high_lr": 0.0007894736842105263, + "low_lr": 1.578947368421053e-05, + "step": 400 + }, + { + "epoch": 1.051939513477975, + "high_lr": 0.0007894736842105263, + "low_lr": 1.578947368421053e-05, + "step": 400 + }, + { + "epoch": 1.0545693622616699, + "grad_norm": 0.9950482249259949, + "learning_rate": 0.0007889473684210526, + "loss": 1.4867, + "step": 401 + }, + { + "epoch": 1.0545693622616699, + "high_lr": 0.0007889473684210526, + "low_lr": 1.5778947368421054e-05, + "step": 401 + }, + { + "epoch": 1.0545693622616699, + "high_lr": 0.0007889473684210526, + "low_lr": 1.5778947368421054e-05, + "step": 401 + }, + { + "epoch": 1.0545693622616699, + "high_lr": 0.0007889473684210526, + "low_lr": 1.5778947368421054e-05, + "step": 401 + }, + { + "epoch": 1.0545693622616699, + "high_lr": 0.0007889473684210526, + "low_lr": 1.5778947368421054e-05, + "step": 401 + }, + { + "epoch": 1.0545693622616699, + "high_lr": 0.0007889473684210526, + "low_lr": 1.5778947368421054e-05, + "step": 401 + }, + { + "epoch": 1.0545693622616699, + "high_lr": 0.0007889473684210526, + "low_lr": 1.5778947368421054e-05, + "step": 401 + }, + { + "epoch": 1.0545693622616699, + "high_lr": 0.0007889473684210526, + "low_lr": 1.5778947368421054e-05, + "step": 401 + }, + { + "epoch": 1.0545693622616699, + "high_lr": 0.0007889473684210526, + "low_lr": 1.5778947368421054e-05, + "step": 401 + }, + { + "epoch": 1.057199211045365, + "grad_norm": 1.071161150932312, + "learning_rate": 0.000788421052631579, + "loss": 1.5655, + "step": 402 + }, + { + "epoch": 1.057199211045365, + "high_lr": 0.000788421052631579, + "low_lr": 1.576842105263158e-05, + "step": 402 + }, + { + "epoch": 1.057199211045365, + "high_lr": 0.000788421052631579, + "low_lr": 1.576842105263158e-05, + "step": 402 + }, + { + "epoch": 1.057199211045365, + "high_lr": 0.000788421052631579, + "low_lr": 1.576842105263158e-05, + "step": 402 + }, + { + "epoch": 1.057199211045365, + "high_lr": 0.000788421052631579, + "low_lr": 1.576842105263158e-05, + "step": 402 + }, + { + "epoch": 1.057199211045365, + "high_lr": 0.000788421052631579, + "low_lr": 1.576842105263158e-05, + "step": 402 + }, + { + "epoch": 1.057199211045365, + "high_lr": 0.000788421052631579, + "low_lr": 1.576842105263158e-05, + "step": 402 + }, + { + "epoch": 1.057199211045365, + "high_lr": 0.000788421052631579, + "low_lr": 1.576842105263158e-05, + "step": 402 + }, + { + "epoch": 1.057199211045365, + "high_lr": 0.000788421052631579, + "low_lr": 1.576842105263158e-05, + "step": 402 + }, + { + "epoch": 1.0598290598290598, + "grad_norm": 1.0037591457366943, + "learning_rate": 0.0007878947368421054, + "loss": 1.4863, + "step": 403 + }, + { + "epoch": 1.0598290598290598, + "high_lr": 0.0007878947368421054, + "low_lr": 1.5757894736842107e-05, + "step": 403 + }, + { + "epoch": 1.0598290598290598, + "high_lr": 0.0007878947368421054, + "low_lr": 1.5757894736842107e-05, + "step": 403 + }, + { + "epoch": 1.0598290598290598, + "high_lr": 0.0007878947368421054, + "low_lr": 1.5757894736842107e-05, + "step": 403 + }, + { + "epoch": 1.0598290598290598, + "high_lr": 0.0007878947368421054, + "low_lr": 1.5757894736842107e-05, + "step": 403 + }, + { + "epoch": 1.0598290598290598, + "high_lr": 0.0007878947368421054, + "low_lr": 1.5757894736842107e-05, + "step": 403 + }, + { + "epoch": 1.0598290598290598, + "high_lr": 0.0007878947368421054, + "low_lr": 1.5757894736842107e-05, + "step": 403 + }, + { + "epoch": 1.0598290598290598, + "high_lr": 0.0007878947368421054, + "low_lr": 1.5757894736842107e-05, + "step": 403 + }, + { + "epoch": 1.0598290598290598, + "high_lr": 0.0007878947368421054, + "low_lr": 1.5757894736842107e-05, + "step": 403 + }, + { + "epoch": 1.0624589086127547, + "grad_norm": 0.9963755011558533, + "learning_rate": 0.0007873684210526317, + "loss": 1.5674, + "step": 404 + }, + { + "epoch": 1.0624589086127547, + "high_lr": 0.0007873684210526317, + "low_lr": 1.5747368421052635e-05, + "step": 404 + }, + { + "epoch": 1.0624589086127547, + "high_lr": 0.0007873684210526317, + "low_lr": 1.5747368421052635e-05, + "step": 404 + }, + { + "epoch": 1.0624589086127547, + "high_lr": 0.0007873684210526317, + "low_lr": 1.5747368421052635e-05, + "step": 404 + }, + { + "epoch": 1.0624589086127547, + "high_lr": 0.0007873684210526317, + "low_lr": 1.5747368421052635e-05, + "step": 404 + }, + { + "epoch": 1.0624589086127547, + "high_lr": 0.0007873684210526317, + "low_lr": 1.5747368421052635e-05, + "step": 404 + }, + { + "epoch": 1.0624589086127547, + "high_lr": 0.0007873684210526317, + "low_lr": 1.5747368421052635e-05, + "step": 404 + }, + { + "epoch": 1.0624589086127547, + "high_lr": 0.0007873684210526317, + "low_lr": 1.5747368421052635e-05, + "step": 404 + }, + { + "epoch": 1.0624589086127547, + "high_lr": 0.0007873684210526317, + "low_lr": 1.5747368421052635e-05, + "step": 404 + }, + { + "epoch": 1.0650887573964498, + "grad_norm": 0.9359927773475647, + "learning_rate": 0.0007868421052631579, + "loss": 1.4635, + "step": 405 + }, + { + "epoch": 1.0650887573964498, + "high_lr": 0.0007868421052631579, + "low_lr": 1.573684210526316e-05, + "step": 405 + }, + { + "epoch": 1.0650887573964498, + "high_lr": 0.0007868421052631579, + "low_lr": 1.573684210526316e-05, + "step": 405 + }, + { + "epoch": 1.0650887573964498, + "high_lr": 0.0007868421052631579, + "low_lr": 1.573684210526316e-05, + "step": 405 + }, + { + "epoch": 1.0650887573964498, + "high_lr": 0.0007868421052631579, + "low_lr": 1.573684210526316e-05, + "step": 405 + }, + { + "epoch": 1.0650887573964498, + "high_lr": 0.0007868421052631579, + "low_lr": 1.573684210526316e-05, + "step": 405 + }, + { + "epoch": 1.0650887573964498, + "high_lr": 0.0007868421052631579, + "low_lr": 1.573684210526316e-05, + "step": 405 + }, + { + "epoch": 1.0650887573964498, + "high_lr": 0.0007868421052631579, + "low_lr": 1.573684210526316e-05, + "step": 405 + }, + { + "epoch": 1.0650887573964498, + "high_lr": 0.0007868421052631579, + "low_lr": 1.573684210526316e-05, + "step": 405 + }, + { + "epoch": 1.0677186061801447, + "grad_norm": 0.9304128289222717, + "learning_rate": 0.0007863157894736842, + "loss": 1.4867, + "step": 406 + }, + { + "epoch": 1.0677186061801447, + "high_lr": 0.0007863157894736842, + "low_lr": 1.5726315789473684e-05, + "step": 406 + }, + { + "epoch": 1.0677186061801447, + "high_lr": 0.0007863157894736842, + "low_lr": 1.5726315789473684e-05, + "step": 406 + }, + { + "epoch": 1.0677186061801447, + "high_lr": 0.0007863157894736842, + "low_lr": 1.5726315789473684e-05, + "step": 406 + }, + { + "epoch": 1.0677186061801447, + "high_lr": 0.0007863157894736842, + "low_lr": 1.5726315789473684e-05, + "step": 406 + }, + { + "epoch": 1.0677186061801447, + "high_lr": 0.0007863157894736842, + "low_lr": 1.5726315789473684e-05, + "step": 406 + }, + { + "epoch": 1.0677186061801447, + "high_lr": 0.0007863157894736842, + "low_lr": 1.5726315789473684e-05, + "step": 406 + }, + { + "epoch": 1.0677186061801447, + "high_lr": 0.0007863157894736842, + "low_lr": 1.5726315789473684e-05, + "step": 406 + }, + { + "epoch": 1.0677186061801447, + "high_lr": 0.0007863157894736842, + "low_lr": 1.5726315789473684e-05, + "step": 406 + }, + { + "epoch": 1.0703484549638396, + "grad_norm": 0.9901525974273682, + "learning_rate": 0.0007857894736842105, + "loss": 1.5089, + "step": 407 + }, + { + "epoch": 1.0703484549638396, + "high_lr": 0.0007857894736842105, + "low_lr": 1.5715789473684212e-05, + "step": 407 + }, + { + "epoch": 1.0703484549638396, + "high_lr": 0.0007857894736842105, + "low_lr": 1.5715789473684212e-05, + "step": 407 + }, + { + "epoch": 1.0703484549638396, + "high_lr": 0.0007857894736842105, + "low_lr": 1.5715789473684212e-05, + "step": 407 + }, + { + "epoch": 1.0703484549638396, + "high_lr": 0.0007857894736842105, + "low_lr": 1.5715789473684212e-05, + "step": 407 + }, + { + "epoch": 1.0703484549638396, + "high_lr": 0.0007857894736842105, + "low_lr": 1.5715789473684212e-05, + "step": 407 + }, + { + "epoch": 1.0703484549638396, + "high_lr": 0.0007857894736842105, + "low_lr": 1.5715789473684212e-05, + "step": 407 + }, + { + "epoch": 1.0703484549638396, + "high_lr": 0.0007857894736842105, + "low_lr": 1.5715789473684212e-05, + "step": 407 + }, + { + "epoch": 1.0703484549638396, + "high_lr": 0.0007857894736842105, + "low_lr": 1.5715789473684212e-05, + "step": 407 + }, + { + "epoch": 1.0729783037475344, + "grad_norm": 1.1033786535263062, + "learning_rate": 0.0007852631578947369, + "loss": 1.487, + "step": 408 + }, + { + "epoch": 1.0729783037475344, + "high_lr": 0.0007852631578947369, + "low_lr": 1.570526315789474e-05, + "step": 408 + }, + { + "epoch": 1.0729783037475344, + "high_lr": 0.0007852631578947369, + "low_lr": 1.570526315789474e-05, + "step": 408 + }, + { + "epoch": 1.0729783037475344, + "high_lr": 0.0007852631578947369, + "low_lr": 1.570526315789474e-05, + "step": 408 + }, + { + "epoch": 1.0729783037475344, + "high_lr": 0.0007852631578947369, + "low_lr": 1.570526315789474e-05, + "step": 408 + }, + { + "epoch": 1.0729783037475344, + "high_lr": 0.0007852631578947369, + "low_lr": 1.570526315789474e-05, + "step": 408 + }, + { + "epoch": 1.0729783037475344, + "high_lr": 0.0007852631578947369, + "low_lr": 1.570526315789474e-05, + "step": 408 + }, + { + "epoch": 1.0729783037475344, + "high_lr": 0.0007852631578947369, + "low_lr": 1.570526315789474e-05, + "step": 408 + }, + { + "epoch": 1.0729783037475344, + "high_lr": 0.0007852631578947369, + "low_lr": 1.570526315789474e-05, + "step": 408 + }, + { + "epoch": 1.0756081525312295, + "grad_norm": 1.0218335390090942, + "learning_rate": 0.0007847368421052632, + "loss": 1.5137, + "step": 409 + }, + { + "epoch": 1.0756081525312295, + "high_lr": 0.0007847368421052632, + "low_lr": 1.5694736842105264e-05, + "step": 409 + }, + { + "epoch": 1.0756081525312295, + "high_lr": 0.0007847368421052632, + "low_lr": 1.5694736842105264e-05, + "step": 409 + }, + { + "epoch": 1.0756081525312295, + "high_lr": 0.0007847368421052632, + "low_lr": 1.5694736842105264e-05, + "step": 409 + }, + { + "epoch": 1.0756081525312295, + "high_lr": 0.0007847368421052632, + "low_lr": 1.5694736842105264e-05, + "step": 409 + }, + { + "epoch": 1.0756081525312295, + "high_lr": 0.0007847368421052632, + "low_lr": 1.5694736842105264e-05, + "step": 409 + }, + { + "epoch": 1.0756081525312295, + "high_lr": 0.0007847368421052632, + "low_lr": 1.5694736842105264e-05, + "step": 409 + }, + { + "epoch": 1.0756081525312295, + "high_lr": 0.0007847368421052632, + "low_lr": 1.5694736842105264e-05, + "step": 409 + }, + { + "epoch": 1.0756081525312295, + "high_lr": 0.0007847368421052632, + "low_lr": 1.5694736842105264e-05, + "step": 409 + }, + { + "epoch": 1.0782380013149244, + "grad_norm": 0.9986996054649353, + "learning_rate": 0.0007842105263157895, + "loss": 1.5205, + "step": 410 + }, + { + "epoch": 1.0782380013149244, + "high_lr": 0.0007842105263157895, + "low_lr": 1.568421052631579e-05, + "step": 410 + }, + { + "epoch": 1.0782380013149244, + "high_lr": 0.0007842105263157895, + "low_lr": 1.568421052631579e-05, + "step": 410 + }, + { + "epoch": 1.0782380013149244, + "high_lr": 0.0007842105263157895, + "low_lr": 1.568421052631579e-05, + "step": 410 + }, + { + "epoch": 1.0782380013149244, + "high_lr": 0.0007842105263157895, + "low_lr": 1.568421052631579e-05, + "step": 410 + }, + { + "epoch": 1.0782380013149244, + "high_lr": 0.0007842105263157895, + "low_lr": 1.568421052631579e-05, + "step": 410 + }, + { + "epoch": 1.0782380013149244, + "high_lr": 0.0007842105263157895, + "low_lr": 1.568421052631579e-05, + "step": 410 + }, + { + "epoch": 1.0782380013149244, + "high_lr": 0.0007842105263157895, + "low_lr": 1.568421052631579e-05, + "step": 410 + }, + { + "epoch": 1.0782380013149244, + "high_lr": 0.0007842105263157895, + "low_lr": 1.568421052631579e-05, + "step": 410 + }, + { + "epoch": 1.0808678500986193, + "grad_norm": 0.9301803112030029, + "learning_rate": 0.0007836842105263158, + "loss": 1.4571, + "step": 411 + }, + { + "epoch": 1.0808678500986193, + "high_lr": 0.0007836842105263158, + "low_lr": 1.5673684210526317e-05, + "step": 411 + }, + { + "epoch": 1.0808678500986193, + "high_lr": 0.0007836842105263158, + "low_lr": 1.5673684210526317e-05, + "step": 411 + }, + { + "epoch": 1.0808678500986193, + "high_lr": 0.0007836842105263158, + "low_lr": 1.5673684210526317e-05, + "step": 411 + }, + { + "epoch": 1.0808678500986193, + "high_lr": 0.0007836842105263158, + "low_lr": 1.5673684210526317e-05, + "step": 411 + }, + { + "epoch": 1.0808678500986193, + "high_lr": 0.0007836842105263158, + "low_lr": 1.5673684210526317e-05, + "step": 411 + }, + { + "epoch": 1.0808678500986193, + "high_lr": 0.0007836842105263158, + "low_lr": 1.5673684210526317e-05, + "step": 411 + }, + { + "epoch": 1.0808678500986193, + "high_lr": 0.0007836842105263158, + "low_lr": 1.5673684210526317e-05, + "step": 411 + }, + { + "epoch": 1.0808678500986193, + "high_lr": 0.0007836842105263158, + "low_lr": 1.5673684210526317e-05, + "step": 411 + }, + { + "epoch": 1.0834976988823142, + "grad_norm": 0.975524365901947, + "learning_rate": 0.000783157894736842, + "loss": 1.5262, + "step": 412 + }, + { + "epoch": 1.0834976988823142, + "high_lr": 0.000783157894736842, + "low_lr": 1.5663157894736842e-05, + "step": 412 + }, + { + "epoch": 1.0834976988823142, + "high_lr": 0.000783157894736842, + "low_lr": 1.5663157894736842e-05, + "step": 412 + }, + { + "epoch": 1.0834976988823142, + "high_lr": 0.000783157894736842, + "low_lr": 1.5663157894736842e-05, + "step": 412 + }, + { + "epoch": 1.0834976988823142, + "high_lr": 0.000783157894736842, + "low_lr": 1.5663157894736842e-05, + "step": 412 + }, + { + "epoch": 1.0834976988823142, + "high_lr": 0.000783157894736842, + "low_lr": 1.5663157894736842e-05, + "step": 412 + }, + { + "epoch": 1.0834976988823142, + "high_lr": 0.000783157894736842, + "low_lr": 1.5663157894736842e-05, + "step": 412 + }, + { + "epoch": 1.0834976988823142, + "high_lr": 0.000783157894736842, + "low_lr": 1.5663157894736842e-05, + "step": 412 + }, + { + "epoch": 1.0834976988823142, + "high_lr": 0.000783157894736842, + "low_lr": 1.5663157894736842e-05, + "step": 412 + }, + { + "epoch": 1.0861275476660093, + "grad_norm": 0.9938554167747498, + "learning_rate": 0.0007826315789473684, + "loss": 1.5412, + "step": 413 + }, + { + "epoch": 1.0861275476660093, + "high_lr": 0.0007826315789473684, + "low_lr": 1.565263157894737e-05, + "step": 413 + }, + { + "epoch": 1.0861275476660093, + "high_lr": 0.0007826315789473684, + "low_lr": 1.565263157894737e-05, + "step": 413 + }, + { + "epoch": 1.0861275476660093, + "high_lr": 0.0007826315789473684, + "low_lr": 1.565263157894737e-05, + "step": 413 + }, + { + "epoch": 1.0861275476660093, + "high_lr": 0.0007826315789473684, + "low_lr": 1.565263157894737e-05, + "step": 413 + }, + { + "epoch": 1.0861275476660093, + "high_lr": 0.0007826315789473684, + "low_lr": 1.565263157894737e-05, + "step": 413 + }, + { + "epoch": 1.0861275476660093, + "high_lr": 0.0007826315789473684, + "low_lr": 1.565263157894737e-05, + "step": 413 + }, + { + "epoch": 1.0861275476660093, + "high_lr": 0.0007826315789473684, + "low_lr": 1.565263157894737e-05, + "step": 413 + }, + { + "epoch": 1.0861275476660093, + "high_lr": 0.0007826315789473684, + "low_lr": 1.565263157894737e-05, + "step": 413 + }, + { + "epoch": 1.0887573964497042, + "grad_norm": 0.9773283004760742, + "learning_rate": 0.0007821052631578947, + "loss": 1.5134, + "step": 414 + }, + { + "epoch": 1.0887573964497042, + "high_lr": 0.0007821052631578947, + "low_lr": 1.5642105263157898e-05, + "step": 414 + }, + { + "epoch": 1.0887573964497042, + "high_lr": 0.0007821052631578947, + "low_lr": 1.5642105263157898e-05, + "step": 414 + }, + { + "epoch": 1.0887573964497042, + "high_lr": 0.0007821052631578947, + "low_lr": 1.5642105263157898e-05, + "step": 414 + }, + { + "epoch": 1.0887573964497042, + "high_lr": 0.0007821052631578947, + "low_lr": 1.5642105263157898e-05, + "step": 414 + }, + { + "epoch": 1.0887573964497042, + "high_lr": 0.0007821052631578947, + "low_lr": 1.5642105263157898e-05, + "step": 414 + }, + { + "epoch": 1.0887573964497042, + "high_lr": 0.0007821052631578947, + "low_lr": 1.5642105263157898e-05, + "step": 414 + }, + { + "epoch": 1.0887573964497042, + "high_lr": 0.0007821052631578947, + "low_lr": 1.5642105263157898e-05, + "step": 414 + }, + { + "epoch": 1.0887573964497042, + "high_lr": 0.0007821052631578947, + "low_lr": 1.5642105263157898e-05, + "step": 414 + }, + { + "epoch": 1.091387245233399, + "grad_norm": 1.018235206604004, + "learning_rate": 0.000781578947368421, + "loss": 1.5264, + "step": 415 + }, + { + "epoch": 1.091387245233399, + "high_lr": 0.000781578947368421, + "low_lr": 1.5631578947368422e-05, + "step": 415 + }, + { + "epoch": 1.091387245233399, + "high_lr": 0.000781578947368421, + "low_lr": 1.5631578947368422e-05, + "step": 415 + }, + { + "epoch": 1.091387245233399, + "high_lr": 0.000781578947368421, + "low_lr": 1.5631578947368422e-05, + "step": 415 + }, + { + "epoch": 1.091387245233399, + "high_lr": 0.000781578947368421, + "low_lr": 1.5631578947368422e-05, + "step": 415 + }, + { + "epoch": 1.091387245233399, + "high_lr": 0.000781578947368421, + "low_lr": 1.5631578947368422e-05, + "step": 415 + }, + { + "epoch": 1.091387245233399, + "high_lr": 0.000781578947368421, + "low_lr": 1.5631578947368422e-05, + "step": 415 + }, + { + "epoch": 1.091387245233399, + "high_lr": 0.000781578947368421, + "low_lr": 1.5631578947368422e-05, + "step": 415 + }, + { + "epoch": 1.091387245233399, + "high_lr": 0.000781578947368421, + "low_lr": 1.5631578947368422e-05, + "step": 415 + }, + { + "epoch": 1.0940170940170941, + "grad_norm": 0.9918999671936035, + "learning_rate": 0.0007810526315789473, + "loss": 1.5063, + "step": 416 + }, + { + "epoch": 1.0940170940170941, + "high_lr": 0.0007810526315789473, + "low_lr": 1.5621052631578947e-05, + "step": 416 + }, + { + "epoch": 1.0940170940170941, + "high_lr": 0.0007810526315789473, + "low_lr": 1.5621052631578947e-05, + "step": 416 + }, + { + "epoch": 1.0940170940170941, + "high_lr": 0.0007810526315789473, + "low_lr": 1.5621052631578947e-05, + "step": 416 + }, + { + "epoch": 1.0940170940170941, + "high_lr": 0.0007810526315789473, + "low_lr": 1.5621052631578947e-05, + "step": 416 + }, + { + "epoch": 1.0940170940170941, + "high_lr": 0.0007810526315789473, + "low_lr": 1.5621052631578947e-05, + "step": 416 + }, + { + "epoch": 1.0940170940170941, + "high_lr": 0.0007810526315789473, + "low_lr": 1.5621052631578947e-05, + "step": 416 + }, + { + "epoch": 1.0940170940170941, + "high_lr": 0.0007810526315789473, + "low_lr": 1.5621052631578947e-05, + "step": 416 + }, + { + "epoch": 1.0940170940170941, + "high_lr": 0.0007810526315789473, + "low_lr": 1.5621052631578947e-05, + "step": 416 + }, + { + "epoch": 1.096646942800789, + "grad_norm": 0.954351007938385, + "learning_rate": 0.0007805263157894737, + "loss": 1.5365, + "step": 417 + }, + { + "epoch": 1.096646942800789, + "high_lr": 0.0007805263157894737, + "low_lr": 1.5610526315789475e-05, + "step": 417 + }, + { + "epoch": 1.096646942800789, + "high_lr": 0.0007805263157894737, + "low_lr": 1.5610526315789475e-05, + "step": 417 + }, + { + "epoch": 1.096646942800789, + "high_lr": 0.0007805263157894737, + "low_lr": 1.5610526315789475e-05, + "step": 417 + }, + { + "epoch": 1.096646942800789, + "high_lr": 0.0007805263157894737, + "low_lr": 1.5610526315789475e-05, + "step": 417 + }, + { + "epoch": 1.096646942800789, + "high_lr": 0.0007805263157894737, + "low_lr": 1.5610526315789475e-05, + "step": 417 + }, + { + "epoch": 1.096646942800789, + "high_lr": 0.0007805263157894737, + "low_lr": 1.5610526315789475e-05, + "step": 417 + }, + { + "epoch": 1.096646942800789, + "high_lr": 0.0007805263157894737, + "low_lr": 1.5610526315789475e-05, + "step": 417 + }, + { + "epoch": 1.096646942800789, + "high_lr": 0.0007805263157894737, + "low_lr": 1.5610526315789475e-05, + "step": 417 + }, + { + "epoch": 1.0992767915844839, + "grad_norm": 0.9554334282875061, + "learning_rate": 0.0007800000000000001, + "loss": 1.4601, + "step": 418 + }, + { + "epoch": 1.0992767915844839, + "high_lr": 0.0007800000000000001, + "low_lr": 1.5600000000000003e-05, + "step": 418 + }, + { + "epoch": 1.0992767915844839, + "high_lr": 0.0007800000000000001, + "low_lr": 1.5600000000000003e-05, + "step": 418 + }, + { + "epoch": 1.0992767915844839, + "high_lr": 0.0007800000000000001, + "low_lr": 1.5600000000000003e-05, + "step": 418 + }, + { + "epoch": 1.0992767915844839, + "high_lr": 0.0007800000000000001, + "low_lr": 1.5600000000000003e-05, + "step": 418 + }, + { + "epoch": 1.0992767915844839, + "high_lr": 0.0007800000000000001, + "low_lr": 1.5600000000000003e-05, + "step": 418 + }, + { + "epoch": 1.0992767915844839, + "high_lr": 0.0007800000000000001, + "low_lr": 1.5600000000000003e-05, + "step": 418 + }, + { + "epoch": 1.0992767915844839, + "high_lr": 0.0007800000000000001, + "low_lr": 1.5600000000000003e-05, + "step": 418 + }, + { + "epoch": 1.0992767915844839, + "high_lr": 0.0007800000000000001, + "low_lr": 1.5600000000000003e-05, + "step": 418 + }, + { + "epoch": 1.1019066403681788, + "grad_norm": 0.938503086566925, + "learning_rate": 0.0007794736842105264, + "loss": 1.5218, + "step": 419 + }, + { + "epoch": 1.1019066403681788, + "high_lr": 0.0007794736842105264, + "low_lr": 1.5589473684210528e-05, + "step": 419 + }, + { + "epoch": 1.1019066403681788, + "high_lr": 0.0007794736842105264, + "low_lr": 1.5589473684210528e-05, + "step": 419 + }, + { + "epoch": 1.1019066403681788, + "high_lr": 0.0007794736842105264, + "low_lr": 1.5589473684210528e-05, + "step": 419 + }, + { + "epoch": 1.1019066403681788, + "high_lr": 0.0007794736842105264, + "low_lr": 1.5589473684210528e-05, + "step": 419 + }, + { + "epoch": 1.1019066403681788, + "high_lr": 0.0007794736842105264, + "low_lr": 1.5589473684210528e-05, + "step": 419 + }, + { + "epoch": 1.1019066403681788, + "high_lr": 0.0007794736842105264, + "low_lr": 1.5589473684210528e-05, + "step": 419 + }, + { + "epoch": 1.1019066403681788, + "high_lr": 0.0007794736842105264, + "low_lr": 1.5589473684210528e-05, + "step": 419 + }, + { + "epoch": 1.1019066403681788, + "high_lr": 0.0007794736842105264, + "low_lr": 1.5589473684210528e-05, + "step": 419 + }, + { + "epoch": 1.1045364891518739, + "grad_norm": 1.0199605226516724, + "learning_rate": 0.0007789473684210527, + "loss": 1.5454, + "step": 420 + }, + { + "epoch": 1.1045364891518739, + "high_lr": 0.0007789473684210527, + "low_lr": 1.5578947368421052e-05, + "step": 420 + }, + { + "epoch": 1.1045364891518739, + "high_lr": 0.0007789473684210527, + "low_lr": 1.5578947368421052e-05, + "step": 420 + }, + { + "epoch": 1.1045364891518739, + "high_lr": 0.0007789473684210527, + "low_lr": 1.5578947368421052e-05, + "step": 420 + }, + { + "epoch": 1.1045364891518739, + "high_lr": 0.0007789473684210527, + "low_lr": 1.5578947368421052e-05, + "step": 420 + }, + { + "epoch": 1.1045364891518739, + "high_lr": 0.0007789473684210527, + "low_lr": 1.5578947368421052e-05, + "step": 420 + }, + { + "epoch": 1.1045364891518739, + "high_lr": 0.0007789473684210527, + "low_lr": 1.5578947368421052e-05, + "step": 420 + }, + { + "epoch": 1.1045364891518739, + "high_lr": 0.0007789473684210527, + "low_lr": 1.5578947368421052e-05, + "step": 420 + }, + { + "epoch": 1.1045364891518739, + "high_lr": 0.0007789473684210527, + "low_lr": 1.5578947368421052e-05, + "step": 420 + }, + { + "epoch": 1.1071663379355687, + "grad_norm": 0.986962080001831, + "learning_rate": 0.000778421052631579, + "loss": 1.5153, + "step": 421 + }, + { + "epoch": 1.1071663379355687, + "high_lr": 0.000778421052631579, + "low_lr": 1.556842105263158e-05, + "step": 421 + }, + { + "epoch": 1.1071663379355687, + "high_lr": 0.000778421052631579, + "low_lr": 1.556842105263158e-05, + "step": 421 + }, + { + "epoch": 1.1071663379355687, + "high_lr": 0.000778421052631579, + "low_lr": 1.556842105263158e-05, + "step": 421 + }, + { + "epoch": 1.1071663379355687, + "high_lr": 0.000778421052631579, + "low_lr": 1.556842105263158e-05, + "step": 421 + }, + { + "epoch": 1.1071663379355687, + "high_lr": 0.000778421052631579, + "low_lr": 1.556842105263158e-05, + "step": 421 + }, + { + "epoch": 1.1071663379355687, + "high_lr": 0.000778421052631579, + "low_lr": 1.556842105263158e-05, + "step": 421 + }, + { + "epoch": 1.1071663379355687, + "high_lr": 0.000778421052631579, + "low_lr": 1.556842105263158e-05, + "step": 421 + }, + { + "epoch": 1.1071663379355687, + "high_lr": 0.000778421052631579, + "low_lr": 1.556842105263158e-05, + "step": 421 + }, + { + "epoch": 1.1097961867192636, + "grad_norm": 0.9597830176353455, + "learning_rate": 0.0007778947368421053, + "loss": 1.444, + "step": 422 + }, + { + "epoch": 1.1097961867192636, + "high_lr": 0.0007778947368421053, + "low_lr": 1.555789473684211e-05, + "step": 422 + }, + { + "epoch": 1.1097961867192636, + "high_lr": 0.0007778947368421053, + "low_lr": 1.555789473684211e-05, + "step": 422 + }, + { + "epoch": 1.1097961867192636, + "high_lr": 0.0007778947368421053, + "low_lr": 1.555789473684211e-05, + "step": 422 + }, + { + "epoch": 1.1097961867192636, + "high_lr": 0.0007778947368421053, + "low_lr": 1.555789473684211e-05, + "step": 422 + }, + { + "epoch": 1.1097961867192636, + "high_lr": 0.0007778947368421053, + "low_lr": 1.555789473684211e-05, + "step": 422 + }, + { + "epoch": 1.1097961867192636, + "high_lr": 0.0007778947368421053, + "low_lr": 1.555789473684211e-05, + "step": 422 + }, + { + "epoch": 1.1097961867192636, + "high_lr": 0.0007778947368421053, + "low_lr": 1.555789473684211e-05, + "step": 422 + }, + { + "epoch": 1.1097961867192636, + "high_lr": 0.0007778947368421053, + "low_lr": 1.555789473684211e-05, + "step": 422 + }, + { + "epoch": 1.1124260355029585, + "grad_norm": 0.9897134900093079, + "learning_rate": 0.0007773684210526316, + "loss": 1.4764, + "step": 423 + }, + { + "epoch": 1.1124260355029585, + "high_lr": 0.0007773684210526316, + "low_lr": 1.5547368421052633e-05, + "step": 423 + }, + { + "epoch": 1.1124260355029585, + "high_lr": 0.0007773684210526316, + "low_lr": 1.5547368421052633e-05, + "step": 423 + }, + { + "epoch": 1.1124260355029585, + "high_lr": 0.0007773684210526316, + "low_lr": 1.5547368421052633e-05, + "step": 423 + }, + { + "epoch": 1.1124260355029585, + "high_lr": 0.0007773684210526316, + "low_lr": 1.5547368421052633e-05, + "step": 423 + }, + { + "epoch": 1.1124260355029585, + "high_lr": 0.0007773684210526316, + "low_lr": 1.5547368421052633e-05, + "step": 423 + }, + { + "epoch": 1.1124260355029585, + "high_lr": 0.0007773684210526316, + "low_lr": 1.5547368421052633e-05, + "step": 423 + }, + { + "epoch": 1.1124260355029585, + "high_lr": 0.0007773684210526316, + "low_lr": 1.5547368421052633e-05, + "step": 423 + }, + { + "epoch": 1.1124260355029585, + "high_lr": 0.0007773684210526316, + "low_lr": 1.5547368421052633e-05, + "step": 423 + }, + { + "epoch": 1.1150558842866536, + "grad_norm": 1.0836101770401, + "learning_rate": 0.0007768421052631579, + "loss": 1.5184, + "step": 424 + }, + { + "epoch": 1.1150558842866536, + "high_lr": 0.0007768421052631579, + "low_lr": 1.5536842105263158e-05, + "step": 424 + }, + { + "epoch": 1.1150558842866536, + "high_lr": 0.0007768421052631579, + "low_lr": 1.5536842105263158e-05, + "step": 424 + }, + { + "epoch": 1.1150558842866536, + "high_lr": 0.0007768421052631579, + "low_lr": 1.5536842105263158e-05, + "step": 424 + }, + { + "epoch": 1.1150558842866536, + "high_lr": 0.0007768421052631579, + "low_lr": 1.5536842105263158e-05, + "step": 424 + }, + { + "epoch": 1.1150558842866536, + "high_lr": 0.0007768421052631579, + "low_lr": 1.5536842105263158e-05, + "step": 424 + }, + { + "epoch": 1.1150558842866536, + "high_lr": 0.0007768421052631579, + "low_lr": 1.5536842105263158e-05, + "step": 424 + }, + { + "epoch": 1.1150558842866536, + "high_lr": 0.0007768421052631579, + "low_lr": 1.5536842105263158e-05, + "step": 424 + }, + { + "epoch": 1.1150558842866536, + "high_lr": 0.0007768421052631579, + "low_lr": 1.5536842105263158e-05, + "step": 424 + }, + { + "epoch": 1.1176857330703485, + "grad_norm": 1.0581777095794678, + "learning_rate": 0.0007763157894736842, + "loss": 1.5481, + "step": 425 + }, + { + "epoch": 1.1176857330703485, + "high_lr": 0.0007763157894736842, + "low_lr": 1.5526315789473686e-05, + "step": 425 + }, + { + "epoch": 1.1176857330703485, + "high_lr": 0.0007763157894736842, + "low_lr": 1.5526315789473686e-05, + "step": 425 + }, + { + "epoch": 1.1176857330703485, + "high_lr": 0.0007763157894736842, + "low_lr": 1.5526315789473686e-05, + "step": 425 + }, + { + "epoch": 1.1176857330703485, + "high_lr": 0.0007763157894736842, + "low_lr": 1.5526315789473686e-05, + "step": 425 + }, + { + "epoch": 1.1176857330703485, + "high_lr": 0.0007763157894736842, + "low_lr": 1.5526315789473686e-05, + "step": 425 + }, + { + "epoch": 1.1176857330703485, + "high_lr": 0.0007763157894736842, + "low_lr": 1.5526315789473686e-05, + "step": 425 + }, + { + "epoch": 1.1176857330703485, + "high_lr": 0.0007763157894736842, + "low_lr": 1.5526315789473686e-05, + "step": 425 + }, + { + "epoch": 1.1176857330703485, + "high_lr": 0.0007763157894736842, + "low_lr": 1.5526315789473686e-05, + "step": 425 + }, + { + "epoch": 1.1203155818540433, + "grad_norm": 1.011803150177002, + "learning_rate": 0.0007757894736842105, + "loss": 1.5031, + "step": 426 + }, + { + "epoch": 1.1203155818540433, + "high_lr": 0.0007757894736842105, + "low_lr": 1.551578947368421e-05, + "step": 426 + }, + { + "epoch": 1.1203155818540433, + "high_lr": 0.0007757894736842105, + "low_lr": 1.551578947368421e-05, + "step": 426 + }, + { + "epoch": 1.1203155818540433, + "high_lr": 0.0007757894736842105, + "low_lr": 1.551578947368421e-05, + "step": 426 + }, + { + "epoch": 1.1203155818540433, + "high_lr": 0.0007757894736842105, + "low_lr": 1.551578947368421e-05, + "step": 426 + }, + { + "epoch": 1.1203155818540433, + "high_lr": 0.0007757894736842105, + "low_lr": 1.551578947368421e-05, + "step": 426 + }, + { + "epoch": 1.1203155818540433, + "high_lr": 0.0007757894736842105, + "low_lr": 1.551578947368421e-05, + "step": 426 + }, + { + "epoch": 1.1203155818540433, + "high_lr": 0.0007757894736842105, + "low_lr": 1.551578947368421e-05, + "step": 426 + }, + { + "epoch": 1.1203155818540433, + "high_lr": 0.0007757894736842105, + "low_lr": 1.551578947368421e-05, + "step": 426 + }, + { + "epoch": 1.1229454306377384, + "grad_norm": 0.9950527548789978, + "learning_rate": 0.0007752631578947369, + "loss": 1.526, + "step": 427 + }, + { + "epoch": 1.1229454306377384, + "high_lr": 0.0007752631578947369, + "low_lr": 1.550526315789474e-05, + "step": 427 + }, + { + "epoch": 1.1229454306377384, + "high_lr": 0.0007752631578947369, + "low_lr": 1.550526315789474e-05, + "step": 427 + }, + { + "epoch": 1.1229454306377384, + "high_lr": 0.0007752631578947369, + "low_lr": 1.550526315789474e-05, + "step": 427 + }, + { + "epoch": 1.1229454306377384, + "high_lr": 0.0007752631578947369, + "low_lr": 1.550526315789474e-05, + "step": 427 + }, + { + "epoch": 1.1229454306377384, + "high_lr": 0.0007752631578947369, + "low_lr": 1.550526315789474e-05, + "step": 427 + }, + { + "epoch": 1.1229454306377384, + "high_lr": 0.0007752631578947369, + "low_lr": 1.550526315789474e-05, + "step": 427 + }, + { + "epoch": 1.1229454306377384, + "high_lr": 0.0007752631578947369, + "low_lr": 1.550526315789474e-05, + "step": 427 + }, + { + "epoch": 1.1229454306377384, + "high_lr": 0.0007752631578947369, + "low_lr": 1.550526315789474e-05, + "step": 427 + }, + { + "epoch": 1.1255752794214333, + "grad_norm": 1.0123891830444336, + "learning_rate": 0.0007747368421052632, + "loss": 1.5468, + "step": 428 + }, + { + "epoch": 1.1255752794214333, + "high_lr": 0.0007747368421052632, + "low_lr": 1.5494736842105263e-05, + "step": 428 + }, + { + "epoch": 1.1255752794214333, + "high_lr": 0.0007747368421052632, + "low_lr": 1.5494736842105263e-05, + "step": 428 + }, + { + "epoch": 1.1255752794214333, + "high_lr": 0.0007747368421052632, + "low_lr": 1.5494736842105263e-05, + "step": 428 + }, + { + "epoch": 1.1255752794214333, + "high_lr": 0.0007747368421052632, + "low_lr": 1.5494736842105263e-05, + "step": 428 + }, + { + "epoch": 1.1255752794214333, + "high_lr": 0.0007747368421052632, + "low_lr": 1.5494736842105263e-05, + "step": 428 + }, + { + "epoch": 1.1255752794214333, + "high_lr": 0.0007747368421052632, + "low_lr": 1.5494736842105263e-05, + "step": 428 + }, + { + "epoch": 1.1255752794214333, + "high_lr": 0.0007747368421052632, + "low_lr": 1.5494736842105263e-05, + "step": 428 + }, + { + "epoch": 1.1255752794214333, + "high_lr": 0.0007747368421052632, + "low_lr": 1.5494736842105263e-05, + "step": 428 + }, + { + "epoch": 1.1282051282051282, + "grad_norm": 1.0693731307983398, + "learning_rate": 0.0007742105263157895, + "loss": 1.4981, + "step": 429 + }, + { + "epoch": 1.1282051282051282, + "high_lr": 0.0007742105263157895, + "low_lr": 1.548421052631579e-05, + "step": 429 + }, + { + "epoch": 1.1282051282051282, + "high_lr": 0.0007742105263157895, + "low_lr": 1.548421052631579e-05, + "step": 429 + }, + { + "epoch": 1.1282051282051282, + "high_lr": 0.0007742105263157895, + "low_lr": 1.548421052631579e-05, + "step": 429 + }, + { + "epoch": 1.1282051282051282, + "high_lr": 0.0007742105263157895, + "low_lr": 1.548421052631579e-05, + "step": 429 + }, + { + "epoch": 1.1282051282051282, + "high_lr": 0.0007742105263157895, + "low_lr": 1.548421052631579e-05, + "step": 429 + }, + { + "epoch": 1.1282051282051282, + "high_lr": 0.0007742105263157895, + "low_lr": 1.548421052631579e-05, + "step": 429 + }, + { + "epoch": 1.1282051282051282, + "high_lr": 0.0007742105263157895, + "low_lr": 1.548421052631579e-05, + "step": 429 + }, + { + "epoch": 1.1282051282051282, + "high_lr": 0.0007742105263157895, + "low_lr": 1.548421052631579e-05, + "step": 429 + }, + { + "epoch": 1.130834976988823, + "grad_norm": 1.0787478685379028, + "learning_rate": 0.0007736842105263157, + "loss": 1.5177, + "step": 430 + }, + { + "epoch": 1.130834976988823, + "high_lr": 0.0007736842105263157, + "low_lr": 1.5473684210526316e-05, + "step": 430 + }, + { + "epoch": 1.130834976988823, + "high_lr": 0.0007736842105263157, + "low_lr": 1.5473684210526316e-05, + "step": 430 + }, + { + "epoch": 1.130834976988823, + "high_lr": 0.0007736842105263157, + "low_lr": 1.5473684210526316e-05, + "step": 430 + }, + { + "epoch": 1.130834976988823, + "high_lr": 0.0007736842105263157, + "low_lr": 1.5473684210526316e-05, + "step": 430 + }, + { + "epoch": 1.130834976988823, + "high_lr": 0.0007736842105263157, + "low_lr": 1.5473684210526316e-05, + "step": 430 + }, + { + "epoch": 1.130834976988823, + "high_lr": 0.0007736842105263157, + "low_lr": 1.5473684210526316e-05, + "step": 430 + }, + { + "epoch": 1.130834976988823, + "high_lr": 0.0007736842105263157, + "low_lr": 1.5473684210526316e-05, + "step": 430 + }, + { + "epoch": 1.130834976988823, + "high_lr": 0.0007736842105263157, + "low_lr": 1.5473684210526316e-05, + "step": 430 + }, + { + "epoch": 1.1334648257725182, + "grad_norm": 1.0918725728988647, + "learning_rate": 0.0007731578947368421, + "loss": 1.5557, + "step": 431 + }, + { + "epoch": 1.1334648257725182, + "high_lr": 0.0007731578947368421, + "low_lr": 1.5463157894736844e-05, + "step": 431 + }, + { + "epoch": 1.1334648257725182, + "high_lr": 0.0007731578947368421, + "low_lr": 1.5463157894736844e-05, + "step": 431 + }, + { + "epoch": 1.1334648257725182, + "high_lr": 0.0007731578947368421, + "low_lr": 1.5463157894736844e-05, + "step": 431 + }, + { + "epoch": 1.1334648257725182, + "high_lr": 0.0007731578947368421, + "low_lr": 1.5463157894736844e-05, + "step": 431 + }, + { + "epoch": 1.1334648257725182, + "high_lr": 0.0007731578947368421, + "low_lr": 1.5463157894736844e-05, + "step": 431 + }, + { + "epoch": 1.1334648257725182, + "high_lr": 0.0007731578947368421, + "low_lr": 1.5463157894736844e-05, + "step": 431 + }, + { + "epoch": 1.1334648257725182, + "high_lr": 0.0007731578947368421, + "low_lr": 1.5463157894736844e-05, + "step": 431 + }, + { + "epoch": 1.1334648257725182, + "high_lr": 0.0007731578947368421, + "low_lr": 1.5463157894736844e-05, + "step": 431 + }, + { + "epoch": 1.136094674556213, + "grad_norm": 1.0071529150009155, + "learning_rate": 0.0007726315789473684, + "loss": 1.5034, + "step": 432 + }, + { + "epoch": 1.136094674556213, + "high_lr": 0.0007726315789473684, + "low_lr": 1.545263157894737e-05, + "step": 432 + }, + { + "epoch": 1.136094674556213, + "high_lr": 0.0007726315789473684, + "low_lr": 1.545263157894737e-05, + "step": 432 + }, + { + "epoch": 1.136094674556213, + "high_lr": 0.0007726315789473684, + "low_lr": 1.545263157894737e-05, + "step": 432 + }, + { + "epoch": 1.136094674556213, + "high_lr": 0.0007726315789473684, + "low_lr": 1.545263157894737e-05, + "step": 432 + }, + { + "epoch": 1.136094674556213, + "high_lr": 0.0007726315789473684, + "low_lr": 1.545263157894737e-05, + "step": 432 + }, + { + "epoch": 1.136094674556213, + "high_lr": 0.0007726315789473684, + "low_lr": 1.545263157894737e-05, + "step": 432 + }, + { + "epoch": 1.136094674556213, + "high_lr": 0.0007726315789473684, + "low_lr": 1.545263157894737e-05, + "step": 432 + }, + { + "epoch": 1.136094674556213, + "high_lr": 0.0007726315789473684, + "low_lr": 1.545263157894737e-05, + "step": 432 + }, + { + "epoch": 1.138724523339908, + "grad_norm": 1.2867735624313354, + "learning_rate": 0.0007721052631578947, + "loss": 1.5102, + "step": 433 + }, + { + "epoch": 1.138724523339908, + "high_lr": 0.0007721052631578947, + "low_lr": 1.5442105263157896e-05, + "step": 433 + }, + { + "epoch": 1.138724523339908, + "high_lr": 0.0007721052631578947, + "low_lr": 1.5442105263157896e-05, + "step": 433 + }, + { + "epoch": 1.138724523339908, + "high_lr": 0.0007721052631578947, + "low_lr": 1.5442105263157896e-05, + "step": 433 + }, + { + "epoch": 1.138724523339908, + "high_lr": 0.0007721052631578947, + "low_lr": 1.5442105263157896e-05, + "step": 433 + }, + { + "epoch": 1.138724523339908, + "high_lr": 0.0007721052631578947, + "low_lr": 1.5442105263157896e-05, + "step": 433 + }, + { + "epoch": 1.138724523339908, + "high_lr": 0.0007721052631578947, + "low_lr": 1.5442105263157896e-05, + "step": 433 + }, + { + "epoch": 1.138724523339908, + "high_lr": 0.0007721052631578947, + "low_lr": 1.5442105263157896e-05, + "step": 433 + }, + { + "epoch": 1.138724523339908, + "high_lr": 0.0007721052631578947, + "low_lr": 1.5442105263157896e-05, + "step": 433 + }, + { + "epoch": 1.1413543721236028, + "grad_norm": 1.1004258394241333, + "learning_rate": 0.000771578947368421, + "loss": 1.4904, + "step": 434 + }, + { + "epoch": 1.1413543721236028, + "high_lr": 0.000771578947368421, + "low_lr": 1.543157894736842e-05, + "step": 434 + }, + { + "epoch": 1.1413543721236028, + "high_lr": 0.000771578947368421, + "low_lr": 1.543157894736842e-05, + "step": 434 + }, + { + "epoch": 1.1413543721236028, + "high_lr": 0.000771578947368421, + "low_lr": 1.543157894736842e-05, + "step": 434 + }, + { + "epoch": 1.1413543721236028, + "high_lr": 0.000771578947368421, + "low_lr": 1.543157894736842e-05, + "step": 434 + }, + { + "epoch": 1.1413543721236028, + "high_lr": 0.000771578947368421, + "low_lr": 1.543157894736842e-05, + "step": 434 + }, + { + "epoch": 1.1413543721236028, + "high_lr": 0.000771578947368421, + "low_lr": 1.543157894736842e-05, + "step": 434 + }, + { + "epoch": 1.1413543721236028, + "high_lr": 0.000771578947368421, + "low_lr": 1.543157894736842e-05, + "step": 434 + }, + { + "epoch": 1.1413543721236028, + "high_lr": 0.000771578947368421, + "low_lr": 1.543157894736842e-05, + "step": 434 + }, + { + "epoch": 1.143984220907298, + "grad_norm": 1.057671070098877, + "learning_rate": 0.0007710526315789474, + "loss": 1.5217, + "step": 435 + }, + { + "epoch": 1.143984220907298, + "high_lr": 0.0007710526315789474, + "low_lr": 1.542105263157895e-05, + "step": 435 + }, + { + "epoch": 1.143984220907298, + "high_lr": 0.0007710526315789474, + "low_lr": 1.542105263157895e-05, + "step": 435 + }, + { + "epoch": 1.143984220907298, + "high_lr": 0.0007710526315789474, + "low_lr": 1.542105263157895e-05, + "step": 435 + }, + { + "epoch": 1.143984220907298, + "high_lr": 0.0007710526315789474, + "low_lr": 1.542105263157895e-05, + "step": 435 + }, + { + "epoch": 1.143984220907298, + "high_lr": 0.0007710526315789474, + "low_lr": 1.542105263157895e-05, + "step": 435 + }, + { + "epoch": 1.143984220907298, + "high_lr": 0.0007710526315789474, + "low_lr": 1.542105263157895e-05, + "step": 435 + }, + { + "epoch": 1.143984220907298, + "high_lr": 0.0007710526315789474, + "low_lr": 1.542105263157895e-05, + "step": 435 + }, + { + "epoch": 1.143984220907298, + "high_lr": 0.0007710526315789474, + "low_lr": 1.542105263157895e-05, + "step": 435 + }, + { + "epoch": 1.1466140696909928, + "grad_norm": 1.0055783987045288, + "learning_rate": 0.0007705263157894738, + "loss": 1.5086, + "step": 436 + }, + { + "epoch": 1.1466140696909928, + "high_lr": 0.0007705263157894738, + "low_lr": 1.5410526315789477e-05, + "step": 436 + }, + { + "epoch": 1.1466140696909928, + "high_lr": 0.0007705263157894738, + "low_lr": 1.5410526315789477e-05, + "step": 436 + }, + { + "epoch": 1.1466140696909928, + "high_lr": 0.0007705263157894738, + "low_lr": 1.5410526315789477e-05, + "step": 436 + }, + { + "epoch": 1.1466140696909928, + "high_lr": 0.0007705263157894738, + "low_lr": 1.5410526315789477e-05, + "step": 436 + }, + { + "epoch": 1.1466140696909928, + "high_lr": 0.0007705263157894738, + "low_lr": 1.5410526315789477e-05, + "step": 436 + }, + { + "epoch": 1.1466140696909928, + "high_lr": 0.0007705263157894738, + "low_lr": 1.5410526315789477e-05, + "step": 436 + }, + { + "epoch": 1.1466140696909928, + "high_lr": 0.0007705263157894738, + "low_lr": 1.5410526315789477e-05, + "step": 436 + }, + { + "epoch": 1.1466140696909928, + "high_lr": 0.0007705263157894738, + "low_lr": 1.5410526315789477e-05, + "step": 436 + }, + { + "epoch": 1.1492439184746877, + "grad_norm": 1.0507538318634033, + "learning_rate": 0.0007700000000000001, + "loss": 1.5008, + "step": 437 + }, + { + "epoch": 1.1492439184746877, + "high_lr": 0.0007700000000000001, + "low_lr": 1.54e-05, + "step": 437 + }, + { + "epoch": 1.1492439184746877, + "high_lr": 0.0007700000000000001, + "low_lr": 1.54e-05, + "step": 437 + }, + { + "epoch": 1.1492439184746877, + "high_lr": 0.0007700000000000001, + "low_lr": 1.54e-05, + "step": 437 + }, + { + "epoch": 1.1492439184746877, + "high_lr": 0.0007700000000000001, + "low_lr": 1.54e-05, + "step": 437 + }, + { + "epoch": 1.1492439184746877, + "high_lr": 0.0007700000000000001, + "low_lr": 1.54e-05, + "step": 437 + }, + { + "epoch": 1.1492439184746877, + "high_lr": 0.0007700000000000001, + "low_lr": 1.54e-05, + "step": 437 + }, + { + "epoch": 1.1492439184746877, + "high_lr": 0.0007700000000000001, + "low_lr": 1.54e-05, + "step": 437 + }, + { + "epoch": 1.1492439184746877, + "high_lr": 0.0007700000000000001, + "low_lr": 1.54e-05, + "step": 437 + }, + { + "epoch": 1.1518737672583828, + "grad_norm": 0.9970671534538269, + "learning_rate": 0.0007694736842105264, + "loss": 1.477, + "step": 438 + }, + { + "epoch": 1.1518737672583828, + "high_lr": 0.0007694736842105264, + "low_lr": 1.5389473684210526e-05, + "step": 438 + }, + { + "epoch": 1.1518737672583828, + "high_lr": 0.0007694736842105264, + "low_lr": 1.5389473684210526e-05, + "step": 438 + }, + { + "epoch": 1.1518737672583828, + "high_lr": 0.0007694736842105264, + "low_lr": 1.5389473684210526e-05, + "step": 438 + }, + { + "epoch": 1.1518737672583828, + "high_lr": 0.0007694736842105264, + "low_lr": 1.5389473684210526e-05, + "step": 438 + }, + { + "epoch": 1.1518737672583828, + "high_lr": 0.0007694736842105264, + "low_lr": 1.5389473684210526e-05, + "step": 438 + }, + { + "epoch": 1.1518737672583828, + "high_lr": 0.0007694736842105264, + "low_lr": 1.5389473684210526e-05, + "step": 438 + }, + { + "epoch": 1.1518737672583828, + "high_lr": 0.0007694736842105264, + "low_lr": 1.5389473684210526e-05, + "step": 438 + }, + { + "epoch": 1.1518737672583828, + "high_lr": 0.0007694736842105264, + "low_lr": 1.5389473684210526e-05, + "step": 438 + }, + { + "epoch": 1.1545036160420776, + "grad_norm": 1.0109833478927612, + "learning_rate": 0.0007689473684210526, + "loss": 1.5215, + "step": 439 + }, + { + "epoch": 1.1545036160420776, + "high_lr": 0.0007689473684210526, + "low_lr": 1.5378947368421054e-05, + "step": 439 + }, + { + "epoch": 1.1545036160420776, + "high_lr": 0.0007689473684210526, + "low_lr": 1.5378947368421054e-05, + "step": 439 + }, + { + "epoch": 1.1545036160420776, + "high_lr": 0.0007689473684210526, + "low_lr": 1.5378947368421054e-05, + "step": 439 + }, + { + "epoch": 1.1545036160420776, + "high_lr": 0.0007689473684210526, + "low_lr": 1.5378947368421054e-05, + "step": 439 + }, + { + "epoch": 1.1545036160420776, + "high_lr": 0.0007689473684210526, + "low_lr": 1.5378947368421054e-05, + "step": 439 + }, + { + "epoch": 1.1545036160420776, + "high_lr": 0.0007689473684210526, + "low_lr": 1.5378947368421054e-05, + "step": 439 + }, + { + "epoch": 1.1545036160420776, + "high_lr": 0.0007689473684210526, + "low_lr": 1.5378947368421054e-05, + "step": 439 + }, + { + "epoch": 1.1545036160420776, + "high_lr": 0.0007689473684210526, + "low_lr": 1.5378947368421054e-05, + "step": 439 + }, + { + "epoch": 1.1571334648257725, + "grad_norm": 1.018898606300354, + "learning_rate": 0.0007684210526315789, + "loss": 1.5551, + "step": 440 + }, + { + "epoch": 1.1571334648257725, + "high_lr": 0.0007684210526315789, + "low_lr": 1.536842105263158e-05, + "step": 440 + }, + { + "epoch": 1.1571334648257725, + "high_lr": 0.0007684210526315789, + "low_lr": 1.536842105263158e-05, + "step": 440 + }, + { + "epoch": 1.1571334648257725, + "high_lr": 0.0007684210526315789, + "low_lr": 1.536842105263158e-05, + "step": 440 + }, + { + "epoch": 1.1571334648257725, + "high_lr": 0.0007684210526315789, + "low_lr": 1.536842105263158e-05, + "step": 440 + }, + { + "epoch": 1.1571334648257725, + "high_lr": 0.0007684210526315789, + "low_lr": 1.536842105263158e-05, + "step": 440 + }, + { + "epoch": 1.1571334648257725, + "high_lr": 0.0007684210526315789, + "low_lr": 1.536842105263158e-05, + "step": 440 + }, + { + "epoch": 1.1571334648257725, + "high_lr": 0.0007684210526315789, + "low_lr": 1.536842105263158e-05, + "step": 440 + }, + { + "epoch": 1.1571334648257725, + "high_lr": 0.0007684210526315789, + "low_lr": 1.536842105263158e-05, + "step": 440 + }, + { + "epoch": 1.1597633136094674, + "grad_norm": 1.0163764953613281, + "learning_rate": 0.0007678947368421053, + "loss": 1.493, + "step": 441 + }, + { + "epoch": 1.1597633136094674, + "high_lr": 0.0007678947368421053, + "low_lr": 1.5357894736842107e-05, + "step": 441 + }, + { + "epoch": 1.1597633136094674, + "high_lr": 0.0007678947368421053, + "low_lr": 1.5357894736842107e-05, + "step": 441 + }, + { + "epoch": 1.1597633136094674, + "high_lr": 0.0007678947368421053, + "low_lr": 1.5357894736842107e-05, + "step": 441 + }, + { + "epoch": 1.1597633136094674, + "high_lr": 0.0007678947368421053, + "low_lr": 1.5357894736842107e-05, + "step": 441 + }, + { + "epoch": 1.1597633136094674, + "high_lr": 0.0007678947368421053, + "low_lr": 1.5357894736842107e-05, + "step": 441 + }, + { + "epoch": 1.1597633136094674, + "high_lr": 0.0007678947368421053, + "low_lr": 1.5357894736842107e-05, + "step": 441 + }, + { + "epoch": 1.1597633136094674, + "high_lr": 0.0007678947368421053, + "low_lr": 1.5357894736842107e-05, + "step": 441 + }, + { + "epoch": 1.1597633136094674, + "high_lr": 0.0007678947368421053, + "low_lr": 1.5357894736842107e-05, + "step": 441 + }, + { + "epoch": 1.1623931623931625, + "grad_norm": 0.9848003387451172, + "learning_rate": 0.0007673684210526316, + "loss": 1.564, + "step": 442 + }, + { + "epoch": 1.1623931623931625, + "high_lr": 0.0007673684210526316, + "low_lr": 1.534736842105263e-05, + "step": 442 + }, + { + "epoch": 1.1623931623931625, + "high_lr": 0.0007673684210526316, + "low_lr": 1.534736842105263e-05, + "step": 442 + }, + { + "epoch": 1.1623931623931625, + "high_lr": 0.0007673684210526316, + "low_lr": 1.534736842105263e-05, + "step": 442 + }, + { + "epoch": 1.1623931623931625, + "high_lr": 0.0007673684210526316, + "low_lr": 1.534736842105263e-05, + "step": 442 + }, + { + "epoch": 1.1623931623931625, + "high_lr": 0.0007673684210526316, + "low_lr": 1.534736842105263e-05, + "step": 442 + }, + { + "epoch": 1.1623931623931625, + "high_lr": 0.0007673684210526316, + "low_lr": 1.534736842105263e-05, + "step": 442 + }, + { + "epoch": 1.1623931623931625, + "high_lr": 0.0007673684210526316, + "low_lr": 1.534736842105263e-05, + "step": 442 + }, + { + "epoch": 1.1623931623931625, + "high_lr": 0.0007673684210526316, + "low_lr": 1.534736842105263e-05, + "step": 442 + }, + { + "epoch": 1.1650230111768574, + "grad_norm": 1.058992862701416, + "learning_rate": 0.0007668421052631579, + "loss": 1.4823, + "step": 443 + }, + { + "epoch": 1.1650230111768574, + "high_lr": 0.0007668421052631579, + "low_lr": 1.533684210526316e-05, + "step": 443 + }, + { + "epoch": 1.1650230111768574, + "high_lr": 0.0007668421052631579, + "low_lr": 1.533684210526316e-05, + "step": 443 + }, + { + "epoch": 1.1650230111768574, + "high_lr": 0.0007668421052631579, + "low_lr": 1.533684210526316e-05, + "step": 443 + }, + { + "epoch": 1.1650230111768574, + "high_lr": 0.0007668421052631579, + "low_lr": 1.533684210526316e-05, + "step": 443 + }, + { + "epoch": 1.1650230111768574, + "high_lr": 0.0007668421052631579, + "low_lr": 1.533684210526316e-05, + "step": 443 + }, + { + "epoch": 1.1650230111768574, + "high_lr": 0.0007668421052631579, + "low_lr": 1.533684210526316e-05, + "step": 443 + }, + { + "epoch": 1.1650230111768574, + "high_lr": 0.0007668421052631579, + "low_lr": 1.533684210526316e-05, + "step": 443 + }, + { + "epoch": 1.1650230111768574, + "high_lr": 0.0007668421052631579, + "low_lr": 1.533684210526316e-05, + "step": 443 + }, + { + "epoch": 1.1676528599605522, + "grad_norm": 1.038759469985962, + "learning_rate": 0.0007663157894736842, + "loss": 1.521, + "step": 444 + }, + { + "epoch": 1.1676528599605522, + "high_lr": 0.0007663157894736842, + "low_lr": 1.5326315789473684e-05, + "step": 444 + }, + { + "epoch": 1.1676528599605522, + "high_lr": 0.0007663157894736842, + "low_lr": 1.5326315789473684e-05, + "step": 444 + }, + { + "epoch": 1.1676528599605522, + "high_lr": 0.0007663157894736842, + "low_lr": 1.5326315789473684e-05, + "step": 444 + }, + { + "epoch": 1.1676528599605522, + "high_lr": 0.0007663157894736842, + "low_lr": 1.5326315789473684e-05, + "step": 444 + }, + { + "epoch": 1.1676528599605522, + "high_lr": 0.0007663157894736842, + "low_lr": 1.5326315789473684e-05, + "step": 444 + }, + { + "epoch": 1.1676528599605522, + "high_lr": 0.0007663157894736842, + "low_lr": 1.5326315789473684e-05, + "step": 444 + }, + { + "epoch": 1.1676528599605522, + "high_lr": 0.0007663157894736842, + "low_lr": 1.5326315789473684e-05, + "step": 444 + }, + { + "epoch": 1.1676528599605522, + "high_lr": 0.0007663157894736842, + "low_lr": 1.5326315789473684e-05, + "step": 444 + }, + { + "epoch": 1.1702827087442471, + "grad_norm": 1.034196138381958, + "learning_rate": 0.0007657894736842106, + "loss": 1.4978, + "step": 445 + }, + { + "epoch": 1.1702827087442471, + "high_lr": 0.0007657894736842106, + "low_lr": 1.5315789473684212e-05, + "step": 445 + }, + { + "epoch": 1.1702827087442471, + "high_lr": 0.0007657894736842106, + "low_lr": 1.5315789473684212e-05, + "step": 445 + }, + { + "epoch": 1.1702827087442471, + "high_lr": 0.0007657894736842106, + "low_lr": 1.5315789473684212e-05, + "step": 445 + }, + { + "epoch": 1.1702827087442471, + "high_lr": 0.0007657894736842106, + "low_lr": 1.5315789473684212e-05, + "step": 445 + }, + { + "epoch": 1.1702827087442471, + "high_lr": 0.0007657894736842106, + "low_lr": 1.5315789473684212e-05, + "step": 445 + }, + { + "epoch": 1.1702827087442471, + "high_lr": 0.0007657894736842106, + "low_lr": 1.5315789473684212e-05, + "step": 445 + }, + { + "epoch": 1.1702827087442471, + "high_lr": 0.0007657894736842106, + "low_lr": 1.5315789473684212e-05, + "step": 445 + }, + { + "epoch": 1.1702827087442471, + "high_lr": 0.0007657894736842106, + "low_lr": 1.5315789473684212e-05, + "step": 445 + }, + { + "epoch": 1.1729125575279422, + "grad_norm": 1.1163936853408813, + "learning_rate": 0.0007652631578947369, + "loss": 1.5139, + "step": 446 + }, + { + "epoch": 1.1729125575279422, + "high_lr": 0.0007652631578947369, + "low_lr": 1.530526315789474e-05, + "step": 446 + }, + { + "epoch": 1.1729125575279422, + "high_lr": 0.0007652631578947369, + "low_lr": 1.530526315789474e-05, + "step": 446 + }, + { + "epoch": 1.1729125575279422, + "high_lr": 0.0007652631578947369, + "low_lr": 1.530526315789474e-05, + "step": 446 + }, + { + "epoch": 1.1729125575279422, + "high_lr": 0.0007652631578947369, + "low_lr": 1.530526315789474e-05, + "step": 446 + }, + { + "epoch": 1.1729125575279422, + "high_lr": 0.0007652631578947369, + "low_lr": 1.530526315789474e-05, + "step": 446 + }, + { + "epoch": 1.1729125575279422, + "high_lr": 0.0007652631578947369, + "low_lr": 1.530526315789474e-05, + "step": 446 + }, + { + "epoch": 1.1729125575279422, + "high_lr": 0.0007652631578947369, + "low_lr": 1.530526315789474e-05, + "step": 446 + }, + { + "epoch": 1.1729125575279422, + "high_lr": 0.0007652631578947369, + "low_lr": 1.530526315789474e-05, + "step": 446 + }, + { + "epoch": 1.175542406311637, + "grad_norm": 1.0809240341186523, + "learning_rate": 0.0007647368421052631, + "loss": 1.5607, + "step": 447 + }, + { + "epoch": 1.175542406311637, + "high_lr": 0.0007647368421052631, + "low_lr": 1.5294736842105265e-05, + "step": 447 + }, + { + "epoch": 1.175542406311637, + "high_lr": 0.0007647368421052631, + "low_lr": 1.5294736842105265e-05, + "step": 447 + }, + { + "epoch": 1.175542406311637, + "high_lr": 0.0007647368421052631, + "low_lr": 1.5294736842105265e-05, + "step": 447 + }, + { + "epoch": 1.175542406311637, + "high_lr": 0.0007647368421052631, + "low_lr": 1.5294736842105265e-05, + "step": 447 + }, + { + "epoch": 1.175542406311637, + "high_lr": 0.0007647368421052631, + "low_lr": 1.5294736842105265e-05, + "step": 447 + }, + { + "epoch": 1.175542406311637, + "high_lr": 0.0007647368421052631, + "low_lr": 1.5294736842105265e-05, + "step": 447 + }, + { + "epoch": 1.175542406311637, + "high_lr": 0.0007647368421052631, + "low_lr": 1.5294736842105265e-05, + "step": 447 + }, + { + "epoch": 1.175542406311637, + "high_lr": 0.0007647368421052631, + "low_lr": 1.5294736842105265e-05, + "step": 447 + }, + { + "epoch": 1.178172255095332, + "grad_norm": 1.0977026224136353, + "learning_rate": 0.0007642105263157894, + "loss": 1.5739, + "step": 448 + }, + { + "epoch": 1.178172255095332, + "high_lr": 0.0007642105263157894, + "low_lr": 1.528421052631579e-05, + "step": 448 + }, + { + "epoch": 1.178172255095332, + "high_lr": 0.0007642105263157894, + "low_lr": 1.528421052631579e-05, + "step": 448 + }, + { + "epoch": 1.178172255095332, + "high_lr": 0.0007642105263157894, + "low_lr": 1.528421052631579e-05, + "step": 448 + }, + { + "epoch": 1.178172255095332, + "high_lr": 0.0007642105263157894, + "low_lr": 1.528421052631579e-05, + "step": 448 + }, + { + "epoch": 1.178172255095332, + "high_lr": 0.0007642105263157894, + "low_lr": 1.528421052631579e-05, + "step": 448 + }, + { + "epoch": 1.178172255095332, + "high_lr": 0.0007642105263157894, + "low_lr": 1.528421052631579e-05, + "step": 448 + }, + { + "epoch": 1.178172255095332, + "high_lr": 0.0007642105263157894, + "low_lr": 1.528421052631579e-05, + "step": 448 + }, + { + "epoch": 1.178172255095332, + "high_lr": 0.0007642105263157894, + "low_lr": 1.528421052631579e-05, + "step": 448 + }, + { + "epoch": 1.180802103879027, + "grad_norm": 1.0918418169021606, + "learning_rate": 0.0007636842105263157, + "loss": 1.5288, + "step": 449 + }, + { + "epoch": 1.180802103879027, + "high_lr": 0.0007636842105263157, + "low_lr": 1.5273684210526318e-05, + "step": 449 + }, + { + "epoch": 1.180802103879027, + "high_lr": 0.0007636842105263157, + "low_lr": 1.5273684210526318e-05, + "step": 449 + }, + { + "epoch": 1.180802103879027, + "high_lr": 0.0007636842105263157, + "low_lr": 1.5273684210526318e-05, + "step": 449 + }, + { + "epoch": 1.180802103879027, + "high_lr": 0.0007636842105263157, + "low_lr": 1.5273684210526318e-05, + "step": 449 + }, + { + "epoch": 1.180802103879027, + "high_lr": 0.0007636842105263157, + "low_lr": 1.5273684210526318e-05, + "step": 449 + }, + { + "epoch": 1.180802103879027, + "high_lr": 0.0007636842105263157, + "low_lr": 1.5273684210526318e-05, + "step": 449 + }, + { + "epoch": 1.180802103879027, + "high_lr": 0.0007636842105263157, + "low_lr": 1.5273684210526318e-05, + "step": 449 + }, + { + "epoch": 1.180802103879027, + "high_lr": 0.0007636842105263157, + "low_lr": 1.5273684210526318e-05, + "step": 449 + }, + { + "epoch": 1.183431952662722, + "grad_norm": 1.018148422241211, + "learning_rate": 0.0007631578947368421, + "loss": 1.5004, + "step": 450 + }, + { + "epoch": 1.183431952662722, + "high_lr": 0.0007631578947368421, + "low_lr": 1.5263157894736846e-05, + "step": 450 + }, + { + "epoch": 1.183431952662722, + "high_lr": 0.0007631578947368421, + "low_lr": 1.5263157894736846e-05, + "step": 450 + }, + { + "epoch": 1.183431952662722, + "high_lr": 0.0007631578947368421, + "low_lr": 1.5263157894736846e-05, + "step": 450 + }, + { + "epoch": 1.183431952662722, + "high_lr": 0.0007631578947368421, + "low_lr": 1.5263157894736846e-05, + "step": 450 + }, + { + "epoch": 1.183431952662722, + "high_lr": 0.0007631578947368421, + "low_lr": 1.5263157894736846e-05, + "step": 450 + }, + { + "epoch": 1.183431952662722, + "high_lr": 0.0007631578947368421, + "low_lr": 1.5263157894736846e-05, + "step": 450 + }, + { + "epoch": 1.183431952662722, + "high_lr": 0.0007631578947368421, + "low_lr": 1.5263157894736846e-05, + "step": 450 + }, + { + "epoch": 1.183431952662722, + "high_lr": 0.0007631578947368421, + "low_lr": 1.5263157894736846e-05, + "step": 450 + }, + { + "epoch": 1.1860618014464168, + "grad_norm": 1.0755535364151, + "learning_rate": 0.0007626315789473685, + "loss": 1.5135, + "step": 451 + }, + { + "epoch": 1.1860618014464168, + "high_lr": 0.0007626315789473685, + "low_lr": 1.525263157894737e-05, + "step": 451 + }, + { + "epoch": 1.1860618014464168, + "high_lr": 0.0007626315789473685, + "low_lr": 1.525263157894737e-05, + "step": 451 + }, + { + "epoch": 1.1860618014464168, + "high_lr": 0.0007626315789473685, + "low_lr": 1.525263157894737e-05, + "step": 451 + }, + { + "epoch": 1.1860618014464168, + "high_lr": 0.0007626315789473685, + "low_lr": 1.525263157894737e-05, + "step": 451 + }, + { + "epoch": 1.1860618014464168, + "high_lr": 0.0007626315789473685, + "low_lr": 1.525263157894737e-05, + "step": 451 + }, + { + "epoch": 1.1860618014464168, + "high_lr": 0.0007626315789473685, + "low_lr": 1.525263157894737e-05, + "step": 451 + }, + { + "epoch": 1.1860618014464168, + "high_lr": 0.0007626315789473685, + "low_lr": 1.525263157894737e-05, + "step": 451 + }, + { + "epoch": 1.1860618014464168, + "high_lr": 0.0007626315789473685, + "low_lr": 1.525263157894737e-05, + "step": 451 + }, + { + "epoch": 1.1886916502301117, + "grad_norm": 1.0672171115875244, + "learning_rate": 0.0007621052631578948, + "loss": 1.456, + "step": 452 + }, + { + "epoch": 1.1886916502301117, + "high_lr": 0.0007621052631578948, + "low_lr": 1.5242105263157897e-05, + "step": 452 + }, + { + "epoch": 1.1886916502301117, + "high_lr": 0.0007621052631578948, + "low_lr": 1.5242105263157897e-05, + "step": 452 + }, + { + "epoch": 1.1886916502301117, + "high_lr": 0.0007621052631578948, + "low_lr": 1.5242105263157897e-05, + "step": 452 + }, + { + "epoch": 1.1886916502301117, + "high_lr": 0.0007621052631578948, + "low_lr": 1.5242105263157897e-05, + "step": 452 + }, + { + "epoch": 1.1886916502301117, + "high_lr": 0.0007621052631578948, + "low_lr": 1.5242105263157897e-05, + "step": 452 + }, + { + "epoch": 1.1886916502301117, + "high_lr": 0.0007621052631578948, + "low_lr": 1.5242105263157897e-05, + "step": 452 + }, + { + "epoch": 1.1886916502301117, + "high_lr": 0.0007621052631578948, + "low_lr": 1.5242105263157897e-05, + "step": 452 + }, + { + "epoch": 1.1886916502301117, + "high_lr": 0.0007621052631578948, + "low_lr": 1.5242105263157897e-05, + "step": 452 + }, + { + "epoch": 1.1913214990138068, + "grad_norm": 1.021423101425171, + "learning_rate": 0.0007615789473684211, + "loss": 1.5174, + "step": 453 + }, + { + "epoch": 1.1913214990138068, + "high_lr": 0.0007615789473684211, + "low_lr": 1.5231578947368421e-05, + "step": 453 + }, + { + "epoch": 1.1913214990138068, + "high_lr": 0.0007615789473684211, + "low_lr": 1.5231578947368421e-05, + "step": 453 + }, + { + "epoch": 1.1913214990138068, + "high_lr": 0.0007615789473684211, + "low_lr": 1.5231578947368421e-05, + "step": 453 + }, + { + "epoch": 1.1913214990138068, + "high_lr": 0.0007615789473684211, + "low_lr": 1.5231578947368421e-05, + "step": 453 + }, + { + "epoch": 1.1913214990138068, + "high_lr": 0.0007615789473684211, + "low_lr": 1.5231578947368421e-05, + "step": 453 + }, + { + "epoch": 1.1913214990138068, + "high_lr": 0.0007615789473684211, + "low_lr": 1.5231578947368421e-05, + "step": 453 + }, + { + "epoch": 1.1913214990138068, + "high_lr": 0.0007615789473684211, + "low_lr": 1.5231578947368421e-05, + "step": 453 + }, + { + "epoch": 1.1913214990138068, + "high_lr": 0.0007615789473684211, + "low_lr": 1.5231578947368421e-05, + "step": 453 + }, + { + "epoch": 1.1939513477975017, + "grad_norm": 0.9255045056343079, + "learning_rate": 0.0007610526315789474, + "loss": 1.5226, + "step": 454 + }, + { + "epoch": 1.1939513477975017, + "high_lr": 0.0007610526315789474, + "low_lr": 1.5221052631578948e-05, + "step": 454 + }, + { + "epoch": 1.1939513477975017, + "high_lr": 0.0007610526315789474, + "low_lr": 1.5221052631578948e-05, + "step": 454 + }, + { + "epoch": 1.1939513477975017, + "high_lr": 0.0007610526315789474, + "low_lr": 1.5221052631578948e-05, + "step": 454 + }, + { + "epoch": 1.1939513477975017, + "high_lr": 0.0007610526315789474, + "low_lr": 1.5221052631578948e-05, + "step": 454 + }, + { + "epoch": 1.1939513477975017, + "high_lr": 0.0007610526315789474, + "low_lr": 1.5221052631578948e-05, + "step": 454 + }, + { + "epoch": 1.1939513477975017, + "high_lr": 0.0007610526315789474, + "low_lr": 1.5221052631578948e-05, + "step": 454 + }, + { + "epoch": 1.1939513477975017, + "high_lr": 0.0007610526315789474, + "low_lr": 1.5221052631578948e-05, + "step": 454 + }, + { + "epoch": 1.1939513477975017, + "high_lr": 0.0007610526315789474, + "low_lr": 1.5221052631578948e-05, + "step": 454 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 1.060662031173706, + "learning_rate": 0.0007605263157894738, + "loss": 1.4796, + "step": 455 + }, + { + "epoch": 1.1965811965811965, + "high_lr": 0.0007605263157894738, + "low_lr": 1.5210526315789476e-05, + "step": 455 + }, + { + "epoch": 1.1965811965811965, + "high_lr": 0.0007605263157894738, + "low_lr": 1.5210526315789476e-05, + "step": 455 + }, + { + "epoch": 1.1965811965811965, + "high_lr": 0.0007605263157894738, + "low_lr": 1.5210526315789476e-05, + "step": 455 + }, + { + "epoch": 1.1965811965811965, + "high_lr": 0.0007605263157894738, + "low_lr": 1.5210526315789476e-05, + "step": 455 + }, + { + "epoch": 1.1965811965811965, + "high_lr": 0.0007605263157894738, + "low_lr": 1.5210526315789476e-05, + "step": 455 + }, + { + "epoch": 1.1965811965811965, + "high_lr": 0.0007605263157894738, + "low_lr": 1.5210526315789476e-05, + "step": 455 + }, + { + "epoch": 1.1965811965811965, + "high_lr": 0.0007605263157894738, + "low_lr": 1.5210526315789476e-05, + "step": 455 + }, + { + "epoch": 1.1965811965811965, + "high_lr": 0.0007605263157894738, + "low_lr": 1.5210526315789476e-05, + "step": 455 + }, + { + "epoch": 1.1992110453648914, + "grad_norm": 1.1802492141723633, + "learning_rate": 0.00076, + "loss": 1.5481, + "step": 456 + }, + { + "epoch": 1.1992110453648914, + "high_lr": 0.00076, + "low_lr": 1.5200000000000002e-05, + "step": 456 + }, + { + "epoch": 1.1992110453648914, + "high_lr": 0.00076, + "low_lr": 1.5200000000000002e-05, + "step": 456 + }, + { + "epoch": 1.1992110453648914, + "high_lr": 0.00076, + "low_lr": 1.5200000000000002e-05, + "step": 456 + }, + { + "epoch": 1.1992110453648914, + "high_lr": 0.00076, + "low_lr": 1.5200000000000002e-05, + "step": 456 + }, + { + "epoch": 1.1992110453648914, + "high_lr": 0.00076, + "low_lr": 1.5200000000000002e-05, + "step": 456 + }, + { + "epoch": 1.1992110453648914, + "high_lr": 0.00076, + "low_lr": 1.5200000000000002e-05, + "step": 456 + }, + { + "epoch": 1.1992110453648914, + "high_lr": 0.00076, + "low_lr": 1.5200000000000002e-05, + "step": 456 + }, + { + "epoch": 1.1992110453648914, + "high_lr": 0.00076, + "low_lr": 1.5200000000000002e-05, + "step": 456 + }, + { + "epoch": 1.2018408941485865, + "grad_norm": 1.0207029581069946, + "learning_rate": 0.0007594736842105263, + "loss": 1.5057, + "step": 457 + }, + { + "epoch": 1.2018408941485865, + "high_lr": 0.0007594736842105263, + "low_lr": 1.5189473684210526e-05, + "step": 457 + }, + { + "epoch": 1.2018408941485865, + "high_lr": 0.0007594736842105263, + "low_lr": 1.5189473684210526e-05, + "step": 457 + }, + { + "epoch": 1.2018408941485865, + "high_lr": 0.0007594736842105263, + "low_lr": 1.5189473684210526e-05, + "step": 457 + }, + { + "epoch": 1.2018408941485865, + "high_lr": 0.0007594736842105263, + "low_lr": 1.5189473684210526e-05, + "step": 457 + }, + { + "epoch": 1.2018408941485865, + "high_lr": 0.0007594736842105263, + "low_lr": 1.5189473684210526e-05, + "step": 457 + }, + { + "epoch": 1.2018408941485865, + "high_lr": 0.0007594736842105263, + "low_lr": 1.5189473684210526e-05, + "step": 457 + }, + { + "epoch": 1.2018408941485865, + "high_lr": 0.0007594736842105263, + "low_lr": 1.5189473684210526e-05, + "step": 457 + }, + { + "epoch": 1.2018408941485865, + "high_lr": 0.0007594736842105263, + "low_lr": 1.5189473684210526e-05, + "step": 457 + }, + { + "epoch": 1.2044707429322814, + "grad_norm": 1.0410336256027222, + "learning_rate": 0.0007589473684210526, + "loss": 1.5044, + "step": 458 + }, + { + "epoch": 1.2044707429322814, + "high_lr": 0.0007589473684210526, + "low_lr": 1.5178947368421053e-05, + "step": 458 + }, + { + "epoch": 1.2044707429322814, + "high_lr": 0.0007589473684210526, + "low_lr": 1.5178947368421053e-05, + "step": 458 + }, + { + "epoch": 1.2044707429322814, + "high_lr": 0.0007589473684210526, + "low_lr": 1.5178947368421053e-05, + "step": 458 + }, + { + "epoch": 1.2044707429322814, + "high_lr": 0.0007589473684210526, + "low_lr": 1.5178947368421053e-05, + "step": 458 + }, + { + "epoch": 1.2044707429322814, + "high_lr": 0.0007589473684210526, + "low_lr": 1.5178947368421053e-05, + "step": 458 + }, + { + "epoch": 1.2044707429322814, + "high_lr": 0.0007589473684210526, + "low_lr": 1.5178947368421053e-05, + "step": 458 + }, + { + "epoch": 1.2044707429322814, + "high_lr": 0.0007589473684210526, + "low_lr": 1.5178947368421053e-05, + "step": 458 + }, + { + "epoch": 1.2044707429322814, + "high_lr": 0.0007589473684210526, + "low_lr": 1.5178947368421053e-05, + "step": 458 + }, + { + "epoch": 1.2071005917159763, + "grad_norm": 1.304905891418457, + "learning_rate": 0.000758421052631579, + "loss": 1.5078, + "step": 459 + }, + { + "epoch": 1.2071005917159763, + "high_lr": 0.000758421052631579, + "low_lr": 1.516842105263158e-05, + "step": 459 + }, + { + "epoch": 1.2071005917159763, + "high_lr": 0.000758421052631579, + "low_lr": 1.516842105263158e-05, + "step": 459 + }, + { + "epoch": 1.2071005917159763, + "high_lr": 0.000758421052631579, + "low_lr": 1.516842105263158e-05, + "step": 459 + }, + { + "epoch": 1.2071005917159763, + "high_lr": 0.000758421052631579, + "low_lr": 1.516842105263158e-05, + "step": 459 + }, + { + "epoch": 1.2071005917159763, + "high_lr": 0.000758421052631579, + "low_lr": 1.516842105263158e-05, + "step": 459 + }, + { + "epoch": 1.2071005917159763, + "high_lr": 0.000758421052631579, + "low_lr": 1.516842105263158e-05, + "step": 459 + }, + { + "epoch": 1.2071005917159763, + "high_lr": 0.000758421052631579, + "low_lr": 1.516842105263158e-05, + "step": 459 + }, + { + "epoch": 1.2071005917159763, + "high_lr": 0.000758421052631579, + "low_lr": 1.516842105263158e-05, + "step": 459 + }, + { + "epoch": 1.2097304404996714, + "grad_norm": 1.040297508239746, + "learning_rate": 0.0007578947368421053, + "loss": 1.5058, + "step": 460 + }, + { + "epoch": 1.2097304404996714, + "high_lr": 0.0007578947368421053, + "low_lr": 1.5157894736842107e-05, + "step": 460 + }, + { + "epoch": 1.2097304404996714, + "high_lr": 0.0007578947368421053, + "low_lr": 1.5157894736842107e-05, + "step": 460 + }, + { + "epoch": 1.2097304404996714, + "high_lr": 0.0007578947368421053, + "low_lr": 1.5157894736842107e-05, + "step": 460 + }, + { + "epoch": 1.2097304404996714, + "high_lr": 0.0007578947368421053, + "low_lr": 1.5157894736842107e-05, + "step": 460 + }, + { + "epoch": 1.2097304404996714, + "high_lr": 0.0007578947368421053, + "low_lr": 1.5157894736842107e-05, + "step": 460 + }, + { + "epoch": 1.2097304404996714, + "high_lr": 0.0007578947368421053, + "low_lr": 1.5157894736842107e-05, + "step": 460 + }, + { + "epoch": 1.2097304404996714, + "high_lr": 0.0007578947368421053, + "low_lr": 1.5157894736842107e-05, + "step": 460 + }, + { + "epoch": 1.2097304404996714, + "high_lr": 0.0007578947368421053, + "low_lr": 1.5157894736842107e-05, + "step": 460 + }, + { + "epoch": 1.2123602892833663, + "grad_norm": 1.012952446937561, + "learning_rate": 0.0007573684210526316, + "loss": 1.5206, + "step": 461 + }, + { + "epoch": 1.2123602892833663, + "high_lr": 0.0007573684210526316, + "low_lr": 1.5147368421052633e-05, + "step": 461 + }, + { + "epoch": 1.2123602892833663, + "high_lr": 0.0007573684210526316, + "low_lr": 1.5147368421052633e-05, + "step": 461 + }, + { + "epoch": 1.2123602892833663, + "high_lr": 0.0007573684210526316, + "low_lr": 1.5147368421052633e-05, + "step": 461 + }, + { + "epoch": 1.2123602892833663, + "high_lr": 0.0007573684210526316, + "low_lr": 1.5147368421052633e-05, + "step": 461 + }, + { + "epoch": 1.2123602892833663, + "high_lr": 0.0007573684210526316, + "low_lr": 1.5147368421052633e-05, + "step": 461 + }, + { + "epoch": 1.2123602892833663, + "high_lr": 0.0007573684210526316, + "low_lr": 1.5147368421052633e-05, + "step": 461 + }, + { + "epoch": 1.2123602892833663, + "high_lr": 0.0007573684210526316, + "low_lr": 1.5147368421052633e-05, + "step": 461 + }, + { + "epoch": 1.2123602892833663, + "high_lr": 0.0007573684210526316, + "low_lr": 1.5147368421052633e-05, + "step": 461 + }, + { + "epoch": 1.2149901380670611, + "grad_norm": 1.0536714792251587, + "learning_rate": 0.0007568421052631579, + "loss": 1.4892, + "step": 462 + }, + { + "epoch": 1.2149901380670611, + "high_lr": 0.0007568421052631579, + "low_lr": 1.5136842105263158e-05, + "step": 462 + }, + { + "epoch": 1.2149901380670611, + "high_lr": 0.0007568421052631579, + "low_lr": 1.5136842105263158e-05, + "step": 462 + }, + { + "epoch": 1.2149901380670611, + "high_lr": 0.0007568421052631579, + "low_lr": 1.5136842105263158e-05, + "step": 462 + }, + { + "epoch": 1.2149901380670611, + "high_lr": 0.0007568421052631579, + "low_lr": 1.5136842105263158e-05, + "step": 462 + }, + { + "epoch": 1.2149901380670611, + "high_lr": 0.0007568421052631579, + "low_lr": 1.5136842105263158e-05, + "step": 462 + }, + { + "epoch": 1.2149901380670611, + "high_lr": 0.0007568421052631579, + "low_lr": 1.5136842105263158e-05, + "step": 462 + }, + { + "epoch": 1.2149901380670611, + "high_lr": 0.0007568421052631579, + "low_lr": 1.5136842105263158e-05, + "step": 462 + }, + { + "epoch": 1.2149901380670611, + "high_lr": 0.0007568421052631579, + "low_lr": 1.5136842105263158e-05, + "step": 462 + }, + { + "epoch": 1.217619986850756, + "grad_norm": 1.0727607011795044, + "learning_rate": 0.0007563157894736842, + "loss": 1.5471, + "step": 463 + }, + { + "epoch": 1.217619986850756, + "high_lr": 0.0007563157894736842, + "low_lr": 1.5126315789473684e-05, + "step": 463 + }, + { + "epoch": 1.217619986850756, + "high_lr": 0.0007563157894736842, + "low_lr": 1.5126315789473684e-05, + "step": 463 + }, + { + "epoch": 1.217619986850756, + "high_lr": 0.0007563157894736842, + "low_lr": 1.5126315789473684e-05, + "step": 463 + }, + { + "epoch": 1.217619986850756, + "high_lr": 0.0007563157894736842, + "low_lr": 1.5126315789473684e-05, + "step": 463 + }, + { + "epoch": 1.217619986850756, + "high_lr": 0.0007563157894736842, + "low_lr": 1.5126315789473684e-05, + "step": 463 + }, + { + "epoch": 1.217619986850756, + "high_lr": 0.0007563157894736842, + "low_lr": 1.5126315789473684e-05, + "step": 463 + }, + { + "epoch": 1.217619986850756, + "high_lr": 0.0007563157894736842, + "low_lr": 1.5126315789473684e-05, + "step": 463 + }, + { + "epoch": 1.217619986850756, + "high_lr": 0.0007563157894736842, + "low_lr": 1.5126315789473684e-05, + "step": 463 + }, + { + "epoch": 1.220249835634451, + "grad_norm": 0.9881057143211365, + "learning_rate": 0.0007557894736842105, + "loss": 1.4938, + "step": 464 + }, + { + "epoch": 1.220249835634451, + "high_lr": 0.0007557894736842105, + "low_lr": 1.5115789473684212e-05, + "step": 464 + }, + { + "epoch": 1.220249835634451, + "high_lr": 0.0007557894736842105, + "low_lr": 1.5115789473684212e-05, + "step": 464 + }, + { + "epoch": 1.220249835634451, + "high_lr": 0.0007557894736842105, + "low_lr": 1.5115789473684212e-05, + "step": 464 + }, + { + "epoch": 1.220249835634451, + "high_lr": 0.0007557894736842105, + "low_lr": 1.5115789473684212e-05, + "step": 464 + }, + { + "epoch": 1.220249835634451, + "high_lr": 0.0007557894736842105, + "low_lr": 1.5115789473684212e-05, + "step": 464 + }, + { + "epoch": 1.220249835634451, + "high_lr": 0.0007557894736842105, + "low_lr": 1.5115789473684212e-05, + "step": 464 + }, + { + "epoch": 1.220249835634451, + "high_lr": 0.0007557894736842105, + "low_lr": 1.5115789473684212e-05, + "step": 464 + }, + { + "epoch": 1.220249835634451, + "high_lr": 0.0007557894736842105, + "low_lr": 1.5115789473684212e-05, + "step": 464 + }, + { + "epoch": 1.222879684418146, + "grad_norm": 1.0744034051895142, + "learning_rate": 0.0007552631578947368, + "loss": 1.4936, + "step": 465 + }, + { + "epoch": 1.222879684418146, + "high_lr": 0.0007552631578947368, + "low_lr": 1.5105263157894739e-05, + "step": 465 + }, + { + "epoch": 1.222879684418146, + "high_lr": 0.0007552631578947368, + "low_lr": 1.5105263157894739e-05, + "step": 465 + }, + { + "epoch": 1.222879684418146, + "high_lr": 0.0007552631578947368, + "low_lr": 1.5105263157894739e-05, + "step": 465 + }, + { + "epoch": 1.222879684418146, + "high_lr": 0.0007552631578947368, + "low_lr": 1.5105263157894739e-05, + "step": 465 + }, + { + "epoch": 1.222879684418146, + "high_lr": 0.0007552631578947368, + "low_lr": 1.5105263157894739e-05, + "step": 465 + }, + { + "epoch": 1.222879684418146, + "high_lr": 0.0007552631578947368, + "low_lr": 1.5105263157894739e-05, + "step": 465 + }, + { + "epoch": 1.222879684418146, + "high_lr": 0.0007552631578947368, + "low_lr": 1.5105263157894739e-05, + "step": 465 + }, + { + "epoch": 1.222879684418146, + "high_lr": 0.0007552631578947368, + "low_lr": 1.5105263157894739e-05, + "step": 465 + }, + { + "epoch": 1.2255095332018409, + "grad_norm": 1.0122504234313965, + "learning_rate": 0.0007547368421052631, + "loss": 1.5304, + "step": 466 + }, + { + "epoch": 1.2255095332018409, + "high_lr": 0.0007547368421052631, + "low_lr": 1.5094736842105263e-05, + "step": 466 + }, + { + "epoch": 1.2255095332018409, + "high_lr": 0.0007547368421052631, + "low_lr": 1.5094736842105263e-05, + "step": 466 + }, + { + "epoch": 1.2255095332018409, + "high_lr": 0.0007547368421052631, + "low_lr": 1.5094736842105263e-05, + "step": 466 + }, + { + "epoch": 1.2255095332018409, + "high_lr": 0.0007547368421052631, + "low_lr": 1.5094736842105263e-05, + "step": 466 + }, + { + "epoch": 1.2255095332018409, + "high_lr": 0.0007547368421052631, + "low_lr": 1.5094736842105263e-05, + "step": 466 + }, + { + "epoch": 1.2255095332018409, + "high_lr": 0.0007547368421052631, + "low_lr": 1.5094736842105263e-05, + "step": 466 + }, + { + "epoch": 1.2255095332018409, + "high_lr": 0.0007547368421052631, + "low_lr": 1.5094736842105263e-05, + "step": 466 + }, + { + "epoch": 1.2255095332018409, + "high_lr": 0.0007547368421052631, + "low_lr": 1.5094736842105263e-05, + "step": 466 + }, + { + "epoch": 1.2281393819855357, + "grad_norm": 0.9647414088249207, + "learning_rate": 0.0007542105263157895, + "loss": 1.4701, + "step": 467 + }, + { + "epoch": 1.2281393819855357, + "high_lr": 0.0007542105263157895, + "low_lr": 1.508421052631579e-05, + "step": 467 + }, + { + "epoch": 1.2281393819855357, + "high_lr": 0.0007542105263157895, + "low_lr": 1.508421052631579e-05, + "step": 467 + }, + { + "epoch": 1.2281393819855357, + "high_lr": 0.0007542105263157895, + "low_lr": 1.508421052631579e-05, + "step": 467 + }, + { + "epoch": 1.2281393819855357, + "high_lr": 0.0007542105263157895, + "low_lr": 1.508421052631579e-05, + "step": 467 + }, + { + "epoch": 1.2281393819855357, + "high_lr": 0.0007542105263157895, + "low_lr": 1.508421052631579e-05, + "step": 467 + }, + { + "epoch": 1.2281393819855357, + "high_lr": 0.0007542105263157895, + "low_lr": 1.508421052631579e-05, + "step": 467 + }, + { + "epoch": 1.2281393819855357, + "high_lr": 0.0007542105263157895, + "low_lr": 1.508421052631579e-05, + "step": 467 + }, + { + "epoch": 1.2281393819855357, + "high_lr": 0.0007542105263157895, + "low_lr": 1.508421052631579e-05, + "step": 467 + }, + { + "epoch": 1.2307692307692308, + "grad_norm": 1.006118655204773, + "learning_rate": 0.0007536842105263158, + "loss": 1.5536, + "step": 468 + }, + { + "epoch": 1.2307692307692308, + "high_lr": 0.0007536842105263158, + "low_lr": 1.5073684210526316e-05, + "step": 468 + }, + { + "epoch": 1.2307692307692308, + "high_lr": 0.0007536842105263158, + "low_lr": 1.5073684210526316e-05, + "step": 468 + }, + { + "epoch": 1.2307692307692308, + "high_lr": 0.0007536842105263158, + "low_lr": 1.5073684210526316e-05, + "step": 468 + }, + { + "epoch": 1.2307692307692308, + "high_lr": 0.0007536842105263158, + "low_lr": 1.5073684210526316e-05, + "step": 468 + }, + { + "epoch": 1.2307692307692308, + "high_lr": 0.0007536842105263158, + "low_lr": 1.5073684210526316e-05, + "step": 468 + }, + { + "epoch": 1.2307692307692308, + "high_lr": 0.0007536842105263158, + "low_lr": 1.5073684210526316e-05, + "step": 468 + }, + { + "epoch": 1.2307692307692308, + "high_lr": 0.0007536842105263158, + "low_lr": 1.5073684210526316e-05, + "step": 468 + }, + { + "epoch": 1.2307692307692308, + "high_lr": 0.0007536842105263158, + "low_lr": 1.5073684210526316e-05, + "step": 468 + }, + { + "epoch": 1.2333990795529257, + "grad_norm": 0.9264904856681824, + "learning_rate": 0.0007531578947368422, + "loss": 1.4641, + "step": 469 + }, + { + "epoch": 1.2333990795529257, + "high_lr": 0.0007531578947368422, + "low_lr": 1.5063157894736844e-05, + "step": 469 + }, + { + "epoch": 1.2333990795529257, + "high_lr": 0.0007531578947368422, + "low_lr": 1.5063157894736844e-05, + "step": 469 + }, + { + "epoch": 1.2333990795529257, + "high_lr": 0.0007531578947368422, + "low_lr": 1.5063157894736844e-05, + "step": 469 + }, + { + "epoch": 1.2333990795529257, + "high_lr": 0.0007531578947368422, + "low_lr": 1.5063157894736844e-05, + "step": 469 + }, + { + "epoch": 1.2333990795529257, + "high_lr": 0.0007531578947368422, + "low_lr": 1.5063157894736844e-05, + "step": 469 + }, + { + "epoch": 1.2333990795529257, + "high_lr": 0.0007531578947368422, + "low_lr": 1.5063157894736844e-05, + "step": 469 + }, + { + "epoch": 1.2333990795529257, + "high_lr": 0.0007531578947368422, + "low_lr": 1.5063157894736844e-05, + "step": 469 + }, + { + "epoch": 1.2333990795529257, + "high_lr": 0.0007531578947368422, + "low_lr": 1.5063157894736844e-05, + "step": 469 + }, + { + "epoch": 1.2360289283366206, + "grad_norm": 0.9730166792869568, + "learning_rate": 0.0007526315789473685, + "loss": 1.5371, + "step": 470 + }, + { + "epoch": 1.2360289283366206, + "high_lr": 0.0007526315789473685, + "low_lr": 1.505263157894737e-05, + "step": 470 + }, + { + "epoch": 1.2360289283366206, + "high_lr": 0.0007526315789473685, + "low_lr": 1.505263157894737e-05, + "step": 470 + }, + { + "epoch": 1.2360289283366206, + "high_lr": 0.0007526315789473685, + "low_lr": 1.505263157894737e-05, + "step": 470 + }, + { + "epoch": 1.2360289283366206, + "high_lr": 0.0007526315789473685, + "low_lr": 1.505263157894737e-05, + "step": 470 + }, + { + "epoch": 1.2360289283366206, + "high_lr": 0.0007526315789473685, + "low_lr": 1.505263157894737e-05, + "step": 470 + }, + { + "epoch": 1.2360289283366206, + "high_lr": 0.0007526315789473685, + "low_lr": 1.505263157894737e-05, + "step": 470 + }, + { + "epoch": 1.2360289283366206, + "high_lr": 0.0007526315789473685, + "low_lr": 1.505263157894737e-05, + "step": 470 + }, + { + "epoch": 1.2360289283366206, + "high_lr": 0.0007526315789473685, + "low_lr": 1.505263157894737e-05, + "step": 470 + }, + { + "epoch": 1.2386587771203157, + "grad_norm": 1.172485589981079, + "learning_rate": 0.0007521052631578948, + "loss": 1.5384, + "step": 471 + }, + { + "epoch": 1.2386587771203157, + "high_lr": 0.0007521052631578948, + "low_lr": 1.5042105263157895e-05, + "step": 471 + }, + { + "epoch": 1.2386587771203157, + "high_lr": 0.0007521052631578948, + "low_lr": 1.5042105263157895e-05, + "step": 471 + }, + { + "epoch": 1.2386587771203157, + "high_lr": 0.0007521052631578948, + "low_lr": 1.5042105263157895e-05, + "step": 471 + }, + { + "epoch": 1.2386587771203157, + "high_lr": 0.0007521052631578948, + "low_lr": 1.5042105263157895e-05, + "step": 471 + }, + { + "epoch": 1.2386587771203157, + "high_lr": 0.0007521052631578948, + "low_lr": 1.5042105263157895e-05, + "step": 471 + }, + { + "epoch": 1.2386587771203157, + "high_lr": 0.0007521052631578948, + "low_lr": 1.5042105263157895e-05, + "step": 471 + }, + { + "epoch": 1.2386587771203157, + "high_lr": 0.0007521052631578948, + "low_lr": 1.5042105263157895e-05, + "step": 471 + }, + { + "epoch": 1.2386587771203157, + "high_lr": 0.0007521052631578948, + "low_lr": 1.5042105263157895e-05, + "step": 471 + }, + { + "epoch": 1.2412886259040106, + "grad_norm": 0.9785175323486328, + "learning_rate": 0.000751578947368421, + "loss": 1.5094, + "step": 472 + }, + { + "epoch": 1.2412886259040106, + "high_lr": 0.000751578947368421, + "low_lr": 1.5031578947368421e-05, + "step": 472 + }, + { + "epoch": 1.2412886259040106, + "high_lr": 0.000751578947368421, + "low_lr": 1.5031578947368421e-05, + "step": 472 + }, + { + "epoch": 1.2412886259040106, + "high_lr": 0.000751578947368421, + "low_lr": 1.5031578947368421e-05, + "step": 472 + }, + { + "epoch": 1.2412886259040106, + "high_lr": 0.000751578947368421, + "low_lr": 1.5031578947368421e-05, + "step": 472 + }, + { + "epoch": 1.2412886259040106, + "high_lr": 0.000751578947368421, + "low_lr": 1.5031578947368421e-05, + "step": 472 + }, + { + "epoch": 1.2412886259040106, + "high_lr": 0.000751578947368421, + "low_lr": 1.5031578947368421e-05, + "step": 472 + }, + { + "epoch": 1.2412886259040106, + "high_lr": 0.000751578947368421, + "low_lr": 1.5031578947368421e-05, + "step": 472 + }, + { + "epoch": 1.2412886259040106, + "high_lr": 0.000751578947368421, + "low_lr": 1.5031578947368421e-05, + "step": 472 + }, + { + "epoch": 1.2439184746877054, + "grad_norm": 0.9950137138366699, + "learning_rate": 0.0007510526315789474, + "loss": 1.4456, + "step": 473 + }, + { + "epoch": 1.2439184746877054, + "high_lr": 0.0007510526315789474, + "low_lr": 1.502105263157895e-05, + "step": 473 + }, + { + "epoch": 1.2439184746877054, + "high_lr": 0.0007510526315789474, + "low_lr": 1.502105263157895e-05, + "step": 473 + }, + { + "epoch": 1.2439184746877054, + "high_lr": 0.0007510526315789474, + "low_lr": 1.502105263157895e-05, + "step": 473 + }, + { + "epoch": 1.2439184746877054, + "high_lr": 0.0007510526315789474, + "low_lr": 1.502105263157895e-05, + "step": 473 + }, + { + "epoch": 1.2439184746877054, + "high_lr": 0.0007510526315789474, + "low_lr": 1.502105263157895e-05, + "step": 473 + }, + { + "epoch": 1.2439184746877054, + "high_lr": 0.0007510526315789474, + "low_lr": 1.502105263157895e-05, + "step": 473 + }, + { + "epoch": 1.2439184746877054, + "high_lr": 0.0007510526315789474, + "low_lr": 1.502105263157895e-05, + "step": 473 + }, + { + "epoch": 1.2439184746877054, + "high_lr": 0.0007510526315789474, + "low_lr": 1.502105263157895e-05, + "step": 473 + }, + { + "epoch": 1.2465483234714003, + "grad_norm": 1.0175728797912598, + "learning_rate": 0.0007505263157894737, + "loss": 1.4836, + "step": 474 + }, + { + "epoch": 1.2465483234714003, + "high_lr": 0.0007505263157894737, + "low_lr": 1.5010526315789476e-05, + "step": 474 + }, + { + "epoch": 1.2465483234714003, + "high_lr": 0.0007505263157894737, + "low_lr": 1.5010526315789476e-05, + "step": 474 + }, + { + "epoch": 1.2465483234714003, + "high_lr": 0.0007505263157894737, + "low_lr": 1.5010526315789476e-05, + "step": 474 + }, + { + "epoch": 1.2465483234714003, + "high_lr": 0.0007505263157894737, + "low_lr": 1.5010526315789476e-05, + "step": 474 + }, + { + "epoch": 1.2465483234714003, + "high_lr": 0.0007505263157894737, + "low_lr": 1.5010526315789476e-05, + "step": 474 + }, + { + "epoch": 1.2465483234714003, + "high_lr": 0.0007505263157894737, + "low_lr": 1.5010526315789476e-05, + "step": 474 + }, + { + "epoch": 1.2465483234714003, + "high_lr": 0.0007505263157894737, + "low_lr": 1.5010526315789476e-05, + "step": 474 + }, + { + "epoch": 1.2465483234714003, + "high_lr": 0.0007505263157894737, + "low_lr": 1.5010526315789476e-05, + "step": 474 + }, + { + "epoch": 1.2491781722550954, + "grad_norm": 0.9616714119911194, + "learning_rate": 0.00075, + "loss": 1.4358, + "step": 475 + }, + { + "epoch": 1.2491781722550954, + "high_lr": 0.00075, + "low_lr": 1.5000000000000002e-05, + "step": 475 + }, + { + "epoch": 1.2491781722550954, + "high_lr": 0.00075, + "low_lr": 1.5000000000000002e-05, + "step": 475 + }, + { + "epoch": 1.2491781722550954, + "high_lr": 0.00075, + "low_lr": 1.5000000000000002e-05, + "step": 475 + }, + { + "epoch": 1.2491781722550954, + "high_lr": 0.00075, + "low_lr": 1.5000000000000002e-05, + "step": 475 + }, + { + "epoch": 1.2491781722550954, + "high_lr": 0.00075, + "low_lr": 1.5000000000000002e-05, + "step": 475 + }, + { + "epoch": 1.2491781722550954, + "high_lr": 0.00075, + "low_lr": 1.5000000000000002e-05, + "step": 475 + }, + { + "epoch": 1.2491781722550954, + "high_lr": 0.00075, + "low_lr": 1.5000000000000002e-05, + "step": 475 + }, + { + "epoch": 1.2491781722550954, + "high_lr": 0.00075, + "low_lr": 1.5000000000000002e-05, + "step": 475 + }, + { + "epoch": 1.2518080210387903, + "grad_norm": 1.003388524055481, + "learning_rate": 0.0007494736842105263, + "loss": 1.4858, + "step": 476 + }, + { + "epoch": 1.2518080210387903, + "high_lr": 0.0007494736842105263, + "low_lr": 1.4989473684210527e-05, + "step": 476 + }, + { + "epoch": 1.2518080210387903, + "high_lr": 0.0007494736842105263, + "low_lr": 1.4989473684210527e-05, + "step": 476 + }, + { + "epoch": 1.2518080210387903, + "high_lr": 0.0007494736842105263, + "low_lr": 1.4989473684210527e-05, + "step": 476 + }, + { + "epoch": 1.2518080210387903, + "high_lr": 0.0007494736842105263, + "low_lr": 1.4989473684210527e-05, + "step": 476 + }, + { + "epoch": 1.2518080210387903, + "high_lr": 0.0007494736842105263, + "low_lr": 1.4989473684210527e-05, + "step": 476 + }, + { + "epoch": 1.2518080210387903, + "high_lr": 0.0007494736842105263, + "low_lr": 1.4989473684210527e-05, + "step": 476 + }, + { + "epoch": 1.2518080210387903, + "high_lr": 0.0007494736842105263, + "low_lr": 1.4989473684210527e-05, + "step": 476 + }, + { + "epoch": 1.2518080210387903, + "high_lr": 0.0007494736842105263, + "low_lr": 1.4989473684210527e-05, + "step": 476 + }, + { + "epoch": 1.2544378698224852, + "grad_norm": 1.0780497789382935, + "learning_rate": 0.0007489473684210526, + "loss": 1.5253, + "step": 477 + }, + { + "epoch": 1.2544378698224852, + "high_lr": 0.0007489473684210526, + "low_lr": 1.4978947368421053e-05, + "step": 477 + }, + { + "epoch": 1.2544378698224852, + "high_lr": 0.0007489473684210526, + "low_lr": 1.4978947368421053e-05, + "step": 477 + }, + { + "epoch": 1.2544378698224852, + "high_lr": 0.0007489473684210526, + "low_lr": 1.4978947368421053e-05, + "step": 477 + }, + { + "epoch": 1.2544378698224852, + "high_lr": 0.0007489473684210526, + "low_lr": 1.4978947368421053e-05, + "step": 477 + }, + { + "epoch": 1.2544378698224852, + "high_lr": 0.0007489473684210526, + "low_lr": 1.4978947368421053e-05, + "step": 477 + }, + { + "epoch": 1.2544378698224852, + "high_lr": 0.0007489473684210526, + "low_lr": 1.4978947368421053e-05, + "step": 477 + }, + { + "epoch": 1.2544378698224852, + "high_lr": 0.0007489473684210526, + "low_lr": 1.4978947368421053e-05, + "step": 477 + }, + { + "epoch": 1.2544378698224852, + "high_lr": 0.0007489473684210526, + "low_lr": 1.4978947368421053e-05, + "step": 477 + }, + { + "epoch": 1.25706771860618, + "grad_norm": 1.0622295141220093, + "learning_rate": 0.000748421052631579, + "loss": 1.5205, + "step": 478 + }, + { + "epoch": 1.25706771860618, + "high_lr": 0.000748421052631579, + "low_lr": 1.4968421052631581e-05, + "step": 478 + }, + { + "epoch": 1.25706771860618, + "high_lr": 0.000748421052631579, + "low_lr": 1.4968421052631581e-05, + "step": 478 + }, + { + "epoch": 1.25706771860618, + "high_lr": 0.000748421052631579, + "low_lr": 1.4968421052631581e-05, + "step": 478 + }, + { + "epoch": 1.25706771860618, + "high_lr": 0.000748421052631579, + "low_lr": 1.4968421052631581e-05, + "step": 478 + }, + { + "epoch": 1.25706771860618, + "high_lr": 0.000748421052631579, + "low_lr": 1.4968421052631581e-05, + "step": 478 + }, + { + "epoch": 1.25706771860618, + "high_lr": 0.000748421052631579, + "low_lr": 1.4968421052631581e-05, + "step": 478 + }, + { + "epoch": 1.25706771860618, + "high_lr": 0.000748421052631579, + "low_lr": 1.4968421052631581e-05, + "step": 478 + }, + { + "epoch": 1.25706771860618, + "high_lr": 0.000748421052631579, + "low_lr": 1.4968421052631581e-05, + "step": 478 + }, + { + "epoch": 1.2596975673898752, + "grad_norm": 1.078023910522461, + "learning_rate": 0.0007478947368421053, + "loss": 1.5447, + "step": 479 + }, + { + "epoch": 1.2596975673898752, + "high_lr": 0.0007478947368421053, + "low_lr": 1.4957894736842107e-05, + "step": 479 + }, + { + "epoch": 1.2596975673898752, + "high_lr": 0.0007478947368421053, + "low_lr": 1.4957894736842107e-05, + "step": 479 + }, + { + "epoch": 1.2596975673898752, + "high_lr": 0.0007478947368421053, + "low_lr": 1.4957894736842107e-05, + "step": 479 + }, + { + "epoch": 1.2596975673898752, + "high_lr": 0.0007478947368421053, + "low_lr": 1.4957894736842107e-05, + "step": 479 + }, + { + "epoch": 1.2596975673898752, + "high_lr": 0.0007478947368421053, + "low_lr": 1.4957894736842107e-05, + "step": 479 + }, + { + "epoch": 1.2596975673898752, + "high_lr": 0.0007478947368421053, + "low_lr": 1.4957894736842107e-05, + "step": 479 + }, + { + "epoch": 1.2596975673898752, + "high_lr": 0.0007478947368421053, + "low_lr": 1.4957894736842107e-05, + "step": 479 + }, + { + "epoch": 1.2596975673898752, + "high_lr": 0.0007478947368421053, + "low_lr": 1.4957894736842107e-05, + "step": 479 + }, + { + "epoch": 1.26232741617357, + "grad_norm": 1.0321089029312134, + "learning_rate": 0.0007473684210526316, + "loss": 1.4966, + "step": 480 + }, + { + "epoch": 1.26232741617357, + "high_lr": 0.0007473684210526316, + "low_lr": 1.4947368421052632e-05, + "step": 480 + }, + { + "epoch": 1.26232741617357, + "high_lr": 0.0007473684210526316, + "low_lr": 1.4947368421052632e-05, + "step": 480 + }, + { + "epoch": 1.26232741617357, + "high_lr": 0.0007473684210526316, + "low_lr": 1.4947368421052632e-05, + "step": 480 + }, + { + "epoch": 1.26232741617357, + "high_lr": 0.0007473684210526316, + "low_lr": 1.4947368421052632e-05, + "step": 480 + }, + { + "epoch": 1.26232741617357, + "high_lr": 0.0007473684210526316, + "low_lr": 1.4947368421052632e-05, + "step": 480 + }, + { + "epoch": 1.26232741617357, + "high_lr": 0.0007473684210526316, + "low_lr": 1.4947368421052632e-05, + "step": 480 + }, + { + "epoch": 1.26232741617357, + "high_lr": 0.0007473684210526316, + "low_lr": 1.4947368421052632e-05, + "step": 480 + }, + { + "epoch": 1.26232741617357, + "high_lr": 0.0007473684210526316, + "low_lr": 1.4947368421052632e-05, + "step": 480 + }, + { + "epoch": 1.264957264957265, + "grad_norm": 1.0012013912200928, + "learning_rate": 0.0007468421052631578, + "loss": 1.4845, + "step": 481 + }, + { + "epoch": 1.264957264957265, + "high_lr": 0.0007468421052631578, + "low_lr": 1.4936842105263158e-05, + "step": 481 + }, + { + "epoch": 1.264957264957265, + "high_lr": 0.0007468421052631578, + "low_lr": 1.4936842105263158e-05, + "step": 481 + }, + { + "epoch": 1.264957264957265, + "high_lr": 0.0007468421052631578, + "low_lr": 1.4936842105263158e-05, + "step": 481 + }, + { + "epoch": 1.264957264957265, + "high_lr": 0.0007468421052631578, + "low_lr": 1.4936842105263158e-05, + "step": 481 + }, + { + "epoch": 1.264957264957265, + "high_lr": 0.0007468421052631578, + "low_lr": 1.4936842105263158e-05, + "step": 481 + }, + { + "epoch": 1.264957264957265, + "high_lr": 0.0007468421052631578, + "low_lr": 1.4936842105263158e-05, + "step": 481 + }, + { + "epoch": 1.264957264957265, + "high_lr": 0.0007468421052631578, + "low_lr": 1.4936842105263158e-05, + "step": 481 + }, + { + "epoch": 1.264957264957265, + "high_lr": 0.0007468421052631578, + "low_lr": 1.4936842105263158e-05, + "step": 481 + }, + { + "epoch": 1.26758711374096, + "grad_norm": 1.076859712600708, + "learning_rate": 0.0007463157894736842, + "loss": 1.5144, + "step": 482 + }, + { + "epoch": 1.26758711374096, + "high_lr": 0.0007463157894736842, + "low_lr": 1.4926315789473686e-05, + "step": 482 + }, + { + "epoch": 1.26758711374096, + "high_lr": 0.0007463157894736842, + "low_lr": 1.4926315789473686e-05, + "step": 482 + }, + { + "epoch": 1.26758711374096, + "high_lr": 0.0007463157894736842, + "low_lr": 1.4926315789473686e-05, + "step": 482 + }, + { + "epoch": 1.26758711374096, + "high_lr": 0.0007463157894736842, + "low_lr": 1.4926315789473686e-05, + "step": 482 + }, + { + "epoch": 1.26758711374096, + "high_lr": 0.0007463157894736842, + "low_lr": 1.4926315789473686e-05, + "step": 482 + }, + { + "epoch": 1.26758711374096, + "high_lr": 0.0007463157894736842, + "low_lr": 1.4926315789473686e-05, + "step": 482 + }, + { + "epoch": 1.26758711374096, + "high_lr": 0.0007463157894736842, + "low_lr": 1.4926315789473686e-05, + "step": 482 + }, + { + "epoch": 1.26758711374096, + "high_lr": 0.0007463157894736842, + "low_lr": 1.4926315789473686e-05, + "step": 482 + }, + { + "epoch": 1.2702169625246549, + "grad_norm": 1.1330339908599854, + "learning_rate": 0.0007457894736842105, + "loss": 1.524, + "step": 483 + }, + { + "epoch": 1.2702169625246549, + "high_lr": 0.0007457894736842105, + "low_lr": 1.4915789473684213e-05, + "step": 483 + }, + { + "epoch": 1.2702169625246549, + "high_lr": 0.0007457894736842105, + "low_lr": 1.4915789473684213e-05, + "step": 483 + }, + { + "epoch": 1.2702169625246549, + "high_lr": 0.0007457894736842105, + "low_lr": 1.4915789473684213e-05, + "step": 483 + }, + { + "epoch": 1.2702169625246549, + "high_lr": 0.0007457894736842105, + "low_lr": 1.4915789473684213e-05, + "step": 483 + }, + { + "epoch": 1.2702169625246549, + "high_lr": 0.0007457894736842105, + "low_lr": 1.4915789473684213e-05, + "step": 483 + }, + { + "epoch": 1.2702169625246549, + "high_lr": 0.0007457894736842105, + "low_lr": 1.4915789473684213e-05, + "step": 483 + }, + { + "epoch": 1.2702169625246549, + "high_lr": 0.0007457894736842105, + "low_lr": 1.4915789473684213e-05, + "step": 483 + }, + { + "epoch": 1.2702169625246549, + "high_lr": 0.0007457894736842105, + "low_lr": 1.4915789473684213e-05, + "step": 483 + }, + { + "epoch": 1.2728468113083498, + "grad_norm": 1.0957008600234985, + "learning_rate": 0.0007452631578947369, + "loss": 1.4832, + "step": 484 + }, + { + "epoch": 1.2728468113083498, + "high_lr": 0.0007452631578947369, + "low_lr": 1.4905263157894739e-05, + "step": 484 + }, + { + "epoch": 1.2728468113083498, + "high_lr": 0.0007452631578947369, + "low_lr": 1.4905263157894739e-05, + "step": 484 + }, + { + "epoch": 1.2728468113083498, + "high_lr": 0.0007452631578947369, + "low_lr": 1.4905263157894739e-05, + "step": 484 + }, + { + "epoch": 1.2728468113083498, + "high_lr": 0.0007452631578947369, + "low_lr": 1.4905263157894739e-05, + "step": 484 + }, + { + "epoch": 1.2728468113083498, + "high_lr": 0.0007452631578947369, + "low_lr": 1.4905263157894739e-05, + "step": 484 + }, + { + "epoch": 1.2728468113083498, + "high_lr": 0.0007452631578947369, + "low_lr": 1.4905263157894739e-05, + "step": 484 + }, + { + "epoch": 1.2728468113083498, + "high_lr": 0.0007452631578947369, + "low_lr": 1.4905263157894739e-05, + "step": 484 + }, + { + "epoch": 1.2728468113083498, + "high_lr": 0.0007452631578947369, + "low_lr": 1.4905263157894739e-05, + "step": 484 + }, + { + "epoch": 1.2754766600920446, + "grad_norm": 0.9506247043609619, + "learning_rate": 0.0007447368421052632, + "loss": 1.4619, + "step": 485 + }, + { + "epoch": 1.2754766600920446, + "high_lr": 0.0007447368421052632, + "low_lr": 1.4894736842105264e-05, + "step": 485 + }, + { + "epoch": 1.2754766600920446, + "high_lr": 0.0007447368421052632, + "low_lr": 1.4894736842105264e-05, + "step": 485 + }, + { + "epoch": 1.2754766600920446, + "high_lr": 0.0007447368421052632, + "low_lr": 1.4894736842105264e-05, + "step": 485 + }, + { + "epoch": 1.2754766600920446, + "high_lr": 0.0007447368421052632, + "low_lr": 1.4894736842105264e-05, + "step": 485 + }, + { + "epoch": 1.2754766600920446, + "high_lr": 0.0007447368421052632, + "low_lr": 1.4894736842105264e-05, + "step": 485 + }, + { + "epoch": 1.2754766600920446, + "high_lr": 0.0007447368421052632, + "low_lr": 1.4894736842105264e-05, + "step": 485 + }, + { + "epoch": 1.2754766600920446, + "high_lr": 0.0007447368421052632, + "low_lr": 1.4894736842105264e-05, + "step": 485 + }, + { + "epoch": 1.2754766600920446, + "high_lr": 0.0007447368421052632, + "low_lr": 1.4894736842105264e-05, + "step": 485 + }, + { + "epoch": 1.2781065088757395, + "grad_norm": 1.2661606073379517, + "learning_rate": 0.0007442105263157895, + "loss": 1.5559, + "step": 486 + }, + { + "epoch": 1.2781065088757395, + "high_lr": 0.0007442105263157895, + "low_lr": 1.488421052631579e-05, + "step": 486 + }, + { + "epoch": 1.2781065088757395, + "high_lr": 0.0007442105263157895, + "low_lr": 1.488421052631579e-05, + "step": 486 + }, + { + "epoch": 1.2781065088757395, + "high_lr": 0.0007442105263157895, + "low_lr": 1.488421052631579e-05, + "step": 486 + }, + { + "epoch": 1.2781065088757395, + "high_lr": 0.0007442105263157895, + "low_lr": 1.488421052631579e-05, + "step": 486 + }, + { + "epoch": 1.2781065088757395, + "high_lr": 0.0007442105263157895, + "low_lr": 1.488421052631579e-05, + "step": 486 + }, + { + "epoch": 1.2781065088757395, + "high_lr": 0.0007442105263157895, + "low_lr": 1.488421052631579e-05, + "step": 486 + }, + { + "epoch": 1.2781065088757395, + "high_lr": 0.0007442105263157895, + "low_lr": 1.488421052631579e-05, + "step": 486 + }, + { + "epoch": 1.2781065088757395, + "high_lr": 0.0007442105263157895, + "low_lr": 1.488421052631579e-05, + "step": 486 + }, + { + "epoch": 1.2807363576594346, + "grad_norm": 1.0828596353530884, + "learning_rate": 0.0007436842105263159, + "loss": 1.5071, + "step": 487 + }, + { + "epoch": 1.2807363576594346, + "high_lr": 0.0007436842105263159, + "low_lr": 1.4873684210526318e-05, + "step": 487 + }, + { + "epoch": 1.2807363576594346, + "high_lr": 0.0007436842105263159, + "low_lr": 1.4873684210526318e-05, + "step": 487 + }, + { + "epoch": 1.2807363576594346, + "high_lr": 0.0007436842105263159, + "low_lr": 1.4873684210526318e-05, + "step": 487 + }, + { + "epoch": 1.2807363576594346, + "high_lr": 0.0007436842105263159, + "low_lr": 1.4873684210526318e-05, + "step": 487 + }, + { + "epoch": 1.2807363576594346, + "high_lr": 0.0007436842105263159, + "low_lr": 1.4873684210526318e-05, + "step": 487 + }, + { + "epoch": 1.2807363576594346, + "high_lr": 0.0007436842105263159, + "low_lr": 1.4873684210526318e-05, + "step": 487 + }, + { + "epoch": 1.2807363576594346, + "high_lr": 0.0007436842105263159, + "low_lr": 1.4873684210526318e-05, + "step": 487 + }, + { + "epoch": 1.2807363576594346, + "high_lr": 0.0007436842105263159, + "low_lr": 1.4873684210526318e-05, + "step": 487 + }, + { + "epoch": 1.2833662064431295, + "grad_norm": 1.1039183139801025, + "learning_rate": 0.0007431578947368422, + "loss": 1.5014, + "step": 488 + }, + { + "epoch": 1.2833662064431295, + "high_lr": 0.0007431578947368422, + "low_lr": 1.4863157894736844e-05, + "step": 488 + }, + { + "epoch": 1.2833662064431295, + "high_lr": 0.0007431578947368422, + "low_lr": 1.4863157894736844e-05, + "step": 488 + }, + { + "epoch": 1.2833662064431295, + "high_lr": 0.0007431578947368422, + "low_lr": 1.4863157894736844e-05, + "step": 488 + }, + { + "epoch": 1.2833662064431295, + "high_lr": 0.0007431578947368422, + "low_lr": 1.4863157894736844e-05, + "step": 488 + }, + { + "epoch": 1.2833662064431295, + "high_lr": 0.0007431578947368422, + "low_lr": 1.4863157894736844e-05, + "step": 488 + }, + { + "epoch": 1.2833662064431295, + "high_lr": 0.0007431578947368422, + "low_lr": 1.4863157894736844e-05, + "step": 488 + }, + { + "epoch": 1.2833662064431295, + "high_lr": 0.0007431578947368422, + "low_lr": 1.4863157894736844e-05, + "step": 488 + }, + { + "epoch": 1.2833662064431295, + "high_lr": 0.0007431578947368422, + "low_lr": 1.4863157894736844e-05, + "step": 488 + }, + { + "epoch": 1.2859960552268244, + "grad_norm": 1.0724716186523438, + "learning_rate": 0.0007426315789473685, + "loss": 1.4949, + "step": 489 + }, + { + "epoch": 1.2859960552268244, + "high_lr": 0.0007426315789473685, + "low_lr": 1.4852631578947369e-05, + "step": 489 + }, + { + "epoch": 1.2859960552268244, + "high_lr": 0.0007426315789473685, + "low_lr": 1.4852631578947369e-05, + "step": 489 + }, + { + "epoch": 1.2859960552268244, + "high_lr": 0.0007426315789473685, + "low_lr": 1.4852631578947369e-05, + "step": 489 + }, + { + "epoch": 1.2859960552268244, + "high_lr": 0.0007426315789473685, + "low_lr": 1.4852631578947369e-05, + "step": 489 + }, + { + "epoch": 1.2859960552268244, + "high_lr": 0.0007426315789473685, + "low_lr": 1.4852631578947369e-05, + "step": 489 + }, + { + "epoch": 1.2859960552268244, + "high_lr": 0.0007426315789473685, + "low_lr": 1.4852631578947369e-05, + "step": 489 + }, + { + "epoch": 1.2859960552268244, + "high_lr": 0.0007426315789473685, + "low_lr": 1.4852631578947369e-05, + "step": 489 + }, + { + "epoch": 1.2859960552268244, + "high_lr": 0.0007426315789473685, + "low_lr": 1.4852631578947369e-05, + "step": 489 + }, + { + "epoch": 1.2886259040105195, + "grad_norm": 1.0938204526901245, + "learning_rate": 0.0007421052631578947, + "loss": 1.565, + "step": 490 + }, + { + "epoch": 1.2886259040105195, + "high_lr": 0.0007421052631578947, + "low_lr": 1.4842105263157895e-05, + "step": 490 + }, + { + "epoch": 1.2886259040105195, + "high_lr": 0.0007421052631578947, + "low_lr": 1.4842105263157895e-05, + "step": 490 + }, + { + "epoch": 1.2886259040105195, + "high_lr": 0.0007421052631578947, + "low_lr": 1.4842105263157895e-05, + "step": 490 + }, + { + "epoch": 1.2886259040105195, + "high_lr": 0.0007421052631578947, + "low_lr": 1.4842105263157895e-05, + "step": 490 + }, + { + "epoch": 1.2886259040105195, + "high_lr": 0.0007421052631578947, + "low_lr": 1.4842105263157895e-05, + "step": 490 + }, + { + "epoch": 1.2886259040105195, + "high_lr": 0.0007421052631578947, + "low_lr": 1.4842105263157895e-05, + "step": 490 + }, + { + "epoch": 1.2886259040105195, + "high_lr": 0.0007421052631578947, + "low_lr": 1.4842105263157895e-05, + "step": 490 + }, + { + "epoch": 1.2886259040105195, + "high_lr": 0.0007421052631578947, + "low_lr": 1.4842105263157895e-05, + "step": 490 + }, + { + "epoch": 1.2912557527942143, + "grad_norm": 1.025592565536499, + "learning_rate": 0.000741578947368421, + "loss": 1.4964, + "step": 491 + }, + { + "epoch": 1.2912557527942143, + "high_lr": 0.000741578947368421, + "low_lr": 1.4831578947368422e-05, + "step": 491 + }, + { + "epoch": 1.2912557527942143, + "high_lr": 0.000741578947368421, + "low_lr": 1.4831578947368422e-05, + "step": 491 + }, + { + "epoch": 1.2912557527942143, + "high_lr": 0.000741578947368421, + "low_lr": 1.4831578947368422e-05, + "step": 491 + }, + { + "epoch": 1.2912557527942143, + "high_lr": 0.000741578947368421, + "low_lr": 1.4831578947368422e-05, + "step": 491 + }, + { + "epoch": 1.2912557527942143, + "high_lr": 0.000741578947368421, + "low_lr": 1.4831578947368422e-05, + "step": 491 + }, + { + "epoch": 1.2912557527942143, + "high_lr": 0.000741578947368421, + "low_lr": 1.4831578947368422e-05, + "step": 491 + }, + { + "epoch": 1.2912557527942143, + "high_lr": 0.000741578947368421, + "low_lr": 1.4831578947368422e-05, + "step": 491 + }, + { + "epoch": 1.2912557527942143, + "high_lr": 0.000741578947368421, + "low_lr": 1.4831578947368422e-05, + "step": 491 + }, + { + "epoch": 1.2938856015779092, + "grad_norm": 1.0463948249816895, + "learning_rate": 0.0007410526315789474, + "loss": 1.5096, + "step": 492 + }, + { + "epoch": 1.2938856015779092, + "high_lr": 0.0007410526315789474, + "low_lr": 1.482105263157895e-05, + "step": 492 + }, + { + "epoch": 1.2938856015779092, + "high_lr": 0.0007410526315789474, + "low_lr": 1.482105263157895e-05, + "step": 492 + }, + { + "epoch": 1.2938856015779092, + "high_lr": 0.0007410526315789474, + "low_lr": 1.482105263157895e-05, + "step": 492 + }, + { + "epoch": 1.2938856015779092, + "high_lr": 0.0007410526315789474, + "low_lr": 1.482105263157895e-05, + "step": 492 + }, + { + "epoch": 1.2938856015779092, + "high_lr": 0.0007410526315789474, + "low_lr": 1.482105263157895e-05, + "step": 492 + }, + { + "epoch": 1.2938856015779092, + "high_lr": 0.0007410526315789474, + "low_lr": 1.482105263157895e-05, + "step": 492 + }, + { + "epoch": 1.2938856015779092, + "high_lr": 0.0007410526315789474, + "low_lr": 1.482105263157895e-05, + "step": 492 + }, + { + "epoch": 1.2938856015779092, + "high_lr": 0.0007410526315789474, + "low_lr": 1.482105263157895e-05, + "step": 492 + }, + { + "epoch": 1.2965154503616043, + "grad_norm": 1.0823513269424438, + "learning_rate": 0.0007405263157894737, + "loss": 1.5304, + "step": 493 + }, + { + "epoch": 1.2965154503616043, + "high_lr": 0.0007405263157894737, + "low_lr": 1.4810526315789476e-05, + "step": 493 + }, + { + "epoch": 1.2965154503616043, + "high_lr": 0.0007405263157894737, + "low_lr": 1.4810526315789476e-05, + "step": 493 + }, + { + "epoch": 1.2965154503616043, + "high_lr": 0.0007405263157894737, + "low_lr": 1.4810526315789476e-05, + "step": 493 + }, + { + "epoch": 1.2965154503616043, + "high_lr": 0.0007405263157894737, + "low_lr": 1.4810526315789476e-05, + "step": 493 + }, + { + "epoch": 1.2965154503616043, + "high_lr": 0.0007405263157894737, + "low_lr": 1.4810526315789476e-05, + "step": 493 + }, + { + "epoch": 1.2965154503616043, + "high_lr": 0.0007405263157894737, + "low_lr": 1.4810526315789476e-05, + "step": 493 + }, + { + "epoch": 1.2965154503616043, + "high_lr": 0.0007405263157894737, + "low_lr": 1.4810526315789476e-05, + "step": 493 + }, + { + "epoch": 1.2965154503616043, + "high_lr": 0.0007405263157894737, + "low_lr": 1.4810526315789476e-05, + "step": 493 + }, + { + "epoch": 1.2991452991452992, + "grad_norm": 1.0551989078521729, + "learning_rate": 0.00074, + "loss": 1.5116, + "step": 494 + }, + { + "epoch": 1.2991452991452992, + "high_lr": 0.00074, + "low_lr": 1.48e-05, + "step": 494 + }, + { + "epoch": 1.2991452991452992, + "high_lr": 0.00074, + "low_lr": 1.48e-05, + "step": 494 + }, + { + "epoch": 1.2991452991452992, + "high_lr": 0.00074, + "low_lr": 1.48e-05, + "step": 494 + }, + { + "epoch": 1.2991452991452992, + "high_lr": 0.00074, + "low_lr": 1.48e-05, + "step": 494 + }, + { + "epoch": 1.2991452991452992, + "high_lr": 0.00074, + "low_lr": 1.48e-05, + "step": 494 + }, + { + "epoch": 1.2991452991452992, + "high_lr": 0.00074, + "low_lr": 1.48e-05, + "step": 494 + }, + { + "epoch": 1.2991452991452992, + "high_lr": 0.00074, + "low_lr": 1.48e-05, + "step": 494 + }, + { + "epoch": 1.2991452991452992, + "high_lr": 0.00074, + "low_lr": 1.48e-05, + "step": 494 + }, + { + "epoch": 1.301775147928994, + "grad_norm": 1.1153234243392944, + "learning_rate": 0.0007394736842105263, + "loss": 1.5439, + "step": 495 + }, + { + "epoch": 1.301775147928994, + "high_lr": 0.0007394736842105263, + "low_lr": 1.4789473684210527e-05, + "step": 495 + }, + { + "epoch": 1.301775147928994, + "high_lr": 0.0007394736842105263, + "low_lr": 1.4789473684210527e-05, + "step": 495 + }, + { + "epoch": 1.301775147928994, + "high_lr": 0.0007394736842105263, + "low_lr": 1.4789473684210527e-05, + "step": 495 + }, + { + "epoch": 1.301775147928994, + "high_lr": 0.0007394736842105263, + "low_lr": 1.4789473684210527e-05, + "step": 495 + }, + { + "epoch": 1.301775147928994, + "high_lr": 0.0007394736842105263, + "low_lr": 1.4789473684210527e-05, + "step": 495 + }, + { + "epoch": 1.301775147928994, + "high_lr": 0.0007394736842105263, + "low_lr": 1.4789473684210527e-05, + "step": 495 + }, + { + "epoch": 1.301775147928994, + "high_lr": 0.0007394736842105263, + "low_lr": 1.4789473684210527e-05, + "step": 495 + }, + { + "epoch": 1.301775147928994, + "high_lr": 0.0007394736842105263, + "low_lr": 1.4789473684210527e-05, + "step": 495 + }, + { + "epoch": 1.304404996712689, + "grad_norm": 0.9963828921318054, + "learning_rate": 0.0007389473684210527, + "loss": 1.4877, + "step": 496 + }, + { + "epoch": 1.304404996712689, + "high_lr": 0.0007389473684210527, + "low_lr": 1.4778947368421055e-05, + "step": 496 + }, + { + "epoch": 1.304404996712689, + "high_lr": 0.0007389473684210527, + "low_lr": 1.4778947368421055e-05, + "step": 496 + }, + { + "epoch": 1.304404996712689, + "high_lr": 0.0007389473684210527, + "low_lr": 1.4778947368421055e-05, + "step": 496 + }, + { + "epoch": 1.304404996712689, + "high_lr": 0.0007389473684210527, + "low_lr": 1.4778947368421055e-05, + "step": 496 + }, + { + "epoch": 1.304404996712689, + "high_lr": 0.0007389473684210527, + "low_lr": 1.4778947368421055e-05, + "step": 496 + }, + { + "epoch": 1.304404996712689, + "high_lr": 0.0007389473684210527, + "low_lr": 1.4778947368421055e-05, + "step": 496 + }, + { + "epoch": 1.304404996712689, + "high_lr": 0.0007389473684210527, + "low_lr": 1.4778947368421055e-05, + "step": 496 + }, + { + "epoch": 1.304404996712689, + "high_lr": 0.0007389473684210527, + "low_lr": 1.4778947368421055e-05, + "step": 496 + }, + { + "epoch": 1.3070348454963838, + "grad_norm": 1.0606313943862915, + "learning_rate": 0.000738421052631579, + "loss": 1.485, + "step": 497 + }, + { + "epoch": 1.3070348454963838, + "high_lr": 0.000738421052631579, + "low_lr": 1.4768421052631581e-05, + "step": 497 + }, + { + "epoch": 1.3070348454963838, + "high_lr": 0.000738421052631579, + "low_lr": 1.4768421052631581e-05, + "step": 497 + }, + { + "epoch": 1.3070348454963838, + "high_lr": 0.000738421052631579, + "low_lr": 1.4768421052631581e-05, + "step": 497 + }, + { + "epoch": 1.3070348454963838, + "high_lr": 0.000738421052631579, + "low_lr": 1.4768421052631581e-05, + "step": 497 + }, + { + "epoch": 1.3070348454963838, + "high_lr": 0.000738421052631579, + "low_lr": 1.4768421052631581e-05, + "step": 497 + }, + { + "epoch": 1.3070348454963838, + "high_lr": 0.000738421052631579, + "low_lr": 1.4768421052631581e-05, + "step": 497 + }, + { + "epoch": 1.3070348454963838, + "high_lr": 0.000738421052631579, + "low_lr": 1.4768421052631581e-05, + "step": 497 + }, + { + "epoch": 1.3070348454963838, + "high_lr": 0.000738421052631579, + "low_lr": 1.4768421052631581e-05, + "step": 497 + }, + { + "epoch": 1.309664694280079, + "grad_norm": 1.0575344562530518, + "learning_rate": 0.0007378947368421052, + "loss": 1.4944, + "step": 498 + }, + { + "epoch": 1.309664694280079, + "high_lr": 0.0007378947368421052, + "low_lr": 1.4757894736842106e-05, + "step": 498 + }, + { + "epoch": 1.309664694280079, + "high_lr": 0.0007378947368421052, + "low_lr": 1.4757894736842106e-05, + "step": 498 + }, + { + "epoch": 1.309664694280079, + "high_lr": 0.0007378947368421052, + "low_lr": 1.4757894736842106e-05, + "step": 498 + }, + { + "epoch": 1.309664694280079, + "high_lr": 0.0007378947368421052, + "low_lr": 1.4757894736842106e-05, + "step": 498 + }, + { + "epoch": 1.309664694280079, + "high_lr": 0.0007378947368421052, + "low_lr": 1.4757894736842106e-05, + "step": 498 + }, + { + "epoch": 1.309664694280079, + "high_lr": 0.0007378947368421052, + "low_lr": 1.4757894736842106e-05, + "step": 498 + }, + { + "epoch": 1.309664694280079, + "high_lr": 0.0007378947368421052, + "low_lr": 1.4757894736842106e-05, + "step": 498 + }, + { + "epoch": 1.309664694280079, + "high_lr": 0.0007378947368421052, + "low_lr": 1.4757894736842106e-05, + "step": 498 + }, + { + "epoch": 1.3122945430637738, + "grad_norm": 1.0020501613616943, + "learning_rate": 0.0007373684210526315, + "loss": 1.4982, + "step": 499 + }, + { + "epoch": 1.3122945430637738, + "high_lr": 0.0007373684210526315, + "low_lr": 1.4747368421052632e-05, + "step": 499 + }, + { + "epoch": 1.3122945430637738, + "high_lr": 0.0007373684210526315, + "low_lr": 1.4747368421052632e-05, + "step": 499 + }, + { + "epoch": 1.3122945430637738, + "high_lr": 0.0007373684210526315, + "low_lr": 1.4747368421052632e-05, + "step": 499 + }, + { + "epoch": 1.3122945430637738, + "high_lr": 0.0007373684210526315, + "low_lr": 1.4747368421052632e-05, + "step": 499 + }, + { + "epoch": 1.3122945430637738, + "high_lr": 0.0007373684210526315, + "low_lr": 1.4747368421052632e-05, + "step": 499 + }, + { + "epoch": 1.3122945430637738, + "high_lr": 0.0007373684210526315, + "low_lr": 1.4747368421052632e-05, + "step": 499 + }, + { + "epoch": 1.3122945430637738, + "high_lr": 0.0007373684210526315, + "low_lr": 1.4747368421052632e-05, + "step": 499 + }, + { + "epoch": 1.3122945430637738, + "high_lr": 0.0007373684210526315, + "low_lr": 1.4747368421052632e-05, + "step": 499 + }, + { + "epoch": 1.3149243918474687, + "grad_norm": 1.1564699411392212, + "learning_rate": 0.0007368421052631579, + "loss": 1.5925, + "step": 500 + }, + { + "epoch": 1.3149243918474687, + "high_lr": 0.0007368421052631579, + "low_lr": 1.4736842105263159e-05, + "step": 500 + }, + { + "epoch": 1.3149243918474687, + "high_lr": 0.0007368421052631579, + "low_lr": 1.4736842105263159e-05, + "step": 500 + }, + { + "epoch": 1.3149243918474687, + "high_lr": 0.0007368421052631579, + "low_lr": 1.4736842105263159e-05, + "step": 500 + }, + { + "epoch": 1.3149243918474687, + "high_lr": 0.0007368421052631579, + "low_lr": 1.4736842105263159e-05, + "step": 500 + }, + { + "epoch": 1.3149243918474687, + "high_lr": 0.0007368421052631579, + "low_lr": 1.4736842105263159e-05, + "step": 500 + }, + { + "epoch": 1.3149243918474687, + "high_lr": 0.0007368421052631579, + "low_lr": 1.4736842105263159e-05, + "step": 500 + }, + { + "epoch": 1.3149243918474687, + "high_lr": 0.0007368421052631579, + "low_lr": 1.4736842105263159e-05, + "step": 500 + }, + { + "epoch": 1.3149243918474687, + "high_lr": 0.0007368421052631579, + "low_lr": 1.4736842105263159e-05, + "step": 500 + }, + { + "epoch": 1.3175542406311638, + "grad_norm": 1.0927271842956543, + "learning_rate": 0.0007363157894736843, + "loss": 1.5029, + "step": 501 + }, + { + "epoch": 1.3175542406311638, + "high_lr": 0.0007363157894736843, + "low_lr": 1.4726315789473687e-05, + "step": 501 + }, + { + "epoch": 1.3175542406311638, + "high_lr": 0.0007363157894736843, + "low_lr": 1.4726315789473687e-05, + "step": 501 + }, + { + "epoch": 1.3175542406311638, + "high_lr": 0.0007363157894736843, + "low_lr": 1.4726315789473687e-05, + "step": 501 + }, + { + "epoch": 1.3175542406311638, + "high_lr": 0.0007363157894736843, + "low_lr": 1.4726315789473687e-05, + "step": 501 + }, + { + "epoch": 1.3175542406311638, + "high_lr": 0.0007363157894736843, + "low_lr": 1.4726315789473687e-05, + "step": 501 + }, + { + "epoch": 1.3175542406311638, + "high_lr": 0.0007363157894736843, + "low_lr": 1.4726315789473687e-05, + "step": 501 + }, + { + "epoch": 1.3175542406311638, + "high_lr": 0.0007363157894736843, + "low_lr": 1.4726315789473687e-05, + "step": 501 + }, + { + "epoch": 1.3175542406311638, + "high_lr": 0.0007363157894736843, + "low_lr": 1.4726315789473687e-05, + "step": 501 + }, + { + "epoch": 1.3201840894148587, + "grad_norm": 1.0777077674865723, + "learning_rate": 0.0007357894736842106, + "loss": 1.4987, + "step": 502 + }, + { + "epoch": 1.3201840894148587, + "high_lr": 0.0007357894736842106, + "low_lr": 1.4715789473684213e-05, + "step": 502 + }, + { + "epoch": 1.3201840894148587, + "high_lr": 0.0007357894736842106, + "low_lr": 1.4715789473684213e-05, + "step": 502 + }, + { + "epoch": 1.3201840894148587, + "high_lr": 0.0007357894736842106, + "low_lr": 1.4715789473684213e-05, + "step": 502 + }, + { + "epoch": 1.3201840894148587, + "high_lr": 0.0007357894736842106, + "low_lr": 1.4715789473684213e-05, + "step": 502 + }, + { + "epoch": 1.3201840894148587, + "high_lr": 0.0007357894736842106, + "low_lr": 1.4715789473684213e-05, + "step": 502 + }, + { + "epoch": 1.3201840894148587, + "high_lr": 0.0007357894736842106, + "low_lr": 1.4715789473684213e-05, + "step": 502 + }, + { + "epoch": 1.3201840894148587, + "high_lr": 0.0007357894736842106, + "low_lr": 1.4715789473684213e-05, + "step": 502 + }, + { + "epoch": 1.3201840894148587, + "high_lr": 0.0007357894736842106, + "low_lr": 1.4715789473684213e-05, + "step": 502 + }, + { + "epoch": 1.3228139381985535, + "grad_norm": 1.0413988828659058, + "learning_rate": 0.0007352631578947369, + "loss": 1.475, + "step": 503 + }, + { + "epoch": 1.3228139381985535, + "high_lr": 0.0007352631578947369, + "low_lr": 1.4705263157894738e-05, + "step": 503 + }, + { + "epoch": 1.3228139381985535, + "high_lr": 0.0007352631578947369, + "low_lr": 1.4705263157894738e-05, + "step": 503 + }, + { + "epoch": 1.3228139381985535, + "high_lr": 0.0007352631578947369, + "low_lr": 1.4705263157894738e-05, + "step": 503 + }, + { + "epoch": 1.3228139381985535, + "high_lr": 0.0007352631578947369, + "low_lr": 1.4705263157894738e-05, + "step": 503 + }, + { + "epoch": 1.3228139381985535, + "high_lr": 0.0007352631578947369, + "low_lr": 1.4705263157894738e-05, + "step": 503 + }, + { + "epoch": 1.3228139381985535, + "high_lr": 0.0007352631578947369, + "low_lr": 1.4705263157894738e-05, + "step": 503 + }, + { + "epoch": 1.3228139381985535, + "high_lr": 0.0007352631578947369, + "low_lr": 1.4705263157894738e-05, + "step": 503 + }, + { + "epoch": 1.3228139381985535, + "high_lr": 0.0007352631578947369, + "low_lr": 1.4705263157894738e-05, + "step": 503 + }, + { + "epoch": 1.3254437869822486, + "grad_norm": 1.0489373207092285, + "learning_rate": 0.0007347368421052632, + "loss": 1.535, + "step": 504 + }, + { + "epoch": 1.3254437869822486, + "high_lr": 0.0007347368421052632, + "low_lr": 1.4694736842105264e-05, + "step": 504 + }, + { + "epoch": 1.3254437869822486, + "high_lr": 0.0007347368421052632, + "low_lr": 1.4694736842105264e-05, + "step": 504 + }, + { + "epoch": 1.3254437869822486, + "high_lr": 0.0007347368421052632, + "low_lr": 1.4694736842105264e-05, + "step": 504 + }, + { + "epoch": 1.3254437869822486, + "high_lr": 0.0007347368421052632, + "low_lr": 1.4694736842105264e-05, + "step": 504 + }, + { + "epoch": 1.3254437869822486, + "high_lr": 0.0007347368421052632, + "low_lr": 1.4694736842105264e-05, + "step": 504 + }, + { + "epoch": 1.3254437869822486, + "high_lr": 0.0007347368421052632, + "low_lr": 1.4694736842105264e-05, + "step": 504 + }, + { + "epoch": 1.3254437869822486, + "high_lr": 0.0007347368421052632, + "low_lr": 1.4694736842105264e-05, + "step": 504 + }, + { + "epoch": 1.3254437869822486, + "high_lr": 0.0007347368421052632, + "low_lr": 1.4694736842105264e-05, + "step": 504 + }, + { + "epoch": 1.3280736357659435, + "grad_norm": 1.0401055812835693, + "learning_rate": 0.0007342105263157895, + "loss": 1.4928, + "step": 505 + }, + { + "epoch": 1.3280736357659435, + "high_lr": 0.0007342105263157895, + "low_lr": 1.468421052631579e-05, + "step": 505 + }, + { + "epoch": 1.3280736357659435, + "high_lr": 0.0007342105263157895, + "low_lr": 1.468421052631579e-05, + "step": 505 + }, + { + "epoch": 1.3280736357659435, + "high_lr": 0.0007342105263157895, + "low_lr": 1.468421052631579e-05, + "step": 505 + }, + { + "epoch": 1.3280736357659435, + "high_lr": 0.0007342105263157895, + "low_lr": 1.468421052631579e-05, + "step": 505 + }, + { + "epoch": 1.3280736357659435, + "high_lr": 0.0007342105263157895, + "low_lr": 1.468421052631579e-05, + "step": 505 + }, + { + "epoch": 1.3280736357659435, + "high_lr": 0.0007342105263157895, + "low_lr": 1.468421052631579e-05, + "step": 505 + }, + { + "epoch": 1.3280736357659435, + "high_lr": 0.0007342105263157895, + "low_lr": 1.468421052631579e-05, + "step": 505 + }, + { + "epoch": 1.3280736357659435, + "high_lr": 0.0007342105263157895, + "low_lr": 1.468421052631579e-05, + "step": 505 + }, + { + "epoch": 1.3307034845496384, + "grad_norm": 1.0362565517425537, + "learning_rate": 0.0007336842105263159, + "loss": 1.4942, + "step": 506 + }, + { + "epoch": 1.3307034845496384, + "high_lr": 0.0007336842105263159, + "low_lr": 1.4673684210526318e-05, + "step": 506 + }, + { + "epoch": 1.3307034845496384, + "high_lr": 0.0007336842105263159, + "low_lr": 1.4673684210526318e-05, + "step": 506 + }, + { + "epoch": 1.3307034845496384, + "high_lr": 0.0007336842105263159, + "low_lr": 1.4673684210526318e-05, + "step": 506 + }, + { + "epoch": 1.3307034845496384, + "high_lr": 0.0007336842105263159, + "low_lr": 1.4673684210526318e-05, + "step": 506 + }, + { + "epoch": 1.3307034845496384, + "high_lr": 0.0007336842105263159, + "low_lr": 1.4673684210526318e-05, + "step": 506 + }, + { + "epoch": 1.3307034845496384, + "high_lr": 0.0007336842105263159, + "low_lr": 1.4673684210526318e-05, + "step": 506 + }, + { + "epoch": 1.3307034845496384, + "high_lr": 0.0007336842105263159, + "low_lr": 1.4673684210526318e-05, + "step": 506 + }, + { + "epoch": 1.3307034845496384, + "high_lr": 0.0007336842105263159, + "low_lr": 1.4673684210526318e-05, + "step": 506 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.9949714541435242, + "learning_rate": 0.0007331578947368421, + "loss": 1.4918, + "step": 507 + }, + { + "epoch": 1.3333333333333333, + "high_lr": 0.0007331578947368421, + "low_lr": 1.4663157894736843e-05, + "step": 507 + }, + { + "epoch": 1.3333333333333333, + "high_lr": 0.0007331578947368421, + "low_lr": 1.4663157894736843e-05, + "step": 507 + }, + { + "epoch": 1.3333333333333333, + "high_lr": 0.0007331578947368421, + "low_lr": 1.4663157894736843e-05, + "step": 507 + }, + { + "epoch": 1.3333333333333333, + "high_lr": 0.0007331578947368421, + "low_lr": 1.4663157894736843e-05, + "step": 507 + }, + { + "epoch": 1.3333333333333333, + "high_lr": 0.0007331578947368421, + "low_lr": 1.4663157894736843e-05, + "step": 507 + }, + { + "epoch": 1.3333333333333333, + "high_lr": 0.0007331578947368421, + "low_lr": 1.4663157894736843e-05, + "step": 507 + }, + { + "epoch": 1.3333333333333333, + "high_lr": 0.0007331578947368421, + "low_lr": 1.4663157894736843e-05, + "step": 507 + }, + { + "epoch": 1.3333333333333333, + "high_lr": 0.0007331578947368421, + "low_lr": 1.4663157894736843e-05, + "step": 507 + }, + { + "epoch": 1.3359631821170281, + "grad_norm": 1.072759985923767, + "learning_rate": 0.0007326315789473684, + "loss": 1.5207, + "step": 508 + }, + { + "epoch": 1.3359631821170281, + "high_lr": 0.0007326315789473684, + "low_lr": 1.465263157894737e-05, + "step": 508 + }, + { + "epoch": 1.3359631821170281, + "high_lr": 0.0007326315789473684, + "low_lr": 1.465263157894737e-05, + "step": 508 + }, + { + "epoch": 1.3359631821170281, + "high_lr": 0.0007326315789473684, + "low_lr": 1.465263157894737e-05, + "step": 508 + }, + { + "epoch": 1.3359631821170281, + "high_lr": 0.0007326315789473684, + "low_lr": 1.465263157894737e-05, + "step": 508 + }, + { + "epoch": 1.3359631821170281, + "high_lr": 0.0007326315789473684, + "low_lr": 1.465263157894737e-05, + "step": 508 + }, + { + "epoch": 1.3359631821170281, + "high_lr": 0.0007326315789473684, + "low_lr": 1.465263157894737e-05, + "step": 508 + }, + { + "epoch": 1.3359631821170281, + "high_lr": 0.0007326315789473684, + "low_lr": 1.465263157894737e-05, + "step": 508 + }, + { + "epoch": 1.3359631821170281, + "high_lr": 0.0007326315789473684, + "low_lr": 1.465263157894737e-05, + "step": 508 + }, + { + "epoch": 1.3385930309007232, + "grad_norm": 1.176019310951233, + "learning_rate": 0.0007321052631578947, + "loss": 1.5604, + "step": 509 + }, + { + "epoch": 1.3385930309007232, + "high_lr": 0.0007321052631578947, + "low_lr": 1.4642105263157896e-05, + "step": 509 + }, + { + "epoch": 1.3385930309007232, + "high_lr": 0.0007321052631578947, + "low_lr": 1.4642105263157896e-05, + "step": 509 + }, + { + "epoch": 1.3385930309007232, + "high_lr": 0.0007321052631578947, + "low_lr": 1.4642105263157896e-05, + "step": 509 + }, + { + "epoch": 1.3385930309007232, + "high_lr": 0.0007321052631578947, + "low_lr": 1.4642105263157896e-05, + "step": 509 + }, + { + "epoch": 1.3385930309007232, + "high_lr": 0.0007321052631578947, + "low_lr": 1.4642105263157896e-05, + "step": 509 + }, + { + "epoch": 1.3385930309007232, + "high_lr": 0.0007321052631578947, + "low_lr": 1.4642105263157896e-05, + "step": 509 + }, + { + "epoch": 1.3385930309007232, + "high_lr": 0.0007321052631578947, + "low_lr": 1.4642105263157896e-05, + "step": 509 + }, + { + "epoch": 1.3385930309007232, + "high_lr": 0.0007321052631578947, + "low_lr": 1.4642105263157896e-05, + "step": 509 + }, + { + "epoch": 1.3412228796844181, + "grad_norm": 1.09358549118042, + "learning_rate": 0.0007315789473684211, + "loss": 1.5187, + "step": 510 + }, + { + "epoch": 1.3412228796844181, + "high_lr": 0.0007315789473684211, + "low_lr": 1.4631578947368424e-05, + "step": 510 + }, + { + "epoch": 1.3412228796844181, + "high_lr": 0.0007315789473684211, + "low_lr": 1.4631578947368424e-05, + "step": 510 + }, + { + "epoch": 1.3412228796844181, + "high_lr": 0.0007315789473684211, + "low_lr": 1.4631578947368424e-05, + "step": 510 + }, + { + "epoch": 1.3412228796844181, + "high_lr": 0.0007315789473684211, + "low_lr": 1.4631578947368424e-05, + "step": 510 + }, + { + "epoch": 1.3412228796844181, + "high_lr": 0.0007315789473684211, + "low_lr": 1.4631578947368424e-05, + "step": 510 + }, + { + "epoch": 1.3412228796844181, + "high_lr": 0.0007315789473684211, + "low_lr": 1.4631578947368424e-05, + "step": 510 + }, + { + "epoch": 1.3412228796844181, + "high_lr": 0.0007315789473684211, + "low_lr": 1.4631578947368424e-05, + "step": 510 + }, + { + "epoch": 1.3412228796844181, + "high_lr": 0.0007315789473684211, + "low_lr": 1.4631578947368424e-05, + "step": 510 + }, + { + "epoch": 1.343852728468113, + "grad_norm": 1.1042243242263794, + "learning_rate": 0.0007310526315789474, + "loss": 1.5189, + "step": 511 + }, + { + "epoch": 1.343852728468113, + "high_lr": 0.0007310526315789474, + "low_lr": 1.462105263157895e-05, + "step": 511 + }, + { + "epoch": 1.343852728468113, + "high_lr": 0.0007310526315789474, + "low_lr": 1.462105263157895e-05, + "step": 511 + }, + { + "epoch": 1.343852728468113, + "high_lr": 0.0007310526315789474, + "low_lr": 1.462105263157895e-05, + "step": 511 + }, + { + "epoch": 1.343852728468113, + "high_lr": 0.0007310526315789474, + "low_lr": 1.462105263157895e-05, + "step": 511 + }, + { + "epoch": 1.343852728468113, + "high_lr": 0.0007310526315789474, + "low_lr": 1.462105263157895e-05, + "step": 511 + }, + { + "epoch": 1.343852728468113, + "high_lr": 0.0007310526315789474, + "low_lr": 1.462105263157895e-05, + "step": 511 + }, + { + "epoch": 1.343852728468113, + "high_lr": 0.0007310526315789474, + "low_lr": 1.462105263157895e-05, + "step": 511 + }, + { + "epoch": 1.343852728468113, + "high_lr": 0.0007310526315789474, + "low_lr": 1.462105263157895e-05, + "step": 511 + }, + { + "epoch": 1.346482577251808, + "grad_norm": 1.0067287683486938, + "learning_rate": 0.0007305263157894737, + "loss": 1.562, + "step": 512 + }, + { + "epoch": 1.346482577251808, + "high_lr": 0.0007305263157894737, + "low_lr": 1.4610526315789474e-05, + "step": 512 + }, + { + "epoch": 1.346482577251808, + "high_lr": 0.0007305263157894737, + "low_lr": 1.4610526315789474e-05, + "step": 512 + }, + { + "epoch": 1.346482577251808, + "high_lr": 0.0007305263157894737, + "low_lr": 1.4610526315789474e-05, + "step": 512 + }, + { + "epoch": 1.346482577251808, + "high_lr": 0.0007305263157894737, + "low_lr": 1.4610526315789474e-05, + "step": 512 + }, + { + "epoch": 1.346482577251808, + "high_lr": 0.0007305263157894737, + "low_lr": 1.4610526315789474e-05, + "step": 512 + }, + { + "epoch": 1.346482577251808, + "high_lr": 0.0007305263157894737, + "low_lr": 1.4610526315789474e-05, + "step": 512 + }, + { + "epoch": 1.346482577251808, + "high_lr": 0.0007305263157894737, + "low_lr": 1.4610526315789474e-05, + "step": 512 + }, + { + "epoch": 1.346482577251808, + "high_lr": 0.0007305263157894737, + "low_lr": 1.4610526315789474e-05, + "step": 512 + }, + { + "epoch": 1.349112426035503, + "grad_norm": 1.0002638101577759, + "learning_rate": 0.00073, + "loss": 1.4714, + "step": 513 + }, + { + "epoch": 1.349112426035503, + "high_lr": 0.00073, + "low_lr": 1.46e-05, + "step": 513 + }, + { + "epoch": 1.349112426035503, + "high_lr": 0.00073, + "low_lr": 1.46e-05, + "step": 513 + }, + { + "epoch": 1.349112426035503, + "high_lr": 0.00073, + "low_lr": 1.46e-05, + "step": 513 + }, + { + "epoch": 1.349112426035503, + "high_lr": 0.00073, + "low_lr": 1.46e-05, + "step": 513 + }, + { + "epoch": 1.349112426035503, + "high_lr": 0.00073, + "low_lr": 1.46e-05, + "step": 513 + }, + { + "epoch": 1.349112426035503, + "high_lr": 0.00073, + "low_lr": 1.46e-05, + "step": 513 + }, + { + "epoch": 1.349112426035503, + "high_lr": 0.00073, + "low_lr": 1.46e-05, + "step": 513 + }, + { + "epoch": 1.349112426035503, + "high_lr": 0.00073, + "low_lr": 1.46e-05, + "step": 513 + }, + { + "epoch": 1.3517422748191978, + "grad_norm": 1.0247275829315186, + "learning_rate": 0.0007294736842105262, + "loss": 1.5009, + "step": 514 + }, + { + "epoch": 1.3517422748191978, + "high_lr": 0.0007294736842105262, + "low_lr": 1.4589473684210527e-05, + "step": 514 + }, + { + "epoch": 1.3517422748191978, + "high_lr": 0.0007294736842105262, + "low_lr": 1.4589473684210527e-05, + "step": 514 + }, + { + "epoch": 1.3517422748191978, + "high_lr": 0.0007294736842105262, + "low_lr": 1.4589473684210527e-05, + "step": 514 + }, + { + "epoch": 1.3517422748191978, + "high_lr": 0.0007294736842105262, + "low_lr": 1.4589473684210527e-05, + "step": 514 + }, + { + "epoch": 1.3517422748191978, + "high_lr": 0.0007294736842105262, + "low_lr": 1.4589473684210527e-05, + "step": 514 + }, + { + "epoch": 1.3517422748191978, + "high_lr": 0.0007294736842105262, + "low_lr": 1.4589473684210527e-05, + "step": 514 + }, + { + "epoch": 1.3517422748191978, + "high_lr": 0.0007294736842105262, + "low_lr": 1.4589473684210527e-05, + "step": 514 + }, + { + "epoch": 1.3517422748191978, + "high_lr": 0.0007294736842105262, + "low_lr": 1.4589473684210527e-05, + "step": 514 + }, + { + "epoch": 1.354372123602893, + "grad_norm": 1.0016647577285767, + "learning_rate": 0.0007289473684210526, + "loss": 1.4438, + "step": 515 + }, + { + "epoch": 1.354372123602893, + "high_lr": 0.0007289473684210526, + "low_lr": 1.4578947368421055e-05, + "step": 515 + }, + { + "epoch": 1.354372123602893, + "high_lr": 0.0007289473684210526, + "low_lr": 1.4578947368421055e-05, + "step": 515 + }, + { + "epoch": 1.354372123602893, + "high_lr": 0.0007289473684210526, + "low_lr": 1.4578947368421055e-05, + "step": 515 + }, + { + "epoch": 1.354372123602893, + "high_lr": 0.0007289473684210526, + "low_lr": 1.4578947368421055e-05, + "step": 515 + }, + { + "epoch": 1.354372123602893, + "high_lr": 0.0007289473684210526, + "low_lr": 1.4578947368421055e-05, + "step": 515 + }, + { + "epoch": 1.354372123602893, + "high_lr": 0.0007289473684210526, + "low_lr": 1.4578947368421055e-05, + "step": 515 + }, + { + "epoch": 1.354372123602893, + "high_lr": 0.0007289473684210526, + "low_lr": 1.4578947368421055e-05, + "step": 515 + }, + { + "epoch": 1.354372123602893, + "high_lr": 0.0007289473684210526, + "low_lr": 1.4578947368421055e-05, + "step": 515 + }, + { + "epoch": 1.3570019723865878, + "grad_norm": 1.0843017101287842, + "learning_rate": 0.000728421052631579, + "loss": 1.5361, + "step": 516 + }, + { + "epoch": 1.3570019723865878, + "high_lr": 0.000728421052631579, + "low_lr": 1.456842105263158e-05, + "step": 516 + }, + { + "epoch": 1.3570019723865878, + "high_lr": 0.000728421052631579, + "low_lr": 1.456842105263158e-05, + "step": 516 + }, + { + "epoch": 1.3570019723865878, + "high_lr": 0.000728421052631579, + "low_lr": 1.456842105263158e-05, + "step": 516 + }, + { + "epoch": 1.3570019723865878, + "high_lr": 0.000728421052631579, + "low_lr": 1.456842105263158e-05, + "step": 516 + }, + { + "epoch": 1.3570019723865878, + "high_lr": 0.000728421052631579, + "low_lr": 1.456842105263158e-05, + "step": 516 + }, + { + "epoch": 1.3570019723865878, + "high_lr": 0.000728421052631579, + "low_lr": 1.456842105263158e-05, + "step": 516 + }, + { + "epoch": 1.3570019723865878, + "high_lr": 0.000728421052631579, + "low_lr": 1.456842105263158e-05, + "step": 516 + }, + { + "epoch": 1.3570019723865878, + "high_lr": 0.000728421052631579, + "low_lr": 1.456842105263158e-05, + "step": 516 + }, + { + "epoch": 1.3596318211702827, + "grad_norm": 1.0744608640670776, + "learning_rate": 0.0007278947368421053, + "loss": 1.4725, + "step": 517 + }, + { + "epoch": 1.3596318211702827, + "high_lr": 0.0007278947368421053, + "low_lr": 1.4557894736842106e-05, + "step": 517 + }, + { + "epoch": 1.3596318211702827, + "high_lr": 0.0007278947368421053, + "low_lr": 1.4557894736842106e-05, + "step": 517 + }, + { + "epoch": 1.3596318211702827, + "high_lr": 0.0007278947368421053, + "low_lr": 1.4557894736842106e-05, + "step": 517 + }, + { + "epoch": 1.3596318211702827, + "high_lr": 0.0007278947368421053, + "low_lr": 1.4557894736842106e-05, + "step": 517 + }, + { + "epoch": 1.3596318211702827, + "high_lr": 0.0007278947368421053, + "low_lr": 1.4557894736842106e-05, + "step": 517 + }, + { + "epoch": 1.3596318211702827, + "high_lr": 0.0007278947368421053, + "low_lr": 1.4557894736842106e-05, + "step": 517 + }, + { + "epoch": 1.3596318211702827, + "high_lr": 0.0007278947368421053, + "low_lr": 1.4557894736842106e-05, + "step": 517 + }, + { + "epoch": 1.3596318211702827, + "high_lr": 0.0007278947368421053, + "low_lr": 1.4557894736842106e-05, + "step": 517 + }, + { + "epoch": 1.3622616699539776, + "grad_norm": 1.1196846961975098, + "learning_rate": 0.0007273684210526316, + "loss": 1.5027, + "step": 518 + }, + { + "epoch": 1.3622616699539776, + "high_lr": 0.0007273684210526316, + "low_lr": 1.4547368421052632e-05, + "step": 518 + }, + { + "epoch": 1.3622616699539776, + "high_lr": 0.0007273684210526316, + "low_lr": 1.4547368421052632e-05, + "step": 518 + }, + { + "epoch": 1.3622616699539776, + "high_lr": 0.0007273684210526316, + "low_lr": 1.4547368421052632e-05, + "step": 518 + }, + { + "epoch": 1.3622616699539776, + "high_lr": 0.0007273684210526316, + "low_lr": 1.4547368421052632e-05, + "step": 518 + }, + { + "epoch": 1.3622616699539776, + "high_lr": 0.0007273684210526316, + "low_lr": 1.4547368421052632e-05, + "step": 518 + }, + { + "epoch": 1.3622616699539776, + "high_lr": 0.0007273684210526316, + "low_lr": 1.4547368421052632e-05, + "step": 518 + }, + { + "epoch": 1.3622616699539776, + "high_lr": 0.0007273684210526316, + "low_lr": 1.4547368421052632e-05, + "step": 518 + }, + { + "epoch": 1.3622616699539776, + "high_lr": 0.0007273684210526316, + "low_lr": 1.4547368421052632e-05, + "step": 518 + }, + { + "epoch": 1.3648915187376724, + "grad_norm": 1.1462395191192627, + "learning_rate": 0.0007268421052631579, + "loss": 1.5538, + "step": 519 + }, + { + "epoch": 1.3648915187376724, + "high_lr": 0.0007268421052631579, + "low_lr": 1.4536842105263159e-05, + "step": 519 + }, + { + "epoch": 1.3648915187376724, + "high_lr": 0.0007268421052631579, + "low_lr": 1.4536842105263159e-05, + "step": 519 + }, + { + "epoch": 1.3648915187376724, + "high_lr": 0.0007268421052631579, + "low_lr": 1.4536842105263159e-05, + "step": 519 + }, + { + "epoch": 1.3648915187376724, + "high_lr": 0.0007268421052631579, + "low_lr": 1.4536842105263159e-05, + "step": 519 + }, + { + "epoch": 1.3648915187376724, + "high_lr": 0.0007268421052631579, + "low_lr": 1.4536842105263159e-05, + "step": 519 + }, + { + "epoch": 1.3648915187376724, + "high_lr": 0.0007268421052631579, + "low_lr": 1.4536842105263159e-05, + "step": 519 + }, + { + "epoch": 1.3648915187376724, + "high_lr": 0.0007268421052631579, + "low_lr": 1.4536842105263159e-05, + "step": 519 + }, + { + "epoch": 1.3648915187376724, + "high_lr": 0.0007268421052631579, + "low_lr": 1.4536842105263159e-05, + "step": 519 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 1.1063448190689087, + "learning_rate": 0.0007263157894736843, + "loss": 1.5369, + "step": 520 + }, + { + "epoch": 1.3675213675213675, + "high_lr": 0.0007263157894736843, + "low_lr": 1.4526315789473687e-05, + "step": 520 + }, + { + "epoch": 1.3675213675213675, + "high_lr": 0.0007263157894736843, + "low_lr": 1.4526315789473687e-05, + "step": 520 + }, + { + "epoch": 1.3675213675213675, + "high_lr": 0.0007263157894736843, + "low_lr": 1.4526315789473687e-05, + "step": 520 + }, + { + "epoch": 1.3675213675213675, + "high_lr": 0.0007263157894736843, + "low_lr": 1.4526315789473687e-05, + "step": 520 + }, + { + "epoch": 1.3675213675213675, + "high_lr": 0.0007263157894736843, + "low_lr": 1.4526315789473687e-05, + "step": 520 + }, + { + "epoch": 1.3675213675213675, + "high_lr": 0.0007263157894736843, + "low_lr": 1.4526315789473687e-05, + "step": 520 + }, + { + "epoch": 1.3675213675213675, + "high_lr": 0.0007263157894736843, + "low_lr": 1.4526315789473687e-05, + "step": 520 + }, + { + "epoch": 1.3675213675213675, + "high_lr": 0.0007263157894736843, + "low_lr": 1.4526315789473687e-05, + "step": 520 + }, + { + "epoch": 1.3701512163050624, + "grad_norm": 1.0946933031082153, + "learning_rate": 0.0007257894736842106, + "loss": 1.5006, + "step": 521 + }, + { + "epoch": 1.3701512163050624, + "high_lr": 0.0007257894736842106, + "low_lr": 1.4515789473684211e-05, + "step": 521 + }, + { + "epoch": 1.3701512163050624, + "high_lr": 0.0007257894736842106, + "low_lr": 1.4515789473684211e-05, + "step": 521 + }, + { + "epoch": 1.3701512163050624, + "high_lr": 0.0007257894736842106, + "low_lr": 1.4515789473684211e-05, + "step": 521 + }, + { + "epoch": 1.3701512163050624, + "high_lr": 0.0007257894736842106, + "low_lr": 1.4515789473684211e-05, + "step": 521 + }, + { + "epoch": 1.3701512163050624, + "high_lr": 0.0007257894736842106, + "low_lr": 1.4515789473684211e-05, + "step": 521 + }, + { + "epoch": 1.3701512163050624, + "high_lr": 0.0007257894736842106, + "low_lr": 1.4515789473684211e-05, + "step": 521 + }, + { + "epoch": 1.3701512163050624, + "high_lr": 0.0007257894736842106, + "low_lr": 1.4515789473684211e-05, + "step": 521 + }, + { + "epoch": 1.3701512163050624, + "high_lr": 0.0007257894736842106, + "low_lr": 1.4515789473684211e-05, + "step": 521 + }, + { + "epoch": 1.3727810650887573, + "grad_norm": 1.0498840808868408, + "learning_rate": 0.0007252631578947369, + "loss": 1.4724, + "step": 522 + }, + { + "epoch": 1.3727810650887573, + "high_lr": 0.0007252631578947369, + "low_lr": 1.4505263157894738e-05, + "step": 522 + }, + { + "epoch": 1.3727810650887573, + "high_lr": 0.0007252631578947369, + "low_lr": 1.4505263157894738e-05, + "step": 522 + }, + { + "epoch": 1.3727810650887573, + "high_lr": 0.0007252631578947369, + "low_lr": 1.4505263157894738e-05, + "step": 522 + }, + { + "epoch": 1.3727810650887573, + "high_lr": 0.0007252631578947369, + "low_lr": 1.4505263157894738e-05, + "step": 522 + }, + { + "epoch": 1.3727810650887573, + "high_lr": 0.0007252631578947369, + "low_lr": 1.4505263157894738e-05, + "step": 522 + }, + { + "epoch": 1.3727810650887573, + "high_lr": 0.0007252631578947369, + "low_lr": 1.4505263157894738e-05, + "step": 522 + }, + { + "epoch": 1.3727810650887573, + "high_lr": 0.0007252631578947369, + "low_lr": 1.4505263157894738e-05, + "step": 522 + }, + { + "epoch": 1.3727810650887573, + "high_lr": 0.0007252631578947369, + "low_lr": 1.4505263157894738e-05, + "step": 522 + }, + { + "epoch": 1.3754109138724524, + "grad_norm": 1.0840269327163696, + "learning_rate": 0.0007247368421052631, + "loss": 1.4561, + "step": 523 + }, + { + "epoch": 1.3754109138724524, + "high_lr": 0.0007247368421052631, + "low_lr": 1.4494736842105264e-05, + "step": 523 + }, + { + "epoch": 1.3754109138724524, + "high_lr": 0.0007247368421052631, + "low_lr": 1.4494736842105264e-05, + "step": 523 + }, + { + "epoch": 1.3754109138724524, + "high_lr": 0.0007247368421052631, + "low_lr": 1.4494736842105264e-05, + "step": 523 + }, + { + "epoch": 1.3754109138724524, + "high_lr": 0.0007247368421052631, + "low_lr": 1.4494736842105264e-05, + "step": 523 + }, + { + "epoch": 1.3754109138724524, + "high_lr": 0.0007247368421052631, + "low_lr": 1.4494736842105264e-05, + "step": 523 + }, + { + "epoch": 1.3754109138724524, + "high_lr": 0.0007247368421052631, + "low_lr": 1.4494736842105264e-05, + "step": 523 + }, + { + "epoch": 1.3754109138724524, + "high_lr": 0.0007247368421052631, + "low_lr": 1.4494736842105264e-05, + "step": 523 + }, + { + "epoch": 1.3754109138724524, + "high_lr": 0.0007247368421052631, + "low_lr": 1.4494736842105264e-05, + "step": 523 + }, + { + "epoch": 1.3780407626561473, + "grad_norm": 1.0193777084350586, + "learning_rate": 0.0007242105263157895, + "loss": 1.4748, + "step": 524 + }, + { + "epoch": 1.3780407626561473, + "high_lr": 0.0007242105263157895, + "low_lr": 1.4484210526315792e-05, + "step": 524 + }, + { + "epoch": 1.3780407626561473, + "high_lr": 0.0007242105263157895, + "low_lr": 1.4484210526315792e-05, + "step": 524 + }, + { + "epoch": 1.3780407626561473, + "high_lr": 0.0007242105263157895, + "low_lr": 1.4484210526315792e-05, + "step": 524 + }, + { + "epoch": 1.3780407626561473, + "high_lr": 0.0007242105263157895, + "low_lr": 1.4484210526315792e-05, + "step": 524 + }, + { + "epoch": 1.3780407626561473, + "high_lr": 0.0007242105263157895, + "low_lr": 1.4484210526315792e-05, + "step": 524 + }, + { + "epoch": 1.3780407626561473, + "high_lr": 0.0007242105263157895, + "low_lr": 1.4484210526315792e-05, + "step": 524 + }, + { + "epoch": 1.3780407626561473, + "high_lr": 0.0007242105263157895, + "low_lr": 1.4484210526315792e-05, + "step": 524 + }, + { + "epoch": 1.3780407626561473, + "high_lr": 0.0007242105263157895, + "low_lr": 1.4484210526315792e-05, + "step": 524 + }, + { + "epoch": 1.3806706114398422, + "grad_norm": 1.0453554391860962, + "learning_rate": 0.0007236842105263158, + "loss": 1.4933, + "step": 525 + }, + { + "epoch": 1.3806706114398422, + "high_lr": 0.0007236842105263158, + "low_lr": 1.4473684210526317e-05, + "step": 525 + }, + { + "epoch": 1.3806706114398422, + "high_lr": 0.0007236842105263158, + "low_lr": 1.4473684210526317e-05, + "step": 525 + }, + { + "epoch": 1.3806706114398422, + "high_lr": 0.0007236842105263158, + "low_lr": 1.4473684210526317e-05, + "step": 525 + }, + { + "epoch": 1.3806706114398422, + "high_lr": 0.0007236842105263158, + "low_lr": 1.4473684210526317e-05, + "step": 525 + }, + { + "epoch": 1.3806706114398422, + "high_lr": 0.0007236842105263158, + "low_lr": 1.4473684210526317e-05, + "step": 525 + }, + { + "epoch": 1.3806706114398422, + "high_lr": 0.0007236842105263158, + "low_lr": 1.4473684210526317e-05, + "step": 525 + }, + { + "epoch": 1.3806706114398422, + "high_lr": 0.0007236842105263158, + "low_lr": 1.4473684210526317e-05, + "step": 525 + }, + { + "epoch": 1.3806706114398422, + "high_lr": 0.0007236842105263158, + "low_lr": 1.4473684210526317e-05, + "step": 525 + }, + { + "epoch": 1.3833004602235373, + "grad_norm": 1.0979812145233154, + "learning_rate": 0.0007231578947368421, + "loss": 1.49, + "step": 526 + }, + { + "epoch": 1.3833004602235373, + "high_lr": 0.0007231578947368421, + "low_lr": 1.4463157894736843e-05, + "step": 526 + }, + { + "epoch": 1.3833004602235373, + "high_lr": 0.0007231578947368421, + "low_lr": 1.4463157894736843e-05, + "step": 526 + }, + { + "epoch": 1.3833004602235373, + "high_lr": 0.0007231578947368421, + "low_lr": 1.4463157894736843e-05, + "step": 526 + }, + { + "epoch": 1.3833004602235373, + "high_lr": 0.0007231578947368421, + "low_lr": 1.4463157894736843e-05, + "step": 526 + }, + { + "epoch": 1.3833004602235373, + "high_lr": 0.0007231578947368421, + "low_lr": 1.4463157894736843e-05, + "step": 526 + }, + { + "epoch": 1.3833004602235373, + "high_lr": 0.0007231578947368421, + "low_lr": 1.4463157894736843e-05, + "step": 526 + }, + { + "epoch": 1.3833004602235373, + "high_lr": 0.0007231578947368421, + "low_lr": 1.4463157894736843e-05, + "step": 526 + }, + { + "epoch": 1.3833004602235373, + "high_lr": 0.0007231578947368421, + "low_lr": 1.4463157894736843e-05, + "step": 526 + }, + { + "epoch": 1.3859303090072321, + "grad_norm": 1.0743083953857422, + "learning_rate": 0.0007226315789473684, + "loss": 1.5036, + "step": 527 + }, + { + "epoch": 1.3859303090072321, + "high_lr": 0.0007226315789473684, + "low_lr": 1.445263157894737e-05, + "step": 527 + }, + { + "epoch": 1.3859303090072321, + "high_lr": 0.0007226315789473684, + "low_lr": 1.445263157894737e-05, + "step": 527 + }, + { + "epoch": 1.3859303090072321, + "high_lr": 0.0007226315789473684, + "low_lr": 1.445263157894737e-05, + "step": 527 + }, + { + "epoch": 1.3859303090072321, + "high_lr": 0.0007226315789473684, + "low_lr": 1.445263157894737e-05, + "step": 527 + }, + { + "epoch": 1.3859303090072321, + "high_lr": 0.0007226315789473684, + "low_lr": 1.445263157894737e-05, + "step": 527 + }, + { + "epoch": 1.3859303090072321, + "high_lr": 0.0007226315789473684, + "low_lr": 1.445263157894737e-05, + "step": 527 + }, + { + "epoch": 1.3859303090072321, + "high_lr": 0.0007226315789473684, + "low_lr": 1.445263157894737e-05, + "step": 527 + }, + { + "epoch": 1.3859303090072321, + "high_lr": 0.0007226315789473684, + "low_lr": 1.445263157894737e-05, + "step": 527 + }, + { + "epoch": 1.388560157790927, + "grad_norm": 1.0622069835662842, + "learning_rate": 0.0007221052631578947, + "loss": 1.5221, + "step": 528 + }, + { + "epoch": 1.388560157790927, + "high_lr": 0.0007221052631578947, + "low_lr": 1.4442105263157896e-05, + "step": 528 + }, + { + "epoch": 1.388560157790927, + "high_lr": 0.0007221052631578947, + "low_lr": 1.4442105263157896e-05, + "step": 528 + }, + { + "epoch": 1.388560157790927, + "high_lr": 0.0007221052631578947, + "low_lr": 1.4442105263157896e-05, + "step": 528 + }, + { + "epoch": 1.388560157790927, + "high_lr": 0.0007221052631578947, + "low_lr": 1.4442105263157896e-05, + "step": 528 + }, + { + "epoch": 1.388560157790927, + "high_lr": 0.0007221052631578947, + "low_lr": 1.4442105263157896e-05, + "step": 528 + }, + { + "epoch": 1.388560157790927, + "high_lr": 0.0007221052631578947, + "low_lr": 1.4442105263157896e-05, + "step": 528 + }, + { + "epoch": 1.388560157790927, + "high_lr": 0.0007221052631578947, + "low_lr": 1.4442105263157896e-05, + "step": 528 + }, + { + "epoch": 1.388560157790927, + "high_lr": 0.0007221052631578947, + "low_lr": 1.4442105263157896e-05, + "step": 528 + }, + { + "epoch": 1.3911900065746219, + "grad_norm": 1.0960756540298462, + "learning_rate": 0.0007215789473684211, + "loss": 1.5349, + "step": 529 + }, + { + "epoch": 1.3911900065746219, + "high_lr": 0.0007215789473684211, + "low_lr": 1.4431578947368424e-05, + "step": 529 + }, + { + "epoch": 1.3911900065746219, + "high_lr": 0.0007215789473684211, + "low_lr": 1.4431578947368424e-05, + "step": 529 + }, + { + "epoch": 1.3911900065746219, + "high_lr": 0.0007215789473684211, + "low_lr": 1.4431578947368424e-05, + "step": 529 + }, + { + "epoch": 1.3911900065746219, + "high_lr": 0.0007215789473684211, + "low_lr": 1.4431578947368424e-05, + "step": 529 + }, + { + "epoch": 1.3911900065746219, + "high_lr": 0.0007215789473684211, + "low_lr": 1.4431578947368424e-05, + "step": 529 + }, + { + "epoch": 1.3911900065746219, + "high_lr": 0.0007215789473684211, + "low_lr": 1.4431578947368424e-05, + "step": 529 + }, + { + "epoch": 1.3911900065746219, + "high_lr": 0.0007215789473684211, + "low_lr": 1.4431578947368424e-05, + "step": 529 + }, + { + "epoch": 1.3911900065746219, + "high_lr": 0.0007215789473684211, + "low_lr": 1.4431578947368424e-05, + "step": 529 + }, + { + "epoch": 1.3938198553583168, + "grad_norm": 1.1283634901046753, + "learning_rate": 0.0007210526315789474, + "loss": 1.5013, + "step": 530 + }, + { + "epoch": 1.3938198553583168, + "high_lr": 0.0007210526315789474, + "low_lr": 1.4421052631578948e-05, + "step": 530 + }, + { + "epoch": 1.3938198553583168, + "high_lr": 0.0007210526315789474, + "low_lr": 1.4421052631578948e-05, + "step": 530 + }, + { + "epoch": 1.3938198553583168, + "high_lr": 0.0007210526315789474, + "low_lr": 1.4421052631578948e-05, + "step": 530 + }, + { + "epoch": 1.3938198553583168, + "high_lr": 0.0007210526315789474, + "low_lr": 1.4421052631578948e-05, + "step": 530 + }, + { + "epoch": 1.3938198553583168, + "high_lr": 0.0007210526315789474, + "low_lr": 1.4421052631578948e-05, + "step": 530 + }, + { + "epoch": 1.3938198553583168, + "high_lr": 0.0007210526315789474, + "low_lr": 1.4421052631578948e-05, + "step": 530 + }, + { + "epoch": 1.3938198553583168, + "high_lr": 0.0007210526315789474, + "low_lr": 1.4421052631578948e-05, + "step": 530 + }, + { + "epoch": 1.3938198553583168, + "high_lr": 0.0007210526315789474, + "low_lr": 1.4421052631578948e-05, + "step": 530 + }, + { + "epoch": 1.3964497041420119, + "grad_norm": 1.0277425050735474, + "learning_rate": 0.0007205263157894737, + "loss": 1.5036, + "step": 531 + }, + { + "epoch": 1.3964497041420119, + "high_lr": 0.0007205263157894737, + "low_lr": 1.4410526315789475e-05, + "step": 531 + }, + { + "epoch": 1.3964497041420119, + "high_lr": 0.0007205263157894737, + "low_lr": 1.4410526315789475e-05, + "step": 531 + }, + { + "epoch": 1.3964497041420119, + "high_lr": 0.0007205263157894737, + "low_lr": 1.4410526315789475e-05, + "step": 531 + }, + { + "epoch": 1.3964497041420119, + "high_lr": 0.0007205263157894737, + "low_lr": 1.4410526315789475e-05, + "step": 531 + }, + { + "epoch": 1.3964497041420119, + "high_lr": 0.0007205263157894737, + "low_lr": 1.4410526315789475e-05, + "step": 531 + }, + { + "epoch": 1.3964497041420119, + "high_lr": 0.0007205263157894737, + "low_lr": 1.4410526315789475e-05, + "step": 531 + }, + { + "epoch": 1.3964497041420119, + "high_lr": 0.0007205263157894737, + "low_lr": 1.4410526315789475e-05, + "step": 531 + }, + { + "epoch": 1.3964497041420119, + "high_lr": 0.0007205263157894737, + "low_lr": 1.4410526315789475e-05, + "step": 531 + }, + { + "epoch": 1.3990795529257067, + "grad_norm": 1.084513545036316, + "learning_rate": 0.0007199999999999999, + "loss": 1.4435, + "step": 532 + }, + { + "epoch": 1.3990795529257067, + "high_lr": 0.0007199999999999999, + "low_lr": 1.4400000000000001e-05, + "step": 532 + }, + { + "epoch": 1.3990795529257067, + "high_lr": 0.0007199999999999999, + "low_lr": 1.4400000000000001e-05, + "step": 532 + }, + { + "epoch": 1.3990795529257067, + "high_lr": 0.0007199999999999999, + "low_lr": 1.4400000000000001e-05, + "step": 532 + }, + { + "epoch": 1.3990795529257067, + "high_lr": 0.0007199999999999999, + "low_lr": 1.4400000000000001e-05, + "step": 532 + }, + { + "epoch": 1.3990795529257067, + "high_lr": 0.0007199999999999999, + "low_lr": 1.4400000000000001e-05, + "step": 532 + }, + { + "epoch": 1.3990795529257067, + "high_lr": 0.0007199999999999999, + "low_lr": 1.4400000000000001e-05, + "step": 532 + }, + { + "epoch": 1.3990795529257067, + "high_lr": 0.0007199999999999999, + "low_lr": 1.4400000000000001e-05, + "step": 532 + }, + { + "epoch": 1.3990795529257067, + "high_lr": 0.0007199999999999999, + "low_lr": 1.4400000000000001e-05, + "step": 532 + }, + { + "epoch": 1.4017094017094016, + "grad_norm": 1.1466425657272339, + "learning_rate": 0.0007194736842105263, + "loss": 1.5007, + "step": 533 + }, + { + "epoch": 1.4017094017094016, + "high_lr": 0.0007194736842105263, + "low_lr": 1.4389473684210526e-05, + "step": 533 + }, + { + "epoch": 1.4017094017094016, + "high_lr": 0.0007194736842105263, + "low_lr": 1.4389473684210526e-05, + "step": 533 + }, + { + "epoch": 1.4017094017094016, + "high_lr": 0.0007194736842105263, + "low_lr": 1.4389473684210526e-05, + "step": 533 + }, + { + "epoch": 1.4017094017094016, + "high_lr": 0.0007194736842105263, + "low_lr": 1.4389473684210526e-05, + "step": 533 + }, + { + "epoch": 1.4017094017094016, + "high_lr": 0.0007194736842105263, + "low_lr": 1.4389473684210526e-05, + "step": 533 + }, + { + "epoch": 1.4017094017094016, + "high_lr": 0.0007194736842105263, + "low_lr": 1.4389473684210526e-05, + "step": 533 + }, + { + "epoch": 1.4017094017094016, + "high_lr": 0.0007194736842105263, + "low_lr": 1.4389473684210526e-05, + "step": 533 + }, + { + "epoch": 1.4017094017094016, + "high_lr": 0.0007194736842105263, + "low_lr": 1.4389473684210526e-05, + "step": 533 + }, + { + "epoch": 1.4043392504930967, + "grad_norm": 1.099838376045227, + "learning_rate": 0.0007189473684210527, + "loss": 1.5459, + "step": 534 + }, + { + "epoch": 1.4043392504930967, + "high_lr": 0.0007189473684210527, + "low_lr": 1.4378947368421054e-05, + "step": 534 + }, + { + "epoch": 1.4043392504930967, + "high_lr": 0.0007189473684210527, + "low_lr": 1.4378947368421054e-05, + "step": 534 + }, + { + "epoch": 1.4043392504930967, + "high_lr": 0.0007189473684210527, + "low_lr": 1.4378947368421054e-05, + "step": 534 + }, + { + "epoch": 1.4043392504930967, + "high_lr": 0.0007189473684210527, + "low_lr": 1.4378947368421054e-05, + "step": 534 + }, + { + "epoch": 1.4043392504930967, + "high_lr": 0.0007189473684210527, + "low_lr": 1.4378947368421054e-05, + "step": 534 + }, + { + "epoch": 1.4043392504930967, + "high_lr": 0.0007189473684210527, + "low_lr": 1.4378947368421054e-05, + "step": 534 + }, + { + "epoch": 1.4043392504930967, + "high_lr": 0.0007189473684210527, + "low_lr": 1.4378947368421054e-05, + "step": 534 + }, + { + "epoch": 1.4043392504930967, + "high_lr": 0.0007189473684210527, + "low_lr": 1.4378947368421054e-05, + "step": 534 + }, + { + "epoch": 1.4069690992767916, + "grad_norm": 1.1151689291000366, + "learning_rate": 0.000718421052631579, + "loss": 1.4911, + "step": 535 + }, + { + "epoch": 1.4069690992767916, + "high_lr": 0.000718421052631579, + "low_lr": 1.436842105263158e-05, + "step": 535 + }, + { + "epoch": 1.4069690992767916, + "high_lr": 0.000718421052631579, + "low_lr": 1.436842105263158e-05, + "step": 535 + }, + { + "epoch": 1.4069690992767916, + "high_lr": 0.000718421052631579, + "low_lr": 1.436842105263158e-05, + "step": 535 + }, + { + "epoch": 1.4069690992767916, + "high_lr": 0.000718421052631579, + "low_lr": 1.436842105263158e-05, + "step": 535 + }, + { + "epoch": 1.4069690992767916, + "high_lr": 0.000718421052631579, + "low_lr": 1.436842105263158e-05, + "step": 535 + }, + { + "epoch": 1.4069690992767916, + "high_lr": 0.000718421052631579, + "low_lr": 1.436842105263158e-05, + "step": 535 + }, + { + "epoch": 1.4069690992767916, + "high_lr": 0.000718421052631579, + "low_lr": 1.436842105263158e-05, + "step": 535 + }, + { + "epoch": 1.4069690992767916, + "high_lr": 0.000718421052631579, + "low_lr": 1.436842105263158e-05, + "step": 535 + }, + { + "epoch": 1.4095989480604865, + "grad_norm": 1.112339973449707, + "learning_rate": 0.0007178947368421053, + "loss": 1.4461, + "step": 536 + }, + { + "epoch": 1.4095989480604865, + "high_lr": 0.0007178947368421053, + "low_lr": 1.4357894736842106e-05, + "step": 536 + }, + { + "epoch": 1.4095989480604865, + "high_lr": 0.0007178947368421053, + "low_lr": 1.4357894736842106e-05, + "step": 536 + }, + { + "epoch": 1.4095989480604865, + "high_lr": 0.0007178947368421053, + "low_lr": 1.4357894736842106e-05, + "step": 536 + }, + { + "epoch": 1.4095989480604865, + "high_lr": 0.0007178947368421053, + "low_lr": 1.4357894736842106e-05, + "step": 536 + }, + { + "epoch": 1.4095989480604865, + "high_lr": 0.0007178947368421053, + "low_lr": 1.4357894736842106e-05, + "step": 536 + }, + { + "epoch": 1.4095989480604865, + "high_lr": 0.0007178947368421053, + "low_lr": 1.4357894736842106e-05, + "step": 536 + }, + { + "epoch": 1.4095989480604865, + "high_lr": 0.0007178947368421053, + "low_lr": 1.4357894736842106e-05, + "step": 536 + }, + { + "epoch": 1.4095989480604865, + "high_lr": 0.0007178947368421053, + "low_lr": 1.4357894736842106e-05, + "step": 536 + }, + { + "epoch": 1.4122287968441816, + "grad_norm": 1.1630061864852905, + "learning_rate": 0.0007173684210526316, + "loss": 1.4448, + "step": 537 + }, + { + "epoch": 1.4122287968441816, + "high_lr": 0.0007173684210526316, + "low_lr": 1.4347368421052633e-05, + "step": 537 + }, + { + "epoch": 1.4122287968441816, + "high_lr": 0.0007173684210526316, + "low_lr": 1.4347368421052633e-05, + "step": 537 + }, + { + "epoch": 1.4122287968441816, + "high_lr": 0.0007173684210526316, + "low_lr": 1.4347368421052633e-05, + "step": 537 + }, + { + "epoch": 1.4122287968441816, + "high_lr": 0.0007173684210526316, + "low_lr": 1.4347368421052633e-05, + "step": 537 + }, + { + "epoch": 1.4122287968441816, + "high_lr": 0.0007173684210526316, + "low_lr": 1.4347368421052633e-05, + "step": 537 + }, + { + "epoch": 1.4122287968441816, + "high_lr": 0.0007173684210526316, + "low_lr": 1.4347368421052633e-05, + "step": 537 + }, + { + "epoch": 1.4122287968441816, + "high_lr": 0.0007173684210526316, + "low_lr": 1.4347368421052633e-05, + "step": 537 + }, + { + "epoch": 1.4122287968441816, + "high_lr": 0.0007173684210526316, + "low_lr": 1.4347368421052633e-05, + "step": 537 + }, + { + "epoch": 1.4148586456278764, + "grad_norm": 1.1844900846481323, + "learning_rate": 0.000716842105263158, + "loss": 1.4975, + "step": 538 + }, + { + "epoch": 1.4148586456278764, + "high_lr": 0.000716842105263158, + "low_lr": 1.433684210526316e-05, + "step": 538 + }, + { + "epoch": 1.4148586456278764, + "high_lr": 0.000716842105263158, + "low_lr": 1.433684210526316e-05, + "step": 538 + }, + { + "epoch": 1.4148586456278764, + "high_lr": 0.000716842105263158, + "low_lr": 1.433684210526316e-05, + "step": 538 + }, + { + "epoch": 1.4148586456278764, + "high_lr": 0.000716842105263158, + "low_lr": 1.433684210526316e-05, + "step": 538 + }, + { + "epoch": 1.4148586456278764, + "high_lr": 0.000716842105263158, + "low_lr": 1.433684210526316e-05, + "step": 538 + }, + { + "epoch": 1.4148586456278764, + "high_lr": 0.000716842105263158, + "low_lr": 1.433684210526316e-05, + "step": 538 + }, + { + "epoch": 1.4148586456278764, + "high_lr": 0.000716842105263158, + "low_lr": 1.433684210526316e-05, + "step": 538 + }, + { + "epoch": 1.4148586456278764, + "high_lr": 0.000716842105263158, + "low_lr": 1.433684210526316e-05, + "step": 538 + }, + { + "epoch": 1.4174884944115713, + "grad_norm": 1.059999704360962, + "learning_rate": 0.0007163157894736843, + "loss": 1.5175, + "step": 539 + }, + { + "epoch": 1.4174884944115713, + "high_lr": 0.0007163157894736843, + "low_lr": 1.4326315789473685e-05, + "step": 539 + }, + { + "epoch": 1.4174884944115713, + "high_lr": 0.0007163157894736843, + "low_lr": 1.4326315789473685e-05, + "step": 539 + }, + { + "epoch": 1.4174884944115713, + "high_lr": 0.0007163157894736843, + "low_lr": 1.4326315789473685e-05, + "step": 539 + }, + { + "epoch": 1.4174884944115713, + "high_lr": 0.0007163157894736843, + "low_lr": 1.4326315789473685e-05, + "step": 539 + }, + { + "epoch": 1.4174884944115713, + "high_lr": 0.0007163157894736843, + "low_lr": 1.4326315789473685e-05, + "step": 539 + }, + { + "epoch": 1.4174884944115713, + "high_lr": 0.0007163157894736843, + "low_lr": 1.4326315789473685e-05, + "step": 539 + }, + { + "epoch": 1.4174884944115713, + "high_lr": 0.0007163157894736843, + "low_lr": 1.4326315789473685e-05, + "step": 539 + }, + { + "epoch": 1.4174884944115713, + "high_lr": 0.0007163157894736843, + "low_lr": 1.4326315789473685e-05, + "step": 539 + }, + { + "epoch": 1.4201183431952662, + "grad_norm": 1.0168579816818237, + "learning_rate": 0.0007157894736842105, + "loss": 1.4733, + "step": 540 + }, + { + "epoch": 1.4201183431952662, + "high_lr": 0.0007157894736842105, + "low_lr": 1.4315789473684212e-05, + "step": 540 + }, + { + "epoch": 1.4201183431952662, + "high_lr": 0.0007157894736842105, + "low_lr": 1.4315789473684212e-05, + "step": 540 + }, + { + "epoch": 1.4201183431952662, + "high_lr": 0.0007157894736842105, + "low_lr": 1.4315789473684212e-05, + "step": 540 + }, + { + "epoch": 1.4201183431952662, + "high_lr": 0.0007157894736842105, + "low_lr": 1.4315789473684212e-05, + "step": 540 + }, + { + "epoch": 1.4201183431952662, + "high_lr": 0.0007157894736842105, + "low_lr": 1.4315789473684212e-05, + "step": 540 + }, + { + "epoch": 1.4201183431952662, + "high_lr": 0.0007157894736842105, + "low_lr": 1.4315789473684212e-05, + "step": 540 + }, + { + "epoch": 1.4201183431952662, + "high_lr": 0.0007157894736842105, + "low_lr": 1.4315789473684212e-05, + "step": 540 + }, + { + "epoch": 1.4201183431952662, + "high_lr": 0.0007157894736842105, + "low_lr": 1.4315789473684212e-05, + "step": 540 + }, + { + "epoch": 1.422748191978961, + "grad_norm": 1.0919007062911987, + "learning_rate": 0.0007152631578947368, + "loss": 1.4975, + "step": 541 + }, + { + "epoch": 1.422748191978961, + "high_lr": 0.0007152631578947368, + "low_lr": 1.4305263157894738e-05, + "step": 541 + }, + { + "epoch": 1.422748191978961, + "high_lr": 0.0007152631578947368, + "low_lr": 1.4305263157894738e-05, + "step": 541 + }, + { + "epoch": 1.422748191978961, + "high_lr": 0.0007152631578947368, + "low_lr": 1.4305263157894738e-05, + "step": 541 + }, + { + "epoch": 1.422748191978961, + "high_lr": 0.0007152631578947368, + "low_lr": 1.4305263157894738e-05, + "step": 541 + }, + { + "epoch": 1.422748191978961, + "high_lr": 0.0007152631578947368, + "low_lr": 1.4305263157894738e-05, + "step": 541 + }, + { + "epoch": 1.422748191978961, + "high_lr": 0.0007152631578947368, + "low_lr": 1.4305263157894738e-05, + "step": 541 + }, + { + "epoch": 1.422748191978961, + "high_lr": 0.0007152631578947368, + "low_lr": 1.4305263157894738e-05, + "step": 541 + }, + { + "epoch": 1.422748191978961, + "high_lr": 0.0007152631578947368, + "low_lr": 1.4305263157894738e-05, + "step": 541 + }, + { + "epoch": 1.4253780407626562, + "grad_norm": 1.0264736413955688, + "learning_rate": 0.0007147368421052631, + "loss": 1.4414, + "step": 542 + }, + { + "epoch": 1.4253780407626562, + "high_lr": 0.0007147368421052631, + "low_lr": 1.4294736842105263e-05, + "step": 542 + }, + { + "epoch": 1.4253780407626562, + "high_lr": 0.0007147368421052631, + "low_lr": 1.4294736842105263e-05, + "step": 542 + }, + { + "epoch": 1.4253780407626562, + "high_lr": 0.0007147368421052631, + "low_lr": 1.4294736842105263e-05, + "step": 542 + }, + { + "epoch": 1.4253780407626562, + "high_lr": 0.0007147368421052631, + "low_lr": 1.4294736842105263e-05, + "step": 542 + }, + { + "epoch": 1.4253780407626562, + "high_lr": 0.0007147368421052631, + "low_lr": 1.4294736842105263e-05, + "step": 542 + }, + { + "epoch": 1.4253780407626562, + "high_lr": 0.0007147368421052631, + "low_lr": 1.4294736842105263e-05, + "step": 542 + }, + { + "epoch": 1.4253780407626562, + "high_lr": 0.0007147368421052631, + "low_lr": 1.4294736842105263e-05, + "step": 542 + }, + { + "epoch": 1.4253780407626562, + "high_lr": 0.0007147368421052631, + "low_lr": 1.4294736842105263e-05, + "step": 542 + }, + { + "epoch": 1.428007889546351, + "grad_norm": 1.1516650915145874, + "learning_rate": 0.0007142105263157895, + "loss": 1.5142, + "step": 543 + }, + { + "epoch": 1.428007889546351, + "high_lr": 0.0007142105263157895, + "low_lr": 1.4284210526315792e-05, + "step": 543 + }, + { + "epoch": 1.428007889546351, + "high_lr": 0.0007142105263157895, + "low_lr": 1.4284210526315792e-05, + "step": 543 + }, + { + "epoch": 1.428007889546351, + "high_lr": 0.0007142105263157895, + "low_lr": 1.4284210526315792e-05, + "step": 543 + }, + { + "epoch": 1.428007889546351, + "high_lr": 0.0007142105263157895, + "low_lr": 1.4284210526315792e-05, + "step": 543 + }, + { + "epoch": 1.428007889546351, + "high_lr": 0.0007142105263157895, + "low_lr": 1.4284210526315792e-05, + "step": 543 + }, + { + "epoch": 1.428007889546351, + "high_lr": 0.0007142105263157895, + "low_lr": 1.4284210526315792e-05, + "step": 543 + }, + { + "epoch": 1.428007889546351, + "high_lr": 0.0007142105263157895, + "low_lr": 1.4284210526315792e-05, + "step": 543 + }, + { + "epoch": 1.428007889546351, + "high_lr": 0.0007142105263157895, + "low_lr": 1.4284210526315792e-05, + "step": 543 + }, + { + "epoch": 1.430637738330046, + "grad_norm": 1.1006218194961548, + "learning_rate": 0.0007136842105263158, + "loss": 1.5291, + "step": 544 + }, + { + "epoch": 1.430637738330046, + "high_lr": 0.0007136842105263158, + "low_lr": 1.4273684210526317e-05, + "step": 544 + }, + { + "epoch": 1.430637738330046, + "high_lr": 0.0007136842105263158, + "low_lr": 1.4273684210526317e-05, + "step": 544 + }, + { + "epoch": 1.430637738330046, + "high_lr": 0.0007136842105263158, + "low_lr": 1.4273684210526317e-05, + "step": 544 + }, + { + "epoch": 1.430637738330046, + "high_lr": 0.0007136842105263158, + "low_lr": 1.4273684210526317e-05, + "step": 544 + }, + { + "epoch": 1.430637738330046, + "high_lr": 0.0007136842105263158, + "low_lr": 1.4273684210526317e-05, + "step": 544 + }, + { + "epoch": 1.430637738330046, + "high_lr": 0.0007136842105263158, + "low_lr": 1.4273684210526317e-05, + "step": 544 + }, + { + "epoch": 1.430637738330046, + "high_lr": 0.0007136842105263158, + "low_lr": 1.4273684210526317e-05, + "step": 544 + }, + { + "epoch": 1.430637738330046, + "high_lr": 0.0007136842105263158, + "low_lr": 1.4273684210526317e-05, + "step": 544 + }, + { + "epoch": 1.433267587113741, + "grad_norm": 0.9842214584350586, + "learning_rate": 0.0007131578947368421, + "loss": 1.4205, + "step": 545 + }, + { + "epoch": 1.433267587113741, + "high_lr": 0.0007131578947368421, + "low_lr": 1.4263157894736843e-05, + "step": 545 + }, + { + "epoch": 1.433267587113741, + "high_lr": 0.0007131578947368421, + "low_lr": 1.4263157894736843e-05, + "step": 545 + }, + { + "epoch": 1.433267587113741, + "high_lr": 0.0007131578947368421, + "low_lr": 1.4263157894736843e-05, + "step": 545 + }, + { + "epoch": 1.433267587113741, + "high_lr": 0.0007131578947368421, + "low_lr": 1.4263157894736843e-05, + "step": 545 + }, + { + "epoch": 1.433267587113741, + "high_lr": 0.0007131578947368421, + "low_lr": 1.4263157894736843e-05, + "step": 545 + }, + { + "epoch": 1.433267587113741, + "high_lr": 0.0007131578947368421, + "low_lr": 1.4263157894736843e-05, + "step": 545 + }, + { + "epoch": 1.433267587113741, + "high_lr": 0.0007131578947368421, + "low_lr": 1.4263157894736843e-05, + "step": 545 + }, + { + "epoch": 1.433267587113741, + "high_lr": 0.0007131578947368421, + "low_lr": 1.4263157894736843e-05, + "step": 545 + }, + { + "epoch": 1.435897435897436, + "grad_norm": 1.1286381483078003, + "learning_rate": 0.0007126315789473684, + "loss": 1.506, + "step": 546 + }, + { + "epoch": 1.435897435897436, + "high_lr": 0.0007126315789473684, + "low_lr": 1.425263157894737e-05, + "step": 546 + }, + { + "epoch": 1.435897435897436, + "high_lr": 0.0007126315789473684, + "low_lr": 1.425263157894737e-05, + "step": 546 + }, + { + "epoch": 1.435897435897436, + "high_lr": 0.0007126315789473684, + "low_lr": 1.425263157894737e-05, + "step": 546 + }, + { + "epoch": 1.435897435897436, + "high_lr": 0.0007126315789473684, + "low_lr": 1.425263157894737e-05, + "step": 546 + }, + { + "epoch": 1.435897435897436, + "high_lr": 0.0007126315789473684, + "low_lr": 1.425263157894737e-05, + "step": 546 + }, + { + "epoch": 1.435897435897436, + "high_lr": 0.0007126315789473684, + "low_lr": 1.425263157894737e-05, + "step": 546 + }, + { + "epoch": 1.435897435897436, + "high_lr": 0.0007126315789473684, + "low_lr": 1.425263157894737e-05, + "step": 546 + }, + { + "epoch": 1.435897435897436, + "high_lr": 0.0007126315789473684, + "low_lr": 1.425263157894737e-05, + "step": 546 + }, + { + "epoch": 1.4385272846811308, + "grad_norm": 1.1527904272079468, + "learning_rate": 0.0007121052631578947, + "loss": 1.4887, + "step": 547 + }, + { + "epoch": 1.4385272846811308, + "high_lr": 0.0007121052631578947, + "low_lr": 1.4242105263157894e-05, + "step": 547 + }, + { + "epoch": 1.4385272846811308, + "high_lr": 0.0007121052631578947, + "low_lr": 1.4242105263157894e-05, + "step": 547 + }, + { + "epoch": 1.4385272846811308, + "high_lr": 0.0007121052631578947, + "low_lr": 1.4242105263157894e-05, + "step": 547 + }, + { + "epoch": 1.4385272846811308, + "high_lr": 0.0007121052631578947, + "low_lr": 1.4242105263157894e-05, + "step": 547 + }, + { + "epoch": 1.4385272846811308, + "high_lr": 0.0007121052631578947, + "low_lr": 1.4242105263157894e-05, + "step": 547 + }, + { + "epoch": 1.4385272846811308, + "high_lr": 0.0007121052631578947, + "low_lr": 1.4242105263157894e-05, + "step": 547 + }, + { + "epoch": 1.4385272846811308, + "high_lr": 0.0007121052631578947, + "low_lr": 1.4242105263157894e-05, + "step": 547 + }, + { + "epoch": 1.4385272846811308, + "high_lr": 0.0007121052631578947, + "low_lr": 1.4242105263157894e-05, + "step": 547 + }, + { + "epoch": 1.4411571334648259, + "grad_norm": 1.0327802896499634, + "learning_rate": 0.000711578947368421, + "loss": 1.4907, + "step": 548 + }, + { + "epoch": 1.4411571334648259, + "high_lr": 0.000711578947368421, + "low_lr": 1.4231578947368422e-05, + "step": 548 + }, + { + "epoch": 1.4411571334648259, + "high_lr": 0.000711578947368421, + "low_lr": 1.4231578947368422e-05, + "step": 548 + }, + { + "epoch": 1.4411571334648259, + "high_lr": 0.000711578947368421, + "low_lr": 1.4231578947368422e-05, + "step": 548 + }, + { + "epoch": 1.4411571334648259, + "high_lr": 0.000711578947368421, + "low_lr": 1.4231578947368422e-05, + "step": 548 + }, + { + "epoch": 1.4411571334648259, + "high_lr": 0.000711578947368421, + "low_lr": 1.4231578947368422e-05, + "step": 548 + }, + { + "epoch": 1.4411571334648259, + "high_lr": 0.000711578947368421, + "low_lr": 1.4231578947368422e-05, + "step": 548 + }, + { + "epoch": 1.4411571334648259, + "high_lr": 0.000711578947368421, + "low_lr": 1.4231578947368422e-05, + "step": 548 + }, + { + "epoch": 1.4411571334648259, + "high_lr": 0.000711578947368421, + "low_lr": 1.4231578947368422e-05, + "step": 548 + }, + { + "epoch": 1.4437869822485208, + "grad_norm": 0.9996568560600281, + "learning_rate": 0.0007110526315789474, + "loss": 1.4501, + "step": 549 + }, + { + "epoch": 1.4437869822485208, + "high_lr": 0.0007110526315789474, + "low_lr": 1.4221052631578949e-05, + "step": 549 + }, + { + "epoch": 1.4437869822485208, + "high_lr": 0.0007110526315789474, + "low_lr": 1.4221052631578949e-05, + "step": 549 + }, + { + "epoch": 1.4437869822485208, + "high_lr": 0.0007110526315789474, + "low_lr": 1.4221052631578949e-05, + "step": 549 + }, + { + "epoch": 1.4437869822485208, + "high_lr": 0.0007110526315789474, + "low_lr": 1.4221052631578949e-05, + "step": 549 + }, + { + "epoch": 1.4437869822485208, + "high_lr": 0.0007110526315789474, + "low_lr": 1.4221052631578949e-05, + "step": 549 + }, + { + "epoch": 1.4437869822485208, + "high_lr": 0.0007110526315789474, + "low_lr": 1.4221052631578949e-05, + "step": 549 + }, + { + "epoch": 1.4437869822485208, + "high_lr": 0.0007110526315789474, + "low_lr": 1.4221052631578949e-05, + "step": 549 + }, + { + "epoch": 1.4437869822485208, + "high_lr": 0.0007110526315789474, + "low_lr": 1.4221052631578949e-05, + "step": 549 + }, + { + "epoch": 1.4464168310322156, + "grad_norm": 1.054540991783142, + "learning_rate": 0.0007105263157894737, + "loss": 1.4928, + "step": 550 + }, + { + "epoch": 1.4464168310322156, + "high_lr": 0.0007105263157894737, + "low_lr": 1.4210526315789475e-05, + "step": 550 + }, + { + "epoch": 1.4464168310322156, + "high_lr": 0.0007105263157894737, + "low_lr": 1.4210526315789475e-05, + "step": 550 + }, + { + "epoch": 1.4464168310322156, + "high_lr": 0.0007105263157894737, + "low_lr": 1.4210526315789475e-05, + "step": 550 + }, + { + "epoch": 1.4464168310322156, + "high_lr": 0.0007105263157894737, + "low_lr": 1.4210526315789475e-05, + "step": 550 + }, + { + "epoch": 1.4464168310322156, + "high_lr": 0.0007105263157894737, + "low_lr": 1.4210526315789475e-05, + "step": 550 + }, + { + "epoch": 1.4464168310322156, + "high_lr": 0.0007105263157894737, + "low_lr": 1.4210526315789475e-05, + "step": 550 + }, + { + "epoch": 1.4464168310322156, + "high_lr": 0.0007105263157894737, + "low_lr": 1.4210526315789475e-05, + "step": 550 + }, + { + "epoch": 1.4464168310322156, + "high_lr": 0.0007105263157894737, + "low_lr": 1.4210526315789475e-05, + "step": 550 + }, + { + "epoch": 1.4490466798159105, + "grad_norm": 1.1426942348480225, + "learning_rate": 0.00071, + "loss": 1.5159, + "step": 551 + }, + { + "epoch": 1.4490466798159105, + "high_lr": 0.00071, + "low_lr": 1.4200000000000001e-05, + "step": 551 + }, + { + "epoch": 1.4490466798159105, + "high_lr": 0.00071, + "low_lr": 1.4200000000000001e-05, + "step": 551 + }, + { + "epoch": 1.4490466798159105, + "high_lr": 0.00071, + "low_lr": 1.4200000000000001e-05, + "step": 551 + }, + { + "epoch": 1.4490466798159105, + "high_lr": 0.00071, + "low_lr": 1.4200000000000001e-05, + "step": 551 + }, + { + "epoch": 1.4490466798159105, + "high_lr": 0.00071, + "low_lr": 1.4200000000000001e-05, + "step": 551 + }, + { + "epoch": 1.4490466798159105, + "high_lr": 0.00071, + "low_lr": 1.4200000000000001e-05, + "step": 551 + }, + { + "epoch": 1.4490466798159105, + "high_lr": 0.00071, + "low_lr": 1.4200000000000001e-05, + "step": 551 + }, + { + "epoch": 1.4490466798159105, + "high_lr": 0.00071, + "low_lr": 1.4200000000000001e-05, + "step": 551 + }, + { + "epoch": 1.4516765285996054, + "grad_norm": 1.1202912330627441, + "learning_rate": 0.0007094736842105264, + "loss": 1.494, + "step": 552 + }, + { + "epoch": 1.4516765285996054, + "high_lr": 0.0007094736842105264, + "low_lr": 1.418947368421053e-05, + "step": 552 + }, + { + "epoch": 1.4516765285996054, + "high_lr": 0.0007094736842105264, + "low_lr": 1.418947368421053e-05, + "step": 552 + }, + { + "epoch": 1.4516765285996054, + "high_lr": 0.0007094736842105264, + "low_lr": 1.418947368421053e-05, + "step": 552 + }, + { + "epoch": 1.4516765285996054, + "high_lr": 0.0007094736842105264, + "low_lr": 1.418947368421053e-05, + "step": 552 + }, + { + "epoch": 1.4516765285996054, + "high_lr": 0.0007094736842105264, + "low_lr": 1.418947368421053e-05, + "step": 552 + }, + { + "epoch": 1.4516765285996054, + "high_lr": 0.0007094736842105264, + "low_lr": 1.418947368421053e-05, + "step": 552 + }, + { + "epoch": 1.4516765285996054, + "high_lr": 0.0007094736842105264, + "low_lr": 1.418947368421053e-05, + "step": 552 + }, + { + "epoch": 1.4516765285996054, + "high_lr": 0.0007094736842105264, + "low_lr": 1.418947368421053e-05, + "step": 552 + }, + { + "epoch": 1.4543063773833005, + "grad_norm": 1.1229134798049927, + "learning_rate": 0.0007089473684210527, + "loss": 1.4982, + "step": 553 + }, + { + "epoch": 1.4543063773833005, + "high_lr": 0.0007089473684210527, + "low_lr": 1.4178947368421054e-05, + "step": 553 + }, + { + "epoch": 1.4543063773833005, + "high_lr": 0.0007089473684210527, + "low_lr": 1.4178947368421054e-05, + "step": 553 + }, + { + "epoch": 1.4543063773833005, + "high_lr": 0.0007089473684210527, + "low_lr": 1.4178947368421054e-05, + "step": 553 + }, + { + "epoch": 1.4543063773833005, + "high_lr": 0.0007089473684210527, + "low_lr": 1.4178947368421054e-05, + "step": 553 + }, + { + "epoch": 1.4543063773833005, + "high_lr": 0.0007089473684210527, + "low_lr": 1.4178947368421054e-05, + "step": 553 + }, + { + "epoch": 1.4543063773833005, + "high_lr": 0.0007089473684210527, + "low_lr": 1.4178947368421054e-05, + "step": 553 + }, + { + "epoch": 1.4543063773833005, + "high_lr": 0.0007089473684210527, + "low_lr": 1.4178947368421054e-05, + "step": 553 + }, + { + "epoch": 1.4543063773833005, + "high_lr": 0.0007089473684210527, + "low_lr": 1.4178947368421054e-05, + "step": 553 + }, + { + "epoch": 1.4569362261669954, + "grad_norm": 1.000651478767395, + "learning_rate": 0.000708421052631579, + "loss": 1.4656, + "step": 554 + }, + { + "epoch": 1.4569362261669954, + "high_lr": 0.000708421052631579, + "low_lr": 1.416842105263158e-05, + "step": 554 + }, + { + "epoch": 1.4569362261669954, + "high_lr": 0.000708421052631579, + "low_lr": 1.416842105263158e-05, + "step": 554 + }, + { + "epoch": 1.4569362261669954, + "high_lr": 0.000708421052631579, + "low_lr": 1.416842105263158e-05, + "step": 554 + }, + { + "epoch": 1.4569362261669954, + "high_lr": 0.000708421052631579, + "low_lr": 1.416842105263158e-05, + "step": 554 + }, + { + "epoch": 1.4569362261669954, + "high_lr": 0.000708421052631579, + "low_lr": 1.416842105263158e-05, + "step": 554 + }, + { + "epoch": 1.4569362261669954, + "high_lr": 0.000708421052631579, + "low_lr": 1.416842105263158e-05, + "step": 554 + }, + { + "epoch": 1.4569362261669954, + "high_lr": 0.000708421052631579, + "low_lr": 1.416842105263158e-05, + "step": 554 + }, + { + "epoch": 1.4569362261669954, + "high_lr": 0.000708421052631579, + "low_lr": 1.416842105263158e-05, + "step": 554 + }, + { + "epoch": 1.4595660749506902, + "grad_norm": 1.1136142015457153, + "learning_rate": 0.0007078947368421053, + "loss": 1.5472, + "step": 555 + }, + { + "epoch": 1.4595660749506902, + "high_lr": 0.0007078947368421053, + "low_lr": 1.4157894736842107e-05, + "step": 555 + }, + { + "epoch": 1.4595660749506902, + "high_lr": 0.0007078947368421053, + "low_lr": 1.4157894736842107e-05, + "step": 555 + }, + { + "epoch": 1.4595660749506902, + "high_lr": 0.0007078947368421053, + "low_lr": 1.4157894736842107e-05, + "step": 555 + }, + { + "epoch": 1.4595660749506902, + "high_lr": 0.0007078947368421053, + "low_lr": 1.4157894736842107e-05, + "step": 555 + }, + { + "epoch": 1.4595660749506902, + "high_lr": 0.0007078947368421053, + "low_lr": 1.4157894736842107e-05, + "step": 555 + }, + { + "epoch": 1.4595660749506902, + "high_lr": 0.0007078947368421053, + "low_lr": 1.4157894736842107e-05, + "step": 555 + }, + { + "epoch": 1.4595660749506902, + "high_lr": 0.0007078947368421053, + "low_lr": 1.4157894736842107e-05, + "step": 555 + }, + { + "epoch": 1.4595660749506902, + "high_lr": 0.0007078947368421053, + "low_lr": 1.4157894736842107e-05, + "step": 555 + }, + { + "epoch": 1.4621959237343853, + "grad_norm": 1.0530623197555542, + "learning_rate": 0.0007073684210526316, + "loss": 1.4984, + "step": 556 + }, + { + "epoch": 1.4621959237343853, + "high_lr": 0.0007073684210526316, + "low_lr": 1.4147368421052631e-05, + "step": 556 + }, + { + "epoch": 1.4621959237343853, + "high_lr": 0.0007073684210526316, + "low_lr": 1.4147368421052631e-05, + "step": 556 + }, + { + "epoch": 1.4621959237343853, + "high_lr": 0.0007073684210526316, + "low_lr": 1.4147368421052631e-05, + "step": 556 + }, + { + "epoch": 1.4621959237343853, + "high_lr": 0.0007073684210526316, + "low_lr": 1.4147368421052631e-05, + "step": 556 + }, + { + "epoch": 1.4621959237343853, + "high_lr": 0.0007073684210526316, + "low_lr": 1.4147368421052631e-05, + "step": 556 + }, + { + "epoch": 1.4621959237343853, + "high_lr": 0.0007073684210526316, + "low_lr": 1.4147368421052631e-05, + "step": 556 + }, + { + "epoch": 1.4621959237343853, + "high_lr": 0.0007073684210526316, + "low_lr": 1.4147368421052631e-05, + "step": 556 + }, + { + "epoch": 1.4621959237343853, + "high_lr": 0.0007073684210526316, + "low_lr": 1.4147368421052631e-05, + "step": 556 + }, + { + "epoch": 1.4648257725180802, + "grad_norm": 1.0790746212005615, + "learning_rate": 0.000706842105263158, + "loss": 1.492, + "step": 557 + }, + { + "epoch": 1.4648257725180802, + "high_lr": 0.000706842105263158, + "low_lr": 1.413684210526316e-05, + "step": 557 + }, + { + "epoch": 1.4648257725180802, + "high_lr": 0.000706842105263158, + "low_lr": 1.413684210526316e-05, + "step": 557 + }, + { + "epoch": 1.4648257725180802, + "high_lr": 0.000706842105263158, + "low_lr": 1.413684210526316e-05, + "step": 557 + }, + { + "epoch": 1.4648257725180802, + "high_lr": 0.000706842105263158, + "low_lr": 1.413684210526316e-05, + "step": 557 + }, + { + "epoch": 1.4648257725180802, + "high_lr": 0.000706842105263158, + "low_lr": 1.413684210526316e-05, + "step": 557 + }, + { + "epoch": 1.4648257725180802, + "high_lr": 0.000706842105263158, + "low_lr": 1.413684210526316e-05, + "step": 557 + }, + { + "epoch": 1.4648257725180802, + "high_lr": 0.000706842105263158, + "low_lr": 1.413684210526316e-05, + "step": 557 + }, + { + "epoch": 1.4648257725180802, + "high_lr": 0.000706842105263158, + "low_lr": 1.413684210526316e-05, + "step": 557 + }, + { + "epoch": 1.467455621301775, + "grad_norm": 1.2384121417999268, + "learning_rate": 0.0007063157894736842, + "loss": 1.4896, + "step": 558 + }, + { + "epoch": 1.467455621301775, + "high_lr": 0.0007063157894736842, + "low_lr": 1.4126315789473686e-05, + "step": 558 + }, + { + "epoch": 1.467455621301775, + "high_lr": 0.0007063157894736842, + "low_lr": 1.4126315789473686e-05, + "step": 558 + }, + { + "epoch": 1.467455621301775, + "high_lr": 0.0007063157894736842, + "low_lr": 1.4126315789473686e-05, + "step": 558 + }, + { + "epoch": 1.467455621301775, + "high_lr": 0.0007063157894736842, + "low_lr": 1.4126315789473686e-05, + "step": 558 + }, + { + "epoch": 1.467455621301775, + "high_lr": 0.0007063157894736842, + "low_lr": 1.4126315789473686e-05, + "step": 558 + }, + { + "epoch": 1.467455621301775, + "high_lr": 0.0007063157894736842, + "low_lr": 1.4126315789473686e-05, + "step": 558 + }, + { + "epoch": 1.467455621301775, + "high_lr": 0.0007063157894736842, + "low_lr": 1.4126315789473686e-05, + "step": 558 + }, + { + "epoch": 1.467455621301775, + "high_lr": 0.0007063157894736842, + "low_lr": 1.4126315789473686e-05, + "step": 558 + }, + { + "epoch": 1.4700854700854702, + "grad_norm": 1.0954855680465698, + "learning_rate": 0.0007057894736842105, + "loss": 1.4779, + "step": 559 + }, + { + "epoch": 1.4700854700854702, + "high_lr": 0.0007057894736842105, + "low_lr": 1.4115789473684212e-05, + "step": 559 + }, + { + "epoch": 1.4700854700854702, + "high_lr": 0.0007057894736842105, + "low_lr": 1.4115789473684212e-05, + "step": 559 + }, + { + "epoch": 1.4700854700854702, + "high_lr": 0.0007057894736842105, + "low_lr": 1.4115789473684212e-05, + "step": 559 + }, + { + "epoch": 1.4700854700854702, + "high_lr": 0.0007057894736842105, + "low_lr": 1.4115789473684212e-05, + "step": 559 + }, + { + "epoch": 1.4700854700854702, + "high_lr": 0.0007057894736842105, + "low_lr": 1.4115789473684212e-05, + "step": 559 + }, + { + "epoch": 1.4700854700854702, + "high_lr": 0.0007057894736842105, + "low_lr": 1.4115789473684212e-05, + "step": 559 + }, + { + "epoch": 1.4700854700854702, + "high_lr": 0.0007057894736842105, + "low_lr": 1.4115789473684212e-05, + "step": 559 + }, + { + "epoch": 1.4700854700854702, + "high_lr": 0.0007057894736842105, + "low_lr": 1.4115789473684212e-05, + "step": 559 + }, + { + "epoch": 1.472715318869165, + "grad_norm": 1.0818630456924438, + "learning_rate": 0.0007052631578947368, + "loss": 1.5016, + "step": 560 + }, + { + "epoch": 1.472715318869165, + "high_lr": 0.0007052631578947368, + "low_lr": 1.4105263157894738e-05, + "step": 560 + }, + { + "epoch": 1.472715318869165, + "high_lr": 0.0007052631578947368, + "low_lr": 1.4105263157894738e-05, + "step": 560 + }, + { + "epoch": 1.472715318869165, + "high_lr": 0.0007052631578947368, + "low_lr": 1.4105263157894738e-05, + "step": 560 + }, + { + "epoch": 1.472715318869165, + "high_lr": 0.0007052631578947368, + "low_lr": 1.4105263157894738e-05, + "step": 560 + }, + { + "epoch": 1.472715318869165, + "high_lr": 0.0007052631578947368, + "low_lr": 1.4105263157894738e-05, + "step": 560 + }, + { + "epoch": 1.472715318869165, + "high_lr": 0.0007052631578947368, + "low_lr": 1.4105263157894738e-05, + "step": 560 + }, + { + "epoch": 1.472715318869165, + "high_lr": 0.0007052631578947368, + "low_lr": 1.4105263157894738e-05, + "step": 560 + }, + { + "epoch": 1.472715318869165, + "high_lr": 0.0007052631578947368, + "low_lr": 1.4105263157894738e-05, + "step": 560 + }, + { + "epoch": 1.47534516765286, + "grad_norm": 2.5597925186157227, + "learning_rate": 0.0007047368421052631, + "loss": 1.5734, + "step": 561 + }, + { + "epoch": 1.47534516765286, + "high_lr": 0.0007047368421052631, + "low_lr": 1.4094736842105263e-05, + "step": 561 + }, + { + "epoch": 1.47534516765286, + "high_lr": 0.0007047368421052631, + "low_lr": 1.4094736842105263e-05, + "step": 561 + }, + { + "epoch": 1.47534516765286, + "high_lr": 0.0007047368421052631, + "low_lr": 1.4094736842105263e-05, + "step": 561 + }, + { + "epoch": 1.47534516765286, + "high_lr": 0.0007047368421052631, + "low_lr": 1.4094736842105263e-05, + "step": 561 + }, + { + "epoch": 1.47534516765286, + "high_lr": 0.0007047368421052631, + "low_lr": 1.4094736842105263e-05, + "step": 561 + }, + { + "epoch": 1.47534516765286, + "high_lr": 0.0007047368421052631, + "low_lr": 1.4094736842105263e-05, + "step": 561 + }, + { + "epoch": 1.47534516765286, + "high_lr": 0.0007047368421052631, + "low_lr": 1.4094736842105263e-05, + "step": 561 + }, + { + "epoch": 1.47534516765286, + "high_lr": 0.0007047368421052631, + "low_lr": 1.4094736842105263e-05, + "step": 561 + }, + { + "epoch": 1.4779750164365548, + "grad_norm": 1.0145095586776733, + "learning_rate": 0.0007042105263157895, + "loss": 1.5125, + "step": 562 + }, + { + "epoch": 1.4779750164365548, + "high_lr": 0.0007042105263157895, + "low_lr": 1.4084210526315791e-05, + "step": 562 + }, + { + "epoch": 1.4779750164365548, + "high_lr": 0.0007042105263157895, + "low_lr": 1.4084210526315791e-05, + "step": 562 + }, + { + "epoch": 1.4779750164365548, + "high_lr": 0.0007042105263157895, + "low_lr": 1.4084210526315791e-05, + "step": 562 + }, + { + "epoch": 1.4779750164365548, + "high_lr": 0.0007042105263157895, + "low_lr": 1.4084210526315791e-05, + "step": 562 + }, + { + "epoch": 1.4779750164365548, + "high_lr": 0.0007042105263157895, + "low_lr": 1.4084210526315791e-05, + "step": 562 + }, + { + "epoch": 1.4779750164365548, + "high_lr": 0.0007042105263157895, + "low_lr": 1.4084210526315791e-05, + "step": 562 + }, + { + "epoch": 1.4779750164365548, + "high_lr": 0.0007042105263157895, + "low_lr": 1.4084210526315791e-05, + "step": 562 + }, + { + "epoch": 1.4779750164365548, + "high_lr": 0.0007042105263157895, + "low_lr": 1.4084210526315791e-05, + "step": 562 + }, + { + "epoch": 1.4806048652202497, + "grad_norm": 75.1147689819336, + "learning_rate": 0.0007036842105263158, + "loss": 1.4529, + "step": 563 + }, + { + "epoch": 1.4806048652202497, + "high_lr": 0.0007036842105263158, + "low_lr": 1.4073684210526317e-05, + "step": 563 + }, + { + "epoch": 1.4806048652202497, + "high_lr": 0.0007036842105263158, + "low_lr": 1.4073684210526317e-05, + "step": 563 + }, + { + "epoch": 1.4806048652202497, + "high_lr": 0.0007036842105263158, + "low_lr": 1.4073684210526317e-05, + "step": 563 + }, + { + "epoch": 1.4806048652202497, + "high_lr": 0.0007036842105263158, + "low_lr": 1.4073684210526317e-05, + "step": 563 + }, + { + "epoch": 1.4806048652202497, + "high_lr": 0.0007036842105263158, + "low_lr": 1.4073684210526317e-05, + "step": 563 + }, + { + "epoch": 1.4806048652202497, + "high_lr": 0.0007036842105263158, + "low_lr": 1.4073684210526317e-05, + "step": 563 + }, + { + "epoch": 1.4806048652202497, + "high_lr": 0.0007036842105263158, + "low_lr": 1.4073684210526317e-05, + "step": 563 + }, + { + "epoch": 1.4806048652202497, + "high_lr": 0.0007036842105263158, + "low_lr": 1.4073684210526317e-05, + "step": 563 + }, + { + "epoch": 1.4832347140039448, + "grad_norm": 1.159050464630127, + "learning_rate": 0.0007031578947368421, + "loss": 1.5372, + "step": 564 + }, + { + "epoch": 1.4832347140039448, + "high_lr": 0.0007031578947368421, + "low_lr": 1.4063157894736844e-05, + "step": 564 + }, + { + "epoch": 1.4832347140039448, + "high_lr": 0.0007031578947368421, + "low_lr": 1.4063157894736844e-05, + "step": 564 + }, + { + "epoch": 1.4832347140039448, + "high_lr": 0.0007031578947368421, + "low_lr": 1.4063157894736844e-05, + "step": 564 + }, + { + "epoch": 1.4832347140039448, + "high_lr": 0.0007031578947368421, + "low_lr": 1.4063157894736844e-05, + "step": 564 + }, + { + "epoch": 1.4832347140039448, + "high_lr": 0.0007031578947368421, + "low_lr": 1.4063157894736844e-05, + "step": 564 + }, + { + "epoch": 1.4832347140039448, + "high_lr": 0.0007031578947368421, + "low_lr": 1.4063157894736844e-05, + "step": 564 + }, + { + "epoch": 1.4832347140039448, + "high_lr": 0.0007031578947368421, + "low_lr": 1.4063157894736844e-05, + "step": 564 + }, + { + "epoch": 1.4832347140039448, + "high_lr": 0.0007031578947368421, + "low_lr": 1.4063157894736844e-05, + "step": 564 + }, + { + "epoch": 1.4858645627876397, + "grad_norm": 1.106245994567871, + "learning_rate": 0.0007026315789473683, + "loss": 1.51, + "step": 565 + }, + { + "epoch": 1.4858645627876397, + "high_lr": 0.0007026315789473683, + "low_lr": 1.4052631578947368e-05, + "step": 565 + }, + { + "epoch": 1.4858645627876397, + "high_lr": 0.0007026315789473683, + "low_lr": 1.4052631578947368e-05, + "step": 565 + }, + { + "epoch": 1.4858645627876397, + "high_lr": 0.0007026315789473683, + "low_lr": 1.4052631578947368e-05, + "step": 565 + }, + { + "epoch": 1.4858645627876397, + "high_lr": 0.0007026315789473683, + "low_lr": 1.4052631578947368e-05, + "step": 565 + }, + { + "epoch": 1.4858645627876397, + "high_lr": 0.0007026315789473683, + "low_lr": 1.4052631578947368e-05, + "step": 565 + }, + { + "epoch": 1.4858645627876397, + "high_lr": 0.0007026315789473683, + "low_lr": 1.4052631578947368e-05, + "step": 565 + }, + { + "epoch": 1.4858645627876397, + "high_lr": 0.0007026315789473683, + "low_lr": 1.4052631578947368e-05, + "step": 565 + }, + { + "epoch": 1.4858645627876397, + "high_lr": 0.0007026315789473683, + "low_lr": 1.4052631578947368e-05, + "step": 565 + }, + { + "epoch": 1.4884944115713346, + "grad_norm": 1.1369872093200684, + "learning_rate": 0.0007021052631578948, + "loss": 1.4915, + "step": 566 + }, + { + "epoch": 1.4884944115713346, + "high_lr": 0.0007021052631578948, + "low_lr": 1.4042105263157896e-05, + "step": 566 + }, + { + "epoch": 1.4884944115713346, + "high_lr": 0.0007021052631578948, + "low_lr": 1.4042105263157896e-05, + "step": 566 + }, + { + "epoch": 1.4884944115713346, + "high_lr": 0.0007021052631578948, + "low_lr": 1.4042105263157896e-05, + "step": 566 + }, + { + "epoch": 1.4884944115713346, + "high_lr": 0.0007021052631578948, + "low_lr": 1.4042105263157896e-05, + "step": 566 + }, + { + "epoch": 1.4884944115713346, + "high_lr": 0.0007021052631578948, + "low_lr": 1.4042105263157896e-05, + "step": 566 + }, + { + "epoch": 1.4884944115713346, + "high_lr": 0.0007021052631578948, + "low_lr": 1.4042105263157896e-05, + "step": 566 + }, + { + "epoch": 1.4884944115713346, + "high_lr": 0.0007021052631578948, + "low_lr": 1.4042105263157896e-05, + "step": 566 + }, + { + "epoch": 1.4884944115713346, + "high_lr": 0.0007021052631578948, + "low_lr": 1.4042105263157896e-05, + "step": 566 + }, + { + "epoch": 1.4911242603550297, + "grad_norm": 1.026293396949768, + "learning_rate": 0.0007015789473684211, + "loss": 1.476, + "step": 567 + }, + { + "epoch": 1.4911242603550297, + "high_lr": 0.0007015789473684211, + "low_lr": 1.4031578947368423e-05, + "step": 567 + }, + { + "epoch": 1.4911242603550297, + "high_lr": 0.0007015789473684211, + "low_lr": 1.4031578947368423e-05, + "step": 567 + }, + { + "epoch": 1.4911242603550297, + "high_lr": 0.0007015789473684211, + "low_lr": 1.4031578947368423e-05, + "step": 567 + }, + { + "epoch": 1.4911242603550297, + "high_lr": 0.0007015789473684211, + "low_lr": 1.4031578947368423e-05, + "step": 567 + }, + { + "epoch": 1.4911242603550297, + "high_lr": 0.0007015789473684211, + "low_lr": 1.4031578947368423e-05, + "step": 567 + }, + { + "epoch": 1.4911242603550297, + "high_lr": 0.0007015789473684211, + "low_lr": 1.4031578947368423e-05, + "step": 567 + }, + { + "epoch": 1.4911242603550297, + "high_lr": 0.0007015789473684211, + "low_lr": 1.4031578947368423e-05, + "step": 567 + }, + { + "epoch": 1.4911242603550297, + "high_lr": 0.0007015789473684211, + "low_lr": 1.4031578947368423e-05, + "step": 567 + }, + { + "epoch": 1.4937541091387245, + "grad_norm": 1.069638729095459, + "learning_rate": 0.0007010526315789474, + "loss": 1.488, + "step": 568 + }, + { + "epoch": 1.4937541091387245, + "high_lr": 0.0007010526315789474, + "low_lr": 1.4021052631578949e-05, + "step": 568 + }, + { + "epoch": 1.4937541091387245, + "high_lr": 0.0007010526315789474, + "low_lr": 1.4021052631578949e-05, + "step": 568 + }, + { + "epoch": 1.4937541091387245, + "high_lr": 0.0007010526315789474, + "low_lr": 1.4021052631578949e-05, + "step": 568 + }, + { + "epoch": 1.4937541091387245, + "high_lr": 0.0007010526315789474, + "low_lr": 1.4021052631578949e-05, + "step": 568 + }, + { + "epoch": 1.4937541091387245, + "high_lr": 0.0007010526315789474, + "low_lr": 1.4021052631578949e-05, + "step": 568 + }, + { + "epoch": 1.4937541091387245, + "high_lr": 0.0007010526315789474, + "low_lr": 1.4021052631578949e-05, + "step": 568 + }, + { + "epoch": 1.4937541091387245, + "high_lr": 0.0007010526315789474, + "low_lr": 1.4021052631578949e-05, + "step": 568 + }, + { + "epoch": 1.4937541091387245, + "high_lr": 0.0007010526315789474, + "low_lr": 1.4021052631578949e-05, + "step": 568 + }, + { + "epoch": 1.4963839579224194, + "grad_norm": 1.0903842449188232, + "learning_rate": 0.0007005263157894737, + "loss": 1.5264, + "step": 569 + }, + { + "epoch": 1.4963839579224194, + "high_lr": 0.0007005263157894737, + "low_lr": 1.4010526315789475e-05, + "step": 569 + }, + { + "epoch": 1.4963839579224194, + "high_lr": 0.0007005263157894737, + "low_lr": 1.4010526315789475e-05, + "step": 569 + }, + { + "epoch": 1.4963839579224194, + "high_lr": 0.0007005263157894737, + "low_lr": 1.4010526315789475e-05, + "step": 569 + }, + { + "epoch": 1.4963839579224194, + "high_lr": 0.0007005263157894737, + "low_lr": 1.4010526315789475e-05, + "step": 569 + }, + { + "epoch": 1.4963839579224194, + "high_lr": 0.0007005263157894737, + "low_lr": 1.4010526315789475e-05, + "step": 569 + }, + { + "epoch": 1.4963839579224194, + "high_lr": 0.0007005263157894737, + "low_lr": 1.4010526315789475e-05, + "step": 569 + }, + { + "epoch": 1.4963839579224194, + "high_lr": 0.0007005263157894737, + "low_lr": 1.4010526315789475e-05, + "step": 569 + }, + { + "epoch": 1.4963839579224194, + "high_lr": 0.0007005263157894737, + "low_lr": 1.4010526315789475e-05, + "step": 569 + }, + { + "epoch": 1.4990138067061145, + "grad_norm": 1.1938930749893188, + "learning_rate": 0.0007, + "loss": 1.5603, + "step": 570 + }, + { + "epoch": 1.4990138067061145, + "high_lr": 0.0007, + "low_lr": 1.4e-05, + "step": 570 + }, + { + "epoch": 1.4990138067061145, + "high_lr": 0.0007, + "low_lr": 1.4e-05, + "step": 570 + }, + { + "epoch": 1.4990138067061145, + "high_lr": 0.0007, + "low_lr": 1.4e-05, + "step": 570 + }, + { + "epoch": 1.4990138067061145, + "high_lr": 0.0007, + "low_lr": 1.4e-05, + "step": 570 + }, + { + "epoch": 1.4990138067061145, + "high_lr": 0.0007, + "low_lr": 1.4e-05, + "step": 570 + }, + { + "epoch": 1.4990138067061145, + "high_lr": 0.0007, + "low_lr": 1.4e-05, + "step": 570 + }, + { + "epoch": 1.4990138067061145, + "high_lr": 0.0007, + "low_lr": 1.4e-05, + "step": 570 + }, + { + "epoch": 1.4990138067061145, + "high_lr": 0.0007, + "low_lr": 1.4e-05, + "step": 570 + }, + { + "epoch": 1.5016436554898094, + "grad_norm": 1.3760454654693604, + "learning_rate": 0.0006994736842105264, + "loss": 1.5244, + "step": 571 + }, + { + "epoch": 1.5016436554898094, + "high_lr": 0.0006994736842105264, + "low_lr": 1.3989473684210528e-05, + "step": 571 + }, + { + "epoch": 1.5016436554898094, + "high_lr": 0.0006994736842105264, + "low_lr": 1.3989473684210528e-05, + "step": 571 + }, + { + "epoch": 1.5016436554898094, + "high_lr": 0.0006994736842105264, + "low_lr": 1.3989473684210528e-05, + "step": 571 + }, + { + "epoch": 1.5016436554898094, + "high_lr": 0.0006994736842105264, + "low_lr": 1.3989473684210528e-05, + "step": 571 + }, + { + "epoch": 1.5016436554898094, + "high_lr": 0.0006994736842105264, + "low_lr": 1.3989473684210528e-05, + "step": 571 + }, + { + "epoch": 1.5016436554898094, + "high_lr": 0.0006994736842105264, + "low_lr": 1.3989473684210528e-05, + "step": 571 + }, + { + "epoch": 1.5016436554898094, + "high_lr": 0.0006994736842105264, + "low_lr": 1.3989473684210528e-05, + "step": 571 + }, + { + "epoch": 1.5016436554898094, + "high_lr": 0.0006994736842105264, + "low_lr": 1.3989473684210528e-05, + "step": 571 + }, + { + "epoch": 1.5042735042735043, + "grad_norm": 1.042982578277588, + "learning_rate": 0.0006989473684210527, + "loss": 1.441, + "step": 572 + }, + { + "epoch": 1.5042735042735043, + "high_lr": 0.0006989473684210527, + "low_lr": 1.3978947368421054e-05, + "step": 572 + }, + { + "epoch": 1.5042735042735043, + "high_lr": 0.0006989473684210527, + "low_lr": 1.3978947368421054e-05, + "step": 572 + }, + { + "epoch": 1.5042735042735043, + "high_lr": 0.0006989473684210527, + "low_lr": 1.3978947368421054e-05, + "step": 572 + }, + { + "epoch": 1.5042735042735043, + "high_lr": 0.0006989473684210527, + "low_lr": 1.3978947368421054e-05, + "step": 572 + }, + { + "epoch": 1.5042735042735043, + "high_lr": 0.0006989473684210527, + "low_lr": 1.3978947368421054e-05, + "step": 572 + }, + { + "epoch": 1.5042735042735043, + "high_lr": 0.0006989473684210527, + "low_lr": 1.3978947368421054e-05, + "step": 572 + }, + { + "epoch": 1.5042735042735043, + "high_lr": 0.0006989473684210527, + "low_lr": 1.3978947368421054e-05, + "step": 572 + }, + { + "epoch": 1.5042735042735043, + "high_lr": 0.0006989473684210527, + "low_lr": 1.3978947368421054e-05, + "step": 572 + }, + { + "epoch": 1.5069033530571994, + "grad_norm": 1.1798406839370728, + "learning_rate": 0.000698421052631579, + "loss": 1.478, + "step": 573 + }, + { + "epoch": 1.5069033530571994, + "high_lr": 0.000698421052631579, + "low_lr": 1.396842105263158e-05, + "step": 573 + }, + { + "epoch": 1.5069033530571994, + "high_lr": 0.000698421052631579, + "low_lr": 1.396842105263158e-05, + "step": 573 + }, + { + "epoch": 1.5069033530571994, + "high_lr": 0.000698421052631579, + "low_lr": 1.396842105263158e-05, + "step": 573 + }, + { + "epoch": 1.5069033530571994, + "high_lr": 0.000698421052631579, + "low_lr": 1.396842105263158e-05, + "step": 573 + }, + { + "epoch": 1.5069033530571994, + "high_lr": 0.000698421052631579, + "low_lr": 1.396842105263158e-05, + "step": 573 + }, + { + "epoch": 1.5069033530571994, + "high_lr": 0.000698421052631579, + "low_lr": 1.396842105263158e-05, + "step": 573 + }, + { + "epoch": 1.5069033530571994, + "high_lr": 0.000698421052631579, + "low_lr": 1.396842105263158e-05, + "step": 573 + }, + { + "epoch": 1.5069033530571994, + "high_lr": 0.000698421052631579, + "low_lr": 1.396842105263158e-05, + "step": 573 + }, + { + "epoch": 1.509533201840894, + "grad_norm": 1.2408303022384644, + "learning_rate": 0.0006978947368421052, + "loss": 1.4939, + "step": 574 + }, + { + "epoch": 1.509533201840894, + "high_lr": 0.0006978947368421052, + "low_lr": 1.3957894736842105e-05, + "step": 574 + }, + { + "epoch": 1.509533201840894, + "high_lr": 0.0006978947368421052, + "low_lr": 1.3957894736842105e-05, + "step": 574 + }, + { + "epoch": 1.509533201840894, + "high_lr": 0.0006978947368421052, + "low_lr": 1.3957894736842105e-05, + "step": 574 + }, + { + "epoch": 1.509533201840894, + "high_lr": 0.0006978947368421052, + "low_lr": 1.3957894736842105e-05, + "step": 574 + }, + { + "epoch": 1.509533201840894, + "high_lr": 0.0006978947368421052, + "low_lr": 1.3957894736842105e-05, + "step": 574 + }, + { + "epoch": 1.509533201840894, + "high_lr": 0.0006978947368421052, + "low_lr": 1.3957894736842105e-05, + "step": 574 + }, + { + "epoch": 1.509533201840894, + "high_lr": 0.0006978947368421052, + "low_lr": 1.3957894736842105e-05, + "step": 574 + }, + { + "epoch": 1.509533201840894, + "high_lr": 0.0006978947368421052, + "low_lr": 1.3957894736842105e-05, + "step": 574 + }, + { + "epoch": 1.5121630506245891, + "grad_norm": 1.1613993644714355, + "learning_rate": 0.0006973684210526315, + "loss": 1.4693, + "step": 575 + }, + { + "epoch": 1.5121630506245891, + "high_lr": 0.0006973684210526315, + "low_lr": 1.3947368421052631e-05, + "step": 575 + }, + { + "epoch": 1.5121630506245891, + "high_lr": 0.0006973684210526315, + "low_lr": 1.3947368421052631e-05, + "step": 575 + }, + { + "epoch": 1.5121630506245891, + "high_lr": 0.0006973684210526315, + "low_lr": 1.3947368421052631e-05, + "step": 575 + }, + { + "epoch": 1.5121630506245891, + "high_lr": 0.0006973684210526315, + "low_lr": 1.3947368421052631e-05, + "step": 575 + }, + { + "epoch": 1.5121630506245891, + "high_lr": 0.0006973684210526315, + "low_lr": 1.3947368421052631e-05, + "step": 575 + }, + { + "epoch": 1.5121630506245891, + "high_lr": 0.0006973684210526315, + "low_lr": 1.3947368421052631e-05, + "step": 575 + }, + { + "epoch": 1.5121630506245891, + "high_lr": 0.0006973684210526315, + "low_lr": 1.3947368421052631e-05, + "step": 575 + }, + { + "epoch": 1.5121630506245891, + "high_lr": 0.0006973684210526315, + "low_lr": 1.3947368421052631e-05, + "step": 575 + }, + { + "epoch": 1.514792899408284, + "grad_norm": 1.0958755016326904, + "learning_rate": 0.0006968421052631579, + "loss": 1.4779, + "step": 576 + }, + { + "epoch": 1.514792899408284, + "high_lr": 0.0006968421052631579, + "low_lr": 1.393684210526316e-05, + "step": 576 + }, + { + "epoch": 1.514792899408284, + "high_lr": 0.0006968421052631579, + "low_lr": 1.393684210526316e-05, + "step": 576 + }, + { + "epoch": 1.514792899408284, + "high_lr": 0.0006968421052631579, + "low_lr": 1.393684210526316e-05, + "step": 576 + }, + { + "epoch": 1.514792899408284, + "high_lr": 0.0006968421052631579, + "low_lr": 1.393684210526316e-05, + "step": 576 + }, + { + "epoch": 1.514792899408284, + "high_lr": 0.0006968421052631579, + "low_lr": 1.393684210526316e-05, + "step": 576 + }, + { + "epoch": 1.514792899408284, + "high_lr": 0.0006968421052631579, + "low_lr": 1.393684210526316e-05, + "step": 576 + }, + { + "epoch": 1.514792899408284, + "high_lr": 0.0006968421052631579, + "low_lr": 1.393684210526316e-05, + "step": 576 + }, + { + "epoch": 1.514792899408284, + "high_lr": 0.0006968421052631579, + "low_lr": 1.393684210526316e-05, + "step": 576 + }, + { + "epoch": 1.5174227481919789, + "grad_norm": 1.1317672729492188, + "learning_rate": 0.0006963157894736842, + "loss": 1.472, + "step": 577 + }, + { + "epoch": 1.5174227481919789, + "high_lr": 0.0006963157894736842, + "low_lr": 1.3926315789473686e-05, + "step": 577 + }, + { + "epoch": 1.5174227481919789, + "high_lr": 0.0006963157894736842, + "low_lr": 1.3926315789473686e-05, + "step": 577 + }, + { + "epoch": 1.5174227481919789, + "high_lr": 0.0006963157894736842, + "low_lr": 1.3926315789473686e-05, + "step": 577 + }, + { + "epoch": 1.5174227481919789, + "high_lr": 0.0006963157894736842, + "low_lr": 1.3926315789473686e-05, + "step": 577 + }, + { + "epoch": 1.5174227481919789, + "high_lr": 0.0006963157894736842, + "low_lr": 1.3926315789473686e-05, + "step": 577 + }, + { + "epoch": 1.5174227481919789, + "high_lr": 0.0006963157894736842, + "low_lr": 1.3926315789473686e-05, + "step": 577 + }, + { + "epoch": 1.5174227481919789, + "high_lr": 0.0006963157894736842, + "low_lr": 1.3926315789473686e-05, + "step": 577 + }, + { + "epoch": 1.5174227481919789, + "high_lr": 0.0006963157894736842, + "low_lr": 1.3926315789473686e-05, + "step": 577 + }, + { + "epoch": 1.520052596975674, + "grad_norm": 1.1417269706726074, + "learning_rate": 0.0006957894736842105, + "loss": 1.4846, + "step": 578 + }, + { + "epoch": 1.520052596975674, + "high_lr": 0.0006957894736842105, + "low_lr": 1.3915789473684212e-05, + "step": 578 + }, + { + "epoch": 1.520052596975674, + "high_lr": 0.0006957894736842105, + "low_lr": 1.3915789473684212e-05, + "step": 578 + }, + { + "epoch": 1.520052596975674, + "high_lr": 0.0006957894736842105, + "low_lr": 1.3915789473684212e-05, + "step": 578 + }, + { + "epoch": 1.520052596975674, + "high_lr": 0.0006957894736842105, + "low_lr": 1.3915789473684212e-05, + "step": 578 + }, + { + "epoch": 1.520052596975674, + "high_lr": 0.0006957894736842105, + "low_lr": 1.3915789473684212e-05, + "step": 578 + }, + { + "epoch": 1.520052596975674, + "high_lr": 0.0006957894736842105, + "low_lr": 1.3915789473684212e-05, + "step": 578 + }, + { + "epoch": 1.520052596975674, + "high_lr": 0.0006957894736842105, + "low_lr": 1.3915789473684212e-05, + "step": 578 + }, + { + "epoch": 1.520052596975674, + "high_lr": 0.0006957894736842105, + "low_lr": 1.3915789473684212e-05, + "step": 578 + }, + { + "epoch": 1.5226824457593688, + "grad_norm": 4.384186267852783, + "learning_rate": 0.0006952631578947368, + "loss": 1.497, + "step": 579 + }, + { + "epoch": 1.5226824457593688, + "high_lr": 0.0006952631578947368, + "low_lr": 1.3905263157894737e-05, + "step": 579 + }, + { + "epoch": 1.5226824457593688, + "high_lr": 0.0006952631578947368, + "low_lr": 1.3905263157894737e-05, + "step": 579 + }, + { + "epoch": 1.5226824457593688, + "high_lr": 0.0006952631578947368, + "low_lr": 1.3905263157894737e-05, + "step": 579 + }, + { + "epoch": 1.5226824457593688, + "high_lr": 0.0006952631578947368, + "low_lr": 1.3905263157894737e-05, + "step": 579 + }, + { + "epoch": 1.5226824457593688, + "high_lr": 0.0006952631578947368, + "low_lr": 1.3905263157894737e-05, + "step": 579 + }, + { + "epoch": 1.5226824457593688, + "high_lr": 0.0006952631578947368, + "low_lr": 1.3905263157894737e-05, + "step": 579 + }, + { + "epoch": 1.5226824457593688, + "high_lr": 0.0006952631578947368, + "low_lr": 1.3905263157894737e-05, + "step": 579 + }, + { + "epoch": 1.5226824457593688, + "high_lr": 0.0006952631578947368, + "low_lr": 1.3905263157894737e-05, + "step": 579 + }, + { + "epoch": 1.5253122945430637, + "grad_norm": 1.1421034336090088, + "learning_rate": 0.0006947368421052632, + "loss": 1.5128, + "step": 580 + }, + { + "epoch": 1.5253122945430637, + "high_lr": 0.0006947368421052632, + "low_lr": 1.3894736842105265e-05, + "step": 580 + }, + { + "epoch": 1.5253122945430637, + "high_lr": 0.0006947368421052632, + "low_lr": 1.3894736842105265e-05, + "step": 580 + }, + { + "epoch": 1.5253122945430637, + "high_lr": 0.0006947368421052632, + "low_lr": 1.3894736842105265e-05, + "step": 580 + }, + { + "epoch": 1.5253122945430637, + "high_lr": 0.0006947368421052632, + "low_lr": 1.3894736842105265e-05, + "step": 580 + }, + { + "epoch": 1.5253122945430637, + "high_lr": 0.0006947368421052632, + "low_lr": 1.3894736842105265e-05, + "step": 580 + }, + { + "epoch": 1.5253122945430637, + "high_lr": 0.0006947368421052632, + "low_lr": 1.3894736842105265e-05, + "step": 580 + }, + { + "epoch": 1.5253122945430637, + "high_lr": 0.0006947368421052632, + "low_lr": 1.3894736842105265e-05, + "step": 580 + }, + { + "epoch": 1.5253122945430637, + "high_lr": 0.0006947368421052632, + "low_lr": 1.3894736842105265e-05, + "step": 580 + }, + { + "epoch": 1.5279421433267588, + "grad_norm": 1.2075952291488647, + "learning_rate": 0.0006942105263157895, + "loss": 1.5414, + "step": 581 + }, + { + "epoch": 1.5279421433267588, + "high_lr": 0.0006942105263157895, + "low_lr": 1.3884210526315791e-05, + "step": 581 + }, + { + "epoch": 1.5279421433267588, + "high_lr": 0.0006942105263157895, + "low_lr": 1.3884210526315791e-05, + "step": 581 + }, + { + "epoch": 1.5279421433267588, + "high_lr": 0.0006942105263157895, + "low_lr": 1.3884210526315791e-05, + "step": 581 + }, + { + "epoch": 1.5279421433267588, + "high_lr": 0.0006942105263157895, + "low_lr": 1.3884210526315791e-05, + "step": 581 + }, + { + "epoch": 1.5279421433267588, + "high_lr": 0.0006942105263157895, + "low_lr": 1.3884210526315791e-05, + "step": 581 + }, + { + "epoch": 1.5279421433267588, + "high_lr": 0.0006942105263157895, + "low_lr": 1.3884210526315791e-05, + "step": 581 + }, + { + "epoch": 1.5279421433267588, + "high_lr": 0.0006942105263157895, + "low_lr": 1.3884210526315791e-05, + "step": 581 + }, + { + "epoch": 1.5279421433267588, + "high_lr": 0.0006942105263157895, + "low_lr": 1.3884210526315791e-05, + "step": 581 + }, + { + "epoch": 1.5305719921104537, + "grad_norm": 1.0938867330551147, + "learning_rate": 0.0006936842105263159, + "loss": 1.4475, + "step": 582 + }, + { + "epoch": 1.5305719921104537, + "high_lr": 0.0006936842105263159, + "low_lr": 1.3873684210526317e-05, + "step": 582 + }, + { + "epoch": 1.5305719921104537, + "high_lr": 0.0006936842105263159, + "low_lr": 1.3873684210526317e-05, + "step": 582 + }, + { + "epoch": 1.5305719921104537, + "high_lr": 0.0006936842105263159, + "low_lr": 1.3873684210526317e-05, + "step": 582 + }, + { + "epoch": 1.5305719921104537, + "high_lr": 0.0006936842105263159, + "low_lr": 1.3873684210526317e-05, + "step": 582 + }, + { + "epoch": 1.5305719921104537, + "high_lr": 0.0006936842105263159, + "low_lr": 1.3873684210526317e-05, + "step": 582 + }, + { + "epoch": 1.5305719921104537, + "high_lr": 0.0006936842105263159, + "low_lr": 1.3873684210526317e-05, + "step": 582 + }, + { + "epoch": 1.5305719921104537, + "high_lr": 0.0006936842105263159, + "low_lr": 1.3873684210526317e-05, + "step": 582 + }, + { + "epoch": 1.5305719921104537, + "high_lr": 0.0006936842105263159, + "low_lr": 1.3873684210526317e-05, + "step": 582 + }, + { + "epoch": 1.5332018408941486, + "grad_norm": 1.0829936265945435, + "learning_rate": 0.0006931578947368421, + "loss": 1.436, + "step": 583 + }, + { + "epoch": 1.5332018408941486, + "high_lr": 0.0006931578947368421, + "low_lr": 1.3863157894736842e-05, + "step": 583 + }, + { + "epoch": 1.5332018408941486, + "high_lr": 0.0006931578947368421, + "low_lr": 1.3863157894736842e-05, + "step": 583 + }, + { + "epoch": 1.5332018408941486, + "high_lr": 0.0006931578947368421, + "low_lr": 1.3863157894736842e-05, + "step": 583 + }, + { + "epoch": 1.5332018408941486, + "high_lr": 0.0006931578947368421, + "low_lr": 1.3863157894736842e-05, + "step": 583 + }, + { + "epoch": 1.5332018408941486, + "high_lr": 0.0006931578947368421, + "low_lr": 1.3863157894736842e-05, + "step": 583 + }, + { + "epoch": 1.5332018408941486, + "high_lr": 0.0006931578947368421, + "low_lr": 1.3863157894736842e-05, + "step": 583 + }, + { + "epoch": 1.5332018408941486, + "high_lr": 0.0006931578947368421, + "low_lr": 1.3863157894736842e-05, + "step": 583 + }, + { + "epoch": 1.5332018408941486, + "high_lr": 0.0006931578947368421, + "low_lr": 1.3863157894736842e-05, + "step": 583 + }, + { + "epoch": 1.5358316896778437, + "grad_norm": 1.1421469449996948, + "learning_rate": 0.0006926315789473684, + "loss": 1.4937, + "step": 584 + }, + { + "epoch": 1.5358316896778437, + "high_lr": 0.0006926315789473684, + "low_lr": 1.3852631578947368e-05, + "step": 584 + }, + { + "epoch": 1.5358316896778437, + "high_lr": 0.0006926315789473684, + "low_lr": 1.3852631578947368e-05, + "step": 584 + }, + { + "epoch": 1.5358316896778437, + "high_lr": 0.0006926315789473684, + "low_lr": 1.3852631578947368e-05, + "step": 584 + }, + { + "epoch": 1.5358316896778437, + "high_lr": 0.0006926315789473684, + "low_lr": 1.3852631578947368e-05, + "step": 584 + }, + { + "epoch": 1.5358316896778437, + "high_lr": 0.0006926315789473684, + "low_lr": 1.3852631578947368e-05, + "step": 584 + }, + { + "epoch": 1.5358316896778437, + "high_lr": 0.0006926315789473684, + "low_lr": 1.3852631578947368e-05, + "step": 584 + }, + { + "epoch": 1.5358316896778437, + "high_lr": 0.0006926315789473684, + "low_lr": 1.3852631578947368e-05, + "step": 584 + }, + { + "epoch": 1.5358316896778437, + "high_lr": 0.0006926315789473684, + "low_lr": 1.3852631578947368e-05, + "step": 584 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 1.1205614805221558, + "learning_rate": 0.0006921052631578948, + "loss": 1.5554, + "step": 585 + }, + { + "epoch": 1.5384615384615383, + "high_lr": 0.0006921052631578948, + "low_lr": 1.3842105263157896e-05, + "step": 585 + }, + { + "epoch": 1.5384615384615383, + "high_lr": 0.0006921052631578948, + "low_lr": 1.3842105263157896e-05, + "step": 585 + }, + { + "epoch": 1.5384615384615383, + "high_lr": 0.0006921052631578948, + "low_lr": 1.3842105263157896e-05, + "step": 585 + }, + { + "epoch": 1.5384615384615383, + "high_lr": 0.0006921052631578948, + "low_lr": 1.3842105263157896e-05, + "step": 585 + }, + { + "epoch": 1.5384615384615383, + "high_lr": 0.0006921052631578948, + "low_lr": 1.3842105263157896e-05, + "step": 585 + }, + { + "epoch": 1.5384615384615383, + "high_lr": 0.0006921052631578948, + "low_lr": 1.3842105263157896e-05, + "step": 585 + }, + { + "epoch": 1.5384615384615383, + "high_lr": 0.0006921052631578948, + "low_lr": 1.3842105263157896e-05, + "step": 585 + }, + { + "epoch": 1.5384615384615383, + "high_lr": 0.0006921052631578948, + "low_lr": 1.3842105263157896e-05, + "step": 585 + }, + { + "epoch": 1.5410913872452334, + "grad_norm": 1.1230958700180054, + "learning_rate": 0.0006915789473684211, + "loss": 1.4586, + "step": 586 + }, + { + "epoch": 1.5410913872452334, + "high_lr": 0.0006915789473684211, + "low_lr": 1.3831578947368423e-05, + "step": 586 + }, + { + "epoch": 1.5410913872452334, + "high_lr": 0.0006915789473684211, + "low_lr": 1.3831578947368423e-05, + "step": 586 + }, + { + "epoch": 1.5410913872452334, + "high_lr": 0.0006915789473684211, + "low_lr": 1.3831578947368423e-05, + "step": 586 + }, + { + "epoch": 1.5410913872452334, + "high_lr": 0.0006915789473684211, + "low_lr": 1.3831578947368423e-05, + "step": 586 + }, + { + "epoch": 1.5410913872452334, + "high_lr": 0.0006915789473684211, + "low_lr": 1.3831578947368423e-05, + "step": 586 + }, + { + "epoch": 1.5410913872452334, + "high_lr": 0.0006915789473684211, + "low_lr": 1.3831578947368423e-05, + "step": 586 + }, + { + "epoch": 1.5410913872452334, + "high_lr": 0.0006915789473684211, + "low_lr": 1.3831578947368423e-05, + "step": 586 + }, + { + "epoch": 1.5410913872452334, + "high_lr": 0.0006915789473684211, + "low_lr": 1.3831578947368423e-05, + "step": 586 + }, + { + "epoch": 1.5437212360289283, + "grad_norm": 1.1113606691360474, + "learning_rate": 0.0006910526315789474, + "loss": 1.4812, + "step": 587 + }, + { + "epoch": 1.5437212360289283, + "high_lr": 0.0006910526315789474, + "low_lr": 1.3821052631578949e-05, + "step": 587 + }, + { + "epoch": 1.5437212360289283, + "high_lr": 0.0006910526315789474, + "low_lr": 1.3821052631578949e-05, + "step": 587 + }, + { + "epoch": 1.5437212360289283, + "high_lr": 0.0006910526315789474, + "low_lr": 1.3821052631578949e-05, + "step": 587 + }, + { + "epoch": 1.5437212360289283, + "high_lr": 0.0006910526315789474, + "low_lr": 1.3821052631578949e-05, + "step": 587 + }, + { + "epoch": 1.5437212360289283, + "high_lr": 0.0006910526315789474, + "low_lr": 1.3821052631578949e-05, + "step": 587 + }, + { + "epoch": 1.5437212360289283, + "high_lr": 0.0006910526315789474, + "low_lr": 1.3821052631578949e-05, + "step": 587 + }, + { + "epoch": 1.5437212360289283, + "high_lr": 0.0006910526315789474, + "low_lr": 1.3821052631578949e-05, + "step": 587 + }, + { + "epoch": 1.5437212360289283, + "high_lr": 0.0006910526315789474, + "low_lr": 1.3821052631578949e-05, + "step": 587 + }, + { + "epoch": 1.5463510848126232, + "grad_norm": 1.0749051570892334, + "learning_rate": 0.0006905263157894737, + "loss": 1.4668, + "step": 588 + }, + { + "epoch": 1.5463510848126232, + "high_lr": 0.0006905263157894737, + "low_lr": 1.3810526315789474e-05, + "step": 588 + }, + { + "epoch": 1.5463510848126232, + "high_lr": 0.0006905263157894737, + "low_lr": 1.3810526315789474e-05, + "step": 588 + }, + { + "epoch": 1.5463510848126232, + "high_lr": 0.0006905263157894737, + "low_lr": 1.3810526315789474e-05, + "step": 588 + }, + { + "epoch": 1.5463510848126232, + "high_lr": 0.0006905263157894737, + "low_lr": 1.3810526315789474e-05, + "step": 588 + }, + { + "epoch": 1.5463510848126232, + "high_lr": 0.0006905263157894737, + "low_lr": 1.3810526315789474e-05, + "step": 588 + }, + { + "epoch": 1.5463510848126232, + "high_lr": 0.0006905263157894737, + "low_lr": 1.3810526315789474e-05, + "step": 588 + }, + { + "epoch": 1.5463510848126232, + "high_lr": 0.0006905263157894737, + "low_lr": 1.3810526315789474e-05, + "step": 588 + }, + { + "epoch": 1.5463510848126232, + "high_lr": 0.0006905263157894737, + "low_lr": 1.3810526315789474e-05, + "step": 588 + }, + { + "epoch": 1.5489809335963183, + "grad_norm": 1.0763673782348633, + "learning_rate": 0.00069, + "loss": 1.5195, + "step": 589 + }, + { + "epoch": 1.5489809335963183, + "high_lr": 0.00069, + "low_lr": 1.38e-05, + "step": 589 + }, + { + "epoch": 1.5489809335963183, + "high_lr": 0.00069, + "low_lr": 1.38e-05, + "step": 589 + }, + { + "epoch": 1.5489809335963183, + "high_lr": 0.00069, + "low_lr": 1.38e-05, + "step": 589 + }, + { + "epoch": 1.5489809335963183, + "high_lr": 0.00069, + "low_lr": 1.38e-05, + "step": 589 + }, + { + "epoch": 1.5489809335963183, + "high_lr": 0.00069, + "low_lr": 1.38e-05, + "step": 589 + }, + { + "epoch": 1.5489809335963183, + "high_lr": 0.00069, + "low_lr": 1.38e-05, + "step": 589 + }, + { + "epoch": 1.5489809335963183, + "high_lr": 0.00069, + "low_lr": 1.38e-05, + "step": 589 + }, + { + "epoch": 1.5489809335963183, + "high_lr": 0.00069, + "low_lr": 1.38e-05, + "step": 589 + }, + { + "epoch": 1.5516107823800132, + "grad_norm": 1.023079752922058, + "learning_rate": 0.0006894736842105264, + "loss": 1.4612, + "step": 590 + }, + { + "epoch": 1.5516107823800132, + "high_lr": 0.0006894736842105264, + "low_lr": 1.3789473684210528e-05, + "step": 590 + }, + { + "epoch": 1.5516107823800132, + "high_lr": 0.0006894736842105264, + "low_lr": 1.3789473684210528e-05, + "step": 590 + }, + { + "epoch": 1.5516107823800132, + "high_lr": 0.0006894736842105264, + "low_lr": 1.3789473684210528e-05, + "step": 590 + }, + { + "epoch": 1.5516107823800132, + "high_lr": 0.0006894736842105264, + "low_lr": 1.3789473684210528e-05, + "step": 590 + }, + { + "epoch": 1.5516107823800132, + "high_lr": 0.0006894736842105264, + "low_lr": 1.3789473684210528e-05, + "step": 590 + }, + { + "epoch": 1.5516107823800132, + "high_lr": 0.0006894736842105264, + "low_lr": 1.3789473684210528e-05, + "step": 590 + }, + { + "epoch": 1.5516107823800132, + "high_lr": 0.0006894736842105264, + "low_lr": 1.3789473684210528e-05, + "step": 590 + }, + { + "epoch": 1.5516107823800132, + "high_lr": 0.0006894736842105264, + "low_lr": 1.3789473684210528e-05, + "step": 590 + }, + { + "epoch": 1.554240631163708, + "grad_norm": 1.1420434713363647, + "learning_rate": 0.0006889473684210526, + "loss": 1.46, + "step": 591 + }, + { + "epoch": 1.554240631163708, + "high_lr": 0.0006889473684210526, + "low_lr": 1.3778947368421054e-05, + "step": 591 + }, + { + "epoch": 1.554240631163708, + "high_lr": 0.0006889473684210526, + "low_lr": 1.3778947368421054e-05, + "step": 591 + }, + { + "epoch": 1.554240631163708, + "high_lr": 0.0006889473684210526, + "low_lr": 1.3778947368421054e-05, + "step": 591 + }, + { + "epoch": 1.554240631163708, + "high_lr": 0.0006889473684210526, + "low_lr": 1.3778947368421054e-05, + "step": 591 + }, + { + "epoch": 1.554240631163708, + "high_lr": 0.0006889473684210526, + "low_lr": 1.3778947368421054e-05, + "step": 591 + }, + { + "epoch": 1.554240631163708, + "high_lr": 0.0006889473684210526, + "low_lr": 1.3778947368421054e-05, + "step": 591 + }, + { + "epoch": 1.554240631163708, + "high_lr": 0.0006889473684210526, + "low_lr": 1.3778947368421054e-05, + "step": 591 + }, + { + "epoch": 1.554240631163708, + "high_lr": 0.0006889473684210526, + "low_lr": 1.3778947368421054e-05, + "step": 591 + }, + { + "epoch": 1.5568704799474031, + "grad_norm": 1.221968412399292, + "learning_rate": 0.0006884210526315789, + "loss": 1.4592, + "step": 592 + }, + { + "epoch": 1.5568704799474031, + "high_lr": 0.0006884210526315789, + "low_lr": 1.3768421052631579e-05, + "step": 592 + }, + { + "epoch": 1.5568704799474031, + "high_lr": 0.0006884210526315789, + "low_lr": 1.3768421052631579e-05, + "step": 592 + }, + { + "epoch": 1.5568704799474031, + "high_lr": 0.0006884210526315789, + "low_lr": 1.3768421052631579e-05, + "step": 592 + }, + { + "epoch": 1.5568704799474031, + "high_lr": 0.0006884210526315789, + "low_lr": 1.3768421052631579e-05, + "step": 592 + }, + { + "epoch": 1.5568704799474031, + "high_lr": 0.0006884210526315789, + "low_lr": 1.3768421052631579e-05, + "step": 592 + }, + { + "epoch": 1.5568704799474031, + "high_lr": 0.0006884210526315789, + "low_lr": 1.3768421052631579e-05, + "step": 592 + }, + { + "epoch": 1.5568704799474031, + "high_lr": 0.0006884210526315789, + "low_lr": 1.3768421052631579e-05, + "step": 592 + }, + { + "epoch": 1.5568704799474031, + "high_lr": 0.0006884210526315789, + "low_lr": 1.3768421052631579e-05, + "step": 592 + }, + { + "epoch": 1.559500328731098, + "grad_norm": 1.0707217454910278, + "learning_rate": 0.0006878947368421052, + "loss": 1.5016, + "step": 593 + }, + { + "epoch": 1.559500328731098, + "high_lr": 0.0006878947368421052, + "low_lr": 1.3757894736842105e-05, + "step": 593 + }, + { + "epoch": 1.559500328731098, + "high_lr": 0.0006878947368421052, + "low_lr": 1.3757894736842105e-05, + "step": 593 + }, + { + "epoch": 1.559500328731098, + "high_lr": 0.0006878947368421052, + "low_lr": 1.3757894736842105e-05, + "step": 593 + }, + { + "epoch": 1.559500328731098, + "high_lr": 0.0006878947368421052, + "low_lr": 1.3757894736842105e-05, + "step": 593 + }, + { + "epoch": 1.559500328731098, + "high_lr": 0.0006878947368421052, + "low_lr": 1.3757894736842105e-05, + "step": 593 + }, + { + "epoch": 1.559500328731098, + "high_lr": 0.0006878947368421052, + "low_lr": 1.3757894736842105e-05, + "step": 593 + }, + { + "epoch": 1.559500328731098, + "high_lr": 0.0006878947368421052, + "low_lr": 1.3757894736842105e-05, + "step": 593 + }, + { + "epoch": 1.559500328731098, + "high_lr": 0.0006878947368421052, + "low_lr": 1.3757894736842105e-05, + "step": 593 + }, + { + "epoch": 1.5621301775147929, + "grad_norm": 1.0185551643371582, + "learning_rate": 0.0006873684210526316, + "loss": 1.4795, + "step": 594 + }, + { + "epoch": 1.5621301775147929, + "high_lr": 0.0006873684210526316, + "low_lr": 1.3747368421052633e-05, + "step": 594 + }, + { + "epoch": 1.5621301775147929, + "high_lr": 0.0006873684210526316, + "low_lr": 1.3747368421052633e-05, + "step": 594 + }, + { + "epoch": 1.5621301775147929, + "high_lr": 0.0006873684210526316, + "low_lr": 1.3747368421052633e-05, + "step": 594 + }, + { + "epoch": 1.5621301775147929, + "high_lr": 0.0006873684210526316, + "low_lr": 1.3747368421052633e-05, + "step": 594 + }, + { + "epoch": 1.5621301775147929, + "high_lr": 0.0006873684210526316, + "low_lr": 1.3747368421052633e-05, + "step": 594 + }, + { + "epoch": 1.5621301775147929, + "high_lr": 0.0006873684210526316, + "low_lr": 1.3747368421052633e-05, + "step": 594 + }, + { + "epoch": 1.5621301775147929, + "high_lr": 0.0006873684210526316, + "low_lr": 1.3747368421052633e-05, + "step": 594 + }, + { + "epoch": 1.5621301775147929, + "high_lr": 0.0006873684210526316, + "low_lr": 1.3747368421052633e-05, + "step": 594 + }, + { + "epoch": 1.564760026298488, + "grad_norm": 1.0264899730682373, + "learning_rate": 0.0006868421052631579, + "loss": 1.488, + "step": 595 + }, + { + "epoch": 1.564760026298488, + "high_lr": 0.0006868421052631579, + "low_lr": 1.373684210526316e-05, + "step": 595 + }, + { + "epoch": 1.564760026298488, + "high_lr": 0.0006868421052631579, + "low_lr": 1.373684210526316e-05, + "step": 595 + }, + { + "epoch": 1.564760026298488, + "high_lr": 0.0006868421052631579, + "low_lr": 1.373684210526316e-05, + "step": 595 + }, + { + "epoch": 1.564760026298488, + "high_lr": 0.0006868421052631579, + "low_lr": 1.373684210526316e-05, + "step": 595 + }, + { + "epoch": 1.564760026298488, + "high_lr": 0.0006868421052631579, + "low_lr": 1.373684210526316e-05, + "step": 595 + }, + { + "epoch": 1.564760026298488, + "high_lr": 0.0006868421052631579, + "low_lr": 1.373684210526316e-05, + "step": 595 + }, + { + "epoch": 1.564760026298488, + "high_lr": 0.0006868421052631579, + "low_lr": 1.373684210526316e-05, + "step": 595 + }, + { + "epoch": 1.564760026298488, + "high_lr": 0.0006868421052631579, + "low_lr": 1.373684210526316e-05, + "step": 595 + }, + { + "epoch": 1.5673898750821826, + "grad_norm": 1.1029831171035767, + "learning_rate": 0.0006863157894736842, + "loss": 1.5167, + "step": 596 + }, + { + "epoch": 1.5673898750821826, + "high_lr": 0.0006863157894736842, + "low_lr": 1.3726315789473686e-05, + "step": 596 + }, + { + "epoch": 1.5673898750821826, + "high_lr": 0.0006863157894736842, + "low_lr": 1.3726315789473686e-05, + "step": 596 + }, + { + "epoch": 1.5673898750821826, + "high_lr": 0.0006863157894736842, + "low_lr": 1.3726315789473686e-05, + "step": 596 + }, + { + "epoch": 1.5673898750821826, + "high_lr": 0.0006863157894736842, + "low_lr": 1.3726315789473686e-05, + "step": 596 + }, + { + "epoch": 1.5673898750821826, + "high_lr": 0.0006863157894736842, + "low_lr": 1.3726315789473686e-05, + "step": 596 + }, + { + "epoch": 1.5673898750821826, + "high_lr": 0.0006863157894736842, + "low_lr": 1.3726315789473686e-05, + "step": 596 + }, + { + "epoch": 1.5673898750821826, + "high_lr": 0.0006863157894736842, + "low_lr": 1.3726315789473686e-05, + "step": 596 + }, + { + "epoch": 1.5673898750821826, + "high_lr": 0.0006863157894736842, + "low_lr": 1.3726315789473686e-05, + "step": 596 + }, + { + "epoch": 1.5700197238658777, + "grad_norm": 1.1172997951507568, + "learning_rate": 0.0006857894736842105, + "loss": 1.4726, + "step": 597 + }, + { + "epoch": 1.5700197238658777, + "high_lr": 0.0006857894736842105, + "low_lr": 1.371578947368421e-05, + "step": 597 + }, + { + "epoch": 1.5700197238658777, + "high_lr": 0.0006857894736842105, + "low_lr": 1.371578947368421e-05, + "step": 597 + }, + { + "epoch": 1.5700197238658777, + "high_lr": 0.0006857894736842105, + "low_lr": 1.371578947368421e-05, + "step": 597 + }, + { + "epoch": 1.5700197238658777, + "high_lr": 0.0006857894736842105, + "low_lr": 1.371578947368421e-05, + "step": 597 + }, + { + "epoch": 1.5700197238658777, + "high_lr": 0.0006857894736842105, + "low_lr": 1.371578947368421e-05, + "step": 597 + }, + { + "epoch": 1.5700197238658777, + "high_lr": 0.0006857894736842105, + "low_lr": 1.371578947368421e-05, + "step": 597 + }, + { + "epoch": 1.5700197238658777, + "high_lr": 0.0006857894736842105, + "low_lr": 1.371578947368421e-05, + "step": 597 + }, + { + "epoch": 1.5700197238658777, + "high_lr": 0.0006857894736842105, + "low_lr": 1.371578947368421e-05, + "step": 597 + }, + { + "epoch": 1.5726495726495726, + "grad_norm": 1.0712045431137085, + "learning_rate": 0.0006852631578947368, + "loss": 1.4423, + "step": 598 + }, + { + "epoch": 1.5726495726495726, + "high_lr": 0.0006852631578947368, + "low_lr": 1.3705263157894737e-05, + "step": 598 + }, + { + "epoch": 1.5726495726495726, + "high_lr": 0.0006852631578947368, + "low_lr": 1.3705263157894737e-05, + "step": 598 + }, + { + "epoch": 1.5726495726495726, + "high_lr": 0.0006852631578947368, + "low_lr": 1.3705263157894737e-05, + "step": 598 + }, + { + "epoch": 1.5726495726495726, + "high_lr": 0.0006852631578947368, + "low_lr": 1.3705263157894737e-05, + "step": 598 + }, + { + "epoch": 1.5726495726495726, + "high_lr": 0.0006852631578947368, + "low_lr": 1.3705263157894737e-05, + "step": 598 + }, + { + "epoch": 1.5726495726495726, + "high_lr": 0.0006852631578947368, + "low_lr": 1.3705263157894737e-05, + "step": 598 + }, + { + "epoch": 1.5726495726495726, + "high_lr": 0.0006852631578947368, + "low_lr": 1.3705263157894737e-05, + "step": 598 + }, + { + "epoch": 1.5726495726495726, + "high_lr": 0.0006852631578947368, + "low_lr": 1.3705263157894737e-05, + "step": 598 + }, + { + "epoch": 1.5752794214332675, + "grad_norm": 1.0438041687011719, + "learning_rate": 0.0006847368421052633, + "loss": 1.4476, + "step": 599 + }, + { + "epoch": 1.5752794214332675, + "high_lr": 0.0006847368421052633, + "low_lr": 1.3694736842105265e-05, + "step": 599 + }, + { + "epoch": 1.5752794214332675, + "high_lr": 0.0006847368421052633, + "low_lr": 1.3694736842105265e-05, + "step": 599 + }, + { + "epoch": 1.5752794214332675, + "high_lr": 0.0006847368421052633, + "low_lr": 1.3694736842105265e-05, + "step": 599 + }, + { + "epoch": 1.5752794214332675, + "high_lr": 0.0006847368421052633, + "low_lr": 1.3694736842105265e-05, + "step": 599 + }, + { + "epoch": 1.5752794214332675, + "high_lr": 0.0006847368421052633, + "low_lr": 1.3694736842105265e-05, + "step": 599 + }, + { + "epoch": 1.5752794214332675, + "high_lr": 0.0006847368421052633, + "low_lr": 1.3694736842105265e-05, + "step": 599 + }, + { + "epoch": 1.5752794214332675, + "high_lr": 0.0006847368421052633, + "low_lr": 1.3694736842105265e-05, + "step": 599 + }, + { + "epoch": 1.5752794214332675, + "high_lr": 0.0006847368421052633, + "low_lr": 1.3694736842105265e-05, + "step": 599 + }, + { + "epoch": 1.5779092702169626, + "grad_norm": 1.0444962978363037, + "learning_rate": 0.0006842105263157895, + "loss": 1.4121, + "step": 600 + }, + { + "epoch": 1.5779092702169626, + "high_lr": 0.0006842105263157895, + "low_lr": 1.3684210526315791e-05, + "step": 600 + }, + { + "epoch": 1.5779092702169626, + "high_lr": 0.0006842105263157895, + "low_lr": 1.3684210526315791e-05, + "step": 600 + }, + { + "epoch": 1.5779092702169626, + "high_lr": 0.0006842105263157895, + "low_lr": 1.3684210526315791e-05, + "step": 600 + }, + { + "epoch": 1.5779092702169626, + "high_lr": 0.0006842105263157895, + "low_lr": 1.3684210526315791e-05, + "step": 600 + }, + { + "epoch": 1.5779092702169626, + "high_lr": 0.0006842105263157895, + "low_lr": 1.3684210526315791e-05, + "step": 600 + }, + { + "epoch": 1.5779092702169626, + "high_lr": 0.0006842105263157895, + "low_lr": 1.3684210526315791e-05, + "step": 600 + }, + { + "epoch": 1.5779092702169626, + "high_lr": 0.0006842105263157895, + "low_lr": 1.3684210526315791e-05, + "step": 600 + }, + { + "epoch": 1.5779092702169626, + "high_lr": 0.0006842105263157895, + "low_lr": 1.3684210526315791e-05, + "step": 600 + }, + { + "epoch": 1.5805391190006575, + "grad_norm": 1.0622714757919312, + "learning_rate": 0.0006836842105263158, + "loss": 1.5067, + "step": 601 + }, + { + "epoch": 1.5805391190006575, + "high_lr": 0.0006836842105263158, + "low_lr": 1.3673684210526316e-05, + "step": 601 + }, + { + "epoch": 1.5805391190006575, + "high_lr": 0.0006836842105263158, + "low_lr": 1.3673684210526316e-05, + "step": 601 + }, + { + "epoch": 1.5805391190006575, + "high_lr": 0.0006836842105263158, + "low_lr": 1.3673684210526316e-05, + "step": 601 + }, + { + "epoch": 1.5805391190006575, + "high_lr": 0.0006836842105263158, + "low_lr": 1.3673684210526316e-05, + "step": 601 + }, + { + "epoch": 1.5805391190006575, + "high_lr": 0.0006836842105263158, + "low_lr": 1.3673684210526316e-05, + "step": 601 + }, + { + "epoch": 1.5805391190006575, + "high_lr": 0.0006836842105263158, + "low_lr": 1.3673684210526316e-05, + "step": 601 + }, + { + "epoch": 1.5805391190006575, + "high_lr": 0.0006836842105263158, + "low_lr": 1.3673684210526316e-05, + "step": 601 + }, + { + "epoch": 1.5805391190006575, + "high_lr": 0.0006836842105263158, + "low_lr": 1.3673684210526316e-05, + "step": 601 + }, + { + "epoch": 1.5831689677843523, + "grad_norm": 1.0328023433685303, + "learning_rate": 0.0006831578947368421, + "loss": 1.5176, + "step": 602 + }, + { + "epoch": 1.5831689677843523, + "high_lr": 0.0006831578947368421, + "low_lr": 1.3663157894736842e-05, + "step": 602 + }, + { + "epoch": 1.5831689677843523, + "high_lr": 0.0006831578947368421, + "low_lr": 1.3663157894736842e-05, + "step": 602 + }, + { + "epoch": 1.5831689677843523, + "high_lr": 0.0006831578947368421, + "low_lr": 1.3663157894736842e-05, + "step": 602 + }, + { + "epoch": 1.5831689677843523, + "high_lr": 0.0006831578947368421, + "low_lr": 1.3663157894736842e-05, + "step": 602 + }, + { + "epoch": 1.5831689677843523, + "high_lr": 0.0006831578947368421, + "low_lr": 1.3663157894736842e-05, + "step": 602 + }, + { + "epoch": 1.5831689677843523, + "high_lr": 0.0006831578947368421, + "low_lr": 1.3663157894736842e-05, + "step": 602 + }, + { + "epoch": 1.5831689677843523, + "high_lr": 0.0006831578947368421, + "low_lr": 1.3663157894736842e-05, + "step": 602 + }, + { + "epoch": 1.5831689677843523, + "high_lr": 0.0006831578947368421, + "low_lr": 1.3663157894736842e-05, + "step": 602 + }, + { + "epoch": 1.5857988165680474, + "grad_norm": 1.0628708600997925, + "learning_rate": 0.0006826315789473684, + "loss": 1.475, + "step": 603 + }, + { + "epoch": 1.5857988165680474, + "high_lr": 0.0006826315789473684, + "low_lr": 1.3652631578947369e-05, + "step": 603 + }, + { + "epoch": 1.5857988165680474, + "high_lr": 0.0006826315789473684, + "low_lr": 1.3652631578947369e-05, + "step": 603 + }, + { + "epoch": 1.5857988165680474, + "high_lr": 0.0006826315789473684, + "low_lr": 1.3652631578947369e-05, + "step": 603 + }, + { + "epoch": 1.5857988165680474, + "high_lr": 0.0006826315789473684, + "low_lr": 1.3652631578947369e-05, + "step": 603 + }, + { + "epoch": 1.5857988165680474, + "high_lr": 0.0006826315789473684, + "low_lr": 1.3652631578947369e-05, + "step": 603 + }, + { + "epoch": 1.5857988165680474, + "high_lr": 0.0006826315789473684, + "low_lr": 1.3652631578947369e-05, + "step": 603 + }, + { + "epoch": 1.5857988165680474, + "high_lr": 0.0006826315789473684, + "low_lr": 1.3652631578947369e-05, + "step": 603 + }, + { + "epoch": 1.5857988165680474, + "high_lr": 0.0006826315789473684, + "low_lr": 1.3652631578947369e-05, + "step": 603 + }, + { + "epoch": 1.5884286653517423, + "grad_norm": 1.1042441129684448, + "learning_rate": 0.0006821052631578948, + "loss": 1.5409, + "step": 604 + }, + { + "epoch": 1.5884286653517423, + "high_lr": 0.0006821052631578948, + "low_lr": 1.3642105263157897e-05, + "step": 604 + }, + { + "epoch": 1.5884286653517423, + "high_lr": 0.0006821052631578948, + "low_lr": 1.3642105263157897e-05, + "step": 604 + }, + { + "epoch": 1.5884286653517423, + "high_lr": 0.0006821052631578948, + "low_lr": 1.3642105263157897e-05, + "step": 604 + }, + { + "epoch": 1.5884286653517423, + "high_lr": 0.0006821052631578948, + "low_lr": 1.3642105263157897e-05, + "step": 604 + }, + { + "epoch": 1.5884286653517423, + "high_lr": 0.0006821052631578948, + "low_lr": 1.3642105263157897e-05, + "step": 604 + }, + { + "epoch": 1.5884286653517423, + "high_lr": 0.0006821052631578948, + "low_lr": 1.3642105263157897e-05, + "step": 604 + }, + { + "epoch": 1.5884286653517423, + "high_lr": 0.0006821052631578948, + "low_lr": 1.3642105263157897e-05, + "step": 604 + }, + { + "epoch": 1.5884286653517423, + "high_lr": 0.0006821052631578948, + "low_lr": 1.3642105263157897e-05, + "step": 604 + }, + { + "epoch": 1.5910585141354372, + "grad_norm": 1.1029032468795776, + "learning_rate": 0.0006815789473684211, + "loss": 1.4833, + "step": 605 + }, + { + "epoch": 1.5910585141354372, + "high_lr": 0.0006815789473684211, + "low_lr": 1.3631578947368423e-05, + "step": 605 + }, + { + "epoch": 1.5910585141354372, + "high_lr": 0.0006815789473684211, + "low_lr": 1.3631578947368423e-05, + "step": 605 + }, + { + "epoch": 1.5910585141354372, + "high_lr": 0.0006815789473684211, + "low_lr": 1.3631578947368423e-05, + "step": 605 + }, + { + "epoch": 1.5910585141354372, + "high_lr": 0.0006815789473684211, + "low_lr": 1.3631578947368423e-05, + "step": 605 + }, + { + "epoch": 1.5910585141354372, + "high_lr": 0.0006815789473684211, + "low_lr": 1.3631578947368423e-05, + "step": 605 + }, + { + "epoch": 1.5910585141354372, + "high_lr": 0.0006815789473684211, + "low_lr": 1.3631578947368423e-05, + "step": 605 + }, + { + "epoch": 1.5910585141354372, + "high_lr": 0.0006815789473684211, + "low_lr": 1.3631578947368423e-05, + "step": 605 + }, + { + "epoch": 1.5910585141354372, + "high_lr": 0.0006815789473684211, + "low_lr": 1.3631578947368423e-05, + "step": 605 + }, + { + "epoch": 1.5936883629191323, + "grad_norm": 1.0896053314208984, + "learning_rate": 0.0006810526315789474, + "loss": 1.4316, + "step": 606 + }, + { + "epoch": 1.5936883629191323, + "high_lr": 0.0006810526315789474, + "low_lr": 1.3621052631578948e-05, + "step": 606 + }, + { + "epoch": 1.5936883629191323, + "high_lr": 0.0006810526315789474, + "low_lr": 1.3621052631578948e-05, + "step": 606 + }, + { + "epoch": 1.5936883629191323, + "high_lr": 0.0006810526315789474, + "low_lr": 1.3621052631578948e-05, + "step": 606 + }, + { + "epoch": 1.5936883629191323, + "high_lr": 0.0006810526315789474, + "low_lr": 1.3621052631578948e-05, + "step": 606 + }, + { + "epoch": 1.5936883629191323, + "high_lr": 0.0006810526315789474, + "low_lr": 1.3621052631578948e-05, + "step": 606 + }, + { + "epoch": 1.5936883629191323, + "high_lr": 0.0006810526315789474, + "low_lr": 1.3621052631578948e-05, + "step": 606 + }, + { + "epoch": 1.5936883629191323, + "high_lr": 0.0006810526315789474, + "low_lr": 1.3621052631578948e-05, + "step": 606 + }, + { + "epoch": 1.5936883629191323, + "high_lr": 0.0006810526315789474, + "low_lr": 1.3621052631578948e-05, + "step": 606 + }, + { + "epoch": 1.596318211702827, + "grad_norm": 1.2059811353683472, + "learning_rate": 0.0006805263157894737, + "loss": 1.4984, + "step": 607 + }, + { + "epoch": 1.596318211702827, + "high_lr": 0.0006805263157894737, + "low_lr": 1.3610526315789474e-05, + "step": 607 + }, + { + "epoch": 1.596318211702827, + "high_lr": 0.0006805263157894737, + "low_lr": 1.3610526315789474e-05, + "step": 607 + }, + { + "epoch": 1.596318211702827, + "high_lr": 0.0006805263157894737, + "low_lr": 1.3610526315789474e-05, + "step": 607 + }, + { + "epoch": 1.596318211702827, + "high_lr": 0.0006805263157894737, + "low_lr": 1.3610526315789474e-05, + "step": 607 + }, + { + "epoch": 1.596318211702827, + "high_lr": 0.0006805263157894737, + "low_lr": 1.3610526315789474e-05, + "step": 607 + }, + { + "epoch": 1.596318211702827, + "high_lr": 0.0006805263157894737, + "low_lr": 1.3610526315789474e-05, + "step": 607 + }, + { + "epoch": 1.596318211702827, + "high_lr": 0.0006805263157894737, + "low_lr": 1.3610526315789474e-05, + "step": 607 + }, + { + "epoch": 1.596318211702827, + "high_lr": 0.0006805263157894737, + "low_lr": 1.3610526315789474e-05, + "step": 607 + }, + { + "epoch": 1.598948060486522, + "grad_norm": 1.071141242980957, + "learning_rate": 0.00068, + "loss": 1.5504, + "step": 608 + }, + { + "epoch": 1.598948060486522, + "high_lr": 0.00068, + "low_lr": 1.3600000000000002e-05, + "step": 608 + }, + { + "epoch": 1.598948060486522, + "high_lr": 0.00068, + "low_lr": 1.3600000000000002e-05, + "step": 608 + }, + { + "epoch": 1.598948060486522, + "high_lr": 0.00068, + "low_lr": 1.3600000000000002e-05, + "step": 608 + }, + { + "epoch": 1.598948060486522, + "high_lr": 0.00068, + "low_lr": 1.3600000000000002e-05, + "step": 608 + }, + { + "epoch": 1.598948060486522, + "high_lr": 0.00068, + "low_lr": 1.3600000000000002e-05, + "step": 608 + }, + { + "epoch": 1.598948060486522, + "high_lr": 0.00068, + "low_lr": 1.3600000000000002e-05, + "step": 608 + }, + { + "epoch": 1.598948060486522, + "high_lr": 0.00068, + "low_lr": 1.3600000000000002e-05, + "step": 608 + }, + { + "epoch": 1.598948060486522, + "high_lr": 0.00068, + "low_lr": 1.3600000000000002e-05, + "step": 608 + }, + { + "epoch": 1.601577909270217, + "grad_norm": 1.0634510517120361, + "learning_rate": 0.0006794736842105263, + "loss": 1.5281, + "step": 609 + }, + { + "epoch": 1.601577909270217, + "high_lr": 0.0006794736842105263, + "low_lr": 1.3589473684210528e-05, + "step": 609 + }, + { + "epoch": 1.601577909270217, + "high_lr": 0.0006794736842105263, + "low_lr": 1.3589473684210528e-05, + "step": 609 + }, + { + "epoch": 1.601577909270217, + "high_lr": 0.0006794736842105263, + "low_lr": 1.3589473684210528e-05, + "step": 609 + }, + { + "epoch": 1.601577909270217, + "high_lr": 0.0006794736842105263, + "low_lr": 1.3589473684210528e-05, + "step": 609 + }, + { + "epoch": 1.601577909270217, + "high_lr": 0.0006794736842105263, + "low_lr": 1.3589473684210528e-05, + "step": 609 + }, + { + "epoch": 1.601577909270217, + "high_lr": 0.0006794736842105263, + "low_lr": 1.3589473684210528e-05, + "step": 609 + }, + { + "epoch": 1.601577909270217, + "high_lr": 0.0006794736842105263, + "low_lr": 1.3589473684210528e-05, + "step": 609 + }, + { + "epoch": 1.601577909270217, + "high_lr": 0.0006794736842105263, + "low_lr": 1.3589473684210528e-05, + "step": 609 + }, + { + "epoch": 1.6042077580539118, + "grad_norm": 1.0419907569885254, + "learning_rate": 0.0006789473684210526, + "loss": 1.4822, + "step": 610 + }, + { + "epoch": 1.6042077580539118, + "high_lr": 0.0006789473684210526, + "low_lr": 1.3578947368421055e-05, + "step": 610 + }, + { + "epoch": 1.6042077580539118, + "high_lr": 0.0006789473684210526, + "low_lr": 1.3578947368421055e-05, + "step": 610 + }, + { + "epoch": 1.6042077580539118, + "high_lr": 0.0006789473684210526, + "low_lr": 1.3578947368421055e-05, + "step": 610 + }, + { + "epoch": 1.6042077580539118, + "high_lr": 0.0006789473684210526, + "low_lr": 1.3578947368421055e-05, + "step": 610 + }, + { + "epoch": 1.6042077580539118, + "high_lr": 0.0006789473684210526, + "low_lr": 1.3578947368421055e-05, + "step": 610 + }, + { + "epoch": 1.6042077580539118, + "high_lr": 0.0006789473684210526, + "low_lr": 1.3578947368421055e-05, + "step": 610 + }, + { + "epoch": 1.6042077580539118, + "high_lr": 0.0006789473684210526, + "low_lr": 1.3578947368421055e-05, + "step": 610 + }, + { + "epoch": 1.6042077580539118, + "high_lr": 0.0006789473684210526, + "low_lr": 1.3578947368421055e-05, + "step": 610 + }, + { + "epoch": 1.606837606837607, + "grad_norm": 1.1207650899887085, + "learning_rate": 0.0006784210526315789, + "loss": 1.5263, + "step": 611 + }, + { + "epoch": 1.606837606837607, + "high_lr": 0.0006784210526315789, + "low_lr": 1.356842105263158e-05, + "step": 611 + }, + { + "epoch": 1.606837606837607, + "high_lr": 0.0006784210526315789, + "low_lr": 1.356842105263158e-05, + "step": 611 + }, + { + "epoch": 1.606837606837607, + "high_lr": 0.0006784210526315789, + "low_lr": 1.356842105263158e-05, + "step": 611 + }, + { + "epoch": 1.606837606837607, + "high_lr": 0.0006784210526315789, + "low_lr": 1.356842105263158e-05, + "step": 611 + }, + { + "epoch": 1.606837606837607, + "high_lr": 0.0006784210526315789, + "low_lr": 1.356842105263158e-05, + "step": 611 + }, + { + "epoch": 1.606837606837607, + "high_lr": 0.0006784210526315789, + "low_lr": 1.356842105263158e-05, + "step": 611 + }, + { + "epoch": 1.606837606837607, + "high_lr": 0.0006784210526315789, + "low_lr": 1.356842105263158e-05, + "step": 611 + }, + { + "epoch": 1.606837606837607, + "high_lr": 0.0006784210526315789, + "low_lr": 1.356842105263158e-05, + "step": 611 + }, + { + "epoch": 1.6094674556213018, + "grad_norm": 1.0400859117507935, + "learning_rate": 0.0006778947368421052, + "loss": 1.4753, + "step": 612 + }, + { + "epoch": 1.6094674556213018, + "high_lr": 0.0006778947368421052, + "low_lr": 1.3557894736842106e-05, + "step": 612 + }, + { + "epoch": 1.6094674556213018, + "high_lr": 0.0006778947368421052, + "low_lr": 1.3557894736842106e-05, + "step": 612 + }, + { + "epoch": 1.6094674556213018, + "high_lr": 0.0006778947368421052, + "low_lr": 1.3557894736842106e-05, + "step": 612 + }, + { + "epoch": 1.6094674556213018, + "high_lr": 0.0006778947368421052, + "low_lr": 1.3557894736842106e-05, + "step": 612 + }, + { + "epoch": 1.6094674556213018, + "high_lr": 0.0006778947368421052, + "low_lr": 1.3557894736842106e-05, + "step": 612 + }, + { + "epoch": 1.6094674556213018, + "high_lr": 0.0006778947368421052, + "low_lr": 1.3557894736842106e-05, + "step": 612 + }, + { + "epoch": 1.6094674556213018, + "high_lr": 0.0006778947368421052, + "low_lr": 1.3557894736842106e-05, + "step": 612 + }, + { + "epoch": 1.6094674556213018, + "high_lr": 0.0006778947368421052, + "low_lr": 1.3557894736842106e-05, + "step": 612 + }, + { + "epoch": 1.6120973044049967, + "grad_norm": 1.067070484161377, + "learning_rate": 0.0006773684210526316, + "loss": 1.5082, + "step": 613 + }, + { + "epoch": 1.6120973044049967, + "high_lr": 0.0006773684210526316, + "low_lr": 1.3547368421052634e-05, + "step": 613 + }, + { + "epoch": 1.6120973044049967, + "high_lr": 0.0006773684210526316, + "low_lr": 1.3547368421052634e-05, + "step": 613 + }, + { + "epoch": 1.6120973044049967, + "high_lr": 0.0006773684210526316, + "low_lr": 1.3547368421052634e-05, + "step": 613 + }, + { + "epoch": 1.6120973044049967, + "high_lr": 0.0006773684210526316, + "low_lr": 1.3547368421052634e-05, + "step": 613 + }, + { + "epoch": 1.6120973044049967, + "high_lr": 0.0006773684210526316, + "low_lr": 1.3547368421052634e-05, + "step": 613 + }, + { + "epoch": 1.6120973044049967, + "high_lr": 0.0006773684210526316, + "low_lr": 1.3547368421052634e-05, + "step": 613 + }, + { + "epoch": 1.6120973044049967, + "high_lr": 0.0006773684210526316, + "low_lr": 1.3547368421052634e-05, + "step": 613 + }, + { + "epoch": 1.6120973044049967, + "high_lr": 0.0006773684210526316, + "low_lr": 1.3547368421052634e-05, + "step": 613 + }, + { + "epoch": 1.6147271531886918, + "grad_norm": 1.1833747625350952, + "learning_rate": 0.0006768421052631579, + "loss": 1.511, + "step": 614 + }, + { + "epoch": 1.6147271531886918, + "high_lr": 0.0006768421052631579, + "low_lr": 1.353684210526316e-05, + "step": 614 + }, + { + "epoch": 1.6147271531886918, + "high_lr": 0.0006768421052631579, + "low_lr": 1.353684210526316e-05, + "step": 614 + }, + { + "epoch": 1.6147271531886918, + "high_lr": 0.0006768421052631579, + "low_lr": 1.353684210526316e-05, + "step": 614 + }, + { + "epoch": 1.6147271531886918, + "high_lr": 0.0006768421052631579, + "low_lr": 1.353684210526316e-05, + "step": 614 + }, + { + "epoch": 1.6147271531886918, + "high_lr": 0.0006768421052631579, + "low_lr": 1.353684210526316e-05, + "step": 614 + }, + { + "epoch": 1.6147271531886918, + "high_lr": 0.0006768421052631579, + "low_lr": 1.353684210526316e-05, + "step": 614 + }, + { + "epoch": 1.6147271531886918, + "high_lr": 0.0006768421052631579, + "low_lr": 1.353684210526316e-05, + "step": 614 + }, + { + "epoch": 1.6147271531886918, + "high_lr": 0.0006768421052631579, + "low_lr": 1.353684210526316e-05, + "step": 614 + }, + { + "epoch": 1.6173570019723866, + "grad_norm": 1.060511589050293, + "learning_rate": 0.0006763157894736843, + "loss": 1.4908, + "step": 615 + }, + { + "epoch": 1.6173570019723866, + "high_lr": 0.0006763157894736843, + "low_lr": 1.3526315789473685e-05, + "step": 615 + }, + { + "epoch": 1.6173570019723866, + "high_lr": 0.0006763157894736843, + "low_lr": 1.3526315789473685e-05, + "step": 615 + }, + { + "epoch": 1.6173570019723866, + "high_lr": 0.0006763157894736843, + "low_lr": 1.3526315789473685e-05, + "step": 615 + }, + { + "epoch": 1.6173570019723866, + "high_lr": 0.0006763157894736843, + "low_lr": 1.3526315789473685e-05, + "step": 615 + }, + { + "epoch": 1.6173570019723866, + "high_lr": 0.0006763157894736843, + "low_lr": 1.3526315789473685e-05, + "step": 615 + }, + { + "epoch": 1.6173570019723866, + "high_lr": 0.0006763157894736843, + "low_lr": 1.3526315789473685e-05, + "step": 615 + }, + { + "epoch": 1.6173570019723866, + "high_lr": 0.0006763157894736843, + "low_lr": 1.3526315789473685e-05, + "step": 615 + }, + { + "epoch": 1.6173570019723866, + "high_lr": 0.0006763157894736843, + "low_lr": 1.3526315789473685e-05, + "step": 615 + }, + { + "epoch": 1.6199868507560815, + "grad_norm": 1.0827206373214722, + "learning_rate": 0.0006757894736842106, + "loss": 1.4658, + "step": 616 + }, + { + "epoch": 1.6199868507560815, + "high_lr": 0.0006757894736842106, + "low_lr": 1.3515789473684211e-05, + "step": 616 + }, + { + "epoch": 1.6199868507560815, + "high_lr": 0.0006757894736842106, + "low_lr": 1.3515789473684211e-05, + "step": 616 + }, + { + "epoch": 1.6199868507560815, + "high_lr": 0.0006757894736842106, + "low_lr": 1.3515789473684211e-05, + "step": 616 + }, + { + "epoch": 1.6199868507560815, + "high_lr": 0.0006757894736842106, + "low_lr": 1.3515789473684211e-05, + "step": 616 + }, + { + "epoch": 1.6199868507560815, + "high_lr": 0.0006757894736842106, + "low_lr": 1.3515789473684211e-05, + "step": 616 + }, + { + "epoch": 1.6199868507560815, + "high_lr": 0.0006757894736842106, + "low_lr": 1.3515789473684211e-05, + "step": 616 + }, + { + "epoch": 1.6199868507560815, + "high_lr": 0.0006757894736842106, + "low_lr": 1.3515789473684211e-05, + "step": 616 + }, + { + "epoch": 1.6199868507560815, + "high_lr": 0.0006757894736842106, + "low_lr": 1.3515789473684211e-05, + "step": 616 + }, + { + "epoch": 1.6226166995397766, + "grad_norm": 1.04438316822052, + "learning_rate": 0.0006752631578947368, + "loss": 1.4972, + "step": 617 + }, + { + "epoch": 1.6226166995397766, + "high_lr": 0.0006752631578947368, + "low_lr": 1.3505263157894737e-05, + "step": 617 + }, + { + "epoch": 1.6226166995397766, + "high_lr": 0.0006752631578947368, + "low_lr": 1.3505263157894737e-05, + "step": 617 + }, + { + "epoch": 1.6226166995397766, + "high_lr": 0.0006752631578947368, + "low_lr": 1.3505263157894737e-05, + "step": 617 + }, + { + "epoch": 1.6226166995397766, + "high_lr": 0.0006752631578947368, + "low_lr": 1.3505263157894737e-05, + "step": 617 + }, + { + "epoch": 1.6226166995397766, + "high_lr": 0.0006752631578947368, + "low_lr": 1.3505263157894737e-05, + "step": 617 + }, + { + "epoch": 1.6226166995397766, + "high_lr": 0.0006752631578947368, + "low_lr": 1.3505263157894737e-05, + "step": 617 + }, + { + "epoch": 1.6226166995397766, + "high_lr": 0.0006752631578947368, + "low_lr": 1.3505263157894737e-05, + "step": 617 + }, + { + "epoch": 1.6226166995397766, + "high_lr": 0.0006752631578947368, + "low_lr": 1.3505263157894737e-05, + "step": 617 + }, + { + "epoch": 1.6252465483234713, + "grad_norm": 1.1282044649124146, + "learning_rate": 0.0006747368421052632, + "loss": 1.5409, + "step": 618 + }, + { + "epoch": 1.6252465483234713, + "high_lr": 0.0006747368421052632, + "low_lr": 1.3494736842105265e-05, + "step": 618 + }, + { + "epoch": 1.6252465483234713, + "high_lr": 0.0006747368421052632, + "low_lr": 1.3494736842105265e-05, + "step": 618 + }, + { + "epoch": 1.6252465483234713, + "high_lr": 0.0006747368421052632, + "low_lr": 1.3494736842105265e-05, + "step": 618 + }, + { + "epoch": 1.6252465483234713, + "high_lr": 0.0006747368421052632, + "low_lr": 1.3494736842105265e-05, + "step": 618 + }, + { + "epoch": 1.6252465483234713, + "high_lr": 0.0006747368421052632, + "low_lr": 1.3494736842105265e-05, + "step": 618 + }, + { + "epoch": 1.6252465483234713, + "high_lr": 0.0006747368421052632, + "low_lr": 1.3494736842105265e-05, + "step": 618 + }, + { + "epoch": 1.6252465483234713, + "high_lr": 0.0006747368421052632, + "low_lr": 1.3494736842105265e-05, + "step": 618 + }, + { + "epoch": 1.6252465483234713, + "high_lr": 0.0006747368421052632, + "low_lr": 1.3494736842105265e-05, + "step": 618 + }, + { + "epoch": 1.6278763971071664, + "grad_norm": 1.096423864364624, + "learning_rate": 0.0006742105263157895, + "loss": 1.4574, + "step": 619 + }, + { + "epoch": 1.6278763971071664, + "high_lr": 0.0006742105263157895, + "low_lr": 1.3484210526315792e-05, + "step": 619 + }, + { + "epoch": 1.6278763971071664, + "high_lr": 0.0006742105263157895, + "low_lr": 1.3484210526315792e-05, + "step": 619 + }, + { + "epoch": 1.6278763971071664, + "high_lr": 0.0006742105263157895, + "low_lr": 1.3484210526315792e-05, + "step": 619 + }, + { + "epoch": 1.6278763971071664, + "high_lr": 0.0006742105263157895, + "low_lr": 1.3484210526315792e-05, + "step": 619 + }, + { + "epoch": 1.6278763971071664, + "high_lr": 0.0006742105263157895, + "low_lr": 1.3484210526315792e-05, + "step": 619 + }, + { + "epoch": 1.6278763971071664, + "high_lr": 0.0006742105263157895, + "low_lr": 1.3484210526315792e-05, + "step": 619 + }, + { + "epoch": 1.6278763971071664, + "high_lr": 0.0006742105263157895, + "low_lr": 1.3484210526315792e-05, + "step": 619 + }, + { + "epoch": 1.6278763971071664, + "high_lr": 0.0006742105263157895, + "low_lr": 1.3484210526315792e-05, + "step": 619 + }, + { + "epoch": 1.6305062458908612, + "grad_norm": 1.0992679595947266, + "learning_rate": 0.0006736842105263158, + "loss": 1.4983, + "step": 620 + }, + { + "epoch": 1.6305062458908612, + "high_lr": 0.0006736842105263158, + "low_lr": 1.3473684210526316e-05, + "step": 620 + }, + { + "epoch": 1.6305062458908612, + "high_lr": 0.0006736842105263158, + "low_lr": 1.3473684210526316e-05, + "step": 620 + }, + { + "epoch": 1.6305062458908612, + "high_lr": 0.0006736842105263158, + "low_lr": 1.3473684210526316e-05, + "step": 620 + }, + { + "epoch": 1.6305062458908612, + "high_lr": 0.0006736842105263158, + "low_lr": 1.3473684210526316e-05, + "step": 620 + }, + { + "epoch": 1.6305062458908612, + "high_lr": 0.0006736842105263158, + "low_lr": 1.3473684210526316e-05, + "step": 620 + }, + { + "epoch": 1.6305062458908612, + "high_lr": 0.0006736842105263158, + "low_lr": 1.3473684210526316e-05, + "step": 620 + }, + { + "epoch": 1.6305062458908612, + "high_lr": 0.0006736842105263158, + "low_lr": 1.3473684210526316e-05, + "step": 620 + }, + { + "epoch": 1.6305062458908612, + "high_lr": 0.0006736842105263158, + "low_lr": 1.3473684210526316e-05, + "step": 620 + }, + { + "epoch": 1.6331360946745561, + "grad_norm": 1.0997353792190552, + "learning_rate": 0.0006731578947368421, + "loss": 1.5037, + "step": 621 + }, + { + "epoch": 1.6331360946745561, + "high_lr": 0.0006731578947368421, + "low_lr": 1.3463157894736842e-05, + "step": 621 + }, + { + "epoch": 1.6331360946745561, + "high_lr": 0.0006731578947368421, + "low_lr": 1.3463157894736842e-05, + "step": 621 + }, + { + "epoch": 1.6331360946745561, + "high_lr": 0.0006731578947368421, + "low_lr": 1.3463157894736842e-05, + "step": 621 + }, + { + "epoch": 1.6331360946745561, + "high_lr": 0.0006731578947368421, + "low_lr": 1.3463157894736842e-05, + "step": 621 + }, + { + "epoch": 1.6331360946745561, + "high_lr": 0.0006731578947368421, + "low_lr": 1.3463157894736842e-05, + "step": 621 + }, + { + "epoch": 1.6331360946745561, + "high_lr": 0.0006731578947368421, + "low_lr": 1.3463157894736842e-05, + "step": 621 + }, + { + "epoch": 1.6331360946745561, + "high_lr": 0.0006731578947368421, + "low_lr": 1.3463157894736842e-05, + "step": 621 + }, + { + "epoch": 1.6331360946745561, + "high_lr": 0.0006731578947368421, + "low_lr": 1.3463157894736842e-05, + "step": 621 + }, + { + "epoch": 1.6357659434582512, + "grad_norm": 1.1235257387161255, + "learning_rate": 0.0006726315789473685, + "loss": 1.5002, + "step": 622 + }, + { + "epoch": 1.6357659434582512, + "high_lr": 0.0006726315789473685, + "low_lr": 1.345263157894737e-05, + "step": 622 + }, + { + "epoch": 1.6357659434582512, + "high_lr": 0.0006726315789473685, + "low_lr": 1.345263157894737e-05, + "step": 622 + }, + { + "epoch": 1.6357659434582512, + "high_lr": 0.0006726315789473685, + "low_lr": 1.345263157894737e-05, + "step": 622 + }, + { + "epoch": 1.6357659434582512, + "high_lr": 0.0006726315789473685, + "low_lr": 1.345263157894737e-05, + "step": 622 + }, + { + "epoch": 1.6357659434582512, + "high_lr": 0.0006726315789473685, + "low_lr": 1.345263157894737e-05, + "step": 622 + }, + { + "epoch": 1.6357659434582512, + "high_lr": 0.0006726315789473685, + "low_lr": 1.345263157894737e-05, + "step": 622 + }, + { + "epoch": 1.6357659434582512, + "high_lr": 0.0006726315789473685, + "low_lr": 1.345263157894737e-05, + "step": 622 + }, + { + "epoch": 1.6357659434582512, + "high_lr": 0.0006726315789473685, + "low_lr": 1.345263157894737e-05, + "step": 622 + }, + { + "epoch": 1.638395792241946, + "grad_norm": 1.065502643585205, + "learning_rate": 0.0006721052631578948, + "loss": 1.4911, + "step": 623 + }, + { + "epoch": 1.638395792241946, + "high_lr": 0.0006721052631578948, + "low_lr": 1.3442105263157897e-05, + "step": 623 + }, + { + "epoch": 1.638395792241946, + "high_lr": 0.0006721052631578948, + "low_lr": 1.3442105263157897e-05, + "step": 623 + }, + { + "epoch": 1.638395792241946, + "high_lr": 0.0006721052631578948, + "low_lr": 1.3442105263157897e-05, + "step": 623 + }, + { + "epoch": 1.638395792241946, + "high_lr": 0.0006721052631578948, + "low_lr": 1.3442105263157897e-05, + "step": 623 + }, + { + "epoch": 1.638395792241946, + "high_lr": 0.0006721052631578948, + "low_lr": 1.3442105263157897e-05, + "step": 623 + }, + { + "epoch": 1.638395792241946, + "high_lr": 0.0006721052631578948, + "low_lr": 1.3442105263157897e-05, + "step": 623 + }, + { + "epoch": 1.638395792241946, + "high_lr": 0.0006721052631578948, + "low_lr": 1.3442105263157897e-05, + "step": 623 + }, + { + "epoch": 1.638395792241946, + "high_lr": 0.0006721052631578948, + "low_lr": 1.3442105263157897e-05, + "step": 623 + }, + { + "epoch": 1.641025641025641, + "grad_norm": 1.0841379165649414, + "learning_rate": 0.0006715789473684211, + "loss": 1.5082, + "step": 624 + }, + { + "epoch": 1.641025641025641, + "high_lr": 0.0006715789473684211, + "low_lr": 1.3431578947368421e-05, + "step": 624 + }, + { + "epoch": 1.641025641025641, + "high_lr": 0.0006715789473684211, + "low_lr": 1.3431578947368421e-05, + "step": 624 + }, + { + "epoch": 1.641025641025641, + "high_lr": 0.0006715789473684211, + "low_lr": 1.3431578947368421e-05, + "step": 624 + }, + { + "epoch": 1.641025641025641, + "high_lr": 0.0006715789473684211, + "low_lr": 1.3431578947368421e-05, + "step": 624 + }, + { + "epoch": 1.641025641025641, + "high_lr": 0.0006715789473684211, + "low_lr": 1.3431578947368421e-05, + "step": 624 + }, + { + "epoch": 1.641025641025641, + "high_lr": 0.0006715789473684211, + "low_lr": 1.3431578947368421e-05, + "step": 624 + }, + { + "epoch": 1.641025641025641, + "high_lr": 0.0006715789473684211, + "low_lr": 1.3431578947368421e-05, + "step": 624 + }, + { + "epoch": 1.641025641025641, + "high_lr": 0.0006715789473684211, + "low_lr": 1.3431578947368421e-05, + "step": 624 + }, + { + "epoch": 1.643655489809336, + "grad_norm": 1.1010593175888062, + "learning_rate": 0.0006710526315789473, + "loss": 1.4974, + "step": 625 + }, + { + "epoch": 1.643655489809336, + "high_lr": 0.0006710526315789473, + "low_lr": 1.3421052631578948e-05, + "step": 625 + }, + { + "epoch": 1.643655489809336, + "high_lr": 0.0006710526315789473, + "low_lr": 1.3421052631578948e-05, + "step": 625 + }, + { + "epoch": 1.643655489809336, + "high_lr": 0.0006710526315789473, + "low_lr": 1.3421052631578948e-05, + "step": 625 + }, + { + "epoch": 1.643655489809336, + "high_lr": 0.0006710526315789473, + "low_lr": 1.3421052631578948e-05, + "step": 625 + }, + { + "epoch": 1.643655489809336, + "high_lr": 0.0006710526315789473, + "low_lr": 1.3421052631578948e-05, + "step": 625 + }, + { + "epoch": 1.643655489809336, + "high_lr": 0.0006710526315789473, + "low_lr": 1.3421052631578948e-05, + "step": 625 + }, + { + "epoch": 1.643655489809336, + "high_lr": 0.0006710526315789473, + "low_lr": 1.3421052631578948e-05, + "step": 625 + }, + { + "epoch": 1.643655489809336, + "high_lr": 0.0006710526315789473, + "low_lr": 1.3421052631578948e-05, + "step": 625 + }, + { + "epoch": 1.646285338593031, + "grad_norm": 1.1630381345748901, + "learning_rate": 0.0006705263157894736, + "loss": 1.5256, + "step": 626 + }, + { + "epoch": 1.646285338593031, + "high_lr": 0.0006705263157894736, + "low_lr": 1.3410526315789474e-05, + "step": 626 + }, + { + "epoch": 1.646285338593031, + "high_lr": 0.0006705263157894736, + "low_lr": 1.3410526315789474e-05, + "step": 626 + }, + { + "epoch": 1.646285338593031, + "high_lr": 0.0006705263157894736, + "low_lr": 1.3410526315789474e-05, + "step": 626 + }, + { + "epoch": 1.646285338593031, + "high_lr": 0.0006705263157894736, + "low_lr": 1.3410526315789474e-05, + "step": 626 + }, + { + "epoch": 1.646285338593031, + "high_lr": 0.0006705263157894736, + "low_lr": 1.3410526315789474e-05, + "step": 626 + }, + { + "epoch": 1.646285338593031, + "high_lr": 0.0006705263157894736, + "low_lr": 1.3410526315789474e-05, + "step": 626 + }, + { + "epoch": 1.646285338593031, + "high_lr": 0.0006705263157894736, + "low_lr": 1.3410526315789474e-05, + "step": 626 + }, + { + "epoch": 1.646285338593031, + "high_lr": 0.0006705263157894736, + "low_lr": 1.3410526315789474e-05, + "step": 626 + }, + { + "epoch": 1.6489151873767258, + "grad_norm": 1.060465693473816, + "learning_rate": 0.00067, + "loss": 1.4869, + "step": 627 + }, + { + "epoch": 1.6489151873767258, + "high_lr": 0.00067, + "low_lr": 1.3400000000000002e-05, + "step": 627 + }, + { + "epoch": 1.6489151873767258, + "high_lr": 0.00067, + "low_lr": 1.3400000000000002e-05, + "step": 627 + }, + { + "epoch": 1.6489151873767258, + "high_lr": 0.00067, + "low_lr": 1.3400000000000002e-05, + "step": 627 + }, + { + "epoch": 1.6489151873767258, + "high_lr": 0.00067, + "low_lr": 1.3400000000000002e-05, + "step": 627 + }, + { + "epoch": 1.6489151873767258, + "high_lr": 0.00067, + "low_lr": 1.3400000000000002e-05, + "step": 627 + }, + { + "epoch": 1.6489151873767258, + "high_lr": 0.00067, + "low_lr": 1.3400000000000002e-05, + "step": 627 + }, + { + "epoch": 1.6489151873767258, + "high_lr": 0.00067, + "low_lr": 1.3400000000000002e-05, + "step": 627 + }, + { + "epoch": 1.6489151873767258, + "high_lr": 0.00067, + "low_lr": 1.3400000000000002e-05, + "step": 627 + }, + { + "epoch": 1.651545036160421, + "grad_norm": 1.1353005170822144, + "learning_rate": 0.0006694736842105263, + "loss": 1.5764, + "step": 628 + }, + { + "epoch": 1.651545036160421, + "high_lr": 0.0006694736842105263, + "low_lr": 1.3389473684210528e-05, + "step": 628 + }, + { + "epoch": 1.651545036160421, + "high_lr": 0.0006694736842105263, + "low_lr": 1.3389473684210528e-05, + "step": 628 + }, + { + "epoch": 1.651545036160421, + "high_lr": 0.0006694736842105263, + "low_lr": 1.3389473684210528e-05, + "step": 628 + }, + { + "epoch": 1.651545036160421, + "high_lr": 0.0006694736842105263, + "low_lr": 1.3389473684210528e-05, + "step": 628 + }, + { + "epoch": 1.651545036160421, + "high_lr": 0.0006694736842105263, + "low_lr": 1.3389473684210528e-05, + "step": 628 + }, + { + "epoch": 1.651545036160421, + "high_lr": 0.0006694736842105263, + "low_lr": 1.3389473684210528e-05, + "step": 628 + }, + { + "epoch": 1.651545036160421, + "high_lr": 0.0006694736842105263, + "low_lr": 1.3389473684210528e-05, + "step": 628 + }, + { + "epoch": 1.651545036160421, + "high_lr": 0.0006694736842105263, + "low_lr": 1.3389473684210528e-05, + "step": 628 + }, + { + "epoch": 1.6541748849441156, + "grad_norm": 1.0829559564590454, + "learning_rate": 0.0006689473684210526, + "loss": 1.472, + "step": 629 + }, + { + "epoch": 1.6541748849441156, + "high_lr": 0.0006689473684210526, + "low_lr": 1.3378947368421053e-05, + "step": 629 + }, + { + "epoch": 1.6541748849441156, + "high_lr": 0.0006689473684210526, + "low_lr": 1.3378947368421053e-05, + "step": 629 + }, + { + "epoch": 1.6541748849441156, + "high_lr": 0.0006689473684210526, + "low_lr": 1.3378947368421053e-05, + "step": 629 + }, + { + "epoch": 1.6541748849441156, + "high_lr": 0.0006689473684210526, + "low_lr": 1.3378947368421053e-05, + "step": 629 + }, + { + "epoch": 1.6541748849441156, + "high_lr": 0.0006689473684210526, + "low_lr": 1.3378947368421053e-05, + "step": 629 + }, + { + "epoch": 1.6541748849441156, + "high_lr": 0.0006689473684210526, + "low_lr": 1.3378947368421053e-05, + "step": 629 + }, + { + "epoch": 1.6541748849441156, + "high_lr": 0.0006689473684210526, + "low_lr": 1.3378947368421053e-05, + "step": 629 + }, + { + "epoch": 1.6541748849441156, + "high_lr": 0.0006689473684210526, + "low_lr": 1.3378947368421053e-05, + "step": 629 + }, + { + "epoch": 1.6568047337278107, + "grad_norm": 1.1032090187072754, + "learning_rate": 0.0006684210526315789, + "loss": 1.5168, + "step": 630 + }, + { + "epoch": 1.6568047337278107, + "high_lr": 0.0006684210526315789, + "low_lr": 1.336842105263158e-05, + "step": 630 + }, + { + "epoch": 1.6568047337278107, + "high_lr": 0.0006684210526315789, + "low_lr": 1.336842105263158e-05, + "step": 630 + }, + { + "epoch": 1.6568047337278107, + "high_lr": 0.0006684210526315789, + "low_lr": 1.336842105263158e-05, + "step": 630 + }, + { + "epoch": 1.6568047337278107, + "high_lr": 0.0006684210526315789, + "low_lr": 1.336842105263158e-05, + "step": 630 + }, + { + "epoch": 1.6568047337278107, + "high_lr": 0.0006684210526315789, + "low_lr": 1.336842105263158e-05, + "step": 630 + }, + { + "epoch": 1.6568047337278107, + "high_lr": 0.0006684210526315789, + "low_lr": 1.336842105263158e-05, + "step": 630 + }, + { + "epoch": 1.6568047337278107, + "high_lr": 0.0006684210526315789, + "low_lr": 1.336842105263158e-05, + "step": 630 + }, + { + "epoch": 1.6568047337278107, + "high_lr": 0.0006684210526315789, + "low_lr": 1.336842105263158e-05, + "step": 630 + }, + { + "epoch": 1.6594345825115056, + "grad_norm": 1.1293106079101562, + "learning_rate": 0.0006678947368421053, + "loss": 1.4934, + "step": 631 + }, + { + "epoch": 1.6594345825115056, + "high_lr": 0.0006678947368421053, + "low_lr": 1.3357894736842106e-05, + "step": 631 + }, + { + "epoch": 1.6594345825115056, + "high_lr": 0.0006678947368421053, + "low_lr": 1.3357894736842106e-05, + "step": 631 + }, + { + "epoch": 1.6594345825115056, + "high_lr": 0.0006678947368421053, + "low_lr": 1.3357894736842106e-05, + "step": 631 + }, + { + "epoch": 1.6594345825115056, + "high_lr": 0.0006678947368421053, + "low_lr": 1.3357894736842106e-05, + "step": 631 + }, + { + "epoch": 1.6594345825115056, + "high_lr": 0.0006678947368421053, + "low_lr": 1.3357894736842106e-05, + "step": 631 + }, + { + "epoch": 1.6594345825115056, + "high_lr": 0.0006678947368421053, + "low_lr": 1.3357894736842106e-05, + "step": 631 + }, + { + "epoch": 1.6594345825115056, + "high_lr": 0.0006678947368421053, + "low_lr": 1.3357894736842106e-05, + "step": 631 + }, + { + "epoch": 1.6594345825115056, + "high_lr": 0.0006678947368421053, + "low_lr": 1.3357894736842106e-05, + "step": 631 + }, + { + "epoch": 1.6620644312952004, + "grad_norm": 1.1231383085250854, + "learning_rate": 0.0006673684210526317, + "loss": 1.4535, + "step": 632 + }, + { + "epoch": 1.6620644312952004, + "high_lr": 0.0006673684210526317, + "low_lr": 1.3347368421052634e-05, + "step": 632 + }, + { + "epoch": 1.6620644312952004, + "high_lr": 0.0006673684210526317, + "low_lr": 1.3347368421052634e-05, + "step": 632 + }, + { + "epoch": 1.6620644312952004, + "high_lr": 0.0006673684210526317, + "low_lr": 1.3347368421052634e-05, + "step": 632 + }, + { + "epoch": 1.6620644312952004, + "high_lr": 0.0006673684210526317, + "low_lr": 1.3347368421052634e-05, + "step": 632 + }, + { + "epoch": 1.6620644312952004, + "high_lr": 0.0006673684210526317, + "low_lr": 1.3347368421052634e-05, + "step": 632 + }, + { + "epoch": 1.6620644312952004, + "high_lr": 0.0006673684210526317, + "low_lr": 1.3347368421052634e-05, + "step": 632 + }, + { + "epoch": 1.6620644312952004, + "high_lr": 0.0006673684210526317, + "low_lr": 1.3347368421052634e-05, + "step": 632 + }, + { + "epoch": 1.6620644312952004, + "high_lr": 0.0006673684210526317, + "low_lr": 1.3347368421052634e-05, + "step": 632 + }, + { + "epoch": 1.6646942800788955, + "grad_norm": 1.0538183450698853, + "learning_rate": 0.000666842105263158, + "loss": 1.4255, + "step": 633 + }, + { + "epoch": 1.6646942800788955, + "high_lr": 0.000666842105263158, + "low_lr": 1.3336842105263158e-05, + "step": 633 + }, + { + "epoch": 1.6646942800788955, + "high_lr": 0.000666842105263158, + "low_lr": 1.3336842105263158e-05, + "step": 633 + }, + { + "epoch": 1.6646942800788955, + "high_lr": 0.000666842105263158, + "low_lr": 1.3336842105263158e-05, + "step": 633 + }, + { + "epoch": 1.6646942800788955, + "high_lr": 0.000666842105263158, + "low_lr": 1.3336842105263158e-05, + "step": 633 + }, + { + "epoch": 1.6646942800788955, + "high_lr": 0.000666842105263158, + "low_lr": 1.3336842105263158e-05, + "step": 633 + }, + { + "epoch": 1.6646942800788955, + "high_lr": 0.000666842105263158, + "low_lr": 1.3336842105263158e-05, + "step": 633 + }, + { + "epoch": 1.6646942800788955, + "high_lr": 0.000666842105263158, + "low_lr": 1.3336842105263158e-05, + "step": 633 + }, + { + "epoch": 1.6646942800788955, + "high_lr": 0.000666842105263158, + "low_lr": 1.3336842105263158e-05, + "step": 633 + }, + { + "epoch": 1.6673241288625904, + "grad_norm": 1.0623528957366943, + "learning_rate": 0.0006663157894736842, + "loss": 1.4342, + "step": 634 + }, + { + "epoch": 1.6673241288625904, + "high_lr": 0.0006663157894736842, + "low_lr": 1.3326315789473685e-05, + "step": 634 + }, + { + "epoch": 1.6673241288625904, + "high_lr": 0.0006663157894736842, + "low_lr": 1.3326315789473685e-05, + "step": 634 + }, + { + "epoch": 1.6673241288625904, + "high_lr": 0.0006663157894736842, + "low_lr": 1.3326315789473685e-05, + "step": 634 + }, + { + "epoch": 1.6673241288625904, + "high_lr": 0.0006663157894736842, + "low_lr": 1.3326315789473685e-05, + "step": 634 + }, + { + "epoch": 1.6673241288625904, + "high_lr": 0.0006663157894736842, + "low_lr": 1.3326315789473685e-05, + "step": 634 + }, + { + "epoch": 1.6673241288625904, + "high_lr": 0.0006663157894736842, + "low_lr": 1.3326315789473685e-05, + "step": 634 + }, + { + "epoch": 1.6673241288625904, + "high_lr": 0.0006663157894736842, + "low_lr": 1.3326315789473685e-05, + "step": 634 + }, + { + "epoch": 1.6673241288625904, + "high_lr": 0.0006663157894736842, + "low_lr": 1.3326315789473685e-05, + "step": 634 + }, + { + "epoch": 1.6699539776462853, + "grad_norm": 1.1783959865570068, + "learning_rate": 0.0006657894736842105, + "loss": 1.525, + "step": 635 + }, + { + "epoch": 1.6699539776462853, + "high_lr": 0.0006657894736842105, + "low_lr": 1.3315789473684211e-05, + "step": 635 + }, + { + "epoch": 1.6699539776462853, + "high_lr": 0.0006657894736842105, + "low_lr": 1.3315789473684211e-05, + "step": 635 + }, + { + "epoch": 1.6699539776462853, + "high_lr": 0.0006657894736842105, + "low_lr": 1.3315789473684211e-05, + "step": 635 + }, + { + "epoch": 1.6699539776462853, + "high_lr": 0.0006657894736842105, + "low_lr": 1.3315789473684211e-05, + "step": 635 + }, + { + "epoch": 1.6699539776462853, + "high_lr": 0.0006657894736842105, + "low_lr": 1.3315789473684211e-05, + "step": 635 + }, + { + "epoch": 1.6699539776462853, + "high_lr": 0.0006657894736842105, + "low_lr": 1.3315789473684211e-05, + "step": 635 + }, + { + "epoch": 1.6699539776462853, + "high_lr": 0.0006657894736842105, + "low_lr": 1.3315789473684211e-05, + "step": 635 + }, + { + "epoch": 1.6699539776462853, + "high_lr": 0.0006657894736842105, + "low_lr": 1.3315789473684211e-05, + "step": 635 + }, + { + "epoch": 1.6725838264299804, + "grad_norm": 1.04253089427948, + "learning_rate": 0.0006652631578947369, + "loss": 1.4717, + "step": 636 + }, + { + "epoch": 1.6725838264299804, + "high_lr": 0.0006652631578947369, + "low_lr": 1.3305263157894739e-05, + "step": 636 + }, + { + "epoch": 1.6725838264299804, + "high_lr": 0.0006652631578947369, + "low_lr": 1.3305263157894739e-05, + "step": 636 + }, + { + "epoch": 1.6725838264299804, + "high_lr": 0.0006652631578947369, + "low_lr": 1.3305263157894739e-05, + "step": 636 + }, + { + "epoch": 1.6725838264299804, + "high_lr": 0.0006652631578947369, + "low_lr": 1.3305263157894739e-05, + "step": 636 + }, + { + "epoch": 1.6725838264299804, + "high_lr": 0.0006652631578947369, + "low_lr": 1.3305263157894739e-05, + "step": 636 + }, + { + "epoch": 1.6725838264299804, + "high_lr": 0.0006652631578947369, + "low_lr": 1.3305263157894739e-05, + "step": 636 + }, + { + "epoch": 1.6725838264299804, + "high_lr": 0.0006652631578947369, + "low_lr": 1.3305263157894739e-05, + "step": 636 + }, + { + "epoch": 1.6725838264299804, + "high_lr": 0.0006652631578947369, + "low_lr": 1.3305263157894739e-05, + "step": 636 + }, + { + "epoch": 1.6752136752136753, + "grad_norm": 1.1287508010864258, + "learning_rate": 0.0006647368421052632, + "loss": 1.4899, + "step": 637 + }, + { + "epoch": 1.6752136752136753, + "high_lr": 0.0006647368421052632, + "low_lr": 1.3294736842105265e-05, + "step": 637 + }, + { + "epoch": 1.6752136752136753, + "high_lr": 0.0006647368421052632, + "low_lr": 1.3294736842105265e-05, + "step": 637 + }, + { + "epoch": 1.6752136752136753, + "high_lr": 0.0006647368421052632, + "low_lr": 1.3294736842105265e-05, + "step": 637 + }, + { + "epoch": 1.6752136752136753, + "high_lr": 0.0006647368421052632, + "low_lr": 1.3294736842105265e-05, + "step": 637 + }, + { + "epoch": 1.6752136752136753, + "high_lr": 0.0006647368421052632, + "low_lr": 1.3294736842105265e-05, + "step": 637 + }, + { + "epoch": 1.6752136752136753, + "high_lr": 0.0006647368421052632, + "low_lr": 1.3294736842105265e-05, + "step": 637 + }, + { + "epoch": 1.6752136752136753, + "high_lr": 0.0006647368421052632, + "low_lr": 1.3294736842105265e-05, + "step": 637 + }, + { + "epoch": 1.6752136752136753, + "high_lr": 0.0006647368421052632, + "low_lr": 1.3294736842105265e-05, + "step": 637 + }, + { + "epoch": 1.6778435239973701, + "grad_norm": 1.3079376220703125, + "learning_rate": 0.0006642105263157895, + "loss": 1.5828, + "step": 638 + }, + { + "epoch": 1.6778435239973701, + "high_lr": 0.0006642105263157895, + "low_lr": 1.328421052631579e-05, + "step": 638 + }, + { + "epoch": 1.6778435239973701, + "high_lr": 0.0006642105263157895, + "low_lr": 1.328421052631579e-05, + "step": 638 + }, + { + "epoch": 1.6778435239973701, + "high_lr": 0.0006642105263157895, + "low_lr": 1.328421052631579e-05, + "step": 638 + }, + { + "epoch": 1.6778435239973701, + "high_lr": 0.0006642105263157895, + "low_lr": 1.328421052631579e-05, + "step": 638 + }, + { + "epoch": 1.6778435239973701, + "high_lr": 0.0006642105263157895, + "low_lr": 1.328421052631579e-05, + "step": 638 + }, + { + "epoch": 1.6778435239973701, + "high_lr": 0.0006642105263157895, + "low_lr": 1.328421052631579e-05, + "step": 638 + }, + { + "epoch": 1.6778435239973701, + "high_lr": 0.0006642105263157895, + "low_lr": 1.328421052631579e-05, + "step": 638 + }, + { + "epoch": 1.6778435239973701, + "high_lr": 0.0006642105263157895, + "low_lr": 1.328421052631579e-05, + "step": 638 + }, + { + "epoch": 1.6804733727810652, + "grad_norm": 1.1009700298309326, + "learning_rate": 0.0006636842105263158, + "loss": 1.4915, + "step": 639 + }, + { + "epoch": 1.6804733727810652, + "high_lr": 0.0006636842105263158, + "low_lr": 1.3273684210526316e-05, + "step": 639 + }, + { + "epoch": 1.6804733727810652, + "high_lr": 0.0006636842105263158, + "low_lr": 1.3273684210526316e-05, + "step": 639 + }, + { + "epoch": 1.6804733727810652, + "high_lr": 0.0006636842105263158, + "low_lr": 1.3273684210526316e-05, + "step": 639 + }, + { + "epoch": 1.6804733727810652, + "high_lr": 0.0006636842105263158, + "low_lr": 1.3273684210526316e-05, + "step": 639 + }, + { + "epoch": 1.6804733727810652, + "high_lr": 0.0006636842105263158, + "low_lr": 1.3273684210526316e-05, + "step": 639 + }, + { + "epoch": 1.6804733727810652, + "high_lr": 0.0006636842105263158, + "low_lr": 1.3273684210526316e-05, + "step": 639 + }, + { + "epoch": 1.6804733727810652, + "high_lr": 0.0006636842105263158, + "low_lr": 1.3273684210526316e-05, + "step": 639 + }, + { + "epoch": 1.6804733727810652, + "high_lr": 0.0006636842105263158, + "low_lr": 1.3273684210526316e-05, + "step": 639 + }, + { + "epoch": 1.6831032215647599, + "grad_norm": 1.0824031829833984, + "learning_rate": 0.0006631578947368421, + "loss": 1.4459, + "step": 640 + }, + { + "epoch": 1.6831032215647599, + "high_lr": 0.0006631578947368421, + "low_lr": 1.3263157894736843e-05, + "step": 640 + }, + { + "epoch": 1.6831032215647599, + "high_lr": 0.0006631578947368421, + "low_lr": 1.3263157894736843e-05, + "step": 640 + }, + { + "epoch": 1.6831032215647599, + "high_lr": 0.0006631578947368421, + "low_lr": 1.3263157894736843e-05, + "step": 640 + }, + { + "epoch": 1.6831032215647599, + "high_lr": 0.0006631578947368421, + "low_lr": 1.3263157894736843e-05, + "step": 640 + }, + { + "epoch": 1.6831032215647599, + "high_lr": 0.0006631578947368421, + "low_lr": 1.3263157894736843e-05, + "step": 640 + }, + { + "epoch": 1.6831032215647599, + "high_lr": 0.0006631578947368421, + "low_lr": 1.3263157894736843e-05, + "step": 640 + }, + { + "epoch": 1.6831032215647599, + "high_lr": 0.0006631578947368421, + "low_lr": 1.3263157894736843e-05, + "step": 640 + }, + { + "epoch": 1.6831032215647599, + "high_lr": 0.0006631578947368421, + "low_lr": 1.3263157894736843e-05, + "step": 640 + }, + { + "epoch": 1.685733070348455, + "grad_norm": 1.137454628944397, + "learning_rate": 0.0006626315789473685, + "loss": 1.4475, + "step": 641 + }, + { + "epoch": 1.685733070348455, + "high_lr": 0.0006626315789473685, + "low_lr": 1.325263157894737e-05, + "step": 641 + }, + { + "epoch": 1.685733070348455, + "high_lr": 0.0006626315789473685, + "low_lr": 1.325263157894737e-05, + "step": 641 + }, + { + "epoch": 1.685733070348455, + "high_lr": 0.0006626315789473685, + "low_lr": 1.325263157894737e-05, + "step": 641 + }, + { + "epoch": 1.685733070348455, + "high_lr": 0.0006626315789473685, + "low_lr": 1.325263157894737e-05, + "step": 641 + }, + { + "epoch": 1.685733070348455, + "high_lr": 0.0006626315789473685, + "low_lr": 1.325263157894737e-05, + "step": 641 + }, + { + "epoch": 1.685733070348455, + "high_lr": 0.0006626315789473685, + "low_lr": 1.325263157894737e-05, + "step": 641 + }, + { + "epoch": 1.685733070348455, + "high_lr": 0.0006626315789473685, + "low_lr": 1.325263157894737e-05, + "step": 641 + }, + { + "epoch": 1.685733070348455, + "high_lr": 0.0006626315789473685, + "low_lr": 1.325263157894737e-05, + "step": 641 + }, + { + "epoch": 1.6883629191321499, + "grad_norm": 1.089472770690918, + "learning_rate": 0.0006621052631578947, + "loss": 1.4748, + "step": 642 + }, + { + "epoch": 1.6883629191321499, + "high_lr": 0.0006621052631578947, + "low_lr": 1.3242105263157895e-05, + "step": 642 + }, + { + "epoch": 1.6883629191321499, + "high_lr": 0.0006621052631578947, + "low_lr": 1.3242105263157895e-05, + "step": 642 + }, + { + "epoch": 1.6883629191321499, + "high_lr": 0.0006621052631578947, + "low_lr": 1.3242105263157895e-05, + "step": 642 + }, + { + "epoch": 1.6883629191321499, + "high_lr": 0.0006621052631578947, + "low_lr": 1.3242105263157895e-05, + "step": 642 + }, + { + "epoch": 1.6883629191321499, + "high_lr": 0.0006621052631578947, + "low_lr": 1.3242105263157895e-05, + "step": 642 + }, + { + "epoch": 1.6883629191321499, + "high_lr": 0.0006621052631578947, + "low_lr": 1.3242105263157895e-05, + "step": 642 + }, + { + "epoch": 1.6883629191321499, + "high_lr": 0.0006621052631578947, + "low_lr": 1.3242105263157895e-05, + "step": 642 + }, + { + "epoch": 1.6883629191321499, + "high_lr": 0.0006621052631578947, + "low_lr": 1.3242105263157895e-05, + "step": 642 + }, + { + "epoch": 1.6909927679158447, + "grad_norm": 1.1632318496704102, + "learning_rate": 0.000661578947368421, + "loss": 1.4587, + "step": 643 + }, + { + "epoch": 1.6909927679158447, + "high_lr": 0.000661578947368421, + "low_lr": 1.3231578947368422e-05, + "step": 643 + }, + { + "epoch": 1.6909927679158447, + "high_lr": 0.000661578947368421, + "low_lr": 1.3231578947368422e-05, + "step": 643 + }, + { + "epoch": 1.6909927679158447, + "high_lr": 0.000661578947368421, + "low_lr": 1.3231578947368422e-05, + "step": 643 + }, + { + "epoch": 1.6909927679158447, + "high_lr": 0.000661578947368421, + "low_lr": 1.3231578947368422e-05, + "step": 643 + }, + { + "epoch": 1.6909927679158447, + "high_lr": 0.000661578947368421, + "low_lr": 1.3231578947368422e-05, + "step": 643 + }, + { + "epoch": 1.6909927679158447, + "high_lr": 0.000661578947368421, + "low_lr": 1.3231578947368422e-05, + "step": 643 + }, + { + "epoch": 1.6909927679158447, + "high_lr": 0.000661578947368421, + "low_lr": 1.3231578947368422e-05, + "step": 643 + }, + { + "epoch": 1.6909927679158447, + "high_lr": 0.000661578947368421, + "low_lr": 1.3231578947368422e-05, + "step": 643 + }, + { + "epoch": 1.6936226166995398, + "grad_norm": 1.1421369314193726, + "learning_rate": 0.0006610526315789473, + "loss": 1.4532, + "step": 644 + }, + { + "epoch": 1.6936226166995398, + "high_lr": 0.0006610526315789473, + "low_lr": 1.3221052631578948e-05, + "step": 644 + }, + { + "epoch": 1.6936226166995398, + "high_lr": 0.0006610526315789473, + "low_lr": 1.3221052631578948e-05, + "step": 644 + }, + { + "epoch": 1.6936226166995398, + "high_lr": 0.0006610526315789473, + "low_lr": 1.3221052631578948e-05, + "step": 644 + }, + { + "epoch": 1.6936226166995398, + "high_lr": 0.0006610526315789473, + "low_lr": 1.3221052631578948e-05, + "step": 644 + }, + { + "epoch": 1.6936226166995398, + "high_lr": 0.0006610526315789473, + "low_lr": 1.3221052631578948e-05, + "step": 644 + }, + { + "epoch": 1.6936226166995398, + "high_lr": 0.0006610526315789473, + "low_lr": 1.3221052631578948e-05, + "step": 644 + }, + { + "epoch": 1.6936226166995398, + "high_lr": 0.0006610526315789473, + "low_lr": 1.3221052631578948e-05, + "step": 644 + }, + { + "epoch": 1.6936226166995398, + "high_lr": 0.0006610526315789473, + "low_lr": 1.3221052631578948e-05, + "step": 644 + }, + { + "epoch": 1.6962524654832347, + "grad_norm": 1.0684672594070435, + "learning_rate": 0.0006605263157894737, + "loss": 1.4379, + "step": 645 + }, + { + "epoch": 1.6962524654832347, + "high_lr": 0.0006605263157894737, + "low_lr": 1.3210526315789476e-05, + "step": 645 + }, + { + "epoch": 1.6962524654832347, + "high_lr": 0.0006605263157894737, + "low_lr": 1.3210526315789476e-05, + "step": 645 + }, + { + "epoch": 1.6962524654832347, + "high_lr": 0.0006605263157894737, + "low_lr": 1.3210526315789476e-05, + "step": 645 + }, + { + "epoch": 1.6962524654832347, + "high_lr": 0.0006605263157894737, + "low_lr": 1.3210526315789476e-05, + "step": 645 + }, + { + "epoch": 1.6962524654832347, + "high_lr": 0.0006605263157894737, + "low_lr": 1.3210526315789476e-05, + "step": 645 + }, + { + "epoch": 1.6962524654832347, + "high_lr": 0.0006605263157894737, + "low_lr": 1.3210526315789476e-05, + "step": 645 + }, + { + "epoch": 1.6962524654832347, + "high_lr": 0.0006605263157894737, + "low_lr": 1.3210526315789476e-05, + "step": 645 + }, + { + "epoch": 1.6962524654832347, + "high_lr": 0.0006605263157894737, + "low_lr": 1.3210526315789476e-05, + "step": 645 + }, + { + "epoch": 1.6988823142669296, + "grad_norm": 1.0975732803344727, + "learning_rate": 0.00066, + "loss": 1.4593, + "step": 646 + }, + { + "epoch": 1.6988823142669296, + "high_lr": 0.00066, + "low_lr": 1.3200000000000002e-05, + "step": 646 + }, + { + "epoch": 1.6988823142669296, + "high_lr": 0.00066, + "low_lr": 1.3200000000000002e-05, + "step": 646 + }, + { + "epoch": 1.6988823142669296, + "high_lr": 0.00066, + "low_lr": 1.3200000000000002e-05, + "step": 646 + }, + { + "epoch": 1.6988823142669296, + "high_lr": 0.00066, + "low_lr": 1.3200000000000002e-05, + "step": 646 + }, + { + "epoch": 1.6988823142669296, + "high_lr": 0.00066, + "low_lr": 1.3200000000000002e-05, + "step": 646 + }, + { + "epoch": 1.6988823142669296, + "high_lr": 0.00066, + "low_lr": 1.3200000000000002e-05, + "step": 646 + }, + { + "epoch": 1.6988823142669296, + "high_lr": 0.00066, + "low_lr": 1.3200000000000002e-05, + "step": 646 + }, + { + "epoch": 1.6988823142669296, + "high_lr": 0.00066, + "low_lr": 1.3200000000000002e-05, + "step": 646 + }, + { + "epoch": 1.7015121630506247, + "grad_norm": 1.161067247390747, + "learning_rate": 0.0006594736842105264, + "loss": 1.4718, + "step": 647 + }, + { + "epoch": 1.7015121630506247, + "high_lr": 0.0006594736842105264, + "low_lr": 1.3189473684210527e-05, + "step": 647 + }, + { + "epoch": 1.7015121630506247, + "high_lr": 0.0006594736842105264, + "low_lr": 1.3189473684210527e-05, + "step": 647 + }, + { + "epoch": 1.7015121630506247, + "high_lr": 0.0006594736842105264, + "low_lr": 1.3189473684210527e-05, + "step": 647 + }, + { + "epoch": 1.7015121630506247, + "high_lr": 0.0006594736842105264, + "low_lr": 1.3189473684210527e-05, + "step": 647 + }, + { + "epoch": 1.7015121630506247, + "high_lr": 0.0006594736842105264, + "low_lr": 1.3189473684210527e-05, + "step": 647 + }, + { + "epoch": 1.7015121630506247, + "high_lr": 0.0006594736842105264, + "low_lr": 1.3189473684210527e-05, + "step": 647 + }, + { + "epoch": 1.7015121630506247, + "high_lr": 0.0006594736842105264, + "low_lr": 1.3189473684210527e-05, + "step": 647 + }, + { + "epoch": 1.7015121630506247, + "high_lr": 0.0006594736842105264, + "low_lr": 1.3189473684210527e-05, + "step": 647 + }, + { + "epoch": 1.7041420118343196, + "grad_norm": 1.2819602489471436, + "learning_rate": 0.0006589473684210527, + "loss": 1.4713, + "step": 648 + }, + { + "epoch": 1.7041420118343196, + "high_lr": 0.0006589473684210527, + "low_lr": 1.3178947368421053e-05, + "step": 648 + }, + { + "epoch": 1.7041420118343196, + "high_lr": 0.0006589473684210527, + "low_lr": 1.3178947368421053e-05, + "step": 648 + }, + { + "epoch": 1.7041420118343196, + "high_lr": 0.0006589473684210527, + "low_lr": 1.3178947368421053e-05, + "step": 648 + }, + { + "epoch": 1.7041420118343196, + "high_lr": 0.0006589473684210527, + "low_lr": 1.3178947368421053e-05, + "step": 648 + }, + { + "epoch": 1.7041420118343196, + "high_lr": 0.0006589473684210527, + "low_lr": 1.3178947368421053e-05, + "step": 648 + }, + { + "epoch": 1.7041420118343196, + "high_lr": 0.0006589473684210527, + "low_lr": 1.3178947368421053e-05, + "step": 648 + }, + { + "epoch": 1.7041420118343196, + "high_lr": 0.0006589473684210527, + "low_lr": 1.3178947368421053e-05, + "step": 648 + }, + { + "epoch": 1.7041420118343196, + "high_lr": 0.0006589473684210527, + "low_lr": 1.3178947368421053e-05, + "step": 648 + }, + { + "epoch": 1.7067718606180144, + "grad_norm": 1.1141456365585327, + "learning_rate": 0.000658421052631579, + "loss": 1.4705, + "step": 649 + }, + { + "epoch": 1.7067718606180144, + "high_lr": 0.000658421052631579, + "low_lr": 1.316842105263158e-05, + "step": 649 + }, + { + "epoch": 1.7067718606180144, + "high_lr": 0.000658421052631579, + "low_lr": 1.316842105263158e-05, + "step": 649 + }, + { + "epoch": 1.7067718606180144, + "high_lr": 0.000658421052631579, + "low_lr": 1.316842105263158e-05, + "step": 649 + }, + { + "epoch": 1.7067718606180144, + "high_lr": 0.000658421052631579, + "low_lr": 1.316842105263158e-05, + "step": 649 + }, + { + "epoch": 1.7067718606180144, + "high_lr": 0.000658421052631579, + "low_lr": 1.316842105263158e-05, + "step": 649 + }, + { + "epoch": 1.7067718606180144, + "high_lr": 0.000658421052631579, + "low_lr": 1.316842105263158e-05, + "step": 649 + }, + { + "epoch": 1.7067718606180144, + "high_lr": 0.000658421052631579, + "low_lr": 1.316842105263158e-05, + "step": 649 + }, + { + "epoch": 1.7067718606180144, + "high_lr": 0.000658421052631579, + "low_lr": 1.316842105263158e-05, + "step": 649 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 1.2047836780548096, + "learning_rate": 0.0006578947368421054, + "loss": 1.5427, + "step": 650 + }, + { + "epoch": 1.7094017094017095, + "high_lr": 0.0006578947368421054, + "low_lr": 1.3157894736842108e-05, + "step": 650 + }, + { + "epoch": 1.7094017094017095, + "high_lr": 0.0006578947368421054, + "low_lr": 1.3157894736842108e-05, + "step": 650 + }, + { + "epoch": 1.7094017094017095, + "high_lr": 0.0006578947368421054, + "low_lr": 1.3157894736842108e-05, + "step": 650 + }, + { + "epoch": 1.7094017094017095, + "high_lr": 0.0006578947368421054, + "low_lr": 1.3157894736842108e-05, + "step": 650 + }, + { + "epoch": 1.7094017094017095, + "high_lr": 0.0006578947368421054, + "low_lr": 1.3157894736842108e-05, + "step": 650 + }, + { + "epoch": 1.7094017094017095, + "high_lr": 0.0006578947368421054, + "low_lr": 1.3157894736842108e-05, + "step": 650 + }, + { + "epoch": 1.7094017094017095, + "high_lr": 0.0006578947368421054, + "low_lr": 1.3157894736842108e-05, + "step": 650 + }, + { + "epoch": 1.7094017094017095, + "high_lr": 0.0006578947368421054, + "low_lr": 1.3157894736842108e-05, + "step": 650 + }, + { + "epoch": 1.7120315581854042, + "grad_norm": 1.0834522247314453, + "learning_rate": 0.0006573684210526316, + "loss": 1.4951, + "step": 651 + }, + { + "epoch": 1.7120315581854042, + "high_lr": 0.0006573684210526316, + "low_lr": 1.3147368421052632e-05, + "step": 651 + }, + { + "epoch": 1.7120315581854042, + "high_lr": 0.0006573684210526316, + "low_lr": 1.3147368421052632e-05, + "step": 651 + }, + { + "epoch": 1.7120315581854042, + "high_lr": 0.0006573684210526316, + "low_lr": 1.3147368421052632e-05, + "step": 651 + }, + { + "epoch": 1.7120315581854042, + "high_lr": 0.0006573684210526316, + "low_lr": 1.3147368421052632e-05, + "step": 651 + }, + { + "epoch": 1.7120315581854042, + "high_lr": 0.0006573684210526316, + "low_lr": 1.3147368421052632e-05, + "step": 651 + }, + { + "epoch": 1.7120315581854042, + "high_lr": 0.0006573684210526316, + "low_lr": 1.3147368421052632e-05, + "step": 651 + }, + { + "epoch": 1.7120315581854042, + "high_lr": 0.0006573684210526316, + "low_lr": 1.3147368421052632e-05, + "step": 651 + }, + { + "epoch": 1.7120315581854042, + "high_lr": 0.0006573684210526316, + "low_lr": 1.3147368421052632e-05, + "step": 651 + }, + { + "epoch": 1.7146614069690993, + "grad_norm": 1.027203917503357, + "learning_rate": 0.0006568421052631579, + "loss": 1.4369, + "step": 652 + }, + { + "epoch": 1.7146614069690993, + "high_lr": 0.0006568421052631579, + "low_lr": 1.3136842105263159e-05, + "step": 652 + }, + { + "epoch": 1.7146614069690993, + "high_lr": 0.0006568421052631579, + "low_lr": 1.3136842105263159e-05, + "step": 652 + }, + { + "epoch": 1.7146614069690993, + "high_lr": 0.0006568421052631579, + "low_lr": 1.3136842105263159e-05, + "step": 652 + }, + { + "epoch": 1.7146614069690993, + "high_lr": 0.0006568421052631579, + "low_lr": 1.3136842105263159e-05, + "step": 652 + }, + { + "epoch": 1.7146614069690993, + "high_lr": 0.0006568421052631579, + "low_lr": 1.3136842105263159e-05, + "step": 652 + }, + { + "epoch": 1.7146614069690993, + "high_lr": 0.0006568421052631579, + "low_lr": 1.3136842105263159e-05, + "step": 652 + }, + { + "epoch": 1.7146614069690993, + "high_lr": 0.0006568421052631579, + "low_lr": 1.3136842105263159e-05, + "step": 652 + }, + { + "epoch": 1.7146614069690993, + "high_lr": 0.0006568421052631579, + "low_lr": 1.3136842105263159e-05, + "step": 652 + }, + { + "epoch": 1.7172912557527942, + "grad_norm": 1.1530510187149048, + "learning_rate": 0.0006563157894736842, + "loss": 1.5094, + "step": 653 + }, + { + "epoch": 1.7172912557527942, + "high_lr": 0.0006563157894736842, + "low_lr": 1.3126315789473685e-05, + "step": 653 + }, + { + "epoch": 1.7172912557527942, + "high_lr": 0.0006563157894736842, + "low_lr": 1.3126315789473685e-05, + "step": 653 + }, + { + "epoch": 1.7172912557527942, + "high_lr": 0.0006563157894736842, + "low_lr": 1.3126315789473685e-05, + "step": 653 + }, + { + "epoch": 1.7172912557527942, + "high_lr": 0.0006563157894736842, + "low_lr": 1.3126315789473685e-05, + "step": 653 + }, + { + "epoch": 1.7172912557527942, + "high_lr": 0.0006563157894736842, + "low_lr": 1.3126315789473685e-05, + "step": 653 + }, + { + "epoch": 1.7172912557527942, + "high_lr": 0.0006563157894736842, + "low_lr": 1.3126315789473685e-05, + "step": 653 + }, + { + "epoch": 1.7172912557527942, + "high_lr": 0.0006563157894736842, + "low_lr": 1.3126315789473685e-05, + "step": 653 + }, + { + "epoch": 1.7172912557527942, + "high_lr": 0.0006563157894736842, + "low_lr": 1.3126315789473685e-05, + "step": 653 + }, + { + "epoch": 1.719921104536489, + "grad_norm": 1.1710301637649536, + "learning_rate": 0.0006557894736842105, + "loss": 1.483, + "step": 654 + }, + { + "epoch": 1.719921104536489, + "high_lr": 0.0006557894736842105, + "low_lr": 1.3115789473684211e-05, + "step": 654 + }, + { + "epoch": 1.719921104536489, + "high_lr": 0.0006557894736842105, + "low_lr": 1.3115789473684211e-05, + "step": 654 + }, + { + "epoch": 1.719921104536489, + "high_lr": 0.0006557894736842105, + "low_lr": 1.3115789473684211e-05, + "step": 654 + }, + { + "epoch": 1.719921104536489, + "high_lr": 0.0006557894736842105, + "low_lr": 1.3115789473684211e-05, + "step": 654 + }, + { + "epoch": 1.719921104536489, + "high_lr": 0.0006557894736842105, + "low_lr": 1.3115789473684211e-05, + "step": 654 + }, + { + "epoch": 1.719921104536489, + "high_lr": 0.0006557894736842105, + "low_lr": 1.3115789473684211e-05, + "step": 654 + }, + { + "epoch": 1.719921104536489, + "high_lr": 0.0006557894736842105, + "low_lr": 1.3115789473684211e-05, + "step": 654 + }, + { + "epoch": 1.719921104536489, + "high_lr": 0.0006557894736842105, + "low_lr": 1.3115789473684211e-05, + "step": 654 + }, + { + "epoch": 1.7225509533201842, + "grad_norm": 1.118013620376587, + "learning_rate": 0.0006552631578947369, + "loss": 1.4718, + "step": 655 + }, + { + "epoch": 1.7225509533201842, + "high_lr": 0.0006552631578947369, + "low_lr": 1.310526315789474e-05, + "step": 655 + }, + { + "epoch": 1.7225509533201842, + "high_lr": 0.0006552631578947369, + "low_lr": 1.310526315789474e-05, + "step": 655 + }, + { + "epoch": 1.7225509533201842, + "high_lr": 0.0006552631578947369, + "low_lr": 1.310526315789474e-05, + "step": 655 + }, + { + "epoch": 1.7225509533201842, + "high_lr": 0.0006552631578947369, + "low_lr": 1.310526315789474e-05, + "step": 655 + }, + { + "epoch": 1.7225509533201842, + "high_lr": 0.0006552631578947369, + "low_lr": 1.310526315789474e-05, + "step": 655 + }, + { + "epoch": 1.7225509533201842, + "high_lr": 0.0006552631578947369, + "low_lr": 1.310526315789474e-05, + "step": 655 + }, + { + "epoch": 1.7225509533201842, + "high_lr": 0.0006552631578947369, + "low_lr": 1.310526315789474e-05, + "step": 655 + }, + { + "epoch": 1.7225509533201842, + "high_lr": 0.0006552631578947369, + "low_lr": 1.310526315789474e-05, + "step": 655 + }, + { + "epoch": 1.725180802103879, + "grad_norm": 1.1998544931411743, + "learning_rate": 0.0006547368421052632, + "loss": 1.4875, + "step": 656 + }, + { + "epoch": 1.725180802103879, + "high_lr": 0.0006547368421052632, + "low_lr": 1.3094736842105264e-05, + "step": 656 + }, + { + "epoch": 1.725180802103879, + "high_lr": 0.0006547368421052632, + "low_lr": 1.3094736842105264e-05, + "step": 656 + }, + { + "epoch": 1.725180802103879, + "high_lr": 0.0006547368421052632, + "low_lr": 1.3094736842105264e-05, + "step": 656 + }, + { + "epoch": 1.725180802103879, + "high_lr": 0.0006547368421052632, + "low_lr": 1.3094736842105264e-05, + "step": 656 + }, + { + "epoch": 1.725180802103879, + "high_lr": 0.0006547368421052632, + "low_lr": 1.3094736842105264e-05, + "step": 656 + }, + { + "epoch": 1.725180802103879, + "high_lr": 0.0006547368421052632, + "low_lr": 1.3094736842105264e-05, + "step": 656 + }, + { + "epoch": 1.725180802103879, + "high_lr": 0.0006547368421052632, + "low_lr": 1.3094736842105264e-05, + "step": 656 + }, + { + "epoch": 1.725180802103879, + "high_lr": 0.0006547368421052632, + "low_lr": 1.3094736842105264e-05, + "step": 656 + }, + { + "epoch": 1.727810650887574, + "grad_norm": 1.1463004350662231, + "learning_rate": 0.0006542105263157895, + "loss": 1.5059, + "step": 657 + }, + { + "epoch": 1.727810650887574, + "high_lr": 0.0006542105263157895, + "low_lr": 1.308421052631579e-05, + "step": 657 + }, + { + "epoch": 1.727810650887574, + "high_lr": 0.0006542105263157895, + "low_lr": 1.308421052631579e-05, + "step": 657 + }, + { + "epoch": 1.727810650887574, + "high_lr": 0.0006542105263157895, + "low_lr": 1.308421052631579e-05, + "step": 657 + }, + { + "epoch": 1.727810650887574, + "high_lr": 0.0006542105263157895, + "low_lr": 1.308421052631579e-05, + "step": 657 + }, + { + "epoch": 1.727810650887574, + "high_lr": 0.0006542105263157895, + "low_lr": 1.308421052631579e-05, + "step": 657 + }, + { + "epoch": 1.727810650887574, + "high_lr": 0.0006542105263157895, + "low_lr": 1.308421052631579e-05, + "step": 657 + }, + { + "epoch": 1.727810650887574, + "high_lr": 0.0006542105263157895, + "low_lr": 1.308421052631579e-05, + "step": 657 + }, + { + "epoch": 1.727810650887574, + "high_lr": 0.0006542105263157895, + "low_lr": 1.308421052631579e-05, + "step": 657 + }, + { + "epoch": 1.730440499671269, + "grad_norm": 1.1531513929367065, + "learning_rate": 0.0006536842105263158, + "loss": 1.4984, + "step": 658 + }, + { + "epoch": 1.730440499671269, + "high_lr": 0.0006536842105263158, + "low_lr": 1.3073684210526317e-05, + "step": 658 + }, + { + "epoch": 1.730440499671269, + "high_lr": 0.0006536842105263158, + "low_lr": 1.3073684210526317e-05, + "step": 658 + }, + { + "epoch": 1.730440499671269, + "high_lr": 0.0006536842105263158, + "low_lr": 1.3073684210526317e-05, + "step": 658 + }, + { + "epoch": 1.730440499671269, + "high_lr": 0.0006536842105263158, + "low_lr": 1.3073684210526317e-05, + "step": 658 + }, + { + "epoch": 1.730440499671269, + "high_lr": 0.0006536842105263158, + "low_lr": 1.3073684210526317e-05, + "step": 658 + }, + { + "epoch": 1.730440499671269, + "high_lr": 0.0006536842105263158, + "low_lr": 1.3073684210526317e-05, + "step": 658 + }, + { + "epoch": 1.730440499671269, + "high_lr": 0.0006536842105263158, + "low_lr": 1.3073684210526317e-05, + "step": 658 + }, + { + "epoch": 1.730440499671269, + "high_lr": 0.0006536842105263158, + "low_lr": 1.3073684210526317e-05, + "step": 658 + }, + { + "epoch": 1.7330703484549639, + "grad_norm": 1.1394236087799072, + "learning_rate": 0.0006531578947368421, + "loss": 1.4976, + "step": 659 + }, + { + "epoch": 1.7330703484549639, + "high_lr": 0.0006531578947368421, + "low_lr": 1.3063157894736845e-05, + "step": 659 + }, + { + "epoch": 1.7330703484549639, + "high_lr": 0.0006531578947368421, + "low_lr": 1.3063157894736845e-05, + "step": 659 + }, + { + "epoch": 1.7330703484549639, + "high_lr": 0.0006531578947368421, + "low_lr": 1.3063157894736845e-05, + "step": 659 + }, + { + "epoch": 1.7330703484549639, + "high_lr": 0.0006531578947368421, + "low_lr": 1.3063157894736845e-05, + "step": 659 + }, + { + "epoch": 1.7330703484549639, + "high_lr": 0.0006531578947368421, + "low_lr": 1.3063157894736845e-05, + "step": 659 + }, + { + "epoch": 1.7330703484549639, + "high_lr": 0.0006531578947368421, + "low_lr": 1.3063157894736845e-05, + "step": 659 + }, + { + "epoch": 1.7330703484549639, + "high_lr": 0.0006531578947368421, + "low_lr": 1.3063157894736845e-05, + "step": 659 + }, + { + "epoch": 1.7330703484549639, + "high_lr": 0.0006531578947368421, + "low_lr": 1.3063157894736845e-05, + "step": 659 + }, + { + "epoch": 1.7357001972386588, + "grad_norm": 1.1302404403686523, + "learning_rate": 0.0006526315789473684, + "loss": 1.4824, + "step": 660 + }, + { + "epoch": 1.7357001972386588, + "high_lr": 0.0006526315789473684, + "low_lr": 1.305263157894737e-05, + "step": 660 + }, + { + "epoch": 1.7357001972386588, + "high_lr": 0.0006526315789473684, + "low_lr": 1.305263157894737e-05, + "step": 660 + }, + { + "epoch": 1.7357001972386588, + "high_lr": 0.0006526315789473684, + "low_lr": 1.305263157894737e-05, + "step": 660 + }, + { + "epoch": 1.7357001972386588, + "high_lr": 0.0006526315789473684, + "low_lr": 1.305263157894737e-05, + "step": 660 + }, + { + "epoch": 1.7357001972386588, + "high_lr": 0.0006526315789473684, + "low_lr": 1.305263157894737e-05, + "step": 660 + }, + { + "epoch": 1.7357001972386588, + "high_lr": 0.0006526315789473684, + "low_lr": 1.305263157894737e-05, + "step": 660 + }, + { + "epoch": 1.7357001972386588, + "high_lr": 0.0006526315789473684, + "low_lr": 1.305263157894737e-05, + "step": 660 + }, + { + "epoch": 1.7357001972386588, + "high_lr": 0.0006526315789473684, + "low_lr": 1.305263157894737e-05, + "step": 660 + }, + { + "epoch": 1.7383300460223539, + "grad_norm": 1.1688681840896606, + "learning_rate": 0.0006521052631578947, + "loss": 1.5363, + "step": 661 + }, + { + "epoch": 1.7383300460223539, + "high_lr": 0.0006521052631578947, + "low_lr": 1.3042105263157896e-05, + "step": 661 + }, + { + "epoch": 1.7383300460223539, + "high_lr": 0.0006521052631578947, + "low_lr": 1.3042105263157896e-05, + "step": 661 + }, + { + "epoch": 1.7383300460223539, + "high_lr": 0.0006521052631578947, + "low_lr": 1.3042105263157896e-05, + "step": 661 + }, + { + "epoch": 1.7383300460223539, + "high_lr": 0.0006521052631578947, + "low_lr": 1.3042105263157896e-05, + "step": 661 + }, + { + "epoch": 1.7383300460223539, + "high_lr": 0.0006521052631578947, + "low_lr": 1.3042105263157896e-05, + "step": 661 + }, + { + "epoch": 1.7383300460223539, + "high_lr": 0.0006521052631578947, + "low_lr": 1.3042105263157896e-05, + "step": 661 + }, + { + "epoch": 1.7383300460223539, + "high_lr": 0.0006521052631578947, + "low_lr": 1.3042105263157896e-05, + "step": 661 + }, + { + "epoch": 1.7383300460223539, + "high_lr": 0.0006521052631578947, + "low_lr": 1.3042105263157896e-05, + "step": 661 + }, + { + "epoch": 1.7409598948060485, + "grad_norm": 1.0534868240356445, + "learning_rate": 0.000651578947368421, + "loss": 1.4678, + "step": 662 + }, + { + "epoch": 1.7409598948060485, + "high_lr": 0.000651578947368421, + "low_lr": 1.3031578947368422e-05, + "step": 662 + }, + { + "epoch": 1.7409598948060485, + "high_lr": 0.000651578947368421, + "low_lr": 1.3031578947368422e-05, + "step": 662 + }, + { + "epoch": 1.7409598948060485, + "high_lr": 0.000651578947368421, + "low_lr": 1.3031578947368422e-05, + "step": 662 + }, + { + "epoch": 1.7409598948060485, + "high_lr": 0.000651578947368421, + "low_lr": 1.3031578947368422e-05, + "step": 662 + }, + { + "epoch": 1.7409598948060485, + "high_lr": 0.000651578947368421, + "low_lr": 1.3031578947368422e-05, + "step": 662 + }, + { + "epoch": 1.7409598948060485, + "high_lr": 0.000651578947368421, + "low_lr": 1.3031578947368422e-05, + "step": 662 + }, + { + "epoch": 1.7409598948060485, + "high_lr": 0.000651578947368421, + "low_lr": 1.3031578947368422e-05, + "step": 662 + }, + { + "epoch": 1.7409598948060485, + "high_lr": 0.000651578947368421, + "low_lr": 1.3031578947368422e-05, + "step": 662 + }, + { + "epoch": 1.7435897435897436, + "grad_norm": 1.075860619544983, + "learning_rate": 0.0006510526315789473, + "loss": 1.4319, + "step": 663 + }, + { + "epoch": 1.7435897435897436, + "high_lr": 0.0006510526315789473, + "low_lr": 1.3021052631578948e-05, + "step": 663 + }, + { + "epoch": 1.7435897435897436, + "high_lr": 0.0006510526315789473, + "low_lr": 1.3021052631578948e-05, + "step": 663 + }, + { + "epoch": 1.7435897435897436, + "high_lr": 0.0006510526315789473, + "low_lr": 1.3021052631578948e-05, + "step": 663 + }, + { + "epoch": 1.7435897435897436, + "high_lr": 0.0006510526315789473, + "low_lr": 1.3021052631578948e-05, + "step": 663 + }, + { + "epoch": 1.7435897435897436, + "high_lr": 0.0006510526315789473, + "low_lr": 1.3021052631578948e-05, + "step": 663 + }, + { + "epoch": 1.7435897435897436, + "high_lr": 0.0006510526315789473, + "low_lr": 1.3021052631578948e-05, + "step": 663 + }, + { + "epoch": 1.7435897435897436, + "high_lr": 0.0006510526315789473, + "low_lr": 1.3021052631578948e-05, + "step": 663 + }, + { + "epoch": 1.7435897435897436, + "high_lr": 0.0006510526315789473, + "low_lr": 1.3021052631578948e-05, + "step": 663 + }, + { + "epoch": 1.7462195923734385, + "grad_norm": 1.1058990955352783, + "learning_rate": 0.0006505263157894738, + "loss": 1.4832, + "step": 664 + }, + { + "epoch": 1.7462195923734385, + "high_lr": 0.0006505263157894738, + "low_lr": 1.3010526315789476e-05, + "step": 664 + }, + { + "epoch": 1.7462195923734385, + "high_lr": 0.0006505263157894738, + "low_lr": 1.3010526315789476e-05, + "step": 664 + }, + { + "epoch": 1.7462195923734385, + "high_lr": 0.0006505263157894738, + "low_lr": 1.3010526315789476e-05, + "step": 664 + }, + { + "epoch": 1.7462195923734385, + "high_lr": 0.0006505263157894738, + "low_lr": 1.3010526315789476e-05, + "step": 664 + }, + { + "epoch": 1.7462195923734385, + "high_lr": 0.0006505263157894738, + "low_lr": 1.3010526315789476e-05, + "step": 664 + }, + { + "epoch": 1.7462195923734385, + "high_lr": 0.0006505263157894738, + "low_lr": 1.3010526315789476e-05, + "step": 664 + }, + { + "epoch": 1.7462195923734385, + "high_lr": 0.0006505263157894738, + "low_lr": 1.3010526315789476e-05, + "step": 664 + }, + { + "epoch": 1.7462195923734385, + "high_lr": 0.0006505263157894738, + "low_lr": 1.3010526315789476e-05, + "step": 664 + }, + { + "epoch": 1.7488494411571334, + "grad_norm": 1.2384603023529053, + "learning_rate": 0.0006500000000000001, + "loss": 1.5347, + "step": 665 + }, + { + "epoch": 1.7488494411571334, + "high_lr": 0.0006500000000000001, + "low_lr": 1.3000000000000001e-05, + "step": 665 + }, + { + "epoch": 1.7488494411571334, + "high_lr": 0.0006500000000000001, + "low_lr": 1.3000000000000001e-05, + "step": 665 + }, + { + "epoch": 1.7488494411571334, + "high_lr": 0.0006500000000000001, + "low_lr": 1.3000000000000001e-05, + "step": 665 + }, + { + "epoch": 1.7488494411571334, + "high_lr": 0.0006500000000000001, + "low_lr": 1.3000000000000001e-05, + "step": 665 + }, + { + "epoch": 1.7488494411571334, + "high_lr": 0.0006500000000000001, + "low_lr": 1.3000000000000001e-05, + "step": 665 + }, + { + "epoch": 1.7488494411571334, + "high_lr": 0.0006500000000000001, + "low_lr": 1.3000000000000001e-05, + "step": 665 + }, + { + "epoch": 1.7488494411571334, + "high_lr": 0.0006500000000000001, + "low_lr": 1.3000000000000001e-05, + "step": 665 + }, + { + "epoch": 1.7488494411571334, + "high_lr": 0.0006500000000000001, + "low_lr": 1.3000000000000001e-05, + "step": 665 + }, + { + "epoch": 1.7514792899408285, + "grad_norm": 1.1601611375808716, + "learning_rate": 0.0006494736842105264, + "loss": 1.4963, + "step": 666 + }, + { + "epoch": 1.7514792899408285, + "high_lr": 0.0006494736842105264, + "low_lr": 1.2989473684210527e-05, + "step": 666 + }, + { + "epoch": 1.7514792899408285, + "high_lr": 0.0006494736842105264, + "low_lr": 1.2989473684210527e-05, + "step": 666 + }, + { + "epoch": 1.7514792899408285, + "high_lr": 0.0006494736842105264, + "low_lr": 1.2989473684210527e-05, + "step": 666 + }, + { + "epoch": 1.7514792899408285, + "high_lr": 0.0006494736842105264, + "low_lr": 1.2989473684210527e-05, + "step": 666 + }, + { + "epoch": 1.7514792899408285, + "high_lr": 0.0006494736842105264, + "low_lr": 1.2989473684210527e-05, + "step": 666 + }, + { + "epoch": 1.7514792899408285, + "high_lr": 0.0006494736842105264, + "low_lr": 1.2989473684210527e-05, + "step": 666 + }, + { + "epoch": 1.7514792899408285, + "high_lr": 0.0006494736842105264, + "low_lr": 1.2989473684210527e-05, + "step": 666 + }, + { + "epoch": 1.7514792899408285, + "high_lr": 0.0006494736842105264, + "low_lr": 1.2989473684210527e-05, + "step": 666 + }, + { + "epoch": 1.7541091387245233, + "grad_norm": 1.135069489479065, + "learning_rate": 0.0006489473684210527, + "loss": 1.4478, + "step": 667 + }, + { + "epoch": 1.7541091387245233, + "high_lr": 0.0006489473684210527, + "low_lr": 1.2978947368421054e-05, + "step": 667 + }, + { + "epoch": 1.7541091387245233, + "high_lr": 0.0006489473684210527, + "low_lr": 1.2978947368421054e-05, + "step": 667 + }, + { + "epoch": 1.7541091387245233, + "high_lr": 0.0006489473684210527, + "low_lr": 1.2978947368421054e-05, + "step": 667 + }, + { + "epoch": 1.7541091387245233, + "high_lr": 0.0006489473684210527, + "low_lr": 1.2978947368421054e-05, + "step": 667 + }, + { + "epoch": 1.7541091387245233, + "high_lr": 0.0006489473684210527, + "low_lr": 1.2978947368421054e-05, + "step": 667 + }, + { + "epoch": 1.7541091387245233, + "high_lr": 0.0006489473684210527, + "low_lr": 1.2978947368421054e-05, + "step": 667 + }, + { + "epoch": 1.7541091387245233, + "high_lr": 0.0006489473684210527, + "low_lr": 1.2978947368421054e-05, + "step": 667 + }, + { + "epoch": 1.7541091387245233, + "high_lr": 0.0006489473684210527, + "low_lr": 1.2978947368421054e-05, + "step": 667 + }, + { + "epoch": 1.7567389875082182, + "grad_norm": 1.2301567792892456, + "learning_rate": 0.0006484210526315789, + "loss": 1.4792, + "step": 668 + }, + { + "epoch": 1.7567389875082182, + "high_lr": 0.0006484210526315789, + "low_lr": 1.2968421052631578e-05, + "step": 668 + }, + { + "epoch": 1.7567389875082182, + "high_lr": 0.0006484210526315789, + "low_lr": 1.2968421052631578e-05, + "step": 668 + }, + { + "epoch": 1.7567389875082182, + "high_lr": 0.0006484210526315789, + "low_lr": 1.2968421052631578e-05, + "step": 668 + }, + { + "epoch": 1.7567389875082182, + "high_lr": 0.0006484210526315789, + "low_lr": 1.2968421052631578e-05, + "step": 668 + }, + { + "epoch": 1.7567389875082182, + "high_lr": 0.0006484210526315789, + "low_lr": 1.2968421052631578e-05, + "step": 668 + }, + { + "epoch": 1.7567389875082182, + "high_lr": 0.0006484210526315789, + "low_lr": 1.2968421052631578e-05, + "step": 668 + }, + { + "epoch": 1.7567389875082182, + "high_lr": 0.0006484210526315789, + "low_lr": 1.2968421052631578e-05, + "step": 668 + }, + { + "epoch": 1.7567389875082182, + "high_lr": 0.0006484210526315789, + "low_lr": 1.2968421052631578e-05, + "step": 668 + }, + { + "epoch": 1.7593688362919133, + "grad_norm": 1.132869839668274, + "learning_rate": 0.0006478947368421053, + "loss": 1.473, + "step": 669 + }, + { + "epoch": 1.7593688362919133, + "high_lr": 0.0006478947368421053, + "low_lr": 1.2957894736842108e-05, + "step": 669 + }, + { + "epoch": 1.7593688362919133, + "high_lr": 0.0006478947368421053, + "low_lr": 1.2957894736842108e-05, + "step": 669 + }, + { + "epoch": 1.7593688362919133, + "high_lr": 0.0006478947368421053, + "low_lr": 1.2957894736842108e-05, + "step": 669 + }, + { + "epoch": 1.7593688362919133, + "high_lr": 0.0006478947368421053, + "low_lr": 1.2957894736842108e-05, + "step": 669 + }, + { + "epoch": 1.7593688362919133, + "high_lr": 0.0006478947368421053, + "low_lr": 1.2957894736842108e-05, + "step": 669 + }, + { + "epoch": 1.7593688362919133, + "high_lr": 0.0006478947368421053, + "low_lr": 1.2957894736842108e-05, + "step": 669 + }, + { + "epoch": 1.7593688362919133, + "high_lr": 0.0006478947368421053, + "low_lr": 1.2957894736842108e-05, + "step": 669 + }, + { + "epoch": 1.7593688362919133, + "high_lr": 0.0006478947368421053, + "low_lr": 1.2957894736842108e-05, + "step": 669 + }, + { + "epoch": 1.7619986850756082, + "grad_norm": 1.162306547164917, + "learning_rate": 0.0006473684210526316, + "loss": 1.5266, + "step": 670 + }, + { + "epoch": 1.7619986850756082, + "high_lr": 0.0006473684210526316, + "low_lr": 1.2947368421052633e-05, + "step": 670 + }, + { + "epoch": 1.7619986850756082, + "high_lr": 0.0006473684210526316, + "low_lr": 1.2947368421052633e-05, + "step": 670 + }, + { + "epoch": 1.7619986850756082, + "high_lr": 0.0006473684210526316, + "low_lr": 1.2947368421052633e-05, + "step": 670 + }, + { + "epoch": 1.7619986850756082, + "high_lr": 0.0006473684210526316, + "low_lr": 1.2947368421052633e-05, + "step": 670 + }, + { + "epoch": 1.7619986850756082, + "high_lr": 0.0006473684210526316, + "low_lr": 1.2947368421052633e-05, + "step": 670 + }, + { + "epoch": 1.7619986850756082, + "high_lr": 0.0006473684210526316, + "low_lr": 1.2947368421052633e-05, + "step": 670 + }, + { + "epoch": 1.7619986850756082, + "high_lr": 0.0006473684210526316, + "low_lr": 1.2947368421052633e-05, + "step": 670 + }, + { + "epoch": 1.7619986850756082, + "high_lr": 0.0006473684210526316, + "low_lr": 1.2947368421052633e-05, + "step": 670 + }, + { + "epoch": 1.764628533859303, + "grad_norm": 1.0740419626235962, + "learning_rate": 0.0006468421052631579, + "loss": 1.49, + "step": 671 + }, + { + "epoch": 1.764628533859303, + "high_lr": 0.0006468421052631579, + "low_lr": 1.2936842105263159e-05, + "step": 671 + }, + { + "epoch": 1.764628533859303, + "high_lr": 0.0006468421052631579, + "low_lr": 1.2936842105263159e-05, + "step": 671 + }, + { + "epoch": 1.764628533859303, + "high_lr": 0.0006468421052631579, + "low_lr": 1.2936842105263159e-05, + "step": 671 + }, + { + "epoch": 1.764628533859303, + "high_lr": 0.0006468421052631579, + "low_lr": 1.2936842105263159e-05, + "step": 671 + }, + { + "epoch": 1.764628533859303, + "high_lr": 0.0006468421052631579, + "low_lr": 1.2936842105263159e-05, + "step": 671 + }, + { + "epoch": 1.764628533859303, + "high_lr": 0.0006468421052631579, + "low_lr": 1.2936842105263159e-05, + "step": 671 + }, + { + "epoch": 1.764628533859303, + "high_lr": 0.0006468421052631579, + "low_lr": 1.2936842105263159e-05, + "step": 671 + }, + { + "epoch": 1.764628533859303, + "high_lr": 0.0006468421052631579, + "low_lr": 1.2936842105263159e-05, + "step": 671 + }, + { + "epoch": 1.7672583826429982, + "grad_norm": 1.0520868301391602, + "learning_rate": 0.0006463157894736842, + "loss": 1.4678, + "step": 672 + }, + { + "epoch": 1.7672583826429982, + "high_lr": 0.0006463157894736842, + "low_lr": 1.2926315789473685e-05, + "step": 672 + }, + { + "epoch": 1.7672583826429982, + "high_lr": 0.0006463157894736842, + "low_lr": 1.2926315789473685e-05, + "step": 672 + }, + { + "epoch": 1.7672583826429982, + "high_lr": 0.0006463157894736842, + "low_lr": 1.2926315789473685e-05, + "step": 672 + }, + { + "epoch": 1.7672583826429982, + "high_lr": 0.0006463157894736842, + "low_lr": 1.2926315789473685e-05, + "step": 672 + }, + { + "epoch": 1.7672583826429982, + "high_lr": 0.0006463157894736842, + "low_lr": 1.2926315789473685e-05, + "step": 672 + }, + { + "epoch": 1.7672583826429982, + "high_lr": 0.0006463157894736842, + "low_lr": 1.2926315789473685e-05, + "step": 672 + }, + { + "epoch": 1.7672583826429982, + "high_lr": 0.0006463157894736842, + "low_lr": 1.2926315789473685e-05, + "step": 672 + }, + { + "epoch": 1.7672583826429982, + "high_lr": 0.0006463157894736842, + "low_lr": 1.2926315789473685e-05, + "step": 672 + }, + { + "epoch": 1.7698882314266928, + "grad_norm": 1.1029653549194336, + "learning_rate": 0.0006457894736842106, + "loss": 1.4892, + "step": 673 + }, + { + "epoch": 1.7698882314266928, + "high_lr": 0.0006457894736842106, + "low_lr": 1.2915789473684213e-05, + "step": 673 + }, + { + "epoch": 1.7698882314266928, + "high_lr": 0.0006457894736842106, + "low_lr": 1.2915789473684213e-05, + "step": 673 + }, + { + "epoch": 1.7698882314266928, + "high_lr": 0.0006457894736842106, + "low_lr": 1.2915789473684213e-05, + "step": 673 + }, + { + "epoch": 1.7698882314266928, + "high_lr": 0.0006457894736842106, + "low_lr": 1.2915789473684213e-05, + "step": 673 + }, + { + "epoch": 1.7698882314266928, + "high_lr": 0.0006457894736842106, + "low_lr": 1.2915789473684213e-05, + "step": 673 + }, + { + "epoch": 1.7698882314266928, + "high_lr": 0.0006457894736842106, + "low_lr": 1.2915789473684213e-05, + "step": 673 + }, + { + "epoch": 1.7698882314266928, + "high_lr": 0.0006457894736842106, + "low_lr": 1.2915789473684213e-05, + "step": 673 + }, + { + "epoch": 1.7698882314266928, + "high_lr": 0.0006457894736842106, + "low_lr": 1.2915789473684213e-05, + "step": 673 + }, + { + "epoch": 1.772518080210388, + "grad_norm": 1.1500648260116577, + "learning_rate": 0.0006452631578947369, + "loss": 1.4969, + "step": 674 + }, + { + "epoch": 1.772518080210388, + "high_lr": 0.0006452631578947369, + "low_lr": 1.2905263157894738e-05, + "step": 674 + }, + { + "epoch": 1.772518080210388, + "high_lr": 0.0006452631578947369, + "low_lr": 1.2905263157894738e-05, + "step": 674 + }, + { + "epoch": 1.772518080210388, + "high_lr": 0.0006452631578947369, + "low_lr": 1.2905263157894738e-05, + "step": 674 + }, + { + "epoch": 1.772518080210388, + "high_lr": 0.0006452631578947369, + "low_lr": 1.2905263157894738e-05, + "step": 674 + }, + { + "epoch": 1.772518080210388, + "high_lr": 0.0006452631578947369, + "low_lr": 1.2905263157894738e-05, + "step": 674 + }, + { + "epoch": 1.772518080210388, + "high_lr": 0.0006452631578947369, + "low_lr": 1.2905263157894738e-05, + "step": 674 + }, + { + "epoch": 1.772518080210388, + "high_lr": 0.0006452631578947369, + "low_lr": 1.2905263157894738e-05, + "step": 674 + }, + { + "epoch": 1.772518080210388, + "high_lr": 0.0006452631578947369, + "low_lr": 1.2905263157894738e-05, + "step": 674 + }, + { + "epoch": 1.7751479289940828, + "grad_norm": 1.1641913652420044, + "learning_rate": 0.0006447368421052632, + "loss": 1.4436, + "step": 675 + }, + { + "epoch": 1.7751479289940828, + "high_lr": 0.0006447368421052632, + "low_lr": 1.2894736842105264e-05, + "step": 675 + }, + { + "epoch": 1.7751479289940828, + "high_lr": 0.0006447368421052632, + "low_lr": 1.2894736842105264e-05, + "step": 675 + }, + { + "epoch": 1.7751479289940828, + "high_lr": 0.0006447368421052632, + "low_lr": 1.2894736842105264e-05, + "step": 675 + }, + { + "epoch": 1.7751479289940828, + "high_lr": 0.0006447368421052632, + "low_lr": 1.2894736842105264e-05, + "step": 675 + }, + { + "epoch": 1.7751479289940828, + "high_lr": 0.0006447368421052632, + "low_lr": 1.2894736842105264e-05, + "step": 675 + }, + { + "epoch": 1.7751479289940828, + "high_lr": 0.0006447368421052632, + "low_lr": 1.2894736842105264e-05, + "step": 675 + }, + { + "epoch": 1.7751479289940828, + "high_lr": 0.0006447368421052632, + "low_lr": 1.2894736842105264e-05, + "step": 675 + }, + { + "epoch": 1.7751479289940828, + "high_lr": 0.0006447368421052632, + "low_lr": 1.2894736842105264e-05, + "step": 675 + }, + { + "epoch": 1.7777777777777777, + "grad_norm": 1.161435604095459, + "learning_rate": 0.0006442105263157894, + "loss": 1.5171, + "step": 676 + }, + { + "epoch": 1.7777777777777777, + "high_lr": 0.0006442105263157894, + "low_lr": 1.288421052631579e-05, + "step": 676 + }, + { + "epoch": 1.7777777777777777, + "high_lr": 0.0006442105263157894, + "low_lr": 1.288421052631579e-05, + "step": 676 + }, + { + "epoch": 1.7777777777777777, + "high_lr": 0.0006442105263157894, + "low_lr": 1.288421052631579e-05, + "step": 676 + }, + { + "epoch": 1.7777777777777777, + "high_lr": 0.0006442105263157894, + "low_lr": 1.288421052631579e-05, + "step": 676 + }, + { + "epoch": 1.7777777777777777, + "high_lr": 0.0006442105263157894, + "low_lr": 1.288421052631579e-05, + "step": 676 + }, + { + "epoch": 1.7777777777777777, + "high_lr": 0.0006442105263157894, + "low_lr": 1.288421052631579e-05, + "step": 676 + }, + { + "epoch": 1.7777777777777777, + "high_lr": 0.0006442105263157894, + "low_lr": 1.288421052631579e-05, + "step": 676 + }, + { + "epoch": 1.7777777777777777, + "high_lr": 0.0006442105263157894, + "low_lr": 1.288421052631579e-05, + "step": 676 + }, + { + "epoch": 1.7804076265614728, + "grad_norm": 1.1899640560150146, + "learning_rate": 0.0006436842105263157, + "loss": 1.482, + "step": 677 + }, + { + "epoch": 1.7804076265614728, + "high_lr": 0.0006436842105263157, + "low_lr": 1.2873684210526317e-05, + "step": 677 + }, + { + "epoch": 1.7804076265614728, + "high_lr": 0.0006436842105263157, + "low_lr": 1.2873684210526317e-05, + "step": 677 + }, + { + "epoch": 1.7804076265614728, + "high_lr": 0.0006436842105263157, + "low_lr": 1.2873684210526317e-05, + "step": 677 + }, + { + "epoch": 1.7804076265614728, + "high_lr": 0.0006436842105263157, + "low_lr": 1.2873684210526317e-05, + "step": 677 + }, + { + "epoch": 1.7804076265614728, + "high_lr": 0.0006436842105263157, + "low_lr": 1.2873684210526317e-05, + "step": 677 + }, + { + "epoch": 1.7804076265614728, + "high_lr": 0.0006436842105263157, + "low_lr": 1.2873684210526317e-05, + "step": 677 + }, + { + "epoch": 1.7804076265614728, + "high_lr": 0.0006436842105263157, + "low_lr": 1.2873684210526317e-05, + "step": 677 + }, + { + "epoch": 1.7804076265614728, + "high_lr": 0.0006436842105263157, + "low_lr": 1.2873684210526317e-05, + "step": 677 + }, + { + "epoch": 1.7830374753451677, + "grad_norm": 1.110012412071228, + "learning_rate": 0.0006431578947368421, + "loss": 1.4839, + "step": 678 + }, + { + "epoch": 1.7830374753451677, + "high_lr": 0.0006431578947368421, + "low_lr": 1.2863157894736845e-05, + "step": 678 + }, + { + "epoch": 1.7830374753451677, + "high_lr": 0.0006431578947368421, + "low_lr": 1.2863157894736845e-05, + "step": 678 + }, + { + "epoch": 1.7830374753451677, + "high_lr": 0.0006431578947368421, + "low_lr": 1.2863157894736845e-05, + "step": 678 + }, + { + "epoch": 1.7830374753451677, + "high_lr": 0.0006431578947368421, + "low_lr": 1.2863157894736845e-05, + "step": 678 + }, + { + "epoch": 1.7830374753451677, + "high_lr": 0.0006431578947368421, + "low_lr": 1.2863157894736845e-05, + "step": 678 + }, + { + "epoch": 1.7830374753451677, + "high_lr": 0.0006431578947368421, + "low_lr": 1.2863157894736845e-05, + "step": 678 + }, + { + "epoch": 1.7830374753451677, + "high_lr": 0.0006431578947368421, + "low_lr": 1.2863157894736845e-05, + "step": 678 + }, + { + "epoch": 1.7830374753451677, + "high_lr": 0.0006431578947368421, + "low_lr": 1.2863157894736845e-05, + "step": 678 + }, + { + "epoch": 1.7856673241288625, + "grad_norm": 1.1355431079864502, + "learning_rate": 0.0006426315789473684, + "loss": 1.4858, + "step": 679 + }, + { + "epoch": 1.7856673241288625, + "high_lr": 0.0006426315789473684, + "low_lr": 1.285263157894737e-05, + "step": 679 + }, + { + "epoch": 1.7856673241288625, + "high_lr": 0.0006426315789473684, + "low_lr": 1.285263157894737e-05, + "step": 679 + }, + { + "epoch": 1.7856673241288625, + "high_lr": 0.0006426315789473684, + "low_lr": 1.285263157894737e-05, + "step": 679 + }, + { + "epoch": 1.7856673241288625, + "high_lr": 0.0006426315789473684, + "low_lr": 1.285263157894737e-05, + "step": 679 + }, + { + "epoch": 1.7856673241288625, + "high_lr": 0.0006426315789473684, + "low_lr": 1.285263157894737e-05, + "step": 679 + }, + { + "epoch": 1.7856673241288625, + "high_lr": 0.0006426315789473684, + "low_lr": 1.285263157894737e-05, + "step": 679 + }, + { + "epoch": 1.7856673241288625, + "high_lr": 0.0006426315789473684, + "low_lr": 1.285263157894737e-05, + "step": 679 + }, + { + "epoch": 1.7856673241288625, + "high_lr": 0.0006426315789473684, + "low_lr": 1.285263157894737e-05, + "step": 679 + }, + { + "epoch": 1.7882971729125576, + "grad_norm": 1.0867681503295898, + "learning_rate": 0.0006421052631578948, + "loss": 1.4809, + "step": 680 + }, + { + "epoch": 1.7882971729125576, + "high_lr": 0.0006421052631578948, + "low_lr": 1.2842105263157896e-05, + "step": 680 + }, + { + "epoch": 1.7882971729125576, + "high_lr": 0.0006421052631578948, + "low_lr": 1.2842105263157896e-05, + "step": 680 + }, + { + "epoch": 1.7882971729125576, + "high_lr": 0.0006421052631578948, + "low_lr": 1.2842105263157896e-05, + "step": 680 + }, + { + "epoch": 1.7882971729125576, + "high_lr": 0.0006421052631578948, + "low_lr": 1.2842105263157896e-05, + "step": 680 + }, + { + "epoch": 1.7882971729125576, + "high_lr": 0.0006421052631578948, + "low_lr": 1.2842105263157896e-05, + "step": 680 + }, + { + "epoch": 1.7882971729125576, + "high_lr": 0.0006421052631578948, + "low_lr": 1.2842105263157896e-05, + "step": 680 + }, + { + "epoch": 1.7882971729125576, + "high_lr": 0.0006421052631578948, + "low_lr": 1.2842105263157896e-05, + "step": 680 + }, + { + "epoch": 1.7882971729125576, + "high_lr": 0.0006421052631578948, + "low_lr": 1.2842105263157896e-05, + "step": 680 + }, + { + "epoch": 1.7909270216962525, + "grad_norm": 2.313262701034546, + "learning_rate": 0.0006415789473684211, + "loss": 1.4851, + "step": 681 + }, + { + "epoch": 1.7909270216962525, + "high_lr": 0.0006415789473684211, + "low_lr": 1.2831578947368422e-05, + "step": 681 + }, + { + "epoch": 1.7909270216962525, + "high_lr": 0.0006415789473684211, + "low_lr": 1.2831578947368422e-05, + "step": 681 + }, + { + "epoch": 1.7909270216962525, + "high_lr": 0.0006415789473684211, + "low_lr": 1.2831578947368422e-05, + "step": 681 + }, + { + "epoch": 1.7909270216962525, + "high_lr": 0.0006415789473684211, + "low_lr": 1.2831578947368422e-05, + "step": 681 + }, + { + "epoch": 1.7909270216962525, + "high_lr": 0.0006415789473684211, + "low_lr": 1.2831578947368422e-05, + "step": 681 + }, + { + "epoch": 1.7909270216962525, + "high_lr": 0.0006415789473684211, + "low_lr": 1.2831578947368422e-05, + "step": 681 + }, + { + "epoch": 1.7909270216962525, + "high_lr": 0.0006415789473684211, + "low_lr": 1.2831578947368422e-05, + "step": 681 + }, + { + "epoch": 1.7909270216962525, + "high_lr": 0.0006415789473684211, + "low_lr": 1.2831578947368422e-05, + "step": 681 + }, + { + "epoch": 1.7935568704799474, + "grad_norm": 1.1034647226333618, + "learning_rate": 0.0006410526315789474, + "loss": 1.4758, + "step": 682 + }, + { + "epoch": 1.7935568704799474, + "high_lr": 0.0006410526315789474, + "low_lr": 1.2821052631578947e-05, + "step": 682 + }, + { + "epoch": 1.7935568704799474, + "high_lr": 0.0006410526315789474, + "low_lr": 1.2821052631578947e-05, + "step": 682 + }, + { + "epoch": 1.7935568704799474, + "high_lr": 0.0006410526315789474, + "low_lr": 1.2821052631578947e-05, + "step": 682 + }, + { + "epoch": 1.7935568704799474, + "high_lr": 0.0006410526315789474, + "low_lr": 1.2821052631578947e-05, + "step": 682 + }, + { + "epoch": 1.7935568704799474, + "high_lr": 0.0006410526315789474, + "low_lr": 1.2821052631578947e-05, + "step": 682 + }, + { + "epoch": 1.7935568704799474, + "high_lr": 0.0006410526315789474, + "low_lr": 1.2821052631578947e-05, + "step": 682 + }, + { + "epoch": 1.7935568704799474, + "high_lr": 0.0006410526315789474, + "low_lr": 1.2821052631578947e-05, + "step": 682 + }, + { + "epoch": 1.7935568704799474, + "high_lr": 0.0006410526315789474, + "low_lr": 1.2821052631578947e-05, + "step": 682 + }, + { + "epoch": 1.7961867192636425, + "grad_norm": 1.1270182132720947, + "learning_rate": 0.0006405263157894738, + "loss": 1.4794, + "step": 683 + }, + { + "epoch": 1.7961867192636425, + "high_lr": 0.0006405263157894738, + "low_lr": 1.2810526315789475e-05, + "step": 683 + }, + { + "epoch": 1.7961867192636425, + "high_lr": 0.0006405263157894738, + "low_lr": 1.2810526315789475e-05, + "step": 683 + }, + { + "epoch": 1.7961867192636425, + "high_lr": 0.0006405263157894738, + "low_lr": 1.2810526315789475e-05, + "step": 683 + }, + { + "epoch": 1.7961867192636425, + "high_lr": 0.0006405263157894738, + "low_lr": 1.2810526315789475e-05, + "step": 683 + }, + { + "epoch": 1.7961867192636425, + "high_lr": 0.0006405263157894738, + "low_lr": 1.2810526315789475e-05, + "step": 683 + }, + { + "epoch": 1.7961867192636425, + "high_lr": 0.0006405263157894738, + "low_lr": 1.2810526315789475e-05, + "step": 683 + }, + { + "epoch": 1.7961867192636425, + "high_lr": 0.0006405263157894738, + "low_lr": 1.2810526315789475e-05, + "step": 683 + }, + { + "epoch": 1.7961867192636425, + "high_lr": 0.0006405263157894738, + "low_lr": 1.2810526315789475e-05, + "step": 683 + }, + { + "epoch": 1.7988165680473371, + "grad_norm": 1.1152960062026978, + "learning_rate": 0.00064, + "loss": 1.4582, + "step": 684 + }, + { + "epoch": 1.7988165680473371, + "high_lr": 0.00064, + "low_lr": 1.2800000000000001e-05, + "step": 684 + }, + { + "epoch": 1.7988165680473371, + "high_lr": 0.00064, + "low_lr": 1.2800000000000001e-05, + "step": 684 + }, + { + "epoch": 1.7988165680473371, + "high_lr": 0.00064, + "low_lr": 1.2800000000000001e-05, + "step": 684 + }, + { + "epoch": 1.7988165680473371, + "high_lr": 0.00064, + "low_lr": 1.2800000000000001e-05, + "step": 684 + }, + { + "epoch": 1.7988165680473371, + "high_lr": 0.00064, + "low_lr": 1.2800000000000001e-05, + "step": 684 + }, + { + "epoch": 1.7988165680473371, + "high_lr": 0.00064, + "low_lr": 1.2800000000000001e-05, + "step": 684 + }, + { + "epoch": 1.7988165680473371, + "high_lr": 0.00064, + "low_lr": 1.2800000000000001e-05, + "step": 684 + }, + { + "epoch": 1.7988165680473371, + "high_lr": 0.00064, + "low_lr": 1.2800000000000001e-05, + "step": 684 + }, + { + "epoch": 1.8014464168310322, + "grad_norm": 1.0992164611816406, + "learning_rate": 0.0006394736842105263, + "loss": 1.5235, + "step": 685 + }, + { + "epoch": 1.8014464168310322, + "high_lr": 0.0006394736842105263, + "low_lr": 1.2789473684210527e-05, + "step": 685 + }, + { + "epoch": 1.8014464168310322, + "high_lr": 0.0006394736842105263, + "low_lr": 1.2789473684210527e-05, + "step": 685 + }, + { + "epoch": 1.8014464168310322, + "high_lr": 0.0006394736842105263, + "low_lr": 1.2789473684210527e-05, + "step": 685 + }, + { + "epoch": 1.8014464168310322, + "high_lr": 0.0006394736842105263, + "low_lr": 1.2789473684210527e-05, + "step": 685 + }, + { + "epoch": 1.8014464168310322, + "high_lr": 0.0006394736842105263, + "low_lr": 1.2789473684210527e-05, + "step": 685 + }, + { + "epoch": 1.8014464168310322, + "high_lr": 0.0006394736842105263, + "low_lr": 1.2789473684210527e-05, + "step": 685 + }, + { + "epoch": 1.8014464168310322, + "high_lr": 0.0006394736842105263, + "low_lr": 1.2789473684210527e-05, + "step": 685 + }, + { + "epoch": 1.8014464168310322, + "high_lr": 0.0006394736842105263, + "low_lr": 1.2789473684210527e-05, + "step": 685 + }, + { + "epoch": 1.8040762656147271, + "grad_norm": 1.1349226236343384, + "learning_rate": 0.0006389473684210526, + "loss": 1.4314, + "step": 686 + }, + { + "epoch": 1.8040762656147271, + "high_lr": 0.0006389473684210526, + "low_lr": 1.2778947368421054e-05, + "step": 686 + }, + { + "epoch": 1.8040762656147271, + "high_lr": 0.0006389473684210526, + "low_lr": 1.2778947368421054e-05, + "step": 686 + }, + { + "epoch": 1.8040762656147271, + "high_lr": 0.0006389473684210526, + "low_lr": 1.2778947368421054e-05, + "step": 686 + }, + { + "epoch": 1.8040762656147271, + "high_lr": 0.0006389473684210526, + "low_lr": 1.2778947368421054e-05, + "step": 686 + }, + { + "epoch": 1.8040762656147271, + "high_lr": 0.0006389473684210526, + "low_lr": 1.2778947368421054e-05, + "step": 686 + }, + { + "epoch": 1.8040762656147271, + "high_lr": 0.0006389473684210526, + "low_lr": 1.2778947368421054e-05, + "step": 686 + }, + { + "epoch": 1.8040762656147271, + "high_lr": 0.0006389473684210526, + "low_lr": 1.2778947368421054e-05, + "step": 686 + }, + { + "epoch": 1.8040762656147271, + "high_lr": 0.0006389473684210526, + "low_lr": 1.2778947368421054e-05, + "step": 686 + }, + { + "epoch": 1.806706114398422, + "grad_norm": 1.1309823989868164, + "learning_rate": 0.000638421052631579, + "loss": 1.453, + "step": 687 + }, + { + "epoch": 1.806706114398422, + "high_lr": 0.000638421052631579, + "low_lr": 1.2768421052631582e-05, + "step": 687 + }, + { + "epoch": 1.806706114398422, + "high_lr": 0.000638421052631579, + "low_lr": 1.2768421052631582e-05, + "step": 687 + }, + { + "epoch": 1.806706114398422, + "high_lr": 0.000638421052631579, + "low_lr": 1.2768421052631582e-05, + "step": 687 + }, + { + "epoch": 1.806706114398422, + "high_lr": 0.000638421052631579, + "low_lr": 1.2768421052631582e-05, + "step": 687 + }, + { + "epoch": 1.806706114398422, + "high_lr": 0.000638421052631579, + "low_lr": 1.2768421052631582e-05, + "step": 687 + }, + { + "epoch": 1.806706114398422, + "high_lr": 0.000638421052631579, + "low_lr": 1.2768421052631582e-05, + "step": 687 + }, + { + "epoch": 1.806706114398422, + "high_lr": 0.000638421052631579, + "low_lr": 1.2768421052631582e-05, + "step": 687 + }, + { + "epoch": 1.806706114398422, + "high_lr": 0.000638421052631579, + "low_lr": 1.2768421052631582e-05, + "step": 687 + }, + { + "epoch": 1.809335963182117, + "grad_norm": 1.1155540943145752, + "learning_rate": 0.0006378947368421053, + "loss": 1.4349, + "step": 688 + }, + { + "epoch": 1.809335963182117, + "high_lr": 0.0006378947368421053, + "low_lr": 1.2757894736842106e-05, + "step": 688 + }, + { + "epoch": 1.809335963182117, + "high_lr": 0.0006378947368421053, + "low_lr": 1.2757894736842106e-05, + "step": 688 + }, + { + "epoch": 1.809335963182117, + "high_lr": 0.0006378947368421053, + "low_lr": 1.2757894736842106e-05, + "step": 688 + }, + { + "epoch": 1.809335963182117, + "high_lr": 0.0006378947368421053, + "low_lr": 1.2757894736842106e-05, + "step": 688 + }, + { + "epoch": 1.809335963182117, + "high_lr": 0.0006378947368421053, + "low_lr": 1.2757894736842106e-05, + "step": 688 + }, + { + "epoch": 1.809335963182117, + "high_lr": 0.0006378947368421053, + "low_lr": 1.2757894736842106e-05, + "step": 688 + }, + { + "epoch": 1.809335963182117, + "high_lr": 0.0006378947368421053, + "low_lr": 1.2757894736842106e-05, + "step": 688 + }, + { + "epoch": 1.809335963182117, + "high_lr": 0.0006378947368421053, + "low_lr": 1.2757894736842106e-05, + "step": 688 + }, + { + "epoch": 1.811965811965812, + "grad_norm": 1.0642731189727783, + "learning_rate": 0.0006373684210526316, + "loss": 1.4595, + "step": 689 + }, + { + "epoch": 1.811965811965812, + "high_lr": 0.0006373684210526316, + "low_lr": 1.2747368421052633e-05, + "step": 689 + }, + { + "epoch": 1.811965811965812, + "high_lr": 0.0006373684210526316, + "low_lr": 1.2747368421052633e-05, + "step": 689 + }, + { + "epoch": 1.811965811965812, + "high_lr": 0.0006373684210526316, + "low_lr": 1.2747368421052633e-05, + "step": 689 + }, + { + "epoch": 1.811965811965812, + "high_lr": 0.0006373684210526316, + "low_lr": 1.2747368421052633e-05, + "step": 689 + }, + { + "epoch": 1.811965811965812, + "high_lr": 0.0006373684210526316, + "low_lr": 1.2747368421052633e-05, + "step": 689 + }, + { + "epoch": 1.811965811965812, + "high_lr": 0.0006373684210526316, + "low_lr": 1.2747368421052633e-05, + "step": 689 + }, + { + "epoch": 1.811965811965812, + "high_lr": 0.0006373684210526316, + "low_lr": 1.2747368421052633e-05, + "step": 689 + }, + { + "epoch": 1.811965811965812, + "high_lr": 0.0006373684210526316, + "low_lr": 1.2747368421052633e-05, + "step": 689 + }, + { + "epoch": 1.8145956607495068, + "grad_norm": 1.1135817766189575, + "learning_rate": 0.0006368421052631579, + "loss": 1.5065, + "step": 690 + }, + { + "epoch": 1.8145956607495068, + "high_lr": 0.0006368421052631579, + "low_lr": 1.2736842105263159e-05, + "step": 690 + }, + { + "epoch": 1.8145956607495068, + "high_lr": 0.0006368421052631579, + "low_lr": 1.2736842105263159e-05, + "step": 690 + }, + { + "epoch": 1.8145956607495068, + "high_lr": 0.0006368421052631579, + "low_lr": 1.2736842105263159e-05, + "step": 690 + }, + { + "epoch": 1.8145956607495068, + "high_lr": 0.0006368421052631579, + "low_lr": 1.2736842105263159e-05, + "step": 690 + }, + { + "epoch": 1.8145956607495068, + "high_lr": 0.0006368421052631579, + "low_lr": 1.2736842105263159e-05, + "step": 690 + }, + { + "epoch": 1.8145956607495068, + "high_lr": 0.0006368421052631579, + "low_lr": 1.2736842105263159e-05, + "step": 690 + }, + { + "epoch": 1.8145956607495068, + "high_lr": 0.0006368421052631579, + "low_lr": 1.2736842105263159e-05, + "step": 690 + }, + { + "epoch": 1.8145956607495068, + "high_lr": 0.0006368421052631579, + "low_lr": 1.2736842105263159e-05, + "step": 690 + }, + { + "epoch": 1.817225509533202, + "grad_norm": 1.1606158018112183, + "learning_rate": 0.0006363157894736842, + "loss": 1.4447, + "step": 691 + }, + { + "epoch": 1.817225509533202, + "high_lr": 0.0006363157894736842, + "low_lr": 1.2726315789473684e-05, + "step": 691 + }, + { + "epoch": 1.817225509533202, + "high_lr": 0.0006363157894736842, + "low_lr": 1.2726315789473684e-05, + "step": 691 + }, + { + "epoch": 1.817225509533202, + "high_lr": 0.0006363157894736842, + "low_lr": 1.2726315789473684e-05, + "step": 691 + }, + { + "epoch": 1.817225509533202, + "high_lr": 0.0006363157894736842, + "low_lr": 1.2726315789473684e-05, + "step": 691 + }, + { + "epoch": 1.817225509533202, + "high_lr": 0.0006363157894736842, + "low_lr": 1.2726315789473684e-05, + "step": 691 + }, + { + "epoch": 1.817225509533202, + "high_lr": 0.0006363157894736842, + "low_lr": 1.2726315789473684e-05, + "step": 691 + }, + { + "epoch": 1.817225509533202, + "high_lr": 0.0006363157894736842, + "low_lr": 1.2726315789473684e-05, + "step": 691 + }, + { + "epoch": 1.817225509533202, + "high_lr": 0.0006363157894736842, + "low_lr": 1.2726315789473684e-05, + "step": 691 + }, + { + "epoch": 1.8198553583168968, + "grad_norm": 1.2284473180770874, + "learning_rate": 0.0006357894736842106, + "loss": 1.5534, + "step": 692 + }, + { + "epoch": 1.8198553583168968, + "high_lr": 0.0006357894736842106, + "low_lr": 1.2715789473684212e-05, + "step": 692 + }, + { + "epoch": 1.8198553583168968, + "high_lr": 0.0006357894736842106, + "low_lr": 1.2715789473684212e-05, + "step": 692 + }, + { + "epoch": 1.8198553583168968, + "high_lr": 0.0006357894736842106, + "low_lr": 1.2715789473684212e-05, + "step": 692 + }, + { + "epoch": 1.8198553583168968, + "high_lr": 0.0006357894736842106, + "low_lr": 1.2715789473684212e-05, + "step": 692 + }, + { + "epoch": 1.8198553583168968, + "high_lr": 0.0006357894736842106, + "low_lr": 1.2715789473684212e-05, + "step": 692 + }, + { + "epoch": 1.8198553583168968, + "high_lr": 0.0006357894736842106, + "low_lr": 1.2715789473684212e-05, + "step": 692 + }, + { + "epoch": 1.8198553583168968, + "high_lr": 0.0006357894736842106, + "low_lr": 1.2715789473684212e-05, + "step": 692 + }, + { + "epoch": 1.8198553583168968, + "high_lr": 0.0006357894736842106, + "low_lr": 1.2715789473684212e-05, + "step": 692 + }, + { + "epoch": 1.8224852071005917, + "grad_norm": 1.128949522972107, + "learning_rate": 0.0006352631578947368, + "loss": 1.471, + "step": 693 + }, + { + "epoch": 1.8224852071005917, + "high_lr": 0.0006352631578947368, + "low_lr": 1.2705263157894738e-05, + "step": 693 + }, + { + "epoch": 1.8224852071005917, + "high_lr": 0.0006352631578947368, + "low_lr": 1.2705263157894738e-05, + "step": 693 + }, + { + "epoch": 1.8224852071005917, + "high_lr": 0.0006352631578947368, + "low_lr": 1.2705263157894738e-05, + "step": 693 + }, + { + "epoch": 1.8224852071005917, + "high_lr": 0.0006352631578947368, + "low_lr": 1.2705263157894738e-05, + "step": 693 + }, + { + "epoch": 1.8224852071005917, + "high_lr": 0.0006352631578947368, + "low_lr": 1.2705263157894738e-05, + "step": 693 + }, + { + "epoch": 1.8224852071005917, + "high_lr": 0.0006352631578947368, + "low_lr": 1.2705263157894738e-05, + "step": 693 + }, + { + "epoch": 1.8224852071005917, + "high_lr": 0.0006352631578947368, + "low_lr": 1.2705263157894738e-05, + "step": 693 + }, + { + "epoch": 1.8224852071005917, + "high_lr": 0.0006352631578947368, + "low_lr": 1.2705263157894738e-05, + "step": 693 + }, + { + "epoch": 1.8251150558842868, + "grad_norm": 1.2049524784088135, + "learning_rate": 0.0006347368421052631, + "loss": 1.4575, + "step": 694 + }, + { + "epoch": 1.8251150558842868, + "high_lr": 0.0006347368421052631, + "low_lr": 1.2694736842105264e-05, + "step": 694 + }, + { + "epoch": 1.8251150558842868, + "high_lr": 0.0006347368421052631, + "low_lr": 1.2694736842105264e-05, + "step": 694 + }, + { + "epoch": 1.8251150558842868, + "high_lr": 0.0006347368421052631, + "low_lr": 1.2694736842105264e-05, + "step": 694 + }, + { + "epoch": 1.8251150558842868, + "high_lr": 0.0006347368421052631, + "low_lr": 1.2694736842105264e-05, + "step": 694 + }, + { + "epoch": 1.8251150558842868, + "high_lr": 0.0006347368421052631, + "low_lr": 1.2694736842105264e-05, + "step": 694 + }, + { + "epoch": 1.8251150558842868, + "high_lr": 0.0006347368421052631, + "low_lr": 1.2694736842105264e-05, + "step": 694 + }, + { + "epoch": 1.8251150558842868, + "high_lr": 0.0006347368421052631, + "low_lr": 1.2694736842105264e-05, + "step": 694 + }, + { + "epoch": 1.8251150558842868, + "high_lr": 0.0006347368421052631, + "low_lr": 1.2694736842105264e-05, + "step": 694 + }, + { + "epoch": 1.8277449046679815, + "grad_norm": 1.0460563898086548, + "learning_rate": 0.0006342105263157894, + "loss": 1.4718, + "step": 695 + }, + { + "epoch": 1.8277449046679815, + "high_lr": 0.0006342105263157894, + "low_lr": 1.268421052631579e-05, + "step": 695 + }, + { + "epoch": 1.8277449046679815, + "high_lr": 0.0006342105263157894, + "low_lr": 1.268421052631579e-05, + "step": 695 + }, + { + "epoch": 1.8277449046679815, + "high_lr": 0.0006342105263157894, + "low_lr": 1.268421052631579e-05, + "step": 695 + }, + { + "epoch": 1.8277449046679815, + "high_lr": 0.0006342105263157894, + "low_lr": 1.268421052631579e-05, + "step": 695 + }, + { + "epoch": 1.8277449046679815, + "high_lr": 0.0006342105263157894, + "low_lr": 1.268421052631579e-05, + "step": 695 + }, + { + "epoch": 1.8277449046679815, + "high_lr": 0.0006342105263157894, + "low_lr": 1.268421052631579e-05, + "step": 695 + }, + { + "epoch": 1.8277449046679815, + "high_lr": 0.0006342105263157894, + "low_lr": 1.268421052631579e-05, + "step": 695 + }, + { + "epoch": 1.8277449046679815, + "high_lr": 0.0006342105263157894, + "low_lr": 1.268421052631579e-05, + "step": 695 + }, + { + "epoch": 1.8303747534516766, + "grad_norm": 1.1107012033462524, + "learning_rate": 0.0006336842105263157, + "loss": 1.409, + "step": 696 + }, + { + "epoch": 1.8303747534516766, + "high_lr": 0.0006336842105263157, + "low_lr": 1.2673684210526315e-05, + "step": 696 + }, + { + "epoch": 1.8303747534516766, + "high_lr": 0.0006336842105263157, + "low_lr": 1.2673684210526315e-05, + "step": 696 + }, + { + "epoch": 1.8303747534516766, + "high_lr": 0.0006336842105263157, + "low_lr": 1.2673684210526315e-05, + "step": 696 + }, + { + "epoch": 1.8303747534516766, + "high_lr": 0.0006336842105263157, + "low_lr": 1.2673684210526315e-05, + "step": 696 + }, + { + "epoch": 1.8303747534516766, + "high_lr": 0.0006336842105263157, + "low_lr": 1.2673684210526315e-05, + "step": 696 + }, + { + "epoch": 1.8303747534516766, + "high_lr": 0.0006336842105263157, + "low_lr": 1.2673684210526315e-05, + "step": 696 + }, + { + "epoch": 1.8303747534516766, + "high_lr": 0.0006336842105263157, + "low_lr": 1.2673684210526315e-05, + "step": 696 + }, + { + "epoch": 1.8303747534516766, + "high_lr": 0.0006336842105263157, + "low_lr": 1.2673684210526315e-05, + "step": 696 + }, + { + "epoch": 1.8330046022353714, + "grad_norm": 1.090254783630371, + "learning_rate": 0.0006331578947368422, + "loss": 1.4258, + "step": 697 + }, + { + "epoch": 1.8330046022353714, + "high_lr": 0.0006331578947368422, + "low_lr": 1.2663157894736843e-05, + "step": 697 + }, + { + "epoch": 1.8330046022353714, + "high_lr": 0.0006331578947368422, + "low_lr": 1.2663157894736843e-05, + "step": 697 + }, + { + "epoch": 1.8330046022353714, + "high_lr": 0.0006331578947368422, + "low_lr": 1.2663157894736843e-05, + "step": 697 + }, + { + "epoch": 1.8330046022353714, + "high_lr": 0.0006331578947368422, + "low_lr": 1.2663157894736843e-05, + "step": 697 + }, + { + "epoch": 1.8330046022353714, + "high_lr": 0.0006331578947368422, + "low_lr": 1.2663157894736843e-05, + "step": 697 + }, + { + "epoch": 1.8330046022353714, + "high_lr": 0.0006331578947368422, + "low_lr": 1.2663157894736843e-05, + "step": 697 + }, + { + "epoch": 1.8330046022353714, + "high_lr": 0.0006331578947368422, + "low_lr": 1.2663157894736843e-05, + "step": 697 + }, + { + "epoch": 1.8330046022353714, + "high_lr": 0.0006331578947368422, + "low_lr": 1.2663157894736843e-05, + "step": 697 + }, + { + "epoch": 1.8356344510190663, + "grad_norm": 1.116258978843689, + "learning_rate": 0.0006326315789473685, + "loss": 1.4292, + "step": 698 + }, + { + "epoch": 1.8356344510190663, + "high_lr": 0.0006326315789473685, + "low_lr": 1.265263157894737e-05, + "step": 698 + }, + { + "epoch": 1.8356344510190663, + "high_lr": 0.0006326315789473685, + "low_lr": 1.265263157894737e-05, + "step": 698 + }, + { + "epoch": 1.8356344510190663, + "high_lr": 0.0006326315789473685, + "low_lr": 1.265263157894737e-05, + "step": 698 + }, + { + "epoch": 1.8356344510190663, + "high_lr": 0.0006326315789473685, + "low_lr": 1.265263157894737e-05, + "step": 698 + }, + { + "epoch": 1.8356344510190663, + "high_lr": 0.0006326315789473685, + "low_lr": 1.265263157894737e-05, + "step": 698 + }, + { + "epoch": 1.8356344510190663, + "high_lr": 0.0006326315789473685, + "low_lr": 1.265263157894737e-05, + "step": 698 + }, + { + "epoch": 1.8356344510190663, + "high_lr": 0.0006326315789473685, + "low_lr": 1.265263157894737e-05, + "step": 698 + }, + { + "epoch": 1.8356344510190663, + "high_lr": 0.0006326315789473685, + "low_lr": 1.265263157894737e-05, + "step": 698 + }, + { + "epoch": 1.8382642998027614, + "grad_norm": 1.164453387260437, + "learning_rate": 0.0006321052631578948, + "loss": 1.4696, + "step": 699 + }, + { + "epoch": 1.8382642998027614, + "high_lr": 0.0006321052631578948, + "low_lr": 1.2642105263157896e-05, + "step": 699 + }, + { + "epoch": 1.8382642998027614, + "high_lr": 0.0006321052631578948, + "low_lr": 1.2642105263157896e-05, + "step": 699 + }, + { + "epoch": 1.8382642998027614, + "high_lr": 0.0006321052631578948, + "low_lr": 1.2642105263157896e-05, + "step": 699 + }, + { + "epoch": 1.8382642998027614, + "high_lr": 0.0006321052631578948, + "low_lr": 1.2642105263157896e-05, + "step": 699 + }, + { + "epoch": 1.8382642998027614, + "high_lr": 0.0006321052631578948, + "low_lr": 1.2642105263157896e-05, + "step": 699 + }, + { + "epoch": 1.8382642998027614, + "high_lr": 0.0006321052631578948, + "low_lr": 1.2642105263157896e-05, + "step": 699 + }, + { + "epoch": 1.8382642998027614, + "high_lr": 0.0006321052631578948, + "low_lr": 1.2642105263157896e-05, + "step": 699 + }, + { + "epoch": 1.8382642998027614, + "high_lr": 0.0006321052631578948, + "low_lr": 1.2642105263157896e-05, + "step": 699 + }, + { + "epoch": 1.8408941485864563, + "grad_norm": 1.1327639818191528, + "learning_rate": 0.0006315789473684211, + "loss": 1.4706, + "step": 700 + }, + { + "epoch": 1.8408941485864563, + "high_lr": 0.0006315789473684211, + "low_lr": 1.263157894736842e-05, + "step": 700 + }, + { + "epoch": 1.8408941485864563, + "high_lr": 0.0006315789473684211, + "low_lr": 1.263157894736842e-05, + "step": 700 + }, + { + "epoch": 1.8408941485864563, + "high_lr": 0.0006315789473684211, + "low_lr": 1.263157894736842e-05, + "step": 700 + }, + { + "epoch": 1.8408941485864563, + "high_lr": 0.0006315789473684211, + "low_lr": 1.263157894736842e-05, + "step": 700 + }, + { + "epoch": 1.8408941485864563, + "high_lr": 0.0006315789473684211, + "low_lr": 1.263157894736842e-05, + "step": 700 + }, + { + "epoch": 1.8408941485864563, + "high_lr": 0.0006315789473684211, + "low_lr": 1.263157894736842e-05, + "step": 700 + }, + { + "epoch": 1.8408941485864563, + "high_lr": 0.0006315789473684211, + "low_lr": 1.263157894736842e-05, + "step": 700 + }, + { + "epoch": 1.8408941485864563, + "high_lr": 0.0006315789473684211, + "low_lr": 1.263157894736842e-05, + "step": 700 + }, + { + "epoch": 1.8435239973701512, + "grad_norm": 1.1588438749313354, + "learning_rate": 0.0006310526315789475, + "loss": 1.4673, + "step": 701 + }, + { + "epoch": 1.8435239973701512, + "high_lr": 0.0006310526315789475, + "low_lr": 1.2621052631578949e-05, + "step": 701 + }, + { + "epoch": 1.8435239973701512, + "high_lr": 0.0006310526315789475, + "low_lr": 1.2621052631578949e-05, + "step": 701 + }, + { + "epoch": 1.8435239973701512, + "high_lr": 0.0006310526315789475, + "low_lr": 1.2621052631578949e-05, + "step": 701 + }, + { + "epoch": 1.8435239973701512, + "high_lr": 0.0006310526315789475, + "low_lr": 1.2621052631578949e-05, + "step": 701 + }, + { + "epoch": 1.8435239973701512, + "high_lr": 0.0006310526315789475, + "low_lr": 1.2621052631578949e-05, + "step": 701 + }, + { + "epoch": 1.8435239973701512, + "high_lr": 0.0006310526315789475, + "low_lr": 1.2621052631578949e-05, + "step": 701 + }, + { + "epoch": 1.8435239973701512, + "high_lr": 0.0006310526315789475, + "low_lr": 1.2621052631578949e-05, + "step": 701 + }, + { + "epoch": 1.8435239973701512, + "high_lr": 0.0006310526315789475, + "low_lr": 1.2621052631578949e-05, + "step": 701 + }, + { + "epoch": 1.8461538461538463, + "grad_norm": 1.1653773784637451, + "learning_rate": 0.0006305263157894737, + "loss": 1.4839, + "step": 702 + }, + { + "epoch": 1.8461538461538463, + "high_lr": 0.0006305263157894737, + "low_lr": 1.2610526315789475e-05, + "step": 702 + }, + { + "epoch": 1.8461538461538463, + "high_lr": 0.0006305263157894737, + "low_lr": 1.2610526315789475e-05, + "step": 702 + }, + { + "epoch": 1.8461538461538463, + "high_lr": 0.0006305263157894737, + "low_lr": 1.2610526315789475e-05, + "step": 702 + }, + { + "epoch": 1.8461538461538463, + "high_lr": 0.0006305263157894737, + "low_lr": 1.2610526315789475e-05, + "step": 702 + }, + { + "epoch": 1.8461538461538463, + "high_lr": 0.0006305263157894737, + "low_lr": 1.2610526315789475e-05, + "step": 702 + }, + { + "epoch": 1.8461538461538463, + "high_lr": 0.0006305263157894737, + "low_lr": 1.2610526315789475e-05, + "step": 702 + }, + { + "epoch": 1.8461538461538463, + "high_lr": 0.0006305263157894737, + "low_lr": 1.2610526315789475e-05, + "step": 702 + }, + { + "epoch": 1.8461538461538463, + "high_lr": 0.0006305263157894737, + "low_lr": 1.2610526315789475e-05, + "step": 702 + }, + { + "epoch": 1.8487836949375411, + "grad_norm": 1.1697614192962646, + "learning_rate": 0.00063, + "loss": 1.4751, + "step": 703 + }, + { + "epoch": 1.8487836949375411, + "high_lr": 0.00063, + "low_lr": 1.2600000000000001e-05, + "step": 703 + }, + { + "epoch": 1.8487836949375411, + "high_lr": 0.00063, + "low_lr": 1.2600000000000001e-05, + "step": 703 + }, + { + "epoch": 1.8487836949375411, + "high_lr": 0.00063, + "low_lr": 1.2600000000000001e-05, + "step": 703 + }, + { + "epoch": 1.8487836949375411, + "high_lr": 0.00063, + "low_lr": 1.2600000000000001e-05, + "step": 703 + }, + { + "epoch": 1.8487836949375411, + "high_lr": 0.00063, + "low_lr": 1.2600000000000001e-05, + "step": 703 + }, + { + "epoch": 1.8487836949375411, + "high_lr": 0.00063, + "low_lr": 1.2600000000000001e-05, + "step": 703 + }, + { + "epoch": 1.8487836949375411, + "high_lr": 0.00063, + "low_lr": 1.2600000000000001e-05, + "step": 703 + }, + { + "epoch": 1.8487836949375411, + "high_lr": 0.00063, + "low_lr": 1.2600000000000001e-05, + "step": 703 + }, + { + "epoch": 1.851413543721236, + "grad_norm": 1.1886961460113525, + "learning_rate": 0.0006294736842105263, + "loss": 1.5048, + "step": 704 + }, + { + "epoch": 1.851413543721236, + "high_lr": 0.0006294736842105263, + "low_lr": 1.2589473684210528e-05, + "step": 704 + }, + { + "epoch": 1.851413543721236, + "high_lr": 0.0006294736842105263, + "low_lr": 1.2589473684210528e-05, + "step": 704 + }, + { + "epoch": 1.851413543721236, + "high_lr": 0.0006294736842105263, + "low_lr": 1.2589473684210528e-05, + "step": 704 + }, + { + "epoch": 1.851413543721236, + "high_lr": 0.0006294736842105263, + "low_lr": 1.2589473684210528e-05, + "step": 704 + }, + { + "epoch": 1.851413543721236, + "high_lr": 0.0006294736842105263, + "low_lr": 1.2589473684210528e-05, + "step": 704 + }, + { + "epoch": 1.851413543721236, + "high_lr": 0.0006294736842105263, + "low_lr": 1.2589473684210528e-05, + "step": 704 + }, + { + "epoch": 1.851413543721236, + "high_lr": 0.0006294736842105263, + "low_lr": 1.2589473684210528e-05, + "step": 704 + }, + { + "epoch": 1.851413543721236, + "high_lr": 0.0006294736842105263, + "low_lr": 1.2589473684210528e-05, + "step": 704 + }, + { + "epoch": 1.854043392504931, + "grad_norm": 1.1062883138656616, + "learning_rate": 0.0006289473684210526, + "loss": 1.4738, + "step": 705 + }, + { + "epoch": 1.854043392504931, + "high_lr": 0.0006289473684210526, + "low_lr": 1.2578947368421052e-05, + "step": 705 + }, + { + "epoch": 1.854043392504931, + "high_lr": 0.0006289473684210526, + "low_lr": 1.2578947368421052e-05, + "step": 705 + }, + { + "epoch": 1.854043392504931, + "high_lr": 0.0006289473684210526, + "low_lr": 1.2578947368421052e-05, + "step": 705 + }, + { + "epoch": 1.854043392504931, + "high_lr": 0.0006289473684210526, + "low_lr": 1.2578947368421052e-05, + "step": 705 + }, + { + "epoch": 1.854043392504931, + "high_lr": 0.0006289473684210526, + "low_lr": 1.2578947368421052e-05, + "step": 705 + }, + { + "epoch": 1.854043392504931, + "high_lr": 0.0006289473684210526, + "low_lr": 1.2578947368421052e-05, + "step": 705 + }, + { + "epoch": 1.854043392504931, + "high_lr": 0.0006289473684210526, + "low_lr": 1.2578947368421052e-05, + "step": 705 + }, + { + "epoch": 1.854043392504931, + "high_lr": 0.0006289473684210526, + "low_lr": 1.2578947368421052e-05, + "step": 705 + }, + { + "epoch": 1.8566732412886258, + "grad_norm": 1.1155682802200317, + "learning_rate": 0.000628421052631579, + "loss": 1.481, + "step": 706 + }, + { + "epoch": 1.8566732412886258, + "high_lr": 0.000628421052631579, + "low_lr": 1.256842105263158e-05, + "step": 706 + }, + { + "epoch": 1.8566732412886258, + "high_lr": 0.000628421052631579, + "low_lr": 1.256842105263158e-05, + "step": 706 + }, + { + "epoch": 1.8566732412886258, + "high_lr": 0.000628421052631579, + "low_lr": 1.256842105263158e-05, + "step": 706 + }, + { + "epoch": 1.8566732412886258, + "high_lr": 0.000628421052631579, + "low_lr": 1.256842105263158e-05, + "step": 706 + }, + { + "epoch": 1.8566732412886258, + "high_lr": 0.000628421052631579, + "low_lr": 1.256842105263158e-05, + "step": 706 + }, + { + "epoch": 1.8566732412886258, + "high_lr": 0.000628421052631579, + "low_lr": 1.256842105263158e-05, + "step": 706 + }, + { + "epoch": 1.8566732412886258, + "high_lr": 0.000628421052631579, + "low_lr": 1.256842105263158e-05, + "step": 706 + }, + { + "epoch": 1.8566732412886258, + "high_lr": 0.000628421052631579, + "low_lr": 1.256842105263158e-05, + "step": 706 + }, + { + "epoch": 1.8593030900723209, + "grad_norm": 1.0740604400634766, + "learning_rate": 0.0006278947368421053, + "loss": 1.4829, + "step": 707 + }, + { + "epoch": 1.8593030900723209, + "high_lr": 0.0006278947368421053, + "low_lr": 1.2557894736842107e-05, + "step": 707 + }, + { + "epoch": 1.8593030900723209, + "high_lr": 0.0006278947368421053, + "low_lr": 1.2557894736842107e-05, + "step": 707 + }, + { + "epoch": 1.8593030900723209, + "high_lr": 0.0006278947368421053, + "low_lr": 1.2557894736842107e-05, + "step": 707 + }, + { + "epoch": 1.8593030900723209, + "high_lr": 0.0006278947368421053, + "low_lr": 1.2557894736842107e-05, + "step": 707 + }, + { + "epoch": 1.8593030900723209, + "high_lr": 0.0006278947368421053, + "low_lr": 1.2557894736842107e-05, + "step": 707 + }, + { + "epoch": 1.8593030900723209, + "high_lr": 0.0006278947368421053, + "low_lr": 1.2557894736842107e-05, + "step": 707 + }, + { + "epoch": 1.8593030900723209, + "high_lr": 0.0006278947368421053, + "low_lr": 1.2557894736842107e-05, + "step": 707 + }, + { + "epoch": 1.8593030900723209, + "high_lr": 0.0006278947368421053, + "low_lr": 1.2557894736842107e-05, + "step": 707 + }, + { + "epoch": 1.8619329388560157, + "grad_norm": 1.1394215822219849, + "learning_rate": 0.0006273684210526316, + "loss": 1.5019, + "step": 708 + }, + { + "epoch": 1.8619329388560157, + "high_lr": 0.0006273684210526316, + "low_lr": 1.2547368421052633e-05, + "step": 708 + }, + { + "epoch": 1.8619329388560157, + "high_lr": 0.0006273684210526316, + "low_lr": 1.2547368421052633e-05, + "step": 708 + }, + { + "epoch": 1.8619329388560157, + "high_lr": 0.0006273684210526316, + "low_lr": 1.2547368421052633e-05, + "step": 708 + }, + { + "epoch": 1.8619329388560157, + "high_lr": 0.0006273684210526316, + "low_lr": 1.2547368421052633e-05, + "step": 708 + }, + { + "epoch": 1.8619329388560157, + "high_lr": 0.0006273684210526316, + "low_lr": 1.2547368421052633e-05, + "step": 708 + }, + { + "epoch": 1.8619329388560157, + "high_lr": 0.0006273684210526316, + "low_lr": 1.2547368421052633e-05, + "step": 708 + }, + { + "epoch": 1.8619329388560157, + "high_lr": 0.0006273684210526316, + "low_lr": 1.2547368421052633e-05, + "step": 708 + }, + { + "epoch": 1.8619329388560157, + "high_lr": 0.0006273684210526316, + "low_lr": 1.2547368421052633e-05, + "step": 708 + }, + { + "epoch": 1.8645627876397106, + "grad_norm": 1.1655924320220947, + "learning_rate": 0.0006268421052631578, + "loss": 1.4564, + "step": 709 + }, + { + "epoch": 1.8645627876397106, + "high_lr": 0.0006268421052631578, + "low_lr": 1.2536842105263158e-05, + "step": 709 + }, + { + "epoch": 1.8645627876397106, + "high_lr": 0.0006268421052631578, + "low_lr": 1.2536842105263158e-05, + "step": 709 + }, + { + "epoch": 1.8645627876397106, + "high_lr": 0.0006268421052631578, + "low_lr": 1.2536842105263158e-05, + "step": 709 + }, + { + "epoch": 1.8645627876397106, + "high_lr": 0.0006268421052631578, + "low_lr": 1.2536842105263158e-05, + "step": 709 + }, + { + "epoch": 1.8645627876397106, + "high_lr": 0.0006268421052631578, + "low_lr": 1.2536842105263158e-05, + "step": 709 + }, + { + "epoch": 1.8645627876397106, + "high_lr": 0.0006268421052631578, + "low_lr": 1.2536842105263158e-05, + "step": 709 + }, + { + "epoch": 1.8645627876397106, + "high_lr": 0.0006268421052631578, + "low_lr": 1.2536842105263158e-05, + "step": 709 + }, + { + "epoch": 1.8645627876397106, + "high_lr": 0.0006268421052631578, + "low_lr": 1.2536842105263158e-05, + "step": 709 + }, + { + "epoch": 1.8671926364234057, + "grad_norm": 1.150449275970459, + "learning_rate": 0.0006263157894736841, + "loss": 1.4662, + "step": 710 + }, + { + "epoch": 1.8671926364234057, + "high_lr": 0.0006263157894736841, + "low_lr": 1.2526315789473684e-05, + "step": 710 + }, + { + "epoch": 1.8671926364234057, + "high_lr": 0.0006263157894736841, + "low_lr": 1.2526315789473684e-05, + "step": 710 + }, + { + "epoch": 1.8671926364234057, + "high_lr": 0.0006263157894736841, + "low_lr": 1.2526315789473684e-05, + "step": 710 + }, + { + "epoch": 1.8671926364234057, + "high_lr": 0.0006263157894736841, + "low_lr": 1.2526315789473684e-05, + "step": 710 + }, + { + "epoch": 1.8671926364234057, + "high_lr": 0.0006263157894736841, + "low_lr": 1.2526315789473684e-05, + "step": 710 + }, + { + "epoch": 1.8671926364234057, + "high_lr": 0.0006263157894736841, + "low_lr": 1.2526315789473684e-05, + "step": 710 + }, + { + "epoch": 1.8671926364234057, + "high_lr": 0.0006263157894736841, + "low_lr": 1.2526315789473684e-05, + "step": 710 + }, + { + "epoch": 1.8671926364234057, + "high_lr": 0.0006263157894736841, + "low_lr": 1.2526315789473684e-05, + "step": 710 + }, + { + "epoch": 1.8698224852071006, + "grad_norm": 1.20257568359375, + "learning_rate": 0.0006257894736842105, + "loss": 1.4665, + "step": 711 + }, + { + "epoch": 1.8698224852071006, + "high_lr": 0.0006257894736842105, + "low_lr": 1.2515789473684212e-05, + "step": 711 + }, + { + "epoch": 1.8698224852071006, + "high_lr": 0.0006257894736842105, + "low_lr": 1.2515789473684212e-05, + "step": 711 + }, + { + "epoch": 1.8698224852071006, + "high_lr": 0.0006257894736842105, + "low_lr": 1.2515789473684212e-05, + "step": 711 + }, + { + "epoch": 1.8698224852071006, + "high_lr": 0.0006257894736842105, + "low_lr": 1.2515789473684212e-05, + "step": 711 + }, + { + "epoch": 1.8698224852071006, + "high_lr": 0.0006257894736842105, + "low_lr": 1.2515789473684212e-05, + "step": 711 + }, + { + "epoch": 1.8698224852071006, + "high_lr": 0.0006257894736842105, + "low_lr": 1.2515789473684212e-05, + "step": 711 + }, + { + "epoch": 1.8698224852071006, + "high_lr": 0.0006257894736842105, + "low_lr": 1.2515789473684212e-05, + "step": 711 + }, + { + "epoch": 1.8698224852071006, + "high_lr": 0.0006257894736842105, + "low_lr": 1.2515789473684212e-05, + "step": 711 + }, + { + "epoch": 1.8724523339907955, + "grad_norm": 1.0963051319122314, + "learning_rate": 0.0006252631578947368, + "loss": 1.4644, + "step": 712 + }, + { + "epoch": 1.8724523339907955, + "high_lr": 0.0006252631578947368, + "low_lr": 1.2505263157894738e-05, + "step": 712 + }, + { + "epoch": 1.8724523339907955, + "high_lr": 0.0006252631578947368, + "low_lr": 1.2505263157894738e-05, + "step": 712 + }, + { + "epoch": 1.8724523339907955, + "high_lr": 0.0006252631578947368, + "low_lr": 1.2505263157894738e-05, + "step": 712 + }, + { + "epoch": 1.8724523339907955, + "high_lr": 0.0006252631578947368, + "low_lr": 1.2505263157894738e-05, + "step": 712 + }, + { + "epoch": 1.8724523339907955, + "high_lr": 0.0006252631578947368, + "low_lr": 1.2505263157894738e-05, + "step": 712 + }, + { + "epoch": 1.8724523339907955, + "high_lr": 0.0006252631578947368, + "low_lr": 1.2505263157894738e-05, + "step": 712 + }, + { + "epoch": 1.8724523339907955, + "high_lr": 0.0006252631578947368, + "low_lr": 1.2505263157894738e-05, + "step": 712 + }, + { + "epoch": 1.8724523339907955, + "high_lr": 0.0006252631578947368, + "low_lr": 1.2505263157894738e-05, + "step": 712 + }, + { + "epoch": 1.8750821827744906, + "grad_norm": 1.2021636962890625, + "learning_rate": 0.0006247368421052632, + "loss": 1.4841, + "step": 713 + }, + { + "epoch": 1.8750821827744906, + "high_lr": 0.0006247368421052632, + "low_lr": 1.2494736842105265e-05, + "step": 713 + }, + { + "epoch": 1.8750821827744906, + "high_lr": 0.0006247368421052632, + "low_lr": 1.2494736842105265e-05, + "step": 713 + }, + { + "epoch": 1.8750821827744906, + "high_lr": 0.0006247368421052632, + "low_lr": 1.2494736842105265e-05, + "step": 713 + }, + { + "epoch": 1.8750821827744906, + "high_lr": 0.0006247368421052632, + "low_lr": 1.2494736842105265e-05, + "step": 713 + }, + { + "epoch": 1.8750821827744906, + "high_lr": 0.0006247368421052632, + "low_lr": 1.2494736842105265e-05, + "step": 713 + }, + { + "epoch": 1.8750821827744906, + "high_lr": 0.0006247368421052632, + "low_lr": 1.2494736842105265e-05, + "step": 713 + }, + { + "epoch": 1.8750821827744906, + "high_lr": 0.0006247368421052632, + "low_lr": 1.2494736842105265e-05, + "step": 713 + }, + { + "epoch": 1.8750821827744906, + "high_lr": 0.0006247368421052632, + "low_lr": 1.2494736842105265e-05, + "step": 713 + }, + { + "epoch": 1.8777120315581854, + "grad_norm": 1.085993766784668, + "learning_rate": 0.0006242105263157895, + "loss": 1.4279, + "step": 714 + }, + { + "epoch": 1.8777120315581854, + "high_lr": 0.0006242105263157895, + "low_lr": 1.248421052631579e-05, + "step": 714 + }, + { + "epoch": 1.8777120315581854, + "high_lr": 0.0006242105263157895, + "low_lr": 1.248421052631579e-05, + "step": 714 + }, + { + "epoch": 1.8777120315581854, + "high_lr": 0.0006242105263157895, + "low_lr": 1.248421052631579e-05, + "step": 714 + }, + { + "epoch": 1.8777120315581854, + "high_lr": 0.0006242105263157895, + "low_lr": 1.248421052631579e-05, + "step": 714 + }, + { + "epoch": 1.8777120315581854, + "high_lr": 0.0006242105263157895, + "low_lr": 1.248421052631579e-05, + "step": 714 + }, + { + "epoch": 1.8777120315581854, + "high_lr": 0.0006242105263157895, + "low_lr": 1.248421052631579e-05, + "step": 714 + }, + { + "epoch": 1.8777120315581854, + "high_lr": 0.0006242105263157895, + "low_lr": 1.248421052631579e-05, + "step": 714 + }, + { + "epoch": 1.8777120315581854, + "high_lr": 0.0006242105263157895, + "low_lr": 1.248421052631579e-05, + "step": 714 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 1.02846097946167, + "learning_rate": 0.0006236842105263159, + "loss": 1.4146, + "step": 715 + }, + { + "epoch": 1.8803418803418803, + "high_lr": 0.0006236842105263159, + "low_lr": 1.2473684210526317e-05, + "step": 715 + }, + { + "epoch": 1.8803418803418803, + "high_lr": 0.0006236842105263159, + "low_lr": 1.2473684210526317e-05, + "step": 715 + }, + { + "epoch": 1.8803418803418803, + "high_lr": 0.0006236842105263159, + "low_lr": 1.2473684210526317e-05, + "step": 715 + }, + { + "epoch": 1.8803418803418803, + "high_lr": 0.0006236842105263159, + "low_lr": 1.2473684210526317e-05, + "step": 715 + }, + { + "epoch": 1.8803418803418803, + "high_lr": 0.0006236842105263159, + "low_lr": 1.2473684210526317e-05, + "step": 715 + }, + { + "epoch": 1.8803418803418803, + "high_lr": 0.0006236842105263159, + "low_lr": 1.2473684210526317e-05, + "step": 715 + }, + { + "epoch": 1.8803418803418803, + "high_lr": 0.0006236842105263159, + "low_lr": 1.2473684210526317e-05, + "step": 715 + }, + { + "epoch": 1.8803418803418803, + "high_lr": 0.0006236842105263159, + "low_lr": 1.2473684210526317e-05, + "step": 715 + }, + { + "epoch": 1.8829717291255754, + "grad_norm": 1.2024359703063965, + "learning_rate": 0.0006231578947368422, + "loss": 1.4347, + "step": 716 + }, + { + "epoch": 1.8829717291255754, + "high_lr": 0.0006231578947368422, + "low_lr": 1.2463157894736844e-05, + "step": 716 + }, + { + "epoch": 1.8829717291255754, + "high_lr": 0.0006231578947368422, + "low_lr": 1.2463157894736844e-05, + "step": 716 + }, + { + "epoch": 1.8829717291255754, + "high_lr": 0.0006231578947368422, + "low_lr": 1.2463157894736844e-05, + "step": 716 + }, + { + "epoch": 1.8829717291255754, + "high_lr": 0.0006231578947368422, + "low_lr": 1.2463157894736844e-05, + "step": 716 + }, + { + "epoch": 1.8829717291255754, + "high_lr": 0.0006231578947368422, + "low_lr": 1.2463157894736844e-05, + "step": 716 + }, + { + "epoch": 1.8829717291255754, + "high_lr": 0.0006231578947368422, + "low_lr": 1.2463157894736844e-05, + "step": 716 + }, + { + "epoch": 1.8829717291255754, + "high_lr": 0.0006231578947368422, + "low_lr": 1.2463157894736844e-05, + "step": 716 + }, + { + "epoch": 1.8829717291255754, + "high_lr": 0.0006231578947368422, + "low_lr": 1.2463157894736844e-05, + "step": 716 + }, + { + "epoch": 1.88560157790927, + "grad_norm": 1.135720133781433, + "learning_rate": 0.0006226315789473685, + "loss": 1.4472, + "step": 717 + }, + { + "epoch": 1.88560157790927, + "high_lr": 0.0006226315789473685, + "low_lr": 1.245263157894737e-05, + "step": 717 + }, + { + "epoch": 1.88560157790927, + "high_lr": 0.0006226315789473685, + "low_lr": 1.245263157894737e-05, + "step": 717 + }, + { + "epoch": 1.88560157790927, + "high_lr": 0.0006226315789473685, + "low_lr": 1.245263157894737e-05, + "step": 717 + }, + { + "epoch": 1.88560157790927, + "high_lr": 0.0006226315789473685, + "low_lr": 1.245263157894737e-05, + "step": 717 + }, + { + "epoch": 1.88560157790927, + "high_lr": 0.0006226315789473685, + "low_lr": 1.245263157894737e-05, + "step": 717 + }, + { + "epoch": 1.88560157790927, + "high_lr": 0.0006226315789473685, + "low_lr": 1.245263157894737e-05, + "step": 717 + }, + { + "epoch": 1.88560157790927, + "high_lr": 0.0006226315789473685, + "low_lr": 1.245263157894737e-05, + "step": 717 + }, + { + "epoch": 1.88560157790927, + "high_lr": 0.0006226315789473685, + "low_lr": 1.245263157894737e-05, + "step": 717 + }, + { + "epoch": 1.8882314266929652, + "grad_norm": 1.146132230758667, + "learning_rate": 0.0006221052631578947, + "loss": 1.4908, + "step": 718 + }, + { + "epoch": 1.8882314266929652, + "high_lr": 0.0006221052631578947, + "low_lr": 1.2442105263157895e-05, + "step": 718 + }, + { + "epoch": 1.8882314266929652, + "high_lr": 0.0006221052631578947, + "low_lr": 1.2442105263157895e-05, + "step": 718 + }, + { + "epoch": 1.8882314266929652, + "high_lr": 0.0006221052631578947, + "low_lr": 1.2442105263157895e-05, + "step": 718 + }, + { + "epoch": 1.8882314266929652, + "high_lr": 0.0006221052631578947, + "low_lr": 1.2442105263157895e-05, + "step": 718 + }, + { + "epoch": 1.8882314266929652, + "high_lr": 0.0006221052631578947, + "low_lr": 1.2442105263157895e-05, + "step": 718 + }, + { + "epoch": 1.8882314266929652, + "high_lr": 0.0006221052631578947, + "low_lr": 1.2442105263157895e-05, + "step": 718 + }, + { + "epoch": 1.8882314266929652, + "high_lr": 0.0006221052631578947, + "low_lr": 1.2442105263157895e-05, + "step": 718 + }, + { + "epoch": 1.8882314266929652, + "high_lr": 0.0006221052631578947, + "low_lr": 1.2442105263157895e-05, + "step": 718 + }, + { + "epoch": 1.89086127547666, + "grad_norm": 1.113741159439087, + "learning_rate": 0.000621578947368421, + "loss": 1.476, + "step": 719 + }, + { + "epoch": 1.89086127547666, + "high_lr": 0.000621578947368421, + "low_lr": 1.2431578947368421e-05, + "step": 719 + }, + { + "epoch": 1.89086127547666, + "high_lr": 0.000621578947368421, + "low_lr": 1.2431578947368421e-05, + "step": 719 + }, + { + "epoch": 1.89086127547666, + "high_lr": 0.000621578947368421, + "low_lr": 1.2431578947368421e-05, + "step": 719 + }, + { + "epoch": 1.89086127547666, + "high_lr": 0.000621578947368421, + "low_lr": 1.2431578947368421e-05, + "step": 719 + }, + { + "epoch": 1.89086127547666, + "high_lr": 0.000621578947368421, + "low_lr": 1.2431578947368421e-05, + "step": 719 + }, + { + "epoch": 1.89086127547666, + "high_lr": 0.000621578947368421, + "low_lr": 1.2431578947368421e-05, + "step": 719 + }, + { + "epoch": 1.89086127547666, + "high_lr": 0.000621578947368421, + "low_lr": 1.2431578947368421e-05, + "step": 719 + }, + { + "epoch": 1.89086127547666, + "high_lr": 0.000621578947368421, + "low_lr": 1.2431578947368421e-05, + "step": 719 + }, + { + "epoch": 1.893491124260355, + "grad_norm": 1.0785640478134155, + "learning_rate": 0.0006210526315789474, + "loss": 1.4843, + "step": 720 + }, + { + "epoch": 1.893491124260355, + "high_lr": 0.0006210526315789474, + "low_lr": 1.2421052631578949e-05, + "step": 720 + }, + { + "epoch": 1.893491124260355, + "high_lr": 0.0006210526315789474, + "low_lr": 1.2421052631578949e-05, + "step": 720 + }, + { + "epoch": 1.893491124260355, + "high_lr": 0.0006210526315789474, + "low_lr": 1.2421052631578949e-05, + "step": 720 + }, + { + "epoch": 1.893491124260355, + "high_lr": 0.0006210526315789474, + "low_lr": 1.2421052631578949e-05, + "step": 720 + }, + { + "epoch": 1.893491124260355, + "high_lr": 0.0006210526315789474, + "low_lr": 1.2421052631578949e-05, + "step": 720 + }, + { + "epoch": 1.893491124260355, + "high_lr": 0.0006210526315789474, + "low_lr": 1.2421052631578949e-05, + "step": 720 + }, + { + "epoch": 1.893491124260355, + "high_lr": 0.0006210526315789474, + "low_lr": 1.2421052631578949e-05, + "step": 720 + }, + { + "epoch": 1.893491124260355, + "high_lr": 0.0006210526315789474, + "low_lr": 1.2421052631578949e-05, + "step": 720 + }, + { + "epoch": 1.89612097304405, + "grad_norm": 1.1490678787231445, + "learning_rate": 0.0006205263157894737, + "loss": 1.5026, + "step": 721 + }, + { + "epoch": 1.89612097304405, + "high_lr": 0.0006205263157894737, + "low_lr": 1.2410526315789475e-05, + "step": 721 + }, + { + "epoch": 1.89612097304405, + "high_lr": 0.0006205263157894737, + "low_lr": 1.2410526315789475e-05, + "step": 721 + }, + { + "epoch": 1.89612097304405, + "high_lr": 0.0006205263157894737, + "low_lr": 1.2410526315789475e-05, + "step": 721 + }, + { + "epoch": 1.89612097304405, + "high_lr": 0.0006205263157894737, + "low_lr": 1.2410526315789475e-05, + "step": 721 + }, + { + "epoch": 1.89612097304405, + "high_lr": 0.0006205263157894737, + "low_lr": 1.2410526315789475e-05, + "step": 721 + }, + { + "epoch": 1.89612097304405, + "high_lr": 0.0006205263157894737, + "low_lr": 1.2410526315789475e-05, + "step": 721 + }, + { + "epoch": 1.89612097304405, + "high_lr": 0.0006205263157894737, + "low_lr": 1.2410526315789475e-05, + "step": 721 + }, + { + "epoch": 1.89612097304405, + "high_lr": 0.0006205263157894737, + "low_lr": 1.2410526315789475e-05, + "step": 721 + }, + { + "epoch": 1.898750821827745, + "grad_norm": 1.1461573839187622, + "learning_rate": 0.00062, + "loss": 1.4493, + "step": 722 + }, + { + "epoch": 1.898750821827745, + "high_lr": 0.00062, + "low_lr": 1.2400000000000002e-05, + "step": 722 + }, + { + "epoch": 1.898750821827745, + "high_lr": 0.00062, + "low_lr": 1.2400000000000002e-05, + "step": 722 + }, + { + "epoch": 1.898750821827745, + "high_lr": 0.00062, + "low_lr": 1.2400000000000002e-05, + "step": 722 + }, + { + "epoch": 1.898750821827745, + "high_lr": 0.00062, + "low_lr": 1.2400000000000002e-05, + "step": 722 + }, + { + "epoch": 1.898750821827745, + "high_lr": 0.00062, + "low_lr": 1.2400000000000002e-05, + "step": 722 + }, + { + "epoch": 1.898750821827745, + "high_lr": 0.00062, + "low_lr": 1.2400000000000002e-05, + "step": 722 + }, + { + "epoch": 1.898750821827745, + "high_lr": 0.00062, + "low_lr": 1.2400000000000002e-05, + "step": 722 + }, + { + "epoch": 1.898750821827745, + "high_lr": 0.00062, + "low_lr": 1.2400000000000002e-05, + "step": 722 + }, + { + "epoch": 1.9013806706114398, + "grad_norm": 1.1096080541610718, + "learning_rate": 0.0006194736842105263, + "loss": 1.4309, + "step": 723 + }, + { + "epoch": 1.9013806706114398, + "high_lr": 0.0006194736842105263, + "low_lr": 1.2389473684210526e-05, + "step": 723 + }, + { + "epoch": 1.9013806706114398, + "high_lr": 0.0006194736842105263, + "low_lr": 1.2389473684210526e-05, + "step": 723 + }, + { + "epoch": 1.9013806706114398, + "high_lr": 0.0006194736842105263, + "low_lr": 1.2389473684210526e-05, + "step": 723 + }, + { + "epoch": 1.9013806706114398, + "high_lr": 0.0006194736842105263, + "low_lr": 1.2389473684210526e-05, + "step": 723 + }, + { + "epoch": 1.9013806706114398, + "high_lr": 0.0006194736842105263, + "low_lr": 1.2389473684210526e-05, + "step": 723 + }, + { + "epoch": 1.9013806706114398, + "high_lr": 0.0006194736842105263, + "low_lr": 1.2389473684210526e-05, + "step": 723 + }, + { + "epoch": 1.9013806706114398, + "high_lr": 0.0006194736842105263, + "low_lr": 1.2389473684210526e-05, + "step": 723 + }, + { + "epoch": 1.9013806706114398, + "high_lr": 0.0006194736842105263, + "low_lr": 1.2389473684210526e-05, + "step": 723 + }, + { + "epoch": 1.9040105193951349, + "grad_norm": 1.2648789882659912, + "learning_rate": 0.0006189473684210526, + "loss": 1.5025, + "step": 724 + }, + { + "epoch": 1.9040105193951349, + "high_lr": 0.0006189473684210526, + "low_lr": 1.2378947368421053e-05, + "step": 724 + }, + { + "epoch": 1.9040105193951349, + "high_lr": 0.0006189473684210526, + "low_lr": 1.2378947368421053e-05, + "step": 724 + }, + { + "epoch": 1.9040105193951349, + "high_lr": 0.0006189473684210526, + "low_lr": 1.2378947368421053e-05, + "step": 724 + }, + { + "epoch": 1.9040105193951349, + "high_lr": 0.0006189473684210526, + "low_lr": 1.2378947368421053e-05, + "step": 724 + }, + { + "epoch": 1.9040105193951349, + "high_lr": 0.0006189473684210526, + "low_lr": 1.2378947368421053e-05, + "step": 724 + }, + { + "epoch": 1.9040105193951349, + "high_lr": 0.0006189473684210526, + "low_lr": 1.2378947368421053e-05, + "step": 724 + }, + { + "epoch": 1.9040105193951349, + "high_lr": 0.0006189473684210526, + "low_lr": 1.2378947368421053e-05, + "step": 724 + }, + { + "epoch": 1.9040105193951349, + "high_lr": 0.0006189473684210526, + "low_lr": 1.2378947368421053e-05, + "step": 724 + }, + { + "epoch": 1.9066403681788298, + "grad_norm": 1.1586730480194092, + "learning_rate": 0.000618421052631579, + "loss": 1.4308, + "step": 725 + }, + { + "epoch": 1.9066403681788298, + "high_lr": 0.000618421052631579, + "low_lr": 1.236842105263158e-05, + "step": 725 + }, + { + "epoch": 1.9066403681788298, + "high_lr": 0.000618421052631579, + "low_lr": 1.236842105263158e-05, + "step": 725 + }, + { + "epoch": 1.9066403681788298, + "high_lr": 0.000618421052631579, + "low_lr": 1.236842105263158e-05, + "step": 725 + }, + { + "epoch": 1.9066403681788298, + "high_lr": 0.000618421052631579, + "low_lr": 1.236842105263158e-05, + "step": 725 + }, + { + "epoch": 1.9066403681788298, + "high_lr": 0.000618421052631579, + "low_lr": 1.236842105263158e-05, + "step": 725 + }, + { + "epoch": 1.9066403681788298, + "high_lr": 0.000618421052631579, + "low_lr": 1.236842105263158e-05, + "step": 725 + }, + { + "epoch": 1.9066403681788298, + "high_lr": 0.000618421052631579, + "low_lr": 1.236842105263158e-05, + "step": 725 + }, + { + "epoch": 1.9066403681788298, + "high_lr": 0.000618421052631579, + "low_lr": 1.236842105263158e-05, + "step": 725 + }, + { + "epoch": 1.9092702169625246, + "grad_norm": 1.1156361103057861, + "learning_rate": 0.0006178947368421053, + "loss": 1.4735, + "step": 726 + }, + { + "epoch": 1.9092702169625246, + "high_lr": 0.0006178947368421053, + "low_lr": 1.2357894736842107e-05, + "step": 726 + }, + { + "epoch": 1.9092702169625246, + "high_lr": 0.0006178947368421053, + "low_lr": 1.2357894736842107e-05, + "step": 726 + }, + { + "epoch": 1.9092702169625246, + "high_lr": 0.0006178947368421053, + "low_lr": 1.2357894736842107e-05, + "step": 726 + }, + { + "epoch": 1.9092702169625246, + "high_lr": 0.0006178947368421053, + "low_lr": 1.2357894736842107e-05, + "step": 726 + }, + { + "epoch": 1.9092702169625246, + "high_lr": 0.0006178947368421053, + "low_lr": 1.2357894736842107e-05, + "step": 726 + }, + { + "epoch": 1.9092702169625246, + "high_lr": 0.0006178947368421053, + "low_lr": 1.2357894736842107e-05, + "step": 726 + }, + { + "epoch": 1.9092702169625246, + "high_lr": 0.0006178947368421053, + "low_lr": 1.2357894736842107e-05, + "step": 726 + }, + { + "epoch": 1.9092702169625246, + "high_lr": 0.0006178947368421053, + "low_lr": 1.2357894736842107e-05, + "step": 726 + }, + { + "epoch": 1.9119000657462197, + "grad_norm": 1.0861486196517944, + "learning_rate": 0.0006173684210526315, + "loss": 1.4552, + "step": 727 + }, + { + "epoch": 1.9119000657462197, + "high_lr": 0.0006173684210526315, + "low_lr": 1.2347368421052631e-05, + "step": 727 + }, + { + "epoch": 1.9119000657462197, + "high_lr": 0.0006173684210526315, + "low_lr": 1.2347368421052631e-05, + "step": 727 + }, + { + "epoch": 1.9119000657462197, + "high_lr": 0.0006173684210526315, + "low_lr": 1.2347368421052631e-05, + "step": 727 + }, + { + "epoch": 1.9119000657462197, + "high_lr": 0.0006173684210526315, + "low_lr": 1.2347368421052631e-05, + "step": 727 + }, + { + "epoch": 1.9119000657462197, + "high_lr": 0.0006173684210526315, + "low_lr": 1.2347368421052631e-05, + "step": 727 + }, + { + "epoch": 1.9119000657462197, + "high_lr": 0.0006173684210526315, + "low_lr": 1.2347368421052631e-05, + "step": 727 + }, + { + "epoch": 1.9119000657462197, + "high_lr": 0.0006173684210526315, + "low_lr": 1.2347368421052631e-05, + "step": 727 + }, + { + "epoch": 1.9119000657462197, + "high_lr": 0.0006173684210526315, + "low_lr": 1.2347368421052631e-05, + "step": 727 + }, + { + "epoch": 1.9145299145299144, + "grad_norm": 1.0762629508972168, + "learning_rate": 0.0006168421052631578, + "loss": 1.4672, + "step": 728 + }, + { + "epoch": 1.9145299145299144, + "high_lr": 0.0006168421052631578, + "low_lr": 1.2336842105263158e-05, + "step": 728 + }, + { + "epoch": 1.9145299145299144, + "high_lr": 0.0006168421052631578, + "low_lr": 1.2336842105263158e-05, + "step": 728 + }, + { + "epoch": 1.9145299145299144, + "high_lr": 0.0006168421052631578, + "low_lr": 1.2336842105263158e-05, + "step": 728 + }, + { + "epoch": 1.9145299145299144, + "high_lr": 0.0006168421052631578, + "low_lr": 1.2336842105263158e-05, + "step": 728 + }, + { + "epoch": 1.9145299145299144, + "high_lr": 0.0006168421052631578, + "low_lr": 1.2336842105263158e-05, + "step": 728 + }, + { + "epoch": 1.9145299145299144, + "high_lr": 0.0006168421052631578, + "low_lr": 1.2336842105263158e-05, + "step": 728 + }, + { + "epoch": 1.9145299145299144, + "high_lr": 0.0006168421052631578, + "low_lr": 1.2336842105263158e-05, + "step": 728 + }, + { + "epoch": 1.9145299145299144, + "high_lr": 0.0006168421052631578, + "low_lr": 1.2336842105263158e-05, + "step": 728 + }, + { + "epoch": 1.9171597633136095, + "grad_norm": 1.1548411846160889, + "learning_rate": 0.0006163157894736843, + "loss": 1.4726, + "step": 729 + }, + { + "epoch": 1.9171597633136095, + "high_lr": 0.0006163157894736843, + "low_lr": 1.2326315789473686e-05, + "step": 729 + }, + { + "epoch": 1.9171597633136095, + "high_lr": 0.0006163157894736843, + "low_lr": 1.2326315789473686e-05, + "step": 729 + }, + { + "epoch": 1.9171597633136095, + "high_lr": 0.0006163157894736843, + "low_lr": 1.2326315789473686e-05, + "step": 729 + }, + { + "epoch": 1.9171597633136095, + "high_lr": 0.0006163157894736843, + "low_lr": 1.2326315789473686e-05, + "step": 729 + }, + { + "epoch": 1.9171597633136095, + "high_lr": 0.0006163157894736843, + "low_lr": 1.2326315789473686e-05, + "step": 729 + }, + { + "epoch": 1.9171597633136095, + "high_lr": 0.0006163157894736843, + "low_lr": 1.2326315789473686e-05, + "step": 729 + }, + { + "epoch": 1.9171597633136095, + "high_lr": 0.0006163157894736843, + "low_lr": 1.2326315789473686e-05, + "step": 729 + }, + { + "epoch": 1.9171597633136095, + "high_lr": 0.0006163157894736843, + "low_lr": 1.2326315789473686e-05, + "step": 729 + }, + { + "epoch": 1.9197896120973044, + "grad_norm": 1.122642159461975, + "learning_rate": 0.0006157894736842106, + "loss": 1.4835, + "step": 730 + }, + { + "epoch": 1.9197896120973044, + "high_lr": 0.0006157894736842106, + "low_lr": 1.2315789473684212e-05, + "step": 730 + }, + { + "epoch": 1.9197896120973044, + "high_lr": 0.0006157894736842106, + "low_lr": 1.2315789473684212e-05, + "step": 730 + }, + { + "epoch": 1.9197896120973044, + "high_lr": 0.0006157894736842106, + "low_lr": 1.2315789473684212e-05, + "step": 730 + }, + { + "epoch": 1.9197896120973044, + "high_lr": 0.0006157894736842106, + "low_lr": 1.2315789473684212e-05, + "step": 730 + }, + { + "epoch": 1.9197896120973044, + "high_lr": 0.0006157894736842106, + "low_lr": 1.2315789473684212e-05, + "step": 730 + }, + { + "epoch": 1.9197896120973044, + "high_lr": 0.0006157894736842106, + "low_lr": 1.2315789473684212e-05, + "step": 730 + }, + { + "epoch": 1.9197896120973044, + "high_lr": 0.0006157894736842106, + "low_lr": 1.2315789473684212e-05, + "step": 730 + }, + { + "epoch": 1.9197896120973044, + "high_lr": 0.0006157894736842106, + "low_lr": 1.2315789473684212e-05, + "step": 730 + }, + { + "epoch": 1.9224194608809992, + "grad_norm": 1.1182903051376343, + "learning_rate": 0.0006152631578947369, + "loss": 1.507, + "step": 731 + }, + { + "epoch": 1.9224194608809992, + "high_lr": 0.0006152631578947369, + "low_lr": 1.2305263157894739e-05, + "step": 731 + }, + { + "epoch": 1.9224194608809992, + "high_lr": 0.0006152631578947369, + "low_lr": 1.2305263157894739e-05, + "step": 731 + }, + { + "epoch": 1.9224194608809992, + "high_lr": 0.0006152631578947369, + "low_lr": 1.2305263157894739e-05, + "step": 731 + }, + { + "epoch": 1.9224194608809992, + "high_lr": 0.0006152631578947369, + "low_lr": 1.2305263157894739e-05, + "step": 731 + }, + { + "epoch": 1.9224194608809992, + "high_lr": 0.0006152631578947369, + "low_lr": 1.2305263157894739e-05, + "step": 731 + }, + { + "epoch": 1.9224194608809992, + "high_lr": 0.0006152631578947369, + "low_lr": 1.2305263157894739e-05, + "step": 731 + }, + { + "epoch": 1.9224194608809992, + "high_lr": 0.0006152631578947369, + "low_lr": 1.2305263157894739e-05, + "step": 731 + }, + { + "epoch": 1.9224194608809992, + "high_lr": 0.0006152631578947369, + "low_lr": 1.2305263157894739e-05, + "step": 731 + }, + { + "epoch": 1.9250493096646943, + "grad_norm": 1.0955809354782104, + "learning_rate": 0.0006147368421052632, + "loss": 1.4574, + "step": 732 + }, + { + "epoch": 1.9250493096646943, + "high_lr": 0.0006147368421052632, + "low_lr": 1.2294736842105263e-05, + "step": 732 + }, + { + "epoch": 1.9250493096646943, + "high_lr": 0.0006147368421052632, + "low_lr": 1.2294736842105263e-05, + "step": 732 + }, + { + "epoch": 1.9250493096646943, + "high_lr": 0.0006147368421052632, + "low_lr": 1.2294736842105263e-05, + "step": 732 + }, + { + "epoch": 1.9250493096646943, + "high_lr": 0.0006147368421052632, + "low_lr": 1.2294736842105263e-05, + "step": 732 + }, + { + "epoch": 1.9250493096646943, + "high_lr": 0.0006147368421052632, + "low_lr": 1.2294736842105263e-05, + "step": 732 + }, + { + "epoch": 1.9250493096646943, + "high_lr": 0.0006147368421052632, + "low_lr": 1.2294736842105263e-05, + "step": 732 + }, + { + "epoch": 1.9250493096646943, + "high_lr": 0.0006147368421052632, + "low_lr": 1.2294736842105263e-05, + "step": 732 + }, + { + "epoch": 1.9250493096646943, + "high_lr": 0.0006147368421052632, + "low_lr": 1.2294736842105263e-05, + "step": 732 + }, + { + "epoch": 1.9276791584483892, + "grad_norm": 1.0820459127426147, + "learning_rate": 0.0006142105263157895, + "loss": 1.4271, + "step": 733 + }, + { + "epoch": 1.9276791584483892, + "high_lr": 0.0006142105263157895, + "low_lr": 1.228421052631579e-05, + "step": 733 + }, + { + "epoch": 1.9276791584483892, + "high_lr": 0.0006142105263157895, + "low_lr": 1.228421052631579e-05, + "step": 733 + }, + { + "epoch": 1.9276791584483892, + "high_lr": 0.0006142105263157895, + "low_lr": 1.228421052631579e-05, + "step": 733 + }, + { + "epoch": 1.9276791584483892, + "high_lr": 0.0006142105263157895, + "low_lr": 1.228421052631579e-05, + "step": 733 + }, + { + "epoch": 1.9276791584483892, + "high_lr": 0.0006142105263157895, + "low_lr": 1.228421052631579e-05, + "step": 733 + }, + { + "epoch": 1.9276791584483892, + "high_lr": 0.0006142105263157895, + "low_lr": 1.228421052631579e-05, + "step": 733 + }, + { + "epoch": 1.9276791584483892, + "high_lr": 0.0006142105263157895, + "low_lr": 1.228421052631579e-05, + "step": 733 + }, + { + "epoch": 1.9276791584483892, + "high_lr": 0.0006142105263157895, + "low_lr": 1.228421052631579e-05, + "step": 733 + }, + { + "epoch": 1.930309007232084, + "grad_norm": 1.2596343755722046, + "learning_rate": 0.0006136842105263159, + "loss": 1.525, + "step": 734 + }, + { + "epoch": 1.930309007232084, + "high_lr": 0.0006136842105263159, + "low_lr": 1.2273684210526317e-05, + "step": 734 + }, + { + "epoch": 1.930309007232084, + "high_lr": 0.0006136842105263159, + "low_lr": 1.2273684210526317e-05, + "step": 734 + }, + { + "epoch": 1.930309007232084, + "high_lr": 0.0006136842105263159, + "low_lr": 1.2273684210526317e-05, + "step": 734 + }, + { + "epoch": 1.930309007232084, + "high_lr": 0.0006136842105263159, + "low_lr": 1.2273684210526317e-05, + "step": 734 + }, + { + "epoch": 1.930309007232084, + "high_lr": 0.0006136842105263159, + "low_lr": 1.2273684210526317e-05, + "step": 734 + }, + { + "epoch": 1.930309007232084, + "high_lr": 0.0006136842105263159, + "low_lr": 1.2273684210526317e-05, + "step": 734 + }, + { + "epoch": 1.930309007232084, + "high_lr": 0.0006136842105263159, + "low_lr": 1.2273684210526317e-05, + "step": 734 + }, + { + "epoch": 1.930309007232084, + "high_lr": 0.0006136842105263159, + "low_lr": 1.2273684210526317e-05, + "step": 734 + }, + { + "epoch": 1.9329388560157792, + "grad_norm": 1.1566976308822632, + "learning_rate": 0.0006131578947368421, + "loss": 1.5111, + "step": 735 + }, + { + "epoch": 1.9329388560157792, + "high_lr": 0.0006131578947368421, + "low_lr": 1.2263157894736844e-05, + "step": 735 + }, + { + "epoch": 1.9329388560157792, + "high_lr": 0.0006131578947368421, + "low_lr": 1.2263157894736844e-05, + "step": 735 + }, + { + "epoch": 1.9329388560157792, + "high_lr": 0.0006131578947368421, + "low_lr": 1.2263157894736844e-05, + "step": 735 + }, + { + "epoch": 1.9329388560157792, + "high_lr": 0.0006131578947368421, + "low_lr": 1.2263157894736844e-05, + "step": 735 + }, + { + "epoch": 1.9329388560157792, + "high_lr": 0.0006131578947368421, + "low_lr": 1.2263157894736844e-05, + "step": 735 + }, + { + "epoch": 1.9329388560157792, + "high_lr": 0.0006131578947368421, + "low_lr": 1.2263157894736844e-05, + "step": 735 + }, + { + "epoch": 1.9329388560157792, + "high_lr": 0.0006131578947368421, + "low_lr": 1.2263157894736844e-05, + "step": 735 + }, + { + "epoch": 1.9329388560157792, + "high_lr": 0.0006131578947368421, + "low_lr": 1.2263157894736844e-05, + "step": 735 + }, + { + "epoch": 1.935568704799474, + "grad_norm": 1.1604498624801636, + "learning_rate": 0.0006126315789473684, + "loss": 1.5178, + "step": 736 + }, + { + "epoch": 1.935568704799474, + "high_lr": 0.0006126315789473684, + "low_lr": 1.225263157894737e-05, + "step": 736 + }, + { + "epoch": 1.935568704799474, + "high_lr": 0.0006126315789473684, + "low_lr": 1.225263157894737e-05, + "step": 736 + }, + { + "epoch": 1.935568704799474, + "high_lr": 0.0006126315789473684, + "low_lr": 1.225263157894737e-05, + "step": 736 + }, + { + "epoch": 1.935568704799474, + "high_lr": 0.0006126315789473684, + "low_lr": 1.225263157894737e-05, + "step": 736 + }, + { + "epoch": 1.935568704799474, + "high_lr": 0.0006126315789473684, + "low_lr": 1.225263157894737e-05, + "step": 736 + }, + { + "epoch": 1.935568704799474, + "high_lr": 0.0006126315789473684, + "low_lr": 1.225263157894737e-05, + "step": 736 + }, + { + "epoch": 1.935568704799474, + "high_lr": 0.0006126315789473684, + "low_lr": 1.225263157894737e-05, + "step": 736 + }, + { + "epoch": 1.935568704799474, + "high_lr": 0.0006126315789473684, + "low_lr": 1.225263157894737e-05, + "step": 736 + }, + { + "epoch": 1.938198553583169, + "grad_norm": 1.1423522233963013, + "learning_rate": 0.0006121052631578947, + "loss": 1.4149, + "step": 737 + }, + { + "epoch": 1.938198553583169, + "high_lr": 0.0006121052631578947, + "low_lr": 1.2242105263157895e-05, + "step": 737 + }, + { + "epoch": 1.938198553583169, + "high_lr": 0.0006121052631578947, + "low_lr": 1.2242105263157895e-05, + "step": 737 + }, + { + "epoch": 1.938198553583169, + "high_lr": 0.0006121052631578947, + "low_lr": 1.2242105263157895e-05, + "step": 737 + }, + { + "epoch": 1.938198553583169, + "high_lr": 0.0006121052631578947, + "low_lr": 1.2242105263157895e-05, + "step": 737 + }, + { + "epoch": 1.938198553583169, + "high_lr": 0.0006121052631578947, + "low_lr": 1.2242105263157895e-05, + "step": 737 + }, + { + "epoch": 1.938198553583169, + "high_lr": 0.0006121052631578947, + "low_lr": 1.2242105263157895e-05, + "step": 737 + }, + { + "epoch": 1.938198553583169, + "high_lr": 0.0006121052631578947, + "low_lr": 1.2242105263157895e-05, + "step": 737 + }, + { + "epoch": 1.938198553583169, + "high_lr": 0.0006121052631578947, + "low_lr": 1.2242105263157895e-05, + "step": 737 + }, + { + "epoch": 1.940828402366864, + "grad_norm": 1.1732314825057983, + "learning_rate": 0.000611578947368421, + "loss": 1.4408, + "step": 738 + }, + { + "epoch": 1.940828402366864, + "high_lr": 0.000611578947368421, + "low_lr": 1.2231578947368421e-05, + "step": 738 + }, + { + "epoch": 1.940828402366864, + "high_lr": 0.000611578947368421, + "low_lr": 1.2231578947368421e-05, + "step": 738 + }, + { + "epoch": 1.940828402366864, + "high_lr": 0.000611578947368421, + "low_lr": 1.2231578947368421e-05, + "step": 738 + }, + { + "epoch": 1.940828402366864, + "high_lr": 0.000611578947368421, + "low_lr": 1.2231578947368421e-05, + "step": 738 + }, + { + "epoch": 1.940828402366864, + "high_lr": 0.000611578947368421, + "low_lr": 1.2231578947368421e-05, + "step": 738 + }, + { + "epoch": 1.940828402366864, + "high_lr": 0.000611578947368421, + "low_lr": 1.2231578947368421e-05, + "step": 738 + }, + { + "epoch": 1.940828402366864, + "high_lr": 0.000611578947368421, + "low_lr": 1.2231578947368421e-05, + "step": 738 + }, + { + "epoch": 1.940828402366864, + "high_lr": 0.000611578947368421, + "low_lr": 1.2231578947368421e-05, + "step": 738 + }, + { + "epoch": 1.9434582511505587, + "grad_norm": 1.0459725856781006, + "learning_rate": 0.0006110526315789474, + "loss": 1.4532, + "step": 739 + }, + { + "epoch": 1.9434582511505587, + "high_lr": 0.0006110526315789474, + "low_lr": 1.2221052631578949e-05, + "step": 739 + }, + { + "epoch": 1.9434582511505587, + "high_lr": 0.0006110526315789474, + "low_lr": 1.2221052631578949e-05, + "step": 739 + }, + { + "epoch": 1.9434582511505587, + "high_lr": 0.0006110526315789474, + "low_lr": 1.2221052631578949e-05, + "step": 739 + }, + { + "epoch": 1.9434582511505587, + "high_lr": 0.0006110526315789474, + "low_lr": 1.2221052631578949e-05, + "step": 739 + }, + { + "epoch": 1.9434582511505587, + "high_lr": 0.0006110526315789474, + "low_lr": 1.2221052631578949e-05, + "step": 739 + }, + { + "epoch": 1.9434582511505587, + "high_lr": 0.0006110526315789474, + "low_lr": 1.2221052631578949e-05, + "step": 739 + }, + { + "epoch": 1.9434582511505587, + "high_lr": 0.0006110526315789474, + "low_lr": 1.2221052631578949e-05, + "step": 739 + }, + { + "epoch": 1.9434582511505587, + "high_lr": 0.0006110526315789474, + "low_lr": 1.2221052631578949e-05, + "step": 739 + }, + { + "epoch": 1.9460880999342538, + "grad_norm": 1.2245088815689087, + "learning_rate": 0.0006105263157894737, + "loss": 1.5121, + "step": 740 + }, + { + "epoch": 1.9460880999342538, + "high_lr": 0.0006105263157894737, + "low_lr": 1.2210526315789475e-05, + "step": 740 + }, + { + "epoch": 1.9460880999342538, + "high_lr": 0.0006105263157894737, + "low_lr": 1.2210526315789475e-05, + "step": 740 + }, + { + "epoch": 1.9460880999342538, + "high_lr": 0.0006105263157894737, + "low_lr": 1.2210526315789475e-05, + "step": 740 + }, + { + "epoch": 1.9460880999342538, + "high_lr": 0.0006105263157894737, + "low_lr": 1.2210526315789475e-05, + "step": 740 + }, + { + "epoch": 1.9460880999342538, + "high_lr": 0.0006105263157894737, + "low_lr": 1.2210526315789475e-05, + "step": 740 + }, + { + "epoch": 1.9460880999342538, + "high_lr": 0.0006105263157894737, + "low_lr": 1.2210526315789475e-05, + "step": 740 + }, + { + "epoch": 1.9460880999342538, + "high_lr": 0.0006105263157894737, + "low_lr": 1.2210526315789475e-05, + "step": 740 + }, + { + "epoch": 1.9460880999342538, + "high_lr": 0.0006105263157894737, + "low_lr": 1.2210526315789475e-05, + "step": 740 + }, + { + "epoch": 1.9487179487179487, + "grad_norm": 1.1924901008605957, + "learning_rate": 0.00061, + "loss": 1.4692, + "step": 741 + }, + { + "epoch": 1.9487179487179487, + "high_lr": 0.00061, + "low_lr": 1.22e-05, + "step": 741 + }, + { + "epoch": 1.9487179487179487, + "high_lr": 0.00061, + "low_lr": 1.22e-05, + "step": 741 + }, + { + "epoch": 1.9487179487179487, + "high_lr": 0.00061, + "low_lr": 1.22e-05, + "step": 741 + }, + { + "epoch": 1.9487179487179487, + "high_lr": 0.00061, + "low_lr": 1.22e-05, + "step": 741 + }, + { + "epoch": 1.9487179487179487, + "high_lr": 0.00061, + "low_lr": 1.22e-05, + "step": 741 + }, + { + "epoch": 1.9487179487179487, + "high_lr": 0.00061, + "low_lr": 1.22e-05, + "step": 741 + }, + { + "epoch": 1.9487179487179487, + "high_lr": 0.00061, + "low_lr": 1.22e-05, + "step": 741 + }, + { + "epoch": 1.9487179487179487, + "high_lr": 0.00061, + "low_lr": 1.22e-05, + "step": 741 + }, + { + "epoch": 1.9513477975016436, + "grad_norm": 1.0595648288726807, + "learning_rate": 0.0006094736842105263, + "loss": 1.5118, + "step": 742 + }, + { + "epoch": 1.9513477975016436, + "high_lr": 0.0006094736842105263, + "low_lr": 1.2189473684210526e-05, + "step": 742 + }, + { + "epoch": 1.9513477975016436, + "high_lr": 0.0006094736842105263, + "low_lr": 1.2189473684210526e-05, + "step": 742 + }, + { + "epoch": 1.9513477975016436, + "high_lr": 0.0006094736842105263, + "low_lr": 1.2189473684210526e-05, + "step": 742 + }, + { + "epoch": 1.9513477975016436, + "high_lr": 0.0006094736842105263, + "low_lr": 1.2189473684210526e-05, + "step": 742 + }, + { + "epoch": 1.9513477975016436, + "high_lr": 0.0006094736842105263, + "low_lr": 1.2189473684210526e-05, + "step": 742 + }, + { + "epoch": 1.9513477975016436, + "high_lr": 0.0006094736842105263, + "low_lr": 1.2189473684210526e-05, + "step": 742 + }, + { + "epoch": 1.9513477975016436, + "high_lr": 0.0006094736842105263, + "low_lr": 1.2189473684210526e-05, + "step": 742 + }, + { + "epoch": 1.9513477975016436, + "high_lr": 0.0006094736842105263, + "low_lr": 1.2189473684210526e-05, + "step": 742 + }, + { + "epoch": 1.9539776462853387, + "grad_norm": 1.097933053970337, + "learning_rate": 0.0006089473684210527, + "loss": 1.4404, + "step": 743 + }, + { + "epoch": 1.9539776462853387, + "high_lr": 0.0006089473684210527, + "low_lr": 1.2178947368421054e-05, + "step": 743 + }, + { + "epoch": 1.9539776462853387, + "high_lr": 0.0006089473684210527, + "low_lr": 1.2178947368421054e-05, + "step": 743 + }, + { + "epoch": 1.9539776462853387, + "high_lr": 0.0006089473684210527, + "low_lr": 1.2178947368421054e-05, + "step": 743 + }, + { + "epoch": 1.9539776462853387, + "high_lr": 0.0006089473684210527, + "low_lr": 1.2178947368421054e-05, + "step": 743 + }, + { + "epoch": 1.9539776462853387, + "high_lr": 0.0006089473684210527, + "low_lr": 1.2178947368421054e-05, + "step": 743 + }, + { + "epoch": 1.9539776462853387, + "high_lr": 0.0006089473684210527, + "low_lr": 1.2178947368421054e-05, + "step": 743 + }, + { + "epoch": 1.9539776462853387, + "high_lr": 0.0006089473684210527, + "low_lr": 1.2178947368421054e-05, + "step": 743 + }, + { + "epoch": 1.9539776462853387, + "high_lr": 0.0006089473684210527, + "low_lr": 1.2178947368421054e-05, + "step": 743 + }, + { + "epoch": 1.9566074950690335, + "grad_norm": 1.1369494199752808, + "learning_rate": 0.0006084210526315789, + "loss": 1.4706, + "step": 744 + }, + { + "epoch": 1.9566074950690335, + "high_lr": 0.0006084210526315789, + "low_lr": 1.216842105263158e-05, + "step": 744 + }, + { + "epoch": 1.9566074950690335, + "high_lr": 0.0006084210526315789, + "low_lr": 1.216842105263158e-05, + "step": 744 + }, + { + "epoch": 1.9566074950690335, + "high_lr": 0.0006084210526315789, + "low_lr": 1.216842105263158e-05, + "step": 744 + }, + { + "epoch": 1.9566074950690335, + "high_lr": 0.0006084210526315789, + "low_lr": 1.216842105263158e-05, + "step": 744 + }, + { + "epoch": 1.9566074950690335, + "high_lr": 0.0006084210526315789, + "low_lr": 1.216842105263158e-05, + "step": 744 + }, + { + "epoch": 1.9566074950690335, + "high_lr": 0.0006084210526315789, + "low_lr": 1.216842105263158e-05, + "step": 744 + }, + { + "epoch": 1.9566074950690335, + "high_lr": 0.0006084210526315789, + "low_lr": 1.216842105263158e-05, + "step": 744 + }, + { + "epoch": 1.9566074950690335, + "high_lr": 0.0006084210526315789, + "low_lr": 1.216842105263158e-05, + "step": 744 + }, + { + "epoch": 1.9592373438527284, + "grad_norm": 1.1629635095596313, + "learning_rate": 0.0006078947368421052, + "loss": 1.4443, + "step": 745 + }, + { + "epoch": 1.9592373438527284, + "high_lr": 0.0006078947368421052, + "low_lr": 1.2157894736842107e-05, + "step": 745 + }, + { + "epoch": 1.9592373438527284, + "high_lr": 0.0006078947368421052, + "low_lr": 1.2157894736842107e-05, + "step": 745 + }, + { + "epoch": 1.9592373438527284, + "high_lr": 0.0006078947368421052, + "low_lr": 1.2157894736842107e-05, + "step": 745 + }, + { + "epoch": 1.9592373438527284, + "high_lr": 0.0006078947368421052, + "low_lr": 1.2157894736842107e-05, + "step": 745 + }, + { + "epoch": 1.9592373438527284, + "high_lr": 0.0006078947368421052, + "low_lr": 1.2157894736842107e-05, + "step": 745 + }, + { + "epoch": 1.9592373438527284, + "high_lr": 0.0006078947368421052, + "low_lr": 1.2157894736842107e-05, + "step": 745 + }, + { + "epoch": 1.9592373438527284, + "high_lr": 0.0006078947368421052, + "low_lr": 1.2157894736842107e-05, + "step": 745 + }, + { + "epoch": 1.9592373438527284, + "high_lr": 0.0006078947368421052, + "low_lr": 1.2157894736842107e-05, + "step": 745 + }, + { + "epoch": 1.9618671926364235, + "grad_norm": 1.108062982559204, + "learning_rate": 0.0006073684210526316, + "loss": 1.4084, + "step": 746 + }, + { + "epoch": 1.9618671926364235, + "high_lr": 0.0006073684210526316, + "low_lr": 1.2147368421052632e-05, + "step": 746 + }, + { + "epoch": 1.9618671926364235, + "high_lr": 0.0006073684210526316, + "low_lr": 1.2147368421052632e-05, + "step": 746 + }, + { + "epoch": 1.9618671926364235, + "high_lr": 0.0006073684210526316, + "low_lr": 1.2147368421052632e-05, + "step": 746 + }, + { + "epoch": 1.9618671926364235, + "high_lr": 0.0006073684210526316, + "low_lr": 1.2147368421052632e-05, + "step": 746 + }, + { + "epoch": 1.9618671926364235, + "high_lr": 0.0006073684210526316, + "low_lr": 1.2147368421052632e-05, + "step": 746 + }, + { + "epoch": 1.9618671926364235, + "high_lr": 0.0006073684210526316, + "low_lr": 1.2147368421052632e-05, + "step": 746 + }, + { + "epoch": 1.9618671926364235, + "high_lr": 0.0006073684210526316, + "low_lr": 1.2147368421052632e-05, + "step": 746 + }, + { + "epoch": 1.9618671926364235, + "high_lr": 0.0006073684210526316, + "low_lr": 1.2147368421052632e-05, + "step": 746 + }, + { + "epoch": 1.9644970414201184, + "grad_norm": 1.122900128364563, + "learning_rate": 0.0006068421052631579, + "loss": 1.4306, + "step": 747 + }, + { + "epoch": 1.9644970414201184, + "high_lr": 0.0006068421052631579, + "low_lr": 1.2136842105263158e-05, + "step": 747 + }, + { + "epoch": 1.9644970414201184, + "high_lr": 0.0006068421052631579, + "low_lr": 1.2136842105263158e-05, + "step": 747 + }, + { + "epoch": 1.9644970414201184, + "high_lr": 0.0006068421052631579, + "low_lr": 1.2136842105263158e-05, + "step": 747 + }, + { + "epoch": 1.9644970414201184, + "high_lr": 0.0006068421052631579, + "low_lr": 1.2136842105263158e-05, + "step": 747 + }, + { + "epoch": 1.9644970414201184, + "high_lr": 0.0006068421052631579, + "low_lr": 1.2136842105263158e-05, + "step": 747 + }, + { + "epoch": 1.9644970414201184, + "high_lr": 0.0006068421052631579, + "low_lr": 1.2136842105263158e-05, + "step": 747 + }, + { + "epoch": 1.9644970414201184, + "high_lr": 0.0006068421052631579, + "low_lr": 1.2136842105263158e-05, + "step": 747 + }, + { + "epoch": 1.9644970414201184, + "high_lr": 0.0006068421052631579, + "low_lr": 1.2136842105263158e-05, + "step": 747 + }, + { + "epoch": 1.9671268902038133, + "grad_norm": 1.1524890661239624, + "learning_rate": 0.0006063157894736843, + "loss": 1.4589, + "step": 748 + }, + { + "epoch": 1.9671268902038133, + "high_lr": 0.0006063157894736843, + "low_lr": 1.2126315789473686e-05, + "step": 748 + }, + { + "epoch": 1.9671268902038133, + "high_lr": 0.0006063157894736843, + "low_lr": 1.2126315789473686e-05, + "step": 748 + }, + { + "epoch": 1.9671268902038133, + "high_lr": 0.0006063157894736843, + "low_lr": 1.2126315789473686e-05, + "step": 748 + }, + { + "epoch": 1.9671268902038133, + "high_lr": 0.0006063157894736843, + "low_lr": 1.2126315789473686e-05, + "step": 748 + }, + { + "epoch": 1.9671268902038133, + "high_lr": 0.0006063157894736843, + "low_lr": 1.2126315789473686e-05, + "step": 748 + }, + { + "epoch": 1.9671268902038133, + "high_lr": 0.0006063157894736843, + "low_lr": 1.2126315789473686e-05, + "step": 748 + }, + { + "epoch": 1.9671268902038133, + "high_lr": 0.0006063157894736843, + "low_lr": 1.2126315789473686e-05, + "step": 748 + }, + { + "epoch": 1.9671268902038133, + "high_lr": 0.0006063157894736843, + "low_lr": 1.2126315789473686e-05, + "step": 748 + }, + { + "epoch": 1.9697567389875084, + "grad_norm": 1.1671377420425415, + "learning_rate": 0.0006057894736842106, + "loss": 1.4813, + "step": 749 + }, + { + "epoch": 1.9697567389875084, + "high_lr": 0.0006057894736842106, + "low_lr": 1.2115789473684212e-05, + "step": 749 + }, + { + "epoch": 1.9697567389875084, + "high_lr": 0.0006057894736842106, + "low_lr": 1.2115789473684212e-05, + "step": 749 + }, + { + "epoch": 1.9697567389875084, + "high_lr": 0.0006057894736842106, + "low_lr": 1.2115789473684212e-05, + "step": 749 + }, + { + "epoch": 1.9697567389875084, + "high_lr": 0.0006057894736842106, + "low_lr": 1.2115789473684212e-05, + "step": 749 + }, + { + "epoch": 1.9697567389875084, + "high_lr": 0.0006057894736842106, + "low_lr": 1.2115789473684212e-05, + "step": 749 + }, + { + "epoch": 1.9697567389875084, + "high_lr": 0.0006057894736842106, + "low_lr": 1.2115789473684212e-05, + "step": 749 + }, + { + "epoch": 1.9697567389875084, + "high_lr": 0.0006057894736842106, + "low_lr": 1.2115789473684212e-05, + "step": 749 + }, + { + "epoch": 1.9697567389875084, + "high_lr": 0.0006057894736842106, + "low_lr": 1.2115789473684212e-05, + "step": 749 + }, + { + "epoch": 1.972386587771203, + "grad_norm": 1.1508365869522095, + "learning_rate": 0.0006052631578947369, + "loss": 1.5052, + "step": 750 + }, + { + "epoch": 1.972386587771203, + "high_lr": 0.0006052631578947369, + "low_lr": 1.2105263157894737e-05, + "step": 750 + }, + { + "epoch": 1.972386587771203, + "high_lr": 0.0006052631578947369, + "low_lr": 1.2105263157894737e-05, + "step": 750 + }, + { + "epoch": 1.972386587771203, + "high_lr": 0.0006052631578947369, + "low_lr": 1.2105263157894737e-05, + "step": 750 + }, + { + "epoch": 1.972386587771203, + "high_lr": 0.0006052631578947369, + "low_lr": 1.2105263157894737e-05, + "step": 750 + }, + { + "epoch": 1.972386587771203, + "high_lr": 0.0006052631578947369, + "low_lr": 1.2105263157894737e-05, + "step": 750 + }, + { + "epoch": 1.972386587771203, + "high_lr": 0.0006052631578947369, + "low_lr": 1.2105263157894737e-05, + "step": 750 + }, + { + "epoch": 1.972386587771203, + "high_lr": 0.0006052631578947369, + "low_lr": 1.2105263157894737e-05, + "step": 750 + }, + { + "epoch": 1.972386587771203, + "high_lr": 0.0006052631578947369, + "low_lr": 1.2105263157894737e-05, + "step": 750 + }, + { + "epoch": 1.9750164365548981, + "grad_norm": 1.1085923910140991, + "learning_rate": 0.0006047368421052632, + "loss": 1.5286, + "step": 751 + }, + { + "epoch": 1.9750164365548981, + "high_lr": 0.0006047368421052632, + "low_lr": 1.2094736842105263e-05, + "step": 751 + }, + { + "epoch": 1.9750164365548981, + "high_lr": 0.0006047368421052632, + "low_lr": 1.2094736842105263e-05, + "step": 751 + }, + { + "epoch": 1.9750164365548981, + "high_lr": 0.0006047368421052632, + "low_lr": 1.2094736842105263e-05, + "step": 751 + }, + { + "epoch": 1.9750164365548981, + "high_lr": 0.0006047368421052632, + "low_lr": 1.2094736842105263e-05, + "step": 751 + }, + { + "epoch": 1.9750164365548981, + "high_lr": 0.0006047368421052632, + "low_lr": 1.2094736842105263e-05, + "step": 751 + }, + { + "epoch": 1.9750164365548981, + "high_lr": 0.0006047368421052632, + "low_lr": 1.2094736842105263e-05, + "step": 751 + }, + { + "epoch": 1.9750164365548981, + "high_lr": 0.0006047368421052632, + "low_lr": 1.2094736842105263e-05, + "step": 751 + }, + { + "epoch": 1.9750164365548981, + "high_lr": 0.0006047368421052632, + "low_lr": 1.2094736842105263e-05, + "step": 751 + }, + { + "epoch": 1.977646285338593, + "grad_norm": 1.2111530303955078, + "learning_rate": 0.0006042105263157894, + "loss": 1.5144, + "step": 752 + }, + { + "epoch": 1.977646285338593, + "high_lr": 0.0006042105263157894, + "low_lr": 1.208421052631579e-05, + "step": 752 + }, + { + "epoch": 1.977646285338593, + "high_lr": 0.0006042105263157894, + "low_lr": 1.208421052631579e-05, + "step": 752 + }, + { + "epoch": 1.977646285338593, + "high_lr": 0.0006042105263157894, + "low_lr": 1.208421052631579e-05, + "step": 752 + }, + { + "epoch": 1.977646285338593, + "high_lr": 0.0006042105263157894, + "low_lr": 1.208421052631579e-05, + "step": 752 + }, + { + "epoch": 1.977646285338593, + "high_lr": 0.0006042105263157894, + "low_lr": 1.208421052631579e-05, + "step": 752 + }, + { + "epoch": 1.977646285338593, + "high_lr": 0.0006042105263157894, + "low_lr": 1.208421052631579e-05, + "step": 752 + }, + { + "epoch": 1.977646285338593, + "high_lr": 0.0006042105263157894, + "low_lr": 1.208421052631579e-05, + "step": 752 + }, + { + "epoch": 1.977646285338593, + "high_lr": 0.0006042105263157894, + "low_lr": 1.208421052631579e-05, + "step": 752 + }, + { + "epoch": 1.9802761341222879, + "grad_norm": 1.09846830368042, + "learning_rate": 0.0006036842105263158, + "loss": 1.4373, + "step": 753 + }, + { + "epoch": 1.9802761341222879, + "high_lr": 0.0006036842105263158, + "low_lr": 1.2073684210526318e-05, + "step": 753 + }, + { + "epoch": 1.9802761341222879, + "high_lr": 0.0006036842105263158, + "low_lr": 1.2073684210526318e-05, + "step": 753 + }, + { + "epoch": 1.9802761341222879, + "high_lr": 0.0006036842105263158, + "low_lr": 1.2073684210526318e-05, + "step": 753 + }, + { + "epoch": 1.9802761341222879, + "high_lr": 0.0006036842105263158, + "low_lr": 1.2073684210526318e-05, + "step": 753 + }, + { + "epoch": 1.9802761341222879, + "high_lr": 0.0006036842105263158, + "low_lr": 1.2073684210526318e-05, + "step": 753 + }, + { + "epoch": 1.9802761341222879, + "high_lr": 0.0006036842105263158, + "low_lr": 1.2073684210526318e-05, + "step": 753 + }, + { + "epoch": 1.9802761341222879, + "high_lr": 0.0006036842105263158, + "low_lr": 1.2073684210526318e-05, + "step": 753 + }, + { + "epoch": 1.9802761341222879, + "high_lr": 0.0006036842105263158, + "low_lr": 1.2073684210526318e-05, + "step": 753 + }, + { + "epoch": 1.982905982905983, + "grad_norm": 1.1521211862564087, + "learning_rate": 0.0006031578947368421, + "loss": 1.4419, + "step": 754 + }, + { + "epoch": 1.982905982905983, + "high_lr": 0.0006031578947368421, + "low_lr": 1.2063157894736844e-05, + "step": 754 + }, + { + "epoch": 1.982905982905983, + "high_lr": 0.0006031578947368421, + "low_lr": 1.2063157894736844e-05, + "step": 754 + }, + { + "epoch": 1.982905982905983, + "high_lr": 0.0006031578947368421, + "low_lr": 1.2063157894736844e-05, + "step": 754 + }, + { + "epoch": 1.982905982905983, + "high_lr": 0.0006031578947368421, + "low_lr": 1.2063157894736844e-05, + "step": 754 + }, + { + "epoch": 1.982905982905983, + "high_lr": 0.0006031578947368421, + "low_lr": 1.2063157894736844e-05, + "step": 754 + }, + { + "epoch": 1.982905982905983, + "high_lr": 0.0006031578947368421, + "low_lr": 1.2063157894736844e-05, + "step": 754 + }, + { + "epoch": 1.982905982905983, + "high_lr": 0.0006031578947368421, + "low_lr": 1.2063157894736844e-05, + "step": 754 + }, + { + "epoch": 1.982905982905983, + "high_lr": 0.0006031578947368421, + "low_lr": 1.2063157894736844e-05, + "step": 754 + }, + { + "epoch": 1.9855358316896778, + "grad_norm": 1.1370145082473755, + "learning_rate": 0.0006026315789473684, + "loss": 1.4149, + "step": 755 + }, + { + "epoch": 1.9855358316896778, + "high_lr": 0.0006026315789473684, + "low_lr": 1.2052631578947369e-05, + "step": 755 + }, + { + "epoch": 1.9855358316896778, + "high_lr": 0.0006026315789473684, + "low_lr": 1.2052631578947369e-05, + "step": 755 + }, + { + "epoch": 1.9855358316896778, + "high_lr": 0.0006026315789473684, + "low_lr": 1.2052631578947369e-05, + "step": 755 + }, + { + "epoch": 1.9855358316896778, + "high_lr": 0.0006026315789473684, + "low_lr": 1.2052631578947369e-05, + "step": 755 + }, + { + "epoch": 1.9855358316896778, + "high_lr": 0.0006026315789473684, + "low_lr": 1.2052631578947369e-05, + "step": 755 + }, + { + "epoch": 1.9855358316896778, + "high_lr": 0.0006026315789473684, + "low_lr": 1.2052631578947369e-05, + "step": 755 + }, + { + "epoch": 1.9855358316896778, + "high_lr": 0.0006026315789473684, + "low_lr": 1.2052631578947369e-05, + "step": 755 + }, + { + "epoch": 1.9855358316896778, + "high_lr": 0.0006026315789473684, + "low_lr": 1.2052631578947369e-05, + "step": 755 + }, + { + "epoch": 1.9881656804733727, + "grad_norm": 1.2295494079589844, + "learning_rate": 0.0006021052631578947, + "loss": 1.5146, + "step": 756 + }, + { + "epoch": 1.9881656804733727, + "high_lr": 0.0006021052631578947, + "low_lr": 1.2042105263157895e-05, + "step": 756 + }, + { + "epoch": 1.9881656804733727, + "high_lr": 0.0006021052631578947, + "low_lr": 1.2042105263157895e-05, + "step": 756 + }, + { + "epoch": 1.9881656804733727, + "high_lr": 0.0006021052631578947, + "low_lr": 1.2042105263157895e-05, + "step": 756 + }, + { + "epoch": 1.9881656804733727, + "high_lr": 0.0006021052631578947, + "low_lr": 1.2042105263157895e-05, + "step": 756 + }, + { + "epoch": 1.9881656804733727, + "high_lr": 0.0006021052631578947, + "low_lr": 1.2042105263157895e-05, + "step": 756 + }, + { + "epoch": 1.9881656804733727, + "high_lr": 0.0006021052631578947, + "low_lr": 1.2042105263157895e-05, + "step": 756 + }, + { + "epoch": 1.9881656804733727, + "high_lr": 0.0006021052631578947, + "low_lr": 1.2042105263157895e-05, + "step": 756 + }, + { + "epoch": 1.9881656804733727, + "high_lr": 0.0006021052631578947, + "low_lr": 1.2042105263157895e-05, + "step": 756 + }, + { + "epoch": 1.9907955292570678, + "grad_norm": 1.0924862623214722, + "learning_rate": 0.0006015789473684211, + "loss": 1.4997, + "step": 757 + }, + { + "epoch": 1.9907955292570678, + "high_lr": 0.0006015789473684211, + "low_lr": 1.2031578947368423e-05, + "step": 757 + }, + { + "epoch": 1.9907955292570678, + "high_lr": 0.0006015789473684211, + "low_lr": 1.2031578947368423e-05, + "step": 757 + }, + { + "epoch": 1.9907955292570678, + "high_lr": 0.0006015789473684211, + "low_lr": 1.2031578947368423e-05, + "step": 757 + }, + { + "epoch": 1.9907955292570678, + "high_lr": 0.0006015789473684211, + "low_lr": 1.2031578947368423e-05, + "step": 757 + }, + { + "epoch": 1.9907955292570678, + "high_lr": 0.0006015789473684211, + "low_lr": 1.2031578947368423e-05, + "step": 757 + }, + { + "epoch": 1.9907955292570678, + "high_lr": 0.0006015789473684211, + "low_lr": 1.2031578947368423e-05, + "step": 757 + }, + { + "epoch": 1.9907955292570678, + "high_lr": 0.0006015789473684211, + "low_lr": 1.2031578947368423e-05, + "step": 757 + }, + { + "epoch": 1.9907955292570678, + "high_lr": 0.0006015789473684211, + "low_lr": 1.2031578947368423e-05, + "step": 757 + }, + { + "epoch": 1.9934253780407627, + "grad_norm": 1.096147894859314, + "learning_rate": 0.0006010526315789474, + "loss": 1.4644, + "step": 758 + }, + { + "epoch": 1.9934253780407627, + "high_lr": 0.0006010526315789474, + "low_lr": 1.202105263157895e-05, + "step": 758 + }, + { + "epoch": 1.9934253780407627, + "high_lr": 0.0006010526315789474, + "low_lr": 1.202105263157895e-05, + "step": 758 + }, + { + "epoch": 1.9934253780407627, + "high_lr": 0.0006010526315789474, + "low_lr": 1.202105263157895e-05, + "step": 758 + }, + { + "epoch": 1.9934253780407627, + "high_lr": 0.0006010526315789474, + "low_lr": 1.202105263157895e-05, + "step": 758 + }, + { + "epoch": 1.9934253780407627, + "high_lr": 0.0006010526315789474, + "low_lr": 1.202105263157895e-05, + "step": 758 + }, + { + "epoch": 1.9934253780407627, + "high_lr": 0.0006010526315789474, + "low_lr": 1.202105263157895e-05, + "step": 758 + }, + { + "epoch": 1.9934253780407627, + "high_lr": 0.0006010526315789474, + "low_lr": 1.202105263157895e-05, + "step": 758 + }, + { + "epoch": 1.9934253780407627, + "high_lr": 0.0006010526315789474, + "low_lr": 1.202105263157895e-05, + "step": 758 + }, + { + "epoch": 1.9960552268244576, + "grad_norm": 1.1285107135772705, + "learning_rate": 0.0006005263157894737, + "loss": 1.4483, + "step": 759 + }, + { + "epoch": 1.9960552268244576, + "high_lr": 0.0006005263157894737, + "low_lr": 1.2010526315789474e-05, + "step": 759 + }, + { + "epoch": 1.9960552268244576, + "high_lr": 0.0006005263157894737, + "low_lr": 1.2010526315789474e-05, + "step": 759 + }, + { + "epoch": 1.9960552268244576, + "high_lr": 0.0006005263157894737, + "low_lr": 1.2010526315789474e-05, + "step": 759 + }, + { + "epoch": 1.9960552268244576, + "high_lr": 0.0006005263157894737, + "low_lr": 1.2010526315789474e-05, + "step": 759 + }, + { + "epoch": 1.9960552268244576, + "high_lr": 0.0006005263157894737, + "low_lr": 1.2010526315789474e-05, + "step": 759 + }, + { + "epoch": 1.9960552268244576, + "high_lr": 0.0006005263157894737, + "low_lr": 1.2010526315789474e-05, + "step": 759 + }, + { + "epoch": 1.9960552268244576, + "high_lr": 0.0006005263157894737, + "low_lr": 1.2010526315789474e-05, + "step": 759 + }, + { + "epoch": 1.9960552268244576, + "high_lr": 0.0006005263157894737, + "low_lr": 1.2010526315789474e-05, + "step": 759 + }, + { + "epoch": 1.9986850756081527, + "grad_norm": 2.830282211303711, + "learning_rate": 0.0006, + "loss": 1.5092, + "step": 760 + }, + { + "epoch": 1.9986850756081527, + "high_lr": 0.0006, + "low_lr": 1.2e-05, + "step": 760 + }, + { + "epoch": 1.9986850756081527, + "high_lr": 0.0006, + "low_lr": 1.2e-05, + "step": 760 + }, + { + "epoch": 1.9986850756081527, + "high_lr": 0.0006, + "low_lr": 1.2e-05, + "step": 760 + }, + { + "epoch": 1.9986850756081527, + "high_lr": 0.0006, + "low_lr": 1.2e-05, + "step": 760 + }, + { + "epoch": 1.9986850756081527, + "high_lr": 0.0006, + "low_lr": 1.2e-05, + "step": 760 + }, + { + "epoch": 1.9986850756081527, + "high_lr": 0.0006, + "low_lr": 1.2e-05, + "step": 760 + }, + { + "epoch": 1.9986850756081527, + "high_lr": 0.0006, + "low_lr": 1.2e-05, + "step": 760 + }, + { + "epoch": 1.9986850756081527, + "high_lr": 0.0006, + "low_lr": 1.2e-05, + "step": 760 + }, + { + "epoch": 2.0013149243918473, + "grad_norm": 1.145472764968872, + "learning_rate": 0.0005994736842105262, + "loss": 1.4476, + "step": 761 + }, + { + "epoch": 2.0013149243918473, + "high_lr": 0.0005994736842105262, + "low_lr": 1.1989473684210527e-05, + "step": 761 + }, + { + "epoch": 2.0013149243918473, + "high_lr": 0.0005994736842105262, + "low_lr": 1.1989473684210527e-05, + "step": 761 + }, + { + "epoch": 2.0013149243918473, + "high_lr": 0.0005994736842105262, + "low_lr": 1.1989473684210527e-05, + "step": 761 + }, + { + "epoch": 2.0013149243918473, + "high_lr": 0.0005994736842105262, + "low_lr": 1.1989473684210527e-05, + "step": 761 + }, + { + "epoch": 2.0013149243918473, + "high_lr": 0.0005994736842105262, + "low_lr": 1.1989473684210527e-05, + "step": 761 + }, + { + "epoch": 2.0013149243918473, + "high_lr": 0.0005994736842105262, + "low_lr": 1.1989473684210527e-05, + "step": 761 + }, + { + "epoch": 2.0013149243918473, + "high_lr": 0.0005994736842105262, + "low_lr": 1.1989473684210527e-05, + "step": 761 + }, + { + "epoch": 2.0013149243918473, + "high_lr": 0.0005994736842105262, + "low_lr": 1.1989473684210527e-05, + "step": 761 + }, + { + "epoch": 2.0039447731755424, + "grad_norm": 1.2734953165054321, + "learning_rate": 0.0005989473684210527, + "loss": 1.3967, + "step": 762 + }, + { + "epoch": 2.0039447731755424, + "high_lr": 0.0005989473684210527, + "low_lr": 1.1978947368421055e-05, + "step": 762 + }, + { + "epoch": 2.0039447731755424, + "high_lr": 0.0005989473684210527, + "low_lr": 1.1978947368421055e-05, + "step": 762 + }, + { + "epoch": 2.0039447731755424, + "high_lr": 0.0005989473684210527, + "low_lr": 1.1978947368421055e-05, + "step": 762 + }, + { + "epoch": 2.0039447731755424, + "high_lr": 0.0005989473684210527, + "low_lr": 1.1978947368421055e-05, + "step": 762 + }, + { + "epoch": 2.0039447731755424, + "high_lr": 0.0005989473684210527, + "low_lr": 1.1978947368421055e-05, + "step": 762 + }, + { + "epoch": 2.0039447731755424, + "high_lr": 0.0005989473684210527, + "low_lr": 1.1978947368421055e-05, + "step": 762 + }, + { + "epoch": 2.0039447731755424, + "high_lr": 0.0005989473684210527, + "low_lr": 1.1978947368421055e-05, + "step": 762 + }, + { + "epoch": 2.0039447731755424, + "high_lr": 0.0005989473684210527, + "low_lr": 1.1978947368421055e-05, + "step": 762 + }, + { + "epoch": 2.0065746219592375, + "grad_norm": 1.0868024826049805, + "learning_rate": 0.000598421052631579, + "loss": 1.3929, + "step": 763 + }, + { + "epoch": 2.0065746219592375, + "high_lr": 0.000598421052631579, + "low_lr": 1.1968421052631581e-05, + "step": 763 + }, + { + "epoch": 2.0065746219592375, + "high_lr": 0.000598421052631579, + "low_lr": 1.1968421052631581e-05, + "step": 763 + }, + { + "epoch": 2.0065746219592375, + "high_lr": 0.000598421052631579, + "low_lr": 1.1968421052631581e-05, + "step": 763 + }, + { + "epoch": 2.0065746219592375, + "high_lr": 0.000598421052631579, + "low_lr": 1.1968421052631581e-05, + "step": 763 + }, + { + "epoch": 2.0065746219592375, + "high_lr": 0.000598421052631579, + "low_lr": 1.1968421052631581e-05, + "step": 763 + }, + { + "epoch": 2.0065746219592375, + "high_lr": 0.000598421052631579, + "low_lr": 1.1968421052631581e-05, + "step": 763 + }, + { + "epoch": 2.0065746219592375, + "high_lr": 0.000598421052631579, + "low_lr": 1.1968421052631581e-05, + "step": 763 + }, + { + "epoch": 2.0065746219592375, + "high_lr": 0.000598421052631579, + "low_lr": 1.1968421052631581e-05, + "step": 763 + }, + { + "epoch": 2.009204470742932, + "grad_norm": 1.0661119222640991, + "learning_rate": 0.0005978947368421053, + "loss": 1.4308, + "step": 764 + }, + { + "epoch": 2.009204470742932, + "high_lr": 0.0005978947368421053, + "low_lr": 1.1957894736842106e-05, + "step": 764 + }, + { + "epoch": 2.009204470742932, + "high_lr": 0.0005978947368421053, + "low_lr": 1.1957894736842106e-05, + "step": 764 + }, + { + "epoch": 2.009204470742932, + "high_lr": 0.0005978947368421053, + "low_lr": 1.1957894736842106e-05, + "step": 764 + }, + { + "epoch": 2.009204470742932, + "high_lr": 0.0005978947368421053, + "low_lr": 1.1957894736842106e-05, + "step": 764 + }, + { + "epoch": 2.009204470742932, + "high_lr": 0.0005978947368421053, + "low_lr": 1.1957894736842106e-05, + "step": 764 + }, + { + "epoch": 2.009204470742932, + "high_lr": 0.0005978947368421053, + "low_lr": 1.1957894736842106e-05, + "step": 764 + }, + { + "epoch": 2.009204470742932, + "high_lr": 0.0005978947368421053, + "low_lr": 1.1957894736842106e-05, + "step": 764 + }, + { + "epoch": 2.009204470742932, + "high_lr": 0.0005978947368421053, + "low_lr": 1.1957894736842106e-05, + "step": 764 + }, + { + "epoch": 2.0118343195266273, + "grad_norm": 1.0468875169754028, + "learning_rate": 0.0005973684210526316, + "loss": 1.3778, + "step": 765 + }, + { + "epoch": 2.0118343195266273, + "high_lr": 0.0005973684210526316, + "low_lr": 1.1947368421052632e-05, + "step": 765 + }, + { + "epoch": 2.0118343195266273, + "high_lr": 0.0005973684210526316, + "low_lr": 1.1947368421052632e-05, + "step": 765 + }, + { + "epoch": 2.0118343195266273, + "high_lr": 0.0005973684210526316, + "low_lr": 1.1947368421052632e-05, + "step": 765 + }, + { + "epoch": 2.0118343195266273, + "high_lr": 0.0005973684210526316, + "low_lr": 1.1947368421052632e-05, + "step": 765 + }, + { + "epoch": 2.0118343195266273, + "high_lr": 0.0005973684210526316, + "low_lr": 1.1947368421052632e-05, + "step": 765 + }, + { + "epoch": 2.0118343195266273, + "high_lr": 0.0005973684210526316, + "low_lr": 1.1947368421052632e-05, + "step": 765 + }, + { + "epoch": 2.0118343195266273, + "high_lr": 0.0005973684210526316, + "low_lr": 1.1947368421052632e-05, + "step": 765 + }, + { + "epoch": 2.0118343195266273, + "high_lr": 0.0005973684210526316, + "low_lr": 1.1947368421052632e-05, + "step": 765 + }, + { + "epoch": 2.0144641683103224, + "grad_norm": 1.1116379499435425, + "learning_rate": 0.0005968421052631579, + "loss": 1.3936, + "step": 766 + }, + { + "epoch": 2.0144641683103224, + "high_lr": 0.0005968421052631579, + "low_lr": 1.1936842105263158e-05, + "step": 766 + }, + { + "epoch": 2.0144641683103224, + "high_lr": 0.0005968421052631579, + "low_lr": 1.1936842105263158e-05, + "step": 766 + }, + { + "epoch": 2.0144641683103224, + "high_lr": 0.0005968421052631579, + "low_lr": 1.1936842105263158e-05, + "step": 766 + }, + { + "epoch": 2.0144641683103224, + "high_lr": 0.0005968421052631579, + "low_lr": 1.1936842105263158e-05, + "step": 766 + }, + { + "epoch": 2.0144641683103224, + "high_lr": 0.0005968421052631579, + "low_lr": 1.1936842105263158e-05, + "step": 766 + }, + { + "epoch": 2.0144641683103224, + "high_lr": 0.0005968421052631579, + "low_lr": 1.1936842105263158e-05, + "step": 766 + }, + { + "epoch": 2.0144641683103224, + "high_lr": 0.0005968421052631579, + "low_lr": 1.1936842105263158e-05, + "step": 766 + }, + { + "epoch": 2.0144641683103224, + "high_lr": 0.0005968421052631579, + "low_lr": 1.1936842105263158e-05, + "step": 766 + }, + { + "epoch": 2.017094017094017, + "grad_norm": 1.1133373975753784, + "learning_rate": 0.0005963157894736843, + "loss": 1.351, + "step": 767 + }, + { + "epoch": 2.017094017094017, + "high_lr": 0.0005963157894736843, + "low_lr": 1.1926315789473686e-05, + "step": 767 + }, + { + "epoch": 2.017094017094017, + "high_lr": 0.0005963157894736843, + "low_lr": 1.1926315789473686e-05, + "step": 767 + }, + { + "epoch": 2.017094017094017, + "high_lr": 0.0005963157894736843, + "low_lr": 1.1926315789473686e-05, + "step": 767 + }, + { + "epoch": 2.017094017094017, + "high_lr": 0.0005963157894736843, + "low_lr": 1.1926315789473686e-05, + "step": 767 + }, + { + "epoch": 2.017094017094017, + "high_lr": 0.0005963157894736843, + "low_lr": 1.1926315789473686e-05, + "step": 767 + }, + { + "epoch": 2.017094017094017, + "high_lr": 0.0005963157894736843, + "low_lr": 1.1926315789473686e-05, + "step": 767 + }, + { + "epoch": 2.017094017094017, + "high_lr": 0.0005963157894736843, + "low_lr": 1.1926315789473686e-05, + "step": 767 + }, + { + "epoch": 2.017094017094017, + "high_lr": 0.0005963157894736843, + "low_lr": 1.1926315789473686e-05, + "step": 767 + }, + { + "epoch": 2.019723865877712, + "grad_norm": 1.1404736042022705, + "learning_rate": 0.0005957894736842106, + "loss": 1.3537, + "step": 768 + }, + { + "epoch": 2.019723865877712, + "high_lr": 0.0005957894736842106, + "low_lr": 1.1915789473684211e-05, + "step": 768 + }, + { + "epoch": 2.019723865877712, + "high_lr": 0.0005957894736842106, + "low_lr": 1.1915789473684211e-05, + "step": 768 + }, + { + "epoch": 2.019723865877712, + "high_lr": 0.0005957894736842106, + "low_lr": 1.1915789473684211e-05, + "step": 768 + }, + { + "epoch": 2.019723865877712, + "high_lr": 0.0005957894736842106, + "low_lr": 1.1915789473684211e-05, + "step": 768 + }, + { + "epoch": 2.019723865877712, + "high_lr": 0.0005957894736842106, + "low_lr": 1.1915789473684211e-05, + "step": 768 + }, + { + "epoch": 2.019723865877712, + "high_lr": 0.0005957894736842106, + "low_lr": 1.1915789473684211e-05, + "step": 768 + }, + { + "epoch": 2.019723865877712, + "high_lr": 0.0005957894736842106, + "low_lr": 1.1915789473684211e-05, + "step": 768 + }, + { + "epoch": 2.019723865877712, + "high_lr": 0.0005957894736842106, + "low_lr": 1.1915789473684211e-05, + "step": 768 + }, + { + "epoch": 2.022353714661407, + "grad_norm": 1.2281948328018188, + "learning_rate": 0.0005952631578947368, + "loss": 1.4146, + "step": 769 + }, + { + "epoch": 2.022353714661407, + "high_lr": 0.0005952631578947368, + "low_lr": 1.1905263157894737e-05, + "step": 769 + }, + { + "epoch": 2.022353714661407, + "high_lr": 0.0005952631578947368, + "low_lr": 1.1905263157894737e-05, + "step": 769 + }, + { + "epoch": 2.022353714661407, + "high_lr": 0.0005952631578947368, + "low_lr": 1.1905263157894737e-05, + "step": 769 + }, + { + "epoch": 2.022353714661407, + "high_lr": 0.0005952631578947368, + "low_lr": 1.1905263157894737e-05, + "step": 769 + }, + { + "epoch": 2.022353714661407, + "high_lr": 0.0005952631578947368, + "low_lr": 1.1905263157894737e-05, + "step": 769 + }, + { + "epoch": 2.022353714661407, + "high_lr": 0.0005952631578947368, + "low_lr": 1.1905263157894737e-05, + "step": 769 + }, + { + "epoch": 2.022353714661407, + "high_lr": 0.0005952631578947368, + "low_lr": 1.1905263157894737e-05, + "step": 769 + }, + { + "epoch": 2.022353714661407, + "high_lr": 0.0005952631578947368, + "low_lr": 1.1905263157894737e-05, + "step": 769 + }, + { + "epoch": 2.024983563445102, + "grad_norm": 1.2524970769882202, + "learning_rate": 0.0005947368421052631, + "loss": 1.4337, + "step": 770 + }, + { + "epoch": 2.024983563445102, + "high_lr": 0.0005947368421052631, + "low_lr": 1.1894736842105264e-05, + "step": 770 + }, + { + "epoch": 2.024983563445102, + "high_lr": 0.0005947368421052631, + "low_lr": 1.1894736842105264e-05, + "step": 770 + }, + { + "epoch": 2.024983563445102, + "high_lr": 0.0005947368421052631, + "low_lr": 1.1894736842105264e-05, + "step": 770 + }, + { + "epoch": 2.024983563445102, + "high_lr": 0.0005947368421052631, + "low_lr": 1.1894736842105264e-05, + "step": 770 + }, + { + "epoch": 2.024983563445102, + "high_lr": 0.0005947368421052631, + "low_lr": 1.1894736842105264e-05, + "step": 770 + }, + { + "epoch": 2.024983563445102, + "high_lr": 0.0005947368421052631, + "low_lr": 1.1894736842105264e-05, + "step": 770 + }, + { + "epoch": 2.024983563445102, + "high_lr": 0.0005947368421052631, + "low_lr": 1.1894736842105264e-05, + "step": 770 + }, + { + "epoch": 2.024983563445102, + "high_lr": 0.0005947368421052631, + "low_lr": 1.1894736842105264e-05, + "step": 770 + }, + { + "epoch": 2.027613412228797, + "grad_norm": 1.2224129438400269, + "learning_rate": 0.0005942105263157895, + "loss": 1.3969, + "step": 771 + }, + { + "epoch": 2.027613412228797, + "high_lr": 0.0005942105263157895, + "low_lr": 1.1884210526315792e-05, + "step": 771 + }, + { + "epoch": 2.027613412228797, + "high_lr": 0.0005942105263157895, + "low_lr": 1.1884210526315792e-05, + "step": 771 + }, + { + "epoch": 2.027613412228797, + "high_lr": 0.0005942105263157895, + "low_lr": 1.1884210526315792e-05, + "step": 771 + }, + { + "epoch": 2.027613412228797, + "high_lr": 0.0005942105263157895, + "low_lr": 1.1884210526315792e-05, + "step": 771 + }, + { + "epoch": 2.027613412228797, + "high_lr": 0.0005942105263157895, + "low_lr": 1.1884210526315792e-05, + "step": 771 + }, + { + "epoch": 2.027613412228797, + "high_lr": 0.0005942105263157895, + "low_lr": 1.1884210526315792e-05, + "step": 771 + }, + { + "epoch": 2.027613412228797, + "high_lr": 0.0005942105263157895, + "low_lr": 1.1884210526315792e-05, + "step": 771 + }, + { + "epoch": 2.027613412228797, + "high_lr": 0.0005942105263157895, + "low_lr": 1.1884210526315792e-05, + "step": 771 + }, + { + "epoch": 2.0302432610124916, + "grad_norm": 1.123277187347412, + "learning_rate": 0.0005936842105263158, + "loss": 1.3849, + "step": 772 + }, + { + "epoch": 2.0302432610124916, + "high_lr": 0.0005936842105263158, + "low_lr": 1.1873684210526318e-05, + "step": 772 + }, + { + "epoch": 2.0302432610124916, + "high_lr": 0.0005936842105263158, + "low_lr": 1.1873684210526318e-05, + "step": 772 + }, + { + "epoch": 2.0302432610124916, + "high_lr": 0.0005936842105263158, + "low_lr": 1.1873684210526318e-05, + "step": 772 + }, + { + "epoch": 2.0302432610124916, + "high_lr": 0.0005936842105263158, + "low_lr": 1.1873684210526318e-05, + "step": 772 + }, + { + "epoch": 2.0302432610124916, + "high_lr": 0.0005936842105263158, + "low_lr": 1.1873684210526318e-05, + "step": 772 + }, + { + "epoch": 2.0302432610124916, + "high_lr": 0.0005936842105263158, + "low_lr": 1.1873684210526318e-05, + "step": 772 + }, + { + "epoch": 2.0302432610124916, + "high_lr": 0.0005936842105263158, + "low_lr": 1.1873684210526318e-05, + "step": 772 + }, + { + "epoch": 2.0302432610124916, + "high_lr": 0.0005936842105263158, + "low_lr": 1.1873684210526318e-05, + "step": 772 + }, + { + "epoch": 2.0328731097961867, + "grad_norm": 1.2327888011932373, + "learning_rate": 0.0005931578947368421, + "loss": 1.4064, + "step": 773 + }, + { + "epoch": 2.0328731097961867, + "high_lr": 0.0005931578947368421, + "low_lr": 1.1863157894736843e-05, + "step": 773 + }, + { + "epoch": 2.0328731097961867, + "high_lr": 0.0005931578947368421, + "low_lr": 1.1863157894736843e-05, + "step": 773 + }, + { + "epoch": 2.0328731097961867, + "high_lr": 0.0005931578947368421, + "low_lr": 1.1863157894736843e-05, + "step": 773 + }, + { + "epoch": 2.0328731097961867, + "high_lr": 0.0005931578947368421, + "low_lr": 1.1863157894736843e-05, + "step": 773 + }, + { + "epoch": 2.0328731097961867, + "high_lr": 0.0005931578947368421, + "low_lr": 1.1863157894736843e-05, + "step": 773 + }, + { + "epoch": 2.0328731097961867, + "high_lr": 0.0005931578947368421, + "low_lr": 1.1863157894736843e-05, + "step": 773 + }, + { + "epoch": 2.0328731097961867, + "high_lr": 0.0005931578947368421, + "low_lr": 1.1863157894736843e-05, + "step": 773 + }, + { + "epoch": 2.0328731097961867, + "high_lr": 0.0005931578947368421, + "low_lr": 1.1863157894736843e-05, + "step": 773 + }, + { + "epoch": 2.035502958579882, + "grad_norm": 1.1804497241973877, + "learning_rate": 0.0005926315789473684, + "loss": 1.3843, + "step": 774 + }, + { + "epoch": 2.035502958579882, + "high_lr": 0.0005926315789473684, + "low_lr": 1.1852631578947369e-05, + "step": 774 + }, + { + "epoch": 2.035502958579882, + "high_lr": 0.0005926315789473684, + "low_lr": 1.1852631578947369e-05, + "step": 774 + }, + { + "epoch": 2.035502958579882, + "high_lr": 0.0005926315789473684, + "low_lr": 1.1852631578947369e-05, + "step": 774 + }, + { + "epoch": 2.035502958579882, + "high_lr": 0.0005926315789473684, + "low_lr": 1.1852631578947369e-05, + "step": 774 + }, + { + "epoch": 2.035502958579882, + "high_lr": 0.0005926315789473684, + "low_lr": 1.1852631578947369e-05, + "step": 774 + }, + { + "epoch": 2.035502958579882, + "high_lr": 0.0005926315789473684, + "low_lr": 1.1852631578947369e-05, + "step": 774 + }, + { + "epoch": 2.035502958579882, + "high_lr": 0.0005926315789473684, + "low_lr": 1.1852631578947369e-05, + "step": 774 + }, + { + "epoch": 2.035502958579882, + "high_lr": 0.0005926315789473684, + "low_lr": 1.1852631578947369e-05, + "step": 774 + }, + { + "epoch": 2.0381328073635765, + "grad_norm": 1.1749237775802612, + "learning_rate": 0.0005921052631578947, + "loss": 1.4147, + "step": 775 + }, + { + "epoch": 2.0381328073635765, + "high_lr": 0.0005921052631578947, + "low_lr": 1.1842105263157895e-05, + "step": 775 + }, + { + "epoch": 2.0381328073635765, + "high_lr": 0.0005921052631578947, + "low_lr": 1.1842105263157895e-05, + "step": 775 + }, + { + "epoch": 2.0381328073635765, + "high_lr": 0.0005921052631578947, + "low_lr": 1.1842105263157895e-05, + "step": 775 + }, + { + "epoch": 2.0381328073635765, + "high_lr": 0.0005921052631578947, + "low_lr": 1.1842105263157895e-05, + "step": 775 + }, + { + "epoch": 2.0381328073635765, + "high_lr": 0.0005921052631578947, + "low_lr": 1.1842105263157895e-05, + "step": 775 + }, + { + "epoch": 2.0381328073635765, + "high_lr": 0.0005921052631578947, + "low_lr": 1.1842105263157895e-05, + "step": 775 + }, + { + "epoch": 2.0381328073635765, + "high_lr": 0.0005921052631578947, + "low_lr": 1.1842105263157895e-05, + "step": 775 + }, + { + "epoch": 2.0381328073635765, + "high_lr": 0.0005921052631578947, + "low_lr": 1.1842105263157895e-05, + "step": 775 + }, + { + "epoch": 2.0407626561472716, + "grad_norm": 1.1598745584487915, + "learning_rate": 0.0005915789473684211, + "loss": 1.384, + "step": 776 + }, + { + "epoch": 2.0407626561472716, + "high_lr": 0.0005915789473684211, + "low_lr": 1.1831578947368423e-05, + "step": 776 + }, + { + "epoch": 2.0407626561472716, + "high_lr": 0.0005915789473684211, + "low_lr": 1.1831578947368423e-05, + "step": 776 + }, + { + "epoch": 2.0407626561472716, + "high_lr": 0.0005915789473684211, + "low_lr": 1.1831578947368423e-05, + "step": 776 + }, + { + "epoch": 2.0407626561472716, + "high_lr": 0.0005915789473684211, + "low_lr": 1.1831578947368423e-05, + "step": 776 + }, + { + "epoch": 2.0407626561472716, + "high_lr": 0.0005915789473684211, + "low_lr": 1.1831578947368423e-05, + "step": 776 + }, + { + "epoch": 2.0407626561472716, + "high_lr": 0.0005915789473684211, + "low_lr": 1.1831578947368423e-05, + "step": 776 + }, + { + "epoch": 2.0407626561472716, + "high_lr": 0.0005915789473684211, + "low_lr": 1.1831578947368423e-05, + "step": 776 + }, + { + "epoch": 2.0407626561472716, + "high_lr": 0.0005915789473684211, + "low_lr": 1.1831578947368423e-05, + "step": 776 + }, + { + "epoch": 2.0433925049309662, + "grad_norm": 1.0803080797195435, + "learning_rate": 0.0005910526315789473, + "loss": 1.4139, + "step": 777 + }, + { + "epoch": 2.0433925049309662, + "high_lr": 0.0005910526315789473, + "low_lr": 1.1821052631578948e-05, + "step": 777 + }, + { + "epoch": 2.0433925049309662, + "high_lr": 0.0005910526315789473, + "low_lr": 1.1821052631578948e-05, + "step": 777 + }, + { + "epoch": 2.0433925049309662, + "high_lr": 0.0005910526315789473, + "low_lr": 1.1821052631578948e-05, + "step": 777 + }, + { + "epoch": 2.0433925049309662, + "high_lr": 0.0005910526315789473, + "low_lr": 1.1821052631578948e-05, + "step": 777 + }, + { + "epoch": 2.0433925049309662, + "high_lr": 0.0005910526315789473, + "low_lr": 1.1821052631578948e-05, + "step": 777 + }, + { + "epoch": 2.0433925049309662, + "high_lr": 0.0005910526315789473, + "low_lr": 1.1821052631578948e-05, + "step": 777 + }, + { + "epoch": 2.0433925049309662, + "high_lr": 0.0005910526315789473, + "low_lr": 1.1821052631578948e-05, + "step": 777 + }, + { + "epoch": 2.0433925049309662, + "high_lr": 0.0005910526315789473, + "low_lr": 1.1821052631578948e-05, + "step": 777 + }, + { + "epoch": 2.0460223537146613, + "grad_norm": 1.0830882787704468, + "learning_rate": 0.0005905263157894736, + "loss": 1.3873, + "step": 778 + }, + { + "epoch": 2.0460223537146613, + "high_lr": 0.0005905263157894736, + "low_lr": 1.1810526315789474e-05, + "step": 778 + }, + { + "epoch": 2.0460223537146613, + "high_lr": 0.0005905263157894736, + "low_lr": 1.1810526315789474e-05, + "step": 778 + }, + { + "epoch": 2.0460223537146613, + "high_lr": 0.0005905263157894736, + "low_lr": 1.1810526315789474e-05, + "step": 778 + }, + { + "epoch": 2.0460223537146613, + "high_lr": 0.0005905263157894736, + "low_lr": 1.1810526315789474e-05, + "step": 778 + }, + { + "epoch": 2.0460223537146613, + "high_lr": 0.0005905263157894736, + "low_lr": 1.1810526315789474e-05, + "step": 778 + }, + { + "epoch": 2.0460223537146613, + "high_lr": 0.0005905263157894736, + "low_lr": 1.1810526315789474e-05, + "step": 778 + }, + { + "epoch": 2.0460223537146613, + "high_lr": 0.0005905263157894736, + "low_lr": 1.1810526315789474e-05, + "step": 778 + }, + { + "epoch": 2.0460223537146613, + "high_lr": 0.0005905263157894736, + "low_lr": 1.1810526315789474e-05, + "step": 778 + }, + { + "epoch": 2.0486522024983564, + "grad_norm": 1.1197872161865234, + "learning_rate": 0.00059, + "loss": 1.4179, + "step": 779 + }, + { + "epoch": 2.0486522024983564, + "high_lr": 0.00059, + "low_lr": 1.18e-05, + "step": 779 + }, + { + "epoch": 2.0486522024983564, + "high_lr": 0.00059, + "low_lr": 1.18e-05, + "step": 779 + }, + { + "epoch": 2.0486522024983564, + "high_lr": 0.00059, + "low_lr": 1.18e-05, + "step": 779 + }, + { + "epoch": 2.0486522024983564, + "high_lr": 0.00059, + "low_lr": 1.18e-05, + "step": 779 + }, + { + "epoch": 2.0486522024983564, + "high_lr": 0.00059, + "low_lr": 1.18e-05, + "step": 779 + }, + { + "epoch": 2.0486522024983564, + "high_lr": 0.00059, + "low_lr": 1.18e-05, + "step": 779 + }, + { + "epoch": 2.0486522024983564, + "high_lr": 0.00059, + "low_lr": 1.18e-05, + "step": 779 + }, + { + "epoch": 2.0486522024983564, + "high_lr": 0.00059, + "low_lr": 1.18e-05, + "step": 779 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 1.1884441375732422, + "learning_rate": 0.0005894736842105263, + "loss": 1.402, + "step": 780 + }, + { + "epoch": 2.051282051282051, + "high_lr": 0.0005894736842105263, + "low_lr": 1.1789473684210527e-05, + "step": 780 + }, + { + "epoch": 2.051282051282051, + "high_lr": 0.0005894736842105263, + "low_lr": 1.1789473684210527e-05, + "step": 780 + }, + { + "epoch": 2.051282051282051, + "high_lr": 0.0005894736842105263, + "low_lr": 1.1789473684210527e-05, + "step": 780 + }, + { + "epoch": 2.051282051282051, + "high_lr": 0.0005894736842105263, + "low_lr": 1.1789473684210527e-05, + "step": 780 + }, + { + "epoch": 2.051282051282051, + "high_lr": 0.0005894736842105263, + "low_lr": 1.1789473684210527e-05, + "step": 780 + }, + { + "epoch": 2.051282051282051, + "high_lr": 0.0005894736842105263, + "low_lr": 1.1789473684210527e-05, + "step": 780 + }, + { + "epoch": 2.051282051282051, + "high_lr": 0.0005894736842105263, + "low_lr": 1.1789473684210527e-05, + "step": 780 + }, + { + "epoch": 2.051282051282051, + "high_lr": 0.0005894736842105263, + "low_lr": 1.1789473684210527e-05, + "step": 780 + }, + { + "epoch": 2.053911900065746, + "grad_norm": 1.2280651330947876, + "learning_rate": 0.0005889473684210527, + "loss": 1.3927, + "step": 781 + }, + { + "epoch": 2.053911900065746, + "high_lr": 0.0005889473684210527, + "low_lr": 1.1778947368421055e-05, + "step": 781 + }, + { + "epoch": 2.053911900065746, + "high_lr": 0.0005889473684210527, + "low_lr": 1.1778947368421055e-05, + "step": 781 + }, + { + "epoch": 2.053911900065746, + "high_lr": 0.0005889473684210527, + "low_lr": 1.1778947368421055e-05, + "step": 781 + }, + { + "epoch": 2.053911900065746, + "high_lr": 0.0005889473684210527, + "low_lr": 1.1778947368421055e-05, + "step": 781 + }, + { + "epoch": 2.053911900065746, + "high_lr": 0.0005889473684210527, + "low_lr": 1.1778947368421055e-05, + "step": 781 + }, + { + "epoch": 2.053911900065746, + "high_lr": 0.0005889473684210527, + "low_lr": 1.1778947368421055e-05, + "step": 781 + }, + { + "epoch": 2.053911900065746, + "high_lr": 0.0005889473684210527, + "low_lr": 1.1778947368421055e-05, + "step": 781 + }, + { + "epoch": 2.053911900065746, + "high_lr": 0.0005889473684210527, + "low_lr": 1.1778947368421055e-05, + "step": 781 + }, + { + "epoch": 2.0565417488494413, + "grad_norm": 1.2406994104385376, + "learning_rate": 0.000588421052631579, + "loss": 1.4305, + "step": 782 + }, + { + "epoch": 2.0565417488494413, + "high_lr": 0.000588421052631579, + "low_lr": 1.176842105263158e-05, + "step": 782 + }, + { + "epoch": 2.0565417488494413, + "high_lr": 0.000588421052631579, + "low_lr": 1.176842105263158e-05, + "step": 782 + }, + { + "epoch": 2.0565417488494413, + "high_lr": 0.000588421052631579, + "low_lr": 1.176842105263158e-05, + "step": 782 + }, + { + "epoch": 2.0565417488494413, + "high_lr": 0.000588421052631579, + "low_lr": 1.176842105263158e-05, + "step": 782 + }, + { + "epoch": 2.0565417488494413, + "high_lr": 0.000588421052631579, + "low_lr": 1.176842105263158e-05, + "step": 782 + }, + { + "epoch": 2.0565417488494413, + "high_lr": 0.000588421052631579, + "low_lr": 1.176842105263158e-05, + "step": 782 + }, + { + "epoch": 2.0565417488494413, + "high_lr": 0.000588421052631579, + "low_lr": 1.176842105263158e-05, + "step": 782 + }, + { + "epoch": 2.0565417488494413, + "high_lr": 0.000588421052631579, + "low_lr": 1.176842105263158e-05, + "step": 782 + }, + { + "epoch": 2.059171597633136, + "grad_norm": 1.098323941230774, + "learning_rate": 0.0005878947368421053, + "loss": 1.3512, + "step": 783 + }, + { + "epoch": 2.059171597633136, + "high_lr": 0.0005878947368421053, + "low_lr": 1.1757894736842106e-05, + "step": 783 + }, + { + "epoch": 2.059171597633136, + "high_lr": 0.0005878947368421053, + "low_lr": 1.1757894736842106e-05, + "step": 783 + }, + { + "epoch": 2.059171597633136, + "high_lr": 0.0005878947368421053, + "low_lr": 1.1757894736842106e-05, + "step": 783 + }, + { + "epoch": 2.059171597633136, + "high_lr": 0.0005878947368421053, + "low_lr": 1.1757894736842106e-05, + "step": 783 + }, + { + "epoch": 2.059171597633136, + "high_lr": 0.0005878947368421053, + "low_lr": 1.1757894736842106e-05, + "step": 783 + }, + { + "epoch": 2.059171597633136, + "high_lr": 0.0005878947368421053, + "low_lr": 1.1757894736842106e-05, + "step": 783 + }, + { + "epoch": 2.059171597633136, + "high_lr": 0.0005878947368421053, + "low_lr": 1.1757894736842106e-05, + "step": 783 + }, + { + "epoch": 2.059171597633136, + "high_lr": 0.0005878947368421053, + "low_lr": 1.1757894736842106e-05, + "step": 783 + }, + { + "epoch": 2.061801446416831, + "grad_norm": 1.1673874855041504, + "learning_rate": 0.0005873684210526316, + "loss": 1.3542, + "step": 784 + }, + { + "epoch": 2.061801446416831, + "high_lr": 0.0005873684210526316, + "low_lr": 1.1747368421052632e-05, + "step": 784 + }, + { + "epoch": 2.061801446416831, + "high_lr": 0.0005873684210526316, + "low_lr": 1.1747368421052632e-05, + "step": 784 + }, + { + "epoch": 2.061801446416831, + "high_lr": 0.0005873684210526316, + "low_lr": 1.1747368421052632e-05, + "step": 784 + }, + { + "epoch": 2.061801446416831, + "high_lr": 0.0005873684210526316, + "low_lr": 1.1747368421052632e-05, + "step": 784 + }, + { + "epoch": 2.061801446416831, + "high_lr": 0.0005873684210526316, + "low_lr": 1.1747368421052632e-05, + "step": 784 + }, + { + "epoch": 2.061801446416831, + "high_lr": 0.0005873684210526316, + "low_lr": 1.1747368421052632e-05, + "step": 784 + }, + { + "epoch": 2.061801446416831, + "high_lr": 0.0005873684210526316, + "low_lr": 1.1747368421052632e-05, + "step": 784 + }, + { + "epoch": 2.061801446416831, + "high_lr": 0.0005873684210526316, + "low_lr": 1.1747368421052632e-05, + "step": 784 + }, + { + "epoch": 2.064431295200526, + "grad_norm": 1.362140417098999, + "learning_rate": 0.000586842105263158, + "loss": 1.3845, + "step": 785 + }, + { + "epoch": 2.064431295200526, + "high_lr": 0.000586842105263158, + "low_lr": 1.173684210526316e-05, + "step": 785 + }, + { + "epoch": 2.064431295200526, + "high_lr": 0.000586842105263158, + "low_lr": 1.173684210526316e-05, + "step": 785 + }, + { + "epoch": 2.064431295200526, + "high_lr": 0.000586842105263158, + "low_lr": 1.173684210526316e-05, + "step": 785 + }, + { + "epoch": 2.064431295200526, + "high_lr": 0.000586842105263158, + "low_lr": 1.173684210526316e-05, + "step": 785 + }, + { + "epoch": 2.064431295200526, + "high_lr": 0.000586842105263158, + "low_lr": 1.173684210526316e-05, + "step": 785 + }, + { + "epoch": 2.064431295200526, + "high_lr": 0.000586842105263158, + "low_lr": 1.173684210526316e-05, + "step": 785 + }, + { + "epoch": 2.064431295200526, + "high_lr": 0.000586842105263158, + "low_lr": 1.173684210526316e-05, + "step": 785 + }, + { + "epoch": 2.064431295200526, + "high_lr": 0.000586842105263158, + "low_lr": 1.173684210526316e-05, + "step": 785 + }, + { + "epoch": 2.067061143984221, + "grad_norm": 1.187095284461975, + "learning_rate": 0.0005863157894736842, + "loss": 1.4345, + "step": 786 + }, + { + "epoch": 2.067061143984221, + "high_lr": 0.0005863157894736842, + "low_lr": 1.1726315789473685e-05, + "step": 786 + }, + { + "epoch": 2.067061143984221, + "high_lr": 0.0005863157894736842, + "low_lr": 1.1726315789473685e-05, + "step": 786 + }, + { + "epoch": 2.067061143984221, + "high_lr": 0.0005863157894736842, + "low_lr": 1.1726315789473685e-05, + "step": 786 + }, + { + "epoch": 2.067061143984221, + "high_lr": 0.0005863157894736842, + "low_lr": 1.1726315789473685e-05, + "step": 786 + }, + { + "epoch": 2.067061143984221, + "high_lr": 0.0005863157894736842, + "low_lr": 1.1726315789473685e-05, + "step": 786 + }, + { + "epoch": 2.067061143984221, + "high_lr": 0.0005863157894736842, + "low_lr": 1.1726315789473685e-05, + "step": 786 + }, + { + "epoch": 2.067061143984221, + "high_lr": 0.0005863157894736842, + "low_lr": 1.1726315789473685e-05, + "step": 786 + }, + { + "epoch": 2.067061143984221, + "high_lr": 0.0005863157894736842, + "low_lr": 1.1726315789473685e-05, + "step": 786 + }, + { + "epoch": 2.069690992767916, + "grad_norm": 1.184488296508789, + "learning_rate": 0.0005857894736842105, + "loss": 1.4132, + "step": 787 + }, + { + "epoch": 2.069690992767916, + "high_lr": 0.0005857894736842105, + "low_lr": 1.1715789473684211e-05, + "step": 787 + }, + { + "epoch": 2.069690992767916, + "high_lr": 0.0005857894736842105, + "low_lr": 1.1715789473684211e-05, + "step": 787 + }, + { + "epoch": 2.069690992767916, + "high_lr": 0.0005857894736842105, + "low_lr": 1.1715789473684211e-05, + "step": 787 + }, + { + "epoch": 2.069690992767916, + "high_lr": 0.0005857894736842105, + "low_lr": 1.1715789473684211e-05, + "step": 787 + }, + { + "epoch": 2.069690992767916, + "high_lr": 0.0005857894736842105, + "low_lr": 1.1715789473684211e-05, + "step": 787 + }, + { + "epoch": 2.069690992767916, + "high_lr": 0.0005857894736842105, + "low_lr": 1.1715789473684211e-05, + "step": 787 + }, + { + "epoch": 2.069690992767916, + "high_lr": 0.0005857894736842105, + "low_lr": 1.1715789473684211e-05, + "step": 787 + }, + { + "epoch": 2.069690992767916, + "high_lr": 0.0005857894736842105, + "low_lr": 1.1715789473684211e-05, + "step": 787 + }, + { + "epoch": 2.072320841551611, + "grad_norm": 1.1735727787017822, + "learning_rate": 0.0005852631578947368, + "loss": 1.3603, + "step": 788 + }, + { + "epoch": 2.072320841551611, + "high_lr": 0.0005852631578947368, + "low_lr": 1.1705263157894737e-05, + "step": 788 + }, + { + "epoch": 2.072320841551611, + "high_lr": 0.0005852631578947368, + "low_lr": 1.1705263157894737e-05, + "step": 788 + }, + { + "epoch": 2.072320841551611, + "high_lr": 0.0005852631578947368, + "low_lr": 1.1705263157894737e-05, + "step": 788 + }, + { + "epoch": 2.072320841551611, + "high_lr": 0.0005852631578947368, + "low_lr": 1.1705263157894737e-05, + "step": 788 + }, + { + "epoch": 2.072320841551611, + "high_lr": 0.0005852631578947368, + "low_lr": 1.1705263157894737e-05, + "step": 788 + }, + { + "epoch": 2.072320841551611, + "high_lr": 0.0005852631578947368, + "low_lr": 1.1705263157894737e-05, + "step": 788 + }, + { + "epoch": 2.072320841551611, + "high_lr": 0.0005852631578947368, + "low_lr": 1.1705263157894737e-05, + "step": 788 + }, + { + "epoch": 2.072320841551611, + "high_lr": 0.0005852631578947368, + "low_lr": 1.1705263157894737e-05, + "step": 788 + }, + { + "epoch": 2.0749506903353057, + "grad_norm": 1.1374088525772095, + "learning_rate": 0.0005847368421052631, + "loss": 1.3822, + "step": 789 + }, + { + "epoch": 2.0749506903353057, + "high_lr": 0.0005847368421052631, + "low_lr": 1.1694736842105264e-05, + "step": 789 + }, + { + "epoch": 2.0749506903353057, + "high_lr": 0.0005847368421052631, + "low_lr": 1.1694736842105264e-05, + "step": 789 + }, + { + "epoch": 2.0749506903353057, + "high_lr": 0.0005847368421052631, + "low_lr": 1.1694736842105264e-05, + "step": 789 + }, + { + "epoch": 2.0749506903353057, + "high_lr": 0.0005847368421052631, + "low_lr": 1.1694736842105264e-05, + "step": 789 + }, + { + "epoch": 2.0749506903353057, + "high_lr": 0.0005847368421052631, + "low_lr": 1.1694736842105264e-05, + "step": 789 + }, + { + "epoch": 2.0749506903353057, + "high_lr": 0.0005847368421052631, + "low_lr": 1.1694736842105264e-05, + "step": 789 + }, + { + "epoch": 2.0749506903353057, + "high_lr": 0.0005847368421052631, + "low_lr": 1.1694736842105264e-05, + "step": 789 + }, + { + "epoch": 2.0749506903353057, + "high_lr": 0.0005847368421052631, + "low_lr": 1.1694736842105264e-05, + "step": 789 + }, + { + "epoch": 2.0775805391190008, + "grad_norm": 1.120962142944336, + "learning_rate": 0.0005842105263157895, + "loss": 1.3845, + "step": 790 + }, + { + "epoch": 2.0775805391190008, + "high_lr": 0.0005842105263157895, + "low_lr": 1.1684210526315792e-05, + "step": 790 + }, + { + "epoch": 2.0775805391190008, + "high_lr": 0.0005842105263157895, + "low_lr": 1.1684210526315792e-05, + "step": 790 + }, + { + "epoch": 2.0775805391190008, + "high_lr": 0.0005842105263157895, + "low_lr": 1.1684210526315792e-05, + "step": 790 + }, + { + "epoch": 2.0775805391190008, + "high_lr": 0.0005842105263157895, + "low_lr": 1.1684210526315792e-05, + "step": 790 + }, + { + "epoch": 2.0775805391190008, + "high_lr": 0.0005842105263157895, + "low_lr": 1.1684210526315792e-05, + "step": 790 + }, + { + "epoch": 2.0775805391190008, + "high_lr": 0.0005842105263157895, + "low_lr": 1.1684210526315792e-05, + "step": 790 + }, + { + "epoch": 2.0775805391190008, + "high_lr": 0.0005842105263157895, + "low_lr": 1.1684210526315792e-05, + "step": 790 + }, + { + "epoch": 2.0775805391190008, + "high_lr": 0.0005842105263157895, + "low_lr": 1.1684210526315792e-05, + "step": 790 + }, + { + "epoch": 2.0802103879026954, + "grad_norm": 1.1211026906967163, + "learning_rate": 0.0005836842105263158, + "loss": 1.3344, + "step": 791 + }, + { + "epoch": 2.0802103879026954, + "high_lr": 0.0005836842105263158, + "low_lr": 1.1673684210526316e-05, + "step": 791 + }, + { + "epoch": 2.0802103879026954, + "high_lr": 0.0005836842105263158, + "low_lr": 1.1673684210526316e-05, + "step": 791 + }, + { + "epoch": 2.0802103879026954, + "high_lr": 0.0005836842105263158, + "low_lr": 1.1673684210526316e-05, + "step": 791 + }, + { + "epoch": 2.0802103879026954, + "high_lr": 0.0005836842105263158, + "low_lr": 1.1673684210526316e-05, + "step": 791 + }, + { + "epoch": 2.0802103879026954, + "high_lr": 0.0005836842105263158, + "low_lr": 1.1673684210526316e-05, + "step": 791 + }, + { + "epoch": 2.0802103879026954, + "high_lr": 0.0005836842105263158, + "low_lr": 1.1673684210526316e-05, + "step": 791 + }, + { + "epoch": 2.0802103879026954, + "high_lr": 0.0005836842105263158, + "low_lr": 1.1673684210526316e-05, + "step": 791 + }, + { + "epoch": 2.0802103879026954, + "high_lr": 0.0005836842105263158, + "low_lr": 1.1673684210526316e-05, + "step": 791 + }, + { + "epoch": 2.0828402366863905, + "grad_norm": 1.1464637517929077, + "learning_rate": 0.0005831578947368421, + "loss": 1.4136, + "step": 792 + }, + { + "epoch": 2.0828402366863905, + "high_lr": 0.0005831578947368421, + "low_lr": 1.1663157894736843e-05, + "step": 792 + }, + { + "epoch": 2.0828402366863905, + "high_lr": 0.0005831578947368421, + "low_lr": 1.1663157894736843e-05, + "step": 792 + }, + { + "epoch": 2.0828402366863905, + "high_lr": 0.0005831578947368421, + "low_lr": 1.1663157894736843e-05, + "step": 792 + }, + { + "epoch": 2.0828402366863905, + "high_lr": 0.0005831578947368421, + "low_lr": 1.1663157894736843e-05, + "step": 792 + }, + { + "epoch": 2.0828402366863905, + "high_lr": 0.0005831578947368421, + "low_lr": 1.1663157894736843e-05, + "step": 792 + }, + { + "epoch": 2.0828402366863905, + "high_lr": 0.0005831578947368421, + "low_lr": 1.1663157894736843e-05, + "step": 792 + }, + { + "epoch": 2.0828402366863905, + "high_lr": 0.0005831578947368421, + "low_lr": 1.1663157894736843e-05, + "step": 792 + }, + { + "epoch": 2.0828402366863905, + "high_lr": 0.0005831578947368421, + "low_lr": 1.1663157894736843e-05, + "step": 792 + }, + { + "epoch": 2.0854700854700856, + "grad_norm": 1.169979214668274, + "learning_rate": 0.0005826315789473684, + "loss": 1.4088, + "step": 793 + }, + { + "epoch": 2.0854700854700856, + "high_lr": 0.0005826315789473684, + "low_lr": 1.1652631578947369e-05, + "step": 793 + }, + { + "epoch": 2.0854700854700856, + "high_lr": 0.0005826315789473684, + "low_lr": 1.1652631578947369e-05, + "step": 793 + }, + { + "epoch": 2.0854700854700856, + "high_lr": 0.0005826315789473684, + "low_lr": 1.1652631578947369e-05, + "step": 793 + }, + { + "epoch": 2.0854700854700856, + "high_lr": 0.0005826315789473684, + "low_lr": 1.1652631578947369e-05, + "step": 793 + }, + { + "epoch": 2.0854700854700856, + "high_lr": 0.0005826315789473684, + "low_lr": 1.1652631578947369e-05, + "step": 793 + }, + { + "epoch": 2.0854700854700856, + "high_lr": 0.0005826315789473684, + "low_lr": 1.1652631578947369e-05, + "step": 793 + }, + { + "epoch": 2.0854700854700856, + "high_lr": 0.0005826315789473684, + "low_lr": 1.1652631578947369e-05, + "step": 793 + }, + { + "epoch": 2.0854700854700856, + "high_lr": 0.0005826315789473684, + "low_lr": 1.1652631578947369e-05, + "step": 793 + }, + { + "epoch": 2.0880999342537803, + "grad_norm": 1.22758150100708, + "learning_rate": 0.0005821052631578948, + "loss": 1.4294, + "step": 794 + }, + { + "epoch": 2.0880999342537803, + "high_lr": 0.0005821052631578948, + "low_lr": 1.1642105263157897e-05, + "step": 794 + }, + { + "epoch": 2.0880999342537803, + "high_lr": 0.0005821052631578948, + "low_lr": 1.1642105263157897e-05, + "step": 794 + }, + { + "epoch": 2.0880999342537803, + "high_lr": 0.0005821052631578948, + "low_lr": 1.1642105263157897e-05, + "step": 794 + }, + { + "epoch": 2.0880999342537803, + "high_lr": 0.0005821052631578948, + "low_lr": 1.1642105263157897e-05, + "step": 794 + }, + { + "epoch": 2.0880999342537803, + "high_lr": 0.0005821052631578948, + "low_lr": 1.1642105263157897e-05, + "step": 794 + }, + { + "epoch": 2.0880999342537803, + "high_lr": 0.0005821052631578948, + "low_lr": 1.1642105263157897e-05, + "step": 794 + }, + { + "epoch": 2.0880999342537803, + "high_lr": 0.0005821052631578948, + "low_lr": 1.1642105263157897e-05, + "step": 794 + }, + { + "epoch": 2.0880999342537803, + "high_lr": 0.0005821052631578948, + "low_lr": 1.1642105263157897e-05, + "step": 794 + }, + { + "epoch": 2.0907297830374754, + "grad_norm": 1.3216019868850708, + "learning_rate": 0.0005815789473684211, + "loss": 1.4271, + "step": 795 + }, + { + "epoch": 2.0907297830374754, + "high_lr": 0.0005815789473684211, + "low_lr": 1.1631578947368423e-05, + "step": 795 + }, + { + "epoch": 2.0907297830374754, + "high_lr": 0.0005815789473684211, + "low_lr": 1.1631578947368423e-05, + "step": 795 + }, + { + "epoch": 2.0907297830374754, + "high_lr": 0.0005815789473684211, + "low_lr": 1.1631578947368423e-05, + "step": 795 + }, + { + "epoch": 2.0907297830374754, + "high_lr": 0.0005815789473684211, + "low_lr": 1.1631578947368423e-05, + "step": 795 + }, + { + "epoch": 2.0907297830374754, + "high_lr": 0.0005815789473684211, + "low_lr": 1.1631578947368423e-05, + "step": 795 + }, + { + "epoch": 2.0907297830374754, + "high_lr": 0.0005815789473684211, + "low_lr": 1.1631578947368423e-05, + "step": 795 + }, + { + "epoch": 2.0907297830374754, + "high_lr": 0.0005815789473684211, + "low_lr": 1.1631578947368423e-05, + "step": 795 + }, + { + "epoch": 2.0907297830374754, + "high_lr": 0.0005815789473684211, + "low_lr": 1.1631578947368423e-05, + "step": 795 + }, + { + "epoch": 2.0933596318211705, + "grad_norm": 1.0885225534439087, + "learning_rate": 0.0005810526315789474, + "loss": 1.3856, + "step": 796 + }, + { + "epoch": 2.0933596318211705, + "high_lr": 0.0005810526315789474, + "low_lr": 1.1621052631578948e-05, + "step": 796 + }, + { + "epoch": 2.0933596318211705, + "high_lr": 0.0005810526315789474, + "low_lr": 1.1621052631578948e-05, + "step": 796 + }, + { + "epoch": 2.0933596318211705, + "high_lr": 0.0005810526315789474, + "low_lr": 1.1621052631578948e-05, + "step": 796 + }, + { + "epoch": 2.0933596318211705, + "high_lr": 0.0005810526315789474, + "low_lr": 1.1621052631578948e-05, + "step": 796 + }, + { + "epoch": 2.0933596318211705, + "high_lr": 0.0005810526315789474, + "low_lr": 1.1621052631578948e-05, + "step": 796 + }, + { + "epoch": 2.0933596318211705, + "high_lr": 0.0005810526315789474, + "low_lr": 1.1621052631578948e-05, + "step": 796 + }, + { + "epoch": 2.0933596318211705, + "high_lr": 0.0005810526315789474, + "low_lr": 1.1621052631578948e-05, + "step": 796 + }, + { + "epoch": 2.0933596318211705, + "high_lr": 0.0005810526315789474, + "low_lr": 1.1621052631578948e-05, + "step": 796 + }, + { + "epoch": 2.095989480604865, + "grad_norm": 1.1253178119659424, + "learning_rate": 0.0005805263157894737, + "loss": 1.4179, + "step": 797 + }, + { + "epoch": 2.095989480604865, + "high_lr": 0.0005805263157894737, + "low_lr": 1.1610526315789474e-05, + "step": 797 + }, + { + "epoch": 2.095989480604865, + "high_lr": 0.0005805263157894737, + "low_lr": 1.1610526315789474e-05, + "step": 797 + }, + { + "epoch": 2.095989480604865, + "high_lr": 0.0005805263157894737, + "low_lr": 1.1610526315789474e-05, + "step": 797 + }, + { + "epoch": 2.095989480604865, + "high_lr": 0.0005805263157894737, + "low_lr": 1.1610526315789474e-05, + "step": 797 + }, + { + "epoch": 2.095989480604865, + "high_lr": 0.0005805263157894737, + "low_lr": 1.1610526315789474e-05, + "step": 797 + }, + { + "epoch": 2.095989480604865, + "high_lr": 0.0005805263157894737, + "low_lr": 1.1610526315789474e-05, + "step": 797 + }, + { + "epoch": 2.095989480604865, + "high_lr": 0.0005805263157894737, + "low_lr": 1.1610526315789474e-05, + "step": 797 + }, + { + "epoch": 2.095989480604865, + "high_lr": 0.0005805263157894737, + "low_lr": 1.1610526315789474e-05, + "step": 797 + }, + { + "epoch": 2.09861932938856, + "grad_norm": 1.229320764541626, + "learning_rate": 0.00058, + "loss": 1.4089, + "step": 798 + }, + { + "epoch": 2.09861932938856, + "high_lr": 0.00058, + "low_lr": 1.16e-05, + "step": 798 + }, + { + "epoch": 2.09861932938856, + "high_lr": 0.00058, + "low_lr": 1.16e-05, + "step": 798 + }, + { + "epoch": 2.09861932938856, + "high_lr": 0.00058, + "low_lr": 1.16e-05, + "step": 798 + }, + { + "epoch": 2.09861932938856, + "high_lr": 0.00058, + "low_lr": 1.16e-05, + "step": 798 + }, + { + "epoch": 2.09861932938856, + "high_lr": 0.00058, + "low_lr": 1.16e-05, + "step": 798 + }, + { + "epoch": 2.09861932938856, + "high_lr": 0.00058, + "low_lr": 1.16e-05, + "step": 798 + }, + { + "epoch": 2.09861932938856, + "high_lr": 0.00058, + "low_lr": 1.16e-05, + "step": 798 + }, + { + "epoch": 2.09861932938856, + "high_lr": 0.00058, + "low_lr": 1.16e-05, + "step": 798 + }, + { + "epoch": 2.101249178172255, + "grad_norm": 1.207044005393982, + "learning_rate": 0.0005794736842105264, + "loss": 1.4144, + "step": 799 + }, + { + "epoch": 2.101249178172255, + "high_lr": 0.0005794736842105264, + "low_lr": 1.1589473684210529e-05, + "step": 799 + }, + { + "epoch": 2.101249178172255, + "high_lr": 0.0005794736842105264, + "low_lr": 1.1589473684210529e-05, + "step": 799 + }, + { + "epoch": 2.101249178172255, + "high_lr": 0.0005794736842105264, + "low_lr": 1.1589473684210529e-05, + "step": 799 + }, + { + "epoch": 2.101249178172255, + "high_lr": 0.0005794736842105264, + "low_lr": 1.1589473684210529e-05, + "step": 799 + }, + { + "epoch": 2.101249178172255, + "high_lr": 0.0005794736842105264, + "low_lr": 1.1589473684210529e-05, + "step": 799 + }, + { + "epoch": 2.101249178172255, + "high_lr": 0.0005794736842105264, + "low_lr": 1.1589473684210529e-05, + "step": 799 + }, + { + "epoch": 2.101249178172255, + "high_lr": 0.0005794736842105264, + "low_lr": 1.1589473684210529e-05, + "step": 799 + }, + { + "epoch": 2.101249178172255, + "high_lr": 0.0005794736842105264, + "low_lr": 1.1589473684210529e-05, + "step": 799 + }, + { + "epoch": 2.10387902695595, + "grad_norm": 1.188481092453003, + "learning_rate": 0.0005789473684210527, + "loss": 1.3837, + "step": 800 + }, + { + "epoch": 2.10387902695595, + "high_lr": 0.0005789473684210527, + "low_lr": 1.1578947368421053e-05, + "step": 800 + }, + { + "epoch": 2.10387902695595, + "high_lr": 0.0005789473684210527, + "low_lr": 1.1578947368421053e-05, + "step": 800 + }, + { + "epoch": 2.10387902695595, + "high_lr": 0.0005789473684210527, + "low_lr": 1.1578947368421053e-05, + "step": 800 + }, + { + "epoch": 2.10387902695595, + "high_lr": 0.0005789473684210527, + "low_lr": 1.1578947368421053e-05, + "step": 800 + }, + { + "epoch": 2.10387902695595, + "high_lr": 0.0005789473684210527, + "low_lr": 1.1578947368421053e-05, + "step": 800 + }, + { + "epoch": 2.10387902695595, + "high_lr": 0.0005789473684210527, + "low_lr": 1.1578947368421053e-05, + "step": 800 + }, + { + "epoch": 2.10387902695595, + "high_lr": 0.0005789473684210527, + "low_lr": 1.1578947368421053e-05, + "step": 800 + }, + { + "epoch": 2.10387902695595, + "high_lr": 0.0005789473684210527, + "low_lr": 1.1578947368421053e-05, + "step": 800 + }, + { + "epoch": 2.106508875739645, + "grad_norm": 1.1202236413955688, + "learning_rate": 0.000578421052631579, + "loss": 1.3754, + "step": 801 + }, + { + "epoch": 2.106508875739645, + "high_lr": 0.000578421052631579, + "low_lr": 1.156842105263158e-05, + "step": 801 + }, + { + "epoch": 2.106508875739645, + "high_lr": 0.000578421052631579, + "low_lr": 1.156842105263158e-05, + "step": 801 + }, + { + "epoch": 2.106508875739645, + "high_lr": 0.000578421052631579, + "low_lr": 1.156842105263158e-05, + "step": 801 + }, + { + "epoch": 2.106508875739645, + "high_lr": 0.000578421052631579, + "low_lr": 1.156842105263158e-05, + "step": 801 + }, + { + "epoch": 2.106508875739645, + "high_lr": 0.000578421052631579, + "low_lr": 1.156842105263158e-05, + "step": 801 + }, + { + "epoch": 2.106508875739645, + "high_lr": 0.000578421052631579, + "low_lr": 1.156842105263158e-05, + "step": 801 + }, + { + "epoch": 2.106508875739645, + "high_lr": 0.000578421052631579, + "low_lr": 1.156842105263158e-05, + "step": 801 + }, + { + "epoch": 2.106508875739645, + "high_lr": 0.000578421052631579, + "low_lr": 1.156842105263158e-05, + "step": 801 + }, + { + "epoch": 2.1091387245233397, + "grad_norm": 1.2411617040634155, + "learning_rate": 0.0005778947368421053, + "loss": 1.3105, + "step": 802 + }, + { + "epoch": 2.1091387245233397, + "high_lr": 0.0005778947368421053, + "low_lr": 1.1557894736842106e-05, + "step": 802 + }, + { + "epoch": 2.1091387245233397, + "high_lr": 0.0005778947368421053, + "low_lr": 1.1557894736842106e-05, + "step": 802 + }, + { + "epoch": 2.1091387245233397, + "high_lr": 0.0005778947368421053, + "low_lr": 1.1557894736842106e-05, + "step": 802 + }, + { + "epoch": 2.1091387245233397, + "high_lr": 0.0005778947368421053, + "low_lr": 1.1557894736842106e-05, + "step": 802 + }, + { + "epoch": 2.1091387245233397, + "high_lr": 0.0005778947368421053, + "low_lr": 1.1557894736842106e-05, + "step": 802 + }, + { + "epoch": 2.1091387245233397, + "high_lr": 0.0005778947368421053, + "low_lr": 1.1557894736842106e-05, + "step": 802 + }, + { + "epoch": 2.1091387245233397, + "high_lr": 0.0005778947368421053, + "low_lr": 1.1557894736842106e-05, + "step": 802 + }, + { + "epoch": 2.1091387245233397, + "high_lr": 0.0005778947368421053, + "low_lr": 1.1557894736842106e-05, + "step": 802 + }, + { + "epoch": 2.111768573307035, + "grad_norm": 1.1825741529464722, + "learning_rate": 0.0005773684210526315, + "loss": 1.3767, + "step": 803 + }, + { + "epoch": 2.111768573307035, + "high_lr": 0.0005773684210526315, + "low_lr": 1.1547368421052632e-05, + "step": 803 + }, + { + "epoch": 2.111768573307035, + "high_lr": 0.0005773684210526315, + "low_lr": 1.1547368421052632e-05, + "step": 803 + }, + { + "epoch": 2.111768573307035, + "high_lr": 0.0005773684210526315, + "low_lr": 1.1547368421052632e-05, + "step": 803 + }, + { + "epoch": 2.111768573307035, + "high_lr": 0.0005773684210526315, + "low_lr": 1.1547368421052632e-05, + "step": 803 + }, + { + "epoch": 2.111768573307035, + "high_lr": 0.0005773684210526315, + "low_lr": 1.1547368421052632e-05, + "step": 803 + }, + { + "epoch": 2.111768573307035, + "high_lr": 0.0005773684210526315, + "low_lr": 1.1547368421052632e-05, + "step": 803 + }, + { + "epoch": 2.111768573307035, + "high_lr": 0.0005773684210526315, + "low_lr": 1.1547368421052632e-05, + "step": 803 + }, + { + "epoch": 2.111768573307035, + "high_lr": 0.0005773684210526315, + "low_lr": 1.1547368421052632e-05, + "step": 803 + }, + { + "epoch": 2.11439842209073, + "grad_norm": 1.1901229619979858, + "learning_rate": 0.0005768421052631579, + "loss": 1.4043, + "step": 804 + }, + { + "epoch": 2.11439842209073, + "high_lr": 0.0005768421052631579, + "low_lr": 1.153684210526316e-05, + "step": 804 + }, + { + "epoch": 2.11439842209073, + "high_lr": 0.0005768421052631579, + "low_lr": 1.153684210526316e-05, + "step": 804 + }, + { + "epoch": 2.11439842209073, + "high_lr": 0.0005768421052631579, + "low_lr": 1.153684210526316e-05, + "step": 804 + }, + { + "epoch": 2.11439842209073, + "high_lr": 0.0005768421052631579, + "low_lr": 1.153684210526316e-05, + "step": 804 + }, + { + "epoch": 2.11439842209073, + "high_lr": 0.0005768421052631579, + "low_lr": 1.153684210526316e-05, + "step": 804 + }, + { + "epoch": 2.11439842209073, + "high_lr": 0.0005768421052631579, + "low_lr": 1.153684210526316e-05, + "step": 804 + }, + { + "epoch": 2.11439842209073, + "high_lr": 0.0005768421052631579, + "low_lr": 1.153684210526316e-05, + "step": 804 + }, + { + "epoch": 2.11439842209073, + "high_lr": 0.0005768421052631579, + "low_lr": 1.153684210526316e-05, + "step": 804 + }, + { + "epoch": 2.1170282708744246, + "grad_norm": 1.2637526988983154, + "learning_rate": 0.0005763157894736842, + "loss": 1.4499, + "step": 805 + }, + { + "epoch": 2.1170282708744246, + "high_lr": 0.0005763157894736842, + "low_lr": 1.1526315789473685e-05, + "step": 805 + }, + { + "epoch": 2.1170282708744246, + "high_lr": 0.0005763157894736842, + "low_lr": 1.1526315789473685e-05, + "step": 805 + }, + { + "epoch": 2.1170282708744246, + "high_lr": 0.0005763157894736842, + "low_lr": 1.1526315789473685e-05, + "step": 805 + }, + { + "epoch": 2.1170282708744246, + "high_lr": 0.0005763157894736842, + "low_lr": 1.1526315789473685e-05, + "step": 805 + }, + { + "epoch": 2.1170282708744246, + "high_lr": 0.0005763157894736842, + "low_lr": 1.1526315789473685e-05, + "step": 805 + }, + { + "epoch": 2.1170282708744246, + "high_lr": 0.0005763157894736842, + "low_lr": 1.1526315789473685e-05, + "step": 805 + }, + { + "epoch": 2.1170282708744246, + "high_lr": 0.0005763157894736842, + "low_lr": 1.1526315789473685e-05, + "step": 805 + }, + { + "epoch": 2.1170282708744246, + "high_lr": 0.0005763157894736842, + "low_lr": 1.1526315789473685e-05, + "step": 805 + }, + { + "epoch": 2.1196581196581197, + "grad_norm": 1.1765457391738892, + "learning_rate": 0.0005757894736842105, + "loss": 1.365, + "step": 806 + }, + { + "epoch": 2.1196581196581197, + "high_lr": 0.0005757894736842105, + "low_lr": 1.1515789473684211e-05, + "step": 806 + }, + { + "epoch": 2.1196581196581197, + "high_lr": 0.0005757894736842105, + "low_lr": 1.1515789473684211e-05, + "step": 806 + }, + { + "epoch": 2.1196581196581197, + "high_lr": 0.0005757894736842105, + "low_lr": 1.1515789473684211e-05, + "step": 806 + }, + { + "epoch": 2.1196581196581197, + "high_lr": 0.0005757894736842105, + "low_lr": 1.1515789473684211e-05, + "step": 806 + }, + { + "epoch": 2.1196581196581197, + "high_lr": 0.0005757894736842105, + "low_lr": 1.1515789473684211e-05, + "step": 806 + }, + { + "epoch": 2.1196581196581197, + "high_lr": 0.0005757894736842105, + "low_lr": 1.1515789473684211e-05, + "step": 806 + }, + { + "epoch": 2.1196581196581197, + "high_lr": 0.0005757894736842105, + "low_lr": 1.1515789473684211e-05, + "step": 806 + }, + { + "epoch": 2.1196581196581197, + "high_lr": 0.0005757894736842105, + "low_lr": 1.1515789473684211e-05, + "step": 806 + }, + { + "epoch": 2.1222879684418148, + "grad_norm": 1.150899887084961, + "learning_rate": 0.0005752631578947368, + "loss": 1.3587, + "step": 807 + }, + { + "epoch": 2.1222879684418148, + "high_lr": 0.0005752631578947368, + "low_lr": 1.1505263157894738e-05, + "step": 807 + }, + { + "epoch": 2.1222879684418148, + "high_lr": 0.0005752631578947368, + "low_lr": 1.1505263157894738e-05, + "step": 807 + }, + { + "epoch": 2.1222879684418148, + "high_lr": 0.0005752631578947368, + "low_lr": 1.1505263157894738e-05, + "step": 807 + }, + { + "epoch": 2.1222879684418148, + "high_lr": 0.0005752631578947368, + "low_lr": 1.1505263157894738e-05, + "step": 807 + }, + { + "epoch": 2.1222879684418148, + "high_lr": 0.0005752631578947368, + "low_lr": 1.1505263157894738e-05, + "step": 807 + }, + { + "epoch": 2.1222879684418148, + "high_lr": 0.0005752631578947368, + "low_lr": 1.1505263157894738e-05, + "step": 807 + }, + { + "epoch": 2.1222879684418148, + "high_lr": 0.0005752631578947368, + "low_lr": 1.1505263157894738e-05, + "step": 807 + }, + { + "epoch": 2.1222879684418148, + "high_lr": 0.0005752631578947368, + "low_lr": 1.1505263157894738e-05, + "step": 807 + }, + { + "epoch": 2.1249178172255094, + "grad_norm": 1.220388412475586, + "learning_rate": 0.0005747368421052632, + "loss": 1.3871, + "step": 808 + }, + { + "epoch": 2.1249178172255094, + "high_lr": 0.0005747368421052632, + "low_lr": 1.1494736842105266e-05, + "step": 808 + }, + { + "epoch": 2.1249178172255094, + "high_lr": 0.0005747368421052632, + "low_lr": 1.1494736842105266e-05, + "step": 808 + }, + { + "epoch": 2.1249178172255094, + "high_lr": 0.0005747368421052632, + "low_lr": 1.1494736842105266e-05, + "step": 808 + }, + { + "epoch": 2.1249178172255094, + "high_lr": 0.0005747368421052632, + "low_lr": 1.1494736842105266e-05, + "step": 808 + }, + { + "epoch": 2.1249178172255094, + "high_lr": 0.0005747368421052632, + "low_lr": 1.1494736842105266e-05, + "step": 808 + }, + { + "epoch": 2.1249178172255094, + "high_lr": 0.0005747368421052632, + "low_lr": 1.1494736842105266e-05, + "step": 808 + }, + { + "epoch": 2.1249178172255094, + "high_lr": 0.0005747368421052632, + "low_lr": 1.1494736842105266e-05, + "step": 808 + }, + { + "epoch": 2.1249178172255094, + "high_lr": 0.0005747368421052632, + "low_lr": 1.1494736842105266e-05, + "step": 808 + }, + { + "epoch": 2.1275476660092045, + "grad_norm": 1.2202270030975342, + "learning_rate": 0.0005742105263157895, + "loss": 1.3888, + "step": 809 + }, + { + "epoch": 2.1275476660092045, + "high_lr": 0.0005742105263157895, + "low_lr": 1.148421052631579e-05, + "step": 809 + }, + { + "epoch": 2.1275476660092045, + "high_lr": 0.0005742105263157895, + "low_lr": 1.148421052631579e-05, + "step": 809 + }, + { + "epoch": 2.1275476660092045, + "high_lr": 0.0005742105263157895, + "low_lr": 1.148421052631579e-05, + "step": 809 + }, + { + "epoch": 2.1275476660092045, + "high_lr": 0.0005742105263157895, + "low_lr": 1.148421052631579e-05, + "step": 809 + }, + { + "epoch": 2.1275476660092045, + "high_lr": 0.0005742105263157895, + "low_lr": 1.148421052631579e-05, + "step": 809 + }, + { + "epoch": 2.1275476660092045, + "high_lr": 0.0005742105263157895, + "low_lr": 1.148421052631579e-05, + "step": 809 + }, + { + "epoch": 2.1275476660092045, + "high_lr": 0.0005742105263157895, + "low_lr": 1.148421052631579e-05, + "step": 809 + }, + { + "epoch": 2.1275476660092045, + "high_lr": 0.0005742105263157895, + "low_lr": 1.148421052631579e-05, + "step": 809 + }, + { + "epoch": 2.1301775147928996, + "grad_norm": 1.2154358625411987, + "learning_rate": 0.0005736842105263158, + "loss": 1.3709, + "step": 810 + }, + { + "epoch": 2.1301775147928996, + "high_lr": 0.0005736842105263158, + "low_lr": 1.1473684210526317e-05, + "step": 810 + }, + { + "epoch": 2.1301775147928996, + "high_lr": 0.0005736842105263158, + "low_lr": 1.1473684210526317e-05, + "step": 810 + }, + { + "epoch": 2.1301775147928996, + "high_lr": 0.0005736842105263158, + "low_lr": 1.1473684210526317e-05, + "step": 810 + }, + { + "epoch": 2.1301775147928996, + "high_lr": 0.0005736842105263158, + "low_lr": 1.1473684210526317e-05, + "step": 810 + }, + { + "epoch": 2.1301775147928996, + "high_lr": 0.0005736842105263158, + "low_lr": 1.1473684210526317e-05, + "step": 810 + }, + { + "epoch": 2.1301775147928996, + "high_lr": 0.0005736842105263158, + "low_lr": 1.1473684210526317e-05, + "step": 810 + }, + { + "epoch": 2.1301775147928996, + "high_lr": 0.0005736842105263158, + "low_lr": 1.1473684210526317e-05, + "step": 810 + }, + { + "epoch": 2.1301775147928996, + "high_lr": 0.0005736842105263158, + "low_lr": 1.1473684210526317e-05, + "step": 810 + }, + { + "epoch": 2.1328073635765943, + "grad_norm": 1.2578213214874268, + "learning_rate": 0.0005731578947368422, + "loss": 1.4057, + "step": 811 + }, + { + "epoch": 2.1328073635765943, + "high_lr": 0.0005731578947368422, + "low_lr": 1.1463157894736843e-05, + "step": 811 + }, + { + "epoch": 2.1328073635765943, + "high_lr": 0.0005731578947368422, + "low_lr": 1.1463157894736843e-05, + "step": 811 + }, + { + "epoch": 2.1328073635765943, + "high_lr": 0.0005731578947368422, + "low_lr": 1.1463157894736843e-05, + "step": 811 + }, + { + "epoch": 2.1328073635765943, + "high_lr": 0.0005731578947368422, + "low_lr": 1.1463157894736843e-05, + "step": 811 + }, + { + "epoch": 2.1328073635765943, + "high_lr": 0.0005731578947368422, + "low_lr": 1.1463157894736843e-05, + "step": 811 + }, + { + "epoch": 2.1328073635765943, + "high_lr": 0.0005731578947368422, + "low_lr": 1.1463157894736843e-05, + "step": 811 + }, + { + "epoch": 2.1328073635765943, + "high_lr": 0.0005731578947368422, + "low_lr": 1.1463157894736843e-05, + "step": 811 + }, + { + "epoch": 2.1328073635765943, + "high_lr": 0.0005731578947368422, + "low_lr": 1.1463157894736843e-05, + "step": 811 + }, + { + "epoch": 2.1354372123602894, + "grad_norm": 1.2131134271621704, + "learning_rate": 0.0005726315789473684, + "loss": 1.3604, + "step": 812 + }, + { + "epoch": 2.1354372123602894, + "high_lr": 0.0005726315789473684, + "low_lr": 1.145263157894737e-05, + "step": 812 + }, + { + "epoch": 2.1354372123602894, + "high_lr": 0.0005726315789473684, + "low_lr": 1.145263157894737e-05, + "step": 812 + }, + { + "epoch": 2.1354372123602894, + "high_lr": 0.0005726315789473684, + "low_lr": 1.145263157894737e-05, + "step": 812 + }, + { + "epoch": 2.1354372123602894, + "high_lr": 0.0005726315789473684, + "low_lr": 1.145263157894737e-05, + "step": 812 + }, + { + "epoch": 2.1354372123602894, + "high_lr": 0.0005726315789473684, + "low_lr": 1.145263157894737e-05, + "step": 812 + }, + { + "epoch": 2.1354372123602894, + "high_lr": 0.0005726315789473684, + "low_lr": 1.145263157894737e-05, + "step": 812 + }, + { + "epoch": 2.1354372123602894, + "high_lr": 0.0005726315789473684, + "low_lr": 1.145263157894737e-05, + "step": 812 + }, + { + "epoch": 2.1354372123602894, + "high_lr": 0.0005726315789473684, + "low_lr": 1.145263157894737e-05, + "step": 812 + }, + { + "epoch": 2.138067061143984, + "grad_norm": 1.1633799076080322, + "learning_rate": 0.0005721052631578948, + "loss": 1.3531, + "step": 813 + }, + { + "epoch": 2.138067061143984, + "high_lr": 0.0005721052631578948, + "low_lr": 1.1442105263157897e-05, + "step": 813 + }, + { + "epoch": 2.138067061143984, + "high_lr": 0.0005721052631578948, + "low_lr": 1.1442105263157897e-05, + "step": 813 + }, + { + "epoch": 2.138067061143984, + "high_lr": 0.0005721052631578948, + "low_lr": 1.1442105263157897e-05, + "step": 813 + }, + { + "epoch": 2.138067061143984, + "high_lr": 0.0005721052631578948, + "low_lr": 1.1442105263157897e-05, + "step": 813 + }, + { + "epoch": 2.138067061143984, + "high_lr": 0.0005721052631578948, + "low_lr": 1.1442105263157897e-05, + "step": 813 + }, + { + "epoch": 2.138067061143984, + "high_lr": 0.0005721052631578948, + "low_lr": 1.1442105263157897e-05, + "step": 813 + }, + { + "epoch": 2.138067061143984, + "high_lr": 0.0005721052631578948, + "low_lr": 1.1442105263157897e-05, + "step": 813 + }, + { + "epoch": 2.138067061143984, + "high_lr": 0.0005721052631578948, + "low_lr": 1.1442105263157897e-05, + "step": 813 + }, + { + "epoch": 2.140696909927679, + "grad_norm": 1.1977272033691406, + "learning_rate": 0.0005715789473684211, + "loss": 1.3672, + "step": 814 + }, + { + "epoch": 2.140696909927679, + "high_lr": 0.0005715789473684211, + "low_lr": 1.1431578947368422e-05, + "step": 814 + }, + { + "epoch": 2.140696909927679, + "high_lr": 0.0005715789473684211, + "low_lr": 1.1431578947368422e-05, + "step": 814 + }, + { + "epoch": 2.140696909927679, + "high_lr": 0.0005715789473684211, + "low_lr": 1.1431578947368422e-05, + "step": 814 + }, + { + "epoch": 2.140696909927679, + "high_lr": 0.0005715789473684211, + "low_lr": 1.1431578947368422e-05, + "step": 814 + }, + { + "epoch": 2.140696909927679, + "high_lr": 0.0005715789473684211, + "low_lr": 1.1431578947368422e-05, + "step": 814 + }, + { + "epoch": 2.140696909927679, + "high_lr": 0.0005715789473684211, + "low_lr": 1.1431578947368422e-05, + "step": 814 + }, + { + "epoch": 2.140696909927679, + "high_lr": 0.0005715789473684211, + "low_lr": 1.1431578947368422e-05, + "step": 814 + }, + { + "epoch": 2.140696909927679, + "high_lr": 0.0005715789473684211, + "low_lr": 1.1431578947368422e-05, + "step": 814 + }, + { + "epoch": 2.1433267587113742, + "grad_norm": 1.2198271751403809, + "learning_rate": 0.0005710526315789474, + "loss": 1.3856, + "step": 815 + }, + { + "epoch": 2.1433267587113742, + "high_lr": 0.0005710526315789474, + "low_lr": 1.1421052631578948e-05, + "step": 815 + }, + { + "epoch": 2.1433267587113742, + "high_lr": 0.0005710526315789474, + "low_lr": 1.1421052631578948e-05, + "step": 815 + }, + { + "epoch": 2.1433267587113742, + "high_lr": 0.0005710526315789474, + "low_lr": 1.1421052631578948e-05, + "step": 815 + }, + { + "epoch": 2.1433267587113742, + "high_lr": 0.0005710526315789474, + "low_lr": 1.1421052631578948e-05, + "step": 815 + }, + { + "epoch": 2.1433267587113742, + "high_lr": 0.0005710526315789474, + "low_lr": 1.1421052631578948e-05, + "step": 815 + }, + { + "epoch": 2.1433267587113742, + "high_lr": 0.0005710526315789474, + "low_lr": 1.1421052631578948e-05, + "step": 815 + }, + { + "epoch": 2.1433267587113742, + "high_lr": 0.0005710526315789474, + "low_lr": 1.1421052631578948e-05, + "step": 815 + }, + { + "epoch": 2.1433267587113742, + "high_lr": 0.0005710526315789474, + "low_lr": 1.1421052631578948e-05, + "step": 815 + }, + { + "epoch": 2.145956607495069, + "grad_norm": 1.2254588603973389, + "learning_rate": 0.0005705263157894737, + "loss": 1.3596, + "step": 816 + }, + { + "epoch": 2.145956607495069, + "high_lr": 0.0005705263157894737, + "low_lr": 1.1410526315789475e-05, + "step": 816 + }, + { + "epoch": 2.145956607495069, + "high_lr": 0.0005705263157894737, + "low_lr": 1.1410526315789475e-05, + "step": 816 + }, + { + "epoch": 2.145956607495069, + "high_lr": 0.0005705263157894737, + "low_lr": 1.1410526315789475e-05, + "step": 816 + }, + { + "epoch": 2.145956607495069, + "high_lr": 0.0005705263157894737, + "low_lr": 1.1410526315789475e-05, + "step": 816 + }, + { + "epoch": 2.145956607495069, + "high_lr": 0.0005705263157894737, + "low_lr": 1.1410526315789475e-05, + "step": 816 + }, + { + "epoch": 2.145956607495069, + "high_lr": 0.0005705263157894737, + "low_lr": 1.1410526315789475e-05, + "step": 816 + }, + { + "epoch": 2.145956607495069, + "high_lr": 0.0005705263157894737, + "low_lr": 1.1410526315789475e-05, + "step": 816 + }, + { + "epoch": 2.145956607495069, + "high_lr": 0.0005705263157894737, + "low_lr": 1.1410526315789475e-05, + "step": 816 + }, + { + "epoch": 2.148586456278764, + "grad_norm": 1.173850417137146, + "learning_rate": 0.00057, + "loss": 1.4394, + "step": 817 + }, + { + "epoch": 2.148586456278764, + "high_lr": 0.00057, + "low_lr": 1.14e-05, + "step": 817 + }, + { + "epoch": 2.148586456278764, + "high_lr": 0.00057, + "low_lr": 1.14e-05, + "step": 817 + }, + { + "epoch": 2.148586456278764, + "high_lr": 0.00057, + "low_lr": 1.14e-05, + "step": 817 + }, + { + "epoch": 2.148586456278764, + "high_lr": 0.00057, + "low_lr": 1.14e-05, + "step": 817 + }, + { + "epoch": 2.148586456278764, + "high_lr": 0.00057, + "low_lr": 1.14e-05, + "step": 817 + }, + { + "epoch": 2.148586456278764, + "high_lr": 0.00057, + "low_lr": 1.14e-05, + "step": 817 + }, + { + "epoch": 2.148586456278764, + "high_lr": 0.00057, + "low_lr": 1.14e-05, + "step": 817 + }, + { + "epoch": 2.148586456278764, + "high_lr": 0.00057, + "low_lr": 1.14e-05, + "step": 817 + }, + { + "epoch": 2.151216305062459, + "grad_norm": 1.183383584022522, + "learning_rate": 0.0005694736842105264, + "loss": 1.3927, + "step": 818 + }, + { + "epoch": 2.151216305062459, + "high_lr": 0.0005694736842105264, + "low_lr": 1.1389473684210527e-05, + "step": 818 + }, + { + "epoch": 2.151216305062459, + "high_lr": 0.0005694736842105264, + "low_lr": 1.1389473684210527e-05, + "step": 818 + }, + { + "epoch": 2.151216305062459, + "high_lr": 0.0005694736842105264, + "low_lr": 1.1389473684210527e-05, + "step": 818 + }, + { + "epoch": 2.151216305062459, + "high_lr": 0.0005694736842105264, + "low_lr": 1.1389473684210527e-05, + "step": 818 + }, + { + "epoch": 2.151216305062459, + "high_lr": 0.0005694736842105264, + "low_lr": 1.1389473684210527e-05, + "step": 818 + }, + { + "epoch": 2.151216305062459, + "high_lr": 0.0005694736842105264, + "low_lr": 1.1389473684210527e-05, + "step": 818 + }, + { + "epoch": 2.151216305062459, + "high_lr": 0.0005694736842105264, + "low_lr": 1.1389473684210527e-05, + "step": 818 + }, + { + "epoch": 2.151216305062459, + "high_lr": 0.0005694736842105264, + "low_lr": 1.1389473684210527e-05, + "step": 818 + }, + { + "epoch": 2.1538461538461537, + "grad_norm": 1.128944754600525, + "learning_rate": 0.0005689473684210527, + "loss": 1.3884, + "step": 819 + }, + { + "epoch": 2.1538461538461537, + "high_lr": 0.0005689473684210527, + "low_lr": 1.1378947368421054e-05, + "step": 819 + }, + { + "epoch": 2.1538461538461537, + "high_lr": 0.0005689473684210527, + "low_lr": 1.1378947368421054e-05, + "step": 819 + }, + { + "epoch": 2.1538461538461537, + "high_lr": 0.0005689473684210527, + "low_lr": 1.1378947368421054e-05, + "step": 819 + }, + { + "epoch": 2.1538461538461537, + "high_lr": 0.0005689473684210527, + "low_lr": 1.1378947368421054e-05, + "step": 819 + }, + { + "epoch": 2.1538461538461537, + "high_lr": 0.0005689473684210527, + "low_lr": 1.1378947368421054e-05, + "step": 819 + }, + { + "epoch": 2.1538461538461537, + "high_lr": 0.0005689473684210527, + "low_lr": 1.1378947368421054e-05, + "step": 819 + }, + { + "epoch": 2.1538461538461537, + "high_lr": 0.0005689473684210527, + "low_lr": 1.1378947368421054e-05, + "step": 819 + }, + { + "epoch": 2.1538461538461537, + "high_lr": 0.0005689473684210527, + "low_lr": 1.1378947368421054e-05, + "step": 819 + }, + { + "epoch": 2.156476002629849, + "grad_norm": 1.2648850679397583, + "learning_rate": 0.0005684210526315789, + "loss": 1.3936, + "step": 820 + }, + { + "epoch": 2.156476002629849, + "high_lr": 0.0005684210526315789, + "low_lr": 1.136842105263158e-05, + "step": 820 + }, + { + "epoch": 2.156476002629849, + "high_lr": 0.0005684210526315789, + "low_lr": 1.136842105263158e-05, + "step": 820 + }, + { + "epoch": 2.156476002629849, + "high_lr": 0.0005684210526315789, + "low_lr": 1.136842105263158e-05, + "step": 820 + }, + { + "epoch": 2.156476002629849, + "high_lr": 0.0005684210526315789, + "low_lr": 1.136842105263158e-05, + "step": 820 + }, + { + "epoch": 2.156476002629849, + "high_lr": 0.0005684210526315789, + "low_lr": 1.136842105263158e-05, + "step": 820 + }, + { + "epoch": 2.156476002629849, + "high_lr": 0.0005684210526315789, + "low_lr": 1.136842105263158e-05, + "step": 820 + }, + { + "epoch": 2.156476002629849, + "high_lr": 0.0005684210526315789, + "low_lr": 1.136842105263158e-05, + "step": 820 + }, + { + "epoch": 2.156476002629849, + "high_lr": 0.0005684210526315789, + "low_lr": 1.136842105263158e-05, + "step": 820 + }, + { + "epoch": 2.1591058514135435, + "grad_norm": 1.2596508264541626, + "learning_rate": 0.0005678947368421052, + "loss": 1.3974, + "step": 821 + }, + { + "epoch": 2.1591058514135435, + "high_lr": 0.0005678947368421052, + "low_lr": 1.1357894736842106e-05, + "step": 821 + }, + { + "epoch": 2.1591058514135435, + "high_lr": 0.0005678947368421052, + "low_lr": 1.1357894736842106e-05, + "step": 821 + }, + { + "epoch": 2.1591058514135435, + "high_lr": 0.0005678947368421052, + "low_lr": 1.1357894736842106e-05, + "step": 821 + }, + { + "epoch": 2.1591058514135435, + "high_lr": 0.0005678947368421052, + "low_lr": 1.1357894736842106e-05, + "step": 821 + }, + { + "epoch": 2.1591058514135435, + "high_lr": 0.0005678947368421052, + "low_lr": 1.1357894736842106e-05, + "step": 821 + }, + { + "epoch": 2.1591058514135435, + "high_lr": 0.0005678947368421052, + "low_lr": 1.1357894736842106e-05, + "step": 821 + }, + { + "epoch": 2.1591058514135435, + "high_lr": 0.0005678947368421052, + "low_lr": 1.1357894736842106e-05, + "step": 821 + }, + { + "epoch": 2.1591058514135435, + "high_lr": 0.0005678947368421052, + "low_lr": 1.1357894736842106e-05, + "step": 821 + }, + { + "epoch": 2.1617357001972386, + "grad_norm": 1.180319905281067, + "learning_rate": 0.0005673684210526316, + "loss": 1.3733, + "step": 822 + }, + { + "epoch": 2.1617357001972386, + "high_lr": 0.0005673684210526316, + "low_lr": 1.1347368421052634e-05, + "step": 822 + }, + { + "epoch": 2.1617357001972386, + "high_lr": 0.0005673684210526316, + "low_lr": 1.1347368421052634e-05, + "step": 822 + }, + { + "epoch": 2.1617357001972386, + "high_lr": 0.0005673684210526316, + "low_lr": 1.1347368421052634e-05, + "step": 822 + }, + { + "epoch": 2.1617357001972386, + "high_lr": 0.0005673684210526316, + "low_lr": 1.1347368421052634e-05, + "step": 822 + }, + { + "epoch": 2.1617357001972386, + "high_lr": 0.0005673684210526316, + "low_lr": 1.1347368421052634e-05, + "step": 822 + }, + { + "epoch": 2.1617357001972386, + "high_lr": 0.0005673684210526316, + "low_lr": 1.1347368421052634e-05, + "step": 822 + }, + { + "epoch": 2.1617357001972386, + "high_lr": 0.0005673684210526316, + "low_lr": 1.1347368421052634e-05, + "step": 822 + }, + { + "epoch": 2.1617357001972386, + "high_lr": 0.0005673684210526316, + "low_lr": 1.1347368421052634e-05, + "step": 822 + }, + { + "epoch": 2.1643655489809337, + "grad_norm": 1.2433300018310547, + "learning_rate": 0.0005668421052631579, + "loss": 1.4125, + "step": 823 + }, + { + "epoch": 2.1643655489809337, + "high_lr": 0.0005668421052631579, + "low_lr": 1.1336842105263159e-05, + "step": 823 + }, + { + "epoch": 2.1643655489809337, + "high_lr": 0.0005668421052631579, + "low_lr": 1.1336842105263159e-05, + "step": 823 + }, + { + "epoch": 2.1643655489809337, + "high_lr": 0.0005668421052631579, + "low_lr": 1.1336842105263159e-05, + "step": 823 + }, + { + "epoch": 2.1643655489809337, + "high_lr": 0.0005668421052631579, + "low_lr": 1.1336842105263159e-05, + "step": 823 + }, + { + "epoch": 2.1643655489809337, + "high_lr": 0.0005668421052631579, + "low_lr": 1.1336842105263159e-05, + "step": 823 + }, + { + "epoch": 2.1643655489809337, + "high_lr": 0.0005668421052631579, + "low_lr": 1.1336842105263159e-05, + "step": 823 + }, + { + "epoch": 2.1643655489809337, + "high_lr": 0.0005668421052631579, + "low_lr": 1.1336842105263159e-05, + "step": 823 + }, + { + "epoch": 2.1643655489809337, + "high_lr": 0.0005668421052631579, + "low_lr": 1.1336842105263159e-05, + "step": 823 + }, + { + "epoch": 2.1669953977646284, + "grad_norm": 1.219747543334961, + "learning_rate": 0.0005663157894736842, + "loss": 1.4256, + "step": 824 + }, + { + "epoch": 2.1669953977646284, + "high_lr": 0.0005663157894736842, + "low_lr": 1.1326315789473685e-05, + "step": 824 + }, + { + "epoch": 2.1669953977646284, + "high_lr": 0.0005663157894736842, + "low_lr": 1.1326315789473685e-05, + "step": 824 + }, + { + "epoch": 2.1669953977646284, + "high_lr": 0.0005663157894736842, + "low_lr": 1.1326315789473685e-05, + "step": 824 + }, + { + "epoch": 2.1669953977646284, + "high_lr": 0.0005663157894736842, + "low_lr": 1.1326315789473685e-05, + "step": 824 + }, + { + "epoch": 2.1669953977646284, + "high_lr": 0.0005663157894736842, + "low_lr": 1.1326315789473685e-05, + "step": 824 + }, + { + "epoch": 2.1669953977646284, + "high_lr": 0.0005663157894736842, + "low_lr": 1.1326315789473685e-05, + "step": 824 + }, + { + "epoch": 2.1669953977646284, + "high_lr": 0.0005663157894736842, + "low_lr": 1.1326315789473685e-05, + "step": 824 + }, + { + "epoch": 2.1669953977646284, + "high_lr": 0.0005663157894736842, + "low_lr": 1.1326315789473685e-05, + "step": 824 + }, + { + "epoch": 2.1696252465483234, + "grad_norm": 1.3756067752838135, + "learning_rate": 0.0005657894736842105, + "loss": 1.3732, + "step": 825 + }, + { + "epoch": 2.1696252465483234, + "high_lr": 0.0005657894736842105, + "low_lr": 1.1315789473684212e-05, + "step": 825 + }, + { + "epoch": 2.1696252465483234, + "high_lr": 0.0005657894736842105, + "low_lr": 1.1315789473684212e-05, + "step": 825 + }, + { + "epoch": 2.1696252465483234, + "high_lr": 0.0005657894736842105, + "low_lr": 1.1315789473684212e-05, + "step": 825 + }, + { + "epoch": 2.1696252465483234, + "high_lr": 0.0005657894736842105, + "low_lr": 1.1315789473684212e-05, + "step": 825 + }, + { + "epoch": 2.1696252465483234, + "high_lr": 0.0005657894736842105, + "low_lr": 1.1315789473684212e-05, + "step": 825 + }, + { + "epoch": 2.1696252465483234, + "high_lr": 0.0005657894736842105, + "low_lr": 1.1315789473684212e-05, + "step": 825 + }, + { + "epoch": 2.1696252465483234, + "high_lr": 0.0005657894736842105, + "low_lr": 1.1315789473684212e-05, + "step": 825 + }, + { + "epoch": 2.1696252465483234, + "high_lr": 0.0005657894736842105, + "low_lr": 1.1315789473684212e-05, + "step": 825 + }, + { + "epoch": 2.1722550953320185, + "grad_norm": 1.3394635915756226, + "learning_rate": 0.0005652631578947368, + "loss": 1.427, + "step": 826 + }, + { + "epoch": 2.1722550953320185, + "high_lr": 0.0005652631578947368, + "low_lr": 1.1305263157894736e-05, + "step": 826 + }, + { + "epoch": 2.1722550953320185, + "high_lr": 0.0005652631578947368, + "low_lr": 1.1305263157894736e-05, + "step": 826 + }, + { + "epoch": 2.1722550953320185, + "high_lr": 0.0005652631578947368, + "low_lr": 1.1305263157894736e-05, + "step": 826 + }, + { + "epoch": 2.1722550953320185, + "high_lr": 0.0005652631578947368, + "low_lr": 1.1305263157894736e-05, + "step": 826 + }, + { + "epoch": 2.1722550953320185, + "high_lr": 0.0005652631578947368, + "low_lr": 1.1305263157894736e-05, + "step": 826 + }, + { + "epoch": 2.1722550953320185, + "high_lr": 0.0005652631578947368, + "low_lr": 1.1305263157894736e-05, + "step": 826 + }, + { + "epoch": 2.1722550953320185, + "high_lr": 0.0005652631578947368, + "low_lr": 1.1305263157894736e-05, + "step": 826 + }, + { + "epoch": 2.1722550953320185, + "high_lr": 0.0005652631578947368, + "low_lr": 1.1305263157894736e-05, + "step": 826 + }, + { + "epoch": 2.174884944115713, + "grad_norm": 1.1506638526916504, + "learning_rate": 0.0005647368421052633, + "loss": 1.3673, + "step": 827 + }, + { + "epoch": 2.174884944115713, + "high_lr": 0.0005647368421052633, + "low_lr": 1.1294736842105264e-05, + "step": 827 + }, + { + "epoch": 2.174884944115713, + "high_lr": 0.0005647368421052633, + "low_lr": 1.1294736842105264e-05, + "step": 827 + }, + { + "epoch": 2.174884944115713, + "high_lr": 0.0005647368421052633, + "low_lr": 1.1294736842105264e-05, + "step": 827 + }, + { + "epoch": 2.174884944115713, + "high_lr": 0.0005647368421052633, + "low_lr": 1.1294736842105264e-05, + "step": 827 + }, + { + "epoch": 2.174884944115713, + "high_lr": 0.0005647368421052633, + "low_lr": 1.1294736842105264e-05, + "step": 827 + }, + { + "epoch": 2.174884944115713, + "high_lr": 0.0005647368421052633, + "low_lr": 1.1294736842105264e-05, + "step": 827 + }, + { + "epoch": 2.174884944115713, + "high_lr": 0.0005647368421052633, + "low_lr": 1.1294736842105264e-05, + "step": 827 + }, + { + "epoch": 2.174884944115713, + "high_lr": 0.0005647368421052633, + "low_lr": 1.1294736842105264e-05, + "step": 827 + }, + { + "epoch": 2.1775147928994083, + "grad_norm": 1.342165231704712, + "learning_rate": 0.0005642105263157896, + "loss": 1.3631, + "step": 828 + }, + { + "epoch": 2.1775147928994083, + "high_lr": 0.0005642105263157896, + "low_lr": 1.128421052631579e-05, + "step": 828 + }, + { + "epoch": 2.1775147928994083, + "high_lr": 0.0005642105263157896, + "low_lr": 1.128421052631579e-05, + "step": 828 + }, + { + "epoch": 2.1775147928994083, + "high_lr": 0.0005642105263157896, + "low_lr": 1.128421052631579e-05, + "step": 828 + }, + { + "epoch": 2.1775147928994083, + "high_lr": 0.0005642105263157896, + "low_lr": 1.128421052631579e-05, + "step": 828 + }, + { + "epoch": 2.1775147928994083, + "high_lr": 0.0005642105263157896, + "low_lr": 1.128421052631579e-05, + "step": 828 + }, + { + "epoch": 2.1775147928994083, + "high_lr": 0.0005642105263157896, + "low_lr": 1.128421052631579e-05, + "step": 828 + }, + { + "epoch": 2.1775147928994083, + "high_lr": 0.0005642105263157896, + "low_lr": 1.128421052631579e-05, + "step": 828 + }, + { + "epoch": 2.1775147928994083, + "high_lr": 0.0005642105263157896, + "low_lr": 1.128421052631579e-05, + "step": 828 + }, + { + "epoch": 2.1801446416831034, + "grad_norm": 1.2696932554244995, + "learning_rate": 0.0005636842105263158, + "loss": 1.3854, + "step": 829 + }, + { + "epoch": 2.1801446416831034, + "high_lr": 0.0005636842105263158, + "low_lr": 1.1273684210526317e-05, + "step": 829 + }, + { + "epoch": 2.1801446416831034, + "high_lr": 0.0005636842105263158, + "low_lr": 1.1273684210526317e-05, + "step": 829 + }, + { + "epoch": 2.1801446416831034, + "high_lr": 0.0005636842105263158, + "low_lr": 1.1273684210526317e-05, + "step": 829 + }, + { + "epoch": 2.1801446416831034, + "high_lr": 0.0005636842105263158, + "low_lr": 1.1273684210526317e-05, + "step": 829 + }, + { + "epoch": 2.1801446416831034, + "high_lr": 0.0005636842105263158, + "low_lr": 1.1273684210526317e-05, + "step": 829 + }, + { + "epoch": 2.1801446416831034, + "high_lr": 0.0005636842105263158, + "low_lr": 1.1273684210526317e-05, + "step": 829 + }, + { + "epoch": 2.1801446416831034, + "high_lr": 0.0005636842105263158, + "low_lr": 1.1273684210526317e-05, + "step": 829 + }, + { + "epoch": 2.1801446416831034, + "high_lr": 0.0005636842105263158, + "low_lr": 1.1273684210526317e-05, + "step": 829 + }, + { + "epoch": 2.182774490466798, + "grad_norm": 2.098314046859741, + "learning_rate": 0.0005631578947368421, + "loss": 1.4307, + "step": 830 + }, + { + "epoch": 2.182774490466798, + "high_lr": 0.0005631578947368421, + "low_lr": 1.1263157894736843e-05, + "step": 830 + }, + { + "epoch": 2.182774490466798, + "high_lr": 0.0005631578947368421, + "low_lr": 1.1263157894736843e-05, + "step": 830 + }, + { + "epoch": 2.182774490466798, + "high_lr": 0.0005631578947368421, + "low_lr": 1.1263157894736843e-05, + "step": 830 + }, + { + "epoch": 2.182774490466798, + "high_lr": 0.0005631578947368421, + "low_lr": 1.1263157894736843e-05, + "step": 830 + }, + { + "epoch": 2.182774490466798, + "high_lr": 0.0005631578947368421, + "low_lr": 1.1263157894736843e-05, + "step": 830 + }, + { + "epoch": 2.182774490466798, + "high_lr": 0.0005631578947368421, + "low_lr": 1.1263157894736843e-05, + "step": 830 + }, + { + "epoch": 2.182774490466798, + "high_lr": 0.0005631578947368421, + "low_lr": 1.1263157894736843e-05, + "step": 830 + }, + { + "epoch": 2.182774490466798, + "high_lr": 0.0005631578947368421, + "low_lr": 1.1263157894736843e-05, + "step": 830 + }, + { + "epoch": 2.185404339250493, + "grad_norm": 1.9224364757537842, + "learning_rate": 0.0005626315789473684, + "loss": 1.4026, + "step": 831 + }, + { + "epoch": 2.185404339250493, + "high_lr": 0.0005626315789473684, + "low_lr": 1.1252631578947368e-05, + "step": 831 + }, + { + "epoch": 2.185404339250493, + "high_lr": 0.0005626315789473684, + "low_lr": 1.1252631578947368e-05, + "step": 831 + }, + { + "epoch": 2.185404339250493, + "high_lr": 0.0005626315789473684, + "low_lr": 1.1252631578947368e-05, + "step": 831 + }, + { + "epoch": 2.185404339250493, + "high_lr": 0.0005626315789473684, + "low_lr": 1.1252631578947368e-05, + "step": 831 + }, + { + "epoch": 2.185404339250493, + "high_lr": 0.0005626315789473684, + "low_lr": 1.1252631578947368e-05, + "step": 831 + }, + { + "epoch": 2.185404339250493, + "high_lr": 0.0005626315789473684, + "low_lr": 1.1252631578947368e-05, + "step": 831 + }, + { + "epoch": 2.185404339250493, + "high_lr": 0.0005626315789473684, + "low_lr": 1.1252631578947368e-05, + "step": 831 + }, + { + "epoch": 2.185404339250493, + "high_lr": 0.0005626315789473684, + "low_lr": 1.1252631578947368e-05, + "step": 831 + }, + { + "epoch": 2.1880341880341883, + "grad_norm": 1.2604079246520996, + "learning_rate": 0.0005621052631578948, + "loss": 1.371, + "step": 832 + }, + { + "epoch": 2.1880341880341883, + "high_lr": 0.0005621052631578948, + "low_lr": 1.1242105263157896e-05, + "step": 832 + }, + { + "epoch": 2.1880341880341883, + "high_lr": 0.0005621052631578948, + "low_lr": 1.1242105263157896e-05, + "step": 832 + }, + { + "epoch": 2.1880341880341883, + "high_lr": 0.0005621052631578948, + "low_lr": 1.1242105263157896e-05, + "step": 832 + }, + { + "epoch": 2.1880341880341883, + "high_lr": 0.0005621052631578948, + "low_lr": 1.1242105263157896e-05, + "step": 832 + }, + { + "epoch": 2.1880341880341883, + "high_lr": 0.0005621052631578948, + "low_lr": 1.1242105263157896e-05, + "step": 832 + }, + { + "epoch": 2.1880341880341883, + "high_lr": 0.0005621052631578948, + "low_lr": 1.1242105263157896e-05, + "step": 832 + }, + { + "epoch": 2.1880341880341883, + "high_lr": 0.0005621052631578948, + "low_lr": 1.1242105263157896e-05, + "step": 832 + }, + { + "epoch": 2.1880341880341883, + "high_lr": 0.0005621052631578948, + "low_lr": 1.1242105263157896e-05, + "step": 832 + }, + { + "epoch": 2.190664036817883, + "grad_norm": 2.1818010807037354, + "learning_rate": 0.0005615789473684211, + "loss": 1.3593, + "step": 833 + }, + { + "epoch": 2.190664036817883, + "high_lr": 0.0005615789473684211, + "low_lr": 1.1231578947368422e-05, + "step": 833 + }, + { + "epoch": 2.190664036817883, + "high_lr": 0.0005615789473684211, + "low_lr": 1.1231578947368422e-05, + "step": 833 + }, + { + "epoch": 2.190664036817883, + "high_lr": 0.0005615789473684211, + "low_lr": 1.1231578947368422e-05, + "step": 833 + }, + { + "epoch": 2.190664036817883, + "high_lr": 0.0005615789473684211, + "low_lr": 1.1231578947368422e-05, + "step": 833 + }, + { + "epoch": 2.190664036817883, + "high_lr": 0.0005615789473684211, + "low_lr": 1.1231578947368422e-05, + "step": 833 + }, + { + "epoch": 2.190664036817883, + "high_lr": 0.0005615789473684211, + "low_lr": 1.1231578947368422e-05, + "step": 833 + }, + { + "epoch": 2.190664036817883, + "high_lr": 0.0005615789473684211, + "low_lr": 1.1231578947368422e-05, + "step": 833 + }, + { + "epoch": 2.190664036817883, + "high_lr": 0.0005615789473684211, + "low_lr": 1.1231578947368422e-05, + "step": 833 + }, + { + "epoch": 2.193293885601578, + "grad_norm": 13.749748229980469, + "learning_rate": 0.0005610526315789474, + "loss": 1.4095, + "step": 834 + }, + { + "epoch": 2.193293885601578, + "high_lr": 0.0005610526315789474, + "low_lr": 1.1221052631578949e-05, + "step": 834 + }, + { + "epoch": 2.193293885601578, + "high_lr": 0.0005610526315789474, + "low_lr": 1.1221052631578949e-05, + "step": 834 + }, + { + "epoch": 2.193293885601578, + "high_lr": 0.0005610526315789474, + "low_lr": 1.1221052631578949e-05, + "step": 834 + }, + { + "epoch": 2.193293885601578, + "high_lr": 0.0005610526315789474, + "low_lr": 1.1221052631578949e-05, + "step": 834 + }, + { + "epoch": 2.193293885601578, + "high_lr": 0.0005610526315789474, + "low_lr": 1.1221052631578949e-05, + "step": 834 + }, + { + "epoch": 2.193293885601578, + "high_lr": 0.0005610526315789474, + "low_lr": 1.1221052631578949e-05, + "step": 834 + }, + { + "epoch": 2.193293885601578, + "high_lr": 0.0005610526315789474, + "low_lr": 1.1221052631578949e-05, + "step": 834 + }, + { + "epoch": 2.193293885601578, + "high_lr": 0.0005610526315789474, + "low_lr": 1.1221052631578949e-05, + "step": 834 + }, + { + "epoch": 2.1959237343852727, + "grad_norm": 1.1653400659561157, + "learning_rate": 0.0005605263157894737, + "loss": 1.3679, + "step": 835 + }, + { + "epoch": 2.1959237343852727, + "high_lr": 0.0005605263157894737, + "low_lr": 1.1210526315789473e-05, + "step": 835 + }, + { + "epoch": 2.1959237343852727, + "high_lr": 0.0005605263157894737, + "low_lr": 1.1210526315789473e-05, + "step": 835 + }, + { + "epoch": 2.1959237343852727, + "high_lr": 0.0005605263157894737, + "low_lr": 1.1210526315789473e-05, + "step": 835 + }, + { + "epoch": 2.1959237343852727, + "high_lr": 0.0005605263157894737, + "low_lr": 1.1210526315789473e-05, + "step": 835 + }, + { + "epoch": 2.1959237343852727, + "high_lr": 0.0005605263157894737, + "low_lr": 1.1210526315789473e-05, + "step": 835 + }, + { + "epoch": 2.1959237343852727, + "high_lr": 0.0005605263157894737, + "low_lr": 1.1210526315789473e-05, + "step": 835 + }, + { + "epoch": 2.1959237343852727, + "high_lr": 0.0005605263157894737, + "low_lr": 1.1210526315789473e-05, + "step": 835 + }, + { + "epoch": 2.1959237343852727, + "high_lr": 0.0005605263157894737, + "low_lr": 1.1210526315789473e-05, + "step": 835 + }, + { + "epoch": 2.1985535831689678, + "grad_norm": 1.3364615440368652, + "learning_rate": 0.0005600000000000001, + "loss": 1.3428, + "step": 836 + }, + { + "epoch": 2.1985535831689678, + "high_lr": 0.0005600000000000001, + "low_lr": 1.1200000000000001e-05, + "step": 836 + }, + { + "epoch": 2.1985535831689678, + "high_lr": 0.0005600000000000001, + "low_lr": 1.1200000000000001e-05, + "step": 836 + }, + { + "epoch": 2.1985535831689678, + "high_lr": 0.0005600000000000001, + "low_lr": 1.1200000000000001e-05, + "step": 836 + }, + { + "epoch": 2.1985535831689678, + "high_lr": 0.0005600000000000001, + "low_lr": 1.1200000000000001e-05, + "step": 836 + }, + { + "epoch": 2.1985535831689678, + "high_lr": 0.0005600000000000001, + "low_lr": 1.1200000000000001e-05, + "step": 836 + }, + { + "epoch": 2.1985535831689678, + "high_lr": 0.0005600000000000001, + "low_lr": 1.1200000000000001e-05, + "step": 836 + }, + { + "epoch": 2.1985535831689678, + "high_lr": 0.0005600000000000001, + "low_lr": 1.1200000000000001e-05, + "step": 836 + }, + { + "epoch": 2.1985535831689678, + "high_lr": 0.0005600000000000001, + "low_lr": 1.1200000000000001e-05, + "step": 836 + }, + { + "epoch": 2.201183431952663, + "grad_norm": 1.1404310464859009, + "learning_rate": 0.0005594736842105263, + "loss": 1.3897, + "step": 837 + }, + { + "epoch": 2.201183431952663, + "high_lr": 0.0005594736842105263, + "low_lr": 1.1189473684210528e-05, + "step": 837 + }, + { + "epoch": 2.201183431952663, + "high_lr": 0.0005594736842105263, + "low_lr": 1.1189473684210528e-05, + "step": 837 + }, + { + "epoch": 2.201183431952663, + "high_lr": 0.0005594736842105263, + "low_lr": 1.1189473684210528e-05, + "step": 837 + }, + { + "epoch": 2.201183431952663, + "high_lr": 0.0005594736842105263, + "low_lr": 1.1189473684210528e-05, + "step": 837 + }, + { + "epoch": 2.201183431952663, + "high_lr": 0.0005594736842105263, + "low_lr": 1.1189473684210528e-05, + "step": 837 + }, + { + "epoch": 2.201183431952663, + "high_lr": 0.0005594736842105263, + "low_lr": 1.1189473684210528e-05, + "step": 837 + }, + { + "epoch": 2.201183431952663, + "high_lr": 0.0005594736842105263, + "low_lr": 1.1189473684210528e-05, + "step": 837 + }, + { + "epoch": 2.201183431952663, + "high_lr": 0.0005594736842105263, + "low_lr": 1.1189473684210528e-05, + "step": 837 + }, + { + "epoch": 2.2038132807363575, + "grad_norm": 1.1752078533172607, + "learning_rate": 0.0005589473684210526, + "loss": 1.4003, + "step": 838 + }, + { + "epoch": 2.2038132807363575, + "high_lr": 0.0005589473684210526, + "low_lr": 1.1178947368421054e-05, + "step": 838 + }, + { + "epoch": 2.2038132807363575, + "high_lr": 0.0005589473684210526, + "low_lr": 1.1178947368421054e-05, + "step": 838 + }, + { + "epoch": 2.2038132807363575, + "high_lr": 0.0005589473684210526, + "low_lr": 1.1178947368421054e-05, + "step": 838 + }, + { + "epoch": 2.2038132807363575, + "high_lr": 0.0005589473684210526, + "low_lr": 1.1178947368421054e-05, + "step": 838 + }, + { + "epoch": 2.2038132807363575, + "high_lr": 0.0005589473684210526, + "low_lr": 1.1178947368421054e-05, + "step": 838 + }, + { + "epoch": 2.2038132807363575, + "high_lr": 0.0005589473684210526, + "low_lr": 1.1178947368421054e-05, + "step": 838 + }, + { + "epoch": 2.2038132807363575, + "high_lr": 0.0005589473684210526, + "low_lr": 1.1178947368421054e-05, + "step": 838 + }, + { + "epoch": 2.2038132807363575, + "high_lr": 0.0005589473684210526, + "low_lr": 1.1178947368421054e-05, + "step": 838 + }, + { + "epoch": 2.2064431295200526, + "grad_norm": 1.1771602630615234, + "learning_rate": 0.0005584210526315789, + "loss": 1.3759, + "step": 839 + }, + { + "epoch": 2.2064431295200526, + "high_lr": 0.0005584210526315789, + "low_lr": 1.116842105263158e-05, + "step": 839 + }, + { + "epoch": 2.2064431295200526, + "high_lr": 0.0005584210526315789, + "low_lr": 1.116842105263158e-05, + "step": 839 + }, + { + "epoch": 2.2064431295200526, + "high_lr": 0.0005584210526315789, + "low_lr": 1.116842105263158e-05, + "step": 839 + }, + { + "epoch": 2.2064431295200526, + "high_lr": 0.0005584210526315789, + "low_lr": 1.116842105263158e-05, + "step": 839 + }, + { + "epoch": 2.2064431295200526, + "high_lr": 0.0005584210526315789, + "low_lr": 1.116842105263158e-05, + "step": 839 + }, + { + "epoch": 2.2064431295200526, + "high_lr": 0.0005584210526315789, + "low_lr": 1.116842105263158e-05, + "step": 839 + }, + { + "epoch": 2.2064431295200526, + "high_lr": 0.0005584210526315789, + "low_lr": 1.116842105263158e-05, + "step": 839 + }, + { + "epoch": 2.2064431295200526, + "high_lr": 0.0005584210526315789, + "low_lr": 1.116842105263158e-05, + "step": 839 + }, + { + "epoch": 2.2090729783037477, + "grad_norm": 1.2733348608016968, + "learning_rate": 0.0005578947368421052, + "loss": 1.4378, + "step": 840 + }, + { + "epoch": 2.2090729783037477, + "high_lr": 0.0005578947368421052, + "low_lr": 1.1157894736842105e-05, + "step": 840 + }, + { + "epoch": 2.2090729783037477, + "high_lr": 0.0005578947368421052, + "low_lr": 1.1157894736842105e-05, + "step": 840 + }, + { + "epoch": 2.2090729783037477, + "high_lr": 0.0005578947368421052, + "low_lr": 1.1157894736842105e-05, + "step": 840 + }, + { + "epoch": 2.2090729783037477, + "high_lr": 0.0005578947368421052, + "low_lr": 1.1157894736842105e-05, + "step": 840 + }, + { + "epoch": 2.2090729783037477, + "high_lr": 0.0005578947368421052, + "low_lr": 1.1157894736842105e-05, + "step": 840 + }, + { + "epoch": 2.2090729783037477, + "high_lr": 0.0005578947368421052, + "low_lr": 1.1157894736842105e-05, + "step": 840 + }, + { + "epoch": 2.2090729783037477, + "high_lr": 0.0005578947368421052, + "low_lr": 1.1157894736842105e-05, + "step": 840 + }, + { + "epoch": 2.2090729783037477, + "high_lr": 0.0005578947368421052, + "low_lr": 1.1157894736842105e-05, + "step": 840 + }, + { + "epoch": 2.2117028270874424, + "grad_norm": 1.1732800006866455, + "learning_rate": 0.0005573684210526316, + "loss": 1.3983, + "step": 841 + }, + { + "epoch": 2.2117028270874424, + "high_lr": 0.0005573684210526316, + "low_lr": 1.1147368421052633e-05, + "step": 841 + }, + { + "epoch": 2.2117028270874424, + "high_lr": 0.0005573684210526316, + "low_lr": 1.1147368421052633e-05, + "step": 841 + }, + { + "epoch": 2.2117028270874424, + "high_lr": 0.0005573684210526316, + "low_lr": 1.1147368421052633e-05, + "step": 841 + }, + { + "epoch": 2.2117028270874424, + "high_lr": 0.0005573684210526316, + "low_lr": 1.1147368421052633e-05, + "step": 841 + }, + { + "epoch": 2.2117028270874424, + "high_lr": 0.0005573684210526316, + "low_lr": 1.1147368421052633e-05, + "step": 841 + }, + { + "epoch": 2.2117028270874424, + "high_lr": 0.0005573684210526316, + "low_lr": 1.1147368421052633e-05, + "step": 841 + }, + { + "epoch": 2.2117028270874424, + "high_lr": 0.0005573684210526316, + "low_lr": 1.1147368421052633e-05, + "step": 841 + }, + { + "epoch": 2.2117028270874424, + "high_lr": 0.0005573684210526316, + "low_lr": 1.1147368421052633e-05, + "step": 841 + }, + { + "epoch": 2.2143326758711375, + "grad_norm": 1.2102004289627075, + "learning_rate": 0.0005568421052631579, + "loss": 1.3733, + "step": 842 + }, + { + "epoch": 2.2143326758711375, + "high_lr": 0.0005568421052631579, + "low_lr": 1.1136842105263159e-05, + "step": 842 + }, + { + "epoch": 2.2143326758711375, + "high_lr": 0.0005568421052631579, + "low_lr": 1.1136842105263159e-05, + "step": 842 + }, + { + "epoch": 2.2143326758711375, + "high_lr": 0.0005568421052631579, + "low_lr": 1.1136842105263159e-05, + "step": 842 + }, + { + "epoch": 2.2143326758711375, + "high_lr": 0.0005568421052631579, + "low_lr": 1.1136842105263159e-05, + "step": 842 + }, + { + "epoch": 2.2143326758711375, + "high_lr": 0.0005568421052631579, + "low_lr": 1.1136842105263159e-05, + "step": 842 + }, + { + "epoch": 2.2143326758711375, + "high_lr": 0.0005568421052631579, + "low_lr": 1.1136842105263159e-05, + "step": 842 + }, + { + "epoch": 2.2143326758711375, + "high_lr": 0.0005568421052631579, + "low_lr": 1.1136842105263159e-05, + "step": 842 + }, + { + "epoch": 2.2143326758711375, + "high_lr": 0.0005568421052631579, + "low_lr": 1.1136842105263159e-05, + "step": 842 + }, + { + "epoch": 2.216962524654832, + "grad_norm": 1.2097240686416626, + "learning_rate": 0.0005563157894736842, + "loss": 1.3866, + "step": 843 + }, + { + "epoch": 2.216962524654832, + "high_lr": 0.0005563157894736842, + "low_lr": 1.1126315789473685e-05, + "step": 843 + }, + { + "epoch": 2.216962524654832, + "high_lr": 0.0005563157894736842, + "low_lr": 1.1126315789473685e-05, + "step": 843 + }, + { + "epoch": 2.216962524654832, + "high_lr": 0.0005563157894736842, + "low_lr": 1.1126315789473685e-05, + "step": 843 + }, + { + "epoch": 2.216962524654832, + "high_lr": 0.0005563157894736842, + "low_lr": 1.1126315789473685e-05, + "step": 843 + }, + { + "epoch": 2.216962524654832, + "high_lr": 0.0005563157894736842, + "low_lr": 1.1126315789473685e-05, + "step": 843 + }, + { + "epoch": 2.216962524654832, + "high_lr": 0.0005563157894736842, + "low_lr": 1.1126315789473685e-05, + "step": 843 + }, + { + "epoch": 2.216962524654832, + "high_lr": 0.0005563157894736842, + "low_lr": 1.1126315789473685e-05, + "step": 843 + }, + { + "epoch": 2.216962524654832, + "high_lr": 0.0005563157894736842, + "low_lr": 1.1126315789473685e-05, + "step": 843 + }, + { + "epoch": 2.219592373438527, + "grad_norm": 1.1703976392745972, + "learning_rate": 0.0005557894736842106, + "loss": 1.4377, + "step": 844 + }, + { + "epoch": 2.219592373438527, + "high_lr": 0.0005557894736842106, + "low_lr": 1.111578947368421e-05, + "step": 844 + }, + { + "epoch": 2.219592373438527, + "high_lr": 0.0005557894736842106, + "low_lr": 1.111578947368421e-05, + "step": 844 + }, + { + "epoch": 2.219592373438527, + "high_lr": 0.0005557894736842106, + "low_lr": 1.111578947368421e-05, + "step": 844 + }, + { + "epoch": 2.219592373438527, + "high_lr": 0.0005557894736842106, + "low_lr": 1.111578947368421e-05, + "step": 844 + }, + { + "epoch": 2.219592373438527, + "high_lr": 0.0005557894736842106, + "low_lr": 1.111578947368421e-05, + "step": 844 + }, + { + "epoch": 2.219592373438527, + "high_lr": 0.0005557894736842106, + "low_lr": 1.111578947368421e-05, + "step": 844 + }, + { + "epoch": 2.219592373438527, + "high_lr": 0.0005557894736842106, + "low_lr": 1.111578947368421e-05, + "step": 844 + }, + { + "epoch": 2.219592373438527, + "high_lr": 0.0005557894736842106, + "low_lr": 1.111578947368421e-05, + "step": 844 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 1.1830466985702515, + "learning_rate": 0.0005552631578947368, + "loss": 1.3486, + "step": 845 + }, + { + "epoch": 2.2222222222222223, + "high_lr": 0.0005552631578947368, + "low_lr": 1.1105263157894736e-05, + "step": 845 + }, + { + "epoch": 2.2222222222222223, + "high_lr": 0.0005552631578947368, + "low_lr": 1.1105263157894736e-05, + "step": 845 + }, + { + "epoch": 2.2222222222222223, + "high_lr": 0.0005552631578947368, + "low_lr": 1.1105263157894736e-05, + "step": 845 + }, + { + "epoch": 2.2222222222222223, + "high_lr": 0.0005552631578947368, + "low_lr": 1.1105263157894736e-05, + "step": 845 + }, + { + "epoch": 2.2222222222222223, + "high_lr": 0.0005552631578947368, + "low_lr": 1.1105263157894736e-05, + "step": 845 + }, + { + "epoch": 2.2222222222222223, + "high_lr": 0.0005552631578947368, + "low_lr": 1.1105263157894736e-05, + "step": 845 + }, + { + "epoch": 2.2222222222222223, + "high_lr": 0.0005552631578947368, + "low_lr": 1.1105263157894736e-05, + "step": 845 + }, + { + "epoch": 2.2222222222222223, + "high_lr": 0.0005552631578947368, + "low_lr": 1.1105263157894736e-05, + "step": 845 + }, + { + "epoch": 2.224852071005917, + "grad_norm": 1.2717379331588745, + "learning_rate": 0.0005547368421052632, + "loss": 1.4297, + "step": 846 + }, + { + "epoch": 2.224852071005917, + "high_lr": 0.0005547368421052632, + "low_lr": 1.1094736842105264e-05, + "step": 846 + }, + { + "epoch": 2.224852071005917, + "high_lr": 0.0005547368421052632, + "low_lr": 1.1094736842105264e-05, + "step": 846 + }, + { + "epoch": 2.224852071005917, + "high_lr": 0.0005547368421052632, + "low_lr": 1.1094736842105264e-05, + "step": 846 + }, + { + "epoch": 2.224852071005917, + "high_lr": 0.0005547368421052632, + "low_lr": 1.1094736842105264e-05, + "step": 846 + }, + { + "epoch": 2.224852071005917, + "high_lr": 0.0005547368421052632, + "low_lr": 1.1094736842105264e-05, + "step": 846 + }, + { + "epoch": 2.224852071005917, + "high_lr": 0.0005547368421052632, + "low_lr": 1.1094736842105264e-05, + "step": 846 + }, + { + "epoch": 2.224852071005917, + "high_lr": 0.0005547368421052632, + "low_lr": 1.1094736842105264e-05, + "step": 846 + }, + { + "epoch": 2.224852071005917, + "high_lr": 0.0005547368421052632, + "low_lr": 1.1094736842105264e-05, + "step": 846 + }, + { + "epoch": 2.227481919789612, + "grad_norm": 1.3143019676208496, + "learning_rate": 0.0005542105263157895, + "loss": 1.4476, + "step": 847 + }, + { + "epoch": 2.227481919789612, + "high_lr": 0.0005542105263157895, + "low_lr": 1.108421052631579e-05, + "step": 847 + }, + { + "epoch": 2.227481919789612, + "high_lr": 0.0005542105263157895, + "low_lr": 1.108421052631579e-05, + "step": 847 + }, + { + "epoch": 2.227481919789612, + "high_lr": 0.0005542105263157895, + "low_lr": 1.108421052631579e-05, + "step": 847 + }, + { + "epoch": 2.227481919789612, + "high_lr": 0.0005542105263157895, + "low_lr": 1.108421052631579e-05, + "step": 847 + }, + { + "epoch": 2.227481919789612, + "high_lr": 0.0005542105263157895, + "low_lr": 1.108421052631579e-05, + "step": 847 + }, + { + "epoch": 2.227481919789612, + "high_lr": 0.0005542105263157895, + "low_lr": 1.108421052631579e-05, + "step": 847 + }, + { + "epoch": 2.227481919789612, + "high_lr": 0.0005542105263157895, + "low_lr": 1.108421052631579e-05, + "step": 847 + }, + { + "epoch": 2.227481919789612, + "high_lr": 0.0005542105263157895, + "low_lr": 1.108421052631579e-05, + "step": 847 + }, + { + "epoch": 2.230111768573307, + "grad_norm": 1.2574163675308228, + "learning_rate": 0.0005536842105263158, + "loss": 1.3895, + "step": 848 + }, + { + "epoch": 2.230111768573307, + "high_lr": 0.0005536842105263158, + "low_lr": 1.1073684210526317e-05, + "step": 848 + }, + { + "epoch": 2.230111768573307, + "high_lr": 0.0005536842105263158, + "low_lr": 1.1073684210526317e-05, + "step": 848 + }, + { + "epoch": 2.230111768573307, + "high_lr": 0.0005536842105263158, + "low_lr": 1.1073684210526317e-05, + "step": 848 + }, + { + "epoch": 2.230111768573307, + "high_lr": 0.0005536842105263158, + "low_lr": 1.1073684210526317e-05, + "step": 848 + }, + { + "epoch": 2.230111768573307, + "high_lr": 0.0005536842105263158, + "low_lr": 1.1073684210526317e-05, + "step": 848 + }, + { + "epoch": 2.230111768573307, + "high_lr": 0.0005536842105263158, + "low_lr": 1.1073684210526317e-05, + "step": 848 + }, + { + "epoch": 2.230111768573307, + "high_lr": 0.0005536842105263158, + "low_lr": 1.1073684210526317e-05, + "step": 848 + }, + { + "epoch": 2.230111768573307, + "high_lr": 0.0005536842105263158, + "low_lr": 1.1073684210526317e-05, + "step": 848 + }, + { + "epoch": 2.232741617357002, + "grad_norm": 1.2217366695404053, + "learning_rate": 0.0005531578947368421, + "loss": 1.3625, + "step": 849 + }, + { + "epoch": 2.232741617357002, + "high_lr": 0.0005531578947368421, + "low_lr": 1.1063157894736842e-05, + "step": 849 + }, + { + "epoch": 2.232741617357002, + "high_lr": 0.0005531578947368421, + "low_lr": 1.1063157894736842e-05, + "step": 849 + }, + { + "epoch": 2.232741617357002, + "high_lr": 0.0005531578947368421, + "low_lr": 1.1063157894736842e-05, + "step": 849 + }, + { + "epoch": 2.232741617357002, + "high_lr": 0.0005531578947368421, + "low_lr": 1.1063157894736842e-05, + "step": 849 + }, + { + "epoch": 2.232741617357002, + "high_lr": 0.0005531578947368421, + "low_lr": 1.1063157894736842e-05, + "step": 849 + }, + { + "epoch": 2.232741617357002, + "high_lr": 0.0005531578947368421, + "low_lr": 1.1063157894736842e-05, + "step": 849 + }, + { + "epoch": 2.232741617357002, + "high_lr": 0.0005531578947368421, + "low_lr": 1.1063157894736842e-05, + "step": 849 + }, + { + "epoch": 2.232741617357002, + "high_lr": 0.0005531578947368421, + "low_lr": 1.1063157894736842e-05, + "step": 849 + }, + { + "epoch": 2.235371466140697, + "grad_norm": 1.2277804613113403, + "learning_rate": 0.0005526315789473685, + "loss": 1.4304, + "step": 850 + }, + { + "epoch": 2.235371466140697, + "high_lr": 0.0005526315789473685, + "low_lr": 1.105263157894737e-05, + "step": 850 + }, + { + "epoch": 2.235371466140697, + "high_lr": 0.0005526315789473685, + "low_lr": 1.105263157894737e-05, + "step": 850 + }, + { + "epoch": 2.235371466140697, + "high_lr": 0.0005526315789473685, + "low_lr": 1.105263157894737e-05, + "step": 850 + }, + { + "epoch": 2.235371466140697, + "high_lr": 0.0005526315789473685, + "low_lr": 1.105263157894737e-05, + "step": 850 + }, + { + "epoch": 2.235371466140697, + "high_lr": 0.0005526315789473685, + "low_lr": 1.105263157894737e-05, + "step": 850 + }, + { + "epoch": 2.235371466140697, + "high_lr": 0.0005526315789473685, + "low_lr": 1.105263157894737e-05, + "step": 850 + }, + { + "epoch": 2.235371466140697, + "high_lr": 0.0005526315789473685, + "low_lr": 1.105263157894737e-05, + "step": 850 + }, + { + "epoch": 2.235371466140697, + "high_lr": 0.0005526315789473685, + "low_lr": 1.105263157894737e-05, + "step": 850 + }, + { + "epoch": 2.238001314924392, + "grad_norm": 1.2404119968414307, + "learning_rate": 0.0005521052631578948, + "loss": 1.3227, + "step": 851 + }, + { + "epoch": 2.238001314924392, + "high_lr": 0.0005521052631578948, + "low_lr": 1.1042105263157896e-05, + "step": 851 + }, + { + "epoch": 2.238001314924392, + "high_lr": 0.0005521052631578948, + "low_lr": 1.1042105263157896e-05, + "step": 851 + }, + { + "epoch": 2.238001314924392, + "high_lr": 0.0005521052631578948, + "low_lr": 1.1042105263157896e-05, + "step": 851 + }, + { + "epoch": 2.238001314924392, + "high_lr": 0.0005521052631578948, + "low_lr": 1.1042105263157896e-05, + "step": 851 + }, + { + "epoch": 2.238001314924392, + "high_lr": 0.0005521052631578948, + "low_lr": 1.1042105263157896e-05, + "step": 851 + }, + { + "epoch": 2.238001314924392, + "high_lr": 0.0005521052631578948, + "low_lr": 1.1042105263157896e-05, + "step": 851 + }, + { + "epoch": 2.238001314924392, + "high_lr": 0.0005521052631578948, + "low_lr": 1.1042105263157896e-05, + "step": 851 + }, + { + "epoch": 2.238001314924392, + "high_lr": 0.0005521052631578948, + "low_lr": 1.1042105263157896e-05, + "step": 851 + }, + { + "epoch": 2.2406311637080867, + "grad_norm": 1.2292978763580322, + "learning_rate": 0.0005515789473684211, + "loss": 1.3879, + "step": 852 + }, + { + "epoch": 2.2406311637080867, + "high_lr": 0.0005515789473684211, + "low_lr": 1.1031578947368422e-05, + "step": 852 + }, + { + "epoch": 2.2406311637080867, + "high_lr": 0.0005515789473684211, + "low_lr": 1.1031578947368422e-05, + "step": 852 + }, + { + "epoch": 2.2406311637080867, + "high_lr": 0.0005515789473684211, + "low_lr": 1.1031578947368422e-05, + "step": 852 + }, + { + "epoch": 2.2406311637080867, + "high_lr": 0.0005515789473684211, + "low_lr": 1.1031578947368422e-05, + "step": 852 + }, + { + "epoch": 2.2406311637080867, + "high_lr": 0.0005515789473684211, + "low_lr": 1.1031578947368422e-05, + "step": 852 + }, + { + "epoch": 2.2406311637080867, + "high_lr": 0.0005515789473684211, + "low_lr": 1.1031578947368422e-05, + "step": 852 + }, + { + "epoch": 2.2406311637080867, + "high_lr": 0.0005515789473684211, + "low_lr": 1.1031578947368422e-05, + "step": 852 + }, + { + "epoch": 2.2406311637080867, + "high_lr": 0.0005515789473684211, + "low_lr": 1.1031578947368422e-05, + "step": 852 + }, + { + "epoch": 2.243261012491782, + "grad_norm": 1.25013267993927, + "learning_rate": 0.0005510526315789474, + "loss": 1.4225, + "step": 853 + }, + { + "epoch": 2.243261012491782, + "high_lr": 0.0005510526315789474, + "low_lr": 1.1021052631578947e-05, + "step": 853 + }, + { + "epoch": 2.243261012491782, + "high_lr": 0.0005510526315789474, + "low_lr": 1.1021052631578947e-05, + "step": 853 + }, + { + "epoch": 2.243261012491782, + "high_lr": 0.0005510526315789474, + "low_lr": 1.1021052631578947e-05, + "step": 853 + }, + { + "epoch": 2.243261012491782, + "high_lr": 0.0005510526315789474, + "low_lr": 1.1021052631578947e-05, + "step": 853 + }, + { + "epoch": 2.243261012491782, + "high_lr": 0.0005510526315789474, + "low_lr": 1.1021052631578947e-05, + "step": 853 + }, + { + "epoch": 2.243261012491782, + "high_lr": 0.0005510526315789474, + "low_lr": 1.1021052631578947e-05, + "step": 853 + }, + { + "epoch": 2.243261012491782, + "high_lr": 0.0005510526315789474, + "low_lr": 1.1021052631578947e-05, + "step": 853 + }, + { + "epoch": 2.243261012491782, + "high_lr": 0.0005510526315789474, + "low_lr": 1.1021052631578947e-05, + "step": 853 + }, + { + "epoch": 2.245890861275477, + "grad_norm": 1.2261604070663452, + "learning_rate": 0.0005505263157894736, + "loss": 1.3401, + "step": 854 + }, + { + "epoch": 2.245890861275477, + "high_lr": 0.0005505263157894736, + "low_lr": 1.1010526315789473e-05, + "step": 854 + }, + { + "epoch": 2.245890861275477, + "high_lr": 0.0005505263157894736, + "low_lr": 1.1010526315789473e-05, + "step": 854 + }, + { + "epoch": 2.245890861275477, + "high_lr": 0.0005505263157894736, + "low_lr": 1.1010526315789473e-05, + "step": 854 + }, + { + "epoch": 2.245890861275477, + "high_lr": 0.0005505263157894736, + "low_lr": 1.1010526315789473e-05, + "step": 854 + }, + { + "epoch": 2.245890861275477, + "high_lr": 0.0005505263157894736, + "low_lr": 1.1010526315789473e-05, + "step": 854 + }, + { + "epoch": 2.245890861275477, + "high_lr": 0.0005505263157894736, + "low_lr": 1.1010526315789473e-05, + "step": 854 + }, + { + "epoch": 2.245890861275477, + "high_lr": 0.0005505263157894736, + "low_lr": 1.1010526315789473e-05, + "step": 854 + }, + { + "epoch": 2.245890861275477, + "high_lr": 0.0005505263157894736, + "low_lr": 1.1010526315789473e-05, + "step": 854 + }, + { + "epoch": 2.2485207100591715, + "grad_norm": 1.202841877937317, + "learning_rate": 0.00055, + "loss": 1.3989, + "step": 855 + }, + { + "epoch": 2.2485207100591715, + "high_lr": 0.00055, + "low_lr": 1.1000000000000001e-05, + "step": 855 + }, + { + "epoch": 2.2485207100591715, + "high_lr": 0.00055, + "low_lr": 1.1000000000000001e-05, + "step": 855 + }, + { + "epoch": 2.2485207100591715, + "high_lr": 0.00055, + "low_lr": 1.1000000000000001e-05, + "step": 855 + }, + { + "epoch": 2.2485207100591715, + "high_lr": 0.00055, + "low_lr": 1.1000000000000001e-05, + "step": 855 + }, + { + "epoch": 2.2485207100591715, + "high_lr": 0.00055, + "low_lr": 1.1000000000000001e-05, + "step": 855 + }, + { + "epoch": 2.2485207100591715, + "high_lr": 0.00055, + "low_lr": 1.1000000000000001e-05, + "step": 855 + }, + { + "epoch": 2.2485207100591715, + "high_lr": 0.00055, + "low_lr": 1.1000000000000001e-05, + "step": 855 + }, + { + "epoch": 2.2485207100591715, + "high_lr": 0.00055, + "low_lr": 1.1000000000000001e-05, + "step": 855 + }, + { + "epoch": 2.2511505588428666, + "grad_norm": 1.2519607543945312, + "learning_rate": 0.0005494736842105263, + "loss": 1.3917, + "step": 856 + }, + { + "epoch": 2.2511505588428666, + "high_lr": 0.0005494736842105263, + "low_lr": 1.0989473684210528e-05, + "step": 856 + }, + { + "epoch": 2.2511505588428666, + "high_lr": 0.0005494736842105263, + "low_lr": 1.0989473684210528e-05, + "step": 856 + }, + { + "epoch": 2.2511505588428666, + "high_lr": 0.0005494736842105263, + "low_lr": 1.0989473684210528e-05, + "step": 856 + }, + { + "epoch": 2.2511505588428666, + "high_lr": 0.0005494736842105263, + "low_lr": 1.0989473684210528e-05, + "step": 856 + }, + { + "epoch": 2.2511505588428666, + "high_lr": 0.0005494736842105263, + "low_lr": 1.0989473684210528e-05, + "step": 856 + }, + { + "epoch": 2.2511505588428666, + "high_lr": 0.0005494736842105263, + "low_lr": 1.0989473684210528e-05, + "step": 856 + }, + { + "epoch": 2.2511505588428666, + "high_lr": 0.0005494736842105263, + "low_lr": 1.0989473684210528e-05, + "step": 856 + }, + { + "epoch": 2.2511505588428666, + "high_lr": 0.0005494736842105263, + "low_lr": 1.0989473684210528e-05, + "step": 856 + }, + { + "epoch": 2.2537804076265613, + "grad_norm": 1.1744623184204102, + "learning_rate": 0.0005489473684210526, + "loss": 1.3736, + "step": 857 + }, + { + "epoch": 2.2537804076265613, + "high_lr": 0.0005489473684210526, + "low_lr": 1.0978947368421054e-05, + "step": 857 + }, + { + "epoch": 2.2537804076265613, + "high_lr": 0.0005489473684210526, + "low_lr": 1.0978947368421054e-05, + "step": 857 + }, + { + "epoch": 2.2537804076265613, + "high_lr": 0.0005489473684210526, + "low_lr": 1.0978947368421054e-05, + "step": 857 + }, + { + "epoch": 2.2537804076265613, + "high_lr": 0.0005489473684210526, + "low_lr": 1.0978947368421054e-05, + "step": 857 + }, + { + "epoch": 2.2537804076265613, + "high_lr": 0.0005489473684210526, + "low_lr": 1.0978947368421054e-05, + "step": 857 + }, + { + "epoch": 2.2537804076265613, + "high_lr": 0.0005489473684210526, + "low_lr": 1.0978947368421054e-05, + "step": 857 + }, + { + "epoch": 2.2537804076265613, + "high_lr": 0.0005489473684210526, + "low_lr": 1.0978947368421054e-05, + "step": 857 + }, + { + "epoch": 2.2537804076265613, + "high_lr": 0.0005489473684210526, + "low_lr": 1.0978947368421054e-05, + "step": 857 + }, + { + "epoch": 2.2564102564102564, + "grad_norm": 1.2681621313095093, + "learning_rate": 0.0005484210526315789, + "loss": 1.3639, + "step": 858 + }, + { + "epoch": 2.2564102564102564, + "high_lr": 0.0005484210526315789, + "low_lr": 1.0968421052631579e-05, + "step": 858 + }, + { + "epoch": 2.2564102564102564, + "high_lr": 0.0005484210526315789, + "low_lr": 1.0968421052631579e-05, + "step": 858 + }, + { + "epoch": 2.2564102564102564, + "high_lr": 0.0005484210526315789, + "low_lr": 1.0968421052631579e-05, + "step": 858 + }, + { + "epoch": 2.2564102564102564, + "high_lr": 0.0005484210526315789, + "low_lr": 1.0968421052631579e-05, + "step": 858 + }, + { + "epoch": 2.2564102564102564, + "high_lr": 0.0005484210526315789, + "low_lr": 1.0968421052631579e-05, + "step": 858 + }, + { + "epoch": 2.2564102564102564, + "high_lr": 0.0005484210526315789, + "low_lr": 1.0968421052631579e-05, + "step": 858 + }, + { + "epoch": 2.2564102564102564, + "high_lr": 0.0005484210526315789, + "low_lr": 1.0968421052631579e-05, + "step": 858 + }, + { + "epoch": 2.2564102564102564, + "high_lr": 0.0005484210526315789, + "low_lr": 1.0968421052631579e-05, + "step": 858 + }, + { + "epoch": 2.2590401051939515, + "grad_norm": 1.1985419988632202, + "learning_rate": 0.0005478947368421052, + "loss": 1.3894, + "step": 859 + }, + { + "epoch": 2.2590401051939515, + "high_lr": 0.0005478947368421052, + "low_lr": 1.0957894736842105e-05, + "step": 859 + }, + { + "epoch": 2.2590401051939515, + "high_lr": 0.0005478947368421052, + "low_lr": 1.0957894736842105e-05, + "step": 859 + }, + { + "epoch": 2.2590401051939515, + "high_lr": 0.0005478947368421052, + "low_lr": 1.0957894736842105e-05, + "step": 859 + }, + { + "epoch": 2.2590401051939515, + "high_lr": 0.0005478947368421052, + "low_lr": 1.0957894736842105e-05, + "step": 859 + }, + { + "epoch": 2.2590401051939515, + "high_lr": 0.0005478947368421052, + "low_lr": 1.0957894736842105e-05, + "step": 859 + }, + { + "epoch": 2.2590401051939515, + "high_lr": 0.0005478947368421052, + "low_lr": 1.0957894736842105e-05, + "step": 859 + }, + { + "epoch": 2.2590401051939515, + "high_lr": 0.0005478947368421052, + "low_lr": 1.0957894736842105e-05, + "step": 859 + }, + { + "epoch": 2.2590401051939515, + "high_lr": 0.0005478947368421052, + "low_lr": 1.0957894736842105e-05, + "step": 859 + }, + { + "epoch": 2.261669953977646, + "grad_norm": 1.3177270889282227, + "learning_rate": 0.0005473684210526317, + "loss": 1.3522, + "step": 860 + }, + { + "epoch": 2.261669953977646, + "high_lr": 0.0005473684210526317, + "low_lr": 1.0947368421052633e-05, + "step": 860 + }, + { + "epoch": 2.261669953977646, + "high_lr": 0.0005473684210526317, + "low_lr": 1.0947368421052633e-05, + "step": 860 + }, + { + "epoch": 2.261669953977646, + "high_lr": 0.0005473684210526317, + "low_lr": 1.0947368421052633e-05, + "step": 860 + }, + { + "epoch": 2.261669953977646, + "high_lr": 0.0005473684210526317, + "low_lr": 1.0947368421052633e-05, + "step": 860 + }, + { + "epoch": 2.261669953977646, + "high_lr": 0.0005473684210526317, + "low_lr": 1.0947368421052633e-05, + "step": 860 + }, + { + "epoch": 2.261669953977646, + "high_lr": 0.0005473684210526317, + "low_lr": 1.0947368421052633e-05, + "step": 860 + }, + { + "epoch": 2.261669953977646, + "high_lr": 0.0005473684210526317, + "low_lr": 1.0947368421052633e-05, + "step": 860 + }, + { + "epoch": 2.261669953977646, + "high_lr": 0.0005473684210526317, + "low_lr": 1.0947368421052633e-05, + "step": 860 + }, + { + "epoch": 2.2642998027613412, + "grad_norm": 1.23194420337677, + "learning_rate": 0.000546842105263158, + "loss": 1.3465, + "step": 861 + }, + { + "epoch": 2.2642998027613412, + "high_lr": 0.000546842105263158, + "low_lr": 1.093684210526316e-05, + "step": 861 + }, + { + "epoch": 2.2642998027613412, + "high_lr": 0.000546842105263158, + "low_lr": 1.093684210526316e-05, + "step": 861 + }, + { + "epoch": 2.2642998027613412, + "high_lr": 0.000546842105263158, + "low_lr": 1.093684210526316e-05, + "step": 861 + }, + { + "epoch": 2.2642998027613412, + "high_lr": 0.000546842105263158, + "low_lr": 1.093684210526316e-05, + "step": 861 + }, + { + "epoch": 2.2642998027613412, + "high_lr": 0.000546842105263158, + "low_lr": 1.093684210526316e-05, + "step": 861 + }, + { + "epoch": 2.2642998027613412, + "high_lr": 0.000546842105263158, + "low_lr": 1.093684210526316e-05, + "step": 861 + }, + { + "epoch": 2.2642998027613412, + "high_lr": 0.000546842105263158, + "low_lr": 1.093684210526316e-05, + "step": 861 + }, + { + "epoch": 2.2642998027613412, + "high_lr": 0.000546842105263158, + "low_lr": 1.093684210526316e-05, + "step": 861 + }, + { + "epoch": 2.2669296515450363, + "grad_norm": 1.2162786722183228, + "learning_rate": 0.0005463157894736843, + "loss": 1.4018, + "step": 862 + }, + { + "epoch": 2.2669296515450363, + "high_lr": 0.0005463157894736843, + "low_lr": 1.0926315789473686e-05, + "step": 862 + }, + { + "epoch": 2.2669296515450363, + "high_lr": 0.0005463157894736843, + "low_lr": 1.0926315789473686e-05, + "step": 862 + }, + { + "epoch": 2.2669296515450363, + "high_lr": 0.0005463157894736843, + "low_lr": 1.0926315789473686e-05, + "step": 862 + }, + { + "epoch": 2.2669296515450363, + "high_lr": 0.0005463157894736843, + "low_lr": 1.0926315789473686e-05, + "step": 862 + }, + { + "epoch": 2.2669296515450363, + "high_lr": 0.0005463157894736843, + "low_lr": 1.0926315789473686e-05, + "step": 862 + }, + { + "epoch": 2.2669296515450363, + "high_lr": 0.0005463157894736843, + "low_lr": 1.0926315789473686e-05, + "step": 862 + }, + { + "epoch": 2.2669296515450363, + "high_lr": 0.0005463157894736843, + "low_lr": 1.0926315789473686e-05, + "step": 862 + }, + { + "epoch": 2.2669296515450363, + "high_lr": 0.0005463157894736843, + "low_lr": 1.0926315789473686e-05, + "step": 862 + }, + { + "epoch": 2.269559500328731, + "grad_norm": 1.2616527080535889, + "learning_rate": 0.0005457894736842105, + "loss": 1.4306, + "step": 863 + }, + { + "epoch": 2.269559500328731, + "high_lr": 0.0005457894736842105, + "low_lr": 1.091578947368421e-05, + "step": 863 + }, + { + "epoch": 2.269559500328731, + "high_lr": 0.0005457894736842105, + "low_lr": 1.091578947368421e-05, + "step": 863 + }, + { + "epoch": 2.269559500328731, + "high_lr": 0.0005457894736842105, + "low_lr": 1.091578947368421e-05, + "step": 863 + }, + { + "epoch": 2.269559500328731, + "high_lr": 0.0005457894736842105, + "low_lr": 1.091578947368421e-05, + "step": 863 + }, + { + "epoch": 2.269559500328731, + "high_lr": 0.0005457894736842105, + "low_lr": 1.091578947368421e-05, + "step": 863 + }, + { + "epoch": 2.269559500328731, + "high_lr": 0.0005457894736842105, + "low_lr": 1.091578947368421e-05, + "step": 863 + }, + { + "epoch": 2.269559500328731, + "high_lr": 0.0005457894736842105, + "low_lr": 1.091578947368421e-05, + "step": 863 + }, + { + "epoch": 2.269559500328731, + "high_lr": 0.0005457894736842105, + "low_lr": 1.091578947368421e-05, + "step": 863 + }, + { + "epoch": 2.272189349112426, + "grad_norm": 1.2469723224639893, + "learning_rate": 0.0005452631578947369, + "loss": 1.3668, + "step": 864 + }, + { + "epoch": 2.272189349112426, + "high_lr": 0.0005452631578947369, + "low_lr": 1.0905263157894738e-05, + "step": 864 + }, + { + "epoch": 2.272189349112426, + "high_lr": 0.0005452631578947369, + "low_lr": 1.0905263157894738e-05, + "step": 864 + }, + { + "epoch": 2.272189349112426, + "high_lr": 0.0005452631578947369, + "low_lr": 1.0905263157894738e-05, + "step": 864 + }, + { + "epoch": 2.272189349112426, + "high_lr": 0.0005452631578947369, + "low_lr": 1.0905263157894738e-05, + "step": 864 + }, + { + "epoch": 2.272189349112426, + "high_lr": 0.0005452631578947369, + "low_lr": 1.0905263157894738e-05, + "step": 864 + }, + { + "epoch": 2.272189349112426, + "high_lr": 0.0005452631578947369, + "low_lr": 1.0905263157894738e-05, + "step": 864 + }, + { + "epoch": 2.272189349112426, + "high_lr": 0.0005452631578947369, + "low_lr": 1.0905263157894738e-05, + "step": 864 + }, + { + "epoch": 2.272189349112426, + "high_lr": 0.0005452631578947369, + "low_lr": 1.0905263157894738e-05, + "step": 864 + }, + { + "epoch": 2.2748191978961207, + "grad_norm": 1.2859033346176147, + "learning_rate": 0.0005447368421052632, + "loss": 1.3792, + "step": 865 + }, + { + "epoch": 2.2748191978961207, + "high_lr": 0.0005447368421052632, + "low_lr": 1.0894736842105265e-05, + "step": 865 + }, + { + "epoch": 2.2748191978961207, + "high_lr": 0.0005447368421052632, + "low_lr": 1.0894736842105265e-05, + "step": 865 + }, + { + "epoch": 2.2748191978961207, + "high_lr": 0.0005447368421052632, + "low_lr": 1.0894736842105265e-05, + "step": 865 + }, + { + "epoch": 2.2748191978961207, + "high_lr": 0.0005447368421052632, + "low_lr": 1.0894736842105265e-05, + "step": 865 + }, + { + "epoch": 2.2748191978961207, + "high_lr": 0.0005447368421052632, + "low_lr": 1.0894736842105265e-05, + "step": 865 + }, + { + "epoch": 2.2748191978961207, + "high_lr": 0.0005447368421052632, + "low_lr": 1.0894736842105265e-05, + "step": 865 + }, + { + "epoch": 2.2748191978961207, + "high_lr": 0.0005447368421052632, + "low_lr": 1.0894736842105265e-05, + "step": 865 + }, + { + "epoch": 2.2748191978961207, + "high_lr": 0.0005447368421052632, + "low_lr": 1.0894736842105265e-05, + "step": 865 + }, + { + "epoch": 2.277449046679816, + "grad_norm": 1.2813301086425781, + "learning_rate": 0.0005442105263157895, + "loss": 1.3938, + "step": 866 + }, + { + "epoch": 2.277449046679816, + "high_lr": 0.0005442105263157895, + "low_lr": 1.0884210526315791e-05, + "step": 866 + }, + { + "epoch": 2.277449046679816, + "high_lr": 0.0005442105263157895, + "low_lr": 1.0884210526315791e-05, + "step": 866 + }, + { + "epoch": 2.277449046679816, + "high_lr": 0.0005442105263157895, + "low_lr": 1.0884210526315791e-05, + "step": 866 + }, + { + "epoch": 2.277449046679816, + "high_lr": 0.0005442105263157895, + "low_lr": 1.0884210526315791e-05, + "step": 866 + }, + { + "epoch": 2.277449046679816, + "high_lr": 0.0005442105263157895, + "low_lr": 1.0884210526315791e-05, + "step": 866 + }, + { + "epoch": 2.277449046679816, + "high_lr": 0.0005442105263157895, + "low_lr": 1.0884210526315791e-05, + "step": 866 + }, + { + "epoch": 2.277449046679816, + "high_lr": 0.0005442105263157895, + "low_lr": 1.0884210526315791e-05, + "step": 866 + }, + { + "epoch": 2.277449046679816, + "high_lr": 0.0005442105263157895, + "low_lr": 1.0884210526315791e-05, + "step": 866 + }, + { + "epoch": 2.280078895463511, + "grad_norm": 1.2008345127105713, + "learning_rate": 0.0005436842105263158, + "loss": 1.417, + "step": 867 + }, + { + "epoch": 2.280078895463511, + "high_lr": 0.0005436842105263158, + "low_lr": 1.0873684210526316e-05, + "step": 867 + }, + { + "epoch": 2.280078895463511, + "high_lr": 0.0005436842105263158, + "low_lr": 1.0873684210526316e-05, + "step": 867 + }, + { + "epoch": 2.280078895463511, + "high_lr": 0.0005436842105263158, + "low_lr": 1.0873684210526316e-05, + "step": 867 + }, + { + "epoch": 2.280078895463511, + "high_lr": 0.0005436842105263158, + "low_lr": 1.0873684210526316e-05, + "step": 867 + }, + { + "epoch": 2.280078895463511, + "high_lr": 0.0005436842105263158, + "low_lr": 1.0873684210526316e-05, + "step": 867 + }, + { + "epoch": 2.280078895463511, + "high_lr": 0.0005436842105263158, + "low_lr": 1.0873684210526316e-05, + "step": 867 + }, + { + "epoch": 2.280078895463511, + "high_lr": 0.0005436842105263158, + "low_lr": 1.0873684210526316e-05, + "step": 867 + }, + { + "epoch": 2.280078895463511, + "high_lr": 0.0005436842105263158, + "low_lr": 1.0873684210526316e-05, + "step": 867 + }, + { + "epoch": 2.2827087442472056, + "grad_norm": 1.2014013528823853, + "learning_rate": 0.0005431578947368421, + "loss": 1.3606, + "step": 868 + }, + { + "epoch": 2.2827087442472056, + "high_lr": 0.0005431578947368421, + "low_lr": 1.0863157894736842e-05, + "step": 868 + }, + { + "epoch": 2.2827087442472056, + "high_lr": 0.0005431578947368421, + "low_lr": 1.0863157894736842e-05, + "step": 868 + }, + { + "epoch": 2.2827087442472056, + "high_lr": 0.0005431578947368421, + "low_lr": 1.0863157894736842e-05, + "step": 868 + }, + { + "epoch": 2.2827087442472056, + "high_lr": 0.0005431578947368421, + "low_lr": 1.0863157894736842e-05, + "step": 868 + }, + { + "epoch": 2.2827087442472056, + "high_lr": 0.0005431578947368421, + "low_lr": 1.0863157894736842e-05, + "step": 868 + }, + { + "epoch": 2.2827087442472056, + "high_lr": 0.0005431578947368421, + "low_lr": 1.0863157894736842e-05, + "step": 868 + }, + { + "epoch": 2.2827087442472056, + "high_lr": 0.0005431578947368421, + "low_lr": 1.0863157894736842e-05, + "step": 868 + }, + { + "epoch": 2.2827087442472056, + "high_lr": 0.0005431578947368421, + "low_lr": 1.0863157894736842e-05, + "step": 868 + }, + { + "epoch": 2.2853385930309007, + "grad_norm": 1.2660644054412842, + "learning_rate": 0.0005426315789473685, + "loss": 1.3771, + "step": 869 + }, + { + "epoch": 2.2853385930309007, + "high_lr": 0.0005426315789473685, + "low_lr": 1.085263157894737e-05, + "step": 869 + }, + { + "epoch": 2.2853385930309007, + "high_lr": 0.0005426315789473685, + "low_lr": 1.085263157894737e-05, + "step": 869 + }, + { + "epoch": 2.2853385930309007, + "high_lr": 0.0005426315789473685, + "low_lr": 1.085263157894737e-05, + "step": 869 + }, + { + "epoch": 2.2853385930309007, + "high_lr": 0.0005426315789473685, + "low_lr": 1.085263157894737e-05, + "step": 869 + }, + { + "epoch": 2.2853385930309007, + "high_lr": 0.0005426315789473685, + "low_lr": 1.085263157894737e-05, + "step": 869 + }, + { + "epoch": 2.2853385930309007, + "high_lr": 0.0005426315789473685, + "low_lr": 1.085263157894737e-05, + "step": 869 + }, + { + "epoch": 2.2853385930309007, + "high_lr": 0.0005426315789473685, + "low_lr": 1.085263157894737e-05, + "step": 869 + }, + { + "epoch": 2.2853385930309007, + "high_lr": 0.0005426315789473685, + "low_lr": 1.085263157894737e-05, + "step": 869 + }, + { + "epoch": 2.287968441814596, + "grad_norm": 1.1866534948349, + "learning_rate": 0.0005421052631578948, + "loss": 1.4152, + "step": 870 + }, + { + "epoch": 2.287968441814596, + "high_lr": 0.0005421052631578948, + "low_lr": 1.0842105263157896e-05, + "step": 870 + }, + { + "epoch": 2.287968441814596, + "high_lr": 0.0005421052631578948, + "low_lr": 1.0842105263157896e-05, + "step": 870 + }, + { + "epoch": 2.287968441814596, + "high_lr": 0.0005421052631578948, + "low_lr": 1.0842105263157896e-05, + "step": 870 + }, + { + "epoch": 2.287968441814596, + "high_lr": 0.0005421052631578948, + "low_lr": 1.0842105263157896e-05, + "step": 870 + }, + { + "epoch": 2.287968441814596, + "high_lr": 0.0005421052631578948, + "low_lr": 1.0842105263157896e-05, + "step": 870 + }, + { + "epoch": 2.287968441814596, + "high_lr": 0.0005421052631578948, + "low_lr": 1.0842105263157896e-05, + "step": 870 + }, + { + "epoch": 2.287968441814596, + "high_lr": 0.0005421052631578948, + "low_lr": 1.0842105263157896e-05, + "step": 870 + }, + { + "epoch": 2.287968441814596, + "high_lr": 0.0005421052631578948, + "low_lr": 1.0842105263157896e-05, + "step": 870 + }, + { + "epoch": 2.2905982905982905, + "grad_norm": 1.195812702178955, + "learning_rate": 0.000541578947368421, + "loss": 1.3509, + "step": 871 + }, + { + "epoch": 2.2905982905982905, + "high_lr": 0.000541578947368421, + "low_lr": 1.0831578947368423e-05, + "step": 871 + }, + { + "epoch": 2.2905982905982905, + "high_lr": 0.000541578947368421, + "low_lr": 1.0831578947368423e-05, + "step": 871 + }, + { + "epoch": 2.2905982905982905, + "high_lr": 0.000541578947368421, + "low_lr": 1.0831578947368423e-05, + "step": 871 + }, + { + "epoch": 2.2905982905982905, + "high_lr": 0.000541578947368421, + "low_lr": 1.0831578947368423e-05, + "step": 871 + }, + { + "epoch": 2.2905982905982905, + "high_lr": 0.000541578947368421, + "low_lr": 1.0831578947368423e-05, + "step": 871 + }, + { + "epoch": 2.2905982905982905, + "high_lr": 0.000541578947368421, + "low_lr": 1.0831578947368423e-05, + "step": 871 + }, + { + "epoch": 2.2905982905982905, + "high_lr": 0.000541578947368421, + "low_lr": 1.0831578947368423e-05, + "step": 871 + }, + { + "epoch": 2.2905982905982905, + "high_lr": 0.000541578947368421, + "low_lr": 1.0831578947368423e-05, + "step": 871 + }, + { + "epoch": 2.2932281393819856, + "grad_norm": 1.1974976062774658, + "learning_rate": 0.0005410526315789473, + "loss": 1.3986, + "step": 872 + }, + { + "epoch": 2.2932281393819856, + "high_lr": 0.0005410526315789473, + "low_lr": 1.0821052631578947e-05, + "step": 872 + }, + { + "epoch": 2.2932281393819856, + "high_lr": 0.0005410526315789473, + "low_lr": 1.0821052631578947e-05, + "step": 872 + }, + { + "epoch": 2.2932281393819856, + "high_lr": 0.0005410526315789473, + "low_lr": 1.0821052631578947e-05, + "step": 872 + }, + { + "epoch": 2.2932281393819856, + "high_lr": 0.0005410526315789473, + "low_lr": 1.0821052631578947e-05, + "step": 872 + }, + { + "epoch": 2.2932281393819856, + "high_lr": 0.0005410526315789473, + "low_lr": 1.0821052631578947e-05, + "step": 872 + }, + { + "epoch": 2.2932281393819856, + "high_lr": 0.0005410526315789473, + "low_lr": 1.0821052631578947e-05, + "step": 872 + }, + { + "epoch": 2.2932281393819856, + "high_lr": 0.0005410526315789473, + "low_lr": 1.0821052631578947e-05, + "step": 872 + }, + { + "epoch": 2.2932281393819856, + "high_lr": 0.0005410526315789473, + "low_lr": 1.0821052631578947e-05, + "step": 872 + }, + { + "epoch": 2.2958579881656807, + "grad_norm": 1.3242113590240479, + "learning_rate": 0.0005405263157894736, + "loss": 1.3586, + "step": 873 + }, + { + "epoch": 2.2958579881656807, + "high_lr": 0.0005405263157894736, + "low_lr": 1.0810526315789474e-05, + "step": 873 + }, + { + "epoch": 2.2958579881656807, + "high_lr": 0.0005405263157894736, + "low_lr": 1.0810526315789474e-05, + "step": 873 + }, + { + "epoch": 2.2958579881656807, + "high_lr": 0.0005405263157894736, + "low_lr": 1.0810526315789474e-05, + "step": 873 + }, + { + "epoch": 2.2958579881656807, + "high_lr": 0.0005405263157894736, + "low_lr": 1.0810526315789474e-05, + "step": 873 + }, + { + "epoch": 2.2958579881656807, + "high_lr": 0.0005405263157894736, + "low_lr": 1.0810526315789474e-05, + "step": 873 + }, + { + "epoch": 2.2958579881656807, + "high_lr": 0.0005405263157894736, + "low_lr": 1.0810526315789474e-05, + "step": 873 + }, + { + "epoch": 2.2958579881656807, + "high_lr": 0.0005405263157894736, + "low_lr": 1.0810526315789474e-05, + "step": 873 + }, + { + "epoch": 2.2958579881656807, + "high_lr": 0.0005405263157894736, + "low_lr": 1.0810526315789474e-05, + "step": 873 + }, + { + "epoch": 2.2984878369493753, + "grad_norm": 1.312902569770813, + "learning_rate": 0.00054, + "loss": 1.3863, + "step": 874 + }, + { + "epoch": 2.2984878369493753, + "high_lr": 0.00054, + "low_lr": 1.0800000000000002e-05, + "step": 874 + }, + { + "epoch": 2.2984878369493753, + "high_lr": 0.00054, + "low_lr": 1.0800000000000002e-05, + "step": 874 + }, + { + "epoch": 2.2984878369493753, + "high_lr": 0.00054, + "low_lr": 1.0800000000000002e-05, + "step": 874 + }, + { + "epoch": 2.2984878369493753, + "high_lr": 0.00054, + "low_lr": 1.0800000000000002e-05, + "step": 874 + }, + { + "epoch": 2.2984878369493753, + "high_lr": 0.00054, + "low_lr": 1.0800000000000002e-05, + "step": 874 + }, + { + "epoch": 2.2984878369493753, + "high_lr": 0.00054, + "low_lr": 1.0800000000000002e-05, + "step": 874 + }, + { + "epoch": 2.2984878369493753, + "high_lr": 0.00054, + "low_lr": 1.0800000000000002e-05, + "step": 874 + }, + { + "epoch": 2.2984878369493753, + "high_lr": 0.00054, + "low_lr": 1.0800000000000002e-05, + "step": 874 + }, + { + "epoch": 2.3011176857330704, + "grad_norm": 1.3001848459243774, + "learning_rate": 0.0005394736842105263, + "loss": 1.3902, + "step": 875 + }, + { + "epoch": 2.3011176857330704, + "high_lr": 0.0005394736842105263, + "low_lr": 1.0789473684210528e-05, + "step": 875 + }, + { + "epoch": 2.3011176857330704, + "high_lr": 0.0005394736842105263, + "low_lr": 1.0789473684210528e-05, + "step": 875 + }, + { + "epoch": 2.3011176857330704, + "high_lr": 0.0005394736842105263, + "low_lr": 1.0789473684210528e-05, + "step": 875 + }, + { + "epoch": 2.3011176857330704, + "high_lr": 0.0005394736842105263, + "low_lr": 1.0789473684210528e-05, + "step": 875 + }, + { + "epoch": 2.3011176857330704, + "high_lr": 0.0005394736842105263, + "low_lr": 1.0789473684210528e-05, + "step": 875 + }, + { + "epoch": 2.3011176857330704, + "high_lr": 0.0005394736842105263, + "low_lr": 1.0789473684210528e-05, + "step": 875 + }, + { + "epoch": 2.3011176857330704, + "high_lr": 0.0005394736842105263, + "low_lr": 1.0789473684210528e-05, + "step": 875 + }, + { + "epoch": 2.3011176857330704, + "high_lr": 0.0005394736842105263, + "low_lr": 1.0789473684210528e-05, + "step": 875 + }, + { + "epoch": 2.3037475345167655, + "grad_norm": 1.207558274269104, + "learning_rate": 0.0005389473684210526, + "loss": 1.3501, + "step": 876 + }, + { + "epoch": 2.3037475345167655, + "high_lr": 0.0005389473684210526, + "low_lr": 1.0778947368421053e-05, + "step": 876 + }, + { + "epoch": 2.3037475345167655, + "high_lr": 0.0005389473684210526, + "low_lr": 1.0778947368421053e-05, + "step": 876 + }, + { + "epoch": 2.3037475345167655, + "high_lr": 0.0005389473684210526, + "low_lr": 1.0778947368421053e-05, + "step": 876 + }, + { + "epoch": 2.3037475345167655, + "high_lr": 0.0005389473684210526, + "low_lr": 1.0778947368421053e-05, + "step": 876 + }, + { + "epoch": 2.3037475345167655, + "high_lr": 0.0005389473684210526, + "low_lr": 1.0778947368421053e-05, + "step": 876 + }, + { + "epoch": 2.3037475345167655, + "high_lr": 0.0005389473684210526, + "low_lr": 1.0778947368421053e-05, + "step": 876 + }, + { + "epoch": 2.3037475345167655, + "high_lr": 0.0005389473684210526, + "low_lr": 1.0778947368421053e-05, + "step": 876 + }, + { + "epoch": 2.3037475345167655, + "high_lr": 0.0005389473684210526, + "low_lr": 1.0778947368421053e-05, + "step": 876 + }, + { + "epoch": 2.30637738330046, + "grad_norm": 1.2861137390136719, + "learning_rate": 0.000538421052631579, + "loss": 1.3996, + "step": 877 + }, + { + "epoch": 2.30637738330046, + "high_lr": 0.000538421052631579, + "low_lr": 1.0768421052631579e-05, + "step": 877 + }, + { + "epoch": 2.30637738330046, + "high_lr": 0.000538421052631579, + "low_lr": 1.0768421052631579e-05, + "step": 877 + }, + { + "epoch": 2.30637738330046, + "high_lr": 0.000538421052631579, + "low_lr": 1.0768421052631579e-05, + "step": 877 + }, + { + "epoch": 2.30637738330046, + "high_lr": 0.000538421052631579, + "low_lr": 1.0768421052631579e-05, + "step": 877 + }, + { + "epoch": 2.30637738330046, + "high_lr": 0.000538421052631579, + "low_lr": 1.0768421052631579e-05, + "step": 877 + }, + { + "epoch": 2.30637738330046, + "high_lr": 0.000538421052631579, + "low_lr": 1.0768421052631579e-05, + "step": 877 + }, + { + "epoch": 2.30637738330046, + "high_lr": 0.000538421052631579, + "low_lr": 1.0768421052631579e-05, + "step": 877 + }, + { + "epoch": 2.30637738330046, + "high_lr": 0.000538421052631579, + "low_lr": 1.0768421052631579e-05, + "step": 877 + }, + { + "epoch": 2.3090072320841553, + "grad_norm": 1.238488793373108, + "learning_rate": 0.0005378947368421054, + "loss": 1.3846, + "step": 878 + }, + { + "epoch": 2.3090072320841553, + "high_lr": 0.0005378947368421054, + "low_lr": 1.0757894736842107e-05, + "step": 878 + }, + { + "epoch": 2.3090072320841553, + "high_lr": 0.0005378947368421054, + "low_lr": 1.0757894736842107e-05, + "step": 878 + }, + { + "epoch": 2.3090072320841553, + "high_lr": 0.0005378947368421054, + "low_lr": 1.0757894736842107e-05, + "step": 878 + }, + { + "epoch": 2.3090072320841553, + "high_lr": 0.0005378947368421054, + "low_lr": 1.0757894736842107e-05, + "step": 878 + }, + { + "epoch": 2.3090072320841553, + "high_lr": 0.0005378947368421054, + "low_lr": 1.0757894736842107e-05, + "step": 878 + }, + { + "epoch": 2.3090072320841553, + "high_lr": 0.0005378947368421054, + "low_lr": 1.0757894736842107e-05, + "step": 878 + }, + { + "epoch": 2.3090072320841553, + "high_lr": 0.0005378947368421054, + "low_lr": 1.0757894736842107e-05, + "step": 878 + }, + { + "epoch": 2.3090072320841553, + "high_lr": 0.0005378947368421054, + "low_lr": 1.0757894736842107e-05, + "step": 878 + }, + { + "epoch": 2.31163708086785, + "grad_norm": 1.239641785621643, + "learning_rate": 0.0005373684210526317, + "loss": 1.3896, + "step": 879 + }, + { + "epoch": 2.31163708086785, + "high_lr": 0.0005373684210526317, + "low_lr": 1.0747368421052633e-05, + "step": 879 + }, + { + "epoch": 2.31163708086785, + "high_lr": 0.0005373684210526317, + "low_lr": 1.0747368421052633e-05, + "step": 879 + }, + { + "epoch": 2.31163708086785, + "high_lr": 0.0005373684210526317, + "low_lr": 1.0747368421052633e-05, + "step": 879 + }, + { + "epoch": 2.31163708086785, + "high_lr": 0.0005373684210526317, + "low_lr": 1.0747368421052633e-05, + "step": 879 + }, + { + "epoch": 2.31163708086785, + "high_lr": 0.0005373684210526317, + "low_lr": 1.0747368421052633e-05, + "step": 879 + }, + { + "epoch": 2.31163708086785, + "high_lr": 0.0005373684210526317, + "low_lr": 1.0747368421052633e-05, + "step": 879 + }, + { + "epoch": 2.31163708086785, + "high_lr": 0.0005373684210526317, + "low_lr": 1.0747368421052633e-05, + "step": 879 + }, + { + "epoch": 2.31163708086785, + "high_lr": 0.0005373684210526317, + "low_lr": 1.0747368421052633e-05, + "step": 879 + }, + { + "epoch": 2.314266929651545, + "grad_norm": 1.2383469343185425, + "learning_rate": 0.0005368421052631579, + "loss": 1.3641, + "step": 880 + }, + { + "epoch": 2.314266929651545, + "high_lr": 0.0005368421052631579, + "low_lr": 1.073684210526316e-05, + "step": 880 + }, + { + "epoch": 2.314266929651545, + "high_lr": 0.0005368421052631579, + "low_lr": 1.073684210526316e-05, + "step": 880 + }, + { + "epoch": 2.314266929651545, + "high_lr": 0.0005368421052631579, + "low_lr": 1.073684210526316e-05, + "step": 880 + }, + { + "epoch": 2.314266929651545, + "high_lr": 0.0005368421052631579, + "low_lr": 1.073684210526316e-05, + "step": 880 + }, + { + "epoch": 2.314266929651545, + "high_lr": 0.0005368421052631579, + "low_lr": 1.073684210526316e-05, + "step": 880 + }, + { + "epoch": 2.314266929651545, + "high_lr": 0.0005368421052631579, + "low_lr": 1.073684210526316e-05, + "step": 880 + }, + { + "epoch": 2.314266929651545, + "high_lr": 0.0005368421052631579, + "low_lr": 1.073684210526316e-05, + "step": 880 + }, + { + "epoch": 2.314266929651545, + "high_lr": 0.0005368421052631579, + "low_lr": 1.073684210526316e-05, + "step": 880 + }, + { + "epoch": 2.31689677843524, + "grad_norm": 1.268680214881897, + "learning_rate": 0.0005363157894736842, + "loss": 1.368, + "step": 881 + }, + { + "epoch": 2.31689677843524, + "high_lr": 0.0005363157894736842, + "low_lr": 1.0726315789473684e-05, + "step": 881 + }, + { + "epoch": 2.31689677843524, + "high_lr": 0.0005363157894736842, + "low_lr": 1.0726315789473684e-05, + "step": 881 + }, + { + "epoch": 2.31689677843524, + "high_lr": 0.0005363157894736842, + "low_lr": 1.0726315789473684e-05, + "step": 881 + }, + { + "epoch": 2.31689677843524, + "high_lr": 0.0005363157894736842, + "low_lr": 1.0726315789473684e-05, + "step": 881 + }, + { + "epoch": 2.31689677843524, + "high_lr": 0.0005363157894736842, + "low_lr": 1.0726315789473684e-05, + "step": 881 + }, + { + "epoch": 2.31689677843524, + "high_lr": 0.0005363157894736842, + "low_lr": 1.0726315789473684e-05, + "step": 881 + }, + { + "epoch": 2.31689677843524, + "high_lr": 0.0005363157894736842, + "low_lr": 1.0726315789473684e-05, + "step": 881 + }, + { + "epoch": 2.31689677843524, + "high_lr": 0.0005363157894736842, + "low_lr": 1.0726315789473684e-05, + "step": 881 + }, + { + "epoch": 2.3195266272189348, + "grad_norm": 1.2960704565048218, + "learning_rate": 0.0005357894736842105, + "loss": 1.4236, + "step": 882 + }, + { + "epoch": 2.3195266272189348, + "high_lr": 0.0005357894736842105, + "low_lr": 1.071578947368421e-05, + "step": 882 + }, + { + "epoch": 2.3195266272189348, + "high_lr": 0.0005357894736842105, + "low_lr": 1.071578947368421e-05, + "step": 882 + }, + { + "epoch": 2.3195266272189348, + "high_lr": 0.0005357894736842105, + "low_lr": 1.071578947368421e-05, + "step": 882 + }, + { + "epoch": 2.3195266272189348, + "high_lr": 0.0005357894736842105, + "low_lr": 1.071578947368421e-05, + "step": 882 + }, + { + "epoch": 2.3195266272189348, + "high_lr": 0.0005357894736842105, + "low_lr": 1.071578947368421e-05, + "step": 882 + }, + { + "epoch": 2.3195266272189348, + "high_lr": 0.0005357894736842105, + "low_lr": 1.071578947368421e-05, + "step": 882 + }, + { + "epoch": 2.3195266272189348, + "high_lr": 0.0005357894736842105, + "low_lr": 1.071578947368421e-05, + "step": 882 + }, + { + "epoch": 2.3195266272189348, + "high_lr": 0.0005357894736842105, + "low_lr": 1.071578947368421e-05, + "step": 882 + }, + { + "epoch": 2.32215647600263, + "grad_norm": 1.2486226558685303, + "learning_rate": 0.0005352631578947369, + "loss": 1.3622, + "step": 883 + }, + { + "epoch": 2.32215647600263, + "high_lr": 0.0005352631578947369, + "low_lr": 1.0705263157894739e-05, + "step": 883 + }, + { + "epoch": 2.32215647600263, + "high_lr": 0.0005352631578947369, + "low_lr": 1.0705263157894739e-05, + "step": 883 + }, + { + "epoch": 2.32215647600263, + "high_lr": 0.0005352631578947369, + "low_lr": 1.0705263157894739e-05, + "step": 883 + }, + { + "epoch": 2.32215647600263, + "high_lr": 0.0005352631578947369, + "low_lr": 1.0705263157894739e-05, + "step": 883 + }, + { + "epoch": 2.32215647600263, + "high_lr": 0.0005352631578947369, + "low_lr": 1.0705263157894739e-05, + "step": 883 + }, + { + "epoch": 2.32215647600263, + "high_lr": 0.0005352631578947369, + "low_lr": 1.0705263157894739e-05, + "step": 883 + }, + { + "epoch": 2.32215647600263, + "high_lr": 0.0005352631578947369, + "low_lr": 1.0705263157894739e-05, + "step": 883 + }, + { + "epoch": 2.32215647600263, + "high_lr": 0.0005352631578947369, + "low_lr": 1.0705263157894739e-05, + "step": 883 + }, + { + "epoch": 2.324786324786325, + "grad_norm": 1.1381765604019165, + "learning_rate": 0.0005347368421052632, + "loss": 1.3331, + "step": 884 + }, + { + "epoch": 2.324786324786325, + "high_lr": 0.0005347368421052632, + "low_lr": 1.0694736842105265e-05, + "step": 884 + }, + { + "epoch": 2.324786324786325, + "high_lr": 0.0005347368421052632, + "low_lr": 1.0694736842105265e-05, + "step": 884 + }, + { + "epoch": 2.324786324786325, + "high_lr": 0.0005347368421052632, + "low_lr": 1.0694736842105265e-05, + "step": 884 + }, + { + "epoch": 2.324786324786325, + "high_lr": 0.0005347368421052632, + "low_lr": 1.0694736842105265e-05, + "step": 884 + }, + { + "epoch": 2.324786324786325, + "high_lr": 0.0005347368421052632, + "low_lr": 1.0694736842105265e-05, + "step": 884 + }, + { + "epoch": 2.324786324786325, + "high_lr": 0.0005347368421052632, + "low_lr": 1.0694736842105265e-05, + "step": 884 + }, + { + "epoch": 2.324786324786325, + "high_lr": 0.0005347368421052632, + "low_lr": 1.0694736842105265e-05, + "step": 884 + }, + { + "epoch": 2.324786324786325, + "high_lr": 0.0005347368421052632, + "low_lr": 1.0694736842105265e-05, + "step": 884 + }, + { + "epoch": 2.3274161735700196, + "grad_norm": 1.3162821531295776, + "learning_rate": 0.0005342105263157895, + "loss": 1.4184, + "step": 885 + }, + { + "epoch": 2.3274161735700196, + "high_lr": 0.0005342105263157895, + "low_lr": 1.068421052631579e-05, + "step": 885 + }, + { + "epoch": 2.3274161735700196, + "high_lr": 0.0005342105263157895, + "low_lr": 1.068421052631579e-05, + "step": 885 + }, + { + "epoch": 2.3274161735700196, + "high_lr": 0.0005342105263157895, + "low_lr": 1.068421052631579e-05, + "step": 885 + }, + { + "epoch": 2.3274161735700196, + "high_lr": 0.0005342105263157895, + "low_lr": 1.068421052631579e-05, + "step": 885 + }, + { + "epoch": 2.3274161735700196, + "high_lr": 0.0005342105263157895, + "low_lr": 1.068421052631579e-05, + "step": 885 + }, + { + "epoch": 2.3274161735700196, + "high_lr": 0.0005342105263157895, + "low_lr": 1.068421052631579e-05, + "step": 885 + }, + { + "epoch": 2.3274161735700196, + "high_lr": 0.0005342105263157895, + "low_lr": 1.068421052631579e-05, + "step": 885 + }, + { + "epoch": 2.3274161735700196, + "high_lr": 0.0005342105263157895, + "low_lr": 1.068421052631579e-05, + "step": 885 + }, + { + "epoch": 2.3300460223537147, + "grad_norm": 1.3376330137252808, + "learning_rate": 0.0005336842105263158, + "loss": 1.419, + "step": 886 + }, + { + "epoch": 2.3300460223537147, + "high_lr": 0.0005336842105263158, + "low_lr": 1.0673684210526316e-05, + "step": 886 + }, + { + "epoch": 2.3300460223537147, + "high_lr": 0.0005336842105263158, + "low_lr": 1.0673684210526316e-05, + "step": 886 + }, + { + "epoch": 2.3300460223537147, + "high_lr": 0.0005336842105263158, + "low_lr": 1.0673684210526316e-05, + "step": 886 + }, + { + "epoch": 2.3300460223537147, + "high_lr": 0.0005336842105263158, + "low_lr": 1.0673684210526316e-05, + "step": 886 + }, + { + "epoch": 2.3300460223537147, + "high_lr": 0.0005336842105263158, + "low_lr": 1.0673684210526316e-05, + "step": 886 + }, + { + "epoch": 2.3300460223537147, + "high_lr": 0.0005336842105263158, + "low_lr": 1.0673684210526316e-05, + "step": 886 + }, + { + "epoch": 2.3300460223537147, + "high_lr": 0.0005336842105263158, + "low_lr": 1.0673684210526316e-05, + "step": 886 + }, + { + "epoch": 2.3300460223537147, + "high_lr": 0.0005336842105263158, + "low_lr": 1.0673684210526316e-05, + "step": 886 + }, + { + "epoch": 2.3326758711374094, + "grad_norm": 1.2614970207214355, + "learning_rate": 0.000533157894736842, + "loss": 1.4318, + "step": 887 + }, + { + "epoch": 2.3326758711374094, + "high_lr": 0.000533157894736842, + "low_lr": 1.0663157894736842e-05, + "step": 887 + }, + { + "epoch": 2.3326758711374094, + "high_lr": 0.000533157894736842, + "low_lr": 1.0663157894736842e-05, + "step": 887 + }, + { + "epoch": 2.3326758711374094, + "high_lr": 0.000533157894736842, + "low_lr": 1.0663157894736842e-05, + "step": 887 + }, + { + "epoch": 2.3326758711374094, + "high_lr": 0.000533157894736842, + "low_lr": 1.0663157894736842e-05, + "step": 887 + }, + { + "epoch": 2.3326758711374094, + "high_lr": 0.000533157894736842, + "low_lr": 1.0663157894736842e-05, + "step": 887 + }, + { + "epoch": 2.3326758711374094, + "high_lr": 0.000533157894736842, + "low_lr": 1.0663157894736842e-05, + "step": 887 + }, + { + "epoch": 2.3326758711374094, + "high_lr": 0.000533157894736842, + "low_lr": 1.0663157894736842e-05, + "step": 887 + }, + { + "epoch": 2.3326758711374094, + "high_lr": 0.000533157894736842, + "low_lr": 1.0663157894736842e-05, + "step": 887 + }, + { + "epoch": 2.3353057199211045, + "grad_norm": 1.267763614654541, + "learning_rate": 0.0005326315789473684, + "loss": 1.3359, + "step": 888 + }, + { + "epoch": 2.3353057199211045, + "high_lr": 0.0005326315789473684, + "low_lr": 1.065263157894737e-05, + "step": 888 + }, + { + "epoch": 2.3353057199211045, + "high_lr": 0.0005326315789473684, + "low_lr": 1.065263157894737e-05, + "step": 888 + }, + { + "epoch": 2.3353057199211045, + "high_lr": 0.0005326315789473684, + "low_lr": 1.065263157894737e-05, + "step": 888 + }, + { + "epoch": 2.3353057199211045, + "high_lr": 0.0005326315789473684, + "low_lr": 1.065263157894737e-05, + "step": 888 + }, + { + "epoch": 2.3353057199211045, + "high_lr": 0.0005326315789473684, + "low_lr": 1.065263157894737e-05, + "step": 888 + }, + { + "epoch": 2.3353057199211045, + "high_lr": 0.0005326315789473684, + "low_lr": 1.065263157894737e-05, + "step": 888 + }, + { + "epoch": 2.3353057199211045, + "high_lr": 0.0005326315789473684, + "low_lr": 1.065263157894737e-05, + "step": 888 + }, + { + "epoch": 2.3353057199211045, + "high_lr": 0.0005326315789473684, + "low_lr": 1.065263157894737e-05, + "step": 888 + }, + { + "epoch": 2.3379355687047996, + "grad_norm": 1.2461472749710083, + "learning_rate": 0.0005321052631578947, + "loss": 1.4038, + "step": 889 + }, + { + "epoch": 2.3379355687047996, + "high_lr": 0.0005321052631578947, + "low_lr": 1.0642105263157897e-05, + "step": 889 + }, + { + "epoch": 2.3379355687047996, + "high_lr": 0.0005321052631578947, + "low_lr": 1.0642105263157897e-05, + "step": 889 + }, + { + "epoch": 2.3379355687047996, + "high_lr": 0.0005321052631578947, + "low_lr": 1.0642105263157897e-05, + "step": 889 + }, + { + "epoch": 2.3379355687047996, + "high_lr": 0.0005321052631578947, + "low_lr": 1.0642105263157897e-05, + "step": 889 + }, + { + "epoch": 2.3379355687047996, + "high_lr": 0.0005321052631578947, + "low_lr": 1.0642105263157897e-05, + "step": 889 + }, + { + "epoch": 2.3379355687047996, + "high_lr": 0.0005321052631578947, + "low_lr": 1.0642105263157897e-05, + "step": 889 + }, + { + "epoch": 2.3379355687047996, + "high_lr": 0.0005321052631578947, + "low_lr": 1.0642105263157897e-05, + "step": 889 + }, + { + "epoch": 2.3379355687047996, + "high_lr": 0.0005321052631578947, + "low_lr": 1.0642105263157897e-05, + "step": 889 + }, + { + "epoch": 2.3405654174884942, + "grad_norm": 1.2365288734436035, + "learning_rate": 0.000531578947368421, + "loss": 1.3389, + "step": 890 + }, + { + "epoch": 2.3405654174884942, + "high_lr": 0.000531578947368421, + "low_lr": 1.0631578947368421e-05, + "step": 890 + }, + { + "epoch": 2.3405654174884942, + "high_lr": 0.000531578947368421, + "low_lr": 1.0631578947368421e-05, + "step": 890 + }, + { + "epoch": 2.3405654174884942, + "high_lr": 0.000531578947368421, + "low_lr": 1.0631578947368421e-05, + "step": 890 + }, + { + "epoch": 2.3405654174884942, + "high_lr": 0.000531578947368421, + "low_lr": 1.0631578947368421e-05, + "step": 890 + }, + { + "epoch": 2.3405654174884942, + "high_lr": 0.000531578947368421, + "low_lr": 1.0631578947368421e-05, + "step": 890 + }, + { + "epoch": 2.3405654174884942, + "high_lr": 0.000531578947368421, + "low_lr": 1.0631578947368421e-05, + "step": 890 + }, + { + "epoch": 2.3405654174884942, + "high_lr": 0.000531578947368421, + "low_lr": 1.0631578947368421e-05, + "step": 890 + }, + { + "epoch": 2.3405654174884942, + "high_lr": 0.000531578947368421, + "low_lr": 1.0631578947368421e-05, + "step": 890 + }, + { + "epoch": 2.3431952662721893, + "grad_norm": 1.289572834968567, + "learning_rate": 0.0005310526315789473, + "loss": 1.4167, + "step": 891 + }, + { + "epoch": 2.3431952662721893, + "high_lr": 0.0005310526315789473, + "low_lr": 1.0621052631578948e-05, + "step": 891 + }, + { + "epoch": 2.3431952662721893, + "high_lr": 0.0005310526315789473, + "low_lr": 1.0621052631578948e-05, + "step": 891 + }, + { + "epoch": 2.3431952662721893, + "high_lr": 0.0005310526315789473, + "low_lr": 1.0621052631578948e-05, + "step": 891 + }, + { + "epoch": 2.3431952662721893, + "high_lr": 0.0005310526315789473, + "low_lr": 1.0621052631578948e-05, + "step": 891 + }, + { + "epoch": 2.3431952662721893, + "high_lr": 0.0005310526315789473, + "low_lr": 1.0621052631578948e-05, + "step": 891 + }, + { + "epoch": 2.3431952662721893, + "high_lr": 0.0005310526315789473, + "low_lr": 1.0621052631578948e-05, + "step": 891 + }, + { + "epoch": 2.3431952662721893, + "high_lr": 0.0005310526315789473, + "low_lr": 1.0621052631578948e-05, + "step": 891 + }, + { + "epoch": 2.3431952662721893, + "high_lr": 0.0005310526315789473, + "low_lr": 1.0621052631578948e-05, + "step": 891 + }, + { + "epoch": 2.3458251150558844, + "grad_norm": 1.2580618858337402, + "learning_rate": 0.0005305263157894737, + "loss": 1.4169, + "step": 892 + }, + { + "epoch": 2.3458251150558844, + "high_lr": 0.0005305263157894737, + "low_lr": 1.0610526315789476e-05, + "step": 892 + }, + { + "epoch": 2.3458251150558844, + "high_lr": 0.0005305263157894737, + "low_lr": 1.0610526315789476e-05, + "step": 892 + }, + { + "epoch": 2.3458251150558844, + "high_lr": 0.0005305263157894737, + "low_lr": 1.0610526315789476e-05, + "step": 892 + }, + { + "epoch": 2.3458251150558844, + "high_lr": 0.0005305263157894737, + "low_lr": 1.0610526315789476e-05, + "step": 892 + }, + { + "epoch": 2.3458251150558844, + "high_lr": 0.0005305263157894737, + "low_lr": 1.0610526315789476e-05, + "step": 892 + }, + { + "epoch": 2.3458251150558844, + "high_lr": 0.0005305263157894737, + "low_lr": 1.0610526315789476e-05, + "step": 892 + }, + { + "epoch": 2.3458251150558844, + "high_lr": 0.0005305263157894737, + "low_lr": 1.0610526315789476e-05, + "step": 892 + }, + { + "epoch": 2.3458251150558844, + "high_lr": 0.0005305263157894737, + "low_lr": 1.0610526315789476e-05, + "step": 892 + }, + { + "epoch": 2.348454963839579, + "grad_norm": 1.1528156995773315, + "learning_rate": 0.0005300000000000001, + "loss": 1.3834, + "step": 893 + }, + { + "epoch": 2.348454963839579, + "high_lr": 0.0005300000000000001, + "low_lr": 1.0600000000000002e-05, + "step": 893 + }, + { + "epoch": 2.348454963839579, + "high_lr": 0.0005300000000000001, + "low_lr": 1.0600000000000002e-05, + "step": 893 + }, + { + "epoch": 2.348454963839579, + "high_lr": 0.0005300000000000001, + "low_lr": 1.0600000000000002e-05, + "step": 893 + }, + { + "epoch": 2.348454963839579, + "high_lr": 0.0005300000000000001, + "low_lr": 1.0600000000000002e-05, + "step": 893 + }, + { + "epoch": 2.348454963839579, + "high_lr": 0.0005300000000000001, + "low_lr": 1.0600000000000002e-05, + "step": 893 + }, + { + "epoch": 2.348454963839579, + "high_lr": 0.0005300000000000001, + "low_lr": 1.0600000000000002e-05, + "step": 893 + }, + { + "epoch": 2.348454963839579, + "high_lr": 0.0005300000000000001, + "low_lr": 1.0600000000000002e-05, + "step": 893 + }, + { + "epoch": 2.348454963839579, + "high_lr": 0.0005300000000000001, + "low_lr": 1.0600000000000002e-05, + "step": 893 + }, + { + "epoch": 2.351084812623274, + "grad_norm": 1.290389895439148, + "learning_rate": 0.0005294736842105264, + "loss": 1.4016, + "step": 894 + }, + { + "epoch": 2.351084812623274, + "high_lr": 0.0005294736842105264, + "low_lr": 1.0589473684210526e-05, + "step": 894 + }, + { + "epoch": 2.351084812623274, + "high_lr": 0.0005294736842105264, + "low_lr": 1.0589473684210526e-05, + "step": 894 + }, + { + "epoch": 2.351084812623274, + "high_lr": 0.0005294736842105264, + "low_lr": 1.0589473684210526e-05, + "step": 894 + }, + { + "epoch": 2.351084812623274, + "high_lr": 0.0005294736842105264, + "low_lr": 1.0589473684210526e-05, + "step": 894 + }, + { + "epoch": 2.351084812623274, + "high_lr": 0.0005294736842105264, + "low_lr": 1.0589473684210526e-05, + "step": 894 + }, + { + "epoch": 2.351084812623274, + "high_lr": 0.0005294736842105264, + "low_lr": 1.0589473684210526e-05, + "step": 894 + }, + { + "epoch": 2.351084812623274, + "high_lr": 0.0005294736842105264, + "low_lr": 1.0589473684210526e-05, + "step": 894 + }, + { + "epoch": 2.351084812623274, + "high_lr": 0.0005294736842105264, + "low_lr": 1.0589473684210526e-05, + "step": 894 + }, + { + "epoch": 2.3537146614069693, + "grad_norm": 1.329153060913086, + "learning_rate": 0.0005289473684210527, + "loss": 1.4091, + "step": 895 + }, + { + "epoch": 2.3537146614069693, + "high_lr": 0.0005289473684210527, + "low_lr": 1.0578947368421053e-05, + "step": 895 + }, + { + "epoch": 2.3537146614069693, + "high_lr": 0.0005289473684210527, + "low_lr": 1.0578947368421053e-05, + "step": 895 + }, + { + "epoch": 2.3537146614069693, + "high_lr": 0.0005289473684210527, + "low_lr": 1.0578947368421053e-05, + "step": 895 + }, + { + "epoch": 2.3537146614069693, + "high_lr": 0.0005289473684210527, + "low_lr": 1.0578947368421053e-05, + "step": 895 + }, + { + "epoch": 2.3537146614069693, + "high_lr": 0.0005289473684210527, + "low_lr": 1.0578947368421053e-05, + "step": 895 + }, + { + "epoch": 2.3537146614069693, + "high_lr": 0.0005289473684210527, + "low_lr": 1.0578947368421053e-05, + "step": 895 + }, + { + "epoch": 2.3537146614069693, + "high_lr": 0.0005289473684210527, + "low_lr": 1.0578947368421053e-05, + "step": 895 + }, + { + "epoch": 2.3537146614069693, + "high_lr": 0.0005289473684210527, + "low_lr": 1.0578947368421053e-05, + "step": 895 + }, + { + "epoch": 2.356344510190664, + "grad_norm": 1.2408227920532227, + "learning_rate": 0.000528421052631579, + "loss": 1.3911, + "step": 896 + }, + { + "epoch": 2.356344510190664, + "high_lr": 0.000528421052631579, + "low_lr": 1.0568421052631579e-05, + "step": 896 + }, + { + "epoch": 2.356344510190664, + "high_lr": 0.000528421052631579, + "low_lr": 1.0568421052631579e-05, + "step": 896 + }, + { + "epoch": 2.356344510190664, + "high_lr": 0.000528421052631579, + "low_lr": 1.0568421052631579e-05, + "step": 896 + }, + { + "epoch": 2.356344510190664, + "high_lr": 0.000528421052631579, + "low_lr": 1.0568421052631579e-05, + "step": 896 + }, + { + "epoch": 2.356344510190664, + "high_lr": 0.000528421052631579, + "low_lr": 1.0568421052631579e-05, + "step": 896 + }, + { + "epoch": 2.356344510190664, + "high_lr": 0.000528421052631579, + "low_lr": 1.0568421052631579e-05, + "step": 896 + }, + { + "epoch": 2.356344510190664, + "high_lr": 0.000528421052631579, + "low_lr": 1.0568421052631579e-05, + "step": 896 + }, + { + "epoch": 2.356344510190664, + "high_lr": 0.000528421052631579, + "low_lr": 1.0568421052631579e-05, + "step": 896 + }, + { + "epoch": 2.358974358974359, + "grad_norm": 1.286737084388733, + "learning_rate": 0.0005278947368421053, + "loss": 1.4165, + "step": 897 + }, + { + "epoch": 2.358974358974359, + "high_lr": 0.0005278947368421053, + "low_lr": 1.0557894736842107e-05, + "step": 897 + }, + { + "epoch": 2.358974358974359, + "high_lr": 0.0005278947368421053, + "low_lr": 1.0557894736842107e-05, + "step": 897 + }, + { + "epoch": 2.358974358974359, + "high_lr": 0.0005278947368421053, + "low_lr": 1.0557894736842107e-05, + "step": 897 + }, + { + "epoch": 2.358974358974359, + "high_lr": 0.0005278947368421053, + "low_lr": 1.0557894736842107e-05, + "step": 897 + }, + { + "epoch": 2.358974358974359, + "high_lr": 0.0005278947368421053, + "low_lr": 1.0557894736842107e-05, + "step": 897 + }, + { + "epoch": 2.358974358974359, + "high_lr": 0.0005278947368421053, + "low_lr": 1.0557894736842107e-05, + "step": 897 + }, + { + "epoch": 2.358974358974359, + "high_lr": 0.0005278947368421053, + "low_lr": 1.0557894736842107e-05, + "step": 897 + }, + { + "epoch": 2.358974358974359, + "high_lr": 0.0005278947368421053, + "low_lr": 1.0557894736842107e-05, + "step": 897 + }, + { + "epoch": 2.361604207758054, + "grad_norm": 1.2762736082077026, + "learning_rate": 0.0005273684210526316, + "loss": 1.403, + "step": 898 + }, + { + "epoch": 2.361604207758054, + "high_lr": 0.0005273684210526316, + "low_lr": 1.0547368421052633e-05, + "step": 898 + }, + { + "epoch": 2.361604207758054, + "high_lr": 0.0005273684210526316, + "low_lr": 1.0547368421052633e-05, + "step": 898 + }, + { + "epoch": 2.361604207758054, + "high_lr": 0.0005273684210526316, + "low_lr": 1.0547368421052633e-05, + "step": 898 + }, + { + "epoch": 2.361604207758054, + "high_lr": 0.0005273684210526316, + "low_lr": 1.0547368421052633e-05, + "step": 898 + }, + { + "epoch": 2.361604207758054, + "high_lr": 0.0005273684210526316, + "low_lr": 1.0547368421052633e-05, + "step": 898 + }, + { + "epoch": 2.361604207758054, + "high_lr": 0.0005273684210526316, + "low_lr": 1.0547368421052633e-05, + "step": 898 + }, + { + "epoch": 2.361604207758054, + "high_lr": 0.0005273684210526316, + "low_lr": 1.0547368421052633e-05, + "step": 898 + }, + { + "epoch": 2.361604207758054, + "high_lr": 0.0005273684210526316, + "low_lr": 1.0547368421052633e-05, + "step": 898 + }, + { + "epoch": 2.364234056541749, + "grad_norm": 1.1602425575256348, + "learning_rate": 0.0005268421052631579, + "loss": 1.3799, + "step": 899 + }, + { + "epoch": 2.364234056541749, + "high_lr": 0.0005268421052631579, + "low_lr": 1.0536842105263158e-05, + "step": 899 + }, + { + "epoch": 2.364234056541749, + "high_lr": 0.0005268421052631579, + "low_lr": 1.0536842105263158e-05, + "step": 899 + }, + { + "epoch": 2.364234056541749, + "high_lr": 0.0005268421052631579, + "low_lr": 1.0536842105263158e-05, + "step": 899 + }, + { + "epoch": 2.364234056541749, + "high_lr": 0.0005268421052631579, + "low_lr": 1.0536842105263158e-05, + "step": 899 + }, + { + "epoch": 2.364234056541749, + "high_lr": 0.0005268421052631579, + "low_lr": 1.0536842105263158e-05, + "step": 899 + }, + { + "epoch": 2.364234056541749, + "high_lr": 0.0005268421052631579, + "low_lr": 1.0536842105263158e-05, + "step": 899 + }, + { + "epoch": 2.364234056541749, + "high_lr": 0.0005268421052631579, + "low_lr": 1.0536842105263158e-05, + "step": 899 + }, + { + "epoch": 2.364234056541749, + "high_lr": 0.0005268421052631579, + "low_lr": 1.0536842105263158e-05, + "step": 899 + }, + { + "epoch": 2.366863905325444, + "grad_norm": 1.2419919967651367, + "learning_rate": 0.0005263157894736842, + "loss": 1.3534, + "step": 900 + }, + { + "epoch": 2.366863905325444, + "high_lr": 0.0005263157894736842, + "low_lr": 1.0526315789473684e-05, + "step": 900 + }, + { + "epoch": 2.366863905325444, + "high_lr": 0.0005263157894736842, + "low_lr": 1.0526315789473684e-05, + "step": 900 + }, + { + "epoch": 2.366863905325444, + "high_lr": 0.0005263157894736842, + "low_lr": 1.0526315789473684e-05, + "step": 900 + }, + { + "epoch": 2.366863905325444, + "high_lr": 0.0005263157894736842, + "low_lr": 1.0526315789473684e-05, + "step": 900 + }, + { + "epoch": 2.366863905325444, + "high_lr": 0.0005263157894736842, + "low_lr": 1.0526315789473684e-05, + "step": 900 + }, + { + "epoch": 2.366863905325444, + "high_lr": 0.0005263157894736842, + "low_lr": 1.0526315789473684e-05, + "step": 900 + }, + { + "epoch": 2.366863905325444, + "high_lr": 0.0005263157894736842, + "low_lr": 1.0526315789473684e-05, + "step": 900 + }, + { + "epoch": 2.366863905325444, + "high_lr": 0.0005263157894736842, + "low_lr": 1.0526315789473684e-05, + "step": 900 + }, + { + "epoch": 2.3694937541091385, + "grad_norm": 1.212281346321106, + "learning_rate": 0.0005257894736842105, + "loss": 1.3761, + "step": 901 + }, + { + "epoch": 2.3694937541091385, + "high_lr": 0.0005257894736842105, + "low_lr": 1.051578947368421e-05, + "step": 901 + }, + { + "epoch": 2.3694937541091385, + "high_lr": 0.0005257894736842105, + "low_lr": 1.051578947368421e-05, + "step": 901 + }, + { + "epoch": 2.3694937541091385, + "high_lr": 0.0005257894736842105, + "low_lr": 1.051578947368421e-05, + "step": 901 + }, + { + "epoch": 2.3694937541091385, + "high_lr": 0.0005257894736842105, + "low_lr": 1.051578947368421e-05, + "step": 901 + }, + { + "epoch": 2.3694937541091385, + "high_lr": 0.0005257894736842105, + "low_lr": 1.051578947368421e-05, + "step": 901 + }, + { + "epoch": 2.3694937541091385, + "high_lr": 0.0005257894736842105, + "low_lr": 1.051578947368421e-05, + "step": 901 + }, + { + "epoch": 2.3694937541091385, + "high_lr": 0.0005257894736842105, + "low_lr": 1.051578947368421e-05, + "step": 901 + }, + { + "epoch": 2.3694937541091385, + "high_lr": 0.0005257894736842105, + "low_lr": 1.051578947368421e-05, + "step": 901 + }, + { + "epoch": 2.3721236028928336, + "grad_norm": 1.226370930671692, + "learning_rate": 0.0005252631578947369, + "loss": 1.3435, + "step": 902 + }, + { + "epoch": 2.3721236028928336, + "high_lr": 0.0005252631578947369, + "low_lr": 1.0505263157894739e-05, + "step": 902 + }, + { + "epoch": 2.3721236028928336, + "high_lr": 0.0005252631578947369, + "low_lr": 1.0505263157894739e-05, + "step": 902 + }, + { + "epoch": 2.3721236028928336, + "high_lr": 0.0005252631578947369, + "low_lr": 1.0505263157894739e-05, + "step": 902 + }, + { + "epoch": 2.3721236028928336, + "high_lr": 0.0005252631578947369, + "low_lr": 1.0505263157894739e-05, + "step": 902 + }, + { + "epoch": 2.3721236028928336, + "high_lr": 0.0005252631578947369, + "low_lr": 1.0505263157894739e-05, + "step": 902 + }, + { + "epoch": 2.3721236028928336, + "high_lr": 0.0005252631578947369, + "low_lr": 1.0505263157894739e-05, + "step": 902 + }, + { + "epoch": 2.3721236028928336, + "high_lr": 0.0005252631578947369, + "low_lr": 1.0505263157894739e-05, + "step": 902 + }, + { + "epoch": 2.3721236028928336, + "high_lr": 0.0005252631578947369, + "low_lr": 1.0505263157894739e-05, + "step": 902 + }, + { + "epoch": 2.3747534516765287, + "grad_norm": 1.2944639921188354, + "learning_rate": 0.0005247368421052632, + "loss": 1.4133, + "step": 903 + }, + { + "epoch": 2.3747534516765287, + "high_lr": 0.0005247368421052632, + "low_lr": 1.0494736842105263e-05, + "step": 903 + }, + { + "epoch": 2.3747534516765287, + "high_lr": 0.0005247368421052632, + "low_lr": 1.0494736842105263e-05, + "step": 903 + }, + { + "epoch": 2.3747534516765287, + "high_lr": 0.0005247368421052632, + "low_lr": 1.0494736842105263e-05, + "step": 903 + }, + { + "epoch": 2.3747534516765287, + "high_lr": 0.0005247368421052632, + "low_lr": 1.0494736842105263e-05, + "step": 903 + }, + { + "epoch": 2.3747534516765287, + "high_lr": 0.0005247368421052632, + "low_lr": 1.0494736842105263e-05, + "step": 903 + }, + { + "epoch": 2.3747534516765287, + "high_lr": 0.0005247368421052632, + "low_lr": 1.0494736842105263e-05, + "step": 903 + }, + { + "epoch": 2.3747534516765287, + "high_lr": 0.0005247368421052632, + "low_lr": 1.0494736842105263e-05, + "step": 903 + }, + { + "epoch": 2.3747534516765287, + "high_lr": 0.0005247368421052632, + "low_lr": 1.0494736842105263e-05, + "step": 903 + }, + { + "epoch": 2.3773833004602234, + "grad_norm": 1.2294273376464844, + "learning_rate": 0.0005242105263157895, + "loss": 1.3538, + "step": 904 + }, + { + "epoch": 2.3773833004602234, + "high_lr": 0.0005242105263157895, + "low_lr": 1.048421052631579e-05, + "step": 904 + }, + { + "epoch": 2.3773833004602234, + "high_lr": 0.0005242105263157895, + "low_lr": 1.048421052631579e-05, + "step": 904 + }, + { + "epoch": 2.3773833004602234, + "high_lr": 0.0005242105263157895, + "low_lr": 1.048421052631579e-05, + "step": 904 + }, + { + "epoch": 2.3773833004602234, + "high_lr": 0.0005242105263157895, + "low_lr": 1.048421052631579e-05, + "step": 904 + }, + { + "epoch": 2.3773833004602234, + "high_lr": 0.0005242105263157895, + "low_lr": 1.048421052631579e-05, + "step": 904 + }, + { + "epoch": 2.3773833004602234, + "high_lr": 0.0005242105263157895, + "low_lr": 1.048421052631579e-05, + "step": 904 + }, + { + "epoch": 2.3773833004602234, + "high_lr": 0.0005242105263157895, + "low_lr": 1.048421052631579e-05, + "step": 904 + }, + { + "epoch": 2.3773833004602234, + "high_lr": 0.0005242105263157895, + "low_lr": 1.048421052631579e-05, + "step": 904 + }, + { + "epoch": 2.3800131492439185, + "grad_norm": 1.2078107595443726, + "learning_rate": 0.0005236842105263157, + "loss": 1.3645, + "step": 905 + }, + { + "epoch": 2.3800131492439185, + "high_lr": 0.0005236842105263157, + "low_lr": 1.0473684210526316e-05, + "step": 905 + }, + { + "epoch": 2.3800131492439185, + "high_lr": 0.0005236842105263157, + "low_lr": 1.0473684210526316e-05, + "step": 905 + }, + { + "epoch": 2.3800131492439185, + "high_lr": 0.0005236842105263157, + "low_lr": 1.0473684210526316e-05, + "step": 905 + }, + { + "epoch": 2.3800131492439185, + "high_lr": 0.0005236842105263157, + "low_lr": 1.0473684210526316e-05, + "step": 905 + }, + { + "epoch": 2.3800131492439185, + "high_lr": 0.0005236842105263157, + "low_lr": 1.0473684210526316e-05, + "step": 905 + }, + { + "epoch": 2.3800131492439185, + "high_lr": 0.0005236842105263157, + "low_lr": 1.0473684210526316e-05, + "step": 905 + }, + { + "epoch": 2.3800131492439185, + "high_lr": 0.0005236842105263157, + "low_lr": 1.0473684210526316e-05, + "step": 905 + }, + { + "epoch": 2.3800131492439185, + "high_lr": 0.0005236842105263157, + "low_lr": 1.0473684210526316e-05, + "step": 905 + }, + { + "epoch": 2.3826429980276136, + "grad_norm": 1.2253119945526123, + "learning_rate": 0.0005231578947368421, + "loss": 1.3828, + "step": 906 + }, + { + "epoch": 2.3826429980276136, + "high_lr": 0.0005231578947368421, + "low_lr": 1.0463157894736844e-05, + "step": 906 + }, + { + "epoch": 2.3826429980276136, + "high_lr": 0.0005231578947368421, + "low_lr": 1.0463157894736844e-05, + "step": 906 + }, + { + "epoch": 2.3826429980276136, + "high_lr": 0.0005231578947368421, + "low_lr": 1.0463157894736844e-05, + "step": 906 + }, + { + "epoch": 2.3826429980276136, + "high_lr": 0.0005231578947368421, + "low_lr": 1.0463157894736844e-05, + "step": 906 + }, + { + "epoch": 2.3826429980276136, + "high_lr": 0.0005231578947368421, + "low_lr": 1.0463157894736844e-05, + "step": 906 + }, + { + "epoch": 2.3826429980276136, + "high_lr": 0.0005231578947368421, + "low_lr": 1.0463157894736844e-05, + "step": 906 + }, + { + "epoch": 2.3826429980276136, + "high_lr": 0.0005231578947368421, + "low_lr": 1.0463157894736844e-05, + "step": 906 + }, + { + "epoch": 2.3826429980276136, + "high_lr": 0.0005231578947368421, + "low_lr": 1.0463157894736844e-05, + "step": 906 + }, + { + "epoch": 2.3852728468113082, + "grad_norm": 1.2381949424743652, + "learning_rate": 0.0005226315789473684, + "loss": 1.4067, + "step": 907 + }, + { + "epoch": 2.3852728468113082, + "high_lr": 0.0005226315789473684, + "low_lr": 1.045263157894737e-05, + "step": 907 + }, + { + "epoch": 2.3852728468113082, + "high_lr": 0.0005226315789473684, + "low_lr": 1.045263157894737e-05, + "step": 907 + }, + { + "epoch": 2.3852728468113082, + "high_lr": 0.0005226315789473684, + "low_lr": 1.045263157894737e-05, + "step": 907 + }, + { + "epoch": 2.3852728468113082, + "high_lr": 0.0005226315789473684, + "low_lr": 1.045263157894737e-05, + "step": 907 + }, + { + "epoch": 2.3852728468113082, + "high_lr": 0.0005226315789473684, + "low_lr": 1.045263157894737e-05, + "step": 907 + }, + { + "epoch": 2.3852728468113082, + "high_lr": 0.0005226315789473684, + "low_lr": 1.045263157894737e-05, + "step": 907 + }, + { + "epoch": 2.3852728468113082, + "high_lr": 0.0005226315789473684, + "low_lr": 1.045263157894737e-05, + "step": 907 + }, + { + "epoch": 2.3852728468113082, + "high_lr": 0.0005226315789473684, + "low_lr": 1.045263157894737e-05, + "step": 907 + }, + { + "epoch": 2.3879026955950033, + "grad_norm": 1.351272463798523, + "learning_rate": 0.0005221052631578947, + "loss": 1.4329, + "step": 908 + }, + { + "epoch": 2.3879026955950033, + "high_lr": 0.0005221052631578947, + "low_lr": 1.0442105263157895e-05, + "step": 908 + }, + { + "epoch": 2.3879026955950033, + "high_lr": 0.0005221052631578947, + "low_lr": 1.0442105263157895e-05, + "step": 908 + }, + { + "epoch": 2.3879026955950033, + "high_lr": 0.0005221052631578947, + "low_lr": 1.0442105263157895e-05, + "step": 908 + }, + { + "epoch": 2.3879026955950033, + "high_lr": 0.0005221052631578947, + "low_lr": 1.0442105263157895e-05, + "step": 908 + }, + { + "epoch": 2.3879026955950033, + "high_lr": 0.0005221052631578947, + "low_lr": 1.0442105263157895e-05, + "step": 908 + }, + { + "epoch": 2.3879026955950033, + "high_lr": 0.0005221052631578947, + "low_lr": 1.0442105263157895e-05, + "step": 908 + }, + { + "epoch": 2.3879026955950033, + "high_lr": 0.0005221052631578947, + "low_lr": 1.0442105263157895e-05, + "step": 908 + }, + { + "epoch": 2.3879026955950033, + "high_lr": 0.0005221052631578947, + "low_lr": 1.0442105263157895e-05, + "step": 908 + }, + { + "epoch": 2.390532544378698, + "grad_norm": 1.2670356035232544, + "learning_rate": 0.000521578947368421, + "loss": 1.3633, + "step": 909 + }, + { + "epoch": 2.390532544378698, + "high_lr": 0.000521578947368421, + "low_lr": 1.0431578947368421e-05, + "step": 909 + }, + { + "epoch": 2.390532544378698, + "high_lr": 0.000521578947368421, + "low_lr": 1.0431578947368421e-05, + "step": 909 + }, + { + "epoch": 2.390532544378698, + "high_lr": 0.000521578947368421, + "low_lr": 1.0431578947368421e-05, + "step": 909 + }, + { + "epoch": 2.390532544378698, + "high_lr": 0.000521578947368421, + "low_lr": 1.0431578947368421e-05, + "step": 909 + }, + { + "epoch": 2.390532544378698, + "high_lr": 0.000521578947368421, + "low_lr": 1.0431578947368421e-05, + "step": 909 + }, + { + "epoch": 2.390532544378698, + "high_lr": 0.000521578947368421, + "low_lr": 1.0431578947368421e-05, + "step": 909 + }, + { + "epoch": 2.390532544378698, + "high_lr": 0.000521578947368421, + "low_lr": 1.0431578947368421e-05, + "step": 909 + }, + { + "epoch": 2.390532544378698, + "high_lr": 0.000521578947368421, + "low_lr": 1.0431578947368421e-05, + "step": 909 + }, + { + "epoch": 2.393162393162393, + "grad_norm": 1.2720977067947388, + "learning_rate": 0.0005210526315789474, + "loss": 1.3497, + "step": 910 + }, + { + "epoch": 2.393162393162393, + "high_lr": 0.0005210526315789474, + "low_lr": 1.0421052631578948e-05, + "step": 910 + }, + { + "epoch": 2.393162393162393, + "high_lr": 0.0005210526315789474, + "low_lr": 1.0421052631578948e-05, + "step": 910 + }, + { + "epoch": 2.393162393162393, + "high_lr": 0.0005210526315789474, + "low_lr": 1.0421052631578948e-05, + "step": 910 + }, + { + "epoch": 2.393162393162393, + "high_lr": 0.0005210526315789474, + "low_lr": 1.0421052631578948e-05, + "step": 910 + }, + { + "epoch": 2.393162393162393, + "high_lr": 0.0005210526315789474, + "low_lr": 1.0421052631578948e-05, + "step": 910 + }, + { + "epoch": 2.393162393162393, + "high_lr": 0.0005210526315789474, + "low_lr": 1.0421052631578948e-05, + "step": 910 + }, + { + "epoch": 2.393162393162393, + "high_lr": 0.0005210526315789474, + "low_lr": 1.0421052631578948e-05, + "step": 910 + }, + { + "epoch": 2.393162393162393, + "high_lr": 0.0005210526315789474, + "low_lr": 1.0421052631578948e-05, + "step": 910 + }, + { + "epoch": 2.395792241946088, + "grad_norm": 1.2801196575164795, + "learning_rate": 0.0005205263157894738, + "loss": 1.417, + "step": 911 + }, + { + "epoch": 2.395792241946088, + "high_lr": 0.0005205263157894738, + "low_lr": 1.0410526315789476e-05, + "step": 911 + }, + { + "epoch": 2.395792241946088, + "high_lr": 0.0005205263157894738, + "low_lr": 1.0410526315789476e-05, + "step": 911 + }, + { + "epoch": 2.395792241946088, + "high_lr": 0.0005205263157894738, + "low_lr": 1.0410526315789476e-05, + "step": 911 + }, + { + "epoch": 2.395792241946088, + "high_lr": 0.0005205263157894738, + "low_lr": 1.0410526315789476e-05, + "step": 911 + }, + { + "epoch": 2.395792241946088, + "high_lr": 0.0005205263157894738, + "low_lr": 1.0410526315789476e-05, + "step": 911 + }, + { + "epoch": 2.395792241946088, + "high_lr": 0.0005205263157894738, + "low_lr": 1.0410526315789476e-05, + "step": 911 + }, + { + "epoch": 2.395792241946088, + "high_lr": 0.0005205263157894738, + "low_lr": 1.0410526315789476e-05, + "step": 911 + }, + { + "epoch": 2.395792241946088, + "high_lr": 0.0005205263157894738, + "low_lr": 1.0410526315789476e-05, + "step": 911 + }, + { + "epoch": 2.398422090729783, + "grad_norm": 1.3025861978530884, + "learning_rate": 0.0005200000000000001, + "loss": 1.4148, + "step": 912 + }, + { + "epoch": 2.398422090729783, + "high_lr": 0.0005200000000000001, + "low_lr": 1.04e-05, + "step": 912 + }, + { + "epoch": 2.398422090729783, + "high_lr": 0.0005200000000000001, + "low_lr": 1.04e-05, + "step": 912 + }, + { + "epoch": 2.398422090729783, + "high_lr": 0.0005200000000000001, + "low_lr": 1.04e-05, + "step": 912 + }, + { + "epoch": 2.398422090729783, + "high_lr": 0.0005200000000000001, + "low_lr": 1.04e-05, + "step": 912 + }, + { + "epoch": 2.398422090729783, + "high_lr": 0.0005200000000000001, + "low_lr": 1.04e-05, + "step": 912 + }, + { + "epoch": 2.398422090729783, + "high_lr": 0.0005200000000000001, + "low_lr": 1.04e-05, + "step": 912 + }, + { + "epoch": 2.398422090729783, + "high_lr": 0.0005200000000000001, + "low_lr": 1.04e-05, + "step": 912 + }, + { + "epoch": 2.398422090729783, + "high_lr": 0.0005200000000000001, + "low_lr": 1.04e-05, + "step": 912 + }, + { + "epoch": 2.401051939513478, + "grad_norm": 1.2210677862167358, + "learning_rate": 0.0005194736842105263, + "loss": 1.34, + "step": 913 + }, + { + "epoch": 2.401051939513478, + "high_lr": 0.0005194736842105263, + "low_lr": 1.0389473684210527e-05, + "step": 913 + }, + { + "epoch": 2.401051939513478, + "high_lr": 0.0005194736842105263, + "low_lr": 1.0389473684210527e-05, + "step": 913 + }, + { + "epoch": 2.401051939513478, + "high_lr": 0.0005194736842105263, + "low_lr": 1.0389473684210527e-05, + "step": 913 + }, + { + "epoch": 2.401051939513478, + "high_lr": 0.0005194736842105263, + "low_lr": 1.0389473684210527e-05, + "step": 913 + }, + { + "epoch": 2.401051939513478, + "high_lr": 0.0005194736842105263, + "low_lr": 1.0389473684210527e-05, + "step": 913 + }, + { + "epoch": 2.401051939513478, + "high_lr": 0.0005194736842105263, + "low_lr": 1.0389473684210527e-05, + "step": 913 + }, + { + "epoch": 2.401051939513478, + "high_lr": 0.0005194736842105263, + "low_lr": 1.0389473684210527e-05, + "step": 913 + }, + { + "epoch": 2.401051939513478, + "high_lr": 0.0005194736842105263, + "low_lr": 1.0389473684210527e-05, + "step": 913 + }, + { + "epoch": 2.403681788297173, + "grad_norm": 1.3378238677978516, + "learning_rate": 0.0005189473684210526, + "loss": 1.3899, + "step": 914 + }, + { + "epoch": 2.403681788297173, + "high_lr": 0.0005189473684210526, + "low_lr": 1.0378947368421053e-05, + "step": 914 + }, + { + "epoch": 2.403681788297173, + "high_lr": 0.0005189473684210526, + "low_lr": 1.0378947368421053e-05, + "step": 914 + }, + { + "epoch": 2.403681788297173, + "high_lr": 0.0005189473684210526, + "low_lr": 1.0378947368421053e-05, + "step": 914 + }, + { + "epoch": 2.403681788297173, + "high_lr": 0.0005189473684210526, + "low_lr": 1.0378947368421053e-05, + "step": 914 + }, + { + "epoch": 2.403681788297173, + "high_lr": 0.0005189473684210526, + "low_lr": 1.0378947368421053e-05, + "step": 914 + }, + { + "epoch": 2.403681788297173, + "high_lr": 0.0005189473684210526, + "low_lr": 1.0378947368421053e-05, + "step": 914 + }, + { + "epoch": 2.403681788297173, + "high_lr": 0.0005189473684210526, + "low_lr": 1.0378947368421053e-05, + "step": 914 + }, + { + "epoch": 2.403681788297173, + "high_lr": 0.0005189473684210526, + "low_lr": 1.0378947368421053e-05, + "step": 914 + }, + { + "epoch": 2.4063116370808677, + "grad_norm": 1.3250514268875122, + "learning_rate": 0.0005184210526315789, + "loss": 1.4164, + "step": 915 + }, + { + "epoch": 2.4063116370808677, + "high_lr": 0.0005184210526315789, + "low_lr": 1.036842105263158e-05, + "step": 915 + }, + { + "epoch": 2.4063116370808677, + "high_lr": 0.0005184210526315789, + "low_lr": 1.036842105263158e-05, + "step": 915 + }, + { + "epoch": 2.4063116370808677, + "high_lr": 0.0005184210526315789, + "low_lr": 1.036842105263158e-05, + "step": 915 + }, + { + "epoch": 2.4063116370808677, + "high_lr": 0.0005184210526315789, + "low_lr": 1.036842105263158e-05, + "step": 915 + }, + { + "epoch": 2.4063116370808677, + "high_lr": 0.0005184210526315789, + "low_lr": 1.036842105263158e-05, + "step": 915 + }, + { + "epoch": 2.4063116370808677, + "high_lr": 0.0005184210526315789, + "low_lr": 1.036842105263158e-05, + "step": 915 + }, + { + "epoch": 2.4063116370808677, + "high_lr": 0.0005184210526315789, + "low_lr": 1.036842105263158e-05, + "step": 915 + }, + { + "epoch": 2.4063116370808677, + "high_lr": 0.0005184210526315789, + "low_lr": 1.036842105263158e-05, + "step": 915 + }, + { + "epoch": 2.408941485864563, + "grad_norm": 1.3091120719909668, + "learning_rate": 0.0005178947368421053, + "loss": 1.3754, + "step": 916 + }, + { + "epoch": 2.408941485864563, + "high_lr": 0.0005178947368421053, + "low_lr": 1.0357894736842107e-05, + "step": 916 + }, + { + "epoch": 2.408941485864563, + "high_lr": 0.0005178947368421053, + "low_lr": 1.0357894736842107e-05, + "step": 916 + }, + { + "epoch": 2.408941485864563, + "high_lr": 0.0005178947368421053, + "low_lr": 1.0357894736842107e-05, + "step": 916 + }, + { + "epoch": 2.408941485864563, + "high_lr": 0.0005178947368421053, + "low_lr": 1.0357894736842107e-05, + "step": 916 + }, + { + "epoch": 2.408941485864563, + "high_lr": 0.0005178947368421053, + "low_lr": 1.0357894736842107e-05, + "step": 916 + }, + { + "epoch": 2.408941485864563, + "high_lr": 0.0005178947368421053, + "low_lr": 1.0357894736842107e-05, + "step": 916 + }, + { + "epoch": 2.408941485864563, + "high_lr": 0.0005178947368421053, + "low_lr": 1.0357894736842107e-05, + "step": 916 + }, + { + "epoch": 2.408941485864563, + "high_lr": 0.0005178947368421053, + "low_lr": 1.0357894736842107e-05, + "step": 916 + }, + { + "epoch": 2.411571334648258, + "grad_norm": 1.2084747552871704, + "learning_rate": 0.0005173684210526316, + "loss": 1.3484, + "step": 917 + }, + { + "epoch": 2.411571334648258, + "high_lr": 0.0005173684210526316, + "low_lr": 1.0347368421052632e-05, + "step": 917 + }, + { + "epoch": 2.411571334648258, + "high_lr": 0.0005173684210526316, + "low_lr": 1.0347368421052632e-05, + "step": 917 + }, + { + "epoch": 2.411571334648258, + "high_lr": 0.0005173684210526316, + "low_lr": 1.0347368421052632e-05, + "step": 917 + }, + { + "epoch": 2.411571334648258, + "high_lr": 0.0005173684210526316, + "low_lr": 1.0347368421052632e-05, + "step": 917 + }, + { + "epoch": 2.411571334648258, + "high_lr": 0.0005173684210526316, + "low_lr": 1.0347368421052632e-05, + "step": 917 + }, + { + "epoch": 2.411571334648258, + "high_lr": 0.0005173684210526316, + "low_lr": 1.0347368421052632e-05, + "step": 917 + }, + { + "epoch": 2.411571334648258, + "high_lr": 0.0005173684210526316, + "low_lr": 1.0347368421052632e-05, + "step": 917 + }, + { + "epoch": 2.411571334648258, + "high_lr": 0.0005173684210526316, + "low_lr": 1.0347368421052632e-05, + "step": 917 + }, + { + "epoch": 2.4142011834319526, + "grad_norm": 1.3047235012054443, + "learning_rate": 0.0005168421052631579, + "loss": 1.3769, + "step": 918 + }, + { + "epoch": 2.4142011834319526, + "high_lr": 0.0005168421052631579, + "low_lr": 1.0336842105263158e-05, + "step": 918 + }, + { + "epoch": 2.4142011834319526, + "high_lr": 0.0005168421052631579, + "low_lr": 1.0336842105263158e-05, + "step": 918 + }, + { + "epoch": 2.4142011834319526, + "high_lr": 0.0005168421052631579, + "low_lr": 1.0336842105263158e-05, + "step": 918 + }, + { + "epoch": 2.4142011834319526, + "high_lr": 0.0005168421052631579, + "low_lr": 1.0336842105263158e-05, + "step": 918 + }, + { + "epoch": 2.4142011834319526, + "high_lr": 0.0005168421052631579, + "low_lr": 1.0336842105263158e-05, + "step": 918 + }, + { + "epoch": 2.4142011834319526, + "high_lr": 0.0005168421052631579, + "low_lr": 1.0336842105263158e-05, + "step": 918 + }, + { + "epoch": 2.4142011834319526, + "high_lr": 0.0005168421052631579, + "low_lr": 1.0336842105263158e-05, + "step": 918 + }, + { + "epoch": 2.4142011834319526, + "high_lr": 0.0005168421052631579, + "low_lr": 1.0336842105263158e-05, + "step": 918 + }, + { + "epoch": 2.4168310322156477, + "grad_norm": 1.2231842279434204, + "learning_rate": 0.0005163157894736842, + "loss": 1.3473, + "step": 919 + }, + { + "epoch": 2.4168310322156477, + "high_lr": 0.0005163157894736842, + "low_lr": 1.0326315789473685e-05, + "step": 919 + }, + { + "epoch": 2.4168310322156477, + "high_lr": 0.0005163157894736842, + "low_lr": 1.0326315789473685e-05, + "step": 919 + }, + { + "epoch": 2.4168310322156477, + "high_lr": 0.0005163157894736842, + "low_lr": 1.0326315789473685e-05, + "step": 919 + }, + { + "epoch": 2.4168310322156477, + "high_lr": 0.0005163157894736842, + "low_lr": 1.0326315789473685e-05, + "step": 919 + }, + { + "epoch": 2.4168310322156477, + "high_lr": 0.0005163157894736842, + "low_lr": 1.0326315789473685e-05, + "step": 919 + }, + { + "epoch": 2.4168310322156477, + "high_lr": 0.0005163157894736842, + "low_lr": 1.0326315789473685e-05, + "step": 919 + }, + { + "epoch": 2.4168310322156477, + "high_lr": 0.0005163157894736842, + "low_lr": 1.0326315789473685e-05, + "step": 919 + }, + { + "epoch": 2.4168310322156477, + "high_lr": 0.0005163157894736842, + "low_lr": 1.0326315789473685e-05, + "step": 919 + }, + { + "epoch": 2.4194608809993428, + "grad_norm": 1.344902515411377, + "learning_rate": 0.0005157894736842106, + "loss": 1.4027, + "step": 920 + }, + { + "epoch": 2.4194608809993428, + "high_lr": 0.0005157894736842106, + "low_lr": 1.0315789473684213e-05, + "step": 920 + }, + { + "epoch": 2.4194608809993428, + "high_lr": 0.0005157894736842106, + "low_lr": 1.0315789473684213e-05, + "step": 920 + }, + { + "epoch": 2.4194608809993428, + "high_lr": 0.0005157894736842106, + "low_lr": 1.0315789473684213e-05, + "step": 920 + }, + { + "epoch": 2.4194608809993428, + "high_lr": 0.0005157894736842106, + "low_lr": 1.0315789473684213e-05, + "step": 920 + }, + { + "epoch": 2.4194608809993428, + "high_lr": 0.0005157894736842106, + "low_lr": 1.0315789473684213e-05, + "step": 920 + }, + { + "epoch": 2.4194608809993428, + "high_lr": 0.0005157894736842106, + "low_lr": 1.0315789473684213e-05, + "step": 920 + }, + { + "epoch": 2.4194608809993428, + "high_lr": 0.0005157894736842106, + "low_lr": 1.0315789473684213e-05, + "step": 920 + }, + { + "epoch": 2.4194608809993428, + "high_lr": 0.0005157894736842106, + "low_lr": 1.0315789473684213e-05, + "step": 920 + }, + { + "epoch": 2.4220907297830374, + "grad_norm": 1.3281110525131226, + "learning_rate": 0.0005152631578947369, + "loss": 1.3709, + "step": 921 + }, + { + "epoch": 2.4220907297830374, + "high_lr": 0.0005152631578947369, + "low_lr": 1.0305263157894739e-05, + "step": 921 + }, + { + "epoch": 2.4220907297830374, + "high_lr": 0.0005152631578947369, + "low_lr": 1.0305263157894739e-05, + "step": 921 + }, + { + "epoch": 2.4220907297830374, + "high_lr": 0.0005152631578947369, + "low_lr": 1.0305263157894739e-05, + "step": 921 + }, + { + "epoch": 2.4220907297830374, + "high_lr": 0.0005152631578947369, + "low_lr": 1.0305263157894739e-05, + "step": 921 + }, + { + "epoch": 2.4220907297830374, + "high_lr": 0.0005152631578947369, + "low_lr": 1.0305263157894739e-05, + "step": 921 + }, + { + "epoch": 2.4220907297830374, + "high_lr": 0.0005152631578947369, + "low_lr": 1.0305263157894739e-05, + "step": 921 + }, + { + "epoch": 2.4220907297830374, + "high_lr": 0.0005152631578947369, + "low_lr": 1.0305263157894739e-05, + "step": 921 + }, + { + "epoch": 2.4220907297830374, + "high_lr": 0.0005152631578947369, + "low_lr": 1.0305263157894739e-05, + "step": 921 + }, + { + "epoch": 2.4247205785667325, + "grad_norm": 1.271446943283081, + "learning_rate": 0.0005147368421052631, + "loss": 1.3206, + "step": 922 + }, + { + "epoch": 2.4247205785667325, + "high_lr": 0.0005147368421052631, + "low_lr": 1.0294736842105264e-05, + "step": 922 + }, + { + "epoch": 2.4247205785667325, + "high_lr": 0.0005147368421052631, + "low_lr": 1.0294736842105264e-05, + "step": 922 + }, + { + "epoch": 2.4247205785667325, + "high_lr": 0.0005147368421052631, + "low_lr": 1.0294736842105264e-05, + "step": 922 + }, + { + "epoch": 2.4247205785667325, + "high_lr": 0.0005147368421052631, + "low_lr": 1.0294736842105264e-05, + "step": 922 + }, + { + "epoch": 2.4247205785667325, + "high_lr": 0.0005147368421052631, + "low_lr": 1.0294736842105264e-05, + "step": 922 + }, + { + "epoch": 2.4247205785667325, + "high_lr": 0.0005147368421052631, + "low_lr": 1.0294736842105264e-05, + "step": 922 + }, + { + "epoch": 2.4247205785667325, + "high_lr": 0.0005147368421052631, + "low_lr": 1.0294736842105264e-05, + "step": 922 + }, + { + "epoch": 2.4247205785667325, + "high_lr": 0.0005147368421052631, + "low_lr": 1.0294736842105264e-05, + "step": 922 + }, + { + "epoch": 2.427350427350427, + "grad_norm": 1.2340179681777954, + "learning_rate": 0.0005142105263157894, + "loss": 1.4087, + "step": 923 + }, + { + "epoch": 2.427350427350427, + "high_lr": 0.0005142105263157894, + "low_lr": 1.028421052631579e-05, + "step": 923 + }, + { + "epoch": 2.427350427350427, + "high_lr": 0.0005142105263157894, + "low_lr": 1.028421052631579e-05, + "step": 923 + }, + { + "epoch": 2.427350427350427, + "high_lr": 0.0005142105263157894, + "low_lr": 1.028421052631579e-05, + "step": 923 + }, + { + "epoch": 2.427350427350427, + "high_lr": 0.0005142105263157894, + "low_lr": 1.028421052631579e-05, + "step": 923 + }, + { + "epoch": 2.427350427350427, + "high_lr": 0.0005142105263157894, + "low_lr": 1.028421052631579e-05, + "step": 923 + }, + { + "epoch": 2.427350427350427, + "high_lr": 0.0005142105263157894, + "low_lr": 1.028421052631579e-05, + "step": 923 + }, + { + "epoch": 2.427350427350427, + "high_lr": 0.0005142105263157894, + "low_lr": 1.028421052631579e-05, + "step": 923 + }, + { + "epoch": 2.427350427350427, + "high_lr": 0.0005142105263157894, + "low_lr": 1.028421052631579e-05, + "step": 923 + }, + { + "epoch": 2.4299802761341223, + "grad_norm": 1.2009501457214355, + "learning_rate": 0.0005136842105263157, + "loss": 1.362, + "step": 924 + }, + { + "epoch": 2.4299802761341223, + "high_lr": 0.0005136842105263157, + "low_lr": 1.0273684210526316e-05, + "step": 924 + }, + { + "epoch": 2.4299802761341223, + "high_lr": 0.0005136842105263157, + "low_lr": 1.0273684210526316e-05, + "step": 924 + }, + { + "epoch": 2.4299802761341223, + "high_lr": 0.0005136842105263157, + "low_lr": 1.0273684210526316e-05, + "step": 924 + }, + { + "epoch": 2.4299802761341223, + "high_lr": 0.0005136842105263157, + "low_lr": 1.0273684210526316e-05, + "step": 924 + }, + { + "epoch": 2.4299802761341223, + "high_lr": 0.0005136842105263157, + "low_lr": 1.0273684210526316e-05, + "step": 924 + }, + { + "epoch": 2.4299802761341223, + "high_lr": 0.0005136842105263157, + "low_lr": 1.0273684210526316e-05, + "step": 924 + }, + { + "epoch": 2.4299802761341223, + "high_lr": 0.0005136842105263157, + "low_lr": 1.0273684210526316e-05, + "step": 924 + }, + { + "epoch": 2.4299802761341223, + "high_lr": 0.0005136842105263157, + "low_lr": 1.0273684210526316e-05, + "step": 924 + }, + { + "epoch": 2.4326101249178174, + "grad_norm": 1.222140908241272, + "learning_rate": 0.0005131578947368421, + "loss": 1.3997, + "step": 925 + }, + { + "epoch": 2.4326101249178174, + "high_lr": 0.0005131578947368421, + "low_lr": 1.0263157894736844e-05, + "step": 925 + }, + { + "epoch": 2.4326101249178174, + "high_lr": 0.0005131578947368421, + "low_lr": 1.0263157894736844e-05, + "step": 925 + }, + { + "epoch": 2.4326101249178174, + "high_lr": 0.0005131578947368421, + "low_lr": 1.0263157894736844e-05, + "step": 925 + }, + { + "epoch": 2.4326101249178174, + "high_lr": 0.0005131578947368421, + "low_lr": 1.0263157894736844e-05, + "step": 925 + }, + { + "epoch": 2.4326101249178174, + "high_lr": 0.0005131578947368421, + "low_lr": 1.0263157894736844e-05, + "step": 925 + }, + { + "epoch": 2.4326101249178174, + "high_lr": 0.0005131578947368421, + "low_lr": 1.0263157894736844e-05, + "step": 925 + }, + { + "epoch": 2.4326101249178174, + "high_lr": 0.0005131578947368421, + "low_lr": 1.0263157894736844e-05, + "step": 925 + }, + { + "epoch": 2.4326101249178174, + "high_lr": 0.0005131578947368421, + "low_lr": 1.0263157894736844e-05, + "step": 925 + }, + { + "epoch": 2.435239973701512, + "grad_norm": 1.2842364311218262, + "learning_rate": 0.0005126315789473685, + "loss": 1.3655, + "step": 926 + }, + { + "epoch": 2.435239973701512, + "high_lr": 0.0005126315789473685, + "low_lr": 1.0252631578947369e-05, + "step": 926 + }, + { + "epoch": 2.435239973701512, + "high_lr": 0.0005126315789473685, + "low_lr": 1.0252631578947369e-05, + "step": 926 + }, + { + "epoch": 2.435239973701512, + "high_lr": 0.0005126315789473685, + "low_lr": 1.0252631578947369e-05, + "step": 926 + }, + { + "epoch": 2.435239973701512, + "high_lr": 0.0005126315789473685, + "low_lr": 1.0252631578947369e-05, + "step": 926 + }, + { + "epoch": 2.435239973701512, + "high_lr": 0.0005126315789473685, + "low_lr": 1.0252631578947369e-05, + "step": 926 + }, + { + "epoch": 2.435239973701512, + "high_lr": 0.0005126315789473685, + "low_lr": 1.0252631578947369e-05, + "step": 926 + }, + { + "epoch": 2.435239973701512, + "high_lr": 0.0005126315789473685, + "low_lr": 1.0252631578947369e-05, + "step": 926 + }, + { + "epoch": 2.435239973701512, + "high_lr": 0.0005126315789473685, + "low_lr": 1.0252631578947369e-05, + "step": 926 + }, + { + "epoch": 2.437869822485207, + "grad_norm": 1.326341986656189, + "learning_rate": 0.0005121052631578948, + "loss": 1.4301, + "step": 927 + }, + { + "epoch": 2.437869822485207, + "high_lr": 0.0005121052631578948, + "low_lr": 1.0242105263157895e-05, + "step": 927 + }, + { + "epoch": 2.437869822485207, + "high_lr": 0.0005121052631578948, + "low_lr": 1.0242105263157895e-05, + "step": 927 + }, + { + "epoch": 2.437869822485207, + "high_lr": 0.0005121052631578948, + "low_lr": 1.0242105263157895e-05, + "step": 927 + }, + { + "epoch": 2.437869822485207, + "high_lr": 0.0005121052631578948, + "low_lr": 1.0242105263157895e-05, + "step": 927 + }, + { + "epoch": 2.437869822485207, + "high_lr": 0.0005121052631578948, + "low_lr": 1.0242105263157895e-05, + "step": 927 + }, + { + "epoch": 2.437869822485207, + "high_lr": 0.0005121052631578948, + "low_lr": 1.0242105263157895e-05, + "step": 927 + }, + { + "epoch": 2.437869822485207, + "high_lr": 0.0005121052631578948, + "low_lr": 1.0242105263157895e-05, + "step": 927 + }, + { + "epoch": 2.437869822485207, + "high_lr": 0.0005121052631578948, + "low_lr": 1.0242105263157895e-05, + "step": 927 + }, + { + "epoch": 2.440499671268902, + "grad_norm": 1.2571117877960205, + "learning_rate": 0.0005115789473684211, + "loss": 1.4242, + "step": 928 + }, + { + "epoch": 2.440499671268902, + "high_lr": 0.0005115789473684211, + "low_lr": 1.0231578947368422e-05, + "step": 928 + }, + { + "epoch": 2.440499671268902, + "high_lr": 0.0005115789473684211, + "low_lr": 1.0231578947368422e-05, + "step": 928 + }, + { + "epoch": 2.440499671268902, + "high_lr": 0.0005115789473684211, + "low_lr": 1.0231578947368422e-05, + "step": 928 + }, + { + "epoch": 2.440499671268902, + "high_lr": 0.0005115789473684211, + "low_lr": 1.0231578947368422e-05, + "step": 928 + }, + { + "epoch": 2.440499671268902, + "high_lr": 0.0005115789473684211, + "low_lr": 1.0231578947368422e-05, + "step": 928 + }, + { + "epoch": 2.440499671268902, + "high_lr": 0.0005115789473684211, + "low_lr": 1.0231578947368422e-05, + "step": 928 + }, + { + "epoch": 2.440499671268902, + "high_lr": 0.0005115789473684211, + "low_lr": 1.0231578947368422e-05, + "step": 928 + }, + { + "epoch": 2.440499671268902, + "high_lr": 0.0005115789473684211, + "low_lr": 1.0231578947368422e-05, + "step": 928 + }, + { + "epoch": 2.443129520052597, + "grad_norm": 1.199884057044983, + "learning_rate": 0.0005110526315789474, + "loss": 1.3604, + "step": 929 + }, + { + "epoch": 2.443129520052597, + "high_lr": 0.0005110526315789474, + "low_lr": 1.0221052631578948e-05, + "step": 929 + }, + { + "epoch": 2.443129520052597, + "high_lr": 0.0005110526315789474, + "low_lr": 1.0221052631578948e-05, + "step": 929 + }, + { + "epoch": 2.443129520052597, + "high_lr": 0.0005110526315789474, + "low_lr": 1.0221052631578948e-05, + "step": 929 + }, + { + "epoch": 2.443129520052597, + "high_lr": 0.0005110526315789474, + "low_lr": 1.0221052631578948e-05, + "step": 929 + }, + { + "epoch": 2.443129520052597, + "high_lr": 0.0005110526315789474, + "low_lr": 1.0221052631578948e-05, + "step": 929 + }, + { + "epoch": 2.443129520052597, + "high_lr": 0.0005110526315789474, + "low_lr": 1.0221052631578948e-05, + "step": 929 + }, + { + "epoch": 2.443129520052597, + "high_lr": 0.0005110526315789474, + "low_lr": 1.0221052631578948e-05, + "step": 929 + }, + { + "epoch": 2.443129520052597, + "high_lr": 0.0005110526315789474, + "low_lr": 1.0221052631578948e-05, + "step": 929 + }, + { + "epoch": 2.445759368836292, + "grad_norm": 1.2833794355392456, + "learning_rate": 0.0005105263157894738, + "loss": 1.4088, + "step": 930 + }, + { + "epoch": 2.445759368836292, + "high_lr": 0.0005105263157894738, + "low_lr": 1.0210526315789476e-05, + "step": 930 + }, + { + "epoch": 2.445759368836292, + "high_lr": 0.0005105263157894738, + "low_lr": 1.0210526315789476e-05, + "step": 930 + }, + { + "epoch": 2.445759368836292, + "high_lr": 0.0005105263157894738, + "low_lr": 1.0210526315789476e-05, + "step": 930 + }, + { + "epoch": 2.445759368836292, + "high_lr": 0.0005105263157894738, + "low_lr": 1.0210526315789476e-05, + "step": 930 + }, + { + "epoch": 2.445759368836292, + "high_lr": 0.0005105263157894738, + "low_lr": 1.0210526315789476e-05, + "step": 930 + }, + { + "epoch": 2.445759368836292, + "high_lr": 0.0005105263157894738, + "low_lr": 1.0210526315789476e-05, + "step": 930 + }, + { + "epoch": 2.445759368836292, + "high_lr": 0.0005105263157894738, + "low_lr": 1.0210526315789476e-05, + "step": 930 + }, + { + "epoch": 2.445759368836292, + "high_lr": 0.0005105263157894738, + "low_lr": 1.0210526315789476e-05, + "step": 930 + }, + { + "epoch": 2.4483892176199866, + "grad_norm": 1.1905300617218018, + "learning_rate": 0.00051, + "loss": 1.3732, + "step": 931 + }, + { + "epoch": 2.4483892176199866, + "high_lr": 0.00051, + "low_lr": 1.02e-05, + "step": 931 + }, + { + "epoch": 2.4483892176199866, + "high_lr": 0.00051, + "low_lr": 1.02e-05, + "step": 931 + }, + { + "epoch": 2.4483892176199866, + "high_lr": 0.00051, + "low_lr": 1.02e-05, + "step": 931 + }, + { + "epoch": 2.4483892176199866, + "high_lr": 0.00051, + "low_lr": 1.02e-05, + "step": 931 + }, + { + "epoch": 2.4483892176199866, + "high_lr": 0.00051, + "low_lr": 1.02e-05, + "step": 931 + }, + { + "epoch": 2.4483892176199866, + "high_lr": 0.00051, + "low_lr": 1.02e-05, + "step": 931 + }, + { + "epoch": 2.4483892176199866, + "high_lr": 0.00051, + "low_lr": 1.02e-05, + "step": 931 + }, + { + "epoch": 2.4483892176199866, + "high_lr": 0.00051, + "low_lr": 1.02e-05, + "step": 931 + }, + { + "epoch": 2.4510190664036817, + "grad_norm": 1.2222079038619995, + "learning_rate": 0.0005094736842105263, + "loss": 1.3916, + "step": 932 + }, + { + "epoch": 2.4510190664036817, + "high_lr": 0.0005094736842105263, + "low_lr": 1.0189473684210527e-05, + "step": 932 + }, + { + "epoch": 2.4510190664036817, + "high_lr": 0.0005094736842105263, + "low_lr": 1.0189473684210527e-05, + "step": 932 + }, + { + "epoch": 2.4510190664036817, + "high_lr": 0.0005094736842105263, + "low_lr": 1.0189473684210527e-05, + "step": 932 + }, + { + "epoch": 2.4510190664036817, + "high_lr": 0.0005094736842105263, + "low_lr": 1.0189473684210527e-05, + "step": 932 + }, + { + "epoch": 2.4510190664036817, + "high_lr": 0.0005094736842105263, + "low_lr": 1.0189473684210527e-05, + "step": 932 + }, + { + "epoch": 2.4510190664036817, + "high_lr": 0.0005094736842105263, + "low_lr": 1.0189473684210527e-05, + "step": 932 + }, + { + "epoch": 2.4510190664036817, + "high_lr": 0.0005094736842105263, + "low_lr": 1.0189473684210527e-05, + "step": 932 + }, + { + "epoch": 2.4510190664036817, + "high_lr": 0.0005094736842105263, + "low_lr": 1.0189473684210527e-05, + "step": 932 + }, + { + "epoch": 2.453648915187377, + "grad_norm": 1.1991325616836548, + "learning_rate": 0.0005089473684210526, + "loss": 1.3654, + "step": 933 + }, + { + "epoch": 2.453648915187377, + "high_lr": 0.0005089473684210526, + "low_lr": 1.0178947368421053e-05, + "step": 933 + }, + { + "epoch": 2.453648915187377, + "high_lr": 0.0005089473684210526, + "low_lr": 1.0178947368421053e-05, + "step": 933 + }, + { + "epoch": 2.453648915187377, + "high_lr": 0.0005089473684210526, + "low_lr": 1.0178947368421053e-05, + "step": 933 + }, + { + "epoch": 2.453648915187377, + "high_lr": 0.0005089473684210526, + "low_lr": 1.0178947368421053e-05, + "step": 933 + }, + { + "epoch": 2.453648915187377, + "high_lr": 0.0005089473684210526, + "low_lr": 1.0178947368421053e-05, + "step": 933 + }, + { + "epoch": 2.453648915187377, + "high_lr": 0.0005089473684210526, + "low_lr": 1.0178947368421053e-05, + "step": 933 + }, + { + "epoch": 2.453648915187377, + "high_lr": 0.0005089473684210526, + "low_lr": 1.0178947368421053e-05, + "step": 933 + }, + { + "epoch": 2.453648915187377, + "high_lr": 0.0005089473684210526, + "low_lr": 1.0178947368421053e-05, + "step": 933 + }, + { + "epoch": 2.4562787639710715, + "grad_norm": 1.1947762966156006, + "learning_rate": 0.000508421052631579, + "loss": 1.4105, + "step": 934 + }, + { + "epoch": 2.4562787639710715, + "high_lr": 0.000508421052631579, + "low_lr": 1.0168421052631581e-05, + "step": 934 + }, + { + "epoch": 2.4562787639710715, + "high_lr": 0.000508421052631579, + "low_lr": 1.0168421052631581e-05, + "step": 934 + }, + { + "epoch": 2.4562787639710715, + "high_lr": 0.000508421052631579, + "low_lr": 1.0168421052631581e-05, + "step": 934 + }, + { + "epoch": 2.4562787639710715, + "high_lr": 0.000508421052631579, + "low_lr": 1.0168421052631581e-05, + "step": 934 + }, + { + "epoch": 2.4562787639710715, + "high_lr": 0.000508421052631579, + "low_lr": 1.0168421052631581e-05, + "step": 934 + }, + { + "epoch": 2.4562787639710715, + "high_lr": 0.000508421052631579, + "low_lr": 1.0168421052631581e-05, + "step": 934 + }, + { + "epoch": 2.4562787639710715, + "high_lr": 0.000508421052631579, + "low_lr": 1.0168421052631581e-05, + "step": 934 + }, + { + "epoch": 2.4562787639710715, + "high_lr": 0.000508421052631579, + "low_lr": 1.0168421052631581e-05, + "step": 934 + }, + { + "epoch": 2.4589086127547666, + "grad_norm": 1.3332761526107788, + "learning_rate": 0.0005078947368421053, + "loss": 1.4219, + "step": 935 + }, + { + "epoch": 2.4589086127547666, + "high_lr": 0.0005078947368421053, + "low_lr": 1.0157894736842106e-05, + "step": 935 + }, + { + "epoch": 2.4589086127547666, + "high_lr": 0.0005078947368421053, + "low_lr": 1.0157894736842106e-05, + "step": 935 + }, + { + "epoch": 2.4589086127547666, + "high_lr": 0.0005078947368421053, + "low_lr": 1.0157894736842106e-05, + "step": 935 + }, + { + "epoch": 2.4589086127547666, + "high_lr": 0.0005078947368421053, + "low_lr": 1.0157894736842106e-05, + "step": 935 + }, + { + "epoch": 2.4589086127547666, + "high_lr": 0.0005078947368421053, + "low_lr": 1.0157894736842106e-05, + "step": 935 + }, + { + "epoch": 2.4589086127547666, + "high_lr": 0.0005078947368421053, + "low_lr": 1.0157894736842106e-05, + "step": 935 + }, + { + "epoch": 2.4589086127547666, + "high_lr": 0.0005078947368421053, + "low_lr": 1.0157894736842106e-05, + "step": 935 + }, + { + "epoch": 2.4589086127547666, + "high_lr": 0.0005078947368421053, + "low_lr": 1.0157894736842106e-05, + "step": 935 + }, + { + "epoch": 2.4615384615384617, + "grad_norm": 1.2377543449401855, + "learning_rate": 0.0005073684210526316, + "loss": 1.3687, + "step": 936 + }, + { + "epoch": 2.4615384615384617, + "high_lr": 0.0005073684210526316, + "low_lr": 1.0147368421052632e-05, + "step": 936 + }, + { + "epoch": 2.4615384615384617, + "high_lr": 0.0005073684210526316, + "low_lr": 1.0147368421052632e-05, + "step": 936 + }, + { + "epoch": 2.4615384615384617, + "high_lr": 0.0005073684210526316, + "low_lr": 1.0147368421052632e-05, + "step": 936 + }, + { + "epoch": 2.4615384615384617, + "high_lr": 0.0005073684210526316, + "low_lr": 1.0147368421052632e-05, + "step": 936 + }, + { + "epoch": 2.4615384615384617, + "high_lr": 0.0005073684210526316, + "low_lr": 1.0147368421052632e-05, + "step": 936 + }, + { + "epoch": 2.4615384615384617, + "high_lr": 0.0005073684210526316, + "low_lr": 1.0147368421052632e-05, + "step": 936 + }, + { + "epoch": 2.4615384615384617, + "high_lr": 0.0005073684210526316, + "low_lr": 1.0147368421052632e-05, + "step": 936 + }, + { + "epoch": 2.4615384615384617, + "high_lr": 0.0005073684210526316, + "low_lr": 1.0147368421052632e-05, + "step": 936 + }, + { + "epoch": 2.4641683103221563, + "grad_norm": 1.1500272750854492, + "learning_rate": 0.0005068421052631579, + "loss": 1.391, + "step": 937 + }, + { + "epoch": 2.4641683103221563, + "high_lr": 0.0005068421052631579, + "low_lr": 1.0136842105263159e-05, + "step": 937 + }, + { + "epoch": 2.4641683103221563, + "high_lr": 0.0005068421052631579, + "low_lr": 1.0136842105263159e-05, + "step": 937 + }, + { + "epoch": 2.4641683103221563, + "high_lr": 0.0005068421052631579, + "low_lr": 1.0136842105263159e-05, + "step": 937 + }, + { + "epoch": 2.4641683103221563, + "high_lr": 0.0005068421052631579, + "low_lr": 1.0136842105263159e-05, + "step": 937 + }, + { + "epoch": 2.4641683103221563, + "high_lr": 0.0005068421052631579, + "low_lr": 1.0136842105263159e-05, + "step": 937 + }, + { + "epoch": 2.4641683103221563, + "high_lr": 0.0005068421052631579, + "low_lr": 1.0136842105263159e-05, + "step": 937 + }, + { + "epoch": 2.4641683103221563, + "high_lr": 0.0005068421052631579, + "low_lr": 1.0136842105263159e-05, + "step": 937 + }, + { + "epoch": 2.4641683103221563, + "high_lr": 0.0005068421052631579, + "low_lr": 1.0136842105263159e-05, + "step": 937 + }, + { + "epoch": 2.4667981591058514, + "grad_norm": 1.2336149215698242, + "learning_rate": 0.0005063157894736841, + "loss": 1.3741, + "step": 938 + }, + { + "epoch": 2.4667981591058514, + "high_lr": 0.0005063157894736841, + "low_lr": 1.0126315789473685e-05, + "step": 938 + }, + { + "epoch": 2.4667981591058514, + "high_lr": 0.0005063157894736841, + "low_lr": 1.0126315789473685e-05, + "step": 938 + }, + { + "epoch": 2.4667981591058514, + "high_lr": 0.0005063157894736841, + "low_lr": 1.0126315789473685e-05, + "step": 938 + }, + { + "epoch": 2.4667981591058514, + "high_lr": 0.0005063157894736841, + "low_lr": 1.0126315789473685e-05, + "step": 938 + }, + { + "epoch": 2.4667981591058514, + "high_lr": 0.0005063157894736841, + "low_lr": 1.0126315789473685e-05, + "step": 938 + }, + { + "epoch": 2.4667981591058514, + "high_lr": 0.0005063157894736841, + "low_lr": 1.0126315789473685e-05, + "step": 938 + }, + { + "epoch": 2.4667981591058514, + "high_lr": 0.0005063157894736841, + "low_lr": 1.0126315789473685e-05, + "step": 938 + }, + { + "epoch": 2.4667981591058514, + "high_lr": 0.0005063157894736841, + "low_lr": 1.0126315789473685e-05, + "step": 938 + }, + { + "epoch": 2.4694280078895465, + "grad_norm": 1.2014271020889282, + "learning_rate": 0.0005057894736842105, + "loss": 1.3637, + "step": 939 + }, + { + "epoch": 2.4694280078895465, + "high_lr": 0.0005057894736842105, + "low_lr": 1.0115789473684213e-05, + "step": 939 + }, + { + "epoch": 2.4694280078895465, + "high_lr": 0.0005057894736842105, + "low_lr": 1.0115789473684213e-05, + "step": 939 + }, + { + "epoch": 2.4694280078895465, + "high_lr": 0.0005057894736842105, + "low_lr": 1.0115789473684213e-05, + "step": 939 + }, + { + "epoch": 2.4694280078895465, + "high_lr": 0.0005057894736842105, + "low_lr": 1.0115789473684213e-05, + "step": 939 + }, + { + "epoch": 2.4694280078895465, + "high_lr": 0.0005057894736842105, + "low_lr": 1.0115789473684213e-05, + "step": 939 + }, + { + "epoch": 2.4694280078895465, + "high_lr": 0.0005057894736842105, + "low_lr": 1.0115789473684213e-05, + "step": 939 + }, + { + "epoch": 2.4694280078895465, + "high_lr": 0.0005057894736842105, + "low_lr": 1.0115789473684213e-05, + "step": 939 + }, + { + "epoch": 2.4694280078895465, + "high_lr": 0.0005057894736842105, + "low_lr": 1.0115789473684213e-05, + "step": 939 + }, + { + "epoch": 2.472057856673241, + "grad_norm": 1.2425811290740967, + "learning_rate": 0.0005052631578947368, + "loss": 1.404, + "step": 940 + }, + { + "epoch": 2.472057856673241, + "high_lr": 0.0005052631578947368, + "low_lr": 1.0105263157894738e-05, + "step": 940 + }, + { + "epoch": 2.472057856673241, + "high_lr": 0.0005052631578947368, + "low_lr": 1.0105263157894738e-05, + "step": 940 + }, + { + "epoch": 2.472057856673241, + "high_lr": 0.0005052631578947368, + "low_lr": 1.0105263157894738e-05, + "step": 940 + }, + { + "epoch": 2.472057856673241, + "high_lr": 0.0005052631578947368, + "low_lr": 1.0105263157894738e-05, + "step": 940 + }, + { + "epoch": 2.472057856673241, + "high_lr": 0.0005052631578947368, + "low_lr": 1.0105263157894738e-05, + "step": 940 + }, + { + "epoch": 2.472057856673241, + "high_lr": 0.0005052631578947368, + "low_lr": 1.0105263157894738e-05, + "step": 940 + }, + { + "epoch": 2.472057856673241, + "high_lr": 0.0005052631578947368, + "low_lr": 1.0105263157894738e-05, + "step": 940 + }, + { + "epoch": 2.472057856673241, + "high_lr": 0.0005052631578947368, + "low_lr": 1.0105263157894738e-05, + "step": 940 + }, + { + "epoch": 2.4746877054569363, + "grad_norm": 1.2446467876434326, + "learning_rate": 0.0005047368421052631, + "loss": 1.3615, + "step": 941 + }, + { + "epoch": 2.4746877054569363, + "high_lr": 0.0005047368421052631, + "low_lr": 1.0094736842105264e-05, + "step": 941 + }, + { + "epoch": 2.4746877054569363, + "high_lr": 0.0005047368421052631, + "low_lr": 1.0094736842105264e-05, + "step": 941 + }, + { + "epoch": 2.4746877054569363, + "high_lr": 0.0005047368421052631, + "low_lr": 1.0094736842105264e-05, + "step": 941 + }, + { + "epoch": 2.4746877054569363, + "high_lr": 0.0005047368421052631, + "low_lr": 1.0094736842105264e-05, + "step": 941 + }, + { + "epoch": 2.4746877054569363, + "high_lr": 0.0005047368421052631, + "low_lr": 1.0094736842105264e-05, + "step": 941 + }, + { + "epoch": 2.4746877054569363, + "high_lr": 0.0005047368421052631, + "low_lr": 1.0094736842105264e-05, + "step": 941 + }, + { + "epoch": 2.4746877054569363, + "high_lr": 0.0005047368421052631, + "low_lr": 1.0094736842105264e-05, + "step": 941 + }, + { + "epoch": 2.4746877054569363, + "high_lr": 0.0005047368421052631, + "low_lr": 1.0094736842105264e-05, + "step": 941 + }, + { + "epoch": 2.4773175542406314, + "grad_norm": 1.1776800155639648, + "learning_rate": 0.0005042105263157895, + "loss": 1.4181, + "step": 942 + }, + { + "epoch": 2.4773175542406314, + "high_lr": 0.0005042105263157895, + "low_lr": 1.008421052631579e-05, + "step": 942 + }, + { + "epoch": 2.4773175542406314, + "high_lr": 0.0005042105263157895, + "low_lr": 1.008421052631579e-05, + "step": 942 + }, + { + "epoch": 2.4773175542406314, + "high_lr": 0.0005042105263157895, + "low_lr": 1.008421052631579e-05, + "step": 942 + }, + { + "epoch": 2.4773175542406314, + "high_lr": 0.0005042105263157895, + "low_lr": 1.008421052631579e-05, + "step": 942 + }, + { + "epoch": 2.4773175542406314, + "high_lr": 0.0005042105263157895, + "low_lr": 1.008421052631579e-05, + "step": 942 + }, + { + "epoch": 2.4773175542406314, + "high_lr": 0.0005042105263157895, + "low_lr": 1.008421052631579e-05, + "step": 942 + }, + { + "epoch": 2.4773175542406314, + "high_lr": 0.0005042105263157895, + "low_lr": 1.008421052631579e-05, + "step": 942 + }, + { + "epoch": 2.4773175542406314, + "high_lr": 0.0005042105263157895, + "low_lr": 1.008421052631579e-05, + "step": 942 + }, + { + "epoch": 2.479947403024326, + "grad_norm": 1.4032350778579712, + "learning_rate": 0.0005036842105263158, + "loss": 1.43, + "step": 943 + }, + { + "epoch": 2.479947403024326, + "high_lr": 0.0005036842105263158, + "low_lr": 1.0073684210526315e-05, + "step": 943 + }, + { + "epoch": 2.479947403024326, + "high_lr": 0.0005036842105263158, + "low_lr": 1.0073684210526315e-05, + "step": 943 + }, + { + "epoch": 2.479947403024326, + "high_lr": 0.0005036842105263158, + "low_lr": 1.0073684210526315e-05, + "step": 943 + }, + { + "epoch": 2.479947403024326, + "high_lr": 0.0005036842105263158, + "low_lr": 1.0073684210526315e-05, + "step": 943 + }, + { + "epoch": 2.479947403024326, + "high_lr": 0.0005036842105263158, + "low_lr": 1.0073684210526315e-05, + "step": 943 + }, + { + "epoch": 2.479947403024326, + "high_lr": 0.0005036842105263158, + "low_lr": 1.0073684210526315e-05, + "step": 943 + }, + { + "epoch": 2.479947403024326, + "high_lr": 0.0005036842105263158, + "low_lr": 1.0073684210526315e-05, + "step": 943 + }, + { + "epoch": 2.479947403024326, + "high_lr": 0.0005036842105263158, + "low_lr": 1.0073684210526315e-05, + "step": 943 + }, + { + "epoch": 2.482577251808021, + "grad_norm": 1.2709810733795166, + "learning_rate": 0.0005031578947368422, + "loss": 1.4115, + "step": 944 + }, + { + "epoch": 2.482577251808021, + "high_lr": 0.0005031578947368422, + "low_lr": 1.0063157894736843e-05, + "step": 944 + }, + { + "epoch": 2.482577251808021, + "high_lr": 0.0005031578947368422, + "low_lr": 1.0063157894736843e-05, + "step": 944 + }, + { + "epoch": 2.482577251808021, + "high_lr": 0.0005031578947368422, + "low_lr": 1.0063157894736843e-05, + "step": 944 + }, + { + "epoch": 2.482577251808021, + "high_lr": 0.0005031578947368422, + "low_lr": 1.0063157894736843e-05, + "step": 944 + }, + { + "epoch": 2.482577251808021, + "high_lr": 0.0005031578947368422, + "low_lr": 1.0063157894736843e-05, + "step": 944 + }, + { + "epoch": 2.482577251808021, + "high_lr": 0.0005031578947368422, + "low_lr": 1.0063157894736843e-05, + "step": 944 + }, + { + "epoch": 2.482577251808021, + "high_lr": 0.0005031578947368422, + "low_lr": 1.0063157894736843e-05, + "step": 944 + }, + { + "epoch": 2.482577251808021, + "high_lr": 0.0005031578947368422, + "low_lr": 1.0063157894736843e-05, + "step": 944 + }, + { + "epoch": 2.485207100591716, + "grad_norm": 1.2019845247268677, + "learning_rate": 0.0005026315789473685, + "loss": 1.3414, + "step": 945 + }, + { + "epoch": 2.485207100591716, + "high_lr": 0.0005026315789473685, + "low_lr": 1.005263157894737e-05, + "step": 945 + }, + { + "epoch": 2.485207100591716, + "high_lr": 0.0005026315789473685, + "low_lr": 1.005263157894737e-05, + "step": 945 + }, + { + "epoch": 2.485207100591716, + "high_lr": 0.0005026315789473685, + "low_lr": 1.005263157894737e-05, + "step": 945 + }, + { + "epoch": 2.485207100591716, + "high_lr": 0.0005026315789473685, + "low_lr": 1.005263157894737e-05, + "step": 945 + }, + { + "epoch": 2.485207100591716, + "high_lr": 0.0005026315789473685, + "low_lr": 1.005263157894737e-05, + "step": 945 + }, + { + "epoch": 2.485207100591716, + "high_lr": 0.0005026315789473685, + "low_lr": 1.005263157894737e-05, + "step": 945 + }, + { + "epoch": 2.485207100591716, + "high_lr": 0.0005026315789473685, + "low_lr": 1.005263157894737e-05, + "step": 945 + }, + { + "epoch": 2.485207100591716, + "high_lr": 0.0005026315789473685, + "low_lr": 1.005263157894737e-05, + "step": 945 + }, + { + "epoch": 2.487836949375411, + "grad_norm": 1.287297010421753, + "learning_rate": 0.0005021052631578948, + "loss": 1.3787, + "step": 946 + }, + { + "epoch": 2.487836949375411, + "high_lr": 0.0005021052631578948, + "low_lr": 1.0042105263157896e-05, + "step": 946 + }, + { + "epoch": 2.487836949375411, + "high_lr": 0.0005021052631578948, + "low_lr": 1.0042105263157896e-05, + "step": 946 + }, + { + "epoch": 2.487836949375411, + "high_lr": 0.0005021052631578948, + "low_lr": 1.0042105263157896e-05, + "step": 946 + }, + { + "epoch": 2.487836949375411, + "high_lr": 0.0005021052631578948, + "low_lr": 1.0042105263157896e-05, + "step": 946 + }, + { + "epoch": 2.487836949375411, + "high_lr": 0.0005021052631578948, + "low_lr": 1.0042105263157896e-05, + "step": 946 + }, + { + "epoch": 2.487836949375411, + "high_lr": 0.0005021052631578948, + "low_lr": 1.0042105263157896e-05, + "step": 946 + }, + { + "epoch": 2.487836949375411, + "high_lr": 0.0005021052631578948, + "low_lr": 1.0042105263157896e-05, + "step": 946 + }, + { + "epoch": 2.487836949375411, + "high_lr": 0.0005021052631578948, + "low_lr": 1.0042105263157896e-05, + "step": 946 + }, + { + "epoch": 2.490466798159106, + "grad_norm": 1.2814826965332031, + "learning_rate": 0.000501578947368421, + "loss": 1.4206, + "step": 947 + }, + { + "epoch": 2.490466798159106, + "high_lr": 0.000501578947368421, + "low_lr": 1.0031578947368422e-05, + "step": 947 + }, + { + "epoch": 2.490466798159106, + "high_lr": 0.000501578947368421, + "low_lr": 1.0031578947368422e-05, + "step": 947 + }, + { + "epoch": 2.490466798159106, + "high_lr": 0.000501578947368421, + "low_lr": 1.0031578947368422e-05, + "step": 947 + }, + { + "epoch": 2.490466798159106, + "high_lr": 0.000501578947368421, + "low_lr": 1.0031578947368422e-05, + "step": 947 + }, + { + "epoch": 2.490466798159106, + "high_lr": 0.000501578947368421, + "low_lr": 1.0031578947368422e-05, + "step": 947 + }, + { + "epoch": 2.490466798159106, + "high_lr": 0.000501578947368421, + "low_lr": 1.0031578947368422e-05, + "step": 947 + }, + { + "epoch": 2.490466798159106, + "high_lr": 0.000501578947368421, + "low_lr": 1.0031578947368422e-05, + "step": 947 + }, + { + "epoch": 2.490466798159106, + "high_lr": 0.000501578947368421, + "low_lr": 1.0031578947368422e-05, + "step": 947 + }, + { + "epoch": 2.4930966469428006, + "grad_norm": 1.1957181692123413, + "learning_rate": 0.0005010526315789474, + "loss": 1.3637, + "step": 948 + }, + { + "epoch": 2.4930966469428006, + "high_lr": 0.0005010526315789474, + "low_lr": 1.002105263157895e-05, + "step": 948 + }, + { + "epoch": 2.4930966469428006, + "high_lr": 0.0005010526315789474, + "low_lr": 1.002105263157895e-05, + "step": 948 + }, + { + "epoch": 2.4930966469428006, + "high_lr": 0.0005010526315789474, + "low_lr": 1.002105263157895e-05, + "step": 948 + }, + { + "epoch": 2.4930966469428006, + "high_lr": 0.0005010526315789474, + "low_lr": 1.002105263157895e-05, + "step": 948 + }, + { + "epoch": 2.4930966469428006, + "high_lr": 0.0005010526315789474, + "low_lr": 1.002105263157895e-05, + "step": 948 + }, + { + "epoch": 2.4930966469428006, + "high_lr": 0.0005010526315789474, + "low_lr": 1.002105263157895e-05, + "step": 948 + }, + { + "epoch": 2.4930966469428006, + "high_lr": 0.0005010526315789474, + "low_lr": 1.002105263157895e-05, + "step": 948 + }, + { + "epoch": 2.4930966469428006, + "high_lr": 0.0005010526315789474, + "low_lr": 1.002105263157895e-05, + "step": 948 + }, + { + "epoch": 2.4957264957264957, + "grad_norm": 1.253524899482727, + "learning_rate": 0.0005005263157894737, + "loss": 1.3531, + "step": 949 + }, + { + "epoch": 2.4957264957264957, + "high_lr": 0.0005005263157894737, + "low_lr": 1.0010526315789474e-05, + "step": 949 + }, + { + "epoch": 2.4957264957264957, + "high_lr": 0.0005005263157894737, + "low_lr": 1.0010526315789474e-05, + "step": 949 + }, + { + "epoch": 2.4957264957264957, + "high_lr": 0.0005005263157894737, + "low_lr": 1.0010526315789474e-05, + "step": 949 + }, + { + "epoch": 2.4957264957264957, + "high_lr": 0.0005005263157894737, + "low_lr": 1.0010526315789474e-05, + "step": 949 + }, + { + "epoch": 2.4957264957264957, + "high_lr": 0.0005005263157894737, + "low_lr": 1.0010526315789474e-05, + "step": 949 + }, + { + "epoch": 2.4957264957264957, + "high_lr": 0.0005005263157894737, + "low_lr": 1.0010526315789474e-05, + "step": 949 + }, + { + "epoch": 2.4957264957264957, + "high_lr": 0.0005005263157894737, + "low_lr": 1.0010526315789474e-05, + "step": 949 + }, + { + "epoch": 2.4957264957264957, + "high_lr": 0.0005005263157894737, + "low_lr": 1.0010526315789474e-05, + "step": 949 + }, + { + "epoch": 2.498356344510191, + "grad_norm": 1.2310932874679565, + "learning_rate": 0.0005, + "loss": 1.3921, + "step": 950 + }, + { + "epoch": 2.498356344510191, + "high_lr": 0.0005, + "low_lr": 1e-05, + "step": 950 + }, + { + "epoch": 2.498356344510191, + "high_lr": 0.0005, + "low_lr": 1e-05, + "step": 950 + }, + { + "epoch": 2.498356344510191, + "high_lr": 0.0005, + "low_lr": 1e-05, + "step": 950 + }, + { + "epoch": 2.498356344510191, + "high_lr": 0.0005, + "low_lr": 1e-05, + "step": 950 + }, + { + "epoch": 2.498356344510191, + "high_lr": 0.0005, + "low_lr": 1e-05, + "step": 950 + }, + { + "epoch": 2.498356344510191, + "high_lr": 0.0005, + "low_lr": 1e-05, + "step": 950 + }, + { + "epoch": 2.498356344510191, + "high_lr": 0.0005, + "low_lr": 1e-05, + "step": 950 + }, + { + "epoch": 2.498356344510191, + "high_lr": 0.0005, + "low_lr": 1e-05, + "step": 950 + }, + { + "epoch": 2.5009861932938855, + "grad_norm": 1.2856698036193848, + "learning_rate": 0.0004994736842105263, + "loss": 1.3888, + "step": 951 + }, + { + "epoch": 2.5009861932938855, + "high_lr": 0.0004994736842105263, + "low_lr": 9.989473684210527e-06, + "step": 951 + }, + { + "epoch": 2.5009861932938855, + "high_lr": 0.0004994736842105263, + "low_lr": 9.989473684210527e-06, + "step": 951 + }, + { + "epoch": 2.5009861932938855, + "high_lr": 0.0004994736842105263, + "low_lr": 9.989473684210527e-06, + "step": 951 + }, + { + "epoch": 2.5009861932938855, + "high_lr": 0.0004994736842105263, + "low_lr": 9.989473684210527e-06, + "step": 951 + }, + { + "epoch": 2.5009861932938855, + "high_lr": 0.0004994736842105263, + "low_lr": 9.989473684210527e-06, + "step": 951 + }, + { + "epoch": 2.5009861932938855, + "high_lr": 0.0004994736842105263, + "low_lr": 9.989473684210527e-06, + "step": 951 + }, + { + "epoch": 2.5009861932938855, + "high_lr": 0.0004994736842105263, + "low_lr": 9.989473684210527e-06, + "step": 951 + }, + { + "epoch": 2.5009861932938855, + "high_lr": 0.0004994736842105263, + "low_lr": 9.989473684210527e-06, + "step": 951 + }, + { + "epoch": 2.5036160420775806, + "grad_norm": 1.3589057922363281, + "learning_rate": 0.0004989473684210527, + "loss": 1.3897, + "step": 952 + }, + { + "epoch": 2.5036160420775806, + "high_lr": 0.0004989473684210527, + "low_lr": 9.978947368421053e-06, + "step": 952 + }, + { + "epoch": 2.5036160420775806, + "high_lr": 0.0004989473684210527, + "low_lr": 9.978947368421053e-06, + "step": 952 + }, + { + "epoch": 2.5036160420775806, + "high_lr": 0.0004989473684210527, + "low_lr": 9.978947368421053e-06, + "step": 952 + }, + { + "epoch": 2.5036160420775806, + "high_lr": 0.0004989473684210527, + "low_lr": 9.978947368421053e-06, + "step": 952 + }, + { + "epoch": 2.5036160420775806, + "high_lr": 0.0004989473684210527, + "low_lr": 9.978947368421053e-06, + "step": 952 + }, + { + "epoch": 2.5036160420775806, + "high_lr": 0.0004989473684210527, + "low_lr": 9.978947368421053e-06, + "step": 952 + }, + { + "epoch": 2.5036160420775806, + "high_lr": 0.0004989473684210527, + "low_lr": 9.978947368421053e-06, + "step": 952 + }, + { + "epoch": 2.5036160420775806, + "high_lr": 0.0004989473684210527, + "low_lr": 9.978947368421053e-06, + "step": 952 + }, + { + "epoch": 2.5062458908612752, + "grad_norm": 1.202459692955017, + "learning_rate": 0.000498421052631579, + "loss": 1.3616, + "step": 953 + }, + { + "epoch": 2.5062458908612752, + "high_lr": 0.000498421052631579, + "low_lr": 9.96842105263158e-06, + "step": 953 + }, + { + "epoch": 2.5062458908612752, + "high_lr": 0.000498421052631579, + "low_lr": 9.96842105263158e-06, + "step": 953 + }, + { + "epoch": 2.5062458908612752, + "high_lr": 0.000498421052631579, + "low_lr": 9.96842105263158e-06, + "step": 953 + }, + { + "epoch": 2.5062458908612752, + "high_lr": 0.000498421052631579, + "low_lr": 9.96842105263158e-06, + "step": 953 + }, + { + "epoch": 2.5062458908612752, + "high_lr": 0.000498421052631579, + "low_lr": 9.96842105263158e-06, + "step": 953 + }, + { + "epoch": 2.5062458908612752, + "high_lr": 0.000498421052631579, + "low_lr": 9.96842105263158e-06, + "step": 953 + }, + { + "epoch": 2.5062458908612752, + "high_lr": 0.000498421052631579, + "low_lr": 9.96842105263158e-06, + "step": 953 + }, + { + "epoch": 2.5062458908612752, + "high_lr": 0.000498421052631579, + "low_lr": 9.96842105263158e-06, + "step": 953 + }, + { + "epoch": 2.5088757396449703, + "grad_norm": 1.286462426185608, + "learning_rate": 0.0004978947368421053, + "loss": 1.3871, + "step": 954 + }, + { + "epoch": 2.5088757396449703, + "high_lr": 0.0004978947368421053, + "low_lr": 9.957894736842106e-06, + "step": 954 + }, + { + "epoch": 2.5088757396449703, + "high_lr": 0.0004978947368421053, + "low_lr": 9.957894736842106e-06, + "step": 954 + }, + { + "epoch": 2.5088757396449703, + "high_lr": 0.0004978947368421053, + "low_lr": 9.957894736842106e-06, + "step": 954 + }, + { + "epoch": 2.5088757396449703, + "high_lr": 0.0004978947368421053, + "low_lr": 9.957894736842106e-06, + "step": 954 + }, + { + "epoch": 2.5088757396449703, + "high_lr": 0.0004978947368421053, + "low_lr": 9.957894736842106e-06, + "step": 954 + }, + { + "epoch": 2.5088757396449703, + "high_lr": 0.0004978947368421053, + "low_lr": 9.957894736842106e-06, + "step": 954 + }, + { + "epoch": 2.5088757396449703, + "high_lr": 0.0004978947368421053, + "low_lr": 9.957894736842106e-06, + "step": 954 + }, + { + "epoch": 2.5088757396449703, + "high_lr": 0.0004978947368421053, + "low_lr": 9.957894736842106e-06, + "step": 954 + }, + { + "epoch": 2.5115055884286654, + "grad_norm": 1.2855150699615479, + "learning_rate": 0.0004973684210526315, + "loss": 1.3865, + "step": 955 + }, + { + "epoch": 2.5115055884286654, + "high_lr": 0.0004973684210526315, + "low_lr": 9.947368421052632e-06, + "step": 955 + }, + { + "epoch": 2.5115055884286654, + "high_lr": 0.0004973684210526315, + "low_lr": 9.947368421052632e-06, + "step": 955 + }, + { + "epoch": 2.5115055884286654, + "high_lr": 0.0004973684210526315, + "low_lr": 9.947368421052632e-06, + "step": 955 + }, + { + "epoch": 2.5115055884286654, + "high_lr": 0.0004973684210526315, + "low_lr": 9.947368421052632e-06, + "step": 955 + }, + { + "epoch": 2.5115055884286654, + "high_lr": 0.0004973684210526315, + "low_lr": 9.947368421052632e-06, + "step": 955 + }, + { + "epoch": 2.5115055884286654, + "high_lr": 0.0004973684210526315, + "low_lr": 9.947368421052632e-06, + "step": 955 + }, + { + "epoch": 2.5115055884286654, + "high_lr": 0.0004973684210526315, + "low_lr": 9.947368421052632e-06, + "step": 955 + }, + { + "epoch": 2.5115055884286654, + "high_lr": 0.0004973684210526315, + "low_lr": 9.947368421052632e-06, + "step": 955 + }, + { + "epoch": 2.51413543721236, + "grad_norm": 1.3831669092178345, + "learning_rate": 0.0004968421052631579, + "loss": 1.3705, + "step": 956 + }, + { + "epoch": 2.51413543721236, + "high_lr": 0.0004968421052631579, + "low_lr": 9.936842105263159e-06, + "step": 956 + }, + { + "epoch": 2.51413543721236, + "high_lr": 0.0004968421052631579, + "low_lr": 9.936842105263159e-06, + "step": 956 + }, + { + "epoch": 2.51413543721236, + "high_lr": 0.0004968421052631579, + "low_lr": 9.936842105263159e-06, + "step": 956 + }, + { + "epoch": 2.51413543721236, + "high_lr": 0.0004968421052631579, + "low_lr": 9.936842105263159e-06, + "step": 956 + }, + { + "epoch": 2.51413543721236, + "high_lr": 0.0004968421052631579, + "low_lr": 9.936842105263159e-06, + "step": 956 + }, + { + "epoch": 2.51413543721236, + "high_lr": 0.0004968421052631579, + "low_lr": 9.936842105263159e-06, + "step": 956 + }, + { + "epoch": 2.51413543721236, + "high_lr": 0.0004968421052631579, + "low_lr": 9.936842105263159e-06, + "step": 956 + }, + { + "epoch": 2.51413543721236, + "high_lr": 0.0004968421052631579, + "low_lr": 9.936842105263159e-06, + "step": 956 + }, + { + "epoch": 2.516765285996055, + "grad_norm": 1.3121248483657837, + "learning_rate": 0.0004963157894736842, + "loss": 1.4089, + "step": 957 + }, + { + "epoch": 2.516765285996055, + "high_lr": 0.0004963157894736842, + "low_lr": 9.926315789473685e-06, + "step": 957 + }, + { + "epoch": 2.516765285996055, + "high_lr": 0.0004963157894736842, + "low_lr": 9.926315789473685e-06, + "step": 957 + }, + { + "epoch": 2.516765285996055, + "high_lr": 0.0004963157894736842, + "low_lr": 9.926315789473685e-06, + "step": 957 + }, + { + "epoch": 2.516765285996055, + "high_lr": 0.0004963157894736842, + "low_lr": 9.926315789473685e-06, + "step": 957 + }, + { + "epoch": 2.516765285996055, + "high_lr": 0.0004963157894736842, + "low_lr": 9.926315789473685e-06, + "step": 957 + }, + { + "epoch": 2.516765285996055, + "high_lr": 0.0004963157894736842, + "low_lr": 9.926315789473685e-06, + "step": 957 + }, + { + "epoch": 2.516765285996055, + "high_lr": 0.0004963157894736842, + "low_lr": 9.926315789473685e-06, + "step": 957 + }, + { + "epoch": 2.516765285996055, + "high_lr": 0.0004963157894736842, + "low_lr": 9.926315789473685e-06, + "step": 957 + }, + { + "epoch": 2.5193951347797503, + "grad_norm": 1.2612452507019043, + "learning_rate": 0.0004957894736842105, + "loss": 1.4095, + "step": 958 + }, + { + "epoch": 2.5193951347797503, + "high_lr": 0.0004957894736842105, + "low_lr": 9.915789473684211e-06, + "step": 958 + }, + { + "epoch": 2.5193951347797503, + "high_lr": 0.0004957894736842105, + "low_lr": 9.915789473684211e-06, + "step": 958 + }, + { + "epoch": 2.5193951347797503, + "high_lr": 0.0004957894736842105, + "low_lr": 9.915789473684211e-06, + "step": 958 + }, + { + "epoch": 2.5193951347797503, + "high_lr": 0.0004957894736842105, + "low_lr": 9.915789473684211e-06, + "step": 958 + }, + { + "epoch": 2.5193951347797503, + "high_lr": 0.0004957894736842105, + "low_lr": 9.915789473684211e-06, + "step": 958 + }, + { + "epoch": 2.5193951347797503, + "high_lr": 0.0004957894736842105, + "low_lr": 9.915789473684211e-06, + "step": 958 + }, + { + "epoch": 2.5193951347797503, + "high_lr": 0.0004957894736842105, + "low_lr": 9.915789473684211e-06, + "step": 958 + }, + { + "epoch": 2.5193951347797503, + "high_lr": 0.0004957894736842105, + "low_lr": 9.915789473684211e-06, + "step": 958 + }, + { + "epoch": 2.522024983563445, + "grad_norm": 1.2511200904846191, + "learning_rate": 0.0004952631578947369, + "loss": 1.4177, + "step": 959 + }, + { + "epoch": 2.522024983563445, + "high_lr": 0.0004952631578947369, + "low_lr": 9.905263157894738e-06, + "step": 959 + }, + { + "epoch": 2.522024983563445, + "high_lr": 0.0004952631578947369, + "low_lr": 9.905263157894738e-06, + "step": 959 + }, + { + "epoch": 2.522024983563445, + "high_lr": 0.0004952631578947369, + "low_lr": 9.905263157894738e-06, + "step": 959 + }, + { + "epoch": 2.522024983563445, + "high_lr": 0.0004952631578947369, + "low_lr": 9.905263157894738e-06, + "step": 959 + }, + { + "epoch": 2.522024983563445, + "high_lr": 0.0004952631578947369, + "low_lr": 9.905263157894738e-06, + "step": 959 + }, + { + "epoch": 2.522024983563445, + "high_lr": 0.0004952631578947369, + "low_lr": 9.905263157894738e-06, + "step": 959 + }, + { + "epoch": 2.522024983563445, + "high_lr": 0.0004952631578947369, + "low_lr": 9.905263157894738e-06, + "step": 959 + }, + { + "epoch": 2.522024983563445, + "high_lr": 0.0004952631578947369, + "low_lr": 9.905263157894738e-06, + "step": 959 + }, + { + "epoch": 2.52465483234714, + "grad_norm": 1.2834885120391846, + "learning_rate": 0.0004947368421052632, + "loss": 1.3563, + "step": 960 + }, + { + "epoch": 2.52465483234714, + "high_lr": 0.0004947368421052632, + "low_lr": 9.894736842105264e-06, + "step": 960 + }, + { + "epoch": 2.52465483234714, + "high_lr": 0.0004947368421052632, + "low_lr": 9.894736842105264e-06, + "step": 960 + }, + { + "epoch": 2.52465483234714, + "high_lr": 0.0004947368421052632, + "low_lr": 9.894736842105264e-06, + "step": 960 + }, + { + "epoch": 2.52465483234714, + "high_lr": 0.0004947368421052632, + "low_lr": 9.894736842105264e-06, + "step": 960 + }, + { + "epoch": 2.52465483234714, + "high_lr": 0.0004947368421052632, + "low_lr": 9.894736842105264e-06, + "step": 960 + }, + { + "epoch": 2.52465483234714, + "high_lr": 0.0004947368421052632, + "low_lr": 9.894736842105264e-06, + "step": 960 + }, + { + "epoch": 2.52465483234714, + "high_lr": 0.0004947368421052632, + "low_lr": 9.894736842105264e-06, + "step": 960 + }, + { + "epoch": 2.52465483234714, + "high_lr": 0.0004947368421052632, + "low_lr": 9.894736842105264e-06, + "step": 960 + }, + { + "epoch": 2.527284681130835, + "grad_norm": 1.2725642919540405, + "learning_rate": 0.0004942105263157895, + "loss": 1.3973, + "step": 961 + }, + { + "epoch": 2.527284681130835, + "high_lr": 0.0004942105263157895, + "low_lr": 9.88421052631579e-06, + "step": 961 + }, + { + "epoch": 2.527284681130835, + "high_lr": 0.0004942105263157895, + "low_lr": 9.88421052631579e-06, + "step": 961 + }, + { + "epoch": 2.527284681130835, + "high_lr": 0.0004942105263157895, + "low_lr": 9.88421052631579e-06, + "step": 961 + }, + { + "epoch": 2.527284681130835, + "high_lr": 0.0004942105263157895, + "low_lr": 9.88421052631579e-06, + "step": 961 + }, + { + "epoch": 2.527284681130835, + "high_lr": 0.0004942105263157895, + "low_lr": 9.88421052631579e-06, + "step": 961 + }, + { + "epoch": 2.527284681130835, + "high_lr": 0.0004942105263157895, + "low_lr": 9.88421052631579e-06, + "step": 961 + }, + { + "epoch": 2.527284681130835, + "high_lr": 0.0004942105263157895, + "low_lr": 9.88421052631579e-06, + "step": 961 + }, + { + "epoch": 2.527284681130835, + "high_lr": 0.0004942105263157895, + "low_lr": 9.88421052631579e-06, + "step": 961 + }, + { + "epoch": 2.52991452991453, + "grad_norm": 1.165604591369629, + "learning_rate": 0.0004936842105263158, + "loss": 1.367, + "step": 962 + }, + { + "epoch": 2.52991452991453, + "high_lr": 0.0004936842105263158, + "low_lr": 9.873684210526317e-06, + "step": 962 + }, + { + "epoch": 2.52991452991453, + "high_lr": 0.0004936842105263158, + "low_lr": 9.873684210526317e-06, + "step": 962 + }, + { + "epoch": 2.52991452991453, + "high_lr": 0.0004936842105263158, + "low_lr": 9.873684210526317e-06, + "step": 962 + }, + { + "epoch": 2.52991452991453, + "high_lr": 0.0004936842105263158, + "low_lr": 9.873684210526317e-06, + "step": 962 + }, + { + "epoch": 2.52991452991453, + "high_lr": 0.0004936842105263158, + "low_lr": 9.873684210526317e-06, + "step": 962 + }, + { + "epoch": 2.52991452991453, + "high_lr": 0.0004936842105263158, + "low_lr": 9.873684210526317e-06, + "step": 962 + }, + { + "epoch": 2.52991452991453, + "high_lr": 0.0004936842105263158, + "low_lr": 9.873684210526317e-06, + "step": 962 + }, + { + "epoch": 2.52991452991453, + "high_lr": 0.0004936842105263158, + "low_lr": 9.873684210526317e-06, + "step": 962 + }, + { + "epoch": 2.532544378698225, + "grad_norm": 1.3213335275650024, + "learning_rate": 0.0004931578947368422, + "loss": 1.4425, + "step": 963 + }, + { + "epoch": 2.532544378698225, + "high_lr": 0.0004931578947368422, + "low_lr": 9.863157894736843e-06, + "step": 963 + }, + { + "epoch": 2.532544378698225, + "high_lr": 0.0004931578947368422, + "low_lr": 9.863157894736843e-06, + "step": 963 + }, + { + "epoch": 2.532544378698225, + "high_lr": 0.0004931578947368422, + "low_lr": 9.863157894736843e-06, + "step": 963 + }, + { + "epoch": 2.532544378698225, + "high_lr": 0.0004931578947368422, + "low_lr": 9.863157894736843e-06, + "step": 963 + }, + { + "epoch": 2.532544378698225, + "high_lr": 0.0004931578947368422, + "low_lr": 9.863157894736843e-06, + "step": 963 + }, + { + "epoch": 2.532544378698225, + "high_lr": 0.0004931578947368422, + "low_lr": 9.863157894736843e-06, + "step": 963 + }, + { + "epoch": 2.532544378698225, + "high_lr": 0.0004931578947368422, + "low_lr": 9.863157894736843e-06, + "step": 963 + }, + { + "epoch": 2.532544378698225, + "high_lr": 0.0004931578947368422, + "low_lr": 9.863157894736843e-06, + "step": 963 + }, + { + "epoch": 2.53517422748192, + "grad_norm": 1.2542871236801147, + "learning_rate": 0.0004926315789473684, + "loss": 1.4138, + "step": 964 + }, + { + "epoch": 2.53517422748192, + "high_lr": 0.0004926315789473684, + "low_lr": 9.85263157894737e-06, + "step": 964 + }, + { + "epoch": 2.53517422748192, + "high_lr": 0.0004926315789473684, + "low_lr": 9.85263157894737e-06, + "step": 964 + }, + { + "epoch": 2.53517422748192, + "high_lr": 0.0004926315789473684, + "low_lr": 9.85263157894737e-06, + "step": 964 + }, + { + "epoch": 2.53517422748192, + "high_lr": 0.0004926315789473684, + "low_lr": 9.85263157894737e-06, + "step": 964 + }, + { + "epoch": 2.53517422748192, + "high_lr": 0.0004926315789473684, + "low_lr": 9.85263157894737e-06, + "step": 964 + }, + { + "epoch": 2.53517422748192, + "high_lr": 0.0004926315789473684, + "low_lr": 9.85263157894737e-06, + "step": 964 + }, + { + "epoch": 2.53517422748192, + "high_lr": 0.0004926315789473684, + "low_lr": 9.85263157894737e-06, + "step": 964 + }, + { + "epoch": 2.53517422748192, + "high_lr": 0.0004926315789473684, + "low_lr": 9.85263157894737e-06, + "step": 964 + }, + { + "epoch": 2.5378040762656147, + "grad_norm": 1.2876955270767212, + "learning_rate": 0.0004921052631578947, + "loss": 1.415, + "step": 965 + }, + { + "epoch": 2.5378040762656147, + "high_lr": 0.0004921052631578947, + "low_lr": 9.842105263157896e-06, + "step": 965 + }, + { + "epoch": 2.5378040762656147, + "high_lr": 0.0004921052631578947, + "low_lr": 9.842105263157896e-06, + "step": 965 + }, + { + "epoch": 2.5378040762656147, + "high_lr": 0.0004921052631578947, + "low_lr": 9.842105263157896e-06, + "step": 965 + }, + { + "epoch": 2.5378040762656147, + "high_lr": 0.0004921052631578947, + "low_lr": 9.842105263157896e-06, + "step": 965 + }, + { + "epoch": 2.5378040762656147, + "high_lr": 0.0004921052631578947, + "low_lr": 9.842105263157896e-06, + "step": 965 + }, + { + "epoch": 2.5378040762656147, + "high_lr": 0.0004921052631578947, + "low_lr": 9.842105263157896e-06, + "step": 965 + }, + { + "epoch": 2.5378040762656147, + "high_lr": 0.0004921052631578947, + "low_lr": 9.842105263157896e-06, + "step": 965 + }, + { + "epoch": 2.5378040762656147, + "high_lr": 0.0004921052631578947, + "low_lr": 9.842105263157896e-06, + "step": 965 + }, + { + "epoch": 2.5404339250493098, + "grad_norm": 1.2044575214385986, + "learning_rate": 0.000491578947368421, + "loss": 1.3757, + "step": 966 + }, + { + "epoch": 2.5404339250493098, + "high_lr": 0.000491578947368421, + "low_lr": 9.831578947368422e-06, + "step": 966 + }, + { + "epoch": 2.5404339250493098, + "high_lr": 0.000491578947368421, + "low_lr": 9.831578947368422e-06, + "step": 966 + }, + { + "epoch": 2.5404339250493098, + "high_lr": 0.000491578947368421, + "low_lr": 9.831578947368422e-06, + "step": 966 + }, + { + "epoch": 2.5404339250493098, + "high_lr": 0.000491578947368421, + "low_lr": 9.831578947368422e-06, + "step": 966 + }, + { + "epoch": 2.5404339250493098, + "high_lr": 0.000491578947368421, + "low_lr": 9.831578947368422e-06, + "step": 966 + }, + { + "epoch": 2.5404339250493098, + "high_lr": 0.000491578947368421, + "low_lr": 9.831578947368422e-06, + "step": 966 + }, + { + "epoch": 2.5404339250493098, + "high_lr": 0.000491578947368421, + "low_lr": 9.831578947368422e-06, + "step": 966 + }, + { + "epoch": 2.5404339250493098, + "high_lr": 0.000491578947368421, + "low_lr": 9.831578947368422e-06, + "step": 966 + }, + { + "epoch": 2.543063773833005, + "grad_norm": 2.4558346271514893, + "learning_rate": 0.0004910526315789474, + "loss": 1.4181, + "step": 967 + }, + { + "epoch": 2.543063773833005, + "high_lr": 0.0004910526315789474, + "low_lr": 9.821052631578948e-06, + "step": 967 + }, + { + "epoch": 2.543063773833005, + "high_lr": 0.0004910526315789474, + "low_lr": 9.821052631578948e-06, + "step": 967 + }, + { + "epoch": 2.543063773833005, + "high_lr": 0.0004910526315789474, + "low_lr": 9.821052631578948e-06, + "step": 967 + }, + { + "epoch": 2.543063773833005, + "high_lr": 0.0004910526315789474, + "low_lr": 9.821052631578948e-06, + "step": 967 + }, + { + "epoch": 2.543063773833005, + "high_lr": 0.0004910526315789474, + "low_lr": 9.821052631578948e-06, + "step": 967 + }, + { + "epoch": 2.543063773833005, + "high_lr": 0.0004910526315789474, + "low_lr": 9.821052631578948e-06, + "step": 967 + }, + { + "epoch": 2.543063773833005, + "high_lr": 0.0004910526315789474, + "low_lr": 9.821052631578948e-06, + "step": 967 + }, + { + "epoch": 2.543063773833005, + "high_lr": 0.0004910526315789474, + "low_lr": 9.821052631578948e-06, + "step": 967 + }, + { + "epoch": 2.5456936226166995, + "grad_norm": 1.3899211883544922, + "learning_rate": 0.0004905263157894737, + "loss": 1.4303, + "step": 968 + }, + { + "epoch": 2.5456936226166995, + "high_lr": 0.0004905263157894737, + "low_lr": 9.810526315789475e-06, + "step": 968 + }, + { + "epoch": 2.5456936226166995, + "high_lr": 0.0004905263157894737, + "low_lr": 9.810526315789475e-06, + "step": 968 + }, + { + "epoch": 2.5456936226166995, + "high_lr": 0.0004905263157894737, + "low_lr": 9.810526315789475e-06, + "step": 968 + }, + { + "epoch": 2.5456936226166995, + "high_lr": 0.0004905263157894737, + "low_lr": 9.810526315789475e-06, + "step": 968 + }, + { + "epoch": 2.5456936226166995, + "high_lr": 0.0004905263157894737, + "low_lr": 9.810526315789475e-06, + "step": 968 + }, + { + "epoch": 2.5456936226166995, + "high_lr": 0.0004905263157894737, + "low_lr": 9.810526315789475e-06, + "step": 968 + }, + { + "epoch": 2.5456936226166995, + "high_lr": 0.0004905263157894737, + "low_lr": 9.810526315789475e-06, + "step": 968 + }, + { + "epoch": 2.5456936226166995, + "high_lr": 0.0004905263157894737, + "low_lr": 9.810526315789475e-06, + "step": 968 + }, + { + "epoch": 2.5483234714003946, + "grad_norm": 1.3217566013336182, + "learning_rate": 0.00049, + "loss": 1.3862, + "step": 969 + }, + { + "epoch": 2.5483234714003946, + "high_lr": 0.00049, + "low_lr": 9.800000000000001e-06, + "step": 969 + }, + { + "epoch": 2.5483234714003946, + "high_lr": 0.00049, + "low_lr": 9.800000000000001e-06, + "step": 969 + }, + { + "epoch": 2.5483234714003946, + "high_lr": 0.00049, + "low_lr": 9.800000000000001e-06, + "step": 969 + }, + { + "epoch": 2.5483234714003946, + "high_lr": 0.00049, + "low_lr": 9.800000000000001e-06, + "step": 969 + }, + { + "epoch": 2.5483234714003946, + "high_lr": 0.00049, + "low_lr": 9.800000000000001e-06, + "step": 969 + }, + { + "epoch": 2.5483234714003946, + "high_lr": 0.00049, + "low_lr": 9.800000000000001e-06, + "step": 969 + }, + { + "epoch": 2.5483234714003946, + "high_lr": 0.00049, + "low_lr": 9.800000000000001e-06, + "step": 969 + }, + { + "epoch": 2.5483234714003946, + "high_lr": 0.00049, + "low_lr": 9.800000000000001e-06, + "step": 969 + }, + { + "epoch": 2.5509533201840893, + "grad_norm": 1.2447056770324707, + "learning_rate": 0.0004894736842105264, + "loss": 1.3551, + "step": 970 + }, + { + "epoch": 2.5509533201840893, + "high_lr": 0.0004894736842105264, + "low_lr": 9.789473684210527e-06, + "step": 970 + }, + { + "epoch": 2.5509533201840893, + "high_lr": 0.0004894736842105264, + "low_lr": 9.789473684210527e-06, + "step": 970 + }, + { + "epoch": 2.5509533201840893, + "high_lr": 0.0004894736842105264, + "low_lr": 9.789473684210527e-06, + "step": 970 + }, + { + "epoch": 2.5509533201840893, + "high_lr": 0.0004894736842105264, + "low_lr": 9.789473684210527e-06, + "step": 970 + }, + { + "epoch": 2.5509533201840893, + "high_lr": 0.0004894736842105264, + "low_lr": 9.789473684210527e-06, + "step": 970 + }, + { + "epoch": 2.5509533201840893, + "high_lr": 0.0004894736842105264, + "low_lr": 9.789473684210527e-06, + "step": 970 + }, + { + "epoch": 2.5509533201840893, + "high_lr": 0.0004894736842105264, + "low_lr": 9.789473684210527e-06, + "step": 970 + }, + { + "epoch": 2.5509533201840893, + "high_lr": 0.0004894736842105264, + "low_lr": 9.789473684210527e-06, + "step": 970 + }, + { + "epoch": 2.5535831689677844, + "grad_norm": 1.3785122632980347, + "learning_rate": 0.0004889473684210527, + "loss": 1.4053, + "step": 971 + }, + { + "epoch": 2.5535831689677844, + "high_lr": 0.0004889473684210527, + "low_lr": 9.778947368421054e-06, + "step": 971 + }, + { + "epoch": 2.5535831689677844, + "high_lr": 0.0004889473684210527, + "low_lr": 9.778947368421054e-06, + "step": 971 + }, + { + "epoch": 2.5535831689677844, + "high_lr": 0.0004889473684210527, + "low_lr": 9.778947368421054e-06, + "step": 971 + }, + { + "epoch": 2.5535831689677844, + "high_lr": 0.0004889473684210527, + "low_lr": 9.778947368421054e-06, + "step": 971 + }, + { + "epoch": 2.5535831689677844, + "high_lr": 0.0004889473684210527, + "low_lr": 9.778947368421054e-06, + "step": 971 + }, + { + "epoch": 2.5535831689677844, + "high_lr": 0.0004889473684210527, + "low_lr": 9.778947368421054e-06, + "step": 971 + }, + { + "epoch": 2.5535831689677844, + "high_lr": 0.0004889473684210527, + "low_lr": 9.778947368421054e-06, + "step": 971 + }, + { + "epoch": 2.5535831689677844, + "high_lr": 0.0004889473684210527, + "low_lr": 9.778947368421054e-06, + "step": 971 + }, + { + "epoch": 2.556213017751479, + "grad_norm": 1.2596253156661987, + "learning_rate": 0.000488421052631579, + "loss": 1.4237, + "step": 972 + }, + { + "epoch": 2.556213017751479, + "high_lr": 0.000488421052631579, + "low_lr": 9.76842105263158e-06, + "step": 972 + }, + { + "epoch": 2.556213017751479, + "high_lr": 0.000488421052631579, + "low_lr": 9.76842105263158e-06, + "step": 972 + }, + { + "epoch": 2.556213017751479, + "high_lr": 0.000488421052631579, + "low_lr": 9.76842105263158e-06, + "step": 972 + }, + { + "epoch": 2.556213017751479, + "high_lr": 0.000488421052631579, + "low_lr": 9.76842105263158e-06, + "step": 972 + }, + { + "epoch": 2.556213017751479, + "high_lr": 0.000488421052631579, + "low_lr": 9.76842105263158e-06, + "step": 972 + }, + { + "epoch": 2.556213017751479, + "high_lr": 0.000488421052631579, + "low_lr": 9.76842105263158e-06, + "step": 972 + }, + { + "epoch": 2.556213017751479, + "high_lr": 0.000488421052631579, + "low_lr": 9.76842105263158e-06, + "step": 972 + }, + { + "epoch": 2.556213017751479, + "high_lr": 0.000488421052631579, + "low_lr": 9.76842105263158e-06, + "step": 972 + }, + { + "epoch": 2.558842866535174, + "grad_norm": 1.2338931560516357, + "learning_rate": 0.0004878947368421053, + "loss": 1.3836, + "step": 973 + }, + { + "epoch": 2.558842866535174, + "high_lr": 0.0004878947368421053, + "low_lr": 9.757894736842106e-06, + "step": 973 + }, + { + "epoch": 2.558842866535174, + "high_lr": 0.0004878947368421053, + "low_lr": 9.757894736842106e-06, + "step": 973 + }, + { + "epoch": 2.558842866535174, + "high_lr": 0.0004878947368421053, + "low_lr": 9.757894736842106e-06, + "step": 973 + }, + { + "epoch": 2.558842866535174, + "high_lr": 0.0004878947368421053, + "low_lr": 9.757894736842106e-06, + "step": 973 + }, + { + "epoch": 2.558842866535174, + "high_lr": 0.0004878947368421053, + "low_lr": 9.757894736842106e-06, + "step": 973 + }, + { + "epoch": 2.558842866535174, + "high_lr": 0.0004878947368421053, + "low_lr": 9.757894736842106e-06, + "step": 973 + }, + { + "epoch": 2.558842866535174, + "high_lr": 0.0004878947368421053, + "low_lr": 9.757894736842106e-06, + "step": 973 + }, + { + "epoch": 2.558842866535174, + "high_lr": 0.0004878947368421053, + "low_lr": 9.757894736842106e-06, + "step": 973 + }, + { + "epoch": 2.561472715318869, + "grad_norm": 1.2043802738189697, + "learning_rate": 0.0004873684210526316, + "loss": 1.3826, + "step": 974 + }, + { + "epoch": 2.561472715318869, + "high_lr": 0.0004873684210526316, + "low_lr": 9.747368421052633e-06, + "step": 974 + }, + { + "epoch": 2.561472715318869, + "high_lr": 0.0004873684210526316, + "low_lr": 9.747368421052633e-06, + "step": 974 + }, + { + "epoch": 2.561472715318869, + "high_lr": 0.0004873684210526316, + "low_lr": 9.747368421052633e-06, + "step": 974 + }, + { + "epoch": 2.561472715318869, + "high_lr": 0.0004873684210526316, + "low_lr": 9.747368421052633e-06, + "step": 974 + }, + { + "epoch": 2.561472715318869, + "high_lr": 0.0004873684210526316, + "low_lr": 9.747368421052633e-06, + "step": 974 + }, + { + "epoch": 2.561472715318869, + "high_lr": 0.0004873684210526316, + "low_lr": 9.747368421052633e-06, + "step": 974 + }, + { + "epoch": 2.561472715318869, + "high_lr": 0.0004873684210526316, + "low_lr": 9.747368421052633e-06, + "step": 974 + }, + { + "epoch": 2.561472715318869, + "high_lr": 0.0004873684210526316, + "low_lr": 9.747368421052633e-06, + "step": 974 + }, + { + "epoch": 2.564102564102564, + "grad_norm": 1.2998297214508057, + "learning_rate": 0.0004868421052631579, + "loss": 1.3963, + "step": 975 + }, + { + "epoch": 2.564102564102564, + "high_lr": 0.0004868421052631579, + "low_lr": 9.736842105263159e-06, + "step": 975 + }, + { + "epoch": 2.564102564102564, + "high_lr": 0.0004868421052631579, + "low_lr": 9.736842105263159e-06, + "step": 975 + }, + { + "epoch": 2.564102564102564, + "high_lr": 0.0004868421052631579, + "low_lr": 9.736842105263159e-06, + "step": 975 + }, + { + "epoch": 2.564102564102564, + "high_lr": 0.0004868421052631579, + "low_lr": 9.736842105263159e-06, + "step": 975 + }, + { + "epoch": 2.564102564102564, + "high_lr": 0.0004868421052631579, + "low_lr": 9.736842105263159e-06, + "step": 975 + }, + { + "epoch": 2.564102564102564, + "high_lr": 0.0004868421052631579, + "low_lr": 9.736842105263159e-06, + "step": 975 + }, + { + "epoch": 2.564102564102564, + "high_lr": 0.0004868421052631579, + "low_lr": 9.736842105263159e-06, + "step": 975 + }, + { + "epoch": 2.564102564102564, + "high_lr": 0.0004868421052631579, + "low_lr": 9.736842105263159e-06, + "step": 975 + }, + { + "epoch": 2.566732412886259, + "grad_norm": 1.3420791625976562, + "learning_rate": 0.0004863157894736842, + "loss": 1.4045, + "step": 976 + }, + { + "epoch": 2.566732412886259, + "high_lr": 0.0004863157894736842, + "low_lr": 9.726315789473685e-06, + "step": 976 + }, + { + "epoch": 2.566732412886259, + "high_lr": 0.0004863157894736842, + "low_lr": 9.726315789473685e-06, + "step": 976 + }, + { + "epoch": 2.566732412886259, + "high_lr": 0.0004863157894736842, + "low_lr": 9.726315789473685e-06, + "step": 976 + }, + { + "epoch": 2.566732412886259, + "high_lr": 0.0004863157894736842, + "low_lr": 9.726315789473685e-06, + "step": 976 + }, + { + "epoch": 2.566732412886259, + "high_lr": 0.0004863157894736842, + "low_lr": 9.726315789473685e-06, + "step": 976 + }, + { + "epoch": 2.566732412886259, + "high_lr": 0.0004863157894736842, + "low_lr": 9.726315789473685e-06, + "step": 976 + }, + { + "epoch": 2.566732412886259, + "high_lr": 0.0004863157894736842, + "low_lr": 9.726315789473685e-06, + "step": 976 + }, + { + "epoch": 2.566732412886259, + "high_lr": 0.0004863157894736842, + "low_lr": 9.726315789473685e-06, + "step": 976 + }, + { + "epoch": 2.569362261669954, + "grad_norm": 1.2570667266845703, + "learning_rate": 0.00048578947368421054, + "loss": 1.3937, + "step": 977 + }, + { + "epoch": 2.569362261669954, + "high_lr": 0.00048578947368421054, + "low_lr": 9.715789473684212e-06, + "step": 977 + }, + { + "epoch": 2.569362261669954, + "high_lr": 0.00048578947368421054, + "low_lr": 9.715789473684212e-06, + "step": 977 + }, + { + "epoch": 2.569362261669954, + "high_lr": 0.00048578947368421054, + "low_lr": 9.715789473684212e-06, + "step": 977 + }, + { + "epoch": 2.569362261669954, + "high_lr": 0.00048578947368421054, + "low_lr": 9.715789473684212e-06, + "step": 977 + }, + { + "epoch": 2.569362261669954, + "high_lr": 0.00048578947368421054, + "low_lr": 9.715789473684212e-06, + "step": 977 + }, + { + "epoch": 2.569362261669954, + "high_lr": 0.00048578947368421054, + "low_lr": 9.715789473684212e-06, + "step": 977 + }, + { + "epoch": 2.569362261669954, + "high_lr": 0.00048578947368421054, + "low_lr": 9.715789473684212e-06, + "step": 977 + }, + { + "epoch": 2.569362261669954, + "high_lr": 0.00048578947368421054, + "low_lr": 9.715789473684212e-06, + "step": 977 + }, + { + "epoch": 2.5719921104536487, + "grad_norm": 1.1883705854415894, + "learning_rate": 0.00048526315789473683, + "loss": 1.3889, + "step": 978 + }, + { + "epoch": 2.5719921104536487, + "high_lr": 0.00048526315789473683, + "low_lr": 9.705263157894738e-06, + "step": 978 + }, + { + "epoch": 2.5719921104536487, + "high_lr": 0.00048526315789473683, + "low_lr": 9.705263157894738e-06, + "step": 978 + }, + { + "epoch": 2.5719921104536487, + "high_lr": 0.00048526315789473683, + "low_lr": 9.705263157894738e-06, + "step": 978 + }, + { + "epoch": 2.5719921104536487, + "high_lr": 0.00048526315789473683, + "low_lr": 9.705263157894738e-06, + "step": 978 + }, + { + "epoch": 2.5719921104536487, + "high_lr": 0.00048526315789473683, + "low_lr": 9.705263157894738e-06, + "step": 978 + }, + { + "epoch": 2.5719921104536487, + "high_lr": 0.00048526315789473683, + "low_lr": 9.705263157894738e-06, + "step": 978 + }, + { + "epoch": 2.5719921104536487, + "high_lr": 0.00048526315789473683, + "low_lr": 9.705263157894738e-06, + "step": 978 + }, + { + "epoch": 2.5719921104536487, + "high_lr": 0.00048526315789473683, + "low_lr": 9.705263157894738e-06, + "step": 978 + }, + { + "epoch": 2.574621959237344, + "grad_norm": 1.3627675771713257, + "learning_rate": 0.00048473684210526317, + "loss": 1.3776, + "step": 979 + }, + { + "epoch": 2.574621959237344, + "high_lr": 0.00048473684210526317, + "low_lr": 9.694736842105263e-06, + "step": 979 + }, + { + "epoch": 2.574621959237344, + "high_lr": 0.00048473684210526317, + "low_lr": 9.694736842105263e-06, + "step": 979 + }, + { + "epoch": 2.574621959237344, + "high_lr": 0.00048473684210526317, + "low_lr": 9.694736842105263e-06, + "step": 979 + }, + { + "epoch": 2.574621959237344, + "high_lr": 0.00048473684210526317, + "low_lr": 9.694736842105263e-06, + "step": 979 + }, + { + "epoch": 2.574621959237344, + "high_lr": 0.00048473684210526317, + "low_lr": 9.694736842105263e-06, + "step": 979 + }, + { + "epoch": 2.574621959237344, + "high_lr": 0.00048473684210526317, + "low_lr": 9.694736842105263e-06, + "step": 979 + }, + { + "epoch": 2.574621959237344, + "high_lr": 0.00048473684210526317, + "low_lr": 9.694736842105263e-06, + "step": 979 + }, + { + "epoch": 2.574621959237344, + "high_lr": 0.00048473684210526317, + "low_lr": 9.694736842105263e-06, + "step": 979 + }, + { + "epoch": 2.577251808021039, + "grad_norm": 1.2854207754135132, + "learning_rate": 0.0004842105263157895, + "loss": 1.4233, + "step": 980 + }, + { + "epoch": 2.577251808021039, + "high_lr": 0.0004842105263157895, + "low_lr": 9.68421052631579e-06, + "step": 980 + }, + { + "epoch": 2.577251808021039, + "high_lr": 0.0004842105263157895, + "low_lr": 9.68421052631579e-06, + "step": 980 + }, + { + "epoch": 2.577251808021039, + "high_lr": 0.0004842105263157895, + "low_lr": 9.68421052631579e-06, + "step": 980 + }, + { + "epoch": 2.577251808021039, + "high_lr": 0.0004842105263157895, + "low_lr": 9.68421052631579e-06, + "step": 980 + }, + { + "epoch": 2.577251808021039, + "high_lr": 0.0004842105263157895, + "low_lr": 9.68421052631579e-06, + "step": 980 + }, + { + "epoch": 2.577251808021039, + "high_lr": 0.0004842105263157895, + "low_lr": 9.68421052631579e-06, + "step": 980 + }, + { + "epoch": 2.577251808021039, + "high_lr": 0.0004842105263157895, + "low_lr": 9.68421052631579e-06, + "step": 980 + }, + { + "epoch": 2.577251808021039, + "high_lr": 0.0004842105263157895, + "low_lr": 9.68421052631579e-06, + "step": 980 + }, + { + "epoch": 2.5798816568047336, + "grad_norm": 1.1889302730560303, + "learning_rate": 0.0004836842105263158, + "loss": 1.3547, + "step": 981 + }, + { + "epoch": 2.5798816568047336, + "high_lr": 0.0004836842105263158, + "low_lr": 9.673684210526317e-06, + "step": 981 + }, + { + "epoch": 2.5798816568047336, + "high_lr": 0.0004836842105263158, + "low_lr": 9.673684210526317e-06, + "step": 981 + }, + { + "epoch": 2.5798816568047336, + "high_lr": 0.0004836842105263158, + "low_lr": 9.673684210526317e-06, + "step": 981 + }, + { + "epoch": 2.5798816568047336, + "high_lr": 0.0004836842105263158, + "low_lr": 9.673684210526317e-06, + "step": 981 + }, + { + "epoch": 2.5798816568047336, + "high_lr": 0.0004836842105263158, + "low_lr": 9.673684210526317e-06, + "step": 981 + }, + { + "epoch": 2.5798816568047336, + "high_lr": 0.0004836842105263158, + "low_lr": 9.673684210526317e-06, + "step": 981 + }, + { + "epoch": 2.5798816568047336, + "high_lr": 0.0004836842105263158, + "low_lr": 9.673684210526317e-06, + "step": 981 + }, + { + "epoch": 2.5798816568047336, + "high_lr": 0.0004836842105263158, + "low_lr": 9.673684210526317e-06, + "step": 981 + }, + { + "epoch": 2.5825115055884287, + "grad_norm": 1.2391724586486816, + "learning_rate": 0.00048315789473684213, + "loss": 1.3968, + "step": 982 + }, + { + "epoch": 2.5825115055884287, + "high_lr": 0.00048315789473684213, + "low_lr": 9.663157894736843e-06, + "step": 982 + }, + { + "epoch": 2.5825115055884287, + "high_lr": 0.00048315789473684213, + "low_lr": 9.663157894736843e-06, + "step": 982 + }, + { + "epoch": 2.5825115055884287, + "high_lr": 0.00048315789473684213, + "low_lr": 9.663157894736843e-06, + "step": 982 + }, + { + "epoch": 2.5825115055884287, + "high_lr": 0.00048315789473684213, + "low_lr": 9.663157894736843e-06, + "step": 982 + }, + { + "epoch": 2.5825115055884287, + "high_lr": 0.00048315789473684213, + "low_lr": 9.663157894736843e-06, + "step": 982 + }, + { + "epoch": 2.5825115055884287, + "high_lr": 0.00048315789473684213, + "low_lr": 9.663157894736843e-06, + "step": 982 + }, + { + "epoch": 2.5825115055884287, + "high_lr": 0.00048315789473684213, + "low_lr": 9.663157894736843e-06, + "step": 982 + }, + { + "epoch": 2.5825115055884287, + "high_lr": 0.00048315789473684213, + "low_lr": 9.663157894736843e-06, + "step": 982 + }, + { + "epoch": 2.585141354372124, + "grad_norm": 1.2237834930419922, + "learning_rate": 0.0004826315789473684, + "loss": 1.3391, + "step": 983 + }, + { + "epoch": 2.585141354372124, + "high_lr": 0.0004826315789473684, + "low_lr": 9.65263157894737e-06, + "step": 983 + }, + { + "epoch": 2.585141354372124, + "high_lr": 0.0004826315789473684, + "low_lr": 9.65263157894737e-06, + "step": 983 + }, + { + "epoch": 2.585141354372124, + "high_lr": 0.0004826315789473684, + "low_lr": 9.65263157894737e-06, + "step": 983 + }, + { + "epoch": 2.585141354372124, + "high_lr": 0.0004826315789473684, + "low_lr": 9.65263157894737e-06, + "step": 983 + }, + { + "epoch": 2.585141354372124, + "high_lr": 0.0004826315789473684, + "low_lr": 9.65263157894737e-06, + "step": 983 + }, + { + "epoch": 2.585141354372124, + "high_lr": 0.0004826315789473684, + "low_lr": 9.65263157894737e-06, + "step": 983 + }, + { + "epoch": 2.585141354372124, + "high_lr": 0.0004826315789473684, + "low_lr": 9.65263157894737e-06, + "step": 983 + }, + { + "epoch": 2.585141354372124, + "high_lr": 0.0004826315789473684, + "low_lr": 9.65263157894737e-06, + "step": 983 + }, + { + "epoch": 2.5877712031558184, + "grad_norm": 1.2656444311141968, + "learning_rate": 0.00048210526315789476, + "loss": 1.3969, + "step": 984 + }, + { + "epoch": 2.5877712031558184, + "high_lr": 0.00048210526315789476, + "low_lr": 9.642105263157896e-06, + "step": 984 + }, + { + "epoch": 2.5877712031558184, + "high_lr": 0.00048210526315789476, + "low_lr": 9.642105263157896e-06, + "step": 984 + }, + { + "epoch": 2.5877712031558184, + "high_lr": 0.00048210526315789476, + "low_lr": 9.642105263157896e-06, + "step": 984 + }, + { + "epoch": 2.5877712031558184, + "high_lr": 0.00048210526315789476, + "low_lr": 9.642105263157896e-06, + "step": 984 + }, + { + "epoch": 2.5877712031558184, + "high_lr": 0.00048210526315789476, + "low_lr": 9.642105263157896e-06, + "step": 984 + }, + { + "epoch": 2.5877712031558184, + "high_lr": 0.00048210526315789476, + "low_lr": 9.642105263157896e-06, + "step": 984 + }, + { + "epoch": 2.5877712031558184, + "high_lr": 0.00048210526315789476, + "low_lr": 9.642105263157896e-06, + "step": 984 + }, + { + "epoch": 2.5877712031558184, + "high_lr": 0.00048210526315789476, + "low_lr": 9.642105263157896e-06, + "step": 984 + }, + { + "epoch": 2.5904010519395135, + "grad_norm": 1.304051399230957, + "learning_rate": 0.00048157894736842105, + "loss": 1.3388, + "step": 985 + }, + { + "epoch": 2.5904010519395135, + "high_lr": 0.00048157894736842105, + "low_lr": 9.631578947368422e-06, + "step": 985 + }, + { + "epoch": 2.5904010519395135, + "high_lr": 0.00048157894736842105, + "low_lr": 9.631578947368422e-06, + "step": 985 + }, + { + "epoch": 2.5904010519395135, + "high_lr": 0.00048157894736842105, + "low_lr": 9.631578947368422e-06, + "step": 985 + }, + { + "epoch": 2.5904010519395135, + "high_lr": 0.00048157894736842105, + "low_lr": 9.631578947368422e-06, + "step": 985 + }, + { + "epoch": 2.5904010519395135, + "high_lr": 0.00048157894736842105, + "low_lr": 9.631578947368422e-06, + "step": 985 + }, + { + "epoch": 2.5904010519395135, + "high_lr": 0.00048157894736842105, + "low_lr": 9.631578947368422e-06, + "step": 985 + }, + { + "epoch": 2.5904010519395135, + "high_lr": 0.00048157894736842105, + "low_lr": 9.631578947368422e-06, + "step": 985 + }, + { + "epoch": 2.5904010519395135, + "high_lr": 0.00048157894736842105, + "low_lr": 9.631578947368422e-06, + "step": 985 + }, + { + "epoch": 2.5930309007232086, + "grad_norm": 1.3260159492492676, + "learning_rate": 0.00048105263157894733, + "loss": 1.3607, + "step": 986 + }, + { + "epoch": 2.5930309007232086, + "high_lr": 0.00048105263157894733, + "low_lr": 9.621052631578947e-06, + "step": 986 + }, + { + "epoch": 2.5930309007232086, + "high_lr": 0.00048105263157894733, + "low_lr": 9.621052631578947e-06, + "step": 986 + }, + { + "epoch": 2.5930309007232086, + "high_lr": 0.00048105263157894733, + "low_lr": 9.621052631578947e-06, + "step": 986 + }, + { + "epoch": 2.5930309007232086, + "high_lr": 0.00048105263157894733, + "low_lr": 9.621052631578947e-06, + "step": 986 + }, + { + "epoch": 2.5930309007232086, + "high_lr": 0.00048105263157894733, + "low_lr": 9.621052631578947e-06, + "step": 986 + }, + { + "epoch": 2.5930309007232086, + "high_lr": 0.00048105263157894733, + "low_lr": 9.621052631578947e-06, + "step": 986 + }, + { + "epoch": 2.5930309007232086, + "high_lr": 0.00048105263157894733, + "low_lr": 9.621052631578947e-06, + "step": 986 + }, + { + "epoch": 2.5930309007232086, + "high_lr": 0.00048105263157894733, + "low_lr": 9.621052631578947e-06, + "step": 986 + }, + { + "epoch": 2.5956607495069033, + "grad_norm": 1.2157127857208252, + "learning_rate": 0.0004805263157894737, + "loss": 1.3737, + "step": 987 + }, + { + "epoch": 2.5956607495069033, + "high_lr": 0.0004805263157894737, + "low_lr": 9.610526315789475e-06, + "step": 987 + }, + { + "epoch": 2.5956607495069033, + "high_lr": 0.0004805263157894737, + "low_lr": 9.610526315789475e-06, + "step": 987 + }, + { + "epoch": 2.5956607495069033, + "high_lr": 0.0004805263157894737, + "low_lr": 9.610526315789475e-06, + "step": 987 + }, + { + "epoch": 2.5956607495069033, + "high_lr": 0.0004805263157894737, + "low_lr": 9.610526315789475e-06, + "step": 987 + }, + { + "epoch": 2.5956607495069033, + "high_lr": 0.0004805263157894737, + "low_lr": 9.610526315789475e-06, + "step": 987 + }, + { + "epoch": 2.5956607495069033, + "high_lr": 0.0004805263157894737, + "low_lr": 9.610526315789475e-06, + "step": 987 + }, + { + "epoch": 2.5956607495069033, + "high_lr": 0.0004805263157894737, + "low_lr": 9.610526315789475e-06, + "step": 987 + }, + { + "epoch": 2.5956607495069033, + "high_lr": 0.0004805263157894737, + "low_lr": 9.610526315789475e-06, + "step": 987 + }, + { + "epoch": 2.5982905982905984, + "grad_norm": 1.289671778678894, + "learning_rate": 0.00048, + "loss": 1.3784, + "step": 988 + }, + { + "epoch": 2.5982905982905984, + "high_lr": 0.00048, + "low_lr": 9.600000000000001e-06, + "step": 988 + }, + { + "epoch": 2.5982905982905984, + "high_lr": 0.00048, + "low_lr": 9.600000000000001e-06, + "step": 988 + }, + { + "epoch": 2.5982905982905984, + "high_lr": 0.00048, + "low_lr": 9.600000000000001e-06, + "step": 988 + }, + { + "epoch": 2.5982905982905984, + "high_lr": 0.00048, + "low_lr": 9.600000000000001e-06, + "step": 988 + }, + { + "epoch": 2.5982905982905984, + "high_lr": 0.00048, + "low_lr": 9.600000000000001e-06, + "step": 988 + }, + { + "epoch": 2.5982905982905984, + "high_lr": 0.00048, + "low_lr": 9.600000000000001e-06, + "step": 988 + }, + { + "epoch": 2.5982905982905984, + "high_lr": 0.00048, + "low_lr": 9.600000000000001e-06, + "step": 988 + }, + { + "epoch": 2.5982905982905984, + "high_lr": 0.00048, + "low_lr": 9.600000000000001e-06, + "step": 988 + }, + { + "epoch": 2.6009204470742935, + "grad_norm": 1.2512733936309814, + "learning_rate": 0.00047947368421052635, + "loss": 1.4151, + "step": 989 + }, + { + "epoch": 2.6009204470742935, + "high_lr": 0.00047947368421052635, + "low_lr": 9.589473684210528e-06, + "step": 989 + }, + { + "epoch": 2.6009204470742935, + "high_lr": 0.00047947368421052635, + "low_lr": 9.589473684210528e-06, + "step": 989 + }, + { + "epoch": 2.6009204470742935, + "high_lr": 0.00047947368421052635, + "low_lr": 9.589473684210528e-06, + "step": 989 + }, + { + "epoch": 2.6009204470742935, + "high_lr": 0.00047947368421052635, + "low_lr": 9.589473684210528e-06, + "step": 989 + }, + { + "epoch": 2.6009204470742935, + "high_lr": 0.00047947368421052635, + "low_lr": 9.589473684210528e-06, + "step": 989 + }, + { + "epoch": 2.6009204470742935, + "high_lr": 0.00047947368421052635, + "low_lr": 9.589473684210528e-06, + "step": 989 + }, + { + "epoch": 2.6009204470742935, + "high_lr": 0.00047947368421052635, + "low_lr": 9.589473684210528e-06, + "step": 989 + }, + { + "epoch": 2.6009204470742935, + "high_lr": 0.00047947368421052635, + "low_lr": 9.589473684210528e-06, + "step": 989 + }, + { + "epoch": 2.603550295857988, + "grad_norm": 1.3273476362228394, + "learning_rate": 0.00047894736842105264, + "loss": 1.3562, + "step": 990 + }, + { + "epoch": 2.603550295857988, + "high_lr": 0.00047894736842105264, + "low_lr": 9.578947368421054e-06, + "step": 990 + }, + { + "epoch": 2.603550295857988, + "high_lr": 0.00047894736842105264, + "low_lr": 9.578947368421054e-06, + "step": 990 + }, + { + "epoch": 2.603550295857988, + "high_lr": 0.00047894736842105264, + "low_lr": 9.578947368421054e-06, + "step": 990 + }, + { + "epoch": 2.603550295857988, + "high_lr": 0.00047894736842105264, + "low_lr": 9.578947368421054e-06, + "step": 990 + }, + { + "epoch": 2.603550295857988, + "high_lr": 0.00047894736842105264, + "low_lr": 9.578947368421054e-06, + "step": 990 + }, + { + "epoch": 2.603550295857988, + "high_lr": 0.00047894736842105264, + "low_lr": 9.578947368421054e-06, + "step": 990 + }, + { + "epoch": 2.603550295857988, + "high_lr": 0.00047894736842105264, + "low_lr": 9.578947368421054e-06, + "step": 990 + }, + { + "epoch": 2.603550295857988, + "high_lr": 0.00047894736842105264, + "low_lr": 9.578947368421054e-06, + "step": 990 + }, + { + "epoch": 2.6061801446416832, + "grad_norm": 1.1679435968399048, + "learning_rate": 0.000478421052631579, + "loss": 1.3634, + "step": 991 + }, + { + "epoch": 2.6061801446416832, + "high_lr": 0.000478421052631579, + "low_lr": 9.56842105263158e-06, + "step": 991 + }, + { + "epoch": 2.6061801446416832, + "high_lr": 0.000478421052631579, + "low_lr": 9.56842105263158e-06, + "step": 991 + }, + { + "epoch": 2.6061801446416832, + "high_lr": 0.000478421052631579, + "low_lr": 9.56842105263158e-06, + "step": 991 + }, + { + "epoch": 2.6061801446416832, + "high_lr": 0.000478421052631579, + "low_lr": 9.56842105263158e-06, + "step": 991 + }, + { + "epoch": 2.6061801446416832, + "high_lr": 0.000478421052631579, + "low_lr": 9.56842105263158e-06, + "step": 991 + }, + { + "epoch": 2.6061801446416832, + "high_lr": 0.000478421052631579, + "low_lr": 9.56842105263158e-06, + "step": 991 + }, + { + "epoch": 2.6061801446416832, + "high_lr": 0.000478421052631579, + "low_lr": 9.56842105263158e-06, + "step": 991 + }, + { + "epoch": 2.6061801446416832, + "high_lr": 0.000478421052631579, + "low_lr": 9.56842105263158e-06, + "step": 991 + }, + { + "epoch": 2.608809993425378, + "grad_norm": 1.233843445777893, + "learning_rate": 0.00047789473684210526, + "loss": 1.3652, + "step": 992 + }, + { + "epoch": 2.608809993425378, + "high_lr": 0.00047789473684210526, + "low_lr": 9.557894736842107e-06, + "step": 992 + }, + { + "epoch": 2.608809993425378, + "high_lr": 0.00047789473684210526, + "low_lr": 9.557894736842107e-06, + "step": 992 + }, + { + "epoch": 2.608809993425378, + "high_lr": 0.00047789473684210526, + "low_lr": 9.557894736842107e-06, + "step": 992 + }, + { + "epoch": 2.608809993425378, + "high_lr": 0.00047789473684210526, + "low_lr": 9.557894736842107e-06, + "step": 992 + }, + { + "epoch": 2.608809993425378, + "high_lr": 0.00047789473684210526, + "low_lr": 9.557894736842107e-06, + "step": 992 + }, + { + "epoch": 2.608809993425378, + "high_lr": 0.00047789473684210526, + "low_lr": 9.557894736842107e-06, + "step": 992 + }, + { + "epoch": 2.608809993425378, + "high_lr": 0.00047789473684210526, + "low_lr": 9.557894736842107e-06, + "step": 992 + }, + { + "epoch": 2.608809993425378, + "high_lr": 0.00047789473684210526, + "low_lr": 9.557894736842107e-06, + "step": 992 + }, + { + "epoch": 2.611439842209073, + "grad_norm": 1.2381819486618042, + "learning_rate": 0.00047736842105263155, + "loss": 1.3308, + "step": 993 + }, + { + "epoch": 2.611439842209073, + "high_lr": 0.00047736842105263155, + "low_lr": 9.547368421052631e-06, + "step": 993 + }, + { + "epoch": 2.611439842209073, + "high_lr": 0.00047736842105263155, + "low_lr": 9.547368421052631e-06, + "step": 993 + }, + { + "epoch": 2.611439842209073, + "high_lr": 0.00047736842105263155, + "low_lr": 9.547368421052631e-06, + "step": 993 + }, + { + "epoch": 2.611439842209073, + "high_lr": 0.00047736842105263155, + "low_lr": 9.547368421052631e-06, + "step": 993 + }, + { + "epoch": 2.611439842209073, + "high_lr": 0.00047736842105263155, + "low_lr": 9.547368421052631e-06, + "step": 993 + }, + { + "epoch": 2.611439842209073, + "high_lr": 0.00047736842105263155, + "low_lr": 9.547368421052631e-06, + "step": 993 + }, + { + "epoch": 2.611439842209073, + "high_lr": 0.00047736842105263155, + "low_lr": 9.547368421052631e-06, + "step": 993 + }, + { + "epoch": 2.611439842209073, + "high_lr": 0.00047736842105263155, + "low_lr": 9.547368421052631e-06, + "step": 993 + }, + { + "epoch": 2.6140696909927676, + "grad_norm": 1.323737382888794, + "learning_rate": 0.0004768421052631579, + "loss": 1.3516, + "step": 994 + }, + { + "epoch": 2.6140696909927676, + "high_lr": 0.0004768421052631579, + "low_lr": 9.53684210526316e-06, + "step": 994 + }, + { + "epoch": 2.6140696909927676, + "high_lr": 0.0004768421052631579, + "low_lr": 9.53684210526316e-06, + "step": 994 + }, + { + "epoch": 2.6140696909927676, + "high_lr": 0.0004768421052631579, + "low_lr": 9.53684210526316e-06, + "step": 994 + }, + { + "epoch": 2.6140696909927676, + "high_lr": 0.0004768421052631579, + "low_lr": 9.53684210526316e-06, + "step": 994 + }, + { + "epoch": 2.6140696909927676, + "high_lr": 0.0004768421052631579, + "low_lr": 9.53684210526316e-06, + "step": 994 + }, + { + "epoch": 2.6140696909927676, + "high_lr": 0.0004768421052631579, + "low_lr": 9.53684210526316e-06, + "step": 994 + }, + { + "epoch": 2.6140696909927676, + "high_lr": 0.0004768421052631579, + "low_lr": 9.53684210526316e-06, + "step": 994 + }, + { + "epoch": 2.6140696909927676, + "high_lr": 0.0004768421052631579, + "low_lr": 9.53684210526316e-06, + "step": 994 + }, + { + "epoch": 2.6166995397764627, + "grad_norm": 1.3120098114013672, + "learning_rate": 0.0004763157894736842, + "loss": 1.4448, + "step": 995 + }, + { + "epoch": 2.6166995397764627, + "high_lr": 0.0004763157894736842, + "low_lr": 9.526315789473684e-06, + "step": 995 + }, + { + "epoch": 2.6166995397764627, + "high_lr": 0.0004763157894736842, + "low_lr": 9.526315789473684e-06, + "step": 995 + }, + { + "epoch": 2.6166995397764627, + "high_lr": 0.0004763157894736842, + "low_lr": 9.526315789473684e-06, + "step": 995 + }, + { + "epoch": 2.6166995397764627, + "high_lr": 0.0004763157894736842, + "low_lr": 9.526315789473684e-06, + "step": 995 + }, + { + "epoch": 2.6166995397764627, + "high_lr": 0.0004763157894736842, + "low_lr": 9.526315789473684e-06, + "step": 995 + }, + { + "epoch": 2.6166995397764627, + "high_lr": 0.0004763157894736842, + "low_lr": 9.526315789473684e-06, + "step": 995 + }, + { + "epoch": 2.6166995397764627, + "high_lr": 0.0004763157894736842, + "low_lr": 9.526315789473684e-06, + "step": 995 + }, + { + "epoch": 2.6166995397764627, + "high_lr": 0.0004763157894736842, + "low_lr": 9.526315789473684e-06, + "step": 995 + }, + { + "epoch": 2.619329388560158, + "grad_norm": 1.3577193021774292, + "learning_rate": 0.00047578947368421057, + "loss": 1.4299, + "step": 996 + }, + { + "epoch": 2.619329388560158, + "high_lr": 0.00047578947368421057, + "low_lr": 9.515789473684212e-06, + "step": 996 + }, + { + "epoch": 2.619329388560158, + "high_lr": 0.00047578947368421057, + "low_lr": 9.515789473684212e-06, + "step": 996 + }, + { + "epoch": 2.619329388560158, + "high_lr": 0.00047578947368421057, + "low_lr": 9.515789473684212e-06, + "step": 996 + }, + { + "epoch": 2.619329388560158, + "high_lr": 0.00047578947368421057, + "low_lr": 9.515789473684212e-06, + "step": 996 + }, + { + "epoch": 2.619329388560158, + "high_lr": 0.00047578947368421057, + "low_lr": 9.515789473684212e-06, + "step": 996 + }, + { + "epoch": 2.619329388560158, + "high_lr": 0.00047578947368421057, + "low_lr": 9.515789473684212e-06, + "step": 996 + }, + { + "epoch": 2.619329388560158, + "high_lr": 0.00047578947368421057, + "low_lr": 9.515789473684212e-06, + "step": 996 + }, + { + "epoch": 2.619329388560158, + "high_lr": 0.00047578947368421057, + "low_lr": 9.515789473684212e-06, + "step": 996 + }, + { + "epoch": 2.6219592373438525, + "grad_norm": 1.3073041439056396, + "learning_rate": 0.00047526315789473686, + "loss": 1.3781, + "step": 997 + }, + { + "epoch": 2.6219592373438525, + "high_lr": 0.00047526315789473686, + "low_lr": 9.505263157894738e-06, + "step": 997 + }, + { + "epoch": 2.6219592373438525, + "high_lr": 0.00047526315789473686, + "low_lr": 9.505263157894738e-06, + "step": 997 + }, + { + "epoch": 2.6219592373438525, + "high_lr": 0.00047526315789473686, + "low_lr": 9.505263157894738e-06, + "step": 997 + }, + { + "epoch": 2.6219592373438525, + "high_lr": 0.00047526315789473686, + "low_lr": 9.505263157894738e-06, + "step": 997 + }, + { + "epoch": 2.6219592373438525, + "high_lr": 0.00047526315789473686, + "low_lr": 9.505263157894738e-06, + "step": 997 + }, + { + "epoch": 2.6219592373438525, + "high_lr": 0.00047526315789473686, + "low_lr": 9.505263157894738e-06, + "step": 997 + }, + { + "epoch": 2.6219592373438525, + "high_lr": 0.00047526315789473686, + "low_lr": 9.505263157894738e-06, + "step": 997 + }, + { + "epoch": 2.6219592373438525, + "high_lr": 0.00047526315789473686, + "low_lr": 9.505263157894738e-06, + "step": 997 + }, + { + "epoch": 2.6245890861275476, + "grad_norm": 1.2592058181762695, + "learning_rate": 0.0004747368421052632, + "loss": 1.3993, + "step": 998 + }, + { + "epoch": 2.6245890861275476, + "high_lr": 0.0004747368421052632, + "low_lr": 9.494736842105265e-06, + "step": 998 + }, + { + "epoch": 2.6245890861275476, + "high_lr": 0.0004747368421052632, + "low_lr": 9.494736842105265e-06, + "step": 998 + }, + { + "epoch": 2.6245890861275476, + "high_lr": 0.0004747368421052632, + "low_lr": 9.494736842105265e-06, + "step": 998 + }, + { + "epoch": 2.6245890861275476, + "high_lr": 0.0004747368421052632, + "low_lr": 9.494736842105265e-06, + "step": 998 + }, + { + "epoch": 2.6245890861275476, + "high_lr": 0.0004747368421052632, + "low_lr": 9.494736842105265e-06, + "step": 998 + }, + { + "epoch": 2.6245890861275476, + "high_lr": 0.0004747368421052632, + "low_lr": 9.494736842105265e-06, + "step": 998 + }, + { + "epoch": 2.6245890861275476, + "high_lr": 0.0004747368421052632, + "low_lr": 9.494736842105265e-06, + "step": 998 + }, + { + "epoch": 2.6245890861275476, + "high_lr": 0.0004747368421052632, + "low_lr": 9.494736842105265e-06, + "step": 998 + }, + { + "epoch": 2.6272189349112427, + "grad_norm": 1.1837654113769531, + "learning_rate": 0.0004742105263157895, + "loss": 1.3234, + "step": 999 + }, + { + "epoch": 2.6272189349112427, + "high_lr": 0.0004742105263157895, + "low_lr": 9.484210526315791e-06, + "step": 999 + }, + { + "epoch": 2.6272189349112427, + "high_lr": 0.0004742105263157895, + "low_lr": 9.484210526315791e-06, + "step": 999 + }, + { + "epoch": 2.6272189349112427, + "high_lr": 0.0004742105263157895, + "low_lr": 9.484210526315791e-06, + "step": 999 + }, + { + "epoch": 2.6272189349112427, + "high_lr": 0.0004742105263157895, + "low_lr": 9.484210526315791e-06, + "step": 999 + }, + { + "epoch": 2.6272189349112427, + "high_lr": 0.0004742105263157895, + "low_lr": 9.484210526315791e-06, + "step": 999 + }, + { + "epoch": 2.6272189349112427, + "high_lr": 0.0004742105263157895, + "low_lr": 9.484210526315791e-06, + "step": 999 + }, + { + "epoch": 2.6272189349112427, + "high_lr": 0.0004742105263157895, + "low_lr": 9.484210526315791e-06, + "step": 999 + }, + { + "epoch": 2.6272189349112427, + "high_lr": 0.0004742105263157895, + "low_lr": 9.484210526315791e-06, + "step": 999 + }, + { + "epoch": 2.6298487836949374, + "grad_norm": 1.3210477828979492, + "learning_rate": 0.00047368421052631577, + "loss": 1.3892, + "step": 1000 + }, + { + "epoch": 2.6298487836949374, + "high_lr": 0.00047368421052631577, + "low_lr": 9.473684210526315e-06, + "step": 1000 + }, + { + "epoch": 2.6298487836949374, + "high_lr": 0.00047368421052631577, + "low_lr": 9.473684210526315e-06, + "step": 1000 + }, + { + "epoch": 2.6298487836949374, + "high_lr": 0.00047368421052631577, + "low_lr": 9.473684210526315e-06, + "step": 1000 + }, + { + "epoch": 2.6298487836949374, + "high_lr": 0.00047368421052631577, + "low_lr": 9.473684210526315e-06, + "step": 1000 + }, + { + "epoch": 2.6298487836949374, + "high_lr": 0.00047368421052631577, + "low_lr": 9.473684210526315e-06, + "step": 1000 + }, + { + "epoch": 2.6298487836949374, + "high_lr": 0.00047368421052631577, + "low_lr": 9.473684210526315e-06, + "step": 1000 + }, + { + "epoch": 2.6298487836949374, + "high_lr": 0.00047368421052631577, + "low_lr": 9.473684210526315e-06, + "step": 1000 + }, + { + "epoch": 2.6298487836949374, + "high_lr": 0.00047368421052631577, + "low_lr": 9.473684210526315e-06, + "step": 1000 + }, + { + "epoch": 2.6324786324786325, + "grad_norm": 1.3772284984588623, + "learning_rate": 0.0004731578947368421, + "loss": 1.359, + "step": 1001 + }, + { + "epoch": 2.6324786324786325, + "high_lr": 0.0004731578947368421, + "low_lr": 9.463157894736844e-06, + "step": 1001 + }, + { + "epoch": 2.6324786324786325, + "high_lr": 0.0004731578947368421, + "low_lr": 9.463157894736844e-06, + "step": 1001 + }, + { + "epoch": 2.6324786324786325, + "high_lr": 0.0004731578947368421, + "low_lr": 9.463157894736844e-06, + "step": 1001 + }, + { + "epoch": 2.6324786324786325, + "high_lr": 0.0004731578947368421, + "low_lr": 9.463157894736844e-06, + "step": 1001 + }, + { + "epoch": 2.6324786324786325, + "high_lr": 0.0004731578947368421, + "low_lr": 9.463157894736844e-06, + "step": 1001 + }, + { + "epoch": 2.6324786324786325, + "high_lr": 0.0004731578947368421, + "low_lr": 9.463157894736844e-06, + "step": 1001 + }, + { + "epoch": 2.6324786324786325, + "high_lr": 0.0004731578947368421, + "low_lr": 9.463157894736844e-06, + "step": 1001 + }, + { + "epoch": 2.6324786324786325, + "high_lr": 0.0004731578947368421, + "low_lr": 9.463157894736844e-06, + "step": 1001 + }, + { + "epoch": 2.6351084812623276, + "grad_norm": 1.3075475692749023, + "learning_rate": 0.0004726315789473684, + "loss": 1.3993, + "step": 1002 + }, + { + "epoch": 2.6351084812623276, + "high_lr": 0.0004726315789473684, + "low_lr": 9.452631578947368e-06, + "step": 1002 + }, + { + "epoch": 2.6351084812623276, + "high_lr": 0.0004726315789473684, + "low_lr": 9.452631578947368e-06, + "step": 1002 + }, + { + "epoch": 2.6351084812623276, + "high_lr": 0.0004726315789473684, + "low_lr": 9.452631578947368e-06, + "step": 1002 + }, + { + "epoch": 2.6351084812623276, + "high_lr": 0.0004726315789473684, + "low_lr": 9.452631578947368e-06, + "step": 1002 + }, + { + "epoch": 2.6351084812623276, + "high_lr": 0.0004726315789473684, + "low_lr": 9.452631578947368e-06, + "step": 1002 + }, + { + "epoch": 2.6351084812623276, + "high_lr": 0.0004726315789473684, + "low_lr": 9.452631578947368e-06, + "step": 1002 + }, + { + "epoch": 2.6351084812623276, + "high_lr": 0.0004726315789473684, + "low_lr": 9.452631578947368e-06, + "step": 1002 + }, + { + "epoch": 2.6351084812623276, + "high_lr": 0.0004726315789473684, + "low_lr": 9.452631578947368e-06, + "step": 1002 + }, + { + "epoch": 2.637738330046022, + "grad_norm": 1.3833972215652466, + "learning_rate": 0.00047210526315789473, + "loss": 1.4675, + "step": 1003 + }, + { + "epoch": 2.637738330046022, + "high_lr": 0.00047210526315789473, + "low_lr": 9.442105263157896e-06, + "step": 1003 + }, + { + "epoch": 2.637738330046022, + "high_lr": 0.00047210526315789473, + "low_lr": 9.442105263157896e-06, + "step": 1003 + }, + { + "epoch": 2.637738330046022, + "high_lr": 0.00047210526315789473, + "low_lr": 9.442105263157896e-06, + "step": 1003 + }, + { + "epoch": 2.637738330046022, + "high_lr": 0.00047210526315789473, + "low_lr": 9.442105263157896e-06, + "step": 1003 + }, + { + "epoch": 2.637738330046022, + "high_lr": 0.00047210526315789473, + "low_lr": 9.442105263157896e-06, + "step": 1003 + }, + { + "epoch": 2.637738330046022, + "high_lr": 0.00047210526315789473, + "low_lr": 9.442105263157896e-06, + "step": 1003 + }, + { + "epoch": 2.637738330046022, + "high_lr": 0.00047210526315789473, + "low_lr": 9.442105263157896e-06, + "step": 1003 + }, + { + "epoch": 2.637738330046022, + "high_lr": 0.00047210526315789473, + "low_lr": 9.442105263157896e-06, + "step": 1003 + }, + { + "epoch": 2.6403681788297173, + "grad_norm": 1.2069523334503174, + "learning_rate": 0.0004715789473684211, + "loss": 1.406, + "step": 1004 + }, + { + "epoch": 2.6403681788297173, + "high_lr": 0.0004715789473684211, + "low_lr": 9.43157894736842e-06, + "step": 1004 + }, + { + "epoch": 2.6403681788297173, + "high_lr": 0.0004715789473684211, + "low_lr": 9.43157894736842e-06, + "step": 1004 + }, + { + "epoch": 2.6403681788297173, + "high_lr": 0.0004715789473684211, + "low_lr": 9.43157894736842e-06, + "step": 1004 + }, + { + "epoch": 2.6403681788297173, + "high_lr": 0.0004715789473684211, + "low_lr": 9.43157894736842e-06, + "step": 1004 + }, + { + "epoch": 2.6403681788297173, + "high_lr": 0.0004715789473684211, + "low_lr": 9.43157894736842e-06, + "step": 1004 + }, + { + "epoch": 2.6403681788297173, + "high_lr": 0.0004715789473684211, + "low_lr": 9.43157894736842e-06, + "step": 1004 + }, + { + "epoch": 2.6403681788297173, + "high_lr": 0.0004715789473684211, + "low_lr": 9.43157894736842e-06, + "step": 1004 + }, + { + "epoch": 2.6403681788297173, + "high_lr": 0.0004715789473684211, + "low_lr": 9.43157894736842e-06, + "step": 1004 + }, + { + "epoch": 2.6429980276134124, + "grad_norm": 1.2812342643737793, + "learning_rate": 0.0004710526315789474, + "loss": 1.3987, + "step": 1005 + }, + { + "epoch": 2.6429980276134124, + "high_lr": 0.0004710526315789474, + "low_lr": 9.421052631578949e-06, + "step": 1005 + }, + { + "epoch": 2.6429980276134124, + "high_lr": 0.0004710526315789474, + "low_lr": 9.421052631578949e-06, + "step": 1005 + }, + { + "epoch": 2.6429980276134124, + "high_lr": 0.0004710526315789474, + "low_lr": 9.421052631578949e-06, + "step": 1005 + }, + { + "epoch": 2.6429980276134124, + "high_lr": 0.0004710526315789474, + "low_lr": 9.421052631578949e-06, + "step": 1005 + }, + { + "epoch": 2.6429980276134124, + "high_lr": 0.0004710526315789474, + "low_lr": 9.421052631578949e-06, + "step": 1005 + }, + { + "epoch": 2.6429980276134124, + "high_lr": 0.0004710526315789474, + "low_lr": 9.421052631578949e-06, + "step": 1005 + }, + { + "epoch": 2.6429980276134124, + "high_lr": 0.0004710526315789474, + "low_lr": 9.421052631578949e-06, + "step": 1005 + }, + { + "epoch": 2.6429980276134124, + "high_lr": 0.0004710526315789474, + "low_lr": 9.421052631578949e-06, + "step": 1005 + }, + { + "epoch": 2.645627876397107, + "grad_norm": 1.30702543258667, + "learning_rate": 0.0004705263157894737, + "loss": 1.371, + "step": 1006 + }, + { + "epoch": 2.645627876397107, + "high_lr": 0.0004705263157894737, + "low_lr": 9.410526315789475e-06, + "step": 1006 + }, + { + "epoch": 2.645627876397107, + "high_lr": 0.0004705263157894737, + "low_lr": 9.410526315789475e-06, + "step": 1006 + }, + { + "epoch": 2.645627876397107, + "high_lr": 0.0004705263157894737, + "low_lr": 9.410526315789475e-06, + "step": 1006 + }, + { + "epoch": 2.645627876397107, + "high_lr": 0.0004705263157894737, + "low_lr": 9.410526315789475e-06, + "step": 1006 + }, + { + "epoch": 2.645627876397107, + "high_lr": 0.0004705263157894737, + "low_lr": 9.410526315789475e-06, + "step": 1006 + }, + { + "epoch": 2.645627876397107, + "high_lr": 0.0004705263157894737, + "low_lr": 9.410526315789475e-06, + "step": 1006 + }, + { + "epoch": 2.645627876397107, + "high_lr": 0.0004705263157894737, + "low_lr": 9.410526315789475e-06, + "step": 1006 + }, + { + "epoch": 2.645627876397107, + "high_lr": 0.0004705263157894737, + "low_lr": 9.410526315789475e-06, + "step": 1006 + }, + { + "epoch": 2.648257725180802, + "grad_norm": 1.2445564270019531, + "learning_rate": 0.00047, + "loss": 1.397, + "step": 1007 + }, + { + "epoch": 2.648257725180802, + "high_lr": 0.00047, + "low_lr": 9.4e-06, + "step": 1007 + }, + { + "epoch": 2.648257725180802, + "high_lr": 0.00047, + "low_lr": 9.4e-06, + "step": 1007 + }, + { + "epoch": 2.648257725180802, + "high_lr": 0.00047, + "low_lr": 9.4e-06, + "step": 1007 + }, + { + "epoch": 2.648257725180802, + "high_lr": 0.00047, + "low_lr": 9.4e-06, + "step": 1007 + }, + { + "epoch": 2.648257725180802, + "high_lr": 0.00047, + "low_lr": 9.4e-06, + "step": 1007 + }, + { + "epoch": 2.648257725180802, + "high_lr": 0.00047, + "low_lr": 9.4e-06, + "step": 1007 + }, + { + "epoch": 2.648257725180802, + "high_lr": 0.00047, + "low_lr": 9.4e-06, + "step": 1007 + }, + { + "epoch": 2.648257725180802, + "high_lr": 0.00047, + "low_lr": 9.4e-06, + "step": 1007 + }, + { + "epoch": 2.6508875739644973, + "grad_norm": 1.2458575963974, + "learning_rate": 0.0004694736842105263, + "loss": 1.4151, + "step": 1008 + }, + { + "epoch": 2.6508875739644973, + "high_lr": 0.0004694736842105263, + "low_lr": 9.389473684210528e-06, + "step": 1008 + }, + { + "epoch": 2.6508875739644973, + "high_lr": 0.0004694736842105263, + "low_lr": 9.389473684210528e-06, + "step": 1008 + }, + { + "epoch": 2.6508875739644973, + "high_lr": 0.0004694736842105263, + "low_lr": 9.389473684210528e-06, + "step": 1008 + }, + { + "epoch": 2.6508875739644973, + "high_lr": 0.0004694736842105263, + "low_lr": 9.389473684210528e-06, + "step": 1008 + }, + { + "epoch": 2.6508875739644973, + "high_lr": 0.0004694736842105263, + "low_lr": 9.389473684210528e-06, + "step": 1008 + }, + { + "epoch": 2.6508875739644973, + "high_lr": 0.0004694736842105263, + "low_lr": 9.389473684210528e-06, + "step": 1008 + }, + { + "epoch": 2.6508875739644973, + "high_lr": 0.0004694736842105263, + "low_lr": 9.389473684210528e-06, + "step": 1008 + }, + { + "epoch": 2.6508875739644973, + "high_lr": 0.0004694736842105263, + "low_lr": 9.389473684210528e-06, + "step": 1008 + }, + { + "epoch": 2.653517422748192, + "grad_norm": 1.2521098852157593, + "learning_rate": 0.0004689473684210526, + "loss": 1.3789, + "step": 1009 + }, + { + "epoch": 2.653517422748192, + "high_lr": 0.0004689473684210526, + "low_lr": 9.378947368421052e-06, + "step": 1009 + }, + { + "epoch": 2.653517422748192, + "high_lr": 0.0004689473684210526, + "low_lr": 9.378947368421052e-06, + "step": 1009 + }, + { + "epoch": 2.653517422748192, + "high_lr": 0.0004689473684210526, + "low_lr": 9.378947368421052e-06, + "step": 1009 + }, + { + "epoch": 2.653517422748192, + "high_lr": 0.0004689473684210526, + "low_lr": 9.378947368421052e-06, + "step": 1009 + }, + { + "epoch": 2.653517422748192, + "high_lr": 0.0004689473684210526, + "low_lr": 9.378947368421052e-06, + "step": 1009 + }, + { + "epoch": 2.653517422748192, + "high_lr": 0.0004689473684210526, + "low_lr": 9.378947368421052e-06, + "step": 1009 + }, + { + "epoch": 2.653517422748192, + "high_lr": 0.0004689473684210526, + "low_lr": 9.378947368421052e-06, + "step": 1009 + }, + { + "epoch": 2.653517422748192, + "high_lr": 0.0004689473684210526, + "low_lr": 9.378947368421052e-06, + "step": 1009 + }, + { + "epoch": 2.656147271531887, + "grad_norm": 1.286059021949768, + "learning_rate": 0.00046842105263157895, + "loss": 1.4011, + "step": 1010 + }, + { + "epoch": 2.656147271531887, + "high_lr": 0.00046842105263157895, + "low_lr": 9.36842105263158e-06, + "step": 1010 + }, + { + "epoch": 2.656147271531887, + "high_lr": 0.00046842105263157895, + "low_lr": 9.36842105263158e-06, + "step": 1010 + }, + { + "epoch": 2.656147271531887, + "high_lr": 0.00046842105263157895, + "low_lr": 9.36842105263158e-06, + "step": 1010 + }, + { + "epoch": 2.656147271531887, + "high_lr": 0.00046842105263157895, + "low_lr": 9.36842105263158e-06, + "step": 1010 + }, + { + "epoch": 2.656147271531887, + "high_lr": 0.00046842105263157895, + "low_lr": 9.36842105263158e-06, + "step": 1010 + }, + { + "epoch": 2.656147271531887, + "high_lr": 0.00046842105263157895, + "low_lr": 9.36842105263158e-06, + "step": 1010 + }, + { + "epoch": 2.656147271531887, + "high_lr": 0.00046842105263157895, + "low_lr": 9.36842105263158e-06, + "step": 1010 + }, + { + "epoch": 2.656147271531887, + "high_lr": 0.00046842105263157895, + "low_lr": 9.36842105263158e-06, + "step": 1010 + }, + { + "epoch": 2.658777120315582, + "grad_norm": 1.3176252841949463, + "learning_rate": 0.00046789473684210524, + "loss": 1.4245, + "step": 1011 + }, + { + "epoch": 2.658777120315582, + "high_lr": 0.00046789473684210524, + "low_lr": 9.357894736842105e-06, + "step": 1011 + }, + { + "epoch": 2.658777120315582, + "high_lr": 0.00046789473684210524, + "low_lr": 9.357894736842105e-06, + "step": 1011 + }, + { + "epoch": 2.658777120315582, + "high_lr": 0.00046789473684210524, + "low_lr": 9.357894736842105e-06, + "step": 1011 + }, + { + "epoch": 2.658777120315582, + "high_lr": 0.00046789473684210524, + "low_lr": 9.357894736842105e-06, + "step": 1011 + }, + { + "epoch": 2.658777120315582, + "high_lr": 0.00046789473684210524, + "low_lr": 9.357894736842105e-06, + "step": 1011 + }, + { + "epoch": 2.658777120315582, + "high_lr": 0.00046789473684210524, + "low_lr": 9.357894736842105e-06, + "step": 1011 + }, + { + "epoch": 2.658777120315582, + "high_lr": 0.00046789473684210524, + "low_lr": 9.357894736842105e-06, + "step": 1011 + }, + { + "epoch": 2.658777120315582, + "high_lr": 0.00046789473684210524, + "low_lr": 9.357894736842105e-06, + "step": 1011 + }, + { + "epoch": 2.6614069690992768, + "grad_norm": 1.219322919845581, + "learning_rate": 0.00046736842105263163, + "loss": 1.4275, + "step": 1012 + }, + { + "epoch": 2.6614069690992768, + "high_lr": 0.00046736842105263163, + "low_lr": 9.347368421052633e-06, + "step": 1012 + }, + { + "epoch": 2.6614069690992768, + "high_lr": 0.00046736842105263163, + "low_lr": 9.347368421052633e-06, + "step": 1012 + }, + { + "epoch": 2.6614069690992768, + "high_lr": 0.00046736842105263163, + "low_lr": 9.347368421052633e-06, + "step": 1012 + }, + { + "epoch": 2.6614069690992768, + "high_lr": 0.00046736842105263163, + "low_lr": 9.347368421052633e-06, + "step": 1012 + }, + { + "epoch": 2.6614069690992768, + "high_lr": 0.00046736842105263163, + "low_lr": 9.347368421052633e-06, + "step": 1012 + }, + { + "epoch": 2.6614069690992768, + "high_lr": 0.00046736842105263163, + "low_lr": 9.347368421052633e-06, + "step": 1012 + }, + { + "epoch": 2.6614069690992768, + "high_lr": 0.00046736842105263163, + "low_lr": 9.347368421052633e-06, + "step": 1012 + }, + { + "epoch": 2.6614069690992768, + "high_lr": 0.00046736842105263163, + "low_lr": 9.347368421052633e-06, + "step": 1012 + }, + { + "epoch": 2.664036817882972, + "grad_norm": 1.2309496402740479, + "learning_rate": 0.0004668421052631579, + "loss": 1.386, + "step": 1013 + }, + { + "epoch": 2.664036817882972, + "high_lr": 0.0004668421052631579, + "low_lr": 9.336842105263158e-06, + "step": 1013 + }, + { + "epoch": 2.664036817882972, + "high_lr": 0.0004668421052631579, + "low_lr": 9.336842105263158e-06, + "step": 1013 + }, + { + "epoch": 2.664036817882972, + "high_lr": 0.0004668421052631579, + "low_lr": 9.336842105263158e-06, + "step": 1013 + }, + { + "epoch": 2.664036817882972, + "high_lr": 0.0004668421052631579, + "low_lr": 9.336842105263158e-06, + "step": 1013 + }, + { + "epoch": 2.664036817882972, + "high_lr": 0.0004668421052631579, + "low_lr": 9.336842105263158e-06, + "step": 1013 + }, + { + "epoch": 2.664036817882972, + "high_lr": 0.0004668421052631579, + "low_lr": 9.336842105263158e-06, + "step": 1013 + }, + { + "epoch": 2.664036817882972, + "high_lr": 0.0004668421052631579, + "low_lr": 9.336842105263158e-06, + "step": 1013 + }, + { + "epoch": 2.664036817882972, + "high_lr": 0.0004668421052631579, + "low_lr": 9.336842105263158e-06, + "step": 1013 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 1.352072834968567, + "learning_rate": 0.0004663157894736842, + "loss": 1.4204, + "step": 1014 + }, + { + "epoch": 2.6666666666666665, + "high_lr": 0.0004663157894736842, + "low_lr": 9.326315789473684e-06, + "step": 1014 + }, + { + "epoch": 2.6666666666666665, + "high_lr": 0.0004663157894736842, + "low_lr": 9.326315789473684e-06, + "step": 1014 + }, + { + "epoch": 2.6666666666666665, + "high_lr": 0.0004663157894736842, + "low_lr": 9.326315789473684e-06, + "step": 1014 + }, + { + "epoch": 2.6666666666666665, + "high_lr": 0.0004663157894736842, + "low_lr": 9.326315789473684e-06, + "step": 1014 + }, + { + "epoch": 2.6666666666666665, + "high_lr": 0.0004663157894736842, + "low_lr": 9.326315789473684e-06, + "step": 1014 + }, + { + "epoch": 2.6666666666666665, + "high_lr": 0.0004663157894736842, + "low_lr": 9.326315789473684e-06, + "step": 1014 + }, + { + "epoch": 2.6666666666666665, + "high_lr": 0.0004663157894736842, + "low_lr": 9.326315789473684e-06, + "step": 1014 + }, + { + "epoch": 2.6666666666666665, + "high_lr": 0.0004663157894736842, + "low_lr": 9.326315789473684e-06, + "step": 1014 + }, + { + "epoch": 2.6692965154503616, + "grad_norm": 1.2742241621017456, + "learning_rate": 0.00046578947368421054, + "loss": 1.4226, + "step": 1015 + }, + { + "epoch": 2.6692965154503616, + "high_lr": 0.00046578947368421054, + "low_lr": 9.315789473684212e-06, + "step": 1015 + }, + { + "epoch": 2.6692965154503616, + "high_lr": 0.00046578947368421054, + "low_lr": 9.315789473684212e-06, + "step": 1015 + }, + { + "epoch": 2.6692965154503616, + "high_lr": 0.00046578947368421054, + "low_lr": 9.315789473684212e-06, + "step": 1015 + }, + { + "epoch": 2.6692965154503616, + "high_lr": 0.00046578947368421054, + "low_lr": 9.315789473684212e-06, + "step": 1015 + }, + { + "epoch": 2.6692965154503616, + "high_lr": 0.00046578947368421054, + "low_lr": 9.315789473684212e-06, + "step": 1015 + }, + { + "epoch": 2.6692965154503616, + "high_lr": 0.00046578947368421054, + "low_lr": 9.315789473684212e-06, + "step": 1015 + }, + { + "epoch": 2.6692965154503616, + "high_lr": 0.00046578947368421054, + "low_lr": 9.315789473684212e-06, + "step": 1015 + }, + { + "epoch": 2.6692965154503616, + "high_lr": 0.00046578947368421054, + "low_lr": 9.315789473684212e-06, + "step": 1015 + }, + { + "epoch": 2.6719263642340563, + "grad_norm": 1.3469874858856201, + "learning_rate": 0.00046526315789473683, + "loss": 1.3989, + "step": 1016 + }, + { + "epoch": 2.6719263642340563, + "high_lr": 0.00046526315789473683, + "low_lr": 9.305263157894737e-06, + "step": 1016 + }, + { + "epoch": 2.6719263642340563, + "high_lr": 0.00046526315789473683, + "low_lr": 9.305263157894737e-06, + "step": 1016 + }, + { + "epoch": 2.6719263642340563, + "high_lr": 0.00046526315789473683, + "low_lr": 9.305263157894737e-06, + "step": 1016 + }, + { + "epoch": 2.6719263642340563, + "high_lr": 0.00046526315789473683, + "low_lr": 9.305263157894737e-06, + "step": 1016 + }, + { + "epoch": 2.6719263642340563, + "high_lr": 0.00046526315789473683, + "low_lr": 9.305263157894737e-06, + "step": 1016 + }, + { + "epoch": 2.6719263642340563, + "high_lr": 0.00046526315789473683, + "low_lr": 9.305263157894737e-06, + "step": 1016 + }, + { + "epoch": 2.6719263642340563, + "high_lr": 0.00046526315789473683, + "low_lr": 9.305263157894737e-06, + "step": 1016 + }, + { + "epoch": 2.6719263642340563, + "high_lr": 0.00046526315789473683, + "low_lr": 9.305263157894737e-06, + "step": 1016 + }, + { + "epoch": 2.6745562130177514, + "grad_norm": 1.3517266511917114, + "learning_rate": 0.00046473684210526317, + "loss": 1.3909, + "step": 1017 + }, + { + "epoch": 2.6745562130177514, + "high_lr": 0.00046473684210526317, + "low_lr": 9.294736842105265e-06, + "step": 1017 + }, + { + "epoch": 2.6745562130177514, + "high_lr": 0.00046473684210526317, + "low_lr": 9.294736842105265e-06, + "step": 1017 + }, + { + "epoch": 2.6745562130177514, + "high_lr": 0.00046473684210526317, + "low_lr": 9.294736842105265e-06, + "step": 1017 + }, + { + "epoch": 2.6745562130177514, + "high_lr": 0.00046473684210526317, + "low_lr": 9.294736842105265e-06, + "step": 1017 + }, + { + "epoch": 2.6745562130177514, + "high_lr": 0.00046473684210526317, + "low_lr": 9.294736842105265e-06, + "step": 1017 + }, + { + "epoch": 2.6745562130177514, + "high_lr": 0.00046473684210526317, + "low_lr": 9.294736842105265e-06, + "step": 1017 + }, + { + "epoch": 2.6745562130177514, + "high_lr": 0.00046473684210526317, + "low_lr": 9.294736842105265e-06, + "step": 1017 + }, + { + "epoch": 2.6745562130177514, + "high_lr": 0.00046473684210526317, + "low_lr": 9.294736842105265e-06, + "step": 1017 + }, + { + "epoch": 2.6771860618014465, + "grad_norm": 1.2885749340057373, + "learning_rate": 0.00046421052631578946, + "loss": 1.363, + "step": 1018 + }, + { + "epoch": 2.6771860618014465, + "high_lr": 0.00046421052631578946, + "low_lr": 9.28421052631579e-06, + "step": 1018 + }, + { + "epoch": 2.6771860618014465, + "high_lr": 0.00046421052631578946, + "low_lr": 9.28421052631579e-06, + "step": 1018 + }, + { + "epoch": 2.6771860618014465, + "high_lr": 0.00046421052631578946, + "low_lr": 9.28421052631579e-06, + "step": 1018 + }, + { + "epoch": 2.6771860618014465, + "high_lr": 0.00046421052631578946, + "low_lr": 9.28421052631579e-06, + "step": 1018 + }, + { + "epoch": 2.6771860618014465, + "high_lr": 0.00046421052631578946, + "low_lr": 9.28421052631579e-06, + "step": 1018 + }, + { + "epoch": 2.6771860618014465, + "high_lr": 0.00046421052631578946, + "low_lr": 9.28421052631579e-06, + "step": 1018 + }, + { + "epoch": 2.6771860618014465, + "high_lr": 0.00046421052631578946, + "low_lr": 9.28421052631579e-06, + "step": 1018 + }, + { + "epoch": 2.6771860618014465, + "high_lr": 0.00046421052631578946, + "low_lr": 9.28421052631579e-06, + "step": 1018 + }, + { + "epoch": 2.679815910585141, + "grad_norm": 1.333804726600647, + "learning_rate": 0.0004636842105263158, + "loss": 1.416, + "step": 1019 + }, + { + "epoch": 2.679815910585141, + "high_lr": 0.0004636842105263158, + "low_lr": 9.273684210526317e-06, + "step": 1019 + }, + { + "epoch": 2.679815910585141, + "high_lr": 0.0004636842105263158, + "low_lr": 9.273684210526317e-06, + "step": 1019 + }, + { + "epoch": 2.679815910585141, + "high_lr": 0.0004636842105263158, + "low_lr": 9.273684210526317e-06, + "step": 1019 + }, + { + "epoch": 2.679815910585141, + "high_lr": 0.0004636842105263158, + "low_lr": 9.273684210526317e-06, + "step": 1019 + }, + { + "epoch": 2.679815910585141, + "high_lr": 0.0004636842105263158, + "low_lr": 9.273684210526317e-06, + "step": 1019 + }, + { + "epoch": 2.679815910585141, + "high_lr": 0.0004636842105263158, + "low_lr": 9.273684210526317e-06, + "step": 1019 + }, + { + "epoch": 2.679815910585141, + "high_lr": 0.0004636842105263158, + "low_lr": 9.273684210526317e-06, + "step": 1019 + }, + { + "epoch": 2.679815910585141, + "high_lr": 0.0004636842105263158, + "low_lr": 9.273684210526317e-06, + "step": 1019 + }, + { + "epoch": 2.6824457593688362, + "grad_norm": 1.2797845602035522, + "learning_rate": 0.00046315789473684214, + "loss": 1.333, + "step": 1020 + }, + { + "epoch": 2.6824457593688362, + "high_lr": 0.00046315789473684214, + "low_lr": 9.263157894736842e-06, + "step": 1020 + }, + { + "epoch": 2.6824457593688362, + "high_lr": 0.00046315789473684214, + "low_lr": 9.263157894736842e-06, + "step": 1020 + }, + { + "epoch": 2.6824457593688362, + "high_lr": 0.00046315789473684214, + "low_lr": 9.263157894736842e-06, + "step": 1020 + }, + { + "epoch": 2.6824457593688362, + "high_lr": 0.00046315789473684214, + "low_lr": 9.263157894736842e-06, + "step": 1020 + }, + { + "epoch": 2.6824457593688362, + "high_lr": 0.00046315789473684214, + "low_lr": 9.263157894736842e-06, + "step": 1020 + }, + { + "epoch": 2.6824457593688362, + "high_lr": 0.00046315789473684214, + "low_lr": 9.263157894736842e-06, + "step": 1020 + }, + { + "epoch": 2.6824457593688362, + "high_lr": 0.00046315789473684214, + "low_lr": 9.263157894736842e-06, + "step": 1020 + }, + { + "epoch": 2.6824457593688362, + "high_lr": 0.00046315789473684214, + "low_lr": 9.263157894736842e-06, + "step": 1020 + }, + { + "epoch": 2.6850756081525313, + "grad_norm": 1.2081880569458008, + "learning_rate": 0.0004626315789473684, + "loss": 1.3541, + "step": 1021 + }, + { + "epoch": 2.6850756081525313, + "high_lr": 0.0004626315789473684, + "low_lr": 9.252631578947368e-06, + "step": 1021 + }, + { + "epoch": 2.6850756081525313, + "high_lr": 0.0004626315789473684, + "low_lr": 9.252631578947368e-06, + "step": 1021 + }, + { + "epoch": 2.6850756081525313, + "high_lr": 0.0004626315789473684, + "low_lr": 9.252631578947368e-06, + "step": 1021 + }, + { + "epoch": 2.6850756081525313, + "high_lr": 0.0004626315789473684, + "low_lr": 9.252631578947368e-06, + "step": 1021 + }, + { + "epoch": 2.6850756081525313, + "high_lr": 0.0004626315789473684, + "low_lr": 9.252631578947368e-06, + "step": 1021 + }, + { + "epoch": 2.6850756081525313, + "high_lr": 0.0004626315789473684, + "low_lr": 9.252631578947368e-06, + "step": 1021 + }, + { + "epoch": 2.6850756081525313, + "high_lr": 0.0004626315789473684, + "low_lr": 9.252631578947368e-06, + "step": 1021 + }, + { + "epoch": 2.6850756081525313, + "high_lr": 0.0004626315789473684, + "low_lr": 9.252631578947368e-06, + "step": 1021 + }, + { + "epoch": 2.687705456936226, + "grad_norm": 1.3071238994598389, + "learning_rate": 0.00046210526315789476, + "loss": 1.3664, + "step": 1022 + }, + { + "epoch": 2.687705456936226, + "high_lr": 0.00046210526315789476, + "low_lr": 9.242105263157896e-06, + "step": 1022 + }, + { + "epoch": 2.687705456936226, + "high_lr": 0.00046210526315789476, + "low_lr": 9.242105263157896e-06, + "step": 1022 + }, + { + "epoch": 2.687705456936226, + "high_lr": 0.00046210526315789476, + "low_lr": 9.242105263157896e-06, + "step": 1022 + }, + { + "epoch": 2.687705456936226, + "high_lr": 0.00046210526315789476, + "low_lr": 9.242105263157896e-06, + "step": 1022 + }, + { + "epoch": 2.687705456936226, + "high_lr": 0.00046210526315789476, + "low_lr": 9.242105263157896e-06, + "step": 1022 + }, + { + "epoch": 2.687705456936226, + "high_lr": 0.00046210526315789476, + "low_lr": 9.242105263157896e-06, + "step": 1022 + }, + { + "epoch": 2.687705456936226, + "high_lr": 0.00046210526315789476, + "low_lr": 9.242105263157896e-06, + "step": 1022 + }, + { + "epoch": 2.687705456936226, + "high_lr": 0.00046210526315789476, + "low_lr": 9.242105263157896e-06, + "step": 1022 + }, + { + "epoch": 2.690335305719921, + "grad_norm": 1.2534948587417603, + "learning_rate": 0.00046157894736842105, + "loss": 1.3551, + "step": 1023 + }, + { + "epoch": 2.690335305719921, + "high_lr": 0.00046157894736842105, + "low_lr": 9.231578947368421e-06, + "step": 1023 + }, + { + "epoch": 2.690335305719921, + "high_lr": 0.00046157894736842105, + "low_lr": 9.231578947368421e-06, + "step": 1023 + }, + { + "epoch": 2.690335305719921, + "high_lr": 0.00046157894736842105, + "low_lr": 9.231578947368421e-06, + "step": 1023 + }, + { + "epoch": 2.690335305719921, + "high_lr": 0.00046157894736842105, + "low_lr": 9.231578947368421e-06, + "step": 1023 + }, + { + "epoch": 2.690335305719921, + "high_lr": 0.00046157894736842105, + "low_lr": 9.231578947368421e-06, + "step": 1023 + }, + { + "epoch": 2.690335305719921, + "high_lr": 0.00046157894736842105, + "low_lr": 9.231578947368421e-06, + "step": 1023 + }, + { + "epoch": 2.690335305719921, + "high_lr": 0.00046157894736842105, + "low_lr": 9.231578947368421e-06, + "step": 1023 + }, + { + "epoch": 2.690335305719921, + "high_lr": 0.00046157894736842105, + "low_lr": 9.231578947368421e-06, + "step": 1023 + }, + { + "epoch": 2.692965154503616, + "grad_norm": 1.2943768501281738, + "learning_rate": 0.0004610526315789474, + "loss": 1.3696, + "step": 1024 + }, + { + "epoch": 2.692965154503616, + "high_lr": 0.0004610526315789474, + "low_lr": 9.221052631578949e-06, + "step": 1024 + }, + { + "epoch": 2.692965154503616, + "high_lr": 0.0004610526315789474, + "low_lr": 9.221052631578949e-06, + "step": 1024 + }, + { + "epoch": 2.692965154503616, + "high_lr": 0.0004610526315789474, + "low_lr": 9.221052631578949e-06, + "step": 1024 + }, + { + "epoch": 2.692965154503616, + "high_lr": 0.0004610526315789474, + "low_lr": 9.221052631578949e-06, + "step": 1024 + }, + { + "epoch": 2.692965154503616, + "high_lr": 0.0004610526315789474, + "low_lr": 9.221052631578949e-06, + "step": 1024 + }, + { + "epoch": 2.692965154503616, + "high_lr": 0.0004610526315789474, + "low_lr": 9.221052631578949e-06, + "step": 1024 + }, + { + "epoch": 2.692965154503616, + "high_lr": 0.0004610526315789474, + "low_lr": 9.221052631578949e-06, + "step": 1024 + }, + { + "epoch": 2.692965154503616, + "high_lr": 0.0004610526315789474, + "low_lr": 9.221052631578949e-06, + "step": 1024 + }, + { + "epoch": 2.695595003287311, + "grad_norm": 1.3256723880767822, + "learning_rate": 0.0004605263157894737, + "loss": 1.4162, + "step": 1025 + }, + { + "epoch": 2.695595003287311, + "high_lr": 0.0004605263157894737, + "low_lr": 9.210526315789474e-06, + "step": 1025 + }, + { + "epoch": 2.695595003287311, + "high_lr": 0.0004605263157894737, + "low_lr": 9.210526315789474e-06, + "step": 1025 + }, + { + "epoch": 2.695595003287311, + "high_lr": 0.0004605263157894737, + "low_lr": 9.210526315789474e-06, + "step": 1025 + }, + { + "epoch": 2.695595003287311, + "high_lr": 0.0004605263157894737, + "low_lr": 9.210526315789474e-06, + "step": 1025 + }, + { + "epoch": 2.695595003287311, + "high_lr": 0.0004605263157894737, + "low_lr": 9.210526315789474e-06, + "step": 1025 + }, + { + "epoch": 2.695595003287311, + "high_lr": 0.0004605263157894737, + "low_lr": 9.210526315789474e-06, + "step": 1025 + }, + { + "epoch": 2.695595003287311, + "high_lr": 0.0004605263157894737, + "low_lr": 9.210526315789474e-06, + "step": 1025 + }, + { + "epoch": 2.695595003287311, + "high_lr": 0.0004605263157894737, + "low_lr": 9.210526315789474e-06, + "step": 1025 + }, + { + "epoch": 2.698224852071006, + "grad_norm": 1.3400224447250366, + "learning_rate": 0.00046, + "loss": 1.3839, + "step": 1026 + }, + { + "epoch": 2.698224852071006, + "high_lr": 0.00046, + "low_lr": 9.200000000000002e-06, + "step": 1026 + }, + { + "epoch": 2.698224852071006, + "high_lr": 0.00046, + "low_lr": 9.200000000000002e-06, + "step": 1026 + }, + { + "epoch": 2.698224852071006, + "high_lr": 0.00046, + "low_lr": 9.200000000000002e-06, + "step": 1026 + }, + { + "epoch": 2.698224852071006, + "high_lr": 0.00046, + "low_lr": 9.200000000000002e-06, + "step": 1026 + }, + { + "epoch": 2.698224852071006, + "high_lr": 0.00046, + "low_lr": 9.200000000000002e-06, + "step": 1026 + }, + { + "epoch": 2.698224852071006, + "high_lr": 0.00046, + "low_lr": 9.200000000000002e-06, + "step": 1026 + }, + { + "epoch": 2.698224852071006, + "high_lr": 0.00046, + "low_lr": 9.200000000000002e-06, + "step": 1026 + }, + { + "epoch": 2.698224852071006, + "high_lr": 0.00046, + "low_lr": 9.200000000000002e-06, + "step": 1026 + }, + { + "epoch": 2.700854700854701, + "grad_norm": 1.2152968645095825, + "learning_rate": 0.0004594736842105263, + "loss": 1.3392, + "step": 1027 + }, + { + "epoch": 2.700854700854701, + "high_lr": 0.0004594736842105263, + "low_lr": 9.189473684210526e-06, + "step": 1027 + }, + { + "epoch": 2.700854700854701, + "high_lr": 0.0004594736842105263, + "low_lr": 9.189473684210526e-06, + "step": 1027 + }, + { + "epoch": 2.700854700854701, + "high_lr": 0.0004594736842105263, + "low_lr": 9.189473684210526e-06, + "step": 1027 + }, + { + "epoch": 2.700854700854701, + "high_lr": 0.0004594736842105263, + "low_lr": 9.189473684210526e-06, + "step": 1027 + }, + { + "epoch": 2.700854700854701, + "high_lr": 0.0004594736842105263, + "low_lr": 9.189473684210526e-06, + "step": 1027 + }, + { + "epoch": 2.700854700854701, + "high_lr": 0.0004594736842105263, + "low_lr": 9.189473684210526e-06, + "step": 1027 + }, + { + "epoch": 2.700854700854701, + "high_lr": 0.0004594736842105263, + "low_lr": 9.189473684210526e-06, + "step": 1027 + }, + { + "epoch": 2.700854700854701, + "high_lr": 0.0004594736842105263, + "low_lr": 9.189473684210526e-06, + "step": 1027 + }, + { + "epoch": 2.7034845496383957, + "grad_norm": 1.329406976699829, + "learning_rate": 0.00045894736842105264, + "loss": 1.3347, + "step": 1028 + }, + { + "epoch": 2.7034845496383957, + "high_lr": 0.00045894736842105264, + "low_lr": 9.178947368421053e-06, + "step": 1028 + }, + { + "epoch": 2.7034845496383957, + "high_lr": 0.00045894736842105264, + "low_lr": 9.178947368421053e-06, + "step": 1028 + }, + { + "epoch": 2.7034845496383957, + "high_lr": 0.00045894736842105264, + "low_lr": 9.178947368421053e-06, + "step": 1028 + }, + { + "epoch": 2.7034845496383957, + "high_lr": 0.00045894736842105264, + "low_lr": 9.178947368421053e-06, + "step": 1028 + }, + { + "epoch": 2.7034845496383957, + "high_lr": 0.00045894736842105264, + "low_lr": 9.178947368421053e-06, + "step": 1028 + }, + { + "epoch": 2.7034845496383957, + "high_lr": 0.00045894736842105264, + "low_lr": 9.178947368421053e-06, + "step": 1028 + }, + { + "epoch": 2.7034845496383957, + "high_lr": 0.00045894736842105264, + "low_lr": 9.178947368421053e-06, + "step": 1028 + }, + { + "epoch": 2.7034845496383957, + "high_lr": 0.00045894736842105264, + "low_lr": 9.178947368421053e-06, + "step": 1028 + }, + { + "epoch": 2.706114398422091, + "grad_norm": 1.3266621828079224, + "learning_rate": 0.000458421052631579, + "loss": 1.4081, + "step": 1029 + }, + { + "epoch": 2.706114398422091, + "high_lr": 0.000458421052631579, + "low_lr": 9.168421052631579e-06, + "step": 1029 + }, + { + "epoch": 2.706114398422091, + "high_lr": 0.000458421052631579, + "low_lr": 9.168421052631579e-06, + "step": 1029 + }, + { + "epoch": 2.706114398422091, + "high_lr": 0.000458421052631579, + "low_lr": 9.168421052631579e-06, + "step": 1029 + }, + { + "epoch": 2.706114398422091, + "high_lr": 0.000458421052631579, + "low_lr": 9.168421052631579e-06, + "step": 1029 + }, + { + "epoch": 2.706114398422091, + "high_lr": 0.000458421052631579, + "low_lr": 9.168421052631579e-06, + "step": 1029 + }, + { + "epoch": 2.706114398422091, + "high_lr": 0.000458421052631579, + "low_lr": 9.168421052631579e-06, + "step": 1029 + }, + { + "epoch": 2.706114398422091, + "high_lr": 0.000458421052631579, + "low_lr": 9.168421052631579e-06, + "step": 1029 + }, + { + "epoch": 2.706114398422091, + "high_lr": 0.000458421052631579, + "low_lr": 9.168421052631579e-06, + "step": 1029 + }, + { + "epoch": 2.708744247205786, + "grad_norm": 1.3029323816299438, + "learning_rate": 0.00045789473684210527, + "loss": 1.4039, + "step": 1030 + }, + { + "epoch": 2.708744247205786, + "high_lr": 0.00045789473684210527, + "low_lr": 9.157894736842105e-06, + "step": 1030 + }, + { + "epoch": 2.708744247205786, + "high_lr": 0.00045789473684210527, + "low_lr": 9.157894736842105e-06, + "step": 1030 + }, + { + "epoch": 2.708744247205786, + "high_lr": 0.00045789473684210527, + "low_lr": 9.157894736842105e-06, + "step": 1030 + }, + { + "epoch": 2.708744247205786, + "high_lr": 0.00045789473684210527, + "low_lr": 9.157894736842105e-06, + "step": 1030 + }, + { + "epoch": 2.708744247205786, + "high_lr": 0.00045789473684210527, + "low_lr": 9.157894736842105e-06, + "step": 1030 + }, + { + "epoch": 2.708744247205786, + "high_lr": 0.00045789473684210527, + "low_lr": 9.157894736842105e-06, + "step": 1030 + }, + { + "epoch": 2.708744247205786, + "high_lr": 0.00045789473684210527, + "low_lr": 9.157894736842105e-06, + "step": 1030 + }, + { + "epoch": 2.708744247205786, + "high_lr": 0.00045789473684210527, + "low_lr": 9.157894736842105e-06, + "step": 1030 + }, + { + "epoch": 2.7113740959894805, + "grad_norm": 1.299635648727417, + "learning_rate": 0.0004573684210526316, + "loss": 1.4511, + "step": 1031 + }, + { + "epoch": 2.7113740959894805, + "high_lr": 0.0004573684210526316, + "low_lr": 9.147368421052633e-06, + "step": 1031 + }, + { + "epoch": 2.7113740959894805, + "high_lr": 0.0004573684210526316, + "low_lr": 9.147368421052633e-06, + "step": 1031 + }, + { + "epoch": 2.7113740959894805, + "high_lr": 0.0004573684210526316, + "low_lr": 9.147368421052633e-06, + "step": 1031 + }, + { + "epoch": 2.7113740959894805, + "high_lr": 0.0004573684210526316, + "low_lr": 9.147368421052633e-06, + "step": 1031 + }, + { + "epoch": 2.7113740959894805, + "high_lr": 0.0004573684210526316, + "low_lr": 9.147368421052633e-06, + "step": 1031 + }, + { + "epoch": 2.7113740959894805, + "high_lr": 0.0004573684210526316, + "low_lr": 9.147368421052633e-06, + "step": 1031 + }, + { + "epoch": 2.7113740959894805, + "high_lr": 0.0004573684210526316, + "low_lr": 9.147368421052633e-06, + "step": 1031 + }, + { + "epoch": 2.7113740959894805, + "high_lr": 0.0004573684210526316, + "low_lr": 9.147368421052633e-06, + "step": 1031 + }, + { + "epoch": 2.7140039447731756, + "grad_norm": 1.3590953350067139, + "learning_rate": 0.0004568421052631579, + "loss": 1.3464, + "step": 1032 + }, + { + "epoch": 2.7140039447731756, + "high_lr": 0.0004568421052631579, + "low_lr": 9.136842105263158e-06, + "step": 1032 + }, + { + "epoch": 2.7140039447731756, + "high_lr": 0.0004568421052631579, + "low_lr": 9.136842105263158e-06, + "step": 1032 + }, + { + "epoch": 2.7140039447731756, + "high_lr": 0.0004568421052631579, + "low_lr": 9.136842105263158e-06, + "step": 1032 + }, + { + "epoch": 2.7140039447731756, + "high_lr": 0.0004568421052631579, + "low_lr": 9.136842105263158e-06, + "step": 1032 + }, + { + "epoch": 2.7140039447731756, + "high_lr": 0.0004568421052631579, + "low_lr": 9.136842105263158e-06, + "step": 1032 + }, + { + "epoch": 2.7140039447731756, + "high_lr": 0.0004568421052631579, + "low_lr": 9.136842105263158e-06, + "step": 1032 + }, + { + "epoch": 2.7140039447731756, + "high_lr": 0.0004568421052631579, + "low_lr": 9.136842105263158e-06, + "step": 1032 + }, + { + "epoch": 2.7140039447731756, + "high_lr": 0.0004568421052631579, + "low_lr": 9.136842105263158e-06, + "step": 1032 + }, + { + "epoch": 2.7166337935568707, + "grad_norm": 1.2269901037216187, + "learning_rate": 0.00045631578947368423, + "loss": 1.3538, + "step": 1033 + }, + { + "epoch": 2.7166337935568707, + "high_lr": 0.00045631578947368423, + "low_lr": 9.126315789473686e-06, + "step": 1033 + }, + { + "epoch": 2.7166337935568707, + "high_lr": 0.00045631578947368423, + "low_lr": 9.126315789473686e-06, + "step": 1033 + }, + { + "epoch": 2.7166337935568707, + "high_lr": 0.00045631578947368423, + "low_lr": 9.126315789473686e-06, + "step": 1033 + }, + { + "epoch": 2.7166337935568707, + "high_lr": 0.00045631578947368423, + "low_lr": 9.126315789473686e-06, + "step": 1033 + }, + { + "epoch": 2.7166337935568707, + "high_lr": 0.00045631578947368423, + "low_lr": 9.126315789473686e-06, + "step": 1033 + }, + { + "epoch": 2.7166337935568707, + "high_lr": 0.00045631578947368423, + "low_lr": 9.126315789473686e-06, + "step": 1033 + }, + { + "epoch": 2.7166337935568707, + "high_lr": 0.00045631578947368423, + "low_lr": 9.126315789473686e-06, + "step": 1033 + }, + { + "epoch": 2.7166337935568707, + "high_lr": 0.00045631578947368423, + "low_lr": 9.126315789473686e-06, + "step": 1033 + }, + { + "epoch": 2.7192636423405654, + "grad_norm": 1.3440412282943726, + "learning_rate": 0.0004557894736842105, + "loss": 1.3747, + "step": 1034 + }, + { + "epoch": 2.7192636423405654, + "high_lr": 0.0004557894736842105, + "low_lr": 9.11578947368421e-06, + "step": 1034 + }, + { + "epoch": 2.7192636423405654, + "high_lr": 0.0004557894736842105, + "low_lr": 9.11578947368421e-06, + "step": 1034 + }, + { + "epoch": 2.7192636423405654, + "high_lr": 0.0004557894736842105, + "low_lr": 9.11578947368421e-06, + "step": 1034 + }, + { + "epoch": 2.7192636423405654, + "high_lr": 0.0004557894736842105, + "low_lr": 9.11578947368421e-06, + "step": 1034 + }, + { + "epoch": 2.7192636423405654, + "high_lr": 0.0004557894736842105, + "low_lr": 9.11578947368421e-06, + "step": 1034 + }, + { + "epoch": 2.7192636423405654, + "high_lr": 0.0004557894736842105, + "low_lr": 9.11578947368421e-06, + "step": 1034 + }, + { + "epoch": 2.7192636423405654, + "high_lr": 0.0004557894736842105, + "low_lr": 9.11578947368421e-06, + "step": 1034 + }, + { + "epoch": 2.7192636423405654, + "high_lr": 0.0004557894736842105, + "low_lr": 9.11578947368421e-06, + "step": 1034 + }, + { + "epoch": 2.7218934911242605, + "grad_norm": 1.2938976287841797, + "learning_rate": 0.00045526315789473686, + "loss": 1.3702, + "step": 1035 + }, + { + "epoch": 2.7218934911242605, + "high_lr": 0.00045526315789473686, + "low_lr": 9.105263157894739e-06, + "step": 1035 + }, + { + "epoch": 2.7218934911242605, + "high_lr": 0.00045526315789473686, + "low_lr": 9.105263157894739e-06, + "step": 1035 + }, + { + "epoch": 2.7218934911242605, + "high_lr": 0.00045526315789473686, + "low_lr": 9.105263157894739e-06, + "step": 1035 + }, + { + "epoch": 2.7218934911242605, + "high_lr": 0.00045526315789473686, + "low_lr": 9.105263157894739e-06, + "step": 1035 + }, + { + "epoch": 2.7218934911242605, + "high_lr": 0.00045526315789473686, + "low_lr": 9.105263157894739e-06, + "step": 1035 + }, + { + "epoch": 2.7218934911242605, + "high_lr": 0.00045526315789473686, + "low_lr": 9.105263157894739e-06, + "step": 1035 + }, + { + "epoch": 2.7218934911242605, + "high_lr": 0.00045526315789473686, + "low_lr": 9.105263157894739e-06, + "step": 1035 + }, + { + "epoch": 2.7218934911242605, + "high_lr": 0.00045526315789473686, + "low_lr": 9.105263157894739e-06, + "step": 1035 + }, + { + "epoch": 2.724523339907955, + "grad_norm": 1.2927764654159546, + "learning_rate": 0.0004547368421052632, + "loss": 1.427, + "step": 1036 + }, + { + "epoch": 2.724523339907955, + "high_lr": 0.0004547368421052632, + "low_lr": 9.094736842105263e-06, + "step": 1036 + }, + { + "epoch": 2.724523339907955, + "high_lr": 0.0004547368421052632, + "low_lr": 9.094736842105263e-06, + "step": 1036 + }, + { + "epoch": 2.724523339907955, + "high_lr": 0.0004547368421052632, + "low_lr": 9.094736842105263e-06, + "step": 1036 + }, + { + "epoch": 2.724523339907955, + "high_lr": 0.0004547368421052632, + "low_lr": 9.094736842105263e-06, + "step": 1036 + }, + { + "epoch": 2.724523339907955, + "high_lr": 0.0004547368421052632, + "low_lr": 9.094736842105263e-06, + "step": 1036 + }, + { + "epoch": 2.724523339907955, + "high_lr": 0.0004547368421052632, + "low_lr": 9.094736842105263e-06, + "step": 1036 + }, + { + "epoch": 2.724523339907955, + "high_lr": 0.0004547368421052632, + "low_lr": 9.094736842105263e-06, + "step": 1036 + }, + { + "epoch": 2.724523339907955, + "high_lr": 0.0004547368421052632, + "low_lr": 9.094736842105263e-06, + "step": 1036 + }, + { + "epoch": 2.7271531886916502, + "grad_norm": 1.2207523584365845, + "learning_rate": 0.0004542105263157895, + "loss": 1.3559, + "step": 1037 + }, + { + "epoch": 2.7271531886916502, + "high_lr": 0.0004542105263157895, + "low_lr": 9.08421052631579e-06, + "step": 1037 + }, + { + "epoch": 2.7271531886916502, + "high_lr": 0.0004542105263157895, + "low_lr": 9.08421052631579e-06, + "step": 1037 + }, + { + "epoch": 2.7271531886916502, + "high_lr": 0.0004542105263157895, + "low_lr": 9.08421052631579e-06, + "step": 1037 + }, + { + "epoch": 2.7271531886916502, + "high_lr": 0.0004542105263157895, + "low_lr": 9.08421052631579e-06, + "step": 1037 + }, + { + "epoch": 2.7271531886916502, + "high_lr": 0.0004542105263157895, + "low_lr": 9.08421052631579e-06, + "step": 1037 + }, + { + "epoch": 2.7271531886916502, + "high_lr": 0.0004542105263157895, + "low_lr": 9.08421052631579e-06, + "step": 1037 + }, + { + "epoch": 2.7271531886916502, + "high_lr": 0.0004542105263157895, + "low_lr": 9.08421052631579e-06, + "step": 1037 + }, + { + "epoch": 2.7271531886916502, + "high_lr": 0.0004542105263157895, + "low_lr": 9.08421052631579e-06, + "step": 1037 + }, + { + "epoch": 2.729783037475345, + "grad_norm": 1.2172162532806396, + "learning_rate": 0.0004536842105263158, + "loss": 1.4333, + "step": 1038 + }, + { + "epoch": 2.729783037475345, + "high_lr": 0.0004536842105263158, + "low_lr": 9.073684210526316e-06, + "step": 1038 + }, + { + "epoch": 2.729783037475345, + "high_lr": 0.0004536842105263158, + "low_lr": 9.073684210526316e-06, + "step": 1038 + }, + { + "epoch": 2.729783037475345, + "high_lr": 0.0004536842105263158, + "low_lr": 9.073684210526316e-06, + "step": 1038 + }, + { + "epoch": 2.729783037475345, + "high_lr": 0.0004536842105263158, + "low_lr": 9.073684210526316e-06, + "step": 1038 + }, + { + "epoch": 2.729783037475345, + "high_lr": 0.0004536842105263158, + "low_lr": 9.073684210526316e-06, + "step": 1038 + }, + { + "epoch": 2.729783037475345, + "high_lr": 0.0004536842105263158, + "low_lr": 9.073684210526316e-06, + "step": 1038 + }, + { + "epoch": 2.729783037475345, + "high_lr": 0.0004536842105263158, + "low_lr": 9.073684210526316e-06, + "step": 1038 + }, + { + "epoch": 2.729783037475345, + "high_lr": 0.0004536842105263158, + "low_lr": 9.073684210526316e-06, + "step": 1038 + }, + { + "epoch": 2.73241288625904, + "grad_norm": 1.2281653881072998, + "learning_rate": 0.0004531578947368421, + "loss": 1.3917, + "step": 1039 + }, + { + "epoch": 2.73241288625904, + "high_lr": 0.0004531578947368421, + "low_lr": 9.063157894736842e-06, + "step": 1039 + }, + { + "epoch": 2.73241288625904, + "high_lr": 0.0004531578947368421, + "low_lr": 9.063157894736842e-06, + "step": 1039 + }, + { + "epoch": 2.73241288625904, + "high_lr": 0.0004531578947368421, + "low_lr": 9.063157894736842e-06, + "step": 1039 + }, + { + "epoch": 2.73241288625904, + "high_lr": 0.0004531578947368421, + "low_lr": 9.063157894736842e-06, + "step": 1039 + }, + { + "epoch": 2.73241288625904, + "high_lr": 0.0004531578947368421, + "low_lr": 9.063157894736842e-06, + "step": 1039 + }, + { + "epoch": 2.73241288625904, + "high_lr": 0.0004531578947368421, + "low_lr": 9.063157894736842e-06, + "step": 1039 + }, + { + "epoch": 2.73241288625904, + "high_lr": 0.0004531578947368421, + "low_lr": 9.063157894736842e-06, + "step": 1039 + }, + { + "epoch": 2.73241288625904, + "high_lr": 0.0004531578947368421, + "low_lr": 9.063157894736842e-06, + "step": 1039 + }, + { + "epoch": 2.735042735042735, + "grad_norm": 1.2144098281860352, + "learning_rate": 0.00045263157894736845, + "loss": 1.3998, + "step": 1040 + }, + { + "epoch": 2.735042735042735, + "high_lr": 0.00045263157894736845, + "low_lr": 9.05263157894737e-06, + "step": 1040 + }, + { + "epoch": 2.735042735042735, + "high_lr": 0.00045263157894736845, + "low_lr": 9.05263157894737e-06, + "step": 1040 + }, + { + "epoch": 2.735042735042735, + "high_lr": 0.00045263157894736845, + "low_lr": 9.05263157894737e-06, + "step": 1040 + }, + { + "epoch": 2.735042735042735, + "high_lr": 0.00045263157894736845, + "low_lr": 9.05263157894737e-06, + "step": 1040 + }, + { + "epoch": 2.735042735042735, + "high_lr": 0.00045263157894736845, + "low_lr": 9.05263157894737e-06, + "step": 1040 + }, + { + "epoch": 2.735042735042735, + "high_lr": 0.00045263157894736845, + "low_lr": 9.05263157894737e-06, + "step": 1040 + }, + { + "epoch": 2.735042735042735, + "high_lr": 0.00045263157894736845, + "low_lr": 9.05263157894737e-06, + "step": 1040 + }, + { + "epoch": 2.735042735042735, + "high_lr": 0.00045263157894736845, + "low_lr": 9.05263157894737e-06, + "step": 1040 + }, + { + "epoch": 2.7376725838264298, + "grad_norm": 1.2607382535934448, + "learning_rate": 0.00045210526315789474, + "loss": 1.3706, + "step": 1041 + }, + { + "epoch": 2.7376725838264298, + "high_lr": 0.00045210526315789474, + "low_lr": 9.042105263157895e-06, + "step": 1041 + }, + { + "epoch": 2.7376725838264298, + "high_lr": 0.00045210526315789474, + "low_lr": 9.042105263157895e-06, + "step": 1041 + }, + { + "epoch": 2.7376725838264298, + "high_lr": 0.00045210526315789474, + "low_lr": 9.042105263157895e-06, + "step": 1041 + }, + { + "epoch": 2.7376725838264298, + "high_lr": 0.00045210526315789474, + "low_lr": 9.042105263157895e-06, + "step": 1041 + }, + { + "epoch": 2.7376725838264298, + "high_lr": 0.00045210526315789474, + "low_lr": 9.042105263157895e-06, + "step": 1041 + }, + { + "epoch": 2.7376725838264298, + "high_lr": 0.00045210526315789474, + "low_lr": 9.042105263157895e-06, + "step": 1041 + }, + { + "epoch": 2.7376725838264298, + "high_lr": 0.00045210526315789474, + "low_lr": 9.042105263157895e-06, + "step": 1041 + }, + { + "epoch": 2.7376725838264298, + "high_lr": 0.00045210526315789474, + "low_lr": 9.042105263157895e-06, + "step": 1041 + }, + { + "epoch": 2.740302432610125, + "grad_norm": 1.1702170372009277, + "learning_rate": 0.0004515789473684211, + "loss": 1.3604, + "step": 1042 + }, + { + "epoch": 2.740302432610125, + "high_lr": 0.0004515789473684211, + "low_lr": 9.031578947368423e-06, + "step": 1042 + }, + { + "epoch": 2.740302432610125, + "high_lr": 0.0004515789473684211, + "low_lr": 9.031578947368423e-06, + "step": 1042 + }, + { + "epoch": 2.740302432610125, + "high_lr": 0.0004515789473684211, + "low_lr": 9.031578947368423e-06, + "step": 1042 + }, + { + "epoch": 2.740302432610125, + "high_lr": 0.0004515789473684211, + "low_lr": 9.031578947368423e-06, + "step": 1042 + }, + { + "epoch": 2.740302432610125, + "high_lr": 0.0004515789473684211, + "low_lr": 9.031578947368423e-06, + "step": 1042 + }, + { + "epoch": 2.740302432610125, + "high_lr": 0.0004515789473684211, + "low_lr": 9.031578947368423e-06, + "step": 1042 + }, + { + "epoch": 2.740302432610125, + "high_lr": 0.0004515789473684211, + "low_lr": 9.031578947368423e-06, + "step": 1042 + }, + { + "epoch": 2.740302432610125, + "high_lr": 0.0004515789473684211, + "low_lr": 9.031578947368423e-06, + "step": 1042 + }, + { + "epoch": 2.74293228139382, + "grad_norm": 1.2794098854064941, + "learning_rate": 0.00045105263157894736, + "loss": 1.4444, + "step": 1043 + }, + { + "epoch": 2.74293228139382, + "high_lr": 0.00045105263157894736, + "low_lr": 9.021052631578948e-06, + "step": 1043 + }, + { + "epoch": 2.74293228139382, + "high_lr": 0.00045105263157894736, + "low_lr": 9.021052631578948e-06, + "step": 1043 + }, + { + "epoch": 2.74293228139382, + "high_lr": 0.00045105263157894736, + "low_lr": 9.021052631578948e-06, + "step": 1043 + }, + { + "epoch": 2.74293228139382, + "high_lr": 0.00045105263157894736, + "low_lr": 9.021052631578948e-06, + "step": 1043 + }, + { + "epoch": 2.74293228139382, + "high_lr": 0.00045105263157894736, + "low_lr": 9.021052631578948e-06, + "step": 1043 + }, + { + "epoch": 2.74293228139382, + "high_lr": 0.00045105263157894736, + "low_lr": 9.021052631578948e-06, + "step": 1043 + }, + { + "epoch": 2.74293228139382, + "high_lr": 0.00045105263157894736, + "low_lr": 9.021052631578948e-06, + "step": 1043 + }, + { + "epoch": 2.74293228139382, + "high_lr": 0.00045105263157894736, + "low_lr": 9.021052631578948e-06, + "step": 1043 + }, + { + "epoch": 2.7455621301775146, + "grad_norm": 1.275855541229248, + "learning_rate": 0.00045052631578947365, + "loss": 1.4003, + "step": 1044 + }, + { + "epoch": 2.7455621301775146, + "high_lr": 0.00045052631578947365, + "low_lr": 9.010526315789474e-06, + "step": 1044 + }, + { + "epoch": 2.7455621301775146, + "high_lr": 0.00045052631578947365, + "low_lr": 9.010526315789474e-06, + "step": 1044 + }, + { + "epoch": 2.7455621301775146, + "high_lr": 0.00045052631578947365, + "low_lr": 9.010526315789474e-06, + "step": 1044 + }, + { + "epoch": 2.7455621301775146, + "high_lr": 0.00045052631578947365, + "low_lr": 9.010526315789474e-06, + "step": 1044 + }, + { + "epoch": 2.7455621301775146, + "high_lr": 0.00045052631578947365, + "low_lr": 9.010526315789474e-06, + "step": 1044 + }, + { + "epoch": 2.7455621301775146, + "high_lr": 0.00045052631578947365, + "low_lr": 9.010526315789474e-06, + "step": 1044 + }, + { + "epoch": 2.7455621301775146, + "high_lr": 0.00045052631578947365, + "low_lr": 9.010526315789474e-06, + "step": 1044 + }, + { + "epoch": 2.7455621301775146, + "high_lr": 0.00045052631578947365, + "low_lr": 9.010526315789474e-06, + "step": 1044 + }, + { + "epoch": 2.7481919789612097, + "grad_norm": 1.3354053497314453, + "learning_rate": 0.00045000000000000004, + "loss": 1.3732, + "step": 1045 + }, + { + "epoch": 2.7481919789612097, + "high_lr": 0.00045000000000000004, + "low_lr": 9e-06, + "step": 1045 + }, + { + "epoch": 2.7481919789612097, + "high_lr": 0.00045000000000000004, + "low_lr": 9e-06, + "step": 1045 + }, + { + "epoch": 2.7481919789612097, + "high_lr": 0.00045000000000000004, + "low_lr": 9e-06, + "step": 1045 + }, + { + "epoch": 2.7481919789612097, + "high_lr": 0.00045000000000000004, + "low_lr": 9e-06, + "step": 1045 + }, + { + "epoch": 2.7481919789612097, + "high_lr": 0.00045000000000000004, + "low_lr": 9e-06, + "step": 1045 + }, + { + "epoch": 2.7481919789612097, + "high_lr": 0.00045000000000000004, + "low_lr": 9e-06, + "step": 1045 + }, + { + "epoch": 2.7481919789612097, + "high_lr": 0.00045000000000000004, + "low_lr": 9e-06, + "step": 1045 + }, + { + "epoch": 2.7481919789612097, + "high_lr": 0.00045000000000000004, + "low_lr": 9e-06, + "step": 1045 + }, + { + "epoch": 2.750821827744905, + "grad_norm": 1.233538031578064, + "learning_rate": 0.00044947368421052633, + "loss": 1.365, + "step": 1046 + }, + { + "epoch": 2.750821827744905, + "high_lr": 0.00044947368421052633, + "low_lr": 8.989473684210527e-06, + "step": 1046 + }, + { + "epoch": 2.750821827744905, + "high_lr": 0.00044947368421052633, + "low_lr": 8.989473684210527e-06, + "step": 1046 + }, + { + "epoch": 2.750821827744905, + "high_lr": 0.00044947368421052633, + "low_lr": 8.989473684210527e-06, + "step": 1046 + }, + { + "epoch": 2.750821827744905, + "high_lr": 0.00044947368421052633, + "low_lr": 8.989473684210527e-06, + "step": 1046 + }, + { + "epoch": 2.750821827744905, + "high_lr": 0.00044947368421052633, + "low_lr": 8.989473684210527e-06, + "step": 1046 + }, + { + "epoch": 2.750821827744905, + "high_lr": 0.00044947368421052633, + "low_lr": 8.989473684210527e-06, + "step": 1046 + }, + { + "epoch": 2.750821827744905, + "high_lr": 0.00044947368421052633, + "low_lr": 8.989473684210527e-06, + "step": 1046 + }, + { + "epoch": 2.750821827744905, + "high_lr": 0.00044947368421052633, + "low_lr": 8.989473684210527e-06, + "step": 1046 + }, + { + "epoch": 2.7534516765285995, + "grad_norm": 1.243747353553772, + "learning_rate": 0.00044894736842105267, + "loss": 1.3859, + "step": 1047 + }, + { + "epoch": 2.7534516765285995, + "high_lr": 0.00044894736842105267, + "low_lr": 8.978947368421055e-06, + "step": 1047 + }, + { + "epoch": 2.7534516765285995, + "high_lr": 0.00044894736842105267, + "low_lr": 8.978947368421055e-06, + "step": 1047 + }, + { + "epoch": 2.7534516765285995, + "high_lr": 0.00044894736842105267, + "low_lr": 8.978947368421055e-06, + "step": 1047 + }, + { + "epoch": 2.7534516765285995, + "high_lr": 0.00044894736842105267, + "low_lr": 8.978947368421055e-06, + "step": 1047 + }, + { + "epoch": 2.7534516765285995, + "high_lr": 0.00044894736842105267, + "low_lr": 8.978947368421055e-06, + "step": 1047 + }, + { + "epoch": 2.7534516765285995, + "high_lr": 0.00044894736842105267, + "low_lr": 8.978947368421055e-06, + "step": 1047 + }, + { + "epoch": 2.7534516765285995, + "high_lr": 0.00044894736842105267, + "low_lr": 8.978947368421055e-06, + "step": 1047 + }, + { + "epoch": 2.7534516765285995, + "high_lr": 0.00044894736842105267, + "low_lr": 8.978947368421055e-06, + "step": 1047 + }, + { + "epoch": 2.7560815253122946, + "grad_norm": 1.3367723226547241, + "learning_rate": 0.00044842105263157895, + "loss": 1.3905, + "step": 1048 + }, + { + "epoch": 2.7560815253122946, + "high_lr": 0.00044842105263157895, + "low_lr": 8.96842105263158e-06, + "step": 1048 + }, + { + "epoch": 2.7560815253122946, + "high_lr": 0.00044842105263157895, + "low_lr": 8.96842105263158e-06, + "step": 1048 + }, + { + "epoch": 2.7560815253122946, + "high_lr": 0.00044842105263157895, + "low_lr": 8.96842105263158e-06, + "step": 1048 + }, + { + "epoch": 2.7560815253122946, + "high_lr": 0.00044842105263157895, + "low_lr": 8.96842105263158e-06, + "step": 1048 + }, + { + "epoch": 2.7560815253122946, + "high_lr": 0.00044842105263157895, + "low_lr": 8.96842105263158e-06, + "step": 1048 + }, + { + "epoch": 2.7560815253122946, + "high_lr": 0.00044842105263157895, + "low_lr": 8.96842105263158e-06, + "step": 1048 + }, + { + "epoch": 2.7560815253122946, + "high_lr": 0.00044842105263157895, + "low_lr": 8.96842105263158e-06, + "step": 1048 + }, + { + "epoch": 2.7560815253122946, + "high_lr": 0.00044842105263157895, + "low_lr": 8.96842105263158e-06, + "step": 1048 + }, + { + "epoch": 2.7587113740959897, + "grad_norm": 1.2546968460083008, + "learning_rate": 0.0004478947368421053, + "loss": 1.4116, + "step": 1049 + }, + { + "epoch": 2.7587113740959897, + "high_lr": 0.0004478947368421053, + "low_lr": 8.957894736842107e-06, + "step": 1049 + }, + { + "epoch": 2.7587113740959897, + "high_lr": 0.0004478947368421053, + "low_lr": 8.957894736842107e-06, + "step": 1049 + }, + { + "epoch": 2.7587113740959897, + "high_lr": 0.0004478947368421053, + "low_lr": 8.957894736842107e-06, + "step": 1049 + }, + { + "epoch": 2.7587113740959897, + "high_lr": 0.0004478947368421053, + "low_lr": 8.957894736842107e-06, + "step": 1049 + }, + { + "epoch": 2.7587113740959897, + "high_lr": 0.0004478947368421053, + "low_lr": 8.957894736842107e-06, + "step": 1049 + }, + { + "epoch": 2.7587113740959897, + "high_lr": 0.0004478947368421053, + "low_lr": 8.957894736842107e-06, + "step": 1049 + }, + { + "epoch": 2.7587113740959897, + "high_lr": 0.0004478947368421053, + "low_lr": 8.957894736842107e-06, + "step": 1049 + }, + { + "epoch": 2.7587113740959897, + "high_lr": 0.0004478947368421053, + "low_lr": 8.957894736842107e-06, + "step": 1049 + }, + { + "epoch": 2.7613412228796843, + "grad_norm": 1.2776412963867188, + "learning_rate": 0.0004473684210526316, + "loss": 1.3674, + "step": 1050 + }, + { + "epoch": 2.7613412228796843, + "high_lr": 0.0004473684210526316, + "low_lr": 8.947368421052632e-06, + "step": 1050 + }, + { + "epoch": 2.7613412228796843, + "high_lr": 0.0004473684210526316, + "low_lr": 8.947368421052632e-06, + "step": 1050 + }, + { + "epoch": 2.7613412228796843, + "high_lr": 0.0004473684210526316, + "low_lr": 8.947368421052632e-06, + "step": 1050 + }, + { + "epoch": 2.7613412228796843, + "high_lr": 0.0004473684210526316, + "low_lr": 8.947368421052632e-06, + "step": 1050 + }, + { + "epoch": 2.7613412228796843, + "high_lr": 0.0004473684210526316, + "low_lr": 8.947368421052632e-06, + "step": 1050 + }, + { + "epoch": 2.7613412228796843, + "high_lr": 0.0004473684210526316, + "low_lr": 8.947368421052632e-06, + "step": 1050 + }, + { + "epoch": 2.7613412228796843, + "high_lr": 0.0004473684210526316, + "low_lr": 8.947368421052632e-06, + "step": 1050 + }, + { + "epoch": 2.7613412228796843, + "high_lr": 0.0004473684210526316, + "low_lr": 8.947368421052632e-06, + "step": 1050 + }, + { + "epoch": 2.7639710716633794, + "grad_norm": 1.3470097780227661, + "learning_rate": 0.00044684210526315787, + "loss": 1.3553, + "step": 1051 + }, + { + "epoch": 2.7639710716633794, + "high_lr": 0.00044684210526315787, + "low_lr": 8.936842105263158e-06, + "step": 1051 + }, + { + "epoch": 2.7639710716633794, + "high_lr": 0.00044684210526315787, + "low_lr": 8.936842105263158e-06, + "step": 1051 + }, + { + "epoch": 2.7639710716633794, + "high_lr": 0.00044684210526315787, + "low_lr": 8.936842105263158e-06, + "step": 1051 + }, + { + "epoch": 2.7639710716633794, + "high_lr": 0.00044684210526315787, + "low_lr": 8.936842105263158e-06, + "step": 1051 + }, + { + "epoch": 2.7639710716633794, + "high_lr": 0.00044684210526315787, + "low_lr": 8.936842105263158e-06, + "step": 1051 + }, + { + "epoch": 2.7639710716633794, + "high_lr": 0.00044684210526315787, + "low_lr": 8.936842105263158e-06, + "step": 1051 + }, + { + "epoch": 2.7639710716633794, + "high_lr": 0.00044684210526315787, + "low_lr": 8.936842105263158e-06, + "step": 1051 + }, + { + "epoch": 2.7639710716633794, + "high_lr": 0.00044684210526315787, + "low_lr": 8.936842105263158e-06, + "step": 1051 + }, + { + "epoch": 2.7666009204470745, + "grad_norm": 1.3481202125549316, + "learning_rate": 0.0004463157894736842, + "loss": 1.4003, + "step": 1052 + }, + { + "epoch": 2.7666009204470745, + "high_lr": 0.0004463157894736842, + "low_lr": 8.926315789473685e-06, + "step": 1052 + }, + { + "epoch": 2.7666009204470745, + "high_lr": 0.0004463157894736842, + "low_lr": 8.926315789473685e-06, + "step": 1052 + }, + { + "epoch": 2.7666009204470745, + "high_lr": 0.0004463157894736842, + "low_lr": 8.926315789473685e-06, + "step": 1052 + }, + { + "epoch": 2.7666009204470745, + "high_lr": 0.0004463157894736842, + "low_lr": 8.926315789473685e-06, + "step": 1052 + }, + { + "epoch": 2.7666009204470745, + "high_lr": 0.0004463157894736842, + "low_lr": 8.926315789473685e-06, + "step": 1052 + }, + { + "epoch": 2.7666009204470745, + "high_lr": 0.0004463157894736842, + "low_lr": 8.926315789473685e-06, + "step": 1052 + }, + { + "epoch": 2.7666009204470745, + "high_lr": 0.0004463157894736842, + "low_lr": 8.926315789473685e-06, + "step": 1052 + }, + { + "epoch": 2.7666009204470745, + "high_lr": 0.0004463157894736842, + "low_lr": 8.926315789473685e-06, + "step": 1052 + }, + { + "epoch": 2.769230769230769, + "grad_norm": 1.2733091115951538, + "learning_rate": 0.00044578947368421055, + "loss": 1.3668, + "step": 1053 + }, + { + "epoch": 2.769230769230769, + "high_lr": 0.00044578947368421055, + "low_lr": 8.915789473684211e-06, + "step": 1053 + }, + { + "epoch": 2.769230769230769, + "high_lr": 0.00044578947368421055, + "low_lr": 8.915789473684211e-06, + "step": 1053 + }, + { + "epoch": 2.769230769230769, + "high_lr": 0.00044578947368421055, + "low_lr": 8.915789473684211e-06, + "step": 1053 + }, + { + "epoch": 2.769230769230769, + "high_lr": 0.00044578947368421055, + "low_lr": 8.915789473684211e-06, + "step": 1053 + }, + { + "epoch": 2.769230769230769, + "high_lr": 0.00044578947368421055, + "low_lr": 8.915789473684211e-06, + "step": 1053 + }, + { + "epoch": 2.769230769230769, + "high_lr": 0.00044578947368421055, + "low_lr": 8.915789473684211e-06, + "step": 1053 + }, + { + "epoch": 2.769230769230769, + "high_lr": 0.00044578947368421055, + "low_lr": 8.915789473684211e-06, + "step": 1053 + }, + { + "epoch": 2.769230769230769, + "high_lr": 0.00044578947368421055, + "low_lr": 8.915789473684211e-06, + "step": 1053 + }, + { + "epoch": 2.7718606180144643, + "grad_norm": 1.2438217401504517, + "learning_rate": 0.0004452631578947369, + "loss": 1.3692, + "step": 1054 + }, + { + "epoch": 2.7718606180144643, + "high_lr": 0.0004452631578947369, + "low_lr": 8.905263157894737e-06, + "step": 1054 + }, + { + "epoch": 2.7718606180144643, + "high_lr": 0.0004452631578947369, + "low_lr": 8.905263157894737e-06, + "step": 1054 + }, + { + "epoch": 2.7718606180144643, + "high_lr": 0.0004452631578947369, + "low_lr": 8.905263157894737e-06, + "step": 1054 + }, + { + "epoch": 2.7718606180144643, + "high_lr": 0.0004452631578947369, + "low_lr": 8.905263157894737e-06, + "step": 1054 + }, + { + "epoch": 2.7718606180144643, + "high_lr": 0.0004452631578947369, + "low_lr": 8.905263157894737e-06, + "step": 1054 + }, + { + "epoch": 2.7718606180144643, + "high_lr": 0.0004452631578947369, + "low_lr": 8.905263157894737e-06, + "step": 1054 + }, + { + "epoch": 2.7718606180144643, + "high_lr": 0.0004452631578947369, + "low_lr": 8.905263157894737e-06, + "step": 1054 + }, + { + "epoch": 2.7718606180144643, + "high_lr": 0.0004452631578947369, + "low_lr": 8.905263157894737e-06, + "step": 1054 + }, + { + "epoch": 2.7744904667981594, + "grad_norm": 1.2441380023956299, + "learning_rate": 0.00044473684210526317, + "loss": 1.4018, + "step": 1055 + }, + { + "epoch": 2.7744904667981594, + "high_lr": 0.00044473684210526317, + "low_lr": 8.894736842105264e-06, + "step": 1055 + }, + { + "epoch": 2.7744904667981594, + "high_lr": 0.00044473684210526317, + "low_lr": 8.894736842105264e-06, + "step": 1055 + }, + { + "epoch": 2.7744904667981594, + "high_lr": 0.00044473684210526317, + "low_lr": 8.894736842105264e-06, + "step": 1055 + }, + { + "epoch": 2.7744904667981594, + "high_lr": 0.00044473684210526317, + "low_lr": 8.894736842105264e-06, + "step": 1055 + }, + { + "epoch": 2.7744904667981594, + "high_lr": 0.00044473684210526317, + "low_lr": 8.894736842105264e-06, + "step": 1055 + }, + { + "epoch": 2.7744904667981594, + "high_lr": 0.00044473684210526317, + "low_lr": 8.894736842105264e-06, + "step": 1055 + }, + { + "epoch": 2.7744904667981594, + "high_lr": 0.00044473684210526317, + "low_lr": 8.894736842105264e-06, + "step": 1055 + }, + { + "epoch": 2.7744904667981594, + "high_lr": 0.00044473684210526317, + "low_lr": 8.894736842105264e-06, + "step": 1055 + }, + { + "epoch": 2.777120315581854, + "grad_norm": 1.2575567960739136, + "learning_rate": 0.0004442105263157895, + "loss": 1.4044, + "step": 1056 + }, + { + "epoch": 2.777120315581854, + "high_lr": 0.0004442105263157895, + "low_lr": 8.884210526315792e-06, + "step": 1056 + }, + { + "epoch": 2.777120315581854, + "high_lr": 0.0004442105263157895, + "low_lr": 8.884210526315792e-06, + "step": 1056 + }, + { + "epoch": 2.777120315581854, + "high_lr": 0.0004442105263157895, + "low_lr": 8.884210526315792e-06, + "step": 1056 + }, + { + "epoch": 2.777120315581854, + "high_lr": 0.0004442105263157895, + "low_lr": 8.884210526315792e-06, + "step": 1056 + }, + { + "epoch": 2.777120315581854, + "high_lr": 0.0004442105263157895, + "low_lr": 8.884210526315792e-06, + "step": 1056 + }, + { + "epoch": 2.777120315581854, + "high_lr": 0.0004442105263157895, + "low_lr": 8.884210526315792e-06, + "step": 1056 + }, + { + "epoch": 2.777120315581854, + "high_lr": 0.0004442105263157895, + "low_lr": 8.884210526315792e-06, + "step": 1056 + }, + { + "epoch": 2.777120315581854, + "high_lr": 0.0004442105263157895, + "low_lr": 8.884210526315792e-06, + "step": 1056 + }, + { + "epoch": 2.779750164365549, + "grad_norm": 1.360580325126648, + "learning_rate": 0.0004436842105263158, + "loss": 1.4391, + "step": 1057 + }, + { + "epoch": 2.779750164365549, + "high_lr": 0.0004436842105263158, + "low_lr": 8.873684210526316e-06, + "step": 1057 + }, + { + "epoch": 2.779750164365549, + "high_lr": 0.0004436842105263158, + "low_lr": 8.873684210526316e-06, + "step": 1057 + }, + { + "epoch": 2.779750164365549, + "high_lr": 0.0004436842105263158, + "low_lr": 8.873684210526316e-06, + "step": 1057 + }, + { + "epoch": 2.779750164365549, + "high_lr": 0.0004436842105263158, + "low_lr": 8.873684210526316e-06, + "step": 1057 + }, + { + "epoch": 2.779750164365549, + "high_lr": 0.0004436842105263158, + "low_lr": 8.873684210526316e-06, + "step": 1057 + }, + { + "epoch": 2.779750164365549, + "high_lr": 0.0004436842105263158, + "low_lr": 8.873684210526316e-06, + "step": 1057 + }, + { + "epoch": 2.779750164365549, + "high_lr": 0.0004436842105263158, + "low_lr": 8.873684210526316e-06, + "step": 1057 + }, + { + "epoch": 2.779750164365549, + "high_lr": 0.0004436842105263158, + "low_lr": 8.873684210526316e-06, + "step": 1057 + }, + { + "epoch": 2.7823800131492438, + "grad_norm": 1.2910478115081787, + "learning_rate": 0.0004431578947368421, + "loss": 1.3596, + "step": 1058 + }, + { + "epoch": 2.7823800131492438, + "high_lr": 0.0004431578947368421, + "low_lr": 8.863157894736842e-06, + "step": 1058 + }, + { + "epoch": 2.7823800131492438, + "high_lr": 0.0004431578947368421, + "low_lr": 8.863157894736842e-06, + "step": 1058 + }, + { + "epoch": 2.7823800131492438, + "high_lr": 0.0004431578947368421, + "low_lr": 8.863157894736842e-06, + "step": 1058 + }, + { + "epoch": 2.7823800131492438, + "high_lr": 0.0004431578947368421, + "low_lr": 8.863157894736842e-06, + "step": 1058 + }, + { + "epoch": 2.7823800131492438, + "high_lr": 0.0004431578947368421, + "low_lr": 8.863157894736842e-06, + "step": 1058 + }, + { + "epoch": 2.7823800131492438, + "high_lr": 0.0004431578947368421, + "low_lr": 8.863157894736842e-06, + "step": 1058 + }, + { + "epoch": 2.7823800131492438, + "high_lr": 0.0004431578947368421, + "low_lr": 8.863157894736842e-06, + "step": 1058 + }, + { + "epoch": 2.7823800131492438, + "high_lr": 0.0004431578947368421, + "low_lr": 8.863157894736842e-06, + "step": 1058 + }, + { + "epoch": 2.785009861932939, + "grad_norm": 1.2490891218185425, + "learning_rate": 0.0004426315789473684, + "loss": 1.3702, + "step": 1059 + }, + { + "epoch": 2.785009861932939, + "high_lr": 0.0004426315789473684, + "low_lr": 8.852631578947369e-06, + "step": 1059 + }, + { + "epoch": 2.785009861932939, + "high_lr": 0.0004426315789473684, + "low_lr": 8.852631578947369e-06, + "step": 1059 + }, + { + "epoch": 2.785009861932939, + "high_lr": 0.0004426315789473684, + "low_lr": 8.852631578947369e-06, + "step": 1059 + }, + { + "epoch": 2.785009861932939, + "high_lr": 0.0004426315789473684, + "low_lr": 8.852631578947369e-06, + "step": 1059 + }, + { + "epoch": 2.785009861932939, + "high_lr": 0.0004426315789473684, + "low_lr": 8.852631578947369e-06, + "step": 1059 + }, + { + "epoch": 2.785009861932939, + "high_lr": 0.0004426315789473684, + "low_lr": 8.852631578947369e-06, + "step": 1059 + }, + { + "epoch": 2.785009861932939, + "high_lr": 0.0004426315789473684, + "low_lr": 8.852631578947369e-06, + "step": 1059 + }, + { + "epoch": 2.785009861932939, + "high_lr": 0.0004426315789473684, + "low_lr": 8.852631578947369e-06, + "step": 1059 + }, + { + "epoch": 2.7876397107166335, + "grad_norm": 1.3067238330841064, + "learning_rate": 0.0004421052631578947, + "loss": 1.4366, + "step": 1060 + }, + { + "epoch": 2.7876397107166335, + "high_lr": 0.0004421052631578947, + "low_lr": 8.842105263157895e-06, + "step": 1060 + }, + { + "epoch": 2.7876397107166335, + "high_lr": 0.0004421052631578947, + "low_lr": 8.842105263157895e-06, + "step": 1060 + }, + { + "epoch": 2.7876397107166335, + "high_lr": 0.0004421052631578947, + "low_lr": 8.842105263157895e-06, + "step": 1060 + }, + { + "epoch": 2.7876397107166335, + "high_lr": 0.0004421052631578947, + "low_lr": 8.842105263157895e-06, + "step": 1060 + }, + { + "epoch": 2.7876397107166335, + "high_lr": 0.0004421052631578947, + "low_lr": 8.842105263157895e-06, + "step": 1060 + }, + { + "epoch": 2.7876397107166335, + "high_lr": 0.0004421052631578947, + "low_lr": 8.842105263157895e-06, + "step": 1060 + }, + { + "epoch": 2.7876397107166335, + "high_lr": 0.0004421052631578947, + "low_lr": 8.842105263157895e-06, + "step": 1060 + }, + { + "epoch": 2.7876397107166335, + "high_lr": 0.0004421052631578947, + "low_lr": 8.842105263157895e-06, + "step": 1060 + }, + { + "epoch": 2.7902695595003286, + "grad_norm": 1.3038711547851562, + "learning_rate": 0.0004415789473684211, + "loss": 1.3697, + "step": 1061 + }, + { + "epoch": 2.7902695595003286, + "high_lr": 0.0004415789473684211, + "low_lr": 8.831578947368421e-06, + "step": 1061 + }, + { + "epoch": 2.7902695595003286, + "high_lr": 0.0004415789473684211, + "low_lr": 8.831578947368421e-06, + "step": 1061 + }, + { + "epoch": 2.7902695595003286, + "high_lr": 0.0004415789473684211, + "low_lr": 8.831578947368421e-06, + "step": 1061 + }, + { + "epoch": 2.7902695595003286, + "high_lr": 0.0004415789473684211, + "low_lr": 8.831578947368421e-06, + "step": 1061 + }, + { + "epoch": 2.7902695595003286, + "high_lr": 0.0004415789473684211, + "low_lr": 8.831578947368421e-06, + "step": 1061 + }, + { + "epoch": 2.7902695595003286, + "high_lr": 0.0004415789473684211, + "low_lr": 8.831578947368421e-06, + "step": 1061 + }, + { + "epoch": 2.7902695595003286, + "high_lr": 0.0004415789473684211, + "low_lr": 8.831578947368421e-06, + "step": 1061 + }, + { + "epoch": 2.7902695595003286, + "high_lr": 0.0004415789473684211, + "low_lr": 8.831578947368421e-06, + "step": 1061 + }, + { + "epoch": 2.7928994082840237, + "grad_norm": 1.3181500434875488, + "learning_rate": 0.0004410526315789474, + "loss": 1.4253, + "step": 1062 + }, + { + "epoch": 2.7928994082840237, + "high_lr": 0.0004410526315789474, + "low_lr": 8.821052631578948e-06, + "step": 1062 + }, + { + "epoch": 2.7928994082840237, + "high_lr": 0.0004410526315789474, + "low_lr": 8.821052631578948e-06, + "step": 1062 + }, + { + "epoch": 2.7928994082840237, + "high_lr": 0.0004410526315789474, + "low_lr": 8.821052631578948e-06, + "step": 1062 + }, + { + "epoch": 2.7928994082840237, + "high_lr": 0.0004410526315789474, + "low_lr": 8.821052631578948e-06, + "step": 1062 + }, + { + "epoch": 2.7928994082840237, + "high_lr": 0.0004410526315789474, + "low_lr": 8.821052631578948e-06, + "step": 1062 + }, + { + "epoch": 2.7928994082840237, + "high_lr": 0.0004410526315789474, + "low_lr": 8.821052631578948e-06, + "step": 1062 + }, + { + "epoch": 2.7928994082840237, + "high_lr": 0.0004410526315789474, + "low_lr": 8.821052631578948e-06, + "step": 1062 + }, + { + "epoch": 2.7928994082840237, + "high_lr": 0.0004410526315789474, + "low_lr": 8.821052631578948e-06, + "step": 1062 + }, + { + "epoch": 2.7955292570677184, + "grad_norm": 1.366966962814331, + "learning_rate": 0.00044052631578947373, + "loss": 1.4173, + "step": 1063 + }, + { + "epoch": 2.7955292570677184, + "high_lr": 0.00044052631578947373, + "low_lr": 8.810526315789474e-06, + "step": 1063 + }, + { + "epoch": 2.7955292570677184, + "high_lr": 0.00044052631578947373, + "low_lr": 8.810526315789474e-06, + "step": 1063 + }, + { + "epoch": 2.7955292570677184, + "high_lr": 0.00044052631578947373, + "low_lr": 8.810526315789474e-06, + "step": 1063 + }, + { + "epoch": 2.7955292570677184, + "high_lr": 0.00044052631578947373, + "low_lr": 8.810526315789474e-06, + "step": 1063 + }, + { + "epoch": 2.7955292570677184, + "high_lr": 0.00044052631578947373, + "low_lr": 8.810526315789474e-06, + "step": 1063 + }, + { + "epoch": 2.7955292570677184, + "high_lr": 0.00044052631578947373, + "low_lr": 8.810526315789474e-06, + "step": 1063 + }, + { + "epoch": 2.7955292570677184, + "high_lr": 0.00044052631578947373, + "low_lr": 8.810526315789474e-06, + "step": 1063 + }, + { + "epoch": 2.7955292570677184, + "high_lr": 0.00044052631578947373, + "low_lr": 8.810526315789474e-06, + "step": 1063 + }, + { + "epoch": 2.7981591058514135, + "grad_norm": 1.249804139137268, + "learning_rate": 0.00044, + "loss": 1.4177, + "step": 1064 + }, + { + "epoch": 2.7981591058514135, + "high_lr": 0.00044, + "low_lr": 8.8e-06, + "step": 1064 + }, + { + "epoch": 2.7981591058514135, + "high_lr": 0.00044, + "low_lr": 8.8e-06, + "step": 1064 + }, + { + "epoch": 2.7981591058514135, + "high_lr": 0.00044, + "low_lr": 8.8e-06, + "step": 1064 + }, + { + "epoch": 2.7981591058514135, + "high_lr": 0.00044, + "low_lr": 8.8e-06, + "step": 1064 + }, + { + "epoch": 2.7981591058514135, + "high_lr": 0.00044, + "low_lr": 8.8e-06, + "step": 1064 + }, + { + "epoch": 2.7981591058514135, + "high_lr": 0.00044, + "low_lr": 8.8e-06, + "step": 1064 + }, + { + "epoch": 2.7981591058514135, + "high_lr": 0.00044, + "low_lr": 8.8e-06, + "step": 1064 + }, + { + "epoch": 2.7981591058514135, + "high_lr": 0.00044, + "low_lr": 8.8e-06, + "step": 1064 + }, + { + "epoch": 2.8007889546351086, + "grad_norm": 1.2769664525985718, + "learning_rate": 0.0004394736842105263, + "loss": 1.3844, + "step": 1065 + }, + { + "epoch": 2.8007889546351086, + "high_lr": 0.0004394736842105263, + "low_lr": 8.789473684210527e-06, + "step": 1065 + }, + { + "epoch": 2.8007889546351086, + "high_lr": 0.0004394736842105263, + "low_lr": 8.789473684210527e-06, + "step": 1065 + }, + { + "epoch": 2.8007889546351086, + "high_lr": 0.0004394736842105263, + "low_lr": 8.789473684210527e-06, + "step": 1065 + }, + { + "epoch": 2.8007889546351086, + "high_lr": 0.0004394736842105263, + "low_lr": 8.789473684210527e-06, + "step": 1065 + }, + { + "epoch": 2.8007889546351086, + "high_lr": 0.0004394736842105263, + "low_lr": 8.789473684210527e-06, + "step": 1065 + }, + { + "epoch": 2.8007889546351086, + "high_lr": 0.0004394736842105263, + "low_lr": 8.789473684210527e-06, + "step": 1065 + }, + { + "epoch": 2.8007889546351086, + "high_lr": 0.0004394736842105263, + "low_lr": 8.789473684210527e-06, + "step": 1065 + }, + { + "epoch": 2.8007889546351086, + "high_lr": 0.0004394736842105263, + "low_lr": 8.789473684210527e-06, + "step": 1065 + }, + { + "epoch": 2.8034188034188032, + "grad_norm": 1.2841724157333374, + "learning_rate": 0.00043894736842105264, + "loss": 1.3809, + "step": 1066 + }, + { + "epoch": 2.8034188034188032, + "high_lr": 0.00043894736842105264, + "low_lr": 8.778947368421053e-06, + "step": 1066 + }, + { + "epoch": 2.8034188034188032, + "high_lr": 0.00043894736842105264, + "low_lr": 8.778947368421053e-06, + "step": 1066 + }, + { + "epoch": 2.8034188034188032, + "high_lr": 0.00043894736842105264, + "low_lr": 8.778947368421053e-06, + "step": 1066 + }, + { + "epoch": 2.8034188034188032, + "high_lr": 0.00043894736842105264, + "low_lr": 8.778947368421053e-06, + "step": 1066 + }, + { + "epoch": 2.8034188034188032, + "high_lr": 0.00043894736842105264, + "low_lr": 8.778947368421053e-06, + "step": 1066 + }, + { + "epoch": 2.8034188034188032, + "high_lr": 0.00043894736842105264, + "low_lr": 8.778947368421053e-06, + "step": 1066 + }, + { + "epoch": 2.8034188034188032, + "high_lr": 0.00043894736842105264, + "low_lr": 8.778947368421053e-06, + "step": 1066 + }, + { + "epoch": 2.8034188034188032, + "high_lr": 0.00043894736842105264, + "low_lr": 8.778947368421053e-06, + "step": 1066 + }, + { + "epoch": 2.8060486522024983, + "grad_norm": 1.3203274011611938, + "learning_rate": 0.00043842105263157893, + "loss": 1.3903, + "step": 1067 + }, + { + "epoch": 2.8060486522024983, + "high_lr": 0.00043842105263157893, + "low_lr": 8.76842105263158e-06, + "step": 1067 + }, + { + "epoch": 2.8060486522024983, + "high_lr": 0.00043842105263157893, + "low_lr": 8.76842105263158e-06, + "step": 1067 + }, + { + "epoch": 2.8060486522024983, + "high_lr": 0.00043842105263157893, + "low_lr": 8.76842105263158e-06, + "step": 1067 + }, + { + "epoch": 2.8060486522024983, + "high_lr": 0.00043842105263157893, + "low_lr": 8.76842105263158e-06, + "step": 1067 + }, + { + "epoch": 2.8060486522024983, + "high_lr": 0.00043842105263157893, + "low_lr": 8.76842105263158e-06, + "step": 1067 + }, + { + "epoch": 2.8060486522024983, + "high_lr": 0.00043842105263157893, + "low_lr": 8.76842105263158e-06, + "step": 1067 + }, + { + "epoch": 2.8060486522024983, + "high_lr": 0.00043842105263157893, + "low_lr": 8.76842105263158e-06, + "step": 1067 + }, + { + "epoch": 2.8060486522024983, + "high_lr": 0.00043842105263157893, + "low_lr": 8.76842105263158e-06, + "step": 1067 + }, + { + "epoch": 2.8086785009861934, + "grad_norm": 1.3281534910202026, + "learning_rate": 0.00043789473684210527, + "loss": 1.3852, + "step": 1068 + }, + { + "epoch": 2.8086785009861934, + "high_lr": 0.00043789473684210527, + "low_lr": 8.757894736842106e-06, + "step": 1068 + }, + { + "epoch": 2.8086785009861934, + "high_lr": 0.00043789473684210527, + "low_lr": 8.757894736842106e-06, + "step": 1068 + }, + { + "epoch": 2.8086785009861934, + "high_lr": 0.00043789473684210527, + "low_lr": 8.757894736842106e-06, + "step": 1068 + }, + { + "epoch": 2.8086785009861934, + "high_lr": 0.00043789473684210527, + "low_lr": 8.757894736842106e-06, + "step": 1068 + }, + { + "epoch": 2.8086785009861934, + "high_lr": 0.00043789473684210527, + "low_lr": 8.757894736842106e-06, + "step": 1068 + }, + { + "epoch": 2.8086785009861934, + "high_lr": 0.00043789473684210527, + "low_lr": 8.757894736842106e-06, + "step": 1068 + }, + { + "epoch": 2.8086785009861934, + "high_lr": 0.00043789473684210527, + "low_lr": 8.757894736842106e-06, + "step": 1068 + }, + { + "epoch": 2.8086785009861934, + "high_lr": 0.00043789473684210527, + "low_lr": 8.757894736842106e-06, + "step": 1068 + }, + { + "epoch": 2.811308349769888, + "grad_norm": 1.3264497518539429, + "learning_rate": 0.0004373684210526316, + "loss": 1.4132, + "step": 1069 + }, + { + "epoch": 2.811308349769888, + "high_lr": 0.0004373684210526316, + "low_lr": 8.747368421052632e-06, + "step": 1069 + }, + { + "epoch": 2.811308349769888, + "high_lr": 0.0004373684210526316, + "low_lr": 8.747368421052632e-06, + "step": 1069 + }, + { + "epoch": 2.811308349769888, + "high_lr": 0.0004373684210526316, + "low_lr": 8.747368421052632e-06, + "step": 1069 + }, + { + "epoch": 2.811308349769888, + "high_lr": 0.0004373684210526316, + "low_lr": 8.747368421052632e-06, + "step": 1069 + }, + { + "epoch": 2.811308349769888, + "high_lr": 0.0004373684210526316, + "low_lr": 8.747368421052632e-06, + "step": 1069 + }, + { + "epoch": 2.811308349769888, + "high_lr": 0.0004373684210526316, + "low_lr": 8.747368421052632e-06, + "step": 1069 + }, + { + "epoch": 2.811308349769888, + "high_lr": 0.0004373684210526316, + "low_lr": 8.747368421052632e-06, + "step": 1069 + }, + { + "epoch": 2.811308349769888, + "high_lr": 0.0004373684210526316, + "low_lr": 8.747368421052632e-06, + "step": 1069 + }, + { + "epoch": 2.813938198553583, + "grad_norm": 1.288648247718811, + "learning_rate": 0.00043684210526315795, + "loss": 1.4039, + "step": 1070 + }, + { + "epoch": 2.813938198553583, + "high_lr": 0.00043684210526315795, + "low_lr": 8.736842105263158e-06, + "step": 1070 + }, + { + "epoch": 2.813938198553583, + "high_lr": 0.00043684210526315795, + "low_lr": 8.736842105263158e-06, + "step": 1070 + }, + { + "epoch": 2.813938198553583, + "high_lr": 0.00043684210526315795, + "low_lr": 8.736842105263158e-06, + "step": 1070 + }, + { + "epoch": 2.813938198553583, + "high_lr": 0.00043684210526315795, + "low_lr": 8.736842105263158e-06, + "step": 1070 + }, + { + "epoch": 2.813938198553583, + "high_lr": 0.00043684210526315795, + "low_lr": 8.736842105263158e-06, + "step": 1070 + }, + { + "epoch": 2.813938198553583, + "high_lr": 0.00043684210526315795, + "low_lr": 8.736842105263158e-06, + "step": 1070 + }, + { + "epoch": 2.813938198553583, + "high_lr": 0.00043684210526315795, + "low_lr": 8.736842105263158e-06, + "step": 1070 + }, + { + "epoch": 2.813938198553583, + "high_lr": 0.00043684210526315795, + "low_lr": 8.736842105263158e-06, + "step": 1070 + }, + { + "epoch": 2.8165680473372783, + "grad_norm": 1.276823878288269, + "learning_rate": 0.00043631578947368423, + "loss": 1.377, + "step": 1071 + }, + { + "epoch": 2.8165680473372783, + "high_lr": 0.00043631578947368423, + "low_lr": 8.726315789473685e-06, + "step": 1071 + }, + { + "epoch": 2.8165680473372783, + "high_lr": 0.00043631578947368423, + "low_lr": 8.726315789473685e-06, + "step": 1071 + }, + { + "epoch": 2.8165680473372783, + "high_lr": 0.00043631578947368423, + "low_lr": 8.726315789473685e-06, + "step": 1071 + }, + { + "epoch": 2.8165680473372783, + "high_lr": 0.00043631578947368423, + "low_lr": 8.726315789473685e-06, + "step": 1071 + }, + { + "epoch": 2.8165680473372783, + "high_lr": 0.00043631578947368423, + "low_lr": 8.726315789473685e-06, + "step": 1071 + }, + { + "epoch": 2.8165680473372783, + "high_lr": 0.00043631578947368423, + "low_lr": 8.726315789473685e-06, + "step": 1071 + }, + { + "epoch": 2.8165680473372783, + "high_lr": 0.00043631578947368423, + "low_lr": 8.726315789473685e-06, + "step": 1071 + }, + { + "epoch": 2.8165680473372783, + "high_lr": 0.00043631578947368423, + "low_lr": 8.726315789473685e-06, + "step": 1071 + }, + { + "epoch": 2.819197896120973, + "grad_norm": 1.3558621406555176, + "learning_rate": 0.0004357894736842105, + "loss": 1.399, + "step": 1072 + }, + { + "epoch": 2.819197896120973, + "high_lr": 0.0004357894736842105, + "low_lr": 8.715789473684211e-06, + "step": 1072 + }, + { + "epoch": 2.819197896120973, + "high_lr": 0.0004357894736842105, + "low_lr": 8.715789473684211e-06, + "step": 1072 + }, + { + "epoch": 2.819197896120973, + "high_lr": 0.0004357894736842105, + "low_lr": 8.715789473684211e-06, + "step": 1072 + }, + { + "epoch": 2.819197896120973, + "high_lr": 0.0004357894736842105, + "low_lr": 8.715789473684211e-06, + "step": 1072 + }, + { + "epoch": 2.819197896120973, + "high_lr": 0.0004357894736842105, + "low_lr": 8.715789473684211e-06, + "step": 1072 + }, + { + "epoch": 2.819197896120973, + "high_lr": 0.0004357894736842105, + "low_lr": 8.715789473684211e-06, + "step": 1072 + }, + { + "epoch": 2.819197896120973, + "high_lr": 0.0004357894736842105, + "low_lr": 8.715789473684211e-06, + "step": 1072 + }, + { + "epoch": 2.819197896120973, + "high_lr": 0.0004357894736842105, + "low_lr": 8.715789473684211e-06, + "step": 1072 + }, + { + "epoch": 2.821827744904668, + "grad_norm": 1.3223730325698853, + "learning_rate": 0.00043526315789473686, + "loss": 1.3744, + "step": 1073 + }, + { + "epoch": 2.821827744904668, + "high_lr": 0.00043526315789473686, + "low_lr": 8.705263157894737e-06, + "step": 1073 + }, + { + "epoch": 2.821827744904668, + "high_lr": 0.00043526315789473686, + "low_lr": 8.705263157894737e-06, + "step": 1073 + }, + { + "epoch": 2.821827744904668, + "high_lr": 0.00043526315789473686, + "low_lr": 8.705263157894737e-06, + "step": 1073 + }, + { + "epoch": 2.821827744904668, + "high_lr": 0.00043526315789473686, + "low_lr": 8.705263157894737e-06, + "step": 1073 + }, + { + "epoch": 2.821827744904668, + "high_lr": 0.00043526315789473686, + "low_lr": 8.705263157894737e-06, + "step": 1073 + }, + { + "epoch": 2.821827744904668, + "high_lr": 0.00043526315789473686, + "low_lr": 8.705263157894737e-06, + "step": 1073 + }, + { + "epoch": 2.821827744904668, + "high_lr": 0.00043526315789473686, + "low_lr": 8.705263157894737e-06, + "step": 1073 + }, + { + "epoch": 2.821827744904668, + "high_lr": 0.00043526315789473686, + "low_lr": 8.705263157894737e-06, + "step": 1073 + }, + { + "epoch": 2.824457593688363, + "grad_norm": 1.284300446510315, + "learning_rate": 0.00043473684210526315, + "loss": 1.3657, + "step": 1074 + }, + { + "epoch": 2.824457593688363, + "high_lr": 0.00043473684210526315, + "low_lr": 8.694736842105264e-06, + "step": 1074 + }, + { + "epoch": 2.824457593688363, + "high_lr": 0.00043473684210526315, + "low_lr": 8.694736842105264e-06, + "step": 1074 + }, + { + "epoch": 2.824457593688363, + "high_lr": 0.00043473684210526315, + "low_lr": 8.694736842105264e-06, + "step": 1074 + }, + { + "epoch": 2.824457593688363, + "high_lr": 0.00043473684210526315, + "low_lr": 8.694736842105264e-06, + "step": 1074 + }, + { + "epoch": 2.824457593688363, + "high_lr": 0.00043473684210526315, + "low_lr": 8.694736842105264e-06, + "step": 1074 + }, + { + "epoch": 2.824457593688363, + "high_lr": 0.00043473684210526315, + "low_lr": 8.694736842105264e-06, + "step": 1074 + }, + { + "epoch": 2.824457593688363, + "high_lr": 0.00043473684210526315, + "low_lr": 8.694736842105264e-06, + "step": 1074 + }, + { + "epoch": 2.824457593688363, + "high_lr": 0.00043473684210526315, + "low_lr": 8.694736842105264e-06, + "step": 1074 + }, + { + "epoch": 2.827087442472058, + "grad_norm": 1.3425698280334473, + "learning_rate": 0.0004342105263157895, + "loss": 1.3967, + "step": 1075 + }, + { + "epoch": 2.827087442472058, + "high_lr": 0.0004342105263157895, + "low_lr": 8.68421052631579e-06, + "step": 1075 + }, + { + "epoch": 2.827087442472058, + "high_lr": 0.0004342105263157895, + "low_lr": 8.68421052631579e-06, + "step": 1075 + }, + { + "epoch": 2.827087442472058, + "high_lr": 0.0004342105263157895, + "low_lr": 8.68421052631579e-06, + "step": 1075 + }, + { + "epoch": 2.827087442472058, + "high_lr": 0.0004342105263157895, + "low_lr": 8.68421052631579e-06, + "step": 1075 + }, + { + "epoch": 2.827087442472058, + "high_lr": 0.0004342105263157895, + "low_lr": 8.68421052631579e-06, + "step": 1075 + }, + { + "epoch": 2.827087442472058, + "high_lr": 0.0004342105263157895, + "low_lr": 8.68421052631579e-06, + "step": 1075 + }, + { + "epoch": 2.827087442472058, + "high_lr": 0.0004342105263157895, + "low_lr": 8.68421052631579e-06, + "step": 1075 + }, + { + "epoch": 2.827087442472058, + "high_lr": 0.0004342105263157895, + "low_lr": 8.68421052631579e-06, + "step": 1075 + }, + { + "epoch": 2.829717291255753, + "grad_norm": 1.2884697914123535, + "learning_rate": 0.00043368421052631577, + "loss": 1.367, + "step": 1076 + }, + { + "epoch": 2.829717291255753, + "high_lr": 0.00043368421052631577, + "low_lr": 8.673684210526316e-06, + "step": 1076 + }, + { + "epoch": 2.829717291255753, + "high_lr": 0.00043368421052631577, + "low_lr": 8.673684210526316e-06, + "step": 1076 + }, + { + "epoch": 2.829717291255753, + "high_lr": 0.00043368421052631577, + "low_lr": 8.673684210526316e-06, + "step": 1076 + }, + { + "epoch": 2.829717291255753, + "high_lr": 0.00043368421052631577, + "low_lr": 8.673684210526316e-06, + "step": 1076 + }, + { + "epoch": 2.829717291255753, + "high_lr": 0.00043368421052631577, + "low_lr": 8.673684210526316e-06, + "step": 1076 + }, + { + "epoch": 2.829717291255753, + "high_lr": 0.00043368421052631577, + "low_lr": 8.673684210526316e-06, + "step": 1076 + }, + { + "epoch": 2.829717291255753, + "high_lr": 0.00043368421052631577, + "low_lr": 8.673684210526316e-06, + "step": 1076 + }, + { + "epoch": 2.829717291255753, + "high_lr": 0.00043368421052631577, + "low_lr": 8.673684210526316e-06, + "step": 1076 + }, + { + "epoch": 2.832347140039448, + "grad_norm": 1.3022724390029907, + "learning_rate": 0.00043315789473684217, + "loss": 1.3843, + "step": 1077 + }, + { + "epoch": 2.832347140039448, + "high_lr": 0.00043315789473684217, + "low_lr": 8.663157894736843e-06, + "step": 1077 + }, + { + "epoch": 2.832347140039448, + "high_lr": 0.00043315789473684217, + "low_lr": 8.663157894736843e-06, + "step": 1077 + }, + { + "epoch": 2.832347140039448, + "high_lr": 0.00043315789473684217, + "low_lr": 8.663157894736843e-06, + "step": 1077 + }, + { + "epoch": 2.832347140039448, + "high_lr": 0.00043315789473684217, + "low_lr": 8.663157894736843e-06, + "step": 1077 + }, + { + "epoch": 2.832347140039448, + "high_lr": 0.00043315789473684217, + "low_lr": 8.663157894736843e-06, + "step": 1077 + }, + { + "epoch": 2.832347140039448, + "high_lr": 0.00043315789473684217, + "low_lr": 8.663157894736843e-06, + "step": 1077 + }, + { + "epoch": 2.832347140039448, + "high_lr": 0.00043315789473684217, + "low_lr": 8.663157894736843e-06, + "step": 1077 + }, + { + "epoch": 2.832347140039448, + "high_lr": 0.00043315789473684217, + "low_lr": 8.663157894736843e-06, + "step": 1077 + }, + { + "epoch": 2.8349769888231426, + "grad_norm": 1.2575839757919312, + "learning_rate": 0.00043263157894736845, + "loss": 1.3433, + "step": 1078 + }, + { + "epoch": 2.8349769888231426, + "high_lr": 0.00043263157894736845, + "low_lr": 8.652631578947369e-06, + "step": 1078 + }, + { + "epoch": 2.8349769888231426, + "high_lr": 0.00043263157894736845, + "low_lr": 8.652631578947369e-06, + "step": 1078 + }, + { + "epoch": 2.8349769888231426, + "high_lr": 0.00043263157894736845, + "low_lr": 8.652631578947369e-06, + "step": 1078 + }, + { + "epoch": 2.8349769888231426, + "high_lr": 0.00043263157894736845, + "low_lr": 8.652631578947369e-06, + "step": 1078 + }, + { + "epoch": 2.8349769888231426, + "high_lr": 0.00043263157894736845, + "low_lr": 8.652631578947369e-06, + "step": 1078 + }, + { + "epoch": 2.8349769888231426, + "high_lr": 0.00043263157894736845, + "low_lr": 8.652631578947369e-06, + "step": 1078 + }, + { + "epoch": 2.8349769888231426, + "high_lr": 0.00043263157894736845, + "low_lr": 8.652631578947369e-06, + "step": 1078 + }, + { + "epoch": 2.8349769888231426, + "high_lr": 0.00043263157894736845, + "low_lr": 8.652631578947369e-06, + "step": 1078 + }, + { + "epoch": 2.8376068376068377, + "grad_norm": 1.3376774787902832, + "learning_rate": 0.00043210526315789474, + "loss": 1.3808, + "step": 1079 + }, + { + "epoch": 2.8376068376068377, + "high_lr": 0.00043210526315789474, + "low_lr": 8.642105263157895e-06, + "step": 1079 + }, + { + "epoch": 2.8376068376068377, + "high_lr": 0.00043210526315789474, + "low_lr": 8.642105263157895e-06, + "step": 1079 + }, + { + "epoch": 2.8376068376068377, + "high_lr": 0.00043210526315789474, + "low_lr": 8.642105263157895e-06, + "step": 1079 + }, + { + "epoch": 2.8376068376068377, + "high_lr": 0.00043210526315789474, + "low_lr": 8.642105263157895e-06, + "step": 1079 + }, + { + "epoch": 2.8376068376068377, + "high_lr": 0.00043210526315789474, + "low_lr": 8.642105263157895e-06, + "step": 1079 + }, + { + "epoch": 2.8376068376068377, + "high_lr": 0.00043210526315789474, + "low_lr": 8.642105263157895e-06, + "step": 1079 + }, + { + "epoch": 2.8376068376068377, + "high_lr": 0.00043210526315789474, + "low_lr": 8.642105263157895e-06, + "step": 1079 + }, + { + "epoch": 2.8376068376068377, + "high_lr": 0.00043210526315789474, + "low_lr": 8.642105263157895e-06, + "step": 1079 + }, + { + "epoch": 2.8402366863905324, + "grad_norm": 1.318920612335205, + "learning_rate": 0.0004315789473684211, + "loss": 1.4487, + "step": 1080 + }, + { + "epoch": 2.8402366863905324, + "high_lr": 0.0004315789473684211, + "low_lr": 8.631578947368422e-06, + "step": 1080 + }, + { + "epoch": 2.8402366863905324, + "high_lr": 0.0004315789473684211, + "low_lr": 8.631578947368422e-06, + "step": 1080 + }, + { + "epoch": 2.8402366863905324, + "high_lr": 0.0004315789473684211, + "low_lr": 8.631578947368422e-06, + "step": 1080 + }, + { + "epoch": 2.8402366863905324, + "high_lr": 0.0004315789473684211, + "low_lr": 8.631578947368422e-06, + "step": 1080 + }, + { + "epoch": 2.8402366863905324, + "high_lr": 0.0004315789473684211, + "low_lr": 8.631578947368422e-06, + "step": 1080 + }, + { + "epoch": 2.8402366863905324, + "high_lr": 0.0004315789473684211, + "low_lr": 8.631578947368422e-06, + "step": 1080 + }, + { + "epoch": 2.8402366863905324, + "high_lr": 0.0004315789473684211, + "low_lr": 8.631578947368422e-06, + "step": 1080 + }, + { + "epoch": 2.8402366863905324, + "high_lr": 0.0004315789473684211, + "low_lr": 8.631578947368422e-06, + "step": 1080 + }, + { + "epoch": 2.8428665351742275, + "grad_norm": 1.413286566734314, + "learning_rate": 0.00043105263157894736, + "loss": 1.3623, + "step": 1081 + }, + { + "epoch": 2.8428665351742275, + "high_lr": 0.00043105263157894736, + "low_lr": 8.621052631578948e-06, + "step": 1081 + }, + { + "epoch": 2.8428665351742275, + "high_lr": 0.00043105263157894736, + "low_lr": 8.621052631578948e-06, + "step": 1081 + }, + { + "epoch": 2.8428665351742275, + "high_lr": 0.00043105263157894736, + "low_lr": 8.621052631578948e-06, + "step": 1081 + }, + { + "epoch": 2.8428665351742275, + "high_lr": 0.00043105263157894736, + "low_lr": 8.621052631578948e-06, + "step": 1081 + }, + { + "epoch": 2.8428665351742275, + "high_lr": 0.00043105263157894736, + "low_lr": 8.621052631578948e-06, + "step": 1081 + }, + { + "epoch": 2.8428665351742275, + "high_lr": 0.00043105263157894736, + "low_lr": 8.621052631578948e-06, + "step": 1081 + }, + { + "epoch": 2.8428665351742275, + "high_lr": 0.00043105263157894736, + "low_lr": 8.621052631578948e-06, + "step": 1081 + }, + { + "epoch": 2.8428665351742275, + "high_lr": 0.00043105263157894736, + "low_lr": 8.621052631578948e-06, + "step": 1081 + }, + { + "epoch": 2.845496383957922, + "grad_norm": 1.2876933813095093, + "learning_rate": 0.0004305263157894737, + "loss": 1.38, + "step": 1082 + }, + { + "epoch": 2.845496383957922, + "high_lr": 0.0004305263157894737, + "low_lr": 8.610526315789474e-06, + "step": 1082 + }, + { + "epoch": 2.845496383957922, + "high_lr": 0.0004305263157894737, + "low_lr": 8.610526315789474e-06, + "step": 1082 + }, + { + "epoch": 2.845496383957922, + "high_lr": 0.0004305263157894737, + "low_lr": 8.610526315789474e-06, + "step": 1082 + }, + { + "epoch": 2.845496383957922, + "high_lr": 0.0004305263157894737, + "low_lr": 8.610526315789474e-06, + "step": 1082 + }, + { + "epoch": 2.845496383957922, + "high_lr": 0.0004305263157894737, + "low_lr": 8.610526315789474e-06, + "step": 1082 + }, + { + "epoch": 2.845496383957922, + "high_lr": 0.0004305263157894737, + "low_lr": 8.610526315789474e-06, + "step": 1082 + }, + { + "epoch": 2.845496383957922, + "high_lr": 0.0004305263157894737, + "low_lr": 8.610526315789474e-06, + "step": 1082 + }, + { + "epoch": 2.845496383957922, + "high_lr": 0.0004305263157894737, + "low_lr": 8.610526315789474e-06, + "step": 1082 + }, + { + "epoch": 2.8481262327416172, + "grad_norm": 1.250273585319519, + "learning_rate": 0.00043, + "loss": 1.3492, + "step": 1083 + }, + { + "epoch": 2.8481262327416172, + "high_lr": 0.00043, + "low_lr": 8.6e-06, + "step": 1083 + }, + { + "epoch": 2.8481262327416172, + "high_lr": 0.00043, + "low_lr": 8.6e-06, + "step": 1083 + }, + { + "epoch": 2.8481262327416172, + "high_lr": 0.00043, + "low_lr": 8.6e-06, + "step": 1083 + }, + { + "epoch": 2.8481262327416172, + "high_lr": 0.00043, + "low_lr": 8.6e-06, + "step": 1083 + }, + { + "epoch": 2.8481262327416172, + "high_lr": 0.00043, + "low_lr": 8.6e-06, + "step": 1083 + }, + { + "epoch": 2.8481262327416172, + "high_lr": 0.00043, + "low_lr": 8.6e-06, + "step": 1083 + }, + { + "epoch": 2.8481262327416172, + "high_lr": 0.00043, + "low_lr": 8.6e-06, + "step": 1083 + }, + { + "epoch": 2.8481262327416172, + "high_lr": 0.00043, + "low_lr": 8.6e-06, + "step": 1083 + }, + { + "epoch": 2.8507560815253123, + "grad_norm": 1.2635754346847534, + "learning_rate": 0.00042947368421052633, + "loss": 1.3557, + "step": 1084 + }, + { + "epoch": 2.8507560815253123, + "high_lr": 0.00042947368421052633, + "low_lr": 8.589473684210527e-06, + "step": 1084 + }, + { + "epoch": 2.8507560815253123, + "high_lr": 0.00042947368421052633, + "low_lr": 8.589473684210527e-06, + "step": 1084 + }, + { + "epoch": 2.8507560815253123, + "high_lr": 0.00042947368421052633, + "low_lr": 8.589473684210527e-06, + "step": 1084 + }, + { + "epoch": 2.8507560815253123, + "high_lr": 0.00042947368421052633, + "low_lr": 8.589473684210527e-06, + "step": 1084 + }, + { + "epoch": 2.8507560815253123, + "high_lr": 0.00042947368421052633, + "low_lr": 8.589473684210527e-06, + "step": 1084 + }, + { + "epoch": 2.8507560815253123, + "high_lr": 0.00042947368421052633, + "low_lr": 8.589473684210527e-06, + "step": 1084 + }, + { + "epoch": 2.8507560815253123, + "high_lr": 0.00042947368421052633, + "low_lr": 8.589473684210527e-06, + "step": 1084 + }, + { + "epoch": 2.8507560815253123, + "high_lr": 0.00042947368421052633, + "low_lr": 8.589473684210527e-06, + "step": 1084 + }, + { + "epoch": 2.853385930309007, + "grad_norm": 1.2985435724258423, + "learning_rate": 0.0004289473684210526, + "loss": 1.3558, + "step": 1085 + }, + { + "epoch": 2.853385930309007, + "high_lr": 0.0004289473684210526, + "low_lr": 8.578947368421053e-06, + "step": 1085 + }, + { + "epoch": 2.853385930309007, + "high_lr": 0.0004289473684210526, + "low_lr": 8.578947368421053e-06, + "step": 1085 + }, + { + "epoch": 2.853385930309007, + "high_lr": 0.0004289473684210526, + "low_lr": 8.578947368421053e-06, + "step": 1085 + }, + { + "epoch": 2.853385930309007, + "high_lr": 0.0004289473684210526, + "low_lr": 8.578947368421053e-06, + "step": 1085 + }, + { + "epoch": 2.853385930309007, + "high_lr": 0.0004289473684210526, + "low_lr": 8.578947368421053e-06, + "step": 1085 + }, + { + "epoch": 2.853385930309007, + "high_lr": 0.0004289473684210526, + "low_lr": 8.578947368421053e-06, + "step": 1085 + }, + { + "epoch": 2.853385930309007, + "high_lr": 0.0004289473684210526, + "low_lr": 8.578947368421053e-06, + "step": 1085 + }, + { + "epoch": 2.853385930309007, + "high_lr": 0.0004289473684210526, + "low_lr": 8.578947368421053e-06, + "step": 1085 + }, + { + "epoch": 2.856015779092702, + "grad_norm": 1.3095905780792236, + "learning_rate": 0.00042842105263157896, + "loss": 1.4058, + "step": 1086 + }, + { + "epoch": 2.856015779092702, + "high_lr": 0.00042842105263157896, + "low_lr": 8.56842105263158e-06, + "step": 1086 + }, + { + "epoch": 2.856015779092702, + "high_lr": 0.00042842105263157896, + "low_lr": 8.56842105263158e-06, + "step": 1086 + }, + { + "epoch": 2.856015779092702, + "high_lr": 0.00042842105263157896, + "low_lr": 8.56842105263158e-06, + "step": 1086 + }, + { + "epoch": 2.856015779092702, + "high_lr": 0.00042842105263157896, + "low_lr": 8.56842105263158e-06, + "step": 1086 + }, + { + "epoch": 2.856015779092702, + "high_lr": 0.00042842105263157896, + "low_lr": 8.56842105263158e-06, + "step": 1086 + }, + { + "epoch": 2.856015779092702, + "high_lr": 0.00042842105263157896, + "low_lr": 8.56842105263158e-06, + "step": 1086 + }, + { + "epoch": 2.856015779092702, + "high_lr": 0.00042842105263157896, + "low_lr": 8.56842105263158e-06, + "step": 1086 + }, + { + "epoch": 2.856015779092702, + "high_lr": 0.00042842105263157896, + "low_lr": 8.56842105263158e-06, + "step": 1086 + }, + { + "epoch": 2.858645627876397, + "grad_norm": 1.2998263835906982, + "learning_rate": 0.0004278947368421053, + "loss": 1.4075, + "step": 1087 + }, + { + "epoch": 2.858645627876397, + "high_lr": 0.0004278947368421053, + "low_lr": 8.557894736842106e-06, + "step": 1087 + }, + { + "epoch": 2.858645627876397, + "high_lr": 0.0004278947368421053, + "low_lr": 8.557894736842106e-06, + "step": 1087 + }, + { + "epoch": 2.858645627876397, + "high_lr": 0.0004278947368421053, + "low_lr": 8.557894736842106e-06, + "step": 1087 + }, + { + "epoch": 2.858645627876397, + "high_lr": 0.0004278947368421053, + "low_lr": 8.557894736842106e-06, + "step": 1087 + }, + { + "epoch": 2.858645627876397, + "high_lr": 0.0004278947368421053, + "low_lr": 8.557894736842106e-06, + "step": 1087 + }, + { + "epoch": 2.858645627876397, + "high_lr": 0.0004278947368421053, + "low_lr": 8.557894736842106e-06, + "step": 1087 + }, + { + "epoch": 2.858645627876397, + "high_lr": 0.0004278947368421053, + "low_lr": 8.557894736842106e-06, + "step": 1087 + }, + { + "epoch": 2.858645627876397, + "high_lr": 0.0004278947368421053, + "low_lr": 8.557894736842106e-06, + "step": 1087 + }, + { + "epoch": 2.861275476660092, + "grad_norm": 1.369041919708252, + "learning_rate": 0.0004273684210526316, + "loss": 1.4008, + "step": 1088 + }, + { + "epoch": 2.861275476660092, + "high_lr": 0.0004273684210526316, + "low_lr": 8.547368421052632e-06, + "step": 1088 + }, + { + "epoch": 2.861275476660092, + "high_lr": 0.0004273684210526316, + "low_lr": 8.547368421052632e-06, + "step": 1088 + }, + { + "epoch": 2.861275476660092, + "high_lr": 0.0004273684210526316, + "low_lr": 8.547368421052632e-06, + "step": 1088 + }, + { + "epoch": 2.861275476660092, + "high_lr": 0.0004273684210526316, + "low_lr": 8.547368421052632e-06, + "step": 1088 + }, + { + "epoch": 2.861275476660092, + "high_lr": 0.0004273684210526316, + "low_lr": 8.547368421052632e-06, + "step": 1088 + }, + { + "epoch": 2.861275476660092, + "high_lr": 0.0004273684210526316, + "low_lr": 8.547368421052632e-06, + "step": 1088 + }, + { + "epoch": 2.861275476660092, + "high_lr": 0.0004273684210526316, + "low_lr": 8.547368421052632e-06, + "step": 1088 + }, + { + "epoch": 2.861275476660092, + "high_lr": 0.0004273684210526316, + "low_lr": 8.547368421052632e-06, + "step": 1088 + }, + { + "epoch": 2.863905325443787, + "grad_norm": 1.3575427532196045, + "learning_rate": 0.0004268421052631579, + "loss": 1.441, + "step": 1089 + }, + { + "epoch": 2.863905325443787, + "high_lr": 0.0004268421052631579, + "low_lr": 8.536842105263159e-06, + "step": 1089 + }, + { + "epoch": 2.863905325443787, + "high_lr": 0.0004268421052631579, + "low_lr": 8.536842105263159e-06, + "step": 1089 + }, + { + "epoch": 2.863905325443787, + "high_lr": 0.0004268421052631579, + "low_lr": 8.536842105263159e-06, + "step": 1089 + }, + { + "epoch": 2.863905325443787, + "high_lr": 0.0004268421052631579, + "low_lr": 8.536842105263159e-06, + "step": 1089 + }, + { + "epoch": 2.863905325443787, + "high_lr": 0.0004268421052631579, + "low_lr": 8.536842105263159e-06, + "step": 1089 + }, + { + "epoch": 2.863905325443787, + "high_lr": 0.0004268421052631579, + "low_lr": 8.536842105263159e-06, + "step": 1089 + }, + { + "epoch": 2.863905325443787, + "high_lr": 0.0004268421052631579, + "low_lr": 8.536842105263159e-06, + "step": 1089 + }, + { + "epoch": 2.863905325443787, + "high_lr": 0.0004268421052631579, + "low_lr": 8.536842105263159e-06, + "step": 1089 + }, + { + "epoch": 2.866535174227482, + "grad_norm": 1.2896759510040283, + "learning_rate": 0.0004263157894736842, + "loss": 1.3802, + "step": 1090 + }, + { + "epoch": 2.866535174227482, + "high_lr": 0.0004263157894736842, + "low_lr": 8.526315789473685e-06, + "step": 1090 + }, + { + "epoch": 2.866535174227482, + "high_lr": 0.0004263157894736842, + "low_lr": 8.526315789473685e-06, + "step": 1090 + }, + { + "epoch": 2.866535174227482, + "high_lr": 0.0004263157894736842, + "low_lr": 8.526315789473685e-06, + "step": 1090 + }, + { + "epoch": 2.866535174227482, + "high_lr": 0.0004263157894736842, + "low_lr": 8.526315789473685e-06, + "step": 1090 + }, + { + "epoch": 2.866535174227482, + "high_lr": 0.0004263157894736842, + "low_lr": 8.526315789473685e-06, + "step": 1090 + }, + { + "epoch": 2.866535174227482, + "high_lr": 0.0004263157894736842, + "low_lr": 8.526315789473685e-06, + "step": 1090 + }, + { + "epoch": 2.866535174227482, + "high_lr": 0.0004263157894736842, + "low_lr": 8.526315789473685e-06, + "step": 1090 + }, + { + "epoch": 2.866535174227482, + "high_lr": 0.0004263157894736842, + "low_lr": 8.526315789473685e-06, + "step": 1090 + }, + { + "epoch": 2.8691650230111767, + "grad_norm": 1.2547643184661865, + "learning_rate": 0.00042578947368421055, + "loss": 1.3332, + "step": 1091 + }, + { + "epoch": 2.8691650230111767, + "high_lr": 0.00042578947368421055, + "low_lr": 8.515789473684211e-06, + "step": 1091 + }, + { + "epoch": 2.8691650230111767, + "high_lr": 0.00042578947368421055, + "low_lr": 8.515789473684211e-06, + "step": 1091 + }, + { + "epoch": 2.8691650230111767, + "high_lr": 0.00042578947368421055, + "low_lr": 8.515789473684211e-06, + "step": 1091 + }, + { + "epoch": 2.8691650230111767, + "high_lr": 0.00042578947368421055, + "low_lr": 8.515789473684211e-06, + "step": 1091 + }, + { + "epoch": 2.8691650230111767, + "high_lr": 0.00042578947368421055, + "low_lr": 8.515789473684211e-06, + "step": 1091 + }, + { + "epoch": 2.8691650230111767, + "high_lr": 0.00042578947368421055, + "low_lr": 8.515789473684211e-06, + "step": 1091 + }, + { + "epoch": 2.8691650230111767, + "high_lr": 0.00042578947368421055, + "low_lr": 8.515789473684211e-06, + "step": 1091 + }, + { + "epoch": 2.8691650230111767, + "high_lr": 0.00042578947368421055, + "low_lr": 8.515789473684211e-06, + "step": 1091 + }, + { + "epoch": 2.871794871794872, + "grad_norm": 2.4219512939453125, + "learning_rate": 0.00042526315789473683, + "loss": 1.3427, + "step": 1092 + }, + { + "epoch": 2.871794871794872, + "high_lr": 0.00042526315789473683, + "low_lr": 8.505263157894738e-06, + "step": 1092 + }, + { + "epoch": 2.871794871794872, + "high_lr": 0.00042526315789473683, + "low_lr": 8.505263157894738e-06, + "step": 1092 + }, + { + "epoch": 2.871794871794872, + "high_lr": 0.00042526315789473683, + "low_lr": 8.505263157894738e-06, + "step": 1092 + }, + { + "epoch": 2.871794871794872, + "high_lr": 0.00042526315789473683, + "low_lr": 8.505263157894738e-06, + "step": 1092 + }, + { + "epoch": 2.871794871794872, + "high_lr": 0.00042526315789473683, + "low_lr": 8.505263157894738e-06, + "step": 1092 + }, + { + "epoch": 2.871794871794872, + "high_lr": 0.00042526315789473683, + "low_lr": 8.505263157894738e-06, + "step": 1092 + }, + { + "epoch": 2.871794871794872, + "high_lr": 0.00042526315789473683, + "low_lr": 8.505263157894738e-06, + "step": 1092 + }, + { + "epoch": 2.871794871794872, + "high_lr": 0.00042526315789473683, + "low_lr": 8.505263157894738e-06, + "step": 1092 + }, + { + "epoch": 2.874424720578567, + "grad_norm": 1.207375168800354, + "learning_rate": 0.0004247368421052631, + "loss": 1.3563, + "step": 1093 + }, + { + "epoch": 2.874424720578567, + "high_lr": 0.0004247368421052631, + "low_lr": 8.494736842105264e-06, + "step": 1093 + }, + { + "epoch": 2.874424720578567, + "high_lr": 0.0004247368421052631, + "low_lr": 8.494736842105264e-06, + "step": 1093 + }, + { + "epoch": 2.874424720578567, + "high_lr": 0.0004247368421052631, + "low_lr": 8.494736842105264e-06, + "step": 1093 + }, + { + "epoch": 2.874424720578567, + "high_lr": 0.0004247368421052631, + "low_lr": 8.494736842105264e-06, + "step": 1093 + }, + { + "epoch": 2.874424720578567, + "high_lr": 0.0004247368421052631, + "low_lr": 8.494736842105264e-06, + "step": 1093 + }, + { + "epoch": 2.874424720578567, + "high_lr": 0.0004247368421052631, + "low_lr": 8.494736842105264e-06, + "step": 1093 + }, + { + "epoch": 2.874424720578567, + "high_lr": 0.0004247368421052631, + "low_lr": 8.494736842105264e-06, + "step": 1093 + }, + { + "epoch": 2.874424720578567, + "high_lr": 0.0004247368421052631, + "low_lr": 8.494736842105264e-06, + "step": 1093 + }, + { + "epoch": 2.8770545693622616, + "grad_norm": 1.3039588928222656, + "learning_rate": 0.0004242105263157895, + "loss": 1.4024, + "step": 1094 + }, + { + "epoch": 2.8770545693622616, + "high_lr": 0.0004242105263157895, + "low_lr": 8.48421052631579e-06, + "step": 1094 + }, + { + "epoch": 2.8770545693622616, + "high_lr": 0.0004242105263157895, + "low_lr": 8.48421052631579e-06, + "step": 1094 + }, + { + "epoch": 2.8770545693622616, + "high_lr": 0.0004242105263157895, + "low_lr": 8.48421052631579e-06, + "step": 1094 + }, + { + "epoch": 2.8770545693622616, + "high_lr": 0.0004242105263157895, + "low_lr": 8.48421052631579e-06, + "step": 1094 + }, + { + "epoch": 2.8770545693622616, + "high_lr": 0.0004242105263157895, + "low_lr": 8.48421052631579e-06, + "step": 1094 + }, + { + "epoch": 2.8770545693622616, + "high_lr": 0.0004242105263157895, + "low_lr": 8.48421052631579e-06, + "step": 1094 + }, + { + "epoch": 2.8770545693622616, + "high_lr": 0.0004242105263157895, + "low_lr": 8.48421052631579e-06, + "step": 1094 + }, + { + "epoch": 2.8770545693622616, + "high_lr": 0.0004242105263157895, + "low_lr": 8.48421052631579e-06, + "step": 1094 + }, + { + "epoch": 2.8796844181459567, + "grad_norm": 1.2961663007736206, + "learning_rate": 0.0004236842105263158, + "loss": 1.3692, + "step": 1095 + }, + { + "epoch": 2.8796844181459567, + "high_lr": 0.0004236842105263158, + "low_lr": 8.473684210526317e-06, + "step": 1095 + }, + { + "epoch": 2.8796844181459567, + "high_lr": 0.0004236842105263158, + "low_lr": 8.473684210526317e-06, + "step": 1095 + }, + { + "epoch": 2.8796844181459567, + "high_lr": 0.0004236842105263158, + "low_lr": 8.473684210526317e-06, + "step": 1095 + }, + { + "epoch": 2.8796844181459567, + "high_lr": 0.0004236842105263158, + "low_lr": 8.473684210526317e-06, + "step": 1095 + }, + { + "epoch": 2.8796844181459567, + "high_lr": 0.0004236842105263158, + "low_lr": 8.473684210526317e-06, + "step": 1095 + }, + { + "epoch": 2.8796844181459567, + "high_lr": 0.0004236842105263158, + "low_lr": 8.473684210526317e-06, + "step": 1095 + }, + { + "epoch": 2.8796844181459567, + "high_lr": 0.0004236842105263158, + "low_lr": 8.473684210526317e-06, + "step": 1095 + }, + { + "epoch": 2.8796844181459567, + "high_lr": 0.0004236842105263158, + "low_lr": 8.473684210526317e-06, + "step": 1095 + }, + { + "epoch": 2.8823142669296518, + "grad_norm": 1.3432323932647705, + "learning_rate": 0.00042315789473684214, + "loss": 1.4235, + "step": 1096 + }, + { + "epoch": 2.8823142669296518, + "high_lr": 0.00042315789473684214, + "low_lr": 8.463157894736843e-06, + "step": 1096 + }, + { + "epoch": 2.8823142669296518, + "high_lr": 0.00042315789473684214, + "low_lr": 8.463157894736843e-06, + "step": 1096 + }, + { + "epoch": 2.8823142669296518, + "high_lr": 0.00042315789473684214, + "low_lr": 8.463157894736843e-06, + "step": 1096 + }, + { + "epoch": 2.8823142669296518, + "high_lr": 0.00042315789473684214, + "low_lr": 8.463157894736843e-06, + "step": 1096 + }, + { + "epoch": 2.8823142669296518, + "high_lr": 0.00042315789473684214, + "low_lr": 8.463157894736843e-06, + "step": 1096 + }, + { + "epoch": 2.8823142669296518, + "high_lr": 0.00042315789473684214, + "low_lr": 8.463157894736843e-06, + "step": 1096 + }, + { + "epoch": 2.8823142669296518, + "high_lr": 0.00042315789473684214, + "low_lr": 8.463157894736843e-06, + "step": 1096 + }, + { + "epoch": 2.8823142669296518, + "high_lr": 0.00042315789473684214, + "low_lr": 8.463157894736843e-06, + "step": 1096 + }, + { + "epoch": 2.8849441157133464, + "grad_norm": 1.2235922813415527, + "learning_rate": 0.0004226315789473684, + "loss": 1.318, + "step": 1097 + }, + { + "epoch": 2.8849441157133464, + "high_lr": 0.0004226315789473684, + "low_lr": 8.45263157894737e-06, + "step": 1097 + }, + { + "epoch": 2.8849441157133464, + "high_lr": 0.0004226315789473684, + "low_lr": 8.45263157894737e-06, + "step": 1097 + }, + { + "epoch": 2.8849441157133464, + "high_lr": 0.0004226315789473684, + "low_lr": 8.45263157894737e-06, + "step": 1097 + }, + { + "epoch": 2.8849441157133464, + "high_lr": 0.0004226315789473684, + "low_lr": 8.45263157894737e-06, + "step": 1097 + }, + { + "epoch": 2.8849441157133464, + "high_lr": 0.0004226315789473684, + "low_lr": 8.45263157894737e-06, + "step": 1097 + }, + { + "epoch": 2.8849441157133464, + "high_lr": 0.0004226315789473684, + "low_lr": 8.45263157894737e-06, + "step": 1097 + }, + { + "epoch": 2.8849441157133464, + "high_lr": 0.0004226315789473684, + "low_lr": 8.45263157894737e-06, + "step": 1097 + }, + { + "epoch": 2.8849441157133464, + "high_lr": 0.0004226315789473684, + "low_lr": 8.45263157894737e-06, + "step": 1097 + }, + { + "epoch": 2.8875739644970415, + "grad_norm": 1.2706127166748047, + "learning_rate": 0.00042210526315789477, + "loss": 1.4046, + "step": 1098 + }, + { + "epoch": 2.8875739644970415, + "high_lr": 0.00042210526315789477, + "low_lr": 8.442105263157896e-06, + "step": 1098 + }, + { + "epoch": 2.8875739644970415, + "high_lr": 0.00042210526315789477, + "low_lr": 8.442105263157896e-06, + "step": 1098 + }, + { + "epoch": 2.8875739644970415, + "high_lr": 0.00042210526315789477, + "low_lr": 8.442105263157896e-06, + "step": 1098 + }, + { + "epoch": 2.8875739644970415, + "high_lr": 0.00042210526315789477, + "low_lr": 8.442105263157896e-06, + "step": 1098 + }, + { + "epoch": 2.8875739644970415, + "high_lr": 0.00042210526315789477, + "low_lr": 8.442105263157896e-06, + "step": 1098 + }, + { + "epoch": 2.8875739644970415, + "high_lr": 0.00042210526315789477, + "low_lr": 8.442105263157896e-06, + "step": 1098 + }, + { + "epoch": 2.8875739644970415, + "high_lr": 0.00042210526315789477, + "low_lr": 8.442105263157896e-06, + "step": 1098 + }, + { + "epoch": 2.8875739644970415, + "high_lr": 0.00042210526315789477, + "low_lr": 8.442105263157896e-06, + "step": 1098 + }, + { + "epoch": 2.8902038132807366, + "grad_norm": 1.2929545640945435, + "learning_rate": 0.00042157894736842105, + "loss": 1.3805, + "step": 1099 + }, + { + "epoch": 2.8902038132807366, + "high_lr": 0.00042157894736842105, + "low_lr": 8.431578947368422e-06, + "step": 1099 + }, + { + "epoch": 2.8902038132807366, + "high_lr": 0.00042157894736842105, + "low_lr": 8.431578947368422e-06, + "step": 1099 + }, + { + "epoch": 2.8902038132807366, + "high_lr": 0.00042157894736842105, + "low_lr": 8.431578947368422e-06, + "step": 1099 + }, + { + "epoch": 2.8902038132807366, + "high_lr": 0.00042157894736842105, + "low_lr": 8.431578947368422e-06, + "step": 1099 + }, + { + "epoch": 2.8902038132807366, + "high_lr": 0.00042157894736842105, + "low_lr": 8.431578947368422e-06, + "step": 1099 + }, + { + "epoch": 2.8902038132807366, + "high_lr": 0.00042157894736842105, + "low_lr": 8.431578947368422e-06, + "step": 1099 + }, + { + "epoch": 2.8902038132807366, + "high_lr": 0.00042157894736842105, + "low_lr": 8.431578947368422e-06, + "step": 1099 + }, + { + "epoch": 2.8902038132807366, + "high_lr": 0.00042157894736842105, + "low_lr": 8.431578947368422e-06, + "step": 1099 + }, + { + "epoch": 2.8928336620644313, + "grad_norm": 1.4245407581329346, + "learning_rate": 0.00042105263157894734, + "loss": 1.4389, + "step": 1100 + }, + { + "epoch": 2.8928336620644313, + "high_lr": 0.00042105263157894734, + "low_lr": 8.421052631578948e-06, + "step": 1100 + }, + { + "epoch": 2.8928336620644313, + "high_lr": 0.00042105263157894734, + "low_lr": 8.421052631578948e-06, + "step": 1100 + }, + { + "epoch": 2.8928336620644313, + "high_lr": 0.00042105263157894734, + "low_lr": 8.421052631578948e-06, + "step": 1100 + }, + { + "epoch": 2.8928336620644313, + "high_lr": 0.00042105263157894734, + "low_lr": 8.421052631578948e-06, + "step": 1100 + }, + { + "epoch": 2.8928336620644313, + "high_lr": 0.00042105263157894734, + "low_lr": 8.421052631578948e-06, + "step": 1100 + }, + { + "epoch": 2.8928336620644313, + "high_lr": 0.00042105263157894734, + "low_lr": 8.421052631578948e-06, + "step": 1100 + }, + { + "epoch": 2.8928336620644313, + "high_lr": 0.00042105263157894734, + "low_lr": 8.421052631578948e-06, + "step": 1100 + }, + { + "epoch": 2.8928336620644313, + "high_lr": 0.00042105263157894734, + "low_lr": 8.421052631578948e-06, + "step": 1100 + }, + { + "epoch": 2.8954635108481264, + "grad_norm": 1.3371567726135254, + "learning_rate": 0.0004205263157894737, + "loss": 1.3643, + "step": 1101 + }, + { + "epoch": 2.8954635108481264, + "high_lr": 0.0004205263157894737, + "low_lr": 8.410526315789475e-06, + "step": 1101 + }, + { + "epoch": 2.8954635108481264, + "high_lr": 0.0004205263157894737, + "low_lr": 8.410526315789475e-06, + "step": 1101 + }, + { + "epoch": 2.8954635108481264, + "high_lr": 0.0004205263157894737, + "low_lr": 8.410526315789475e-06, + "step": 1101 + }, + { + "epoch": 2.8954635108481264, + "high_lr": 0.0004205263157894737, + "low_lr": 8.410526315789475e-06, + "step": 1101 + }, + { + "epoch": 2.8954635108481264, + "high_lr": 0.0004205263157894737, + "low_lr": 8.410526315789475e-06, + "step": 1101 + }, + { + "epoch": 2.8954635108481264, + "high_lr": 0.0004205263157894737, + "low_lr": 8.410526315789475e-06, + "step": 1101 + }, + { + "epoch": 2.8954635108481264, + "high_lr": 0.0004205263157894737, + "low_lr": 8.410526315789475e-06, + "step": 1101 + }, + { + "epoch": 2.8954635108481264, + "high_lr": 0.0004205263157894737, + "low_lr": 8.410526315789475e-06, + "step": 1101 + }, + { + "epoch": 2.898093359631821, + "grad_norm": 1.2589834928512573, + "learning_rate": 0.00042, + "loss": 1.4083, + "step": 1102 + }, + { + "epoch": 2.898093359631821, + "high_lr": 0.00042, + "low_lr": 8.400000000000001e-06, + "step": 1102 + }, + { + "epoch": 2.898093359631821, + "high_lr": 0.00042, + "low_lr": 8.400000000000001e-06, + "step": 1102 + }, + { + "epoch": 2.898093359631821, + "high_lr": 0.00042, + "low_lr": 8.400000000000001e-06, + "step": 1102 + }, + { + "epoch": 2.898093359631821, + "high_lr": 0.00042, + "low_lr": 8.400000000000001e-06, + "step": 1102 + }, + { + "epoch": 2.898093359631821, + "high_lr": 0.00042, + "low_lr": 8.400000000000001e-06, + "step": 1102 + }, + { + "epoch": 2.898093359631821, + "high_lr": 0.00042, + "low_lr": 8.400000000000001e-06, + "step": 1102 + }, + { + "epoch": 2.898093359631821, + "high_lr": 0.00042, + "low_lr": 8.400000000000001e-06, + "step": 1102 + }, + { + "epoch": 2.898093359631821, + "high_lr": 0.00042, + "low_lr": 8.400000000000001e-06, + "step": 1102 + }, + { + "epoch": 2.900723208415516, + "grad_norm": 1.3375548124313354, + "learning_rate": 0.00041947368421052636, + "loss": 1.3482, + "step": 1103 + }, + { + "epoch": 2.900723208415516, + "high_lr": 0.00041947368421052636, + "low_lr": 8.389473684210527e-06, + "step": 1103 + }, + { + "epoch": 2.900723208415516, + "high_lr": 0.00041947368421052636, + "low_lr": 8.389473684210527e-06, + "step": 1103 + }, + { + "epoch": 2.900723208415516, + "high_lr": 0.00041947368421052636, + "low_lr": 8.389473684210527e-06, + "step": 1103 + }, + { + "epoch": 2.900723208415516, + "high_lr": 0.00041947368421052636, + "low_lr": 8.389473684210527e-06, + "step": 1103 + }, + { + "epoch": 2.900723208415516, + "high_lr": 0.00041947368421052636, + "low_lr": 8.389473684210527e-06, + "step": 1103 + }, + { + "epoch": 2.900723208415516, + "high_lr": 0.00041947368421052636, + "low_lr": 8.389473684210527e-06, + "step": 1103 + }, + { + "epoch": 2.900723208415516, + "high_lr": 0.00041947368421052636, + "low_lr": 8.389473684210527e-06, + "step": 1103 + }, + { + "epoch": 2.900723208415516, + "high_lr": 0.00041947368421052636, + "low_lr": 8.389473684210527e-06, + "step": 1103 + }, + { + "epoch": 2.9033530571992108, + "grad_norm": 1.3510268926620483, + "learning_rate": 0.00041894736842105264, + "loss": 1.3335, + "step": 1104 + }, + { + "epoch": 2.9033530571992108, + "high_lr": 0.00041894736842105264, + "low_lr": 8.378947368421054e-06, + "step": 1104 + }, + { + "epoch": 2.9033530571992108, + "high_lr": 0.00041894736842105264, + "low_lr": 8.378947368421054e-06, + "step": 1104 + }, + { + "epoch": 2.9033530571992108, + "high_lr": 0.00041894736842105264, + "low_lr": 8.378947368421054e-06, + "step": 1104 + }, + { + "epoch": 2.9033530571992108, + "high_lr": 0.00041894736842105264, + "low_lr": 8.378947368421054e-06, + "step": 1104 + }, + { + "epoch": 2.9033530571992108, + "high_lr": 0.00041894736842105264, + "low_lr": 8.378947368421054e-06, + "step": 1104 + }, + { + "epoch": 2.9033530571992108, + "high_lr": 0.00041894736842105264, + "low_lr": 8.378947368421054e-06, + "step": 1104 + }, + { + "epoch": 2.9033530571992108, + "high_lr": 0.00041894736842105264, + "low_lr": 8.378947368421054e-06, + "step": 1104 + }, + { + "epoch": 2.9033530571992108, + "high_lr": 0.00041894736842105264, + "low_lr": 8.378947368421054e-06, + "step": 1104 + }, + { + "epoch": 2.905982905982906, + "grad_norm": 1.38094961643219, + "learning_rate": 0.000418421052631579, + "loss": 1.3722, + "step": 1105 + }, + { + "epoch": 2.905982905982906, + "high_lr": 0.000418421052631579, + "low_lr": 8.36842105263158e-06, + "step": 1105 + }, + { + "epoch": 2.905982905982906, + "high_lr": 0.000418421052631579, + "low_lr": 8.36842105263158e-06, + "step": 1105 + }, + { + "epoch": 2.905982905982906, + "high_lr": 0.000418421052631579, + "low_lr": 8.36842105263158e-06, + "step": 1105 + }, + { + "epoch": 2.905982905982906, + "high_lr": 0.000418421052631579, + "low_lr": 8.36842105263158e-06, + "step": 1105 + }, + { + "epoch": 2.905982905982906, + "high_lr": 0.000418421052631579, + "low_lr": 8.36842105263158e-06, + "step": 1105 + }, + { + "epoch": 2.905982905982906, + "high_lr": 0.000418421052631579, + "low_lr": 8.36842105263158e-06, + "step": 1105 + }, + { + "epoch": 2.905982905982906, + "high_lr": 0.000418421052631579, + "low_lr": 8.36842105263158e-06, + "step": 1105 + }, + { + "epoch": 2.905982905982906, + "high_lr": 0.000418421052631579, + "low_lr": 8.36842105263158e-06, + "step": 1105 + }, + { + "epoch": 2.908612754766601, + "grad_norm": 1.4025527238845825, + "learning_rate": 0.00041789473684210527, + "loss": 1.3813, + "step": 1106 + }, + { + "epoch": 2.908612754766601, + "high_lr": 0.00041789473684210527, + "low_lr": 8.357894736842106e-06, + "step": 1106 + }, + { + "epoch": 2.908612754766601, + "high_lr": 0.00041789473684210527, + "low_lr": 8.357894736842106e-06, + "step": 1106 + }, + { + "epoch": 2.908612754766601, + "high_lr": 0.00041789473684210527, + "low_lr": 8.357894736842106e-06, + "step": 1106 + }, + { + "epoch": 2.908612754766601, + "high_lr": 0.00041789473684210527, + "low_lr": 8.357894736842106e-06, + "step": 1106 + }, + { + "epoch": 2.908612754766601, + "high_lr": 0.00041789473684210527, + "low_lr": 8.357894736842106e-06, + "step": 1106 + }, + { + "epoch": 2.908612754766601, + "high_lr": 0.00041789473684210527, + "low_lr": 8.357894736842106e-06, + "step": 1106 + }, + { + "epoch": 2.908612754766601, + "high_lr": 0.00041789473684210527, + "low_lr": 8.357894736842106e-06, + "step": 1106 + }, + { + "epoch": 2.908612754766601, + "high_lr": 0.00041789473684210527, + "low_lr": 8.357894736842106e-06, + "step": 1106 + }, + { + "epoch": 2.9112426035502956, + "grad_norm": 1.372842788696289, + "learning_rate": 0.00041736842105263156, + "loss": 1.3694, + "step": 1107 + }, + { + "epoch": 2.9112426035502956, + "high_lr": 0.00041736842105263156, + "low_lr": 8.347368421052633e-06, + "step": 1107 + }, + { + "epoch": 2.9112426035502956, + "high_lr": 0.00041736842105263156, + "low_lr": 8.347368421052633e-06, + "step": 1107 + }, + { + "epoch": 2.9112426035502956, + "high_lr": 0.00041736842105263156, + "low_lr": 8.347368421052633e-06, + "step": 1107 + }, + { + "epoch": 2.9112426035502956, + "high_lr": 0.00041736842105263156, + "low_lr": 8.347368421052633e-06, + "step": 1107 + }, + { + "epoch": 2.9112426035502956, + "high_lr": 0.00041736842105263156, + "low_lr": 8.347368421052633e-06, + "step": 1107 + }, + { + "epoch": 2.9112426035502956, + "high_lr": 0.00041736842105263156, + "low_lr": 8.347368421052633e-06, + "step": 1107 + }, + { + "epoch": 2.9112426035502956, + "high_lr": 0.00041736842105263156, + "low_lr": 8.347368421052633e-06, + "step": 1107 + }, + { + "epoch": 2.9112426035502956, + "high_lr": 0.00041736842105263156, + "low_lr": 8.347368421052633e-06, + "step": 1107 + }, + { + "epoch": 2.9138724523339907, + "grad_norm": 1.254815936088562, + "learning_rate": 0.0004168421052631579, + "loss": 1.3346, + "step": 1108 + }, + { + "epoch": 2.9138724523339907, + "high_lr": 0.0004168421052631579, + "low_lr": 8.336842105263159e-06, + "step": 1108 + }, + { + "epoch": 2.9138724523339907, + "high_lr": 0.0004168421052631579, + "low_lr": 8.336842105263159e-06, + "step": 1108 + }, + { + "epoch": 2.9138724523339907, + "high_lr": 0.0004168421052631579, + "low_lr": 8.336842105263159e-06, + "step": 1108 + }, + { + "epoch": 2.9138724523339907, + "high_lr": 0.0004168421052631579, + "low_lr": 8.336842105263159e-06, + "step": 1108 + }, + { + "epoch": 2.9138724523339907, + "high_lr": 0.0004168421052631579, + "low_lr": 8.336842105263159e-06, + "step": 1108 + }, + { + "epoch": 2.9138724523339907, + "high_lr": 0.0004168421052631579, + "low_lr": 8.336842105263159e-06, + "step": 1108 + }, + { + "epoch": 2.9138724523339907, + "high_lr": 0.0004168421052631579, + "low_lr": 8.336842105263159e-06, + "step": 1108 + }, + { + "epoch": 2.9138724523339907, + "high_lr": 0.0004168421052631579, + "low_lr": 8.336842105263159e-06, + "step": 1108 + }, + { + "epoch": 2.916502301117686, + "grad_norm": 1.2994201183319092, + "learning_rate": 0.0004163157894736842, + "loss": 1.4007, + "step": 1109 + }, + { + "epoch": 2.916502301117686, + "high_lr": 0.0004163157894736842, + "low_lr": 8.326315789473685e-06, + "step": 1109 + }, + { + "epoch": 2.916502301117686, + "high_lr": 0.0004163157894736842, + "low_lr": 8.326315789473685e-06, + "step": 1109 + }, + { + "epoch": 2.916502301117686, + "high_lr": 0.0004163157894736842, + "low_lr": 8.326315789473685e-06, + "step": 1109 + }, + { + "epoch": 2.916502301117686, + "high_lr": 0.0004163157894736842, + "low_lr": 8.326315789473685e-06, + "step": 1109 + }, + { + "epoch": 2.916502301117686, + "high_lr": 0.0004163157894736842, + "low_lr": 8.326315789473685e-06, + "step": 1109 + }, + { + "epoch": 2.916502301117686, + "high_lr": 0.0004163157894736842, + "low_lr": 8.326315789473685e-06, + "step": 1109 + }, + { + "epoch": 2.916502301117686, + "high_lr": 0.0004163157894736842, + "low_lr": 8.326315789473685e-06, + "step": 1109 + }, + { + "epoch": 2.916502301117686, + "high_lr": 0.0004163157894736842, + "low_lr": 8.326315789473685e-06, + "step": 1109 + }, + { + "epoch": 2.9191321499013805, + "grad_norm": 1.2803548574447632, + "learning_rate": 0.0004157894736842106, + "loss": 1.3833, + "step": 1110 + }, + { + "epoch": 2.9191321499013805, + "high_lr": 0.0004157894736842106, + "low_lr": 8.315789473684212e-06, + "step": 1110 + }, + { + "epoch": 2.9191321499013805, + "high_lr": 0.0004157894736842106, + "low_lr": 8.315789473684212e-06, + "step": 1110 + }, + { + "epoch": 2.9191321499013805, + "high_lr": 0.0004157894736842106, + "low_lr": 8.315789473684212e-06, + "step": 1110 + }, + { + "epoch": 2.9191321499013805, + "high_lr": 0.0004157894736842106, + "low_lr": 8.315789473684212e-06, + "step": 1110 + }, + { + "epoch": 2.9191321499013805, + "high_lr": 0.0004157894736842106, + "low_lr": 8.315789473684212e-06, + "step": 1110 + }, + { + "epoch": 2.9191321499013805, + "high_lr": 0.0004157894736842106, + "low_lr": 8.315789473684212e-06, + "step": 1110 + }, + { + "epoch": 2.9191321499013805, + "high_lr": 0.0004157894736842106, + "low_lr": 8.315789473684212e-06, + "step": 1110 + }, + { + "epoch": 2.9191321499013805, + "high_lr": 0.0004157894736842106, + "low_lr": 8.315789473684212e-06, + "step": 1110 + }, + { + "epoch": 2.9217619986850756, + "grad_norm": 1.354733943939209, + "learning_rate": 0.00041526315789473686, + "loss": 1.3278, + "step": 1111 + }, + { + "epoch": 2.9217619986850756, + "high_lr": 0.00041526315789473686, + "low_lr": 8.305263157894738e-06, + "step": 1111 + }, + { + "epoch": 2.9217619986850756, + "high_lr": 0.00041526315789473686, + "low_lr": 8.305263157894738e-06, + "step": 1111 + }, + { + "epoch": 2.9217619986850756, + "high_lr": 0.00041526315789473686, + "low_lr": 8.305263157894738e-06, + "step": 1111 + }, + { + "epoch": 2.9217619986850756, + "high_lr": 0.00041526315789473686, + "low_lr": 8.305263157894738e-06, + "step": 1111 + }, + { + "epoch": 2.9217619986850756, + "high_lr": 0.00041526315789473686, + "low_lr": 8.305263157894738e-06, + "step": 1111 + }, + { + "epoch": 2.9217619986850756, + "high_lr": 0.00041526315789473686, + "low_lr": 8.305263157894738e-06, + "step": 1111 + }, + { + "epoch": 2.9217619986850756, + "high_lr": 0.00041526315789473686, + "low_lr": 8.305263157894738e-06, + "step": 1111 + }, + { + "epoch": 2.9217619986850756, + "high_lr": 0.00041526315789473686, + "low_lr": 8.305263157894738e-06, + "step": 1111 + }, + { + "epoch": 2.9243918474687707, + "grad_norm": 1.2924370765686035, + "learning_rate": 0.0004147368421052632, + "loss": 1.3748, + "step": 1112 + }, + { + "epoch": 2.9243918474687707, + "high_lr": 0.0004147368421052632, + "low_lr": 8.294736842105264e-06, + "step": 1112 + }, + { + "epoch": 2.9243918474687707, + "high_lr": 0.0004147368421052632, + "low_lr": 8.294736842105264e-06, + "step": 1112 + }, + { + "epoch": 2.9243918474687707, + "high_lr": 0.0004147368421052632, + "low_lr": 8.294736842105264e-06, + "step": 1112 + }, + { + "epoch": 2.9243918474687707, + "high_lr": 0.0004147368421052632, + "low_lr": 8.294736842105264e-06, + "step": 1112 + }, + { + "epoch": 2.9243918474687707, + "high_lr": 0.0004147368421052632, + "low_lr": 8.294736842105264e-06, + "step": 1112 + }, + { + "epoch": 2.9243918474687707, + "high_lr": 0.0004147368421052632, + "low_lr": 8.294736842105264e-06, + "step": 1112 + }, + { + "epoch": 2.9243918474687707, + "high_lr": 0.0004147368421052632, + "low_lr": 8.294736842105264e-06, + "step": 1112 + }, + { + "epoch": 2.9243918474687707, + "high_lr": 0.0004147368421052632, + "low_lr": 8.294736842105264e-06, + "step": 1112 + }, + { + "epoch": 2.9270216962524653, + "grad_norm": 1.2990412712097168, + "learning_rate": 0.0004142105263157895, + "loss": 1.3719, + "step": 1113 + }, + { + "epoch": 2.9270216962524653, + "high_lr": 0.0004142105263157895, + "low_lr": 8.28421052631579e-06, + "step": 1113 + }, + { + "epoch": 2.9270216962524653, + "high_lr": 0.0004142105263157895, + "low_lr": 8.28421052631579e-06, + "step": 1113 + }, + { + "epoch": 2.9270216962524653, + "high_lr": 0.0004142105263157895, + "low_lr": 8.28421052631579e-06, + "step": 1113 + }, + { + "epoch": 2.9270216962524653, + "high_lr": 0.0004142105263157895, + "low_lr": 8.28421052631579e-06, + "step": 1113 + }, + { + "epoch": 2.9270216962524653, + "high_lr": 0.0004142105263157895, + "low_lr": 8.28421052631579e-06, + "step": 1113 + }, + { + "epoch": 2.9270216962524653, + "high_lr": 0.0004142105263157895, + "low_lr": 8.28421052631579e-06, + "step": 1113 + }, + { + "epoch": 2.9270216962524653, + "high_lr": 0.0004142105263157895, + "low_lr": 8.28421052631579e-06, + "step": 1113 + }, + { + "epoch": 2.9270216962524653, + "high_lr": 0.0004142105263157895, + "low_lr": 8.28421052631579e-06, + "step": 1113 + }, + { + "epoch": 2.9296515450361604, + "grad_norm": 1.5746723413467407, + "learning_rate": 0.0004136842105263158, + "loss": 1.4748, + "step": 1114 + }, + { + "epoch": 2.9296515450361604, + "high_lr": 0.0004136842105263158, + "low_lr": 8.273684210526317e-06, + "step": 1114 + }, + { + "epoch": 2.9296515450361604, + "high_lr": 0.0004136842105263158, + "low_lr": 8.273684210526317e-06, + "step": 1114 + }, + { + "epoch": 2.9296515450361604, + "high_lr": 0.0004136842105263158, + "low_lr": 8.273684210526317e-06, + "step": 1114 + }, + { + "epoch": 2.9296515450361604, + "high_lr": 0.0004136842105263158, + "low_lr": 8.273684210526317e-06, + "step": 1114 + }, + { + "epoch": 2.9296515450361604, + "high_lr": 0.0004136842105263158, + "low_lr": 8.273684210526317e-06, + "step": 1114 + }, + { + "epoch": 2.9296515450361604, + "high_lr": 0.0004136842105263158, + "low_lr": 8.273684210526317e-06, + "step": 1114 + }, + { + "epoch": 2.9296515450361604, + "high_lr": 0.0004136842105263158, + "low_lr": 8.273684210526317e-06, + "step": 1114 + }, + { + "epoch": 2.9296515450361604, + "high_lr": 0.0004136842105263158, + "low_lr": 8.273684210526317e-06, + "step": 1114 + }, + { + "epoch": 2.9322813938198555, + "grad_norm": 1.3033024072647095, + "learning_rate": 0.0004131578947368421, + "loss": 1.4211, + "step": 1115 + }, + { + "epoch": 2.9322813938198555, + "high_lr": 0.0004131578947368421, + "low_lr": 8.263157894736843e-06, + "step": 1115 + }, + { + "epoch": 2.9322813938198555, + "high_lr": 0.0004131578947368421, + "low_lr": 8.263157894736843e-06, + "step": 1115 + }, + { + "epoch": 2.9322813938198555, + "high_lr": 0.0004131578947368421, + "low_lr": 8.263157894736843e-06, + "step": 1115 + }, + { + "epoch": 2.9322813938198555, + "high_lr": 0.0004131578947368421, + "low_lr": 8.263157894736843e-06, + "step": 1115 + }, + { + "epoch": 2.9322813938198555, + "high_lr": 0.0004131578947368421, + "low_lr": 8.263157894736843e-06, + "step": 1115 + }, + { + "epoch": 2.9322813938198555, + "high_lr": 0.0004131578947368421, + "low_lr": 8.263157894736843e-06, + "step": 1115 + }, + { + "epoch": 2.9322813938198555, + "high_lr": 0.0004131578947368421, + "low_lr": 8.263157894736843e-06, + "step": 1115 + }, + { + "epoch": 2.9322813938198555, + "high_lr": 0.0004131578947368421, + "low_lr": 8.263157894736843e-06, + "step": 1115 + }, + { + "epoch": 2.93491124260355, + "grad_norm": 1.3628966808319092, + "learning_rate": 0.0004126315789473684, + "loss": 1.3952, + "step": 1116 + }, + { + "epoch": 2.93491124260355, + "high_lr": 0.0004126315789473684, + "low_lr": 8.25263157894737e-06, + "step": 1116 + }, + { + "epoch": 2.93491124260355, + "high_lr": 0.0004126315789473684, + "low_lr": 8.25263157894737e-06, + "step": 1116 + }, + { + "epoch": 2.93491124260355, + "high_lr": 0.0004126315789473684, + "low_lr": 8.25263157894737e-06, + "step": 1116 + }, + { + "epoch": 2.93491124260355, + "high_lr": 0.0004126315789473684, + "low_lr": 8.25263157894737e-06, + "step": 1116 + }, + { + "epoch": 2.93491124260355, + "high_lr": 0.0004126315789473684, + "low_lr": 8.25263157894737e-06, + "step": 1116 + }, + { + "epoch": 2.93491124260355, + "high_lr": 0.0004126315789473684, + "low_lr": 8.25263157894737e-06, + "step": 1116 + }, + { + "epoch": 2.93491124260355, + "high_lr": 0.0004126315789473684, + "low_lr": 8.25263157894737e-06, + "step": 1116 + }, + { + "epoch": 2.93491124260355, + "high_lr": 0.0004126315789473684, + "low_lr": 8.25263157894737e-06, + "step": 1116 + }, + { + "epoch": 2.9375410913872453, + "grad_norm": 1.1582694053649902, + "learning_rate": 0.00041210526315789474, + "loss": 1.3778, + "step": 1117 + }, + { + "epoch": 2.9375410913872453, + "high_lr": 0.00041210526315789474, + "low_lr": 8.242105263157896e-06, + "step": 1117 + }, + { + "epoch": 2.9375410913872453, + "high_lr": 0.00041210526315789474, + "low_lr": 8.242105263157896e-06, + "step": 1117 + }, + { + "epoch": 2.9375410913872453, + "high_lr": 0.00041210526315789474, + "low_lr": 8.242105263157896e-06, + "step": 1117 + }, + { + "epoch": 2.9375410913872453, + "high_lr": 0.00041210526315789474, + "low_lr": 8.242105263157896e-06, + "step": 1117 + }, + { + "epoch": 2.9375410913872453, + "high_lr": 0.00041210526315789474, + "low_lr": 8.242105263157896e-06, + "step": 1117 + }, + { + "epoch": 2.9375410913872453, + "high_lr": 0.00041210526315789474, + "low_lr": 8.242105263157896e-06, + "step": 1117 + }, + { + "epoch": 2.9375410913872453, + "high_lr": 0.00041210526315789474, + "low_lr": 8.242105263157896e-06, + "step": 1117 + }, + { + "epoch": 2.9375410913872453, + "high_lr": 0.00041210526315789474, + "low_lr": 8.242105263157896e-06, + "step": 1117 + }, + { + "epoch": 2.9401709401709404, + "grad_norm": 1.3429607152938843, + "learning_rate": 0.0004115789473684211, + "loss": 1.3652, + "step": 1118 + }, + { + "epoch": 2.9401709401709404, + "high_lr": 0.0004115789473684211, + "low_lr": 8.231578947368422e-06, + "step": 1118 + }, + { + "epoch": 2.9401709401709404, + "high_lr": 0.0004115789473684211, + "low_lr": 8.231578947368422e-06, + "step": 1118 + }, + { + "epoch": 2.9401709401709404, + "high_lr": 0.0004115789473684211, + "low_lr": 8.231578947368422e-06, + "step": 1118 + }, + { + "epoch": 2.9401709401709404, + "high_lr": 0.0004115789473684211, + "low_lr": 8.231578947368422e-06, + "step": 1118 + }, + { + "epoch": 2.9401709401709404, + "high_lr": 0.0004115789473684211, + "low_lr": 8.231578947368422e-06, + "step": 1118 + }, + { + "epoch": 2.9401709401709404, + "high_lr": 0.0004115789473684211, + "low_lr": 8.231578947368422e-06, + "step": 1118 + }, + { + "epoch": 2.9401709401709404, + "high_lr": 0.0004115789473684211, + "low_lr": 8.231578947368422e-06, + "step": 1118 + }, + { + "epoch": 2.9401709401709404, + "high_lr": 0.0004115789473684211, + "low_lr": 8.231578947368422e-06, + "step": 1118 + }, + { + "epoch": 2.942800788954635, + "grad_norm": 1.2951549291610718, + "learning_rate": 0.0004110526315789474, + "loss": 1.4009, + "step": 1119 + }, + { + "epoch": 2.942800788954635, + "high_lr": 0.0004110526315789474, + "low_lr": 8.221052631578948e-06, + "step": 1119 + }, + { + "epoch": 2.942800788954635, + "high_lr": 0.0004110526315789474, + "low_lr": 8.221052631578948e-06, + "step": 1119 + }, + { + "epoch": 2.942800788954635, + "high_lr": 0.0004110526315789474, + "low_lr": 8.221052631578948e-06, + "step": 1119 + }, + { + "epoch": 2.942800788954635, + "high_lr": 0.0004110526315789474, + "low_lr": 8.221052631578948e-06, + "step": 1119 + }, + { + "epoch": 2.942800788954635, + "high_lr": 0.0004110526315789474, + "low_lr": 8.221052631578948e-06, + "step": 1119 + }, + { + "epoch": 2.942800788954635, + "high_lr": 0.0004110526315789474, + "low_lr": 8.221052631578948e-06, + "step": 1119 + }, + { + "epoch": 2.942800788954635, + "high_lr": 0.0004110526315789474, + "low_lr": 8.221052631578948e-06, + "step": 1119 + }, + { + "epoch": 2.942800788954635, + "high_lr": 0.0004110526315789474, + "low_lr": 8.221052631578948e-06, + "step": 1119 + }, + { + "epoch": 2.94543063773833, + "grad_norm": 1.3277417421340942, + "learning_rate": 0.0004105263157894737, + "loss": 1.3428, + "step": 1120 + }, + { + "epoch": 2.94543063773833, + "high_lr": 0.0004105263157894737, + "low_lr": 8.210526315789475e-06, + "step": 1120 + }, + { + "epoch": 2.94543063773833, + "high_lr": 0.0004105263157894737, + "low_lr": 8.210526315789475e-06, + "step": 1120 + }, + { + "epoch": 2.94543063773833, + "high_lr": 0.0004105263157894737, + "low_lr": 8.210526315789475e-06, + "step": 1120 + }, + { + "epoch": 2.94543063773833, + "high_lr": 0.0004105263157894737, + "low_lr": 8.210526315789475e-06, + "step": 1120 + }, + { + "epoch": 2.94543063773833, + "high_lr": 0.0004105263157894737, + "low_lr": 8.210526315789475e-06, + "step": 1120 + }, + { + "epoch": 2.94543063773833, + "high_lr": 0.0004105263157894737, + "low_lr": 8.210526315789475e-06, + "step": 1120 + }, + { + "epoch": 2.94543063773833, + "high_lr": 0.0004105263157894737, + "low_lr": 8.210526315789475e-06, + "step": 1120 + }, + { + "epoch": 2.94543063773833, + "high_lr": 0.0004105263157894737, + "low_lr": 8.210526315789475e-06, + "step": 1120 + }, + { + "epoch": 2.9480604865220252, + "grad_norm": 1.3367265462875366, + "learning_rate": 0.00041, + "loss": 1.3685, + "step": 1121 + }, + { + "epoch": 2.9480604865220252, + "high_lr": 0.00041, + "low_lr": 8.2e-06, + "step": 1121 + }, + { + "epoch": 2.9480604865220252, + "high_lr": 0.00041, + "low_lr": 8.2e-06, + "step": 1121 + }, + { + "epoch": 2.9480604865220252, + "high_lr": 0.00041, + "low_lr": 8.2e-06, + "step": 1121 + }, + { + "epoch": 2.9480604865220252, + "high_lr": 0.00041, + "low_lr": 8.2e-06, + "step": 1121 + }, + { + "epoch": 2.9480604865220252, + "high_lr": 0.00041, + "low_lr": 8.2e-06, + "step": 1121 + }, + { + "epoch": 2.9480604865220252, + "high_lr": 0.00041, + "low_lr": 8.2e-06, + "step": 1121 + }, + { + "epoch": 2.9480604865220252, + "high_lr": 0.00041, + "low_lr": 8.2e-06, + "step": 1121 + }, + { + "epoch": 2.9480604865220252, + "high_lr": 0.00041, + "low_lr": 8.2e-06, + "step": 1121 + }, + { + "epoch": 2.95069033530572, + "grad_norm": 1.2772372961044312, + "learning_rate": 0.00040947368421052633, + "loss": 1.3718, + "step": 1122 + }, + { + "epoch": 2.95069033530572, + "high_lr": 0.00040947368421052633, + "low_lr": 8.189473684210527e-06, + "step": 1122 + }, + { + "epoch": 2.95069033530572, + "high_lr": 0.00040947368421052633, + "low_lr": 8.189473684210527e-06, + "step": 1122 + }, + { + "epoch": 2.95069033530572, + "high_lr": 0.00040947368421052633, + "low_lr": 8.189473684210527e-06, + "step": 1122 + }, + { + "epoch": 2.95069033530572, + "high_lr": 0.00040947368421052633, + "low_lr": 8.189473684210527e-06, + "step": 1122 + }, + { + "epoch": 2.95069033530572, + "high_lr": 0.00040947368421052633, + "low_lr": 8.189473684210527e-06, + "step": 1122 + }, + { + "epoch": 2.95069033530572, + "high_lr": 0.00040947368421052633, + "low_lr": 8.189473684210527e-06, + "step": 1122 + }, + { + "epoch": 2.95069033530572, + "high_lr": 0.00040947368421052633, + "low_lr": 8.189473684210527e-06, + "step": 1122 + }, + { + "epoch": 2.95069033530572, + "high_lr": 0.00040947368421052633, + "low_lr": 8.189473684210527e-06, + "step": 1122 + }, + { + "epoch": 2.953320184089415, + "grad_norm": 1.3111339807510376, + "learning_rate": 0.0004089473684210526, + "loss": 1.395, + "step": 1123 + }, + { + "epoch": 2.953320184089415, + "high_lr": 0.0004089473684210526, + "low_lr": 8.178947368421054e-06, + "step": 1123 + }, + { + "epoch": 2.953320184089415, + "high_lr": 0.0004089473684210526, + "low_lr": 8.178947368421054e-06, + "step": 1123 + }, + { + "epoch": 2.953320184089415, + "high_lr": 0.0004089473684210526, + "low_lr": 8.178947368421054e-06, + "step": 1123 + }, + { + "epoch": 2.953320184089415, + "high_lr": 0.0004089473684210526, + "low_lr": 8.178947368421054e-06, + "step": 1123 + }, + { + "epoch": 2.953320184089415, + "high_lr": 0.0004089473684210526, + "low_lr": 8.178947368421054e-06, + "step": 1123 + }, + { + "epoch": 2.953320184089415, + "high_lr": 0.0004089473684210526, + "low_lr": 8.178947368421054e-06, + "step": 1123 + }, + { + "epoch": 2.953320184089415, + "high_lr": 0.0004089473684210526, + "low_lr": 8.178947368421054e-06, + "step": 1123 + }, + { + "epoch": 2.953320184089415, + "high_lr": 0.0004089473684210526, + "low_lr": 8.178947368421054e-06, + "step": 1123 + }, + { + "epoch": 2.9559500328731096, + "grad_norm": 1.2343921661376953, + "learning_rate": 0.00040842105263157896, + "loss": 1.3731, + "step": 1124 + }, + { + "epoch": 2.9559500328731096, + "high_lr": 0.00040842105263157896, + "low_lr": 8.16842105263158e-06, + "step": 1124 + }, + { + "epoch": 2.9559500328731096, + "high_lr": 0.00040842105263157896, + "low_lr": 8.16842105263158e-06, + "step": 1124 + }, + { + "epoch": 2.9559500328731096, + "high_lr": 0.00040842105263157896, + "low_lr": 8.16842105263158e-06, + "step": 1124 + }, + { + "epoch": 2.9559500328731096, + "high_lr": 0.00040842105263157896, + "low_lr": 8.16842105263158e-06, + "step": 1124 + }, + { + "epoch": 2.9559500328731096, + "high_lr": 0.00040842105263157896, + "low_lr": 8.16842105263158e-06, + "step": 1124 + }, + { + "epoch": 2.9559500328731096, + "high_lr": 0.00040842105263157896, + "low_lr": 8.16842105263158e-06, + "step": 1124 + }, + { + "epoch": 2.9559500328731096, + "high_lr": 0.00040842105263157896, + "low_lr": 8.16842105263158e-06, + "step": 1124 + }, + { + "epoch": 2.9559500328731096, + "high_lr": 0.00040842105263157896, + "low_lr": 8.16842105263158e-06, + "step": 1124 + }, + { + "epoch": 2.9585798816568047, + "grad_norm": 1.253628134727478, + "learning_rate": 0.00040789473684210524, + "loss": 1.381, + "step": 1125 + }, + { + "epoch": 2.9585798816568047, + "high_lr": 0.00040789473684210524, + "low_lr": 8.157894736842106e-06, + "step": 1125 + }, + { + "epoch": 2.9585798816568047, + "high_lr": 0.00040789473684210524, + "low_lr": 8.157894736842106e-06, + "step": 1125 + }, + { + "epoch": 2.9585798816568047, + "high_lr": 0.00040789473684210524, + "low_lr": 8.157894736842106e-06, + "step": 1125 + }, + { + "epoch": 2.9585798816568047, + "high_lr": 0.00040789473684210524, + "low_lr": 8.157894736842106e-06, + "step": 1125 + }, + { + "epoch": 2.9585798816568047, + "high_lr": 0.00040789473684210524, + "low_lr": 8.157894736842106e-06, + "step": 1125 + }, + { + "epoch": 2.9585798816568047, + "high_lr": 0.00040789473684210524, + "low_lr": 8.157894736842106e-06, + "step": 1125 + }, + { + "epoch": 2.9585798816568047, + "high_lr": 0.00040789473684210524, + "low_lr": 8.157894736842106e-06, + "step": 1125 + }, + { + "epoch": 2.9585798816568047, + "high_lr": 0.00040789473684210524, + "low_lr": 8.157894736842106e-06, + "step": 1125 + }, + { + "epoch": 2.9612097304404994, + "grad_norm": 1.2898083925247192, + "learning_rate": 0.00040736842105263164, + "loss": 1.4433, + "step": 1126 + }, + { + "epoch": 2.9612097304404994, + "high_lr": 0.00040736842105263164, + "low_lr": 8.147368421052633e-06, + "step": 1126 + }, + { + "epoch": 2.9612097304404994, + "high_lr": 0.00040736842105263164, + "low_lr": 8.147368421052633e-06, + "step": 1126 + }, + { + "epoch": 2.9612097304404994, + "high_lr": 0.00040736842105263164, + "low_lr": 8.147368421052633e-06, + "step": 1126 + }, + { + "epoch": 2.9612097304404994, + "high_lr": 0.00040736842105263164, + "low_lr": 8.147368421052633e-06, + "step": 1126 + }, + { + "epoch": 2.9612097304404994, + "high_lr": 0.00040736842105263164, + "low_lr": 8.147368421052633e-06, + "step": 1126 + }, + { + "epoch": 2.9612097304404994, + "high_lr": 0.00040736842105263164, + "low_lr": 8.147368421052633e-06, + "step": 1126 + }, + { + "epoch": 2.9612097304404994, + "high_lr": 0.00040736842105263164, + "low_lr": 8.147368421052633e-06, + "step": 1126 + }, + { + "epoch": 2.9612097304404994, + "high_lr": 0.00040736842105263164, + "low_lr": 8.147368421052633e-06, + "step": 1126 + }, + { + "epoch": 2.9638395792241945, + "grad_norm": 1.3289254903793335, + "learning_rate": 0.0004068421052631579, + "loss": 1.3839, + "step": 1127 + }, + { + "epoch": 2.9638395792241945, + "high_lr": 0.0004068421052631579, + "low_lr": 8.136842105263159e-06, + "step": 1127 + }, + { + "epoch": 2.9638395792241945, + "high_lr": 0.0004068421052631579, + "low_lr": 8.136842105263159e-06, + "step": 1127 + }, + { + "epoch": 2.9638395792241945, + "high_lr": 0.0004068421052631579, + "low_lr": 8.136842105263159e-06, + "step": 1127 + }, + { + "epoch": 2.9638395792241945, + "high_lr": 0.0004068421052631579, + "low_lr": 8.136842105263159e-06, + "step": 1127 + }, + { + "epoch": 2.9638395792241945, + "high_lr": 0.0004068421052631579, + "low_lr": 8.136842105263159e-06, + "step": 1127 + }, + { + "epoch": 2.9638395792241945, + "high_lr": 0.0004068421052631579, + "low_lr": 8.136842105263159e-06, + "step": 1127 + }, + { + "epoch": 2.9638395792241945, + "high_lr": 0.0004068421052631579, + "low_lr": 8.136842105263159e-06, + "step": 1127 + }, + { + "epoch": 2.9638395792241945, + "high_lr": 0.0004068421052631579, + "low_lr": 8.136842105263159e-06, + "step": 1127 + }, + { + "epoch": 2.9664694280078896, + "grad_norm": 1.3643008470535278, + "learning_rate": 0.0004063157894736842, + "loss": 1.4039, + "step": 1128 + }, + { + "epoch": 2.9664694280078896, + "high_lr": 0.0004063157894736842, + "low_lr": 8.126315789473684e-06, + "step": 1128 + }, + { + "epoch": 2.9664694280078896, + "high_lr": 0.0004063157894736842, + "low_lr": 8.126315789473684e-06, + "step": 1128 + }, + { + "epoch": 2.9664694280078896, + "high_lr": 0.0004063157894736842, + "low_lr": 8.126315789473684e-06, + "step": 1128 + }, + { + "epoch": 2.9664694280078896, + "high_lr": 0.0004063157894736842, + "low_lr": 8.126315789473684e-06, + "step": 1128 + }, + { + "epoch": 2.9664694280078896, + "high_lr": 0.0004063157894736842, + "low_lr": 8.126315789473684e-06, + "step": 1128 + }, + { + "epoch": 2.9664694280078896, + "high_lr": 0.0004063157894736842, + "low_lr": 8.126315789473684e-06, + "step": 1128 + }, + { + "epoch": 2.9664694280078896, + "high_lr": 0.0004063157894736842, + "low_lr": 8.126315789473684e-06, + "step": 1128 + }, + { + "epoch": 2.9664694280078896, + "high_lr": 0.0004063157894736842, + "low_lr": 8.126315789473684e-06, + "step": 1128 + }, + { + "epoch": 2.9690992767915843, + "grad_norm": 1.2966452836990356, + "learning_rate": 0.00040578947368421055, + "loss": 1.3814, + "step": 1129 + }, + { + "epoch": 2.9690992767915843, + "high_lr": 0.00040578947368421055, + "low_lr": 8.115789473684212e-06, + "step": 1129 + }, + { + "epoch": 2.9690992767915843, + "high_lr": 0.00040578947368421055, + "low_lr": 8.115789473684212e-06, + "step": 1129 + }, + { + "epoch": 2.9690992767915843, + "high_lr": 0.00040578947368421055, + "low_lr": 8.115789473684212e-06, + "step": 1129 + }, + { + "epoch": 2.9690992767915843, + "high_lr": 0.00040578947368421055, + "low_lr": 8.115789473684212e-06, + "step": 1129 + }, + { + "epoch": 2.9690992767915843, + "high_lr": 0.00040578947368421055, + "low_lr": 8.115789473684212e-06, + "step": 1129 + }, + { + "epoch": 2.9690992767915843, + "high_lr": 0.00040578947368421055, + "low_lr": 8.115789473684212e-06, + "step": 1129 + }, + { + "epoch": 2.9690992767915843, + "high_lr": 0.00040578947368421055, + "low_lr": 8.115789473684212e-06, + "step": 1129 + }, + { + "epoch": 2.9690992767915843, + "high_lr": 0.00040578947368421055, + "low_lr": 8.115789473684212e-06, + "step": 1129 + }, + { + "epoch": 2.9717291255752794, + "grad_norm": 1.4507331848144531, + "learning_rate": 0.00040526315789473684, + "loss": 1.3616, + "step": 1130 + }, + { + "epoch": 2.9717291255752794, + "high_lr": 0.00040526315789473684, + "low_lr": 8.105263157894736e-06, + "step": 1130 + }, + { + "epoch": 2.9717291255752794, + "high_lr": 0.00040526315789473684, + "low_lr": 8.105263157894736e-06, + "step": 1130 + }, + { + "epoch": 2.9717291255752794, + "high_lr": 0.00040526315789473684, + "low_lr": 8.105263157894736e-06, + "step": 1130 + }, + { + "epoch": 2.9717291255752794, + "high_lr": 0.00040526315789473684, + "low_lr": 8.105263157894736e-06, + "step": 1130 + }, + { + "epoch": 2.9717291255752794, + "high_lr": 0.00040526315789473684, + "low_lr": 8.105263157894736e-06, + "step": 1130 + }, + { + "epoch": 2.9717291255752794, + "high_lr": 0.00040526315789473684, + "low_lr": 8.105263157894736e-06, + "step": 1130 + }, + { + "epoch": 2.9717291255752794, + "high_lr": 0.00040526315789473684, + "low_lr": 8.105263157894736e-06, + "step": 1130 + }, + { + "epoch": 2.9717291255752794, + "high_lr": 0.00040526315789473684, + "low_lr": 8.105263157894736e-06, + "step": 1130 + }, + { + "epoch": 2.9743589743589745, + "grad_norm": 1.286329984664917, + "learning_rate": 0.0004047368421052632, + "loss": 1.466, + "step": 1131 + }, + { + "epoch": 2.9743589743589745, + "high_lr": 0.0004047368421052632, + "low_lr": 8.094736842105264e-06, + "step": 1131 + }, + { + "epoch": 2.9743589743589745, + "high_lr": 0.0004047368421052632, + "low_lr": 8.094736842105264e-06, + "step": 1131 + }, + { + "epoch": 2.9743589743589745, + "high_lr": 0.0004047368421052632, + "low_lr": 8.094736842105264e-06, + "step": 1131 + }, + { + "epoch": 2.9743589743589745, + "high_lr": 0.0004047368421052632, + "low_lr": 8.094736842105264e-06, + "step": 1131 + }, + { + "epoch": 2.9743589743589745, + "high_lr": 0.0004047368421052632, + "low_lr": 8.094736842105264e-06, + "step": 1131 + }, + { + "epoch": 2.9743589743589745, + "high_lr": 0.0004047368421052632, + "low_lr": 8.094736842105264e-06, + "step": 1131 + }, + { + "epoch": 2.9743589743589745, + "high_lr": 0.0004047368421052632, + "low_lr": 8.094736842105264e-06, + "step": 1131 + }, + { + "epoch": 2.9743589743589745, + "high_lr": 0.0004047368421052632, + "low_lr": 8.094736842105264e-06, + "step": 1131 + }, + { + "epoch": 2.976988823142669, + "grad_norm": 1.3008004426956177, + "learning_rate": 0.00040421052631578946, + "loss": 1.3937, + "step": 1132 + }, + { + "epoch": 2.976988823142669, + "high_lr": 0.00040421052631578946, + "low_lr": 8.08421052631579e-06, + "step": 1132 + }, + { + "epoch": 2.976988823142669, + "high_lr": 0.00040421052631578946, + "low_lr": 8.08421052631579e-06, + "step": 1132 + }, + { + "epoch": 2.976988823142669, + "high_lr": 0.00040421052631578946, + "low_lr": 8.08421052631579e-06, + "step": 1132 + }, + { + "epoch": 2.976988823142669, + "high_lr": 0.00040421052631578946, + "low_lr": 8.08421052631579e-06, + "step": 1132 + }, + { + "epoch": 2.976988823142669, + "high_lr": 0.00040421052631578946, + "low_lr": 8.08421052631579e-06, + "step": 1132 + }, + { + "epoch": 2.976988823142669, + "high_lr": 0.00040421052631578946, + "low_lr": 8.08421052631579e-06, + "step": 1132 + }, + { + "epoch": 2.976988823142669, + "high_lr": 0.00040421052631578946, + "low_lr": 8.08421052631579e-06, + "step": 1132 + }, + { + "epoch": 2.976988823142669, + "high_lr": 0.00040421052631578946, + "low_lr": 8.08421052631579e-06, + "step": 1132 + }, + { + "epoch": 2.979618671926364, + "grad_norm": 1.2866188287734985, + "learning_rate": 0.0004036842105263158, + "loss": 1.4485, + "step": 1133 + }, + { + "epoch": 2.979618671926364, + "high_lr": 0.0004036842105263158, + "low_lr": 8.073684210526317e-06, + "step": 1133 + }, + { + "epoch": 2.979618671926364, + "high_lr": 0.0004036842105263158, + "low_lr": 8.073684210526317e-06, + "step": 1133 + }, + { + "epoch": 2.979618671926364, + "high_lr": 0.0004036842105263158, + "low_lr": 8.073684210526317e-06, + "step": 1133 + }, + { + "epoch": 2.979618671926364, + "high_lr": 0.0004036842105263158, + "low_lr": 8.073684210526317e-06, + "step": 1133 + }, + { + "epoch": 2.979618671926364, + "high_lr": 0.0004036842105263158, + "low_lr": 8.073684210526317e-06, + "step": 1133 + }, + { + "epoch": 2.979618671926364, + "high_lr": 0.0004036842105263158, + "low_lr": 8.073684210526317e-06, + "step": 1133 + }, + { + "epoch": 2.979618671926364, + "high_lr": 0.0004036842105263158, + "low_lr": 8.073684210526317e-06, + "step": 1133 + }, + { + "epoch": 2.979618671926364, + "high_lr": 0.0004036842105263158, + "low_lr": 8.073684210526317e-06, + "step": 1133 + }, + { + "epoch": 2.9822485207100593, + "grad_norm": 1.3842809200286865, + "learning_rate": 0.0004031578947368421, + "loss": 1.4056, + "step": 1134 + }, + { + "epoch": 2.9822485207100593, + "high_lr": 0.0004031578947368421, + "low_lr": 8.063157894736843e-06, + "step": 1134 + }, + { + "epoch": 2.9822485207100593, + "high_lr": 0.0004031578947368421, + "low_lr": 8.063157894736843e-06, + "step": 1134 + }, + { + "epoch": 2.9822485207100593, + "high_lr": 0.0004031578947368421, + "low_lr": 8.063157894736843e-06, + "step": 1134 + }, + { + "epoch": 2.9822485207100593, + "high_lr": 0.0004031578947368421, + "low_lr": 8.063157894736843e-06, + "step": 1134 + }, + { + "epoch": 2.9822485207100593, + "high_lr": 0.0004031578947368421, + "low_lr": 8.063157894736843e-06, + "step": 1134 + }, + { + "epoch": 2.9822485207100593, + "high_lr": 0.0004031578947368421, + "low_lr": 8.063157894736843e-06, + "step": 1134 + }, + { + "epoch": 2.9822485207100593, + "high_lr": 0.0004031578947368421, + "low_lr": 8.063157894736843e-06, + "step": 1134 + }, + { + "epoch": 2.9822485207100593, + "high_lr": 0.0004031578947368421, + "low_lr": 8.063157894736843e-06, + "step": 1134 + }, + { + "epoch": 2.984878369493754, + "grad_norm": 1.3081294298171997, + "learning_rate": 0.00040263157894736843, + "loss": 1.3381, + "step": 1135 + }, + { + "epoch": 2.984878369493754, + "high_lr": 0.00040263157894736843, + "low_lr": 8.052631578947368e-06, + "step": 1135 + }, + { + "epoch": 2.984878369493754, + "high_lr": 0.00040263157894736843, + "low_lr": 8.052631578947368e-06, + "step": 1135 + }, + { + "epoch": 2.984878369493754, + "high_lr": 0.00040263157894736843, + "low_lr": 8.052631578947368e-06, + "step": 1135 + }, + { + "epoch": 2.984878369493754, + "high_lr": 0.00040263157894736843, + "low_lr": 8.052631578947368e-06, + "step": 1135 + }, + { + "epoch": 2.984878369493754, + "high_lr": 0.00040263157894736843, + "low_lr": 8.052631578947368e-06, + "step": 1135 + }, + { + "epoch": 2.984878369493754, + "high_lr": 0.00040263157894736843, + "low_lr": 8.052631578947368e-06, + "step": 1135 + }, + { + "epoch": 2.984878369493754, + "high_lr": 0.00040263157894736843, + "low_lr": 8.052631578947368e-06, + "step": 1135 + }, + { + "epoch": 2.984878369493754, + "high_lr": 0.00040263157894736843, + "low_lr": 8.052631578947368e-06, + "step": 1135 + }, + { + "epoch": 2.987508218277449, + "grad_norm": 1.2561933994293213, + "learning_rate": 0.00040210526315789477, + "loss": 1.371, + "step": 1136 + }, + { + "epoch": 2.987508218277449, + "high_lr": 0.00040210526315789477, + "low_lr": 8.042105263157896e-06, + "step": 1136 + }, + { + "epoch": 2.987508218277449, + "high_lr": 0.00040210526315789477, + "low_lr": 8.042105263157896e-06, + "step": 1136 + }, + { + "epoch": 2.987508218277449, + "high_lr": 0.00040210526315789477, + "low_lr": 8.042105263157896e-06, + "step": 1136 + }, + { + "epoch": 2.987508218277449, + "high_lr": 0.00040210526315789477, + "low_lr": 8.042105263157896e-06, + "step": 1136 + }, + { + "epoch": 2.987508218277449, + "high_lr": 0.00040210526315789477, + "low_lr": 8.042105263157896e-06, + "step": 1136 + }, + { + "epoch": 2.987508218277449, + "high_lr": 0.00040210526315789477, + "low_lr": 8.042105263157896e-06, + "step": 1136 + }, + { + "epoch": 2.987508218277449, + "high_lr": 0.00040210526315789477, + "low_lr": 8.042105263157896e-06, + "step": 1136 + }, + { + "epoch": 2.987508218277449, + "high_lr": 0.00040210526315789477, + "low_lr": 8.042105263157896e-06, + "step": 1136 + }, + { + "epoch": 2.990138067061144, + "grad_norm": 1.2555829286575317, + "learning_rate": 0.00040157894736842105, + "loss": 1.3887, + "step": 1137 + }, + { + "epoch": 2.990138067061144, + "high_lr": 0.00040157894736842105, + "low_lr": 8.03157894736842e-06, + "step": 1137 + }, + { + "epoch": 2.990138067061144, + "high_lr": 0.00040157894736842105, + "low_lr": 8.03157894736842e-06, + "step": 1137 + }, + { + "epoch": 2.990138067061144, + "high_lr": 0.00040157894736842105, + "low_lr": 8.03157894736842e-06, + "step": 1137 + }, + { + "epoch": 2.990138067061144, + "high_lr": 0.00040157894736842105, + "low_lr": 8.03157894736842e-06, + "step": 1137 + }, + { + "epoch": 2.990138067061144, + "high_lr": 0.00040157894736842105, + "low_lr": 8.03157894736842e-06, + "step": 1137 + }, + { + "epoch": 2.990138067061144, + "high_lr": 0.00040157894736842105, + "low_lr": 8.03157894736842e-06, + "step": 1137 + }, + { + "epoch": 2.990138067061144, + "high_lr": 0.00040157894736842105, + "low_lr": 8.03157894736842e-06, + "step": 1137 + }, + { + "epoch": 2.990138067061144, + "high_lr": 0.00040157894736842105, + "low_lr": 8.03157894736842e-06, + "step": 1137 + }, + { + "epoch": 2.992767915844839, + "grad_norm": 1.3083648681640625, + "learning_rate": 0.0004010526315789474, + "loss": 1.4056, + "step": 1138 + }, + { + "epoch": 2.992767915844839, + "high_lr": 0.0004010526315789474, + "low_lr": 8.021052631578949e-06, + "step": 1138 + }, + { + "epoch": 2.992767915844839, + "high_lr": 0.0004010526315789474, + "low_lr": 8.021052631578949e-06, + "step": 1138 + }, + { + "epoch": 2.992767915844839, + "high_lr": 0.0004010526315789474, + "low_lr": 8.021052631578949e-06, + "step": 1138 + }, + { + "epoch": 2.992767915844839, + "high_lr": 0.0004010526315789474, + "low_lr": 8.021052631578949e-06, + "step": 1138 + }, + { + "epoch": 2.992767915844839, + "high_lr": 0.0004010526315789474, + "low_lr": 8.021052631578949e-06, + "step": 1138 + }, + { + "epoch": 2.992767915844839, + "high_lr": 0.0004010526315789474, + "low_lr": 8.021052631578949e-06, + "step": 1138 + }, + { + "epoch": 2.992767915844839, + "high_lr": 0.0004010526315789474, + "low_lr": 8.021052631578949e-06, + "step": 1138 + }, + { + "epoch": 2.992767915844839, + "high_lr": 0.0004010526315789474, + "low_lr": 8.021052631578949e-06, + "step": 1138 + }, + { + "epoch": 2.995397764628534, + "grad_norm": 1.28179132938385, + "learning_rate": 0.0004005263157894737, + "loss": 1.3428, + "step": 1139 + }, + { + "epoch": 2.995397764628534, + "high_lr": 0.0004005263157894737, + "low_lr": 8.010526315789473e-06, + "step": 1139 + }, + { + "epoch": 2.995397764628534, + "high_lr": 0.0004005263157894737, + "low_lr": 8.010526315789473e-06, + "step": 1139 + }, + { + "epoch": 2.995397764628534, + "high_lr": 0.0004005263157894737, + "low_lr": 8.010526315789473e-06, + "step": 1139 + }, + { + "epoch": 2.995397764628534, + "high_lr": 0.0004005263157894737, + "low_lr": 8.010526315789473e-06, + "step": 1139 + }, + { + "epoch": 2.995397764628534, + "high_lr": 0.0004005263157894737, + "low_lr": 8.010526315789473e-06, + "step": 1139 + }, + { + "epoch": 2.995397764628534, + "high_lr": 0.0004005263157894737, + "low_lr": 8.010526315789473e-06, + "step": 1139 + }, + { + "epoch": 2.995397764628534, + "high_lr": 0.0004005263157894737, + "low_lr": 8.010526315789473e-06, + "step": 1139 + }, + { + "epoch": 2.995397764628534, + "high_lr": 0.0004005263157894737, + "low_lr": 8.010526315789473e-06, + "step": 1139 + }, + { + "epoch": 2.998027613412229, + "grad_norm": 1.225212574005127, + "learning_rate": 0.0004, + "loss": 1.38, + "step": 1140 + }, + { + "epoch": 2.998027613412229, + "high_lr": 0.0004, + "low_lr": 8.000000000000001e-06, + "step": 1140 + }, + { + "epoch": 2.998027613412229, + "high_lr": 0.0004, + "low_lr": 8.000000000000001e-06, + "step": 1140 + }, + { + "epoch": 2.998027613412229, + "high_lr": 0.0004, + "low_lr": 8.000000000000001e-06, + "step": 1140 + }, + { + "epoch": 2.998027613412229, + "high_lr": 0.0004, + "low_lr": 8.000000000000001e-06, + "step": 1140 + }, + { + "epoch": 2.998027613412229, + "high_lr": 0.0004, + "low_lr": 8.000000000000001e-06, + "step": 1140 + }, + { + "epoch": 2.998027613412229, + "high_lr": 0.0004, + "low_lr": 8.000000000000001e-06, + "step": 1140 + }, + { + "epoch": 2.998027613412229, + "high_lr": 0.0004, + "low_lr": 8.000000000000001e-06, + "step": 1140 + }, + { + "epoch": 2.998027613412229, + "high_lr": 0.0004, + "low_lr": 8.000000000000001e-06, + "step": 1140 + }, + { + "epoch": 3.0006574621959237, + "grad_norm": 1.3067548274993896, + "learning_rate": 0.0003994736842105263, + "loss": 1.367, + "step": 1141 + }, + { + "epoch": 3.0006574621959237, + "high_lr": 0.0003994736842105263, + "low_lr": 7.989473684210528e-06, + "step": 1141 + }, + { + "epoch": 3.0006574621959237, + "high_lr": 0.0003994736842105263, + "low_lr": 7.989473684210528e-06, + "step": 1141 + }, + { + "epoch": 3.0006574621959237, + "high_lr": 0.0003994736842105263, + "low_lr": 7.989473684210528e-06, + "step": 1141 + }, + { + "epoch": 3.0006574621959237, + "high_lr": 0.0003994736842105263, + "low_lr": 7.989473684210528e-06, + "step": 1141 + }, + { + "epoch": 3.0006574621959237, + "high_lr": 0.0003994736842105263, + "low_lr": 7.989473684210528e-06, + "step": 1141 + }, + { + "epoch": 3.0006574621959237, + "high_lr": 0.0003994736842105263, + "low_lr": 7.989473684210528e-06, + "step": 1141 + }, + { + "epoch": 3.0006574621959237, + "high_lr": 0.0003994736842105263, + "low_lr": 7.989473684210528e-06, + "step": 1141 + }, + { + "epoch": 3.0006574621959237, + "high_lr": 0.0003994736842105263, + "low_lr": 7.989473684210528e-06, + "step": 1141 + }, + { + "epoch": 3.0032873109796188, + "grad_norm": 1.2940045595169067, + "learning_rate": 0.0003989473684210526, + "loss": 1.3059, + "step": 1142 + }, + { + "epoch": 3.0032873109796188, + "high_lr": 0.0003989473684210526, + "low_lr": 7.978947368421052e-06, + "step": 1142 + }, + { + "epoch": 3.0032873109796188, + "high_lr": 0.0003989473684210526, + "low_lr": 7.978947368421052e-06, + "step": 1142 + }, + { + "epoch": 3.0032873109796188, + "high_lr": 0.0003989473684210526, + "low_lr": 7.978947368421052e-06, + "step": 1142 + }, + { + "epoch": 3.0032873109796188, + "high_lr": 0.0003989473684210526, + "low_lr": 7.978947368421052e-06, + "step": 1142 + }, + { + "epoch": 3.0032873109796188, + "high_lr": 0.0003989473684210526, + "low_lr": 7.978947368421052e-06, + "step": 1142 + }, + { + "epoch": 3.0032873109796188, + "high_lr": 0.0003989473684210526, + "low_lr": 7.978947368421052e-06, + "step": 1142 + }, + { + "epoch": 3.0032873109796188, + "high_lr": 0.0003989473684210526, + "low_lr": 7.978947368421052e-06, + "step": 1142 + }, + { + "epoch": 3.0032873109796188, + "high_lr": 0.0003989473684210526, + "low_lr": 7.978947368421052e-06, + "step": 1142 + }, + { + "epoch": 3.0059171597633134, + "grad_norm": 1.3060542345046997, + "learning_rate": 0.000398421052631579, + "loss": 1.3071, + "step": 1143 + }, + { + "epoch": 3.0059171597633134, + "high_lr": 0.000398421052631579, + "low_lr": 7.96842105263158e-06, + "step": 1143 + }, + { + "epoch": 3.0059171597633134, + "high_lr": 0.000398421052631579, + "low_lr": 7.96842105263158e-06, + "step": 1143 + }, + { + "epoch": 3.0059171597633134, + "high_lr": 0.000398421052631579, + "low_lr": 7.96842105263158e-06, + "step": 1143 + }, + { + "epoch": 3.0059171597633134, + "high_lr": 0.000398421052631579, + "low_lr": 7.96842105263158e-06, + "step": 1143 + }, + { + "epoch": 3.0059171597633134, + "high_lr": 0.000398421052631579, + "low_lr": 7.96842105263158e-06, + "step": 1143 + }, + { + "epoch": 3.0059171597633134, + "high_lr": 0.000398421052631579, + "low_lr": 7.96842105263158e-06, + "step": 1143 + }, + { + "epoch": 3.0059171597633134, + "high_lr": 0.000398421052631579, + "low_lr": 7.96842105263158e-06, + "step": 1143 + }, + { + "epoch": 3.0059171597633134, + "high_lr": 0.000398421052631579, + "low_lr": 7.96842105263158e-06, + "step": 1143 + }, + { + "epoch": 3.0085470085470085, + "grad_norm": 1.2439887523651123, + "learning_rate": 0.00039789473684210527, + "loss": 1.2963, + "step": 1144 + }, + { + "epoch": 3.0085470085470085, + "high_lr": 0.00039789473684210527, + "low_lr": 7.957894736842105e-06, + "step": 1144 + }, + { + "epoch": 3.0085470085470085, + "high_lr": 0.00039789473684210527, + "low_lr": 7.957894736842105e-06, + "step": 1144 + }, + { + "epoch": 3.0085470085470085, + "high_lr": 0.00039789473684210527, + "low_lr": 7.957894736842105e-06, + "step": 1144 + }, + { + "epoch": 3.0085470085470085, + "high_lr": 0.00039789473684210527, + "low_lr": 7.957894736842105e-06, + "step": 1144 + }, + { + "epoch": 3.0085470085470085, + "high_lr": 0.00039789473684210527, + "low_lr": 7.957894736842105e-06, + "step": 1144 + }, + { + "epoch": 3.0085470085470085, + "high_lr": 0.00039789473684210527, + "low_lr": 7.957894736842105e-06, + "step": 1144 + }, + { + "epoch": 3.0085470085470085, + "high_lr": 0.00039789473684210527, + "low_lr": 7.957894736842105e-06, + "step": 1144 + }, + { + "epoch": 3.0085470085470085, + "high_lr": 0.00039789473684210527, + "low_lr": 7.957894736842105e-06, + "step": 1144 + }, + { + "epoch": 3.0111768573307036, + "grad_norm": 1.3645330667495728, + "learning_rate": 0.0003973684210526316, + "loss": 1.3169, + "step": 1145 + }, + { + "epoch": 3.0111768573307036, + "high_lr": 0.0003973684210526316, + "low_lr": 7.947368421052633e-06, + "step": 1145 + }, + { + "epoch": 3.0111768573307036, + "high_lr": 0.0003973684210526316, + "low_lr": 7.947368421052633e-06, + "step": 1145 + }, + { + "epoch": 3.0111768573307036, + "high_lr": 0.0003973684210526316, + "low_lr": 7.947368421052633e-06, + "step": 1145 + }, + { + "epoch": 3.0111768573307036, + "high_lr": 0.0003973684210526316, + "low_lr": 7.947368421052633e-06, + "step": 1145 + }, + { + "epoch": 3.0111768573307036, + "high_lr": 0.0003973684210526316, + "low_lr": 7.947368421052633e-06, + "step": 1145 + }, + { + "epoch": 3.0111768573307036, + "high_lr": 0.0003973684210526316, + "low_lr": 7.947368421052633e-06, + "step": 1145 + }, + { + "epoch": 3.0111768573307036, + "high_lr": 0.0003973684210526316, + "low_lr": 7.947368421052633e-06, + "step": 1145 + }, + { + "epoch": 3.0111768573307036, + "high_lr": 0.0003973684210526316, + "low_lr": 7.947368421052633e-06, + "step": 1145 + }, + { + "epoch": 3.0138067061143983, + "grad_norm": 1.377105474472046, + "learning_rate": 0.0003968421052631579, + "loss": 1.2971, + "step": 1146 + }, + { + "epoch": 3.0138067061143983, + "high_lr": 0.0003968421052631579, + "low_lr": 7.936842105263158e-06, + "step": 1146 + }, + { + "epoch": 3.0138067061143983, + "high_lr": 0.0003968421052631579, + "low_lr": 7.936842105263158e-06, + "step": 1146 + }, + { + "epoch": 3.0138067061143983, + "high_lr": 0.0003968421052631579, + "low_lr": 7.936842105263158e-06, + "step": 1146 + }, + { + "epoch": 3.0138067061143983, + "high_lr": 0.0003968421052631579, + "low_lr": 7.936842105263158e-06, + "step": 1146 + }, + { + "epoch": 3.0138067061143983, + "high_lr": 0.0003968421052631579, + "low_lr": 7.936842105263158e-06, + "step": 1146 + }, + { + "epoch": 3.0138067061143983, + "high_lr": 0.0003968421052631579, + "low_lr": 7.936842105263158e-06, + "step": 1146 + }, + { + "epoch": 3.0138067061143983, + "high_lr": 0.0003968421052631579, + "low_lr": 7.936842105263158e-06, + "step": 1146 + }, + { + "epoch": 3.0138067061143983, + "high_lr": 0.0003968421052631579, + "low_lr": 7.936842105263158e-06, + "step": 1146 + }, + { + "epoch": 3.0164365548980934, + "grad_norm": 1.2766972780227661, + "learning_rate": 0.00039631578947368424, + "loss": 1.3058, + "step": 1147 + }, + { + "epoch": 3.0164365548980934, + "high_lr": 0.00039631578947368424, + "low_lr": 7.926315789473686e-06, + "step": 1147 + }, + { + "epoch": 3.0164365548980934, + "high_lr": 0.00039631578947368424, + "low_lr": 7.926315789473686e-06, + "step": 1147 + }, + { + "epoch": 3.0164365548980934, + "high_lr": 0.00039631578947368424, + "low_lr": 7.926315789473686e-06, + "step": 1147 + }, + { + "epoch": 3.0164365548980934, + "high_lr": 0.00039631578947368424, + "low_lr": 7.926315789473686e-06, + "step": 1147 + }, + { + "epoch": 3.0164365548980934, + "high_lr": 0.00039631578947368424, + "low_lr": 7.926315789473686e-06, + "step": 1147 + }, + { + "epoch": 3.0164365548980934, + "high_lr": 0.00039631578947368424, + "low_lr": 7.926315789473686e-06, + "step": 1147 + }, + { + "epoch": 3.0164365548980934, + "high_lr": 0.00039631578947368424, + "low_lr": 7.926315789473686e-06, + "step": 1147 + }, + { + "epoch": 3.0164365548980934, + "high_lr": 0.00039631578947368424, + "low_lr": 7.926315789473686e-06, + "step": 1147 + }, + { + "epoch": 3.0190664036817885, + "grad_norm": 1.2870296239852905, + "learning_rate": 0.0003957894736842105, + "loss": 1.3569, + "step": 1148 + }, + { + "epoch": 3.0190664036817885, + "high_lr": 0.0003957894736842105, + "low_lr": 7.915789473684212e-06, + "step": 1148 + }, + { + "epoch": 3.0190664036817885, + "high_lr": 0.0003957894736842105, + "low_lr": 7.915789473684212e-06, + "step": 1148 + }, + { + "epoch": 3.0190664036817885, + "high_lr": 0.0003957894736842105, + "low_lr": 7.915789473684212e-06, + "step": 1148 + }, + { + "epoch": 3.0190664036817885, + "high_lr": 0.0003957894736842105, + "low_lr": 7.915789473684212e-06, + "step": 1148 + }, + { + "epoch": 3.0190664036817885, + "high_lr": 0.0003957894736842105, + "low_lr": 7.915789473684212e-06, + "step": 1148 + }, + { + "epoch": 3.0190664036817885, + "high_lr": 0.0003957894736842105, + "low_lr": 7.915789473684212e-06, + "step": 1148 + }, + { + "epoch": 3.0190664036817885, + "high_lr": 0.0003957894736842105, + "low_lr": 7.915789473684212e-06, + "step": 1148 + }, + { + "epoch": 3.0190664036817885, + "high_lr": 0.0003957894736842105, + "low_lr": 7.915789473684212e-06, + "step": 1148 + }, + { + "epoch": 3.021696252465483, + "grad_norm": 1.3098727464675903, + "learning_rate": 0.0003952631578947368, + "loss": 1.3159, + "step": 1149 + }, + { + "epoch": 3.021696252465483, + "high_lr": 0.0003952631578947368, + "low_lr": 7.905263157894737e-06, + "step": 1149 + }, + { + "epoch": 3.021696252465483, + "high_lr": 0.0003952631578947368, + "low_lr": 7.905263157894737e-06, + "step": 1149 + }, + { + "epoch": 3.021696252465483, + "high_lr": 0.0003952631578947368, + "low_lr": 7.905263157894737e-06, + "step": 1149 + }, + { + "epoch": 3.021696252465483, + "high_lr": 0.0003952631578947368, + "low_lr": 7.905263157894737e-06, + "step": 1149 + }, + { + "epoch": 3.021696252465483, + "high_lr": 0.0003952631578947368, + "low_lr": 7.905263157894737e-06, + "step": 1149 + }, + { + "epoch": 3.021696252465483, + "high_lr": 0.0003952631578947368, + "low_lr": 7.905263157894737e-06, + "step": 1149 + }, + { + "epoch": 3.021696252465483, + "high_lr": 0.0003952631578947368, + "low_lr": 7.905263157894737e-06, + "step": 1149 + }, + { + "epoch": 3.021696252465483, + "high_lr": 0.0003952631578947368, + "low_lr": 7.905263157894737e-06, + "step": 1149 + }, + { + "epoch": 3.0243261012491782, + "grad_norm": 1.410931944847107, + "learning_rate": 0.00039473684210526315, + "loss": 1.3034, + "step": 1150 + }, + { + "epoch": 3.0243261012491782, + "high_lr": 0.00039473684210526315, + "low_lr": 7.894736842105265e-06, + "step": 1150 + }, + { + "epoch": 3.0243261012491782, + "high_lr": 0.00039473684210526315, + "low_lr": 7.894736842105265e-06, + "step": 1150 + }, + { + "epoch": 3.0243261012491782, + "high_lr": 0.00039473684210526315, + "low_lr": 7.894736842105265e-06, + "step": 1150 + }, + { + "epoch": 3.0243261012491782, + "high_lr": 0.00039473684210526315, + "low_lr": 7.894736842105265e-06, + "step": 1150 + }, + { + "epoch": 3.0243261012491782, + "high_lr": 0.00039473684210526315, + "low_lr": 7.894736842105265e-06, + "step": 1150 + }, + { + "epoch": 3.0243261012491782, + "high_lr": 0.00039473684210526315, + "low_lr": 7.894736842105265e-06, + "step": 1150 + }, + { + "epoch": 3.0243261012491782, + "high_lr": 0.00039473684210526315, + "low_lr": 7.894736842105265e-06, + "step": 1150 + }, + { + "epoch": 3.0243261012491782, + "high_lr": 0.00039473684210526315, + "low_lr": 7.894736842105265e-06, + "step": 1150 + }, + { + "epoch": 3.0269559500328733, + "grad_norm": 1.3159284591674805, + "learning_rate": 0.0003942105263157895, + "loss": 1.2683, + "step": 1151 + }, + { + "epoch": 3.0269559500328733, + "high_lr": 0.0003942105263157895, + "low_lr": 7.88421052631579e-06, + "step": 1151 + }, + { + "epoch": 3.0269559500328733, + "high_lr": 0.0003942105263157895, + "low_lr": 7.88421052631579e-06, + "step": 1151 + }, + { + "epoch": 3.0269559500328733, + "high_lr": 0.0003942105263157895, + "low_lr": 7.88421052631579e-06, + "step": 1151 + }, + { + "epoch": 3.0269559500328733, + "high_lr": 0.0003942105263157895, + "low_lr": 7.88421052631579e-06, + "step": 1151 + }, + { + "epoch": 3.0269559500328733, + "high_lr": 0.0003942105263157895, + "low_lr": 7.88421052631579e-06, + "step": 1151 + }, + { + "epoch": 3.0269559500328733, + "high_lr": 0.0003942105263157895, + "low_lr": 7.88421052631579e-06, + "step": 1151 + }, + { + "epoch": 3.0269559500328733, + "high_lr": 0.0003942105263157895, + "low_lr": 7.88421052631579e-06, + "step": 1151 + }, + { + "epoch": 3.0269559500328733, + "high_lr": 0.0003942105263157895, + "low_lr": 7.88421052631579e-06, + "step": 1151 + }, + { + "epoch": 3.029585798816568, + "grad_norm": 1.3127446174621582, + "learning_rate": 0.00039368421052631583, + "loss": 1.2996, + "step": 1152 + }, + { + "epoch": 3.029585798816568, + "high_lr": 0.00039368421052631583, + "low_lr": 7.873684210526317e-06, + "step": 1152 + }, + { + "epoch": 3.029585798816568, + "high_lr": 0.00039368421052631583, + "low_lr": 7.873684210526317e-06, + "step": 1152 + }, + { + "epoch": 3.029585798816568, + "high_lr": 0.00039368421052631583, + "low_lr": 7.873684210526317e-06, + "step": 1152 + }, + { + "epoch": 3.029585798816568, + "high_lr": 0.00039368421052631583, + "low_lr": 7.873684210526317e-06, + "step": 1152 + }, + { + "epoch": 3.029585798816568, + "high_lr": 0.00039368421052631583, + "low_lr": 7.873684210526317e-06, + "step": 1152 + }, + { + "epoch": 3.029585798816568, + "high_lr": 0.00039368421052631583, + "low_lr": 7.873684210526317e-06, + "step": 1152 + }, + { + "epoch": 3.029585798816568, + "high_lr": 0.00039368421052631583, + "low_lr": 7.873684210526317e-06, + "step": 1152 + }, + { + "epoch": 3.029585798816568, + "high_lr": 0.00039368421052631583, + "low_lr": 7.873684210526317e-06, + "step": 1152 + }, + { + "epoch": 3.032215647600263, + "grad_norm": 1.3679771423339844, + "learning_rate": 0.0003931578947368421, + "loss": 1.3208, + "step": 1153 + }, + { + "epoch": 3.032215647600263, + "high_lr": 0.0003931578947368421, + "low_lr": 7.863157894736842e-06, + "step": 1153 + }, + { + "epoch": 3.032215647600263, + "high_lr": 0.0003931578947368421, + "low_lr": 7.863157894736842e-06, + "step": 1153 + }, + { + "epoch": 3.032215647600263, + "high_lr": 0.0003931578947368421, + "low_lr": 7.863157894736842e-06, + "step": 1153 + }, + { + "epoch": 3.032215647600263, + "high_lr": 0.0003931578947368421, + "low_lr": 7.863157894736842e-06, + "step": 1153 + }, + { + "epoch": 3.032215647600263, + "high_lr": 0.0003931578947368421, + "low_lr": 7.863157894736842e-06, + "step": 1153 + }, + { + "epoch": 3.032215647600263, + "high_lr": 0.0003931578947368421, + "low_lr": 7.863157894736842e-06, + "step": 1153 + }, + { + "epoch": 3.032215647600263, + "high_lr": 0.0003931578947368421, + "low_lr": 7.863157894736842e-06, + "step": 1153 + }, + { + "epoch": 3.032215647600263, + "high_lr": 0.0003931578947368421, + "low_lr": 7.863157894736842e-06, + "step": 1153 + }, + { + "epoch": 3.0348454963839577, + "grad_norm": 1.3075698614120483, + "learning_rate": 0.00039263157894736846, + "loss": 1.3233, + "step": 1154 + }, + { + "epoch": 3.0348454963839577, + "high_lr": 0.00039263157894736846, + "low_lr": 7.85263157894737e-06, + "step": 1154 + }, + { + "epoch": 3.0348454963839577, + "high_lr": 0.00039263157894736846, + "low_lr": 7.85263157894737e-06, + "step": 1154 + }, + { + "epoch": 3.0348454963839577, + "high_lr": 0.00039263157894736846, + "low_lr": 7.85263157894737e-06, + "step": 1154 + }, + { + "epoch": 3.0348454963839577, + "high_lr": 0.00039263157894736846, + "low_lr": 7.85263157894737e-06, + "step": 1154 + }, + { + "epoch": 3.0348454963839577, + "high_lr": 0.00039263157894736846, + "low_lr": 7.85263157894737e-06, + "step": 1154 + }, + { + "epoch": 3.0348454963839577, + "high_lr": 0.00039263157894736846, + "low_lr": 7.85263157894737e-06, + "step": 1154 + }, + { + "epoch": 3.0348454963839577, + "high_lr": 0.00039263157894736846, + "low_lr": 7.85263157894737e-06, + "step": 1154 + }, + { + "epoch": 3.0348454963839577, + "high_lr": 0.00039263157894736846, + "low_lr": 7.85263157894737e-06, + "step": 1154 + }, + { + "epoch": 3.037475345167653, + "grad_norm": 1.3870766162872314, + "learning_rate": 0.00039210526315789474, + "loss": 1.3746, + "step": 1155 + }, + { + "epoch": 3.037475345167653, + "high_lr": 0.00039210526315789474, + "low_lr": 7.842105263157895e-06, + "step": 1155 + }, + { + "epoch": 3.037475345167653, + "high_lr": 0.00039210526315789474, + "low_lr": 7.842105263157895e-06, + "step": 1155 + }, + { + "epoch": 3.037475345167653, + "high_lr": 0.00039210526315789474, + "low_lr": 7.842105263157895e-06, + "step": 1155 + }, + { + "epoch": 3.037475345167653, + "high_lr": 0.00039210526315789474, + "low_lr": 7.842105263157895e-06, + "step": 1155 + }, + { + "epoch": 3.037475345167653, + "high_lr": 0.00039210526315789474, + "low_lr": 7.842105263157895e-06, + "step": 1155 + }, + { + "epoch": 3.037475345167653, + "high_lr": 0.00039210526315789474, + "low_lr": 7.842105263157895e-06, + "step": 1155 + }, + { + "epoch": 3.037475345167653, + "high_lr": 0.00039210526315789474, + "low_lr": 7.842105263157895e-06, + "step": 1155 + }, + { + "epoch": 3.037475345167653, + "high_lr": 0.00039210526315789474, + "low_lr": 7.842105263157895e-06, + "step": 1155 + }, + { + "epoch": 3.040105193951348, + "grad_norm": 1.500157117843628, + "learning_rate": 0.000391578947368421, + "loss": 1.2875, + "step": 1156 + }, + { + "epoch": 3.040105193951348, + "high_lr": 0.000391578947368421, + "low_lr": 7.831578947368421e-06, + "step": 1156 + }, + { + "epoch": 3.040105193951348, + "high_lr": 0.000391578947368421, + "low_lr": 7.831578947368421e-06, + "step": 1156 + }, + { + "epoch": 3.040105193951348, + "high_lr": 0.000391578947368421, + "low_lr": 7.831578947368421e-06, + "step": 1156 + }, + { + "epoch": 3.040105193951348, + "high_lr": 0.000391578947368421, + "low_lr": 7.831578947368421e-06, + "step": 1156 + }, + { + "epoch": 3.040105193951348, + "high_lr": 0.000391578947368421, + "low_lr": 7.831578947368421e-06, + "step": 1156 + }, + { + "epoch": 3.040105193951348, + "high_lr": 0.000391578947368421, + "low_lr": 7.831578947368421e-06, + "step": 1156 + }, + { + "epoch": 3.040105193951348, + "high_lr": 0.000391578947368421, + "low_lr": 7.831578947368421e-06, + "step": 1156 + }, + { + "epoch": 3.040105193951348, + "high_lr": 0.000391578947368421, + "low_lr": 7.831578947368421e-06, + "step": 1156 + }, + { + "epoch": 3.0427350427350426, + "grad_norm": 1.3098334074020386, + "learning_rate": 0.00039105263157894737, + "loss": 1.2819, + "step": 1157 + }, + { + "epoch": 3.0427350427350426, + "high_lr": 0.00039105263157894737, + "low_lr": 7.821052631578949e-06, + "step": 1157 + }, + { + "epoch": 3.0427350427350426, + "high_lr": 0.00039105263157894737, + "low_lr": 7.821052631578949e-06, + "step": 1157 + }, + { + "epoch": 3.0427350427350426, + "high_lr": 0.00039105263157894737, + "low_lr": 7.821052631578949e-06, + "step": 1157 + }, + { + "epoch": 3.0427350427350426, + "high_lr": 0.00039105263157894737, + "low_lr": 7.821052631578949e-06, + "step": 1157 + }, + { + "epoch": 3.0427350427350426, + "high_lr": 0.00039105263157894737, + "low_lr": 7.821052631578949e-06, + "step": 1157 + }, + { + "epoch": 3.0427350427350426, + "high_lr": 0.00039105263157894737, + "low_lr": 7.821052631578949e-06, + "step": 1157 + }, + { + "epoch": 3.0427350427350426, + "high_lr": 0.00039105263157894737, + "low_lr": 7.821052631578949e-06, + "step": 1157 + }, + { + "epoch": 3.0427350427350426, + "high_lr": 0.00039105263157894737, + "low_lr": 7.821052631578949e-06, + "step": 1157 + }, + { + "epoch": 3.0453648915187377, + "grad_norm": 1.370381474494934, + "learning_rate": 0.00039052631578947365, + "loss": 1.3062, + "step": 1158 + }, + { + "epoch": 3.0453648915187377, + "high_lr": 0.00039052631578947365, + "low_lr": 7.810526315789474e-06, + "step": 1158 + }, + { + "epoch": 3.0453648915187377, + "high_lr": 0.00039052631578947365, + "low_lr": 7.810526315789474e-06, + "step": 1158 + }, + { + "epoch": 3.0453648915187377, + "high_lr": 0.00039052631578947365, + "low_lr": 7.810526315789474e-06, + "step": 1158 + }, + { + "epoch": 3.0453648915187377, + "high_lr": 0.00039052631578947365, + "low_lr": 7.810526315789474e-06, + "step": 1158 + }, + { + "epoch": 3.0453648915187377, + "high_lr": 0.00039052631578947365, + "low_lr": 7.810526315789474e-06, + "step": 1158 + }, + { + "epoch": 3.0453648915187377, + "high_lr": 0.00039052631578947365, + "low_lr": 7.810526315789474e-06, + "step": 1158 + }, + { + "epoch": 3.0453648915187377, + "high_lr": 0.00039052631578947365, + "low_lr": 7.810526315789474e-06, + "step": 1158 + }, + { + "epoch": 3.0453648915187377, + "high_lr": 0.00039052631578947365, + "low_lr": 7.810526315789474e-06, + "step": 1158 + }, + { + "epoch": 3.047994740302433, + "grad_norm": 1.270233154296875, + "learning_rate": 0.00039000000000000005, + "loss": 1.3884, + "step": 1159 + }, + { + "epoch": 3.047994740302433, + "high_lr": 0.00039000000000000005, + "low_lr": 7.800000000000002e-06, + "step": 1159 + }, + { + "epoch": 3.047994740302433, + "high_lr": 0.00039000000000000005, + "low_lr": 7.800000000000002e-06, + "step": 1159 + }, + { + "epoch": 3.047994740302433, + "high_lr": 0.00039000000000000005, + "low_lr": 7.800000000000002e-06, + "step": 1159 + }, + { + "epoch": 3.047994740302433, + "high_lr": 0.00039000000000000005, + "low_lr": 7.800000000000002e-06, + "step": 1159 + }, + { + "epoch": 3.047994740302433, + "high_lr": 0.00039000000000000005, + "low_lr": 7.800000000000002e-06, + "step": 1159 + }, + { + "epoch": 3.047994740302433, + "high_lr": 0.00039000000000000005, + "low_lr": 7.800000000000002e-06, + "step": 1159 + }, + { + "epoch": 3.047994740302433, + "high_lr": 0.00039000000000000005, + "low_lr": 7.800000000000002e-06, + "step": 1159 + }, + { + "epoch": 3.047994740302433, + "high_lr": 0.00039000000000000005, + "low_lr": 7.800000000000002e-06, + "step": 1159 + }, + { + "epoch": 3.0506245890861274, + "grad_norm": 1.2626560926437378, + "learning_rate": 0.00038947368421052633, + "loss": 1.3011, + "step": 1160 + }, + { + "epoch": 3.0506245890861274, + "high_lr": 0.00038947368421052633, + "low_lr": 7.789473684210526e-06, + "step": 1160 + }, + { + "epoch": 3.0506245890861274, + "high_lr": 0.00038947368421052633, + "low_lr": 7.789473684210526e-06, + "step": 1160 + }, + { + "epoch": 3.0506245890861274, + "high_lr": 0.00038947368421052633, + "low_lr": 7.789473684210526e-06, + "step": 1160 + }, + { + "epoch": 3.0506245890861274, + "high_lr": 0.00038947368421052633, + "low_lr": 7.789473684210526e-06, + "step": 1160 + }, + { + "epoch": 3.0506245890861274, + "high_lr": 0.00038947368421052633, + "low_lr": 7.789473684210526e-06, + "step": 1160 + }, + { + "epoch": 3.0506245890861274, + "high_lr": 0.00038947368421052633, + "low_lr": 7.789473684210526e-06, + "step": 1160 + }, + { + "epoch": 3.0506245890861274, + "high_lr": 0.00038947368421052633, + "low_lr": 7.789473684210526e-06, + "step": 1160 + }, + { + "epoch": 3.0506245890861274, + "high_lr": 0.00038947368421052633, + "low_lr": 7.789473684210526e-06, + "step": 1160 + }, + { + "epoch": 3.0532544378698225, + "grad_norm": 1.2719833850860596, + "learning_rate": 0.0003889473684210527, + "loss": 1.3133, + "step": 1161 + }, + { + "epoch": 3.0532544378698225, + "high_lr": 0.0003889473684210527, + "low_lr": 7.778947368421054e-06, + "step": 1161 + }, + { + "epoch": 3.0532544378698225, + "high_lr": 0.0003889473684210527, + "low_lr": 7.778947368421054e-06, + "step": 1161 + }, + { + "epoch": 3.0532544378698225, + "high_lr": 0.0003889473684210527, + "low_lr": 7.778947368421054e-06, + "step": 1161 + }, + { + "epoch": 3.0532544378698225, + "high_lr": 0.0003889473684210527, + "low_lr": 7.778947368421054e-06, + "step": 1161 + }, + { + "epoch": 3.0532544378698225, + "high_lr": 0.0003889473684210527, + "low_lr": 7.778947368421054e-06, + "step": 1161 + }, + { + "epoch": 3.0532544378698225, + "high_lr": 0.0003889473684210527, + "low_lr": 7.778947368421054e-06, + "step": 1161 + }, + { + "epoch": 3.0532544378698225, + "high_lr": 0.0003889473684210527, + "low_lr": 7.778947368421054e-06, + "step": 1161 + }, + { + "epoch": 3.0532544378698225, + "high_lr": 0.0003889473684210527, + "low_lr": 7.778947368421054e-06, + "step": 1161 + }, + { + "epoch": 3.0558842866535176, + "grad_norm": 1.2456934452056885, + "learning_rate": 0.00038842105263157896, + "loss": 1.3058, + "step": 1162 + }, + { + "epoch": 3.0558842866535176, + "high_lr": 0.00038842105263157896, + "low_lr": 7.768421052631579e-06, + "step": 1162 + }, + { + "epoch": 3.0558842866535176, + "high_lr": 0.00038842105263157896, + "low_lr": 7.768421052631579e-06, + "step": 1162 + }, + { + "epoch": 3.0558842866535176, + "high_lr": 0.00038842105263157896, + "low_lr": 7.768421052631579e-06, + "step": 1162 + }, + { + "epoch": 3.0558842866535176, + "high_lr": 0.00038842105263157896, + "low_lr": 7.768421052631579e-06, + "step": 1162 + }, + { + "epoch": 3.0558842866535176, + "high_lr": 0.00038842105263157896, + "low_lr": 7.768421052631579e-06, + "step": 1162 + }, + { + "epoch": 3.0558842866535176, + "high_lr": 0.00038842105263157896, + "low_lr": 7.768421052631579e-06, + "step": 1162 + }, + { + "epoch": 3.0558842866535176, + "high_lr": 0.00038842105263157896, + "low_lr": 7.768421052631579e-06, + "step": 1162 + }, + { + "epoch": 3.0558842866535176, + "high_lr": 0.00038842105263157896, + "low_lr": 7.768421052631579e-06, + "step": 1162 + }, + { + "epoch": 3.0585141354372123, + "grad_norm": 1.3651379346847534, + "learning_rate": 0.00038789473684210524, + "loss": 1.2902, + "step": 1163 + }, + { + "epoch": 3.0585141354372123, + "high_lr": 0.00038789473684210524, + "low_lr": 7.757894736842105e-06, + "step": 1163 + }, + { + "epoch": 3.0585141354372123, + "high_lr": 0.00038789473684210524, + "low_lr": 7.757894736842105e-06, + "step": 1163 + }, + { + "epoch": 3.0585141354372123, + "high_lr": 0.00038789473684210524, + "low_lr": 7.757894736842105e-06, + "step": 1163 + }, + { + "epoch": 3.0585141354372123, + "high_lr": 0.00038789473684210524, + "low_lr": 7.757894736842105e-06, + "step": 1163 + }, + { + "epoch": 3.0585141354372123, + "high_lr": 0.00038789473684210524, + "low_lr": 7.757894736842105e-06, + "step": 1163 + }, + { + "epoch": 3.0585141354372123, + "high_lr": 0.00038789473684210524, + "low_lr": 7.757894736842105e-06, + "step": 1163 + }, + { + "epoch": 3.0585141354372123, + "high_lr": 0.00038789473684210524, + "low_lr": 7.757894736842105e-06, + "step": 1163 + }, + { + "epoch": 3.0585141354372123, + "high_lr": 0.00038789473684210524, + "low_lr": 7.757894736842105e-06, + "step": 1163 + }, + { + "epoch": 3.0611439842209074, + "grad_norm": 1.316124677658081, + "learning_rate": 0.0003873684210526316, + "loss": 1.2918, + "step": 1164 + }, + { + "epoch": 3.0611439842209074, + "high_lr": 0.0003873684210526316, + "low_lr": 7.747368421052631e-06, + "step": 1164 + }, + { + "epoch": 3.0611439842209074, + "high_lr": 0.0003873684210526316, + "low_lr": 7.747368421052631e-06, + "step": 1164 + }, + { + "epoch": 3.0611439842209074, + "high_lr": 0.0003873684210526316, + "low_lr": 7.747368421052631e-06, + "step": 1164 + }, + { + "epoch": 3.0611439842209074, + "high_lr": 0.0003873684210526316, + "low_lr": 7.747368421052631e-06, + "step": 1164 + }, + { + "epoch": 3.0611439842209074, + "high_lr": 0.0003873684210526316, + "low_lr": 7.747368421052631e-06, + "step": 1164 + }, + { + "epoch": 3.0611439842209074, + "high_lr": 0.0003873684210526316, + "low_lr": 7.747368421052631e-06, + "step": 1164 + }, + { + "epoch": 3.0611439842209074, + "high_lr": 0.0003873684210526316, + "low_lr": 7.747368421052631e-06, + "step": 1164 + }, + { + "epoch": 3.0611439842209074, + "high_lr": 0.0003873684210526316, + "low_lr": 7.747368421052631e-06, + "step": 1164 + }, + { + "epoch": 3.063773833004602, + "grad_norm": 1.3913902044296265, + "learning_rate": 0.00038684210526315787, + "loss": 1.2928, + "step": 1165 + }, + { + "epoch": 3.063773833004602, + "high_lr": 0.00038684210526315787, + "low_lr": 7.736842105263158e-06, + "step": 1165 + }, + { + "epoch": 3.063773833004602, + "high_lr": 0.00038684210526315787, + "low_lr": 7.736842105263158e-06, + "step": 1165 + }, + { + "epoch": 3.063773833004602, + "high_lr": 0.00038684210526315787, + "low_lr": 7.736842105263158e-06, + "step": 1165 + }, + { + "epoch": 3.063773833004602, + "high_lr": 0.00038684210526315787, + "low_lr": 7.736842105263158e-06, + "step": 1165 + }, + { + "epoch": 3.063773833004602, + "high_lr": 0.00038684210526315787, + "low_lr": 7.736842105263158e-06, + "step": 1165 + }, + { + "epoch": 3.063773833004602, + "high_lr": 0.00038684210526315787, + "low_lr": 7.736842105263158e-06, + "step": 1165 + }, + { + "epoch": 3.063773833004602, + "high_lr": 0.00038684210526315787, + "low_lr": 7.736842105263158e-06, + "step": 1165 + }, + { + "epoch": 3.063773833004602, + "high_lr": 0.00038684210526315787, + "low_lr": 7.736842105263158e-06, + "step": 1165 + }, + { + "epoch": 3.066403681788297, + "grad_norm": 1.357170581817627, + "learning_rate": 0.0003863157894736842, + "loss": 1.2819, + "step": 1166 + }, + { + "epoch": 3.066403681788297, + "high_lr": 0.0003863157894736842, + "low_lr": 7.726315789473686e-06, + "step": 1166 + }, + { + "epoch": 3.066403681788297, + "high_lr": 0.0003863157894736842, + "low_lr": 7.726315789473686e-06, + "step": 1166 + }, + { + "epoch": 3.066403681788297, + "high_lr": 0.0003863157894736842, + "low_lr": 7.726315789473686e-06, + "step": 1166 + }, + { + "epoch": 3.066403681788297, + "high_lr": 0.0003863157894736842, + "low_lr": 7.726315789473686e-06, + "step": 1166 + }, + { + "epoch": 3.066403681788297, + "high_lr": 0.0003863157894736842, + "low_lr": 7.726315789473686e-06, + "step": 1166 + }, + { + "epoch": 3.066403681788297, + "high_lr": 0.0003863157894736842, + "low_lr": 7.726315789473686e-06, + "step": 1166 + }, + { + "epoch": 3.066403681788297, + "high_lr": 0.0003863157894736842, + "low_lr": 7.726315789473686e-06, + "step": 1166 + }, + { + "epoch": 3.066403681788297, + "high_lr": 0.0003863157894736842, + "low_lr": 7.726315789473686e-06, + "step": 1166 + }, + { + "epoch": 3.0690335305719922, + "grad_norm": 1.297806978225708, + "learning_rate": 0.0003857894736842105, + "loss": 1.2749, + "step": 1167 + }, + { + "epoch": 3.0690335305719922, + "high_lr": 0.0003857894736842105, + "low_lr": 7.71578947368421e-06, + "step": 1167 + }, + { + "epoch": 3.0690335305719922, + "high_lr": 0.0003857894736842105, + "low_lr": 7.71578947368421e-06, + "step": 1167 + }, + { + "epoch": 3.0690335305719922, + "high_lr": 0.0003857894736842105, + "low_lr": 7.71578947368421e-06, + "step": 1167 + }, + { + "epoch": 3.0690335305719922, + "high_lr": 0.0003857894736842105, + "low_lr": 7.71578947368421e-06, + "step": 1167 + }, + { + "epoch": 3.0690335305719922, + "high_lr": 0.0003857894736842105, + "low_lr": 7.71578947368421e-06, + "step": 1167 + }, + { + "epoch": 3.0690335305719922, + "high_lr": 0.0003857894736842105, + "low_lr": 7.71578947368421e-06, + "step": 1167 + }, + { + "epoch": 3.0690335305719922, + "high_lr": 0.0003857894736842105, + "low_lr": 7.71578947368421e-06, + "step": 1167 + }, + { + "epoch": 3.0690335305719922, + "high_lr": 0.0003857894736842105, + "low_lr": 7.71578947368421e-06, + "step": 1167 + }, + { + "epoch": 3.071663379355687, + "grad_norm": 1.3861886262893677, + "learning_rate": 0.0003852631578947369, + "loss": 1.265, + "step": 1168 + }, + { + "epoch": 3.071663379355687, + "high_lr": 0.0003852631578947369, + "low_lr": 7.705263157894738e-06, + "step": 1168 + }, + { + "epoch": 3.071663379355687, + "high_lr": 0.0003852631578947369, + "low_lr": 7.705263157894738e-06, + "step": 1168 + }, + { + "epoch": 3.071663379355687, + "high_lr": 0.0003852631578947369, + "low_lr": 7.705263157894738e-06, + "step": 1168 + }, + { + "epoch": 3.071663379355687, + "high_lr": 0.0003852631578947369, + "low_lr": 7.705263157894738e-06, + "step": 1168 + }, + { + "epoch": 3.071663379355687, + "high_lr": 0.0003852631578947369, + "low_lr": 7.705263157894738e-06, + "step": 1168 + }, + { + "epoch": 3.071663379355687, + "high_lr": 0.0003852631578947369, + "low_lr": 7.705263157894738e-06, + "step": 1168 + }, + { + "epoch": 3.071663379355687, + "high_lr": 0.0003852631578947369, + "low_lr": 7.705263157894738e-06, + "step": 1168 + }, + { + "epoch": 3.071663379355687, + "high_lr": 0.0003852631578947369, + "low_lr": 7.705263157894738e-06, + "step": 1168 + }, + { + "epoch": 3.074293228139382, + "grad_norm": 1.303112268447876, + "learning_rate": 0.0003847368421052632, + "loss": 1.2866, + "step": 1169 + }, + { + "epoch": 3.074293228139382, + "high_lr": 0.0003847368421052632, + "low_lr": 7.694736842105263e-06, + "step": 1169 + }, + { + "epoch": 3.074293228139382, + "high_lr": 0.0003847368421052632, + "low_lr": 7.694736842105263e-06, + "step": 1169 + }, + { + "epoch": 3.074293228139382, + "high_lr": 0.0003847368421052632, + "low_lr": 7.694736842105263e-06, + "step": 1169 + }, + { + "epoch": 3.074293228139382, + "high_lr": 0.0003847368421052632, + "low_lr": 7.694736842105263e-06, + "step": 1169 + }, + { + "epoch": 3.074293228139382, + "high_lr": 0.0003847368421052632, + "low_lr": 7.694736842105263e-06, + "step": 1169 + }, + { + "epoch": 3.074293228139382, + "high_lr": 0.0003847368421052632, + "low_lr": 7.694736842105263e-06, + "step": 1169 + }, + { + "epoch": 3.074293228139382, + "high_lr": 0.0003847368421052632, + "low_lr": 7.694736842105263e-06, + "step": 1169 + }, + { + "epoch": 3.074293228139382, + "high_lr": 0.0003847368421052632, + "low_lr": 7.694736842105263e-06, + "step": 1169 + }, + { + "epoch": 3.076923076923077, + "grad_norm": 1.435673713684082, + "learning_rate": 0.00038421052631578946, + "loss": 1.3403, + "step": 1170 + }, + { + "epoch": 3.076923076923077, + "high_lr": 0.00038421052631578946, + "low_lr": 7.68421052631579e-06, + "step": 1170 + }, + { + "epoch": 3.076923076923077, + "high_lr": 0.00038421052631578946, + "low_lr": 7.68421052631579e-06, + "step": 1170 + }, + { + "epoch": 3.076923076923077, + "high_lr": 0.00038421052631578946, + "low_lr": 7.68421052631579e-06, + "step": 1170 + }, + { + "epoch": 3.076923076923077, + "high_lr": 0.00038421052631578946, + "low_lr": 7.68421052631579e-06, + "step": 1170 + }, + { + "epoch": 3.076923076923077, + "high_lr": 0.00038421052631578946, + "low_lr": 7.68421052631579e-06, + "step": 1170 + }, + { + "epoch": 3.076923076923077, + "high_lr": 0.00038421052631578946, + "low_lr": 7.68421052631579e-06, + "step": 1170 + }, + { + "epoch": 3.076923076923077, + "high_lr": 0.00038421052631578946, + "low_lr": 7.68421052631579e-06, + "step": 1170 + }, + { + "epoch": 3.076923076923077, + "high_lr": 0.00038421052631578946, + "low_lr": 7.68421052631579e-06, + "step": 1170 + }, + { + "epoch": 3.0795529257067717, + "grad_norm": 1.348922848701477, + "learning_rate": 0.0003836842105263158, + "loss": 1.2902, + "step": 1171 + }, + { + "epoch": 3.0795529257067717, + "high_lr": 0.0003836842105263158, + "low_lr": 7.673684210526316e-06, + "step": 1171 + }, + { + "epoch": 3.0795529257067717, + "high_lr": 0.0003836842105263158, + "low_lr": 7.673684210526316e-06, + "step": 1171 + }, + { + "epoch": 3.0795529257067717, + "high_lr": 0.0003836842105263158, + "low_lr": 7.673684210526316e-06, + "step": 1171 + }, + { + "epoch": 3.0795529257067717, + "high_lr": 0.0003836842105263158, + "low_lr": 7.673684210526316e-06, + "step": 1171 + }, + { + "epoch": 3.0795529257067717, + "high_lr": 0.0003836842105263158, + "low_lr": 7.673684210526316e-06, + "step": 1171 + }, + { + "epoch": 3.0795529257067717, + "high_lr": 0.0003836842105263158, + "low_lr": 7.673684210526316e-06, + "step": 1171 + }, + { + "epoch": 3.0795529257067717, + "high_lr": 0.0003836842105263158, + "low_lr": 7.673684210526316e-06, + "step": 1171 + }, + { + "epoch": 3.0795529257067717, + "high_lr": 0.0003836842105263158, + "low_lr": 7.673684210526316e-06, + "step": 1171 + }, + { + "epoch": 3.082182774490467, + "grad_norm": 1.350555658340454, + "learning_rate": 0.0003831578947368421, + "loss": 1.3351, + "step": 1172 + }, + { + "epoch": 3.082182774490467, + "high_lr": 0.0003831578947368421, + "low_lr": 7.663157894736842e-06, + "step": 1172 + }, + { + "epoch": 3.082182774490467, + "high_lr": 0.0003831578947368421, + "low_lr": 7.663157894736842e-06, + "step": 1172 + }, + { + "epoch": 3.082182774490467, + "high_lr": 0.0003831578947368421, + "low_lr": 7.663157894736842e-06, + "step": 1172 + }, + { + "epoch": 3.082182774490467, + "high_lr": 0.0003831578947368421, + "low_lr": 7.663157894736842e-06, + "step": 1172 + }, + { + "epoch": 3.082182774490467, + "high_lr": 0.0003831578947368421, + "low_lr": 7.663157894736842e-06, + "step": 1172 + }, + { + "epoch": 3.082182774490467, + "high_lr": 0.0003831578947368421, + "low_lr": 7.663157894736842e-06, + "step": 1172 + }, + { + "epoch": 3.082182774490467, + "high_lr": 0.0003831578947368421, + "low_lr": 7.663157894736842e-06, + "step": 1172 + }, + { + "epoch": 3.082182774490467, + "high_lr": 0.0003831578947368421, + "low_lr": 7.663157894736842e-06, + "step": 1172 + }, + { + "epoch": 3.084812623274162, + "grad_norm": 1.331555724143982, + "learning_rate": 0.00038263157894736843, + "loss": 1.2961, + "step": 1173 + }, + { + "epoch": 3.084812623274162, + "high_lr": 0.00038263157894736843, + "low_lr": 7.65263157894737e-06, + "step": 1173 + }, + { + "epoch": 3.084812623274162, + "high_lr": 0.00038263157894736843, + "low_lr": 7.65263157894737e-06, + "step": 1173 + }, + { + "epoch": 3.084812623274162, + "high_lr": 0.00038263157894736843, + "low_lr": 7.65263157894737e-06, + "step": 1173 + }, + { + "epoch": 3.084812623274162, + "high_lr": 0.00038263157894736843, + "low_lr": 7.65263157894737e-06, + "step": 1173 + }, + { + "epoch": 3.084812623274162, + "high_lr": 0.00038263157894736843, + "low_lr": 7.65263157894737e-06, + "step": 1173 + }, + { + "epoch": 3.084812623274162, + "high_lr": 0.00038263157894736843, + "low_lr": 7.65263157894737e-06, + "step": 1173 + }, + { + "epoch": 3.084812623274162, + "high_lr": 0.00038263157894736843, + "low_lr": 7.65263157894737e-06, + "step": 1173 + }, + { + "epoch": 3.084812623274162, + "high_lr": 0.00038263157894736843, + "low_lr": 7.65263157894737e-06, + "step": 1173 + }, + { + "epoch": 3.0874424720578566, + "grad_norm": 1.455815315246582, + "learning_rate": 0.0003821052631578947, + "loss": 1.2662, + "step": 1174 + }, + { + "epoch": 3.0874424720578566, + "high_lr": 0.0003821052631578947, + "low_lr": 7.642105263157895e-06, + "step": 1174 + }, + { + "epoch": 3.0874424720578566, + "high_lr": 0.0003821052631578947, + "low_lr": 7.642105263157895e-06, + "step": 1174 + }, + { + "epoch": 3.0874424720578566, + "high_lr": 0.0003821052631578947, + "low_lr": 7.642105263157895e-06, + "step": 1174 + }, + { + "epoch": 3.0874424720578566, + "high_lr": 0.0003821052631578947, + "low_lr": 7.642105263157895e-06, + "step": 1174 + }, + { + "epoch": 3.0874424720578566, + "high_lr": 0.0003821052631578947, + "low_lr": 7.642105263157895e-06, + "step": 1174 + }, + { + "epoch": 3.0874424720578566, + "high_lr": 0.0003821052631578947, + "low_lr": 7.642105263157895e-06, + "step": 1174 + }, + { + "epoch": 3.0874424720578566, + "high_lr": 0.0003821052631578947, + "low_lr": 7.642105263157895e-06, + "step": 1174 + }, + { + "epoch": 3.0874424720578566, + "high_lr": 0.0003821052631578947, + "low_lr": 7.642105263157895e-06, + "step": 1174 + }, + { + "epoch": 3.0900723208415517, + "grad_norm": 1.3014329671859741, + "learning_rate": 0.00038157894736842105, + "loss": 1.337, + "step": 1175 + }, + { + "epoch": 3.0900723208415517, + "high_lr": 0.00038157894736842105, + "low_lr": 7.631578947368423e-06, + "step": 1175 + }, + { + "epoch": 3.0900723208415517, + "high_lr": 0.00038157894736842105, + "low_lr": 7.631578947368423e-06, + "step": 1175 + }, + { + "epoch": 3.0900723208415517, + "high_lr": 0.00038157894736842105, + "low_lr": 7.631578947368423e-06, + "step": 1175 + }, + { + "epoch": 3.0900723208415517, + "high_lr": 0.00038157894736842105, + "low_lr": 7.631578947368423e-06, + "step": 1175 + }, + { + "epoch": 3.0900723208415517, + "high_lr": 0.00038157894736842105, + "low_lr": 7.631578947368423e-06, + "step": 1175 + }, + { + "epoch": 3.0900723208415517, + "high_lr": 0.00038157894736842105, + "low_lr": 7.631578947368423e-06, + "step": 1175 + }, + { + "epoch": 3.0900723208415517, + "high_lr": 0.00038157894736842105, + "low_lr": 7.631578947368423e-06, + "step": 1175 + }, + { + "epoch": 3.0900723208415517, + "high_lr": 0.00038157894736842105, + "low_lr": 7.631578947368423e-06, + "step": 1175 + }, + { + "epoch": 3.0927021696252464, + "grad_norm": 1.3321278095245361, + "learning_rate": 0.0003810526315789474, + "loss": 1.3101, + "step": 1176 + }, + { + "epoch": 3.0927021696252464, + "high_lr": 0.0003810526315789474, + "low_lr": 7.621052631578948e-06, + "step": 1176 + }, + { + "epoch": 3.0927021696252464, + "high_lr": 0.0003810526315789474, + "low_lr": 7.621052631578948e-06, + "step": 1176 + }, + { + "epoch": 3.0927021696252464, + "high_lr": 0.0003810526315789474, + "low_lr": 7.621052631578948e-06, + "step": 1176 + }, + { + "epoch": 3.0927021696252464, + "high_lr": 0.0003810526315789474, + "low_lr": 7.621052631578948e-06, + "step": 1176 + }, + { + "epoch": 3.0927021696252464, + "high_lr": 0.0003810526315789474, + "low_lr": 7.621052631578948e-06, + "step": 1176 + }, + { + "epoch": 3.0927021696252464, + "high_lr": 0.0003810526315789474, + "low_lr": 7.621052631578948e-06, + "step": 1176 + }, + { + "epoch": 3.0927021696252464, + "high_lr": 0.0003810526315789474, + "low_lr": 7.621052631578948e-06, + "step": 1176 + }, + { + "epoch": 3.0927021696252464, + "high_lr": 0.0003810526315789474, + "low_lr": 7.621052631578948e-06, + "step": 1176 + }, + { + "epoch": 3.0953320184089415, + "grad_norm": 1.3796805143356323, + "learning_rate": 0.0003805263157894737, + "loss": 1.325, + "step": 1177 + }, + { + "epoch": 3.0953320184089415, + "high_lr": 0.0003805263157894737, + "low_lr": 7.610526315789474e-06, + "step": 1177 + }, + { + "epoch": 3.0953320184089415, + "high_lr": 0.0003805263157894737, + "low_lr": 7.610526315789474e-06, + "step": 1177 + }, + { + "epoch": 3.0953320184089415, + "high_lr": 0.0003805263157894737, + "low_lr": 7.610526315789474e-06, + "step": 1177 + }, + { + "epoch": 3.0953320184089415, + "high_lr": 0.0003805263157894737, + "low_lr": 7.610526315789474e-06, + "step": 1177 + }, + { + "epoch": 3.0953320184089415, + "high_lr": 0.0003805263157894737, + "low_lr": 7.610526315789474e-06, + "step": 1177 + }, + { + "epoch": 3.0953320184089415, + "high_lr": 0.0003805263157894737, + "low_lr": 7.610526315789474e-06, + "step": 1177 + }, + { + "epoch": 3.0953320184089415, + "high_lr": 0.0003805263157894737, + "low_lr": 7.610526315789474e-06, + "step": 1177 + }, + { + "epoch": 3.0953320184089415, + "high_lr": 0.0003805263157894737, + "low_lr": 7.610526315789474e-06, + "step": 1177 + }, + { + "epoch": 3.0979618671926366, + "grad_norm": 1.3283296823501587, + "learning_rate": 0.00038, + "loss": 1.3269, + "step": 1178 + }, + { + "epoch": 3.0979618671926366, + "high_lr": 0.00038, + "low_lr": 7.600000000000001e-06, + "step": 1178 + }, + { + "epoch": 3.0979618671926366, + "high_lr": 0.00038, + "low_lr": 7.600000000000001e-06, + "step": 1178 + }, + { + "epoch": 3.0979618671926366, + "high_lr": 0.00038, + "low_lr": 7.600000000000001e-06, + "step": 1178 + }, + { + "epoch": 3.0979618671926366, + "high_lr": 0.00038, + "low_lr": 7.600000000000001e-06, + "step": 1178 + }, + { + "epoch": 3.0979618671926366, + "high_lr": 0.00038, + "low_lr": 7.600000000000001e-06, + "step": 1178 + }, + { + "epoch": 3.0979618671926366, + "high_lr": 0.00038, + "low_lr": 7.600000000000001e-06, + "step": 1178 + }, + { + "epoch": 3.0979618671926366, + "high_lr": 0.00038, + "low_lr": 7.600000000000001e-06, + "step": 1178 + }, + { + "epoch": 3.0979618671926366, + "high_lr": 0.00038, + "low_lr": 7.600000000000001e-06, + "step": 1178 + }, + { + "epoch": 3.100591715976331, + "grad_norm": 1.359445571899414, + "learning_rate": 0.0003794736842105263, + "loss": 1.2569, + "step": 1179 + }, + { + "epoch": 3.100591715976331, + "high_lr": 0.0003794736842105263, + "low_lr": 7.589473684210526e-06, + "step": 1179 + }, + { + "epoch": 3.100591715976331, + "high_lr": 0.0003794736842105263, + "low_lr": 7.589473684210526e-06, + "step": 1179 + }, + { + "epoch": 3.100591715976331, + "high_lr": 0.0003794736842105263, + "low_lr": 7.589473684210526e-06, + "step": 1179 + }, + { + "epoch": 3.100591715976331, + "high_lr": 0.0003794736842105263, + "low_lr": 7.589473684210526e-06, + "step": 1179 + }, + { + "epoch": 3.100591715976331, + "high_lr": 0.0003794736842105263, + "low_lr": 7.589473684210526e-06, + "step": 1179 + }, + { + "epoch": 3.100591715976331, + "high_lr": 0.0003794736842105263, + "low_lr": 7.589473684210526e-06, + "step": 1179 + }, + { + "epoch": 3.100591715976331, + "high_lr": 0.0003794736842105263, + "low_lr": 7.589473684210526e-06, + "step": 1179 + }, + { + "epoch": 3.100591715976331, + "high_lr": 0.0003794736842105263, + "low_lr": 7.589473684210526e-06, + "step": 1179 + }, + { + "epoch": 3.1032215647600263, + "grad_norm": 1.3396329879760742, + "learning_rate": 0.00037894736842105265, + "loss": 1.3353, + "step": 1180 + }, + { + "epoch": 3.1032215647600263, + "high_lr": 0.00037894736842105265, + "low_lr": 7.578947368421054e-06, + "step": 1180 + }, + { + "epoch": 3.1032215647600263, + "high_lr": 0.00037894736842105265, + "low_lr": 7.578947368421054e-06, + "step": 1180 + }, + { + "epoch": 3.1032215647600263, + "high_lr": 0.00037894736842105265, + "low_lr": 7.578947368421054e-06, + "step": 1180 + }, + { + "epoch": 3.1032215647600263, + "high_lr": 0.00037894736842105265, + "low_lr": 7.578947368421054e-06, + "step": 1180 + }, + { + "epoch": 3.1032215647600263, + "high_lr": 0.00037894736842105265, + "low_lr": 7.578947368421054e-06, + "step": 1180 + }, + { + "epoch": 3.1032215647600263, + "high_lr": 0.00037894736842105265, + "low_lr": 7.578947368421054e-06, + "step": 1180 + }, + { + "epoch": 3.1032215647600263, + "high_lr": 0.00037894736842105265, + "low_lr": 7.578947368421054e-06, + "step": 1180 + }, + { + "epoch": 3.1032215647600263, + "high_lr": 0.00037894736842105265, + "low_lr": 7.578947368421054e-06, + "step": 1180 + }, + { + "epoch": 3.1058514135437214, + "grad_norm": 1.2817565202713013, + "learning_rate": 0.00037842105263157893, + "loss": 1.3289, + "step": 1181 + }, + { + "epoch": 3.1058514135437214, + "high_lr": 0.00037842105263157893, + "low_lr": 7.568421052631579e-06, + "step": 1181 + }, + { + "epoch": 3.1058514135437214, + "high_lr": 0.00037842105263157893, + "low_lr": 7.568421052631579e-06, + "step": 1181 + }, + { + "epoch": 3.1058514135437214, + "high_lr": 0.00037842105263157893, + "low_lr": 7.568421052631579e-06, + "step": 1181 + }, + { + "epoch": 3.1058514135437214, + "high_lr": 0.00037842105263157893, + "low_lr": 7.568421052631579e-06, + "step": 1181 + }, + { + "epoch": 3.1058514135437214, + "high_lr": 0.00037842105263157893, + "low_lr": 7.568421052631579e-06, + "step": 1181 + }, + { + "epoch": 3.1058514135437214, + "high_lr": 0.00037842105263157893, + "low_lr": 7.568421052631579e-06, + "step": 1181 + }, + { + "epoch": 3.1058514135437214, + "high_lr": 0.00037842105263157893, + "low_lr": 7.568421052631579e-06, + "step": 1181 + }, + { + "epoch": 3.1058514135437214, + "high_lr": 0.00037842105263157893, + "low_lr": 7.568421052631579e-06, + "step": 1181 + }, + { + "epoch": 3.108481262327416, + "grad_norm": 1.3851240873336792, + "learning_rate": 0.00037789473684210527, + "loss": 1.337, + "step": 1182 + }, + { + "epoch": 3.108481262327416, + "high_lr": 0.00037789473684210527, + "low_lr": 7.557894736842106e-06, + "step": 1182 + }, + { + "epoch": 3.108481262327416, + "high_lr": 0.00037789473684210527, + "low_lr": 7.557894736842106e-06, + "step": 1182 + }, + { + "epoch": 3.108481262327416, + "high_lr": 0.00037789473684210527, + "low_lr": 7.557894736842106e-06, + "step": 1182 + }, + { + "epoch": 3.108481262327416, + "high_lr": 0.00037789473684210527, + "low_lr": 7.557894736842106e-06, + "step": 1182 + }, + { + "epoch": 3.108481262327416, + "high_lr": 0.00037789473684210527, + "low_lr": 7.557894736842106e-06, + "step": 1182 + }, + { + "epoch": 3.108481262327416, + "high_lr": 0.00037789473684210527, + "low_lr": 7.557894736842106e-06, + "step": 1182 + }, + { + "epoch": 3.108481262327416, + "high_lr": 0.00037789473684210527, + "low_lr": 7.557894736842106e-06, + "step": 1182 + }, + { + "epoch": 3.108481262327416, + "high_lr": 0.00037789473684210527, + "low_lr": 7.557894736842106e-06, + "step": 1182 + }, + { + "epoch": 3.111111111111111, + "grad_norm": 1.2566713094711304, + "learning_rate": 0.00037736842105263156, + "loss": 1.2824, + "step": 1183 + }, + { + "epoch": 3.111111111111111, + "high_lr": 0.00037736842105263156, + "low_lr": 7.547368421052632e-06, + "step": 1183 + }, + { + "epoch": 3.111111111111111, + "high_lr": 0.00037736842105263156, + "low_lr": 7.547368421052632e-06, + "step": 1183 + }, + { + "epoch": 3.111111111111111, + "high_lr": 0.00037736842105263156, + "low_lr": 7.547368421052632e-06, + "step": 1183 + }, + { + "epoch": 3.111111111111111, + "high_lr": 0.00037736842105263156, + "low_lr": 7.547368421052632e-06, + "step": 1183 + }, + { + "epoch": 3.111111111111111, + "high_lr": 0.00037736842105263156, + "low_lr": 7.547368421052632e-06, + "step": 1183 + }, + { + "epoch": 3.111111111111111, + "high_lr": 0.00037736842105263156, + "low_lr": 7.547368421052632e-06, + "step": 1183 + }, + { + "epoch": 3.111111111111111, + "high_lr": 0.00037736842105263156, + "low_lr": 7.547368421052632e-06, + "step": 1183 + }, + { + "epoch": 3.111111111111111, + "high_lr": 0.00037736842105263156, + "low_lr": 7.547368421052632e-06, + "step": 1183 + }, + { + "epoch": 3.1137409598948063, + "grad_norm": 1.3147213459014893, + "learning_rate": 0.0003768421052631579, + "loss": 1.2508, + "step": 1184 + }, + { + "epoch": 3.1137409598948063, + "high_lr": 0.0003768421052631579, + "low_lr": 7.536842105263158e-06, + "step": 1184 + }, + { + "epoch": 3.1137409598948063, + "high_lr": 0.0003768421052631579, + "low_lr": 7.536842105263158e-06, + "step": 1184 + }, + { + "epoch": 3.1137409598948063, + "high_lr": 0.0003768421052631579, + "low_lr": 7.536842105263158e-06, + "step": 1184 + }, + { + "epoch": 3.1137409598948063, + "high_lr": 0.0003768421052631579, + "low_lr": 7.536842105263158e-06, + "step": 1184 + }, + { + "epoch": 3.1137409598948063, + "high_lr": 0.0003768421052631579, + "low_lr": 7.536842105263158e-06, + "step": 1184 + }, + { + "epoch": 3.1137409598948063, + "high_lr": 0.0003768421052631579, + "low_lr": 7.536842105263158e-06, + "step": 1184 + }, + { + "epoch": 3.1137409598948063, + "high_lr": 0.0003768421052631579, + "low_lr": 7.536842105263158e-06, + "step": 1184 + }, + { + "epoch": 3.1137409598948063, + "high_lr": 0.0003768421052631579, + "low_lr": 7.536842105263158e-06, + "step": 1184 + }, + { + "epoch": 3.116370808678501, + "grad_norm": 1.3407189846038818, + "learning_rate": 0.00037631578947368424, + "loss": 1.2757, + "step": 1185 + }, + { + "epoch": 3.116370808678501, + "high_lr": 0.00037631578947368424, + "low_lr": 7.526315789473685e-06, + "step": 1185 + }, + { + "epoch": 3.116370808678501, + "high_lr": 0.00037631578947368424, + "low_lr": 7.526315789473685e-06, + "step": 1185 + }, + { + "epoch": 3.116370808678501, + "high_lr": 0.00037631578947368424, + "low_lr": 7.526315789473685e-06, + "step": 1185 + }, + { + "epoch": 3.116370808678501, + "high_lr": 0.00037631578947368424, + "low_lr": 7.526315789473685e-06, + "step": 1185 + }, + { + "epoch": 3.116370808678501, + "high_lr": 0.00037631578947368424, + "low_lr": 7.526315789473685e-06, + "step": 1185 + }, + { + "epoch": 3.116370808678501, + "high_lr": 0.00037631578947368424, + "low_lr": 7.526315789473685e-06, + "step": 1185 + }, + { + "epoch": 3.116370808678501, + "high_lr": 0.00037631578947368424, + "low_lr": 7.526315789473685e-06, + "step": 1185 + }, + { + "epoch": 3.116370808678501, + "high_lr": 0.00037631578947368424, + "low_lr": 7.526315789473685e-06, + "step": 1185 + }, + { + "epoch": 3.119000657462196, + "grad_norm": 1.3812859058380127, + "learning_rate": 0.0003757894736842105, + "loss": 1.2975, + "step": 1186 + }, + { + "epoch": 3.119000657462196, + "high_lr": 0.0003757894736842105, + "low_lr": 7.515789473684211e-06, + "step": 1186 + }, + { + "epoch": 3.119000657462196, + "high_lr": 0.0003757894736842105, + "low_lr": 7.515789473684211e-06, + "step": 1186 + }, + { + "epoch": 3.119000657462196, + "high_lr": 0.0003757894736842105, + "low_lr": 7.515789473684211e-06, + "step": 1186 + }, + { + "epoch": 3.119000657462196, + "high_lr": 0.0003757894736842105, + "low_lr": 7.515789473684211e-06, + "step": 1186 + }, + { + "epoch": 3.119000657462196, + "high_lr": 0.0003757894736842105, + "low_lr": 7.515789473684211e-06, + "step": 1186 + }, + { + "epoch": 3.119000657462196, + "high_lr": 0.0003757894736842105, + "low_lr": 7.515789473684211e-06, + "step": 1186 + }, + { + "epoch": 3.119000657462196, + "high_lr": 0.0003757894736842105, + "low_lr": 7.515789473684211e-06, + "step": 1186 + }, + { + "epoch": 3.119000657462196, + "high_lr": 0.0003757894736842105, + "low_lr": 7.515789473684211e-06, + "step": 1186 + }, + { + "epoch": 3.1216305062458907, + "grad_norm": 1.363731861114502, + "learning_rate": 0.00037526315789473686, + "loss": 1.2942, + "step": 1187 + }, + { + "epoch": 3.1216305062458907, + "high_lr": 0.00037526315789473686, + "low_lr": 7.505263157894738e-06, + "step": 1187 + }, + { + "epoch": 3.1216305062458907, + "high_lr": 0.00037526315789473686, + "low_lr": 7.505263157894738e-06, + "step": 1187 + }, + { + "epoch": 3.1216305062458907, + "high_lr": 0.00037526315789473686, + "low_lr": 7.505263157894738e-06, + "step": 1187 + }, + { + "epoch": 3.1216305062458907, + "high_lr": 0.00037526315789473686, + "low_lr": 7.505263157894738e-06, + "step": 1187 + }, + { + "epoch": 3.1216305062458907, + "high_lr": 0.00037526315789473686, + "low_lr": 7.505263157894738e-06, + "step": 1187 + }, + { + "epoch": 3.1216305062458907, + "high_lr": 0.00037526315789473686, + "low_lr": 7.505263157894738e-06, + "step": 1187 + }, + { + "epoch": 3.1216305062458907, + "high_lr": 0.00037526315789473686, + "low_lr": 7.505263157894738e-06, + "step": 1187 + }, + { + "epoch": 3.1216305062458907, + "high_lr": 0.00037526315789473686, + "low_lr": 7.505263157894738e-06, + "step": 1187 + }, + { + "epoch": 3.1242603550295858, + "grad_norm": 1.3423538208007812, + "learning_rate": 0.00037473684210526315, + "loss": 1.2962, + "step": 1188 + }, + { + "epoch": 3.1242603550295858, + "high_lr": 0.00037473684210526315, + "low_lr": 7.494736842105263e-06, + "step": 1188 + }, + { + "epoch": 3.1242603550295858, + "high_lr": 0.00037473684210526315, + "low_lr": 7.494736842105263e-06, + "step": 1188 + }, + { + "epoch": 3.1242603550295858, + "high_lr": 0.00037473684210526315, + "low_lr": 7.494736842105263e-06, + "step": 1188 + }, + { + "epoch": 3.1242603550295858, + "high_lr": 0.00037473684210526315, + "low_lr": 7.494736842105263e-06, + "step": 1188 + }, + { + "epoch": 3.1242603550295858, + "high_lr": 0.00037473684210526315, + "low_lr": 7.494736842105263e-06, + "step": 1188 + }, + { + "epoch": 3.1242603550295858, + "high_lr": 0.00037473684210526315, + "low_lr": 7.494736842105263e-06, + "step": 1188 + }, + { + "epoch": 3.1242603550295858, + "high_lr": 0.00037473684210526315, + "low_lr": 7.494736842105263e-06, + "step": 1188 + }, + { + "epoch": 3.1242603550295858, + "high_lr": 0.00037473684210526315, + "low_lr": 7.494736842105263e-06, + "step": 1188 + }, + { + "epoch": 3.126890203813281, + "grad_norm": 1.337683081626892, + "learning_rate": 0.0003742105263157895, + "loss": 1.327, + "step": 1189 + }, + { + "epoch": 3.126890203813281, + "high_lr": 0.0003742105263157895, + "low_lr": 7.4842105263157905e-06, + "step": 1189 + }, + { + "epoch": 3.126890203813281, + "high_lr": 0.0003742105263157895, + "low_lr": 7.4842105263157905e-06, + "step": 1189 + }, + { + "epoch": 3.126890203813281, + "high_lr": 0.0003742105263157895, + "low_lr": 7.4842105263157905e-06, + "step": 1189 + }, + { + "epoch": 3.126890203813281, + "high_lr": 0.0003742105263157895, + "low_lr": 7.4842105263157905e-06, + "step": 1189 + }, + { + "epoch": 3.126890203813281, + "high_lr": 0.0003742105263157895, + "low_lr": 7.4842105263157905e-06, + "step": 1189 + }, + { + "epoch": 3.126890203813281, + "high_lr": 0.0003742105263157895, + "low_lr": 7.4842105263157905e-06, + "step": 1189 + }, + { + "epoch": 3.126890203813281, + "high_lr": 0.0003742105263157895, + "low_lr": 7.4842105263157905e-06, + "step": 1189 + }, + { + "epoch": 3.126890203813281, + "high_lr": 0.0003742105263157895, + "low_lr": 7.4842105263157905e-06, + "step": 1189 + }, + { + "epoch": 3.1295200525969755, + "grad_norm": 1.4115649461746216, + "learning_rate": 0.0003736842105263158, + "loss": 1.2936, + "step": 1190 + }, + { + "epoch": 3.1295200525969755, + "high_lr": 0.0003736842105263158, + "low_lr": 7.473684210526316e-06, + "step": 1190 + }, + { + "epoch": 3.1295200525969755, + "high_lr": 0.0003736842105263158, + "low_lr": 7.473684210526316e-06, + "step": 1190 + }, + { + "epoch": 3.1295200525969755, + "high_lr": 0.0003736842105263158, + "low_lr": 7.473684210526316e-06, + "step": 1190 + }, + { + "epoch": 3.1295200525969755, + "high_lr": 0.0003736842105263158, + "low_lr": 7.473684210526316e-06, + "step": 1190 + }, + { + "epoch": 3.1295200525969755, + "high_lr": 0.0003736842105263158, + "low_lr": 7.473684210526316e-06, + "step": 1190 + }, + { + "epoch": 3.1295200525969755, + "high_lr": 0.0003736842105263158, + "low_lr": 7.473684210526316e-06, + "step": 1190 + }, + { + "epoch": 3.1295200525969755, + "high_lr": 0.0003736842105263158, + "low_lr": 7.473684210526316e-06, + "step": 1190 + }, + { + "epoch": 3.1295200525969755, + "high_lr": 0.0003736842105263158, + "low_lr": 7.473684210526316e-06, + "step": 1190 + }, + { + "epoch": 3.1321499013806706, + "grad_norm": 1.3720990419387817, + "learning_rate": 0.0003731578947368421, + "loss": 1.2764, + "step": 1191 + }, + { + "epoch": 3.1321499013806706, + "high_lr": 0.0003731578947368421, + "low_lr": 7.463157894736843e-06, + "step": 1191 + }, + { + "epoch": 3.1321499013806706, + "high_lr": 0.0003731578947368421, + "low_lr": 7.463157894736843e-06, + "step": 1191 + }, + { + "epoch": 3.1321499013806706, + "high_lr": 0.0003731578947368421, + "low_lr": 7.463157894736843e-06, + "step": 1191 + }, + { + "epoch": 3.1321499013806706, + "high_lr": 0.0003731578947368421, + "low_lr": 7.463157894736843e-06, + "step": 1191 + }, + { + "epoch": 3.1321499013806706, + "high_lr": 0.0003731578947368421, + "low_lr": 7.463157894736843e-06, + "step": 1191 + }, + { + "epoch": 3.1321499013806706, + "high_lr": 0.0003731578947368421, + "low_lr": 7.463157894736843e-06, + "step": 1191 + }, + { + "epoch": 3.1321499013806706, + "high_lr": 0.0003731578947368421, + "low_lr": 7.463157894736843e-06, + "step": 1191 + }, + { + "epoch": 3.1321499013806706, + "high_lr": 0.0003731578947368421, + "low_lr": 7.463157894736843e-06, + "step": 1191 + }, + { + "epoch": 3.1347797501643657, + "grad_norm": 1.5742392539978027, + "learning_rate": 0.00037263157894736846, + "loss": 1.3159, + "step": 1192 + }, + { + "epoch": 3.1347797501643657, + "high_lr": 0.00037263157894736846, + "low_lr": 7.4526315789473695e-06, + "step": 1192 + }, + { + "epoch": 3.1347797501643657, + "high_lr": 0.00037263157894736846, + "low_lr": 7.4526315789473695e-06, + "step": 1192 + }, + { + "epoch": 3.1347797501643657, + "high_lr": 0.00037263157894736846, + "low_lr": 7.4526315789473695e-06, + "step": 1192 + }, + { + "epoch": 3.1347797501643657, + "high_lr": 0.00037263157894736846, + "low_lr": 7.4526315789473695e-06, + "step": 1192 + }, + { + "epoch": 3.1347797501643657, + "high_lr": 0.00037263157894736846, + "low_lr": 7.4526315789473695e-06, + "step": 1192 + }, + { + "epoch": 3.1347797501643657, + "high_lr": 0.00037263157894736846, + "low_lr": 7.4526315789473695e-06, + "step": 1192 + }, + { + "epoch": 3.1347797501643657, + "high_lr": 0.00037263157894736846, + "low_lr": 7.4526315789473695e-06, + "step": 1192 + }, + { + "epoch": 3.1347797501643657, + "high_lr": 0.00037263157894736846, + "low_lr": 7.4526315789473695e-06, + "step": 1192 + }, + { + "epoch": 3.1374095989480604, + "grad_norm": 1.5012247562408447, + "learning_rate": 0.00037210526315789474, + "loss": 1.28, + "step": 1193 + }, + { + "epoch": 3.1374095989480604, + "high_lr": 0.00037210526315789474, + "low_lr": 7.442105263157895e-06, + "step": 1193 + }, + { + "epoch": 3.1374095989480604, + "high_lr": 0.00037210526315789474, + "low_lr": 7.442105263157895e-06, + "step": 1193 + }, + { + "epoch": 3.1374095989480604, + "high_lr": 0.00037210526315789474, + "low_lr": 7.442105263157895e-06, + "step": 1193 + }, + { + "epoch": 3.1374095989480604, + "high_lr": 0.00037210526315789474, + "low_lr": 7.442105263157895e-06, + "step": 1193 + }, + { + "epoch": 3.1374095989480604, + "high_lr": 0.00037210526315789474, + "low_lr": 7.442105263157895e-06, + "step": 1193 + }, + { + "epoch": 3.1374095989480604, + "high_lr": 0.00037210526315789474, + "low_lr": 7.442105263157895e-06, + "step": 1193 + }, + { + "epoch": 3.1374095989480604, + "high_lr": 0.00037210526315789474, + "low_lr": 7.442105263157895e-06, + "step": 1193 + }, + { + "epoch": 3.1374095989480604, + "high_lr": 0.00037210526315789474, + "low_lr": 7.442105263157895e-06, + "step": 1193 + }, + { + "epoch": 3.1400394477317555, + "grad_norm": 1.3024135828018188, + "learning_rate": 0.0003715789473684211, + "loss": 1.284, + "step": 1194 + }, + { + "epoch": 3.1400394477317555, + "high_lr": 0.0003715789473684211, + "low_lr": 7.431578947368422e-06, + "step": 1194 + }, + { + "epoch": 3.1400394477317555, + "high_lr": 0.0003715789473684211, + "low_lr": 7.431578947368422e-06, + "step": 1194 + }, + { + "epoch": 3.1400394477317555, + "high_lr": 0.0003715789473684211, + "low_lr": 7.431578947368422e-06, + "step": 1194 + }, + { + "epoch": 3.1400394477317555, + "high_lr": 0.0003715789473684211, + "low_lr": 7.431578947368422e-06, + "step": 1194 + }, + { + "epoch": 3.1400394477317555, + "high_lr": 0.0003715789473684211, + "low_lr": 7.431578947368422e-06, + "step": 1194 + }, + { + "epoch": 3.1400394477317555, + "high_lr": 0.0003715789473684211, + "low_lr": 7.431578947368422e-06, + "step": 1194 + }, + { + "epoch": 3.1400394477317555, + "high_lr": 0.0003715789473684211, + "low_lr": 7.431578947368422e-06, + "step": 1194 + }, + { + "epoch": 3.1400394477317555, + "high_lr": 0.0003715789473684211, + "low_lr": 7.431578947368422e-06, + "step": 1194 + }, + { + "epoch": 3.14266929651545, + "grad_norm": 1.489599585533142, + "learning_rate": 0.00037105263157894737, + "loss": 1.2985, + "step": 1195 + }, + { + "epoch": 3.14266929651545, + "high_lr": 0.00037105263157894737, + "low_lr": 7.421052631578948e-06, + "step": 1195 + }, + { + "epoch": 3.14266929651545, + "high_lr": 0.00037105263157894737, + "low_lr": 7.421052631578948e-06, + "step": 1195 + }, + { + "epoch": 3.14266929651545, + "high_lr": 0.00037105263157894737, + "low_lr": 7.421052631578948e-06, + "step": 1195 + }, + { + "epoch": 3.14266929651545, + "high_lr": 0.00037105263157894737, + "low_lr": 7.421052631578948e-06, + "step": 1195 + }, + { + "epoch": 3.14266929651545, + "high_lr": 0.00037105263157894737, + "low_lr": 7.421052631578948e-06, + "step": 1195 + }, + { + "epoch": 3.14266929651545, + "high_lr": 0.00037105263157894737, + "low_lr": 7.421052631578948e-06, + "step": 1195 + }, + { + "epoch": 3.14266929651545, + "high_lr": 0.00037105263157894737, + "low_lr": 7.421052631578948e-06, + "step": 1195 + }, + { + "epoch": 3.14266929651545, + "high_lr": 0.00037105263157894737, + "low_lr": 7.421052631578948e-06, + "step": 1195 + }, + { + "epoch": 3.1452991452991452, + "grad_norm": 1.2693541049957275, + "learning_rate": 0.0003705263157894737, + "loss": 1.2973, + "step": 1196 + }, + { + "epoch": 3.1452991452991452, + "high_lr": 0.0003705263157894737, + "low_lr": 7.410526315789475e-06, + "step": 1196 + }, + { + "epoch": 3.1452991452991452, + "high_lr": 0.0003705263157894737, + "low_lr": 7.410526315789475e-06, + "step": 1196 + }, + { + "epoch": 3.1452991452991452, + "high_lr": 0.0003705263157894737, + "low_lr": 7.410526315789475e-06, + "step": 1196 + }, + { + "epoch": 3.1452991452991452, + "high_lr": 0.0003705263157894737, + "low_lr": 7.410526315789475e-06, + "step": 1196 + }, + { + "epoch": 3.1452991452991452, + "high_lr": 0.0003705263157894737, + "low_lr": 7.410526315789475e-06, + "step": 1196 + }, + { + "epoch": 3.1452991452991452, + "high_lr": 0.0003705263157894737, + "low_lr": 7.410526315789475e-06, + "step": 1196 + }, + { + "epoch": 3.1452991452991452, + "high_lr": 0.0003705263157894737, + "low_lr": 7.410526315789475e-06, + "step": 1196 + }, + { + "epoch": 3.1452991452991452, + "high_lr": 0.0003705263157894737, + "low_lr": 7.410526315789475e-06, + "step": 1196 + }, + { + "epoch": 3.1479289940828403, + "grad_norm": 1.2748855352401733, + "learning_rate": 0.00037, + "loss": 1.2723, + "step": 1197 + }, + { + "epoch": 3.1479289940828403, + "high_lr": 0.00037, + "low_lr": 7.4e-06, + "step": 1197 + }, + { + "epoch": 3.1479289940828403, + "high_lr": 0.00037, + "low_lr": 7.4e-06, + "step": 1197 + }, + { + "epoch": 3.1479289940828403, + "high_lr": 0.00037, + "low_lr": 7.4e-06, + "step": 1197 + }, + { + "epoch": 3.1479289940828403, + "high_lr": 0.00037, + "low_lr": 7.4e-06, + "step": 1197 + }, + { + "epoch": 3.1479289940828403, + "high_lr": 0.00037, + "low_lr": 7.4e-06, + "step": 1197 + }, + { + "epoch": 3.1479289940828403, + "high_lr": 0.00037, + "low_lr": 7.4e-06, + "step": 1197 + }, + { + "epoch": 3.1479289940828403, + "high_lr": 0.00037, + "low_lr": 7.4e-06, + "step": 1197 + }, + { + "epoch": 3.1479289940828403, + "high_lr": 0.00037, + "low_lr": 7.4e-06, + "step": 1197 + }, + { + "epoch": 3.150558842866535, + "grad_norm": 1.4245465993881226, + "learning_rate": 0.00036947368421052633, + "loss": 1.3359, + "step": 1198 + }, + { + "epoch": 3.150558842866535, + "high_lr": 0.00036947368421052633, + "low_lr": 7.3894736842105275e-06, + "step": 1198 + }, + { + "epoch": 3.150558842866535, + "high_lr": 0.00036947368421052633, + "low_lr": 7.3894736842105275e-06, + "step": 1198 + }, + { + "epoch": 3.150558842866535, + "high_lr": 0.00036947368421052633, + "low_lr": 7.3894736842105275e-06, + "step": 1198 + }, + { + "epoch": 3.150558842866535, + "high_lr": 0.00036947368421052633, + "low_lr": 7.3894736842105275e-06, + "step": 1198 + }, + { + "epoch": 3.150558842866535, + "high_lr": 0.00036947368421052633, + "low_lr": 7.3894736842105275e-06, + "step": 1198 + }, + { + "epoch": 3.150558842866535, + "high_lr": 0.00036947368421052633, + "low_lr": 7.3894736842105275e-06, + "step": 1198 + }, + { + "epoch": 3.150558842866535, + "high_lr": 0.00036947368421052633, + "low_lr": 7.3894736842105275e-06, + "step": 1198 + }, + { + "epoch": 3.150558842866535, + "high_lr": 0.00036947368421052633, + "low_lr": 7.3894736842105275e-06, + "step": 1198 + }, + { + "epoch": 3.15318869165023, + "grad_norm": 1.4283756017684937, + "learning_rate": 0.0003689473684210526, + "loss": 1.2856, + "step": 1199 + }, + { + "epoch": 3.15318869165023, + "high_lr": 0.0003689473684210526, + "low_lr": 7.378947368421053e-06, + "step": 1199 + }, + { + "epoch": 3.15318869165023, + "high_lr": 0.0003689473684210526, + "low_lr": 7.378947368421053e-06, + "step": 1199 + }, + { + "epoch": 3.15318869165023, + "high_lr": 0.0003689473684210526, + "low_lr": 7.378947368421053e-06, + "step": 1199 + }, + { + "epoch": 3.15318869165023, + "high_lr": 0.0003689473684210526, + "low_lr": 7.378947368421053e-06, + "step": 1199 + }, + { + "epoch": 3.15318869165023, + "high_lr": 0.0003689473684210526, + "low_lr": 7.378947368421053e-06, + "step": 1199 + }, + { + "epoch": 3.15318869165023, + "high_lr": 0.0003689473684210526, + "low_lr": 7.378947368421053e-06, + "step": 1199 + }, + { + "epoch": 3.15318869165023, + "high_lr": 0.0003689473684210526, + "low_lr": 7.378947368421053e-06, + "step": 1199 + }, + { + "epoch": 3.15318869165023, + "high_lr": 0.0003689473684210526, + "low_lr": 7.378947368421053e-06, + "step": 1199 + }, + { + "epoch": 3.155818540433925, + "grad_norm": 1.4153192043304443, + "learning_rate": 0.00036842105263157896, + "loss": 1.2831, + "step": 1200 + }, + { + "epoch": 3.155818540433925, + "high_lr": 0.00036842105263157896, + "low_lr": 7.368421052631579e-06, + "step": 1200 + }, + { + "epoch": 3.155818540433925, + "high_lr": 0.00036842105263157896, + "low_lr": 7.368421052631579e-06, + "step": 1200 + }, + { + "epoch": 3.155818540433925, + "high_lr": 0.00036842105263157896, + "low_lr": 7.368421052631579e-06, + "step": 1200 + }, + { + "epoch": 3.155818540433925, + "high_lr": 0.00036842105263157896, + "low_lr": 7.368421052631579e-06, + "step": 1200 + }, + { + "epoch": 3.155818540433925, + "high_lr": 0.00036842105263157896, + "low_lr": 7.368421052631579e-06, + "step": 1200 + }, + { + "epoch": 3.155818540433925, + "high_lr": 0.00036842105263157896, + "low_lr": 7.368421052631579e-06, + "step": 1200 + }, + { + "epoch": 3.155818540433925, + "high_lr": 0.00036842105263157896, + "low_lr": 7.368421052631579e-06, + "step": 1200 + }, + { + "epoch": 3.155818540433925, + "high_lr": 0.00036842105263157896, + "low_lr": 7.368421052631579e-06, + "step": 1200 + }, + { + "epoch": 3.15844838921762, + "grad_norm": 1.3777788877487183, + "learning_rate": 0.0003678947368421053, + "loss": 1.2857, + "step": 1201 + }, + { + "epoch": 3.15844838921762, + "high_lr": 0.0003678947368421053, + "low_lr": 7.3578947368421065e-06, + "step": 1201 + }, + { + "epoch": 3.15844838921762, + "high_lr": 0.0003678947368421053, + "low_lr": 7.3578947368421065e-06, + "step": 1201 + }, + { + "epoch": 3.15844838921762, + "high_lr": 0.0003678947368421053, + "low_lr": 7.3578947368421065e-06, + "step": 1201 + }, + { + "epoch": 3.15844838921762, + "high_lr": 0.0003678947368421053, + "low_lr": 7.3578947368421065e-06, + "step": 1201 + }, + { + "epoch": 3.15844838921762, + "high_lr": 0.0003678947368421053, + "low_lr": 7.3578947368421065e-06, + "step": 1201 + }, + { + "epoch": 3.15844838921762, + "high_lr": 0.0003678947368421053, + "low_lr": 7.3578947368421065e-06, + "step": 1201 + }, + { + "epoch": 3.15844838921762, + "high_lr": 0.0003678947368421053, + "low_lr": 7.3578947368421065e-06, + "step": 1201 + }, + { + "epoch": 3.15844838921762, + "high_lr": 0.0003678947368421053, + "low_lr": 7.3578947368421065e-06, + "step": 1201 + }, + { + "epoch": 3.161078238001315, + "grad_norm": 1.4032799005508423, + "learning_rate": 0.0003673684210526316, + "loss": 1.3104, + "step": 1202 + }, + { + "epoch": 3.161078238001315, + "high_lr": 0.0003673684210526316, + "low_lr": 7.347368421052632e-06, + "step": 1202 + }, + { + "epoch": 3.161078238001315, + "high_lr": 0.0003673684210526316, + "low_lr": 7.347368421052632e-06, + "step": 1202 + }, + { + "epoch": 3.161078238001315, + "high_lr": 0.0003673684210526316, + "low_lr": 7.347368421052632e-06, + "step": 1202 + }, + { + "epoch": 3.161078238001315, + "high_lr": 0.0003673684210526316, + "low_lr": 7.347368421052632e-06, + "step": 1202 + }, + { + "epoch": 3.161078238001315, + "high_lr": 0.0003673684210526316, + "low_lr": 7.347368421052632e-06, + "step": 1202 + }, + { + "epoch": 3.161078238001315, + "high_lr": 0.0003673684210526316, + "low_lr": 7.347368421052632e-06, + "step": 1202 + }, + { + "epoch": 3.161078238001315, + "high_lr": 0.0003673684210526316, + "low_lr": 7.347368421052632e-06, + "step": 1202 + }, + { + "epoch": 3.161078238001315, + "high_lr": 0.0003673684210526316, + "low_lr": 7.347368421052632e-06, + "step": 1202 + }, + { + "epoch": 3.16370808678501, + "grad_norm": 1.3944417238235474, + "learning_rate": 0.0003668421052631579, + "loss": 1.2586, + "step": 1203 + }, + { + "epoch": 3.16370808678501, + "high_lr": 0.0003668421052631579, + "low_lr": 7.336842105263159e-06, + "step": 1203 + }, + { + "epoch": 3.16370808678501, + "high_lr": 0.0003668421052631579, + "low_lr": 7.336842105263159e-06, + "step": 1203 + }, + { + "epoch": 3.16370808678501, + "high_lr": 0.0003668421052631579, + "low_lr": 7.336842105263159e-06, + "step": 1203 + }, + { + "epoch": 3.16370808678501, + "high_lr": 0.0003668421052631579, + "low_lr": 7.336842105263159e-06, + "step": 1203 + }, + { + "epoch": 3.16370808678501, + "high_lr": 0.0003668421052631579, + "low_lr": 7.336842105263159e-06, + "step": 1203 + }, + { + "epoch": 3.16370808678501, + "high_lr": 0.0003668421052631579, + "low_lr": 7.336842105263159e-06, + "step": 1203 + }, + { + "epoch": 3.16370808678501, + "high_lr": 0.0003668421052631579, + "low_lr": 7.336842105263159e-06, + "step": 1203 + }, + { + "epoch": 3.16370808678501, + "high_lr": 0.0003668421052631579, + "low_lr": 7.336842105263159e-06, + "step": 1203 + }, + { + "epoch": 3.1663379355687047, + "grad_norm": 1.3695789575576782, + "learning_rate": 0.0003663157894736842, + "loss": 1.2655, + "step": 1204 + }, + { + "epoch": 3.1663379355687047, + "high_lr": 0.0003663157894736842, + "low_lr": 7.326315789473685e-06, + "step": 1204 + }, + { + "epoch": 3.1663379355687047, + "high_lr": 0.0003663157894736842, + "low_lr": 7.326315789473685e-06, + "step": 1204 + }, + { + "epoch": 3.1663379355687047, + "high_lr": 0.0003663157894736842, + "low_lr": 7.326315789473685e-06, + "step": 1204 + }, + { + "epoch": 3.1663379355687047, + "high_lr": 0.0003663157894736842, + "low_lr": 7.326315789473685e-06, + "step": 1204 + }, + { + "epoch": 3.1663379355687047, + "high_lr": 0.0003663157894736842, + "low_lr": 7.326315789473685e-06, + "step": 1204 + }, + { + "epoch": 3.1663379355687047, + "high_lr": 0.0003663157894736842, + "low_lr": 7.326315789473685e-06, + "step": 1204 + }, + { + "epoch": 3.1663379355687047, + "high_lr": 0.0003663157894736842, + "low_lr": 7.326315789473685e-06, + "step": 1204 + }, + { + "epoch": 3.1663379355687047, + "high_lr": 0.0003663157894736842, + "low_lr": 7.326315789473685e-06, + "step": 1204 + }, + { + "epoch": 3.1689677843524, + "grad_norm": 1.3784259557724, + "learning_rate": 0.00036578947368421055, + "loss": 1.316, + "step": 1205 + }, + { + "epoch": 3.1689677843524, + "high_lr": 0.00036578947368421055, + "low_lr": 7.315789473684212e-06, + "step": 1205 + }, + { + "epoch": 3.1689677843524, + "high_lr": 0.00036578947368421055, + "low_lr": 7.315789473684212e-06, + "step": 1205 + }, + { + "epoch": 3.1689677843524, + "high_lr": 0.00036578947368421055, + "low_lr": 7.315789473684212e-06, + "step": 1205 + }, + { + "epoch": 3.1689677843524, + "high_lr": 0.00036578947368421055, + "low_lr": 7.315789473684212e-06, + "step": 1205 + }, + { + "epoch": 3.1689677843524, + "high_lr": 0.00036578947368421055, + "low_lr": 7.315789473684212e-06, + "step": 1205 + }, + { + "epoch": 3.1689677843524, + "high_lr": 0.00036578947368421055, + "low_lr": 7.315789473684212e-06, + "step": 1205 + }, + { + "epoch": 3.1689677843524, + "high_lr": 0.00036578947368421055, + "low_lr": 7.315789473684212e-06, + "step": 1205 + }, + { + "epoch": 3.1689677843524, + "high_lr": 0.00036578947368421055, + "low_lr": 7.315789473684212e-06, + "step": 1205 + }, + { + "epoch": 3.171597633136095, + "grad_norm": 1.284883737564087, + "learning_rate": 0.00036526315789473684, + "loss": 1.3479, + "step": 1206 + }, + { + "epoch": 3.171597633136095, + "high_lr": 0.00036526315789473684, + "low_lr": 7.305263157894737e-06, + "step": 1206 + }, + { + "epoch": 3.171597633136095, + "high_lr": 0.00036526315789473684, + "low_lr": 7.305263157894737e-06, + "step": 1206 + }, + { + "epoch": 3.171597633136095, + "high_lr": 0.00036526315789473684, + "low_lr": 7.305263157894737e-06, + "step": 1206 + }, + { + "epoch": 3.171597633136095, + "high_lr": 0.00036526315789473684, + "low_lr": 7.305263157894737e-06, + "step": 1206 + }, + { + "epoch": 3.171597633136095, + "high_lr": 0.00036526315789473684, + "low_lr": 7.305263157894737e-06, + "step": 1206 + }, + { + "epoch": 3.171597633136095, + "high_lr": 0.00036526315789473684, + "low_lr": 7.305263157894737e-06, + "step": 1206 + }, + { + "epoch": 3.171597633136095, + "high_lr": 0.00036526315789473684, + "low_lr": 7.305263157894737e-06, + "step": 1206 + }, + { + "epoch": 3.171597633136095, + "high_lr": 0.00036526315789473684, + "low_lr": 7.305263157894737e-06, + "step": 1206 + }, + { + "epoch": 3.1742274819197895, + "grad_norm": 1.364266037940979, + "learning_rate": 0.0003647368421052631, + "loss": 1.2961, + "step": 1207 + }, + { + "epoch": 3.1742274819197895, + "high_lr": 0.0003647368421052631, + "low_lr": 7.2947368421052636e-06, + "step": 1207 + }, + { + "epoch": 3.1742274819197895, + "high_lr": 0.0003647368421052631, + "low_lr": 7.2947368421052636e-06, + "step": 1207 + }, + { + "epoch": 3.1742274819197895, + "high_lr": 0.0003647368421052631, + "low_lr": 7.2947368421052636e-06, + "step": 1207 + }, + { + "epoch": 3.1742274819197895, + "high_lr": 0.0003647368421052631, + "low_lr": 7.2947368421052636e-06, + "step": 1207 + }, + { + "epoch": 3.1742274819197895, + "high_lr": 0.0003647368421052631, + "low_lr": 7.2947368421052636e-06, + "step": 1207 + }, + { + "epoch": 3.1742274819197895, + "high_lr": 0.0003647368421052631, + "low_lr": 7.2947368421052636e-06, + "step": 1207 + }, + { + "epoch": 3.1742274819197895, + "high_lr": 0.0003647368421052631, + "low_lr": 7.2947368421052636e-06, + "step": 1207 + }, + { + "epoch": 3.1742274819197895, + "high_lr": 0.0003647368421052631, + "low_lr": 7.2947368421052636e-06, + "step": 1207 + }, + { + "epoch": 3.1768573307034846, + "grad_norm": 1.5872355699539185, + "learning_rate": 0.0003642105263157895, + "loss": 1.2343, + "step": 1208 + }, + { + "epoch": 3.1768573307034846, + "high_lr": 0.0003642105263157895, + "low_lr": 7.28421052631579e-06, + "step": 1208 + }, + { + "epoch": 3.1768573307034846, + "high_lr": 0.0003642105263157895, + "low_lr": 7.28421052631579e-06, + "step": 1208 + }, + { + "epoch": 3.1768573307034846, + "high_lr": 0.0003642105263157895, + "low_lr": 7.28421052631579e-06, + "step": 1208 + }, + { + "epoch": 3.1768573307034846, + "high_lr": 0.0003642105263157895, + "low_lr": 7.28421052631579e-06, + "step": 1208 + }, + { + "epoch": 3.1768573307034846, + "high_lr": 0.0003642105263157895, + "low_lr": 7.28421052631579e-06, + "step": 1208 + }, + { + "epoch": 3.1768573307034846, + "high_lr": 0.0003642105263157895, + "low_lr": 7.28421052631579e-06, + "step": 1208 + }, + { + "epoch": 3.1768573307034846, + "high_lr": 0.0003642105263157895, + "low_lr": 7.28421052631579e-06, + "step": 1208 + }, + { + "epoch": 3.1768573307034846, + "high_lr": 0.0003642105263157895, + "low_lr": 7.28421052631579e-06, + "step": 1208 + }, + { + "epoch": 3.1794871794871793, + "grad_norm": 1.3258522748947144, + "learning_rate": 0.0003636842105263158, + "loss": 1.2877, + "step": 1209 + }, + { + "epoch": 3.1794871794871793, + "high_lr": 0.0003636842105263158, + "low_lr": 7.273684210526316e-06, + "step": 1209 + }, + { + "epoch": 3.1794871794871793, + "high_lr": 0.0003636842105263158, + "low_lr": 7.273684210526316e-06, + "step": 1209 + }, + { + "epoch": 3.1794871794871793, + "high_lr": 0.0003636842105263158, + "low_lr": 7.273684210526316e-06, + "step": 1209 + }, + { + "epoch": 3.1794871794871793, + "high_lr": 0.0003636842105263158, + "low_lr": 7.273684210526316e-06, + "step": 1209 + }, + { + "epoch": 3.1794871794871793, + "high_lr": 0.0003636842105263158, + "low_lr": 7.273684210526316e-06, + "step": 1209 + }, + { + "epoch": 3.1794871794871793, + "high_lr": 0.0003636842105263158, + "low_lr": 7.273684210526316e-06, + "step": 1209 + }, + { + "epoch": 3.1794871794871793, + "high_lr": 0.0003636842105263158, + "low_lr": 7.273684210526316e-06, + "step": 1209 + }, + { + "epoch": 3.1794871794871793, + "high_lr": 0.0003636842105263158, + "low_lr": 7.273684210526316e-06, + "step": 1209 + }, + { + "epoch": 3.1821170282708744, + "grad_norm": 1.4260060787200928, + "learning_rate": 0.00036315789473684214, + "loss": 1.3165, + "step": 1210 + }, + { + "epoch": 3.1821170282708744, + "high_lr": 0.00036315789473684214, + "low_lr": 7.263157894736843e-06, + "step": 1210 + }, + { + "epoch": 3.1821170282708744, + "high_lr": 0.00036315789473684214, + "low_lr": 7.263157894736843e-06, + "step": 1210 + }, + { + "epoch": 3.1821170282708744, + "high_lr": 0.00036315789473684214, + "low_lr": 7.263157894736843e-06, + "step": 1210 + }, + { + "epoch": 3.1821170282708744, + "high_lr": 0.00036315789473684214, + "low_lr": 7.263157894736843e-06, + "step": 1210 + }, + { + "epoch": 3.1821170282708744, + "high_lr": 0.00036315789473684214, + "low_lr": 7.263157894736843e-06, + "step": 1210 + }, + { + "epoch": 3.1821170282708744, + "high_lr": 0.00036315789473684214, + "low_lr": 7.263157894736843e-06, + "step": 1210 + }, + { + "epoch": 3.1821170282708744, + "high_lr": 0.00036315789473684214, + "low_lr": 7.263157894736843e-06, + "step": 1210 + }, + { + "epoch": 3.1821170282708744, + "high_lr": 0.00036315789473684214, + "low_lr": 7.263157894736843e-06, + "step": 1210 + }, + { + "epoch": 3.1847468770545695, + "grad_norm": 1.4013381004333496, + "learning_rate": 0.00036263157894736843, + "loss": 1.3305, + "step": 1211 + }, + { + "epoch": 3.1847468770545695, + "high_lr": 0.00036263157894736843, + "low_lr": 7.252631578947369e-06, + "step": 1211 + }, + { + "epoch": 3.1847468770545695, + "high_lr": 0.00036263157894736843, + "low_lr": 7.252631578947369e-06, + "step": 1211 + }, + { + "epoch": 3.1847468770545695, + "high_lr": 0.00036263157894736843, + "low_lr": 7.252631578947369e-06, + "step": 1211 + }, + { + "epoch": 3.1847468770545695, + "high_lr": 0.00036263157894736843, + "low_lr": 7.252631578947369e-06, + "step": 1211 + }, + { + "epoch": 3.1847468770545695, + "high_lr": 0.00036263157894736843, + "low_lr": 7.252631578947369e-06, + "step": 1211 + }, + { + "epoch": 3.1847468770545695, + "high_lr": 0.00036263157894736843, + "low_lr": 7.252631578947369e-06, + "step": 1211 + }, + { + "epoch": 3.1847468770545695, + "high_lr": 0.00036263157894736843, + "low_lr": 7.252631578947369e-06, + "step": 1211 + }, + { + "epoch": 3.1847468770545695, + "high_lr": 0.00036263157894736843, + "low_lr": 7.252631578947369e-06, + "step": 1211 + }, + { + "epoch": 3.187376725838264, + "grad_norm": 1.4238418340682983, + "learning_rate": 0.00036210526315789477, + "loss": 1.3362, + "step": 1212 + }, + { + "epoch": 3.187376725838264, + "high_lr": 0.00036210526315789477, + "low_lr": 7.242105263157896e-06, + "step": 1212 + }, + { + "epoch": 3.187376725838264, + "high_lr": 0.00036210526315789477, + "low_lr": 7.242105263157896e-06, + "step": 1212 + }, + { + "epoch": 3.187376725838264, + "high_lr": 0.00036210526315789477, + "low_lr": 7.242105263157896e-06, + "step": 1212 + }, + { + "epoch": 3.187376725838264, + "high_lr": 0.00036210526315789477, + "low_lr": 7.242105263157896e-06, + "step": 1212 + }, + { + "epoch": 3.187376725838264, + "high_lr": 0.00036210526315789477, + "low_lr": 7.242105263157896e-06, + "step": 1212 + }, + { + "epoch": 3.187376725838264, + "high_lr": 0.00036210526315789477, + "low_lr": 7.242105263157896e-06, + "step": 1212 + }, + { + "epoch": 3.187376725838264, + "high_lr": 0.00036210526315789477, + "low_lr": 7.242105263157896e-06, + "step": 1212 + }, + { + "epoch": 3.187376725838264, + "high_lr": 0.00036210526315789477, + "low_lr": 7.242105263157896e-06, + "step": 1212 + }, + { + "epoch": 3.1900065746219592, + "grad_norm": 1.3276677131652832, + "learning_rate": 0.00036157894736842106, + "loss": 1.2812, + "step": 1213 + }, + { + "epoch": 3.1900065746219592, + "high_lr": 0.00036157894736842106, + "low_lr": 7.2315789473684215e-06, + "step": 1213 + }, + { + "epoch": 3.1900065746219592, + "high_lr": 0.00036157894736842106, + "low_lr": 7.2315789473684215e-06, + "step": 1213 + }, + { + "epoch": 3.1900065746219592, + "high_lr": 0.00036157894736842106, + "low_lr": 7.2315789473684215e-06, + "step": 1213 + }, + { + "epoch": 3.1900065746219592, + "high_lr": 0.00036157894736842106, + "low_lr": 7.2315789473684215e-06, + "step": 1213 + }, + { + "epoch": 3.1900065746219592, + "high_lr": 0.00036157894736842106, + "low_lr": 7.2315789473684215e-06, + "step": 1213 + }, + { + "epoch": 3.1900065746219592, + "high_lr": 0.00036157894736842106, + "low_lr": 7.2315789473684215e-06, + "step": 1213 + }, + { + "epoch": 3.1900065746219592, + "high_lr": 0.00036157894736842106, + "low_lr": 7.2315789473684215e-06, + "step": 1213 + }, + { + "epoch": 3.1900065746219592, + "high_lr": 0.00036157894736842106, + "low_lr": 7.2315789473684215e-06, + "step": 1213 + }, + { + "epoch": 3.1926364234056543, + "grad_norm": 1.320291519165039, + "learning_rate": 0.00036105263157894734, + "loss": 1.3383, + "step": 1214 + }, + { + "epoch": 3.1926364234056543, + "high_lr": 0.00036105263157894734, + "low_lr": 7.221052631578948e-06, + "step": 1214 + }, + { + "epoch": 3.1926364234056543, + "high_lr": 0.00036105263157894734, + "low_lr": 7.221052631578948e-06, + "step": 1214 + }, + { + "epoch": 3.1926364234056543, + "high_lr": 0.00036105263157894734, + "low_lr": 7.221052631578948e-06, + "step": 1214 + }, + { + "epoch": 3.1926364234056543, + "high_lr": 0.00036105263157894734, + "low_lr": 7.221052631578948e-06, + "step": 1214 + }, + { + "epoch": 3.1926364234056543, + "high_lr": 0.00036105263157894734, + "low_lr": 7.221052631578948e-06, + "step": 1214 + }, + { + "epoch": 3.1926364234056543, + "high_lr": 0.00036105263157894734, + "low_lr": 7.221052631578948e-06, + "step": 1214 + }, + { + "epoch": 3.1926364234056543, + "high_lr": 0.00036105263157894734, + "low_lr": 7.221052631578948e-06, + "step": 1214 + }, + { + "epoch": 3.1926364234056543, + "high_lr": 0.00036105263157894734, + "low_lr": 7.221052631578948e-06, + "step": 1214 + }, + { + "epoch": 3.195266272189349, + "grad_norm": 1.5130553245544434, + "learning_rate": 0.0003605263157894737, + "loss": 1.3628, + "step": 1215 + }, + { + "epoch": 3.195266272189349, + "high_lr": 0.0003605263157894737, + "low_lr": 7.210526315789474e-06, + "step": 1215 + }, + { + "epoch": 3.195266272189349, + "high_lr": 0.0003605263157894737, + "low_lr": 7.210526315789474e-06, + "step": 1215 + }, + { + "epoch": 3.195266272189349, + "high_lr": 0.0003605263157894737, + "low_lr": 7.210526315789474e-06, + "step": 1215 + }, + { + "epoch": 3.195266272189349, + "high_lr": 0.0003605263157894737, + "low_lr": 7.210526315789474e-06, + "step": 1215 + }, + { + "epoch": 3.195266272189349, + "high_lr": 0.0003605263157894737, + "low_lr": 7.210526315789474e-06, + "step": 1215 + }, + { + "epoch": 3.195266272189349, + "high_lr": 0.0003605263157894737, + "low_lr": 7.210526315789474e-06, + "step": 1215 + }, + { + "epoch": 3.195266272189349, + "high_lr": 0.0003605263157894737, + "low_lr": 7.210526315789474e-06, + "step": 1215 + }, + { + "epoch": 3.195266272189349, + "high_lr": 0.0003605263157894737, + "low_lr": 7.210526315789474e-06, + "step": 1215 + }, + { + "epoch": 3.197896120973044, + "grad_norm": 1.4092506170272827, + "learning_rate": 0.00035999999999999997, + "loss": 1.3085, + "step": 1216 + }, + { + "epoch": 3.197896120973044, + "high_lr": 0.00035999999999999997, + "low_lr": 7.2000000000000005e-06, + "step": 1216 + }, + { + "epoch": 3.197896120973044, + "high_lr": 0.00035999999999999997, + "low_lr": 7.2000000000000005e-06, + "step": 1216 + }, + { + "epoch": 3.197896120973044, + "high_lr": 0.00035999999999999997, + "low_lr": 7.2000000000000005e-06, + "step": 1216 + }, + { + "epoch": 3.197896120973044, + "high_lr": 0.00035999999999999997, + "low_lr": 7.2000000000000005e-06, + "step": 1216 + }, + { + "epoch": 3.197896120973044, + "high_lr": 0.00035999999999999997, + "low_lr": 7.2000000000000005e-06, + "step": 1216 + }, + { + "epoch": 3.197896120973044, + "high_lr": 0.00035999999999999997, + "low_lr": 7.2000000000000005e-06, + "step": 1216 + }, + { + "epoch": 3.197896120973044, + "high_lr": 0.00035999999999999997, + "low_lr": 7.2000000000000005e-06, + "step": 1216 + }, + { + "epoch": 3.197896120973044, + "high_lr": 0.00035999999999999997, + "low_lr": 7.2000000000000005e-06, + "step": 1216 + }, + { + "epoch": 3.2005259697567388, + "grad_norm": 1.3080147504806519, + "learning_rate": 0.00035947368421052636, + "loss": 1.2738, + "step": 1217 + }, + { + "epoch": 3.2005259697567388, + "high_lr": 0.00035947368421052636, + "low_lr": 7.189473684210527e-06, + "step": 1217 + }, + { + "epoch": 3.2005259697567388, + "high_lr": 0.00035947368421052636, + "low_lr": 7.189473684210527e-06, + "step": 1217 + }, + { + "epoch": 3.2005259697567388, + "high_lr": 0.00035947368421052636, + "low_lr": 7.189473684210527e-06, + "step": 1217 + }, + { + "epoch": 3.2005259697567388, + "high_lr": 0.00035947368421052636, + "low_lr": 7.189473684210527e-06, + "step": 1217 + }, + { + "epoch": 3.2005259697567388, + "high_lr": 0.00035947368421052636, + "low_lr": 7.189473684210527e-06, + "step": 1217 + }, + { + "epoch": 3.2005259697567388, + "high_lr": 0.00035947368421052636, + "low_lr": 7.189473684210527e-06, + "step": 1217 + }, + { + "epoch": 3.2005259697567388, + "high_lr": 0.00035947368421052636, + "low_lr": 7.189473684210527e-06, + "step": 1217 + }, + { + "epoch": 3.2005259697567388, + "high_lr": 0.00035947368421052636, + "low_lr": 7.189473684210527e-06, + "step": 1217 + }, + { + "epoch": 3.203155818540434, + "grad_norm": 1.2631381750106812, + "learning_rate": 0.00035894736842105265, + "loss": 1.3021, + "step": 1218 + }, + { + "epoch": 3.203155818540434, + "high_lr": 0.00035894736842105265, + "low_lr": 7.178947368421053e-06, + "step": 1218 + }, + { + "epoch": 3.203155818540434, + "high_lr": 0.00035894736842105265, + "low_lr": 7.178947368421053e-06, + "step": 1218 + }, + { + "epoch": 3.203155818540434, + "high_lr": 0.00035894736842105265, + "low_lr": 7.178947368421053e-06, + "step": 1218 + }, + { + "epoch": 3.203155818540434, + "high_lr": 0.00035894736842105265, + "low_lr": 7.178947368421053e-06, + "step": 1218 + }, + { + "epoch": 3.203155818540434, + "high_lr": 0.00035894736842105265, + "low_lr": 7.178947368421053e-06, + "step": 1218 + }, + { + "epoch": 3.203155818540434, + "high_lr": 0.00035894736842105265, + "low_lr": 7.178947368421053e-06, + "step": 1218 + }, + { + "epoch": 3.203155818540434, + "high_lr": 0.00035894736842105265, + "low_lr": 7.178947368421053e-06, + "step": 1218 + }, + { + "epoch": 3.203155818540434, + "high_lr": 0.00035894736842105265, + "low_lr": 7.178947368421053e-06, + "step": 1218 + }, + { + "epoch": 3.205785667324129, + "grad_norm": 1.3651789426803589, + "learning_rate": 0.000358421052631579, + "loss": 1.323, + "step": 1219 + }, + { + "epoch": 3.205785667324129, + "high_lr": 0.000358421052631579, + "low_lr": 7.16842105263158e-06, + "step": 1219 + }, + { + "epoch": 3.205785667324129, + "high_lr": 0.000358421052631579, + "low_lr": 7.16842105263158e-06, + "step": 1219 + }, + { + "epoch": 3.205785667324129, + "high_lr": 0.000358421052631579, + "low_lr": 7.16842105263158e-06, + "step": 1219 + }, + { + "epoch": 3.205785667324129, + "high_lr": 0.000358421052631579, + "low_lr": 7.16842105263158e-06, + "step": 1219 + }, + { + "epoch": 3.205785667324129, + "high_lr": 0.000358421052631579, + "low_lr": 7.16842105263158e-06, + "step": 1219 + }, + { + "epoch": 3.205785667324129, + "high_lr": 0.000358421052631579, + "low_lr": 7.16842105263158e-06, + "step": 1219 + }, + { + "epoch": 3.205785667324129, + "high_lr": 0.000358421052631579, + "low_lr": 7.16842105263158e-06, + "step": 1219 + }, + { + "epoch": 3.205785667324129, + "high_lr": 0.000358421052631579, + "low_lr": 7.16842105263158e-06, + "step": 1219 + }, + { + "epoch": 3.2084155161078236, + "grad_norm": 1.3125711679458618, + "learning_rate": 0.0003578947368421053, + "loss": 1.3265, + "step": 1220 + }, + { + "epoch": 3.2084155161078236, + "high_lr": 0.0003578947368421053, + "low_lr": 7.157894736842106e-06, + "step": 1220 + }, + { + "epoch": 3.2084155161078236, + "high_lr": 0.0003578947368421053, + "low_lr": 7.157894736842106e-06, + "step": 1220 + }, + { + "epoch": 3.2084155161078236, + "high_lr": 0.0003578947368421053, + "low_lr": 7.157894736842106e-06, + "step": 1220 + }, + { + "epoch": 3.2084155161078236, + "high_lr": 0.0003578947368421053, + "low_lr": 7.157894736842106e-06, + "step": 1220 + }, + { + "epoch": 3.2084155161078236, + "high_lr": 0.0003578947368421053, + "low_lr": 7.157894736842106e-06, + "step": 1220 + }, + { + "epoch": 3.2084155161078236, + "high_lr": 0.0003578947368421053, + "low_lr": 7.157894736842106e-06, + "step": 1220 + }, + { + "epoch": 3.2084155161078236, + "high_lr": 0.0003578947368421053, + "low_lr": 7.157894736842106e-06, + "step": 1220 + }, + { + "epoch": 3.2084155161078236, + "high_lr": 0.0003578947368421053, + "low_lr": 7.157894736842106e-06, + "step": 1220 + }, + { + "epoch": 3.2110453648915187, + "grad_norm": 1.3207658529281616, + "learning_rate": 0.00035736842105263156, + "loss": 1.2898, + "step": 1221 + }, + { + "epoch": 3.2110453648915187, + "high_lr": 0.00035736842105263156, + "low_lr": 7.147368421052631e-06, + "step": 1221 + }, + { + "epoch": 3.2110453648915187, + "high_lr": 0.00035736842105263156, + "low_lr": 7.147368421052631e-06, + "step": 1221 + }, + { + "epoch": 3.2110453648915187, + "high_lr": 0.00035736842105263156, + "low_lr": 7.147368421052631e-06, + "step": 1221 + }, + { + "epoch": 3.2110453648915187, + "high_lr": 0.00035736842105263156, + "low_lr": 7.147368421052631e-06, + "step": 1221 + }, + { + "epoch": 3.2110453648915187, + "high_lr": 0.00035736842105263156, + "low_lr": 7.147368421052631e-06, + "step": 1221 + }, + { + "epoch": 3.2110453648915187, + "high_lr": 0.00035736842105263156, + "low_lr": 7.147368421052631e-06, + "step": 1221 + }, + { + "epoch": 3.2110453648915187, + "high_lr": 0.00035736842105263156, + "low_lr": 7.147368421052631e-06, + "step": 1221 + }, + { + "epoch": 3.2110453648915187, + "high_lr": 0.00035736842105263156, + "low_lr": 7.147368421052631e-06, + "step": 1221 + }, + { + "epoch": 3.213675213675214, + "grad_norm": 1.4371613264083862, + "learning_rate": 0.0003568421052631579, + "loss": 1.3598, + "step": 1222 + }, + { + "epoch": 3.213675213675214, + "high_lr": 0.0003568421052631579, + "low_lr": 7.1368421052631585e-06, + "step": 1222 + }, + { + "epoch": 3.213675213675214, + "high_lr": 0.0003568421052631579, + "low_lr": 7.1368421052631585e-06, + "step": 1222 + }, + { + "epoch": 3.213675213675214, + "high_lr": 0.0003568421052631579, + "low_lr": 7.1368421052631585e-06, + "step": 1222 + }, + { + "epoch": 3.213675213675214, + "high_lr": 0.0003568421052631579, + "low_lr": 7.1368421052631585e-06, + "step": 1222 + }, + { + "epoch": 3.213675213675214, + "high_lr": 0.0003568421052631579, + "low_lr": 7.1368421052631585e-06, + "step": 1222 + }, + { + "epoch": 3.213675213675214, + "high_lr": 0.0003568421052631579, + "low_lr": 7.1368421052631585e-06, + "step": 1222 + }, + { + "epoch": 3.213675213675214, + "high_lr": 0.0003568421052631579, + "low_lr": 7.1368421052631585e-06, + "step": 1222 + }, + { + "epoch": 3.213675213675214, + "high_lr": 0.0003568421052631579, + "low_lr": 7.1368421052631585e-06, + "step": 1222 + }, + { + "epoch": 3.2163050624589085, + "grad_norm": 1.6412293910980225, + "learning_rate": 0.0003563157894736842, + "loss": 1.2523, + "step": 1223 + }, + { + "epoch": 3.2163050624589085, + "high_lr": 0.0003563157894736842, + "low_lr": 7.126315789473685e-06, + "step": 1223 + }, + { + "epoch": 3.2163050624589085, + "high_lr": 0.0003563157894736842, + "low_lr": 7.126315789473685e-06, + "step": 1223 + }, + { + "epoch": 3.2163050624589085, + "high_lr": 0.0003563157894736842, + "low_lr": 7.126315789473685e-06, + "step": 1223 + }, + { + "epoch": 3.2163050624589085, + "high_lr": 0.0003563157894736842, + "low_lr": 7.126315789473685e-06, + "step": 1223 + }, + { + "epoch": 3.2163050624589085, + "high_lr": 0.0003563157894736842, + "low_lr": 7.126315789473685e-06, + "step": 1223 + }, + { + "epoch": 3.2163050624589085, + "high_lr": 0.0003563157894736842, + "low_lr": 7.126315789473685e-06, + "step": 1223 + }, + { + "epoch": 3.2163050624589085, + "high_lr": 0.0003563157894736842, + "low_lr": 7.126315789473685e-06, + "step": 1223 + }, + { + "epoch": 3.2163050624589085, + "high_lr": 0.0003563157894736842, + "low_lr": 7.126315789473685e-06, + "step": 1223 + }, + { + "epoch": 3.2189349112426036, + "grad_norm": 1.2812743186950684, + "learning_rate": 0.0003557894736842105, + "loss": 1.3104, + "step": 1224 + }, + { + "epoch": 3.2189349112426036, + "high_lr": 0.0003557894736842105, + "low_lr": 7.115789473684211e-06, + "step": 1224 + }, + { + "epoch": 3.2189349112426036, + "high_lr": 0.0003557894736842105, + "low_lr": 7.115789473684211e-06, + "step": 1224 + }, + { + "epoch": 3.2189349112426036, + "high_lr": 0.0003557894736842105, + "low_lr": 7.115789473684211e-06, + "step": 1224 + }, + { + "epoch": 3.2189349112426036, + "high_lr": 0.0003557894736842105, + "low_lr": 7.115789473684211e-06, + "step": 1224 + }, + { + "epoch": 3.2189349112426036, + "high_lr": 0.0003557894736842105, + "low_lr": 7.115789473684211e-06, + "step": 1224 + }, + { + "epoch": 3.2189349112426036, + "high_lr": 0.0003557894736842105, + "low_lr": 7.115789473684211e-06, + "step": 1224 + }, + { + "epoch": 3.2189349112426036, + "high_lr": 0.0003557894736842105, + "low_lr": 7.115789473684211e-06, + "step": 1224 + }, + { + "epoch": 3.2189349112426036, + "high_lr": 0.0003557894736842105, + "low_lr": 7.115789473684211e-06, + "step": 1224 + }, + { + "epoch": 3.2215647600262987, + "grad_norm": 1.2608232498168945, + "learning_rate": 0.00035526315789473687, + "loss": 1.2855, + "step": 1225 + }, + { + "epoch": 3.2215647600262987, + "high_lr": 0.00035526315789473687, + "low_lr": 7.1052631578947375e-06, + "step": 1225 + }, + { + "epoch": 3.2215647600262987, + "high_lr": 0.00035526315789473687, + "low_lr": 7.1052631578947375e-06, + "step": 1225 + }, + { + "epoch": 3.2215647600262987, + "high_lr": 0.00035526315789473687, + "low_lr": 7.1052631578947375e-06, + "step": 1225 + }, + { + "epoch": 3.2215647600262987, + "high_lr": 0.00035526315789473687, + "low_lr": 7.1052631578947375e-06, + "step": 1225 + }, + { + "epoch": 3.2215647600262987, + "high_lr": 0.00035526315789473687, + "low_lr": 7.1052631578947375e-06, + "step": 1225 + }, + { + "epoch": 3.2215647600262987, + "high_lr": 0.00035526315789473687, + "low_lr": 7.1052631578947375e-06, + "step": 1225 + }, + { + "epoch": 3.2215647600262987, + "high_lr": 0.00035526315789473687, + "low_lr": 7.1052631578947375e-06, + "step": 1225 + }, + { + "epoch": 3.2215647600262987, + "high_lr": 0.00035526315789473687, + "low_lr": 7.1052631578947375e-06, + "step": 1225 + }, + { + "epoch": 3.2241946088099933, + "grad_norm": 1.4294136762619019, + "learning_rate": 0.0003547368421052632, + "loss": 1.288, + "step": 1226 + }, + { + "epoch": 3.2241946088099933, + "high_lr": 0.0003547368421052632, + "low_lr": 7.094736842105265e-06, + "step": 1226 + }, + { + "epoch": 3.2241946088099933, + "high_lr": 0.0003547368421052632, + "low_lr": 7.094736842105265e-06, + "step": 1226 + }, + { + "epoch": 3.2241946088099933, + "high_lr": 0.0003547368421052632, + "low_lr": 7.094736842105265e-06, + "step": 1226 + }, + { + "epoch": 3.2241946088099933, + "high_lr": 0.0003547368421052632, + "low_lr": 7.094736842105265e-06, + "step": 1226 + }, + { + "epoch": 3.2241946088099933, + "high_lr": 0.0003547368421052632, + "low_lr": 7.094736842105265e-06, + "step": 1226 + }, + { + "epoch": 3.2241946088099933, + "high_lr": 0.0003547368421052632, + "low_lr": 7.094736842105265e-06, + "step": 1226 + }, + { + "epoch": 3.2241946088099933, + "high_lr": 0.0003547368421052632, + "low_lr": 7.094736842105265e-06, + "step": 1226 + }, + { + "epoch": 3.2241946088099933, + "high_lr": 0.0003547368421052632, + "low_lr": 7.094736842105265e-06, + "step": 1226 + }, + { + "epoch": 3.2268244575936884, + "grad_norm": 1.3366351127624512, + "learning_rate": 0.0003542105263157895, + "loss": 1.2917, + "step": 1227 + }, + { + "epoch": 3.2268244575936884, + "high_lr": 0.0003542105263157895, + "low_lr": 7.08421052631579e-06, + "step": 1227 + }, + { + "epoch": 3.2268244575936884, + "high_lr": 0.0003542105263157895, + "low_lr": 7.08421052631579e-06, + "step": 1227 + }, + { + "epoch": 3.2268244575936884, + "high_lr": 0.0003542105263157895, + "low_lr": 7.08421052631579e-06, + "step": 1227 + }, + { + "epoch": 3.2268244575936884, + "high_lr": 0.0003542105263157895, + "low_lr": 7.08421052631579e-06, + "step": 1227 + }, + { + "epoch": 3.2268244575936884, + "high_lr": 0.0003542105263157895, + "low_lr": 7.08421052631579e-06, + "step": 1227 + }, + { + "epoch": 3.2268244575936884, + "high_lr": 0.0003542105263157895, + "low_lr": 7.08421052631579e-06, + "step": 1227 + }, + { + "epoch": 3.2268244575936884, + "high_lr": 0.0003542105263157895, + "low_lr": 7.08421052631579e-06, + "step": 1227 + }, + { + "epoch": 3.2268244575936884, + "high_lr": 0.0003542105263157895, + "low_lr": 7.08421052631579e-06, + "step": 1227 + }, + { + "epoch": 3.2294543063773835, + "grad_norm": 1.504764199256897, + "learning_rate": 0.0003536842105263158, + "loss": 1.2717, + "step": 1228 + }, + { + "epoch": 3.2294543063773835, + "high_lr": 0.0003536842105263158, + "low_lr": 7.073684210526316e-06, + "step": 1228 + }, + { + "epoch": 3.2294543063773835, + "high_lr": 0.0003536842105263158, + "low_lr": 7.073684210526316e-06, + "step": 1228 + }, + { + "epoch": 3.2294543063773835, + "high_lr": 0.0003536842105263158, + "low_lr": 7.073684210526316e-06, + "step": 1228 + }, + { + "epoch": 3.2294543063773835, + "high_lr": 0.0003536842105263158, + "low_lr": 7.073684210526316e-06, + "step": 1228 + }, + { + "epoch": 3.2294543063773835, + "high_lr": 0.0003536842105263158, + "low_lr": 7.073684210526316e-06, + "step": 1228 + }, + { + "epoch": 3.2294543063773835, + "high_lr": 0.0003536842105263158, + "low_lr": 7.073684210526316e-06, + "step": 1228 + }, + { + "epoch": 3.2294543063773835, + "high_lr": 0.0003536842105263158, + "low_lr": 7.073684210526316e-06, + "step": 1228 + }, + { + "epoch": 3.2294543063773835, + "high_lr": 0.0003536842105263158, + "low_lr": 7.073684210526316e-06, + "step": 1228 + }, + { + "epoch": 3.232084155161078, + "grad_norm": 1.5057621002197266, + "learning_rate": 0.0003531578947368421, + "loss": 1.3052, + "step": 1229 + }, + { + "epoch": 3.232084155161078, + "high_lr": 0.0003531578947368421, + "low_lr": 7.063157894736843e-06, + "step": 1229 + }, + { + "epoch": 3.232084155161078, + "high_lr": 0.0003531578947368421, + "low_lr": 7.063157894736843e-06, + "step": 1229 + }, + { + "epoch": 3.232084155161078, + "high_lr": 0.0003531578947368421, + "low_lr": 7.063157894736843e-06, + "step": 1229 + }, + { + "epoch": 3.232084155161078, + "high_lr": 0.0003531578947368421, + "low_lr": 7.063157894736843e-06, + "step": 1229 + }, + { + "epoch": 3.232084155161078, + "high_lr": 0.0003531578947368421, + "low_lr": 7.063157894736843e-06, + "step": 1229 + }, + { + "epoch": 3.232084155161078, + "high_lr": 0.0003531578947368421, + "low_lr": 7.063157894736843e-06, + "step": 1229 + }, + { + "epoch": 3.232084155161078, + "high_lr": 0.0003531578947368421, + "low_lr": 7.063157894736843e-06, + "step": 1229 + }, + { + "epoch": 3.232084155161078, + "high_lr": 0.0003531578947368421, + "low_lr": 7.063157894736843e-06, + "step": 1229 + }, + { + "epoch": 3.2347140039447733, + "grad_norm": 1.4025167226791382, + "learning_rate": 0.0003526315789473684, + "loss": 1.2541, + "step": 1230 + }, + { + "epoch": 3.2347140039447733, + "high_lr": 0.0003526315789473684, + "low_lr": 7.052631578947369e-06, + "step": 1230 + }, + { + "epoch": 3.2347140039447733, + "high_lr": 0.0003526315789473684, + "low_lr": 7.052631578947369e-06, + "step": 1230 + }, + { + "epoch": 3.2347140039447733, + "high_lr": 0.0003526315789473684, + "low_lr": 7.052631578947369e-06, + "step": 1230 + }, + { + "epoch": 3.2347140039447733, + "high_lr": 0.0003526315789473684, + "low_lr": 7.052631578947369e-06, + "step": 1230 + }, + { + "epoch": 3.2347140039447733, + "high_lr": 0.0003526315789473684, + "low_lr": 7.052631578947369e-06, + "step": 1230 + }, + { + "epoch": 3.2347140039447733, + "high_lr": 0.0003526315789473684, + "low_lr": 7.052631578947369e-06, + "step": 1230 + }, + { + "epoch": 3.2347140039447733, + "high_lr": 0.0003526315789473684, + "low_lr": 7.052631578947369e-06, + "step": 1230 + }, + { + "epoch": 3.2347140039447733, + "high_lr": 0.0003526315789473684, + "low_lr": 7.052631578947369e-06, + "step": 1230 + }, + { + "epoch": 3.237343852728468, + "grad_norm": 1.487691879272461, + "learning_rate": 0.00035210526315789474, + "loss": 1.2767, + "step": 1231 + }, + { + "epoch": 3.237343852728468, + "high_lr": 0.00035210526315789474, + "low_lr": 7.0421052631578954e-06, + "step": 1231 + }, + { + "epoch": 3.237343852728468, + "high_lr": 0.00035210526315789474, + "low_lr": 7.0421052631578954e-06, + "step": 1231 + }, + { + "epoch": 3.237343852728468, + "high_lr": 0.00035210526315789474, + "low_lr": 7.0421052631578954e-06, + "step": 1231 + }, + { + "epoch": 3.237343852728468, + "high_lr": 0.00035210526315789474, + "low_lr": 7.0421052631578954e-06, + "step": 1231 + }, + { + "epoch": 3.237343852728468, + "high_lr": 0.00035210526315789474, + "low_lr": 7.0421052631578954e-06, + "step": 1231 + }, + { + "epoch": 3.237343852728468, + "high_lr": 0.00035210526315789474, + "low_lr": 7.0421052631578954e-06, + "step": 1231 + }, + { + "epoch": 3.237343852728468, + "high_lr": 0.00035210526315789474, + "low_lr": 7.0421052631578954e-06, + "step": 1231 + }, + { + "epoch": 3.237343852728468, + "high_lr": 0.00035210526315789474, + "low_lr": 7.0421052631578954e-06, + "step": 1231 + }, + { + "epoch": 3.239973701512163, + "grad_norm": 1.3063784837722778, + "learning_rate": 0.00035157894736842103, + "loss": 1.2583, + "step": 1232 + }, + { + "epoch": 3.239973701512163, + "high_lr": 0.00035157894736842103, + "low_lr": 7.031578947368422e-06, + "step": 1232 + }, + { + "epoch": 3.239973701512163, + "high_lr": 0.00035157894736842103, + "low_lr": 7.031578947368422e-06, + "step": 1232 + }, + { + "epoch": 3.239973701512163, + "high_lr": 0.00035157894736842103, + "low_lr": 7.031578947368422e-06, + "step": 1232 + }, + { + "epoch": 3.239973701512163, + "high_lr": 0.00035157894736842103, + "low_lr": 7.031578947368422e-06, + "step": 1232 + }, + { + "epoch": 3.239973701512163, + "high_lr": 0.00035157894736842103, + "low_lr": 7.031578947368422e-06, + "step": 1232 + }, + { + "epoch": 3.239973701512163, + "high_lr": 0.00035157894736842103, + "low_lr": 7.031578947368422e-06, + "step": 1232 + }, + { + "epoch": 3.239973701512163, + "high_lr": 0.00035157894736842103, + "low_lr": 7.031578947368422e-06, + "step": 1232 + }, + { + "epoch": 3.239973701512163, + "high_lr": 0.00035157894736842103, + "low_lr": 7.031578947368422e-06, + "step": 1232 + }, + { + "epoch": 3.242603550295858, + "grad_norm": 1.5102909803390503, + "learning_rate": 0.0003510526315789474, + "loss": 1.3413, + "step": 1233 + }, + { + "epoch": 3.242603550295858, + "high_lr": 0.0003510526315789474, + "low_lr": 7.021052631578948e-06, + "step": 1233 + }, + { + "epoch": 3.242603550295858, + "high_lr": 0.0003510526315789474, + "low_lr": 7.021052631578948e-06, + "step": 1233 + }, + { + "epoch": 3.242603550295858, + "high_lr": 0.0003510526315789474, + "low_lr": 7.021052631578948e-06, + "step": 1233 + }, + { + "epoch": 3.242603550295858, + "high_lr": 0.0003510526315789474, + "low_lr": 7.021052631578948e-06, + "step": 1233 + }, + { + "epoch": 3.242603550295858, + "high_lr": 0.0003510526315789474, + "low_lr": 7.021052631578948e-06, + "step": 1233 + }, + { + "epoch": 3.242603550295858, + "high_lr": 0.0003510526315789474, + "low_lr": 7.021052631578948e-06, + "step": 1233 + }, + { + "epoch": 3.242603550295858, + "high_lr": 0.0003510526315789474, + "low_lr": 7.021052631578948e-06, + "step": 1233 + }, + { + "epoch": 3.242603550295858, + "high_lr": 0.0003510526315789474, + "low_lr": 7.021052631578948e-06, + "step": 1233 + }, + { + "epoch": 3.2452333990795528, + "grad_norm": 1.4315178394317627, + "learning_rate": 0.0003505263157894737, + "loss": 1.3391, + "step": 1234 + }, + { + "epoch": 3.2452333990795528, + "high_lr": 0.0003505263157894737, + "low_lr": 7.010526315789474e-06, + "step": 1234 + }, + { + "epoch": 3.2452333990795528, + "high_lr": 0.0003505263157894737, + "low_lr": 7.010526315789474e-06, + "step": 1234 + }, + { + "epoch": 3.2452333990795528, + "high_lr": 0.0003505263157894737, + "low_lr": 7.010526315789474e-06, + "step": 1234 + }, + { + "epoch": 3.2452333990795528, + "high_lr": 0.0003505263157894737, + "low_lr": 7.010526315789474e-06, + "step": 1234 + }, + { + "epoch": 3.2452333990795528, + "high_lr": 0.0003505263157894737, + "low_lr": 7.010526315789474e-06, + "step": 1234 + }, + { + "epoch": 3.2452333990795528, + "high_lr": 0.0003505263157894737, + "low_lr": 7.010526315789474e-06, + "step": 1234 + }, + { + "epoch": 3.2452333990795528, + "high_lr": 0.0003505263157894737, + "low_lr": 7.010526315789474e-06, + "step": 1234 + }, + { + "epoch": 3.2452333990795528, + "high_lr": 0.0003505263157894737, + "low_lr": 7.010526315789474e-06, + "step": 1234 + }, + { + "epoch": 3.247863247863248, + "grad_norm": 1.4550777673721313, + "learning_rate": 0.00035, + "loss": 1.2911, + "step": 1235 + }, + { + "epoch": 3.247863247863248, + "high_lr": 0.00035, + "low_lr": 7e-06, + "step": 1235 + }, + { + "epoch": 3.247863247863248, + "high_lr": 0.00035, + "low_lr": 7e-06, + "step": 1235 + }, + { + "epoch": 3.247863247863248, + "high_lr": 0.00035, + "low_lr": 7e-06, + "step": 1235 + }, + { + "epoch": 3.247863247863248, + "high_lr": 0.00035, + "low_lr": 7e-06, + "step": 1235 + }, + { + "epoch": 3.247863247863248, + "high_lr": 0.00035, + "low_lr": 7e-06, + "step": 1235 + }, + { + "epoch": 3.247863247863248, + "high_lr": 0.00035, + "low_lr": 7e-06, + "step": 1235 + }, + { + "epoch": 3.247863247863248, + "high_lr": 0.00035, + "low_lr": 7e-06, + "step": 1235 + }, + { + "epoch": 3.247863247863248, + "high_lr": 0.00035, + "low_lr": 7e-06, + "step": 1235 + }, + { + "epoch": 3.250493096646943, + "grad_norm": 1.4469130039215088, + "learning_rate": 0.00034947368421052634, + "loss": 1.3573, + "step": 1236 + }, + { + "epoch": 3.250493096646943, + "high_lr": 0.00034947368421052634, + "low_lr": 6.989473684210527e-06, + "step": 1236 + }, + { + "epoch": 3.250493096646943, + "high_lr": 0.00034947368421052634, + "low_lr": 6.989473684210527e-06, + "step": 1236 + }, + { + "epoch": 3.250493096646943, + "high_lr": 0.00034947368421052634, + "low_lr": 6.989473684210527e-06, + "step": 1236 + }, + { + "epoch": 3.250493096646943, + "high_lr": 0.00034947368421052634, + "low_lr": 6.989473684210527e-06, + "step": 1236 + }, + { + "epoch": 3.250493096646943, + "high_lr": 0.00034947368421052634, + "low_lr": 6.989473684210527e-06, + "step": 1236 + }, + { + "epoch": 3.250493096646943, + "high_lr": 0.00034947368421052634, + "low_lr": 6.989473684210527e-06, + "step": 1236 + }, + { + "epoch": 3.250493096646943, + "high_lr": 0.00034947368421052634, + "low_lr": 6.989473684210527e-06, + "step": 1236 + }, + { + "epoch": 3.250493096646943, + "high_lr": 0.00034947368421052634, + "low_lr": 6.989473684210527e-06, + "step": 1236 + }, + { + "epoch": 3.2531229454306376, + "grad_norm": 1.4454048871994019, + "learning_rate": 0.0003489473684210526, + "loss": 1.3024, + "step": 1237 + }, + { + "epoch": 3.2531229454306376, + "high_lr": 0.0003489473684210526, + "low_lr": 6.9789473684210525e-06, + "step": 1237 + }, + { + "epoch": 3.2531229454306376, + "high_lr": 0.0003489473684210526, + "low_lr": 6.9789473684210525e-06, + "step": 1237 + }, + { + "epoch": 3.2531229454306376, + "high_lr": 0.0003489473684210526, + "low_lr": 6.9789473684210525e-06, + "step": 1237 + }, + { + "epoch": 3.2531229454306376, + "high_lr": 0.0003489473684210526, + "low_lr": 6.9789473684210525e-06, + "step": 1237 + }, + { + "epoch": 3.2531229454306376, + "high_lr": 0.0003489473684210526, + "low_lr": 6.9789473684210525e-06, + "step": 1237 + }, + { + "epoch": 3.2531229454306376, + "high_lr": 0.0003489473684210526, + "low_lr": 6.9789473684210525e-06, + "step": 1237 + }, + { + "epoch": 3.2531229454306376, + "high_lr": 0.0003489473684210526, + "low_lr": 6.9789473684210525e-06, + "step": 1237 + }, + { + "epoch": 3.2531229454306376, + "high_lr": 0.0003489473684210526, + "low_lr": 6.9789473684210525e-06, + "step": 1237 + }, + { + "epoch": 3.2557527942143327, + "grad_norm": 1.3726730346679688, + "learning_rate": 0.00034842105263157896, + "loss": 1.3433, + "step": 1238 + }, + { + "epoch": 3.2557527942143327, + "high_lr": 0.00034842105263157896, + "low_lr": 6.96842105263158e-06, + "step": 1238 + }, + { + "epoch": 3.2557527942143327, + "high_lr": 0.00034842105263157896, + "low_lr": 6.96842105263158e-06, + "step": 1238 + }, + { + "epoch": 3.2557527942143327, + "high_lr": 0.00034842105263157896, + "low_lr": 6.96842105263158e-06, + "step": 1238 + }, + { + "epoch": 3.2557527942143327, + "high_lr": 0.00034842105263157896, + "low_lr": 6.96842105263158e-06, + "step": 1238 + }, + { + "epoch": 3.2557527942143327, + "high_lr": 0.00034842105263157896, + "low_lr": 6.96842105263158e-06, + "step": 1238 + }, + { + "epoch": 3.2557527942143327, + "high_lr": 0.00034842105263157896, + "low_lr": 6.96842105263158e-06, + "step": 1238 + }, + { + "epoch": 3.2557527942143327, + "high_lr": 0.00034842105263157896, + "low_lr": 6.96842105263158e-06, + "step": 1238 + }, + { + "epoch": 3.2557527942143327, + "high_lr": 0.00034842105263157896, + "low_lr": 6.96842105263158e-06, + "step": 1238 + }, + { + "epoch": 3.2583826429980274, + "grad_norm": 1.4009768962860107, + "learning_rate": 0.00034789473684210525, + "loss": 1.3129, + "step": 1239 + }, + { + "epoch": 3.2583826429980274, + "high_lr": 0.00034789473684210525, + "low_lr": 6.957894736842106e-06, + "step": 1239 + }, + { + "epoch": 3.2583826429980274, + "high_lr": 0.00034789473684210525, + "low_lr": 6.957894736842106e-06, + "step": 1239 + }, + { + "epoch": 3.2583826429980274, + "high_lr": 0.00034789473684210525, + "low_lr": 6.957894736842106e-06, + "step": 1239 + }, + { + "epoch": 3.2583826429980274, + "high_lr": 0.00034789473684210525, + "low_lr": 6.957894736842106e-06, + "step": 1239 + }, + { + "epoch": 3.2583826429980274, + "high_lr": 0.00034789473684210525, + "low_lr": 6.957894736842106e-06, + "step": 1239 + }, + { + "epoch": 3.2583826429980274, + "high_lr": 0.00034789473684210525, + "low_lr": 6.957894736842106e-06, + "step": 1239 + }, + { + "epoch": 3.2583826429980274, + "high_lr": 0.00034789473684210525, + "low_lr": 6.957894736842106e-06, + "step": 1239 + }, + { + "epoch": 3.2583826429980274, + "high_lr": 0.00034789473684210525, + "low_lr": 6.957894736842106e-06, + "step": 1239 + }, + { + "epoch": 3.2610124917817225, + "grad_norm": 1.4004924297332764, + "learning_rate": 0.0003473684210526316, + "loss": 1.2838, + "step": 1240 + }, + { + "epoch": 3.2610124917817225, + "high_lr": 0.0003473684210526316, + "low_lr": 6.947368421052632e-06, + "step": 1240 + }, + { + "epoch": 3.2610124917817225, + "high_lr": 0.0003473684210526316, + "low_lr": 6.947368421052632e-06, + "step": 1240 + }, + { + "epoch": 3.2610124917817225, + "high_lr": 0.0003473684210526316, + "low_lr": 6.947368421052632e-06, + "step": 1240 + }, + { + "epoch": 3.2610124917817225, + "high_lr": 0.0003473684210526316, + "low_lr": 6.947368421052632e-06, + "step": 1240 + }, + { + "epoch": 3.2610124917817225, + "high_lr": 0.0003473684210526316, + "low_lr": 6.947368421052632e-06, + "step": 1240 + }, + { + "epoch": 3.2610124917817225, + "high_lr": 0.0003473684210526316, + "low_lr": 6.947368421052632e-06, + "step": 1240 + }, + { + "epoch": 3.2610124917817225, + "high_lr": 0.0003473684210526316, + "low_lr": 6.947368421052632e-06, + "step": 1240 + }, + { + "epoch": 3.2610124917817225, + "high_lr": 0.0003473684210526316, + "low_lr": 6.947368421052632e-06, + "step": 1240 + }, + { + "epoch": 3.2636423405654176, + "grad_norm": 1.3671832084655762, + "learning_rate": 0.00034684210526315793, + "loss": 1.2908, + "step": 1241 + }, + { + "epoch": 3.2636423405654176, + "high_lr": 0.00034684210526315793, + "low_lr": 6.936842105263159e-06, + "step": 1241 + }, + { + "epoch": 3.2636423405654176, + "high_lr": 0.00034684210526315793, + "low_lr": 6.936842105263159e-06, + "step": 1241 + }, + { + "epoch": 3.2636423405654176, + "high_lr": 0.00034684210526315793, + "low_lr": 6.936842105263159e-06, + "step": 1241 + }, + { + "epoch": 3.2636423405654176, + "high_lr": 0.00034684210526315793, + "low_lr": 6.936842105263159e-06, + "step": 1241 + }, + { + "epoch": 3.2636423405654176, + "high_lr": 0.00034684210526315793, + "low_lr": 6.936842105263159e-06, + "step": 1241 + }, + { + "epoch": 3.2636423405654176, + "high_lr": 0.00034684210526315793, + "low_lr": 6.936842105263159e-06, + "step": 1241 + }, + { + "epoch": 3.2636423405654176, + "high_lr": 0.00034684210526315793, + "low_lr": 6.936842105263159e-06, + "step": 1241 + }, + { + "epoch": 3.2636423405654176, + "high_lr": 0.00034684210526315793, + "low_lr": 6.936842105263159e-06, + "step": 1241 + }, + { + "epoch": 3.2662721893491122, + "grad_norm": 1.3692514896392822, + "learning_rate": 0.0003463157894736842, + "loss": 1.3547, + "step": 1242 + }, + { + "epoch": 3.2662721893491122, + "high_lr": 0.0003463157894736842, + "low_lr": 6.926315789473684e-06, + "step": 1242 + }, + { + "epoch": 3.2662721893491122, + "high_lr": 0.0003463157894736842, + "low_lr": 6.926315789473684e-06, + "step": 1242 + }, + { + "epoch": 3.2662721893491122, + "high_lr": 0.0003463157894736842, + "low_lr": 6.926315789473684e-06, + "step": 1242 + }, + { + "epoch": 3.2662721893491122, + "high_lr": 0.0003463157894736842, + "low_lr": 6.926315789473684e-06, + "step": 1242 + }, + { + "epoch": 3.2662721893491122, + "high_lr": 0.0003463157894736842, + "low_lr": 6.926315789473684e-06, + "step": 1242 + }, + { + "epoch": 3.2662721893491122, + "high_lr": 0.0003463157894736842, + "low_lr": 6.926315789473684e-06, + "step": 1242 + }, + { + "epoch": 3.2662721893491122, + "high_lr": 0.0003463157894736842, + "low_lr": 6.926315789473684e-06, + "step": 1242 + }, + { + "epoch": 3.2662721893491122, + "high_lr": 0.0003463157894736842, + "low_lr": 6.926315789473684e-06, + "step": 1242 + }, + { + "epoch": 3.2689020381328073, + "grad_norm": 1.3831573724746704, + "learning_rate": 0.00034578947368421055, + "loss": 1.3195, + "step": 1243 + }, + { + "epoch": 3.2689020381328073, + "high_lr": 0.00034578947368421055, + "low_lr": 6.915789473684211e-06, + "step": 1243 + }, + { + "epoch": 3.2689020381328073, + "high_lr": 0.00034578947368421055, + "low_lr": 6.915789473684211e-06, + "step": 1243 + }, + { + "epoch": 3.2689020381328073, + "high_lr": 0.00034578947368421055, + "low_lr": 6.915789473684211e-06, + "step": 1243 + }, + { + "epoch": 3.2689020381328073, + "high_lr": 0.00034578947368421055, + "low_lr": 6.915789473684211e-06, + "step": 1243 + }, + { + "epoch": 3.2689020381328073, + "high_lr": 0.00034578947368421055, + "low_lr": 6.915789473684211e-06, + "step": 1243 + }, + { + "epoch": 3.2689020381328073, + "high_lr": 0.00034578947368421055, + "low_lr": 6.915789473684211e-06, + "step": 1243 + }, + { + "epoch": 3.2689020381328073, + "high_lr": 0.00034578947368421055, + "low_lr": 6.915789473684211e-06, + "step": 1243 + }, + { + "epoch": 3.2689020381328073, + "high_lr": 0.00034578947368421055, + "low_lr": 6.915789473684211e-06, + "step": 1243 + }, + { + "epoch": 3.2715318869165024, + "grad_norm": 1.3399865627288818, + "learning_rate": 0.00034526315789473684, + "loss": 1.3231, + "step": 1244 + }, + { + "epoch": 3.2715318869165024, + "high_lr": 0.00034526315789473684, + "low_lr": 6.905263157894737e-06, + "step": 1244 + }, + { + "epoch": 3.2715318869165024, + "high_lr": 0.00034526315789473684, + "low_lr": 6.905263157894737e-06, + "step": 1244 + }, + { + "epoch": 3.2715318869165024, + "high_lr": 0.00034526315789473684, + "low_lr": 6.905263157894737e-06, + "step": 1244 + }, + { + "epoch": 3.2715318869165024, + "high_lr": 0.00034526315789473684, + "low_lr": 6.905263157894737e-06, + "step": 1244 + }, + { + "epoch": 3.2715318869165024, + "high_lr": 0.00034526315789473684, + "low_lr": 6.905263157894737e-06, + "step": 1244 + }, + { + "epoch": 3.2715318869165024, + "high_lr": 0.00034526315789473684, + "low_lr": 6.905263157894737e-06, + "step": 1244 + }, + { + "epoch": 3.2715318869165024, + "high_lr": 0.00034526315789473684, + "low_lr": 6.905263157894737e-06, + "step": 1244 + }, + { + "epoch": 3.2715318869165024, + "high_lr": 0.00034526315789473684, + "low_lr": 6.905263157894737e-06, + "step": 1244 + }, + { + "epoch": 3.274161735700197, + "grad_norm": 1.346484661102295, + "learning_rate": 0.0003447368421052632, + "loss": 1.2596, + "step": 1245 + }, + { + "epoch": 3.274161735700197, + "high_lr": 0.0003447368421052632, + "low_lr": 6.894736842105264e-06, + "step": 1245 + }, + { + "epoch": 3.274161735700197, + "high_lr": 0.0003447368421052632, + "low_lr": 6.894736842105264e-06, + "step": 1245 + }, + { + "epoch": 3.274161735700197, + "high_lr": 0.0003447368421052632, + "low_lr": 6.894736842105264e-06, + "step": 1245 + }, + { + "epoch": 3.274161735700197, + "high_lr": 0.0003447368421052632, + "low_lr": 6.894736842105264e-06, + "step": 1245 + }, + { + "epoch": 3.274161735700197, + "high_lr": 0.0003447368421052632, + "low_lr": 6.894736842105264e-06, + "step": 1245 + }, + { + "epoch": 3.274161735700197, + "high_lr": 0.0003447368421052632, + "low_lr": 6.894736842105264e-06, + "step": 1245 + }, + { + "epoch": 3.274161735700197, + "high_lr": 0.0003447368421052632, + "low_lr": 6.894736842105264e-06, + "step": 1245 + }, + { + "epoch": 3.274161735700197, + "high_lr": 0.0003447368421052632, + "low_lr": 6.894736842105264e-06, + "step": 1245 + }, + { + "epoch": 3.276791584483892, + "grad_norm": 1.3539178371429443, + "learning_rate": 0.00034421052631578947, + "loss": 1.3023, + "step": 1246 + }, + { + "epoch": 3.276791584483892, + "high_lr": 0.00034421052631578947, + "low_lr": 6.8842105263157895e-06, + "step": 1246 + }, + { + "epoch": 3.276791584483892, + "high_lr": 0.00034421052631578947, + "low_lr": 6.8842105263157895e-06, + "step": 1246 + }, + { + "epoch": 3.276791584483892, + "high_lr": 0.00034421052631578947, + "low_lr": 6.8842105263157895e-06, + "step": 1246 + }, + { + "epoch": 3.276791584483892, + "high_lr": 0.00034421052631578947, + "low_lr": 6.8842105263157895e-06, + "step": 1246 + }, + { + "epoch": 3.276791584483892, + "high_lr": 0.00034421052631578947, + "low_lr": 6.8842105263157895e-06, + "step": 1246 + }, + { + "epoch": 3.276791584483892, + "high_lr": 0.00034421052631578947, + "low_lr": 6.8842105263157895e-06, + "step": 1246 + }, + { + "epoch": 3.276791584483892, + "high_lr": 0.00034421052631578947, + "low_lr": 6.8842105263157895e-06, + "step": 1246 + }, + { + "epoch": 3.276791584483892, + "high_lr": 0.00034421052631578947, + "low_lr": 6.8842105263157895e-06, + "step": 1246 + }, + { + "epoch": 3.2794214332675873, + "grad_norm": 1.3168984651565552, + "learning_rate": 0.0003436842105263158, + "loss": 1.3064, + "step": 1247 + }, + { + "epoch": 3.2794214332675873, + "high_lr": 0.0003436842105263158, + "low_lr": 6.873684210526317e-06, + "step": 1247 + }, + { + "epoch": 3.2794214332675873, + "high_lr": 0.0003436842105263158, + "low_lr": 6.873684210526317e-06, + "step": 1247 + }, + { + "epoch": 3.2794214332675873, + "high_lr": 0.0003436842105263158, + "low_lr": 6.873684210526317e-06, + "step": 1247 + }, + { + "epoch": 3.2794214332675873, + "high_lr": 0.0003436842105263158, + "low_lr": 6.873684210526317e-06, + "step": 1247 + }, + { + "epoch": 3.2794214332675873, + "high_lr": 0.0003436842105263158, + "low_lr": 6.873684210526317e-06, + "step": 1247 + }, + { + "epoch": 3.2794214332675873, + "high_lr": 0.0003436842105263158, + "low_lr": 6.873684210526317e-06, + "step": 1247 + }, + { + "epoch": 3.2794214332675873, + "high_lr": 0.0003436842105263158, + "low_lr": 6.873684210526317e-06, + "step": 1247 + }, + { + "epoch": 3.2794214332675873, + "high_lr": 0.0003436842105263158, + "low_lr": 6.873684210526317e-06, + "step": 1247 + }, + { + "epoch": 3.282051282051282, + "grad_norm": 1.372035264968872, + "learning_rate": 0.0003431578947368421, + "loss": 1.3217, + "step": 1248 + }, + { + "epoch": 3.282051282051282, + "high_lr": 0.0003431578947368421, + "low_lr": 6.863157894736843e-06, + "step": 1248 + }, + { + "epoch": 3.282051282051282, + "high_lr": 0.0003431578947368421, + "low_lr": 6.863157894736843e-06, + "step": 1248 + }, + { + "epoch": 3.282051282051282, + "high_lr": 0.0003431578947368421, + "low_lr": 6.863157894736843e-06, + "step": 1248 + }, + { + "epoch": 3.282051282051282, + "high_lr": 0.0003431578947368421, + "low_lr": 6.863157894736843e-06, + "step": 1248 + }, + { + "epoch": 3.282051282051282, + "high_lr": 0.0003431578947368421, + "low_lr": 6.863157894736843e-06, + "step": 1248 + }, + { + "epoch": 3.282051282051282, + "high_lr": 0.0003431578947368421, + "low_lr": 6.863157894736843e-06, + "step": 1248 + }, + { + "epoch": 3.282051282051282, + "high_lr": 0.0003431578947368421, + "low_lr": 6.863157894736843e-06, + "step": 1248 + }, + { + "epoch": 3.282051282051282, + "high_lr": 0.0003431578947368421, + "low_lr": 6.863157894736843e-06, + "step": 1248 + }, + { + "epoch": 3.284681130834977, + "grad_norm": 1.3251991271972656, + "learning_rate": 0.0003426315789473684, + "loss": 1.3082, + "step": 1249 + }, + { + "epoch": 3.284681130834977, + "high_lr": 0.0003426315789473684, + "low_lr": 6.8526315789473685e-06, + "step": 1249 + }, + { + "epoch": 3.284681130834977, + "high_lr": 0.0003426315789473684, + "low_lr": 6.8526315789473685e-06, + "step": 1249 + }, + { + "epoch": 3.284681130834977, + "high_lr": 0.0003426315789473684, + "low_lr": 6.8526315789473685e-06, + "step": 1249 + }, + { + "epoch": 3.284681130834977, + "high_lr": 0.0003426315789473684, + "low_lr": 6.8526315789473685e-06, + "step": 1249 + }, + { + "epoch": 3.284681130834977, + "high_lr": 0.0003426315789473684, + "low_lr": 6.8526315789473685e-06, + "step": 1249 + }, + { + "epoch": 3.284681130834977, + "high_lr": 0.0003426315789473684, + "low_lr": 6.8526315789473685e-06, + "step": 1249 + }, + { + "epoch": 3.284681130834977, + "high_lr": 0.0003426315789473684, + "low_lr": 6.8526315789473685e-06, + "step": 1249 + }, + { + "epoch": 3.284681130834977, + "high_lr": 0.0003426315789473684, + "low_lr": 6.8526315789473685e-06, + "step": 1249 + }, + { + "epoch": 3.287310979618672, + "grad_norm": 1.2727844715118408, + "learning_rate": 0.00034210526315789477, + "loss": 1.3121, + "step": 1250 + }, + { + "epoch": 3.287310979618672, + "high_lr": 0.00034210526315789477, + "low_lr": 6.842105263157896e-06, + "step": 1250 + }, + { + "epoch": 3.287310979618672, + "high_lr": 0.00034210526315789477, + "low_lr": 6.842105263157896e-06, + "step": 1250 + }, + { + "epoch": 3.287310979618672, + "high_lr": 0.00034210526315789477, + "low_lr": 6.842105263157896e-06, + "step": 1250 + }, + { + "epoch": 3.287310979618672, + "high_lr": 0.00034210526315789477, + "low_lr": 6.842105263157896e-06, + "step": 1250 + }, + { + "epoch": 3.287310979618672, + "high_lr": 0.00034210526315789477, + "low_lr": 6.842105263157896e-06, + "step": 1250 + }, + { + "epoch": 3.287310979618672, + "high_lr": 0.00034210526315789477, + "low_lr": 6.842105263157896e-06, + "step": 1250 + }, + { + "epoch": 3.287310979618672, + "high_lr": 0.00034210526315789477, + "low_lr": 6.842105263157896e-06, + "step": 1250 + }, + { + "epoch": 3.287310979618672, + "high_lr": 0.00034210526315789477, + "low_lr": 6.842105263157896e-06, + "step": 1250 + }, + { + "epoch": 3.289940828402367, + "grad_norm": 1.456199049949646, + "learning_rate": 0.00034157894736842106, + "loss": 1.323, + "step": 1251 + }, + { + "epoch": 3.289940828402367, + "high_lr": 0.00034157894736842106, + "low_lr": 6.831578947368421e-06, + "step": 1251 + }, + { + "epoch": 3.289940828402367, + "high_lr": 0.00034157894736842106, + "low_lr": 6.831578947368421e-06, + "step": 1251 + }, + { + "epoch": 3.289940828402367, + "high_lr": 0.00034157894736842106, + "low_lr": 6.831578947368421e-06, + "step": 1251 + }, + { + "epoch": 3.289940828402367, + "high_lr": 0.00034157894736842106, + "low_lr": 6.831578947368421e-06, + "step": 1251 + }, + { + "epoch": 3.289940828402367, + "high_lr": 0.00034157894736842106, + "low_lr": 6.831578947368421e-06, + "step": 1251 + }, + { + "epoch": 3.289940828402367, + "high_lr": 0.00034157894736842106, + "low_lr": 6.831578947368421e-06, + "step": 1251 + }, + { + "epoch": 3.289940828402367, + "high_lr": 0.00034157894736842106, + "low_lr": 6.831578947368421e-06, + "step": 1251 + }, + { + "epoch": 3.289940828402367, + "high_lr": 0.00034157894736842106, + "low_lr": 6.831578947368421e-06, + "step": 1251 + }, + { + "epoch": 3.292570677186062, + "grad_norm": 1.4075325727462769, + "learning_rate": 0.0003410526315789474, + "loss": 1.3247, + "step": 1252 + }, + { + "epoch": 3.292570677186062, + "high_lr": 0.0003410526315789474, + "low_lr": 6.821052631578948e-06, + "step": 1252 + }, + { + "epoch": 3.292570677186062, + "high_lr": 0.0003410526315789474, + "low_lr": 6.821052631578948e-06, + "step": 1252 + }, + { + "epoch": 3.292570677186062, + "high_lr": 0.0003410526315789474, + "low_lr": 6.821052631578948e-06, + "step": 1252 + }, + { + "epoch": 3.292570677186062, + "high_lr": 0.0003410526315789474, + "low_lr": 6.821052631578948e-06, + "step": 1252 + }, + { + "epoch": 3.292570677186062, + "high_lr": 0.0003410526315789474, + "low_lr": 6.821052631578948e-06, + "step": 1252 + }, + { + "epoch": 3.292570677186062, + "high_lr": 0.0003410526315789474, + "low_lr": 6.821052631578948e-06, + "step": 1252 + }, + { + "epoch": 3.292570677186062, + "high_lr": 0.0003410526315789474, + "low_lr": 6.821052631578948e-06, + "step": 1252 + }, + { + "epoch": 3.292570677186062, + "high_lr": 0.0003410526315789474, + "low_lr": 6.821052631578948e-06, + "step": 1252 + }, + { + "epoch": 3.295200525969757, + "grad_norm": 1.3012107610702515, + "learning_rate": 0.0003405263157894737, + "loss": 1.3016, + "step": 1253 + }, + { + "epoch": 3.295200525969757, + "high_lr": 0.0003405263157894737, + "low_lr": 6.810526315789474e-06, + "step": 1253 + }, + { + "epoch": 3.295200525969757, + "high_lr": 0.0003405263157894737, + "low_lr": 6.810526315789474e-06, + "step": 1253 + }, + { + "epoch": 3.295200525969757, + "high_lr": 0.0003405263157894737, + "low_lr": 6.810526315789474e-06, + "step": 1253 + }, + { + "epoch": 3.295200525969757, + "high_lr": 0.0003405263157894737, + "low_lr": 6.810526315789474e-06, + "step": 1253 + }, + { + "epoch": 3.295200525969757, + "high_lr": 0.0003405263157894737, + "low_lr": 6.810526315789474e-06, + "step": 1253 + }, + { + "epoch": 3.295200525969757, + "high_lr": 0.0003405263157894737, + "low_lr": 6.810526315789474e-06, + "step": 1253 + }, + { + "epoch": 3.295200525969757, + "high_lr": 0.0003405263157894737, + "low_lr": 6.810526315789474e-06, + "step": 1253 + }, + { + "epoch": 3.295200525969757, + "high_lr": 0.0003405263157894737, + "low_lr": 6.810526315789474e-06, + "step": 1253 + }, + { + "epoch": 3.2978303747534516, + "grad_norm": 1.4447226524353027, + "learning_rate": 0.00034, + "loss": 1.3979, + "step": 1254 + }, + { + "epoch": 3.2978303747534516, + "high_lr": 0.00034, + "low_lr": 6.800000000000001e-06, + "step": 1254 + }, + { + "epoch": 3.2978303747534516, + "high_lr": 0.00034, + "low_lr": 6.800000000000001e-06, + "step": 1254 + }, + { + "epoch": 3.2978303747534516, + "high_lr": 0.00034, + "low_lr": 6.800000000000001e-06, + "step": 1254 + }, + { + "epoch": 3.2978303747534516, + "high_lr": 0.00034, + "low_lr": 6.800000000000001e-06, + "step": 1254 + }, + { + "epoch": 3.2978303747534516, + "high_lr": 0.00034, + "low_lr": 6.800000000000001e-06, + "step": 1254 + }, + { + "epoch": 3.2978303747534516, + "high_lr": 0.00034, + "low_lr": 6.800000000000001e-06, + "step": 1254 + }, + { + "epoch": 3.2978303747534516, + "high_lr": 0.00034, + "low_lr": 6.800000000000001e-06, + "step": 1254 + }, + { + "epoch": 3.2978303747534516, + "high_lr": 0.00034, + "low_lr": 6.800000000000001e-06, + "step": 1254 + }, + { + "epoch": 3.3004602235371467, + "grad_norm": 1.4133968353271484, + "learning_rate": 0.0003394736842105263, + "loss": 1.3364, + "step": 1255 + }, + { + "epoch": 3.3004602235371467, + "high_lr": 0.0003394736842105263, + "low_lr": 6.789473684210527e-06, + "step": 1255 + }, + { + "epoch": 3.3004602235371467, + "high_lr": 0.0003394736842105263, + "low_lr": 6.789473684210527e-06, + "step": 1255 + }, + { + "epoch": 3.3004602235371467, + "high_lr": 0.0003394736842105263, + "low_lr": 6.789473684210527e-06, + "step": 1255 + }, + { + "epoch": 3.3004602235371467, + "high_lr": 0.0003394736842105263, + "low_lr": 6.789473684210527e-06, + "step": 1255 + }, + { + "epoch": 3.3004602235371467, + "high_lr": 0.0003394736842105263, + "low_lr": 6.789473684210527e-06, + "step": 1255 + }, + { + "epoch": 3.3004602235371467, + "high_lr": 0.0003394736842105263, + "low_lr": 6.789473684210527e-06, + "step": 1255 + }, + { + "epoch": 3.3004602235371467, + "high_lr": 0.0003394736842105263, + "low_lr": 6.789473684210527e-06, + "step": 1255 + }, + { + "epoch": 3.3004602235371467, + "high_lr": 0.0003394736842105263, + "low_lr": 6.789473684210527e-06, + "step": 1255 + }, + { + "epoch": 3.3030900723208414, + "grad_norm": 1.3877695798873901, + "learning_rate": 0.0003389473684210526, + "loss": 1.3226, + "step": 1256 + }, + { + "epoch": 3.3030900723208414, + "high_lr": 0.0003389473684210526, + "low_lr": 6.778947368421053e-06, + "step": 1256 + }, + { + "epoch": 3.3030900723208414, + "high_lr": 0.0003389473684210526, + "low_lr": 6.778947368421053e-06, + "step": 1256 + }, + { + "epoch": 3.3030900723208414, + "high_lr": 0.0003389473684210526, + "low_lr": 6.778947368421053e-06, + "step": 1256 + }, + { + "epoch": 3.3030900723208414, + "high_lr": 0.0003389473684210526, + "low_lr": 6.778947368421053e-06, + "step": 1256 + }, + { + "epoch": 3.3030900723208414, + "high_lr": 0.0003389473684210526, + "low_lr": 6.778947368421053e-06, + "step": 1256 + }, + { + "epoch": 3.3030900723208414, + "high_lr": 0.0003389473684210526, + "low_lr": 6.778947368421053e-06, + "step": 1256 + }, + { + "epoch": 3.3030900723208414, + "high_lr": 0.0003389473684210526, + "low_lr": 6.778947368421053e-06, + "step": 1256 + }, + { + "epoch": 3.3030900723208414, + "high_lr": 0.0003389473684210526, + "low_lr": 6.778947368421053e-06, + "step": 1256 + }, + { + "epoch": 3.3057199211045365, + "grad_norm": 1.4301080703735352, + "learning_rate": 0.00033842105263157894, + "loss": 1.343, + "step": 1257 + }, + { + "epoch": 3.3057199211045365, + "high_lr": 0.00033842105263157894, + "low_lr": 6.76842105263158e-06, + "step": 1257 + }, + { + "epoch": 3.3057199211045365, + "high_lr": 0.00033842105263157894, + "low_lr": 6.76842105263158e-06, + "step": 1257 + }, + { + "epoch": 3.3057199211045365, + "high_lr": 0.00033842105263157894, + "low_lr": 6.76842105263158e-06, + "step": 1257 + }, + { + "epoch": 3.3057199211045365, + "high_lr": 0.00033842105263157894, + "low_lr": 6.76842105263158e-06, + "step": 1257 + }, + { + "epoch": 3.3057199211045365, + "high_lr": 0.00033842105263157894, + "low_lr": 6.76842105263158e-06, + "step": 1257 + }, + { + "epoch": 3.3057199211045365, + "high_lr": 0.00033842105263157894, + "low_lr": 6.76842105263158e-06, + "step": 1257 + }, + { + "epoch": 3.3057199211045365, + "high_lr": 0.00033842105263157894, + "low_lr": 6.76842105263158e-06, + "step": 1257 + }, + { + "epoch": 3.3057199211045365, + "high_lr": 0.00033842105263157894, + "low_lr": 6.76842105263158e-06, + "step": 1257 + }, + { + "epoch": 3.3083497698882316, + "grad_norm": 1.403597116470337, + "learning_rate": 0.0003378947368421053, + "loss": 1.3917, + "step": 1258 + }, + { + "epoch": 3.3083497698882316, + "high_lr": 0.0003378947368421053, + "low_lr": 6.7578947368421054e-06, + "step": 1258 + }, + { + "epoch": 3.3083497698882316, + "high_lr": 0.0003378947368421053, + "low_lr": 6.7578947368421054e-06, + "step": 1258 + }, + { + "epoch": 3.3083497698882316, + "high_lr": 0.0003378947368421053, + "low_lr": 6.7578947368421054e-06, + "step": 1258 + }, + { + "epoch": 3.3083497698882316, + "high_lr": 0.0003378947368421053, + "low_lr": 6.7578947368421054e-06, + "step": 1258 + }, + { + "epoch": 3.3083497698882316, + "high_lr": 0.0003378947368421053, + "low_lr": 6.7578947368421054e-06, + "step": 1258 + }, + { + "epoch": 3.3083497698882316, + "high_lr": 0.0003378947368421053, + "low_lr": 6.7578947368421054e-06, + "step": 1258 + }, + { + "epoch": 3.3083497698882316, + "high_lr": 0.0003378947368421053, + "low_lr": 6.7578947368421054e-06, + "step": 1258 + }, + { + "epoch": 3.3083497698882316, + "high_lr": 0.0003378947368421053, + "low_lr": 6.7578947368421054e-06, + "step": 1258 + }, + { + "epoch": 3.3109796186719263, + "grad_norm": 1.4304726123809814, + "learning_rate": 0.0003373684210526316, + "loss": 1.2391, + "step": 1259 + }, + { + "epoch": 3.3109796186719263, + "high_lr": 0.0003373684210526316, + "low_lr": 6.747368421052633e-06, + "step": 1259 + }, + { + "epoch": 3.3109796186719263, + "high_lr": 0.0003373684210526316, + "low_lr": 6.747368421052633e-06, + "step": 1259 + }, + { + "epoch": 3.3109796186719263, + "high_lr": 0.0003373684210526316, + "low_lr": 6.747368421052633e-06, + "step": 1259 + }, + { + "epoch": 3.3109796186719263, + "high_lr": 0.0003373684210526316, + "low_lr": 6.747368421052633e-06, + "step": 1259 + }, + { + "epoch": 3.3109796186719263, + "high_lr": 0.0003373684210526316, + "low_lr": 6.747368421052633e-06, + "step": 1259 + }, + { + "epoch": 3.3109796186719263, + "high_lr": 0.0003373684210526316, + "low_lr": 6.747368421052633e-06, + "step": 1259 + }, + { + "epoch": 3.3109796186719263, + "high_lr": 0.0003373684210526316, + "low_lr": 6.747368421052633e-06, + "step": 1259 + }, + { + "epoch": 3.3109796186719263, + "high_lr": 0.0003373684210526316, + "low_lr": 6.747368421052633e-06, + "step": 1259 + }, + { + "epoch": 3.3136094674556213, + "grad_norm": 1.3591734170913696, + "learning_rate": 0.0003368421052631579, + "loss": 1.2765, + "step": 1260 + }, + { + "epoch": 3.3136094674556213, + "high_lr": 0.0003368421052631579, + "low_lr": 6.736842105263158e-06, + "step": 1260 + }, + { + "epoch": 3.3136094674556213, + "high_lr": 0.0003368421052631579, + "low_lr": 6.736842105263158e-06, + "step": 1260 + }, + { + "epoch": 3.3136094674556213, + "high_lr": 0.0003368421052631579, + "low_lr": 6.736842105263158e-06, + "step": 1260 + }, + { + "epoch": 3.3136094674556213, + "high_lr": 0.0003368421052631579, + "low_lr": 6.736842105263158e-06, + "step": 1260 + }, + { + "epoch": 3.3136094674556213, + "high_lr": 0.0003368421052631579, + "low_lr": 6.736842105263158e-06, + "step": 1260 + }, + { + "epoch": 3.3136094674556213, + "high_lr": 0.0003368421052631579, + "low_lr": 6.736842105263158e-06, + "step": 1260 + }, + { + "epoch": 3.3136094674556213, + "high_lr": 0.0003368421052631579, + "low_lr": 6.736842105263158e-06, + "step": 1260 + }, + { + "epoch": 3.3136094674556213, + "high_lr": 0.0003368421052631579, + "low_lr": 6.736842105263158e-06, + "step": 1260 + }, + { + "epoch": 3.316239316239316, + "grad_norm": 1.3532341718673706, + "learning_rate": 0.00033631578947368424, + "loss": 1.3013, + "step": 1261 + }, + { + "epoch": 3.316239316239316, + "high_lr": 0.00033631578947368424, + "low_lr": 6.726315789473685e-06, + "step": 1261 + }, + { + "epoch": 3.316239316239316, + "high_lr": 0.00033631578947368424, + "low_lr": 6.726315789473685e-06, + "step": 1261 + }, + { + "epoch": 3.316239316239316, + "high_lr": 0.00033631578947368424, + "low_lr": 6.726315789473685e-06, + "step": 1261 + }, + { + "epoch": 3.316239316239316, + "high_lr": 0.00033631578947368424, + "low_lr": 6.726315789473685e-06, + "step": 1261 + }, + { + "epoch": 3.316239316239316, + "high_lr": 0.00033631578947368424, + "low_lr": 6.726315789473685e-06, + "step": 1261 + }, + { + "epoch": 3.316239316239316, + "high_lr": 0.00033631578947368424, + "low_lr": 6.726315789473685e-06, + "step": 1261 + }, + { + "epoch": 3.316239316239316, + "high_lr": 0.00033631578947368424, + "low_lr": 6.726315789473685e-06, + "step": 1261 + }, + { + "epoch": 3.316239316239316, + "high_lr": 0.00033631578947368424, + "low_lr": 6.726315789473685e-06, + "step": 1261 + }, + { + "epoch": 3.318869165023011, + "grad_norm": 1.3452813625335693, + "learning_rate": 0.00033578947368421053, + "loss": 1.3099, + "step": 1262 + }, + { + "epoch": 3.318869165023011, + "high_lr": 0.00033578947368421053, + "low_lr": 6.715789473684211e-06, + "step": 1262 + }, + { + "epoch": 3.318869165023011, + "high_lr": 0.00033578947368421053, + "low_lr": 6.715789473684211e-06, + "step": 1262 + }, + { + "epoch": 3.318869165023011, + "high_lr": 0.00033578947368421053, + "low_lr": 6.715789473684211e-06, + "step": 1262 + }, + { + "epoch": 3.318869165023011, + "high_lr": 0.00033578947368421053, + "low_lr": 6.715789473684211e-06, + "step": 1262 + }, + { + "epoch": 3.318869165023011, + "high_lr": 0.00033578947368421053, + "low_lr": 6.715789473684211e-06, + "step": 1262 + }, + { + "epoch": 3.318869165023011, + "high_lr": 0.00033578947368421053, + "low_lr": 6.715789473684211e-06, + "step": 1262 + }, + { + "epoch": 3.318869165023011, + "high_lr": 0.00033578947368421053, + "low_lr": 6.715789473684211e-06, + "step": 1262 + }, + { + "epoch": 3.318869165023011, + "high_lr": 0.00033578947368421053, + "low_lr": 6.715789473684211e-06, + "step": 1262 + }, + { + "epoch": 3.321499013806706, + "grad_norm": 1.4131159782409668, + "learning_rate": 0.0003352631578947368, + "loss": 1.3188, + "step": 1263 + }, + { + "epoch": 3.321499013806706, + "high_lr": 0.0003352631578947368, + "low_lr": 6.705263157894737e-06, + "step": 1263 + }, + { + "epoch": 3.321499013806706, + "high_lr": 0.0003352631578947368, + "low_lr": 6.705263157894737e-06, + "step": 1263 + }, + { + "epoch": 3.321499013806706, + "high_lr": 0.0003352631578947368, + "low_lr": 6.705263157894737e-06, + "step": 1263 + }, + { + "epoch": 3.321499013806706, + "high_lr": 0.0003352631578947368, + "low_lr": 6.705263157894737e-06, + "step": 1263 + }, + { + "epoch": 3.321499013806706, + "high_lr": 0.0003352631578947368, + "low_lr": 6.705263157894737e-06, + "step": 1263 + }, + { + "epoch": 3.321499013806706, + "high_lr": 0.0003352631578947368, + "low_lr": 6.705263157894737e-06, + "step": 1263 + }, + { + "epoch": 3.321499013806706, + "high_lr": 0.0003352631578947368, + "low_lr": 6.705263157894737e-06, + "step": 1263 + }, + { + "epoch": 3.321499013806706, + "high_lr": 0.0003352631578947368, + "low_lr": 6.705263157894737e-06, + "step": 1263 + }, + { + "epoch": 3.324128862590401, + "grad_norm": 1.5348033905029297, + "learning_rate": 0.00033473684210526315, + "loss": 1.336, + "step": 1264 + }, + { + "epoch": 3.324128862590401, + "high_lr": 0.00033473684210526315, + "low_lr": 6.694736842105264e-06, + "step": 1264 + }, + { + "epoch": 3.324128862590401, + "high_lr": 0.00033473684210526315, + "low_lr": 6.694736842105264e-06, + "step": 1264 + }, + { + "epoch": 3.324128862590401, + "high_lr": 0.00033473684210526315, + "low_lr": 6.694736842105264e-06, + "step": 1264 + }, + { + "epoch": 3.324128862590401, + "high_lr": 0.00033473684210526315, + "low_lr": 6.694736842105264e-06, + "step": 1264 + }, + { + "epoch": 3.324128862590401, + "high_lr": 0.00033473684210526315, + "low_lr": 6.694736842105264e-06, + "step": 1264 + }, + { + "epoch": 3.324128862590401, + "high_lr": 0.00033473684210526315, + "low_lr": 6.694736842105264e-06, + "step": 1264 + }, + { + "epoch": 3.324128862590401, + "high_lr": 0.00033473684210526315, + "low_lr": 6.694736842105264e-06, + "step": 1264 + }, + { + "epoch": 3.324128862590401, + "high_lr": 0.00033473684210526315, + "low_lr": 6.694736842105264e-06, + "step": 1264 + }, + { + "epoch": 3.326758711374096, + "grad_norm": 1.4381033182144165, + "learning_rate": 0.00033421052631578944, + "loss": 1.3093, + "step": 1265 + }, + { + "epoch": 3.326758711374096, + "high_lr": 0.00033421052631578944, + "low_lr": 6.68421052631579e-06, + "step": 1265 + }, + { + "epoch": 3.326758711374096, + "high_lr": 0.00033421052631578944, + "low_lr": 6.68421052631579e-06, + "step": 1265 + }, + { + "epoch": 3.326758711374096, + "high_lr": 0.00033421052631578944, + "low_lr": 6.68421052631579e-06, + "step": 1265 + }, + { + "epoch": 3.326758711374096, + "high_lr": 0.00033421052631578944, + "low_lr": 6.68421052631579e-06, + "step": 1265 + }, + { + "epoch": 3.326758711374096, + "high_lr": 0.00033421052631578944, + "low_lr": 6.68421052631579e-06, + "step": 1265 + }, + { + "epoch": 3.326758711374096, + "high_lr": 0.00033421052631578944, + "low_lr": 6.68421052631579e-06, + "step": 1265 + }, + { + "epoch": 3.326758711374096, + "high_lr": 0.00033421052631578944, + "low_lr": 6.68421052631579e-06, + "step": 1265 + }, + { + "epoch": 3.326758711374096, + "high_lr": 0.00033421052631578944, + "low_lr": 6.68421052631579e-06, + "step": 1265 + }, + { + "epoch": 3.329388560157791, + "grad_norm": 1.3288220167160034, + "learning_rate": 0.00033368421052631583, + "loss": 1.3156, + "step": 1266 + }, + { + "epoch": 3.329388560157791, + "high_lr": 0.00033368421052631583, + "low_lr": 6.673684210526317e-06, + "step": 1266 + }, + { + "epoch": 3.329388560157791, + "high_lr": 0.00033368421052631583, + "low_lr": 6.673684210526317e-06, + "step": 1266 + }, + { + "epoch": 3.329388560157791, + "high_lr": 0.00033368421052631583, + "low_lr": 6.673684210526317e-06, + "step": 1266 + }, + { + "epoch": 3.329388560157791, + "high_lr": 0.00033368421052631583, + "low_lr": 6.673684210526317e-06, + "step": 1266 + }, + { + "epoch": 3.329388560157791, + "high_lr": 0.00033368421052631583, + "low_lr": 6.673684210526317e-06, + "step": 1266 + }, + { + "epoch": 3.329388560157791, + "high_lr": 0.00033368421052631583, + "low_lr": 6.673684210526317e-06, + "step": 1266 + }, + { + "epoch": 3.329388560157791, + "high_lr": 0.00033368421052631583, + "low_lr": 6.673684210526317e-06, + "step": 1266 + }, + { + "epoch": 3.329388560157791, + "high_lr": 0.00033368421052631583, + "low_lr": 6.673684210526317e-06, + "step": 1266 + }, + { + "epoch": 3.3320184089414857, + "grad_norm": 1.386177897453308, + "learning_rate": 0.0003331578947368421, + "loss": 1.2822, + "step": 1267 + }, + { + "epoch": 3.3320184089414857, + "high_lr": 0.0003331578947368421, + "low_lr": 6.663157894736842e-06, + "step": 1267 + }, + { + "epoch": 3.3320184089414857, + "high_lr": 0.0003331578947368421, + "low_lr": 6.663157894736842e-06, + "step": 1267 + }, + { + "epoch": 3.3320184089414857, + "high_lr": 0.0003331578947368421, + "low_lr": 6.663157894736842e-06, + "step": 1267 + }, + { + "epoch": 3.3320184089414857, + "high_lr": 0.0003331578947368421, + "low_lr": 6.663157894736842e-06, + "step": 1267 + }, + { + "epoch": 3.3320184089414857, + "high_lr": 0.0003331578947368421, + "low_lr": 6.663157894736842e-06, + "step": 1267 + }, + { + "epoch": 3.3320184089414857, + "high_lr": 0.0003331578947368421, + "low_lr": 6.663157894736842e-06, + "step": 1267 + }, + { + "epoch": 3.3320184089414857, + "high_lr": 0.0003331578947368421, + "low_lr": 6.663157894736842e-06, + "step": 1267 + }, + { + "epoch": 3.3320184089414857, + "high_lr": 0.0003331578947368421, + "low_lr": 6.663157894736842e-06, + "step": 1267 + }, + { + "epoch": 3.334648257725181, + "grad_norm": 1.3520636558532715, + "learning_rate": 0.00033263157894736846, + "loss": 1.3012, + "step": 1268 + }, + { + "epoch": 3.334648257725181, + "high_lr": 0.00033263157894736846, + "low_lr": 6.6526315789473695e-06, + "step": 1268 + }, + { + "epoch": 3.334648257725181, + "high_lr": 0.00033263157894736846, + "low_lr": 6.6526315789473695e-06, + "step": 1268 + }, + { + "epoch": 3.334648257725181, + "high_lr": 0.00033263157894736846, + "low_lr": 6.6526315789473695e-06, + "step": 1268 + }, + { + "epoch": 3.334648257725181, + "high_lr": 0.00033263157894736846, + "low_lr": 6.6526315789473695e-06, + "step": 1268 + }, + { + "epoch": 3.334648257725181, + "high_lr": 0.00033263157894736846, + "low_lr": 6.6526315789473695e-06, + "step": 1268 + }, + { + "epoch": 3.334648257725181, + "high_lr": 0.00033263157894736846, + "low_lr": 6.6526315789473695e-06, + "step": 1268 + }, + { + "epoch": 3.334648257725181, + "high_lr": 0.00033263157894736846, + "low_lr": 6.6526315789473695e-06, + "step": 1268 + }, + { + "epoch": 3.334648257725181, + "high_lr": 0.00033263157894736846, + "low_lr": 6.6526315789473695e-06, + "step": 1268 + }, + { + "epoch": 3.337278106508876, + "grad_norm": 1.4403034448623657, + "learning_rate": 0.00033210526315789475, + "loss": 1.2646, + "step": 1269 + }, + { + "epoch": 3.337278106508876, + "high_lr": 0.00033210526315789475, + "low_lr": 6.642105263157895e-06, + "step": 1269 + }, + { + "epoch": 3.337278106508876, + "high_lr": 0.00033210526315789475, + "low_lr": 6.642105263157895e-06, + "step": 1269 + }, + { + "epoch": 3.337278106508876, + "high_lr": 0.00033210526315789475, + "low_lr": 6.642105263157895e-06, + "step": 1269 + }, + { + "epoch": 3.337278106508876, + "high_lr": 0.00033210526315789475, + "low_lr": 6.642105263157895e-06, + "step": 1269 + }, + { + "epoch": 3.337278106508876, + "high_lr": 0.00033210526315789475, + "low_lr": 6.642105263157895e-06, + "step": 1269 + }, + { + "epoch": 3.337278106508876, + "high_lr": 0.00033210526315789475, + "low_lr": 6.642105263157895e-06, + "step": 1269 + }, + { + "epoch": 3.337278106508876, + "high_lr": 0.00033210526315789475, + "low_lr": 6.642105263157895e-06, + "step": 1269 + }, + { + "epoch": 3.337278106508876, + "high_lr": 0.00033210526315789475, + "low_lr": 6.642105263157895e-06, + "step": 1269 + }, + { + "epoch": 3.3399079552925706, + "grad_norm": 1.5093498229980469, + "learning_rate": 0.00033157894736842103, + "loss": 1.3276, + "step": 1270 + }, + { + "epoch": 3.3399079552925706, + "high_lr": 0.00033157894736842103, + "low_lr": 6.631578947368421e-06, + "step": 1270 + }, + { + "epoch": 3.3399079552925706, + "high_lr": 0.00033157894736842103, + "low_lr": 6.631578947368421e-06, + "step": 1270 + }, + { + "epoch": 3.3399079552925706, + "high_lr": 0.00033157894736842103, + "low_lr": 6.631578947368421e-06, + "step": 1270 + }, + { + "epoch": 3.3399079552925706, + "high_lr": 0.00033157894736842103, + "low_lr": 6.631578947368421e-06, + "step": 1270 + }, + { + "epoch": 3.3399079552925706, + "high_lr": 0.00033157894736842103, + "low_lr": 6.631578947368421e-06, + "step": 1270 + }, + { + "epoch": 3.3399079552925706, + "high_lr": 0.00033157894736842103, + "low_lr": 6.631578947368421e-06, + "step": 1270 + }, + { + "epoch": 3.3399079552925706, + "high_lr": 0.00033157894736842103, + "low_lr": 6.631578947368421e-06, + "step": 1270 + }, + { + "epoch": 3.3399079552925706, + "high_lr": 0.00033157894736842103, + "low_lr": 6.631578947368421e-06, + "step": 1270 + }, + { + "epoch": 3.3425378040762657, + "grad_norm": 1.4945142269134521, + "learning_rate": 0.00033105263157894737, + "loss": 1.3251, + "step": 1271 + }, + { + "epoch": 3.3425378040762657, + "high_lr": 0.00033105263157894737, + "low_lr": 6.621052631578948e-06, + "step": 1271 + }, + { + "epoch": 3.3425378040762657, + "high_lr": 0.00033105263157894737, + "low_lr": 6.621052631578948e-06, + "step": 1271 + }, + { + "epoch": 3.3425378040762657, + "high_lr": 0.00033105263157894737, + "low_lr": 6.621052631578948e-06, + "step": 1271 + }, + { + "epoch": 3.3425378040762657, + "high_lr": 0.00033105263157894737, + "low_lr": 6.621052631578948e-06, + "step": 1271 + }, + { + "epoch": 3.3425378040762657, + "high_lr": 0.00033105263157894737, + "low_lr": 6.621052631578948e-06, + "step": 1271 + }, + { + "epoch": 3.3425378040762657, + "high_lr": 0.00033105263157894737, + "low_lr": 6.621052631578948e-06, + "step": 1271 + }, + { + "epoch": 3.3425378040762657, + "high_lr": 0.00033105263157894737, + "low_lr": 6.621052631578948e-06, + "step": 1271 + }, + { + "epoch": 3.3425378040762657, + "high_lr": 0.00033105263157894737, + "low_lr": 6.621052631578948e-06, + "step": 1271 + }, + { + "epoch": 3.3451676528599608, + "grad_norm": 1.4792543649673462, + "learning_rate": 0.00033052631578947366, + "loss": 1.3416, + "step": 1272 + }, + { + "epoch": 3.3451676528599608, + "high_lr": 0.00033052631578947366, + "low_lr": 6.610526315789474e-06, + "step": 1272 + }, + { + "epoch": 3.3451676528599608, + "high_lr": 0.00033052631578947366, + "low_lr": 6.610526315789474e-06, + "step": 1272 + }, + { + "epoch": 3.3451676528599608, + "high_lr": 0.00033052631578947366, + "low_lr": 6.610526315789474e-06, + "step": 1272 + }, + { + "epoch": 3.3451676528599608, + "high_lr": 0.00033052631578947366, + "low_lr": 6.610526315789474e-06, + "step": 1272 + }, + { + "epoch": 3.3451676528599608, + "high_lr": 0.00033052631578947366, + "low_lr": 6.610526315789474e-06, + "step": 1272 + }, + { + "epoch": 3.3451676528599608, + "high_lr": 0.00033052631578947366, + "low_lr": 6.610526315789474e-06, + "step": 1272 + }, + { + "epoch": 3.3451676528599608, + "high_lr": 0.00033052631578947366, + "low_lr": 6.610526315789474e-06, + "step": 1272 + }, + { + "epoch": 3.3451676528599608, + "high_lr": 0.00033052631578947366, + "low_lr": 6.610526315789474e-06, + "step": 1272 + }, + { + "epoch": 3.3477975016436554, + "grad_norm": 1.414167046546936, + "learning_rate": 0.00033, + "loss": 1.3377, + "step": 1273 + }, + { + "epoch": 3.3477975016436554, + "high_lr": 0.00033, + "low_lr": 6.600000000000001e-06, + "step": 1273 + }, + { + "epoch": 3.3477975016436554, + "high_lr": 0.00033, + "low_lr": 6.600000000000001e-06, + "step": 1273 + }, + { + "epoch": 3.3477975016436554, + "high_lr": 0.00033, + "low_lr": 6.600000000000001e-06, + "step": 1273 + }, + { + "epoch": 3.3477975016436554, + "high_lr": 0.00033, + "low_lr": 6.600000000000001e-06, + "step": 1273 + }, + { + "epoch": 3.3477975016436554, + "high_lr": 0.00033, + "low_lr": 6.600000000000001e-06, + "step": 1273 + }, + { + "epoch": 3.3477975016436554, + "high_lr": 0.00033, + "low_lr": 6.600000000000001e-06, + "step": 1273 + }, + { + "epoch": 3.3477975016436554, + "high_lr": 0.00033, + "low_lr": 6.600000000000001e-06, + "step": 1273 + }, + { + "epoch": 3.3477975016436554, + "high_lr": 0.00033, + "low_lr": 6.600000000000001e-06, + "step": 1273 + }, + { + "epoch": 3.3504273504273505, + "grad_norm": 1.4472049474716187, + "learning_rate": 0.00032947368421052634, + "loss": 1.2771, + "step": 1274 + }, + { + "epoch": 3.3504273504273505, + "high_lr": 0.00032947368421052634, + "low_lr": 6.589473684210527e-06, + "step": 1274 + }, + { + "epoch": 3.3504273504273505, + "high_lr": 0.00032947368421052634, + "low_lr": 6.589473684210527e-06, + "step": 1274 + }, + { + "epoch": 3.3504273504273505, + "high_lr": 0.00032947368421052634, + "low_lr": 6.589473684210527e-06, + "step": 1274 + }, + { + "epoch": 3.3504273504273505, + "high_lr": 0.00032947368421052634, + "low_lr": 6.589473684210527e-06, + "step": 1274 + }, + { + "epoch": 3.3504273504273505, + "high_lr": 0.00032947368421052634, + "low_lr": 6.589473684210527e-06, + "step": 1274 + }, + { + "epoch": 3.3504273504273505, + "high_lr": 0.00032947368421052634, + "low_lr": 6.589473684210527e-06, + "step": 1274 + }, + { + "epoch": 3.3504273504273505, + "high_lr": 0.00032947368421052634, + "low_lr": 6.589473684210527e-06, + "step": 1274 + }, + { + "epoch": 3.3504273504273505, + "high_lr": 0.00032947368421052634, + "low_lr": 6.589473684210527e-06, + "step": 1274 + }, + { + "epoch": 3.3530571992110456, + "grad_norm": 1.3168164491653442, + "learning_rate": 0.0003289473684210527, + "loss": 1.2955, + "step": 1275 + }, + { + "epoch": 3.3530571992110456, + "high_lr": 0.0003289473684210527, + "low_lr": 6.578947368421054e-06, + "step": 1275 + }, + { + "epoch": 3.3530571992110456, + "high_lr": 0.0003289473684210527, + "low_lr": 6.578947368421054e-06, + "step": 1275 + }, + { + "epoch": 3.3530571992110456, + "high_lr": 0.0003289473684210527, + "low_lr": 6.578947368421054e-06, + "step": 1275 + }, + { + "epoch": 3.3530571992110456, + "high_lr": 0.0003289473684210527, + "low_lr": 6.578947368421054e-06, + "step": 1275 + }, + { + "epoch": 3.3530571992110456, + "high_lr": 0.0003289473684210527, + "low_lr": 6.578947368421054e-06, + "step": 1275 + }, + { + "epoch": 3.3530571992110456, + "high_lr": 0.0003289473684210527, + "low_lr": 6.578947368421054e-06, + "step": 1275 + }, + { + "epoch": 3.3530571992110456, + "high_lr": 0.0003289473684210527, + "low_lr": 6.578947368421054e-06, + "step": 1275 + }, + { + "epoch": 3.3530571992110456, + "high_lr": 0.0003289473684210527, + "low_lr": 6.578947368421054e-06, + "step": 1275 + }, + { + "epoch": 3.3556870479947403, + "grad_norm": 1.312169075012207, + "learning_rate": 0.00032842105263157896, + "loss": 1.3291, + "step": 1276 + }, + { + "epoch": 3.3556870479947403, + "high_lr": 0.00032842105263157896, + "low_lr": 6.568421052631579e-06, + "step": 1276 + }, + { + "epoch": 3.3556870479947403, + "high_lr": 0.00032842105263157896, + "low_lr": 6.568421052631579e-06, + "step": 1276 + }, + { + "epoch": 3.3556870479947403, + "high_lr": 0.00032842105263157896, + "low_lr": 6.568421052631579e-06, + "step": 1276 + }, + { + "epoch": 3.3556870479947403, + "high_lr": 0.00032842105263157896, + "low_lr": 6.568421052631579e-06, + "step": 1276 + }, + { + "epoch": 3.3556870479947403, + "high_lr": 0.00032842105263157896, + "low_lr": 6.568421052631579e-06, + "step": 1276 + }, + { + "epoch": 3.3556870479947403, + "high_lr": 0.00032842105263157896, + "low_lr": 6.568421052631579e-06, + "step": 1276 + }, + { + "epoch": 3.3556870479947403, + "high_lr": 0.00032842105263157896, + "low_lr": 6.568421052631579e-06, + "step": 1276 + }, + { + "epoch": 3.3556870479947403, + "high_lr": 0.00032842105263157896, + "low_lr": 6.568421052631579e-06, + "step": 1276 + }, + { + "epoch": 3.3583168967784354, + "grad_norm": 1.3834537267684937, + "learning_rate": 0.00032789473684210525, + "loss": 1.2886, + "step": 1277 + }, + { + "epoch": 3.3583168967784354, + "high_lr": 0.00032789473684210525, + "low_lr": 6.557894736842106e-06, + "step": 1277 + }, + { + "epoch": 3.3583168967784354, + "high_lr": 0.00032789473684210525, + "low_lr": 6.557894736842106e-06, + "step": 1277 + }, + { + "epoch": 3.3583168967784354, + "high_lr": 0.00032789473684210525, + "low_lr": 6.557894736842106e-06, + "step": 1277 + }, + { + "epoch": 3.3583168967784354, + "high_lr": 0.00032789473684210525, + "low_lr": 6.557894736842106e-06, + "step": 1277 + }, + { + "epoch": 3.3583168967784354, + "high_lr": 0.00032789473684210525, + "low_lr": 6.557894736842106e-06, + "step": 1277 + }, + { + "epoch": 3.3583168967784354, + "high_lr": 0.00032789473684210525, + "low_lr": 6.557894736842106e-06, + "step": 1277 + }, + { + "epoch": 3.3583168967784354, + "high_lr": 0.00032789473684210525, + "low_lr": 6.557894736842106e-06, + "step": 1277 + }, + { + "epoch": 3.3583168967784354, + "high_lr": 0.00032789473684210525, + "low_lr": 6.557894736842106e-06, + "step": 1277 + }, + { + "epoch": 3.36094674556213, + "grad_norm": 1.4049303531646729, + "learning_rate": 0.0003273684210526316, + "loss": 1.3054, + "step": 1278 + }, + { + "epoch": 3.36094674556213, + "high_lr": 0.0003273684210526316, + "low_lr": 6.547368421052632e-06, + "step": 1278 + }, + { + "epoch": 3.36094674556213, + "high_lr": 0.0003273684210526316, + "low_lr": 6.547368421052632e-06, + "step": 1278 + }, + { + "epoch": 3.36094674556213, + "high_lr": 0.0003273684210526316, + "low_lr": 6.547368421052632e-06, + "step": 1278 + }, + { + "epoch": 3.36094674556213, + "high_lr": 0.0003273684210526316, + "low_lr": 6.547368421052632e-06, + "step": 1278 + }, + { + "epoch": 3.36094674556213, + "high_lr": 0.0003273684210526316, + "low_lr": 6.547368421052632e-06, + "step": 1278 + }, + { + "epoch": 3.36094674556213, + "high_lr": 0.0003273684210526316, + "low_lr": 6.547368421052632e-06, + "step": 1278 + }, + { + "epoch": 3.36094674556213, + "high_lr": 0.0003273684210526316, + "low_lr": 6.547368421052632e-06, + "step": 1278 + }, + { + "epoch": 3.36094674556213, + "high_lr": 0.0003273684210526316, + "low_lr": 6.547368421052632e-06, + "step": 1278 + }, + { + "epoch": 3.363576594345825, + "grad_norm": 1.3778067827224731, + "learning_rate": 0.0003268421052631579, + "loss": 1.3031, + "step": 1279 + }, + { + "epoch": 3.363576594345825, + "high_lr": 0.0003268421052631579, + "low_lr": 6.536842105263158e-06, + "step": 1279 + }, + { + "epoch": 3.363576594345825, + "high_lr": 0.0003268421052631579, + "low_lr": 6.536842105263158e-06, + "step": 1279 + }, + { + "epoch": 3.363576594345825, + "high_lr": 0.0003268421052631579, + "low_lr": 6.536842105263158e-06, + "step": 1279 + }, + { + "epoch": 3.363576594345825, + "high_lr": 0.0003268421052631579, + "low_lr": 6.536842105263158e-06, + "step": 1279 + }, + { + "epoch": 3.363576594345825, + "high_lr": 0.0003268421052631579, + "low_lr": 6.536842105263158e-06, + "step": 1279 + }, + { + "epoch": 3.363576594345825, + "high_lr": 0.0003268421052631579, + "low_lr": 6.536842105263158e-06, + "step": 1279 + }, + { + "epoch": 3.363576594345825, + "high_lr": 0.0003268421052631579, + "low_lr": 6.536842105263158e-06, + "step": 1279 + }, + { + "epoch": 3.363576594345825, + "high_lr": 0.0003268421052631579, + "low_lr": 6.536842105263158e-06, + "step": 1279 + }, + { + "epoch": 3.36620644312952, + "grad_norm": 1.3682295083999634, + "learning_rate": 0.0003263157894736842, + "loss": 1.2862, + "step": 1280 + }, + { + "epoch": 3.36620644312952, + "high_lr": 0.0003263157894736842, + "low_lr": 6.526315789473685e-06, + "step": 1280 + }, + { + "epoch": 3.36620644312952, + "high_lr": 0.0003263157894736842, + "low_lr": 6.526315789473685e-06, + "step": 1280 + }, + { + "epoch": 3.36620644312952, + "high_lr": 0.0003263157894736842, + "low_lr": 6.526315789473685e-06, + "step": 1280 + }, + { + "epoch": 3.36620644312952, + "high_lr": 0.0003263157894736842, + "low_lr": 6.526315789473685e-06, + "step": 1280 + }, + { + "epoch": 3.36620644312952, + "high_lr": 0.0003263157894736842, + "low_lr": 6.526315789473685e-06, + "step": 1280 + }, + { + "epoch": 3.36620644312952, + "high_lr": 0.0003263157894736842, + "low_lr": 6.526315789473685e-06, + "step": 1280 + }, + { + "epoch": 3.36620644312952, + "high_lr": 0.0003263157894736842, + "low_lr": 6.526315789473685e-06, + "step": 1280 + }, + { + "epoch": 3.36620644312952, + "high_lr": 0.0003263157894736842, + "low_lr": 6.526315789473685e-06, + "step": 1280 + }, + { + "epoch": 3.368836291913215, + "grad_norm": 1.3198716640472412, + "learning_rate": 0.0003257894736842105, + "loss": 1.2398, + "step": 1281 + }, + { + "epoch": 3.368836291913215, + "high_lr": 0.0003257894736842105, + "low_lr": 6.515789473684211e-06, + "step": 1281 + }, + { + "epoch": 3.368836291913215, + "high_lr": 0.0003257894736842105, + "low_lr": 6.515789473684211e-06, + "step": 1281 + }, + { + "epoch": 3.368836291913215, + "high_lr": 0.0003257894736842105, + "low_lr": 6.515789473684211e-06, + "step": 1281 + }, + { + "epoch": 3.368836291913215, + "high_lr": 0.0003257894736842105, + "low_lr": 6.515789473684211e-06, + "step": 1281 + }, + { + "epoch": 3.368836291913215, + "high_lr": 0.0003257894736842105, + "low_lr": 6.515789473684211e-06, + "step": 1281 + }, + { + "epoch": 3.368836291913215, + "high_lr": 0.0003257894736842105, + "low_lr": 6.515789473684211e-06, + "step": 1281 + }, + { + "epoch": 3.368836291913215, + "high_lr": 0.0003257894736842105, + "low_lr": 6.515789473684211e-06, + "step": 1281 + }, + { + "epoch": 3.368836291913215, + "high_lr": 0.0003257894736842105, + "low_lr": 6.515789473684211e-06, + "step": 1281 + }, + { + "epoch": 3.37146614069691, + "grad_norm": 1.4166431427001953, + "learning_rate": 0.0003252631578947369, + "loss": 1.3183, + "step": 1282 + }, + { + "epoch": 3.37146614069691, + "high_lr": 0.0003252631578947369, + "low_lr": 6.505263157894738e-06, + "step": 1282 + }, + { + "epoch": 3.37146614069691, + "high_lr": 0.0003252631578947369, + "low_lr": 6.505263157894738e-06, + "step": 1282 + }, + { + "epoch": 3.37146614069691, + "high_lr": 0.0003252631578947369, + "low_lr": 6.505263157894738e-06, + "step": 1282 + }, + { + "epoch": 3.37146614069691, + "high_lr": 0.0003252631578947369, + "low_lr": 6.505263157894738e-06, + "step": 1282 + }, + { + "epoch": 3.37146614069691, + "high_lr": 0.0003252631578947369, + "low_lr": 6.505263157894738e-06, + "step": 1282 + }, + { + "epoch": 3.37146614069691, + "high_lr": 0.0003252631578947369, + "low_lr": 6.505263157894738e-06, + "step": 1282 + }, + { + "epoch": 3.37146614069691, + "high_lr": 0.0003252631578947369, + "low_lr": 6.505263157894738e-06, + "step": 1282 + }, + { + "epoch": 3.37146614069691, + "high_lr": 0.0003252631578947369, + "low_lr": 6.505263157894738e-06, + "step": 1282 + }, + { + "epoch": 3.3740959894806046, + "grad_norm": 1.3787736892700195, + "learning_rate": 0.0003247368421052632, + "loss": 1.3056, + "step": 1283 + }, + { + "epoch": 3.3740959894806046, + "high_lr": 0.0003247368421052632, + "low_lr": 6.494736842105264e-06, + "step": 1283 + }, + { + "epoch": 3.3740959894806046, + "high_lr": 0.0003247368421052632, + "low_lr": 6.494736842105264e-06, + "step": 1283 + }, + { + "epoch": 3.3740959894806046, + "high_lr": 0.0003247368421052632, + "low_lr": 6.494736842105264e-06, + "step": 1283 + }, + { + "epoch": 3.3740959894806046, + "high_lr": 0.0003247368421052632, + "low_lr": 6.494736842105264e-06, + "step": 1283 + }, + { + "epoch": 3.3740959894806046, + "high_lr": 0.0003247368421052632, + "low_lr": 6.494736842105264e-06, + "step": 1283 + }, + { + "epoch": 3.3740959894806046, + "high_lr": 0.0003247368421052632, + "low_lr": 6.494736842105264e-06, + "step": 1283 + }, + { + "epoch": 3.3740959894806046, + "high_lr": 0.0003247368421052632, + "low_lr": 6.494736842105264e-06, + "step": 1283 + }, + { + "epoch": 3.3740959894806046, + "high_lr": 0.0003247368421052632, + "low_lr": 6.494736842105264e-06, + "step": 1283 + }, + { + "epoch": 3.3767258382642997, + "grad_norm": 1.41636061668396, + "learning_rate": 0.00032421052631578947, + "loss": 1.3253, + "step": 1284 + }, + { + "epoch": 3.3767258382642997, + "high_lr": 0.00032421052631578947, + "low_lr": 6.484210526315789e-06, + "step": 1284 + }, + { + "epoch": 3.3767258382642997, + "high_lr": 0.00032421052631578947, + "low_lr": 6.484210526315789e-06, + "step": 1284 + }, + { + "epoch": 3.3767258382642997, + "high_lr": 0.00032421052631578947, + "low_lr": 6.484210526315789e-06, + "step": 1284 + }, + { + "epoch": 3.3767258382642997, + "high_lr": 0.00032421052631578947, + "low_lr": 6.484210526315789e-06, + "step": 1284 + }, + { + "epoch": 3.3767258382642997, + "high_lr": 0.00032421052631578947, + "low_lr": 6.484210526315789e-06, + "step": 1284 + }, + { + "epoch": 3.3767258382642997, + "high_lr": 0.00032421052631578947, + "low_lr": 6.484210526315789e-06, + "step": 1284 + }, + { + "epoch": 3.3767258382642997, + "high_lr": 0.00032421052631578947, + "low_lr": 6.484210526315789e-06, + "step": 1284 + }, + { + "epoch": 3.3767258382642997, + "high_lr": 0.00032421052631578947, + "low_lr": 6.484210526315789e-06, + "step": 1284 + }, + { + "epoch": 3.379355687047995, + "grad_norm": 1.5021086931228638, + "learning_rate": 0.0003236842105263158, + "loss": 1.28, + "step": 1285 + }, + { + "epoch": 3.379355687047995, + "high_lr": 0.0003236842105263158, + "low_lr": 6.473684210526316e-06, + "step": 1285 + }, + { + "epoch": 3.379355687047995, + "high_lr": 0.0003236842105263158, + "low_lr": 6.473684210526316e-06, + "step": 1285 + }, + { + "epoch": 3.379355687047995, + "high_lr": 0.0003236842105263158, + "low_lr": 6.473684210526316e-06, + "step": 1285 + }, + { + "epoch": 3.379355687047995, + "high_lr": 0.0003236842105263158, + "low_lr": 6.473684210526316e-06, + "step": 1285 + }, + { + "epoch": 3.379355687047995, + "high_lr": 0.0003236842105263158, + "low_lr": 6.473684210526316e-06, + "step": 1285 + }, + { + "epoch": 3.379355687047995, + "high_lr": 0.0003236842105263158, + "low_lr": 6.473684210526316e-06, + "step": 1285 + }, + { + "epoch": 3.379355687047995, + "high_lr": 0.0003236842105263158, + "low_lr": 6.473684210526316e-06, + "step": 1285 + }, + { + "epoch": 3.379355687047995, + "high_lr": 0.0003236842105263158, + "low_lr": 6.473684210526316e-06, + "step": 1285 + }, + { + "epoch": 3.3819855358316895, + "grad_norm": 1.497136116027832, + "learning_rate": 0.0003231578947368421, + "loss": 1.2785, + "step": 1286 + }, + { + "epoch": 3.3819855358316895, + "high_lr": 0.0003231578947368421, + "low_lr": 6.463157894736843e-06, + "step": 1286 + }, + { + "epoch": 3.3819855358316895, + "high_lr": 0.0003231578947368421, + "low_lr": 6.463157894736843e-06, + "step": 1286 + }, + { + "epoch": 3.3819855358316895, + "high_lr": 0.0003231578947368421, + "low_lr": 6.463157894736843e-06, + "step": 1286 + }, + { + "epoch": 3.3819855358316895, + "high_lr": 0.0003231578947368421, + "low_lr": 6.463157894736843e-06, + "step": 1286 + }, + { + "epoch": 3.3819855358316895, + "high_lr": 0.0003231578947368421, + "low_lr": 6.463157894736843e-06, + "step": 1286 + }, + { + "epoch": 3.3819855358316895, + "high_lr": 0.0003231578947368421, + "low_lr": 6.463157894736843e-06, + "step": 1286 + }, + { + "epoch": 3.3819855358316895, + "high_lr": 0.0003231578947368421, + "low_lr": 6.463157894736843e-06, + "step": 1286 + }, + { + "epoch": 3.3819855358316895, + "high_lr": 0.0003231578947368421, + "low_lr": 6.463157894736843e-06, + "step": 1286 + }, + { + "epoch": 3.3846153846153846, + "grad_norm": 1.3111793994903564, + "learning_rate": 0.00032263157894736843, + "loss": 1.2936, + "step": 1287 + }, + { + "epoch": 3.3846153846153846, + "high_lr": 0.00032263157894736843, + "low_lr": 6.452631578947369e-06, + "step": 1287 + }, + { + "epoch": 3.3846153846153846, + "high_lr": 0.00032263157894736843, + "low_lr": 6.452631578947369e-06, + "step": 1287 + }, + { + "epoch": 3.3846153846153846, + "high_lr": 0.00032263157894736843, + "low_lr": 6.452631578947369e-06, + "step": 1287 + }, + { + "epoch": 3.3846153846153846, + "high_lr": 0.00032263157894736843, + "low_lr": 6.452631578947369e-06, + "step": 1287 + }, + { + "epoch": 3.3846153846153846, + "high_lr": 0.00032263157894736843, + "low_lr": 6.452631578947369e-06, + "step": 1287 + }, + { + "epoch": 3.3846153846153846, + "high_lr": 0.00032263157894736843, + "low_lr": 6.452631578947369e-06, + "step": 1287 + }, + { + "epoch": 3.3846153846153846, + "high_lr": 0.00032263157894736843, + "low_lr": 6.452631578947369e-06, + "step": 1287 + }, + { + "epoch": 3.3846153846153846, + "high_lr": 0.00032263157894736843, + "low_lr": 6.452631578947369e-06, + "step": 1287 + }, + { + "epoch": 3.3872452333990797, + "grad_norm": 1.333734154701233, + "learning_rate": 0.0003221052631578947, + "loss": 1.286, + "step": 1288 + }, + { + "epoch": 3.3872452333990797, + "high_lr": 0.0003221052631578947, + "low_lr": 6.442105263157895e-06, + "step": 1288 + }, + { + "epoch": 3.3872452333990797, + "high_lr": 0.0003221052631578947, + "low_lr": 6.442105263157895e-06, + "step": 1288 + }, + { + "epoch": 3.3872452333990797, + "high_lr": 0.0003221052631578947, + "low_lr": 6.442105263157895e-06, + "step": 1288 + }, + { + "epoch": 3.3872452333990797, + "high_lr": 0.0003221052631578947, + "low_lr": 6.442105263157895e-06, + "step": 1288 + }, + { + "epoch": 3.3872452333990797, + "high_lr": 0.0003221052631578947, + "low_lr": 6.442105263157895e-06, + "step": 1288 + }, + { + "epoch": 3.3872452333990797, + "high_lr": 0.0003221052631578947, + "low_lr": 6.442105263157895e-06, + "step": 1288 + }, + { + "epoch": 3.3872452333990797, + "high_lr": 0.0003221052631578947, + "low_lr": 6.442105263157895e-06, + "step": 1288 + }, + { + "epoch": 3.3872452333990797, + "high_lr": 0.0003221052631578947, + "low_lr": 6.442105263157895e-06, + "step": 1288 + }, + { + "epoch": 3.3898750821827743, + "grad_norm": 1.371147632598877, + "learning_rate": 0.00032157894736842106, + "loss": 1.2734, + "step": 1289 + }, + { + "epoch": 3.3898750821827743, + "high_lr": 0.00032157894736842106, + "low_lr": 6.431578947368422e-06, + "step": 1289 + }, + { + "epoch": 3.3898750821827743, + "high_lr": 0.00032157894736842106, + "low_lr": 6.431578947368422e-06, + "step": 1289 + }, + { + "epoch": 3.3898750821827743, + "high_lr": 0.00032157894736842106, + "low_lr": 6.431578947368422e-06, + "step": 1289 + }, + { + "epoch": 3.3898750821827743, + "high_lr": 0.00032157894736842106, + "low_lr": 6.431578947368422e-06, + "step": 1289 + }, + { + "epoch": 3.3898750821827743, + "high_lr": 0.00032157894736842106, + "low_lr": 6.431578947368422e-06, + "step": 1289 + }, + { + "epoch": 3.3898750821827743, + "high_lr": 0.00032157894736842106, + "low_lr": 6.431578947368422e-06, + "step": 1289 + }, + { + "epoch": 3.3898750821827743, + "high_lr": 0.00032157894736842106, + "low_lr": 6.431578947368422e-06, + "step": 1289 + }, + { + "epoch": 3.3898750821827743, + "high_lr": 0.00032157894736842106, + "low_lr": 6.431578947368422e-06, + "step": 1289 + }, + { + "epoch": 3.3925049309664694, + "grad_norm": 1.4894404411315918, + "learning_rate": 0.0003210526315789474, + "loss": 1.3076, + "step": 1290 + }, + { + "epoch": 3.3925049309664694, + "high_lr": 0.0003210526315789474, + "low_lr": 6.421052631578948e-06, + "step": 1290 + }, + { + "epoch": 3.3925049309664694, + "high_lr": 0.0003210526315789474, + "low_lr": 6.421052631578948e-06, + "step": 1290 + }, + { + "epoch": 3.3925049309664694, + "high_lr": 0.0003210526315789474, + "low_lr": 6.421052631578948e-06, + "step": 1290 + }, + { + "epoch": 3.3925049309664694, + "high_lr": 0.0003210526315789474, + "low_lr": 6.421052631578948e-06, + "step": 1290 + }, + { + "epoch": 3.3925049309664694, + "high_lr": 0.0003210526315789474, + "low_lr": 6.421052631578948e-06, + "step": 1290 + }, + { + "epoch": 3.3925049309664694, + "high_lr": 0.0003210526315789474, + "low_lr": 6.421052631578948e-06, + "step": 1290 + }, + { + "epoch": 3.3925049309664694, + "high_lr": 0.0003210526315789474, + "low_lr": 6.421052631578948e-06, + "step": 1290 + }, + { + "epoch": 3.3925049309664694, + "high_lr": 0.0003210526315789474, + "low_lr": 6.421052631578948e-06, + "step": 1290 + }, + { + "epoch": 3.3951347797501645, + "grad_norm": 1.3065286874771118, + "learning_rate": 0.0003205263157894737, + "loss": 1.2725, + "step": 1291 + }, + { + "epoch": 3.3951347797501645, + "high_lr": 0.0003205263157894737, + "low_lr": 6.410526315789473e-06, + "step": 1291 + }, + { + "epoch": 3.3951347797501645, + "high_lr": 0.0003205263157894737, + "low_lr": 6.410526315789473e-06, + "step": 1291 + }, + { + "epoch": 3.3951347797501645, + "high_lr": 0.0003205263157894737, + "low_lr": 6.410526315789473e-06, + "step": 1291 + }, + { + "epoch": 3.3951347797501645, + "high_lr": 0.0003205263157894737, + "low_lr": 6.410526315789473e-06, + "step": 1291 + }, + { + "epoch": 3.3951347797501645, + "high_lr": 0.0003205263157894737, + "low_lr": 6.410526315789473e-06, + "step": 1291 + }, + { + "epoch": 3.3951347797501645, + "high_lr": 0.0003205263157894737, + "low_lr": 6.410526315789473e-06, + "step": 1291 + }, + { + "epoch": 3.3951347797501645, + "high_lr": 0.0003205263157894737, + "low_lr": 6.410526315789473e-06, + "step": 1291 + }, + { + "epoch": 3.3951347797501645, + "high_lr": 0.0003205263157894737, + "low_lr": 6.410526315789473e-06, + "step": 1291 + }, + { + "epoch": 3.397764628533859, + "grad_norm": 1.4746716022491455, + "learning_rate": 0.00032, + "loss": 1.2408, + "step": 1292 + }, + { + "epoch": 3.397764628533859, + "high_lr": 0.00032, + "low_lr": 6.4000000000000006e-06, + "step": 1292 + }, + { + "epoch": 3.397764628533859, + "high_lr": 0.00032, + "low_lr": 6.4000000000000006e-06, + "step": 1292 + }, + { + "epoch": 3.397764628533859, + "high_lr": 0.00032, + "low_lr": 6.4000000000000006e-06, + "step": 1292 + }, + { + "epoch": 3.397764628533859, + "high_lr": 0.00032, + "low_lr": 6.4000000000000006e-06, + "step": 1292 + }, + { + "epoch": 3.397764628533859, + "high_lr": 0.00032, + "low_lr": 6.4000000000000006e-06, + "step": 1292 + }, + { + "epoch": 3.397764628533859, + "high_lr": 0.00032, + "low_lr": 6.4000000000000006e-06, + "step": 1292 + }, + { + "epoch": 3.397764628533859, + "high_lr": 0.00032, + "low_lr": 6.4000000000000006e-06, + "step": 1292 + }, + { + "epoch": 3.397764628533859, + "high_lr": 0.00032, + "low_lr": 6.4000000000000006e-06, + "step": 1292 + }, + { + "epoch": 3.4003944773175543, + "grad_norm": 1.4193333387374878, + "learning_rate": 0.0003194736842105263, + "loss": 1.3131, + "step": 1293 + }, + { + "epoch": 3.4003944773175543, + "high_lr": 0.0003194736842105263, + "low_lr": 6.389473684210527e-06, + "step": 1293 + }, + { + "epoch": 3.4003944773175543, + "high_lr": 0.0003194736842105263, + "low_lr": 6.389473684210527e-06, + "step": 1293 + }, + { + "epoch": 3.4003944773175543, + "high_lr": 0.0003194736842105263, + "low_lr": 6.389473684210527e-06, + "step": 1293 + }, + { + "epoch": 3.4003944773175543, + "high_lr": 0.0003194736842105263, + "low_lr": 6.389473684210527e-06, + "step": 1293 + }, + { + "epoch": 3.4003944773175543, + "high_lr": 0.0003194736842105263, + "low_lr": 6.389473684210527e-06, + "step": 1293 + }, + { + "epoch": 3.4003944773175543, + "high_lr": 0.0003194736842105263, + "low_lr": 6.389473684210527e-06, + "step": 1293 + }, + { + "epoch": 3.4003944773175543, + "high_lr": 0.0003194736842105263, + "low_lr": 6.389473684210527e-06, + "step": 1293 + }, + { + "epoch": 3.4003944773175543, + "high_lr": 0.0003194736842105263, + "low_lr": 6.389473684210527e-06, + "step": 1293 + }, + { + "epoch": 3.4030243261012494, + "grad_norm": 1.4493755102157593, + "learning_rate": 0.00031894736842105265, + "loss": 1.3229, + "step": 1294 + }, + { + "epoch": 3.4030243261012494, + "high_lr": 0.00031894736842105265, + "low_lr": 6.378947368421053e-06, + "step": 1294 + }, + { + "epoch": 3.4030243261012494, + "high_lr": 0.00031894736842105265, + "low_lr": 6.378947368421053e-06, + "step": 1294 + }, + { + "epoch": 3.4030243261012494, + "high_lr": 0.00031894736842105265, + "low_lr": 6.378947368421053e-06, + "step": 1294 + }, + { + "epoch": 3.4030243261012494, + "high_lr": 0.00031894736842105265, + "low_lr": 6.378947368421053e-06, + "step": 1294 + }, + { + "epoch": 3.4030243261012494, + "high_lr": 0.00031894736842105265, + "low_lr": 6.378947368421053e-06, + "step": 1294 + }, + { + "epoch": 3.4030243261012494, + "high_lr": 0.00031894736842105265, + "low_lr": 6.378947368421053e-06, + "step": 1294 + }, + { + "epoch": 3.4030243261012494, + "high_lr": 0.00031894736842105265, + "low_lr": 6.378947368421053e-06, + "step": 1294 + }, + { + "epoch": 3.4030243261012494, + "high_lr": 0.00031894736842105265, + "low_lr": 6.378947368421053e-06, + "step": 1294 + }, + { + "epoch": 3.405654174884944, + "grad_norm": 1.3478337526321411, + "learning_rate": 0.00031842105263157894, + "loss": 1.2642, + "step": 1295 + }, + { + "epoch": 3.405654174884944, + "high_lr": 0.00031842105263157894, + "low_lr": 6.3684210526315795e-06, + "step": 1295 + }, + { + "epoch": 3.405654174884944, + "high_lr": 0.00031842105263157894, + "low_lr": 6.3684210526315795e-06, + "step": 1295 + }, + { + "epoch": 3.405654174884944, + "high_lr": 0.00031842105263157894, + "low_lr": 6.3684210526315795e-06, + "step": 1295 + }, + { + "epoch": 3.405654174884944, + "high_lr": 0.00031842105263157894, + "low_lr": 6.3684210526315795e-06, + "step": 1295 + }, + { + "epoch": 3.405654174884944, + "high_lr": 0.00031842105263157894, + "low_lr": 6.3684210526315795e-06, + "step": 1295 + }, + { + "epoch": 3.405654174884944, + "high_lr": 0.00031842105263157894, + "low_lr": 6.3684210526315795e-06, + "step": 1295 + }, + { + "epoch": 3.405654174884944, + "high_lr": 0.00031842105263157894, + "low_lr": 6.3684210526315795e-06, + "step": 1295 + }, + { + "epoch": 3.405654174884944, + "high_lr": 0.00031842105263157894, + "low_lr": 6.3684210526315795e-06, + "step": 1295 + }, + { + "epoch": 3.408284023668639, + "grad_norm": 1.4272356033325195, + "learning_rate": 0.0003178947368421053, + "loss": 1.3132, + "step": 1296 + }, + { + "epoch": 3.408284023668639, + "high_lr": 0.0003178947368421053, + "low_lr": 6.357894736842106e-06, + "step": 1296 + }, + { + "epoch": 3.408284023668639, + "high_lr": 0.0003178947368421053, + "low_lr": 6.357894736842106e-06, + "step": 1296 + }, + { + "epoch": 3.408284023668639, + "high_lr": 0.0003178947368421053, + "low_lr": 6.357894736842106e-06, + "step": 1296 + }, + { + "epoch": 3.408284023668639, + "high_lr": 0.0003178947368421053, + "low_lr": 6.357894736842106e-06, + "step": 1296 + }, + { + "epoch": 3.408284023668639, + "high_lr": 0.0003178947368421053, + "low_lr": 6.357894736842106e-06, + "step": 1296 + }, + { + "epoch": 3.408284023668639, + "high_lr": 0.0003178947368421053, + "low_lr": 6.357894736842106e-06, + "step": 1296 + }, + { + "epoch": 3.408284023668639, + "high_lr": 0.0003178947368421053, + "low_lr": 6.357894736842106e-06, + "step": 1296 + }, + { + "epoch": 3.408284023668639, + "high_lr": 0.0003178947368421053, + "low_lr": 6.357894736842106e-06, + "step": 1296 + }, + { + "epoch": 3.4109138724523342, + "grad_norm": 1.3690342903137207, + "learning_rate": 0.00031736842105263156, + "loss": 1.3243, + "step": 1297 + }, + { + "epoch": 3.4109138724523342, + "high_lr": 0.00031736842105263156, + "low_lr": 6.347368421052632e-06, + "step": 1297 + }, + { + "epoch": 3.4109138724523342, + "high_lr": 0.00031736842105263156, + "low_lr": 6.347368421052632e-06, + "step": 1297 + }, + { + "epoch": 3.4109138724523342, + "high_lr": 0.00031736842105263156, + "low_lr": 6.347368421052632e-06, + "step": 1297 + }, + { + "epoch": 3.4109138724523342, + "high_lr": 0.00031736842105263156, + "low_lr": 6.347368421052632e-06, + "step": 1297 + }, + { + "epoch": 3.4109138724523342, + "high_lr": 0.00031736842105263156, + "low_lr": 6.347368421052632e-06, + "step": 1297 + }, + { + "epoch": 3.4109138724523342, + "high_lr": 0.00031736842105263156, + "low_lr": 6.347368421052632e-06, + "step": 1297 + }, + { + "epoch": 3.4109138724523342, + "high_lr": 0.00031736842105263156, + "low_lr": 6.347368421052632e-06, + "step": 1297 + }, + { + "epoch": 3.4109138724523342, + "high_lr": 0.00031736842105263156, + "low_lr": 6.347368421052632e-06, + "step": 1297 + }, + { + "epoch": 3.413543721236029, + "grad_norm": 1.4308713674545288, + "learning_rate": 0.00031684210526315785, + "loss": 1.2948, + "step": 1298 + }, + { + "epoch": 3.413543721236029, + "high_lr": 0.00031684210526315785, + "low_lr": 6.336842105263158e-06, + "step": 1298 + }, + { + "epoch": 3.413543721236029, + "high_lr": 0.00031684210526315785, + "low_lr": 6.336842105263158e-06, + "step": 1298 + }, + { + "epoch": 3.413543721236029, + "high_lr": 0.00031684210526315785, + "low_lr": 6.336842105263158e-06, + "step": 1298 + }, + { + "epoch": 3.413543721236029, + "high_lr": 0.00031684210526315785, + "low_lr": 6.336842105263158e-06, + "step": 1298 + }, + { + "epoch": 3.413543721236029, + "high_lr": 0.00031684210526315785, + "low_lr": 6.336842105263158e-06, + "step": 1298 + }, + { + "epoch": 3.413543721236029, + "high_lr": 0.00031684210526315785, + "low_lr": 6.336842105263158e-06, + "step": 1298 + }, + { + "epoch": 3.413543721236029, + "high_lr": 0.00031684210526315785, + "low_lr": 6.336842105263158e-06, + "step": 1298 + }, + { + "epoch": 3.413543721236029, + "high_lr": 0.00031684210526315785, + "low_lr": 6.336842105263158e-06, + "step": 1298 + }, + { + "epoch": 3.416173570019724, + "grad_norm": 1.416103482246399, + "learning_rate": 0.00031631578947368424, + "loss": 1.2928, + "step": 1299 + }, + { + "epoch": 3.416173570019724, + "high_lr": 0.00031631578947368424, + "low_lr": 6.326315789473685e-06, + "step": 1299 + }, + { + "epoch": 3.416173570019724, + "high_lr": 0.00031631578947368424, + "low_lr": 6.326315789473685e-06, + "step": 1299 + }, + { + "epoch": 3.416173570019724, + "high_lr": 0.00031631578947368424, + "low_lr": 6.326315789473685e-06, + "step": 1299 + }, + { + "epoch": 3.416173570019724, + "high_lr": 0.00031631578947368424, + "low_lr": 6.326315789473685e-06, + "step": 1299 + }, + { + "epoch": 3.416173570019724, + "high_lr": 0.00031631578947368424, + "low_lr": 6.326315789473685e-06, + "step": 1299 + }, + { + "epoch": 3.416173570019724, + "high_lr": 0.00031631578947368424, + "low_lr": 6.326315789473685e-06, + "step": 1299 + }, + { + "epoch": 3.416173570019724, + "high_lr": 0.00031631578947368424, + "low_lr": 6.326315789473685e-06, + "step": 1299 + }, + { + "epoch": 3.416173570019724, + "high_lr": 0.00031631578947368424, + "low_lr": 6.326315789473685e-06, + "step": 1299 + }, + { + "epoch": 3.4188034188034186, + "grad_norm": 1.3614590167999268, + "learning_rate": 0.00031578947368421053, + "loss": 1.2941, + "step": 1300 + }, + { + "epoch": 3.4188034188034186, + "high_lr": 0.00031578947368421053, + "low_lr": 6.31578947368421e-06, + "step": 1300 + }, + { + "epoch": 3.4188034188034186, + "high_lr": 0.00031578947368421053, + "low_lr": 6.31578947368421e-06, + "step": 1300 + }, + { + "epoch": 3.4188034188034186, + "high_lr": 0.00031578947368421053, + "low_lr": 6.31578947368421e-06, + "step": 1300 + }, + { + "epoch": 3.4188034188034186, + "high_lr": 0.00031578947368421053, + "low_lr": 6.31578947368421e-06, + "step": 1300 + }, + { + "epoch": 3.4188034188034186, + "high_lr": 0.00031578947368421053, + "low_lr": 6.31578947368421e-06, + "step": 1300 + }, + { + "epoch": 3.4188034188034186, + "high_lr": 0.00031578947368421053, + "low_lr": 6.31578947368421e-06, + "step": 1300 + }, + { + "epoch": 3.4188034188034186, + "high_lr": 0.00031578947368421053, + "low_lr": 6.31578947368421e-06, + "step": 1300 + }, + { + "epoch": 3.4188034188034186, + "high_lr": 0.00031578947368421053, + "low_lr": 6.31578947368421e-06, + "step": 1300 + }, + { + "epoch": 3.4214332675871137, + "grad_norm": 1.4130271673202515, + "learning_rate": 0.00031526315789473687, + "loss": 1.3067, + "step": 1301 + }, + { + "epoch": 3.4214332675871137, + "high_lr": 0.00031526315789473687, + "low_lr": 6.3052631578947375e-06, + "step": 1301 + }, + { + "epoch": 3.4214332675871137, + "high_lr": 0.00031526315789473687, + "low_lr": 6.3052631578947375e-06, + "step": 1301 + }, + { + "epoch": 3.4214332675871137, + "high_lr": 0.00031526315789473687, + "low_lr": 6.3052631578947375e-06, + "step": 1301 + }, + { + "epoch": 3.4214332675871137, + "high_lr": 0.00031526315789473687, + "low_lr": 6.3052631578947375e-06, + "step": 1301 + }, + { + "epoch": 3.4214332675871137, + "high_lr": 0.00031526315789473687, + "low_lr": 6.3052631578947375e-06, + "step": 1301 + }, + { + "epoch": 3.4214332675871137, + "high_lr": 0.00031526315789473687, + "low_lr": 6.3052631578947375e-06, + "step": 1301 + }, + { + "epoch": 3.4214332675871137, + "high_lr": 0.00031526315789473687, + "low_lr": 6.3052631578947375e-06, + "step": 1301 + }, + { + "epoch": 3.4214332675871137, + "high_lr": 0.00031526315789473687, + "low_lr": 6.3052631578947375e-06, + "step": 1301 + }, + { + "epoch": 3.424063116370809, + "grad_norm": 1.4979448318481445, + "learning_rate": 0.00031473684210526316, + "loss": 1.3681, + "step": 1302 + }, + { + "epoch": 3.424063116370809, + "high_lr": 0.00031473684210526316, + "low_lr": 6.294736842105264e-06, + "step": 1302 + }, + { + "epoch": 3.424063116370809, + "high_lr": 0.00031473684210526316, + "low_lr": 6.294736842105264e-06, + "step": 1302 + }, + { + "epoch": 3.424063116370809, + "high_lr": 0.00031473684210526316, + "low_lr": 6.294736842105264e-06, + "step": 1302 + }, + { + "epoch": 3.424063116370809, + "high_lr": 0.00031473684210526316, + "low_lr": 6.294736842105264e-06, + "step": 1302 + }, + { + "epoch": 3.424063116370809, + "high_lr": 0.00031473684210526316, + "low_lr": 6.294736842105264e-06, + "step": 1302 + }, + { + "epoch": 3.424063116370809, + "high_lr": 0.00031473684210526316, + "low_lr": 6.294736842105264e-06, + "step": 1302 + }, + { + "epoch": 3.424063116370809, + "high_lr": 0.00031473684210526316, + "low_lr": 6.294736842105264e-06, + "step": 1302 + }, + { + "epoch": 3.424063116370809, + "high_lr": 0.00031473684210526316, + "low_lr": 6.294736842105264e-06, + "step": 1302 + }, + { + "epoch": 3.4266929651545035, + "grad_norm": 1.4602686166763306, + "learning_rate": 0.0003142105263157895, + "loss": 1.2643, + "step": 1303 + }, + { + "epoch": 3.4266929651545035, + "high_lr": 0.0003142105263157895, + "low_lr": 6.28421052631579e-06, + "step": 1303 + }, + { + "epoch": 3.4266929651545035, + "high_lr": 0.0003142105263157895, + "low_lr": 6.28421052631579e-06, + "step": 1303 + }, + { + "epoch": 3.4266929651545035, + "high_lr": 0.0003142105263157895, + "low_lr": 6.28421052631579e-06, + "step": 1303 + }, + { + "epoch": 3.4266929651545035, + "high_lr": 0.0003142105263157895, + "low_lr": 6.28421052631579e-06, + "step": 1303 + }, + { + "epoch": 3.4266929651545035, + "high_lr": 0.0003142105263157895, + "low_lr": 6.28421052631579e-06, + "step": 1303 + }, + { + "epoch": 3.4266929651545035, + "high_lr": 0.0003142105263157895, + "low_lr": 6.28421052631579e-06, + "step": 1303 + }, + { + "epoch": 3.4266929651545035, + "high_lr": 0.0003142105263157895, + "low_lr": 6.28421052631579e-06, + "step": 1303 + }, + { + "epoch": 3.4266929651545035, + "high_lr": 0.0003142105263157895, + "low_lr": 6.28421052631579e-06, + "step": 1303 + }, + { + "epoch": 3.4293228139381986, + "grad_norm": 1.4567514657974243, + "learning_rate": 0.0003136842105263158, + "loss": 1.2775, + "step": 1304 + }, + { + "epoch": 3.4293228139381986, + "high_lr": 0.0003136842105263158, + "low_lr": 6.2736842105263165e-06, + "step": 1304 + }, + { + "epoch": 3.4293228139381986, + "high_lr": 0.0003136842105263158, + "low_lr": 6.2736842105263165e-06, + "step": 1304 + }, + { + "epoch": 3.4293228139381986, + "high_lr": 0.0003136842105263158, + "low_lr": 6.2736842105263165e-06, + "step": 1304 + }, + { + "epoch": 3.4293228139381986, + "high_lr": 0.0003136842105263158, + "low_lr": 6.2736842105263165e-06, + "step": 1304 + }, + { + "epoch": 3.4293228139381986, + "high_lr": 0.0003136842105263158, + "low_lr": 6.2736842105263165e-06, + "step": 1304 + }, + { + "epoch": 3.4293228139381986, + "high_lr": 0.0003136842105263158, + "low_lr": 6.2736842105263165e-06, + "step": 1304 + }, + { + "epoch": 3.4293228139381986, + "high_lr": 0.0003136842105263158, + "low_lr": 6.2736842105263165e-06, + "step": 1304 + }, + { + "epoch": 3.4293228139381986, + "high_lr": 0.0003136842105263158, + "low_lr": 6.2736842105263165e-06, + "step": 1304 + }, + { + "epoch": 3.4319526627218933, + "grad_norm": 1.3604228496551514, + "learning_rate": 0.00031315789473684207, + "loss": 1.3007, + "step": 1305 + }, + { + "epoch": 3.4319526627218933, + "high_lr": 0.00031315789473684207, + "low_lr": 6.263157894736842e-06, + "step": 1305 + }, + { + "epoch": 3.4319526627218933, + "high_lr": 0.00031315789473684207, + "low_lr": 6.263157894736842e-06, + "step": 1305 + }, + { + "epoch": 3.4319526627218933, + "high_lr": 0.00031315789473684207, + "low_lr": 6.263157894736842e-06, + "step": 1305 + }, + { + "epoch": 3.4319526627218933, + "high_lr": 0.00031315789473684207, + "low_lr": 6.263157894736842e-06, + "step": 1305 + }, + { + "epoch": 3.4319526627218933, + "high_lr": 0.00031315789473684207, + "low_lr": 6.263157894736842e-06, + "step": 1305 + }, + { + "epoch": 3.4319526627218933, + "high_lr": 0.00031315789473684207, + "low_lr": 6.263157894736842e-06, + "step": 1305 + }, + { + "epoch": 3.4319526627218933, + "high_lr": 0.00031315789473684207, + "low_lr": 6.263157894736842e-06, + "step": 1305 + }, + { + "epoch": 3.4319526627218933, + "high_lr": 0.00031315789473684207, + "low_lr": 6.263157894736842e-06, + "step": 1305 + }, + { + "epoch": 3.4345825115055884, + "grad_norm": 1.4118196964263916, + "learning_rate": 0.0003126315789473684, + "loss": 1.2874, + "step": 1306 + }, + { + "epoch": 3.4345825115055884, + "high_lr": 0.0003126315789473684, + "low_lr": 6.252631578947369e-06, + "step": 1306 + }, + { + "epoch": 3.4345825115055884, + "high_lr": 0.0003126315789473684, + "low_lr": 6.252631578947369e-06, + "step": 1306 + }, + { + "epoch": 3.4345825115055884, + "high_lr": 0.0003126315789473684, + "low_lr": 6.252631578947369e-06, + "step": 1306 + }, + { + "epoch": 3.4345825115055884, + "high_lr": 0.0003126315789473684, + "low_lr": 6.252631578947369e-06, + "step": 1306 + }, + { + "epoch": 3.4345825115055884, + "high_lr": 0.0003126315789473684, + "low_lr": 6.252631578947369e-06, + "step": 1306 + }, + { + "epoch": 3.4345825115055884, + "high_lr": 0.0003126315789473684, + "low_lr": 6.252631578947369e-06, + "step": 1306 + }, + { + "epoch": 3.4345825115055884, + "high_lr": 0.0003126315789473684, + "low_lr": 6.252631578947369e-06, + "step": 1306 + }, + { + "epoch": 3.4345825115055884, + "high_lr": 0.0003126315789473684, + "low_lr": 6.252631578947369e-06, + "step": 1306 + }, + { + "epoch": 3.4372123602892835, + "grad_norm": 1.3901150226593018, + "learning_rate": 0.00031210526315789475, + "loss": 1.2901, + "step": 1307 + }, + { + "epoch": 3.4372123602892835, + "high_lr": 0.00031210526315789475, + "low_lr": 6.242105263157895e-06, + "step": 1307 + }, + { + "epoch": 3.4372123602892835, + "high_lr": 0.00031210526315789475, + "low_lr": 6.242105263157895e-06, + "step": 1307 + }, + { + "epoch": 3.4372123602892835, + "high_lr": 0.00031210526315789475, + "low_lr": 6.242105263157895e-06, + "step": 1307 + }, + { + "epoch": 3.4372123602892835, + "high_lr": 0.00031210526315789475, + "low_lr": 6.242105263157895e-06, + "step": 1307 + }, + { + "epoch": 3.4372123602892835, + "high_lr": 0.00031210526315789475, + "low_lr": 6.242105263157895e-06, + "step": 1307 + }, + { + "epoch": 3.4372123602892835, + "high_lr": 0.00031210526315789475, + "low_lr": 6.242105263157895e-06, + "step": 1307 + }, + { + "epoch": 3.4372123602892835, + "high_lr": 0.00031210526315789475, + "low_lr": 6.242105263157895e-06, + "step": 1307 + }, + { + "epoch": 3.4372123602892835, + "high_lr": 0.00031210526315789475, + "low_lr": 6.242105263157895e-06, + "step": 1307 + }, + { + "epoch": 3.439842209072978, + "grad_norm": 1.3946117162704468, + "learning_rate": 0.0003115789473684211, + "loss": 1.2785, + "step": 1308 + }, + { + "epoch": 3.439842209072978, + "high_lr": 0.0003115789473684211, + "low_lr": 6.231578947368422e-06, + "step": 1308 + }, + { + "epoch": 3.439842209072978, + "high_lr": 0.0003115789473684211, + "low_lr": 6.231578947368422e-06, + "step": 1308 + }, + { + "epoch": 3.439842209072978, + "high_lr": 0.0003115789473684211, + "low_lr": 6.231578947368422e-06, + "step": 1308 + }, + { + "epoch": 3.439842209072978, + "high_lr": 0.0003115789473684211, + "low_lr": 6.231578947368422e-06, + "step": 1308 + }, + { + "epoch": 3.439842209072978, + "high_lr": 0.0003115789473684211, + "low_lr": 6.231578947368422e-06, + "step": 1308 + }, + { + "epoch": 3.439842209072978, + "high_lr": 0.0003115789473684211, + "low_lr": 6.231578947368422e-06, + "step": 1308 + }, + { + "epoch": 3.439842209072978, + "high_lr": 0.0003115789473684211, + "low_lr": 6.231578947368422e-06, + "step": 1308 + }, + { + "epoch": 3.439842209072978, + "high_lr": 0.0003115789473684211, + "low_lr": 6.231578947368422e-06, + "step": 1308 + }, + { + "epoch": 3.442472057856673, + "grad_norm": 1.4135258197784424, + "learning_rate": 0.0003110526315789474, + "loss": 1.301, + "step": 1309 + }, + { + "epoch": 3.442472057856673, + "high_lr": 0.0003110526315789474, + "low_lr": 6.221052631578947e-06, + "step": 1309 + }, + { + "epoch": 3.442472057856673, + "high_lr": 0.0003110526315789474, + "low_lr": 6.221052631578947e-06, + "step": 1309 + }, + { + "epoch": 3.442472057856673, + "high_lr": 0.0003110526315789474, + "low_lr": 6.221052631578947e-06, + "step": 1309 + }, + { + "epoch": 3.442472057856673, + "high_lr": 0.0003110526315789474, + "low_lr": 6.221052631578947e-06, + "step": 1309 + }, + { + "epoch": 3.442472057856673, + "high_lr": 0.0003110526315789474, + "low_lr": 6.221052631578947e-06, + "step": 1309 + }, + { + "epoch": 3.442472057856673, + "high_lr": 0.0003110526315789474, + "low_lr": 6.221052631578947e-06, + "step": 1309 + }, + { + "epoch": 3.442472057856673, + "high_lr": 0.0003110526315789474, + "low_lr": 6.221052631578947e-06, + "step": 1309 + }, + { + "epoch": 3.442472057856673, + "high_lr": 0.0003110526315789474, + "low_lr": 6.221052631578947e-06, + "step": 1309 + }, + { + "epoch": 3.4451019066403683, + "grad_norm": 1.4388030767440796, + "learning_rate": 0.0003105263157894737, + "loss": 1.3084, + "step": 1310 + }, + { + "epoch": 3.4451019066403683, + "high_lr": 0.0003105263157894737, + "low_lr": 6.2105263157894745e-06, + "step": 1310 + }, + { + "epoch": 3.4451019066403683, + "high_lr": 0.0003105263157894737, + "low_lr": 6.2105263157894745e-06, + "step": 1310 + }, + { + "epoch": 3.4451019066403683, + "high_lr": 0.0003105263157894737, + "low_lr": 6.2105263157894745e-06, + "step": 1310 + }, + { + "epoch": 3.4451019066403683, + "high_lr": 0.0003105263157894737, + "low_lr": 6.2105263157894745e-06, + "step": 1310 + }, + { + "epoch": 3.4451019066403683, + "high_lr": 0.0003105263157894737, + "low_lr": 6.2105263157894745e-06, + "step": 1310 + }, + { + "epoch": 3.4451019066403683, + "high_lr": 0.0003105263157894737, + "low_lr": 6.2105263157894745e-06, + "step": 1310 + }, + { + "epoch": 3.4451019066403683, + "high_lr": 0.0003105263157894737, + "low_lr": 6.2105263157894745e-06, + "step": 1310 + }, + { + "epoch": 3.4451019066403683, + "high_lr": 0.0003105263157894737, + "low_lr": 6.2105263157894745e-06, + "step": 1310 + }, + { + "epoch": 3.447731755424063, + "grad_norm": 1.385117769241333, + "learning_rate": 0.00031, + "loss": 1.3113, + "step": 1311 + }, + { + "epoch": 3.447731755424063, + "high_lr": 0.00031, + "low_lr": 6.200000000000001e-06, + "step": 1311 + }, + { + "epoch": 3.447731755424063, + "high_lr": 0.00031, + "low_lr": 6.200000000000001e-06, + "step": 1311 + }, + { + "epoch": 3.447731755424063, + "high_lr": 0.00031, + "low_lr": 6.200000000000001e-06, + "step": 1311 + }, + { + "epoch": 3.447731755424063, + "high_lr": 0.00031, + "low_lr": 6.200000000000001e-06, + "step": 1311 + }, + { + "epoch": 3.447731755424063, + "high_lr": 0.00031, + "low_lr": 6.200000000000001e-06, + "step": 1311 + }, + { + "epoch": 3.447731755424063, + "high_lr": 0.00031, + "low_lr": 6.200000000000001e-06, + "step": 1311 + }, + { + "epoch": 3.447731755424063, + "high_lr": 0.00031, + "low_lr": 6.200000000000001e-06, + "step": 1311 + }, + { + "epoch": 3.447731755424063, + "high_lr": 0.00031, + "low_lr": 6.200000000000001e-06, + "step": 1311 + }, + { + "epoch": 3.450361604207758, + "grad_norm": 1.5202070474624634, + "learning_rate": 0.0003094736842105263, + "loss": 1.3932, + "step": 1312 + }, + { + "epoch": 3.450361604207758, + "high_lr": 0.0003094736842105263, + "low_lr": 6.189473684210526e-06, + "step": 1312 + }, + { + "epoch": 3.450361604207758, + "high_lr": 0.0003094736842105263, + "low_lr": 6.189473684210526e-06, + "step": 1312 + }, + { + "epoch": 3.450361604207758, + "high_lr": 0.0003094736842105263, + "low_lr": 6.189473684210526e-06, + "step": 1312 + }, + { + "epoch": 3.450361604207758, + "high_lr": 0.0003094736842105263, + "low_lr": 6.189473684210526e-06, + "step": 1312 + }, + { + "epoch": 3.450361604207758, + "high_lr": 0.0003094736842105263, + "low_lr": 6.189473684210526e-06, + "step": 1312 + }, + { + "epoch": 3.450361604207758, + "high_lr": 0.0003094736842105263, + "low_lr": 6.189473684210526e-06, + "step": 1312 + }, + { + "epoch": 3.450361604207758, + "high_lr": 0.0003094736842105263, + "low_lr": 6.189473684210526e-06, + "step": 1312 + }, + { + "epoch": 3.450361604207758, + "high_lr": 0.0003094736842105263, + "low_lr": 6.189473684210526e-06, + "step": 1312 + }, + { + "epoch": 3.452991452991453, + "grad_norm": 1.3394801616668701, + "learning_rate": 0.0003089473684210526, + "loss": 1.3255, + "step": 1313 + }, + { + "epoch": 3.452991452991453, + "high_lr": 0.0003089473684210526, + "low_lr": 6.1789473684210534e-06, + "step": 1313 + }, + { + "epoch": 3.452991452991453, + "high_lr": 0.0003089473684210526, + "low_lr": 6.1789473684210534e-06, + "step": 1313 + }, + { + "epoch": 3.452991452991453, + "high_lr": 0.0003089473684210526, + "low_lr": 6.1789473684210534e-06, + "step": 1313 + }, + { + "epoch": 3.452991452991453, + "high_lr": 0.0003089473684210526, + "low_lr": 6.1789473684210534e-06, + "step": 1313 + }, + { + "epoch": 3.452991452991453, + "high_lr": 0.0003089473684210526, + "low_lr": 6.1789473684210534e-06, + "step": 1313 + }, + { + "epoch": 3.452991452991453, + "high_lr": 0.0003089473684210526, + "low_lr": 6.1789473684210534e-06, + "step": 1313 + }, + { + "epoch": 3.452991452991453, + "high_lr": 0.0003089473684210526, + "low_lr": 6.1789473684210534e-06, + "step": 1313 + }, + { + "epoch": 3.452991452991453, + "high_lr": 0.0003089473684210526, + "low_lr": 6.1789473684210534e-06, + "step": 1313 + }, + { + "epoch": 3.455621301775148, + "grad_norm": 1.495471715927124, + "learning_rate": 0.0003084210526315789, + "loss": 1.3534, + "step": 1314 + }, + { + "epoch": 3.455621301775148, + "high_lr": 0.0003084210526315789, + "low_lr": 6.168421052631579e-06, + "step": 1314 + }, + { + "epoch": 3.455621301775148, + "high_lr": 0.0003084210526315789, + "low_lr": 6.168421052631579e-06, + "step": 1314 + }, + { + "epoch": 3.455621301775148, + "high_lr": 0.0003084210526315789, + "low_lr": 6.168421052631579e-06, + "step": 1314 + }, + { + "epoch": 3.455621301775148, + "high_lr": 0.0003084210526315789, + "low_lr": 6.168421052631579e-06, + "step": 1314 + }, + { + "epoch": 3.455621301775148, + "high_lr": 0.0003084210526315789, + "low_lr": 6.168421052631579e-06, + "step": 1314 + }, + { + "epoch": 3.455621301775148, + "high_lr": 0.0003084210526315789, + "low_lr": 6.168421052631579e-06, + "step": 1314 + }, + { + "epoch": 3.455621301775148, + "high_lr": 0.0003084210526315789, + "low_lr": 6.168421052631579e-06, + "step": 1314 + }, + { + "epoch": 3.455621301775148, + "high_lr": 0.0003084210526315789, + "low_lr": 6.168421052631579e-06, + "step": 1314 + }, + { + "epoch": 3.458251150558843, + "grad_norm": 1.4218358993530273, + "learning_rate": 0.0003078947368421053, + "loss": 1.2885, + "step": 1315 + }, + { + "epoch": 3.458251150558843, + "high_lr": 0.0003078947368421053, + "low_lr": 6.157894736842106e-06, + "step": 1315 + }, + { + "epoch": 3.458251150558843, + "high_lr": 0.0003078947368421053, + "low_lr": 6.157894736842106e-06, + "step": 1315 + }, + { + "epoch": 3.458251150558843, + "high_lr": 0.0003078947368421053, + "low_lr": 6.157894736842106e-06, + "step": 1315 + }, + { + "epoch": 3.458251150558843, + "high_lr": 0.0003078947368421053, + "low_lr": 6.157894736842106e-06, + "step": 1315 + }, + { + "epoch": 3.458251150558843, + "high_lr": 0.0003078947368421053, + "low_lr": 6.157894736842106e-06, + "step": 1315 + }, + { + "epoch": 3.458251150558843, + "high_lr": 0.0003078947368421053, + "low_lr": 6.157894736842106e-06, + "step": 1315 + }, + { + "epoch": 3.458251150558843, + "high_lr": 0.0003078947368421053, + "low_lr": 6.157894736842106e-06, + "step": 1315 + }, + { + "epoch": 3.458251150558843, + "high_lr": 0.0003078947368421053, + "low_lr": 6.157894736842106e-06, + "step": 1315 + }, + { + "epoch": 3.460880999342538, + "grad_norm": 1.413833498954773, + "learning_rate": 0.0003073684210526316, + "loss": 1.2894, + "step": 1316 + }, + { + "epoch": 3.460880999342538, + "high_lr": 0.0003073684210526316, + "low_lr": 6.1473684210526316e-06, + "step": 1316 + }, + { + "epoch": 3.460880999342538, + "high_lr": 0.0003073684210526316, + "low_lr": 6.1473684210526316e-06, + "step": 1316 + }, + { + "epoch": 3.460880999342538, + "high_lr": 0.0003073684210526316, + "low_lr": 6.1473684210526316e-06, + "step": 1316 + }, + { + "epoch": 3.460880999342538, + "high_lr": 0.0003073684210526316, + "low_lr": 6.1473684210526316e-06, + "step": 1316 + }, + { + "epoch": 3.460880999342538, + "high_lr": 0.0003073684210526316, + "low_lr": 6.1473684210526316e-06, + "step": 1316 + }, + { + "epoch": 3.460880999342538, + "high_lr": 0.0003073684210526316, + "low_lr": 6.1473684210526316e-06, + "step": 1316 + }, + { + "epoch": 3.460880999342538, + "high_lr": 0.0003073684210526316, + "low_lr": 6.1473684210526316e-06, + "step": 1316 + }, + { + "epoch": 3.460880999342538, + "high_lr": 0.0003073684210526316, + "low_lr": 6.1473684210526316e-06, + "step": 1316 + }, + { + "epoch": 3.4635108481262327, + "grad_norm": 1.3772145509719849, + "learning_rate": 0.00030684210526315793, + "loss": 1.3812, + "step": 1317 + }, + { + "epoch": 3.4635108481262327, + "high_lr": 0.00030684210526315793, + "low_lr": 6.136842105263159e-06, + "step": 1317 + }, + { + "epoch": 3.4635108481262327, + "high_lr": 0.00030684210526315793, + "low_lr": 6.136842105263159e-06, + "step": 1317 + }, + { + "epoch": 3.4635108481262327, + "high_lr": 0.00030684210526315793, + "low_lr": 6.136842105263159e-06, + "step": 1317 + }, + { + "epoch": 3.4635108481262327, + "high_lr": 0.00030684210526315793, + "low_lr": 6.136842105263159e-06, + "step": 1317 + }, + { + "epoch": 3.4635108481262327, + "high_lr": 0.00030684210526315793, + "low_lr": 6.136842105263159e-06, + "step": 1317 + }, + { + "epoch": 3.4635108481262327, + "high_lr": 0.00030684210526315793, + "low_lr": 6.136842105263159e-06, + "step": 1317 + }, + { + "epoch": 3.4635108481262327, + "high_lr": 0.00030684210526315793, + "low_lr": 6.136842105263159e-06, + "step": 1317 + }, + { + "epoch": 3.4635108481262327, + "high_lr": 0.00030684210526315793, + "low_lr": 6.136842105263159e-06, + "step": 1317 + }, + { + "epoch": 3.4661406969099278, + "grad_norm": 1.3490023612976074, + "learning_rate": 0.0003063157894736842, + "loss": 1.3206, + "step": 1318 + }, + { + "epoch": 3.4661406969099278, + "high_lr": 0.0003063157894736842, + "low_lr": 6.126315789473685e-06, + "step": 1318 + }, + { + "epoch": 3.4661406969099278, + "high_lr": 0.0003063157894736842, + "low_lr": 6.126315789473685e-06, + "step": 1318 + }, + { + "epoch": 3.4661406969099278, + "high_lr": 0.0003063157894736842, + "low_lr": 6.126315789473685e-06, + "step": 1318 + }, + { + "epoch": 3.4661406969099278, + "high_lr": 0.0003063157894736842, + "low_lr": 6.126315789473685e-06, + "step": 1318 + }, + { + "epoch": 3.4661406969099278, + "high_lr": 0.0003063157894736842, + "low_lr": 6.126315789473685e-06, + "step": 1318 + }, + { + "epoch": 3.4661406969099278, + "high_lr": 0.0003063157894736842, + "low_lr": 6.126315789473685e-06, + "step": 1318 + }, + { + "epoch": 3.4661406969099278, + "high_lr": 0.0003063157894736842, + "low_lr": 6.126315789473685e-06, + "step": 1318 + }, + { + "epoch": 3.4661406969099278, + "high_lr": 0.0003063157894736842, + "low_lr": 6.126315789473685e-06, + "step": 1318 + }, + { + "epoch": 3.468770545693623, + "grad_norm": 1.4685592651367188, + "learning_rate": 0.0003057894736842105, + "loss": 1.2311, + "step": 1319 + }, + { + "epoch": 3.468770545693623, + "high_lr": 0.0003057894736842105, + "low_lr": 6.1157894736842106e-06, + "step": 1319 + }, + { + "epoch": 3.468770545693623, + "high_lr": 0.0003057894736842105, + "low_lr": 6.1157894736842106e-06, + "step": 1319 + }, + { + "epoch": 3.468770545693623, + "high_lr": 0.0003057894736842105, + "low_lr": 6.1157894736842106e-06, + "step": 1319 + }, + { + "epoch": 3.468770545693623, + "high_lr": 0.0003057894736842105, + "low_lr": 6.1157894736842106e-06, + "step": 1319 + }, + { + "epoch": 3.468770545693623, + "high_lr": 0.0003057894736842105, + "low_lr": 6.1157894736842106e-06, + "step": 1319 + }, + { + "epoch": 3.468770545693623, + "high_lr": 0.0003057894736842105, + "low_lr": 6.1157894736842106e-06, + "step": 1319 + }, + { + "epoch": 3.468770545693623, + "high_lr": 0.0003057894736842105, + "low_lr": 6.1157894736842106e-06, + "step": 1319 + }, + { + "epoch": 3.468770545693623, + "high_lr": 0.0003057894736842105, + "low_lr": 6.1157894736842106e-06, + "step": 1319 + }, + { + "epoch": 3.4714003944773175, + "grad_norm": 1.5080946683883667, + "learning_rate": 0.00030526315789473684, + "loss": 1.286, + "step": 1320 + }, + { + "epoch": 3.4714003944773175, + "high_lr": 0.00030526315789473684, + "low_lr": 6.105263157894738e-06, + "step": 1320 + }, + { + "epoch": 3.4714003944773175, + "high_lr": 0.00030526315789473684, + "low_lr": 6.105263157894738e-06, + "step": 1320 + }, + { + "epoch": 3.4714003944773175, + "high_lr": 0.00030526315789473684, + "low_lr": 6.105263157894738e-06, + "step": 1320 + }, + { + "epoch": 3.4714003944773175, + "high_lr": 0.00030526315789473684, + "low_lr": 6.105263157894738e-06, + "step": 1320 + }, + { + "epoch": 3.4714003944773175, + "high_lr": 0.00030526315789473684, + "low_lr": 6.105263157894738e-06, + "step": 1320 + }, + { + "epoch": 3.4714003944773175, + "high_lr": 0.00030526315789473684, + "low_lr": 6.105263157894738e-06, + "step": 1320 + }, + { + "epoch": 3.4714003944773175, + "high_lr": 0.00030526315789473684, + "low_lr": 6.105263157894738e-06, + "step": 1320 + }, + { + "epoch": 3.4714003944773175, + "high_lr": 0.00030526315789473684, + "low_lr": 6.105263157894738e-06, + "step": 1320 + }, + { + "epoch": 3.4740302432610126, + "grad_norm": 1.4560896158218384, + "learning_rate": 0.00030473684210526313, + "loss": 1.2817, + "step": 1321 + }, + { + "epoch": 3.4740302432610126, + "high_lr": 0.00030473684210526313, + "low_lr": 6.094736842105263e-06, + "step": 1321 + }, + { + "epoch": 3.4740302432610126, + "high_lr": 0.00030473684210526313, + "low_lr": 6.094736842105263e-06, + "step": 1321 + }, + { + "epoch": 3.4740302432610126, + "high_lr": 0.00030473684210526313, + "low_lr": 6.094736842105263e-06, + "step": 1321 + }, + { + "epoch": 3.4740302432610126, + "high_lr": 0.00030473684210526313, + "low_lr": 6.094736842105263e-06, + "step": 1321 + }, + { + "epoch": 3.4740302432610126, + "high_lr": 0.00030473684210526313, + "low_lr": 6.094736842105263e-06, + "step": 1321 + }, + { + "epoch": 3.4740302432610126, + "high_lr": 0.00030473684210526313, + "low_lr": 6.094736842105263e-06, + "step": 1321 + }, + { + "epoch": 3.4740302432610126, + "high_lr": 0.00030473684210526313, + "low_lr": 6.094736842105263e-06, + "step": 1321 + }, + { + "epoch": 3.4740302432610126, + "high_lr": 0.00030473684210526313, + "low_lr": 6.094736842105263e-06, + "step": 1321 + }, + { + "epoch": 3.4766600920447073, + "grad_norm": 1.376052737236023, + "learning_rate": 0.00030421052631578947, + "loss": 1.3213, + "step": 1322 + }, + { + "epoch": 3.4766600920447073, + "high_lr": 0.00030421052631578947, + "low_lr": 6.08421052631579e-06, + "step": 1322 + }, + { + "epoch": 3.4766600920447073, + "high_lr": 0.00030421052631578947, + "low_lr": 6.08421052631579e-06, + "step": 1322 + }, + { + "epoch": 3.4766600920447073, + "high_lr": 0.00030421052631578947, + "low_lr": 6.08421052631579e-06, + "step": 1322 + }, + { + "epoch": 3.4766600920447073, + "high_lr": 0.00030421052631578947, + "low_lr": 6.08421052631579e-06, + "step": 1322 + }, + { + "epoch": 3.4766600920447073, + "high_lr": 0.00030421052631578947, + "low_lr": 6.08421052631579e-06, + "step": 1322 + }, + { + "epoch": 3.4766600920447073, + "high_lr": 0.00030421052631578947, + "low_lr": 6.08421052631579e-06, + "step": 1322 + }, + { + "epoch": 3.4766600920447073, + "high_lr": 0.00030421052631578947, + "low_lr": 6.08421052631579e-06, + "step": 1322 + }, + { + "epoch": 3.4766600920447073, + "high_lr": 0.00030421052631578947, + "low_lr": 6.08421052631579e-06, + "step": 1322 + }, + { + "epoch": 3.4792899408284024, + "grad_norm": 1.3689539432525635, + "learning_rate": 0.0003036842105263158, + "loss": 1.3158, + "step": 1323 + }, + { + "epoch": 3.4792899408284024, + "high_lr": 0.0003036842105263158, + "low_lr": 6.073684210526316e-06, + "step": 1323 + }, + { + "epoch": 3.4792899408284024, + "high_lr": 0.0003036842105263158, + "low_lr": 6.073684210526316e-06, + "step": 1323 + }, + { + "epoch": 3.4792899408284024, + "high_lr": 0.0003036842105263158, + "low_lr": 6.073684210526316e-06, + "step": 1323 + }, + { + "epoch": 3.4792899408284024, + "high_lr": 0.0003036842105263158, + "low_lr": 6.073684210526316e-06, + "step": 1323 + }, + { + "epoch": 3.4792899408284024, + "high_lr": 0.0003036842105263158, + "low_lr": 6.073684210526316e-06, + "step": 1323 + }, + { + "epoch": 3.4792899408284024, + "high_lr": 0.0003036842105263158, + "low_lr": 6.073684210526316e-06, + "step": 1323 + }, + { + "epoch": 3.4792899408284024, + "high_lr": 0.0003036842105263158, + "low_lr": 6.073684210526316e-06, + "step": 1323 + }, + { + "epoch": 3.4792899408284024, + "high_lr": 0.0003036842105263158, + "low_lr": 6.073684210526316e-06, + "step": 1323 + }, + { + "epoch": 3.4819197896120975, + "grad_norm": 1.4256808757781982, + "learning_rate": 0.00030315789473684215, + "loss": 1.3019, + "step": 1324 + }, + { + "epoch": 3.4819197896120975, + "high_lr": 0.00030315789473684215, + "low_lr": 6.063157894736843e-06, + "step": 1324 + }, + { + "epoch": 3.4819197896120975, + "high_lr": 0.00030315789473684215, + "low_lr": 6.063157894736843e-06, + "step": 1324 + }, + { + "epoch": 3.4819197896120975, + "high_lr": 0.00030315789473684215, + "low_lr": 6.063157894736843e-06, + "step": 1324 + }, + { + "epoch": 3.4819197896120975, + "high_lr": 0.00030315789473684215, + "low_lr": 6.063157894736843e-06, + "step": 1324 + }, + { + "epoch": 3.4819197896120975, + "high_lr": 0.00030315789473684215, + "low_lr": 6.063157894736843e-06, + "step": 1324 + }, + { + "epoch": 3.4819197896120975, + "high_lr": 0.00030315789473684215, + "low_lr": 6.063157894736843e-06, + "step": 1324 + }, + { + "epoch": 3.4819197896120975, + "high_lr": 0.00030315789473684215, + "low_lr": 6.063157894736843e-06, + "step": 1324 + }, + { + "epoch": 3.4819197896120975, + "high_lr": 0.00030315789473684215, + "low_lr": 6.063157894736843e-06, + "step": 1324 + }, + { + "epoch": 3.484549638395792, + "grad_norm": 1.4182405471801758, + "learning_rate": 0.00030263157894736844, + "loss": 1.2553, + "step": 1325 + }, + { + "epoch": 3.484549638395792, + "high_lr": 0.00030263157894736844, + "low_lr": 6.0526315789473685e-06, + "step": 1325 + }, + { + "epoch": 3.484549638395792, + "high_lr": 0.00030263157894736844, + "low_lr": 6.0526315789473685e-06, + "step": 1325 + }, + { + "epoch": 3.484549638395792, + "high_lr": 0.00030263157894736844, + "low_lr": 6.0526315789473685e-06, + "step": 1325 + }, + { + "epoch": 3.484549638395792, + "high_lr": 0.00030263157894736844, + "low_lr": 6.0526315789473685e-06, + "step": 1325 + }, + { + "epoch": 3.484549638395792, + "high_lr": 0.00030263157894736844, + "low_lr": 6.0526315789473685e-06, + "step": 1325 + }, + { + "epoch": 3.484549638395792, + "high_lr": 0.00030263157894736844, + "low_lr": 6.0526315789473685e-06, + "step": 1325 + }, + { + "epoch": 3.484549638395792, + "high_lr": 0.00030263157894736844, + "low_lr": 6.0526315789473685e-06, + "step": 1325 + }, + { + "epoch": 3.484549638395792, + "high_lr": 0.00030263157894736844, + "low_lr": 6.0526315789473685e-06, + "step": 1325 + }, + { + "epoch": 3.4871794871794872, + "grad_norm": 1.3478316068649292, + "learning_rate": 0.0003021052631578947, + "loss": 1.3268, + "step": 1326 + }, + { + "epoch": 3.4871794871794872, + "high_lr": 0.0003021052631578947, + "low_lr": 6.042105263157895e-06, + "step": 1326 + }, + { + "epoch": 3.4871794871794872, + "high_lr": 0.0003021052631578947, + "low_lr": 6.042105263157895e-06, + "step": 1326 + }, + { + "epoch": 3.4871794871794872, + "high_lr": 0.0003021052631578947, + "low_lr": 6.042105263157895e-06, + "step": 1326 + }, + { + "epoch": 3.4871794871794872, + "high_lr": 0.0003021052631578947, + "low_lr": 6.042105263157895e-06, + "step": 1326 + }, + { + "epoch": 3.4871794871794872, + "high_lr": 0.0003021052631578947, + "low_lr": 6.042105263157895e-06, + "step": 1326 + }, + { + "epoch": 3.4871794871794872, + "high_lr": 0.0003021052631578947, + "low_lr": 6.042105263157895e-06, + "step": 1326 + }, + { + "epoch": 3.4871794871794872, + "high_lr": 0.0003021052631578947, + "low_lr": 6.042105263157895e-06, + "step": 1326 + }, + { + "epoch": 3.4871794871794872, + "high_lr": 0.0003021052631578947, + "low_lr": 6.042105263157895e-06, + "step": 1326 + }, + { + "epoch": 3.489809335963182, + "grad_norm": 1.4121602773666382, + "learning_rate": 0.00030157894736842106, + "loss": 1.2991, + "step": 1327 + }, + { + "epoch": 3.489809335963182, + "high_lr": 0.00030157894736842106, + "low_lr": 6.031578947368422e-06, + "step": 1327 + }, + { + "epoch": 3.489809335963182, + "high_lr": 0.00030157894736842106, + "low_lr": 6.031578947368422e-06, + "step": 1327 + }, + { + "epoch": 3.489809335963182, + "high_lr": 0.00030157894736842106, + "low_lr": 6.031578947368422e-06, + "step": 1327 + }, + { + "epoch": 3.489809335963182, + "high_lr": 0.00030157894736842106, + "low_lr": 6.031578947368422e-06, + "step": 1327 + }, + { + "epoch": 3.489809335963182, + "high_lr": 0.00030157894736842106, + "low_lr": 6.031578947368422e-06, + "step": 1327 + }, + { + "epoch": 3.489809335963182, + "high_lr": 0.00030157894736842106, + "low_lr": 6.031578947368422e-06, + "step": 1327 + }, + { + "epoch": 3.489809335963182, + "high_lr": 0.00030157894736842106, + "low_lr": 6.031578947368422e-06, + "step": 1327 + }, + { + "epoch": 3.489809335963182, + "high_lr": 0.00030157894736842106, + "low_lr": 6.031578947368422e-06, + "step": 1327 + }, + { + "epoch": 3.492439184746877, + "grad_norm": 1.4898854494094849, + "learning_rate": 0.00030105263157894735, + "loss": 1.3184, + "step": 1328 + }, + { + "epoch": 3.492439184746877, + "high_lr": 0.00030105263157894735, + "low_lr": 6.0210526315789475e-06, + "step": 1328 + }, + { + "epoch": 3.492439184746877, + "high_lr": 0.00030105263157894735, + "low_lr": 6.0210526315789475e-06, + "step": 1328 + }, + { + "epoch": 3.492439184746877, + "high_lr": 0.00030105263157894735, + "low_lr": 6.0210526315789475e-06, + "step": 1328 + }, + { + "epoch": 3.492439184746877, + "high_lr": 0.00030105263157894735, + "low_lr": 6.0210526315789475e-06, + "step": 1328 + }, + { + "epoch": 3.492439184746877, + "high_lr": 0.00030105263157894735, + "low_lr": 6.0210526315789475e-06, + "step": 1328 + }, + { + "epoch": 3.492439184746877, + "high_lr": 0.00030105263157894735, + "low_lr": 6.0210526315789475e-06, + "step": 1328 + }, + { + "epoch": 3.492439184746877, + "high_lr": 0.00030105263157894735, + "low_lr": 6.0210526315789475e-06, + "step": 1328 + }, + { + "epoch": 3.492439184746877, + "high_lr": 0.00030105263157894735, + "low_lr": 6.0210526315789475e-06, + "step": 1328 + }, + { + "epoch": 3.495069033530572, + "grad_norm": 1.3850432634353638, + "learning_rate": 0.0003005263157894737, + "loss": 1.3219, + "step": 1329 + }, + { + "epoch": 3.495069033530572, + "high_lr": 0.0003005263157894737, + "low_lr": 6.010526315789475e-06, + "step": 1329 + }, + { + "epoch": 3.495069033530572, + "high_lr": 0.0003005263157894737, + "low_lr": 6.010526315789475e-06, + "step": 1329 + }, + { + "epoch": 3.495069033530572, + "high_lr": 0.0003005263157894737, + "low_lr": 6.010526315789475e-06, + "step": 1329 + }, + { + "epoch": 3.495069033530572, + "high_lr": 0.0003005263157894737, + "low_lr": 6.010526315789475e-06, + "step": 1329 + }, + { + "epoch": 3.495069033530572, + "high_lr": 0.0003005263157894737, + "low_lr": 6.010526315789475e-06, + "step": 1329 + }, + { + "epoch": 3.495069033530572, + "high_lr": 0.0003005263157894737, + "low_lr": 6.010526315789475e-06, + "step": 1329 + }, + { + "epoch": 3.495069033530572, + "high_lr": 0.0003005263157894737, + "low_lr": 6.010526315789475e-06, + "step": 1329 + }, + { + "epoch": 3.495069033530572, + "high_lr": 0.0003005263157894737, + "low_lr": 6.010526315789475e-06, + "step": 1329 + }, + { + "epoch": 3.4976988823142667, + "grad_norm": 1.4616377353668213, + "learning_rate": 0.0003, + "loss": 1.2914, + "step": 1330 + }, + { + "epoch": 3.4976988823142667, + "high_lr": 0.0003, + "low_lr": 6e-06, + "step": 1330 + }, + { + "epoch": 3.4976988823142667, + "high_lr": 0.0003, + "low_lr": 6e-06, + "step": 1330 + }, + { + "epoch": 3.4976988823142667, + "high_lr": 0.0003, + "low_lr": 6e-06, + "step": 1330 + }, + { + "epoch": 3.4976988823142667, + "high_lr": 0.0003, + "low_lr": 6e-06, + "step": 1330 + }, + { + "epoch": 3.4976988823142667, + "high_lr": 0.0003, + "low_lr": 6e-06, + "step": 1330 + }, + { + "epoch": 3.4976988823142667, + "high_lr": 0.0003, + "low_lr": 6e-06, + "step": 1330 + }, + { + "epoch": 3.4976988823142667, + "high_lr": 0.0003, + "low_lr": 6e-06, + "step": 1330 + }, + { + "epoch": 3.4976988823142667, + "high_lr": 0.0003, + "low_lr": 6e-06, + "step": 1330 + }, + { + "epoch": 3.500328731097962, + "grad_norm": 1.3514699935913086, + "learning_rate": 0.00029947368421052637, + "loss": 1.2483, + "step": 1331 + }, + { + "epoch": 3.500328731097962, + "high_lr": 0.00029947368421052637, + "low_lr": 5.989473684210527e-06, + "step": 1331 + }, + { + "epoch": 3.500328731097962, + "high_lr": 0.00029947368421052637, + "low_lr": 5.989473684210527e-06, + "step": 1331 + }, + { + "epoch": 3.500328731097962, + "high_lr": 0.00029947368421052637, + "low_lr": 5.989473684210527e-06, + "step": 1331 + }, + { + "epoch": 3.500328731097962, + "high_lr": 0.00029947368421052637, + "low_lr": 5.989473684210527e-06, + "step": 1331 + }, + { + "epoch": 3.500328731097962, + "high_lr": 0.00029947368421052637, + "low_lr": 5.989473684210527e-06, + "step": 1331 + }, + { + "epoch": 3.500328731097962, + "high_lr": 0.00029947368421052637, + "low_lr": 5.989473684210527e-06, + "step": 1331 + }, + { + "epoch": 3.500328731097962, + "high_lr": 0.00029947368421052637, + "low_lr": 5.989473684210527e-06, + "step": 1331 + }, + { + "epoch": 3.500328731097962, + "high_lr": 0.00029947368421052637, + "low_lr": 5.989473684210527e-06, + "step": 1331 + }, + { + "epoch": 3.502958579881657, + "grad_norm": 1.5310125350952148, + "learning_rate": 0.00029894736842105265, + "loss": 1.321, + "step": 1332 + }, + { + "epoch": 3.502958579881657, + "high_lr": 0.00029894736842105265, + "low_lr": 5.978947368421053e-06, + "step": 1332 + }, + { + "epoch": 3.502958579881657, + "high_lr": 0.00029894736842105265, + "low_lr": 5.978947368421053e-06, + "step": 1332 + }, + { + "epoch": 3.502958579881657, + "high_lr": 0.00029894736842105265, + "low_lr": 5.978947368421053e-06, + "step": 1332 + }, + { + "epoch": 3.502958579881657, + "high_lr": 0.00029894736842105265, + "low_lr": 5.978947368421053e-06, + "step": 1332 + }, + { + "epoch": 3.502958579881657, + "high_lr": 0.00029894736842105265, + "low_lr": 5.978947368421053e-06, + "step": 1332 + }, + { + "epoch": 3.502958579881657, + "high_lr": 0.00029894736842105265, + "low_lr": 5.978947368421053e-06, + "step": 1332 + }, + { + "epoch": 3.502958579881657, + "high_lr": 0.00029894736842105265, + "low_lr": 5.978947368421053e-06, + "step": 1332 + }, + { + "epoch": 3.502958579881657, + "high_lr": 0.00029894736842105265, + "low_lr": 5.978947368421053e-06, + "step": 1332 + }, + { + "epoch": 3.5055884286653516, + "grad_norm": 1.3913053274154663, + "learning_rate": 0.00029842105263157894, + "loss": 1.2846, + "step": 1333 + }, + { + "epoch": 3.5055884286653516, + "high_lr": 0.00029842105263157894, + "low_lr": 5.968421052631579e-06, + "step": 1333 + }, + { + "epoch": 3.5055884286653516, + "high_lr": 0.00029842105263157894, + "low_lr": 5.968421052631579e-06, + "step": 1333 + }, + { + "epoch": 3.5055884286653516, + "high_lr": 0.00029842105263157894, + "low_lr": 5.968421052631579e-06, + "step": 1333 + }, + { + "epoch": 3.5055884286653516, + "high_lr": 0.00029842105263157894, + "low_lr": 5.968421052631579e-06, + "step": 1333 + }, + { + "epoch": 3.5055884286653516, + "high_lr": 0.00029842105263157894, + "low_lr": 5.968421052631579e-06, + "step": 1333 + }, + { + "epoch": 3.5055884286653516, + "high_lr": 0.00029842105263157894, + "low_lr": 5.968421052631579e-06, + "step": 1333 + }, + { + "epoch": 3.5055884286653516, + "high_lr": 0.00029842105263157894, + "low_lr": 5.968421052631579e-06, + "step": 1333 + }, + { + "epoch": 3.5055884286653516, + "high_lr": 0.00029842105263157894, + "low_lr": 5.968421052631579e-06, + "step": 1333 + }, + { + "epoch": 3.5082182774490467, + "grad_norm": 1.4891670942306519, + "learning_rate": 0.0002978947368421053, + "loss": 1.294, + "step": 1334 + }, + { + "epoch": 3.5082182774490467, + "high_lr": 0.0002978947368421053, + "low_lr": 5.9578947368421055e-06, + "step": 1334 + }, + { + "epoch": 3.5082182774490467, + "high_lr": 0.0002978947368421053, + "low_lr": 5.9578947368421055e-06, + "step": 1334 + }, + { + "epoch": 3.5082182774490467, + "high_lr": 0.0002978947368421053, + "low_lr": 5.9578947368421055e-06, + "step": 1334 + }, + { + "epoch": 3.5082182774490467, + "high_lr": 0.0002978947368421053, + "low_lr": 5.9578947368421055e-06, + "step": 1334 + }, + { + "epoch": 3.5082182774490467, + "high_lr": 0.0002978947368421053, + "low_lr": 5.9578947368421055e-06, + "step": 1334 + }, + { + "epoch": 3.5082182774490467, + "high_lr": 0.0002978947368421053, + "low_lr": 5.9578947368421055e-06, + "step": 1334 + }, + { + "epoch": 3.5082182774490467, + "high_lr": 0.0002978947368421053, + "low_lr": 5.9578947368421055e-06, + "step": 1334 + }, + { + "epoch": 3.5082182774490467, + "high_lr": 0.0002978947368421053, + "low_lr": 5.9578947368421055e-06, + "step": 1334 + }, + { + "epoch": 3.510848126232742, + "grad_norm": 1.3192452192306519, + "learning_rate": 0.00029736842105263157, + "loss": 1.3118, + "step": 1335 + }, + { + "epoch": 3.510848126232742, + "high_lr": 0.00029736842105263157, + "low_lr": 5.947368421052632e-06, + "step": 1335 + }, + { + "epoch": 3.510848126232742, + "high_lr": 0.00029736842105263157, + "low_lr": 5.947368421052632e-06, + "step": 1335 + }, + { + "epoch": 3.510848126232742, + "high_lr": 0.00029736842105263157, + "low_lr": 5.947368421052632e-06, + "step": 1335 + }, + { + "epoch": 3.510848126232742, + "high_lr": 0.00029736842105263157, + "low_lr": 5.947368421052632e-06, + "step": 1335 + }, + { + "epoch": 3.510848126232742, + "high_lr": 0.00029736842105263157, + "low_lr": 5.947368421052632e-06, + "step": 1335 + }, + { + "epoch": 3.510848126232742, + "high_lr": 0.00029736842105263157, + "low_lr": 5.947368421052632e-06, + "step": 1335 + }, + { + "epoch": 3.510848126232742, + "high_lr": 0.00029736842105263157, + "low_lr": 5.947368421052632e-06, + "step": 1335 + }, + { + "epoch": 3.510848126232742, + "high_lr": 0.00029736842105263157, + "low_lr": 5.947368421052632e-06, + "step": 1335 + }, + { + "epoch": 3.5134779750164364, + "grad_norm": 1.4548825025558472, + "learning_rate": 0.0002968421052631579, + "loss": 1.2824, + "step": 1336 + }, + { + "epoch": 3.5134779750164364, + "high_lr": 0.0002968421052631579, + "low_lr": 5.936842105263159e-06, + "step": 1336 + }, + { + "epoch": 3.5134779750164364, + "high_lr": 0.0002968421052631579, + "low_lr": 5.936842105263159e-06, + "step": 1336 + }, + { + "epoch": 3.5134779750164364, + "high_lr": 0.0002968421052631579, + "low_lr": 5.936842105263159e-06, + "step": 1336 + }, + { + "epoch": 3.5134779750164364, + "high_lr": 0.0002968421052631579, + "low_lr": 5.936842105263159e-06, + "step": 1336 + }, + { + "epoch": 3.5134779750164364, + "high_lr": 0.0002968421052631579, + "low_lr": 5.936842105263159e-06, + "step": 1336 + }, + { + "epoch": 3.5134779750164364, + "high_lr": 0.0002968421052631579, + "low_lr": 5.936842105263159e-06, + "step": 1336 + }, + { + "epoch": 3.5134779750164364, + "high_lr": 0.0002968421052631579, + "low_lr": 5.936842105263159e-06, + "step": 1336 + }, + { + "epoch": 3.5134779750164364, + "high_lr": 0.0002968421052631579, + "low_lr": 5.936842105263159e-06, + "step": 1336 + }, + { + "epoch": 3.5161078238001315, + "grad_norm": 1.349829912185669, + "learning_rate": 0.0002963157894736842, + "loss": 1.2968, + "step": 1337 + }, + { + "epoch": 3.5161078238001315, + "high_lr": 0.0002963157894736842, + "low_lr": 5.9263157894736844e-06, + "step": 1337 + }, + { + "epoch": 3.5161078238001315, + "high_lr": 0.0002963157894736842, + "low_lr": 5.9263157894736844e-06, + "step": 1337 + }, + { + "epoch": 3.5161078238001315, + "high_lr": 0.0002963157894736842, + "low_lr": 5.9263157894736844e-06, + "step": 1337 + }, + { + "epoch": 3.5161078238001315, + "high_lr": 0.0002963157894736842, + "low_lr": 5.9263157894736844e-06, + "step": 1337 + }, + { + "epoch": 3.5161078238001315, + "high_lr": 0.0002963157894736842, + "low_lr": 5.9263157894736844e-06, + "step": 1337 + }, + { + "epoch": 3.5161078238001315, + "high_lr": 0.0002963157894736842, + "low_lr": 5.9263157894736844e-06, + "step": 1337 + }, + { + "epoch": 3.5161078238001315, + "high_lr": 0.0002963157894736842, + "low_lr": 5.9263157894736844e-06, + "step": 1337 + }, + { + "epoch": 3.5161078238001315, + "high_lr": 0.0002963157894736842, + "low_lr": 5.9263157894736844e-06, + "step": 1337 + }, + { + "epoch": 3.5187376725838266, + "grad_norm": 1.38246750831604, + "learning_rate": 0.00029578947368421053, + "loss": 1.2769, + "step": 1338 + }, + { + "epoch": 3.5187376725838266, + "high_lr": 0.00029578947368421053, + "low_lr": 5.915789473684212e-06, + "step": 1338 + }, + { + "epoch": 3.5187376725838266, + "high_lr": 0.00029578947368421053, + "low_lr": 5.915789473684212e-06, + "step": 1338 + }, + { + "epoch": 3.5187376725838266, + "high_lr": 0.00029578947368421053, + "low_lr": 5.915789473684212e-06, + "step": 1338 + }, + { + "epoch": 3.5187376725838266, + "high_lr": 0.00029578947368421053, + "low_lr": 5.915789473684212e-06, + "step": 1338 + }, + { + "epoch": 3.5187376725838266, + "high_lr": 0.00029578947368421053, + "low_lr": 5.915789473684212e-06, + "step": 1338 + }, + { + "epoch": 3.5187376725838266, + "high_lr": 0.00029578947368421053, + "low_lr": 5.915789473684212e-06, + "step": 1338 + }, + { + "epoch": 3.5187376725838266, + "high_lr": 0.00029578947368421053, + "low_lr": 5.915789473684212e-06, + "step": 1338 + }, + { + "epoch": 3.5187376725838266, + "high_lr": 0.00029578947368421053, + "low_lr": 5.915789473684212e-06, + "step": 1338 + }, + { + "epoch": 3.5213675213675213, + "grad_norm": 1.3541733026504517, + "learning_rate": 0.0002952631578947368, + "loss": 1.3354, + "step": 1339 + }, + { + "epoch": 3.5213675213675213, + "high_lr": 0.0002952631578947368, + "low_lr": 5.905263157894737e-06, + "step": 1339 + }, + { + "epoch": 3.5213675213675213, + "high_lr": 0.0002952631578947368, + "low_lr": 5.905263157894737e-06, + "step": 1339 + }, + { + "epoch": 3.5213675213675213, + "high_lr": 0.0002952631578947368, + "low_lr": 5.905263157894737e-06, + "step": 1339 + }, + { + "epoch": 3.5213675213675213, + "high_lr": 0.0002952631578947368, + "low_lr": 5.905263157894737e-06, + "step": 1339 + }, + { + "epoch": 3.5213675213675213, + "high_lr": 0.0002952631578947368, + "low_lr": 5.905263157894737e-06, + "step": 1339 + }, + { + "epoch": 3.5213675213675213, + "high_lr": 0.0002952631578947368, + "low_lr": 5.905263157894737e-06, + "step": 1339 + }, + { + "epoch": 3.5213675213675213, + "high_lr": 0.0002952631578947368, + "low_lr": 5.905263157894737e-06, + "step": 1339 + }, + { + "epoch": 3.5213675213675213, + "high_lr": 0.0002952631578947368, + "low_lr": 5.905263157894737e-06, + "step": 1339 + }, + { + "epoch": 3.5239973701512164, + "grad_norm": 1.4464415311813354, + "learning_rate": 0.00029473684210526316, + "loss": 1.3037, + "step": 1340 + }, + { + "epoch": 3.5239973701512164, + "high_lr": 0.00029473684210526316, + "low_lr": 5.8947368421052634e-06, + "step": 1340 + }, + { + "epoch": 3.5239973701512164, + "high_lr": 0.00029473684210526316, + "low_lr": 5.8947368421052634e-06, + "step": 1340 + }, + { + "epoch": 3.5239973701512164, + "high_lr": 0.00029473684210526316, + "low_lr": 5.8947368421052634e-06, + "step": 1340 + }, + { + "epoch": 3.5239973701512164, + "high_lr": 0.00029473684210526316, + "low_lr": 5.8947368421052634e-06, + "step": 1340 + }, + { + "epoch": 3.5239973701512164, + "high_lr": 0.00029473684210526316, + "low_lr": 5.8947368421052634e-06, + "step": 1340 + }, + { + "epoch": 3.5239973701512164, + "high_lr": 0.00029473684210526316, + "low_lr": 5.8947368421052634e-06, + "step": 1340 + }, + { + "epoch": 3.5239973701512164, + "high_lr": 0.00029473684210526316, + "low_lr": 5.8947368421052634e-06, + "step": 1340 + }, + { + "epoch": 3.5239973701512164, + "high_lr": 0.00029473684210526316, + "low_lr": 5.8947368421052634e-06, + "step": 1340 + }, + { + "epoch": 3.5266272189349115, + "grad_norm": 1.4825074672698975, + "learning_rate": 0.0002942105263157895, + "loss": 1.2406, + "step": 1341 + }, + { + "epoch": 3.5266272189349115, + "high_lr": 0.0002942105263157895, + "low_lr": 5.88421052631579e-06, + "step": 1341 + }, + { + "epoch": 3.5266272189349115, + "high_lr": 0.0002942105263157895, + "low_lr": 5.88421052631579e-06, + "step": 1341 + }, + { + "epoch": 3.5266272189349115, + "high_lr": 0.0002942105263157895, + "low_lr": 5.88421052631579e-06, + "step": 1341 + }, + { + "epoch": 3.5266272189349115, + "high_lr": 0.0002942105263157895, + "low_lr": 5.88421052631579e-06, + "step": 1341 + }, + { + "epoch": 3.5266272189349115, + "high_lr": 0.0002942105263157895, + "low_lr": 5.88421052631579e-06, + "step": 1341 + }, + { + "epoch": 3.5266272189349115, + "high_lr": 0.0002942105263157895, + "low_lr": 5.88421052631579e-06, + "step": 1341 + }, + { + "epoch": 3.5266272189349115, + "high_lr": 0.0002942105263157895, + "low_lr": 5.88421052631579e-06, + "step": 1341 + }, + { + "epoch": 3.5266272189349115, + "high_lr": 0.0002942105263157895, + "low_lr": 5.88421052631579e-06, + "step": 1341 + }, + { + "epoch": 3.529257067718606, + "grad_norm": 1.2963181734085083, + "learning_rate": 0.0002936842105263158, + "loss": 1.2894, + "step": 1342 + }, + { + "epoch": 3.529257067718606, + "high_lr": 0.0002936842105263158, + "low_lr": 5.873684210526316e-06, + "step": 1342 + }, + { + "epoch": 3.529257067718606, + "high_lr": 0.0002936842105263158, + "low_lr": 5.873684210526316e-06, + "step": 1342 + }, + { + "epoch": 3.529257067718606, + "high_lr": 0.0002936842105263158, + "low_lr": 5.873684210526316e-06, + "step": 1342 + }, + { + "epoch": 3.529257067718606, + "high_lr": 0.0002936842105263158, + "low_lr": 5.873684210526316e-06, + "step": 1342 + }, + { + "epoch": 3.529257067718606, + "high_lr": 0.0002936842105263158, + "low_lr": 5.873684210526316e-06, + "step": 1342 + }, + { + "epoch": 3.529257067718606, + "high_lr": 0.0002936842105263158, + "low_lr": 5.873684210526316e-06, + "step": 1342 + }, + { + "epoch": 3.529257067718606, + "high_lr": 0.0002936842105263158, + "low_lr": 5.873684210526316e-06, + "step": 1342 + }, + { + "epoch": 3.529257067718606, + "high_lr": 0.0002936842105263158, + "low_lr": 5.873684210526316e-06, + "step": 1342 + }, + { + "epoch": 3.5318869165023012, + "grad_norm": 1.4152305126190186, + "learning_rate": 0.0002931578947368421, + "loss": 1.3201, + "step": 1343 + }, + { + "epoch": 3.5318869165023012, + "high_lr": 0.0002931578947368421, + "low_lr": 5.863157894736842e-06, + "step": 1343 + }, + { + "epoch": 3.5318869165023012, + "high_lr": 0.0002931578947368421, + "low_lr": 5.863157894736842e-06, + "step": 1343 + }, + { + "epoch": 3.5318869165023012, + "high_lr": 0.0002931578947368421, + "low_lr": 5.863157894736842e-06, + "step": 1343 + }, + { + "epoch": 3.5318869165023012, + "high_lr": 0.0002931578947368421, + "low_lr": 5.863157894736842e-06, + "step": 1343 + }, + { + "epoch": 3.5318869165023012, + "high_lr": 0.0002931578947368421, + "low_lr": 5.863157894736842e-06, + "step": 1343 + }, + { + "epoch": 3.5318869165023012, + "high_lr": 0.0002931578947368421, + "low_lr": 5.863157894736842e-06, + "step": 1343 + }, + { + "epoch": 3.5318869165023012, + "high_lr": 0.0002931578947368421, + "low_lr": 5.863157894736842e-06, + "step": 1343 + }, + { + "epoch": 3.5318869165023012, + "high_lr": 0.0002931578947368421, + "low_lr": 5.863157894736842e-06, + "step": 1343 + }, + { + "epoch": 3.534516765285996, + "grad_norm": 1.319513201713562, + "learning_rate": 0.0002926315789473684, + "loss": 1.3224, + "step": 1344 + }, + { + "epoch": 3.534516765285996, + "high_lr": 0.0002926315789473684, + "low_lr": 5.852631578947369e-06, + "step": 1344 + }, + { + "epoch": 3.534516765285996, + "high_lr": 0.0002926315789473684, + "low_lr": 5.852631578947369e-06, + "step": 1344 + }, + { + "epoch": 3.534516765285996, + "high_lr": 0.0002926315789473684, + "low_lr": 5.852631578947369e-06, + "step": 1344 + }, + { + "epoch": 3.534516765285996, + "high_lr": 0.0002926315789473684, + "low_lr": 5.852631578947369e-06, + "step": 1344 + }, + { + "epoch": 3.534516765285996, + "high_lr": 0.0002926315789473684, + "low_lr": 5.852631578947369e-06, + "step": 1344 + }, + { + "epoch": 3.534516765285996, + "high_lr": 0.0002926315789473684, + "low_lr": 5.852631578947369e-06, + "step": 1344 + }, + { + "epoch": 3.534516765285996, + "high_lr": 0.0002926315789473684, + "low_lr": 5.852631578947369e-06, + "step": 1344 + }, + { + "epoch": 3.534516765285996, + "high_lr": 0.0002926315789473684, + "low_lr": 5.852631578947369e-06, + "step": 1344 + }, + { + "epoch": 3.537146614069691, + "grad_norm": 1.3924484252929688, + "learning_rate": 0.00029210526315789475, + "loss": 1.2676, + "step": 1345 + }, + { + "epoch": 3.537146614069691, + "high_lr": 0.00029210526315789475, + "low_lr": 5.842105263157896e-06, + "step": 1345 + }, + { + "epoch": 3.537146614069691, + "high_lr": 0.00029210526315789475, + "low_lr": 5.842105263157896e-06, + "step": 1345 + }, + { + "epoch": 3.537146614069691, + "high_lr": 0.00029210526315789475, + "low_lr": 5.842105263157896e-06, + "step": 1345 + }, + { + "epoch": 3.537146614069691, + "high_lr": 0.00029210526315789475, + "low_lr": 5.842105263157896e-06, + "step": 1345 + }, + { + "epoch": 3.537146614069691, + "high_lr": 0.00029210526315789475, + "low_lr": 5.842105263157896e-06, + "step": 1345 + }, + { + "epoch": 3.537146614069691, + "high_lr": 0.00029210526315789475, + "low_lr": 5.842105263157896e-06, + "step": 1345 + }, + { + "epoch": 3.537146614069691, + "high_lr": 0.00029210526315789475, + "low_lr": 5.842105263157896e-06, + "step": 1345 + }, + { + "epoch": 3.537146614069691, + "high_lr": 0.00029210526315789475, + "low_lr": 5.842105263157896e-06, + "step": 1345 + }, + { + "epoch": 3.539776462853386, + "grad_norm": 1.495673656463623, + "learning_rate": 0.00029157894736842104, + "loss": 1.3082, + "step": 1346 + }, + { + "epoch": 3.539776462853386, + "high_lr": 0.00029157894736842104, + "low_lr": 5.831578947368421e-06, + "step": 1346 + }, + { + "epoch": 3.539776462853386, + "high_lr": 0.00029157894736842104, + "low_lr": 5.831578947368421e-06, + "step": 1346 + }, + { + "epoch": 3.539776462853386, + "high_lr": 0.00029157894736842104, + "low_lr": 5.831578947368421e-06, + "step": 1346 + }, + { + "epoch": 3.539776462853386, + "high_lr": 0.00029157894736842104, + "low_lr": 5.831578947368421e-06, + "step": 1346 + }, + { + "epoch": 3.539776462853386, + "high_lr": 0.00029157894736842104, + "low_lr": 5.831578947368421e-06, + "step": 1346 + }, + { + "epoch": 3.539776462853386, + "high_lr": 0.00029157894736842104, + "low_lr": 5.831578947368421e-06, + "step": 1346 + }, + { + "epoch": 3.539776462853386, + "high_lr": 0.00029157894736842104, + "low_lr": 5.831578947368421e-06, + "step": 1346 + }, + { + "epoch": 3.539776462853386, + "high_lr": 0.00029157894736842104, + "low_lr": 5.831578947368421e-06, + "step": 1346 + }, + { + "epoch": 3.5424063116370808, + "grad_norm": 1.3282442092895508, + "learning_rate": 0.0002910526315789474, + "loss": 1.3114, + "step": 1347 + }, + { + "epoch": 3.5424063116370808, + "high_lr": 0.0002910526315789474, + "low_lr": 5.8210526315789486e-06, + "step": 1347 + }, + { + "epoch": 3.5424063116370808, + "high_lr": 0.0002910526315789474, + "low_lr": 5.8210526315789486e-06, + "step": 1347 + }, + { + "epoch": 3.5424063116370808, + "high_lr": 0.0002910526315789474, + "low_lr": 5.8210526315789486e-06, + "step": 1347 + }, + { + "epoch": 3.5424063116370808, + "high_lr": 0.0002910526315789474, + "low_lr": 5.8210526315789486e-06, + "step": 1347 + }, + { + "epoch": 3.5424063116370808, + "high_lr": 0.0002910526315789474, + "low_lr": 5.8210526315789486e-06, + "step": 1347 + }, + { + "epoch": 3.5424063116370808, + "high_lr": 0.0002910526315789474, + "low_lr": 5.8210526315789486e-06, + "step": 1347 + }, + { + "epoch": 3.5424063116370808, + "high_lr": 0.0002910526315789474, + "low_lr": 5.8210526315789486e-06, + "step": 1347 + }, + { + "epoch": 3.5424063116370808, + "high_lr": 0.0002910526315789474, + "low_lr": 5.8210526315789486e-06, + "step": 1347 + }, + { + "epoch": 3.545036160420776, + "grad_norm": 1.2868953943252563, + "learning_rate": 0.0002905263157894737, + "loss": 1.3005, + "step": 1348 + }, + { + "epoch": 3.545036160420776, + "high_lr": 0.0002905263157894737, + "low_lr": 5.810526315789474e-06, + "step": 1348 + }, + { + "epoch": 3.545036160420776, + "high_lr": 0.0002905263157894737, + "low_lr": 5.810526315789474e-06, + "step": 1348 + }, + { + "epoch": 3.545036160420776, + "high_lr": 0.0002905263157894737, + "low_lr": 5.810526315789474e-06, + "step": 1348 + }, + { + "epoch": 3.545036160420776, + "high_lr": 0.0002905263157894737, + "low_lr": 5.810526315789474e-06, + "step": 1348 + }, + { + "epoch": 3.545036160420776, + "high_lr": 0.0002905263157894737, + "low_lr": 5.810526315789474e-06, + "step": 1348 + }, + { + "epoch": 3.545036160420776, + "high_lr": 0.0002905263157894737, + "low_lr": 5.810526315789474e-06, + "step": 1348 + }, + { + "epoch": 3.545036160420776, + "high_lr": 0.0002905263157894737, + "low_lr": 5.810526315789474e-06, + "step": 1348 + }, + { + "epoch": 3.545036160420776, + "high_lr": 0.0002905263157894737, + "low_lr": 5.810526315789474e-06, + "step": 1348 + }, + { + "epoch": 3.5476660092044705, + "grad_norm": 1.4387948513031006, + "learning_rate": 0.00029, + "loss": 1.2966, + "step": 1349 + }, + { + "epoch": 3.5476660092044705, + "high_lr": 0.00029, + "low_lr": 5.8e-06, + "step": 1349 + }, + { + "epoch": 3.5476660092044705, + "high_lr": 0.00029, + "low_lr": 5.8e-06, + "step": 1349 + }, + { + "epoch": 3.5476660092044705, + "high_lr": 0.00029, + "low_lr": 5.8e-06, + "step": 1349 + }, + { + "epoch": 3.5476660092044705, + "high_lr": 0.00029, + "low_lr": 5.8e-06, + "step": 1349 + }, + { + "epoch": 3.5476660092044705, + "high_lr": 0.00029, + "low_lr": 5.8e-06, + "step": 1349 + }, + { + "epoch": 3.5476660092044705, + "high_lr": 0.00029, + "low_lr": 5.8e-06, + "step": 1349 + }, + { + "epoch": 3.5476660092044705, + "high_lr": 0.00029, + "low_lr": 5.8e-06, + "step": 1349 + }, + { + "epoch": 3.5476660092044705, + "high_lr": 0.00029, + "low_lr": 5.8e-06, + "step": 1349 + }, + { + "epoch": 3.5502958579881656, + "grad_norm": 1.4605005979537964, + "learning_rate": 0.00028947368421052634, + "loss": 1.3046, + "step": 1350 + }, + { + "epoch": 3.5502958579881656, + "high_lr": 0.00028947368421052634, + "low_lr": 5.789473684210527e-06, + "step": 1350 + }, + { + "epoch": 3.5502958579881656, + "high_lr": 0.00028947368421052634, + "low_lr": 5.789473684210527e-06, + "step": 1350 + }, + { + "epoch": 3.5502958579881656, + "high_lr": 0.00028947368421052634, + "low_lr": 5.789473684210527e-06, + "step": 1350 + }, + { + "epoch": 3.5502958579881656, + "high_lr": 0.00028947368421052634, + "low_lr": 5.789473684210527e-06, + "step": 1350 + }, + { + "epoch": 3.5502958579881656, + "high_lr": 0.00028947368421052634, + "low_lr": 5.789473684210527e-06, + "step": 1350 + }, + { + "epoch": 3.5502958579881656, + "high_lr": 0.00028947368421052634, + "low_lr": 5.789473684210527e-06, + "step": 1350 + }, + { + "epoch": 3.5502958579881656, + "high_lr": 0.00028947368421052634, + "low_lr": 5.789473684210527e-06, + "step": 1350 + }, + { + "epoch": 3.5502958579881656, + "high_lr": 0.00028947368421052634, + "low_lr": 5.789473684210527e-06, + "step": 1350 + }, + { + "epoch": 3.5529257067718607, + "grad_norm": 1.4000284671783447, + "learning_rate": 0.00028894736842105263, + "loss": 1.3419, + "step": 1351 + }, + { + "epoch": 3.5529257067718607, + "high_lr": 0.00028894736842105263, + "low_lr": 5.778947368421053e-06, + "step": 1351 + }, + { + "epoch": 3.5529257067718607, + "high_lr": 0.00028894736842105263, + "low_lr": 5.778947368421053e-06, + "step": 1351 + }, + { + "epoch": 3.5529257067718607, + "high_lr": 0.00028894736842105263, + "low_lr": 5.778947368421053e-06, + "step": 1351 + }, + { + "epoch": 3.5529257067718607, + "high_lr": 0.00028894736842105263, + "low_lr": 5.778947368421053e-06, + "step": 1351 + }, + { + "epoch": 3.5529257067718607, + "high_lr": 0.00028894736842105263, + "low_lr": 5.778947368421053e-06, + "step": 1351 + }, + { + "epoch": 3.5529257067718607, + "high_lr": 0.00028894736842105263, + "low_lr": 5.778947368421053e-06, + "step": 1351 + }, + { + "epoch": 3.5529257067718607, + "high_lr": 0.00028894736842105263, + "low_lr": 5.778947368421053e-06, + "step": 1351 + }, + { + "epoch": 3.5529257067718607, + "high_lr": 0.00028894736842105263, + "low_lr": 5.778947368421053e-06, + "step": 1351 + }, + { + "epoch": 3.5555555555555554, + "grad_norm": 1.4252368211746216, + "learning_rate": 0.00028842105263157897, + "loss": 1.3249, + "step": 1352 + }, + { + "epoch": 3.5555555555555554, + "high_lr": 0.00028842105263157897, + "low_lr": 5.76842105263158e-06, + "step": 1352 + }, + { + "epoch": 3.5555555555555554, + "high_lr": 0.00028842105263157897, + "low_lr": 5.76842105263158e-06, + "step": 1352 + }, + { + "epoch": 3.5555555555555554, + "high_lr": 0.00028842105263157897, + "low_lr": 5.76842105263158e-06, + "step": 1352 + }, + { + "epoch": 3.5555555555555554, + "high_lr": 0.00028842105263157897, + "low_lr": 5.76842105263158e-06, + "step": 1352 + }, + { + "epoch": 3.5555555555555554, + "high_lr": 0.00028842105263157897, + "low_lr": 5.76842105263158e-06, + "step": 1352 + }, + { + "epoch": 3.5555555555555554, + "high_lr": 0.00028842105263157897, + "low_lr": 5.76842105263158e-06, + "step": 1352 + }, + { + "epoch": 3.5555555555555554, + "high_lr": 0.00028842105263157897, + "low_lr": 5.76842105263158e-06, + "step": 1352 + }, + { + "epoch": 3.5555555555555554, + "high_lr": 0.00028842105263157897, + "low_lr": 5.76842105263158e-06, + "step": 1352 + }, + { + "epoch": 3.5581854043392505, + "grad_norm": 1.4046462774276733, + "learning_rate": 0.00028789473684210525, + "loss": 1.2866, + "step": 1353 + }, + { + "epoch": 3.5581854043392505, + "high_lr": 0.00028789473684210525, + "low_lr": 5.757894736842106e-06, + "step": 1353 + }, + { + "epoch": 3.5581854043392505, + "high_lr": 0.00028789473684210525, + "low_lr": 5.757894736842106e-06, + "step": 1353 + }, + { + "epoch": 3.5581854043392505, + "high_lr": 0.00028789473684210525, + "low_lr": 5.757894736842106e-06, + "step": 1353 + }, + { + "epoch": 3.5581854043392505, + "high_lr": 0.00028789473684210525, + "low_lr": 5.757894736842106e-06, + "step": 1353 + }, + { + "epoch": 3.5581854043392505, + "high_lr": 0.00028789473684210525, + "low_lr": 5.757894736842106e-06, + "step": 1353 + }, + { + "epoch": 3.5581854043392505, + "high_lr": 0.00028789473684210525, + "low_lr": 5.757894736842106e-06, + "step": 1353 + }, + { + "epoch": 3.5581854043392505, + "high_lr": 0.00028789473684210525, + "low_lr": 5.757894736842106e-06, + "step": 1353 + }, + { + "epoch": 3.5581854043392505, + "high_lr": 0.00028789473684210525, + "low_lr": 5.757894736842106e-06, + "step": 1353 + }, + { + "epoch": 3.5608152531229456, + "grad_norm": 1.5079935789108276, + "learning_rate": 0.0002873684210526316, + "loss": 1.2601, + "step": 1354 + }, + { + "epoch": 3.5608152531229456, + "high_lr": 0.0002873684210526316, + "low_lr": 5.747368421052633e-06, + "step": 1354 + }, + { + "epoch": 3.5608152531229456, + "high_lr": 0.0002873684210526316, + "low_lr": 5.747368421052633e-06, + "step": 1354 + }, + { + "epoch": 3.5608152531229456, + "high_lr": 0.0002873684210526316, + "low_lr": 5.747368421052633e-06, + "step": 1354 + }, + { + "epoch": 3.5608152531229456, + "high_lr": 0.0002873684210526316, + "low_lr": 5.747368421052633e-06, + "step": 1354 + }, + { + "epoch": 3.5608152531229456, + "high_lr": 0.0002873684210526316, + "low_lr": 5.747368421052633e-06, + "step": 1354 + }, + { + "epoch": 3.5608152531229456, + "high_lr": 0.0002873684210526316, + "low_lr": 5.747368421052633e-06, + "step": 1354 + }, + { + "epoch": 3.5608152531229456, + "high_lr": 0.0002873684210526316, + "low_lr": 5.747368421052633e-06, + "step": 1354 + }, + { + "epoch": 3.5608152531229456, + "high_lr": 0.0002873684210526316, + "low_lr": 5.747368421052633e-06, + "step": 1354 + }, + { + "epoch": 3.56344510190664, + "grad_norm": 1.4072425365447998, + "learning_rate": 0.0002868421052631579, + "loss": 1.3042, + "step": 1355 + }, + { + "epoch": 3.56344510190664, + "high_lr": 0.0002868421052631579, + "low_lr": 5.736842105263158e-06, + "step": 1355 + }, + { + "epoch": 3.56344510190664, + "high_lr": 0.0002868421052631579, + "low_lr": 5.736842105263158e-06, + "step": 1355 + }, + { + "epoch": 3.56344510190664, + "high_lr": 0.0002868421052631579, + "low_lr": 5.736842105263158e-06, + "step": 1355 + }, + { + "epoch": 3.56344510190664, + "high_lr": 0.0002868421052631579, + "low_lr": 5.736842105263158e-06, + "step": 1355 + }, + { + "epoch": 3.56344510190664, + "high_lr": 0.0002868421052631579, + "low_lr": 5.736842105263158e-06, + "step": 1355 + }, + { + "epoch": 3.56344510190664, + "high_lr": 0.0002868421052631579, + "low_lr": 5.736842105263158e-06, + "step": 1355 + }, + { + "epoch": 3.56344510190664, + "high_lr": 0.0002868421052631579, + "low_lr": 5.736842105263158e-06, + "step": 1355 + }, + { + "epoch": 3.56344510190664, + "high_lr": 0.0002868421052631579, + "low_lr": 5.736842105263158e-06, + "step": 1355 + }, + { + "epoch": 3.5660749506903353, + "grad_norm": 1.595568299293518, + "learning_rate": 0.0002863157894736842, + "loss": 1.3556, + "step": 1356 + }, + { + "epoch": 3.5660749506903353, + "high_lr": 0.0002863157894736842, + "low_lr": 5.726315789473685e-06, + "step": 1356 + }, + { + "epoch": 3.5660749506903353, + "high_lr": 0.0002863157894736842, + "low_lr": 5.726315789473685e-06, + "step": 1356 + }, + { + "epoch": 3.5660749506903353, + "high_lr": 0.0002863157894736842, + "low_lr": 5.726315789473685e-06, + "step": 1356 + }, + { + "epoch": 3.5660749506903353, + "high_lr": 0.0002863157894736842, + "low_lr": 5.726315789473685e-06, + "step": 1356 + }, + { + "epoch": 3.5660749506903353, + "high_lr": 0.0002863157894736842, + "low_lr": 5.726315789473685e-06, + "step": 1356 + }, + { + "epoch": 3.5660749506903353, + "high_lr": 0.0002863157894736842, + "low_lr": 5.726315789473685e-06, + "step": 1356 + }, + { + "epoch": 3.5660749506903353, + "high_lr": 0.0002863157894736842, + "low_lr": 5.726315789473685e-06, + "step": 1356 + }, + { + "epoch": 3.5660749506903353, + "high_lr": 0.0002863157894736842, + "low_lr": 5.726315789473685e-06, + "step": 1356 + }, + { + "epoch": 3.5687047994740304, + "grad_norm": 1.3672691583633423, + "learning_rate": 0.00028578947368421056, + "loss": 1.2951, + "step": 1357 + }, + { + "epoch": 3.5687047994740304, + "high_lr": 0.00028578947368421056, + "low_lr": 5.715789473684211e-06, + "step": 1357 + }, + { + "epoch": 3.5687047994740304, + "high_lr": 0.00028578947368421056, + "low_lr": 5.715789473684211e-06, + "step": 1357 + }, + { + "epoch": 3.5687047994740304, + "high_lr": 0.00028578947368421056, + "low_lr": 5.715789473684211e-06, + "step": 1357 + }, + { + "epoch": 3.5687047994740304, + "high_lr": 0.00028578947368421056, + "low_lr": 5.715789473684211e-06, + "step": 1357 + }, + { + "epoch": 3.5687047994740304, + "high_lr": 0.00028578947368421056, + "low_lr": 5.715789473684211e-06, + "step": 1357 + }, + { + "epoch": 3.5687047994740304, + "high_lr": 0.00028578947368421056, + "low_lr": 5.715789473684211e-06, + "step": 1357 + }, + { + "epoch": 3.5687047994740304, + "high_lr": 0.00028578947368421056, + "low_lr": 5.715789473684211e-06, + "step": 1357 + }, + { + "epoch": 3.5687047994740304, + "high_lr": 0.00028578947368421056, + "low_lr": 5.715789473684211e-06, + "step": 1357 + }, + { + "epoch": 3.571334648257725, + "grad_norm": 1.4218196868896484, + "learning_rate": 0.00028526315789473685, + "loss": 1.2929, + "step": 1358 + }, + { + "epoch": 3.571334648257725, + "high_lr": 0.00028526315789473685, + "low_lr": 5.705263157894737e-06, + "step": 1358 + }, + { + "epoch": 3.571334648257725, + "high_lr": 0.00028526315789473685, + "low_lr": 5.705263157894737e-06, + "step": 1358 + }, + { + "epoch": 3.571334648257725, + "high_lr": 0.00028526315789473685, + "low_lr": 5.705263157894737e-06, + "step": 1358 + }, + { + "epoch": 3.571334648257725, + "high_lr": 0.00028526315789473685, + "low_lr": 5.705263157894737e-06, + "step": 1358 + }, + { + "epoch": 3.571334648257725, + "high_lr": 0.00028526315789473685, + "low_lr": 5.705263157894737e-06, + "step": 1358 + }, + { + "epoch": 3.571334648257725, + "high_lr": 0.00028526315789473685, + "low_lr": 5.705263157894737e-06, + "step": 1358 + }, + { + "epoch": 3.571334648257725, + "high_lr": 0.00028526315789473685, + "low_lr": 5.705263157894737e-06, + "step": 1358 + }, + { + "epoch": 3.571334648257725, + "high_lr": 0.00028526315789473685, + "low_lr": 5.705263157894737e-06, + "step": 1358 + }, + { + "epoch": 3.57396449704142, + "grad_norm": 1.3965482711791992, + "learning_rate": 0.0002847368421052632, + "loss": 1.2911, + "step": 1359 + }, + { + "epoch": 3.57396449704142, + "high_lr": 0.0002847368421052632, + "low_lr": 5.694736842105264e-06, + "step": 1359 + }, + { + "epoch": 3.57396449704142, + "high_lr": 0.0002847368421052632, + "low_lr": 5.694736842105264e-06, + "step": 1359 + }, + { + "epoch": 3.57396449704142, + "high_lr": 0.0002847368421052632, + "low_lr": 5.694736842105264e-06, + "step": 1359 + }, + { + "epoch": 3.57396449704142, + "high_lr": 0.0002847368421052632, + "low_lr": 5.694736842105264e-06, + "step": 1359 + }, + { + "epoch": 3.57396449704142, + "high_lr": 0.0002847368421052632, + "low_lr": 5.694736842105264e-06, + "step": 1359 + }, + { + "epoch": 3.57396449704142, + "high_lr": 0.0002847368421052632, + "low_lr": 5.694736842105264e-06, + "step": 1359 + }, + { + "epoch": 3.57396449704142, + "high_lr": 0.0002847368421052632, + "low_lr": 5.694736842105264e-06, + "step": 1359 + }, + { + "epoch": 3.57396449704142, + "high_lr": 0.0002847368421052632, + "low_lr": 5.694736842105264e-06, + "step": 1359 + }, + { + "epoch": 3.5765943458251153, + "grad_norm": 1.6971560716629028, + "learning_rate": 0.00028421052631578947, + "loss": 1.3143, + "step": 1360 + }, + { + "epoch": 3.5765943458251153, + "high_lr": 0.00028421052631578947, + "low_lr": 5.68421052631579e-06, + "step": 1360 + }, + { + "epoch": 3.5765943458251153, + "high_lr": 0.00028421052631578947, + "low_lr": 5.68421052631579e-06, + "step": 1360 + }, + { + "epoch": 3.5765943458251153, + "high_lr": 0.00028421052631578947, + "low_lr": 5.68421052631579e-06, + "step": 1360 + }, + { + "epoch": 3.5765943458251153, + "high_lr": 0.00028421052631578947, + "low_lr": 5.68421052631579e-06, + "step": 1360 + }, + { + "epoch": 3.5765943458251153, + "high_lr": 0.00028421052631578947, + "low_lr": 5.68421052631579e-06, + "step": 1360 + }, + { + "epoch": 3.5765943458251153, + "high_lr": 0.00028421052631578947, + "low_lr": 5.68421052631579e-06, + "step": 1360 + }, + { + "epoch": 3.5765943458251153, + "high_lr": 0.00028421052631578947, + "low_lr": 5.68421052631579e-06, + "step": 1360 + }, + { + "epoch": 3.5765943458251153, + "high_lr": 0.00028421052631578947, + "low_lr": 5.68421052631579e-06, + "step": 1360 + }, + { + "epoch": 3.57922419460881, + "grad_norm": 1.4533029794692993, + "learning_rate": 0.0002836842105263158, + "loss": 1.3407, + "step": 1361 + }, + { + "epoch": 3.57922419460881, + "high_lr": 0.0002836842105263158, + "low_lr": 5.673684210526317e-06, + "step": 1361 + }, + { + "epoch": 3.57922419460881, + "high_lr": 0.0002836842105263158, + "low_lr": 5.673684210526317e-06, + "step": 1361 + }, + { + "epoch": 3.57922419460881, + "high_lr": 0.0002836842105263158, + "low_lr": 5.673684210526317e-06, + "step": 1361 + }, + { + "epoch": 3.57922419460881, + "high_lr": 0.0002836842105263158, + "low_lr": 5.673684210526317e-06, + "step": 1361 + }, + { + "epoch": 3.57922419460881, + "high_lr": 0.0002836842105263158, + "low_lr": 5.673684210526317e-06, + "step": 1361 + }, + { + "epoch": 3.57922419460881, + "high_lr": 0.0002836842105263158, + "low_lr": 5.673684210526317e-06, + "step": 1361 + }, + { + "epoch": 3.57922419460881, + "high_lr": 0.0002836842105263158, + "low_lr": 5.673684210526317e-06, + "step": 1361 + }, + { + "epoch": 3.57922419460881, + "high_lr": 0.0002836842105263158, + "low_lr": 5.673684210526317e-06, + "step": 1361 + }, + { + "epoch": 3.581854043392505, + "grad_norm": 1.512963891029358, + "learning_rate": 0.0002831578947368421, + "loss": 1.2996, + "step": 1362 + }, + { + "epoch": 3.581854043392505, + "high_lr": 0.0002831578947368421, + "low_lr": 5.663157894736843e-06, + "step": 1362 + }, + { + "epoch": 3.581854043392505, + "high_lr": 0.0002831578947368421, + "low_lr": 5.663157894736843e-06, + "step": 1362 + }, + { + "epoch": 3.581854043392505, + "high_lr": 0.0002831578947368421, + "low_lr": 5.663157894736843e-06, + "step": 1362 + }, + { + "epoch": 3.581854043392505, + "high_lr": 0.0002831578947368421, + "low_lr": 5.663157894736843e-06, + "step": 1362 + }, + { + "epoch": 3.581854043392505, + "high_lr": 0.0002831578947368421, + "low_lr": 5.663157894736843e-06, + "step": 1362 + }, + { + "epoch": 3.581854043392505, + "high_lr": 0.0002831578947368421, + "low_lr": 5.663157894736843e-06, + "step": 1362 + }, + { + "epoch": 3.581854043392505, + "high_lr": 0.0002831578947368421, + "low_lr": 5.663157894736843e-06, + "step": 1362 + }, + { + "epoch": 3.581854043392505, + "high_lr": 0.0002831578947368421, + "low_lr": 5.663157894736843e-06, + "step": 1362 + }, + { + "epoch": 3.5844838921762, + "grad_norm": 1.5167478322982788, + "learning_rate": 0.0002826315789473684, + "loss": 1.278, + "step": 1363 + }, + { + "epoch": 3.5844838921762, + "high_lr": 0.0002826315789473684, + "low_lr": 5.652631578947368e-06, + "step": 1363 + }, + { + "epoch": 3.5844838921762, + "high_lr": 0.0002826315789473684, + "low_lr": 5.652631578947368e-06, + "step": 1363 + }, + { + "epoch": 3.5844838921762, + "high_lr": 0.0002826315789473684, + "low_lr": 5.652631578947368e-06, + "step": 1363 + }, + { + "epoch": 3.5844838921762, + "high_lr": 0.0002826315789473684, + "low_lr": 5.652631578947368e-06, + "step": 1363 + }, + { + "epoch": 3.5844838921762, + "high_lr": 0.0002826315789473684, + "low_lr": 5.652631578947368e-06, + "step": 1363 + }, + { + "epoch": 3.5844838921762, + "high_lr": 0.0002826315789473684, + "low_lr": 5.652631578947368e-06, + "step": 1363 + }, + { + "epoch": 3.5844838921762, + "high_lr": 0.0002826315789473684, + "low_lr": 5.652631578947368e-06, + "step": 1363 + }, + { + "epoch": 3.5844838921762, + "high_lr": 0.0002826315789473684, + "low_lr": 5.652631578947368e-06, + "step": 1363 + }, + { + "epoch": 3.5871137409598948, + "grad_norm": 1.4397393465042114, + "learning_rate": 0.0002821052631578948, + "loss": 1.3288, + "step": 1364 + }, + { + "epoch": 3.5871137409598948, + "high_lr": 0.0002821052631578948, + "low_lr": 5.642105263157895e-06, + "step": 1364 + }, + { + "epoch": 3.5871137409598948, + "high_lr": 0.0002821052631578948, + "low_lr": 5.642105263157895e-06, + "step": 1364 + }, + { + "epoch": 3.5871137409598948, + "high_lr": 0.0002821052631578948, + "low_lr": 5.642105263157895e-06, + "step": 1364 + }, + { + "epoch": 3.5871137409598948, + "high_lr": 0.0002821052631578948, + "low_lr": 5.642105263157895e-06, + "step": 1364 + }, + { + "epoch": 3.5871137409598948, + "high_lr": 0.0002821052631578948, + "low_lr": 5.642105263157895e-06, + "step": 1364 + }, + { + "epoch": 3.5871137409598948, + "high_lr": 0.0002821052631578948, + "low_lr": 5.642105263157895e-06, + "step": 1364 + }, + { + "epoch": 3.5871137409598948, + "high_lr": 0.0002821052631578948, + "low_lr": 5.642105263157895e-06, + "step": 1364 + }, + { + "epoch": 3.5871137409598948, + "high_lr": 0.0002821052631578948, + "low_lr": 5.642105263157895e-06, + "step": 1364 + }, + { + "epoch": 3.58974358974359, + "grad_norm": 1.3969866037368774, + "learning_rate": 0.00028157894736842106, + "loss": 1.3335, + "step": 1365 + }, + { + "epoch": 3.58974358974359, + "high_lr": 0.00028157894736842106, + "low_lr": 5.631578947368422e-06, + "step": 1365 + }, + { + "epoch": 3.58974358974359, + "high_lr": 0.00028157894736842106, + "low_lr": 5.631578947368422e-06, + "step": 1365 + }, + { + "epoch": 3.58974358974359, + "high_lr": 0.00028157894736842106, + "low_lr": 5.631578947368422e-06, + "step": 1365 + }, + { + "epoch": 3.58974358974359, + "high_lr": 0.00028157894736842106, + "low_lr": 5.631578947368422e-06, + "step": 1365 + }, + { + "epoch": 3.58974358974359, + "high_lr": 0.00028157894736842106, + "low_lr": 5.631578947368422e-06, + "step": 1365 + }, + { + "epoch": 3.58974358974359, + "high_lr": 0.00028157894736842106, + "low_lr": 5.631578947368422e-06, + "step": 1365 + }, + { + "epoch": 3.58974358974359, + "high_lr": 0.00028157894736842106, + "low_lr": 5.631578947368422e-06, + "step": 1365 + }, + { + "epoch": 3.58974358974359, + "high_lr": 0.00028157894736842106, + "low_lr": 5.631578947368422e-06, + "step": 1365 + }, + { + "epoch": 3.5923734385272845, + "grad_norm": 1.3118953704833984, + "learning_rate": 0.0002810526315789474, + "loss": 1.3282, + "step": 1366 + }, + { + "epoch": 3.5923734385272845, + "high_lr": 0.0002810526315789474, + "low_lr": 5.621052631578948e-06, + "step": 1366 + }, + { + "epoch": 3.5923734385272845, + "high_lr": 0.0002810526315789474, + "low_lr": 5.621052631578948e-06, + "step": 1366 + }, + { + "epoch": 3.5923734385272845, + "high_lr": 0.0002810526315789474, + "low_lr": 5.621052631578948e-06, + "step": 1366 + }, + { + "epoch": 3.5923734385272845, + "high_lr": 0.0002810526315789474, + "low_lr": 5.621052631578948e-06, + "step": 1366 + }, + { + "epoch": 3.5923734385272845, + "high_lr": 0.0002810526315789474, + "low_lr": 5.621052631578948e-06, + "step": 1366 + }, + { + "epoch": 3.5923734385272845, + "high_lr": 0.0002810526315789474, + "low_lr": 5.621052631578948e-06, + "step": 1366 + }, + { + "epoch": 3.5923734385272845, + "high_lr": 0.0002810526315789474, + "low_lr": 5.621052631578948e-06, + "step": 1366 + }, + { + "epoch": 3.5923734385272845, + "high_lr": 0.0002810526315789474, + "low_lr": 5.621052631578948e-06, + "step": 1366 + }, + { + "epoch": 3.5950032873109796, + "grad_norm": 1.4552891254425049, + "learning_rate": 0.0002805263157894737, + "loss": 1.2933, + "step": 1367 + }, + { + "epoch": 3.5950032873109796, + "high_lr": 0.0002805263157894737, + "low_lr": 5.610526315789474e-06, + "step": 1367 + }, + { + "epoch": 3.5950032873109796, + "high_lr": 0.0002805263157894737, + "low_lr": 5.610526315789474e-06, + "step": 1367 + }, + { + "epoch": 3.5950032873109796, + "high_lr": 0.0002805263157894737, + "low_lr": 5.610526315789474e-06, + "step": 1367 + }, + { + "epoch": 3.5950032873109796, + "high_lr": 0.0002805263157894737, + "low_lr": 5.610526315789474e-06, + "step": 1367 + }, + { + "epoch": 3.5950032873109796, + "high_lr": 0.0002805263157894737, + "low_lr": 5.610526315789474e-06, + "step": 1367 + }, + { + "epoch": 3.5950032873109796, + "high_lr": 0.0002805263157894737, + "low_lr": 5.610526315789474e-06, + "step": 1367 + }, + { + "epoch": 3.5950032873109796, + "high_lr": 0.0002805263157894737, + "low_lr": 5.610526315789474e-06, + "step": 1367 + }, + { + "epoch": 3.5950032873109796, + "high_lr": 0.0002805263157894737, + "low_lr": 5.610526315789474e-06, + "step": 1367 + }, + { + "epoch": 3.5976331360946747, + "grad_norm": 1.4135433435440063, + "learning_rate": 0.00028000000000000003, + "loss": 1.3545, + "step": 1368 + }, + { + "epoch": 3.5976331360946747, + "high_lr": 0.00028000000000000003, + "low_lr": 5.600000000000001e-06, + "step": 1368 + }, + { + "epoch": 3.5976331360946747, + "high_lr": 0.00028000000000000003, + "low_lr": 5.600000000000001e-06, + "step": 1368 + }, + { + "epoch": 3.5976331360946747, + "high_lr": 0.00028000000000000003, + "low_lr": 5.600000000000001e-06, + "step": 1368 + }, + { + "epoch": 3.5976331360946747, + "high_lr": 0.00028000000000000003, + "low_lr": 5.600000000000001e-06, + "step": 1368 + }, + { + "epoch": 3.5976331360946747, + "high_lr": 0.00028000000000000003, + "low_lr": 5.600000000000001e-06, + "step": 1368 + }, + { + "epoch": 3.5976331360946747, + "high_lr": 0.00028000000000000003, + "low_lr": 5.600000000000001e-06, + "step": 1368 + }, + { + "epoch": 3.5976331360946747, + "high_lr": 0.00028000000000000003, + "low_lr": 5.600000000000001e-06, + "step": 1368 + }, + { + "epoch": 3.5976331360946747, + "high_lr": 0.00028000000000000003, + "low_lr": 5.600000000000001e-06, + "step": 1368 + }, + { + "epoch": 3.6002629848783694, + "grad_norm": 1.3850128650665283, + "learning_rate": 0.0002794736842105263, + "loss": 1.2739, + "step": 1369 + }, + { + "epoch": 3.6002629848783694, + "high_lr": 0.0002794736842105263, + "low_lr": 5.589473684210527e-06, + "step": 1369 + }, + { + "epoch": 3.6002629848783694, + "high_lr": 0.0002794736842105263, + "low_lr": 5.589473684210527e-06, + "step": 1369 + }, + { + "epoch": 3.6002629848783694, + "high_lr": 0.0002794736842105263, + "low_lr": 5.589473684210527e-06, + "step": 1369 + }, + { + "epoch": 3.6002629848783694, + "high_lr": 0.0002794736842105263, + "low_lr": 5.589473684210527e-06, + "step": 1369 + }, + { + "epoch": 3.6002629848783694, + "high_lr": 0.0002794736842105263, + "low_lr": 5.589473684210527e-06, + "step": 1369 + }, + { + "epoch": 3.6002629848783694, + "high_lr": 0.0002794736842105263, + "low_lr": 5.589473684210527e-06, + "step": 1369 + }, + { + "epoch": 3.6002629848783694, + "high_lr": 0.0002794736842105263, + "low_lr": 5.589473684210527e-06, + "step": 1369 + }, + { + "epoch": 3.6002629848783694, + "high_lr": 0.0002794736842105263, + "low_lr": 5.589473684210527e-06, + "step": 1369 + }, + { + "epoch": 3.6028928336620645, + "grad_norm": 1.3625056743621826, + "learning_rate": 0.0002789473684210526, + "loss": 1.285, + "step": 1370 + }, + { + "epoch": 3.6028928336620645, + "high_lr": 0.0002789473684210526, + "low_lr": 5.578947368421052e-06, + "step": 1370 + }, + { + "epoch": 3.6028928336620645, + "high_lr": 0.0002789473684210526, + "low_lr": 5.578947368421052e-06, + "step": 1370 + }, + { + "epoch": 3.6028928336620645, + "high_lr": 0.0002789473684210526, + "low_lr": 5.578947368421052e-06, + "step": 1370 + }, + { + "epoch": 3.6028928336620645, + "high_lr": 0.0002789473684210526, + "low_lr": 5.578947368421052e-06, + "step": 1370 + }, + { + "epoch": 3.6028928336620645, + "high_lr": 0.0002789473684210526, + "low_lr": 5.578947368421052e-06, + "step": 1370 + }, + { + "epoch": 3.6028928336620645, + "high_lr": 0.0002789473684210526, + "low_lr": 5.578947368421052e-06, + "step": 1370 + }, + { + "epoch": 3.6028928336620645, + "high_lr": 0.0002789473684210526, + "low_lr": 5.578947368421052e-06, + "step": 1370 + }, + { + "epoch": 3.6028928336620645, + "high_lr": 0.0002789473684210526, + "low_lr": 5.578947368421052e-06, + "step": 1370 + }, + { + "epoch": 3.605522682445759, + "grad_norm": 1.468536615371704, + "learning_rate": 0.00027842105263157894, + "loss": 1.2537, + "step": 1371 + }, + { + "epoch": 3.605522682445759, + "high_lr": 0.00027842105263157894, + "low_lr": 5.5684210526315796e-06, + "step": 1371 + }, + { + "epoch": 3.605522682445759, + "high_lr": 0.00027842105263157894, + "low_lr": 5.5684210526315796e-06, + "step": 1371 + }, + { + "epoch": 3.605522682445759, + "high_lr": 0.00027842105263157894, + "low_lr": 5.5684210526315796e-06, + "step": 1371 + }, + { + "epoch": 3.605522682445759, + "high_lr": 0.00027842105263157894, + "low_lr": 5.5684210526315796e-06, + "step": 1371 + }, + { + "epoch": 3.605522682445759, + "high_lr": 0.00027842105263157894, + "low_lr": 5.5684210526315796e-06, + "step": 1371 + }, + { + "epoch": 3.605522682445759, + "high_lr": 0.00027842105263157894, + "low_lr": 5.5684210526315796e-06, + "step": 1371 + }, + { + "epoch": 3.605522682445759, + "high_lr": 0.00027842105263157894, + "low_lr": 5.5684210526315796e-06, + "step": 1371 + }, + { + "epoch": 3.605522682445759, + "high_lr": 0.00027842105263157894, + "low_lr": 5.5684210526315796e-06, + "step": 1371 + }, + { + "epoch": 3.6081525312294542, + "grad_norm": 1.3496447801589966, + "learning_rate": 0.0002778947368421053, + "loss": 1.3043, + "step": 1372 + }, + { + "epoch": 3.6081525312294542, + "high_lr": 0.0002778947368421053, + "low_lr": 5.557894736842105e-06, + "step": 1372 + }, + { + "epoch": 3.6081525312294542, + "high_lr": 0.0002778947368421053, + "low_lr": 5.557894736842105e-06, + "step": 1372 + }, + { + "epoch": 3.6081525312294542, + "high_lr": 0.0002778947368421053, + "low_lr": 5.557894736842105e-06, + "step": 1372 + }, + { + "epoch": 3.6081525312294542, + "high_lr": 0.0002778947368421053, + "low_lr": 5.557894736842105e-06, + "step": 1372 + }, + { + "epoch": 3.6081525312294542, + "high_lr": 0.0002778947368421053, + "low_lr": 5.557894736842105e-06, + "step": 1372 + }, + { + "epoch": 3.6081525312294542, + "high_lr": 0.0002778947368421053, + "low_lr": 5.557894736842105e-06, + "step": 1372 + }, + { + "epoch": 3.6081525312294542, + "high_lr": 0.0002778947368421053, + "low_lr": 5.557894736842105e-06, + "step": 1372 + }, + { + "epoch": 3.6081525312294542, + "high_lr": 0.0002778947368421053, + "low_lr": 5.557894736842105e-06, + "step": 1372 + }, + { + "epoch": 3.6107823800131493, + "grad_norm": 1.5024930238723755, + "learning_rate": 0.0002773684210526316, + "loss": 1.2904, + "step": 1373 + }, + { + "epoch": 3.6107823800131493, + "high_lr": 0.0002773684210526316, + "low_lr": 5.547368421052632e-06, + "step": 1373 + }, + { + "epoch": 3.6107823800131493, + "high_lr": 0.0002773684210526316, + "low_lr": 5.547368421052632e-06, + "step": 1373 + }, + { + "epoch": 3.6107823800131493, + "high_lr": 0.0002773684210526316, + "low_lr": 5.547368421052632e-06, + "step": 1373 + }, + { + "epoch": 3.6107823800131493, + "high_lr": 0.0002773684210526316, + "low_lr": 5.547368421052632e-06, + "step": 1373 + }, + { + "epoch": 3.6107823800131493, + "high_lr": 0.0002773684210526316, + "low_lr": 5.547368421052632e-06, + "step": 1373 + }, + { + "epoch": 3.6107823800131493, + "high_lr": 0.0002773684210526316, + "low_lr": 5.547368421052632e-06, + "step": 1373 + }, + { + "epoch": 3.6107823800131493, + "high_lr": 0.0002773684210526316, + "low_lr": 5.547368421052632e-06, + "step": 1373 + }, + { + "epoch": 3.6107823800131493, + "high_lr": 0.0002773684210526316, + "low_lr": 5.547368421052632e-06, + "step": 1373 + }, + { + "epoch": 3.613412228796844, + "grad_norm": 1.3925727605819702, + "learning_rate": 0.0002768421052631579, + "loss": 1.3119, + "step": 1374 + }, + { + "epoch": 3.613412228796844, + "high_lr": 0.0002768421052631579, + "low_lr": 5.5368421052631586e-06, + "step": 1374 + }, + { + "epoch": 3.613412228796844, + "high_lr": 0.0002768421052631579, + "low_lr": 5.5368421052631586e-06, + "step": 1374 + }, + { + "epoch": 3.613412228796844, + "high_lr": 0.0002768421052631579, + "low_lr": 5.5368421052631586e-06, + "step": 1374 + }, + { + "epoch": 3.613412228796844, + "high_lr": 0.0002768421052631579, + "low_lr": 5.5368421052631586e-06, + "step": 1374 + }, + { + "epoch": 3.613412228796844, + "high_lr": 0.0002768421052631579, + "low_lr": 5.5368421052631586e-06, + "step": 1374 + }, + { + "epoch": 3.613412228796844, + "high_lr": 0.0002768421052631579, + "low_lr": 5.5368421052631586e-06, + "step": 1374 + }, + { + "epoch": 3.613412228796844, + "high_lr": 0.0002768421052631579, + "low_lr": 5.5368421052631586e-06, + "step": 1374 + }, + { + "epoch": 3.613412228796844, + "high_lr": 0.0002768421052631579, + "low_lr": 5.5368421052631586e-06, + "step": 1374 + }, + { + "epoch": 3.616042077580539, + "grad_norm": 1.3758540153503418, + "learning_rate": 0.00027631578947368425, + "loss": 1.2542, + "step": 1375 + }, + { + "epoch": 3.616042077580539, + "high_lr": 0.00027631578947368425, + "low_lr": 5.526315789473685e-06, + "step": 1375 + }, + { + "epoch": 3.616042077580539, + "high_lr": 0.00027631578947368425, + "low_lr": 5.526315789473685e-06, + "step": 1375 + }, + { + "epoch": 3.616042077580539, + "high_lr": 0.00027631578947368425, + "low_lr": 5.526315789473685e-06, + "step": 1375 + }, + { + "epoch": 3.616042077580539, + "high_lr": 0.00027631578947368425, + "low_lr": 5.526315789473685e-06, + "step": 1375 + }, + { + "epoch": 3.616042077580539, + "high_lr": 0.00027631578947368425, + "low_lr": 5.526315789473685e-06, + "step": 1375 + }, + { + "epoch": 3.616042077580539, + "high_lr": 0.00027631578947368425, + "low_lr": 5.526315789473685e-06, + "step": 1375 + }, + { + "epoch": 3.616042077580539, + "high_lr": 0.00027631578947368425, + "low_lr": 5.526315789473685e-06, + "step": 1375 + }, + { + "epoch": 3.616042077580539, + "high_lr": 0.00027631578947368425, + "low_lr": 5.526315789473685e-06, + "step": 1375 + }, + { + "epoch": 3.618671926364234, + "grad_norm": 1.4709190130233765, + "learning_rate": 0.00027578947368421053, + "loss": 1.2812, + "step": 1376 + }, + { + "epoch": 3.618671926364234, + "high_lr": 0.00027578947368421053, + "low_lr": 5.515789473684211e-06, + "step": 1376 + }, + { + "epoch": 3.618671926364234, + "high_lr": 0.00027578947368421053, + "low_lr": 5.515789473684211e-06, + "step": 1376 + }, + { + "epoch": 3.618671926364234, + "high_lr": 0.00027578947368421053, + "low_lr": 5.515789473684211e-06, + "step": 1376 + }, + { + "epoch": 3.618671926364234, + "high_lr": 0.00027578947368421053, + "low_lr": 5.515789473684211e-06, + "step": 1376 + }, + { + "epoch": 3.618671926364234, + "high_lr": 0.00027578947368421053, + "low_lr": 5.515789473684211e-06, + "step": 1376 + }, + { + "epoch": 3.618671926364234, + "high_lr": 0.00027578947368421053, + "low_lr": 5.515789473684211e-06, + "step": 1376 + }, + { + "epoch": 3.618671926364234, + "high_lr": 0.00027578947368421053, + "low_lr": 5.515789473684211e-06, + "step": 1376 + }, + { + "epoch": 3.618671926364234, + "high_lr": 0.00027578947368421053, + "low_lr": 5.515789473684211e-06, + "step": 1376 + }, + { + "epoch": 3.621301775147929, + "grad_norm": 1.3651463985443115, + "learning_rate": 0.0002752631578947368, + "loss": 1.2765, + "step": 1377 + }, + { + "epoch": 3.621301775147929, + "high_lr": 0.0002752631578947368, + "low_lr": 5.505263157894737e-06, + "step": 1377 + }, + { + "epoch": 3.621301775147929, + "high_lr": 0.0002752631578947368, + "low_lr": 5.505263157894737e-06, + "step": 1377 + }, + { + "epoch": 3.621301775147929, + "high_lr": 0.0002752631578947368, + "low_lr": 5.505263157894737e-06, + "step": 1377 + }, + { + "epoch": 3.621301775147929, + "high_lr": 0.0002752631578947368, + "low_lr": 5.505263157894737e-06, + "step": 1377 + }, + { + "epoch": 3.621301775147929, + "high_lr": 0.0002752631578947368, + "low_lr": 5.505263157894737e-06, + "step": 1377 + }, + { + "epoch": 3.621301775147929, + "high_lr": 0.0002752631578947368, + "low_lr": 5.505263157894737e-06, + "step": 1377 + }, + { + "epoch": 3.621301775147929, + "high_lr": 0.0002752631578947368, + "low_lr": 5.505263157894737e-06, + "step": 1377 + }, + { + "epoch": 3.621301775147929, + "high_lr": 0.0002752631578947368, + "low_lr": 5.505263157894737e-06, + "step": 1377 + }, + { + "epoch": 3.623931623931624, + "grad_norm": 1.4042037725448608, + "learning_rate": 0.00027473684210526316, + "loss": 1.3151, + "step": 1378 + }, + { + "epoch": 3.623931623931624, + "high_lr": 0.00027473684210526316, + "low_lr": 5.494736842105264e-06, + "step": 1378 + }, + { + "epoch": 3.623931623931624, + "high_lr": 0.00027473684210526316, + "low_lr": 5.494736842105264e-06, + "step": 1378 + }, + { + "epoch": 3.623931623931624, + "high_lr": 0.00027473684210526316, + "low_lr": 5.494736842105264e-06, + "step": 1378 + }, + { + "epoch": 3.623931623931624, + "high_lr": 0.00027473684210526316, + "low_lr": 5.494736842105264e-06, + "step": 1378 + }, + { + "epoch": 3.623931623931624, + "high_lr": 0.00027473684210526316, + "low_lr": 5.494736842105264e-06, + "step": 1378 + }, + { + "epoch": 3.623931623931624, + "high_lr": 0.00027473684210526316, + "low_lr": 5.494736842105264e-06, + "step": 1378 + }, + { + "epoch": 3.623931623931624, + "high_lr": 0.00027473684210526316, + "low_lr": 5.494736842105264e-06, + "step": 1378 + }, + { + "epoch": 3.623931623931624, + "high_lr": 0.00027473684210526316, + "low_lr": 5.494736842105264e-06, + "step": 1378 + }, + { + "epoch": 3.626561472715319, + "grad_norm": 1.4488333463668823, + "learning_rate": 0.00027421052631578945, + "loss": 1.3332, + "step": 1379 + }, + { + "epoch": 3.626561472715319, + "high_lr": 0.00027421052631578945, + "low_lr": 5.484210526315789e-06, + "step": 1379 + }, + { + "epoch": 3.626561472715319, + "high_lr": 0.00027421052631578945, + "low_lr": 5.484210526315789e-06, + "step": 1379 + }, + { + "epoch": 3.626561472715319, + "high_lr": 0.00027421052631578945, + "low_lr": 5.484210526315789e-06, + "step": 1379 + }, + { + "epoch": 3.626561472715319, + "high_lr": 0.00027421052631578945, + "low_lr": 5.484210526315789e-06, + "step": 1379 + }, + { + "epoch": 3.626561472715319, + "high_lr": 0.00027421052631578945, + "low_lr": 5.484210526315789e-06, + "step": 1379 + }, + { + "epoch": 3.626561472715319, + "high_lr": 0.00027421052631578945, + "low_lr": 5.484210526315789e-06, + "step": 1379 + }, + { + "epoch": 3.626561472715319, + "high_lr": 0.00027421052631578945, + "low_lr": 5.484210526315789e-06, + "step": 1379 + }, + { + "epoch": 3.626561472715319, + "high_lr": 0.00027421052631578945, + "low_lr": 5.484210526315789e-06, + "step": 1379 + }, + { + "epoch": 3.6291913214990137, + "grad_norm": 1.440686583518982, + "learning_rate": 0.00027368421052631584, + "loss": 1.2859, + "step": 1380 + }, + { + "epoch": 3.6291913214990137, + "high_lr": 0.00027368421052631584, + "low_lr": 5.4736842105263165e-06, + "step": 1380 + }, + { + "epoch": 3.6291913214990137, + "high_lr": 0.00027368421052631584, + "low_lr": 5.4736842105263165e-06, + "step": 1380 + }, + { + "epoch": 3.6291913214990137, + "high_lr": 0.00027368421052631584, + "low_lr": 5.4736842105263165e-06, + "step": 1380 + }, + { + "epoch": 3.6291913214990137, + "high_lr": 0.00027368421052631584, + "low_lr": 5.4736842105263165e-06, + "step": 1380 + }, + { + "epoch": 3.6291913214990137, + "high_lr": 0.00027368421052631584, + "low_lr": 5.4736842105263165e-06, + "step": 1380 + }, + { + "epoch": 3.6291913214990137, + "high_lr": 0.00027368421052631584, + "low_lr": 5.4736842105263165e-06, + "step": 1380 + }, + { + "epoch": 3.6291913214990137, + "high_lr": 0.00027368421052631584, + "low_lr": 5.4736842105263165e-06, + "step": 1380 + }, + { + "epoch": 3.6291913214990137, + "high_lr": 0.00027368421052631584, + "low_lr": 5.4736842105263165e-06, + "step": 1380 + }, + { + "epoch": 3.631821170282709, + "grad_norm": 1.4867942333221436, + "learning_rate": 0.0002731578947368421, + "loss": 1.3299, + "step": 1381 + }, + { + "epoch": 3.631821170282709, + "high_lr": 0.0002731578947368421, + "low_lr": 5.463157894736843e-06, + "step": 1381 + }, + { + "epoch": 3.631821170282709, + "high_lr": 0.0002731578947368421, + "low_lr": 5.463157894736843e-06, + "step": 1381 + }, + { + "epoch": 3.631821170282709, + "high_lr": 0.0002731578947368421, + "low_lr": 5.463157894736843e-06, + "step": 1381 + }, + { + "epoch": 3.631821170282709, + "high_lr": 0.0002731578947368421, + "low_lr": 5.463157894736843e-06, + "step": 1381 + }, + { + "epoch": 3.631821170282709, + "high_lr": 0.0002731578947368421, + "low_lr": 5.463157894736843e-06, + "step": 1381 + }, + { + "epoch": 3.631821170282709, + "high_lr": 0.0002731578947368421, + "low_lr": 5.463157894736843e-06, + "step": 1381 + }, + { + "epoch": 3.631821170282709, + "high_lr": 0.0002731578947368421, + "low_lr": 5.463157894736843e-06, + "step": 1381 + }, + { + "epoch": 3.631821170282709, + "high_lr": 0.0002731578947368421, + "low_lr": 5.463157894736843e-06, + "step": 1381 + }, + { + "epoch": 3.634451019066404, + "grad_norm": 1.3880106210708618, + "learning_rate": 0.00027263157894736847, + "loss": 1.262, + "step": 1382 + }, + { + "epoch": 3.634451019066404, + "high_lr": 0.00027263157894736847, + "low_lr": 5.452631578947369e-06, + "step": 1382 + }, + { + "epoch": 3.634451019066404, + "high_lr": 0.00027263157894736847, + "low_lr": 5.452631578947369e-06, + "step": 1382 + }, + { + "epoch": 3.634451019066404, + "high_lr": 0.00027263157894736847, + "low_lr": 5.452631578947369e-06, + "step": 1382 + }, + { + "epoch": 3.634451019066404, + "high_lr": 0.00027263157894736847, + "low_lr": 5.452631578947369e-06, + "step": 1382 + }, + { + "epoch": 3.634451019066404, + "high_lr": 0.00027263157894736847, + "low_lr": 5.452631578947369e-06, + "step": 1382 + }, + { + "epoch": 3.634451019066404, + "high_lr": 0.00027263157894736847, + "low_lr": 5.452631578947369e-06, + "step": 1382 + }, + { + "epoch": 3.634451019066404, + "high_lr": 0.00027263157894736847, + "low_lr": 5.452631578947369e-06, + "step": 1382 + }, + { + "epoch": 3.634451019066404, + "high_lr": 0.00027263157894736847, + "low_lr": 5.452631578947369e-06, + "step": 1382 + }, + { + "epoch": 3.6370808678500985, + "grad_norm": 1.6879795789718628, + "learning_rate": 0.00027210526315789475, + "loss": 1.2888, + "step": 1383 + }, + { + "epoch": 3.6370808678500985, + "high_lr": 0.00027210526315789475, + "low_lr": 5.4421052631578955e-06, + "step": 1383 + }, + { + "epoch": 3.6370808678500985, + "high_lr": 0.00027210526315789475, + "low_lr": 5.4421052631578955e-06, + "step": 1383 + }, + { + "epoch": 3.6370808678500985, + "high_lr": 0.00027210526315789475, + "low_lr": 5.4421052631578955e-06, + "step": 1383 + }, + { + "epoch": 3.6370808678500985, + "high_lr": 0.00027210526315789475, + "low_lr": 5.4421052631578955e-06, + "step": 1383 + }, + { + "epoch": 3.6370808678500985, + "high_lr": 0.00027210526315789475, + "low_lr": 5.4421052631578955e-06, + "step": 1383 + }, + { + "epoch": 3.6370808678500985, + "high_lr": 0.00027210526315789475, + "low_lr": 5.4421052631578955e-06, + "step": 1383 + }, + { + "epoch": 3.6370808678500985, + "high_lr": 0.00027210526315789475, + "low_lr": 5.4421052631578955e-06, + "step": 1383 + }, + { + "epoch": 3.6370808678500985, + "high_lr": 0.00027210526315789475, + "low_lr": 5.4421052631578955e-06, + "step": 1383 + }, + { + "epoch": 3.6397107166337936, + "grad_norm": 1.3893376588821411, + "learning_rate": 0.00027157894736842104, + "loss": 1.2899, + "step": 1384 + }, + { + "epoch": 3.6397107166337936, + "high_lr": 0.00027157894736842104, + "low_lr": 5.431578947368421e-06, + "step": 1384 + }, + { + "epoch": 3.6397107166337936, + "high_lr": 0.00027157894736842104, + "low_lr": 5.431578947368421e-06, + "step": 1384 + }, + { + "epoch": 3.6397107166337936, + "high_lr": 0.00027157894736842104, + "low_lr": 5.431578947368421e-06, + "step": 1384 + }, + { + "epoch": 3.6397107166337936, + "high_lr": 0.00027157894736842104, + "low_lr": 5.431578947368421e-06, + "step": 1384 + }, + { + "epoch": 3.6397107166337936, + "high_lr": 0.00027157894736842104, + "low_lr": 5.431578947368421e-06, + "step": 1384 + }, + { + "epoch": 3.6397107166337936, + "high_lr": 0.00027157894736842104, + "low_lr": 5.431578947368421e-06, + "step": 1384 + }, + { + "epoch": 3.6397107166337936, + "high_lr": 0.00027157894736842104, + "low_lr": 5.431578947368421e-06, + "step": 1384 + }, + { + "epoch": 3.6397107166337936, + "high_lr": 0.00027157894736842104, + "low_lr": 5.431578947368421e-06, + "step": 1384 + }, + { + "epoch": 3.6423405654174887, + "grad_norm": 1.4377245903015137, + "learning_rate": 0.0002710526315789474, + "loss": 1.309, + "step": 1385 + }, + { + "epoch": 3.6423405654174887, + "high_lr": 0.0002710526315789474, + "low_lr": 5.421052631578948e-06, + "step": 1385 + }, + { + "epoch": 3.6423405654174887, + "high_lr": 0.0002710526315789474, + "low_lr": 5.421052631578948e-06, + "step": 1385 + }, + { + "epoch": 3.6423405654174887, + "high_lr": 0.0002710526315789474, + "low_lr": 5.421052631578948e-06, + "step": 1385 + }, + { + "epoch": 3.6423405654174887, + "high_lr": 0.0002710526315789474, + "low_lr": 5.421052631578948e-06, + "step": 1385 + }, + { + "epoch": 3.6423405654174887, + "high_lr": 0.0002710526315789474, + "low_lr": 5.421052631578948e-06, + "step": 1385 + }, + { + "epoch": 3.6423405654174887, + "high_lr": 0.0002710526315789474, + "low_lr": 5.421052631578948e-06, + "step": 1385 + }, + { + "epoch": 3.6423405654174887, + "high_lr": 0.0002710526315789474, + "low_lr": 5.421052631578948e-06, + "step": 1385 + }, + { + "epoch": 3.6423405654174887, + "high_lr": 0.0002710526315789474, + "low_lr": 5.421052631578948e-06, + "step": 1385 + }, + { + "epoch": 3.6449704142011834, + "grad_norm": 1.39258873462677, + "learning_rate": 0.00027052631578947366, + "loss": 1.2727, + "step": 1386 + }, + { + "epoch": 3.6449704142011834, + "high_lr": 0.00027052631578947366, + "low_lr": 5.410526315789474e-06, + "step": 1386 + }, + { + "epoch": 3.6449704142011834, + "high_lr": 0.00027052631578947366, + "low_lr": 5.410526315789474e-06, + "step": 1386 + }, + { + "epoch": 3.6449704142011834, + "high_lr": 0.00027052631578947366, + "low_lr": 5.410526315789474e-06, + "step": 1386 + }, + { + "epoch": 3.6449704142011834, + "high_lr": 0.00027052631578947366, + "low_lr": 5.410526315789474e-06, + "step": 1386 + }, + { + "epoch": 3.6449704142011834, + "high_lr": 0.00027052631578947366, + "low_lr": 5.410526315789474e-06, + "step": 1386 + }, + { + "epoch": 3.6449704142011834, + "high_lr": 0.00027052631578947366, + "low_lr": 5.410526315789474e-06, + "step": 1386 + }, + { + "epoch": 3.6449704142011834, + "high_lr": 0.00027052631578947366, + "low_lr": 5.410526315789474e-06, + "step": 1386 + }, + { + "epoch": 3.6449704142011834, + "high_lr": 0.00027052631578947366, + "low_lr": 5.410526315789474e-06, + "step": 1386 + }, + { + "epoch": 3.6476002629848785, + "grad_norm": 1.4606719017028809, + "learning_rate": 0.00027, + "loss": 1.3294, + "step": 1387 + }, + { + "epoch": 3.6476002629848785, + "high_lr": 0.00027, + "low_lr": 5.400000000000001e-06, + "step": 1387 + }, + { + "epoch": 3.6476002629848785, + "high_lr": 0.00027, + "low_lr": 5.400000000000001e-06, + "step": 1387 + }, + { + "epoch": 3.6476002629848785, + "high_lr": 0.00027, + "low_lr": 5.400000000000001e-06, + "step": 1387 + }, + { + "epoch": 3.6476002629848785, + "high_lr": 0.00027, + "low_lr": 5.400000000000001e-06, + "step": 1387 + }, + { + "epoch": 3.6476002629848785, + "high_lr": 0.00027, + "low_lr": 5.400000000000001e-06, + "step": 1387 + }, + { + "epoch": 3.6476002629848785, + "high_lr": 0.00027, + "low_lr": 5.400000000000001e-06, + "step": 1387 + }, + { + "epoch": 3.6476002629848785, + "high_lr": 0.00027, + "low_lr": 5.400000000000001e-06, + "step": 1387 + }, + { + "epoch": 3.6476002629848785, + "high_lr": 0.00027, + "low_lr": 5.400000000000001e-06, + "step": 1387 + }, + { + "epoch": 3.650230111768573, + "grad_norm": 1.4539998769760132, + "learning_rate": 0.0002694736842105263, + "loss": 1.2799, + "step": 1388 + }, + { + "epoch": 3.650230111768573, + "high_lr": 0.0002694736842105263, + "low_lr": 5.389473684210526e-06, + "step": 1388 + }, + { + "epoch": 3.650230111768573, + "high_lr": 0.0002694736842105263, + "low_lr": 5.389473684210526e-06, + "step": 1388 + }, + { + "epoch": 3.650230111768573, + "high_lr": 0.0002694736842105263, + "low_lr": 5.389473684210526e-06, + "step": 1388 + }, + { + "epoch": 3.650230111768573, + "high_lr": 0.0002694736842105263, + "low_lr": 5.389473684210526e-06, + "step": 1388 + }, + { + "epoch": 3.650230111768573, + "high_lr": 0.0002694736842105263, + "low_lr": 5.389473684210526e-06, + "step": 1388 + }, + { + "epoch": 3.650230111768573, + "high_lr": 0.0002694736842105263, + "low_lr": 5.389473684210526e-06, + "step": 1388 + }, + { + "epoch": 3.650230111768573, + "high_lr": 0.0002694736842105263, + "low_lr": 5.389473684210526e-06, + "step": 1388 + }, + { + "epoch": 3.650230111768573, + "high_lr": 0.0002694736842105263, + "low_lr": 5.389473684210526e-06, + "step": 1388 + }, + { + "epoch": 3.6528599605522682, + "grad_norm": 1.4604228734970093, + "learning_rate": 0.0002689473684210527, + "loss": 1.2791, + "step": 1389 + }, + { + "epoch": 3.6528599605522682, + "high_lr": 0.0002689473684210527, + "low_lr": 5.3789473684210535e-06, + "step": 1389 + }, + { + "epoch": 3.6528599605522682, + "high_lr": 0.0002689473684210527, + "low_lr": 5.3789473684210535e-06, + "step": 1389 + }, + { + "epoch": 3.6528599605522682, + "high_lr": 0.0002689473684210527, + "low_lr": 5.3789473684210535e-06, + "step": 1389 + }, + { + "epoch": 3.6528599605522682, + "high_lr": 0.0002689473684210527, + "low_lr": 5.3789473684210535e-06, + "step": 1389 + }, + { + "epoch": 3.6528599605522682, + "high_lr": 0.0002689473684210527, + "low_lr": 5.3789473684210535e-06, + "step": 1389 + }, + { + "epoch": 3.6528599605522682, + "high_lr": 0.0002689473684210527, + "low_lr": 5.3789473684210535e-06, + "step": 1389 + }, + { + "epoch": 3.6528599605522682, + "high_lr": 0.0002689473684210527, + "low_lr": 5.3789473684210535e-06, + "step": 1389 + }, + { + "epoch": 3.6528599605522682, + "high_lr": 0.0002689473684210527, + "low_lr": 5.3789473684210535e-06, + "step": 1389 + }, + { + "epoch": 3.6554898093359633, + "grad_norm": 1.453295350074768, + "learning_rate": 0.00026842105263157897, + "loss": 1.298, + "step": 1390 + }, + { + "epoch": 3.6554898093359633, + "high_lr": 0.00026842105263157897, + "low_lr": 5.36842105263158e-06, + "step": 1390 + }, + { + "epoch": 3.6554898093359633, + "high_lr": 0.00026842105263157897, + "low_lr": 5.36842105263158e-06, + "step": 1390 + }, + { + "epoch": 3.6554898093359633, + "high_lr": 0.00026842105263157897, + "low_lr": 5.36842105263158e-06, + "step": 1390 + }, + { + "epoch": 3.6554898093359633, + "high_lr": 0.00026842105263157897, + "low_lr": 5.36842105263158e-06, + "step": 1390 + }, + { + "epoch": 3.6554898093359633, + "high_lr": 0.00026842105263157897, + "low_lr": 5.36842105263158e-06, + "step": 1390 + }, + { + "epoch": 3.6554898093359633, + "high_lr": 0.00026842105263157897, + "low_lr": 5.36842105263158e-06, + "step": 1390 + }, + { + "epoch": 3.6554898093359633, + "high_lr": 0.00026842105263157897, + "low_lr": 5.36842105263158e-06, + "step": 1390 + }, + { + "epoch": 3.6554898093359633, + "high_lr": 0.00026842105263157897, + "low_lr": 5.36842105263158e-06, + "step": 1390 + }, + { + "epoch": 3.658119658119658, + "grad_norm": 1.3308731317520142, + "learning_rate": 0.00026789473684210526, + "loss": 1.3055, + "step": 1391 + }, + { + "epoch": 3.658119658119658, + "high_lr": 0.00026789473684210526, + "low_lr": 5.357894736842105e-06, + "step": 1391 + }, + { + "epoch": 3.658119658119658, + "high_lr": 0.00026789473684210526, + "low_lr": 5.357894736842105e-06, + "step": 1391 + }, + { + "epoch": 3.658119658119658, + "high_lr": 0.00026789473684210526, + "low_lr": 5.357894736842105e-06, + "step": 1391 + }, + { + "epoch": 3.658119658119658, + "high_lr": 0.00026789473684210526, + "low_lr": 5.357894736842105e-06, + "step": 1391 + }, + { + "epoch": 3.658119658119658, + "high_lr": 0.00026789473684210526, + "low_lr": 5.357894736842105e-06, + "step": 1391 + }, + { + "epoch": 3.658119658119658, + "high_lr": 0.00026789473684210526, + "low_lr": 5.357894736842105e-06, + "step": 1391 + }, + { + "epoch": 3.658119658119658, + "high_lr": 0.00026789473684210526, + "low_lr": 5.357894736842105e-06, + "step": 1391 + }, + { + "epoch": 3.658119658119658, + "high_lr": 0.00026789473684210526, + "low_lr": 5.357894736842105e-06, + "step": 1391 + }, + { + "epoch": 3.660749506903353, + "grad_norm": 1.4058395624160767, + "learning_rate": 0.0002673684210526316, + "loss": 1.2897, + "step": 1392 + }, + { + "epoch": 3.660749506903353, + "high_lr": 0.0002673684210526316, + "low_lr": 5.3473684210526325e-06, + "step": 1392 + }, + { + "epoch": 3.660749506903353, + "high_lr": 0.0002673684210526316, + "low_lr": 5.3473684210526325e-06, + "step": 1392 + }, + { + "epoch": 3.660749506903353, + "high_lr": 0.0002673684210526316, + "low_lr": 5.3473684210526325e-06, + "step": 1392 + }, + { + "epoch": 3.660749506903353, + "high_lr": 0.0002673684210526316, + "low_lr": 5.3473684210526325e-06, + "step": 1392 + }, + { + "epoch": 3.660749506903353, + "high_lr": 0.0002673684210526316, + "low_lr": 5.3473684210526325e-06, + "step": 1392 + }, + { + "epoch": 3.660749506903353, + "high_lr": 0.0002673684210526316, + "low_lr": 5.3473684210526325e-06, + "step": 1392 + }, + { + "epoch": 3.660749506903353, + "high_lr": 0.0002673684210526316, + "low_lr": 5.3473684210526325e-06, + "step": 1392 + }, + { + "epoch": 3.660749506903353, + "high_lr": 0.0002673684210526316, + "low_lr": 5.3473684210526325e-06, + "step": 1392 + }, + { + "epoch": 3.6633793556870478, + "grad_norm": 1.326690912246704, + "learning_rate": 0.0002668421052631579, + "loss": 1.3402, + "step": 1393 + }, + { + "epoch": 3.6633793556870478, + "high_lr": 0.0002668421052631579, + "low_lr": 5.336842105263158e-06, + "step": 1393 + }, + { + "epoch": 3.6633793556870478, + "high_lr": 0.0002668421052631579, + "low_lr": 5.336842105263158e-06, + "step": 1393 + }, + { + "epoch": 3.6633793556870478, + "high_lr": 0.0002668421052631579, + "low_lr": 5.336842105263158e-06, + "step": 1393 + }, + { + "epoch": 3.6633793556870478, + "high_lr": 0.0002668421052631579, + "low_lr": 5.336842105263158e-06, + "step": 1393 + }, + { + "epoch": 3.6633793556870478, + "high_lr": 0.0002668421052631579, + "low_lr": 5.336842105263158e-06, + "step": 1393 + }, + { + "epoch": 3.6633793556870478, + "high_lr": 0.0002668421052631579, + "low_lr": 5.336842105263158e-06, + "step": 1393 + }, + { + "epoch": 3.6633793556870478, + "high_lr": 0.0002668421052631579, + "low_lr": 5.336842105263158e-06, + "step": 1393 + }, + { + "epoch": 3.6633793556870478, + "high_lr": 0.0002668421052631579, + "low_lr": 5.336842105263158e-06, + "step": 1393 + }, + { + "epoch": 3.666009204470743, + "grad_norm": 1.3362632989883423, + "learning_rate": 0.0002663157894736842, + "loss": 1.3058, + "step": 1394 + }, + { + "epoch": 3.666009204470743, + "high_lr": 0.0002663157894736842, + "low_lr": 5.326315789473685e-06, + "step": 1394 + }, + { + "epoch": 3.666009204470743, + "high_lr": 0.0002663157894736842, + "low_lr": 5.326315789473685e-06, + "step": 1394 + }, + { + "epoch": 3.666009204470743, + "high_lr": 0.0002663157894736842, + "low_lr": 5.326315789473685e-06, + "step": 1394 + }, + { + "epoch": 3.666009204470743, + "high_lr": 0.0002663157894736842, + "low_lr": 5.326315789473685e-06, + "step": 1394 + }, + { + "epoch": 3.666009204470743, + "high_lr": 0.0002663157894736842, + "low_lr": 5.326315789473685e-06, + "step": 1394 + }, + { + "epoch": 3.666009204470743, + "high_lr": 0.0002663157894736842, + "low_lr": 5.326315789473685e-06, + "step": 1394 + }, + { + "epoch": 3.666009204470743, + "high_lr": 0.0002663157894736842, + "low_lr": 5.326315789473685e-06, + "step": 1394 + }, + { + "epoch": 3.666009204470743, + "high_lr": 0.0002663157894736842, + "low_lr": 5.326315789473685e-06, + "step": 1394 + }, + { + "epoch": 3.668639053254438, + "grad_norm": 1.5829945802688599, + "learning_rate": 0.0002657894736842105, + "loss": 1.3656, + "step": 1395 + }, + { + "epoch": 3.668639053254438, + "high_lr": 0.0002657894736842105, + "low_lr": 5.315789473684211e-06, + "step": 1395 + }, + { + "epoch": 3.668639053254438, + "high_lr": 0.0002657894736842105, + "low_lr": 5.315789473684211e-06, + "step": 1395 + }, + { + "epoch": 3.668639053254438, + "high_lr": 0.0002657894736842105, + "low_lr": 5.315789473684211e-06, + "step": 1395 + }, + { + "epoch": 3.668639053254438, + "high_lr": 0.0002657894736842105, + "low_lr": 5.315789473684211e-06, + "step": 1395 + }, + { + "epoch": 3.668639053254438, + "high_lr": 0.0002657894736842105, + "low_lr": 5.315789473684211e-06, + "step": 1395 + }, + { + "epoch": 3.668639053254438, + "high_lr": 0.0002657894736842105, + "low_lr": 5.315789473684211e-06, + "step": 1395 + }, + { + "epoch": 3.668639053254438, + "high_lr": 0.0002657894736842105, + "low_lr": 5.315789473684211e-06, + "step": 1395 + }, + { + "epoch": 3.668639053254438, + "high_lr": 0.0002657894736842105, + "low_lr": 5.315789473684211e-06, + "step": 1395 + }, + { + "epoch": 3.6712689020381326, + "grad_norm": 1.4499706029891968, + "learning_rate": 0.00026526315789473685, + "loss": 1.2588, + "step": 1396 + }, + { + "epoch": 3.6712689020381326, + "high_lr": 0.00026526315789473685, + "low_lr": 5.305263157894738e-06, + "step": 1396 + }, + { + "epoch": 3.6712689020381326, + "high_lr": 0.00026526315789473685, + "low_lr": 5.305263157894738e-06, + "step": 1396 + }, + { + "epoch": 3.6712689020381326, + "high_lr": 0.00026526315789473685, + "low_lr": 5.305263157894738e-06, + "step": 1396 + }, + { + "epoch": 3.6712689020381326, + "high_lr": 0.00026526315789473685, + "low_lr": 5.305263157894738e-06, + "step": 1396 + }, + { + "epoch": 3.6712689020381326, + "high_lr": 0.00026526315789473685, + "low_lr": 5.305263157894738e-06, + "step": 1396 + }, + { + "epoch": 3.6712689020381326, + "high_lr": 0.00026526315789473685, + "low_lr": 5.305263157894738e-06, + "step": 1396 + }, + { + "epoch": 3.6712689020381326, + "high_lr": 0.00026526315789473685, + "low_lr": 5.305263157894738e-06, + "step": 1396 + }, + { + "epoch": 3.6712689020381326, + "high_lr": 0.00026526315789473685, + "low_lr": 5.305263157894738e-06, + "step": 1396 + }, + { + "epoch": 3.6738987508218277, + "grad_norm": 1.3186489343643188, + "learning_rate": 0.0002647368421052632, + "loss": 1.2915, + "step": 1397 + }, + { + "epoch": 3.6738987508218277, + "high_lr": 0.0002647368421052632, + "low_lr": 5.294736842105263e-06, + "step": 1397 + }, + { + "epoch": 3.6738987508218277, + "high_lr": 0.0002647368421052632, + "low_lr": 5.294736842105263e-06, + "step": 1397 + }, + { + "epoch": 3.6738987508218277, + "high_lr": 0.0002647368421052632, + "low_lr": 5.294736842105263e-06, + "step": 1397 + }, + { + "epoch": 3.6738987508218277, + "high_lr": 0.0002647368421052632, + "low_lr": 5.294736842105263e-06, + "step": 1397 + }, + { + "epoch": 3.6738987508218277, + "high_lr": 0.0002647368421052632, + "low_lr": 5.294736842105263e-06, + "step": 1397 + }, + { + "epoch": 3.6738987508218277, + "high_lr": 0.0002647368421052632, + "low_lr": 5.294736842105263e-06, + "step": 1397 + }, + { + "epoch": 3.6738987508218277, + "high_lr": 0.0002647368421052632, + "low_lr": 5.294736842105263e-06, + "step": 1397 + }, + { + "epoch": 3.6738987508218277, + "high_lr": 0.0002647368421052632, + "low_lr": 5.294736842105263e-06, + "step": 1397 + }, + { + "epoch": 3.676528599605523, + "grad_norm": 1.3520015478134155, + "learning_rate": 0.0002642105263157895, + "loss": 1.2928, + "step": 1398 + }, + { + "epoch": 3.676528599605523, + "high_lr": 0.0002642105263157895, + "low_lr": 5.2842105263157896e-06, + "step": 1398 + }, + { + "epoch": 3.676528599605523, + "high_lr": 0.0002642105263157895, + "low_lr": 5.2842105263157896e-06, + "step": 1398 + }, + { + "epoch": 3.676528599605523, + "high_lr": 0.0002642105263157895, + "low_lr": 5.2842105263157896e-06, + "step": 1398 + }, + { + "epoch": 3.676528599605523, + "high_lr": 0.0002642105263157895, + "low_lr": 5.2842105263157896e-06, + "step": 1398 + }, + { + "epoch": 3.676528599605523, + "high_lr": 0.0002642105263157895, + "low_lr": 5.2842105263157896e-06, + "step": 1398 + }, + { + "epoch": 3.676528599605523, + "high_lr": 0.0002642105263157895, + "low_lr": 5.2842105263157896e-06, + "step": 1398 + }, + { + "epoch": 3.676528599605523, + "high_lr": 0.0002642105263157895, + "low_lr": 5.2842105263157896e-06, + "step": 1398 + }, + { + "epoch": 3.676528599605523, + "high_lr": 0.0002642105263157895, + "low_lr": 5.2842105263157896e-06, + "step": 1398 + }, + { + "epoch": 3.6791584483892175, + "grad_norm": 1.4090241193771362, + "learning_rate": 0.0002636842105263158, + "loss": 1.2711, + "step": 1399 + }, + { + "epoch": 3.6791584483892175, + "high_lr": 0.0002636842105263158, + "low_lr": 5.273684210526317e-06, + "step": 1399 + }, + { + "epoch": 3.6791584483892175, + "high_lr": 0.0002636842105263158, + "low_lr": 5.273684210526317e-06, + "step": 1399 + }, + { + "epoch": 3.6791584483892175, + "high_lr": 0.0002636842105263158, + "low_lr": 5.273684210526317e-06, + "step": 1399 + }, + { + "epoch": 3.6791584483892175, + "high_lr": 0.0002636842105263158, + "low_lr": 5.273684210526317e-06, + "step": 1399 + }, + { + "epoch": 3.6791584483892175, + "high_lr": 0.0002636842105263158, + "low_lr": 5.273684210526317e-06, + "step": 1399 + }, + { + "epoch": 3.6791584483892175, + "high_lr": 0.0002636842105263158, + "low_lr": 5.273684210526317e-06, + "step": 1399 + }, + { + "epoch": 3.6791584483892175, + "high_lr": 0.0002636842105263158, + "low_lr": 5.273684210526317e-06, + "step": 1399 + }, + { + "epoch": 3.6791584483892175, + "high_lr": 0.0002636842105263158, + "low_lr": 5.273684210526317e-06, + "step": 1399 + }, + { + "epoch": 3.6817882971729126, + "grad_norm": 1.359285593032837, + "learning_rate": 0.0002631578947368421, + "loss": 1.3154, + "step": 1400 + }, + { + "epoch": 3.6817882971729126, + "high_lr": 0.0002631578947368421, + "low_lr": 5.263157894736842e-06, + "step": 1400 + }, + { + "epoch": 3.6817882971729126, + "high_lr": 0.0002631578947368421, + "low_lr": 5.263157894736842e-06, + "step": 1400 + }, + { + "epoch": 3.6817882971729126, + "high_lr": 0.0002631578947368421, + "low_lr": 5.263157894736842e-06, + "step": 1400 + }, + { + "epoch": 3.6817882971729126, + "high_lr": 0.0002631578947368421, + "low_lr": 5.263157894736842e-06, + "step": 1400 + }, + { + "epoch": 3.6817882971729126, + "high_lr": 0.0002631578947368421, + "low_lr": 5.263157894736842e-06, + "step": 1400 + }, + { + "epoch": 3.6817882971729126, + "high_lr": 0.0002631578947368421, + "low_lr": 5.263157894736842e-06, + "step": 1400 + }, + { + "epoch": 3.6817882971729126, + "high_lr": 0.0002631578947368421, + "low_lr": 5.263157894736842e-06, + "step": 1400 + }, + { + "epoch": 3.6817882971729126, + "high_lr": 0.0002631578947368421, + "low_lr": 5.263157894736842e-06, + "step": 1400 + }, + { + "epoch": 3.6844181459566077, + "grad_norm": 1.3238935470581055, + "learning_rate": 0.00026263157894736844, + "loss": 1.2649, + "step": 1401 + }, + { + "epoch": 3.6844181459566077, + "high_lr": 0.00026263157894736844, + "low_lr": 5.252631578947369e-06, + "step": 1401 + }, + { + "epoch": 3.6844181459566077, + "high_lr": 0.00026263157894736844, + "low_lr": 5.252631578947369e-06, + "step": 1401 + }, + { + "epoch": 3.6844181459566077, + "high_lr": 0.00026263157894736844, + "low_lr": 5.252631578947369e-06, + "step": 1401 + }, + { + "epoch": 3.6844181459566077, + "high_lr": 0.00026263157894736844, + "low_lr": 5.252631578947369e-06, + "step": 1401 + }, + { + "epoch": 3.6844181459566077, + "high_lr": 0.00026263157894736844, + "low_lr": 5.252631578947369e-06, + "step": 1401 + }, + { + "epoch": 3.6844181459566077, + "high_lr": 0.00026263157894736844, + "low_lr": 5.252631578947369e-06, + "step": 1401 + }, + { + "epoch": 3.6844181459566077, + "high_lr": 0.00026263157894736844, + "low_lr": 5.252631578947369e-06, + "step": 1401 + }, + { + "epoch": 3.6844181459566077, + "high_lr": 0.00026263157894736844, + "low_lr": 5.252631578947369e-06, + "step": 1401 + }, + { + "epoch": 3.6870479947403023, + "grad_norm": 1.4484986066818237, + "learning_rate": 0.0002621052631578947, + "loss": 1.3265, + "step": 1402 + }, + { + "epoch": 3.6870479947403023, + "high_lr": 0.0002621052631578947, + "low_lr": 5.242105263157895e-06, + "step": 1402 + }, + { + "epoch": 3.6870479947403023, + "high_lr": 0.0002621052631578947, + "low_lr": 5.242105263157895e-06, + "step": 1402 + }, + { + "epoch": 3.6870479947403023, + "high_lr": 0.0002621052631578947, + "low_lr": 5.242105263157895e-06, + "step": 1402 + }, + { + "epoch": 3.6870479947403023, + "high_lr": 0.0002621052631578947, + "low_lr": 5.242105263157895e-06, + "step": 1402 + }, + { + "epoch": 3.6870479947403023, + "high_lr": 0.0002621052631578947, + "low_lr": 5.242105263157895e-06, + "step": 1402 + }, + { + "epoch": 3.6870479947403023, + "high_lr": 0.0002621052631578947, + "low_lr": 5.242105263157895e-06, + "step": 1402 + }, + { + "epoch": 3.6870479947403023, + "high_lr": 0.0002621052631578947, + "low_lr": 5.242105263157895e-06, + "step": 1402 + }, + { + "epoch": 3.6870479947403023, + "high_lr": 0.0002621052631578947, + "low_lr": 5.242105263157895e-06, + "step": 1402 + }, + { + "epoch": 3.6896778435239974, + "grad_norm": 1.3380155563354492, + "learning_rate": 0.00026157894736842107, + "loss": 1.2926, + "step": 1403 + }, + { + "epoch": 3.6896778435239974, + "high_lr": 0.00026157894736842107, + "low_lr": 5.231578947368422e-06, + "step": 1403 + }, + { + "epoch": 3.6896778435239974, + "high_lr": 0.00026157894736842107, + "low_lr": 5.231578947368422e-06, + "step": 1403 + }, + { + "epoch": 3.6896778435239974, + "high_lr": 0.00026157894736842107, + "low_lr": 5.231578947368422e-06, + "step": 1403 + }, + { + "epoch": 3.6896778435239974, + "high_lr": 0.00026157894736842107, + "low_lr": 5.231578947368422e-06, + "step": 1403 + }, + { + "epoch": 3.6896778435239974, + "high_lr": 0.00026157894736842107, + "low_lr": 5.231578947368422e-06, + "step": 1403 + }, + { + "epoch": 3.6896778435239974, + "high_lr": 0.00026157894736842107, + "low_lr": 5.231578947368422e-06, + "step": 1403 + }, + { + "epoch": 3.6896778435239974, + "high_lr": 0.00026157894736842107, + "low_lr": 5.231578947368422e-06, + "step": 1403 + }, + { + "epoch": 3.6896778435239974, + "high_lr": 0.00026157894736842107, + "low_lr": 5.231578947368422e-06, + "step": 1403 + }, + { + "epoch": 3.6923076923076925, + "grad_norm": 1.4612758159637451, + "learning_rate": 0.00026105263157894735, + "loss": 1.3273, + "step": 1404 + }, + { + "epoch": 3.6923076923076925, + "high_lr": 0.00026105263157894735, + "low_lr": 5.2210526315789475e-06, + "step": 1404 + }, + { + "epoch": 3.6923076923076925, + "high_lr": 0.00026105263157894735, + "low_lr": 5.2210526315789475e-06, + "step": 1404 + }, + { + "epoch": 3.6923076923076925, + "high_lr": 0.00026105263157894735, + "low_lr": 5.2210526315789475e-06, + "step": 1404 + }, + { + "epoch": 3.6923076923076925, + "high_lr": 0.00026105263157894735, + "low_lr": 5.2210526315789475e-06, + "step": 1404 + }, + { + "epoch": 3.6923076923076925, + "high_lr": 0.00026105263157894735, + "low_lr": 5.2210526315789475e-06, + "step": 1404 + }, + { + "epoch": 3.6923076923076925, + "high_lr": 0.00026105263157894735, + "low_lr": 5.2210526315789475e-06, + "step": 1404 + }, + { + "epoch": 3.6923076923076925, + "high_lr": 0.00026105263157894735, + "low_lr": 5.2210526315789475e-06, + "step": 1404 + }, + { + "epoch": 3.6923076923076925, + "high_lr": 0.00026105263157894735, + "low_lr": 5.2210526315789475e-06, + "step": 1404 + }, + { + "epoch": 3.694937541091387, + "grad_norm": 1.5359078645706177, + "learning_rate": 0.0002605263157894737, + "loss": 1.3441, + "step": 1405 + }, + { + "epoch": 3.694937541091387, + "high_lr": 0.0002605263157894737, + "low_lr": 5.210526315789474e-06, + "step": 1405 + }, + { + "epoch": 3.694937541091387, + "high_lr": 0.0002605263157894737, + "low_lr": 5.210526315789474e-06, + "step": 1405 + }, + { + "epoch": 3.694937541091387, + "high_lr": 0.0002605263157894737, + "low_lr": 5.210526315789474e-06, + "step": 1405 + }, + { + "epoch": 3.694937541091387, + "high_lr": 0.0002605263157894737, + "low_lr": 5.210526315789474e-06, + "step": 1405 + }, + { + "epoch": 3.694937541091387, + "high_lr": 0.0002605263157894737, + "low_lr": 5.210526315789474e-06, + "step": 1405 + }, + { + "epoch": 3.694937541091387, + "high_lr": 0.0002605263157894737, + "low_lr": 5.210526315789474e-06, + "step": 1405 + }, + { + "epoch": 3.694937541091387, + "high_lr": 0.0002605263157894737, + "low_lr": 5.210526315789474e-06, + "step": 1405 + }, + { + "epoch": 3.694937541091387, + "high_lr": 0.0002605263157894737, + "low_lr": 5.210526315789474e-06, + "step": 1405 + }, + { + "epoch": 3.6975673898750823, + "grad_norm": 1.486899733543396, + "learning_rate": 0.00026000000000000003, + "loss": 1.2808, + "step": 1406 + }, + { + "epoch": 3.6975673898750823, + "high_lr": 0.00026000000000000003, + "low_lr": 5.2e-06, + "step": 1406 + }, + { + "epoch": 3.6975673898750823, + "high_lr": 0.00026000000000000003, + "low_lr": 5.2e-06, + "step": 1406 + }, + { + "epoch": 3.6975673898750823, + "high_lr": 0.00026000000000000003, + "low_lr": 5.2e-06, + "step": 1406 + }, + { + "epoch": 3.6975673898750823, + "high_lr": 0.00026000000000000003, + "low_lr": 5.2e-06, + "step": 1406 + }, + { + "epoch": 3.6975673898750823, + "high_lr": 0.00026000000000000003, + "low_lr": 5.2e-06, + "step": 1406 + }, + { + "epoch": 3.6975673898750823, + "high_lr": 0.00026000000000000003, + "low_lr": 5.2e-06, + "step": 1406 + }, + { + "epoch": 3.6975673898750823, + "high_lr": 0.00026000000000000003, + "low_lr": 5.2e-06, + "step": 1406 + }, + { + "epoch": 3.6975673898750823, + "high_lr": 0.00026000000000000003, + "low_lr": 5.2e-06, + "step": 1406 + }, + { + "epoch": 3.7001972386587774, + "grad_norm": 1.377661943435669, + "learning_rate": 0.0002594736842105263, + "loss": 1.2963, + "step": 1407 + }, + { + "epoch": 3.7001972386587774, + "high_lr": 0.0002594736842105263, + "low_lr": 5.1894736842105265e-06, + "step": 1407 + }, + { + "epoch": 3.7001972386587774, + "high_lr": 0.0002594736842105263, + "low_lr": 5.1894736842105265e-06, + "step": 1407 + }, + { + "epoch": 3.7001972386587774, + "high_lr": 0.0002594736842105263, + "low_lr": 5.1894736842105265e-06, + "step": 1407 + }, + { + "epoch": 3.7001972386587774, + "high_lr": 0.0002594736842105263, + "low_lr": 5.1894736842105265e-06, + "step": 1407 + }, + { + "epoch": 3.7001972386587774, + "high_lr": 0.0002594736842105263, + "low_lr": 5.1894736842105265e-06, + "step": 1407 + }, + { + "epoch": 3.7001972386587774, + "high_lr": 0.0002594736842105263, + "low_lr": 5.1894736842105265e-06, + "step": 1407 + }, + { + "epoch": 3.7001972386587774, + "high_lr": 0.0002594736842105263, + "low_lr": 5.1894736842105265e-06, + "step": 1407 + }, + { + "epoch": 3.7001972386587774, + "high_lr": 0.0002594736842105263, + "low_lr": 5.1894736842105265e-06, + "step": 1407 + }, + { + "epoch": 3.702827087442472, + "grad_norm": 1.764217495918274, + "learning_rate": 0.00025894736842105266, + "loss": 1.3115, + "step": 1408 + }, + { + "epoch": 3.702827087442472, + "high_lr": 0.00025894736842105266, + "low_lr": 5.178947368421054e-06, + "step": 1408 + }, + { + "epoch": 3.702827087442472, + "high_lr": 0.00025894736842105266, + "low_lr": 5.178947368421054e-06, + "step": 1408 + }, + { + "epoch": 3.702827087442472, + "high_lr": 0.00025894736842105266, + "low_lr": 5.178947368421054e-06, + "step": 1408 + }, + { + "epoch": 3.702827087442472, + "high_lr": 0.00025894736842105266, + "low_lr": 5.178947368421054e-06, + "step": 1408 + }, + { + "epoch": 3.702827087442472, + "high_lr": 0.00025894736842105266, + "low_lr": 5.178947368421054e-06, + "step": 1408 + }, + { + "epoch": 3.702827087442472, + "high_lr": 0.00025894736842105266, + "low_lr": 5.178947368421054e-06, + "step": 1408 + }, + { + "epoch": 3.702827087442472, + "high_lr": 0.00025894736842105266, + "low_lr": 5.178947368421054e-06, + "step": 1408 + }, + { + "epoch": 3.702827087442472, + "high_lr": 0.00025894736842105266, + "low_lr": 5.178947368421054e-06, + "step": 1408 + }, + { + "epoch": 3.705456936226167, + "grad_norm": 1.3747307062149048, + "learning_rate": 0.00025842105263157894, + "loss": 1.3314, + "step": 1409 + }, + { + "epoch": 3.705456936226167, + "high_lr": 0.00025842105263157894, + "low_lr": 5.168421052631579e-06, + "step": 1409 + }, + { + "epoch": 3.705456936226167, + "high_lr": 0.00025842105263157894, + "low_lr": 5.168421052631579e-06, + "step": 1409 + }, + { + "epoch": 3.705456936226167, + "high_lr": 0.00025842105263157894, + "low_lr": 5.168421052631579e-06, + "step": 1409 + }, + { + "epoch": 3.705456936226167, + "high_lr": 0.00025842105263157894, + "low_lr": 5.168421052631579e-06, + "step": 1409 + }, + { + "epoch": 3.705456936226167, + "high_lr": 0.00025842105263157894, + "low_lr": 5.168421052631579e-06, + "step": 1409 + }, + { + "epoch": 3.705456936226167, + "high_lr": 0.00025842105263157894, + "low_lr": 5.168421052631579e-06, + "step": 1409 + }, + { + "epoch": 3.705456936226167, + "high_lr": 0.00025842105263157894, + "low_lr": 5.168421052631579e-06, + "step": 1409 + }, + { + "epoch": 3.705456936226167, + "high_lr": 0.00025842105263157894, + "low_lr": 5.168421052631579e-06, + "step": 1409 + }, + { + "epoch": 3.7080867850098618, + "grad_norm": 1.390458583831787, + "learning_rate": 0.0002578947368421053, + "loss": 1.309, + "step": 1410 + }, + { + "epoch": 3.7080867850098618, + "high_lr": 0.0002578947368421053, + "low_lr": 5.157894736842106e-06, + "step": 1410 + }, + { + "epoch": 3.7080867850098618, + "high_lr": 0.0002578947368421053, + "low_lr": 5.157894736842106e-06, + "step": 1410 + }, + { + "epoch": 3.7080867850098618, + "high_lr": 0.0002578947368421053, + "low_lr": 5.157894736842106e-06, + "step": 1410 + }, + { + "epoch": 3.7080867850098618, + "high_lr": 0.0002578947368421053, + "low_lr": 5.157894736842106e-06, + "step": 1410 + }, + { + "epoch": 3.7080867850098618, + "high_lr": 0.0002578947368421053, + "low_lr": 5.157894736842106e-06, + "step": 1410 + }, + { + "epoch": 3.7080867850098618, + "high_lr": 0.0002578947368421053, + "low_lr": 5.157894736842106e-06, + "step": 1410 + }, + { + "epoch": 3.7080867850098618, + "high_lr": 0.0002578947368421053, + "low_lr": 5.157894736842106e-06, + "step": 1410 + }, + { + "epoch": 3.7080867850098618, + "high_lr": 0.0002578947368421053, + "low_lr": 5.157894736842106e-06, + "step": 1410 + }, + { + "epoch": 3.710716633793557, + "grad_norm": 1.4675899744033813, + "learning_rate": 0.00025736842105263157, + "loss": 1.2697, + "step": 1411 + }, + { + "epoch": 3.710716633793557, + "high_lr": 0.00025736842105263157, + "low_lr": 5.147368421052632e-06, + "step": 1411 + }, + { + "epoch": 3.710716633793557, + "high_lr": 0.00025736842105263157, + "low_lr": 5.147368421052632e-06, + "step": 1411 + }, + { + "epoch": 3.710716633793557, + "high_lr": 0.00025736842105263157, + "low_lr": 5.147368421052632e-06, + "step": 1411 + }, + { + "epoch": 3.710716633793557, + "high_lr": 0.00025736842105263157, + "low_lr": 5.147368421052632e-06, + "step": 1411 + }, + { + "epoch": 3.710716633793557, + "high_lr": 0.00025736842105263157, + "low_lr": 5.147368421052632e-06, + "step": 1411 + }, + { + "epoch": 3.710716633793557, + "high_lr": 0.00025736842105263157, + "low_lr": 5.147368421052632e-06, + "step": 1411 + }, + { + "epoch": 3.710716633793557, + "high_lr": 0.00025736842105263157, + "low_lr": 5.147368421052632e-06, + "step": 1411 + }, + { + "epoch": 3.710716633793557, + "high_lr": 0.00025736842105263157, + "low_lr": 5.147368421052632e-06, + "step": 1411 + }, + { + "epoch": 3.713346482577252, + "grad_norm": 1.3478926420211792, + "learning_rate": 0.00025684210526315786, + "loss": 1.3522, + "step": 1412 + }, + { + "epoch": 3.713346482577252, + "high_lr": 0.00025684210526315786, + "low_lr": 5.136842105263158e-06, + "step": 1412 + }, + { + "epoch": 3.713346482577252, + "high_lr": 0.00025684210526315786, + "low_lr": 5.136842105263158e-06, + "step": 1412 + }, + { + "epoch": 3.713346482577252, + "high_lr": 0.00025684210526315786, + "low_lr": 5.136842105263158e-06, + "step": 1412 + }, + { + "epoch": 3.713346482577252, + "high_lr": 0.00025684210526315786, + "low_lr": 5.136842105263158e-06, + "step": 1412 + }, + { + "epoch": 3.713346482577252, + "high_lr": 0.00025684210526315786, + "low_lr": 5.136842105263158e-06, + "step": 1412 + }, + { + "epoch": 3.713346482577252, + "high_lr": 0.00025684210526315786, + "low_lr": 5.136842105263158e-06, + "step": 1412 + }, + { + "epoch": 3.713346482577252, + "high_lr": 0.00025684210526315786, + "low_lr": 5.136842105263158e-06, + "step": 1412 + }, + { + "epoch": 3.713346482577252, + "high_lr": 0.00025684210526315786, + "low_lr": 5.136842105263158e-06, + "step": 1412 + }, + { + "epoch": 3.7159763313609466, + "grad_norm": 1.4412983655929565, + "learning_rate": 0.00025631578947368425, + "loss": 1.2975, + "step": 1413 + }, + { + "epoch": 3.7159763313609466, + "high_lr": 0.00025631578947368425, + "low_lr": 5.1263157894736845e-06, + "step": 1413 + }, + { + "epoch": 3.7159763313609466, + "high_lr": 0.00025631578947368425, + "low_lr": 5.1263157894736845e-06, + "step": 1413 + }, + { + "epoch": 3.7159763313609466, + "high_lr": 0.00025631578947368425, + "low_lr": 5.1263157894736845e-06, + "step": 1413 + }, + { + "epoch": 3.7159763313609466, + "high_lr": 0.00025631578947368425, + "low_lr": 5.1263157894736845e-06, + "step": 1413 + }, + { + "epoch": 3.7159763313609466, + "high_lr": 0.00025631578947368425, + "low_lr": 5.1263157894736845e-06, + "step": 1413 + }, + { + "epoch": 3.7159763313609466, + "high_lr": 0.00025631578947368425, + "low_lr": 5.1263157894736845e-06, + "step": 1413 + }, + { + "epoch": 3.7159763313609466, + "high_lr": 0.00025631578947368425, + "low_lr": 5.1263157894736845e-06, + "step": 1413 + }, + { + "epoch": 3.7159763313609466, + "high_lr": 0.00025631578947368425, + "low_lr": 5.1263157894736845e-06, + "step": 1413 + }, + { + "epoch": 3.7186061801446417, + "grad_norm": 1.5651241540908813, + "learning_rate": 0.00025578947368421054, + "loss": 1.2681, + "step": 1414 + }, + { + "epoch": 3.7186061801446417, + "high_lr": 0.00025578947368421054, + "low_lr": 5.115789473684211e-06, + "step": 1414 + }, + { + "epoch": 3.7186061801446417, + "high_lr": 0.00025578947368421054, + "low_lr": 5.115789473684211e-06, + "step": 1414 + }, + { + "epoch": 3.7186061801446417, + "high_lr": 0.00025578947368421054, + "low_lr": 5.115789473684211e-06, + "step": 1414 + }, + { + "epoch": 3.7186061801446417, + "high_lr": 0.00025578947368421054, + "low_lr": 5.115789473684211e-06, + "step": 1414 + }, + { + "epoch": 3.7186061801446417, + "high_lr": 0.00025578947368421054, + "low_lr": 5.115789473684211e-06, + "step": 1414 + }, + { + "epoch": 3.7186061801446417, + "high_lr": 0.00025578947368421054, + "low_lr": 5.115789473684211e-06, + "step": 1414 + }, + { + "epoch": 3.7186061801446417, + "high_lr": 0.00025578947368421054, + "low_lr": 5.115789473684211e-06, + "step": 1414 + }, + { + "epoch": 3.7186061801446417, + "high_lr": 0.00025578947368421054, + "low_lr": 5.115789473684211e-06, + "step": 1414 + }, + { + "epoch": 3.7212360289283364, + "grad_norm": 1.3770606517791748, + "learning_rate": 0.0002552631578947369, + "loss": 1.2874, + "step": 1415 + }, + { + "epoch": 3.7212360289283364, + "high_lr": 0.0002552631578947369, + "low_lr": 5.105263157894738e-06, + "step": 1415 + }, + { + "epoch": 3.7212360289283364, + "high_lr": 0.0002552631578947369, + "low_lr": 5.105263157894738e-06, + "step": 1415 + }, + { + "epoch": 3.7212360289283364, + "high_lr": 0.0002552631578947369, + "low_lr": 5.105263157894738e-06, + "step": 1415 + }, + { + "epoch": 3.7212360289283364, + "high_lr": 0.0002552631578947369, + "low_lr": 5.105263157894738e-06, + "step": 1415 + }, + { + "epoch": 3.7212360289283364, + "high_lr": 0.0002552631578947369, + "low_lr": 5.105263157894738e-06, + "step": 1415 + }, + { + "epoch": 3.7212360289283364, + "high_lr": 0.0002552631578947369, + "low_lr": 5.105263157894738e-06, + "step": 1415 + }, + { + "epoch": 3.7212360289283364, + "high_lr": 0.0002552631578947369, + "low_lr": 5.105263157894738e-06, + "step": 1415 + }, + { + "epoch": 3.7212360289283364, + "high_lr": 0.0002552631578947369, + "low_lr": 5.105263157894738e-06, + "step": 1415 + }, + { + "epoch": 3.7238658777120315, + "grad_norm": 1.4465447664260864, + "learning_rate": 0.00025473684210526316, + "loss": 1.2934, + "step": 1416 + }, + { + "epoch": 3.7238658777120315, + "high_lr": 0.00025473684210526316, + "low_lr": 5.0947368421052635e-06, + "step": 1416 + }, + { + "epoch": 3.7238658777120315, + "high_lr": 0.00025473684210526316, + "low_lr": 5.0947368421052635e-06, + "step": 1416 + }, + { + "epoch": 3.7238658777120315, + "high_lr": 0.00025473684210526316, + "low_lr": 5.0947368421052635e-06, + "step": 1416 + }, + { + "epoch": 3.7238658777120315, + "high_lr": 0.00025473684210526316, + "low_lr": 5.0947368421052635e-06, + "step": 1416 + }, + { + "epoch": 3.7238658777120315, + "high_lr": 0.00025473684210526316, + "low_lr": 5.0947368421052635e-06, + "step": 1416 + }, + { + "epoch": 3.7238658777120315, + "high_lr": 0.00025473684210526316, + "low_lr": 5.0947368421052635e-06, + "step": 1416 + }, + { + "epoch": 3.7238658777120315, + "high_lr": 0.00025473684210526316, + "low_lr": 5.0947368421052635e-06, + "step": 1416 + }, + { + "epoch": 3.7238658777120315, + "high_lr": 0.00025473684210526316, + "low_lr": 5.0947368421052635e-06, + "step": 1416 + }, + { + "epoch": 3.7264957264957266, + "grad_norm": 1.2946622371673584, + "learning_rate": 0.0002542105263157895, + "loss": 1.312, + "step": 1417 + }, + { + "epoch": 3.7264957264957266, + "high_lr": 0.0002542105263157895, + "low_lr": 5.084210526315791e-06, + "step": 1417 + }, + { + "epoch": 3.7264957264957266, + "high_lr": 0.0002542105263157895, + "low_lr": 5.084210526315791e-06, + "step": 1417 + }, + { + "epoch": 3.7264957264957266, + "high_lr": 0.0002542105263157895, + "low_lr": 5.084210526315791e-06, + "step": 1417 + }, + { + "epoch": 3.7264957264957266, + "high_lr": 0.0002542105263157895, + "low_lr": 5.084210526315791e-06, + "step": 1417 + }, + { + "epoch": 3.7264957264957266, + "high_lr": 0.0002542105263157895, + "low_lr": 5.084210526315791e-06, + "step": 1417 + }, + { + "epoch": 3.7264957264957266, + "high_lr": 0.0002542105263157895, + "low_lr": 5.084210526315791e-06, + "step": 1417 + }, + { + "epoch": 3.7264957264957266, + "high_lr": 0.0002542105263157895, + "low_lr": 5.084210526315791e-06, + "step": 1417 + }, + { + "epoch": 3.7264957264957266, + "high_lr": 0.0002542105263157895, + "low_lr": 5.084210526315791e-06, + "step": 1417 + }, + { + "epoch": 3.7291255752794212, + "grad_norm": 1.3798704147338867, + "learning_rate": 0.0002536842105263158, + "loss": 1.2907, + "step": 1418 + }, + { + "epoch": 3.7291255752794212, + "high_lr": 0.0002536842105263158, + "low_lr": 5.073684210526316e-06, + "step": 1418 + }, + { + "epoch": 3.7291255752794212, + "high_lr": 0.0002536842105263158, + "low_lr": 5.073684210526316e-06, + "step": 1418 + }, + { + "epoch": 3.7291255752794212, + "high_lr": 0.0002536842105263158, + "low_lr": 5.073684210526316e-06, + "step": 1418 + }, + { + "epoch": 3.7291255752794212, + "high_lr": 0.0002536842105263158, + "low_lr": 5.073684210526316e-06, + "step": 1418 + }, + { + "epoch": 3.7291255752794212, + "high_lr": 0.0002536842105263158, + "low_lr": 5.073684210526316e-06, + "step": 1418 + }, + { + "epoch": 3.7291255752794212, + "high_lr": 0.0002536842105263158, + "low_lr": 5.073684210526316e-06, + "step": 1418 + }, + { + "epoch": 3.7291255752794212, + "high_lr": 0.0002536842105263158, + "low_lr": 5.073684210526316e-06, + "step": 1418 + }, + { + "epoch": 3.7291255752794212, + "high_lr": 0.0002536842105263158, + "low_lr": 5.073684210526316e-06, + "step": 1418 + }, + { + "epoch": 3.7317554240631163, + "grad_norm": 1.5236715078353882, + "learning_rate": 0.0002531578947368421, + "loss": 1.2864, + "step": 1419 + }, + { + "epoch": 3.7317554240631163, + "high_lr": 0.0002531578947368421, + "low_lr": 5.0631578947368424e-06, + "step": 1419 + }, + { + "epoch": 3.7317554240631163, + "high_lr": 0.0002531578947368421, + "low_lr": 5.0631578947368424e-06, + "step": 1419 + }, + { + "epoch": 3.7317554240631163, + "high_lr": 0.0002531578947368421, + "low_lr": 5.0631578947368424e-06, + "step": 1419 + }, + { + "epoch": 3.7317554240631163, + "high_lr": 0.0002531578947368421, + "low_lr": 5.0631578947368424e-06, + "step": 1419 + }, + { + "epoch": 3.7317554240631163, + "high_lr": 0.0002531578947368421, + "low_lr": 5.0631578947368424e-06, + "step": 1419 + }, + { + "epoch": 3.7317554240631163, + "high_lr": 0.0002531578947368421, + "low_lr": 5.0631578947368424e-06, + "step": 1419 + }, + { + "epoch": 3.7317554240631163, + "high_lr": 0.0002531578947368421, + "low_lr": 5.0631578947368424e-06, + "step": 1419 + }, + { + "epoch": 3.7317554240631163, + "high_lr": 0.0002531578947368421, + "low_lr": 5.0631578947368424e-06, + "step": 1419 + }, + { + "epoch": 3.7343852728468114, + "grad_norm": 1.4545153379440308, + "learning_rate": 0.0002526315789473684, + "loss": 1.2923, + "step": 1420 + }, + { + "epoch": 3.7343852728468114, + "high_lr": 0.0002526315789473684, + "low_lr": 5.052631578947369e-06, + "step": 1420 + }, + { + "epoch": 3.7343852728468114, + "high_lr": 0.0002526315789473684, + "low_lr": 5.052631578947369e-06, + "step": 1420 + }, + { + "epoch": 3.7343852728468114, + "high_lr": 0.0002526315789473684, + "low_lr": 5.052631578947369e-06, + "step": 1420 + }, + { + "epoch": 3.7343852728468114, + "high_lr": 0.0002526315789473684, + "low_lr": 5.052631578947369e-06, + "step": 1420 + }, + { + "epoch": 3.7343852728468114, + "high_lr": 0.0002526315789473684, + "low_lr": 5.052631578947369e-06, + "step": 1420 + }, + { + "epoch": 3.7343852728468114, + "high_lr": 0.0002526315789473684, + "low_lr": 5.052631578947369e-06, + "step": 1420 + }, + { + "epoch": 3.7343852728468114, + "high_lr": 0.0002526315789473684, + "low_lr": 5.052631578947369e-06, + "step": 1420 + }, + { + "epoch": 3.7343852728468114, + "high_lr": 0.0002526315789473684, + "low_lr": 5.052631578947369e-06, + "step": 1420 + }, + { + "epoch": 3.737015121630506, + "grad_norm": 1.4548542499542236, + "learning_rate": 0.00025210526315789475, + "loss": 1.292, + "step": 1421 + }, + { + "epoch": 3.737015121630506, + "high_lr": 0.00025210526315789475, + "low_lr": 5.042105263157895e-06, + "step": 1421 + }, + { + "epoch": 3.737015121630506, + "high_lr": 0.00025210526315789475, + "low_lr": 5.042105263157895e-06, + "step": 1421 + }, + { + "epoch": 3.737015121630506, + "high_lr": 0.00025210526315789475, + "low_lr": 5.042105263157895e-06, + "step": 1421 + }, + { + "epoch": 3.737015121630506, + "high_lr": 0.00025210526315789475, + "low_lr": 5.042105263157895e-06, + "step": 1421 + }, + { + "epoch": 3.737015121630506, + "high_lr": 0.00025210526315789475, + "low_lr": 5.042105263157895e-06, + "step": 1421 + }, + { + "epoch": 3.737015121630506, + "high_lr": 0.00025210526315789475, + "low_lr": 5.042105263157895e-06, + "step": 1421 + }, + { + "epoch": 3.737015121630506, + "high_lr": 0.00025210526315789475, + "low_lr": 5.042105263157895e-06, + "step": 1421 + }, + { + "epoch": 3.737015121630506, + "high_lr": 0.00025210526315789475, + "low_lr": 5.042105263157895e-06, + "step": 1421 + }, + { + "epoch": 3.739644970414201, + "grad_norm": 1.5131704807281494, + "learning_rate": 0.0002515789473684211, + "loss": 1.295, + "step": 1422 + }, + { + "epoch": 3.739644970414201, + "high_lr": 0.0002515789473684211, + "low_lr": 5.0315789473684214e-06, + "step": 1422 + }, + { + "epoch": 3.739644970414201, + "high_lr": 0.0002515789473684211, + "low_lr": 5.0315789473684214e-06, + "step": 1422 + }, + { + "epoch": 3.739644970414201, + "high_lr": 0.0002515789473684211, + "low_lr": 5.0315789473684214e-06, + "step": 1422 + }, + { + "epoch": 3.739644970414201, + "high_lr": 0.0002515789473684211, + "low_lr": 5.0315789473684214e-06, + "step": 1422 + }, + { + "epoch": 3.739644970414201, + "high_lr": 0.0002515789473684211, + "low_lr": 5.0315789473684214e-06, + "step": 1422 + }, + { + "epoch": 3.739644970414201, + "high_lr": 0.0002515789473684211, + "low_lr": 5.0315789473684214e-06, + "step": 1422 + }, + { + "epoch": 3.739644970414201, + "high_lr": 0.0002515789473684211, + "low_lr": 5.0315789473684214e-06, + "step": 1422 + }, + { + "epoch": 3.739644970414201, + "high_lr": 0.0002515789473684211, + "low_lr": 5.0315789473684214e-06, + "step": 1422 + }, + { + "epoch": 3.7422748191978963, + "grad_norm": 1.3713613748550415, + "learning_rate": 0.0002510526315789474, + "loss": 1.2988, + "step": 1423 + }, + { + "epoch": 3.7422748191978963, + "high_lr": 0.0002510526315789474, + "low_lr": 5.021052631578948e-06, + "step": 1423 + }, + { + "epoch": 3.7422748191978963, + "high_lr": 0.0002510526315789474, + "low_lr": 5.021052631578948e-06, + "step": 1423 + }, + { + "epoch": 3.7422748191978963, + "high_lr": 0.0002510526315789474, + "low_lr": 5.021052631578948e-06, + "step": 1423 + }, + { + "epoch": 3.7422748191978963, + "high_lr": 0.0002510526315789474, + "low_lr": 5.021052631578948e-06, + "step": 1423 + }, + { + "epoch": 3.7422748191978963, + "high_lr": 0.0002510526315789474, + "low_lr": 5.021052631578948e-06, + "step": 1423 + }, + { + "epoch": 3.7422748191978963, + "high_lr": 0.0002510526315789474, + "low_lr": 5.021052631578948e-06, + "step": 1423 + }, + { + "epoch": 3.7422748191978963, + "high_lr": 0.0002510526315789474, + "low_lr": 5.021052631578948e-06, + "step": 1423 + }, + { + "epoch": 3.7422748191978963, + "high_lr": 0.0002510526315789474, + "low_lr": 5.021052631578948e-06, + "step": 1423 + }, + { + "epoch": 3.744904667981591, + "grad_norm": 1.3472787141799927, + "learning_rate": 0.0002505263157894737, + "loss": 1.3521, + "step": 1424 + }, + { + "epoch": 3.744904667981591, + "high_lr": 0.0002505263157894737, + "low_lr": 5.010526315789475e-06, + "step": 1424 + }, + { + "epoch": 3.744904667981591, + "high_lr": 0.0002505263157894737, + "low_lr": 5.010526315789475e-06, + "step": 1424 + }, + { + "epoch": 3.744904667981591, + "high_lr": 0.0002505263157894737, + "low_lr": 5.010526315789475e-06, + "step": 1424 + }, + { + "epoch": 3.744904667981591, + "high_lr": 0.0002505263157894737, + "low_lr": 5.010526315789475e-06, + "step": 1424 + }, + { + "epoch": 3.744904667981591, + "high_lr": 0.0002505263157894737, + "low_lr": 5.010526315789475e-06, + "step": 1424 + }, + { + "epoch": 3.744904667981591, + "high_lr": 0.0002505263157894737, + "low_lr": 5.010526315789475e-06, + "step": 1424 + }, + { + "epoch": 3.744904667981591, + "high_lr": 0.0002505263157894737, + "low_lr": 5.010526315789475e-06, + "step": 1424 + }, + { + "epoch": 3.744904667981591, + "high_lr": 0.0002505263157894737, + "low_lr": 5.010526315789475e-06, + "step": 1424 + }, + { + "epoch": 3.747534516765286, + "grad_norm": 1.3773797750473022, + "learning_rate": 0.00025, + "loss": 1.3132, + "step": 1425 + }, + { + "epoch": 3.747534516765286, + "high_lr": 0.00025, + "low_lr": 5e-06, + "step": 1425 + }, + { + "epoch": 3.747534516765286, + "high_lr": 0.00025, + "low_lr": 5e-06, + "step": 1425 + }, + { + "epoch": 3.747534516765286, + "high_lr": 0.00025, + "low_lr": 5e-06, + "step": 1425 + }, + { + "epoch": 3.747534516765286, + "high_lr": 0.00025, + "low_lr": 5e-06, + "step": 1425 + }, + { + "epoch": 3.747534516765286, + "high_lr": 0.00025, + "low_lr": 5e-06, + "step": 1425 + }, + { + "epoch": 3.747534516765286, + "high_lr": 0.00025, + "low_lr": 5e-06, + "step": 1425 + }, + { + "epoch": 3.747534516765286, + "high_lr": 0.00025, + "low_lr": 5e-06, + "step": 1425 + }, + { + "epoch": 3.747534516765286, + "high_lr": 0.00025, + "low_lr": 5e-06, + "step": 1425 + }, + { + "epoch": 3.750164365548981, + "grad_norm": 1.5149577856063843, + "learning_rate": 0.00024947368421052635, + "loss": 1.3398, + "step": 1426 + }, + { + "epoch": 3.750164365548981, + "high_lr": 0.00024947368421052635, + "low_lr": 4.989473684210527e-06, + "step": 1426 + }, + { + "epoch": 3.750164365548981, + "high_lr": 0.00024947368421052635, + "low_lr": 4.989473684210527e-06, + "step": 1426 + }, + { + "epoch": 3.750164365548981, + "high_lr": 0.00024947368421052635, + "low_lr": 4.989473684210527e-06, + "step": 1426 + }, + { + "epoch": 3.750164365548981, + "high_lr": 0.00024947368421052635, + "low_lr": 4.989473684210527e-06, + "step": 1426 + }, + { + "epoch": 3.750164365548981, + "high_lr": 0.00024947368421052635, + "low_lr": 4.989473684210527e-06, + "step": 1426 + }, + { + "epoch": 3.750164365548981, + "high_lr": 0.00024947368421052635, + "low_lr": 4.989473684210527e-06, + "step": 1426 + }, + { + "epoch": 3.750164365548981, + "high_lr": 0.00024947368421052635, + "low_lr": 4.989473684210527e-06, + "step": 1426 + }, + { + "epoch": 3.750164365548981, + "high_lr": 0.00024947368421052635, + "low_lr": 4.989473684210527e-06, + "step": 1426 + }, + { + "epoch": 3.752794214332676, + "grad_norm": 1.5774260759353638, + "learning_rate": 0.00024894736842105263, + "loss": 1.2966, + "step": 1427 + }, + { + "epoch": 3.752794214332676, + "high_lr": 0.00024894736842105263, + "low_lr": 4.978947368421053e-06, + "step": 1427 + }, + { + "epoch": 3.752794214332676, + "high_lr": 0.00024894736842105263, + "low_lr": 4.978947368421053e-06, + "step": 1427 + }, + { + "epoch": 3.752794214332676, + "high_lr": 0.00024894736842105263, + "low_lr": 4.978947368421053e-06, + "step": 1427 + }, + { + "epoch": 3.752794214332676, + "high_lr": 0.00024894736842105263, + "low_lr": 4.978947368421053e-06, + "step": 1427 + }, + { + "epoch": 3.752794214332676, + "high_lr": 0.00024894736842105263, + "low_lr": 4.978947368421053e-06, + "step": 1427 + }, + { + "epoch": 3.752794214332676, + "high_lr": 0.00024894736842105263, + "low_lr": 4.978947368421053e-06, + "step": 1427 + }, + { + "epoch": 3.752794214332676, + "high_lr": 0.00024894736842105263, + "low_lr": 4.978947368421053e-06, + "step": 1427 + }, + { + "epoch": 3.752794214332676, + "high_lr": 0.00024894736842105263, + "low_lr": 4.978947368421053e-06, + "step": 1427 + }, + { + "epoch": 3.755424063116371, + "grad_norm": 1.3350043296813965, + "learning_rate": 0.00024842105263157897, + "loss": 1.2871, + "step": 1428 + }, + { + "epoch": 3.755424063116371, + "high_lr": 0.00024842105263157897, + "low_lr": 4.968421052631579e-06, + "step": 1428 + }, + { + "epoch": 3.755424063116371, + "high_lr": 0.00024842105263157897, + "low_lr": 4.968421052631579e-06, + "step": 1428 + }, + { + "epoch": 3.755424063116371, + "high_lr": 0.00024842105263157897, + "low_lr": 4.968421052631579e-06, + "step": 1428 + }, + { + "epoch": 3.755424063116371, + "high_lr": 0.00024842105263157897, + "low_lr": 4.968421052631579e-06, + "step": 1428 + }, + { + "epoch": 3.755424063116371, + "high_lr": 0.00024842105263157897, + "low_lr": 4.968421052631579e-06, + "step": 1428 + }, + { + "epoch": 3.755424063116371, + "high_lr": 0.00024842105263157897, + "low_lr": 4.968421052631579e-06, + "step": 1428 + }, + { + "epoch": 3.755424063116371, + "high_lr": 0.00024842105263157897, + "low_lr": 4.968421052631579e-06, + "step": 1428 + }, + { + "epoch": 3.755424063116371, + "high_lr": 0.00024842105263157897, + "low_lr": 4.968421052631579e-06, + "step": 1428 + }, + { + "epoch": 3.758053911900066, + "grad_norm": 1.4210309982299805, + "learning_rate": 0.00024789473684210526, + "loss": 1.2749, + "step": 1429 + }, + { + "epoch": 3.758053911900066, + "high_lr": 0.00024789473684210526, + "low_lr": 4.957894736842106e-06, + "step": 1429 + }, + { + "epoch": 3.758053911900066, + "high_lr": 0.00024789473684210526, + "low_lr": 4.957894736842106e-06, + "step": 1429 + }, + { + "epoch": 3.758053911900066, + "high_lr": 0.00024789473684210526, + "low_lr": 4.957894736842106e-06, + "step": 1429 + }, + { + "epoch": 3.758053911900066, + "high_lr": 0.00024789473684210526, + "low_lr": 4.957894736842106e-06, + "step": 1429 + }, + { + "epoch": 3.758053911900066, + "high_lr": 0.00024789473684210526, + "low_lr": 4.957894736842106e-06, + "step": 1429 + }, + { + "epoch": 3.758053911900066, + "high_lr": 0.00024789473684210526, + "low_lr": 4.957894736842106e-06, + "step": 1429 + }, + { + "epoch": 3.758053911900066, + "high_lr": 0.00024789473684210526, + "low_lr": 4.957894736842106e-06, + "step": 1429 + }, + { + "epoch": 3.758053911900066, + "high_lr": 0.00024789473684210526, + "low_lr": 4.957894736842106e-06, + "step": 1429 + }, + { + "epoch": 3.7606837606837606, + "grad_norm": 1.4051158428192139, + "learning_rate": 0.0002473684210526316, + "loss": 1.299, + "step": 1430 + }, + { + "epoch": 3.7606837606837606, + "high_lr": 0.0002473684210526316, + "low_lr": 4.947368421052632e-06, + "step": 1430 + }, + { + "epoch": 3.7606837606837606, + "high_lr": 0.0002473684210526316, + "low_lr": 4.947368421052632e-06, + "step": 1430 + }, + { + "epoch": 3.7606837606837606, + "high_lr": 0.0002473684210526316, + "low_lr": 4.947368421052632e-06, + "step": 1430 + }, + { + "epoch": 3.7606837606837606, + "high_lr": 0.0002473684210526316, + "low_lr": 4.947368421052632e-06, + "step": 1430 + }, + { + "epoch": 3.7606837606837606, + "high_lr": 0.0002473684210526316, + "low_lr": 4.947368421052632e-06, + "step": 1430 + }, + { + "epoch": 3.7606837606837606, + "high_lr": 0.0002473684210526316, + "low_lr": 4.947368421052632e-06, + "step": 1430 + }, + { + "epoch": 3.7606837606837606, + "high_lr": 0.0002473684210526316, + "low_lr": 4.947368421052632e-06, + "step": 1430 + }, + { + "epoch": 3.7606837606837606, + "high_lr": 0.0002473684210526316, + "low_lr": 4.947368421052632e-06, + "step": 1430 + }, + { + "epoch": 3.7633136094674557, + "grad_norm": 1.4473483562469482, + "learning_rate": 0.0002468421052631579, + "loss": 1.3305, + "step": 1431 + }, + { + "epoch": 3.7633136094674557, + "high_lr": 0.0002468421052631579, + "low_lr": 4.936842105263158e-06, + "step": 1431 + }, + { + "epoch": 3.7633136094674557, + "high_lr": 0.0002468421052631579, + "low_lr": 4.936842105263158e-06, + "step": 1431 + }, + { + "epoch": 3.7633136094674557, + "high_lr": 0.0002468421052631579, + "low_lr": 4.936842105263158e-06, + "step": 1431 + }, + { + "epoch": 3.7633136094674557, + "high_lr": 0.0002468421052631579, + "low_lr": 4.936842105263158e-06, + "step": 1431 + }, + { + "epoch": 3.7633136094674557, + "high_lr": 0.0002468421052631579, + "low_lr": 4.936842105263158e-06, + "step": 1431 + }, + { + "epoch": 3.7633136094674557, + "high_lr": 0.0002468421052631579, + "low_lr": 4.936842105263158e-06, + "step": 1431 + }, + { + "epoch": 3.7633136094674557, + "high_lr": 0.0002468421052631579, + "low_lr": 4.936842105263158e-06, + "step": 1431 + }, + { + "epoch": 3.7633136094674557, + "high_lr": 0.0002468421052631579, + "low_lr": 4.936842105263158e-06, + "step": 1431 + }, + { + "epoch": 3.7659434582511504, + "grad_norm": 1.4937715530395508, + "learning_rate": 0.0002463157894736842, + "loss": 1.2965, + "step": 1432 + }, + { + "epoch": 3.7659434582511504, + "high_lr": 0.0002463157894736842, + "low_lr": 4.926315789473685e-06, + "step": 1432 + }, + { + "epoch": 3.7659434582511504, + "high_lr": 0.0002463157894736842, + "low_lr": 4.926315789473685e-06, + "step": 1432 + }, + { + "epoch": 3.7659434582511504, + "high_lr": 0.0002463157894736842, + "low_lr": 4.926315789473685e-06, + "step": 1432 + }, + { + "epoch": 3.7659434582511504, + "high_lr": 0.0002463157894736842, + "low_lr": 4.926315789473685e-06, + "step": 1432 + }, + { + "epoch": 3.7659434582511504, + "high_lr": 0.0002463157894736842, + "low_lr": 4.926315789473685e-06, + "step": 1432 + }, + { + "epoch": 3.7659434582511504, + "high_lr": 0.0002463157894736842, + "low_lr": 4.926315789473685e-06, + "step": 1432 + }, + { + "epoch": 3.7659434582511504, + "high_lr": 0.0002463157894736842, + "low_lr": 4.926315789473685e-06, + "step": 1432 + }, + { + "epoch": 3.7659434582511504, + "high_lr": 0.0002463157894736842, + "low_lr": 4.926315789473685e-06, + "step": 1432 + }, + { + "epoch": 3.7685733070348455, + "grad_norm": 1.414452314376831, + "learning_rate": 0.0002457894736842105, + "loss": 1.3198, + "step": 1433 + }, + { + "epoch": 3.7685733070348455, + "high_lr": 0.0002457894736842105, + "low_lr": 4.915789473684211e-06, + "step": 1433 + }, + { + "epoch": 3.7685733070348455, + "high_lr": 0.0002457894736842105, + "low_lr": 4.915789473684211e-06, + "step": 1433 + }, + { + "epoch": 3.7685733070348455, + "high_lr": 0.0002457894736842105, + "low_lr": 4.915789473684211e-06, + "step": 1433 + }, + { + "epoch": 3.7685733070348455, + "high_lr": 0.0002457894736842105, + "low_lr": 4.915789473684211e-06, + "step": 1433 + }, + { + "epoch": 3.7685733070348455, + "high_lr": 0.0002457894736842105, + "low_lr": 4.915789473684211e-06, + "step": 1433 + }, + { + "epoch": 3.7685733070348455, + "high_lr": 0.0002457894736842105, + "low_lr": 4.915789473684211e-06, + "step": 1433 + }, + { + "epoch": 3.7685733070348455, + "high_lr": 0.0002457894736842105, + "low_lr": 4.915789473684211e-06, + "step": 1433 + }, + { + "epoch": 3.7685733070348455, + "high_lr": 0.0002457894736842105, + "low_lr": 4.915789473684211e-06, + "step": 1433 + }, + { + "epoch": 3.7712031558185406, + "grad_norm": 1.4509018659591675, + "learning_rate": 0.00024526315789473685, + "loss": 1.3448, + "step": 1434 + }, + { + "epoch": 3.7712031558185406, + "high_lr": 0.00024526315789473685, + "low_lr": 4.905263157894737e-06, + "step": 1434 + }, + { + "epoch": 3.7712031558185406, + "high_lr": 0.00024526315789473685, + "low_lr": 4.905263157894737e-06, + "step": 1434 + }, + { + "epoch": 3.7712031558185406, + "high_lr": 0.00024526315789473685, + "low_lr": 4.905263157894737e-06, + "step": 1434 + }, + { + "epoch": 3.7712031558185406, + "high_lr": 0.00024526315789473685, + "low_lr": 4.905263157894737e-06, + "step": 1434 + }, + { + "epoch": 3.7712031558185406, + "high_lr": 0.00024526315789473685, + "low_lr": 4.905263157894737e-06, + "step": 1434 + }, + { + "epoch": 3.7712031558185406, + "high_lr": 0.00024526315789473685, + "low_lr": 4.905263157894737e-06, + "step": 1434 + }, + { + "epoch": 3.7712031558185406, + "high_lr": 0.00024526315789473685, + "low_lr": 4.905263157894737e-06, + "step": 1434 + }, + { + "epoch": 3.7712031558185406, + "high_lr": 0.00024526315789473685, + "low_lr": 4.905263157894737e-06, + "step": 1434 + }, + { + "epoch": 3.7738330046022353, + "grad_norm": 1.3394984006881714, + "learning_rate": 0.0002447368421052632, + "loss": 1.3366, + "step": 1435 + }, + { + "epoch": 3.7738330046022353, + "high_lr": 0.0002447368421052632, + "low_lr": 4.894736842105264e-06, + "step": 1435 + }, + { + "epoch": 3.7738330046022353, + "high_lr": 0.0002447368421052632, + "low_lr": 4.894736842105264e-06, + "step": 1435 + }, + { + "epoch": 3.7738330046022353, + "high_lr": 0.0002447368421052632, + "low_lr": 4.894736842105264e-06, + "step": 1435 + }, + { + "epoch": 3.7738330046022353, + "high_lr": 0.0002447368421052632, + "low_lr": 4.894736842105264e-06, + "step": 1435 + }, + { + "epoch": 3.7738330046022353, + "high_lr": 0.0002447368421052632, + "low_lr": 4.894736842105264e-06, + "step": 1435 + }, + { + "epoch": 3.7738330046022353, + "high_lr": 0.0002447368421052632, + "low_lr": 4.894736842105264e-06, + "step": 1435 + }, + { + "epoch": 3.7738330046022353, + "high_lr": 0.0002447368421052632, + "low_lr": 4.894736842105264e-06, + "step": 1435 + }, + { + "epoch": 3.7738330046022353, + "high_lr": 0.0002447368421052632, + "low_lr": 4.894736842105264e-06, + "step": 1435 + }, + { + "epoch": 3.7764628533859304, + "grad_norm": 1.3452506065368652, + "learning_rate": 0.0002442105263157895, + "loss": 1.2546, + "step": 1436 + }, + { + "epoch": 3.7764628533859304, + "high_lr": 0.0002442105263157895, + "low_lr": 4.88421052631579e-06, + "step": 1436 + }, + { + "epoch": 3.7764628533859304, + "high_lr": 0.0002442105263157895, + "low_lr": 4.88421052631579e-06, + "step": 1436 + }, + { + "epoch": 3.7764628533859304, + "high_lr": 0.0002442105263157895, + "low_lr": 4.88421052631579e-06, + "step": 1436 + }, + { + "epoch": 3.7764628533859304, + "high_lr": 0.0002442105263157895, + "low_lr": 4.88421052631579e-06, + "step": 1436 + }, + { + "epoch": 3.7764628533859304, + "high_lr": 0.0002442105263157895, + "low_lr": 4.88421052631579e-06, + "step": 1436 + }, + { + "epoch": 3.7764628533859304, + "high_lr": 0.0002442105263157895, + "low_lr": 4.88421052631579e-06, + "step": 1436 + }, + { + "epoch": 3.7764628533859304, + "high_lr": 0.0002442105263157895, + "low_lr": 4.88421052631579e-06, + "step": 1436 + }, + { + "epoch": 3.7764628533859304, + "high_lr": 0.0002442105263157895, + "low_lr": 4.88421052631579e-06, + "step": 1436 + }, + { + "epoch": 3.779092702169625, + "grad_norm": 1.4009126424789429, + "learning_rate": 0.0002436842105263158, + "loss": 1.3374, + "step": 1437 + }, + { + "epoch": 3.779092702169625, + "high_lr": 0.0002436842105263158, + "low_lr": 4.873684210526316e-06, + "step": 1437 + }, + { + "epoch": 3.779092702169625, + "high_lr": 0.0002436842105263158, + "low_lr": 4.873684210526316e-06, + "step": 1437 + }, + { + "epoch": 3.779092702169625, + "high_lr": 0.0002436842105263158, + "low_lr": 4.873684210526316e-06, + "step": 1437 + }, + { + "epoch": 3.779092702169625, + "high_lr": 0.0002436842105263158, + "low_lr": 4.873684210526316e-06, + "step": 1437 + }, + { + "epoch": 3.779092702169625, + "high_lr": 0.0002436842105263158, + "low_lr": 4.873684210526316e-06, + "step": 1437 + }, + { + "epoch": 3.779092702169625, + "high_lr": 0.0002436842105263158, + "low_lr": 4.873684210526316e-06, + "step": 1437 + }, + { + "epoch": 3.779092702169625, + "high_lr": 0.0002436842105263158, + "low_lr": 4.873684210526316e-06, + "step": 1437 + }, + { + "epoch": 3.779092702169625, + "high_lr": 0.0002436842105263158, + "low_lr": 4.873684210526316e-06, + "step": 1437 + }, + { + "epoch": 3.78172255095332, + "grad_norm": 1.3517147302627563, + "learning_rate": 0.0002431578947368421, + "loss": 1.3329, + "step": 1438 + }, + { + "epoch": 3.78172255095332, + "high_lr": 0.0002431578947368421, + "low_lr": 4.863157894736843e-06, + "step": 1438 + }, + { + "epoch": 3.78172255095332, + "high_lr": 0.0002431578947368421, + "low_lr": 4.863157894736843e-06, + "step": 1438 + }, + { + "epoch": 3.78172255095332, + "high_lr": 0.0002431578947368421, + "low_lr": 4.863157894736843e-06, + "step": 1438 + }, + { + "epoch": 3.78172255095332, + "high_lr": 0.0002431578947368421, + "low_lr": 4.863157894736843e-06, + "step": 1438 + }, + { + "epoch": 3.78172255095332, + "high_lr": 0.0002431578947368421, + "low_lr": 4.863157894736843e-06, + "step": 1438 + }, + { + "epoch": 3.78172255095332, + "high_lr": 0.0002431578947368421, + "low_lr": 4.863157894736843e-06, + "step": 1438 + }, + { + "epoch": 3.78172255095332, + "high_lr": 0.0002431578947368421, + "low_lr": 4.863157894736843e-06, + "step": 1438 + }, + { + "epoch": 3.78172255095332, + "high_lr": 0.0002431578947368421, + "low_lr": 4.863157894736843e-06, + "step": 1438 + }, + { + "epoch": 3.784352399737015, + "grad_norm": 1.3775666952133179, + "learning_rate": 0.00024263157894736841, + "loss": 1.2801, + "step": 1439 + }, + { + "epoch": 3.784352399737015, + "high_lr": 0.00024263157894736841, + "low_lr": 4.852631578947369e-06, + "step": 1439 + }, + { + "epoch": 3.784352399737015, + "high_lr": 0.00024263157894736841, + "low_lr": 4.852631578947369e-06, + "step": 1439 + }, + { + "epoch": 3.784352399737015, + "high_lr": 0.00024263157894736841, + "low_lr": 4.852631578947369e-06, + "step": 1439 + }, + { + "epoch": 3.784352399737015, + "high_lr": 0.00024263157894736841, + "low_lr": 4.852631578947369e-06, + "step": 1439 + }, + { + "epoch": 3.784352399737015, + "high_lr": 0.00024263157894736841, + "low_lr": 4.852631578947369e-06, + "step": 1439 + }, + { + "epoch": 3.784352399737015, + "high_lr": 0.00024263157894736841, + "low_lr": 4.852631578947369e-06, + "step": 1439 + }, + { + "epoch": 3.784352399737015, + "high_lr": 0.00024263157894736841, + "low_lr": 4.852631578947369e-06, + "step": 1439 + }, + { + "epoch": 3.784352399737015, + "high_lr": 0.00024263157894736841, + "low_lr": 4.852631578947369e-06, + "step": 1439 + }, + { + "epoch": 3.78698224852071, + "grad_norm": 1.4380093812942505, + "learning_rate": 0.00024210526315789475, + "loss": 1.2578, + "step": 1440 + }, + { + "epoch": 3.78698224852071, + "high_lr": 0.00024210526315789475, + "low_lr": 4.842105263157895e-06, + "step": 1440 + }, + { + "epoch": 3.78698224852071, + "high_lr": 0.00024210526315789475, + "low_lr": 4.842105263157895e-06, + "step": 1440 + }, + { + "epoch": 3.78698224852071, + "high_lr": 0.00024210526315789475, + "low_lr": 4.842105263157895e-06, + "step": 1440 + }, + { + "epoch": 3.78698224852071, + "high_lr": 0.00024210526315789475, + "low_lr": 4.842105263157895e-06, + "step": 1440 + }, + { + "epoch": 3.78698224852071, + "high_lr": 0.00024210526315789475, + "low_lr": 4.842105263157895e-06, + "step": 1440 + }, + { + "epoch": 3.78698224852071, + "high_lr": 0.00024210526315789475, + "low_lr": 4.842105263157895e-06, + "step": 1440 + }, + { + "epoch": 3.78698224852071, + "high_lr": 0.00024210526315789475, + "low_lr": 4.842105263157895e-06, + "step": 1440 + }, + { + "epoch": 3.78698224852071, + "high_lr": 0.00024210526315789475, + "low_lr": 4.842105263157895e-06, + "step": 1440 + }, + { + "epoch": 3.789612097304405, + "grad_norm": 1.425616979598999, + "learning_rate": 0.00024157894736842107, + "loss": 1.3009, + "step": 1441 + }, + { + "epoch": 3.789612097304405, + "high_lr": 0.00024157894736842107, + "low_lr": 4.831578947368422e-06, + "step": 1441 + }, + { + "epoch": 3.789612097304405, + "high_lr": 0.00024157894736842107, + "low_lr": 4.831578947368422e-06, + "step": 1441 + }, + { + "epoch": 3.789612097304405, + "high_lr": 0.00024157894736842107, + "low_lr": 4.831578947368422e-06, + "step": 1441 + }, + { + "epoch": 3.789612097304405, + "high_lr": 0.00024157894736842107, + "low_lr": 4.831578947368422e-06, + "step": 1441 + }, + { + "epoch": 3.789612097304405, + "high_lr": 0.00024157894736842107, + "low_lr": 4.831578947368422e-06, + "step": 1441 + }, + { + "epoch": 3.789612097304405, + "high_lr": 0.00024157894736842107, + "low_lr": 4.831578947368422e-06, + "step": 1441 + }, + { + "epoch": 3.789612097304405, + "high_lr": 0.00024157894736842107, + "low_lr": 4.831578947368422e-06, + "step": 1441 + }, + { + "epoch": 3.789612097304405, + "high_lr": 0.00024157894736842107, + "low_lr": 4.831578947368422e-06, + "step": 1441 + }, + { + "epoch": 3.7922419460881, + "grad_norm": 1.315299391746521, + "learning_rate": 0.00024105263157894738, + "loss": 1.2655, + "step": 1442 + }, + { + "epoch": 3.7922419460881, + "high_lr": 0.00024105263157894738, + "low_lr": 4.821052631578948e-06, + "step": 1442 + }, + { + "epoch": 3.7922419460881, + "high_lr": 0.00024105263157894738, + "low_lr": 4.821052631578948e-06, + "step": 1442 + }, + { + "epoch": 3.7922419460881, + "high_lr": 0.00024105263157894738, + "low_lr": 4.821052631578948e-06, + "step": 1442 + }, + { + "epoch": 3.7922419460881, + "high_lr": 0.00024105263157894738, + "low_lr": 4.821052631578948e-06, + "step": 1442 + }, + { + "epoch": 3.7922419460881, + "high_lr": 0.00024105263157894738, + "low_lr": 4.821052631578948e-06, + "step": 1442 + }, + { + "epoch": 3.7922419460881, + "high_lr": 0.00024105263157894738, + "low_lr": 4.821052631578948e-06, + "step": 1442 + }, + { + "epoch": 3.7922419460881, + "high_lr": 0.00024105263157894738, + "low_lr": 4.821052631578948e-06, + "step": 1442 + }, + { + "epoch": 3.7922419460881, + "high_lr": 0.00024105263157894738, + "low_lr": 4.821052631578948e-06, + "step": 1442 + }, + { + "epoch": 3.7948717948717947, + "grad_norm": 1.506906509399414, + "learning_rate": 0.00024052631578947367, + "loss": 1.2971, + "step": 1443 + }, + { + "epoch": 3.7948717948717947, + "high_lr": 0.00024052631578947367, + "low_lr": 4.8105263157894735e-06, + "step": 1443 + }, + { + "epoch": 3.7948717948717947, + "high_lr": 0.00024052631578947367, + "low_lr": 4.8105263157894735e-06, + "step": 1443 + }, + { + "epoch": 3.7948717948717947, + "high_lr": 0.00024052631578947367, + "low_lr": 4.8105263157894735e-06, + "step": 1443 + }, + { + "epoch": 3.7948717948717947, + "high_lr": 0.00024052631578947367, + "low_lr": 4.8105263157894735e-06, + "step": 1443 + }, + { + "epoch": 3.7948717948717947, + "high_lr": 0.00024052631578947367, + "low_lr": 4.8105263157894735e-06, + "step": 1443 + }, + { + "epoch": 3.7948717948717947, + "high_lr": 0.00024052631578947367, + "low_lr": 4.8105263157894735e-06, + "step": 1443 + }, + { + "epoch": 3.7948717948717947, + "high_lr": 0.00024052631578947367, + "low_lr": 4.8105263157894735e-06, + "step": 1443 + }, + { + "epoch": 3.7948717948717947, + "high_lr": 0.00024052631578947367, + "low_lr": 4.8105263157894735e-06, + "step": 1443 + }, + { + "epoch": 3.79750164365549, + "grad_norm": 1.4422025680541992, + "learning_rate": 0.00024, + "loss": 1.3258, + "step": 1444 + }, + { + "epoch": 3.79750164365549, + "high_lr": 0.00024, + "low_lr": 4.800000000000001e-06, + "step": 1444 + }, + { + "epoch": 3.79750164365549, + "high_lr": 0.00024, + "low_lr": 4.800000000000001e-06, + "step": 1444 + }, + { + "epoch": 3.79750164365549, + "high_lr": 0.00024, + "low_lr": 4.800000000000001e-06, + "step": 1444 + }, + { + "epoch": 3.79750164365549, + "high_lr": 0.00024, + "low_lr": 4.800000000000001e-06, + "step": 1444 + }, + { + "epoch": 3.79750164365549, + "high_lr": 0.00024, + "low_lr": 4.800000000000001e-06, + "step": 1444 + }, + { + "epoch": 3.79750164365549, + "high_lr": 0.00024, + "low_lr": 4.800000000000001e-06, + "step": 1444 + }, + { + "epoch": 3.79750164365549, + "high_lr": 0.00024, + "low_lr": 4.800000000000001e-06, + "step": 1444 + }, + { + "epoch": 3.79750164365549, + "high_lr": 0.00024, + "low_lr": 4.800000000000001e-06, + "step": 1444 + }, + { + "epoch": 3.800131492439185, + "grad_norm": 1.4381208419799805, + "learning_rate": 0.00023947368421052632, + "loss": 1.3295, + "step": 1445 + }, + { + "epoch": 3.800131492439185, + "high_lr": 0.00023947368421052632, + "low_lr": 4.789473684210527e-06, + "step": 1445 + }, + { + "epoch": 3.800131492439185, + "high_lr": 0.00023947368421052632, + "low_lr": 4.789473684210527e-06, + "step": 1445 + }, + { + "epoch": 3.800131492439185, + "high_lr": 0.00023947368421052632, + "low_lr": 4.789473684210527e-06, + "step": 1445 + }, + { + "epoch": 3.800131492439185, + "high_lr": 0.00023947368421052632, + "low_lr": 4.789473684210527e-06, + "step": 1445 + }, + { + "epoch": 3.800131492439185, + "high_lr": 0.00023947368421052632, + "low_lr": 4.789473684210527e-06, + "step": 1445 + }, + { + "epoch": 3.800131492439185, + "high_lr": 0.00023947368421052632, + "low_lr": 4.789473684210527e-06, + "step": 1445 + }, + { + "epoch": 3.800131492439185, + "high_lr": 0.00023947368421052632, + "low_lr": 4.789473684210527e-06, + "step": 1445 + }, + { + "epoch": 3.800131492439185, + "high_lr": 0.00023947368421052632, + "low_lr": 4.789473684210527e-06, + "step": 1445 + }, + { + "epoch": 3.8027613412228796, + "grad_norm": 1.4451509714126587, + "learning_rate": 0.00023894736842105263, + "loss": 1.2901, + "step": 1446 + }, + { + "epoch": 3.8027613412228796, + "high_lr": 0.00023894736842105263, + "low_lr": 4.778947368421053e-06, + "step": 1446 + }, + { + "epoch": 3.8027613412228796, + "high_lr": 0.00023894736842105263, + "low_lr": 4.778947368421053e-06, + "step": 1446 + }, + { + "epoch": 3.8027613412228796, + "high_lr": 0.00023894736842105263, + "low_lr": 4.778947368421053e-06, + "step": 1446 + }, + { + "epoch": 3.8027613412228796, + "high_lr": 0.00023894736842105263, + "low_lr": 4.778947368421053e-06, + "step": 1446 + }, + { + "epoch": 3.8027613412228796, + "high_lr": 0.00023894736842105263, + "low_lr": 4.778947368421053e-06, + "step": 1446 + }, + { + "epoch": 3.8027613412228796, + "high_lr": 0.00023894736842105263, + "low_lr": 4.778947368421053e-06, + "step": 1446 + }, + { + "epoch": 3.8027613412228796, + "high_lr": 0.00023894736842105263, + "low_lr": 4.778947368421053e-06, + "step": 1446 + }, + { + "epoch": 3.8027613412228796, + "high_lr": 0.00023894736842105263, + "low_lr": 4.778947368421053e-06, + "step": 1446 + }, + { + "epoch": 3.8053911900065747, + "grad_norm": 1.4591439962387085, + "learning_rate": 0.00023842105263157895, + "loss": 1.2576, + "step": 1447 + }, + { + "epoch": 3.8053911900065747, + "high_lr": 0.00023842105263157895, + "low_lr": 4.76842105263158e-06, + "step": 1447 + }, + { + "epoch": 3.8053911900065747, + "high_lr": 0.00023842105263157895, + "low_lr": 4.76842105263158e-06, + "step": 1447 + }, + { + "epoch": 3.8053911900065747, + "high_lr": 0.00023842105263157895, + "low_lr": 4.76842105263158e-06, + "step": 1447 + }, + { + "epoch": 3.8053911900065747, + "high_lr": 0.00023842105263157895, + "low_lr": 4.76842105263158e-06, + "step": 1447 + }, + { + "epoch": 3.8053911900065747, + "high_lr": 0.00023842105263157895, + "low_lr": 4.76842105263158e-06, + "step": 1447 + }, + { + "epoch": 3.8053911900065747, + "high_lr": 0.00023842105263157895, + "low_lr": 4.76842105263158e-06, + "step": 1447 + }, + { + "epoch": 3.8053911900065747, + "high_lr": 0.00023842105263157895, + "low_lr": 4.76842105263158e-06, + "step": 1447 + }, + { + "epoch": 3.8053911900065747, + "high_lr": 0.00023842105263157895, + "low_lr": 4.76842105263158e-06, + "step": 1447 + }, + { + "epoch": 3.8080210387902698, + "grad_norm": 1.5963643789291382, + "learning_rate": 0.00023789473684210529, + "loss": 1.3422, + "step": 1448 + }, + { + "epoch": 3.8080210387902698, + "high_lr": 0.00023789473684210529, + "low_lr": 4.757894736842106e-06, + "step": 1448 + }, + { + "epoch": 3.8080210387902698, + "high_lr": 0.00023789473684210529, + "low_lr": 4.757894736842106e-06, + "step": 1448 + }, + { + "epoch": 3.8080210387902698, + "high_lr": 0.00023789473684210529, + "low_lr": 4.757894736842106e-06, + "step": 1448 + }, + { + "epoch": 3.8080210387902698, + "high_lr": 0.00023789473684210529, + "low_lr": 4.757894736842106e-06, + "step": 1448 + }, + { + "epoch": 3.8080210387902698, + "high_lr": 0.00023789473684210529, + "low_lr": 4.757894736842106e-06, + "step": 1448 + }, + { + "epoch": 3.8080210387902698, + "high_lr": 0.00023789473684210529, + "low_lr": 4.757894736842106e-06, + "step": 1448 + }, + { + "epoch": 3.8080210387902698, + "high_lr": 0.00023789473684210529, + "low_lr": 4.757894736842106e-06, + "step": 1448 + }, + { + "epoch": 3.8080210387902698, + "high_lr": 0.00023789473684210529, + "low_lr": 4.757894736842106e-06, + "step": 1448 + }, + { + "epoch": 3.8106508875739644, + "grad_norm": 1.3172365427017212, + "learning_rate": 0.0002373684210526316, + "loss": 1.2781, + "step": 1449 + }, + { + "epoch": 3.8106508875739644, + "high_lr": 0.0002373684210526316, + "low_lr": 4.747368421052632e-06, + "step": 1449 + }, + { + "epoch": 3.8106508875739644, + "high_lr": 0.0002373684210526316, + "low_lr": 4.747368421052632e-06, + "step": 1449 + }, + { + "epoch": 3.8106508875739644, + "high_lr": 0.0002373684210526316, + "low_lr": 4.747368421052632e-06, + "step": 1449 + }, + { + "epoch": 3.8106508875739644, + "high_lr": 0.0002373684210526316, + "low_lr": 4.747368421052632e-06, + "step": 1449 + }, + { + "epoch": 3.8106508875739644, + "high_lr": 0.0002373684210526316, + "low_lr": 4.747368421052632e-06, + "step": 1449 + }, + { + "epoch": 3.8106508875739644, + "high_lr": 0.0002373684210526316, + "low_lr": 4.747368421052632e-06, + "step": 1449 + }, + { + "epoch": 3.8106508875739644, + "high_lr": 0.0002373684210526316, + "low_lr": 4.747368421052632e-06, + "step": 1449 + }, + { + "epoch": 3.8106508875739644, + "high_lr": 0.0002373684210526316, + "low_lr": 4.747368421052632e-06, + "step": 1449 + }, + { + "epoch": 3.8132807363576595, + "grad_norm": 1.4922595024108887, + "learning_rate": 0.00023684210526315788, + "loss": 1.2936, + "step": 1450 + }, + { + "epoch": 3.8132807363576595, + "high_lr": 0.00023684210526315788, + "low_lr": 4.736842105263158e-06, + "step": 1450 + }, + { + "epoch": 3.8132807363576595, + "high_lr": 0.00023684210526315788, + "low_lr": 4.736842105263158e-06, + "step": 1450 + }, + { + "epoch": 3.8132807363576595, + "high_lr": 0.00023684210526315788, + "low_lr": 4.736842105263158e-06, + "step": 1450 + }, + { + "epoch": 3.8132807363576595, + "high_lr": 0.00023684210526315788, + "low_lr": 4.736842105263158e-06, + "step": 1450 + }, + { + "epoch": 3.8132807363576595, + "high_lr": 0.00023684210526315788, + "low_lr": 4.736842105263158e-06, + "step": 1450 + }, + { + "epoch": 3.8132807363576595, + "high_lr": 0.00023684210526315788, + "low_lr": 4.736842105263158e-06, + "step": 1450 + }, + { + "epoch": 3.8132807363576595, + "high_lr": 0.00023684210526315788, + "low_lr": 4.736842105263158e-06, + "step": 1450 + }, + { + "epoch": 3.8132807363576595, + "high_lr": 0.00023684210526315788, + "low_lr": 4.736842105263158e-06, + "step": 1450 + }, + { + "epoch": 3.8159105851413546, + "grad_norm": 1.4833645820617676, + "learning_rate": 0.0002363157894736842, + "loss": 1.2909, + "step": 1451 + }, + { + "epoch": 3.8159105851413546, + "high_lr": 0.0002363157894736842, + "low_lr": 4.726315789473684e-06, + "step": 1451 + }, + { + "epoch": 3.8159105851413546, + "high_lr": 0.0002363157894736842, + "low_lr": 4.726315789473684e-06, + "step": 1451 + }, + { + "epoch": 3.8159105851413546, + "high_lr": 0.0002363157894736842, + "low_lr": 4.726315789473684e-06, + "step": 1451 + }, + { + "epoch": 3.8159105851413546, + "high_lr": 0.0002363157894736842, + "low_lr": 4.726315789473684e-06, + "step": 1451 + }, + { + "epoch": 3.8159105851413546, + "high_lr": 0.0002363157894736842, + "low_lr": 4.726315789473684e-06, + "step": 1451 + }, + { + "epoch": 3.8159105851413546, + "high_lr": 0.0002363157894736842, + "low_lr": 4.726315789473684e-06, + "step": 1451 + }, + { + "epoch": 3.8159105851413546, + "high_lr": 0.0002363157894736842, + "low_lr": 4.726315789473684e-06, + "step": 1451 + }, + { + "epoch": 3.8159105851413546, + "high_lr": 0.0002363157894736842, + "low_lr": 4.726315789473684e-06, + "step": 1451 + }, + { + "epoch": 3.8185404339250493, + "grad_norm": 1.4897242784500122, + "learning_rate": 0.00023578947368421054, + "loss": 1.2462, + "step": 1452 + }, + { + "epoch": 3.8185404339250493, + "high_lr": 0.00023578947368421054, + "low_lr": 4.71578947368421e-06, + "step": 1452 + }, + { + "epoch": 3.8185404339250493, + "high_lr": 0.00023578947368421054, + "low_lr": 4.71578947368421e-06, + "step": 1452 + }, + { + "epoch": 3.8185404339250493, + "high_lr": 0.00023578947368421054, + "low_lr": 4.71578947368421e-06, + "step": 1452 + }, + { + "epoch": 3.8185404339250493, + "high_lr": 0.00023578947368421054, + "low_lr": 4.71578947368421e-06, + "step": 1452 + }, + { + "epoch": 3.8185404339250493, + "high_lr": 0.00023578947368421054, + "low_lr": 4.71578947368421e-06, + "step": 1452 + }, + { + "epoch": 3.8185404339250493, + "high_lr": 0.00023578947368421054, + "low_lr": 4.71578947368421e-06, + "step": 1452 + }, + { + "epoch": 3.8185404339250493, + "high_lr": 0.00023578947368421054, + "low_lr": 4.71578947368421e-06, + "step": 1452 + }, + { + "epoch": 3.8185404339250493, + "high_lr": 0.00023578947368421054, + "low_lr": 4.71578947368421e-06, + "step": 1452 + }, + { + "epoch": 3.8211702827087444, + "grad_norm": 1.5979965925216675, + "learning_rate": 0.00023526315789473685, + "loss": 1.3424, + "step": 1453 + }, + { + "epoch": 3.8211702827087444, + "high_lr": 0.00023526315789473685, + "low_lr": 4.705263157894738e-06, + "step": 1453 + }, + { + "epoch": 3.8211702827087444, + "high_lr": 0.00023526315789473685, + "low_lr": 4.705263157894738e-06, + "step": 1453 + }, + { + "epoch": 3.8211702827087444, + "high_lr": 0.00023526315789473685, + "low_lr": 4.705263157894738e-06, + "step": 1453 + }, + { + "epoch": 3.8211702827087444, + "high_lr": 0.00023526315789473685, + "low_lr": 4.705263157894738e-06, + "step": 1453 + }, + { + "epoch": 3.8211702827087444, + "high_lr": 0.00023526315789473685, + "low_lr": 4.705263157894738e-06, + "step": 1453 + }, + { + "epoch": 3.8211702827087444, + "high_lr": 0.00023526315789473685, + "low_lr": 4.705263157894738e-06, + "step": 1453 + }, + { + "epoch": 3.8211702827087444, + "high_lr": 0.00023526315789473685, + "low_lr": 4.705263157894738e-06, + "step": 1453 + }, + { + "epoch": 3.8211702827087444, + "high_lr": 0.00023526315789473685, + "low_lr": 4.705263157894738e-06, + "step": 1453 + }, + { + "epoch": 3.823800131492439, + "grad_norm": 1.2779732942581177, + "learning_rate": 0.00023473684210526316, + "loss": 1.3107, + "step": 1454 + }, + { + "epoch": 3.823800131492439, + "high_lr": 0.00023473684210526316, + "low_lr": 4.694736842105264e-06, + "step": 1454 + }, + { + "epoch": 3.823800131492439, + "high_lr": 0.00023473684210526316, + "low_lr": 4.694736842105264e-06, + "step": 1454 + }, + { + "epoch": 3.823800131492439, + "high_lr": 0.00023473684210526316, + "low_lr": 4.694736842105264e-06, + "step": 1454 + }, + { + "epoch": 3.823800131492439, + "high_lr": 0.00023473684210526316, + "low_lr": 4.694736842105264e-06, + "step": 1454 + }, + { + "epoch": 3.823800131492439, + "high_lr": 0.00023473684210526316, + "low_lr": 4.694736842105264e-06, + "step": 1454 + }, + { + "epoch": 3.823800131492439, + "high_lr": 0.00023473684210526316, + "low_lr": 4.694736842105264e-06, + "step": 1454 + }, + { + "epoch": 3.823800131492439, + "high_lr": 0.00023473684210526316, + "low_lr": 4.694736842105264e-06, + "step": 1454 + }, + { + "epoch": 3.823800131492439, + "high_lr": 0.00023473684210526316, + "low_lr": 4.694736842105264e-06, + "step": 1454 + }, + { + "epoch": 3.826429980276134, + "grad_norm": 1.4164676666259766, + "learning_rate": 0.00023421052631578948, + "loss": 1.3561, + "step": 1455 + }, + { + "epoch": 3.826429980276134, + "high_lr": 0.00023421052631578948, + "low_lr": 4.68421052631579e-06, + "step": 1455 + }, + { + "epoch": 3.826429980276134, + "high_lr": 0.00023421052631578948, + "low_lr": 4.68421052631579e-06, + "step": 1455 + }, + { + "epoch": 3.826429980276134, + "high_lr": 0.00023421052631578948, + "low_lr": 4.68421052631579e-06, + "step": 1455 + }, + { + "epoch": 3.826429980276134, + "high_lr": 0.00023421052631578948, + "low_lr": 4.68421052631579e-06, + "step": 1455 + }, + { + "epoch": 3.826429980276134, + "high_lr": 0.00023421052631578948, + "low_lr": 4.68421052631579e-06, + "step": 1455 + }, + { + "epoch": 3.826429980276134, + "high_lr": 0.00023421052631578948, + "low_lr": 4.68421052631579e-06, + "step": 1455 + }, + { + "epoch": 3.826429980276134, + "high_lr": 0.00023421052631578948, + "low_lr": 4.68421052631579e-06, + "step": 1455 + }, + { + "epoch": 3.826429980276134, + "high_lr": 0.00023421052631578948, + "low_lr": 4.68421052631579e-06, + "step": 1455 + }, + { + "epoch": 3.8290598290598292, + "grad_norm": 1.6803197860717773, + "learning_rate": 0.00023368421052631582, + "loss": 1.3378, + "step": 1456 + }, + { + "epoch": 3.8290598290598292, + "high_lr": 0.00023368421052631582, + "low_lr": 4.6736842105263166e-06, + "step": 1456 + }, + { + "epoch": 3.8290598290598292, + "high_lr": 0.00023368421052631582, + "low_lr": 4.6736842105263166e-06, + "step": 1456 + }, + { + "epoch": 3.8290598290598292, + "high_lr": 0.00023368421052631582, + "low_lr": 4.6736842105263166e-06, + "step": 1456 + }, + { + "epoch": 3.8290598290598292, + "high_lr": 0.00023368421052631582, + "low_lr": 4.6736842105263166e-06, + "step": 1456 + }, + { + "epoch": 3.8290598290598292, + "high_lr": 0.00023368421052631582, + "low_lr": 4.6736842105263166e-06, + "step": 1456 + }, + { + "epoch": 3.8290598290598292, + "high_lr": 0.00023368421052631582, + "low_lr": 4.6736842105263166e-06, + "step": 1456 + }, + { + "epoch": 3.8290598290598292, + "high_lr": 0.00023368421052631582, + "low_lr": 4.6736842105263166e-06, + "step": 1456 + }, + { + "epoch": 3.8290598290598292, + "high_lr": 0.00023368421052631582, + "low_lr": 4.6736842105263166e-06, + "step": 1456 + }, + { + "epoch": 3.831689677843524, + "grad_norm": 1.4625133275985718, + "learning_rate": 0.0002331578947368421, + "loss": 1.3359, + "step": 1457 + }, + { + "epoch": 3.831689677843524, + "high_lr": 0.0002331578947368421, + "low_lr": 4.663157894736842e-06, + "step": 1457 + }, + { + "epoch": 3.831689677843524, + "high_lr": 0.0002331578947368421, + "low_lr": 4.663157894736842e-06, + "step": 1457 + }, + { + "epoch": 3.831689677843524, + "high_lr": 0.0002331578947368421, + "low_lr": 4.663157894736842e-06, + "step": 1457 + }, + { + "epoch": 3.831689677843524, + "high_lr": 0.0002331578947368421, + "low_lr": 4.663157894736842e-06, + "step": 1457 + }, + { + "epoch": 3.831689677843524, + "high_lr": 0.0002331578947368421, + "low_lr": 4.663157894736842e-06, + "step": 1457 + }, + { + "epoch": 3.831689677843524, + "high_lr": 0.0002331578947368421, + "low_lr": 4.663157894736842e-06, + "step": 1457 + }, + { + "epoch": 3.831689677843524, + "high_lr": 0.0002331578947368421, + "low_lr": 4.663157894736842e-06, + "step": 1457 + }, + { + "epoch": 3.831689677843524, + "high_lr": 0.0002331578947368421, + "low_lr": 4.663157894736842e-06, + "step": 1457 + }, + { + "epoch": 3.834319526627219, + "grad_norm": 1.5109957456588745, + "learning_rate": 0.00023263157894736841, + "loss": 1.2938, + "step": 1458 + }, + { + "epoch": 3.834319526627219, + "high_lr": 0.00023263157894736841, + "low_lr": 4.652631578947368e-06, + "step": 1458 + }, + { + "epoch": 3.834319526627219, + "high_lr": 0.00023263157894736841, + "low_lr": 4.652631578947368e-06, + "step": 1458 + }, + { + "epoch": 3.834319526627219, + "high_lr": 0.00023263157894736841, + "low_lr": 4.652631578947368e-06, + "step": 1458 + }, + { + "epoch": 3.834319526627219, + "high_lr": 0.00023263157894736841, + "low_lr": 4.652631578947368e-06, + "step": 1458 + }, + { + "epoch": 3.834319526627219, + "high_lr": 0.00023263157894736841, + "low_lr": 4.652631578947368e-06, + "step": 1458 + }, + { + "epoch": 3.834319526627219, + "high_lr": 0.00023263157894736841, + "low_lr": 4.652631578947368e-06, + "step": 1458 + }, + { + "epoch": 3.834319526627219, + "high_lr": 0.00023263157894736841, + "low_lr": 4.652631578947368e-06, + "step": 1458 + }, + { + "epoch": 3.834319526627219, + "high_lr": 0.00023263157894736841, + "low_lr": 4.652631578947368e-06, + "step": 1458 + }, + { + "epoch": 3.8369493754109136, + "grad_norm": 1.3899002075195312, + "learning_rate": 0.00023210526315789473, + "loss": 1.3382, + "step": 1459 + }, + { + "epoch": 3.8369493754109136, + "high_lr": 0.00023210526315789473, + "low_lr": 4.642105263157895e-06, + "step": 1459 + }, + { + "epoch": 3.8369493754109136, + "high_lr": 0.00023210526315789473, + "low_lr": 4.642105263157895e-06, + "step": 1459 + }, + { + "epoch": 3.8369493754109136, + "high_lr": 0.00023210526315789473, + "low_lr": 4.642105263157895e-06, + "step": 1459 + }, + { + "epoch": 3.8369493754109136, + "high_lr": 0.00023210526315789473, + "low_lr": 4.642105263157895e-06, + "step": 1459 + }, + { + "epoch": 3.8369493754109136, + "high_lr": 0.00023210526315789473, + "low_lr": 4.642105263157895e-06, + "step": 1459 + }, + { + "epoch": 3.8369493754109136, + "high_lr": 0.00023210526315789473, + "low_lr": 4.642105263157895e-06, + "step": 1459 + }, + { + "epoch": 3.8369493754109136, + "high_lr": 0.00023210526315789473, + "low_lr": 4.642105263157895e-06, + "step": 1459 + }, + { + "epoch": 3.8369493754109136, + "high_lr": 0.00023210526315789473, + "low_lr": 4.642105263157895e-06, + "step": 1459 + }, + { + "epoch": 3.8395792241946087, + "grad_norm": 1.5576353073120117, + "learning_rate": 0.00023157894736842107, + "loss": 1.3188, + "step": 1460 + }, + { + "epoch": 3.8395792241946087, + "high_lr": 0.00023157894736842107, + "low_lr": 4.631578947368421e-06, + "step": 1460 + }, + { + "epoch": 3.8395792241946087, + "high_lr": 0.00023157894736842107, + "low_lr": 4.631578947368421e-06, + "step": 1460 + }, + { + "epoch": 3.8395792241946087, + "high_lr": 0.00023157894736842107, + "low_lr": 4.631578947368421e-06, + "step": 1460 + }, + { + "epoch": 3.8395792241946087, + "high_lr": 0.00023157894736842107, + "low_lr": 4.631578947368421e-06, + "step": 1460 + }, + { + "epoch": 3.8395792241946087, + "high_lr": 0.00023157894736842107, + "low_lr": 4.631578947368421e-06, + "step": 1460 + }, + { + "epoch": 3.8395792241946087, + "high_lr": 0.00023157894736842107, + "low_lr": 4.631578947368421e-06, + "step": 1460 + }, + { + "epoch": 3.8395792241946087, + "high_lr": 0.00023157894736842107, + "low_lr": 4.631578947368421e-06, + "step": 1460 + }, + { + "epoch": 3.8395792241946087, + "high_lr": 0.00023157894736842107, + "low_lr": 4.631578947368421e-06, + "step": 1460 + }, + { + "epoch": 3.842209072978304, + "grad_norm": 1.354019045829773, + "learning_rate": 0.00023105263157894738, + "loss": 1.311, + "step": 1461 + }, + { + "epoch": 3.842209072978304, + "high_lr": 0.00023105263157894738, + "low_lr": 4.621052631578948e-06, + "step": 1461 + }, + { + "epoch": 3.842209072978304, + "high_lr": 0.00023105263157894738, + "low_lr": 4.621052631578948e-06, + "step": 1461 + }, + { + "epoch": 3.842209072978304, + "high_lr": 0.00023105263157894738, + "low_lr": 4.621052631578948e-06, + "step": 1461 + }, + { + "epoch": 3.842209072978304, + "high_lr": 0.00023105263157894738, + "low_lr": 4.621052631578948e-06, + "step": 1461 + }, + { + "epoch": 3.842209072978304, + "high_lr": 0.00023105263157894738, + "low_lr": 4.621052631578948e-06, + "step": 1461 + }, + { + "epoch": 3.842209072978304, + "high_lr": 0.00023105263157894738, + "low_lr": 4.621052631578948e-06, + "step": 1461 + }, + { + "epoch": 3.842209072978304, + "high_lr": 0.00023105263157894738, + "low_lr": 4.621052631578948e-06, + "step": 1461 + }, + { + "epoch": 3.842209072978304, + "high_lr": 0.00023105263157894738, + "low_lr": 4.621052631578948e-06, + "step": 1461 + }, + { + "epoch": 3.8448389217619985, + "grad_norm": 1.4877536296844482, + "learning_rate": 0.0002305263157894737, + "loss": 1.2894, + "step": 1462 + }, + { + "epoch": 3.8448389217619985, + "high_lr": 0.0002305263157894737, + "low_lr": 4.6105263157894745e-06, + "step": 1462 + }, + { + "epoch": 3.8448389217619985, + "high_lr": 0.0002305263157894737, + "low_lr": 4.6105263157894745e-06, + "step": 1462 + }, + { + "epoch": 3.8448389217619985, + "high_lr": 0.0002305263157894737, + "low_lr": 4.6105263157894745e-06, + "step": 1462 + }, + { + "epoch": 3.8448389217619985, + "high_lr": 0.0002305263157894737, + "low_lr": 4.6105263157894745e-06, + "step": 1462 + }, + { + "epoch": 3.8448389217619985, + "high_lr": 0.0002305263157894737, + "low_lr": 4.6105263157894745e-06, + "step": 1462 + }, + { + "epoch": 3.8448389217619985, + "high_lr": 0.0002305263157894737, + "low_lr": 4.6105263157894745e-06, + "step": 1462 + }, + { + "epoch": 3.8448389217619985, + "high_lr": 0.0002305263157894737, + "low_lr": 4.6105263157894745e-06, + "step": 1462 + }, + { + "epoch": 3.8448389217619985, + "high_lr": 0.0002305263157894737, + "low_lr": 4.6105263157894745e-06, + "step": 1462 + }, + { + "epoch": 3.8474687705456936, + "grad_norm": 1.513124942779541, + "learning_rate": 0.00023, + "loss": 1.3518, + "step": 1463 + }, + { + "epoch": 3.8474687705456936, + "high_lr": 0.00023, + "low_lr": 4.600000000000001e-06, + "step": 1463 + }, + { + "epoch": 3.8474687705456936, + "high_lr": 0.00023, + "low_lr": 4.600000000000001e-06, + "step": 1463 + }, + { + "epoch": 3.8474687705456936, + "high_lr": 0.00023, + "low_lr": 4.600000000000001e-06, + "step": 1463 + }, + { + "epoch": 3.8474687705456936, + "high_lr": 0.00023, + "low_lr": 4.600000000000001e-06, + "step": 1463 + }, + { + "epoch": 3.8474687705456936, + "high_lr": 0.00023, + "low_lr": 4.600000000000001e-06, + "step": 1463 + }, + { + "epoch": 3.8474687705456936, + "high_lr": 0.00023, + "low_lr": 4.600000000000001e-06, + "step": 1463 + }, + { + "epoch": 3.8474687705456936, + "high_lr": 0.00023, + "low_lr": 4.600000000000001e-06, + "step": 1463 + }, + { + "epoch": 3.8474687705456936, + "high_lr": 0.00023, + "low_lr": 4.600000000000001e-06, + "step": 1463 + }, + { + "epoch": 3.8500986193293887, + "grad_norm": 1.502281665802002, + "learning_rate": 0.00022947368421052632, + "loss": 1.2577, + "step": 1464 + }, + { + "epoch": 3.8500986193293887, + "high_lr": 0.00022947368421052632, + "low_lr": 4.589473684210526e-06, + "step": 1464 + }, + { + "epoch": 3.8500986193293887, + "high_lr": 0.00022947368421052632, + "low_lr": 4.589473684210526e-06, + "step": 1464 + }, + { + "epoch": 3.8500986193293887, + "high_lr": 0.00022947368421052632, + "low_lr": 4.589473684210526e-06, + "step": 1464 + }, + { + "epoch": 3.8500986193293887, + "high_lr": 0.00022947368421052632, + "low_lr": 4.589473684210526e-06, + "step": 1464 + }, + { + "epoch": 3.8500986193293887, + "high_lr": 0.00022947368421052632, + "low_lr": 4.589473684210526e-06, + "step": 1464 + }, + { + "epoch": 3.8500986193293887, + "high_lr": 0.00022947368421052632, + "low_lr": 4.589473684210526e-06, + "step": 1464 + }, + { + "epoch": 3.8500986193293887, + "high_lr": 0.00022947368421052632, + "low_lr": 4.589473684210526e-06, + "step": 1464 + }, + { + "epoch": 3.8500986193293887, + "high_lr": 0.00022947368421052632, + "low_lr": 4.589473684210526e-06, + "step": 1464 + }, + { + "epoch": 3.8527284681130833, + "grad_norm": 1.6253715753555298, + "learning_rate": 0.00022894736842105263, + "loss": 1.2833, + "step": 1465 + }, + { + "epoch": 3.8527284681130833, + "high_lr": 0.00022894736842105263, + "low_lr": 4.578947368421053e-06, + "step": 1465 + }, + { + "epoch": 3.8527284681130833, + "high_lr": 0.00022894736842105263, + "low_lr": 4.578947368421053e-06, + "step": 1465 + }, + { + "epoch": 3.8527284681130833, + "high_lr": 0.00022894736842105263, + "low_lr": 4.578947368421053e-06, + "step": 1465 + }, + { + "epoch": 3.8527284681130833, + "high_lr": 0.00022894736842105263, + "low_lr": 4.578947368421053e-06, + "step": 1465 + }, + { + "epoch": 3.8527284681130833, + "high_lr": 0.00022894736842105263, + "low_lr": 4.578947368421053e-06, + "step": 1465 + }, + { + "epoch": 3.8527284681130833, + "high_lr": 0.00022894736842105263, + "low_lr": 4.578947368421053e-06, + "step": 1465 + }, + { + "epoch": 3.8527284681130833, + "high_lr": 0.00022894736842105263, + "low_lr": 4.578947368421053e-06, + "step": 1465 + }, + { + "epoch": 3.8527284681130833, + "high_lr": 0.00022894736842105263, + "low_lr": 4.578947368421053e-06, + "step": 1465 + }, + { + "epoch": 3.8553583168967784, + "grad_norm": 1.4256298542022705, + "learning_rate": 0.00022842105263157895, + "loss": 1.3053, + "step": 1466 + }, + { + "epoch": 3.8553583168967784, + "high_lr": 0.00022842105263157895, + "low_lr": 4.568421052631579e-06, + "step": 1466 + }, + { + "epoch": 3.8553583168967784, + "high_lr": 0.00022842105263157895, + "low_lr": 4.568421052631579e-06, + "step": 1466 + }, + { + "epoch": 3.8553583168967784, + "high_lr": 0.00022842105263157895, + "low_lr": 4.568421052631579e-06, + "step": 1466 + }, + { + "epoch": 3.8553583168967784, + "high_lr": 0.00022842105263157895, + "low_lr": 4.568421052631579e-06, + "step": 1466 + }, + { + "epoch": 3.8553583168967784, + "high_lr": 0.00022842105263157895, + "low_lr": 4.568421052631579e-06, + "step": 1466 + }, + { + "epoch": 3.8553583168967784, + "high_lr": 0.00022842105263157895, + "low_lr": 4.568421052631579e-06, + "step": 1466 + }, + { + "epoch": 3.8553583168967784, + "high_lr": 0.00022842105263157895, + "low_lr": 4.568421052631579e-06, + "step": 1466 + }, + { + "epoch": 3.8553583168967784, + "high_lr": 0.00022842105263157895, + "low_lr": 4.568421052631579e-06, + "step": 1466 + }, + { + "epoch": 3.8579881656804735, + "grad_norm": 1.4660565853118896, + "learning_rate": 0.00022789473684210526, + "loss": 1.2828, + "step": 1467 + }, + { + "epoch": 3.8579881656804735, + "high_lr": 0.00022789473684210526, + "low_lr": 4.557894736842105e-06, + "step": 1467 + }, + { + "epoch": 3.8579881656804735, + "high_lr": 0.00022789473684210526, + "low_lr": 4.557894736842105e-06, + "step": 1467 + }, + { + "epoch": 3.8579881656804735, + "high_lr": 0.00022789473684210526, + "low_lr": 4.557894736842105e-06, + "step": 1467 + }, + { + "epoch": 3.8579881656804735, + "high_lr": 0.00022789473684210526, + "low_lr": 4.557894736842105e-06, + "step": 1467 + }, + { + "epoch": 3.8579881656804735, + "high_lr": 0.00022789473684210526, + "low_lr": 4.557894736842105e-06, + "step": 1467 + }, + { + "epoch": 3.8579881656804735, + "high_lr": 0.00022789473684210526, + "low_lr": 4.557894736842105e-06, + "step": 1467 + }, + { + "epoch": 3.8579881656804735, + "high_lr": 0.00022789473684210526, + "low_lr": 4.557894736842105e-06, + "step": 1467 + }, + { + "epoch": 3.8579881656804735, + "high_lr": 0.00022789473684210526, + "low_lr": 4.557894736842105e-06, + "step": 1467 + }, + { + "epoch": 3.860618014464168, + "grad_norm": 1.4297759532928467, + "learning_rate": 0.0002273684210526316, + "loss": 1.3214, + "step": 1468 + }, + { + "epoch": 3.860618014464168, + "high_lr": 0.0002273684210526316, + "low_lr": 4.547368421052632e-06, + "step": 1468 + }, + { + "epoch": 3.860618014464168, + "high_lr": 0.0002273684210526316, + "low_lr": 4.547368421052632e-06, + "step": 1468 + }, + { + "epoch": 3.860618014464168, + "high_lr": 0.0002273684210526316, + "low_lr": 4.547368421052632e-06, + "step": 1468 + }, + { + "epoch": 3.860618014464168, + "high_lr": 0.0002273684210526316, + "low_lr": 4.547368421052632e-06, + "step": 1468 + }, + { + "epoch": 3.860618014464168, + "high_lr": 0.0002273684210526316, + "low_lr": 4.547368421052632e-06, + "step": 1468 + }, + { + "epoch": 3.860618014464168, + "high_lr": 0.0002273684210526316, + "low_lr": 4.547368421052632e-06, + "step": 1468 + }, + { + "epoch": 3.860618014464168, + "high_lr": 0.0002273684210526316, + "low_lr": 4.547368421052632e-06, + "step": 1468 + }, + { + "epoch": 3.860618014464168, + "high_lr": 0.0002273684210526316, + "low_lr": 4.547368421052632e-06, + "step": 1468 + }, + { + "epoch": 3.8632478632478633, + "grad_norm": 1.435152292251587, + "learning_rate": 0.0002268421052631579, + "loss": 1.2979, + "step": 1469 + }, + { + "epoch": 3.8632478632478633, + "high_lr": 0.0002268421052631579, + "low_lr": 4.536842105263158e-06, + "step": 1469 + }, + { + "epoch": 3.8632478632478633, + "high_lr": 0.0002268421052631579, + "low_lr": 4.536842105263158e-06, + "step": 1469 + }, + { + "epoch": 3.8632478632478633, + "high_lr": 0.0002268421052631579, + "low_lr": 4.536842105263158e-06, + "step": 1469 + }, + { + "epoch": 3.8632478632478633, + "high_lr": 0.0002268421052631579, + "low_lr": 4.536842105263158e-06, + "step": 1469 + }, + { + "epoch": 3.8632478632478633, + "high_lr": 0.0002268421052631579, + "low_lr": 4.536842105263158e-06, + "step": 1469 + }, + { + "epoch": 3.8632478632478633, + "high_lr": 0.0002268421052631579, + "low_lr": 4.536842105263158e-06, + "step": 1469 + }, + { + "epoch": 3.8632478632478633, + "high_lr": 0.0002268421052631579, + "low_lr": 4.536842105263158e-06, + "step": 1469 + }, + { + "epoch": 3.8632478632478633, + "high_lr": 0.0002268421052631579, + "low_lr": 4.536842105263158e-06, + "step": 1469 + }, + { + "epoch": 3.8658777120315584, + "grad_norm": 1.4923878908157349, + "learning_rate": 0.00022631578947368422, + "loss": 1.3003, + "step": 1470 + }, + { + "epoch": 3.8658777120315584, + "high_lr": 0.00022631578947368422, + "low_lr": 4.526315789473685e-06, + "step": 1470 + }, + { + "epoch": 3.8658777120315584, + "high_lr": 0.00022631578947368422, + "low_lr": 4.526315789473685e-06, + "step": 1470 + }, + { + "epoch": 3.8658777120315584, + "high_lr": 0.00022631578947368422, + "low_lr": 4.526315789473685e-06, + "step": 1470 + }, + { + "epoch": 3.8658777120315584, + "high_lr": 0.00022631578947368422, + "low_lr": 4.526315789473685e-06, + "step": 1470 + }, + { + "epoch": 3.8658777120315584, + "high_lr": 0.00022631578947368422, + "low_lr": 4.526315789473685e-06, + "step": 1470 + }, + { + "epoch": 3.8658777120315584, + "high_lr": 0.00022631578947368422, + "low_lr": 4.526315789473685e-06, + "step": 1470 + }, + { + "epoch": 3.8658777120315584, + "high_lr": 0.00022631578947368422, + "low_lr": 4.526315789473685e-06, + "step": 1470 + }, + { + "epoch": 3.8658777120315584, + "high_lr": 0.00022631578947368422, + "low_lr": 4.526315789473685e-06, + "step": 1470 + }, + { + "epoch": 3.868507560815253, + "grad_norm": 1.4546798467636108, + "learning_rate": 0.00022578947368421054, + "loss": 1.3611, + "step": 1471 + }, + { + "epoch": 3.868507560815253, + "high_lr": 0.00022578947368421054, + "low_lr": 4.5157894736842115e-06, + "step": 1471 + }, + { + "epoch": 3.868507560815253, + "high_lr": 0.00022578947368421054, + "low_lr": 4.5157894736842115e-06, + "step": 1471 + }, + { + "epoch": 3.868507560815253, + "high_lr": 0.00022578947368421054, + "low_lr": 4.5157894736842115e-06, + "step": 1471 + }, + { + "epoch": 3.868507560815253, + "high_lr": 0.00022578947368421054, + "low_lr": 4.5157894736842115e-06, + "step": 1471 + }, + { + "epoch": 3.868507560815253, + "high_lr": 0.00022578947368421054, + "low_lr": 4.5157894736842115e-06, + "step": 1471 + }, + { + "epoch": 3.868507560815253, + "high_lr": 0.00022578947368421054, + "low_lr": 4.5157894736842115e-06, + "step": 1471 + }, + { + "epoch": 3.868507560815253, + "high_lr": 0.00022578947368421054, + "low_lr": 4.5157894736842115e-06, + "step": 1471 + }, + { + "epoch": 3.868507560815253, + "high_lr": 0.00022578947368421054, + "low_lr": 4.5157894736842115e-06, + "step": 1471 + }, + { + "epoch": 3.871137409598948, + "grad_norm": 1.4836859703063965, + "learning_rate": 0.00022526315789473682, + "loss": 1.311, + "step": 1472 + }, + { + "epoch": 3.871137409598948, + "high_lr": 0.00022526315789473682, + "low_lr": 4.505263157894737e-06, + "step": 1472 + }, + { + "epoch": 3.871137409598948, + "high_lr": 0.00022526315789473682, + "low_lr": 4.505263157894737e-06, + "step": 1472 + }, + { + "epoch": 3.871137409598948, + "high_lr": 0.00022526315789473682, + "low_lr": 4.505263157894737e-06, + "step": 1472 + }, + { + "epoch": 3.871137409598948, + "high_lr": 0.00022526315789473682, + "low_lr": 4.505263157894737e-06, + "step": 1472 + }, + { + "epoch": 3.871137409598948, + "high_lr": 0.00022526315789473682, + "low_lr": 4.505263157894737e-06, + "step": 1472 + }, + { + "epoch": 3.871137409598948, + "high_lr": 0.00022526315789473682, + "low_lr": 4.505263157894737e-06, + "step": 1472 + }, + { + "epoch": 3.871137409598948, + "high_lr": 0.00022526315789473682, + "low_lr": 4.505263157894737e-06, + "step": 1472 + }, + { + "epoch": 3.871137409598948, + "high_lr": 0.00022526315789473682, + "low_lr": 4.505263157894737e-06, + "step": 1472 + }, + { + "epoch": 3.8737672583826432, + "grad_norm": 1.4043117761611938, + "learning_rate": 0.00022473684210526316, + "loss": 1.2994, + "step": 1473 + }, + { + "epoch": 3.8737672583826432, + "high_lr": 0.00022473684210526316, + "low_lr": 4.494736842105263e-06, + "step": 1473 + }, + { + "epoch": 3.8737672583826432, + "high_lr": 0.00022473684210526316, + "low_lr": 4.494736842105263e-06, + "step": 1473 + }, + { + "epoch": 3.8737672583826432, + "high_lr": 0.00022473684210526316, + "low_lr": 4.494736842105263e-06, + "step": 1473 + }, + { + "epoch": 3.8737672583826432, + "high_lr": 0.00022473684210526316, + "low_lr": 4.494736842105263e-06, + "step": 1473 + }, + { + "epoch": 3.8737672583826432, + "high_lr": 0.00022473684210526316, + "low_lr": 4.494736842105263e-06, + "step": 1473 + }, + { + "epoch": 3.8737672583826432, + "high_lr": 0.00022473684210526316, + "low_lr": 4.494736842105263e-06, + "step": 1473 + }, + { + "epoch": 3.8737672583826432, + "high_lr": 0.00022473684210526316, + "low_lr": 4.494736842105263e-06, + "step": 1473 + }, + { + "epoch": 3.8737672583826432, + "high_lr": 0.00022473684210526316, + "low_lr": 4.494736842105263e-06, + "step": 1473 + }, + { + "epoch": 3.876397107166338, + "grad_norm": 1.354508876800537, + "learning_rate": 0.00022421052631578948, + "loss": 1.3001, + "step": 1474 + }, + { + "epoch": 3.876397107166338, + "high_lr": 0.00022421052631578948, + "low_lr": 4.48421052631579e-06, + "step": 1474 + }, + { + "epoch": 3.876397107166338, + "high_lr": 0.00022421052631578948, + "low_lr": 4.48421052631579e-06, + "step": 1474 + }, + { + "epoch": 3.876397107166338, + "high_lr": 0.00022421052631578948, + "low_lr": 4.48421052631579e-06, + "step": 1474 + }, + { + "epoch": 3.876397107166338, + "high_lr": 0.00022421052631578948, + "low_lr": 4.48421052631579e-06, + "step": 1474 + }, + { + "epoch": 3.876397107166338, + "high_lr": 0.00022421052631578948, + "low_lr": 4.48421052631579e-06, + "step": 1474 + }, + { + "epoch": 3.876397107166338, + "high_lr": 0.00022421052631578948, + "low_lr": 4.48421052631579e-06, + "step": 1474 + }, + { + "epoch": 3.876397107166338, + "high_lr": 0.00022421052631578948, + "low_lr": 4.48421052631579e-06, + "step": 1474 + }, + { + "epoch": 3.876397107166338, + "high_lr": 0.00022421052631578948, + "low_lr": 4.48421052631579e-06, + "step": 1474 + }, + { + "epoch": 3.879026955950033, + "grad_norm": 1.5343304872512817, + "learning_rate": 0.0002236842105263158, + "loss": 1.3185, + "step": 1475 + }, + { + "epoch": 3.879026955950033, + "high_lr": 0.0002236842105263158, + "low_lr": 4.473684210526316e-06, + "step": 1475 + }, + { + "epoch": 3.879026955950033, + "high_lr": 0.0002236842105263158, + "low_lr": 4.473684210526316e-06, + "step": 1475 + }, + { + "epoch": 3.879026955950033, + "high_lr": 0.0002236842105263158, + "low_lr": 4.473684210526316e-06, + "step": 1475 + }, + { + "epoch": 3.879026955950033, + "high_lr": 0.0002236842105263158, + "low_lr": 4.473684210526316e-06, + "step": 1475 + }, + { + "epoch": 3.879026955950033, + "high_lr": 0.0002236842105263158, + "low_lr": 4.473684210526316e-06, + "step": 1475 + }, + { + "epoch": 3.879026955950033, + "high_lr": 0.0002236842105263158, + "low_lr": 4.473684210526316e-06, + "step": 1475 + }, + { + "epoch": 3.879026955950033, + "high_lr": 0.0002236842105263158, + "low_lr": 4.473684210526316e-06, + "step": 1475 + }, + { + "epoch": 3.879026955950033, + "high_lr": 0.0002236842105263158, + "low_lr": 4.473684210526316e-06, + "step": 1475 + }, + { + "epoch": 3.8816568047337277, + "grad_norm": 1.3767120838165283, + "learning_rate": 0.0002231578947368421, + "loss": 1.2882, + "step": 1476 + }, + { + "epoch": 3.8816568047337277, + "high_lr": 0.0002231578947368421, + "low_lr": 4.463157894736842e-06, + "step": 1476 + }, + { + "epoch": 3.8816568047337277, + "high_lr": 0.0002231578947368421, + "low_lr": 4.463157894736842e-06, + "step": 1476 + }, + { + "epoch": 3.8816568047337277, + "high_lr": 0.0002231578947368421, + "low_lr": 4.463157894736842e-06, + "step": 1476 + }, + { + "epoch": 3.8816568047337277, + "high_lr": 0.0002231578947368421, + "low_lr": 4.463157894736842e-06, + "step": 1476 + }, + { + "epoch": 3.8816568047337277, + "high_lr": 0.0002231578947368421, + "low_lr": 4.463157894736842e-06, + "step": 1476 + }, + { + "epoch": 3.8816568047337277, + "high_lr": 0.0002231578947368421, + "low_lr": 4.463157894736842e-06, + "step": 1476 + }, + { + "epoch": 3.8816568047337277, + "high_lr": 0.0002231578947368421, + "low_lr": 4.463157894736842e-06, + "step": 1476 + }, + { + "epoch": 3.8816568047337277, + "high_lr": 0.0002231578947368421, + "low_lr": 4.463157894736842e-06, + "step": 1476 + }, + { + "epoch": 3.8842866535174227, + "grad_norm": 1.4227728843688965, + "learning_rate": 0.00022263157894736844, + "loss": 1.3117, + "step": 1477 + }, + { + "epoch": 3.8842866535174227, + "high_lr": 0.00022263157894736844, + "low_lr": 4.452631578947369e-06, + "step": 1477 + }, + { + "epoch": 3.8842866535174227, + "high_lr": 0.00022263157894736844, + "low_lr": 4.452631578947369e-06, + "step": 1477 + }, + { + "epoch": 3.8842866535174227, + "high_lr": 0.00022263157894736844, + "low_lr": 4.452631578947369e-06, + "step": 1477 + }, + { + "epoch": 3.8842866535174227, + "high_lr": 0.00022263157894736844, + "low_lr": 4.452631578947369e-06, + "step": 1477 + }, + { + "epoch": 3.8842866535174227, + "high_lr": 0.00022263157894736844, + "low_lr": 4.452631578947369e-06, + "step": 1477 + }, + { + "epoch": 3.8842866535174227, + "high_lr": 0.00022263157894736844, + "low_lr": 4.452631578947369e-06, + "step": 1477 + }, + { + "epoch": 3.8842866535174227, + "high_lr": 0.00022263157894736844, + "low_lr": 4.452631578947369e-06, + "step": 1477 + }, + { + "epoch": 3.8842866535174227, + "high_lr": 0.00022263157894736844, + "low_lr": 4.452631578947369e-06, + "step": 1477 + }, + { + "epoch": 3.886916502301118, + "grad_norm": 1.4930652379989624, + "learning_rate": 0.00022210526315789476, + "loss": 1.3118, + "step": 1478 + }, + { + "epoch": 3.886916502301118, + "high_lr": 0.00022210526315789476, + "low_lr": 4.442105263157896e-06, + "step": 1478 + }, + { + "epoch": 3.886916502301118, + "high_lr": 0.00022210526315789476, + "low_lr": 4.442105263157896e-06, + "step": 1478 + }, + { + "epoch": 3.886916502301118, + "high_lr": 0.00022210526315789476, + "low_lr": 4.442105263157896e-06, + "step": 1478 + }, + { + "epoch": 3.886916502301118, + "high_lr": 0.00022210526315789476, + "low_lr": 4.442105263157896e-06, + "step": 1478 + }, + { + "epoch": 3.886916502301118, + "high_lr": 0.00022210526315789476, + "low_lr": 4.442105263157896e-06, + "step": 1478 + }, + { + "epoch": 3.886916502301118, + "high_lr": 0.00022210526315789476, + "low_lr": 4.442105263157896e-06, + "step": 1478 + }, + { + "epoch": 3.886916502301118, + "high_lr": 0.00022210526315789476, + "low_lr": 4.442105263157896e-06, + "step": 1478 + }, + { + "epoch": 3.886916502301118, + "high_lr": 0.00022210526315789476, + "low_lr": 4.442105263157896e-06, + "step": 1478 + }, + { + "epoch": 3.8895463510848125, + "grad_norm": 1.4825832843780518, + "learning_rate": 0.00022157894736842104, + "loss": 1.336, + "step": 1479 + }, + { + "epoch": 3.8895463510848125, + "high_lr": 0.00022157894736842104, + "low_lr": 4.431578947368421e-06, + "step": 1479 + }, + { + "epoch": 3.8895463510848125, + "high_lr": 0.00022157894736842104, + "low_lr": 4.431578947368421e-06, + "step": 1479 + }, + { + "epoch": 3.8895463510848125, + "high_lr": 0.00022157894736842104, + "low_lr": 4.431578947368421e-06, + "step": 1479 + }, + { + "epoch": 3.8895463510848125, + "high_lr": 0.00022157894736842104, + "low_lr": 4.431578947368421e-06, + "step": 1479 + }, + { + "epoch": 3.8895463510848125, + "high_lr": 0.00022157894736842104, + "low_lr": 4.431578947368421e-06, + "step": 1479 + }, + { + "epoch": 3.8895463510848125, + "high_lr": 0.00022157894736842104, + "low_lr": 4.431578947368421e-06, + "step": 1479 + }, + { + "epoch": 3.8895463510848125, + "high_lr": 0.00022157894736842104, + "low_lr": 4.431578947368421e-06, + "step": 1479 + }, + { + "epoch": 3.8895463510848125, + "high_lr": 0.00022157894736842104, + "low_lr": 4.431578947368421e-06, + "step": 1479 + }, + { + "epoch": 3.8921761998685076, + "grad_norm": 1.572178602218628, + "learning_rate": 0.00022105263157894735, + "loss": 1.3435, + "step": 1480 + }, + { + "epoch": 3.8921761998685076, + "high_lr": 0.00022105263157894735, + "low_lr": 4.4210526315789476e-06, + "step": 1480 + }, + { + "epoch": 3.8921761998685076, + "high_lr": 0.00022105263157894735, + "low_lr": 4.4210526315789476e-06, + "step": 1480 + }, + { + "epoch": 3.8921761998685076, + "high_lr": 0.00022105263157894735, + "low_lr": 4.4210526315789476e-06, + "step": 1480 + }, + { + "epoch": 3.8921761998685076, + "high_lr": 0.00022105263157894735, + "low_lr": 4.4210526315789476e-06, + "step": 1480 + }, + { + "epoch": 3.8921761998685076, + "high_lr": 0.00022105263157894735, + "low_lr": 4.4210526315789476e-06, + "step": 1480 + }, + { + "epoch": 3.8921761998685076, + "high_lr": 0.00022105263157894735, + "low_lr": 4.4210526315789476e-06, + "step": 1480 + }, + { + "epoch": 3.8921761998685076, + "high_lr": 0.00022105263157894735, + "low_lr": 4.4210526315789476e-06, + "step": 1480 + }, + { + "epoch": 3.8921761998685076, + "high_lr": 0.00022105263157894735, + "low_lr": 4.4210526315789476e-06, + "step": 1480 + }, + { + "epoch": 3.8948060486522023, + "grad_norm": 1.5475068092346191, + "learning_rate": 0.0002205263157894737, + "loss": 1.2861, + "step": 1481 + }, + { + "epoch": 3.8948060486522023, + "high_lr": 0.0002205263157894737, + "low_lr": 4.410526315789474e-06, + "step": 1481 + }, + { + "epoch": 3.8948060486522023, + "high_lr": 0.0002205263157894737, + "low_lr": 4.410526315789474e-06, + "step": 1481 + }, + { + "epoch": 3.8948060486522023, + "high_lr": 0.0002205263157894737, + "low_lr": 4.410526315789474e-06, + "step": 1481 + }, + { + "epoch": 3.8948060486522023, + "high_lr": 0.0002205263157894737, + "low_lr": 4.410526315789474e-06, + "step": 1481 + }, + { + "epoch": 3.8948060486522023, + "high_lr": 0.0002205263157894737, + "low_lr": 4.410526315789474e-06, + "step": 1481 + }, + { + "epoch": 3.8948060486522023, + "high_lr": 0.0002205263157894737, + "low_lr": 4.410526315789474e-06, + "step": 1481 + }, + { + "epoch": 3.8948060486522023, + "high_lr": 0.0002205263157894737, + "low_lr": 4.410526315789474e-06, + "step": 1481 + }, + { + "epoch": 3.8948060486522023, + "high_lr": 0.0002205263157894737, + "low_lr": 4.410526315789474e-06, + "step": 1481 + }, + { + "epoch": 3.8974358974358974, + "grad_norm": 1.3821595907211304, + "learning_rate": 0.00022, + "loss": 1.3257, + "step": 1482 + }, + { + "epoch": 3.8974358974358974, + "high_lr": 0.00022, + "low_lr": 4.4e-06, + "step": 1482 + }, + { + "epoch": 3.8974358974358974, + "high_lr": 0.00022, + "low_lr": 4.4e-06, + "step": 1482 + }, + { + "epoch": 3.8974358974358974, + "high_lr": 0.00022, + "low_lr": 4.4e-06, + "step": 1482 + }, + { + "epoch": 3.8974358974358974, + "high_lr": 0.00022, + "low_lr": 4.4e-06, + "step": 1482 + }, + { + "epoch": 3.8974358974358974, + "high_lr": 0.00022, + "low_lr": 4.4e-06, + "step": 1482 + }, + { + "epoch": 3.8974358974358974, + "high_lr": 0.00022, + "low_lr": 4.4e-06, + "step": 1482 + }, + { + "epoch": 3.8974358974358974, + "high_lr": 0.00022, + "low_lr": 4.4e-06, + "step": 1482 + }, + { + "epoch": 3.8974358974358974, + "high_lr": 0.00022, + "low_lr": 4.4e-06, + "step": 1482 + }, + { + "epoch": 3.9000657462195925, + "grad_norm": 1.5113525390625, + "learning_rate": 0.00021947368421052632, + "loss": 1.3, + "step": 1483 + }, + { + "epoch": 3.9000657462195925, + "high_lr": 0.00021947368421052632, + "low_lr": 4.3894736842105266e-06, + "step": 1483 + }, + { + "epoch": 3.9000657462195925, + "high_lr": 0.00021947368421052632, + "low_lr": 4.3894736842105266e-06, + "step": 1483 + }, + { + "epoch": 3.9000657462195925, + "high_lr": 0.00021947368421052632, + "low_lr": 4.3894736842105266e-06, + "step": 1483 + }, + { + "epoch": 3.9000657462195925, + "high_lr": 0.00021947368421052632, + "low_lr": 4.3894736842105266e-06, + "step": 1483 + }, + { + "epoch": 3.9000657462195925, + "high_lr": 0.00021947368421052632, + "low_lr": 4.3894736842105266e-06, + "step": 1483 + }, + { + "epoch": 3.9000657462195925, + "high_lr": 0.00021947368421052632, + "low_lr": 4.3894736842105266e-06, + "step": 1483 + }, + { + "epoch": 3.9000657462195925, + "high_lr": 0.00021947368421052632, + "low_lr": 4.3894736842105266e-06, + "step": 1483 + }, + { + "epoch": 3.9000657462195925, + "high_lr": 0.00021947368421052632, + "low_lr": 4.3894736842105266e-06, + "step": 1483 + }, + { + "epoch": 3.902695595003287, + "grad_norm": 1.4746077060699463, + "learning_rate": 0.00021894736842105263, + "loss": 1.3474, + "step": 1484 + }, + { + "epoch": 3.902695595003287, + "high_lr": 0.00021894736842105263, + "low_lr": 4.378947368421053e-06, + "step": 1484 + }, + { + "epoch": 3.902695595003287, + "high_lr": 0.00021894736842105263, + "low_lr": 4.378947368421053e-06, + "step": 1484 + }, + { + "epoch": 3.902695595003287, + "high_lr": 0.00021894736842105263, + "low_lr": 4.378947368421053e-06, + "step": 1484 + }, + { + "epoch": 3.902695595003287, + "high_lr": 0.00021894736842105263, + "low_lr": 4.378947368421053e-06, + "step": 1484 + }, + { + "epoch": 3.902695595003287, + "high_lr": 0.00021894736842105263, + "low_lr": 4.378947368421053e-06, + "step": 1484 + }, + { + "epoch": 3.902695595003287, + "high_lr": 0.00021894736842105263, + "low_lr": 4.378947368421053e-06, + "step": 1484 + }, + { + "epoch": 3.902695595003287, + "high_lr": 0.00021894736842105263, + "low_lr": 4.378947368421053e-06, + "step": 1484 + }, + { + "epoch": 3.902695595003287, + "high_lr": 0.00021894736842105263, + "low_lr": 4.378947368421053e-06, + "step": 1484 + }, + { + "epoch": 3.905325443786982, + "grad_norm": 1.3812576532363892, + "learning_rate": 0.00021842105263157897, + "loss": 1.2668, + "step": 1485 + }, + { + "epoch": 3.905325443786982, + "high_lr": 0.00021842105263157897, + "low_lr": 4.368421052631579e-06, + "step": 1485 + }, + { + "epoch": 3.905325443786982, + "high_lr": 0.00021842105263157897, + "low_lr": 4.368421052631579e-06, + "step": 1485 + }, + { + "epoch": 3.905325443786982, + "high_lr": 0.00021842105263157897, + "low_lr": 4.368421052631579e-06, + "step": 1485 + }, + { + "epoch": 3.905325443786982, + "high_lr": 0.00021842105263157897, + "low_lr": 4.368421052631579e-06, + "step": 1485 + }, + { + "epoch": 3.905325443786982, + "high_lr": 0.00021842105263157897, + "low_lr": 4.368421052631579e-06, + "step": 1485 + }, + { + "epoch": 3.905325443786982, + "high_lr": 0.00021842105263157897, + "low_lr": 4.368421052631579e-06, + "step": 1485 + }, + { + "epoch": 3.905325443786982, + "high_lr": 0.00021842105263157897, + "low_lr": 4.368421052631579e-06, + "step": 1485 + }, + { + "epoch": 3.905325443786982, + "high_lr": 0.00021842105263157897, + "low_lr": 4.368421052631579e-06, + "step": 1485 + }, + { + "epoch": 3.9079552925706773, + "grad_norm": 1.5691782236099243, + "learning_rate": 0.00021789473684210526, + "loss": 1.3194, + "step": 1486 + }, + { + "epoch": 3.9079552925706773, + "high_lr": 0.00021789473684210526, + "low_lr": 4.3578947368421055e-06, + "step": 1486 + }, + { + "epoch": 3.9079552925706773, + "high_lr": 0.00021789473684210526, + "low_lr": 4.3578947368421055e-06, + "step": 1486 + }, + { + "epoch": 3.9079552925706773, + "high_lr": 0.00021789473684210526, + "low_lr": 4.3578947368421055e-06, + "step": 1486 + }, + { + "epoch": 3.9079552925706773, + "high_lr": 0.00021789473684210526, + "low_lr": 4.3578947368421055e-06, + "step": 1486 + }, + { + "epoch": 3.9079552925706773, + "high_lr": 0.00021789473684210526, + "low_lr": 4.3578947368421055e-06, + "step": 1486 + }, + { + "epoch": 3.9079552925706773, + "high_lr": 0.00021789473684210526, + "low_lr": 4.3578947368421055e-06, + "step": 1486 + }, + { + "epoch": 3.9079552925706773, + "high_lr": 0.00021789473684210526, + "low_lr": 4.3578947368421055e-06, + "step": 1486 + }, + { + "epoch": 3.9079552925706773, + "high_lr": 0.00021789473684210526, + "low_lr": 4.3578947368421055e-06, + "step": 1486 + }, + { + "epoch": 3.910585141354372, + "grad_norm": 1.395737886428833, + "learning_rate": 0.00021736842105263157, + "loss": 1.3049, + "step": 1487 + }, + { + "epoch": 3.910585141354372, + "high_lr": 0.00021736842105263157, + "low_lr": 4.347368421052632e-06, + "step": 1487 + }, + { + "epoch": 3.910585141354372, + "high_lr": 0.00021736842105263157, + "low_lr": 4.347368421052632e-06, + "step": 1487 + }, + { + "epoch": 3.910585141354372, + "high_lr": 0.00021736842105263157, + "low_lr": 4.347368421052632e-06, + "step": 1487 + }, + { + "epoch": 3.910585141354372, + "high_lr": 0.00021736842105263157, + "low_lr": 4.347368421052632e-06, + "step": 1487 + }, + { + "epoch": 3.910585141354372, + "high_lr": 0.00021736842105263157, + "low_lr": 4.347368421052632e-06, + "step": 1487 + }, + { + "epoch": 3.910585141354372, + "high_lr": 0.00021736842105263157, + "low_lr": 4.347368421052632e-06, + "step": 1487 + }, + { + "epoch": 3.910585141354372, + "high_lr": 0.00021736842105263157, + "low_lr": 4.347368421052632e-06, + "step": 1487 + }, + { + "epoch": 3.910585141354372, + "high_lr": 0.00021736842105263157, + "low_lr": 4.347368421052632e-06, + "step": 1487 + }, + { + "epoch": 3.913214990138067, + "grad_norm": 1.4675577878952026, + "learning_rate": 0.00021684210526315789, + "loss": 1.3194, + "step": 1488 + }, + { + "epoch": 3.913214990138067, + "high_lr": 0.00021684210526315789, + "low_lr": 4.336842105263158e-06, + "step": 1488 + }, + { + "epoch": 3.913214990138067, + "high_lr": 0.00021684210526315789, + "low_lr": 4.336842105263158e-06, + "step": 1488 + }, + { + "epoch": 3.913214990138067, + "high_lr": 0.00021684210526315789, + "low_lr": 4.336842105263158e-06, + "step": 1488 + }, + { + "epoch": 3.913214990138067, + "high_lr": 0.00021684210526315789, + "low_lr": 4.336842105263158e-06, + "step": 1488 + }, + { + "epoch": 3.913214990138067, + "high_lr": 0.00021684210526315789, + "low_lr": 4.336842105263158e-06, + "step": 1488 + }, + { + "epoch": 3.913214990138067, + "high_lr": 0.00021684210526315789, + "low_lr": 4.336842105263158e-06, + "step": 1488 + }, + { + "epoch": 3.913214990138067, + "high_lr": 0.00021684210526315789, + "low_lr": 4.336842105263158e-06, + "step": 1488 + }, + { + "epoch": 3.913214990138067, + "high_lr": 0.00021684210526315789, + "low_lr": 4.336842105263158e-06, + "step": 1488 + }, + { + "epoch": 3.915844838921762, + "grad_norm": 1.4874144792556763, + "learning_rate": 0.00021631578947368423, + "loss": 1.2999, + "step": 1489 + }, + { + "epoch": 3.915844838921762, + "high_lr": 0.00021631578947368423, + "low_lr": 4.3263157894736845e-06, + "step": 1489 + }, + { + "epoch": 3.915844838921762, + "high_lr": 0.00021631578947368423, + "low_lr": 4.3263157894736845e-06, + "step": 1489 + }, + { + "epoch": 3.915844838921762, + "high_lr": 0.00021631578947368423, + "low_lr": 4.3263157894736845e-06, + "step": 1489 + }, + { + "epoch": 3.915844838921762, + "high_lr": 0.00021631578947368423, + "low_lr": 4.3263157894736845e-06, + "step": 1489 + }, + { + "epoch": 3.915844838921762, + "high_lr": 0.00021631578947368423, + "low_lr": 4.3263157894736845e-06, + "step": 1489 + }, + { + "epoch": 3.915844838921762, + "high_lr": 0.00021631578947368423, + "low_lr": 4.3263157894736845e-06, + "step": 1489 + }, + { + "epoch": 3.915844838921762, + "high_lr": 0.00021631578947368423, + "low_lr": 4.3263157894736845e-06, + "step": 1489 + }, + { + "epoch": 3.915844838921762, + "high_lr": 0.00021631578947368423, + "low_lr": 4.3263157894736845e-06, + "step": 1489 + }, + { + "epoch": 3.918474687705457, + "grad_norm": 1.4818798303604126, + "learning_rate": 0.00021578947368421054, + "loss": 1.3418, + "step": 1490 + }, + { + "epoch": 3.918474687705457, + "high_lr": 0.00021578947368421054, + "low_lr": 4.315789473684211e-06, + "step": 1490 + }, + { + "epoch": 3.918474687705457, + "high_lr": 0.00021578947368421054, + "low_lr": 4.315789473684211e-06, + "step": 1490 + }, + { + "epoch": 3.918474687705457, + "high_lr": 0.00021578947368421054, + "low_lr": 4.315789473684211e-06, + "step": 1490 + }, + { + "epoch": 3.918474687705457, + "high_lr": 0.00021578947368421054, + "low_lr": 4.315789473684211e-06, + "step": 1490 + }, + { + "epoch": 3.918474687705457, + "high_lr": 0.00021578947368421054, + "low_lr": 4.315789473684211e-06, + "step": 1490 + }, + { + "epoch": 3.918474687705457, + "high_lr": 0.00021578947368421054, + "low_lr": 4.315789473684211e-06, + "step": 1490 + }, + { + "epoch": 3.918474687705457, + "high_lr": 0.00021578947368421054, + "low_lr": 4.315789473684211e-06, + "step": 1490 + }, + { + "epoch": 3.918474687705457, + "high_lr": 0.00021578947368421054, + "low_lr": 4.315789473684211e-06, + "step": 1490 + }, + { + "epoch": 3.921104536489152, + "grad_norm": 1.362735629081726, + "learning_rate": 0.00021526315789473685, + "loss": 1.2931, + "step": 1491 + }, + { + "epoch": 3.921104536489152, + "high_lr": 0.00021526315789473685, + "low_lr": 4.305263157894737e-06, + "step": 1491 + }, + { + "epoch": 3.921104536489152, + "high_lr": 0.00021526315789473685, + "low_lr": 4.305263157894737e-06, + "step": 1491 + }, + { + "epoch": 3.921104536489152, + "high_lr": 0.00021526315789473685, + "low_lr": 4.305263157894737e-06, + "step": 1491 + }, + { + "epoch": 3.921104536489152, + "high_lr": 0.00021526315789473685, + "low_lr": 4.305263157894737e-06, + "step": 1491 + }, + { + "epoch": 3.921104536489152, + "high_lr": 0.00021526315789473685, + "low_lr": 4.305263157894737e-06, + "step": 1491 + }, + { + "epoch": 3.921104536489152, + "high_lr": 0.00021526315789473685, + "low_lr": 4.305263157894737e-06, + "step": 1491 + }, + { + "epoch": 3.921104536489152, + "high_lr": 0.00021526315789473685, + "low_lr": 4.305263157894737e-06, + "step": 1491 + }, + { + "epoch": 3.921104536489152, + "high_lr": 0.00021526315789473685, + "low_lr": 4.305263157894737e-06, + "step": 1491 + }, + { + "epoch": 3.923734385272847, + "grad_norm": 1.3892029523849487, + "learning_rate": 0.00021473684210526316, + "loss": 1.2784, + "step": 1492 + }, + { + "epoch": 3.923734385272847, + "high_lr": 0.00021473684210526316, + "low_lr": 4.2947368421052635e-06, + "step": 1492 + }, + { + "epoch": 3.923734385272847, + "high_lr": 0.00021473684210526316, + "low_lr": 4.2947368421052635e-06, + "step": 1492 + }, + { + "epoch": 3.923734385272847, + "high_lr": 0.00021473684210526316, + "low_lr": 4.2947368421052635e-06, + "step": 1492 + }, + { + "epoch": 3.923734385272847, + "high_lr": 0.00021473684210526316, + "low_lr": 4.2947368421052635e-06, + "step": 1492 + }, + { + "epoch": 3.923734385272847, + "high_lr": 0.00021473684210526316, + "low_lr": 4.2947368421052635e-06, + "step": 1492 + }, + { + "epoch": 3.923734385272847, + "high_lr": 0.00021473684210526316, + "low_lr": 4.2947368421052635e-06, + "step": 1492 + }, + { + "epoch": 3.923734385272847, + "high_lr": 0.00021473684210526316, + "low_lr": 4.2947368421052635e-06, + "step": 1492 + }, + { + "epoch": 3.923734385272847, + "high_lr": 0.00021473684210526316, + "low_lr": 4.2947368421052635e-06, + "step": 1492 + }, + { + "epoch": 3.9263642340565417, + "grad_norm": 1.3729498386383057, + "learning_rate": 0.00021421052631578948, + "loss": 1.2919, + "step": 1493 + }, + { + "epoch": 3.9263642340565417, + "high_lr": 0.00021421052631578948, + "low_lr": 4.28421052631579e-06, + "step": 1493 + }, + { + "epoch": 3.9263642340565417, + "high_lr": 0.00021421052631578948, + "low_lr": 4.28421052631579e-06, + "step": 1493 + }, + { + "epoch": 3.9263642340565417, + "high_lr": 0.00021421052631578948, + "low_lr": 4.28421052631579e-06, + "step": 1493 + }, + { + "epoch": 3.9263642340565417, + "high_lr": 0.00021421052631578948, + "low_lr": 4.28421052631579e-06, + "step": 1493 + }, + { + "epoch": 3.9263642340565417, + "high_lr": 0.00021421052631578948, + "low_lr": 4.28421052631579e-06, + "step": 1493 + }, + { + "epoch": 3.9263642340565417, + "high_lr": 0.00021421052631578948, + "low_lr": 4.28421052631579e-06, + "step": 1493 + }, + { + "epoch": 3.9263642340565417, + "high_lr": 0.00021421052631578948, + "low_lr": 4.28421052631579e-06, + "step": 1493 + }, + { + "epoch": 3.9263642340565417, + "high_lr": 0.00021421052631578948, + "low_lr": 4.28421052631579e-06, + "step": 1493 + }, + { + "epoch": 3.9289940828402368, + "grad_norm": 1.4095979928970337, + "learning_rate": 0.0002136842105263158, + "loss": 1.3006, + "step": 1494 + }, + { + "epoch": 3.9289940828402368, + "high_lr": 0.0002136842105263158, + "low_lr": 4.273684210526316e-06, + "step": 1494 + }, + { + "epoch": 3.9289940828402368, + "high_lr": 0.0002136842105263158, + "low_lr": 4.273684210526316e-06, + "step": 1494 + }, + { + "epoch": 3.9289940828402368, + "high_lr": 0.0002136842105263158, + "low_lr": 4.273684210526316e-06, + "step": 1494 + }, + { + "epoch": 3.9289940828402368, + "high_lr": 0.0002136842105263158, + "low_lr": 4.273684210526316e-06, + "step": 1494 + }, + { + "epoch": 3.9289940828402368, + "high_lr": 0.0002136842105263158, + "low_lr": 4.273684210526316e-06, + "step": 1494 + }, + { + "epoch": 3.9289940828402368, + "high_lr": 0.0002136842105263158, + "low_lr": 4.273684210526316e-06, + "step": 1494 + }, + { + "epoch": 3.9289940828402368, + "high_lr": 0.0002136842105263158, + "low_lr": 4.273684210526316e-06, + "step": 1494 + }, + { + "epoch": 3.9289940828402368, + "high_lr": 0.0002136842105263158, + "low_lr": 4.273684210526316e-06, + "step": 1494 + }, + { + "epoch": 3.931623931623932, + "grad_norm": 1.605855107307434, + "learning_rate": 0.0002131578947368421, + "loss": 1.2677, + "step": 1495 + }, + { + "epoch": 3.931623931623932, + "high_lr": 0.0002131578947368421, + "low_lr": 4.2631578947368425e-06, + "step": 1495 + }, + { + "epoch": 3.931623931623932, + "high_lr": 0.0002131578947368421, + "low_lr": 4.2631578947368425e-06, + "step": 1495 + }, + { + "epoch": 3.931623931623932, + "high_lr": 0.0002131578947368421, + "low_lr": 4.2631578947368425e-06, + "step": 1495 + }, + { + "epoch": 3.931623931623932, + "high_lr": 0.0002131578947368421, + "low_lr": 4.2631578947368425e-06, + "step": 1495 + }, + { + "epoch": 3.931623931623932, + "high_lr": 0.0002131578947368421, + "low_lr": 4.2631578947368425e-06, + "step": 1495 + }, + { + "epoch": 3.931623931623932, + "high_lr": 0.0002131578947368421, + "low_lr": 4.2631578947368425e-06, + "step": 1495 + }, + { + "epoch": 3.931623931623932, + "high_lr": 0.0002131578947368421, + "low_lr": 4.2631578947368425e-06, + "step": 1495 + }, + { + "epoch": 3.931623931623932, + "high_lr": 0.0002131578947368421, + "low_lr": 4.2631578947368425e-06, + "step": 1495 + }, + { + "epoch": 3.9342537804076265, + "grad_norm": 1.4077523946762085, + "learning_rate": 0.00021263157894736842, + "loss": 1.2999, + "step": 1496 + }, + { + "epoch": 3.9342537804076265, + "high_lr": 0.00021263157894736842, + "low_lr": 4.252631578947369e-06, + "step": 1496 + }, + { + "epoch": 3.9342537804076265, + "high_lr": 0.00021263157894736842, + "low_lr": 4.252631578947369e-06, + "step": 1496 + }, + { + "epoch": 3.9342537804076265, + "high_lr": 0.00021263157894736842, + "low_lr": 4.252631578947369e-06, + "step": 1496 + }, + { + "epoch": 3.9342537804076265, + "high_lr": 0.00021263157894736842, + "low_lr": 4.252631578947369e-06, + "step": 1496 + }, + { + "epoch": 3.9342537804076265, + "high_lr": 0.00021263157894736842, + "low_lr": 4.252631578947369e-06, + "step": 1496 + }, + { + "epoch": 3.9342537804076265, + "high_lr": 0.00021263157894736842, + "low_lr": 4.252631578947369e-06, + "step": 1496 + }, + { + "epoch": 3.9342537804076265, + "high_lr": 0.00021263157894736842, + "low_lr": 4.252631578947369e-06, + "step": 1496 + }, + { + "epoch": 3.9342537804076265, + "high_lr": 0.00021263157894736842, + "low_lr": 4.252631578947369e-06, + "step": 1496 + }, + { + "epoch": 3.9368836291913216, + "grad_norm": 1.4664191007614136, + "learning_rate": 0.00021210526315789476, + "loss": 1.3139, + "step": 1497 + }, + { + "epoch": 3.9368836291913216, + "high_lr": 0.00021210526315789476, + "low_lr": 4.242105263157895e-06, + "step": 1497 + }, + { + "epoch": 3.9368836291913216, + "high_lr": 0.00021210526315789476, + "low_lr": 4.242105263157895e-06, + "step": 1497 + }, + { + "epoch": 3.9368836291913216, + "high_lr": 0.00021210526315789476, + "low_lr": 4.242105263157895e-06, + "step": 1497 + }, + { + "epoch": 3.9368836291913216, + "high_lr": 0.00021210526315789476, + "low_lr": 4.242105263157895e-06, + "step": 1497 + }, + { + "epoch": 3.9368836291913216, + "high_lr": 0.00021210526315789476, + "low_lr": 4.242105263157895e-06, + "step": 1497 + }, + { + "epoch": 3.9368836291913216, + "high_lr": 0.00021210526315789476, + "low_lr": 4.242105263157895e-06, + "step": 1497 + }, + { + "epoch": 3.9368836291913216, + "high_lr": 0.00021210526315789476, + "low_lr": 4.242105263157895e-06, + "step": 1497 + }, + { + "epoch": 3.9368836291913216, + "high_lr": 0.00021210526315789476, + "low_lr": 4.242105263157895e-06, + "step": 1497 + }, + { + "epoch": 3.9395134779750163, + "grad_norm": 1.480134129524231, + "learning_rate": 0.00021157894736842107, + "loss": 1.2808, + "step": 1498 + }, + { + "epoch": 3.9395134779750163, + "high_lr": 0.00021157894736842107, + "low_lr": 4.2315789473684215e-06, + "step": 1498 + }, + { + "epoch": 3.9395134779750163, + "high_lr": 0.00021157894736842107, + "low_lr": 4.2315789473684215e-06, + "step": 1498 + }, + { + "epoch": 3.9395134779750163, + "high_lr": 0.00021157894736842107, + "low_lr": 4.2315789473684215e-06, + "step": 1498 + }, + { + "epoch": 3.9395134779750163, + "high_lr": 0.00021157894736842107, + "low_lr": 4.2315789473684215e-06, + "step": 1498 + }, + { + "epoch": 3.9395134779750163, + "high_lr": 0.00021157894736842107, + "low_lr": 4.2315789473684215e-06, + "step": 1498 + }, + { + "epoch": 3.9395134779750163, + "high_lr": 0.00021157894736842107, + "low_lr": 4.2315789473684215e-06, + "step": 1498 + }, + { + "epoch": 3.9395134779750163, + "high_lr": 0.00021157894736842107, + "low_lr": 4.2315789473684215e-06, + "step": 1498 + }, + { + "epoch": 3.9395134779750163, + "high_lr": 0.00021157894736842107, + "low_lr": 4.2315789473684215e-06, + "step": 1498 + }, + { + "epoch": 3.9421433267587114, + "grad_norm": 1.5324522256851196, + "learning_rate": 0.00021105263157894738, + "loss": 1.3113, + "step": 1499 + }, + { + "epoch": 3.9421433267587114, + "high_lr": 0.00021105263157894738, + "low_lr": 4.221052631578948e-06, + "step": 1499 + }, + { + "epoch": 3.9421433267587114, + "high_lr": 0.00021105263157894738, + "low_lr": 4.221052631578948e-06, + "step": 1499 + }, + { + "epoch": 3.9421433267587114, + "high_lr": 0.00021105263157894738, + "low_lr": 4.221052631578948e-06, + "step": 1499 + }, + { + "epoch": 3.9421433267587114, + "high_lr": 0.00021105263157894738, + "low_lr": 4.221052631578948e-06, + "step": 1499 + }, + { + "epoch": 3.9421433267587114, + "high_lr": 0.00021105263157894738, + "low_lr": 4.221052631578948e-06, + "step": 1499 + }, + { + "epoch": 3.9421433267587114, + "high_lr": 0.00021105263157894738, + "low_lr": 4.221052631578948e-06, + "step": 1499 + }, + { + "epoch": 3.9421433267587114, + "high_lr": 0.00021105263157894738, + "low_lr": 4.221052631578948e-06, + "step": 1499 + }, + { + "epoch": 3.9421433267587114, + "high_lr": 0.00021105263157894738, + "low_lr": 4.221052631578948e-06, + "step": 1499 + }, + { + "epoch": 3.9447731755424065, + "grad_norm": 1.5184235572814941, + "learning_rate": 0.00021052631578947367, + "loss": 1.2854, + "step": 1500 + }, + { + "epoch": 3.9447731755424065, + "high_lr": 0.00021052631578947367, + "low_lr": 4.210526315789474e-06, + "step": 1500 + }, + { + "epoch": 3.9447731755424065, + "high_lr": 0.00021052631578947367, + "low_lr": 4.210526315789474e-06, + "step": 1500 + }, + { + "epoch": 3.9447731755424065, + "high_lr": 0.00021052631578947367, + "low_lr": 4.210526315789474e-06, + "step": 1500 + }, + { + "epoch": 3.9447731755424065, + "high_lr": 0.00021052631578947367, + "low_lr": 4.210526315789474e-06, + "step": 1500 + }, + { + "epoch": 3.9447731755424065, + "high_lr": 0.00021052631578947367, + "low_lr": 4.210526315789474e-06, + "step": 1500 + }, + { + "epoch": 3.9447731755424065, + "high_lr": 0.00021052631578947367, + "low_lr": 4.210526315789474e-06, + "step": 1500 + }, + { + "epoch": 3.9447731755424065, + "high_lr": 0.00021052631578947367, + "low_lr": 4.210526315789474e-06, + "step": 1500 + }, + { + "epoch": 3.9447731755424065, + "high_lr": 0.00021052631578947367, + "low_lr": 4.210526315789474e-06, + "step": 1500 + }, + { + "epoch": 3.947403024326101, + "grad_norm": 1.3334966897964478, + "learning_rate": 0.00021, + "loss": 1.3304, + "step": 1501 + }, + { + "epoch": 3.947403024326101, + "high_lr": 0.00021, + "low_lr": 4.2000000000000004e-06, + "step": 1501 + }, + { + "epoch": 3.947403024326101, + "high_lr": 0.00021, + "low_lr": 4.2000000000000004e-06, + "step": 1501 + }, + { + "epoch": 3.947403024326101, + "high_lr": 0.00021, + "low_lr": 4.2000000000000004e-06, + "step": 1501 + }, + { + "epoch": 3.947403024326101, + "high_lr": 0.00021, + "low_lr": 4.2000000000000004e-06, + "step": 1501 + }, + { + "epoch": 3.947403024326101, + "high_lr": 0.00021, + "low_lr": 4.2000000000000004e-06, + "step": 1501 + }, + { + "epoch": 3.947403024326101, + "high_lr": 0.00021, + "low_lr": 4.2000000000000004e-06, + "step": 1501 + }, + { + "epoch": 3.947403024326101, + "high_lr": 0.00021, + "low_lr": 4.2000000000000004e-06, + "step": 1501 + }, + { + "epoch": 3.947403024326101, + "high_lr": 0.00021, + "low_lr": 4.2000000000000004e-06, + "step": 1501 + }, + { + "epoch": 3.9500328731097962, + "grad_norm": 1.3624662160873413, + "learning_rate": 0.00020947368421052632, + "loss": 1.2916, + "step": 1502 + }, + { + "epoch": 3.9500328731097962, + "high_lr": 0.00020947368421052632, + "low_lr": 4.189473684210527e-06, + "step": 1502 + }, + { + "epoch": 3.9500328731097962, + "high_lr": 0.00020947368421052632, + "low_lr": 4.189473684210527e-06, + "step": 1502 + }, + { + "epoch": 3.9500328731097962, + "high_lr": 0.00020947368421052632, + "low_lr": 4.189473684210527e-06, + "step": 1502 + }, + { + "epoch": 3.9500328731097962, + "high_lr": 0.00020947368421052632, + "low_lr": 4.189473684210527e-06, + "step": 1502 + }, + { + "epoch": 3.9500328731097962, + "high_lr": 0.00020947368421052632, + "low_lr": 4.189473684210527e-06, + "step": 1502 + }, + { + "epoch": 3.9500328731097962, + "high_lr": 0.00020947368421052632, + "low_lr": 4.189473684210527e-06, + "step": 1502 + }, + { + "epoch": 3.9500328731097962, + "high_lr": 0.00020947368421052632, + "low_lr": 4.189473684210527e-06, + "step": 1502 + }, + { + "epoch": 3.9500328731097962, + "high_lr": 0.00020947368421052632, + "low_lr": 4.189473684210527e-06, + "step": 1502 + }, + { + "epoch": 3.952662721893491, + "grad_norm": 1.3479913473129272, + "learning_rate": 0.00020894736842105263, + "loss": 1.2774, + "step": 1503 + }, + { + "epoch": 3.952662721893491, + "high_lr": 0.00020894736842105263, + "low_lr": 4.178947368421053e-06, + "step": 1503 + }, + { + "epoch": 3.952662721893491, + "high_lr": 0.00020894736842105263, + "low_lr": 4.178947368421053e-06, + "step": 1503 + }, + { + "epoch": 3.952662721893491, + "high_lr": 0.00020894736842105263, + "low_lr": 4.178947368421053e-06, + "step": 1503 + }, + { + "epoch": 3.952662721893491, + "high_lr": 0.00020894736842105263, + "low_lr": 4.178947368421053e-06, + "step": 1503 + }, + { + "epoch": 3.952662721893491, + "high_lr": 0.00020894736842105263, + "low_lr": 4.178947368421053e-06, + "step": 1503 + }, + { + "epoch": 3.952662721893491, + "high_lr": 0.00020894736842105263, + "low_lr": 4.178947368421053e-06, + "step": 1503 + }, + { + "epoch": 3.952662721893491, + "high_lr": 0.00020894736842105263, + "low_lr": 4.178947368421053e-06, + "step": 1503 + }, + { + "epoch": 3.952662721893491, + "high_lr": 0.00020894736842105263, + "low_lr": 4.178947368421053e-06, + "step": 1503 + }, + { + "epoch": 3.955292570677186, + "grad_norm": 1.4971421957015991, + "learning_rate": 0.00020842105263157895, + "loss": 1.3091, + "step": 1504 + }, + { + "epoch": 3.955292570677186, + "high_lr": 0.00020842105263157895, + "low_lr": 4.1684210526315794e-06, + "step": 1504 + }, + { + "epoch": 3.955292570677186, + "high_lr": 0.00020842105263157895, + "low_lr": 4.1684210526315794e-06, + "step": 1504 + }, + { + "epoch": 3.955292570677186, + "high_lr": 0.00020842105263157895, + "low_lr": 4.1684210526315794e-06, + "step": 1504 + }, + { + "epoch": 3.955292570677186, + "high_lr": 0.00020842105263157895, + "low_lr": 4.1684210526315794e-06, + "step": 1504 + }, + { + "epoch": 3.955292570677186, + "high_lr": 0.00020842105263157895, + "low_lr": 4.1684210526315794e-06, + "step": 1504 + }, + { + "epoch": 3.955292570677186, + "high_lr": 0.00020842105263157895, + "low_lr": 4.1684210526315794e-06, + "step": 1504 + }, + { + "epoch": 3.955292570677186, + "high_lr": 0.00020842105263157895, + "low_lr": 4.1684210526315794e-06, + "step": 1504 + }, + { + "epoch": 3.955292570677186, + "high_lr": 0.00020842105263157895, + "low_lr": 4.1684210526315794e-06, + "step": 1504 + }, + { + "epoch": 3.957922419460881, + "grad_norm": 1.4270907640457153, + "learning_rate": 0.0002078947368421053, + "loss": 1.2585, + "step": 1505 + }, + { + "epoch": 3.957922419460881, + "high_lr": 0.0002078947368421053, + "low_lr": 4.157894736842106e-06, + "step": 1505 + }, + { + "epoch": 3.957922419460881, + "high_lr": 0.0002078947368421053, + "low_lr": 4.157894736842106e-06, + "step": 1505 + }, + { + "epoch": 3.957922419460881, + "high_lr": 0.0002078947368421053, + "low_lr": 4.157894736842106e-06, + "step": 1505 + }, + { + "epoch": 3.957922419460881, + "high_lr": 0.0002078947368421053, + "low_lr": 4.157894736842106e-06, + "step": 1505 + }, + { + "epoch": 3.957922419460881, + "high_lr": 0.0002078947368421053, + "low_lr": 4.157894736842106e-06, + "step": 1505 + }, + { + "epoch": 3.957922419460881, + "high_lr": 0.0002078947368421053, + "low_lr": 4.157894736842106e-06, + "step": 1505 + }, + { + "epoch": 3.957922419460881, + "high_lr": 0.0002078947368421053, + "low_lr": 4.157894736842106e-06, + "step": 1505 + }, + { + "epoch": 3.957922419460881, + "high_lr": 0.0002078947368421053, + "low_lr": 4.157894736842106e-06, + "step": 1505 + }, + { + "epoch": 3.9605522682445757, + "grad_norm": 1.5448126792907715, + "learning_rate": 0.0002073684210526316, + "loss": 1.2704, + "step": 1506 + }, + { + "epoch": 3.9605522682445757, + "high_lr": 0.0002073684210526316, + "low_lr": 4.147368421052632e-06, + "step": 1506 + }, + { + "epoch": 3.9605522682445757, + "high_lr": 0.0002073684210526316, + "low_lr": 4.147368421052632e-06, + "step": 1506 + }, + { + "epoch": 3.9605522682445757, + "high_lr": 0.0002073684210526316, + "low_lr": 4.147368421052632e-06, + "step": 1506 + }, + { + "epoch": 3.9605522682445757, + "high_lr": 0.0002073684210526316, + "low_lr": 4.147368421052632e-06, + "step": 1506 + }, + { + "epoch": 3.9605522682445757, + "high_lr": 0.0002073684210526316, + "low_lr": 4.147368421052632e-06, + "step": 1506 + }, + { + "epoch": 3.9605522682445757, + "high_lr": 0.0002073684210526316, + "low_lr": 4.147368421052632e-06, + "step": 1506 + }, + { + "epoch": 3.9605522682445757, + "high_lr": 0.0002073684210526316, + "low_lr": 4.147368421052632e-06, + "step": 1506 + }, + { + "epoch": 3.9605522682445757, + "high_lr": 0.0002073684210526316, + "low_lr": 4.147368421052632e-06, + "step": 1506 + }, + { + "epoch": 3.963182117028271, + "grad_norm": 1.743727684020996, + "learning_rate": 0.0002068421052631579, + "loss": 1.3103, + "step": 1507 + }, + { + "epoch": 3.963182117028271, + "high_lr": 0.0002068421052631579, + "low_lr": 4.136842105263158e-06, + "step": 1507 + }, + { + "epoch": 3.963182117028271, + "high_lr": 0.0002068421052631579, + "low_lr": 4.136842105263158e-06, + "step": 1507 + }, + { + "epoch": 3.963182117028271, + "high_lr": 0.0002068421052631579, + "low_lr": 4.136842105263158e-06, + "step": 1507 + }, + { + "epoch": 3.963182117028271, + "high_lr": 0.0002068421052631579, + "low_lr": 4.136842105263158e-06, + "step": 1507 + }, + { + "epoch": 3.963182117028271, + "high_lr": 0.0002068421052631579, + "low_lr": 4.136842105263158e-06, + "step": 1507 + }, + { + "epoch": 3.963182117028271, + "high_lr": 0.0002068421052631579, + "low_lr": 4.136842105263158e-06, + "step": 1507 + }, + { + "epoch": 3.963182117028271, + "high_lr": 0.0002068421052631579, + "low_lr": 4.136842105263158e-06, + "step": 1507 + }, + { + "epoch": 3.963182117028271, + "high_lr": 0.0002068421052631579, + "low_lr": 4.136842105263158e-06, + "step": 1507 + }, + { + "epoch": 3.965811965811966, + "grad_norm": 1.3927522897720337, + "learning_rate": 0.0002063157894736842, + "loss": 1.3128, + "step": 1508 + }, + { + "epoch": 3.965811965811966, + "high_lr": 0.0002063157894736842, + "low_lr": 4.126315789473685e-06, + "step": 1508 + }, + { + "epoch": 3.965811965811966, + "high_lr": 0.0002063157894736842, + "low_lr": 4.126315789473685e-06, + "step": 1508 + }, + { + "epoch": 3.965811965811966, + "high_lr": 0.0002063157894736842, + "low_lr": 4.126315789473685e-06, + "step": 1508 + }, + { + "epoch": 3.965811965811966, + "high_lr": 0.0002063157894736842, + "low_lr": 4.126315789473685e-06, + "step": 1508 + }, + { + "epoch": 3.965811965811966, + "high_lr": 0.0002063157894736842, + "low_lr": 4.126315789473685e-06, + "step": 1508 + }, + { + "epoch": 3.965811965811966, + "high_lr": 0.0002063157894736842, + "low_lr": 4.126315789473685e-06, + "step": 1508 + }, + { + "epoch": 3.965811965811966, + "high_lr": 0.0002063157894736842, + "low_lr": 4.126315789473685e-06, + "step": 1508 + }, + { + "epoch": 3.965811965811966, + "high_lr": 0.0002063157894736842, + "low_lr": 4.126315789473685e-06, + "step": 1508 + }, + { + "epoch": 3.9684418145956606, + "grad_norm": 1.514550805091858, + "learning_rate": 0.00020578947368421054, + "loss": 1.2983, + "step": 1509 + }, + { + "epoch": 3.9684418145956606, + "high_lr": 0.00020578947368421054, + "low_lr": 4.115789473684211e-06, + "step": 1509 + }, + { + "epoch": 3.9684418145956606, + "high_lr": 0.00020578947368421054, + "low_lr": 4.115789473684211e-06, + "step": 1509 + }, + { + "epoch": 3.9684418145956606, + "high_lr": 0.00020578947368421054, + "low_lr": 4.115789473684211e-06, + "step": 1509 + }, + { + "epoch": 3.9684418145956606, + "high_lr": 0.00020578947368421054, + "low_lr": 4.115789473684211e-06, + "step": 1509 + }, + { + "epoch": 3.9684418145956606, + "high_lr": 0.00020578947368421054, + "low_lr": 4.115789473684211e-06, + "step": 1509 + }, + { + "epoch": 3.9684418145956606, + "high_lr": 0.00020578947368421054, + "low_lr": 4.115789473684211e-06, + "step": 1509 + }, + { + "epoch": 3.9684418145956606, + "high_lr": 0.00020578947368421054, + "low_lr": 4.115789473684211e-06, + "step": 1509 + }, + { + "epoch": 3.9684418145956606, + "high_lr": 0.00020578947368421054, + "low_lr": 4.115789473684211e-06, + "step": 1509 + }, + { + "epoch": 3.9710716633793557, + "grad_norm": 1.374853253364563, + "learning_rate": 0.00020526315789473685, + "loss": 1.2859, + "step": 1510 + }, + { + "epoch": 3.9710716633793557, + "high_lr": 0.00020526315789473685, + "low_lr": 4.105263157894737e-06, + "step": 1510 + }, + { + "epoch": 3.9710716633793557, + "high_lr": 0.00020526315789473685, + "low_lr": 4.105263157894737e-06, + "step": 1510 + }, + { + "epoch": 3.9710716633793557, + "high_lr": 0.00020526315789473685, + "low_lr": 4.105263157894737e-06, + "step": 1510 + }, + { + "epoch": 3.9710716633793557, + "high_lr": 0.00020526315789473685, + "low_lr": 4.105263157894737e-06, + "step": 1510 + }, + { + "epoch": 3.9710716633793557, + "high_lr": 0.00020526315789473685, + "low_lr": 4.105263157894737e-06, + "step": 1510 + }, + { + "epoch": 3.9710716633793557, + "high_lr": 0.00020526315789473685, + "low_lr": 4.105263157894737e-06, + "step": 1510 + }, + { + "epoch": 3.9710716633793557, + "high_lr": 0.00020526315789473685, + "low_lr": 4.105263157894737e-06, + "step": 1510 + }, + { + "epoch": 3.9710716633793557, + "high_lr": 0.00020526315789473685, + "low_lr": 4.105263157894737e-06, + "step": 1510 + }, + { + "epoch": 3.973701512163051, + "grad_norm": 1.4939712285995483, + "learning_rate": 0.00020473684210526317, + "loss": 1.2691, + "step": 1511 + }, + { + "epoch": 3.973701512163051, + "high_lr": 0.00020473684210526317, + "low_lr": 4.094736842105264e-06, + "step": 1511 + }, + { + "epoch": 3.973701512163051, + "high_lr": 0.00020473684210526317, + "low_lr": 4.094736842105264e-06, + "step": 1511 + }, + { + "epoch": 3.973701512163051, + "high_lr": 0.00020473684210526317, + "low_lr": 4.094736842105264e-06, + "step": 1511 + }, + { + "epoch": 3.973701512163051, + "high_lr": 0.00020473684210526317, + "low_lr": 4.094736842105264e-06, + "step": 1511 + }, + { + "epoch": 3.973701512163051, + "high_lr": 0.00020473684210526317, + "low_lr": 4.094736842105264e-06, + "step": 1511 + }, + { + "epoch": 3.973701512163051, + "high_lr": 0.00020473684210526317, + "low_lr": 4.094736842105264e-06, + "step": 1511 + }, + { + "epoch": 3.973701512163051, + "high_lr": 0.00020473684210526317, + "low_lr": 4.094736842105264e-06, + "step": 1511 + }, + { + "epoch": 3.973701512163051, + "high_lr": 0.00020473684210526317, + "low_lr": 4.094736842105264e-06, + "step": 1511 + }, + { + "epoch": 3.9763313609467454, + "grad_norm": 1.342739224433899, + "learning_rate": 0.00020421052631578948, + "loss": 1.2671, + "step": 1512 + }, + { + "epoch": 3.9763313609467454, + "high_lr": 0.00020421052631578948, + "low_lr": 4.08421052631579e-06, + "step": 1512 + }, + { + "epoch": 3.9763313609467454, + "high_lr": 0.00020421052631578948, + "low_lr": 4.08421052631579e-06, + "step": 1512 + }, + { + "epoch": 3.9763313609467454, + "high_lr": 0.00020421052631578948, + "low_lr": 4.08421052631579e-06, + "step": 1512 + }, + { + "epoch": 3.9763313609467454, + "high_lr": 0.00020421052631578948, + "low_lr": 4.08421052631579e-06, + "step": 1512 + }, + { + "epoch": 3.9763313609467454, + "high_lr": 0.00020421052631578948, + "low_lr": 4.08421052631579e-06, + "step": 1512 + }, + { + "epoch": 3.9763313609467454, + "high_lr": 0.00020421052631578948, + "low_lr": 4.08421052631579e-06, + "step": 1512 + }, + { + "epoch": 3.9763313609467454, + "high_lr": 0.00020421052631578948, + "low_lr": 4.08421052631579e-06, + "step": 1512 + }, + { + "epoch": 3.9763313609467454, + "high_lr": 0.00020421052631578948, + "low_lr": 4.08421052631579e-06, + "step": 1512 + }, + { + "epoch": 3.9789612097304405, + "grad_norm": 1.4210036993026733, + "learning_rate": 0.00020368421052631582, + "loss": 1.2986, + "step": 1513 + }, + { + "epoch": 3.9789612097304405, + "high_lr": 0.00020368421052631582, + "low_lr": 4.073684210526316e-06, + "step": 1513 + }, + { + "epoch": 3.9789612097304405, + "high_lr": 0.00020368421052631582, + "low_lr": 4.073684210526316e-06, + "step": 1513 + }, + { + "epoch": 3.9789612097304405, + "high_lr": 0.00020368421052631582, + "low_lr": 4.073684210526316e-06, + "step": 1513 + }, + { + "epoch": 3.9789612097304405, + "high_lr": 0.00020368421052631582, + "low_lr": 4.073684210526316e-06, + "step": 1513 + }, + { + "epoch": 3.9789612097304405, + "high_lr": 0.00020368421052631582, + "low_lr": 4.073684210526316e-06, + "step": 1513 + }, + { + "epoch": 3.9789612097304405, + "high_lr": 0.00020368421052631582, + "low_lr": 4.073684210526316e-06, + "step": 1513 + }, + { + "epoch": 3.9789612097304405, + "high_lr": 0.00020368421052631582, + "low_lr": 4.073684210526316e-06, + "step": 1513 + }, + { + "epoch": 3.9789612097304405, + "high_lr": 0.00020368421052631582, + "low_lr": 4.073684210526316e-06, + "step": 1513 + }, + { + "epoch": 3.9815910585141356, + "grad_norm": 1.3917219638824463, + "learning_rate": 0.0002031578947368421, + "loss": 1.2982, + "step": 1514 + }, + { + "epoch": 3.9815910585141356, + "high_lr": 0.0002031578947368421, + "low_lr": 4.063157894736842e-06, + "step": 1514 + }, + { + "epoch": 3.9815910585141356, + "high_lr": 0.0002031578947368421, + "low_lr": 4.063157894736842e-06, + "step": 1514 + }, + { + "epoch": 3.9815910585141356, + "high_lr": 0.0002031578947368421, + "low_lr": 4.063157894736842e-06, + "step": 1514 + }, + { + "epoch": 3.9815910585141356, + "high_lr": 0.0002031578947368421, + "low_lr": 4.063157894736842e-06, + "step": 1514 + }, + { + "epoch": 3.9815910585141356, + "high_lr": 0.0002031578947368421, + "low_lr": 4.063157894736842e-06, + "step": 1514 + }, + { + "epoch": 3.9815910585141356, + "high_lr": 0.0002031578947368421, + "low_lr": 4.063157894736842e-06, + "step": 1514 + }, + { + "epoch": 3.9815910585141356, + "high_lr": 0.0002031578947368421, + "low_lr": 4.063157894736842e-06, + "step": 1514 + }, + { + "epoch": 3.9815910585141356, + "high_lr": 0.0002031578947368421, + "low_lr": 4.063157894736842e-06, + "step": 1514 + }, + { + "epoch": 3.9842209072978303, + "grad_norm": 1.4617544412612915, + "learning_rate": 0.00020263157894736842, + "loss": 1.2933, + "step": 1515 + }, + { + "epoch": 3.9842209072978303, + "high_lr": 0.00020263157894736842, + "low_lr": 4.052631578947368e-06, + "step": 1515 + }, + { + "epoch": 3.9842209072978303, + "high_lr": 0.00020263157894736842, + "low_lr": 4.052631578947368e-06, + "step": 1515 + }, + { + "epoch": 3.9842209072978303, + "high_lr": 0.00020263157894736842, + "low_lr": 4.052631578947368e-06, + "step": 1515 + }, + { + "epoch": 3.9842209072978303, + "high_lr": 0.00020263157894736842, + "low_lr": 4.052631578947368e-06, + "step": 1515 + }, + { + "epoch": 3.9842209072978303, + "high_lr": 0.00020263157894736842, + "low_lr": 4.052631578947368e-06, + "step": 1515 + }, + { + "epoch": 3.9842209072978303, + "high_lr": 0.00020263157894736842, + "low_lr": 4.052631578947368e-06, + "step": 1515 + }, + { + "epoch": 3.9842209072978303, + "high_lr": 0.00020263157894736842, + "low_lr": 4.052631578947368e-06, + "step": 1515 + }, + { + "epoch": 3.9842209072978303, + "high_lr": 0.00020263157894736842, + "low_lr": 4.052631578947368e-06, + "step": 1515 + }, + { + "epoch": 3.9868507560815254, + "grad_norm": 1.5116146802902222, + "learning_rate": 0.00020210526315789473, + "loss": 1.2887, + "step": 1516 + }, + { + "epoch": 3.9868507560815254, + "high_lr": 0.00020210526315789473, + "low_lr": 4.042105263157895e-06, + "step": 1516 + }, + { + "epoch": 3.9868507560815254, + "high_lr": 0.00020210526315789473, + "low_lr": 4.042105263157895e-06, + "step": 1516 + }, + { + "epoch": 3.9868507560815254, + "high_lr": 0.00020210526315789473, + "low_lr": 4.042105263157895e-06, + "step": 1516 + }, + { + "epoch": 3.9868507560815254, + "high_lr": 0.00020210526315789473, + "low_lr": 4.042105263157895e-06, + "step": 1516 + }, + { + "epoch": 3.9868507560815254, + "high_lr": 0.00020210526315789473, + "low_lr": 4.042105263157895e-06, + "step": 1516 + }, + { + "epoch": 3.9868507560815254, + "high_lr": 0.00020210526315789473, + "low_lr": 4.042105263157895e-06, + "step": 1516 + }, + { + "epoch": 3.9868507560815254, + "high_lr": 0.00020210526315789473, + "low_lr": 4.042105263157895e-06, + "step": 1516 + }, + { + "epoch": 3.9868507560815254, + "high_lr": 0.00020210526315789473, + "low_lr": 4.042105263157895e-06, + "step": 1516 + }, + { + "epoch": 3.9894806048652205, + "grad_norm": 1.4623873233795166, + "learning_rate": 0.00020157894736842104, + "loss": 1.3125, + "step": 1517 + }, + { + "epoch": 3.9894806048652205, + "high_lr": 0.00020157894736842104, + "low_lr": 4.031578947368422e-06, + "step": 1517 + }, + { + "epoch": 3.9894806048652205, + "high_lr": 0.00020157894736842104, + "low_lr": 4.031578947368422e-06, + "step": 1517 + }, + { + "epoch": 3.9894806048652205, + "high_lr": 0.00020157894736842104, + "low_lr": 4.031578947368422e-06, + "step": 1517 + }, + { + "epoch": 3.9894806048652205, + "high_lr": 0.00020157894736842104, + "low_lr": 4.031578947368422e-06, + "step": 1517 + }, + { + "epoch": 3.9894806048652205, + "high_lr": 0.00020157894736842104, + "low_lr": 4.031578947368422e-06, + "step": 1517 + }, + { + "epoch": 3.9894806048652205, + "high_lr": 0.00020157894736842104, + "low_lr": 4.031578947368422e-06, + "step": 1517 + }, + { + "epoch": 3.9894806048652205, + "high_lr": 0.00020157894736842104, + "low_lr": 4.031578947368422e-06, + "step": 1517 + }, + { + "epoch": 3.9894806048652205, + "high_lr": 0.00020157894736842104, + "low_lr": 4.031578947368422e-06, + "step": 1517 + }, + { + "epoch": 3.992110453648915, + "grad_norm": 1.4037643671035767, + "learning_rate": 0.00020105263157894738, + "loss": 1.3055, + "step": 1518 + }, + { + "epoch": 3.992110453648915, + "high_lr": 0.00020105263157894738, + "low_lr": 4.021052631578948e-06, + "step": 1518 + }, + { + "epoch": 3.992110453648915, + "high_lr": 0.00020105263157894738, + "low_lr": 4.021052631578948e-06, + "step": 1518 + }, + { + "epoch": 3.992110453648915, + "high_lr": 0.00020105263157894738, + "low_lr": 4.021052631578948e-06, + "step": 1518 + }, + { + "epoch": 3.992110453648915, + "high_lr": 0.00020105263157894738, + "low_lr": 4.021052631578948e-06, + "step": 1518 + }, + { + "epoch": 3.992110453648915, + "high_lr": 0.00020105263157894738, + "low_lr": 4.021052631578948e-06, + "step": 1518 + }, + { + "epoch": 3.992110453648915, + "high_lr": 0.00020105263157894738, + "low_lr": 4.021052631578948e-06, + "step": 1518 + }, + { + "epoch": 3.992110453648915, + "high_lr": 0.00020105263157894738, + "low_lr": 4.021052631578948e-06, + "step": 1518 + }, + { + "epoch": 3.992110453648915, + "high_lr": 0.00020105263157894738, + "low_lr": 4.021052631578948e-06, + "step": 1518 + }, + { + "epoch": 3.9947403024326102, + "grad_norm": 1.492454171180725, + "learning_rate": 0.0002005263157894737, + "loss": 1.3008, + "step": 1519 + }, + { + "epoch": 3.9947403024326102, + "high_lr": 0.0002005263157894737, + "low_lr": 4.010526315789474e-06, + "step": 1519 + }, + { + "epoch": 3.9947403024326102, + "high_lr": 0.0002005263157894737, + "low_lr": 4.010526315789474e-06, + "step": 1519 + }, + { + "epoch": 3.9947403024326102, + "high_lr": 0.0002005263157894737, + "low_lr": 4.010526315789474e-06, + "step": 1519 + }, + { + "epoch": 3.9947403024326102, + "high_lr": 0.0002005263157894737, + "low_lr": 4.010526315789474e-06, + "step": 1519 + }, + { + "epoch": 3.9947403024326102, + "high_lr": 0.0002005263157894737, + "low_lr": 4.010526315789474e-06, + "step": 1519 + }, + { + "epoch": 3.9947403024326102, + "high_lr": 0.0002005263157894737, + "low_lr": 4.010526315789474e-06, + "step": 1519 + }, + { + "epoch": 3.9947403024326102, + "high_lr": 0.0002005263157894737, + "low_lr": 4.010526315789474e-06, + "step": 1519 + }, + { + "epoch": 3.9947403024326102, + "high_lr": 0.0002005263157894737, + "low_lr": 4.010526315789474e-06, + "step": 1519 + }, + { + "epoch": 3.997370151216305, + "grad_norm": 1.3126296997070312, + "learning_rate": 0.0002, + "loss": 1.283, + "step": 1520 + }, + { + "epoch": 3.997370151216305, + "high_lr": 0.0002, + "low_lr": 4.000000000000001e-06, + "step": 1520 + }, + { + "epoch": 3.997370151216305, + "high_lr": 0.0002, + "low_lr": 4.000000000000001e-06, + "step": 1520 + }, + { + "epoch": 3.997370151216305, + "high_lr": 0.0002, + "low_lr": 4.000000000000001e-06, + "step": 1520 + }, + { + "epoch": 3.997370151216305, + "high_lr": 0.0002, + "low_lr": 4.000000000000001e-06, + "step": 1520 + }, + { + "epoch": 3.997370151216305, + "high_lr": 0.0002, + "low_lr": 4.000000000000001e-06, + "step": 1520 + }, + { + "epoch": 3.997370151216305, + "high_lr": 0.0002, + "low_lr": 4.000000000000001e-06, + "step": 1520 + }, + { + "epoch": 3.997370151216305, + "high_lr": 0.0002, + "low_lr": 4.000000000000001e-06, + "step": 1520 + }, + { + "epoch": 3.997370151216305, + "high_lr": 0.0002, + "low_lr": 4.000000000000001e-06, + "step": 1520 + }, + { + "epoch": 4.0, + "grad_norm": 1.4551256895065308, + "learning_rate": 0.0001994736842105263, + "loss": 1.2644, + "step": 1521 + }, + { + "epoch": 4.0, + "high_lr": 0.0001994736842105263, + "low_lr": 3.989473684210526e-06, + "step": 1521 + }, + { + "epoch": 4.0, + "high_lr": 0.0001994736842105263, + "low_lr": 3.989473684210526e-06, + "step": 1521 + }, + { + "epoch": 4.0, + "high_lr": 0.0001994736842105263, + "low_lr": 3.989473684210526e-06, + "step": 1521 + }, + { + "epoch": 4.0, + "high_lr": 0.0001994736842105263, + "low_lr": 3.989473684210526e-06, + "step": 1521 + }, + { + "epoch": 4.0, + "high_lr": 0.0001994736842105263, + "low_lr": 3.989473684210526e-06, + "step": 1521 + }, + { + "epoch": 4.0, + "high_lr": 0.0001994736842105263, + "low_lr": 3.989473684210526e-06, + "step": 1521 + }, + { + "epoch": 4.0, + "high_lr": 0.0001994736842105263, + "low_lr": 3.989473684210526e-06, + "step": 1521 + }, + { + "epoch": 4.0, + "high_lr": 0.0001994736842105263, + "low_lr": 3.989473684210526e-06, + "step": 1521 + }, + { + "epoch": 4.002629848783695, + "grad_norm": 1.3417943716049194, + "learning_rate": 0.00019894736842105264, + "loss": 1.2327, + "step": 1522 + }, + { + "epoch": 4.002629848783695, + "high_lr": 0.00019894736842105264, + "low_lr": 3.9789473684210525e-06, + "step": 1522 + }, + { + "epoch": 4.002629848783695, + "high_lr": 0.00019894736842105264, + "low_lr": 3.9789473684210525e-06, + "step": 1522 + }, + { + "epoch": 4.002629848783695, + "high_lr": 0.00019894736842105264, + "low_lr": 3.9789473684210525e-06, + "step": 1522 + }, + { + "epoch": 4.002629848783695, + "high_lr": 0.00019894736842105264, + "low_lr": 3.9789473684210525e-06, + "step": 1522 + }, + { + "epoch": 4.002629848783695, + "high_lr": 0.00019894736842105264, + "low_lr": 3.9789473684210525e-06, + "step": 1522 + }, + { + "epoch": 4.002629848783695, + "high_lr": 0.00019894736842105264, + "low_lr": 3.9789473684210525e-06, + "step": 1522 + }, + { + "epoch": 4.002629848783695, + "high_lr": 0.00019894736842105264, + "low_lr": 3.9789473684210525e-06, + "step": 1522 + }, + { + "epoch": 4.002629848783695, + "high_lr": 0.00019894736842105264, + "low_lr": 3.9789473684210525e-06, + "step": 1522 + }, + { + "epoch": 4.00525969756739, + "grad_norm": 1.7795209884643555, + "learning_rate": 0.00019842105263157895, + "loss": 1.2555, + "step": 1523 + }, + { + "epoch": 4.00525969756739, + "high_lr": 0.00019842105263157895, + "low_lr": 3.968421052631579e-06, + "step": 1523 + }, + { + "epoch": 4.00525969756739, + "high_lr": 0.00019842105263157895, + "low_lr": 3.968421052631579e-06, + "step": 1523 + }, + { + "epoch": 4.00525969756739, + "high_lr": 0.00019842105263157895, + "low_lr": 3.968421052631579e-06, + "step": 1523 + }, + { + "epoch": 4.00525969756739, + "high_lr": 0.00019842105263157895, + "low_lr": 3.968421052631579e-06, + "step": 1523 + }, + { + "epoch": 4.00525969756739, + "high_lr": 0.00019842105263157895, + "low_lr": 3.968421052631579e-06, + "step": 1523 + }, + { + "epoch": 4.00525969756739, + "high_lr": 0.00019842105263157895, + "low_lr": 3.968421052631579e-06, + "step": 1523 + }, + { + "epoch": 4.00525969756739, + "high_lr": 0.00019842105263157895, + "low_lr": 3.968421052631579e-06, + "step": 1523 + }, + { + "epoch": 4.00525969756739, + "high_lr": 0.00019842105263157895, + "low_lr": 3.968421052631579e-06, + "step": 1523 + }, + { + "epoch": 4.007889546351085, + "grad_norm": 1.3884600400924683, + "learning_rate": 0.00019789473684210526, + "loss": 1.2375, + "step": 1524 + }, + { + "epoch": 4.007889546351085, + "high_lr": 0.00019789473684210526, + "low_lr": 3.957894736842106e-06, + "step": 1524 + }, + { + "epoch": 4.007889546351085, + "high_lr": 0.00019789473684210526, + "low_lr": 3.957894736842106e-06, + "step": 1524 + }, + { + "epoch": 4.007889546351085, + "high_lr": 0.00019789473684210526, + "low_lr": 3.957894736842106e-06, + "step": 1524 + }, + { + "epoch": 4.007889546351085, + "high_lr": 0.00019789473684210526, + "low_lr": 3.957894736842106e-06, + "step": 1524 + }, + { + "epoch": 4.007889546351085, + "high_lr": 0.00019789473684210526, + "low_lr": 3.957894736842106e-06, + "step": 1524 + }, + { + "epoch": 4.007889546351085, + "high_lr": 0.00019789473684210526, + "low_lr": 3.957894736842106e-06, + "step": 1524 + }, + { + "epoch": 4.007889546351085, + "high_lr": 0.00019789473684210526, + "low_lr": 3.957894736842106e-06, + "step": 1524 + }, + { + "epoch": 4.007889546351085, + "high_lr": 0.00019789473684210526, + "low_lr": 3.957894736842106e-06, + "step": 1524 + }, + { + "epoch": 4.0105193951347795, + "grad_norm": 1.4155784845352173, + "learning_rate": 0.00019736842105263157, + "loss": 1.2417, + "step": 1525 + }, + { + "epoch": 4.0105193951347795, + "high_lr": 0.00019736842105263157, + "low_lr": 3.947368421052632e-06, + "step": 1525 + }, + { + "epoch": 4.0105193951347795, + "high_lr": 0.00019736842105263157, + "low_lr": 3.947368421052632e-06, + "step": 1525 + }, + { + "epoch": 4.0105193951347795, + "high_lr": 0.00019736842105263157, + "low_lr": 3.947368421052632e-06, + "step": 1525 + }, + { + "epoch": 4.0105193951347795, + "high_lr": 0.00019736842105263157, + "low_lr": 3.947368421052632e-06, + "step": 1525 + }, + { + "epoch": 4.0105193951347795, + "high_lr": 0.00019736842105263157, + "low_lr": 3.947368421052632e-06, + "step": 1525 + }, + { + "epoch": 4.0105193951347795, + "high_lr": 0.00019736842105263157, + "low_lr": 3.947368421052632e-06, + "step": 1525 + }, + { + "epoch": 4.0105193951347795, + "high_lr": 0.00019736842105263157, + "low_lr": 3.947368421052632e-06, + "step": 1525 + }, + { + "epoch": 4.0105193951347795, + "high_lr": 0.00019736842105263157, + "low_lr": 3.947368421052632e-06, + "step": 1525 + }, + { + "epoch": 4.013149243918475, + "grad_norm": 1.4495775699615479, + "learning_rate": 0.00019684210526315791, + "loss": 1.2783, + "step": 1526 + }, + { + "epoch": 4.013149243918475, + "high_lr": 0.00019684210526315791, + "low_lr": 3.936842105263159e-06, + "step": 1526 + }, + { + "epoch": 4.013149243918475, + "high_lr": 0.00019684210526315791, + "low_lr": 3.936842105263159e-06, + "step": 1526 + }, + { + "epoch": 4.013149243918475, + "high_lr": 0.00019684210526315791, + "low_lr": 3.936842105263159e-06, + "step": 1526 + }, + { + "epoch": 4.013149243918475, + "high_lr": 0.00019684210526315791, + "low_lr": 3.936842105263159e-06, + "step": 1526 + }, + { + "epoch": 4.013149243918475, + "high_lr": 0.00019684210526315791, + "low_lr": 3.936842105263159e-06, + "step": 1526 + }, + { + "epoch": 4.013149243918475, + "high_lr": 0.00019684210526315791, + "low_lr": 3.936842105263159e-06, + "step": 1526 + }, + { + "epoch": 4.013149243918475, + "high_lr": 0.00019684210526315791, + "low_lr": 3.936842105263159e-06, + "step": 1526 + }, + { + "epoch": 4.013149243918475, + "high_lr": 0.00019684210526315791, + "low_lr": 3.936842105263159e-06, + "step": 1526 + }, + { + "epoch": 4.01577909270217, + "grad_norm": 1.4377909898757935, + "learning_rate": 0.00019631578947368423, + "loss": 1.2653, + "step": 1527 + }, + { + "epoch": 4.01577909270217, + "high_lr": 0.00019631578947368423, + "low_lr": 3.926315789473685e-06, + "step": 1527 + }, + { + "epoch": 4.01577909270217, + "high_lr": 0.00019631578947368423, + "low_lr": 3.926315789473685e-06, + "step": 1527 + }, + { + "epoch": 4.01577909270217, + "high_lr": 0.00019631578947368423, + "low_lr": 3.926315789473685e-06, + "step": 1527 + }, + { + "epoch": 4.01577909270217, + "high_lr": 0.00019631578947368423, + "low_lr": 3.926315789473685e-06, + "step": 1527 + }, + { + "epoch": 4.01577909270217, + "high_lr": 0.00019631578947368423, + "low_lr": 3.926315789473685e-06, + "step": 1527 + }, + { + "epoch": 4.01577909270217, + "high_lr": 0.00019631578947368423, + "low_lr": 3.926315789473685e-06, + "step": 1527 + }, + { + "epoch": 4.01577909270217, + "high_lr": 0.00019631578947368423, + "low_lr": 3.926315789473685e-06, + "step": 1527 + }, + { + "epoch": 4.01577909270217, + "high_lr": 0.00019631578947368423, + "low_lr": 3.926315789473685e-06, + "step": 1527 + }, + { + "epoch": 4.018408941485864, + "grad_norm": 1.4182953834533691, + "learning_rate": 0.0001957894736842105, + "loss": 1.2415, + "step": 1528 + }, + { + "epoch": 4.018408941485864, + "high_lr": 0.0001957894736842105, + "low_lr": 3.9157894736842104e-06, + "step": 1528 + }, + { + "epoch": 4.018408941485864, + "high_lr": 0.0001957894736842105, + "low_lr": 3.9157894736842104e-06, + "step": 1528 + }, + { + "epoch": 4.018408941485864, + "high_lr": 0.0001957894736842105, + "low_lr": 3.9157894736842104e-06, + "step": 1528 + }, + { + "epoch": 4.018408941485864, + "high_lr": 0.0001957894736842105, + "low_lr": 3.9157894736842104e-06, + "step": 1528 + }, + { + "epoch": 4.018408941485864, + "high_lr": 0.0001957894736842105, + "low_lr": 3.9157894736842104e-06, + "step": 1528 + }, + { + "epoch": 4.018408941485864, + "high_lr": 0.0001957894736842105, + "low_lr": 3.9157894736842104e-06, + "step": 1528 + }, + { + "epoch": 4.018408941485864, + "high_lr": 0.0001957894736842105, + "low_lr": 3.9157894736842104e-06, + "step": 1528 + }, + { + "epoch": 4.018408941485864, + "high_lr": 0.0001957894736842105, + "low_lr": 3.9157894736842104e-06, + "step": 1528 + }, + { + "epoch": 4.02103879026956, + "grad_norm": 1.4995777606964111, + "learning_rate": 0.00019526315789473683, + "loss": 1.2168, + "step": 1529 + }, + { + "epoch": 4.02103879026956, + "high_lr": 0.00019526315789473683, + "low_lr": 3.905263157894737e-06, + "step": 1529 + }, + { + "epoch": 4.02103879026956, + "high_lr": 0.00019526315789473683, + "low_lr": 3.905263157894737e-06, + "step": 1529 + }, + { + "epoch": 4.02103879026956, + "high_lr": 0.00019526315789473683, + "low_lr": 3.905263157894737e-06, + "step": 1529 + }, + { + "epoch": 4.02103879026956, + "high_lr": 0.00019526315789473683, + "low_lr": 3.905263157894737e-06, + "step": 1529 + }, + { + "epoch": 4.02103879026956, + "high_lr": 0.00019526315789473683, + "low_lr": 3.905263157894737e-06, + "step": 1529 + }, + { + "epoch": 4.02103879026956, + "high_lr": 0.00019526315789473683, + "low_lr": 3.905263157894737e-06, + "step": 1529 + }, + { + "epoch": 4.02103879026956, + "high_lr": 0.00019526315789473683, + "low_lr": 3.905263157894737e-06, + "step": 1529 + }, + { + "epoch": 4.02103879026956, + "high_lr": 0.00019526315789473683, + "low_lr": 3.905263157894737e-06, + "step": 1529 + }, + { + "epoch": 4.023668639053255, + "grad_norm": 1.4290416240692139, + "learning_rate": 0.00019473684210526317, + "loss": 1.2391, + "step": 1530 + }, + { + "epoch": 4.023668639053255, + "high_lr": 0.00019473684210526317, + "low_lr": 3.894736842105263e-06, + "step": 1530 + }, + { + "epoch": 4.023668639053255, + "high_lr": 0.00019473684210526317, + "low_lr": 3.894736842105263e-06, + "step": 1530 + }, + { + "epoch": 4.023668639053255, + "high_lr": 0.00019473684210526317, + "low_lr": 3.894736842105263e-06, + "step": 1530 + }, + { + "epoch": 4.023668639053255, + "high_lr": 0.00019473684210526317, + "low_lr": 3.894736842105263e-06, + "step": 1530 + }, + { + "epoch": 4.023668639053255, + "high_lr": 0.00019473684210526317, + "low_lr": 3.894736842105263e-06, + "step": 1530 + }, + { + "epoch": 4.023668639053255, + "high_lr": 0.00019473684210526317, + "low_lr": 3.894736842105263e-06, + "step": 1530 + }, + { + "epoch": 4.023668639053255, + "high_lr": 0.00019473684210526317, + "low_lr": 3.894736842105263e-06, + "step": 1530 + }, + { + "epoch": 4.023668639053255, + "high_lr": 0.00019473684210526317, + "low_lr": 3.894736842105263e-06, + "step": 1530 + }, + { + "epoch": 4.026298487836949, + "grad_norm": 1.347391963005066, + "learning_rate": 0.00019421052631578948, + "loss": 1.239, + "step": 1531 + }, + { + "epoch": 4.026298487836949, + "high_lr": 0.00019421052631578948, + "low_lr": 3.884210526315789e-06, + "step": 1531 + }, + { + "epoch": 4.026298487836949, + "high_lr": 0.00019421052631578948, + "low_lr": 3.884210526315789e-06, + "step": 1531 + }, + { + "epoch": 4.026298487836949, + "high_lr": 0.00019421052631578948, + "low_lr": 3.884210526315789e-06, + "step": 1531 + }, + { + "epoch": 4.026298487836949, + "high_lr": 0.00019421052631578948, + "low_lr": 3.884210526315789e-06, + "step": 1531 + }, + { + "epoch": 4.026298487836949, + "high_lr": 0.00019421052631578948, + "low_lr": 3.884210526315789e-06, + "step": 1531 + }, + { + "epoch": 4.026298487836949, + "high_lr": 0.00019421052631578948, + "low_lr": 3.884210526315789e-06, + "step": 1531 + }, + { + "epoch": 4.026298487836949, + "high_lr": 0.00019421052631578948, + "low_lr": 3.884210526315789e-06, + "step": 1531 + }, + { + "epoch": 4.026298487836949, + "high_lr": 0.00019421052631578948, + "low_lr": 3.884210526315789e-06, + "step": 1531 + }, + { + "epoch": 4.028928336620645, + "grad_norm": 1.490256667137146, + "learning_rate": 0.0001936842105263158, + "loss": 1.187, + "step": 1532 + }, + { + "epoch": 4.028928336620645, + "high_lr": 0.0001936842105263158, + "low_lr": 3.873684210526316e-06, + "step": 1532 + }, + { + "epoch": 4.028928336620645, + "high_lr": 0.0001936842105263158, + "low_lr": 3.873684210526316e-06, + "step": 1532 + }, + { + "epoch": 4.028928336620645, + "high_lr": 0.0001936842105263158, + "low_lr": 3.873684210526316e-06, + "step": 1532 + }, + { + "epoch": 4.028928336620645, + "high_lr": 0.0001936842105263158, + "low_lr": 3.873684210526316e-06, + "step": 1532 + }, + { + "epoch": 4.028928336620645, + "high_lr": 0.0001936842105263158, + "low_lr": 3.873684210526316e-06, + "step": 1532 + }, + { + "epoch": 4.028928336620645, + "high_lr": 0.0001936842105263158, + "low_lr": 3.873684210526316e-06, + "step": 1532 + }, + { + "epoch": 4.028928336620645, + "high_lr": 0.0001936842105263158, + "low_lr": 3.873684210526316e-06, + "step": 1532 + }, + { + "epoch": 4.028928336620645, + "high_lr": 0.0001936842105263158, + "low_lr": 3.873684210526316e-06, + "step": 1532 + }, + { + "epoch": 4.031558185404339, + "grad_norm": 1.5121945142745972, + "learning_rate": 0.0001931578947368421, + "loss": 1.1633, + "step": 1533 + }, + { + "epoch": 4.031558185404339, + "high_lr": 0.0001931578947368421, + "low_lr": 3.863157894736843e-06, + "step": 1533 + }, + { + "epoch": 4.031558185404339, + "high_lr": 0.0001931578947368421, + "low_lr": 3.863157894736843e-06, + "step": 1533 + }, + { + "epoch": 4.031558185404339, + "high_lr": 0.0001931578947368421, + "low_lr": 3.863157894736843e-06, + "step": 1533 + }, + { + "epoch": 4.031558185404339, + "high_lr": 0.0001931578947368421, + "low_lr": 3.863157894736843e-06, + "step": 1533 + }, + { + "epoch": 4.031558185404339, + "high_lr": 0.0001931578947368421, + "low_lr": 3.863157894736843e-06, + "step": 1533 + }, + { + "epoch": 4.031558185404339, + "high_lr": 0.0001931578947368421, + "low_lr": 3.863157894736843e-06, + "step": 1533 + }, + { + "epoch": 4.031558185404339, + "high_lr": 0.0001931578947368421, + "low_lr": 3.863157894736843e-06, + "step": 1533 + }, + { + "epoch": 4.031558185404339, + "high_lr": 0.0001931578947368421, + "low_lr": 3.863157894736843e-06, + "step": 1533 + }, + { + "epoch": 4.034188034188034, + "grad_norm": 1.5313997268676758, + "learning_rate": 0.00019263157894736845, + "loss": 1.2624, + "step": 1534 + }, + { + "epoch": 4.034188034188034, + "high_lr": 0.00019263157894736845, + "low_lr": 3.852631578947369e-06, + "step": 1534 + }, + { + "epoch": 4.034188034188034, + "high_lr": 0.00019263157894736845, + "low_lr": 3.852631578947369e-06, + "step": 1534 + }, + { + "epoch": 4.034188034188034, + "high_lr": 0.00019263157894736845, + "low_lr": 3.852631578947369e-06, + "step": 1534 + }, + { + "epoch": 4.034188034188034, + "high_lr": 0.00019263157894736845, + "low_lr": 3.852631578947369e-06, + "step": 1534 + }, + { + "epoch": 4.034188034188034, + "high_lr": 0.00019263157894736845, + "low_lr": 3.852631578947369e-06, + "step": 1534 + }, + { + "epoch": 4.034188034188034, + "high_lr": 0.00019263157894736845, + "low_lr": 3.852631578947369e-06, + "step": 1534 + }, + { + "epoch": 4.034188034188034, + "high_lr": 0.00019263157894736845, + "low_lr": 3.852631578947369e-06, + "step": 1534 + }, + { + "epoch": 4.034188034188034, + "high_lr": 0.00019263157894736845, + "low_lr": 3.852631578947369e-06, + "step": 1534 + }, + { + "epoch": 4.036817882971729, + "grad_norm": 1.4823217391967773, + "learning_rate": 0.00019210526315789473, + "loss": 1.2216, + "step": 1535 + }, + { + "epoch": 4.036817882971729, + "high_lr": 0.00019210526315789473, + "low_lr": 3.842105263157895e-06, + "step": 1535 + }, + { + "epoch": 4.036817882971729, + "high_lr": 0.00019210526315789473, + "low_lr": 3.842105263157895e-06, + "step": 1535 + }, + { + "epoch": 4.036817882971729, + "high_lr": 0.00019210526315789473, + "low_lr": 3.842105263157895e-06, + "step": 1535 + }, + { + "epoch": 4.036817882971729, + "high_lr": 0.00019210526315789473, + "low_lr": 3.842105263157895e-06, + "step": 1535 + }, + { + "epoch": 4.036817882971729, + "high_lr": 0.00019210526315789473, + "low_lr": 3.842105263157895e-06, + "step": 1535 + }, + { + "epoch": 4.036817882971729, + "high_lr": 0.00019210526315789473, + "low_lr": 3.842105263157895e-06, + "step": 1535 + }, + { + "epoch": 4.036817882971729, + "high_lr": 0.00019210526315789473, + "low_lr": 3.842105263157895e-06, + "step": 1535 + }, + { + "epoch": 4.036817882971729, + "high_lr": 0.00019210526315789473, + "low_lr": 3.842105263157895e-06, + "step": 1535 + }, + { + "epoch": 4.039447731755424, + "grad_norm": 1.5323439836502075, + "learning_rate": 0.00019157894736842104, + "loss": 1.2006, + "step": 1536 + }, + { + "epoch": 4.039447731755424, + "high_lr": 0.00019157894736842104, + "low_lr": 3.831578947368421e-06, + "step": 1536 + }, + { + "epoch": 4.039447731755424, + "high_lr": 0.00019157894736842104, + "low_lr": 3.831578947368421e-06, + "step": 1536 + }, + { + "epoch": 4.039447731755424, + "high_lr": 0.00019157894736842104, + "low_lr": 3.831578947368421e-06, + "step": 1536 + }, + { + "epoch": 4.039447731755424, + "high_lr": 0.00019157894736842104, + "low_lr": 3.831578947368421e-06, + "step": 1536 + }, + { + "epoch": 4.039447731755424, + "high_lr": 0.00019157894736842104, + "low_lr": 3.831578947368421e-06, + "step": 1536 + }, + { + "epoch": 4.039447731755424, + "high_lr": 0.00019157894736842104, + "low_lr": 3.831578947368421e-06, + "step": 1536 + }, + { + "epoch": 4.039447731755424, + "high_lr": 0.00019157894736842104, + "low_lr": 3.831578947368421e-06, + "step": 1536 + }, + { + "epoch": 4.039447731755424, + "high_lr": 0.00019157894736842104, + "low_lr": 3.831578947368421e-06, + "step": 1536 + }, + { + "epoch": 4.042077580539119, + "grad_norm": 1.4558566808700562, + "learning_rate": 0.00019105263157894736, + "loss": 1.1886, + "step": 1537 + }, + { + "epoch": 4.042077580539119, + "high_lr": 0.00019105263157894736, + "low_lr": 3.821052631578947e-06, + "step": 1537 + }, + { + "epoch": 4.042077580539119, + "high_lr": 0.00019105263157894736, + "low_lr": 3.821052631578947e-06, + "step": 1537 + }, + { + "epoch": 4.042077580539119, + "high_lr": 0.00019105263157894736, + "low_lr": 3.821052631578947e-06, + "step": 1537 + }, + { + "epoch": 4.042077580539119, + "high_lr": 0.00019105263157894736, + "low_lr": 3.821052631578947e-06, + "step": 1537 + }, + { + "epoch": 4.042077580539119, + "high_lr": 0.00019105263157894736, + "low_lr": 3.821052631578947e-06, + "step": 1537 + }, + { + "epoch": 4.042077580539119, + "high_lr": 0.00019105263157894736, + "low_lr": 3.821052631578947e-06, + "step": 1537 + }, + { + "epoch": 4.042077580539119, + "high_lr": 0.00019105263157894736, + "low_lr": 3.821052631578947e-06, + "step": 1537 + }, + { + "epoch": 4.042077580539119, + "high_lr": 0.00019105263157894736, + "low_lr": 3.821052631578947e-06, + "step": 1537 + }, + { + "epoch": 4.044707429322814, + "grad_norm": 1.3596470355987549, + "learning_rate": 0.0001905263157894737, + "loss": 1.2352, + "step": 1538 + }, + { + "epoch": 4.044707429322814, + "high_lr": 0.0001905263157894737, + "low_lr": 3.810526315789474e-06, + "step": 1538 + }, + { + "epoch": 4.044707429322814, + "high_lr": 0.0001905263157894737, + "low_lr": 3.810526315789474e-06, + "step": 1538 + }, + { + "epoch": 4.044707429322814, + "high_lr": 0.0001905263157894737, + "low_lr": 3.810526315789474e-06, + "step": 1538 + }, + { + "epoch": 4.044707429322814, + "high_lr": 0.0001905263157894737, + "low_lr": 3.810526315789474e-06, + "step": 1538 + }, + { + "epoch": 4.044707429322814, + "high_lr": 0.0001905263157894737, + "low_lr": 3.810526315789474e-06, + "step": 1538 + }, + { + "epoch": 4.044707429322814, + "high_lr": 0.0001905263157894737, + "low_lr": 3.810526315789474e-06, + "step": 1538 + }, + { + "epoch": 4.044707429322814, + "high_lr": 0.0001905263157894737, + "low_lr": 3.810526315789474e-06, + "step": 1538 + }, + { + "epoch": 4.044707429322814, + "high_lr": 0.0001905263157894737, + "low_lr": 3.810526315789474e-06, + "step": 1538 + }, + { + "epoch": 4.047337278106509, + "grad_norm": 1.5192526578903198, + "learning_rate": 0.00019, + "loss": 1.2235, + "step": 1539 + }, + { + "epoch": 4.047337278106509, + "high_lr": 0.00019, + "low_lr": 3.8000000000000005e-06, + "step": 1539 + }, + { + "epoch": 4.047337278106509, + "high_lr": 0.00019, + "low_lr": 3.8000000000000005e-06, + "step": 1539 + }, + { + "epoch": 4.047337278106509, + "high_lr": 0.00019, + "low_lr": 3.8000000000000005e-06, + "step": 1539 + }, + { + "epoch": 4.047337278106509, + "high_lr": 0.00019, + "low_lr": 3.8000000000000005e-06, + "step": 1539 + }, + { + "epoch": 4.047337278106509, + "high_lr": 0.00019, + "low_lr": 3.8000000000000005e-06, + "step": 1539 + }, + { + "epoch": 4.047337278106509, + "high_lr": 0.00019, + "low_lr": 3.8000000000000005e-06, + "step": 1539 + }, + { + "epoch": 4.047337278106509, + "high_lr": 0.00019, + "low_lr": 3.8000000000000005e-06, + "step": 1539 + }, + { + "epoch": 4.047337278106509, + "high_lr": 0.00019, + "low_lr": 3.8000000000000005e-06, + "step": 1539 + }, + { + "epoch": 4.049967126890204, + "grad_norm": 1.5695946216583252, + "learning_rate": 0.00018947368421052632, + "loss": 1.2062, + "step": 1540 + }, + { + "epoch": 4.049967126890204, + "high_lr": 0.00018947368421052632, + "low_lr": 3.789473684210527e-06, + "step": 1540 + }, + { + "epoch": 4.049967126890204, + "high_lr": 0.00018947368421052632, + "low_lr": 3.789473684210527e-06, + "step": 1540 + }, + { + "epoch": 4.049967126890204, + "high_lr": 0.00018947368421052632, + "low_lr": 3.789473684210527e-06, + "step": 1540 + }, + { + "epoch": 4.049967126890204, + "high_lr": 0.00018947368421052632, + "low_lr": 3.789473684210527e-06, + "step": 1540 + }, + { + "epoch": 4.049967126890204, + "high_lr": 0.00018947368421052632, + "low_lr": 3.789473684210527e-06, + "step": 1540 + }, + { + "epoch": 4.049967126890204, + "high_lr": 0.00018947368421052632, + "low_lr": 3.789473684210527e-06, + "step": 1540 + }, + { + "epoch": 4.049967126890204, + "high_lr": 0.00018947368421052632, + "low_lr": 3.789473684210527e-06, + "step": 1540 + }, + { + "epoch": 4.049967126890204, + "high_lr": 0.00018947368421052632, + "low_lr": 3.789473684210527e-06, + "step": 1540 + }, + { + "epoch": 4.052596975673898, + "grad_norm": 1.7779476642608643, + "learning_rate": 0.00018894736842105264, + "loss": 1.2194, + "step": 1541 + }, + { + "epoch": 4.052596975673898, + "high_lr": 0.00018894736842105264, + "low_lr": 3.778947368421053e-06, + "step": 1541 + }, + { + "epoch": 4.052596975673898, + "high_lr": 0.00018894736842105264, + "low_lr": 3.778947368421053e-06, + "step": 1541 + }, + { + "epoch": 4.052596975673898, + "high_lr": 0.00018894736842105264, + "low_lr": 3.778947368421053e-06, + "step": 1541 + }, + { + "epoch": 4.052596975673898, + "high_lr": 0.00018894736842105264, + "low_lr": 3.778947368421053e-06, + "step": 1541 + }, + { + "epoch": 4.052596975673898, + "high_lr": 0.00018894736842105264, + "low_lr": 3.778947368421053e-06, + "step": 1541 + }, + { + "epoch": 4.052596975673898, + "high_lr": 0.00018894736842105264, + "low_lr": 3.778947368421053e-06, + "step": 1541 + }, + { + "epoch": 4.052596975673898, + "high_lr": 0.00018894736842105264, + "low_lr": 3.778947368421053e-06, + "step": 1541 + }, + { + "epoch": 4.052596975673898, + "high_lr": 0.00018894736842105264, + "low_lr": 3.778947368421053e-06, + "step": 1541 + }, + { + "epoch": 4.055226824457594, + "grad_norm": 1.5357064008712769, + "learning_rate": 0.00018842105263157895, + "loss": 1.2305, + "step": 1542 + }, + { + "epoch": 4.055226824457594, + "high_lr": 0.00018842105263157895, + "low_lr": 3.768421052631579e-06, + "step": 1542 + }, + { + "epoch": 4.055226824457594, + "high_lr": 0.00018842105263157895, + "low_lr": 3.768421052631579e-06, + "step": 1542 + }, + { + "epoch": 4.055226824457594, + "high_lr": 0.00018842105263157895, + "low_lr": 3.768421052631579e-06, + "step": 1542 + }, + { + "epoch": 4.055226824457594, + "high_lr": 0.00018842105263157895, + "low_lr": 3.768421052631579e-06, + "step": 1542 + }, + { + "epoch": 4.055226824457594, + "high_lr": 0.00018842105263157895, + "low_lr": 3.768421052631579e-06, + "step": 1542 + }, + { + "epoch": 4.055226824457594, + "high_lr": 0.00018842105263157895, + "low_lr": 3.768421052631579e-06, + "step": 1542 + }, + { + "epoch": 4.055226824457594, + "high_lr": 0.00018842105263157895, + "low_lr": 3.768421052631579e-06, + "step": 1542 + }, + { + "epoch": 4.055226824457594, + "high_lr": 0.00018842105263157895, + "low_lr": 3.768421052631579e-06, + "step": 1542 + }, + { + "epoch": 4.057856673241289, + "grad_norm": 1.4252111911773682, + "learning_rate": 0.00018789473684210526, + "loss": 1.2283, + "step": 1543 + }, + { + "epoch": 4.057856673241289, + "high_lr": 0.00018789473684210526, + "low_lr": 3.7578947368421053e-06, + "step": 1543 + }, + { + "epoch": 4.057856673241289, + "high_lr": 0.00018789473684210526, + "low_lr": 3.7578947368421053e-06, + "step": 1543 + }, + { + "epoch": 4.057856673241289, + "high_lr": 0.00018789473684210526, + "low_lr": 3.7578947368421053e-06, + "step": 1543 + }, + { + "epoch": 4.057856673241289, + "high_lr": 0.00018789473684210526, + "low_lr": 3.7578947368421053e-06, + "step": 1543 + }, + { + "epoch": 4.057856673241289, + "high_lr": 0.00018789473684210526, + "low_lr": 3.7578947368421053e-06, + "step": 1543 + }, + { + "epoch": 4.057856673241289, + "high_lr": 0.00018789473684210526, + "low_lr": 3.7578947368421053e-06, + "step": 1543 + }, + { + "epoch": 4.057856673241289, + "high_lr": 0.00018789473684210526, + "low_lr": 3.7578947368421053e-06, + "step": 1543 + }, + { + "epoch": 4.057856673241289, + "high_lr": 0.00018789473684210526, + "low_lr": 3.7578947368421053e-06, + "step": 1543 + }, + { + "epoch": 4.060486522024983, + "grad_norm": 1.531919240951538, + "learning_rate": 0.00018736842105263158, + "loss": 1.2027, + "step": 1544 + }, + { + "epoch": 4.060486522024983, + "high_lr": 0.00018736842105263158, + "low_lr": 3.7473684210526317e-06, + "step": 1544 + }, + { + "epoch": 4.060486522024983, + "high_lr": 0.00018736842105263158, + "low_lr": 3.7473684210526317e-06, + "step": 1544 + }, + { + "epoch": 4.060486522024983, + "high_lr": 0.00018736842105263158, + "low_lr": 3.7473684210526317e-06, + "step": 1544 + }, + { + "epoch": 4.060486522024983, + "high_lr": 0.00018736842105263158, + "low_lr": 3.7473684210526317e-06, + "step": 1544 + }, + { + "epoch": 4.060486522024983, + "high_lr": 0.00018736842105263158, + "low_lr": 3.7473684210526317e-06, + "step": 1544 + }, + { + "epoch": 4.060486522024983, + "high_lr": 0.00018736842105263158, + "low_lr": 3.7473684210526317e-06, + "step": 1544 + }, + { + "epoch": 4.060486522024983, + "high_lr": 0.00018736842105263158, + "low_lr": 3.7473684210526317e-06, + "step": 1544 + }, + { + "epoch": 4.060486522024983, + "high_lr": 0.00018736842105263158, + "low_lr": 3.7473684210526317e-06, + "step": 1544 + }, + { + "epoch": 4.063116370808679, + "grad_norm": 1.4464205503463745, + "learning_rate": 0.0001868421052631579, + "loss": 1.184, + "step": 1545 + }, + { + "epoch": 4.063116370808679, + "high_lr": 0.0001868421052631579, + "low_lr": 3.736842105263158e-06, + "step": 1545 + }, + { + "epoch": 4.063116370808679, + "high_lr": 0.0001868421052631579, + "low_lr": 3.736842105263158e-06, + "step": 1545 + }, + { + "epoch": 4.063116370808679, + "high_lr": 0.0001868421052631579, + "low_lr": 3.736842105263158e-06, + "step": 1545 + }, + { + "epoch": 4.063116370808679, + "high_lr": 0.0001868421052631579, + "low_lr": 3.736842105263158e-06, + "step": 1545 + }, + { + "epoch": 4.063116370808679, + "high_lr": 0.0001868421052631579, + "low_lr": 3.736842105263158e-06, + "step": 1545 + }, + { + "epoch": 4.063116370808679, + "high_lr": 0.0001868421052631579, + "low_lr": 3.736842105263158e-06, + "step": 1545 + }, + { + "epoch": 4.063116370808679, + "high_lr": 0.0001868421052631579, + "low_lr": 3.736842105263158e-06, + "step": 1545 + }, + { + "epoch": 4.063116370808679, + "high_lr": 0.0001868421052631579, + "low_lr": 3.736842105263158e-06, + "step": 1545 + }, + { + "epoch": 4.0657462195923735, + "grad_norm": 1.3648449182510376, + "learning_rate": 0.00018631578947368423, + "loss": 1.2097, + "step": 1546 + }, + { + "epoch": 4.0657462195923735, + "high_lr": 0.00018631578947368423, + "low_lr": 3.7263157894736848e-06, + "step": 1546 + }, + { + "epoch": 4.0657462195923735, + "high_lr": 0.00018631578947368423, + "low_lr": 3.7263157894736848e-06, + "step": 1546 + }, + { + "epoch": 4.0657462195923735, + "high_lr": 0.00018631578947368423, + "low_lr": 3.7263157894736848e-06, + "step": 1546 + }, + { + "epoch": 4.0657462195923735, + "high_lr": 0.00018631578947368423, + "low_lr": 3.7263157894736848e-06, + "step": 1546 + }, + { + "epoch": 4.0657462195923735, + "high_lr": 0.00018631578947368423, + "low_lr": 3.7263157894736848e-06, + "step": 1546 + }, + { + "epoch": 4.0657462195923735, + "high_lr": 0.00018631578947368423, + "low_lr": 3.7263157894736848e-06, + "step": 1546 + }, + { + "epoch": 4.0657462195923735, + "high_lr": 0.00018631578947368423, + "low_lr": 3.7263157894736848e-06, + "step": 1546 + }, + { + "epoch": 4.0657462195923735, + "high_lr": 0.00018631578947368423, + "low_lr": 3.7263157894736848e-06, + "step": 1546 + }, + { + "epoch": 4.068376068376068, + "grad_norm": 1.4777864217758179, + "learning_rate": 0.00018578947368421054, + "loss": 1.2012, + "step": 1547 + }, + { + "epoch": 4.068376068376068, + "high_lr": 0.00018578947368421054, + "low_lr": 3.715789473684211e-06, + "step": 1547 + }, + { + "epoch": 4.068376068376068, + "high_lr": 0.00018578947368421054, + "low_lr": 3.715789473684211e-06, + "step": 1547 + }, + { + "epoch": 4.068376068376068, + "high_lr": 0.00018578947368421054, + "low_lr": 3.715789473684211e-06, + "step": 1547 + }, + { + "epoch": 4.068376068376068, + "high_lr": 0.00018578947368421054, + "low_lr": 3.715789473684211e-06, + "step": 1547 + }, + { + "epoch": 4.068376068376068, + "high_lr": 0.00018578947368421054, + "low_lr": 3.715789473684211e-06, + "step": 1547 + }, + { + "epoch": 4.068376068376068, + "high_lr": 0.00018578947368421054, + "low_lr": 3.715789473684211e-06, + "step": 1547 + }, + { + "epoch": 4.068376068376068, + "high_lr": 0.00018578947368421054, + "low_lr": 3.715789473684211e-06, + "step": 1547 + }, + { + "epoch": 4.068376068376068, + "high_lr": 0.00018578947368421054, + "low_lr": 3.715789473684211e-06, + "step": 1547 + }, + { + "epoch": 4.071005917159764, + "grad_norm": 1.5571383237838745, + "learning_rate": 0.00018526315789473685, + "loss": 1.2002, + "step": 1548 + }, + { + "epoch": 4.071005917159764, + "high_lr": 0.00018526315789473685, + "low_lr": 3.7052631578947374e-06, + "step": 1548 + }, + { + "epoch": 4.071005917159764, + "high_lr": 0.00018526315789473685, + "low_lr": 3.7052631578947374e-06, + "step": 1548 + }, + { + "epoch": 4.071005917159764, + "high_lr": 0.00018526315789473685, + "low_lr": 3.7052631578947374e-06, + "step": 1548 + }, + { + "epoch": 4.071005917159764, + "high_lr": 0.00018526315789473685, + "low_lr": 3.7052631578947374e-06, + "step": 1548 + }, + { + "epoch": 4.071005917159764, + "high_lr": 0.00018526315789473685, + "low_lr": 3.7052631578947374e-06, + "step": 1548 + }, + { + "epoch": 4.071005917159764, + "high_lr": 0.00018526315789473685, + "low_lr": 3.7052631578947374e-06, + "step": 1548 + }, + { + "epoch": 4.071005917159764, + "high_lr": 0.00018526315789473685, + "low_lr": 3.7052631578947374e-06, + "step": 1548 + }, + { + "epoch": 4.071005917159764, + "high_lr": 0.00018526315789473685, + "low_lr": 3.7052631578947374e-06, + "step": 1548 + }, + { + "epoch": 4.073635765943458, + "grad_norm": 1.5273700952529907, + "learning_rate": 0.00018473684210526317, + "loss": 1.1797, + "step": 1549 + }, + { + "epoch": 4.073635765943458, + "high_lr": 0.00018473684210526317, + "low_lr": 3.6947368421052637e-06, + "step": 1549 + }, + { + "epoch": 4.073635765943458, + "high_lr": 0.00018473684210526317, + "low_lr": 3.6947368421052637e-06, + "step": 1549 + }, + { + "epoch": 4.073635765943458, + "high_lr": 0.00018473684210526317, + "low_lr": 3.6947368421052637e-06, + "step": 1549 + }, + { + "epoch": 4.073635765943458, + "high_lr": 0.00018473684210526317, + "low_lr": 3.6947368421052637e-06, + "step": 1549 + }, + { + "epoch": 4.073635765943458, + "high_lr": 0.00018473684210526317, + "low_lr": 3.6947368421052637e-06, + "step": 1549 + }, + { + "epoch": 4.073635765943458, + "high_lr": 0.00018473684210526317, + "low_lr": 3.6947368421052637e-06, + "step": 1549 + }, + { + "epoch": 4.073635765943458, + "high_lr": 0.00018473684210526317, + "low_lr": 3.6947368421052637e-06, + "step": 1549 + }, + { + "epoch": 4.073635765943458, + "high_lr": 0.00018473684210526317, + "low_lr": 3.6947368421052637e-06, + "step": 1549 + }, + { + "epoch": 4.076265614727153, + "grad_norm": 1.395696759223938, + "learning_rate": 0.00018421052631578948, + "loss": 1.2308, + "step": 1550 + }, + { + "epoch": 4.076265614727153, + "high_lr": 0.00018421052631578948, + "low_lr": 3.6842105263157896e-06, + "step": 1550 + }, + { + "epoch": 4.076265614727153, + "high_lr": 0.00018421052631578948, + "low_lr": 3.6842105263157896e-06, + "step": 1550 + }, + { + "epoch": 4.076265614727153, + "high_lr": 0.00018421052631578948, + "low_lr": 3.6842105263157896e-06, + "step": 1550 + }, + { + "epoch": 4.076265614727153, + "high_lr": 0.00018421052631578948, + "low_lr": 3.6842105263157896e-06, + "step": 1550 + }, + { + "epoch": 4.076265614727153, + "high_lr": 0.00018421052631578948, + "low_lr": 3.6842105263157896e-06, + "step": 1550 + }, + { + "epoch": 4.076265614727153, + "high_lr": 0.00018421052631578948, + "low_lr": 3.6842105263157896e-06, + "step": 1550 + }, + { + "epoch": 4.076265614727153, + "high_lr": 0.00018421052631578948, + "low_lr": 3.6842105263157896e-06, + "step": 1550 + }, + { + "epoch": 4.076265614727153, + "high_lr": 0.00018421052631578948, + "low_lr": 3.6842105263157896e-06, + "step": 1550 + }, + { + "epoch": 4.0788954635108485, + "grad_norm": 1.3241750001907349, + "learning_rate": 0.0001836842105263158, + "loss": 1.2319, + "step": 1551 + }, + { + "epoch": 4.0788954635108485, + "high_lr": 0.0001836842105263158, + "low_lr": 3.673684210526316e-06, + "step": 1551 + }, + { + "epoch": 4.0788954635108485, + "high_lr": 0.0001836842105263158, + "low_lr": 3.673684210526316e-06, + "step": 1551 + }, + { + "epoch": 4.0788954635108485, + "high_lr": 0.0001836842105263158, + "low_lr": 3.673684210526316e-06, + "step": 1551 + }, + { + "epoch": 4.0788954635108485, + "high_lr": 0.0001836842105263158, + "low_lr": 3.673684210526316e-06, + "step": 1551 + }, + { + "epoch": 4.0788954635108485, + "high_lr": 0.0001836842105263158, + "low_lr": 3.673684210526316e-06, + "step": 1551 + }, + { + "epoch": 4.0788954635108485, + "high_lr": 0.0001836842105263158, + "low_lr": 3.673684210526316e-06, + "step": 1551 + }, + { + "epoch": 4.0788954635108485, + "high_lr": 0.0001836842105263158, + "low_lr": 3.673684210526316e-06, + "step": 1551 + }, + { + "epoch": 4.0788954635108485, + "high_lr": 0.0001836842105263158, + "low_lr": 3.673684210526316e-06, + "step": 1551 + }, + { + "epoch": 4.081525312294543, + "grad_norm": 1.4620137214660645, + "learning_rate": 0.0001831578947368421, + "loss": 1.2283, + "step": 1552 + }, + { + "epoch": 4.081525312294543, + "high_lr": 0.0001831578947368421, + "low_lr": 3.6631578947368423e-06, + "step": 1552 + }, + { + "epoch": 4.081525312294543, + "high_lr": 0.0001831578947368421, + "low_lr": 3.6631578947368423e-06, + "step": 1552 + }, + { + "epoch": 4.081525312294543, + "high_lr": 0.0001831578947368421, + "low_lr": 3.6631578947368423e-06, + "step": 1552 + }, + { + "epoch": 4.081525312294543, + "high_lr": 0.0001831578947368421, + "low_lr": 3.6631578947368423e-06, + "step": 1552 + }, + { + "epoch": 4.081525312294543, + "high_lr": 0.0001831578947368421, + "low_lr": 3.6631578947368423e-06, + "step": 1552 + }, + { + "epoch": 4.081525312294543, + "high_lr": 0.0001831578947368421, + "low_lr": 3.6631578947368423e-06, + "step": 1552 + }, + { + "epoch": 4.081525312294543, + "high_lr": 0.0001831578947368421, + "low_lr": 3.6631578947368423e-06, + "step": 1552 + }, + { + "epoch": 4.081525312294543, + "high_lr": 0.0001831578947368421, + "low_lr": 3.6631578947368423e-06, + "step": 1552 + }, + { + "epoch": 4.084155161078238, + "grad_norm": 1.4948234558105469, + "learning_rate": 0.00018263157894736842, + "loss": 1.2602, + "step": 1553 + }, + { + "epoch": 4.084155161078238, + "high_lr": 0.00018263157894736842, + "low_lr": 3.6526315789473686e-06, + "step": 1553 + }, + { + "epoch": 4.084155161078238, + "high_lr": 0.00018263157894736842, + "low_lr": 3.6526315789473686e-06, + "step": 1553 + }, + { + "epoch": 4.084155161078238, + "high_lr": 0.00018263157894736842, + "low_lr": 3.6526315789473686e-06, + "step": 1553 + }, + { + "epoch": 4.084155161078238, + "high_lr": 0.00018263157894736842, + "low_lr": 3.6526315789473686e-06, + "step": 1553 + }, + { + "epoch": 4.084155161078238, + "high_lr": 0.00018263157894736842, + "low_lr": 3.6526315789473686e-06, + "step": 1553 + }, + { + "epoch": 4.084155161078238, + "high_lr": 0.00018263157894736842, + "low_lr": 3.6526315789473686e-06, + "step": 1553 + }, + { + "epoch": 4.084155161078238, + "high_lr": 0.00018263157894736842, + "low_lr": 3.6526315789473686e-06, + "step": 1553 + }, + { + "epoch": 4.084155161078238, + "high_lr": 0.00018263157894736842, + "low_lr": 3.6526315789473686e-06, + "step": 1553 + }, + { + "epoch": 4.0867850098619325, + "grad_norm": 1.5336121320724487, + "learning_rate": 0.00018210526315789476, + "loss": 1.1819, + "step": 1554 + }, + { + "epoch": 4.0867850098619325, + "high_lr": 0.00018210526315789476, + "low_lr": 3.642105263157895e-06, + "step": 1554 + }, + { + "epoch": 4.0867850098619325, + "high_lr": 0.00018210526315789476, + "low_lr": 3.642105263157895e-06, + "step": 1554 + }, + { + "epoch": 4.0867850098619325, + "high_lr": 0.00018210526315789476, + "low_lr": 3.642105263157895e-06, + "step": 1554 + }, + { + "epoch": 4.0867850098619325, + "high_lr": 0.00018210526315789476, + "low_lr": 3.642105263157895e-06, + "step": 1554 + }, + { + "epoch": 4.0867850098619325, + "high_lr": 0.00018210526315789476, + "low_lr": 3.642105263157895e-06, + "step": 1554 + }, + { + "epoch": 4.0867850098619325, + "high_lr": 0.00018210526315789476, + "low_lr": 3.642105263157895e-06, + "step": 1554 + }, + { + "epoch": 4.0867850098619325, + "high_lr": 0.00018210526315789476, + "low_lr": 3.642105263157895e-06, + "step": 1554 + }, + { + "epoch": 4.0867850098619325, + "high_lr": 0.00018210526315789476, + "low_lr": 3.642105263157895e-06, + "step": 1554 + }, + { + "epoch": 4.089414858645628, + "grad_norm": 1.4120430946350098, + "learning_rate": 0.00018157894736842107, + "loss": 1.2153, + "step": 1555 + }, + { + "epoch": 4.089414858645628, + "high_lr": 0.00018157894736842107, + "low_lr": 3.6315789473684217e-06, + "step": 1555 + }, + { + "epoch": 4.089414858645628, + "high_lr": 0.00018157894736842107, + "low_lr": 3.6315789473684217e-06, + "step": 1555 + }, + { + "epoch": 4.089414858645628, + "high_lr": 0.00018157894736842107, + "low_lr": 3.6315789473684217e-06, + "step": 1555 + }, + { + "epoch": 4.089414858645628, + "high_lr": 0.00018157894736842107, + "low_lr": 3.6315789473684217e-06, + "step": 1555 + }, + { + "epoch": 4.089414858645628, + "high_lr": 0.00018157894736842107, + "low_lr": 3.6315789473684217e-06, + "step": 1555 + }, + { + "epoch": 4.089414858645628, + "high_lr": 0.00018157894736842107, + "low_lr": 3.6315789473684217e-06, + "step": 1555 + }, + { + "epoch": 4.089414858645628, + "high_lr": 0.00018157894736842107, + "low_lr": 3.6315789473684217e-06, + "step": 1555 + }, + { + "epoch": 4.089414858645628, + "high_lr": 0.00018157894736842107, + "low_lr": 3.6315789473684217e-06, + "step": 1555 + }, + { + "epoch": 4.092044707429323, + "grad_norm": 1.483199954032898, + "learning_rate": 0.00018105263157894739, + "loss": 1.2308, + "step": 1556 + }, + { + "epoch": 4.092044707429323, + "high_lr": 0.00018105263157894739, + "low_lr": 3.621052631578948e-06, + "step": 1556 + }, + { + "epoch": 4.092044707429323, + "high_lr": 0.00018105263157894739, + "low_lr": 3.621052631578948e-06, + "step": 1556 + }, + { + "epoch": 4.092044707429323, + "high_lr": 0.00018105263157894739, + "low_lr": 3.621052631578948e-06, + "step": 1556 + }, + { + "epoch": 4.092044707429323, + "high_lr": 0.00018105263157894739, + "low_lr": 3.621052631578948e-06, + "step": 1556 + }, + { + "epoch": 4.092044707429323, + "high_lr": 0.00018105263157894739, + "low_lr": 3.621052631578948e-06, + "step": 1556 + }, + { + "epoch": 4.092044707429323, + "high_lr": 0.00018105263157894739, + "low_lr": 3.621052631578948e-06, + "step": 1556 + }, + { + "epoch": 4.092044707429323, + "high_lr": 0.00018105263157894739, + "low_lr": 3.621052631578948e-06, + "step": 1556 + }, + { + "epoch": 4.092044707429323, + "high_lr": 0.00018105263157894739, + "low_lr": 3.621052631578948e-06, + "step": 1556 + }, + { + "epoch": 4.094674556213017, + "grad_norm": 1.3914930820465088, + "learning_rate": 0.00018052631578947367, + "loss": 1.1732, + "step": 1557 + }, + { + "epoch": 4.094674556213017, + "high_lr": 0.00018052631578947367, + "low_lr": 3.610526315789474e-06, + "step": 1557 + }, + { + "epoch": 4.094674556213017, + "high_lr": 0.00018052631578947367, + "low_lr": 3.610526315789474e-06, + "step": 1557 + }, + { + "epoch": 4.094674556213017, + "high_lr": 0.00018052631578947367, + "low_lr": 3.610526315789474e-06, + "step": 1557 + }, + { + "epoch": 4.094674556213017, + "high_lr": 0.00018052631578947367, + "low_lr": 3.610526315789474e-06, + "step": 1557 + }, + { + "epoch": 4.094674556213017, + "high_lr": 0.00018052631578947367, + "low_lr": 3.610526315789474e-06, + "step": 1557 + }, + { + "epoch": 4.094674556213017, + "high_lr": 0.00018052631578947367, + "low_lr": 3.610526315789474e-06, + "step": 1557 + }, + { + "epoch": 4.094674556213017, + "high_lr": 0.00018052631578947367, + "low_lr": 3.610526315789474e-06, + "step": 1557 + }, + { + "epoch": 4.094674556213017, + "high_lr": 0.00018052631578947367, + "low_lr": 3.610526315789474e-06, + "step": 1557 + }, + { + "epoch": 4.097304404996713, + "grad_norm": 1.4019222259521484, + "learning_rate": 0.00017999999999999998, + "loss": 1.225, + "step": 1558 + }, + { + "epoch": 4.097304404996713, + "high_lr": 0.00017999999999999998, + "low_lr": 3.6000000000000003e-06, + "step": 1558 + }, + { + "epoch": 4.097304404996713, + "high_lr": 0.00017999999999999998, + "low_lr": 3.6000000000000003e-06, + "step": 1558 + }, + { + "epoch": 4.097304404996713, + "high_lr": 0.00017999999999999998, + "low_lr": 3.6000000000000003e-06, + "step": 1558 + }, + { + "epoch": 4.097304404996713, + "high_lr": 0.00017999999999999998, + "low_lr": 3.6000000000000003e-06, + "step": 1558 + }, + { + "epoch": 4.097304404996713, + "high_lr": 0.00017999999999999998, + "low_lr": 3.6000000000000003e-06, + "step": 1558 + }, + { + "epoch": 4.097304404996713, + "high_lr": 0.00017999999999999998, + "low_lr": 3.6000000000000003e-06, + "step": 1558 + }, + { + "epoch": 4.097304404996713, + "high_lr": 0.00017999999999999998, + "low_lr": 3.6000000000000003e-06, + "step": 1558 + }, + { + "epoch": 4.097304404996713, + "high_lr": 0.00017999999999999998, + "low_lr": 3.6000000000000003e-06, + "step": 1558 + }, + { + "epoch": 4.0999342537804075, + "grad_norm": 1.376409888267517, + "learning_rate": 0.00017947368421052632, + "loss": 1.2313, + "step": 1559 + }, + { + "epoch": 4.0999342537804075, + "high_lr": 0.00017947368421052632, + "low_lr": 3.5894736842105266e-06, + "step": 1559 + }, + { + "epoch": 4.0999342537804075, + "high_lr": 0.00017947368421052632, + "low_lr": 3.5894736842105266e-06, + "step": 1559 + }, + { + "epoch": 4.0999342537804075, + "high_lr": 0.00017947368421052632, + "low_lr": 3.5894736842105266e-06, + "step": 1559 + }, + { + "epoch": 4.0999342537804075, + "high_lr": 0.00017947368421052632, + "low_lr": 3.5894736842105266e-06, + "step": 1559 + }, + { + "epoch": 4.0999342537804075, + "high_lr": 0.00017947368421052632, + "low_lr": 3.5894736842105266e-06, + "step": 1559 + }, + { + "epoch": 4.0999342537804075, + "high_lr": 0.00017947368421052632, + "low_lr": 3.5894736842105266e-06, + "step": 1559 + }, + { + "epoch": 4.0999342537804075, + "high_lr": 0.00017947368421052632, + "low_lr": 3.5894736842105266e-06, + "step": 1559 + }, + { + "epoch": 4.0999342537804075, + "high_lr": 0.00017947368421052632, + "low_lr": 3.5894736842105266e-06, + "step": 1559 + }, + { + "epoch": 4.102564102564102, + "grad_norm": 1.4729450941085815, + "learning_rate": 0.00017894736842105264, + "loss": 1.2115, + "step": 1560 + }, + { + "epoch": 4.102564102564102, + "high_lr": 0.00017894736842105264, + "low_lr": 3.578947368421053e-06, + "step": 1560 + }, + { + "epoch": 4.102564102564102, + "high_lr": 0.00017894736842105264, + "low_lr": 3.578947368421053e-06, + "step": 1560 + }, + { + "epoch": 4.102564102564102, + "high_lr": 0.00017894736842105264, + "low_lr": 3.578947368421053e-06, + "step": 1560 + }, + { + "epoch": 4.102564102564102, + "high_lr": 0.00017894736842105264, + "low_lr": 3.578947368421053e-06, + "step": 1560 + }, + { + "epoch": 4.102564102564102, + "high_lr": 0.00017894736842105264, + "low_lr": 3.578947368421053e-06, + "step": 1560 + }, + { + "epoch": 4.102564102564102, + "high_lr": 0.00017894736842105264, + "low_lr": 3.578947368421053e-06, + "step": 1560 + }, + { + "epoch": 4.102564102564102, + "high_lr": 0.00017894736842105264, + "low_lr": 3.578947368421053e-06, + "step": 1560 + }, + { + "epoch": 4.102564102564102, + "high_lr": 0.00017894736842105264, + "low_lr": 3.578947368421053e-06, + "step": 1560 + }, + { + "epoch": 4.105193951347798, + "grad_norm": 1.512831449508667, + "learning_rate": 0.00017842105263157895, + "loss": 1.2355, + "step": 1561 + }, + { + "epoch": 4.105193951347798, + "high_lr": 0.00017842105263157895, + "low_lr": 3.5684210526315792e-06, + "step": 1561 + }, + { + "epoch": 4.105193951347798, + "high_lr": 0.00017842105263157895, + "low_lr": 3.5684210526315792e-06, + "step": 1561 + }, + { + "epoch": 4.105193951347798, + "high_lr": 0.00017842105263157895, + "low_lr": 3.5684210526315792e-06, + "step": 1561 + }, + { + "epoch": 4.105193951347798, + "high_lr": 0.00017842105263157895, + "low_lr": 3.5684210526315792e-06, + "step": 1561 + }, + { + "epoch": 4.105193951347798, + "high_lr": 0.00017842105263157895, + "low_lr": 3.5684210526315792e-06, + "step": 1561 + }, + { + "epoch": 4.105193951347798, + "high_lr": 0.00017842105263157895, + "low_lr": 3.5684210526315792e-06, + "step": 1561 + }, + { + "epoch": 4.105193951347798, + "high_lr": 0.00017842105263157895, + "low_lr": 3.5684210526315792e-06, + "step": 1561 + }, + { + "epoch": 4.105193951347798, + "high_lr": 0.00017842105263157895, + "low_lr": 3.5684210526315792e-06, + "step": 1561 + }, + { + "epoch": 4.107823800131492, + "grad_norm": 1.4762496948242188, + "learning_rate": 0.00017789473684210526, + "loss": 1.2217, + "step": 1562 + }, + { + "epoch": 4.107823800131492, + "high_lr": 0.00017789473684210526, + "low_lr": 3.5578947368421056e-06, + "step": 1562 + }, + { + "epoch": 4.107823800131492, + "high_lr": 0.00017789473684210526, + "low_lr": 3.5578947368421056e-06, + "step": 1562 + }, + { + "epoch": 4.107823800131492, + "high_lr": 0.00017789473684210526, + "low_lr": 3.5578947368421056e-06, + "step": 1562 + }, + { + "epoch": 4.107823800131492, + "high_lr": 0.00017789473684210526, + "low_lr": 3.5578947368421056e-06, + "step": 1562 + }, + { + "epoch": 4.107823800131492, + "high_lr": 0.00017789473684210526, + "low_lr": 3.5578947368421056e-06, + "step": 1562 + }, + { + "epoch": 4.107823800131492, + "high_lr": 0.00017789473684210526, + "low_lr": 3.5578947368421056e-06, + "step": 1562 + }, + { + "epoch": 4.107823800131492, + "high_lr": 0.00017789473684210526, + "low_lr": 3.5578947368421056e-06, + "step": 1562 + }, + { + "epoch": 4.107823800131492, + "high_lr": 0.00017789473684210526, + "low_lr": 3.5578947368421056e-06, + "step": 1562 + }, + { + "epoch": 4.110453648915187, + "grad_norm": 1.7933307886123657, + "learning_rate": 0.0001773684210526316, + "loss": 1.2195, + "step": 1563 + }, + { + "epoch": 4.110453648915187, + "high_lr": 0.0001773684210526316, + "low_lr": 3.5473684210526323e-06, + "step": 1563 + }, + { + "epoch": 4.110453648915187, + "high_lr": 0.0001773684210526316, + "low_lr": 3.5473684210526323e-06, + "step": 1563 + }, + { + "epoch": 4.110453648915187, + "high_lr": 0.0001773684210526316, + "low_lr": 3.5473684210526323e-06, + "step": 1563 + }, + { + "epoch": 4.110453648915187, + "high_lr": 0.0001773684210526316, + "low_lr": 3.5473684210526323e-06, + "step": 1563 + }, + { + "epoch": 4.110453648915187, + "high_lr": 0.0001773684210526316, + "low_lr": 3.5473684210526323e-06, + "step": 1563 + }, + { + "epoch": 4.110453648915187, + "high_lr": 0.0001773684210526316, + "low_lr": 3.5473684210526323e-06, + "step": 1563 + }, + { + "epoch": 4.110453648915187, + "high_lr": 0.0001773684210526316, + "low_lr": 3.5473684210526323e-06, + "step": 1563 + }, + { + "epoch": 4.110453648915187, + "high_lr": 0.0001773684210526316, + "low_lr": 3.5473684210526323e-06, + "step": 1563 + }, + { + "epoch": 4.113083497698883, + "grad_norm": 1.4589201211929321, + "learning_rate": 0.0001768421052631579, + "loss": 1.259, + "step": 1564 + }, + { + "epoch": 4.113083497698883, + "high_lr": 0.0001768421052631579, + "low_lr": 3.536842105263158e-06, + "step": 1564 + }, + { + "epoch": 4.113083497698883, + "high_lr": 0.0001768421052631579, + "low_lr": 3.536842105263158e-06, + "step": 1564 + }, + { + "epoch": 4.113083497698883, + "high_lr": 0.0001768421052631579, + "low_lr": 3.536842105263158e-06, + "step": 1564 + }, + { + "epoch": 4.113083497698883, + "high_lr": 0.0001768421052631579, + "low_lr": 3.536842105263158e-06, + "step": 1564 + }, + { + "epoch": 4.113083497698883, + "high_lr": 0.0001768421052631579, + "low_lr": 3.536842105263158e-06, + "step": 1564 + }, + { + "epoch": 4.113083497698883, + "high_lr": 0.0001768421052631579, + "low_lr": 3.536842105263158e-06, + "step": 1564 + }, + { + "epoch": 4.113083497698883, + "high_lr": 0.0001768421052631579, + "low_lr": 3.536842105263158e-06, + "step": 1564 + }, + { + "epoch": 4.113083497698883, + "high_lr": 0.0001768421052631579, + "low_lr": 3.536842105263158e-06, + "step": 1564 + }, + { + "epoch": 4.115713346482577, + "grad_norm": 1.5560483932495117, + "learning_rate": 0.0001763157894736842, + "loss": 1.2098, + "step": 1565 + }, + { + "epoch": 4.115713346482577, + "high_lr": 0.0001763157894736842, + "low_lr": 3.5263157894736846e-06, + "step": 1565 + }, + { + "epoch": 4.115713346482577, + "high_lr": 0.0001763157894736842, + "low_lr": 3.5263157894736846e-06, + "step": 1565 + }, + { + "epoch": 4.115713346482577, + "high_lr": 0.0001763157894736842, + "low_lr": 3.5263157894736846e-06, + "step": 1565 + }, + { + "epoch": 4.115713346482577, + "high_lr": 0.0001763157894736842, + "low_lr": 3.5263157894736846e-06, + "step": 1565 + }, + { + "epoch": 4.115713346482577, + "high_lr": 0.0001763157894736842, + "low_lr": 3.5263157894736846e-06, + "step": 1565 + }, + { + "epoch": 4.115713346482577, + "high_lr": 0.0001763157894736842, + "low_lr": 3.5263157894736846e-06, + "step": 1565 + }, + { + "epoch": 4.115713346482577, + "high_lr": 0.0001763157894736842, + "low_lr": 3.5263157894736846e-06, + "step": 1565 + }, + { + "epoch": 4.115713346482577, + "high_lr": 0.0001763157894736842, + "low_lr": 3.5263157894736846e-06, + "step": 1565 + }, + { + "epoch": 4.118343195266272, + "grad_norm": 1.5046355724334717, + "learning_rate": 0.00017578947368421052, + "loss": 1.1998, + "step": 1566 + }, + { + "epoch": 4.118343195266272, + "high_lr": 0.00017578947368421052, + "low_lr": 3.515789473684211e-06, + "step": 1566 + }, + { + "epoch": 4.118343195266272, + "high_lr": 0.00017578947368421052, + "low_lr": 3.515789473684211e-06, + "step": 1566 + }, + { + "epoch": 4.118343195266272, + "high_lr": 0.00017578947368421052, + "low_lr": 3.515789473684211e-06, + "step": 1566 + }, + { + "epoch": 4.118343195266272, + "high_lr": 0.00017578947368421052, + "low_lr": 3.515789473684211e-06, + "step": 1566 + }, + { + "epoch": 4.118343195266272, + "high_lr": 0.00017578947368421052, + "low_lr": 3.515789473684211e-06, + "step": 1566 + }, + { + "epoch": 4.118343195266272, + "high_lr": 0.00017578947368421052, + "low_lr": 3.515789473684211e-06, + "step": 1566 + }, + { + "epoch": 4.118343195266272, + "high_lr": 0.00017578947368421052, + "low_lr": 3.515789473684211e-06, + "step": 1566 + }, + { + "epoch": 4.118343195266272, + "high_lr": 0.00017578947368421052, + "low_lr": 3.515789473684211e-06, + "step": 1566 + }, + { + "epoch": 4.1209730440499674, + "grad_norm": 1.4738900661468506, + "learning_rate": 0.00017526315789473686, + "loss": 1.2065, + "step": 1567 + }, + { + "epoch": 4.1209730440499674, + "high_lr": 0.00017526315789473686, + "low_lr": 3.505263157894737e-06, + "step": 1567 + }, + { + "epoch": 4.1209730440499674, + "high_lr": 0.00017526315789473686, + "low_lr": 3.505263157894737e-06, + "step": 1567 + }, + { + "epoch": 4.1209730440499674, + "high_lr": 0.00017526315789473686, + "low_lr": 3.505263157894737e-06, + "step": 1567 + }, + { + "epoch": 4.1209730440499674, + "high_lr": 0.00017526315789473686, + "low_lr": 3.505263157894737e-06, + "step": 1567 + }, + { + "epoch": 4.1209730440499674, + "high_lr": 0.00017526315789473686, + "low_lr": 3.505263157894737e-06, + "step": 1567 + }, + { + "epoch": 4.1209730440499674, + "high_lr": 0.00017526315789473686, + "low_lr": 3.505263157894737e-06, + "step": 1567 + }, + { + "epoch": 4.1209730440499674, + "high_lr": 0.00017526315789473686, + "low_lr": 3.505263157894737e-06, + "step": 1567 + }, + { + "epoch": 4.1209730440499674, + "high_lr": 0.00017526315789473686, + "low_lr": 3.505263157894737e-06, + "step": 1567 + }, + { + "epoch": 4.123602892833662, + "grad_norm": 1.3990614414215088, + "learning_rate": 0.00017473684210526317, + "loss": 1.1996, + "step": 1568 + }, + { + "epoch": 4.123602892833662, + "high_lr": 0.00017473684210526317, + "low_lr": 3.4947368421052635e-06, + "step": 1568 + }, + { + "epoch": 4.123602892833662, + "high_lr": 0.00017473684210526317, + "low_lr": 3.4947368421052635e-06, + "step": 1568 + }, + { + "epoch": 4.123602892833662, + "high_lr": 0.00017473684210526317, + "low_lr": 3.4947368421052635e-06, + "step": 1568 + }, + { + "epoch": 4.123602892833662, + "high_lr": 0.00017473684210526317, + "low_lr": 3.4947368421052635e-06, + "step": 1568 + }, + { + "epoch": 4.123602892833662, + "high_lr": 0.00017473684210526317, + "low_lr": 3.4947368421052635e-06, + "step": 1568 + }, + { + "epoch": 4.123602892833662, + "high_lr": 0.00017473684210526317, + "low_lr": 3.4947368421052635e-06, + "step": 1568 + }, + { + "epoch": 4.123602892833662, + "high_lr": 0.00017473684210526317, + "low_lr": 3.4947368421052635e-06, + "step": 1568 + }, + { + "epoch": 4.123602892833662, + "high_lr": 0.00017473684210526317, + "low_lr": 3.4947368421052635e-06, + "step": 1568 + }, + { + "epoch": 4.126232741617357, + "grad_norm": 1.5344288349151611, + "learning_rate": 0.00017421052631578948, + "loss": 1.2541, + "step": 1569 + }, + { + "epoch": 4.126232741617357, + "high_lr": 0.00017421052631578948, + "low_lr": 3.48421052631579e-06, + "step": 1569 + }, + { + "epoch": 4.126232741617357, + "high_lr": 0.00017421052631578948, + "low_lr": 3.48421052631579e-06, + "step": 1569 + }, + { + "epoch": 4.126232741617357, + "high_lr": 0.00017421052631578948, + "low_lr": 3.48421052631579e-06, + "step": 1569 + }, + { + "epoch": 4.126232741617357, + "high_lr": 0.00017421052631578948, + "low_lr": 3.48421052631579e-06, + "step": 1569 + }, + { + "epoch": 4.126232741617357, + "high_lr": 0.00017421052631578948, + "low_lr": 3.48421052631579e-06, + "step": 1569 + }, + { + "epoch": 4.126232741617357, + "high_lr": 0.00017421052631578948, + "low_lr": 3.48421052631579e-06, + "step": 1569 + }, + { + "epoch": 4.126232741617357, + "high_lr": 0.00017421052631578948, + "low_lr": 3.48421052631579e-06, + "step": 1569 + }, + { + "epoch": 4.126232741617357, + "high_lr": 0.00017421052631578948, + "low_lr": 3.48421052631579e-06, + "step": 1569 + }, + { + "epoch": 4.128862590401052, + "grad_norm": 1.587054967880249, + "learning_rate": 0.0001736842105263158, + "loss": 1.2023, + "step": 1570 + }, + { + "epoch": 4.128862590401052, + "high_lr": 0.0001736842105263158, + "low_lr": 3.473684210526316e-06, + "step": 1570 + }, + { + "epoch": 4.128862590401052, + "high_lr": 0.0001736842105263158, + "low_lr": 3.473684210526316e-06, + "step": 1570 + }, + { + "epoch": 4.128862590401052, + "high_lr": 0.0001736842105263158, + "low_lr": 3.473684210526316e-06, + "step": 1570 + }, + { + "epoch": 4.128862590401052, + "high_lr": 0.0001736842105263158, + "low_lr": 3.473684210526316e-06, + "step": 1570 + }, + { + "epoch": 4.128862590401052, + "high_lr": 0.0001736842105263158, + "low_lr": 3.473684210526316e-06, + "step": 1570 + }, + { + "epoch": 4.128862590401052, + "high_lr": 0.0001736842105263158, + "low_lr": 3.473684210526316e-06, + "step": 1570 + }, + { + "epoch": 4.128862590401052, + "high_lr": 0.0001736842105263158, + "low_lr": 3.473684210526316e-06, + "step": 1570 + }, + { + "epoch": 4.128862590401052, + "high_lr": 0.0001736842105263158, + "low_lr": 3.473684210526316e-06, + "step": 1570 + }, + { + "epoch": 4.131492439184747, + "grad_norm": 1.4721533060073853, + "learning_rate": 0.0001731578947368421, + "loss": 1.2065, + "step": 1571 + }, + { + "epoch": 4.131492439184747, + "high_lr": 0.0001731578947368421, + "low_lr": 3.463157894736842e-06, + "step": 1571 + }, + { + "epoch": 4.131492439184747, + "high_lr": 0.0001731578947368421, + "low_lr": 3.463157894736842e-06, + "step": 1571 + }, + { + "epoch": 4.131492439184747, + "high_lr": 0.0001731578947368421, + "low_lr": 3.463157894736842e-06, + "step": 1571 + }, + { + "epoch": 4.131492439184747, + "high_lr": 0.0001731578947368421, + "low_lr": 3.463157894736842e-06, + "step": 1571 + }, + { + "epoch": 4.131492439184747, + "high_lr": 0.0001731578947368421, + "low_lr": 3.463157894736842e-06, + "step": 1571 + }, + { + "epoch": 4.131492439184747, + "high_lr": 0.0001731578947368421, + "low_lr": 3.463157894736842e-06, + "step": 1571 + }, + { + "epoch": 4.131492439184747, + "high_lr": 0.0001731578947368421, + "low_lr": 3.463157894736842e-06, + "step": 1571 + }, + { + "epoch": 4.131492439184747, + "high_lr": 0.0001731578947368421, + "low_lr": 3.463157894736842e-06, + "step": 1571 + }, + { + "epoch": 4.134122287968442, + "grad_norm": 1.6678563356399536, + "learning_rate": 0.00017263157894736842, + "loss": 1.1925, + "step": 1572 + }, + { + "epoch": 4.134122287968442, + "high_lr": 0.00017263157894736842, + "low_lr": 3.4526315789473684e-06, + "step": 1572 + }, + { + "epoch": 4.134122287968442, + "high_lr": 0.00017263157894736842, + "low_lr": 3.4526315789473684e-06, + "step": 1572 + }, + { + "epoch": 4.134122287968442, + "high_lr": 0.00017263157894736842, + "low_lr": 3.4526315789473684e-06, + "step": 1572 + }, + { + "epoch": 4.134122287968442, + "high_lr": 0.00017263157894736842, + "low_lr": 3.4526315789473684e-06, + "step": 1572 + }, + { + "epoch": 4.134122287968442, + "high_lr": 0.00017263157894736842, + "low_lr": 3.4526315789473684e-06, + "step": 1572 + }, + { + "epoch": 4.134122287968442, + "high_lr": 0.00017263157894736842, + "low_lr": 3.4526315789473684e-06, + "step": 1572 + }, + { + "epoch": 4.134122287968442, + "high_lr": 0.00017263157894736842, + "low_lr": 3.4526315789473684e-06, + "step": 1572 + }, + { + "epoch": 4.134122287968442, + "high_lr": 0.00017263157894736842, + "low_lr": 3.4526315789473684e-06, + "step": 1572 + }, + { + "epoch": 4.136752136752137, + "grad_norm": 1.4178546667099, + "learning_rate": 0.00017210526315789473, + "loss": 1.2316, + "step": 1573 + }, + { + "epoch": 4.136752136752137, + "high_lr": 0.00017210526315789473, + "low_lr": 3.4421052631578947e-06, + "step": 1573 + }, + { + "epoch": 4.136752136752137, + "high_lr": 0.00017210526315789473, + "low_lr": 3.4421052631578947e-06, + "step": 1573 + }, + { + "epoch": 4.136752136752137, + "high_lr": 0.00017210526315789473, + "low_lr": 3.4421052631578947e-06, + "step": 1573 + }, + { + "epoch": 4.136752136752137, + "high_lr": 0.00017210526315789473, + "low_lr": 3.4421052631578947e-06, + "step": 1573 + }, + { + "epoch": 4.136752136752137, + "high_lr": 0.00017210526315789473, + "low_lr": 3.4421052631578947e-06, + "step": 1573 + }, + { + "epoch": 4.136752136752137, + "high_lr": 0.00017210526315789473, + "low_lr": 3.4421052631578947e-06, + "step": 1573 + }, + { + "epoch": 4.136752136752137, + "high_lr": 0.00017210526315789473, + "low_lr": 3.4421052631578947e-06, + "step": 1573 + }, + { + "epoch": 4.136752136752137, + "high_lr": 0.00017210526315789473, + "low_lr": 3.4421052631578947e-06, + "step": 1573 + }, + { + "epoch": 4.139381985535832, + "grad_norm": 1.584417462348938, + "learning_rate": 0.00017157894736842105, + "loss": 1.1981, + "step": 1574 + }, + { + "epoch": 4.139381985535832, + "high_lr": 0.00017157894736842105, + "low_lr": 3.4315789473684215e-06, + "step": 1574 + }, + { + "epoch": 4.139381985535832, + "high_lr": 0.00017157894736842105, + "low_lr": 3.4315789473684215e-06, + "step": 1574 + }, + { + "epoch": 4.139381985535832, + "high_lr": 0.00017157894736842105, + "low_lr": 3.4315789473684215e-06, + "step": 1574 + }, + { + "epoch": 4.139381985535832, + "high_lr": 0.00017157894736842105, + "low_lr": 3.4315789473684215e-06, + "step": 1574 + }, + { + "epoch": 4.139381985535832, + "high_lr": 0.00017157894736842105, + "low_lr": 3.4315789473684215e-06, + "step": 1574 + }, + { + "epoch": 4.139381985535832, + "high_lr": 0.00017157894736842105, + "low_lr": 3.4315789473684215e-06, + "step": 1574 + }, + { + "epoch": 4.139381985535832, + "high_lr": 0.00017157894736842105, + "low_lr": 3.4315789473684215e-06, + "step": 1574 + }, + { + "epoch": 4.139381985535832, + "high_lr": 0.00017157894736842105, + "low_lr": 3.4315789473684215e-06, + "step": 1574 + }, + { + "epoch": 4.1420118343195265, + "grad_norm": 1.5301527976989746, + "learning_rate": 0.00017105263157894739, + "loss": 1.2721, + "step": 1575 + }, + { + "epoch": 4.1420118343195265, + "high_lr": 0.00017105263157894739, + "low_lr": 3.421052631578948e-06, + "step": 1575 + }, + { + "epoch": 4.1420118343195265, + "high_lr": 0.00017105263157894739, + "low_lr": 3.421052631578948e-06, + "step": 1575 + }, + { + "epoch": 4.1420118343195265, + "high_lr": 0.00017105263157894739, + "low_lr": 3.421052631578948e-06, + "step": 1575 + }, + { + "epoch": 4.1420118343195265, + "high_lr": 0.00017105263157894739, + "low_lr": 3.421052631578948e-06, + "step": 1575 + }, + { + "epoch": 4.1420118343195265, + "high_lr": 0.00017105263157894739, + "low_lr": 3.421052631578948e-06, + "step": 1575 + }, + { + "epoch": 4.1420118343195265, + "high_lr": 0.00017105263157894739, + "low_lr": 3.421052631578948e-06, + "step": 1575 + }, + { + "epoch": 4.1420118343195265, + "high_lr": 0.00017105263157894739, + "low_lr": 3.421052631578948e-06, + "step": 1575 + }, + { + "epoch": 4.1420118343195265, + "high_lr": 0.00017105263157894739, + "low_lr": 3.421052631578948e-06, + "step": 1575 + }, + { + "epoch": 4.144641683103222, + "grad_norm": 1.5100499391555786, + "learning_rate": 0.0001705263157894737, + "loss": 1.2152, + "step": 1576 + }, + { + "epoch": 4.144641683103222, + "high_lr": 0.0001705263157894737, + "low_lr": 3.410526315789474e-06, + "step": 1576 + }, + { + "epoch": 4.144641683103222, + "high_lr": 0.0001705263157894737, + "low_lr": 3.410526315789474e-06, + "step": 1576 + }, + { + "epoch": 4.144641683103222, + "high_lr": 0.0001705263157894737, + "low_lr": 3.410526315789474e-06, + "step": 1576 + }, + { + "epoch": 4.144641683103222, + "high_lr": 0.0001705263157894737, + "low_lr": 3.410526315789474e-06, + "step": 1576 + }, + { + "epoch": 4.144641683103222, + "high_lr": 0.0001705263157894737, + "low_lr": 3.410526315789474e-06, + "step": 1576 + }, + { + "epoch": 4.144641683103222, + "high_lr": 0.0001705263157894737, + "low_lr": 3.410526315789474e-06, + "step": 1576 + }, + { + "epoch": 4.144641683103222, + "high_lr": 0.0001705263157894737, + "low_lr": 3.410526315789474e-06, + "step": 1576 + }, + { + "epoch": 4.144641683103222, + "high_lr": 0.0001705263157894737, + "low_lr": 3.410526315789474e-06, + "step": 1576 + }, + { + "epoch": 4.147271531886917, + "grad_norm": 1.4661401510238647, + "learning_rate": 0.00017, + "loss": 1.1896, + "step": 1577 + }, + { + "epoch": 4.147271531886917, + "high_lr": 0.00017, + "low_lr": 3.4000000000000005e-06, + "step": 1577 + }, + { + "epoch": 4.147271531886917, + "high_lr": 0.00017, + "low_lr": 3.4000000000000005e-06, + "step": 1577 + }, + { + "epoch": 4.147271531886917, + "high_lr": 0.00017, + "low_lr": 3.4000000000000005e-06, + "step": 1577 + }, + { + "epoch": 4.147271531886917, + "high_lr": 0.00017, + "low_lr": 3.4000000000000005e-06, + "step": 1577 + }, + { + "epoch": 4.147271531886917, + "high_lr": 0.00017, + "low_lr": 3.4000000000000005e-06, + "step": 1577 + }, + { + "epoch": 4.147271531886917, + "high_lr": 0.00017, + "low_lr": 3.4000000000000005e-06, + "step": 1577 + }, + { + "epoch": 4.147271531886917, + "high_lr": 0.00017, + "low_lr": 3.4000000000000005e-06, + "step": 1577 + }, + { + "epoch": 4.147271531886917, + "high_lr": 0.00017, + "low_lr": 3.4000000000000005e-06, + "step": 1577 + }, + { + "epoch": 4.149901380670611, + "grad_norm": 1.496145248413086, + "learning_rate": 0.0001694736842105263, + "loss": 1.1908, + "step": 1578 + }, + { + "epoch": 4.149901380670611, + "high_lr": 0.0001694736842105263, + "low_lr": 3.3894736842105264e-06, + "step": 1578 + }, + { + "epoch": 4.149901380670611, + "high_lr": 0.0001694736842105263, + "low_lr": 3.3894736842105264e-06, + "step": 1578 + }, + { + "epoch": 4.149901380670611, + "high_lr": 0.0001694736842105263, + "low_lr": 3.3894736842105264e-06, + "step": 1578 + }, + { + "epoch": 4.149901380670611, + "high_lr": 0.0001694736842105263, + "low_lr": 3.3894736842105264e-06, + "step": 1578 + }, + { + "epoch": 4.149901380670611, + "high_lr": 0.0001694736842105263, + "low_lr": 3.3894736842105264e-06, + "step": 1578 + }, + { + "epoch": 4.149901380670611, + "high_lr": 0.0001694736842105263, + "low_lr": 3.3894736842105264e-06, + "step": 1578 + }, + { + "epoch": 4.149901380670611, + "high_lr": 0.0001694736842105263, + "low_lr": 3.3894736842105264e-06, + "step": 1578 + }, + { + "epoch": 4.149901380670611, + "high_lr": 0.0001694736842105263, + "low_lr": 3.3894736842105264e-06, + "step": 1578 + }, + { + "epoch": 4.152531229454306, + "grad_norm": 1.5556325912475586, + "learning_rate": 0.00016894736842105264, + "loss": 1.2083, + "step": 1579 + }, + { + "epoch": 4.152531229454306, + "high_lr": 0.00016894736842105264, + "low_lr": 3.3789473684210527e-06, + "step": 1579 + }, + { + "epoch": 4.152531229454306, + "high_lr": 0.00016894736842105264, + "low_lr": 3.3789473684210527e-06, + "step": 1579 + }, + { + "epoch": 4.152531229454306, + "high_lr": 0.00016894736842105264, + "low_lr": 3.3789473684210527e-06, + "step": 1579 + }, + { + "epoch": 4.152531229454306, + "high_lr": 0.00016894736842105264, + "low_lr": 3.3789473684210527e-06, + "step": 1579 + }, + { + "epoch": 4.152531229454306, + "high_lr": 0.00016894736842105264, + "low_lr": 3.3789473684210527e-06, + "step": 1579 + }, + { + "epoch": 4.152531229454306, + "high_lr": 0.00016894736842105264, + "low_lr": 3.3789473684210527e-06, + "step": 1579 + }, + { + "epoch": 4.152531229454306, + "high_lr": 0.00016894736842105264, + "low_lr": 3.3789473684210527e-06, + "step": 1579 + }, + { + "epoch": 4.152531229454306, + "high_lr": 0.00016894736842105264, + "low_lr": 3.3789473684210527e-06, + "step": 1579 + }, + { + "epoch": 4.1551610782380015, + "grad_norm": 1.4800539016723633, + "learning_rate": 0.00016842105263157895, + "loss": 1.2144, + "step": 1580 + }, + { + "epoch": 4.1551610782380015, + "high_lr": 0.00016842105263157895, + "low_lr": 3.368421052631579e-06, + "step": 1580 + }, + { + "epoch": 4.1551610782380015, + "high_lr": 0.00016842105263157895, + "low_lr": 3.368421052631579e-06, + "step": 1580 + }, + { + "epoch": 4.1551610782380015, + "high_lr": 0.00016842105263157895, + "low_lr": 3.368421052631579e-06, + "step": 1580 + }, + { + "epoch": 4.1551610782380015, + "high_lr": 0.00016842105263157895, + "low_lr": 3.368421052631579e-06, + "step": 1580 + }, + { + "epoch": 4.1551610782380015, + "high_lr": 0.00016842105263157895, + "low_lr": 3.368421052631579e-06, + "step": 1580 + }, + { + "epoch": 4.1551610782380015, + "high_lr": 0.00016842105263157895, + "low_lr": 3.368421052631579e-06, + "step": 1580 + }, + { + "epoch": 4.1551610782380015, + "high_lr": 0.00016842105263157895, + "low_lr": 3.368421052631579e-06, + "step": 1580 + }, + { + "epoch": 4.1551610782380015, + "high_lr": 0.00016842105263157895, + "low_lr": 3.368421052631579e-06, + "step": 1580 + }, + { + "epoch": 4.157790927021696, + "grad_norm": 1.4237494468688965, + "learning_rate": 0.00016789473684210526, + "loss": 1.1848, + "step": 1581 + }, + { + "epoch": 4.157790927021696, + "high_lr": 0.00016789473684210526, + "low_lr": 3.3578947368421054e-06, + "step": 1581 + }, + { + "epoch": 4.157790927021696, + "high_lr": 0.00016789473684210526, + "low_lr": 3.3578947368421054e-06, + "step": 1581 + }, + { + "epoch": 4.157790927021696, + "high_lr": 0.00016789473684210526, + "low_lr": 3.3578947368421054e-06, + "step": 1581 + }, + { + "epoch": 4.157790927021696, + "high_lr": 0.00016789473684210526, + "low_lr": 3.3578947368421054e-06, + "step": 1581 + }, + { + "epoch": 4.157790927021696, + "high_lr": 0.00016789473684210526, + "low_lr": 3.3578947368421054e-06, + "step": 1581 + }, + { + "epoch": 4.157790927021696, + "high_lr": 0.00016789473684210526, + "low_lr": 3.3578947368421054e-06, + "step": 1581 + }, + { + "epoch": 4.157790927021696, + "high_lr": 0.00016789473684210526, + "low_lr": 3.3578947368421054e-06, + "step": 1581 + }, + { + "epoch": 4.157790927021696, + "high_lr": 0.00016789473684210526, + "low_lr": 3.3578947368421054e-06, + "step": 1581 + }, + { + "epoch": 4.160420775805391, + "grad_norm": 1.644124150276184, + "learning_rate": 0.00016736842105263158, + "loss": 1.1958, + "step": 1582 + }, + { + "epoch": 4.160420775805391, + "high_lr": 0.00016736842105263158, + "low_lr": 3.347368421052632e-06, + "step": 1582 + }, + { + "epoch": 4.160420775805391, + "high_lr": 0.00016736842105263158, + "low_lr": 3.347368421052632e-06, + "step": 1582 + }, + { + "epoch": 4.160420775805391, + "high_lr": 0.00016736842105263158, + "low_lr": 3.347368421052632e-06, + "step": 1582 + }, + { + "epoch": 4.160420775805391, + "high_lr": 0.00016736842105263158, + "low_lr": 3.347368421052632e-06, + "step": 1582 + }, + { + "epoch": 4.160420775805391, + "high_lr": 0.00016736842105263158, + "low_lr": 3.347368421052632e-06, + "step": 1582 + }, + { + "epoch": 4.160420775805391, + "high_lr": 0.00016736842105263158, + "low_lr": 3.347368421052632e-06, + "step": 1582 + }, + { + "epoch": 4.160420775805391, + "high_lr": 0.00016736842105263158, + "low_lr": 3.347368421052632e-06, + "step": 1582 + }, + { + "epoch": 4.160420775805391, + "high_lr": 0.00016736842105263158, + "low_lr": 3.347368421052632e-06, + "step": 1582 + }, + { + "epoch": 4.163050624589086, + "grad_norm": 1.4499338865280151, + "learning_rate": 0.00016684210526315792, + "loss": 1.2298, + "step": 1583 + }, + { + "epoch": 4.163050624589086, + "high_lr": 0.00016684210526315792, + "low_lr": 3.3368421052631584e-06, + "step": 1583 + }, + { + "epoch": 4.163050624589086, + "high_lr": 0.00016684210526315792, + "low_lr": 3.3368421052631584e-06, + "step": 1583 + }, + { + "epoch": 4.163050624589086, + "high_lr": 0.00016684210526315792, + "low_lr": 3.3368421052631584e-06, + "step": 1583 + }, + { + "epoch": 4.163050624589086, + "high_lr": 0.00016684210526315792, + "low_lr": 3.3368421052631584e-06, + "step": 1583 + }, + { + "epoch": 4.163050624589086, + "high_lr": 0.00016684210526315792, + "low_lr": 3.3368421052631584e-06, + "step": 1583 + }, + { + "epoch": 4.163050624589086, + "high_lr": 0.00016684210526315792, + "low_lr": 3.3368421052631584e-06, + "step": 1583 + }, + { + "epoch": 4.163050624589086, + "high_lr": 0.00016684210526315792, + "low_lr": 3.3368421052631584e-06, + "step": 1583 + }, + { + "epoch": 4.163050624589086, + "high_lr": 0.00016684210526315792, + "low_lr": 3.3368421052631584e-06, + "step": 1583 + }, + { + "epoch": 4.165680473372781, + "grad_norm": 1.4814369678497314, + "learning_rate": 0.00016631578947368423, + "loss": 1.2453, + "step": 1584 + }, + { + "epoch": 4.165680473372781, + "high_lr": 0.00016631578947368423, + "low_lr": 3.3263157894736848e-06, + "step": 1584 + }, + { + "epoch": 4.165680473372781, + "high_lr": 0.00016631578947368423, + "low_lr": 3.3263157894736848e-06, + "step": 1584 + }, + { + "epoch": 4.165680473372781, + "high_lr": 0.00016631578947368423, + "low_lr": 3.3263157894736848e-06, + "step": 1584 + }, + { + "epoch": 4.165680473372781, + "high_lr": 0.00016631578947368423, + "low_lr": 3.3263157894736848e-06, + "step": 1584 + }, + { + "epoch": 4.165680473372781, + "high_lr": 0.00016631578947368423, + "low_lr": 3.3263157894736848e-06, + "step": 1584 + }, + { + "epoch": 4.165680473372781, + "high_lr": 0.00016631578947368423, + "low_lr": 3.3263157894736848e-06, + "step": 1584 + }, + { + "epoch": 4.165680473372781, + "high_lr": 0.00016631578947368423, + "low_lr": 3.3263157894736848e-06, + "step": 1584 + }, + { + "epoch": 4.165680473372781, + "high_lr": 0.00016631578947368423, + "low_lr": 3.3263157894736848e-06, + "step": 1584 + }, + { + "epoch": 4.168310322156476, + "grad_norm": 1.5331965684890747, + "learning_rate": 0.00016578947368421052, + "loss": 1.2199, + "step": 1585 + }, + { + "epoch": 4.168310322156476, + "high_lr": 0.00016578947368421052, + "low_lr": 3.3157894736842107e-06, + "step": 1585 + }, + { + "epoch": 4.168310322156476, + "high_lr": 0.00016578947368421052, + "low_lr": 3.3157894736842107e-06, + "step": 1585 + }, + { + "epoch": 4.168310322156476, + "high_lr": 0.00016578947368421052, + "low_lr": 3.3157894736842107e-06, + "step": 1585 + }, + { + "epoch": 4.168310322156476, + "high_lr": 0.00016578947368421052, + "low_lr": 3.3157894736842107e-06, + "step": 1585 + }, + { + "epoch": 4.168310322156476, + "high_lr": 0.00016578947368421052, + "low_lr": 3.3157894736842107e-06, + "step": 1585 + }, + { + "epoch": 4.168310322156476, + "high_lr": 0.00016578947368421052, + "low_lr": 3.3157894736842107e-06, + "step": 1585 + }, + { + "epoch": 4.168310322156476, + "high_lr": 0.00016578947368421052, + "low_lr": 3.3157894736842107e-06, + "step": 1585 + }, + { + "epoch": 4.168310322156476, + "high_lr": 0.00016578947368421052, + "low_lr": 3.3157894736842107e-06, + "step": 1585 + }, + { + "epoch": 4.170940170940171, + "grad_norm": 1.4219096899032593, + "learning_rate": 0.00016526315789473683, + "loss": 1.202, + "step": 1586 + }, + { + "epoch": 4.170940170940171, + "high_lr": 0.00016526315789473683, + "low_lr": 3.305263157894737e-06, + "step": 1586 + }, + { + "epoch": 4.170940170940171, + "high_lr": 0.00016526315789473683, + "low_lr": 3.305263157894737e-06, + "step": 1586 + }, + { + "epoch": 4.170940170940171, + "high_lr": 0.00016526315789473683, + "low_lr": 3.305263157894737e-06, + "step": 1586 + }, + { + "epoch": 4.170940170940171, + "high_lr": 0.00016526315789473683, + "low_lr": 3.305263157894737e-06, + "step": 1586 + }, + { + "epoch": 4.170940170940171, + "high_lr": 0.00016526315789473683, + "low_lr": 3.305263157894737e-06, + "step": 1586 + }, + { + "epoch": 4.170940170940171, + "high_lr": 0.00016526315789473683, + "low_lr": 3.305263157894737e-06, + "step": 1586 + }, + { + "epoch": 4.170940170940171, + "high_lr": 0.00016526315789473683, + "low_lr": 3.305263157894737e-06, + "step": 1586 + }, + { + "epoch": 4.170940170940171, + "high_lr": 0.00016526315789473683, + "low_lr": 3.305263157894737e-06, + "step": 1586 + }, + { + "epoch": 4.173570019723866, + "grad_norm": 1.4696342945098877, + "learning_rate": 0.00016473684210526317, + "loss": 1.2507, + "step": 1587 + }, + { + "epoch": 4.173570019723866, + "high_lr": 0.00016473684210526317, + "low_lr": 3.2947368421052633e-06, + "step": 1587 + }, + { + "epoch": 4.173570019723866, + "high_lr": 0.00016473684210526317, + "low_lr": 3.2947368421052633e-06, + "step": 1587 + }, + { + "epoch": 4.173570019723866, + "high_lr": 0.00016473684210526317, + "low_lr": 3.2947368421052633e-06, + "step": 1587 + }, + { + "epoch": 4.173570019723866, + "high_lr": 0.00016473684210526317, + "low_lr": 3.2947368421052633e-06, + "step": 1587 + }, + { + "epoch": 4.173570019723866, + "high_lr": 0.00016473684210526317, + "low_lr": 3.2947368421052633e-06, + "step": 1587 + }, + { + "epoch": 4.173570019723866, + "high_lr": 0.00016473684210526317, + "low_lr": 3.2947368421052633e-06, + "step": 1587 + }, + { + "epoch": 4.173570019723866, + "high_lr": 0.00016473684210526317, + "low_lr": 3.2947368421052633e-06, + "step": 1587 + }, + { + "epoch": 4.173570019723866, + "high_lr": 0.00016473684210526317, + "low_lr": 3.2947368421052633e-06, + "step": 1587 + }, + { + "epoch": 4.1761998685075605, + "grad_norm": 1.5482137203216553, + "learning_rate": 0.00016421052631578948, + "loss": 1.2084, + "step": 1588 + }, + { + "epoch": 4.1761998685075605, + "high_lr": 0.00016421052631578948, + "low_lr": 3.2842105263157897e-06, + "step": 1588 + }, + { + "epoch": 4.1761998685075605, + "high_lr": 0.00016421052631578948, + "low_lr": 3.2842105263157897e-06, + "step": 1588 + }, + { + "epoch": 4.1761998685075605, + "high_lr": 0.00016421052631578948, + "low_lr": 3.2842105263157897e-06, + "step": 1588 + }, + { + "epoch": 4.1761998685075605, + "high_lr": 0.00016421052631578948, + "low_lr": 3.2842105263157897e-06, + "step": 1588 + }, + { + "epoch": 4.1761998685075605, + "high_lr": 0.00016421052631578948, + "low_lr": 3.2842105263157897e-06, + "step": 1588 + }, + { + "epoch": 4.1761998685075605, + "high_lr": 0.00016421052631578948, + "low_lr": 3.2842105263157897e-06, + "step": 1588 + }, + { + "epoch": 4.1761998685075605, + "high_lr": 0.00016421052631578948, + "low_lr": 3.2842105263157897e-06, + "step": 1588 + }, + { + "epoch": 4.1761998685075605, + "high_lr": 0.00016421052631578948, + "low_lr": 3.2842105263157897e-06, + "step": 1588 + }, + { + "epoch": 4.178829717291256, + "grad_norm": 1.423588514328003, + "learning_rate": 0.0001636842105263158, + "loss": 1.2125, + "step": 1589 + }, + { + "epoch": 4.178829717291256, + "high_lr": 0.0001636842105263158, + "low_lr": 3.273684210526316e-06, + "step": 1589 + }, + { + "epoch": 4.178829717291256, + "high_lr": 0.0001636842105263158, + "low_lr": 3.273684210526316e-06, + "step": 1589 + }, + { + "epoch": 4.178829717291256, + "high_lr": 0.0001636842105263158, + "low_lr": 3.273684210526316e-06, + "step": 1589 + }, + { + "epoch": 4.178829717291256, + "high_lr": 0.0001636842105263158, + "low_lr": 3.273684210526316e-06, + "step": 1589 + }, + { + "epoch": 4.178829717291256, + "high_lr": 0.0001636842105263158, + "low_lr": 3.273684210526316e-06, + "step": 1589 + }, + { + "epoch": 4.178829717291256, + "high_lr": 0.0001636842105263158, + "low_lr": 3.273684210526316e-06, + "step": 1589 + }, + { + "epoch": 4.178829717291256, + "high_lr": 0.0001636842105263158, + "low_lr": 3.273684210526316e-06, + "step": 1589 + }, + { + "epoch": 4.178829717291256, + "high_lr": 0.0001636842105263158, + "low_lr": 3.273684210526316e-06, + "step": 1589 + }, + { + "epoch": 4.181459566074951, + "grad_norm": 1.5778827667236328, + "learning_rate": 0.0001631578947368421, + "loss": 1.2128, + "step": 1590 + }, + { + "epoch": 4.181459566074951, + "high_lr": 0.0001631578947368421, + "low_lr": 3.2631578947368423e-06, + "step": 1590 + }, + { + "epoch": 4.181459566074951, + "high_lr": 0.0001631578947368421, + "low_lr": 3.2631578947368423e-06, + "step": 1590 + }, + { + "epoch": 4.181459566074951, + "high_lr": 0.0001631578947368421, + "low_lr": 3.2631578947368423e-06, + "step": 1590 + }, + { + "epoch": 4.181459566074951, + "high_lr": 0.0001631578947368421, + "low_lr": 3.2631578947368423e-06, + "step": 1590 + }, + { + "epoch": 4.181459566074951, + "high_lr": 0.0001631578947368421, + "low_lr": 3.2631578947368423e-06, + "step": 1590 + }, + { + "epoch": 4.181459566074951, + "high_lr": 0.0001631578947368421, + "low_lr": 3.2631578947368423e-06, + "step": 1590 + }, + { + "epoch": 4.181459566074951, + "high_lr": 0.0001631578947368421, + "low_lr": 3.2631578947368423e-06, + "step": 1590 + }, + { + "epoch": 4.181459566074951, + "high_lr": 0.0001631578947368421, + "low_lr": 3.2631578947368423e-06, + "step": 1590 + }, + { + "epoch": 4.184089414858645, + "grad_norm": 1.484566330909729, + "learning_rate": 0.00016263157894736845, + "loss": 1.2444, + "step": 1591 + }, + { + "epoch": 4.184089414858645, + "high_lr": 0.00016263157894736845, + "low_lr": 3.252631578947369e-06, + "step": 1591 + }, + { + "epoch": 4.184089414858645, + "high_lr": 0.00016263157894736845, + "low_lr": 3.252631578947369e-06, + "step": 1591 + }, + { + "epoch": 4.184089414858645, + "high_lr": 0.00016263157894736845, + "low_lr": 3.252631578947369e-06, + "step": 1591 + }, + { + "epoch": 4.184089414858645, + "high_lr": 0.00016263157894736845, + "low_lr": 3.252631578947369e-06, + "step": 1591 + }, + { + "epoch": 4.184089414858645, + "high_lr": 0.00016263157894736845, + "low_lr": 3.252631578947369e-06, + "step": 1591 + }, + { + "epoch": 4.184089414858645, + "high_lr": 0.00016263157894736845, + "low_lr": 3.252631578947369e-06, + "step": 1591 + }, + { + "epoch": 4.184089414858645, + "high_lr": 0.00016263157894736845, + "low_lr": 3.252631578947369e-06, + "step": 1591 + }, + { + "epoch": 4.184089414858645, + "high_lr": 0.00016263157894736845, + "low_lr": 3.252631578947369e-06, + "step": 1591 + }, + { + "epoch": 4.186719263642341, + "grad_norm": 1.489429235458374, + "learning_rate": 0.00016210526315789473, + "loss": 1.2286, + "step": 1592 + }, + { + "epoch": 4.186719263642341, + "high_lr": 0.00016210526315789473, + "low_lr": 3.2421052631578945e-06, + "step": 1592 + }, + { + "epoch": 4.186719263642341, + "high_lr": 0.00016210526315789473, + "low_lr": 3.2421052631578945e-06, + "step": 1592 + }, + { + "epoch": 4.186719263642341, + "high_lr": 0.00016210526315789473, + "low_lr": 3.2421052631578945e-06, + "step": 1592 + }, + { + "epoch": 4.186719263642341, + "high_lr": 0.00016210526315789473, + "low_lr": 3.2421052631578945e-06, + "step": 1592 + }, + { + "epoch": 4.186719263642341, + "high_lr": 0.00016210526315789473, + "low_lr": 3.2421052631578945e-06, + "step": 1592 + }, + { + "epoch": 4.186719263642341, + "high_lr": 0.00016210526315789473, + "low_lr": 3.2421052631578945e-06, + "step": 1592 + }, + { + "epoch": 4.186719263642341, + "high_lr": 0.00016210526315789473, + "low_lr": 3.2421052631578945e-06, + "step": 1592 + }, + { + "epoch": 4.186719263642341, + "high_lr": 0.00016210526315789473, + "low_lr": 3.2421052631578945e-06, + "step": 1592 + }, + { + "epoch": 4.189349112426036, + "grad_norm": 1.4498363733291626, + "learning_rate": 0.00016157894736842105, + "loss": 1.27, + "step": 1593 + }, + { + "epoch": 4.189349112426036, + "high_lr": 0.00016157894736842105, + "low_lr": 3.2315789473684213e-06, + "step": 1593 + }, + { + "epoch": 4.189349112426036, + "high_lr": 0.00016157894736842105, + "low_lr": 3.2315789473684213e-06, + "step": 1593 + }, + { + "epoch": 4.189349112426036, + "high_lr": 0.00016157894736842105, + "low_lr": 3.2315789473684213e-06, + "step": 1593 + }, + { + "epoch": 4.189349112426036, + "high_lr": 0.00016157894736842105, + "low_lr": 3.2315789473684213e-06, + "step": 1593 + }, + { + "epoch": 4.189349112426036, + "high_lr": 0.00016157894736842105, + "low_lr": 3.2315789473684213e-06, + "step": 1593 + }, + { + "epoch": 4.189349112426036, + "high_lr": 0.00016157894736842105, + "low_lr": 3.2315789473684213e-06, + "step": 1593 + }, + { + "epoch": 4.189349112426036, + "high_lr": 0.00016157894736842105, + "low_lr": 3.2315789473684213e-06, + "step": 1593 + }, + { + "epoch": 4.189349112426036, + "high_lr": 0.00016157894736842105, + "low_lr": 3.2315789473684213e-06, + "step": 1593 + }, + { + "epoch": 4.19197896120973, + "grad_norm": 1.443981409072876, + "learning_rate": 0.00016105263157894736, + "loss": 1.216, + "step": 1594 + }, + { + "epoch": 4.19197896120973, + "high_lr": 0.00016105263157894736, + "low_lr": 3.2210526315789476e-06, + "step": 1594 + }, + { + "epoch": 4.19197896120973, + "high_lr": 0.00016105263157894736, + "low_lr": 3.2210526315789476e-06, + "step": 1594 + }, + { + "epoch": 4.19197896120973, + "high_lr": 0.00016105263157894736, + "low_lr": 3.2210526315789476e-06, + "step": 1594 + }, + { + "epoch": 4.19197896120973, + "high_lr": 0.00016105263157894736, + "low_lr": 3.2210526315789476e-06, + "step": 1594 + }, + { + "epoch": 4.19197896120973, + "high_lr": 0.00016105263157894736, + "low_lr": 3.2210526315789476e-06, + "step": 1594 + }, + { + "epoch": 4.19197896120973, + "high_lr": 0.00016105263157894736, + "low_lr": 3.2210526315789476e-06, + "step": 1594 + }, + { + "epoch": 4.19197896120973, + "high_lr": 0.00016105263157894736, + "low_lr": 3.2210526315789476e-06, + "step": 1594 + }, + { + "epoch": 4.19197896120973, + "high_lr": 0.00016105263157894736, + "low_lr": 3.2210526315789476e-06, + "step": 1594 + }, + { + "epoch": 4.194608809993426, + "grad_norm": 1.510570764541626, + "learning_rate": 0.0001605263157894737, + "loss": 1.2031, + "step": 1595 + }, + { + "epoch": 4.194608809993426, + "high_lr": 0.0001605263157894737, + "low_lr": 3.210526315789474e-06, + "step": 1595 + }, + { + "epoch": 4.194608809993426, + "high_lr": 0.0001605263157894737, + "low_lr": 3.210526315789474e-06, + "step": 1595 + }, + { + "epoch": 4.194608809993426, + "high_lr": 0.0001605263157894737, + "low_lr": 3.210526315789474e-06, + "step": 1595 + }, + { + "epoch": 4.194608809993426, + "high_lr": 0.0001605263157894737, + "low_lr": 3.210526315789474e-06, + "step": 1595 + }, + { + "epoch": 4.194608809993426, + "high_lr": 0.0001605263157894737, + "low_lr": 3.210526315789474e-06, + "step": 1595 + }, + { + "epoch": 4.194608809993426, + "high_lr": 0.0001605263157894737, + "low_lr": 3.210526315789474e-06, + "step": 1595 + }, + { + "epoch": 4.194608809993426, + "high_lr": 0.0001605263157894737, + "low_lr": 3.210526315789474e-06, + "step": 1595 + }, + { + "epoch": 4.194608809993426, + "high_lr": 0.0001605263157894737, + "low_lr": 3.210526315789474e-06, + "step": 1595 + }, + { + "epoch": 4.19723865877712, + "grad_norm": 1.4223371744155884, + "learning_rate": 0.00016, + "loss": 1.2385, + "step": 1596 + }, + { + "epoch": 4.19723865877712, + "high_lr": 0.00016, + "low_lr": 3.2000000000000003e-06, + "step": 1596 + }, + { + "epoch": 4.19723865877712, + "high_lr": 0.00016, + "low_lr": 3.2000000000000003e-06, + "step": 1596 + }, + { + "epoch": 4.19723865877712, + "high_lr": 0.00016, + "low_lr": 3.2000000000000003e-06, + "step": 1596 + }, + { + "epoch": 4.19723865877712, + "high_lr": 0.00016, + "low_lr": 3.2000000000000003e-06, + "step": 1596 + }, + { + "epoch": 4.19723865877712, + "high_lr": 0.00016, + "low_lr": 3.2000000000000003e-06, + "step": 1596 + }, + { + "epoch": 4.19723865877712, + "high_lr": 0.00016, + "low_lr": 3.2000000000000003e-06, + "step": 1596 + }, + { + "epoch": 4.19723865877712, + "high_lr": 0.00016, + "low_lr": 3.2000000000000003e-06, + "step": 1596 + }, + { + "epoch": 4.19723865877712, + "high_lr": 0.00016, + "low_lr": 3.2000000000000003e-06, + "step": 1596 + }, + { + "epoch": 4.199868507560815, + "grad_norm": 1.393445372581482, + "learning_rate": 0.00015947368421052633, + "loss": 1.1783, + "step": 1597 + }, + { + "epoch": 4.199868507560815, + "high_lr": 0.00015947368421052633, + "low_lr": 3.1894736842105266e-06, + "step": 1597 + }, + { + "epoch": 4.199868507560815, + "high_lr": 0.00015947368421052633, + "low_lr": 3.1894736842105266e-06, + "step": 1597 + }, + { + "epoch": 4.199868507560815, + "high_lr": 0.00015947368421052633, + "low_lr": 3.1894736842105266e-06, + "step": 1597 + }, + { + "epoch": 4.199868507560815, + "high_lr": 0.00015947368421052633, + "low_lr": 3.1894736842105266e-06, + "step": 1597 + }, + { + "epoch": 4.199868507560815, + "high_lr": 0.00015947368421052633, + "low_lr": 3.1894736842105266e-06, + "step": 1597 + }, + { + "epoch": 4.199868507560815, + "high_lr": 0.00015947368421052633, + "low_lr": 3.1894736842105266e-06, + "step": 1597 + }, + { + "epoch": 4.199868507560815, + "high_lr": 0.00015947368421052633, + "low_lr": 3.1894736842105266e-06, + "step": 1597 + }, + { + "epoch": 4.199868507560815, + "high_lr": 0.00015947368421052633, + "low_lr": 3.1894736842105266e-06, + "step": 1597 + }, + { + "epoch": 4.20249835634451, + "grad_norm": 1.7282485961914062, + "learning_rate": 0.00015894736842105264, + "loss": 1.2489, + "step": 1598 + }, + { + "epoch": 4.20249835634451, + "high_lr": 0.00015894736842105264, + "low_lr": 3.178947368421053e-06, + "step": 1598 + }, + { + "epoch": 4.20249835634451, + "high_lr": 0.00015894736842105264, + "low_lr": 3.178947368421053e-06, + "step": 1598 + }, + { + "epoch": 4.20249835634451, + "high_lr": 0.00015894736842105264, + "low_lr": 3.178947368421053e-06, + "step": 1598 + }, + { + "epoch": 4.20249835634451, + "high_lr": 0.00015894736842105264, + "low_lr": 3.178947368421053e-06, + "step": 1598 + }, + { + "epoch": 4.20249835634451, + "high_lr": 0.00015894736842105264, + "low_lr": 3.178947368421053e-06, + "step": 1598 + }, + { + "epoch": 4.20249835634451, + "high_lr": 0.00015894736842105264, + "low_lr": 3.178947368421053e-06, + "step": 1598 + }, + { + "epoch": 4.20249835634451, + "high_lr": 0.00015894736842105264, + "low_lr": 3.178947368421053e-06, + "step": 1598 + }, + { + "epoch": 4.20249835634451, + "high_lr": 0.00015894736842105264, + "low_lr": 3.178947368421053e-06, + "step": 1598 + }, + { + "epoch": 4.205128205128205, + "grad_norm": 1.4703502655029297, + "learning_rate": 0.00015842105263157892, + "loss": 1.1709, + "step": 1599 + }, + { + "epoch": 4.205128205128205, + "high_lr": 0.00015842105263157892, + "low_lr": 3.168421052631579e-06, + "step": 1599 + }, + { + "epoch": 4.205128205128205, + "high_lr": 0.00015842105263157892, + "low_lr": 3.168421052631579e-06, + "step": 1599 + }, + { + "epoch": 4.205128205128205, + "high_lr": 0.00015842105263157892, + "low_lr": 3.168421052631579e-06, + "step": 1599 + }, + { + "epoch": 4.205128205128205, + "high_lr": 0.00015842105263157892, + "low_lr": 3.168421052631579e-06, + "step": 1599 + }, + { + "epoch": 4.205128205128205, + "high_lr": 0.00015842105263157892, + "low_lr": 3.168421052631579e-06, + "step": 1599 + }, + { + "epoch": 4.205128205128205, + "high_lr": 0.00015842105263157892, + "low_lr": 3.168421052631579e-06, + "step": 1599 + }, + { + "epoch": 4.205128205128205, + "high_lr": 0.00015842105263157892, + "low_lr": 3.168421052631579e-06, + "step": 1599 + }, + { + "epoch": 4.205128205128205, + "high_lr": 0.00015842105263157892, + "low_lr": 3.168421052631579e-06, + "step": 1599 + }, + { + "epoch": 4.2077580539119, + "grad_norm": 1.5567339658737183, + "learning_rate": 0.00015789473684210527, + "loss": 1.2299, + "step": 1600 + }, + { + "epoch": 4.2077580539119, + "high_lr": 0.00015789473684210527, + "low_lr": 3.157894736842105e-06, + "step": 1600 + }, + { + "epoch": 4.2077580539119, + "high_lr": 0.00015789473684210527, + "low_lr": 3.157894736842105e-06, + "step": 1600 + }, + { + "epoch": 4.2077580539119, + "high_lr": 0.00015789473684210527, + "low_lr": 3.157894736842105e-06, + "step": 1600 + }, + { + "epoch": 4.2077580539119, + "high_lr": 0.00015789473684210527, + "low_lr": 3.157894736842105e-06, + "step": 1600 + }, + { + "epoch": 4.2077580539119, + "high_lr": 0.00015789473684210527, + "low_lr": 3.157894736842105e-06, + "step": 1600 + }, + { + "epoch": 4.2077580539119, + "high_lr": 0.00015789473684210527, + "low_lr": 3.157894736842105e-06, + "step": 1600 + }, + { + "epoch": 4.2077580539119, + "high_lr": 0.00015789473684210527, + "low_lr": 3.157894736842105e-06, + "step": 1600 + }, + { + "epoch": 4.2077580539119, + "high_lr": 0.00015789473684210527, + "low_lr": 3.157894736842105e-06, + "step": 1600 + }, + { + "epoch": 4.210387902695595, + "grad_norm": 1.6771581172943115, + "learning_rate": 0.00015736842105263158, + "loss": 1.2032, + "step": 1601 + }, + { + "epoch": 4.210387902695595, + "high_lr": 0.00015736842105263158, + "low_lr": 3.147368421052632e-06, + "step": 1601 + }, + { + "epoch": 4.210387902695595, + "high_lr": 0.00015736842105263158, + "low_lr": 3.147368421052632e-06, + "step": 1601 + }, + { + "epoch": 4.210387902695595, + "high_lr": 0.00015736842105263158, + "low_lr": 3.147368421052632e-06, + "step": 1601 + }, + { + "epoch": 4.210387902695595, + "high_lr": 0.00015736842105263158, + "low_lr": 3.147368421052632e-06, + "step": 1601 + }, + { + "epoch": 4.210387902695595, + "high_lr": 0.00015736842105263158, + "low_lr": 3.147368421052632e-06, + "step": 1601 + }, + { + "epoch": 4.210387902695595, + "high_lr": 0.00015736842105263158, + "low_lr": 3.147368421052632e-06, + "step": 1601 + }, + { + "epoch": 4.210387902695595, + "high_lr": 0.00015736842105263158, + "low_lr": 3.147368421052632e-06, + "step": 1601 + }, + { + "epoch": 4.210387902695595, + "high_lr": 0.00015736842105263158, + "low_lr": 3.147368421052632e-06, + "step": 1601 + }, + { + "epoch": 4.21301775147929, + "grad_norm": 1.4878536462783813, + "learning_rate": 0.0001568421052631579, + "loss": 1.1945, + "step": 1602 + }, + { + "epoch": 4.21301775147929, + "high_lr": 0.0001568421052631579, + "low_lr": 3.1368421052631582e-06, + "step": 1602 + }, + { + "epoch": 4.21301775147929, + "high_lr": 0.0001568421052631579, + "low_lr": 3.1368421052631582e-06, + "step": 1602 + }, + { + "epoch": 4.21301775147929, + "high_lr": 0.0001568421052631579, + "low_lr": 3.1368421052631582e-06, + "step": 1602 + }, + { + "epoch": 4.21301775147929, + "high_lr": 0.0001568421052631579, + "low_lr": 3.1368421052631582e-06, + "step": 1602 + }, + { + "epoch": 4.21301775147929, + "high_lr": 0.0001568421052631579, + "low_lr": 3.1368421052631582e-06, + "step": 1602 + }, + { + "epoch": 4.21301775147929, + "high_lr": 0.0001568421052631579, + "low_lr": 3.1368421052631582e-06, + "step": 1602 + }, + { + "epoch": 4.21301775147929, + "high_lr": 0.0001568421052631579, + "low_lr": 3.1368421052631582e-06, + "step": 1602 + }, + { + "epoch": 4.21301775147929, + "high_lr": 0.0001568421052631579, + "low_lr": 3.1368421052631582e-06, + "step": 1602 + }, + { + "epoch": 4.215647600262985, + "grad_norm": 1.6874127388000488, + "learning_rate": 0.0001563157894736842, + "loss": 1.1673, + "step": 1603 + }, + { + "epoch": 4.215647600262985, + "high_lr": 0.0001563157894736842, + "low_lr": 3.1263157894736846e-06, + "step": 1603 + }, + { + "epoch": 4.215647600262985, + "high_lr": 0.0001563157894736842, + "low_lr": 3.1263157894736846e-06, + "step": 1603 + }, + { + "epoch": 4.215647600262985, + "high_lr": 0.0001563157894736842, + "low_lr": 3.1263157894736846e-06, + "step": 1603 + }, + { + "epoch": 4.215647600262985, + "high_lr": 0.0001563157894736842, + "low_lr": 3.1263157894736846e-06, + "step": 1603 + }, + { + "epoch": 4.215647600262985, + "high_lr": 0.0001563157894736842, + "low_lr": 3.1263157894736846e-06, + "step": 1603 + }, + { + "epoch": 4.215647600262985, + "high_lr": 0.0001563157894736842, + "low_lr": 3.1263157894736846e-06, + "step": 1603 + }, + { + "epoch": 4.215647600262985, + "high_lr": 0.0001563157894736842, + "low_lr": 3.1263157894736846e-06, + "step": 1603 + }, + { + "epoch": 4.215647600262985, + "high_lr": 0.0001563157894736842, + "low_lr": 3.1263157894736846e-06, + "step": 1603 + }, + { + "epoch": 4.2182774490466795, + "grad_norm": 1.5085018873214722, + "learning_rate": 0.00015578947368421054, + "loss": 1.2319, + "step": 1604 + }, + { + "epoch": 4.2182774490466795, + "high_lr": 0.00015578947368421054, + "low_lr": 3.115789473684211e-06, + "step": 1604 + }, + { + "epoch": 4.2182774490466795, + "high_lr": 0.00015578947368421054, + "low_lr": 3.115789473684211e-06, + "step": 1604 + }, + { + "epoch": 4.2182774490466795, + "high_lr": 0.00015578947368421054, + "low_lr": 3.115789473684211e-06, + "step": 1604 + }, + { + "epoch": 4.2182774490466795, + "high_lr": 0.00015578947368421054, + "low_lr": 3.115789473684211e-06, + "step": 1604 + }, + { + "epoch": 4.2182774490466795, + "high_lr": 0.00015578947368421054, + "low_lr": 3.115789473684211e-06, + "step": 1604 + }, + { + "epoch": 4.2182774490466795, + "high_lr": 0.00015578947368421054, + "low_lr": 3.115789473684211e-06, + "step": 1604 + }, + { + "epoch": 4.2182774490466795, + "high_lr": 0.00015578947368421054, + "low_lr": 3.115789473684211e-06, + "step": 1604 + }, + { + "epoch": 4.2182774490466795, + "high_lr": 0.00015578947368421054, + "low_lr": 3.115789473684211e-06, + "step": 1604 + }, + { + "epoch": 4.220907297830375, + "grad_norm": 1.5692294836044312, + "learning_rate": 0.00015526315789473686, + "loss": 1.1995, + "step": 1605 + }, + { + "epoch": 4.220907297830375, + "high_lr": 0.00015526315789473686, + "low_lr": 3.1052631578947372e-06, + "step": 1605 + }, + { + "epoch": 4.220907297830375, + "high_lr": 0.00015526315789473686, + "low_lr": 3.1052631578947372e-06, + "step": 1605 + }, + { + "epoch": 4.220907297830375, + "high_lr": 0.00015526315789473686, + "low_lr": 3.1052631578947372e-06, + "step": 1605 + }, + { + "epoch": 4.220907297830375, + "high_lr": 0.00015526315789473686, + "low_lr": 3.1052631578947372e-06, + "step": 1605 + }, + { + "epoch": 4.220907297830375, + "high_lr": 0.00015526315789473686, + "low_lr": 3.1052631578947372e-06, + "step": 1605 + }, + { + "epoch": 4.220907297830375, + "high_lr": 0.00015526315789473686, + "low_lr": 3.1052631578947372e-06, + "step": 1605 + }, + { + "epoch": 4.220907297830375, + "high_lr": 0.00015526315789473686, + "low_lr": 3.1052631578947372e-06, + "step": 1605 + }, + { + "epoch": 4.220907297830375, + "high_lr": 0.00015526315789473686, + "low_lr": 3.1052631578947372e-06, + "step": 1605 + }, + { + "epoch": 4.22353714661407, + "grad_norm": 1.49501633644104, + "learning_rate": 0.00015473684210526314, + "loss": 1.2792, + "step": 1606 + }, + { + "epoch": 4.22353714661407, + "high_lr": 0.00015473684210526314, + "low_lr": 3.094736842105263e-06, + "step": 1606 + }, + { + "epoch": 4.22353714661407, + "high_lr": 0.00015473684210526314, + "low_lr": 3.094736842105263e-06, + "step": 1606 + }, + { + "epoch": 4.22353714661407, + "high_lr": 0.00015473684210526314, + "low_lr": 3.094736842105263e-06, + "step": 1606 + }, + { + "epoch": 4.22353714661407, + "high_lr": 0.00015473684210526314, + "low_lr": 3.094736842105263e-06, + "step": 1606 + }, + { + "epoch": 4.22353714661407, + "high_lr": 0.00015473684210526314, + "low_lr": 3.094736842105263e-06, + "step": 1606 + }, + { + "epoch": 4.22353714661407, + "high_lr": 0.00015473684210526314, + "low_lr": 3.094736842105263e-06, + "step": 1606 + }, + { + "epoch": 4.22353714661407, + "high_lr": 0.00015473684210526314, + "low_lr": 3.094736842105263e-06, + "step": 1606 + }, + { + "epoch": 4.22353714661407, + "high_lr": 0.00015473684210526314, + "low_lr": 3.094736842105263e-06, + "step": 1606 + }, + { + "epoch": 4.226166995397764, + "grad_norm": 1.5470064878463745, + "learning_rate": 0.00015421052631578946, + "loss": 1.2105, + "step": 1607 + }, + { + "epoch": 4.226166995397764, + "high_lr": 0.00015421052631578946, + "low_lr": 3.0842105263157895e-06, + "step": 1607 + }, + { + "epoch": 4.226166995397764, + "high_lr": 0.00015421052631578946, + "low_lr": 3.0842105263157895e-06, + "step": 1607 + }, + { + "epoch": 4.226166995397764, + "high_lr": 0.00015421052631578946, + "low_lr": 3.0842105263157895e-06, + "step": 1607 + }, + { + "epoch": 4.226166995397764, + "high_lr": 0.00015421052631578946, + "low_lr": 3.0842105263157895e-06, + "step": 1607 + }, + { + "epoch": 4.226166995397764, + "high_lr": 0.00015421052631578946, + "low_lr": 3.0842105263157895e-06, + "step": 1607 + }, + { + "epoch": 4.226166995397764, + "high_lr": 0.00015421052631578946, + "low_lr": 3.0842105263157895e-06, + "step": 1607 + }, + { + "epoch": 4.226166995397764, + "high_lr": 0.00015421052631578946, + "low_lr": 3.0842105263157895e-06, + "step": 1607 + }, + { + "epoch": 4.226166995397764, + "high_lr": 0.00015421052631578946, + "low_lr": 3.0842105263157895e-06, + "step": 1607 + }, + { + "epoch": 4.22879684418146, + "grad_norm": 1.4905816316604614, + "learning_rate": 0.0001536842105263158, + "loss": 1.266, + "step": 1608 + }, + { + "epoch": 4.22879684418146, + "high_lr": 0.0001536842105263158, + "low_lr": 3.0736842105263158e-06, + "step": 1608 + }, + { + "epoch": 4.22879684418146, + "high_lr": 0.0001536842105263158, + "low_lr": 3.0736842105263158e-06, + "step": 1608 + }, + { + "epoch": 4.22879684418146, + "high_lr": 0.0001536842105263158, + "low_lr": 3.0736842105263158e-06, + "step": 1608 + }, + { + "epoch": 4.22879684418146, + "high_lr": 0.0001536842105263158, + "low_lr": 3.0736842105263158e-06, + "step": 1608 + }, + { + "epoch": 4.22879684418146, + "high_lr": 0.0001536842105263158, + "low_lr": 3.0736842105263158e-06, + "step": 1608 + }, + { + "epoch": 4.22879684418146, + "high_lr": 0.0001536842105263158, + "low_lr": 3.0736842105263158e-06, + "step": 1608 + }, + { + "epoch": 4.22879684418146, + "high_lr": 0.0001536842105263158, + "low_lr": 3.0736842105263158e-06, + "step": 1608 + }, + { + "epoch": 4.22879684418146, + "high_lr": 0.0001536842105263158, + "low_lr": 3.0736842105263158e-06, + "step": 1608 + }, + { + "epoch": 4.2314266929651545, + "grad_norm": 1.675032615661621, + "learning_rate": 0.0001531578947368421, + "loss": 1.2316, + "step": 1609 + }, + { + "epoch": 4.2314266929651545, + "high_lr": 0.0001531578947368421, + "low_lr": 3.0631578947368425e-06, + "step": 1609 + }, + { + "epoch": 4.2314266929651545, + "high_lr": 0.0001531578947368421, + "low_lr": 3.0631578947368425e-06, + "step": 1609 + }, + { + "epoch": 4.2314266929651545, + "high_lr": 0.0001531578947368421, + "low_lr": 3.0631578947368425e-06, + "step": 1609 + }, + { + "epoch": 4.2314266929651545, + "high_lr": 0.0001531578947368421, + "low_lr": 3.0631578947368425e-06, + "step": 1609 + }, + { + "epoch": 4.2314266929651545, + "high_lr": 0.0001531578947368421, + "low_lr": 3.0631578947368425e-06, + "step": 1609 + }, + { + "epoch": 4.2314266929651545, + "high_lr": 0.0001531578947368421, + "low_lr": 3.0631578947368425e-06, + "step": 1609 + }, + { + "epoch": 4.2314266929651545, + "high_lr": 0.0001531578947368421, + "low_lr": 3.0631578947368425e-06, + "step": 1609 + }, + { + "epoch": 4.2314266929651545, + "high_lr": 0.0001531578947368421, + "low_lr": 3.0631578947368425e-06, + "step": 1609 + }, + { + "epoch": 4.234056541748849, + "grad_norm": 1.5411441326141357, + "learning_rate": 0.00015263157894736842, + "loss": 1.229, + "step": 1610 + }, + { + "epoch": 4.234056541748849, + "high_lr": 0.00015263157894736842, + "low_lr": 3.052631578947369e-06, + "step": 1610 + }, + { + "epoch": 4.234056541748849, + "high_lr": 0.00015263157894736842, + "low_lr": 3.052631578947369e-06, + "step": 1610 + }, + { + "epoch": 4.234056541748849, + "high_lr": 0.00015263157894736842, + "low_lr": 3.052631578947369e-06, + "step": 1610 + }, + { + "epoch": 4.234056541748849, + "high_lr": 0.00015263157894736842, + "low_lr": 3.052631578947369e-06, + "step": 1610 + }, + { + "epoch": 4.234056541748849, + "high_lr": 0.00015263157894736842, + "low_lr": 3.052631578947369e-06, + "step": 1610 + }, + { + "epoch": 4.234056541748849, + "high_lr": 0.00015263157894736842, + "low_lr": 3.052631578947369e-06, + "step": 1610 + }, + { + "epoch": 4.234056541748849, + "high_lr": 0.00015263157894736842, + "low_lr": 3.052631578947369e-06, + "step": 1610 + }, + { + "epoch": 4.234056541748849, + "high_lr": 0.00015263157894736842, + "low_lr": 3.052631578947369e-06, + "step": 1610 + }, + { + "epoch": 4.236686390532545, + "grad_norm": 1.406537413597107, + "learning_rate": 0.00015210526315789473, + "loss": 1.2178, + "step": 1611 + }, + { + "epoch": 4.236686390532545, + "high_lr": 0.00015210526315789473, + "low_lr": 3.042105263157895e-06, + "step": 1611 + }, + { + "epoch": 4.236686390532545, + "high_lr": 0.00015210526315789473, + "low_lr": 3.042105263157895e-06, + "step": 1611 + }, + { + "epoch": 4.236686390532545, + "high_lr": 0.00015210526315789473, + "low_lr": 3.042105263157895e-06, + "step": 1611 + }, + { + "epoch": 4.236686390532545, + "high_lr": 0.00015210526315789473, + "low_lr": 3.042105263157895e-06, + "step": 1611 + }, + { + "epoch": 4.236686390532545, + "high_lr": 0.00015210526315789473, + "low_lr": 3.042105263157895e-06, + "step": 1611 + }, + { + "epoch": 4.236686390532545, + "high_lr": 0.00015210526315789473, + "low_lr": 3.042105263157895e-06, + "step": 1611 + }, + { + "epoch": 4.236686390532545, + "high_lr": 0.00015210526315789473, + "low_lr": 3.042105263157895e-06, + "step": 1611 + }, + { + "epoch": 4.236686390532545, + "high_lr": 0.00015210526315789473, + "low_lr": 3.042105263157895e-06, + "step": 1611 + }, + { + "epoch": 4.239316239316239, + "grad_norm": 1.6038017272949219, + "learning_rate": 0.00015157894736842108, + "loss": 1.2459, + "step": 1612 + }, + { + "epoch": 4.239316239316239, + "high_lr": 0.00015157894736842108, + "low_lr": 3.0315789473684215e-06, + "step": 1612 + }, + { + "epoch": 4.239316239316239, + "high_lr": 0.00015157894736842108, + "low_lr": 3.0315789473684215e-06, + "step": 1612 + }, + { + "epoch": 4.239316239316239, + "high_lr": 0.00015157894736842108, + "low_lr": 3.0315789473684215e-06, + "step": 1612 + }, + { + "epoch": 4.239316239316239, + "high_lr": 0.00015157894736842108, + "low_lr": 3.0315789473684215e-06, + "step": 1612 + }, + { + "epoch": 4.239316239316239, + "high_lr": 0.00015157894736842108, + "low_lr": 3.0315789473684215e-06, + "step": 1612 + }, + { + "epoch": 4.239316239316239, + "high_lr": 0.00015157894736842108, + "low_lr": 3.0315789473684215e-06, + "step": 1612 + }, + { + "epoch": 4.239316239316239, + "high_lr": 0.00015157894736842108, + "low_lr": 3.0315789473684215e-06, + "step": 1612 + }, + { + "epoch": 4.239316239316239, + "high_lr": 0.00015157894736842108, + "low_lr": 3.0315789473684215e-06, + "step": 1612 + }, + { + "epoch": 4.241946088099934, + "grad_norm": 1.4717649221420288, + "learning_rate": 0.00015105263157894736, + "loss": 1.2447, + "step": 1613 + }, + { + "epoch": 4.241946088099934, + "high_lr": 0.00015105263157894736, + "low_lr": 3.0210526315789474e-06, + "step": 1613 + }, + { + "epoch": 4.241946088099934, + "high_lr": 0.00015105263157894736, + "low_lr": 3.0210526315789474e-06, + "step": 1613 + }, + { + "epoch": 4.241946088099934, + "high_lr": 0.00015105263157894736, + "low_lr": 3.0210526315789474e-06, + "step": 1613 + }, + { + "epoch": 4.241946088099934, + "high_lr": 0.00015105263157894736, + "low_lr": 3.0210526315789474e-06, + "step": 1613 + }, + { + "epoch": 4.241946088099934, + "high_lr": 0.00015105263157894736, + "low_lr": 3.0210526315789474e-06, + "step": 1613 + }, + { + "epoch": 4.241946088099934, + "high_lr": 0.00015105263157894736, + "low_lr": 3.0210526315789474e-06, + "step": 1613 + }, + { + "epoch": 4.241946088099934, + "high_lr": 0.00015105263157894736, + "low_lr": 3.0210526315789474e-06, + "step": 1613 + }, + { + "epoch": 4.241946088099934, + "high_lr": 0.00015105263157894736, + "low_lr": 3.0210526315789474e-06, + "step": 1613 + }, + { + "epoch": 4.2445759368836296, + "grad_norm": 1.6775355339050293, + "learning_rate": 0.00015052631578947367, + "loss": 1.2372, + "step": 1614 + }, + { + "epoch": 4.2445759368836296, + "high_lr": 0.00015052631578947367, + "low_lr": 3.0105263157894737e-06, + "step": 1614 + }, + { + "epoch": 4.2445759368836296, + "high_lr": 0.00015052631578947367, + "low_lr": 3.0105263157894737e-06, + "step": 1614 + }, + { + "epoch": 4.2445759368836296, + "high_lr": 0.00015052631578947367, + "low_lr": 3.0105263157894737e-06, + "step": 1614 + }, + { + "epoch": 4.2445759368836296, + "high_lr": 0.00015052631578947367, + "low_lr": 3.0105263157894737e-06, + "step": 1614 + }, + { + "epoch": 4.2445759368836296, + "high_lr": 0.00015052631578947367, + "low_lr": 3.0105263157894737e-06, + "step": 1614 + }, + { + "epoch": 4.2445759368836296, + "high_lr": 0.00015052631578947367, + "low_lr": 3.0105263157894737e-06, + "step": 1614 + }, + { + "epoch": 4.2445759368836296, + "high_lr": 0.00015052631578947367, + "low_lr": 3.0105263157894737e-06, + "step": 1614 + }, + { + "epoch": 4.2445759368836296, + "high_lr": 0.00015052631578947367, + "low_lr": 3.0105263157894737e-06, + "step": 1614 + }, + { + "epoch": 4.247205785667324, + "grad_norm": 1.6113718748092651, + "learning_rate": 0.00015, + "loss": 1.2275, + "step": 1615 + }, + { + "epoch": 4.247205785667324, + "high_lr": 0.00015, + "low_lr": 3e-06, + "step": 1615 + }, + { + "epoch": 4.247205785667324, + "high_lr": 0.00015, + "low_lr": 3e-06, + "step": 1615 + }, + { + "epoch": 4.247205785667324, + "high_lr": 0.00015, + "low_lr": 3e-06, + "step": 1615 + }, + { + "epoch": 4.247205785667324, + "high_lr": 0.00015, + "low_lr": 3e-06, + "step": 1615 + }, + { + "epoch": 4.247205785667324, + "high_lr": 0.00015, + "low_lr": 3e-06, + "step": 1615 + }, + { + "epoch": 4.247205785667324, + "high_lr": 0.00015, + "low_lr": 3e-06, + "step": 1615 + }, + { + "epoch": 4.247205785667324, + "high_lr": 0.00015, + "low_lr": 3e-06, + "step": 1615 + }, + { + "epoch": 4.247205785667324, + "high_lr": 0.00015, + "low_lr": 3e-06, + "step": 1615 + }, + { + "epoch": 4.249835634451019, + "grad_norm": 1.4303596019744873, + "learning_rate": 0.00014947368421052633, + "loss": 1.173, + "step": 1616 + }, + { + "epoch": 4.249835634451019, + "high_lr": 0.00014947368421052633, + "low_lr": 2.9894736842105264e-06, + "step": 1616 + }, + { + "epoch": 4.249835634451019, + "high_lr": 0.00014947368421052633, + "low_lr": 2.9894736842105264e-06, + "step": 1616 + }, + { + "epoch": 4.249835634451019, + "high_lr": 0.00014947368421052633, + "low_lr": 2.9894736842105264e-06, + "step": 1616 + }, + { + "epoch": 4.249835634451019, + "high_lr": 0.00014947368421052633, + "low_lr": 2.9894736842105264e-06, + "step": 1616 + }, + { + "epoch": 4.249835634451019, + "high_lr": 0.00014947368421052633, + "low_lr": 2.9894736842105264e-06, + "step": 1616 + }, + { + "epoch": 4.249835634451019, + "high_lr": 0.00014947368421052633, + "low_lr": 2.9894736842105264e-06, + "step": 1616 + }, + { + "epoch": 4.249835634451019, + "high_lr": 0.00014947368421052633, + "low_lr": 2.9894736842105264e-06, + "step": 1616 + }, + { + "epoch": 4.249835634451019, + "high_lr": 0.00014947368421052633, + "low_lr": 2.9894736842105264e-06, + "step": 1616 + }, + { + "epoch": 4.252465483234714, + "grad_norm": 1.557103157043457, + "learning_rate": 0.00014894736842105264, + "loss": 1.2103, + "step": 1617 + }, + { + "epoch": 4.252465483234714, + "high_lr": 0.00014894736842105264, + "low_lr": 2.9789473684210527e-06, + "step": 1617 + }, + { + "epoch": 4.252465483234714, + "high_lr": 0.00014894736842105264, + "low_lr": 2.9789473684210527e-06, + "step": 1617 + }, + { + "epoch": 4.252465483234714, + "high_lr": 0.00014894736842105264, + "low_lr": 2.9789473684210527e-06, + "step": 1617 + }, + { + "epoch": 4.252465483234714, + "high_lr": 0.00014894736842105264, + "low_lr": 2.9789473684210527e-06, + "step": 1617 + }, + { + "epoch": 4.252465483234714, + "high_lr": 0.00014894736842105264, + "low_lr": 2.9789473684210527e-06, + "step": 1617 + }, + { + "epoch": 4.252465483234714, + "high_lr": 0.00014894736842105264, + "low_lr": 2.9789473684210527e-06, + "step": 1617 + }, + { + "epoch": 4.252465483234714, + "high_lr": 0.00014894736842105264, + "low_lr": 2.9789473684210527e-06, + "step": 1617 + }, + { + "epoch": 4.252465483234714, + "high_lr": 0.00014894736842105264, + "low_lr": 2.9789473684210527e-06, + "step": 1617 + }, + { + "epoch": 4.255095332018409, + "grad_norm": 1.4532480239868164, + "learning_rate": 0.00014842105263157895, + "loss": 1.239, + "step": 1618 + }, + { + "epoch": 4.255095332018409, + "high_lr": 0.00014842105263157895, + "low_lr": 2.9684210526315795e-06, + "step": 1618 + }, + { + "epoch": 4.255095332018409, + "high_lr": 0.00014842105263157895, + "low_lr": 2.9684210526315795e-06, + "step": 1618 + }, + { + "epoch": 4.255095332018409, + "high_lr": 0.00014842105263157895, + "low_lr": 2.9684210526315795e-06, + "step": 1618 + }, + { + "epoch": 4.255095332018409, + "high_lr": 0.00014842105263157895, + "low_lr": 2.9684210526315795e-06, + "step": 1618 + }, + { + "epoch": 4.255095332018409, + "high_lr": 0.00014842105263157895, + "low_lr": 2.9684210526315795e-06, + "step": 1618 + }, + { + "epoch": 4.255095332018409, + "high_lr": 0.00014842105263157895, + "low_lr": 2.9684210526315795e-06, + "step": 1618 + }, + { + "epoch": 4.255095332018409, + "high_lr": 0.00014842105263157895, + "low_lr": 2.9684210526315795e-06, + "step": 1618 + }, + { + "epoch": 4.255095332018409, + "high_lr": 0.00014842105263157895, + "low_lr": 2.9684210526315795e-06, + "step": 1618 + }, + { + "epoch": 4.257725180802104, + "grad_norm": 1.565211296081543, + "learning_rate": 0.00014789473684210527, + "loss": 1.2053, + "step": 1619 + }, + { + "epoch": 4.257725180802104, + "high_lr": 0.00014789473684210527, + "low_lr": 2.957894736842106e-06, + "step": 1619 + }, + { + "epoch": 4.257725180802104, + "high_lr": 0.00014789473684210527, + "low_lr": 2.957894736842106e-06, + "step": 1619 + }, + { + "epoch": 4.257725180802104, + "high_lr": 0.00014789473684210527, + "low_lr": 2.957894736842106e-06, + "step": 1619 + }, + { + "epoch": 4.257725180802104, + "high_lr": 0.00014789473684210527, + "low_lr": 2.957894736842106e-06, + "step": 1619 + }, + { + "epoch": 4.257725180802104, + "high_lr": 0.00014789473684210527, + "low_lr": 2.957894736842106e-06, + "step": 1619 + }, + { + "epoch": 4.257725180802104, + "high_lr": 0.00014789473684210527, + "low_lr": 2.957894736842106e-06, + "step": 1619 + }, + { + "epoch": 4.257725180802104, + "high_lr": 0.00014789473684210527, + "low_lr": 2.957894736842106e-06, + "step": 1619 + }, + { + "epoch": 4.257725180802104, + "high_lr": 0.00014789473684210527, + "low_lr": 2.957894736842106e-06, + "step": 1619 + }, + { + "epoch": 4.260355029585799, + "grad_norm": 1.51058030128479, + "learning_rate": 0.00014736842105263158, + "loss": 1.2283, + "step": 1620 + }, + { + "epoch": 4.260355029585799, + "high_lr": 0.00014736842105263158, + "low_lr": 2.9473684210526317e-06, + "step": 1620 + }, + { + "epoch": 4.260355029585799, + "high_lr": 0.00014736842105263158, + "low_lr": 2.9473684210526317e-06, + "step": 1620 + }, + { + "epoch": 4.260355029585799, + "high_lr": 0.00014736842105263158, + "low_lr": 2.9473684210526317e-06, + "step": 1620 + }, + { + "epoch": 4.260355029585799, + "high_lr": 0.00014736842105263158, + "low_lr": 2.9473684210526317e-06, + "step": 1620 + }, + { + "epoch": 4.260355029585799, + "high_lr": 0.00014736842105263158, + "low_lr": 2.9473684210526317e-06, + "step": 1620 + }, + { + "epoch": 4.260355029585799, + "high_lr": 0.00014736842105263158, + "low_lr": 2.9473684210526317e-06, + "step": 1620 + }, + { + "epoch": 4.260355029585799, + "high_lr": 0.00014736842105263158, + "low_lr": 2.9473684210526317e-06, + "step": 1620 + }, + { + "epoch": 4.260355029585799, + "high_lr": 0.00014736842105263158, + "low_lr": 2.9473684210526317e-06, + "step": 1620 + }, + { + "epoch": 4.262984878369494, + "grad_norm": 1.89518141746521, + "learning_rate": 0.0001468421052631579, + "loss": 1.1705, + "step": 1621 + }, + { + "epoch": 4.262984878369494, + "high_lr": 0.0001468421052631579, + "low_lr": 2.936842105263158e-06, + "step": 1621 + }, + { + "epoch": 4.262984878369494, + "high_lr": 0.0001468421052631579, + "low_lr": 2.936842105263158e-06, + "step": 1621 + }, + { + "epoch": 4.262984878369494, + "high_lr": 0.0001468421052631579, + "low_lr": 2.936842105263158e-06, + "step": 1621 + }, + { + "epoch": 4.262984878369494, + "high_lr": 0.0001468421052631579, + "low_lr": 2.936842105263158e-06, + "step": 1621 + }, + { + "epoch": 4.262984878369494, + "high_lr": 0.0001468421052631579, + "low_lr": 2.936842105263158e-06, + "step": 1621 + }, + { + "epoch": 4.262984878369494, + "high_lr": 0.0001468421052631579, + "low_lr": 2.936842105263158e-06, + "step": 1621 + }, + { + "epoch": 4.262984878369494, + "high_lr": 0.0001468421052631579, + "low_lr": 2.936842105263158e-06, + "step": 1621 + }, + { + "epoch": 4.262984878369494, + "high_lr": 0.0001468421052631579, + "low_lr": 2.936842105263158e-06, + "step": 1621 + }, + { + "epoch": 4.265614727153189, + "grad_norm": 1.4794954061508179, + "learning_rate": 0.0001463157894736842, + "loss": 1.174, + "step": 1622 + }, + { + "epoch": 4.265614727153189, + "high_lr": 0.0001463157894736842, + "low_lr": 2.9263157894736844e-06, + "step": 1622 + }, + { + "epoch": 4.265614727153189, + "high_lr": 0.0001463157894736842, + "low_lr": 2.9263157894736844e-06, + "step": 1622 + }, + { + "epoch": 4.265614727153189, + "high_lr": 0.0001463157894736842, + "low_lr": 2.9263157894736844e-06, + "step": 1622 + }, + { + "epoch": 4.265614727153189, + "high_lr": 0.0001463157894736842, + "low_lr": 2.9263157894736844e-06, + "step": 1622 + }, + { + "epoch": 4.265614727153189, + "high_lr": 0.0001463157894736842, + "low_lr": 2.9263157894736844e-06, + "step": 1622 + }, + { + "epoch": 4.265614727153189, + "high_lr": 0.0001463157894736842, + "low_lr": 2.9263157894736844e-06, + "step": 1622 + }, + { + "epoch": 4.265614727153189, + "high_lr": 0.0001463157894736842, + "low_lr": 2.9263157894736844e-06, + "step": 1622 + }, + { + "epoch": 4.265614727153189, + "high_lr": 0.0001463157894736842, + "low_lr": 2.9263157894736844e-06, + "step": 1622 + }, + { + "epoch": 4.268244575936883, + "grad_norm": 1.5923118591308594, + "learning_rate": 0.00014578947368421052, + "loss": 1.2263, + "step": 1623 + }, + { + "epoch": 4.268244575936883, + "high_lr": 0.00014578947368421052, + "low_lr": 2.9157894736842107e-06, + "step": 1623 + }, + { + "epoch": 4.268244575936883, + "high_lr": 0.00014578947368421052, + "low_lr": 2.9157894736842107e-06, + "step": 1623 + }, + { + "epoch": 4.268244575936883, + "high_lr": 0.00014578947368421052, + "low_lr": 2.9157894736842107e-06, + "step": 1623 + }, + { + "epoch": 4.268244575936883, + "high_lr": 0.00014578947368421052, + "low_lr": 2.9157894736842107e-06, + "step": 1623 + }, + { + "epoch": 4.268244575936883, + "high_lr": 0.00014578947368421052, + "low_lr": 2.9157894736842107e-06, + "step": 1623 + }, + { + "epoch": 4.268244575936883, + "high_lr": 0.00014578947368421052, + "low_lr": 2.9157894736842107e-06, + "step": 1623 + }, + { + "epoch": 4.268244575936883, + "high_lr": 0.00014578947368421052, + "low_lr": 2.9157894736842107e-06, + "step": 1623 + }, + { + "epoch": 4.268244575936883, + "high_lr": 0.00014578947368421052, + "low_lr": 2.9157894736842107e-06, + "step": 1623 + }, + { + "epoch": 4.270874424720579, + "grad_norm": 1.5842251777648926, + "learning_rate": 0.00014526315789473686, + "loss": 1.1803, + "step": 1624 + }, + { + "epoch": 4.270874424720579, + "high_lr": 0.00014526315789473686, + "low_lr": 2.905263157894737e-06, + "step": 1624 + }, + { + "epoch": 4.270874424720579, + "high_lr": 0.00014526315789473686, + "low_lr": 2.905263157894737e-06, + "step": 1624 + }, + { + "epoch": 4.270874424720579, + "high_lr": 0.00014526315789473686, + "low_lr": 2.905263157894737e-06, + "step": 1624 + }, + { + "epoch": 4.270874424720579, + "high_lr": 0.00014526315789473686, + "low_lr": 2.905263157894737e-06, + "step": 1624 + }, + { + "epoch": 4.270874424720579, + "high_lr": 0.00014526315789473686, + "low_lr": 2.905263157894737e-06, + "step": 1624 + }, + { + "epoch": 4.270874424720579, + "high_lr": 0.00014526315789473686, + "low_lr": 2.905263157894737e-06, + "step": 1624 + }, + { + "epoch": 4.270874424720579, + "high_lr": 0.00014526315789473686, + "low_lr": 2.905263157894737e-06, + "step": 1624 + }, + { + "epoch": 4.270874424720579, + "high_lr": 0.00014526315789473686, + "low_lr": 2.905263157894737e-06, + "step": 1624 + }, + { + "epoch": 4.273504273504273, + "grad_norm": 1.6386295557022095, + "learning_rate": 0.00014473684210526317, + "loss": 1.2707, + "step": 1625 + }, + { + "epoch": 4.273504273504273, + "high_lr": 0.00014473684210526317, + "low_lr": 2.8947368421052634e-06, + "step": 1625 + }, + { + "epoch": 4.273504273504273, + "high_lr": 0.00014473684210526317, + "low_lr": 2.8947368421052634e-06, + "step": 1625 + }, + { + "epoch": 4.273504273504273, + "high_lr": 0.00014473684210526317, + "low_lr": 2.8947368421052634e-06, + "step": 1625 + }, + { + "epoch": 4.273504273504273, + "high_lr": 0.00014473684210526317, + "low_lr": 2.8947368421052634e-06, + "step": 1625 + }, + { + "epoch": 4.273504273504273, + "high_lr": 0.00014473684210526317, + "low_lr": 2.8947368421052634e-06, + "step": 1625 + }, + { + "epoch": 4.273504273504273, + "high_lr": 0.00014473684210526317, + "low_lr": 2.8947368421052634e-06, + "step": 1625 + }, + { + "epoch": 4.273504273504273, + "high_lr": 0.00014473684210526317, + "low_lr": 2.8947368421052634e-06, + "step": 1625 + }, + { + "epoch": 4.273504273504273, + "high_lr": 0.00014473684210526317, + "low_lr": 2.8947368421052634e-06, + "step": 1625 + }, + { + "epoch": 4.276134122287968, + "grad_norm": 1.6175718307495117, + "learning_rate": 0.00014421052631578948, + "loss": 1.169, + "step": 1626 + }, + { + "epoch": 4.276134122287968, + "high_lr": 0.00014421052631578948, + "low_lr": 2.88421052631579e-06, + "step": 1626 + }, + { + "epoch": 4.276134122287968, + "high_lr": 0.00014421052631578948, + "low_lr": 2.88421052631579e-06, + "step": 1626 + }, + { + "epoch": 4.276134122287968, + "high_lr": 0.00014421052631578948, + "low_lr": 2.88421052631579e-06, + "step": 1626 + }, + { + "epoch": 4.276134122287968, + "high_lr": 0.00014421052631578948, + "low_lr": 2.88421052631579e-06, + "step": 1626 + }, + { + "epoch": 4.276134122287968, + "high_lr": 0.00014421052631578948, + "low_lr": 2.88421052631579e-06, + "step": 1626 + }, + { + "epoch": 4.276134122287968, + "high_lr": 0.00014421052631578948, + "low_lr": 2.88421052631579e-06, + "step": 1626 + }, + { + "epoch": 4.276134122287968, + "high_lr": 0.00014421052631578948, + "low_lr": 2.88421052631579e-06, + "step": 1626 + }, + { + "epoch": 4.276134122287968, + "high_lr": 0.00014421052631578948, + "low_lr": 2.88421052631579e-06, + "step": 1626 + }, + { + "epoch": 4.278763971071664, + "grad_norm": 1.539567232131958, + "learning_rate": 0.0001436842105263158, + "loss": 1.224, + "step": 1627 + }, + { + "epoch": 4.278763971071664, + "high_lr": 0.0001436842105263158, + "low_lr": 2.8736842105263164e-06, + "step": 1627 + }, + { + "epoch": 4.278763971071664, + "high_lr": 0.0001436842105263158, + "low_lr": 2.8736842105263164e-06, + "step": 1627 + }, + { + "epoch": 4.278763971071664, + "high_lr": 0.0001436842105263158, + "low_lr": 2.8736842105263164e-06, + "step": 1627 + }, + { + "epoch": 4.278763971071664, + "high_lr": 0.0001436842105263158, + "low_lr": 2.8736842105263164e-06, + "step": 1627 + }, + { + "epoch": 4.278763971071664, + "high_lr": 0.0001436842105263158, + "low_lr": 2.8736842105263164e-06, + "step": 1627 + }, + { + "epoch": 4.278763971071664, + "high_lr": 0.0001436842105263158, + "low_lr": 2.8736842105263164e-06, + "step": 1627 + }, + { + "epoch": 4.278763971071664, + "high_lr": 0.0001436842105263158, + "low_lr": 2.8736842105263164e-06, + "step": 1627 + }, + { + "epoch": 4.278763971071664, + "high_lr": 0.0001436842105263158, + "low_lr": 2.8736842105263164e-06, + "step": 1627 + }, + { + "epoch": 4.281393819855358, + "grad_norm": 1.699150562286377, + "learning_rate": 0.0001431578947368421, + "loss": 1.2255, + "step": 1628 + }, + { + "epoch": 4.281393819855358, + "high_lr": 0.0001431578947368421, + "low_lr": 2.8631578947368423e-06, + "step": 1628 + }, + { + "epoch": 4.281393819855358, + "high_lr": 0.0001431578947368421, + "low_lr": 2.8631578947368423e-06, + "step": 1628 + }, + { + "epoch": 4.281393819855358, + "high_lr": 0.0001431578947368421, + "low_lr": 2.8631578947368423e-06, + "step": 1628 + }, + { + "epoch": 4.281393819855358, + "high_lr": 0.0001431578947368421, + "low_lr": 2.8631578947368423e-06, + "step": 1628 + }, + { + "epoch": 4.281393819855358, + "high_lr": 0.0001431578947368421, + "low_lr": 2.8631578947368423e-06, + "step": 1628 + }, + { + "epoch": 4.281393819855358, + "high_lr": 0.0001431578947368421, + "low_lr": 2.8631578947368423e-06, + "step": 1628 + }, + { + "epoch": 4.281393819855358, + "high_lr": 0.0001431578947368421, + "low_lr": 2.8631578947368423e-06, + "step": 1628 + }, + { + "epoch": 4.281393819855358, + "high_lr": 0.0001431578947368421, + "low_lr": 2.8631578947368423e-06, + "step": 1628 + }, + { + "epoch": 4.284023668639053, + "grad_norm": 2.0216362476348877, + "learning_rate": 0.00014263157894736842, + "loss": 1.1561, + "step": 1629 + }, + { + "epoch": 4.284023668639053, + "high_lr": 0.00014263157894736842, + "low_lr": 2.8526315789473687e-06, + "step": 1629 + }, + { + "epoch": 4.284023668639053, + "high_lr": 0.00014263157894736842, + "low_lr": 2.8526315789473687e-06, + "step": 1629 + }, + { + "epoch": 4.284023668639053, + "high_lr": 0.00014263157894736842, + "low_lr": 2.8526315789473687e-06, + "step": 1629 + }, + { + "epoch": 4.284023668639053, + "high_lr": 0.00014263157894736842, + "low_lr": 2.8526315789473687e-06, + "step": 1629 + }, + { + "epoch": 4.284023668639053, + "high_lr": 0.00014263157894736842, + "low_lr": 2.8526315789473687e-06, + "step": 1629 + }, + { + "epoch": 4.284023668639053, + "high_lr": 0.00014263157894736842, + "low_lr": 2.8526315789473687e-06, + "step": 1629 + }, + { + "epoch": 4.284023668639053, + "high_lr": 0.00014263157894736842, + "low_lr": 2.8526315789473687e-06, + "step": 1629 + }, + { + "epoch": 4.284023668639053, + "high_lr": 0.00014263157894736842, + "low_lr": 2.8526315789473687e-06, + "step": 1629 + }, + { + "epoch": 4.2866535174227485, + "grad_norm": 1.4835139513015747, + "learning_rate": 0.00014210526315789474, + "loss": 1.2143, + "step": 1630 + }, + { + "epoch": 4.2866535174227485, + "high_lr": 0.00014210526315789474, + "low_lr": 2.842105263157895e-06, + "step": 1630 + }, + { + "epoch": 4.2866535174227485, + "high_lr": 0.00014210526315789474, + "low_lr": 2.842105263157895e-06, + "step": 1630 + }, + { + "epoch": 4.2866535174227485, + "high_lr": 0.00014210526315789474, + "low_lr": 2.842105263157895e-06, + "step": 1630 + }, + { + "epoch": 4.2866535174227485, + "high_lr": 0.00014210526315789474, + "low_lr": 2.842105263157895e-06, + "step": 1630 + }, + { + "epoch": 4.2866535174227485, + "high_lr": 0.00014210526315789474, + "low_lr": 2.842105263157895e-06, + "step": 1630 + }, + { + "epoch": 4.2866535174227485, + "high_lr": 0.00014210526315789474, + "low_lr": 2.842105263157895e-06, + "step": 1630 + }, + { + "epoch": 4.2866535174227485, + "high_lr": 0.00014210526315789474, + "low_lr": 2.842105263157895e-06, + "step": 1630 + }, + { + "epoch": 4.2866535174227485, + "high_lr": 0.00014210526315789474, + "low_lr": 2.842105263157895e-06, + "step": 1630 + }, + { + "epoch": 4.289283366206443, + "grad_norm": 1.440755844116211, + "learning_rate": 0.00014157894736842105, + "loss": 1.223, + "step": 1631 + }, + { + "epoch": 4.289283366206443, + "high_lr": 0.00014157894736842105, + "low_lr": 2.8315789473684213e-06, + "step": 1631 + }, + { + "epoch": 4.289283366206443, + "high_lr": 0.00014157894736842105, + "low_lr": 2.8315789473684213e-06, + "step": 1631 + }, + { + "epoch": 4.289283366206443, + "high_lr": 0.00014157894736842105, + "low_lr": 2.8315789473684213e-06, + "step": 1631 + }, + { + "epoch": 4.289283366206443, + "high_lr": 0.00014157894736842105, + "low_lr": 2.8315789473684213e-06, + "step": 1631 + }, + { + "epoch": 4.289283366206443, + "high_lr": 0.00014157894736842105, + "low_lr": 2.8315789473684213e-06, + "step": 1631 + }, + { + "epoch": 4.289283366206443, + "high_lr": 0.00014157894736842105, + "low_lr": 2.8315789473684213e-06, + "step": 1631 + }, + { + "epoch": 4.289283366206443, + "high_lr": 0.00014157894736842105, + "low_lr": 2.8315789473684213e-06, + "step": 1631 + }, + { + "epoch": 4.289283366206443, + "high_lr": 0.00014157894736842105, + "low_lr": 2.8315789473684213e-06, + "step": 1631 + }, + { + "epoch": 4.291913214990138, + "grad_norm": 1.5793685913085938, + "learning_rate": 0.0001410526315789474, + "loss": 1.2077, + "step": 1632 + }, + { + "epoch": 4.291913214990138, + "high_lr": 0.0001410526315789474, + "low_lr": 2.8210526315789476e-06, + "step": 1632 + }, + { + "epoch": 4.291913214990138, + "high_lr": 0.0001410526315789474, + "low_lr": 2.8210526315789476e-06, + "step": 1632 + }, + { + "epoch": 4.291913214990138, + "high_lr": 0.0001410526315789474, + "low_lr": 2.8210526315789476e-06, + "step": 1632 + }, + { + "epoch": 4.291913214990138, + "high_lr": 0.0001410526315789474, + "low_lr": 2.8210526315789476e-06, + "step": 1632 + }, + { + "epoch": 4.291913214990138, + "high_lr": 0.0001410526315789474, + "low_lr": 2.8210526315789476e-06, + "step": 1632 + }, + { + "epoch": 4.291913214990138, + "high_lr": 0.0001410526315789474, + "low_lr": 2.8210526315789476e-06, + "step": 1632 + }, + { + "epoch": 4.291913214990138, + "high_lr": 0.0001410526315789474, + "low_lr": 2.8210526315789476e-06, + "step": 1632 + }, + { + "epoch": 4.291913214990138, + "high_lr": 0.0001410526315789474, + "low_lr": 2.8210526315789476e-06, + "step": 1632 + }, + { + "epoch": 4.294543063773833, + "grad_norm": 1.4733338356018066, + "learning_rate": 0.0001405263157894737, + "loss": 1.1838, + "step": 1633 + }, + { + "epoch": 4.294543063773833, + "high_lr": 0.0001405263157894737, + "low_lr": 2.810526315789474e-06, + "step": 1633 + }, + { + "epoch": 4.294543063773833, + "high_lr": 0.0001405263157894737, + "low_lr": 2.810526315789474e-06, + "step": 1633 + }, + { + "epoch": 4.294543063773833, + "high_lr": 0.0001405263157894737, + "low_lr": 2.810526315789474e-06, + "step": 1633 + }, + { + "epoch": 4.294543063773833, + "high_lr": 0.0001405263157894737, + "low_lr": 2.810526315789474e-06, + "step": 1633 + }, + { + "epoch": 4.294543063773833, + "high_lr": 0.0001405263157894737, + "low_lr": 2.810526315789474e-06, + "step": 1633 + }, + { + "epoch": 4.294543063773833, + "high_lr": 0.0001405263157894737, + "low_lr": 2.810526315789474e-06, + "step": 1633 + }, + { + "epoch": 4.294543063773833, + "high_lr": 0.0001405263157894737, + "low_lr": 2.810526315789474e-06, + "step": 1633 + }, + { + "epoch": 4.294543063773833, + "high_lr": 0.0001405263157894737, + "low_lr": 2.810526315789474e-06, + "step": 1633 + }, + { + "epoch": 4.297172912557528, + "grad_norm": 1.656105875968933, + "learning_rate": 0.00014000000000000001, + "loss": 1.1924, + "step": 1634 + }, + { + "epoch": 4.297172912557528, + "high_lr": 0.00014000000000000001, + "low_lr": 2.8000000000000003e-06, + "step": 1634 + }, + { + "epoch": 4.297172912557528, + "high_lr": 0.00014000000000000001, + "low_lr": 2.8000000000000003e-06, + "step": 1634 + }, + { + "epoch": 4.297172912557528, + "high_lr": 0.00014000000000000001, + "low_lr": 2.8000000000000003e-06, + "step": 1634 + }, + { + "epoch": 4.297172912557528, + "high_lr": 0.00014000000000000001, + "low_lr": 2.8000000000000003e-06, + "step": 1634 + }, + { + "epoch": 4.297172912557528, + "high_lr": 0.00014000000000000001, + "low_lr": 2.8000000000000003e-06, + "step": 1634 + }, + { + "epoch": 4.297172912557528, + "high_lr": 0.00014000000000000001, + "low_lr": 2.8000000000000003e-06, + "step": 1634 + }, + { + "epoch": 4.297172912557528, + "high_lr": 0.00014000000000000001, + "low_lr": 2.8000000000000003e-06, + "step": 1634 + }, + { + "epoch": 4.297172912557528, + "high_lr": 0.00014000000000000001, + "low_lr": 2.8000000000000003e-06, + "step": 1634 + }, + { + "epoch": 4.299802761341223, + "grad_norm": 1.5349594354629517, + "learning_rate": 0.0001394736842105263, + "loss": 1.2158, + "step": 1635 + }, + { + "epoch": 4.299802761341223, + "high_lr": 0.0001394736842105263, + "low_lr": 2.789473684210526e-06, + "step": 1635 + }, + { + "epoch": 4.299802761341223, + "high_lr": 0.0001394736842105263, + "low_lr": 2.789473684210526e-06, + "step": 1635 + }, + { + "epoch": 4.299802761341223, + "high_lr": 0.0001394736842105263, + "low_lr": 2.789473684210526e-06, + "step": 1635 + }, + { + "epoch": 4.299802761341223, + "high_lr": 0.0001394736842105263, + "low_lr": 2.789473684210526e-06, + "step": 1635 + }, + { + "epoch": 4.299802761341223, + "high_lr": 0.0001394736842105263, + "low_lr": 2.789473684210526e-06, + "step": 1635 + }, + { + "epoch": 4.299802761341223, + "high_lr": 0.0001394736842105263, + "low_lr": 2.789473684210526e-06, + "step": 1635 + }, + { + "epoch": 4.299802761341223, + "high_lr": 0.0001394736842105263, + "low_lr": 2.789473684210526e-06, + "step": 1635 + }, + { + "epoch": 4.299802761341223, + "high_lr": 0.0001394736842105263, + "low_lr": 2.789473684210526e-06, + "step": 1635 + }, + { + "epoch": 4.302432610124918, + "grad_norm": 1.5845298767089844, + "learning_rate": 0.00013894736842105264, + "loss": 1.2199, + "step": 1636 + }, + { + "epoch": 4.302432610124918, + "high_lr": 0.00013894736842105264, + "low_lr": 2.7789473684210525e-06, + "step": 1636 + }, + { + "epoch": 4.302432610124918, + "high_lr": 0.00013894736842105264, + "low_lr": 2.7789473684210525e-06, + "step": 1636 + }, + { + "epoch": 4.302432610124918, + "high_lr": 0.00013894736842105264, + "low_lr": 2.7789473684210525e-06, + "step": 1636 + }, + { + "epoch": 4.302432610124918, + "high_lr": 0.00013894736842105264, + "low_lr": 2.7789473684210525e-06, + "step": 1636 + }, + { + "epoch": 4.302432610124918, + "high_lr": 0.00013894736842105264, + "low_lr": 2.7789473684210525e-06, + "step": 1636 + }, + { + "epoch": 4.302432610124918, + "high_lr": 0.00013894736842105264, + "low_lr": 2.7789473684210525e-06, + "step": 1636 + }, + { + "epoch": 4.302432610124918, + "high_lr": 0.00013894736842105264, + "low_lr": 2.7789473684210525e-06, + "step": 1636 + }, + { + "epoch": 4.302432610124918, + "high_lr": 0.00013894736842105264, + "low_lr": 2.7789473684210525e-06, + "step": 1636 + }, + { + "epoch": 4.305062458908613, + "grad_norm": 1.5544193983078003, + "learning_rate": 0.00013842105263157895, + "loss": 1.1931, + "step": 1637 + }, + { + "epoch": 4.305062458908613, + "high_lr": 0.00013842105263157895, + "low_lr": 2.7684210526315793e-06, + "step": 1637 + }, + { + "epoch": 4.305062458908613, + "high_lr": 0.00013842105263157895, + "low_lr": 2.7684210526315793e-06, + "step": 1637 + }, + { + "epoch": 4.305062458908613, + "high_lr": 0.00013842105263157895, + "low_lr": 2.7684210526315793e-06, + "step": 1637 + }, + { + "epoch": 4.305062458908613, + "high_lr": 0.00013842105263157895, + "low_lr": 2.7684210526315793e-06, + "step": 1637 + }, + { + "epoch": 4.305062458908613, + "high_lr": 0.00013842105263157895, + "low_lr": 2.7684210526315793e-06, + "step": 1637 + }, + { + "epoch": 4.305062458908613, + "high_lr": 0.00013842105263157895, + "low_lr": 2.7684210526315793e-06, + "step": 1637 + }, + { + "epoch": 4.305062458908613, + "high_lr": 0.00013842105263157895, + "low_lr": 2.7684210526315793e-06, + "step": 1637 + }, + { + "epoch": 4.305062458908613, + "high_lr": 0.00013842105263157895, + "low_lr": 2.7684210526315793e-06, + "step": 1637 + }, + { + "epoch": 4.3076923076923075, + "grad_norm": 1.5499837398529053, + "learning_rate": 0.00013789473684210527, + "loss": 1.1978, + "step": 1638 + }, + { + "epoch": 4.3076923076923075, + "high_lr": 0.00013789473684210527, + "low_lr": 2.7578947368421056e-06, + "step": 1638 + }, + { + "epoch": 4.3076923076923075, + "high_lr": 0.00013789473684210527, + "low_lr": 2.7578947368421056e-06, + "step": 1638 + }, + { + "epoch": 4.3076923076923075, + "high_lr": 0.00013789473684210527, + "low_lr": 2.7578947368421056e-06, + "step": 1638 + }, + { + "epoch": 4.3076923076923075, + "high_lr": 0.00013789473684210527, + "low_lr": 2.7578947368421056e-06, + "step": 1638 + }, + { + "epoch": 4.3076923076923075, + "high_lr": 0.00013789473684210527, + "low_lr": 2.7578947368421056e-06, + "step": 1638 + }, + { + "epoch": 4.3076923076923075, + "high_lr": 0.00013789473684210527, + "low_lr": 2.7578947368421056e-06, + "step": 1638 + }, + { + "epoch": 4.3076923076923075, + "high_lr": 0.00013789473684210527, + "low_lr": 2.7578947368421056e-06, + "step": 1638 + }, + { + "epoch": 4.3076923076923075, + "high_lr": 0.00013789473684210527, + "low_lr": 2.7578947368421056e-06, + "step": 1638 + }, + { + "epoch": 4.310322156476003, + "grad_norm": 1.5697908401489258, + "learning_rate": 0.00013736842105263158, + "loss": 1.2205, + "step": 1639 + }, + { + "epoch": 4.310322156476003, + "high_lr": 0.00013736842105263158, + "low_lr": 2.747368421052632e-06, + "step": 1639 + }, + { + "epoch": 4.310322156476003, + "high_lr": 0.00013736842105263158, + "low_lr": 2.747368421052632e-06, + "step": 1639 + }, + { + "epoch": 4.310322156476003, + "high_lr": 0.00013736842105263158, + "low_lr": 2.747368421052632e-06, + "step": 1639 + }, + { + "epoch": 4.310322156476003, + "high_lr": 0.00013736842105263158, + "low_lr": 2.747368421052632e-06, + "step": 1639 + }, + { + "epoch": 4.310322156476003, + "high_lr": 0.00013736842105263158, + "low_lr": 2.747368421052632e-06, + "step": 1639 + }, + { + "epoch": 4.310322156476003, + "high_lr": 0.00013736842105263158, + "low_lr": 2.747368421052632e-06, + "step": 1639 + }, + { + "epoch": 4.310322156476003, + "high_lr": 0.00013736842105263158, + "low_lr": 2.747368421052632e-06, + "step": 1639 + }, + { + "epoch": 4.310322156476003, + "high_lr": 0.00013736842105263158, + "low_lr": 2.747368421052632e-06, + "step": 1639 + }, + { + "epoch": 4.312952005259698, + "grad_norm": 1.465449571609497, + "learning_rate": 0.00013684210526315792, + "loss": 1.1993, + "step": 1640 + }, + { + "epoch": 4.312952005259698, + "high_lr": 0.00013684210526315792, + "low_lr": 2.7368421052631583e-06, + "step": 1640 + }, + { + "epoch": 4.312952005259698, + "high_lr": 0.00013684210526315792, + "low_lr": 2.7368421052631583e-06, + "step": 1640 + }, + { + "epoch": 4.312952005259698, + "high_lr": 0.00013684210526315792, + "low_lr": 2.7368421052631583e-06, + "step": 1640 + }, + { + "epoch": 4.312952005259698, + "high_lr": 0.00013684210526315792, + "low_lr": 2.7368421052631583e-06, + "step": 1640 + }, + { + "epoch": 4.312952005259698, + "high_lr": 0.00013684210526315792, + "low_lr": 2.7368421052631583e-06, + "step": 1640 + }, + { + "epoch": 4.312952005259698, + "high_lr": 0.00013684210526315792, + "low_lr": 2.7368421052631583e-06, + "step": 1640 + }, + { + "epoch": 4.312952005259698, + "high_lr": 0.00013684210526315792, + "low_lr": 2.7368421052631583e-06, + "step": 1640 + }, + { + "epoch": 4.312952005259698, + "high_lr": 0.00013684210526315792, + "low_lr": 2.7368421052631583e-06, + "step": 1640 + }, + { + "epoch": 4.315581854043392, + "grad_norm": 1.5767720937728882, + "learning_rate": 0.00013631578947368423, + "loss": 1.1841, + "step": 1641 + }, + { + "epoch": 4.315581854043392, + "high_lr": 0.00013631578947368423, + "low_lr": 2.7263157894736846e-06, + "step": 1641 + }, + { + "epoch": 4.315581854043392, + "high_lr": 0.00013631578947368423, + "low_lr": 2.7263157894736846e-06, + "step": 1641 + }, + { + "epoch": 4.315581854043392, + "high_lr": 0.00013631578947368423, + "low_lr": 2.7263157894736846e-06, + "step": 1641 + }, + { + "epoch": 4.315581854043392, + "high_lr": 0.00013631578947368423, + "low_lr": 2.7263157894736846e-06, + "step": 1641 + }, + { + "epoch": 4.315581854043392, + "high_lr": 0.00013631578947368423, + "low_lr": 2.7263157894736846e-06, + "step": 1641 + }, + { + "epoch": 4.315581854043392, + "high_lr": 0.00013631578947368423, + "low_lr": 2.7263157894736846e-06, + "step": 1641 + }, + { + "epoch": 4.315581854043392, + "high_lr": 0.00013631578947368423, + "low_lr": 2.7263157894736846e-06, + "step": 1641 + }, + { + "epoch": 4.315581854043392, + "high_lr": 0.00013631578947368423, + "low_lr": 2.7263157894736846e-06, + "step": 1641 + }, + { + "epoch": 4.318211702827087, + "grad_norm": 1.4431906938552856, + "learning_rate": 0.00013578947368421052, + "loss": 1.1995, + "step": 1642 + }, + { + "epoch": 4.318211702827087, + "high_lr": 0.00013578947368421052, + "low_lr": 2.7157894736842105e-06, + "step": 1642 + }, + { + "epoch": 4.318211702827087, + "high_lr": 0.00013578947368421052, + "low_lr": 2.7157894736842105e-06, + "step": 1642 + }, + { + "epoch": 4.318211702827087, + "high_lr": 0.00013578947368421052, + "low_lr": 2.7157894736842105e-06, + "step": 1642 + }, + { + "epoch": 4.318211702827087, + "high_lr": 0.00013578947368421052, + "low_lr": 2.7157894736842105e-06, + "step": 1642 + }, + { + "epoch": 4.318211702827087, + "high_lr": 0.00013578947368421052, + "low_lr": 2.7157894736842105e-06, + "step": 1642 + }, + { + "epoch": 4.318211702827087, + "high_lr": 0.00013578947368421052, + "low_lr": 2.7157894736842105e-06, + "step": 1642 + }, + { + "epoch": 4.318211702827087, + "high_lr": 0.00013578947368421052, + "low_lr": 2.7157894736842105e-06, + "step": 1642 + }, + { + "epoch": 4.318211702827087, + "high_lr": 0.00013578947368421052, + "low_lr": 2.7157894736842105e-06, + "step": 1642 + }, + { + "epoch": 4.3208415516107825, + "grad_norm": 1.5884404182434082, + "learning_rate": 0.00013526315789473683, + "loss": 1.2289, + "step": 1643 + }, + { + "epoch": 4.3208415516107825, + "high_lr": 0.00013526315789473683, + "low_lr": 2.705263157894737e-06, + "step": 1643 + }, + { + "epoch": 4.3208415516107825, + "high_lr": 0.00013526315789473683, + "low_lr": 2.705263157894737e-06, + "step": 1643 + }, + { + "epoch": 4.3208415516107825, + "high_lr": 0.00013526315789473683, + "low_lr": 2.705263157894737e-06, + "step": 1643 + }, + { + "epoch": 4.3208415516107825, + "high_lr": 0.00013526315789473683, + "low_lr": 2.705263157894737e-06, + "step": 1643 + }, + { + "epoch": 4.3208415516107825, + "high_lr": 0.00013526315789473683, + "low_lr": 2.705263157894737e-06, + "step": 1643 + }, + { + "epoch": 4.3208415516107825, + "high_lr": 0.00013526315789473683, + "low_lr": 2.705263157894737e-06, + "step": 1643 + }, + { + "epoch": 4.3208415516107825, + "high_lr": 0.00013526315789473683, + "low_lr": 2.705263157894737e-06, + "step": 1643 + }, + { + "epoch": 4.3208415516107825, + "high_lr": 0.00013526315789473683, + "low_lr": 2.705263157894737e-06, + "step": 1643 + }, + { + "epoch": 4.323471400394477, + "grad_norm": 1.6966110467910767, + "learning_rate": 0.00013473684210526314, + "loss": 1.1941, + "step": 1644 + }, + { + "epoch": 4.323471400394477, + "high_lr": 0.00013473684210526314, + "low_lr": 2.694736842105263e-06, + "step": 1644 + }, + { + "epoch": 4.323471400394477, + "high_lr": 0.00013473684210526314, + "low_lr": 2.694736842105263e-06, + "step": 1644 + }, + { + "epoch": 4.323471400394477, + "high_lr": 0.00013473684210526314, + "low_lr": 2.694736842105263e-06, + "step": 1644 + }, + { + "epoch": 4.323471400394477, + "high_lr": 0.00013473684210526314, + "low_lr": 2.694736842105263e-06, + "step": 1644 + }, + { + "epoch": 4.323471400394477, + "high_lr": 0.00013473684210526314, + "low_lr": 2.694736842105263e-06, + "step": 1644 + }, + { + "epoch": 4.323471400394477, + "high_lr": 0.00013473684210526314, + "low_lr": 2.694736842105263e-06, + "step": 1644 + }, + { + "epoch": 4.323471400394477, + "high_lr": 0.00013473684210526314, + "low_lr": 2.694736842105263e-06, + "step": 1644 + }, + { + "epoch": 4.323471400394477, + "high_lr": 0.00013473684210526314, + "low_lr": 2.694736842105263e-06, + "step": 1644 + }, + { + "epoch": 4.326101249178172, + "grad_norm": 1.3939405679702759, + "learning_rate": 0.00013421052631578948, + "loss": 1.217, + "step": 1645 + }, + { + "epoch": 4.326101249178172, + "high_lr": 0.00013421052631578948, + "low_lr": 2.68421052631579e-06, + "step": 1645 + }, + { + "epoch": 4.326101249178172, + "high_lr": 0.00013421052631578948, + "low_lr": 2.68421052631579e-06, + "step": 1645 + }, + { + "epoch": 4.326101249178172, + "high_lr": 0.00013421052631578948, + "low_lr": 2.68421052631579e-06, + "step": 1645 + }, + { + "epoch": 4.326101249178172, + "high_lr": 0.00013421052631578948, + "low_lr": 2.68421052631579e-06, + "step": 1645 + }, + { + "epoch": 4.326101249178172, + "high_lr": 0.00013421052631578948, + "low_lr": 2.68421052631579e-06, + "step": 1645 + }, + { + "epoch": 4.326101249178172, + "high_lr": 0.00013421052631578948, + "low_lr": 2.68421052631579e-06, + "step": 1645 + }, + { + "epoch": 4.326101249178172, + "high_lr": 0.00013421052631578948, + "low_lr": 2.68421052631579e-06, + "step": 1645 + }, + { + "epoch": 4.326101249178172, + "high_lr": 0.00013421052631578948, + "low_lr": 2.68421052631579e-06, + "step": 1645 + }, + { + "epoch": 4.328731097961867, + "grad_norm": 1.5275332927703857, + "learning_rate": 0.0001336842105263158, + "loss": 1.2183, + "step": 1646 + }, + { + "epoch": 4.328731097961867, + "high_lr": 0.0001336842105263158, + "low_lr": 2.6736842105263162e-06, + "step": 1646 + }, + { + "epoch": 4.328731097961867, + "high_lr": 0.0001336842105263158, + "low_lr": 2.6736842105263162e-06, + "step": 1646 + }, + { + "epoch": 4.328731097961867, + "high_lr": 0.0001336842105263158, + "low_lr": 2.6736842105263162e-06, + "step": 1646 + }, + { + "epoch": 4.328731097961867, + "high_lr": 0.0001336842105263158, + "low_lr": 2.6736842105263162e-06, + "step": 1646 + }, + { + "epoch": 4.328731097961867, + "high_lr": 0.0001336842105263158, + "low_lr": 2.6736842105263162e-06, + "step": 1646 + }, + { + "epoch": 4.328731097961867, + "high_lr": 0.0001336842105263158, + "low_lr": 2.6736842105263162e-06, + "step": 1646 + }, + { + "epoch": 4.328731097961867, + "high_lr": 0.0001336842105263158, + "low_lr": 2.6736842105263162e-06, + "step": 1646 + }, + { + "epoch": 4.328731097961867, + "high_lr": 0.0001336842105263158, + "low_lr": 2.6736842105263162e-06, + "step": 1646 + }, + { + "epoch": 4.331360946745562, + "grad_norm": 1.4518120288848877, + "learning_rate": 0.0001331578947368421, + "loss": 1.1845, + "step": 1647 + }, + { + "epoch": 4.331360946745562, + "high_lr": 0.0001331578947368421, + "low_lr": 2.6631578947368426e-06, + "step": 1647 + }, + { + "epoch": 4.331360946745562, + "high_lr": 0.0001331578947368421, + "low_lr": 2.6631578947368426e-06, + "step": 1647 + }, + { + "epoch": 4.331360946745562, + "high_lr": 0.0001331578947368421, + "low_lr": 2.6631578947368426e-06, + "step": 1647 + }, + { + "epoch": 4.331360946745562, + "high_lr": 0.0001331578947368421, + "low_lr": 2.6631578947368426e-06, + "step": 1647 + }, + { + "epoch": 4.331360946745562, + "high_lr": 0.0001331578947368421, + "low_lr": 2.6631578947368426e-06, + "step": 1647 + }, + { + "epoch": 4.331360946745562, + "high_lr": 0.0001331578947368421, + "low_lr": 2.6631578947368426e-06, + "step": 1647 + }, + { + "epoch": 4.331360946745562, + "high_lr": 0.0001331578947368421, + "low_lr": 2.6631578947368426e-06, + "step": 1647 + }, + { + "epoch": 4.331360946745562, + "high_lr": 0.0001331578947368421, + "low_lr": 2.6631578947368426e-06, + "step": 1647 + }, + { + "epoch": 4.333990795529257, + "grad_norm": 1.505735993385315, + "learning_rate": 0.00013263157894736842, + "loss": 1.2697, + "step": 1648 + }, + { + "epoch": 4.333990795529257, + "high_lr": 0.00013263157894736842, + "low_lr": 2.652631578947369e-06, + "step": 1648 + }, + { + "epoch": 4.333990795529257, + "high_lr": 0.00013263157894736842, + "low_lr": 2.652631578947369e-06, + "step": 1648 + }, + { + "epoch": 4.333990795529257, + "high_lr": 0.00013263157894736842, + "low_lr": 2.652631578947369e-06, + "step": 1648 + }, + { + "epoch": 4.333990795529257, + "high_lr": 0.00013263157894736842, + "low_lr": 2.652631578947369e-06, + "step": 1648 + }, + { + "epoch": 4.333990795529257, + "high_lr": 0.00013263157894736842, + "low_lr": 2.652631578947369e-06, + "step": 1648 + }, + { + "epoch": 4.333990795529257, + "high_lr": 0.00013263157894736842, + "low_lr": 2.652631578947369e-06, + "step": 1648 + }, + { + "epoch": 4.333990795529257, + "high_lr": 0.00013263157894736842, + "low_lr": 2.652631578947369e-06, + "step": 1648 + }, + { + "epoch": 4.333990795529257, + "high_lr": 0.00013263157894736842, + "low_lr": 2.652631578947369e-06, + "step": 1648 + }, + { + "epoch": 4.336620644312952, + "grad_norm": 1.4569470882415771, + "learning_rate": 0.00013210526315789474, + "loss": 1.2057, + "step": 1649 + }, + { + "epoch": 4.336620644312952, + "high_lr": 0.00013210526315789474, + "low_lr": 2.6421052631578948e-06, + "step": 1649 + }, + { + "epoch": 4.336620644312952, + "high_lr": 0.00013210526315789474, + "low_lr": 2.6421052631578948e-06, + "step": 1649 + }, + { + "epoch": 4.336620644312952, + "high_lr": 0.00013210526315789474, + "low_lr": 2.6421052631578948e-06, + "step": 1649 + }, + { + "epoch": 4.336620644312952, + "high_lr": 0.00013210526315789474, + "low_lr": 2.6421052631578948e-06, + "step": 1649 + }, + { + "epoch": 4.336620644312952, + "high_lr": 0.00013210526315789474, + "low_lr": 2.6421052631578948e-06, + "step": 1649 + }, + { + "epoch": 4.336620644312952, + "high_lr": 0.00013210526315789474, + "low_lr": 2.6421052631578948e-06, + "step": 1649 + }, + { + "epoch": 4.336620644312952, + "high_lr": 0.00013210526315789474, + "low_lr": 2.6421052631578948e-06, + "step": 1649 + }, + { + "epoch": 4.336620644312952, + "high_lr": 0.00013210526315789474, + "low_lr": 2.6421052631578948e-06, + "step": 1649 + }, + { + "epoch": 4.339250493096647, + "grad_norm": 1.4787817001342773, + "learning_rate": 0.00013157894736842105, + "loss": 1.1898, + "step": 1650 + }, + { + "epoch": 4.339250493096647, + "high_lr": 0.00013157894736842105, + "low_lr": 2.631578947368421e-06, + "step": 1650 + }, + { + "epoch": 4.339250493096647, + "high_lr": 0.00013157894736842105, + "low_lr": 2.631578947368421e-06, + "step": 1650 + }, + { + "epoch": 4.339250493096647, + "high_lr": 0.00013157894736842105, + "low_lr": 2.631578947368421e-06, + "step": 1650 + }, + { + "epoch": 4.339250493096647, + "high_lr": 0.00013157894736842105, + "low_lr": 2.631578947368421e-06, + "step": 1650 + }, + { + "epoch": 4.339250493096647, + "high_lr": 0.00013157894736842105, + "low_lr": 2.631578947368421e-06, + "step": 1650 + }, + { + "epoch": 4.339250493096647, + "high_lr": 0.00013157894736842105, + "low_lr": 2.631578947368421e-06, + "step": 1650 + }, + { + "epoch": 4.339250493096647, + "high_lr": 0.00013157894736842105, + "low_lr": 2.631578947368421e-06, + "step": 1650 + }, + { + "epoch": 4.339250493096647, + "high_lr": 0.00013157894736842105, + "low_lr": 2.631578947368421e-06, + "step": 1650 + }, + { + "epoch": 4.3418803418803416, + "grad_norm": 1.423708200454712, + "learning_rate": 0.00013105263157894736, + "loss": 1.2488, + "step": 1651 + }, + { + "epoch": 4.3418803418803416, + "high_lr": 0.00013105263157894736, + "low_lr": 2.6210526315789474e-06, + "step": 1651 + }, + { + "epoch": 4.3418803418803416, + "high_lr": 0.00013105263157894736, + "low_lr": 2.6210526315789474e-06, + "step": 1651 + }, + { + "epoch": 4.3418803418803416, + "high_lr": 0.00013105263157894736, + "low_lr": 2.6210526315789474e-06, + "step": 1651 + }, + { + "epoch": 4.3418803418803416, + "high_lr": 0.00013105263157894736, + "low_lr": 2.6210526315789474e-06, + "step": 1651 + }, + { + "epoch": 4.3418803418803416, + "high_lr": 0.00013105263157894736, + "low_lr": 2.6210526315789474e-06, + "step": 1651 + }, + { + "epoch": 4.3418803418803416, + "high_lr": 0.00013105263157894736, + "low_lr": 2.6210526315789474e-06, + "step": 1651 + }, + { + "epoch": 4.3418803418803416, + "high_lr": 0.00013105263157894736, + "low_lr": 2.6210526315789474e-06, + "step": 1651 + }, + { + "epoch": 4.3418803418803416, + "high_lr": 0.00013105263157894736, + "low_lr": 2.6210526315789474e-06, + "step": 1651 + }, + { + "epoch": 4.344510190664037, + "grad_norm": 1.5851572751998901, + "learning_rate": 0.00013052631578947368, + "loss": 1.2376, + "step": 1652 + }, + { + "epoch": 4.344510190664037, + "high_lr": 0.00013052631578947368, + "low_lr": 2.6105263157894738e-06, + "step": 1652 + }, + { + "epoch": 4.344510190664037, + "high_lr": 0.00013052631578947368, + "low_lr": 2.6105263157894738e-06, + "step": 1652 + }, + { + "epoch": 4.344510190664037, + "high_lr": 0.00013052631578947368, + "low_lr": 2.6105263157894738e-06, + "step": 1652 + }, + { + "epoch": 4.344510190664037, + "high_lr": 0.00013052631578947368, + "low_lr": 2.6105263157894738e-06, + "step": 1652 + }, + { + "epoch": 4.344510190664037, + "high_lr": 0.00013052631578947368, + "low_lr": 2.6105263157894738e-06, + "step": 1652 + }, + { + "epoch": 4.344510190664037, + "high_lr": 0.00013052631578947368, + "low_lr": 2.6105263157894738e-06, + "step": 1652 + }, + { + "epoch": 4.344510190664037, + "high_lr": 0.00013052631578947368, + "low_lr": 2.6105263157894738e-06, + "step": 1652 + }, + { + "epoch": 4.344510190664037, + "high_lr": 0.00013052631578947368, + "low_lr": 2.6105263157894738e-06, + "step": 1652 + }, + { + "epoch": 4.347140039447732, + "grad_norm": 1.508453607559204, + "learning_rate": 0.00013000000000000002, + "loss": 1.2073, + "step": 1653 + }, + { + "epoch": 4.347140039447732, + "high_lr": 0.00013000000000000002, + "low_lr": 2.6e-06, + "step": 1653 + }, + { + "epoch": 4.347140039447732, + "high_lr": 0.00013000000000000002, + "low_lr": 2.6e-06, + "step": 1653 + }, + { + "epoch": 4.347140039447732, + "high_lr": 0.00013000000000000002, + "low_lr": 2.6e-06, + "step": 1653 + }, + { + "epoch": 4.347140039447732, + "high_lr": 0.00013000000000000002, + "low_lr": 2.6e-06, + "step": 1653 + }, + { + "epoch": 4.347140039447732, + "high_lr": 0.00013000000000000002, + "low_lr": 2.6e-06, + "step": 1653 + }, + { + "epoch": 4.347140039447732, + "high_lr": 0.00013000000000000002, + "low_lr": 2.6e-06, + "step": 1653 + }, + { + "epoch": 4.347140039447732, + "high_lr": 0.00013000000000000002, + "low_lr": 2.6e-06, + "step": 1653 + }, + { + "epoch": 4.347140039447732, + "high_lr": 0.00013000000000000002, + "low_lr": 2.6e-06, + "step": 1653 + }, + { + "epoch": 4.349769888231426, + "grad_norm": 1.5777734518051147, + "learning_rate": 0.00012947368421052633, + "loss": 1.2179, + "step": 1654 + }, + { + "epoch": 4.349769888231426, + "high_lr": 0.00012947368421052633, + "low_lr": 2.589473684210527e-06, + "step": 1654 + }, + { + "epoch": 4.349769888231426, + "high_lr": 0.00012947368421052633, + "low_lr": 2.589473684210527e-06, + "step": 1654 + }, + { + "epoch": 4.349769888231426, + "high_lr": 0.00012947368421052633, + "low_lr": 2.589473684210527e-06, + "step": 1654 + }, + { + "epoch": 4.349769888231426, + "high_lr": 0.00012947368421052633, + "low_lr": 2.589473684210527e-06, + "step": 1654 + }, + { + "epoch": 4.349769888231426, + "high_lr": 0.00012947368421052633, + "low_lr": 2.589473684210527e-06, + "step": 1654 + }, + { + "epoch": 4.349769888231426, + "high_lr": 0.00012947368421052633, + "low_lr": 2.589473684210527e-06, + "step": 1654 + }, + { + "epoch": 4.349769888231426, + "high_lr": 0.00012947368421052633, + "low_lr": 2.589473684210527e-06, + "step": 1654 + }, + { + "epoch": 4.349769888231426, + "high_lr": 0.00012947368421052633, + "low_lr": 2.589473684210527e-06, + "step": 1654 + }, + { + "epoch": 4.352399737015122, + "grad_norm": 1.4973317384719849, + "learning_rate": 0.00012894736842105264, + "loss": 1.2087, + "step": 1655 + }, + { + "epoch": 4.352399737015122, + "high_lr": 0.00012894736842105264, + "low_lr": 2.578947368421053e-06, + "step": 1655 + }, + { + "epoch": 4.352399737015122, + "high_lr": 0.00012894736842105264, + "low_lr": 2.578947368421053e-06, + "step": 1655 + }, + { + "epoch": 4.352399737015122, + "high_lr": 0.00012894736842105264, + "low_lr": 2.578947368421053e-06, + "step": 1655 + }, + { + "epoch": 4.352399737015122, + "high_lr": 0.00012894736842105264, + "low_lr": 2.578947368421053e-06, + "step": 1655 + }, + { + "epoch": 4.352399737015122, + "high_lr": 0.00012894736842105264, + "low_lr": 2.578947368421053e-06, + "step": 1655 + }, + { + "epoch": 4.352399737015122, + "high_lr": 0.00012894736842105264, + "low_lr": 2.578947368421053e-06, + "step": 1655 + }, + { + "epoch": 4.352399737015122, + "high_lr": 0.00012894736842105264, + "low_lr": 2.578947368421053e-06, + "step": 1655 + }, + { + "epoch": 4.352399737015122, + "high_lr": 0.00012894736842105264, + "low_lr": 2.578947368421053e-06, + "step": 1655 + }, + { + "epoch": 4.355029585798817, + "grad_norm": 1.794551968574524, + "learning_rate": 0.00012842105263157893, + "loss": 1.1636, + "step": 1656 + }, + { + "epoch": 4.355029585798817, + "high_lr": 0.00012842105263157893, + "low_lr": 2.568421052631579e-06, + "step": 1656 + }, + { + "epoch": 4.355029585798817, + "high_lr": 0.00012842105263157893, + "low_lr": 2.568421052631579e-06, + "step": 1656 + }, + { + "epoch": 4.355029585798817, + "high_lr": 0.00012842105263157893, + "low_lr": 2.568421052631579e-06, + "step": 1656 + }, + { + "epoch": 4.355029585798817, + "high_lr": 0.00012842105263157893, + "low_lr": 2.568421052631579e-06, + "step": 1656 + }, + { + "epoch": 4.355029585798817, + "high_lr": 0.00012842105263157893, + "low_lr": 2.568421052631579e-06, + "step": 1656 + }, + { + "epoch": 4.355029585798817, + "high_lr": 0.00012842105263157893, + "low_lr": 2.568421052631579e-06, + "step": 1656 + }, + { + "epoch": 4.355029585798817, + "high_lr": 0.00012842105263157893, + "low_lr": 2.568421052631579e-06, + "step": 1656 + }, + { + "epoch": 4.355029585798817, + "high_lr": 0.00012842105263157893, + "low_lr": 2.568421052631579e-06, + "step": 1656 + }, + { + "epoch": 4.357659434582511, + "grad_norm": 1.5746132135391235, + "learning_rate": 0.00012789473684210527, + "loss": 1.1974, + "step": 1657 + }, + { + "epoch": 4.357659434582511, + "high_lr": 0.00012789473684210527, + "low_lr": 2.5578947368421054e-06, + "step": 1657 + }, + { + "epoch": 4.357659434582511, + "high_lr": 0.00012789473684210527, + "low_lr": 2.5578947368421054e-06, + "step": 1657 + }, + { + "epoch": 4.357659434582511, + "high_lr": 0.00012789473684210527, + "low_lr": 2.5578947368421054e-06, + "step": 1657 + }, + { + "epoch": 4.357659434582511, + "high_lr": 0.00012789473684210527, + "low_lr": 2.5578947368421054e-06, + "step": 1657 + }, + { + "epoch": 4.357659434582511, + "high_lr": 0.00012789473684210527, + "low_lr": 2.5578947368421054e-06, + "step": 1657 + }, + { + "epoch": 4.357659434582511, + "high_lr": 0.00012789473684210527, + "low_lr": 2.5578947368421054e-06, + "step": 1657 + }, + { + "epoch": 4.357659434582511, + "high_lr": 0.00012789473684210527, + "low_lr": 2.5578947368421054e-06, + "step": 1657 + }, + { + "epoch": 4.357659434582511, + "high_lr": 0.00012789473684210527, + "low_lr": 2.5578947368421054e-06, + "step": 1657 + }, + { + "epoch": 4.360289283366207, + "grad_norm": 1.554789662361145, + "learning_rate": 0.00012736842105263158, + "loss": 1.299, + "step": 1658 + }, + { + "epoch": 4.360289283366207, + "high_lr": 0.00012736842105263158, + "low_lr": 2.5473684210526317e-06, + "step": 1658 + }, + { + "epoch": 4.360289283366207, + "high_lr": 0.00012736842105263158, + "low_lr": 2.5473684210526317e-06, + "step": 1658 + }, + { + "epoch": 4.360289283366207, + "high_lr": 0.00012736842105263158, + "low_lr": 2.5473684210526317e-06, + "step": 1658 + }, + { + "epoch": 4.360289283366207, + "high_lr": 0.00012736842105263158, + "low_lr": 2.5473684210526317e-06, + "step": 1658 + }, + { + "epoch": 4.360289283366207, + "high_lr": 0.00012736842105263158, + "low_lr": 2.5473684210526317e-06, + "step": 1658 + }, + { + "epoch": 4.360289283366207, + "high_lr": 0.00012736842105263158, + "low_lr": 2.5473684210526317e-06, + "step": 1658 + }, + { + "epoch": 4.360289283366207, + "high_lr": 0.00012736842105263158, + "low_lr": 2.5473684210526317e-06, + "step": 1658 + }, + { + "epoch": 4.360289283366207, + "high_lr": 0.00012736842105263158, + "low_lr": 2.5473684210526317e-06, + "step": 1658 + }, + { + "epoch": 4.3629191321499015, + "grad_norm": 1.7036932706832886, + "learning_rate": 0.0001268421052631579, + "loss": 1.1825, + "step": 1659 + }, + { + "epoch": 4.3629191321499015, + "high_lr": 0.0001268421052631579, + "low_lr": 2.536842105263158e-06, + "step": 1659 + }, + { + "epoch": 4.3629191321499015, + "high_lr": 0.0001268421052631579, + "low_lr": 2.536842105263158e-06, + "step": 1659 + }, + { + "epoch": 4.3629191321499015, + "high_lr": 0.0001268421052631579, + "low_lr": 2.536842105263158e-06, + "step": 1659 + }, + { + "epoch": 4.3629191321499015, + "high_lr": 0.0001268421052631579, + "low_lr": 2.536842105263158e-06, + "step": 1659 + }, + { + "epoch": 4.3629191321499015, + "high_lr": 0.0001268421052631579, + "low_lr": 2.536842105263158e-06, + "step": 1659 + }, + { + "epoch": 4.3629191321499015, + "high_lr": 0.0001268421052631579, + "low_lr": 2.536842105263158e-06, + "step": 1659 + }, + { + "epoch": 4.3629191321499015, + "high_lr": 0.0001268421052631579, + "low_lr": 2.536842105263158e-06, + "step": 1659 + }, + { + "epoch": 4.3629191321499015, + "high_lr": 0.0001268421052631579, + "low_lr": 2.536842105263158e-06, + "step": 1659 + }, + { + "epoch": 4.365548980933596, + "grad_norm": 1.8712507486343384, + "learning_rate": 0.0001263157894736842, + "loss": 1.2871, + "step": 1660 + }, + { + "epoch": 4.365548980933596, + "high_lr": 0.0001263157894736842, + "low_lr": 2.5263157894736844e-06, + "step": 1660 + }, + { + "epoch": 4.365548980933596, + "high_lr": 0.0001263157894736842, + "low_lr": 2.5263157894736844e-06, + "step": 1660 + }, + { + "epoch": 4.365548980933596, + "high_lr": 0.0001263157894736842, + "low_lr": 2.5263157894736844e-06, + "step": 1660 + }, + { + "epoch": 4.365548980933596, + "high_lr": 0.0001263157894736842, + "low_lr": 2.5263157894736844e-06, + "step": 1660 + }, + { + "epoch": 4.365548980933596, + "high_lr": 0.0001263157894736842, + "low_lr": 2.5263157894736844e-06, + "step": 1660 + }, + { + "epoch": 4.365548980933596, + "high_lr": 0.0001263157894736842, + "low_lr": 2.5263157894736844e-06, + "step": 1660 + }, + { + "epoch": 4.365548980933596, + "high_lr": 0.0001263157894736842, + "low_lr": 2.5263157894736844e-06, + "step": 1660 + }, + { + "epoch": 4.365548980933596, + "high_lr": 0.0001263157894736842, + "low_lr": 2.5263157894736844e-06, + "step": 1660 + }, + { + "epoch": 4.368178829717292, + "grad_norm": 1.3986327648162842, + "learning_rate": 0.00012578947368421055, + "loss": 1.16, + "step": 1661 + }, + { + "epoch": 4.368178829717292, + "high_lr": 0.00012578947368421055, + "low_lr": 2.5157894736842107e-06, + "step": 1661 + }, + { + "epoch": 4.368178829717292, + "high_lr": 0.00012578947368421055, + "low_lr": 2.5157894736842107e-06, + "step": 1661 + }, + { + "epoch": 4.368178829717292, + "high_lr": 0.00012578947368421055, + "low_lr": 2.5157894736842107e-06, + "step": 1661 + }, + { + "epoch": 4.368178829717292, + "high_lr": 0.00012578947368421055, + "low_lr": 2.5157894736842107e-06, + "step": 1661 + }, + { + "epoch": 4.368178829717292, + "high_lr": 0.00012578947368421055, + "low_lr": 2.5157894736842107e-06, + "step": 1661 + }, + { + "epoch": 4.368178829717292, + "high_lr": 0.00012578947368421055, + "low_lr": 2.5157894736842107e-06, + "step": 1661 + }, + { + "epoch": 4.368178829717292, + "high_lr": 0.00012578947368421055, + "low_lr": 2.5157894736842107e-06, + "step": 1661 + }, + { + "epoch": 4.368178829717292, + "high_lr": 0.00012578947368421055, + "low_lr": 2.5157894736842107e-06, + "step": 1661 + }, + { + "epoch": 4.370808678500986, + "grad_norm": 1.5190725326538086, + "learning_rate": 0.00012526315789473686, + "loss": 1.233, + "step": 1662 + }, + { + "epoch": 4.370808678500986, + "high_lr": 0.00012526315789473686, + "low_lr": 2.5052631578947375e-06, + "step": 1662 + }, + { + "epoch": 4.370808678500986, + "high_lr": 0.00012526315789473686, + "low_lr": 2.5052631578947375e-06, + "step": 1662 + }, + { + "epoch": 4.370808678500986, + "high_lr": 0.00012526315789473686, + "low_lr": 2.5052631578947375e-06, + "step": 1662 + }, + { + "epoch": 4.370808678500986, + "high_lr": 0.00012526315789473686, + "low_lr": 2.5052631578947375e-06, + "step": 1662 + }, + { + "epoch": 4.370808678500986, + "high_lr": 0.00012526315789473686, + "low_lr": 2.5052631578947375e-06, + "step": 1662 + }, + { + "epoch": 4.370808678500986, + "high_lr": 0.00012526315789473686, + "low_lr": 2.5052631578947375e-06, + "step": 1662 + }, + { + "epoch": 4.370808678500986, + "high_lr": 0.00012526315789473686, + "low_lr": 2.5052631578947375e-06, + "step": 1662 + }, + { + "epoch": 4.370808678500986, + "high_lr": 0.00012526315789473686, + "low_lr": 2.5052631578947375e-06, + "step": 1662 + }, + { + "epoch": 4.373438527284681, + "grad_norm": 1.5399186611175537, + "learning_rate": 0.00012473684210526317, + "loss": 1.238, + "step": 1663 + }, + { + "epoch": 4.373438527284681, + "high_lr": 0.00012473684210526317, + "low_lr": 2.4947368421052634e-06, + "step": 1663 + }, + { + "epoch": 4.373438527284681, + "high_lr": 0.00012473684210526317, + "low_lr": 2.4947368421052634e-06, + "step": 1663 + }, + { + "epoch": 4.373438527284681, + "high_lr": 0.00012473684210526317, + "low_lr": 2.4947368421052634e-06, + "step": 1663 + }, + { + "epoch": 4.373438527284681, + "high_lr": 0.00012473684210526317, + "low_lr": 2.4947368421052634e-06, + "step": 1663 + }, + { + "epoch": 4.373438527284681, + "high_lr": 0.00012473684210526317, + "low_lr": 2.4947368421052634e-06, + "step": 1663 + }, + { + "epoch": 4.373438527284681, + "high_lr": 0.00012473684210526317, + "low_lr": 2.4947368421052634e-06, + "step": 1663 + }, + { + "epoch": 4.373438527284681, + "high_lr": 0.00012473684210526317, + "low_lr": 2.4947368421052634e-06, + "step": 1663 + }, + { + "epoch": 4.373438527284681, + "high_lr": 0.00012473684210526317, + "low_lr": 2.4947368421052634e-06, + "step": 1663 + }, + { + "epoch": 4.3760683760683765, + "grad_norm": 1.5317223072052002, + "learning_rate": 0.00012421052631578949, + "loss": 1.2542, + "step": 1664 + }, + { + "epoch": 4.3760683760683765, + "high_lr": 0.00012421052631578949, + "low_lr": 2.4842105263157897e-06, + "step": 1664 + }, + { + "epoch": 4.3760683760683765, + "high_lr": 0.00012421052631578949, + "low_lr": 2.4842105263157897e-06, + "step": 1664 + }, + { + "epoch": 4.3760683760683765, + "high_lr": 0.00012421052631578949, + "low_lr": 2.4842105263157897e-06, + "step": 1664 + }, + { + "epoch": 4.3760683760683765, + "high_lr": 0.00012421052631578949, + "low_lr": 2.4842105263157897e-06, + "step": 1664 + }, + { + "epoch": 4.3760683760683765, + "high_lr": 0.00012421052631578949, + "low_lr": 2.4842105263157897e-06, + "step": 1664 + }, + { + "epoch": 4.3760683760683765, + "high_lr": 0.00012421052631578949, + "low_lr": 2.4842105263157897e-06, + "step": 1664 + }, + { + "epoch": 4.3760683760683765, + "high_lr": 0.00012421052631578949, + "low_lr": 2.4842105263157897e-06, + "step": 1664 + }, + { + "epoch": 4.3760683760683765, + "high_lr": 0.00012421052631578949, + "low_lr": 2.4842105263157897e-06, + "step": 1664 + }, + { + "epoch": 4.378698224852071, + "grad_norm": 1.6379618644714355, + "learning_rate": 0.0001236842105263158, + "loss": 1.2087, + "step": 1665 + }, + { + "epoch": 4.378698224852071, + "high_lr": 0.0001236842105263158, + "low_lr": 2.473684210526316e-06, + "step": 1665 + }, + { + "epoch": 4.378698224852071, + "high_lr": 0.0001236842105263158, + "low_lr": 2.473684210526316e-06, + "step": 1665 + }, + { + "epoch": 4.378698224852071, + "high_lr": 0.0001236842105263158, + "low_lr": 2.473684210526316e-06, + "step": 1665 + }, + { + "epoch": 4.378698224852071, + "high_lr": 0.0001236842105263158, + "low_lr": 2.473684210526316e-06, + "step": 1665 + }, + { + "epoch": 4.378698224852071, + "high_lr": 0.0001236842105263158, + "low_lr": 2.473684210526316e-06, + "step": 1665 + }, + { + "epoch": 4.378698224852071, + "high_lr": 0.0001236842105263158, + "low_lr": 2.473684210526316e-06, + "step": 1665 + }, + { + "epoch": 4.378698224852071, + "high_lr": 0.0001236842105263158, + "low_lr": 2.473684210526316e-06, + "step": 1665 + }, + { + "epoch": 4.378698224852071, + "high_lr": 0.0001236842105263158, + "low_lr": 2.473684210526316e-06, + "step": 1665 + }, + { + "epoch": 4.381328073635766, + "grad_norm": 1.547407627105713, + "learning_rate": 0.0001231578947368421, + "loss": 1.2358, + "step": 1666 + }, + { + "epoch": 4.381328073635766, + "high_lr": 0.0001231578947368421, + "low_lr": 2.4631578947368424e-06, + "step": 1666 + }, + { + "epoch": 4.381328073635766, + "high_lr": 0.0001231578947368421, + "low_lr": 2.4631578947368424e-06, + "step": 1666 + }, + { + "epoch": 4.381328073635766, + "high_lr": 0.0001231578947368421, + "low_lr": 2.4631578947368424e-06, + "step": 1666 + }, + { + "epoch": 4.381328073635766, + "high_lr": 0.0001231578947368421, + "low_lr": 2.4631578947368424e-06, + "step": 1666 + }, + { + "epoch": 4.381328073635766, + "high_lr": 0.0001231578947368421, + "low_lr": 2.4631578947368424e-06, + "step": 1666 + }, + { + "epoch": 4.381328073635766, + "high_lr": 0.0001231578947368421, + "low_lr": 2.4631578947368424e-06, + "step": 1666 + }, + { + "epoch": 4.381328073635766, + "high_lr": 0.0001231578947368421, + "low_lr": 2.4631578947368424e-06, + "step": 1666 + }, + { + "epoch": 4.381328073635766, + "high_lr": 0.0001231578947368421, + "low_lr": 2.4631578947368424e-06, + "step": 1666 + }, + { + "epoch": 4.3839579224194605, + "grad_norm": 1.5736956596374512, + "learning_rate": 0.00012263157894736842, + "loss": 1.2619, + "step": 1667 + }, + { + "epoch": 4.3839579224194605, + "high_lr": 0.00012263157894736842, + "low_lr": 2.4526315789473687e-06, + "step": 1667 + }, + { + "epoch": 4.3839579224194605, + "high_lr": 0.00012263157894736842, + "low_lr": 2.4526315789473687e-06, + "step": 1667 + }, + { + "epoch": 4.3839579224194605, + "high_lr": 0.00012263157894736842, + "low_lr": 2.4526315789473687e-06, + "step": 1667 + }, + { + "epoch": 4.3839579224194605, + "high_lr": 0.00012263157894736842, + "low_lr": 2.4526315789473687e-06, + "step": 1667 + }, + { + "epoch": 4.3839579224194605, + "high_lr": 0.00012263157894736842, + "low_lr": 2.4526315789473687e-06, + "step": 1667 + }, + { + "epoch": 4.3839579224194605, + "high_lr": 0.00012263157894736842, + "low_lr": 2.4526315789473687e-06, + "step": 1667 + }, + { + "epoch": 4.3839579224194605, + "high_lr": 0.00012263157894736842, + "low_lr": 2.4526315789473687e-06, + "step": 1667 + }, + { + "epoch": 4.3839579224194605, + "high_lr": 0.00012263157894736842, + "low_lr": 2.4526315789473687e-06, + "step": 1667 + }, + { + "epoch": 4.386587771203156, + "grad_norm": 1.7092012166976929, + "learning_rate": 0.00012210526315789474, + "loss": 1.217, + "step": 1668 + }, + { + "epoch": 4.386587771203156, + "high_lr": 0.00012210526315789474, + "low_lr": 2.442105263157895e-06, + "step": 1668 + }, + { + "epoch": 4.386587771203156, + "high_lr": 0.00012210526315789474, + "low_lr": 2.442105263157895e-06, + "step": 1668 + }, + { + "epoch": 4.386587771203156, + "high_lr": 0.00012210526315789474, + "low_lr": 2.442105263157895e-06, + "step": 1668 + }, + { + "epoch": 4.386587771203156, + "high_lr": 0.00012210526315789474, + "low_lr": 2.442105263157895e-06, + "step": 1668 + }, + { + "epoch": 4.386587771203156, + "high_lr": 0.00012210526315789474, + "low_lr": 2.442105263157895e-06, + "step": 1668 + }, + { + "epoch": 4.386587771203156, + "high_lr": 0.00012210526315789474, + "low_lr": 2.442105263157895e-06, + "step": 1668 + }, + { + "epoch": 4.386587771203156, + "high_lr": 0.00012210526315789474, + "low_lr": 2.442105263157895e-06, + "step": 1668 + }, + { + "epoch": 4.386587771203156, + "high_lr": 0.00012210526315789474, + "low_lr": 2.442105263157895e-06, + "step": 1668 + }, + { + "epoch": 4.389217619986851, + "grad_norm": 1.5518778562545776, + "learning_rate": 0.00012157894736842105, + "loss": 1.2391, + "step": 1669 + }, + { + "epoch": 4.389217619986851, + "high_lr": 0.00012157894736842105, + "low_lr": 2.4315789473684213e-06, + "step": 1669 + }, + { + "epoch": 4.389217619986851, + "high_lr": 0.00012157894736842105, + "low_lr": 2.4315789473684213e-06, + "step": 1669 + }, + { + "epoch": 4.389217619986851, + "high_lr": 0.00012157894736842105, + "low_lr": 2.4315789473684213e-06, + "step": 1669 + }, + { + "epoch": 4.389217619986851, + "high_lr": 0.00012157894736842105, + "low_lr": 2.4315789473684213e-06, + "step": 1669 + }, + { + "epoch": 4.389217619986851, + "high_lr": 0.00012157894736842105, + "low_lr": 2.4315789473684213e-06, + "step": 1669 + }, + { + "epoch": 4.389217619986851, + "high_lr": 0.00012157894736842105, + "low_lr": 2.4315789473684213e-06, + "step": 1669 + }, + { + "epoch": 4.389217619986851, + "high_lr": 0.00012157894736842105, + "low_lr": 2.4315789473684213e-06, + "step": 1669 + }, + { + "epoch": 4.389217619986851, + "high_lr": 0.00012157894736842105, + "low_lr": 2.4315789473684213e-06, + "step": 1669 + }, + { + "epoch": 4.391847468770545, + "grad_norm": 1.568029522895813, + "learning_rate": 0.00012105263157894738, + "loss": 1.2401, + "step": 1670 + }, + { + "epoch": 4.391847468770545, + "high_lr": 0.00012105263157894738, + "low_lr": 2.4210526315789477e-06, + "step": 1670 + }, + { + "epoch": 4.391847468770545, + "high_lr": 0.00012105263157894738, + "low_lr": 2.4210526315789477e-06, + "step": 1670 + }, + { + "epoch": 4.391847468770545, + "high_lr": 0.00012105263157894738, + "low_lr": 2.4210526315789477e-06, + "step": 1670 + }, + { + "epoch": 4.391847468770545, + "high_lr": 0.00012105263157894738, + "low_lr": 2.4210526315789477e-06, + "step": 1670 + }, + { + "epoch": 4.391847468770545, + "high_lr": 0.00012105263157894738, + "low_lr": 2.4210526315789477e-06, + "step": 1670 + }, + { + "epoch": 4.391847468770545, + "high_lr": 0.00012105263157894738, + "low_lr": 2.4210526315789477e-06, + "step": 1670 + }, + { + "epoch": 4.391847468770545, + "high_lr": 0.00012105263157894738, + "low_lr": 2.4210526315789477e-06, + "step": 1670 + }, + { + "epoch": 4.391847468770545, + "high_lr": 0.00012105263157894738, + "low_lr": 2.4210526315789477e-06, + "step": 1670 + }, + { + "epoch": 4.394477317554241, + "grad_norm": 1.6637319326400757, + "learning_rate": 0.00012052631578947369, + "loss": 1.2115, + "step": 1671 + }, + { + "epoch": 4.394477317554241, + "high_lr": 0.00012052631578947369, + "low_lr": 2.410526315789474e-06, + "step": 1671 + }, + { + "epoch": 4.394477317554241, + "high_lr": 0.00012052631578947369, + "low_lr": 2.410526315789474e-06, + "step": 1671 + }, + { + "epoch": 4.394477317554241, + "high_lr": 0.00012052631578947369, + "low_lr": 2.410526315789474e-06, + "step": 1671 + }, + { + "epoch": 4.394477317554241, + "high_lr": 0.00012052631578947369, + "low_lr": 2.410526315789474e-06, + "step": 1671 + }, + { + "epoch": 4.394477317554241, + "high_lr": 0.00012052631578947369, + "low_lr": 2.410526315789474e-06, + "step": 1671 + }, + { + "epoch": 4.394477317554241, + "high_lr": 0.00012052631578947369, + "low_lr": 2.410526315789474e-06, + "step": 1671 + }, + { + "epoch": 4.394477317554241, + "high_lr": 0.00012052631578947369, + "low_lr": 2.410526315789474e-06, + "step": 1671 + }, + { + "epoch": 4.394477317554241, + "high_lr": 0.00012052631578947369, + "low_lr": 2.410526315789474e-06, + "step": 1671 + }, + { + "epoch": 4.3971071663379355, + "grad_norm": 1.4761321544647217, + "learning_rate": 0.00012, + "loss": 1.1913, + "step": 1672 + }, + { + "epoch": 4.3971071663379355, + "high_lr": 0.00012, + "low_lr": 2.4000000000000003e-06, + "step": 1672 + }, + { + "epoch": 4.3971071663379355, + "high_lr": 0.00012, + "low_lr": 2.4000000000000003e-06, + "step": 1672 + }, + { + "epoch": 4.3971071663379355, + "high_lr": 0.00012, + "low_lr": 2.4000000000000003e-06, + "step": 1672 + }, + { + "epoch": 4.3971071663379355, + "high_lr": 0.00012, + "low_lr": 2.4000000000000003e-06, + "step": 1672 + }, + { + "epoch": 4.3971071663379355, + "high_lr": 0.00012, + "low_lr": 2.4000000000000003e-06, + "step": 1672 + }, + { + "epoch": 4.3971071663379355, + "high_lr": 0.00012, + "low_lr": 2.4000000000000003e-06, + "step": 1672 + }, + { + "epoch": 4.3971071663379355, + "high_lr": 0.00012, + "low_lr": 2.4000000000000003e-06, + "step": 1672 + }, + { + "epoch": 4.3971071663379355, + "high_lr": 0.00012, + "low_lr": 2.4000000000000003e-06, + "step": 1672 + }, + { + "epoch": 4.39973701512163, + "grad_norm": 1.4216514825820923, + "learning_rate": 0.00011947368421052632, + "loss": 1.2146, + "step": 1673 + }, + { + "epoch": 4.39973701512163, + "high_lr": 0.00011947368421052632, + "low_lr": 2.3894736842105266e-06, + "step": 1673 + }, + { + "epoch": 4.39973701512163, + "high_lr": 0.00011947368421052632, + "low_lr": 2.3894736842105266e-06, + "step": 1673 + }, + { + "epoch": 4.39973701512163, + "high_lr": 0.00011947368421052632, + "low_lr": 2.3894736842105266e-06, + "step": 1673 + }, + { + "epoch": 4.39973701512163, + "high_lr": 0.00011947368421052632, + "low_lr": 2.3894736842105266e-06, + "step": 1673 + }, + { + "epoch": 4.39973701512163, + "high_lr": 0.00011947368421052632, + "low_lr": 2.3894736842105266e-06, + "step": 1673 + }, + { + "epoch": 4.39973701512163, + "high_lr": 0.00011947368421052632, + "low_lr": 2.3894736842105266e-06, + "step": 1673 + }, + { + "epoch": 4.39973701512163, + "high_lr": 0.00011947368421052632, + "low_lr": 2.3894736842105266e-06, + "step": 1673 + }, + { + "epoch": 4.39973701512163, + "high_lr": 0.00011947368421052632, + "low_lr": 2.3894736842105266e-06, + "step": 1673 + }, + { + "epoch": 4.402366863905326, + "grad_norm": 1.601261854171753, + "learning_rate": 0.00011894736842105264, + "loss": 1.2112, + "step": 1674 + }, + { + "epoch": 4.402366863905326, + "high_lr": 0.00011894736842105264, + "low_lr": 2.378947368421053e-06, + "step": 1674 + }, + { + "epoch": 4.402366863905326, + "high_lr": 0.00011894736842105264, + "low_lr": 2.378947368421053e-06, + "step": 1674 + }, + { + "epoch": 4.402366863905326, + "high_lr": 0.00011894736842105264, + "low_lr": 2.378947368421053e-06, + "step": 1674 + }, + { + "epoch": 4.402366863905326, + "high_lr": 0.00011894736842105264, + "low_lr": 2.378947368421053e-06, + "step": 1674 + }, + { + "epoch": 4.402366863905326, + "high_lr": 0.00011894736842105264, + "low_lr": 2.378947368421053e-06, + "step": 1674 + }, + { + "epoch": 4.402366863905326, + "high_lr": 0.00011894736842105264, + "low_lr": 2.378947368421053e-06, + "step": 1674 + }, + { + "epoch": 4.402366863905326, + "high_lr": 0.00011894736842105264, + "low_lr": 2.378947368421053e-06, + "step": 1674 + }, + { + "epoch": 4.402366863905326, + "high_lr": 0.00011894736842105264, + "low_lr": 2.378947368421053e-06, + "step": 1674 + }, + { + "epoch": 4.40499671268902, + "grad_norm": 1.5565073490142822, + "learning_rate": 0.00011842105263157894, + "loss": 1.2682, + "step": 1675 + }, + { + "epoch": 4.40499671268902, + "high_lr": 0.00011842105263157894, + "low_lr": 2.368421052631579e-06, + "step": 1675 + }, + { + "epoch": 4.40499671268902, + "high_lr": 0.00011842105263157894, + "low_lr": 2.368421052631579e-06, + "step": 1675 + }, + { + "epoch": 4.40499671268902, + "high_lr": 0.00011842105263157894, + "low_lr": 2.368421052631579e-06, + "step": 1675 + }, + { + "epoch": 4.40499671268902, + "high_lr": 0.00011842105263157894, + "low_lr": 2.368421052631579e-06, + "step": 1675 + }, + { + "epoch": 4.40499671268902, + "high_lr": 0.00011842105263157894, + "low_lr": 2.368421052631579e-06, + "step": 1675 + }, + { + "epoch": 4.40499671268902, + "high_lr": 0.00011842105263157894, + "low_lr": 2.368421052631579e-06, + "step": 1675 + }, + { + "epoch": 4.40499671268902, + "high_lr": 0.00011842105263157894, + "low_lr": 2.368421052631579e-06, + "step": 1675 + }, + { + "epoch": 4.40499671268902, + "high_lr": 0.00011842105263157894, + "low_lr": 2.368421052631579e-06, + "step": 1675 + }, + { + "epoch": 4.407626561472715, + "grad_norm": 1.542503833770752, + "learning_rate": 0.00011789473684210527, + "loss": 1.1684, + "step": 1676 + }, + { + "epoch": 4.407626561472715, + "high_lr": 0.00011789473684210527, + "low_lr": 2.357894736842105e-06, + "step": 1676 + }, + { + "epoch": 4.407626561472715, + "high_lr": 0.00011789473684210527, + "low_lr": 2.357894736842105e-06, + "step": 1676 + }, + { + "epoch": 4.407626561472715, + "high_lr": 0.00011789473684210527, + "low_lr": 2.357894736842105e-06, + "step": 1676 + }, + { + "epoch": 4.407626561472715, + "high_lr": 0.00011789473684210527, + "low_lr": 2.357894736842105e-06, + "step": 1676 + }, + { + "epoch": 4.407626561472715, + "high_lr": 0.00011789473684210527, + "low_lr": 2.357894736842105e-06, + "step": 1676 + }, + { + "epoch": 4.407626561472715, + "high_lr": 0.00011789473684210527, + "low_lr": 2.357894736842105e-06, + "step": 1676 + }, + { + "epoch": 4.407626561472715, + "high_lr": 0.00011789473684210527, + "low_lr": 2.357894736842105e-06, + "step": 1676 + }, + { + "epoch": 4.407626561472715, + "high_lr": 0.00011789473684210527, + "low_lr": 2.357894736842105e-06, + "step": 1676 + }, + { + "epoch": 4.410256410256411, + "grad_norm": 1.492455005645752, + "learning_rate": 0.00011736842105263158, + "loss": 1.2266, + "step": 1677 + }, + { + "epoch": 4.410256410256411, + "high_lr": 0.00011736842105263158, + "low_lr": 2.347368421052632e-06, + "step": 1677 + }, + { + "epoch": 4.410256410256411, + "high_lr": 0.00011736842105263158, + "low_lr": 2.347368421052632e-06, + "step": 1677 + }, + { + "epoch": 4.410256410256411, + "high_lr": 0.00011736842105263158, + "low_lr": 2.347368421052632e-06, + "step": 1677 + }, + { + "epoch": 4.410256410256411, + "high_lr": 0.00011736842105263158, + "low_lr": 2.347368421052632e-06, + "step": 1677 + }, + { + "epoch": 4.410256410256411, + "high_lr": 0.00011736842105263158, + "low_lr": 2.347368421052632e-06, + "step": 1677 + }, + { + "epoch": 4.410256410256411, + "high_lr": 0.00011736842105263158, + "low_lr": 2.347368421052632e-06, + "step": 1677 + }, + { + "epoch": 4.410256410256411, + "high_lr": 0.00011736842105263158, + "low_lr": 2.347368421052632e-06, + "step": 1677 + }, + { + "epoch": 4.410256410256411, + "high_lr": 0.00011736842105263158, + "low_lr": 2.347368421052632e-06, + "step": 1677 + }, + { + "epoch": 4.412886259040105, + "grad_norm": 1.5621352195739746, + "learning_rate": 0.00011684210526315791, + "loss": 1.142, + "step": 1678 + }, + { + "epoch": 4.412886259040105, + "high_lr": 0.00011684210526315791, + "low_lr": 2.3368421052631583e-06, + "step": 1678 + }, + { + "epoch": 4.412886259040105, + "high_lr": 0.00011684210526315791, + "low_lr": 2.3368421052631583e-06, + "step": 1678 + }, + { + "epoch": 4.412886259040105, + "high_lr": 0.00011684210526315791, + "low_lr": 2.3368421052631583e-06, + "step": 1678 + }, + { + "epoch": 4.412886259040105, + "high_lr": 0.00011684210526315791, + "low_lr": 2.3368421052631583e-06, + "step": 1678 + }, + { + "epoch": 4.412886259040105, + "high_lr": 0.00011684210526315791, + "low_lr": 2.3368421052631583e-06, + "step": 1678 + }, + { + "epoch": 4.412886259040105, + "high_lr": 0.00011684210526315791, + "low_lr": 2.3368421052631583e-06, + "step": 1678 + }, + { + "epoch": 4.412886259040105, + "high_lr": 0.00011684210526315791, + "low_lr": 2.3368421052631583e-06, + "step": 1678 + }, + { + "epoch": 4.412886259040105, + "high_lr": 0.00011684210526315791, + "low_lr": 2.3368421052631583e-06, + "step": 1678 + }, + { + "epoch": 4.4155161078238, + "grad_norm": 1.5134774446487427, + "learning_rate": 0.00011631578947368421, + "loss": 1.1955, + "step": 1679 + }, + { + "epoch": 4.4155161078238, + "high_lr": 0.00011631578947368421, + "low_lr": 2.326315789473684e-06, + "step": 1679 + }, + { + "epoch": 4.4155161078238, + "high_lr": 0.00011631578947368421, + "low_lr": 2.326315789473684e-06, + "step": 1679 + }, + { + "epoch": 4.4155161078238, + "high_lr": 0.00011631578947368421, + "low_lr": 2.326315789473684e-06, + "step": 1679 + }, + { + "epoch": 4.4155161078238, + "high_lr": 0.00011631578947368421, + "low_lr": 2.326315789473684e-06, + "step": 1679 + }, + { + "epoch": 4.4155161078238, + "high_lr": 0.00011631578947368421, + "low_lr": 2.326315789473684e-06, + "step": 1679 + }, + { + "epoch": 4.4155161078238, + "high_lr": 0.00011631578947368421, + "low_lr": 2.326315789473684e-06, + "step": 1679 + }, + { + "epoch": 4.4155161078238, + "high_lr": 0.00011631578947368421, + "low_lr": 2.326315789473684e-06, + "step": 1679 + }, + { + "epoch": 4.4155161078238, + "high_lr": 0.00011631578947368421, + "low_lr": 2.326315789473684e-06, + "step": 1679 + }, + { + "epoch": 4.418145956607495, + "grad_norm": 1.425864815711975, + "learning_rate": 0.00011578947368421053, + "loss": 1.2632, + "step": 1680 + }, + { + "epoch": 4.418145956607495, + "high_lr": 0.00011578947368421053, + "low_lr": 2.3157894736842105e-06, + "step": 1680 + }, + { + "epoch": 4.418145956607495, + "high_lr": 0.00011578947368421053, + "low_lr": 2.3157894736842105e-06, + "step": 1680 + }, + { + "epoch": 4.418145956607495, + "high_lr": 0.00011578947368421053, + "low_lr": 2.3157894736842105e-06, + "step": 1680 + }, + { + "epoch": 4.418145956607495, + "high_lr": 0.00011578947368421053, + "low_lr": 2.3157894736842105e-06, + "step": 1680 + }, + { + "epoch": 4.418145956607495, + "high_lr": 0.00011578947368421053, + "low_lr": 2.3157894736842105e-06, + "step": 1680 + }, + { + "epoch": 4.418145956607495, + "high_lr": 0.00011578947368421053, + "low_lr": 2.3157894736842105e-06, + "step": 1680 + }, + { + "epoch": 4.418145956607495, + "high_lr": 0.00011578947368421053, + "low_lr": 2.3157894736842105e-06, + "step": 1680 + }, + { + "epoch": 4.418145956607495, + "high_lr": 0.00011578947368421053, + "low_lr": 2.3157894736842105e-06, + "step": 1680 + }, + { + "epoch": 4.42077580539119, + "grad_norm": 1.4130666255950928, + "learning_rate": 0.00011526315789473685, + "loss": 1.191, + "step": 1681 + }, + { + "epoch": 4.42077580539119, + "high_lr": 0.00011526315789473685, + "low_lr": 2.3052631578947373e-06, + "step": 1681 + }, + { + "epoch": 4.42077580539119, + "high_lr": 0.00011526315789473685, + "low_lr": 2.3052631578947373e-06, + "step": 1681 + }, + { + "epoch": 4.42077580539119, + "high_lr": 0.00011526315789473685, + "low_lr": 2.3052631578947373e-06, + "step": 1681 + }, + { + "epoch": 4.42077580539119, + "high_lr": 0.00011526315789473685, + "low_lr": 2.3052631578947373e-06, + "step": 1681 + }, + { + "epoch": 4.42077580539119, + "high_lr": 0.00011526315789473685, + "low_lr": 2.3052631578947373e-06, + "step": 1681 + }, + { + "epoch": 4.42077580539119, + "high_lr": 0.00011526315789473685, + "low_lr": 2.3052631578947373e-06, + "step": 1681 + }, + { + "epoch": 4.42077580539119, + "high_lr": 0.00011526315789473685, + "low_lr": 2.3052631578947373e-06, + "step": 1681 + }, + { + "epoch": 4.42077580539119, + "high_lr": 0.00011526315789473685, + "low_lr": 2.3052631578947373e-06, + "step": 1681 + }, + { + "epoch": 4.423405654174885, + "grad_norm": 1.4954359531402588, + "learning_rate": 0.00011473684210526316, + "loss": 1.1924, + "step": 1682 + }, + { + "epoch": 4.423405654174885, + "high_lr": 0.00011473684210526316, + "low_lr": 2.294736842105263e-06, + "step": 1682 + }, + { + "epoch": 4.423405654174885, + "high_lr": 0.00011473684210526316, + "low_lr": 2.294736842105263e-06, + "step": 1682 + }, + { + "epoch": 4.423405654174885, + "high_lr": 0.00011473684210526316, + "low_lr": 2.294736842105263e-06, + "step": 1682 + }, + { + "epoch": 4.423405654174885, + "high_lr": 0.00011473684210526316, + "low_lr": 2.294736842105263e-06, + "step": 1682 + }, + { + "epoch": 4.423405654174885, + "high_lr": 0.00011473684210526316, + "low_lr": 2.294736842105263e-06, + "step": 1682 + }, + { + "epoch": 4.423405654174885, + "high_lr": 0.00011473684210526316, + "low_lr": 2.294736842105263e-06, + "step": 1682 + }, + { + "epoch": 4.423405654174885, + "high_lr": 0.00011473684210526316, + "low_lr": 2.294736842105263e-06, + "step": 1682 + }, + { + "epoch": 4.423405654174885, + "high_lr": 0.00011473684210526316, + "low_lr": 2.294736842105263e-06, + "step": 1682 + }, + { + "epoch": 4.42603550295858, + "grad_norm": 1.8377904891967773, + "learning_rate": 0.00011421052631578947, + "loss": 1.1947, + "step": 1683 + }, + { + "epoch": 4.42603550295858, + "high_lr": 0.00011421052631578947, + "low_lr": 2.2842105263157895e-06, + "step": 1683 + }, + { + "epoch": 4.42603550295858, + "high_lr": 0.00011421052631578947, + "low_lr": 2.2842105263157895e-06, + "step": 1683 + }, + { + "epoch": 4.42603550295858, + "high_lr": 0.00011421052631578947, + "low_lr": 2.2842105263157895e-06, + "step": 1683 + }, + { + "epoch": 4.42603550295858, + "high_lr": 0.00011421052631578947, + "low_lr": 2.2842105263157895e-06, + "step": 1683 + }, + { + "epoch": 4.42603550295858, + "high_lr": 0.00011421052631578947, + "low_lr": 2.2842105263157895e-06, + "step": 1683 + }, + { + "epoch": 4.42603550295858, + "high_lr": 0.00011421052631578947, + "low_lr": 2.2842105263157895e-06, + "step": 1683 + }, + { + "epoch": 4.42603550295858, + "high_lr": 0.00011421052631578947, + "low_lr": 2.2842105263157895e-06, + "step": 1683 + }, + { + "epoch": 4.42603550295858, + "high_lr": 0.00011421052631578947, + "low_lr": 2.2842105263157895e-06, + "step": 1683 + }, + { + "epoch": 4.428665351742275, + "grad_norm": 1.5843218564987183, + "learning_rate": 0.0001136842105263158, + "loss": 1.2083, + "step": 1684 + }, + { + "epoch": 4.428665351742275, + "high_lr": 0.0001136842105263158, + "low_lr": 2.273684210526316e-06, + "step": 1684 + }, + { + "epoch": 4.428665351742275, + "high_lr": 0.0001136842105263158, + "low_lr": 2.273684210526316e-06, + "step": 1684 + }, + { + "epoch": 4.428665351742275, + "high_lr": 0.0001136842105263158, + "low_lr": 2.273684210526316e-06, + "step": 1684 + }, + { + "epoch": 4.428665351742275, + "high_lr": 0.0001136842105263158, + "low_lr": 2.273684210526316e-06, + "step": 1684 + }, + { + "epoch": 4.428665351742275, + "high_lr": 0.0001136842105263158, + "low_lr": 2.273684210526316e-06, + "step": 1684 + }, + { + "epoch": 4.428665351742275, + "high_lr": 0.0001136842105263158, + "low_lr": 2.273684210526316e-06, + "step": 1684 + }, + { + "epoch": 4.428665351742275, + "high_lr": 0.0001136842105263158, + "low_lr": 2.273684210526316e-06, + "step": 1684 + }, + { + "epoch": 4.428665351742275, + "high_lr": 0.0001136842105263158, + "low_lr": 2.273684210526316e-06, + "step": 1684 + }, + { + "epoch": 4.43129520052597, + "grad_norm": 1.5587294101715088, + "learning_rate": 0.00011315789473684211, + "loss": 1.2195, + "step": 1685 + }, + { + "epoch": 4.43129520052597, + "high_lr": 0.00011315789473684211, + "low_lr": 2.2631578947368426e-06, + "step": 1685 + }, + { + "epoch": 4.43129520052597, + "high_lr": 0.00011315789473684211, + "low_lr": 2.2631578947368426e-06, + "step": 1685 + }, + { + "epoch": 4.43129520052597, + "high_lr": 0.00011315789473684211, + "low_lr": 2.2631578947368426e-06, + "step": 1685 + }, + { + "epoch": 4.43129520052597, + "high_lr": 0.00011315789473684211, + "low_lr": 2.2631578947368426e-06, + "step": 1685 + }, + { + "epoch": 4.43129520052597, + "high_lr": 0.00011315789473684211, + "low_lr": 2.2631578947368426e-06, + "step": 1685 + }, + { + "epoch": 4.43129520052597, + "high_lr": 0.00011315789473684211, + "low_lr": 2.2631578947368426e-06, + "step": 1685 + }, + { + "epoch": 4.43129520052597, + "high_lr": 0.00011315789473684211, + "low_lr": 2.2631578947368426e-06, + "step": 1685 + }, + { + "epoch": 4.43129520052597, + "high_lr": 0.00011315789473684211, + "low_lr": 2.2631578947368426e-06, + "step": 1685 + }, + { + "epoch": 4.433925049309664, + "grad_norm": 1.4924415349960327, + "learning_rate": 0.00011263157894736841, + "loss": 1.2152, + "step": 1686 + }, + { + "epoch": 4.433925049309664, + "high_lr": 0.00011263157894736841, + "low_lr": 2.2526315789473685e-06, + "step": 1686 + }, + { + "epoch": 4.433925049309664, + "high_lr": 0.00011263157894736841, + "low_lr": 2.2526315789473685e-06, + "step": 1686 + }, + { + "epoch": 4.433925049309664, + "high_lr": 0.00011263157894736841, + "low_lr": 2.2526315789473685e-06, + "step": 1686 + }, + { + "epoch": 4.433925049309664, + "high_lr": 0.00011263157894736841, + "low_lr": 2.2526315789473685e-06, + "step": 1686 + }, + { + "epoch": 4.433925049309664, + "high_lr": 0.00011263157894736841, + "low_lr": 2.2526315789473685e-06, + "step": 1686 + }, + { + "epoch": 4.433925049309664, + "high_lr": 0.00011263157894736841, + "low_lr": 2.2526315789473685e-06, + "step": 1686 + }, + { + "epoch": 4.433925049309664, + "high_lr": 0.00011263157894736841, + "low_lr": 2.2526315789473685e-06, + "step": 1686 + }, + { + "epoch": 4.433925049309664, + "high_lr": 0.00011263157894736841, + "low_lr": 2.2526315789473685e-06, + "step": 1686 + }, + { + "epoch": 4.43655489809336, + "grad_norm": 1.5917601585388184, + "learning_rate": 0.00011210526315789474, + "loss": 1.2778, + "step": 1687 + }, + { + "epoch": 4.43655489809336, + "high_lr": 0.00011210526315789474, + "low_lr": 2.242105263157895e-06, + "step": 1687 + }, + { + "epoch": 4.43655489809336, + "high_lr": 0.00011210526315789474, + "low_lr": 2.242105263157895e-06, + "step": 1687 + }, + { + "epoch": 4.43655489809336, + "high_lr": 0.00011210526315789474, + "low_lr": 2.242105263157895e-06, + "step": 1687 + }, + { + "epoch": 4.43655489809336, + "high_lr": 0.00011210526315789474, + "low_lr": 2.242105263157895e-06, + "step": 1687 + }, + { + "epoch": 4.43655489809336, + "high_lr": 0.00011210526315789474, + "low_lr": 2.242105263157895e-06, + "step": 1687 + }, + { + "epoch": 4.43655489809336, + "high_lr": 0.00011210526315789474, + "low_lr": 2.242105263157895e-06, + "step": 1687 + }, + { + "epoch": 4.43655489809336, + "high_lr": 0.00011210526315789474, + "low_lr": 2.242105263157895e-06, + "step": 1687 + }, + { + "epoch": 4.43655489809336, + "high_lr": 0.00011210526315789474, + "low_lr": 2.242105263157895e-06, + "step": 1687 + }, + { + "epoch": 4.439184746877054, + "grad_norm": 1.445594310760498, + "learning_rate": 0.00011157894736842105, + "loss": 1.2154, + "step": 1688 + }, + { + "epoch": 4.439184746877054, + "high_lr": 0.00011157894736842105, + "low_lr": 2.231578947368421e-06, + "step": 1688 + }, + { + "epoch": 4.439184746877054, + "high_lr": 0.00011157894736842105, + "low_lr": 2.231578947368421e-06, + "step": 1688 + }, + { + "epoch": 4.439184746877054, + "high_lr": 0.00011157894736842105, + "low_lr": 2.231578947368421e-06, + "step": 1688 + }, + { + "epoch": 4.439184746877054, + "high_lr": 0.00011157894736842105, + "low_lr": 2.231578947368421e-06, + "step": 1688 + }, + { + "epoch": 4.439184746877054, + "high_lr": 0.00011157894736842105, + "low_lr": 2.231578947368421e-06, + "step": 1688 + }, + { + "epoch": 4.439184746877054, + "high_lr": 0.00011157894736842105, + "low_lr": 2.231578947368421e-06, + "step": 1688 + }, + { + "epoch": 4.439184746877054, + "high_lr": 0.00011157894736842105, + "low_lr": 2.231578947368421e-06, + "step": 1688 + }, + { + "epoch": 4.439184746877054, + "high_lr": 0.00011157894736842105, + "low_lr": 2.231578947368421e-06, + "step": 1688 + }, + { + "epoch": 4.441814595660749, + "grad_norm": 1.5913069248199463, + "learning_rate": 0.00011105263157894738, + "loss": 1.2254, + "step": 1689 + }, + { + "epoch": 4.441814595660749, + "high_lr": 0.00011105263157894738, + "low_lr": 2.221052631578948e-06, + "step": 1689 + }, + { + "epoch": 4.441814595660749, + "high_lr": 0.00011105263157894738, + "low_lr": 2.221052631578948e-06, + "step": 1689 + }, + { + "epoch": 4.441814595660749, + "high_lr": 0.00011105263157894738, + "low_lr": 2.221052631578948e-06, + "step": 1689 + }, + { + "epoch": 4.441814595660749, + "high_lr": 0.00011105263157894738, + "low_lr": 2.221052631578948e-06, + "step": 1689 + }, + { + "epoch": 4.441814595660749, + "high_lr": 0.00011105263157894738, + "low_lr": 2.221052631578948e-06, + "step": 1689 + }, + { + "epoch": 4.441814595660749, + "high_lr": 0.00011105263157894738, + "low_lr": 2.221052631578948e-06, + "step": 1689 + }, + { + "epoch": 4.441814595660749, + "high_lr": 0.00011105263157894738, + "low_lr": 2.221052631578948e-06, + "step": 1689 + }, + { + "epoch": 4.441814595660749, + "high_lr": 0.00011105263157894738, + "low_lr": 2.221052631578948e-06, + "step": 1689 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 1.5536595582962036, + "learning_rate": 0.00011052631578947368, + "loss": 1.244, + "step": 1690 + }, + { + "epoch": 4.444444444444445, + "high_lr": 0.00011052631578947368, + "low_lr": 2.2105263157894738e-06, + "step": 1690 + }, + { + "epoch": 4.444444444444445, + "high_lr": 0.00011052631578947368, + "low_lr": 2.2105263157894738e-06, + "step": 1690 + }, + { + "epoch": 4.444444444444445, + "high_lr": 0.00011052631578947368, + "low_lr": 2.2105263157894738e-06, + "step": 1690 + }, + { + "epoch": 4.444444444444445, + "high_lr": 0.00011052631578947368, + "low_lr": 2.2105263157894738e-06, + "step": 1690 + }, + { + "epoch": 4.444444444444445, + "high_lr": 0.00011052631578947368, + "low_lr": 2.2105263157894738e-06, + "step": 1690 + }, + { + "epoch": 4.444444444444445, + "high_lr": 0.00011052631578947368, + "low_lr": 2.2105263157894738e-06, + "step": 1690 + }, + { + "epoch": 4.444444444444445, + "high_lr": 0.00011052631578947368, + "low_lr": 2.2105263157894738e-06, + "step": 1690 + }, + { + "epoch": 4.444444444444445, + "high_lr": 0.00011052631578947368, + "low_lr": 2.2105263157894738e-06, + "step": 1690 + }, + { + "epoch": 4.447074293228139, + "grad_norm": 1.6489031314849854, + "learning_rate": 0.00011, + "loss": 1.1711, + "step": 1691 + }, + { + "epoch": 4.447074293228139, + "high_lr": 0.00011, + "low_lr": 2.2e-06, + "step": 1691 + }, + { + "epoch": 4.447074293228139, + "high_lr": 0.00011, + "low_lr": 2.2e-06, + "step": 1691 + }, + { + "epoch": 4.447074293228139, + "high_lr": 0.00011, + "low_lr": 2.2e-06, + "step": 1691 + }, + { + "epoch": 4.447074293228139, + "high_lr": 0.00011, + "low_lr": 2.2e-06, + "step": 1691 + }, + { + "epoch": 4.447074293228139, + "high_lr": 0.00011, + "low_lr": 2.2e-06, + "step": 1691 + }, + { + "epoch": 4.447074293228139, + "high_lr": 0.00011, + "low_lr": 2.2e-06, + "step": 1691 + }, + { + "epoch": 4.447074293228139, + "high_lr": 0.00011, + "low_lr": 2.2e-06, + "step": 1691 + }, + { + "epoch": 4.447074293228139, + "high_lr": 0.00011, + "low_lr": 2.2e-06, + "step": 1691 + }, + { + "epoch": 4.449704142011834, + "grad_norm": 1.5201053619384766, + "learning_rate": 0.00010947368421052632, + "loss": 1.2052, + "step": 1692 + }, + { + "epoch": 4.449704142011834, + "high_lr": 0.00010947368421052632, + "low_lr": 2.1894736842105264e-06, + "step": 1692 + }, + { + "epoch": 4.449704142011834, + "high_lr": 0.00010947368421052632, + "low_lr": 2.1894736842105264e-06, + "step": 1692 + }, + { + "epoch": 4.449704142011834, + "high_lr": 0.00010947368421052632, + "low_lr": 2.1894736842105264e-06, + "step": 1692 + }, + { + "epoch": 4.449704142011834, + "high_lr": 0.00010947368421052632, + "low_lr": 2.1894736842105264e-06, + "step": 1692 + }, + { + "epoch": 4.449704142011834, + "high_lr": 0.00010947368421052632, + "low_lr": 2.1894736842105264e-06, + "step": 1692 + }, + { + "epoch": 4.449704142011834, + "high_lr": 0.00010947368421052632, + "low_lr": 2.1894736842105264e-06, + "step": 1692 + }, + { + "epoch": 4.449704142011834, + "high_lr": 0.00010947368421052632, + "low_lr": 2.1894736842105264e-06, + "step": 1692 + }, + { + "epoch": 4.449704142011834, + "high_lr": 0.00010947368421052632, + "low_lr": 2.1894736842105264e-06, + "step": 1692 + }, + { + "epoch": 4.4523339907955295, + "grad_norm": 1.6563962697982788, + "learning_rate": 0.00010894736842105263, + "loss": 1.2061, + "step": 1693 + }, + { + "epoch": 4.4523339907955295, + "high_lr": 0.00010894736842105263, + "low_lr": 2.1789473684210528e-06, + "step": 1693 + }, + { + "epoch": 4.4523339907955295, + "high_lr": 0.00010894736842105263, + "low_lr": 2.1789473684210528e-06, + "step": 1693 + }, + { + "epoch": 4.4523339907955295, + "high_lr": 0.00010894736842105263, + "low_lr": 2.1789473684210528e-06, + "step": 1693 + }, + { + "epoch": 4.4523339907955295, + "high_lr": 0.00010894736842105263, + "low_lr": 2.1789473684210528e-06, + "step": 1693 + }, + { + "epoch": 4.4523339907955295, + "high_lr": 0.00010894736842105263, + "low_lr": 2.1789473684210528e-06, + "step": 1693 + }, + { + "epoch": 4.4523339907955295, + "high_lr": 0.00010894736842105263, + "low_lr": 2.1789473684210528e-06, + "step": 1693 + }, + { + "epoch": 4.4523339907955295, + "high_lr": 0.00010894736842105263, + "low_lr": 2.1789473684210528e-06, + "step": 1693 + }, + { + "epoch": 4.4523339907955295, + "high_lr": 0.00010894736842105263, + "low_lr": 2.1789473684210528e-06, + "step": 1693 + }, + { + "epoch": 4.454963839579224, + "grad_norm": 1.60569167137146, + "learning_rate": 0.00010842105263157894, + "loss": 1.197, + "step": 1694 + }, + { + "epoch": 4.454963839579224, + "high_lr": 0.00010842105263157894, + "low_lr": 2.168421052631579e-06, + "step": 1694 + }, + { + "epoch": 4.454963839579224, + "high_lr": 0.00010842105263157894, + "low_lr": 2.168421052631579e-06, + "step": 1694 + }, + { + "epoch": 4.454963839579224, + "high_lr": 0.00010842105263157894, + "low_lr": 2.168421052631579e-06, + "step": 1694 + }, + { + "epoch": 4.454963839579224, + "high_lr": 0.00010842105263157894, + "low_lr": 2.168421052631579e-06, + "step": 1694 + }, + { + "epoch": 4.454963839579224, + "high_lr": 0.00010842105263157894, + "low_lr": 2.168421052631579e-06, + "step": 1694 + }, + { + "epoch": 4.454963839579224, + "high_lr": 0.00010842105263157894, + "low_lr": 2.168421052631579e-06, + "step": 1694 + }, + { + "epoch": 4.454963839579224, + "high_lr": 0.00010842105263157894, + "low_lr": 2.168421052631579e-06, + "step": 1694 + }, + { + "epoch": 4.454963839579224, + "high_lr": 0.00010842105263157894, + "low_lr": 2.168421052631579e-06, + "step": 1694 + }, + { + "epoch": 4.457593688362919, + "grad_norm": 1.5314149856567383, + "learning_rate": 0.00010789473684210527, + "loss": 1.1758, + "step": 1695 + }, + { + "epoch": 4.457593688362919, + "high_lr": 0.00010789473684210527, + "low_lr": 2.1578947368421054e-06, + "step": 1695 + }, + { + "epoch": 4.457593688362919, + "high_lr": 0.00010789473684210527, + "low_lr": 2.1578947368421054e-06, + "step": 1695 + }, + { + "epoch": 4.457593688362919, + "high_lr": 0.00010789473684210527, + "low_lr": 2.1578947368421054e-06, + "step": 1695 + }, + { + "epoch": 4.457593688362919, + "high_lr": 0.00010789473684210527, + "low_lr": 2.1578947368421054e-06, + "step": 1695 + }, + { + "epoch": 4.457593688362919, + "high_lr": 0.00010789473684210527, + "low_lr": 2.1578947368421054e-06, + "step": 1695 + }, + { + "epoch": 4.457593688362919, + "high_lr": 0.00010789473684210527, + "low_lr": 2.1578947368421054e-06, + "step": 1695 + }, + { + "epoch": 4.457593688362919, + "high_lr": 0.00010789473684210527, + "low_lr": 2.1578947368421054e-06, + "step": 1695 + }, + { + "epoch": 4.457593688362919, + "high_lr": 0.00010789473684210527, + "low_lr": 2.1578947368421054e-06, + "step": 1695 + }, + { + "epoch": 4.460223537146614, + "grad_norm": 1.638298511505127, + "learning_rate": 0.00010736842105263158, + "loss": 1.2319, + "step": 1696 + }, + { + "epoch": 4.460223537146614, + "high_lr": 0.00010736842105263158, + "low_lr": 2.1473684210526317e-06, + "step": 1696 + }, + { + "epoch": 4.460223537146614, + "high_lr": 0.00010736842105263158, + "low_lr": 2.1473684210526317e-06, + "step": 1696 + }, + { + "epoch": 4.460223537146614, + "high_lr": 0.00010736842105263158, + "low_lr": 2.1473684210526317e-06, + "step": 1696 + }, + { + "epoch": 4.460223537146614, + "high_lr": 0.00010736842105263158, + "low_lr": 2.1473684210526317e-06, + "step": 1696 + }, + { + "epoch": 4.460223537146614, + "high_lr": 0.00010736842105263158, + "low_lr": 2.1473684210526317e-06, + "step": 1696 + }, + { + "epoch": 4.460223537146614, + "high_lr": 0.00010736842105263158, + "low_lr": 2.1473684210526317e-06, + "step": 1696 + }, + { + "epoch": 4.460223537146614, + "high_lr": 0.00010736842105263158, + "low_lr": 2.1473684210526317e-06, + "step": 1696 + }, + { + "epoch": 4.460223537146614, + "high_lr": 0.00010736842105263158, + "low_lr": 2.1473684210526317e-06, + "step": 1696 + }, + { + "epoch": 4.462853385930309, + "grad_norm": 1.6212403774261475, + "learning_rate": 0.0001068421052631579, + "loss": 1.1893, + "step": 1697 + }, + { + "epoch": 4.462853385930309, + "high_lr": 0.0001068421052631579, + "low_lr": 2.136842105263158e-06, + "step": 1697 + }, + { + "epoch": 4.462853385930309, + "high_lr": 0.0001068421052631579, + "low_lr": 2.136842105263158e-06, + "step": 1697 + }, + { + "epoch": 4.462853385930309, + "high_lr": 0.0001068421052631579, + "low_lr": 2.136842105263158e-06, + "step": 1697 + }, + { + "epoch": 4.462853385930309, + "high_lr": 0.0001068421052631579, + "low_lr": 2.136842105263158e-06, + "step": 1697 + }, + { + "epoch": 4.462853385930309, + "high_lr": 0.0001068421052631579, + "low_lr": 2.136842105263158e-06, + "step": 1697 + }, + { + "epoch": 4.462853385930309, + "high_lr": 0.0001068421052631579, + "low_lr": 2.136842105263158e-06, + "step": 1697 + }, + { + "epoch": 4.462853385930309, + "high_lr": 0.0001068421052631579, + "low_lr": 2.136842105263158e-06, + "step": 1697 + }, + { + "epoch": 4.462853385930309, + "high_lr": 0.0001068421052631579, + "low_lr": 2.136842105263158e-06, + "step": 1697 + }, + { + "epoch": 4.465483234714004, + "grad_norm": 1.431408166885376, + "learning_rate": 0.00010631578947368421, + "loss": 1.2464, + "step": 1698 + }, + { + "epoch": 4.465483234714004, + "high_lr": 0.00010631578947368421, + "low_lr": 2.1263157894736844e-06, + "step": 1698 + }, + { + "epoch": 4.465483234714004, + "high_lr": 0.00010631578947368421, + "low_lr": 2.1263157894736844e-06, + "step": 1698 + }, + { + "epoch": 4.465483234714004, + "high_lr": 0.00010631578947368421, + "low_lr": 2.1263157894736844e-06, + "step": 1698 + }, + { + "epoch": 4.465483234714004, + "high_lr": 0.00010631578947368421, + "low_lr": 2.1263157894736844e-06, + "step": 1698 + }, + { + "epoch": 4.465483234714004, + "high_lr": 0.00010631578947368421, + "low_lr": 2.1263157894736844e-06, + "step": 1698 + }, + { + "epoch": 4.465483234714004, + "high_lr": 0.00010631578947368421, + "low_lr": 2.1263157894736844e-06, + "step": 1698 + }, + { + "epoch": 4.465483234714004, + "high_lr": 0.00010631578947368421, + "low_lr": 2.1263157894736844e-06, + "step": 1698 + }, + { + "epoch": 4.465483234714004, + "high_lr": 0.00010631578947368421, + "low_lr": 2.1263157894736844e-06, + "step": 1698 + }, + { + "epoch": 4.468113083497699, + "grad_norm": 1.4881477355957031, + "learning_rate": 0.00010578947368421053, + "loss": 1.2306, + "step": 1699 + }, + { + "epoch": 4.468113083497699, + "high_lr": 0.00010578947368421053, + "low_lr": 2.1157894736842107e-06, + "step": 1699 + }, + { + "epoch": 4.468113083497699, + "high_lr": 0.00010578947368421053, + "low_lr": 2.1157894736842107e-06, + "step": 1699 + }, + { + "epoch": 4.468113083497699, + "high_lr": 0.00010578947368421053, + "low_lr": 2.1157894736842107e-06, + "step": 1699 + }, + { + "epoch": 4.468113083497699, + "high_lr": 0.00010578947368421053, + "low_lr": 2.1157894736842107e-06, + "step": 1699 + }, + { + "epoch": 4.468113083497699, + "high_lr": 0.00010578947368421053, + "low_lr": 2.1157894736842107e-06, + "step": 1699 + }, + { + "epoch": 4.468113083497699, + "high_lr": 0.00010578947368421053, + "low_lr": 2.1157894736842107e-06, + "step": 1699 + }, + { + "epoch": 4.468113083497699, + "high_lr": 0.00010578947368421053, + "low_lr": 2.1157894736842107e-06, + "step": 1699 + }, + { + "epoch": 4.468113083497699, + "high_lr": 0.00010578947368421053, + "low_lr": 2.1157894736842107e-06, + "step": 1699 + }, + { + "epoch": 4.470742932281394, + "grad_norm": 1.4463176727294922, + "learning_rate": 0.00010526315789473683, + "loss": 1.2367, + "step": 1700 + }, + { + "epoch": 4.470742932281394, + "high_lr": 0.00010526315789473683, + "low_lr": 2.105263157894737e-06, + "step": 1700 + }, + { + "epoch": 4.470742932281394, + "high_lr": 0.00010526315789473683, + "low_lr": 2.105263157894737e-06, + "step": 1700 + }, + { + "epoch": 4.470742932281394, + "high_lr": 0.00010526315789473683, + "low_lr": 2.105263157894737e-06, + "step": 1700 + }, + { + "epoch": 4.470742932281394, + "high_lr": 0.00010526315789473683, + "low_lr": 2.105263157894737e-06, + "step": 1700 + }, + { + "epoch": 4.470742932281394, + "high_lr": 0.00010526315789473683, + "low_lr": 2.105263157894737e-06, + "step": 1700 + }, + { + "epoch": 4.470742932281394, + "high_lr": 0.00010526315789473683, + "low_lr": 2.105263157894737e-06, + "step": 1700 + }, + { + "epoch": 4.470742932281394, + "high_lr": 0.00010526315789473683, + "low_lr": 2.105263157894737e-06, + "step": 1700 + }, + { + "epoch": 4.470742932281394, + "high_lr": 0.00010526315789473683, + "low_lr": 2.105263157894737e-06, + "step": 1700 + }, + { + "epoch": 4.4733727810650885, + "grad_norm": 1.4434162378311157, + "learning_rate": 0.00010473684210526316, + "loss": 1.2676, + "step": 1701 + }, + { + "epoch": 4.4733727810650885, + "high_lr": 0.00010473684210526316, + "low_lr": 2.0947368421052634e-06, + "step": 1701 + }, + { + "epoch": 4.4733727810650885, + "high_lr": 0.00010473684210526316, + "low_lr": 2.0947368421052634e-06, + "step": 1701 + }, + { + "epoch": 4.4733727810650885, + "high_lr": 0.00010473684210526316, + "low_lr": 2.0947368421052634e-06, + "step": 1701 + }, + { + "epoch": 4.4733727810650885, + "high_lr": 0.00010473684210526316, + "low_lr": 2.0947368421052634e-06, + "step": 1701 + }, + { + "epoch": 4.4733727810650885, + "high_lr": 0.00010473684210526316, + "low_lr": 2.0947368421052634e-06, + "step": 1701 + }, + { + "epoch": 4.4733727810650885, + "high_lr": 0.00010473684210526316, + "low_lr": 2.0947368421052634e-06, + "step": 1701 + }, + { + "epoch": 4.4733727810650885, + "high_lr": 0.00010473684210526316, + "low_lr": 2.0947368421052634e-06, + "step": 1701 + }, + { + "epoch": 4.4733727810650885, + "high_lr": 0.00010473684210526316, + "low_lr": 2.0947368421052634e-06, + "step": 1701 + }, + { + "epoch": 4.476002629848784, + "grad_norm": 1.4597495794296265, + "learning_rate": 0.00010421052631578947, + "loss": 1.208, + "step": 1702 + }, + { + "epoch": 4.476002629848784, + "high_lr": 0.00010421052631578947, + "low_lr": 2.0842105263157897e-06, + "step": 1702 + }, + { + "epoch": 4.476002629848784, + "high_lr": 0.00010421052631578947, + "low_lr": 2.0842105263157897e-06, + "step": 1702 + }, + { + "epoch": 4.476002629848784, + "high_lr": 0.00010421052631578947, + "low_lr": 2.0842105263157897e-06, + "step": 1702 + }, + { + "epoch": 4.476002629848784, + "high_lr": 0.00010421052631578947, + "low_lr": 2.0842105263157897e-06, + "step": 1702 + }, + { + "epoch": 4.476002629848784, + "high_lr": 0.00010421052631578947, + "low_lr": 2.0842105263157897e-06, + "step": 1702 + }, + { + "epoch": 4.476002629848784, + "high_lr": 0.00010421052631578947, + "low_lr": 2.0842105263157897e-06, + "step": 1702 + }, + { + "epoch": 4.476002629848784, + "high_lr": 0.00010421052631578947, + "low_lr": 2.0842105263157897e-06, + "step": 1702 + }, + { + "epoch": 4.476002629848784, + "high_lr": 0.00010421052631578947, + "low_lr": 2.0842105263157897e-06, + "step": 1702 + }, + { + "epoch": 4.478632478632479, + "grad_norm": 1.516308069229126, + "learning_rate": 0.0001036842105263158, + "loss": 1.2793, + "step": 1703 + }, + { + "epoch": 4.478632478632479, + "high_lr": 0.0001036842105263158, + "low_lr": 2.073684210526316e-06, + "step": 1703 + }, + { + "epoch": 4.478632478632479, + "high_lr": 0.0001036842105263158, + "low_lr": 2.073684210526316e-06, + "step": 1703 + }, + { + "epoch": 4.478632478632479, + "high_lr": 0.0001036842105263158, + "low_lr": 2.073684210526316e-06, + "step": 1703 + }, + { + "epoch": 4.478632478632479, + "high_lr": 0.0001036842105263158, + "low_lr": 2.073684210526316e-06, + "step": 1703 + }, + { + "epoch": 4.478632478632479, + "high_lr": 0.0001036842105263158, + "low_lr": 2.073684210526316e-06, + "step": 1703 + }, + { + "epoch": 4.478632478632479, + "high_lr": 0.0001036842105263158, + "low_lr": 2.073684210526316e-06, + "step": 1703 + }, + { + "epoch": 4.478632478632479, + "high_lr": 0.0001036842105263158, + "low_lr": 2.073684210526316e-06, + "step": 1703 + }, + { + "epoch": 4.478632478632479, + "high_lr": 0.0001036842105263158, + "low_lr": 2.073684210526316e-06, + "step": 1703 + }, + { + "epoch": 4.481262327416173, + "grad_norm": 1.5422941446304321, + "learning_rate": 0.0001031578947368421, + "loss": 1.1922, + "step": 1704 + }, + { + "epoch": 4.481262327416173, + "high_lr": 0.0001031578947368421, + "low_lr": 2.0631578947368424e-06, + "step": 1704 + }, + { + "epoch": 4.481262327416173, + "high_lr": 0.0001031578947368421, + "low_lr": 2.0631578947368424e-06, + "step": 1704 + }, + { + "epoch": 4.481262327416173, + "high_lr": 0.0001031578947368421, + "low_lr": 2.0631578947368424e-06, + "step": 1704 + }, + { + "epoch": 4.481262327416173, + "high_lr": 0.0001031578947368421, + "low_lr": 2.0631578947368424e-06, + "step": 1704 + }, + { + "epoch": 4.481262327416173, + "high_lr": 0.0001031578947368421, + "low_lr": 2.0631578947368424e-06, + "step": 1704 + }, + { + "epoch": 4.481262327416173, + "high_lr": 0.0001031578947368421, + "low_lr": 2.0631578947368424e-06, + "step": 1704 + }, + { + "epoch": 4.481262327416173, + "high_lr": 0.0001031578947368421, + "low_lr": 2.0631578947368424e-06, + "step": 1704 + }, + { + "epoch": 4.481262327416173, + "high_lr": 0.0001031578947368421, + "low_lr": 2.0631578947368424e-06, + "step": 1704 + }, + { + "epoch": 4.483892176199869, + "grad_norm": 1.4864860773086548, + "learning_rate": 0.00010263157894736843, + "loss": 1.2205, + "step": 1705 + }, + { + "epoch": 4.483892176199869, + "high_lr": 0.00010263157894736843, + "low_lr": 2.0526315789473687e-06, + "step": 1705 + }, + { + "epoch": 4.483892176199869, + "high_lr": 0.00010263157894736843, + "low_lr": 2.0526315789473687e-06, + "step": 1705 + }, + { + "epoch": 4.483892176199869, + "high_lr": 0.00010263157894736843, + "low_lr": 2.0526315789473687e-06, + "step": 1705 + }, + { + "epoch": 4.483892176199869, + "high_lr": 0.00010263157894736843, + "low_lr": 2.0526315789473687e-06, + "step": 1705 + }, + { + "epoch": 4.483892176199869, + "high_lr": 0.00010263157894736843, + "low_lr": 2.0526315789473687e-06, + "step": 1705 + }, + { + "epoch": 4.483892176199869, + "high_lr": 0.00010263157894736843, + "low_lr": 2.0526315789473687e-06, + "step": 1705 + }, + { + "epoch": 4.483892176199869, + "high_lr": 0.00010263157894736843, + "low_lr": 2.0526315789473687e-06, + "step": 1705 + }, + { + "epoch": 4.483892176199869, + "high_lr": 0.00010263157894736843, + "low_lr": 2.0526315789473687e-06, + "step": 1705 + }, + { + "epoch": 4.486522024983564, + "grad_norm": 1.792134165763855, + "learning_rate": 0.00010210526315789474, + "loss": 1.2321, + "step": 1706 + }, + { + "epoch": 4.486522024983564, + "high_lr": 0.00010210526315789474, + "low_lr": 2.042105263157895e-06, + "step": 1706 + }, + { + "epoch": 4.486522024983564, + "high_lr": 0.00010210526315789474, + "low_lr": 2.042105263157895e-06, + "step": 1706 + }, + { + "epoch": 4.486522024983564, + "high_lr": 0.00010210526315789474, + "low_lr": 2.042105263157895e-06, + "step": 1706 + }, + { + "epoch": 4.486522024983564, + "high_lr": 0.00010210526315789474, + "low_lr": 2.042105263157895e-06, + "step": 1706 + }, + { + "epoch": 4.486522024983564, + "high_lr": 0.00010210526315789474, + "low_lr": 2.042105263157895e-06, + "step": 1706 + }, + { + "epoch": 4.486522024983564, + "high_lr": 0.00010210526315789474, + "low_lr": 2.042105263157895e-06, + "step": 1706 + }, + { + "epoch": 4.486522024983564, + "high_lr": 0.00010210526315789474, + "low_lr": 2.042105263157895e-06, + "step": 1706 + }, + { + "epoch": 4.486522024983564, + "high_lr": 0.00010210526315789474, + "low_lr": 2.042105263157895e-06, + "step": 1706 + }, + { + "epoch": 4.489151873767258, + "grad_norm": 1.527948021888733, + "learning_rate": 0.00010157894736842105, + "loss": 1.2312, + "step": 1707 + }, + { + "epoch": 4.489151873767258, + "high_lr": 0.00010157894736842105, + "low_lr": 2.031578947368421e-06, + "step": 1707 + }, + { + "epoch": 4.489151873767258, + "high_lr": 0.00010157894736842105, + "low_lr": 2.031578947368421e-06, + "step": 1707 + }, + { + "epoch": 4.489151873767258, + "high_lr": 0.00010157894736842105, + "low_lr": 2.031578947368421e-06, + "step": 1707 + }, + { + "epoch": 4.489151873767258, + "high_lr": 0.00010157894736842105, + "low_lr": 2.031578947368421e-06, + "step": 1707 + }, + { + "epoch": 4.489151873767258, + "high_lr": 0.00010157894736842105, + "low_lr": 2.031578947368421e-06, + "step": 1707 + }, + { + "epoch": 4.489151873767258, + "high_lr": 0.00010157894736842105, + "low_lr": 2.031578947368421e-06, + "step": 1707 + }, + { + "epoch": 4.489151873767258, + "high_lr": 0.00010157894736842105, + "low_lr": 2.031578947368421e-06, + "step": 1707 + }, + { + "epoch": 4.489151873767258, + "high_lr": 0.00010157894736842105, + "low_lr": 2.031578947368421e-06, + "step": 1707 + }, + { + "epoch": 4.491781722550954, + "grad_norm": 1.4565075635910034, + "learning_rate": 0.00010105263157894737, + "loss": 1.2345, + "step": 1708 + }, + { + "epoch": 4.491781722550954, + "high_lr": 0.00010105263157894737, + "low_lr": 2.0210526315789477e-06, + "step": 1708 + }, + { + "epoch": 4.491781722550954, + "high_lr": 0.00010105263157894737, + "low_lr": 2.0210526315789477e-06, + "step": 1708 + }, + { + "epoch": 4.491781722550954, + "high_lr": 0.00010105263157894737, + "low_lr": 2.0210526315789477e-06, + "step": 1708 + }, + { + "epoch": 4.491781722550954, + "high_lr": 0.00010105263157894737, + "low_lr": 2.0210526315789477e-06, + "step": 1708 + }, + { + "epoch": 4.491781722550954, + "high_lr": 0.00010105263157894737, + "low_lr": 2.0210526315789477e-06, + "step": 1708 + }, + { + "epoch": 4.491781722550954, + "high_lr": 0.00010105263157894737, + "low_lr": 2.0210526315789477e-06, + "step": 1708 + }, + { + "epoch": 4.491781722550954, + "high_lr": 0.00010105263157894737, + "low_lr": 2.0210526315789477e-06, + "step": 1708 + }, + { + "epoch": 4.491781722550954, + "high_lr": 0.00010105263157894737, + "low_lr": 2.0210526315789477e-06, + "step": 1708 + }, + { + "epoch": 4.494411571334648, + "grad_norm": 1.5402123928070068, + "learning_rate": 0.00010052631578947369, + "loss": 1.1942, + "step": 1709 + }, + { + "epoch": 4.494411571334648, + "high_lr": 0.00010052631578947369, + "low_lr": 2.010526315789474e-06, + "step": 1709 + }, + { + "epoch": 4.494411571334648, + "high_lr": 0.00010052631578947369, + "low_lr": 2.010526315789474e-06, + "step": 1709 + }, + { + "epoch": 4.494411571334648, + "high_lr": 0.00010052631578947369, + "low_lr": 2.010526315789474e-06, + "step": 1709 + }, + { + "epoch": 4.494411571334648, + "high_lr": 0.00010052631578947369, + "low_lr": 2.010526315789474e-06, + "step": 1709 + }, + { + "epoch": 4.494411571334648, + "high_lr": 0.00010052631578947369, + "low_lr": 2.010526315789474e-06, + "step": 1709 + }, + { + "epoch": 4.494411571334648, + "high_lr": 0.00010052631578947369, + "low_lr": 2.010526315789474e-06, + "step": 1709 + }, + { + "epoch": 4.494411571334648, + "high_lr": 0.00010052631578947369, + "low_lr": 2.010526315789474e-06, + "step": 1709 + }, + { + "epoch": 4.494411571334648, + "high_lr": 0.00010052631578947369, + "low_lr": 2.010526315789474e-06, + "step": 1709 + }, + { + "epoch": 4.497041420118343, + "grad_norm": 1.6114143133163452, + "learning_rate": 0.0001, + "loss": 1.2542, + "step": 1710 + }, + { + "epoch": 4.497041420118343, + "high_lr": 0.0001, + "low_lr": 2.0000000000000003e-06, + "step": 1710 + }, + { + "epoch": 4.497041420118343, + "high_lr": 0.0001, + "low_lr": 2.0000000000000003e-06, + "step": 1710 + }, + { + "epoch": 4.497041420118343, + "high_lr": 0.0001, + "low_lr": 2.0000000000000003e-06, + "step": 1710 + }, + { + "epoch": 4.497041420118343, + "high_lr": 0.0001, + "low_lr": 2.0000000000000003e-06, + "step": 1710 + }, + { + "epoch": 4.497041420118343, + "high_lr": 0.0001, + "low_lr": 2.0000000000000003e-06, + "step": 1710 + }, + { + "epoch": 4.497041420118343, + "high_lr": 0.0001, + "low_lr": 2.0000000000000003e-06, + "step": 1710 + }, + { + "epoch": 4.497041420118343, + "high_lr": 0.0001, + "low_lr": 2.0000000000000003e-06, + "step": 1710 + }, + { + "epoch": 4.497041420118343, + "high_lr": 0.0001, + "low_lr": 2.0000000000000003e-06, + "step": 1710 + }, + { + "epoch": 4.499671268902038, + "grad_norm": 1.480592131614685, + "learning_rate": 9.947368421052632e-05, + "loss": 1.2448, + "step": 1711 + }, + { + "epoch": 4.499671268902038, + "high_lr": 9.947368421052632e-05, + "low_lr": 1.9894736842105262e-06, + "step": 1711 + }, + { + "epoch": 4.499671268902038, + "high_lr": 9.947368421052632e-05, + "low_lr": 1.9894736842105262e-06, + "step": 1711 + }, + { + "epoch": 4.499671268902038, + "high_lr": 9.947368421052632e-05, + "low_lr": 1.9894736842105262e-06, + "step": 1711 + }, + { + "epoch": 4.499671268902038, + "high_lr": 9.947368421052632e-05, + "low_lr": 1.9894736842105262e-06, + "step": 1711 + }, + { + "epoch": 4.499671268902038, + "high_lr": 9.947368421052632e-05, + "low_lr": 1.9894736842105262e-06, + "step": 1711 + }, + { + "epoch": 4.499671268902038, + "high_lr": 9.947368421052632e-05, + "low_lr": 1.9894736842105262e-06, + "step": 1711 + }, + { + "epoch": 4.499671268902038, + "high_lr": 9.947368421052632e-05, + "low_lr": 1.9894736842105262e-06, + "step": 1711 + }, + { + "epoch": 4.499671268902038, + "high_lr": 9.947368421052632e-05, + "low_lr": 1.9894736842105262e-06, + "step": 1711 + }, + { + "epoch": 4.502301117685733, + "grad_norm": 1.4475226402282715, + "learning_rate": 9.894736842105263e-05, + "loss": 1.2349, + "step": 1712 + }, + { + "epoch": 4.502301117685733, + "high_lr": 9.894736842105263e-05, + "low_lr": 1.978947368421053e-06, + "step": 1712 + }, + { + "epoch": 4.502301117685733, + "high_lr": 9.894736842105263e-05, + "low_lr": 1.978947368421053e-06, + "step": 1712 + }, + { + "epoch": 4.502301117685733, + "high_lr": 9.894736842105263e-05, + "low_lr": 1.978947368421053e-06, + "step": 1712 + }, + { + "epoch": 4.502301117685733, + "high_lr": 9.894736842105263e-05, + "low_lr": 1.978947368421053e-06, + "step": 1712 + }, + { + "epoch": 4.502301117685733, + "high_lr": 9.894736842105263e-05, + "low_lr": 1.978947368421053e-06, + "step": 1712 + }, + { + "epoch": 4.502301117685733, + "high_lr": 9.894736842105263e-05, + "low_lr": 1.978947368421053e-06, + "step": 1712 + }, + { + "epoch": 4.502301117685733, + "high_lr": 9.894736842105263e-05, + "low_lr": 1.978947368421053e-06, + "step": 1712 + }, + { + "epoch": 4.502301117685733, + "high_lr": 9.894736842105263e-05, + "low_lr": 1.978947368421053e-06, + "step": 1712 + }, + { + "epoch": 4.504930966469428, + "grad_norm": 1.7226275205612183, + "learning_rate": 9.842105263157896e-05, + "loss": 1.1949, + "step": 1713 + }, + { + "epoch": 4.504930966469428, + "high_lr": 9.842105263157896e-05, + "low_lr": 1.9684210526315793e-06, + "step": 1713 + }, + { + "epoch": 4.504930966469428, + "high_lr": 9.842105263157896e-05, + "low_lr": 1.9684210526315793e-06, + "step": 1713 + }, + { + "epoch": 4.504930966469428, + "high_lr": 9.842105263157896e-05, + "low_lr": 1.9684210526315793e-06, + "step": 1713 + }, + { + "epoch": 4.504930966469428, + "high_lr": 9.842105263157896e-05, + "low_lr": 1.9684210526315793e-06, + "step": 1713 + }, + { + "epoch": 4.504930966469428, + "high_lr": 9.842105263157896e-05, + "low_lr": 1.9684210526315793e-06, + "step": 1713 + }, + { + "epoch": 4.504930966469428, + "high_lr": 9.842105263157896e-05, + "low_lr": 1.9684210526315793e-06, + "step": 1713 + }, + { + "epoch": 4.504930966469428, + "high_lr": 9.842105263157896e-05, + "low_lr": 1.9684210526315793e-06, + "step": 1713 + }, + { + "epoch": 4.504930966469428, + "high_lr": 9.842105263157896e-05, + "low_lr": 1.9684210526315793e-06, + "step": 1713 + }, + { + "epoch": 4.507560815253123, + "grad_norm": 1.5617784261703491, + "learning_rate": 9.789473684210526e-05, + "loss": 1.2295, + "step": 1714 + }, + { + "epoch": 4.507560815253123, + "high_lr": 9.789473684210526e-05, + "low_lr": 1.9578947368421052e-06, + "step": 1714 + }, + { + "epoch": 4.507560815253123, + "high_lr": 9.789473684210526e-05, + "low_lr": 1.9578947368421052e-06, + "step": 1714 + }, + { + "epoch": 4.507560815253123, + "high_lr": 9.789473684210526e-05, + "low_lr": 1.9578947368421052e-06, + "step": 1714 + }, + { + "epoch": 4.507560815253123, + "high_lr": 9.789473684210526e-05, + "low_lr": 1.9578947368421052e-06, + "step": 1714 + }, + { + "epoch": 4.507560815253123, + "high_lr": 9.789473684210526e-05, + "low_lr": 1.9578947368421052e-06, + "step": 1714 + }, + { + "epoch": 4.507560815253123, + "high_lr": 9.789473684210526e-05, + "low_lr": 1.9578947368421052e-06, + "step": 1714 + }, + { + "epoch": 4.507560815253123, + "high_lr": 9.789473684210526e-05, + "low_lr": 1.9578947368421052e-06, + "step": 1714 + }, + { + "epoch": 4.507560815253123, + "high_lr": 9.789473684210526e-05, + "low_lr": 1.9578947368421052e-06, + "step": 1714 + }, + { + "epoch": 4.510190664036818, + "grad_norm": 1.5321367979049683, + "learning_rate": 9.736842105263158e-05, + "loss": 1.2088, + "step": 1715 + }, + { + "epoch": 4.510190664036818, + "high_lr": 9.736842105263158e-05, + "low_lr": 1.9473684210526315e-06, + "step": 1715 + }, + { + "epoch": 4.510190664036818, + "high_lr": 9.736842105263158e-05, + "low_lr": 1.9473684210526315e-06, + "step": 1715 + }, + { + "epoch": 4.510190664036818, + "high_lr": 9.736842105263158e-05, + "low_lr": 1.9473684210526315e-06, + "step": 1715 + }, + { + "epoch": 4.510190664036818, + "high_lr": 9.736842105263158e-05, + "low_lr": 1.9473684210526315e-06, + "step": 1715 + }, + { + "epoch": 4.510190664036818, + "high_lr": 9.736842105263158e-05, + "low_lr": 1.9473684210526315e-06, + "step": 1715 + }, + { + "epoch": 4.510190664036818, + "high_lr": 9.736842105263158e-05, + "low_lr": 1.9473684210526315e-06, + "step": 1715 + }, + { + "epoch": 4.510190664036818, + "high_lr": 9.736842105263158e-05, + "low_lr": 1.9473684210526315e-06, + "step": 1715 + }, + { + "epoch": 4.510190664036818, + "high_lr": 9.736842105263158e-05, + "low_lr": 1.9473684210526315e-06, + "step": 1715 + }, + { + "epoch": 4.512820512820513, + "grad_norm": 1.6108227968215942, + "learning_rate": 9.68421052631579e-05, + "loss": 1.2579, + "step": 1716 + }, + { + "epoch": 4.512820512820513, + "high_lr": 9.68421052631579e-05, + "low_lr": 1.936842105263158e-06, + "step": 1716 + }, + { + "epoch": 4.512820512820513, + "high_lr": 9.68421052631579e-05, + "low_lr": 1.936842105263158e-06, + "step": 1716 + }, + { + "epoch": 4.512820512820513, + "high_lr": 9.68421052631579e-05, + "low_lr": 1.936842105263158e-06, + "step": 1716 + }, + { + "epoch": 4.512820512820513, + "high_lr": 9.68421052631579e-05, + "low_lr": 1.936842105263158e-06, + "step": 1716 + }, + { + "epoch": 4.512820512820513, + "high_lr": 9.68421052631579e-05, + "low_lr": 1.936842105263158e-06, + "step": 1716 + }, + { + "epoch": 4.512820512820513, + "high_lr": 9.68421052631579e-05, + "low_lr": 1.936842105263158e-06, + "step": 1716 + }, + { + "epoch": 4.512820512820513, + "high_lr": 9.68421052631579e-05, + "low_lr": 1.936842105263158e-06, + "step": 1716 + }, + { + "epoch": 4.512820512820513, + "high_lr": 9.68421052631579e-05, + "low_lr": 1.936842105263158e-06, + "step": 1716 + }, + { + "epoch": 4.515450361604207, + "grad_norm": 1.5909062623977661, + "learning_rate": 9.631578947368422e-05, + "loss": 1.1905, + "step": 1717 + }, + { + "epoch": 4.515450361604207, + "high_lr": 9.631578947368422e-05, + "low_lr": 1.9263157894736846e-06, + "step": 1717 + }, + { + "epoch": 4.515450361604207, + "high_lr": 9.631578947368422e-05, + "low_lr": 1.9263157894736846e-06, + "step": 1717 + }, + { + "epoch": 4.515450361604207, + "high_lr": 9.631578947368422e-05, + "low_lr": 1.9263157894736846e-06, + "step": 1717 + }, + { + "epoch": 4.515450361604207, + "high_lr": 9.631578947368422e-05, + "low_lr": 1.9263157894736846e-06, + "step": 1717 + }, + { + "epoch": 4.515450361604207, + "high_lr": 9.631578947368422e-05, + "low_lr": 1.9263157894736846e-06, + "step": 1717 + }, + { + "epoch": 4.515450361604207, + "high_lr": 9.631578947368422e-05, + "low_lr": 1.9263157894736846e-06, + "step": 1717 + }, + { + "epoch": 4.515450361604207, + "high_lr": 9.631578947368422e-05, + "low_lr": 1.9263157894736846e-06, + "step": 1717 + }, + { + "epoch": 4.515450361604207, + "high_lr": 9.631578947368422e-05, + "low_lr": 1.9263157894736846e-06, + "step": 1717 + }, + { + "epoch": 4.518080210387903, + "grad_norm": 2.0398621559143066, + "learning_rate": 9.578947368421052e-05, + "loss": 1.1962, + "step": 1718 + }, + { + "epoch": 4.518080210387903, + "high_lr": 9.578947368421052e-05, + "low_lr": 1.9157894736842105e-06, + "step": 1718 + }, + { + "epoch": 4.518080210387903, + "high_lr": 9.578947368421052e-05, + "low_lr": 1.9157894736842105e-06, + "step": 1718 + }, + { + "epoch": 4.518080210387903, + "high_lr": 9.578947368421052e-05, + "low_lr": 1.9157894736842105e-06, + "step": 1718 + }, + { + "epoch": 4.518080210387903, + "high_lr": 9.578947368421052e-05, + "low_lr": 1.9157894736842105e-06, + "step": 1718 + }, + { + "epoch": 4.518080210387903, + "high_lr": 9.578947368421052e-05, + "low_lr": 1.9157894736842105e-06, + "step": 1718 + }, + { + "epoch": 4.518080210387903, + "high_lr": 9.578947368421052e-05, + "low_lr": 1.9157894736842105e-06, + "step": 1718 + }, + { + "epoch": 4.518080210387903, + "high_lr": 9.578947368421052e-05, + "low_lr": 1.9157894736842105e-06, + "step": 1718 + }, + { + "epoch": 4.518080210387903, + "high_lr": 9.578947368421052e-05, + "low_lr": 1.9157894736842105e-06, + "step": 1718 + }, + { + "epoch": 4.520710059171598, + "grad_norm": 1.517663836479187, + "learning_rate": 9.526315789473685e-05, + "loss": 1.2209, + "step": 1719 + }, + { + "epoch": 4.520710059171598, + "high_lr": 9.526315789473685e-05, + "low_lr": 1.905263157894737e-06, + "step": 1719 + }, + { + "epoch": 4.520710059171598, + "high_lr": 9.526315789473685e-05, + "low_lr": 1.905263157894737e-06, + "step": 1719 + }, + { + "epoch": 4.520710059171598, + "high_lr": 9.526315789473685e-05, + "low_lr": 1.905263157894737e-06, + "step": 1719 + }, + { + "epoch": 4.520710059171598, + "high_lr": 9.526315789473685e-05, + "low_lr": 1.905263157894737e-06, + "step": 1719 + }, + { + "epoch": 4.520710059171598, + "high_lr": 9.526315789473685e-05, + "low_lr": 1.905263157894737e-06, + "step": 1719 + }, + { + "epoch": 4.520710059171598, + "high_lr": 9.526315789473685e-05, + "low_lr": 1.905263157894737e-06, + "step": 1719 + }, + { + "epoch": 4.520710059171598, + "high_lr": 9.526315789473685e-05, + "low_lr": 1.905263157894737e-06, + "step": 1719 + }, + { + "epoch": 4.520710059171598, + "high_lr": 9.526315789473685e-05, + "low_lr": 1.905263157894737e-06, + "step": 1719 + }, + { + "epoch": 4.523339907955292, + "grad_norm": 1.4186933040618896, + "learning_rate": 9.473684210526316e-05, + "loss": 1.2476, + "step": 1720 + }, + { + "epoch": 4.523339907955292, + "high_lr": 9.473684210526316e-05, + "low_lr": 1.8947368421052634e-06, + "step": 1720 + }, + { + "epoch": 4.523339907955292, + "high_lr": 9.473684210526316e-05, + "low_lr": 1.8947368421052634e-06, + "step": 1720 + }, + { + "epoch": 4.523339907955292, + "high_lr": 9.473684210526316e-05, + "low_lr": 1.8947368421052634e-06, + "step": 1720 + }, + { + "epoch": 4.523339907955292, + "high_lr": 9.473684210526316e-05, + "low_lr": 1.8947368421052634e-06, + "step": 1720 + }, + { + "epoch": 4.523339907955292, + "high_lr": 9.473684210526316e-05, + "low_lr": 1.8947368421052634e-06, + "step": 1720 + }, + { + "epoch": 4.523339907955292, + "high_lr": 9.473684210526316e-05, + "low_lr": 1.8947368421052634e-06, + "step": 1720 + }, + { + "epoch": 4.523339907955292, + "high_lr": 9.473684210526316e-05, + "low_lr": 1.8947368421052634e-06, + "step": 1720 + }, + { + "epoch": 4.523339907955292, + "high_lr": 9.473684210526316e-05, + "low_lr": 1.8947368421052634e-06, + "step": 1720 + }, + { + "epoch": 4.525969756738988, + "grad_norm": 1.722344160079956, + "learning_rate": 9.421052631578947e-05, + "loss": 1.2545, + "step": 1721 + }, + { + "epoch": 4.525969756738988, + "high_lr": 9.421052631578947e-05, + "low_lr": 1.8842105263157895e-06, + "step": 1721 + }, + { + "epoch": 4.525969756738988, + "high_lr": 9.421052631578947e-05, + "low_lr": 1.8842105263157895e-06, + "step": 1721 + }, + { + "epoch": 4.525969756738988, + "high_lr": 9.421052631578947e-05, + "low_lr": 1.8842105263157895e-06, + "step": 1721 + }, + { + "epoch": 4.525969756738988, + "high_lr": 9.421052631578947e-05, + "low_lr": 1.8842105263157895e-06, + "step": 1721 + }, + { + "epoch": 4.525969756738988, + "high_lr": 9.421052631578947e-05, + "low_lr": 1.8842105263157895e-06, + "step": 1721 + }, + { + "epoch": 4.525969756738988, + "high_lr": 9.421052631578947e-05, + "low_lr": 1.8842105263157895e-06, + "step": 1721 + }, + { + "epoch": 4.525969756738988, + "high_lr": 9.421052631578947e-05, + "low_lr": 1.8842105263157895e-06, + "step": 1721 + }, + { + "epoch": 4.525969756738988, + "high_lr": 9.421052631578947e-05, + "low_lr": 1.8842105263157895e-06, + "step": 1721 + }, + { + "epoch": 4.5285996055226825, + "grad_norm": 1.625738263130188, + "learning_rate": 9.368421052631579e-05, + "loss": 1.2292, + "step": 1722 + }, + { + "epoch": 4.5285996055226825, + "high_lr": 9.368421052631579e-05, + "low_lr": 1.8736842105263158e-06, + "step": 1722 + }, + { + "epoch": 4.5285996055226825, + "high_lr": 9.368421052631579e-05, + "low_lr": 1.8736842105263158e-06, + "step": 1722 + }, + { + "epoch": 4.5285996055226825, + "high_lr": 9.368421052631579e-05, + "low_lr": 1.8736842105263158e-06, + "step": 1722 + }, + { + "epoch": 4.5285996055226825, + "high_lr": 9.368421052631579e-05, + "low_lr": 1.8736842105263158e-06, + "step": 1722 + }, + { + "epoch": 4.5285996055226825, + "high_lr": 9.368421052631579e-05, + "low_lr": 1.8736842105263158e-06, + "step": 1722 + }, + { + "epoch": 4.5285996055226825, + "high_lr": 9.368421052631579e-05, + "low_lr": 1.8736842105263158e-06, + "step": 1722 + }, + { + "epoch": 4.5285996055226825, + "high_lr": 9.368421052631579e-05, + "low_lr": 1.8736842105263158e-06, + "step": 1722 + }, + { + "epoch": 4.5285996055226825, + "high_lr": 9.368421052631579e-05, + "low_lr": 1.8736842105263158e-06, + "step": 1722 + }, + { + "epoch": 4.531229454306377, + "grad_norm": 1.4990150928497314, + "learning_rate": 9.315789473684211e-05, + "loss": 1.1858, + "step": 1723 + }, + { + "epoch": 4.531229454306377, + "high_lr": 9.315789473684211e-05, + "low_lr": 1.8631578947368424e-06, + "step": 1723 + }, + { + "epoch": 4.531229454306377, + "high_lr": 9.315789473684211e-05, + "low_lr": 1.8631578947368424e-06, + "step": 1723 + }, + { + "epoch": 4.531229454306377, + "high_lr": 9.315789473684211e-05, + "low_lr": 1.8631578947368424e-06, + "step": 1723 + }, + { + "epoch": 4.531229454306377, + "high_lr": 9.315789473684211e-05, + "low_lr": 1.8631578947368424e-06, + "step": 1723 + }, + { + "epoch": 4.531229454306377, + "high_lr": 9.315789473684211e-05, + "low_lr": 1.8631578947368424e-06, + "step": 1723 + }, + { + "epoch": 4.531229454306377, + "high_lr": 9.315789473684211e-05, + "low_lr": 1.8631578947368424e-06, + "step": 1723 + }, + { + "epoch": 4.531229454306377, + "high_lr": 9.315789473684211e-05, + "low_lr": 1.8631578947368424e-06, + "step": 1723 + }, + { + "epoch": 4.531229454306377, + "high_lr": 9.315789473684211e-05, + "low_lr": 1.8631578947368424e-06, + "step": 1723 + }, + { + "epoch": 4.533859303090073, + "grad_norm": 1.6220035552978516, + "learning_rate": 9.263157894736843e-05, + "loss": 1.2138, + "step": 1724 + }, + { + "epoch": 4.533859303090073, + "high_lr": 9.263157894736843e-05, + "low_lr": 1.8526315789473687e-06, + "step": 1724 + }, + { + "epoch": 4.533859303090073, + "high_lr": 9.263157894736843e-05, + "low_lr": 1.8526315789473687e-06, + "step": 1724 + }, + { + "epoch": 4.533859303090073, + "high_lr": 9.263157894736843e-05, + "low_lr": 1.8526315789473687e-06, + "step": 1724 + }, + { + "epoch": 4.533859303090073, + "high_lr": 9.263157894736843e-05, + "low_lr": 1.8526315789473687e-06, + "step": 1724 + }, + { + "epoch": 4.533859303090073, + "high_lr": 9.263157894736843e-05, + "low_lr": 1.8526315789473687e-06, + "step": 1724 + }, + { + "epoch": 4.533859303090073, + "high_lr": 9.263157894736843e-05, + "low_lr": 1.8526315789473687e-06, + "step": 1724 + }, + { + "epoch": 4.533859303090073, + "high_lr": 9.263157894736843e-05, + "low_lr": 1.8526315789473687e-06, + "step": 1724 + }, + { + "epoch": 4.533859303090073, + "high_lr": 9.263157894736843e-05, + "low_lr": 1.8526315789473687e-06, + "step": 1724 + }, + { + "epoch": 4.536489151873767, + "grad_norm": 1.5870214700698853, + "learning_rate": 9.210526315789474e-05, + "loss": 1.235, + "step": 1725 + }, + { + "epoch": 4.536489151873767, + "high_lr": 9.210526315789474e-05, + "low_lr": 1.8421052631578948e-06, + "step": 1725 + }, + { + "epoch": 4.536489151873767, + "high_lr": 9.210526315789474e-05, + "low_lr": 1.8421052631578948e-06, + "step": 1725 + }, + { + "epoch": 4.536489151873767, + "high_lr": 9.210526315789474e-05, + "low_lr": 1.8421052631578948e-06, + "step": 1725 + }, + { + "epoch": 4.536489151873767, + "high_lr": 9.210526315789474e-05, + "low_lr": 1.8421052631578948e-06, + "step": 1725 + }, + { + "epoch": 4.536489151873767, + "high_lr": 9.210526315789474e-05, + "low_lr": 1.8421052631578948e-06, + "step": 1725 + }, + { + "epoch": 4.536489151873767, + "high_lr": 9.210526315789474e-05, + "low_lr": 1.8421052631578948e-06, + "step": 1725 + }, + { + "epoch": 4.536489151873767, + "high_lr": 9.210526315789474e-05, + "low_lr": 1.8421052631578948e-06, + "step": 1725 + }, + { + "epoch": 4.536489151873767, + "high_lr": 9.210526315789474e-05, + "low_lr": 1.8421052631578948e-06, + "step": 1725 + }, + { + "epoch": 4.539119000657462, + "grad_norm": 1.57758629322052, + "learning_rate": 9.157894736842105e-05, + "loss": 1.2318, + "step": 1726 + }, + { + "epoch": 4.539119000657462, + "high_lr": 9.157894736842105e-05, + "low_lr": 1.8315789473684211e-06, + "step": 1726 + }, + { + "epoch": 4.539119000657462, + "high_lr": 9.157894736842105e-05, + "low_lr": 1.8315789473684211e-06, + "step": 1726 + }, + { + "epoch": 4.539119000657462, + "high_lr": 9.157894736842105e-05, + "low_lr": 1.8315789473684211e-06, + "step": 1726 + }, + { + "epoch": 4.539119000657462, + "high_lr": 9.157894736842105e-05, + "low_lr": 1.8315789473684211e-06, + "step": 1726 + }, + { + "epoch": 4.539119000657462, + "high_lr": 9.157894736842105e-05, + "low_lr": 1.8315789473684211e-06, + "step": 1726 + }, + { + "epoch": 4.539119000657462, + "high_lr": 9.157894736842105e-05, + "low_lr": 1.8315789473684211e-06, + "step": 1726 + }, + { + "epoch": 4.539119000657462, + "high_lr": 9.157894736842105e-05, + "low_lr": 1.8315789473684211e-06, + "step": 1726 + }, + { + "epoch": 4.539119000657462, + "high_lr": 9.157894736842105e-05, + "low_lr": 1.8315789473684211e-06, + "step": 1726 + }, + { + "epoch": 4.5417488494411575, + "grad_norm": 1.504319667816162, + "learning_rate": 9.105263157894738e-05, + "loss": 1.1902, + "step": 1727 + }, + { + "epoch": 4.5417488494411575, + "high_lr": 9.105263157894738e-05, + "low_lr": 1.8210526315789475e-06, + "step": 1727 + }, + { + "epoch": 4.5417488494411575, + "high_lr": 9.105263157894738e-05, + "low_lr": 1.8210526315789475e-06, + "step": 1727 + }, + { + "epoch": 4.5417488494411575, + "high_lr": 9.105263157894738e-05, + "low_lr": 1.8210526315789475e-06, + "step": 1727 + }, + { + "epoch": 4.5417488494411575, + "high_lr": 9.105263157894738e-05, + "low_lr": 1.8210526315789475e-06, + "step": 1727 + }, + { + "epoch": 4.5417488494411575, + "high_lr": 9.105263157894738e-05, + "low_lr": 1.8210526315789475e-06, + "step": 1727 + }, + { + "epoch": 4.5417488494411575, + "high_lr": 9.105263157894738e-05, + "low_lr": 1.8210526315789475e-06, + "step": 1727 + }, + { + "epoch": 4.5417488494411575, + "high_lr": 9.105263157894738e-05, + "low_lr": 1.8210526315789475e-06, + "step": 1727 + }, + { + "epoch": 4.5417488494411575, + "high_lr": 9.105263157894738e-05, + "low_lr": 1.8210526315789475e-06, + "step": 1727 + }, + { + "epoch": 4.544378698224852, + "grad_norm": 1.4558043479919434, + "learning_rate": 9.052631578947369e-05, + "loss": 1.2492, + "step": 1728 + }, + { + "epoch": 4.544378698224852, + "high_lr": 9.052631578947369e-05, + "low_lr": 1.810526315789474e-06, + "step": 1728 + }, + { + "epoch": 4.544378698224852, + "high_lr": 9.052631578947369e-05, + "low_lr": 1.810526315789474e-06, + "step": 1728 + }, + { + "epoch": 4.544378698224852, + "high_lr": 9.052631578947369e-05, + "low_lr": 1.810526315789474e-06, + "step": 1728 + }, + { + "epoch": 4.544378698224852, + "high_lr": 9.052631578947369e-05, + "low_lr": 1.810526315789474e-06, + "step": 1728 + }, + { + "epoch": 4.544378698224852, + "high_lr": 9.052631578947369e-05, + "low_lr": 1.810526315789474e-06, + "step": 1728 + }, + { + "epoch": 4.544378698224852, + "high_lr": 9.052631578947369e-05, + "low_lr": 1.810526315789474e-06, + "step": 1728 + }, + { + "epoch": 4.544378698224852, + "high_lr": 9.052631578947369e-05, + "low_lr": 1.810526315789474e-06, + "step": 1728 + }, + { + "epoch": 4.544378698224852, + "high_lr": 9.052631578947369e-05, + "low_lr": 1.810526315789474e-06, + "step": 1728 + }, + { + "epoch": 4.547008547008547, + "grad_norm": 1.7187362909317017, + "learning_rate": 8.999999999999999e-05, + "loss": 1.2288, + "step": 1729 + }, + { + "epoch": 4.547008547008547, + "high_lr": 8.999999999999999e-05, + "low_lr": 1.8000000000000001e-06, + "step": 1729 + }, + { + "epoch": 4.547008547008547, + "high_lr": 8.999999999999999e-05, + "low_lr": 1.8000000000000001e-06, + "step": 1729 + }, + { + "epoch": 4.547008547008547, + "high_lr": 8.999999999999999e-05, + "low_lr": 1.8000000000000001e-06, + "step": 1729 + }, + { + "epoch": 4.547008547008547, + "high_lr": 8.999999999999999e-05, + "low_lr": 1.8000000000000001e-06, + "step": 1729 + }, + { + "epoch": 4.547008547008547, + "high_lr": 8.999999999999999e-05, + "low_lr": 1.8000000000000001e-06, + "step": 1729 + }, + { + "epoch": 4.547008547008547, + "high_lr": 8.999999999999999e-05, + "low_lr": 1.8000000000000001e-06, + "step": 1729 + }, + { + "epoch": 4.547008547008547, + "high_lr": 8.999999999999999e-05, + "low_lr": 1.8000000000000001e-06, + "step": 1729 + }, + { + "epoch": 4.547008547008547, + "high_lr": 8.999999999999999e-05, + "low_lr": 1.8000000000000001e-06, + "step": 1729 + }, + { + "epoch": 4.5496383957922415, + "grad_norm": 1.537452220916748, + "learning_rate": 8.947368421052632e-05, + "loss": 1.2804, + "step": 1730 + }, + { + "epoch": 4.5496383957922415, + "high_lr": 8.947368421052632e-05, + "low_lr": 1.7894736842105265e-06, + "step": 1730 + }, + { + "epoch": 4.5496383957922415, + "high_lr": 8.947368421052632e-05, + "low_lr": 1.7894736842105265e-06, + "step": 1730 + }, + { + "epoch": 4.5496383957922415, + "high_lr": 8.947368421052632e-05, + "low_lr": 1.7894736842105265e-06, + "step": 1730 + }, + { + "epoch": 4.5496383957922415, + "high_lr": 8.947368421052632e-05, + "low_lr": 1.7894736842105265e-06, + "step": 1730 + }, + { + "epoch": 4.5496383957922415, + "high_lr": 8.947368421052632e-05, + "low_lr": 1.7894736842105265e-06, + "step": 1730 + }, + { + "epoch": 4.5496383957922415, + "high_lr": 8.947368421052632e-05, + "low_lr": 1.7894736842105265e-06, + "step": 1730 + }, + { + "epoch": 4.5496383957922415, + "high_lr": 8.947368421052632e-05, + "low_lr": 1.7894736842105265e-06, + "step": 1730 + }, + { + "epoch": 4.5496383957922415, + "high_lr": 8.947368421052632e-05, + "low_lr": 1.7894736842105265e-06, + "step": 1730 + }, + { + "epoch": 4.552268244575937, + "grad_norm": 1.5931189060211182, + "learning_rate": 8.894736842105263e-05, + "loss": 1.205, + "step": 1731 + }, + { + "epoch": 4.552268244575937, + "high_lr": 8.894736842105263e-05, + "low_lr": 1.7789473684210528e-06, + "step": 1731 + }, + { + "epoch": 4.552268244575937, + "high_lr": 8.894736842105263e-05, + "low_lr": 1.7789473684210528e-06, + "step": 1731 + }, + { + "epoch": 4.552268244575937, + "high_lr": 8.894736842105263e-05, + "low_lr": 1.7789473684210528e-06, + "step": 1731 + }, + { + "epoch": 4.552268244575937, + "high_lr": 8.894736842105263e-05, + "low_lr": 1.7789473684210528e-06, + "step": 1731 + }, + { + "epoch": 4.552268244575937, + "high_lr": 8.894736842105263e-05, + "low_lr": 1.7789473684210528e-06, + "step": 1731 + }, + { + "epoch": 4.552268244575937, + "high_lr": 8.894736842105263e-05, + "low_lr": 1.7789473684210528e-06, + "step": 1731 + }, + { + "epoch": 4.552268244575937, + "high_lr": 8.894736842105263e-05, + "low_lr": 1.7789473684210528e-06, + "step": 1731 + }, + { + "epoch": 4.552268244575937, + "high_lr": 8.894736842105263e-05, + "low_lr": 1.7789473684210528e-06, + "step": 1731 + }, + { + "epoch": 4.554898093359632, + "grad_norm": 1.483323574066162, + "learning_rate": 8.842105263157894e-05, + "loss": 1.1979, + "step": 1732 + }, + { + "epoch": 4.554898093359632, + "high_lr": 8.842105263157894e-05, + "low_lr": 1.768421052631579e-06, + "step": 1732 + }, + { + "epoch": 4.554898093359632, + "high_lr": 8.842105263157894e-05, + "low_lr": 1.768421052631579e-06, + "step": 1732 + }, + { + "epoch": 4.554898093359632, + "high_lr": 8.842105263157894e-05, + "low_lr": 1.768421052631579e-06, + "step": 1732 + }, + { + "epoch": 4.554898093359632, + "high_lr": 8.842105263157894e-05, + "low_lr": 1.768421052631579e-06, + "step": 1732 + }, + { + "epoch": 4.554898093359632, + "high_lr": 8.842105263157894e-05, + "low_lr": 1.768421052631579e-06, + "step": 1732 + }, + { + "epoch": 4.554898093359632, + "high_lr": 8.842105263157894e-05, + "low_lr": 1.768421052631579e-06, + "step": 1732 + }, + { + "epoch": 4.554898093359632, + "high_lr": 8.842105263157894e-05, + "low_lr": 1.768421052631579e-06, + "step": 1732 + }, + { + "epoch": 4.554898093359632, + "high_lr": 8.842105263157894e-05, + "low_lr": 1.768421052631579e-06, + "step": 1732 + }, + { + "epoch": 4.557527942143327, + "grad_norm": 1.5738469362258911, + "learning_rate": 8.789473684210526e-05, + "loss": 1.2481, + "step": 1733 + }, + { + "epoch": 4.557527942143327, + "high_lr": 8.789473684210526e-05, + "low_lr": 1.7578947368421054e-06, + "step": 1733 + }, + { + "epoch": 4.557527942143327, + "high_lr": 8.789473684210526e-05, + "low_lr": 1.7578947368421054e-06, + "step": 1733 + }, + { + "epoch": 4.557527942143327, + "high_lr": 8.789473684210526e-05, + "low_lr": 1.7578947368421054e-06, + "step": 1733 + }, + { + "epoch": 4.557527942143327, + "high_lr": 8.789473684210526e-05, + "low_lr": 1.7578947368421054e-06, + "step": 1733 + }, + { + "epoch": 4.557527942143327, + "high_lr": 8.789473684210526e-05, + "low_lr": 1.7578947368421054e-06, + "step": 1733 + }, + { + "epoch": 4.557527942143327, + "high_lr": 8.789473684210526e-05, + "low_lr": 1.7578947368421054e-06, + "step": 1733 + }, + { + "epoch": 4.557527942143327, + "high_lr": 8.789473684210526e-05, + "low_lr": 1.7578947368421054e-06, + "step": 1733 + }, + { + "epoch": 4.557527942143327, + "high_lr": 8.789473684210526e-05, + "low_lr": 1.7578947368421054e-06, + "step": 1733 + }, + { + "epoch": 4.560157790927022, + "grad_norm": 1.4959828853607178, + "learning_rate": 8.736842105263158e-05, + "loss": 1.1967, + "step": 1734 + }, + { + "epoch": 4.560157790927022, + "high_lr": 8.736842105263158e-05, + "low_lr": 1.7473684210526318e-06, + "step": 1734 + }, + { + "epoch": 4.560157790927022, + "high_lr": 8.736842105263158e-05, + "low_lr": 1.7473684210526318e-06, + "step": 1734 + }, + { + "epoch": 4.560157790927022, + "high_lr": 8.736842105263158e-05, + "low_lr": 1.7473684210526318e-06, + "step": 1734 + }, + { + "epoch": 4.560157790927022, + "high_lr": 8.736842105263158e-05, + "low_lr": 1.7473684210526318e-06, + "step": 1734 + }, + { + "epoch": 4.560157790927022, + "high_lr": 8.736842105263158e-05, + "low_lr": 1.7473684210526318e-06, + "step": 1734 + }, + { + "epoch": 4.560157790927022, + "high_lr": 8.736842105263158e-05, + "low_lr": 1.7473684210526318e-06, + "step": 1734 + }, + { + "epoch": 4.560157790927022, + "high_lr": 8.736842105263158e-05, + "low_lr": 1.7473684210526318e-06, + "step": 1734 + }, + { + "epoch": 4.560157790927022, + "high_lr": 8.736842105263158e-05, + "low_lr": 1.7473684210526318e-06, + "step": 1734 + }, + { + "epoch": 4.5627876397107165, + "grad_norm": 1.5765563249588013, + "learning_rate": 8.68421052631579e-05, + "loss": 1.2323, + "step": 1735 + }, + { + "epoch": 4.5627876397107165, + "high_lr": 8.68421052631579e-05, + "low_lr": 1.736842105263158e-06, + "step": 1735 + }, + { + "epoch": 4.5627876397107165, + "high_lr": 8.68421052631579e-05, + "low_lr": 1.736842105263158e-06, + "step": 1735 + }, + { + "epoch": 4.5627876397107165, + "high_lr": 8.68421052631579e-05, + "low_lr": 1.736842105263158e-06, + "step": 1735 + }, + { + "epoch": 4.5627876397107165, + "high_lr": 8.68421052631579e-05, + "low_lr": 1.736842105263158e-06, + "step": 1735 + }, + { + "epoch": 4.5627876397107165, + "high_lr": 8.68421052631579e-05, + "low_lr": 1.736842105263158e-06, + "step": 1735 + }, + { + "epoch": 4.5627876397107165, + "high_lr": 8.68421052631579e-05, + "low_lr": 1.736842105263158e-06, + "step": 1735 + }, + { + "epoch": 4.5627876397107165, + "high_lr": 8.68421052631579e-05, + "low_lr": 1.736842105263158e-06, + "step": 1735 + }, + { + "epoch": 4.5627876397107165, + "high_lr": 8.68421052631579e-05, + "low_lr": 1.736842105263158e-06, + "step": 1735 + }, + { + "epoch": 4.565417488494411, + "grad_norm": 1.4519524574279785, + "learning_rate": 8.631578947368421e-05, + "loss": 1.1848, + "step": 1736 + }, + { + "epoch": 4.565417488494411, + "high_lr": 8.631578947368421e-05, + "low_lr": 1.7263157894736842e-06, + "step": 1736 + }, + { + "epoch": 4.565417488494411, + "high_lr": 8.631578947368421e-05, + "low_lr": 1.7263157894736842e-06, + "step": 1736 + }, + { + "epoch": 4.565417488494411, + "high_lr": 8.631578947368421e-05, + "low_lr": 1.7263157894736842e-06, + "step": 1736 + }, + { + "epoch": 4.565417488494411, + "high_lr": 8.631578947368421e-05, + "low_lr": 1.7263157894736842e-06, + "step": 1736 + }, + { + "epoch": 4.565417488494411, + "high_lr": 8.631578947368421e-05, + "low_lr": 1.7263157894736842e-06, + "step": 1736 + }, + { + "epoch": 4.565417488494411, + "high_lr": 8.631578947368421e-05, + "low_lr": 1.7263157894736842e-06, + "step": 1736 + }, + { + "epoch": 4.565417488494411, + "high_lr": 8.631578947368421e-05, + "low_lr": 1.7263157894736842e-06, + "step": 1736 + }, + { + "epoch": 4.565417488494411, + "high_lr": 8.631578947368421e-05, + "low_lr": 1.7263157894736842e-06, + "step": 1736 + }, + { + "epoch": 4.568047337278107, + "grad_norm": 1.6031697988510132, + "learning_rate": 8.578947368421052e-05, + "loss": 1.2147, + "step": 1737 + }, + { + "epoch": 4.568047337278107, + "high_lr": 8.578947368421052e-05, + "low_lr": 1.7157894736842107e-06, + "step": 1737 + }, + { + "epoch": 4.568047337278107, + "high_lr": 8.578947368421052e-05, + "low_lr": 1.7157894736842107e-06, + "step": 1737 + }, + { + "epoch": 4.568047337278107, + "high_lr": 8.578947368421052e-05, + "low_lr": 1.7157894736842107e-06, + "step": 1737 + }, + { + "epoch": 4.568047337278107, + "high_lr": 8.578947368421052e-05, + "low_lr": 1.7157894736842107e-06, + "step": 1737 + }, + { + "epoch": 4.568047337278107, + "high_lr": 8.578947368421052e-05, + "low_lr": 1.7157894736842107e-06, + "step": 1737 + }, + { + "epoch": 4.568047337278107, + "high_lr": 8.578947368421052e-05, + "low_lr": 1.7157894736842107e-06, + "step": 1737 + }, + { + "epoch": 4.568047337278107, + "high_lr": 8.578947368421052e-05, + "low_lr": 1.7157894736842107e-06, + "step": 1737 + }, + { + "epoch": 4.568047337278107, + "high_lr": 8.578947368421052e-05, + "low_lr": 1.7157894736842107e-06, + "step": 1737 + }, + { + "epoch": 4.570677186061801, + "grad_norm": 1.5287643671035767, + "learning_rate": 8.526315789473685e-05, + "loss": 1.2137, + "step": 1738 + }, + { + "epoch": 4.570677186061801, + "high_lr": 8.526315789473685e-05, + "low_lr": 1.705263157894737e-06, + "step": 1738 + }, + { + "epoch": 4.570677186061801, + "high_lr": 8.526315789473685e-05, + "low_lr": 1.705263157894737e-06, + "step": 1738 + }, + { + "epoch": 4.570677186061801, + "high_lr": 8.526315789473685e-05, + "low_lr": 1.705263157894737e-06, + "step": 1738 + }, + { + "epoch": 4.570677186061801, + "high_lr": 8.526315789473685e-05, + "low_lr": 1.705263157894737e-06, + "step": 1738 + }, + { + "epoch": 4.570677186061801, + "high_lr": 8.526315789473685e-05, + "low_lr": 1.705263157894737e-06, + "step": 1738 + }, + { + "epoch": 4.570677186061801, + "high_lr": 8.526315789473685e-05, + "low_lr": 1.705263157894737e-06, + "step": 1738 + }, + { + "epoch": 4.570677186061801, + "high_lr": 8.526315789473685e-05, + "low_lr": 1.705263157894737e-06, + "step": 1738 + }, + { + "epoch": 4.570677186061801, + "high_lr": 8.526315789473685e-05, + "low_lr": 1.705263157894737e-06, + "step": 1738 + }, + { + "epoch": 4.573307034845496, + "grad_norm": 1.4393537044525146, + "learning_rate": 8.473684210526315e-05, + "loss": 1.1929, + "step": 1739 + }, + { + "epoch": 4.573307034845496, + "high_lr": 8.473684210526315e-05, + "low_lr": 1.6947368421052632e-06, + "step": 1739 + }, + { + "epoch": 4.573307034845496, + "high_lr": 8.473684210526315e-05, + "low_lr": 1.6947368421052632e-06, + "step": 1739 + }, + { + "epoch": 4.573307034845496, + "high_lr": 8.473684210526315e-05, + "low_lr": 1.6947368421052632e-06, + "step": 1739 + }, + { + "epoch": 4.573307034845496, + "high_lr": 8.473684210526315e-05, + "low_lr": 1.6947368421052632e-06, + "step": 1739 + }, + { + "epoch": 4.573307034845496, + "high_lr": 8.473684210526315e-05, + "low_lr": 1.6947368421052632e-06, + "step": 1739 + }, + { + "epoch": 4.573307034845496, + "high_lr": 8.473684210526315e-05, + "low_lr": 1.6947368421052632e-06, + "step": 1739 + }, + { + "epoch": 4.573307034845496, + "high_lr": 8.473684210526315e-05, + "low_lr": 1.6947368421052632e-06, + "step": 1739 + }, + { + "epoch": 4.573307034845496, + "high_lr": 8.473684210526315e-05, + "low_lr": 1.6947368421052632e-06, + "step": 1739 + }, + { + "epoch": 4.575936883629192, + "grad_norm": 1.4716784954071045, + "learning_rate": 8.421052631578948e-05, + "loss": 1.3061, + "step": 1740 + }, + { + "epoch": 4.575936883629192, + "high_lr": 8.421052631578948e-05, + "low_lr": 1.6842105263157895e-06, + "step": 1740 + }, + { + "epoch": 4.575936883629192, + "high_lr": 8.421052631578948e-05, + "low_lr": 1.6842105263157895e-06, + "step": 1740 + }, + { + "epoch": 4.575936883629192, + "high_lr": 8.421052631578948e-05, + "low_lr": 1.6842105263157895e-06, + "step": 1740 + }, + { + "epoch": 4.575936883629192, + "high_lr": 8.421052631578948e-05, + "low_lr": 1.6842105263157895e-06, + "step": 1740 + }, + { + "epoch": 4.575936883629192, + "high_lr": 8.421052631578948e-05, + "low_lr": 1.6842105263157895e-06, + "step": 1740 + }, + { + "epoch": 4.575936883629192, + "high_lr": 8.421052631578948e-05, + "low_lr": 1.6842105263157895e-06, + "step": 1740 + }, + { + "epoch": 4.575936883629192, + "high_lr": 8.421052631578948e-05, + "low_lr": 1.6842105263157895e-06, + "step": 1740 + }, + { + "epoch": 4.575936883629192, + "high_lr": 8.421052631578948e-05, + "low_lr": 1.6842105263157895e-06, + "step": 1740 + }, + { + "epoch": 4.578566732412886, + "grad_norm": 1.615149736404419, + "learning_rate": 8.368421052631579e-05, + "loss": 1.214, + "step": 1741 + }, + { + "epoch": 4.578566732412886, + "high_lr": 8.368421052631579e-05, + "low_lr": 1.673684210526316e-06, + "step": 1741 + }, + { + "epoch": 4.578566732412886, + "high_lr": 8.368421052631579e-05, + "low_lr": 1.673684210526316e-06, + "step": 1741 + }, + { + "epoch": 4.578566732412886, + "high_lr": 8.368421052631579e-05, + "low_lr": 1.673684210526316e-06, + "step": 1741 + }, + { + "epoch": 4.578566732412886, + "high_lr": 8.368421052631579e-05, + "low_lr": 1.673684210526316e-06, + "step": 1741 + }, + { + "epoch": 4.578566732412886, + "high_lr": 8.368421052631579e-05, + "low_lr": 1.673684210526316e-06, + "step": 1741 + }, + { + "epoch": 4.578566732412886, + "high_lr": 8.368421052631579e-05, + "low_lr": 1.673684210526316e-06, + "step": 1741 + }, + { + "epoch": 4.578566732412886, + "high_lr": 8.368421052631579e-05, + "low_lr": 1.673684210526316e-06, + "step": 1741 + }, + { + "epoch": 4.578566732412886, + "high_lr": 8.368421052631579e-05, + "low_lr": 1.673684210526316e-06, + "step": 1741 + }, + { + "epoch": 4.581196581196581, + "grad_norm": 1.5729044675827026, + "learning_rate": 8.315789473684212e-05, + "loss": 1.2583, + "step": 1742 + }, + { + "epoch": 4.581196581196581, + "high_lr": 8.315789473684212e-05, + "low_lr": 1.6631578947368424e-06, + "step": 1742 + }, + { + "epoch": 4.581196581196581, + "high_lr": 8.315789473684212e-05, + "low_lr": 1.6631578947368424e-06, + "step": 1742 + }, + { + "epoch": 4.581196581196581, + "high_lr": 8.315789473684212e-05, + "low_lr": 1.6631578947368424e-06, + "step": 1742 + }, + { + "epoch": 4.581196581196581, + "high_lr": 8.315789473684212e-05, + "low_lr": 1.6631578947368424e-06, + "step": 1742 + }, + { + "epoch": 4.581196581196581, + "high_lr": 8.315789473684212e-05, + "low_lr": 1.6631578947368424e-06, + "step": 1742 + }, + { + "epoch": 4.581196581196581, + "high_lr": 8.315789473684212e-05, + "low_lr": 1.6631578947368424e-06, + "step": 1742 + }, + { + "epoch": 4.581196581196581, + "high_lr": 8.315789473684212e-05, + "low_lr": 1.6631578947368424e-06, + "step": 1742 + }, + { + "epoch": 4.581196581196581, + "high_lr": 8.315789473684212e-05, + "low_lr": 1.6631578947368424e-06, + "step": 1742 + }, + { + "epoch": 4.5838264299802765, + "grad_norm": 1.6791571378707886, + "learning_rate": 8.263157894736841e-05, + "loss": 1.2709, + "step": 1743 + }, + { + "epoch": 4.5838264299802765, + "high_lr": 8.263157894736841e-05, + "low_lr": 1.6526315789473685e-06, + "step": 1743 + }, + { + "epoch": 4.5838264299802765, + "high_lr": 8.263157894736841e-05, + "low_lr": 1.6526315789473685e-06, + "step": 1743 + }, + { + "epoch": 4.5838264299802765, + "high_lr": 8.263157894736841e-05, + "low_lr": 1.6526315789473685e-06, + "step": 1743 + }, + { + "epoch": 4.5838264299802765, + "high_lr": 8.263157894736841e-05, + "low_lr": 1.6526315789473685e-06, + "step": 1743 + }, + { + "epoch": 4.5838264299802765, + "high_lr": 8.263157894736841e-05, + "low_lr": 1.6526315789473685e-06, + "step": 1743 + }, + { + "epoch": 4.5838264299802765, + "high_lr": 8.263157894736841e-05, + "low_lr": 1.6526315789473685e-06, + "step": 1743 + }, + { + "epoch": 4.5838264299802765, + "high_lr": 8.263157894736841e-05, + "low_lr": 1.6526315789473685e-06, + "step": 1743 + }, + { + "epoch": 4.5838264299802765, + "high_lr": 8.263157894736841e-05, + "low_lr": 1.6526315789473685e-06, + "step": 1743 + }, + { + "epoch": 4.586456278763971, + "grad_norm": 1.678160309791565, + "learning_rate": 8.210526315789474e-05, + "loss": 1.2268, + "step": 1744 + }, + { + "epoch": 4.586456278763971, + "high_lr": 8.210526315789474e-05, + "low_lr": 1.6421052631578948e-06, + "step": 1744 + }, + { + "epoch": 4.586456278763971, + "high_lr": 8.210526315789474e-05, + "low_lr": 1.6421052631578948e-06, + "step": 1744 + }, + { + "epoch": 4.586456278763971, + "high_lr": 8.210526315789474e-05, + "low_lr": 1.6421052631578948e-06, + "step": 1744 + }, + { + "epoch": 4.586456278763971, + "high_lr": 8.210526315789474e-05, + "low_lr": 1.6421052631578948e-06, + "step": 1744 + }, + { + "epoch": 4.586456278763971, + "high_lr": 8.210526315789474e-05, + "low_lr": 1.6421052631578948e-06, + "step": 1744 + }, + { + "epoch": 4.586456278763971, + "high_lr": 8.210526315789474e-05, + "low_lr": 1.6421052631578948e-06, + "step": 1744 + }, + { + "epoch": 4.586456278763971, + "high_lr": 8.210526315789474e-05, + "low_lr": 1.6421052631578948e-06, + "step": 1744 + }, + { + "epoch": 4.586456278763971, + "high_lr": 8.210526315789474e-05, + "low_lr": 1.6421052631578948e-06, + "step": 1744 + }, + { + "epoch": 4.589086127547666, + "grad_norm": 1.5540153980255127, + "learning_rate": 8.157894736842105e-05, + "loss": 1.202, + "step": 1745 + }, + { + "epoch": 4.589086127547666, + "high_lr": 8.157894736842105e-05, + "low_lr": 1.6315789473684212e-06, + "step": 1745 + }, + { + "epoch": 4.589086127547666, + "high_lr": 8.157894736842105e-05, + "low_lr": 1.6315789473684212e-06, + "step": 1745 + }, + { + "epoch": 4.589086127547666, + "high_lr": 8.157894736842105e-05, + "low_lr": 1.6315789473684212e-06, + "step": 1745 + }, + { + "epoch": 4.589086127547666, + "high_lr": 8.157894736842105e-05, + "low_lr": 1.6315789473684212e-06, + "step": 1745 + }, + { + "epoch": 4.589086127547666, + "high_lr": 8.157894736842105e-05, + "low_lr": 1.6315789473684212e-06, + "step": 1745 + }, + { + "epoch": 4.589086127547666, + "high_lr": 8.157894736842105e-05, + "low_lr": 1.6315789473684212e-06, + "step": 1745 + }, + { + "epoch": 4.589086127547666, + "high_lr": 8.157894736842105e-05, + "low_lr": 1.6315789473684212e-06, + "step": 1745 + }, + { + "epoch": 4.589086127547666, + "high_lr": 8.157894736842105e-05, + "low_lr": 1.6315789473684212e-06, + "step": 1745 + }, + { + "epoch": 4.591715976331361, + "grad_norm": 1.7042880058288574, + "learning_rate": 8.105263157894737e-05, + "loss": 1.177, + "step": 1746 + }, + { + "epoch": 4.591715976331361, + "high_lr": 8.105263157894737e-05, + "low_lr": 1.6210526315789473e-06, + "step": 1746 + }, + { + "epoch": 4.591715976331361, + "high_lr": 8.105263157894737e-05, + "low_lr": 1.6210526315789473e-06, + "step": 1746 + }, + { + "epoch": 4.591715976331361, + "high_lr": 8.105263157894737e-05, + "low_lr": 1.6210526315789473e-06, + "step": 1746 + }, + { + "epoch": 4.591715976331361, + "high_lr": 8.105263157894737e-05, + "low_lr": 1.6210526315789473e-06, + "step": 1746 + }, + { + "epoch": 4.591715976331361, + "high_lr": 8.105263157894737e-05, + "low_lr": 1.6210526315789473e-06, + "step": 1746 + }, + { + "epoch": 4.591715976331361, + "high_lr": 8.105263157894737e-05, + "low_lr": 1.6210526315789473e-06, + "step": 1746 + }, + { + "epoch": 4.591715976331361, + "high_lr": 8.105263157894737e-05, + "low_lr": 1.6210526315789473e-06, + "step": 1746 + }, + { + "epoch": 4.591715976331361, + "high_lr": 8.105263157894737e-05, + "low_lr": 1.6210526315789473e-06, + "step": 1746 + }, + { + "epoch": 4.594345825115056, + "grad_norm": 1.5718997716903687, + "learning_rate": 8.052631578947368e-05, + "loss": 1.1972, + "step": 1747 + }, + { + "epoch": 4.594345825115056, + "high_lr": 8.052631578947368e-05, + "low_lr": 1.6105263157894738e-06, + "step": 1747 + }, + { + "epoch": 4.594345825115056, + "high_lr": 8.052631578947368e-05, + "low_lr": 1.6105263157894738e-06, + "step": 1747 + }, + { + "epoch": 4.594345825115056, + "high_lr": 8.052631578947368e-05, + "low_lr": 1.6105263157894738e-06, + "step": 1747 + }, + { + "epoch": 4.594345825115056, + "high_lr": 8.052631578947368e-05, + "low_lr": 1.6105263157894738e-06, + "step": 1747 + }, + { + "epoch": 4.594345825115056, + "high_lr": 8.052631578947368e-05, + "low_lr": 1.6105263157894738e-06, + "step": 1747 + }, + { + "epoch": 4.594345825115056, + "high_lr": 8.052631578947368e-05, + "low_lr": 1.6105263157894738e-06, + "step": 1747 + }, + { + "epoch": 4.594345825115056, + "high_lr": 8.052631578947368e-05, + "low_lr": 1.6105263157894738e-06, + "step": 1747 + }, + { + "epoch": 4.594345825115056, + "high_lr": 8.052631578947368e-05, + "low_lr": 1.6105263157894738e-06, + "step": 1747 + }, + { + "epoch": 4.596975673898751, + "grad_norm": 1.4358165264129639, + "learning_rate": 8e-05, + "loss": 1.2242, + "step": 1748 + }, + { + "epoch": 4.596975673898751, + "high_lr": 8e-05, + "low_lr": 1.6000000000000001e-06, + "step": 1748 + }, + { + "epoch": 4.596975673898751, + "high_lr": 8e-05, + "low_lr": 1.6000000000000001e-06, + "step": 1748 + }, + { + "epoch": 4.596975673898751, + "high_lr": 8e-05, + "low_lr": 1.6000000000000001e-06, + "step": 1748 + }, + { + "epoch": 4.596975673898751, + "high_lr": 8e-05, + "low_lr": 1.6000000000000001e-06, + "step": 1748 + }, + { + "epoch": 4.596975673898751, + "high_lr": 8e-05, + "low_lr": 1.6000000000000001e-06, + "step": 1748 + }, + { + "epoch": 4.596975673898751, + "high_lr": 8e-05, + "low_lr": 1.6000000000000001e-06, + "step": 1748 + }, + { + "epoch": 4.596975673898751, + "high_lr": 8e-05, + "low_lr": 1.6000000000000001e-06, + "step": 1748 + }, + { + "epoch": 4.596975673898751, + "high_lr": 8e-05, + "low_lr": 1.6000000000000001e-06, + "step": 1748 + }, + { + "epoch": 4.599605522682445, + "grad_norm": 1.642826795578003, + "learning_rate": 7.947368421052632e-05, + "loss": 1.153, + "step": 1749 + }, + { + "epoch": 4.599605522682445, + "high_lr": 7.947368421052632e-05, + "low_lr": 1.5894736842105265e-06, + "step": 1749 + }, + { + "epoch": 4.599605522682445, + "high_lr": 7.947368421052632e-05, + "low_lr": 1.5894736842105265e-06, + "step": 1749 + }, + { + "epoch": 4.599605522682445, + "high_lr": 7.947368421052632e-05, + "low_lr": 1.5894736842105265e-06, + "step": 1749 + }, + { + "epoch": 4.599605522682445, + "high_lr": 7.947368421052632e-05, + "low_lr": 1.5894736842105265e-06, + "step": 1749 + }, + { + "epoch": 4.599605522682445, + "high_lr": 7.947368421052632e-05, + "low_lr": 1.5894736842105265e-06, + "step": 1749 + }, + { + "epoch": 4.599605522682445, + "high_lr": 7.947368421052632e-05, + "low_lr": 1.5894736842105265e-06, + "step": 1749 + }, + { + "epoch": 4.599605522682445, + "high_lr": 7.947368421052632e-05, + "low_lr": 1.5894736842105265e-06, + "step": 1749 + }, + { + "epoch": 4.599605522682445, + "high_lr": 7.947368421052632e-05, + "low_lr": 1.5894736842105265e-06, + "step": 1749 + }, + { + "epoch": 4.602235371466141, + "grad_norm": 1.6457551717758179, + "learning_rate": 7.894736842105263e-05, + "loss": 1.1835, + "step": 1750 + }, + { + "epoch": 4.602235371466141, + "high_lr": 7.894736842105263e-05, + "low_lr": 1.5789473684210526e-06, + "step": 1750 + }, + { + "epoch": 4.602235371466141, + "high_lr": 7.894736842105263e-05, + "low_lr": 1.5789473684210526e-06, + "step": 1750 + }, + { + "epoch": 4.602235371466141, + "high_lr": 7.894736842105263e-05, + "low_lr": 1.5789473684210526e-06, + "step": 1750 + }, + { + "epoch": 4.602235371466141, + "high_lr": 7.894736842105263e-05, + "low_lr": 1.5789473684210526e-06, + "step": 1750 + }, + { + "epoch": 4.602235371466141, + "high_lr": 7.894736842105263e-05, + "low_lr": 1.5789473684210526e-06, + "step": 1750 + }, + { + "epoch": 4.602235371466141, + "high_lr": 7.894736842105263e-05, + "low_lr": 1.5789473684210526e-06, + "step": 1750 + }, + { + "epoch": 4.602235371466141, + "high_lr": 7.894736842105263e-05, + "low_lr": 1.5789473684210526e-06, + "step": 1750 + }, + { + "epoch": 4.602235371466141, + "high_lr": 7.894736842105263e-05, + "low_lr": 1.5789473684210526e-06, + "step": 1750 + }, + { + "epoch": 4.6048652202498355, + "grad_norm": 1.6383179426193237, + "learning_rate": 7.842105263157895e-05, + "loss": 1.2388, + "step": 1751 + }, + { + "epoch": 4.6048652202498355, + "high_lr": 7.842105263157895e-05, + "low_lr": 1.5684210526315791e-06, + "step": 1751 + }, + { + "epoch": 4.6048652202498355, + "high_lr": 7.842105263157895e-05, + "low_lr": 1.5684210526315791e-06, + "step": 1751 + }, + { + "epoch": 4.6048652202498355, + "high_lr": 7.842105263157895e-05, + "low_lr": 1.5684210526315791e-06, + "step": 1751 + }, + { + "epoch": 4.6048652202498355, + "high_lr": 7.842105263157895e-05, + "low_lr": 1.5684210526315791e-06, + "step": 1751 + }, + { + "epoch": 4.6048652202498355, + "high_lr": 7.842105263157895e-05, + "low_lr": 1.5684210526315791e-06, + "step": 1751 + }, + { + "epoch": 4.6048652202498355, + "high_lr": 7.842105263157895e-05, + "low_lr": 1.5684210526315791e-06, + "step": 1751 + }, + { + "epoch": 4.6048652202498355, + "high_lr": 7.842105263157895e-05, + "low_lr": 1.5684210526315791e-06, + "step": 1751 + }, + { + "epoch": 4.6048652202498355, + "high_lr": 7.842105263157895e-05, + "low_lr": 1.5684210526315791e-06, + "step": 1751 + }, + { + "epoch": 4.607495069033531, + "grad_norm": 1.5649884939193726, + "learning_rate": 7.789473684210527e-05, + "loss": 1.1955, + "step": 1752 + }, + { + "epoch": 4.607495069033531, + "high_lr": 7.789473684210527e-05, + "low_lr": 1.5578947368421054e-06, + "step": 1752 + }, + { + "epoch": 4.607495069033531, + "high_lr": 7.789473684210527e-05, + "low_lr": 1.5578947368421054e-06, + "step": 1752 + }, + { + "epoch": 4.607495069033531, + "high_lr": 7.789473684210527e-05, + "low_lr": 1.5578947368421054e-06, + "step": 1752 + }, + { + "epoch": 4.607495069033531, + "high_lr": 7.789473684210527e-05, + "low_lr": 1.5578947368421054e-06, + "step": 1752 + }, + { + "epoch": 4.607495069033531, + "high_lr": 7.789473684210527e-05, + "low_lr": 1.5578947368421054e-06, + "step": 1752 + }, + { + "epoch": 4.607495069033531, + "high_lr": 7.789473684210527e-05, + "low_lr": 1.5578947368421054e-06, + "step": 1752 + }, + { + "epoch": 4.607495069033531, + "high_lr": 7.789473684210527e-05, + "low_lr": 1.5578947368421054e-06, + "step": 1752 + }, + { + "epoch": 4.607495069033531, + "high_lr": 7.789473684210527e-05, + "low_lr": 1.5578947368421054e-06, + "step": 1752 + }, + { + "epoch": 4.610124917817226, + "grad_norm": 1.5859493017196655, + "learning_rate": 7.736842105263157e-05, + "loss": 1.1874, + "step": 1753 + }, + { + "epoch": 4.610124917817226, + "high_lr": 7.736842105263157e-05, + "low_lr": 1.5473684210526316e-06, + "step": 1753 + }, + { + "epoch": 4.610124917817226, + "high_lr": 7.736842105263157e-05, + "low_lr": 1.5473684210526316e-06, + "step": 1753 + }, + { + "epoch": 4.610124917817226, + "high_lr": 7.736842105263157e-05, + "low_lr": 1.5473684210526316e-06, + "step": 1753 + }, + { + "epoch": 4.610124917817226, + "high_lr": 7.736842105263157e-05, + "low_lr": 1.5473684210526316e-06, + "step": 1753 + }, + { + "epoch": 4.610124917817226, + "high_lr": 7.736842105263157e-05, + "low_lr": 1.5473684210526316e-06, + "step": 1753 + }, + { + "epoch": 4.610124917817226, + "high_lr": 7.736842105263157e-05, + "low_lr": 1.5473684210526316e-06, + "step": 1753 + }, + { + "epoch": 4.610124917817226, + "high_lr": 7.736842105263157e-05, + "low_lr": 1.5473684210526316e-06, + "step": 1753 + }, + { + "epoch": 4.610124917817226, + "high_lr": 7.736842105263157e-05, + "low_lr": 1.5473684210526316e-06, + "step": 1753 + }, + { + "epoch": 4.61275476660092, + "grad_norm": 1.630597472190857, + "learning_rate": 7.68421052631579e-05, + "loss": 1.193, + "step": 1754 + }, + { + "epoch": 4.61275476660092, + "high_lr": 7.68421052631579e-05, + "low_lr": 1.5368421052631579e-06, + "step": 1754 + }, + { + "epoch": 4.61275476660092, + "high_lr": 7.68421052631579e-05, + "low_lr": 1.5368421052631579e-06, + "step": 1754 + }, + { + "epoch": 4.61275476660092, + "high_lr": 7.68421052631579e-05, + "low_lr": 1.5368421052631579e-06, + "step": 1754 + }, + { + "epoch": 4.61275476660092, + "high_lr": 7.68421052631579e-05, + "low_lr": 1.5368421052631579e-06, + "step": 1754 + }, + { + "epoch": 4.61275476660092, + "high_lr": 7.68421052631579e-05, + "low_lr": 1.5368421052631579e-06, + "step": 1754 + }, + { + "epoch": 4.61275476660092, + "high_lr": 7.68421052631579e-05, + "low_lr": 1.5368421052631579e-06, + "step": 1754 + }, + { + "epoch": 4.61275476660092, + "high_lr": 7.68421052631579e-05, + "low_lr": 1.5368421052631579e-06, + "step": 1754 + }, + { + "epoch": 4.61275476660092, + "high_lr": 7.68421052631579e-05, + "low_lr": 1.5368421052631579e-06, + "step": 1754 + }, + { + "epoch": 4.615384615384615, + "grad_norm": 1.5932203531265259, + "learning_rate": 7.631578947368421e-05, + "loss": 1.2025, + "step": 1755 + }, + { + "epoch": 4.615384615384615, + "high_lr": 7.631578947368421e-05, + "low_lr": 1.5263157894736844e-06, + "step": 1755 + }, + { + "epoch": 4.615384615384615, + "high_lr": 7.631578947368421e-05, + "low_lr": 1.5263157894736844e-06, + "step": 1755 + }, + { + "epoch": 4.615384615384615, + "high_lr": 7.631578947368421e-05, + "low_lr": 1.5263157894736844e-06, + "step": 1755 + }, + { + "epoch": 4.615384615384615, + "high_lr": 7.631578947368421e-05, + "low_lr": 1.5263157894736844e-06, + "step": 1755 + }, + { + "epoch": 4.615384615384615, + "high_lr": 7.631578947368421e-05, + "low_lr": 1.5263157894736844e-06, + "step": 1755 + }, + { + "epoch": 4.615384615384615, + "high_lr": 7.631578947368421e-05, + "low_lr": 1.5263157894736844e-06, + "step": 1755 + }, + { + "epoch": 4.615384615384615, + "high_lr": 7.631578947368421e-05, + "low_lr": 1.5263157894736844e-06, + "step": 1755 + }, + { + "epoch": 4.615384615384615, + "high_lr": 7.631578947368421e-05, + "low_lr": 1.5263157894736844e-06, + "step": 1755 + }, + { + "epoch": 4.6180144641683105, + "grad_norm": 1.6301535367965698, + "learning_rate": 7.578947368421054e-05, + "loss": 1.2028, + "step": 1756 + }, + { + "epoch": 4.6180144641683105, + "high_lr": 7.578947368421054e-05, + "low_lr": 1.5157894736842108e-06, + "step": 1756 + }, + { + "epoch": 4.6180144641683105, + "high_lr": 7.578947368421054e-05, + "low_lr": 1.5157894736842108e-06, + "step": 1756 + }, + { + "epoch": 4.6180144641683105, + "high_lr": 7.578947368421054e-05, + "low_lr": 1.5157894736842108e-06, + "step": 1756 + }, + { + "epoch": 4.6180144641683105, + "high_lr": 7.578947368421054e-05, + "low_lr": 1.5157894736842108e-06, + "step": 1756 + }, + { + "epoch": 4.6180144641683105, + "high_lr": 7.578947368421054e-05, + "low_lr": 1.5157894736842108e-06, + "step": 1756 + }, + { + "epoch": 4.6180144641683105, + "high_lr": 7.578947368421054e-05, + "low_lr": 1.5157894736842108e-06, + "step": 1756 + }, + { + "epoch": 4.6180144641683105, + "high_lr": 7.578947368421054e-05, + "low_lr": 1.5157894736842108e-06, + "step": 1756 + }, + { + "epoch": 4.6180144641683105, + "high_lr": 7.578947368421054e-05, + "low_lr": 1.5157894736842108e-06, + "step": 1756 + }, + { + "epoch": 4.620644312952005, + "grad_norm": 1.7253530025482178, + "learning_rate": 7.526315789473684e-05, + "loss": 1.2403, + "step": 1757 + }, + { + "epoch": 4.620644312952005, + "high_lr": 7.526315789473684e-05, + "low_lr": 1.5052631578947369e-06, + "step": 1757 + }, + { + "epoch": 4.620644312952005, + "high_lr": 7.526315789473684e-05, + "low_lr": 1.5052631578947369e-06, + "step": 1757 + }, + { + "epoch": 4.620644312952005, + "high_lr": 7.526315789473684e-05, + "low_lr": 1.5052631578947369e-06, + "step": 1757 + }, + { + "epoch": 4.620644312952005, + "high_lr": 7.526315789473684e-05, + "low_lr": 1.5052631578947369e-06, + "step": 1757 + }, + { + "epoch": 4.620644312952005, + "high_lr": 7.526315789473684e-05, + "low_lr": 1.5052631578947369e-06, + "step": 1757 + }, + { + "epoch": 4.620644312952005, + "high_lr": 7.526315789473684e-05, + "low_lr": 1.5052631578947369e-06, + "step": 1757 + }, + { + "epoch": 4.620644312952005, + "high_lr": 7.526315789473684e-05, + "low_lr": 1.5052631578947369e-06, + "step": 1757 + }, + { + "epoch": 4.620644312952005, + "high_lr": 7.526315789473684e-05, + "low_lr": 1.5052631578947369e-06, + "step": 1757 + }, + { + "epoch": 4.6232741617357, + "grad_norm": 1.6097984313964844, + "learning_rate": 7.473684210526316e-05, + "loss": 1.2019, + "step": 1758 + }, + { + "epoch": 4.6232741617357, + "high_lr": 7.473684210526316e-05, + "low_lr": 1.4947368421052632e-06, + "step": 1758 + }, + { + "epoch": 4.6232741617357, + "high_lr": 7.473684210526316e-05, + "low_lr": 1.4947368421052632e-06, + "step": 1758 + }, + { + "epoch": 4.6232741617357, + "high_lr": 7.473684210526316e-05, + "low_lr": 1.4947368421052632e-06, + "step": 1758 + }, + { + "epoch": 4.6232741617357, + "high_lr": 7.473684210526316e-05, + "low_lr": 1.4947368421052632e-06, + "step": 1758 + }, + { + "epoch": 4.6232741617357, + "high_lr": 7.473684210526316e-05, + "low_lr": 1.4947368421052632e-06, + "step": 1758 + }, + { + "epoch": 4.6232741617357, + "high_lr": 7.473684210526316e-05, + "low_lr": 1.4947368421052632e-06, + "step": 1758 + }, + { + "epoch": 4.6232741617357, + "high_lr": 7.473684210526316e-05, + "low_lr": 1.4947368421052632e-06, + "step": 1758 + }, + { + "epoch": 4.6232741617357, + "high_lr": 7.473684210526316e-05, + "low_lr": 1.4947368421052632e-06, + "step": 1758 + }, + { + "epoch": 4.625904010519395, + "grad_norm": 1.4939899444580078, + "learning_rate": 7.421052631578948e-05, + "loss": 1.1483, + "step": 1759 + }, + { + "epoch": 4.625904010519395, + "high_lr": 7.421052631578948e-05, + "low_lr": 1.4842105263157897e-06, + "step": 1759 + }, + { + "epoch": 4.625904010519395, + "high_lr": 7.421052631578948e-05, + "low_lr": 1.4842105263157897e-06, + "step": 1759 + }, + { + "epoch": 4.625904010519395, + "high_lr": 7.421052631578948e-05, + "low_lr": 1.4842105263157897e-06, + "step": 1759 + }, + { + "epoch": 4.625904010519395, + "high_lr": 7.421052631578948e-05, + "low_lr": 1.4842105263157897e-06, + "step": 1759 + }, + { + "epoch": 4.625904010519395, + "high_lr": 7.421052631578948e-05, + "low_lr": 1.4842105263157897e-06, + "step": 1759 + }, + { + "epoch": 4.625904010519395, + "high_lr": 7.421052631578948e-05, + "low_lr": 1.4842105263157897e-06, + "step": 1759 + }, + { + "epoch": 4.625904010519395, + "high_lr": 7.421052631578948e-05, + "low_lr": 1.4842105263157897e-06, + "step": 1759 + }, + { + "epoch": 4.625904010519395, + "high_lr": 7.421052631578948e-05, + "low_lr": 1.4842105263157897e-06, + "step": 1759 + }, + { + "epoch": 4.62853385930309, + "grad_norm": 1.5386459827423096, + "learning_rate": 7.368421052631579e-05, + "loss": 1.266, + "step": 1760 + }, + { + "epoch": 4.62853385930309, + "high_lr": 7.368421052631579e-05, + "low_lr": 1.4736842105263159e-06, + "step": 1760 + }, + { + "epoch": 4.62853385930309, + "high_lr": 7.368421052631579e-05, + "low_lr": 1.4736842105263159e-06, + "step": 1760 + }, + { + "epoch": 4.62853385930309, + "high_lr": 7.368421052631579e-05, + "low_lr": 1.4736842105263159e-06, + "step": 1760 + }, + { + "epoch": 4.62853385930309, + "high_lr": 7.368421052631579e-05, + "low_lr": 1.4736842105263159e-06, + "step": 1760 + }, + { + "epoch": 4.62853385930309, + "high_lr": 7.368421052631579e-05, + "low_lr": 1.4736842105263159e-06, + "step": 1760 + }, + { + "epoch": 4.62853385930309, + "high_lr": 7.368421052631579e-05, + "low_lr": 1.4736842105263159e-06, + "step": 1760 + }, + { + "epoch": 4.62853385930309, + "high_lr": 7.368421052631579e-05, + "low_lr": 1.4736842105263159e-06, + "step": 1760 + }, + { + "epoch": 4.62853385930309, + "high_lr": 7.368421052631579e-05, + "low_lr": 1.4736842105263159e-06, + "step": 1760 + }, + { + "epoch": 4.631163708086785, + "grad_norm": 1.3842270374298096, + "learning_rate": 7.31578947368421e-05, + "loss": 1.2, + "step": 1761 + }, + { + "epoch": 4.631163708086785, + "high_lr": 7.31578947368421e-05, + "low_lr": 1.4631578947368422e-06, + "step": 1761 + }, + { + "epoch": 4.631163708086785, + "high_lr": 7.31578947368421e-05, + "low_lr": 1.4631578947368422e-06, + "step": 1761 + }, + { + "epoch": 4.631163708086785, + "high_lr": 7.31578947368421e-05, + "low_lr": 1.4631578947368422e-06, + "step": 1761 + }, + { + "epoch": 4.631163708086785, + "high_lr": 7.31578947368421e-05, + "low_lr": 1.4631578947368422e-06, + "step": 1761 + }, + { + "epoch": 4.631163708086785, + "high_lr": 7.31578947368421e-05, + "low_lr": 1.4631578947368422e-06, + "step": 1761 + }, + { + "epoch": 4.631163708086785, + "high_lr": 7.31578947368421e-05, + "low_lr": 1.4631578947368422e-06, + "step": 1761 + }, + { + "epoch": 4.631163708086785, + "high_lr": 7.31578947368421e-05, + "low_lr": 1.4631578947368422e-06, + "step": 1761 + }, + { + "epoch": 4.631163708086785, + "high_lr": 7.31578947368421e-05, + "low_lr": 1.4631578947368422e-06, + "step": 1761 + }, + { + "epoch": 4.63379355687048, + "grad_norm": 1.5356643199920654, + "learning_rate": 7.263157894736843e-05, + "loss": 1.2668, + "step": 1762 + }, + { + "epoch": 4.63379355687048, + "high_lr": 7.263157894736843e-05, + "low_lr": 1.4526315789473685e-06, + "step": 1762 + }, + { + "epoch": 4.63379355687048, + "high_lr": 7.263157894736843e-05, + "low_lr": 1.4526315789473685e-06, + "step": 1762 + }, + { + "epoch": 4.63379355687048, + "high_lr": 7.263157894736843e-05, + "low_lr": 1.4526315789473685e-06, + "step": 1762 + }, + { + "epoch": 4.63379355687048, + "high_lr": 7.263157894736843e-05, + "low_lr": 1.4526315789473685e-06, + "step": 1762 + }, + { + "epoch": 4.63379355687048, + "high_lr": 7.263157894736843e-05, + "low_lr": 1.4526315789473685e-06, + "step": 1762 + }, + { + "epoch": 4.63379355687048, + "high_lr": 7.263157894736843e-05, + "low_lr": 1.4526315789473685e-06, + "step": 1762 + }, + { + "epoch": 4.63379355687048, + "high_lr": 7.263157894736843e-05, + "low_lr": 1.4526315789473685e-06, + "step": 1762 + }, + { + "epoch": 4.63379355687048, + "high_lr": 7.263157894736843e-05, + "low_lr": 1.4526315789473685e-06, + "step": 1762 + }, + { + "epoch": 4.636423405654175, + "grad_norm": 1.7029571533203125, + "learning_rate": 7.210526315789474e-05, + "loss": 1.2446, + "step": 1763 + }, + { + "epoch": 4.636423405654175, + "high_lr": 7.210526315789474e-05, + "low_lr": 1.442105263157895e-06, + "step": 1763 + }, + { + "epoch": 4.636423405654175, + "high_lr": 7.210526315789474e-05, + "low_lr": 1.442105263157895e-06, + "step": 1763 + }, + { + "epoch": 4.636423405654175, + "high_lr": 7.210526315789474e-05, + "low_lr": 1.442105263157895e-06, + "step": 1763 + }, + { + "epoch": 4.636423405654175, + "high_lr": 7.210526315789474e-05, + "low_lr": 1.442105263157895e-06, + "step": 1763 + }, + { + "epoch": 4.636423405654175, + "high_lr": 7.210526315789474e-05, + "low_lr": 1.442105263157895e-06, + "step": 1763 + }, + { + "epoch": 4.636423405654175, + "high_lr": 7.210526315789474e-05, + "low_lr": 1.442105263157895e-06, + "step": 1763 + }, + { + "epoch": 4.636423405654175, + "high_lr": 7.210526315789474e-05, + "low_lr": 1.442105263157895e-06, + "step": 1763 + }, + { + "epoch": 4.636423405654175, + "high_lr": 7.210526315789474e-05, + "low_lr": 1.442105263157895e-06, + "step": 1763 + }, + { + "epoch": 4.6390532544378695, + "grad_norm": 1.6327396631240845, + "learning_rate": 7.157894736842105e-05, + "loss": 1.1984, + "step": 1764 + }, + { + "epoch": 4.6390532544378695, + "high_lr": 7.157894736842105e-05, + "low_lr": 1.4315789473684212e-06, + "step": 1764 + }, + { + "epoch": 4.6390532544378695, + "high_lr": 7.157894736842105e-05, + "low_lr": 1.4315789473684212e-06, + "step": 1764 + }, + { + "epoch": 4.6390532544378695, + "high_lr": 7.157894736842105e-05, + "low_lr": 1.4315789473684212e-06, + "step": 1764 + }, + { + "epoch": 4.6390532544378695, + "high_lr": 7.157894736842105e-05, + "low_lr": 1.4315789473684212e-06, + "step": 1764 + }, + { + "epoch": 4.6390532544378695, + "high_lr": 7.157894736842105e-05, + "low_lr": 1.4315789473684212e-06, + "step": 1764 + }, + { + "epoch": 4.6390532544378695, + "high_lr": 7.157894736842105e-05, + "low_lr": 1.4315789473684212e-06, + "step": 1764 + }, + { + "epoch": 4.6390532544378695, + "high_lr": 7.157894736842105e-05, + "low_lr": 1.4315789473684212e-06, + "step": 1764 + }, + { + "epoch": 4.6390532544378695, + "high_lr": 7.157894736842105e-05, + "low_lr": 1.4315789473684212e-06, + "step": 1764 + }, + { + "epoch": 4.641683103221565, + "grad_norm": 1.6883735656738281, + "learning_rate": 7.105263157894737e-05, + "loss": 1.2464, + "step": 1765 + }, + { + "epoch": 4.641683103221565, + "high_lr": 7.105263157894737e-05, + "low_lr": 1.4210526315789475e-06, + "step": 1765 + }, + { + "epoch": 4.641683103221565, + "high_lr": 7.105263157894737e-05, + "low_lr": 1.4210526315789475e-06, + "step": 1765 + }, + { + "epoch": 4.641683103221565, + "high_lr": 7.105263157894737e-05, + "low_lr": 1.4210526315789475e-06, + "step": 1765 + }, + { + "epoch": 4.641683103221565, + "high_lr": 7.105263157894737e-05, + "low_lr": 1.4210526315789475e-06, + "step": 1765 + }, + { + "epoch": 4.641683103221565, + "high_lr": 7.105263157894737e-05, + "low_lr": 1.4210526315789475e-06, + "step": 1765 + }, + { + "epoch": 4.641683103221565, + "high_lr": 7.105263157894737e-05, + "low_lr": 1.4210526315789475e-06, + "step": 1765 + }, + { + "epoch": 4.641683103221565, + "high_lr": 7.105263157894737e-05, + "low_lr": 1.4210526315789475e-06, + "step": 1765 + }, + { + "epoch": 4.641683103221565, + "high_lr": 7.105263157894737e-05, + "low_lr": 1.4210526315789475e-06, + "step": 1765 + }, + { + "epoch": 4.64431295200526, + "grad_norm": 1.5838812589645386, + "learning_rate": 7.05263157894737e-05, + "loss": 1.2236, + "step": 1766 + }, + { + "epoch": 4.64431295200526, + "high_lr": 7.05263157894737e-05, + "low_lr": 1.4105263157894738e-06, + "step": 1766 + }, + { + "epoch": 4.64431295200526, + "high_lr": 7.05263157894737e-05, + "low_lr": 1.4105263157894738e-06, + "step": 1766 + }, + { + "epoch": 4.64431295200526, + "high_lr": 7.05263157894737e-05, + "low_lr": 1.4105263157894738e-06, + "step": 1766 + }, + { + "epoch": 4.64431295200526, + "high_lr": 7.05263157894737e-05, + "low_lr": 1.4105263157894738e-06, + "step": 1766 + }, + { + "epoch": 4.64431295200526, + "high_lr": 7.05263157894737e-05, + "low_lr": 1.4105263157894738e-06, + "step": 1766 + }, + { + "epoch": 4.64431295200526, + "high_lr": 7.05263157894737e-05, + "low_lr": 1.4105263157894738e-06, + "step": 1766 + }, + { + "epoch": 4.64431295200526, + "high_lr": 7.05263157894737e-05, + "low_lr": 1.4105263157894738e-06, + "step": 1766 + }, + { + "epoch": 4.64431295200526, + "high_lr": 7.05263157894737e-05, + "low_lr": 1.4105263157894738e-06, + "step": 1766 + }, + { + "epoch": 4.646942800788954, + "grad_norm": 1.6791688203811646, + "learning_rate": 7.000000000000001e-05, + "loss": 1.1848, + "step": 1767 + }, + { + "epoch": 4.646942800788954, + "high_lr": 7.000000000000001e-05, + "low_lr": 1.4000000000000001e-06, + "step": 1767 + }, + { + "epoch": 4.646942800788954, + "high_lr": 7.000000000000001e-05, + "low_lr": 1.4000000000000001e-06, + "step": 1767 + }, + { + "epoch": 4.646942800788954, + "high_lr": 7.000000000000001e-05, + "low_lr": 1.4000000000000001e-06, + "step": 1767 + }, + { + "epoch": 4.646942800788954, + "high_lr": 7.000000000000001e-05, + "low_lr": 1.4000000000000001e-06, + "step": 1767 + }, + { + "epoch": 4.646942800788954, + "high_lr": 7.000000000000001e-05, + "low_lr": 1.4000000000000001e-06, + "step": 1767 + }, + { + "epoch": 4.646942800788954, + "high_lr": 7.000000000000001e-05, + "low_lr": 1.4000000000000001e-06, + "step": 1767 + }, + { + "epoch": 4.646942800788954, + "high_lr": 7.000000000000001e-05, + "low_lr": 1.4000000000000001e-06, + "step": 1767 + }, + { + "epoch": 4.646942800788954, + "high_lr": 7.000000000000001e-05, + "low_lr": 1.4000000000000001e-06, + "step": 1767 + }, + { + "epoch": 4.64957264957265, + "grad_norm": 1.6748144626617432, + "learning_rate": 6.947368421052632e-05, + "loss": 1.1814, + "step": 1768 + }, + { + "epoch": 4.64957264957265, + "high_lr": 6.947368421052632e-05, + "low_lr": 1.3894736842105263e-06, + "step": 1768 + }, + { + "epoch": 4.64957264957265, + "high_lr": 6.947368421052632e-05, + "low_lr": 1.3894736842105263e-06, + "step": 1768 + }, + { + "epoch": 4.64957264957265, + "high_lr": 6.947368421052632e-05, + "low_lr": 1.3894736842105263e-06, + "step": 1768 + }, + { + "epoch": 4.64957264957265, + "high_lr": 6.947368421052632e-05, + "low_lr": 1.3894736842105263e-06, + "step": 1768 + }, + { + "epoch": 4.64957264957265, + "high_lr": 6.947368421052632e-05, + "low_lr": 1.3894736842105263e-06, + "step": 1768 + }, + { + "epoch": 4.64957264957265, + "high_lr": 6.947368421052632e-05, + "low_lr": 1.3894736842105263e-06, + "step": 1768 + }, + { + "epoch": 4.64957264957265, + "high_lr": 6.947368421052632e-05, + "low_lr": 1.3894736842105263e-06, + "step": 1768 + }, + { + "epoch": 4.64957264957265, + "high_lr": 6.947368421052632e-05, + "low_lr": 1.3894736842105263e-06, + "step": 1768 + }, + { + "epoch": 4.652202498356345, + "grad_norm": 1.42503821849823, + "learning_rate": 6.894736842105263e-05, + "loss": 1.2064, + "step": 1769 + }, + { + "epoch": 4.652202498356345, + "high_lr": 6.894736842105263e-05, + "low_lr": 1.3789473684210528e-06, + "step": 1769 + }, + { + "epoch": 4.652202498356345, + "high_lr": 6.894736842105263e-05, + "low_lr": 1.3789473684210528e-06, + "step": 1769 + }, + { + "epoch": 4.652202498356345, + "high_lr": 6.894736842105263e-05, + "low_lr": 1.3789473684210528e-06, + "step": 1769 + }, + { + "epoch": 4.652202498356345, + "high_lr": 6.894736842105263e-05, + "low_lr": 1.3789473684210528e-06, + "step": 1769 + }, + { + "epoch": 4.652202498356345, + "high_lr": 6.894736842105263e-05, + "low_lr": 1.3789473684210528e-06, + "step": 1769 + }, + { + "epoch": 4.652202498356345, + "high_lr": 6.894736842105263e-05, + "low_lr": 1.3789473684210528e-06, + "step": 1769 + }, + { + "epoch": 4.652202498356345, + "high_lr": 6.894736842105263e-05, + "low_lr": 1.3789473684210528e-06, + "step": 1769 + }, + { + "epoch": 4.652202498356345, + "high_lr": 6.894736842105263e-05, + "low_lr": 1.3789473684210528e-06, + "step": 1769 + }, + { + "epoch": 4.654832347140039, + "grad_norm": 1.7519937753677368, + "learning_rate": 6.842105263157896e-05, + "loss": 1.2512, + "step": 1770 + }, + { + "epoch": 4.654832347140039, + "high_lr": 6.842105263157896e-05, + "low_lr": 1.3684210526315791e-06, + "step": 1770 + }, + { + "epoch": 4.654832347140039, + "high_lr": 6.842105263157896e-05, + "low_lr": 1.3684210526315791e-06, + "step": 1770 + }, + { + "epoch": 4.654832347140039, + "high_lr": 6.842105263157896e-05, + "low_lr": 1.3684210526315791e-06, + "step": 1770 + }, + { + "epoch": 4.654832347140039, + "high_lr": 6.842105263157896e-05, + "low_lr": 1.3684210526315791e-06, + "step": 1770 + }, + { + "epoch": 4.654832347140039, + "high_lr": 6.842105263157896e-05, + "low_lr": 1.3684210526315791e-06, + "step": 1770 + }, + { + "epoch": 4.654832347140039, + "high_lr": 6.842105263157896e-05, + "low_lr": 1.3684210526315791e-06, + "step": 1770 + }, + { + "epoch": 4.654832347140039, + "high_lr": 6.842105263157896e-05, + "low_lr": 1.3684210526315791e-06, + "step": 1770 + }, + { + "epoch": 4.654832347140039, + "high_lr": 6.842105263157896e-05, + "low_lr": 1.3684210526315791e-06, + "step": 1770 + }, + { + "epoch": 4.657462195923735, + "grad_norm": 1.4318557977676392, + "learning_rate": 6.789473684210526e-05, + "loss": 1.2424, + "step": 1771 + }, + { + "epoch": 4.657462195923735, + "high_lr": 6.789473684210526e-05, + "low_lr": 1.3578947368421052e-06, + "step": 1771 + }, + { + "epoch": 4.657462195923735, + "high_lr": 6.789473684210526e-05, + "low_lr": 1.3578947368421052e-06, + "step": 1771 + }, + { + "epoch": 4.657462195923735, + "high_lr": 6.789473684210526e-05, + "low_lr": 1.3578947368421052e-06, + "step": 1771 + }, + { + "epoch": 4.657462195923735, + "high_lr": 6.789473684210526e-05, + "low_lr": 1.3578947368421052e-06, + "step": 1771 + }, + { + "epoch": 4.657462195923735, + "high_lr": 6.789473684210526e-05, + "low_lr": 1.3578947368421052e-06, + "step": 1771 + }, + { + "epoch": 4.657462195923735, + "high_lr": 6.789473684210526e-05, + "low_lr": 1.3578947368421052e-06, + "step": 1771 + }, + { + "epoch": 4.657462195923735, + "high_lr": 6.789473684210526e-05, + "low_lr": 1.3578947368421052e-06, + "step": 1771 + }, + { + "epoch": 4.657462195923735, + "high_lr": 6.789473684210526e-05, + "low_lr": 1.3578947368421052e-06, + "step": 1771 + }, + { + "epoch": 4.660092044707429, + "grad_norm": 1.539652705192566, + "learning_rate": 6.736842105263157e-05, + "loss": 1.2336, + "step": 1772 + }, + { + "epoch": 4.660092044707429, + "high_lr": 6.736842105263157e-05, + "low_lr": 1.3473684210526316e-06, + "step": 1772 + }, + { + "epoch": 4.660092044707429, + "high_lr": 6.736842105263157e-05, + "low_lr": 1.3473684210526316e-06, + "step": 1772 + }, + { + "epoch": 4.660092044707429, + "high_lr": 6.736842105263157e-05, + "low_lr": 1.3473684210526316e-06, + "step": 1772 + }, + { + "epoch": 4.660092044707429, + "high_lr": 6.736842105263157e-05, + "low_lr": 1.3473684210526316e-06, + "step": 1772 + }, + { + "epoch": 4.660092044707429, + "high_lr": 6.736842105263157e-05, + "low_lr": 1.3473684210526316e-06, + "step": 1772 + }, + { + "epoch": 4.660092044707429, + "high_lr": 6.736842105263157e-05, + "low_lr": 1.3473684210526316e-06, + "step": 1772 + }, + { + "epoch": 4.660092044707429, + "high_lr": 6.736842105263157e-05, + "low_lr": 1.3473684210526316e-06, + "step": 1772 + }, + { + "epoch": 4.660092044707429, + "high_lr": 6.736842105263157e-05, + "low_lr": 1.3473684210526316e-06, + "step": 1772 + }, + { + "epoch": 4.662721893491124, + "grad_norm": 1.5103962421417236, + "learning_rate": 6.68421052631579e-05, + "loss": 1.2183, + "step": 1773 + }, + { + "epoch": 4.662721893491124, + "high_lr": 6.68421052631579e-05, + "low_lr": 1.3368421052631581e-06, + "step": 1773 + }, + { + "epoch": 4.662721893491124, + "high_lr": 6.68421052631579e-05, + "low_lr": 1.3368421052631581e-06, + "step": 1773 + }, + { + "epoch": 4.662721893491124, + "high_lr": 6.68421052631579e-05, + "low_lr": 1.3368421052631581e-06, + "step": 1773 + }, + { + "epoch": 4.662721893491124, + "high_lr": 6.68421052631579e-05, + "low_lr": 1.3368421052631581e-06, + "step": 1773 + }, + { + "epoch": 4.662721893491124, + "high_lr": 6.68421052631579e-05, + "low_lr": 1.3368421052631581e-06, + "step": 1773 + }, + { + "epoch": 4.662721893491124, + "high_lr": 6.68421052631579e-05, + "low_lr": 1.3368421052631581e-06, + "step": 1773 + }, + { + "epoch": 4.662721893491124, + "high_lr": 6.68421052631579e-05, + "low_lr": 1.3368421052631581e-06, + "step": 1773 + }, + { + "epoch": 4.662721893491124, + "high_lr": 6.68421052631579e-05, + "low_lr": 1.3368421052631581e-06, + "step": 1773 + }, + { + "epoch": 4.665351742274819, + "grad_norm": 1.577190637588501, + "learning_rate": 6.631578947368421e-05, + "loss": 1.2445, + "step": 1774 + }, + { + "epoch": 4.665351742274819, + "high_lr": 6.631578947368421e-05, + "low_lr": 1.3263157894736844e-06, + "step": 1774 + }, + { + "epoch": 4.665351742274819, + "high_lr": 6.631578947368421e-05, + "low_lr": 1.3263157894736844e-06, + "step": 1774 + }, + { + "epoch": 4.665351742274819, + "high_lr": 6.631578947368421e-05, + "low_lr": 1.3263157894736844e-06, + "step": 1774 + }, + { + "epoch": 4.665351742274819, + "high_lr": 6.631578947368421e-05, + "low_lr": 1.3263157894736844e-06, + "step": 1774 + }, + { + "epoch": 4.665351742274819, + "high_lr": 6.631578947368421e-05, + "low_lr": 1.3263157894736844e-06, + "step": 1774 + }, + { + "epoch": 4.665351742274819, + "high_lr": 6.631578947368421e-05, + "low_lr": 1.3263157894736844e-06, + "step": 1774 + }, + { + "epoch": 4.665351742274819, + "high_lr": 6.631578947368421e-05, + "low_lr": 1.3263157894736844e-06, + "step": 1774 + }, + { + "epoch": 4.665351742274819, + "high_lr": 6.631578947368421e-05, + "low_lr": 1.3263157894736844e-06, + "step": 1774 + }, + { + "epoch": 4.667981591058514, + "grad_norm": 1.4346929788589478, + "learning_rate": 6.578947368421052e-05, + "loss": 1.1832, + "step": 1775 + }, + { + "epoch": 4.667981591058514, + "high_lr": 6.578947368421052e-05, + "low_lr": 1.3157894736842106e-06, + "step": 1775 + }, + { + "epoch": 4.667981591058514, + "high_lr": 6.578947368421052e-05, + "low_lr": 1.3157894736842106e-06, + "step": 1775 + }, + { + "epoch": 4.667981591058514, + "high_lr": 6.578947368421052e-05, + "low_lr": 1.3157894736842106e-06, + "step": 1775 + }, + { + "epoch": 4.667981591058514, + "high_lr": 6.578947368421052e-05, + "low_lr": 1.3157894736842106e-06, + "step": 1775 + }, + { + "epoch": 4.667981591058514, + "high_lr": 6.578947368421052e-05, + "low_lr": 1.3157894736842106e-06, + "step": 1775 + }, + { + "epoch": 4.667981591058514, + "high_lr": 6.578947368421052e-05, + "low_lr": 1.3157894736842106e-06, + "step": 1775 + }, + { + "epoch": 4.667981591058514, + "high_lr": 6.578947368421052e-05, + "low_lr": 1.3157894736842106e-06, + "step": 1775 + }, + { + "epoch": 4.667981591058514, + "high_lr": 6.578947368421052e-05, + "low_lr": 1.3157894736842106e-06, + "step": 1775 + }, + { + "epoch": 4.670611439842209, + "grad_norm": 1.6455739736557007, + "learning_rate": 6.526315789473684e-05, + "loss": 1.1984, + "step": 1776 + }, + { + "epoch": 4.670611439842209, + "high_lr": 6.526315789473684e-05, + "low_lr": 1.3052631578947369e-06, + "step": 1776 + }, + { + "epoch": 4.670611439842209, + "high_lr": 6.526315789473684e-05, + "low_lr": 1.3052631578947369e-06, + "step": 1776 + }, + { + "epoch": 4.670611439842209, + "high_lr": 6.526315789473684e-05, + "low_lr": 1.3052631578947369e-06, + "step": 1776 + }, + { + "epoch": 4.670611439842209, + "high_lr": 6.526315789473684e-05, + "low_lr": 1.3052631578947369e-06, + "step": 1776 + }, + { + "epoch": 4.670611439842209, + "high_lr": 6.526315789473684e-05, + "low_lr": 1.3052631578947369e-06, + "step": 1776 + }, + { + "epoch": 4.670611439842209, + "high_lr": 6.526315789473684e-05, + "low_lr": 1.3052631578947369e-06, + "step": 1776 + }, + { + "epoch": 4.670611439842209, + "high_lr": 6.526315789473684e-05, + "low_lr": 1.3052631578947369e-06, + "step": 1776 + }, + { + "epoch": 4.670611439842209, + "high_lr": 6.526315789473684e-05, + "low_lr": 1.3052631578947369e-06, + "step": 1776 + }, + { + "epoch": 4.6732412886259045, + "grad_norm": 1.6151623725891113, + "learning_rate": 6.473684210526316e-05, + "loss": 1.2542, + "step": 1777 + }, + { + "epoch": 4.6732412886259045, + "high_lr": 6.473684210526316e-05, + "low_lr": 1.2947368421052634e-06, + "step": 1777 + }, + { + "epoch": 4.6732412886259045, + "high_lr": 6.473684210526316e-05, + "low_lr": 1.2947368421052634e-06, + "step": 1777 + }, + { + "epoch": 4.6732412886259045, + "high_lr": 6.473684210526316e-05, + "low_lr": 1.2947368421052634e-06, + "step": 1777 + }, + { + "epoch": 4.6732412886259045, + "high_lr": 6.473684210526316e-05, + "low_lr": 1.2947368421052634e-06, + "step": 1777 + }, + { + "epoch": 4.6732412886259045, + "high_lr": 6.473684210526316e-05, + "low_lr": 1.2947368421052634e-06, + "step": 1777 + }, + { + "epoch": 4.6732412886259045, + "high_lr": 6.473684210526316e-05, + "low_lr": 1.2947368421052634e-06, + "step": 1777 + }, + { + "epoch": 4.6732412886259045, + "high_lr": 6.473684210526316e-05, + "low_lr": 1.2947368421052634e-06, + "step": 1777 + }, + { + "epoch": 4.6732412886259045, + "high_lr": 6.473684210526316e-05, + "low_lr": 1.2947368421052634e-06, + "step": 1777 + }, + { + "epoch": 4.675871137409599, + "grad_norm": 1.5017870664596558, + "learning_rate": 6.421052631578946e-05, + "loss": 1.2247, + "step": 1778 + }, + { + "epoch": 4.675871137409599, + "high_lr": 6.421052631578946e-05, + "low_lr": 1.2842105263157895e-06, + "step": 1778 + }, + { + "epoch": 4.675871137409599, + "high_lr": 6.421052631578946e-05, + "low_lr": 1.2842105263157895e-06, + "step": 1778 + }, + { + "epoch": 4.675871137409599, + "high_lr": 6.421052631578946e-05, + "low_lr": 1.2842105263157895e-06, + "step": 1778 + }, + { + "epoch": 4.675871137409599, + "high_lr": 6.421052631578946e-05, + "low_lr": 1.2842105263157895e-06, + "step": 1778 + }, + { + "epoch": 4.675871137409599, + "high_lr": 6.421052631578946e-05, + "low_lr": 1.2842105263157895e-06, + "step": 1778 + }, + { + "epoch": 4.675871137409599, + "high_lr": 6.421052631578946e-05, + "low_lr": 1.2842105263157895e-06, + "step": 1778 + }, + { + "epoch": 4.675871137409599, + "high_lr": 6.421052631578946e-05, + "low_lr": 1.2842105263157895e-06, + "step": 1778 + }, + { + "epoch": 4.675871137409599, + "high_lr": 6.421052631578946e-05, + "low_lr": 1.2842105263157895e-06, + "step": 1778 + }, + { + "epoch": 4.678500986193294, + "grad_norm": 1.5715364217758179, + "learning_rate": 6.368421052631579e-05, + "loss": 1.208, + "step": 1779 + }, + { + "epoch": 4.678500986193294, + "high_lr": 6.368421052631579e-05, + "low_lr": 1.2736842105263159e-06, + "step": 1779 + }, + { + "epoch": 4.678500986193294, + "high_lr": 6.368421052631579e-05, + "low_lr": 1.2736842105263159e-06, + "step": 1779 + }, + { + "epoch": 4.678500986193294, + "high_lr": 6.368421052631579e-05, + "low_lr": 1.2736842105263159e-06, + "step": 1779 + }, + { + "epoch": 4.678500986193294, + "high_lr": 6.368421052631579e-05, + "low_lr": 1.2736842105263159e-06, + "step": 1779 + }, + { + "epoch": 4.678500986193294, + "high_lr": 6.368421052631579e-05, + "low_lr": 1.2736842105263159e-06, + "step": 1779 + }, + { + "epoch": 4.678500986193294, + "high_lr": 6.368421052631579e-05, + "low_lr": 1.2736842105263159e-06, + "step": 1779 + }, + { + "epoch": 4.678500986193294, + "high_lr": 6.368421052631579e-05, + "low_lr": 1.2736842105263159e-06, + "step": 1779 + }, + { + "epoch": 4.678500986193294, + "high_lr": 6.368421052631579e-05, + "low_lr": 1.2736842105263159e-06, + "step": 1779 + }, + { + "epoch": 4.6811308349769885, + "grad_norm": 1.5146335363388062, + "learning_rate": 6.31578947368421e-05, + "loss": 1.2493, + "step": 1780 + }, + { + "epoch": 4.6811308349769885, + "high_lr": 6.31578947368421e-05, + "low_lr": 1.2631578947368422e-06, + "step": 1780 + }, + { + "epoch": 4.6811308349769885, + "high_lr": 6.31578947368421e-05, + "low_lr": 1.2631578947368422e-06, + "step": 1780 + }, + { + "epoch": 4.6811308349769885, + "high_lr": 6.31578947368421e-05, + "low_lr": 1.2631578947368422e-06, + "step": 1780 + }, + { + "epoch": 4.6811308349769885, + "high_lr": 6.31578947368421e-05, + "low_lr": 1.2631578947368422e-06, + "step": 1780 + }, + { + "epoch": 4.6811308349769885, + "high_lr": 6.31578947368421e-05, + "low_lr": 1.2631578947368422e-06, + "step": 1780 + }, + { + "epoch": 4.6811308349769885, + "high_lr": 6.31578947368421e-05, + "low_lr": 1.2631578947368422e-06, + "step": 1780 + }, + { + "epoch": 4.6811308349769885, + "high_lr": 6.31578947368421e-05, + "low_lr": 1.2631578947368422e-06, + "step": 1780 + }, + { + "epoch": 4.6811308349769885, + "high_lr": 6.31578947368421e-05, + "low_lr": 1.2631578947368422e-06, + "step": 1780 + }, + { + "epoch": 4.683760683760684, + "grad_norm": 1.4685746431350708, + "learning_rate": 6.263157894736843e-05, + "loss": 1.2214, + "step": 1781 + }, + { + "epoch": 4.683760683760684, + "high_lr": 6.263157894736843e-05, + "low_lr": 1.2526315789473687e-06, + "step": 1781 + }, + { + "epoch": 4.683760683760684, + "high_lr": 6.263157894736843e-05, + "low_lr": 1.2526315789473687e-06, + "step": 1781 + }, + { + "epoch": 4.683760683760684, + "high_lr": 6.263157894736843e-05, + "low_lr": 1.2526315789473687e-06, + "step": 1781 + }, + { + "epoch": 4.683760683760684, + "high_lr": 6.263157894736843e-05, + "low_lr": 1.2526315789473687e-06, + "step": 1781 + }, + { + "epoch": 4.683760683760684, + "high_lr": 6.263157894736843e-05, + "low_lr": 1.2526315789473687e-06, + "step": 1781 + }, + { + "epoch": 4.683760683760684, + "high_lr": 6.263157894736843e-05, + "low_lr": 1.2526315789473687e-06, + "step": 1781 + }, + { + "epoch": 4.683760683760684, + "high_lr": 6.263157894736843e-05, + "low_lr": 1.2526315789473687e-06, + "step": 1781 + }, + { + "epoch": 4.683760683760684, + "high_lr": 6.263157894736843e-05, + "low_lr": 1.2526315789473687e-06, + "step": 1781 + }, + { + "epoch": 4.686390532544379, + "grad_norm": 1.6440409421920776, + "learning_rate": 6.210526315789474e-05, + "loss": 1.1739, + "step": 1782 + }, + { + "epoch": 4.686390532544379, + "high_lr": 6.210526315789474e-05, + "low_lr": 1.2421052631578948e-06, + "step": 1782 + }, + { + "epoch": 4.686390532544379, + "high_lr": 6.210526315789474e-05, + "low_lr": 1.2421052631578948e-06, + "step": 1782 + }, + { + "epoch": 4.686390532544379, + "high_lr": 6.210526315789474e-05, + "low_lr": 1.2421052631578948e-06, + "step": 1782 + }, + { + "epoch": 4.686390532544379, + "high_lr": 6.210526315789474e-05, + "low_lr": 1.2421052631578948e-06, + "step": 1782 + }, + { + "epoch": 4.686390532544379, + "high_lr": 6.210526315789474e-05, + "low_lr": 1.2421052631578948e-06, + "step": 1782 + }, + { + "epoch": 4.686390532544379, + "high_lr": 6.210526315789474e-05, + "low_lr": 1.2421052631578948e-06, + "step": 1782 + }, + { + "epoch": 4.686390532544379, + "high_lr": 6.210526315789474e-05, + "low_lr": 1.2421052631578948e-06, + "step": 1782 + }, + { + "epoch": 4.686390532544379, + "high_lr": 6.210526315789474e-05, + "low_lr": 1.2421052631578948e-06, + "step": 1782 + }, + { + "epoch": 4.689020381328073, + "grad_norm": 1.6437931060791016, + "learning_rate": 6.157894736842106e-05, + "loss": 1.2407, + "step": 1783 + }, + { + "epoch": 4.689020381328073, + "high_lr": 6.157894736842106e-05, + "low_lr": 1.2315789473684212e-06, + "step": 1783 + }, + { + "epoch": 4.689020381328073, + "high_lr": 6.157894736842106e-05, + "low_lr": 1.2315789473684212e-06, + "step": 1783 + }, + { + "epoch": 4.689020381328073, + "high_lr": 6.157894736842106e-05, + "low_lr": 1.2315789473684212e-06, + "step": 1783 + }, + { + "epoch": 4.689020381328073, + "high_lr": 6.157894736842106e-05, + "low_lr": 1.2315789473684212e-06, + "step": 1783 + }, + { + "epoch": 4.689020381328073, + "high_lr": 6.157894736842106e-05, + "low_lr": 1.2315789473684212e-06, + "step": 1783 + }, + { + "epoch": 4.689020381328073, + "high_lr": 6.157894736842106e-05, + "low_lr": 1.2315789473684212e-06, + "step": 1783 + }, + { + "epoch": 4.689020381328073, + "high_lr": 6.157894736842106e-05, + "low_lr": 1.2315789473684212e-06, + "step": 1783 + }, + { + "epoch": 4.689020381328073, + "high_lr": 6.157894736842106e-05, + "low_lr": 1.2315789473684212e-06, + "step": 1783 + }, + { + "epoch": 4.691650230111769, + "grad_norm": 1.6867625713348389, + "learning_rate": 6.105263157894737e-05, + "loss": 1.1678, + "step": 1784 + }, + { + "epoch": 4.691650230111769, + "high_lr": 6.105263157894737e-05, + "low_lr": 1.2210526315789475e-06, + "step": 1784 + }, + { + "epoch": 4.691650230111769, + "high_lr": 6.105263157894737e-05, + "low_lr": 1.2210526315789475e-06, + "step": 1784 + }, + { + "epoch": 4.691650230111769, + "high_lr": 6.105263157894737e-05, + "low_lr": 1.2210526315789475e-06, + "step": 1784 + }, + { + "epoch": 4.691650230111769, + "high_lr": 6.105263157894737e-05, + "low_lr": 1.2210526315789475e-06, + "step": 1784 + }, + { + "epoch": 4.691650230111769, + "high_lr": 6.105263157894737e-05, + "low_lr": 1.2210526315789475e-06, + "step": 1784 + }, + { + "epoch": 4.691650230111769, + "high_lr": 6.105263157894737e-05, + "low_lr": 1.2210526315789475e-06, + "step": 1784 + }, + { + "epoch": 4.691650230111769, + "high_lr": 6.105263157894737e-05, + "low_lr": 1.2210526315789475e-06, + "step": 1784 + }, + { + "epoch": 4.691650230111769, + "high_lr": 6.105263157894737e-05, + "low_lr": 1.2210526315789475e-06, + "step": 1784 + }, + { + "epoch": 4.6942800788954635, + "grad_norm": 1.5684926509857178, + "learning_rate": 6.052631578947369e-05, + "loss": 1.2521, + "step": 1785 + }, + { + "epoch": 4.6942800788954635, + "high_lr": 6.052631578947369e-05, + "low_lr": 1.2105263157894738e-06, + "step": 1785 + }, + { + "epoch": 4.6942800788954635, + "high_lr": 6.052631578947369e-05, + "low_lr": 1.2105263157894738e-06, + "step": 1785 + }, + { + "epoch": 4.6942800788954635, + "high_lr": 6.052631578947369e-05, + "low_lr": 1.2105263157894738e-06, + "step": 1785 + }, + { + "epoch": 4.6942800788954635, + "high_lr": 6.052631578947369e-05, + "low_lr": 1.2105263157894738e-06, + "step": 1785 + }, + { + "epoch": 4.6942800788954635, + "high_lr": 6.052631578947369e-05, + "low_lr": 1.2105263157894738e-06, + "step": 1785 + }, + { + "epoch": 4.6942800788954635, + "high_lr": 6.052631578947369e-05, + "low_lr": 1.2105263157894738e-06, + "step": 1785 + }, + { + "epoch": 4.6942800788954635, + "high_lr": 6.052631578947369e-05, + "low_lr": 1.2105263157894738e-06, + "step": 1785 + }, + { + "epoch": 4.6942800788954635, + "high_lr": 6.052631578947369e-05, + "low_lr": 1.2105263157894738e-06, + "step": 1785 + }, + { + "epoch": 4.696909927679158, + "grad_norm": 1.5172704458236694, + "learning_rate": 6e-05, + "loss": 1.2188, + "step": 1786 + }, + { + "epoch": 4.696909927679158, + "high_lr": 6e-05, + "low_lr": 1.2000000000000002e-06, + "step": 1786 + }, + { + "epoch": 4.696909927679158, + "high_lr": 6e-05, + "low_lr": 1.2000000000000002e-06, + "step": 1786 + }, + { + "epoch": 4.696909927679158, + "high_lr": 6e-05, + "low_lr": 1.2000000000000002e-06, + "step": 1786 + }, + { + "epoch": 4.696909927679158, + "high_lr": 6e-05, + "low_lr": 1.2000000000000002e-06, + "step": 1786 + }, + { + "epoch": 4.696909927679158, + "high_lr": 6e-05, + "low_lr": 1.2000000000000002e-06, + "step": 1786 + }, + { + "epoch": 4.696909927679158, + "high_lr": 6e-05, + "low_lr": 1.2000000000000002e-06, + "step": 1786 + }, + { + "epoch": 4.696909927679158, + "high_lr": 6e-05, + "low_lr": 1.2000000000000002e-06, + "step": 1786 + }, + { + "epoch": 4.696909927679158, + "high_lr": 6e-05, + "low_lr": 1.2000000000000002e-06, + "step": 1786 + }, + { + "epoch": 4.699539776462854, + "grad_norm": 1.485851764678955, + "learning_rate": 5.947368421052632e-05, + "loss": 1.1884, + "step": 1787 + }, + { + "epoch": 4.699539776462854, + "high_lr": 5.947368421052632e-05, + "low_lr": 1.1894736842105265e-06, + "step": 1787 + }, + { + "epoch": 4.699539776462854, + "high_lr": 5.947368421052632e-05, + "low_lr": 1.1894736842105265e-06, + "step": 1787 + }, + { + "epoch": 4.699539776462854, + "high_lr": 5.947368421052632e-05, + "low_lr": 1.1894736842105265e-06, + "step": 1787 + }, + { + "epoch": 4.699539776462854, + "high_lr": 5.947368421052632e-05, + "low_lr": 1.1894736842105265e-06, + "step": 1787 + }, + { + "epoch": 4.699539776462854, + "high_lr": 5.947368421052632e-05, + "low_lr": 1.1894736842105265e-06, + "step": 1787 + }, + { + "epoch": 4.699539776462854, + "high_lr": 5.947368421052632e-05, + "low_lr": 1.1894736842105265e-06, + "step": 1787 + }, + { + "epoch": 4.699539776462854, + "high_lr": 5.947368421052632e-05, + "low_lr": 1.1894736842105265e-06, + "step": 1787 + }, + { + "epoch": 4.699539776462854, + "high_lr": 5.947368421052632e-05, + "low_lr": 1.1894736842105265e-06, + "step": 1787 + }, + { + "epoch": 4.702169625246548, + "grad_norm": 1.542825698852539, + "learning_rate": 5.8947368421052634e-05, + "loss": 1.1931, + "step": 1788 + }, + { + "epoch": 4.702169625246548, + "high_lr": 5.8947368421052634e-05, + "low_lr": 1.1789473684210526e-06, + "step": 1788 + }, + { + "epoch": 4.702169625246548, + "high_lr": 5.8947368421052634e-05, + "low_lr": 1.1789473684210526e-06, + "step": 1788 + }, + { + "epoch": 4.702169625246548, + "high_lr": 5.8947368421052634e-05, + "low_lr": 1.1789473684210526e-06, + "step": 1788 + }, + { + "epoch": 4.702169625246548, + "high_lr": 5.8947368421052634e-05, + "low_lr": 1.1789473684210526e-06, + "step": 1788 + }, + { + "epoch": 4.702169625246548, + "high_lr": 5.8947368421052634e-05, + "low_lr": 1.1789473684210526e-06, + "step": 1788 + }, + { + "epoch": 4.702169625246548, + "high_lr": 5.8947368421052634e-05, + "low_lr": 1.1789473684210526e-06, + "step": 1788 + }, + { + "epoch": 4.702169625246548, + "high_lr": 5.8947368421052634e-05, + "low_lr": 1.1789473684210526e-06, + "step": 1788 + }, + { + "epoch": 4.702169625246548, + "high_lr": 5.8947368421052634e-05, + "low_lr": 1.1789473684210526e-06, + "step": 1788 + }, + { + "epoch": 4.704799474030243, + "grad_norm": 1.4580672979354858, + "learning_rate": 5.8421052631578954e-05, + "loss": 1.2192, + "step": 1789 + }, + { + "epoch": 4.704799474030243, + "high_lr": 5.8421052631578954e-05, + "low_lr": 1.1684210526315791e-06, + "step": 1789 + }, + { + "epoch": 4.704799474030243, + "high_lr": 5.8421052631578954e-05, + "low_lr": 1.1684210526315791e-06, + "step": 1789 + }, + { + "epoch": 4.704799474030243, + "high_lr": 5.8421052631578954e-05, + "low_lr": 1.1684210526315791e-06, + "step": 1789 + }, + { + "epoch": 4.704799474030243, + "high_lr": 5.8421052631578954e-05, + "low_lr": 1.1684210526315791e-06, + "step": 1789 + }, + { + "epoch": 4.704799474030243, + "high_lr": 5.8421052631578954e-05, + "low_lr": 1.1684210526315791e-06, + "step": 1789 + }, + { + "epoch": 4.704799474030243, + "high_lr": 5.8421052631578954e-05, + "low_lr": 1.1684210526315791e-06, + "step": 1789 + }, + { + "epoch": 4.704799474030243, + "high_lr": 5.8421052631578954e-05, + "low_lr": 1.1684210526315791e-06, + "step": 1789 + }, + { + "epoch": 4.704799474030243, + "high_lr": 5.8421052631578954e-05, + "low_lr": 1.1684210526315791e-06, + "step": 1789 + }, + { + "epoch": 4.7074293228139386, + "grad_norm": 1.5951370000839233, + "learning_rate": 5.789473684210527e-05, + "loss": 1.2283, + "step": 1790 + }, + { + "epoch": 4.7074293228139386, + "high_lr": 5.789473684210527e-05, + "low_lr": 1.1578947368421053e-06, + "step": 1790 + }, + { + "epoch": 4.7074293228139386, + "high_lr": 5.789473684210527e-05, + "low_lr": 1.1578947368421053e-06, + "step": 1790 + }, + { + "epoch": 4.7074293228139386, + "high_lr": 5.789473684210527e-05, + "low_lr": 1.1578947368421053e-06, + "step": 1790 + }, + { + "epoch": 4.7074293228139386, + "high_lr": 5.789473684210527e-05, + "low_lr": 1.1578947368421053e-06, + "step": 1790 + }, + { + "epoch": 4.7074293228139386, + "high_lr": 5.789473684210527e-05, + "low_lr": 1.1578947368421053e-06, + "step": 1790 + }, + { + "epoch": 4.7074293228139386, + "high_lr": 5.789473684210527e-05, + "low_lr": 1.1578947368421053e-06, + "step": 1790 + }, + { + "epoch": 4.7074293228139386, + "high_lr": 5.789473684210527e-05, + "low_lr": 1.1578947368421053e-06, + "step": 1790 + }, + { + "epoch": 4.7074293228139386, + "high_lr": 5.789473684210527e-05, + "low_lr": 1.1578947368421053e-06, + "step": 1790 + }, + { + "epoch": 4.710059171597633, + "grad_norm": 1.511790156364441, + "learning_rate": 5.736842105263158e-05, + "loss": 1.2441, + "step": 1791 + }, + { + "epoch": 4.710059171597633, + "high_lr": 5.736842105263158e-05, + "low_lr": 1.1473684210526316e-06, + "step": 1791 + }, + { + "epoch": 4.710059171597633, + "high_lr": 5.736842105263158e-05, + "low_lr": 1.1473684210526316e-06, + "step": 1791 + }, + { + "epoch": 4.710059171597633, + "high_lr": 5.736842105263158e-05, + "low_lr": 1.1473684210526316e-06, + "step": 1791 + }, + { + "epoch": 4.710059171597633, + "high_lr": 5.736842105263158e-05, + "low_lr": 1.1473684210526316e-06, + "step": 1791 + }, + { + "epoch": 4.710059171597633, + "high_lr": 5.736842105263158e-05, + "low_lr": 1.1473684210526316e-06, + "step": 1791 + }, + { + "epoch": 4.710059171597633, + "high_lr": 5.736842105263158e-05, + "low_lr": 1.1473684210526316e-06, + "step": 1791 + }, + { + "epoch": 4.710059171597633, + "high_lr": 5.736842105263158e-05, + "low_lr": 1.1473684210526316e-06, + "step": 1791 + }, + { + "epoch": 4.710059171597633, + "high_lr": 5.736842105263158e-05, + "low_lr": 1.1473684210526316e-06, + "step": 1791 + }, + { + "epoch": 4.712689020381328, + "grad_norm": 1.5266692638397217, + "learning_rate": 5.68421052631579e-05, + "loss": 1.2198, + "step": 1792 + }, + { + "epoch": 4.712689020381328, + "high_lr": 5.68421052631579e-05, + "low_lr": 1.136842105263158e-06, + "step": 1792 + }, + { + "epoch": 4.712689020381328, + "high_lr": 5.68421052631579e-05, + "low_lr": 1.136842105263158e-06, + "step": 1792 + }, + { + "epoch": 4.712689020381328, + "high_lr": 5.68421052631579e-05, + "low_lr": 1.136842105263158e-06, + "step": 1792 + }, + { + "epoch": 4.712689020381328, + "high_lr": 5.68421052631579e-05, + "low_lr": 1.136842105263158e-06, + "step": 1792 + }, + { + "epoch": 4.712689020381328, + "high_lr": 5.68421052631579e-05, + "low_lr": 1.136842105263158e-06, + "step": 1792 + }, + { + "epoch": 4.712689020381328, + "high_lr": 5.68421052631579e-05, + "low_lr": 1.136842105263158e-06, + "step": 1792 + }, + { + "epoch": 4.712689020381328, + "high_lr": 5.68421052631579e-05, + "low_lr": 1.136842105263158e-06, + "step": 1792 + }, + { + "epoch": 4.712689020381328, + "high_lr": 5.68421052631579e-05, + "low_lr": 1.136842105263158e-06, + "step": 1792 + }, + { + "epoch": 4.7153188691650225, + "grad_norm": 1.5269373655319214, + "learning_rate": 5.6315789473684206e-05, + "loss": 1.2131, + "step": 1793 + }, + { + "epoch": 4.7153188691650225, + "high_lr": 5.6315789473684206e-05, + "low_lr": 1.1263157894736842e-06, + "step": 1793 + }, + { + "epoch": 4.7153188691650225, + "high_lr": 5.6315789473684206e-05, + "low_lr": 1.1263157894736842e-06, + "step": 1793 + }, + { + "epoch": 4.7153188691650225, + "high_lr": 5.6315789473684206e-05, + "low_lr": 1.1263157894736842e-06, + "step": 1793 + }, + { + "epoch": 4.7153188691650225, + "high_lr": 5.6315789473684206e-05, + "low_lr": 1.1263157894736842e-06, + "step": 1793 + }, + { + "epoch": 4.7153188691650225, + "high_lr": 5.6315789473684206e-05, + "low_lr": 1.1263157894736842e-06, + "step": 1793 + }, + { + "epoch": 4.7153188691650225, + "high_lr": 5.6315789473684206e-05, + "low_lr": 1.1263157894736842e-06, + "step": 1793 + }, + { + "epoch": 4.7153188691650225, + "high_lr": 5.6315789473684206e-05, + "low_lr": 1.1263157894736842e-06, + "step": 1793 + }, + { + "epoch": 4.7153188691650225, + "high_lr": 5.6315789473684206e-05, + "low_lr": 1.1263157894736842e-06, + "step": 1793 + }, + { + "epoch": 4.717948717948718, + "grad_norm": 1.5177338123321533, + "learning_rate": 5.5789473684210526e-05, + "loss": 1.2279, + "step": 1794 + }, + { + "epoch": 4.717948717948718, + "high_lr": 5.5789473684210526e-05, + "low_lr": 1.1157894736842106e-06, + "step": 1794 + }, + { + "epoch": 4.717948717948718, + "high_lr": 5.5789473684210526e-05, + "low_lr": 1.1157894736842106e-06, + "step": 1794 + }, + { + "epoch": 4.717948717948718, + "high_lr": 5.5789473684210526e-05, + "low_lr": 1.1157894736842106e-06, + "step": 1794 + }, + { + "epoch": 4.717948717948718, + "high_lr": 5.5789473684210526e-05, + "low_lr": 1.1157894736842106e-06, + "step": 1794 + }, + { + "epoch": 4.717948717948718, + "high_lr": 5.5789473684210526e-05, + "low_lr": 1.1157894736842106e-06, + "step": 1794 + }, + { + "epoch": 4.717948717948718, + "high_lr": 5.5789473684210526e-05, + "low_lr": 1.1157894736842106e-06, + "step": 1794 + }, + { + "epoch": 4.717948717948718, + "high_lr": 5.5789473684210526e-05, + "low_lr": 1.1157894736842106e-06, + "step": 1794 + }, + { + "epoch": 4.717948717948718, + "high_lr": 5.5789473684210526e-05, + "low_lr": 1.1157894736842106e-06, + "step": 1794 + }, + { + "epoch": 4.720578566732413, + "grad_norm": 1.61833655834198, + "learning_rate": 5.526315789473684e-05, + "loss": 1.1762, + "step": 1795 + }, + { + "epoch": 4.720578566732413, + "high_lr": 5.526315789473684e-05, + "low_lr": 1.1052631578947369e-06, + "step": 1795 + }, + { + "epoch": 4.720578566732413, + "high_lr": 5.526315789473684e-05, + "low_lr": 1.1052631578947369e-06, + "step": 1795 + }, + { + "epoch": 4.720578566732413, + "high_lr": 5.526315789473684e-05, + "low_lr": 1.1052631578947369e-06, + "step": 1795 + }, + { + "epoch": 4.720578566732413, + "high_lr": 5.526315789473684e-05, + "low_lr": 1.1052631578947369e-06, + "step": 1795 + }, + { + "epoch": 4.720578566732413, + "high_lr": 5.526315789473684e-05, + "low_lr": 1.1052631578947369e-06, + "step": 1795 + }, + { + "epoch": 4.720578566732413, + "high_lr": 5.526315789473684e-05, + "low_lr": 1.1052631578947369e-06, + "step": 1795 + }, + { + "epoch": 4.720578566732413, + "high_lr": 5.526315789473684e-05, + "low_lr": 1.1052631578947369e-06, + "step": 1795 + }, + { + "epoch": 4.720578566732413, + "high_lr": 5.526315789473684e-05, + "low_lr": 1.1052631578947369e-06, + "step": 1795 + }, + { + "epoch": 4.723208415516108, + "grad_norm": 1.5420902967453003, + "learning_rate": 5.473684210526316e-05, + "loss": 1.1901, + "step": 1796 + }, + { + "epoch": 4.723208415516108, + "high_lr": 5.473684210526316e-05, + "low_lr": 1.0947368421052632e-06, + "step": 1796 + }, + { + "epoch": 4.723208415516108, + "high_lr": 5.473684210526316e-05, + "low_lr": 1.0947368421052632e-06, + "step": 1796 + }, + { + "epoch": 4.723208415516108, + "high_lr": 5.473684210526316e-05, + "low_lr": 1.0947368421052632e-06, + "step": 1796 + }, + { + "epoch": 4.723208415516108, + "high_lr": 5.473684210526316e-05, + "low_lr": 1.0947368421052632e-06, + "step": 1796 + }, + { + "epoch": 4.723208415516108, + "high_lr": 5.473684210526316e-05, + "low_lr": 1.0947368421052632e-06, + "step": 1796 + }, + { + "epoch": 4.723208415516108, + "high_lr": 5.473684210526316e-05, + "low_lr": 1.0947368421052632e-06, + "step": 1796 + }, + { + "epoch": 4.723208415516108, + "high_lr": 5.473684210526316e-05, + "low_lr": 1.0947368421052632e-06, + "step": 1796 + }, + { + "epoch": 4.723208415516108, + "high_lr": 5.473684210526316e-05, + "low_lr": 1.0947368421052632e-06, + "step": 1796 + }, + { + "epoch": 4.725838264299803, + "grad_norm": 1.4761364459991455, + "learning_rate": 5.421052631578947e-05, + "loss": 1.1768, + "step": 1797 + }, + { + "epoch": 4.725838264299803, + "high_lr": 5.421052631578947e-05, + "low_lr": 1.0842105263157895e-06, + "step": 1797 + }, + { + "epoch": 4.725838264299803, + "high_lr": 5.421052631578947e-05, + "low_lr": 1.0842105263157895e-06, + "step": 1797 + }, + { + "epoch": 4.725838264299803, + "high_lr": 5.421052631578947e-05, + "low_lr": 1.0842105263157895e-06, + "step": 1797 + }, + { + "epoch": 4.725838264299803, + "high_lr": 5.421052631578947e-05, + "low_lr": 1.0842105263157895e-06, + "step": 1797 + }, + { + "epoch": 4.725838264299803, + "high_lr": 5.421052631578947e-05, + "low_lr": 1.0842105263157895e-06, + "step": 1797 + }, + { + "epoch": 4.725838264299803, + "high_lr": 5.421052631578947e-05, + "low_lr": 1.0842105263157895e-06, + "step": 1797 + }, + { + "epoch": 4.725838264299803, + "high_lr": 5.421052631578947e-05, + "low_lr": 1.0842105263157895e-06, + "step": 1797 + }, + { + "epoch": 4.725838264299803, + "high_lr": 5.421052631578947e-05, + "low_lr": 1.0842105263157895e-06, + "step": 1797 + }, + { + "epoch": 4.728468113083498, + "grad_norm": 1.5077128410339355, + "learning_rate": 5.368421052631579e-05, + "loss": 1.2006, + "step": 1798 + }, + { + "epoch": 4.728468113083498, + "high_lr": 5.368421052631579e-05, + "low_lr": 1.0736842105263159e-06, + "step": 1798 + }, + { + "epoch": 4.728468113083498, + "high_lr": 5.368421052631579e-05, + "low_lr": 1.0736842105263159e-06, + "step": 1798 + }, + { + "epoch": 4.728468113083498, + "high_lr": 5.368421052631579e-05, + "low_lr": 1.0736842105263159e-06, + "step": 1798 + }, + { + "epoch": 4.728468113083498, + "high_lr": 5.368421052631579e-05, + "low_lr": 1.0736842105263159e-06, + "step": 1798 + }, + { + "epoch": 4.728468113083498, + "high_lr": 5.368421052631579e-05, + "low_lr": 1.0736842105263159e-06, + "step": 1798 + }, + { + "epoch": 4.728468113083498, + "high_lr": 5.368421052631579e-05, + "low_lr": 1.0736842105263159e-06, + "step": 1798 + }, + { + "epoch": 4.728468113083498, + "high_lr": 5.368421052631579e-05, + "low_lr": 1.0736842105263159e-06, + "step": 1798 + }, + { + "epoch": 4.728468113083498, + "high_lr": 5.368421052631579e-05, + "low_lr": 1.0736842105263159e-06, + "step": 1798 + }, + { + "epoch": 4.731097961867192, + "grad_norm": 1.6717760562896729, + "learning_rate": 5.3157894736842104e-05, + "loss": 1.2318, + "step": 1799 + }, + { + "epoch": 4.731097961867192, + "high_lr": 5.3157894736842104e-05, + "low_lr": 1.0631578947368422e-06, + "step": 1799 + }, + { + "epoch": 4.731097961867192, + "high_lr": 5.3157894736842104e-05, + "low_lr": 1.0631578947368422e-06, + "step": 1799 + }, + { + "epoch": 4.731097961867192, + "high_lr": 5.3157894736842104e-05, + "low_lr": 1.0631578947368422e-06, + "step": 1799 + }, + { + "epoch": 4.731097961867192, + "high_lr": 5.3157894736842104e-05, + "low_lr": 1.0631578947368422e-06, + "step": 1799 + }, + { + "epoch": 4.731097961867192, + "high_lr": 5.3157894736842104e-05, + "low_lr": 1.0631578947368422e-06, + "step": 1799 + }, + { + "epoch": 4.731097961867192, + "high_lr": 5.3157894736842104e-05, + "low_lr": 1.0631578947368422e-06, + "step": 1799 + }, + { + "epoch": 4.731097961867192, + "high_lr": 5.3157894736842104e-05, + "low_lr": 1.0631578947368422e-06, + "step": 1799 + }, + { + "epoch": 4.731097961867192, + "high_lr": 5.3157894736842104e-05, + "low_lr": 1.0631578947368422e-06, + "step": 1799 + }, + { + "epoch": 4.733727810650888, + "grad_norm": 1.5752482414245605, + "learning_rate": 5.263157894736842e-05, + "loss": 1.1888, + "step": 1800 + }, + { + "epoch": 4.733727810650888, + "high_lr": 5.263157894736842e-05, + "low_lr": 1.0526315789473685e-06, + "step": 1800 + }, + { + "epoch": 4.733727810650888, + "high_lr": 5.263157894736842e-05, + "low_lr": 1.0526315789473685e-06, + "step": 1800 + }, + { + "epoch": 4.733727810650888, + "high_lr": 5.263157894736842e-05, + "low_lr": 1.0526315789473685e-06, + "step": 1800 + }, + { + "epoch": 4.733727810650888, + "high_lr": 5.263157894736842e-05, + "low_lr": 1.0526315789473685e-06, + "step": 1800 + }, + { + "epoch": 4.733727810650888, + "high_lr": 5.263157894736842e-05, + "low_lr": 1.0526315789473685e-06, + "step": 1800 + }, + { + "epoch": 4.733727810650888, + "high_lr": 5.263157894736842e-05, + "low_lr": 1.0526315789473685e-06, + "step": 1800 + }, + { + "epoch": 4.733727810650888, + "high_lr": 5.263157894736842e-05, + "low_lr": 1.0526315789473685e-06, + "step": 1800 + }, + { + "epoch": 4.733727810650888, + "high_lr": 5.263157894736842e-05, + "low_lr": 1.0526315789473685e-06, + "step": 1800 + }, + { + "epoch": 4.736357659434582, + "grad_norm": 1.6021584272384644, + "learning_rate": 5.210526315789474e-05, + "loss": 1.2797, + "step": 1801 + }, + { + "epoch": 4.736357659434582, + "high_lr": 5.210526315789474e-05, + "low_lr": 1.0421052631578949e-06, + "step": 1801 + }, + { + "epoch": 4.736357659434582, + "high_lr": 5.210526315789474e-05, + "low_lr": 1.0421052631578949e-06, + "step": 1801 + }, + { + "epoch": 4.736357659434582, + "high_lr": 5.210526315789474e-05, + "low_lr": 1.0421052631578949e-06, + "step": 1801 + }, + { + "epoch": 4.736357659434582, + "high_lr": 5.210526315789474e-05, + "low_lr": 1.0421052631578949e-06, + "step": 1801 + }, + { + "epoch": 4.736357659434582, + "high_lr": 5.210526315789474e-05, + "low_lr": 1.0421052631578949e-06, + "step": 1801 + }, + { + "epoch": 4.736357659434582, + "high_lr": 5.210526315789474e-05, + "low_lr": 1.0421052631578949e-06, + "step": 1801 + }, + { + "epoch": 4.736357659434582, + "high_lr": 5.210526315789474e-05, + "low_lr": 1.0421052631578949e-06, + "step": 1801 + }, + { + "epoch": 4.736357659434582, + "high_lr": 5.210526315789474e-05, + "low_lr": 1.0421052631578949e-06, + "step": 1801 + }, + { + "epoch": 4.738987508218277, + "grad_norm": 1.5199358463287354, + "learning_rate": 5.157894736842105e-05, + "loss": 1.1962, + "step": 1802 + }, + { + "epoch": 4.738987508218277, + "high_lr": 5.157894736842105e-05, + "low_lr": 1.0315789473684212e-06, + "step": 1802 + }, + { + "epoch": 4.738987508218277, + "high_lr": 5.157894736842105e-05, + "low_lr": 1.0315789473684212e-06, + "step": 1802 + }, + { + "epoch": 4.738987508218277, + "high_lr": 5.157894736842105e-05, + "low_lr": 1.0315789473684212e-06, + "step": 1802 + }, + { + "epoch": 4.738987508218277, + "high_lr": 5.157894736842105e-05, + "low_lr": 1.0315789473684212e-06, + "step": 1802 + }, + { + "epoch": 4.738987508218277, + "high_lr": 5.157894736842105e-05, + "low_lr": 1.0315789473684212e-06, + "step": 1802 + }, + { + "epoch": 4.738987508218277, + "high_lr": 5.157894736842105e-05, + "low_lr": 1.0315789473684212e-06, + "step": 1802 + }, + { + "epoch": 4.738987508218277, + "high_lr": 5.157894736842105e-05, + "low_lr": 1.0315789473684212e-06, + "step": 1802 + }, + { + "epoch": 4.738987508218277, + "high_lr": 5.157894736842105e-05, + "low_lr": 1.0315789473684212e-06, + "step": 1802 + }, + { + "epoch": 4.741617357001973, + "grad_norm": 1.5833852291107178, + "learning_rate": 5.105263157894737e-05, + "loss": 1.2322, + "step": 1803 + }, + { + "epoch": 4.741617357001973, + "high_lr": 5.105263157894737e-05, + "low_lr": 1.0210526315789475e-06, + "step": 1803 + }, + { + "epoch": 4.741617357001973, + "high_lr": 5.105263157894737e-05, + "low_lr": 1.0210526315789475e-06, + "step": 1803 + }, + { + "epoch": 4.741617357001973, + "high_lr": 5.105263157894737e-05, + "low_lr": 1.0210526315789475e-06, + "step": 1803 + }, + { + "epoch": 4.741617357001973, + "high_lr": 5.105263157894737e-05, + "low_lr": 1.0210526315789475e-06, + "step": 1803 + }, + { + "epoch": 4.741617357001973, + "high_lr": 5.105263157894737e-05, + "low_lr": 1.0210526315789475e-06, + "step": 1803 + }, + { + "epoch": 4.741617357001973, + "high_lr": 5.105263157894737e-05, + "low_lr": 1.0210526315789475e-06, + "step": 1803 + }, + { + "epoch": 4.741617357001973, + "high_lr": 5.105263157894737e-05, + "low_lr": 1.0210526315789475e-06, + "step": 1803 + }, + { + "epoch": 4.741617357001973, + "high_lr": 5.105263157894737e-05, + "low_lr": 1.0210526315789475e-06, + "step": 1803 + }, + { + "epoch": 4.744247205785667, + "grad_norm": 1.4354584217071533, + "learning_rate": 5.052631578947368e-05, + "loss": 1.2384, + "step": 1804 + }, + { + "epoch": 4.744247205785667, + "high_lr": 5.052631578947368e-05, + "low_lr": 1.0105263157894738e-06, + "step": 1804 + }, + { + "epoch": 4.744247205785667, + "high_lr": 5.052631578947368e-05, + "low_lr": 1.0105263157894738e-06, + "step": 1804 + }, + { + "epoch": 4.744247205785667, + "high_lr": 5.052631578947368e-05, + "low_lr": 1.0105263157894738e-06, + "step": 1804 + }, + { + "epoch": 4.744247205785667, + "high_lr": 5.052631578947368e-05, + "low_lr": 1.0105263157894738e-06, + "step": 1804 + }, + { + "epoch": 4.744247205785667, + "high_lr": 5.052631578947368e-05, + "low_lr": 1.0105263157894738e-06, + "step": 1804 + }, + { + "epoch": 4.744247205785667, + "high_lr": 5.052631578947368e-05, + "low_lr": 1.0105263157894738e-06, + "step": 1804 + }, + { + "epoch": 4.744247205785667, + "high_lr": 5.052631578947368e-05, + "low_lr": 1.0105263157894738e-06, + "step": 1804 + }, + { + "epoch": 4.744247205785667, + "high_lr": 5.052631578947368e-05, + "low_lr": 1.0105263157894738e-06, + "step": 1804 + }, + { + "epoch": 4.746877054569362, + "grad_norm": 1.5235344171524048, + "learning_rate": 5e-05, + "loss": 1.2031, + "step": 1805 + }, + { + "epoch": 4.746877054569362, + "high_lr": 5e-05, + "low_lr": 1.0000000000000002e-06, + "step": 1805 + }, + { + "epoch": 4.746877054569362, + "high_lr": 5e-05, + "low_lr": 1.0000000000000002e-06, + "step": 1805 + }, + { + "epoch": 4.746877054569362, + "high_lr": 5e-05, + "low_lr": 1.0000000000000002e-06, + "step": 1805 + }, + { + "epoch": 4.746877054569362, + "high_lr": 5e-05, + "low_lr": 1.0000000000000002e-06, + "step": 1805 + }, + { + "epoch": 4.746877054569362, + "high_lr": 5e-05, + "low_lr": 1.0000000000000002e-06, + "step": 1805 + }, + { + "epoch": 4.746877054569362, + "high_lr": 5e-05, + "low_lr": 1.0000000000000002e-06, + "step": 1805 + }, + { + "epoch": 4.746877054569362, + "high_lr": 5e-05, + "low_lr": 1.0000000000000002e-06, + "step": 1805 + }, + { + "epoch": 4.746877054569362, + "high_lr": 5e-05, + "low_lr": 1.0000000000000002e-06, + "step": 1805 + }, + { + "epoch": 4.7495069033530575, + "grad_norm": 1.5912785530090332, + "learning_rate": 4.9473684210526315e-05, + "loss": 1.1858, + "step": 1806 + }, + { + "epoch": 4.7495069033530575, + "high_lr": 4.9473684210526315e-05, + "low_lr": 9.894736842105265e-07, + "step": 1806 + }, + { + "epoch": 4.7495069033530575, + "high_lr": 4.9473684210526315e-05, + "low_lr": 9.894736842105265e-07, + "step": 1806 + }, + { + "epoch": 4.7495069033530575, + "high_lr": 4.9473684210526315e-05, + "low_lr": 9.894736842105265e-07, + "step": 1806 + }, + { + "epoch": 4.7495069033530575, + "high_lr": 4.9473684210526315e-05, + "low_lr": 9.894736842105265e-07, + "step": 1806 + }, + { + "epoch": 4.7495069033530575, + "high_lr": 4.9473684210526315e-05, + "low_lr": 9.894736842105265e-07, + "step": 1806 + }, + { + "epoch": 4.7495069033530575, + "high_lr": 4.9473684210526315e-05, + "low_lr": 9.894736842105265e-07, + "step": 1806 + }, + { + "epoch": 4.7495069033530575, + "high_lr": 4.9473684210526315e-05, + "low_lr": 9.894736842105265e-07, + "step": 1806 + }, + { + "epoch": 4.7495069033530575, + "high_lr": 4.9473684210526315e-05, + "low_lr": 9.894736842105265e-07, + "step": 1806 + }, + { + "epoch": 4.752136752136752, + "grad_norm": 1.6209546327590942, + "learning_rate": 4.894736842105263e-05, + "loss": 1.2086, + "step": 1807 + }, + { + "epoch": 4.752136752136752, + "high_lr": 4.894736842105263e-05, + "low_lr": 9.789473684210526e-07, + "step": 1807 + }, + { + "epoch": 4.752136752136752, + "high_lr": 4.894736842105263e-05, + "low_lr": 9.789473684210526e-07, + "step": 1807 + }, + { + "epoch": 4.752136752136752, + "high_lr": 4.894736842105263e-05, + "low_lr": 9.789473684210526e-07, + "step": 1807 + }, + { + "epoch": 4.752136752136752, + "high_lr": 4.894736842105263e-05, + "low_lr": 9.789473684210526e-07, + "step": 1807 + }, + { + "epoch": 4.752136752136752, + "high_lr": 4.894736842105263e-05, + "low_lr": 9.789473684210526e-07, + "step": 1807 + }, + { + "epoch": 4.752136752136752, + "high_lr": 4.894736842105263e-05, + "low_lr": 9.789473684210526e-07, + "step": 1807 + }, + { + "epoch": 4.752136752136752, + "high_lr": 4.894736842105263e-05, + "low_lr": 9.789473684210526e-07, + "step": 1807 + }, + { + "epoch": 4.752136752136752, + "high_lr": 4.894736842105263e-05, + "low_lr": 9.789473684210526e-07, + "step": 1807 + }, + { + "epoch": 4.754766600920447, + "grad_norm": 1.6664385795593262, + "learning_rate": 4.842105263157895e-05, + "loss": 1.2079, + "step": 1808 + }, + { + "epoch": 4.754766600920447, + "high_lr": 4.842105263157895e-05, + "low_lr": 9.68421052631579e-07, + "step": 1808 + }, + { + "epoch": 4.754766600920447, + "high_lr": 4.842105263157895e-05, + "low_lr": 9.68421052631579e-07, + "step": 1808 + }, + { + "epoch": 4.754766600920447, + "high_lr": 4.842105263157895e-05, + "low_lr": 9.68421052631579e-07, + "step": 1808 + }, + { + "epoch": 4.754766600920447, + "high_lr": 4.842105263157895e-05, + "low_lr": 9.68421052631579e-07, + "step": 1808 + }, + { + "epoch": 4.754766600920447, + "high_lr": 4.842105263157895e-05, + "low_lr": 9.68421052631579e-07, + "step": 1808 + }, + { + "epoch": 4.754766600920447, + "high_lr": 4.842105263157895e-05, + "low_lr": 9.68421052631579e-07, + "step": 1808 + }, + { + "epoch": 4.754766600920447, + "high_lr": 4.842105263157895e-05, + "low_lr": 9.68421052631579e-07, + "step": 1808 + }, + { + "epoch": 4.754766600920447, + "high_lr": 4.842105263157895e-05, + "low_lr": 9.68421052631579e-07, + "step": 1808 + }, + { + "epoch": 4.757396449704142, + "grad_norm": 1.6431585550308228, + "learning_rate": 4.789473684210526e-05, + "loss": 1.2445, + "step": 1809 + }, + { + "epoch": 4.757396449704142, + "high_lr": 4.789473684210526e-05, + "low_lr": 9.578947368421053e-07, + "step": 1809 + }, + { + "epoch": 4.757396449704142, + "high_lr": 4.789473684210526e-05, + "low_lr": 9.578947368421053e-07, + "step": 1809 + }, + { + "epoch": 4.757396449704142, + "high_lr": 4.789473684210526e-05, + "low_lr": 9.578947368421053e-07, + "step": 1809 + }, + { + "epoch": 4.757396449704142, + "high_lr": 4.789473684210526e-05, + "low_lr": 9.578947368421053e-07, + "step": 1809 + }, + { + "epoch": 4.757396449704142, + "high_lr": 4.789473684210526e-05, + "low_lr": 9.578947368421053e-07, + "step": 1809 + }, + { + "epoch": 4.757396449704142, + "high_lr": 4.789473684210526e-05, + "low_lr": 9.578947368421053e-07, + "step": 1809 + }, + { + "epoch": 4.757396449704142, + "high_lr": 4.789473684210526e-05, + "low_lr": 9.578947368421053e-07, + "step": 1809 + }, + { + "epoch": 4.757396449704142, + "high_lr": 4.789473684210526e-05, + "low_lr": 9.578947368421053e-07, + "step": 1809 + }, + { + "epoch": 4.760026298487837, + "grad_norm": 1.4404851198196411, + "learning_rate": 4.736842105263158e-05, + "loss": 1.2352, + "step": 1810 + }, + { + "epoch": 4.760026298487837, + "high_lr": 4.736842105263158e-05, + "low_lr": 9.473684210526317e-07, + "step": 1810 + }, + { + "epoch": 4.760026298487837, + "high_lr": 4.736842105263158e-05, + "low_lr": 9.473684210526317e-07, + "step": 1810 + }, + { + "epoch": 4.760026298487837, + "high_lr": 4.736842105263158e-05, + "low_lr": 9.473684210526317e-07, + "step": 1810 + }, + { + "epoch": 4.760026298487837, + "high_lr": 4.736842105263158e-05, + "low_lr": 9.473684210526317e-07, + "step": 1810 + }, + { + "epoch": 4.760026298487837, + "high_lr": 4.736842105263158e-05, + "low_lr": 9.473684210526317e-07, + "step": 1810 + }, + { + "epoch": 4.760026298487837, + "high_lr": 4.736842105263158e-05, + "low_lr": 9.473684210526317e-07, + "step": 1810 + }, + { + "epoch": 4.760026298487837, + "high_lr": 4.736842105263158e-05, + "low_lr": 9.473684210526317e-07, + "step": 1810 + }, + { + "epoch": 4.760026298487837, + "high_lr": 4.736842105263158e-05, + "low_lr": 9.473684210526317e-07, + "step": 1810 + }, + { + "epoch": 4.762656147271532, + "grad_norm": 1.6453670263290405, + "learning_rate": 4.6842105263157894e-05, + "loss": 1.234, + "step": 1811 + }, + { + "epoch": 4.762656147271532, + "high_lr": 4.6842105263157894e-05, + "low_lr": 9.368421052631579e-07, + "step": 1811 + }, + { + "epoch": 4.762656147271532, + "high_lr": 4.6842105263157894e-05, + "low_lr": 9.368421052631579e-07, + "step": 1811 + }, + { + "epoch": 4.762656147271532, + "high_lr": 4.6842105263157894e-05, + "low_lr": 9.368421052631579e-07, + "step": 1811 + }, + { + "epoch": 4.762656147271532, + "high_lr": 4.6842105263157894e-05, + "low_lr": 9.368421052631579e-07, + "step": 1811 + }, + { + "epoch": 4.762656147271532, + "high_lr": 4.6842105263157894e-05, + "low_lr": 9.368421052631579e-07, + "step": 1811 + }, + { + "epoch": 4.762656147271532, + "high_lr": 4.6842105263157894e-05, + "low_lr": 9.368421052631579e-07, + "step": 1811 + }, + { + "epoch": 4.762656147271532, + "high_lr": 4.6842105263157894e-05, + "low_lr": 9.368421052631579e-07, + "step": 1811 + }, + { + "epoch": 4.762656147271532, + "high_lr": 4.6842105263157894e-05, + "low_lr": 9.368421052631579e-07, + "step": 1811 + }, + { + "epoch": 4.765285996055227, + "grad_norm": 1.5535515546798706, + "learning_rate": 4.6315789473684214e-05, + "loss": 1.1985, + "step": 1812 + }, + { + "epoch": 4.765285996055227, + "high_lr": 4.6315789473684214e-05, + "low_lr": 9.263157894736844e-07, + "step": 1812 + }, + { + "epoch": 4.765285996055227, + "high_lr": 4.6315789473684214e-05, + "low_lr": 9.263157894736844e-07, + "step": 1812 + }, + { + "epoch": 4.765285996055227, + "high_lr": 4.6315789473684214e-05, + "low_lr": 9.263157894736844e-07, + "step": 1812 + }, + { + "epoch": 4.765285996055227, + "high_lr": 4.6315789473684214e-05, + "low_lr": 9.263157894736844e-07, + "step": 1812 + }, + { + "epoch": 4.765285996055227, + "high_lr": 4.6315789473684214e-05, + "low_lr": 9.263157894736844e-07, + "step": 1812 + }, + { + "epoch": 4.765285996055227, + "high_lr": 4.6315789473684214e-05, + "low_lr": 9.263157894736844e-07, + "step": 1812 + }, + { + "epoch": 4.765285996055227, + "high_lr": 4.6315789473684214e-05, + "low_lr": 9.263157894736844e-07, + "step": 1812 + }, + { + "epoch": 4.765285996055227, + "high_lr": 4.6315789473684214e-05, + "low_lr": 9.263157894736844e-07, + "step": 1812 + }, + { + "epoch": 4.767915844838922, + "grad_norm": 1.5614182949066162, + "learning_rate": 4.5789473684210527e-05, + "loss": 1.2091, + "step": 1813 + }, + { + "epoch": 4.767915844838922, + "high_lr": 4.5789473684210527e-05, + "low_lr": 9.157894736842106e-07, + "step": 1813 + }, + { + "epoch": 4.767915844838922, + "high_lr": 4.5789473684210527e-05, + "low_lr": 9.157894736842106e-07, + "step": 1813 + }, + { + "epoch": 4.767915844838922, + "high_lr": 4.5789473684210527e-05, + "low_lr": 9.157894736842106e-07, + "step": 1813 + }, + { + "epoch": 4.767915844838922, + "high_lr": 4.5789473684210527e-05, + "low_lr": 9.157894736842106e-07, + "step": 1813 + }, + { + "epoch": 4.767915844838922, + "high_lr": 4.5789473684210527e-05, + "low_lr": 9.157894736842106e-07, + "step": 1813 + }, + { + "epoch": 4.767915844838922, + "high_lr": 4.5789473684210527e-05, + "low_lr": 9.157894736842106e-07, + "step": 1813 + }, + { + "epoch": 4.767915844838922, + "high_lr": 4.5789473684210527e-05, + "low_lr": 9.157894736842106e-07, + "step": 1813 + }, + { + "epoch": 4.767915844838922, + "high_lr": 4.5789473684210527e-05, + "low_lr": 9.157894736842106e-07, + "step": 1813 + }, + { + "epoch": 4.7705456936226165, + "grad_norm": 1.6125268936157227, + "learning_rate": 4.5263157894736846e-05, + "loss": 1.1937, + "step": 1814 + }, + { + "epoch": 4.7705456936226165, + "high_lr": 4.5263157894736846e-05, + "low_lr": 9.05263157894737e-07, + "step": 1814 + }, + { + "epoch": 4.7705456936226165, + "high_lr": 4.5263157894736846e-05, + "low_lr": 9.05263157894737e-07, + "step": 1814 + }, + { + "epoch": 4.7705456936226165, + "high_lr": 4.5263157894736846e-05, + "low_lr": 9.05263157894737e-07, + "step": 1814 + }, + { + "epoch": 4.7705456936226165, + "high_lr": 4.5263157894736846e-05, + "low_lr": 9.05263157894737e-07, + "step": 1814 + }, + { + "epoch": 4.7705456936226165, + "high_lr": 4.5263157894736846e-05, + "low_lr": 9.05263157894737e-07, + "step": 1814 + }, + { + "epoch": 4.7705456936226165, + "high_lr": 4.5263157894736846e-05, + "low_lr": 9.05263157894737e-07, + "step": 1814 + }, + { + "epoch": 4.7705456936226165, + "high_lr": 4.5263157894736846e-05, + "low_lr": 9.05263157894737e-07, + "step": 1814 + }, + { + "epoch": 4.7705456936226165, + "high_lr": 4.5263157894736846e-05, + "low_lr": 9.05263157894737e-07, + "step": 1814 + }, + { + "epoch": 4.773175542406312, + "grad_norm": 1.580861210823059, + "learning_rate": 4.473684210526316e-05, + "loss": 1.2034, + "step": 1815 + }, + { + "epoch": 4.773175542406312, + "high_lr": 4.473684210526316e-05, + "low_lr": 8.947368421052632e-07, + "step": 1815 + }, + { + "epoch": 4.773175542406312, + "high_lr": 4.473684210526316e-05, + "low_lr": 8.947368421052632e-07, + "step": 1815 + }, + { + "epoch": 4.773175542406312, + "high_lr": 4.473684210526316e-05, + "low_lr": 8.947368421052632e-07, + "step": 1815 + }, + { + "epoch": 4.773175542406312, + "high_lr": 4.473684210526316e-05, + "low_lr": 8.947368421052632e-07, + "step": 1815 + }, + { + "epoch": 4.773175542406312, + "high_lr": 4.473684210526316e-05, + "low_lr": 8.947368421052632e-07, + "step": 1815 + }, + { + "epoch": 4.773175542406312, + "high_lr": 4.473684210526316e-05, + "low_lr": 8.947368421052632e-07, + "step": 1815 + }, + { + "epoch": 4.773175542406312, + "high_lr": 4.473684210526316e-05, + "low_lr": 8.947368421052632e-07, + "step": 1815 + }, + { + "epoch": 4.773175542406312, + "high_lr": 4.473684210526316e-05, + "low_lr": 8.947368421052632e-07, + "step": 1815 + }, + { + "epoch": 4.775805391190007, + "grad_norm": 1.6488875150680542, + "learning_rate": 4.421052631578947e-05, + "loss": 1.2161, + "step": 1816 + }, + { + "epoch": 4.775805391190007, + "high_lr": 4.421052631578947e-05, + "low_lr": 8.842105263157895e-07, + "step": 1816 + }, + { + "epoch": 4.775805391190007, + "high_lr": 4.421052631578947e-05, + "low_lr": 8.842105263157895e-07, + "step": 1816 + }, + { + "epoch": 4.775805391190007, + "high_lr": 4.421052631578947e-05, + "low_lr": 8.842105263157895e-07, + "step": 1816 + }, + { + "epoch": 4.775805391190007, + "high_lr": 4.421052631578947e-05, + "low_lr": 8.842105263157895e-07, + "step": 1816 + }, + { + "epoch": 4.775805391190007, + "high_lr": 4.421052631578947e-05, + "low_lr": 8.842105263157895e-07, + "step": 1816 + }, + { + "epoch": 4.775805391190007, + "high_lr": 4.421052631578947e-05, + "low_lr": 8.842105263157895e-07, + "step": 1816 + }, + { + "epoch": 4.775805391190007, + "high_lr": 4.421052631578947e-05, + "low_lr": 8.842105263157895e-07, + "step": 1816 + }, + { + "epoch": 4.775805391190007, + "high_lr": 4.421052631578947e-05, + "low_lr": 8.842105263157895e-07, + "step": 1816 + }, + { + "epoch": 4.778435239973701, + "grad_norm": 1.513824224472046, + "learning_rate": 4.368421052631579e-05, + "loss": 1.1955, + "step": 1817 + }, + { + "epoch": 4.778435239973701, + "high_lr": 4.368421052631579e-05, + "low_lr": 8.736842105263159e-07, + "step": 1817 + }, + { + "epoch": 4.778435239973701, + "high_lr": 4.368421052631579e-05, + "low_lr": 8.736842105263159e-07, + "step": 1817 + }, + { + "epoch": 4.778435239973701, + "high_lr": 4.368421052631579e-05, + "low_lr": 8.736842105263159e-07, + "step": 1817 + }, + { + "epoch": 4.778435239973701, + "high_lr": 4.368421052631579e-05, + "low_lr": 8.736842105263159e-07, + "step": 1817 + }, + { + "epoch": 4.778435239973701, + "high_lr": 4.368421052631579e-05, + "low_lr": 8.736842105263159e-07, + "step": 1817 + }, + { + "epoch": 4.778435239973701, + "high_lr": 4.368421052631579e-05, + "low_lr": 8.736842105263159e-07, + "step": 1817 + }, + { + "epoch": 4.778435239973701, + "high_lr": 4.368421052631579e-05, + "low_lr": 8.736842105263159e-07, + "step": 1817 + }, + { + "epoch": 4.778435239973701, + "high_lr": 4.368421052631579e-05, + "low_lr": 8.736842105263159e-07, + "step": 1817 + }, + { + "epoch": 4.781065088757396, + "grad_norm": 1.5169581174850464, + "learning_rate": 4.3157894736842105e-05, + "loss": 1.1814, + "step": 1818 + }, + { + "epoch": 4.781065088757396, + "high_lr": 4.3157894736842105e-05, + "low_lr": 8.631578947368421e-07, + "step": 1818 + }, + { + "epoch": 4.781065088757396, + "high_lr": 4.3157894736842105e-05, + "low_lr": 8.631578947368421e-07, + "step": 1818 + }, + { + "epoch": 4.781065088757396, + "high_lr": 4.3157894736842105e-05, + "low_lr": 8.631578947368421e-07, + "step": 1818 + }, + { + "epoch": 4.781065088757396, + "high_lr": 4.3157894736842105e-05, + "low_lr": 8.631578947368421e-07, + "step": 1818 + }, + { + "epoch": 4.781065088757396, + "high_lr": 4.3157894736842105e-05, + "low_lr": 8.631578947368421e-07, + "step": 1818 + }, + { + "epoch": 4.781065088757396, + "high_lr": 4.3157894736842105e-05, + "low_lr": 8.631578947368421e-07, + "step": 1818 + }, + { + "epoch": 4.781065088757396, + "high_lr": 4.3157894736842105e-05, + "low_lr": 8.631578947368421e-07, + "step": 1818 + }, + { + "epoch": 4.781065088757396, + "high_lr": 4.3157894736842105e-05, + "low_lr": 8.631578947368421e-07, + "step": 1818 + }, + { + "epoch": 4.7836949375410915, + "grad_norm": 1.5888378620147705, + "learning_rate": 4.2631578947368425e-05, + "loss": 1.2298, + "step": 1819 + }, + { + "epoch": 4.7836949375410915, + "high_lr": 4.2631578947368425e-05, + "low_lr": 8.526315789473685e-07, + "step": 1819 + }, + { + "epoch": 4.7836949375410915, + "high_lr": 4.2631578947368425e-05, + "low_lr": 8.526315789473685e-07, + "step": 1819 + }, + { + "epoch": 4.7836949375410915, + "high_lr": 4.2631578947368425e-05, + "low_lr": 8.526315789473685e-07, + "step": 1819 + }, + { + "epoch": 4.7836949375410915, + "high_lr": 4.2631578947368425e-05, + "low_lr": 8.526315789473685e-07, + "step": 1819 + }, + { + "epoch": 4.7836949375410915, + "high_lr": 4.2631578947368425e-05, + "low_lr": 8.526315789473685e-07, + "step": 1819 + }, + { + "epoch": 4.7836949375410915, + "high_lr": 4.2631578947368425e-05, + "low_lr": 8.526315789473685e-07, + "step": 1819 + }, + { + "epoch": 4.7836949375410915, + "high_lr": 4.2631578947368425e-05, + "low_lr": 8.526315789473685e-07, + "step": 1819 + }, + { + "epoch": 4.7836949375410915, + "high_lr": 4.2631578947368425e-05, + "low_lr": 8.526315789473685e-07, + "step": 1819 + }, + { + "epoch": 4.786324786324786, + "grad_norm": 1.5194802284240723, + "learning_rate": 4.210526315789474e-05, + "loss": 1.2103, + "step": 1820 + }, + { + "epoch": 4.786324786324786, + "high_lr": 4.210526315789474e-05, + "low_lr": 8.421052631578948e-07, + "step": 1820 + }, + { + "epoch": 4.786324786324786, + "high_lr": 4.210526315789474e-05, + "low_lr": 8.421052631578948e-07, + "step": 1820 + }, + { + "epoch": 4.786324786324786, + "high_lr": 4.210526315789474e-05, + "low_lr": 8.421052631578948e-07, + "step": 1820 + }, + { + "epoch": 4.786324786324786, + "high_lr": 4.210526315789474e-05, + "low_lr": 8.421052631578948e-07, + "step": 1820 + }, + { + "epoch": 4.786324786324786, + "high_lr": 4.210526315789474e-05, + "low_lr": 8.421052631578948e-07, + "step": 1820 + }, + { + "epoch": 4.786324786324786, + "high_lr": 4.210526315789474e-05, + "low_lr": 8.421052631578948e-07, + "step": 1820 + }, + { + "epoch": 4.786324786324786, + "high_lr": 4.210526315789474e-05, + "low_lr": 8.421052631578948e-07, + "step": 1820 + }, + { + "epoch": 4.786324786324786, + "high_lr": 4.210526315789474e-05, + "low_lr": 8.421052631578948e-07, + "step": 1820 + }, + { + "epoch": 4.788954635108482, + "grad_norm": 1.589921236038208, + "learning_rate": 4.157894736842106e-05, + "loss": 1.2039, + "step": 1821 + }, + { + "epoch": 4.788954635108482, + "high_lr": 4.157894736842106e-05, + "low_lr": 8.315789473684212e-07, + "step": 1821 + }, + { + "epoch": 4.788954635108482, + "high_lr": 4.157894736842106e-05, + "low_lr": 8.315789473684212e-07, + "step": 1821 + }, + { + "epoch": 4.788954635108482, + "high_lr": 4.157894736842106e-05, + "low_lr": 8.315789473684212e-07, + "step": 1821 + }, + { + "epoch": 4.788954635108482, + "high_lr": 4.157894736842106e-05, + "low_lr": 8.315789473684212e-07, + "step": 1821 + }, + { + "epoch": 4.788954635108482, + "high_lr": 4.157894736842106e-05, + "low_lr": 8.315789473684212e-07, + "step": 1821 + }, + { + "epoch": 4.788954635108482, + "high_lr": 4.157894736842106e-05, + "low_lr": 8.315789473684212e-07, + "step": 1821 + }, + { + "epoch": 4.788954635108482, + "high_lr": 4.157894736842106e-05, + "low_lr": 8.315789473684212e-07, + "step": 1821 + }, + { + "epoch": 4.788954635108482, + "high_lr": 4.157894736842106e-05, + "low_lr": 8.315789473684212e-07, + "step": 1821 + }, + { + "epoch": 4.791584483892176, + "grad_norm": 1.4875950813293457, + "learning_rate": 4.105263157894737e-05, + "loss": 1.2248, + "step": 1822 + }, + { + "epoch": 4.791584483892176, + "high_lr": 4.105263157894737e-05, + "low_lr": 8.210526315789474e-07, + "step": 1822 + }, + { + "epoch": 4.791584483892176, + "high_lr": 4.105263157894737e-05, + "low_lr": 8.210526315789474e-07, + "step": 1822 + }, + { + "epoch": 4.791584483892176, + "high_lr": 4.105263157894737e-05, + "low_lr": 8.210526315789474e-07, + "step": 1822 + }, + { + "epoch": 4.791584483892176, + "high_lr": 4.105263157894737e-05, + "low_lr": 8.210526315789474e-07, + "step": 1822 + }, + { + "epoch": 4.791584483892176, + "high_lr": 4.105263157894737e-05, + "low_lr": 8.210526315789474e-07, + "step": 1822 + }, + { + "epoch": 4.791584483892176, + "high_lr": 4.105263157894737e-05, + "low_lr": 8.210526315789474e-07, + "step": 1822 + }, + { + "epoch": 4.791584483892176, + "high_lr": 4.105263157894737e-05, + "low_lr": 8.210526315789474e-07, + "step": 1822 + }, + { + "epoch": 4.791584483892176, + "high_lr": 4.105263157894737e-05, + "low_lr": 8.210526315789474e-07, + "step": 1822 + }, + { + "epoch": 4.794214332675871, + "grad_norm": 1.4205827713012695, + "learning_rate": 4.0526315789473684e-05, + "loss": 1.2522, + "step": 1823 + }, + { + "epoch": 4.794214332675871, + "high_lr": 4.0526315789473684e-05, + "low_lr": 8.105263157894736e-07, + "step": 1823 + }, + { + "epoch": 4.794214332675871, + "high_lr": 4.0526315789473684e-05, + "low_lr": 8.105263157894736e-07, + "step": 1823 + }, + { + "epoch": 4.794214332675871, + "high_lr": 4.0526315789473684e-05, + "low_lr": 8.105263157894736e-07, + "step": 1823 + }, + { + "epoch": 4.794214332675871, + "high_lr": 4.0526315789473684e-05, + "low_lr": 8.105263157894736e-07, + "step": 1823 + }, + { + "epoch": 4.794214332675871, + "high_lr": 4.0526315789473684e-05, + "low_lr": 8.105263157894736e-07, + "step": 1823 + }, + { + "epoch": 4.794214332675871, + "high_lr": 4.0526315789473684e-05, + "low_lr": 8.105263157894736e-07, + "step": 1823 + }, + { + "epoch": 4.794214332675871, + "high_lr": 4.0526315789473684e-05, + "low_lr": 8.105263157894736e-07, + "step": 1823 + }, + { + "epoch": 4.794214332675871, + "high_lr": 4.0526315789473684e-05, + "low_lr": 8.105263157894736e-07, + "step": 1823 + }, + { + "epoch": 4.796844181459566, + "grad_norm": 1.5499900579452515, + "learning_rate": 4e-05, + "loss": 1.2627, + "step": 1824 + }, + { + "epoch": 4.796844181459566, + "high_lr": 4e-05, + "low_lr": 8.000000000000001e-07, + "step": 1824 + }, + { + "epoch": 4.796844181459566, + "high_lr": 4e-05, + "low_lr": 8.000000000000001e-07, + "step": 1824 + }, + { + "epoch": 4.796844181459566, + "high_lr": 4e-05, + "low_lr": 8.000000000000001e-07, + "step": 1824 + }, + { + "epoch": 4.796844181459566, + "high_lr": 4e-05, + "low_lr": 8.000000000000001e-07, + "step": 1824 + }, + { + "epoch": 4.796844181459566, + "high_lr": 4e-05, + "low_lr": 8.000000000000001e-07, + "step": 1824 + }, + { + "epoch": 4.796844181459566, + "high_lr": 4e-05, + "low_lr": 8.000000000000001e-07, + "step": 1824 + }, + { + "epoch": 4.796844181459566, + "high_lr": 4e-05, + "low_lr": 8.000000000000001e-07, + "step": 1824 + }, + { + "epoch": 4.796844181459566, + "high_lr": 4e-05, + "low_lr": 8.000000000000001e-07, + "step": 1824 + }, + { + "epoch": 4.799474030243261, + "grad_norm": 1.6648582220077515, + "learning_rate": 3.9473684210526316e-05, + "loss": 1.2229, + "step": 1825 + }, + { + "epoch": 4.799474030243261, + "high_lr": 3.9473684210526316e-05, + "low_lr": 7.894736842105263e-07, + "step": 1825 + }, + { + "epoch": 4.799474030243261, + "high_lr": 3.9473684210526316e-05, + "low_lr": 7.894736842105263e-07, + "step": 1825 + }, + { + "epoch": 4.799474030243261, + "high_lr": 3.9473684210526316e-05, + "low_lr": 7.894736842105263e-07, + "step": 1825 + }, + { + "epoch": 4.799474030243261, + "high_lr": 3.9473684210526316e-05, + "low_lr": 7.894736842105263e-07, + "step": 1825 + }, + { + "epoch": 4.799474030243261, + "high_lr": 3.9473684210526316e-05, + "low_lr": 7.894736842105263e-07, + "step": 1825 + }, + { + "epoch": 4.799474030243261, + "high_lr": 3.9473684210526316e-05, + "low_lr": 7.894736842105263e-07, + "step": 1825 + }, + { + "epoch": 4.799474030243261, + "high_lr": 3.9473684210526316e-05, + "low_lr": 7.894736842105263e-07, + "step": 1825 + }, + { + "epoch": 4.799474030243261, + "high_lr": 3.9473684210526316e-05, + "low_lr": 7.894736842105263e-07, + "step": 1825 + }, + { + "epoch": 4.802103879026956, + "grad_norm": 1.5830285549163818, + "learning_rate": 3.8947368421052636e-05, + "loss": 1.2279, + "step": 1826 + }, + { + "epoch": 4.802103879026956, + "high_lr": 3.8947368421052636e-05, + "low_lr": 7.789473684210527e-07, + "step": 1826 + }, + { + "epoch": 4.802103879026956, + "high_lr": 3.8947368421052636e-05, + "low_lr": 7.789473684210527e-07, + "step": 1826 + }, + { + "epoch": 4.802103879026956, + "high_lr": 3.8947368421052636e-05, + "low_lr": 7.789473684210527e-07, + "step": 1826 + }, + { + "epoch": 4.802103879026956, + "high_lr": 3.8947368421052636e-05, + "low_lr": 7.789473684210527e-07, + "step": 1826 + }, + { + "epoch": 4.802103879026956, + "high_lr": 3.8947368421052636e-05, + "low_lr": 7.789473684210527e-07, + "step": 1826 + }, + { + "epoch": 4.802103879026956, + "high_lr": 3.8947368421052636e-05, + "low_lr": 7.789473684210527e-07, + "step": 1826 + }, + { + "epoch": 4.802103879026956, + "high_lr": 3.8947368421052636e-05, + "low_lr": 7.789473684210527e-07, + "step": 1826 + }, + { + "epoch": 4.802103879026956, + "high_lr": 3.8947368421052636e-05, + "low_lr": 7.789473684210527e-07, + "step": 1826 + }, + { + "epoch": 4.804733727810651, + "grad_norm": 1.554327130317688, + "learning_rate": 3.842105263157895e-05, + "loss": 1.2297, + "step": 1827 + }, + { + "epoch": 4.804733727810651, + "high_lr": 3.842105263157895e-05, + "low_lr": 7.684210526315789e-07, + "step": 1827 + }, + { + "epoch": 4.804733727810651, + "high_lr": 3.842105263157895e-05, + "low_lr": 7.684210526315789e-07, + "step": 1827 + }, + { + "epoch": 4.804733727810651, + "high_lr": 3.842105263157895e-05, + "low_lr": 7.684210526315789e-07, + "step": 1827 + }, + { + "epoch": 4.804733727810651, + "high_lr": 3.842105263157895e-05, + "low_lr": 7.684210526315789e-07, + "step": 1827 + }, + { + "epoch": 4.804733727810651, + "high_lr": 3.842105263157895e-05, + "low_lr": 7.684210526315789e-07, + "step": 1827 + }, + { + "epoch": 4.804733727810651, + "high_lr": 3.842105263157895e-05, + "low_lr": 7.684210526315789e-07, + "step": 1827 + }, + { + "epoch": 4.804733727810651, + "high_lr": 3.842105263157895e-05, + "low_lr": 7.684210526315789e-07, + "step": 1827 + }, + { + "epoch": 4.804733727810651, + "high_lr": 3.842105263157895e-05, + "low_lr": 7.684210526315789e-07, + "step": 1827 + }, + { + "epoch": 4.807363576594346, + "grad_norm": 1.5508402585983276, + "learning_rate": 3.789473684210527e-05, + "loss": 1.216, + "step": 1828 + }, + { + "epoch": 4.807363576594346, + "high_lr": 3.789473684210527e-05, + "low_lr": 7.578947368421054e-07, + "step": 1828 + }, + { + "epoch": 4.807363576594346, + "high_lr": 3.789473684210527e-05, + "low_lr": 7.578947368421054e-07, + "step": 1828 + }, + { + "epoch": 4.807363576594346, + "high_lr": 3.789473684210527e-05, + "low_lr": 7.578947368421054e-07, + "step": 1828 + }, + { + "epoch": 4.807363576594346, + "high_lr": 3.789473684210527e-05, + "low_lr": 7.578947368421054e-07, + "step": 1828 + }, + { + "epoch": 4.807363576594346, + "high_lr": 3.789473684210527e-05, + "low_lr": 7.578947368421054e-07, + "step": 1828 + }, + { + "epoch": 4.807363576594346, + "high_lr": 3.789473684210527e-05, + "low_lr": 7.578947368421054e-07, + "step": 1828 + }, + { + "epoch": 4.807363576594346, + "high_lr": 3.789473684210527e-05, + "low_lr": 7.578947368421054e-07, + "step": 1828 + }, + { + "epoch": 4.807363576594346, + "high_lr": 3.789473684210527e-05, + "low_lr": 7.578947368421054e-07, + "step": 1828 + }, + { + "epoch": 4.809993425378041, + "grad_norm": 1.5729447603225708, + "learning_rate": 3.736842105263158e-05, + "loss": 1.2267, + "step": 1829 + }, + { + "epoch": 4.809993425378041, + "high_lr": 3.736842105263158e-05, + "low_lr": 7.473684210526316e-07, + "step": 1829 + }, + { + "epoch": 4.809993425378041, + "high_lr": 3.736842105263158e-05, + "low_lr": 7.473684210526316e-07, + "step": 1829 + }, + { + "epoch": 4.809993425378041, + "high_lr": 3.736842105263158e-05, + "low_lr": 7.473684210526316e-07, + "step": 1829 + }, + { + "epoch": 4.809993425378041, + "high_lr": 3.736842105263158e-05, + "low_lr": 7.473684210526316e-07, + "step": 1829 + }, + { + "epoch": 4.809993425378041, + "high_lr": 3.736842105263158e-05, + "low_lr": 7.473684210526316e-07, + "step": 1829 + }, + { + "epoch": 4.809993425378041, + "high_lr": 3.736842105263158e-05, + "low_lr": 7.473684210526316e-07, + "step": 1829 + }, + { + "epoch": 4.809993425378041, + "high_lr": 3.736842105263158e-05, + "low_lr": 7.473684210526316e-07, + "step": 1829 + }, + { + "epoch": 4.809993425378041, + "high_lr": 3.736842105263158e-05, + "low_lr": 7.473684210526316e-07, + "step": 1829 + }, + { + "epoch": 4.812623274161735, + "grad_norm": 1.4417654275894165, + "learning_rate": 3.6842105263157895e-05, + "loss": 1.2055, + "step": 1830 + }, + { + "epoch": 4.812623274161735, + "high_lr": 3.6842105263157895e-05, + "low_lr": 7.368421052631579e-07, + "step": 1830 + }, + { + "epoch": 4.812623274161735, + "high_lr": 3.6842105263157895e-05, + "low_lr": 7.368421052631579e-07, + "step": 1830 + }, + { + "epoch": 4.812623274161735, + "high_lr": 3.6842105263157895e-05, + "low_lr": 7.368421052631579e-07, + "step": 1830 + }, + { + "epoch": 4.812623274161735, + "high_lr": 3.6842105263157895e-05, + "low_lr": 7.368421052631579e-07, + "step": 1830 + }, + { + "epoch": 4.812623274161735, + "high_lr": 3.6842105263157895e-05, + "low_lr": 7.368421052631579e-07, + "step": 1830 + }, + { + "epoch": 4.812623274161735, + "high_lr": 3.6842105263157895e-05, + "low_lr": 7.368421052631579e-07, + "step": 1830 + }, + { + "epoch": 4.812623274161735, + "high_lr": 3.6842105263157895e-05, + "low_lr": 7.368421052631579e-07, + "step": 1830 + }, + { + "epoch": 4.812623274161735, + "high_lr": 3.6842105263157895e-05, + "low_lr": 7.368421052631579e-07, + "step": 1830 + }, + { + "epoch": 4.815253122945431, + "grad_norm": 1.4560467004776, + "learning_rate": 3.6315789473684214e-05, + "loss": 1.2136, + "step": 1831 + }, + { + "epoch": 4.815253122945431, + "high_lr": 3.6315789473684214e-05, + "low_lr": 7.263157894736843e-07, + "step": 1831 + }, + { + "epoch": 4.815253122945431, + "high_lr": 3.6315789473684214e-05, + "low_lr": 7.263157894736843e-07, + "step": 1831 + }, + { + "epoch": 4.815253122945431, + "high_lr": 3.6315789473684214e-05, + "low_lr": 7.263157894736843e-07, + "step": 1831 + }, + { + "epoch": 4.815253122945431, + "high_lr": 3.6315789473684214e-05, + "low_lr": 7.263157894736843e-07, + "step": 1831 + }, + { + "epoch": 4.815253122945431, + "high_lr": 3.6315789473684214e-05, + "low_lr": 7.263157894736843e-07, + "step": 1831 + }, + { + "epoch": 4.815253122945431, + "high_lr": 3.6315789473684214e-05, + "low_lr": 7.263157894736843e-07, + "step": 1831 + }, + { + "epoch": 4.815253122945431, + "high_lr": 3.6315789473684214e-05, + "low_lr": 7.263157894736843e-07, + "step": 1831 + }, + { + "epoch": 4.815253122945431, + "high_lr": 3.6315789473684214e-05, + "low_lr": 7.263157894736843e-07, + "step": 1831 + }, + { + "epoch": 4.817882971729126, + "grad_norm": 1.5613391399383545, + "learning_rate": 3.578947368421053e-05, + "loss": 1.2168, + "step": 1832 + }, + { + "epoch": 4.817882971729126, + "high_lr": 3.578947368421053e-05, + "low_lr": 7.157894736842106e-07, + "step": 1832 + }, + { + "epoch": 4.817882971729126, + "high_lr": 3.578947368421053e-05, + "low_lr": 7.157894736842106e-07, + "step": 1832 + }, + { + "epoch": 4.817882971729126, + "high_lr": 3.578947368421053e-05, + "low_lr": 7.157894736842106e-07, + "step": 1832 + }, + { + "epoch": 4.817882971729126, + "high_lr": 3.578947368421053e-05, + "low_lr": 7.157894736842106e-07, + "step": 1832 + }, + { + "epoch": 4.817882971729126, + "high_lr": 3.578947368421053e-05, + "low_lr": 7.157894736842106e-07, + "step": 1832 + }, + { + "epoch": 4.817882971729126, + "high_lr": 3.578947368421053e-05, + "low_lr": 7.157894736842106e-07, + "step": 1832 + }, + { + "epoch": 4.817882971729126, + "high_lr": 3.578947368421053e-05, + "low_lr": 7.157894736842106e-07, + "step": 1832 + }, + { + "epoch": 4.817882971729126, + "high_lr": 3.578947368421053e-05, + "low_lr": 7.157894736842106e-07, + "step": 1832 + }, + { + "epoch": 4.82051282051282, + "grad_norm": 1.5630950927734375, + "learning_rate": 3.526315789473685e-05, + "loss": 1.2381, + "step": 1833 + }, + { + "epoch": 4.82051282051282, + "high_lr": 3.526315789473685e-05, + "low_lr": 7.052631578947369e-07, + "step": 1833 + }, + { + "epoch": 4.82051282051282, + "high_lr": 3.526315789473685e-05, + "low_lr": 7.052631578947369e-07, + "step": 1833 + }, + { + "epoch": 4.82051282051282, + "high_lr": 3.526315789473685e-05, + "low_lr": 7.052631578947369e-07, + "step": 1833 + }, + { + "epoch": 4.82051282051282, + "high_lr": 3.526315789473685e-05, + "low_lr": 7.052631578947369e-07, + "step": 1833 + }, + { + "epoch": 4.82051282051282, + "high_lr": 3.526315789473685e-05, + "low_lr": 7.052631578947369e-07, + "step": 1833 + }, + { + "epoch": 4.82051282051282, + "high_lr": 3.526315789473685e-05, + "low_lr": 7.052631578947369e-07, + "step": 1833 + }, + { + "epoch": 4.82051282051282, + "high_lr": 3.526315789473685e-05, + "low_lr": 7.052631578947369e-07, + "step": 1833 + }, + { + "epoch": 4.82051282051282, + "high_lr": 3.526315789473685e-05, + "low_lr": 7.052631578947369e-07, + "step": 1833 + }, + { + "epoch": 4.823142669296516, + "grad_norm": 1.5562907457351685, + "learning_rate": 3.473684210526316e-05, + "loss": 1.2246, + "step": 1834 + }, + { + "epoch": 4.823142669296516, + "high_lr": 3.473684210526316e-05, + "low_lr": 6.947368421052631e-07, + "step": 1834 + }, + { + "epoch": 4.823142669296516, + "high_lr": 3.473684210526316e-05, + "low_lr": 6.947368421052631e-07, + "step": 1834 + }, + { + "epoch": 4.823142669296516, + "high_lr": 3.473684210526316e-05, + "low_lr": 6.947368421052631e-07, + "step": 1834 + }, + { + "epoch": 4.823142669296516, + "high_lr": 3.473684210526316e-05, + "low_lr": 6.947368421052631e-07, + "step": 1834 + }, + { + "epoch": 4.823142669296516, + "high_lr": 3.473684210526316e-05, + "low_lr": 6.947368421052631e-07, + "step": 1834 + }, + { + "epoch": 4.823142669296516, + "high_lr": 3.473684210526316e-05, + "low_lr": 6.947368421052631e-07, + "step": 1834 + }, + { + "epoch": 4.823142669296516, + "high_lr": 3.473684210526316e-05, + "low_lr": 6.947368421052631e-07, + "step": 1834 + }, + { + "epoch": 4.823142669296516, + "high_lr": 3.473684210526316e-05, + "low_lr": 6.947368421052631e-07, + "step": 1834 + }, + { + "epoch": 4.8257725180802105, + "grad_norm": 1.4658030271530151, + "learning_rate": 3.421052631578948e-05, + "loss": 1.2116, + "step": 1835 + }, + { + "epoch": 4.8257725180802105, + "high_lr": 3.421052631578948e-05, + "low_lr": 6.842105263157896e-07, + "step": 1835 + }, + { + "epoch": 4.8257725180802105, + "high_lr": 3.421052631578948e-05, + "low_lr": 6.842105263157896e-07, + "step": 1835 + }, + { + "epoch": 4.8257725180802105, + "high_lr": 3.421052631578948e-05, + "low_lr": 6.842105263157896e-07, + "step": 1835 + }, + { + "epoch": 4.8257725180802105, + "high_lr": 3.421052631578948e-05, + "low_lr": 6.842105263157896e-07, + "step": 1835 + }, + { + "epoch": 4.8257725180802105, + "high_lr": 3.421052631578948e-05, + "low_lr": 6.842105263157896e-07, + "step": 1835 + }, + { + "epoch": 4.8257725180802105, + "high_lr": 3.421052631578948e-05, + "low_lr": 6.842105263157896e-07, + "step": 1835 + }, + { + "epoch": 4.8257725180802105, + "high_lr": 3.421052631578948e-05, + "low_lr": 6.842105263157896e-07, + "step": 1835 + }, + { + "epoch": 4.8257725180802105, + "high_lr": 3.421052631578948e-05, + "low_lr": 6.842105263157896e-07, + "step": 1835 + }, + { + "epoch": 4.828402366863905, + "grad_norm": 1.5255767107009888, + "learning_rate": 3.3684210526315786e-05, + "loss": 1.2127, + "step": 1836 + }, + { + "epoch": 4.828402366863905, + "high_lr": 3.3684210526315786e-05, + "low_lr": 6.736842105263158e-07, + "step": 1836 + }, + { + "epoch": 4.828402366863905, + "high_lr": 3.3684210526315786e-05, + "low_lr": 6.736842105263158e-07, + "step": 1836 + }, + { + "epoch": 4.828402366863905, + "high_lr": 3.3684210526315786e-05, + "low_lr": 6.736842105263158e-07, + "step": 1836 + }, + { + "epoch": 4.828402366863905, + "high_lr": 3.3684210526315786e-05, + "low_lr": 6.736842105263158e-07, + "step": 1836 + }, + { + "epoch": 4.828402366863905, + "high_lr": 3.3684210526315786e-05, + "low_lr": 6.736842105263158e-07, + "step": 1836 + }, + { + "epoch": 4.828402366863905, + "high_lr": 3.3684210526315786e-05, + "low_lr": 6.736842105263158e-07, + "step": 1836 + }, + { + "epoch": 4.828402366863905, + "high_lr": 3.3684210526315786e-05, + "low_lr": 6.736842105263158e-07, + "step": 1836 + }, + { + "epoch": 4.828402366863905, + "high_lr": 3.3684210526315786e-05, + "low_lr": 6.736842105263158e-07, + "step": 1836 + }, + { + "epoch": 4.8310322156476, + "grad_norm": 1.6164422035217285, + "learning_rate": 3.3157894736842106e-05, + "loss": 1.1702, + "step": 1837 + }, + { + "epoch": 4.8310322156476, + "high_lr": 3.3157894736842106e-05, + "low_lr": 6.631578947368422e-07, + "step": 1837 + }, + { + "epoch": 4.8310322156476, + "high_lr": 3.3157894736842106e-05, + "low_lr": 6.631578947368422e-07, + "step": 1837 + }, + { + "epoch": 4.8310322156476, + "high_lr": 3.3157894736842106e-05, + "low_lr": 6.631578947368422e-07, + "step": 1837 + }, + { + "epoch": 4.8310322156476, + "high_lr": 3.3157894736842106e-05, + "low_lr": 6.631578947368422e-07, + "step": 1837 + }, + { + "epoch": 4.8310322156476, + "high_lr": 3.3157894736842106e-05, + "low_lr": 6.631578947368422e-07, + "step": 1837 + }, + { + "epoch": 4.8310322156476, + "high_lr": 3.3157894736842106e-05, + "low_lr": 6.631578947368422e-07, + "step": 1837 + }, + { + "epoch": 4.8310322156476, + "high_lr": 3.3157894736842106e-05, + "low_lr": 6.631578947368422e-07, + "step": 1837 + }, + { + "epoch": 4.8310322156476, + "high_lr": 3.3157894736842106e-05, + "low_lr": 6.631578947368422e-07, + "step": 1837 + }, + { + "epoch": 4.833662064431295, + "grad_norm": 1.6228440999984741, + "learning_rate": 3.263157894736842e-05, + "loss": 1.2445, + "step": 1838 + }, + { + "epoch": 4.833662064431295, + "high_lr": 3.263157894736842e-05, + "low_lr": 6.526315789473684e-07, + "step": 1838 + }, + { + "epoch": 4.833662064431295, + "high_lr": 3.263157894736842e-05, + "low_lr": 6.526315789473684e-07, + "step": 1838 + }, + { + "epoch": 4.833662064431295, + "high_lr": 3.263157894736842e-05, + "low_lr": 6.526315789473684e-07, + "step": 1838 + }, + { + "epoch": 4.833662064431295, + "high_lr": 3.263157894736842e-05, + "low_lr": 6.526315789473684e-07, + "step": 1838 + }, + { + "epoch": 4.833662064431295, + "high_lr": 3.263157894736842e-05, + "low_lr": 6.526315789473684e-07, + "step": 1838 + }, + { + "epoch": 4.833662064431295, + "high_lr": 3.263157894736842e-05, + "low_lr": 6.526315789473684e-07, + "step": 1838 + }, + { + "epoch": 4.833662064431295, + "high_lr": 3.263157894736842e-05, + "low_lr": 6.526315789473684e-07, + "step": 1838 + }, + { + "epoch": 4.833662064431295, + "high_lr": 3.263157894736842e-05, + "low_lr": 6.526315789473684e-07, + "step": 1838 + }, + { + "epoch": 4.83629191321499, + "grad_norm": 1.4909409284591675, + "learning_rate": 3.210526315789473e-05, + "loss": 1.2221, + "step": 1839 + }, + { + "epoch": 4.83629191321499, + "high_lr": 3.210526315789473e-05, + "low_lr": 6.421052631578948e-07, + "step": 1839 + }, + { + "epoch": 4.83629191321499, + "high_lr": 3.210526315789473e-05, + "low_lr": 6.421052631578948e-07, + "step": 1839 + }, + { + "epoch": 4.83629191321499, + "high_lr": 3.210526315789473e-05, + "low_lr": 6.421052631578948e-07, + "step": 1839 + }, + { + "epoch": 4.83629191321499, + "high_lr": 3.210526315789473e-05, + "low_lr": 6.421052631578948e-07, + "step": 1839 + }, + { + "epoch": 4.83629191321499, + "high_lr": 3.210526315789473e-05, + "low_lr": 6.421052631578948e-07, + "step": 1839 + }, + { + "epoch": 4.83629191321499, + "high_lr": 3.210526315789473e-05, + "low_lr": 6.421052631578948e-07, + "step": 1839 + }, + { + "epoch": 4.83629191321499, + "high_lr": 3.210526315789473e-05, + "low_lr": 6.421052631578948e-07, + "step": 1839 + }, + { + "epoch": 4.83629191321499, + "high_lr": 3.210526315789473e-05, + "low_lr": 6.421052631578948e-07, + "step": 1839 + }, + { + "epoch": 4.8389217619986855, + "grad_norm": 1.4390934705734253, + "learning_rate": 3.157894736842105e-05, + "loss": 1.2059, + "step": 1840 + }, + { + "epoch": 4.8389217619986855, + "high_lr": 3.157894736842105e-05, + "low_lr": 6.315789473684211e-07, + "step": 1840 + }, + { + "epoch": 4.8389217619986855, + "high_lr": 3.157894736842105e-05, + "low_lr": 6.315789473684211e-07, + "step": 1840 + }, + { + "epoch": 4.8389217619986855, + "high_lr": 3.157894736842105e-05, + "low_lr": 6.315789473684211e-07, + "step": 1840 + }, + { + "epoch": 4.8389217619986855, + "high_lr": 3.157894736842105e-05, + "low_lr": 6.315789473684211e-07, + "step": 1840 + }, + { + "epoch": 4.8389217619986855, + "high_lr": 3.157894736842105e-05, + "low_lr": 6.315789473684211e-07, + "step": 1840 + }, + { + "epoch": 4.8389217619986855, + "high_lr": 3.157894736842105e-05, + "low_lr": 6.315789473684211e-07, + "step": 1840 + }, + { + "epoch": 4.8389217619986855, + "high_lr": 3.157894736842105e-05, + "low_lr": 6.315789473684211e-07, + "step": 1840 + }, + { + "epoch": 4.8389217619986855, + "high_lr": 3.157894736842105e-05, + "low_lr": 6.315789473684211e-07, + "step": 1840 + }, + { + "epoch": 4.84155161078238, + "grad_norm": 1.493750810623169, + "learning_rate": 3.105263157894737e-05, + "loss": 1.2165, + "step": 1841 + }, + { + "epoch": 4.84155161078238, + "high_lr": 3.105263157894737e-05, + "low_lr": 6.210526315789474e-07, + "step": 1841 + }, + { + "epoch": 4.84155161078238, + "high_lr": 3.105263157894737e-05, + "low_lr": 6.210526315789474e-07, + "step": 1841 + }, + { + "epoch": 4.84155161078238, + "high_lr": 3.105263157894737e-05, + "low_lr": 6.210526315789474e-07, + "step": 1841 + }, + { + "epoch": 4.84155161078238, + "high_lr": 3.105263157894737e-05, + "low_lr": 6.210526315789474e-07, + "step": 1841 + }, + { + "epoch": 4.84155161078238, + "high_lr": 3.105263157894737e-05, + "low_lr": 6.210526315789474e-07, + "step": 1841 + }, + { + "epoch": 4.84155161078238, + "high_lr": 3.105263157894737e-05, + "low_lr": 6.210526315789474e-07, + "step": 1841 + }, + { + "epoch": 4.84155161078238, + "high_lr": 3.105263157894737e-05, + "low_lr": 6.210526315789474e-07, + "step": 1841 + }, + { + "epoch": 4.84155161078238, + "high_lr": 3.105263157894737e-05, + "low_lr": 6.210526315789474e-07, + "step": 1841 + }, + { + "epoch": 4.844181459566075, + "grad_norm": 1.6403719186782837, + "learning_rate": 3.0526315789473684e-05, + "loss": 1.2407, + "step": 1842 + }, + { + "epoch": 4.844181459566075, + "high_lr": 3.0526315789473684e-05, + "low_lr": 6.105263157894738e-07, + "step": 1842 + }, + { + "epoch": 4.844181459566075, + "high_lr": 3.0526315789473684e-05, + "low_lr": 6.105263157894738e-07, + "step": 1842 + }, + { + "epoch": 4.844181459566075, + "high_lr": 3.0526315789473684e-05, + "low_lr": 6.105263157894738e-07, + "step": 1842 + }, + { + "epoch": 4.844181459566075, + "high_lr": 3.0526315789473684e-05, + "low_lr": 6.105263157894738e-07, + "step": 1842 + }, + { + "epoch": 4.844181459566075, + "high_lr": 3.0526315789473684e-05, + "low_lr": 6.105263157894738e-07, + "step": 1842 + }, + { + "epoch": 4.844181459566075, + "high_lr": 3.0526315789473684e-05, + "low_lr": 6.105263157894738e-07, + "step": 1842 + }, + { + "epoch": 4.844181459566075, + "high_lr": 3.0526315789473684e-05, + "low_lr": 6.105263157894738e-07, + "step": 1842 + }, + { + "epoch": 4.844181459566075, + "high_lr": 3.0526315789473684e-05, + "low_lr": 6.105263157894738e-07, + "step": 1842 + }, + { + "epoch": 4.8468113083497695, + "grad_norm": 1.5683202743530273, + "learning_rate": 3e-05, + "loss": 1.194, + "step": 1843 + }, + { + "epoch": 4.8468113083497695, + "high_lr": 3e-05, + "low_lr": 6.000000000000001e-07, + "step": 1843 + }, + { + "epoch": 4.8468113083497695, + "high_lr": 3e-05, + "low_lr": 6.000000000000001e-07, + "step": 1843 + }, + { + "epoch": 4.8468113083497695, + "high_lr": 3e-05, + "low_lr": 6.000000000000001e-07, + "step": 1843 + }, + { + "epoch": 4.8468113083497695, + "high_lr": 3e-05, + "low_lr": 6.000000000000001e-07, + "step": 1843 + }, + { + "epoch": 4.8468113083497695, + "high_lr": 3e-05, + "low_lr": 6.000000000000001e-07, + "step": 1843 + }, + { + "epoch": 4.8468113083497695, + "high_lr": 3e-05, + "low_lr": 6.000000000000001e-07, + "step": 1843 + }, + { + "epoch": 4.8468113083497695, + "high_lr": 3e-05, + "low_lr": 6.000000000000001e-07, + "step": 1843 + }, + { + "epoch": 4.8468113083497695, + "high_lr": 3e-05, + "low_lr": 6.000000000000001e-07, + "step": 1843 + }, + { + "epoch": 4.849441157133465, + "grad_norm": 1.7279189825057983, + "learning_rate": 2.9473684210526317e-05, + "loss": 1.2562, + "step": 1844 + }, + { + "epoch": 4.849441157133465, + "high_lr": 2.9473684210526317e-05, + "low_lr": 5.894736842105263e-07, + "step": 1844 + }, + { + "epoch": 4.849441157133465, + "high_lr": 2.9473684210526317e-05, + "low_lr": 5.894736842105263e-07, + "step": 1844 + }, + { + "epoch": 4.849441157133465, + "high_lr": 2.9473684210526317e-05, + "low_lr": 5.894736842105263e-07, + "step": 1844 + }, + { + "epoch": 4.849441157133465, + "high_lr": 2.9473684210526317e-05, + "low_lr": 5.894736842105263e-07, + "step": 1844 + }, + { + "epoch": 4.849441157133465, + "high_lr": 2.9473684210526317e-05, + "low_lr": 5.894736842105263e-07, + "step": 1844 + }, + { + "epoch": 4.849441157133465, + "high_lr": 2.9473684210526317e-05, + "low_lr": 5.894736842105263e-07, + "step": 1844 + }, + { + "epoch": 4.849441157133465, + "high_lr": 2.9473684210526317e-05, + "low_lr": 5.894736842105263e-07, + "step": 1844 + }, + { + "epoch": 4.849441157133465, + "high_lr": 2.9473684210526317e-05, + "low_lr": 5.894736842105263e-07, + "step": 1844 + }, + { + "epoch": 4.85207100591716, + "grad_norm": 1.4668720960617065, + "learning_rate": 2.8947368421052634e-05, + "loss": 1.2107, + "step": 1845 + }, + { + "epoch": 4.85207100591716, + "high_lr": 2.8947368421052634e-05, + "low_lr": 5.789473684210526e-07, + "step": 1845 + }, + { + "epoch": 4.85207100591716, + "high_lr": 2.8947368421052634e-05, + "low_lr": 5.789473684210526e-07, + "step": 1845 + }, + { + "epoch": 4.85207100591716, + "high_lr": 2.8947368421052634e-05, + "low_lr": 5.789473684210526e-07, + "step": 1845 + }, + { + "epoch": 4.85207100591716, + "high_lr": 2.8947368421052634e-05, + "low_lr": 5.789473684210526e-07, + "step": 1845 + }, + { + "epoch": 4.85207100591716, + "high_lr": 2.8947368421052634e-05, + "low_lr": 5.789473684210526e-07, + "step": 1845 + }, + { + "epoch": 4.85207100591716, + "high_lr": 2.8947368421052634e-05, + "low_lr": 5.789473684210526e-07, + "step": 1845 + }, + { + "epoch": 4.85207100591716, + "high_lr": 2.8947368421052634e-05, + "low_lr": 5.789473684210526e-07, + "step": 1845 + }, + { + "epoch": 4.85207100591716, + "high_lr": 2.8947368421052634e-05, + "low_lr": 5.789473684210526e-07, + "step": 1845 + }, + { + "epoch": 4.854700854700854, + "grad_norm": 1.5402225255966187, + "learning_rate": 2.842105263157895e-05, + "loss": 1.2733, + "step": 1846 + }, + { + "epoch": 4.854700854700854, + "high_lr": 2.842105263157895e-05, + "low_lr": 5.68421052631579e-07, + "step": 1846 + }, + { + "epoch": 4.854700854700854, + "high_lr": 2.842105263157895e-05, + "low_lr": 5.68421052631579e-07, + "step": 1846 + }, + { + "epoch": 4.854700854700854, + "high_lr": 2.842105263157895e-05, + "low_lr": 5.68421052631579e-07, + "step": 1846 + }, + { + "epoch": 4.854700854700854, + "high_lr": 2.842105263157895e-05, + "low_lr": 5.68421052631579e-07, + "step": 1846 + }, + { + "epoch": 4.854700854700854, + "high_lr": 2.842105263157895e-05, + "low_lr": 5.68421052631579e-07, + "step": 1846 + }, + { + "epoch": 4.854700854700854, + "high_lr": 2.842105263157895e-05, + "low_lr": 5.68421052631579e-07, + "step": 1846 + }, + { + "epoch": 4.854700854700854, + "high_lr": 2.842105263157895e-05, + "low_lr": 5.68421052631579e-07, + "step": 1846 + }, + { + "epoch": 4.854700854700854, + "high_lr": 2.842105263157895e-05, + "low_lr": 5.68421052631579e-07, + "step": 1846 + }, + { + "epoch": 4.85733070348455, + "grad_norm": 1.7148109674453735, + "learning_rate": 2.7894736842105263e-05, + "loss": 1.1944, + "step": 1847 + }, + { + "epoch": 4.85733070348455, + "high_lr": 2.7894736842105263e-05, + "low_lr": 5.578947368421053e-07, + "step": 1847 + }, + { + "epoch": 4.85733070348455, + "high_lr": 2.7894736842105263e-05, + "low_lr": 5.578947368421053e-07, + "step": 1847 + }, + { + "epoch": 4.85733070348455, + "high_lr": 2.7894736842105263e-05, + "low_lr": 5.578947368421053e-07, + "step": 1847 + }, + { + "epoch": 4.85733070348455, + "high_lr": 2.7894736842105263e-05, + "low_lr": 5.578947368421053e-07, + "step": 1847 + }, + { + "epoch": 4.85733070348455, + "high_lr": 2.7894736842105263e-05, + "low_lr": 5.578947368421053e-07, + "step": 1847 + }, + { + "epoch": 4.85733070348455, + "high_lr": 2.7894736842105263e-05, + "low_lr": 5.578947368421053e-07, + "step": 1847 + }, + { + "epoch": 4.85733070348455, + "high_lr": 2.7894736842105263e-05, + "low_lr": 5.578947368421053e-07, + "step": 1847 + }, + { + "epoch": 4.85733070348455, + "high_lr": 2.7894736842105263e-05, + "low_lr": 5.578947368421053e-07, + "step": 1847 + }, + { + "epoch": 4.8599605522682445, + "grad_norm": 1.5167216062545776, + "learning_rate": 2.736842105263158e-05, + "loss": 1.2431, + "step": 1848 + }, + { + "epoch": 4.8599605522682445, + "high_lr": 2.736842105263158e-05, + "low_lr": 5.473684210526316e-07, + "step": 1848 + }, + { + "epoch": 4.8599605522682445, + "high_lr": 2.736842105263158e-05, + "low_lr": 5.473684210526316e-07, + "step": 1848 + }, + { + "epoch": 4.8599605522682445, + "high_lr": 2.736842105263158e-05, + "low_lr": 5.473684210526316e-07, + "step": 1848 + }, + { + "epoch": 4.8599605522682445, + "high_lr": 2.736842105263158e-05, + "low_lr": 5.473684210526316e-07, + "step": 1848 + }, + { + "epoch": 4.8599605522682445, + "high_lr": 2.736842105263158e-05, + "low_lr": 5.473684210526316e-07, + "step": 1848 + }, + { + "epoch": 4.8599605522682445, + "high_lr": 2.736842105263158e-05, + "low_lr": 5.473684210526316e-07, + "step": 1848 + }, + { + "epoch": 4.8599605522682445, + "high_lr": 2.736842105263158e-05, + "low_lr": 5.473684210526316e-07, + "step": 1848 + }, + { + "epoch": 4.8599605522682445, + "high_lr": 2.736842105263158e-05, + "low_lr": 5.473684210526316e-07, + "step": 1848 + }, + { + "epoch": 4.862590401051939, + "grad_norm": 1.5432519912719727, + "learning_rate": 2.6842105263157896e-05, + "loss": 1.1531, + "step": 1849 + }, + { + "epoch": 4.862590401051939, + "high_lr": 2.6842105263157896e-05, + "low_lr": 5.368421052631579e-07, + "step": 1849 + }, + { + "epoch": 4.862590401051939, + "high_lr": 2.6842105263157896e-05, + "low_lr": 5.368421052631579e-07, + "step": 1849 + }, + { + "epoch": 4.862590401051939, + "high_lr": 2.6842105263157896e-05, + "low_lr": 5.368421052631579e-07, + "step": 1849 + }, + { + "epoch": 4.862590401051939, + "high_lr": 2.6842105263157896e-05, + "low_lr": 5.368421052631579e-07, + "step": 1849 + }, + { + "epoch": 4.862590401051939, + "high_lr": 2.6842105263157896e-05, + "low_lr": 5.368421052631579e-07, + "step": 1849 + }, + { + "epoch": 4.862590401051939, + "high_lr": 2.6842105263157896e-05, + "low_lr": 5.368421052631579e-07, + "step": 1849 + }, + { + "epoch": 4.862590401051939, + "high_lr": 2.6842105263157896e-05, + "low_lr": 5.368421052631579e-07, + "step": 1849 + }, + { + "epoch": 4.862590401051939, + "high_lr": 2.6842105263157896e-05, + "low_lr": 5.368421052631579e-07, + "step": 1849 + }, + { + "epoch": 4.865220249835635, + "grad_norm": 1.5639528036117554, + "learning_rate": 2.631578947368421e-05, + "loss": 1.1994, + "step": 1850 + }, + { + "epoch": 4.865220249835635, + "high_lr": 2.631578947368421e-05, + "low_lr": 5.263157894736843e-07, + "step": 1850 + }, + { + "epoch": 4.865220249835635, + "high_lr": 2.631578947368421e-05, + "low_lr": 5.263157894736843e-07, + "step": 1850 + }, + { + "epoch": 4.865220249835635, + "high_lr": 2.631578947368421e-05, + "low_lr": 5.263157894736843e-07, + "step": 1850 + }, + { + "epoch": 4.865220249835635, + "high_lr": 2.631578947368421e-05, + "low_lr": 5.263157894736843e-07, + "step": 1850 + }, + { + "epoch": 4.865220249835635, + "high_lr": 2.631578947368421e-05, + "low_lr": 5.263157894736843e-07, + "step": 1850 + }, + { + "epoch": 4.865220249835635, + "high_lr": 2.631578947368421e-05, + "low_lr": 5.263157894736843e-07, + "step": 1850 + }, + { + "epoch": 4.865220249835635, + "high_lr": 2.631578947368421e-05, + "low_lr": 5.263157894736843e-07, + "step": 1850 + }, + { + "epoch": 4.865220249835635, + "high_lr": 2.631578947368421e-05, + "low_lr": 5.263157894736843e-07, + "step": 1850 + }, + { + "epoch": 4.867850098619329, + "grad_norm": 1.597714900970459, + "learning_rate": 2.5789473684210525e-05, + "loss": 1.1889, + "step": 1851 + }, + { + "epoch": 4.867850098619329, + "high_lr": 2.5789473684210525e-05, + "low_lr": 5.157894736842106e-07, + "step": 1851 + }, + { + "epoch": 4.867850098619329, + "high_lr": 2.5789473684210525e-05, + "low_lr": 5.157894736842106e-07, + "step": 1851 + }, + { + "epoch": 4.867850098619329, + "high_lr": 2.5789473684210525e-05, + "low_lr": 5.157894736842106e-07, + "step": 1851 + }, + { + "epoch": 4.867850098619329, + "high_lr": 2.5789473684210525e-05, + "low_lr": 5.157894736842106e-07, + "step": 1851 + }, + { + "epoch": 4.867850098619329, + "high_lr": 2.5789473684210525e-05, + "low_lr": 5.157894736842106e-07, + "step": 1851 + }, + { + "epoch": 4.867850098619329, + "high_lr": 2.5789473684210525e-05, + "low_lr": 5.157894736842106e-07, + "step": 1851 + }, + { + "epoch": 4.867850098619329, + "high_lr": 2.5789473684210525e-05, + "low_lr": 5.157894736842106e-07, + "step": 1851 + }, + { + "epoch": 4.867850098619329, + "high_lr": 2.5789473684210525e-05, + "low_lr": 5.157894736842106e-07, + "step": 1851 + }, + { + "epoch": 4.870479947403024, + "grad_norm": 1.6463160514831543, + "learning_rate": 2.526315789473684e-05, + "loss": 1.2115, + "step": 1852 + }, + { + "epoch": 4.870479947403024, + "high_lr": 2.526315789473684e-05, + "low_lr": 5.052631578947369e-07, + "step": 1852 + }, + { + "epoch": 4.870479947403024, + "high_lr": 2.526315789473684e-05, + "low_lr": 5.052631578947369e-07, + "step": 1852 + }, + { + "epoch": 4.870479947403024, + "high_lr": 2.526315789473684e-05, + "low_lr": 5.052631578947369e-07, + "step": 1852 + }, + { + "epoch": 4.870479947403024, + "high_lr": 2.526315789473684e-05, + "low_lr": 5.052631578947369e-07, + "step": 1852 + }, + { + "epoch": 4.870479947403024, + "high_lr": 2.526315789473684e-05, + "low_lr": 5.052631578947369e-07, + "step": 1852 + }, + { + "epoch": 4.870479947403024, + "high_lr": 2.526315789473684e-05, + "low_lr": 5.052631578947369e-07, + "step": 1852 + }, + { + "epoch": 4.870479947403024, + "high_lr": 2.526315789473684e-05, + "low_lr": 5.052631578947369e-07, + "step": 1852 + }, + { + "epoch": 4.870479947403024, + "high_lr": 2.526315789473684e-05, + "low_lr": 5.052631578947369e-07, + "step": 1852 + }, + { + "epoch": 4.87310979618672, + "grad_norm": 1.67196524143219, + "learning_rate": 2.4736842105263158e-05, + "loss": 1.2499, + "step": 1853 + }, + { + "epoch": 4.87310979618672, + "high_lr": 2.4736842105263158e-05, + "low_lr": 4.947368421052632e-07, + "step": 1853 + }, + { + "epoch": 4.87310979618672, + "high_lr": 2.4736842105263158e-05, + "low_lr": 4.947368421052632e-07, + "step": 1853 + }, + { + "epoch": 4.87310979618672, + "high_lr": 2.4736842105263158e-05, + "low_lr": 4.947368421052632e-07, + "step": 1853 + }, + { + "epoch": 4.87310979618672, + "high_lr": 2.4736842105263158e-05, + "low_lr": 4.947368421052632e-07, + "step": 1853 + }, + { + "epoch": 4.87310979618672, + "high_lr": 2.4736842105263158e-05, + "low_lr": 4.947368421052632e-07, + "step": 1853 + }, + { + "epoch": 4.87310979618672, + "high_lr": 2.4736842105263158e-05, + "low_lr": 4.947368421052632e-07, + "step": 1853 + }, + { + "epoch": 4.87310979618672, + "high_lr": 2.4736842105263158e-05, + "low_lr": 4.947368421052632e-07, + "step": 1853 + }, + { + "epoch": 4.87310979618672, + "high_lr": 2.4736842105263158e-05, + "low_lr": 4.947368421052632e-07, + "step": 1853 + }, + { + "epoch": 4.875739644970414, + "grad_norm": 1.541540265083313, + "learning_rate": 2.4210526315789474e-05, + "loss": 1.1752, + "step": 1854 + }, + { + "epoch": 4.875739644970414, + "high_lr": 2.4210526315789474e-05, + "low_lr": 4.842105263157895e-07, + "step": 1854 + }, + { + "epoch": 4.875739644970414, + "high_lr": 2.4210526315789474e-05, + "low_lr": 4.842105263157895e-07, + "step": 1854 + }, + { + "epoch": 4.875739644970414, + "high_lr": 2.4210526315789474e-05, + "low_lr": 4.842105263157895e-07, + "step": 1854 + }, + { + "epoch": 4.875739644970414, + "high_lr": 2.4210526315789474e-05, + "low_lr": 4.842105263157895e-07, + "step": 1854 + }, + { + "epoch": 4.875739644970414, + "high_lr": 2.4210526315789474e-05, + "low_lr": 4.842105263157895e-07, + "step": 1854 + }, + { + "epoch": 4.875739644970414, + "high_lr": 2.4210526315789474e-05, + "low_lr": 4.842105263157895e-07, + "step": 1854 + }, + { + "epoch": 4.875739644970414, + "high_lr": 2.4210526315789474e-05, + "low_lr": 4.842105263157895e-07, + "step": 1854 + }, + { + "epoch": 4.875739644970414, + "high_lr": 2.4210526315789474e-05, + "low_lr": 4.842105263157895e-07, + "step": 1854 + }, + { + "epoch": 4.878369493754109, + "grad_norm": 1.6568161249160767, + "learning_rate": 2.368421052631579e-05, + "loss": 1.1952, + "step": 1855 + }, + { + "epoch": 4.878369493754109, + "high_lr": 2.368421052631579e-05, + "low_lr": 4.7368421052631585e-07, + "step": 1855 + }, + { + "epoch": 4.878369493754109, + "high_lr": 2.368421052631579e-05, + "low_lr": 4.7368421052631585e-07, + "step": 1855 + }, + { + "epoch": 4.878369493754109, + "high_lr": 2.368421052631579e-05, + "low_lr": 4.7368421052631585e-07, + "step": 1855 + }, + { + "epoch": 4.878369493754109, + "high_lr": 2.368421052631579e-05, + "low_lr": 4.7368421052631585e-07, + "step": 1855 + }, + { + "epoch": 4.878369493754109, + "high_lr": 2.368421052631579e-05, + "low_lr": 4.7368421052631585e-07, + "step": 1855 + }, + { + "epoch": 4.878369493754109, + "high_lr": 2.368421052631579e-05, + "low_lr": 4.7368421052631585e-07, + "step": 1855 + }, + { + "epoch": 4.878369493754109, + "high_lr": 2.368421052631579e-05, + "low_lr": 4.7368421052631585e-07, + "step": 1855 + }, + { + "epoch": 4.878369493754109, + "high_lr": 2.368421052631579e-05, + "low_lr": 4.7368421052631585e-07, + "step": 1855 + }, + { + "epoch": 4.880999342537804, + "grad_norm": 1.5552313327789307, + "learning_rate": 2.3157894736842107e-05, + "loss": 1.19, + "step": 1856 + }, + { + "epoch": 4.880999342537804, + "high_lr": 2.3157894736842107e-05, + "low_lr": 4.631578947368422e-07, + "step": 1856 + }, + { + "epoch": 4.880999342537804, + "high_lr": 2.3157894736842107e-05, + "low_lr": 4.631578947368422e-07, + "step": 1856 + }, + { + "epoch": 4.880999342537804, + "high_lr": 2.3157894736842107e-05, + "low_lr": 4.631578947368422e-07, + "step": 1856 + }, + { + "epoch": 4.880999342537804, + "high_lr": 2.3157894736842107e-05, + "low_lr": 4.631578947368422e-07, + "step": 1856 + }, + { + "epoch": 4.880999342537804, + "high_lr": 2.3157894736842107e-05, + "low_lr": 4.631578947368422e-07, + "step": 1856 + }, + { + "epoch": 4.880999342537804, + "high_lr": 2.3157894736842107e-05, + "low_lr": 4.631578947368422e-07, + "step": 1856 + }, + { + "epoch": 4.880999342537804, + "high_lr": 2.3157894736842107e-05, + "low_lr": 4.631578947368422e-07, + "step": 1856 + }, + { + "epoch": 4.880999342537804, + "high_lr": 2.3157894736842107e-05, + "low_lr": 4.631578947368422e-07, + "step": 1856 + }, + { + "epoch": 4.883629191321499, + "grad_norm": 1.3749786615371704, + "learning_rate": 2.2631578947368423e-05, + "loss": 1.1782, + "step": 1857 + }, + { + "epoch": 4.883629191321499, + "high_lr": 2.2631578947368423e-05, + "low_lr": 4.526315789473685e-07, + "step": 1857 + }, + { + "epoch": 4.883629191321499, + "high_lr": 2.2631578947368423e-05, + "low_lr": 4.526315789473685e-07, + "step": 1857 + }, + { + "epoch": 4.883629191321499, + "high_lr": 2.2631578947368423e-05, + "low_lr": 4.526315789473685e-07, + "step": 1857 + }, + { + "epoch": 4.883629191321499, + "high_lr": 2.2631578947368423e-05, + "low_lr": 4.526315789473685e-07, + "step": 1857 + }, + { + "epoch": 4.883629191321499, + "high_lr": 2.2631578947368423e-05, + "low_lr": 4.526315789473685e-07, + "step": 1857 + }, + { + "epoch": 4.883629191321499, + "high_lr": 2.2631578947368423e-05, + "low_lr": 4.526315789473685e-07, + "step": 1857 + }, + { + "epoch": 4.883629191321499, + "high_lr": 2.2631578947368423e-05, + "low_lr": 4.526315789473685e-07, + "step": 1857 + }, + { + "epoch": 4.883629191321499, + "high_lr": 2.2631578947368423e-05, + "low_lr": 4.526315789473685e-07, + "step": 1857 + }, + { + "epoch": 4.886259040105194, + "grad_norm": 1.627353310585022, + "learning_rate": 2.2105263157894736e-05, + "loss": 1.2503, + "step": 1858 + }, + { + "epoch": 4.886259040105194, + "high_lr": 2.2105263157894736e-05, + "low_lr": 4.421052631578947e-07, + "step": 1858 + }, + { + "epoch": 4.886259040105194, + "high_lr": 2.2105263157894736e-05, + "low_lr": 4.421052631578947e-07, + "step": 1858 + }, + { + "epoch": 4.886259040105194, + "high_lr": 2.2105263157894736e-05, + "low_lr": 4.421052631578947e-07, + "step": 1858 + }, + { + "epoch": 4.886259040105194, + "high_lr": 2.2105263157894736e-05, + "low_lr": 4.421052631578947e-07, + "step": 1858 + }, + { + "epoch": 4.886259040105194, + "high_lr": 2.2105263157894736e-05, + "low_lr": 4.421052631578947e-07, + "step": 1858 + }, + { + "epoch": 4.886259040105194, + "high_lr": 2.2105263157894736e-05, + "low_lr": 4.421052631578947e-07, + "step": 1858 + }, + { + "epoch": 4.886259040105194, + "high_lr": 2.2105263157894736e-05, + "low_lr": 4.421052631578947e-07, + "step": 1858 + }, + { + "epoch": 4.886259040105194, + "high_lr": 2.2105263157894736e-05, + "low_lr": 4.421052631578947e-07, + "step": 1858 + }, + { + "epoch": 4.888888888888889, + "grad_norm": 1.7379108667373657, + "learning_rate": 2.1578947368421053e-05, + "loss": 1.2567, + "step": 1859 + }, + { + "epoch": 4.888888888888889, + "high_lr": 2.1578947368421053e-05, + "low_lr": 4.3157894736842105e-07, + "step": 1859 + }, + { + "epoch": 4.888888888888889, + "high_lr": 2.1578947368421053e-05, + "low_lr": 4.3157894736842105e-07, + "step": 1859 + }, + { + "epoch": 4.888888888888889, + "high_lr": 2.1578947368421053e-05, + "low_lr": 4.3157894736842105e-07, + "step": 1859 + }, + { + "epoch": 4.888888888888889, + "high_lr": 2.1578947368421053e-05, + "low_lr": 4.3157894736842105e-07, + "step": 1859 + }, + { + "epoch": 4.888888888888889, + "high_lr": 2.1578947368421053e-05, + "low_lr": 4.3157894736842105e-07, + "step": 1859 + }, + { + "epoch": 4.888888888888889, + "high_lr": 2.1578947368421053e-05, + "low_lr": 4.3157894736842105e-07, + "step": 1859 + }, + { + "epoch": 4.888888888888889, + "high_lr": 2.1578947368421053e-05, + "low_lr": 4.3157894736842105e-07, + "step": 1859 + }, + { + "epoch": 4.888888888888889, + "high_lr": 2.1578947368421053e-05, + "low_lr": 4.3157894736842105e-07, + "step": 1859 + }, + { + "epoch": 4.891518737672584, + "grad_norm": 1.6780471801757812, + "learning_rate": 2.105263157894737e-05, + "loss": 1.2238, + "step": 1860 + }, + { + "epoch": 4.891518737672584, + "high_lr": 2.105263157894737e-05, + "low_lr": 4.210526315789474e-07, + "step": 1860 + }, + { + "epoch": 4.891518737672584, + "high_lr": 2.105263157894737e-05, + "low_lr": 4.210526315789474e-07, + "step": 1860 + }, + { + "epoch": 4.891518737672584, + "high_lr": 2.105263157894737e-05, + "low_lr": 4.210526315789474e-07, + "step": 1860 + }, + { + "epoch": 4.891518737672584, + "high_lr": 2.105263157894737e-05, + "low_lr": 4.210526315789474e-07, + "step": 1860 + }, + { + "epoch": 4.891518737672584, + "high_lr": 2.105263157894737e-05, + "low_lr": 4.210526315789474e-07, + "step": 1860 + }, + { + "epoch": 4.891518737672584, + "high_lr": 2.105263157894737e-05, + "low_lr": 4.210526315789474e-07, + "step": 1860 + }, + { + "epoch": 4.891518737672584, + "high_lr": 2.105263157894737e-05, + "low_lr": 4.210526315789474e-07, + "step": 1860 + }, + { + "epoch": 4.891518737672584, + "high_lr": 2.105263157894737e-05, + "low_lr": 4.210526315789474e-07, + "step": 1860 + }, + { + "epoch": 4.894148586456279, + "grad_norm": 1.7077248096466064, + "learning_rate": 2.0526315789473685e-05, + "loss": 1.2527, + "step": 1861 + }, + { + "epoch": 4.894148586456279, + "high_lr": 2.0526315789473685e-05, + "low_lr": 4.105263157894737e-07, + "step": 1861 + }, + { + "epoch": 4.894148586456279, + "high_lr": 2.0526315789473685e-05, + "low_lr": 4.105263157894737e-07, + "step": 1861 + }, + { + "epoch": 4.894148586456279, + "high_lr": 2.0526315789473685e-05, + "low_lr": 4.105263157894737e-07, + "step": 1861 + }, + { + "epoch": 4.894148586456279, + "high_lr": 2.0526315789473685e-05, + "low_lr": 4.105263157894737e-07, + "step": 1861 + }, + { + "epoch": 4.894148586456279, + "high_lr": 2.0526315789473685e-05, + "low_lr": 4.105263157894737e-07, + "step": 1861 + }, + { + "epoch": 4.894148586456279, + "high_lr": 2.0526315789473685e-05, + "low_lr": 4.105263157894737e-07, + "step": 1861 + }, + { + "epoch": 4.894148586456279, + "high_lr": 2.0526315789473685e-05, + "low_lr": 4.105263157894737e-07, + "step": 1861 + }, + { + "epoch": 4.894148586456279, + "high_lr": 2.0526315789473685e-05, + "low_lr": 4.105263157894737e-07, + "step": 1861 + }, + { + "epoch": 4.896778435239973, + "grad_norm": 1.4945363998413086, + "learning_rate": 2e-05, + "loss": 1.2072, + "step": 1862 + }, + { + "epoch": 4.896778435239973, + "high_lr": 2e-05, + "low_lr": 4.0000000000000003e-07, + "step": 1862 + }, + { + "epoch": 4.896778435239973, + "high_lr": 2e-05, + "low_lr": 4.0000000000000003e-07, + "step": 1862 + }, + { + "epoch": 4.896778435239973, + "high_lr": 2e-05, + "low_lr": 4.0000000000000003e-07, + "step": 1862 + }, + { + "epoch": 4.896778435239973, + "high_lr": 2e-05, + "low_lr": 4.0000000000000003e-07, + "step": 1862 + }, + { + "epoch": 4.896778435239973, + "high_lr": 2e-05, + "low_lr": 4.0000000000000003e-07, + "step": 1862 + }, + { + "epoch": 4.896778435239973, + "high_lr": 2e-05, + "low_lr": 4.0000000000000003e-07, + "step": 1862 + }, + { + "epoch": 4.896778435239973, + "high_lr": 2e-05, + "low_lr": 4.0000000000000003e-07, + "step": 1862 + }, + { + "epoch": 4.896778435239973, + "high_lr": 2e-05, + "low_lr": 4.0000000000000003e-07, + "step": 1862 + }, + { + "epoch": 4.899408284023669, + "grad_norm": 1.5202914476394653, + "learning_rate": 1.9473684210526318e-05, + "loss": 1.2008, + "step": 1863 + }, + { + "epoch": 4.899408284023669, + "high_lr": 1.9473684210526318e-05, + "low_lr": 3.8947368421052636e-07, + "step": 1863 + }, + { + "epoch": 4.899408284023669, + "high_lr": 1.9473684210526318e-05, + "low_lr": 3.8947368421052636e-07, + "step": 1863 + }, + { + "epoch": 4.899408284023669, + "high_lr": 1.9473684210526318e-05, + "low_lr": 3.8947368421052636e-07, + "step": 1863 + }, + { + "epoch": 4.899408284023669, + "high_lr": 1.9473684210526318e-05, + "low_lr": 3.8947368421052636e-07, + "step": 1863 + }, + { + "epoch": 4.899408284023669, + "high_lr": 1.9473684210526318e-05, + "low_lr": 3.8947368421052636e-07, + "step": 1863 + }, + { + "epoch": 4.899408284023669, + "high_lr": 1.9473684210526318e-05, + "low_lr": 3.8947368421052636e-07, + "step": 1863 + }, + { + "epoch": 4.899408284023669, + "high_lr": 1.9473684210526318e-05, + "low_lr": 3.8947368421052636e-07, + "step": 1863 + }, + { + "epoch": 4.899408284023669, + "high_lr": 1.9473684210526318e-05, + "low_lr": 3.8947368421052636e-07, + "step": 1863 + }, + { + "epoch": 4.9020381328073634, + "grad_norm": 1.537229299545288, + "learning_rate": 1.8947368421052634e-05, + "loss": 1.2101, + "step": 1864 + }, + { + "epoch": 4.9020381328073634, + "high_lr": 1.8947368421052634e-05, + "low_lr": 3.789473684210527e-07, + "step": 1864 + }, + { + "epoch": 4.9020381328073634, + "high_lr": 1.8947368421052634e-05, + "low_lr": 3.789473684210527e-07, + "step": 1864 + }, + { + "epoch": 4.9020381328073634, + "high_lr": 1.8947368421052634e-05, + "low_lr": 3.789473684210527e-07, + "step": 1864 + }, + { + "epoch": 4.9020381328073634, + "high_lr": 1.8947368421052634e-05, + "low_lr": 3.789473684210527e-07, + "step": 1864 + }, + { + "epoch": 4.9020381328073634, + "high_lr": 1.8947368421052634e-05, + "low_lr": 3.789473684210527e-07, + "step": 1864 + }, + { + "epoch": 4.9020381328073634, + "high_lr": 1.8947368421052634e-05, + "low_lr": 3.789473684210527e-07, + "step": 1864 + }, + { + "epoch": 4.9020381328073634, + "high_lr": 1.8947368421052634e-05, + "low_lr": 3.789473684210527e-07, + "step": 1864 + }, + { + "epoch": 4.9020381328073634, + "high_lr": 1.8947368421052634e-05, + "low_lr": 3.789473684210527e-07, + "step": 1864 + }, + { + "epoch": 4.904667981591059, + "grad_norm": 1.3996493816375732, + "learning_rate": 1.8421052631578947e-05, + "loss": 1.1862, + "step": 1865 + }, + { + "epoch": 4.904667981591059, + "high_lr": 1.8421052631578947e-05, + "low_lr": 3.6842105263157896e-07, + "step": 1865 + }, + { + "epoch": 4.904667981591059, + "high_lr": 1.8421052631578947e-05, + "low_lr": 3.6842105263157896e-07, + "step": 1865 + }, + { + "epoch": 4.904667981591059, + "high_lr": 1.8421052631578947e-05, + "low_lr": 3.6842105263157896e-07, + "step": 1865 + }, + { + "epoch": 4.904667981591059, + "high_lr": 1.8421052631578947e-05, + "low_lr": 3.6842105263157896e-07, + "step": 1865 + }, + { + "epoch": 4.904667981591059, + "high_lr": 1.8421052631578947e-05, + "low_lr": 3.6842105263157896e-07, + "step": 1865 + }, + { + "epoch": 4.904667981591059, + "high_lr": 1.8421052631578947e-05, + "low_lr": 3.6842105263157896e-07, + "step": 1865 + }, + { + "epoch": 4.904667981591059, + "high_lr": 1.8421052631578947e-05, + "low_lr": 3.6842105263157896e-07, + "step": 1865 + }, + { + "epoch": 4.904667981591059, + "high_lr": 1.8421052631578947e-05, + "low_lr": 3.6842105263157896e-07, + "step": 1865 + }, + { + "epoch": 4.907297830374754, + "grad_norm": 1.4659632444381714, + "learning_rate": 1.7894736842105264e-05, + "loss": 1.1872, + "step": 1866 + }, + { + "epoch": 4.907297830374754, + "high_lr": 1.7894736842105264e-05, + "low_lr": 3.578947368421053e-07, + "step": 1866 + }, + { + "epoch": 4.907297830374754, + "high_lr": 1.7894736842105264e-05, + "low_lr": 3.578947368421053e-07, + "step": 1866 + }, + { + "epoch": 4.907297830374754, + "high_lr": 1.7894736842105264e-05, + "low_lr": 3.578947368421053e-07, + "step": 1866 + }, + { + "epoch": 4.907297830374754, + "high_lr": 1.7894736842105264e-05, + "low_lr": 3.578947368421053e-07, + "step": 1866 + }, + { + "epoch": 4.907297830374754, + "high_lr": 1.7894736842105264e-05, + "low_lr": 3.578947368421053e-07, + "step": 1866 + }, + { + "epoch": 4.907297830374754, + "high_lr": 1.7894736842105264e-05, + "low_lr": 3.578947368421053e-07, + "step": 1866 + }, + { + "epoch": 4.907297830374754, + "high_lr": 1.7894736842105264e-05, + "low_lr": 3.578947368421053e-07, + "step": 1866 + }, + { + "epoch": 4.907297830374754, + "high_lr": 1.7894736842105264e-05, + "low_lr": 3.578947368421053e-07, + "step": 1866 + }, + { + "epoch": 4.909927679158448, + "grad_norm": 1.7892240285873413, + "learning_rate": 1.736842105263158e-05, + "loss": 1.1989, + "step": 1867 + }, + { + "epoch": 4.909927679158448, + "high_lr": 1.736842105263158e-05, + "low_lr": 3.4736842105263157e-07, + "step": 1867 + }, + { + "epoch": 4.909927679158448, + "high_lr": 1.736842105263158e-05, + "low_lr": 3.4736842105263157e-07, + "step": 1867 + }, + { + "epoch": 4.909927679158448, + "high_lr": 1.736842105263158e-05, + "low_lr": 3.4736842105263157e-07, + "step": 1867 + }, + { + "epoch": 4.909927679158448, + "high_lr": 1.736842105263158e-05, + "low_lr": 3.4736842105263157e-07, + "step": 1867 + }, + { + "epoch": 4.909927679158448, + "high_lr": 1.736842105263158e-05, + "low_lr": 3.4736842105263157e-07, + "step": 1867 + }, + { + "epoch": 4.909927679158448, + "high_lr": 1.736842105263158e-05, + "low_lr": 3.4736842105263157e-07, + "step": 1867 + }, + { + "epoch": 4.909927679158448, + "high_lr": 1.736842105263158e-05, + "low_lr": 3.4736842105263157e-07, + "step": 1867 + }, + { + "epoch": 4.909927679158448, + "high_lr": 1.736842105263158e-05, + "low_lr": 3.4736842105263157e-07, + "step": 1867 + }, + { + "epoch": 4.912557527942143, + "grad_norm": 1.5503710508346558, + "learning_rate": 1.6842105263157893e-05, + "loss": 1.1529, + "step": 1868 + }, + { + "epoch": 4.912557527942143, + "high_lr": 1.6842105263157893e-05, + "low_lr": 3.368421052631579e-07, + "step": 1868 + }, + { + "epoch": 4.912557527942143, + "high_lr": 1.6842105263157893e-05, + "low_lr": 3.368421052631579e-07, + "step": 1868 + }, + { + "epoch": 4.912557527942143, + "high_lr": 1.6842105263157893e-05, + "low_lr": 3.368421052631579e-07, + "step": 1868 + }, + { + "epoch": 4.912557527942143, + "high_lr": 1.6842105263157893e-05, + "low_lr": 3.368421052631579e-07, + "step": 1868 + }, + { + "epoch": 4.912557527942143, + "high_lr": 1.6842105263157893e-05, + "low_lr": 3.368421052631579e-07, + "step": 1868 + }, + { + "epoch": 4.912557527942143, + "high_lr": 1.6842105263157893e-05, + "low_lr": 3.368421052631579e-07, + "step": 1868 + }, + { + "epoch": 4.912557527942143, + "high_lr": 1.6842105263157893e-05, + "low_lr": 3.368421052631579e-07, + "step": 1868 + }, + { + "epoch": 4.912557527942143, + "high_lr": 1.6842105263157893e-05, + "low_lr": 3.368421052631579e-07, + "step": 1868 + }, + { + "epoch": 4.9151873767258385, + "grad_norm": 1.544754981994629, + "learning_rate": 1.631578947368421e-05, + "loss": 1.1871, + "step": 1869 + }, + { + "epoch": 4.9151873767258385, + "high_lr": 1.631578947368421e-05, + "low_lr": 3.263157894736842e-07, + "step": 1869 + }, + { + "epoch": 4.9151873767258385, + "high_lr": 1.631578947368421e-05, + "low_lr": 3.263157894736842e-07, + "step": 1869 + }, + { + "epoch": 4.9151873767258385, + "high_lr": 1.631578947368421e-05, + "low_lr": 3.263157894736842e-07, + "step": 1869 + }, + { + "epoch": 4.9151873767258385, + "high_lr": 1.631578947368421e-05, + "low_lr": 3.263157894736842e-07, + "step": 1869 + }, + { + "epoch": 4.9151873767258385, + "high_lr": 1.631578947368421e-05, + "low_lr": 3.263157894736842e-07, + "step": 1869 + }, + { + "epoch": 4.9151873767258385, + "high_lr": 1.631578947368421e-05, + "low_lr": 3.263157894736842e-07, + "step": 1869 + }, + { + "epoch": 4.9151873767258385, + "high_lr": 1.631578947368421e-05, + "low_lr": 3.263157894736842e-07, + "step": 1869 + }, + { + "epoch": 4.9151873767258385, + "high_lr": 1.631578947368421e-05, + "low_lr": 3.263157894736842e-07, + "step": 1869 + }, + { + "epoch": 4.917817225509533, + "grad_norm": 1.4125280380249023, + "learning_rate": 1.5789473684210526e-05, + "loss": 1.2186, + "step": 1870 + }, + { + "epoch": 4.917817225509533, + "high_lr": 1.5789473684210526e-05, + "low_lr": 3.1578947368421055e-07, + "step": 1870 + }, + { + "epoch": 4.917817225509533, + "high_lr": 1.5789473684210526e-05, + "low_lr": 3.1578947368421055e-07, + "step": 1870 + }, + { + "epoch": 4.917817225509533, + "high_lr": 1.5789473684210526e-05, + "low_lr": 3.1578947368421055e-07, + "step": 1870 + }, + { + "epoch": 4.917817225509533, + "high_lr": 1.5789473684210526e-05, + "low_lr": 3.1578947368421055e-07, + "step": 1870 + }, + { + "epoch": 4.917817225509533, + "high_lr": 1.5789473684210526e-05, + "low_lr": 3.1578947368421055e-07, + "step": 1870 + }, + { + "epoch": 4.917817225509533, + "high_lr": 1.5789473684210526e-05, + "low_lr": 3.1578947368421055e-07, + "step": 1870 + }, + { + "epoch": 4.917817225509533, + "high_lr": 1.5789473684210526e-05, + "low_lr": 3.1578947368421055e-07, + "step": 1870 + }, + { + "epoch": 4.917817225509533, + "high_lr": 1.5789473684210526e-05, + "low_lr": 3.1578947368421055e-07, + "step": 1870 + }, + { + "epoch": 4.920447074293228, + "grad_norm": 1.5392786264419556, + "learning_rate": 1.5263157894736842e-05, + "loss": 1.2449, + "step": 1871 + }, + { + "epoch": 4.920447074293228, + "high_lr": 1.5263157894736842e-05, + "low_lr": 3.052631578947369e-07, + "step": 1871 + }, + { + "epoch": 4.920447074293228, + "high_lr": 1.5263157894736842e-05, + "low_lr": 3.052631578947369e-07, + "step": 1871 + }, + { + "epoch": 4.920447074293228, + "high_lr": 1.5263157894736842e-05, + "low_lr": 3.052631578947369e-07, + "step": 1871 + }, + { + "epoch": 4.920447074293228, + "high_lr": 1.5263157894736842e-05, + "low_lr": 3.052631578947369e-07, + "step": 1871 + }, + { + "epoch": 4.920447074293228, + "high_lr": 1.5263157894736842e-05, + "low_lr": 3.052631578947369e-07, + "step": 1871 + }, + { + "epoch": 4.920447074293228, + "high_lr": 1.5263157894736842e-05, + "low_lr": 3.052631578947369e-07, + "step": 1871 + }, + { + "epoch": 4.920447074293228, + "high_lr": 1.5263157894736842e-05, + "low_lr": 3.052631578947369e-07, + "step": 1871 + }, + { + "epoch": 4.920447074293228, + "high_lr": 1.5263157894736842e-05, + "low_lr": 3.052631578947369e-07, + "step": 1871 + }, + { + "epoch": 4.923076923076923, + "grad_norm": 1.5249948501586914, + "learning_rate": 1.4736842105263159e-05, + "loss": 1.1833, + "step": 1872 + }, + { + "epoch": 4.923076923076923, + "high_lr": 1.4736842105263159e-05, + "low_lr": 2.9473684210526315e-07, + "step": 1872 + }, + { + "epoch": 4.923076923076923, + "high_lr": 1.4736842105263159e-05, + "low_lr": 2.9473684210526315e-07, + "step": 1872 + }, + { + "epoch": 4.923076923076923, + "high_lr": 1.4736842105263159e-05, + "low_lr": 2.9473684210526315e-07, + "step": 1872 + }, + { + "epoch": 4.923076923076923, + "high_lr": 1.4736842105263159e-05, + "low_lr": 2.9473684210526315e-07, + "step": 1872 + }, + { + "epoch": 4.923076923076923, + "high_lr": 1.4736842105263159e-05, + "low_lr": 2.9473684210526315e-07, + "step": 1872 + }, + { + "epoch": 4.923076923076923, + "high_lr": 1.4736842105263159e-05, + "low_lr": 2.9473684210526315e-07, + "step": 1872 + }, + { + "epoch": 4.923076923076923, + "high_lr": 1.4736842105263159e-05, + "low_lr": 2.9473684210526315e-07, + "step": 1872 + }, + { + "epoch": 4.923076923076923, + "high_lr": 1.4736842105263159e-05, + "low_lr": 2.9473684210526315e-07, + "step": 1872 + }, + { + "epoch": 4.925706771860618, + "grad_norm": 1.487542986869812, + "learning_rate": 1.4210526315789475e-05, + "loss": 1.2305, + "step": 1873 + }, + { + "epoch": 4.925706771860618, + "high_lr": 1.4210526315789475e-05, + "low_lr": 2.842105263157895e-07, + "step": 1873 + }, + { + "epoch": 4.925706771860618, + "high_lr": 1.4210526315789475e-05, + "low_lr": 2.842105263157895e-07, + "step": 1873 + }, + { + "epoch": 4.925706771860618, + "high_lr": 1.4210526315789475e-05, + "low_lr": 2.842105263157895e-07, + "step": 1873 + }, + { + "epoch": 4.925706771860618, + "high_lr": 1.4210526315789475e-05, + "low_lr": 2.842105263157895e-07, + "step": 1873 + }, + { + "epoch": 4.925706771860618, + "high_lr": 1.4210526315789475e-05, + "low_lr": 2.842105263157895e-07, + "step": 1873 + }, + { + "epoch": 4.925706771860618, + "high_lr": 1.4210526315789475e-05, + "low_lr": 2.842105263157895e-07, + "step": 1873 + }, + { + "epoch": 4.925706771860618, + "high_lr": 1.4210526315789475e-05, + "low_lr": 2.842105263157895e-07, + "step": 1873 + }, + { + "epoch": 4.925706771860618, + "high_lr": 1.4210526315789475e-05, + "low_lr": 2.842105263157895e-07, + "step": 1873 + }, + { + "epoch": 4.928336620644313, + "grad_norm": 1.5068485736846924, + "learning_rate": 1.368421052631579e-05, + "loss": 1.1939, + "step": 1874 + }, + { + "epoch": 4.928336620644313, + "high_lr": 1.368421052631579e-05, + "low_lr": 2.736842105263158e-07, + "step": 1874 + }, + { + "epoch": 4.928336620644313, + "high_lr": 1.368421052631579e-05, + "low_lr": 2.736842105263158e-07, + "step": 1874 + }, + { + "epoch": 4.928336620644313, + "high_lr": 1.368421052631579e-05, + "low_lr": 2.736842105263158e-07, + "step": 1874 + }, + { + "epoch": 4.928336620644313, + "high_lr": 1.368421052631579e-05, + "low_lr": 2.736842105263158e-07, + "step": 1874 + }, + { + "epoch": 4.928336620644313, + "high_lr": 1.368421052631579e-05, + "low_lr": 2.736842105263158e-07, + "step": 1874 + }, + { + "epoch": 4.928336620644313, + "high_lr": 1.368421052631579e-05, + "low_lr": 2.736842105263158e-07, + "step": 1874 + }, + { + "epoch": 4.928336620644313, + "high_lr": 1.368421052631579e-05, + "low_lr": 2.736842105263158e-07, + "step": 1874 + }, + { + "epoch": 4.928336620644313, + "high_lr": 1.368421052631579e-05, + "low_lr": 2.736842105263158e-07, + "step": 1874 + }, + { + "epoch": 4.930966469428008, + "grad_norm": 1.566163182258606, + "learning_rate": 1.3157894736842104e-05, + "loss": 1.2882, + "step": 1875 + }, + { + "epoch": 4.930966469428008, + "high_lr": 1.3157894736842104e-05, + "low_lr": 2.6315789473684213e-07, + "step": 1875 + }, + { + "epoch": 4.930966469428008, + "high_lr": 1.3157894736842104e-05, + "low_lr": 2.6315789473684213e-07, + "step": 1875 + }, + { + "epoch": 4.930966469428008, + "high_lr": 1.3157894736842104e-05, + "low_lr": 2.6315789473684213e-07, + "step": 1875 + }, + { + "epoch": 4.930966469428008, + "high_lr": 1.3157894736842104e-05, + "low_lr": 2.6315789473684213e-07, + "step": 1875 + }, + { + "epoch": 4.930966469428008, + "high_lr": 1.3157894736842104e-05, + "low_lr": 2.6315789473684213e-07, + "step": 1875 + }, + { + "epoch": 4.930966469428008, + "high_lr": 1.3157894736842104e-05, + "low_lr": 2.6315789473684213e-07, + "step": 1875 + }, + { + "epoch": 4.930966469428008, + "high_lr": 1.3157894736842104e-05, + "low_lr": 2.6315789473684213e-07, + "step": 1875 + }, + { + "epoch": 4.930966469428008, + "high_lr": 1.3157894736842104e-05, + "low_lr": 2.6315789473684213e-07, + "step": 1875 + }, + { + "epoch": 4.933596318211703, + "grad_norm": 1.42124342918396, + "learning_rate": 1.263157894736842e-05, + "loss": 1.2149, + "step": 1876 + }, + { + "epoch": 4.933596318211703, + "high_lr": 1.263157894736842e-05, + "low_lr": 2.5263157894736846e-07, + "step": 1876 + }, + { + "epoch": 4.933596318211703, + "high_lr": 1.263157894736842e-05, + "low_lr": 2.5263157894736846e-07, + "step": 1876 + }, + { + "epoch": 4.933596318211703, + "high_lr": 1.263157894736842e-05, + "low_lr": 2.5263157894736846e-07, + "step": 1876 + }, + { + "epoch": 4.933596318211703, + "high_lr": 1.263157894736842e-05, + "low_lr": 2.5263157894736846e-07, + "step": 1876 + }, + { + "epoch": 4.933596318211703, + "high_lr": 1.263157894736842e-05, + "low_lr": 2.5263157894736846e-07, + "step": 1876 + }, + { + "epoch": 4.933596318211703, + "high_lr": 1.263157894736842e-05, + "low_lr": 2.5263157894736846e-07, + "step": 1876 + }, + { + "epoch": 4.933596318211703, + "high_lr": 1.263157894736842e-05, + "low_lr": 2.5263157894736846e-07, + "step": 1876 + }, + { + "epoch": 4.933596318211703, + "high_lr": 1.263157894736842e-05, + "low_lr": 2.5263157894736846e-07, + "step": 1876 + }, + { + "epoch": 4.9362261669953975, + "grad_norm": 1.578900933265686, + "learning_rate": 1.2105263157894737e-05, + "loss": 1.1708, + "step": 1877 + }, + { + "epoch": 4.9362261669953975, + "high_lr": 1.2105263157894737e-05, + "low_lr": 2.4210526315789473e-07, + "step": 1877 + }, + { + "epoch": 4.9362261669953975, + "high_lr": 1.2105263157894737e-05, + "low_lr": 2.4210526315789473e-07, + "step": 1877 + }, + { + "epoch": 4.9362261669953975, + "high_lr": 1.2105263157894737e-05, + "low_lr": 2.4210526315789473e-07, + "step": 1877 + }, + { + "epoch": 4.9362261669953975, + "high_lr": 1.2105263157894737e-05, + "low_lr": 2.4210526315789473e-07, + "step": 1877 + }, + { + "epoch": 4.9362261669953975, + "high_lr": 1.2105263157894737e-05, + "low_lr": 2.4210526315789473e-07, + "step": 1877 + }, + { + "epoch": 4.9362261669953975, + "high_lr": 1.2105263157894737e-05, + "low_lr": 2.4210526315789473e-07, + "step": 1877 + }, + { + "epoch": 4.9362261669953975, + "high_lr": 1.2105263157894737e-05, + "low_lr": 2.4210526315789473e-07, + "step": 1877 + }, + { + "epoch": 4.9362261669953975, + "high_lr": 1.2105263157894737e-05, + "low_lr": 2.4210526315789473e-07, + "step": 1877 + }, + { + "epoch": 4.938856015779093, + "grad_norm": 1.6655032634735107, + "learning_rate": 1.1578947368421053e-05, + "loss": 1.1479, + "step": 1878 + }, + { + "epoch": 4.938856015779093, + "high_lr": 1.1578947368421053e-05, + "low_lr": 2.315789473684211e-07, + "step": 1878 + }, + { + "epoch": 4.938856015779093, + "high_lr": 1.1578947368421053e-05, + "low_lr": 2.315789473684211e-07, + "step": 1878 + }, + { + "epoch": 4.938856015779093, + "high_lr": 1.1578947368421053e-05, + "low_lr": 2.315789473684211e-07, + "step": 1878 + }, + { + "epoch": 4.938856015779093, + "high_lr": 1.1578947368421053e-05, + "low_lr": 2.315789473684211e-07, + "step": 1878 + }, + { + "epoch": 4.938856015779093, + "high_lr": 1.1578947368421053e-05, + "low_lr": 2.315789473684211e-07, + "step": 1878 + }, + { + "epoch": 4.938856015779093, + "high_lr": 1.1578947368421053e-05, + "low_lr": 2.315789473684211e-07, + "step": 1878 + }, + { + "epoch": 4.938856015779093, + "high_lr": 1.1578947368421053e-05, + "low_lr": 2.315789473684211e-07, + "step": 1878 + }, + { + "epoch": 4.938856015779093, + "high_lr": 1.1578947368421053e-05, + "low_lr": 2.315789473684211e-07, + "step": 1878 + }, + { + "epoch": 4.941485864562788, + "grad_norm": 1.6482006311416626, + "learning_rate": 1.1052631578947368e-05, + "loss": 1.1975, + "step": 1879 + }, + { + "epoch": 4.941485864562788, + "high_lr": 1.1052631578947368e-05, + "low_lr": 2.2105263157894736e-07, + "step": 1879 + }, + { + "epoch": 4.941485864562788, + "high_lr": 1.1052631578947368e-05, + "low_lr": 2.2105263157894736e-07, + "step": 1879 + }, + { + "epoch": 4.941485864562788, + "high_lr": 1.1052631578947368e-05, + "low_lr": 2.2105263157894736e-07, + "step": 1879 + }, + { + "epoch": 4.941485864562788, + "high_lr": 1.1052631578947368e-05, + "low_lr": 2.2105263157894736e-07, + "step": 1879 + }, + { + "epoch": 4.941485864562788, + "high_lr": 1.1052631578947368e-05, + "low_lr": 2.2105263157894736e-07, + "step": 1879 + }, + { + "epoch": 4.941485864562788, + "high_lr": 1.1052631578947368e-05, + "low_lr": 2.2105263157894736e-07, + "step": 1879 + }, + { + "epoch": 4.941485864562788, + "high_lr": 1.1052631578947368e-05, + "low_lr": 2.2105263157894736e-07, + "step": 1879 + }, + { + "epoch": 4.941485864562788, + "high_lr": 1.1052631578947368e-05, + "low_lr": 2.2105263157894736e-07, + "step": 1879 + }, + { + "epoch": 4.944115713346482, + "grad_norm": 1.5597196817398071, + "learning_rate": 1.0526315789473684e-05, + "loss": 1.2371, + "step": 1880 + }, + { + "epoch": 4.944115713346482, + "high_lr": 1.0526315789473684e-05, + "low_lr": 2.105263157894737e-07, + "step": 1880 + }, + { + "epoch": 4.944115713346482, + "high_lr": 1.0526315789473684e-05, + "low_lr": 2.105263157894737e-07, + "step": 1880 + }, + { + "epoch": 4.944115713346482, + "high_lr": 1.0526315789473684e-05, + "low_lr": 2.105263157894737e-07, + "step": 1880 + }, + { + "epoch": 4.944115713346482, + "high_lr": 1.0526315789473684e-05, + "low_lr": 2.105263157894737e-07, + "step": 1880 + }, + { + "epoch": 4.944115713346482, + "high_lr": 1.0526315789473684e-05, + "low_lr": 2.105263157894737e-07, + "step": 1880 + }, + { + "epoch": 4.944115713346482, + "high_lr": 1.0526315789473684e-05, + "low_lr": 2.105263157894737e-07, + "step": 1880 + }, + { + "epoch": 4.944115713346482, + "high_lr": 1.0526315789473684e-05, + "low_lr": 2.105263157894737e-07, + "step": 1880 + }, + { + "epoch": 4.944115713346482, + "high_lr": 1.0526315789473684e-05, + "low_lr": 2.105263157894737e-07, + "step": 1880 + }, + { + "epoch": 4.946745562130177, + "grad_norm": 1.6737630367279053, + "learning_rate": 1e-05, + "loss": 1.2194, + "step": 1881 + }, + { + "epoch": 4.946745562130177, + "high_lr": 1e-05, + "low_lr": 2.0000000000000002e-07, + "step": 1881 + }, + { + "epoch": 4.946745562130177, + "high_lr": 1e-05, + "low_lr": 2.0000000000000002e-07, + "step": 1881 + }, + { + "epoch": 4.946745562130177, + "high_lr": 1e-05, + "low_lr": 2.0000000000000002e-07, + "step": 1881 + }, + { + "epoch": 4.946745562130177, + "high_lr": 1e-05, + "low_lr": 2.0000000000000002e-07, + "step": 1881 + }, + { + "epoch": 4.946745562130177, + "high_lr": 1e-05, + "low_lr": 2.0000000000000002e-07, + "step": 1881 + }, + { + "epoch": 4.946745562130177, + "high_lr": 1e-05, + "low_lr": 2.0000000000000002e-07, + "step": 1881 + }, + { + "epoch": 4.946745562130177, + "high_lr": 1e-05, + "low_lr": 2.0000000000000002e-07, + "step": 1881 + }, + { + "epoch": 4.946745562130177, + "high_lr": 1e-05, + "low_lr": 2.0000000000000002e-07, + "step": 1881 + }, + { + "epoch": 4.949375410913873, + "grad_norm": 1.546088695526123, + "learning_rate": 9.473684210526317e-06, + "loss": 1.2373, + "step": 1882 + }, + { + "epoch": 4.949375410913873, + "high_lr": 9.473684210526317e-06, + "low_lr": 1.8947368421052634e-07, + "step": 1882 + }, + { + "epoch": 4.949375410913873, + "high_lr": 9.473684210526317e-06, + "low_lr": 1.8947368421052634e-07, + "step": 1882 + }, + { + "epoch": 4.949375410913873, + "high_lr": 9.473684210526317e-06, + "low_lr": 1.8947368421052634e-07, + "step": 1882 + }, + { + "epoch": 4.949375410913873, + "high_lr": 9.473684210526317e-06, + "low_lr": 1.8947368421052634e-07, + "step": 1882 + }, + { + "epoch": 4.949375410913873, + "high_lr": 9.473684210526317e-06, + "low_lr": 1.8947368421052634e-07, + "step": 1882 + }, + { + "epoch": 4.949375410913873, + "high_lr": 9.473684210526317e-06, + "low_lr": 1.8947368421052634e-07, + "step": 1882 + }, + { + "epoch": 4.949375410913873, + "high_lr": 9.473684210526317e-06, + "low_lr": 1.8947368421052634e-07, + "step": 1882 + }, + { + "epoch": 4.949375410913873, + "high_lr": 9.473684210526317e-06, + "low_lr": 1.8947368421052634e-07, + "step": 1882 + }, + { + "epoch": 4.952005259697567, + "grad_norm": 1.5193564891815186, + "learning_rate": 8.947368421052632e-06, + "loss": 1.2045, + "step": 1883 + }, + { + "epoch": 4.952005259697567, + "high_lr": 8.947368421052632e-06, + "low_lr": 1.7894736842105265e-07, + "step": 1883 + }, + { + "epoch": 4.952005259697567, + "high_lr": 8.947368421052632e-06, + "low_lr": 1.7894736842105265e-07, + "step": 1883 + }, + { + "epoch": 4.952005259697567, + "high_lr": 8.947368421052632e-06, + "low_lr": 1.7894736842105265e-07, + "step": 1883 + }, + { + "epoch": 4.952005259697567, + "high_lr": 8.947368421052632e-06, + "low_lr": 1.7894736842105265e-07, + "step": 1883 + }, + { + "epoch": 4.952005259697567, + "high_lr": 8.947368421052632e-06, + "low_lr": 1.7894736842105265e-07, + "step": 1883 + }, + { + "epoch": 4.952005259697567, + "high_lr": 8.947368421052632e-06, + "low_lr": 1.7894736842105265e-07, + "step": 1883 + }, + { + "epoch": 4.952005259697567, + "high_lr": 8.947368421052632e-06, + "low_lr": 1.7894736842105265e-07, + "step": 1883 + }, + { + "epoch": 4.952005259697567, + "high_lr": 8.947368421052632e-06, + "low_lr": 1.7894736842105265e-07, + "step": 1883 + }, + { + "epoch": 4.954635108481263, + "grad_norm": 1.4926979541778564, + "learning_rate": 8.421052631578947e-06, + "loss": 1.1982, + "step": 1884 + }, + { + "epoch": 4.954635108481263, + "high_lr": 8.421052631578947e-06, + "low_lr": 1.6842105263157895e-07, + "step": 1884 + }, + { + "epoch": 4.954635108481263, + "high_lr": 8.421052631578947e-06, + "low_lr": 1.6842105263157895e-07, + "step": 1884 + }, + { + "epoch": 4.954635108481263, + "high_lr": 8.421052631578947e-06, + "low_lr": 1.6842105263157895e-07, + "step": 1884 + }, + { + "epoch": 4.954635108481263, + "high_lr": 8.421052631578947e-06, + "low_lr": 1.6842105263157895e-07, + "step": 1884 + }, + { + "epoch": 4.954635108481263, + "high_lr": 8.421052631578947e-06, + "low_lr": 1.6842105263157895e-07, + "step": 1884 + }, + { + "epoch": 4.954635108481263, + "high_lr": 8.421052631578947e-06, + "low_lr": 1.6842105263157895e-07, + "step": 1884 + }, + { + "epoch": 4.954635108481263, + "high_lr": 8.421052631578947e-06, + "low_lr": 1.6842105263157895e-07, + "step": 1884 + }, + { + "epoch": 4.954635108481263, + "high_lr": 8.421052631578947e-06, + "low_lr": 1.6842105263157895e-07, + "step": 1884 + }, + { + "epoch": 4.957264957264957, + "grad_norm": 1.6040585041046143, + "learning_rate": 7.894736842105263e-06, + "loss": 1.1909, + "step": 1885 + }, + { + "epoch": 4.957264957264957, + "high_lr": 7.894736842105263e-06, + "low_lr": 1.5789473684210527e-07, + "step": 1885 + }, + { + "epoch": 4.957264957264957, + "high_lr": 7.894736842105263e-06, + "low_lr": 1.5789473684210527e-07, + "step": 1885 + }, + { + "epoch": 4.957264957264957, + "high_lr": 7.894736842105263e-06, + "low_lr": 1.5789473684210527e-07, + "step": 1885 + }, + { + "epoch": 4.957264957264957, + "high_lr": 7.894736842105263e-06, + "low_lr": 1.5789473684210527e-07, + "step": 1885 + }, + { + "epoch": 4.957264957264957, + "high_lr": 7.894736842105263e-06, + "low_lr": 1.5789473684210527e-07, + "step": 1885 + }, + { + "epoch": 4.957264957264957, + "high_lr": 7.894736842105263e-06, + "low_lr": 1.5789473684210527e-07, + "step": 1885 + }, + { + "epoch": 4.957264957264957, + "high_lr": 7.894736842105263e-06, + "low_lr": 1.5789473684210527e-07, + "step": 1885 + }, + { + "epoch": 4.957264957264957, + "high_lr": 7.894736842105263e-06, + "low_lr": 1.5789473684210527e-07, + "step": 1885 + }, + { + "epoch": 4.959894806048652, + "grad_norm": 1.4907753467559814, + "learning_rate": 7.368421052631579e-06, + "loss": 1.1852, + "step": 1886 + }, + { + "epoch": 4.959894806048652, + "high_lr": 7.368421052631579e-06, + "low_lr": 1.4736842105263158e-07, + "step": 1886 + }, + { + "epoch": 4.959894806048652, + "high_lr": 7.368421052631579e-06, + "low_lr": 1.4736842105263158e-07, + "step": 1886 + }, + { + "epoch": 4.959894806048652, + "high_lr": 7.368421052631579e-06, + "low_lr": 1.4736842105263158e-07, + "step": 1886 + }, + { + "epoch": 4.959894806048652, + "high_lr": 7.368421052631579e-06, + "low_lr": 1.4736842105263158e-07, + "step": 1886 + }, + { + "epoch": 4.959894806048652, + "high_lr": 7.368421052631579e-06, + "low_lr": 1.4736842105263158e-07, + "step": 1886 + }, + { + "epoch": 4.959894806048652, + "high_lr": 7.368421052631579e-06, + "low_lr": 1.4736842105263158e-07, + "step": 1886 + }, + { + "epoch": 4.959894806048652, + "high_lr": 7.368421052631579e-06, + "low_lr": 1.4736842105263158e-07, + "step": 1886 + }, + { + "epoch": 4.959894806048652, + "high_lr": 7.368421052631579e-06, + "low_lr": 1.4736842105263158e-07, + "step": 1886 + }, + { + "epoch": 4.962524654832347, + "grad_norm": 1.4674580097198486, + "learning_rate": 6.842105263157895e-06, + "loss": 1.2096, + "step": 1887 + }, + { + "epoch": 4.962524654832347, + "high_lr": 6.842105263157895e-06, + "low_lr": 1.368421052631579e-07, + "step": 1887 + }, + { + "epoch": 4.962524654832347, + "high_lr": 6.842105263157895e-06, + "low_lr": 1.368421052631579e-07, + "step": 1887 + }, + { + "epoch": 4.962524654832347, + "high_lr": 6.842105263157895e-06, + "low_lr": 1.368421052631579e-07, + "step": 1887 + }, + { + "epoch": 4.962524654832347, + "high_lr": 6.842105263157895e-06, + "low_lr": 1.368421052631579e-07, + "step": 1887 + }, + { + "epoch": 4.962524654832347, + "high_lr": 6.842105263157895e-06, + "low_lr": 1.368421052631579e-07, + "step": 1887 + }, + { + "epoch": 4.962524654832347, + "high_lr": 6.842105263157895e-06, + "low_lr": 1.368421052631579e-07, + "step": 1887 + }, + { + "epoch": 4.962524654832347, + "high_lr": 6.842105263157895e-06, + "low_lr": 1.368421052631579e-07, + "step": 1887 + }, + { + "epoch": 4.962524654832347, + "high_lr": 6.842105263157895e-06, + "low_lr": 1.368421052631579e-07, + "step": 1887 + }, + { + "epoch": 4.965154503616042, + "grad_norm": 1.4111533164978027, + "learning_rate": 6.31578947368421e-06, + "loss": 1.195, + "step": 1888 + }, + { + "epoch": 4.965154503616042, + "high_lr": 6.31578947368421e-06, + "low_lr": 1.2631578947368423e-07, + "step": 1888 + }, + { + "epoch": 4.965154503616042, + "high_lr": 6.31578947368421e-06, + "low_lr": 1.2631578947368423e-07, + "step": 1888 + }, + { + "epoch": 4.965154503616042, + "high_lr": 6.31578947368421e-06, + "low_lr": 1.2631578947368423e-07, + "step": 1888 + }, + { + "epoch": 4.965154503616042, + "high_lr": 6.31578947368421e-06, + "low_lr": 1.2631578947368423e-07, + "step": 1888 + }, + { + "epoch": 4.965154503616042, + "high_lr": 6.31578947368421e-06, + "low_lr": 1.2631578947368423e-07, + "step": 1888 + }, + { + "epoch": 4.965154503616042, + "high_lr": 6.31578947368421e-06, + "low_lr": 1.2631578947368423e-07, + "step": 1888 + }, + { + "epoch": 4.965154503616042, + "high_lr": 6.31578947368421e-06, + "low_lr": 1.2631578947368423e-07, + "step": 1888 + }, + { + "epoch": 4.965154503616042, + "high_lr": 6.31578947368421e-06, + "low_lr": 1.2631578947368423e-07, + "step": 1888 + }, + { + "epoch": 4.967784352399737, + "grad_norm": 1.5410196781158447, + "learning_rate": 5.789473684210527e-06, + "loss": 1.2292, + "step": 1889 + }, + { + "epoch": 4.967784352399737, + "high_lr": 5.789473684210527e-06, + "low_lr": 1.1578947368421054e-07, + "step": 1889 + }, + { + "epoch": 4.967784352399737, + "high_lr": 5.789473684210527e-06, + "low_lr": 1.1578947368421054e-07, + "step": 1889 + }, + { + "epoch": 4.967784352399737, + "high_lr": 5.789473684210527e-06, + "low_lr": 1.1578947368421054e-07, + "step": 1889 + }, + { + "epoch": 4.967784352399737, + "high_lr": 5.789473684210527e-06, + "low_lr": 1.1578947368421054e-07, + "step": 1889 + }, + { + "epoch": 4.967784352399737, + "high_lr": 5.789473684210527e-06, + "low_lr": 1.1578947368421054e-07, + "step": 1889 + }, + { + "epoch": 4.967784352399737, + "high_lr": 5.789473684210527e-06, + "low_lr": 1.1578947368421054e-07, + "step": 1889 + }, + { + "epoch": 4.967784352399737, + "high_lr": 5.789473684210527e-06, + "low_lr": 1.1578947368421054e-07, + "step": 1889 + }, + { + "epoch": 4.967784352399737, + "high_lr": 5.789473684210527e-06, + "low_lr": 1.1578947368421054e-07, + "step": 1889 + }, + { + "epoch": 4.970414201183432, + "grad_norm": 1.4448254108428955, + "learning_rate": 5.263157894736842e-06, + "loss": 1.2125, + "step": 1890 + }, + { + "epoch": 4.970414201183432, + "high_lr": 5.263157894736842e-06, + "low_lr": 1.0526315789473685e-07, + "step": 1890 + }, + { + "epoch": 4.970414201183432, + "high_lr": 5.263157894736842e-06, + "low_lr": 1.0526315789473685e-07, + "step": 1890 + }, + { + "epoch": 4.970414201183432, + "high_lr": 5.263157894736842e-06, + "low_lr": 1.0526315789473685e-07, + "step": 1890 + }, + { + "epoch": 4.970414201183432, + "high_lr": 5.263157894736842e-06, + "low_lr": 1.0526315789473685e-07, + "step": 1890 + }, + { + "epoch": 4.970414201183432, + "high_lr": 5.263157894736842e-06, + "low_lr": 1.0526315789473685e-07, + "step": 1890 + }, + { + "epoch": 4.970414201183432, + "high_lr": 5.263157894736842e-06, + "low_lr": 1.0526315789473685e-07, + "step": 1890 + }, + { + "epoch": 4.970414201183432, + "high_lr": 5.263157894736842e-06, + "low_lr": 1.0526315789473685e-07, + "step": 1890 + }, + { + "epoch": 4.970414201183432, + "high_lr": 5.263157894736842e-06, + "low_lr": 1.0526315789473685e-07, + "step": 1890 + }, + { + "epoch": 4.973044049967127, + "grad_norm": 1.7245209217071533, + "learning_rate": 4.736842105263159e-06, + "loss": 1.2209, + "step": 1891 + }, + { + "epoch": 4.973044049967127, + "high_lr": 4.736842105263159e-06, + "low_lr": 9.473684210526317e-08, + "step": 1891 + }, + { + "epoch": 4.973044049967127, + "high_lr": 4.736842105263159e-06, + "low_lr": 9.473684210526317e-08, + "step": 1891 + }, + { + "epoch": 4.973044049967127, + "high_lr": 4.736842105263159e-06, + "low_lr": 9.473684210526317e-08, + "step": 1891 + }, + { + "epoch": 4.973044049967127, + "high_lr": 4.736842105263159e-06, + "low_lr": 9.473684210526317e-08, + "step": 1891 + }, + { + "epoch": 4.973044049967127, + "high_lr": 4.736842105263159e-06, + "low_lr": 9.473684210526317e-08, + "step": 1891 + }, + { + "epoch": 4.973044049967127, + "high_lr": 4.736842105263159e-06, + "low_lr": 9.473684210526317e-08, + "step": 1891 + }, + { + "epoch": 4.973044049967127, + "high_lr": 4.736842105263159e-06, + "low_lr": 9.473684210526317e-08, + "step": 1891 + }, + { + "epoch": 4.973044049967127, + "high_lr": 4.736842105263159e-06, + "low_lr": 9.473684210526317e-08, + "step": 1891 + }, + { + "epoch": 4.975673898750822, + "grad_norm": 1.7153047323226929, + "learning_rate": 4.210526315789473e-06, + "loss": 1.2268, + "step": 1892 + }, + { + "epoch": 4.975673898750822, + "high_lr": 4.210526315789473e-06, + "low_lr": 8.421052631578947e-08, + "step": 1892 + }, + { + "epoch": 4.975673898750822, + "high_lr": 4.210526315789473e-06, + "low_lr": 8.421052631578947e-08, + "step": 1892 + }, + { + "epoch": 4.975673898750822, + "high_lr": 4.210526315789473e-06, + "low_lr": 8.421052631578947e-08, + "step": 1892 + }, + { + "epoch": 4.975673898750822, + "high_lr": 4.210526315789473e-06, + "low_lr": 8.421052631578947e-08, + "step": 1892 + }, + { + "epoch": 4.975673898750822, + "high_lr": 4.210526315789473e-06, + "low_lr": 8.421052631578947e-08, + "step": 1892 + }, + { + "epoch": 4.975673898750822, + "high_lr": 4.210526315789473e-06, + "low_lr": 8.421052631578947e-08, + "step": 1892 + }, + { + "epoch": 4.975673898750822, + "high_lr": 4.210526315789473e-06, + "low_lr": 8.421052631578947e-08, + "step": 1892 + }, + { + "epoch": 4.975673898750822, + "high_lr": 4.210526315789473e-06, + "low_lr": 8.421052631578947e-08, + "step": 1892 + }, + { + "epoch": 4.978303747534516, + "grad_norm": 1.6368732452392578, + "learning_rate": 3.6842105263157896e-06, + "loss": 1.192, + "step": 1893 + }, + { + "epoch": 4.978303747534516, + "high_lr": 3.6842105263157896e-06, + "low_lr": 7.368421052631579e-08, + "step": 1893 + }, + { + "epoch": 4.978303747534516, + "high_lr": 3.6842105263157896e-06, + "low_lr": 7.368421052631579e-08, + "step": 1893 + }, + { + "epoch": 4.978303747534516, + "high_lr": 3.6842105263157896e-06, + "low_lr": 7.368421052631579e-08, + "step": 1893 + }, + { + "epoch": 4.978303747534516, + "high_lr": 3.6842105263157896e-06, + "low_lr": 7.368421052631579e-08, + "step": 1893 + }, + { + "epoch": 4.978303747534516, + "high_lr": 3.6842105263157896e-06, + "low_lr": 7.368421052631579e-08, + "step": 1893 + }, + { + "epoch": 4.978303747534516, + "high_lr": 3.6842105263157896e-06, + "low_lr": 7.368421052631579e-08, + "step": 1893 + }, + { + "epoch": 4.978303747534516, + "high_lr": 3.6842105263157896e-06, + "low_lr": 7.368421052631579e-08, + "step": 1893 + }, + { + "epoch": 4.978303747534516, + "high_lr": 3.6842105263157896e-06, + "low_lr": 7.368421052631579e-08, + "step": 1893 + }, + { + "epoch": 4.980933596318212, + "grad_norm": 1.6589494943618774, + "learning_rate": 3.157894736842105e-06, + "loss": 1.1895, + "step": 1894 + }, + { + "epoch": 4.980933596318212, + "high_lr": 3.157894736842105e-06, + "low_lr": 6.315789473684211e-08, + "step": 1894 + }, + { + "epoch": 4.980933596318212, + "high_lr": 3.157894736842105e-06, + "low_lr": 6.315789473684211e-08, + "step": 1894 + }, + { + "epoch": 4.980933596318212, + "high_lr": 3.157894736842105e-06, + "low_lr": 6.315789473684211e-08, + "step": 1894 + }, + { + "epoch": 4.980933596318212, + "high_lr": 3.157894736842105e-06, + "low_lr": 6.315789473684211e-08, + "step": 1894 + }, + { + "epoch": 4.980933596318212, + "high_lr": 3.157894736842105e-06, + "low_lr": 6.315789473684211e-08, + "step": 1894 + }, + { + "epoch": 4.980933596318212, + "high_lr": 3.157894736842105e-06, + "low_lr": 6.315789473684211e-08, + "step": 1894 + }, + { + "epoch": 4.980933596318212, + "high_lr": 3.157894736842105e-06, + "low_lr": 6.315789473684211e-08, + "step": 1894 + }, + { + "epoch": 4.980933596318212, + "high_lr": 3.157894736842105e-06, + "low_lr": 6.315789473684211e-08, + "step": 1894 + }, + { + "epoch": 4.983563445101907, + "grad_norm": 1.4983834028244019, + "learning_rate": 2.631578947368421e-06, + "loss": 1.2616, + "step": 1895 + }, + { + "epoch": 4.983563445101907, + "high_lr": 2.631578947368421e-06, + "low_lr": 5.263157894736842e-08, + "step": 1895 + }, + { + "epoch": 4.983563445101907, + "high_lr": 2.631578947368421e-06, + "low_lr": 5.263157894736842e-08, + "step": 1895 + }, + { + "epoch": 4.983563445101907, + "high_lr": 2.631578947368421e-06, + "low_lr": 5.263157894736842e-08, + "step": 1895 + }, + { + "epoch": 4.983563445101907, + "high_lr": 2.631578947368421e-06, + "low_lr": 5.263157894736842e-08, + "step": 1895 + }, + { + "epoch": 4.983563445101907, + "high_lr": 2.631578947368421e-06, + "low_lr": 5.263157894736842e-08, + "step": 1895 + }, + { + "epoch": 4.983563445101907, + "high_lr": 2.631578947368421e-06, + "low_lr": 5.263157894736842e-08, + "step": 1895 + }, + { + "epoch": 4.983563445101907, + "high_lr": 2.631578947368421e-06, + "low_lr": 5.263157894736842e-08, + "step": 1895 + }, + { + "epoch": 4.983563445101907, + "high_lr": 2.631578947368421e-06, + "low_lr": 5.263157894736842e-08, + "step": 1895 + }, + { + "epoch": 4.986193293885601, + "grad_norm": 1.6577376127243042, + "learning_rate": 2.1052631578947366e-06, + "loss": 1.2307, + "step": 1896 + }, + { + "epoch": 4.986193293885601, + "high_lr": 2.1052631578947366e-06, + "low_lr": 4.2105263157894737e-08, + "step": 1896 + }, + { + "epoch": 4.986193293885601, + "high_lr": 2.1052631578947366e-06, + "low_lr": 4.2105263157894737e-08, + "step": 1896 + }, + { + "epoch": 4.986193293885601, + "high_lr": 2.1052631578947366e-06, + "low_lr": 4.2105263157894737e-08, + "step": 1896 + }, + { + "epoch": 4.986193293885601, + "high_lr": 2.1052631578947366e-06, + "low_lr": 4.2105263157894737e-08, + "step": 1896 + }, + { + "epoch": 4.986193293885601, + "high_lr": 2.1052631578947366e-06, + "low_lr": 4.2105263157894737e-08, + "step": 1896 + }, + { + "epoch": 4.986193293885601, + "high_lr": 2.1052631578947366e-06, + "low_lr": 4.2105263157894737e-08, + "step": 1896 + }, + { + "epoch": 4.986193293885601, + "high_lr": 2.1052631578947366e-06, + "low_lr": 4.2105263157894737e-08, + "step": 1896 + }, + { + "epoch": 4.986193293885601, + "high_lr": 2.1052631578947366e-06, + "low_lr": 4.2105263157894737e-08, + "step": 1896 + }, + { + "epoch": 4.988823142669297, + "grad_norm": 1.618255853652954, + "learning_rate": 1.5789473684210526e-06, + "loss": 1.2291, + "step": 1897 + }, + { + "epoch": 4.988823142669297, + "high_lr": 1.5789473684210526e-06, + "low_lr": 3.157894736842106e-08, + "step": 1897 + }, + { + "epoch": 4.988823142669297, + "high_lr": 1.5789473684210526e-06, + "low_lr": 3.157894736842106e-08, + "step": 1897 + }, + { + "epoch": 4.988823142669297, + "high_lr": 1.5789473684210526e-06, + "low_lr": 3.157894736842106e-08, + "step": 1897 + }, + { + "epoch": 4.988823142669297, + "high_lr": 1.5789473684210526e-06, + "low_lr": 3.157894736842106e-08, + "step": 1897 + }, + { + "epoch": 4.988823142669297, + "high_lr": 1.5789473684210526e-06, + "low_lr": 3.157894736842106e-08, + "step": 1897 + }, + { + "epoch": 4.988823142669297, + "high_lr": 1.5789473684210526e-06, + "low_lr": 3.157894736842106e-08, + "step": 1897 + }, + { + "epoch": 4.988823142669297, + "high_lr": 1.5789473684210526e-06, + "low_lr": 3.157894736842106e-08, + "step": 1897 + }, + { + "epoch": 4.988823142669297, + "high_lr": 1.5789473684210526e-06, + "low_lr": 3.157894736842106e-08, + "step": 1897 + }, + { + "epoch": 4.9914529914529915, + "grad_norm": 1.6011543273925781, + "learning_rate": 1.0526315789473683e-06, + "loss": 1.1987, + "step": 1898 + }, + { + "epoch": 4.9914529914529915, + "high_lr": 1.0526315789473683e-06, + "low_lr": 2.1052631578947368e-08, + "step": 1898 + }, + { + "epoch": 4.9914529914529915, + "high_lr": 1.0526315789473683e-06, + "low_lr": 2.1052631578947368e-08, + "step": 1898 + }, + { + "epoch": 4.9914529914529915, + "high_lr": 1.0526315789473683e-06, + "low_lr": 2.1052631578947368e-08, + "step": 1898 + }, + { + "epoch": 4.9914529914529915, + "high_lr": 1.0526315789473683e-06, + "low_lr": 2.1052631578947368e-08, + "step": 1898 + }, + { + "epoch": 4.9914529914529915, + "high_lr": 1.0526315789473683e-06, + "low_lr": 2.1052631578947368e-08, + "step": 1898 + }, + { + "epoch": 4.9914529914529915, + "high_lr": 1.0526315789473683e-06, + "low_lr": 2.1052631578947368e-08, + "step": 1898 + }, + { + "epoch": 4.9914529914529915, + "high_lr": 1.0526315789473683e-06, + "low_lr": 2.1052631578947368e-08, + "step": 1898 + }, + { + "epoch": 4.9914529914529915, + "high_lr": 1.0526315789473683e-06, + "low_lr": 2.1052631578947368e-08, + "step": 1898 + }, + { + "epoch": 4.994082840236686, + "grad_norm": 1.5550873279571533, + "learning_rate": 5.263157894736842e-07, + "loss": 1.251, + "step": 1899 + }, + { + "epoch": 4.994082840236686, + "high_lr": 5.263157894736842e-07, + "low_lr": 1.0526315789473684e-08, + "step": 1899 + }, + { + "epoch": 4.994082840236686, + "high_lr": 5.263157894736842e-07, + "low_lr": 1.0526315789473684e-08, + "step": 1899 + }, + { + "epoch": 4.994082840236686, + "high_lr": 5.263157894736842e-07, + "low_lr": 1.0526315789473684e-08, + "step": 1899 + }, + { + "epoch": 4.994082840236686, + "high_lr": 5.263157894736842e-07, + "low_lr": 1.0526315789473684e-08, + "step": 1899 + }, + { + "epoch": 4.994082840236686, + "high_lr": 5.263157894736842e-07, + "low_lr": 1.0526315789473684e-08, + "step": 1899 + }, + { + "epoch": 4.994082840236686, + "high_lr": 5.263157894736842e-07, + "low_lr": 1.0526315789473684e-08, + "step": 1899 + }, + { + "epoch": 4.994082840236686, + "high_lr": 5.263157894736842e-07, + "low_lr": 1.0526315789473684e-08, + "step": 1899 + }, + { + "epoch": 4.994082840236686, + "high_lr": 5.263157894736842e-07, + "low_lr": 1.0526315789473684e-08, + "step": 1899 + }, + { + "epoch": 4.996712689020382, + "grad_norm": 1.5914562940597534, + "learning_rate": 0.0, + "loss": 1.2178, + "step": 1900 + }, + { + "epoch": 4.996712689020382, + "step": 1900, + "total_flos": 1.1194761013985542e+19, + "train_loss": 1.445524227619171, + "train_runtime": 18692.4736, + "train_samples_per_second": 6.509, + "train_steps_per_second": 0.102 + } + ], + "logging_steps": 1.0, + "max_steps": 1900, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.1194761013985542e+19, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}