|
{ |
|
"best_metric": 0.7959330677986145, |
|
"best_model_checkpoint": "./lora-alpaca/checkpoint-800", |
|
"epoch": 2.045381911153723, |
|
"global_step": 800, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 1.6613, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 1.6025, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 1.4512, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 1.1653, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00015, |
|
"loss": 0.9588, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 0.9058, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00020999999999999998, |
|
"loss": 0.8655, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 0.8493, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027, |
|
"loss": 0.8462, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0003, |
|
"loss": 0.8353, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002972041006523765, |
|
"loss": 0.833, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.000294408201304753, |
|
"loss": 0.8322, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002916123019571295, |
|
"loss": 0.8239, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00028881640260950607, |
|
"loss": 0.8174, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00028602050326188253, |
|
"loss": 0.8178, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00028322460391425904, |
|
"loss": 0.8321, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002804287045666356, |
|
"loss": 0.8213, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002776328052190121, |
|
"loss": 0.8088, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00027483690587138857, |
|
"loss": 0.8021, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00027204100652376514, |
|
"loss": 0.8297, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 0.8141016960144043, |
|
"eval_runtime": 87.4274, |
|
"eval_samples_per_second": 22.876, |
|
"eval_steps_per_second": 2.86, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00026924510717614165, |
|
"loss": 0.8148, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00026644920782851816, |
|
"loss": 0.8204, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00026365330848089467, |
|
"loss": 0.8181, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002608574091332712, |
|
"loss": 0.8146, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0002580615097856477, |
|
"loss": 0.8108, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002552656104380242, |
|
"loss": 0.8069, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0002524697110904007, |
|
"loss": 0.8219, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0002496738117427772, |
|
"loss": 0.8076, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00024687791239515373, |
|
"loss": 0.8076, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00024408201304753027, |
|
"loss": 0.8173, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00024128611369990678, |
|
"loss": 0.8032, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0002384902143522833, |
|
"loss": 0.8185, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002356943150046598, |
|
"loss": 0.8046, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00023289841565703632, |
|
"loss": 0.812, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00023010251630941285, |
|
"loss": 0.807, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00022730661696178936, |
|
"loss": 0.8003, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00022451071761416585, |
|
"loss": 0.802, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00022171481826654239, |
|
"loss": 0.8073, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002189189189189189, |
|
"loss": 0.8055, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00021612301957129543, |
|
"loss": 0.8132, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 0.8033702373504639, |
|
"eval_runtime": 87.6065, |
|
"eval_samples_per_second": 22.829, |
|
"eval_steps_per_second": 2.854, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00021332712022367195, |
|
"loss": 0.7992, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00021053122087604843, |
|
"loss": 0.7945, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00020773532152842497, |
|
"loss": 0.7917, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00020493942218080148, |
|
"loss": 0.8002, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00020214352283317796, |
|
"loss": 0.7934, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.0001993476234855545, |
|
"loss": 0.7982, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.000196551724137931, |
|
"loss": 0.7995, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00019375582479030755, |
|
"loss": 0.7935, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00019095992544268406, |
|
"loss": 0.8003, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00018816402609506054, |
|
"loss": 0.7967, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00018536812674743708, |
|
"loss": 0.7962, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001825722273998136, |
|
"loss": 0.7956, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00017977632805219013, |
|
"loss": 0.7958, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00017698042870456661, |
|
"loss": 0.7944, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00017418452935694312, |
|
"loss": 0.7945, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00017138863000931966, |
|
"loss": 0.802, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00016859273066169617, |
|
"loss": 0.7923, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00016579683131407266, |
|
"loss": 0.8024, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.0001630009319664492, |
|
"loss": 0.7902, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001602050326188257, |
|
"loss": 0.8066, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 0.7989646196365356, |
|
"eval_runtime": 87.5148, |
|
"eval_samples_per_second": 22.853, |
|
"eval_steps_per_second": 2.857, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00015740913327120224, |
|
"loss": 0.7958, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00015461323392357873, |
|
"loss": 0.7837, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00015181733457595524, |
|
"loss": 0.8008, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00014902143522833175, |
|
"loss": 0.7964, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.0001462255358807083, |
|
"loss": 0.7951, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0001434296365330848, |
|
"loss": 0.8012, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001406337371854613, |
|
"loss": 0.7928, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00013783783783783782, |
|
"loss": 0.7903, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00013504193849021433, |
|
"loss": 0.7949, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00013224603914259084, |
|
"loss": 0.7866, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00012945013979496738, |
|
"loss": 0.7889, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0001266542404473439, |
|
"loss": 0.7928, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0001238583410997204, |
|
"loss": 0.7932, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00012106244175209691, |
|
"loss": 0.7963, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00011826654240447344, |
|
"loss": 0.7979, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00011547064305684995, |
|
"loss": 0.7943, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00011267474370922644, |
|
"loss": 0.7983, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010987884436160297, |
|
"loss": 0.801, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.0001070829450139795, |
|
"loss": 0.7836, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.000104287045666356, |
|
"loss": 0.785, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 0.7959330677986145, |
|
"eval_runtime": 88.4745, |
|
"eval_samples_per_second": 22.605, |
|
"eval_steps_per_second": 2.826, |
|
"step": 800 |
|
} |
|
], |
|
"max_steps": 1173, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.040223650649342e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|