Malasar_50_latest / trainer_state.json
kavyamanohar's picture
Upload 8 files
d92e2e0
raw
history blame contribute delete
No virus
14.8 kB
{
"best_metric": 30.100143061516455,
"best_model_checkpoint": "./Malasar_50_latest/checkpoint-1000",
"epoch": 2.846299810246679,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"learning_rate": 4.000000000000001e-06,
"loss": 2.4065,
"step": 25
},
{
"epoch": 0.09,
"learning_rate": 8.8e-06,
"loss": 0.6768,
"step": 50
},
{
"epoch": 0.09,
"eval_loss": 0.15913546085357666,
"eval_runtime": 1024.1019,
"eval_samples_per_second": 1.829,
"eval_steps_per_second": 0.115,
"eval_wer": 63.91988555078684,
"step": 50
},
{
"epoch": 0.14,
"learning_rate": 9.86896551724138e-06,
"loss": 0.1782,
"step": 75
},
{
"epoch": 0.19,
"learning_rate": 9.696551724137932e-06,
"loss": 0.1711,
"step": 100
},
{
"epoch": 0.19,
"eval_loss": 0.11521261185407639,
"eval_runtime": 1068.1467,
"eval_samples_per_second": 1.754,
"eval_steps_per_second": 0.11,
"eval_wer": 52.989985693848354,
"step": 100
},
{
"epoch": 0.24,
"learning_rate": 9.524137931034484e-06,
"loss": 0.151,
"step": 125
},
{
"epoch": 0.28,
"learning_rate": 9.351724137931034e-06,
"loss": 0.1392,
"step": 150
},
{
"epoch": 0.28,
"eval_loss": 0.1063603013753891,
"eval_runtime": 1067.453,
"eval_samples_per_second": 1.755,
"eval_steps_per_second": 0.111,
"eval_wer": 42.00286123032904,
"step": 150
},
{
"epoch": 0.33,
"learning_rate": 9.179310344827587e-06,
"loss": 0.1149,
"step": 175
},
{
"epoch": 0.38,
"learning_rate": 9.006896551724139e-06,
"loss": 0.1131,
"step": 200
},
{
"epoch": 0.38,
"eval_loss": 0.09218524396419525,
"eval_runtime": 1059.1591,
"eval_samples_per_second": 1.768,
"eval_steps_per_second": 0.111,
"eval_wer": 40.2002861230329,
"step": 200
},
{
"epoch": 0.43,
"learning_rate": 8.83448275862069e-06,
"loss": 0.1052,
"step": 225
},
{
"epoch": 0.47,
"learning_rate": 8.662068965517241e-06,
"loss": 0.1208,
"step": 250
},
{
"epoch": 0.47,
"eval_loss": 0.08814000338315964,
"eval_runtime": 1037.3449,
"eval_samples_per_second": 1.806,
"eval_steps_per_second": 0.114,
"eval_wer": 46.49499284692418,
"step": 250
},
{
"epoch": 0.52,
"learning_rate": 8.489655172413795e-06,
"loss": 0.1107,
"step": 275
},
{
"epoch": 0.57,
"learning_rate": 8.317241379310345e-06,
"loss": 0.1312,
"step": 300
},
{
"epoch": 0.57,
"eval_loss": 0.07998558133840561,
"eval_runtime": 1053.0015,
"eval_samples_per_second": 1.779,
"eval_steps_per_second": 0.112,
"eval_wer": 37.51072961373391,
"step": 300
},
{
"epoch": 0.62,
"learning_rate": 8.144827586206897e-06,
"loss": 0.0947,
"step": 325
},
{
"epoch": 0.66,
"learning_rate": 7.972413793103448e-06,
"loss": 0.1112,
"step": 350
},
{
"epoch": 0.66,
"eval_loss": 0.07743828743696213,
"eval_runtime": 1073.0726,
"eval_samples_per_second": 1.745,
"eval_steps_per_second": 0.11,
"eval_wer": 37.56795422031474,
"step": 350
},
{
"epoch": 0.71,
"learning_rate": 7.800000000000002e-06,
"loss": 0.118,
"step": 375
},
{
"epoch": 0.76,
"learning_rate": 7.627586206896552e-06,
"loss": 0.1,
"step": 400
},
{
"epoch": 0.76,
"eval_loss": 0.0833013579249382,
"eval_runtime": 1078.2864,
"eval_samples_per_second": 1.737,
"eval_steps_per_second": 0.109,
"eval_wer": 33.59084406294707,
"step": 400
},
{
"epoch": 0.81,
"learning_rate": 7.455172413793104e-06,
"loss": 0.1023,
"step": 425
},
{
"epoch": 0.85,
"learning_rate": 7.282758620689656e-06,
"loss": 0.094,
"step": 450
},
{
"epoch": 0.85,
"eval_loss": 0.07670725882053375,
"eval_runtime": 1078.233,
"eval_samples_per_second": 1.737,
"eval_steps_per_second": 0.109,
"eval_wer": 31.702432045779684,
"step": 450
},
{
"epoch": 0.9,
"learning_rate": 7.110344827586207e-06,
"loss": 0.0787,
"step": 475
},
{
"epoch": 0.95,
"learning_rate": 6.937931034482759e-06,
"loss": 0.0758,
"step": 500
},
{
"epoch": 0.95,
"eval_loss": 0.07279360294342041,
"eval_runtime": 1081.1361,
"eval_samples_per_second": 1.732,
"eval_steps_per_second": 0.109,
"eval_wer": 32.33190271816881,
"step": 500
},
{
"epoch": 1.0,
"learning_rate": 6.7655172413793116e-06,
"loss": 0.1144,
"step": 525
},
{
"epoch": 1.04,
"learning_rate": 6.593103448275863e-06,
"loss": 0.0967,
"step": 550
},
{
"epoch": 1.04,
"eval_loss": 0.07088593393564224,
"eval_runtime": 1067.9119,
"eval_samples_per_second": 1.754,
"eval_steps_per_second": 0.11,
"eval_wer": 34.39198855507868,
"step": 550
},
{
"epoch": 1.09,
"learning_rate": 6.420689655172414e-06,
"loss": 0.0497,
"step": 575
},
{
"epoch": 1.14,
"learning_rate": 6.248275862068966e-06,
"loss": 0.0423,
"step": 600
},
{
"epoch": 1.14,
"eval_loss": 0.07051743566989899,
"eval_runtime": 1075.828,
"eval_samples_per_second": 1.741,
"eval_steps_per_second": 0.11,
"eval_wer": 31.04434907010014,
"step": 600
},
{
"epoch": 1.19,
"learning_rate": 6.075862068965518e-06,
"loss": 0.0686,
"step": 625
},
{
"epoch": 1.23,
"learning_rate": 5.9034482758620695e-06,
"loss": 0.0669,
"step": 650
},
{
"epoch": 1.23,
"eval_loss": 0.07203543931245804,
"eval_runtime": 1076.3331,
"eval_samples_per_second": 1.74,
"eval_steps_per_second": 0.11,
"eval_wer": 31.587982832618028,
"step": 650
},
{
"epoch": 1.28,
"learning_rate": 5.731034482758621e-06,
"loss": 0.0615,
"step": 675
},
{
"epoch": 1.33,
"learning_rate": 5.558620689655173e-06,
"loss": 0.0686,
"step": 700
},
{
"epoch": 1.33,
"eval_loss": 0.06905751675367355,
"eval_runtime": 1084.2893,
"eval_samples_per_second": 1.727,
"eval_steps_per_second": 0.109,
"eval_wer": 31.21602288984263,
"step": 700
},
{
"epoch": 1.38,
"learning_rate": 5.386206896551725e-06,
"loss": 0.0704,
"step": 725
},
{
"epoch": 1.42,
"learning_rate": 5.213793103448276e-06,
"loss": 0.0798,
"step": 750
},
{
"epoch": 1.42,
"eval_loss": 0.06694240123033524,
"eval_runtime": 1087.4811,
"eval_samples_per_second": 1.722,
"eval_steps_per_second": 0.109,
"eval_wer": 30.643776824034337,
"step": 750
},
{
"epoch": 1.47,
"learning_rate": 5.041379310344828e-06,
"loss": 0.0756,
"step": 775
},
{
"epoch": 1.52,
"learning_rate": 4.8689655172413795e-06,
"loss": 0.0818,
"step": 800
},
{
"epoch": 1.52,
"eval_loss": 0.06710417568683624,
"eval_runtime": 1073.6009,
"eval_samples_per_second": 1.745,
"eval_steps_per_second": 0.11,
"eval_wer": 32.18884120171674,
"step": 800
},
{
"epoch": 1.57,
"learning_rate": 4.6965517241379315e-06,
"loss": 0.0571,
"step": 825
},
{
"epoch": 1.61,
"learning_rate": 4.524137931034483e-06,
"loss": 0.0841,
"step": 850
},
{
"epoch": 1.61,
"eval_loss": 0.06191212683916092,
"eval_runtime": 1076.9574,
"eval_samples_per_second": 1.739,
"eval_steps_per_second": 0.11,
"eval_wer": 28.898426323319025,
"step": 850
},
{
"epoch": 1.66,
"learning_rate": 4.351724137931035e-06,
"loss": 0.0927,
"step": 875
},
{
"epoch": 1.71,
"learning_rate": 4.179310344827587e-06,
"loss": 0.0566,
"step": 900
},
{
"epoch": 1.71,
"eval_loss": 0.06160915642976761,
"eval_runtime": 1078.2038,
"eval_samples_per_second": 1.737,
"eval_steps_per_second": 0.109,
"eval_wer": 29.127324749642348,
"step": 900
},
{
"epoch": 1.76,
"learning_rate": 4.006896551724138e-06,
"loss": 0.0466,
"step": 925
},
{
"epoch": 1.8,
"learning_rate": 3.83448275862069e-06,
"loss": 0.0542,
"step": 950
},
{
"epoch": 1.8,
"eval_loss": 0.06113772094249725,
"eval_runtime": 1072.2754,
"eval_samples_per_second": 1.747,
"eval_steps_per_second": 0.11,
"eval_wer": 28.841201716738198,
"step": 950
},
{
"epoch": 1.85,
"learning_rate": 3.6620689655172415e-06,
"loss": 0.0536,
"step": 975
},
{
"epoch": 1.9,
"learning_rate": 3.489655172413793e-06,
"loss": 0.0638,
"step": 1000
},
{
"epoch": 1.9,
"eval_loss": 0.06121109798550606,
"eval_runtime": 1073.2525,
"eval_samples_per_second": 1.745,
"eval_steps_per_second": 0.11,
"eval_wer": 30.100143061516455,
"step": 1000
},
{
"epoch": 1.94,
"learning_rate": 3.3172413793103453e-06,
"loss": 0.0559,
"step": 1025
},
{
"epoch": 1.99,
"learning_rate": 3.1448275862068965e-06,
"loss": 0.0503,
"step": 1050
},
{
"epoch": 1.99,
"eval_loss": 0.06099317967891693,
"eval_runtime": 1074.3991,
"eval_samples_per_second": 1.743,
"eval_steps_per_second": 0.11,
"eval_wer": 29.32761087267525,
"step": 1050
},
{
"epoch": 2.04,
"learning_rate": 2.9724137931034486e-06,
"loss": 0.0627,
"step": 1075
},
{
"epoch": 2.09,
"learning_rate": 2.8000000000000003e-06,
"loss": 0.0369,
"step": 1100
},
{
"epoch": 2.09,
"eval_loss": 0.06123083084821701,
"eval_runtime": 1071.2265,
"eval_samples_per_second": 1.748,
"eval_steps_per_second": 0.11,
"eval_wer": 29.32761087267525,
"step": 1100
},
{
"epoch": 2.13,
"learning_rate": 2.627586206896552e-06,
"loss": 0.0395,
"step": 1125
},
{
"epoch": 2.18,
"learning_rate": 2.4551724137931036e-06,
"loss": 0.0273,
"step": 1150
},
{
"epoch": 2.18,
"eval_loss": 0.06379574537277222,
"eval_runtime": 1064.3917,
"eval_samples_per_second": 1.76,
"eval_steps_per_second": 0.111,
"eval_wer": 32.58941344778255,
"step": 1150
},
{
"epoch": 2.23,
"learning_rate": 2.2827586206896553e-06,
"loss": 0.0345,
"step": 1175
},
{
"epoch": 2.28,
"learning_rate": 2.110344827586207e-06,
"loss": 0.0269,
"step": 1200
},
{
"epoch": 2.28,
"eval_loss": 0.06369271874427795,
"eval_runtime": 1147.8053,
"eval_samples_per_second": 1.632,
"eval_steps_per_second": 0.103,
"eval_wer": 31.673819742489272,
"step": 1200
},
{
"epoch": 2.32,
"learning_rate": 1.9379310344827586e-06,
"loss": 0.0446,
"step": 1225
},
{
"epoch": 2.37,
"learning_rate": 1.7655172413793103e-06,
"loss": 0.0384,
"step": 1250
},
{
"epoch": 2.37,
"eval_loss": 0.06297700107097626,
"eval_runtime": 1155.6941,
"eval_samples_per_second": 1.621,
"eval_steps_per_second": 0.102,
"eval_wer": 32.04577968526466,
"step": 1250
},
{
"epoch": 2.42,
"learning_rate": 1.5931034482758622e-06,
"loss": 0.0254,
"step": 1275
},
{
"epoch": 2.47,
"learning_rate": 1.4206896551724138e-06,
"loss": 0.0574,
"step": 1300
},
{
"epoch": 2.47,
"eval_loss": 0.062077928334474564,
"eval_runtime": 1073.2284,
"eval_samples_per_second": 1.745,
"eval_steps_per_second": 0.11,
"eval_wer": 30.014306151645208,
"step": 1300
},
{
"epoch": 2.51,
"learning_rate": 1.2482758620689655e-06,
"loss": 0.0447,
"step": 1325
},
{
"epoch": 2.56,
"learning_rate": 1.0758620689655174e-06,
"loss": 0.053,
"step": 1350
},
{
"epoch": 2.56,
"eval_loss": 0.06072333827614784,
"eval_runtime": 1068.6925,
"eval_samples_per_second": 1.753,
"eval_steps_per_second": 0.11,
"eval_wer": 30.529327610872674,
"step": 1350
},
{
"epoch": 2.61,
"learning_rate": 9.034482758620689e-07,
"loss": 0.0303,
"step": 1375
},
{
"epoch": 2.66,
"learning_rate": 7.310344827586207e-07,
"loss": 0.0217,
"step": 1400
},
{
"epoch": 2.66,
"eval_loss": 0.060529597103595734,
"eval_runtime": 1230.1339,
"eval_samples_per_second": 1.523,
"eval_steps_per_second": 0.096,
"eval_wer": 31.359084406294706,
"step": 1400
},
{
"epoch": 2.7,
"learning_rate": 5.586206896551725e-07,
"loss": 0.0394,
"step": 1425
},
{
"epoch": 2.75,
"learning_rate": 3.862068965517242e-07,
"loss": 0.0327,
"step": 1450
},
{
"epoch": 2.75,
"eval_loss": 0.060149554163217545,
"eval_runtime": 1225.2391,
"eval_samples_per_second": 1.529,
"eval_steps_per_second": 0.096,
"eval_wer": 31.301859799713878,
"step": 1450
},
{
"epoch": 2.8,
"learning_rate": 2.1379310344827587e-07,
"loss": 0.0326,
"step": 1475
},
{
"epoch": 2.85,
"learning_rate": 4.137931034482759e-08,
"loss": 0.0235,
"step": 1500
},
{
"epoch": 2.85,
"eval_loss": 0.05990791320800781,
"eval_runtime": 1212.8468,
"eval_samples_per_second": 1.544,
"eval_steps_per_second": 0.097,
"eval_wer": 30.90128755364807,
"step": 1500
}
],
"max_steps": 1500,
"num_train_epochs": 3,
"total_flos": 1.018527602098176e+20,
"trial_name": null,
"trial_params": null
}