{ "best_metric": 0.24260137975215912, "best_model_checkpoint": "wave2vec2-xlsr-Persian/checkpoint-240000", "epoch": 3.0, "eval_steps": 10000, "global_step": 255012, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1176415227518705, "grad_norm": 1.416466236114502, "learning_rate": 9.646079712769477e-06, "loss": 3.7691, "step": 10000 }, { "epoch": 0.1176415227518705, "eval_loss": 0.7940966486930847, "eval_runtime": 534.4277, "eval_samples_per_second": 16.743, "eval_steps_per_second": 2.094, "eval_wer": 0.6079105283797882, "step": 10000 }, { "epoch": 0.235283045503741, "grad_norm": 2.3572049140930176, "learning_rate": 9.252436892745226e-06, "loss": 0.8658, "step": 20000 }, { "epoch": 0.235283045503741, "eval_loss": 0.5118501782417297, "eval_runtime": 536.8454, "eval_samples_per_second": 16.668, "eval_steps_per_second": 2.084, "eval_wer": 0.4575246579700923, "step": 20000 }, { "epoch": 0.3529245682556115, "grad_norm": 2.209596872329712, "learning_rate": 8.858872809158622e-06, "loss": 0.6829, "step": 30000 }, { "epoch": 0.3529245682556115, "eval_loss": 0.4285117983818054, "eval_runtime": 539.6896, "eval_samples_per_second": 16.58, "eval_steps_per_second": 2.073, "eval_wer": 0.40392825696083584, "step": 30000 }, { "epoch": 0.470566091007482, "grad_norm": 2.2252326011657715, "learning_rate": 8.465348093790845e-06, "loss": 0.6078, "step": 40000 }, { "epoch": 0.470566091007482, "eval_loss": 0.38793477416038513, "eval_runtime": 547.0046, "eval_samples_per_second": 16.358, "eval_steps_per_second": 2.046, "eval_wer": 0.3664437028760849, "step": 40000 }, { "epoch": 0.5882076137593525, "grad_norm": 2.7398128509521484, "learning_rate": 8.071784010204244e-06, "loss": 0.5561, "step": 50000 }, { "epoch": 0.5882076137593525, "eval_loss": 0.3593791723251343, "eval_runtime": 551.125, "eval_samples_per_second": 16.236, "eval_steps_per_second": 2.03, "eval_wer": 0.34282162650112097, "step": 50000 }, { "epoch": 0.705849136511223, "grad_norm": 2.960217237472534, "learning_rate": 7.678259294836465e-06, "loss": 0.5168, "step": 60000 }, { "epoch": 0.705849136511223, "eval_loss": 0.33377397060394287, "eval_runtime": 546.4831, "eval_samples_per_second": 16.374, "eval_steps_per_second": 2.048, "eval_wer": 0.3202835389089079, "step": 60000 }, { "epoch": 0.8234906592630935, "grad_norm": 2.531191349029541, "learning_rate": 7.284734579468687e-06, "loss": 0.499, "step": 70000 }, { "epoch": 0.8234906592630935, "eval_loss": 0.3143016993999481, "eval_runtime": 550.3802, "eval_samples_per_second": 16.258, "eval_steps_per_second": 2.033, "eval_wer": 0.30924387157877603, "step": 70000 }, { "epoch": 0.941132182014964, "grad_norm": 2.0980894565582275, "learning_rate": 6.891131127663261e-06, "loss": 0.4768, "step": 80000 }, { "epoch": 0.941132182014964, "eval_loss": 0.3023754954338074, "eval_runtime": 546.9538, "eval_samples_per_second": 16.36, "eval_steps_per_second": 2.046, "eval_wer": 0.29365367113334173, "step": 80000 }, { "epoch": 1.0587737047668344, "grad_norm": 1.7364046573638916, "learning_rate": 6.497606412295483e-06, "loss": 0.4529, "step": 90000 }, { "epoch": 1.0587737047668344, "eval_loss": 0.29402047395706177, "eval_runtime": 544.3376, "eval_samples_per_second": 16.438, "eval_steps_per_second": 2.056, "eval_wer": 0.2827878859629002, "step": 90000 }, { "epoch": 1.176415227518705, "grad_norm": 3.4313299655914307, "learning_rate": 6.1040816969277054e-06, "loss": 0.44, "step": 100000 }, { "epoch": 1.176415227518705, "eval_loss": 0.2909528613090515, "eval_runtime": 543.2034, "eval_samples_per_second": 16.473, "eval_steps_per_second": 2.06, "eval_wer": 0.27463762217996435, "step": 100000 }, { "epoch": 1.2940567502705755, "grad_norm": 1.1621043682098389, "learning_rate": 5.710478245122278e-06, "loss": 0.4264, "step": 110000 }, { "epoch": 1.2940567502705755, "eval_loss": 0.2814837098121643, "eval_runtime": 547.8907, "eval_samples_per_second": 16.332, "eval_steps_per_second": 2.042, "eval_wer": 0.2683001724023115, "step": 110000 }, { "epoch": 1.4116982730224459, "grad_norm": 2.507716178894043, "learning_rate": 5.316914161535675e-06, "loss": 0.4189, "step": 120000 }, { "epoch": 1.4116982730224459, "eval_loss": 0.27175650000572205, "eval_runtime": 548.1953, "eval_samples_per_second": 16.323, "eval_steps_per_second": 2.041, "eval_wer": 0.26368304611946813, "step": 120000 }, { "epoch": 1.5293397957743164, "grad_norm": 2.3356528282165527, "learning_rate": 4.923428814386722e-06, "loss": 0.4052, "step": 130000 }, { "epoch": 1.5293397957743164, "eval_loss": 0.2673029899597168, "eval_runtime": 544.6822, "eval_samples_per_second": 16.428, "eval_steps_per_second": 2.054, "eval_wer": 0.25851467639420195, "step": 130000 }, { "epoch": 1.646981318526187, "grad_norm": 2.476557970046997, "learning_rate": 4.529864730800119e-06, "loss": 0.4044, "step": 140000 }, { "epoch": 1.646981318526187, "eval_loss": 0.26591917872428894, "eval_runtime": 545.1182, "eval_samples_per_second": 16.415, "eval_steps_per_second": 2.053, "eval_wer": 0.2534720937631799, "step": 140000 }, { "epoch": 1.7646228412780576, "grad_norm": 1.8470633029937744, "learning_rate": 4.136418751869991e-06, "loss": 0.4046, "step": 150000 }, { "epoch": 1.7646228412780576, "eval_loss": 0.2603091299533844, "eval_runtime": 545.5751, "eval_samples_per_second": 16.401, "eval_steps_per_second": 2.051, "eval_wer": 0.24952459877616556, "step": 150000 }, { "epoch": 1.8822643640299281, "grad_norm": 2.6165308952331543, "learning_rate": 3.742815300064564e-06, "loss": 0.3944, "step": 160000 }, { "epoch": 1.8822643640299281, "eval_loss": 0.2575734555721283, "eval_runtime": 545.6155, "eval_samples_per_second": 16.4, "eval_steps_per_second": 2.051, "eval_wer": 0.24606545368445198, "step": 160000 }, { "epoch": 1.9999058867817985, "grad_norm": 2.0350422859191895, "learning_rate": 3.3492512164779615e-06, "loss": 0.3876, "step": 170000 }, { "epoch": 1.9999058867817985, "eval_loss": 0.25538763403892517, "eval_runtime": 543.8449, "eval_samples_per_second": 16.453, "eval_steps_per_second": 2.058, "eval_wer": 0.24299106911630866, "step": 170000 }, { "epoch": 2.117547409533669, "grad_norm": 2.234062433242798, "learning_rate": 2.955726501110184e-06, "loss": 0.3836, "step": 180000 }, { "epoch": 2.117547409533669, "eval_loss": 0.25172147154808044, "eval_runtime": 543.9926, "eval_samples_per_second": 16.449, "eval_steps_per_second": 2.057, "eval_wer": 0.24226224389377649, "step": 180000 }, { "epoch": 2.2351889322855394, "grad_norm": 2.7782626152038574, "learning_rate": 2.5621624175235817e-06, "loss": 0.3767, "step": 190000 }, { "epoch": 2.2351889322855394, "eval_loss": 0.2503082752227783, "eval_runtime": 546.7703, "eval_samples_per_second": 16.365, "eval_steps_per_second": 2.047, "eval_wer": 0.23942833465286462, "step": 190000 }, { "epoch": 2.35283045503741, "grad_norm": 2.365490198135376, "learning_rate": 2.1686770703746284e-06, "loss": 0.3738, "step": 200000 }, { "epoch": 2.35283045503741, "eval_loss": 0.24804162979125977, "eval_runtime": 545.3342, "eval_samples_per_second": 16.408, "eval_steps_per_second": 2.052, "eval_wer": 0.23909166919474062, "step": 200000 }, { "epoch": 2.4704719777892805, "grad_norm": 3.105099678039551, "learning_rate": 1.7751523550068502e-06, "loss": 0.3707, "step": 210000 }, { "epoch": 2.4704719777892805, "eval_loss": 0.24506914615631104, "eval_runtime": 547.7088, "eval_samples_per_second": 16.337, "eval_steps_per_second": 2.043, "eval_wer": 0.23790409104025928, "step": 210000 }, { "epoch": 2.588113500541151, "grad_norm": 0.9898041486740112, "learning_rate": 1.3815882714202481e-06, "loss": 0.3649, "step": 220000 }, { "epoch": 2.588113500541151, "eval_loss": 0.24370211362838745, "eval_runtime": 547.4398, "eval_samples_per_second": 16.345, "eval_steps_per_second": 2.044, "eval_wer": 0.23637614780723498, "step": 220000 }, { "epoch": 2.7057550232930216, "grad_norm": 4.53593635559082, "learning_rate": 9.880241878336459e-07, "loss": 0.369, "step": 230000 }, { "epoch": 2.7057550232930216, "eval_loss": 0.24421393871307373, "eval_runtime": 549.6273, "eval_samples_per_second": 16.28, "eval_steps_per_second": 2.036, "eval_wer": 0.23538094991453876, "step": 230000 }, { "epoch": 2.8233965460448918, "grad_norm": 3.995215654373169, "learning_rate": 5.945388406846922e-07, "loss": 0.3608, "step": 240000 }, { "epoch": 2.8233965460448918, "eval_loss": 0.24260137975215912, "eval_runtime": 548.4399, "eval_samples_per_second": 16.315, "eval_steps_per_second": 2.04, "eval_wer": 0.23452633759776248, "step": 240000 }, { "epoch": 2.9410380687967628, "grad_norm": 1.9774836301803589, "learning_rate": 2.0097475709808989e-07, "loss": 0.3644, "step": 250000 }, { "epoch": 2.9410380687967628, "eval_loss": 0.24260272085666656, "eval_runtime": 548.9964, "eval_samples_per_second": 16.299, "eval_steps_per_second": 2.038, "eval_wer": 0.234019489600367, "step": 250000 }, { "epoch": 3.0, "step": 255012, "total_flos": 2.7896646019563717e+20, "train_loss": 0.5826510110653187, "train_runtime": 84804.3792, "train_samples_per_second": 6.014, "train_steps_per_second": 3.007 } ], "logging_steps": 10000, "max_steps": 255012, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.7896646019563717e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }