diff --git a/README.md b/README.md index 5c76d0bcae2315cbbf52d9322cdf00e640eaff39..82fd57e991e131f726e51739e497734f663b94ee 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ base_model: openai/whisper-tiny tags: - generated_from_trainer datasets: -- common_voice_17_0 +- mozilla-foundation/common_voice_17_0 metrics: - wer model-index: @@ -14,8 +14,8 @@ model-index: name: Automatic Speech Recognition type: automatic-speech-recognition dataset: - name: common_voice_17_0 - type: common_voice_17_0 + name: mozilla-foundation/common_voice_17_0 id + type: mozilla-foundation/common_voice_17_0 config: id split: None args: id @@ -30,7 +30,7 @@ should probably proofread and complete it, then remove this comment. --> # whisper-tiny-common_voice_17_0-id -This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on the common_voice_17_0 dataset. +This model is a fine-tuned version of [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny) on the mozilla-foundation/common_voice_17_0 id dataset. It achieves the following results on the evaluation set: - Loss: 0.2000 - Wer: 0.1807 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..42beb6a1178c2a5d41f2a2e32cf952bcd10affb2 --- /dev/null +++ b/all_results.json @@ -0,0 +1,15 @@ +{ + "epoch": 8.458447874814972, + "eval_loss": 0.20003820955753326, + "eval_runtime": 438.9869, + "eval_samples": 3641, + "eval_samples_per_second": 8.294, + "eval_steps_per_second": 1.039, + "eval_wer": 0.1807044410413476, + "total_flos": 7.8770584829952e+18, + "train_loss": 0.15760719767808914, + "train_runtime": 46114.6561, + "train_samples": 37827, + "train_samples_per_second": 6.939, + "train_steps_per_second": 0.434 +} \ No newline at end of file diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..bc01c33f4448e653b9042ef09a0330b42567311a --- /dev/null +++ b/eval_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 8.458447874814972, + "eval_loss": 0.20003820955753326, + "eval_runtime": 438.9869, + "eval_samples": 3641, + "eval_samples_per_second": 8.294, + "eval_steps_per_second": 1.039, + "eval_wer": 0.1807044410413476 +} \ No newline at end of file diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..22244535f00fcd97e90bf4fe1a9147971b9aa7fc --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 8.458447874814972, + "total_flos": 7.8770584829952e+18, + "train_loss": 0.15760719767808914, + "train_runtime": 46114.6561, + "train_samples": 37827, + "train_samples_per_second": 6.939, + "train_steps_per_second": 0.434 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a48c4e8465667ef2c64a8a458ecbfb9aa405e104 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,5822 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 8.458447874814972, + "eval_steps": 1000, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.010573059843518714, + "grad_norm": 59.40861129760742, + "learning_rate": 4.2000000000000006e-07, + "loss": 3.5073, + "step": 25 + }, + { + "epoch": 0.02114611968703743, + "grad_norm": 48.962947845458984, + "learning_rate": 9.200000000000001e-07, + "loss": 3.1121, + "step": 50 + }, + { + "epoch": 0.03171917953055614, + "grad_norm": 31.433500289916992, + "learning_rate": 1.42e-06, + "loss": 2.3354, + "step": 75 + }, + { + "epoch": 0.04229223937407486, + "grad_norm": 21.955114364624023, + "learning_rate": 1.9200000000000003e-06, + "loss": 1.676, + "step": 100 + }, + { + "epoch": 0.05286529921759357, + "grad_norm": 16.944923400878906, + "learning_rate": 2.42e-06, + "loss": 1.1735, + "step": 125 + }, + { + "epoch": 0.06343835906111228, + "grad_norm": 17.72088623046875, + "learning_rate": 2.92e-06, + "loss": 1.0192, + "step": 150 + }, + { + "epoch": 0.074011418904631, + "grad_norm": 14.417439460754395, + "learning_rate": 3.4200000000000007e-06, + "loss": 0.9191, + "step": 175 + }, + { + "epoch": 0.08458447874814971, + "grad_norm": 15.664250373840332, + "learning_rate": 3.920000000000001e-06, + "loss": 0.87, + "step": 200 + }, + { + "epoch": 0.09515753859166842, + "grad_norm": 15.430508613586426, + "learning_rate": 4.42e-06, + "loss": 0.8069, + "step": 225 + }, + { + "epoch": 0.10573059843518715, + "grad_norm": 16.615825653076172, + "learning_rate": 4.92e-06, + "loss": 0.7783, + "step": 250 + }, + { + "epoch": 0.11630365827870585, + "grad_norm": 19.77940559387207, + "learning_rate": 5.420000000000001e-06, + "loss": 0.7989, + "step": 275 + }, + { + "epoch": 0.12687671812222456, + "grad_norm": 14.731008529663086, + "learning_rate": 5.92e-06, + "loss": 0.7918, + "step": 300 + }, + { + "epoch": 0.13744977796574329, + "grad_norm": 14.75744915008545, + "learning_rate": 6.42e-06, + "loss": 0.7237, + "step": 325 + }, + { + "epoch": 0.148022837809262, + "grad_norm": 15.654507637023926, + "learning_rate": 6.92e-06, + "loss": 0.6985, + "step": 350 + }, + { + "epoch": 0.1585958976527807, + "grad_norm": 14.50328540802002, + "learning_rate": 7.420000000000001e-06, + "loss": 0.7516, + "step": 375 + }, + { + "epoch": 0.16916895749629943, + "grad_norm": 15.2078275680542, + "learning_rate": 7.92e-06, + "loss": 0.636, + "step": 400 + }, + { + "epoch": 0.17974201733981815, + "grad_norm": 13.78581714630127, + "learning_rate": 8.42e-06, + "loss": 0.7175, + "step": 425 + }, + { + "epoch": 0.19031507718333684, + "grad_norm": 13.427356719970703, + "learning_rate": 8.920000000000001e-06, + "loss": 0.6515, + "step": 450 + }, + { + "epoch": 0.20088813702685557, + "grad_norm": 14.424434661865234, + "learning_rate": 9.42e-06, + "loss": 0.626, + "step": 475 + }, + { + "epoch": 0.2114611968703743, + "grad_norm": 11.046919822692871, + "learning_rate": 9.920000000000002e-06, + "loss": 0.6333, + "step": 500 + }, + { + "epoch": 0.222034256713893, + "grad_norm": 12.185227394104004, + "learning_rate": 9.98923076923077e-06, + "loss": 0.6356, + "step": 525 + }, + { + "epoch": 0.2326073165574117, + "grad_norm": 15.45386791229248, + "learning_rate": 9.976410256410257e-06, + "loss": 0.6256, + "step": 550 + }, + { + "epoch": 0.24318037640093043, + "grad_norm": 15.364215850830078, + "learning_rate": 9.963589743589744e-06, + "loss": 0.6079, + "step": 575 + }, + { + "epoch": 0.2537534362444491, + "grad_norm": 12.546235084533691, + "learning_rate": 9.950769230769232e-06, + "loss": 0.588, + "step": 600 + }, + { + "epoch": 0.2643264960879679, + "grad_norm": 17.720355987548828, + "learning_rate": 9.937948717948719e-06, + "loss": 0.6077, + "step": 625 + }, + { + "epoch": 0.27489955593148657, + "grad_norm": 14.936497688293457, + "learning_rate": 9.925128205128206e-06, + "loss": 0.5884, + "step": 650 + }, + { + "epoch": 0.28547261577500527, + "grad_norm": 13.690508842468262, + "learning_rate": 9.912307692307693e-06, + "loss": 0.5346, + "step": 675 + }, + { + "epoch": 0.296045675618524, + "grad_norm": 14.297891616821289, + "learning_rate": 9.899487179487181e-06, + "loss": 0.5544, + "step": 700 + }, + { + "epoch": 0.3066187354620427, + "grad_norm": 13.275521278381348, + "learning_rate": 9.886666666666668e-06, + "loss": 0.5572, + "step": 725 + }, + { + "epoch": 0.3171917953055614, + "grad_norm": 12.175935745239258, + "learning_rate": 9.873846153846155e-06, + "loss": 0.5401, + "step": 750 + }, + { + "epoch": 0.32776485514908016, + "grad_norm": 14.95595932006836, + "learning_rate": 9.861025641025642e-06, + "loss": 0.5794, + "step": 775 + }, + { + "epoch": 0.33833791499259885, + "grad_norm": 16.529836654663086, + "learning_rate": 9.84820512820513e-06, + "loss": 0.5597, + "step": 800 + }, + { + "epoch": 0.34891097483611755, + "grad_norm": 18.89246368408203, + "learning_rate": 9.835384615384617e-06, + "loss": 0.5481, + "step": 825 + }, + { + "epoch": 0.3594840346796363, + "grad_norm": 14.310006141662598, + "learning_rate": 9.822564102564104e-06, + "loss": 0.5452, + "step": 850 + }, + { + "epoch": 0.370057094523155, + "grad_norm": 12.255072593688965, + "learning_rate": 9.80974358974359e-06, + "loss": 0.4932, + "step": 875 + }, + { + "epoch": 0.3806301543666737, + "grad_norm": 10.623993873596191, + "learning_rate": 9.796923076923077e-06, + "loss": 0.5534, + "step": 900 + }, + { + "epoch": 0.39120321421019244, + "grad_norm": 11.355914115905762, + "learning_rate": 9.784102564102564e-06, + "loss": 0.5154, + "step": 925 + }, + { + "epoch": 0.40177627405371114, + "grad_norm": 10.385716438293457, + "learning_rate": 9.771282051282051e-06, + "loss": 0.5318, + "step": 950 + }, + { + "epoch": 0.4123493338972299, + "grad_norm": 11.580507278442383, + "learning_rate": 9.75846153846154e-06, + "loss": 0.564, + "step": 975 + }, + { + "epoch": 0.4229223937407486, + "grad_norm": 18.01216697692871, + "learning_rate": 9.745641025641026e-06, + "loss": 0.4911, + "step": 1000 + }, + { + "epoch": 0.4229223937407486, + "eval_loss": 0.45455271005630493, + "eval_runtime": 445.4699, + "eval_samples_per_second": 8.173, + "eval_steps_per_second": 1.024, + "eval_wer": 0.3321267808250963, + "step": 1000 + }, + { + "epoch": 0.4334954535842673, + "grad_norm": 12.367226600646973, + "learning_rate": 9.732820512820513e-06, + "loss": 0.5246, + "step": 1025 + }, + { + "epoch": 0.444068513427786, + "grad_norm": 11.605467796325684, + "learning_rate": 9.72e-06, + "loss": 0.4813, + "step": 1050 + }, + { + "epoch": 0.4546415732713047, + "grad_norm": 12.51762580871582, + "learning_rate": 9.707179487179487e-06, + "loss": 0.4954, + "step": 1075 + }, + { + "epoch": 0.4652146331148234, + "grad_norm": 10.517561912536621, + "learning_rate": 9.694358974358975e-06, + "loss": 0.4649, + "step": 1100 + }, + { + "epoch": 0.47578769295834217, + "grad_norm": 10.634852409362793, + "learning_rate": 9.681538461538462e-06, + "loss": 0.4426, + "step": 1125 + }, + { + "epoch": 0.48636075280186086, + "grad_norm": 11.74187183380127, + "learning_rate": 9.668717948717949e-06, + "loss": 0.5364, + "step": 1150 + }, + { + "epoch": 0.49693381264537956, + "grad_norm": 9.76657772064209, + "learning_rate": 9.655897435897436e-06, + "loss": 0.4363, + "step": 1175 + }, + { + "epoch": 0.5075068724888983, + "grad_norm": 13.493627548217773, + "learning_rate": 9.643076923076924e-06, + "loss": 0.4867, + "step": 1200 + }, + { + "epoch": 0.518079932332417, + "grad_norm": 13.604817390441895, + "learning_rate": 9.630256410256411e-06, + "loss": 0.4778, + "step": 1225 + }, + { + "epoch": 0.5286529921759358, + "grad_norm": 11.087203979492188, + "learning_rate": 9.617435897435898e-06, + "loss": 0.4571, + "step": 1250 + }, + { + "epoch": 0.5392260520194544, + "grad_norm": 11.273646354675293, + "learning_rate": 9.604615384615385e-06, + "loss": 0.4579, + "step": 1275 + }, + { + "epoch": 0.5497991118629731, + "grad_norm": 11.16568660736084, + "learning_rate": 9.591794871794873e-06, + "loss": 0.4568, + "step": 1300 + }, + { + "epoch": 0.5603721717064919, + "grad_norm": 9.37689208984375, + "learning_rate": 9.57897435897436e-06, + "loss": 0.4577, + "step": 1325 + }, + { + "epoch": 0.5709452315500105, + "grad_norm": 15.166648864746094, + "learning_rate": 9.566153846153847e-06, + "loss": 0.439, + "step": 1350 + }, + { + "epoch": 0.5815182913935293, + "grad_norm": 9.157109260559082, + "learning_rate": 9.553333333333334e-06, + "loss": 0.4611, + "step": 1375 + }, + { + "epoch": 0.592091351237048, + "grad_norm": 11.311399459838867, + "learning_rate": 9.540512820512822e-06, + "loss": 0.4449, + "step": 1400 + }, + { + "epoch": 0.6026644110805667, + "grad_norm": 10.030913352966309, + "learning_rate": 9.52769230769231e-06, + "loss": 0.4634, + "step": 1425 + }, + { + "epoch": 0.6132374709240854, + "grad_norm": 12.241848945617676, + "learning_rate": 9.514871794871796e-06, + "loss": 0.4374, + "step": 1450 + }, + { + "epoch": 0.6238105307676042, + "grad_norm": 9.528079986572266, + "learning_rate": 9.502051282051283e-06, + "loss": 0.4202, + "step": 1475 + }, + { + "epoch": 0.6343835906111228, + "grad_norm": 9.903778076171875, + "learning_rate": 9.48923076923077e-06, + "loss": 0.3996, + "step": 1500 + }, + { + "epoch": 0.6449566504546416, + "grad_norm": 11.979716300964355, + "learning_rate": 9.476410256410257e-06, + "loss": 0.4275, + "step": 1525 + }, + { + "epoch": 0.6555297102981603, + "grad_norm": 11.950919151306152, + "learning_rate": 9.463589743589743e-06, + "loss": 0.4537, + "step": 1550 + }, + { + "epoch": 0.666102770141679, + "grad_norm": 12.06128978729248, + "learning_rate": 9.450769230769232e-06, + "loss": 0.4567, + "step": 1575 + }, + { + "epoch": 0.6766758299851977, + "grad_norm": 12.286346435546875, + "learning_rate": 9.437948717948719e-06, + "loss": 0.4666, + "step": 1600 + }, + { + "epoch": 0.6872488898287165, + "grad_norm": 14.898148536682129, + "learning_rate": 9.425128205128206e-06, + "loss": 0.4442, + "step": 1625 + }, + { + "epoch": 0.6978219496722351, + "grad_norm": 11.827438354492188, + "learning_rate": 9.412307692307692e-06, + "loss": 0.4067, + "step": 1650 + }, + { + "epoch": 0.7083950095157538, + "grad_norm": 12.726160049438477, + "learning_rate": 9.39948717948718e-06, + "loss": 0.4191, + "step": 1675 + }, + { + "epoch": 0.7189680693592726, + "grad_norm": 12.630799293518066, + "learning_rate": 9.386666666666668e-06, + "loss": 0.4785, + "step": 1700 + }, + { + "epoch": 0.7295411292027912, + "grad_norm": 10.818669319152832, + "learning_rate": 9.373846153846155e-06, + "loss": 0.4636, + "step": 1725 + }, + { + "epoch": 0.74011418904631, + "grad_norm": 11.605988502502441, + "learning_rate": 9.361025641025641e-06, + "loss": 0.398, + "step": 1750 + }, + { + "epoch": 0.7506872488898287, + "grad_norm": 13.083930969238281, + "learning_rate": 9.348205128205128e-06, + "loss": 0.4364, + "step": 1775 + }, + { + "epoch": 0.7612603087333474, + "grad_norm": 9.565361022949219, + "learning_rate": 9.335384615384617e-06, + "loss": 0.3984, + "step": 1800 + }, + { + "epoch": 0.7718333685768661, + "grad_norm": 11.05252742767334, + "learning_rate": 9.322564102564104e-06, + "loss": 0.412, + "step": 1825 + }, + { + "epoch": 0.7824064284203849, + "grad_norm": 8.760066032409668, + "learning_rate": 9.30974358974359e-06, + "loss": 0.4375, + "step": 1850 + }, + { + "epoch": 0.7929794882639035, + "grad_norm": 9.936036109924316, + "learning_rate": 9.296923076923077e-06, + "loss": 0.4016, + "step": 1875 + }, + { + "epoch": 0.8035525481074223, + "grad_norm": 10.414031028747559, + "learning_rate": 9.284102564102566e-06, + "loss": 0.4877, + "step": 1900 + }, + { + "epoch": 0.814125607950941, + "grad_norm": 13.588311195373535, + "learning_rate": 9.271282051282053e-06, + "loss": 0.4117, + "step": 1925 + }, + { + "epoch": 0.8246986677944598, + "grad_norm": 11.925647735595703, + "learning_rate": 9.25846153846154e-06, + "loss": 0.4358, + "step": 1950 + }, + { + "epoch": 0.8352717276379784, + "grad_norm": 10.80334186553955, + "learning_rate": 9.245641025641026e-06, + "loss": 0.3665, + "step": 1975 + }, + { + "epoch": 0.8458447874814972, + "grad_norm": 8.696456909179688, + "learning_rate": 9.232820512820515e-06, + "loss": 0.4078, + "step": 2000 + }, + { + "epoch": 0.8458447874814972, + "eval_loss": 0.3520306944847107, + "eval_runtime": 449.1186, + "eval_samples_per_second": 8.107, + "eval_steps_per_second": 1.015, + "eval_wer": 0.28070908162791774, + "step": 2000 + }, + { + "epoch": 0.8564178473250159, + "grad_norm": 9.429954528808594, + "learning_rate": 9.220000000000002e-06, + "loss": 0.4529, + "step": 2025 + }, + { + "epoch": 0.8669909071685346, + "grad_norm": 10.896949768066406, + "learning_rate": 9.207179487179488e-06, + "loss": 0.3886, + "step": 2050 + }, + { + "epoch": 0.8775639670120533, + "grad_norm": 12.577573776245117, + "learning_rate": 9.194358974358975e-06, + "loss": 0.3831, + "step": 2075 + }, + { + "epoch": 0.888137026855572, + "grad_norm": 7.320788860321045, + "learning_rate": 9.181538461538464e-06, + "loss": 0.3698, + "step": 2100 + }, + { + "epoch": 0.8987100866990907, + "grad_norm": 9.53736400604248, + "learning_rate": 9.168717948717949e-06, + "loss": 0.3932, + "step": 2125 + }, + { + "epoch": 0.9092831465426094, + "grad_norm": 11.435847282409668, + "learning_rate": 9.155897435897436e-06, + "loss": 0.4034, + "step": 2150 + }, + { + "epoch": 0.9198562063861282, + "grad_norm": 13.579808235168457, + "learning_rate": 9.143076923076924e-06, + "loss": 0.3795, + "step": 2175 + }, + { + "epoch": 0.9304292662296468, + "grad_norm": 10.266934394836426, + "learning_rate": 9.130256410256411e-06, + "loss": 0.3833, + "step": 2200 + }, + { + "epoch": 0.9410023260731656, + "grad_norm": 11.516539573669434, + "learning_rate": 9.117435897435898e-06, + "loss": 0.3711, + "step": 2225 + }, + { + "epoch": 0.9515753859166843, + "grad_norm": 8.717472076416016, + "learning_rate": 9.104615384615385e-06, + "loss": 0.3711, + "step": 2250 + }, + { + "epoch": 0.962148445760203, + "grad_norm": 10.881747245788574, + "learning_rate": 9.091794871794873e-06, + "loss": 0.4005, + "step": 2275 + }, + { + "epoch": 0.9727215056037217, + "grad_norm": 11.219775199890137, + "learning_rate": 9.07897435897436e-06, + "loss": 0.3693, + "step": 2300 + }, + { + "epoch": 0.9832945654472405, + "grad_norm": 11.497136116027832, + "learning_rate": 9.066153846153847e-06, + "loss": 0.3515, + "step": 2325 + }, + { + "epoch": 0.9938676252907591, + "grad_norm": 14.518182754516602, + "learning_rate": 9.053333333333334e-06, + "loss": 0.3744, + "step": 2350 + }, + { + "epoch": 1.0044406851342778, + "grad_norm": 8.55494213104248, + "learning_rate": 9.04051282051282e-06, + "loss": 0.342, + "step": 2375 + }, + { + "epoch": 1.0150137449777965, + "grad_norm": 9.091997146606445, + "learning_rate": 9.027692307692309e-06, + "loss": 0.3083, + "step": 2400 + }, + { + "epoch": 1.0255868048213153, + "grad_norm": 9.034937858581543, + "learning_rate": 9.014871794871796e-06, + "loss": 0.3031, + "step": 2425 + }, + { + "epoch": 1.036159864664834, + "grad_norm": 13.605484008789062, + "learning_rate": 9.002051282051283e-06, + "loss": 0.3051, + "step": 2450 + }, + { + "epoch": 1.0467329245083528, + "grad_norm": 10.612420082092285, + "learning_rate": 8.98923076923077e-06, + "loss": 0.2721, + "step": 2475 + }, + { + "epoch": 1.0573059843518715, + "grad_norm": 8.616438865661621, + "learning_rate": 8.976410256410258e-06, + "loss": 0.2989, + "step": 2500 + }, + { + "epoch": 1.06787904419539, + "grad_norm": 10.424883842468262, + "learning_rate": 8.963589743589745e-06, + "loss": 0.2742, + "step": 2525 + }, + { + "epoch": 1.0784521040389088, + "grad_norm": 9.381563186645508, + "learning_rate": 8.950769230769232e-06, + "loss": 0.331, + "step": 2550 + }, + { + "epoch": 1.0890251638824275, + "grad_norm": 7.882634162902832, + "learning_rate": 8.937948717948718e-06, + "loss": 0.2943, + "step": 2575 + }, + { + "epoch": 1.0995982237259463, + "grad_norm": 7.2311601638793945, + "learning_rate": 8.925128205128207e-06, + "loss": 0.2694, + "step": 2600 + }, + { + "epoch": 1.110171283569465, + "grad_norm": 8.663016319274902, + "learning_rate": 8.912307692307694e-06, + "loss": 0.2807, + "step": 2625 + }, + { + "epoch": 1.1207443434129838, + "grad_norm": 9.223522186279297, + "learning_rate": 8.89948717948718e-06, + "loss": 0.2863, + "step": 2650 + }, + { + "epoch": 1.1313174032565025, + "grad_norm": 8.749750137329102, + "learning_rate": 8.886666666666667e-06, + "loss": 0.2888, + "step": 2675 + }, + { + "epoch": 1.141890463100021, + "grad_norm": 9.135551452636719, + "learning_rate": 8.873846153846156e-06, + "loss": 0.3135, + "step": 2700 + }, + { + "epoch": 1.1524635229435398, + "grad_norm": 9.296309471130371, + "learning_rate": 8.861025641025641e-06, + "loss": 0.2728, + "step": 2725 + }, + { + "epoch": 1.1630365827870586, + "grad_norm": 9.713606834411621, + "learning_rate": 8.848205128205128e-06, + "loss": 0.2781, + "step": 2750 + }, + { + "epoch": 1.1736096426305773, + "grad_norm": 9.5156888961792, + "learning_rate": 8.835384615384616e-06, + "loss": 0.2863, + "step": 2775 + }, + { + "epoch": 1.184182702474096, + "grad_norm": 7.512620449066162, + "learning_rate": 8.822564102564103e-06, + "loss": 0.2719, + "step": 2800 + }, + { + "epoch": 1.1947557623176146, + "grad_norm": 8.517248153686523, + "learning_rate": 8.80974358974359e-06, + "loss": 0.2609, + "step": 2825 + }, + { + "epoch": 1.2053288221611334, + "grad_norm": 9.989941596984863, + "learning_rate": 8.796923076923077e-06, + "loss": 0.2476, + "step": 2850 + }, + { + "epoch": 1.215901882004652, + "grad_norm": 10.013803482055664, + "learning_rate": 8.784102564102565e-06, + "loss": 0.2683, + "step": 2875 + }, + { + "epoch": 1.2264749418481709, + "grad_norm": 8.619990348815918, + "learning_rate": 8.771282051282052e-06, + "loss": 0.2867, + "step": 2900 + }, + { + "epoch": 1.2370480016916896, + "grad_norm": 8.64876937866211, + "learning_rate": 8.758461538461539e-06, + "loss": 0.2843, + "step": 2925 + }, + { + "epoch": 1.2476210615352084, + "grad_norm": 6.9110541343688965, + "learning_rate": 8.745641025641026e-06, + "loss": 0.2731, + "step": 2950 + }, + { + "epoch": 1.258194121378727, + "grad_norm": 9.259427070617676, + "learning_rate": 8.732820512820513e-06, + "loss": 0.2849, + "step": 2975 + }, + { + "epoch": 1.2687671812222456, + "grad_norm": 8.702181816101074, + "learning_rate": 8.720000000000001e-06, + "loss": 0.2679, + "step": 3000 + }, + { + "epoch": 1.2687671812222456, + "eval_loss": 0.3050294816493988, + "eval_runtime": 439.9928, + "eval_samples_per_second": 8.275, + "eval_steps_per_second": 1.036, + "eval_wer": 0.24209940136433244, + "step": 3000 + }, + { + "epoch": 1.2793402410657644, + "grad_norm": 7.549104690551758, + "learning_rate": 8.707179487179488e-06, + "loss": 0.2563, + "step": 3025 + }, + { + "epoch": 1.2899133009092831, + "grad_norm": 9.588334083557129, + "learning_rate": 8.694358974358975e-06, + "loss": 0.3047, + "step": 3050 + }, + { + "epoch": 1.3004863607528019, + "grad_norm": 9.205412864685059, + "learning_rate": 8.681538461538462e-06, + "loss": 0.2512, + "step": 3075 + }, + { + "epoch": 1.3110594205963206, + "grad_norm": 9.721763610839844, + "learning_rate": 8.66871794871795e-06, + "loss": 0.2827, + "step": 3100 + }, + { + "epoch": 1.3216324804398392, + "grad_norm": 9.485997200012207, + "learning_rate": 8.655897435897437e-06, + "loss": 0.3243, + "step": 3125 + }, + { + "epoch": 1.3322055402833581, + "grad_norm": 9.156095504760742, + "learning_rate": 8.643076923076924e-06, + "loss": 0.2937, + "step": 3150 + }, + { + "epoch": 1.3427786001268767, + "grad_norm": 6.857023239135742, + "learning_rate": 8.63025641025641e-06, + "loss": 0.3141, + "step": 3175 + }, + { + "epoch": 1.3533516599703954, + "grad_norm": 9.577827453613281, + "learning_rate": 8.6174358974359e-06, + "loss": 0.2856, + "step": 3200 + }, + { + "epoch": 1.3639247198139142, + "grad_norm": 10.062349319458008, + "learning_rate": 8.604615384615386e-06, + "loss": 0.2712, + "step": 3225 + }, + { + "epoch": 1.374497779657433, + "grad_norm": 10.257223129272461, + "learning_rate": 8.591794871794873e-06, + "loss": 0.2936, + "step": 3250 + }, + { + "epoch": 1.3850708395009517, + "grad_norm": 8.2437105178833, + "learning_rate": 8.57897435897436e-06, + "loss": 0.239, + "step": 3275 + }, + { + "epoch": 1.3956438993444702, + "grad_norm": 8.366438865661621, + "learning_rate": 8.566153846153848e-06, + "loss": 0.2726, + "step": 3300 + }, + { + "epoch": 1.406216959187989, + "grad_norm": 10.779414176940918, + "learning_rate": 8.553333333333333e-06, + "loss": 0.2646, + "step": 3325 + }, + { + "epoch": 1.4167900190315077, + "grad_norm": 11.594691276550293, + "learning_rate": 8.54051282051282e-06, + "loss": 0.2835, + "step": 3350 + }, + { + "epoch": 1.4273630788750264, + "grad_norm": 8.851668357849121, + "learning_rate": 8.527692307692309e-06, + "loss": 0.256, + "step": 3375 + }, + { + "epoch": 1.4379361387185452, + "grad_norm": 7.315084457397461, + "learning_rate": 8.514871794871795e-06, + "loss": 0.2359, + "step": 3400 + }, + { + "epoch": 1.4485091985620637, + "grad_norm": 9.037189483642578, + "learning_rate": 8.502051282051282e-06, + "loss": 0.2955, + "step": 3425 + }, + { + "epoch": 1.4590822584055827, + "grad_norm": 10.756566047668457, + "learning_rate": 8.489230769230769e-06, + "loss": 0.2964, + "step": 3450 + }, + { + "epoch": 1.4696553182491012, + "grad_norm": 9.553821563720703, + "learning_rate": 8.476410256410258e-06, + "loss": 0.2674, + "step": 3475 + }, + { + "epoch": 1.48022837809262, + "grad_norm": 8.768060684204102, + "learning_rate": 8.463589743589744e-06, + "loss": 0.2678, + "step": 3500 + }, + { + "epoch": 1.4908014379361387, + "grad_norm": 8.738430976867676, + "learning_rate": 8.450769230769231e-06, + "loss": 0.2391, + "step": 3525 + }, + { + "epoch": 1.5013744977796575, + "grad_norm": 7.157522201538086, + "learning_rate": 8.437948717948718e-06, + "loss": 0.2845, + "step": 3550 + }, + { + "epoch": 1.5119475576231762, + "grad_norm": 9.683340072631836, + "learning_rate": 8.425128205128205e-06, + "loss": 0.2961, + "step": 3575 + }, + { + "epoch": 1.5225206174666948, + "grad_norm": 13.411097526550293, + "learning_rate": 8.412307692307693e-06, + "loss": 0.2654, + "step": 3600 + }, + { + "epoch": 1.5330936773102137, + "grad_norm": 8.458112716674805, + "learning_rate": 8.39948717948718e-06, + "loss": 0.2771, + "step": 3625 + }, + { + "epoch": 1.5436667371537323, + "grad_norm": 5.15408182144165, + "learning_rate": 8.386666666666667e-06, + "loss": 0.2654, + "step": 3650 + }, + { + "epoch": 1.554239796997251, + "grad_norm": 7.64996862411499, + "learning_rate": 8.373846153846154e-06, + "loss": 0.2631, + "step": 3675 + }, + { + "epoch": 1.5648128568407698, + "grad_norm": 10.501543998718262, + "learning_rate": 8.361025641025642e-06, + "loss": 0.2666, + "step": 3700 + }, + { + "epoch": 1.5753859166842883, + "grad_norm": 7.34133768081665, + "learning_rate": 8.34820512820513e-06, + "loss": 0.2186, + "step": 3725 + }, + { + "epoch": 1.5859589765278073, + "grad_norm": 9.846173286437988, + "learning_rate": 8.335384615384616e-06, + "loss": 0.264, + "step": 3750 + }, + { + "epoch": 1.5965320363713258, + "grad_norm": 7.888981819152832, + "learning_rate": 8.322564102564103e-06, + "loss": 0.2304, + "step": 3775 + }, + { + "epoch": 1.6071050962148445, + "grad_norm": 14.889488220214844, + "learning_rate": 8.309743589743591e-06, + "loss": 0.2509, + "step": 3800 + }, + { + "epoch": 1.6176781560583633, + "grad_norm": 7.089621067047119, + "learning_rate": 8.296923076923078e-06, + "loss": 0.2638, + "step": 3825 + }, + { + "epoch": 1.628251215901882, + "grad_norm": 8.458942413330078, + "learning_rate": 8.284102564102565e-06, + "loss": 0.2433, + "step": 3850 + }, + { + "epoch": 1.6388242757454008, + "grad_norm": 9.503662109375, + "learning_rate": 8.271282051282052e-06, + "loss": 0.2392, + "step": 3875 + }, + { + "epoch": 1.6493973355889193, + "grad_norm": 8.198525428771973, + "learning_rate": 8.25846153846154e-06, + "loss": 0.2561, + "step": 3900 + }, + { + "epoch": 1.6599703954324383, + "grad_norm": 8.04946517944336, + "learning_rate": 8.245641025641027e-06, + "loss": 0.262, + "step": 3925 + }, + { + "epoch": 1.6705434552759568, + "grad_norm": 11.769758224487305, + "learning_rate": 8.232820512820512e-06, + "loss": 0.2351, + "step": 3950 + }, + { + "epoch": 1.6811165151194756, + "grad_norm": 10.128782272338867, + "learning_rate": 8.220000000000001e-06, + "loss": 0.2444, + "step": 3975 + }, + { + "epoch": 1.6916895749629943, + "grad_norm": 9.632699966430664, + "learning_rate": 8.207179487179488e-06, + "loss": 0.2423, + "step": 4000 + }, + { + "epoch": 1.6916895749629943, + "eval_loss": 0.27250364422798157, + "eval_runtime": 441.2867, + "eval_samples_per_second": 8.251, + "eval_steps_per_second": 1.033, + "eval_wer": 0.22172722632140704, + "step": 4000 + }, + { + "epoch": 1.7022626348065129, + "grad_norm": 5.695612907409668, + "learning_rate": 8.194358974358975e-06, + "loss": 0.274, + "step": 4025 + }, + { + "epoch": 1.7128356946500318, + "grad_norm": 10.697525024414062, + "learning_rate": 8.181538461538461e-06, + "loss": 0.2634, + "step": 4050 + }, + { + "epoch": 1.7234087544935504, + "grad_norm": 12.469213485717773, + "learning_rate": 8.16871794871795e-06, + "loss": 0.2551, + "step": 4075 + }, + { + "epoch": 1.733981814337069, + "grad_norm": 7.183727264404297, + "learning_rate": 8.155897435897437e-06, + "loss": 0.2787, + "step": 4100 + }, + { + "epoch": 1.7445548741805879, + "grad_norm": 8.577070236206055, + "learning_rate": 8.143076923076924e-06, + "loss": 0.24, + "step": 4125 + }, + { + "epoch": 1.7551279340241066, + "grad_norm": 8.086338996887207, + "learning_rate": 8.13025641025641e-06, + "loss": 0.2337, + "step": 4150 + }, + { + "epoch": 1.7657009938676254, + "grad_norm": 10.768961906433105, + "learning_rate": 8.117435897435897e-06, + "loss": 0.2335, + "step": 4175 + }, + { + "epoch": 1.7762740537111439, + "grad_norm": 8.794693946838379, + "learning_rate": 8.104615384615386e-06, + "loss": 0.275, + "step": 4200 + }, + { + "epoch": 1.7868471135546629, + "grad_norm": 9.108808517456055, + "learning_rate": 8.091794871794873e-06, + "loss": 0.2573, + "step": 4225 + }, + { + "epoch": 1.7974201733981814, + "grad_norm": 8.723715782165527, + "learning_rate": 8.07897435897436e-06, + "loss": 0.2637, + "step": 4250 + }, + { + "epoch": 1.8079932332417001, + "grad_norm": 9.015399932861328, + "learning_rate": 8.066153846153846e-06, + "loss": 0.2372, + "step": 4275 + }, + { + "epoch": 1.8185662930852189, + "grad_norm": 8.118802070617676, + "learning_rate": 8.053333333333335e-06, + "loss": 0.2461, + "step": 4300 + }, + { + "epoch": 1.8291393529287374, + "grad_norm": 7.922321796417236, + "learning_rate": 8.040512820512822e-06, + "loss": 0.2389, + "step": 4325 + }, + { + "epoch": 1.8397124127722564, + "grad_norm": 13.12256145477295, + "learning_rate": 8.027692307692308e-06, + "loss": 0.2526, + "step": 4350 + }, + { + "epoch": 1.850285472615775, + "grad_norm": 7.943728923797607, + "learning_rate": 8.014871794871795e-06, + "loss": 0.2399, + "step": 4375 + }, + { + "epoch": 1.8608585324592937, + "grad_norm": 8.962715148925781, + "learning_rate": 8.002051282051284e-06, + "loss": 0.23, + "step": 4400 + }, + { + "epoch": 1.8714315923028124, + "grad_norm": 9.588024139404297, + "learning_rate": 7.98923076923077e-06, + "loss": 0.2147, + "step": 4425 + }, + { + "epoch": 1.8820046521463312, + "grad_norm": 10.069765090942383, + "learning_rate": 7.976410256410257e-06, + "loss": 0.2641, + "step": 4450 + }, + { + "epoch": 1.89257771198985, + "grad_norm": 9.397997856140137, + "learning_rate": 7.963589743589744e-06, + "loss": 0.2138, + "step": 4475 + }, + { + "epoch": 1.9031507718333684, + "grad_norm": 8.495611190795898, + "learning_rate": 7.950769230769233e-06, + "loss": 0.234, + "step": 4500 + }, + { + "epoch": 1.9137238316768874, + "grad_norm": 9.057598114013672, + "learning_rate": 7.93794871794872e-06, + "loss": 0.2415, + "step": 4525 + }, + { + "epoch": 1.924296891520406, + "grad_norm": 11.452310562133789, + "learning_rate": 7.925128205128205e-06, + "loss": 0.2428, + "step": 4550 + }, + { + "epoch": 1.9348699513639247, + "grad_norm": 8.656147003173828, + "learning_rate": 7.912307692307693e-06, + "loss": 0.2148, + "step": 4575 + }, + { + "epoch": 1.9454430112074435, + "grad_norm": 7.229951858520508, + "learning_rate": 7.89948717948718e-06, + "loss": 0.2303, + "step": 4600 + }, + { + "epoch": 1.9560160710509622, + "grad_norm": 9.499919891357422, + "learning_rate": 7.886666666666667e-06, + "loss": 0.2458, + "step": 4625 + }, + { + "epoch": 1.966589130894481, + "grad_norm": 8.735013008117676, + "learning_rate": 7.873846153846154e-06, + "loss": 0.2474, + "step": 4650 + }, + { + "epoch": 1.9771621907379995, + "grad_norm": 7.496155261993408, + "learning_rate": 7.861025641025642e-06, + "loss": 0.2132, + "step": 4675 + }, + { + "epoch": 1.9877352505815185, + "grad_norm": 10.107941627502441, + "learning_rate": 7.848205128205129e-06, + "loss": 0.2357, + "step": 4700 + }, + { + "epoch": 1.998308310425037, + "grad_norm": 9.089138984680176, + "learning_rate": 7.835384615384616e-06, + "loss": 0.2426, + "step": 4725 + }, + { + "epoch": 2.0088813702685555, + "grad_norm": 6.171664714813232, + "learning_rate": 7.822564102564103e-06, + "loss": 0.1577, + "step": 4750 + }, + { + "epoch": 2.0194544301120745, + "grad_norm": 8.21010684967041, + "learning_rate": 7.80974358974359e-06, + "loss": 0.1737, + "step": 4775 + }, + { + "epoch": 2.030027489955593, + "grad_norm": 7.623335838317871, + "learning_rate": 7.796923076923078e-06, + "loss": 0.1755, + "step": 4800 + }, + { + "epoch": 2.040600549799112, + "grad_norm": 6.5446391105651855, + "learning_rate": 7.784102564102565e-06, + "loss": 0.173, + "step": 4825 + }, + { + "epoch": 2.0511736096426305, + "grad_norm": 7.576420307159424, + "learning_rate": 7.771282051282052e-06, + "loss": 0.1673, + "step": 4850 + }, + { + "epoch": 2.0617466694861495, + "grad_norm": 6.1533074378967285, + "learning_rate": 7.758461538461538e-06, + "loss": 0.1584, + "step": 4875 + }, + { + "epoch": 2.072319729329668, + "grad_norm": 8.039182662963867, + "learning_rate": 7.745641025641027e-06, + "loss": 0.1761, + "step": 4900 + }, + { + "epoch": 2.0828927891731865, + "grad_norm": 6.898794174194336, + "learning_rate": 7.732820512820514e-06, + "loss": 0.1626, + "step": 4925 + }, + { + "epoch": 2.0934658490167055, + "grad_norm": 8.714920997619629, + "learning_rate": 7.72e-06, + "loss": 0.1822, + "step": 4950 + }, + { + "epoch": 2.104038908860224, + "grad_norm": 7.137439727783203, + "learning_rate": 7.707179487179487e-06, + "loss": 0.1531, + "step": 4975 + }, + { + "epoch": 2.114611968703743, + "grad_norm": 7.202429294586182, + "learning_rate": 7.694358974358976e-06, + "loss": 0.169, + "step": 5000 + }, + { + "epoch": 2.114611968703743, + "eval_loss": 0.25147655606269836, + "eval_runtime": 441.5298, + "eval_samples_per_second": 8.246, + "eval_steps_per_second": 1.033, + "eval_wer": 0.21838600399090444, + "step": 5000 + }, + { + "epoch": 2.1251850285472615, + "grad_norm": 8.99411678314209, + "learning_rate": 7.681538461538463e-06, + "loss": 0.1642, + "step": 5025 + }, + { + "epoch": 2.13575808839078, + "grad_norm": 7.8761420249938965, + "learning_rate": 7.66871794871795e-06, + "loss": 0.1877, + "step": 5050 + }, + { + "epoch": 2.146331148234299, + "grad_norm": 8.506233215332031, + "learning_rate": 7.655897435897436e-06, + "loss": 0.1671, + "step": 5075 + }, + { + "epoch": 2.1569042080778176, + "grad_norm": 11.958497047424316, + "learning_rate": 7.643076923076925e-06, + "loss": 0.181, + "step": 5100 + }, + { + "epoch": 2.1674772679213365, + "grad_norm": 6.008707523345947, + "learning_rate": 7.630256410256412e-06, + "loss": 0.1655, + "step": 5125 + }, + { + "epoch": 2.178050327764855, + "grad_norm": 7.9332451820373535, + "learning_rate": 7.617435897435898e-06, + "loss": 0.1782, + "step": 5150 + }, + { + "epoch": 2.188623387608374, + "grad_norm": 9.106295585632324, + "learning_rate": 7.604615384615385e-06, + "loss": 0.1521, + "step": 5175 + }, + { + "epoch": 2.1991964474518926, + "grad_norm": 7.487992286682129, + "learning_rate": 7.591794871794872e-06, + "loss": 0.1735, + "step": 5200 + }, + { + "epoch": 2.209769507295411, + "grad_norm": 9.595857620239258, + "learning_rate": 7.578974358974359e-06, + "loss": 0.1642, + "step": 5225 + }, + { + "epoch": 2.22034256713893, + "grad_norm": 6.064367294311523, + "learning_rate": 7.566153846153847e-06, + "loss": 0.1515, + "step": 5250 + }, + { + "epoch": 2.2309156269824486, + "grad_norm": 6.732094764709473, + "learning_rate": 7.553333333333334e-06, + "loss": 0.1615, + "step": 5275 + }, + { + "epoch": 2.2414886868259676, + "grad_norm": 6.89235258102417, + "learning_rate": 7.540512820512821e-06, + "loss": 0.1519, + "step": 5300 + }, + { + "epoch": 2.252061746669486, + "grad_norm": 6.550455570220947, + "learning_rate": 7.527692307692308e-06, + "loss": 0.1606, + "step": 5325 + }, + { + "epoch": 2.262634806513005, + "grad_norm": 6.678434371948242, + "learning_rate": 7.514871794871795e-06, + "loss": 0.1653, + "step": 5350 + }, + { + "epoch": 2.2732078663565236, + "grad_norm": 8.418935775756836, + "learning_rate": 7.5020512820512826e-06, + "loss": 0.1574, + "step": 5375 + }, + { + "epoch": 2.283780926200042, + "grad_norm": 6.893301486968994, + "learning_rate": 7.489230769230769e-06, + "loss": 0.1647, + "step": 5400 + }, + { + "epoch": 2.294353986043561, + "grad_norm": 7.643205165863037, + "learning_rate": 7.476410256410257e-06, + "loss": 0.187, + "step": 5425 + }, + { + "epoch": 2.3049270458870796, + "grad_norm": 9.927549362182617, + "learning_rate": 7.463589743589744e-06, + "loss": 0.1677, + "step": 5450 + }, + { + "epoch": 2.3155001057305986, + "grad_norm": 10.559179306030273, + "learning_rate": 7.4507692307692316e-06, + "loss": 0.1644, + "step": 5475 + }, + { + "epoch": 2.326073165574117, + "grad_norm": 7.4924187660217285, + "learning_rate": 7.437948717948718e-06, + "loss": 0.1544, + "step": 5500 + }, + { + "epoch": 2.3366462254176357, + "grad_norm": 7.554123401641846, + "learning_rate": 7.425128205128206e-06, + "loss": 0.1891, + "step": 5525 + }, + { + "epoch": 2.3472192852611546, + "grad_norm": 7.673270225524902, + "learning_rate": 7.412307692307693e-06, + "loss": 0.1841, + "step": 5550 + }, + { + "epoch": 2.357792345104673, + "grad_norm": 9.479844093322754, + "learning_rate": 7.3994871794871806e-06, + "loss": 0.1706, + "step": 5575 + }, + { + "epoch": 2.368365404948192, + "grad_norm": 11.788003921508789, + "learning_rate": 7.386666666666667e-06, + "loss": 0.1573, + "step": 5600 + }, + { + "epoch": 2.3789384647917107, + "grad_norm": 10.670574188232422, + "learning_rate": 7.373846153846155e-06, + "loss": 0.1762, + "step": 5625 + }, + { + "epoch": 2.389511524635229, + "grad_norm": 5.672201633453369, + "learning_rate": 7.361025641025642e-06, + "loss": 0.1868, + "step": 5650 + }, + { + "epoch": 2.400084584478748, + "grad_norm": 6.6854729652404785, + "learning_rate": 7.3482051282051295e-06, + "loss": 0.1377, + "step": 5675 + }, + { + "epoch": 2.4106576443222667, + "grad_norm": 8.25365924835205, + "learning_rate": 7.335384615384616e-06, + "loss": 0.181, + "step": 5700 + }, + { + "epoch": 2.4212307041657857, + "grad_norm": 8.488265991210938, + "learning_rate": 7.322564102564104e-06, + "loss": 0.1528, + "step": 5725 + }, + { + "epoch": 2.431803764009304, + "grad_norm": 11.608367919921875, + "learning_rate": 7.309743589743591e-06, + "loss": 0.1564, + "step": 5750 + }, + { + "epoch": 2.442376823852823, + "grad_norm": 10.334943771362305, + "learning_rate": 7.296923076923077e-06, + "loss": 0.1676, + "step": 5775 + }, + { + "epoch": 2.4529498836963417, + "grad_norm": 9.751703262329102, + "learning_rate": 7.2841025641025645e-06, + "loss": 0.157, + "step": 5800 + }, + { + "epoch": 2.4635229435398607, + "grad_norm": 8.788719177246094, + "learning_rate": 7.271282051282051e-06, + "loss": 0.1664, + "step": 5825 + }, + { + "epoch": 2.474096003383379, + "grad_norm": 5.966717720031738, + "learning_rate": 7.258461538461539e-06, + "loss": 0.1569, + "step": 5850 + }, + { + "epoch": 2.4846690632268977, + "grad_norm": 9.089386940002441, + "learning_rate": 7.245641025641026e-06, + "loss": 0.1751, + "step": 5875 + }, + { + "epoch": 2.4952421230704167, + "grad_norm": 7.7753190994262695, + "learning_rate": 7.2328205128205135e-06, + "loss": 0.174, + "step": 5900 + }, + { + "epoch": 2.5058151829139352, + "grad_norm": 8.18852424621582, + "learning_rate": 7.22e-06, + "loss": 0.2026, + "step": 5925 + }, + { + "epoch": 2.516388242757454, + "grad_norm": 7.778114318847656, + "learning_rate": 7.207179487179487e-06, + "loss": 0.181, + "step": 5950 + }, + { + "epoch": 2.5269613026009727, + "grad_norm": 7.439593315124512, + "learning_rate": 7.194358974358975e-06, + "loss": 0.1679, + "step": 5975 + }, + { + "epoch": 2.5375343624444913, + "grad_norm": 12.546255111694336, + "learning_rate": 7.181538461538462e-06, + "loss": 0.1646, + "step": 6000 + }, + { + "epoch": 2.5375343624444913, + "eval_loss": 0.23773407936096191, + "eval_runtime": 439.7036, + "eval_samples_per_second": 8.281, + "eval_steps_per_second": 1.037, + "eval_wer": 0.20822311940229246, + "step": 6000 + }, + { + "epoch": 2.5481074222880102, + "grad_norm": 5.546166896820068, + "learning_rate": 7.168717948717949e-06, + "loss": 0.1642, + "step": 6025 + }, + { + "epoch": 2.5586804821315288, + "grad_norm": 6.507399082183838, + "learning_rate": 7.155897435897436e-06, + "loss": 0.1715, + "step": 6050 + }, + { + "epoch": 2.5692535419750477, + "grad_norm": 8.499435424804688, + "learning_rate": 7.143076923076924e-06, + "loss": 0.1724, + "step": 6075 + }, + { + "epoch": 2.5798266018185663, + "grad_norm": 6.351449966430664, + "learning_rate": 7.130256410256411e-06, + "loss": 0.1514, + "step": 6100 + }, + { + "epoch": 2.590399661662085, + "grad_norm": 5.271184921264648, + "learning_rate": 7.117435897435898e-06, + "loss": 0.1625, + "step": 6125 + }, + { + "epoch": 2.6009727215056038, + "grad_norm": 6.257565021514893, + "learning_rate": 7.104615384615385e-06, + "loss": 0.1386, + "step": 6150 + }, + { + "epoch": 2.6115457813491223, + "grad_norm": 8.022757530212402, + "learning_rate": 7.0923076923076926e-06, + "loss": 0.1545, + "step": 6175 + }, + { + "epoch": 2.6221188411926413, + "grad_norm": 9.464363098144531, + "learning_rate": 7.07948717948718e-06, + "loss": 0.1729, + "step": 6200 + }, + { + "epoch": 2.63269190103616, + "grad_norm": 9.03287124633789, + "learning_rate": 7.066666666666667e-06, + "loss": 0.1885, + "step": 6225 + }, + { + "epoch": 2.6432649608796783, + "grad_norm": 10.790355682373047, + "learning_rate": 7.053846153846155e-06, + "loss": 0.1629, + "step": 6250 + }, + { + "epoch": 2.6538380207231973, + "grad_norm": 6.963956832885742, + "learning_rate": 7.0410256410256415e-06, + "loss": 0.1771, + "step": 6275 + }, + { + "epoch": 2.6644110805667163, + "grad_norm": 8.90701675415039, + "learning_rate": 7.028205128205129e-06, + "loss": 0.1514, + "step": 6300 + }, + { + "epoch": 2.674984140410235, + "grad_norm": 6.524221420288086, + "learning_rate": 7.015384615384616e-06, + "loss": 0.1486, + "step": 6325 + }, + { + "epoch": 2.6855572002537533, + "grad_norm": 6.47484827041626, + "learning_rate": 7.002564102564104e-06, + "loss": 0.1344, + "step": 6350 + }, + { + "epoch": 2.6961302600972723, + "grad_norm": 7.201345443725586, + "learning_rate": 6.9897435897435905e-06, + "loss": 0.1458, + "step": 6375 + }, + { + "epoch": 2.706703319940791, + "grad_norm": 6.066169261932373, + "learning_rate": 6.976923076923078e-06, + "loss": 0.1546, + "step": 6400 + }, + { + "epoch": 2.71727637978431, + "grad_norm": 7.907600402832031, + "learning_rate": 6.964102564102565e-06, + "loss": 0.1713, + "step": 6425 + }, + { + "epoch": 2.7278494396278283, + "grad_norm": 7.963566303253174, + "learning_rate": 6.951282051282052e-06, + "loss": 0.1704, + "step": 6450 + }, + { + "epoch": 2.738422499471347, + "grad_norm": 7.709939956665039, + "learning_rate": 6.9384615384615395e-06, + "loss": 0.1685, + "step": 6475 + }, + { + "epoch": 2.748995559314866, + "grad_norm": 6.702678680419922, + "learning_rate": 6.925641025641026e-06, + "loss": 0.1777, + "step": 6500 + }, + { + "epoch": 2.7595686191583844, + "grad_norm": 5.540548324584961, + "learning_rate": 6.912820512820514e-06, + "loss": 0.1513, + "step": 6525 + }, + { + "epoch": 2.7701416790019033, + "grad_norm": 5.520662307739258, + "learning_rate": 6.9e-06, + "loss": 0.1692, + "step": 6550 + }, + { + "epoch": 2.780714738845422, + "grad_norm": 9.791701316833496, + "learning_rate": 6.887179487179488e-06, + "loss": 0.1518, + "step": 6575 + }, + { + "epoch": 2.7912877986889404, + "grad_norm": 6.41890811920166, + "learning_rate": 6.8743589743589745e-06, + "loss": 0.1525, + "step": 6600 + }, + { + "epoch": 2.8018608585324594, + "grad_norm": 6.0221943855285645, + "learning_rate": 6.861538461538461e-06, + "loss": 0.1637, + "step": 6625 + }, + { + "epoch": 2.812433918375978, + "grad_norm": 8.487308502197266, + "learning_rate": 6.848717948717949e-06, + "loss": 0.1431, + "step": 6650 + }, + { + "epoch": 2.823006978219497, + "grad_norm": 15.519043922424316, + "learning_rate": 6.835897435897436e-06, + "loss": 0.1604, + "step": 6675 + }, + { + "epoch": 2.8335800380630154, + "grad_norm": 6.480257987976074, + "learning_rate": 6.8230769230769235e-06, + "loss": 0.1465, + "step": 6700 + }, + { + "epoch": 2.844153097906534, + "grad_norm": 6.450438976287842, + "learning_rate": 6.81025641025641e-06, + "loss": 0.1476, + "step": 6725 + }, + { + "epoch": 2.854726157750053, + "grad_norm": 5.721517562866211, + "learning_rate": 6.797435897435898e-06, + "loss": 0.1825, + "step": 6750 + }, + { + "epoch": 2.8652992175935714, + "grad_norm": 9.688868522644043, + "learning_rate": 6.784615384615385e-06, + "loss": 0.1669, + "step": 6775 + }, + { + "epoch": 2.8758722774370904, + "grad_norm": 8.336874961853027, + "learning_rate": 6.7717948717948725e-06, + "loss": 0.1434, + "step": 6800 + }, + { + "epoch": 2.886445337280609, + "grad_norm": 8.098302841186523, + "learning_rate": 6.758974358974359e-06, + "loss": 0.1506, + "step": 6825 + }, + { + "epoch": 2.8970183971241275, + "grad_norm": 5.927369117736816, + "learning_rate": 6.746153846153847e-06, + "loss": 0.1607, + "step": 6850 + }, + { + "epoch": 2.9075914569676464, + "grad_norm": 20.486467361450195, + "learning_rate": 6.733333333333334e-06, + "loss": 0.1616, + "step": 6875 + }, + { + "epoch": 2.9181645168111654, + "grad_norm": 9.060097694396973, + "learning_rate": 6.7205128205128215e-06, + "loss": 0.1671, + "step": 6900 + }, + { + "epoch": 2.928737576654684, + "grad_norm": 6.587460517883301, + "learning_rate": 6.707692307692308e-06, + "loss": 0.1645, + "step": 6925 + }, + { + "epoch": 2.9393106364982025, + "grad_norm": 14.763110160827637, + "learning_rate": 6.694871794871796e-06, + "loss": 0.1485, + "step": 6950 + }, + { + "epoch": 2.9498836963417214, + "grad_norm": 7.697717666625977, + "learning_rate": 6.682051282051283e-06, + "loss": 0.1664, + "step": 6975 + }, + { + "epoch": 2.96045675618524, + "grad_norm": 8.12340259552002, + "learning_rate": 6.6692307692307705e-06, + "loss": 0.1731, + "step": 7000 + }, + { + "epoch": 2.96045675618524, + "eval_loss": 0.2189464271068573, + "eval_runtime": 440.062, + "eval_samples_per_second": 8.274, + "eval_steps_per_second": 1.036, + "eval_wer": 0.19109935495846675, + "step": 7000 + }, + { + "epoch": 2.971029816028759, + "grad_norm": 6.9427809715271, + "learning_rate": 6.656410256410257e-06, + "loss": 0.1594, + "step": 7025 + }, + { + "epoch": 2.9816028758722775, + "grad_norm": 7.702503204345703, + "learning_rate": 6.643589743589744e-06, + "loss": 0.1508, + "step": 7050 + }, + { + "epoch": 2.992175935715796, + "grad_norm": 8.890095710754395, + "learning_rate": 6.630769230769232e-06, + "loss": 0.1757, + "step": 7075 + }, + { + "epoch": 3.002748995559315, + "grad_norm": 5.879498481750488, + "learning_rate": 6.617948717948719e-06, + "loss": 0.1206, + "step": 7100 + }, + { + "epoch": 3.0133220554028335, + "grad_norm": 4.067333221435547, + "learning_rate": 6.605128205128206e-06, + "loss": 0.1087, + "step": 7125 + }, + { + "epoch": 3.0238951152463525, + "grad_norm": 4.698537826538086, + "learning_rate": 6.592307692307692e-06, + "loss": 0.1071, + "step": 7150 + }, + { + "epoch": 3.034468175089871, + "grad_norm": 7.69912576675415, + "learning_rate": 6.57948717948718e-06, + "loss": 0.1102, + "step": 7175 + }, + { + "epoch": 3.0450412349333895, + "grad_norm": 6.612318992614746, + "learning_rate": 6.566666666666667e-06, + "loss": 0.1069, + "step": 7200 + }, + { + "epoch": 3.0556142947769085, + "grad_norm": 6.134123802185059, + "learning_rate": 6.553846153846154e-06, + "loss": 0.1154, + "step": 7225 + }, + { + "epoch": 3.066187354620427, + "grad_norm": 6.283836841583252, + "learning_rate": 6.541025641025641e-06, + "loss": 0.1334, + "step": 7250 + }, + { + "epoch": 3.076760414463946, + "grad_norm": 5.295098781585693, + "learning_rate": 6.528205128205128e-06, + "loss": 0.1064, + "step": 7275 + }, + { + "epoch": 3.0873334743074645, + "grad_norm": 4.781207084655762, + "learning_rate": 6.515384615384616e-06, + "loss": 0.0878, + "step": 7300 + }, + { + "epoch": 3.0979065341509835, + "grad_norm": 8.744128227233887, + "learning_rate": 6.5025641025641026e-06, + "loss": 0.117, + "step": 7325 + }, + { + "epoch": 3.108479593994502, + "grad_norm": 6.0972900390625, + "learning_rate": 6.48974358974359e-06, + "loss": 0.1024, + "step": 7350 + }, + { + "epoch": 3.1190526538380206, + "grad_norm": 5.786510944366455, + "learning_rate": 6.476923076923077e-06, + "loss": 0.1036, + "step": 7375 + }, + { + "epoch": 3.1296257136815395, + "grad_norm": 5.329776763916016, + "learning_rate": 6.464102564102565e-06, + "loss": 0.1122, + "step": 7400 + }, + { + "epoch": 3.140198773525058, + "grad_norm": 6.620870590209961, + "learning_rate": 6.4512820512820516e-06, + "loss": 0.094, + "step": 7425 + }, + { + "epoch": 3.150771833368577, + "grad_norm": 5.6966962814331055, + "learning_rate": 6.438461538461539e-06, + "loss": 0.1034, + "step": 7450 + }, + { + "epoch": 3.1613448932120956, + "grad_norm": 5.702958583831787, + "learning_rate": 6.425641025641026e-06, + "loss": 0.1002, + "step": 7475 + }, + { + "epoch": 3.1719179530556145, + "grad_norm": 6.589195251464844, + "learning_rate": 6.412820512820514e-06, + "loss": 0.0962, + "step": 7500 + }, + { + "epoch": 3.182491012899133, + "grad_norm": 6.6714558601379395, + "learning_rate": 6.4000000000000006e-06, + "loss": 0.1006, + "step": 7525 + }, + { + "epoch": 3.1930640727426516, + "grad_norm": 6.5564165115356445, + "learning_rate": 6.387179487179488e-06, + "loss": 0.1045, + "step": 7550 + }, + { + "epoch": 3.2036371325861706, + "grad_norm": 5.653778076171875, + "learning_rate": 6.374358974358975e-06, + "loss": 0.1251, + "step": 7575 + }, + { + "epoch": 3.214210192429689, + "grad_norm": 5.369845867156982, + "learning_rate": 6.361538461538463e-06, + "loss": 0.1021, + "step": 7600 + }, + { + "epoch": 3.224783252273208, + "grad_norm": 10.076266288757324, + "learning_rate": 6.3487179487179495e-06, + "loss": 0.0952, + "step": 7625 + }, + { + "epoch": 3.2353563121167266, + "grad_norm": 7.338042259216309, + "learning_rate": 6.335897435897436e-06, + "loss": 0.0995, + "step": 7650 + }, + { + "epoch": 3.245929371960245, + "grad_norm": 7.094664573669434, + "learning_rate": 6.323076923076924e-06, + "loss": 0.1049, + "step": 7675 + }, + { + "epoch": 3.256502431803764, + "grad_norm": 3.372304916381836, + "learning_rate": 6.310256410256411e-06, + "loss": 0.1015, + "step": 7700 + }, + { + "epoch": 3.2670754916472826, + "grad_norm": 6.649098873138428, + "learning_rate": 6.2974358974358985e-06, + "loss": 0.0854, + "step": 7725 + }, + { + "epoch": 3.2776485514908016, + "grad_norm": 6.159810543060303, + "learning_rate": 6.284615384615385e-06, + "loss": 0.103, + "step": 7750 + }, + { + "epoch": 3.28822161133432, + "grad_norm": 7.026951313018799, + "learning_rate": 6.271794871794872e-06, + "loss": 0.0999, + "step": 7775 + }, + { + "epoch": 3.2987946711778386, + "grad_norm": 6.820842266082764, + "learning_rate": 6.258974358974359e-06, + "loss": 0.1172, + "step": 7800 + }, + { + "epoch": 3.3093677310213576, + "grad_norm": 4.771303176879883, + "learning_rate": 6.246153846153846e-06, + "loss": 0.1003, + "step": 7825 + }, + { + "epoch": 3.319940790864876, + "grad_norm": 8.24927043914795, + "learning_rate": 6.2333333333333335e-06, + "loss": 0.0958, + "step": 7850 + }, + { + "epoch": 3.330513850708395, + "grad_norm": 5.464736461639404, + "learning_rate": 6.22051282051282e-06, + "loss": 0.1236, + "step": 7875 + }, + { + "epoch": 3.3410869105519136, + "grad_norm": 9.046813011169434, + "learning_rate": 6.207692307692308e-06, + "loss": 0.1195, + "step": 7900 + }, + { + "epoch": 3.3516599703954326, + "grad_norm": 9.43157958984375, + "learning_rate": 6.194871794871795e-06, + "loss": 0.1182, + "step": 7925 + }, + { + "epoch": 3.362233030238951, + "grad_norm": 5.381494045257568, + "learning_rate": 6.1820512820512825e-06, + "loss": 0.1256, + "step": 7950 + }, + { + "epoch": 3.3728060900824697, + "grad_norm": 5.340794086456299, + "learning_rate": 6.169230769230769e-06, + "loss": 0.1161, + "step": 7975 + }, + { + "epoch": 3.3833791499259886, + "grad_norm": 5.442756652832031, + "learning_rate": 6.156410256410257e-06, + "loss": 0.1017, + "step": 8000 + }, + { + "epoch": 3.3833791499259886, + "eval_loss": 0.21353289484977722, + "eval_runtime": 440.3677, + "eval_samples_per_second": 8.268, + "eval_steps_per_second": 1.035, + "eval_wer": 0.1970393057682491, + "step": 8000 + }, + { + "epoch": 3.393952209769507, + "grad_norm": 7.739430904388428, + "learning_rate": 6.143589743589744e-06, + "loss": 0.0922, + "step": 8025 + }, + { + "epoch": 3.404525269613026, + "grad_norm": 6.592048168182373, + "learning_rate": 6.1307692307692315e-06, + "loss": 0.116, + "step": 8050 + }, + { + "epoch": 3.4150983294565447, + "grad_norm": 6.631498336791992, + "learning_rate": 6.117948717948718e-06, + "loss": 0.0926, + "step": 8075 + }, + { + "epoch": 3.4256713893000637, + "grad_norm": 4.4608049392700195, + "learning_rate": 6.105128205128206e-06, + "loss": 0.0946, + "step": 8100 + }, + { + "epoch": 3.436244449143582, + "grad_norm": 6.894134044647217, + "learning_rate": 6.092307692307693e-06, + "loss": 0.1056, + "step": 8125 + }, + { + "epoch": 3.4468175089871007, + "grad_norm": 7.685297966003418, + "learning_rate": 6.0794871794871805e-06, + "loss": 0.1031, + "step": 8150 + }, + { + "epoch": 3.4573905688306197, + "grad_norm": 5.096285820007324, + "learning_rate": 6.066666666666667e-06, + "loss": 0.1157, + "step": 8175 + }, + { + "epoch": 3.467963628674138, + "grad_norm": 4.975133419036865, + "learning_rate": 6.053846153846155e-06, + "loss": 0.1033, + "step": 8200 + }, + { + "epoch": 3.478536688517657, + "grad_norm": 4.345186233520508, + "learning_rate": 6.041025641025642e-06, + "loss": 0.0987, + "step": 8225 + }, + { + "epoch": 3.4891097483611757, + "grad_norm": 5.9824299812316895, + "learning_rate": 6.028205128205129e-06, + "loss": 0.0985, + "step": 8250 + }, + { + "epoch": 3.4996828082046942, + "grad_norm": 7.680878162384033, + "learning_rate": 6.015384615384616e-06, + "loss": 0.1005, + "step": 8275 + }, + { + "epoch": 3.510255868048213, + "grad_norm": 6.961033344268799, + "learning_rate": 6.002564102564103e-06, + "loss": 0.1046, + "step": 8300 + }, + { + "epoch": 3.5208289278917317, + "grad_norm": 6.000370025634766, + "learning_rate": 5.989743589743591e-06, + "loss": 0.1193, + "step": 8325 + }, + { + "epoch": 3.5314019877352507, + "grad_norm": 5.969180583953857, + "learning_rate": 5.976923076923078e-06, + "loss": 0.0992, + "step": 8350 + }, + { + "epoch": 3.5419750475787692, + "grad_norm": 7.239658355712891, + "learning_rate": 5.9641025641025644e-06, + "loss": 0.1076, + "step": 8375 + }, + { + "epoch": 3.5525481074222878, + "grad_norm": 6.9521708488464355, + "learning_rate": 5.951282051282051e-06, + "loss": 0.088, + "step": 8400 + }, + { + "epoch": 3.5631211672658067, + "grad_norm": 5.2356109619140625, + "learning_rate": 5.938461538461538e-06, + "loss": 0.111, + "step": 8425 + }, + { + "epoch": 3.5736942271093257, + "grad_norm": 7.44065523147583, + "learning_rate": 5.925641025641026e-06, + "loss": 0.1006, + "step": 8450 + }, + { + "epoch": 3.5842672869528442, + "grad_norm": 6.604626178741455, + "learning_rate": 5.912820512820513e-06, + "loss": 0.1187, + "step": 8475 + }, + { + "epoch": 3.5948403467963628, + "grad_norm": 5.469221591949463, + "learning_rate": 5.9e-06, + "loss": 0.1256, + "step": 8500 + }, + { + "epoch": 3.6054134066398817, + "grad_norm": 3.496335983276367, + "learning_rate": 5.887179487179487e-06, + "loss": 0.0875, + "step": 8525 + }, + { + "epoch": 3.6159864664834003, + "grad_norm": 4.853099346160889, + "learning_rate": 5.874358974358975e-06, + "loss": 0.1401, + "step": 8550 + }, + { + "epoch": 3.6265595263269192, + "grad_norm": 6.409478187561035, + "learning_rate": 5.861538461538462e-06, + "loss": 0.1222, + "step": 8575 + }, + { + "epoch": 3.6371325861704378, + "grad_norm": 4.797995567321777, + "learning_rate": 5.848717948717949e-06, + "loss": 0.113, + "step": 8600 + }, + { + "epoch": 3.6477056460139563, + "grad_norm": 6.740935802459717, + "learning_rate": 5.835897435897436e-06, + "loss": 0.1081, + "step": 8625 + }, + { + "epoch": 3.6582787058574753, + "grad_norm": 7.878625392913818, + "learning_rate": 5.823076923076924e-06, + "loss": 0.0906, + "step": 8650 + }, + { + "epoch": 3.668851765700994, + "grad_norm": 8.157573699951172, + "learning_rate": 5.8102564102564106e-06, + "loss": 0.1016, + "step": 8675 + }, + { + "epoch": 3.679424825544513, + "grad_norm": 5.366656303405762, + "learning_rate": 5.797435897435898e-06, + "loss": 0.1038, + "step": 8700 + }, + { + "epoch": 3.6899978853880313, + "grad_norm": 7.368963241577148, + "learning_rate": 5.784615384615385e-06, + "loss": 0.1131, + "step": 8725 + }, + { + "epoch": 3.70057094523155, + "grad_norm": 3.6952099800109863, + "learning_rate": 5.771794871794873e-06, + "loss": 0.0917, + "step": 8750 + }, + { + "epoch": 3.711144005075069, + "grad_norm": 6.393226623535156, + "learning_rate": 5.7589743589743596e-06, + "loss": 0.117, + "step": 8775 + }, + { + "epoch": 3.7217170649185873, + "grad_norm": 6.510304927825928, + "learning_rate": 5.746153846153847e-06, + "loss": 0.1187, + "step": 8800 + }, + { + "epoch": 3.7322901247621063, + "grad_norm": 7.048624038696289, + "learning_rate": 5.733333333333334e-06, + "loss": 0.0916, + "step": 8825 + }, + { + "epoch": 3.742863184605625, + "grad_norm": 4.117500305175781, + "learning_rate": 5.720512820512821e-06, + "loss": 0.1035, + "step": 8850 + }, + { + "epoch": 3.7534362444491434, + "grad_norm": 6.197905540466309, + "learning_rate": 5.7076923076923086e-06, + "loss": 0.1128, + "step": 8875 + }, + { + "epoch": 3.7640093042926623, + "grad_norm": 7.742534637451172, + "learning_rate": 5.694871794871795e-06, + "loss": 0.1026, + "step": 8900 + }, + { + "epoch": 3.774582364136181, + "grad_norm": 6.087040424346924, + "learning_rate": 5.682051282051283e-06, + "loss": 0.1175, + "step": 8925 + }, + { + "epoch": 3.7851554239797, + "grad_norm": 8.049386024475098, + "learning_rate": 5.66923076923077e-06, + "loss": 0.1121, + "step": 8950 + }, + { + "epoch": 3.7957284838232184, + "grad_norm": 3.5438435077667236, + "learning_rate": 5.6564102564102575e-06, + "loss": 0.1293, + "step": 8975 + }, + { + "epoch": 3.806301543666737, + "grad_norm": 8.00123119354248, + "learning_rate": 5.6435897435897435e-06, + "loss": 0.0985, + "step": 9000 + }, + { + "epoch": 3.806301543666737, + "eval_loss": 0.2076605260372162, + "eval_runtime": 438.019, + "eval_samples_per_second": 8.312, + "eval_steps_per_second": 1.041, + "eval_wer": 0.18186458768388325, + "step": 9000 + }, + { + "epoch": 3.816874603510256, + "grad_norm": 4.4232869148254395, + "learning_rate": 5.63076923076923e-06, + "loss": 0.1045, + "step": 9025 + }, + { + "epoch": 3.827447663353775, + "grad_norm": 6.264804363250732, + "learning_rate": 5.617948717948718e-06, + "loss": 0.0949, + "step": 9050 + }, + { + "epoch": 3.8380207231972934, + "grad_norm": 7.897494792938232, + "learning_rate": 5.605128205128205e-06, + "loss": 0.1008, + "step": 9075 + }, + { + "epoch": 3.848593783040812, + "grad_norm": 5.078146457672119, + "learning_rate": 5.5923076923076925e-06, + "loss": 0.0932, + "step": 9100 + }, + { + "epoch": 3.859166842884331, + "grad_norm": 5.596771240234375, + "learning_rate": 5.579487179487179e-06, + "loss": 0.0994, + "step": 9125 + }, + { + "epoch": 3.8697399027278494, + "grad_norm": 5.29062032699585, + "learning_rate": 5.566666666666667e-06, + "loss": 0.1101, + "step": 9150 + }, + { + "epoch": 3.8803129625713684, + "grad_norm": 6.083955764770508, + "learning_rate": 5.553846153846154e-06, + "loss": 0.1173, + "step": 9175 + }, + { + "epoch": 3.890886022414887, + "grad_norm": 5.622173309326172, + "learning_rate": 5.5410256410256415e-06, + "loss": 0.099, + "step": 9200 + }, + { + "epoch": 3.9014590822584054, + "grad_norm": 6.262368202209473, + "learning_rate": 5.528205128205128e-06, + "loss": 0.1033, + "step": 9225 + }, + { + "epoch": 3.9120321421019244, + "grad_norm": 9.63398265838623, + "learning_rate": 5.515384615384616e-06, + "loss": 0.1171, + "step": 9250 + }, + { + "epoch": 3.922605201945443, + "grad_norm": 8.468087196350098, + "learning_rate": 5.502564102564103e-06, + "loss": 0.1062, + "step": 9275 + }, + { + "epoch": 3.933178261788962, + "grad_norm": 5.411839485168457, + "learning_rate": 5.4897435897435905e-06, + "loss": 0.0946, + "step": 9300 + }, + { + "epoch": 3.9437513216324804, + "grad_norm": 4.889887809753418, + "learning_rate": 5.476923076923077e-06, + "loss": 0.1097, + "step": 9325 + }, + { + "epoch": 3.954324381475999, + "grad_norm": 6.154474258422852, + "learning_rate": 5.464102564102565e-06, + "loss": 0.0974, + "step": 9350 + }, + { + "epoch": 3.964897441319518, + "grad_norm": 7.760702133178711, + "learning_rate": 5.451282051282052e-06, + "loss": 0.1074, + "step": 9375 + }, + { + "epoch": 3.9754705011630365, + "grad_norm": 4.108585834503174, + "learning_rate": 5.4384615384615395e-06, + "loss": 0.0997, + "step": 9400 + }, + { + "epoch": 3.9860435610065554, + "grad_norm": 6.994529724121094, + "learning_rate": 5.425641025641026e-06, + "loss": 0.1078, + "step": 9425 + }, + { + "epoch": 3.996616620850074, + "grad_norm": 2.9072048664093018, + "learning_rate": 5.412820512820514e-06, + "loss": 0.1128, + "step": 9450 + }, + { + "epoch": 4.0071896806935925, + "grad_norm": 5.968387603759766, + "learning_rate": 5.400000000000001e-06, + "loss": 0.0853, + "step": 9475 + }, + { + "epoch": 4.017762740537111, + "grad_norm": 2.4111251831054688, + "learning_rate": 5.387179487179488e-06, + "loss": 0.0553, + "step": 9500 + }, + { + "epoch": 4.02833580038063, + "grad_norm": 3.6558897495269775, + "learning_rate": 5.374358974358975e-06, + "loss": 0.0832, + "step": 9525 + }, + { + "epoch": 4.038908860224149, + "grad_norm": 5.5717973709106445, + "learning_rate": 5.361538461538462e-06, + "loss": 0.0784, + "step": 9550 + }, + { + "epoch": 4.0494819200676675, + "grad_norm": 5.605922698974609, + "learning_rate": 5.34871794871795e-06, + "loss": 0.0743, + "step": 9575 + }, + { + "epoch": 4.060054979911186, + "grad_norm": 5.932694911956787, + "learning_rate": 5.335897435897436e-06, + "loss": 0.0634, + "step": 9600 + }, + { + "epoch": 4.070628039754705, + "grad_norm": 4.883663177490234, + "learning_rate": 5.323076923076923e-06, + "loss": 0.0627, + "step": 9625 + }, + { + "epoch": 4.081201099598224, + "grad_norm": 4.509759902954102, + "learning_rate": 5.31025641025641e-06, + "loss": 0.0716, + "step": 9650 + }, + { + "epoch": 4.0917741594417425, + "grad_norm": 8.665048599243164, + "learning_rate": 5.297435897435897e-06, + "loss": 0.0678, + "step": 9675 + }, + { + "epoch": 4.102347219285261, + "grad_norm": 4.062932968139648, + "learning_rate": 5.284615384615385e-06, + "loss": 0.0573, + "step": 9700 + }, + { + "epoch": 4.1129202791287796, + "grad_norm": 5.353725433349609, + "learning_rate": 5.271794871794872e-06, + "loss": 0.0823, + "step": 9725 + }, + { + "epoch": 4.123493338972299, + "grad_norm": 3.5691452026367188, + "learning_rate": 5.258974358974359e-06, + "loss": 0.0722, + "step": 9750 + }, + { + "epoch": 4.1340663988158175, + "grad_norm": 4.866578578948975, + "learning_rate": 5.246153846153846e-06, + "loss": 0.0763, + "step": 9775 + }, + { + "epoch": 4.144639458659336, + "grad_norm": 3.342728614807129, + "learning_rate": 5.233333333333334e-06, + "loss": 0.0621, + "step": 9800 + }, + { + "epoch": 4.155212518502855, + "grad_norm": 3.556044816970825, + "learning_rate": 5.220512820512821e-06, + "loss": 0.0725, + "step": 9825 + }, + { + "epoch": 4.165785578346373, + "grad_norm": 5.201412677764893, + "learning_rate": 5.207692307692308e-06, + "loss": 0.0671, + "step": 9850 + }, + { + "epoch": 4.1763586381898925, + "grad_norm": 5.240591526031494, + "learning_rate": 5.194871794871795e-06, + "loss": 0.0728, + "step": 9875 + }, + { + "epoch": 4.186931698033411, + "grad_norm": 3.6806883811950684, + "learning_rate": 5.182051282051283e-06, + "loss": 0.0653, + "step": 9900 + }, + { + "epoch": 4.19750475787693, + "grad_norm": 3.5019476413726807, + "learning_rate": 5.16923076923077e-06, + "loss": 0.0566, + "step": 9925 + }, + { + "epoch": 4.208077817720448, + "grad_norm": 4.452245712280273, + "learning_rate": 5.156410256410257e-06, + "loss": 0.0582, + "step": 9950 + }, + { + "epoch": 4.218650877563967, + "grad_norm": 5.536275863647461, + "learning_rate": 5.143589743589744e-06, + "loss": 0.0737, + "step": 9975 + }, + { + "epoch": 4.229223937407486, + "grad_norm": 5.515459060668945, + "learning_rate": 5.130769230769232e-06, + "loss": 0.0828, + "step": 10000 + }, + { + "epoch": 4.229223937407486, + "eval_loss": 0.20704784989356995, + "eval_runtime": 438.0885, + "eval_samples_per_second": 8.311, + "eval_steps_per_second": 1.041, + "eval_wer": 0.17917304747320062, + "step": 10000 + }, + { + "epoch": 4.239796997251005, + "grad_norm": 3.628927230834961, + "learning_rate": 5.1179487179487186e-06, + "loss": 0.0657, + "step": 10025 + }, + { + "epoch": 4.250370057094523, + "grad_norm": 4.095386028289795, + "learning_rate": 5.105128205128206e-06, + "loss": 0.0644, + "step": 10050 + }, + { + "epoch": 4.260943116938042, + "grad_norm": 4.334606170654297, + "learning_rate": 5.092307692307693e-06, + "loss": 0.0707, + "step": 10075 + }, + { + "epoch": 4.27151617678156, + "grad_norm": 4.358844757080078, + "learning_rate": 5.07948717948718e-06, + "loss": 0.0712, + "step": 10100 + }, + { + "epoch": 4.28208923662508, + "grad_norm": 3.893803358078003, + "learning_rate": 5.0666666666666676e-06, + "loss": 0.0742, + "step": 10125 + }, + { + "epoch": 4.292662296468598, + "grad_norm": 4.782370090484619, + "learning_rate": 5.053846153846154e-06, + "loss": 0.0654, + "step": 10150 + }, + { + "epoch": 4.303235356312117, + "grad_norm": 4.6114821434021, + "learning_rate": 5.041025641025642e-06, + "loss": 0.0678, + "step": 10175 + }, + { + "epoch": 4.313808416155635, + "grad_norm": 2.206007480621338, + "learning_rate": 5.0287179487179495e-06, + "loss": 0.0778, + "step": 10200 + }, + { + "epoch": 4.324381475999155, + "grad_norm": 3.889173984527588, + "learning_rate": 5.015897435897436e-06, + "loss": 0.0754, + "step": 10225 + }, + { + "epoch": 4.334954535842673, + "grad_norm": 3.887746810913086, + "learning_rate": 5.003076923076924e-06, + "loss": 0.0777, + "step": 10250 + }, + { + "epoch": 4.345527595686192, + "grad_norm": 4.674836158752441, + "learning_rate": 4.990256410256411e-06, + "loss": 0.0746, + "step": 10275 + }, + { + "epoch": 4.35610065552971, + "grad_norm": 4.411125183105469, + "learning_rate": 4.977435897435898e-06, + "loss": 0.0703, + "step": 10300 + }, + { + "epoch": 4.366673715373229, + "grad_norm": 9.945749282836914, + "learning_rate": 4.964615384615385e-06, + "loss": 0.0666, + "step": 10325 + }, + { + "epoch": 4.377246775216748, + "grad_norm": 5.479196071624756, + "learning_rate": 4.951794871794872e-06, + "loss": 0.0672, + "step": 10350 + }, + { + "epoch": 4.387819835060267, + "grad_norm": 5.061864376068115, + "learning_rate": 4.93897435897436e-06, + "loss": 0.0649, + "step": 10375 + }, + { + "epoch": 4.398392894903785, + "grad_norm": 5.103855133056641, + "learning_rate": 4.926153846153847e-06, + "loss": 0.0761, + "step": 10400 + }, + { + "epoch": 4.408965954747304, + "grad_norm": 3.8901400566101074, + "learning_rate": 4.9133333333333334e-06, + "loss": 0.0696, + "step": 10425 + }, + { + "epoch": 4.419539014590822, + "grad_norm": 8.482890129089355, + "learning_rate": 4.900512820512821e-06, + "loss": 0.0725, + "step": 10450 + }, + { + "epoch": 4.430112074434342, + "grad_norm": 3.7482855319976807, + "learning_rate": 4.887692307692308e-06, + "loss": 0.0586, + "step": 10475 + }, + { + "epoch": 4.44068513427786, + "grad_norm": 19.449848175048828, + "learning_rate": 4.874871794871796e-06, + "loss": 0.067, + "step": 10500 + }, + { + "epoch": 4.451258194121379, + "grad_norm": 3.536351203918457, + "learning_rate": 4.8620512820512824e-06, + "loss": 0.0659, + "step": 10525 + }, + { + "epoch": 4.461831253964897, + "grad_norm": 3.9151482582092285, + "learning_rate": 4.849230769230769e-06, + "loss": 0.0623, + "step": 10550 + }, + { + "epoch": 4.472404313808416, + "grad_norm": 3.5885860919952393, + "learning_rate": 4.836410256410257e-06, + "loss": 0.0683, + "step": 10575 + }, + { + "epoch": 4.482977373651935, + "grad_norm": 4.230422019958496, + "learning_rate": 4.823589743589744e-06, + "loss": 0.0776, + "step": 10600 + }, + { + "epoch": 4.493550433495454, + "grad_norm": 3.4841506481170654, + "learning_rate": 4.8107692307692314e-06, + "loss": 0.0719, + "step": 10625 + }, + { + "epoch": 4.504123493338972, + "grad_norm": 5.864824295043945, + "learning_rate": 4.797948717948718e-06, + "loss": 0.068, + "step": 10650 + }, + { + "epoch": 4.514696553182491, + "grad_norm": 5.13723087310791, + "learning_rate": 4.785128205128206e-06, + "loss": 0.0796, + "step": 10675 + }, + { + "epoch": 4.52526961302601, + "grad_norm": 4.680889129638672, + "learning_rate": 4.772307692307693e-06, + "loss": 0.0835, + "step": 10700 + }, + { + "epoch": 4.535842672869529, + "grad_norm": 4.532434463500977, + "learning_rate": 4.7594871794871796e-06, + "loss": 0.0737, + "step": 10725 + }, + { + "epoch": 4.546415732713047, + "grad_norm": 6.627336502075195, + "learning_rate": 4.746666666666667e-06, + "loss": 0.0905, + "step": 10750 + }, + { + "epoch": 4.556988792556566, + "grad_norm": 3.9481518268585205, + "learning_rate": 4.733846153846154e-06, + "loss": 0.0666, + "step": 10775 + }, + { + "epoch": 4.567561852400084, + "grad_norm": 4.709788799285889, + "learning_rate": 4.721025641025642e-06, + "loss": 0.074, + "step": 10800 + }, + { + "epoch": 4.578134912243604, + "grad_norm": 4.298018932342529, + "learning_rate": 4.7082051282051286e-06, + "loss": 0.0721, + "step": 10825 + }, + { + "epoch": 4.588707972087122, + "grad_norm": 2.7479617595672607, + "learning_rate": 4.695384615384615e-06, + "loss": 0.0653, + "step": 10850 + }, + { + "epoch": 4.599281031930641, + "grad_norm": 3.157724380493164, + "learning_rate": 4.682564102564103e-06, + "loss": 0.0923, + "step": 10875 + }, + { + "epoch": 4.609854091774159, + "grad_norm": 5.712294101715088, + "learning_rate": 4.66974358974359e-06, + "loss": 0.0741, + "step": 10900 + }, + { + "epoch": 4.620427151617678, + "grad_norm": 4.904754638671875, + "learning_rate": 4.6569230769230776e-06, + "loss": 0.0649, + "step": 10925 + }, + { + "epoch": 4.631000211461197, + "grad_norm": 13.393159866333008, + "learning_rate": 4.644102564102564e-06, + "loss": 0.0709, + "step": 10950 + }, + { + "epoch": 4.641573271304716, + "grad_norm": 3.0155038833618164, + "learning_rate": 4.631282051282052e-06, + "loss": 0.0744, + "step": 10975 + }, + { + "epoch": 4.652146331148234, + "grad_norm": 3.272163152694702, + "learning_rate": 4.618461538461539e-06, + "loss": 0.06, + "step": 11000 + }, + { + "epoch": 4.652146331148234, + "eval_loss": 0.19907286763191223, + "eval_runtime": 440.5684, + "eval_samples_per_second": 8.264, + "eval_steps_per_second": 1.035, + "eval_wer": 0.18256067566940462, + "step": 11000 + }, + { + "epoch": 4.662719390991753, + "grad_norm": 4.0782060623168945, + "learning_rate": 4.605641025641026e-06, + "loss": 0.0631, + "step": 11025 + }, + { + "epoch": 4.673292450835271, + "grad_norm": 8.949256896972656, + "learning_rate": 4.592820512820513e-06, + "loss": 0.0737, + "step": 11050 + }, + { + "epoch": 4.683865510678791, + "grad_norm": 6.3690505027771, + "learning_rate": 4.58e-06, + "loss": 0.0755, + "step": 11075 + }, + { + "epoch": 4.694438570522309, + "grad_norm": 5.039371490478516, + "learning_rate": 4.567179487179488e-06, + "loss": 0.079, + "step": 11100 + }, + { + "epoch": 4.705011630365828, + "grad_norm": 3.4718008041381836, + "learning_rate": 4.554358974358975e-06, + "loss": 0.0552, + "step": 11125 + }, + { + "epoch": 4.715584690209346, + "grad_norm": 7.306105613708496, + "learning_rate": 4.5415384615384615e-06, + "loss": 0.0607, + "step": 11150 + }, + { + "epoch": 4.726157750052865, + "grad_norm": 4.172931671142578, + "learning_rate": 4.528717948717949e-06, + "loss": 0.0828, + "step": 11175 + }, + { + "epoch": 4.736730809896384, + "grad_norm": 4.136832237243652, + "learning_rate": 4.515897435897436e-06, + "loss": 0.0993, + "step": 11200 + }, + { + "epoch": 4.747303869739903, + "grad_norm": 2.4323673248291016, + "learning_rate": 4.503076923076924e-06, + "loss": 0.0603, + "step": 11225 + }, + { + "epoch": 4.757876929583421, + "grad_norm": 6.04355525970459, + "learning_rate": 4.4902564102564105e-06, + "loss": 0.06, + "step": 11250 + }, + { + "epoch": 4.76844998942694, + "grad_norm": 5.033048629760742, + "learning_rate": 4.477435897435898e-06, + "loss": 0.0849, + "step": 11275 + }, + { + "epoch": 4.779023049270458, + "grad_norm": 6.350639820098877, + "learning_rate": 4.464615384615385e-06, + "loss": 0.0576, + "step": 11300 + }, + { + "epoch": 4.789596109113978, + "grad_norm": 4.880070209503174, + "learning_rate": 4.451794871794872e-06, + "loss": 0.0712, + "step": 11325 + }, + { + "epoch": 4.800169168957496, + "grad_norm": 5.648702621459961, + "learning_rate": 4.4389743589743595e-06, + "loss": 0.0564, + "step": 11350 + }, + { + "epoch": 4.810742228801015, + "grad_norm": 8.151969909667969, + "learning_rate": 4.426153846153846e-06, + "loss": 0.0616, + "step": 11375 + }, + { + "epoch": 4.821315288644533, + "grad_norm": 4.684852123260498, + "learning_rate": 4.413333333333334e-06, + "loss": 0.0724, + "step": 11400 + }, + { + "epoch": 4.831888348488053, + "grad_norm": 3.9812090396881104, + "learning_rate": 4.400512820512821e-06, + "loss": 0.0774, + "step": 11425 + }, + { + "epoch": 4.842461408331571, + "grad_norm": 5.788280963897705, + "learning_rate": 4.387692307692308e-06, + "loss": 0.0776, + "step": 11450 + }, + { + "epoch": 4.85303446817509, + "grad_norm": 4.4890522956848145, + "learning_rate": 4.374871794871795e-06, + "loss": 0.0636, + "step": 11475 + }, + { + "epoch": 4.863607528018608, + "grad_norm": 3.6684515476226807, + "learning_rate": 4.362051282051282e-06, + "loss": 0.0707, + "step": 11500 + }, + { + "epoch": 4.874180587862127, + "grad_norm": 4.590714454650879, + "learning_rate": 4.34923076923077e-06, + "loss": 0.0761, + "step": 11525 + }, + { + "epoch": 4.884753647705646, + "grad_norm": 5.7994256019592285, + "learning_rate": 4.336410256410257e-06, + "loss": 0.063, + "step": 11550 + }, + { + "epoch": 4.895326707549165, + "grad_norm": 4.685302734375, + "learning_rate": 4.323589743589744e-06, + "loss": 0.0595, + "step": 11575 + }, + { + "epoch": 4.905899767392683, + "grad_norm": 6.557898044586182, + "learning_rate": 4.310769230769231e-06, + "loss": 0.0727, + "step": 11600 + }, + { + "epoch": 4.916472827236202, + "grad_norm": 3.237697124481201, + "learning_rate": 4.297948717948718e-06, + "loss": 0.0758, + "step": 11625 + }, + { + "epoch": 4.927045887079721, + "grad_norm": 6.924361705780029, + "learning_rate": 4.285128205128206e-06, + "loss": 0.0556, + "step": 11650 + }, + { + "epoch": 4.93761894692324, + "grad_norm": 6.172597408294678, + "learning_rate": 4.2723076923076925e-06, + "loss": 0.074, + "step": 11675 + }, + { + "epoch": 4.948192006766758, + "grad_norm": 4.857706546783447, + "learning_rate": 4.25948717948718e-06, + "loss": 0.0656, + "step": 11700 + }, + { + "epoch": 4.958765066610277, + "grad_norm": 11.788376808166504, + "learning_rate": 4.246666666666667e-06, + "loss": 0.0724, + "step": 11725 + }, + { + "epoch": 4.9693381264537955, + "grad_norm": 2.799781322479248, + "learning_rate": 4.233846153846154e-06, + "loss": 0.067, + "step": 11750 + }, + { + "epoch": 4.979911186297315, + "grad_norm": 3.4480295181274414, + "learning_rate": 4.2210256410256414e-06, + "loss": 0.0667, + "step": 11775 + }, + { + "epoch": 4.990484246140833, + "grad_norm": 5.533037185668945, + "learning_rate": 4.208205128205128e-06, + "loss": 0.0598, + "step": 11800 + }, + { + "epoch": 5.001057305984352, + "grad_norm": 3.5116047859191895, + "learning_rate": 4.195384615384616e-06, + "loss": 0.0669, + "step": 11825 + }, + { + "epoch": 5.0116303658278705, + "grad_norm": 2.5963640213012695, + "learning_rate": 4.182564102564103e-06, + "loss": 0.043, + "step": 11850 + }, + { + "epoch": 5.022203425671389, + "grad_norm": 5.519112586975098, + "learning_rate": 4.1697435897435904e-06, + "loss": 0.044, + "step": 11875 + }, + { + "epoch": 5.032776485514908, + "grad_norm": 3.7173142433166504, + "learning_rate": 4.156923076923077e-06, + "loss": 0.0473, + "step": 11900 + }, + { + "epoch": 5.043349545358427, + "grad_norm": 2.8905227184295654, + "learning_rate": 4.144102564102564e-06, + "loss": 0.0389, + "step": 11925 + }, + { + "epoch": 5.0539226052019455, + "grad_norm": 2.328718423843384, + "learning_rate": 4.131282051282052e-06, + "loss": 0.0502, + "step": 11950 + }, + { + "epoch": 5.064495665045464, + "grad_norm": 4.866549015045166, + "learning_rate": 4.118461538461539e-06, + "loss": 0.043, + "step": 11975 + }, + { + "epoch": 5.0750687248889825, + "grad_norm": 1.9600281715393066, + "learning_rate": 4.105641025641026e-06, + "loss": 0.0629, + "step": 12000 + }, + { + "epoch": 5.0750687248889825, + "eval_loss": 0.20117300748825073, + "eval_runtime": 442.5655, + "eval_samples_per_second": 8.227, + "eval_steps_per_second": 1.03, + "eval_wer": 0.19184184880968955, + "step": 12000 + }, + { + "epoch": 5.085641784732502, + "grad_norm": 4.797824382781982, + "learning_rate": 4.092820512820513e-06, + "loss": 0.0554, + "step": 12025 + }, + { + "epoch": 5.0962148445760205, + "grad_norm": 2.1049580574035645, + "learning_rate": 4.08e-06, + "loss": 0.0553, + "step": 12050 + }, + { + "epoch": 5.106787904419539, + "grad_norm": 2.2943015098571777, + "learning_rate": 4.0671794871794876e-06, + "loss": 0.0413, + "step": 12075 + }, + { + "epoch": 5.1173609642630575, + "grad_norm": 3.040888547897339, + "learning_rate": 4.054358974358974e-06, + "loss": 0.0464, + "step": 12100 + }, + { + "epoch": 5.127934024106576, + "grad_norm": 2.409097909927368, + "learning_rate": 4.041538461538462e-06, + "loss": 0.0419, + "step": 12125 + }, + { + "epoch": 5.1385070839500955, + "grad_norm": 4.554444789886475, + "learning_rate": 4.028717948717949e-06, + "loss": 0.0415, + "step": 12150 + }, + { + "epoch": 5.149080143793614, + "grad_norm": 4.427338600158691, + "learning_rate": 4.0158974358974366e-06, + "loss": 0.0477, + "step": 12175 + }, + { + "epoch": 5.1596532036371325, + "grad_norm": 4.4627580642700195, + "learning_rate": 4.003076923076923e-06, + "loss": 0.0485, + "step": 12200 + }, + { + "epoch": 5.170226263480651, + "grad_norm": 5.8571882247924805, + "learning_rate": 3.990256410256411e-06, + "loss": 0.0443, + "step": 12225 + }, + { + "epoch": 5.18079932332417, + "grad_norm": 2.8459227085113525, + "learning_rate": 3.977435897435898e-06, + "loss": 0.0446, + "step": 12250 + }, + { + "epoch": 5.191372383167689, + "grad_norm": 4.811408996582031, + "learning_rate": 3.964615384615385e-06, + "loss": 0.045, + "step": 12275 + }, + { + "epoch": 5.2019454430112075, + "grad_norm": 3.7404351234436035, + "learning_rate": 3.951794871794872e-06, + "loss": 0.0358, + "step": 12300 + }, + { + "epoch": 5.212518502854726, + "grad_norm": 4.323686122894287, + "learning_rate": 3.938974358974359e-06, + "loss": 0.0462, + "step": 12325 + }, + { + "epoch": 5.223091562698245, + "grad_norm": 4.527756690979004, + "learning_rate": 3.926153846153846e-06, + "loss": 0.0429, + "step": 12350 + }, + { + "epoch": 5.233664622541764, + "grad_norm": 2.838021755218506, + "learning_rate": 3.913333333333334e-06, + "loss": 0.0367, + "step": 12375 + }, + { + "epoch": 5.2442376823852825, + "grad_norm": 4.497096061706543, + "learning_rate": 3.9005128205128205e-06, + "loss": 0.0435, + "step": 12400 + }, + { + "epoch": 5.254810742228801, + "grad_norm": 4.4126410484313965, + "learning_rate": 3.887692307692308e-06, + "loss": 0.0465, + "step": 12425 + }, + { + "epoch": 5.26538380207232, + "grad_norm": 4.237513065338135, + "learning_rate": 3.874871794871795e-06, + "loss": 0.0466, + "step": 12450 + }, + { + "epoch": 5.275956861915838, + "grad_norm": 3.8605165481567383, + "learning_rate": 3.862051282051283e-06, + "loss": 0.0631, + "step": 12475 + }, + { + "epoch": 5.2865299217593575, + "grad_norm": 5.698480606079102, + "learning_rate": 3.8492307692307695e-06, + "loss": 0.0435, + "step": 12500 + }, + { + "epoch": 5.297102981602876, + "grad_norm": 3.7574024200439453, + "learning_rate": 3.836410256410257e-06, + "loss": 0.049, + "step": 12525 + }, + { + "epoch": 5.307676041446395, + "grad_norm": 2.125375509262085, + "learning_rate": 3.824102564102565e-06, + "loss": 0.05, + "step": 12550 + }, + { + "epoch": 5.318249101289913, + "grad_norm": 4.459465026855469, + "learning_rate": 3.8112820512820514e-06, + "loss": 0.0453, + "step": 12575 + }, + { + "epoch": 5.328822161133432, + "grad_norm": 3.0165951251983643, + "learning_rate": 3.7984615384615387e-06, + "loss": 0.0436, + "step": 12600 + }, + { + "epoch": 5.339395220976951, + "grad_norm": 8.148979187011719, + "learning_rate": 3.785641025641026e-06, + "loss": 0.0596, + "step": 12625 + }, + { + "epoch": 5.34996828082047, + "grad_norm": 4.275932788848877, + "learning_rate": 3.772820512820513e-06, + "loss": 0.0554, + "step": 12650 + }, + { + "epoch": 5.360541340663988, + "grad_norm": 4.193767070770264, + "learning_rate": 3.7600000000000004e-06, + "loss": 0.0542, + "step": 12675 + }, + { + "epoch": 5.371114400507507, + "grad_norm": 3.2880380153656006, + "learning_rate": 3.7471794871794877e-06, + "loss": 0.0418, + "step": 12700 + }, + { + "epoch": 5.381687460351025, + "grad_norm": 4.61824369430542, + "learning_rate": 3.734358974358975e-06, + "loss": 0.0476, + "step": 12725 + }, + { + "epoch": 5.392260520194545, + "grad_norm": 4.10459566116333, + "learning_rate": 3.721538461538462e-06, + "loss": 0.0551, + "step": 12750 + }, + { + "epoch": 5.402833580038063, + "grad_norm": 3.0435068607330322, + "learning_rate": 3.7087179487179494e-06, + "loss": 0.0501, + "step": 12775 + }, + { + "epoch": 5.413406639881582, + "grad_norm": 5.007273197174072, + "learning_rate": 3.6958974358974362e-06, + "loss": 0.051, + "step": 12800 + }, + { + "epoch": 5.4239796997251, + "grad_norm": 4.816375255584717, + "learning_rate": 3.683076923076923e-06, + "loss": 0.0444, + "step": 12825 + }, + { + "epoch": 5.43455275956862, + "grad_norm": 3.2627782821655273, + "learning_rate": 3.6702564102564103e-06, + "loss": 0.048, + "step": 12850 + }, + { + "epoch": 5.445125819412138, + "grad_norm": 4.271965026855469, + "learning_rate": 3.6574358974358976e-06, + "loss": 0.0476, + "step": 12875 + }, + { + "epoch": 5.455698879255657, + "grad_norm": 3.3179333209991455, + "learning_rate": 3.644615384615385e-06, + "loss": 0.05, + "step": 12900 + }, + { + "epoch": 5.466271939099175, + "grad_norm": 1.9072633981704712, + "learning_rate": 3.631794871794872e-06, + "loss": 0.0441, + "step": 12925 + }, + { + "epoch": 5.476844998942694, + "grad_norm": 3.9856927394866943, + "learning_rate": 3.6189743589743593e-06, + "loss": 0.0382, + "step": 12950 + }, + { + "epoch": 5.487418058786213, + "grad_norm": 4.388250827789307, + "learning_rate": 3.6061538461538466e-06, + "loss": 0.0487, + "step": 12975 + }, + { + "epoch": 5.497991118629732, + "grad_norm": 7.110579967498779, + "learning_rate": 3.593333333333334e-06, + "loss": 0.0545, + "step": 13000 + }, + { + "epoch": 5.497991118629732, + "eval_loss": 0.20169800519943237, + "eval_runtime": 439.2743, + "eval_samples_per_second": 8.289, + "eval_steps_per_second": 1.038, + "eval_wer": 0.18641236252262286, + "step": 13000 + }, + { + "epoch": 5.50856417847325, + "grad_norm": 3.1531167030334473, + "learning_rate": 3.580512820512821e-06, + "loss": 0.051, + "step": 13025 + }, + { + "epoch": 5.519137238316769, + "grad_norm": 4.554049491882324, + "learning_rate": 3.5676923076923083e-06, + "loss": 0.0493, + "step": 13050 + }, + { + "epoch": 5.529710298160287, + "grad_norm": 1.308733582496643, + "learning_rate": 3.5548717948717955e-06, + "loss": 0.0522, + "step": 13075 + }, + { + "epoch": 5.540283358003807, + "grad_norm": 3.7449610233306885, + "learning_rate": 3.5420512820512824e-06, + "loss": 0.05, + "step": 13100 + }, + { + "epoch": 5.550856417847325, + "grad_norm": 6.0974273681640625, + "learning_rate": 3.5292307692307696e-06, + "loss": 0.0453, + "step": 13125 + }, + { + "epoch": 5.561429477690844, + "grad_norm": 13.427715301513672, + "learning_rate": 3.5164102564102564e-06, + "loss": 0.0478, + "step": 13150 + }, + { + "epoch": 5.572002537534362, + "grad_norm": 6.203893661499023, + "learning_rate": 3.5035897435897437e-06, + "loss": 0.0429, + "step": 13175 + }, + { + "epoch": 5.582575597377881, + "grad_norm": 6.499892234802246, + "learning_rate": 3.490769230769231e-06, + "loss": 0.0545, + "step": 13200 + }, + { + "epoch": 5.5931486572214, + "grad_norm": 4.413330078125, + "learning_rate": 3.477948717948718e-06, + "loss": 0.0496, + "step": 13225 + }, + { + "epoch": 5.603721717064919, + "grad_norm": 1.8597774505615234, + "learning_rate": 3.4651282051282054e-06, + "loss": 0.0509, + "step": 13250 + }, + { + "epoch": 5.614294776908437, + "grad_norm": 4.8508405685424805, + "learning_rate": 3.4523076923076927e-06, + "loss": 0.0524, + "step": 13275 + }, + { + "epoch": 5.624867836751956, + "grad_norm": 5.132781505584717, + "learning_rate": 3.43948717948718e-06, + "loss": 0.0539, + "step": 13300 + }, + { + "epoch": 5.635440896595474, + "grad_norm": 4.2406110763549805, + "learning_rate": 3.426666666666667e-06, + "loss": 0.0387, + "step": 13325 + }, + { + "epoch": 5.646013956438994, + "grad_norm": 5.790931701660156, + "learning_rate": 3.4138461538461544e-06, + "loss": 0.0476, + "step": 13350 + }, + { + "epoch": 5.656587016282512, + "grad_norm": 5.764678955078125, + "learning_rate": 3.4010256410256417e-06, + "loss": 0.0484, + "step": 13375 + }, + { + "epoch": 5.667160076126031, + "grad_norm": 4.839846611022949, + "learning_rate": 3.3882051282051285e-06, + "loss": 0.0494, + "step": 13400 + }, + { + "epoch": 5.677733135969549, + "grad_norm": 5.120883941650391, + "learning_rate": 3.3753846153846157e-06, + "loss": 0.0364, + "step": 13425 + }, + { + "epoch": 5.688306195813068, + "grad_norm": 1.9081509113311768, + "learning_rate": 3.3625641025641026e-06, + "loss": 0.0399, + "step": 13450 + }, + { + "epoch": 5.698879255656587, + "grad_norm": 3.328843593597412, + "learning_rate": 3.34974358974359e-06, + "loss": 0.043, + "step": 13475 + }, + { + "epoch": 5.709452315500106, + "grad_norm": 3.8817241191864014, + "learning_rate": 3.336923076923077e-06, + "loss": 0.05, + "step": 13500 + }, + { + "epoch": 5.720025375343624, + "grad_norm": 3.7421860694885254, + "learning_rate": 3.3241025641025643e-06, + "loss": 0.0435, + "step": 13525 + }, + { + "epoch": 5.730598435187143, + "grad_norm": 4.542396545410156, + "learning_rate": 3.3112820512820516e-06, + "loss": 0.0697, + "step": 13550 + }, + { + "epoch": 5.741171495030661, + "grad_norm": 2.264399528503418, + "learning_rate": 3.298461538461539e-06, + "loss": 0.0477, + "step": 13575 + }, + { + "epoch": 5.751744554874181, + "grad_norm": 4.958556652069092, + "learning_rate": 3.285641025641026e-06, + "loss": 0.0636, + "step": 13600 + }, + { + "epoch": 5.762317614717699, + "grad_norm": 5.233386993408203, + "learning_rate": 3.2728205128205133e-06, + "loss": 0.0509, + "step": 13625 + }, + { + "epoch": 5.772890674561218, + "grad_norm": 2.960580587387085, + "learning_rate": 3.2600000000000006e-06, + "loss": 0.0455, + "step": 13650 + }, + { + "epoch": 5.783463734404736, + "grad_norm": 5.023665904998779, + "learning_rate": 3.247179487179488e-06, + "loss": 0.0552, + "step": 13675 + }, + { + "epoch": 5.794036794248256, + "grad_norm": 2.2450520992279053, + "learning_rate": 3.2343589743589746e-06, + "loss": 0.0395, + "step": 13700 + }, + { + "epoch": 5.804609854091774, + "grad_norm": 6.890443801879883, + "learning_rate": 3.221538461538462e-06, + "loss": 0.0525, + "step": 13725 + }, + { + "epoch": 5.815182913935293, + "grad_norm": 3.799301862716675, + "learning_rate": 3.2087179487179487e-06, + "loss": 0.0486, + "step": 13750 + }, + { + "epoch": 5.825755973778811, + "grad_norm": 1.8951350450515747, + "learning_rate": 3.195897435897436e-06, + "loss": 0.0573, + "step": 13775 + }, + { + "epoch": 5.836329033622331, + "grad_norm": 2.119630813598633, + "learning_rate": 3.183076923076923e-06, + "loss": 0.0377, + "step": 13800 + }, + { + "epoch": 5.846902093465849, + "grad_norm": 5.626587867736816, + "learning_rate": 3.1702564102564104e-06, + "loss": 0.0486, + "step": 13825 + }, + { + "epoch": 5.857475153309368, + "grad_norm": 4.05784797668457, + "learning_rate": 3.1574358974358977e-06, + "loss": 0.0439, + "step": 13850 + }, + { + "epoch": 5.868048213152886, + "grad_norm": 3.4822025299072266, + "learning_rate": 3.144615384615385e-06, + "loss": 0.0502, + "step": 13875 + }, + { + "epoch": 5.878621272996405, + "grad_norm": 6.456898212432861, + "learning_rate": 3.131794871794872e-06, + "loss": 0.0429, + "step": 13900 + }, + { + "epoch": 5.889194332839924, + "grad_norm": 5.004892826080322, + "learning_rate": 3.1189743589743594e-06, + "loss": 0.0431, + "step": 13925 + }, + { + "epoch": 5.899767392683443, + "grad_norm": 5.014365196228027, + "learning_rate": 3.1061538461538467e-06, + "loss": 0.0484, + "step": 13950 + }, + { + "epoch": 5.910340452526961, + "grad_norm": 2.9999139308929443, + "learning_rate": 3.093333333333334e-06, + "loss": 0.0366, + "step": 13975 + }, + { + "epoch": 5.92091351237048, + "grad_norm": 3.330026388168335, + "learning_rate": 3.080512820512821e-06, + "loss": 0.0392, + "step": 14000 + }, + { + "epoch": 5.92091351237048, + "eval_loss": 0.1985091120004654, + "eval_runtime": 442.1262, + "eval_samples_per_second": 8.235, + "eval_steps_per_second": 1.031, + "eval_wer": 0.19096013736136247, + "step": 14000 + }, + { + "epoch": 5.931486572213998, + "grad_norm": 3.9660110473632812, + "learning_rate": 3.067692307692308e-06, + "loss": 0.0507, + "step": 14025 + }, + { + "epoch": 5.942059632057518, + "grad_norm": 4.192524433135986, + "learning_rate": 3.054871794871795e-06, + "loss": 0.0705, + "step": 14050 + }, + { + "epoch": 5.952632691901036, + "grad_norm": 6.012485980987549, + "learning_rate": 3.042051282051282e-06, + "loss": 0.0484, + "step": 14075 + }, + { + "epoch": 5.963205751744555, + "grad_norm": 4.29821252822876, + "learning_rate": 3.0292307692307693e-06, + "loss": 0.0407, + "step": 14100 + }, + { + "epoch": 5.9737788115880734, + "grad_norm": 2.2624855041503906, + "learning_rate": 3.0164102564102566e-06, + "loss": 0.0376, + "step": 14125 + }, + { + "epoch": 5.984351871431592, + "grad_norm": 3.6296563148498535, + "learning_rate": 3.003589743589744e-06, + "loss": 0.0462, + "step": 14150 + }, + { + "epoch": 5.994924931275111, + "grad_norm": 5.471303939819336, + "learning_rate": 2.990769230769231e-06, + "loss": 0.0445, + "step": 14175 + }, + { + "epoch": 6.00549799111863, + "grad_norm": 1.4642462730407715, + "learning_rate": 2.9779487179487183e-06, + "loss": 0.041, + "step": 14200 + }, + { + "epoch": 6.0160710509621484, + "grad_norm": 1.2915719747543335, + "learning_rate": 2.9651282051282056e-06, + "loss": 0.0352, + "step": 14225 + }, + { + "epoch": 6.026644110805667, + "grad_norm": 1.6247045993804932, + "learning_rate": 2.952307692307693e-06, + "loss": 0.0277, + "step": 14250 + }, + { + "epoch": 6.0372171706491855, + "grad_norm": 4.9181671142578125, + "learning_rate": 2.93948717948718e-06, + "loss": 0.0396, + "step": 14275 + }, + { + "epoch": 6.047790230492705, + "grad_norm": 9.853203773498535, + "learning_rate": 2.9266666666666673e-06, + "loss": 0.029, + "step": 14300 + }, + { + "epoch": 6.0583632903362235, + "grad_norm": 2.4494051933288574, + "learning_rate": 2.913846153846154e-06, + "loss": 0.0334, + "step": 14325 + }, + { + "epoch": 6.068936350179742, + "grad_norm": 2.7287230491638184, + "learning_rate": 2.901025641025641e-06, + "loss": 0.0391, + "step": 14350 + }, + { + "epoch": 6.0795094100232605, + "grad_norm": 3.225248098373413, + "learning_rate": 2.888205128205128e-06, + "loss": 0.042, + "step": 14375 + }, + { + "epoch": 6.090082469866779, + "grad_norm": 2.684298276901245, + "learning_rate": 2.8753846153846154e-06, + "loss": 0.032, + "step": 14400 + }, + { + "epoch": 6.1006555297102985, + "grad_norm": 2.294804334640503, + "learning_rate": 2.8625641025641027e-06, + "loss": 0.0295, + "step": 14425 + }, + { + "epoch": 6.111228589553817, + "grad_norm": 3.1335229873657227, + "learning_rate": 2.84974358974359e-06, + "loss": 0.0366, + "step": 14450 + }, + { + "epoch": 6.1218016493973355, + "grad_norm": 1.516295313835144, + "learning_rate": 2.836923076923077e-06, + "loss": 0.0382, + "step": 14475 + }, + { + "epoch": 6.132374709240854, + "grad_norm": 1.7349085807800293, + "learning_rate": 2.8241025641025644e-06, + "loss": 0.0443, + "step": 14500 + }, + { + "epoch": 6.142947769084373, + "grad_norm": 3.1473402976989746, + "learning_rate": 2.8112820512820517e-06, + "loss": 0.0223, + "step": 14525 + }, + { + "epoch": 6.153520828927892, + "grad_norm": 4.05681848526001, + "learning_rate": 2.798461538461539e-06, + "loss": 0.0365, + "step": 14550 + }, + { + "epoch": 6.1640938887714105, + "grad_norm": 2.0210795402526855, + "learning_rate": 2.785641025641026e-06, + "loss": 0.0319, + "step": 14575 + }, + { + "epoch": 6.174666948614929, + "grad_norm": 2.4962692260742188, + "learning_rate": 2.7728205128205134e-06, + "loss": 0.0253, + "step": 14600 + }, + { + "epoch": 6.185240008458448, + "grad_norm": 1.9406040906906128, + "learning_rate": 2.7600000000000003e-06, + "loss": 0.0322, + "step": 14625 + }, + { + "epoch": 6.195813068301967, + "grad_norm": 2.434849739074707, + "learning_rate": 2.7471794871794875e-06, + "loss": 0.03, + "step": 14650 + }, + { + "epoch": 6.2063861281454855, + "grad_norm": 2.942782163619995, + "learning_rate": 2.7343589743589743e-06, + "loss": 0.0424, + "step": 14675 + }, + { + "epoch": 6.216959187989004, + "grad_norm": 2.0401015281677246, + "learning_rate": 2.7215384615384616e-06, + "loss": 0.0359, + "step": 14700 + }, + { + "epoch": 6.227532247832523, + "grad_norm": 1.1493183374404907, + "learning_rate": 2.708717948717949e-06, + "loss": 0.0279, + "step": 14725 + }, + { + "epoch": 6.238105307676041, + "grad_norm": 1.1555042266845703, + "learning_rate": 2.695897435897436e-06, + "loss": 0.0298, + "step": 14750 + }, + { + "epoch": 6.2486783675195605, + "grad_norm": 4.421550273895264, + "learning_rate": 2.683589743589744e-06, + "loss": 0.0325, + "step": 14775 + }, + { + "epoch": 6.259251427363079, + "grad_norm": 4.805689334869385, + "learning_rate": 2.670769230769231e-06, + "loss": 0.0247, + "step": 14800 + }, + { + "epoch": 6.269824487206598, + "grad_norm": 0.8942240476608276, + "learning_rate": 2.657948717948718e-06, + "loss": 0.0305, + "step": 14825 + }, + { + "epoch": 6.280397547050116, + "grad_norm": 2.235232353210449, + "learning_rate": 2.6451282051282052e-06, + "loss": 0.0302, + "step": 14850 + }, + { + "epoch": 6.290970606893635, + "grad_norm": 1.470533013343811, + "learning_rate": 2.6323076923076925e-06, + "loss": 0.0359, + "step": 14875 + }, + { + "epoch": 6.301543666737154, + "grad_norm": 4.327214241027832, + "learning_rate": 2.6194871794871797e-06, + "loss": 0.0394, + "step": 14900 + }, + { + "epoch": 6.312116726580673, + "grad_norm": 1.062364101409912, + "learning_rate": 2.606666666666667e-06, + "loss": 0.0341, + "step": 14925 + }, + { + "epoch": 6.322689786424191, + "grad_norm": 3.674901247024536, + "learning_rate": 2.593846153846154e-06, + "loss": 0.025, + "step": 14950 + }, + { + "epoch": 6.33326284626771, + "grad_norm": 2.2653369903564453, + "learning_rate": 2.581025641025641e-06, + "loss": 0.0264, + "step": 14975 + }, + { + "epoch": 6.343835906111229, + "grad_norm": 1.9021022319793701, + "learning_rate": 2.5682051282051283e-06, + "loss": 0.0338, + "step": 15000 + }, + { + "epoch": 6.343835906111229, + "eval_loss": 0.19885598123073578, + "eval_runtime": 437.3615, + "eval_samples_per_second": 8.325, + "eval_steps_per_second": 1.043, + "eval_wer": 0.1807044410413476, + "step": 15000 + }, + { + "epoch": 6.354408965954748, + "grad_norm": 2.683213472366333, + "learning_rate": 2.5553846153846155e-06, + "loss": 0.0293, + "step": 15025 + }, + { + "epoch": 6.364982025798266, + "grad_norm": 4.173922538757324, + "learning_rate": 2.542564102564103e-06, + "loss": 0.0299, + "step": 15050 + }, + { + "epoch": 6.375555085641785, + "grad_norm": 2.688981294631958, + "learning_rate": 2.52974358974359e-06, + "loss": 0.0349, + "step": 15075 + }, + { + "epoch": 6.386128145485303, + "grad_norm": 3.2048428058624268, + "learning_rate": 2.5169230769230773e-06, + "loss": 0.0401, + "step": 15100 + }, + { + "epoch": 6.396701205328823, + "grad_norm": 2.362328052520752, + "learning_rate": 2.5041025641025645e-06, + "loss": 0.0368, + "step": 15125 + }, + { + "epoch": 6.407274265172341, + "grad_norm": 2.584799289703369, + "learning_rate": 2.4912820512820514e-06, + "loss": 0.0252, + "step": 15150 + }, + { + "epoch": 6.41784732501586, + "grad_norm": 3.501321315765381, + "learning_rate": 2.4784615384615386e-06, + "loss": 0.0278, + "step": 15175 + }, + { + "epoch": 6.428420384859378, + "grad_norm": 2.7278032302856445, + "learning_rate": 2.465641025641026e-06, + "loss": 0.0421, + "step": 15200 + }, + { + "epoch": 6.438993444702897, + "grad_norm": 2.1528923511505127, + "learning_rate": 2.452820512820513e-06, + "loss": 0.0354, + "step": 15225 + }, + { + "epoch": 6.449566504546416, + "grad_norm": 2.926116704940796, + "learning_rate": 2.4400000000000004e-06, + "loss": 0.032, + "step": 15250 + }, + { + "epoch": 6.460139564389935, + "grad_norm": 2.286530017852783, + "learning_rate": 2.427179487179487e-06, + "loss": 0.0416, + "step": 15275 + }, + { + "epoch": 6.470712624233453, + "grad_norm": 3.249962329864502, + "learning_rate": 2.4143589743589744e-06, + "loss": 0.0327, + "step": 15300 + }, + { + "epoch": 6.481285684076972, + "grad_norm": 3.736004114151001, + "learning_rate": 2.4015384615384617e-06, + "loss": 0.032, + "step": 15325 + }, + { + "epoch": 6.49185874392049, + "grad_norm": 1.310922384262085, + "learning_rate": 2.388717948717949e-06, + "loss": 0.0342, + "step": 15350 + }, + { + "epoch": 6.50243180376401, + "grad_norm": 1.6140261888504028, + "learning_rate": 2.375897435897436e-06, + "loss": 0.0402, + "step": 15375 + }, + { + "epoch": 6.513004863607528, + "grad_norm": 2.1381657123565674, + "learning_rate": 2.3630769230769234e-06, + "loss": 0.0378, + "step": 15400 + }, + { + "epoch": 6.523577923451047, + "grad_norm": 1.3571207523345947, + "learning_rate": 2.3502564102564102e-06, + "loss": 0.0318, + "step": 15425 + }, + { + "epoch": 6.534150983294565, + "grad_norm": 3.115314483642578, + "learning_rate": 2.3374358974358975e-06, + "loss": 0.0422, + "step": 15450 + }, + { + "epoch": 6.544724043138084, + "grad_norm": 3.6001369953155518, + "learning_rate": 2.3246153846153847e-06, + "loss": 0.0329, + "step": 15475 + }, + { + "epoch": 6.555297102981603, + "grad_norm": 2.726231098175049, + "learning_rate": 2.311794871794872e-06, + "loss": 0.0417, + "step": 15500 + }, + { + "epoch": 6.565870162825122, + "grad_norm": 2.179466485977173, + "learning_rate": 2.2989743589743592e-06, + "loss": 0.0326, + "step": 15525 + }, + { + "epoch": 6.57644322266864, + "grad_norm": 4.61680269241333, + "learning_rate": 2.2861538461538465e-06, + "loss": 0.0376, + "step": 15550 + }, + { + "epoch": 6.587016282512159, + "grad_norm": 4.001598358154297, + "learning_rate": 2.2733333333333333e-06, + "loss": 0.0341, + "step": 15575 + }, + { + "epoch": 6.597589342355677, + "grad_norm": 2.2401235103607178, + "learning_rate": 2.2605128205128206e-06, + "loss": 0.0298, + "step": 15600 + }, + { + "epoch": 6.608162402199197, + "grad_norm": 5.490719795227051, + "learning_rate": 2.247692307692308e-06, + "loss": 0.0405, + "step": 15625 + }, + { + "epoch": 6.618735462042715, + "grad_norm": 4.875611305236816, + "learning_rate": 2.234871794871795e-06, + "loss": 0.0431, + "step": 15650 + }, + { + "epoch": 6.629308521886234, + "grad_norm": 2.3372411727905273, + "learning_rate": 2.2220512820512823e-06, + "loss": 0.0309, + "step": 15675 + }, + { + "epoch": 6.639881581729752, + "grad_norm": 7.326153755187988, + "learning_rate": 2.2092307692307695e-06, + "loss": 0.0377, + "step": 15700 + }, + { + "epoch": 6.650454641573271, + "grad_norm": 2.733332633972168, + "learning_rate": 2.1964102564102564e-06, + "loss": 0.0341, + "step": 15725 + }, + { + "epoch": 6.66102770141679, + "grad_norm": 2.7690587043762207, + "learning_rate": 2.1835897435897436e-06, + "loss": 0.0269, + "step": 15750 + }, + { + "epoch": 6.671600761260309, + "grad_norm": 3.4367501735687256, + "learning_rate": 2.170769230769231e-06, + "loss": 0.0416, + "step": 15775 + }, + { + "epoch": 6.682173821103827, + "grad_norm": 1.8765815496444702, + "learning_rate": 2.157948717948718e-06, + "loss": 0.0274, + "step": 15800 + }, + { + "epoch": 6.692746880947346, + "grad_norm": 4.376313209533691, + "learning_rate": 2.1451282051282054e-06, + "loss": 0.0353, + "step": 15825 + }, + { + "epoch": 6.703319940790865, + "grad_norm": 2.716111660003662, + "learning_rate": 2.1323076923076926e-06, + "loss": 0.0346, + "step": 15850 + }, + { + "epoch": 6.713893000634384, + "grad_norm": 1.410538911819458, + "learning_rate": 2.1194871794871794e-06, + "loss": 0.0516, + "step": 15875 + }, + { + "epoch": 6.724466060477902, + "grad_norm": 2.3586137294769287, + "learning_rate": 2.1066666666666667e-06, + "loss": 0.0312, + "step": 15900 + }, + { + "epoch": 6.735039120321421, + "grad_norm": 2.5256056785583496, + "learning_rate": 2.093846153846154e-06, + "loss": 0.0342, + "step": 15925 + }, + { + "epoch": 6.745612180164939, + "grad_norm": 1.971276879310608, + "learning_rate": 2.081025641025641e-06, + "loss": 0.0286, + "step": 15950 + }, + { + "epoch": 6.756185240008459, + "grad_norm": 3.3202672004699707, + "learning_rate": 2.0682051282051284e-06, + "loss": 0.0324, + "step": 15975 + }, + { + "epoch": 6.766758299851977, + "grad_norm": 4.523472309112549, + "learning_rate": 2.0553846153846157e-06, + "loss": 0.0312, + "step": 16000 + }, + { + "epoch": 6.766758299851977, + "eval_loss": 0.1982458233833313, + "eval_runtime": 440.4101, + "eval_samples_per_second": 8.267, + "eval_steps_per_second": 1.035, + "eval_wer": 0.19453338902037218, + "step": 16000 + }, + { + "epoch": 6.777331359695496, + "grad_norm": 1.8507798910140991, + "learning_rate": 2.0425641025641025e-06, + "loss": 0.0326, + "step": 16025 + }, + { + "epoch": 6.787904419539014, + "grad_norm": 1.808121681213379, + "learning_rate": 2.0297435897435897e-06, + "loss": 0.0317, + "step": 16050 + }, + { + "epoch": 6.798477479382534, + "grad_norm": 1.5614056587219238, + "learning_rate": 2.016923076923077e-06, + "loss": 0.0404, + "step": 16075 + }, + { + "epoch": 6.809050539226052, + "grad_norm": 1.277215600013733, + "learning_rate": 2.0041025641025642e-06, + "loss": 0.0257, + "step": 16100 + }, + { + "epoch": 6.819623599069571, + "grad_norm": 2.63959002494812, + "learning_rate": 1.9912820512820515e-06, + "loss": 0.0232, + "step": 16125 + }, + { + "epoch": 6.830196658913089, + "grad_norm": 3.124812602996826, + "learning_rate": 1.9784615384615387e-06, + "loss": 0.0277, + "step": 16150 + }, + { + "epoch": 6.840769718756608, + "grad_norm": 5.799993991851807, + "learning_rate": 1.9656410256410256e-06, + "loss": 0.0354, + "step": 16175 + }, + { + "epoch": 6.851342778600127, + "grad_norm": 1.860620141029358, + "learning_rate": 1.952820512820513e-06, + "loss": 0.0231, + "step": 16200 + }, + { + "epoch": 6.861915838443646, + "grad_norm": 3.376303195953369, + "learning_rate": 1.94e-06, + "loss": 0.0357, + "step": 16225 + }, + { + "epoch": 6.872488898287164, + "grad_norm": 3.7502923011779785, + "learning_rate": 1.9271794871794873e-06, + "loss": 0.0365, + "step": 16250 + }, + { + "epoch": 6.883061958130683, + "grad_norm": 2.344674587249756, + "learning_rate": 1.9143589743589746e-06, + "loss": 0.0354, + "step": 16275 + }, + { + "epoch": 6.893635017974201, + "grad_norm": 1.4450386762619019, + "learning_rate": 1.9015384615384616e-06, + "loss": 0.0257, + "step": 16300 + }, + { + "epoch": 6.904208077817721, + "grad_norm": 2.097320079803467, + "learning_rate": 1.8887179487179488e-06, + "loss": 0.0265, + "step": 16325 + }, + { + "epoch": 6.914781137661239, + "grad_norm": 4.6603851318359375, + "learning_rate": 1.875897435897436e-06, + "loss": 0.0354, + "step": 16350 + }, + { + "epoch": 6.925354197504758, + "grad_norm": 1.707977533340454, + "learning_rate": 1.8630769230769233e-06, + "loss": 0.0422, + "step": 16375 + }, + { + "epoch": 6.935927257348276, + "grad_norm": 3.779792070388794, + "learning_rate": 1.8502564102564106e-06, + "loss": 0.0315, + "step": 16400 + }, + { + "epoch": 6.946500317191795, + "grad_norm": 4.249421119689941, + "learning_rate": 1.8374358974358974e-06, + "loss": 0.0309, + "step": 16425 + }, + { + "epoch": 6.957073377035314, + "grad_norm": 2.3399875164031982, + "learning_rate": 1.8246153846153847e-06, + "loss": 0.0276, + "step": 16450 + }, + { + "epoch": 6.967646436878833, + "grad_norm": 4.361936092376709, + "learning_rate": 1.811794871794872e-06, + "loss": 0.0255, + "step": 16475 + }, + { + "epoch": 6.978219496722351, + "grad_norm": 2.4711239337921143, + "learning_rate": 1.7989743589743592e-06, + "loss": 0.03, + "step": 16500 + }, + { + "epoch": 6.98879255656587, + "grad_norm": 1.2361491918563843, + "learning_rate": 1.7861538461538464e-06, + "loss": 0.0311, + "step": 16525 + }, + { + "epoch": 6.9993656164093885, + "grad_norm": 3.5375123023986816, + "learning_rate": 1.7733333333333336e-06, + "loss": 0.0367, + "step": 16550 + }, + { + "epoch": 7.009938676252908, + "grad_norm": 1.0913958549499512, + "learning_rate": 1.7605128205128205e-06, + "loss": 0.0279, + "step": 16575 + }, + { + "epoch": 7.020511736096426, + "grad_norm": 1.6949819326400757, + "learning_rate": 1.7476923076923077e-06, + "loss": 0.0241, + "step": 16600 + }, + { + "epoch": 7.031084795939945, + "grad_norm": 2.930881977081299, + "learning_rate": 1.734871794871795e-06, + "loss": 0.0216, + "step": 16625 + }, + { + "epoch": 7.0416578557834635, + "grad_norm": 4.314076900482178, + "learning_rate": 1.7220512820512822e-06, + "loss": 0.0248, + "step": 16650 + }, + { + "epoch": 7.052230915626982, + "grad_norm": 2.817746639251709, + "learning_rate": 1.7092307692307695e-06, + "loss": 0.0208, + "step": 16675 + }, + { + "epoch": 7.062803975470501, + "grad_norm": 1.8693445920944214, + "learning_rate": 1.6964102564102567e-06, + "loss": 0.0245, + "step": 16700 + }, + { + "epoch": 7.07337703531402, + "grad_norm": 2.050861120223999, + "learning_rate": 1.683589743589744e-06, + "loss": 0.0178, + "step": 16725 + }, + { + "epoch": 7.0839500951575385, + "grad_norm": 1.6270172595977783, + "learning_rate": 1.6707692307692308e-06, + "loss": 0.0211, + "step": 16750 + }, + { + "epoch": 7.094523155001057, + "grad_norm": 2.275005578994751, + "learning_rate": 1.657948717948718e-06, + "loss": 0.022, + "step": 16775 + }, + { + "epoch": 7.105096214844576, + "grad_norm": 2.681896924972534, + "learning_rate": 1.6451282051282053e-06, + "loss": 0.0228, + "step": 16800 + }, + { + "epoch": 7.115669274688095, + "grad_norm": 1.60031259059906, + "learning_rate": 1.632820512820513e-06, + "loss": 0.0213, + "step": 16825 + }, + { + "epoch": 7.1262423345316135, + "grad_norm": 1.0850863456726074, + "learning_rate": 1.6200000000000002e-06, + "loss": 0.0289, + "step": 16850 + }, + { + "epoch": 7.136815394375132, + "grad_norm": 1.6733803749084473, + "learning_rate": 1.6071794871794874e-06, + "loss": 0.0374, + "step": 16875 + }, + { + "epoch": 7.1473884542186505, + "grad_norm": 1.9587990045547485, + "learning_rate": 1.5943589743589744e-06, + "loss": 0.0276, + "step": 16900 + }, + { + "epoch": 7.15796151406217, + "grad_norm": 1.7133156061172485, + "learning_rate": 1.5815384615384615e-06, + "loss": 0.0246, + "step": 16925 + }, + { + "epoch": 7.1685345739056885, + "grad_norm": 1.5173008441925049, + "learning_rate": 1.5687179487179487e-06, + "loss": 0.0269, + "step": 16950 + }, + { + "epoch": 7.179107633749207, + "grad_norm": 2.4916610717773438, + "learning_rate": 1.555897435897436e-06, + "loss": 0.022, + "step": 16975 + }, + { + "epoch": 7.1896806935927255, + "grad_norm": 0.9858763217926025, + "learning_rate": 1.5430769230769232e-06, + "loss": 0.0237, + "step": 17000 + }, + { + "epoch": 7.1896806935927255, + "eval_loss": 0.19975194334983826, + "eval_runtime": 439.8052, + "eval_samples_per_second": 8.279, + "eval_steps_per_second": 1.037, + "eval_wer": 0.1842312868346559, + "step": 17000 + }, + { + "epoch": 7.200253753436244, + "grad_norm": 2.612891912460327, + "learning_rate": 1.5302564102564105e-06, + "loss": 0.0265, + "step": 17025 + }, + { + "epoch": 7.2108268132797635, + "grad_norm": 2.0947177410125732, + "learning_rate": 1.5174358974358977e-06, + "loss": 0.0256, + "step": 17050 + }, + { + "epoch": 7.221399873123282, + "grad_norm": 2.62943959236145, + "learning_rate": 1.5046153846153845e-06, + "loss": 0.0177, + "step": 17075 + }, + { + "epoch": 7.2319729329668005, + "grad_norm": 1.3514835834503174, + "learning_rate": 1.4917948717948718e-06, + "loss": 0.0249, + "step": 17100 + }, + { + "epoch": 7.242545992810319, + "grad_norm": 1.2198314666748047, + "learning_rate": 1.478974358974359e-06, + "loss": 0.0358, + "step": 17125 + }, + { + "epoch": 7.253119052653838, + "grad_norm": 1.2630308866500854, + "learning_rate": 1.4661538461538463e-06, + "loss": 0.0283, + "step": 17150 + }, + { + "epoch": 7.263692112497357, + "grad_norm": 2.1470680236816406, + "learning_rate": 1.4533333333333335e-06, + "loss": 0.0324, + "step": 17175 + }, + { + "epoch": 7.2742651723408756, + "grad_norm": 1.8368949890136719, + "learning_rate": 1.4405128205128208e-06, + "loss": 0.0346, + "step": 17200 + }, + { + "epoch": 7.284838232184394, + "grad_norm": 1.5384070873260498, + "learning_rate": 1.4276923076923076e-06, + "loss": 0.0196, + "step": 17225 + }, + { + "epoch": 7.295411292027913, + "grad_norm": 1.1340972185134888, + "learning_rate": 1.4148717948717949e-06, + "loss": 0.019, + "step": 17250 + }, + { + "epoch": 7.305984351871432, + "grad_norm": 1.1070088148117065, + "learning_rate": 1.402051282051282e-06, + "loss": 0.022, + "step": 17275 + }, + { + "epoch": 7.3165574117149506, + "grad_norm": 1.5131502151489258, + "learning_rate": 1.3892307692307694e-06, + "loss": 0.0212, + "step": 17300 + }, + { + "epoch": 7.327130471558469, + "grad_norm": 1.0822603702545166, + "learning_rate": 1.3764102564102566e-06, + "loss": 0.0167, + "step": 17325 + }, + { + "epoch": 7.337703531401988, + "grad_norm": 1.711835503578186, + "learning_rate": 1.3635897435897438e-06, + "loss": 0.0184, + "step": 17350 + }, + { + "epoch": 7.348276591245506, + "grad_norm": 1.619463324546814, + "learning_rate": 1.3507692307692307e-06, + "loss": 0.02, + "step": 17375 + }, + { + "epoch": 7.358849651089026, + "grad_norm": 0.940372884273529, + "learning_rate": 1.337948717948718e-06, + "loss": 0.0244, + "step": 17400 + }, + { + "epoch": 7.369422710932544, + "grad_norm": 1.5679852962493896, + "learning_rate": 1.3251282051282052e-06, + "loss": 0.0245, + "step": 17425 + }, + { + "epoch": 7.379995770776063, + "grad_norm": 1.4729161262512207, + "learning_rate": 1.3123076923076924e-06, + "loss": 0.0225, + "step": 17450 + }, + { + "epoch": 7.390568830619581, + "grad_norm": 1.3189888000488281, + "learning_rate": 1.2994871794871797e-06, + "loss": 0.0377, + "step": 17475 + }, + { + "epoch": 7.4011418904631, + "grad_norm": 2.973733425140381, + "learning_rate": 1.286666666666667e-06, + "loss": 0.0241, + "step": 17500 + }, + { + "epoch": 7.411714950306619, + "grad_norm": 5.635532855987549, + "learning_rate": 1.273846153846154e-06, + "loss": 0.0364, + "step": 17525 + }, + { + "epoch": 7.422288010150138, + "grad_norm": 1.3458870649337769, + "learning_rate": 1.261025641025641e-06, + "loss": 0.0236, + "step": 17550 + }, + { + "epoch": 7.432861069993656, + "grad_norm": 3.022913932800293, + "learning_rate": 1.2482051282051282e-06, + "loss": 0.0395, + "step": 17575 + }, + { + "epoch": 7.443434129837175, + "grad_norm": 1.9709924459457397, + "learning_rate": 1.2353846153846155e-06, + "loss": 0.0253, + "step": 17600 + }, + { + "epoch": 7.454007189680693, + "grad_norm": 5.252633571624756, + "learning_rate": 1.2225641025641025e-06, + "loss": 0.0305, + "step": 17625 + }, + { + "epoch": 7.464580249524213, + "grad_norm": 1.3409159183502197, + "learning_rate": 1.2097435897435898e-06, + "loss": 0.0253, + "step": 17650 + }, + { + "epoch": 7.475153309367731, + "grad_norm": 0.8901488780975342, + "learning_rate": 1.196923076923077e-06, + "loss": 0.0236, + "step": 17675 + }, + { + "epoch": 7.48572636921125, + "grad_norm": 2.1367154121398926, + "learning_rate": 1.184102564102564e-06, + "loss": 0.0329, + "step": 17700 + }, + { + "epoch": 7.496299429054768, + "grad_norm": 2.0283923149108887, + "learning_rate": 1.1712820512820513e-06, + "loss": 0.0231, + "step": 17725 + }, + { + "epoch": 7.506872488898287, + "grad_norm": 4.209811210632324, + "learning_rate": 1.1584615384615385e-06, + "loss": 0.0205, + "step": 17750 + }, + { + "epoch": 7.517445548741806, + "grad_norm": 1.1471270322799683, + "learning_rate": 1.1456410256410256e-06, + "loss": 0.0262, + "step": 17775 + }, + { + "epoch": 7.528018608585325, + "grad_norm": 1.345858097076416, + "learning_rate": 1.1328205128205128e-06, + "loss": 0.0419, + "step": 17800 + }, + { + "epoch": 7.538591668428843, + "grad_norm": 4.0828857421875, + "learning_rate": 1.12e-06, + "loss": 0.0248, + "step": 17825 + }, + { + "epoch": 7.549164728272362, + "grad_norm": 2.618866443634033, + "learning_rate": 1.1071794871794873e-06, + "loss": 0.0239, + "step": 17850 + }, + { + "epoch": 7.55973778811588, + "grad_norm": 3.4622833728790283, + "learning_rate": 1.0943589743589744e-06, + "loss": 0.0291, + "step": 17875 + }, + { + "epoch": 7.5703108479594, + "grad_norm": 4.808816432952881, + "learning_rate": 1.0815384615384616e-06, + "loss": 0.0308, + "step": 17900 + }, + { + "epoch": 7.580883907802918, + "grad_norm": 1.1136987209320068, + "learning_rate": 1.0687179487179489e-06, + "loss": 0.0185, + "step": 17925 + }, + { + "epoch": 7.591456967646437, + "grad_norm": 1.1457035541534424, + "learning_rate": 1.0558974358974359e-06, + "loss": 0.0234, + "step": 17950 + }, + { + "epoch": 7.602030027489955, + "grad_norm": 2.3021388053894043, + "learning_rate": 1.0430769230769231e-06, + "loss": 0.0172, + "step": 17975 + }, + { + "epoch": 7.612603087333475, + "grad_norm": 2.4270472526550293, + "learning_rate": 1.0302564102564104e-06, + "loss": 0.0223, + "step": 18000 + }, + { + "epoch": 7.612603087333475, + "eval_loss": 0.19938968122005463, + "eval_runtime": 437.8238, + "eval_samples_per_second": 8.316, + "eval_steps_per_second": 1.042, + "eval_wer": 0.18000835305582627, + "step": 18000 + }, + { + "epoch": 7.623176147176993, + "grad_norm": 1.8567931652069092, + "learning_rate": 1.0174358974358974e-06, + "loss": 0.0193, + "step": 18025 + }, + { + "epoch": 7.633749207020512, + "grad_norm": 1.3256702423095703, + "learning_rate": 1.0046153846153847e-06, + "loss": 0.0237, + "step": 18050 + }, + { + "epoch": 7.64432226686403, + "grad_norm": 2.2503092288970947, + "learning_rate": 9.91794871794872e-07, + "loss": 0.0335, + "step": 18075 + }, + { + "epoch": 7.654895326707549, + "grad_norm": 2.70359468460083, + "learning_rate": 9.78974358974359e-07, + "loss": 0.0217, + "step": 18100 + }, + { + "epoch": 7.665468386551068, + "grad_norm": 2.691211700439453, + "learning_rate": 9.661538461538462e-07, + "loss": 0.0198, + "step": 18125 + }, + { + "epoch": 7.676041446394587, + "grad_norm": 2.4848880767822266, + "learning_rate": 9.533333333333335e-07, + "loss": 0.0209, + "step": 18150 + }, + { + "epoch": 7.686614506238105, + "grad_norm": 1.926702857017517, + "learning_rate": 9.405128205128206e-07, + "loss": 0.0355, + "step": 18175 + }, + { + "epoch": 7.697187566081624, + "grad_norm": 1.5971437692642212, + "learning_rate": 9.276923076923077e-07, + "loss": 0.0197, + "step": 18200 + }, + { + "epoch": 7.707760625925143, + "grad_norm": 2.465510845184326, + "learning_rate": 9.14871794871795e-07, + "loss": 0.0248, + "step": 18225 + }, + { + "epoch": 7.718333685768662, + "grad_norm": 2.2006723880767822, + "learning_rate": 9.020512820512821e-07, + "loss": 0.0325, + "step": 18250 + }, + { + "epoch": 7.72890674561218, + "grad_norm": 0.9062207937240601, + "learning_rate": 8.892307692307693e-07, + "loss": 0.025, + "step": 18275 + }, + { + "epoch": 7.739479805455699, + "grad_norm": 0.9305509328842163, + "learning_rate": 8.764102564102565e-07, + "loss": 0.0268, + "step": 18300 + }, + { + "epoch": 7.750052865299217, + "grad_norm": 2.8776371479034424, + "learning_rate": 8.635897435897437e-07, + "loss": 0.024, + "step": 18325 + }, + { + "epoch": 7.760625925142737, + "grad_norm": 1.8713349103927612, + "learning_rate": 8.507692307692308e-07, + "loss": 0.0201, + "step": 18350 + }, + { + "epoch": 7.771198984986255, + "grad_norm": 1.8037798404693604, + "learning_rate": 8.37948717948718e-07, + "loss": 0.0195, + "step": 18375 + }, + { + "epoch": 7.781772044829774, + "grad_norm": 2.990373134613037, + "learning_rate": 8.251282051282052e-07, + "loss": 0.019, + "step": 18400 + }, + { + "epoch": 7.792345104673292, + "grad_norm": 2.369366407394409, + "learning_rate": 8.123076923076923e-07, + "loss": 0.0306, + "step": 18425 + }, + { + "epoch": 7.802918164516811, + "grad_norm": 2.136784553527832, + "learning_rate": 7.994871794871796e-07, + "loss": 0.0268, + "step": 18450 + }, + { + "epoch": 7.81349122436033, + "grad_norm": 1.5172721147537231, + "learning_rate": 7.866666666666667e-07, + "loss": 0.0255, + "step": 18475 + }, + { + "epoch": 7.824064284203849, + "grad_norm": 1.0286389589309692, + "learning_rate": 7.738461538461539e-07, + "loss": 0.0226, + "step": 18500 + }, + { + "epoch": 7.834637344047367, + "grad_norm": 3.877328634262085, + "learning_rate": 7.610256410256411e-07, + "loss": 0.031, + "step": 18525 + }, + { + "epoch": 7.845210403890886, + "grad_norm": 1.6196309328079224, + "learning_rate": 7.482051282051283e-07, + "loss": 0.0187, + "step": 18550 + }, + { + "epoch": 7.855783463734404, + "grad_norm": 3.4636642932891846, + "learning_rate": 7.353846153846154e-07, + "loss": 0.0314, + "step": 18575 + }, + { + "epoch": 7.866356523577924, + "grad_norm": 1.571252703666687, + "learning_rate": 7.225641025641026e-07, + "loss": 0.0216, + "step": 18600 + }, + { + "epoch": 7.876929583421442, + "grad_norm": 3.157963752746582, + "learning_rate": 7.097435897435898e-07, + "loss": 0.0219, + "step": 18625 + }, + { + "epoch": 7.887502643264961, + "grad_norm": 1.0556855201721191, + "learning_rate": 6.969230769230769e-07, + "loss": 0.0271, + "step": 18650 + }, + { + "epoch": 7.898075703108479, + "grad_norm": 3.6477906703948975, + "learning_rate": 6.841025641025642e-07, + "loss": 0.0297, + "step": 18675 + }, + { + "epoch": 7.908648762951998, + "grad_norm": 2.9608850479125977, + "learning_rate": 6.712820512820513e-07, + "loss": 0.0289, + "step": 18700 + }, + { + "epoch": 7.919221822795517, + "grad_norm": 2.4232919216156006, + "learning_rate": 6.584615384615385e-07, + "loss": 0.0254, + "step": 18725 + }, + { + "epoch": 7.929794882639036, + "grad_norm": 2.4459691047668457, + "learning_rate": 6.456410256410257e-07, + "loss": 0.0212, + "step": 18750 + }, + { + "epoch": 7.940367942482554, + "grad_norm": 2.3065927028656006, + "learning_rate": 6.328205128205129e-07, + "loss": 0.0312, + "step": 18775 + }, + { + "epoch": 7.950941002326073, + "grad_norm": 1.5412800312042236, + "learning_rate": 6.200000000000001e-07, + "loss": 0.0196, + "step": 18800 + }, + { + "epoch": 7.9615140621695915, + "grad_norm": 1.0917569398880005, + "learning_rate": 6.071794871794872e-07, + "loss": 0.0197, + "step": 18825 + }, + { + "epoch": 7.972087122013111, + "grad_norm": 4.866013526916504, + "learning_rate": 5.943589743589744e-07, + "loss": 0.0253, + "step": 18850 + }, + { + "epoch": 7.982660181856629, + "grad_norm": 1.2132177352905273, + "learning_rate": 5.815384615384616e-07, + "loss": 0.0235, + "step": 18875 + }, + { + "epoch": 7.993233241700148, + "grad_norm": 1.900343894958496, + "learning_rate": 5.687179487179488e-07, + "loss": 0.0264, + "step": 18900 + }, + { + "epoch": 8.003806301543667, + "grad_norm": 2.312898874282837, + "learning_rate": 5.558974358974359e-07, + "loss": 0.0239, + "step": 18925 + }, + { + "epoch": 8.014379361387185, + "grad_norm": 1.2997639179229736, + "learning_rate": 5.430769230769232e-07, + "loss": 0.0164, + "step": 18950 + }, + { + "epoch": 8.024952421230704, + "grad_norm": 1.6723324060440063, + "learning_rate": 5.302564102564103e-07, + "loss": 0.0151, + "step": 18975 + }, + { + "epoch": 8.035525481074222, + "grad_norm": 1.057395339012146, + "learning_rate": 5.174358974358974e-07, + "loss": 0.0192, + "step": 19000 + }, + { + "epoch": 8.035525481074222, + "eval_loss": 0.19927473366260529, + "eval_runtime": 441.263, + "eval_samples_per_second": 8.251, + "eval_steps_per_second": 1.033, + "eval_wer": 0.1806116293099448, + "step": 19000 + }, + { + "epoch": 8.046098540917741, + "grad_norm": 1.4631696939468384, + "learning_rate": 5.046153846153847e-07, + "loss": 0.0164, + "step": 19025 + }, + { + "epoch": 8.05667160076126, + "grad_norm": 1.539763331413269, + "learning_rate": 4.917948717948718e-07, + "loss": 0.0242, + "step": 19050 + }, + { + "epoch": 8.067244660604779, + "grad_norm": 0.7737773060798645, + "learning_rate": 4.78974358974359e-07, + "loss": 0.0147, + "step": 19075 + }, + { + "epoch": 8.077817720448298, + "grad_norm": 1.1692899465560913, + "learning_rate": 4.661538461538462e-07, + "loss": 0.0266, + "step": 19100 + }, + { + "epoch": 8.088390780291816, + "grad_norm": 0.7815419435501099, + "learning_rate": 4.533333333333334e-07, + "loss": 0.0234, + "step": 19125 + }, + { + "epoch": 8.098963840135335, + "grad_norm": 1.5961151123046875, + "learning_rate": 4.4051282051282056e-07, + "loss": 0.0209, + "step": 19150 + }, + { + "epoch": 8.109536899978854, + "grad_norm": 2.6668717861175537, + "learning_rate": 4.276923076923077e-07, + "loss": 0.02, + "step": 19175 + }, + { + "epoch": 8.120109959822372, + "grad_norm": 2.6953537464141846, + "learning_rate": 4.1487179487179495e-07, + "loss": 0.0227, + "step": 19200 + }, + { + "epoch": 8.130683019665891, + "grad_norm": 1.7729073762893677, + "learning_rate": 4.025641025641026e-07, + "loss": 0.0262, + "step": 19225 + }, + { + "epoch": 8.14125607950941, + "grad_norm": 1.3672327995300293, + "learning_rate": 3.897435897435898e-07, + "loss": 0.0168, + "step": 19250 + }, + { + "epoch": 8.151829139352929, + "grad_norm": 3.492060422897339, + "learning_rate": 3.769230769230769e-07, + "loss": 0.0193, + "step": 19275 + }, + { + "epoch": 8.162402199196448, + "grad_norm": 3.2767393589019775, + "learning_rate": 3.641025641025641e-07, + "loss": 0.0197, + "step": 19300 + }, + { + "epoch": 8.172975259039966, + "grad_norm": 1.0066640377044678, + "learning_rate": 3.512820512820513e-07, + "loss": 0.0222, + "step": 19325 + }, + { + "epoch": 8.183548318883485, + "grad_norm": 2.359158515930176, + "learning_rate": 3.3846153846153845e-07, + "loss": 0.0171, + "step": 19350 + }, + { + "epoch": 8.194121378727004, + "grad_norm": 1.6191521883010864, + "learning_rate": 3.2564102564102565e-07, + "loss": 0.0192, + "step": 19375 + }, + { + "epoch": 8.204694438570522, + "grad_norm": 3.286207914352417, + "learning_rate": 3.1282051282051284e-07, + "loss": 0.0171, + "step": 19400 + }, + { + "epoch": 8.215267498414041, + "grad_norm": 4.682559013366699, + "learning_rate": 3.0000000000000004e-07, + "loss": 0.0291, + "step": 19425 + }, + { + "epoch": 8.225840558257559, + "grad_norm": 5.000258445739746, + "learning_rate": 2.871794871794872e-07, + "loss": 0.0287, + "step": 19450 + }, + { + "epoch": 8.236413618101079, + "grad_norm": 1.9743106365203857, + "learning_rate": 2.743589743589744e-07, + "loss": 0.0185, + "step": 19475 + }, + { + "epoch": 8.246986677944598, + "grad_norm": 1.1319037675857544, + "learning_rate": 2.6153846153846157e-07, + "loss": 0.0156, + "step": 19500 + }, + { + "epoch": 8.257559737788116, + "grad_norm": 4.324277400970459, + "learning_rate": 2.487179487179487e-07, + "loss": 0.021, + "step": 19525 + }, + { + "epoch": 8.268132797631635, + "grad_norm": 3.4904730319976807, + "learning_rate": 2.3589743589743593e-07, + "loss": 0.0195, + "step": 19550 + }, + { + "epoch": 8.278705857475153, + "grad_norm": 1.5097222328186035, + "learning_rate": 2.2307692307692308e-07, + "loss": 0.0181, + "step": 19575 + }, + { + "epoch": 8.289278917318672, + "grad_norm": 2.1649582386016846, + "learning_rate": 2.1025641025641027e-07, + "loss": 0.0198, + "step": 19600 + }, + { + "epoch": 8.299851977162191, + "grad_norm": 3.739485740661621, + "learning_rate": 1.9743589743589747e-07, + "loss": 0.021, + "step": 19625 + }, + { + "epoch": 8.31042503700571, + "grad_norm": 4.509456634521484, + "learning_rate": 1.8461538461538464e-07, + "loss": 0.0364, + "step": 19650 + }, + { + "epoch": 8.320998096849229, + "grad_norm": 1.5747437477111816, + "learning_rate": 1.717948717948718e-07, + "loss": 0.0283, + "step": 19675 + }, + { + "epoch": 8.331571156692746, + "grad_norm": 2.8335936069488525, + "learning_rate": 1.58974358974359e-07, + "loss": 0.0273, + "step": 19700 + }, + { + "epoch": 8.342144216536266, + "grad_norm": 1.661657452583313, + "learning_rate": 1.4615384615384617e-07, + "loss": 0.0283, + "step": 19725 + }, + { + "epoch": 8.352717276379785, + "grad_norm": 2.5407464504241943, + "learning_rate": 1.3333333333333336e-07, + "loss": 0.0225, + "step": 19750 + }, + { + "epoch": 8.363290336223303, + "grad_norm": 1.7496103048324585, + "learning_rate": 1.2051282051282053e-07, + "loss": 0.02, + "step": 19775 + }, + { + "epoch": 8.373863396066822, + "grad_norm": 1.1357078552246094, + "learning_rate": 1.076923076923077e-07, + "loss": 0.0159, + "step": 19800 + }, + { + "epoch": 8.38443645591034, + "grad_norm": 1.138163685798645, + "learning_rate": 9.487179487179488e-08, + "loss": 0.0219, + "step": 19825 + }, + { + "epoch": 8.39500951575386, + "grad_norm": 1.9725435972213745, + "learning_rate": 8.205128205128206e-08, + "loss": 0.0189, + "step": 19850 + }, + { + "epoch": 8.405582575597379, + "grad_norm": 1.0541646480560303, + "learning_rate": 6.923076923076924e-08, + "loss": 0.0292, + "step": 19875 + }, + { + "epoch": 8.416155635440896, + "grad_norm": 3.125678062438965, + "learning_rate": 5.641025641025642e-08, + "loss": 0.0196, + "step": 19900 + }, + { + "epoch": 8.426728695284416, + "grad_norm": 1.1036850214004517, + "learning_rate": 4.358974358974359e-08, + "loss": 0.0163, + "step": 19925 + }, + { + "epoch": 8.437301755127933, + "grad_norm": 3.5145390033721924, + "learning_rate": 3.076923076923077e-08, + "loss": 0.0224, + "step": 19950 + }, + { + "epoch": 8.447874814971453, + "grad_norm": 4.518385410308838, + "learning_rate": 1.794871794871795e-08, + "loss": 0.0223, + "step": 19975 + }, + { + "epoch": 8.458447874814972, + "grad_norm": 1.1149485111236572, + "learning_rate": 5.128205128205129e-09, + "loss": 0.0158, + "step": 20000 + }, + { + "epoch": 8.458447874814972, + "eval_loss": 0.20003820955753326, + "eval_runtime": 439.4967, + "eval_samples_per_second": 8.284, + "eval_steps_per_second": 1.038, + "eval_wer": 0.1807044410413476, + "step": 20000 + }, + { + "epoch": 8.458447874814972, + "step": 20000, + "total_flos": 7.8770584829952e+18, + "train_loss": 0.15760719767808914, + "train_runtime": 46114.6561, + "train_samples_per_second": 6.939, + "train_steps_per_second": 0.434 + } + ], + "logging_steps": 25, + "max_steps": 20000, + "num_input_tokens_seen": 0, + "num_train_epochs": 9, + "save_steps": 1000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 7.8770584829952e+18, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}