{ "best_metric": 23.738796414852754, "best_model_checkpoint": "/app/data/hieunq/whisper_asr_train/whisper-large-chinese-pipyin/checkpoint-4410", "epoch": 5.0, "eval_steps": 500, "global_step": 4410, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 6.144965986394558e-06, "loss": 2.1824, "step": 25 }, { "epoch": 0.06, "learning_rate": 6.109931972789116e-06, "loss": 0.8831, "step": 50 }, { "epoch": 0.09, "learning_rate": 6.074897959183673e-06, "loss": 0.6375, "step": 75 }, { "epoch": 0.11, "learning_rate": 6.039863945578232e-06, "loss": 0.5378, "step": 100 }, { "epoch": 0.14, "learning_rate": 6.004829931972789e-06, "loss": 0.4835, "step": 125 }, { "epoch": 0.17, "learning_rate": 5.969795918367347e-06, "loss": 0.5321, "step": 150 }, { "epoch": 0.2, "learning_rate": 5.934761904761905e-06, "loss": 0.5099, "step": 175 }, { "epoch": 0.23, "learning_rate": 5.899727891156462e-06, "loss": 0.5304, "step": 200 }, { "epoch": 0.26, "learning_rate": 5.864693877551021e-06, "loss": 0.4958, "step": 225 }, { "epoch": 0.28, "learning_rate": 5.8296598639455785e-06, "loss": 0.4695, "step": 250 }, { "epoch": 0.31, "learning_rate": 5.794625850340136e-06, "loss": 0.4351, "step": 275 }, { "epoch": 0.34, "learning_rate": 5.759591836734694e-06, "loss": 0.4394, "step": 300 }, { "epoch": 0.37, "learning_rate": 5.7245578231292515e-06, "loss": 0.414, "step": 325 }, { "epoch": 0.4, "learning_rate": 5.689523809523809e-06, "loss": 0.4521, "step": 350 }, { "epoch": 0.43, "learning_rate": 5.654489795918368e-06, "loss": 0.4334, "step": 375 }, { "epoch": 0.45, "learning_rate": 5.619455782312925e-06, "loss": 0.4217, "step": 400 }, { "epoch": 0.48, "learning_rate": 5.584421768707483e-06, "loss": 0.4108, "step": 425 }, { "epoch": 0.51, "learning_rate": 5.549387755102041e-06, "loss": 0.4005, "step": 450 }, { "epoch": 0.54, "learning_rate": 5.514353741496598e-06, "loss": 0.4097, "step": 475 }, { "epoch": 0.57, "learning_rate": 5.479319727891157e-06, "loss": 0.3855, "step": 500 }, { "epoch": 0.6, "learning_rate": 5.4442857142857146e-06, "loss": 0.4167, "step": 525 }, { "epoch": 0.62, "learning_rate": 5.409251700680272e-06, "loss": 0.3667, "step": 550 }, { "epoch": 0.65, "learning_rate": 5.37421768707483e-06, "loss": 0.4307, "step": 575 }, { "epoch": 0.68, "learning_rate": 5.3391836734693876e-06, "loss": 0.3693, "step": 600 }, { "epoch": 0.71, "learning_rate": 5.304149659863946e-06, "loss": 0.3089, "step": 625 }, { "epoch": 0.74, "learning_rate": 5.269115646258504e-06, "loss": 0.3185, "step": 650 }, { "epoch": 0.77, "learning_rate": 5.234081632653061e-06, "loss": 0.3783, "step": 675 }, { "epoch": 0.79, "learning_rate": 5.199047619047619e-06, "loss": 0.3681, "step": 700 }, { "epoch": 0.82, "learning_rate": 5.164013605442177e-06, "loss": 0.4317, "step": 725 }, { "epoch": 0.85, "learning_rate": 5.128979591836735e-06, "loss": 0.3815, "step": 750 }, { "epoch": 0.88, "learning_rate": 5.093945578231293e-06, "loss": 0.3618, "step": 775 }, { "epoch": 0.91, "learning_rate": 5.05891156462585e-06, "loss": 0.3376, "step": 800 }, { "epoch": 0.94, "learning_rate": 5.023877551020408e-06, "loss": 0.3485, "step": 825 }, { "epoch": 0.96, "learning_rate": 4.988843537414966e-06, "loss": 0.3397, "step": 850 }, { "epoch": 0.99, "learning_rate": 4.9538095238095245e-06, "loss": 0.3836, "step": 875 }, { "epoch": 1.0, "eval_loss": 0.342790424823761, "eval_runtime": 1384.9795, "eval_samples_per_second": 1.274, "eval_steps_per_second": 0.16, "eval_wer": 27.76781903542467, "step": 882 }, { "epoch": 1.02, "learning_rate": 4.918775510204081e-06, "loss": 0.2476, "step": 900 }, { "epoch": 1.05, "learning_rate": 4.883741496598639e-06, "loss": 0.1817, "step": 925 }, { "epoch": 1.08, "learning_rate": 4.8487074829931975e-06, "loss": 0.2218, "step": 950 }, { "epoch": 1.11, "learning_rate": 4.813673469387755e-06, "loss": 0.1971, "step": 975 }, { "epoch": 1.13, "learning_rate": 4.778639455782314e-06, "loss": 0.2224, "step": 1000 }, { "epoch": 1.16, "learning_rate": 4.7436054421768705e-06, "loss": 0.1781, "step": 1025 }, { "epoch": 1.19, "learning_rate": 4.708571428571428e-06, "loss": 0.2359, "step": 1050 }, { "epoch": 1.22, "learning_rate": 4.673537414965987e-06, "loss": 0.1955, "step": 1075 }, { "epoch": 1.25, "learning_rate": 4.638503401360544e-06, "loss": 0.2005, "step": 1100 }, { "epoch": 1.28, "learning_rate": 4.603469387755103e-06, "loss": 0.2131, "step": 1125 }, { "epoch": 1.3, "learning_rate": 4.56843537414966e-06, "loss": 0.179, "step": 1150 }, { "epoch": 1.33, "learning_rate": 4.533401360544217e-06, "loss": 0.2156, "step": 1175 }, { "epoch": 1.36, "learning_rate": 4.498367346938776e-06, "loss": 0.186, "step": 1200 }, { "epoch": 1.39, "learning_rate": 4.4633333333333335e-06, "loss": 0.2004, "step": 1225 }, { "epoch": 1.42, "learning_rate": 4.428299319727891e-06, "loss": 0.2206, "step": 1250 }, { "epoch": 1.45, "learning_rate": 4.393265306122449e-06, "loss": 0.2029, "step": 1275 }, { "epoch": 1.47, "learning_rate": 4.3582312925170065e-06, "loss": 0.1952, "step": 1300 }, { "epoch": 1.5, "learning_rate": 4.323197278911565e-06, "loss": 0.2412, "step": 1325 }, { "epoch": 1.53, "learning_rate": 4.288163265306123e-06, "loss": 0.2011, "step": 1350 }, { "epoch": 1.56, "learning_rate": 4.25312925170068e-06, "loss": 0.1771, "step": 1375 }, { "epoch": 1.59, "learning_rate": 4.218095238095238e-06, "loss": 0.2092, "step": 1400 }, { "epoch": 1.62, "learning_rate": 4.183061224489796e-06, "loss": 0.1916, "step": 1425 }, { "epoch": 1.64, "learning_rate": 4.148027210884354e-06, "loss": 0.191, "step": 1450 }, { "epoch": 1.67, "learning_rate": 4.112993197278912e-06, "loss": 0.1907, "step": 1475 }, { "epoch": 1.7, "learning_rate": 4.0779591836734696e-06, "loss": 0.1984, "step": 1500 }, { "epoch": 1.73, "learning_rate": 4.042925170068027e-06, "loss": 0.1986, "step": 1525 }, { "epoch": 1.76, "learning_rate": 4.007891156462585e-06, "loss": 0.196, "step": 1550 }, { "epoch": 1.79, "learning_rate": 3.972857142857143e-06, "loss": 0.1685, "step": 1575 }, { "epoch": 1.81, "learning_rate": 3.937823129251701e-06, "loss": 0.2104, "step": 1600 }, { "epoch": 1.84, "learning_rate": 3.902789115646259e-06, "loss": 0.2079, "step": 1625 }, { "epoch": 1.87, "learning_rate": 3.8677551020408164e-06, "loss": 0.2119, "step": 1650 }, { "epoch": 1.9, "learning_rate": 3.832721088435374e-06, "loss": 0.2, "step": 1675 }, { "epoch": 1.93, "learning_rate": 3.797687074829932e-06, "loss": 0.1765, "step": 1700 }, { "epoch": 1.96, "learning_rate": 3.76265306122449e-06, "loss": 0.2026, "step": 1725 }, { "epoch": 1.98, "learning_rate": 3.7276190476190475e-06, "loss": 0.1914, "step": 1750 }, { "epoch": 2.0, "eval_loss": 0.32358798384666443, "eval_runtime": 1373.0201, "eval_samples_per_second": 1.285, "eval_steps_per_second": 0.161, "eval_wer": 25.761843790012804, "step": 1764 }, { "epoch": 2.01, "learning_rate": 3.6925850340136056e-06, "loss": 0.1853, "step": 1775 }, { "epoch": 2.04, "learning_rate": 3.6575510204081633e-06, "loss": 0.104, "step": 1800 }, { "epoch": 2.07, "learning_rate": 3.622517006802721e-06, "loss": 0.1111, "step": 1825 }, { "epoch": 2.1, "learning_rate": 3.587482993197279e-06, "loss": 0.1044, "step": 1850 }, { "epoch": 2.13, "learning_rate": 3.5524489795918367e-06, "loss": 0.0938, "step": 1875 }, { "epoch": 2.15, "learning_rate": 3.517414965986395e-06, "loss": 0.1344, "step": 1900 }, { "epoch": 2.18, "learning_rate": 3.4823809523809525e-06, "loss": 0.1153, "step": 1925 }, { "epoch": 2.21, "learning_rate": 3.44734693877551e-06, "loss": 0.0989, "step": 1950 }, { "epoch": 2.24, "learning_rate": 3.4123129251700682e-06, "loss": 0.1086, "step": 1975 }, { "epoch": 2.27, "learning_rate": 3.377278911564626e-06, "loss": 0.1255, "step": 2000 }, { "epoch": 2.3, "learning_rate": 3.342244897959184e-06, "loss": 0.1093, "step": 2025 }, { "epoch": 2.32, "learning_rate": 3.3072108843537417e-06, "loss": 0.0984, "step": 2050 }, { "epoch": 2.35, "learning_rate": 3.2721768707482993e-06, "loss": 0.0984, "step": 2075 }, { "epoch": 2.38, "learning_rate": 3.2371428571428574e-06, "loss": 0.1139, "step": 2100 }, { "epoch": 2.41, "learning_rate": 3.202108843537415e-06, "loss": 0.1043, "step": 2125 }, { "epoch": 2.44, "learning_rate": 3.167074829931973e-06, "loss": 0.0983, "step": 2150 }, { "epoch": 2.47, "learning_rate": 3.132040816326531e-06, "loss": 0.1194, "step": 2175 }, { "epoch": 2.49, "learning_rate": 3.097006802721088e-06, "loss": 0.0958, "step": 2200 }, { "epoch": 2.52, "learning_rate": 3.061972789115646e-06, "loss": 0.0857, "step": 2225 }, { "epoch": 2.55, "learning_rate": 3.026938775510204e-06, "loss": 0.1022, "step": 2250 }, { "epoch": 2.58, "learning_rate": 2.991904761904762e-06, "loss": 0.1118, "step": 2275 }, { "epoch": 2.61, "learning_rate": 2.95687074829932e-06, "loss": 0.092, "step": 2300 }, { "epoch": 2.64, "learning_rate": 2.9218367346938777e-06, "loss": 0.1063, "step": 2325 }, { "epoch": 2.66, "learning_rate": 2.8868027210884354e-06, "loss": 0.1028, "step": 2350 }, { "epoch": 2.69, "learning_rate": 2.851768707482993e-06, "loss": 0.1077, "step": 2375 }, { "epoch": 2.72, "learning_rate": 2.816734693877551e-06, "loss": 0.0974, "step": 2400 }, { "epoch": 2.75, "learning_rate": 2.781700680272109e-06, "loss": 0.108, "step": 2425 }, { "epoch": 2.78, "learning_rate": 2.7466666666666665e-06, "loss": 0.0852, "step": 2450 }, { "epoch": 2.81, "learning_rate": 2.7116326530612246e-06, "loss": 0.1009, "step": 2475 }, { "epoch": 2.83, "learning_rate": 2.6765986394557822e-06, "loss": 0.1127, "step": 2500 }, { "epoch": 2.86, "learning_rate": 2.6415646258503403e-06, "loss": 0.1072, "step": 2525 }, { "epoch": 2.89, "learning_rate": 2.606530612244898e-06, "loss": 0.0893, "step": 2550 }, { "epoch": 2.92, "learning_rate": 2.5714965986394557e-06, "loss": 0.0945, "step": 2575 }, { "epoch": 2.95, "learning_rate": 2.5364625850340138e-06, "loss": 0.1149, "step": 2600 }, { "epoch": 2.98, "learning_rate": 2.5014285714285714e-06, "loss": 0.0844, "step": 2625 }, { "epoch": 3.0, "eval_loss": 0.33547088503837585, "eval_runtime": 1375.3795, "eval_samples_per_second": 1.283, "eval_steps_per_second": 0.161, "eval_wer": 24.609475032010245, "step": 2646 }, { "epoch": 3.0, "learning_rate": 2.4663945578231295e-06, "loss": 0.0889, "step": 2650 }, { "epoch": 3.03, "learning_rate": 2.4313605442176868e-06, "loss": 0.0382, "step": 2675 }, { "epoch": 3.06, "learning_rate": 2.396326530612245e-06, "loss": 0.0627, "step": 2700 }, { "epoch": 3.09, "learning_rate": 2.361292517006803e-06, "loss": 0.0562, "step": 2725 }, { "epoch": 3.12, "learning_rate": 2.3262585034013606e-06, "loss": 0.049, "step": 2750 }, { "epoch": 3.15, "learning_rate": 2.2912244897959187e-06, "loss": 0.0432, "step": 2775 }, { "epoch": 3.17, "learning_rate": 2.256190476190476e-06, "loss": 0.0594, "step": 2800 }, { "epoch": 3.2, "learning_rate": 2.221156462585034e-06, "loss": 0.0415, "step": 2825 }, { "epoch": 3.23, "learning_rate": 2.1861224489795917e-06, "loss": 0.0451, "step": 2850 }, { "epoch": 3.26, "learning_rate": 2.15108843537415e-06, "loss": 0.045, "step": 2875 }, { "epoch": 3.29, "learning_rate": 2.1160544217687075e-06, "loss": 0.0503, "step": 2900 }, { "epoch": 3.32, "learning_rate": 2.081020408163265e-06, "loss": 0.0368, "step": 2925 }, { "epoch": 3.34, "learning_rate": 2.0459863945578232e-06, "loss": 0.0447, "step": 2950 }, { "epoch": 3.37, "learning_rate": 2.010952380952381e-06, "loss": 0.041, "step": 2975 }, { "epoch": 3.4, "learning_rate": 1.975918367346939e-06, "loss": 0.054, "step": 3000 }, { "epoch": 3.43, "learning_rate": 1.9408843537414967e-06, "loss": 0.0409, "step": 3025 }, { "epoch": 3.46, "learning_rate": 1.9058503401360543e-06, "loss": 0.0423, "step": 3050 }, { "epoch": 3.49, "learning_rate": 1.8708163265306122e-06, "loss": 0.0485, "step": 3075 }, { "epoch": 3.51, "learning_rate": 1.8357823129251701e-06, "loss": 0.048, "step": 3100 }, { "epoch": 3.54, "learning_rate": 1.8007482993197282e-06, "loss": 0.0509, "step": 3125 }, { "epoch": 3.57, "learning_rate": 1.7657142857142857e-06, "loss": 0.0525, "step": 3150 }, { "epoch": 3.6, "learning_rate": 1.7306802721088435e-06, "loss": 0.0539, "step": 3175 }, { "epoch": 3.63, "learning_rate": 1.6956462585034014e-06, "loss": 0.0417, "step": 3200 }, { "epoch": 3.66, "learning_rate": 1.6606122448979593e-06, "loss": 0.0415, "step": 3225 }, { "epoch": 3.68, "learning_rate": 1.625578231292517e-06, "loss": 0.0557, "step": 3250 }, { "epoch": 3.71, "learning_rate": 1.5905442176870748e-06, "loss": 0.0458, "step": 3275 }, { "epoch": 3.74, "learning_rate": 1.5555102040816327e-06, "loss": 0.0425, "step": 3300 }, { "epoch": 3.77, "learning_rate": 1.5204761904761904e-06, "loss": 0.0505, "step": 3325 }, { "epoch": 3.8, "learning_rate": 1.4854421768707483e-06, "loss": 0.0505, "step": 3350 }, { "epoch": 3.83, "learning_rate": 1.4504081632653062e-06, "loss": 0.0445, "step": 3375 }, { "epoch": 3.85, "learning_rate": 1.415374149659864e-06, "loss": 0.0462, "step": 3400 }, { "epoch": 3.88, "learning_rate": 1.380340136054422e-06, "loss": 0.0556, "step": 3425 }, { "epoch": 3.91, "learning_rate": 1.3453061224489796e-06, "loss": 0.0459, "step": 3450 }, { "epoch": 3.94, "learning_rate": 1.3102721088435375e-06, "loss": 0.0439, "step": 3475 }, { "epoch": 3.97, "learning_rate": 1.2752380952380951e-06, "loss": 0.0586, "step": 3500 }, { "epoch": 4.0, "learning_rate": 1.240204081632653e-06, "loss": 0.0418, "step": 3525 }, { "epoch": 4.0, "eval_loss": 0.37163934111595154, "eval_runtime": 1376.5549, "eval_samples_per_second": 1.281, "eval_steps_per_second": 0.161, "eval_wer": 23.781476739223216, "step": 3528 }, { "epoch": 4.02, "learning_rate": 1.205170068027211e-06, "loss": 0.0216, "step": 3550 }, { "epoch": 4.05, "learning_rate": 1.1701360544217688e-06, "loss": 0.0167, "step": 3575 }, { "epoch": 4.08, "learning_rate": 1.1351020408163267e-06, "loss": 0.0179, "step": 3600 }, { "epoch": 4.11, "learning_rate": 1.1000680272108843e-06, "loss": 0.016, "step": 3625 }, { "epoch": 4.14, "learning_rate": 1.0650340136054422e-06, "loss": 0.0157, "step": 3650 }, { "epoch": 4.17, "learning_rate": 1.0299999999999999e-06, "loss": 0.019, "step": 3675 }, { "epoch": 4.2, "learning_rate": 9.949659863945578e-07, "loss": 0.017, "step": 3700 }, { "epoch": 4.22, "learning_rate": 9.599319727891156e-07, "loss": 0.0193, "step": 3725 }, { "epoch": 4.25, "learning_rate": 9.248979591836735e-07, "loss": 0.0144, "step": 3750 }, { "epoch": 4.28, "learning_rate": 8.898639455782314e-07, "loss": 0.0161, "step": 3775 }, { "epoch": 4.31, "learning_rate": 8.548299319727891e-07, "loss": 0.018, "step": 3800 }, { "epoch": 4.34, "learning_rate": 8.197959183673471e-07, "loss": 0.0173, "step": 3825 }, { "epoch": 4.37, "learning_rate": 7.847619047619047e-07, "loss": 0.013, "step": 3850 }, { "epoch": 4.39, "learning_rate": 7.497278911564626e-07, "loss": 0.018, "step": 3875 }, { "epoch": 4.42, "learning_rate": 7.146938775510205e-07, "loss": 0.012, "step": 3900 }, { "epoch": 4.45, "learning_rate": 6.796598639455783e-07, "loss": 0.0224, "step": 3925 }, { "epoch": 4.48, "learning_rate": 6.44625850340136e-07, "loss": 0.0142, "step": 3950 }, { "epoch": 4.51, "learning_rate": 6.095918367346939e-07, "loss": 0.0167, "step": 3975 }, { "epoch": 4.54, "learning_rate": 5.745578231292517e-07, "loss": 0.0136, "step": 4000 }, { "epoch": 4.56, "learning_rate": 5.395238095238095e-07, "loss": 0.0184, "step": 4025 }, { "epoch": 4.59, "learning_rate": 5.044897959183673e-07, "loss": 0.0168, "step": 4050 }, { "epoch": 4.62, "learning_rate": 4.6945578231292523e-07, "loss": 0.0137, "step": 4075 }, { "epoch": 4.65, "learning_rate": 4.34421768707483e-07, "loss": 0.0133, "step": 4100 }, { "epoch": 4.68, "learning_rate": 3.9938775510204083e-07, "loss": 0.0141, "step": 4125 }, { "epoch": 4.71, "learning_rate": 3.6435374149659866e-07, "loss": 0.0125, "step": 4150 }, { "epoch": 4.73, "learning_rate": 3.293197278911565e-07, "loss": 0.0261, "step": 4175 }, { "epoch": 4.76, "learning_rate": 2.9428571428571426e-07, "loss": 0.0153, "step": 4200 }, { "epoch": 4.79, "learning_rate": 2.5925170068027214e-07, "loss": 0.0115, "step": 4225 }, { "epoch": 4.82, "learning_rate": 2.2421768707482994e-07, "loss": 0.0155, "step": 4250 }, { "epoch": 4.85, "learning_rate": 1.8918367346938774e-07, "loss": 0.0126, "step": 4275 }, { "epoch": 4.88, "learning_rate": 1.541496598639456e-07, "loss": 0.0146, "step": 4300 }, { "epoch": 4.9, "learning_rate": 1.191156462585034e-07, "loss": 0.0199, "step": 4325 }, { "epoch": 4.93, "learning_rate": 8.408163265306122e-08, "loss": 0.0149, "step": 4350 }, { "epoch": 4.96, "learning_rate": 4.9047619047619045e-08, "loss": 0.0104, "step": 4375 }, { "epoch": 4.99, "learning_rate": 1.4013605442176871e-08, "loss": 0.0121, "step": 4400 }, { "epoch": 5.0, "eval_loss": 0.4422626793384552, "eval_runtime": 1364.6517, "eval_samples_per_second": 1.293, "eval_steps_per_second": 0.162, "eval_wer": 23.738796414852754, "step": 4410 } ], "logging_steps": 25, "max_steps": 4410, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 7.4863010193408e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }