{ "best_metric": 56.830823497490165, "best_model_checkpoint": "/app/data/hieunq/whisper_asr_train/whisper-large-chinese/checkpoint-7322", "epoch": 2.0, "eval_steps": 500, "global_step": 7322, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 6.1715596831466814e-06, "loss": 1.2908, "step": 25 }, { "epoch": 0.01, "learning_rate": 6.163119366293362e-06, "loss": 0.4083, "step": 50 }, { "epoch": 0.02, "learning_rate": 6.154679049440044e-06, "loss": 0.374, "step": 75 }, { "epoch": 0.03, "learning_rate": 6.146238732586725e-06, "loss": 0.3357, "step": 100 }, { "epoch": 0.03, "learning_rate": 6.137798415733407e-06, "loss": 0.3155, "step": 125 }, { "epoch": 0.04, "learning_rate": 6.129358098880087e-06, "loss": 0.3666, "step": 150 }, { "epoch": 0.05, "learning_rate": 6.120917782026768e-06, "loss": 0.3536, "step": 175 }, { "epoch": 0.05, "learning_rate": 6.11247746517345e-06, "loss": 0.3199, "step": 200 }, { "epoch": 0.06, "learning_rate": 6.104037148320131e-06, "loss": 0.319, "step": 225 }, { "epoch": 0.07, "learning_rate": 6.095596831466812e-06, "loss": 0.3515, "step": 250 }, { "epoch": 0.08, "learning_rate": 6.087156514613494e-06, "loss": 0.307, "step": 275 }, { "epoch": 0.08, "learning_rate": 6.078716197760175e-06, "loss": 0.3225, "step": 300 }, { "epoch": 0.09, "learning_rate": 6.070275880906856e-06, "loss": 0.3027, "step": 325 }, { "epoch": 0.1, "learning_rate": 6.061835564053537e-06, "loss": 0.3104, "step": 350 }, { "epoch": 0.1, "learning_rate": 6.053395247200219e-06, "loss": 0.2731, "step": 375 }, { "epoch": 0.11, "learning_rate": 6.0449549303469e-06, "loss": 0.2996, "step": 400 }, { "epoch": 0.12, "learning_rate": 6.0365146134935814e-06, "loss": 0.3431, "step": 425 }, { "epoch": 0.12, "learning_rate": 6.028074296640262e-06, "loss": 0.3011, "step": 450 }, { "epoch": 0.13, "learning_rate": 6.019633979786943e-06, "loss": 0.3444, "step": 475 }, { "epoch": 0.14, "learning_rate": 6.0111936629336245e-06, "loss": 0.3152, "step": 500 }, { "epoch": 0.14, "learning_rate": 6.002753346080307e-06, "loss": 0.3099, "step": 525 }, { "epoch": 0.15, "learning_rate": 5.994313029226987e-06, "loss": 0.2888, "step": 550 }, { "epoch": 0.16, "learning_rate": 5.9858727123736684e-06, "loss": 0.3475, "step": 575 }, { "epoch": 0.16, "learning_rate": 5.97743239552035e-06, "loss": 0.3177, "step": 600 }, { "epoch": 0.17, "learning_rate": 5.968992078667031e-06, "loss": 0.3131, "step": 625 }, { "epoch": 0.18, "learning_rate": 5.9605517618137115e-06, "loss": 0.285, "step": 650 }, { "epoch": 0.18, "learning_rate": 5.952111444960394e-06, "loss": 0.2687, "step": 675 }, { "epoch": 0.19, "learning_rate": 5.943671128107075e-06, "loss": 0.2852, "step": 700 }, { "epoch": 0.2, "learning_rate": 5.935230811253756e-06, "loss": 0.2886, "step": 725 }, { "epoch": 0.2, "learning_rate": 5.926790494400437e-06, "loss": 0.2863, "step": 750 }, { "epoch": 0.21, "learning_rate": 5.918350177547118e-06, "loss": 0.3231, "step": 775 }, { "epoch": 0.22, "learning_rate": 5.909909860693799e-06, "loss": 0.2638, "step": 800 }, { "epoch": 0.23, "learning_rate": 5.9014695438404815e-06, "loss": 0.2892, "step": 825 }, { "epoch": 0.23, "learning_rate": 5.893029226987162e-06, "loss": 0.2969, "step": 850 }, { "epoch": 0.24, "learning_rate": 5.884588910133843e-06, "loss": 0.3163, "step": 875 }, { "epoch": 0.25, "learning_rate": 5.8761485932805245e-06, "loss": 0.2862, "step": 900 }, { "epoch": 0.25, "learning_rate": 5.867708276427206e-06, "loss": 0.2791, "step": 925 }, { "epoch": 0.26, "learning_rate": 5.859267959573887e-06, "loss": 0.2662, "step": 950 }, { "epoch": 0.27, "learning_rate": 5.8508276427205685e-06, "loss": 0.3065, "step": 975 }, { "epoch": 0.27, "learning_rate": 5.84238732586725e-06, "loss": 0.2954, "step": 1000 }, { "epoch": 0.28, "learning_rate": 5.833947009013931e-06, "loss": 0.3087, "step": 1025 }, { "epoch": 0.29, "learning_rate": 5.8255066921606115e-06, "loss": 0.2659, "step": 1050 }, { "epoch": 0.29, "learning_rate": 5.817066375307293e-06, "loss": 0.2946, "step": 1075 }, { "epoch": 0.3, "learning_rate": 5.808626058453974e-06, "loss": 0.2661, "step": 1100 }, { "epoch": 0.31, "learning_rate": 5.800185741600656e-06, "loss": 0.2881, "step": 1125 }, { "epoch": 0.31, "learning_rate": 5.791745424747337e-06, "loss": 0.2819, "step": 1150 }, { "epoch": 0.32, "learning_rate": 5.783305107894018e-06, "loss": 0.32, "step": 1175 }, { "epoch": 0.33, "learning_rate": 5.774864791040699e-06, "loss": 0.3108, "step": 1200 }, { "epoch": 0.33, "learning_rate": 5.766424474187381e-06, "loss": 0.2866, "step": 1225 }, { "epoch": 0.34, "learning_rate": 5.757984157334062e-06, "loss": 0.3029, "step": 1250 }, { "epoch": 0.35, "learning_rate": 5.749543840480743e-06, "loss": 0.2577, "step": 1275 }, { "epoch": 0.36, "learning_rate": 5.7411035236274246e-06, "loss": 0.2835, "step": 1300 }, { "epoch": 0.36, "learning_rate": 5.732663206774106e-06, "loss": 0.2623, "step": 1325 }, { "epoch": 0.37, "learning_rate": 5.724222889920786e-06, "loss": 0.3214, "step": 1350 }, { "epoch": 0.38, "learning_rate": 5.715782573067468e-06, "loss": 0.2773, "step": 1375 }, { "epoch": 0.38, "learning_rate": 5.707342256214149e-06, "loss": 0.2758, "step": 1400 }, { "epoch": 0.39, "learning_rate": 5.698901939360831e-06, "loss": 0.2813, "step": 1425 }, { "epoch": 0.4, "learning_rate": 5.6904616225075115e-06, "loss": 0.2874, "step": 1450 }, { "epoch": 0.4, "learning_rate": 5.682021305654193e-06, "loss": 0.261, "step": 1475 }, { "epoch": 0.41, "learning_rate": 5.673580988800874e-06, "loss": 0.2798, "step": 1500 }, { "epoch": 0.42, "learning_rate": 5.6651406719475555e-06, "loss": 0.2965, "step": 1525 }, { "epoch": 0.42, "learning_rate": 5.656700355094237e-06, "loss": 0.2431, "step": 1550 }, { "epoch": 0.43, "learning_rate": 5.648260038240918e-06, "loss": 0.2916, "step": 1575 }, { "epoch": 0.44, "learning_rate": 5.639819721387599e-06, "loss": 0.3695, "step": 1600 }, { "epoch": 0.44, "learning_rate": 5.631379404534281e-06, "loss": 0.2764, "step": 1625 }, { "epoch": 0.45, "learning_rate": 5.622939087680961e-06, "loss": 0.2653, "step": 1650 }, { "epoch": 0.46, "learning_rate": 5.6144987708276424e-06, "loss": 0.2866, "step": 1675 }, { "epoch": 0.46, "learning_rate": 5.606058453974325e-06, "loss": 0.3056, "step": 1700 }, { "epoch": 0.47, "learning_rate": 5.597618137121006e-06, "loss": 0.238, "step": 1725 }, { "epoch": 0.48, "learning_rate": 5.589177820267686e-06, "loss": 0.2842, "step": 1750 }, { "epoch": 0.48, "learning_rate": 5.580737503414368e-06, "loss": 0.2917, "step": 1775 }, { "epoch": 0.49, "learning_rate": 5.572297186561049e-06, "loss": 0.2697, "step": 1800 }, { "epoch": 0.5, "learning_rate": 5.56385686970773e-06, "loss": 0.2741, "step": 1825 }, { "epoch": 0.51, "learning_rate": 5.5554165528544116e-06, "loss": 0.2716, "step": 1850 }, { "epoch": 0.51, "learning_rate": 5.546976236001093e-06, "loss": 0.2946, "step": 1875 }, { "epoch": 0.52, "learning_rate": 5.538535919147774e-06, "loss": 0.3018, "step": 1900 }, { "epoch": 0.53, "learning_rate": 5.5300956022944555e-06, "loss": 0.2724, "step": 1925 }, { "epoch": 0.53, "learning_rate": 5.521655285441136e-06, "loss": 0.266, "step": 1950 }, { "epoch": 0.54, "learning_rate": 5.513214968587817e-06, "loss": 0.279, "step": 1975 }, { "epoch": 0.55, "learning_rate": 5.504774651734499e-06, "loss": 0.2936, "step": 2000 }, { "epoch": 0.55, "learning_rate": 5.496334334881181e-06, "loss": 0.3085, "step": 2025 }, { "epoch": 0.56, "learning_rate": 5.487894018027861e-06, "loss": 0.2742, "step": 2050 }, { "epoch": 0.57, "learning_rate": 5.4794537011745425e-06, "loss": 0.2908, "step": 2075 }, { "epoch": 0.57, "learning_rate": 5.471013384321224e-06, "loss": 0.2731, "step": 2100 }, { "epoch": 0.58, "learning_rate": 5.462573067467905e-06, "loss": 0.2506, "step": 2125 }, { "epoch": 0.59, "learning_rate": 5.454132750614586e-06, "loss": 0.3134, "step": 2150 }, { "epoch": 0.59, "learning_rate": 5.445692433761268e-06, "loss": 0.3139, "step": 2175 }, { "epoch": 0.6, "learning_rate": 5.437252116907949e-06, "loss": 0.2645, "step": 2200 }, { "epoch": 0.61, "learning_rate": 5.42881180005463e-06, "loss": 0.2586, "step": 2225 }, { "epoch": 0.61, "learning_rate": 5.420371483201311e-06, "loss": 0.271, "step": 2250 }, { "epoch": 0.62, "learning_rate": 5.411931166347992e-06, "loss": 0.2609, "step": 2275 }, { "epoch": 0.63, "learning_rate": 5.403490849494674e-06, "loss": 0.2556, "step": 2300 }, { "epoch": 0.64, "learning_rate": 5.3950505326413555e-06, "loss": 0.2484, "step": 2325 }, { "epoch": 0.64, "learning_rate": 5.386610215788036e-06, "loss": 0.2852, "step": 2350 }, { "epoch": 0.65, "learning_rate": 5.378169898934717e-06, "loss": 0.316, "step": 2375 }, { "epoch": 0.66, "learning_rate": 5.3697295820813986e-06, "loss": 0.2798, "step": 2400 }, { "epoch": 0.66, "learning_rate": 5.36128926522808e-06, "loss": 0.2677, "step": 2425 }, { "epoch": 0.67, "learning_rate": 5.352848948374761e-06, "loss": 0.2962, "step": 2450 }, { "epoch": 0.68, "learning_rate": 5.3444086315214425e-06, "loss": 0.2392, "step": 2475 }, { "epoch": 0.68, "learning_rate": 5.335968314668124e-06, "loss": 0.2874, "step": 2500 }, { "epoch": 0.69, "learning_rate": 5.327527997814805e-06, "loss": 0.2796, "step": 2525 }, { "epoch": 0.7, "learning_rate": 5.3190876809614855e-06, "loss": 0.2947, "step": 2550 }, { "epoch": 0.7, "learning_rate": 5.310647364108167e-06, "loss": 0.2904, "step": 2575 }, { "epoch": 0.71, "learning_rate": 5.302207047254849e-06, "loss": 0.2777, "step": 2600 }, { "epoch": 0.72, "learning_rate": 5.29376673040153e-06, "loss": 0.2468, "step": 2625 }, { "epoch": 0.72, "learning_rate": 5.285326413548211e-06, "loss": 0.2745, "step": 2650 }, { "epoch": 0.73, "learning_rate": 5.276886096694892e-06, "loss": 0.2793, "step": 2675 }, { "epoch": 0.74, "learning_rate": 5.268445779841573e-06, "loss": 0.2873, "step": 2700 }, { "epoch": 0.74, "learning_rate": 5.260005462988255e-06, "loss": 0.2586, "step": 2725 }, { "epoch": 0.75, "learning_rate": 5.251565146134936e-06, "loss": 0.2637, "step": 2750 }, { "epoch": 0.76, "learning_rate": 5.243124829281617e-06, "loss": 0.2922, "step": 2775 }, { "epoch": 0.76, "learning_rate": 5.234684512428299e-06, "loss": 0.2675, "step": 2800 }, { "epoch": 0.77, "learning_rate": 5.22624419557498e-06, "loss": 0.2953, "step": 2825 }, { "epoch": 0.78, "learning_rate": 5.21780387872166e-06, "loss": 0.2293, "step": 2850 }, { "epoch": 0.79, "learning_rate": 5.2093635618683425e-06, "loss": 0.2422, "step": 2875 }, { "epoch": 0.79, "learning_rate": 5.200923245015024e-06, "loss": 0.292, "step": 2900 }, { "epoch": 0.8, "learning_rate": 5.192482928161705e-06, "loss": 0.2779, "step": 2925 }, { "epoch": 0.81, "learning_rate": 5.1840426113083856e-06, "loss": 0.3213, "step": 2950 }, { "epoch": 0.81, "learning_rate": 5.175602294455067e-06, "loss": 0.2653, "step": 2975 }, { "epoch": 0.82, "learning_rate": 5.167161977601748e-06, "loss": 0.2165, "step": 3000 }, { "epoch": 0.83, "learning_rate": 5.1587216607484295e-06, "loss": 0.2913, "step": 3025 }, { "epoch": 0.83, "learning_rate": 5.150281343895111e-06, "loss": 0.2627, "step": 3050 }, { "epoch": 0.84, "learning_rate": 5.141841027041792e-06, "loss": 0.2562, "step": 3075 }, { "epoch": 0.85, "learning_rate": 5.133400710188473e-06, "loss": 0.2611, "step": 3100 }, { "epoch": 0.85, "learning_rate": 5.124960393335154e-06, "loss": 0.2752, "step": 3125 }, { "epoch": 0.86, "learning_rate": 5.116520076481835e-06, "loss": 0.3039, "step": 3150 }, { "epoch": 0.87, "learning_rate": 5.108079759628517e-06, "loss": 0.2758, "step": 3175 }, { "epoch": 0.87, "learning_rate": 5.099639442775199e-06, "loss": 0.2495, "step": 3200 }, { "epoch": 0.88, "learning_rate": 5.091199125921879e-06, "loss": 0.2435, "step": 3225 }, { "epoch": 0.89, "learning_rate": 5.08275880906856e-06, "loss": 0.2637, "step": 3250 }, { "epoch": 0.89, "learning_rate": 5.074318492215242e-06, "loss": 0.3487, "step": 3275 }, { "epoch": 0.9, "learning_rate": 5.065878175361923e-06, "loss": 0.2781, "step": 3300 }, { "epoch": 0.91, "learning_rate": 5.057437858508604e-06, "loss": 0.2242, "step": 3325 }, { "epoch": 0.92, "learning_rate": 5.048997541655286e-06, "loss": 0.2589, "step": 3350 }, { "epoch": 0.92, "learning_rate": 5.040557224801967e-06, "loss": 0.2365, "step": 3375 }, { "epoch": 0.93, "learning_rate": 5.032116907948648e-06, "loss": 0.2659, "step": 3400 }, { "epoch": 0.94, "learning_rate": 5.023676591095329e-06, "loss": 0.2468, "step": 3425 }, { "epoch": 0.94, "learning_rate": 5.01523627424201e-06, "loss": 0.2516, "step": 3450 }, { "epoch": 0.95, "learning_rate": 5.006795957388692e-06, "loss": 0.2809, "step": 3475 }, { "epoch": 0.96, "learning_rate": 4.998355640535373e-06, "loss": 0.3185, "step": 3500 }, { "epoch": 0.96, "learning_rate": 4.989915323682054e-06, "loss": 0.2825, "step": 3525 }, { "epoch": 0.97, "learning_rate": 4.981475006828735e-06, "loss": 0.2818, "step": 3550 }, { "epoch": 0.98, "learning_rate": 4.9730346899754165e-06, "loss": 0.2833, "step": 3575 }, { "epoch": 0.98, "learning_rate": 4.964594373122098e-06, "loss": 0.2716, "step": 3600 }, { "epoch": 0.99, "learning_rate": 4.956154056268779e-06, "loss": 0.3022, "step": 3625 }, { "epoch": 1.0, "learning_rate": 4.94771373941546e-06, "loss": 0.2745, "step": 3650 }, { "epoch": 1.0, "eval_loss": 0.27263349294662476, "eval_runtime": 5336.6361, "eval_samples_per_second": 1.372, "eval_steps_per_second": 0.172, "eval_wer": 57.088590421923755, "step": 3661 }, { "epoch": 1.0, "learning_rate": 4.939273422562142e-06, "loss": 0.1763, "step": 3675 }, { "epoch": 1.01, "learning_rate": 4.930833105708823e-06, "loss": 0.137, "step": 3700 }, { "epoch": 1.02, "learning_rate": 4.9223927888555035e-06, "loss": 0.1398, "step": 3725 }, { "epoch": 1.02, "learning_rate": 4.913952472002185e-06, "loss": 0.1329, "step": 3750 }, { "epoch": 1.03, "learning_rate": 4.905512155148867e-06, "loss": 0.1263, "step": 3775 }, { "epoch": 1.04, "learning_rate": 4.897071838295548e-06, "loss": 0.1056, "step": 3800 }, { "epoch": 1.04, "learning_rate": 4.888631521442229e-06, "loss": 0.1095, "step": 3825 }, { "epoch": 1.05, "learning_rate": 4.88019120458891e-06, "loss": 0.1457, "step": 3850 }, { "epoch": 1.06, "learning_rate": 4.871750887735591e-06, "loss": 0.1464, "step": 3875 }, { "epoch": 1.07, "learning_rate": 4.863310570882273e-06, "loss": 0.1194, "step": 3900 }, { "epoch": 1.07, "learning_rate": 4.854870254028954e-06, "loss": 0.1312, "step": 3925 }, { "epoch": 1.08, "learning_rate": 4.846429937175635e-06, "loss": 0.1279, "step": 3950 }, { "epoch": 1.09, "learning_rate": 4.8379896203223165e-06, "loss": 0.1328, "step": 3975 }, { "epoch": 1.09, "learning_rate": 4.829549303468998e-06, "loss": 0.1252, "step": 4000 }, { "epoch": 1.1, "learning_rate": 4.821108986615678e-06, "loss": 0.1125, "step": 4025 }, { "epoch": 1.11, "learning_rate": 4.81266866976236e-06, "loss": 0.1354, "step": 4050 }, { "epoch": 1.11, "learning_rate": 4.804228352909042e-06, "loss": 0.1301, "step": 4075 }, { "epoch": 1.12, "learning_rate": 4.795788036055723e-06, "loss": 0.116, "step": 4100 }, { "epoch": 1.13, "learning_rate": 4.7873477192024035e-06, "loss": 0.1535, "step": 4125 }, { "epoch": 1.13, "learning_rate": 4.778907402349085e-06, "loss": 0.1318, "step": 4150 }, { "epoch": 1.14, "learning_rate": 4.770467085495766e-06, "loss": 0.1302, "step": 4175 }, { "epoch": 1.15, "learning_rate": 4.762026768642447e-06, "loss": 0.1093, "step": 4200 }, { "epoch": 1.15, "learning_rate": 4.753586451789129e-06, "loss": 0.1315, "step": 4225 }, { "epoch": 1.16, "learning_rate": 4.74514613493581e-06, "loss": 0.1154, "step": 4250 }, { "epoch": 1.17, "learning_rate": 4.736705818082491e-06, "loss": 0.1433, "step": 4275 }, { "epoch": 1.17, "learning_rate": 4.728265501229173e-06, "loss": 0.1177, "step": 4300 }, { "epoch": 1.18, "learning_rate": 4.719825184375853e-06, "loss": 0.1203, "step": 4325 }, { "epoch": 1.19, "learning_rate": 4.711384867522535e-06, "loss": 0.1563, "step": 4350 }, { "epoch": 1.2, "learning_rate": 4.7029445506692165e-06, "loss": 0.1426, "step": 4375 }, { "epoch": 1.2, "learning_rate": 4.694504233815898e-06, "loss": 0.1487, "step": 4400 }, { "epoch": 1.21, "learning_rate": 4.686063916962578e-06, "loss": 0.1437, "step": 4425 }, { "epoch": 1.22, "learning_rate": 4.67762360010926e-06, "loss": 0.1164, "step": 4450 }, { "epoch": 1.22, "learning_rate": 4.669183283255941e-06, "loss": 0.1192, "step": 4475 }, { "epoch": 1.23, "learning_rate": 4.660742966402622e-06, "loss": 0.1271, "step": 4500 }, { "epoch": 1.24, "learning_rate": 4.6523026495493035e-06, "loss": 0.1229, "step": 4525 }, { "epoch": 1.24, "learning_rate": 4.643862332695985e-06, "loss": 0.1305, "step": 4550 }, { "epoch": 1.25, "learning_rate": 4.635422015842666e-06, "loss": 0.1124, "step": 4575 }, { "epoch": 1.26, "learning_rate": 4.626981698989347e-06, "loss": 0.1316, "step": 4600 }, { "epoch": 1.26, "learning_rate": 4.618541382136028e-06, "loss": 0.1331, "step": 4625 }, { "epoch": 1.27, "learning_rate": 4.61010106528271e-06, "loss": 0.1217, "step": 4650 }, { "epoch": 1.28, "learning_rate": 4.601660748429391e-06, "loss": 0.1217, "step": 4675 }, { "epoch": 1.28, "learning_rate": 4.593220431576073e-06, "loss": 0.1398, "step": 4700 }, { "epoch": 1.29, "learning_rate": 4.584780114722753e-06, "loss": 0.1288, "step": 4725 }, { "epoch": 1.3, "learning_rate": 4.576339797869434e-06, "loss": 0.1283, "step": 4750 }, { "epoch": 1.3, "learning_rate": 4.567899481016116e-06, "loss": 0.1065, "step": 4775 }, { "epoch": 1.31, "learning_rate": 4.559459164162797e-06, "loss": 0.1309, "step": 4800 }, { "epoch": 1.32, "learning_rate": 4.551018847309478e-06, "loss": 0.1238, "step": 4825 }, { "epoch": 1.32, "learning_rate": 4.54257853045616e-06, "loss": 0.1488, "step": 4850 }, { "epoch": 1.33, "learning_rate": 4.534138213602841e-06, "loss": 0.1172, "step": 4875 }, { "epoch": 1.34, "learning_rate": 4.525697896749522e-06, "loss": 0.1319, "step": 4900 }, { "epoch": 1.35, "learning_rate": 4.517257579896203e-06, "loss": 0.1304, "step": 4925 }, { "epoch": 1.35, "learning_rate": 4.508817263042885e-06, "loss": 0.1398, "step": 4950 }, { "epoch": 1.36, "learning_rate": 4.500376946189566e-06, "loss": 0.1367, "step": 4975 }, { "epoch": 1.37, "learning_rate": 4.4919366293362474e-06, "loss": 0.1398, "step": 5000 }, { "epoch": 1.37, "learning_rate": 4.483496312482928e-06, "loss": 0.1251, "step": 5025 }, { "epoch": 1.38, "learning_rate": 4.475055995629609e-06, "loss": 0.1568, "step": 5050 }, { "epoch": 1.39, "learning_rate": 4.4666156787762905e-06, "loss": 0.1268, "step": 5075 }, { "epoch": 1.39, "learning_rate": 4.458175361922973e-06, "loss": 0.1233, "step": 5100 }, { "epoch": 1.4, "learning_rate": 4.449735045069653e-06, "loss": 0.1307, "step": 5125 }, { "epoch": 1.41, "learning_rate": 4.441294728216334e-06, "loss": 0.13, "step": 5150 }, { "epoch": 1.41, "learning_rate": 4.432854411363016e-06, "loss": 0.1542, "step": 5175 }, { "epoch": 1.42, "learning_rate": 4.424414094509697e-06, "loss": 0.1318, "step": 5200 }, { "epoch": 1.43, "learning_rate": 4.415973777656378e-06, "loss": 0.1492, "step": 5225 }, { "epoch": 1.43, "learning_rate": 4.40753346080306e-06, "loss": 0.1296, "step": 5250 }, { "epoch": 1.44, "learning_rate": 4.399093143949741e-06, "loss": 0.1341, "step": 5275 }, { "epoch": 1.45, "learning_rate": 4.390652827096422e-06, "loss": 0.1146, "step": 5300 }, { "epoch": 1.45, "learning_rate": 4.382212510243103e-06, "loss": 0.1466, "step": 5325 }, { "epoch": 1.46, "learning_rate": 4.373772193389784e-06, "loss": 0.1182, "step": 5350 }, { "epoch": 1.47, "learning_rate": 4.365331876536465e-06, "loss": 0.1388, "step": 5375 }, { "epoch": 1.48, "learning_rate": 4.3568915596831475e-06, "loss": 0.1346, "step": 5400 }, { "epoch": 1.48, "learning_rate": 4.348451242829828e-06, "loss": 0.1169, "step": 5425 }, { "epoch": 1.49, "learning_rate": 4.340010925976509e-06, "loss": 0.1112, "step": 5450 }, { "epoch": 1.5, "learning_rate": 4.3315706091231905e-06, "loss": 0.1454, "step": 5475 }, { "epoch": 1.5, "learning_rate": 4.323130292269872e-06, "loss": 0.1231, "step": 5500 }, { "epoch": 1.51, "learning_rate": 4.314689975416553e-06, "loss": 0.1218, "step": 5525 }, { "epoch": 1.52, "learning_rate": 4.3062496585632344e-06, "loss": 0.1412, "step": 5550 }, { "epoch": 1.52, "learning_rate": 4.297809341709916e-06, "loss": 0.1393, "step": 5575 }, { "epoch": 1.53, "learning_rate": 4.289369024856597e-06, "loss": 0.126, "step": 5600 }, { "epoch": 1.54, "learning_rate": 4.2809287080032775e-06, "loss": 0.1399, "step": 5625 }, { "epoch": 1.54, "learning_rate": 4.272488391149959e-06, "loss": 0.1509, "step": 5650 }, { "epoch": 1.55, "learning_rate": 4.26404807429664e-06, "loss": 0.1508, "step": 5675 }, { "epoch": 1.56, "learning_rate": 4.255607757443322e-06, "loss": 0.1013, "step": 5700 }, { "epoch": 1.56, "learning_rate": 4.247167440590003e-06, "loss": 0.1416, "step": 5725 }, { "epoch": 1.57, "learning_rate": 4.238727123736684e-06, "loss": 0.1611, "step": 5750 }, { "epoch": 1.58, "learning_rate": 4.230286806883365e-06, "loss": 0.1448, "step": 5775 }, { "epoch": 1.58, "learning_rate": 4.221846490030047e-06, "loss": 0.1402, "step": 5800 }, { "epoch": 1.59, "learning_rate": 4.213406173176728e-06, "loss": 0.1075, "step": 5825 }, { "epoch": 1.6, "learning_rate": 4.204965856323409e-06, "loss": 0.1546, "step": 5850 }, { "epoch": 1.6, "learning_rate": 4.1965255394700905e-06, "loss": 0.1454, "step": 5875 }, { "epoch": 1.61, "learning_rate": 4.188085222616772e-06, "loss": 0.124, "step": 5900 }, { "epoch": 1.62, "learning_rate": 4.179644905763452e-06, "loss": 0.1219, "step": 5925 }, { "epoch": 1.63, "learning_rate": 4.171204588910134e-06, "loss": 0.1218, "step": 5950 }, { "epoch": 1.63, "learning_rate": 4.162764272056815e-06, "loss": 0.1452, "step": 5975 }, { "epoch": 1.64, "learning_rate": 4.154323955203496e-06, "loss": 0.145, "step": 6000 }, { "epoch": 1.65, "learning_rate": 4.1458836383501775e-06, "loss": 0.1274, "step": 6025 }, { "epoch": 1.65, "learning_rate": 4.137443321496859e-06, "loss": 0.1271, "step": 6050 }, { "epoch": 1.66, "learning_rate": 4.12900300464354e-06, "loss": 0.1401, "step": 6075 }, { "epoch": 1.67, "learning_rate": 4.120562687790221e-06, "loss": 0.1151, "step": 6100 }, { "epoch": 1.67, "learning_rate": 4.112122370936903e-06, "loss": 0.125, "step": 6125 }, { "epoch": 1.68, "learning_rate": 4.103682054083584e-06, "loss": 0.1576, "step": 6150 }, { "epoch": 1.69, "learning_rate": 4.095241737230265e-06, "loss": 0.1314, "step": 6175 }, { "epoch": 1.69, "learning_rate": 4.086801420376946e-06, "loss": 0.1287, "step": 6200 }, { "epoch": 1.7, "learning_rate": 4.078361103523627e-06, "loss": 0.1163, "step": 6225 }, { "epoch": 1.71, "learning_rate": 4.069920786670308e-06, "loss": 0.1209, "step": 6250 }, { "epoch": 1.71, "learning_rate": 4.0614804698169906e-06, "loss": 0.1271, "step": 6275 }, { "epoch": 1.72, "learning_rate": 4.053040152963671e-06, "loss": 0.1339, "step": 6300 }, { "epoch": 1.73, "learning_rate": 4.044599836110352e-06, "loss": 0.1283, "step": 6325 }, { "epoch": 1.73, "learning_rate": 4.036159519257034e-06, "loss": 0.1269, "step": 6350 }, { "epoch": 1.74, "learning_rate": 4.027719202403715e-06, "loss": 0.1416, "step": 6375 }, { "epoch": 1.75, "learning_rate": 4.019278885550396e-06, "loss": 0.1126, "step": 6400 }, { "epoch": 1.75, "learning_rate": 4.0108385686970776e-06, "loss": 0.1204, "step": 6425 }, { "epoch": 1.76, "learning_rate": 4.002398251843759e-06, "loss": 0.1189, "step": 6450 }, { "epoch": 1.77, "learning_rate": 3.99395793499044e-06, "loss": 0.1479, "step": 6475 }, { "epoch": 1.78, "learning_rate": 3.985517618137121e-06, "loss": 0.1274, "step": 6500 }, { "epoch": 1.78, "learning_rate": 3.977077301283802e-06, "loss": 0.1273, "step": 6525 }, { "epoch": 1.79, "learning_rate": 3.968636984430483e-06, "loss": 0.1071, "step": 6550 }, { "epoch": 1.8, "learning_rate": 3.960196667577165e-06, "loss": 0.1358, "step": 6575 }, { "epoch": 1.8, "learning_rate": 3.951756350723846e-06, "loss": 0.1314, "step": 6600 }, { "epoch": 1.81, "learning_rate": 3.943316033870527e-06, "loss": 0.1223, "step": 6625 }, { "epoch": 1.82, "learning_rate": 3.9348757170172084e-06, "loss": 0.1145, "step": 6650 }, { "epoch": 1.82, "learning_rate": 3.92643540016389e-06, "loss": 0.1331, "step": 6675 }, { "epoch": 1.83, "learning_rate": 3.917995083310571e-06, "loss": 0.1198, "step": 6700 }, { "epoch": 1.84, "learning_rate": 3.909554766457252e-06, "loss": 0.1517, "step": 6725 }, { "epoch": 1.84, "learning_rate": 3.901114449603934e-06, "loss": 0.1477, "step": 6750 }, { "epoch": 1.85, "learning_rate": 3.892674132750615e-06, "loss": 0.1187, "step": 6775 }, { "epoch": 1.86, "learning_rate": 3.884233815897295e-06, "loss": 0.1293, "step": 6800 }, { "epoch": 1.86, "learning_rate": 3.875793499043977e-06, "loss": 0.1228, "step": 6825 }, { "epoch": 1.87, "learning_rate": 3.867353182190658e-06, "loss": 0.1554, "step": 6850 }, { "epoch": 1.88, "learning_rate": 3.85891286533734e-06, "loss": 0.1236, "step": 6875 }, { "epoch": 1.88, "learning_rate": 3.850472548484021e-06, "loss": 0.1146, "step": 6900 }, { "epoch": 1.89, "learning_rate": 3.842032231630702e-06, "loss": 0.1364, "step": 6925 }, { "epoch": 1.9, "learning_rate": 3.833591914777383e-06, "loss": 0.1254, "step": 6950 }, { "epoch": 1.91, "learning_rate": 3.8251515979240646e-06, "loss": 0.1209, "step": 6975 }, { "epoch": 1.91, "learning_rate": 3.816711281070746e-06, "loss": 0.1159, "step": 7000 }, { "epoch": 1.92, "learning_rate": 3.8082709642174267e-06, "loss": 0.1266, "step": 7025 }, { "epoch": 1.93, "learning_rate": 3.7998306473641085e-06, "loss": 0.1497, "step": 7050 }, { "epoch": 1.93, "learning_rate": 3.7913903305107898e-06, "loss": 0.1159, "step": 7075 }, { "epoch": 1.94, "learning_rate": 3.7829500136574702e-06, "loss": 0.1423, "step": 7100 }, { "epoch": 1.95, "learning_rate": 3.774509696804152e-06, "loss": 0.1307, "step": 7125 }, { "epoch": 1.95, "learning_rate": 3.7660693799508333e-06, "loss": 0.1195, "step": 7150 }, { "epoch": 1.96, "learning_rate": 3.7576290630975146e-06, "loss": 0.1396, "step": 7175 }, { "epoch": 1.97, "learning_rate": 3.7491887462441954e-06, "loss": 0.135, "step": 7200 }, { "epoch": 1.97, "learning_rate": 3.7407484293908767e-06, "loss": 0.141, "step": 7225 }, { "epoch": 1.98, "learning_rate": 3.732308112537558e-06, "loss": 0.1353, "step": 7250 }, { "epoch": 1.99, "learning_rate": 3.7238677956842398e-06, "loss": 0.149, "step": 7275 }, { "epoch": 1.99, "learning_rate": 3.7154274788309202e-06, "loss": 0.1259, "step": 7300 }, { "epoch": 2.0, "eval_loss": 0.27949637174606323, "eval_runtime": 5445.6948, "eval_samples_per_second": 1.345, "eval_steps_per_second": 0.168, "eval_wer": 56.830823497490165, "step": 7322 } ], "logging_steps": 25, "max_steps": 18305, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 1.243626193723392e+20, "train_batch_size": 8, "trial_name": null, "trial_params": null }