{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.99809427010545, "eval_steps": 500, "global_step": 9835, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 4.999987245581571e-05, "loss": 1.7374, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.9999489824564244e-05, "loss": 1.5943, "step": 20 }, { "epoch": 0.02, "learning_rate": 4.9998852110149786e-05, "loss": 1.4529, "step": 30 }, { "epoch": 0.02, "learning_rate": 4.999795931907928e-05, "loss": 1.4299, "step": 40 }, { "epoch": 0.03, "learning_rate": 4.999681146046236e-05, "loss": 1.2767, "step": 50 }, { "epoch": 0.03, "learning_rate": 4.9995408546011235e-05, "loss": 1.2893, "step": 60 }, { "epoch": 0.04, "learning_rate": 4.9993750590040575e-05, "loss": 1.297, "step": 70 }, { "epoch": 0.04, "learning_rate": 4.9991837609467425e-05, "loss": 1.1932, "step": 80 }, { "epoch": 0.05, "learning_rate": 4.998966962381092e-05, "loss": 1.1411, "step": 90 }, { "epoch": 0.05, "learning_rate": 4.998724665519219e-05, "loss": 1.1558, "step": 100 }, { "epoch": 0.06, "learning_rate": 4.9984568728334075e-05, "loss": 1.1515, "step": 110 }, { "epoch": 0.06, "learning_rate": 4.998163587056089e-05, "loss": 1.0365, "step": 120 }, { "epoch": 0.07, "learning_rate": 4.997844811179817e-05, "loss": 1.0264, "step": 130 }, { "epoch": 0.07, "learning_rate": 4.9975005484572305e-05, "loss": 1.0283, "step": 140 }, { "epoch": 0.08, "learning_rate": 4.997130802401027e-05, "loss": 1.0356, "step": 150 }, { "epoch": 0.08, "learning_rate": 4.9967355767839225e-05, "loss": 1.0245, "step": 160 }, { "epoch": 0.09, "learning_rate": 4.996314875638616e-05, "loss": 1.0557, "step": 170 }, { "epoch": 0.09, "learning_rate": 4.995868703257745e-05, "loss": 1.0417, "step": 180 }, { "epoch": 0.1, "learning_rate": 4.995397064193846e-05, "loss": 1.0008, "step": 190 }, { "epoch": 0.1, "learning_rate": 4.9948999632593055e-05, "loss": 1.035, "step": 200 }, { "epoch": 0.11, "learning_rate": 4.994377405526308e-05, "loss": 1.0543, "step": 210 }, { "epoch": 0.11, "learning_rate": 4.9938293963267914e-05, "loss": 1.0384, "step": 220 }, { "epoch": 0.12, "learning_rate": 4.993255941252385e-05, "loss": 1.0359, "step": 230 }, { "epoch": 0.12, "learning_rate": 4.9926570461543586e-05, "loss": 1.0484, "step": 240 }, { "epoch": 0.13, "learning_rate": 4.992032717143559e-05, "loss": 0.9157, "step": 250 }, { "epoch": 0.13, "learning_rate": 4.9913829605903486e-05, "loss": 1.0029, "step": 260 }, { "epoch": 0.14, "learning_rate": 4.990707783124541e-05, "loss": 1.0332, "step": 270 }, { "epoch": 0.14, "learning_rate": 4.990007191635334e-05, "loss": 0.9525, "step": 280 }, { "epoch": 0.15, "learning_rate": 4.989281193271236e-05, "loss": 0.9969, "step": 290 }, { "epoch": 0.15, "learning_rate": 4.9885297954399964e-05, "loss": 0.9313, "step": 300 }, { "epoch": 0.16, "learning_rate": 4.98775300580853e-05, "loss": 0.9715, "step": 310 }, { "epoch": 0.16, "learning_rate": 4.986950832302836e-05, "loss": 0.9459, "step": 320 }, { "epoch": 0.17, "learning_rate": 4.9861232831079194e-05, "loss": 0.9614, "step": 330 }, { "epoch": 0.17, "learning_rate": 4.985270366667708e-05, "loss": 0.9995, "step": 340 }, { "epoch": 0.18, "learning_rate": 4.9843920916849645e-05, "loss": 0.9779, "step": 350 }, { "epoch": 0.18, "learning_rate": 4.9834884671211976e-05, "loss": 0.926, "step": 360 }, { "epoch": 0.19, "learning_rate": 4.982559502196572e-05, "loss": 1.0427, "step": 370 }, { "epoch": 0.19, "learning_rate": 4.981605206389814e-05, "loss": 0.9495, "step": 380 }, { "epoch": 0.2, "learning_rate": 4.9806255894381135e-05, "loss": 0.9446, "step": 390 }, { "epoch": 0.2, "learning_rate": 4.979620661337026e-05, "loss": 0.9732, "step": 400 }, { "epoch": 0.21, "learning_rate": 4.978590432340371e-05, "loss": 0.8346, "step": 410 }, { "epoch": 0.21, "learning_rate": 4.9775349129601243e-05, "loss": 0.9642, "step": 420 }, { "epoch": 0.22, "learning_rate": 4.9764541139663176e-05, "loss": 0.8718, "step": 430 }, { "epoch": 0.22, "learning_rate": 4.975348046386917e-05, "loss": 0.9406, "step": 440 }, { "epoch": 0.23, "learning_rate": 4.974216721507725e-05, "loss": 0.8534, "step": 450 }, { "epoch": 0.23, "learning_rate": 4.973060150872253e-05, "loss": 0.9735, "step": 460 }, { "epoch": 0.24, "learning_rate": 4.971878346281609e-05, "loss": 0.9225, "step": 470 }, { "epoch": 0.24, "learning_rate": 4.970671319794378e-05, "loss": 0.8771, "step": 480 }, { "epoch": 0.25, "learning_rate": 4.969439083726496e-05, "loss": 0.9068, "step": 490 }, { "epoch": 0.25, "learning_rate": 4.968181650651127e-05, "loss": 0.9524, "step": 500 }, { "epoch": 0.26, "learning_rate": 4.966899033398533e-05, "loss": 0.8811, "step": 510 }, { "epoch": 0.26, "learning_rate": 4.965591245055944e-05, "loss": 0.9009, "step": 520 }, { "epoch": 0.27, "learning_rate": 4.964258298967423e-05, "loss": 0.8791, "step": 530 }, { "epoch": 0.27, "learning_rate": 4.962900208733734e-05, "loss": 0.9129, "step": 540 }, { "epoch": 0.28, "learning_rate": 4.9615169882121945e-05, "loss": 0.9973, "step": 550 }, { "epoch": 0.28, "learning_rate": 4.960108651516545e-05, "loss": 1.0256, "step": 560 }, { "epoch": 0.29, "learning_rate": 4.958675213016798e-05, "loss": 0.8638, "step": 570 }, { "epoch": 0.29, "learning_rate": 4.9572166873390925e-05, "loss": 0.8928, "step": 580 }, { "epoch": 0.3, "learning_rate": 4.955733089365546e-05, "loss": 0.8579, "step": 590 }, { "epoch": 0.3, "learning_rate": 4.9542244342341026e-05, "loss": 0.9767, "step": 600 }, { "epoch": 0.31, "learning_rate": 4.9526907373383766e-05, "loss": 0.8605, "step": 610 }, { "epoch": 0.32, "learning_rate": 4.951132014327498e-05, "loss": 0.883, "step": 620 }, { "epoch": 0.32, "learning_rate": 4.949548281105951e-05, "loss": 0.9282, "step": 630 }, { "epoch": 0.33, "learning_rate": 4.947939553833412e-05, "loss": 0.8909, "step": 640 }, { "epoch": 0.33, "learning_rate": 4.9463058489245874e-05, "loss": 0.8618, "step": 650 }, { "epoch": 0.34, "learning_rate": 4.9446471830490396e-05, "loss": 0.8674, "step": 660 }, { "epoch": 0.34, "learning_rate": 4.942963573131025e-05, "loss": 0.9487, "step": 670 }, { "epoch": 0.35, "learning_rate": 4.941255036349316e-05, "loss": 0.959, "step": 680 }, { "epoch": 0.35, "learning_rate": 4.9395215901370265e-05, "loss": 0.9217, "step": 690 }, { "epoch": 0.36, "learning_rate": 4.937763252181434e-05, "loss": 0.9214, "step": 700 }, { "epoch": 0.36, "learning_rate": 4.935980040423803e-05, "loss": 0.9023, "step": 710 }, { "epoch": 0.37, "learning_rate": 4.934171973059196e-05, "loss": 0.863, "step": 720 }, { "epoch": 0.37, "learning_rate": 4.9323390685362915e-05, "loss": 0.9249, "step": 730 }, { "epoch": 0.38, "learning_rate": 4.930481345557193e-05, "loss": 0.9269, "step": 740 }, { "epoch": 0.38, "learning_rate": 4.928598823077243e-05, "loss": 0.8938, "step": 750 }, { "epoch": 0.39, "learning_rate": 4.926691520304824e-05, "loss": 0.9187, "step": 760 }, { "epoch": 0.39, "learning_rate": 4.924759456701167e-05, "loss": 0.8909, "step": 770 }, { "epoch": 0.4, "learning_rate": 4.922802651980149e-05, "loss": 0.8665, "step": 780 }, { "epoch": 0.4, "learning_rate": 4.920821126108096e-05, "loss": 0.9484, "step": 790 }, { "epoch": 0.41, "learning_rate": 4.9188148993035754e-05, "loss": 0.9234, "step": 800 }, { "epoch": 0.41, "learning_rate": 4.916783992037193e-05, "loss": 0.9564, "step": 810 }, { "epoch": 0.42, "learning_rate": 4.914728425031379e-05, "loss": 0.8569, "step": 820 }, { "epoch": 0.42, "learning_rate": 4.912648219260188e-05, "loss": 0.972, "step": 830 }, { "epoch": 0.43, "learning_rate": 4.910543395949067e-05, "loss": 0.7543, "step": 840 }, { "epoch": 0.43, "learning_rate": 4.908413976574655e-05, "loss": 0.8996, "step": 850 }, { "epoch": 0.44, "learning_rate": 4.9062599828645574e-05, "loss": 0.7431, "step": 860 }, { "epoch": 0.44, "learning_rate": 4.9040814367971236e-05, "loss": 0.8302, "step": 870 }, { "epoch": 0.45, "learning_rate": 4.901878360601223e-05, "loss": 0.9348, "step": 880 }, { "epoch": 0.45, "learning_rate": 4.899650776756023e-05, "loss": 0.8658, "step": 890 }, { "epoch": 0.46, "learning_rate": 4.897398707990749e-05, "loss": 0.8688, "step": 900 }, { "epoch": 0.46, "learning_rate": 4.895122177284465e-05, "loss": 0.8237, "step": 910 }, { "epoch": 0.47, "learning_rate": 4.8928212078658315e-05, "loss": 0.8714, "step": 920 }, { "epoch": 0.47, "learning_rate": 4.8904958232128687e-05, "loss": 0.8695, "step": 930 }, { "epoch": 0.48, "learning_rate": 4.888146047052721e-05, "loss": 0.8811, "step": 940 }, { "epoch": 0.48, "learning_rate": 4.88577190336141e-05, "loss": 0.954, "step": 950 }, { "epoch": 0.49, "learning_rate": 4.883373416363593e-05, "loss": 0.9335, "step": 960 }, { "epoch": 0.49, "learning_rate": 4.8809506105323164e-05, "loss": 0.864, "step": 970 }, { "epoch": 0.5, "learning_rate": 4.878503510588765e-05, "loss": 0.9355, "step": 980 }, { "epoch": 0.5, "learning_rate": 4.876032141502004e-05, "loss": 0.885, "step": 990 }, { "epoch": 0.51, "learning_rate": 4.8735365284887374e-05, "loss": 0.8106, "step": 1000 }, { "epoch": 0.51, "learning_rate": 4.8710166970130376e-05, "loss": 0.7904, "step": 1010 }, { "epoch": 0.52, "learning_rate": 4.8684726727860944e-05, "loss": 0.8767, "step": 1020 }, { "epoch": 0.52, "learning_rate": 4.865904481765945e-05, "loss": 0.9206, "step": 1030 }, { "epoch": 0.53, "learning_rate": 4.863312150157216e-05, "loss": 0.905, "step": 1040 }, { "epoch": 0.53, "learning_rate": 4.8606957044108556e-05, "loss": 0.8438, "step": 1050 }, { "epoch": 0.54, "learning_rate": 4.858055171223856e-05, "loss": 0.8928, "step": 1060 }, { "epoch": 0.54, "learning_rate": 4.855390577538991e-05, "loss": 0.8943, "step": 1070 }, { "epoch": 0.55, "learning_rate": 4.8527019505445346e-05, "loss": 0.8802, "step": 1080 }, { "epoch": 0.55, "learning_rate": 4.849989317673984e-05, "loss": 0.8525, "step": 1090 }, { "epoch": 0.56, "learning_rate": 4.847252706605786e-05, "loss": 0.8939, "step": 1100 }, { "epoch": 0.56, "learning_rate": 4.844492145263044e-05, "loss": 0.8652, "step": 1110 }, { "epoch": 0.57, "learning_rate": 4.8417076618132426e-05, "loss": 0.8308, "step": 1120 }, { "epoch": 0.57, "learning_rate": 4.838899284667956e-05, "loss": 0.8173, "step": 1130 }, { "epoch": 0.58, "learning_rate": 4.836067042482557e-05, "loss": 0.9149, "step": 1140 }, { "epoch": 0.58, "learning_rate": 4.833210964155928e-05, "loss": 0.7346, "step": 1150 }, { "epoch": 0.59, "learning_rate": 4.8303310788301624e-05, "loss": 1.0617, "step": 1160 }, { "epoch": 0.59, "learning_rate": 4.827427415890271e-05, "loss": 0.8963, "step": 1170 }, { "epoch": 0.6, "learning_rate": 4.82450000496388e-05, "loss": 0.9368, "step": 1180 }, { "epoch": 0.6, "learning_rate": 4.821548875920927e-05, "loss": 0.9157, "step": 1190 }, { "epoch": 0.61, "learning_rate": 4.818574058873361e-05, "loss": 0.7684, "step": 1200 }, { "epoch": 0.61, "learning_rate": 4.8155755841748296e-05, "loss": 0.8846, "step": 1210 }, { "epoch": 0.62, "learning_rate": 4.8125534824203754e-05, "loss": 0.9657, "step": 1220 }, { "epoch": 0.63, "learning_rate": 4.8095077844461176e-05, "loss": 0.6925, "step": 1230 }, { "epoch": 0.63, "learning_rate": 4.8064385213289414e-05, "loss": 0.8198, "step": 1240 }, { "epoch": 0.64, "learning_rate": 4.8033457243861804e-05, "loss": 0.8938, "step": 1250 }, { "epoch": 0.64, "learning_rate": 4.800229425175294e-05, "loss": 0.7988, "step": 1260 }, { "epoch": 0.65, "learning_rate": 4.7970896554935506e-05, "loss": 0.933, "step": 1270 }, { "epoch": 0.65, "learning_rate": 4.7939264473776995e-05, "loss": 0.8274, "step": 1280 }, { "epoch": 0.66, "learning_rate": 4.790739833103644e-05, "loss": 0.946, "step": 1290 }, { "epoch": 0.66, "learning_rate": 4.787529845186114e-05, "loss": 0.8698, "step": 1300 }, { "epoch": 0.67, "learning_rate": 4.784296516378333e-05, "loss": 0.9597, "step": 1310 }, { "epoch": 0.67, "learning_rate": 4.7810398796716825e-05, "loss": 0.7929, "step": 1320 }, { "epoch": 0.68, "learning_rate": 4.777759968295369e-05, "loss": 0.9, "step": 1330 }, { "epoch": 0.68, "learning_rate": 4.774456815716083e-05, "loss": 0.7947, "step": 1340 }, { "epoch": 0.69, "learning_rate": 4.7711304556376555e-05, "loss": 0.948, "step": 1350 }, { "epoch": 0.69, "learning_rate": 4.767780922000718e-05, "loss": 0.8153, "step": 1360 }, { "epoch": 0.7, "learning_rate": 4.7644082489823525e-05, "loss": 0.9613, "step": 1370 }, { "epoch": 0.7, "learning_rate": 4.761012470995746e-05, "loss": 0.861, "step": 1380 }, { "epoch": 0.71, "learning_rate": 4.7575936226898366e-05, "loss": 0.8061, "step": 1390 }, { "epoch": 0.71, "learning_rate": 4.754151738948962e-05, "loss": 0.8082, "step": 1400 }, { "epoch": 0.72, "learning_rate": 4.750686854892503e-05, "loss": 0.8568, "step": 1410 }, { "epoch": 0.72, "learning_rate": 4.747199005874524e-05, "loss": 0.8792, "step": 1420 }, { "epoch": 0.73, "learning_rate": 4.7436882274834135e-05, "loss": 0.9441, "step": 1430 }, { "epoch": 0.73, "learning_rate": 4.7401545555415204e-05, "loss": 0.8254, "step": 1440 }, { "epoch": 0.74, "learning_rate": 4.73659802610479e-05, "loss": 0.9127, "step": 1450 }, { "epoch": 0.74, "learning_rate": 4.733018675462394e-05, "loss": 0.8423, "step": 1460 }, { "epoch": 0.75, "learning_rate": 4.729416540136361e-05, "loss": 0.8832, "step": 1470 }, { "epoch": 0.75, "learning_rate": 4.725791656881203e-05, "loss": 0.8718, "step": 1480 }, { "epoch": 0.76, "learning_rate": 4.722144062683543e-05, "loss": 0.7659, "step": 1490 }, { "epoch": 0.76, "learning_rate": 4.7184737947617354e-05, "loss": 0.793, "step": 1500 }, { "epoch": 0.77, "learning_rate": 4.714780890565485e-05, "loss": 0.8891, "step": 1510 }, { "epoch": 0.77, "learning_rate": 4.71106538777547e-05, "loss": 1.041, "step": 1520 }, { "epoch": 0.78, "learning_rate": 4.707327324302951e-05, "loss": 0.8006, "step": 1530 }, { "epoch": 0.78, "learning_rate": 4.703566738289389e-05, "loss": 0.9304, "step": 1540 }, { "epoch": 0.79, "learning_rate": 4.699783668106054e-05, "loss": 0.9484, "step": 1550 }, { "epoch": 0.79, "learning_rate": 4.695978152353634e-05, "loss": 0.8737, "step": 1560 }, { "epoch": 0.8, "learning_rate": 4.69215022986184e-05, "loss": 0.8265, "step": 1570 }, { "epoch": 0.8, "learning_rate": 4.688299939689015e-05, "loss": 0.9525, "step": 1580 }, { "epoch": 0.81, "learning_rate": 4.684427321121726e-05, "loss": 0.8764, "step": 1590 }, { "epoch": 0.81, "learning_rate": 4.6805324136743714e-05, "loss": 0.9115, "step": 1600 }, { "epoch": 0.82, "learning_rate": 4.676615257088776e-05, "loss": 0.8334, "step": 1610 }, { "epoch": 0.82, "learning_rate": 4.672675891333782e-05, "loss": 0.8515, "step": 1620 }, { "epoch": 0.83, "learning_rate": 4.668714356604845e-05, "loss": 0.9023, "step": 1630 }, { "epoch": 0.83, "learning_rate": 4.664730693323622e-05, "loss": 0.8983, "step": 1640 }, { "epoch": 0.84, "learning_rate": 4.660724942137561e-05, "loss": 0.8063, "step": 1650 }, { "epoch": 0.84, "learning_rate": 4.656697143919482e-05, "loss": 0.91, "step": 1660 }, { "epoch": 0.85, "learning_rate": 4.6526473397671644e-05, "loss": 0.8909, "step": 1670 }, { "epoch": 0.85, "learning_rate": 4.6485755710029256e-05, "loss": 0.8485, "step": 1680 }, { "epoch": 0.86, "learning_rate": 4.644481879173199e-05, "loss": 0.793, "step": 1690 }, { "epoch": 0.86, "learning_rate": 4.640366306048113e-05, "loss": 0.9396, "step": 1700 }, { "epoch": 0.87, "learning_rate": 4.63622889362106e-05, "loss": 0.8538, "step": 1710 }, { "epoch": 0.87, "learning_rate": 4.63206968410827e-05, "loss": 0.8986, "step": 1720 }, { "epoch": 0.88, "learning_rate": 4.627888719948385e-05, "loss": 0.9277, "step": 1730 }, { "epoch": 0.88, "learning_rate": 4.623686043802016e-05, "loss": 0.8013, "step": 1740 }, { "epoch": 0.89, "learning_rate": 4.619461698551315e-05, "loss": 0.8565, "step": 1750 }, { "epoch": 0.89, "learning_rate": 4.6152157272995355e-05, "loss": 0.7764, "step": 1760 }, { "epoch": 0.9, "learning_rate": 4.610948173370594e-05, "loss": 0.8674, "step": 1770 }, { "epoch": 0.9, "learning_rate": 4.606659080308624e-05, "loss": 0.7947, "step": 1780 }, { "epoch": 0.91, "learning_rate": 4.6023484918775364e-05, "loss": 0.8766, "step": 1790 }, { "epoch": 0.91, "learning_rate": 4.598016452060569e-05, "loss": 0.8197, "step": 1800 }, { "epoch": 0.92, "learning_rate": 4.593663005059841e-05, "loss": 0.9353, "step": 1810 }, { "epoch": 0.92, "learning_rate": 4.589288195295901e-05, "loss": 0.8794, "step": 1820 }, { "epoch": 0.93, "learning_rate": 4.584892067407272e-05, "loss": 0.844, "step": 1830 }, { "epoch": 0.94, "learning_rate": 4.580474666249997e-05, "loss": 0.8291, "step": 1840 }, { "epoch": 0.94, "learning_rate": 4.576036036897182e-05, "loss": 0.7954, "step": 1850 }, { "epoch": 0.95, "learning_rate": 4.571576224638536e-05, "loss": 0.9331, "step": 1860 }, { "epoch": 0.95, "learning_rate": 4.56709527497991e-05, "loss": 0.8131, "step": 1870 }, { "epoch": 0.96, "learning_rate": 4.562593233642828e-05, "loss": 0.8134, "step": 1880 }, { "epoch": 0.96, "learning_rate": 4.5580701465640254e-05, "loss": 0.8629, "step": 1890 }, { "epoch": 0.97, "learning_rate": 4.553526059894978e-05, "loss": 0.8637, "step": 1900 }, { "epoch": 0.97, "learning_rate": 4.548961020001432e-05, "loss": 0.8737, "step": 1910 }, { "epoch": 0.98, "learning_rate": 4.544375073462932e-05, "loss": 0.8404, "step": 1920 }, { "epoch": 0.98, "learning_rate": 4.539768267072341e-05, "loss": 0.9458, "step": 1930 }, { "epoch": 0.99, "learning_rate": 4.535140647835369e-05, "loss": 0.8777, "step": 1940 }, { "epoch": 0.99, "learning_rate": 4.5304922629700896e-05, "loss": 0.8794, "step": 1950 }, { "epoch": 1.0, "learning_rate": 4.525823159906459e-05, "loss": 0.8848, "step": 1960 }, { "epoch": 1.0, "learning_rate": 4.521133386285833e-05, "loss": 0.8398, "step": 1970 }, { "epoch": 1.01, "learning_rate": 4.5164229899604796e-05, "loss": 0.8368, "step": 1980 }, { "epoch": 1.01, "learning_rate": 4.51169201899309e-05, "loss": 0.7742, "step": 1990 }, { "epoch": 1.02, "learning_rate": 4.506940521656293e-05, "loss": 0.875, "step": 2000 }, { "epoch": 1.02, "learning_rate": 4.502168546432155e-05, "loss": 0.8689, "step": 2010 }, { "epoch": 1.03, "learning_rate": 4.497376142011693e-05, "loss": 0.8329, "step": 2020 }, { "epoch": 1.03, "learning_rate": 4.492563357294369e-05, "loss": 0.804, "step": 2030 }, { "epoch": 1.04, "learning_rate": 4.487730241387602e-05, "loss": 0.7985, "step": 2040 }, { "epoch": 1.04, "learning_rate": 4.482876843606257e-05, "loss": 0.8019, "step": 2050 }, { "epoch": 1.05, "learning_rate": 4.478003213472146e-05, "loss": 0.7925, "step": 2060 }, { "epoch": 1.05, "learning_rate": 4.473109400713525e-05, "loss": 0.8566, "step": 2070 }, { "epoch": 1.06, "learning_rate": 4.468195455264581e-05, "loss": 0.7679, "step": 2080 }, { "epoch": 1.06, "learning_rate": 4.463261427264928e-05, "loss": 0.7556, "step": 2090 }, { "epoch": 1.07, "learning_rate": 4.458307367059092e-05, "loss": 0.7138, "step": 2100 }, { "epoch": 1.07, "learning_rate": 4.4533333251959985e-05, "loss": 0.893, "step": 2110 }, { "epoch": 1.08, "learning_rate": 4.448339352428456e-05, "loss": 0.9398, "step": 2120 }, { "epoch": 1.08, "learning_rate": 4.4433254997126394e-05, "loss": 0.9158, "step": 2130 }, { "epoch": 1.09, "learning_rate": 4.438291818207569e-05, "loss": 0.8109, "step": 2140 }, { "epoch": 1.09, "learning_rate": 4.4332383592745894e-05, "loss": 0.9455, "step": 2150 }, { "epoch": 1.1, "learning_rate": 4.4281651744768436e-05, "loss": 0.8938, "step": 2160 }, { "epoch": 1.1, "learning_rate": 4.42307231557875e-05, "loss": 0.7618, "step": 2170 }, { "epoch": 1.11, "learning_rate": 4.4179598345454704e-05, "loss": 0.8194, "step": 2180 }, { "epoch": 1.11, "learning_rate": 4.4128277835423825e-05, "loss": 0.8848, "step": 2190 }, { "epoch": 1.12, "learning_rate": 4.407676214934548e-05, "loss": 0.7657, "step": 2200 }, { "epoch": 1.12, "learning_rate": 4.402505181286175e-05, "loss": 0.8897, "step": 2210 }, { "epoch": 1.13, "learning_rate": 4.3973147353600866e-05, "loss": 0.8431, "step": 2220 }, { "epoch": 1.13, "learning_rate": 4.392104930117177e-05, "loss": 0.8565, "step": 2230 }, { "epoch": 1.14, "learning_rate": 4.386875818715874e-05, "loss": 0.8426, "step": 2240 }, { "epoch": 1.14, "learning_rate": 4.3816274545116e-05, "loss": 0.8096, "step": 2250 }, { "epoch": 1.15, "learning_rate": 4.37635989105622e-05, "loss": 0.8651, "step": 2260 }, { "epoch": 1.15, "learning_rate": 4.3710731820975e-05, "loss": 0.8658, "step": 2270 }, { "epoch": 1.16, "learning_rate": 4.36576738157856e-05, "loss": 0.8642, "step": 2280 }, { "epoch": 1.16, "learning_rate": 4.3604425436373194e-05, "loss": 0.8334, "step": 2290 }, { "epoch": 1.17, "learning_rate": 4.355098722605946e-05, "loss": 0.929, "step": 2300 }, { "epoch": 1.17, "learning_rate": 4.349735973010305e-05, "loss": 0.8318, "step": 2310 }, { "epoch": 1.18, "learning_rate": 4.344354349569398e-05, "loss": 0.7576, "step": 2320 }, { "epoch": 1.18, "learning_rate": 4.3389539071948065e-05, "loss": 0.7787, "step": 2330 }, { "epoch": 1.19, "learning_rate": 4.3335347009901314e-05, "loss": 0.7632, "step": 2340 }, { "epoch": 1.19, "learning_rate": 4.328096786250432e-05, "loss": 0.7624, "step": 2350 }, { "epoch": 1.2, "learning_rate": 4.32264021846166e-05, "loss": 0.8955, "step": 2360 }, { "epoch": 1.2, "learning_rate": 4.317165053300095e-05, "loss": 0.9177, "step": 2370 }, { "epoch": 1.21, "learning_rate": 4.311671346631774e-05, "loss": 0.9347, "step": 2380 }, { "epoch": 1.21, "learning_rate": 4.306159154511925e-05, "loss": 0.8386, "step": 2390 }, { "epoch": 1.22, "learning_rate": 4.300628533184391e-05, "loss": 0.7803, "step": 2400 }, { "epoch": 1.22, "learning_rate": 4.295079539081058e-05, "loss": 0.8833, "step": 2410 }, { "epoch": 1.23, "learning_rate": 4.289512228821282e-05, "loss": 0.896, "step": 2420 }, { "epoch": 1.23, "learning_rate": 4.283926659211306e-05, "loss": 0.8046, "step": 2430 }, { "epoch": 1.24, "learning_rate": 4.278322887243683e-05, "loss": 0.8341, "step": 2440 }, { "epoch": 1.25, "learning_rate": 4.272700970096696e-05, "loss": 0.8365, "step": 2450 }, { "epoch": 1.25, "learning_rate": 4.26706096513377e-05, "loss": 0.8236, "step": 2460 }, { "epoch": 1.26, "learning_rate": 4.2614029299028944e-05, "loss": 0.9149, "step": 2470 }, { "epoch": 1.26, "learning_rate": 4.2557269221360265e-05, "loss": 0.8772, "step": 2480 }, { "epoch": 1.27, "learning_rate": 4.250032999748508e-05, "loss": 0.8428, "step": 2490 }, { "epoch": 1.27, "learning_rate": 4.2443212208384755e-05, "loss": 0.7791, "step": 2500 }, { "epoch": 1.28, "learning_rate": 4.238591643686263e-05, "loss": 0.894, "step": 2510 }, { "epoch": 1.28, "learning_rate": 4.23284432675381e-05, "loss": 0.8412, "step": 2520 }, { "epoch": 1.29, "learning_rate": 4.2270793286840636e-05, "loss": 0.7827, "step": 2530 }, { "epoch": 1.29, "learning_rate": 4.2212967083003835e-05, "loss": 0.8868, "step": 2540 }, { "epoch": 1.3, "learning_rate": 4.215496524605936e-05, "loss": 0.8999, "step": 2550 }, { "epoch": 1.3, "learning_rate": 4.209678836783098e-05, "loss": 0.8319, "step": 2560 }, { "epoch": 1.31, "learning_rate": 4.2038437041928505e-05, "loss": 0.8147, "step": 2570 }, { "epoch": 1.31, "learning_rate": 4.1979911863741686e-05, "loss": 0.8202, "step": 2580 }, { "epoch": 1.32, "learning_rate": 4.192121343043424e-05, "loss": 0.8346, "step": 2590 }, { "epoch": 1.32, "learning_rate": 4.1862342340937655e-05, "loss": 0.8413, "step": 2600 }, { "epoch": 1.33, "learning_rate": 4.1803299195945145e-05, "loss": 0.8867, "step": 2610 }, { "epoch": 1.33, "learning_rate": 4.174408459790549e-05, "loss": 0.789, "step": 2620 }, { "epoch": 1.34, "learning_rate": 4.1684699151016896e-05, "loss": 0.7649, "step": 2630 }, { "epoch": 1.34, "learning_rate": 4.162514346122083e-05, "loss": 0.8685, "step": 2640 }, { "epoch": 1.35, "learning_rate": 4.156541813619585e-05, "loss": 0.7793, "step": 2650 }, { "epoch": 1.35, "learning_rate": 4.150552378535137e-05, "loss": 0.871, "step": 2660 }, { "epoch": 1.36, "learning_rate": 4.144546101982151e-05, "loss": 0.8534, "step": 2670 }, { "epoch": 1.36, "learning_rate": 4.1385230452458756e-05, "loss": 0.8658, "step": 2680 }, { "epoch": 1.37, "learning_rate": 4.132483269782781e-05, "loss": 0.7654, "step": 2690 }, { "epoch": 1.37, "learning_rate": 4.126426837219925e-05, "loss": 0.788, "step": 2700 }, { "epoch": 1.38, "learning_rate": 4.120353809354328e-05, "loss": 0.7899, "step": 2710 }, { "epoch": 1.38, "learning_rate": 4.114264248152342e-05, "loss": 0.8984, "step": 2720 }, { "epoch": 1.39, "learning_rate": 4.108158215749014e-05, "loss": 0.9037, "step": 2730 }, { "epoch": 1.39, "learning_rate": 4.10203577444746e-05, "loss": 0.8417, "step": 2740 }, { "epoch": 1.4, "learning_rate": 4.095896986718221e-05, "loss": 0.8844, "step": 2750 }, { "epoch": 1.4, "learning_rate": 4.089741915198632e-05, "loss": 0.8565, "step": 2760 }, { "epoch": 1.41, "learning_rate": 4.0835706226921776e-05, "loss": 0.8313, "step": 2770 }, { "epoch": 1.41, "learning_rate": 4.077383172167857e-05, "loss": 0.8312, "step": 2780 }, { "epoch": 1.42, "learning_rate": 4.0711796267595355e-05, "loss": 0.8551, "step": 2790 }, { "epoch": 1.42, "learning_rate": 4.064960049765304e-05, "loss": 0.8134, "step": 2800 }, { "epoch": 1.43, "learning_rate": 4.058724504646834e-05, "loss": 0.8246, "step": 2810 }, { "epoch": 1.43, "learning_rate": 4.052473055028726e-05, "loss": 0.7552, "step": 2820 }, { "epoch": 1.44, "learning_rate": 4.046205764697862e-05, "loss": 0.8374, "step": 2830 }, { "epoch": 1.44, "learning_rate": 4.0399226976027583e-05, "loss": 0.8721, "step": 2840 }, { "epoch": 1.45, "learning_rate": 4.0336239178529075e-05, "loss": 0.756, "step": 2850 }, { "epoch": 1.45, "learning_rate": 4.0273094897181285e-05, "loss": 0.7646, "step": 2860 }, { "epoch": 1.46, "learning_rate": 4.020979477627907e-05, "loss": 0.8254, "step": 2870 }, { "epoch": 1.46, "learning_rate": 4.014633946170742e-05, "loss": 0.843, "step": 2880 }, { "epoch": 1.47, "learning_rate": 4.0082729600934844e-05, "loss": 0.8923, "step": 2890 }, { "epoch": 1.47, "learning_rate": 4.001896584300675e-05, "loss": 0.8476, "step": 2900 }, { "epoch": 1.48, "learning_rate": 3.995504883853888e-05, "loss": 0.8202, "step": 2910 }, { "epoch": 1.48, "learning_rate": 3.98909792397106e-05, "loss": 0.8049, "step": 2920 }, { "epoch": 1.49, "learning_rate": 3.9826757700258284e-05, "loss": 0.7977, "step": 2930 }, { "epoch": 1.49, "learning_rate": 3.976238487546864e-05, "loss": 0.8494, "step": 2940 }, { "epoch": 1.5, "learning_rate": 3.9697861422172034e-05, "loss": 0.871, "step": 2950 }, { "epoch": 1.5, "learning_rate": 3.963318799873575e-05, "loss": 0.9323, "step": 2960 }, { "epoch": 1.51, "learning_rate": 3.956836526505733e-05, "loss": 0.912, "step": 2970 }, { "epoch": 1.51, "learning_rate": 3.9503393882557766e-05, "loss": 0.851, "step": 2980 }, { "epoch": 1.52, "learning_rate": 3.943827451417483e-05, "loss": 0.78, "step": 2990 }, { "epoch": 1.52, "learning_rate": 3.937300782435625e-05, "loss": 0.7798, "step": 3000 }, { "epoch": 1.53, "learning_rate": 3.930759447905298e-05, "loss": 0.8496, "step": 3010 }, { "epoch": 1.53, "learning_rate": 3.9242035145712344e-05, "loss": 0.8427, "step": 3020 }, { "epoch": 1.54, "learning_rate": 3.9176330493271285e-05, "loss": 0.8619, "step": 3030 }, { "epoch": 1.54, "learning_rate": 3.9110481192149504e-05, "loss": 0.7663, "step": 3040 }, { "epoch": 1.55, "learning_rate": 3.9044487914242646e-05, "loss": 0.7478, "step": 3050 }, { "epoch": 1.56, "learning_rate": 3.897835133291539e-05, "loss": 0.8048, "step": 3060 }, { "epoch": 1.56, "learning_rate": 3.891207212299467e-05, "loss": 0.8875, "step": 3070 }, { "epoch": 1.57, "learning_rate": 3.884565096076269e-05, "loss": 0.8649, "step": 3080 }, { "epoch": 1.57, "learning_rate": 3.877908852395008e-05, "loss": 0.8313, "step": 3090 }, { "epoch": 1.58, "learning_rate": 3.8712385491729e-05, "loss": 0.8779, "step": 3100 }, { "epoch": 1.58, "learning_rate": 3.864554254470613e-05, "loss": 0.7845, "step": 3110 }, { "epoch": 1.59, "learning_rate": 3.857856036491582e-05, "loss": 0.8581, "step": 3120 }, { "epoch": 1.59, "learning_rate": 3.851143963581306e-05, "loss": 0.8512, "step": 3130 }, { "epoch": 1.6, "learning_rate": 3.844418104226656e-05, "loss": 0.7689, "step": 3140 }, { "epoch": 1.6, "learning_rate": 3.837678527055168e-05, "loss": 0.8677, "step": 3150 }, { "epoch": 1.61, "learning_rate": 3.830925300834356e-05, "loss": 0.7601, "step": 3160 }, { "epoch": 1.61, "learning_rate": 3.824158494470996e-05, "loss": 0.8637, "step": 3170 }, { "epoch": 1.62, "learning_rate": 3.817378177010431e-05, "loss": 0.7152, "step": 3180 }, { "epoch": 1.62, "learning_rate": 3.8105844176358674e-05, "loss": 0.9339, "step": 3190 }, { "epoch": 1.63, "learning_rate": 3.803777285667665e-05, "loss": 0.8261, "step": 3200 }, { "epoch": 1.63, "learning_rate": 3.7969568505626305e-05, "loss": 0.896, "step": 3210 }, { "epoch": 1.64, "learning_rate": 3.7901231819133105e-05, "loss": 0.9026, "step": 3220 }, { "epoch": 1.64, "learning_rate": 3.783276349447281e-05, "loss": 0.8242, "step": 3230 }, { "epoch": 1.65, "learning_rate": 3.7764164230264357e-05, "loss": 0.8292, "step": 3240 }, { "epoch": 1.65, "learning_rate": 3.7695434726462704e-05, "loss": 0.9249, "step": 3250 }, { "epoch": 1.66, "learning_rate": 3.762657568435174e-05, "loss": 0.9214, "step": 3260 }, { "epoch": 1.66, "learning_rate": 3.7557587806537094e-05, "loss": 0.8414, "step": 3270 }, { "epoch": 1.67, "learning_rate": 3.748847179693897e-05, "loss": 0.7875, "step": 3280 }, { "epoch": 1.67, "learning_rate": 3.741922836078499e-05, "loss": 0.7981, "step": 3290 }, { "epoch": 1.68, "learning_rate": 3.734985820460293e-05, "loss": 0.8205, "step": 3300 }, { "epoch": 1.68, "learning_rate": 3.728036203621361e-05, "loss": 0.8429, "step": 3310 }, { "epoch": 1.69, "learning_rate": 3.72107405647236e-05, "loss": 0.8432, "step": 3320 }, { "epoch": 1.69, "learning_rate": 3.7140994500517995e-05, "loss": 0.86, "step": 3330 }, { "epoch": 1.7, "learning_rate": 3.707112455525318e-05, "loss": 0.7461, "step": 3340 }, { "epoch": 1.7, "learning_rate": 3.7001131441849586e-05, "loss": 0.8739, "step": 3350 }, { "epoch": 1.71, "learning_rate": 3.693101587448436e-05, "loss": 0.8064, "step": 3360 }, { "epoch": 1.71, "learning_rate": 3.6860778568584145e-05, "loss": 0.8171, "step": 3370 }, { "epoch": 1.72, "learning_rate": 3.6790420240817715e-05, "loss": 0.7549, "step": 3380 }, { "epoch": 1.72, "learning_rate": 3.671994160908872e-05, "loss": 0.8102, "step": 3390 }, { "epoch": 1.73, "learning_rate": 3.6649343392528335e-05, "loss": 0.8086, "step": 3400 }, { "epoch": 1.73, "learning_rate": 3.657862631148791e-05, "loss": 0.9243, "step": 3410 }, { "epoch": 1.74, "learning_rate": 3.650779108753163e-05, "loss": 0.8874, "step": 3420 }, { "epoch": 1.74, "learning_rate": 3.6436838443429175e-05, "loss": 0.7962, "step": 3430 }, { "epoch": 1.75, "learning_rate": 3.636576910314831e-05, "loss": 0.7621, "step": 3440 }, { "epoch": 1.75, "learning_rate": 3.6294583791847514e-05, "loss": 0.8126, "step": 3450 }, { "epoch": 1.76, "learning_rate": 3.622328323586859e-05, "loss": 0.8358, "step": 3460 }, { "epoch": 1.76, "learning_rate": 3.615186816272925e-05, "loss": 0.8677, "step": 3470 }, { "epoch": 1.77, "learning_rate": 3.608033930111564e-05, "loss": 0.8286, "step": 3480 }, { "epoch": 1.77, "learning_rate": 3.600869738087501e-05, "loss": 0.8292, "step": 3490 }, { "epoch": 1.78, "learning_rate": 3.5936943133008183e-05, "loss": 0.8448, "step": 3500 }, { "epoch": 1.78, "learning_rate": 3.5865077289662114e-05, "loss": 0.7162, "step": 3510 }, { "epoch": 1.79, "learning_rate": 3.5793100584122426e-05, "loss": 0.7949, "step": 3520 }, { "epoch": 1.79, "learning_rate": 3.572101375080594e-05, "loss": 0.8263, "step": 3530 }, { "epoch": 1.8, "learning_rate": 3.564881752525317e-05, "loss": 0.9174, "step": 3540 }, { "epoch": 1.8, "learning_rate": 3.5576512644120804e-05, "loss": 0.8188, "step": 3550 }, { "epoch": 1.81, "learning_rate": 3.550409984517421e-05, "loss": 0.81, "step": 3560 }, { "epoch": 1.81, "learning_rate": 3.5431579867279905e-05, "loss": 0.8592, "step": 3570 }, { "epoch": 1.82, "learning_rate": 3.5358953450397995e-05, "loss": 0.8419, "step": 3580 }, { "epoch": 1.82, "learning_rate": 3.528622133557465e-05, "loss": 0.7349, "step": 3590 }, { "epoch": 1.83, "learning_rate": 3.521338426493453e-05, "loss": 0.8005, "step": 3600 }, { "epoch": 1.83, "learning_rate": 3.514044298167322e-05, "loss": 0.7567, "step": 3610 }, { "epoch": 1.84, "learning_rate": 3.506739823004963e-05, "loss": 0.7951, "step": 3620 }, { "epoch": 1.84, "learning_rate": 3.4994250755378434e-05, "loss": 0.8423, "step": 3630 }, { "epoch": 1.85, "learning_rate": 3.492100130402242e-05, "loss": 0.844, "step": 3640 }, { "epoch": 1.85, "learning_rate": 3.4847650623384914e-05, "loss": 0.8515, "step": 3650 }, { "epoch": 1.86, "learning_rate": 3.477419946190213e-05, "loss": 0.906, "step": 3660 }, { "epoch": 1.87, "learning_rate": 3.470064856903555e-05, "loss": 0.8309, "step": 3670 }, { "epoch": 1.87, "learning_rate": 3.462699869526427e-05, "loss": 0.8666, "step": 3680 }, { "epoch": 1.88, "learning_rate": 3.455325059207732e-05, "loss": 0.7585, "step": 3690 }, { "epoch": 1.88, "learning_rate": 3.4479405011966056e-05, "loss": 0.8133, "step": 3700 }, { "epoch": 1.89, "learning_rate": 3.440546270841639e-05, "loss": 0.9355, "step": 3710 }, { "epoch": 1.89, "learning_rate": 3.4331424435901214e-05, "loss": 0.7332, "step": 3720 }, { "epoch": 1.9, "learning_rate": 3.4257290949872614e-05, "loss": 0.8603, "step": 3730 }, { "epoch": 1.9, "learning_rate": 3.418306300675416e-05, "loss": 0.8269, "step": 3740 }, { "epoch": 1.91, "learning_rate": 3.410874136393327e-05, "loss": 0.799, "step": 3750 }, { "epoch": 1.91, "learning_rate": 3.403432677975341e-05, "loss": 0.8898, "step": 3760 }, { "epoch": 1.92, "learning_rate": 3.395982001350637e-05, "loss": 0.7441, "step": 3770 }, { "epoch": 1.92, "learning_rate": 3.3885221825424537e-05, "loss": 0.8466, "step": 3780 }, { "epoch": 1.93, "learning_rate": 3.381053297667309e-05, "loss": 0.8273, "step": 3790 }, { "epoch": 1.93, "learning_rate": 3.3735754229342326e-05, "loss": 0.8397, "step": 3800 }, { "epoch": 1.94, "learning_rate": 3.3660886346439765e-05, "loss": 0.8455, "step": 3810 }, { "epoch": 1.94, "learning_rate": 3.358593009188247e-05, "loss": 0.8254, "step": 3820 }, { "epoch": 1.95, "learning_rate": 3.351088623048918e-05, "loss": 0.8374, "step": 3830 }, { "epoch": 1.95, "learning_rate": 3.3435755527972536e-05, "loss": 0.781, "step": 3840 }, { "epoch": 1.96, "learning_rate": 3.336053875093128e-05, "loss": 0.8414, "step": 3850 }, { "epoch": 1.96, "learning_rate": 3.32852366668424e-05, "loss": 0.7875, "step": 3860 }, { "epoch": 1.97, "learning_rate": 3.320985004405334e-05, "loss": 0.7889, "step": 3870 }, { "epoch": 1.97, "learning_rate": 3.3134379651774114e-05, "loss": 0.894, "step": 3880 }, { "epoch": 1.98, "learning_rate": 3.30588262600695e-05, "loss": 0.8475, "step": 3890 }, { "epoch": 1.98, "learning_rate": 3.298319063985116e-05, "loss": 0.8024, "step": 3900 }, { "epoch": 1.99, "learning_rate": 3.2907473562869754e-05, "loss": 0.8467, "step": 3910 }, { "epoch": 1.99, "learning_rate": 3.283167580170712e-05, "loss": 0.7829, "step": 3920 }, { "epoch": 2.0, "learning_rate": 3.275579812976835e-05, "loss": 0.8466, "step": 3930 }, { "epoch": 2.0, "learning_rate": 3.2679841321273895e-05, "loss": 0.7958, "step": 3940 }, { "epoch": 2.01, "learning_rate": 3.260380615125171e-05, "loss": 0.7956, "step": 3950 }, { "epoch": 2.01, "learning_rate": 3.252769339552927e-05, "loss": 0.8578, "step": 3960 }, { "epoch": 2.02, "learning_rate": 3.245150383072573e-05, "loss": 0.8806, "step": 3970 }, { "epoch": 2.02, "learning_rate": 3.2375238234243965e-05, "loss": 0.8477, "step": 3980 }, { "epoch": 2.03, "learning_rate": 3.229889738426264e-05, "loss": 0.7173, "step": 3990 }, { "epoch": 2.03, "learning_rate": 3.222248205972827e-05, "loss": 0.8259, "step": 4000 }, { "epoch": 2.04, "learning_rate": 3.2145993040347264e-05, "loss": 0.7454, "step": 4010 }, { "epoch": 2.04, "learning_rate": 3.2069431106577995e-05, "loss": 0.8054, "step": 4020 }, { "epoch": 2.05, "learning_rate": 3.199279703962282e-05, "loss": 0.7146, "step": 4030 }, { "epoch": 2.05, "learning_rate": 3.1916091621420104e-05, "loss": 0.8322, "step": 4040 }, { "epoch": 2.06, "learning_rate": 3.183931563463624e-05, "loss": 0.7718, "step": 4050 }, { "epoch": 2.06, "learning_rate": 3.176246986265767e-05, "loss": 0.9118, "step": 4060 }, { "epoch": 2.07, "learning_rate": 3.1685555089582906e-05, "loss": 0.8052, "step": 4070 }, { "epoch": 2.07, "learning_rate": 3.1608572100214526e-05, "loss": 0.8209, "step": 4080 }, { "epoch": 2.08, "learning_rate": 3.15315216800511e-05, "loss": 0.7682, "step": 4090 }, { "epoch": 2.08, "learning_rate": 3.145440461527929e-05, "loss": 0.8159, "step": 4100 }, { "epoch": 2.09, "learning_rate": 3.137722169276574e-05, "loss": 0.8396, "step": 4110 }, { "epoch": 2.09, "learning_rate": 3.129997370004909e-05, "loss": 0.7799, "step": 4120 }, { "epoch": 2.1, "learning_rate": 3.122266142533191e-05, "loss": 0.8488, "step": 4130 }, { "epoch": 2.1, "learning_rate": 3.114528565747268e-05, "loss": 0.7617, "step": 4140 }, { "epoch": 2.11, "learning_rate": 3.1067847185977735e-05, "loss": 0.8345, "step": 4150 }, { "epoch": 2.11, "learning_rate": 3.099034680099321e-05, "loss": 0.7212, "step": 4160 }, { "epoch": 2.12, "learning_rate": 3.091278529329698e-05, "loss": 0.8031, "step": 4170 }, { "epoch": 2.12, "learning_rate": 3.0835163454290574e-05, "loss": 0.8333, "step": 4180 }, { "epoch": 2.13, "learning_rate": 3.075748207599114e-05, "loss": 0.7761, "step": 4190 }, { "epoch": 2.13, "learning_rate": 3.06797419510233e-05, "loss": 0.7531, "step": 4200 }, { "epoch": 2.14, "learning_rate": 3.060194387261114e-05, "loss": 0.8292, "step": 4210 }, { "epoch": 2.14, "learning_rate": 3.0524088634570035e-05, "loss": 0.82, "step": 4220 }, { "epoch": 2.15, "learning_rate": 3.0446177031298627e-05, "loss": 0.8561, "step": 4230 }, { "epoch": 2.15, "learning_rate": 3.036820985777067e-05, "loss": 0.9112, "step": 4240 }, { "epoch": 2.16, "learning_rate": 3.0290187909526914e-05, "loss": 0.8364, "step": 4250 }, { "epoch": 2.16, "learning_rate": 3.0212111982667024e-05, "loss": 0.7643, "step": 4260 }, { "epoch": 2.17, "learning_rate": 3.013398287384144e-05, "loss": 0.8117, "step": 4270 }, { "epoch": 2.18, "learning_rate": 3.0055801380243224e-05, "loss": 0.8721, "step": 4280 }, { "epoch": 2.18, "learning_rate": 2.9977568299599973e-05, "loss": 0.76, "step": 4290 }, { "epoch": 2.19, "learning_rate": 2.989928443016564e-05, "loss": 0.7813, "step": 4300 }, { "epoch": 2.19, "learning_rate": 2.9820950570712414e-05, "loss": 0.8918, "step": 4310 }, { "epoch": 2.2, "learning_rate": 2.9742567520522534e-05, "loss": 0.8043, "step": 4320 }, { "epoch": 2.2, "learning_rate": 2.966413607938019e-05, "loss": 0.8443, "step": 4330 }, { "epoch": 2.21, "learning_rate": 2.9585657047563315e-05, "loss": 0.7935, "step": 4340 }, { "epoch": 2.21, "learning_rate": 2.9507131225835432e-05, "loss": 0.7864, "step": 4350 }, { "epoch": 2.22, "learning_rate": 2.9428559415437496e-05, "loss": 0.8375, "step": 4360 }, { "epoch": 2.22, "learning_rate": 2.93499424180797e-05, "loss": 0.8113, "step": 4370 }, { "epoch": 2.23, "learning_rate": 2.9271281035933313e-05, "loss": 0.7886, "step": 4380 }, { "epoch": 2.23, "learning_rate": 2.9192576071622473e-05, "loss": 0.9166, "step": 4390 }, { "epoch": 2.24, "learning_rate": 2.9113828328216027e-05, "loss": 0.8631, "step": 4400 }, { "epoch": 2.24, "learning_rate": 2.9035038609219306e-05, "loss": 0.861, "step": 4410 }, { "epoch": 2.25, "learning_rate": 2.8956207718565942e-05, "loss": 0.8465, "step": 4420 }, { "epoch": 2.25, "learning_rate": 2.8877336460609673e-05, "loss": 0.7999, "step": 4430 }, { "epoch": 2.26, "learning_rate": 2.879842564011612e-05, "loss": 0.8585, "step": 4440 }, { "epoch": 2.26, "learning_rate": 2.871947606225458e-05, "loss": 0.885, "step": 4450 }, { "epoch": 2.27, "learning_rate": 2.8640488532589803e-05, "loss": 0.736, "step": 4460 }, { "epoch": 2.27, "learning_rate": 2.8561463857073804e-05, "loss": 0.7454, "step": 4470 }, { "epoch": 2.28, "learning_rate": 2.8482402842037614e-05, "loss": 0.8043, "step": 4480 }, { "epoch": 2.28, "learning_rate": 2.8403306294183026e-05, "loss": 0.74, "step": 4490 }, { "epoch": 2.29, "learning_rate": 2.8324175020574424e-05, "loss": 0.8533, "step": 4500 }, { "epoch": 2.29, "learning_rate": 2.8245009828630502e-05, "loss": 0.735, "step": 4510 }, { "epoch": 2.3, "learning_rate": 2.816581152611606e-05, "loss": 0.867, "step": 4520 }, { "epoch": 2.3, "learning_rate": 2.808658092113372e-05, "loss": 0.8848, "step": 4530 }, { "epoch": 2.31, "learning_rate": 2.8007318822115713e-05, "loss": 0.7563, "step": 4540 }, { "epoch": 2.31, "learning_rate": 2.792802603781562e-05, "loss": 0.762, "step": 4550 }, { "epoch": 2.32, "learning_rate": 2.7848703377300118e-05, "loss": 0.7755, "step": 4560 }, { "epoch": 2.32, "learning_rate": 2.776935164994074e-05, "loss": 0.8489, "step": 4570 }, { "epoch": 2.33, "learning_rate": 2.7689971665405578e-05, "loss": 0.7536, "step": 4580 }, { "epoch": 2.33, "learning_rate": 2.761056423365107e-05, "loss": 0.7741, "step": 4590 }, { "epoch": 2.34, "learning_rate": 2.7531130164913703e-05, "loss": 0.7624, "step": 4600 }, { "epoch": 2.34, "learning_rate": 2.7451670269701767e-05, "loss": 0.8003, "step": 4610 }, { "epoch": 2.35, "learning_rate": 2.737218535878705e-05, "loss": 0.8823, "step": 4620 }, { "epoch": 2.35, "learning_rate": 2.7292676243196608e-05, "loss": 0.8301, "step": 4630 }, { "epoch": 2.36, "learning_rate": 2.7213143734204462e-05, "loss": 0.8486, "step": 4640 }, { "epoch": 2.36, "learning_rate": 2.7133588643323334e-05, "loss": 0.7807, "step": 4650 }, { "epoch": 2.37, "learning_rate": 2.7054011782296356e-05, "loss": 0.8104, "step": 4660 }, { "epoch": 2.37, "learning_rate": 2.6974413963088797e-05, "loss": 0.847, "step": 4670 }, { "epoch": 2.38, "learning_rate": 2.6894795997879762e-05, "loss": 0.8375, "step": 4680 }, { "epoch": 2.38, "learning_rate": 2.6815158699053932e-05, "loss": 0.834, "step": 4690 }, { "epoch": 2.39, "learning_rate": 2.6735502879193264e-05, "loss": 0.7997, "step": 4700 }, { "epoch": 2.39, "learning_rate": 2.665582935106866e-05, "loss": 0.7941, "step": 4710 }, { "epoch": 2.4, "learning_rate": 2.6576138927631742e-05, "loss": 0.8244, "step": 4720 }, { "epoch": 2.4, "learning_rate": 2.6496432422006522e-05, "loss": 0.8158, "step": 4730 }, { "epoch": 2.41, "learning_rate": 2.641671064748109e-05, "loss": 0.8289, "step": 4740 }, { "epoch": 2.41, "learning_rate": 2.633697441749935e-05, "loss": 0.8029, "step": 4750 }, { "epoch": 2.42, "learning_rate": 2.6257224545652688e-05, "loss": 0.8135, "step": 4760 }, { "epoch": 2.42, "learning_rate": 2.6177461845671685e-05, "loss": 0.8097, "step": 4770 }, { "epoch": 2.43, "learning_rate": 2.6097687131417843e-05, "loss": 0.8128, "step": 4780 }, { "epoch": 2.43, "learning_rate": 2.6017901216875217e-05, "loss": 0.8145, "step": 4790 }, { "epoch": 2.44, "learning_rate": 2.5938104916142155e-05, "loss": 0.7725, "step": 4800 }, { "epoch": 2.44, "learning_rate": 2.585829904342299e-05, "loss": 0.8902, "step": 4810 }, { "epoch": 2.45, "learning_rate": 2.577848441301971e-05, "loss": 0.8069, "step": 4820 }, { "epoch": 2.45, "learning_rate": 2.569866183932368e-05, "loss": 0.781, "step": 4830 }, { "epoch": 2.46, "learning_rate": 2.5618832136807297e-05, "loss": 0.7496, "step": 4840 }, { "epoch": 2.46, "learning_rate": 2.553899612001571e-05, "loss": 0.8554, "step": 4850 }, { "epoch": 2.47, "learning_rate": 2.5459154603558483e-05, "loss": 0.8187, "step": 4860 }, { "epoch": 2.47, "learning_rate": 2.5379308402101303e-05, "loss": 0.7848, "step": 4870 }, { "epoch": 2.48, "learning_rate": 2.529945833035767e-05, "loss": 0.7408, "step": 4880 }, { "epoch": 2.49, "learning_rate": 2.521960520308056e-05, "loss": 0.7655, "step": 4890 }, { "epoch": 2.49, "learning_rate": 2.5139749835054123e-05, "loss": 0.7614, "step": 4900 }, { "epoch": 2.5, "learning_rate": 2.5059893041085392e-05, "loss": 0.7382, "step": 4910 }, { "epoch": 2.5, "learning_rate": 2.4980035635995943e-05, "loss": 0.7321, "step": 4920 }, { "epoch": 2.51, "learning_rate": 2.4900178434613566e-05, "loss": 0.7464, "step": 4930 }, { "epoch": 2.51, "learning_rate": 2.4820322251764e-05, "loss": 0.7925, "step": 4940 }, { "epoch": 2.52, "learning_rate": 2.4740467902262583e-05, "loss": 0.8016, "step": 4950 }, { "epoch": 2.52, "learning_rate": 2.466061620090594e-05, "loss": 0.8147, "step": 4960 }, { "epoch": 2.53, "learning_rate": 2.4580767962463687e-05, "loss": 0.8129, "step": 4970 }, { "epoch": 2.53, "learning_rate": 2.4500924001670088e-05, "loss": 0.8099, "step": 4980 }, { "epoch": 2.54, "learning_rate": 2.4421085133215787e-05, "loss": 0.8304, "step": 4990 }, { "epoch": 2.54, "learning_rate": 2.4341252171739436e-05, "loss": 0.9241, "step": 5000 }, { "epoch": 2.55, "learning_rate": 2.4261425931819437e-05, "loss": 0.7888, "step": 5010 }, { "epoch": 2.55, "learning_rate": 2.4181607227965604e-05, "loss": 0.8431, "step": 5020 }, { "epoch": 2.56, "learning_rate": 2.4101796874610855e-05, "loss": 0.7654, "step": 5030 }, { "epoch": 2.56, "learning_rate": 2.40219956861029e-05, "loss": 0.8724, "step": 5040 }, { "epoch": 2.57, "learning_rate": 2.3942204476695943e-05, "loss": 0.9028, "step": 5050 }, { "epoch": 2.57, "learning_rate": 2.3862424060542357e-05, "loss": 0.7866, "step": 5060 }, { "epoch": 2.58, "learning_rate": 2.3782655251684394e-05, "loss": 0.8155, "step": 5070 }, { "epoch": 2.58, "learning_rate": 2.3702898864045876e-05, "loss": 0.766, "step": 5080 }, { "epoch": 2.59, "learning_rate": 2.362315571142385e-05, "loss": 0.7741, "step": 5090 }, { "epoch": 2.59, "learning_rate": 2.3543426607480364e-05, "loss": 0.8394, "step": 5100 }, { "epoch": 2.6, "learning_rate": 2.346371236573409e-05, "loss": 0.8572, "step": 5110 }, { "epoch": 2.6, "learning_rate": 2.3384013799552072e-05, "loss": 0.8239, "step": 5120 }, { "epoch": 2.61, "learning_rate": 2.3304331722141393e-05, "loss": 0.7008, "step": 5130 }, { "epoch": 2.61, "learning_rate": 2.32246669465409e-05, "loss": 0.7752, "step": 5140 }, { "epoch": 2.62, "learning_rate": 2.3145020285612894e-05, "loss": 0.7641, "step": 5150 }, { "epoch": 2.62, "learning_rate": 2.3065392552034857e-05, "loss": 0.8388, "step": 5160 }, { "epoch": 2.63, "learning_rate": 2.298578455829114e-05, "loss": 0.8176, "step": 5170 }, { "epoch": 2.63, "learning_rate": 2.2906197116664653e-05, "loss": 0.7676, "step": 5180 }, { "epoch": 2.64, "learning_rate": 2.282663103922863e-05, "loss": 0.7121, "step": 5190 }, { "epoch": 2.64, "learning_rate": 2.2747087137838307e-05, "loss": 0.7567, "step": 5200 }, { "epoch": 2.65, "learning_rate": 2.2667566224122648e-05, "loss": 0.8355, "step": 5210 }, { "epoch": 2.65, "learning_rate": 2.2588069109476057e-05, "loss": 0.7708, "step": 5220 }, { "epoch": 2.66, "learning_rate": 2.2508596605050107e-05, "loss": 0.8587, "step": 5230 }, { "epoch": 2.66, "learning_rate": 2.2429149521745254e-05, "loss": 0.7971, "step": 5240 }, { "epoch": 2.67, "learning_rate": 2.2349728670202582e-05, "loss": 0.7568, "step": 5250 }, { "epoch": 2.67, "learning_rate": 2.2270334860795497e-05, "loss": 0.7911, "step": 5260 }, { "epoch": 2.68, "learning_rate": 2.2190968903621498e-05, "loss": 0.8176, "step": 5270 }, { "epoch": 2.68, "learning_rate": 2.2111631608493885e-05, "loss": 0.8239, "step": 5280 }, { "epoch": 2.69, "learning_rate": 2.2032323784933505e-05, "loss": 0.8844, "step": 5290 }, { "epoch": 2.69, "learning_rate": 2.1953046242160493e-05, "loss": 0.7868, "step": 5300 }, { "epoch": 2.7, "learning_rate": 2.187379978908601e-05, "loss": 0.8765, "step": 5310 }, { "epoch": 2.7, "learning_rate": 2.1794585234303993e-05, "loss": 0.7886, "step": 5320 }, { "epoch": 2.71, "learning_rate": 2.1715403386082907e-05, "loss": 0.861, "step": 5330 }, { "epoch": 2.71, "learning_rate": 2.1636255052357497e-05, "loss": 0.8678, "step": 5340 }, { "epoch": 2.72, "learning_rate": 2.1557141040720515e-05, "loss": 0.8169, "step": 5350 }, { "epoch": 2.72, "learning_rate": 2.147806215841454e-05, "loss": 0.7597, "step": 5360 }, { "epoch": 2.73, "learning_rate": 2.1399019212323697e-05, "loss": 0.8513, "step": 5370 }, { "epoch": 2.73, "learning_rate": 2.1320013008965432e-05, "loss": 0.7976, "step": 5380 }, { "epoch": 2.74, "learning_rate": 2.124104435448228e-05, "loss": 0.7306, "step": 5390 }, { "epoch": 2.74, "learning_rate": 2.1162114054633663e-05, "loss": 0.8193, "step": 5400 }, { "epoch": 2.75, "learning_rate": 2.1083222914787623e-05, "loss": 0.8915, "step": 5410 }, { "epoch": 2.75, "learning_rate": 2.1004371739912654e-05, "loss": 0.7684, "step": 5420 }, { "epoch": 2.76, "learning_rate": 2.0925561334569464e-05, "loss": 0.7708, "step": 5430 }, { "epoch": 2.76, "learning_rate": 2.0846792502902753e-05, "loss": 0.7513, "step": 5440 }, { "epoch": 2.77, "learning_rate": 2.0768066048633033e-05, "loss": 0.7225, "step": 5450 }, { "epoch": 2.77, "learning_rate": 2.0689382775048418e-05, "loss": 0.7696, "step": 5460 }, { "epoch": 2.78, "learning_rate": 2.061074348499642e-05, "loss": 0.751, "step": 5470 }, { "epoch": 2.78, "learning_rate": 2.0532148980875768e-05, "loss": 0.7263, "step": 5480 }, { "epoch": 2.79, "learning_rate": 2.045360006462822e-05, "loss": 0.8106, "step": 5490 }, { "epoch": 2.8, "learning_rate": 2.037509753773037e-05, "loss": 0.7924, "step": 5500 }, { "epoch": 2.8, "learning_rate": 2.0296642201185473e-05, "loss": 0.8711, "step": 5510 }, { "epoch": 2.81, "learning_rate": 2.02182348555153e-05, "loss": 0.8576, "step": 5520 }, { "epoch": 2.81, "learning_rate": 2.0139876300751904e-05, "loss": 0.8587, "step": 5530 }, { "epoch": 2.82, "learning_rate": 2.0061567336429527e-05, "loss": 0.8752, "step": 5540 }, { "epoch": 2.82, "learning_rate": 1.9983308761576407e-05, "loss": 0.8727, "step": 5550 }, { "epoch": 2.83, "learning_rate": 1.990510137470664e-05, "loss": 0.7785, "step": 5560 }, { "epoch": 2.83, "learning_rate": 1.9826945973812005e-05, "loss": 0.7669, "step": 5570 }, { "epoch": 2.84, "learning_rate": 1.9748843356353856e-05, "loss": 0.8083, "step": 5580 }, { "epoch": 2.84, "learning_rate": 1.9670794319254963e-05, "loss": 0.813, "step": 5590 }, { "epoch": 2.85, "learning_rate": 1.9592799658891385e-05, "loss": 0.815, "step": 5600 }, { "epoch": 2.85, "learning_rate": 1.951486017108436e-05, "loss": 0.9141, "step": 5610 }, { "epoch": 2.86, "learning_rate": 1.9436976651092144e-05, "loss": 0.7419, "step": 5620 }, { "epoch": 2.86, "learning_rate": 1.9359149893601944e-05, "loss": 0.8557, "step": 5630 }, { "epoch": 2.87, "learning_rate": 1.9281380692721786e-05, "loss": 0.871, "step": 5640 }, { "epoch": 2.87, "learning_rate": 1.9203669841972416e-05, "loss": 0.8396, "step": 5650 }, { "epoch": 2.88, "learning_rate": 1.9126018134279193e-05, "loss": 0.8437, "step": 5660 }, { "epoch": 2.88, "learning_rate": 1.904842636196402e-05, "loss": 0.7932, "step": 5670 }, { "epoch": 2.89, "learning_rate": 1.8970895316737238e-05, "loss": 0.6945, "step": 5680 }, { "epoch": 2.89, "learning_rate": 1.8893425789689575e-05, "loss": 0.738, "step": 5690 }, { "epoch": 2.9, "learning_rate": 1.8816018571284017e-05, "loss": 0.7109, "step": 5700 }, { "epoch": 2.9, "learning_rate": 1.8738674451347818e-05, "loss": 0.8739, "step": 5710 }, { "epoch": 2.91, "learning_rate": 1.866139421906439e-05, "loss": 0.8342, "step": 5720 }, { "epoch": 2.91, "learning_rate": 1.858417866296528e-05, "loss": 0.7657, "step": 5730 }, { "epoch": 2.92, "learning_rate": 1.850702857092208e-05, "loss": 0.8464, "step": 5740 }, { "epoch": 2.92, "learning_rate": 1.8429944730138448e-05, "loss": 0.7853, "step": 5750 }, { "epoch": 2.93, "learning_rate": 1.8352927927142026e-05, "loss": 0.7642, "step": 5760 }, { "epoch": 2.93, "learning_rate": 1.8275978947776436e-05, "loss": 0.8316, "step": 5770 }, { "epoch": 2.94, "learning_rate": 1.819909857719328e-05, "loss": 0.8784, "step": 5780 }, { "epoch": 2.94, "learning_rate": 1.8122287599844066e-05, "loss": 0.7567, "step": 5790 }, { "epoch": 2.95, "learning_rate": 1.8045546799472286e-05, "loss": 0.7579, "step": 5800 }, { "epoch": 2.95, "learning_rate": 1.796887695910535e-05, "loss": 0.7957, "step": 5810 }, { "epoch": 2.96, "learning_rate": 1.7892278861046648e-05, "loss": 0.7458, "step": 5820 }, { "epoch": 2.96, "learning_rate": 1.7815753286867533e-05, "loss": 0.7479, "step": 5830 }, { "epoch": 2.97, "learning_rate": 1.7739301017399355e-05, "loss": 0.7617, "step": 5840 }, { "epoch": 2.97, "learning_rate": 1.7662922832725514e-05, "loss": 0.8818, "step": 5850 }, { "epoch": 2.98, "learning_rate": 1.7586619512173458e-05, "loss": 0.8075, "step": 5860 }, { "epoch": 2.98, "learning_rate": 1.751039183430678e-05, "loss": 0.786, "step": 5870 }, { "epoch": 2.99, "learning_rate": 1.7434240576917226e-05, "loss": 0.8369, "step": 5880 }, { "epoch": 2.99, "learning_rate": 1.735816651701681e-05, "loss": 0.8883, "step": 5890 }, { "epoch": 3.0, "learning_rate": 1.7282170430829837e-05, "loss": 0.6625, "step": 5900 }, { "epoch": 3.0, "learning_rate": 1.7206253093785012e-05, "loss": 0.838, "step": 5910 }, { "epoch": 3.01, "learning_rate": 1.713041528050753e-05, "loss": 0.6941, "step": 5920 }, { "epoch": 3.01, "learning_rate": 1.705465776481114e-05, "loss": 0.7716, "step": 5930 }, { "epoch": 3.02, "learning_rate": 1.6978981319690298e-05, "loss": 0.8099, "step": 5940 }, { "epoch": 3.02, "learning_rate": 1.6903386717312236e-05, "loss": 0.7046, "step": 5950 }, { "epoch": 3.03, "learning_rate": 1.682787472900912e-05, "loss": 0.8008, "step": 5960 }, { "epoch": 3.03, "learning_rate": 1.6752446125270117e-05, "loss": 0.8587, "step": 5970 }, { "epoch": 3.04, "learning_rate": 1.6677101675733625e-05, "loss": 0.8083, "step": 5980 }, { "epoch": 3.04, "learning_rate": 1.6601842149179347e-05, "loss": 0.7906, "step": 5990 }, { "epoch": 3.05, "learning_rate": 1.6526668313520478e-05, "loss": 0.7089, "step": 6000 }, { "epoch": 3.05, "learning_rate": 1.6451580935795863e-05, "loss": 0.8412, "step": 6010 }, { "epoch": 3.06, "learning_rate": 1.637658078216217e-05, "loss": 0.7232, "step": 6020 }, { "epoch": 3.06, "learning_rate": 1.6301668617886072e-05, "loss": 0.8772, "step": 6030 }, { "epoch": 3.07, "learning_rate": 1.622684520733644e-05, "loss": 0.8135, "step": 6040 }, { "epoch": 3.07, "learning_rate": 1.615211131397654e-05, "loss": 0.7315, "step": 6050 }, { "epoch": 3.08, "learning_rate": 1.6077467700356256e-05, "loss": 0.8048, "step": 6060 }, { "epoch": 3.08, "learning_rate": 1.6002915128104284e-05, "loss": 0.684, "step": 6070 }, { "epoch": 3.09, "learning_rate": 1.592845435792039e-05, "loss": 0.7887, "step": 6080 }, { "epoch": 3.09, "learning_rate": 1.585408614956763e-05, "loss": 0.7469, "step": 6090 }, { "epoch": 3.1, "learning_rate": 1.5779811261864604e-05, "loss": 0.7575, "step": 6100 }, { "epoch": 3.11, "learning_rate": 1.5705630452677707e-05, "loss": 0.7354, "step": 6110 }, { "epoch": 3.11, "learning_rate": 1.56315444789134e-05, "loss": 0.7734, "step": 6120 }, { "epoch": 3.12, "learning_rate": 1.555755409651049e-05, "loss": 0.804, "step": 6130 }, { "epoch": 3.12, "learning_rate": 1.5483660060432432e-05, "loss": 0.7408, "step": 6140 }, { "epoch": 3.13, "learning_rate": 1.5409863124659562e-05, "loss": 0.819, "step": 6150 }, { "epoch": 3.13, "learning_rate": 1.5336164042181494e-05, "loss": 0.7686, "step": 6160 }, { "epoch": 3.14, "learning_rate": 1.5262563564989374e-05, "loss": 0.8118, "step": 6170 }, { "epoch": 3.14, "learning_rate": 1.5189062444068225e-05, "loss": 0.7973, "step": 6180 }, { "epoch": 3.15, "learning_rate": 1.5115661429389294e-05, "loss": 0.7622, "step": 6190 }, { "epoch": 3.15, "learning_rate": 1.5042361269902383e-05, "loss": 0.875, "step": 6200 }, { "epoch": 3.16, "learning_rate": 1.4969162713528212e-05, "loss": 0.7767, "step": 6210 }, { "epoch": 3.16, "learning_rate": 1.4896066507150804e-05, "loss": 0.7193, "step": 6220 }, { "epoch": 3.17, "learning_rate": 1.482307339660983e-05, "loss": 0.7978, "step": 6230 }, { "epoch": 3.17, "learning_rate": 1.4750184126693028e-05, "loss": 0.7754, "step": 6240 }, { "epoch": 3.18, "learning_rate": 1.4677399441128603e-05, "loss": 0.8816, "step": 6250 }, { "epoch": 3.18, "learning_rate": 1.4604720082577622e-05, "loss": 0.8264, "step": 6260 }, { "epoch": 3.19, "learning_rate": 1.4532146792626449e-05, "loss": 0.7528, "step": 6270 }, { "epoch": 3.19, "learning_rate": 1.4459680311779159e-05, "loss": 0.7898, "step": 6280 }, { "epoch": 3.2, "learning_rate": 1.438732137945001e-05, "loss": 0.8136, "step": 6290 }, { "epoch": 3.2, "learning_rate": 1.4315070733955888e-05, "loss": 0.7874, "step": 6300 }, { "epoch": 3.21, "learning_rate": 1.4242929112508769e-05, "loss": 0.7637, "step": 6310 }, { "epoch": 3.21, "learning_rate": 1.417089725120817e-05, "loss": 0.8007, "step": 6320 }, { "epoch": 3.22, "learning_rate": 1.4098975885033713e-05, "loss": 0.7594, "step": 6330 }, { "epoch": 3.22, "learning_rate": 1.4027165747837548e-05, "loss": 0.7395, "step": 6340 }, { "epoch": 3.23, "learning_rate": 1.3955467572336905e-05, "loss": 0.7882, "step": 6350 }, { "epoch": 3.23, "learning_rate": 1.3883882090106611e-05, "loss": 0.7563, "step": 6360 }, { "epoch": 3.24, "learning_rate": 1.381241003157162e-05, "loss": 0.7223, "step": 6370 }, { "epoch": 3.24, "learning_rate": 1.3741052125999564e-05, "loss": 0.7144, "step": 6380 }, { "epoch": 3.25, "learning_rate": 1.3669809101493314e-05, "loss": 0.7936, "step": 6390 }, { "epoch": 3.25, "learning_rate": 1.3605789203581502e-05, "loss": 0.7189, "step": 6400 }, { "epoch": 3.26, "learning_rate": 1.3534766454819494e-05, "loss": 0.7533, "step": 6410 }, { "epoch": 3.26, "learning_rate": 1.3463860691966308e-05, "loss": 0.7467, "step": 6420 }, { "epoch": 3.27, "learning_rate": 1.3393072638511351e-05, "loss": 0.7655, "step": 6430 }, { "epoch": 3.27, "learning_rate": 1.332240301674299e-05, "loss": 0.7911, "step": 6440 }, { "epoch": 3.28, "learning_rate": 1.3251852547741161e-05, "loss": 0.8208, "step": 6450 }, { "epoch": 3.28, "learning_rate": 1.3181421951370035e-05, "loss": 0.8012, "step": 6460 }, { "epoch": 3.29, "learning_rate": 1.311111194627064e-05, "loss": 0.8289, "step": 6470 }, { "epoch": 3.29, "learning_rate": 1.304092324985356e-05, "loss": 0.7399, "step": 6480 }, { "epoch": 3.3, "learning_rate": 1.2970856578291598e-05, "loss": 0.7232, "step": 6490 }, { "epoch": 3.3, "learning_rate": 1.290091264651247e-05, "loss": 0.8301, "step": 6500 }, { "epoch": 3.31, "learning_rate": 1.2831092168191517e-05, "loss": 0.7985, "step": 6510 }, { "epoch": 3.31, "learning_rate": 1.2761395855744408e-05, "loss": 0.7452, "step": 6520 }, { "epoch": 3.32, "learning_rate": 1.2691824420319895e-05, "loss": 0.8126, "step": 6530 }, { "epoch": 3.32, "learning_rate": 1.2622378571792535e-05, "loss": 0.8346, "step": 6540 }, { "epoch": 3.33, "learning_rate": 1.2553059018755454e-05, "loss": 0.7807, "step": 6550 }, { "epoch": 3.33, "learning_rate": 1.2483866468513125e-05, "loss": 0.8407, "step": 6560 }, { "epoch": 3.34, "learning_rate": 1.2414801627074144e-05, "loss": 0.8276, "step": 6570 }, { "epoch": 3.34, "learning_rate": 1.2345865199144e-05, "loss": 0.7426, "step": 6580 }, { "epoch": 3.35, "learning_rate": 1.2277057888117944e-05, "loss": 0.7461, "step": 6590 }, { "epoch": 3.35, "learning_rate": 1.220838039607376e-05, "loss": 0.7221, "step": 6600 }, { "epoch": 3.36, "learning_rate": 1.2139833423764626e-05, "loss": 0.8026, "step": 6610 }, { "epoch": 3.36, "learning_rate": 1.2071417670611959e-05, "loss": 0.7952, "step": 6620 }, { "epoch": 3.37, "learning_rate": 1.2003133834698268e-05, "loss": 0.7638, "step": 6630 }, { "epoch": 3.37, "learning_rate": 1.1934982612760049e-05, "loss": 0.7576, "step": 6640 }, { "epoch": 3.38, "learning_rate": 1.186696470018066e-05, "loss": 0.7676, "step": 6650 }, { "epoch": 3.38, "learning_rate": 1.1799080790983246e-05, "loss": 0.7482, "step": 6660 }, { "epoch": 3.39, "learning_rate": 1.1731331577823617e-05, "loss": 0.8294, "step": 6670 }, { "epoch": 3.39, "learning_rate": 1.1663717751983228e-05, "loss": 0.8052, "step": 6680 }, { "epoch": 3.4, "learning_rate": 1.15962400033621e-05, "loss": 0.844, "step": 6690 }, { "epoch": 3.4, "learning_rate": 1.152889902047179e-05, "loss": 0.7958, "step": 6700 }, { "epoch": 3.41, "learning_rate": 1.1461695490428352e-05, "loss": 0.7427, "step": 6710 }, { "epoch": 3.42, "learning_rate": 1.1394630098945342e-05, "loss": 0.7311, "step": 6720 }, { "epoch": 3.42, "learning_rate": 1.1327703530326811e-05, "loss": 0.8388, "step": 6730 }, { "epoch": 3.43, "learning_rate": 1.1260916467460336e-05, "loss": 0.768, "step": 6740 }, { "epoch": 3.43, "learning_rate": 1.1194269591810018e-05, "loss": 0.7228, "step": 6750 }, { "epoch": 3.44, "learning_rate": 1.1127763583409576e-05, "loss": 0.8205, "step": 6760 }, { "epoch": 3.44, "learning_rate": 1.1061399120855375e-05, "loss": 0.6815, "step": 6770 }, { "epoch": 3.45, "learning_rate": 1.0995176881299515e-05, "loss": 0.7285, "step": 6780 }, { "epoch": 3.45, "learning_rate": 1.092909754044292e-05, "loss": 0.7713, "step": 6790 }, { "epoch": 3.46, "learning_rate": 1.086316177252844e-05, "loss": 0.8213, "step": 6800 }, { "epoch": 3.46, "learning_rate": 1.0797370250333975e-05, "loss": 0.8498, "step": 6810 }, { "epoch": 3.47, "learning_rate": 1.0731723645165603e-05, "loss": 0.6787, "step": 6820 }, { "epoch": 3.47, "learning_rate": 1.0666222626850752e-05, "loss": 0.8175, "step": 6830 }, { "epoch": 3.48, "learning_rate": 1.0600867863731321e-05, "loss": 0.8446, "step": 6840 }, { "epoch": 3.48, "learning_rate": 1.0535660022656915e-05, "loss": 0.7999, "step": 6850 }, { "epoch": 3.49, "learning_rate": 1.0470599768978004e-05, "loss": 0.7724, "step": 6860 }, { "epoch": 3.49, "learning_rate": 1.040568776653915e-05, "loss": 0.8615, "step": 6870 }, { "epoch": 3.5, "learning_rate": 1.0340924677672223e-05, "loss": 0.8512, "step": 6880 }, { "epoch": 3.5, "learning_rate": 1.0276311163189647e-05, "loss": 0.8195, "step": 6890 }, { "epoch": 3.51, "learning_rate": 1.021184788237767e-05, "loss": 0.7919, "step": 6900 }, { "epoch": 3.51, "learning_rate": 1.0147535492989613e-05, "loss": 0.736, "step": 6910 }, { "epoch": 3.52, "learning_rate": 1.0083374651239172e-05, "loss": 0.8283, "step": 6920 }, { "epoch": 3.52, "learning_rate": 1.0019366011793732e-05, "loss": 0.814, "step": 6930 }, { "epoch": 3.53, "learning_rate": 9.955510227767665e-06, "loss": 0.8149, "step": 6940 }, { "epoch": 3.53, "learning_rate": 9.891807950715682e-06, "loss": 0.7876, "step": 6950 }, { "epoch": 3.54, "learning_rate": 9.82825983062618e-06, "loss": 0.6661, "step": 6960 }, { "epoch": 3.54, "learning_rate": 9.764866515914611e-06, "loss": 0.7429, "step": 6970 }, { "epoch": 3.55, "learning_rate": 9.701628653416867e-06, "loss": 0.7564, "step": 6980 }, { "epoch": 3.55, "learning_rate": 9.638546888382672e-06, "loss": 0.8258, "step": 6990 }, { "epoch": 3.56, "learning_rate": 9.575621864469006e-06, "loss": 0.848, "step": 7000 }, { "epoch": 3.56, "learning_rate": 9.512854223733547e-06, "loss": 0.7468, "step": 7010 }, { "epoch": 3.57, "learning_rate": 9.450244606628082e-06, "loss": 0.7268, "step": 7020 }, { "epoch": 3.57, "learning_rate": 9.38779365199202e-06, "loss": 0.896, "step": 7030 }, { "epoch": 3.58, "learning_rate": 9.325501997045847e-06, "loss": 0.8309, "step": 7040 }, { "epoch": 3.58, "learning_rate": 9.263370277384631e-06, "loss": 0.7935, "step": 7050 }, { "epoch": 3.59, "learning_rate": 9.20139912697153e-06, "loss": 0.7681, "step": 7060 }, { "epoch": 3.59, "learning_rate": 9.139589178131333e-06, "loss": 0.7819, "step": 7070 }, { "epoch": 3.6, "learning_rate": 9.077941061543996e-06, "loss": 0.7642, "step": 7080 }, { "epoch": 3.6, "learning_rate": 9.016455406238222e-06, "loss": 0.6481, "step": 7090 }, { "epoch": 3.61, "learning_rate": 8.955132839585037e-06, "loss": 0.7378, "step": 7100 }, { "epoch": 3.61, "learning_rate": 8.893973987291369e-06, "loss": 0.8364, "step": 7110 }, { "epoch": 3.62, "learning_rate": 8.832979473393693e-06, "loss": 0.7551, "step": 7120 }, { "epoch": 3.62, "learning_rate": 8.772149920251654e-06, "loss": 0.8637, "step": 7130 }, { "epoch": 3.63, "learning_rate": 8.711485948541715e-06, "loss": 0.7805, "step": 7140 }, { "epoch": 3.63, "learning_rate": 8.650988177250812e-06, "loss": 0.8183, "step": 7150 }, { "epoch": 3.64, "learning_rate": 8.590657223670059e-06, "loss": 0.8564, "step": 7160 }, { "epoch": 3.64, "learning_rate": 8.53049370338844e-06, "loss": 0.8067, "step": 7170 }, { "epoch": 3.65, "learning_rate": 8.470498230286523e-06, "loss": 0.7626, "step": 7180 }, { "epoch": 3.65, "learning_rate": 8.410671416530205e-06, "loss": 0.8154, "step": 7190 }, { "epoch": 3.66, "learning_rate": 8.351013872564447e-06, "loss": 0.7763, "step": 7200 }, { "epoch": 3.66, "learning_rate": 8.291526207107084e-06, "loss": 0.7609, "step": 7210 }, { "epoch": 3.67, "learning_rate": 8.232209027142571e-06, "loss": 0.7759, "step": 7220 }, { "epoch": 3.67, "learning_rate": 8.173062937915812e-06, "loss": 0.8389, "step": 7230 }, { "epoch": 3.68, "learning_rate": 8.114088542925993e-06, "loss": 0.7771, "step": 7240 }, { "epoch": 3.68, "learning_rate": 8.055286443920395e-06, "loss": 0.7084, "step": 7250 }, { "epoch": 3.69, "learning_rate": 7.99665724088828e-06, "loss": 0.8226, "step": 7260 }, { "epoch": 3.69, "learning_rate": 7.938201532054768e-06, "loss": 0.7129, "step": 7270 }, { "epoch": 3.7, "learning_rate": 7.879919913874695e-06, "loss": 0.7918, "step": 7280 }, { "epoch": 3.7, "learning_rate": 7.821812981026588e-06, "loss": 0.8319, "step": 7290 }, { "epoch": 3.71, "learning_rate": 7.763881326406552e-06, "loss": 0.7772, "step": 7300 }, { "epoch": 3.71, "learning_rate": 7.706125541122244e-06, "loss": 0.8371, "step": 7310 }, { "epoch": 3.72, "learning_rate": 7.64854621448682e-06, "loss": 0.7229, "step": 7320 }, { "epoch": 3.73, "learning_rate": 7.591143934012942e-06, "loss": 0.8227, "step": 7330 }, { "epoch": 3.73, "learning_rate": 7.5339192854067736e-06, "loss": 0.8326, "step": 7340 }, { "epoch": 3.74, "learning_rate": 7.476872852562003e-06, "loss": 0.7516, "step": 7350 }, { "epoch": 3.74, "learning_rate": 7.420005217553891e-06, "loss": 0.7943, "step": 7360 }, { "epoch": 3.75, "learning_rate": 7.3633169606333265e-06, "loss": 0.8194, "step": 7370 }, { "epoch": 3.75, "learning_rate": 7.306808660220909e-06, "loss": 0.8268, "step": 7380 }, { "epoch": 3.76, "learning_rate": 7.250480892901046e-06, "loss": 0.7529, "step": 7390 }, { "epoch": 3.76, "learning_rate": 7.194334233416069e-06, "loss": 0.7817, "step": 7400 }, { "epoch": 3.77, "learning_rate": 7.138369254660365e-06, "loss": 0.8199, "step": 7410 }, { "epoch": 3.77, "learning_rate": 7.082586527674542e-06, "loss": 0.8484, "step": 7420 }, { "epoch": 3.78, "learning_rate": 7.0269866216395915e-06, "loss": 0.7921, "step": 7430 }, { "epoch": 3.78, "learning_rate": 6.971570103871089e-06, "loss": 0.8236, "step": 7440 }, { "epoch": 3.79, "learning_rate": 6.9163375398134024e-06, "loss": 0.8259, "step": 7450 }, { "epoch": 3.79, "learning_rate": 6.8612894930339065e-06, "loss": 0.8038, "step": 7460 }, { "epoch": 3.8, "learning_rate": 6.806426525217266e-06, "loss": 0.8309, "step": 7470 }, { "epoch": 3.8, "learning_rate": 6.751749196159679e-06, "loss": 0.8441, "step": 7480 }, { "epoch": 3.81, "learning_rate": 6.69725806376317e-06, "loss": 0.7264, "step": 7490 }, { "epoch": 3.81, "learning_rate": 6.6429536840299035e-06, "loss": 0.8848, "step": 7500 }, { "epoch": 3.82, "learning_rate": 6.588836611056507e-06, "loss": 0.8101, "step": 7510 }, { "epoch": 3.82, "learning_rate": 6.534907397028409e-06, "loss": 0.724, "step": 7520 }, { "epoch": 3.83, "learning_rate": 6.481166592214225e-06, "loss": 0.8091, "step": 7530 }, { "epoch": 3.83, "learning_rate": 6.427614744960126e-06, "loss": 0.7131, "step": 7540 }, { "epoch": 3.84, "learning_rate": 6.374252401684233e-06, "loss": 0.7833, "step": 7550 }, { "epoch": 3.84, "learning_rate": 6.32108010687108e-06, "loss": 0.8134, "step": 7560 }, { "epoch": 3.85, "learning_rate": 6.268098403066022e-06, "loss": 0.7773, "step": 7570 }, { "epoch": 3.85, "learning_rate": 6.2153078308697125e-06, "loss": 0.7609, "step": 7580 }, { "epoch": 3.86, "learning_rate": 6.162708928932592e-06, "loss": 0.8762, "step": 7590 }, { "epoch": 3.86, "learning_rate": 6.110302233949383e-06, "loss": 0.7745, "step": 7600 }, { "epoch": 3.87, "learning_rate": 6.05808828065362e-06, "loss": 0.7402, "step": 7610 }, { "epoch": 3.87, "learning_rate": 6.006067601812187e-06, "loss": 0.7638, "step": 7620 }, { "epoch": 3.88, "learning_rate": 5.954240728219898e-06, "loss": 0.7565, "step": 7630 }, { "epoch": 3.88, "learning_rate": 5.902608188694039e-06, "loss": 0.7291, "step": 7640 }, { "epoch": 3.89, "learning_rate": 5.8511705100690314e-06, "loss": 0.7956, "step": 7650 }, { "epoch": 3.89, "learning_rate": 5.79992821719102e-06, "loss": 0.7756, "step": 7660 }, { "epoch": 3.9, "learning_rate": 5.7488818329125114e-06, "loss": 0.8543, "step": 7670 }, { "epoch": 3.9, "learning_rate": 5.698031878087071e-06, "loss": 0.6868, "step": 7680 }, { "epoch": 3.91, "learning_rate": 5.647378871563971e-06, "loss": 0.7887, "step": 7690 }, { "epoch": 3.91, "learning_rate": 5.59692333018293e-06, "loss": 0.7896, "step": 7700 }, { "epoch": 3.92, "learning_rate": 5.546665768768814e-06, "loss": 0.8122, "step": 7710 }, { "epoch": 3.92, "learning_rate": 5.496606700126397e-06, "loss": 0.8105, "step": 7720 }, { "epoch": 3.93, "learning_rate": 5.4467466350351245e-06, "loss": 0.8576, "step": 7730 }, { "epoch": 3.93, "learning_rate": 5.3970860822439045e-06, "loss": 0.8091, "step": 7740 }, { "epoch": 3.94, "learning_rate": 5.347625548465915e-06, "loss": 0.7318, "step": 7750 }, { "epoch": 3.94, "learning_rate": 5.298365538373426e-06, "loss": 0.9173, "step": 7760 }, { "epoch": 3.95, "learning_rate": 5.2493065545926644e-06, "loss": 0.8607, "step": 7770 }, { "epoch": 3.95, "learning_rate": 5.200449097698676e-06, "loss": 0.8263, "step": 7780 }, { "epoch": 3.96, "learning_rate": 5.151793666210217e-06, "loss": 0.8193, "step": 7790 }, { "epoch": 3.96, "learning_rate": 5.103340756584685e-06, "loss": 0.7089, "step": 7800 }, { "epoch": 3.97, "learning_rate": 5.055090863213008e-06, "loss": 0.7857, "step": 7810 }, { "epoch": 3.97, "learning_rate": 5.007044478414658e-06, "loss": 0.7785, "step": 7820 }, { "epoch": 3.98, "learning_rate": 4.9592020924325936e-06, "loss": 0.8344, "step": 7830 }, { "epoch": 3.98, "learning_rate": 4.911564193428259e-06, "loss": 0.8568, "step": 7840 }, { "epoch": 3.99, "learning_rate": 4.864131267476615e-06, "loss": 0.8104, "step": 7850 }, { "epoch": 3.99, "learning_rate": 4.816903798561168e-06, "loss": 0.7698, "step": 7860 }, { "epoch": 4.0, "learning_rate": 4.769882268569037e-06, "loss": 0.7878, "step": 7870 }, { "epoch": 4.0, "learning_rate": 4.723067157286038e-06, "loss": 0.8072, "step": 7880 }, { "epoch": 4.01, "learning_rate": 4.6764589423917895e-06, "loss": 0.7909, "step": 7890 }, { "epoch": 4.01, "learning_rate": 4.630058099454823e-06, "loss": 0.8484, "step": 7900 }, { "epoch": 4.02, "learning_rate": 4.583865101927756e-06, "loss": 0.8064, "step": 7910 }, { "epoch": 4.02, "learning_rate": 4.537880421142443e-06, "loss": 0.7695, "step": 7920 }, { "epoch": 4.03, "learning_rate": 4.492104526305174e-06, "loss": 0.7681, "step": 7930 }, { "epoch": 4.04, "learning_rate": 4.44653788449188e-06, "loss": 0.7337, "step": 7940 }, { "epoch": 4.04, "learning_rate": 4.401180960643375e-06, "loss": 0.7758, "step": 7950 }, { "epoch": 4.05, "learning_rate": 4.3560342175606064e-06, "loss": 0.7611, "step": 7960 }, { "epoch": 4.05, "learning_rate": 4.311098115899936e-06, "loss": 0.762, "step": 7970 }, { "epoch": 4.06, "learning_rate": 4.266373114168445e-06, "loss": 0.7921, "step": 7980 }, { "epoch": 4.06, "learning_rate": 4.22185966871923e-06, "loss": 0.7286, "step": 7990 }, { "epoch": 4.07, "learning_rate": 4.177558233746787e-06, "loss": 0.7583, "step": 8000 }, { "epoch": 4.07, "learning_rate": 4.133469261282341e-06, "loss": 0.8479, "step": 8010 }, { "epoch": 4.08, "learning_rate": 4.089593201189259e-06, "loss": 0.8339, "step": 8020 }, { "epoch": 4.08, "learning_rate": 4.045930501158443e-06, "loss": 0.7481, "step": 8030 }, { "epoch": 4.09, "learning_rate": 4.00248160670377e-06, "loss": 0.8175, "step": 8040 }, { "epoch": 4.09, "learning_rate": 3.959246961157545e-06, "loss": 0.7694, "step": 8050 }, { "epoch": 4.1, "learning_rate": 3.916227005665976e-06, "loss": 0.7131, "step": 8060 }, { "epoch": 4.1, "learning_rate": 3.873422179184677e-06, "loss": 0.7284, "step": 8070 }, { "epoch": 4.11, "learning_rate": 3.83083291847417e-06, "loss": 0.8848, "step": 8080 }, { "epoch": 4.11, "learning_rate": 3.7884596580954668e-06, "loss": 0.7927, "step": 8090 }, { "epoch": 4.12, "learning_rate": 3.7463028304055987e-06, "loss": 0.884, "step": 8100 }, { "epoch": 4.12, "learning_rate": 3.704362865553221e-06, "loss": 0.7089, "step": 8110 }, { "epoch": 4.13, "learning_rate": 3.662640191474223e-06, "loss": 0.8192, "step": 8120 }, { "epoch": 4.13, "learning_rate": 3.621135233887363e-06, "loss": 0.8491, "step": 8130 }, { "epoch": 4.14, "learning_rate": 3.5798484162899105e-06, "loss": 0.8009, "step": 8140 }, { "epoch": 4.14, "learning_rate": 3.5387801599533475e-06, "loss": 0.8679, "step": 8150 }, { "epoch": 4.15, "learning_rate": 3.4979308839190565e-06, "loss": 0.7229, "step": 8160 }, { "epoch": 4.15, "learning_rate": 3.4573010049940403e-06, "loss": 0.8634, "step": 8170 }, { "epoch": 4.16, "learning_rate": 3.4168909377466836e-06, "loss": 0.8256, "step": 8180 }, { "epoch": 4.16, "learning_rate": 3.3767010945025075e-06, "loss": 0.7816, "step": 8190 }, { "epoch": 4.17, "learning_rate": 3.3367318853399775e-06, "loss": 0.7579, "step": 8200 }, { "epoch": 4.17, "learning_rate": 3.296983718086308e-06, "loss": 0.7629, "step": 8210 }, { "epoch": 4.18, "learning_rate": 3.257456998313302e-06, "loss": 0.8061, "step": 8220 }, { "epoch": 4.18, "learning_rate": 3.2181521293332213e-06, "loss": 0.8339, "step": 8230 }, { "epoch": 4.19, "learning_rate": 3.1790695121946627e-06, "loss": 0.677, "step": 8240 }, { "epoch": 4.19, "learning_rate": 3.140209545678463e-06, "loss": 0.7408, "step": 8250 }, { "epoch": 4.2, "learning_rate": 3.101572626293642e-06, "loss": 0.8345, "step": 8260 }, { "epoch": 4.2, "learning_rate": 3.063159148273351e-06, "loss": 0.7823, "step": 8270 }, { "epoch": 4.21, "learning_rate": 3.024969503570843e-06, "loss": 0.8105, "step": 8280 }, { "epoch": 4.21, "learning_rate": 2.9870040818554934e-06, "loss": 0.7489, "step": 8290 }, { "epoch": 4.22, "learning_rate": 2.9492632705087926e-06, "loss": 0.7394, "step": 8300 }, { "epoch": 4.22, "learning_rate": 2.9117474546204283e-06, "loss": 0.8176, "step": 8310 }, { "epoch": 4.23, "learning_rate": 2.874457016984325e-06, "loss": 0.7422, "step": 8320 }, { "epoch": 4.23, "learning_rate": 2.8373923380947657e-06, "loss": 0.7723, "step": 8330 }, { "epoch": 4.24, "learning_rate": 2.80055379614248e-06, "loss": 0.7783, "step": 8340 }, { "epoch": 4.24, "learning_rate": 2.7639417670108165e-06, "loss": 0.8144, "step": 8350 }, { "epoch": 4.25, "learning_rate": 2.7275566242718846e-06, "loss": 0.6772, "step": 8360 }, { "epoch": 4.25, "learning_rate": 2.6913987391827545e-06, "loss": 0.7854, "step": 8370 }, { "epoch": 4.26, "learning_rate": 2.655468480681658e-06, "loss": 0.8284, "step": 8380 }, { "epoch": 4.26, "learning_rate": 2.6197662153842424e-06, "loss": 0.8052, "step": 8390 }, { "epoch": 4.27, "learning_rate": 2.584292307579808e-06, "loss": 0.7989, "step": 8400 }, { "epoch": 4.27, "learning_rate": 2.549047119227607e-06, "loss": 0.6619, "step": 8410 }, { "epoch": 4.28, "learning_rate": 2.5140310099531494e-06, "loss": 0.7794, "step": 8420 }, { "epoch": 4.28, "learning_rate": 2.479244337044509e-06, "loss": 0.6983, "step": 8430 }, { "epoch": 4.29, "learning_rate": 2.4446874554487216e-06, "loss": 0.8443, "step": 8440 }, { "epoch": 4.29, "learning_rate": 2.4103607177681233e-06, "loss": 0.7472, "step": 8450 }, { "epoch": 4.3, "learning_rate": 2.3762644742567786e-06, "loss": 0.7612, "step": 8460 }, { "epoch": 4.3, "learning_rate": 2.342399072816895e-06, "loss": 0.7782, "step": 8470 }, { "epoch": 4.31, "learning_rate": 2.308764858995266e-06, "loss": 0.7171, "step": 8480 }, { "epoch": 4.31, "learning_rate": 2.275362175979767e-06, "loss": 0.777, "step": 8490 }, { "epoch": 4.32, "learning_rate": 2.2421913645958304e-06, "loss": 0.7401, "step": 8500 }, { "epoch": 4.32, "learning_rate": 2.209252763302988e-06, "loss": 0.6972, "step": 8510 }, { "epoch": 4.33, "learning_rate": 2.176546708191396e-06, "loss": 0.7424, "step": 8520 }, { "epoch": 4.33, "learning_rate": 2.1440735329784273e-06, "loss": 0.7971, "step": 8530 }, { "epoch": 4.34, "learning_rate": 2.1118335690052533e-06, "loss": 0.8577, "step": 8540 }, { "epoch": 4.35, "learning_rate": 2.079827145233465e-06, "loss": 0.8388, "step": 8550 }, { "epoch": 4.35, "learning_rate": 2.048054588241721e-06, "loss": 0.7897, "step": 8560 }, { "epoch": 4.36, "learning_rate": 2.0165162222224087e-06, "loss": 0.7454, "step": 8570 }, { "epoch": 4.36, "learning_rate": 1.985212368978345e-06, "loss": 0.6974, "step": 8580 }, { "epoch": 4.37, "learning_rate": 1.954143347919482e-06, "loss": 0.8017, "step": 8590 }, { "epoch": 4.37, "learning_rate": 1.923309476059654e-06, "loss": 0.6967, "step": 8600 }, { "epoch": 4.38, "learning_rate": 1.8927110680133448e-06, "loss": 0.7471, "step": 8610 }, { "epoch": 4.38, "learning_rate": 1.8623484359924753e-06, "loss": 0.7466, "step": 8620 }, { "epoch": 4.39, "learning_rate": 1.832221889803215e-06, "loss": 0.7727, "step": 8630 }, { "epoch": 4.39, "learning_rate": 1.8023317368428272e-06, "loss": 0.6746, "step": 8640 }, { "epoch": 4.4, "learning_rate": 1.772678282096521e-06, "loss": 0.7834, "step": 8650 }, { "epoch": 4.4, "learning_rate": 1.7432618281343571e-06, "loss": 0.854, "step": 8660 }, { "epoch": 4.41, "learning_rate": 1.7140826751081417e-06, "loss": 0.7994, "step": 8670 }, { "epoch": 4.41, "learning_rate": 1.685141120748379e-06, "loss": 0.8229, "step": 8680 }, { "epoch": 4.42, "learning_rate": 1.6564374603612293e-06, "loss": 0.7353, "step": 8690 }, { "epoch": 4.42, "learning_rate": 1.6279719868254772e-06, "loss": 0.7952, "step": 8700 }, { "epoch": 4.43, "learning_rate": 1.5997449905895773e-06, "loss": 0.866, "step": 8710 }, { "epoch": 4.43, "learning_rate": 1.5717567596686661e-06, "loss": 0.8125, "step": 8720 }, { "epoch": 4.44, "learning_rate": 1.5440075796416292e-06, "loss": 0.8302, "step": 8730 }, { "epoch": 4.44, "learning_rate": 1.5164977336481896e-06, "loss": 0.7714, "step": 8740 }, { "epoch": 4.45, "learning_rate": 1.4892275023860176e-06, "loss": 0.841, "step": 8750 }, { "epoch": 4.45, "learning_rate": 1.4621971641078646e-06, "loss": 0.8062, "step": 8760 }, { "epoch": 4.46, "learning_rate": 1.4354069946187292e-06, "loss": 0.839, "step": 8770 }, { "epoch": 4.46, "learning_rate": 1.408857267273031e-06, "loss": 0.7927, "step": 8780 }, { "epoch": 4.47, "learning_rate": 1.3825482529718382e-06, "loss": 0.7557, "step": 8790 }, { "epoch": 4.47, "learning_rate": 1.3564802201600919e-06, "loss": 0.7451, "step": 8800 }, { "epoch": 4.48, "learning_rate": 1.3306534348238697e-06, "loss": 0.7812, "step": 8810 }, { "epoch": 4.48, "learning_rate": 1.305068160487674e-06, "loss": 0.7787, "step": 8820 }, { "epoch": 4.49, "learning_rate": 1.2797246582117422e-06, "loss": 0.712, "step": 8830 }, { "epoch": 4.49, "learning_rate": 1.2546231865893794e-06, "loss": 0.8208, "step": 8840 }, { "epoch": 4.5, "learning_rate": 1.2297640017443213e-06, "loss": 0.7492, "step": 8850 }, { "epoch": 4.5, "learning_rate": 1.2051473573281292e-06, "loss": 0.8182, "step": 8860 }, { "epoch": 4.51, "learning_rate": 1.180773504517585e-06, "loss": 0.7812, "step": 8870 }, { "epoch": 4.51, "learning_rate": 1.1566426920121415e-06, "loss": 0.7459, "step": 8880 }, { "epoch": 4.52, "learning_rate": 1.132755166031385e-06, "loss": 0.8221, "step": 8890 }, { "epoch": 4.52, "learning_rate": 1.1091111703125157e-06, "loss": 0.6909, "step": 8900 }, { "epoch": 4.53, "learning_rate": 1.0857109461078679e-06, "loss": 0.8285, "step": 8910 }, { "epoch": 4.53, "learning_rate": 1.0625547321824385e-06, "loss": 0.8397, "step": 8920 }, { "epoch": 4.54, "learning_rate": 1.0396427648114632e-06, "loss": 0.765, "step": 8930 }, { "epoch": 4.54, "learning_rate": 1.0169752777779984e-06, "loss": 0.795, "step": 8940 }, { "epoch": 4.55, "learning_rate": 9.945525023705327e-07, "loss": 0.6929, "step": 8950 }, { "epoch": 4.55, "learning_rate": 9.723746673806377e-07, "loss": 0.8618, "step": 8960 }, { "epoch": 4.56, "learning_rate": 9.50441999100618e-07, "loss": 0.7577, "step": 8970 }, { "epoch": 4.56, "learning_rate": 9.287547213212206e-07, "loss": 0.8186, "step": 8980 }, { "epoch": 4.57, "learning_rate": 9.07313055329334e-07, "loss": 0.8268, "step": 8990 }, { "epoch": 4.57, "learning_rate": 8.861172199057466e-07, "loss": 0.8767, "step": 9000 }, { "epoch": 4.58, "learning_rate": 8.651674313228997e-07, "loss": 0.716, "step": 9010 }, { "epoch": 4.58, "learning_rate": 8.444639033426904e-07, "loss": 0.8054, "step": 9020 }, { "epoch": 4.59, "learning_rate": 8.240068472142815e-07, "loss": 0.7585, "step": 9030 }, { "epoch": 4.59, "learning_rate": 8.037964716719609e-07, "loss": 0.8554, "step": 9040 }, { "epoch": 4.6, "learning_rate": 7.838329829329943e-07, "loss": 0.7113, "step": 9050 }, { "epoch": 4.6, "learning_rate": 7.641165846955345e-07, "loss": 0.8091, "step": 9060 }, { "epoch": 4.61, "learning_rate": 7.446474781365314e-07, "loss": 0.7636, "step": 9070 }, { "epoch": 4.61, "learning_rate": 7.254258619096982e-07, "loss": 0.7905, "step": 9080 }, { "epoch": 4.62, "learning_rate": 7.083381736919054e-07, "loss": 0.796, "step": 9090 }, { "epoch": 4.62, "learning_rate": 6.895873273412512e-07, "loss": 0.7762, "step": 9100 }, { "epoch": 4.63, "learning_rate": 6.710845331309279e-07, "loss": 0.7687, "step": 9110 }, { "epoch": 4.63, "learning_rate": 6.528299798548327e-07, "loss": 0.769, "step": 9120 }, { "epoch": 4.64, "learning_rate": 6.34823853773936e-07, "loss": 0.8484, "step": 9130 }, { "epoch": 4.64, "learning_rate": 6.170663386143721e-07, "loss": 0.7223, "step": 9140 }, { "epoch": 4.65, "learning_rate": 5.995576155655657e-07, "loss": 0.8462, "step": 9150 }, { "epoch": 4.66, "learning_rate": 5.822978632783748e-07, "loss": 0.7518, "step": 9160 }, { "epoch": 4.66, "learning_rate": 5.652872578632867e-07, "loss": 0.7555, "step": 9170 }, { "epoch": 4.67, "learning_rate": 5.485259728886055e-07, "loss": 0.7159, "step": 9180 }, { "epoch": 4.67, "learning_rate": 5.320141793786815e-07, "loss": 0.8041, "step": 9190 }, { "epoch": 4.68, "learning_rate": 5.157520458121734e-07, "loss": 0.7807, "step": 9200 }, { "epoch": 4.68, "learning_rate": 4.997397381203278e-07, "loss": 0.7471, "step": 9210 }, { "epoch": 4.69, "learning_rate": 4.839774196852831e-07, "loss": 0.6955, "step": 9220 }, { "epoch": 4.69, "learning_rate": 4.6846525133840135e-07, "loss": 0.8771, "step": 9230 }, { "epoch": 4.7, "learning_rate": 4.532033913586281e-07, "loss": 0.7871, "step": 9240 }, { "epoch": 4.7, "learning_rate": 4.3819199547089073e-07, "loss": 0.794, "step": 9250 }, { "epoch": 4.71, "learning_rate": 4.234312168444804e-07, "loss": 0.7949, "step": 9260 }, { "epoch": 4.71, "learning_rate": 4.0892120609151706e-07, "loss": 0.8159, "step": 9270 }, { "epoch": 4.72, "learning_rate": 3.946621112654009e-07, "loss": 0.7887, "step": 9280 }, { "epoch": 4.72, "learning_rate": 3.806540778593021e-07, "loss": 0.7609, "step": 9290 }, { "epoch": 4.73, "learning_rate": 3.668972488046762e-07, "loss": 0.7927, "step": 9300 }, { "epoch": 4.73, "learning_rate": 3.5339176446980424e-07, "loss": 0.7027, "step": 9310 }, { "epoch": 4.74, "learning_rate": 3.4013776265836293e-07, "loss": 0.7591, "step": 9320 }, { "epoch": 4.74, "learning_rate": 3.271353786080261e-07, "loss": 0.8672, "step": 9330 }, { "epoch": 4.75, "learning_rate": 3.143847449890658e-07, "loss": 0.7764, "step": 9340 }, { "epoch": 4.75, "learning_rate": 3.018859919030198e-07, "loss": 0.7795, "step": 9350 }, { "epoch": 4.76, "learning_rate": 2.896392468813458e-07, "loss": 0.8484, "step": 9360 }, { "epoch": 4.76, "learning_rate": 2.7764463488413327e-07, "loss": 0.7502, "step": 9370 }, { "epoch": 4.77, "learning_rate": 2.659022782988241e-07, "loss": 0.7886, "step": 9380 }, { "epoch": 4.77, "learning_rate": 2.5441229693895786e-07, "loss": 0.7578, "step": 9390 }, { "epoch": 4.78, "learning_rate": 2.431748080429619e-07, "loss": 0.7947, "step": 9400 }, { "epoch": 4.78, "learning_rate": 2.32189926272941e-07, "loss": 0.7695, "step": 9410 }, { "epoch": 4.79, "learning_rate": 2.2145776371352288e-07, "loss": 0.8766, "step": 9420 }, { "epoch": 4.79, "learning_rate": 2.109784298707007e-07, "loss": 0.9572, "step": 9430 }, { "epoch": 4.8, "learning_rate": 2.0075203167071733e-07, "loss": 0.8072, "step": 9440 }, { "epoch": 4.8, "learning_rate": 1.9077867345898282e-07, "loss": 0.7861, "step": 9450 }, { "epoch": 4.81, "learning_rate": 1.8105845699900592e-07, "loss": 0.813, "step": 9460 }, { "epoch": 4.81, "learning_rate": 1.7159148147135596e-07, "loss": 0.714, "step": 9470 }, { "epoch": 4.82, "learning_rate": 1.623778434726414e-07, "loss": 0.8831, "step": 9480 }, { "epoch": 4.82, "learning_rate": 1.5341763701453848e-07, "loss": 0.7104, "step": 9490 }, { "epoch": 4.83, "learning_rate": 1.4471095352282804e-07, "loss": 0.7877, "step": 9500 }, { "epoch": 4.83, "learning_rate": 1.362578818364546e-07, "loss": 0.7484, "step": 9510 }, { "epoch": 4.84, "learning_rate": 1.280585082066299e-07, "loss": 0.6747, "step": 9520 }, { "epoch": 4.84, "learning_rate": 1.2011291629594746e-07, "loss": 0.7271, "step": 9530 }, { "epoch": 4.85, "learning_rate": 1.1242118717753047e-07, "loss": 0.8372, "step": 9540 }, { "epoch": 4.85, "learning_rate": 1.0498339933420476e-07, "loss": 0.8522, "step": 9550 }, { "epoch": 4.86, "learning_rate": 9.779962865769654e-08, "loss": 0.8283, "step": 9560 }, { "epoch": 4.86, "learning_rate": 9.086994844786089e-08, "loss": 0.7474, "step": 9570 }, { "epoch": 4.87, "learning_rate": 8.419442941192679e-08, "loss": 0.7889, "step": 9580 }, { "epoch": 4.87, "learning_rate": 7.77731396637893e-08, "loss": 0.7646, "step": 9590 }, { "epoch": 4.88, "learning_rate": 7.160614472329907e-08, "loss": 0.8362, "step": 9600 }, { "epoch": 4.88, "learning_rate": 6.569350751560177e-08, "loss": 0.7019, "step": 9610 }, { "epoch": 4.89, "learning_rate": 6.003528837049966e-08, "loss": 0.7858, "step": 9620 }, { "epoch": 4.89, "learning_rate": 5.46315450218271e-08, "loss": 0.7506, "step": 9630 }, { "epoch": 4.9, "learning_rate": 4.9482332606867746e-08, "loss": 0.7883, "step": 9640 }, { "epoch": 4.9, "learning_rate": 4.458770366578824e-08, "loss": 0.7803, "step": 9650 }, { "epoch": 4.91, "learning_rate": 3.994770814110538e-08, "loss": 0.772, "step": 9660 }, { "epoch": 4.91, "learning_rate": 3.5562393377172595e-08, "loss": 0.8655, "step": 9670 }, { "epoch": 4.92, "learning_rate": 3.1431804119705366e-08, "loss": 0.7657, "step": 9680 }, { "epoch": 4.92, "learning_rate": 2.7555982515312107e-08, "loss": 0.8572, "step": 9690 }, { "epoch": 4.93, "learning_rate": 2.3934968111075095e-08, "loss": 0.8006, "step": 9700 }, { "epoch": 4.93, "learning_rate": 2.0568797854139678e-08, "loss": 0.749, "step": 9710 }, { "epoch": 4.94, "learning_rate": 1.745750609133956e-08, "loss": 0.7909, "step": 9720 }, { "epoch": 4.94, "learning_rate": 1.4601124568849878e-08, "loss": 0.8854, "step": 9730 }, { "epoch": 4.95, "learning_rate": 1.1999682431859672e-08, "loss": 0.6958, "step": 9740 }, { "epoch": 4.95, "learning_rate": 9.653206224272126e-09, "loss": 0.7082, "step": 9750 }, { "epoch": 4.96, "learning_rate": 7.561719888440899e-09, "loss": 0.8519, "step": 9760 }, { "epoch": 4.97, "learning_rate": 5.725244764917537e-09, "loss": 0.7919, "step": 9770 }, { "epoch": 4.97, "learning_rate": 4.143799592240538e-09, "loss": 0.7442, "step": 9780 }, { "epoch": 4.98, "learning_rate": 2.8174005067410637e-09, "loss": 0.7768, "step": 9790 }, { "epoch": 4.98, "learning_rate": 1.7460610423764011e-09, "loss": 0.803, "step": 9800 }, { "epoch": 4.99, "learning_rate": 9.297921305967405e-10, "loss": 0.738, "step": 9810 }, { "epoch": 4.99, "learning_rate": 3.686021002313744e-10, "loss": 0.7374, "step": 9820 }, { "epoch": 5.0, "learning_rate": 6.249667740265696e-11, "loss": 0.7616, "step": 9830 }, { "epoch": 5.0, "step": 9835, "total_flos": 3.008290083981312e+18, "train_loss": 0.8266432806266161, "train_runtime": 62063.7747, "train_samples_per_second": 2.536, "train_steps_per_second": 0.158 } ], "logging_steps": 10, "max_steps": 9835, "num_train_epochs": 5, "save_steps": 1000, "total_flos": 3.008290083981312e+18, "trial_name": null, "trial_params": null }