{ "best_metric": 2.7569446563720703, "best_model_checkpoint": "outputs/llama2_7b_darulm_unigram_tie_2e_16_11_23/checkpoint-260000", "epoch": 1.9999927672239521, "eval_steps": 1000, "global_step": 276518, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 1.9994503030566046e-05, "loss": 10.303, "step": 100 }, { "epoch": 0.0, "learning_rate": 1.9987342504592864e-05, "loss": 8.0353, "step": 200 }, { "epoch": 0.0, "learning_rate": 1.99801096500745e-05, "loss": 7.283, "step": 300 }, { "epoch": 0.0, "learning_rate": 1.9972876795556137e-05, "loss": 6.7724, "step": 400 }, { "epoch": 0.0, "learning_rate": 1.996564394103777e-05, "loss": 6.3252, "step": 500 }, { "epoch": 0.0, "learning_rate": 1.995848341506459e-05, "loss": 5.9418, "step": 600 }, { "epoch": 0.01, "learning_rate": 1.9951250560546228e-05, "loss": 5.6204, "step": 700 }, { "epoch": 0.01, "learning_rate": 1.9944017706027864e-05, "loss": 5.3454, "step": 800 }, { "epoch": 0.01, "learning_rate": 1.9936784851509498e-05, "loss": 5.1143, "step": 900 }, { "epoch": 0.01, "learning_rate": 1.9929551996991134e-05, "loss": 4.9167, "step": 1000 }, { "epoch": 0.01, "eval_accuracy": 0.2686138407237027, "eval_loss": 4.864671230316162, "eval_runtime": 28.3398, "eval_samples_per_second": 228.759, "eval_steps_per_second": 2.399, "step": 1000 }, { "epoch": 0.01, "learning_rate": 1.992231914247277e-05, "loss": 4.7595, "step": 1100 }, { "epoch": 0.01, "learning_rate": 1.9915086287954407e-05, "loss": 4.6176, "step": 1200 }, { "epoch": 0.01, "learning_rate": 1.990785343343604e-05, "loss": 4.492, "step": 1300 }, { "epoch": 0.01, "learning_rate": 1.9900620578917677e-05, "loss": 4.3949, "step": 1400 }, { "epoch": 0.01, "learning_rate": 1.9893387724399313e-05, "loss": 4.3078, "step": 1500 }, { "epoch": 0.01, "learning_rate": 1.988615486988095e-05, "loss": 4.2192, "step": 1600 }, { "epoch": 0.01, "learning_rate": 1.9878922015362586e-05, "loss": 4.1453, "step": 1700 }, { "epoch": 0.01, "learning_rate": 1.987168916084422e-05, "loss": 4.0878, "step": 1800 }, { "epoch": 0.01, "learning_rate": 1.9864456306325856e-05, "loss": 4.0281, "step": 1900 }, { "epoch": 0.01, "learning_rate": 1.9857223451807492e-05, "loss": 3.9697, "step": 2000 }, { "epoch": 0.01, "eval_accuracy": 0.34088309394404065, "eval_loss": 3.9705445766448975, "eval_runtime": 29.5273, "eval_samples_per_second": 219.56, "eval_steps_per_second": 2.303, "step": 2000 }, { "epoch": 0.02, "learning_rate": 1.984999059728913e-05, "loss": 3.9334, "step": 2100 }, { "epoch": 0.02, "learning_rate": 1.9842757742770765e-05, "loss": 3.8789, "step": 2200 }, { "epoch": 0.02, "learning_rate": 1.98355248882524e-05, "loss": 3.8488, "step": 2300 }, { "epoch": 0.02, "learning_rate": 1.9828292033734035e-05, "loss": 3.8073, "step": 2400 }, { "epoch": 0.02, "learning_rate": 1.982105917921567e-05, "loss": 3.7793, "step": 2500 }, { "epoch": 0.02, "learning_rate": 1.9813826324697308e-05, "loss": 3.7395, "step": 2600 }, { "epoch": 0.02, "learning_rate": 1.9806593470178945e-05, "loss": 3.7196, "step": 2700 }, { "epoch": 0.02, "learning_rate": 1.9799360615660578e-05, "loss": 3.6915, "step": 2800 }, { "epoch": 0.02, "learning_rate": 1.9792127761142214e-05, "loss": 3.6569, "step": 2900 }, { "epoch": 0.02, "learning_rate": 1.978489490662385e-05, "loss": 3.6398, "step": 3000 }, { "epoch": 0.02, "eval_accuracy": 0.3693563558386489, "eval_loss": 3.6475651264190674, "eval_runtime": 28.718, "eval_samples_per_second": 225.747, "eval_steps_per_second": 2.368, "step": 3000 }, { "epoch": 0.02, "learning_rate": 1.9777662052105487e-05, "loss": 3.6159, "step": 3100 }, { "epoch": 0.02, "learning_rate": 1.977042919758712e-05, "loss": 3.5994, "step": 3200 }, { "epoch": 0.02, "learning_rate": 1.9763196343068757e-05, "loss": 3.5769, "step": 3300 }, { "epoch": 0.02, "learning_rate": 1.9755963488550393e-05, "loss": 3.5605, "step": 3400 }, { "epoch": 0.03, "learning_rate": 1.974873063403203e-05, "loss": 3.5432, "step": 3500 }, { "epoch": 0.03, "learning_rate": 1.9741497779513667e-05, "loss": 3.5282, "step": 3600 }, { "epoch": 0.03, "learning_rate": 1.97342649249953e-05, "loss": 3.508, "step": 3700 }, { "epoch": 0.03, "learning_rate": 1.9727032070476936e-05, "loss": 3.4947, "step": 3800 }, { "epoch": 0.03, "learning_rate": 1.9719799215958573e-05, "loss": 3.4862, "step": 3900 }, { "epoch": 0.03, "learning_rate": 1.971256636144021e-05, "loss": 3.468, "step": 4000 }, { "epoch": 0.03, "eval_accuracy": 0.38501903923685776, "eval_loss": 3.4784200191497803, "eval_runtime": 29.0726, "eval_samples_per_second": 222.994, "eval_steps_per_second": 2.339, "step": 4000 }, { "epoch": 0.03, "learning_rate": 1.9705333506921846e-05, "loss": 3.454, "step": 4100 }, { "epoch": 0.03, "learning_rate": 1.969810065240348e-05, "loss": 3.4428, "step": 4200 }, { "epoch": 0.03, "learning_rate": 1.9690867797885115e-05, "loss": 3.4316, "step": 4300 }, { "epoch": 0.03, "learning_rate": 1.9683634943366752e-05, "loss": 3.4171, "step": 4400 }, { "epoch": 0.03, "learning_rate": 1.967640208884839e-05, "loss": 3.4073, "step": 4500 }, { "epoch": 0.03, "learning_rate": 1.9669241562875206e-05, "loss": 3.4011, "step": 4600 }, { "epoch": 0.03, "learning_rate": 1.966200870835684e-05, "loss": 3.389, "step": 4700 }, { "epoch": 0.03, "learning_rate": 1.9654775853838476e-05, "loss": 3.3863, "step": 4800 }, { "epoch": 0.04, "learning_rate": 1.9647542999320113e-05, "loss": 3.3624, "step": 4900 }, { "epoch": 0.04, "learning_rate": 1.964031014480175e-05, "loss": 3.3567, "step": 5000 }, { "epoch": 0.04, "eval_accuracy": 0.39525576696820947, "eval_loss": 3.3732998371124268, "eval_runtime": 28.1701, "eval_samples_per_second": 230.137, "eval_steps_per_second": 2.414, "step": 5000 }, { "epoch": 0.04, "learning_rate": 1.9633077290283386e-05, "loss": 3.3488, "step": 5100 }, { "epoch": 0.04, "learning_rate": 1.962584443576502e-05, "loss": 3.3415, "step": 5200 }, { "epoch": 0.04, "learning_rate": 1.9618611581246655e-05, "loss": 3.3334, "step": 5300 }, { "epoch": 0.04, "learning_rate": 1.9611378726728292e-05, "loss": 3.3279, "step": 5400 }, { "epoch": 0.04, "learning_rate": 1.9604145872209928e-05, "loss": 3.3107, "step": 5500 }, { "epoch": 0.04, "learning_rate": 1.9596913017691565e-05, "loss": 3.3096, "step": 5600 }, { "epoch": 0.04, "learning_rate": 1.9589752491718383e-05, "loss": 3.301, "step": 5700 }, { "epoch": 0.04, "learning_rate": 1.958251963720002e-05, "loss": 3.3033, "step": 5800 }, { "epoch": 0.04, "learning_rate": 1.9575286782681656e-05, "loss": 3.2967, "step": 5900 }, { "epoch": 0.04, "learning_rate": 1.9568053928163292e-05, "loss": 3.2828, "step": 6000 }, { "epoch": 0.04, "eval_accuracy": 0.40260530558050767, "eval_loss": 3.299899101257324, "eval_runtime": 28.3308, "eval_samples_per_second": 228.832, "eval_steps_per_second": 2.4, "step": 6000 }, { "epoch": 0.04, "learning_rate": 1.9560821073644925e-05, "loss": 3.2859, "step": 6100 }, { "epoch": 0.04, "learning_rate": 1.9553588219126562e-05, "loss": 3.2764, "step": 6200 }, { "epoch": 0.05, "learning_rate": 1.9546427693153383e-05, "loss": 3.2684, "step": 6300 }, { "epoch": 0.05, "learning_rate": 1.953919483863502e-05, "loss": 3.2635, "step": 6400 }, { "epoch": 0.05, "learning_rate": 1.9531961984116653e-05, "loss": 3.2525, "step": 6500 }, { "epoch": 0.05, "learning_rate": 1.952472912959829e-05, "loss": 3.2538, "step": 6600 }, { "epoch": 0.05, "learning_rate": 1.9517496275079926e-05, "loss": 3.2469, "step": 6700 }, { "epoch": 0.05, "learning_rate": 1.9510263420561563e-05, "loss": 3.2353, "step": 6800 }, { "epoch": 0.05, "learning_rate": 1.9503030566043196e-05, "loss": 3.2268, "step": 6900 }, { "epoch": 0.05, "learning_rate": 1.9495797711524832e-05, "loss": 3.2235, "step": 7000 }, { "epoch": 0.05, "eval_accuracy": 0.4080512229571761, "eval_loss": 3.245333433151245, "eval_runtime": 30.054, "eval_samples_per_second": 215.712, "eval_steps_per_second": 2.263, "step": 7000 }, { "epoch": 0.05, "learning_rate": 1.9488564857006465e-05, "loss": 3.2305, "step": 7100 }, { "epoch": 0.05, "learning_rate": 1.9481332002488102e-05, "loss": 3.2131, "step": 7200 }, { "epoch": 0.05, "learning_rate": 1.947409914796974e-05, "loss": 3.2181, "step": 7300 }, { "epoch": 0.05, "learning_rate": 1.9466866293451375e-05, "loss": 3.213, "step": 7400 }, { "epoch": 0.05, "learning_rate": 1.945963343893301e-05, "loss": 3.206, "step": 7500 }, { "epoch": 0.05, "learning_rate": 1.9452400584414645e-05, "loss": 3.1944, "step": 7600 }, { "epoch": 0.06, "learning_rate": 1.944516772989628e-05, "loss": 3.194, "step": 7700 }, { "epoch": 0.06, "learning_rate": 1.9437934875377918e-05, "loss": 3.19, "step": 7800 }, { "epoch": 0.06, "learning_rate": 1.9430702020859554e-05, "loss": 3.1975, "step": 7900 }, { "epoch": 0.06, "learning_rate": 1.942346916634119e-05, "loss": 3.1898, "step": 8000 }, { "epoch": 0.06, "eval_accuracy": 0.41249965974358277, "eval_loss": 3.20282244682312, "eval_runtime": 31.4355, "eval_samples_per_second": 206.232, "eval_steps_per_second": 2.163, "step": 8000 }, { "epoch": 0.06, "learning_rate": 1.9416236311822824e-05, "loss": 3.1871, "step": 8100 }, { "epoch": 0.06, "learning_rate": 1.9409075785849645e-05, "loss": 3.1755, "step": 8200 }, { "epoch": 0.06, "learning_rate": 1.940184293133128e-05, "loss": 3.1774, "step": 8300 }, { "epoch": 0.06, "learning_rate": 1.9394610076812915e-05, "loss": 3.1726, "step": 8400 }, { "epoch": 0.06, "learning_rate": 1.938737722229455e-05, "loss": 3.173, "step": 8500 }, { "epoch": 0.06, "learning_rate": 1.9380144367776188e-05, "loss": 3.1738, "step": 8600 }, { "epoch": 0.06, "learning_rate": 1.9372911513257824e-05, "loss": 3.1624, "step": 8700 }, { "epoch": 0.06, "learning_rate": 1.936567865873946e-05, "loss": 3.1588, "step": 8800 }, { "epoch": 0.06, "learning_rate": 1.9358445804221094e-05, "loss": 3.1592, "step": 8900 }, { "epoch": 0.07, "learning_rate": 1.935121294970273e-05, "loss": 3.1552, "step": 9000 }, { "epoch": 0.07, "eval_accuracy": 0.4160389313831348, "eval_loss": 3.1682770252227783, "eval_runtime": 28.3625, "eval_samples_per_second": 228.576, "eval_steps_per_second": 2.398, "step": 9000 }, { "epoch": 0.07, "learning_rate": 1.9343980095184367e-05, "loss": 3.1564, "step": 9100 }, { "epoch": 0.07, "learning_rate": 1.9336747240666004e-05, "loss": 3.1467, "step": 9200 }, { "epoch": 0.07, "learning_rate": 1.932951438614764e-05, "loss": 3.1519, "step": 9300 }, { "epoch": 0.07, "learning_rate": 1.9322281531629273e-05, "loss": 3.1442, "step": 9400 }, { "epoch": 0.07, "learning_rate": 1.931504867711091e-05, "loss": 3.1372, "step": 9500 }, { "epoch": 0.07, "learning_rate": 1.9307815822592546e-05, "loss": 3.1327, "step": 9600 }, { "epoch": 0.07, "learning_rate": 1.9300582968074183e-05, "loss": 3.1327, "step": 9700 }, { "epoch": 0.07, "learning_rate": 1.929335011355582e-05, "loss": 3.1293, "step": 9800 }, { "epoch": 0.07, "learning_rate": 1.9286117259037452e-05, "loss": 3.1369, "step": 9900 }, { "epoch": 0.07, "learning_rate": 1.927888440451909e-05, "loss": 3.1068, "step": 10000 }, { "epoch": 0.07, "eval_accuracy": 0.41901262124470334, "eval_loss": 3.139714002609253, "eval_runtime": 28.3677, "eval_samples_per_second": 228.535, "eval_steps_per_second": 2.397, "step": 10000 }, { "epoch": 0.07, "learning_rate": 1.9271651550000725e-05, "loss": 3.1264, "step": 10100 }, { "epoch": 0.07, "learning_rate": 1.9264418695482362e-05, "loss": 3.1248, "step": 10200 }, { "epoch": 0.07, "learning_rate": 1.9257185840963995e-05, "loss": 3.1291, "step": 10300 }, { "epoch": 0.08, "learning_rate": 1.924995298644563e-05, "loss": 3.1076, "step": 10400 }, { "epoch": 0.08, "learning_rate": 1.9242792460472453e-05, "loss": 3.107, "step": 10500 }, { "epoch": 0.08, "learning_rate": 1.923555960595409e-05, "loss": 3.1117, "step": 10600 }, { "epoch": 0.08, "learning_rate": 1.9228326751435723e-05, "loss": 3.1053, "step": 10700 }, { "epoch": 0.08, "learning_rate": 1.922109389691736e-05, "loss": 3.1084, "step": 10800 }, { "epoch": 0.08, "learning_rate": 1.9213861042398996e-05, "loss": 3.1009, "step": 10900 }, { "epoch": 0.08, "learning_rate": 1.9206628187880632e-05, "loss": 3.1019, "step": 11000 }, { "epoch": 0.08, "eval_accuracy": 0.4217092667701046, "eval_loss": 3.115234136581421, "eval_runtime": 28.1789, "eval_samples_per_second": 230.066, "eval_steps_per_second": 2.413, "step": 11000 }, { "epoch": 0.08, "learning_rate": 1.919939533336227e-05, "loss": 3.1015, "step": 11100 }, { "epoch": 0.08, "learning_rate": 1.9192162478843902e-05, "loss": 3.1001, "step": 11200 }, { "epoch": 0.08, "learning_rate": 1.9184929624325538e-05, "loss": 3.0938, "step": 11300 }, { "epoch": 0.08, "learning_rate": 1.9177696769807175e-05, "loss": 3.1016, "step": 11400 }, { "epoch": 0.08, "learning_rate": 1.9170536243833993e-05, "loss": 3.0905, "step": 11500 }, { "epoch": 0.08, "learning_rate": 1.916330338931563e-05, "loss": 3.0883, "step": 11600 }, { "epoch": 0.08, "learning_rate": 1.9156070534797266e-05, "loss": 3.0899, "step": 11700 }, { "epoch": 0.09, "learning_rate": 1.91488376802789e-05, "loss": 3.0803, "step": 11800 }, { "epoch": 0.09, "learning_rate": 1.9141604825760535e-05, "loss": 3.0806, "step": 11900 }, { "epoch": 0.09, "learning_rate": 1.9134371971242172e-05, "loss": 3.0849, "step": 12000 }, { "epoch": 0.09, "eval_accuracy": 0.423874809834469, "eval_loss": 3.094170331954956, "eval_runtime": 28.7606, "eval_samples_per_second": 225.413, "eval_steps_per_second": 2.364, "step": 12000 }, { "epoch": 0.09, "learning_rate": 1.912713911672381e-05, "loss": 3.0781, "step": 12100 }, { "epoch": 0.09, "learning_rate": 1.911990626220544e-05, "loss": 3.0784, "step": 12200 }, { "epoch": 0.09, "learning_rate": 1.9112673407687078e-05, "loss": 3.0682, "step": 12300 }, { "epoch": 0.09, "learning_rate": 1.9105440553168715e-05, "loss": 3.0766, "step": 12400 }, { "epoch": 0.09, "learning_rate": 1.909820769865035e-05, "loss": 3.0708, "step": 12500 }, { "epoch": 0.09, "learning_rate": 1.9090974844131988e-05, "loss": 3.06, "step": 12600 }, { "epoch": 0.09, "learning_rate": 1.908374198961362e-05, "loss": 3.0599, "step": 12700 }, { "epoch": 0.09, "learning_rate": 1.9076509135095257e-05, "loss": 3.0593, "step": 12800 }, { "epoch": 0.09, "learning_rate": 1.9069276280576894e-05, "loss": 3.0675, "step": 12900 }, { "epoch": 0.09, "learning_rate": 1.906204342605853e-05, "loss": 3.0561, "step": 13000 }, { "epoch": 0.09, "eval_accuracy": 0.42562236679339327, "eval_loss": 3.076077938079834, "eval_runtime": 28.8743, "eval_samples_per_second": 224.525, "eval_steps_per_second": 2.355, "step": 13000 }, { "epoch": 0.09, "learning_rate": 1.9054810571540167e-05, "loss": 3.0535, "step": 13100 }, { "epoch": 0.1, "learning_rate": 1.90475777170218e-05, "loss": 3.0541, "step": 13200 }, { "epoch": 0.1, "learning_rate": 1.9040344862503437e-05, "loss": 3.0628, "step": 13300 }, { "epoch": 0.1, "learning_rate": 1.9033112007985073e-05, "loss": 3.0601, "step": 13400 }, { "epoch": 0.1, "learning_rate": 1.902587915346671e-05, "loss": 3.0525, "step": 13500 }, { "epoch": 0.1, "learning_rate": 1.9018718627493528e-05, "loss": 3.0553, "step": 13600 }, { "epoch": 0.1, "learning_rate": 1.9011485772975164e-05, "loss": 3.0457, "step": 13700 }, { "epoch": 0.1, "learning_rate": 1.90042529184568e-05, "loss": 3.0466, "step": 13800 }, { "epoch": 0.1, "learning_rate": 1.8997020063938437e-05, "loss": 3.0506, "step": 13900 }, { "epoch": 0.1, "learning_rate": 1.898978720942007e-05, "loss": 3.0429, "step": 14000 }, { "epoch": 0.1, "eval_accuracy": 0.42772923452891876, "eval_loss": 3.0595009326934814, "eval_runtime": 28.2676, "eval_samples_per_second": 229.344, "eval_steps_per_second": 2.406, "step": 14000 }, { "epoch": 0.1, "learning_rate": 1.8982554354901707e-05, "loss": 3.046, "step": 14100 }, { "epoch": 0.1, "learning_rate": 1.8975321500383343e-05, "loss": 3.0338, "step": 14200 }, { "epoch": 0.1, "learning_rate": 1.896808864586498e-05, "loss": 3.0346, "step": 14300 }, { "epoch": 0.1, "learning_rate": 1.8960855791346616e-05, "loss": 3.0413, "step": 14400 }, { "epoch": 0.1, "learning_rate": 1.895362293682825e-05, "loss": 3.0441, "step": 14500 }, { "epoch": 0.11, "learning_rate": 1.8946390082309886e-05, "loss": 3.0364, "step": 14600 }, { "epoch": 0.11, "learning_rate": 1.8939157227791522e-05, "loss": 3.0317, "step": 14700 }, { "epoch": 0.11, "learning_rate": 1.893192437327316e-05, "loss": 3.0346, "step": 14800 }, { "epoch": 0.11, "learning_rate": 1.8924691518754795e-05, "loss": 3.0259, "step": 14900 }, { "epoch": 0.11, "learning_rate": 1.891745866423643e-05, "loss": 3.035, "step": 15000 }, { "epoch": 0.11, "eval_accuracy": 0.4292789890906231, "eval_loss": 3.045100450515747, "eval_runtime": 37.5227, "eval_samples_per_second": 172.775, "eval_steps_per_second": 1.812, "step": 15000 }, { "epoch": 0.11, "learning_rate": 1.8910298138263247e-05, "loss": 3.0315, "step": 15100 }, { "epoch": 0.11, "learning_rate": 1.8903065283744883e-05, "loss": 3.0256, "step": 15200 }, { "epoch": 0.11, "learning_rate": 1.889583242922652e-05, "loss": 3.0319, "step": 15300 }, { "epoch": 0.11, "learning_rate": 1.8888599574708156e-05, "loss": 3.0208, "step": 15400 }, { "epoch": 0.11, "learning_rate": 1.888136672018979e-05, "loss": 3.0232, "step": 15500 }, { "epoch": 0.11, "learning_rate": 1.887420619421661e-05, "loss": 3.0272, "step": 15600 }, { "epoch": 0.11, "learning_rate": 1.8866973339698247e-05, "loss": 3.019, "step": 15700 }, { "epoch": 0.11, "learning_rate": 1.8859740485179884e-05, "loss": 3.0133, "step": 15800 }, { "epoch": 0.12, "learning_rate": 1.8852507630661517e-05, "loss": 3.0151, "step": 15900 }, { "epoch": 0.12, "learning_rate": 1.8845274776143153e-05, "loss": 3.0077, "step": 16000 }, { "epoch": 0.12, "eval_accuracy": 0.4305855737328095, "eval_loss": 3.0321857929229736, "eval_runtime": 28.0011, "eval_samples_per_second": 231.526, "eval_steps_per_second": 2.428, "step": 16000 }, { "epoch": 0.12, "learning_rate": 1.883804192162479e-05, "loss": 3.0229, "step": 16100 }, { "epoch": 0.12, "learning_rate": 1.8830809067106426e-05, "loss": 3.0126, "step": 16200 }, { "epoch": 0.12, "learning_rate": 1.8823576212588063e-05, "loss": 3.0175, "step": 16300 }, { "epoch": 0.12, "learning_rate": 1.8816343358069696e-05, "loss": 3.0128, "step": 16400 }, { "epoch": 0.12, "learning_rate": 1.8809110503551333e-05, "loss": 3.0058, "step": 16500 }, { "epoch": 0.12, "learning_rate": 1.880187764903297e-05, "loss": 3.0173, "step": 16600 }, { "epoch": 0.12, "learning_rate": 1.8794644794514606e-05, "loss": 3.0078, "step": 16700 }, { "epoch": 0.12, "learning_rate": 1.8787411939996242e-05, "loss": 2.9971, "step": 16800 }, { "epoch": 0.12, "learning_rate": 1.8780179085477875e-05, "loss": 3.0027, "step": 16900 }, { "epoch": 0.12, "learning_rate": 1.8772946230959512e-05, "loss": 3.0008, "step": 17000 }, { "epoch": 0.12, "eval_accuracy": 0.4320149531353495, "eval_loss": 3.0199708938598633, "eval_runtime": 27.8606, "eval_samples_per_second": 232.694, "eval_steps_per_second": 2.441, "step": 17000 }, { "epoch": 0.12, "learning_rate": 1.8765713376441148e-05, "loss": 3.0051, "step": 17100 }, { "epoch": 0.12, "learning_rate": 1.8758480521922785e-05, "loss": 3.0007, "step": 17200 }, { "epoch": 0.13, "learning_rate": 1.8751319995949603e-05, "loss": 3.0039, "step": 17300 }, { "epoch": 0.13, "learning_rate": 1.874408714143124e-05, "loss": 3.0022, "step": 17400 }, { "epoch": 0.13, "learning_rate": 1.8736854286912876e-05, "loss": 3.0052, "step": 17500 }, { "epoch": 0.13, "learning_rate": 1.8729621432394512e-05, "loss": 3.0004, "step": 17600 }, { "epoch": 0.13, "learning_rate": 1.8722388577876146e-05, "loss": 2.9994, "step": 17700 }, { "epoch": 0.13, "learning_rate": 1.8715155723357782e-05, "loss": 2.9951, "step": 17800 }, { "epoch": 0.13, "learning_rate": 1.870792286883942e-05, "loss": 2.9933, "step": 17900 }, { "epoch": 0.13, "learning_rate": 1.8700690014321055e-05, "loss": 2.9952, "step": 18000 }, { "epoch": 0.13, "eval_accuracy": 0.4330487277434497, "eval_loss": 3.0093255043029785, "eval_runtime": 28.2096, "eval_samples_per_second": 229.815, "eval_steps_per_second": 2.411, "step": 18000 }, { "epoch": 0.13, "learning_rate": 1.8693457159802688e-05, "loss": 2.993, "step": 18100 }, { "epoch": 0.13, "learning_rate": 1.8686224305284325e-05, "loss": 2.9969, "step": 18200 }, { "epoch": 0.13, "learning_rate": 1.867899145076596e-05, "loss": 2.995, "step": 18300 }, { "epoch": 0.13, "learning_rate": 1.8671758596247594e-05, "loss": 2.9944, "step": 18400 }, { "epoch": 0.13, "learning_rate": 1.866452574172923e-05, "loss": 2.9843, "step": 18500 }, { "epoch": 0.13, "learning_rate": 1.8657292887210867e-05, "loss": 2.993, "step": 18600 }, { "epoch": 0.14, "learning_rate": 1.8650060032692504e-05, "loss": 2.9936, "step": 18700 }, { "epoch": 0.14, "learning_rate": 1.864282717817414e-05, "loss": 2.9957, "step": 18800 }, { "epoch": 0.14, "learning_rate": 1.8635594323655774e-05, "loss": 2.9793, "step": 18900 }, { "epoch": 0.14, "learning_rate": 1.862836146913741e-05, "loss": 2.9825, "step": 19000 }, { "epoch": 0.14, "eval_accuracy": 0.4340897611551176, "eval_loss": 2.999600410461426, "eval_runtime": 28.9603, "eval_samples_per_second": 223.858, "eval_steps_per_second": 2.348, "step": 19000 }, { "epoch": 0.14, "learning_rate": 1.8621128614619047e-05, "loss": 2.9911, "step": 19100 }, { "epoch": 0.14, "learning_rate": 1.8613895760100683e-05, "loss": 2.9847, "step": 19200 }, { "epoch": 0.14, "learning_rate": 1.8606662905582316e-05, "loss": 2.979, "step": 19300 }, { "epoch": 0.14, "learning_rate": 1.8599430051063953e-05, "loss": 2.9858, "step": 19400 }, { "epoch": 0.14, "learning_rate": 1.859219719654559e-05, "loss": 2.9766, "step": 19500 }, { "epoch": 0.14, "learning_rate": 1.8584964342027226e-05, "loss": 2.9735, "step": 19600 }, { "epoch": 0.14, "learning_rate": 1.8577731487508862e-05, "loss": 2.9777, "step": 19700 }, { "epoch": 0.14, "learning_rate": 1.8570498632990495e-05, "loss": 2.9714, "step": 19800 }, { "epoch": 0.14, "learning_rate": 1.8563265778472132e-05, "loss": 2.9762, "step": 19900 }, { "epoch": 0.14, "learning_rate": 1.855603292395377e-05, "loss": 2.9781, "step": 20000 }, { "epoch": 0.14, "eval_accuracy": 0.4350987348510282, "eval_loss": 2.990344285964966, "eval_runtime": 28.2496, "eval_samples_per_second": 229.49, "eval_steps_per_second": 2.407, "step": 20000 }, { "epoch": 0.15, "learning_rate": 1.8548800069435405e-05, "loss": 2.9686, "step": 20100 }, { "epoch": 0.15, "learning_rate": 1.854156721491704e-05, "loss": 2.9724, "step": 20200 }, { "epoch": 0.15, "learning_rate": 1.8534334360398675e-05, "loss": 2.9771, "step": 20300 }, { "epoch": 0.15, "learning_rate": 1.852710150588031e-05, "loss": 2.9698, "step": 20400 }, { "epoch": 0.15, "learning_rate": 1.8519868651361948e-05, "loss": 2.9693, "step": 20500 }, { "epoch": 0.15, "learning_rate": 1.8512635796843584e-05, "loss": 2.9712, "step": 20600 }, { "epoch": 0.15, "learning_rate": 1.850540294232522e-05, "loss": 2.9724, "step": 20700 }, { "epoch": 0.15, "learning_rate": 1.849824241635204e-05, "loss": 2.9726, "step": 20800 }, { "epoch": 0.15, "learning_rate": 1.8491009561833675e-05, "loss": 2.9656, "step": 20900 }, { "epoch": 0.15, "learning_rate": 1.848377670731531e-05, "loss": 2.957, "step": 21000 }, { "epoch": 0.15, "eval_accuracy": 0.435996406892234, "eval_loss": 2.9821181297302246, "eval_runtime": 27.9904, "eval_samples_per_second": 231.615, "eval_steps_per_second": 2.429, "step": 21000 }, { "epoch": 0.15, "learning_rate": 1.8476543852796948e-05, "loss": 2.9718, "step": 21100 }, { "epoch": 0.15, "learning_rate": 1.846931099827858e-05, "loss": 2.9685, "step": 21200 }, { "epoch": 0.15, "learning_rate": 1.8462078143760218e-05, "loss": 2.9582, "step": 21300 }, { "epoch": 0.15, "learning_rate": 1.8454845289241854e-05, "loss": 2.9644, "step": 21400 }, { "epoch": 0.16, "learning_rate": 1.844761243472349e-05, "loss": 2.9678, "step": 21500 }, { "epoch": 0.16, "learning_rate": 1.8440379580205124e-05, "loss": 2.9632, "step": 21600 }, { "epoch": 0.16, "learning_rate": 1.843314672568676e-05, "loss": 2.9642, "step": 21700 }, { "epoch": 0.16, "learning_rate": 1.8425913871168397e-05, "loss": 2.9633, "step": 21800 }, { "epoch": 0.16, "learning_rate": 1.841875334519522e-05, "loss": 2.9495, "step": 21900 }, { "epoch": 0.16, "learning_rate": 1.841152049067685e-05, "loss": 2.9676, "step": 22000 }, { "epoch": 0.16, "eval_accuracy": 0.4368305644022224, "eval_loss": 2.973825454711914, "eval_runtime": 30.0107, "eval_samples_per_second": 216.023, "eval_steps_per_second": 2.266, "step": 22000 }, { "epoch": 0.16, "learning_rate": 1.8404287636158488e-05, "loss": 2.9593, "step": 22100 }, { "epoch": 0.16, "learning_rate": 1.8397054781640125e-05, "loss": 2.9548, "step": 22200 }, { "epoch": 0.16, "learning_rate": 1.838982192712176e-05, "loss": 2.9656, "step": 22300 }, { "epoch": 0.16, "learning_rate": 1.8382589072603398e-05, "loss": 2.957, "step": 22400 }, { "epoch": 0.16, "learning_rate": 1.837535621808503e-05, "loss": 2.9557, "step": 22500 }, { "epoch": 0.16, "learning_rate": 1.8368123363566667e-05, "loss": 2.9577, "step": 22600 }, { "epoch": 0.16, "learning_rate": 1.8360962837593485e-05, "loss": 2.9519, "step": 22700 }, { "epoch": 0.16, "learning_rate": 1.8353729983075122e-05, "loss": 2.955, "step": 22800 }, { "epoch": 0.17, "learning_rate": 1.834649712855676e-05, "loss": 2.9548, "step": 22900 }, { "epoch": 0.17, "learning_rate": 1.8339336602583576e-05, "loss": 2.9513, "step": 23000 }, { "epoch": 0.17, "eval_accuracy": 0.43764657490329156, "eval_loss": 2.9663443565368652, "eval_runtime": 28.3956, "eval_samples_per_second": 228.31, "eval_steps_per_second": 2.395, "step": 23000 }, { "epoch": 0.17, "learning_rate": 1.8332103748065213e-05, "loss": 2.9514, "step": 23100 }, { "epoch": 0.17, "learning_rate": 1.832487089354685e-05, "loss": 2.9546, "step": 23200 }, { "epoch": 0.17, "learning_rate": 1.8317638039028486e-05, "loss": 2.9449, "step": 23300 }, { "epoch": 0.17, "learning_rate": 1.831040518451012e-05, "loss": 2.9515, "step": 23400 }, { "epoch": 0.17, "learning_rate": 1.8303172329991756e-05, "loss": 2.946, "step": 23500 }, { "epoch": 0.17, "learning_rate": 1.8295939475473392e-05, "loss": 2.9443, "step": 23600 }, { "epoch": 0.17, "learning_rate": 1.828870662095503e-05, "loss": 2.9437, "step": 23700 }, { "epoch": 0.17, "learning_rate": 1.8281473766436665e-05, "loss": 2.9395, "step": 23800 }, { "epoch": 0.17, "learning_rate": 1.8274240911918298e-05, "loss": 2.9422, "step": 23900 }, { "epoch": 0.17, "learning_rate": 1.8267008057399935e-05, "loss": 2.9475, "step": 24000 }, { "epoch": 0.17, "eval_accuracy": 0.4384904108180369, "eval_loss": 2.959416389465332, "eval_runtime": 30.8733, "eval_samples_per_second": 209.988, "eval_steps_per_second": 2.203, "step": 24000 }, { "epoch": 0.17, "learning_rate": 1.825977520288157e-05, "loss": 2.9432, "step": 24100 }, { "epoch": 0.18, "learning_rate": 1.8252542348363208e-05, "loss": 2.9413, "step": 24200 }, { "epoch": 0.18, "learning_rate": 1.8245309493844844e-05, "loss": 2.9378, "step": 24300 }, { "epoch": 0.18, "learning_rate": 1.8238076639326477e-05, "loss": 2.9429, "step": 24400 }, { "epoch": 0.18, "learning_rate": 1.8230843784808114e-05, "loss": 2.9398, "step": 24500 }, { "epoch": 0.18, "learning_rate": 1.822361093028975e-05, "loss": 2.9462, "step": 24600 }, { "epoch": 0.18, "learning_rate": 1.8216378075771387e-05, "loss": 2.9334, "step": 24700 }, { "epoch": 0.18, "learning_rate": 1.820914522125302e-05, "loss": 2.944, "step": 24800 }, { "epoch": 0.18, "learning_rate": 1.8201912366734657e-05, "loss": 2.9331, "step": 24900 }, { "epoch": 0.18, "learning_rate": 1.8194679512216293e-05, "loss": 2.9406, "step": 25000 }, { "epoch": 0.18, "eval_accuracy": 0.43914672764061663, "eval_loss": 2.953129291534424, "eval_runtime": 29.0217, "eval_samples_per_second": 223.385, "eval_steps_per_second": 2.343, "step": 25000 }, { "epoch": 0.18, "learning_rate": 1.818744665769793e-05, "loss": 2.9408, "step": 25100 }, { "epoch": 0.18, "learning_rate": 1.8180213803179566e-05, "loss": 2.9313, "step": 25200 }, { "epoch": 0.18, "learning_rate": 1.81729809486612e-05, "loss": 2.9436, "step": 25300 }, { "epoch": 0.18, "learning_rate": 1.8165748094142836e-05, "loss": 2.9374, "step": 25400 }, { "epoch": 0.18, "learning_rate": 1.8158515239624472e-05, "loss": 2.9326, "step": 25500 }, { "epoch": 0.19, "learning_rate": 1.815128238510611e-05, "loss": 2.942, "step": 25600 }, { "epoch": 0.19, "learning_rate": 1.8144049530587745e-05, "loss": 2.9281, "step": 25700 }, { "epoch": 0.19, "learning_rate": 1.813681667606938e-05, "loss": 2.9341, "step": 25800 }, { "epoch": 0.19, "learning_rate": 1.8129583821551015e-05, "loss": 2.9369, "step": 25900 }, { "epoch": 0.19, "learning_rate": 1.812235096703265e-05, "loss": 2.9387, "step": 26000 }, { "epoch": 0.19, "eval_accuracy": 0.4397600965420875, "eval_loss": 2.9472999572753906, "eval_runtime": 28.7579, "eval_samples_per_second": 225.434, "eval_steps_per_second": 2.365, "step": 26000 }, { "epoch": 0.19, "learning_rate": 1.8115118112514288e-05, "loss": 2.9313, "step": 26100 }, { "epoch": 0.19, "learning_rate": 1.8107957586541106e-05, "loss": 2.9328, "step": 26200 }, { "epoch": 0.19, "learning_rate": 1.810072473202274e-05, "loss": 2.9346, "step": 26300 }, { "epoch": 0.19, "learning_rate": 1.8093491877504376e-05, "loss": 2.9297, "step": 26400 }, { "epoch": 0.19, "learning_rate": 1.8086259022986012e-05, "loss": 2.9283, "step": 26500 }, { "epoch": 0.19, "learning_rate": 1.807902616846765e-05, "loss": 2.9278, "step": 26600 }, { "epoch": 0.19, "learning_rate": 1.8071793313949285e-05, "loss": 2.9337, "step": 26700 }, { "epoch": 0.19, "learning_rate": 1.806456045943092e-05, "loss": 2.9267, "step": 26800 }, { "epoch": 0.19, "learning_rate": 1.8057327604912555e-05, "loss": 2.9238, "step": 26900 }, { "epoch": 0.2, "learning_rate": 1.8050167078939376e-05, "loss": 2.9353, "step": 27000 }, { "epoch": 0.2, "eval_accuracy": 0.4403087411117463, "eval_loss": 2.941570520401001, "eval_runtime": 29.8384, "eval_samples_per_second": 217.271, "eval_steps_per_second": 2.279, "step": 27000 }, { "epoch": 0.2, "learning_rate": 1.8042934224421013e-05, "loss": 2.9234, "step": 27100 }, { "epoch": 0.2, "learning_rate": 1.8035701369902646e-05, "loss": 2.9276, "step": 27200 }, { "epoch": 0.2, "learning_rate": 1.8028468515384282e-05, "loss": 2.9249, "step": 27300 }, { "epoch": 0.2, "learning_rate": 1.802123566086592e-05, "loss": 2.9259, "step": 27400 }, { "epoch": 0.2, "learning_rate": 1.8014002806347555e-05, "loss": 2.9235, "step": 27500 }, { "epoch": 0.2, "learning_rate": 1.8006769951829192e-05, "loss": 2.9182, "step": 27600 }, { "epoch": 0.2, "learning_rate": 1.7999537097310825e-05, "loss": 2.9273, "step": 27700 }, { "epoch": 0.2, "learning_rate": 1.799230424279246e-05, "loss": 2.925, "step": 27800 }, { "epoch": 0.2, "learning_rate": 1.7985071388274098e-05, "loss": 2.9196, "step": 27900 }, { "epoch": 0.2, "learning_rate": 1.7977838533755735e-05, "loss": 2.9208, "step": 28000 }, { "epoch": 0.2, "eval_accuracy": 0.44107756938962533, "eval_loss": 2.936343193054199, "eval_runtime": 29.069, "eval_samples_per_second": 223.021, "eval_steps_per_second": 2.339, "step": 28000 }, { "epoch": 0.2, "learning_rate": 1.797060567923737e-05, "loss": 2.907, "step": 28100 }, { "epoch": 0.2, "learning_rate": 1.7963372824719004e-05, "loss": 2.9215, "step": 28200 }, { "epoch": 0.2, "learning_rate": 1.795613997020064e-05, "loss": 2.9223, "step": 28300 }, { "epoch": 0.21, "learning_rate": 1.7948907115682277e-05, "loss": 2.9122, "step": 28400 }, { "epoch": 0.21, "learning_rate": 1.7941674261163914e-05, "loss": 2.9166, "step": 28500 }, { "epoch": 0.21, "learning_rate": 1.7934441406645547e-05, "loss": 2.9289, "step": 28600 }, { "epoch": 0.21, "learning_rate": 1.7927208552127183e-05, "loss": 2.914, "step": 28700 }, { "epoch": 0.21, "learning_rate": 1.791997569760882e-05, "loss": 2.9175, "step": 28800 }, { "epoch": 0.21, "learning_rate": 1.7912742843090456e-05, "loss": 2.9142, "step": 28900 }, { "epoch": 0.21, "learning_rate": 1.7905509988572093e-05, "loss": 2.9142, "step": 29000 }, { "epoch": 0.21, "eval_accuracy": 0.4415330593134987, "eval_loss": 2.9310333728790283, "eval_runtime": 30.502, "eval_samples_per_second": 212.543, "eval_steps_per_second": 2.229, "step": 29000 }, { "epoch": 0.21, "learning_rate": 1.789834946259891e-05, "loss": 2.9196, "step": 29100 }, { "epoch": 0.21, "learning_rate": 1.7891116608080548e-05, "loss": 2.9166, "step": 29200 }, { "epoch": 0.21, "learning_rate": 1.788388375356218e-05, "loss": 2.917, "step": 29300 }, { "epoch": 0.21, "learning_rate": 1.7876650899043817e-05, "loss": 2.9178, "step": 29400 }, { "epoch": 0.21, "learning_rate": 1.7869418044525454e-05, "loss": 2.9148, "step": 29500 }, { "epoch": 0.21, "learning_rate": 1.786218519000709e-05, "loss": 2.9086, "step": 29600 }, { "epoch": 0.21, "learning_rate": 1.7855024664033908e-05, "loss": 2.9154, "step": 29700 }, { "epoch": 0.22, "learning_rate": 1.7847791809515545e-05, "loss": 2.9156, "step": 29800 }, { "epoch": 0.22, "learning_rate": 1.784055895499718e-05, "loss": 2.9178, "step": 29900 }, { "epoch": 0.22, "learning_rate": 1.7833326100478814e-05, "loss": 2.9167, "step": 30000 }, { "epoch": 0.22, "eval_accuracy": 0.44185305157077487, "eval_loss": 2.9265494346618652, "eval_runtime": 28.0744, "eval_samples_per_second": 230.922, "eval_steps_per_second": 2.422, "step": 30000 }, { "epoch": 0.22, "learning_rate": 1.782609324596045e-05, "loss": 2.905, "step": 30100 }, { "epoch": 0.22, "learning_rate": 1.7818860391442087e-05, "loss": 2.9161, "step": 30200 }, { "epoch": 0.22, "learning_rate": 1.7811627536923724e-05, "loss": 2.9094, "step": 30300 }, { "epoch": 0.22, "learning_rate": 1.780439468240536e-05, "loss": 2.9145, "step": 30400 }, { "epoch": 0.22, "learning_rate": 1.7797161827886994e-05, "loss": 2.9115, "step": 30500 }, { "epoch": 0.22, "learning_rate": 1.778992897336863e-05, "loss": 2.9093, "step": 30600 }, { "epoch": 0.22, "learning_rate": 1.7782696118850267e-05, "loss": 2.9111, "step": 30700 }, { "epoch": 0.22, "learning_rate": 1.7775463264331903e-05, "loss": 2.9052, "step": 30800 }, { "epoch": 0.22, "learning_rate": 1.776823040981354e-05, "loss": 2.9072, "step": 30900 }, { "epoch": 0.22, "learning_rate": 1.7760997555295173e-05, "loss": 2.9069, "step": 31000 }, { "epoch": 0.22, "eval_accuracy": 0.4425190467981115, "eval_loss": 2.921447992324829, "eval_runtime": 27.8624, "eval_samples_per_second": 232.679, "eval_steps_per_second": 2.441, "step": 31000 }, { "epoch": 0.22, "learning_rate": 1.7753837029321994e-05, "loss": 2.9063, "step": 31100 }, { "epoch": 0.23, "learning_rate": 1.7746676503348816e-05, "loss": 2.9101, "step": 31200 }, { "epoch": 0.23, "learning_rate": 1.773944364883045e-05, "loss": 2.8999, "step": 31300 }, { "epoch": 0.23, "learning_rate": 1.7732210794312085e-05, "loss": 2.9026, "step": 31400 }, { "epoch": 0.23, "learning_rate": 1.772497793979372e-05, "loss": 2.9066, "step": 31500 }, { "epoch": 0.23, "learning_rate": 1.7717745085275355e-05, "loss": 2.9032, "step": 31600 }, { "epoch": 0.23, "learning_rate": 1.771051223075699e-05, "loss": 2.9003, "step": 31700 }, { "epoch": 0.23, "learning_rate": 1.7703279376238628e-05, "loss": 2.9049, "step": 31800 }, { "epoch": 0.23, "learning_rate": 1.769604652172026e-05, "loss": 2.8992, "step": 31900 }, { "epoch": 0.23, "learning_rate": 1.7688813667201898e-05, "loss": 2.9067, "step": 32000 }, { "epoch": 0.23, "eval_accuracy": 0.44299268373090406, "eval_loss": 2.9168407917022705, "eval_runtime": 29.0945, "eval_samples_per_second": 222.825, "eval_steps_per_second": 2.337, "step": 32000 }, { "epoch": 0.23, "learning_rate": 1.7681580812683534e-05, "loss": 2.9007, "step": 32100 }, { "epoch": 0.23, "learning_rate": 1.767434795816517e-05, "loss": 2.8995, "step": 32200 }, { "epoch": 0.23, "learning_rate": 1.7667115103646807e-05, "loss": 2.9044, "step": 32300 }, { "epoch": 0.23, "learning_rate": 1.765988224912844e-05, "loss": 2.8931, "step": 32400 }, { "epoch": 0.24, "learning_rate": 1.7652649394610077e-05, "loss": 2.902, "step": 32500 }, { "epoch": 0.24, "learning_rate": 1.7645416540091713e-05, "loss": 2.9019, "step": 32600 }, { "epoch": 0.24, "learning_rate": 1.763818368557335e-05, "loss": 2.9064, "step": 32700 }, { "epoch": 0.24, "learning_rate": 1.7630950831054986e-05, "loss": 2.9004, "step": 32800 }, { "epoch": 0.24, "learning_rate": 1.7623790305081804e-05, "loss": 2.8917, "step": 32900 }, { "epoch": 0.24, "learning_rate": 1.761655745056344e-05, "loss": 2.8978, "step": 33000 }, { "epoch": 0.24, "eval_accuracy": 0.4434288168452635, "eval_loss": 2.91280198097229, "eval_runtime": 27.8037, "eval_samples_per_second": 233.17, "eval_steps_per_second": 2.446, "step": 33000 }, { "epoch": 0.24, "learning_rate": 1.7609324596045077e-05, "loss": 2.9001, "step": 33100 }, { "epoch": 0.24, "learning_rate": 1.7602091741526714e-05, "loss": 2.898, "step": 33200 }, { "epoch": 0.24, "learning_rate": 1.7594858887008347e-05, "loss": 2.8938, "step": 33300 }, { "epoch": 0.24, "learning_rate": 1.7587626032489984e-05, "loss": 2.8943, "step": 33400 }, { "epoch": 0.24, "learning_rate": 1.758039317797162e-05, "loss": 2.8966, "step": 33500 }, { "epoch": 0.24, "learning_rate": 1.7573160323453257e-05, "loss": 2.9033, "step": 33600 }, { "epoch": 0.24, "learning_rate": 1.756592746893489e-05, "loss": 2.8971, "step": 33700 }, { "epoch": 0.24, "learning_rate": 1.7558694614416526e-05, "loss": 2.8919, "step": 33800 }, { "epoch": 0.25, "learning_rate": 1.7551461759898163e-05, "loss": 2.8973, "step": 33900 }, { "epoch": 0.25, "learning_rate": 1.75442289053798e-05, "loss": 2.8982, "step": 34000 }, { "epoch": 0.25, "eval_accuracy": 0.44378510312037817, "eval_loss": 2.9087538719177246, "eval_runtime": 30.238, "eval_samples_per_second": 214.399, "eval_steps_per_second": 2.249, "step": 34000 }, { "epoch": 0.25, "learning_rate": 1.7536996050861436e-05, "loss": 2.8893, "step": 34100 }, { "epoch": 0.25, "learning_rate": 1.752976319634307e-05, "loss": 2.8867, "step": 34200 }, { "epoch": 0.25, "learning_rate": 1.7522530341824705e-05, "loss": 2.8991, "step": 34300 }, { "epoch": 0.25, "learning_rate": 1.7515297487306342e-05, "loss": 2.8888, "step": 34400 }, { "epoch": 0.25, "learning_rate": 1.750806463278798e-05, "loss": 2.895, "step": 34500 }, { "epoch": 0.25, "learning_rate": 1.7500831778269615e-05, "loss": 2.8962, "step": 34600 }, { "epoch": 0.25, "learning_rate": 1.7493598923751248e-05, "loss": 2.8883, "step": 34700 }, { "epoch": 0.25, "learning_rate": 1.7486366069232885e-05, "loss": 2.8861, "step": 34800 }, { "epoch": 0.25, "learning_rate": 1.747913321471452e-05, "loss": 2.8939, "step": 34900 }, { "epoch": 0.25, "learning_rate": 1.7471900360196158e-05, "loss": 2.8856, "step": 35000 }, { "epoch": 0.25, "eval_accuracy": 0.44437488091025396, "eval_loss": 2.904993772506714, "eval_runtime": 29.3345, "eval_samples_per_second": 221.002, "eval_steps_per_second": 2.318, "step": 35000 }, { "epoch": 0.25, "learning_rate": 1.7464667505677794e-05, "loss": 2.8966, "step": 35100 }, { "epoch": 0.25, "learning_rate": 1.7457434651159427e-05, "loss": 2.8854, "step": 35200 }, { "epoch": 0.26, "learning_rate": 1.7450201796641064e-05, "loss": 2.8906, "step": 35300 }, { "epoch": 0.26, "learning_rate": 1.7443041270667885e-05, "loss": 2.8887, "step": 35400 }, { "epoch": 0.26, "learning_rate": 1.743580841614952e-05, "loss": 2.8843, "step": 35500 }, { "epoch": 0.26, "learning_rate": 1.7428575561631155e-05, "loss": 2.8908, "step": 35600 }, { "epoch": 0.26, "learning_rate": 1.742134270711279e-05, "loss": 2.8883, "step": 35700 }, { "epoch": 0.26, "learning_rate": 1.7414109852594428e-05, "loss": 2.8848, "step": 35800 }, { "epoch": 0.26, "learning_rate": 1.740687699807606e-05, "loss": 2.876, "step": 35900 }, { "epoch": 0.26, "learning_rate": 1.7399644143557697e-05, "loss": 2.8981, "step": 36000 }, { "epoch": 0.26, "eval_accuracy": 0.44447287475841796, "eval_loss": 2.901261806488037, "eval_runtime": 28.6761, "eval_samples_per_second": 226.077, "eval_steps_per_second": 2.371, "step": 36000 }, { "epoch": 0.26, "learning_rate": 1.7392411289039334e-05, "loss": 2.8881, "step": 36100 }, { "epoch": 0.26, "learning_rate": 1.7385178434520967e-05, "loss": 2.8854, "step": 36200 }, { "epoch": 0.26, "learning_rate": 1.7377945580002604e-05, "loss": 2.8843, "step": 36300 }, { "epoch": 0.26, "learning_rate": 1.7370785054029425e-05, "loss": 2.8885, "step": 36400 }, { "epoch": 0.26, "learning_rate": 1.736355219951106e-05, "loss": 2.8869, "step": 36500 }, { "epoch": 0.26, "learning_rate": 1.7356319344992695e-05, "loss": 2.8813, "step": 36600 }, { "epoch": 0.27, "learning_rate": 1.734908649047433e-05, "loss": 2.884, "step": 36700 }, { "epoch": 0.27, "learning_rate": 1.7341853635955968e-05, "loss": 2.8834, "step": 36800 }, { "epoch": 0.27, "learning_rate": 1.7334620781437604e-05, "loss": 2.8773, "step": 36900 }, { "epoch": 0.27, "learning_rate": 1.732738792691924e-05, "loss": 2.8813, "step": 37000 }, { "epoch": 0.27, "eval_accuracy": 0.44499187921350863, "eval_loss": 2.8976523876190186, "eval_runtime": 27.9421, "eval_samples_per_second": 232.015, "eval_steps_per_second": 2.434, "step": 37000 }, { "epoch": 0.27, "learning_rate": 1.7320155072400874e-05, "loss": 2.8865, "step": 37100 }, { "epoch": 0.27, "learning_rate": 1.731292221788251e-05, "loss": 2.881, "step": 37200 }, { "epoch": 0.27, "learning_rate": 1.7305689363364147e-05, "loss": 2.8781, "step": 37300 }, { "epoch": 0.27, "learning_rate": 1.7298456508845783e-05, "loss": 2.884, "step": 37400 }, { "epoch": 0.27, "learning_rate": 1.7291223654327417e-05, "loss": 2.8802, "step": 37500 }, { "epoch": 0.27, "learning_rate": 1.7283990799809053e-05, "loss": 2.8802, "step": 37600 }, { "epoch": 0.27, "learning_rate": 1.727675794529069e-05, "loss": 2.8685, "step": 37700 }, { "epoch": 0.27, "learning_rate": 1.7269525090772326e-05, "loss": 2.8745, "step": 37800 }, { "epoch": 0.27, "learning_rate": 1.7262292236253963e-05, "loss": 2.8799, "step": 37900 }, { "epoch": 0.27, "learning_rate": 1.7255059381735596e-05, "loss": 2.8765, "step": 38000 }, { "epoch": 0.27, "eval_accuracy": 0.44532699397821757, "eval_loss": 2.8943746089935303, "eval_runtime": 28.0195, "eval_samples_per_second": 231.375, "eval_steps_per_second": 2.427, "step": 38000 }, { "epoch": 0.28, "learning_rate": 1.7247826527217232e-05, "loss": 2.8834, "step": 38100 }, { "epoch": 0.28, "learning_rate": 1.724059367269887e-05, "loss": 2.8782, "step": 38200 }, { "epoch": 0.28, "learning_rate": 1.7233360818180505e-05, "loss": 2.8855, "step": 38300 }, { "epoch": 0.28, "learning_rate": 1.7226127963662142e-05, "loss": 2.8681, "step": 38400 }, { "epoch": 0.28, "learning_rate": 1.7218895109143775e-05, "loss": 2.8832, "step": 38500 }, { "epoch": 0.28, "learning_rate": 1.721166225462541e-05, "loss": 2.8764, "step": 38600 }, { "epoch": 0.28, "learning_rate": 1.7204429400107048e-05, "loss": 2.8771, "step": 38700 }, { "epoch": 0.28, "learning_rate": 1.7197196545588684e-05, "loss": 2.869, "step": 38800 }, { "epoch": 0.28, "learning_rate": 1.7190036019615502e-05, "loss": 2.8749, "step": 38900 }, { "epoch": 0.28, "learning_rate": 1.718280316509714e-05, "loss": 2.879, "step": 39000 }, { "epoch": 0.28, "eval_accuracy": 0.4457794594006043, "eval_loss": 2.891030788421631, "eval_runtime": 33.3451, "eval_samples_per_second": 194.421, "eval_steps_per_second": 2.039, "step": 39000 }, { "epoch": 0.28, "learning_rate": 1.7175570310578776e-05, "loss": 2.8757, "step": 39100 }, { "epoch": 0.28, "learning_rate": 1.7168409784605594e-05, "loss": 2.8721, "step": 39200 }, { "epoch": 0.28, "learning_rate": 1.716117693008723e-05, "loss": 2.8751, "step": 39300 }, { "epoch": 0.28, "learning_rate": 1.7153944075568863e-05, "loss": 2.8813, "step": 39400 }, { "epoch": 0.29, "learning_rate": 1.71467112210505e-05, "loss": 2.8738, "step": 39500 }, { "epoch": 0.29, "learning_rate": 1.7139478366532136e-05, "loss": 2.8761, "step": 39600 }, { "epoch": 0.29, "learning_rate": 1.7132245512013773e-05, "loss": 2.877, "step": 39700 }, { "epoch": 0.29, "learning_rate": 1.712501265749541e-05, "loss": 2.8673, "step": 39800 }, { "epoch": 0.29, "learning_rate": 1.7117779802977042e-05, "loss": 2.8789, "step": 39900 }, { "epoch": 0.29, "learning_rate": 1.711054694845868e-05, "loss": 2.8738, "step": 40000 }, { "epoch": 0.29, "eval_accuracy": 0.4462434179286399, "eval_loss": 2.8878371715545654, "eval_runtime": 30.9298, "eval_samples_per_second": 209.604, "eval_steps_per_second": 2.199, "step": 40000 }, { "epoch": 0.29, "learning_rate": 1.7103314093940315e-05, "loss": 2.8666, "step": 40100 }, { "epoch": 0.29, "learning_rate": 1.7096081239421952e-05, "loss": 2.8741, "step": 40200 }, { "epoch": 0.29, "learning_rate": 1.708884838490359e-05, "loss": 2.8752, "step": 40300 }, { "epoch": 0.29, "learning_rate": 1.708161553038522e-05, "loss": 2.8725, "step": 40400 }, { "epoch": 0.29, "learning_rate": 1.7074382675866858e-05, "loss": 2.8711, "step": 40500 }, { "epoch": 0.29, "learning_rate": 1.7067149821348495e-05, "loss": 2.8709, "step": 40600 }, { "epoch": 0.29, "learning_rate": 1.705991696683013e-05, "loss": 2.8677, "step": 40700 }, { "epoch": 0.3, "learning_rate": 1.7052684112311764e-05, "loss": 2.8678, "step": 40800 }, { "epoch": 0.3, "learning_rate": 1.70454512577934e-05, "loss": 2.8671, "step": 40900 }, { "epoch": 0.3, "learning_rate": 1.7038218403275037e-05, "loss": 2.8671, "step": 41000 }, { "epoch": 0.3, "eval_accuracy": 0.4465404239746184, "eval_loss": 2.8851165771484375, "eval_runtime": 28.3107, "eval_samples_per_second": 228.994, "eval_steps_per_second": 2.402, "step": 41000 }, { "epoch": 0.3, "learning_rate": 1.7030985548756674e-05, "loss": 2.8682, "step": 41100 }, { "epoch": 0.3, "learning_rate": 1.7023825022783492e-05, "loss": 2.8793, "step": 41200 }, { "epoch": 0.3, "learning_rate": 1.701659216826513e-05, "loss": 2.864, "step": 41300 }, { "epoch": 0.3, "learning_rate": 1.7009359313746765e-05, "loss": 2.8711, "step": 41400 }, { "epoch": 0.3, "learning_rate": 1.70021264592284e-05, "loss": 2.8738, "step": 41500 }, { "epoch": 0.3, "learning_rate": 1.6994893604710038e-05, "loss": 2.862, "step": 41600 }, { "epoch": 0.3, "learning_rate": 1.698766075019167e-05, "loss": 2.8652, "step": 41700 }, { "epoch": 0.3, "learning_rate": 1.6980427895673307e-05, "loss": 2.8758, "step": 41800 }, { "epoch": 0.3, "learning_rate": 1.6973195041154944e-05, "loss": 2.8695, "step": 41900 }, { "epoch": 0.3, "learning_rate": 1.696596218663658e-05, "loss": 2.866, "step": 42000 }, { "epoch": 0.3, "eval_accuracy": 0.44681746831078567, "eval_loss": 2.882030963897705, "eval_runtime": 28.8345, "eval_samples_per_second": 224.835, "eval_steps_per_second": 2.358, "step": 42000 }, { "epoch": 0.3, "learning_rate": 1.6958729332118217e-05, "loss": 2.8696, "step": 42100 }, { "epoch": 0.31, "learning_rate": 1.695149647759985e-05, "loss": 2.8766, "step": 42200 }, { "epoch": 0.31, "learning_rate": 1.6944263623081487e-05, "loss": 2.8706, "step": 42300 }, { "epoch": 0.31, "learning_rate": 1.6937030768563123e-05, "loss": 2.8673, "step": 42400 }, { "epoch": 0.31, "learning_rate": 1.692979791404476e-05, "loss": 2.864, "step": 42500 }, { "epoch": 0.31, "learning_rate": 1.6922565059526396e-05, "loss": 2.8708, "step": 42600 }, { "epoch": 0.31, "learning_rate": 1.691533220500803e-05, "loss": 2.8642, "step": 42700 }, { "epoch": 0.31, "learning_rate": 1.6908099350489666e-05, "loss": 2.8601, "step": 42800 }, { "epoch": 0.31, "learning_rate": 1.6900938824516484e-05, "loss": 2.8662, "step": 42900 }, { "epoch": 0.31, "learning_rate": 1.689370596999812e-05, "loss": 2.8561, "step": 43000 }, { "epoch": 0.31, "eval_accuracy": 0.4472874758417944, "eval_loss": 2.879122018814087, "eval_runtime": 29.1494, "eval_samples_per_second": 222.406, "eval_steps_per_second": 2.333, "step": 43000 }, { "epoch": 0.31, "learning_rate": 1.6886473115479757e-05, "loss": 2.8746, "step": 43100 }, { "epoch": 0.31, "learning_rate": 1.687924026096139e-05, "loss": 2.8657, "step": 43200 }, { "epoch": 0.31, "learning_rate": 1.6872007406443027e-05, "loss": 2.8655, "step": 43300 }, { "epoch": 0.31, "learning_rate": 1.6864774551924663e-05, "loss": 2.8716, "step": 43400 }, { "epoch": 0.31, "learning_rate": 1.68575416974063e-05, "loss": 2.8701, "step": 43500 }, { "epoch": 0.32, "learning_rate": 1.6850308842887936e-05, "loss": 2.8669, "step": 43600 }, { "epoch": 0.32, "learning_rate": 1.684307598836957e-05, "loss": 2.8615, "step": 43700 }, { "epoch": 0.32, "learning_rate": 1.6835843133851206e-05, "loss": 2.8664, "step": 43800 }, { "epoch": 0.32, "learning_rate": 1.6828610279332842e-05, "loss": 2.8594, "step": 43900 }, { "epoch": 0.32, "learning_rate": 1.6821449753359664e-05, "loss": 2.8601, "step": 44000 }, { "epoch": 0.32, "eval_accuracy": 0.4477096962493157, "eval_loss": 2.8765430450439453, "eval_runtime": 29.4041, "eval_samples_per_second": 220.48, "eval_steps_per_second": 2.313, "step": 44000 }, { "epoch": 0.32, "learning_rate": 1.6814216898841297e-05, "loss": 2.8605, "step": 44100 }, { "epoch": 0.32, "learning_rate": 1.6806984044322933e-05, "loss": 2.8665, "step": 44200 }, { "epoch": 0.32, "learning_rate": 1.679975118980457e-05, "loss": 2.8647, "step": 44300 }, { "epoch": 0.32, "learning_rate": 1.6792518335286206e-05, "loss": 2.8585, "step": 44400 }, { "epoch": 0.32, "learning_rate": 1.678528548076784e-05, "loss": 2.8644, "step": 44500 }, { "epoch": 0.32, "learning_rate": 1.6778052626249476e-05, "loss": 2.8634, "step": 44600 }, { "epoch": 0.32, "learning_rate": 1.6770819771731113e-05, "loss": 2.8644, "step": 44700 }, { "epoch": 0.32, "learning_rate": 1.676358691721275e-05, "loss": 2.8555, "step": 44800 }, { "epoch": 0.32, "learning_rate": 1.6756354062694386e-05, "loss": 2.8591, "step": 44900 }, { "epoch": 0.33, "learning_rate": 1.6749193536721204e-05, "loss": 2.8518, "step": 45000 }, { "epoch": 0.33, "eval_accuracy": 0.44793955836229293, "eval_loss": 2.8740601539611816, "eval_runtime": 29.2958, "eval_samples_per_second": 221.294, "eval_steps_per_second": 2.321, "step": 45000 }, { "epoch": 0.33, "learning_rate": 1.674196068220284e-05, "loss": 2.8613, "step": 45100 }, { "epoch": 0.33, "learning_rate": 1.6734727827684477e-05, "loss": 2.8672, "step": 45200 }, { "epoch": 0.33, "learning_rate": 1.6727494973166113e-05, "loss": 2.8603, "step": 45300 }, { "epoch": 0.33, "learning_rate": 1.6720262118647746e-05, "loss": 2.8581, "step": 45400 }, { "epoch": 0.33, "learning_rate": 1.6713029264129383e-05, "loss": 2.8484, "step": 45500 }, { "epoch": 0.33, "learning_rate": 1.670579640961102e-05, "loss": 2.8483, "step": 45600 }, { "epoch": 0.33, "learning_rate": 1.6698563555092656e-05, "loss": 2.8518, "step": 45700 }, { "epoch": 0.33, "learning_rate": 1.6691330700574292e-05, "loss": 2.8528, "step": 45800 }, { "epoch": 0.33, "learning_rate": 1.6684097846055925e-05, "loss": 2.8618, "step": 45900 }, { "epoch": 0.33, "learning_rate": 1.6676864991537562e-05, "loss": 2.8577, "step": 46000 }, { "epoch": 0.33, "eval_accuracy": 0.4482964495377049, "eval_loss": 2.8713486194610596, "eval_runtime": 27.9428, "eval_samples_per_second": 232.01, "eval_steps_per_second": 2.434, "step": 46000 }, { "epoch": 0.33, "learning_rate": 1.66696321370192e-05, "loss": 2.8578, "step": 46100 }, { "epoch": 0.33, "learning_rate": 1.6662399282500835e-05, "loss": 2.8581, "step": 46200 }, { "epoch": 0.33, "learning_rate": 1.665516642798247e-05, "loss": 2.8532, "step": 46300 }, { "epoch": 0.34, "learning_rate": 1.6647933573464105e-05, "loss": 2.8543, "step": 46400 }, { "epoch": 0.34, "learning_rate": 1.664070071894574e-05, "loss": 2.865, "step": 46500 }, { "epoch": 0.34, "learning_rate": 1.6633467864427378e-05, "loss": 2.8621, "step": 46600 }, { "epoch": 0.34, "learning_rate": 1.6626235009909014e-05, "loss": 2.8531, "step": 46700 }, { "epoch": 0.34, "learning_rate": 1.6619002155390647e-05, "loss": 2.8576, "step": 46800 }, { "epoch": 0.34, "learning_rate": 1.6611769300872284e-05, "loss": 2.8607, "step": 46900 }, { "epoch": 0.34, "learning_rate": 1.660453644635392e-05, "loss": 2.8588, "step": 47000 }, { "epoch": 0.34, "eval_accuracy": 0.44838053067903083, "eval_loss": 2.869096040725708, "eval_runtime": 27.7995, "eval_samples_per_second": 233.206, "eval_steps_per_second": 2.446, "step": 47000 }, { "epoch": 0.34, "learning_rate": 1.6597448248925923e-05, "loss": 2.8548, "step": 47100 }, { "epoch": 0.34, "learning_rate": 1.659021539440756e-05, "loss": 2.8561, "step": 47200 }, { "epoch": 0.34, "learning_rate": 1.6582982539889193e-05, "loss": 2.8538, "step": 47300 }, { "epoch": 0.34, "learning_rate": 1.657574968537083e-05, "loss": 2.851, "step": 47400 }, { "epoch": 0.34, "learning_rate": 1.6568516830852466e-05, "loss": 2.8456, "step": 47500 }, { "epoch": 0.34, "learning_rate": 1.6561283976334102e-05, "loss": 2.8511, "step": 47600 }, { "epoch": 0.35, "learning_rate": 1.655405112181574e-05, "loss": 2.8498, "step": 47700 }, { "epoch": 0.35, "learning_rate": 1.6546818267297372e-05, "loss": 2.8539, "step": 47800 }, { "epoch": 0.35, "learning_rate": 1.653958541277901e-05, "loss": 2.8587, "step": 47900 }, { "epoch": 0.35, "learning_rate": 1.6532352558260645e-05, "loss": 2.8584, "step": 48000 }, { "epoch": 0.35, "eval_accuracy": 0.44867330242292813, "eval_loss": 2.8666162490844727, "eval_runtime": 29.6889, "eval_samples_per_second": 218.364, "eval_steps_per_second": 2.29, "step": 48000 }, { "epoch": 0.35, "learning_rate": 1.652511970374228e-05, "loss": 2.8576, "step": 48100 }, { "epoch": 0.35, "learning_rate": 1.6517886849223915e-05, "loss": 2.8559, "step": 48200 }, { "epoch": 0.35, "learning_rate": 1.651065399470555e-05, "loss": 2.8476, "step": 48300 }, { "epoch": 0.35, "learning_rate": 1.6503421140187188e-05, "loss": 2.8528, "step": 48400 }, { "epoch": 0.35, "learning_rate": 1.6496188285668824e-05, "loss": 2.8514, "step": 48500 }, { "epoch": 0.35, "learning_rate": 1.648895543115046e-05, "loss": 2.8468, "step": 48600 }, { "epoch": 0.35, "learning_rate": 1.6481722576632094e-05, "loss": 2.8479, "step": 48700 }, { "epoch": 0.35, "learning_rate": 1.647448972211373e-05, "loss": 2.858, "step": 48800 }, { "epoch": 0.35, "learning_rate": 1.6467256867595367e-05, "loss": 2.8567, "step": 48900 }, { "epoch": 0.35, "learning_rate": 1.6460024013077003e-05, "loss": 2.8527, "step": 49000 }, { "epoch": 0.35, "eval_accuracy": 0.448782789376741, "eval_loss": 2.864643096923828, "eval_runtime": 27.8655, "eval_samples_per_second": 232.653, "eval_steps_per_second": 2.44, "step": 49000 }, { "epoch": 0.36, "learning_rate": 1.645279115855864e-05, "loss": 2.8534, "step": 49100 }, { "epoch": 0.36, "learning_rate": 1.6445558304040273e-05, "loss": 2.8563, "step": 49200 }, { "epoch": 0.36, "learning_rate": 1.643832544952191e-05, "loss": 2.8536, "step": 49300 }, { "epoch": 0.36, "learning_rate": 1.6431092595003546e-05, "loss": 2.8378, "step": 49400 }, { "epoch": 0.36, "learning_rate": 1.6423932069030364e-05, "loss": 2.8455, "step": 49500 }, { "epoch": 0.36, "learning_rate": 1.6416699214512e-05, "loss": 2.8519, "step": 49600 }, { "epoch": 0.36, "learning_rate": 1.6409466359993634e-05, "loss": 2.8505, "step": 49700 }, { "epoch": 0.36, "learning_rate": 1.640223350547527e-05, "loss": 2.8437, "step": 49800 }, { "epoch": 0.36, "learning_rate": 1.6395000650956907e-05, "loss": 2.8428, "step": 49900 }, { "epoch": 0.36, "learning_rate": 1.6387767796438543e-05, "loss": 2.8425, "step": 50000 }, { "epoch": 0.36, "eval_accuracy": 0.44904108180369173, "eval_loss": 2.8624136447906494, "eval_runtime": 32.7107, "eval_samples_per_second": 198.192, "eval_steps_per_second": 2.079, "step": 50000 }, { "epoch": 0.36, "learning_rate": 1.638053494192018e-05, "loss": 2.8406, "step": 50100 }, { "epoch": 0.36, "learning_rate": 1.6373302087401813e-05, "loss": 2.8552, "step": 50200 }, { "epoch": 0.36, "learning_rate": 1.636606923288345e-05, "loss": 2.8473, "step": 50300 }, { "epoch": 0.36, "learning_rate": 1.6358836378365086e-05, "loss": 2.8458, "step": 50400 }, { "epoch": 0.37, "learning_rate": 1.6351603523846723e-05, "loss": 2.8471, "step": 50500 }, { "epoch": 0.37, "learning_rate": 1.634437066932836e-05, "loss": 2.8518, "step": 50600 }, { "epoch": 0.37, "learning_rate": 1.6337137814809992e-05, "loss": 2.848, "step": 50700 }, { "epoch": 0.37, "learning_rate": 1.632990496029163e-05, "loss": 2.8491, "step": 50800 }, { "epoch": 0.37, "learning_rate": 1.6322672105773265e-05, "loss": 2.8433, "step": 50900 }, { "epoch": 0.37, "learning_rate": 1.6315439251254902e-05, "loss": 2.8457, "step": 51000 }, { "epoch": 0.37, "eval_accuracy": 0.4494481797037803, "eval_loss": 2.8601181507110596, "eval_runtime": 31.3807, "eval_samples_per_second": 206.592, "eval_steps_per_second": 2.167, "step": 51000 }, { "epoch": 0.37, "learning_rate": 1.6308206396736538e-05, "loss": 2.8436, "step": 51100 }, { "epoch": 0.37, "learning_rate": 1.630097354221817e-05, "loss": 2.8474, "step": 51200 }, { "epoch": 0.37, "learning_rate": 1.6293740687699808e-05, "loss": 2.8361, "step": 51300 }, { "epoch": 0.37, "learning_rate": 1.6286507833181444e-05, "loss": 2.842, "step": 51400 }, { "epoch": 0.37, "learning_rate": 1.627927497866308e-05, "loss": 2.8527, "step": 51500 }, { "epoch": 0.37, "learning_rate": 1.62721144526899e-05, "loss": 2.8376, "step": 51600 }, { "epoch": 0.37, "learning_rate": 1.6264881598171535e-05, "loss": 2.8461, "step": 51700 }, { "epoch": 0.37, "learning_rate": 1.6257721072198357e-05, "loss": 2.836, "step": 51800 }, { "epoch": 0.38, "learning_rate": 1.625048821767999e-05, "loss": 2.8444, "step": 51900 }, { "epoch": 0.38, "learning_rate": 1.6243255363161627e-05, "loss": 2.849, "step": 52000 }, { "epoch": 0.38, "eval_accuracy": 0.4496465870012975, "eval_loss": 2.8579957485198975, "eval_runtime": 30.949, "eval_samples_per_second": 209.474, "eval_steps_per_second": 2.197, "step": 52000 }, { "epoch": 0.38, "learning_rate": 1.6236022508643263e-05, "loss": 2.8402, "step": 52100 }, { "epoch": 0.38, "learning_rate": 1.62287896541249e-05, "loss": 2.8411, "step": 52200 }, { "epoch": 0.38, "learning_rate": 1.6221556799606536e-05, "loss": 2.8423, "step": 52300 }, { "epoch": 0.38, "learning_rate": 1.621432394508817e-05, "loss": 2.838, "step": 52400 }, { "epoch": 0.38, "learning_rate": 1.6207091090569806e-05, "loss": 2.8415, "step": 52500 }, { "epoch": 0.38, "learning_rate": 1.6199858236051442e-05, "loss": 2.8418, "step": 52600 }, { "epoch": 0.38, "learning_rate": 1.619262538153308e-05, "loss": 2.8366, "step": 52700 }, { "epoch": 0.38, "learning_rate": 1.6185392527014715e-05, "loss": 2.8443, "step": 52800 }, { "epoch": 0.38, "learning_rate": 1.617815967249635e-05, "loss": 2.8395, "step": 52900 }, { "epoch": 0.38, "learning_rate": 1.6170926817977985e-05, "loss": 2.8431, "step": 53000 }, { "epoch": 0.38, "eval_accuracy": 0.44991455783300516, "eval_loss": 2.856027841567993, "eval_runtime": 31.4773, "eval_samples_per_second": 205.958, "eval_steps_per_second": 2.16, "step": 53000 }, { "epoch": 0.38, "learning_rate": 1.616369396345962e-05, "loss": 2.8451, "step": 53100 }, { "epoch": 0.38, "learning_rate": 1.6156461108941258e-05, "loss": 2.8367, "step": 53200 }, { "epoch": 0.39, "learning_rate": 1.6149300582968076e-05, "loss": 2.8403, "step": 53300 }, { "epoch": 0.39, "learning_rate": 1.614206772844971e-05, "loss": 2.8385, "step": 53400 }, { "epoch": 0.39, "learning_rate": 1.6134834873931346e-05, "loss": 2.8323, "step": 53500 }, { "epoch": 0.39, "learning_rate": 1.6127602019412982e-05, "loss": 2.8408, "step": 53600 }, { "epoch": 0.39, "learning_rate": 1.612036916489462e-05, "loss": 2.8449, "step": 53700 }, { "epoch": 0.39, "learning_rate": 1.6113136310376255e-05, "loss": 2.8424, "step": 53800 }, { "epoch": 0.39, "learning_rate": 1.6105903455857888e-05, "loss": 2.8384, "step": 53900 }, { "epoch": 0.39, "learning_rate": 1.609874292988471e-05, "loss": 2.8463, "step": 54000 }, { "epoch": 0.39, "eval_accuracy": 0.45014865424806355, "eval_loss": 2.8539552688598633, "eval_runtime": 31.4595, "eval_samples_per_second": 206.075, "eval_steps_per_second": 2.162, "step": 54000 }, { "epoch": 0.39, "learning_rate": 1.6091510075366346e-05, "loss": 2.8336, "step": 54100 }, { "epoch": 0.39, "learning_rate": 1.6084277220847983e-05, "loss": 2.8433, "step": 54200 }, { "epoch": 0.39, "learning_rate": 1.6077044366329616e-05, "loss": 2.8454, "step": 54300 }, { "epoch": 0.39, "learning_rate": 1.6069811511811252e-05, "loss": 2.846, "step": 54400 }, { "epoch": 0.39, "learning_rate": 1.606257865729289e-05, "loss": 2.8422, "step": 54500 }, { "epoch": 0.39, "learning_rate": 1.6055345802774525e-05, "loss": 2.8489, "step": 54600 }, { "epoch": 0.4, "learning_rate": 1.6048112948256162e-05, "loss": 2.8397, "step": 54700 }, { "epoch": 0.4, "learning_rate": 1.6040880093737795e-05, "loss": 2.8381, "step": 54800 }, { "epoch": 0.4, "learning_rate": 1.603364723921943e-05, "loss": 2.8319, "step": 54900 }, { "epoch": 0.4, "learning_rate": 1.6026414384701068e-05, "loss": 2.8437, "step": 55000 }, { "epoch": 0.4, "eval_accuracy": 0.45043900639077167, "eval_loss": 2.8520514965057373, "eval_runtime": 28.1189, "eval_samples_per_second": 230.557, "eval_steps_per_second": 2.418, "step": 55000 }, { "epoch": 0.4, "learning_rate": 1.6019181530182705e-05, "loss": 2.8415, "step": 55100 }, { "epoch": 0.4, "learning_rate": 1.601194867566434e-05, "loss": 2.8417, "step": 55200 }, { "epoch": 0.4, "learning_rate": 1.6004715821145974e-05, "loss": 2.8385, "step": 55300 }, { "epoch": 0.4, "learning_rate": 1.599748296662761e-05, "loss": 2.8361, "step": 55400 }, { "epoch": 0.4, "learning_rate": 1.5990250112109247e-05, "loss": 2.8356, "step": 55500 }, { "epoch": 0.4, "learning_rate": 1.5983017257590884e-05, "loss": 2.8419, "step": 55600 }, { "epoch": 0.4, "learning_rate": 1.5975784403072517e-05, "loss": 2.8403, "step": 55700 }, { "epoch": 0.4, "learning_rate": 1.5968551548554153e-05, "loss": 2.8414, "step": 55800 }, { "epoch": 0.4, "learning_rate": 1.596131869403579e-05, "loss": 2.8342, "step": 55900 }, { "epoch": 0.41, "learning_rate": 1.5954085839517426e-05, "loss": 2.845, "step": 56000 }, { "epoch": 0.41, "eval_accuracy": 0.45049465721812404, "eval_loss": 2.850494146347046, "eval_runtime": 27.8769, "eval_samples_per_second": 232.558, "eval_steps_per_second": 2.439, "step": 56000 }, { "epoch": 0.41, "learning_rate": 1.5946852984999063e-05, "loss": 2.8348, "step": 56100 }, { "epoch": 0.41, "learning_rate": 1.5939620130480696e-05, "loss": 2.8362, "step": 56200 }, { "epoch": 0.41, "learning_rate": 1.5932387275962333e-05, "loss": 2.8346, "step": 56300 }, { "epoch": 0.41, "learning_rate": 1.592515442144397e-05, "loss": 2.8384, "step": 56400 }, { "epoch": 0.41, "learning_rate": 1.5917921566925606e-05, "loss": 2.8455, "step": 56500 }, { "epoch": 0.41, "learning_rate": 1.5910761040952424e-05, "loss": 2.8325, "step": 56600 }, { "epoch": 0.41, "learning_rate": 1.590352818643406e-05, "loss": 2.8399, "step": 56700 }, { "epoch": 0.41, "learning_rate": 1.5896295331915693e-05, "loss": 2.8294, "step": 56800 }, { "epoch": 0.41, "learning_rate": 1.588906247739733e-05, "loss": 2.8307, "step": 56900 }, { "epoch": 0.41, "learning_rate": 1.5881829622878966e-05, "loss": 2.8218, "step": 57000 }, { "epoch": 0.41, "eval_accuracy": 0.4507807750587509, "eval_loss": 2.848620653152466, "eval_runtime": 29.8126, "eval_samples_per_second": 217.458, "eval_steps_per_second": 2.281, "step": 57000 }, { "epoch": 0.41, "learning_rate": 1.5874596768360603e-05, "loss": 2.8282, "step": 57100 }, { "epoch": 0.41, "learning_rate": 1.5867363913842236e-05, "loss": 2.8378, "step": 57200 }, { "epoch": 0.41, "learning_rate": 1.5860131059323872e-05, "loss": 2.8355, "step": 57300 }, { "epoch": 0.42, "learning_rate": 1.585289820480551e-05, "loss": 2.8298, "step": 57400 }, { "epoch": 0.42, "learning_rate": 1.5845665350287145e-05, "loss": 2.8378, "step": 57500 }, { "epoch": 0.42, "learning_rate": 1.5838432495768782e-05, "loss": 2.8199, "step": 57600 }, { "epoch": 0.42, "learning_rate": 1.58312719697956e-05, "loss": 2.8338, "step": 57700 }, { "epoch": 0.42, "learning_rate": 1.5824039115277237e-05, "loss": 2.8349, "step": 57800 }, { "epoch": 0.42, "learning_rate": 1.5816806260758873e-05, "loss": 2.829, "step": 57900 }, { "epoch": 0.42, "learning_rate": 1.580957340624051e-05, "loss": 2.8366, "step": 58000 }, { "epoch": 0.42, "eval_accuracy": 0.4509053845199965, "eval_loss": 2.8470458984375, "eval_runtime": 29.7851, "eval_samples_per_second": 217.659, "eval_steps_per_second": 2.283, "step": 58000 }, { "epoch": 0.42, "learning_rate": 1.5802340551722143e-05, "loss": 2.8331, "step": 58100 }, { "epoch": 0.42, "learning_rate": 1.579510769720378e-05, "loss": 2.8348, "step": 58200 }, { "epoch": 0.42, "learning_rate": 1.5787874842685416e-05, "loss": 2.8353, "step": 58300 }, { "epoch": 0.42, "learning_rate": 1.5780641988167052e-05, "loss": 2.8369, "step": 58400 }, { "epoch": 0.42, "learning_rate": 1.577340913364869e-05, "loss": 2.8315, "step": 58500 }, { "epoch": 0.42, "learning_rate": 1.5766176279130322e-05, "loss": 2.8344, "step": 58600 }, { "epoch": 0.42, "learning_rate": 1.5759015753157143e-05, "loss": 2.8307, "step": 58700 }, { "epoch": 0.43, "learning_rate": 1.575178289863878e-05, "loss": 2.8218, "step": 58800 }, { "epoch": 0.43, "learning_rate": 1.5744550044120413e-05, "loss": 2.8299, "step": 58900 }, { "epoch": 0.43, "learning_rate": 1.573731718960205e-05, "loss": 2.8339, "step": 59000 }, { "epoch": 0.43, "eval_accuracy": 0.45116549164783915, "eval_loss": 2.8453407287597656, "eval_runtime": 29.6273, "eval_samples_per_second": 218.818, "eval_steps_per_second": 2.295, "step": 59000 }, { "epoch": 0.43, "learning_rate": 1.5730084335083686e-05, "loss": 2.8327, "step": 59100 }, { "epoch": 0.43, "learning_rate": 1.5722851480565323e-05, "loss": 2.8305, "step": 59200 }, { "epoch": 0.43, "learning_rate": 1.571561862604696e-05, "loss": 2.8327, "step": 59300 }, { "epoch": 0.43, "learning_rate": 1.5708385771528592e-05, "loss": 2.8264, "step": 59400 }, { "epoch": 0.43, "learning_rate": 1.570115291701023e-05, "loss": 2.8368, "step": 59500 }, { "epoch": 0.43, "learning_rate": 1.5693920062491865e-05, "loss": 2.8295, "step": 59600 }, { "epoch": 0.43, "learning_rate": 1.5686687207973502e-05, "loss": 2.8248, "step": 59700 }, { "epoch": 0.43, "learning_rate": 1.5679454353455138e-05, "loss": 2.826, "step": 59800 }, { "epoch": 0.43, "learning_rate": 1.567222149893677e-05, "loss": 2.8306, "step": 59900 }, { "epoch": 0.43, "learning_rate": 1.5664988644418408e-05, "loss": 2.8338, "step": 60000 }, { "epoch": 0.43, "eval_accuracy": 0.4511189143249464, "eval_loss": 2.843701124191284, "eval_runtime": 30.181, "eval_samples_per_second": 214.804, "eval_steps_per_second": 2.253, "step": 60000 }, { "epoch": 0.43, "learning_rate": 1.5657755789900044e-05, "loss": 2.8309, "step": 60100 }, { "epoch": 0.44, "learning_rate": 1.5650595263926862e-05, "loss": 2.8288, "step": 60200 }, { "epoch": 0.44, "learning_rate": 1.56433624094085e-05, "loss": 2.8238, "step": 60300 }, { "epoch": 0.44, "learning_rate": 1.5636129554890132e-05, "loss": 2.8257, "step": 60400 }, { "epoch": 0.44, "learning_rate": 1.562889670037177e-05, "loss": 2.8334, "step": 60500 }, { "epoch": 0.44, "learning_rate": 1.5621663845853405e-05, "loss": 2.8232, "step": 60600 }, { "epoch": 0.44, "learning_rate": 1.561443099133504e-05, "loss": 2.8356, "step": 60700 }, { "epoch": 0.44, "learning_rate": 1.5607198136816678e-05, "loss": 2.8199, "step": 60800 }, { "epoch": 0.44, "learning_rate": 1.559996528229831e-05, "loss": 2.8215, "step": 60900 }, { "epoch": 0.44, "learning_rate": 1.5592732427779948e-05, "loss": 2.8237, "step": 61000 }, { "epoch": 0.44, "eval_accuracy": 0.4512713491998681, "eval_loss": 2.842045783996582, "eval_runtime": 33.0993, "eval_samples_per_second": 195.865, "eval_steps_per_second": 2.054, "step": 61000 }, { "epoch": 0.44, "learning_rate": 1.5585499573261584e-05, "loss": 2.8305, "step": 61100 }, { "epoch": 0.44, "learning_rate": 1.5578339047288406e-05, "loss": 2.823, "step": 61200 }, { "epoch": 0.44, "learning_rate": 1.557110619277004e-05, "loss": 2.839, "step": 61300 }, { "epoch": 0.44, "learning_rate": 1.5563873338251675e-05, "loss": 2.8265, "step": 61400 }, { "epoch": 0.44, "learning_rate": 1.5556640483733312e-05, "loss": 2.8256, "step": 61500 }, { "epoch": 0.45, "learning_rate": 1.554940762921495e-05, "loss": 2.8295, "step": 61600 }, { "epoch": 0.45, "learning_rate": 1.5542174774696585e-05, "loss": 2.8207, "step": 61700 }, { "epoch": 0.45, "learning_rate": 1.5534941920178218e-05, "loss": 2.8201, "step": 61800 }, { "epoch": 0.45, "learning_rate": 1.5527709065659855e-05, "loss": 2.8283, "step": 61900 }, { "epoch": 0.45, "learning_rate": 1.552047621114149e-05, "loss": 2.8334, "step": 62000 }, { "epoch": 0.45, "eval_accuracy": 0.45149939661195343, "eval_loss": 2.840452194213867, "eval_runtime": 29.9094, "eval_samples_per_second": 216.755, "eval_steps_per_second": 2.274, "step": 62000 }, { "epoch": 0.45, "learning_rate": 1.5513243356623128e-05, "loss": 2.8248, "step": 62100 }, { "epoch": 0.45, "learning_rate": 1.5506010502104764e-05, "loss": 2.8386, "step": 62200 }, { "epoch": 0.45, "learning_rate": 1.5498777647586397e-05, "loss": 2.8242, "step": 62300 }, { "epoch": 0.45, "learning_rate": 1.5491544793068034e-05, "loss": 2.8268, "step": 62400 }, { "epoch": 0.45, "learning_rate": 1.548431193854967e-05, "loss": 2.8286, "step": 62500 }, { "epoch": 0.45, "learning_rate": 1.5477079084031307e-05, "loss": 2.8274, "step": 62600 }, { "epoch": 0.45, "learning_rate": 1.546984622951294e-05, "loss": 2.8292, "step": 62700 }, { "epoch": 0.45, "learning_rate": 1.5462613374994576e-05, "loss": 2.8183, "step": 62800 }, { "epoch": 0.45, "learning_rate": 1.5455380520476213e-05, "loss": 2.8279, "step": 62900 }, { "epoch": 0.46, "learning_rate": 1.544814766595785e-05, "loss": 2.8229, "step": 63000 }, { "epoch": 0.46, "eval_accuracy": 0.4518381407784462, "eval_loss": 2.8387556076049805, "eval_runtime": 29.7722, "eval_samples_per_second": 217.753, "eval_steps_per_second": 2.284, "step": 63000 }, { "epoch": 0.46, "learning_rate": 1.5440914811439486e-05, "loss": 2.8227, "step": 63100 }, { "epoch": 0.46, "learning_rate": 1.543368195692112e-05, "loss": 2.8249, "step": 63200 }, { "epoch": 0.46, "learning_rate": 1.5426449102402756e-05, "loss": 2.8246, "step": 63300 }, { "epoch": 0.46, "learning_rate": 1.5419288576429574e-05, "loss": 2.8265, "step": 63400 }, { "epoch": 0.46, "learning_rate": 1.541205572191121e-05, "loss": 2.824, "step": 63500 }, { "epoch": 0.46, "learning_rate": 1.5404822867392847e-05, "loss": 2.8188, "step": 63600 }, { "epoch": 0.46, "learning_rate": 1.5397590012874483e-05, "loss": 2.8234, "step": 63700 }, { "epoch": 0.46, "learning_rate": 1.5390357158356116e-05, "loss": 2.8216, "step": 63800 }, { "epoch": 0.46, "learning_rate": 1.5383124303837753e-05, "loss": 2.8256, "step": 63900 }, { "epoch": 0.46, "learning_rate": 1.537589144931939e-05, "loss": 2.8214, "step": 64000 }, { "epoch": 0.46, "eval_accuracy": 0.45194218362958327, "eval_loss": 2.837294340133667, "eval_runtime": 33.0659, "eval_samples_per_second": 196.063, "eval_steps_per_second": 2.057, "step": 64000 }, { "epoch": 0.46, "learning_rate": 1.5368658594801026e-05, "loss": 2.8143, "step": 64100 }, { "epoch": 0.46, "learning_rate": 1.536142574028266e-05, "loss": 2.8199, "step": 64200 }, { "epoch": 0.47, "learning_rate": 1.5354192885764295e-05, "loss": 2.8217, "step": 64300 }, { "epoch": 0.47, "learning_rate": 1.5346960031245932e-05, "loss": 2.8202, "step": 64400 }, { "epoch": 0.47, "learning_rate": 1.533972717672757e-05, "loss": 2.8211, "step": 64500 }, { "epoch": 0.47, "learning_rate": 1.5332566650754386e-05, "loss": 2.8251, "step": 64600 }, { "epoch": 0.47, "learning_rate": 1.5325333796236023e-05, "loss": 2.8261, "step": 64700 }, { "epoch": 0.47, "learning_rate": 1.531810094171766e-05, "loss": 2.8217, "step": 64800 }, { "epoch": 0.47, "learning_rate": 1.5310868087199296e-05, "loss": 2.8227, "step": 64900 }, { "epoch": 0.47, "learning_rate": 1.5303635232680933e-05, "loss": 2.8245, "step": 65000 }, { "epoch": 0.47, "eval_accuracy": 0.45223193087199404, "eval_loss": 2.835636854171753, "eval_runtime": 29.5456, "eval_samples_per_second": 219.424, "eval_steps_per_second": 2.302, "step": 65000 }, { "epoch": 0.47, "learning_rate": 1.5296402378162566e-05, "loss": 2.8268, "step": 65100 }, { "epoch": 0.47, "learning_rate": 1.5289169523644202e-05, "loss": 2.8265, "step": 65200 }, { "epoch": 0.47, "learning_rate": 1.528193666912584e-05, "loss": 2.8221, "step": 65300 }, { "epoch": 0.47, "learning_rate": 1.5274703814607475e-05, "loss": 2.8193, "step": 65400 }, { "epoch": 0.47, "learning_rate": 1.5267470960089112e-05, "loss": 2.8281, "step": 65500 }, { "epoch": 0.47, "learning_rate": 1.526031043411593e-05, "loss": 2.8179, "step": 65600 }, { "epoch": 0.48, "learning_rate": 1.5253077579597566e-05, "loss": 2.8206, "step": 65700 }, { "epoch": 0.48, "learning_rate": 1.5245844725079201e-05, "loss": 2.8187, "step": 65800 }, { "epoch": 0.48, "learning_rate": 1.5238611870560838e-05, "loss": 2.8146, "step": 65900 }, { "epoch": 0.48, "learning_rate": 1.5231379016042474e-05, "loss": 2.822, "step": 66000 }, { "epoch": 0.48, "eval_accuracy": 0.4523559354329423, "eval_loss": 2.834322929382324, "eval_runtime": 27.8576, "eval_samples_per_second": 232.719, "eval_steps_per_second": 2.441, "step": 66000 }, { "epoch": 0.48, "learning_rate": 1.5224146161524109e-05, "loss": 2.8221, "step": 66100 }, { "epoch": 0.48, "learning_rate": 1.5216913307005745e-05, "loss": 2.8157, "step": 66200 }, { "epoch": 0.48, "learning_rate": 1.520968045248738e-05, "loss": 2.8233, "step": 66300 }, { "epoch": 0.48, "learning_rate": 1.5202447597969017e-05, "loss": 2.8273, "step": 66400 }, { "epoch": 0.48, "learning_rate": 1.5195214743450653e-05, "loss": 2.8149, "step": 66500 }, { "epoch": 0.48, "learning_rate": 1.5187981888932288e-05, "loss": 2.8215, "step": 66600 }, { "epoch": 0.48, "learning_rate": 1.5180821362959106e-05, "loss": 2.822, "step": 66700 }, { "epoch": 0.48, "learning_rate": 1.5173588508440743e-05, "loss": 2.8222, "step": 66800 }, { "epoch": 0.48, "learning_rate": 1.5166355653922378e-05, "loss": 2.8245, "step": 66900 }, { "epoch": 0.48, "learning_rate": 1.5159122799404014e-05, "loss": 2.8139, "step": 67000 }, { "epoch": 0.48, "eval_accuracy": 0.45257007013818945, "eval_loss": 2.833111524581909, "eval_runtime": 29.3281, "eval_samples_per_second": 221.051, "eval_steps_per_second": 2.319, "step": 67000 }, { "epoch": 0.49, "learning_rate": 1.5151889944885649e-05, "loss": 2.8134, "step": 67100 }, { "epoch": 0.49, "learning_rate": 1.5144657090367285e-05, "loss": 2.812, "step": 67200 }, { "epoch": 0.49, "learning_rate": 1.513742423584892e-05, "loss": 2.8225, "step": 67300 }, { "epoch": 0.49, "learning_rate": 1.5130191381330557e-05, "loss": 2.8177, "step": 67400 }, { "epoch": 0.49, "learning_rate": 1.5122958526812193e-05, "loss": 2.8167, "step": 67500 }, { "epoch": 0.49, "learning_rate": 1.5115725672293828e-05, "loss": 2.8189, "step": 67600 }, { "epoch": 0.49, "learning_rate": 1.5108565146320648e-05, "loss": 2.8239, "step": 67700 }, { "epoch": 0.49, "learning_rate": 1.5101332291802284e-05, "loss": 2.8216, "step": 67800 }, { "epoch": 0.49, "learning_rate": 1.509409943728392e-05, "loss": 2.8193, "step": 67900 }, { "epoch": 0.49, "learning_rate": 1.5086866582765556e-05, "loss": 2.8201, "step": 68000 }, { "epoch": 0.49, "eval_accuracy": 0.45263963367237997, "eval_loss": 2.8317487239837646, "eval_runtime": 30.249, "eval_samples_per_second": 214.321, "eval_steps_per_second": 2.248, "step": 68000 }, { "epoch": 0.49, "learning_rate": 1.5079633728247192e-05, "loss": 2.8189, "step": 68100 }, { "epoch": 0.49, "learning_rate": 1.5072400873728827e-05, "loss": 2.8271, "step": 68200 }, { "epoch": 0.49, "learning_rate": 1.5065168019210463e-05, "loss": 2.8229, "step": 68300 }, { "epoch": 0.49, "learning_rate": 1.5057935164692098e-05, "loss": 2.8144, "step": 68400 }, { "epoch": 0.5, "learning_rate": 1.5050702310173735e-05, "loss": 2.8177, "step": 68500 }, { "epoch": 0.5, "learning_rate": 1.5043469455655371e-05, "loss": 2.8075, "step": 68600 }, { "epoch": 0.5, "learning_rate": 1.5036236601137006e-05, "loss": 2.818, "step": 68700 }, { "epoch": 0.5, "learning_rate": 1.5029003746618643e-05, "loss": 2.8183, "step": 68800 }, { "epoch": 0.5, "learning_rate": 1.5021915549190644e-05, "loss": 2.8202, "step": 68900 }, { "epoch": 0.5, "learning_rate": 1.501468269467228e-05, "loss": 2.8132, "step": 69000 }, { "epoch": 0.5, "eval_accuracy": 0.4527346030190574, "eval_loss": 2.830460548400879, "eval_runtime": 31.7655, "eval_samples_per_second": 204.09, "eval_steps_per_second": 2.141, "step": 69000 }, { "epoch": 0.5, "learning_rate": 1.5007449840153915e-05, "loss": 2.8269, "step": 69100 }, { "epoch": 0.5, "learning_rate": 1.5000216985635552e-05, "loss": 2.8226, "step": 69200 }, { "epoch": 0.5, "learning_rate": 1.4992984131117188e-05, "loss": 2.8165, "step": 69300 }, { "epoch": 0.5, "learning_rate": 1.4985751276598823e-05, "loss": 2.8167, "step": 69400 }, { "epoch": 0.5, "learning_rate": 1.497851842208046e-05, "loss": 2.8201, "step": 69500 }, { "epoch": 0.5, "learning_rate": 1.4971285567562094e-05, "loss": 2.814, "step": 69600 }, { "epoch": 0.5, "learning_rate": 1.4964052713043731e-05, "loss": 2.811, "step": 69700 }, { "epoch": 0.5, "learning_rate": 1.4956819858525366e-05, "loss": 2.8127, "step": 69800 }, { "epoch": 0.51, "learning_rate": 1.4949587004007002e-05, "loss": 2.8146, "step": 69900 }, { "epoch": 0.51, "learning_rate": 1.4942354149488639e-05, "loss": 2.8138, "step": 70000 }, { "epoch": 0.51, "eval_accuracy": 0.45299531504719737, "eval_loss": 2.8289763927459717, "eval_runtime": 32.3608, "eval_samples_per_second": 200.335, "eval_steps_per_second": 2.101, "step": 70000 }, { "epoch": 0.51, "learning_rate": 1.4935121294970274e-05, "loss": 2.8153, "step": 70100 }, { "epoch": 0.51, "learning_rate": 1.492788844045191e-05, "loss": 2.8128, "step": 70200 }, { "epoch": 0.51, "learning_rate": 1.4920655585933545e-05, "loss": 2.8177, "step": 70300 }, { "epoch": 0.51, "learning_rate": 1.4913422731415181e-05, "loss": 2.8185, "step": 70400 }, { "epoch": 0.51, "learning_rate": 1.4906189876896818e-05, "loss": 2.8144, "step": 70500 }, { "epoch": 0.51, "learning_rate": 1.4898957022378453e-05, "loss": 2.8157, "step": 70600 }, { "epoch": 0.51, "learning_rate": 1.489172416786009e-05, "loss": 2.8074, "step": 70700 }, { "epoch": 0.51, "learning_rate": 1.4884491313341724e-05, "loss": 2.8124, "step": 70800 }, { "epoch": 0.51, "learning_rate": 1.487725845882336e-05, "loss": 2.8143, "step": 70900 }, { "epoch": 0.51, "learning_rate": 1.4870025604304995e-05, "loss": 2.8171, "step": 71000 }, { "epoch": 0.51, "eval_accuracy": 0.45300983265433276, "eval_loss": 2.8278744220733643, "eval_runtime": 29.4032, "eval_samples_per_second": 220.486, "eval_steps_per_second": 2.313, "step": 71000 }, { "epoch": 0.51, "learning_rate": 1.4862792749786632e-05, "loss": 2.8229, "step": 71100 }, { "epoch": 0.51, "learning_rate": 1.4855559895268268e-05, "loss": 2.8153, "step": 71200 }, { "epoch": 0.52, "learning_rate": 1.4848327040749903e-05, "loss": 2.8163, "step": 71300 }, { "epoch": 0.52, "learning_rate": 1.484109418623154e-05, "loss": 2.8162, "step": 71400 }, { "epoch": 0.52, "learning_rate": 1.4833861331713175e-05, "loss": 2.8081, "step": 71500 }, { "epoch": 0.52, "learning_rate": 1.4826628477194811e-05, "loss": 2.8158, "step": 71600 }, { "epoch": 0.52, "learning_rate": 1.4819467951221631e-05, "loss": 2.8137, "step": 71700 }, { "epoch": 0.52, "learning_rate": 1.4812235096703267e-05, "loss": 2.8065, "step": 71800 }, { "epoch": 0.52, "learning_rate": 1.4805002242184902e-05, "loss": 2.8151, "step": 71900 }, { "epoch": 0.52, "learning_rate": 1.4797769387666539e-05, "loss": 2.8123, "step": 72000 }, { "epoch": 0.52, "eval_accuracy": 0.4531562185262814, "eval_loss": 2.8267478942871094, "eval_runtime": 28.1731, "eval_samples_per_second": 230.113, "eval_steps_per_second": 2.414, "step": 72000 }, { "epoch": 0.52, "learning_rate": 1.4790536533148174e-05, "loss": 2.8118, "step": 72100 }, { "epoch": 0.52, "learning_rate": 1.478330367862981e-05, "loss": 2.8177, "step": 72200 }, { "epoch": 0.52, "learning_rate": 1.4776070824111447e-05, "loss": 2.8059, "step": 72300 }, { "epoch": 0.52, "learning_rate": 1.4768837969593081e-05, "loss": 2.8214, "step": 72400 }, { "epoch": 0.52, "learning_rate": 1.4761605115074718e-05, "loss": 2.8068, "step": 72500 }, { "epoch": 0.53, "learning_rate": 1.4754372260556353e-05, "loss": 2.8163, "step": 72600 }, { "epoch": 0.53, "learning_rate": 1.474713940603799e-05, "loss": 2.8139, "step": 72700 }, { "epoch": 0.53, "learning_rate": 1.4739978880064807e-05, "loss": 2.8139, "step": 72800 }, { "epoch": 0.53, "learning_rate": 1.4732746025546442e-05, "loss": 2.8151, "step": 72900 }, { "epoch": 0.53, "learning_rate": 1.4725513171028079e-05, "loss": 2.8118, "step": 73000 }, { "epoch": 0.53, "eval_accuracy": 0.45336127972706897, "eval_loss": 2.825540781021118, "eval_runtime": 30.4285, "eval_samples_per_second": 213.057, "eval_steps_per_second": 2.235, "step": 73000 }, { "epoch": 0.53, "learning_rate": 1.4718280316509715e-05, "loss": 2.8174, "step": 73100 }, { "epoch": 0.53, "learning_rate": 1.471104746199135e-05, "loss": 2.8049, "step": 73200 }, { "epoch": 0.53, "learning_rate": 1.4703814607472986e-05, "loss": 2.8129, "step": 73300 }, { "epoch": 0.53, "learning_rate": 1.4696581752954621e-05, "loss": 2.8127, "step": 73400 }, { "epoch": 0.53, "learning_rate": 1.4689348898436258e-05, "loss": 2.8125, "step": 73500 }, { "epoch": 0.53, "learning_rate": 1.4682116043917893e-05, "loss": 2.8173, "step": 73600 }, { "epoch": 0.53, "learning_rate": 1.4674883189399529e-05, "loss": 2.8136, "step": 73700 }, { "epoch": 0.53, "learning_rate": 1.4667722663426349e-05, "loss": 2.8116, "step": 73800 }, { "epoch": 0.53, "learning_rate": 1.4660489808907985e-05, "loss": 2.804, "step": 73900 }, { "epoch": 0.54, "learning_rate": 1.465325695438962e-05, "loss": 2.8183, "step": 74000 }, { "epoch": 0.54, "eval_accuracy": 0.45355484782220773, "eval_loss": 2.8243465423583984, "eval_runtime": 30.6984, "eval_samples_per_second": 211.184, "eval_steps_per_second": 2.215, "step": 74000 }, { "epoch": 0.54, "learning_rate": 1.4646024099871257e-05, "loss": 2.8043, "step": 74100 }, { "epoch": 0.54, "learning_rate": 1.4638791245352893e-05, "loss": 2.8099, "step": 74200 }, { "epoch": 0.54, "learning_rate": 1.4631558390834528e-05, "loss": 2.8101, "step": 74300 }, { "epoch": 0.54, "learning_rate": 1.4624325536316165e-05, "loss": 2.8051, "step": 74400 }, { "epoch": 0.54, "learning_rate": 1.46170926817978e-05, "loss": 2.8176, "step": 74500 }, { "epoch": 0.54, "learning_rate": 1.4609859827279436e-05, "loss": 2.826, "step": 74600 }, { "epoch": 0.54, "learning_rate": 1.460262697276107e-05, "loss": 2.7999, "step": 74700 }, { "epoch": 0.54, "learning_rate": 1.4595394118242707e-05, "loss": 2.8013, "step": 74800 }, { "epoch": 0.54, "learning_rate": 1.4588161263724344e-05, "loss": 2.8069, "step": 74900 }, { "epoch": 0.54, "learning_rate": 1.4580928409205979e-05, "loss": 2.8052, "step": 75000 }, { "epoch": 0.54, "eval_accuracy": 0.4536056594471816, "eval_loss": 2.8232624530792236, "eval_runtime": 33.2026, "eval_samples_per_second": 195.256, "eval_steps_per_second": 2.048, "step": 75000 }, { "epoch": 0.54, "learning_rate": 1.4573695554687615e-05, "loss": 2.8113, "step": 75100 }, { "epoch": 0.54, "learning_rate": 1.456646270016925e-05, "loss": 2.809, "step": 75200 }, { "epoch": 0.54, "learning_rate": 1.4559229845650886e-05, "loss": 2.8048, "step": 75300 }, { "epoch": 0.55, "learning_rate": 1.4551996991132521e-05, "loss": 2.8071, "step": 75400 }, { "epoch": 0.55, "learning_rate": 1.454483646515934e-05, "loss": 2.8094, "step": 75500 }, { "epoch": 0.55, "learning_rate": 1.4537603610640976e-05, "loss": 2.8038, "step": 75600 }, { "epoch": 0.55, "learning_rate": 1.4530370756122612e-05, "loss": 2.815, "step": 75700 }, { "epoch": 0.55, "learning_rate": 1.4523137901604247e-05, "loss": 2.8076, "step": 75800 }, { "epoch": 0.55, "learning_rate": 1.4515905047085884e-05, "loss": 2.8066, "step": 75900 }, { "epoch": 0.55, "learning_rate": 1.4508672192567518e-05, "loss": 2.8101, "step": 76000 }, { "epoch": 0.55, "eval_accuracy": 0.4537810805334011, "eval_loss": 2.8219878673553467, "eval_runtime": 29.3462, "eval_samples_per_second": 220.914, "eval_steps_per_second": 2.317, "step": 76000 }, { "epoch": 0.55, "learning_rate": 1.4501439338049155e-05, "loss": 2.8143, "step": 76100 }, { "epoch": 0.55, "learning_rate": 1.449420648353079e-05, "loss": 2.8062, "step": 76200 }, { "epoch": 0.55, "learning_rate": 1.4486973629012426e-05, "loss": 2.8041, "step": 76300 }, { "epoch": 0.55, "learning_rate": 1.4479740774494063e-05, "loss": 2.8018, "step": 76400 }, { "epoch": 0.55, "learning_rate": 1.4472507919975698e-05, "loss": 2.8006, "step": 76500 }, { "epoch": 0.55, "learning_rate": 1.4465275065457334e-05, "loss": 2.7967, "step": 76600 }, { "epoch": 0.55, "learning_rate": 1.4458042210938969e-05, "loss": 2.8028, "step": 76700 }, { "epoch": 0.56, "learning_rate": 1.4450809356420605e-05, "loss": 2.8051, "step": 76800 }, { "epoch": 0.56, "learning_rate": 1.444357650190224e-05, "loss": 2.8104, "step": 76900 }, { "epoch": 0.56, "learning_rate": 1.4436343647383877e-05, "loss": 2.8021, "step": 77000 }, { "epoch": 0.56, "eval_accuracy": 0.45396497022378285, "eval_loss": 2.8208632469177246, "eval_runtime": 29.8785, "eval_samples_per_second": 216.979, "eval_steps_per_second": 2.276, "step": 77000 }, { "epoch": 0.56, "learning_rate": 1.4429110792865513e-05, "loss": 2.7955, "step": 77100 }, { "epoch": 0.56, "learning_rate": 1.4421877938347148e-05, "loss": 2.7983, "step": 77200 }, { "epoch": 0.56, "learning_rate": 1.4414645083828785e-05, "loss": 2.8088, "step": 77300 }, { "epoch": 0.56, "learning_rate": 1.440741222931042e-05, "loss": 2.8086, "step": 77400 }, { "epoch": 0.56, "learning_rate": 1.4400179374792056e-05, "loss": 2.8189, "step": 77500 }, { "epoch": 0.56, "learning_rate": 1.4393018848818876e-05, "loss": 2.8047, "step": 77600 }, { "epoch": 0.56, "learning_rate": 1.4385785994300512e-05, "loss": 2.8058, "step": 77700 }, { "epoch": 0.56, "learning_rate": 1.4378553139782147e-05, "loss": 2.8104, "step": 77800 }, { "epoch": 0.56, "learning_rate": 1.4371320285263784e-05, "loss": 2.8071, "step": 77900 }, { "epoch": 0.56, "learning_rate": 1.4364159759290603e-05, "loss": 2.8076, "step": 78000 }, { "epoch": 0.56, "eval_accuracy": 0.4540236455526218, "eval_loss": 2.819603204727173, "eval_runtime": 28.0095, "eval_samples_per_second": 231.457, "eval_steps_per_second": 2.428, "step": 78000 }, { "epoch": 0.56, "learning_rate": 1.435692690477224e-05, "loss": 2.7967, "step": 78100 }, { "epoch": 0.57, "learning_rate": 1.4349694050253875e-05, "loss": 2.8074, "step": 78200 }, { "epoch": 0.57, "learning_rate": 1.4342461195735511e-05, "loss": 2.8124, "step": 78300 }, { "epoch": 0.57, "learning_rate": 1.4335228341217146e-05, "loss": 2.8052, "step": 78400 }, { "epoch": 0.57, "learning_rate": 1.4327995486698782e-05, "loss": 2.8116, "step": 78500 }, { "epoch": 0.57, "learning_rate": 1.4320762632180419e-05, "loss": 2.8076, "step": 78600 }, { "epoch": 0.57, "learning_rate": 1.4313529777662054e-05, "loss": 2.8153, "step": 78700 }, { "epoch": 0.57, "learning_rate": 1.430629692314369e-05, "loss": 2.7979, "step": 78800 }, { "epoch": 0.57, "learning_rate": 1.4299064068625325e-05, "loss": 2.8034, "step": 78900 }, { "epoch": 0.57, "learning_rate": 1.4291903542652143e-05, "loss": 2.7937, "step": 79000 }, { "epoch": 0.57, "eval_accuracy": 0.45417608042754354, "eval_loss": 2.8189663887023926, "eval_runtime": 29.6646, "eval_samples_per_second": 218.543, "eval_steps_per_second": 2.292, "step": 79000 }, { "epoch": 0.57, "learning_rate": 1.428467068813378e-05, "loss": 2.7999, "step": 79100 }, { "epoch": 0.57, "learning_rate": 1.4277437833615415e-05, "loss": 2.7967, "step": 79200 }, { "epoch": 0.57, "learning_rate": 1.4270204979097051e-05, "loss": 2.8032, "step": 79300 }, { "epoch": 0.57, "learning_rate": 1.4262972124578688e-05, "loss": 2.8101, "step": 79400 }, { "epoch": 0.58, "learning_rate": 1.4255739270060322e-05, "loss": 2.8052, "step": 79500 }, { "epoch": 0.58, "learning_rate": 1.4248506415541959e-05, "loss": 2.7995, "step": 79600 }, { "epoch": 0.58, "learning_rate": 1.4241273561023594e-05, "loss": 2.8048, "step": 79700 }, { "epoch": 0.58, "learning_rate": 1.423404070650523e-05, "loss": 2.8033, "step": 79800 }, { "epoch": 0.58, "learning_rate": 1.4226807851986865e-05, "loss": 2.8033, "step": 79900 }, { "epoch": 0.58, "learning_rate": 1.4219574997468502e-05, "loss": 2.8057, "step": 80000 }, { "epoch": 0.58, "eval_accuracy": 0.4541452305123808, "eval_loss": 2.8179192543029785, "eval_runtime": 27.9926, "eval_samples_per_second": 231.597, "eval_steps_per_second": 2.429, "step": 80000 }, { "epoch": 0.58, "learning_rate": 1.4212342142950138e-05, "loss": 2.8101, "step": 80100 }, { "epoch": 0.58, "learning_rate": 1.4205109288431773e-05, "loss": 2.8088, "step": 80200 }, { "epoch": 0.58, "learning_rate": 1.419787643391341e-05, "loss": 2.8033, "step": 80300 }, { "epoch": 0.58, "learning_rate": 1.4190643579395044e-05, "loss": 2.8072, "step": 80400 }, { "epoch": 0.58, "learning_rate": 1.418341072487668e-05, "loss": 2.8053, "step": 80500 }, { "epoch": 0.58, "learning_rate": 1.4176177870358316e-05, "loss": 2.8044, "step": 80600 }, { "epoch": 0.58, "learning_rate": 1.4168945015839952e-05, "loss": 2.8059, "step": 80700 }, { "epoch": 0.58, "learning_rate": 1.4161712161321589e-05, "loss": 2.7949, "step": 80800 }, { "epoch": 0.59, "learning_rate": 1.4154479306803223e-05, "loss": 2.8008, "step": 80900 }, { "epoch": 0.59, "learning_rate": 1.414724645228486e-05, "loss": 2.8082, "step": 81000 }, { "epoch": 0.59, "eval_accuracy": 0.45446219826817047, "eval_loss": 2.8168437480926514, "eval_runtime": 30.4701, "eval_samples_per_second": 212.766, "eval_steps_per_second": 2.232, "step": 81000 }, { "epoch": 0.59, "learning_rate": 1.4140013597766495e-05, "loss": 2.803, "step": 81100 }, { "epoch": 0.59, "learning_rate": 1.4132780743248131e-05, "loss": 2.806, "step": 81200 }, { "epoch": 0.59, "learning_rate": 1.4125620217274951e-05, "loss": 2.8034, "step": 81300 }, { "epoch": 0.59, "learning_rate": 1.4118387362756588e-05, "loss": 2.7982, "step": 81400 }, { "epoch": 0.59, "learning_rate": 1.4111154508238222e-05, "loss": 2.7955, "step": 81500 }, { "epoch": 0.59, "learning_rate": 1.4103921653719859e-05, "loss": 2.8114, "step": 81600 }, { "epoch": 0.59, "learning_rate": 1.4096688799201494e-05, "loss": 2.8103, "step": 81700 }, { "epoch": 0.59, "learning_rate": 1.408945594468313e-05, "loss": 2.7982, "step": 81800 }, { "epoch": 0.59, "learning_rate": 1.4082223090164767e-05, "loss": 2.8026, "step": 81900 }, { "epoch": 0.59, "learning_rate": 1.4074990235646402e-05, "loss": 2.7986, "step": 82000 }, { "epoch": 0.59, "eval_accuracy": 0.45461100374130836, "eval_loss": 2.815699577331543, "eval_runtime": 31.1592, "eval_samples_per_second": 208.061, "eval_steps_per_second": 2.182, "step": 82000 }, { "epoch": 0.59, "learning_rate": 1.4067757381128038e-05, "loss": 2.8006, "step": 82100 }, { "epoch": 0.59, "learning_rate": 1.4060524526609673e-05, "loss": 2.8056, "step": 82200 }, { "epoch": 0.6, "learning_rate": 1.405329167209131e-05, "loss": 2.7992, "step": 82300 }, { "epoch": 0.6, "learning_rate": 1.4046058817572946e-05, "loss": 2.8123, "step": 82400 }, { "epoch": 0.6, "learning_rate": 1.403882596305458e-05, "loss": 2.7977, "step": 82500 }, { "epoch": 0.6, "learning_rate": 1.4031593108536217e-05, "loss": 2.808, "step": 82600 }, { "epoch": 0.6, "learning_rate": 1.4024360254017852e-05, "loss": 2.7964, "step": 82700 }, { "epoch": 0.6, "learning_rate": 1.4017127399499489e-05, "loss": 2.7978, "step": 82800 }, { "epoch": 0.6, "learning_rate": 1.4009894544981123e-05, "loss": 2.7935, "step": 82900 }, { "epoch": 0.6, "learning_rate": 1.400266169046276e-05, "loss": 2.8062, "step": 83000 }, { "epoch": 0.6, "eval_accuracy": 0.45453962550622595, "eval_loss": 2.814990520477295, "eval_runtime": 29.5244, "eval_samples_per_second": 219.581, "eval_steps_per_second": 2.303, "step": 83000 }, { "epoch": 0.6, "learning_rate": 1.3995428835944396e-05, "loss": 2.7966, "step": 83100 }, { "epoch": 0.6, "learning_rate": 1.3988195981426031e-05, "loss": 2.803, "step": 83200 }, { "epoch": 0.6, "learning_rate": 1.3981107783998034e-05, "loss": 2.799, "step": 83300 }, { "epoch": 0.6, "learning_rate": 1.3973874929479669e-05, "loss": 2.801, "step": 83400 }, { "epoch": 0.6, "learning_rate": 1.3966642074961306e-05, "loss": 2.7902, "step": 83500 }, { "epoch": 0.6, "learning_rate": 1.395940922044294e-05, "loss": 2.7999, "step": 83600 }, { "epoch": 0.61, "learning_rate": 1.3952176365924577e-05, "loss": 2.8011, "step": 83700 }, { "epoch": 0.61, "learning_rate": 1.3944943511406213e-05, "loss": 2.8004, "step": 83800 }, { "epoch": 0.61, "learning_rate": 1.3937710656887848e-05, "loss": 2.7973, "step": 83900 }, { "epoch": 0.61, "learning_rate": 1.3930477802369485e-05, "loss": 2.7981, "step": 84000 }, { "epoch": 0.61, "eval_accuracy": 0.4545880175300106, "eval_loss": 2.813809871673584, "eval_runtime": 28.1508, "eval_samples_per_second": 230.295, "eval_steps_per_second": 2.416, "step": 84000 }, { "epoch": 0.61, "learning_rate": 1.392324494785112e-05, "loss": 2.7926, "step": 84100 }, { "epoch": 0.61, "learning_rate": 1.3916012093332756e-05, "loss": 2.7982, "step": 84200 }, { "epoch": 0.61, "learning_rate": 1.390877923881439e-05, "loss": 2.8091, "step": 84300 }, { "epoch": 0.61, "learning_rate": 1.3901618712841212e-05, "loss": 2.8055, "step": 84400 }, { "epoch": 0.61, "learning_rate": 1.389445818686803e-05, "loss": 2.7951, "step": 84500 }, { "epoch": 0.61, "learning_rate": 1.3887225332349665e-05, "loss": 2.8024, "step": 84600 }, { "epoch": 0.61, "learning_rate": 1.3879992477831302e-05, "loss": 2.7985, "step": 84700 }, { "epoch": 0.61, "learning_rate": 1.3872759623312936e-05, "loss": 2.7949, "step": 84800 }, { "epoch": 0.61, "learning_rate": 1.3865526768794573e-05, "loss": 2.7992, "step": 84900 }, { "epoch": 0.61, "learning_rate": 1.3858293914276208e-05, "loss": 2.8041, "step": 85000 }, { "epoch": 0.61, "eval_accuracy": 0.4546454830582549, "eval_loss": 2.8130455017089844, "eval_runtime": 29.7879, "eval_samples_per_second": 217.639, "eval_steps_per_second": 2.283, "step": 85000 }, { "epoch": 0.62, "learning_rate": 1.3851061059757844e-05, "loss": 2.8013, "step": 85100 }, { "epoch": 0.62, "learning_rate": 1.384382820523948e-05, "loss": 2.8052, "step": 85200 }, { "epoch": 0.62, "learning_rate": 1.3836595350721116e-05, "loss": 2.801, "step": 85300 }, { "epoch": 0.62, "learning_rate": 1.3829362496202752e-05, "loss": 2.8045, "step": 85400 }, { "epoch": 0.62, "learning_rate": 1.3822129641684387e-05, "loss": 2.8031, "step": 85500 }, { "epoch": 0.62, "learning_rate": 1.3814896787166023e-05, "loss": 2.7966, "step": 85600 }, { "epoch": 0.62, "learning_rate": 1.3807663932647658e-05, "loss": 2.7934, "step": 85700 }, { "epoch": 0.62, "learning_rate": 1.3800431078129295e-05, "loss": 2.7978, "step": 85800 }, { "epoch": 0.62, "learning_rate": 1.3793198223610931e-05, "loss": 2.7973, "step": 85900 }, { "epoch": 0.62, "learning_rate": 1.3785965369092566e-05, "loss": 2.7978, "step": 86000 }, { "epoch": 0.62, "eval_accuracy": 0.4548626422649887, "eval_loss": 2.8118443489074707, "eval_runtime": 29.2336, "eval_samples_per_second": 221.765, "eval_steps_per_second": 2.326, "step": 86000 }, { "epoch": 0.62, "learning_rate": 1.3778732514574203e-05, "loss": 2.798, "step": 86100 }, { "epoch": 0.62, "learning_rate": 1.3771499660055837e-05, "loss": 2.7967, "step": 86200 }, { "epoch": 0.62, "learning_rate": 1.3764266805537474e-05, "loss": 2.8004, "step": 86300 }, { "epoch": 0.62, "learning_rate": 1.375703395101911e-05, "loss": 2.7957, "step": 86400 }, { "epoch": 0.63, "learning_rate": 1.3749801096500745e-05, "loss": 2.7932, "step": 86500 }, { "epoch": 0.63, "learning_rate": 1.3742568241982382e-05, "loss": 2.8002, "step": 86600 }, { "epoch": 0.63, "learning_rate": 1.3735335387464017e-05, "loss": 2.8027, "step": 86700 }, { "epoch": 0.63, "learning_rate": 1.3728102532945653e-05, "loss": 2.8001, "step": 86800 }, { "epoch": 0.63, "learning_rate": 1.3720869678427288e-05, "loss": 2.7988, "step": 86900 }, { "epoch": 0.63, "learning_rate": 1.3713636823908925e-05, "loss": 2.8016, "step": 87000 }, { "epoch": 0.63, "eval_accuracy": 0.45493644010126033, "eval_loss": 2.8109002113342285, "eval_runtime": 29.8642, "eval_samples_per_second": 217.082, "eval_steps_per_second": 2.277, "step": 87000 }, { "epoch": 0.63, "learning_rate": 1.3706476297935744e-05, "loss": 2.7946, "step": 87100 }, { "epoch": 0.63, "learning_rate": 1.369924344341738e-05, "loss": 2.794, "step": 87200 }, { "epoch": 0.63, "learning_rate": 1.3692010588899016e-05, "loss": 2.8011, "step": 87300 }, { "epoch": 0.63, "learning_rate": 1.3684777734380652e-05, "loss": 2.7973, "step": 87400 }, { "epoch": 0.63, "learning_rate": 1.3677544879862289e-05, "loss": 2.8076, "step": 87500 }, { "epoch": 0.63, "learning_rate": 1.3670312025343923e-05, "loss": 2.8012, "step": 87600 }, { "epoch": 0.63, "learning_rate": 1.366307917082556e-05, "loss": 2.7916, "step": 87700 }, { "epoch": 0.64, "learning_rate": 1.3655846316307195e-05, "loss": 2.798, "step": 87800 }, { "epoch": 0.64, "learning_rate": 1.3648613461788831e-05, "loss": 2.7971, "step": 87900 }, { "epoch": 0.64, "learning_rate": 1.3641380607270466e-05, "loss": 2.7901, "step": 88000 }, { "epoch": 0.64, "eval_accuracy": 0.4551348473987775, "eval_loss": 2.809919834136963, "eval_runtime": 30.2675, "eval_samples_per_second": 214.19, "eval_steps_per_second": 2.247, "step": 88000 }, { "epoch": 0.64, "learning_rate": 1.3634220081297288e-05, "loss": 2.8022, "step": 88100 }, { "epoch": 0.64, "learning_rate": 1.3626987226778922e-05, "loss": 2.7927, "step": 88200 }, { "epoch": 0.64, "learning_rate": 1.3619754372260559e-05, "loss": 2.7951, "step": 88300 }, { "epoch": 0.64, "learning_rate": 1.3612521517742194e-05, "loss": 2.7995, "step": 88400 }, { "epoch": 0.64, "learning_rate": 1.360528866322383e-05, "loss": 2.7966, "step": 88500 }, { "epoch": 0.64, "learning_rate": 1.3598055808705463e-05, "loss": 2.7882, "step": 88600 }, { "epoch": 0.64, "learning_rate": 1.35908229541871e-05, "loss": 2.7933, "step": 88700 }, { "epoch": 0.64, "learning_rate": 1.3583590099668735e-05, "loss": 2.7965, "step": 88800 }, { "epoch": 0.64, "learning_rate": 1.3576357245150371e-05, "loss": 2.7948, "step": 88900 }, { "epoch": 0.64, "learning_rate": 1.3569124390632008e-05, "loss": 2.8075, "step": 89000 }, { "epoch": 0.64, "eval_accuracy": 0.4553132929864835, "eval_loss": 2.809250593185425, "eval_runtime": 29.5022, "eval_samples_per_second": 219.746, "eval_steps_per_second": 2.305, "step": 89000 }, { "epoch": 0.64, "learning_rate": 1.3561891536113643e-05, "loss": 2.7911, "step": 89100 }, { "epoch": 0.65, "learning_rate": 1.3554658681595279e-05, "loss": 2.7966, "step": 89200 }, { "epoch": 0.65, "learning_rate": 1.3547425827076914e-05, "loss": 2.7951, "step": 89300 }, { "epoch": 0.65, "learning_rate": 1.354019297255855e-05, "loss": 2.7992, "step": 89400 }, { "epoch": 0.65, "learning_rate": 1.3532960118040185e-05, "loss": 2.7913, "step": 89500 }, { "epoch": 0.65, "learning_rate": 1.3525727263521822e-05, "loss": 2.7985, "step": 89600 }, { "epoch": 0.65, "learning_rate": 1.3518494409003458e-05, "loss": 2.7968, "step": 89700 }, { "epoch": 0.65, "learning_rate": 1.3511261554485093e-05, "loss": 2.7985, "step": 89800 }, { "epoch": 0.65, "learning_rate": 1.350402869996673e-05, "loss": 2.7937, "step": 89900 }, { "epoch": 0.65, "learning_rate": 1.3496795845448364e-05, "loss": 2.7915, "step": 90000 }, { "epoch": 0.65, "eval_accuracy": 0.4552001766308868, "eval_loss": 2.808422327041626, "eval_runtime": 29.3832, "eval_samples_per_second": 220.636, "eval_steps_per_second": 2.314, "step": 90000 }, { "epoch": 0.65, "learning_rate": 1.3489562990930001e-05, "loss": 2.7934, "step": 90100 }, { "epoch": 0.65, "learning_rate": 1.348240246495682e-05, "loss": 2.7959, "step": 90200 }, { "epoch": 0.65, "learning_rate": 1.3475169610438457e-05, "loss": 2.8034, "step": 90300 }, { "epoch": 0.65, "learning_rate": 1.3467936755920092e-05, "loss": 2.7908, "step": 90400 }, { "epoch": 0.65, "learning_rate": 1.3460703901401728e-05, "loss": 2.7904, "step": 90500 }, { "epoch": 0.66, "learning_rate": 1.3453543375428548e-05, "loss": 2.789, "step": 90600 }, { "epoch": 0.66, "learning_rate": 1.3446310520910185e-05, "loss": 2.7961, "step": 90700 }, { "epoch": 0.66, "learning_rate": 1.343907766639182e-05, "loss": 2.7939, "step": 90800 }, { "epoch": 0.66, "learning_rate": 1.3431844811873456e-05, "loss": 2.7922, "step": 90900 }, { "epoch": 0.66, "learning_rate": 1.3424611957355091e-05, "loss": 2.7916, "step": 91000 }, { "epoch": 0.66, "eval_accuracy": 0.4554560494566483, "eval_loss": 2.807447910308838, "eval_runtime": 30.8057, "eval_samples_per_second": 210.448, "eval_steps_per_second": 2.207, "step": 91000 }, { "epoch": 0.66, "learning_rate": 1.3417379102836727e-05, "loss": 2.7855, "step": 91100 }, { "epoch": 0.66, "learning_rate": 1.3410146248318364e-05, "loss": 2.8014, "step": 91200 }, { "epoch": 0.66, "learning_rate": 1.3402913393799999e-05, "loss": 2.7801, "step": 91300 }, { "epoch": 0.66, "learning_rate": 1.3395680539281635e-05, "loss": 2.7898, "step": 91400 }, { "epoch": 0.66, "learning_rate": 1.338844768476327e-05, "loss": 2.7983, "step": 91500 }, { "epoch": 0.66, "learning_rate": 1.3381214830244907e-05, "loss": 2.7945, "step": 91600 }, { "epoch": 0.66, "learning_rate": 1.3373981975726541e-05, "loss": 2.7854, "step": 91700 }, { "epoch": 0.66, "learning_rate": 1.3366749121208178e-05, "loss": 2.7905, "step": 91800 }, { "epoch": 0.66, "learning_rate": 1.3359516266689814e-05, "loss": 2.7875, "step": 91900 }, { "epoch": 0.67, "learning_rate": 1.335228341217145e-05, "loss": 2.7751, "step": 92000 }, { "epoch": 0.67, "eval_accuracy": 0.4554330632453506, "eval_loss": 2.806763172149658, "eval_runtime": 32.0762, "eval_samples_per_second": 202.113, "eval_steps_per_second": 2.12, "step": 92000 }, { "epoch": 0.67, "learning_rate": 1.3345050557653086e-05, "loss": 2.7943, "step": 92100 }, { "epoch": 0.67, "learning_rate": 1.333781770313472e-05, "loss": 2.7919, "step": 92200 }, { "epoch": 0.67, "learning_rate": 1.3330584848616357e-05, "loss": 2.7911, "step": 92300 }, { "epoch": 0.67, "learning_rate": 1.3323351994097994e-05, "loss": 2.789, "step": 92400 }, { "epoch": 0.67, "learning_rate": 1.3316119139579628e-05, "loss": 2.7876, "step": 92500 }, { "epoch": 0.67, "learning_rate": 1.3308886285061265e-05, "loss": 2.7861, "step": 92600 }, { "epoch": 0.67, "learning_rate": 1.33016534305429e-05, "loss": 2.7807, "step": 92700 }, { "epoch": 0.67, "learning_rate": 1.3294420576024536e-05, "loss": 2.8013, "step": 92800 }, { "epoch": 0.67, "learning_rate": 1.3287260050051354e-05, "loss": 2.7933, "step": 92900 }, { "epoch": 0.67, "learning_rate": 1.3280027195532989e-05, "loss": 2.7896, "step": 93000 }, { "epoch": 0.67, "eval_accuracy": 0.45561574313513775, "eval_loss": 2.8058676719665527, "eval_runtime": 29.3079, "eval_samples_per_second": 221.203, "eval_steps_per_second": 2.32, "step": 93000 }, { "epoch": 0.67, "learning_rate": 1.3272794341014626e-05, "loss": 2.7916, "step": 93100 }, { "epoch": 0.67, "learning_rate": 1.326556148649626e-05, "loss": 2.7959, "step": 93200 }, { "epoch": 0.67, "learning_rate": 1.3258328631977897e-05, "loss": 2.7946, "step": 93300 }, { "epoch": 0.68, "learning_rate": 1.3251095777459533e-05, "loss": 2.789, "step": 93400 }, { "epoch": 0.68, "learning_rate": 1.3243862922941168e-05, "loss": 2.7914, "step": 93500 }, { "epoch": 0.68, "learning_rate": 1.3236630068422805e-05, "loss": 2.7956, "step": 93600 }, { "epoch": 0.68, "learning_rate": 1.322939721390444e-05, "loss": 2.7945, "step": 93700 }, { "epoch": 0.68, "learning_rate": 1.3222164359386076e-05, "loss": 2.7877, "step": 93800 }, { "epoch": 0.68, "learning_rate": 1.3214931504867713e-05, "loss": 2.7865, "step": 93900 }, { "epoch": 0.68, "learning_rate": 1.3207698650349347e-05, "loss": 2.7886, "step": 94000 }, { "epoch": 0.68, "eval_accuracy": 0.45565748125565203, "eval_loss": 2.8051185607910156, "eval_runtime": 29.7995, "eval_samples_per_second": 217.554, "eval_steps_per_second": 2.282, "step": 94000 }, { "epoch": 0.68, "learning_rate": 1.3200538124376167e-05, "loss": 2.7942, "step": 94100 }, { "epoch": 0.68, "learning_rate": 1.3193305269857804e-05, "loss": 2.7838, "step": 94200 }, { "epoch": 0.68, "learning_rate": 1.3186072415339439e-05, "loss": 2.7905, "step": 94300 }, { "epoch": 0.68, "learning_rate": 1.3178839560821075e-05, "loss": 2.7868, "step": 94400 }, { "epoch": 0.68, "learning_rate": 1.3171606706302712e-05, "loss": 2.7851, "step": 94500 }, { "epoch": 0.68, "learning_rate": 1.3164373851784346e-05, "loss": 2.7934, "step": 94600 }, { "epoch": 0.68, "learning_rate": 1.3157140997265983e-05, "loss": 2.789, "step": 94700 }, { "epoch": 0.69, "learning_rate": 1.3149908142747618e-05, "loss": 2.7892, "step": 94800 }, { "epoch": 0.69, "learning_rate": 1.3142675288229254e-05, "loss": 2.7828, "step": 94900 }, { "epoch": 0.69, "learning_rate": 1.3135442433710889e-05, "loss": 2.7909, "step": 95000 }, { "epoch": 0.69, "eval_accuracy": 0.45570647817973403, "eval_loss": 2.804401397705078, "eval_runtime": 30.1768, "eval_samples_per_second": 214.834, "eval_steps_per_second": 2.253, "step": 95000 }, { "epoch": 0.69, "learning_rate": 1.3128209579192526e-05, "loss": 2.7872, "step": 95100 }, { "epoch": 0.69, "learning_rate": 1.3121049053219344e-05, "loss": 2.7878, "step": 95200 }, { "epoch": 0.69, "learning_rate": 1.3113888527246163e-05, "loss": 2.7852, "step": 95300 }, { "epoch": 0.69, "learning_rate": 1.31066556727278e-05, "loss": 2.7896, "step": 95400 }, { "epoch": 0.69, "learning_rate": 1.3099422818209435e-05, "loss": 2.7886, "step": 95500 }, { "epoch": 0.69, "learning_rate": 1.3092189963691071e-05, "loss": 2.7876, "step": 95600 }, { "epoch": 0.69, "learning_rate": 1.3084957109172706e-05, "loss": 2.7866, "step": 95700 }, { "epoch": 0.69, "learning_rate": 1.3077724254654343e-05, "loss": 2.791, "step": 95800 }, { "epoch": 0.69, "learning_rate": 1.3070491400135979e-05, "loss": 2.7915, "step": 95900 }, { "epoch": 0.69, "learning_rate": 1.3063258545617614e-05, "loss": 2.7926, "step": 96000 }, { "epoch": 0.69, "eval_accuracy": 0.45580084262611414, "eval_loss": 2.803481101989746, "eval_runtime": 30.9308, "eval_samples_per_second": 209.597, "eval_steps_per_second": 2.198, "step": 96000 }, { "epoch": 0.7, "learning_rate": 1.305602569109925e-05, "loss": 2.7844, "step": 96100 }, { "epoch": 0.7, "learning_rate": 1.3048792836580885e-05, "loss": 2.7857, "step": 96200 }, { "epoch": 0.7, "learning_rate": 1.3041559982062522e-05, "loss": 2.7882, "step": 96300 }, { "epoch": 0.7, "learning_rate": 1.3034327127544158e-05, "loss": 2.7929, "step": 96400 }, { "epoch": 0.7, "learning_rate": 1.3027094273025793e-05, "loss": 2.7979, "step": 96500 }, { "epoch": 0.7, "learning_rate": 1.301986141850743e-05, "loss": 2.798, "step": 96600 }, { "epoch": 0.7, "learning_rate": 1.3012628563989064e-05, "loss": 2.7847, "step": 96700 }, { "epoch": 0.7, "learning_rate": 1.3005395709470701e-05, "loss": 2.7885, "step": 96800 }, { "epoch": 0.7, "learning_rate": 1.2998162854952336e-05, "loss": 2.7781, "step": 96900 }, { "epoch": 0.7, "learning_rate": 1.2990930000433972e-05, "loss": 2.7931, "step": 97000 }, { "epoch": 0.7, "eval_accuracy": 0.45598533721679324, "eval_loss": 2.802797317504883, "eval_runtime": 30.7179, "eval_samples_per_second": 211.05, "eval_steps_per_second": 2.214, "step": 97000 }, { "epoch": 0.7, "learning_rate": 1.2983769474460792e-05, "loss": 2.7851, "step": 97100 }, { "epoch": 0.7, "learning_rate": 1.2976536619942428e-05, "loss": 2.7835, "step": 97200 }, { "epoch": 0.7, "learning_rate": 1.2969303765424063e-05, "loss": 2.7948, "step": 97300 }, { "epoch": 0.7, "learning_rate": 1.29620709109057e-05, "loss": 2.7872, "step": 97400 }, { "epoch": 0.71, "learning_rate": 1.2954838056387336e-05, "loss": 2.7882, "step": 97500 }, { "epoch": 0.71, "learning_rate": 1.2947605201868971e-05, "loss": 2.792, "step": 97600 }, { "epoch": 0.71, "learning_rate": 1.2940372347350608e-05, "loss": 2.7907, "step": 97700 }, { "epoch": 0.71, "learning_rate": 1.2933139492832242e-05, "loss": 2.7916, "step": 97800 }, { "epoch": 0.71, "learning_rate": 1.2925906638313879e-05, "loss": 2.7805, "step": 97900 }, { "epoch": 0.71, "learning_rate": 1.2918673783795514e-05, "loss": 2.7838, "step": 98000 }, { "epoch": 0.71, "eval_accuracy": 0.4561528945991477, "eval_loss": 2.802030563354492, "eval_runtime": 29.961, "eval_samples_per_second": 216.381, "eval_steps_per_second": 2.27, "step": 98000 }, { "epoch": 0.71, "learning_rate": 1.291144092927715e-05, "loss": 2.7927, "step": 98100 }, { "epoch": 0.71, "learning_rate": 1.2904208074758787e-05, "loss": 2.7861, "step": 98200 }, { "epoch": 0.71, "learning_rate": 1.2896975220240422e-05, "loss": 2.7933, "step": 98300 }, { "epoch": 0.71, "learning_rate": 1.2889742365722058e-05, "loss": 2.785, "step": 98400 }, { "epoch": 0.71, "learning_rate": 1.2882509511203693e-05, "loss": 2.7827, "step": 98500 }, { "epoch": 0.71, "learning_rate": 1.287527665668533e-05, "loss": 2.7861, "step": 98600 }, { "epoch": 0.71, "learning_rate": 1.2868043802166964e-05, "loss": 2.7901, "step": 98700 }, { "epoch": 0.71, "learning_rate": 1.28608109476486e-05, "loss": 2.7939, "step": 98800 }, { "epoch": 0.72, "learning_rate": 1.2853578093130237e-05, "loss": 2.7834, "step": 98900 }, { "epoch": 0.72, "learning_rate": 1.2846489895702239e-05, "loss": 2.779, "step": 99000 }, { "epoch": 0.72, "eval_accuracy": 0.45607909676287606, "eval_loss": 2.8013815879821777, "eval_runtime": 29.8613, "eval_samples_per_second": 217.104, "eval_steps_per_second": 2.277, "step": 99000 }, { "epoch": 0.72, "learning_rate": 1.2839257041183875e-05, "loss": 2.7808, "step": 99100 }, { "epoch": 0.72, "learning_rate": 1.283202418666551e-05, "loss": 2.7838, "step": 99200 }, { "epoch": 0.72, "learning_rate": 1.2824791332147146e-05, "loss": 2.7794, "step": 99300 }, { "epoch": 0.72, "learning_rate": 1.2817558477628781e-05, "loss": 2.7863, "step": 99400 }, { "epoch": 0.72, "learning_rate": 1.2810325623110418e-05, "loss": 2.7833, "step": 99500 }, { "epoch": 0.72, "learning_rate": 1.2803092768592054e-05, "loss": 2.7833, "step": 99600 }, { "epoch": 0.72, "learning_rate": 1.2795859914073689e-05, "loss": 2.7824, "step": 99700 }, { "epoch": 0.72, "learning_rate": 1.2788627059555326e-05, "loss": 2.792, "step": 99800 }, { "epoch": 0.72, "learning_rate": 1.278139420503696e-05, "loss": 2.7919, "step": 99900 }, { "epoch": 0.72, "learning_rate": 1.2774161350518597e-05, "loss": 2.7922, "step": 100000 }, { "epoch": 0.72, "eval_accuracy": 0.4561583387018235, "eval_loss": 2.8006463050842285, "eval_runtime": 28.183, "eval_samples_per_second": 230.032, "eval_steps_per_second": 2.413, "step": 100000 }, { "epoch": 0.72, "learning_rate": 1.2766928496000233e-05, "loss": 2.7826, "step": 100100 }, { "epoch": 0.72, "learning_rate": 1.2759695641481868e-05, "loss": 2.788, "step": 100200 }, { "epoch": 0.73, "learning_rate": 1.2752462786963505e-05, "loss": 2.7929, "step": 100300 }, { "epoch": 0.73, "learning_rate": 1.274522993244514e-05, "loss": 2.7889, "step": 100400 }, { "epoch": 0.73, "learning_rate": 1.2737997077926776e-05, "loss": 2.7805, "step": 100500 }, { "epoch": 0.73, "learning_rate": 1.2730764223408411e-05, "loss": 2.7857, "step": 100600 }, { "epoch": 0.73, "learning_rate": 1.2723531368890047e-05, "loss": 2.7963, "step": 100700 }, { "epoch": 0.73, "learning_rate": 1.2716298514371684e-05, "loss": 2.7759, "step": 100800 }, { "epoch": 0.73, "learning_rate": 1.2709065659853319e-05, "loss": 2.784, "step": 100900 }, { "epoch": 0.73, "learning_rate": 1.2701832805334955e-05, "loss": 2.7786, "step": 101000 }, { "epoch": 0.73, "eval_accuracy": 0.4561758808104454, "eval_loss": 2.7999138832092285, "eval_runtime": 29.8581, "eval_samples_per_second": 217.127, "eval_steps_per_second": 2.277, "step": 101000 }, { "epoch": 0.73, "learning_rate": 1.269459995081659e-05, "loss": 2.7861, "step": 101100 }, { "epoch": 0.73, "learning_rate": 1.2687367096298227e-05, "loss": 2.7875, "step": 101200 }, { "epoch": 0.73, "learning_rate": 1.2680134241779861e-05, "loss": 2.7815, "step": 101300 }, { "epoch": 0.73, "learning_rate": 1.2672901387261498e-05, "loss": 2.7838, "step": 101400 }, { "epoch": 0.73, "learning_rate": 1.2665668532743135e-05, "loss": 2.7861, "step": 101500 }, { "epoch": 0.73, "learning_rate": 1.265843567822477e-05, "loss": 2.7867, "step": 101600 }, { "epoch": 0.74, "learning_rate": 1.2651202823706406e-05, "loss": 2.7878, "step": 101700 }, { "epoch": 0.74, "learning_rate": 1.264396996918804e-05, "loss": 2.7861, "step": 101800 }, { "epoch": 0.74, "learning_rate": 1.2636809443214859e-05, "loss": 2.7844, "step": 101900 }, { "epoch": 0.74, "learning_rate": 1.2629576588696495e-05, "loss": 2.7791, "step": 102000 }, { "epoch": 0.74, "eval_accuracy": 0.45630411967347484, "eval_loss": 2.7991721630096436, "eval_runtime": 29.6469, "eval_samples_per_second": 218.674, "eval_steps_per_second": 2.294, "step": 102000 }, { "epoch": 0.74, "learning_rate": 1.262234373417813e-05, "loss": 2.7853, "step": 102100 }, { "epoch": 0.74, "learning_rate": 1.2615110879659767e-05, "loss": 2.7833, "step": 102200 }, { "epoch": 0.74, "learning_rate": 1.2607878025141403e-05, "loss": 2.7832, "step": 102300 }, { "epoch": 0.74, "learning_rate": 1.2600645170623038e-05, "loss": 2.778, "step": 102400 }, { "epoch": 0.74, "learning_rate": 1.2593412316104674e-05, "loss": 2.7849, "step": 102500 }, { "epoch": 0.74, "learning_rate": 1.258617946158631e-05, "loss": 2.7798, "step": 102600 }, { "epoch": 0.74, "learning_rate": 1.2578946607067946e-05, "loss": 2.7834, "step": 102700 }, { "epoch": 0.74, "learning_rate": 1.257171375254958e-05, "loss": 2.7792, "step": 102800 }, { "epoch": 0.74, "learning_rate": 1.2564480898031217e-05, "loss": 2.7889, "step": 102900 }, { "epoch": 0.74, "learning_rate": 1.2557248043512854e-05, "loss": 2.7908, "step": 103000 }, { "epoch": 0.74, "eval_accuracy": 0.4565454748921009, "eval_loss": 2.798401117324829, "eval_runtime": 29.5641, "eval_samples_per_second": 219.287, "eval_steps_per_second": 2.3, "step": 103000 }, { "epoch": 0.75, "learning_rate": 1.2550087517539673e-05, "loss": 2.7769, "step": 103100 }, { "epoch": 0.75, "learning_rate": 1.2542854663021308e-05, "loss": 2.7866, "step": 103200 }, { "epoch": 0.75, "learning_rate": 1.2535621808502945e-05, "loss": 2.7823, "step": 103300 }, { "epoch": 0.75, "learning_rate": 1.2528388953984581e-05, "loss": 2.785, "step": 103400 }, { "epoch": 0.75, "learning_rate": 1.2521156099466216e-05, "loss": 2.7782, "step": 103500 }, { "epoch": 0.75, "learning_rate": 1.2513923244947853e-05, "loss": 2.7795, "step": 103600 }, { "epoch": 0.75, "learning_rate": 1.2506690390429487e-05, "loss": 2.7857, "step": 103700 }, { "epoch": 0.75, "learning_rate": 1.2499457535911124e-05, "loss": 2.7851, "step": 103800 }, { "epoch": 0.75, "learning_rate": 1.2492224681392759e-05, "loss": 2.7816, "step": 103900 }, { "epoch": 0.75, "learning_rate": 1.2484991826874395e-05, "loss": 2.7872, "step": 104000 }, { "epoch": 0.75, "eval_accuracy": 0.45662834623283216, "eval_loss": 2.7977957725524902, "eval_runtime": 28.0059, "eval_samples_per_second": 231.487, "eval_steps_per_second": 2.428, "step": 104000 }, { "epoch": 0.75, "learning_rate": 1.2477758972356032e-05, "loss": 2.7802, "step": 104100 }, { "epoch": 0.75, "learning_rate": 1.2470526117837667e-05, "loss": 2.7833, "step": 104200 }, { "epoch": 0.75, "learning_rate": 1.2463365591864486e-05, "loss": 2.779, "step": 104300 }, { "epoch": 0.76, "learning_rate": 1.2456132737346123e-05, "loss": 2.7811, "step": 104400 }, { "epoch": 0.76, "learning_rate": 1.244889988282776e-05, "loss": 2.7838, "step": 104500 }, { "epoch": 0.76, "learning_rate": 1.2441667028309394e-05, "loss": 2.7762, "step": 104600 }, { "epoch": 0.76, "learning_rate": 1.243443417379103e-05, "loss": 2.781, "step": 104700 }, { "epoch": 0.76, "learning_rate": 1.2427201319272665e-05, "loss": 2.7898, "step": 104800 }, { "epoch": 0.76, "learning_rate": 1.2419968464754302e-05, "loss": 2.7823, "step": 104900 }, { "epoch": 0.76, "learning_rate": 1.2412735610235937e-05, "loss": 2.7763, "step": 105000 }, { "epoch": 0.76, "eval_accuracy": 0.45665980104829224, "eval_loss": 2.7971575260162354, "eval_runtime": 29.269, "eval_samples_per_second": 221.497, "eval_steps_per_second": 2.323, "step": 105000 }, { "epoch": 0.76, "learning_rate": 1.2405502755717573e-05, "loss": 2.7825, "step": 105100 }, { "epoch": 0.76, "learning_rate": 1.239826990119921e-05, "loss": 2.7826, "step": 105200 }, { "epoch": 0.76, "learning_rate": 1.2391037046680845e-05, "loss": 2.7811, "step": 105300 }, { "epoch": 0.76, "learning_rate": 1.2383876520707663e-05, "loss": 2.7769, "step": 105400 }, { "epoch": 0.76, "learning_rate": 1.23766436661893e-05, "loss": 2.7875, "step": 105500 }, { "epoch": 0.76, "learning_rate": 1.2369410811670934e-05, "loss": 2.7801, "step": 105600 }, { "epoch": 0.76, "learning_rate": 1.236217795715257e-05, "loss": 2.7875, "step": 105700 }, { "epoch": 0.77, "learning_rate": 1.2354945102634205e-05, "loss": 2.7838, "step": 105800 }, { "epoch": 0.77, "learning_rate": 1.2347712248115842e-05, "loss": 2.7865, "step": 105900 }, { "epoch": 0.77, "learning_rate": 1.2340479393597478e-05, "loss": 2.7785, "step": 106000 }, { "epoch": 0.77, "eval_accuracy": 0.4568334074336198, "eval_loss": 2.7966232299804688, "eval_runtime": 29.6697, "eval_samples_per_second": 218.506, "eval_steps_per_second": 2.292, "step": 106000 }, { "epoch": 0.77, "learning_rate": 1.2333246539079113e-05, "loss": 2.7809, "step": 106100 }, { "epoch": 0.77, "learning_rate": 1.232601368456075e-05, "loss": 2.7837, "step": 106200 }, { "epoch": 0.77, "learning_rate": 1.2318780830042385e-05, "loss": 2.7868, "step": 106300 }, { "epoch": 0.77, "learning_rate": 1.2311547975524021e-05, "loss": 2.781, "step": 106400 }, { "epoch": 0.77, "learning_rate": 1.2304315121005656e-05, "loss": 2.781, "step": 106500 }, { "epoch": 0.77, "learning_rate": 1.2297082266487292e-05, "loss": 2.7859, "step": 106600 }, { "epoch": 0.77, "learning_rate": 1.2289849411968929e-05, "loss": 2.7813, "step": 106700 }, { "epoch": 0.77, "learning_rate": 1.2282616557450564e-05, "loss": 2.7857, "step": 106800 }, { "epoch": 0.77, "learning_rate": 1.22753837029322e-05, "loss": 2.7835, "step": 106900 }, { "epoch": 0.77, "learning_rate": 1.2268150848413835e-05, "loss": 2.7861, "step": 107000 }, { "epoch": 0.77, "eval_accuracy": 0.4568231241285655, "eval_loss": 2.795985698699951, "eval_runtime": 29.6433, "eval_samples_per_second": 218.7, "eval_steps_per_second": 2.294, "step": 107000 }, { "epoch": 0.77, "learning_rate": 1.2260917993895472e-05, "loss": 2.7802, "step": 107100 }, { "epoch": 0.78, "learning_rate": 1.2253685139377108e-05, "loss": 2.7901, "step": 107200 }, { "epoch": 0.78, "learning_rate": 1.2246452284858743e-05, "loss": 2.7804, "step": 107300 }, { "epoch": 0.78, "learning_rate": 1.223921943034038e-05, "loss": 2.7784, "step": 107400 }, { "epoch": 0.78, "learning_rate": 1.2231986575822014e-05, "loss": 2.7824, "step": 107500 }, { "epoch": 0.78, "learning_rate": 1.2224826049848834e-05, "loss": 2.7756, "step": 107600 }, { "epoch": 0.78, "learning_rate": 1.221759319533047e-05, "loss": 2.7849, "step": 107700 }, { "epoch": 0.78, "learning_rate": 1.2210360340812107e-05, "loss": 2.7808, "step": 107800 }, { "epoch": 0.78, "learning_rate": 1.2203127486293742e-05, "loss": 2.7741, "step": 107900 }, { "epoch": 0.78, "learning_rate": 1.2195894631775378e-05, "loss": 2.784, "step": 108000 }, { "epoch": 0.78, "eval_accuracy": 0.4569779786046765, "eval_loss": 2.795300245285034, "eval_runtime": 29.2976, "eval_samples_per_second": 221.281, "eval_steps_per_second": 2.321, "step": 108000 }, { "epoch": 0.78, "learning_rate": 1.2188661777257013e-05, "loss": 2.781, "step": 108100 }, { "epoch": 0.78, "learning_rate": 1.218142892273865e-05, "loss": 2.7924, "step": 108200 }, { "epoch": 0.78, "learning_rate": 1.2174196068220286e-05, "loss": 2.7781, "step": 108300 }, { "epoch": 0.78, "learning_rate": 1.2166963213701921e-05, "loss": 2.7808, "step": 108400 }, { "epoch": 0.78, "learning_rate": 1.2159730359183557e-05, "loss": 2.7725, "step": 108500 }, { "epoch": 0.79, "learning_rate": 1.2152497504665192e-05, "loss": 2.7791, "step": 108600 }, { "epoch": 0.79, "learning_rate": 1.2145264650146829e-05, "loss": 2.79, "step": 108700 }, { "epoch": 0.79, "learning_rate": 1.2138031795628464e-05, "loss": 2.7776, "step": 108800 }, { "epoch": 0.79, "learning_rate": 1.21307989411101e-05, "loss": 2.7821, "step": 108900 }, { "epoch": 0.79, "learning_rate": 1.2123638415136918e-05, "loss": 2.7804, "step": 109000 }, { "epoch": 0.79, "eval_accuracy": 0.457063269546597, "eval_loss": 2.794382333755493, "eval_runtime": 29.8656, "eval_samples_per_second": 217.073, "eval_steps_per_second": 2.277, "step": 109000 }, { "epoch": 0.79, "learning_rate": 1.2116405560618553e-05, "loss": 2.7838, "step": 109100 }, { "epoch": 0.79, "learning_rate": 1.210917270610019e-05, "loss": 2.7806, "step": 109200 }, { "epoch": 0.79, "learning_rate": 1.2101939851581826e-05, "loss": 2.7839, "step": 109300 }, { "epoch": 0.79, "learning_rate": 1.2094706997063461e-05, "loss": 2.7805, "step": 109400 }, { "epoch": 0.79, "learning_rate": 1.2087474142545097e-05, "loss": 2.7782, "step": 109500 }, { "epoch": 0.79, "learning_rate": 1.2080241288026732e-05, "loss": 2.7788, "step": 109600 }, { "epoch": 0.79, "learning_rate": 1.2073008433508369e-05, "loss": 2.7809, "step": 109700 }, { "epoch": 0.79, "learning_rate": 1.2065775578990005e-05, "loss": 2.7753, "step": 109800 }, { "epoch": 0.79, "learning_rate": 1.205854272447164e-05, "loss": 2.7894, "step": 109900 }, { "epoch": 0.8, "learning_rate": 1.2051309869953277e-05, "loss": 2.7828, "step": 110000 }, { "epoch": 0.8, "eval_accuracy": 0.45704633233827235, "eval_loss": 2.793990135192871, "eval_runtime": 30.316, "eval_samples_per_second": 213.848, "eval_steps_per_second": 2.243, "step": 110000 }, { "epoch": 0.8, "learning_rate": 1.2044077015434911e-05, "loss": 2.7702, "step": 110100 }, { "epoch": 0.8, "learning_rate": 1.2036844160916548e-05, "loss": 2.7875, "step": 110200 }, { "epoch": 0.8, "learning_rate": 1.2029611306398183e-05, "loss": 2.7813, "step": 110300 }, { "epoch": 0.8, "learning_rate": 1.2022450780425004e-05, "loss": 2.7843, "step": 110400 }, { "epoch": 0.8, "learning_rate": 1.2015217925906639e-05, "loss": 2.7784, "step": 110500 }, { "epoch": 0.8, "learning_rate": 1.2007985071388275e-05, "loss": 2.7802, "step": 110600 }, { "epoch": 0.8, "learning_rate": 1.200075221686991e-05, "loss": 2.7725, "step": 110700 }, { "epoch": 0.8, "learning_rate": 1.1993519362351547e-05, "loss": 2.7745, "step": 110800 }, { "epoch": 0.8, "learning_rate": 1.1986286507833183e-05, "loss": 2.7734, "step": 110900 }, { "epoch": 0.8, "learning_rate": 1.1979125981860003e-05, "loss": 2.7761, "step": 111000 }, { "epoch": 0.8, "eval_accuracy": 0.4570904900599759, "eval_loss": 2.793330192565918, "eval_runtime": 30.099, "eval_samples_per_second": 215.389, "eval_steps_per_second": 2.259, "step": 111000 }, { "epoch": 0.8, "learning_rate": 1.1971893127341638e-05, "loss": 2.7721, "step": 111100 }, { "epoch": 0.8, "learning_rate": 1.1964660272823274e-05, "loss": 2.7846, "step": 111200 }, { "epoch": 0.81, "learning_rate": 1.195742741830491e-05, "loss": 2.7753, "step": 111300 }, { "epoch": 0.81, "learning_rate": 1.1950194563786546e-05, "loss": 2.767, "step": 111400 }, { "epoch": 0.81, "learning_rate": 1.1942961709268182e-05, "loss": 2.7864, "step": 111500 }, { "epoch": 0.81, "learning_rate": 1.1935728854749817e-05, "loss": 2.7781, "step": 111600 }, { "epoch": 0.81, "learning_rate": 1.1928496000231454e-05, "loss": 2.7768, "step": 111700 }, { "epoch": 0.81, "learning_rate": 1.1921263145713088e-05, "loss": 2.7838, "step": 111800 }, { "epoch": 0.81, "learning_rate": 1.1914030291194725e-05, "loss": 2.7771, "step": 111900 }, { "epoch": 0.81, "learning_rate": 1.1906797436676361e-05, "loss": 2.7797, "step": 112000 }, { "epoch": 0.81, "eval_accuracy": 0.45710379786651667, "eval_loss": 2.7928030490875244, "eval_runtime": 30.824, "eval_samples_per_second": 210.323, "eval_steps_per_second": 2.206, "step": 112000 }, { "epoch": 0.81, "learning_rate": 1.1899564582157996e-05, "loss": 2.7739, "step": 112100 }, { "epoch": 0.81, "learning_rate": 1.1892331727639633e-05, "loss": 2.7837, "step": 112200 }, { "epoch": 0.81, "learning_rate": 1.1885098873121268e-05, "loss": 2.7712, "step": 112300 }, { "epoch": 0.81, "learning_rate": 1.1877866018602904e-05, "loss": 2.7802, "step": 112400 }, { "epoch": 0.81, "learning_rate": 1.1870633164084539e-05, "loss": 2.7717, "step": 112500 }, { "epoch": 0.81, "learning_rate": 1.1863400309566175e-05, "loss": 2.7827, "step": 112600 }, { "epoch": 0.82, "learning_rate": 1.1856167455047812e-05, "loss": 2.7758, "step": 112700 }, { "epoch": 0.82, "learning_rate": 1.1848934600529447e-05, "loss": 2.7769, "step": 112800 }, { "epoch": 0.82, "learning_rate": 1.1841701746011083e-05, "loss": 2.7712, "step": 112900 }, { "epoch": 0.82, "learning_rate": 1.1834468891492716e-05, "loss": 2.7792, "step": 113000 }, { "epoch": 0.82, "eval_accuracy": 0.4573149080702773, "eval_loss": 2.792189598083496, "eval_runtime": 27.9605, "eval_samples_per_second": 231.863, "eval_steps_per_second": 2.432, "step": 113000 }, { "epoch": 0.82, "learning_rate": 1.1827236036974353e-05, "loss": 2.7799, "step": 113100 }, { "epoch": 0.82, "learning_rate": 1.1820075511001173e-05, "loss": 2.7849, "step": 113200 }, { "epoch": 0.82, "learning_rate": 1.1812842656482807e-05, "loss": 2.7789, "step": 113300 }, { "epoch": 0.82, "learning_rate": 1.1805609801964444e-05, "loss": 2.7732, "step": 113400 }, { "epoch": 0.82, "learning_rate": 1.179837694744608e-05, "loss": 2.7791, "step": 113500 }, { "epoch": 0.82, "learning_rate": 1.1791144092927715e-05, "loss": 2.7844, "step": 113600 }, { "epoch": 0.82, "learning_rate": 1.1783911238409352e-05, "loss": 2.7713, "step": 113700 }, { "epoch": 0.82, "learning_rate": 1.1776678383890987e-05, "loss": 2.7754, "step": 113800 }, { "epoch": 0.82, "learning_rate": 1.1769445529372623e-05, "loss": 2.7714, "step": 113900 }, { "epoch": 0.82, "learning_rate": 1.1762212674854258e-05, "loss": 2.7819, "step": 114000 }, { "epoch": 0.82, "eval_accuracy": 0.4572997855628446, "eval_loss": 2.791494846343994, "eval_runtime": 28.1324, "eval_samples_per_second": 230.446, "eval_steps_per_second": 2.417, "step": 114000 }, { "epoch": 0.83, "learning_rate": 1.1754979820335894e-05, "loss": 2.7814, "step": 114100 }, { "epoch": 0.83, "learning_rate": 1.1747746965817531e-05, "loss": 2.7755, "step": 114200 }, { "epoch": 0.83, "learning_rate": 1.1740514111299166e-05, "loss": 2.7741, "step": 114300 }, { "epoch": 0.83, "learning_rate": 1.1733281256780802e-05, "loss": 2.772, "step": 114400 }, { "epoch": 0.83, "learning_rate": 1.1726120730807622e-05, "loss": 2.7795, "step": 114500 }, { "epoch": 0.83, "learning_rate": 1.1718887876289257e-05, "loss": 2.7728, "step": 114600 }, { "epoch": 0.83, "learning_rate": 1.1711655021770893e-05, "loss": 2.7714, "step": 114700 }, { "epoch": 0.83, "learning_rate": 1.170442216725253e-05, "loss": 2.7771, "step": 114800 }, { "epoch": 0.83, "learning_rate": 1.1697189312734165e-05, "loss": 2.779, "step": 114900 }, { "epoch": 0.83, "learning_rate": 1.1689956458215801e-05, "loss": 2.7837, "step": 115000 }, { "epoch": 0.83, "eval_accuracy": 0.4572991806625473, "eval_loss": 2.7909815311431885, "eval_runtime": 29.3759, "eval_samples_per_second": 220.691, "eval_steps_per_second": 2.315, "step": 115000 }, { "epoch": 0.83, "learning_rate": 1.168279593224262e-05, "loss": 2.7764, "step": 115100 }, { "epoch": 0.83, "learning_rate": 1.1675563077724254e-05, "loss": 2.7767, "step": 115200 }, { "epoch": 0.83, "learning_rate": 1.166833022320589e-05, "loss": 2.7741, "step": 115300 }, { "epoch": 0.83, "learning_rate": 1.1661097368687525e-05, "loss": 2.7706, "step": 115400 }, { "epoch": 0.84, "learning_rate": 1.1653864514169162e-05, "loss": 2.7744, "step": 115500 }, { "epoch": 0.84, "learning_rate": 1.1646631659650798e-05, "loss": 2.7759, "step": 115600 }, { "epoch": 0.84, "learning_rate": 1.1639398805132433e-05, "loss": 2.77, "step": 115700 }, { "epoch": 0.84, "learning_rate": 1.163216595061407e-05, "loss": 2.7758, "step": 115800 }, { "epoch": 0.84, "learning_rate": 1.1624933096095705e-05, "loss": 2.7732, "step": 115900 }, { "epoch": 0.84, "learning_rate": 1.1617700241577341e-05, "loss": 2.781, "step": 116000 }, { "epoch": 0.84, "eval_accuracy": 0.45746008414163136, "eval_loss": 2.790616989135742, "eval_runtime": 30.033, "eval_samples_per_second": 215.862, "eval_steps_per_second": 2.264, "step": 116000 }, { "epoch": 0.84, "learning_rate": 1.1610467387058976e-05, "loss": 2.776, "step": 116100 }, { "epoch": 0.84, "learning_rate": 1.1603234532540612e-05, "loss": 2.7762, "step": 116200 }, { "epoch": 0.84, "learning_rate": 1.1596001678022249e-05, "loss": 2.7787, "step": 116300 }, { "epoch": 0.84, "learning_rate": 1.1588768823503884e-05, "loss": 2.7725, "step": 116400 }, { "epoch": 0.84, "learning_rate": 1.1581608297530704e-05, "loss": 2.7759, "step": 116500 }, { "epoch": 0.84, "learning_rate": 1.157437544301234e-05, "loss": 2.774, "step": 116600 }, { "epoch": 0.84, "learning_rate": 1.1567142588493977e-05, "loss": 2.7737, "step": 116700 }, { "epoch": 0.84, "learning_rate": 1.1559909733975611e-05, "loss": 2.7801, "step": 116800 }, { "epoch": 0.85, "learning_rate": 1.1552676879457248e-05, "loss": 2.7774, "step": 116900 }, { "epoch": 0.85, "learning_rate": 1.1545444024938883e-05, "loss": 2.7765, "step": 117000 }, { "epoch": 0.85, "eval_accuracy": 0.45765970123974314, "eval_loss": 2.7898108959198, "eval_runtime": 30.1629, "eval_samples_per_second": 214.933, "eval_steps_per_second": 2.254, "step": 117000 }, { "epoch": 0.85, "learning_rate": 1.1538283498965704e-05, "loss": 2.7819, "step": 117100 }, { "epoch": 0.85, "learning_rate": 1.1531050644447339e-05, "loss": 2.7817, "step": 117200 }, { "epoch": 0.85, "learning_rate": 1.1523817789928975e-05, "loss": 2.7694, "step": 117300 }, { "epoch": 0.85, "learning_rate": 1.151658493541061e-05, "loss": 2.7779, "step": 117400 }, { "epoch": 0.85, "learning_rate": 1.1509352080892247e-05, "loss": 2.7796, "step": 117500 }, { "epoch": 0.85, "learning_rate": 1.1502119226373882e-05, "loss": 2.7798, "step": 117600 }, { "epoch": 0.85, "learning_rate": 1.1494886371855518e-05, "loss": 2.7729, "step": 117700 }, { "epoch": 0.85, "learning_rate": 1.1487653517337155e-05, "loss": 2.7779, "step": 117800 }, { "epoch": 0.85, "learning_rate": 1.148042066281879e-05, "loss": 2.7795, "step": 117900 }, { "epoch": 0.85, "learning_rate": 1.1473187808300426e-05, "loss": 2.7778, "step": 118000 }, { "epoch": 0.85, "eval_accuracy": 0.4575302525761191, "eval_loss": 2.789475202560425, "eval_runtime": 31.2717, "eval_samples_per_second": 207.312, "eval_steps_per_second": 2.174, "step": 118000 }, { "epoch": 0.85, "learning_rate": 1.146595495378206e-05, "loss": 2.7761, "step": 118100 }, { "epoch": 0.85, "learning_rate": 1.1458722099263697e-05, "loss": 2.7809, "step": 118200 }, { "epoch": 0.86, "learning_rate": 1.1451489244745332e-05, "loss": 2.7705, "step": 118300 }, { "epoch": 0.86, "learning_rate": 1.1444256390226969e-05, "loss": 2.773, "step": 118400 }, { "epoch": 0.86, "learning_rate": 1.1437023535708605e-05, "loss": 2.7688, "step": 118500 }, { "epoch": 0.86, "learning_rate": 1.142979068119024e-05, "loss": 2.7681, "step": 118600 }, { "epoch": 0.86, "learning_rate": 1.1422557826671877e-05, "loss": 2.7745, "step": 118700 }, { "epoch": 0.86, "learning_rate": 1.1415324972153511e-05, "loss": 2.7753, "step": 118800 }, { "epoch": 0.86, "learning_rate": 1.1408092117635148e-05, "loss": 2.7714, "step": 118900 }, { "epoch": 0.86, "learning_rate": 1.1400859263116784e-05, "loss": 2.776, "step": 119000 }, { "epoch": 0.86, "eval_accuracy": 0.4576566767382566, "eval_loss": 2.7886581420898438, "eval_runtime": 29.9514, "eval_samples_per_second": 216.451, "eval_steps_per_second": 2.27, "step": 119000 }, { "epoch": 0.86, "learning_rate": 1.139362640859842e-05, "loss": 2.7766, "step": 119100 }, { "epoch": 0.86, "learning_rate": 1.1386465882625237e-05, "loss": 2.7718, "step": 119200 }, { "epoch": 0.86, "learning_rate": 1.1379233028106874e-05, "loss": 2.779, "step": 119300 }, { "epoch": 0.86, "learning_rate": 1.1372000173588509e-05, "loss": 2.7753, "step": 119400 }, { "epoch": 0.86, "learning_rate": 1.1364767319070145e-05, "loss": 2.7763, "step": 119500 }, { "epoch": 0.87, "learning_rate": 1.135753446455178e-05, "loss": 2.7798, "step": 119600 }, { "epoch": 0.87, "learning_rate": 1.1350301610033416e-05, "loss": 2.775, "step": 119700 }, { "epoch": 0.87, "learning_rate": 1.1343068755515051e-05, "loss": 2.7736, "step": 119800 }, { "epoch": 0.87, "learning_rate": 1.1335908229541873e-05, "loss": 2.7801, "step": 119900 }, { "epoch": 0.87, "learning_rate": 1.1328675375023507e-05, "loss": 2.7719, "step": 120000 }, { "epoch": 0.87, "eval_accuracy": 0.45784359093012494, "eval_loss": 2.788266658782959, "eval_runtime": 31.0222, "eval_samples_per_second": 208.979, "eval_steps_per_second": 2.192, "step": 120000 }, { "epoch": 0.87, "learning_rate": 1.1321442520505144e-05, "loss": 2.771, "step": 120100 }, { "epoch": 0.87, "learning_rate": 1.1314209665986779e-05, "loss": 2.7762, "step": 120200 }, { "epoch": 0.87, "learning_rate": 1.1306976811468415e-05, "loss": 2.7741, "step": 120300 }, { "epoch": 0.87, "learning_rate": 1.1299743956950052e-05, "loss": 2.773, "step": 120400 }, { "epoch": 0.87, "learning_rate": 1.1292511102431687e-05, "loss": 2.7681, "step": 120500 }, { "epoch": 0.87, "learning_rate": 1.1285278247913323e-05, "loss": 2.7713, "step": 120600 }, { "epoch": 0.87, "learning_rate": 1.1278045393394958e-05, "loss": 2.7704, "step": 120700 }, { "epoch": 0.87, "learning_rate": 1.1270812538876595e-05, "loss": 2.7758, "step": 120800 }, { "epoch": 0.87, "learning_rate": 1.126357968435823e-05, "loss": 2.7696, "step": 120900 }, { "epoch": 0.88, "learning_rate": 1.1256346829839866e-05, "loss": 2.7759, "step": 121000 }, { "epoch": 0.88, "eval_accuracy": 0.45785326933488185, "eval_loss": 2.787775993347168, "eval_runtime": 29.7592, "eval_samples_per_second": 217.849, "eval_steps_per_second": 2.285, "step": 121000 }, { "epoch": 0.88, "learning_rate": 1.1249113975321502e-05, "loss": 2.7682, "step": 121100 }, { "epoch": 0.88, "learning_rate": 1.1241881120803137e-05, "loss": 2.7752, "step": 121200 }, { "epoch": 0.88, "learning_rate": 1.1234648266284774e-05, "loss": 2.7708, "step": 121300 }, { "epoch": 0.88, "learning_rate": 1.1227415411766409e-05, "loss": 2.7718, "step": 121400 }, { "epoch": 0.88, "learning_rate": 1.1220182557248045e-05, "loss": 2.7701, "step": 121500 }, { "epoch": 0.88, "learning_rate": 1.1212949702729682e-05, "loss": 2.7732, "step": 121600 }, { "epoch": 0.88, "learning_rate": 1.1205716848211316e-05, "loss": 2.7709, "step": 121700 }, { "epoch": 0.88, "learning_rate": 1.1198483993692953e-05, "loss": 2.7735, "step": 121800 }, { "epoch": 0.88, "learning_rate": 1.1191251139174588e-05, "loss": 2.7724, "step": 121900 }, { "epoch": 0.88, "learning_rate": 1.1184018284656224e-05, "loss": 2.7654, "step": 122000 }, { "epoch": 0.88, "eval_accuracy": 0.45778854500306987, "eval_loss": 2.787409782409668, "eval_runtime": 30.1905, "eval_samples_per_second": 214.736, "eval_steps_per_second": 2.252, "step": 122000 }, { "epoch": 0.88, "learning_rate": 1.1176785430137859e-05, "loss": 2.7705, "step": 122100 }, { "epoch": 0.88, "learning_rate": 1.1169552575619496e-05, "loss": 2.7753, "step": 122200 }, { "epoch": 0.88, "learning_rate": 1.1162392049646314e-05, "loss": 2.776, "step": 122300 }, { "epoch": 0.89, "learning_rate": 1.1155159195127948e-05, "loss": 2.7663, "step": 122400 }, { "epoch": 0.89, "learning_rate": 1.1147926340609585e-05, "loss": 2.7699, "step": 122500 }, { "epoch": 0.89, "learning_rate": 1.1140693486091221e-05, "loss": 2.7761, "step": 122600 }, { "epoch": 0.89, "learning_rate": 1.1133460631572856e-05, "loss": 2.7632, "step": 122700 }, { "epoch": 0.89, "learning_rate": 1.1126227777054493e-05, "loss": 2.7688, "step": 122800 }, { "epoch": 0.89, "learning_rate": 1.1118994922536128e-05, "loss": 2.7719, "step": 122900 }, { "epoch": 0.89, "learning_rate": 1.1111762068017764e-05, "loss": 2.7661, "step": 123000 }, { "epoch": 0.89, "eval_accuracy": 0.45800570420980363, "eval_loss": 2.7868072986602783, "eval_runtime": 29.8221, "eval_samples_per_second": 217.389, "eval_steps_per_second": 2.28, "step": 123000 }, { "epoch": 0.89, "learning_rate": 1.11045292134994e-05, "loss": 2.7701, "step": 123100 }, { "epoch": 0.89, "learning_rate": 1.1097296358981035e-05, "loss": 2.763, "step": 123200 }, { "epoch": 0.89, "learning_rate": 1.1090063504462672e-05, "loss": 2.7767, "step": 123300 }, { "epoch": 0.89, "learning_rate": 1.1082830649944307e-05, "loss": 2.7696, "step": 123400 }, { "epoch": 0.89, "learning_rate": 1.1075597795425943e-05, "loss": 2.7704, "step": 123500 }, { "epoch": 0.89, "learning_rate": 1.1068364940907578e-05, "loss": 2.7772, "step": 123600 }, { "epoch": 0.89, "learning_rate": 1.1061132086389215e-05, "loss": 2.7761, "step": 123700 }, { "epoch": 0.9, "learning_rate": 1.1053899231870851e-05, "loss": 2.7743, "step": 123800 }, { "epoch": 0.9, "learning_rate": 1.1046666377352486e-05, "loss": 2.7656, "step": 123900 }, { "epoch": 0.9, "learning_rate": 1.1039433522834122e-05, "loss": 2.7718, "step": 124000 }, { "epoch": 0.9, "eval_accuracy": 0.4579833228988032, "eval_loss": 2.786105155944824, "eval_runtime": 29.402, "eval_samples_per_second": 220.495, "eval_steps_per_second": 2.313, "step": 124000 }, { "epoch": 0.9, "learning_rate": 1.1032200668315757e-05, "loss": 2.7757, "step": 124100 }, { "epoch": 0.9, "learning_rate": 1.1024967813797394e-05, "loss": 2.7759, "step": 124200 }, { "epoch": 0.9, "learning_rate": 1.101773495927903e-05, "loss": 2.7791, "step": 124300 }, { "epoch": 0.9, "learning_rate": 1.1010502104760665e-05, "loss": 2.7698, "step": 124400 }, { "epoch": 0.9, "learning_rate": 1.1003269250242302e-05, "loss": 2.7707, "step": 124500 }, { "epoch": 0.9, "learning_rate": 1.0996036395723936e-05, "loss": 2.7732, "step": 124600 }, { "epoch": 0.9, "learning_rate": 1.0988803541205573e-05, "loss": 2.7705, "step": 124700 }, { "epoch": 0.9, "learning_rate": 1.0981570686687208e-05, "loss": 2.7693, "step": 124800 }, { "epoch": 0.9, "learning_rate": 1.0974337832168844e-05, "loss": 2.7693, "step": 124900 }, { "epoch": 0.9, "learning_rate": 1.0967177306195664e-05, "loss": 2.7775, "step": 125000 }, { "epoch": 0.9, "eval_accuracy": 0.4579536827842351, "eval_loss": 2.785790205001831, "eval_runtime": 30.0386, "eval_samples_per_second": 215.823, "eval_steps_per_second": 2.264, "step": 125000 }, { "epoch": 0.9, "learning_rate": 1.09599444516773e-05, "loss": 2.7759, "step": 125100 }, { "epoch": 0.91, "learning_rate": 1.0952711597158935e-05, "loss": 2.7729, "step": 125200 }, { "epoch": 0.91, "learning_rate": 1.0945478742640572e-05, "loss": 2.7698, "step": 125300 }, { "epoch": 0.91, "learning_rate": 1.0938245888122208e-05, "loss": 2.7665, "step": 125400 }, { "epoch": 0.91, "learning_rate": 1.0931013033603843e-05, "loss": 2.7668, "step": 125500 }, { "epoch": 0.91, "learning_rate": 1.092378017908548e-05, "loss": 2.771, "step": 125600 }, { "epoch": 0.91, "learning_rate": 1.0916547324567115e-05, "loss": 2.7749, "step": 125700 }, { "epoch": 0.91, "learning_rate": 1.0909386798593934e-05, "loss": 2.7676, "step": 125800 }, { "epoch": 0.91, "learning_rate": 1.090215394407557e-05, "loss": 2.7716, "step": 125900 }, { "epoch": 0.91, "learning_rate": 1.0894921089557207e-05, "loss": 2.7835, "step": 126000 }, { "epoch": 0.91, "eval_accuracy": 0.45800933361158747, "eval_loss": 2.7854855060577393, "eval_runtime": 27.8858, "eval_samples_per_second": 232.484, "eval_steps_per_second": 2.439, "step": 126000 }, { "epoch": 0.91, "learning_rate": 1.0887688235038842e-05, "loss": 2.7658, "step": 126100 }, { "epoch": 0.91, "learning_rate": 1.0880455380520475e-05, "loss": 2.7706, "step": 126200 }, { "epoch": 0.91, "learning_rate": 1.0873222526002112e-05, "loss": 2.7689, "step": 126300 }, { "epoch": 0.91, "learning_rate": 1.0865989671483748e-05, "loss": 2.768, "step": 126400 }, { "epoch": 0.91, "learning_rate": 1.0858756816965383e-05, "loss": 2.7656, "step": 126500 }, { "epoch": 0.92, "learning_rate": 1.085152396244702e-05, "loss": 2.769, "step": 126600 }, { "epoch": 0.92, "learning_rate": 1.0844291107928654e-05, "loss": 2.7722, "step": 126700 }, { "epoch": 0.92, "learning_rate": 1.0837058253410291e-05, "loss": 2.7725, "step": 126800 }, { "epoch": 0.92, "learning_rate": 1.0829825398891927e-05, "loss": 2.7739, "step": 126900 }, { "epoch": 0.92, "learning_rate": 1.0822592544373562e-05, "loss": 2.768, "step": 127000 }, { "epoch": 0.92, "eval_accuracy": 0.4580704285416156, "eval_loss": 2.784817695617676, "eval_runtime": 30.4636, "eval_samples_per_second": 212.811, "eval_steps_per_second": 2.232, "step": 127000 }, { "epoch": 0.92, "learning_rate": 1.0815359689855199e-05, "loss": 2.7736, "step": 127100 }, { "epoch": 0.92, "learning_rate": 1.0808126835336834e-05, "loss": 2.7665, "step": 127200 }, { "epoch": 0.92, "learning_rate": 1.0800966309363653e-05, "loss": 2.7702, "step": 127300 }, { "epoch": 0.92, "learning_rate": 1.079373345484529e-05, "loss": 2.7739, "step": 127400 }, { "epoch": 0.92, "learning_rate": 1.0786500600326926e-05, "loss": 2.7685, "step": 127500 }, { "epoch": 0.92, "learning_rate": 1.0779267745808561e-05, "loss": 2.7725, "step": 127600 }, { "epoch": 0.92, "learning_rate": 1.0772034891290198e-05, "loss": 2.7645, "step": 127700 }, { "epoch": 0.92, "learning_rate": 1.0764802036771833e-05, "loss": 2.7751, "step": 127800 }, { "epoch": 0.93, "learning_rate": 1.0757569182253469e-05, "loss": 2.7822, "step": 127900 }, { "epoch": 0.93, "learning_rate": 1.0750336327735106e-05, "loss": 2.7701, "step": 128000 }, { "epoch": 0.93, "eval_accuracy": 0.45819806250434775, "eval_loss": 2.7843172550201416, "eval_runtime": 29.9391, "eval_samples_per_second": 216.54, "eval_steps_per_second": 2.271, "step": 128000 }, { "epoch": 0.93, "learning_rate": 1.074310347321674e-05, "loss": 2.7667, "step": 128100 }, { "epoch": 0.93, "learning_rate": 1.0735870618698377e-05, "loss": 2.7741, "step": 128200 }, { "epoch": 0.93, "learning_rate": 1.0728637764180012e-05, "loss": 2.767, "step": 128300 }, { "epoch": 0.93, "learning_rate": 1.0721404909661648e-05, "loss": 2.7672, "step": 128400 }, { "epoch": 0.93, "learning_rate": 1.0714172055143283e-05, "loss": 2.7694, "step": 128500 }, { "epoch": 0.93, "learning_rate": 1.070693920062492e-05, "loss": 2.775, "step": 128600 }, { "epoch": 0.93, "learning_rate": 1.0699706346106556e-05, "loss": 2.7747, "step": 128700 }, { "epoch": 0.93, "learning_rate": 1.0692473491588191e-05, "loss": 2.7689, "step": 128800 }, { "epoch": 0.93, "learning_rate": 1.0685240637069827e-05, "loss": 2.7631, "step": 128900 }, { "epoch": 0.93, "learning_rate": 1.0678007782551462e-05, "loss": 2.7682, "step": 129000 }, { "epoch": 0.93, "eval_accuracy": 0.45829424165161975, "eval_loss": 2.783777952194214, "eval_runtime": 29.7932, "eval_samples_per_second": 217.6, "eval_steps_per_second": 2.282, "step": 129000 }, { "epoch": 0.93, "learning_rate": 1.0670774928033099e-05, "loss": 2.7683, "step": 129100 }, { "epoch": 0.93, "learning_rate": 1.0663542073514734e-05, "loss": 2.773, "step": 129200 }, { "epoch": 0.94, "learning_rate": 1.065630921899637e-05, "loss": 2.769, "step": 129300 }, { "epoch": 0.94, "learning_rate": 1.0649076364478007e-05, "loss": 2.7738, "step": 129400 }, { "epoch": 0.94, "learning_rate": 1.0641915838504826e-05, "loss": 2.7609, "step": 129500 }, { "epoch": 0.94, "learning_rate": 1.0634682983986461e-05, "loss": 2.7699, "step": 129600 }, { "epoch": 0.94, "learning_rate": 1.062752245801328e-05, "loss": 2.7711, "step": 129700 }, { "epoch": 0.94, "learning_rate": 1.0620289603494916e-05, "loss": 2.7743, "step": 129800 }, { "epoch": 0.94, "learning_rate": 1.061305674897655e-05, "loss": 2.768, "step": 129900 }, { "epoch": 0.94, "learning_rate": 1.0605823894458187e-05, "loss": 2.7595, "step": 130000 }, { "epoch": 0.94, "eval_accuracy": 0.458337794473026, "eval_loss": 2.7834300994873047, "eval_runtime": 28.4215, "eval_samples_per_second": 228.102, "eval_steps_per_second": 2.393, "step": 130000 }, { "epoch": 0.94, "learning_rate": 1.0598591039939824e-05, "loss": 2.7653, "step": 130100 }, { "epoch": 0.94, "learning_rate": 1.0591358185421458e-05, "loss": 2.7622, "step": 130200 }, { "epoch": 0.94, "learning_rate": 1.0584125330903095e-05, "loss": 2.7737, "step": 130300 }, { "epoch": 0.94, "learning_rate": 1.057689247638473e-05, "loss": 2.7744, "step": 130400 }, { "epoch": 0.94, "learning_rate": 1.0569659621866366e-05, "loss": 2.7739, "step": 130500 }, { "epoch": 0.94, "learning_rate": 1.0562426767348001e-05, "loss": 2.7748, "step": 130600 }, { "epoch": 0.95, "learning_rate": 1.0555193912829638e-05, "loss": 2.7696, "step": 130700 }, { "epoch": 0.95, "learning_rate": 1.0547961058311274e-05, "loss": 2.7628, "step": 130800 }, { "epoch": 0.95, "learning_rate": 1.0540728203792909e-05, "loss": 2.7739, "step": 130900 }, { "epoch": 0.95, "learning_rate": 1.0533495349274545e-05, "loss": 2.7627, "step": 131000 }, { "epoch": 0.95, "eval_accuracy": 0.45831420336143097, "eval_loss": 2.783060312271118, "eval_runtime": 31.194, "eval_samples_per_second": 207.829, "eval_steps_per_second": 2.18, "step": 131000 }, { "epoch": 0.95, "learning_rate": 1.052626249475618e-05, "loss": 2.7654, "step": 131100 }, { "epoch": 0.95, "learning_rate": 1.0519029640237817e-05, "loss": 2.7662, "step": 131200 }, { "epoch": 0.95, "learning_rate": 1.0511796785719453e-05, "loss": 2.7627, "step": 131300 }, { "epoch": 0.95, "learning_rate": 1.0504636259746273e-05, "loss": 2.7735, "step": 131400 }, { "epoch": 0.95, "learning_rate": 1.0497403405227908e-05, "loss": 2.7787, "step": 131500 }, { "epoch": 0.95, "learning_rate": 1.0490170550709544e-05, "loss": 2.7708, "step": 131600 }, { "epoch": 0.95, "learning_rate": 1.0482937696191179e-05, "loss": 2.7678, "step": 131700 }, { "epoch": 0.95, "learning_rate": 1.0475704841672816e-05, "loss": 2.7681, "step": 131800 }, { "epoch": 0.95, "learning_rate": 1.0468471987154452e-05, "loss": 2.7792, "step": 131900 }, { "epoch": 0.95, "learning_rate": 1.0461239132636087e-05, "loss": 2.7716, "step": 132000 }, { "epoch": 0.95, "eval_accuracy": 0.45840554330632455, "eval_loss": 2.7826600074768066, "eval_runtime": 29.7072, "eval_samples_per_second": 218.23, "eval_steps_per_second": 2.289, "step": 132000 }, { "epoch": 0.96, "learning_rate": 1.0454006278117723e-05, "loss": 2.7721, "step": 132100 }, { "epoch": 0.96, "learning_rate": 1.0446773423599358e-05, "loss": 2.7637, "step": 132200 }, { "epoch": 0.96, "learning_rate": 1.0439540569080995e-05, "loss": 2.7743, "step": 132300 }, { "epoch": 0.96, "learning_rate": 1.0432307714562631e-05, "loss": 2.7649, "step": 132400 }, { "epoch": 0.96, "learning_rate": 1.0425074860044266e-05, "loss": 2.7672, "step": 132500 }, { "epoch": 0.96, "learning_rate": 1.0417842005525903e-05, "loss": 2.7683, "step": 132600 }, { "epoch": 0.96, "learning_rate": 1.0410609151007537e-05, "loss": 2.7668, "step": 132700 }, { "epoch": 0.96, "learning_rate": 1.0403376296489174e-05, "loss": 2.7582, "step": 132800 }, { "epoch": 0.96, "learning_rate": 1.0396143441970809e-05, "loss": 2.7663, "step": 132900 }, { "epoch": 0.96, "learning_rate": 1.0388910587452445e-05, "loss": 2.7719, "step": 133000 }, { "epoch": 0.96, "eval_accuracy": 0.45850656165597503, "eval_loss": 2.782144069671631, "eval_runtime": 29.573, "eval_samples_per_second": 219.22, "eval_steps_per_second": 2.299, "step": 133000 }, { "epoch": 0.96, "learning_rate": 1.0381677732934082e-05, "loss": 2.77, "step": 133100 }, { "epoch": 0.96, "learning_rate": 1.0374444878415717e-05, "loss": 2.7684, "step": 133200 }, { "epoch": 0.96, "learning_rate": 1.0367212023897353e-05, "loss": 2.7562, "step": 133300 }, { "epoch": 0.96, "learning_rate": 1.0359979169378988e-05, "loss": 2.7682, "step": 133400 }, { "epoch": 0.97, "learning_rate": 1.0352746314860625e-05, "loss": 2.7686, "step": 133500 }, { "epoch": 0.97, "learning_rate": 1.0345513460342261e-05, "loss": 2.7624, "step": 133600 }, { "epoch": 0.97, "learning_rate": 1.0338352934369077e-05, "loss": 2.7643, "step": 133700 }, { "epoch": 0.97, "learning_rate": 1.0331120079850714e-05, "loss": 2.7725, "step": 133800 }, { "epoch": 0.97, "learning_rate": 1.0323959553877534e-05, "loss": 2.7617, "step": 133900 }, { "epoch": 0.97, "learning_rate": 1.031672669935917e-05, "loss": 2.7723, "step": 134000 }, { "epoch": 0.97, "eval_accuracy": 0.45827972404448436, "eval_loss": 2.781625747680664, "eval_runtime": 30.6595, "eval_samples_per_second": 211.452, "eval_steps_per_second": 2.218, "step": 134000 }, { "epoch": 0.97, "learning_rate": 1.0309493844840805e-05, "loss": 2.7677, "step": 134100 }, { "epoch": 0.97, "learning_rate": 1.0302260990322441e-05, "loss": 2.7655, "step": 134200 }, { "epoch": 0.97, "learning_rate": 1.0295028135804076e-05, "loss": 2.7718, "step": 134300 }, { "epoch": 0.97, "learning_rate": 1.0287795281285713e-05, "loss": 2.7667, "step": 134400 }, { "epoch": 0.97, "learning_rate": 1.028056242676735e-05, "loss": 2.7639, "step": 134500 }, { "epoch": 0.97, "learning_rate": 1.0273329572248984e-05, "loss": 2.771, "step": 134600 }, { "epoch": 0.97, "learning_rate": 1.026609671773062e-05, "loss": 2.7588, "step": 134700 }, { "epoch": 0.97, "learning_rate": 1.0258863863212255e-05, "loss": 2.7687, "step": 134800 }, { "epoch": 0.98, "learning_rate": 1.0251631008693892e-05, "loss": 2.7606, "step": 134900 }, { "epoch": 0.98, "learning_rate": 1.0244398154175529e-05, "loss": 2.7736, "step": 135000 }, { "epoch": 0.98, "eval_accuracy": 0.45850595675567773, "eval_loss": 2.7812275886535645, "eval_runtime": 31.0326, "eval_samples_per_second": 208.91, "eval_steps_per_second": 2.191, "step": 135000 }, { "epoch": 0.98, "learning_rate": 1.0237165299657163e-05, "loss": 2.768, "step": 135100 }, { "epoch": 0.98, "learning_rate": 1.02299324451388e-05, "loss": 2.7736, "step": 135200 }, { "epoch": 0.98, "learning_rate": 1.0222699590620435e-05, "loss": 2.7712, "step": 135300 }, { "epoch": 0.98, "learning_rate": 1.0215466736102071e-05, "loss": 2.7636, "step": 135400 }, { "epoch": 0.98, "learning_rate": 1.0208306210128891e-05, "loss": 2.7671, "step": 135500 }, { "epoch": 0.98, "learning_rate": 1.0201073355610527e-05, "loss": 2.7746, "step": 135600 }, { "epoch": 0.98, "learning_rate": 1.0193840501092162e-05, "loss": 2.7717, "step": 135700 }, { "epoch": 0.98, "learning_rate": 1.0186607646573799e-05, "loss": 2.7739, "step": 135800 }, { "epoch": 0.98, "learning_rate": 1.0179374792055434e-05, "loss": 2.766, "step": 135900 }, { "epoch": 0.98, "learning_rate": 1.017214193753707e-05, "loss": 2.7646, "step": 136000 }, { "epoch": 0.98, "eval_accuracy": 0.4585743104892736, "eval_loss": 2.7808570861816406, "eval_runtime": 30.7384, "eval_samples_per_second": 210.909, "eval_steps_per_second": 2.212, "step": 136000 }, { "epoch": 0.98, "learning_rate": 1.0164909083018707e-05, "loss": 2.7681, "step": 136100 }, { "epoch": 0.99, "learning_rate": 1.0157676228500341e-05, "loss": 2.766, "step": 136200 }, { "epoch": 0.99, "learning_rate": 1.0150443373981978e-05, "loss": 2.7689, "step": 136300 }, { "epoch": 0.99, "learning_rate": 1.0143210519463613e-05, "loss": 2.7692, "step": 136400 }, { "epoch": 0.99, "learning_rate": 1.013597766494525e-05, "loss": 2.766, "step": 136500 }, { "epoch": 0.99, "learning_rate": 1.0128744810426884e-05, "loss": 2.7685, "step": 136600 }, { "epoch": 0.99, "learning_rate": 1.012151195590852e-05, "loss": 2.7625, "step": 136700 }, { "epoch": 0.99, "learning_rate": 1.0114351429935339e-05, "loss": 2.7596, "step": 136800 }, { "epoch": 0.99, "learning_rate": 1.0107118575416973e-05, "loss": 2.7644, "step": 136900 }, { "epoch": 0.99, "learning_rate": 1.009988572089861e-05, "loss": 2.76, "step": 137000 }, { "epoch": 0.99, "eval_accuracy": 0.458596691800274, "eval_loss": 2.7804572582244873, "eval_runtime": 29.4041, "eval_samples_per_second": 220.48, "eval_steps_per_second": 2.313, "step": 137000 }, { "epoch": 0.99, "learning_rate": 1.0092652866380247e-05, "loss": 2.7618, "step": 137100 }, { "epoch": 0.99, "learning_rate": 1.0085420011861881e-05, "loss": 2.7663, "step": 137200 }, { "epoch": 0.99, "learning_rate": 1.0078187157343518e-05, "loss": 2.7684, "step": 137300 }, { "epoch": 0.99, "learning_rate": 1.0071026631370338e-05, "loss": 2.7632, "step": 137400 }, { "epoch": 0.99, "learning_rate": 1.0063793776851974e-05, "loss": 2.7688, "step": 137500 }, { "epoch": 1.0, "learning_rate": 1.0056560922333609e-05, "loss": 2.7616, "step": 137600 }, { "epoch": 1.0, "learning_rate": 1.0049328067815245e-05, "loss": 2.7634, "step": 137700 }, { "epoch": 1.0, "learning_rate": 1.004209521329688e-05, "loss": 2.7608, "step": 137800 }, { "epoch": 1.0, "learning_rate": 1.0034862358778517e-05, "loss": 2.7697, "step": 137900 }, { "epoch": 1.0, "learning_rate": 1.0027629504260152e-05, "loss": 2.7659, "step": 138000 }, { "epoch": 1.0, "eval_accuracy": 0.4585610026827328, "eval_loss": 2.780273914337158, "eval_runtime": 29.5606, "eval_samples_per_second": 219.313, "eval_steps_per_second": 2.3, "step": 138000 }, { "epoch": 1.0, "learning_rate": 1.0020396649741788e-05, "loss": 2.7704, "step": 138100 }, { "epoch": 1.0, "learning_rate": 1.0013163795223425e-05, "loss": 2.7639, "step": 138200 }, { "epoch": 1.0, "learning_rate": 1.000593094070506e-05, "loss": 2.7586, "step": 138300 }, { "epoch": 1.0, "learning_rate": 9.998698086186696e-06, "loss": 2.7646, "step": 138400 }, { "epoch": 1.0, "learning_rate": 9.99146523166833e-06, "loss": 2.7588, "step": 138500 }, { "epoch": 1.0, "learning_rate": 9.984232377149967e-06, "loss": 2.7584, "step": 138600 }, { "epoch": 1.0, "learning_rate": 9.976999522631604e-06, "loss": 2.7627, "step": 138700 }, { "epoch": 1.0, "learning_rate": 9.969766668113239e-06, "loss": 2.7523, "step": 138800 }, { "epoch": 1.0, "learning_rate": 9.962533813594875e-06, "loss": 2.7636, "step": 138900 }, { "epoch": 1.01, "learning_rate": 9.95530095907651e-06, "loss": 2.7604, "step": 139000 }, { "epoch": 1.01, "eval_accuracy": 0.45872069636122226, "eval_loss": 2.7798776626586914, "eval_runtime": 29.5234, "eval_samples_per_second": 219.589, "eval_steps_per_second": 2.303, "step": 139000 }, { "epoch": 1.01, "learning_rate": 9.948068104558146e-06, "loss": 2.7533, "step": 139100 }, { "epoch": 1.01, "learning_rate": 9.940835250039781e-06, "loss": 2.7478, "step": 139200 }, { "epoch": 1.01, "learning_rate": 9.933602395521418e-06, "loss": 2.762, "step": 139300 }, { "epoch": 1.01, "learning_rate": 9.926369541003054e-06, "loss": 2.7616, "step": 139400 }, { "epoch": 1.01, "learning_rate": 9.919136686484689e-06, "loss": 2.766, "step": 139500 }, { "epoch": 1.01, "learning_rate": 9.911903831966324e-06, "loss": 2.762, "step": 139600 }, { "epoch": 1.01, "learning_rate": 9.90467097744796e-06, "loss": 2.7599, "step": 139700 }, { "epoch": 1.01, "learning_rate": 9.897438122929595e-06, "loss": 2.7588, "step": 139800 }, { "epoch": 1.01, "learning_rate": 9.890205268411232e-06, "loss": 2.752, "step": 139900 }, { "epoch": 1.01, "learning_rate": 9.882972413892867e-06, "loss": 2.7597, "step": 140000 }, { "epoch": 1.01, "eval_accuracy": 0.4586741190383295, "eval_loss": 2.7793562412261963, "eval_runtime": 32.292, "eval_samples_per_second": 200.762, "eval_steps_per_second": 2.106, "step": 140000 }, { "epoch": 1.01, "learning_rate": 9.875739559374503e-06, "loss": 2.7523, "step": 140100 }, { "epoch": 1.01, "learning_rate": 9.86850670485614e-06, "loss": 2.7645, "step": 140200 }, { "epoch": 1.01, "learning_rate": 9.861273850337774e-06, "loss": 2.7598, "step": 140300 }, { "epoch": 1.02, "learning_rate": 9.854040995819411e-06, "loss": 2.7589, "step": 140400 }, { "epoch": 1.02, "learning_rate": 9.846808141301046e-06, "loss": 2.755, "step": 140500 }, { "epoch": 1.02, "learning_rate": 9.839575286782682e-06, "loss": 2.7631, "step": 140600 }, { "epoch": 1.02, "learning_rate": 9.832342432264317e-06, "loss": 2.7571, "step": 140700 }, { "epoch": 1.02, "learning_rate": 9.825109577745954e-06, "loss": 2.7596, "step": 140800 }, { "epoch": 1.02, "learning_rate": 9.817949051772773e-06, "loss": 2.7562, "step": 140900 }, { "epoch": 1.02, "learning_rate": 9.81071619725441e-06, "loss": 2.7551, "step": 141000 }, { "epoch": 1.02, "eval_accuracy": 0.45877937169006117, "eval_loss": 2.7791290283203125, "eval_runtime": 29.4298, "eval_samples_per_second": 220.287, "eval_steps_per_second": 2.311, "step": 141000 }, { "epoch": 1.02, "learning_rate": 9.803483342736045e-06, "loss": 2.753, "step": 141100 }, { "epoch": 1.02, "learning_rate": 9.796250488217681e-06, "loss": 2.7499, "step": 141200 }, { "epoch": 1.02, "learning_rate": 9.789017633699318e-06, "loss": 2.7595, "step": 141300 }, { "epoch": 1.02, "learning_rate": 9.781784779180953e-06, "loss": 2.7612, "step": 141400 }, { "epoch": 1.02, "learning_rate": 9.774551924662589e-06, "loss": 2.7634, "step": 141500 }, { "epoch": 1.02, "learning_rate": 9.767319070144224e-06, "loss": 2.749, "step": 141600 }, { "epoch": 1.02, "learning_rate": 9.76008621562586e-06, "loss": 2.7567, "step": 141700 }, { "epoch": 1.03, "learning_rate": 9.752853361107495e-06, "loss": 2.7546, "step": 141800 }, { "epoch": 1.03, "learning_rate": 9.74562050658913e-06, "loss": 2.7588, "step": 141900 }, { "epoch": 1.03, "learning_rate": 9.738387652070767e-06, "loss": 2.7619, "step": 142000 }, { "epoch": 1.03, "eval_accuracy": 0.4588090118046293, "eval_loss": 2.7787861824035645, "eval_runtime": 29.6893, "eval_samples_per_second": 218.361, "eval_steps_per_second": 2.29, "step": 142000 }, { "epoch": 1.03, "learning_rate": 9.731227126097586e-06, "loss": 2.7533, "step": 142100 }, { "epoch": 1.03, "learning_rate": 9.723994271579223e-06, "loss": 2.7554, "step": 142200 }, { "epoch": 1.03, "learning_rate": 9.716761417060858e-06, "loss": 2.7627, "step": 142300 }, { "epoch": 1.03, "learning_rate": 9.709528562542494e-06, "loss": 2.7635, "step": 142400 }, { "epoch": 1.03, "learning_rate": 9.702368036569312e-06, "loss": 2.757, "step": 142500 }, { "epoch": 1.03, "learning_rate": 9.695135182050949e-06, "loss": 2.7527, "step": 142600 }, { "epoch": 1.03, "learning_rate": 9.687902327532585e-06, "loss": 2.761, "step": 142700 }, { "epoch": 1.03, "learning_rate": 9.68066947301422e-06, "loss": 2.7562, "step": 142800 }, { "epoch": 1.03, "learning_rate": 9.673436618495857e-06, "loss": 2.7565, "step": 142900 }, { "epoch": 1.03, "learning_rate": 9.666203763977491e-06, "loss": 2.7658, "step": 143000 }, { "epoch": 1.03, "eval_accuracy": 0.45889793214833363, "eval_loss": 2.778470754623413, "eval_runtime": 31.4699, "eval_samples_per_second": 206.006, "eval_steps_per_second": 2.161, "step": 143000 }, { "epoch": 1.04, "learning_rate": 9.658970909459128e-06, "loss": 2.7582, "step": 143100 }, { "epoch": 1.04, "learning_rate": 9.651738054940764e-06, "loss": 2.7627, "step": 143200 }, { "epoch": 1.04, "learning_rate": 9.6445052004224e-06, "loss": 2.7568, "step": 143300 }, { "epoch": 1.04, "learning_rate": 9.637272345904036e-06, "loss": 2.7529, "step": 143400 }, { "epoch": 1.04, "learning_rate": 9.63003949138567e-06, "loss": 2.7641, "step": 143500 }, { "epoch": 1.04, "learning_rate": 9.622806636867307e-06, "loss": 2.7539, "step": 143600 }, { "epoch": 1.04, "learning_rate": 9.615573782348942e-06, "loss": 2.7585, "step": 143700 }, { "epoch": 1.04, "learning_rate": 9.608340927830578e-06, "loss": 2.7628, "step": 143800 }, { "epoch": 1.04, "learning_rate": 9.601108073312215e-06, "loss": 2.763, "step": 143900 }, { "epoch": 1.04, "learning_rate": 9.59387521879385e-06, "loss": 2.751, "step": 144000 }, { "epoch": 1.04, "eval_accuracy": 0.45885679892811665, "eval_loss": 2.778137683868408, "eval_runtime": 32.5333, "eval_samples_per_second": 199.273, "eval_steps_per_second": 2.09, "step": 144000 }, { "epoch": 1.04, "learning_rate": 9.586642364275486e-06, "loss": 2.7642, "step": 144100 }, { "epoch": 1.04, "learning_rate": 9.579409509757121e-06, "loss": 2.7635, "step": 144200 }, { "epoch": 1.04, "learning_rate": 9.572176655238758e-06, "loss": 2.7698, "step": 144300 }, { "epoch": 1.04, "learning_rate": 9.564943800720392e-06, "loss": 2.7617, "step": 144400 }, { "epoch": 1.05, "learning_rate": 9.557710946202029e-06, "loss": 2.7541, "step": 144500 }, { "epoch": 1.05, "learning_rate": 9.550478091683665e-06, "loss": 2.7584, "step": 144600 }, { "epoch": 1.05, "learning_rate": 9.5432452371653e-06, "loss": 2.7594, "step": 144700 }, { "epoch": 1.05, "learning_rate": 9.536012382646937e-06, "loss": 2.7506, "step": 144800 }, { "epoch": 1.05, "learning_rate": 9.528779528128572e-06, "loss": 2.7573, "step": 144900 }, { "epoch": 1.05, "learning_rate": 9.521546673610208e-06, "loss": 2.7589, "step": 145000 }, { "epoch": 1.05, "eval_accuracy": 0.45895902707836184, "eval_loss": 2.7777721881866455, "eval_runtime": 29.5823, "eval_samples_per_second": 219.151, "eval_steps_per_second": 2.299, "step": 145000 }, { "epoch": 1.05, "learning_rate": 9.514313819091845e-06, "loss": 2.7514, "step": 145100 }, { "epoch": 1.05, "learning_rate": 9.50708096457348e-06, "loss": 2.7523, "step": 145200 }, { "epoch": 1.05, "learning_rate": 9.499848110055116e-06, "loss": 2.7626, "step": 145300 }, { "epoch": 1.05, "learning_rate": 9.49261525553675e-06, "loss": 2.7541, "step": 145400 }, { "epoch": 1.05, "learning_rate": 9.485382401018387e-06, "loss": 2.7624, "step": 145500 }, { "epoch": 1.05, "learning_rate": 9.478221875045205e-06, "loss": 2.7604, "step": 145600 }, { "epoch": 1.05, "learning_rate": 9.470989020526842e-06, "loss": 2.7576, "step": 145700 }, { "epoch": 1.05, "learning_rate": 9.463828494553662e-06, "loss": 2.7547, "step": 145800 }, { "epoch": 1.06, "learning_rate": 9.456595640035298e-06, "loss": 2.7526, "step": 145900 }, { "epoch": 1.06, "learning_rate": 9.449362785516933e-06, "loss": 2.7459, "step": 146000 }, { "epoch": 1.06, "eval_accuracy": 0.4589529780753887, "eval_loss": 2.777561902999878, "eval_runtime": 29.3866, "eval_samples_per_second": 220.611, "eval_steps_per_second": 2.314, "step": 146000 }, { "epoch": 1.06, "learning_rate": 9.442129930998568e-06, "loss": 2.7579, "step": 146100 }, { "epoch": 1.06, "learning_rate": 9.434897076480204e-06, "loss": 2.7543, "step": 146200 }, { "epoch": 1.06, "learning_rate": 9.427664221961839e-06, "loss": 2.7608, "step": 146300 }, { "epoch": 1.06, "learning_rate": 9.420431367443476e-06, "loss": 2.7595, "step": 146400 }, { "epoch": 1.06, "learning_rate": 9.413198512925112e-06, "loss": 2.7531, "step": 146500 }, { "epoch": 1.06, "learning_rate": 9.405965658406747e-06, "loss": 2.7576, "step": 146600 }, { "epoch": 1.06, "learning_rate": 9.398732803888383e-06, "loss": 2.7546, "step": 146700 }, { "epoch": 1.06, "learning_rate": 9.391499949370018e-06, "loss": 2.7465, "step": 146800 }, { "epoch": 1.06, "learning_rate": 9.384267094851655e-06, "loss": 2.7597, "step": 146900 }, { "epoch": 1.06, "learning_rate": 9.37703424033329e-06, "loss": 2.7646, "step": 147000 }, { "epoch": 1.06, "eval_accuracy": 0.4591423118684463, "eval_loss": 2.7770681381225586, "eval_runtime": 29.9251, "eval_samples_per_second": 216.641, "eval_steps_per_second": 2.272, "step": 147000 }, { "epoch": 1.06, "learning_rate": 9.369801385814926e-06, "loss": 2.7594, "step": 147100 }, { "epoch": 1.06, "learning_rate": 9.362568531296563e-06, "loss": 2.7502, "step": 147200 }, { "epoch": 1.07, "learning_rate": 9.355335676778197e-06, "loss": 2.7557, "step": 147300 }, { "epoch": 1.07, "learning_rate": 9.348102822259834e-06, "loss": 2.7589, "step": 147400 }, { "epoch": 1.07, "learning_rate": 9.340942296286654e-06, "loss": 2.7468, "step": 147500 }, { "epoch": 1.07, "learning_rate": 9.33370944176829e-06, "loss": 2.7484, "step": 147600 }, { "epoch": 1.07, "learning_rate": 9.326476587249925e-06, "loss": 2.7559, "step": 147700 }, { "epoch": 1.07, "learning_rate": 9.319243732731561e-06, "loss": 2.7502, "step": 147800 }, { "epoch": 1.07, "learning_rate": 9.312010878213196e-06, "loss": 2.7512, "step": 147900 }, { "epoch": 1.07, "learning_rate": 9.304778023694833e-06, "loss": 2.7529, "step": 148000 }, { "epoch": 1.07, "eval_accuracy": 0.4589487437733076, "eval_loss": 2.7767789363861084, "eval_runtime": 29.4704, "eval_samples_per_second": 219.984, "eval_steps_per_second": 2.307, "step": 148000 }, { "epoch": 1.07, "learning_rate": 9.297545169176468e-06, "loss": 2.7583, "step": 148100 }, { "epoch": 1.07, "learning_rate": 9.290312314658104e-06, "loss": 2.7579, "step": 148200 }, { "epoch": 1.07, "learning_rate": 9.283151788684924e-06, "loss": 2.7561, "step": 148300 }, { "epoch": 1.07, "learning_rate": 9.275918934166559e-06, "loss": 2.7596, "step": 148400 }, { "epoch": 1.07, "learning_rate": 9.268686079648195e-06, "loss": 2.7601, "step": 148500 }, { "epoch": 1.07, "learning_rate": 9.26145322512983e-06, "loss": 2.7478, "step": 148600 }, { "epoch": 1.08, "learning_rate": 9.254220370611467e-06, "loss": 2.7583, "step": 148700 }, { "epoch": 1.08, "learning_rate": 9.246987516093101e-06, "loss": 2.7545, "step": 148800 }, { "epoch": 1.08, "learning_rate": 9.239754661574738e-06, "loss": 2.7507, "step": 148900 }, { "epoch": 1.08, "learning_rate": 9.232521807056374e-06, "loss": 2.7573, "step": 149000 }, { "epoch": 1.08, "eval_accuracy": 0.4591822352880686, "eval_loss": 2.7764034271240234, "eval_runtime": 29.9267, "eval_samples_per_second": 216.63, "eval_steps_per_second": 2.272, "step": 149000 }, { "epoch": 1.08, "learning_rate": 9.22528895253801e-06, "loss": 2.7514, "step": 149100 }, { "epoch": 1.08, "learning_rate": 9.218056098019646e-06, "loss": 2.7569, "step": 149200 }, { "epoch": 1.08, "learning_rate": 9.21082324350128e-06, "loss": 2.757, "step": 149300 }, { "epoch": 1.08, "learning_rate": 9.203590388982917e-06, "loss": 2.7477, "step": 149400 }, { "epoch": 1.08, "learning_rate": 9.196357534464554e-06, "loss": 2.7616, "step": 149500 }, { "epoch": 1.08, "learning_rate": 9.189124679946188e-06, "loss": 2.7647, "step": 149600 }, { "epoch": 1.08, "learning_rate": 9.181891825427825e-06, "loss": 2.7509, "step": 149700 }, { "epoch": 1.08, "learning_rate": 9.174731299454643e-06, "loss": 2.7546, "step": 149800 }, { "epoch": 1.08, "learning_rate": 9.16749844493628e-06, "loss": 2.7492, "step": 149900 }, { "epoch": 1.08, "learning_rate": 9.160265590417914e-06, "loss": 2.754, "step": 150000 }, { "epoch": 1.08, "eval_accuracy": 0.459124164859527, "eval_loss": 2.7761712074279785, "eval_runtime": 30.2068, "eval_samples_per_second": 214.621, "eval_steps_per_second": 2.251, "step": 150000 }, { "epoch": 1.09, "learning_rate": 9.15303273589955e-06, "loss": 2.7537, "step": 150100 }, { "epoch": 1.09, "learning_rate": 9.145799881381187e-06, "loss": 2.753, "step": 150200 }, { "epoch": 1.09, "learning_rate": 9.138567026862822e-06, "loss": 2.7594, "step": 150300 }, { "epoch": 1.09, "learning_rate": 9.131334172344459e-06, "loss": 2.7595, "step": 150400 }, { "epoch": 1.09, "learning_rate": 9.124101317826093e-06, "loss": 2.7532, "step": 150500 }, { "epoch": 1.09, "learning_rate": 9.11686846330773e-06, "loss": 2.7571, "step": 150600 }, { "epoch": 1.09, "learning_rate": 9.109635608789365e-06, "loss": 2.7499, "step": 150700 }, { "epoch": 1.09, "learning_rate": 9.102475082816185e-06, "loss": 2.7486, "step": 150800 }, { "epoch": 1.09, "learning_rate": 9.095242228297821e-06, "loss": 2.7537, "step": 150900 }, { "epoch": 1.09, "learning_rate": 9.088009373779456e-06, "loss": 2.7553, "step": 151000 }, { "epoch": 1.09, "eval_accuracy": 0.45908545124049926, "eval_loss": 2.7759199142456055, "eval_runtime": 31.1913, "eval_samples_per_second": 207.846, "eval_steps_per_second": 2.18, "step": 151000 }, { "epoch": 1.09, "learning_rate": 9.080776519261092e-06, "loss": 2.7495, "step": 151100 }, { "epoch": 1.09, "learning_rate": 9.073543664742727e-06, "loss": 2.7562, "step": 151200 }, { "epoch": 1.09, "learning_rate": 9.066310810224364e-06, "loss": 2.7567, "step": 151300 }, { "epoch": 1.1, "learning_rate": 9.059077955705999e-06, "loss": 2.7532, "step": 151400 }, { "epoch": 1.1, "learning_rate": 9.051845101187635e-06, "loss": 2.7485, "step": 151500 }, { "epoch": 1.1, "learning_rate": 9.044612246669272e-06, "loss": 2.7502, "step": 151600 }, { "epoch": 1.1, "learning_rate": 9.037379392150906e-06, "loss": 2.756, "step": 151700 }, { "epoch": 1.1, "learning_rate": 9.030146537632543e-06, "loss": 2.7559, "step": 151800 }, { "epoch": 1.1, "learning_rate": 9.022913683114178e-06, "loss": 2.753, "step": 151900 }, { "epoch": 1.1, "learning_rate": 9.015680828595814e-06, "loss": 2.7485, "step": 152000 }, { "epoch": 1.1, "eval_accuracy": 0.4592844634383138, "eval_loss": 2.7755496501922607, "eval_runtime": 34.9091, "eval_samples_per_second": 185.711, "eval_steps_per_second": 1.948, "step": 152000 }, { "epoch": 1.1, "learning_rate": 9.00844797407745e-06, "loss": 2.7569, "step": 152100 }, { "epoch": 1.1, "learning_rate": 9.001215119559086e-06, "loss": 2.7532, "step": 152200 }, { "epoch": 1.1, "learning_rate": 8.993982265040722e-06, "loss": 2.7619, "step": 152300 }, { "epoch": 1.1, "learning_rate": 8.986749410522357e-06, "loss": 2.754, "step": 152400 }, { "epoch": 1.1, "learning_rate": 8.979516556003993e-06, "loss": 2.7528, "step": 152500 }, { "epoch": 1.1, "learning_rate": 8.972283701485628e-06, "loss": 2.761, "step": 152600 }, { "epoch": 1.1, "learning_rate": 8.965050846967265e-06, "loss": 2.7578, "step": 152700 }, { "epoch": 1.11, "learning_rate": 8.957817992448901e-06, "loss": 2.7531, "step": 152800 }, { "epoch": 1.11, "learning_rate": 8.950585137930536e-06, "loss": 2.7593, "step": 152900 }, { "epoch": 1.11, "learning_rate": 8.943352283412173e-06, "loss": 2.7558, "step": 153000 }, { "epoch": 1.11, "eval_accuracy": 0.45928869774039494, "eval_loss": 2.7751994132995605, "eval_runtime": 29.596, "eval_samples_per_second": 219.05, "eval_steps_per_second": 2.298, "step": 153000 }, { "epoch": 1.11, "learning_rate": 8.936119428893807e-06, "loss": 2.7529, "step": 153100 }, { "epoch": 1.11, "learning_rate": 8.928886574375444e-06, "loss": 2.7601, "step": 153200 }, { "epoch": 1.11, "learning_rate": 8.92165371985708e-06, "loss": 2.766, "step": 153300 }, { "epoch": 1.11, "learning_rate": 8.914420865338715e-06, "loss": 2.7564, "step": 153400 }, { "epoch": 1.11, "learning_rate": 8.907188010820352e-06, "loss": 2.7583, "step": 153500 }, { "epoch": 1.11, "learning_rate": 8.899955156301987e-06, "loss": 2.7546, "step": 153600 }, { "epoch": 1.11, "learning_rate": 8.892722301783623e-06, "loss": 2.752, "step": 153700 }, { "epoch": 1.11, "learning_rate": 8.885489447265258e-06, "loss": 2.7557, "step": 153800 }, { "epoch": 1.11, "learning_rate": 8.878256592746894e-06, "loss": 2.7539, "step": 153900 }, { "epoch": 1.11, "learning_rate": 8.871023738228531e-06, "loss": 2.7563, "step": 154000 }, { "epoch": 1.11, "eval_accuracy": 0.459253613523151, "eval_loss": 2.774827003479004, "eval_runtime": 31.2857, "eval_samples_per_second": 207.22, "eval_steps_per_second": 2.174, "step": 154000 }, { "epoch": 1.11, "learning_rate": 8.863790883710166e-06, "loss": 2.7591, "step": 154100 }, { "epoch": 1.12, "learning_rate": 8.856558029191802e-06, "loss": 2.7548, "step": 154200 }, { "epoch": 1.12, "learning_rate": 8.84939750321862e-06, "loss": 2.7506, "step": 154300 }, { "epoch": 1.12, "learning_rate": 8.842164648700257e-06, "loss": 2.7549, "step": 154400 }, { "epoch": 1.12, "learning_rate": 8.834931794181892e-06, "loss": 2.7589, "step": 154500 }, { "epoch": 1.12, "learning_rate": 8.827698939663528e-06, "loss": 2.7581, "step": 154600 }, { "epoch": 1.12, "learning_rate": 8.820466085145165e-06, "loss": 2.7543, "step": 154700 }, { "epoch": 1.12, "learning_rate": 8.8132332306268e-06, "loss": 2.7567, "step": 154800 }, { "epoch": 1.12, "learning_rate": 8.806000376108436e-06, "loss": 2.7518, "step": 154900 }, { "epoch": 1.12, "learning_rate": 8.798767521590071e-06, "loss": 2.7557, "step": 155000 }, { "epoch": 1.12, "eval_accuracy": 0.4593570514739908, "eval_loss": 2.774669647216797, "eval_runtime": 29.7619, "eval_samples_per_second": 217.829, "eval_steps_per_second": 2.285, "step": 155000 }, { "epoch": 1.12, "learning_rate": 8.791534667071707e-06, "loss": 2.7495, "step": 155100 }, { "epoch": 1.12, "learning_rate": 8.784301812553342e-06, "loss": 2.7528, "step": 155200 }, { "epoch": 1.12, "learning_rate": 8.777068958034979e-06, "loss": 2.7503, "step": 155300 }, { "epoch": 1.12, "learning_rate": 8.769836103516615e-06, "loss": 2.7503, "step": 155400 }, { "epoch": 1.12, "learning_rate": 8.762675577543433e-06, "loss": 2.7492, "step": 155500 }, { "epoch": 1.13, "learning_rate": 8.75544272302507e-06, "loss": 2.7544, "step": 155600 }, { "epoch": 1.13, "learning_rate": 8.748209868506705e-06, "loss": 2.7547, "step": 155700 }, { "epoch": 1.13, "learning_rate": 8.740977013988341e-06, "loss": 2.7446, "step": 155800 }, { "epoch": 1.13, "learning_rate": 8.733744159469978e-06, "loss": 2.753, "step": 155900 }, { "epoch": 1.13, "learning_rate": 8.726511304951612e-06, "loss": 2.7593, "step": 156000 }, { "epoch": 1.13, "eval_accuracy": 0.4591931234934202, "eval_loss": 2.7744040489196777, "eval_runtime": 29.5223, "eval_samples_per_second": 219.597, "eval_steps_per_second": 2.303, "step": 156000 }, { "epoch": 1.13, "learning_rate": 8.719278450433249e-06, "loss": 2.758, "step": 156100 }, { "epoch": 1.13, "learning_rate": 8.712045595914884e-06, "loss": 2.761, "step": 156200 }, { "epoch": 1.13, "learning_rate": 8.70481274139652e-06, "loss": 2.756, "step": 156300 }, { "epoch": 1.13, "learning_rate": 8.697579886878155e-06, "loss": 2.7484, "step": 156400 }, { "epoch": 1.13, "learning_rate": 8.690347032359792e-06, "loss": 2.7491, "step": 156500 }, { "epoch": 1.13, "learning_rate": 8.683114177841428e-06, "loss": 2.7551, "step": 156600 }, { "epoch": 1.13, "learning_rate": 8.675881323323063e-06, "loss": 2.7515, "step": 156700 }, { "epoch": 1.13, "learning_rate": 8.6686484688047e-06, "loss": 2.7541, "step": 156800 }, { "epoch": 1.13, "learning_rate": 8.66148794283152e-06, "loss": 2.7546, "step": 156900 }, { "epoch": 1.14, "learning_rate": 8.654255088313154e-06, "loss": 2.752, "step": 157000 }, { "epoch": 1.14, "eval_accuracy": 0.4592584527255295, "eval_loss": 2.774146795272827, "eval_runtime": 30.0141, "eval_samples_per_second": 215.999, "eval_steps_per_second": 2.266, "step": 157000 }, { "epoch": 1.14, "learning_rate": 8.64702223379479e-06, "loss": 2.7488, "step": 157100 }, { "epoch": 1.14, "learning_rate": 8.639789379276427e-06, "loss": 2.7539, "step": 157200 }, { "epoch": 1.14, "learning_rate": 8.632556524758062e-06, "loss": 2.7552, "step": 157300 }, { "epoch": 1.14, "learning_rate": 8.625323670239697e-06, "loss": 2.7566, "step": 157400 }, { "epoch": 1.14, "learning_rate": 8.618090815721333e-06, "loss": 2.7454, "step": 157500 }, { "epoch": 1.14, "learning_rate": 8.610930289748153e-06, "loss": 2.7527, "step": 157600 }, { "epoch": 1.14, "learning_rate": 8.60369743522979e-06, "loss": 2.7558, "step": 157700 }, { "epoch": 1.14, "learning_rate": 8.596464580711424e-06, "loss": 2.7579, "step": 157800 }, { "epoch": 1.14, "learning_rate": 8.58923172619306e-06, "loss": 2.7495, "step": 157900 }, { "epoch": 1.14, "learning_rate": 8.581998871674696e-06, "loss": 2.748, "step": 158000 }, { "epoch": 1.14, "eval_accuracy": 0.4593262015588281, "eval_loss": 2.773747205734253, "eval_runtime": 30.6235, "eval_samples_per_second": 211.7, "eval_steps_per_second": 2.221, "step": 158000 }, { "epoch": 1.14, "learning_rate": 8.574766017156332e-06, "loss": 2.7465, "step": 158100 }, { "epoch": 1.14, "learning_rate": 8.567533162637967e-06, "loss": 2.7549, "step": 158200 }, { "epoch": 1.14, "learning_rate": 8.560300308119603e-06, "loss": 2.7489, "step": 158300 }, { "epoch": 1.15, "learning_rate": 8.55306745360124e-06, "loss": 2.7508, "step": 158400 }, { "epoch": 1.15, "learning_rate": 8.545834599082875e-06, "loss": 2.7512, "step": 158500 }, { "epoch": 1.15, "learning_rate": 8.538601744564511e-06, "loss": 2.7481, "step": 158600 }, { "epoch": 1.15, "learning_rate": 8.531368890046146e-06, "loss": 2.7517, "step": 158700 }, { "epoch": 1.15, "learning_rate": 8.524136035527783e-06, "loss": 2.7504, "step": 158800 }, { "epoch": 1.15, "learning_rate": 8.516903181009417e-06, "loss": 2.7538, "step": 158900 }, { "epoch": 1.15, "learning_rate": 8.509670326491054e-06, "loss": 2.7549, "step": 159000 }, { "epoch": 1.15, "eval_accuracy": 0.4593860866882616, "eval_loss": 2.773451566696167, "eval_runtime": 27.8905, "eval_samples_per_second": 232.445, "eval_steps_per_second": 2.438, "step": 159000 }, { "epoch": 1.15, "learning_rate": 8.50243747197269e-06, "loss": 2.7499, "step": 159100 }, { "epoch": 1.15, "learning_rate": 8.495204617454325e-06, "loss": 2.7457, "step": 159200 }, { "epoch": 1.15, "learning_rate": 8.487971762935962e-06, "loss": 2.7566, "step": 159300 }, { "epoch": 1.15, "learning_rate": 8.480738908417597e-06, "loss": 2.7566, "step": 159400 }, { "epoch": 1.15, "learning_rate": 8.473506053899233e-06, "loss": 2.7539, "step": 159500 }, { "epoch": 1.15, "learning_rate": 8.46627319938087e-06, "loss": 2.7471, "step": 159600 }, { "epoch": 1.16, "learning_rate": 8.459040344862503e-06, "loss": 2.7586, "step": 159700 }, { "epoch": 1.16, "learning_rate": 8.45180749034414e-06, "loss": 2.7493, "step": 159800 }, { "epoch": 1.16, "learning_rate": 8.444574635825776e-06, "loss": 2.7554, "step": 159900 }, { "epoch": 1.16, "learning_rate": 8.437414109852596e-06, "loss": 2.7455, "step": 160000 }, { "epoch": 1.16, "eval_accuracy": 0.45956574207656226, "eval_loss": 2.7732744216918945, "eval_runtime": 30.512, "eval_samples_per_second": 212.474, "eval_steps_per_second": 2.229, "step": 160000 }, { "epoch": 1.16, "learning_rate": 8.43018125533423e-06, "loss": 2.7551, "step": 160100 }, { "epoch": 1.16, "learning_rate": 8.422948400815867e-06, "loss": 2.7513, "step": 160200 }, { "epoch": 1.16, "learning_rate": 8.415715546297503e-06, "loss": 2.7594, "step": 160300 }, { "epoch": 1.16, "learning_rate": 8.408555020324321e-06, "loss": 2.7598, "step": 160400 }, { "epoch": 1.16, "learning_rate": 8.401322165805958e-06, "loss": 2.7547, "step": 160500 }, { "epoch": 1.16, "learning_rate": 8.394089311287593e-06, "loss": 2.7433, "step": 160600 }, { "epoch": 1.16, "learning_rate": 8.38685645676923e-06, "loss": 2.7512, "step": 160700 }, { "epoch": 1.16, "learning_rate": 8.379623602250864e-06, "loss": 2.7544, "step": 160800 }, { "epoch": 1.16, "learning_rate": 8.3723907477325e-06, "loss": 2.7475, "step": 160900 }, { "epoch": 1.16, "learning_rate": 8.365157893214137e-06, "loss": 2.7582, "step": 161000 }, { "epoch": 1.16, "eval_accuracy": 0.45939636999331585, "eval_loss": 2.7731149196624756, "eval_runtime": 31.8214, "eval_samples_per_second": 203.731, "eval_steps_per_second": 2.137, "step": 161000 }, { "epoch": 1.17, "learning_rate": 8.357925038695772e-06, "loss": 2.747, "step": 161100 }, { "epoch": 1.17, "learning_rate": 8.350692184177408e-06, "loss": 2.7507, "step": 161200 }, { "epoch": 1.17, "learning_rate": 8.343459329659043e-06, "loss": 2.7441, "step": 161300 }, { "epoch": 1.17, "learning_rate": 8.33622647514068e-06, "loss": 2.7502, "step": 161400 }, { "epoch": 1.17, "learning_rate": 8.328993620622315e-06, "loss": 2.7447, "step": 161500 }, { "epoch": 1.17, "learning_rate": 8.321760766103951e-06, "loss": 2.7473, "step": 161600 }, { "epoch": 1.17, "learning_rate": 8.314527911585588e-06, "loss": 2.7443, "step": 161700 }, { "epoch": 1.17, "learning_rate": 8.307295057067222e-06, "loss": 2.7603, "step": 161800 }, { "epoch": 1.17, "learning_rate": 8.300062202548859e-06, "loss": 2.7555, "step": 161900 }, { "epoch": 1.17, "learning_rate": 8.292829348030494e-06, "loss": 2.7532, "step": 162000 }, { "epoch": 1.17, "eval_accuracy": 0.45951795495307485, "eval_loss": 2.7727766036987305, "eval_runtime": 30.0745, "eval_samples_per_second": 215.565, "eval_steps_per_second": 2.261, "step": 162000 }, { "epoch": 1.17, "learning_rate": 8.28559649351213e-06, "loss": 2.7477, "step": 162100 }, { "epoch": 1.17, "learning_rate": 8.278435967538948e-06, "loss": 2.7539, "step": 162200 }, { "epoch": 1.17, "learning_rate": 8.271203113020585e-06, "loss": 2.7502, "step": 162300 }, { "epoch": 1.17, "learning_rate": 8.263970258502221e-06, "loss": 2.752, "step": 162400 }, { "epoch": 1.18, "learning_rate": 8.256737403983856e-06, "loss": 2.7551, "step": 162500 }, { "epoch": 1.18, "learning_rate": 8.249504549465493e-06, "loss": 2.7514, "step": 162600 }, { "epoch": 1.18, "learning_rate": 8.242271694947128e-06, "loss": 2.7475, "step": 162700 }, { "epoch": 1.18, "learning_rate": 8.235038840428764e-06, "loss": 2.751, "step": 162800 }, { "epoch": 1.18, "learning_rate": 8.2278059859104e-06, "loss": 2.7475, "step": 162900 }, { "epoch": 1.18, "learning_rate": 8.220573131392035e-06, "loss": 2.7496, "step": 163000 }, { "epoch": 1.18, "eval_accuracy": 0.4595270284575345, "eval_loss": 2.772428512573242, "eval_runtime": 30.2915, "eval_samples_per_second": 214.021, "eval_steps_per_second": 2.245, "step": 163000 }, { "epoch": 1.18, "learning_rate": 8.213340276873672e-06, "loss": 2.7493, "step": 163100 }, { "epoch": 1.18, "learning_rate": 8.206107422355307e-06, "loss": 2.7641, "step": 163200 }, { "epoch": 1.18, "learning_rate": 8.198874567836943e-06, "loss": 2.7511, "step": 163300 }, { "epoch": 1.18, "learning_rate": 8.191641713318578e-06, "loss": 2.7539, "step": 163400 }, { "epoch": 1.18, "learning_rate": 8.184408858800215e-06, "loss": 2.7525, "step": 163500 }, { "epoch": 1.18, "learning_rate": 8.177176004281851e-06, "loss": 2.7518, "step": 163600 }, { "epoch": 1.18, "learning_rate": 8.169943149763486e-06, "loss": 2.7566, "step": 163700 }, { "epoch": 1.18, "learning_rate": 8.162782623790306e-06, "loss": 2.752, "step": 163800 }, { "epoch": 1.19, "learning_rate": 8.155549769271942e-06, "loss": 2.7538, "step": 163900 }, { "epoch": 1.19, "learning_rate": 8.148316914753577e-06, "loss": 2.75, "step": 164000 }, { "epoch": 1.19, "eval_accuracy": 0.45964437911521233, "eval_loss": 2.7721121311187744, "eval_runtime": 29.4536, "eval_samples_per_second": 220.109, "eval_steps_per_second": 2.309, "step": 164000 }, { "epoch": 1.19, "learning_rate": 8.141084060235212e-06, "loss": 2.7486, "step": 164100 }, { "epoch": 1.19, "learning_rate": 8.133851205716848e-06, "loss": 2.7547, "step": 164200 }, { "epoch": 1.19, "learning_rate": 8.126618351198485e-06, "loss": 2.749, "step": 164300 }, { "epoch": 1.19, "learning_rate": 8.11938549668012e-06, "loss": 2.7498, "step": 164400 }, { "epoch": 1.19, "learning_rate": 8.11222497070694e-06, "loss": 2.7537, "step": 164500 }, { "epoch": 1.19, "learning_rate": 8.104992116188576e-06, "loss": 2.754, "step": 164600 }, { "epoch": 1.19, "learning_rate": 8.097759261670212e-06, "loss": 2.7577, "step": 164700 }, { "epoch": 1.19, "learning_rate": 8.090526407151847e-06, "loss": 2.7514, "step": 164800 }, { "epoch": 1.19, "learning_rate": 8.083293552633484e-06, "loss": 2.755, "step": 164900 }, { "epoch": 1.19, "learning_rate": 8.076060698115119e-06, "loss": 2.7517, "step": 165000 }, { "epoch": 1.19, "eval_accuracy": 0.45970123974315935, "eval_loss": 2.7717862129211426, "eval_runtime": 29.3512, "eval_samples_per_second": 220.877, "eval_steps_per_second": 2.317, "step": 165000 }, { "epoch": 1.19, "learning_rate": 8.068827843596755e-06, "loss": 2.7543, "step": 165100 }, { "epoch": 1.19, "learning_rate": 8.06159498907839e-06, "loss": 2.744, "step": 165200 }, { "epoch": 1.2, "learning_rate": 8.054362134560026e-06, "loss": 2.7539, "step": 165300 }, { "epoch": 1.2, "learning_rate": 8.047129280041663e-06, "loss": 2.7509, "step": 165400 }, { "epoch": 1.2, "learning_rate": 8.039896425523298e-06, "loss": 2.7535, "step": 165500 }, { "epoch": 1.2, "learning_rate": 8.032663571004934e-06, "loss": 2.7485, "step": 165600 }, { "epoch": 1.2, "learning_rate": 8.025430716486569e-06, "loss": 2.7493, "step": 165700 }, { "epoch": 1.2, "learning_rate": 8.018197861968206e-06, "loss": 2.7477, "step": 165800 }, { "epoch": 1.2, "learning_rate": 8.010965007449842e-06, "loss": 2.7504, "step": 165900 }, { "epoch": 1.2, "learning_rate": 8.003732152931477e-06, "loss": 2.7522, "step": 166000 }, { "epoch": 1.2, "eval_accuracy": 0.4596595016226451, "eval_loss": 2.7715861797332764, "eval_runtime": 29.3689, "eval_samples_per_second": 220.744, "eval_steps_per_second": 2.315, "step": 166000 }, { "epoch": 1.2, "learning_rate": 7.996499298413113e-06, "loss": 2.7494, "step": 166100 }, { "epoch": 1.2, "learning_rate": 7.989266443894748e-06, "loss": 2.7555, "step": 166200 }, { "epoch": 1.2, "learning_rate": 7.982033589376383e-06, "loss": 2.7561, "step": 166300 }, { "epoch": 1.2, "learning_rate": 7.974873063403203e-06, "loss": 2.7441, "step": 166400 }, { "epoch": 1.2, "learning_rate": 7.96764020888484e-06, "loss": 2.7574, "step": 166500 }, { "epoch": 1.2, "learning_rate": 7.960407354366476e-06, "loss": 2.7528, "step": 166600 }, { "epoch": 1.21, "learning_rate": 7.95317449984811e-06, "loss": 2.7582, "step": 166700 }, { "epoch": 1.21, "learning_rate": 7.945941645329747e-06, "loss": 2.7543, "step": 166800 }, { "epoch": 1.21, "learning_rate": 7.938708790811382e-06, "loss": 2.7501, "step": 166900 }, { "epoch": 1.21, "learning_rate": 7.931475936293019e-06, "loss": 2.7514, "step": 167000 }, { "epoch": 1.21, "eval_accuracy": 0.45986395792313534, "eval_loss": 2.771327495574951, "eval_runtime": 32.3724, "eval_samples_per_second": 200.263, "eval_steps_per_second": 2.101, "step": 167000 }, { "epoch": 1.21, "learning_rate": 7.924243081774653e-06, "loss": 2.7594, "step": 167100 }, { "epoch": 1.21, "learning_rate": 7.91701022725629e-06, "loss": 2.7553, "step": 167200 }, { "epoch": 1.21, "learning_rate": 7.909777372737926e-06, "loss": 2.7411, "step": 167300 }, { "epoch": 1.21, "learning_rate": 7.902544518219561e-06, "loss": 2.7499, "step": 167400 }, { "epoch": 1.21, "learning_rate": 7.895311663701198e-06, "loss": 2.7544, "step": 167500 }, { "epoch": 1.21, "learning_rate": 7.888078809182833e-06, "loss": 2.7509, "step": 167600 }, { "epoch": 1.21, "learning_rate": 7.880845954664469e-06, "loss": 2.7516, "step": 167700 }, { "epoch": 1.21, "learning_rate": 7.873613100146106e-06, "loss": 2.7502, "step": 167800 }, { "epoch": 1.21, "learning_rate": 7.86638024562774e-06, "loss": 2.7581, "step": 167900 }, { "epoch": 1.22, "learning_rate": 7.859147391109377e-06, "loss": 2.7515, "step": 168000 }, { "epoch": 1.22, "eval_accuracy": 0.4597810865824041, "eval_loss": 2.7710556983947754, "eval_runtime": 30.9587, "eval_samples_per_second": 209.408, "eval_steps_per_second": 2.196, "step": 168000 }, { "epoch": 1.22, "learning_rate": 7.851914536591012e-06, "loss": 2.75, "step": 168100 }, { "epoch": 1.22, "learning_rate": 7.844681682072648e-06, "loss": 2.7496, "step": 168200 }, { "epoch": 1.22, "learning_rate": 7.837448827554283e-06, "loss": 2.747, "step": 168300 }, { "epoch": 1.22, "learning_rate": 7.83021597303592e-06, "loss": 2.7516, "step": 168400 }, { "epoch": 1.22, "learning_rate": 7.822983118517556e-06, "loss": 2.7546, "step": 168500 }, { "epoch": 1.22, "learning_rate": 7.81575026399919e-06, "loss": 2.7462, "step": 168600 }, { "epoch": 1.22, "learning_rate": 7.808517409480826e-06, "loss": 2.7511, "step": 168700 }, { "epoch": 1.22, "learning_rate": 7.801284554962462e-06, "loss": 2.7571, "step": 168800 }, { "epoch": 1.22, "learning_rate": 7.794051700444097e-06, "loss": 2.7533, "step": 168900 }, { "epoch": 1.22, "learning_rate": 7.786818845925734e-06, "loss": 2.7493, "step": 169000 }, { "epoch": 1.22, "eval_accuracy": 0.45982221980262106, "eval_loss": 2.7707936763763428, "eval_runtime": 30.5041, "eval_samples_per_second": 212.529, "eval_steps_per_second": 2.229, "step": 169000 }, { "epoch": 1.22, "learning_rate": 7.779585991407368e-06, "loss": 2.7488, "step": 169100 }, { "epoch": 1.22, "learning_rate": 7.772353136889005e-06, "loss": 2.7433, "step": 169200 }, { "epoch": 1.22, "learning_rate": 7.765120282370641e-06, "loss": 2.7549, "step": 169300 }, { "epoch": 1.23, "learning_rate": 7.757887427852276e-06, "loss": 2.7575, "step": 169400 }, { "epoch": 1.23, "learning_rate": 7.750726901879096e-06, "loss": 2.7478, "step": 169500 }, { "epoch": 1.23, "learning_rate": 7.743566375905916e-06, "loss": 2.7501, "step": 169600 }, { "epoch": 1.23, "learning_rate": 7.73633352138755e-06, "loss": 2.7466, "step": 169700 }, { "epoch": 1.23, "learning_rate": 7.729100666869187e-06, "loss": 2.7534, "step": 169800 }, { "epoch": 1.23, "learning_rate": 7.721867812350824e-06, "loss": 2.7458, "step": 169900 }, { "epoch": 1.23, "learning_rate": 7.714634957832458e-06, "loss": 2.7491, "step": 170000 }, { "epoch": 1.23, "eval_accuracy": 0.45979620908983676, "eval_loss": 2.7705278396606445, "eval_runtime": 32.0791, "eval_samples_per_second": 202.094, "eval_steps_per_second": 2.12, "step": 170000 }, { "epoch": 1.23, "learning_rate": 7.707402103314095e-06, "loss": 2.7529, "step": 170100 }, { "epoch": 1.23, "learning_rate": 7.70016924879573e-06, "loss": 2.7448, "step": 170200 }, { "epoch": 1.23, "learning_rate": 7.692936394277366e-06, "loss": 2.7452, "step": 170300 }, { "epoch": 1.23, "learning_rate": 7.685703539759003e-06, "loss": 2.7512, "step": 170400 }, { "epoch": 1.23, "learning_rate": 7.678470685240638e-06, "loss": 2.7513, "step": 170500 }, { "epoch": 1.23, "learning_rate": 7.671237830722274e-06, "loss": 2.7544, "step": 170600 }, { "epoch": 1.23, "learning_rate": 7.664004976203909e-06, "loss": 2.749, "step": 170700 }, { "epoch": 1.24, "learning_rate": 7.656772121685545e-06, "loss": 2.7389, "step": 170800 }, { "epoch": 1.24, "learning_rate": 7.64953926716718e-06, "loss": 2.7474, "step": 170900 }, { "epoch": 1.24, "learning_rate": 7.642306412648817e-06, "loss": 2.7552, "step": 171000 }, { "epoch": 1.24, "eval_accuracy": 0.45987726572967613, "eval_loss": 2.7704155445098877, "eval_runtime": 30.2626, "eval_samples_per_second": 214.224, "eval_steps_per_second": 2.247, "step": 171000 }, { "epoch": 1.24, "learning_rate": 7.635073558130453e-06, "loss": 2.7507, "step": 171100 }, { "epoch": 1.24, "learning_rate": 7.627840703612088e-06, "loss": 2.7527, "step": 171200 }, { "epoch": 1.24, "learning_rate": 7.620607849093724e-06, "loss": 2.7471, "step": 171300 }, { "epoch": 1.24, "learning_rate": 7.61337499457536e-06, "loss": 2.7481, "step": 171400 }, { "epoch": 1.24, "learning_rate": 7.606142140056996e-06, "loss": 2.7542, "step": 171500 }, { "epoch": 1.24, "learning_rate": 7.5989092855386315e-06, "loss": 2.7582, "step": 171600 }, { "epoch": 1.24, "learning_rate": 7.5917487595654504e-06, "loss": 2.7523, "step": 171700 }, { "epoch": 1.24, "learning_rate": 7.58458823359227e-06, "loss": 2.7543, "step": 171800 }, { "epoch": 1.24, "learning_rate": 7.577355379073907e-06, "loss": 2.7501, "step": 171900 }, { "epoch": 1.24, "learning_rate": 7.570122524555542e-06, "loss": 2.7536, "step": 172000 }, { "epoch": 1.24, "eval_accuracy": 0.4599921967861647, "eval_loss": 2.7700235843658447, "eval_runtime": 30.9099, "eval_samples_per_second": 209.739, "eval_steps_per_second": 2.2, "step": 172000 }, { "epoch": 1.24, "learning_rate": 7.562889670037178e-06, "loss": 2.7438, "step": 172100 }, { "epoch": 1.25, "learning_rate": 7.555656815518814e-06, "loss": 2.7532, "step": 172200 }, { "epoch": 1.25, "learning_rate": 7.548423961000449e-06, "loss": 2.7427, "step": 172300 }, { "epoch": 1.25, "learning_rate": 7.541191106482085e-06, "loss": 2.7507, "step": 172400 }, { "epoch": 1.25, "learning_rate": 7.533958251963721e-06, "loss": 2.7479, "step": 172500 }, { "epoch": 1.25, "learning_rate": 7.526725397445357e-06, "loss": 2.7546, "step": 172600 }, { "epoch": 1.25, "learning_rate": 7.519492542926993e-06, "loss": 2.7616, "step": 172700 }, { "epoch": 1.25, "learning_rate": 7.5122596884086285e-06, "loss": 2.752, "step": 172800 }, { "epoch": 1.25, "learning_rate": 7.505026833890263e-06, "loss": 2.7501, "step": 172900 }, { "epoch": 1.25, "learning_rate": 7.497793979371899e-06, "loss": 2.7485, "step": 173000 }, { "epoch": 1.25, "eval_accuracy": 0.4599425949617854, "eval_loss": 2.769742727279663, "eval_runtime": 29.9989, "eval_samples_per_second": 216.108, "eval_steps_per_second": 2.267, "step": 173000 }, { "epoch": 1.25, "learning_rate": 7.490561124853535e-06, "loss": 2.7499, "step": 173100 }, { "epoch": 1.25, "learning_rate": 7.48332827033517e-06, "loss": 2.7605, "step": 173200 }, { "epoch": 1.25, "learning_rate": 7.476095415816806e-06, "loss": 2.7559, "step": 173300 }, { "epoch": 1.25, "learning_rate": 7.4688625612984425e-06, "loss": 2.7446, "step": 173400 }, { "epoch": 1.25, "learning_rate": 7.461629706780078e-06, "loss": 2.7475, "step": 173500 }, { "epoch": 1.26, "learning_rate": 7.454396852261714e-06, "loss": 2.7498, "step": 173600 }, { "epoch": 1.26, "learning_rate": 7.4471639977433495e-06, "loss": 2.7551, "step": 173700 }, { "epoch": 1.26, "learning_rate": 7.439931143224985e-06, "loss": 2.7502, "step": 173800 }, { "epoch": 1.26, "learning_rate": 7.432698288706621e-06, "loss": 2.7391, "step": 173900 }, { "epoch": 1.26, "learning_rate": 7.425465434188257e-06, "loss": 2.7455, "step": 174000 }, { "epoch": 1.26, "eval_accuracy": 0.45988815393502763, "eval_loss": 2.7696611881256104, "eval_runtime": 29.7398, "eval_samples_per_second": 217.991, "eval_steps_per_second": 2.286, "step": 174000 }, { "epoch": 1.26, "learning_rate": 7.418232579669893e-06, "loss": 2.7492, "step": 174100 }, { "epoch": 1.26, "learning_rate": 7.410999725151529e-06, "loss": 2.7532, "step": 174200 }, { "epoch": 1.26, "learning_rate": 7.4038391991783485e-06, "loss": 2.7515, "step": 174300 }, { "epoch": 1.26, "learning_rate": 7.396606344659984e-06, "loss": 2.746, "step": 174400 }, { "epoch": 1.26, "learning_rate": 7.389373490141621e-06, "loss": 2.7493, "step": 174500 }, { "epoch": 1.26, "learning_rate": 7.382140635623256e-06, "loss": 2.7509, "step": 174600 }, { "epoch": 1.26, "learning_rate": 7.374907781104892e-06, "loss": 2.7518, "step": 174700 }, { "epoch": 1.26, "learning_rate": 7.367674926586528e-06, "loss": 2.748, "step": 174800 }, { "epoch": 1.27, "learning_rate": 7.360442072068163e-06, "loss": 2.7488, "step": 174900 }, { "epoch": 1.27, "learning_rate": 7.353209217549799e-06, "loss": 2.7516, "step": 175000 }, { "epoch": 1.27, "eval_accuracy": 0.45990811564483886, "eval_loss": 2.7693846225738525, "eval_runtime": 29.4233, "eval_samples_per_second": 220.336, "eval_steps_per_second": 2.311, "step": 175000 }, { "epoch": 1.27, "learning_rate": 7.346048691576618e-06, "loss": 2.7507, "step": 175100 }, { "epoch": 1.27, "learning_rate": 7.338815837058254e-06, "loss": 2.7448, "step": 175200 }, { "epoch": 1.27, "learning_rate": 7.33158298253989e-06, "loss": 2.7467, "step": 175300 }, { "epoch": 1.27, "learning_rate": 7.324350128021526e-06, "loss": 2.7441, "step": 175400 }, { "epoch": 1.27, "learning_rate": 7.317117273503161e-06, "loss": 2.7504, "step": 175500 }, { "epoch": 1.27, "learning_rate": 7.309884418984797e-06, "loss": 2.753, "step": 175600 }, { "epoch": 1.27, "learning_rate": 7.302651564466433e-06, "loss": 2.7544, "step": 175700 }, { "epoch": 1.27, "learning_rate": 7.295418709948069e-06, "loss": 2.7518, "step": 175800 }, { "epoch": 1.27, "learning_rate": 7.288185855429705e-06, "loss": 2.7521, "step": 175900 }, { "epoch": 1.27, "learning_rate": 7.2809530009113406e-06, "loss": 2.754, "step": 176000 }, { "epoch": 1.27, "eval_accuracy": 0.460049662314409, "eval_loss": 2.7690155506134033, "eval_runtime": 29.3683, "eval_samples_per_second": 220.749, "eval_steps_per_second": 2.315, "step": 176000 }, { "epoch": 1.27, "learning_rate": 7.273720146392976e-06, "loss": 2.7484, "step": 176100 }, { "epoch": 1.27, "learning_rate": 7.266487291874612e-06, "loss": 2.7408, "step": 176200 }, { "epoch": 1.28, "learning_rate": 7.2592544373562476e-06, "loss": 2.7564, "step": 176300 }, { "epoch": 1.28, "learning_rate": 7.2520939113830664e-06, "loss": 2.7572, "step": 176400 }, { "epoch": 1.28, "learning_rate": 7.244861056864703e-06, "loss": 2.754, "step": 176500 }, { "epoch": 1.28, "learning_rate": 7.237628202346339e-06, "loss": 2.7524, "step": 176600 }, { "epoch": 1.28, "learning_rate": 7.230395347827974e-06, "loss": 2.7537, "step": 176700 }, { "epoch": 1.28, "learning_rate": 7.22316249330961e-06, "loss": 2.7523, "step": 176800 }, { "epoch": 1.28, "learning_rate": 7.215929638791246e-06, "loss": 2.7486, "step": 176900 }, { "epoch": 1.28, "learning_rate": 7.208696784272881e-06, "loss": 2.7489, "step": 177000 }, { "epoch": 1.28, "eval_accuracy": 0.4598288737058914, "eval_loss": 2.76901912689209, "eval_runtime": 29.8199, "eval_samples_per_second": 217.405, "eval_steps_per_second": 2.28, "step": 177000 }, { "epoch": 1.28, "learning_rate": 7.201463929754518e-06, "loss": 2.7491, "step": 177100 }, { "epoch": 1.28, "learning_rate": 7.1942310752361535e-06, "loss": 2.7486, "step": 177200 }, { "epoch": 1.28, "learning_rate": 7.186998220717789e-06, "loss": 2.7605, "step": 177300 }, { "epoch": 1.28, "learning_rate": 7.179765366199425e-06, "loss": 2.7506, "step": 177400 }, { "epoch": 1.28, "learning_rate": 7.1725325116810605e-06, "loss": 2.7473, "step": 177500 }, { "epoch": 1.28, "learning_rate": 7.165299657162696e-06, "loss": 2.7484, "step": 177600 }, { "epoch": 1.29, "learning_rate": 7.158066802644333e-06, "loss": 2.7501, "step": 177700 }, { "epoch": 1.29, "learning_rate": 7.150833948125968e-06, "loss": 2.7492, "step": 177800 }, { "epoch": 1.29, "learning_rate": 7.143673422152787e-06, "loss": 2.7521, "step": 177900 }, { "epoch": 1.29, "learning_rate": 7.136440567634423e-06, "loss": 2.7491, "step": 178000 }, { "epoch": 1.29, "eval_accuracy": 0.4601452365613838, "eval_loss": 2.7686147689819336, "eval_runtime": 29.3757, "eval_samples_per_second": 220.693, "eval_steps_per_second": 2.315, "step": 178000 }, { "epoch": 1.29, "learning_rate": 7.1292077131160585e-06, "loss": 2.7487, "step": 178100 }, { "epoch": 1.29, "learning_rate": 7.121974858597694e-06, "loss": 2.748, "step": 178200 }, { "epoch": 1.29, "learning_rate": 7.11474200407933e-06, "loss": 2.7602, "step": 178300 }, { "epoch": 1.29, "learning_rate": 7.107509149560966e-06, "loss": 2.7475, "step": 178400 }, { "epoch": 1.29, "learning_rate": 7.100276295042602e-06, "loss": 2.7534, "step": 178500 }, { "epoch": 1.29, "learning_rate": 7.093043440524238e-06, "loss": 2.7507, "step": 178600 }, { "epoch": 1.29, "learning_rate": 7.085810586005873e-06, "loss": 2.7415, "step": 178700 }, { "epoch": 1.29, "learning_rate": 7.078577731487509e-06, "loss": 2.7544, "step": 178800 }, { "epoch": 1.29, "learning_rate": 7.071344876969145e-06, "loss": 2.7539, "step": 178900 }, { "epoch": 1.29, "learning_rate": 7.064112022450781e-06, "loss": 2.7432, "step": 179000 }, { "epoch": 1.29, "eval_accuracy": 0.46003030550489515, "eval_loss": 2.768362283706665, "eval_runtime": 29.3614, "eval_samples_per_second": 220.8, "eval_steps_per_second": 2.316, "step": 179000 }, { "epoch": 1.3, "learning_rate": 7.056879167932417e-06, "loss": 2.74, "step": 179100 }, { "epoch": 1.3, "learning_rate": 7.0496463134140526e-06, "loss": 2.7498, "step": 179200 }, { "epoch": 1.3, "learning_rate": 7.042413458895688e-06, "loss": 2.7522, "step": 179300 }, { "epoch": 1.3, "learning_rate": 7.035180604377324e-06, "loss": 2.7575, "step": 179400 }, { "epoch": 1.3, "learning_rate": 7.0279477498589596e-06, "loss": 2.7505, "step": 179500 }, { "epoch": 1.3, "learning_rate": 7.020714895340596e-06, "loss": 2.7489, "step": 179600 }, { "epoch": 1.3, "learning_rate": 7.013482040822232e-06, "loss": 2.7459, "step": 179700 }, { "epoch": 1.3, "learning_rate": 7.006321514849051e-06, "loss": 2.7523, "step": 179800 }, { "epoch": 1.3, "learning_rate": 6.999088660330686e-06, "loss": 2.7475, "step": 179900 }, { "epoch": 1.3, "learning_rate": 6.991855805812322e-06, "loss": 2.7388, "step": 180000 }, { "epoch": 1.3, "eval_accuracy": 0.460155519866438, "eval_loss": 2.768120050430298, "eval_runtime": 29.4147, "eval_samples_per_second": 220.4, "eval_steps_per_second": 2.312, "step": 180000 }, { "epoch": 1.3, "learning_rate": 6.984622951293958e-06, "loss": 2.7413, "step": 180100 }, { "epoch": 1.3, "learning_rate": 6.977390096775593e-06, "loss": 2.743, "step": 180200 }, { "epoch": 1.3, "learning_rate": 6.97015724225723e-06, "loss": 2.7562, "step": 180300 }, { "epoch": 1.3, "learning_rate": 6.9629243877388655e-06, "loss": 2.7498, "step": 180400 }, { "epoch": 1.31, "learning_rate": 6.955691533220501e-06, "loss": 2.7485, "step": 180500 }, { "epoch": 1.31, "learning_rate": 6.948458678702137e-06, "loss": 2.7551, "step": 180600 }, { "epoch": 1.31, "learning_rate": 6.9412258241837725e-06, "loss": 2.7558, "step": 180700 }, { "epoch": 1.31, "learning_rate": 6.933992969665408e-06, "loss": 2.7451, "step": 180800 }, { "epoch": 1.31, "learning_rate": 6.926760115147045e-06, "loss": 2.7498, "step": 180900 }, { "epoch": 1.31, "learning_rate": 6.91952726062868e-06, "loss": 2.7501, "step": 181000 }, { "epoch": 1.31, "eval_accuracy": 0.46024988431281816, "eval_loss": 2.767861843109131, "eval_runtime": 30.1014, "eval_samples_per_second": 215.372, "eval_steps_per_second": 2.259, "step": 181000 }, { "epoch": 1.31, "learning_rate": 6.912294406110316e-06, "loss": 2.7455, "step": 181100 }, { "epoch": 1.31, "learning_rate": 6.905061551591952e-06, "loss": 2.7505, "step": 181200 }, { "epoch": 1.31, "learning_rate": 6.897828697073587e-06, "loss": 2.7485, "step": 181300 }, { "epoch": 1.31, "learning_rate": 6.890595842555223e-06, "loss": 2.7509, "step": 181400 }, { "epoch": 1.31, "learning_rate": 6.8833629880368595e-06, "loss": 2.7526, "step": 181500 }, { "epoch": 1.31, "learning_rate": 6.876130133518495e-06, "loss": 2.7452, "step": 181600 }, { "epoch": 1.31, "learning_rate": 6.868897279000131e-06, "loss": 2.7528, "step": 181700 }, { "epoch": 1.31, "learning_rate": 6.8616644244817665e-06, "loss": 2.7542, "step": 181800 }, { "epoch": 1.32, "learning_rate": 6.854431569963402e-06, "loss": 2.7439, "step": 181900 }, { "epoch": 1.32, "learning_rate": 6.847198715445038e-06, "loss": 2.7526, "step": 182000 }, { "epoch": 1.32, "eval_accuracy": 0.4602668215211428, "eval_loss": 2.767512798309326, "eval_runtime": 29.7082, "eval_samples_per_second": 218.222, "eval_steps_per_second": 2.289, "step": 182000 }, { "epoch": 1.32, "learning_rate": 6.8399658609266735e-06, "loss": 2.7462, "step": 182100 }, { "epoch": 1.32, "learning_rate": 6.832877663498677e-06, "loss": 2.7469, "step": 182200 }, { "epoch": 1.32, "learning_rate": 6.825644808980313e-06, "loss": 2.7407, "step": 182300 }, { "epoch": 1.32, "learning_rate": 6.818411954461949e-06, "loss": 2.751, "step": 182400 }, { "epoch": 1.32, "learning_rate": 6.811179099943584e-06, "loss": 2.7415, "step": 182500 }, { "epoch": 1.32, "learning_rate": 6.80394624542522e-06, "loss": 2.7469, "step": 182600 }, { "epoch": 1.32, "learning_rate": 6.7967133909068565e-06, "loss": 2.7459, "step": 182700 }, { "epoch": 1.32, "learning_rate": 6.789480536388492e-06, "loss": 2.7417, "step": 182800 }, { "epoch": 1.32, "learning_rate": 6.782247681870128e-06, "loss": 2.7458, "step": 182900 }, { "epoch": 1.32, "learning_rate": 6.7750148273517635e-06, "loss": 2.7478, "step": 183000 }, { "epoch": 1.32, "eval_accuracy": 0.46032186744819786, "eval_loss": 2.7674057483673096, "eval_runtime": 30.0098, "eval_samples_per_second": 216.029, "eval_steps_per_second": 2.266, "step": 183000 }, { "epoch": 1.32, "learning_rate": 6.767781972833399e-06, "loss": 2.7386, "step": 183100 }, { "epoch": 1.33, "learning_rate": 6.760549118315035e-06, "loss": 2.7463, "step": 183200 }, { "epoch": 1.33, "learning_rate": 6.753316263796671e-06, "loss": 2.7496, "step": 183300 }, { "epoch": 1.33, "learning_rate": 6.74615573782349e-06, "loss": 2.7441, "step": 183400 }, { "epoch": 1.33, "learning_rate": 6.738922883305126e-06, "loss": 2.752, "step": 183500 }, { "epoch": 1.33, "learning_rate": 6.7316900287867616e-06, "loss": 2.753, "step": 183600 }, { "epoch": 1.33, "learning_rate": 6.724457174268397e-06, "loss": 2.7478, "step": 183700 }, { "epoch": 1.33, "learning_rate": 6.717224319750033e-06, "loss": 2.7465, "step": 183800 }, { "epoch": 1.33, "learning_rate": 6.7099914652316686e-06, "loss": 2.7441, "step": 183900 }, { "epoch": 1.33, "learning_rate": 6.702758610713305e-06, "loss": 2.7491, "step": 184000 }, { "epoch": 1.33, "eval_accuracy": 0.46035937126663096, "eval_loss": 2.767024040222168, "eval_runtime": 29.3058, "eval_samples_per_second": 221.219, "eval_steps_per_second": 2.32, "step": 184000 }, { "epoch": 1.33, "learning_rate": 6.695525756194941e-06, "loss": 2.7492, "step": 184100 }, { "epoch": 1.33, "learning_rate": 6.688292901676576e-06, "loss": 2.7514, "step": 184200 }, { "epoch": 1.33, "learning_rate": 6.681060047158212e-06, "loss": 2.7474, "step": 184300 }, { "epoch": 1.33, "learning_rate": 6.673827192639848e-06, "loss": 2.7496, "step": 184400 }, { "epoch": 1.33, "learning_rate": 6.666594338121483e-06, "loss": 2.7429, "step": 184500 }, { "epoch": 1.34, "learning_rate": 6.65936148360312e-06, "loss": 2.7572, "step": 184600 }, { "epoch": 1.34, "learning_rate": 6.652128629084756e-06, "loss": 2.7417, "step": 184700 }, { "epoch": 1.34, "learning_rate": 6.644895774566391e-06, "loss": 2.7459, "step": 184800 }, { "epoch": 1.34, "learning_rate": 6.637662920048027e-06, "loss": 2.7471, "step": 184900 }, { "epoch": 1.34, "learning_rate": 6.630430065529663e-06, "loss": 2.7505, "step": 185000 }, { "epoch": 1.34, "eval_accuracy": 0.46044466220855146, "eval_loss": 2.7669997215270996, "eval_runtime": 30.3971, "eval_samples_per_second": 213.277, "eval_steps_per_second": 2.237, "step": 185000 }, { "epoch": 1.34, "learning_rate": 6.623197211011298e-06, "loss": 2.7449, "step": 185100 }, { "epoch": 1.34, "learning_rate": 6.615964356492934e-06, "loss": 2.7466, "step": 185200 }, { "epoch": 1.34, "learning_rate": 6.6087315019745704e-06, "loss": 2.7491, "step": 185300 }, { "epoch": 1.34, "learning_rate": 6.601498647456206e-06, "loss": 2.753, "step": 185400 }, { "epoch": 1.34, "learning_rate": 6.594265792937842e-06, "loss": 2.7509, "step": 185500 }, { "epoch": 1.34, "learning_rate": 6.5870329384194774e-06, "loss": 2.7459, "step": 185600 }, { "epoch": 1.34, "learning_rate": 6.579800083901113e-06, "loss": 2.7465, "step": 185700 }, { "epoch": 1.34, "learning_rate": 6.572567229382749e-06, "loss": 2.7477, "step": 185800 }, { "epoch": 1.34, "learning_rate": 6.565334374864385e-06, "loss": 2.7506, "step": 185900 }, { "epoch": 1.35, "learning_rate": 6.558101520346021e-06, "loss": 2.7436, "step": 186000 }, { "epoch": 1.35, "eval_accuracy": 0.46046341411776803, "eval_loss": 2.766613245010376, "eval_runtime": 30.3071, "eval_samples_per_second": 213.91, "eval_steps_per_second": 2.244, "step": 186000 }, { "epoch": 1.35, "learning_rate": 6.550868665827657e-06, "loss": 2.7414, "step": 186100 }, { "epoch": 1.35, "learning_rate": 6.543635811309292e-06, "loss": 2.7517, "step": 186200 }, { "epoch": 1.35, "learning_rate": 6.536402956790928e-06, "loss": 2.7479, "step": 186300 }, { "epoch": 1.35, "learning_rate": 6.529170102272563e-06, "loss": 2.7473, "step": 186400 }, { "epoch": 1.35, "learning_rate": 6.5220095762993825e-06, "loss": 2.7451, "step": 186500 }, { "epoch": 1.35, "learning_rate": 6.514776721781019e-06, "loss": 2.7426, "step": 186600 }, { "epoch": 1.35, "learning_rate": 6.507543867262655e-06, "loss": 2.7463, "step": 186700 }, { "epoch": 1.35, "learning_rate": 6.50031101274429e-06, "loss": 2.7519, "step": 186800 }, { "epoch": 1.35, "learning_rate": 6.493078158225926e-06, "loss": 2.7468, "step": 186900 }, { "epoch": 1.35, "learning_rate": 6.485845303707562e-06, "loss": 2.7389, "step": 187000 }, { "epoch": 1.35, "eval_accuracy": 0.4603109792428463, "eval_loss": 2.7665233612060547, "eval_runtime": 30.7223, "eval_samples_per_second": 211.019, "eval_steps_per_second": 2.213, "step": 187000 }, { "epoch": 1.35, "learning_rate": 6.478612449189197e-06, "loss": 2.7471, "step": 187100 }, { "epoch": 1.35, "learning_rate": 6.471379594670834e-06, "loss": 2.7468, "step": 187200 }, { "epoch": 1.35, "learning_rate": 6.4641467401524695e-06, "loss": 2.7295, "step": 187300 }, { "epoch": 1.36, "learning_rate": 6.456913885634105e-06, "loss": 2.7394, "step": 187400 }, { "epoch": 1.36, "learning_rate": 6.449681031115741e-06, "loss": 2.7495, "step": 187500 }, { "epoch": 1.36, "learning_rate": 6.44252050514256e-06, "loss": 2.7429, "step": 187600 }, { "epoch": 1.36, "learning_rate": 6.4352876506241954e-06, "loss": 2.7461, "step": 187700 }, { "epoch": 1.36, "learning_rate": 6.428054796105831e-06, "loss": 2.7471, "step": 187800 }, { "epoch": 1.36, "learning_rate": 6.420894270132652e-06, "loss": 2.75, "step": 187900 }, { "epoch": 1.36, "learning_rate": 6.413661415614287e-06, "loss": 2.7564, "step": 188000 }, { "epoch": 1.36, "eval_accuracy": 0.4603660251699014, "eval_loss": 2.7662112712860107, "eval_runtime": 28.077, "eval_samples_per_second": 230.901, "eval_steps_per_second": 2.422, "step": 188000 }, { "epoch": 1.36, "learning_rate": 6.406428561095923e-06, "loss": 2.7547, "step": 188100 }, { "epoch": 1.36, "learning_rate": 6.399195706577559e-06, "loss": 2.7463, "step": 188200 }, { "epoch": 1.36, "learning_rate": 6.391962852059194e-06, "loss": 2.7426, "step": 188300 }, { "epoch": 1.36, "learning_rate": 6.384729997540829e-06, "loss": 2.7424, "step": 188400 }, { "epoch": 1.36, "learning_rate": 6.377497143022465e-06, "loss": 2.7434, "step": 188500 }, { "epoch": 1.36, "learning_rate": 6.370264288504101e-06, "loss": 2.7483, "step": 188600 }, { "epoch": 1.36, "learning_rate": 6.363031433985737e-06, "loss": 2.752, "step": 188700 }, { "epoch": 1.37, "learning_rate": 6.355798579467373e-06, "loss": 2.7474, "step": 188800 }, { "epoch": 1.37, "learning_rate": 6.348565724949008e-06, "loss": 2.7408, "step": 188900 }, { "epoch": 1.37, "learning_rate": 6.341332870430644e-06, "loss": 2.7464, "step": 189000 }, { "epoch": 1.37, "eval_accuracy": 0.4603968750850641, "eval_loss": 2.76613712310791, "eval_runtime": 28.0132, "eval_samples_per_second": 231.427, "eval_steps_per_second": 2.427, "step": 189000 }, { "epoch": 1.37, "learning_rate": 6.33410001591228e-06, "loss": 2.7481, "step": 189100 }, { "epoch": 1.37, "learning_rate": 6.3269394899391e-06, "loss": 2.7505, "step": 189200 }, { "epoch": 1.37, "learning_rate": 6.319706635420736e-06, "loss": 2.7506, "step": 189300 }, { "epoch": 1.37, "learning_rate": 6.312473780902372e-06, "loss": 2.7421, "step": 189400 }, { "epoch": 1.37, "learning_rate": 6.305240926384007e-06, "loss": 2.7442, "step": 189500 }, { "epoch": 1.37, "learning_rate": 6.298008071865643e-06, "loss": 2.739, "step": 189600 }, { "epoch": 1.37, "learning_rate": 6.2907752173472795e-06, "loss": 2.7498, "step": 189700 }, { "epoch": 1.37, "learning_rate": 6.283542362828915e-06, "loss": 2.7451, "step": 189800 }, { "epoch": 1.37, "learning_rate": 6.276309508310551e-06, "loss": 2.7447, "step": 189900 }, { "epoch": 1.37, "learning_rate": 6.2690766537921865e-06, "loss": 2.7459, "step": 190000 }, { "epoch": 1.37, "eval_accuracy": 0.4604603896162815, "eval_loss": 2.7658865451812744, "eval_runtime": 28.2461, "eval_samples_per_second": 229.518, "eval_steps_per_second": 2.407, "step": 190000 }, { "epoch": 1.37, "learning_rate": 6.261843799273822e-06, "loss": 2.7398, "step": 190100 }, { "epoch": 1.38, "learning_rate": 6.254610944755458e-06, "loss": 2.7458, "step": 190200 }, { "epoch": 1.38, "learning_rate": 6.247378090237094e-06, "loss": 2.7484, "step": 190300 }, { "epoch": 1.38, "learning_rate": 6.24014523571873e-06, "loss": 2.7425, "step": 190400 }, { "epoch": 1.38, "learning_rate": 6.232984709745549e-06, "loss": 2.7482, "step": 190500 }, { "epoch": 1.38, "learning_rate": 6.2257518552271845e-06, "loss": 2.7479, "step": 190600 }, { "epoch": 1.38, "learning_rate": 6.21851900070882e-06, "loss": 2.7382, "step": 190700 }, { "epoch": 1.38, "learning_rate": 6.211286146190456e-06, "loss": 2.7414, "step": 190800 }, { "epoch": 1.38, "learning_rate": 6.2040532916720915e-06, "loss": 2.7534, "step": 190900 }, { "epoch": 1.38, "learning_rate": 6.196820437153728e-06, "loss": 2.7481, "step": 191000 }, { "epoch": 1.38, "eval_accuracy": 0.4605136208424446, "eval_loss": 2.765713691711426, "eval_runtime": 28.3745, "eval_samples_per_second": 228.48, "eval_steps_per_second": 2.397, "step": 191000 }, { "epoch": 1.38, "learning_rate": 6.189587582635364e-06, "loss": 2.7396, "step": 191100 }, { "epoch": 1.38, "learning_rate": 6.182354728116999e-06, "loss": 2.7452, "step": 191200 }, { "epoch": 1.38, "learning_rate": 6.175121873598635e-06, "loss": 2.7406, "step": 191300 }, { "epoch": 1.38, "learning_rate": 6.167889019080271e-06, "loss": 2.7457, "step": 191400 }, { "epoch": 1.39, "learning_rate": 6.160656164561906e-06, "loss": 2.758, "step": 191500 }, { "epoch": 1.39, "learning_rate": 6.153423310043543e-06, "loss": 2.7454, "step": 191600 }, { "epoch": 1.39, "learning_rate": 6.1461904555251785e-06, "loss": 2.7408, "step": 191700 }, { "epoch": 1.39, "learning_rate": 6.138957601006814e-06, "loss": 2.7432, "step": 191800 }, { "epoch": 1.39, "learning_rate": 6.131797075033633e-06, "loss": 2.7435, "step": 191900 }, { "epoch": 1.39, "learning_rate": 6.124564220515269e-06, "loss": 2.7458, "step": 192000 }, { "epoch": 1.39, "eval_accuracy": 0.46044345240795687, "eval_loss": 2.765500783920288, "eval_runtime": 32.8692, "eval_samples_per_second": 197.236, "eval_steps_per_second": 2.069, "step": 192000 }, { "epoch": 1.39, "learning_rate": 6.1173313659969044e-06, "loss": 2.7541, "step": 192100 }, { "epoch": 1.39, "learning_rate": 6.11009851147854e-06, "loss": 2.743, "step": 192200 }, { "epoch": 1.39, "learning_rate": 6.102865656960177e-06, "loss": 2.752, "step": 192300 }, { "epoch": 1.39, "learning_rate": 6.095632802441812e-06, "loss": 2.7452, "step": 192400 }, { "epoch": 1.39, "learning_rate": 6.088399947923448e-06, "loss": 2.7477, "step": 192500 }, { "epoch": 1.39, "learning_rate": 6.081167093405084e-06, "loss": 2.7538, "step": 192600 }, { "epoch": 1.39, "learning_rate": 6.073934238886719e-06, "loss": 2.7416, "step": 192700 }, { "epoch": 1.39, "learning_rate": 6.066701384368355e-06, "loss": 2.7492, "step": 192800 }, { "epoch": 1.4, "learning_rate": 6.0594685298499915e-06, "loss": 2.7435, "step": 192900 }, { "epoch": 1.4, "learning_rate": 6.052235675331627e-06, "loss": 2.7427, "step": 193000 }, { "epoch": 1.4, "eval_accuracy": 0.4605184600448231, "eval_loss": 2.7652785778045654, "eval_runtime": 31.9488, "eval_samples_per_second": 202.919, "eval_steps_per_second": 2.128, "step": 193000 }, { "epoch": 1.4, "learning_rate": 6.045002820813263e-06, "loss": 2.7376, "step": 193100 }, { "epoch": 1.4, "learning_rate": 6.0377699662948985e-06, "loss": 2.7449, "step": 193200 }, { "epoch": 1.4, "learning_rate": 6.030537111776534e-06, "loss": 2.7361, "step": 193300 }, { "epoch": 1.4, "learning_rate": 6.02330425725817e-06, "loss": 2.747, "step": 193400 }, { "epoch": 1.4, "learning_rate": 6.016071402739806e-06, "loss": 2.748, "step": 193500 }, { "epoch": 1.4, "learning_rate": 6.008838548221442e-06, "loss": 2.7475, "step": 193600 }, { "epoch": 1.4, "learning_rate": 6.001605693703078e-06, "loss": 2.7504, "step": 193700 }, { "epoch": 1.4, "learning_rate": 5.994372839184713e-06, "loss": 2.7381, "step": 193800 }, { "epoch": 1.4, "learning_rate": 5.987212313211532e-06, "loss": 2.7453, "step": 193900 }, { "epoch": 1.4, "learning_rate": 5.980051787238352e-06, "loss": 2.741, "step": 194000 }, { "epoch": 1.4, "eval_accuracy": 0.4605601981653374, "eval_loss": 2.7650601863861084, "eval_runtime": 29.1266, "eval_samples_per_second": 222.58, "eval_steps_per_second": 2.335, "step": 194000 }, { "epoch": 1.4, "learning_rate": 5.9728189327199885e-06, "loss": 2.7422, "step": 194100 }, { "epoch": 1.4, "learning_rate": 5.965586078201624e-06, "loss": 2.7339, "step": 194200 }, { "epoch": 1.41, "learning_rate": 5.95835322368326e-06, "loss": 2.746, "step": 194300 }, { "epoch": 1.41, "learning_rate": 5.9511203691648955e-06, "loss": 2.7438, "step": 194400 }, { "epoch": 1.41, "learning_rate": 5.943887514646531e-06, "loss": 2.749, "step": 194500 }, { "epoch": 1.41, "learning_rate": 5.936654660128167e-06, "loss": 2.7492, "step": 194600 }, { "epoch": 1.41, "learning_rate": 5.929494134154986e-06, "loss": 2.7466, "step": 194700 }, { "epoch": 1.41, "learning_rate": 5.922261279636622e-06, "loss": 2.7385, "step": 194800 }, { "epoch": 1.41, "learning_rate": 5.915028425118258e-06, "loss": 2.7399, "step": 194900 }, { "epoch": 1.41, "learning_rate": 5.9077955705998935e-06, "loss": 2.7488, "step": 195000 }, { "epoch": 1.41, "eval_accuracy": 0.4605995166846624, "eval_loss": 2.7648675441741943, "eval_runtime": 29.4441, "eval_samples_per_second": 220.18, "eval_steps_per_second": 2.309, "step": 195000 }, { "epoch": 1.41, "learning_rate": 5.900562716081529e-06, "loss": 2.7498, "step": 195100 }, { "epoch": 1.41, "learning_rate": 5.893329861563165e-06, "loss": 2.746, "step": 195200 }, { "epoch": 1.41, "learning_rate": 5.8860970070448005e-06, "loss": 2.7481, "step": 195300 }, { "epoch": 1.41, "learning_rate": 5.878864152526437e-06, "loss": 2.7438, "step": 195400 }, { "epoch": 1.41, "learning_rate": 5.871631298008073e-06, "loss": 2.7449, "step": 195500 }, { "epoch": 1.41, "learning_rate": 5.864398443489708e-06, "loss": 2.7426, "step": 195600 }, { "epoch": 1.42, "learning_rate": 5.857165588971344e-06, "loss": 2.744, "step": 195700 }, { "epoch": 1.42, "learning_rate": 5.84993273445298e-06, "loss": 2.7455, "step": 195800 }, { "epoch": 1.42, "learning_rate": 5.842699879934615e-06, "loss": 2.7543, "step": 195900 }, { "epoch": 1.42, "learning_rate": 5.835467025416252e-06, "loss": 2.7353, "step": 196000 }, { "epoch": 1.42, "eval_accuracy": 0.4605263237486881, "eval_loss": 2.7647223472595215, "eval_runtime": 32.6984, "eval_samples_per_second": 198.266, "eval_steps_per_second": 2.08, "step": 196000 }, { "epoch": 1.42, "learning_rate": 5.8282341708978876e-06, "loss": 2.7424, "step": 196100 }, { "epoch": 1.42, "learning_rate": 5.821001316379523e-06, "loss": 2.7506, "step": 196200 }, { "epoch": 1.42, "learning_rate": 5.813768461861159e-06, "loss": 2.7501, "step": 196300 }, { "epoch": 1.42, "learning_rate": 5.8065356073427946e-06, "loss": 2.7455, "step": 196400 }, { "epoch": 1.42, "learning_rate": 5.79930275282443e-06, "loss": 2.7528, "step": 196500 }, { "epoch": 1.42, "learning_rate": 5.792069898306067e-06, "loss": 2.7473, "step": 196600 }, { "epoch": 1.42, "learning_rate": 5.784909372332886e-06, "loss": 2.7343, "step": 196700 }, { "epoch": 1.42, "learning_rate": 5.777676517814521e-06, "loss": 2.7428, "step": 196800 }, { "epoch": 1.42, "learning_rate": 5.770443663296157e-06, "loss": 2.7387, "step": 196900 }, { "epoch": 1.42, "learning_rate": 5.763210808777793e-06, "loss": 2.7503, "step": 197000 }, { "epoch": 1.42, "eval_accuracy": 0.46069085662955606, "eval_loss": 2.7644920349121094, "eval_runtime": 29.638, "eval_samples_per_second": 218.74, "eval_steps_per_second": 2.294, "step": 197000 }, { "epoch": 1.43, "learning_rate": 5.755977954259428e-06, "loss": 2.7531, "step": 197100 }, { "epoch": 1.43, "learning_rate": 5.748745099741064e-06, "loss": 2.7383, "step": 197200 }, { "epoch": 1.43, "learning_rate": 5.7415122452227005e-06, "loss": 2.7487, "step": 197300 }, { "epoch": 1.43, "learning_rate": 5.734279390704336e-06, "loss": 2.7425, "step": 197400 }, { "epoch": 1.43, "learning_rate": 5.727046536185972e-06, "loss": 2.7485, "step": 197500 }, { "epoch": 1.43, "learning_rate": 5.7198136816676075e-06, "loss": 2.7438, "step": 197600 }, { "epoch": 1.43, "learning_rate": 5.712653155694426e-06, "loss": 2.7458, "step": 197700 }, { "epoch": 1.43, "learning_rate": 5.705420301176062e-06, "loss": 2.7499, "step": 197800 }, { "epoch": 1.43, "learning_rate": 5.698187446657698e-06, "loss": 2.7384, "step": 197900 }, { "epoch": 1.43, "learning_rate": 5.690954592139334e-06, "loss": 2.7446, "step": 198000 }, { "epoch": 1.43, "eval_accuracy": 0.46069690563252913, "eval_loss": 2.7643613815307617, "eval_runtime": 29.4958, "eval_samples_per_second": 219.794, "eval_steps_per_second": 2.305, "step": 198000 }, { "epoch": 1.43, "learning_rate": 5.68372173762097e-06, "loss": 2.7482, "step": 198100 }, { "epoch": 1.43, "learning_rate": 5.6764888831026055e-06, "loss": 2.745, "step": 198200 }, { "epoch": 1.43, "learning_rate": 5.669256028584241e-06, "loss": 2.743, "step": 198300 }, { "epoch": 1.43, "learning_rate": 5.662023174065877e-06, "loss": 2.7392, "step": 198400 }, { "epoch": 1.44, "learning_rate": 5.6547903195475125e-06, "loss": 2.7378, "step": 198500 }, { "epoch": 1.44, "learning_rate": 5.647557465029149e-06, "loss": 2.747, "step": 198600 }, { "epoch": 1.44, "learning_rate": 5.640324610510785e-06, "loss": 2.7432, "step": 198700 }, { "epoch": 1.44, "learning_rate": 5.6331640845376045e-06, "loss": 2.7402, "step": 198800 }, { "epoch": 1.44, "learning_rate": 5.62593123001924e-06, "loss": 2.7439, "step": 198900 }, { "epoch": 1.44, "learning_rate": 5.618698375500876e-06, "loss": 2.748, "step": 199000 }, { "epoch": 1.44, "eval_accuracy": 0.460747717257503, "eval_loss": 2.764165163040161, "eval_runtime": 31.2501, "eval_samples_per_second": 207.455, "eval_steps_per_second": 2.176, "step": 199000 }, { "epoch": 1.44, "learning_rate": 5.611465520982512e-06, "loss": 2.7423, "step": 199100 }, { "epoch": 1.44, "learning_rate": 5.604232666464146e-06, "loss": 2.7383, "step": 199200 }, { "epoch": 1.44, "learning_rate": 5.596999811945783e-06, "loss": 2.748, "step": 199300 }, { "epoch": 1.44, "learning_rate": 5.5897669574274185e-06, "loss": 2.7418, "step": 199400 }, { "epoch": 1.44, "learning_rate": 5.582534102909054e-06, "loss": 2.7437, "step": 199500 }, { "epoch": 1.44, "learning_rate": 5.57530124839069e-06, "loss": 2.7444, "step": 199600 }, { "epoch": 1.44, "learning_rate": 5.5680683938723255e-06, "loss": 2.7405, "step": 199700 }, { "epoch": 1.45, "learning_rate": 5.560835539353961e-06, "loss": 2.7555, "step": 199800 }, { "epoch": 1.45, "learning_rate": 5.553602684835598e-06, "loss": 2.7455, "step": 199900 }, { "epoch": 1.45, "learning_rate": 5.546369830317233e-06, "loss": 2.7394, "step": 200000 }, { "epoch": 1.45, "eval_accuracy": 0.46070537423669145, "eval_loss": 2.7640960216522217, "eval_runtime": 29.999, "eval_samples_per_second": 216.107, "eval_steps_per_second": 2.267, "step": 200000 }, { "epoch": 1.45, "learning_rate": 5.539136975798869e-06, "loss": 2.7442, "step": 200100 }, { "epoch": 1.45, "learning_rate": 5.531904121280505e-06, "loss": 2.746, "step": 200200 }, { "epoch": 1.45, "learning_rate": 5.524743595307324e-06, "loss": 2.7457, "step": 200300 }, { "epoch": 1.45, "learning_rate": 5.517510740788961e-06, "loss": 2.738, "step": 200400 }, { "epoch": 1.45, "learning_rate": 5.5102778862705966e-06, "loss": 2.7424, "step": 200500 }, { "epoch": 1.45, "learning_rate": 5.503045031752232e-06, "loss": 2.7345, "step": 200600 }, { "epoch": 1.45, "learning_rate": 5.495812177233868e-06, "loss": 2.7399, "step": 200700 }, { "epoch": 1.45, "learning_rate": 5.4885793227155036e-06, "loss": 2.7494, "step": 200800 }, { "epoch": 1.45, "learning_rate": 5.481346468197139e-06, "loss": 2.7468, "step": 200900 }, { "epoch": 1.45, "learning_rate": 5.474113613678776e-06, "loss": 2.7403, "step": 201000 }, { "epoch": 1.45, "eval_accuracy": 0.46072291634531337, "eval_loss": 2.7638235092163086, "eval_runtime": 29.4493, "eval_samples_per_second": 220.141, "eval_steps_per_second": 2.309, "step": 201000 }, { "epoch": 1.45, "learning_rate": 5.466880759160411e-06, "loss": 2.7505, "step": 201100 }, { "epoch": 1.46, "learning_rate": 5.459647904642047e-06, "loss": 2.7523, "step": 201200 }, { "epoch": 1.46, "learning_rate": 5.452415050123683e-06, "loss": 2.7507, "step": 201300 }, { "epoch": 1.46, "learning_rate": 5.445182195605318e-06, "loss": 2.7374, "step": 201400 }, { "epoch": 1.46, "learning_rate": 5.437949341086953e-06, "loss": 2.7449, "step": 201500 }, { "epoch": 1.46, "learning_rate": 5.430788815113773e-06, "loss": 2.739, "step": 201600 }, { "epoch": 1.46, "learning_rate": 5.4235559605954095e-06, "loss": 2.7527, "step": 201700 }, { "epoch": 1.46, "learning_rate": 5.416323106077045e-06, "loss": 2.7362, "step": 201800 }, { "epoch": 1.46, "learning_rate": 5.409090251558681e-06, "loss": 2.7489, "step": 201900 }, { "epoch": 1.46, "learning_rate": 5.4018573970403165e-06, "loss": 2.7467, "step": 202000 }, { "epoch": 1.46, "eval_accuracy": 0.46072291634531337, "eval_loss": 2.763704299926758, "eval_runtime": 30.6951, "eval_samples_per_second": 211.206, "eval_steps_per_second": 2.215, "step": 202000 }, { "epoch": 1.46, "learning_rate": 5.394624542521952e-06, "loss": 2.7419, "step": 202100 }, { "epoch": 1.46, "learning_rate": 5.387391688003588e-06, "loss": 2.7384, "step": 202200 }, { "epoch": 1.46, "learning_rate": 5.380158833485224e-06, "loss": 2.7425, "step": 202300 }, { "epoch": 1.46, "learning_rate": 5.37292597896686e-06, "loss": 2.7346, "step": 202400 }, { "epoch": 1.46, "learning_rate": 5.365693124448496e-06, "loss": 2.7527, "step": 202500 }, { "epoch": 1.47, "learning_rate": 5.358460269930131e-06, "loss": 2.7396, "step": 202600 }, { "epoch": 1.47, "learning_rate": 5.351227415411767e-06, "loss": 2.7407, "step": 202700 }, { "epoch": 1.47, "learning_rate": 5.343994560893403e-06, "loss": 2.7501, "step": 202800 }, { "epoch": 1.47, "learning_rate": 5.3368340349202216e-06, "loss": 2.7392, "step": 202900 }, { "epoch": 1.47, "learning_rate": 5.329601180401858e-06, "loss": 2.7532, "step": 203000 }, { "epoch": 1.47, "eval_accuracy": 0.4608082072872339, "eval_loss": 2.7634613513946533, "eval_runtime": 27.9391, "eval_samples_per_second": 232.04, "eval_steps_per_second": 2.434, "step": 203000 }, { "epoch": 1.47, "learning_rate": 5.322368325883494e-06, "loss": 2.738, "step": 203100 }, { "epoch": 1.47, "learning_rate": 5.315135471365129e-06, "loss": 2.7442, "step": 203200 }, { "epoch": 1.47, "learning_rate": 5.307902616846765e-06, "loss": 2.7478, "step": 203300 }, { "epoch": 1.47, "learning_rate": 5.300669762328401e-06, "loss": 2.7452, "step": 203400 }, { "epoch": 1.47, "learning_rate": 5.293436907810036e-06, "loss": 2.7407, "step": 203500 }, { "epoch": 1.47, "learning_rate": 5.286204053291673e-06, "loss": 2.7475, "step": 203600 }, { "epoch": 1.47, "learning_rate": 5.278971198773309e-06, "loss": 2.7537, "step": 203700 }, { "epoch": 1.47, "learning_rate": 5.271738344254944e-06, "loss": 2.7459, "step": 203800 }, { "epoch": 1.47, "learning_rate": 5.26450548973658e-06, "loss": 2.7525, "step": 203900 }, { "epoch": 1.48, "learning_rate": 5.257344963763399e-06, "loss": 2.7431, "step": 204000 }, { "epoch": 1.48, "eval_accuracy": 0.4608662777157755, "eval_loss": 2.763364553451538, "eval_runtime": 32.4224, "eval_samples_per_second": 199.954, "eval_steps_per_second": 2.097, "step": 204000 }, { "epoch": 1.48, "learning_rate": 5.2501121092450345e-06, "loss": 2.7486, "step": 204100 }, { "epoch": 1.48, "learning_rate": 5.24287925472667e-06, "loss": 2.7437, "step": 204200 }, { "epoch": 1.48, "learning_rate": 5.235646400208307e-06, "loss": 2.7369, "step": 204300 }, { "epoch": 1.48, "learning_rate": 5.228413545689942e-06, "loss": 2.7487, "step": 204400 }, { "epoch": 1.48, "learning_rate": 5.221180691171578e-06, "loss": 2.7439, "step": 204500 }, { "epoch": 1.48, "learning_rate": 5.213947836653214e-06, "loss": 2.7435, "step": 204600 }, { "epoch": 1.48, "learning_rate": 5.206714982134849e-06, "loss": 2.7452, "step": 204700 }, { "epoch": 1.48, "learning_rate": 5.199482127616485e-06, "loss": 2.7409, "step": 204800 }, { "epoch": 1.48, "learning_rate": 5.1922492730981215e-06, "loss": 2.7448, "step": 204900 }, { "epoch": 1.48, "learning_rate": 5.185016418579757e-06, "loss": 2.7433, "step": 205000 }, { "epoch": 1.48, "eval_accuracy": 0.4608082072872339, "eval_loss": 2.7632086277008057, "eval_runtime": 29.7969, "eval_samples_per_second": 217.573, "eval_steps_per_second": 2.282, "step": 205000 }, { "epoch": 1.48, "learning_rate": 5.177855892606577e-06, "loss": 2.7424, "step": 205100 }, { "epoch": 1.48, "learning_rate": 5.1706230380882126e-06, "loss": 2.7355, "step": 205200 }, { "epoch": 1.48, "learning_rate": 5.163390183569848e-06, "loss": 2.7338, "step": 205300 }, { "epoch": 1.49, "learning_rate": 5.156157329051485e-06, "loss": 2.7346, "step": 205400 }, { "epoch": 1.49, "learning_rate": 5.14892447453312e-06, "loss": 2.7495, "step": 205500 }, { "epoch": 1.49, "learning_rate": 5.141691620014756e-06, "loss": 2.7364, "step": 205600 }, { "epoch": 1.49, "learning_rate": 5.134458765496392e-06, "loss": 2.7424, "step": 205700 }, { "epoch": 1.49, "learning_rate": 5.1272259109780266e-06, "loss": 2.74, "step": 205800 }, { "epoch": 1.49, "learning_rate": 5.119993056459662e-06, "loss": 2.748, "step": 205900 }, { "epoch": 1.49, "learning_rate": 5.112760201941298e-06, "loss": 2.7436, "step": 206000 }, { "epoch": 1.49, "eval_accuracy": 0.4609152746398575, "eval_loss": 2.762951135635376, "eval_runtime": 29.8903, "eval_samples_per_second": 216.893, "eval_steps_per_second": 2.275, "step": 206000 }, { "epoch": 1.49, "learning_rate": 5.1055273474229336e-06, "loss": 2.7419, "step": 206100 }, { "epoch": 1.49, "learning_rate": 5.09829449290457e-06, "loss": 2.7369, "step": 206200 }, { "epoch": 1.49, "learning_rate": 5.091061638386206e-06, "loss": 2.7427, "step": 206300 }, { "epoch": 1.49, "learning_rate": 5.0839011124130255e-06, "loss": 2.747, "step": 206400 }, { "epoch": 1.49, "learning_rate": 5.076668257894661e-06, "loss": 2.7469, "step": 206500 }, { "epoch": 1.49, "learning_rate": 5.069435403376297e-06, "loss": 2.7459, "step": 206600 }, { "epoch": 1.5, "learning_rate": 5.062202548857933e-06, "loss": 2.7446, "step": 206700 }, { "epoch": 1.5, "learning_rate": 5.054969694339569e-06, "loss": 2.7419, "step": 206800 }, { "epoch": 1.5, "learning_rate": 5.047736839821205e-06, "loss": 2.749, "step": 206900 }, { "epoch": 1.5, "learning_rate": 5.04050398530284e-06, "loss": 2.747, "step": 207000 }, { "epoch": 1.5, "eval_accuracy": 0.46086809241666743, "eval_loss": 2.7627713680267334, "eval_runtime": 29.635, "eval_samples_per_second": 218.761, "eval_steps_per_second": 2.295, "step": 207000 }, { "epoch": 1.5, "learning_rate": 5.033271130784476e-06, "loss": 2.7381, "step": 207100 }, { "epoch": 1.5, "learning_rate": 5.026038276266112e-06, "loss": 2.7322, "step": 207200 }, { "epoch": 1.5, "learning_rate": 5.018805421747748e-06, "loss": 2.7446, "step": 207300 }, { "epoch": 1.5, "learning_rate": 5.011572567229384e-06, "loss": 2.7473, "step": 207400 }, { "epoch": 1.5, "learning_rate": 5.0043397127110195e-06, "loss": 2.7449, "step": 207500 }, { "epoch": 1.5, "learning_rate": 4.997106858192654e-06, "loss": 2.7365, "step": 207600 }, { "epoch": 1.5, "learning_rate": 4.98987400367429e-06, "loss": 2.7431, "step": 207700 }, { "epoch": 1.5, "learning_rate": 4.98271347770111e-06, "loss": 2.7405, "step": 207800 }, { "epoch": 1.5, "learning_rate": 4.975480623182745e-06, "loss": 2.7495, "step": 207900 }, { "epoch": 1.5, "learning_rate": 4.968247768664382e-06, "loss": 2.7395, "step": 208000 }, { "epoch": 1.5, "eval_accuracy": 0.4608783757217217, "eval_loss": 2.7625892162323, "eval_runtime": 28.0543, "eval_samples_per_second": 231.088, "eval_steps_per_second": 2.424, "step": 208000 }, { "epoch": 1.51, "learning_rate": 4.961014914146018e-06, "loss": 2.746, "step": 208100 }, { "epoch": 1.51, "learning_rate": 4.953782059627653e-06, "loss": 2.7362, "step": 208200 }, { "epoch": 1.51, "learning_rate": 4.946549205109289e-06, "loss": 2.7453, "step": 208300 }, { "epoch": 1.51, "learning_rate": 4.939316350590925e-06, "loss": 2.7409, "step": 208400 }, { "epoch": 1.51, "learning_rate": 4.93208349607256e-06, "loss": 2.7459, "step": 208500 }, { "epoch": 1.51, "learning_rate": 4.924850641554196e-06, "loss": 2.7428, "step": 208600 }, { "epoch": 1.51, "learning_rate": 4.9176177870358324e-06, "loss": 2.7507, "step": 208700 }, { "epoch": 1.51, "learning_rate": 4.910384932517468e-06, "loss": 2.7423, "step": 208800 }, { "epoch": 1.51, "learning_rate": 4.903152077999104e-06, "loss": 2.7494, "step": 208900 }, { "epoch": 1.51, "learning_rate": 4.8959192234807394e-06, "loss": 2.7443, "step": 209000 }, { "epoch": 1.51, "eval_accuracy": 0.4609176942410467, "eval_loss": 2.7624387741088867, "eval_runtime": 30.0098, "eval_samples_per_second": 216.029, "eval_steps_per_second": 2.266, "step": 209000 }, { "epoch": 1.51, "learning_rate": 4.888686368962375e-06, "loss": 2.7409, "step": 209100 }, { "epoch": 1.51, "learning_rate": 4.881525842989194e-06, "loss": 2.7504, "step": 209200 }, { "epoch": 1.51, "learning_rate": 4.8742929884708305e-06, "loss": 2.7428, "step": 209300 }, { "epoch": 1.51, "learning_rate": 4.867060133952466e-06, "loss": 2.7458, "step": 209400 }, { "epoch": 1.52, "learning_rate": 4.859827279434102e-06, "loss": 2.7465, "step": 209500 }, { "epoch": 1.52, "learning_rate": 4.8525944249157375e-06, "loss": 2.7435, "step": 209600 }, { "epoch": 1.52, "learning_rate": 4.845361570397373e-06, "loss": 2.7431, "step": 209700 }, { "epoch": 1.52, "learning_rate": 4.838128715879009e-06, "loss": 2.7376, "step": 209800 }, { "epoch": 1.52, "learning_rate": 4.8308958613606445e-06, "loss": 2.7403, "step": 209900 }, { "epoch": 1.52, "learning_rate": 4.823663006842281e-06, "loss": 2.7395, "step": 210000 }, { "epoch": 1.52, "eval_accuracy": 0.46079066517861195, "eval_loss": 2.762295961380005, "eval_runtime": 30.222, "eval_samples_per_second": 214.513, "eval_steps_per_second": 2.25, "step": 210000 }, { "epoch": 1.52, "learning_rate": 4.816430152323917e-06, "loss": 2.7402, "step": 210100 }, { "epoch": 1.52, "learning_rate": 4.809197297805552e-06, "loss": 2.7445, "step": 210200 }, { "epoch": 1.52, "learning_rate": 4.802036771832372e-06, "loss": 2.7432, "step": 210300 }, { "epoch": 1.52, "learning_rate": 4.794803917314008e-06, "loss": 2.7407, "step": 210400 }, { "epoch": 1.52, "learning_rate": 4.7875710627956434e-06, "loss": 2.7449, "step": 210500 }, { "epoch": 1.52, "learning_rate": 4.780338208277279e-06, "loss": 2.7481, "step": 210600 }, { "epoch": 1.52, "learning_rate": 4.773105353758915e-06, "loss": 2.7456, "step": 210700 }, { "epoch": 1.52, "learning_rate": 4.7658724992405504e-06, "loss": 2.7489, "step": 210800 }, { "epoch": 1.53, "learning_rate": 4.75871197326737e-06, "loss": 2.7345, "step": 210900 }, { "epoch": 1.53, "learning_rate": 4.751479118749006e-06, "loss": 2.7353, "step": 211000 }, { "epoch": 1.53, "eval_accuracy": 0.4608324032991262, "eval_loss": 2.7621333599090576, "eval_runtime": 31.0021, "eval_samples_per_second": 209.115, "eval_steps_per_second": 2.193, "step": 211000 }, { "epoch": 1.53, "learning_rate": 4.7442462642306415e-06, "loss": 2.7442, "step": 211100 }, { "epoch": 1.53, "learning_rate": 4.737013409712277e-06, "loss": 2.7349, "step": 211200 }, { "epoch": 1.53, "learning_rate": 4.729780555193913e-06, "loss": 2.7421, "step": 211300 }, { "epoch": 1.53, "learning_rate": 4.7225477006755485e-06, "loss": 2.7441, "step": 211400 }, { "epoch": 1.53, "learning_rate": 4.715314846157185e-06, "loss": 2.74, "step": 211500 }, { "epoch": 1.53, "learning_rate": 4.708081991638821e-06, "loss": 2.7479, "step": 211600 }, { "epoch": 1.53, "learning_rate": 4.700849137120456e-06, "loss": 2.7426, "step": 211700 }, { "epoch": 1.53, "learning_rate": 4.693616282602092e-06, "loss": 2.7466, "step": 211800 }, { "epoch": 1.53, "learning_rate": 4.686383428083728e-06, "loss": 2.7438, "step": 211900 }, { "epoch": 1.53, "learning_rate": 4.679150573565363e-06, "loss": 2.7401, "step": 212000 }, { "epoch": 1.53, "eval_accuracy": 0.46098302347315606, "eval_loss": 2.7617835998535156, "eval_runtime": 29.9041, "eval_samples_per_second": 216.793, "eval_steps_per_second": 2.274, "step": 212000 }, { "epoch": 1.53, "learning_rate": 4.671917719046999e-06, "loss": 2.7432, "step": 212100 }, { "epoch": 1.53, "learning_rate": 4.6646848645286355e-06, "loss": 2.7462, "step": 212200 }, { "epoch": 1.54, "learning_rate": 4.657452010010271e-06, "loss": 2.7348, "step": 212300 }, { "epoch": 1.54, "learning_rate": 4.650219155491907e-06, "loss": 2.7424, "step": 212400 }, { "epoch": 1.54, "learning_rate": 4.6429863009735425e-06, "loss": 2.7427, "step": 212500 }, { "epoch": 1.54, "learning_rate": 4.635753446455178e-06, "loss": 2.75, "step": 212600 }, { "epoch": 1.54, "learning_rate": 4.628520591936814e-06, "loss": 2.7498, "step": 212700 }, { "epoch": 1.54, "learning_rate": 4.62128773741845e-06, "loss": 2.7432, "step": 212800 }, { "epoch": 1.54, "learning_rate": 4.614054882900086e-06, "loss": 2.7424, "step": 212900 }, { "epoch": 1.54, "learning_rate": 4.606894356926905e-06, "loss": 2.7371, "step": 213000 }, { "epoch": 1.54, "eval_accuracy": 0.4609551980594798, "eval_loss": 2.761749744415283, "eval_runtime": 30.3654, "eval_samples_per_second": 213.5, "eval_steps_per_second": 2.239, "step": 213000 }, { "epoch": 1.54, "learning_rate": 4.5996615024085414e-06, "loss": 2.7503, "step": 213100 }, { "epoch": 1.54, "learning_rate": 4.592428647890177e-06, "loss": 2.7367, "step": 213200 }, { "epoch": 1.54, "learning_rate": 4.585195793371813e-06, "loss": 2.7346, "step": 213300 }, { "epoch": 1.54, "learning_rate": 4.577962938853448e-06, "loss": 2.7493, "step": 213400 }, { "epoch": 1.54, "learning_rate": 4.570730084335084e-06, "loss": 2.748, "step": 213500 }, { "epoch": 1.54, "learning_rate": 4.563569558361903e-06, "loss": 2.7412, "step": 213600 }, { "epoch": 1.55, "learning_rate": 4.5563367038435395e-06, "loss": 2.7452, "step": 213700 }, { "epoch": 1.55, "learning_rate": 4.549103849325175e-06, "loss": 2.7362, "step": 213800 }, { "epoch": 1.55, "learning_rate": 4.541870994806811e-06, "loss": 2.7471, "step": 213900 }, { "epoch": 1.55, "learning_rate": 4.5346381402884465e-06, "loss": 2.7458, "step": 214000 }, { "epoch": 1.55, "eval_accuracy": 0.46101931749099456, "eval_loss": 2.7615652084350586, "eval_runtime": 31.5152, "eval_samples_per_second": 205.71, "eval_steps_per_second": 2.158, "step": 214000 }, { "epoch": 1.55, "learning_rate": 4.527405285770082e-06, "loss": 2.7413, "step": 214100 }, { "epoch": 1.55, "learning_rate": 4.520172431251718e-06, "loss": 2.7408, "step": 214200 }, { "epoch": 1.55, "learning_rate": 4.5129395767333535e-06, "loss": 2.7456, "step": 214300 }, { "epoch": 1.55, "learning_rate": 4.50570672221499e-06, "loss": 2.741, "step": 214400 }, { "epoch": 1.55, "learning_rate": 4.498473867696626e-06, "loss": 2.7465, "step": 214500 }, { "epoch": 1.55, "learning_rate": 4.491241013178261e-06, "loss": 2.7435, "step": 214600 }, { "epoch": 1.55, "learning_rate": 4.484008158659897e-06, "loss": 2.7377, "step": 214700 }, { "epoch": 1.55, "learning_rate": 4.476775304141533e-06, "loss": 2.7381, "step": 214800 }, { "epoch": 1.55, "learning_rate": 4.469542449623168e-06, "loss": 2.7445, "step": 214900 }, { "epoch": 1.56, "learning_rate": 4.462309595104805e-06, "loss": 2.7416, "step": 215000 }, { "epoch": 1.56, "eval_accuracy": 0.4611487661546186, "eval_loss": 2.7614753246307373, "eval_runtime": 27.7512, "eval_samples_per_second": 233.612, "eval_steps_per_second": 2.45, "step": 215000 }, { "epoch": 1.56, "learning_rate": 4.4550767405864405e-06, "loss": 2.7414, "step": 215100 }, { "epoch": 1.56, "learning_rate": 4.447843886068076e-06, "loss": 2.7419, "step": 215200 }, { "epoch": 1.56, "learning_rate": 4.440611031549712e-06, "loss": 2.7466, "step": 215300 }, { "epoch": 1.56, "learning_rate": 4.4333781770313475e-06, "loss": 2.7416, "step": 215400 }, { "epoch": 1.56, "learning_rate": 4.426145322512983e-06, "loss": 2.7512, "step": 215500 }, { "epoch": 1.56, "learning_rate": 4.418984796539802e-06, "loss": 2.7472, "step": 215600 }, { "epoch": 1.56, "learning_rate": 4.411751942021439e-06, "loss": 2.7447, "step": 215700 }, { "epoch": 1.56, "learning_rate": 4.404519087503074e-06, "loss": 2.7415, "step": 215800 }, { "epoch": 1.56, "learning_rate": 4.39728623298471e-06, "loss": 2.7387, "step": 215900 }, { "epoch": 1.56, "learning_rate": 4.390053378466346e-06, "loss": 2.7434, "step": 216000 }, { "epoch": 1.56, "eval_accuracy": 0.4610798075207254, "eval_loss": 2.761385440826416, "eval_runtime": 27.9023, "eval_samples_per_second": 232.347, "eval_steps_per_second": 2.437, "step": 216000 }, { "epoch": 1.56, "learning_rate": 4.382820523947981e-06, "loss": 2.7392, "step": 216100 }, { "epoch": 1.56, "learning_rate": 4.375587669429617e-06, "loss": 2.7394, "step": 216200 }, { "epoch": 1.56, "learning_rate": 4.3683548149112535e-06, "loss": 2.7423, "step": 216300 }, { "epoch": 1.57, "learning_rate": 4.361121960392889e-06, "loss": 2.7389, "step": 216400 }, { "epoch": 1.57, "learning_rate": 4.353889105874525e-06, "loss": 2.7334, "step": 216500 }, { "epoch": 1.57, "learning_rate": 4.3466562513561605e-06, "loss": 2.7384, "step": 216600 }, { "epoch": 1.57, "learning_rate": 4.339423396837796e-06, "loss": 2.7432, "step": 216700 }, { "epoch": 1.57, "learning_rate": 4.332190542319432e-06, "loss": 2.7388, "step": 216800 }, { "epoch": 1.57, "learning_rate": 4.324957687801068e-06, "loss": 2.7417, "step": 216900 }, { "epoch": 1.57, "learning_rate": 4.317724833282704e-06, "loss": 2.7456, "step": 217000 }, { "epoch": 1.57, "eval_accuracy": 0.4610507723064546, "eval_loss": 2.7613608837127686, "eval_runtime": 32.1832, "eval_samples_per_second": 201.441, "eval_steps_per_second": 2.113, "step": 217000 }, { "epoch": 1.57, "learning_rate": 4.31049197876434e-06, "loss": 2.744, "step": 217100 }, { "epoch": 1.57, "learning_rate": 4.303259124245975e-06, "loss": 2.7446, "step": 217200 }, { "epoch": 1.57, "learning_rate": 4.296026269727611e-06, "loss": 2.7351, "step": 217300 }, { "epoch": 1.57, "learning_rate": 4.288793415209247e-06, "loss": 2.7413, "step": 217400 }, { "epoch": 1.57, "learning_rate": 4.281560560690883e-06, "loss": 2.7478, "step": 217500 }, { "epoch": 1.57, "learning_rate": 4.274327706172519e-06, "loss": 2.7409, "step": 217600 }, { "epoch": 1.57, "learning_rate": 4.267167180199338e-06, "loss": 2.731, "step": 217700 }, { "epoch": 1.58, "learning_rate": 4.259934325680974e-06, "loss": 2.7469, "step": 217800 }, { "epoch": 1.58, "learning_rate": 4.252701471162609e-06, "loss": 2.7392, "step": 217900 }, { "epoch": 1.58, "learning_rate": 4.245540945189429e-06, "loss": 2.7499, "step": 218000 }, { "epoch": 1.58, "eval_accuracy": 0.46106226541210343, "eval_loss": 2.7610652446746826, "eval_runtime": 29.5792, "eval_samples_per_second": 219.174, "eval_steps_per_second": 2.299, "step": 218000 }, { "epoch": 1.58, "learning_rate": 4.2383080906710645e-06, "loss": 2.7383, "step": 218100 }, { "epoch": 1.58, "learning_rate": 4.2310752361527e-06, "loss": 2.7398, "step": 218200 }, { "epoch": 1.58, "learning_rate": 4.223842381634336e-06, "loss": 2.7354, "step": 218300 }, { "epoch": 1.58, "learning_rate": 4.2166095271159715e-06, "loss": 2.7482, "step": 218400 }, { "epoch": 1.58, "learning_rate": 4.209376672597608e-06, "loss": 2.7427, "step": 218500 }, { "epoch": 1.58, "learning_rate": 4.202143818079244e-06, "loss": 2.7346, "step": 218600 }, { "epoch": 1.58, "learning_rate": 4.194910963560879e-06, "loss": 2.743, "step": 218700 }, { "epoch": 1.58, "learning_rate": 4.187678109042515e-06, "loss": 2.738, "step": 218800 }, { "epoch": 1.58, "learning_rate": 4.180445254524151e-06, "loss": 2.7347, "step": 218900 }, { "epoch": 1.58, "learning_rate": 4.173212400005786e-06, "loss": 2.744, "step": 219000 }, { "epoch": 1.58, "eval_accuracy": 0.46114574165313205, "eval_loss": 2.760906457901001, "eval_runtime": 27.8233, "eval_samples_per_second": 233.006, "eval_steps_per_second": 2.444, "step": 219000 }, { "epoch": 1.58, "learning_rate": 4.165979545487423e-06, "loss": 2.7415, "step": 219100 }, { "epoch": 1.59, "learning_rate": 4.158819019514242e-06, "loss": 2.7429, "step": 219200 }, { "epoch": 1.59, "learning_rate": 4.151586164995877e-06, "loss": 2.7381, "step": 219300 }, { "epoch": 1.59, "learning_rate": 4.144353310477514e-06, "loss": 2.7389, "step": 219400 }, { "epoch": 1.59, "learning_rate": 4.1371204559591496e-06, "loss": 2.7519, "step": 219500 }, { "epoch": 1.59, "learning_rate": 4.129887601440785e-06, "loss": 2.7505, "step": 219600 }, { "epoch": 1.59, "learning_rate": 4.122727075467605e-06, "loss": 2.7414, "step": 219700 }, { "epoch": 1.59, "learning_rate": 4.11549422094924e-06, "loss": 2.7272, "step": 219800 }, { "epoch": 1.59, "learning_rate": 4.1082613664308754e-06, "loss": 2.7408, "step": 219900 }, { "epoch": 1.59, "learning_rate": 4.101028511912511e-06, "loss": 2.7375, "step": 220000 }, { "epoch": 1.59, "eval_accuracy": 0.4611257799433208, "eval_loss": 2.760807514190674, "eval_runtime": 29.5284, "eval_samples_per_second": 219.552, "eval_steps_per_second": 2.303, "step": 220000 }, { "epoch": 1.59, "learning_rate": 4.093795657394148e-06, "loss": 2.7376, "step": 220100 }, { "epoch": 1.59, "learning_rate": 4.086562802875783e-06, "loss": 2.7406, "step": 220200 }, { "epoch": 1.59, "learning_rate": 4.079329948357419e-06, "loss": 2.7457, "step": 220300 }, { "epoch": 1.59, "learning_rate": 4.072097093839055e-06, "loss": 2.7377, "step": 220400 }, { "epoch": 1.59, "learning_rate": 4.06486423932069e-06, "loss": 2.7417, "step": 220500 }, { "epoch": 1.6, "learning_rate": 4.057631384802326e-06, "loss": 2.7429, "step": 220600 }, { "epoch": 1.6, "learning_rate": 4.050470858829146e-06, "loss": 2.7458, "step": 220700 }, { "epoch": 1.6, "learning_rate": 4.043238004310781e-06, "loss": 2.74, "step": 220800 }, { "epoch": 1.6, "learning_rate": 4.036005149792417e-06, "loss": 2.7355, "step": 220900 }, { "epoch": 1.6, "learning_rate": 4.0287722952740535e-06, "loss": 2.7428, "step": 221000 }, { "epoch": 1.6, "eval_accuracy": 0.4611263848436182, "eval_loss": 2.7606468200683594, "eval_runtime": 29.1245, "eval_samples_per_second": 222.596, "eval_steps_per_second": 2.335, "step": 221000 }, { "epoch": 1.6, "learning_rate": 4.021539440755689e-06, "loss": 2.7413, "step": 221100 }, { "epoch": 1.6, "learning_rate": 4.014306586237325e-06, "loss": 2.7345, "step": 221200 }, { "epoch": 1.6, "learning_rate": 4.0070737317189605e-06, "loss": 2.7442, "step": 221300 }, { "epoch": 1.6, "learning_rate": 3.999840877200596e-06, "loss": 2.7402, "step": 221400 }, { "epoch": 1.6, "learning_rate": 3.992608022682232e-06, "loss": 2.7399, "step": 221500 }, { "epoch": 1.6, "learning_rate": 3.985375168163868e-06, "loss": 2.7461, "step": 221600 }, { "epoch": 1.6, "learning_rate": 3.978142313645504e-06, "loss": 2.7288, "step": 221700 }, { "epoch": 1.6, "learning_rate": 3.97090945912714e-06, "loss": 2.7323, "step": 221800 }, { "epoch": 1.6, "learning_rate": 3.963676604608775e-06, "loss": 2.735, "step": 221900 }, { "epoch": 1.61, "learning_rate": 3.956443750090411e-06, "loss": 2.7442, "step": 222000 }, { "epoch": 1.61, "eval_accuracy": 0.4611021888317258, "eval_loss": 2.7605795860290527, "eval_runtime": 29.7064, "eval_samples_per_second": 218.236, "eval_steps_per_second": 2.289, "step": 222000 }, { "epoch": 1.61, "learning_rate": 3.949210895572047e-06, "loss": 2.7375, "step": 222100 }, { "epoch": 1.61, "learning_rate": 3.941978041053682e-06, "loss": 2.7365, "step": 222200 }, { "epoch": 1.61, "learning_rate": 3.934745186535318e-06, "loss": 2.7442, "step": 222300 }, { "epoch": 1.61, "learning_rate": 3.927512332016954e-06, "loss": 2.7405, "step": 222400 }, { "epoch": 1.61, "learning_rate": 3.920279477498589e-06, "loss": 2.7429, "step": 222500 }, { "epoch": 1.61, "learning_rate": 3.913046622980226e-06, "loss": 2.7361, "step": 222600 }, { "epoch": 1.61, "learning_rate": 3.9058137684618616e-06, "loss": 2.7376, "step": 222700 }, { "epoch": 1.61, "learning_rate": 3.8986532424886805e-06, "loss": 2.7357, "step": 222800 }, { "epoch": 1.61, "learning_rate": 3.891420387970317e-06, "loss": 2.7413, "step": 222900 }, { "epoch": 1.61, "learning_rate": 3.884187533451953e-06, "loss": 2.7395, "step": 223000 }, { "epoch": 1.61, "eval_accuracy": 0.46116146906086203, "eval_loss": 2.7603955268859863, "eval_runtime": 29.5556, "eval_samples_per_second": 219.349, "eval_steps_per_second": 2.301, "step": 223000 }, { "epoch": 1.61, "learning_rate": 3.876954678933588e-06, "loss": 2.7413, "step": 223100 }, { "epoch": 1.61, "learning_rate": 3.869721824415224e-06, "loss": 2.7495, "step": 223200 }, { "epoch": 1.62, "learning_rate": 3.86248896989686e-06, "loss": 2.7346, "step": 223300 }, { "epoch": 1.62, "learning_rate": 3.855256115378495e-06, "loss": 2.7434, "step": 223400 }, { "epoch": 1.62, "learning_rate": 3.848023260860132e-06, "loss": 2.7414, "step": 223500 }, { "epoch": 1.62, "learning_rate": 3.8407904063417675e-06, "loss": 2.7452, "step": 223600 }, { "epoch": 1.62, "learning_rate": 3.833557551823403e-06, "loss": 2.7334, "step": 223700 }, { "epoch": 1.62, "learning_rate": 3.826397025850223e-06, "loss": 2.734, "step": 223800 }, { "epoch": 1.62, "learning_rate": 3.8191641713318586e-06, "loss": 2.7417, "step": 223900 }, { "epoch": 1.62, "learning_rate": 3.8119313168134942e-06, "loss": 2.7445, "step": 224000 }, { "epoch": 1.62, "eval_accuracy": 0.4612249835920794, "eval_loss": 2.7602407932281494, "eval_runtime": 27.9042, "eval_samples_per_second": 232.331, "eval_steps_per_second": 2.437, "step": 224000 }, { "epoch": 1.62, "learning_rate": 3.80469846229513e-06, "loss": 2.7441, "step": 224100 }, { "epoch": 1.62, "learning_rate": 3.7974656077767656e-06, "loss": 2.7424, "step": 224200 }, { "epoch": 1.62, "learning_rate": 3.790232753258401e-06, "loss": 2.7458, "step": 224300 }, { "epoch": 1.62, "learning_rate": 3.782999898740037e-06, "loss": 2.7381, "step": 224400 }, { "epoch": 1.62, "learning_rate": 3.7757670442216726e-06, "loss": 2.7407, "step": 224500 }, { "epoch": 1.62, "learning_rate": 3.7685341897033082e-06, "loss": 2.7407, "step": 224600 }, { "epoch": 1.63, "learning_rate": 3.7613013351849443e-06, "loss": 2.7438, "step": 224700 }, { "epoch": 1.63, "learning_rate": 3.75406848066658e-06, "loss": 2.7364, "step": 224800 }, { "epoch": 1.63, "learning_rate": 3.7468356261482156e-06, "loss": 2.7412, "step": 224900 }, { "epoch": 1.63, "learning_rate": 3.7396027716298517e-06, "loss": 2.7394, "step": 225000 }, { "epoch": 1.63, "eval_accuracy": 0.4611070280341043, "eval_loss": 2.760154962539673, "eval_runtime": 30.9249, "eval_samples_per_second": 209.637, "eval_steps_per_second": 2.199, "step": 225000 }, { "epoch": 1.63, "learning_rate": 3.732442245656671e-06, "loss": 2.744, "step": 225100 }, { "epoch": 1.63, "learning_rate": 3.7252093911383067e-06, "loss": 2.7371, "step": 225200 }, { "epoch": 1.63, "learning_rate": 3.717976536619943e-06, "loss": 2.7374, "step": 225300 }, { "epoch": 1.63, "learning_rate": 3.7107436821015785e-06, "loss": 2.7375, "step": 225400 }, { "epoch": 1.63, "learning_rate": 3.703510827583214e-06, "loss": 2.7368, "step": 225500 }, { "epoch": 1.63, "learning_rate": 3.6962779730648502e-06, "loss": 2.7399, "step": 225600 }, { "epoch": 1.63, "learning_rate": 3.689045118546486e-06, "loss": 2.7354, "step": 225700 }, { "epoch": 1.63, "learning_rate": 3.6818122640281216e-06, "loss": 2.7475, "step": 225800 }, { "epoch": 1.63, "learning_rate": 3.6745794095097577e-06, "loss": 2.749, "step": 225900 }, { "epoch": 1.63, "learning_rate": 3.6673465549913933e-06, "loss": 2.7403, "step": 226000 }, { "epoch": 1.63, "eval_accuracy": 0.46118143077067325, "eval_loss": 2.7599334716796875, "eval_runtime": 29.7721, "eval_samples_per_second": 217.754, "eval_steps_per_second": 2.284, "step": 226000 }, { "epoch": 1.64, "learning_rate": 3.6601860290182126e-06, "loss": 2.7356, "step": 226100 }, { "epoch": 1.64, "learning_rate": 3.6529531744998487e-06, "loss": 2.7327, "step": 226200 }, { "epoch": 1.64, "learning_rate": 3.6457203199814844e-06, "loss": 2.7394, "step": 226300 }, { "epoch": 1.64, "learning_rate": 3.6384874654631196e-06, "loss": 2.7427, "step": 226400 }, { "epoch": 1.64, "learning_rate": 3.6312546109447553e-06, "loss": 2.7417, "step": 226500 }, { "epoch": 1.64, "learning_rate": 3.6240217564263914e-06, "loss": 2.7395, "step": 226600 }, { "epoch": 1.64, "learning_rate": 3.616788901908027e-06, "loss": 2.7454, "step": 226700 }, { "epoch": 1.64, "learning_rate": 3.6095560473896627e-06, "loss": 2.737, "step": 226800 }, { "epoch": 1.64, "learning_rate": 3.602323192871299e-06, "loss": 2.7436, "step": 226900 }, { "epoch": 1.64, "learning_rate": 3.5950903383529345e-06, "loss": 2.738, "step": 227000 }, { "epoch": 1.64, "eval_accuracy": 0.46119715817840323, "eval_loss": 2.7598636150360107, "eval_runtime": 27.9537, "eval_samples_per_second": 231.919, "eval_steps_per_second": 2.433, "step": 227000 }, { "epoch": 1.64, "learning_rate": 3.58785748383457e-06, "loss": 2.7383, "step": 227100 }, { "epoch": 1.64, "learning_rate": 3.5806246293162062e-06, "loss": 2.7407, "step": 227200 }, { "epoch": 1.64, "learning_rate": 3.573391774797842e-06, "loss": 2.7394, "step": 227300 }, { "epoch": 1.64, "learning_rate": 3.5661589202794776e-06, "loss": 2.7449, "step": 227400 }, { "epoch": 1.65, "learning_rate": 3.5589260657611137e-06, "loss": 2.7449, "step": 227500 }, { "epoch": 1.65, "learning_rate": 3.5516932112427493e-06, "loss": 2.7383, "step": 227600 }, { "epoch": 1.65, "learning_rate": 3.5445326852695686e-06, "loss": 2.7364, "step": 227700 }, { "epoch": 1.65, "learning_rate": 3.5372998307512047e-06, "loss": 2.7375, "step": 227800 }, { "epoch": 1.65, "learning_rate": 3.5300669762328404e-06, "loss": 2.7405, "step": 227900 }, { "epoch": 1.65, "learning_rate": 3.522834121714476e-06, "loss": 2.7332, "step": 228000 }, { "epoch": 1.65, "eval_accuracy": 0.4612788197185399, "eval_loss": 2.759690284729004, "eval_runtime": 32.3658, "eval_samples_per_second": 200.304, "eval_steps_per_second": 2.101, "step": 228000 }, { "epoch": 1.65, "learning_rate": 3.515601267196112e-06, "loss": 2.7336, "step": 228100 }, { "epoch": 1.65, "learning_rate": 3.508368412677748e-06, "loss": 2.73, "step": 228200 }, { "epoch": 1.65, "learning_rate": 3.5011355581593835e-06, "loss": 2.7353, "step": 228300 }, { "epoch": 1.65, "learning_rate": 3.4939027036410196e-06, "loss": 2.7394, "step": 228400 }, { "epoch": 1.65, "learning_rate": 3.4866698491226552e-06, "loss": 2.7291, "step": 228500 }, { "epoch": 1.65, "learning_rate": 3.479436994604291e-06, "loss": 2.7395, "step": 228600 }, { "epoch": 1.65, "learning_rate": 3.472204140085926e-06, "loss": 2.7351, "step": 228700 }, { "epoch": 1.65, "learning_rate": 3.4649712855675622e-06, "loss": 2.7361, "step": 228800 }, { "epoch": 1.66, "learning_rate": 3.457738431049198e-06, "loss": 2.7387, "step": 228900 }, { "epoch": 1.66, "learning_rate": 3.4505055765308336e-06, "loss": 2.7388, "step": 229000 }, { "epoch": 1.66, "eval_accuracy": 0.46129575692686453, "eval_loss": 2.759584903717041, "eval_runtime": 29.3954, "eval_samples_per_second": 220.545, "eval_steps_per_second": 2.313, "step": 229000 }, { "epoch": 1.66, "learning_rate": 3.4432727220124697e-06, "loss": 2.7428, "step": 229100 }, { "epoch": 1.66, "learning_rate": 3.4360398674941053e-06, "loss": 2.7331, "step": 229200 }, { "epoch": 1.66, "learning_rate": 3.428807012975741e-06, "loss": 2.7316, "step": 229300 }, { "epoch": 1.66, "learning_rate": 3.421574158457377e-06, "loss": 2.7302, "step": 229400 }, { "epoch": 1.66, "learning_rate": 3.4143413039390128e-06, "loss": 2.7417, "step": 229500 }, { "epoch": 1.66, "learning_rate": 3.4071084494206484e-06, "loss": 2.7427, "step": 229600 }, { "epoch": 1.66, "learning_rate": 3.3998755949022845e-06, "loss": 2.7407, "step": 229700 }, { "epoch": 1.66, "learning_rate": 3.39264274038392e-06, "loss": 2.7359, "step": 229800 }, { "epoch": 1.66, "learning_rate": 3.385409885865556e-06, "loss": 2.7396, "step": 229900 }, { "epoch": 1.66, "learning_rate": 3.3782493598923756e-06, "loss": 2.743, "step": 230000 }, { "epoch": 1.66, "eval_accuracy": 0.46134172934946, "eval_loss": 2.7594590187072754, "eval_runtime": 29.5124, "eval_samples_per_second": 219.67, "eval_steps_per_second": 2.304, "step": 230000 }, { "epoch": 1.66, "learning_rate": 3.3710165053740113e-06, "loss": 2.7326, "step": 230100 }, { "epoch": 1.66, "learning_rate": 3.363783650855647e-06, "loss": 2.7364, "step": 230200 }, { "epoch": 1.67, "learning_rate": 3.356550796337283e-06, "loss": 2.7352, "step": 230300 }, { "epoch": 1.67, "learning_rate": 3.3493179418189187e-06, "loss": 2.7397, "step": 230400 }, { "epoch": 1.67, "learning_rate": 3.3420850873005543e-06, "loss": 2.7393, "step": 230500 }, { "epoch": 1.67, "learning_rate": 3.3348522327821904e-06, "loss": 2.744, "step": 230600 }, { "epoch": 1.67, "learning_rate": 3.327619378263826e-06, "loss": 2.7422, "step": 230700 }, { "epoch": 1.67, "learning_rate": 3.3203865237454618e-06, "loss": 2.7439, "step": 230800 }, { "epoch": 1.67, "learning_rate": 3.3132259977722807e-06, "loss": 2.746, "step": 230900 }, { "epoch": 1.67, "learning_rate": 3.3059931432539168e-06, "loss": 2.7368, "step": 231000 }, { "epoch": 1.67, "eval_accuracy": 0.4612715609149722, "eval_loss": 2.759273052215576, "eval_runtime": 29.4403, "eval_samples_per_second": 220.209, "eval_steps_per_second": 2.31, "step": 231000 }, { "epoch": 1.67, "learning_rate": 3.2987602887355524e-06, "loss": 2.7397, "step": 231100 }, { "epoch": 1.67, "learning_rate": 3.291527434217188e-06, "loss": 2.7324, "step": 231200 }, { "epoch": 1.67, "learning_rate": 3.284294579698824e-06, "loss": 2.7456, "step": 231300 }, { "epoch": 1.67, "learning_rate": 3.27706172518046e-06, "loss": 2.731, "step": 231400 }, { "epoch": 1.67, "learning_rate": 3.2698288706620955e-06, "loss": 2.7319, "step": 231500 }, { "epoch": 1.68, "learning_rate": 3.2625960161437316e-06, "loss": 2.7492, "step": 231600 }, { "epoch": 1.68, "learning_rate": 3.2553631616253673e-06, "loss": 2.741, "step": 231700 }, { "epoch": 1.68, "learning_rate": 3.248130307107003e-06, "loss": 2.7384, "step": 231800 }, { "epoch": 1.68, "learning_rate": 3.240897452588639e-06, "loss": 2.7307, "step": 231900 }, { "epoch": 1.68, "learning_rate": 3.2336645980702747e-06, "loss": 2.7426, "step": 232000 }, { "epoch": 1.68, "eval_accuracy": 0.4613774184670012, "eval_loss": 2.7592265605926514, "eval_runtime": 31.7025, "eval_samples_per_second": 204.495, "eval_steps_per_second": 2.145, "step": 232000 }, { "epoch": 1.68, "learning_rate": 3.2264317435519104e-06, "loss": 2.7339, "step": 232100 }, { "epoch": 1.68, "learning_rate": 3.2191988890335464e-06, "loss": 2.7413, "step": 232200 }, { "epoch": 1.68, "learning_rate": 3.211966034515182e-06, "loss": 2.7413, "step": 232300 }, { "epoch": 1.68, "learning_rate": 3.2047331799968178e-06, "loss": 2.7421, "step": 232400 }, { "epoch": 1.68, "learning_rate": 3.197500325478454e-06, "loss": 2.7387, "step": 232500 }, { "epoch": 1.68, "learning_rate": 3.1902674709600895e-06, "loss": 2.7344, "step": 232600 }, { "epoch": 1.68, "learning_rate": 3.183034616441725e-06, "loss": 2.7488, "step": 232700 }, { "epoch": 1.68, "learning_rate": 3.1758017619233613e-06, "loss": 2.7391, "step": 232800 }, { "epoch": 1.68, "learning_rate": 3.168568907404997e-06, "loss": 2.7362, "step": 232900 }, { "epoch": 1.69, "learning_rate": 3.1614083814318163e-06, "loss": 2.7332, "step": 233000 }, { "epoch": 1.69, "eval_accuracy": 0.4614445624000024, "eval_loss": 2.7591168880462646, "eval_runtime": 30.2871, "eval_samples_per_second": 214.052, "eval_steps_per_second": 2.245, "step": 233000 }, { "epoch": 1.69, "learning_rate": 3.1541755269134524e-06, "loss": 2.7441, "step": 233100 }, { "epoch": 1.69, "learning_rate": 3.1469426723950876e-06, "loss": 2.74, "step": 233200 }, { "epoch": 1.69, "learning_rate": 3.1397098178767233e-06, "loss": 2.7379, "step": 233300 }, { "epoch": 1.69, "learning_rate": 3.132476963358359e-06, "loss": 2.7406, "step": 233400 }, { "epoch": 1.69, "learning_rate": 3.1253164373851787e-06, "loss": 2.7385, "step": 233500 }, { "epoch": 1.69, "learning_rate": 3.1180835828668143e-06, "loss": 2.7332, "step": 233600 }, { "epoch": 1.69, "learning_rate": 3.11085072834845e-06, "loss": 2.7396, "step": 233700 }, { "epoch": 1.69, "learning_rate": 3.103617873830086e-06, "loss": 2.743, "step": 233800 }, { "epoch": 1.69, "learning_rate": 3.0963850193117218e-06, "loss": 2.7363, "step": 233900 }, { "epoch": 1.69, "learning_rate": 3.0891521647933574e-06, "loss": 2.7413, "step": 234000 }, { "epoch": 1.69, "eval_accuracy": 0.4613556420562981, "eval_loss": 2.7590200901031494, "eval_runtime": 29.9923, "eval_samples_per_second": 216.155, "eval_steps_per_second": 2.267, "step": 234000 }, { "epoch": 1.69, "learning_rate": 3.0819193102749935e-06, "loss": 2.7473, "step": 234100 }, { "epoch": 1.69, "learning_rate": 3.074686455756629e-06, "loss": 2.7374, "step": 234200 }, { "epoch": 1.69, "learning_rate": 3.067453601238265e-06, "loss": 2.7358, "step": 234300 }, { "epoch": 1.7, "learning_rate": 3.060220746719901e-06, "loss": 2.7322, "step": 234400 }, { "epoch": 1.7, "learning_rate": 3.0529878922015366e-06, "loss": 2.7412, "step": 234500 }, { "epoch": 1.7, "learning_rate": 3.0457550376831723e-06, "loss": 2.7346, "step": 234600 }, { "epoch": 1.7, "learning_rate": 3.0385221831648084e-06, "loss": 2.7395, "step": 234700 }, { "epoch": 1.7, "learning_rate": 3.031289328646444e-06, "loss": 2.7384, "step": 234800 }, { "epoch": 1.7, "learning_rate": 3.0240564741280797e-06, "loss": 2.7408, "step": 234900 }, { "epoch": 1.7, "learning_rate": 3.016823619609716e-06, "loss": 2.735, "step": 235000 }, { "epoch": 1.7, "eval_accuracy": 0.46131692843727035, "eval_loss": 2.7589235305786133, "eval_runtime": 29.8027, "eval_samples_per_second": 217.53, "eval_steps_per_second": 2.282, "step": 235000 }, { "epoch": 1.7, "learning_rate": 3.0095907650913515e-06, "loss": 2.7375, "step": 235100 }, { "epoch": 1.7, "learning_rate": 3.002357910572987e-06, "loss": 2.7379, "step": 235200 }, { "epoch": 1.7, "learning_rate": 2.9951250560546232e-06, "loss": 2.734, "step": 235300 }, { "epoch": 1.7, "learning_rate": 2.987892201536259e-06, "loss": 2.743, "step": 235400 }, { "epoch": 1.7, "learning_rate": 2.980659347017894e-06, "loss": 2.7419, "step": 235500 }, { "epoch": 1.7, "learning_rate": 2.97342649249953e-06, "loss": 2.743, "step": 235600 }, { "epoch": 1.7, "learning_rate": 2.9662659665263495e-06, "loss": 2.7387, "step": 235700 }, { "epoch": 1.71, "learning_rate": 2.959033112007985e-06, "loss": 2.7431, "step": 235800 }, { "epoch": 1.71, "learning_rate": 2.951800257489621e-06, "loss": 2.7385, "step": 235900 }, { "epoch": 1.71, "learning_rate": 2.9446397315164406e-06, "loss": 2.7393, "step": 236000 }, { "epoch": 1.71, "eval_accuracy": 0.46144032809792124, "eval_loss": 2.7588789463043213, "eval_runtime": 29.5655, "eval_samples_per_second": 219.276, "eval_steps_per_second": 2.3, "step": 236000 }, { "epoch": 1.71, "learning_rate": 2.9374068769980763e-06, "loss": 2.7405, "step": 236100 }, { "epoch": 1.71, "learning_rate": 2.930174022479712e-06, "loss": 2.7432, "step": 236200 }, { "epoch": 1.71, "learning_rate": 2.922941167961348e-06, "loss": 2.7447, "step": 236300 }, { "epoch": 1.71, "learning_rate": 2.9157083134429837e-06, "loss": 2.7412, "step": 236400 }, { "epoch": 1.71, "learning_rate": 2.9084754589246194e-06, "loss": 2.7406, "step": 236500 }, { "epoch": 1.71, "learning_rate": 2.9012426044062555e-06, "loss": 2.7361, "step": 236600 }, { "epoch": 1.71, "learning_rate": 2.894009749887891e-06, "loss": 2.7427, "step": 236700 }, { "epoch": 1.71, "learning_rate": 2.8867768953695268e-06, "loss": 2.732, "step": 236800 }, { "epoch": 1.71, "learning_rate": 2.879544040851163e-06, "loss": 2.7359, "step": 236900 }, { "epoch": 1.71, "learning_rate": 2.8723111863327985e-06, "loss": 2.7382, "step": 237000 }, { "epoch": 1.71, "eval_accuracy": 0.4614735976142732, "eval_loss": 2.7587404251098633, "eval_runtime": 29.6674, "eval_samples_per_second": 218.523, "eval_steps_per_second": 2.292, "step": 237000 }, { "epoch": 1.71, "learning_rate": 2.865078331814434e-06, "loss": 2.7361, "step": 237100 }, { "epoch": 1.72, "learning_rate": 2.8578454772960703e-06, "loss": 2.7353, "step": 237200 }, { "epoch": 1.72, "learning_rate": 2.850612622777706e-06, "loss": 2.7453, "step": 237300 }, { "epoch": 1.72, "learning_rate": 2.8433797682593416e-06, "loss": 2.7412, "step": 237400 }, { "epoch": 1.72, "learning_rate": 2.8361469137409773e-06, "loss": 2.7355, "step": 237500 }, { "epoch": 1.72, "learning_rate": 2.828914059222613e-06, "loss": 2.7308, "step": 237600 }, { "epoch": 1.72, "learning_rate": 2.8216812047042486e-06, "loss": 2.7353, "step": 237700 }, { "epoch": 1.72, "learning_rate": 2.8144483501858843e-06, "loss": 2.7321, "step": 237800 }, { "epoch": 1.72, "learning_rate": 2.80721549566752e-06, "loss": 2.7433, "step": 237900 }, { "epoch": 1.72, "learning_rate": 2.8000549696943397e-06, "loss": 2.7403, "step": 238000 }, { "epoch": 1.72, "eval_accuracy": 0.46152561903984174, "eval_loss": 2.7587077617645264, "eval_runtime": 29.8439, "eval_samples_per_second": 217.231, "eval_steps_per_second": 2.279, "step": 238000 }, { "epoch": 1.72, "learning_rate": 2.7928221151759754e-06, "loss": 2.738, "step": 238100 }, { "epoch": 1.72, "learning_rate": 2.7855892606576115e-06, "loss": 2.7421, "step": 238200 }, { "epoch": 1.72, "learning_rate": 2.778356406139247e-06, "loss": 2.747, "step": 238300 }, { "epoch": 1.72, "learning_rate": 2.771123551620883e-06, "loss": 2.7361, "step": 238400 }, { "epoch": 1.73, "learning_rate": 2.763890697102519e-06, "loss": 2.7439, "step": 238500 }, { "epoch": 1.73, "learning_rate": 2.7566578425841545e-06, "loss": 2.7365, "step": 238600 }, { "epoch": 1.73, "learning_rate": 2.7494249880657902e-06, "loss": 2.744, "step": 238700 }, { "epoch": 1.73, "learning_rate": 2.742192133547426e-06, "loss": 2.7478, "step": 238800 }, { "epoch": 1.73, "learning_rate": 2.7350316075742456e-06, "loss": 2.7332, "step": 238900 }, { "epoch": 1.73, "learning_rate": 2.7277987530558813e-06, "loss": 2.7436, "step": 239000 }, { "epoch": 1.73, "eval_accuracy": 0.46152017493716596, "eval_loss": 2.7586169242858887, "eval_runtime": 29.5848, "eval_samples_per_second": 219.132, "eval_steps_per_second": 2.298, "step": 239000 }, { "epoch": 1.73, "learning_rate": 2.7205658985375174e-06, "loss": 2.7422, "step": 239100 }, { "epoch": 1.73, "learning_rate": 2.713333044019153e-06, "loss": 2.7439, "step": 239200 }, { "epoch": 1.73, "learning_rate": 2.7061001895007887e-06, "loss": 2.7457, "step": 239300 }, { "epoch": 1.73, "learning_rate": 2.698867334982425e-06, "loss": 2.7453, "step": 239400 }, { "epoch": 1.73, "learning_rate": 2.6916344804640605e-06, "loss": 2.7448, "step": 239500 }, { "epoch": 1.73, "learning_rate": 2.684401625945696e-06, "loss": 2.7451, "step": 239600 }, { "epoch": 1.73, "learning_rate": 2.677168771427332e-06, "loss": 2.75, "step": 239700 }, { "epoch": 1.73, "learning_rate": 2.669935916908967e-06, "loss": 2.7402, "step": 239800 }, { "epoch": 1.74, "learning_rate": 2.662703062390603e-06, "loss": 2.7313, "step": 239900 }, { "epoch": 1.74, "learning_rate": 2.655470207872239e-06, "loss": 2.7422, "step": 240000 }, { "epoch": 1.74, "eval_accuracy": 0.461479041716949, "eval_loss": 2.758490562438965, "eval_runtime": 30.0813, "eval_samples_per_second": 215.516, "eval_steps_per_second": 2.261, "step": 240000 }, { "epoch": 1.74, "learning_rate": 2.6482373533538745e-06, "loss": 2.7413, "step": 240100 }, { "epoch": 1.74, "learning_rate": 2.6410044988355106e-06, "loss": 2.7452, "step": 240200 }, { "epoch": 1.74, "learning_rate": 2.6337716443171462e-06, "loss": 2.7438, "step": 240300 }, { "epoch": 1.74, "learning_rate": 2.626538789798782e-06, "loss": 2.7333, "step": 240400 }, { "epoch": 1.74, "learning_rate": 2.619305935280418e-06, "loss": 2.7387, "step": 240500 }, { "epoch": 1.74, "learning_rate": 2.6120730807620536e-06, "loss": 2.7413, "step": 240600 }, { "epoch": 1.74, "learning_rate": 2.6048402262436893e-06, "loss": 2.7352, "step": 240700 }, { "epoch": 1.74, "learning_rate": 2.5976073717253254e-06, "loss": 2.7375, "step": 240800 }, { "epoch": 1.74, "learning_rate": 2.590374517206961e-06, "loss": 2.7369, "step": 240900 }, { "epoch": 1.74, "learning_rate": 2.5832139912337804e-06, "loss": 2.7257, "step": 241000 }, { "epoch": 1.74, "eval_accuracy": 0.4614288349922724, "eval_loss": 2.758410930633545, "eval_runtime": 30.0568, "eval_samples_per_second": 215.692, "eval_steps_per_second": 2.262, "step": 241000 }, { "epoch": 1.74, "learning_rate": 2.5759811367154165e-06, "loss": 2.738, "step": 241100 }, { "epoch": 1.74, "learning_rate": 2.568748282197052e-06, "loss": 2.7356, "step": 241200 }, { "epoch": 1.75, "learning_rate": 2.561515427678688e-06, "loss": 2.7441, "step": 241300 }, { "epoch": 1.75, "learning_rate": 2.554282573160324e-06, "loss": 2.7457, "step": 241400 }, { "epoch": 1.75, "learning_rate": 2.5470497186419596e-06, "loss": 2.7406, "step": 241500 }, { "epoch": 1.75, "learning_rate": 2.5398168641235952e-06, "loss": 2.7382, "step": 241600 }, { "epoch": 1.75, "learning_rate": 2.532656338150415e-06, "loss": 2.742, "step": 241700 }, { "epoch": 1.75, "learning_rate": 2.5254234836320506e-06, "loss": 2.7328, "step": 241800 }, { "epoch": 1.75, "learning_rate": 2.518190629113686e-06, "loss": 2.7437, "step": 241900 }, { "epoch": 1.75, "learning_rate": 2.5109577745953215e-06, "loss": 2.7351, "step": 242000 }, { "epoch": 1.75, "eval_accuracy": 0.461543766048761, "eval_loss": 2.758322238922119, "eval_runtime": 29.9387, "eval_samples_per_second": 216.543, "eval_steps_per_second": 2.271, "step": 242000 }, { "epoch": 1.75, "learning_rate": 2.5037249200769576e-06, "loss": 2.7423, "step": 242100 }, { "epoch": 1.75, "learning_rate": 2.4964920655585937e-06, "loss": 2.7486, "step": 242200 }, { "epoch": 1.75, "learning_rate": 2.4892592110402294e-06, "loss": 2.7439, "step": 242300 }, { "epoch": 1.75, "learning_rate": 2.482026356521865e-06, "loss": 2.7344, "step": 242400 }, { "epoch": 1.75, "learning_rate": 2.4747935020035007e-06, "loss": 2.7396, "step": 242500 }, { "epoch": 1.75, "learning_rate": 2.4675606474851364e-06, "loss": 2.7368, "step": 242600 }, { "epoch": 1.76, "learning_rate": 2.4603277929667725e-06, "loss": 2.7349, "step": 242700 }, { "epoch": 1.76, "learning_rate": 2.453094938448408e-06, "loss": 2.7427, "step": 242800 }, { "epoch": 1.76, "learning_rate": 2.445862083930044e-06, "loss": 2.7458, "step": 242900 }, { "epoch": 1.76, "learning_rate": 2.43862922941168e-06, "loss": 2.7391, "step": 243000 }, { "epoch": 1.76, "eval_accuracy": 0.4615431611484637, "eval_loss": 2.7582499980926514, "eval_runtime": 29.9454, "eval_samples_per_second": 216.494, "eval_steps_per_second": 2.271, "step": 243000 }, { "epoch": 1.76, "learning_rate": 2.4313963748933156e-06, "loss": 2.7428, "step": 243100 }, { "epoch": 1.76, "learning_rate": 2.4241635203749512e-06, "loss": 2.7424, "step": 243200 }, { "epoch": 1.76, "learning_rate": 2.4169306658565873e-06, "loss": 2.7412, "step": 243300 }, { "epoch": 1.76, "learning_rate": 2.409697811338223e-06, "loss": 2.7338, "step": 243400 }, { "epoch": 1.76, "learning_rate": 2.4024649568198587e-06, "loss": 2.7389, "step": 243500 }, { "epoch": 1.76, "learning_rate": 2.3952321023014948e-06, "loss": 2.7363, "step": 243600 }, { "epoch": 1.76, "learning_rate": 2.3879992477831304e-06, "loss": 2.7472, "step": 243700 }, { "epoch": 1.76, "learning_rate": 2.3808387218099497e-06, "loss": 2.7422, "step": 243800 }, { "epoch": 1.76, "learning_rate": 2.3736058672915854e-06, "loss": 2.7432, "step": 243900 }, { "epoch": 1.76, "learning_rate": 2.366373012773221e-06, "loss": 2.7495, "step": 244000 }, { "epoch": 1.76, "eval_accuracy": 0.4615492101514368, "eval_loss": 2.758117437362671, "eval_runtime": 29.5895, "eval_samples_per_second": 219.098, "eval_steps_per_second": 2.298, "step": 244000 }, { "epoch": 1.77, "learning_rate": 2.359140158254857e-06, "loss": 2.7379, "step": 244100 }, { "epoch": 1.77, "learning_rate": 2.351907303736493e-06, "loss": 2.736, "step": 244200 }, { "epoch": 1.77, "learning_rate": 2.3446744492181285e-06, "loss": 2.7374, "step": 244300 }, { "epoch": 1.77, "learning_rate": 2.3374415946997646e-06, "loss": 2.7352, "step": 244400 }, { "epoch": 1.77, "learning_rate": 2.3302087401814002e-06, "loss": 2.7366, "step": 244500 }, { "epoch": 1.77, "learning_rate": 2.322975885663036e-06, "loss": 2.742, "step": 244600 }, { "epoch": 1.77, "learning_rate": 2.315743031144672e-06, "loss": 2.7376, "step": 244700 }, { "epoch": 1.77, "learning_rate": 2.3085101766263077e-06, "loss": 2.7423, "step": 244800 }, { "epoch": 1.77, "learning_rate": 2.3012773221079433e-06, "loss": 2.7357, "step": 244900 }, { "epoch": 1.77, "learning_rate": 2.294044467589579e-06, "loss": 2.7399, "step": 245000 }, { "epoch": 1.77, "eval_accuracy": 0.4614409329982186, "eval_loss": 2.7580342292785645, "eval_runtime": 31.4458, "eval_samples_per_second": 206.164, "eval_steps_per_second": 2.162, "step": 245000 }, { "epoch": 1.77, "learning_rate": 2.2868116130712147e-06, "loss": 2.7369, "step": 245100 }, { "epoch": 1.77, "learning_rate": 2.2795787585528508e-06, "loss": 2.7372, "step": 245200 }, { "epoch": 1.77, "learning_rate": 2.2723459040344864e-06, "loss": 2.7426, "step": 245300 }, { "epoch": 1.77, "learning_rate": 2.2651853780613057e-06, "loss": 2.742, "step": 245400 }, { "epoch": 1.78, "learning_rate": 2.257952523542942e-06, "loss": 2.7461, "step": 245500 }, { "epoch": 1.78, "learning_rate": 2.2507196690245775e-06, "loss": 2.7337, "step": 245600 }, { "epoch": 1.78, "learning_rate": 2.243486814506213e-06, "loss": 2.7397, "step": 245700 }, { "epoch": 1.78, "learning_rate": 2.2362539599878493e-06, "loss": 2.7301, "step": 245800 }, { "epoch": 1.78, "learning_rate": 2.2290211054694845e-06, "loss": 2.7431, "step": 245900 }, { "epoch": 1.78, "learning_rate": 2.2217882509511206e-06, "loss": 2.7435, "step": 246000 }, { "epoch": 1.78, "eval_accuracy": 0.4615564689550045, "eval_loss": 2.757960081100464, "eval_runtime": 29.7199, "eval_samples_per_second": 218.136, "eval_steps_per_second": 2.288, "step": 246000 }, { "epoch": 1.78, "learning_rate": 2.2145553964327563e-06, "loss": 2.739, "step": 246100 }, { "epoch": 1.78, "learning_rate": 2.2073948704595756e-06, "loss": 2.7395, "step": 246200 }, { "epoch": 1.78, "learning_rate": 2.2001620159412117e-06, "loss": 2.7303, "step": 246300 }, { "epoch": 1.78, "learning_rate": 2.1929291614228473e-06, "loss": 2.7363, "step": 246400 }, { "epoch": 1.78, "learning_rate": 2.185696306904483e-06, "loss": 2.7318, "step": 246500 }, { "epoch": 1.78, "learning_rate": 2.178463452386119e-06, "loss": 2.7335, "step": 246600 }, { "epoch": 1.78, "learning_rate": 2.1712305978677548e-06, "loss": 2.7384, "step": 246700 }, { "epoch": 1.79, "learning_rate": 2.1639977433493904e-06, "loss": 2.7412, "step": 246800 }, { "epoch": 1.79, "learning_rate": 2.1567648888310265e-06, "loss": 2.7449, "step": 246900 }, { "epoch": 1.79, "learning_rate": 2.1495320343126617e-06, "loss": 2.7414, "step": 247000 }, { "epoch": 1.79, "eval_accuracy": 0.46152924844162563, "eval_loss": 2.7578506469726562, "eval_runtime": 30.2105, "eval_samples_per_second": 214.595, "eval_steps_per_second": 2.251, "step": 247000 }, { "epoch": 1.79, "learning_rate": 2.142299179794298e-06, "loss": 2.7373, "step": 247100 }, { "epoch": 1.79, "learning_rate": 2.1350663252759335e-06, "loss": 2.7383, "step": 247200 }, { "epoch": 1.79, "learning_rate": 2.127833470757569e-06, "loss": 2.7326, "step": 247300 }, { "epoch": 1.79, "learning_rate": 2.1206006162392053e-06, "loss": 2.7407, "step": 247400 }, { "epoch": 1.79, "learning_rate": 2.113367761720841e-06, "loss": 2.7294, "step": 247500 }, { "epoch": 1.79, "learning_rate": 2.1061349072024766e-06, "loss": 2.7417, "step": 247600 }, { "epoch": 1.79, "learning_rate": 2.0989020526841127e-06, "loss": 2.7314, "step": 247700 }, { "epoch": 1.79, "learning_rate": 2.0916691981657484e-06, "loss": 2.736, "step": 247800 }, { "epoch": 1.79, "learning_rate": 2.084436343647384e-06, "loss": 2.7482, "step": 247900 }, { "epoch": 1.79, "learning_rate": 2.07720348912902e-06, "loss": 2.7478, "step": 248000 }, { "epoch": 1.79, "eval_accuracy": 0.4615903433716538, "eval_loss": 2.7578227519989014, "eval_runtime": 29.6353, "eval_samples_per_second": 218.759, "eval_steps_per_second": 2.295, "step": 248000 }, { "epoch": 1.79, "learning_rate": 2.0699706346106558e-06, "loss": 2.7368, "step": 248100 }, { "epoch": 1.8, "learning_rate": 2.0627377800922914e-06, "loss": 2.735, "step": 248200 }, { "epoch": 1.8, "learning_rate": 2.055504925573927e-06, "loss": 2.7335, "step": 248300 }, { "epoch": 1.8, "learning_rate": 2.0482720710555628e-06, "loss": 2.7429, "step": 248400 }, { "epoch": 1.8, "learning_rate": 2.041039216537199e-06, "loss": 2.7418, "step": 248500 }, { "epoch": 1.8, "learning_rate": 2.0338063620188345e-06, "loss": 2.7387, "step": 248600 }, { "epoch": 1.8, "learning_rate": 2.02657350750047e-06, "loss": 2.7381, "step": 248700 }, { "epoch": 1.8, "learning_rate": 2.0193406529821063e-06, "loss": 2.7433, "step": 248800 }, { "epoch": 1.8, "learning_rate": 2.012107798463742e-06, "loss": 2.7437, "step": 248900 }, { "epoch": 1.8, "learning_rate": 2.0048749439453776e-06, "loss": 2.7299, "step": 249000 }, { "epoch": 1.8, "eval_accuracy": 0.4615842943686807, "eval_loss": 2.7576780319213867, "eval_runtime": 30.3855, "eval_samples_per_second": 213.358, "eval_steps_per_second": 2.238, "step": 249000 }, { "epoch": 1.8, "learning_rate": 1.9976420894270133e-06, "loss": 2.74, "step": 249100 }, { "epoch": 1.8, "learning_rate": 1.990481563453833e-06, "loss": 2.7341, "step": 249200 }, { "epoch": 1.8, "learning_rate": 1.9833210374806523e-06, "loss": 2.7405, "step": 249300 }, { "epoch": 1.8, "learning_rate": 1.976088182962288e-06, "loss": 2.7356, "step": 249400 }, { "epoch": 1.8, "learning_rate": 1.9689276569891073e-06, "loss": 2.7424, "step": 249500 }, { "epoch": 1.81, "learning_rate": 1.9616948024707434e-06, "loss": 2.7359, "step": 249600 }, { "epoch": 1.81, "learning_rate": 1.954461947952379e-06, "loss": 2.7368, "step": 249700 }, { "epoch": 1.81, "learning_rate": 1.9472290934340148e-06, "loss": 2.7339, "step": 249800 }, { "epoch": 1.81, "learning_rate": 1.939996238915651e-06, "loss": 2.7452, "step": 249900 }, { "epoch": 1.81, "learning_rate": 1.9327633843972865e-06, "loss": 2.7401, "step": 250000 }, { "epoch": 1.81, "eval_accuracy": 0.4615576787555991, "eval_loss": 2.7575795650482178, "eval_runtime": 30.2185, "eval_samples_per_second": 214.538, "eval_steps_per_second": 2.25, "step": 250000 }, { "epoch": 1.81, "learning_rate": 1.925530529878922e-06, "loss": 2.7378, "step": 250100 }, { "epoch": 1.81, "learning_rate": 1.918297675360558e-06, "loss": 2.7439, "step": 250200 }, { "epoch": 1.81, "learning_rate": 1.9110648208421935e-06, "loss": 2.7375, "step": 250300 }, { "epoch": 1.81, "learning_rate": 1.9038319663238294e-06, "loss": 2.7377, "step": 250400 }, { "epoch": 1.81, "learning_rate": 1.8965991118054653e-06, "loss": 2.7367, "step": 250500 }, { "epoch": 1.81, "learning_rate": 1.8893662572871011e-06, "loss": 2.7409, "step": 250600 }, { "epoch": 1.81, "learning_rate": 1.8821334027687368e-06, "loss": 2.7352, "step": 250700 }, { "epoch": 1.81, "learning_rate": 1.8749005482503727e-06, "loss": 2.7421, "step": 250800 }, { "epoch": 1.81, "learning_rate": 1.8676676937320086e-06, "loss": 2.7286, "step": 250900 }, { "epoch": 1.82, "learning_rate": 1.8604348392136442e-06, "loss": 2.7395, "step": 251000 }, { "epoch": 1.82, "eval_accuracy": 0.4616157491841407, "eval_loss": 2.757535219192505, "eval_runtime": 29.6145, "eval_samples_per_second": 218.913, "eval_steps_per_second": 2.296, "step": 251000 }, { "epoch": 1.82, "learning_rate": 1.8532019846952801e-06, "loss": 2.734, "step": 251100 }, { "epoch": 1.82, "learning_rate": 1.8459691301769158e-06, "loss": 2.7448, "step": 251200 }, { "epoch": 1.82, "learning_rate": 1.8387362756585517e-06, "loss": 2.7367, "step": 251300 }, { "epoch": 1.82, "learning_rate": 1.8315034211401871e-06, "loss": 2.7354, "step": 251400 }, { "epoch": 1.82, "learning_rate": 1.824270566621823e-06, "loss": 2.7409, "step": 251500 }, { "epoch": 1.82, "learning_rate": 1.8171100406486425e-06, "loss": 2.7331, "step": 251600 }, { "epoch": 1.82, "learning_rate": 1.8098771861302784e-06, "loss": 2.7325, "step": 251700 }, { "epoch": 1.82, "learning_rate": 1.802644331611914e-06, "loss": 2.7439, "step": 251800 }, { "epoch": 1.82, "learning_rate": 1.79541147709355e-06, "loss": 2.7325, "step": 251900 }, { "epoch": 1.82, "learning_rate": 1.7881786225751856e-06, "loss": 2.7399, "step": 252000 }, { "epoch": 1.82, "eval_accuracy": 0.46158852867076183, "eval_loss": 2.757430076599121, "eval_runtime": 29.3317, "eval_samples_per_second": 221.024, "eval_steps_per_second": 2.318, "step": 252000 }, { "epoch": 1.82, "learning_rate": 1.7809457680568215e-06, "loss": 2.744, "step": 252100 }, { "epoch": 1.82, "learning_rate": 1.7737129135384574e-06, "loss": 2.7365, "step": 252200 }, { "epoch": 1.82, "learning_rate": 1.766480059020093e-06, "loss": 2.7367, "step": 252300 }, { "epoch": 1.83, "learning_rate": 1.759247204501729e-06, "loss": 2.7402, "step": 252400 }, { "epoch": 1.83, "learning_rate": 1.7520143499833644e-06, "loss": 2.74, "step": 252500 }, { "epoch": 1.83, "learning_rate": 1.7447814954650002e-06, "loss": 2.7433, "step": 252600 }, { "epoch": 1.83, "learning_rate": 1.7376209694918198e-06, "loss": 2.7307, "step": 252700 }, { "epoch": 1.83, "learning_rate": 1.7303881149734556e-06, "loss": 2.7333, "step": 252800 }, { "epoch": 1.83, "learning_rate": 1.7231552604550913e-06, "loss": 2.7416, "step": 252900 }, { "epoch": 1.83, "learning_rate": 1.7159224059367272e-06, "loss": 2.7413, "step": 253000 }, { "epoch": 1.83, "eval_accuracy": 0.4616139344832488, "eval_loss": 2.757355213165283, "eval_runtime": 29.4627, "eval_samples_per_second": 220.041, "eval_steps_per_second": 2.308, "step": 253000 }, { "epoch": 1.83, "learning_rate": 1.7086895514183629e-06, "loss": 2.7397, "step": 253100 }, { "epoch": 1.83, "learning_rate": 1.7014566968999987e-06, "loss": 2.7499, "step": 253200 }, { "epoch": 1.83, "learning_rate": 1.6942238423816346e-06, "loss": 2.7391, "step": 253300 }, { "epoch": 1.83, "learning_rate": 1.6869909878632703e-06, "loss": 2.7351, "step": 253400 }, { "epoch": 1.83, "learning_rate": 1.6797581333449062e-06, "loss": 2.7421, "step": 253500 }, { "epoch": 1.83, "learning_rate": 1.6725252788265416e-06, "loss": 2.7468, "step": 253600 }, { "epoch": 1.83, "learning_rate": 1.6652924243081775e-06, "loss": 2.7392, "step": 253700 }, { "epoch": 1.84, "learning_rate": 1.6580595697898134e-06, "loss": 2.7259, "step": 253800 }, { "epoch": 1.84, "learning_rate": 1.650899043816633e-06, "loss": 2.7404, "step": 253900 }, { "epoch": 1.84, "learning_rate": 1.6436661892982686e-06, "loss": 2.7294, "step": 254000 }, { "epoch": 1.84, "eval_accuracy": 0.4616357108939519, "eval_loss": 2.757269859313965, "eval_runtime": 30.4439, "eval_samples_per_second": 212.949, "eval_steps_per_second": 2.234, "step": 254000 }, { "epoch": 1.84, "learning_rate": 1.6364333347799044e-06, "loss": 2.74, "step": 254100 }, { "epoch": 1.84, "learning_rate": 1.6292004802615401e-06, "loss": 2.7326, "step": 254200 }, { "epoch": 1.84, "learning_rate": 1.621967625743176e-06, "loss": 2.7434, "step": 254300 }, { "epoch": 1.84, "learning_rate": 1.6148070997699953e-06, "loss": 2.7419, "step": 254400 }, { "epoch": 1.84, "learning_rate": 1.607574245251631e-06, "loss": 2.7295, "step": 254500 }, { "epoch": 1.84, "learning_rate": 1.6003413907332668e-06, "loss": 2.7313, "step": 254600 }, { "epoch": 1.84, "learning_rate": 1.5931085362149027e-06, "loss": 2.7341, "step": 254700 }, { "epoch": 1.84, "learning_rate": 1.5858756816965384e-06, "loss": 2.7482, "step": 254800 }, { "epoch": 1.84, "learning_rate": 1.5786428271781743e-06, "loss": 2.7316, "step": 254900 }, { "epoch": 1.84, "learning_rate": 1.5714099726598101e-06, "loss": 2.7329, "step": 255000 }, { "epoch": 1.84, "eval_accuracy": 0.4616484138001954, "eval_loss": 2.757246732711792, "eval_runtime": 29.9443, "eval_samples_per_second": 216.502, "eval_steps_per_second": 2.271, "step": 255000 }, { "epoch": 1.85, "learning_rate": 1.5641771181414458e-06, "loss": 2.7439, "step": 255100 }, { "epoch": 1.85, "learning_rate": 1.5569442636230817e-06, "loss": 2.7369, "step": 255200 }, { "epoch": 1.85, "learning_rate": 1.5497114091047174e-06, "loss": 2.7376, "step": 255300 }, { "epoch": 1.85, "learning_rate": 1.5424785545863532e-06, "loss": 2.7391, "step": 255400 }, { "epoch": 1.85, "learning_rate": 1.5352457000679891e-06, "loss": 2.7324, "step": 255500 }, { "epoch": 1.85, "learning_rate": 1.5280128455496248e-06, "loss": 2.7475, "step": 255600 }, { "epoch": 1.85, "learning_rate": 1.5207799910312604e-06, "loss": 2.7312, "step": 255700 }, { "epoch": 1.85, "learning_rate": 1.5135471365128961e-06, "loss": 2.7444, "step": 255800 }, { "epoch": 1.85, "learning_rate": 1.506314281994532e-06, "loss": 2.7346, "step": 255900 }, { "epoch": 1.85, "learning_rate": 1.4991537560213515e-06, "loss": 2.7454, "step": 256000 }, { "epoch": 1.85, "eval_accuracy": 0.46165385790287117, "eval_loss": 2.757188081741333, "eval_runtime": 28.8568, "eval_samples_per_second": 224.661, "eval_steps_per_second": 2.356, "step": 256000 }, { "epoch": 1.85, "learning_rate": 1.4919209015029872e-06, "loss": 2.7443, "step": 256100 }, { "epoch": 1.85, "learning_rate": 1.484688046984623e-06, "loss": 2.7341, "step": 256200 }, { "epoch": 1.85, "learning_rate": 1.477455192466259e-06, "loss": 2.7427, "step": 256300 }, { "epoch": 1.85, "learning_rate": 1.4702223379478946e-06, "loss": 2.7386, "step": 256400 }, { "epoch": 1.86, "learning_rate": 1.4629894834295305e-06, "loss": 2.7349, "step": 256500 }, { "epoch": 1.86, "learning_rate": 1.4559012860015335e-06, "loss": 2.7334, "step": 256600 }, { "epoch": 1.86, "learning_rate": 1.4486684314831691e-06, "loss": 2.7423, "step": 256700 }, { "epoch": 1.86, "learning_rate": 1.441435576964805e-06, "loss": 2.7376, "step": 256800 }, { "epoch": 1.86, "learning_rate": 1.4342027224464409e-06, "loss": 2.7353, "step": 256900 }, { "epoch": 1.86, "learning_rate": 1.4269698679280765e-06, "loss": 2.7343, "step": 257000 }, { "epoch": 1.86, "eval_accuracy": 0.46170527442814235, "eval_loss": 2.757138252258301, "eval_runtime": 30.9623, "eval_samples_per_second": 209.383, "eval_steps_per_second": 2.196, "step": 257000 }, { "epoch": 1.86, "learning_rate": 1.4197370134097124e-06, "loss": 2.7396, "step": 257100 }, { "epoch": 1.86, "learning_rate": 1.4125041588913483e-06, "loss": 2.7398, "step": 257200 }, { "epoch": 1.86, "learning_rate": 1.405271304372984e-06, "loss": 2.7293, "step": 257300 }, { "epoch": 1.86, "learning_rate": 1.3980384498546199e-06, "loss": 2.7378, "step": 257400 }, { "epoch": 1.86, "learning_rate": 1.3908055953362557e-06, "loss": 2.7431, "step": 257500 }, { "epoch": 1.86, "learning_rate": 1.3835727408178912e-06, "loss": 2.7375, "step": 257600 }, { "epoch": 1.86, "learning_rate": 1.376339886299527e-06, "loss": 2.7346, "step": 257700 }, { "epoch": 1.86, "learning_rate": 1.3691070317811627e-06, "loss": 2.7308, "step": 257800 }, { "epoch": 1.87, "learning_rate": 1.3618741772627986e-06, "loss": 2.7364, "step": 257900 }, { "epoch": 1.87, "learning_rate": 1.3546413227444345e-06, "loss": 2.7356, "step": 258000 }, { "epoch": 1.87, "eval_accuracy": 0.46169559602338545, "eval_loss": 2.757066488265991, "eval_runtime": 29.5596, "eval_samples_per_second": 219.319, "eval_steps_per_second": 2.3, "step": 258000 }, { "epoch": 1.87, "learning_rate": 1.3474084682260701e-06, "loss": 2.7362, "step": 258100 }, { "epoch": 1.87, "learning_rate": 1.340175613707706e-06, "loss": 2.7308, "step": 258200 }, { "epoch": 1.87, "learning_rate": 1.3329427591893417e-06, "loss": 2.7459, "step": 258300 }, { "epoch": 1.87, "learning_rate": 1.3257099046709776e-06, "loss": 2.7381, "step": 258400 }, { "epoch": 1.87, "learning_rate": 1.3184770501526135e-06, "loss": 2.7388, "step": 258500 }, { "epoch": 1.87, "learning_rate": 1.3112441956342491e-06, "loss": 2.7453, "step": 258600 }, { "epoch": 1.87, "learning_rate": 1.304011341115885e-06, "loss": 2.7413, "step": 258700 }, { "epoch": 1.87, "learning_rate": 1.2967784865975209e-06, "loss": 2.7237, "step": 258800 }, { "epoch": 1.87, "learning_rate": 1.2895456320791563e-06, "loss": 2.7355, "step": 258900 }, { "epoch": 1.87, "learning_rate": 1.2823127775607922e-06, "loss": 2.7462, "step": 259000 }, { "epoch": 1.87, "eval_accuracy": 0.4617197920352778, "eval_loss": 2.7570412158966064, "eval_runtime": 27.9363, "eval_samples_per_second": 232.064, "eval_steps_per_second": 2.434, "step": 259000 }, { "epoch": 1.87, "learning_rate": 1.2750799230424279e-06, "loss": 2.7382, "step": 259100 }, { "epoch": 1.87, "learning_rate": 1.2678470685240638e-06, "loss": 2.7421, "step": 259200 }, { "epoch": 1.88, "learning_rate": 1.2606142140056996e-06, "loss": 2.7328, "step": 259300 }, { "epoch": 1.88, "learning_rate": 1.2533813594873353e-06, "loss": 2.7247, "step": 259400 }, { "epoch": 1.88, "learning_rate": 1.2461485049689712e-06, "loss": 2.735, "step": 259500 }, { "epoch": 1.88, "learning_rate": 1.238915650450607e-06, "loss": 2.737, "step": 259600 }, { "epoch": 1.88, "learning_rate": 1.2316827959322427e-06, "loss": 2.7381, "step": 259700 }, { "epoch": 1.88, "learning_rate": 1.224522269959062e-06, "loss": 2.7417, "step": 259800 }, { "epoch": 1.88, "learning_rate": 1.217289415440698e-06, "loss": 2.745, "step": 259900 }, { "epoch": 1.88, "learning_rate": 1.2100565609223338e-06, "loss": 2.7375, "step": 260000 }, { "epoch": 1.88, "eval_accuracy": 0.4617488272495486, "eval_loss": 2.7569446563720703, "eval_runtime": 31.5522, "eval_samples_per_second": 205.469, "eval_steps_per_second": 2.155, "step": 260000 }, { "epoch": 1.88, "learning_rate": 1.2028237064039695e-06, "loss": 2.7467, "step": 260100 }, { "epoch": 1.88, "learning_rate": 1.1955908518856053e-06, "loss": 2.7409, "step": 260200 }, { "epoch": 1.88, "learning_rate": 1.188357997367241e-06, "loss": 2.7339, "step": 260300 }, { "epoch": 1.88, "learning_rate": 1.1811251428488769e-06, "loss": 2.7397, "step": 260400 }, { "epoch": 1.88, "learning_rate": 1.1738922883305125e-06, "loss": 2.7418, "step": 260500 }, { "epoch": 1.88, "learning_rate": 1.1666594338121484e-06, "loss": 2.7402, "step": 260600 }, { "epoch": 1.89, "learning_rate": 1.1594265792937843e-06, "loss": 2.7448, "step": 260700 }, { "epoch": 1.89, "learning_rate": 1.1522660533206036e-06, "loss": 2.7441, "step": 260800 }, { "epoch": 1.89, "learning_rate": 1.1450331988022393e-06, "loss": 2.7411, "step": 260900 }, { "epoch": 1.89, "learning_rate": 1.1378003442838752e-06, "loss": 2.7368, "step": 261000 }, { "epoch": 1.89, "eval_accuracy": 0.46175306155162976, "eval_loss": 2.7569141387939453, "eval_runtime": 29.4044, "eval_samples_per_second": 220.477, "eval_steps_per_second": 2.313, "step": 261000 }, { "epoch": 1.89, "learning_rate": 1.130567489765511e-06, "loss": 2.7327, "step": 261100 }, { "epoch": 1.89, "learning_rate": 1.1233346352471467e-06, "loss": 2.7408, "step": 261200 }, { "epoch": 1.89, "learning_rate": 1.1161017807287824e-06, "loss": 2.7415, "step": 261300 }, { "epoch": 1.89, "learning_rate": 1.1088689262104183e-06, "loss": 2.7472, "step": 261400 }, { "epoch": 1.89, "learning_rate": 1.1016360716920541e-06, "loss": 2.7347, "step": 261500 }, { "epoch": 1.89, "learning_rate": 1.0944032171736898e-06, "loss": 2.7333, "step": 261600 }, { "epoch": 1.89, "learning_rate": 1.0871703626553257e-06, "loss": 2.7426, "step": 261700 }, { "epoch": 1.89, "learning_rate": 1.0799375081369616e-06, "loss": 2.7343, "step": 261800 }, { "epoch": 1.89, "learning_rate": 1.0727046536185972e-06, "loss": 2.7387, "step": 261900 }, { "epoch": 1.89, "learning_rate": 1.0654717991002329e-06, "loss": 2.7452, "step": 262000 }, { "epoch": 1.89, "eval_accuracy": 0.4617373341438997, "eval_loss": 2.7568695545196533, "eval_runtime": 29.6699, "eval_samples_per_second": 218.504, "eval_steps_per_second": 2.292, "step": 262000 }, { "epoch": 1.9, "learning_rate": 1.0582389445818688e-06, "loss": 2.7431, "step": 262100 }, { "epoch": 1.9, "learning_rate": 1.0510060900635046e-06, "loss": 2.737, "step": 262200 }, { "epoch": 1.9, "learning_rate": 1.0437732355451403e-06, "loss": 2.7351, "step": 262300 }, { "epoch": 1.9, "learning_rate": 1.0365403810267762e-06, "loss": 2.7306, "step": 262400 }, { "epoch": 1.9, "learning_rate": 1.0293075265084119e-06, "loss": 2.7392, "step": 262500 }, { "epoch": 1.9, "learning_rate": 1.0220746719900475e-06, "loss": 2.7406, "step": 262600 }, { "epoch": 1.9, "learning_rate": 1.0148418174716834e-06, "loss": 2.7378, "step": 262700 }, { "epoch": 1.9, "learning_rate": 1.0076089629533193e-06, "loss": 2.7391, "step": 262800 }, { "epoch": 1.9, "learning_rate": 1.000376108434955e-06, "loss": 2.7356, "step": 262900 }, { "epoch": 1.9, "learning_rate": 9.931432539165908e-07, "loss": 2.7394, "step": 263000 }, { "epoch": 1.9, "eval_accuracy": 0.46170285482695317, "eval_loss": 2.7567996978759766, "eval_runtime": 27.9535, "eval_samples_per_second": 231.921, "eval_steps_per_second": 2.433, "step": 263000 }, { "epoch": 1.9, "learning_rate": 9.859103993982267e-07, "loss": 2.7391, "step": 263100 }, { "epoch": 1.9, "learning_rate": 9.786775448798624e-07, "loss": 2.7399, "step": 263200 }, { "epoch": 1.9, "learning_rate": 9.71444690361498e-07, "loss": 2.7326, "step": 263300 }, { "epoch": 1.91, "learning_rate": 9.64211835843134e-07, "loss": 2.7277, "step": 263400 }, { "epoch": 1.91, "learning_rate": 9.569789813247698e-07, "loss": 2.7386, "step": 263500 }, { "epoch": 1.91, "learning_rate": 9.497461268064056e-07, "loss": 2.7339, "step": 263600 }, { "epoch": 1.91, "learning_rate": 9.425132722880413e-07, "loss": 2.7326, "step": 263700 }, { "epoch": 1.91, "learning_rate": 9.352804177696771e-07, "loss": 2.7286, "step": 263800 }, { "epoch": 1.91, "learning_rate": 9.280475632513128e-07, "loss": 2.7334, "step": 263900 }, { "epoch": 1.91, "learning_rate": 9.208147087329485e-07, "loss": 2.7378, "step": 264000 }, { "epoch": 1.91, "eval_accuracy": 0.46175245665133247, "eval_loss": 2.7567875385284424, "eval_runtime": 29.609, "eval_samples_per_second": 218.954, "eval_steps_per_second": 2.297, "step": 264000 }, { "epoch": 1.91, "learning_rate": 9.136541827597681e-07, "loss": 2.7481, "step": 264100 }, { "epoch": 1.91, "learning_rate": 9.064213282414038e-07, "loss": 2.7415, "step": 264200 }, { "epoch": 1.91, "learning_rate": 8.991884737230395e-07, "loss": 2.7386, "step": 264300 }, { "epoch": 1.91, "learning_rate": 8.919556192046754e-07, "loss": 2.7357, "step": 264400 }, { "epoch": 1.91, "learning_rate": 8.847950932314948e-07, "loss": 2.743, "step": 264500 }, { "epoch": 1.91, "learning_rate": 8.775622387131307e-07, "loss": 2.7419, "step": 264600 }, { "epoch": 1.91, "learning_rate": 8.703293841947664e-07, "loss": 2.7376, "step": 264700 }, { "epoch": 1.92, "learning_rate": 8.630965296764021e-07, "loss": 2.7359, "step": 264800 }, { "epoch": 1.92, "learning_rate": 8.558636751580379e-07, "loss": 2.7316, "step": 264900 }, { "epoch": 1.92, "learning_rate": 8.486308206396737e-07, "loss": 2.7446, "step": 265000 }, { "epoch": 1.92, "eval_accuracy": 0.4617639497569813, "eval_loss": 2.7567336559295654, "eval_runtime": 29.2914, "eval_samples_per_second": 221.328, "eval_steps_per_second": 2.322, "step": 265000 }, { "epoch": 1.92, "learning_rate": 8.413979661213096e-07, "loss": 2.7289, "step": 265100 }, { "epoch": 1.92, "learning_rate": 8.341651116029453e-07, "loss": 2.7291, "step": 265200 }, { "epoch": 1.92, "learning_rate": 8.269322570845811e-07, "loss": 2.7372, "step": 265300 }, { "epoch": 1.92, "learning_rate": 8.196994025662168e-07, "loss": 2.7369, "step": 265400 }, { "epoch": 1.92, "learning_rate": 8.124665480478526e-07, "loss": 2.739, "step": 265500 }, { "epoch": 1.92, "learning_rate": 8.052336935294884e-07, "loss": 2.7476, "step": 265600 }, { "epoch": 1.92, "learning_rate": 7.980008390111242e-07, "loss": 2.7415, "step": 265700 }, { "epoch": 1.92, "learning_rate": 7.907679844927601e-07, "loss": 2.738, "step": 265800 }, { "epoch": 1.92, "learning_rate": 7.835351299743958e-07, "loss": 2.7455, "step": 265900 }, { "epoch": 1.92, "learning_rate": 7.763022754560315e-07, "loss": 2.7436, "step": 266000 }, { "epoch": 1.92, "eval_accuracy": 0.46179419477184674, "eval_loss": 2.756711006164551, "eval_runtime": 31.9975, "eval_samples_per_second": 202.61, "eval_steps_per_second": 2.125, "step": 266000 }, { "epoch": 1.92, "learning_rate": 7.690694209376673e-07, "loss": 2.7329, "step": 266100 }, { "epoch": 1.93, "learning_rate": 7.61836566419303e-07, "loss": 2.7423, "step": 266200 }, { "epoch": 1.93, "learning_rate": 7.546037119009389e-07, "loss": 2.7321, "step": 266300 }, { "epoch": 1.93, "learning_rate": 7.473708573825747e-07, "loss": 2.7325, "step": 266400 }, { "epoch": 1.93, "learning_rate": 7.401380028642105e-07, "loss": 2.7361, "step": 266500 }, { "epoch": 1.93, "learning_rate": 7.329051483458461e-07, "loss": 2.7379, "step": 266600 }, { "epoch": 1.93, "learning_rate": 7.25672293827482e-07, "loss": 2.7332, "step": 266700 }, { "epoch": 1.93, "learning_rate": 7.184394393091178e-07, "loss": 2.7375, "step": 266800 }, { "epoch": 1.93, "learning_rate": 7.112065847907536e-07, "loss": 2.7367, "step": 266900 }, { "epoch": 1.93, "learning_rate": 7.040460588175731e-07, "loss": 2.7505, "step": 267000 }, { "epoch": 1.93, "eval_accuracy": 0.4617736281617382, "eval_loss": 2.7566604614257812, "eval_runtime": 29.751, "eval_samples_per_second": 217.908, "eval_steps_per_second": 2.286, "step": 267000 }, { "epoch": 1.93, "learning_rate": 6.968132042992088e-07, "loss": 2.7389, "step": 267100 }, { "epoch": 1.93, "learning_rate": 6.895803497808445e-07, "loss": 2.7352, "step": 267200 }, { "epoch": 1.93, "learning_rate": 6.823474952624803e-07, "loss": 2.7449, "step": 267300 }, { "epoch": 1.93, "learning_rate": 6.751146407441162e-07, "loss": 2.7338, "step": 267400 }, { "epoch": 1.93, "learning_rate": 6.67881786225752e-07, "loss": 2.7355, "step": 267500 }, { "epoch": 1.94, "learning_rate": 6.606489317073877e-07, "loss": 2.7429, "step": 267600 }, { "epoch": 1.94, "learning_rate": 6.534160771890234e-07, "loss": 2.7364, "step": 267700 }, { "epoch": 1.94, "learning_rate": 6.461832226706593e-07, "loss": 2.738, "step": 267800 }, { "epoch": 1.94, "learning_rate": 6.38950368152295e-07, "loss": 2.7408, "step": 267900 }, { "epoch": 1.94, "learning_rate": 6.317175136339308e-07, "loss": 2.7493, "step": 268000 }, { "epoch": 1.94, "eval_accuracy": 0.4617833065664952, "eval_loss": 2.7566213607788086, "eval_runtime": 28.2536, "eval_samples_per_second": 229.457, "eval_steps_per_second": 2.407, "step": 268000 }, { "epoch": 1.94, "learning_rate": 6.245569876607502e-07, "loss": 2.7406, "step": 268100 }, { "epoch": 1.94, "learning_rate": 6.17324133142386e-07, "loss": 2.7364, "step": 268200 }, { "epoch": 1.94, "learning_rate": 6.100912786240218e-07, "loss": 2.7406, "step": 268300 }, { "epoch": 1.94, "learning_rate": 6.029307526508412e-07, "loss": 2.7454, "step": 268400 }, { "epoch": 1.94, "learning_rate": 5.95697898132477e-07, "loss": 2.7426, "step": 268500 }, { "epoch": 1.94, "learning_rate": 5.884650436141129e-07, "loss": 2.7343, "step": 268600 }, { "epoch": 1.94, "learning_rate": 5.812321890957485e-07, "loss": 2.7358, "step": 268700 }, { "epoch": 1.94, "learning_rate": 5.739993345773844e-07, "loss": 2.736, "step": 268800 }, { "epoch": 1.94, "learning_rate": 5.667664800590202e-07, "loss": 2.732, "step": 268900 }, { "epoch": 1.95, "learning_rate": 5.595336255406559e-07, "loss": 2.7391, "step": 269000 }, { "epoch": 1.95, "eval_accuracy": 0.4617845163670898, "eval_loss": 2.7565996646881104, "eval_runtime": 30.4715, "eval_samples_per_second": 212.756, "eval_steps_per_second": 2.232, "step": 269000 }, { "epoch": 1.95, "learning_rate": 5.523007710222917e-07, "loss": 2.733, "step": 269100 }, { "epoch": 1.95, "learning_rate": 5.450679165039275e-07, "loss": 2.7411, "step": 269200 }, { "epoch": 1.95, "learning_rate": 5.378350619855633e-07, "loss": 2.7453, "step": 269300 }, { "epoch": 1.95, "learning_rate": 5.30602207467199e-07, "loss": 2.7358, "step": 269400 }, { "epoch": 1.95, "learning_rate": 5.233693529488348e-07, "loss": 2.7314, "step": 269500 }, { "epoch": 1.95, "learning_rate": 5.161364984304707e-07, "loss": 2.744, "step": 269600 }, { "epoch": 1.95, "learning_rate": 5.089036439121064e-07, "loss": 2.7467, "step": 269700 }, { "epoch": 1.95, "learning_rate": 5.016707893937422e-07, "loss": 2.7395, "step": 269800 }, { "epoch": 1.95, "learning_rate": 4.94437934875378e-07, "loss": 2.7339, "step": 269900 }, { "epoch": 1.95, "learning_rate": 4.872774089021974e-07, "loss": 2.7431, "step": 270000 }, { "epoch": 1.95, "eval_accuracy": 0.461747617448954, "eval_loss": 2.756573438644409, "eval_runtime": 31.3033, "eval_samples_per_second": 207.103, "eval_steps_per_second": 2.172, "step": 270000 }, { "epoch": 1.95, "learning_rate": 4.800445543838331e-07, "loss": 2.727, "step": 270100 }, { "epoch": 1.95, "learning_rate": 4.7281169986546897e-07, "loss": 2.7348, "step": 270200 }, { "epoch": 1.96, "learning_rate": 4.6557884534710474e-07, "loss": 2.7455, "step": 270300 }, { "epoch": 1.96, "learning_rate": 4.5834599082874046e-07, "loss": 2.7347, "step": 270400 }, { "epoch": 1.96, "learning_rate": 4.511131363103763e-07, "loss": 2.7418, "step": 270500 }, { "epoch": 1.96, "learning_rate": 4.438802817920121e-07, "loss": 2.7362, "step": 270600 }, { "epoch": 1.96, "learning_rate": 4.3664742727364783e-07, "loss": 2.7349, "step": 270700 }, { "epoch": 1.96, "learning_rate": 4.2941457275528366e-07, "loss": 2.7409, "step": 270800 }, { "epoch": 1.96, "learning_rate": 4.2218171823691943e-07, "loss": 2.7341, "step": 270900 }, { "epoch": 1.96, "learning_rate": 4.1494886371855515e-07, "loss": 2.7387, "step": 271000 }, { "epoch": 1.96, "eval_accuracy": 0.46175306155162976, "eval_loss": 2.7565271854400635, "eval_runtime": 28.2552, "eval_samples_per_second": 229.444, "eval_steps_per_second": 2.407, "step": 271000 }, { "epoch": 1.96, "learning_rate": 4.0771600920019097e-07, "loss": 2.7284, "step": 271100 }, { "epoch": 1.96, "learning_rate": 4.004831546818268e-07, "loss": 2.7376, "step": 271200 }, { "epoch": 1.96, "learning_rate": 3.9325030016346257e-07, "loss": 2.7438, "step": 271300 }, { "epoch": 1.96, "learning_rate": 3.860174456450983e-07, "loss": 2.73, "step": 271400 }, { "epoch": 1.96, "learning_rate": 3.787845911267341e-07, "loss": 2.7454, "step": 271500 }, { "epoch": 1.96, "learning_rate": 3.7155173660836994e-07, "loss": 2.7397, "step": 271600 }, { "epoch": 1.97, "learning_rate": 3.6431888209000566e-07, "loss": 2.7391, "step": 271700 }, { "epoch": 1.97, "learning_rate": 3.570860275716415e-07, "loss": 2.7378, "step": 271800 }, { "epoch": 1.97, "learning_rate": 3.4985317305327726e-07, "loss": 2.7392, "step": 271900 }, { "epoch": 1.97, "learning_rate": 3.42620318534913e-07, "loss": 2.741, "step": 272000 }, { "epoch": 1.97, "eval_accuracy": 0.46179358987154945, "eval_loss": 2.7564971446990967, "eval_runtime": 28.8041, "eval_samples_per_second": 225.072, "eval_steps_per_second": 2.361, "step": 272000 }, { "epoch": 1.97, "learning_rate": 3.354597925617324e-07, "loss": 2.7359, "step": 272100 }, { "epoch": 1.97, "learning_rate": 3.282269380433682e-07, "loss": 2.7351, "step": 272200 }, { "epoch": 1.97, "learning_rate": 3.2099408352500405e-07, "loss": 2.7398, "step": 272300 }, { "epoch": 1.97, "learning_rate": 3.1376122900663977e-07, "loss": 2.7399, "step": 272400 }, { "epoch": 1.97, "learning_rate": 3.0652837448827554e-07, "loss": 2.7299, "step": 272500 }, { "epoch": 1.97, "learning_rate": 2.9929551996991137e-07, "loss": 2.7412, "step": 272600 }, { "epoch": 1.97, "learning_rate": 2.9206266545154714e-07, "loss": 2.7434, "step": 272700 }, { "epoch": 1.97, "learning_rate": 2.848298109331829e-07, "loss": 2.741, "step": 272800 }, { "epoch": 1.97, "learning_rate": 2.775969564148187e-07, "loss": 2.7356, "step": 272900 }, { "epoch": 1.97, "learning_rate": 2.7036410189645445e-07, "loss": 2.7343, "step": 273000 }, { "epoch": 1.97, "eval_accuracy": 0.4617863310679817, "eval_loss": 2.756471872329712, "eval_runtime": 29.6952, "eval_samples_per_second": 218.318, "eval_steps_per_second": 2.29, "step": 273000 }, { "epoch": 1.98, "learning_rate": 2.6313124737809023e-07, "loss": 2.7316, "step": 273100 }, { "epoch": 1.98, "learning_rate": 2.5589839285972605e-07, "loss": 2.7433, "step": 273200 }, { "epoch": 1.98, "learning_rate": 2.486655383413618e-07, "loss": 2.7338, "step": 273300 }, { "epoch": 1.98, "learning_rate": 2.4150501236818125e-07, "loss": 2.7303, "step": 273400 }, { "epoch": 1.98, "learning_rate": 2.3427215784981705e-07, "loss": 2.7432, "step": 273500 }, { "epoch": 1.98, "learning_rate": 2.2703930333145282e-07, "loss": 2.7416, "step": 273600 }, { "epoch": 1.98, "learning_rate": 2.198064488130886e-07, "loss": 2.739, "step": 273700 }, { "epoch": 1.98, "learning_rate": 2.125735942947244e-07, "loss": 2.7426, "step": 273800 }, { "epoch": 1.98, "learning_rate": 2.0534073977636016e-07, "loss": 2.7369, "step": 273900 }, { "epoch": 1.98, "learning_rate": 1.9810788525799593e-07, "loss": 2.7378, "step": 274000 }, { "epoch": 1.98, "eval_accuracy": 0.4617851212673871, "eval_loss": 2.756432056427002, "eval_runtime": 29.9933, "eval_samples_per_second": 216.148, "eval_steps_per_second": 2.267, "step": 274000 }, { "epoch": 1.98, "learning_rate": 1.9087503073963173e-07, "loss": 2.7462, "step": 274100 }, { "epoch": 1.98, "learning_rate": 1.836421762212675e-07, "loss": 2.7361, "step": 274200 }, { "epoch": 1.98, "learning_rate": 1.764093217029033e-07, "loss": 2.7366, "step": 274300 }, { "epoch": 1.98, "learning_rate": 1.6917646718453908e-07, "loss": 2.7395, "step": 274400 }, { "epoch": 1.99, "learning_rate": 1.6194361266617485e-07, "loss": 2.7322, "step": 274500 }, { "epoch": 1.99, "learning_rate": 1.5478308669299427e-07, "loss": 2.7378, "step": 274600 }, { "epoch": 1.99, "learning_rate": 1.4755023217463005e-07, "loss": 2.7433, "step": 274700 }, { "epoch": 1.99, "learning_rate": 1.4031737765626584e-07, "loss": 2.7401, "step": 274800 }, { "epoch": 1.99, "learning_rate": 1.3308452313790162e-07, "loss": 2.7369, "step": 274900 }, { "epoch": 1.99, "learning_rate": 1.2585166861953741e-07, "loss": 2.737, "step": 275000 }, { "epoch": 1.99, "eval_accuracy": 0.46180387317660365, "eval_loss": 2.756422996520996, "eval_runtime": 29.6811, "eval_samples_per_second": 218.422, "eval_steps_per_second": 2.291, "step": 275000 }, { "epoch": 1.99, "learning_rate": 1.1861881410117317e-07, "loss": 2.7343, "step": 275100 }, { "epoch": 1.99, "learning_rate": 1.1138595958280896e-07, "loss": 2.7331, "step": 275200 }, { "epoch": 1.99, "learning_rate": 1.0415310506444474e-07, "loss": 2.7372, "step": 275300 }, { "epoch": 1.99, "learning_rate": 9.692025054608053e-08, "loss": 2.7342, "step": 275400 }, { "epoch": 1.99, "learning_rate": 8.96873960277163e-08, "loss": 2.7394, "step": 275500 }, { "epoch": 1.99, "learning_rate": 8.252687005453573e-08, "loss": 2.733, "step": 275600 }, { "epoch": 1.99, "learning_rate": 7.529401553617151e-08, "loss": 2.7389, "step": 275700 }, { "epoch": 1.99, "learning_rate": 6.80611610178073e-08, "loss": 2.7343, "step": 275800 }, { "epoch": 2.0, "learning_rate": 6.082830649944307e-08, "loss": 2.7303, "step": 275900 }, { "epoch": 2.0, "learning_rate": 5.3595451981078855e-08, "loss": 2.7397, "step": 276000 }, { "epoch": 2.0, "eval_accuracy": 0.4618002437748198, "eval_loss": 2.756411075592041, "eval_runtime": 30.3922, "eval_samples_per_second": 213.311, "eval_steps_per_second": 2.237, "step": 276000 }, { "epoch": 2.0, "learning_rate": 4.6362597462714634e-08, "loss": 2.7391, "step": 276100 }, { "epoch": 2.0, "learning_rate": 3.912974294435042e-08, "loss": 2.739, "step": 276200 }, { "epoch": 2.0, "learning_rate": 3.18968884259862e-08, "loss": 2.7493, "step": 276300 }, { "epoch": 2.0, "learning_rate": 2.4736362452805626e-08, "loss": 2.733, "step": 276400 }, { "epoch": 2.0, "learning_rate": 1.7503507934441408e-08, "loss": 2.7424, "step": 276500 }, { "epoch": 2.0, "step": 276518, "total_flos": 5.3881880355706765e+20, "train_loss": 2.8250040690803138, "train_runtime": 396233.9412, "train_samples_per_second": 133.99, "train_steps_per_second": 0.698 } ], "logging_steps": 100, "max_steps": 276518, "num_train_epochs": 2, "save_steps": 20000, "total_flos": 5.3881880355706765e+20, "trial_name": null, "trial_params": null }