{ "best_metric": null, "best_model_checkpoint": null, "epoch": 97.17314487632508, "global_step": 110000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.44, "learning_rate": 4.97791519434629e-05, "loss": 0.9761, "step": 500 }, { "epoch": 0.88, "learning_rate": 4.95583038869258e-05, "loss": 0.6349, "step": 1000 }, { "epoch": 1.33, "learning_rate": 4.93374558303887e-05, "loss": 0.5803, "step": 1500 }, { "epoch": 1.77, "learning_rate": 4.9116607773851593e-05, "loss": 0.5568, "step": 2000 }, { "epoch": 2.21, "learning_rate": 4.889575971731449e-05, "loss": 0.5413, "step": 2500 }, { "epoch": 2.65, "learning_rate": 4.867491166077739e-05, "loss": 0.5304, "step": 3000 }, { "epoch": 3.09, "learning_rate": 4.8454063604240283e-05, "loss": 0.5222, "step": 3500 }, { "epoch": 3.53, "learning_rate": 4.823321554770318e-05, "loss": 0.5149, "step": 4000 }, { "epoch": 3.98, "learning_rate": 4.8012367491166086e-05, "loss": 0.5104, "step": 4500 }, { "epoch": 4.42, "learning_rate": 4.779151943462898e-05, "loss": 0.5041, "step": 5000 }, { "epoch": 4.86, "learning_rate": 4.7570671378091875e-05, "loss": 0.5012, "step": 5500 }, { "epoch": 5.3, "learning_rate": 4.734982332155477e-05, "loss": 0.4968, "step": 6000 }, { "epoch": 5.74, "learning_rate": 4.712897526501767e-05, "loss": 0.4941, "step": 6500 }, { "epoch": 6.18, "learning_rate": 4.690812720848057e-05, "loss": 0.4911, "step": 7000 }, { "epoch": 6.63, "learning_rate": 4.6687279151943466e-05, "loss": 0.488, "step": 7500 }, { "epoch": 7.07, "learning_rate": 4.646643109540637e-05, "loss": 0.4861, "step": 8000 }, { "epoch": 7.51, "learning_rate": 4.624558303886926e-05, "loss": 0.4828, "step": 8500 }, { "epoch": 7.95, "learning_rate": 4.6024734982332156e-05, "loss": 0.4821, "step": 9000 }, { "epoch": 8.39, "learning_rate": 4.580388692579505e-05, "loss": 0.4785, "step": 9500 }, { "epoch": 8.83, "learning_rate": 4.558303886925796e-05, "loss": 0.4781, "step": 10000 }, { "epoch": 9.28, "learning_rate": 4.536219081272085e-05, "loss": 0.4754, "step": 10500 }, { "epoch": 9.72, "learning_rate": 4.514134275618375e-05, "loss": 0.4745, "step": 11000 }, { "epoch": 10.16, "learning_rate": 4.492049469964665e-05, "loss": 0.4726, "step": 11500 }, { "epoch": 10.6, "learning_rate": 4.469964664310954e-05, "loss": 0.471, "step": 12000 }, { "epoch": 11.04, "learning_rate": 4.4478798586572437e-05, "loss": 0.4704, "step": 12500 }, { "epoch": 11.48, "learning_rate": 4.425795053003534e-05, "loss": 0.4678, "step": 13000 }, { "epoch": 11.93, "learning_rate": 4.403710247349824e-05, "loss": 0.4681, "step": 13500 }, { "epoch": 12.37, "learning_rate": 4.381625441696113e-05, "loss": 0.4649, "step": 14000 }, { "epoch": 12.81, "learning_rate": 4.359540636042403e-05, "loss": 0.4654, "step": 14500 }, { "epoch": 13.25, "learning_rate": 4.337455830388692e-05, "loss": 0.4632, "step": 15000 }, { "epoch": 13.69, "learning_rate": 4.315371024734983e-05, "loss": 0.4627, "step": 15500 }, { "epoch": 14.13, "learning_rate": 4.2932862190812724e-05, "loss": 0.4616, "step": 16000 }, { "epoch": 14.58, "learning_rate": 4.271201413427562e-05, "loss": 0.4601, "step": 16500 }, { "epoch": 15.02, "learning_rate": 4.249116607773852e-05, "loss": 0.4607, "step": 17000 }, { "epoch": 15.46, "learning_rate": 4.2270318021201414e-05, "loss": 0.4572, "step": 17500 }, { "epoch": 15.9, "learning_rate": 4.204946996466431e-05, "loss": 0.4587, "step": 18000 }, { "epoch": 16.34, "learning_rate": 4.182862190812721e-05, "loss": 0.4557, "step": 18500 }, { "epoch": 16.78, "learning_rate": 4.160777385159011e-05, "loss": 0.4565, "step": 19000 }, { "epoch": 17.23, "learning_rate": 4.1386925795053005e-05, "loss": 0.4547, "step": 19500 }, { "epoch": 17.67, "learning_rate": 4.11660777385159e-05, "loss": 0.4541, "step": 20000 }, { "epoch": 18.11, "learning_rate": 4.09452296819788e-05, "loss": 0.4539, "step": 20500 }, { "epoch": 18.55, "learning_rate": 4.07243816254417e-05, "loss": 0.452, "step": 21000 }, { "epoch": 18.99, "learning_rate": 4.0503533568904596e-05, "loss": 0.4531, "step": 21500 }, { "epoch": 19.43, "learning_rate": 4.028268551236749e-05, "loss": 0.4496, "step": 22000 }, { "epoch": 19.88, "learning_rate": 4.006183745583039e-05, "loss": 0.4513, "step": 22500 }, { "epoch": 20.32, "learning_rate": 3.9840989399293286e-05, "loss": 0.4488, "step": 23000 }, { "epoch": 20.76, "learning_rate": 3.962014134275618e-05, "loss": 0.4493, "step": 23500 }, { "epoch": 21.2, "learning_rate": 3.939929328621909e-05, "loss": 0.4481, "step": 24000 }, { "epoch": 21.64, "learning_rate": 3.917844522968198e-05, "loss": 0.4475, "step": 24500 }, { "epoch": 22.08, "learning_rate": 3.895759717314488e-05, "loss": 0.4475, "step": 25000 }, { "epoch": 22.53, "learning_rate": 3.873674911660777e-05, "loss": 0.4453, "step": 25500 }, { "epoch": 22.97, "learning_rate": 3.851590106007067e-05, "loss": 0.447, "step": 26000 }, { "epoch": 23.41, "learning_rate": 3.8295053003533574e-05, "loss": 0.4435, "step": 26500 }, { "epoch": 23.85, "learning_rate": 3.807420494699647e-05, "loss": 0.4454, "step": 27000 }, { "epoch": 24.29, "learning_rate": 3.785335689045937e-05, "loss": 0.443, "step": 27500 }, { "epoch": 24.73, "learning_rate": 3.7632508833922264e-05, "loss": 0.4436, "step": 28000 }, { "epoch": 25.18, "learning_rate": 3.741166077738516e-05, "loss": 0.4427, "step": 28500 }, { "epoch": 25.62, "learning_rate": 3.719081272084805e-05, "loss": 0.4418, "step": 29000 }, { "epoch": 26.06, "learning_rate": 3.696996466431096e-05, "loss": 0.4426, "step": 29500 }, { "epoch": 26.5, "learning_rate": 3.6749116607773855e-05, "loss": 0.4398, "step": 30000 }, { "epoch": 26.94, "learning_rate": 3.652826855123675e-05, "loss": 0.4417, "step": 30500 }, { "epoch": 27.39, "learning_rate": 3.630742049469965e-05, "loss": 0.4386, "step": 31000 }, { "epoch": 27.83, "learning_rate": 3.6086572438162545e-05, "loss": 0.4401, "step": 31500 }, { "epoch": 28.27, "learning_rate": 3.586572438162544e-05, "loss": 0.4382, "step": 32000 }, { "epoch": 28.71, "learning_rate": 3.564487632508834e-05, "loss": 0.4386, "step": 32500 }, { "epoch": 29.15, "learning_rate": 3.542402826855124e-05, "loss": 0.4381, "step": 33000 }, { "epoch": 29.59, "learning_rate": 3.5203180212014136e-05, "loss": 0.4369, "step": 33500 }, { "epoch": 30.04, "learning_rate": 3.498233215547703e-05, "loss": 0.438, "step": 34000 }, { "epoch": 30.48, "learning_rate": 3.476148409893993e-05, "loss": 0.435, "step": 34500 }, { "epoch": 30.92, "learning_rate": 3.454063604240283e-05, "loss": 0.4372, "step": 35000 }, { "epoch": 31.36, "learning_rate": 3.431978798586573e-05, "loss": 0.4343, "step": 35500 }, { "epoch": 31.8, "learning_rate": 3.409893992932862e-05, "loss": 0.4358, "step": 36000 }, { "epoch": 32.24, "learning_rate": 3.387809187279152e-05, "loss": 0.4341, "step": 36500 }, { "epoch": 32.69, "learning_rate": 3.365724381625442e-05, "loss": 0.4343, "step": 37000 }, { "epoch": 33.13, "learning_rate": 3.343639575971731e-05, "loss": 0.4343, "step": 37500 }, { "epoch": 33.57, "learning_rate": 3.321554770318021e-05, "loss": 0.4326, "step": 38000 }, { "epoch": 34.01, "learning_rate": 3.2994699646643114e-05, "loss": 0.4343, "step": 38500 }, { "epoch": 34.45, "learning_rate": 3.277385159010601e-05, "loss": 0.4309, "step": 39000 }, { "epoch": 34.89, "learning_rate": 3.25530035335689e-05, "loss": 0.4332, "step": 39500 }, { "epoch": 35.34, "learning_rate": 3.2332155477031804e-05, "loss": 0.4306, "step": 40000 }, { "epoch": 35.78, "learning_rate": 3.2111307420494705e-05, "loss": 0.4318, "step": 40500 }, { "epoch": 36.22, "learning_rate": 3.18904593639576e-05, "loss": 0.4306, "step": 41000 }, { "epoch": 36.66, "learning_rate": 3.1669611307420494e-05, "loss": 0.4304, "step": 41500 }, { "epoch": 37.1, "learning_rate": 3.1448763250883395e-05, "loss": 0.4307, "step": 42000 }, { "epoch": 37.54, "learning_rate": 3.122791519434629e-05, "loss": 0.4289, "step": 42500 }, { "epoch": 37.99, "learning_rate": 3.1007067137809184e-05, "loss": 0.431, "step": 43000 }, { "epoch": 38.43, "learning_rate": 3.078621908127209e-05, "loss": 0.4276, "step": 43500 }, { "epoch": 38.87, "learning_rate": 3.0565371024734986e-05, "loss": 0.4296, "step": 44000 }, { "epoch": 39.31, "learning_rate": 3.034452296819788e-05, "loss": 0.4274, "step": 44500 }, { "epoch": 39.75, "learning_rate": 3.0123674911660775e-05, "loss": 0.4285, "step": 45000 }, { "epoch": 40.19, "learning_rate": 2.990282685512368e-05, "loss": 0.4276, "step": 45500 }, { "epoch": 40.64, "learning_rate": 2.9681978798586574e-05, "loss": 0.427, "step": 46000 }, { "epoch": 41.08, "learning_rate": 2.9461130742049468e-05, "loss": 0.4278, "step": 46500 }, { "epoch": 41.52, "learning_rate": 2.9240282685512373e-05, "loss": 0.4256, "step": 47000 }, { "epoch": 41.96, "learning_rate": 2.9019434628975267e-05, "loss": 0.4277, "step": 47500 }, { "epoch": 42.4, "learning_rate": 2.879858657243816e-05, "loss": 0.4246, "step": 48000 }, { "epoch": 42.84, "learning_rate": 2.857773851590106e-05, "loss": 0.4266, "step": 48500 }, { "epoch": 43.29, "learning_rate": 2.835689045936396e-05, "loss": 0.4246, "step": 49000 }, { "epoch": 43.73, "learning_rate": 2.8136042402826858e-05, "loss": 0.4254, "step": 49500 }, { "epoch": 44.17, "learning_rate": 2.7915194346289753e-05, "loss": 0.4248, "step": 50000 }, { "epoch": 44.61, "learning_rate": 2.7694346289752654e-05, "loss": 0.424, "step": 50500 }, { "epoch": 45.05, "learning_rate": 2.747349823321555e-05, "loss": 0.4251, "step": 51000 }, { "epoch": 45.49, "learning_rate": 2.7252650176678446e-05, "loss": 0.4227, "step": 51500 }, { "epoch": 45.94, "learning_rate": 2.703180212014134e-05, "loss": 0.4248, "step": 52000 }, { "epoch": 46.38, "learning_rate": 2.6810954063604245e-05, "loss": 0.4221, "step": 52500 }, { "epoch": 46.82, "learning_rate": 2.659010600706714e-05, "loss": 0.4237, "step": 53000 }, { "epoch": 47.26, "learning_rate": 2.6369257950530034e-05, "loss": 0.4221, "step": 53500 }, { "epoch": 47.7, "learning_rate": 2.6148409893992938e-05, "loss": 0.4225, "step": 54000 }, { "epoch": 48.14, "learning_rate": 2.5927561837455833e-05, "loss": 0.4225, "step": 54500 }, { "epoch": 48.59, "learning_rate": 2.5706713780918727e-05, "loss": 0.4214, "step": 55000 }, { "epoch": 49.03, "learning_rate": 2.5485865724381625e-05, "loss": 0.4228, "step": 55500 }, { "epoch": 49.47, "learning_rate": 2.5265017667844526e-05, "loss": 0.42, "step": 56000 }, { "epoch": 49.91, "learning_rate": 2.5044169611307424e-05, "loss": 0.4222, "step": 56500 }, { "epoch": 50.35, "learning_rate": 2.4823321554770318e-05, "loss": 0.4197, "step": 57000 }, { "epoch": 50.8, "learning_rate": 2.4602473498233216e-05, "loss": 0.4211, "step": 57500 }, { "epoch": 51.24, "learning_rate": 2.4381625441696117e-05, "loss": 0.42, "step": 58000 }, { "epoch": 51.68, "learning_rate": 2.416077738515901e-05, "loss": 0.4201, "step": 58500 }, { "epoch": 52.12, "learning_rate": 2.393992932862191e-05, "loss": 0.4203, "step": 59000 }, { "epoch": 52.56, "learning_rate": 2.3719081272084807e-05, "loss": 0.419, "step": 59500 }, { "epoch": 53.0, "learning_rate": 2.3498233215547705e-05, "loss": 0.4207, "step": 60000 }, { "epoch": 53.45, "learning_rate": 2.32773851590106e-05, "loss": 0.4176, "step": 60500 }, { "epoch": 53.89, "learning_rate": 2.30565371024735e-05, "loss": 0.4199, "step": 61000 }, { "epoch": 54.33, "learning_rate": 2.2835689045936398e-05, "loss": 0.4179, "step": 61500 }, { "epoch": 54.77, "learning_rate": 2.2614840989399292e-05, "loss": 0.4188, "step": 62000 }, { "epoch": 55.21, "learning_rate": 2.2393992932862194e-05, "loss": 0.4181, "step": 62500 }, { "epoch": 55.65, "learning_rate": 2.2173144876325088e-05, "loss": 0.4178, "step": 63000 }, { "epoch": 56.1, "learning_rate": 2.195229681978799e-05, "loss": 0.4184, "step": 63500 }, { "epoch": 56.54, "learning_rate": 2.1731448763250883e-05, "loss": 0.4169, "step": 64000 }, { "epoch": 56.98, "learning_rate": 2.151060070671378e-05, "loss": 0.4185, "step": 64500 }, { "epoch": 57.42, "learning_rate": 2.128975265017668e-05, "loss": 0.4159, "step": 65000 }, { "epoch": 57.86, "learning_rate": 2.1068904593639577e-05, "loss": 0.4177, "step": 65500 }, { "epoch": 58.3, "learning_rate": 2.0848056537102475e-05, "loss": 0.4159, "step": 66000 }, { "epoch": 58.75, "learning_rate": 2.0627208480565372e-05, "loss": 0.4169, "step": 66500 }, { "epoch": 59.19, "learning_rate": 2.040636042402827e-05, "loss": 0.4163, "step": 67000 }, { "epoch": 59.63, "learning_rate": 2.0185512367491165e-05, "loss": 0.4159, "step": 67500 }, { "epoch": 60.07, "learning_rate": 1.9964664310954066e-05, "loss": 0.4167, "step": 68000 }, { "epoch": 60.51, "learning_rate": 1.974381625441696e-05, "loss": 0.4148, "step": 68500 }, { "epoch": 60.95, "learning_rate": 1.9522968197879858e-05, "loss": 0.4165, "step": 69000 }, { "epoch": 61.4, "learning_rate": 1.930212014134276e-05, "loss": 0.4142, "step": 69500 }, { "epoch": 61.84, "learning_rate": 1.9081272084805653e-05, "loss": 0.4157, "step": 70000 }, { "epoch": 62.28, "learning_rate": 1.8860424028268555e-05, "loss": 0.4144, "step": 70500 }, { "epoch": 62.72, "learning_rate": 1.863957597173145e-05, "loss": 0.4149, "step": 71000 }, { "epoch": 63.16, "learning_rate": 1.8418727915194347e-05, "loss": 0.4147, "step": 71500 }, { "epoch": 63.6, "learning_rate": 1.8197879858657244e-05, "loss": 0.4139, "step": 72000 }, { "epoch": 64.05, "learning_rate": 1.7977031802120142e-05, "loss": 0.4151, "step": 72500 }, { "epoch": 64.49, "learning_rate": 1.775618374558304e-05, "loss": 0.413, "step": 73000 }, { "epoch": 64.93, "learning_rate": 1.7535335689045938e-05, "loss": 0.4147, "step": 73500 }, { "epoch": 65.37, "learning_rate": 1.7314487632508836e-05, "loss": 0.4127, "step": 74000 }, { "epoch": 65.81, "learning_rate": 1.709363957597173e-05, "loss": 0.4139, "step": 74500 }, { "epoch": 66.25, "learning_rate": 1.687279151943463e-05, "loss": 0.4128, "step": 75000 }, { "epoch": 66.7, "learning_rate": 1.6651943462897526e-05, "loss": 0.4131, "step": 75500 }, { "epoch": 67.14, "learning_rate": 1.6431095406360427e-05, "loss": 0.4133, "step": 76000 }, { "epoch": 67.58, "learning_rate": 1.6210247349823324e-05, "loss": 0.4123, "step": 76500 }, { "epoch": 68.02, "learning_rate": 1.598939929328622e-05, "loss": 0.4136, "step": 77000 }, { "epoch": 68.46, "learning_rate": 1.576855123674912e-05, "loss": 0.4114, "step": 77500 }, { "epoch": 68.9, "learning_rate": 1.5547703180212014e-05, "loss": 0.413, "step": 78000 }, { "epoch": 69.35, "learning_rate": 1.5326855123674912e-05, "loss": 0.4113, "step": 78500 }, { "epoch": 69.79, "learning_rate": 1.5106007067137808e-05, "loss": 0.4123, "step": 79000 }, { "epoch": 70.23, "learning_rate": 1.4885159010600708e-05, "loss": 0.4116, "step": 79500 }, { "epoch": 70.67, "learning_rate": 1.4664310954063604e-05, "loss": 0.4116, "step": 80000 }, { "epoch": 71.11, "learning_rate": 1.4443462897526502e-05, "loss": 0.412, "step": 80500 }, { "epoch": 71.55, "learning_rate": 1.4222614840989401e-05, "loss": 0.4107, "step": 81000 }, { "epoch": 72.0, "learning_rate": 1.4001766784452297e-05, "loss": 0.4122, "step": 81500 }, { "epoch": 72.44, "learning_rate": 1.3780918727915197e-05, "loss": 0.41, "step": 82000 }, { "epoch": 72.88, "learning_rate": 1.3560070671378091e-05, "loss": 0.4114, "step": 82500 }, { "epoch": 73.32, "learning_rate": 1.333922261484099e-05, "loss": 0.4102, "step": 83000 }, { "epoch": 73.76, "learning_rate": 1.3118374558303886e-05, "loss": 0.4108, "step": 83500 }, { "epoch": 74.2, "learning_rate": 1.2897526501766786e-05, "loss": 0.4104, "step": 84000 }, { "epoch": 74.65, "learning_rate": 1.2676678445229684e-05, "loss": 0.4101, "step": 84500 }, { "epoch": 75.09, "learning_rate": 1.245583038869258e-05, "loss": 0.4106, "step": 85000 }, { "epoch": 75.53, "learning_rate": 1.2234982332155478e-05, "loss": 0.4094, "step": 85500 }, { "epoch": 75.97, "learning_rate": 1.2014134275618374e-05, "loss": 0.4107, "step": 86000 }, { "epoch": 76.41, "learning_rate": 1.1793286219081273e-05, "loss": 0.4088, "step": 86500 }, { "epoch": 76.86, "learning_rate": 1.1572438162544171e-05, "loss": 0.4101, "step": 87000 }, { "epoch": 77.3, "learning_rate": 1.1351590106007069e-05, "loss": 0.4089, "step": 87500 }, { "epoch": 77.74, "learning_rate": 1.1130742049469965e-05, "loss": 0.4095, "step": 88000 }, { "epoch": 78.18, "learning_rate": 1.0909893992932863e-05, "loss": 0.4092, "step": 88500 }, { "epoch": 78.62, "learning_rate": 1.068904593639576e-05, "loss": 0.4087, "step": 89000 }, { "epoch": 79.06, "learning_rate": 1.0468197879858656e-05, "loss": 0.4094, "step": 89500 }, { "epoch": 79.51, "learning_rate": 1.0247349823321556e-05, "loss": 0.4081, "step": 90000 }, { "epoch": 79.95, "learning_rate": 1.0026501766784454e-05, "loss": 0.4092, "step": 90500 }, { "epoch": 80.39, "learning_rate": 9.805653710247351e-06, "loss": 0.4078, "step": 91000 }, { "epoch": 80.83, "learning_rate": 9.584805653710247e-06, "loss": 0.4087, "step": 91500 }, { "epoch": 81.27, "learning_rate": 9.363957597173145e-06, "loss": 0.4078, "step": 92000 }, { "epoch": 81.71, "learning_rate": 9.143109540636043e-06, "loss": 0.4081, "step": 92500 }, { "epoch": 82.16, "learning_rate": 8.922261484098939e-06, "loss": 0.4082, "step": 93000 }, { "epoch": 82.6, "learning_rate": 8.701413427561837e-06, "loss": 0.4075, "step": 93500 }, { "epoch": 83.04, "learning_rate": 8.480565371024736e-06, "loss": 0.4082, "step": 94000 }, { "epoch": 83.48, "learning_rate": 8.259717314487634e-06, "loss": 0.4069, "step": 94500 }, { "epoch": 83.92, "learning_rate": 8.03886925795053e-06, "loss": 0.4081, "step": 95000 }, { "epoch": 84.36, "learning_rate": 7.818021201413428e-06, "loss": 0.4068, "step": 95500 }, { "epoch": 84.81, "learning_rate": 7.597173144876325e-06, "loss": 0.4075, "step": 96000 }, { "epoch": 85.25, "learning_rate": 7.376325088339223e-06, "loss": 0.407, "step": 96500 }, { "epoch": 85.69, "learning_rate": 7.15547703180212e-06, "loss": 0.407, "step": 97000 }, { "epoch": 86.13, "learning_rate": 6.934628975265017e-06, "loss": 0.4071, "step": 97500 }, { "epoch": 86.57, "learning_rate": 6.713780918727916e-06, "loss": 0.4064, "step": 98000 }, { "epoch": 87.01, "learning_rate": 6.492932862190814e-06, "loss": 0.4073, "step": 98500 }, { "epoch": 87.46, "learning_rate": 6.272084805653711e-06, "loss": 0.406, "step": 99000 }, { "epoch": 87.9, "learning_rate": 6.051236749116608e-06, "loss": 0.4068, "step": 99500 }, { "epoch": 88.34, "learning_rate": 5.830388692579505e-06, "loss": 0.4059, "step": 100000 }, { "epoch": 88.78, "learning_rate": 5.609540636042403e-06, "loss": 0.4063, "step": 100500 }, { "epoch": 89.22, "learning_rate": 5.388692579505301e-06, "loss": 0.4061, "step": 101000 }, { "epoch": 89.66, "learning_rate": 5.167844522968198e-06, "loss": 0.4059, "step": 101500 }, { "epoch": 90.11, "learning_rate": 4.946996466431096e-06, "loss": 0.4061, "step": 102000 }, { "epoch": 90.55, "learning_rate": 4.726148409893993e-06, "loss": 0.4056, "step": 102500 }, { "epoch": 90.99, "learning_rate": 4.505300353356891e-06, "loss": 0.406, "step": 103000 }, { "epoch": 91.43, "learning_rate": 4.284452296819788e-06, "loss": 0.4051, "step": 103500 }, { "epoch": 91.87, "learning_rate": 4.063604240282685e-06, "loss": 0.4057, "step": 104000 }, { "epoch": 92.31, "learning_rate": 3.842756183745584e-06, "loss": 0.4052, "step": 104500 }, { "epoch": 92.76, "learning_rate": 3.621908127208481e-06, "loss": 0.4054, "step": 105000 }, { "epoch": 93.2, "learning_rate": 3.401060070671378e-06, "loss": 0.4051, "step": 105500 }, { "epoch": 93.64, "learning_rate": 3.1802120141342753e-06, "loss": 0.4049, "step": 106000 }, { "epoch": 94.08, "learning_rate": 2.959363957597173e-06, "loss": 0.4053, "step": 106500 }, { "epoch": 94.52, "learning_rate": 2.738515901060071e-06, "loss": 0.4047, "step": 107000 }, { "epoch": 94.96, "learning_rate": 2.517667844522968e-06, "loss": 0.405, "step": 107500 }, { "epoch": 95.41, "learning_rate": 2.296819787985866e-06, "loss": 0.4044, "step": 108000 }, { "epoch": 95.85, "learning_rate": 2.0759717314487633e-06, "loss": 0.4047, "step": 108500 }, { "epoch": 96.29, "learning_rate": 1.8551236749116609e-06, "loss": 0.4044, "step": 109000 }, { "epoch": 96.73, "learning_rate": 1.6342756183745586e-06, "loss": 0.4044, "step": 109500 }, { "epoch": 97.17, "learning_rate": 1.4134275618374558e-06, "loss": 0.4044, "step": 110000 } ], "max_steps": 113200, "num_train_epochs": 100, "total_flos": 7.858501008656171e+18, "trial_name": null, "trial_params": null }