{ "best_metric": 0.6904399394989014, "best_model_checkpoint": "nllb_200_distilled_600M_ENtoFO_bsz_64_epochs_10lr7e-05/checkpoint-16500", "epoch": 5.798947842417985, "eval_steps": 500, "global_step": 16500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "learning_rate": 1.4e-05, "loss": 1.6553, "step": 100 }, { "epoch": 0.07, "learning_rate": 2.8e-05, "loss": 1.3075, "step": 200 }, { "epoch": 0.11, "learning_rate": 4.2e-05, "loss": 1.2606, "step": 300 }, { "epoch": 0.14, "learning_rate": 5.6e-05, "loss": 1.2085, "step": 400 }, { "epoch": 0.18, "learning_rate": 7e-05, "loss": 1.1956, "step": 500 }, { "epoch": 0.18, "eval_bleu": 36.3211, "eval_chrf++": 55.6095, "eval_gen_len": 17.5404, "eval_loss": 1.01542067527771, "eval_runtime": 1596.0286, "eval_samples_per_second": 4.587, "eval_steps_per_second": 2.294, "step": 500 }, { "epoch": 0.21, "learning_rate": 6.974955277280858e-05, "loss": 1.1487, "step": 600 }, { "epoch": 0.25, "learning_rate": 6.949910554561716e-05, "loss": 1.1406, "step": 700 }, { "epoch": 0.28, "learning_rate": 6.924865831842576e-05, "loss": 1.1197, "step": 800 }, { "epoch": 0.32, "learning_rate": 6.899821109123434e-05, "loss": 1.0739, "step": 900 }, { "epoch": 0.35, "learning_rate": 6.874776386404293e-05, "loss": 1.0817, "step": 1000 }, { "epoch": 0.35, "eval_bleu": 37.9118, "eval_chrf++": 57.1775, "eval_gen_len": 17.6499, "eval_loss": 0.9288437962532043, "eval_runtime": 1599.8831, "eval_samples_per_second": 4.576, "eval_steps_per_second": 2.288, "step": 1000 }, { "epoch": 0.39, "learning_rate": 6.849731663685151e-05, "loss": 1.0784, "step": 1100 }, { "epoch": 0.42, "learning_rate": 6.824686940966009e-05, "loss": 1.0538, "step": 1200 }, { "epoch": 0.46, "learning_rate": 6.799642218246869e-05, "loss": 1.0431, "step": 1300 }, { "epoch": 0.49, "learning_rate": 6.774597495527727e-05, "loss": 1.0347, "step": 1400 }, { "epoch": 0.53, "learning_rate": 6.749552772808586e-05, "loss": 1.0162, "step": 1500 }, { "epoch": 0.53, "eval_bleu": 39.3818, "eval_chrf++": 58.3559, "eval_gen_len": 17.6375, "eval_loss": 0.8730005025863647, "eval_runtime": 1591.8771, "eval_samples_per_second": 4.599, "eval_steps_per_second": 2.3, "step": 1500 }, { "epoch": 0.56, "learning_rate": 6.724508050089444e-05, "loss": 0.9995, "step": 1600 }, { "epoch": 0.6, "learning_rate": 6.699463327370304e-05, "loss": 0.987, "step": 1700 }, { "epoch": 0.63, "learning_rate": 6.674418604651162e-05, "loss": 1.0009, "step": 1800 }, { "epoch": 0.67, "learning_rate": 6.64937388193202e-05, "loss": 0.9843, "step": 1900 }, { "epoch": 0.7, "learning_rate": 6.624329159212879e-05, "loss": 0.974, "step": 2000 }, { "epoch": 0.7, "eval_bleu": 40.0752, "eval_chrf++": 58.9288, "eval_gen_len": 17.741, "eval_loss": 0.8381767272949219, "eval_runtime": 1584.9965, "eval_samples_per_second": 4.619, "eval_steps_per_second": 2.31, "step": 2000 }, { "epoch": 0.74, "learning_rate": 6.599284436493739e-05, "loss": 0.9649, "step": 2100 }, { "epoch": 0.77, "learning_rate": 6.574239713774597e-05, "loss": 0.9511, "step": 2200 }, { "epoch": 0.81, "learning_rate": 6.549194991055455e-05, "loss": 0.9689, "step": 2300 }, { "epoch": 0.84, "learning_rate": 6.524150268336314e-05, "loss": 0.9512, "step": 2400 }, { "epoch": 0.88, "learning_rate": 6.499105545617173e-05, "loss": 0.9513, "step": 2500 }, { "epoch": 0.88, "eval_bleu": 40.6133, "eval_chrf++": 59.5241, "eval_gen_len": 17.7642, "eval_loss": 0.8131038546562195, "eval_runtime": 1608.2394, "eval_samples_per_second": 4.552, "eval_steps_per_second": 2.276, "step": 2500 }, { "epoch": 0.91, "learning_rate": 6.474060822898032e-05, "loss": 0.9476, "step": 2600 }, { "epoch": 0.95, "learning_rate": 6.44901610017889e-05, "loss": 0.9202, "step": 2700 }, { "epoch": 0.98, "learning_rate": 6.423971377459748e-05, "loss": 0.9423, "step": 2800 }, { "epoch": 1.02, "learning_rate": 6.398926654740608e-05, "loss": 0.8949, "step": 2900 }, { "epoch": 1.05, "learning_rate": 6.373881932021467e-05, "loss": 0.8405, "step": 3000 }, { "epoch": 1.05, "eval_bleu": 40.8822, "eval_chrf++": 59.9339, "eval_gen_len": 17.7189, "eval_loss": 0.7998338937759399, "eval_runtime": 1592.0637, "eval_samples_per_second": 4.598, "eval_steps_per_second": 2.3, "step": 3000 }, { "epoch": 1.09, "learning_rate": 6.348837209302325e-05, "loss": 0.8431, "step": 3100 }, { "epoch": 1.12, "learning_rate": 6.323792486583183e-05, "loss": 0.8383, "step": 3200 }, { "epoch": 1.16, "learning_rate": 6.298747763864043e-05, "loss": 0.838, "step": 3300 }, { "epoch": 1.19, "learning_rate": 6.273703041144901e-05, "loss": 0.8527, "step": 3400 }, { "epoch": 1.23, "learning_rate": 6.24865831842576e-05, "loss": 0.8252, "step": 3500 }, { "epoch": 1.23, "eval_bleu": 41.6082, "eval_chrf++": 60.3254, "eval_gen_len": 17.7662, "eval_loss": 0.7859154939651489, "eval_runtime": 1613.9109, "eval_samples_per_second": 4.536, "eval_steps_per_second": 2.268, "step": 3500 }, { "epoch": 1.27, "learning_rate": 6.22361359570662e-05, "loss": 0.836, "step": 3600 }, { "epoch": 1.3, "learning_rate": 6.198568872987478e-05, "loss": 0.8274, "step": 3700 }, { "epoch": 1.34, "learning_rate": 6.173524150268336e-05, "loss": 0.8257, "step": 3800 }, { "epoch": 1.37, "learning_rate": 6.148479427549194e-05, "loss": 0.8301, "step": 3900 }, { "epoch": 1.41, "learning_rate": 6.123434704830053e-05, "loss": 0.8235, "step": 4000 }, { "epoch": 1.41, "eval_bleu": 41.9785, "eval_chrf++": 60.615, "eval_gen_len": 17.6996, "eval_loss": 0.7718562483787537, "eval_runtime": 1591.3834, "eval_samples_per_second": 4.6, "eval_steps_per_second": 2.301, "step": 4000 }, { "epoch": 1.44, "learning_rate": 6.098389982110912e-05, "loss": 0.8335, "step": 4100 }, { "epoch": 1.48, "learning_rate": 6.073345259391771e-05, "loss": 0.8184, "step": 4200 }, { "epoch": 1.51, "learning_rate": 6.048300536672629e-05, "loss": 0.8139, "step": 4300 }, { "epoch": 1.55, "learning_rate": 6.0232558139534877e-05, "loss": 0.8121, "step": 4400 }, { "epoch": 1.58, "learning_rate": 5.998211091234346e-05, "loss": 0.8174, "step": 4500 }, { "epoch": 1.58, "eval_bleu": 41.9942, "eval_chrf++": 60.7015, "eval_gen_len": 17.7548, "eval_loss": 0.7601388692855835, "eval_runtime": 1610.8686, "eval_samples_per_second": 4.545, "eval_steps_per_second": 2.273, "step": 4500 }, { "epoch": 1.62, "learning_rate": 5.973166368515206e-05, "loss": 0.8232, "step": 4600 }, { "epoch": 1.65, "learning_rate": 5.948121645796064e-05, "loss": 0.8099, "step": 4700 }, { "epoch": 1.69, "learning_rate": 5.9230769230769225e-05, "loss": 0.8154, "step": 4800 }, { "epoch": 1.72, "learning_rate": 5.898032200357781e-05, "loss": 0.818, "step": 4900 }, { "epoch": 1.76, "learning_rate": 5.87298747763864e-05, "loss": 0.7992, "step": 5000 }, { "epoch": 1.76, "eval_bleu": 42.3622, "eval_chrf++": 61.0481, "eval_gen_len": 17.8145, "eval_loss": 0.7486168146133423, "eval_runtime": 1597.5591, "eval_samples_per_second": 4.583, "eval_steps_per_second": 2.292, "step": 5000 }, { "epoch": 1.79, "learning_rate": 5.847942754919499e-05, "loss": 0.8087, "step": 5100 }, { "epoch": 1.83, "learning_rate": 5.822898032200357e-05, "loss": 0.7958, "step": 5200 }, { "epoch": 1.86, "learning_rate": 5.7978533094812156e-05, "loss": 0.8022, "step": 5300 }, { "epoch": 1.9, "learning_rate": 5.7728085867620747e-05, "loss": 0.7876, "step": 5400 }, { "epoch": 1.93, "learning_rate": 5.747763864042934e-05, "loss": 0.7915, "step": 5500 }, { "epoch": 1.93, "eval_bleu": 42.4851, "eval_chrf++": 61.1145, "eval_gen_len": 17.7756, "eval_loss": 0.7351738810539246, "eval_runtime": 1598.7533, "eval_samples_per_second": 4.579, "eval_steps_per_second": 2.29, "step": 5500 }, { "epoch": 1.97, "learning_rate": 5.722719141323792e-05, "loss": 0.7795, "step": 5600 }, { "epoch": 2.0, "learning_rate": 5.6976744186046504e-05, "loss": 0.8015, "step": 5700 }, { "epoch": 2.04, "learning_rate": 5.6726296958855094e-05, "loss": 0.7219, "step": 5800 }, { "epoch": 2.07, "learning_rate": 5.647584973166368e-05, "loss": 0.7231, "step": 5900 }, { "epoch": 2.11, "learning_rate": 5.622540250447227e-05, "loss": 0.718, "step": 6000 }, { "epoch": 2.11, "eval_bleu": 42.5957, "eval_chrf++": 61.1828, "eval_gen_len": 17.7144, "eval_loss": 0.7349444627761841, "eval_runtime": 1593.2386, "eval_samples_per_second": 4.595, "eval_steps_per_second": 2.298, "step": 6000 }, { "epoch": 2.14, "learning_rate": 5.597495527728085e-05, "loss": 0.7155, "step": 6100 }, { "epoch": 2.18, "learning_rate": 5.572450805008944e-05, "loss": 0.7222, "step": 6200 }, { "epoch": 2.21, "learning_rate": 5.5474060822898026e-05, "loss": 0.7113, "step": 6300 }, { "epoch": 2.25, "learning_rate": 5.522361359570661e-05, "loss": 0.7067, "step": 6400 }, { "epoch": 2.28, "learning_rate": 5.497316636851521e-05, "loss": 0.714, "step": 6500 }, { "epoch": 2.28, "eval_bleu": 43.1947, "eval_chrf++": 61.6389, "eval_gen_len": 17.7485, "eval_loss": 0.7279652953147888, "eval_runtime": 1602.3425, "eval_samples_per_second": 4.569, "eval_steps_per_second": 2.285, "step": 6500 }, { "epoch": 2.32, "learning_rate": 5.472271914132379e-05, "loss": 0.7284, "step": 6600 }, { "epoch": 2.35, "learning_rate": 5.4472271914132374e-05, "loss": 0.7106, "step": 6700 }, { "epoch": 2.39, "learning_rate": 5.422182468694096e-05, "loss": 0.7226, "step": 6800 }, { "epoch": 2.43, "learning_rate": 5.3971377459749555e-05, "loss": 0.7151, "step": 6900 }, { "epoch": 2.46, "learning_rate": 5.372093023255814e-05, "loss": 0.7242, "step": 7000 }, { "epoch": 2.46, "eval_bleu": 43.0217, "eval_chrf++": 61.4, "eval_gen_len": 17.7472, "eval_loss": 0.7255465984344482, "eval_runtime": 1596.9259, "eval_samples_per_second": 4.584, "eval_steps_per_second": 2.293, "step": 7000 }, { "epoch": 2.5, "learning_rate": 5.347048300536672e-05, "loss": 0.7115, "step": 7100 }, { "epoch": 2.53, "learning_rate": 5.3220035778175306e-05, "loss": 0.6996, "step": 7200 }, { "epoch": 2.57, "learning_rate": 5.296958855098389e-05, "loss": 0.7226, "step": 7300 }, { "epoch": 2.6, "learning_rate": 5.2719141323792486e-05, "loss": 0.7023, "step": 7400 }, { "epoch": 2.64, "learning_rate": 5.246869409660107e-05, "loss": 0.7035, "step": 7500 }, { "epoch": 2.64, "eval_bleu": 42.9886, "eval_chrf++": 61.5585, "eval_gen_len": 17.7513, "eval_loss": 0.7192216515541077, "eval_runtime": 1608.427, "eval_samples_per_second": 4.552, "eval_steps_per_second": 2.276, "step": 7500 }, { "epoch": 2.67, "learning_rate": 5.2218246869409654e-05, "loss": 0.7175, "step": 7600 }, { "epoch": 2.71, "learning_rate": 5.1967799642218244e-05, "loss": 0.7164, "step": 7700 }, { "epoch": 2.74, "learning_rate": 5.171735241502683e-05, "loss": 0.703, "step": 7800 }, { "epoch": 2.78, "learning_rate": 5.146690518783542e-05, "loss": 0.7067, "step": 7900 }, { "epoch": 2.81, "learning_rate": 5.1216457960644e-05, "loss": 0.7048, "step": 8000 }, { "epoch": 2.81, "eval_bleu": 42.9399, "eval_chrf++": 61.4851, "eval_gen_len": 17.7067, "eval_loss": 0.7168448567390442, "eval_runtime": 1552.4929, "eval_samples_per_second": 4.716, "eval_steps_per_second": 2.358, "step": 8000 }, { "epoch": 2.85, "learning_rate": 5.096601073345259e-05, "loss": 0.7127, "step": 8100 }, { "epoch": 2.88, "learning_rate": 5.0715563506261176e-05, "loss": 0.7091, "step": 8200 }, { "epoch": 2.92, "learning_rate": 5.046511627906976e-05, "loss": 0.7122, "step": 8300 }, { "epoch": 2.95, "learning_rate": 5.021466905187835e-05, "loss": 0.6949, "step": 8400 }, { "epoch": 2.99, "learning_rate": 4.996422182468694e-05, "loss": 0.685, "step": 8500 }, { "epoch": 2.99, "eval_bleu": 43.114, "eval_chrf++": 61.6028, "eval_gen_len": 17.844, "eval_loss": 0.7094260454177856, "eval_runtime": 1415.2591, "eval_samples_per_second": 5.173, "eval_steps_per_second": 2.587, "step": 8500 }, { "epoch": 3.02, "learning_rate": 4.9713774597495524e-05, "loss": 0.6618, "step": 8600 }, { "epoch": 3.06, "learning_rate": 4.946332737030411e-05, "loss": 0.6417, "step": 8700 }, { "epoch": 3.09, "learning_rate": 4.92128801431127e-05, "loss": 0.65, "step": 8800 }, { "epoch": 3.13, "learning_rate": 4.896243291592129e-05, "loss": 0.6375, "step": 8900 }, { "epoch": 3.16, "learning_rate": 4.871198568872987e-05, "loss": 0.632, "step": 9000 }, { "epoch": 3.16, "eval_bleu": 43.3779, "eval_chrf++": 61.8915, "eval_gen_len": 17.7121, "eval_loss": 0.7186790108680725, "eval_runtime": 1408.2967, "eval_samples_per_second": 5.198, "eval_steps_per_second": 2.6, "step": 9000 }, { "epoch": 3.2, "learning_rate": 4.8461538461538455e-05, "loss": 0.6434, "step": 9100 }, { "epoch": 3.23, "learning_rate": 4.821109123434704e-05, "loss": 0.6354, "step": 9200 }, { "epoch": 3.27, "learning_rate": 4.7960644007155636e-05, "loss": 0.6374, "step": 9300 }, { "epoch": 3.3, "learning_rate": 4.771019677996422e-05, "loss": 0.6478, "step": 9400 }, { "epoch": 3.34, "learning_rate": 4.74597495527728e-05, "loss": 0.6444, "step": 9500 }, { "epoch": 3.34, "eval_bleu": 43.0761, "eval_chrf++": 61.6092, "eval_gen_len": 17.7518, "eval_loss": 0.7161450982093811, "eval_runtime": 1412.5727, "eval_samples_per_second": 5.183, "eval_steps_per_second": 2.592, "step": 9500 }, { "epoch": 3.37, "learning_rate": 4.720930232558139e-05, "loss": 0.6358, "step": 9600 }, { "epoch": 3.41, "learning_rate": 4.695885509838998e-05, "loss": 0.6502, "step": 9700 }, { "epoch": 3.44, "learning_rate": 4.670840787119857e-05, "loss": 0.6376, "step": 9800 }, { "epoch": 3.48, "learning_rate": 4.645796064400715e-05, "loss": 0.638, "step": 9900 }, { "epoch": 3.51, "learning_rate": 4.6207513416815735e-05, "loss": 0.6302, "step": 10000 }, { "epoch": 3.51, "eval_bleu": 43.4763, "eval_chrf++": 61.8105, "eval_gen_len": 17.7754, "eval_loss": 0.7070448398590088, "eval_runtime": 1409.243, "eval_samples_per_second": 5.195, "eval_steps_per_second": 2.598, "step": 10000 }, { "epoch": 3.55, "learning_rate": 4.5957066189624325e-05, "loss": 0.632, "step": 10100 }, { "epoch": 3.58, "learning_rate": 4.570661896243291e-05, "loss": 0.6364, "step": 10200 }, { "epoch": 3.62, "learning_rate": 4.54561717352415e-05, "loss": 0.6466, "step": 10300 }, { "epoch": 3.66, "learning_rate": 4.520572450805009e-05, "loss": 0.6373, "step": 10400 }, { "epoch": 3.69, "learning_rate": 4.495527728085867e-05, "loss": 0.6478, "step": 10500 }, { "epoch": 3.69, "eval_bleu": 43.725, "eval_chrf++": 62.0616, "eval_gen_len": 17.788, "eval_loss": 0.705007016658783, "eval_runtime": 1414.2328, "eval_samples_per_second": 5.177, "eval_steps_per_second": 2.589, "step": 10500 }, { "epoch": 3.73, "learning_rate": 4.470483005366726e-05, "loss": 0.6516, "step": 10600 }, { "epoch": 3.76, "learning_rate": 4.445438282647585e-05, "loss": 0.6334, "step": 10700 }, { "epoch": 3.8, "learning_rate": 4.420393559928444e-05, "loss": 0.6542, "step": 10800 }, { "epoch": 3.83, "learning_rate": 4.395348837209302e-05, "loss": 0.646, "step": 10900 }, { "epoch": 3.87, "learning_rate": 4.3703041144901605e-05, "loss": 0.6374, "step": 11000 }, { "epoch": 3.87, "eval_bleu": 43.7206, "eval_chrf++": 62.1048, "eval_gen_len": 17.7229, "eval_loss": 0.6963800191879272, "eval_runtime": 1413.4358, "eval_samples_per_second": 5.18, "eval_steps_per_second": 2.59, "step": 11000 }, { "epoch": 3.9, "learning_rate": 4.345259391771019e-05, "loss": 0.6416, "step": 11100 }, { "epoch": 3.94, "learning_rate": 4.3202146690518786e-05, "loss": 0.6249, "step": 11200 }, { "epoch": 3.97, "learning_rate": 4.295169946332737e-05, "loss": 0.638, "step": 11300 }, { "epoch": 4.01, "learning_rate": 4.270125223613595e-05, "loss": 0.6352, "step": 11400 }, { "epoch": 4.04, "learning_rate": 4.2450805008944536e-05, "loss": 0.5804, "step": 11500 }, { "epoch": 4.04, "eval_bleu": 43.8669, "eval_chrf++": 62.1364, "eval_gen_len": 17.7865, "eval_loss": 0.7024260759353638, "eval_runtime": 1410.9857, "eval_samples_per_second": 5.189, "eval_steps_per_second": 2.595, "step": 11500 }, { "epoch": 4.08, "learning_rate": 4.220035778175313e-05, "loss": 0.5799, "step": 11600 }, { "epoch": 4.11, "learning_rate": 4.194991055456172e-05, "loss": 0.5852, "step": 11700 }, { "epoch": 4.15, "learning_rate": 4.16994633273703e-05, "loss": 0.5801, "step": 11800 }, { "epoch": 4.18, "learning_rate": 4.1449016100178884e-05, "loss": 0.5945, "step": 11900 }, { "epoch": 4.22, "learning_rate": 4.1198568872987475e-05, "loss": 0.5919, "step": 12000 }, { "epoch": 4.22, "eval_bleu": 43.6775, "eval_chrf++": 61.9586, "eval_gen_len": 17.8369, "eval_loss": 0.7032192945480347, "eval_runtime": 1420.6308, "eval_samples_per_second": 5.153, "eval_steps_per_second": 2.577, "step": 12000 }, { "epoch": 4.25, "learning_rate": 4.094812164579606e-05, "loss": 0.5928, "step": 12100 }, { "epoch": 4.29, "learning_rate": 4.069767441860465e-05, "loss": 0.5923, "step": 12200 }, { "epoch": 4.32, "learning_rate": 4.044722719141323e-05, "loss": 0.5943, "step": 12300 }, { "epoch": 4.36, "learning_rate": 4.019677996422182e-05, "loss": 0.5905, "step": 12400 }, { "epoch": 4.39, "learning_rate": 3.9946332737030406e-05, "loss": 0.5879, "step": 12500 }, { "epoch": 4.39, "eval_bleu": 43.9642, "eval_chrf++": 62.2021, "eval_gen_len": 17.7873, "eval_loss": 0.7024480104446411, "eval_runtime": 1420.746, "eval_samples_per_second": 5.153, "eval_steps_per_second": 2.577, "step": 12500 }, { "epoch": 4.43, "learning_rate": 3.9695885509839e-05, "loss": 0.5855, "step": 12600 }, { "epoch": 4.46, "learning_rate": 3.944543828264758e-05, "loss": 0.5855, "step": 12700 }, { "epoch": 4.5, "learning_rate": 3.919499105545617e-05, "loss": 0.5833, "step": 12800 }, { "epoch": 4.53, "learning_rate": 3.8944543828264754e-05, "loss": 0.5767, "step": 12900 }, { "epoch": 4.57, "learning_rate": 3.869409660107334e-05, "loss": 0.5858, "step": 13000 }, { "epoch": 4.57, "eval_bleu": 44.027, "eval_chrf++": 62.2226, "eval_gen_len": 17.7678, "eval_loss": 0.6992958784103394, "eval_runtime": 1410.8973, "eval_samples_per_second": 5.189, "eval_steps_per_second": 2.595, "step": 13000 }, { "epoch": 4.6, "learning_rate": 3.8443649373881935e-05, "loss": 0.5775, "step": 13100 }, { "epoch": 4.64, "learning_rate": 3.819320214669052e-05, "loss": 0.5825, "step": 13200 }, { "epoch": 4.67, "learning_rate": 3.79427549194991e-05, "loss": 0.5851, "step": 13300 }, { "epoch": 4.71, "learning_rate": 3.7692307692307686e-05, "loss": 0.5913, "step": 13400 }, { "epoch": 4.74, "learning_rate": 3.744186046511627e-05, "loss": 0.5877, "step": 13500 }, { "epoch": 4.74, "eval_bleu": 44.1426, "eval_chrf++": 62.3429, "eval_gen_len": 17.7805, "eval_loss": 0.6957116723060608, "eval_runtime": 1414.8882, "eval_samples_per_second": 5.174, "eval_steps_per_second": 2.587, "step": 13500 }, { "epoch": 4.78, "learning_rate": 3.719141323792487e-05, "loss": 0.5857, "step": 13600 }, { "epoch": 4.81, "learning_rate": 3.694096601073345e-05, "loss": 0.5776, "step": 13700 }, { "epoch": 4.85, "learning_rate": 3.6690518783542034e-05, "loss": 0.5985, "step": 13800 }, { "epoch": 4.89, "learning_rate": 3.644007155635062e-05, "loss": 0.5855, "step": 13900 }, { "epoch": 4.92, "learning_rate": 3.618962432915921e-05, "loss": 0.5895, "step": 14000 }, { "epoch": 4.92, "eval_bleu": 44.2097, "eval_chrf++": 62.4158, "eval_gen_len": 17.7713, "eval_loss": 0.6944009065628052, "eval_runtime": 1417.3776, "eval_samples_per_second": 5.165, "eval_steps_per_second": 2.583, "step": 14000 }, { "epoch": 4.96, "learning_rate": 3.59391771019678e-05, "loss": 0.581, "step": 14100 }, { "epoch": 4.99, "learning_rate": 3.568872987477638e-05, "loss": 0.5835, "step": 14200 }, { "epoch": 5.03, "learning_rate": 3.543828264758497e-05, "loss": 0.5512, "step": 14300 }, { "epoch": 5.06, "learning_rate": 3.5187835420393556e-05, "loss": 0.5324, "step": 14400 }, { "epoch": 5.1, "learning_rate": 3.4937388193202146e-05, "loss": 0.5381, "step": 14500 }, { "epoch": 5.1, "eval_bleu": 43.9778, "eval_chrf++": 62.2087, "eval_gen_len": 17.8153, "eval_loss": 0.7013605833053589, "eval_runtime": 1419.0259, "eval_samples_per_second": 5.159, "eval_steps_per_second": 2.58, "step": 14500 }, { "epoch": 5.13, "learning_rate": 3.468694096601073e-05, "loss": 0.5364, "step": 14600 }, { "epoch": 5.17, "learning_rate": 3.443649373881932e-05, "loss": 0.543, "step": 14700 }, { "epoch": 5.2, "learning_rate": 3.4186046511627904e-05, "loss": 0.5432, "step": 14800 }, { "epoch": 5.24, "learning_rate": 3.3935599284436494e-05, "loss": 0.542, "step": 14900 }, { "epoch": 5.27, "learning_rate": 3.368515205724508e-05, "loss": 0.5385, "step": 15000 }, { "epoch": 5.27, "eval_bleu": 44.1326, "eval_chrf++": 62.3372, "eval_gen_len": 17.8174, "eval_loss": 0.7036887407302856, "eval_runtime": 1418.4921, "eval_samples_per_second": 5.161, "eval_steps_per_second": 2.581, "step": 15000 }, { "epoch": 5.31, "learning_rate": 3.343470483005367e-05, "loss": 0.5467, "step": 15100 }, { "epoch": 5.34, "learning_rate": 3.318425760286225e-05, "loss": 0.5466, "step": 15200 }, { "epoch": 5.38, "learning_rate": 3.2933810375670835e-05, "loss": 0.5439, "step": 15300 }, { "epoch": 5.41, "learning_rate": 3.2683363148479426e-05, "loss": 0.5403, "step": 15400 }, { "epoch": 5.45, "learning_rate": 3.243291592128801e-05, "loss": 0.5481, "step": 15500 }, { "epoch": 5.45, "eval_bleu": 44.053, "eval_chrf++": 62.418, "eval_gen_len": 17.7614, "eval_loss": 0.6976599097251892, "eval_runtime": 1417.644, "eval_samples_per_second": 5.164, "eval_steps_per_second": 2.582, "step": 15500 }, { "epoch": 5.48, "learning_rate": 3.21824686940966e-05, "loss": 0.5445, "step": 15600 }, { "epoch": 5.52, "learning_rate": 3.1932021466905183e-05, "loss": 0.5518, "step": 15700 }, { "epoch": 5.55, "learning_rate": 3.1681574239713774e-05, "loss": 0.5451, "step": 15800 }, { "epoch": 5.59, "learning_rate": 3.143112701252236e-05, "loss": 0.5387, "step": 15900 }, { "epoch": 5.62, "learning_rate": 3.118067978533094e-05, "loss": 0.5473, "step": 16000 }, { "epoch": 5.62, "eval_bleu": 44.2406, "eval_chrf++": 62.4882, "eval_gen_len": 17.8263, "eval_loss": 0.6982511878013611, "eval_runtime": 1415.9788, "eval_samples_per_second": 5.17, "eval_steps_per_second": 2.585, "step": 16000 }, { "epoch": 5.66, "learning_rate": 3.093023255813953e-05, "loss": 0.5483, "step": 16100 }, { "epoch": 5.69, "learning_rate": 3.0679785330948115e-05, "loss": 0.537, "step": 16200 }, { "epoch": 5.73, "learning_rate": 3.0429338103756705e-05, "loss": 0.5479, "step": 16300 }, { "epoch": 5.76, "learning_rate": 3.0178890876565292e-05, "loss": 0.5445, "step": 16400 }, { "epoch": 5.8, "learning_rate": 2.992844364937388e-05, "loss": 0.5466, "step": 16500 }, { "epoch": 5.8, "eval_bleu": 44.2692, "eval_chrf++": 62.4172, "eval_gen_len": 17.7783, "eval_loss": 0.6904399394989014, "eval_runtime": 1415.8621, "eval_samples_per_second": 5.171, "eval_steps_per_second": 2.586, "step": 16500 } ], "logging_steps": 100, "max_steps": 28450, "num_train_epochs": 10, "save_steps": 1500, "total_flos": 2.2884516243072614e+18, "trial_name": null, "trial_params": null }